;---------------------------
; solid air (256b intro - well actually 224b of code; lovebyte doesn't let you exclude TOS header)
; jan 2025
; tom
;---------------------------
;assumes a4=a5=bss at execution (as we have no data section here)
;assumes all data registers = 0 at execution

		output	.tos
		opt 	o-


		section	text

;init: enter supervisor mode, pick a low-memory address for the generated code, disable interrupts, move the screen base.
;In:  data registers assumed 0 at entry (see the assumptions at the top of the file).
;Out: d2 = $20 (reused later as a colour / x-offset), a0 = sign-extended low word of whatever trap #1 returned in d0.
init:		moveq	#$20,d2			;horrendous super mode set: $20 is the function word handed to trap #1 (GEMDOS)
		move.w	d2,-(sp)			;we'll use that $20 in d2 later :)
						;NOTE(review): only the function word is pushed, so the call's argument is whatever already sits on the stack - confirm this is acceptable to Super()
		trap	#1	
		addq.w	#2,sp			;drop the word pushed above. I wouldn't normally lower myself to a stack correction in sizecoding, but I had a spare word, so enjoy!
		
		move.w	d0,a0			;borrow the return word in d0 for the position of some lower memory generated code - seems to work on all TOS versions.....!
						;(a word move into an address register sign-extends, so a0 only lands in low memory if that word is below $8000)
		move.w	#$2700,sr			;turn off all interrupts (stay in supervisor mode, interrupt mask = 7).  This effect doesn't even have a vbl...
		subq.w	#1,$ffff8200.w		;absolutely filthy way to get a clear screenbuffer.  Sets the screen address back $10000.
						;(the low byte of this word is $ffff8201, which the comment above treats as the high byte of the screen address)
		
						;only six instructions in and that's already three risky hacks.......!!
		
;-------------------
;time to generate a sinewave - adapted from superogue's cool 14b sine generator
;the sinewave values are used to step through our big jump table (generated below) to create the ripple/reflection effect
;
;How it works: d4 is a step that drops by 2 on every pass and d3 is the running sum of those steps, so d3 rises,
;peaks and falls back to zero along a parabola (a cheap stand-in for half a sine period). After n passes
;d3 = n*(126-n), so the loop exits after 126 passes when d3.w is zero again. Each entry written is (d3>>7)*2,
;i.e. an even byte offset suitable for stepping through a table of words.
;In:  a4 = a5 = start of zeroed bss, d3 = d4 = 0 (see the assumptions at the top of the file).
;Out: usp = start of the wave, a4/a5 advanced past the copied data, d3.w = 0, d4.w = $ffff (dbf ran out).

sinegen:		move.l	a5,usp			;store the start of our wave in usp (used as a spare register; read back in the effect loop)
		moveq	#127,d4		 		;initial step; it is decremented before first use, so the first step applied is 125
.loop:
			subq.w	#2,d4			;step shrinks by 2 each pass and eventually goes negative
			move.w	d3,d0
			asr.w	#7,d0		;divide our wave value by 128
			add.w	d0,d0		;double it (because our jump table is words)
			move.w	d0,(a5)+		;store +ve wave phase
			sub.w	d0,256-2(a5)	;store -ve wave phase 128 words further on (a5 has already moved on by 2, hence the -2)
						;this subtracts from memory, so it relies on that word being zero beforehand
			add.w	d4,d3		;accumulate the step; also sets the flags tested below
			bne.s	.loop		;stop once the running sum is back at zero

		lea	256(a5),a5		;this loop duplicates the wave (far too many times as it uses a dirty reg for the number of iterations, but who cares - saves some bytes)
						;NOTE(review): d4.w is negative here, and dbf treats it as a large unsigned count, so this copies far more than the 30000 bytes reserved in bss - relies on free RAM beyond it
.copyloop:		move.w	(a4)+,(a5)+		;a4 still holds its entry value, assumed to be the start of the wave
			dbf	d4,.copyloop	
	
		
;-------------------
;this code now generates eight different truecolour "line" subroutines in lower memory
;each line is just made up of a string of simple move.w dX,(a6) where a6 points to palette colour 0, and a different colour value will be in each register.
;there are sixteen of these in each subroutine, so plus call and rts each subroutine takes 40 nops when executed.
;the addresses of our subroutines (in lower memory, so words only) are stored in a big word aligned jump table
;
;In:  a0 = low-memory address where the generated code is written, a5 = somewhere in free RAM.
;Out: d0 = base of the jump table (low word zero), a5 = first address after the 64K jump table,
;     a0 = first free address after the generated routines, d7.w = $ffff, d1.w = 0. Also uses d6, a1, a6.
;The loop counter d7 runs 7..0 inside .gcl0, so the count copied into d6 for .gcil0 can never be negative;
;only the second fill loop (.gcil1) needs a guard for a zero-length run.

gencode:

		suba.w	a5,a5				;"word align" a5: subtracting its own sign-extended low word zeroes the low word, so a5 lands on a 64K boundary
		move.l	a5,d0				;keep in d0 (low word is zero, so later add.w offsets wrap around inside the 64K table)
		
		moveq	#8-1,d7				;for each of our eight line subroutines...
.gcl0:			moveq	#128-1,d6			;store the start address (lower mem, so .w) 128 times into our word aligned jump table
.adrl:				move.w	a0,(a5)+
				dbf	d6,.adrl

			move.w	#$3c82,d1			;this is the opcode for move.w d2,(a6)
			move.w	d7,d6			;d7+1 copies of it; d7 is 7..0 here so no guard branch is needed
.gcil0:				move.w	d1,(a0)+		;write this a few times
				dbf	d6,.gcil0
				
			moveq	#7-1,d6
			sub.w	d7,d6			;fill the rest of the 8 slots with opcodes incrementing up the data registers
			bmi.s	.skip1			;d7 = 7: all eight slots already hold move.w d2,(a6)
.gcil1:				addq.w	#1,d1		;now our opcode is move.w d2+n,(a6).... (past d7 the increment carries into the mode field, giving a0 and a1 as sources)
				move.w	d1,(a0)+
				dbf	d6,.gcil1
				
.skip1:			move.l	a0,a1
			moveq	#8-1,d6			;and now mirror what we've just done - so we have 16 opcodes in our "line", reflected down the middle!
.gcil2:				move.w	-(a1),(a0)+		;a1 walks backwards over the first eight opcodes while a0 appends them
				dbf	d6,.gcil2

			move.w	#$4e75,(a0)+		;pop an rts on the end
			dbf	d7,.gcl0			;next line
			
		move.l	a5,a6				;this loop duplicates but mirrors what we've just written in the jump table
		move.w	#128*8-1,d7			;so our jump table now has start address of line0 x 128, line1 x128.... up to line7x128, then line7x128,line6x128... to line0x128
.gccopl:			move.w	-(a6),(a5)+
			dbf	d7,.gccopl			;a6 ends up back at the start of the table
		
.gccopl2:		move.w	(a6)+,(a5)+			;and now fill the whole jump table with this repeating pattern of 128x16 start addresses of our generated routines, up to the next 64K boundary
		move.w	a5,d1				;low word of a5 into a data register purely to set the flags
		bne.s	.gccopl2				;keep going until the low word of a5 wraps to zero


;--------------------------------
;Variable and palette setup.
;In:  a5 = first address after the jump table, a0 = first free low-memory address after the generated routines,
;     d2 = $20, d5 = 0 (never touched since entry, see the assumptions at the top of the file).
;Out: (a5).w = 32, a4 = start of the generated palette, a0 = just past it, d5.w = $0707, d7.w = $ffff.

		move.w	d2,(a5)			;a5 now points to clear memory straight after our jump table.  so let's use this space for some variables.
						;first, write 32 to (a5). this value will be used to control the different x position
						;of each of the two effect windows.
						;NOTE(review): nothing visible here clears the words that follow (start position and wave counter);
						;the effect loop relies on them already being zero - confirm

;Generate a palette - it's down in lower memory after the above generated code subroutines, but that's fine!
		
		move.l	a0,a4				;store our palette address in a4
		moveq	#7-1,d7				;we only need seven colours
.colloop:			addi.w	#$101,d5			;$0101, $0202 ... $0707
			move.w	d5,(a0)+
			dbf	d7,.colloop
		
;--------------------------------
;the effect!

;This is essentially a version of a scanline selection effect. It's a baby version of tech that I used to death in "A Series Of Raster Effects".
;On each scanline, we:
;-- choose a line subroutine from the jump table
;-- call it three times to (nearly) fill the whole scanline with raster changes.

;Each generated subroutine (+jsr +rts) takes 40 nops.  We do three of them = 120 nops. The "choosing a line to call" part takes 5 nops, and the dbf is 3 nops
;This means each loop iteration takes 128 nops which is exactly one scanline - which means everything will be perfectly aligned on the screen.

;After a fixed number of scanlines, there are then two (well, nearly two) "blank" scanlines which are mainly just burnt with a wait loop.  However,
;these also do some variable setting and modify the colours, creating the visual of having two effect windows.
;The vertical scroll effect is produced by carefully choosing (a) the number of scanlines in each window and (b) the length
;of the wait loop to ensure that two effect windows take very slightly less screen time to execute than the 313x128 nops in a full frame (one vertical blank to the next).
;This means there is a slight offset between each frame - creating the scroll effect. I ended up working this out by trial and error.  
;There is no vbl at all, and no synclocking!
;If you watch the effect frame by frame, you can see exactly what's happening as the solid background colour change offset position is obvious as
;it moves across the screen. (ew)

;This is our register usage:
;d0  = jump table.l
;d1  = loop

;d2  = $00000020
;d3  = palette.w
;d4  = "
;d5  = "
;d6  = "
;d7  = "
;a0  = "
;a1  = "

;a2  = temp
;a3  = sinewave
;a4  = generated palette
;a5  = clear varspace (.w = x offset; .w = start position into jump table; .l = counter for wave)
;a6  = $ffff8240.w
;a7  = stack, needed for jsr
;usp = start sinewave


;effect: the endless display loop. Each pass of .mainloop draws one effect window: a short wait, some variable
;updates, then 154 scanlines of three generated-routine calls each, then a palette negate for the next window.
;The numbers at the start of each comment are the author's timings in nops.
;In:  d0 = jump table base (low word is overwritten here), d2 = $20, a4 = generated palette, a5 = variables,
;     usp = start of the sinewave. Never returns.
effect:		lea	$ffff8240.w,a6		;a6 = palette colour 0, the target of every generated move.w

.mainloop:		

			moveq	#82-16-1,d1		;1	This loop burns around two scanlines between effect windows
			add.w	(a5),d1			;2	Add the x-offset to the length of the loop
			neg.w	(a5)+			;3	Negs so next time, the x-offset is subtracted from the length of the loop - so everything stays aligned
.waitloop:			dbf	d1,.waitloop	

			move.w	(a5),d0			;2	Set the start of the jump tab (only the low word changes; the high word of d0 keeps the table base)
			addi.w	#70,(a5)+			;4	And add to the offset so the next effect window starts in a different place

			move.l	usp,a3			;1	Move the very start of the sinewave to a3						
			move.l	(a5),d1			;3	This long holds two word counters; the swap below alternates them, so each one is used and advanced on every other pass of .mainloop
			adda.w	d1,a3			;2	whilst only adding the bottom word to our wave.
			addq.w	#2,d1			;1	Next entry (the wave is made of words)
			andi.w	#511,d1			;2	Wave has 256 steps so this makes it wrap.
			swap	d1			;1
			move.l	d1,(a5)			;3	

			movem.w	(a4)+,d3-d7/a0-a1		;10	Set the palette (that is, the registers that are then written to 8240.w). a4 moves on by 14 bytes; .palloop below walks it back
			
			move.l	d0,a2			;1	Read the address of the first line subrout to jump to from the jump table, and move to the next point in the table by adding the next sine value.
			move.w	(a2),a2			;2	It's upsettingly repetitive as we also do these instructions in the mainloop
			add.w	(a3)+,d0			;2 	However, we need to do this here so we can more evenly space the three line subroutines across a scanline, and we need a jump address already in a2 when the loop starts!

			move.w	#154-1,d1			;2	This is the height of one effect window. The number is chosen to just fall short of 313 scanlines for both windows
.iloop:	
				;For every scanline....
				jsr	(a2)		;4+2*16+4 == 40	Call the line fragment routine in a2.

				move.l	d0,a2		;1	Grab the next line subroutine into a2 - see what I mean about even spacing?
				move.w	(a2),a2		;2	There's either 2 or 3 nops between each line fragment, meaning the diamonds look evenly spaced on screen.

				jsr	(a2)		;40

				add.w	(a3)+,d0		;2	Add the next sine value to our jump table position (word add, so it wraps inside the 64K table)

				jsr	(a2)		;40
				
				dbf	d1,.iloop		;3	;Next scanline
			
			;Effect window now complete
			moveq	#7-1,d1			;1	This loop whips back through the generated palette and negs it, to change the colours for the next window!
.palloop:				neg.w	-(a4)		;4	It also puts a4 back to the start of the generated palette - ah sizecoding is so elegant...	
				dbf	d1,.palloop	;3 == 7x7+1 = 50
				
			swap	d2			;1	Swaps d2, changing the subsequent window background colour from $020 to black and then back again
			subq.w	#4,a5			;2	Reset our variable pointer back to the start of our variables (undoes the two (a5)+ above).
			
			bra.s	.mainloop			;3	Loop forever........

		
;----------------------------------------------------------------------

		section	data
		;intentionally empty: the intro has no initialised data
		
		
		section	bss
		
		;membase is never referenced by name in this file; the code reaches this area through a4/a5,
		;which it assumes point at the bss on entry (see the assumptions at the top of the file).
		;NOTE(review): the 64K jump table alone is larger than this reservation, so the code relies on
		;free RAM following the bss.
membase:		ds.b	30000