// [ Chaos Equations ] intro for ARM / RISC OS
// (c) 2021 Jin X (t.me/jinxonik, jin_x@list.ru)

// Bonus 1152x896/24bpp version with fade

//// SETTINGS //////////////////////////////////////////////////////////////////////////////////////////////////////////

way_length	=	256			// time steps computed per pass of mainloop (i.e. between fade-out / ESC checks)
star_count	=	512			// points iterated (and plotted when on screen) per time step

//// CODE //////////////////////////////////////////////////////////////////////////////////////////////////////////////

.include "riscos_swi.inc"

.syntax unified
.thumb

		// Initialisation. Registers that stay live for the rest of the program:
		//   r3 = coefs (coefficient array)     r4 = screen_buffer (off-screen buffer)
		//   r5 = screen address                r6 = RNG state
		//   r7 = RNG multiplier                s9 = tmax

		// Set screen mode
		movs	r0,15			// reason code to set screen mode by string
		adr	r1,screen_mode		// screen mode string
		swi	OS_ScreenMode		// set screen mode
		swi	OS_RemoveCursors	// remove cursor

		// Read the screen address; the reply is written over the request block
		adr	r0,screen_address
		movs	r1,r0			// write = read address
		swi	OS_ReadVduVariables	// screen address at screen_address
		ldr	r5,[r1]			// r5 = screen address

		// Create VFP context
		// NOTE(review): r1 = 32 is presumably the number of D registers requested and
		// r0 the flags word - confirm against the VFPSupport documentation.
		movs	r1,32
		lsls	r0,r1,26		// 32 << 26 = 0x80000000
		adds	r0,3			// r0 = 0x80000003
		movs	r2,0			// r2 = 0 (also reused below as the clear value)
		movw	r10,VFPSupport_CreateContext & 0xFFFF
		movt	r10,VFPSupport_CreateContext >> 16 // r10 = SWI number
		swi	OS_CallASWI		// call SWI number r10 (used for values > 0xFF)

		// Clear screen buffer
		// Walks the byte offset r0 from size-4 down to 0. Relies on r2 still being 0
		// after the SWI above.
		adr	r4,screen_buffer	// r4 = screen buffer address
		movs	r0,1152*896*4		// screen size in bytes
	clear_buffer:
		subs	r0,4			// Z is set once the offset reaches 0
		str	r2,[r4,r0]		// clear screen buffer (str leaves the flags alone)
		bne	clear_buffer		// stop after offset 0; bcs here would run one extra
						// pass at offset -4 and store to screen_buffer-4

		// Other preparations
		swi	OS_ReadMonotonicTime	// number of centiseconds since the last hard reset
		movs	r6,r0			// r6 = random seed
		ldr	r7,rng_factor		// r7 = RNG factor (loaded now: coefs overlays rng_factor)

		adr	r3,coefs		// r3 = coefficient array
		vmov	s9,3.0			// s9 = tmax = 3.0

new_equation:	// New equation: fill coefs[0..11] with fresh random values and restart t
		// In:  r3 = coefs, r6 = RNG state, r7 = RNG multiplier
		// Out: coefs[] rewritten, r6 advanced 12 times, s6 = t = -3.0; r1 and s0 are clobbered
		// Falls through into mainloop.
		adds	r1,r3,11*4		// r1 = address of the last coefficient (11+1 coefs)
	next_coef:
		muls	r6,r7
		adds	r6,1			// r6 = r6 * r7 + 1 = random number
		vmov	s0,r6
		vcvt.f32.s32 s0,s0,31		// convert int to float and scale to range -1..1 (31 fraction bits)
		vstr	s0,[r1]			// store value to coefs
		subs	r1,4			// step down to the previous coefficient
		cmp	r1,r3
		bhs	next_coef		// loop 12 times (until r1 drops below coefs)

		vmov	s6,-3.0			// s6 = t0 = -3.0

mainloop:	// Calculations
		// Runs up to way_length time steps. In every step a point starts at (t, t), is
		// pushed star_count times through the quadratic map defined by coefs[], and each
		// iterate that lands on screen is plotted into the buffer at r4.
		// Leaves early to new_equation (skipping the code after this loop) once t >= tmax.
		// Uses: r3 = coefs, r4 = screen buffer, r6 = RNG state, r7 = RNG multiplier,
		//       s6 = t, s9 = tmax. Clobbers r0-r2, r8-r11, s0-s5, s7, s8, s10-s15.
		movs	r9,way_length		// r9 = time steps left in this pass
	next_step:
		vldr	s8,dt_large		// s8 = dt (large step)

		vmov	s2,s6			// s2 = x = t
		vmov	s3,s6			// s3 = y = t
		vmul.f32 s7,s6,s6		// s7 = t*t

		movs	r2,r6			// r2 = color (random number); restarts from r6 every step,
						// so the n-th star gets the same color in each step of an equation
		movs	r8,star_count		// r8 = stars left in this step
	next_star:
		muls	r2,r7
		adds	r2,1			// r2 = r2 * r7 + 1 = color (random number)

		vmul.f32 d2,d1,d1		// s4 = s2*s2 = x*x, s5 = s3*s3 = y*y
		vmov.f32 d0,0.0			// s0, s1 = x', y' = 0
		mov	r0,r3			// coefficient array
	calc_again:
		// Executed twice per star: each pass consumes 6 coefficients, with the
		// accumulators swapped in between so both x' and y' get x, y and t terms.
		vldmia	r0!,{d5,d6,d7}		// s10, s11 = coefs1, s12, s13 = coefs2, s14, s15 = coefs3
		vmla.f32 d0,d1,d5		// s0, s1 += s2, s3 * s10, s11 (x', y' | y', x' += x, y * coefs1)
		vmla.f32 d0,d2,d6		// s0, s1 += s4, s5 * s12, s13 (x', y' | y', x' += x*x, y*y * coefs2)
		vmla.f32 d0,d3,d7		// s0, s1 += s6, s7 * s14, s15 (x', y' | y', x' += t, t*t * coefs3)
		vmov	r11,r10,s0,s1
		vmov	s0,s1,r10,r11		// swap s0 <--> s1 (s0 = y', s1 = x' | s0 = x', s1 = y')
		mvns	r3,r3			// two-pass toggle: the first inversion sets N, the second
						// restores r3 (relies on bit 31 of the coefs address being clear)
		bmi	calc_again		// repeat 1 more time for swapped x, y

		vmov	d1,d0			// s2, s3 = s0, s1 (x, y = x', y')
		vcvt.s32.f32 d0,d1,9		// convert floats to ints (s2, s3 --> s0, s1) and multiply 512x

		vmov	r0,r1,s0,s1		// r0, r1 = x_int, y_int
		cbnz	r0,x_ok
		cbz	r1,off_screen		// skip if r0 == r1 == 0 (wrong value)
	x_ok:
		adds	r0,1152/2		// center x
		cmp	r0,1152
		bhs	off_screen		// unsigned compare also rejects negative x

		rsbs	r1,896/2		// center y (r1 = 896/2 - y_int, so y grows upwards)
		cmp	r1,896
		bhs	off_screen		// unsigned compare also rejects negative y

		adds	r0,r0,r1,lsl 10
		adds	r0,r0,r1,lsl 7		// r0 = r0 + r1 * 1152 (x_int + y_int * 1152)
		str	r2,[r4,r0,lsl 2]	// put pixel (4 bytes per pixel)

		vldr	s8,dt_small		// s8 = dt (small step): one visible star is enough to slow this step
	off_screen:
		subs	r8,1
		bne	next_star		// loop star_count times

		vadd.f32 s6,s8			// t += dt
		vcmp.f32 s6,s9			// t >= tmax?
		vmrs	APSR_nzcv,FPSCR		// copy the VFP compare flags into the CPU flags
		bhs	new_equation		// yes, new equation

		subs	r9,1
		bne	next_step		// loop way_length times

		// Fade out
		// Copies the off-screen buffer (r4) to the screen (r5) one 32-bit pixel at a
		// time, then darkens the buffered copy for the next pass. r2 walks the byte
		// offset from size-4 down to 0. Clobbers r0, r1, r2.
		ldr	r1,fade_out_sub		// r1 = fade out speed value (amount subtracted from each byte)
		movs	r2,1152*896*4		// screen size in bytes
fade_out:
		subs	r2,4			// Z is set once the offset reaches 0
		ldr	r0,[r4,r2]		// read pixel
		str	r0,[r5,r2]		// store pixel to screen
		cbz	r0,fade_skip		// skip fade out for black color
		uqsub8	r0,r0,r1		// fade out each color component (red, green, blue), saturating at 0
		str	r0,[r4,r2]		// store pixel to screen buffer
	fade_skip:
		// ldr/str/cbz/uqsub8 leave the flags from subs untouched, so Z still tells
		// whether offset 0 has just been processed. Using bne (rather than bcs) stops
		// there: one more pass at offset -4 would write below the screen address and
		// run the fade over the word just below the buffer (the last coefficient).
		bne	fade_out		// loop for whole screen

		// Keyboard check and exit
		swi	OS_ReadEscapeState	// is ESC pressed? (answer comes back in the carry flag)
		bcc	mainloop		// carry clear: not pressed, run another pass with the same equation

		swi	OS_Exit			// yes, exit to OS

//// DATA //////////////////////////////////////////////////////////////////////////////////////////////////////////////

.align	2					// data align (by word = 4 bytes)

dt_large:
.single	0.0002					// high delta t (for off-screen pixels)

dt_small:
.single	0.00002					// low delta t (for on-screen pixels)

fade_out_sub:
.word	0xFF040404				// fade out speed value (per-byte amounts for the saturating subtract)

// The coefficient array (12 words) starts here and deliberately overlays the
// init-only data below: rng_factor, screen_address and screen_mode are all
// read before the first coefficients are generated and are overwritten afterwards.
coefs:						// equation coefficients

rng_factor:
.word	0x8088405				// RNG factor (multiplier in state = state * factor + 1)

screen_address:
.word	148					// input block to read screen address
.word	-1					// request block terminator

screen_mode:
.string	"23 C16M"				// screen size and number of colors

screen_buffer = coefs + 12*4			// screen buffer address (starts right after the 12 coefficients)
