;"codegrinder" by Kuemmel for Outline 2018
;main algorithm based on a shader by kusma https://www.shadertoy.com/view/4d33RM
;and the discussion on pout here => http://www.pouet.net/topic.php?which=10564
org 100h
use16
push 0a000h - 70			;center x axis
aas							;part of correct aspect ratio => sizecoding.org
pop es
mov al,13h
int 10h

;---greyscale palette generation
mov dx,0x3c9
palette:  	
	mov al,cl
	shr al,2				;...as Intel doesn't like shrd ax,cx,18 => undefined result, but works on DOSBox
	out dx,al
	out dx,al
	out dx,al
loop palette

;---main intro routine	
fld1						;1
main_loop:
  mov ax,0xcccd  		
  mul di					;rrrola's trick => y=dh, x=dl
  sub dh,[si]				;center y axis
  pusha
  fild  word[bx-8]			;dh = y			|1		   
  fild  word[bx-9]			;dl = x			|y			|1	  
  fmul	dword[bx+si]		;correct aspect ratio => sizecoding.org
  fld   st1					;y		       	|x			|y	  |1	  
  fmul  st0,st0				;y*y			|x			|y	  |1  	
  fld   st1					;x				|y*y	   	|x	  |y   	|1	
  fmul  st0,st0				;x*x	     	|y*y	   	|x	  |y	|1
  faddp st1,st0				;x*x+y*y	   	|x		  	|y	  |1
  fmul dword[si-258+f]		;(x*x+y*y)/f	|x	  		|y	  |1
  fsubr st0,st3				;1-xx*xx+yy*yy 	|x			|y	  |1	
  fabs				
  fst   st4  				;t				|x			|y	  |1	|t
  fsqrt
  fsubr st0,st3				;1-e			|x		  	|y	  |1
  fadd  st0,st3				;2-e			|x		  	|y	  |1
  fmul  st1,st0				;2-e			|x*(2-e)	|y	  |1
  fmulp st2,st0				;x*(2-e)		|y*(2-e)	|1	
  fistp word[bx-4]			;new y at al (only highbyte is interesting)
  fistp word[bx-5]			;new x at ah (overwrite former lowbyte)
  popa
  mov bx,si					;the code itself is the random texture !
  add al,byte[bp+si]		;inc y_movement
  sub ah,byte[bp+si]		;inc x_movement
  and ax,0111111101111111b	;try to modify the range here
  xlatb						;get ty from ds:bx+al
  xchg ax,dx				;backup
  mov al,dh				
  xlatb						;get tx from ds:bx+al
  or al,dl					;combine x+y => add is good, or also, sub,xor,and not so
  xor ah,ah					;clear ah for word access
  xor bx,bx					;clear for correct addressing above and here
  shr ax,1					;works with 'or al,dl'
  push ax
  fild word[bx-4]			;c				|1			|t	
  fmul st0,st0				;c*c			|1			|t	
  fmul st0,st2				;c*c*t			|1			|t
  fsqrt						;sqrt(c*c*t)	|1			|t
  fistp word[bx-4]			;1				|t	
  pop ax
  neg al					;neg al gives some white particles near the seam :-) not al is also good but no particles
  stosb
loop main_loop
inc byte[bp+si]				;update global movement counter
in al,60h
  dec al
jnz main_loop
ret	
f dw 0x3120					;dword float constant for sphere size, first 2 bytes precision not needed
							;there is a similar number in code at +63, but shows more artifacts...so 2 Bytes more wasted ;-) 3160