REM Report any error together with its line number, then stop
ON ERROR: ON ERROR OFF: PRINT REPORT$;" at line ";ERL:END

REM Workspace: first holds the mode selector block, then the assembled code
DIM code% 4096

REM Build a six-word block at code% and pass it to OS_ScreenMode 0
REM (presumably flags, width, height, pixel depth, frame rate, terminator)
P% = code%
[OPT 0
  dcd 1
  dcd 1920
  dcd 1080
  dcd 5
  dcd -1
  dcd -1
]
SYS "OS_ScreenMode", 0, code%

REM Benchmark build: run it, timing the call, and report the frame rate
PROCassemble("", FALSE)
SYS "OS_ReadMonotonicTime" TO began%
frames% = USR code%
SYS "OS_ReadMonotonicTime" TO finished%
elapsed% = finished% - began%
PRINT "fps: " frames% / elapsed% * 100

REM Named builds: assembled and, if PROCassemble decides so, saved
PROCassemble("Elsecaller", FALSE)
PROCassemble("Elsecaller- 800x600", TRUE)

END

DEF PROCassemble(filename$, fixed_mode%)
  REM Assemble the tunnel effect at code% and, for named builds, save it.
  REM   filename$   - "" builds the benchmark variant: frames are counted in
  REM                 r12 and the count is returned in r0 once
  REM                 OS_ReadEscapeState reports carry set. Any other value
  REM                 is used as the leaf name of the file to save.
  REM   fixed_mode% - TRUE: the code issues OS_ScreenMode itself and uses the
  REM                 hard-coded 800 and 600 loop counts; a named fixed-mode
  REM                 build loops unconditionally.
  REM                 FALSE: the loop counts come from the values read back
  REM                 after OS_ReadVduVariables.
  REM On exit from the FOR loop the assembled size is P% - code%.

  REM Number of fraction bits used by the vcvt.s32.f32 into q10 below
  texture_shift% = 18
  IF fixed_mode% THEN
    texture_shift% = 17
  ENDIF
  REM Two assembler passes: OPT 0 suppresses errors so forward references
  REM can be resolved, OPT 2 reports them. Neither produces a listing.
  FOR pass%=0 TO 2 STEP 2
    P% = code%
    [OPT pass%
  ; create a VFP context with r0 = flag bits 1 and 31, r1 = 32, r2 = 0
  mov r0, #(1<<1) + (1<<31)
  mov r1, #32
  mov r2, #0
  swi "VFPSupport_CreateContext"

  ; load the four 16-bit constants, each replicated into every lane of one
  ; of d1, d2, d3 and d4, and leave r1 pointing just past them
  adr r1, constants
  vld4.16 {d1[], d2[], d3[], d4[]}, [r1]!

    ]
    IF fixed_mode% THEN
      [OPT pass%
  ; r1 still points just past the constants, which is where the mode string
  ; is assembled in this build
  ; note for review - reason code 15 presumably selects a mode by string
  mov r0, #15
  swi "OS_ScreenMode"
      ]
    ENDIF
    [OPT pass%

  ; r0 = list of variable numbers at the vdu label, r1 is unchanged, so the
  ; results presumably overwrite the data assembled just past the constants
  ; (confirm against the OS_ReadVduVariables documentation)
  adr r0, vdu
  swi "OS_ReadVduVariables"

    ]
    REM Benchmark build only: r12 is the frame counter, starting at zero
    IF filename$ = "" THEN
      [OPT pass%: mov r12, #0: ]
    ENDIF
    [OPT pass%

  ; the first time into the mainloop, r2 is still 0 from VFPSupport_CreateContext
  ; above
.mainloop
  ; dec y counter. whenever this reaches 0 (or below), initialize a new frame
  subs r2, r2, #1
    ]
    REM Benchmark build only: count each new frame
    IF filename$ = "" THEN [OPT pass%: addle r12, r12, #1: ]
    IF fixed_mode% THEN
      [OPT pass%
  ; on a new frame reload r3 (used below as the store pointer) and r6 from
  ; the words at r1, restart the row counter at 600 and fetch the time
  ldmleia r1, {r3, r6}
  movle r2, #600
  swile "OS_ReadMonotonicTime"
  ; d7 = r0 (the time) in both lanes, d6 = the same shifted left by one
  vmov d7, r0, r0
  vshl.u32 d6, d7, #1
  ; d9 = row counter minus 300 in both lanes
  sub r8, r2, #300
  vmov d9, r8, r8
  ; r7 counts down the 800 columns, two per pass through xloop
  mov r7, #800
.xloop
  ; d8 = column counter minus 400 together with that value plus one
  sub r8, r7, #400
  add r9, r8, #1
  vmov d8, r9, r8
      ]
    ELSE
      [OPT pass%
  ; on a new frame reload r3 (used below as the store pointer), r4, r5 and
  ; r6 from the words at r1, restart the row counter at r5 + 1 and fetch
  ; the time
  ldmleia r1, {r3, r4, r5, r6}
  addle r2, r5, #1
  swile "OS_ReadMonotonicTime"
  ; d7 = r0 (the time) in both lanes, d6 = the same shifted left by one
  vmov d7, r0, r0
  vshl.u32 d6, d7, #1
  ; d9 = row counter minus half of r5 in both lanes
  sub r8, r2, r5, lsr #1
  vmov d9, r8, r8
  ; r7 counts down from r4 + 1, two per pass through xloop
  add r7, r4, #1
.xloop
  ; d8 = column counter minus half of r4 together with that value plus one
  sub r8, r7, r4, lsr #1
  add r9, r8, #1
  vmov d8, r9, r8
      ]
    ENDIF
    [OPT pass%

; d1-d4  constants
; d6/d7  time
; d8/d9  screen pos (int)
; d10/d11  screen pos (float)
; d12  1/len
; d13  angle
; d16  smaller side with sign for quadrant
; d18  depth fade factor (temp)
; d22  depth fade factor
; d23  eor texture
; d26  final pixel
; d27  0
; see TunnelLinearSrc for a more readable version of the per pixel code
; this is folded 4 times to avoid stalls due to data dependencies
; the indentation below groups the instructions that belong to the same
; stage of that folded pipeline, so the statement order must not be changed

      vadd.s32 q10, q10, q3
        vmul.u32 q11, q11, d1[0]
    vsri.32 d17, d27, #1
    vmin.f32 d16, d14, d15
  vcvt.f32.s32 q5, q4
        vmull.u8 q12, d23, d4
      veor d23, d20, d21
    veor d16, d16, d17
  vmul.f32 q6, q5, q5
        vqshrn.u16 d26, q12, #6
      vsli.u32 d23, d27, #8
    vmul.f32 d5, d28, d16
  vadd.f32 d12, d12, d13
        vqsub.u8 q13, q13, q1
      vmin.f32 d30, d18, d3
    vmul.f32 d29, d5, d5
  vabs.f32 q7, q5
        vqabs.s8 d26, d26
      vsub.f32 d30, d3, d30
    vmul.f32 q9, q14, q2
  vacgt.f32 d16, d10, d11
        vmull.u8 q10, d26, d22
      vmul.f32 d30, d30, d30
    vfma.f32 d5, d19, d3
  veor d17, d10, d11
        vqshrn.u16 d26, q10, #6
      vcvt.s32.f32 d22, d30, #12
    vmul.f32 d29, d5, d2
  veor d17, d17, d16
        ; shift r6 (initialized to -1 each frame) until it is zero
        ; only start storing pixels once it has reached zero
        ; (since a pixel needs 4 iteration through the loop before it is done)
        movs r6, r6, lsl#10
        ; store the two finished pixels held in d26 and advance r3
        vstmiaeq r3!, {d26}
  ; two columns are consumed per pass, the flags feed the bgt below
  subs r7, r7 ,#2
    vcvt.s32.f32 q10, q14, #texture_shift%
  vrsqrte.f32 d28, d12

  bgt xloop

    ]
    IF filename$ = "" THEN
      [OPT pass%
  ; benchmark build - continue with the next row while carry is clear
  swi "OS_ReadEscapeState"
  bcc mainloop
  ; otherwise issue OS_Byte 124 (presumably clears the escape condition)
  ; and hand the frame count in r12 back to the caller in r0
  mov r0, #124
  swi "OS_Byte"
  mov r0, r12
  mov pc, r14
      ]
    ELSE
      IF fixed_mode% THEN
        [OPT pass%
  ; named fixed-mode build - loop unconditionally
  b mainloop
        ]
      ELSE
        [OPT pass%
  ; named build - continue with the next row while carry is clear
  swi "OS_ReadEscapeState"
  bcc mainloop
  ; note for review - only data follows this swi, so it presumably does not
  ; return here. confirm how this build is meant to exit
  swi &10a
        ]
      ENDIF
    ENDIF
    [OPT pass%

.constants
    ]
    REM Two words holding the four 16-bit constants read by the vld4.16 at
    REM the start, followed by the data that r1 points at afterwards
    IF fixed_mode% THEN
      [OPT pass%
  ; halfwords in load order - &101 for d1, FNf16(4/PI/512) for d2,
  ; FNf16(0.2215) for d3, &413e for d4
  dcd &101 OR (FNf16(4/PI/512) << 16)
  dcd FNf16(0.2215) OR (&413e << 16)
.mode
  ; zero-terminated string that r1 points at when OS_ScreenMode is called
  equs "32 C16M": dcb 0
.vdu
  ; variable number list for OS_ReadVduVariables, ended by -1
  dcd 148: dcd -1
      ]
    ELSE
      [OPT pass%
  ; halfwords in load order - &101 for d1, FNf16(2/PI/512) for d2,
  ; FNf16(0.2215) for d3, &4176 for d4
  dcd &101 OR (FNf16(2/PI/512) << 16)
  dcd FNf16(0.2215) OR (&4176 << 16)
.vdu
  ; variable number list for OS_ReadVduVariables, ended by -1. r1 points
  ; here too at that call, so the three results presumably replace the
  ; numbers and the -1 is the word later loaded into r6 - confirm
  dcd 148: dcd 11: dcd 12: dcd -1
      ]
    ENDIF
  NEXT

  REM Named builds only: save the code as file type &ff8 when the system
  REM variable Tunnel256$Dir is set, then report the size either way.
  REM XOS_ReadVarVal is called with -1 in R2; a result below -1 is taken
  REM to mean the variable exists (presumably R2 comes back as NOT length).
  IF filename$ <> "" THEN
    SYS "XOS_ReadVarVal", "Tunnel256$Dir", 0, -1, 0, 0 TO ,,length%
    IF length% < -1 THEN
      SYS "OS_File", 10, "<Tunnel256$Dir>." + filename$, &ff8, 0, code%, P%
      PRINT "Saved " filename$
    ENDIF
    PRINT filename$ ": " P% - code% " bytes"
  ENDIF
ENDPROC

DEF FNf16(value)
  REM Return the upper 16 bits of the single-precision bit pattern of
  REM value (as produced by FNf32), rounded up when bit 15 of that pattern
  REM is set.
  REM The logical shift >>> is used rather than the arithmetic >> so that a
  REM negative value (sign bit set) still yields a 16-bit result instead of
  REM a sign-extended one, which would spill into the other halfword when
  REM callers OR it into a 32-bit constant.
  LOCAL value%
  value% = FNf32(value)
=(value% >>> 16) + ((value% >>> 15) AND 1)

DEF FNf32(value)
  REM Return the 32-bit single-precision bit pattern of value as an integer:
  REM sign in bit 31, exponent biased by &7f in bits 23-30, and the
  REM fraction truncated to 23 bits. Zero is returned as 0. Values outside
  REM the normal single-precision range are not handled specially.
  LOCAL sign, exp%, mant, value%
  IF value = 0 THEN =0
  sign = SGN value
  value = value * sign
  exp% = INT(LN value / LN 2)
  mant = value / EXP(exp% * LN 2)
  REM LN and EXP are inexact, so the exponent estimate can be one out (for
  REM example on exact powers of two). Bring mant back into the range 1 to 2
  REM so that the implicit leading bit and the exponent agree.
  IF mant >= 2 THEN mant = mant / 2: exp% += 1
  IF mant < 1 THEN mant = mant * 2: exp% -= 1
  value% = (mant * &800000) AND &7fffff
  value% = value% + (((exp% + &7f) AND &ff) << 23)
  IF sign < 0 THEN value% = value% OR &80000000
=value%
