	  BITS 32                               ; assemble everything below as 32-bit code
	  ; --- exported routines (each sets up an EBP stack frame) ---
	  GLOBAL _asmblurscreen
          GLOBAL _asmblurscreenrgb
          GLOBAL _asmblurscreenrgbmmx
	  GLOBAL _asmputpixel
	  GLOBAL _asmputpixel2
      	  GLOBAL _asmputpixel2MMX
          GLOBAL _asmcalcbuffer
          GLOBAL _asmclearbuffer
          GLOBAL _asmdrawblock
          GLOBAL _asmcopyfontschweif
	  ; --- exported data (reserved in the .bss section of this file) ---
	  GLOBAL _buffer
          GLOBAL _schweif
          GLOBAL _screen
          GLOBAL _logo32
          ; --- MMX detection routine and its result flag ---
          GLOBAL _mmxdetection
          GLOBAL _mmxdetected
          ; --- line routine and the tables it indexes (tables are not filled in this file) ---
          GLOBAL _lineimp
          GLOBAL _xtab
          GLOBAL _testpal

	  SECTION .text

;=================================================================================
;=================================================================================
_asmblurscreen:
;---------------------------------------------------------------------------------
; Greyscale blur/fade pass over _schweif, in place. Takes no stack arguments.
;
; For every 4-byte pixel from byte offset 320*4 up to (not including)
; 199*320*4, the first byte of the four diagonal neighbours (pixel offsets
; -321, -319, +319, +321) is summed. 10 is subtracted when the sum is >= 10.
; A zero result clears the whole pixel dword; otherwise (sum-1)>>2 is
; written to bytes 0..2 of the pixel (byte 3 is left untouched).
;
; Registers: all general-purpose registers are preserved. EBX is saved here
;            because it is callee-saved in the usual 32-bit C conventions and
;            this routine uses it as its accumulator.
; NOTE(review): the first iteration reads the byte at _schweif-4 and the last
;            one reads the byte at _schweif+320*200*4, i.e. just outside a
;            320*200*4-byte buffer. Loop bounds are kept as they were.
;---------------------------------------------------------------------------------
        push    ebp
        mov     ebp,esp
        push    esi
        push    ebx                             ; accumulator, must survive for the caller
        push    eax

        mov     esi,320*4                       ; esi = byte offset of current pixel
        xor     eax,eax                         ; bits 8..31 stay zero: al is the only part loaded
        xor     ebx,ebx                         ; same for ebx/bl (result below is always < 256)

.pixel:
        mov     bl,[_schweif+esi+319*4]         ; ebx = sum of the four diagonal neighbours
        mov     al,[_schweif+esi+321*4]
        add     ebx,eax
        mov     al,[_schweif+esi-319*4]
        add     ebx,eax
        mov     al,[_schweif+esi-321*4]
        add     ebx,eax

        cmp     ebx,10                          ; fade: take 10 off when there is enough left
        jb      .faded
        sub     ebx,10
.faded:
        test    ebx,ebx
        jz      .black

        dec     ebx
        shr     ebx,2                           ; average of the four samples
        mov     [_schweif+esi  ],bl
        mov     [_schweif+esi+1],bl
        mov     [_schweif+esi+2],bl
        jmp     .next

.black:
        mov     [_schweif+esi],ebx              ; ebx = 0: clear all four bytes at once

.next:
        add     esi,4
        cmp     esi,199*320*4
        jb      .pixel

        pop     eax
        pop     ebx
        pop     esi
        mov     esp,ebp
        pop     ebp
        ret



;=================================================================================
;=================================================================================
_asmblurscreenrgb:
;---------------------------------------------------------------------------------
; Per-channel blur/fade pass over _schweif, in place.
;
; Stack argument:
;   [ebp+8]  fade amount, subtracted from each channel's neighbour sum
;
; Pixels at byte offsets 320*4 .. 199*320*4-4 are visited in ascending order.
; A pixel whose whole dword is zero is skipped. (Original author's note: this
; may be a bug, because such a pixel never picks up blur from its neighbours.)
; For every other pixel, each of bytes 0..2 becomes
;       (sum of that byte in the four diagonal neighbours - fade) >> 2
; or 0 when the sum does not exceed the fade amount (unsigned compare).
;
; Registers: all general-purpose registers are preserved.
;---------------------------------------------------------------------------------

; Blur one channel of the pixel at _schweif+esi.
;   %1  = byte offset of the channel inside the 4-byte pixel
;   in  : eax bits 8..31 = 0, edx = fade amount, esi = pixel byte offset
;   out : channel byte written; ebx = value written (0..255), al clobbered
%macro BLURRGB_CHANNEL 1
        movzx   ebx,byte [_schweif+esi+319*4+%1]
        mov     al,[_schweif+esi+321*4+%1]
        add     ebx,eax
        mov     al,[_schweif+esi-319*4+%1]
        add     ebx,eax
        mov     al,[_schweif+esi-321*4+%1]
        add     ebx,eax                         ; ebx = neighbour sum (at most 4*255)
        sub     ebx,edx                         ; same flags as "cmp ebx,edx"
        ja      %%scale                         ; sum > fade: keep the remainder
        xor     ebx,ebx                         ; sum <= fade: channel goes to 0
%%scale:
        shr     ebx,2
        mov     [_schweif+esi+%1],bl
%endmacro

        push    ebp
        mov     ebp,esp
        push    esi
        push    eax
        push    ebx
        push    edx

        mov     edx,[ebp+8]                     ; edx = fade amount
        mov     esi,320*4                       ; esi = byte offset of current pixel
        xor     eax,eax                         ; only al is loaded below

.pixel:
        cmp     dword [_schweif+esi],0          ; completely black pixel: leave it alone
        je      .next

        BLURRGB_CHANNEL 0
        BLURRGB_CHANNEL 1
        BLURRGB_CHANNEL 2

.next:
        add     esi,4
        cmp     esi,199*320*4
        jb      .pixel

        pop     edx
        pop     ebx
        pop     eax
        pop     esi
        leave
        ret


;=================================================================================
;=================================================================================
_asmblurscreenrgbmmx:
;---------------------------------------------------------------------------------
; MMX blur/fade pass over _schweif, in place, all four bytes of a pixel at once.
;
; Stack argument:
;   [ebp+8]  dword whose four bytes are subtracted (unsigned, saturating at 0)
;            from the four averaged bytes of every pixel.
;            NOTE(review): a plain small integer therefore only fades byte 0;
;            presumably callers pass the amount replicated per channel - verify.
;
; For every pixel at byte offsets 320*4 .. 199*320*4-4 the four diagonal
; neighbours are widened to words, summed, divided by 4, packed back to bytes,
; faded and stored. Unlike the scalar RGB variant, zero pixels are not skipped
; and byte 3 is written as well.
;
; The loop now stops before offset 199*320*4, matching the scalar blur
; routines; previously one extra pixel at exactly that offset was processed.
;
; Registers: ESI preserved; MM0-MM3, MM6, MM7 used; EMMS is issued before return.
;---------------------------------------------------------------------------------
        push    ebp
        mov     ebp,esp
        push    esi

        pxor    mm7,mm7                         ; mm7 = 0, used for unpack/pack
        movd    mm6,[ebp+8]                     ; mm6 = per-byte fade amounts

        mov     esi,320*4                       ; esi = byte offset of current pixel

.pixel:
        movd    mm0,[_schweif+esi+319*4]        ; four diagonal neighbours, 4 bytes each
        movd    mm1,[_schweif+esi+321*4]
        movd    mm2,[_schweif+esi-319*4]
        movd    mm3,[_schweif+esi-321*4]

        punpcklbw mm0,mm7                       ; bytes -> words so the sums cannot overflow
        punpcklbw mm1,mm7
        punpcklbw mm2,mm7
        punpcklbw mm3,mm7

        paddusw mm0,mm1                         ; per-channel word sums (at most 4*255)
        paddusw mm0,mm2
        paddusw mm0,mm3

        psrlw   mm0,2                           ; divide by 4
        packuswb mm0,mm7                        ; words -> bytes again
        psubusb mm0,mm6                         ; fade, clamped at 0

        movd    [_schweif+esi],mm0

        add     esi,4
        cmp     esi,199*320*4
        jb      .pixel                          ; strictly below: same range as the scalar versions

        emms                                    ; leave MMX state clean for FPU code

        pop     esi
        mov     esp,ebp
        pop     ebp
        ret

;=================================================================================
;=================================================================================
_asmputpixel2MMX:
;---------------------------------------------------------------------------------
; Additive pixel plot into _schweif using MMX.
;
; Stack arguments:
;   [ebp+8]   byte offset of the pixel inside _schweif
;   [ebp+12]  packed 4-byte colour, added byte-wise to the pixel
;
; The addition is PADDSB, i.e. signed saturation per byte (results top out
; at +127). Only the low four bytes of the result are stored.
;
; Registers: EBX preserved; MM0/MM1 used; EMMS is issued before return.
;---------------------------------------------------------------------------------
        push    ebp
        mov     ebp,esp
        push    ebx

        mov     ebx,[ebp+8]                     ; ebx = pixel byte offset
        movd    mm1,[ebp+12]                    ; mm1 = colour to add
        movd    mm0,[_schweif+ebx]              ; mm0 = current pixel
        paddsb  mm0,mm1                         ; saturating per-byte add
        movd    [_schweif+ebx],mm0

        emms

        pop     ebx
        leave
        ret

;=================================================================================
;=================================================================================
_asmputpixel2:
;---------------------------------------------------------------------------------
; Additive, clamped pixel plot into _schweif.
;
; Stack arguments:
;   [ebp+8]   byte offset of the pixel inside _schweif
;   [ebp+12]  amount added to byte 0 (r)
;   [ebp+16]  amount added to byte 1 (g)
;   [ebp+20]  amount added to byte 2 (b)
;
; Each amount is added as a full dword to the zero-extended channel byte;
; any sum that is not below 127 (unsigned) is stored as 127.
;
; C reference left by the original author (note that it saturates at 255,
; while this routine clamps at 127):
;   void put_pixel(int x,int y,unsigned char r,unsigned char g,unsigned char b){
;     long pos;
;     pos=y*(X_RES<<2)+(x<<2);
;     if ((buffer[pos  ]+r) <255)  buffer[pos  ]+=r;  else buffer[pos  ]=255;
;     if ((buffer[pos+1]+g) <255)  buffer[pos+1]+=g;  else buffer[pos+1]=255;
;     if ((buffer[pos+2]+b) <255)  buffer[pos+2]+=b;  else buffer[pos+2]=255;
;   }
;
; Registers: all general-purpose registers are preserved.
;---------------------------------------------------------------------------------
        push    ebp
        mov     ebp,esp
        push    edi
        push    eax

        mov     edi,[ebp+8]                     ; edi = pixel byte offset

        ; --- byte 0 (r) ---
        movzx   eax,byte [_schweif+edi]
        add     eax,[ebp+12]
        cmp     eax,127
        jb      .store_r
        mov     eax,127
.store_r:
        mov     [_schweif+edi],al

        ; --- byte 1 (g) ---
        movzx   eax,byte [_schweif+edi+1]
        add     eax,[ebp+16]
        cmp     eax,127
        jb      .store_g
        mov     eax,127
.store_g:
        mov     [_schweif+edi+1],al

        ; --- byte 2 (b) ---
        movzx   eax,byte [_schweif+edi+2]
        add     eax,[ebp+20]
        cmp     eax,127
        jb      .store_b
        mov     eax,127
.store_b:
        mov     [_schweif+edi+2],al

        pop     eax
        pop     edi
        leave
        ret



;=================================================================================
;=================================================================================
_asmputpixel:
;---------------------------------------------------------------------------------
; Plain (overwriting) pixel plot into _schweif.
;
; Stack arguments:
;   [ebp+8]   byte offset of the pixel inside _schweif
;   [ebp+12]  value for byte 0 (r)  - only the low byte is stored
;   [ebp+16]  value for byte 1 (g)  - only the low byte is stored
;   [ebp+20]  value for byte 2 (b)  - only the low byte is stored
;
; Byte 3 of the pixel is not touched.
; The original author left a note here saying the routine "does not work";
; the reason is not visible in this file.
;
; Registers: all general-purpose registers are preserved.
;---------------------------------------------------------------------------------
        push    ebp
        mov     ebp,esp
        push    edi
        push    eax

        mov     edi,[ebp+8]                     ; edi = pixel byte offset

        mov     al,[ebp+12]                     ; low byte of each dword argument
        mov     [_schweif+edi],al
        mov     al,[ebp+16]
        mov     [_schweif+edi+1],al
        mov     al,[ebp+20]
        mov     [_schweif+edi+2],al

        pop     eax
        pop     edi
        leave
        ret


;=================================================================================
;=================================================================================
; C reference left by the original author:
;   y=0;
;   for (x=0;x<320*200;x++){
;       buffer[y]  =schweif[y]  +logo32[y];
;       buffer[y+1]=schweif[y+1]+logo32[y+1];
;       buffer[y+2]=schweif[y+2]+logo32[y+2];
;       y+=4;
;   }

_asmcalcbuffer:
;---------------------------------------------------------------------------------
; _buffer = _logo32 + _schweif, for all 320*200 pixels. Takes no arguments.
;
; The addition is done on whole dwords rather than per byte as in the C
; reference above, so a channel sum above 255 carries into the next byte and
; byte 3 is summed too.
;
; Registers: all general-purpose registers are preserved.
;---------------------------------------------------------------------------------
        push    ebp
        mov     ebp,esp
        push    esi
        push    eax

        mov     esi,320*200*4                   ; esi = bytes still to do; walks down to 0
.pixel:
        mov     eax,[_logo32+esi-4]
        add     eax,[_schweif+esi-4]
        mov     [_buffer+esi-4],eax
        sub     esi,4
        jnz     .pixel

        pop     eax
        pop     esi
        leave
        ret


;=================================================================================
;=================================================================================
_asmclearbuffer:
;---------------------------------------------------------------------------------
; Bitwise-inverts every dword of _buffer (320*200 pixels). Takes no arguments.
;
; Despite the name nothing is set to a fixed value: each pixel is NOT-ed in
; place. An earlier variant, left disabled by the original author, stored
; 0FFFFFFFFh into every pixel instead.
;
; Registers: all general-purpose registers are preserved.
;---------------------------------------------------------------------------------
        push    ebp
        mov     ebp,esp
        push    edi

        xor     edi,edi                         ; edi = byte offset into _buffer
.invert:
        not     dword [_buffer+edi]
        add     edi,4
        cmp     edi,320*200*4
        jb      .invert

        pop     edi
        leave
        ret



;=================================================================================
;=================================================================================
_asmdrawblock:
;---------------------------------------------------------------------------------
; Fills a 3x3 block of 4-byte pixels with one colour.
;
; Stack arguments:
;   [ebp+8]   base address of the target buffer
;   [ebp+12]  byte offset of the block's top-left pixel inside the target
;   [ebp+16]  pixel value to store (the original author's example: 0x7f7f7f7f)
;
; Rows are 320*4 = 1280 bytes apart. No bounds checking is done.
;
; Registers: all general-purpose registers are preserved.
;---------------------------------------------------------------------------------
        push    ebp
        mov     ebp,esp
        push    edi
        push    eax

        mov     edi,[ebp+8]
        add     edi,[ebp+12]                    ; edi -> top-left pixel of the block
        mov     eax,[ebp+16]                    ; eax = pixel value

        mov     [edi],eax                       ; row 0
        mov     [edi+4],eax
        mov     [edi+8],eax
        mov     [edi+320*4],eax                 ; row 1
        mov     [edi+320*4+4],eax
        mov     [edi+320*4+8],eax
        mov     [edi+2*320*4],eax               ; row 2
        mov     [edi+2*320*4+4],eax
        mov     [edi+2*320*4+8],eax

        pop     eax
        pop     edi
        leave
        ret

;=================================================================================
;=================================================================================
_asmcopyfontschweif:
;---------------------------------------------------------------------------------
; Copies every non-zero colour byte of _screen over _schweif and clears
; _screen while doing so. Takes no arguments.
;
; C reference left by the original author:
;   for (x=0;x<320*200*4;x+=4){
;     l=(long)screen[x];
;     if (l!=0) (long)schweif[x]=l;
;     l=(long)screen[x+1];
;     if (l!=0) (long)schweif[x+1]=l;
;     l=(long)screen[x+2];
;     if (l!=0) (long)schweif[x+2]=l;
;   }
;   memset(screen,0,320*200*4);
;
; Pixels are walked from the last one down to pixel 0. Bytes 0..2 of each
; pixel are tested individually; byte 3 is never copied. The whole pixel
; dword in _screen is zeroed after it has been examined.
;
; Two defects of the previous version are fixed here:
;   * the zero test looked at all of EAX although only AL had been loaded, so
;     the result depended on whatever the caller left in bits 8..31;
;   * the index ran from 320*200*4 down to 4, which skipped pixel 0 and
;     touched one pixel past the end of both buffers.
;
; Registers: all general-purpose registers are preserved.
;---------------------------------------------------------------------------------
        push    ebp
        mov     ebp,esp
        push    ebx
        push    eax

        mov     ebx,320*200*4                   ; ebx = offset just past the current pixel
.pixel:
        mov     al,[_screen+ebx-4]              ; byte 0 (r)
        test    al,al
        jz      .skip_r
        mov     [_schweif+ebx-4],al
.skip_r:
        mov     al,[_screen+ebx-3]              ; byte 1 (g)
        test    al,al
        jz      .skip_g
        mov     [_schweif+ebx-3],al
.skip_g:
        mov     al,[_screen+ebx-2]              ; byte 2 (b)
        test    al,al
        jz      .skip_b
        mov     [_schweif+ebx-2],al
.skip_b:
        mov     dword [_screen+ebx-4],0         ; source pixel consumed: clear it

        sub     ebx,4
        jnz     .pixel                          ; stops after pixel 0 has been handled

        pop     eax
        pop     ebx
        mov     esp,ebp
        pop     ebp
        ret

;=================================================================================
;=================================================================================
_mmxdetection:
;---------------------------------------------------------------------------------
; Detects MMX support and records the result in _mmxdetected. Takes no
; arguments.
;
; Out:  dword [_mmxdetected] = 1 when CPUID leaf 1 reports MMX (EDX bit 23),
;       otherwise 0. The same value is also returned in EAX.
;
; CPUID is only executed after checking that the EFLAGS ID bit (bit 21) can
; be toggled, and leaf 1 is only queried when leaf 0 reports it as available,
; so the routine can run on processors that predate CPUID.
;
; Registers: EBX preserved (CPUID overwrites it); EAX, ECX, EDX clobbered.
;---------------------------------------------------------------------------------
        push    ebp
        mov     ebp,esp
        push    ebx

        mov     dword [_mmxdetected],0          ; assume "no MMX" until proven otherwise

        ; --- is the CPUID instruction available? ---
        pushfd
        pop     eax
        mov     ecx,eax                         ; ecx = original EFLAGS
        xor     eax,00200000h                   ; flip the ID bit
        push    eax
        popfd
        pushfd
        pop     eax                             ; eax = EFLAGS as the CPU accepted them
        push    ecx
        popfd                                   ; put the original EFLAGS back
        xor     eax,ecx
        test    eax,00200000h
        jz      .done                           ; ID bit did not change: no CPUID

        ; --- is leaf 1 available? ---
        xor     eax,eax
        cpuid                                   ; eax = highest supported basic leaf
        test    eax,eax
        jz      .done

        ; --- feature flags ---
        mov     eax,1
        cpuid
        test    edx,00800000h                   ; EDX bit 23 = MMX
        jz      .done
        mov     dword [_mmxdetected],1

.done:
        mov     eax,[_mmxdetected]              ; also hand the result back in EAX
        pop     ebx
        mov     esp,ebp
        pop     ebp
        ret

;=================================================================================
;=================================================================================
_lineimp:
;---------------------------------------------------------------------------------
; Line stepping loop that plots two points, (x,y) and (xx,yy), per iteration
; into _schweif using saturating MMX addition of a palette colour.
;
; Stack arguments read (offsets relative to EBP once the frame is set up):
;   [ebp+8]   -> _steep   non-zero selects the first plotting variant
;   [ebp+12]  -> _x
;   [ebp+16]  -> _xx
;   [ebp+28]  -> _c       colour index into _testpal (dword entries)
;   [ebp+40]  -> _y
;   [ebp+44]  -> _yy
;   [ebp+48]  -> _e       error term
;   [ebp+52]  -> _sx
;   [ebp+56]  -> _sy
;   [ebp+60]  -> _dx
;   [ebp+64]  -> _dy
; The dwords at [ebp+20], [ebp+24], [ebp+32] and [ebp+36] are not read.
;
; All working values live in the static variables named above, so the routine
; is not reentrant.
;
; Each iteration: plot, advance y/yy while e <= 3000 (unsigned), advance x/xx,
; then reduce the colour index; the loop ends when the colour index is <= 1
; or > 1000.
;
; Registers: EBX, EAX, EDI, ECX are saved and restored; MM0/MM1 are used and
; EMMS is issued before returning.
;---------------------------------------------------------------------------------
	push    ebp
	mov     ebp,esp
        push    ebx
        push    eax
        push    edi
        push    ecx

        ; copy the stack arguments into the static working variables
        mov     eax,[ebp+8]
        mov     [_steep],eax
        mov     eax,[ebp+12]
        mov     [_x],eax
        mov     eax,[ebp+16]
        mov     [_xx],eax
        mov     eax,[ebp+28]    ; colour index
        mov     [_c],eax
        mov     eax,[ebp+40]
        mov     [_y],eax
        mov     eax,[ebp+44]
        mov     [_yy],eax
        mov     eax,[ebp+48]
        mov     [_e],eax
        mov     eax,[ebp+52]
        mov     [_sx],eax
        mov     eax,[ebp+56]
        mov     [_sy],eax
        mov     eax,[ebp+60]
        mov     [_dx],eax
        mov     eax,[ebp+64]
        mov     [_dy],eax

; main loop start
@100erloop:
        mov     ebx,[_c]
        shl     ebx,2                   ; colour index -> byte offset (palette entries are dwords)
        movd    mm1,[_testpal+ebx]      ; mm1 = packed colour added to every plotted pixel

; choose the plotting variant
        cmp     [_steep],dword 0
        jnz     @1st_version
        jmp     @2te_version
;==
; Variant 1: _x/_xx index _xtab, _y/_yy are the offsets within the row.
@1st_version:
; clip the first point: after TEST the carry flag is clear, so JBE is taken
; only when _x is zero; JA rejects everything above 199 (unsigned, which also
; rejects negative values). Accepted range is therefore 1..199.
        mov     ebx,[_x]
        test    ebx,ebx
        jbe     @clipactive1
        cmp     ebx,199
        ja      @clipactive1

; compute the byte position: edi = _y*4 + _xtab[_x]
; (presumably _xtab holds the byte offset of each row - verify where it is filled)
        mov     ebx,[_x]
        mov     edi,[_y]
        shl     edi,2
        shl     ebx,2
        add     edi,[_xtab+ebx]

; MMX put pixel
        movd    mm0,[_schweif+edi]        ; current 4-byte pixel
        paddsb  mm0,mm1                   ; signed-saturating per-byte add of the palette colour (max +127)
        movd    [_schweif+edi],mm0

@clipactive1:
; clip the second point (same 1..199 test on _xx)
; NOTE(review): a rejected second point jumps to @2te_version, so variant 2
; then plots as well in the same iteration. Jumping to @endofput may have been
; intended - confirm before changing.
        mov     ebx,[_xx]
        test    ebx,ebx
        jbe     @2te_version
        cmp     ebx,199
        ja      @2te_version

; compute the byte position: edi = _yy*4 + _xtab[_xx]
        mov     ebx,[_xx]
        mov     edi,[_yy]
        shl     edi,2
        shl     ebx,2
        add     edi,[_xtab+ebx]

; MMX put pixel
        movd    mm0,[_schweif+edi]        ; current 4-byte pixel
        paddsb  mm0,mm1                   ; signed-saturating per-byte add of the palette colour (max +127)
        movd    [_schweif+edi],mm0
        jmp     @endofput
;==
; Variant 2: _y/_yy index _xtab, _x/_xx are the offsets within the row.
@2te_version:
; clip the first point (same 1..199 test, here on _y)
        mov     ebx,[_y]
        test    ebx,ebx
        jbe     @clipactive2
        cmp     ebx,199
        ja      @clipactive2

; compute the byte position: edi = _x*4 + _xtab[_y]
        mov     ebx,[_y]
        mov     edi,[_x]
        shl     edi,2
        shl     ebx,2
        add     edi,[_xtab+ebx]

; MMX put pixel
        movd    mm0,[_schweif+edi]        ; current 4-byte pixel
        paddsb  mm0,mm1                   ; signed-saturating per-byte add of the palette colour (max +127)
        movd    [_schweif+edi],mm0
@clipactive2:

; clip the second point (same 1..199 test on _yy)
        mov     ebx,[_yy]
        test    ebx,ebx
        jbe     @endofput
        cmp     ebx,199
        ja      @endofput

; compute the byte position: edi = _xx*4 + _xtab[_yy]
        mov     ebx,[_yy]
        mov     edi,[_xx]
        shl     edi,2
        shl     ebx,2
        add     edi,[_xtab+ebx]

; MMX put pixel
        movd    mm0,[_schweif+edi]        ; current 4-byte pixel
        paddsb  mm0,mm1                   ; signed-saturating per-byte add of the palette colour (max +127)
        movd    [_schweif+edi],mm0
@endofput:

; C reference left by the original author:
;     while(e >= 0){
;        y  += sy;
;        yy -= sy;
;        e  -= dx;
;     }
; The assembly loops while _e <= 3000 as an unsigned value, so a negative _e
; (large when read unsigned) ends the loop, and so does any _e above 3000.
; NOTE(review): with _dx == 0 and _e <= 3000 this loop never terminates.
@whileloop:
        cmp     [_e], dword 3000                ; original author's note: "100=star (4)"
        ja      @end_e_check
        mov     eax,[_sy]
        add     [_y],eax
        sub     [_yy],eax
        mov     eax,[_dx]
        sub     [_e],eax
        jmp     @whileloop
@end_e_check:

; C reference left by the original author:
;     x  += sx;
;     xx -= sx;
;     e  += dy;
        mov     eax,[_sx]
        add     [_x],eax
        sub     [_xx],eax
        mov     eax,[_dy]
        add     [_e],eax

; C reference left by the original author:
;     if (c>6) c-=6;
;     if (c>3) c-=3;
;     if (c>1) c-=1;
;     if (c<2)  break;
; Note that the two compares below use JB, so the subtraction already happens
; at c == 6 and c == 3, unlike the ">" in the reference.

; original author's reminder: write to _c only at the start

        cmp     [_c], dword 6
        jb      @kleinerals6
        mov     eax,[_c]
        sub     eax,6
        mov     [_c],eax
@kleinerals6:

        cmp     [_c], dword 3
        jb      @kleinerals3
        mov     eax,[_c]
        sub     eax,3
        mov     [_c],eax

@kleinerals3:
        mov     eax,[_c]
        cmp     eax, 1
        jbe     @outahere               ; colour index 0 or 1: line finished
        cmp     eax,1000
        ja      @outahere               ; anything above 1000 (unsigned) also ends the line
        dec     eax
        mov     [_c],eax

        jmp     @100erloop      ; next step with the reduced colour index

@outahere:
        emms                            ; leave MMX state clean for FPU code

        pop     ecx
        pop     edi
        pop     eax
        pop     ebx
	mov     esp,ebp
	pop     ebp

	ret


	  SECTION .data                         ; nothing is placed in .data
	  SECTION .bss                          ; uninitialised storage only

; Four pixel buffers of 320*200 pixels, 4 bytes per pixel.
; NOTE(review): the blur routines read a few bytes just outside _schweif at
; both ends; with the order below those reads fall into _buffer and _screen.
; Keep that in mind before reordering these reservations.
_buffer      resb 320*200*4     ; written by _asmcalcbuffer, inverted by _asmclearbuffer
_schweif     resb 320*200*4     ; buffer the blur, put-pixel and line routines work on
_screen      resb 320*200*4     ; read and zeroed by _asmcopyfontschweif
_logo32      resb 320*200*4     ; added to _schweif by _asmcalcbuffer
_mmxdetected resd 1             ; set to 0 or 1 by _mmxdetection
_dwvar2      resd 1             ; not referenced by the code in this file
_mmxdata1    resq 1             ; not referenced by the code in this file
_xtab        resd 200           ; dword offsets indexed by _lineimp; exported, not filled in this file
_testpal     resd 256           ; packed colour dwords indexed by _c in _lineimp; exported, not filled in this file
; Static working variables of _lineimp (copied from its stack arguments).
_steep       resd 1
_x           resd 1
_xx          resd 1
_y           resd 1
_yy          resd 1
_color       resd 1             ; not referenced by the code in this file (_lineimp uses _c)
_e           resd 1
_sx          resd 1
_sy          resd 1
_dx          resd 1
_dy          resd 1
_c           resd 1


