; 386POWER is part of the public domain portion of the XGE system
; XGE == eXtended Game Engine toolkit
; See 386power.txt and history.txt for more info & credits

        .386p
        
; Keep the segments declared below in exactly this order, or you will run into trouble
NOSMART
NOJUMPS

code16  segment para public use16
code16  ends
code32  segment para public use32
code32  ends
codeend segment page stack use32 'stack'
codeend ends

; DOS extender interface identifiers, in "implementation order".
; One of these values is stored in _386Man once the host interface
; has been chosen (StartDPMI stores IS_DPMI).
IS_VCPI  = 0
IS_DPMI  = 1
IS_XMS   = 2
IS_HARD  = 3

LOWMIN           =128      ; minimum free low memory (in K)
EXTMIN           =512      ; minimum free extended memory (in K)

STACKSIZE        =0FFFh ; TOTAL AVAILABLE stack size in paragraphs
STACKUSER        =0400h ; PROT. MODE MAIN STACK space in paragraphs

STACKSWTR        =0080h  ; STACK SIZE (in paragraphs) available in real mode
                         ; after a mode switch from prot. mode
STACKSWTP        =0080h  ; STACK SIZE (in paragraphs) available in prot. mode
                         ; after a mode switch from real mode

SWITCHSTACK    =0010h  ; STACK SIZE (in paragraphs) available in real mode
                       ; for "int 31h" direct switches under DPMI
                       ; and when reflecting irqs
CR = 13 ; ASCII carriage return
LF = 10 ; ASCII line feed

;=======================================
; Real mode and 16bit 386 code
;=======================================


code16  segment para public use16
        assume cs:code16, ds:code32, fs:code32
        align byte
                  
typemsg:  ; print the message at DS:DX on the console (DOS int 21h, AH=09h)
        ; In:  ds:dx = message for DOS function 09h
        ; Out: flags, general registers and segment registers are all
        ;      handed back exactly as they were on entry
        pushf           ; flags first, so they are the last thing restored
        pushad
        push gs
        push fs
        push es
        push ds
        mov ah,9
        int 21h
        pop ds          ; undo the saves in mirror order
        pop es
        pop fs
        pop gs
        popad
        popf
        ret
        
; say <label>
; Debug/progress message macro for real or V86 mode: prints the message
; stored at <label> (an offset relative to DS) through typemsg.
; EDX is saved and restored around the call; typemsg preserves the rest.
say macro t_item
        push edx
        mov edx,offset t_item
        call typemsg
        pop edx
        endm        
        
; d16tty calls the DOS function int 21h, ah=09
; under DPMI once the critical segments are set up.
; In:  edx = offset of the message (it is passed to real mode as
;      code32:dx), ds = selector through which the V86* variables,
;      switchstackaddr and tempstack are reachable
; Out: all general registers, segment registers and flags preserved

d16tty:
        ; DPMI teletyper

        pushad
        pushfd
        push ds
        push es
        push fs
        push gs

        push ds
        pop  es             ; es = ds, so es:edi addresses the V86 register block

        mov eax,switchstackaddr ; real mode SS:SP reserved for these calls
        mov V86ds,code32    ; real mode DS = code32 segment
        mov V86edx,edx      ; real mode EDX = message offset
        mov V86ah,9         ; real mode AH = DOS function 09h
        mov bx,0021h        ; call int 21h
        mov tempstack,eax   ; set stack for direct DPMI calls
        mov cx,0            ; nothing to copy from the prot. mode stack
        mov edi, offset V86edi ; es:edi -> start of the virtual register block
        mov ax,300h         ; DPMI function 0300h (real mode interrupt call)
        int 31h
        xor eax,eax       ; set back dpmi stack to default
        mov tempstack,eax ; "use small DPMI stack" value

        pop gs
        pop fs
        pop es
        pop ds
        popfd
        popad
        ret
        
; psay <label>
; Protected mode (DPMI) counterpart of "say": prints the message stored
; at <label> through d16tty.  EDX is saved and restored around the call;
; d16tty itself preserves every other register and the flags.
psay macro d_item
        push edx
        mov  edx, offset d_item
        call d16tty
        pop edx
        endm

DOSRAMTOP = 0002 ; PSP offset of the word read later as the low memory top
        align byte ; we need byte align to "inline" the CPUID instruction
Boot16: ; DOS EXTENDER starts here and kicks program into 386 protected mode
        ; Entry: started by DOS.  This first part prints the banner and
        ; detects the CPU type; _CPUPower receives the result:
        ;   0 = 8086/8, 2 = 286, 3 = 386, 4 = 486, or the CPUID family code.
        ; Anything below a 386 prints its msg_cputype entry and terminates.

        mov ax,code32   ;
        mov ds,ax       ; program data is into the 32bit segment

        cld ; DEFAULT INCREMENT DIRECTION IS UP!!!!!!
        
        mov ax,0003 ; set 80x25 text mode for text output
        int 10h     ;
        
        ; show who holds the copyrights etc. etc.
        mov dx,small offset _386power_info
        mov ah,09
        int 21h

        ;========================================================
        ; Detect if current processor is a 386 or more
        ; checking what happens to the flags when pushed on stack
        ; or when special flags are flipped
        ; CHECK CPU TYPE
        ; (N.B. nothing is checked about Floating Point Unit capabilities!!!!)
        ; Only 8086 instructions may be used until the 286 test has passed.

        mov cx,0F000h  ; CL processor type 0 (8086) in case of exit
                       ; CH = mask for flags

        pushf  ; copy FLAGS to BX
        pop bx ;

        mov ax,bx  ; try to clear high 4 bits of FLAGS
        and ah,0fh ;
        push ax    ;
        popf       ;
        pushf      ;
        pop ax     ; result into ax

        and ah,ch             ; check high 4 bits
        cmp ah,ch             ;
        je short ccheckdone ; if bits are set, CPU is 8086/8

        mov cl,2 ; 286 ?

        or bh,ch ; try to set high 4 bits of FLAGS
        push bx  ;
        popf     ;
        pushf    ;
        pop ax   ;

        and ah,ch           ; check high 4 bits
        jz short ccheckdone ; if bits are not set, CPU is 80286

        inc cl  ; 386dx/sx

        pushfd   ; copy EFLAGS to EBX
        pop ebx  ; (EBX must survive until sccheckdone restores it)

        mov eax,ebx     ; try to flip AC bit in EFLAGS
        xor eax,40000h  ;
        push eax        ;
        popfd           ;
        pushfd          ;
        pop eax         ;
        xor eax,ebx         ; AC bit flipped?
        jz short sccheckdone; if no, CPU is 386

        inc cl ; 486dx/sx

        mov eax,ebx      ; try to flip ID bit in EFLAGS
        xor eax,200000h  ;
        push eax         ;
        popfd            ;
        pushfd           ;
        pop eax          ;

        xor eax,ebx         ; ID bit flipped?
        jz short sccheckdone; if no, CPU is an "old" 486
        ; this is a Pentium or a "recent" Intel CPU with
        ; CPUID support

        ; CPUID overwrites EAX, EBX, ECX and EDX.  EBX holds the EFLAGS
        ; image that sccheckdone reloads and CL holds the CPU type, so
        ; both are saved around each CPUID.
        push ebx ; save original EFLAGS image
        push ecx ; save CPU type
        xor eax,eax ; eax=0 --> "check for CPUID and gimme vendor info"
        nop ; a nop is a good thing to avoid undocumented bugs
        db 0Fh,0A2h ; CPUID OPCODE
        nop
        pop ecx ; restore
        pop ebx ;
        cmp eax,1 ; if eax==1 then this is the "real" CPUID
        jne short sccheckdone ; Bah! it was just a "not CPUID" thing
        nop
        ; eax = 1 , "hey CPUID! Gimme processor info"
        push ebx ; save original EFLAGS image again
        push ecx ;
        db 0Fh,0A2h
        nop
        pop ecx
        pop ebx
        and ah,0Fh ; select "processor family" info
        mov cl,ah ; set CL = "processor family" code
sccheckdone:
        push ebx  ; restore flags as found at start of test
        popfd     ;
ccheckdone:
        mov _CPUPower,cl
        cmp cl,3  ; at least a 386 ?
        jnb Is386 ;
NotA386: ; pre-386 CPU: report the CPU type and quit
        ; Only 8086-class instructions may be used here.  The common
        ; exit16err route goes through "say"/typemsg, whose 32-bit pushes
        ; are invalid opcodes below a 386, so print and terminate directly.
        ; Nothing has been hooked or saved yet (_OldInt is only filled
        ; after the 386 check), so there is nothing to restore.
        xor ch,ch
        mov bx, offset msg_cputype
        add cx,cx
        add cx,cx       ; cx = CPU type * 4 (msg_cputype holds dword entries)
        add bx,cx
        mov dx,[bx]
        mov ah,09       ; DOS: print the message at DS:DX
        int 21h
        mov ax,4C01h    ; DOS: terminate with errorlevel 1
        int 21h
Is386:  ; this is a 386 or a 486 or a Pentium
        ; In:  CL = CPU type (index into the msg_cputype dword table),
        ;      ES = PSP segment, DS = code32
        ; Saves the interrupt table, converts the segment values into
        ; linear/code32-relative offsets and checks the free low memory.
        and ecx,0FFh
        mov edx,[ecx*4+msg_cputype]
        call typemsg
        ;=======================================================
        ; now we can use all the features a 386 has

        cld    ; default direction is up (DF = 0)
        pushf         ; Set current V86 virtual Flag register
        pop ax        ;
        mov V86F,ax   ;

        ;==============================================================
        ; save current interrupt table
        say msg_gints
        ; copy old ints to _OldInt table before kicking into prot. mode
        mov si,offset _OldInt
        mov ax,03500h ; get interrupt vectors (AL = vector number, starts at 0)
        push es
getints:
        int 21h        ; returns the vector in ES:BX
        mov [si],bx
        mov [si+2],es
        add si,4
        inc al
        jnz getints    ; AL wraps to 0 after vector 0FFh
        pop es
        
        cli ; turn off interrupts, while initializing we don't want
            ; other things around

        ;===========================================================
        ; Now it's time to convert pointers from
        ; seg:ofs to LINEAR and/or CODE32 OFFSETS

        xor eax,eax      ;
        mov ax,es        ; PSP LINEAR OFFSET
        shl eax,4        ;
        mov _PSPBase,eax ;


        mov eax,code16      ; CODE16 LINEAR OFFSET
        shl eax,4           ;
        mov _Code16Base,eax ;
        or dword ptr baseGDTcode16,eax ; SET UP code16 GDT entry
        or dword ptr baseGDTdata16,eax ; SET UP data16 GDT entry

        mov eax,code32      ; CODE32 LINEAR OFFSET
        shl eax,4           ;
        mov _Code32Base,eax ;
        or dword ptr baseGDTcode32,eax ; SET UP code32
        or dword ptr baseGDTdata32,eax ; and data32 GDT entries
        
        add dword ptr bases16_GDTaddr,eax ; SET UP GDT descriptor
        
        mov ebx,codeend            ; eax == code32  linear address
        shl ebx,4                  ; ebx == codeend linear address
        sub ebx,eax                ; ebx == codeend offset from code32
        mov _LoMemBase,ebx         ; set _LoMemBase ( it grows up   )
                                   ; at stack segment

        mov Stack32BaseOffset,ebx ; set Stack32BaseOffset
                                  ; stack base position
                                  ; as code32 relative offset

        xor edx,edx                 ;
        mov dx,es:[DOSRAMTOP]       ; get low memory size in paragraphs
        shl edx,4                   ; paragraphs to bytes
        sub edx,eax                 ; code32 relative offset
        mov _LoMemTop,edx           ; set _LoMemTop
        sub edx,ebx ; get memory between _LoMemBase and _LoMemTop
                    ; (stack size included)
        ; Compare BEFORE loading the message offset: writing DX first
        ; would overwrite the low 16 bits of the size being tested.
        ; MOV leaves the flags alone, so the branch still sees the CMP.
        cmp edx,(LOWMIN*1024)
        mov dx,offset em1_no_lomem
        jnb short ramright
        jmp exit16err
ramright:
        ; Enough low memory: carve out the stacks, then probe the host
        ; interfaces in the order DPMI, VCPI, XMS, bare hardware.
        
        mov eax,((1+STACKSIZE)*16) ; set up stack frame with an extra paragraph
        call InitAlloc             ; (initial _LoMemBase was at stack segment
                                   ;  NOW will be after the stack)

        ; allocate mode switch private stack
        mov eax,((1+SWITCHSTACK)*16)
        call InitAlloc             ; eax = code32 relative offset of the block
        add eax,15 ; round to paragraph
        shr eax,4  ;
        add eax,code32     ; get real mode segment address
        shl eax,16                  ; get COMPLETE
        mov ax,(SWITCHSTACK*16)   ; real mode FAR pointer
        mov switchstackaddr,eax ; SS:SP of real mode stack to use when
                           ; calling real mode from dpmi during "d16tty"


        push es ; save PSP seg (DPMI test may kill ES)
                ; popped by StartDPMI (into GS) or just before the VCPI test

        ; HERE the possible "dos-extender interface" hookers diverge

        say msg_DPMItest
        ; now TEST FOR DPMI
	mov ax,1687h   ; Get Real-to-Protected-Mode Switch entry point:
		       ;
		       ; The entry point you get must be called ONLY ONCE
		       ; FOR THE FIRST SWITCH to DPMI protected mode.
		       ; Input:
		       ;	AX = 1687h
		       ; Output:
		       ;	AX = 0 if function successful
		       ;        BX = DPMI flags
                       ;             bit 0:    0 = 16bit program support only
                       ;                       1 = 32bit program support
		       ;             bit 1..15 NOT USED
		       ;        CL = processor type 02h= 80286
		       ;		            03h= 80386
		       ;                            04h= 80486
		       ;                            05h= Pentium ?
		       ;                            06h..0FFh = not used
		       ;        DH = DPMI version major number (binary)
		       ;        DL = DPMI version minor number (binary)
		       ;           [386power fully supports DPMI 0.9 and 1.0]
		       ;        SI = number of paragraphs required for
		       ;	     DPMI host private data (it may be 0)
		       ;        ES:DI = segment:offset of procedure to call
		       ;                to enter protected mode

	int 2fh        ;
	or ax,ax       ;
	jz StartDPMI   ; Go DPMI if present
        
        say msg_VCPItest
        ; Not DPMI, try VCPI.
        ; Because of the new EMM386 from DOS 6.0 and above
        ; we have to test for VCPI services only
        ; (on old EMM386 versions this may spell troubles if EMS support
        ;  is not enabled)

        pop  es ;  restore es for safety

        xor eax,eax
        mov gs,ax ; gs == segment zero
        cmp eax,gs:(4*67h) ; NULL vector ?
        je short NotIntoVCPI
        mov ax,0de00h       ;
        int 67h             ;  Call VCPI INSTALLATION CHECK
        or ah,ah            ;
        jnz short NotIntoVCPI   ; if (ah != 0) it isn't VCPI
        jmp StartVCPI
NotIntoVCPI:
        ; Are we running under some other V86 monitor?
        ; The VM flag cannot be read with PUSHFD (the CPU clears VM in
        ; the pushed EFLAGS image), so look at CR0.PE through SMSW:
        ; PE is clear in true real mode and set under any V86 monitor.
        smsw ax
        test al,1
        jz try_xms  ; if not into V86 mode, try real mode XMS startup
        ; No other VMM is supported
        mov dx,offset em2_unkV86
        jmp short exit16err
try_xms:
        ; now check for XMS (this is the best environment
        ; you can use, good hardware independence, irq reflection
        ; and maximum speed)
        ; set XMS/HARD "switch to real mode" functions
        mov _ExecINT,offset s32_ExecINT
        mov _ExecReal,offset s32_ExecReal
        say msg_XMStest
        mov ax,4300h        ; XMS installation check
        int 2Fh
        cmp al,80h          ; AL = 80h means an XMS driver is present
        jne NotIntoXMS
        jmp StartXMS
NotIntoXMS:
        ; Aww! We have to try to run on basic hardware and bios.
        ; Maybe we'll hang up if this system is not compatible enough
        say msg_HARDtest
        jmp StartHARD
        
;------------------------------------------------------------------------------

InitAlloc:  
        ; Allocate low memory or die. (called from initialization code)
	; EAX=space to allocate
        ; Returns EAX = previous _LoMemBase, i.e. the code32 relative
        ; offset of the allocated block; _LoMemBase is advanced by the
        ; requested size.  EBX is preserved on success.
        ; On failure control never returns to the caller: it falls into
        ; exit16err with the emINI_no_lomem message.
        push ebx
        add eax,_LoMemBase     ; eax = new base after the allocation
        mov ebx,_LoMemTop
        cmp ebx,eax            ; gone above _LoMemTop ?
        jb short noinitmem     ;
        sub  ebx,eax           ; ebx = memory left after the allocation
        xchg eax,_LoMemBase    ; commit new base, eax = old base (the block)
        cmp ebx,LOWMIN*1024  ; gone above lower memory limit of
        jb short noinitmem   ; free memory reserved for program data ?
        pop ebx
        ret
noinitmem:
        ; (the saved ebx is left on the stack; the program is terminating)
        mov dx,offset emINI_no_lomem
        ; join error report & exit routine
exit16err:
        ; Real mode error exit.  In: DS:DX = error message to print.
        ; Prints it, prints emX_stderr and jumps through the exitrout
        ; vector.
        mov ah,09       ; Tell what's gone wrong and get out
	int 21h

        say emX_stderr
        jmp ds:exitrout

exit:   ; Good Old terminate program function (default exitrout)
        ; for real mode termination or the final VCPI shutdown
        ; (DPMI shutdown goes thru a different route)
        ; restore interrupts before getting out
        ; Expects DS to address the segment holding _OldInt; does not return.
        say msg_rints
        ; now restore interrupts
        mov si,offset _OldInt
        mov ax,02500h  ; DOS set interrupt vector, AL = vector number (from 0)
        push ds
        pop es         ; es = data segment, because ds is reloaded in the loop
setints:
        mov dx,es:[si]     ; ds:dx = saved vector
        mov ds,es:[si+2]   ;
        add si,4
        int 21h
        inc al
        jnz setints        ; AL wraps to 0 after vector 0FFh
        push es
        pop ds             ; back to the data segment
        sti
        say msg_end
        mov ax,4C00h ; terminate this program
	int 21h

; 16 bit common system code starts here

; INTREAL  & CALLREAL
; temporary INT to real mode OR temporary CALL FAR to real mode.
; CALLS TO INTREAL OR CALLREAL ARE PERFORMED FROM CODE32 SWITCHPOINTS
; SO WHEN THE CPU GETS HERE THE MODE SWITCH HAS ALREADY BEEN DONE.
        align byte

intreal:; temporary far call to real mode WITH INTERRUPT STACK FRAME
        ; tempaddr contains the far address of the interrupt to call
        ; SIMULATE INTERRUPT STACK FRAME
        ; (falls through into callreal after pushing the flags, so the
        ;  called handler can return with IRET)
        pushf                 ; FLAGS
callreal:
        ; temporary far call to real mode , NOW in 16bit mode
        ; SIMULATE CALL FAR STACK FRAME with INTERRUPTS DISABLED
        ; tempaddr contains the far address of the routine to call
        ; The V86* variables supply the register values handed to the
        ; routine and receive the values it returns (see exec_done).
        push cs                 ; CS
        push (offset exec_done) ; IP

        push dword ptr tempaddr ;address to call

        ; load virtual registers and jump to real-mode code to execute
        mov fs,V86fs         ; load Vregs
        mov gs,V86gs         ;
        mov es,V86es         ;
        ; ss:esp      set by mode switch code
        mov eax,V86eax       ;
        mov ecx,V86ecx       ;
        mov edx,V86edx       ;
        mov ebx,V86ebx       ;
        mov esi,V86esi       ;
        mov edi,V86edi       ;
        mov ebp,V86ebp       ;
        mov ds,V86ds         ; ds last: the V86* variables are read through ds
        retf ; jmp to tempaddr
exec_done: ; done with real mode interrupt/call
        cli
        push eax             ; keep returned eax and ds while ds is reloaded
        push ds
        mov ax,code32
        mov ds,ax  ; ds = code32 again, so the V86* variables are reachable
        pop ax     ; ax = ds returned by the real mode code
        mov V86ds,ax
        pop V86eax
        pushf    ; save flags
        pop V86F ; (taken after the cli above)
        mov V86ecx,ecx
        mov V86edx,edx
        mov V86ebx,ebx
        mov V86esi,esi
        mov V86edi,edi
        mov V86ebp,ebp
        mov V86es,es
        mov V86fs,fs
        mov V86gs,gs
IREAL_TERMINATOR:
        ; continue at the switchpoint selected at start-up
        ; (StartDPMI stores irToDPMI32 into s16_irToSYS32)
        jmp word ptr ds:[s16_irToSYS32]

; here follows the two possible "switchpoints" pointed by s16_irToSYS32

irToVCPI32:  ; VCPI return to 386P
        ; no code follows the int 67h: execution continues at v16_sw_dest
        mov v16_sw_dest,offset v32_exec_d ; jump to end of 386P INT32/33
        mov esi,v16_VCPIsys               ; system data for mode switch
        mov ax,0de0ch   ; SWITCH TO PROTECTED MODE thru VCPI
        int 67h         ;
        
irToDPMI32: ; DPMI return to 386P
        ; loads the register set expected by the d16_Fast16_To_32 routine
        mov ebx,s32_SavStackOfs ;
        mov dx,s32_SavStackSel  ; get 386P stack in dx:ebx
        mov cx,dx                  ; cx  = current stack seg. (same of 386P)
        mov si,_SelCode            ; si  = new code sel
        mov edi,offset d32_16done  ; edi = new code offset
        mov ax,_SelData            ; ax  = new data sel
        jmp ds:d16_Fast16_To_32       ; mode switch routine

irToHARD32: ; HARD/XMS return to 386P
        ; esi:edi = protected mode destination for HARD_R2P
        mov esi,08h                 ; _SelCode
        mov edi,offset s32_exec_d   ; s32_exec_d
        jmp HARD_R2P

;-----------------------------------------------------------------------------
; real mode "foot" of VCPI irq-from-protected-mode reflector
;-----------------------------------------------------------------------------
        align byte
v16_irqreal:
        ; VCPI real mode IRQ from 32bit prot. mode
        ; fast irq response without copying virtual registers
        ; ebp= ISR to call (real mode far address)
        cli
        pushf                              ;  iret stack frame, so the ISR's
        push cs                            ;  IRET returns to irqrealToVCPI32
        push (small offset irqrealToVCPI32);
        push ebp ; address of real mode isr
        retf     ; "jump" to the ISR
irqrealToVCPI32:  ; VCPI return to 386P
        ; no code follows the int 67h: execution continues at v16_sw_dest
        mov v16_sw_dest,offset v32_irqret ; jump to end of irq reflection
        mov esi,v16_VCPIsys   ; system data for mode switch
        mov ax,0de0ch   ; SWITCH TO PROTECTED MODE thru VCPI
        int 67h         ;

;-----------------------------------------------------------------------------
; real mode "foot" of HARD/XMS irq-from-protected-mode reflector
;-----------------------------------------------------------------------------
        align byte
s16_irqreal:
        ; HARD/XMS real mode IRQ from 32bit prot. mode
        ; fast irq response without copying virtual registers
        ; ebp= ISR to call (real mode far address)
        cli
        pushf                              ;  iret stack frame, so the ISR's
        push cs                            ;  IRET returns to irqrealToHARD32
        push (small offset irqrealToHARD32);
        push ebp ; address of real mode isr
        retf     ; "jump" to the ISR
irqrealToHARD32:  ; HARD/XMS return to 386P
        mov esi,08h                ; _SelCode
        mov edi,offset s32_irqret  ; return IP
        ; continue straight into HARD_R2P (must stay immediately below)
;=============================================================================
; HARD/XMS real mode to protected mode switch
;=============================================================================
HARD_R2P:
        ; XMS/HARD real to protected switch
        ; esi:edi == protected mode cs:eip to jump to
        ; we suppose ss:sp is into 386P stack
        ; IF IT IS NOT, set it there BEFORE SWITCHING!!!!!!!
        ; eax modified; ds, es, fs, ss and gs are reloaded
        ; Leaves through a far jump to 08h:HARDPROT (not defined in this
        ; part of the file) and never returns to the caller.
        cli     ; clear irqs, this sequence is very critical
                ; so even the switch destination will have irq disabled

        lidt fword ptr s16_IDTaddr ; load protected mode IDT
        lgdt fword ptr s16_GDTaddr ; load protected mode GDT
        ; NOTE(review): the selector values below are written to the
        ; segment registers while CR0.PE is still clear; confirm that
        ; HARDPROT reloads them (or that this ordering is intended).
        mov ax,10h ; code32 data selector
        mov ds,ax                       ; load protected mode DS
        mov es,ax                       ; load protected mode ES
        mov fs,ax                       ; load protected mode FS
        mov ss,ax  ; set prot mode stack segment

        mov ax,18h ;
        mov gs,ax  ; load protected mode GS with _SelZero
        mov eax,cr0    ; TURN ON PROTECTION
        or al,1        ;
        mov cr0,eax    ;
        db 0EAh                        ; JMP FAR code32_selector:HARDPROT
        dw small offset HARDPROT,08h   ; (hand-assembled: opcode, offset, selector)


;=============================================================================
; HARD/XMS protected mode to real mode switch
;=============================================================================

        align dword
        dd 0
        align byte
HARD_P2R:
        ; XMS/HARD PROTECTED MODE TO REAL MODE SWITCH
        ; si,di == real mode destination (si = segment, di = offset)
        ; WE SUPPOSE SS:ESP is into 386P stack !!!!!!
        ; If it is not (i.e. when performing irqs)
        ; set it there before calling this!!!!!
        ; only ss,cs and ds will be set correctly
        ; (ss = codeend, ds = code32, cs = code16 then the caller's si)
        ; EAX trashed

        cli
        sub esp,Stack32BaseOffset ; convert to real mode stack offset

        lidt fword ptr s16_IDT86        ; load real mode IDT
        ; NOTE(review): ss and ds receive real mode segment values
        ; while CR0.PE is still set; confirm this ordering is intended.
        mov ax,codeend
        mov ss,ax
        mov ax,code32
        mov ds,ax
        mov eax,cr0   ; TURN OFF PROTECTION
        and al,0feh   ;
        mov cr0,eax   ;
        db 0EAh                      ; JMP FAR code16:HARDREAL
        dw offset HARDREAL,code16    ; (flush pipeline)
HARDREAL:
        push si                         ; store real mode target CS
        push di                         ; store real mode target IP
        retf                            ; go to real mode destination

;=============================================================================
; SET INT SLOTS TABLE
; BL=low PIC val, BH=high PIC val, 
; THE FIRST  16 INT SLOTS ARE THE IRQ INT SLOTS
; AND UNDER VCPI AND DPMI THEY CAN BE MAPPED ANYWHERE

setintslots:  ; fill the 16 IRQ entries of the int slot table
        ; In:  BL = interrupt number for the first low PIC irq
        ;      BH = interrupt number for the first high PIC irq
        ; Out: intslotnum[0..7]  = BL, BL+1, ... (low PIC)
        ;      intslotnum[8..15] = BH, BH+1, ... (high PIC)
        ;      ecx, ebx and esi are preserved
        push ecx
        push ebx
        push esi
        mov cx,8                  ; eight irqs per PIC
        mov si,offset intslotnum
setpicints:
        mov [si+8],bh             ; high PIC slot
        mov [si],bl               ; low PIC slot
        add bx,0101h              ; next interrupt number for both PICs
        inc si
        dec cx
        jnz setpicints
        pop esi
        pop ebx
        pop ecx
        ret
        
;--------------------------------------------------------------------------        
; DPMI 16 bit TERMINATION ROUTINE

d16_retreal: ; DPMI TERMINATE
        ; Restore interrupts grabbed by DPMI interface:
        ; the 16 irq slots listed in intslotnum get back the handlers
        ; saved in d32_OldInts (8 bytes per entry: dword offset, then
        ; selector), then control passes to d16_exit.
        psay msg_pints
        mov ax,0205h               ; DPMI: set protected mode int vector
        xor bx,bx
        mov edi,15 ; 16 ints for irqs
d16_int_restore:
        mov bl,intslotnum[di]                  ; bl = interrupt number
        mov edx,dword ptr d32_OldInts[edi*8]   ; cx:edx = saved handler
        mov cx,word ptr d32_OldInts[edi*8+4]   ;
        int 31h
        dec di
        jns d16_int_restore  ; loop until less than zero
        jmp d16_exit
        
d16_exit16err:    ; DPMI Exit with error message
        ; In: V86dx = offset (in code32) of the message to print.
        ; Prints it with DOS function 09h through DPMI function 0300h,
        ; then falls into d16_exit.
        mov V86ds,code32
        mov V86ah,9
        mov ax,0300h
        mov bx,0021h         ; int 21h
        mov cx,0
        mov edi,offset V86edi
	push ds
	pop es               ; es:edi -> virtual register block
	int 31h

d16_exit:                                 ; DPMI exit to real mode
        ; Undoes the DPMI set-up step by step and terminates the
        ; program; it does not return.
        mov es,d16_PSPSel                 ; restore env selector
        mov ax,d16_DosEnvSegSel
	mov es:[2ch],ax

        ; TERMINATE PROGRAM
        ; but restore interrupts before getting out
        ; using the DPMI API function
        psay msg_rints
        mov si,offset _OldInt
        mov ax,0201h         ; DPMI: set real mode interrupt vector
        mov bl,00            ; bl = vector number
dsetints:
        mov dx,[si]          ; cx:dx = saved real mode segment:offset
        mov cx,[si+2]
        add si,4
        int 31h
        inc bl
        jnz dsetints         ; BL wraps to 0 after vector 0FFh
        ; Now terminate program
        sti
        psay msg_end

        ; Time to unlock the memory we tried to lock at startup
        psay msg_unlock
        mov esi,hidpmi_size
        mov ax,0601h
        mov ebx,hidpmi_base
        mov di,si            ; si:di = region size
        mov cx,bx            ; bx:cx = region linear address
        shr esi,16
        shr ebx,16
        int 31h
        ; we don't need to check for failure

        ; NOW RELEASE EXTENDED MEMORY BLOCK
        psay msg_releasemem
        mov esi,hidpmi_handle
        mov ax,0502h
        mov di,si            ; si:di = memory block handle
        shr esi,16
        int 31h
        ; we don't need to check for failure

        ; now we can free the selectors
        psay msg_DPMIrzero32
        mov ax,0001h
        mov bx,_SelZero
        int 31h
        psay msg_DPMIrcode32
        mov ax,0001h
        mov bx,_SelCode
        int 31h

        mov ss,_EXIT_SS     ; change stack so we don't use _SelData anymore
        mov esp,_EXIT_ESP   ;

        psay msg_DPMIrdata32
        mov ax,0001h
        mov bx,_SelData
        int 31h
        ; terminate
        psay msg_DPMIfinal
        mov ax,4C00h
        int 21h

StartDPMI:  ; DPMI initialization
        ; In (from the int 2Fh AX=1687h probe):
        ;      BX = DPMI flags, SI = paragraphs of host private data,
        ;      ES:DI = mode switch entry point,
        ;      top of stack = PSP segment pushed before the probe
        ; This part enters 16 bit protected mode and allocates the three
        ; LDT selectors _SelCode, _SelData and _SelZero.
        mov _386Man,IS_DPMI           ; set system type DPMI byte
        mov _ExecINT,offset d32_ExecINT
        mov _ExecReal,offset d32_ExecReal
        pop gs ; get previous PSP segment into GS

        ; modify code switchpoint
        mov s16_irToSYS32, offset irToDPMI32

        test bl,1                       ; test if 32bit DPMI server
	mov dx,offset em7_noDPMI32      ;
	jz exit16err                    ;

        xor eax,eax    ;
        mov ax,si      ; get mem for DPMI block
        
        mov d16_EnterDPMI,di               ; store enter protected mode addr
        mov ds:(2+offset d16_EnterDPMI),es ;
        
        inc eax ; add an extra paragraph to dpmi mem block for security
        
        push word ptr gs:[2ch]   ; preserve environment segment
                                 ; located into PSP

	; NOW GET READY TO ENTER 16BIT PROTECTED MODE
        ; remember, eax = paragraphs needed for DPMI private data

	shl eax,4         ; paragraphs to bytes
	call InitAlloc    ; eax = code32 relative offset of the block
	shr eax,4         ; back to paragraphs
        add ax,code32     ;
	mov es,ax         ; es:0000 = base of DPMI private data

        mov ax,0001h      ; This is a 32bit application
                          ; so turn on 32bit register interface
                          
        call dword ptr ds:d16_EnterDPMI
        
	; If the call succeeded (carry clear):
	; NOW IN 16 BIT PROTECTED MODE
        ; (we are into code16 for code and code32 for data )
        ; PSP:[2ch] now is a SELECTOR to the environment space
        ; cs,ds,ss are now SELECTORS equivalent to their 
        ; previous real-mode segment values.
        ; es contains the PSP selector
        ; fs,gs are set to zero
        
        cli ; Better don't trust DPMI , clear interrupts again

        ; If the switch failed (carry set) we are STILL IN REAL MODE:
        ; the int 31h services used by d16_exit16err do not exist there,
        ; so report the failure through the real mode error exit.
        ; (cli and mov leave the carry flag untouched)
        mov dx,offset msg_DPMIifail       ;
        jc exit16err                      ; Initialization ok?

        pop ax             ; swap environment segment  with equivalent selector
        xchg ax,es:[2ch]   ; now pspa+2ch == environment REAL segment
                           ; we use this trick because VCPI
                           ; startup code doesn't set up selectors for PSP
                           ; so we "standardize" on real-mode segments
                           ; instead of pmode selectors

        mov d16_DosEnvSegSel,ax  ; store DPMI ENVIRONMENT selector

        mov d16_PSPSel,es        ; store PSP selector

        push ds                  ; no more need for PSP segment
        pop es                   ; now ES == DS

        mov _EXIT_DATA_SEL,ds                    ;
        mov _EXIT_SEL,cs                         ;
        mov _EXIT_ADDR,large offset d16_retreal  ; set program termination data
        mov _EXIT_SS,ss

        mov _SetIRQ,offset d32_setirq  ; set new IRQ managers
        mov _GetIRQ,offset d32_getirq  ;

	; MUST ASK DPMI FOR VALID SELECTORS
	mov ax,0003   ; get selector increment value
	int 31h       ;
	mov bx,ax     ;

	mov ax,0000   ; get base selector for a list of 3 selectors
        mov cx,3      ; cx= number of selector to allocate in LDT
	int 31h       ;
        mov V86dx, offset em8_DPMIdesc
        jc d16_exit16err
	; set up descriptors
	;  bx= selector increment  ,  ax = first selector value

        

	; INITIALIZE & STORE SELECTORS
        ; (only the low 16 bits of the lea results are used)
        mov si,ax                       ; si = _SelCode selector
        mov _SelCode,ax                ; store segment selectors
        
        lea ecx,[eax+ebx]               ; cx = _SelData selector
        mov _SelData,cx                ; store segment selectors
        
        lea ebp,[ecx+ebx]               ; bp = _SelZero selector
        mov _SelZero,bp                ; store segment selectors
        
        ; Read the privilege level of the DPMI supplied CS, then copy the
        ; three prepared descriptors into the freshly allocated selectors.
        ; In: si = _SelCode, cx = _SelData, bp = _SelZero, es == ds
        psay msg_DPMIlar
        ; WARNING! THIS CAN BE DANGEROUS!!! 
        mov dx,cs   ;
        cmp dx,dx   ; set zero flag so we will know if LAR set it to zero
        lar dx,dx   ; dh = access rights byte of cs, ZF cleared on failure
        jnz badlar
        and dh,060h ; keep only the DPL bits of the access rights
                    ; (just in case we find a DPMI server running
                    ;  in more powerful cpu levels :) )
        jmp short desclink
badlar: ;Failed loading access rights
        psay msg_DPMIbadlar
        mov dh,060h ; fall back to privilege level 3
desclink:
        clc ; clear carry
            ; to be sure only the following int 31h will modify it

        ; NOW LINK THE DESCRIPTORS TO THEIR SELECTOR INTO LDT
        ; and with the same CPU LEVEL of the cs set by dpmi init routine
        psay msg_DPMIcode32
        ; set error message if selector modification fails
        ; (must be done after psay, which stores into V86edx)
        mov V86dx,offset em9_DPMImod
        mov ax,000ch                      ; COPY descriptors into DPMI LDT
					  ; ax = 000ch
					  ; bx     = selector value
					  ; es:edi = pointer to the 8 bytes
					  ;          of data to COPY into
					  ;          the descriptor
					  ;          (es == ds at this point)

	mov bx,si                         ;
        mov edi,offset GDTcode32          ;
        or byte ptr [edi+5],dh            ; merge DPL into the access byte
	int 31h                           ;
        jc d16_exit16err                  ;

        psay msg_DPMIdata32
        ; set error message if selector modification fails
        mov V86dx,offset em9_DPMImod
        mov ax,000ch
	mov bx,cx                         ;
        mov edi,offset GDTdata32          ;
        or byte ptr [edi+5],dh            ;
	int 31h                           ;
        jc d16_exit16err                  ;

        ; SET SEGMENT REGISTERS TO 32BIT ADDRESSING
        mov ds,cx    ;
        mov es,cx    ;
        mov fs,cx    ; DS,ES,FS = Data32 (alias for Code32)

        psay msg_DPMIzero32
        ; set error message if selector modification fails
        mov V86dx,offset em9_DPMImod
        mov ax,000ch
	mov bx,bp                         ;
        mov edi,offset GDTzero32          ;
        or byte ptr [edi+5],dh            ;
	int 31h                           ;
        jc d16_exit16err                  ;
        
        ; SET "LINEAR" segment GS
        mov gs,bp    ; GS = selector to linear addressing base

	; CHECK IF THERE IS ENOUGH LOW MEMORY FOR PROGRAM DATA
        ; AND DPMI TABLES
        ; Then work out in EDX how many bytes of extended memory to
        ; request from DPMI (EXTMIN Kbytes at least).
        mov edi,_LoMemBase
        mov eax,_LoMemTop
	sub eax,edi
        cmp eax,48  ; minimum space needed for extended info
        mov V86dx,offset em1_no_lomem   ; (mov does not change the flags)
        jb d16_exit16err
         
        mov _HiMemBase,0  ; better assume the worst
        mov _HiMemTop ,1  ;
        psay msg_DPMIhiget
        mov eax,EXTMIN
        or eax,eax
        jnz getminfo
        jmp DPMI_state ; if no ext. mem required, go to set to_DPMI_state
getminfo:
        ; DPMI 00.90  ext. memory allocation
        ; initially my code checked for DPMI version numbers
        ; but found some inconsistent major release numbers
        ; so decided to support only the lowest common denominator (DPMI 0.9)
        mov ax,0500h             ;  ax     = 0500 == GET DPMI INFO 0.9
        int 31h                  ;  es:edi = info block  (48 bytes wide)
                                 ;  remember edi was set on _LoMembase
        mov V86dx,small offset msg_DPMInominfo
        jc d16_exit16err

        mov edx,es:[edi+08h]  ; largest available lockable page number
        cmp edx,-1            ; (1page = 4K), -1 = value not available
        je  short DPMIdefault_alloc
        shl edx,12 ; allocate lockable pages (pages to bytes)
        jmp short DPMIbyte_alloc
DPMIdefault_alloc:        
        mov edx,(EXTMIN*1024)  ; minimum space, converted from Kbytes to bytes
DPMIbyte_alloc:
        mov V86dx,offset em3_no_himem
        cmp edx,(EXTMIN*1024)
        jb d16_exit16err

        or edx,edx  ; zero bytes available ? go to next section
        jnz gotobumper
        jmp DPMI_state    ; no need for extended memory
gotobumper:
        ; ALLOCATE EXT. MEMORY THRU DPMI
        psay msg_allocmem
d16_winbumper:
        ; Try to allocate edx bytes of extended memory through DPMI.
        ; On failure retry with one page (4096 bytes) less, until the request
        ; would drop to EXTMIN Kbytes or below; then exit with em5_hifault.
        ; in:  edx = number of bytes to request
        ; out: jumps to d16_works with the requested size still on the stack
  
        push edx      ; keep the requested size (popped on both paths)
	mov cx,dx                         ; in:
        shld ebx,edx,16                   ; ax = 0501
	mov ax,0501h                      ; bx:cx = ext. memory needed
	int 31h                           ; out:
        jnc d16_works                     ; CARRY CLEAR == NO ERRORS and ...
        mov V86dx,offset em5_hifault      ; bx:cx = linear address allocated
                                          ; si:di = memory block handle
        ; Something has gone wrong: restore the size we asked for
        pop edx

        cmp edx,(EXTMIN*1024)  ; already at (or below) the minimum ? give up
        jbe d16_exit16err
        sub edx,4096     ; try one page less, and see if it works
        
        jmp short d16_winbumper ; Usually Windows is to blame for this!

d16_works:
        ; The DPMI allocation succeeded.
        ; in:  bx:cx = linear address of the block, si:di = block handle,
        ;      requested size (bytes) on top of the stack
        ; Records handle, size and base, tries to lock the region and sets
        ; _HiMemBase/_HiMemTop as offsets relative to _Code32Base.
        pop edx  ;get requested memory size back
        shl esi,16
        mov si,di             ; esi = si:di = memory block handle
        mov hidpmi_handle,esi
        psay msg_lockmem
        pushad                ; keep bx:cx / edx intact across the lock call
        mov di,dx             ; si:di = region size in bytes (from edx)
        shr edx,16
        mov si,dx
        mov ax,0600h          ; lock region: bx:cx = linear address, si:di = size
        int 31h
        jnc goodlock
        psay msg_badlock      ; a failed lock is only reported, not fatal
goodlock:
        popad
        mov hidpmi_size,edx

        ; FROM VERSION 1.03 and up, no automatic locks are performed
        ; NOTE(review): the lock request above is still issued - confirm which
        ; locks this remark refers to.
        psay msg_DPMIwin

        shl ebx,16            ;
        mov bx,cx             ;  ebx = memory block linear address
        mov hidpmi_base,ebx
        sub ebx,_Code32Base   ;  make it relative to the code32 segment base
        mov _HiMemBase,ebx    ;

        add ebx,edx           ;  base + size
        mov _HiMemTop,ebx     ;  SET EXT MEM LIMITS

DPMI_state:
        ; Collect from the DPMI host the addresses of the state save/restore
        ; and raw mode-switch routines, then fetch the PIC base vectors and
        ; hand them to setintslots.  Every failure exits via d16_exit16err
        ; with the matching message offset stored in V86dx.

        ; ALLOCATE DPMI_SAVE_TASK_STATE BUFFERS AND POINTERS

	mov ax,0305h  ; get save/restore state addresses
	int 31h       ;
        mov V86dx,offset msg_DPMI_SR_FAIL
        jc d16_exit16err

        mov word ptr d32_StateBufferSize,ax  ; length of save state block

        ; 32bit save/restore state  protected-mode routine
        mov  dword ptr d32_SaveRestoreState,edi               ; offset32
        mov  word ptr ds:(4+ offset d32_SaveRestoreState),si  ; seg32

        ; 16 bit save/restore state real-mode routine
        mov word ptr d16_SaveRestoreState,cx                 ; offset16
        mov word ptr ds:(2+ offset d16_SaveRestoreState),bx  ; seg16

        ; SET MODE SWITCH CODE POINTERS

	mov ax,0306h  ; get raw Mode Switch to 16<-->32 bit mode
	int 31h       ;
        mov V86dx,offset msg_DPMI_MS_FAIL
        jc d16_exit16err

        ; switch protected->real mode routine
        mov dword ptr d32_Fast32_To_16,edi                ; offset 32
        mov  word ptr ds:(4+ offset d32_Fast32_To_16),si  ; seg32

        ; switch real->protected mode routine
        mov word ptr d16_Fast16_To_32,cx             ; offset16
        mov word ptr ds:(2+ offset d16_Fast16_To_32),bx ; seg16

        ; NOW SET IRQs & INTs

        ; set IRQ handlers to PIC values
	mov ax,0400h ; Get DPMI version info
	int 31h      ; dh = 1st PIC base vector dl= 2nd PIC base vector
        mov V86dx,offset msg_DPMI_IV_FAIL
        jc d16_exit16err

	xchg dl,dh                    ; dl = 1st PIC base, dh = 2nd PIC base
	mov bx,dx                     ; bl/bh = 1st/2nd PIC base vectors
	call setintslots              ; set new vector table for IRQ
        ; For each of the 16 PIC interrupts (index 15 down to 0): save the
        ; current protected-mode vector into d32_OldInts and install our own
        ; handler (selector _SelCode, offset taken from d16_nintoff).
        ; Then load the 32bit stack and join the common start-up at JoinDPMI.
        psay msg_DPMIirq
        xor bx,bx                  ; bh = 0, bl receives the int number
        mov si,_SelCode            ; si = selector used for the new handlers
        mov edi,15                 ; 16 P.I.C ints
        mov ah,02 ; set higher byte of function code
                  ; NOTE(review): the loop relies on ah still being 02 after
                  ; each int 31h call - confirm the host preserves it
DPMI_SavSetInts:
        mov bl,intslotnum[di]   ; bl = interrupt number of this slot
        mov al,04h                 ; ah was 02, so execute function 0204h
        int 31h                    ; GET PROT. MODE INT VECTOR bl
        mov V86dx,offset msg_DPMI_GI_FAIL
        jc d16_exit16err
        mov dword ptr d32_OldInts[edi*8],edx    ; save ints (offset)
        mov word ptr  d32_OldInts[edi*8+4],cx   ;           (selector)
        
        mov al,05h                 ;
        mov edx,d16_nintoff[edi*4] ; ah was 02, so execute function 0205h
        mov cx,si                  ; SET PROT. MODE INT VECTOR bl
	int 31h                    ;
        mov V86dx,offset msg_DPMI_SI_FAIL
        jc d16_exit16err
        dec di               ; loop if greater or equal to zero
        jns DPMI_SavSetInts  ;
        psay msg_DPMIgood
        ; the DPMI path joins the code shared with VCPI for 32bit init stuff
        push ds  ;
        pop  ss  ; ss = prot. mode 32bit selector
        add  esp,Stack32BaseOffset  ; esp = prot. mode stack base
        jmp JoinDPMI

;-----------------------------------------------------------------------------
; 16bit VCPI system code

        
;------------------------------------------------------------------------------        
; CALL TO prot. mode  : v16_sw_dest = offset to jump to in code32
        
v16_retreal:
        ; just returned to real mode from protected mode
        cli        
        ; if something fails, you get here ..
VCPI_exit: ; VCPI exit (clean up paging stuff)
        ; Walks the page-table entries between v16_PageBase and v16_PageTop
        ; (offsets inside the v16_PageDirSeg segment, 4 bytes per entry),
        ; gives each page back to the VCPI server, then jumps to the
        ; standard exit code.
        mov es,v16_PageDirSeg
        mov si,v16_PageBase    ; si = first entry allocated by us
        mov cx,v16_PageTop     ; cx = end of the entries allocated by us
        
        sub cx,si          ; Need to deallocate VCPI memory pages ?
                           ; (cx = bytes of page-table entries to process)
        
        jz short page_cleaned  ; No! Only remove EMS data
        ; Yes! Clean pages 
        say msg_VCPIfree
VCPI_clean:  ; Deallocate memory pages allocated thru VCPI

        mov edx,es:[si] ; fetch the page-table entry
        and dx,0f000h   ;  edx == address of 4k page to deallocate & unlink
                        ;  (the low 12 flag bits are stripped)
        
        mov ax,0de05h   ;  call page unlink function
        int 67h         ;
        
        add si,4 ; next page
        
        sub cx,4        ; loop if there are other pages to clean
        jnz VCPI_clean  ;
page_cleaned: ; now go to standard exit code
        jmp exit
        
VCPI_err1: ; VCPI not enough low mem exit
        ; Selects the "no low memory" message and leaves via the common
        ; 16bit error exit.
        mov dx,offset em1_no_lomem
        jmp exit16err
        
;=============================================================================
        align byte
StartVCPI:
        ; VCPI start-up, first part:
        ;  1) turn the mode-switch pointers into linear addresses
        ;  2) read and validate the PIC vector mappings, size the IDT
        ;  3) allocate the IDT
        ;  4) reserve and clear a 4K-aligned page directory + first page table
        ;  5) get the VCPI protected-mode interface
        ; On exit towards page_table_alloc:
        ;  es    = v16_PageDirSeg (page directory at es:0, first table at es:1000h)
        ;  di    = offset of the first page-table entry not filled in by VCPI
        ;  ebx   = code32-relative offset just past the paging data in use
        ;  ebp   = _Code32Base
        mov _386Man,IS_VCPI ; set system type = VCPI

        ; set up mode switch linear pointers
        mov eax,_Code32Base       ; adjust mode-switch linear pointers
        add v16_VCPIsys,eax       ;
        add v16_sw_gdtaddrptr,eax ; from code32-relative offsets
        add v16_sw_idtaddrptr,eax ; to LINEAR addresses
        
        mov exitrout,offset exit ; set standard dos-cleanup exit

        
        ;-------------------------------------------------------------------
        ; NOW HANDLE PIC MAPPINGS
        say msg_VCPIpic
        mov ax,0de0ah   ; get PIC base vector mappings
        int 67h         ; BX,CX = first vector of master PIC and slave PIC 
        mov bh,cl       ; bh= now is first vector of slave PIC
                        ; (bl already holds the master PIC one)
        ; N.B. A single PIC uses 8 consecutive vector numbers
        ;      so you'd better avoid to use a vector base 30h
        ;      or your interrupts will "cover" the int 31h DPMI interface
        
        mov dx,offset em11_EqPIC ; set error message if something goes wrong
        
        ; Check for compatible PIC mapping ...
        cmp bl,bh
        je exit16err  ; Both PICs mapped on the same base vector: unsupported
                      ; (this code is not meant for single-PIC machines
                      ;  such as the old IBM XT)
                      
        mov dx,offset em6_PICfault ; set error message if something goes wrong              
        cmp bl,30h
        je exit16err  ; Low mapping clashes with service ints
        cmp bh,30h
        je exit16err  ; High mapping clashes with service ints
        
        ; search the highest interrupt number needed
        mov eax,33h         ; maximum int needed by 386p "without irqs"
                            ; and clear eax high word

        cmp al,bl           ;
        jnb short HIPIC1    ; skip if max int mapping >= master pic mapping
        mov al,bl           ; master pic has the highest mapping
HIPIC1:                     ; 

        cmp al,bh           ; 
        jnb short HIPIC2    ; skip if max int mapping >= slave pic mapping
        mov al,bh           ; slave pic has the highest mapping
HIPIC2:                     ;

        add al,7 ; add 7 to the highest base found
                 ; to get the IDT SIZE
                 ; NOTE(review): the original remark spoke of "7 exception
                 ; handlers"; as a PIC covers 8 consecutive vectors this looks
                 ; like base+7 = last vector of that PIC - confirm

        ; AX = TOP INTERRUPT  NEEDED

        mov s16_Int_to_replace_ctr,ax  ; save number of ints to replace
        
        ; SET INT VECTOR TABLE

        lea eax,[eax*8+7]    ; set limit of IDT (every entry is 8 bytes)
        mov s16_IDTaddr,ax   ; and we need entries from 0 to ax

        ; bl,bh == int slots for master & slave PIC
        push eax          ; keep the IDT limit across the call
        call setintslots  ; remap irq int slots
        pop eax
        say msg_idt
        ;----------------------------------------------------------------------
        ; ALLOCATE SPACE FOR IDT
        call InitAlloc        ; eax (in) = size wanted, eax (out) = code32 offset
                              ; NOTE(review): contract inferred from the way the
                              ; result is used below - confirm against InitAlloc
        mov s32_idt32ptr,eax  ; save code32 idt pointer
        mov ebp,_Code32Base ;_Code32Base into ebp for immediate and later usage
        add eax,ebp                        ; get linear address of IDT
        mov dword ptr bases16_IDTaddr,eax  ; set IDT base address

        mov exitrout,offset VCPI_exit     ; set VCPI error&exit routine
        ;---------------------------------------------------------------------
        ; SET UP FIRST BLOCK OF PAGE TABLE AND PAGE DIRECTORY
        say msg_paging
        mov eax,_LoMemBase        ; align _LoMemBase on a 4k page
        lea eax,[ebp+eax+0fffh]   ; (ebp == _Code32Base )
        and eax,0fffff000h        ; eax == _LoMemBase as LINEAR address, rounded up
        mov ecx,eax    ; copy linear offset into ECX
        sub eax,ebp         ; linear_offset - _Code32Base
        mov _LoMemBase,eax  ; NOW _LoMemBase is 4KPAGE ALIGNED
                            ; so we can store page entries
                            ; and ecx contains the equivalent linear address

        
        mov ebp,_LoMemTop      ; get available low memory
        sub ebp,eax            ;
        sub ebp,LOWMIN*1024    ; die if not enough low memory
        jc VCPI_err1           ;
        cmp ebp,8192           ; die if no space for minimal page structure
        jb VCPI_err1           ;

        shr   ecx,4            ; set VCPI 386POWER page-directory segment
        mov v16_PageDirSeg,cx  ;

        ; The 8k allocated in low memory are split into
        ; 1) a 4K (1000h) PAGE DIRECTORY 
        ; 2) a 4k PAGE TABLE (the first page table, others are chained to it)

        mov es,cx                   ; reset all addresses
        xor edi,edi                 ;
        mov cx,2048                 ; 2048 dwords == 8192 bytes
        xor eax,eax                 ;
        rep stosd                   ;

        ;--------------------------------------------------------------------
        ; GET VCPI 32BIT INTERFACE
        ; AND INITIALIZE SHARED PAGES' ENTRIES
        say msg_VCPIhookserver
        mov di,1000h ; es:di == pointer to FIRST PAGE TABLE
                     ; (vcpi inits it with the pages already allocated)
                     ; (the page directory initialization is up to us)
                     
        mov si,offset GDTvcpi    ; Get Vcpi Protected Mode Interface
        mov ax,0de01h            ; in:    ax = 0de01h
        int 67h                  ;  es:di = ptr to 4k page table buffer
                                 ;  ds:si = ptr to 3 descriptor table entries 
                                 ;          the first becomes the
                                 ;          code segment descriptor
                                 ;          the other two are used by
                                 ;          the MAIN CONTROL PROGRAM
                                 ; out:  ebx = offset of prot. mode entry point
                                 ;             (relative to GDTvcpi)
                                 ;       es:di = first unused page table entry
                                 ;               in page table buffer
                                  
        mov dword ptr v32_vcpientryaddr,ebx  ;store entry offset
                                             ;into fword
        
        ; DI = end of allocated entries on page table
        ; ONE ENTRY takes 4 bytes, one entry maps a 4k page
        ; so if you subtract the page base offset (1000h)
        ; DI is EXACTLY how many Kbytes has been allocated by VCPI
        ; starting from "your task" linear address 00000000h
        
        mov v16_PageBase,di      ; set the starting position of blocks
        mov v16_PageTop,di       ; not allocated automatically by VCPI
        ;( the automatically allocated ones are handled by VCPI
        ;  while the other ones will be up to us )
        
        movzx eax,di           ;  
        sub eax,1000h          ;  eax = Kbytes already mapped by VCPI
        shl eax,10             ;  eax = base of extended memory in bytes
        
        mov ebp,_Code32Base ;
        
        sub eax,ebp         ;  make it relative to the code32 segment base
        mov _HiMemBase,eax  ;  set initial _HiMemBase value
        
        ; NOW SET EBX AS A COUNTER OF THE BYTES ALLOCATED
        ; IN LOW MEMORY FOR PAGING INFO
        mov ebx,8192           ; one page directory + one page table
        add ebx,_LoMemBase     ; + _LoMemBase = pointer beyond last byte
                               ;                used by paging tables
        say msg_VCPIaddpages
page_table_alloc:
        ; Grab 4K pages from the VCPI server, one per iteration, and append
        ; them to the page table(s) until the server refuses or the mapping
        ; area is full.
        ; in/out: es:di = next free page-table entry (4 bytes each)
        ;         ebx   = code32-relative offset just past the paging data
        ; The loop leaves through end_page_alloc, which records di in
        ; v16_PageTop.
        mov ax,0de04h          ; Allocate one page, ax =0de04h
        int 67h                ; out: ah = error code ( 00 == no errors)
        or ah,ah               ;     edx = physical address of allocated page
        jnz short end_page_alloc

        test di,0fffh             ; Check if at end of page table
        jnz short not_4k_boundary ;
        ; else advance one page
        add ebx,4096        ; add space for another 4k page table

        cmp ebx,_LoMemTop    ; run out of low memory ?
        jb short not_4k_boundary ; no: go on and store the entry

        ; Out of low memory for a further page table.
        ; Give back the page just obtained (it will never be stored in a
        ; table) and record how far the tables were filled: VCPI_exit frees
        ; the pages between v16_PageBase and v16_PageTop, and v16_PageTop
        ; still equals v16_PageBase at this point, so without this update the
        ; pages allocated so far would stay allocated after we quit.
        mov ax,0de05h        ; free the 4k page whose address is in edx
        int 67h              ;
        mov v16_PageTop,di   ; entries below di are ours to release
        jmp VCPI_err1        ;

not_4k_boundary:
        and dx,0f000h          ; Mark this page as one-page-block
        or dl,7                ; present,user,read/write
        mov es:[di],edx        ; store entry on page table
        add di,4               ;
        cmp di,(65536-4096)
        jb page_table_alloc   ; STOP if allocated 64k-4k of mapping ram
                              ; (equivalent to 64-4-4 ==  56 megabytes
                              ;  due to the first 4k block used by the page dir
                              ;  and to the last 4M used for phys mapping)
        
end_page_alloc:                         
        ; Page allocation is over (di = first unused page-table entry).
        ; Derive _HiMemTop, the 4M window reserved for physical mappings
        ; (_MapMemBase/_MapMemTop/_PhysMapTable), check that at least EXTMIN
        ; Kbytes were obtained, move _LoMemBase past the paging data and
        ; compute the CR3 value.
        ; in: ebp = _Code32Base, ebx = code32 offset past the page tables
        mov v16_PageTop,di ; Store final allocated page table limit
        ; beyond v16_PageTop there will be the "extra mappings"
        ; available for mapping of devices

        ; now...  read carefully:
        ; a single page entry maps 4kbytes
        ; and uses 4 bytes
        ; so the space used by page entries
        ; is equivalent to the linear addresses in kbyte multiples
        ; IF YOU EXCLUDE THE SPACE USED BY THE PAGE DIRECTORY TABLE

        lea si,[di-1000h] ; (1000h == 4096) 1000h = space used by the page dir
                          ; will be used later

        movzx eax,si      ;
        shl eax,10        ; eax = LINEAR address of allocated ext. mem. limit
        
        sub eax,ebp             ; EBP is still _Code32Base
        mov _HiMemTop,eax       ; store _HiMemTop value
        
        ; PHYS MAPPING STUFF
        mov _MapMemBase,eax   ; store base of mappable phys ram
        add eax,(4*1024*1024) ; 4MegaByte
                              ; Don't touch this, it is hard to fix
                              ; all the references to this 4M limit
                              ; if you didn't code it. (sorry)

        mov _MapMemTop,eax ; store limit of mappable ram

        movzx eax,v16_PageDirSeg
        and edi,0000FFFFh  ; clear upper word
        shl eax,4
        add eax,edi        ; linear address of the entry at es:di
        sub eax,_Code32Base   ; this is a code relative offset
        mov _PhysMapTable,eax

        
        
        sub di,v16_PageBase        ; di = bytes of entries added by us
        cmp di,EXTMIN              ;    = Kbytes allocated (4 bytes per 4K page)
        mov dx,offset em3_no_himem ; (mov leaves the flags of the cmp intact)
        jb exit16err
        
        add ebx,4096            ; add the mappable mem block
                                ; NOTE(review): this last increment is not
                                ; checked against _LoMemTop - confirm headroom

        mov _LoMemBase,ebx      ; move _LoMemBase beyond paging data
        ;----------------------------------------------------------------------
        ; NOW WE SET THE CR3 PAGE DIRECTORY REGISTER VALUE
        say msg_VCPIpagedir

        movzx eax,v16_PageDirSeg ; 
        shl   eax,4              ; page_dir segment as linear address
        mov v16_sw_cr3,eax                ; SET PAGE REGISTER

        ;----------------------------------------------------------------------
        ; now set up PAGE DIRECTORY
        ;
        ; in:  eax = address of the page directory (value stored in v16_sw_cr3)
        ;      es  = page directory segment
        ;      si  = bytes of page-table entries in use (4K of entries == 4M)
        ; Writes one directory entry per page table, the tables lying one
        ; after the other right behind the directory, plus one more entry for
        ; the table holding the 4M "physical mappings".
        mov   ebx,4096   ; PAGE SIZE
        xor di,di   ; es:di == page directory address
        ; si = allocated space for page tables
set_page_directory:
        add eax,ebx      ; next page table address
        and ax,0f000h    ; Why this ? because we need to keep "paging flags"
        or al,7          ; clean (for vcpi compatibility) except the needed ones
        stosd            ; store into page table location of page dir. entry
        sub si,bx ; 4Mbytes has been mapped in a single page dir entry
        jnbe set_page_directory ; go on while entries are still left (si > 0)
        add eax,ebx      ; 4M "physical mappings" page table address
        and ax,0f000h    ; Why this ? because we need to keep "paging flags"
        or al,7          ; clean (for vcpi compatibility) except the needed ones
        stosd            ; store into page table location of page dir. entry

        ;----------------------------------------------------------------------
        ; TSS SETUP
        ;
        ; Allocates and clears a 68h-byte TSS, patches its base into the GDT
        ; task descriptor, stores the I/O bitmap offset and CR3 in it, then
        ; asks the VCPI server to switch to protected mode, resuming at
        ; VCPI_protected16.
        say msg_tss
        mov eax,68h   ; TSS is 68h bytes wide
        call InitAlloc; allocate space for TSS
        add eax,_Code32Base ; get linear address of TSS

        ; set Task Switch Segment (TSS) descriptor base address
        or dword ptr baseGDTtask,eax

        mov ebx,eax
        shr eax,4
        mov es,ax
        and ebx,0Fh
        ;  es:bx = TSS start

        ; first of all, clear everything (i'm paranoid)
        mov di,bx    ; clear TSS and IO bitmap
        xor eax,eax  ;
        mov ecx,1Ah  ; TSS size in dwords (1Ah*4 == 68h)
        rep stosd    ; T-bit is cleared into this loop

        ; then set IObitmap position, so the server will know there is none
        mov byte ptr es:[bx+66h],68h ; 68h == end of tss

        mov eax,v16_sw_cr3
        mov es:[bx+1Ch],eax ; set CR3 in TSS (i'm paranoid)

        say msg_VCPIkick
        ;---------------------------------------------------------------------
        ; ENTER 16BIT PROTECTED MODE
        mov v16_sw_dest,offset VCPI_protected16 ; offset to jump to in 386P
        mov ax,0de0ch   ; SWITCH TO 16bit PROTECTED MODE thru VCPI      
        mov esi,v16_VCPIsys   ; system data for mode switch
        int 67h         ; GO!
        
VCPI_protected16:        ; in 16bit prot. mode
        ; Now set 32 bit protected mode segments and stack pointer
        ; for VCPI
        cli
        mov ax,18h ;
        mov gs,ax  ; gs= linear
        
        mov ax,10h ;  ds,es,fs,ss = data32
        mov ds,ax  ;
        mov es,ax  ;
        mov fs,ax  ;
        mov ss,ax  ;
        
        mov esp,(STACKSIZE*16)     ; esp set at stack start in 16bit mode
        add esp,Stack32BaseOffset  ; esp = prot. mode stack base
        
        ; now set up 32bit mode completely

VCPI_ready32:
        ; Runs in 16bit protected mode with ds = data32.
        ; Retargets the mode-switch destination selector to the 32bit code
        ; segment, fills every IDT slot (0..s16_Int_to_replace_ctr) with the
        ; generic exception gate, installs the 16 irq handlers plus the API
        ; interrupt, and clears the NT flag.  Falls through into JoinDPMI.

        ; First switch was from 16bit real to 16bit prot. CODE SEG
        ; next switches will be from 16bit prot. to 32bit prot. CODE SEG
        ; so, set 32bit seg selector into mode switch table
        mov word ptr ds:(4+ offset v16_sw_dest),08h ; _SelCode

        ; now initialize IDT with general exception handler
        mov edi,s32_idt32ptr; get code32 relative ptr to IDT
        ; now EDI == base pointer to IDT
        
        ; set general exception handler
        ; into ALL SLOTS OF VCPI CLIENT INTERRUPT TABLE
        ; (default reaction to "unknown interrupt" is auto-reset)
        xor esi,esi ; clear esi
        mov si,s16_Int_to_replace_ctr ; esi = highest slot to fill
        
        mov eax,offset s32_exc  ; set the generic exception handler as default
        and eax,0000ffffh       ; offset s32_exc (we know it's under 64k)
        or  eax,00080000h       ; code32 pmode selector
        
        mov ebp,00008E00h       ; flags & higher (clear) offset
                                
        ; IDT entry (8 bytes)
        ; offset  size
        ;      0     2   low offset  of ISR (Interrupt Service Routine)
        ;      2     2   selector    of ISR
        ;      4     2   flags
        ;      6     2   high offset of ISR
        
SetIDTEntry:
        ; selector = 8
        ; offset    = offset s32_exc (generic handler)
        mov [edi+esi*8],eax ; low offset and selector
        mov [edi+esi*8+4],ebp ; flags & high offset (cleared)
        dec si
        jns SetIDTEntry       ; slot 0 included
        
        ; NOW IT'S TIME TO SET INTO THE INTERRUPT TABLE
        ; THE OFFSETS OF THE ISR ROUTINES WE SUPPORT
        ; copy exception handlers from IDT
        mov esi,16 ; 16 irq + 1 "DPMI api interrupt"
        xor ebx,ebx
SetIDTInt:
        mov bl,intslotnum[si]           ; ebx = interrupt number of this slot
        mov eax,[esi*4+v16_idt_default] ; get ISR offset from v16_idt_default
        mov [edi+ebx*8],ax ; put it into the correct idt slot
                           ; (because we know it fits into 16bit
                           ;  we don't set the higher word)
        mov [esi*8+ s32_irqtable],eax
                                  ; put it into the irq table
                                  ; (default values are the XMS/HARD ones)
        dec si
        jns SetIDTInt
        
        ; HERE WE GO TO 32bit PROTECTED MODE
        pushfd               ; set eflags: NT=0
        pop eax              ;
        and ah,0bfh          ; (NT is bit 14 of eflags == bit 6 of ah)
        push eax             ;
        popfd                ;
; READY TO ENTER 32BIT PROTECTED MODE
        align byte
JoinDPMI: ; shared by VCPI & DPMI
          ; NOW WE CAN ENTER 32BIT PROTECTED MODE
          ; Stores the 32bit code selector (_SelCode) into the selector part
          ; of the Kick32 far pointer and jumps through it.

        mov ax,_SelCode
        mov word  ptr ds:(4 + offset Kick32),ax ; selector sits after the 32bit offset
        jmp fword ptr ds:Kick32

;==============================================================================
; 16 bit VCPI redirectors

; VCPI real mode irq reflector to prot mode

; One entry stub per irq (f = irq 15 ... 0 = irq 0).
; Each stub saves ecx, loads ecx with (irq number * 4) and joins the common
; reflector v16_virq; the last stub (v16_virq0) simply falls through into it.
; NOTE(review): the first two stubs use a plain jmp while the others use
; "jmp short" - presumably because v16_virq is out of short-jump range from
; there; keep the size of the stubs unchanged.
v16_virqf:
        push ecx
        mov  ecx,15*4
        jmp v16_virq
v16_virqe:
        push ecx
        mov  ecx,14*4
        jmp v16_virq
v16_virqd:
        push ecx
        mov  ecx,13*4
        jmp short v16_virq
v16_virqc:
        push ecx
        mov  ecx,12*4
        jmp short v16_virq
v16_virqb:
        push ecx
        mov  ecx,11*4
        jmp short v16_virq
v16_virqa:
        push ecx
        mov  ecx,10*4
        jmp short v16_virq
v16_virq9:
        push ecx
        mov  ecx,9*4
        jmp short v16_virq
v16_virq8:
        push ecx
        mov  ecx,8*4
        jmp short v16_virq
v16_virq7:
        push ecx
        mov  ecx,7*4
        jmp short v16_virq
v16_virq6:
        push ecx
        mov  ecx,6*4
        jmp short v16_virq
v16_virq5:
        push ecx
        mov  ecx,5*4
        jmp short v16_virq
v16_virq4:
        push ecx
        mov  ecx,4*4
        jmp short v16_virq
v16_virq3:
        push ecx
        mov  ecx,3*4
        jmp short v16_virq
v16_virq2:
        push ecx
        mov  ecx,2*4
        jmp short v16_virq
v16_virq1:
        push ecx
        mov  ecx,1*4
        jmp short v16_virq
v16_virq0:
        push ecx
        xor ecx,ecx    ; irq 0: ecx = 0*4, then fall through into v16_virq
v16_virq:
        ; Common VCPI irq reflector (real mode -> protected mode).
        ; in: ecx = irq number * 4, caller's ecx already pushed by the stub
        ; Saves the registers and the previously saved stack pointer, moves
        ; to a fresh frame of the mode-switch stack and asks VCPI to switch
        ; to protected mode at v32_virqserver.  Execution resumes at
        ; v16_virqdone, which undoes everything and returns with iret.
        ; ecx = vector to call from protected mode
        pushad
        push ds
        push es
        push fs
        push gs
        mov ax,code32 ; now on shared data segment
        mov ds,ax     ;
        push s16_SavStackOfs      ; save on current stack
        push s16_SavStackSeg      ; (so that nested reflections can be undone)
        mov ax,codeend
        mov s16_SavStackOfs,esp   ;
        mov s16_SavStackSeg,ss    ; save stack
        mov ss,ax              ;
        mov esp,nextmodestack  ; setup real mode stack on next stack frame
        ; the instruction
        ; sub nextmodestack,(STACKSWTP*16) ; new stack frame
        ; will be executed IN PROTECTED MODE
        ; AFTER stack reloading

        mov v16_sw_dest,offset v32_virqserver ; jump to p. mode virq server
        mov esi,v16_VCPIsys   ; system data for mode switch
        mov ax,0de0ch   ; SWITCH TO PROTECTED MODE thru VCPI
        int 67h         ;
v16_virqdone:
        ; ss:esp  set to values saved into s16_SavStackSeg:s16_SavStackOfs
        ; ds,es,fs,gs = code32
        ; (per the remark above, whoever jumps here has already reloaded the
        ;  stack: this code only pops from it)
        pop s16_SavStackSeg
        pop s16_SavStackOfs
        add nextmodestack,(STACKSWTP*16) ; previous new stack frame
        pop gs
        pop fs
        pop es
        pop ds
        popad
        pop ecx         ; ecx pushed by the entry stub
        iret

;-----------------------------------------------------------------------------
; XMS dos-extender
;
x16_retreal:
        ; Program termination entry for the XMS extender: go back to real
        ; mode through HARD_P2R and continue at code16:XMSExit.
        mov edi,offset XMSExit   ; edi = offset to resume at
        mov esi,code16           ; esi = segment to resume in
        jmp HARD_P2R

XMSExit:
        ; Unlock the extended memory block, then fall into the release code.
        say msg_unlock
        mov ax,0D00h        ; XMS function 0Dh: unlock block
        mov dx,xms_handle   ; dx = handle of our block
        call xms_entry
XMSMemExit:
        ; Give the extended memory block back and go to the standard exit.
        say msg_releasemem
        mov ax,0A00h        ; XMS function 0Ah: release block
        mov dx,xms_handle   ; dx = handle of our block
        call xms_entry
        jmp exit

; Start-up failure exits: each one prints its message with the "say" macro
; and jumps to the standard exit code.
fail_a20:                    ; the A20 line could not be enabled
        say em4_noA20
        jmp exit
fail_mem:                    ; not enough (or no) extended memory
        say em3_no_himem
        jmp exit
fail_lock:                   ; the extended memory block could not be locked
        say msg_badlock
        jmp exit

StartXMS:
        ; XMS start-up: fetch the XMS driver entry point, make sure the A20
        ; line is on, allocate and lock the largest free extended memory
        ; block and set _HiMemBase/_HiMemTop from it.
        ; Falls through into JoinXMS on success; on failure leaves through
        ; fail_a20 / fail_mem, or releases the block via XMSMemExit when the
        ; lock fails.
        ; NOTE(review): _HiMemBase/_HiMemTop are stored here as the address
        ; returned by the lock call, while the DPMI and VCPI paths store
        ; offsets relative to _Code32Base - confirm this is intended.

        ; get XMS entry point
        say msg_XMSfound
        mov _386Man,IS_XMS

        mov _EXIT_SEL,20h                        ;
        mov _EXIT_ADDR,large offset x16_retreal  ; set program termination data

        mov ax,4310h
        int 2Fh                                  ; es:bx = XMS driver entry point
        mov word ptr xms_entry,bx
        mov word ptr ds:(2+ offset xms_entry),es
        ; check if A20 line is enabled
        mov ax,700h
        call xms_entry
        cmp ax,1
        je xms_a20_on
        ; enable A20 line
        mov ax,300h
        call xms_entry
        or ax,ax            ; ax = 0 --> failure
        jz fail_a20
xms_a20_on:
        xor bx,bx
        ; how much ext mem ?
        mov ax,800h
        call xms_entry      ; ax = largest free block (Kbytes)
        cmp ax,EXTMIN
        jb fail_mem
        movzx eax,ax        ; write the whole dword: _HiMemTop is read back as
        mov _HiMemTop,eax   ; a dword below, so its high word must be clean
        ; try to allocate ext mem
        mov dx,ax           ; dx = Kbytes wanted (the whole largest block)
        mov ax,900h
        call xms_entry      ; dx = handle
        or ax,ax
        jz fail_mem
        mov xms_handle,dx
        ; try to lock block
        mov ax,0C00h        ; dx = handle
        call xms_entry      ; the lock request must really be issued:
                            ; it returns dx:bx = 32bit address of the block
        or ax,ax
        jnz xms_locked
        ; lock refused: the block is already allocated, so give it back
        ; instead of leaving the handle in use after we quit
        say msg_badlock
        jmp XMSMemExit
xms_locked:
        shl edx,16
        mov eax,_HiMemTop   ; eax = block size in Kbytes
        mov dx,bx           ; edx = dx:bx = base address of the block
        shl eax,10 ; Kbytes --> Bytes
        mov _HiMemBase,edx
        add eax,edx
        mov _HiMemTop,eax
        ;
        ; Now, A20 line is enabled &  ext. mem is allocated and locked
        ; go on and initialize protected mode
        say msg_XMSjoin
JoinXMS: ; HARD init code will be the same of XMS from this point on
        ; Selects the HARD irq switch routine, sizes the IDT for interrupts
        ; 0..77h and allocates it, recording its code32-relative pointer and
        ; its linear base address.
        ; out: ebp = _Code32Base (used by the code that follows)

        ; modify code switchpoint
        mov s16_irToSYS32, offset irToHARD32

        ; AX = TOP INTERRUPT  NEEDED
        mov eax,77h ; we need to replace ints from 0 to 77h
        mov s16_Int_to_replace_ctr,ax  ; save number of ints to replace
        
        ; SET INT VECTOR TABLE

        lea eax,[eax*8+7]    ; set limit of IDT (every entry is 8 bytes)
        mov s16_IDTaddr,ax   ; (don't worry, we use the lower 16 bits)

        ; NOW ALLOCATE&INITIALIZE IDT
        say msg_idt
HARD_IDT:
        ; ALLOCATE SPACE FOR IDT
        call InitAlloc       ; eax (in) = size wanted, eax (out) = code32 offset
                             ; NOTE(review): contract inferred from the way the
                             ; result is used below - confirm against InitAlloc
        mov s32_idt32ptr,eax ; store pointer
        mov ebp,_Code32Base ;_Code32Base into ebp for immediate and later usage
        add eax,ebp         ; get linear address of IDT
        mov dword ptr bases16_IDTaddr,eax  ; set IDT base address
        
HARD_TSS_SETUP:
        ; Allocates and clears a 68h-byte TSS, patches its base into the GDT
        ; task descriptor, stores the I/O bitmap offset and the current CR3
        ; in it, then switches to protected mode through HARD_R2P, resuming
        ; at hard_protected16.
        ; in: ebp = _Code32Base (assumed to survive the call to InitAlloc)
        say msg_tss
        mov eax,68h   ; TSS is 68h bytes wide

        call InitAlloc; allocate space for TSS

        add eax,ebp   ; get linear address of TSS

        ; set Task Switch Segment (TSS) descriptor base address
        or dword ptr baseGDTtask,eax

        mov ebx,eax
        shr eax,4
        mov es,ax
        and ebx,0Fh
        ;  es:bx = TSS start

        ; first of all, clear everything (i'm paranoid)
        mov di,bx    ; clear TSS and IO bitmap
        xor eax,eax  ;
        mov ecx,1Ah  ; TSS size in dwords (1Ah*4 == 68h)
        rep stosd    ; T-bit is cleared into this loop

        ; then set IObitmap position, so the server will know there is none
        mov byte ptr es:[bx+66h],68h ; 68h == end of tss

        mov eax,CR3
        mov es:[bx+1Ch],eax ; set CR3 in TSS (i'm paranoid)

        say msg_HARDkick
        ; go protected
        mov esi,20h                            ; esi = selector to land on
        mov edi,large offset hard_protected16  ; edi = offset to land on
        jmp HARD_R2P
hard_protected16:        ; in 16bit prot. mode
        ; Landing point of the HARD/XMS switch to protected mode.
        ; Loads the 32bit stack pointer, fills the IDT (generic exception
        ; gate in every slot 0..s16_Int_to_replace_ctr, then the 16 irq
        ; handlers plus the API interrupt) and joins the common code at
        ; JoinDPMI.

        ; Now set 32 bit protected mode segments and stack pointer
        cli

        ; gs= linear
        ;  ds,es,fs,ss = data32
        ; (no segment register is loaded here: they are expected to be
        ;  already set when HARD_R2P gets here - TODO confirm)
        
        mov esp,(STACKSIZE*16)     ; esp set at stack start in 16bit mode
        add esp,Stack32BaseOffset  ; esp = prot. mode stack base
        
        ; now set up 32bit mode completely

HARD_ready32:

        ; now initialize IDT with general exception handler
        mov edi,s32_idt32ptr ; get code32 relative ptr to IDT
        
        ; now EDI == base pointer to IDT
        
        ; set general exception handler
        ; into ALL SLOTS OF THE INTERRUPT TABLE
        ; (default reaction to "unknown interrupt" is auto-reset)
        xor esi,esi ; clear esi
        mov si,s16_Int_to_replace_ctr ; esi = highest slot to fill
        
        mov eax,offset s32_exc  ; set the generic exception handler as default
        and eax,0000ffffh       ; offset s32_exc (we know it's under 64k)
        or  eax,00080000h       ; code32 pmode selector
        
        mov ebp,00008E00h       ; flags & higher (clear) offset
                                
        ; IDT entry (8 bytes)
        ; offset  size
        ;      0     2   low offset  of ISR (Interrupt Service Routine)
        ;      2     2   selector    of ISR
        ;      4     2   flags
        ;      6     2   high offset of ISR
        
HX_IDTEntry:
        ; selector = 8
        ; offset    = offset s32_exc (generic handler)
        mov [edi+esi*8],eax ; low offset and selector
        mov [edi+esi*8+4],ebp ; flags & high offset (cleared)
        dec si
        jns HX_IDTEntry       ; slot 0 included
        
        ; NOW IT'S TIME TO SET INTO THE INTERRUPT TABLE
        ; THE OFFSETS OF THE ISR ROUTINES WE SUPPORT
        ; copy exception handlers from IDT
        mov esi,16 ; 16 irq + 1 "DPMI api interrupt"
        xor ebx,ebx
HX_IDTInt:
        mov bl,intslotnum[si]           ; ebx = interrupt number of this slot
        mov eax,[esi*4+s16_idt_default] ; get ISR offset from s16_idt_default
        mov [edi+ebx*8],ax ; put it into the correct slot (low 16 bits only)
        dec si
        jns HX_IDTInt

        jmp JoinDPMI

;==============================================================================
; 16 bit HARD/XMS redirectors

; HARD/XMS real mode irq reflector to prot mode

; One entry stub per irq (f = irq 15 ... 0 = irq 0).
; Each stub saves ecx, loads ecx with (irq number * 4) and joins the common
; reflector s16_virq; the last stub (s16_virq0) simply falls through into it.
; NOTE(review): the first two stubs use a plain jmp while the others use
; "jmp short" - presumably because s16_virq is out of short-jump range from
; there; keep the size of the stubs unchanged.
s16_virqf:
        push ecx
        mov  ecx,15*4
        jmp s16_virq
s16_virqe:
        push ecx
        mov  ecx,14*4
        jmp s16_virq
s16_virqd:
        push ecx
        mov  ecx,13*4
        jmp short s16_virq
s16_virqc:
        push ecx
        mov  ecx,12*4
        jmp short s16_virq
s16_virqb:
        push ecx
        mov  ecx,11*4
        jmp short s16_virq
s16_virqa:
        push ecx
        mov  ecx,10*4
        jmp short s16_virq
s16_virq9:
        push ecx
        mov  ecx,9*4
        jmp short s16_virq
s16_virq8:
        push ecx
        mov  ecx,8*4
        jmp short s16_virq
s16_virq7:
        push ecx
        mov  ecx,7*4
        jmp short s16_virq
s16_virq6:
        push ecx
        mov  ecx,6*4
        jmp short s16_virq
s16_virq5:
        push ecx
        mov  ecx,5*4
        jmp short s16_virq
s16_virq4:
        push ecx
        mov  ecx,4*4
        jmp short s16_virq
s16_virq3:
        push ecx
        mov  ecx,3*4
        jmp short s16_virq
s16_virq2:
        push ecx
        mov  ecx,2*4
        jmp short s16_virq
s16_virq1:
        push ecx
        mov  ecx,1*4
        jmp short s16_virq
s16_virq0:
        push ecx
        xor ecx,ecx    ; irq 0: ecx = 0*4, then fall through into s16_virq
s16_virq:
        ; Common HARD/XMS irq reflector (real mode -> protected mode).
        ; in: ecx = irq number * 4, caller's ecx already pushed by the stub
        ; Saves the registers and the previously saved stack pointer, moves
        ; to a fresh frame of the mode-switch stack and switches to protected
        ; mode at s32_virqserver through HARD_R2P.  Execution resumes at
        ; s16_virqdone, which restores everything and returns with iret.
        ; ecx = vector to call from protected mode
        pushad
        push ds
        push es
        push fs
        push gs
        mov ax,code32 ; now on shared data segment
        mov ds,ax     ;
        push s16_SavStackOfs      ; save "old" stack on this stack
        push s16_SavStackSeg      ; (so that nested reflections can be undone)

        mov ax,codeend

        mov s16_SavStackOfs,esp   ;
        mov s16_SavStackSeg,ss    ; save stack into global vars
        
        mov ss,ax
        mov esp,nextmodestack            ; use the next free mode-switch frame
        sub nextmodestack,(STACKSWTP*16) ; and reserve it
        mov esi,08h ; _SelCode
        mov edi,offset s32_virqserver ; jump to p. mode virq server
        jmp HARD_R2P ; go real to protected mode

s16_virqdone:
        ; in: ds = code32 (the saved stack variables are read through ds)
        ; Reloads ss:esp from s16_SavStackSeg:s16_SavStackOfs, then unwinds
        ; what s16_virq pushed.
        mov ss,s16_SavStackSeg   ; restore previous stack
        mov esp,s16_SavStackOfs  ;
        pop s16_SavStackSeg ; restore saved stack
        pop s16_SavStackOfs ;
        add nextmodestack,(STACKSWTP*16) ; back to last new stack frame
        pop gs
        pop fs
        pop es
        pop ds
        popad
        pop ecx         ; ecx pushed by the entry stub
        iret

;-----------------------------------------------------------------------------
; HARD 386P init code

; Poll the 8042 keyboard controller until its input buffer is empty.
; Out:
;   ZF set   = input buffer empty, safe to write to port 60h/64h
;   ZF clear = buffer still full after 65536 polls (timeout)
; Clobbers: AL, CX
A20KBwait:  ; wait for safe to write to 8042
        xor cx,cx           ; CX = 0 -> LOOPNZ can iterate up to 65536 times
KBsooloong:
        jmp short $+2 ;  waste some time to let the 8042 keep up
        jmp short $+2 ;
        jmp short $+2 ;
        in al,64h           ; read 8042 status
        test al,2           ; buffer full?
        loopnz  KBsooloong  ; if yes, loop (LOOPNZ does not alter the flags,
                            ; so ZF from TEST is what the caller sees)
        ret

;-----------------------------------------------------------------------------
; Check whether the A20 address line is enabled.
; In:
;   FS = 0, GS = 0FFFFh (set up by the caller, see StartHARD)
; Out:
;   ZF set   = A20 enabled (write at 0FFFFh:10h did not alias 0:0)
;   ZF clear = A20 disabled (the write wrapped around to 0:0)
; Clobbers: AX.  The byte at 0FFFFh:10h is restored before returning.
A20test: ; test for enabled A20
        mov al,fs:[0]         ; get byte from 0:0
        mov ah,al             ; preserve old byte
        not al                ; modify byte
        xchg al,gs:[10h]      ; put modified byte to 0ffffh:10h
        cmp ah,fs:[0]         ; set zero if byte at 0:0 not modified
        mov gs:[10h],al       ; put back old byte at 0ffffh:10h
                              ; (MOV leaves the flags from CMP untouched)
        ret                   ; return, zero if A20 enabled

; Program termination entry for HARD mode: StartHARD stores this address
; (with selector 20h) into _EXIT_ADDR/_EXIT_SEL.
; Loads ESI with the code16 segment and EDI with the offset of "exit",
; then jumps to HARD_P2R.
; NOTE(review): presumably HARD_P2R takes ESI:EDI as the real-mode target
; CS:IP, mirroring how HARD_R2P/HARDPROT use ESI/EDI - confirm in HARD_P2R.
s16_retreal:
        mov esi,code16
        mov edi,offset exit
        jmp HARD_P2R


StartHARD: ; Initialize protected mode
           ; on a system with just PC BIOS support.
           ; This means we will have to handle hardware directly
           ; and hope everything is pc compatible.
           ; Really HARD to run on raw HARDware. :)
           ;
           ; Steps performed from here on:
           ;   1. record HARD mode and the termination entry point
           ;   2. _HiMemTop  = 1MB + extended memory reported by INT 15h/88h
           ;   3. enable the A20 line
           ;   4. _HiMemBase = first free byte above 1MB (after any VDISK)
           ;   5. hook INT 15h and continue at JoinXMS
           ; Failures leave through fail_mem / fail_a20.
        mov _386Man,IS_HARD
        mov _EXIT_SEL,20h                        ; selector used by _Exit's far jump
        mov _EXIT_ADDR,large offset s16_retreal  ; set program termination data
        ;---------------------------
        ; Set _HiMemTop
        ;

        mov ah,88h  ; how much extended memory free ?
        int 15h     ; AX = extended memory size in K
        ; this bios call sometimes has implementation bugs
        ; the docs say you can check the carry flag
        ; to see if it is successful, but not all BIOSes around
        ; set the carry flag correctly, so it is better to hope for good and go..
        cmp ax,EXTMIN
        jb fail_mem
        ; this method allows access to max 64Mbytes of ram
        ; (the size is a 16bit count of K)
        and eax,0FFFFh    ; discard the upper half of EAX, keep the K count
        shl eax,10        ; K -> bytes
        add eax,100000h   ; add one megabyte of ram
        mov _HiMemTop,eax

        ;-------------------------------------------------------------------
        ; NOW ENABLE a20 address line
        ;
        ; Three methods are tried in turn, each followed by A20test:
        ;   1. port 92h  (PS/2 "System Control Port A"), bit 1
        ;   2. port 65h, bit 2 ("AT fast A20")
        ;   3. 8042 keyboard controller: command 0D1h, data 0DFh
        ; After method 3 the line is polled for up to 0A00h timer periods
        ; before giving up at fail_a20.
        ; Uses AX, CX, FS (=0) and GS (=0FFFFh).

        xor ax,ax                       ; set A20 test segments 0 and 0ffffh
        mov fs,ax
        dec ax  ; now ax = 0FFFFh
        mov gs,ax

        call A20test          ; is A20 already enabled?
        jz short a20done      ; if yes, done

        in al,92h                       ; PS/2 A20 enable
        or al,2                         ; bit 1 = A20 gate
        and al,0FEh                     ; bit 0 = fast CPU reset: never write
                                        ; it back as 1, whatever was read
        jmp short $+2   ; wait a little ...
        jmp short $+2   ;
        jmp short $+2   ;
        out 92h,al

        call A20test        ; is A20 enabled?
        jz short a20done    ; if yes, done

        in al,65h       ; AT fast A20 enable
        or al,4
        jmp short $+2   ; wait a little ...
        jmp short $+2   ;
        jmp short $+2   ;
        out 65h,al

        call A20test        ; is A20 enabled?
        jz short a20done    ; if yes, done

        call A20KBwait            ; AT "keyboard controlled" A20 enable
        jnz short a20slowdone     ; 8042 never became ready: just poll A20

        mov al,0d1h               ; 8042 command: write output port
        out 64h,al

        call A20KBwait
        jnz short a20slowdone

        mov al,0dfh               ; output port value with the A20 gate on
        out 60h,al

        call A20KBwait
a20slowdone:
                        ; wait for A20 to enable
        mov cx,0A00h    ; do A00h tries

a20slowloop:
        call A20test   ; is A20 enabled?
        jz a20done           ; if yes, done

        in al,40h      ; get current tick counter
        jmp short $+2  ; (two reads of port 40h: the second byte read
        jmp short $+2  ;  is the one kept for the comparison)
        jmp short $+2
        in al,40h
        mov ah,al

same_tick:             ; wait a single tick
        in al,40h
        jmp short $+2
        jmp short $+2
        jmp short $+2
        in al,40h
        cmp al,ah
        je same_tick

        loop a20slowloop              ; loop for another try
        call A20test                  ; one last check after the final wait
        jz a20done
        jmp fail_a20

; A20 is enabled: find the first free byte of extended memory, check that
; enough is left, hook INT 15h and join the XMS/HARD common init code.
a20done:
        ;--------------------------------
        ; now set _HiMemBase
        ;

        xor dx,dx                       ; ES -> 0 (interrupt vector table)
        mov es,dx                       ;
        les bx,dword ptr es:[4*19h]     ; ES:BX = far pointer stored in the
                                        ; INT 19h (reset) vector
        ; now es:bx points to the int 19h (reset) handler

        mov eax,100000h                 ; initial free extended memory base

        cmp dword ptr es:[bx+12h],'SIDV'; VDISK memory allocation?
        jne short go_check_ext_base ; no, free ext mem starts at first meg
        ; if VDISK present, get base of free ext mem
        ; AFTER vdisk data
        mov eax,es:[bx+2ch] ; get first free byte of extended mem
        add eax,0fh         ; align on paragraph
        and eax,0fffff0h    ; base address is only 24bit

go_check_ext_base:
        dec dx     ; ES -> 0ffffh for ext mem addressing
        mov es,dx  ; (DX was 0, so DX = 0FFFFh here)
        ; now try other VDISK presence method

        cmp dword ptr es:[13h],'SIDV'   ; VDISK memory allocation?
        jne short vdisk_done            ; not present: keep EAX as computed
        xor ebx,ebx      ;
        mov bx,es:[2eh]  ; get first free K of extended memory
        shl ebx,10       ; adjust K to bytes

        cmp eax,ebx         ; what's the higher one ?
        ja short vdisk_done ; EAX already higher: keep it

        mov eax,ebx

vdisk_done:
        mov _HiMemBase,eax
        mov edx,_HiMemTop
        sub edx,eax         ; EDX = bytes of usable extended memory
        shr edx,10          ; bytes -> K
        cmp edx,EXTMIN
        jb  fail_mem

        ; install hardint15 as the real mode INT 15h handler
        ; (DOS function 25h: AL = vector, DS:DX = new handler)
        push ds
        mov ax,code16
        mov ds,ax
        mov dx,offset hardint15
        mov ax,2515h
        int 21h           ; patch int15
        pop ds
        jmp JoinXMS

; far pointer scratch used by hardint15 to chain to the old handler
; (lives in code16 so it can be reached through CS)
dummypointer dd 0

; Real mode INT 15h replacement installed by StartHARD.
;   AH = 88h     : answered locally by my15 with AX = 0 (no extended memory
;                  free).
;   anything else: chained to the handler saved in _OldInt[15h].
; All registers are preserved on the chaining path; the flags are changed by
; the CMP below.
hardint15:                         ; real mode INT 15h handler
        cmp ah,88h          ; function 88h?
        je short my15       ;
        push ds
        push eax
        mov ax,code32
        mov ds,ax
        mov eax,ds:[(15h*4)+ offset _OldInt] ; old INT 15h handler
        mov cs:dummypointer,eax
        pop eax
        pop ds
        jmp dword ptr cs:dummypointer   ; far jump: the old handler IRETs
                                        ; straight to our caller
        
my15:
        xor ax,ax  ; zero k free
        iret ; return without modifying the carry flag
             ;  (IRET reloads the caller's flags from the stack)
             ;  (to be consistent with the documented bug ;) )


code16  ends

; 32bit code
code32  segment para public use32
        assume cs:code32, ds:code32, ss:code32

; THE "START" OF CODE32 MUST ALWAYS LOOK AS FOLLOWS HERE
; TO LET "EXTERNAL" 386P DRIVERS HAVE FULL ACCESS TO THE 386P API CODE AND DATA
; SO DON'T CHANGE
; HOW THE SUBSEQUENT DECLARATIONS ARE ALIGNED, ORDERED AND DECLARED!!!!!

; N.B. this "fixed table" is useful to access virtual registers
;      and the memory allocation vars. But,please don't abuse of it.

; 32 bit common system data
; (start of the fixed driver interface table: keep order and sizes)
                align dword
                dd      0               ; scratch dword
_LoMemBase      dd      0               ; low mem base for allocation
                                        ; (next free address, advanced by
                                        ;  _GetLoMem)
_LoMemTop       dd      0               ; top of low mem

_HiMemBase      dd      0               ; high mem base for allocation
                                        ; (next free address, advanced by
                                        ;  _GetHiMem)
_HiMemTop       dd      0               ; top of high mem

_PSPBase        dd      0               ; LINEAR offset of start of PSP
_Code16Base     dd      0               ; LINEAR offset of start of 16bit code
_Code32Base     dd      0               ; LINEAR offset of start of 32bit code

                align byte
                ; (byte alignment, but we know we will have optimal alignment)
; Virtual 8086 Registers
; The eight general registers are laid out as
;   edi, esi, ebp, (esp), ebx, edx, ecx, eax
; followed by the flags word, the four segment registers and the
; tempaddr/tempstack far pointers.
V86edi        label   dword      ; vregs for 386P<-->V86 communication
V86di         dw      0, 0       ; we need this order if we want
V86esi        label   dword      ; to bypass 386POWER and go thru DPMI API
V86si         dw      0, 0       
V86ebp        label   dword      
V86bp         dw      0, 0       
              dd      0          ; this DWORD is a 'fake' esp (see POPAD)
V86ebx        label   dword
V86bx         label   word
V86bl         db      0
V86bh         db      0, 0,0     ; BH plus the upper word of EBX
V86edx        label   dword
V86dx         label   word
V86dl         db      0
V86dh         db      0, 0,0     ; DH plus the upper word of EDX
V86ecx        label   dword
V86cx         label   word
V86cl         db      0
V86ch         db      0, 0,0     ; CH plus the upper word of ECX
V86eax        label   dword
V86ax         label   word
V86al         db      0
V86ah         db      0, 0,0     ; AH plus the upper word of EAX
V86F          dw      0          ; flags word
V86es         dw      0
V86ds         dw      0
V86fs         dw      0
V86gs         dw      0
tempaddr      dd      0 ; dummy CS:IP or EIP to call when irq-ing to/from pmode
tempstack     dd      0 ; dummy SS:SP (DPMI will set up a 30 word stack)
              dw      0 ; plus a word to keep things aligned at 32bit
                
; integrated irq management support
; (near pointers: call them indirectly through these variables)
_GetIRQ     dd      s32_getirq       ; get IRQ handler offset routine addr
_SetIRQ     dd      s32_setirq       ; set IRQ handler offset routine addr
_GetIMask   dd      _GetIRQMask      ; get IRQ mask bit routine addr
_SetIMask   dd      _SetIRQMask      ; set IRQ mask bit routine addr


                align word
_SelCode        dw      08h     ; code32 segment selector
_SelData        dw      10h     ; data segment alias for code32
_SelZero        dw      18h     ; data segment starting at addressing base
                dw      0 ; unused word, to align things

; ptr to termination message
; (offset inside code32 of a '$' terminated string, printed by _Exit)
                public _386Return
_386Return      dd offset _386Terminator
                align byte
_CPUPower       db      0             ; cpu type: (NO INFO ABOUT FPU!!!!!!)
                                      ; 0 = 8086
                                      ; 1 = 80186
                                      ; 2 = 286
                                      ; 3 = 386
                                      ; 4 = 486
                                      ; 5 = Pentium

_386Man         db      0             ; 386 manager (IS_* equates)
                                      ; 0=VCPI, 1=DPMI, 2=XMS, 3=HARD
                dw      0    ; reserved

; basic dos-extender services
_ExecINT        dd    offset v32_ExecINT    ; executes an ms-dos INT call
_ExecReal       dd    offset v32_ExecReal   ; executes a real-mode FAR routine
        public _ExecINT,_ExecReal
; integrated dma services
_DMAInfo        dd   offset VDMACheck    ; get info on dma resources
_DMAInit        dd   offset VDMAInit     ; initialize DMA services
; hook to virtual dma services or emulate 'em
_DMALock        dd   offset DChanLock      ; lock dma channel
_DMAUnLock      dd   offset DChanUnLock    ; unlock dma channel
_DMASend        dd   offset VDMASend    ; send data thru dma
_DMAReceive     dd   offset VDMAReceive  ; receive data thru dma
_DMAMap         dd   offset DScatLock    ; map&lock scatter/gather memory
_DMAUnMap       dd   offset DScatUnLock  ; unmap&unlock scatter/gather memory
        public _DMAInfo,_DMAInit,_DMALock,_DMAUnLock
        public _DMASend,_DMAReceive,_DMAMap,_DMAUnMap

; "STANDARD"  DRIVER_TO_386POWER INTERFACE TABLE ENDS HERE


; standard termination text
                public _386Terminator                        
_386Terminator  db '386Power 2.00 extended environment',CR,LF,'$'

public  V86eax, V86ebx, V86ecx, V86edx, V86esi, V86edi, V86ebp
public  V86ax, V86bx, V86cx, V86dx, V86si, V86di, V86bp
public  V86al, V86ah, V86bl, V86bh, V86cl, V86ch, V86dl, V86dh
public  V86ds, V86es, V86fs, V86gs
public  _SelCode, _SelData, _SelZero, _LoMemBase, _LoMemTop, _HiMemBase
public  _HiMemTop, _PSPBase, _Code16Base, _Code32Base, _GetIRQ, _SetIRQ
public  _386Man,_CPUPower

extrn   _Main:near

;------------------------------------------------------------------------------
; 16 bit common system data
;------------------------------------------------------------------------------
; debug messages  THESE MUST BE 16bit-accessible
; Every string is '$' terminated, as required by DOS int 21h, ah=09
; (the print service used by typemsg).
msg_DPMItest  db 'Checking DPMI',CR,LF,'$'
msg_VCPItest  db 'Checking VCPI',CR,LF,'$'
msg_XMStest   db 'Checking XMS',CR,LF,'$'
msg_HARDtest  db 'Support software not detected, trying to run directly',CR,LF
              db 'using native bios and hardware [HARDware mode]',CR,LF,'$'
msg_gints     db 'Saving DOS interrupt table',CR,LF,'$'
msg_pints     db 'Restoring IDT to default values',CR,LF,'$'
msg_rints     db 'Restoring DOS interrupt table',CR,LF,'$'
msg_end       db 'Terminating program ... CIAO! See you later.',CR,LF,'$'
msg_DPMIlar    db 'Checking access rights allowed by the DPMI server',CR,LF,'$'
msg_DPMIbadlar db 'LAR instruction failed, CPL set to default RING 3',CR,LF,'$'
msg_DPMInominfo db 'DPMI does not respond to memory info request',CR,LF,'$'
msg_DPMIhiget   db 'Checking if extended memory has to be allocated',CR,LF,'$'
msg_allocmem  db 'Allocating all available memory, please wait ...',CR,LF,'$'
msg_lockmem   db 'Locking extended memory ...',CR,LF,'$'
msg_badlock   db 'FAILED LOCKING EXTENDED MEMORY!',CR,LF,'$'
msg_DPMIwin   db 'Extended memory allocated',CR,LF,'$'
msg_DPMIirq   db 'Setting new DPMI irq handlers',CR,LF,'$'
msg_VCPIfree  db 'Freeing pages allocated thru VCPI',CR,LF,'$'
msg_DPMIifail db 'DPMI prot. mode irq allocation failure',CR,LF,'$'
msg_DPMIgood  db 'DPMI initialization completed',CR,LF,'$'
msg_VCPIkick  db 'Ready to enter VCPI protected mode',CR,LF,'$'
msg_XMSfound  db 'Detected XMS interface',CR,LF,'$'
msg_XMSjoin   db 'XMS/HARD common initialization ...',CR,LF,'$'
msg_HARDkick  db 'Ready to enter HARD protected mode',CR,LF,'$'
msg_DPMIcode32 db 'Setting 32bit code   selector',CR,LF,'$'
msg_DPMIdata32 db 'Setting 32bit data   selector',CR,LF,'$'
msg_DPMIzero32 db 'Setting 32bit linear selector',CR,LF,'$'
msg_unlock      db 'Unlocking extended memory',CR,LF,'$'
msg_releasemem  db 'Freeing extended memory',CR,LF,'$'
msg_DPMIrcode32 db 'Releasing 32bit code   selector',CR,LF,'$'
msg_DPMIrdata32 db 'Releasing 32bit data   selector',CR,LF,'$'
msg_DPMIrzero32 db 'Releasing 32bit linear selector',CR,LF,'$'
msg_DPMIfinal   db 'Final DPMI call for total shutdown',CR,LF,'$'

; DPMI runtime failure messages
msg_DPMI_SR_FAIL db 'DPMI ERROR: SAVE/RESTORE STATE FAILURE',CR,LF,'$'
msg_DPMI_MS_FAIL db 'DPMI ERROR: MODE SWITCH FAILURE',CR,LF,'$'
msg_DPMI_IV_FAIL db 'DPMI ERROR: INTERRUPT CONTROLLER ACCESS FAILURE',CR,LF,'$'
msg_DPMI_GI_FAIL db 'DPMI ERROR: GET IRQ FAILURE',CR,LF,'$'
msg_DPMI_SI_FAIL db 'DPMI ERROR: SET IRQ FAILURE',CR,LF,'$'

; VCPI / common initialization progress messages
msg_VCPIpic  db 'Looking for P.I.C. vector mapping',CR,LF,'$'
msg_idt  db 'Allocating IDT (will be set while into prot. mode)',CR,LF,'$'
msg_tss  db 'Allocating and setting TSS ',CR,LF,'$'
msg_paging  db 'Allocating and setting page tables ',CR,LF,'$'
msg_VCPIhookserver  db 'Interfacing with VCPI server',CR,LF,'$'
msg_VCPIaddpages    db 'Adding extended memory to page table',CR,LF,'$'
msg_VCPIpagedir     db 'Initializing page directory',CR,LF,'$'
msg_going_main db ' ENTERED FULL 32BIT MODE, ready for main program start',CR,LF,'$'

        

; hex ->ascii translation table (index 0..15 -> '0'..'F')
emT_hextable db '0123456789ABCDEF'

; standard message (the byte 7 is the BEL character)
emX_stderr   db '[ See file 386ERROR.TXT for more info ]',7,CR,LF,'$'

; error strings %%%
; all '$' terminated; the two digit hex code in the text is the error number
em0_86  db 'Error 00: 386 COMPATIBLE PROCESSOR NOT DETECTED!',CR,LF
        db 'Found an 8086 cpu',CR,LF,'$'
em0_286 db 'Error 00: 386 COMPATIBLE PROCESSOR NOT DETECTED!',CR,LF
        db 'Found a 80286 cpu',CR,LF,'$'
em1_no_lomem   db 'Error 01: NOT ENOUGH MEMORY UNDER 640KB!',CR,LF,'$'
emINI_no_lomem db 'Error 01: NOT ENOUGH MEMORY UNDER 640KB!',CR,LF
               db '  (While allocating internal structures)',CR,LF,CR,LF,'$'

em2_unkV86 db CR,LF,'Error 02: UNKNOWN MEMORY MANAGER DRIVES V86 MODE!',CR,LF
           db CR,LF,'          DPMI or VCPI server NOT detected.',CR,LF
           db CR,LF,'          Detected an unknown V86 memory manager '
           db CR,LF,'          maybe you are running old system software.'
           db CR,LF,CR,LF,'$'

em3_no_himem db 'Error 03: NOT ENOUGHT MEMORY ABOVE 1MB!',CR,LF,'$'
em4_noA20    db 'Error 04: A20 ADDRESS LINE NOT ENABLED!',CR,LF,'$'
em5_hifault  db 'Error 05: FAILED ALLOCATION OF MEMORY ABOVE 1MB!',CR,LF,'$'
em6_PICfault db 'Error 06: VCPI P.I.C. INT MAPPED ON 386POWER INTS',CR,LF,'$'
em7_noDPMI32 db 'Error 07: NOT A 32 BIT DPMI HOST!',CR,LF,'$'
em8_DPMIdesc db 'Error 08: NOT ENOUGH DPMI DESCRIPTORS!',CR,LF,'$'
em9_DPMImod  db 'Error 09: CANNOT MODIFY DPMI DESCRIPTORS!',CR,LF,'$'
emA_FIX      db 'Error 0A: VCPI HAS NOT ENOUGH PAGES AVAILABLE!',CR,LF,'$'
emB_lowDPMI  db 'Error 0B: DPMI API IS OLDER THAN DPMI 0.9',CR,LF,'$'
emC_linear   db 'Error 0C: LINEAR MEMORY SPACE EXAUSTED',CR,LF,'$'
emD_phys     db 'Error 0D: PHYSICAL MEMORY SPACE EXAUSTED',CR,LF,'$'
emE_backing  db 'Error 0E: BACKING STORAGE EXAUSTED',CR,LF,'$'
emF_handle   db 'Error 0F: INVALID HANDLE',CR,LF,'$'
em10_invalid db 'Error 10: INVALID PARAMETER VALUE',CR,LF,'$'
em11_EqPIC   db 'Error 11: P.I.C. MAP ON SAME INTERRUPT SLOT',CR,LF,'$'

_386power_info db CR,LF
  db 'Ŀ',CR,LF
  db '  386Power Dos-Extender     386P Revision 2.000             ',CR,LF
  db '                                                            ',CR,LF
  db '     This is a public domain dos-extender module            ',CR,LF  
  db '     designed for a VCPI/DPMI interface to protected mode.  ',CR,LF  
  db '     You need a VCPI or DPMI manager to run this.           ',CR,LF
  db '                                                            ',CR,LF    
  db '     (c) Copyright Lorenzo Micheletto MCHLNZ67T19C890A      ',CR,LF
  db '     All rights reserved, except the portions of 386Power   ',CR,LF
  db '     based on the source code of the PMODE dos-extender     ',CR,LF  
  db '     by Thomas "Tran" Pytel.                                ',CR,LF  
  db '     See the 386Power documentation for explanations.       ',CR,LF  
  db '                                                            ',CR,LF      
  db '',CR,LF  
  db CR,LF,'$'

; cpu detection banners
wow_386 db CR,LF,'*******  386DX OR 386SX   PROCESSOR DETECTED  *******',CR,LF,CR,LF,'$'
wow_486 db CR,LF,'*******  486DX OR 486SX   PROCESSOR DETECTED  *******',CR,LF,CR,LF,'$'
wow_586 db CR,LF,'*******  PENTIUM OR 586   PROCESSOR DETECTED  *******',CR,LF,CR,LF,'$'
wow_cpu db CR,LF,'*******  686 OR SUPERIOR  PROCESSOR DETECTED  *******',CR,LF,CR,LF,'$'

; message pointer table, 16 dword entries
; (entries 0..5 follow the _CPUPower numbering: 8086, 80186, 286, 386,
;  486, Pentium; entries 6..15 all point to the generic "686 or superior"
;  banner.  The 80186 slot reuses the 8086 message.)
msg_cputype  dd offset em0_86,   offset em0_86, offset em0_286
             dd offset wow_386, offset wow_486, offset wow_586
             dd offset wow_cpu, offset wow_cpu, offset wow_cpu
             dd offset wow_cpu, offset wow_cpu, offset wow_cpu
             dd offset wow_cpu, offset wow_cpu, offset wow_cpu
             dd offset wow_cpu
nullint      db   0cfh            ; IRET instruction (a do-nothing handler,
                                  ; used as ctrl+break vector by INIT_386P)
exitrout     dw   exit            ; exit routine, modified if VCPI

;=============================================================================
; 16 bit DPMI system data

                  align dword
d16_Fast16_To_32          dd      0  ; switch from 16bit to 32bit
d16_SaveRestoreState      dd      0  ; save/restore state addr

hidpmi_handle dd 0 ; dpmi memory manager handle of the allocated high memory block
hidpmi_size   dd 0 ; size and linear address base of high memory
hidpmi_base   dd 0 ; locked by dpmi
                  align word
d16_EnterDPMI     dw      0,0 ; DPMI switch to protected mode (16bit)
                              ; (two words: room for a 16bit far pointer)

d16_PSPSel        dw      0   ; PSP selector (use it in prot. mode)
d16_DosEnvSegSel  dw      0   ; ENVIRONMENT selector

            align dword
; offsets of the 16 DPMI protected mode irq entry points (irq 0 .. irq 0Fh)
d16_nintoff dd offset d32_irq0,offset d32_irq1,offset d32_irq2
            dd offset d32_irq3,offset d32_irq4,offset d32_irq5
            dd offset d32_irq6,offset d32_irq7
            dd offset d32_irq8,offset d32_irq9,offset d32_irqa
            dd offset d32_irqb,offset d32_irqc,offset d32_irqd
            dd offset d32_irqe,offset d32_irqf

            
;-----------------------------------------------------------------------------
; 16bit VCPI system data

; VCPI EMS DATA

; VCPI PAGE DIRECTORY DATA
                  align word
v16_PageDirSeg    dw      0             ; seg of page directory
v16_PageBase      dw      0             ; first page of himem (*4)+1000h
v16_PageTop       dw      0             ; top page of himem (*4)+1000h
                  dw      0A0Bh         ; NOTE(review): purpose of this
                                        ; constant is not visible here
;------------------------------------------------------------------------------
                  align word
                  ; VCPI mode-switch data
                  ; after initialization this table may be copied
                  ; by the IRQ redirectors
                  ; (field order is fixed: v16_VCPIsys points at the start)
v16_sw_cr3        dd      0                    ; new CR3 for 386P (physical)
v16_sw_gdtaddrptr dd      offset s16_GDTaddr   ; lin. ptr to GDT data for 386P
v16_sw_idtaddrptr dd      offset s16_IDTaddr ; lin. ptr to IDT data for 386P
v16_sw_ldtsel     dw      0      ; don't need LDTs, zero selector
v16_sw_trsel      dw      30h    ; task state segment (TSS) selector
v16_sw_dest       dd      0      ; switch destination EIP
                  dw      20h    ; switch destination CS (code16)
                  dw      0

v16_VCPIsys       dd offset v16_sw_cr3  ; pointer to the table above

;----------------------------------------------------------------------------
; 16 bit VCPI/XMS/HARD mode system data

                         align word

switchstackaddr          dd  0  ; SS:SP to pass during real mode switches

s16_Int_to_replace_ctr   dw  0  ; number of int vects needed -1

; switchpoint to return to protected mode after int33, int32 from prot. mode.
s16_irToSYS32   dw offset irToVCPI32

; real mode stack saved by the irq reflector (see s16_virq / s16_virqdone)
s16_SavStackOfs dd 0   ; current saved stack offset  during real mode irq rfx
s16_SavStackSeg dw 0,0 ; current saved stack segment during real mode irq rfx

; 6 byte limit:base descriptor-table pointers
s16_IDTaddr     dw 0      ; limit must be set by VCPI/XMS/HARD init code
bases16_IDTaddr dd 0      ; 32bit IDT address

s16_GDTaddr       dw 04fh        ; limit for selectors 00h .. 48h
                                 ; (10 descriptors * 8 bytes - 1)
bases16_GDTaddr   dd offset GDT  ; 32bit GDT address

s16_IDT86       dw 3FFh   ; REAL MODE IDT (set to INT table)
                dd 0      ; base 0, limit 3FFh = 256 vectors * 4 bytes

                  align dword
; default handler offsets: 16 irq entries followed by the int 31h entry
; (17 entries per table, same order as intslotnum)
s16_idt_default   dd   offset s32_irq0,offset s32_irq1,offset s32_irq2
                  dd   offset s32_irq3,offset s32_irq4,offset s32_irq5
                  dd   offset s32_irq6,offset s32_irq7,offset s32_irq8
                  dd   offset s32_irq9,offset s32_irqa,offset s32_irqb
                  dd   offset s32_irqc,offset s32_irqd,offset s32_irqe
                  dd   offset s32_irqf
                  dd   offset s32_int31
v16_idt_default   dd   offset v32_irq0,offset v32_irq1,offset v32_irq2
                  dd   offset v32_irq3,offset v32_irq4,offset v32_irq5
                  dd   offset v32_irq6,offset v32_irq7,offset v32_irq8
                  dd   offset v32_irq9,offset v32_irqa,offset v32_irqb
                  dd   offset v32_irqc,offset v32_irqd,offset v32_irqe
                  dd   offset v32_irqf
                  dd   offset s32_int31
                  ; int 31h is the same for vcpi and xms/hard
                  ; int 31h is the same for vcpi and xms/hard

                align dword
                dd 0   ;scratch dword
                align byte
; Global Descriptors Table
; 8 bytes per descriptor: limit word, then base 0..23, access byte,
; flags/limit 16..19, base 24..31.  The base bytes are zero here and have
; labels (baseGDTxxx) so that they can be filled in at run time.
GDT           dq      0                 ; 00h null selector, first into GDT
GDTcode32     dw      0ffffh
baseGDTcode32 db      0,0,0,9ah,0cfh,0  ; 08h code32 as 32bit code seg sel
GDTdata32     dw      0ffffh
baseGDTdata32 db      0,0,0,92h,0cfh,0  ; 10h code32 as 32bit data seg sel
GDTzero32     dw      0ffffh
              db      0,0,0,92h,0cfh,0  ; 18h 32bit LINEAR SPACE data seg sel
GDTcode16     dw      0ffffh
baseGDTcode16 db      0,0,0,9ah,0,0     ; 20h code16 as 16bit code seg sel
GDTdata16     dw      0ffffh
baseGDTdata16 db      0,0,0,92h,0,0     ; 28h code16 as 16bit data seg sel
GDTtask       dw      0067h ; TSS limit without i/o bitmap
baseGDTtask   db      0,0,0,89h,0,0     ; 30h TSS code seg sel
GDTvcpi       dq      3 dup(0)          ; 38h,40h,48h
     ;  selector 38h is the one used by the VCPI initialization code
     ;  selectors 40h and 48h are for "internal" use by VCPI

        ; table containing interrupt vectors as found at 386P initialization
        ; (256 entries, one dword per vector; entry n is at _OldInt+n*4)
        align dword
_OldInt dd 256 dup(0)
        public _OldInt
;------------------------------------------------------------------------------
; continue with 32bit data
;------------------------------------------------------------------------------

public  _Exit, _GetMem, _GetLoMem, _GetHiMem
public  _GetIRQMask, _SetIRQMask
              
                align byte
; these are the 386power API & exception handler vector numbers
; they must be consistent with the vectors into the XXX_idt_default tables
; (17 bytes: one per entry of s16_idt_default / v16_idt_default)
intslotnum      db      08h,09h,0Ah,0Bh,0Ch,0Dh,0Eh,0Fh
                db      70h,71h,72h,73h,74h,75h,76h,77h
                        ; 16 int vectors for the two PICs
                        ; (VCPI and DPMI servers will rewrite 'em)
                db      31h       ; the extra int to add to VCPI

                align word
                
; far pointer (offset:selector) that _Exit jumps through with
; "jmp fword ptr _EXIT_ADDR"; _EXIT_SEL must stay right after _EXIT_ADDR
_EXIT_ADDR      dd      offset v32_retreal ; offset of exit function
_EXIT_SEL       dw      08h,0              ; selector of exit function
                                           ; (defaults are VCPI values)

_EXIT_DATA_SEL  dw      10h          ; 32bit data code32 selector
                                     ; (loaded into DS by _Exit)
_EXIT_SS        dw      0            ; set by DPMI code
_EXIT_ESP       dd      (STACKSIZE-(STACKUSER/2))*16
                                     ; if we are gonna terminate
                                     ; we can trash as much we want
                                     ; but it is better to avoid problems

OldIRQMask      dw      0            ; old port 21h and 0a1h masks
                                     ; (high byte = port 21h, low byte =
                                     ;  port 0a1h, as stored by INIT_386P)
                 align dword
                 
OldBreakISR      dd      0               ; old int 1Bh  (ctrl+break)

nextmodestack    dd      (STACKSIZE-STACKUSER)*16 ; stack for next mode switch
                              ; (offset inside codeend; lowered by one frame
                              ;  on each nested switch and raised on return)
Stack32BaseOffset dd      0    ; linear ptr to beginning of codeend
                              ; stack segment
                      align byte
d32_Fast32_To_16      df      0               ; switch from 32 to 16
d32_SaveRestoreState  df      0               ; save/restore state addr
d32_StateBufferSize   dd      0               ; length of state buffer

                      ; 16 IRQ ints
d32_OldInts           dq      16 dup(0)       ; saved interrupt addr buffer
                                              ; in dword aligned form
                                              ; in dword aligned form
;----------------------------------------------------------------------------                  
; 32 bit VCPI system data

v32_vcpientryaddr df      3800000000h    ; VCPI entry point in 386P
                  ; n.b. this "in bytes" is stored as 00,00,00,00,38h,00
                  ; i.e. offset 0 with selector 38h (first GDTvcpi slot)

;----------------------------------------------------------------------------
; XMS data

xms_entry  dd 0
xms_handle dw 0

;----------------------------------------------------------------------------
; 32bit HARD/XMS system data

; offset dword immediately followed by the selector word: together they
; form the 6 byte far pointer
s32_switchentryaddr dd large offset HARD_P2R  ;  fword ptr to hard/xms
                    dw 20h                    ;  switchpoint

;----------------------------------------------------------------------------
; 32 bit custom system data

                 align dword

s32_idt32ptr     dd      0               ; ptr to 32bit IDT

s32_SavStackOfs  dd      0               ; current saved stack offset
s32_SavStackSel  dw      0,0             ; current saved stack selector

; interrupt table needed for irq reflection from real mode
; this stores 32bit far pointers (fword) aligned into qword boundaries
; and because it is used only by VCPI/XMS/HARD, segment selector is 08h
; (each entry = offset dword + selector dword, 8 bytes)
s32_irqtable      dd   offset s32_irq0,8,offset s32_irq1,8,offset s32_irq2,8
                  dd   offset s32_irq3,8,offset s32_irq4,8,offset s32_irq5,8
                  dd   offset s32_irq6,8,offset s32_irq7,8,offset s32_irq8,8
                  dd   offset s32_irq9,8,offset s32_irqa,8,offset s32_irqb,8
                  dd   offset s32_irqc,8,offset s32_irqd,8,offset s32_irqe,8
                  dd   offset s32_irqf,8
                  dd   0,0 ,0,0 ,0,0    ; DUMMY ENTRIES TO AVOID
                                       ; OVERWRITES WHILE SETTING THE irqtable
                                       ; IN THE SET_IDT LOOP

        ;  pointer to the 32bit entry point for program start
Kick32  dd offset INIT_386P,0 ; higher word will be set with
                              ; the 32bit code selector at runtime
        align byte            
; Common 32bit startup, reached through Kick32 once protected mode is up.
; Disables ctrl+break, records the PIC interrupt masks (both are restored
; by _Exit), prints msg_going_main through DOS and jumps to _Main.
; NOTE(review): assumes DS addresses code32 and GS is a zero-based selector
; (the code reads the real mode vector table through GS) - confirm at the
; callers.
INIT_386P: ; common 32bit startup

        ; disable ctrl+break
        mov eax,gs:[1bh*4]      ; SAVE ctrl+break value, then ctrl+break OFF
        mov OldBreakISR,eax     ;

        ;db 65h,67h,0c7h,6       ; MOV DWORD PTR GS:[1bh*4],code32:nullint
        ;dw 1bh*4,nullint,code32 ;
        ; this is the equivalent code in a "wider" form
        ; (real mode vector 1Bh := code32:nullint, a lone IRET)
        mov word ptr GS:[(1Bh)*4],offset nullint
        mov word ptr GS:[(1Bh*4)+2],code32
        
        in al,21h                       ; save old PIC masks
        mov ah,al                       ; AH = port 21h mask
        ; the four conditional jumps below only waste a little time
        ; between the two port reads; whatever CF is, execution ends
        ; up at okka
        jc ukka
ukka:   jnc akka
akka:   jc ekka
ekka:   jnc okka
okka:
        in al,0a1h                      ; AL = port 0a1h mask
        mov OldIRQMask,ax               ;

        ; print msg_going_main: DOS int 21h, ah=09, DS:DX = string,
        ; registers passed in the V86 virtual register block
        mov V86dx, small offset msg_going_main
        mov V86ds,code32
        mov V86ah,09
        mov al,21h                      ; AL = interrupt number for _ExecINT
        call _ExecINT

        jmp _Main                       ; go to main code

; protected mode position where HARD_R2P goes
; before going to the final prot. mode destination
; In:
;   ESI = protected mode target CS (selector)
;   EDI = protected mode target EIP
;   ESP = stack offset relative to the start of the codeend segment
; Converts ESP by adding Stack32BaseOffset, then transfers control to
; ESI:EDI with a far return.
HARDPROT:
        cld        ; 386P default increment direction
        add esp,Stack32BaseOffset       ; set prot mode esp
        push esi                        ; store protected mode target CS
        push edi                        ; store protected mode target EIP
        retf                            ; go to protected mode destination



; 386POWER API routines (very similar to pmode routines)

; Allocate memory, preferring low memory and falling back to high memory
; In:
;   EAX = size requested
; Out:
;   CF CLEAR = memory allocated
;   CF SET   = not enough mem
;   EAX = linear pointer to mem or ?
_GetMem:
        push eax                ; keep the size for a possible second try
        call _GetLoMem
        jc short getmem_tryhi   ; no room below 1MB
getmemd:
        add esp,4               ; success: discard the saved size
        ret
getmem_tryhi:
        pop eax                 ; EAX = requested size again
        jmp short _GetHiMem     ; tail-jump: its CF/EAX are our result
        
; Allocate some low mem
; Simple bump allocator: the block starts at the current _LoMemBase and
; _LoMemBase is advanced by the requested size.  Memory is never freed.
; In:
;   EAX = size requested
; Out:
;   CF CLEAR = memory allocated
;   CF SET   = not enough mem
;   EAX = linear pointer to mem or ?
_GetLoMem:
        add eax,_LoMemBase      ; EAX = new base if the request fits
        jc short getmemerr      ; size+base wrapped past 4GB: the compare
                                ; below would wrongly accept it
        cmp eax,_LoMemTop
        jnbe short getmemerr    ; new base above top: does not fit
        xchg eax,_LoMemBase     ; commit new base, EAX = start of the block
        clc                     ; CMP may have left CF set (base < top)
        ret
getmemerr:                      ; shared failure exit (also used by _GetHiMem)
        stc
        ret
        
; Allocate some high mem
; Simple bump allocator: the block starts at the current _HiMemBase and
; _HiMemBase is advanced by the requested size.  Memory is never freed.
; In:
;   EAX = size requested
; Out:
;   CF CLEAR = memory allocated
;   CF SET   = not enough mem
;   EAX = linear pointer to mem or ?
_GetHiMem:
        add eax,_HiMemBase      ; EAX = new base if the request fits
        jc short getmemerr      ; size+base wrapped past 4GB: the compare
                                ; below would wrongly accept it
        cmp eax,_HiMemTop
        jnbe short getmemerr    ; new base above top: does not fit
        xchg eax,_HiMemBase     ; commit new base, EAX = start of the block
        clc                     ; CMP may have left CF set (base < top)
        ret
        
; Get status of IRQ mask bit
; In:
;   BL = IRQ num (0-15)
; Out:
;   AL = status: 0=enabled, 1=disabled
; AH and all other registers are preserved.
_GetIRQMask:
        push ax                 ; saved so that AH can be restored
        in al,0a1h ; get IRQ mask
        mov ah,al  ; AH = slave PIC mask  (IRQ 8-15)
        in al,21h  ; AL = master PIC mask (IRQ 0-7)
        xchg cl,bl ; borrow CL as shift count without losing the old CL
        shr ax,cl    ; shift bit to bit0
        xchg cl,bl   ; CL and BL back to their original values
        and al,1     ; mask out other bits
        mov byte ptr [esp],al   ; store the result over the saved AL, so
                                ; the POP below returns it instead of
                                ; throwing it away
        pop ax                  ; AH = caller's AH, AL = status
        ret
        
; Set status of IRQ mask bit
; Read-modify-write of the 16bit mask formed by the two PIC mask
; registers (port 0a1h = high byte, port 21h = low byte).
; In:
;   BL = IRQ num (0-15)
;   AL = status: 0=enabled, 1=disabled
; All registers are preserved.
_SetIRQMask:
        push ax
        push bx
        push cx
        push dx
        movzx dx,al             ; DX = requested state of the bit
        mov cl,bl               ; CL = IRQ number = bit position
        shl dx,cl               ; DX = new bit, in place
        mov bx,0fffeh
        rol bx,cl               ; BX = all ones except the IRQ's bit
        in al,0a1h
        mov ah,al               ; AH = current slave mask
        in al,21h               ; AL = current master mask
        and ax,bx               ; clear the IRQ's bit ...
        or ax,dx                ; ... and insert the requested state
        out 21h,al              ; master first
        mov al,ah
        out 0a1h,al             ; then slave
        pop dx
        pop cx
        pop bx
        pop ax
        ret
        
;------------------------------------------------------------------------------        
; Exit to real mode

; capacity of the exit list (number of routines _OnExit can register)
MAXX_EX=32

excount db 0                    ; number of routines currently registered

        align dword
ex_list dd MAXX_EX dup(0)       ; near offsets of the registered routines
        align byte

xtooex  db '386Power: _OnExit list full, set an higher MAXX_EX',CR,LF,'$'
        align byte
        public _OnExit
; Register a routine to be called at program termination.
; In:
;   EAX = offset of the routine
; Out:
;   nothing; all registers preserved.
; If the list is already full the program is terminated at once: the
; exit message is replaced with xtooex and execution falls through
; into _Exit (which calls the routines registered so far).
_OnExit:
        ; add routine pointed by EAX to the exit-list
        ; (routines to call on exit)
        push ebx
        movzx ebx,excount
        cmp bl,MAXX_EX ; max. exit extensions allowed
        jnb tooex
        mov [ebx*4+ex_list],eax
        inc excount
        pop ebx
        ret
tooex:
        ; EBX is left on the stack on purpose: _Exit reloads SS:ESP
        mov _386Return, offset xtooex
        ; .....continue into the exit routine

; Program termination, protected mode side.
; Reloads the segment registers and the stack with known values, calls the
; routines registered in ex_list (last registered first), prints the
; '$' terminated string whose code32 offset is in _386Return through
; INT 21h/AH=9, restores the saved ctrl+break vector and the saved PIC
; masks, then jumps to the far address stored in _EXIT_ADDR.
; Does not return to the caller.
_Exit:  ; 32bit side of shutdown code
        cli
        cld
        ; set up a trusted register configuration
        mov ax,cs:_SelData
        mov ds,ax
        mov es,ax
        mov fs,ax
        mov ss,ax              ; termination, force stack
        mov esp,(STACKSIZE)*16 ; into a "safe" zone
        add esp,Stack32BaseOffset
        mov nextmodestack,(STACKSIZE-STACKUSER)*16 ;"reset" next stack frame
        mov gs,_SelZero
        ; execute custom shutdown code first
        movzx ebx,excount      ; EBX = number of registered routines
        jmp short countcheck
exithem:
        dec ebx                ; walk the list from the last entry down to 0
        push ebx  ; save general register to avoid problems
        call [ebx*4+ex_list]
        cli                    ; the routine may have enabled interrupts
        pop ebx
countcheck:
        or ebx,ebx
        jne exithem
        ; now execute a generic exit
        mov ecx,_386Return  ;  Write exit message
        mov V86ah,9         ;
        add ecx,_Code32Base ;  code32 offset -> linear address
        mov eax,ecx         ;
        shr ecx,4           ;  linear address -> real mode segment
        and eax,0Fh         ;  ... and offset (0-15)
        mov V86ds,cx        ;
        mov V86dx,ax        ;
        mov al,21h          ;
        call _ExecINT             ;

        mov eax,OldBreakISR              ; restore ctrl+break
        mov gs:[1bh*4],eax               ; (vector 1Bh, written through GS)

        ; NOTE(review): here the LOW byte goes to port 0A1h and the HIGH
        ; byte to port 21h, while _GetIRQMask/_SetIRQMask keep port 21h in
        ; AL and port 0A1h in AH - confirm against the code that saves
        ; OldIRQMask.
        mov ax,OldIRQMask                ; restore PIC masks
        out 0a1h,al
        jc delay1  ; delay a little
delay1: jnc delay2 ;
delay2:            ;
        mov al,ah
        out 21h,al

        ; back to 16bit code16
        mov ds,_EXIT_DATA_SEL    ; this must be an alias (even a 16bit one)
                                 ; for code32
        jmp fword ptr _EXIT_ADDR
            
;------------------------------------------------------------------------------
; 32 bit DPMI system code
;------------------------------------------------------------------------------
; DPMI flavour of "call a real mode far procedure".
; In:  CX:DX = segment:offset of the procedure
; Saves all registers and EFLAGS, packs the target into ECX (segment in the
; high word) and joins d32_16common with EBP = offset callreal.
d32_ExecReal:        ; call real mode far proc: CX:DX=seg:off
        pushad
        pushfd
        shl ecx,16  ;  store seg:ofs into ECX
        mov cx,dx   ;
        
        mov ebp,offset callreal   ; redirect to CALL REAL MODE ROUTINE
        
        ; now join common int code
        jmp short d32_16common
        
; DPMI flavour of "execute a real mode interrupt".
; In:  AL = interrupt number
; Saves all registers and EFLAGS, fetches the handler address for that
; interrupt from the _OldInt table into ECX and falls through into
; d32_16common with EBP = offset intreal.
d32_ExecINT: ; call real mode INT: AL=int num
        pushad
        pushfd
        ; AND HERE WE GO TO VIRTUAL 8086 MODE
        ; PUTTING INTO tempaddr the address of the interrupt vector to call
        ; from the _OldInt vector table
        and eax,0FFh            ; EAX = interrupt number, used as table index
        mov ebp,offset intreal  ; redirect to INT REAL MODE ROUTINE
        mov ecx,[eax*4+_OldInt] ; get int handler into ecx
        ; join common int code

; Common part of d32_ExecReal / d32_ExecINT.
; In:  ECX = seg:ofs to call (stored into tempaddr)
;      EBP = offset of the code16 routine to start at
;      PUSHAD + PUSHFD frame already on the stack
; Disables the virtual interrupt flag (saving its previous state on the
; stack), saves the previously saved stack pointer, reserves a mode switch
; stack frame from nextmodestack, saves the DPMI task state on the stack
; and jumps to d32_Fast32_To_16. The matching clean-up is at d32_16done.
d32_16common:  ; int or call to real mode
        mov ax,0900h ; DPMI get state of Interrupt Flag and DISABLE IT
        int 31h
        ; AL = Interrupt Flag status
        push eax ; save Int Flag status

        mov tempaddr,ecx ; address to jump at in seg:ofs format
        
        push s32_SavStackOfs   ; save current SAVED stack
        push s32_SavStackSel   ;
        
        mov ebx,nextmodestack      ; allocate a new stack frame

        mov ax,ss ;

        sub nextmodestack,(STACKSWTR*16)      ; set new next stack

        mov es,ax ; ES == save state segment
        
        sub esp,d32_StateBufferSize ; allocate DPMI save state buffer
        mov edi,esp                 ;
        
        ; es:edi = pointer to save state buffer
        xor al,al                   ; AL=0 -> SAVE TASK STATE INFO
        call d32_SaveRestoreState   ; save DPMI info about this stack
        
        mov s32_SavStackOfs,esp  ; save stack
        mov s32_SavStackSel,ss   ;
        
        ; registers handed to d32_Fast32_To_16 (EBX = frame taken above)
        mov cx,V86es     ; real mode ES
        mov dx,codeend   ; real mode SS
        mov ax,code32    ; real mode DS
        
        mov edi,ebp     ; real mode EIP
        mov esi,code16   ; real mode CS
        jmp d32_Fast32_To_16
        
        ; DPMI RETURN FROM V86
; Clean-up half of d32_16common: undoes, in reverse order, everything that
; d32_16common put on the stack, then returns to the caller of
; d32_ExecReal / d32_ExecINT.
; On return the low byte of EFLAGS is taken from the low byte of V86F;
; the general registers are the ones saved by the initial PUSHAD.
; NOTE(review): ESP, DS and ES are presumably already reloaded by the
; switch code that jumps here - not visible in this section.
d32_16done:
        mov edi,esp  ; current stack pos
        ; ES:EDI == pointer to save state buffer
        mov al,1                   ; AL=1 -> RESTORE TASK STATE INFO
        call d32_SaveRestoreState  ; get last DPMI info saved 
        
        add esp,d32_StateBufferSize ; remove DPMI INFO block
        
        pop s32_SavStackSel   ; Restore previous saved stack
        pop s32_SavStackOfs   ;
        
        add nextmodestack,STACKSWTR*16 ; restore space used for previous 
                                       ; stack frame
        
        pop eax    ; RESTORE Int Flag found on entry to mode-switch routine
        mov ah,9  ; AX = 09xxh, AL = state saved by d32_16common
        int 31h   ; restore previous Int Flag status
        
        mov ax,ds        ; restore selectors
        mov es,ax        ;
        mov fs,ax        ;
        mov gs,_SelZero  ;

        popfd
        mov ah,byte ptr V86F    ; low flags byte stored in V86F...
        sahf                    ; ...becomes the caller's SF/ZF/AF/PF/CF
        popad   ; restore registers
        ret
        
;------------------------------------------------------------------------------        
; DPMI IRQ redirectors (needed to make all IRQ vector selectors = CS)
         align dword
; One entry point per IRQ (0-0fh). Each one is a far jump through the
; matching entry of the d32_OldInts table; entries are read as fwords
; (offset + selector) and are spaced 8 bytes apart.
d32_irq0:        jmp fword ptr cs:d32_OldInts
d32_irq1:        jmp fword ptr cs:(8   + offset d32_OldInts)
d32_irq2:        jmp fword ptr cs:(16  + offset d32_OldInts)
d32_irq3:        jmp fword ptr cs:(24  + offset d32_OldInts)
d32_irq4:        jmp fword ptr cs:(32  + offset d32_OldInts)
d32_irq5:        jmp fword ptr cs:(40  + offset d32_OldInts)
d32_irq6:        jmp fword ptr cs:(48  + offset d32_OldInts)
d32_irq7:        jmp fword ptr cs:(56  + offset d32_OldInts)
d32_irq8:        jmp fword ptr cs:(64  + offset d32_OldInts)
d32_irq9:        jmp fword ptr cs:(72  + offset d32_OldInts)
d32_irqa:        jmp fword ptr cs:(80  + offset d32_OldInts)
d32_irqb:        jmp fword ptr cs:(88  + offset d32_OldInts)
d32_irqc:        jmp fword ptr cs:(96  + offset d32_OldInts)
d32_irqd:        jmp fword ptr cs:(104 + offset d32_OldInts)
d32_irqe:        jmp fword ptr cs:(112 + offset d32_OldInts)
d32_irqf:        jmp fword ptr cs:(120 + offset d32_OldInts)

;------------------------------------------------------------------------------
; DPMI IRQ REDIRECTORS
;------------------------------------------------------------------------------
        align byte
; DPMI: read the offset of the handler installed for an IRQ
; In:
;   BL - IRQ num (0-0fh)
; Out:
;   EDX - offset of IRQ handler
;   EAX, EBX, ECX preserved

d32_getirq:
        push ebx
        push eax
        push ecx
        and ebx,0Fh                          ; EBX = IRQ number only
        movzx ebx,byte ptr intslotnum[ebx]   ; EBX = interrupt number of the IRQ
        mov eax,0204h                        ; INT 31h function 0204h
        int 31h                              ; selector comes back in CX and
        pop ecx                              ; is discarded here
        pop eax
        pop ebx
        ret

; DPMI: install a handler for an IRQ, using the current CS as selector
; In:
;   BL - IRQ num (0-0fh)
;   EDX - offset of IRQ handler
; Out:
;   all general registers preserved

d32_setirq:
        pushad
        and ebx,0Fh                          ; EBX = IRQ number only
        movzx ebx,byte ptr intslotnum[ebx]   ; EBX = interrupt number of the IRQ
        mov ax,0205h                         ; INT 31h function 0205h
        mov cx,cs                            ; handler = CS:EDX
        int 31h
        popad
        ret
        
;---------------------------------------------------------------
; SIMULATED INT 31h INTERRUPT FLAG FUNCTIONS UNDER VCPI,XMS,HARD
;---------------------------------------------------------------

; Replacement INT 31h handler for the three interrupt flag functions.
; In:  AL = 0 -> clear I.F., AL = 1 -> set I.F., AL > 1 -> only read it
; Out: AL = I.F. as it was when the INT was issued (0 or 1)
; The flag is changed in the EFLAGS image on the interrupt frame
; (byte at [esp+9] = EFLAGS bits 8-15, I.F. = bit 1 of that byte), so the
; new state becomes effective with the IRETD.
s32_int31: ; INT 31h: AX=900h,901h,902h
        cmp al,1
        mov al,[esp+9]              ; AL = flags byte before any change
        ja short s32_int31f1        ; function 2: nothing to modify
        jb short s32_int31f0        ; function 0: clear
        or byte ptr [esp+9],2       ; function 1: set I.F.
        jmp short s32_int31f1
s32_int31f0:
        and byte ptr [esp+9],0fdh   ; clear I.F.
s32_int31f1:
        shr al,1                    ; move the old I.F. to bit 0
        and al,1                    ; and return only that bit
        iretd


;-----------------------------------------------------------------------------
; VCPI STUFF
;-----------------------------------------------------------------------------
        
; 386POWER API ints
; VCPI flavour of "call a real mode far procedure".
; In:  CX:DX = segment:offset of the procedure
; Saves registers and EFLAGS, disables interrupts, packs the target into
; ECX and joins v32_16common with EBP = offset callreal.
v32_ExecReal:              ; real mode call far : CX:DX=seg:off
        pushad
        pushfd
        cli
        shl ecx,16         ; segment into the high word of ECX
        mov cx,dx          ; offset into the low word
        mov ebp,offset callreal
        jmp short v32_16common
        
; VCPI flavour of "execute a real mode interrupt".
; In:  AL = interrupt number
; Saves registers and EFLAGS, disables interrupts, loads the handler
; address from _OldInt into ECX and falls through into v32_16common with
; EBP = offset intreal.
v32_ExecINT:                 ; real mode INT : AL=int num
        pushad
        pushfd
        cli
        and eax,0FFh                 ; EAX = interrupt number as table index
        mov ebp,offset intreal
        mov ecx,[eax*4+_OldInt]

; Common part of v32_ExecReal / v32_ExecINT.
; In:  ECX = seg:ofs to call (stored into tempaddr)
;      EBP = offset of the code16 routine to start at
; Reserves a mode switch stack frame, saves the current stack pointer in
; s32_SavStackOfs (the old value is kept on the stack), builds the register
; frame for the VCPI switch call (EAX=0DE0Ch) and far-calls the VCPI entry
; point. Execution resumes at v32_exec_d.
; The segment labels below follow the order used by the other VCPI frames
; in this file (v32_irq, v32_retreal); the first two slots both receive
; code32.
v32_16common: ; int or call to real mode
        mov esi,nextmodestack     ; new stack frame
        sub nextmodestack,(STACKSWTR*16)
        mov edi,code32
        mov tempaddr,ecx
        xor eax,eax
        push s32_SavStackOfs       ;
        mov s32_SavStackOfs,esp    ; save stack
        mov ebx,codeend ; SS value
        mov ax,V86es   ; ES value
        mov edx,code16
        push edi     ;  GS
        push edi     ;  FS
        push edi     ;  DS (will be switched to V86ds into real mode side)
        push eax     ;  ES
        push ebx     ;  SS
        push esi     ;  ESP
        sub esp,4   ; make space for eflags
        mov ebx,18h ; selzero
        mov eax,0de0ch  ; vcpi switch code
        push edx                ; CS
        push ebp                ; EIP = intreal xor callreal
        ; VCPI switch to real mode routine
        mov ds,bx
        call cs:v32_vcpientryaddr  ; n.b. this is a FAR call
        
; Clean-up half of v32_16common: reloads the selectors (10h for
; DS/ES/FS/SS, 18h for GS), restores the stack saved in s32_SavStackOfs,
; releases the mode switch stack frame and returns to the caller of
; v32_ExecReal / v32_ExecINT.
; On return the low byte of EFLAGS is taken from the low byte of V86F;
; the general registers are the ones saved by the initial PUSHAD.
v32_exec_d:  ; done with INT32/33 from V86 mode
        mov ebx,18h
        mov eax,10h
        mov gs,bx
        mov ds,ax
        mov es,ax
        mov fs,ax
        mov ss,ax
        mov esp,s32_SavStackOfs
        pop s32_SavStackOfs             ; previous saved stack pointer
        add nextmodestack,STACKSWTR*16  ; give the stack frame back
        popfd
        mov ah,byte ptr V86F            ; low flags byte stored in V86F...
        sahf                            ; ...becomes the caller's low flags
        popad
        ret
        
;------------------------------------------------------------------------        
; exceptions handlers: some are terminal, others are redirected
; to the irq handler.
        align byte
; No recovery is attempted: control goes straight to _Exit.
v32_exc: ; generic exception handler
        jmp _Exit ; TERMINATE PROGRAM (or at least try to)

;-----------------------------------------------------------------------------
; IRQ redirector between modes
; this is the prot. mode to real mode switch-code

        align byte
; One stub per IRQ. Each stub saves ECX on the stack, loads ECX with the
; real mode interrupt number of its IRQ (08h-0Fh for IRQ 0-7, 70h-77h for
; IRQ 8-15) and reaches v32_irq with a short jump; the last stub simply
; falls through into it. v32_irqret pops the saved ECX before its IRETD.
v32_irq0:
        push ecx
        mov ecx,8
        jmp short v32_irq
v32_irq1:
        push ecx
        mov ecx,9
        jmp short v32_irq
v32_irq2:
        push ecx
        mov ecx,0ah
        jmp short v32_irq
v32_irq3:
        push ecx
        mov ecx,0bh
        jmp short v32_irq
v32_irq4:
        push ecx
        mov ecx,0ch
        jmp short v32_irq
v32_irq5:
        push ecx
        mov ecx,0dh
        jmp short v32_irq
v32_irq6:
        push ecx
        mov ecx,0eh
        jmp short v32_irq
v32_irq7:
        push ecx
        mov ecx,0fh
        jmp short v32_irq
v32_irq8:
        push ecx
        mov ecx,70h
        jmp short v32_irq
v32_irq9:
        push ecx
        mov ecx,71h
        jmp short v32_irq
v32_irqa:
        push ecx
        mov ecx,72h
        jmp short v32_irq
v32_irqb:
        push ecx
        mov ecx,73h
        jmp short v32_irq
v32_irqc:
        push ecx
        mov ecx,74h
        jmp short v32_irq
v32_irqd:
        push ecx
        mov ecx,75h
        jmp short v32_irq
v32_irqe:
        push ecx
        mov ecx,76h
        jmp short v32_irq
v32_irqf:
        push ecx
        mov ecx,77h
        ; no jump: falls through into v32_irq
;-----------------------------------------------------------------------------
; generic IRQ handler, ecx =real mode INT to call
;
; VCPI: reflect an IRQ received in protected mode to its real mode handler.
; In:  ECX = real mode interrupt number (set by the v32_irqN stub, which
;      also left the interrupted ECX on the stack)
; Saves the segment and general registers, loads EBP with the handler
; address from _OldInt, reserves a mode switch stack frame, saves the
; stack pointer in s32_SavStackOfs, builds the VCPI switch frame with
; code16:v16_irqreal as destination and far-calls the VCPI entry point.
; Execution resumes at v32_irqret.
v32_irq:
         
        push ds 
        push es 
        push fs 
        push gs
        pushad
        mov eax,10h ; _SelData
        mov ds,ax
        ; ecx has been already set with the int number to call
        mov ebp,[ecx*4+_OldInt]  ; set address of interrupt routine

        mov esi,nextmodestack                 ; new stack frame
        sub nextmodestack,(STACKSWTR*16)      ;
        mov eax,code32
        push s32_SavStackOfs       ; save stack
        mov s32_SavStackOfs,esp    ;
        push eax           ; GS
        push eax           ; FS
        mov  ebx,codeend
        push eax           ; DS
        push eax           ; ES
        push ebx           ; SS
        mov eax,code16
        push esi           ; ESP
        sub esp,4 ;make space for eflags
        mov edx,18h     ; set linear selector into edx
        push eax  ;cs
        db 68h                        ; 32bit PUSH IMMEDIATE offset v16_irqreal
        dd large offset v16_irqreal   ;
        ; VCPI switch to real mode routine
        mov eax,0de0ch
        mov ds,dx
        call cs:v32_vcpientryaddr  ; n.b. this is a FAR call
        
; Clean-up half of v32_irq: restores the stack saved in s32_SavStackOfs,
; releases the mode switch stack frame, restores every register saved by
; v32_irq plus the ECX saved by the v32_irqN stub, and ends the interrupt.
v32_irqret: ; now we are back from real mode handler
        mov eax,10h ; _SelData
        mov ds,ax
        mov ss,ax
        mov esp,s32_SavStackOfs   ; restore saved stack
        pop s32_SavStackOfs       ;
        
        add nextmodestack,STACKSWTR*16
        
        popad
        pop gs 
        pop fs 
        pop es 
        pop ds
        pop ecx                   ; pushed by the v32_irqN stub
        iretd

;------------------------------------------
; vcpi irq from real mode to protected mode
; "foot" into prot. mode
;------------------------------------------
        align byte
; VCPI: protected mode side of an IRQ reflected from real/V86 mode.
; In:  ECX = offset of the handler's entry inside s32_irqtable
; Loads DS/SS with selector 10h, takes a stack frame from nextmodestack,
; calls the protected mode handler as an interrupt (PUSHFD + far call),
; then builds a VCPI switch frame that returns to code16:v16_virqdone on
; the real mode stack stored in s16_SavStackSeg:s16_SavStackOfs.
; NOTE(review): nextmodestack is lowered here but not raised again in this
; routine - presumably done on the v16_virqdone side, confirm.
v32_virqserver:
         ; IRQ reflection: real/v86 --> prot. mode
         ; this is an IRQ with a prot. mode handler
         ; happened while in real mode.
         ;
        mov eax,10h ; _SelData
        mov ds,ax

        mov ss,ax                             ;
        mov esp,nextmodestack                 ; reload stack pointer
        sub nextmodestack,(STACKSWTP*16)      ; adjust it to 32bit
        add esp,Stack32BaseOffset             ; and move "next stack" pointer

        ; ecx has been already set with the offset into the irq table
        ; of the handler to call

        pushfd                            ;
        call fword ptr [ecx+s32_irqtable] ; execute the p. mode irq

        
        mov  esi,s16_SavStackOfs ; get back real mode stack

        mov eax,code32
        mov  ebx,dword ptr s16_SavStackSeg
                        ;get back real mode segment (high word is zero)
        push eax           ; GS
        push eax           ; FS
        push eax           ; DS
        push eax           ; ES
        push ebx           ; SS
        push esi           ; ESP
        sub esp,4 ;make space for eflags
        mov edx,18h     ; set linear selector into edx
        db 68h      ; 32bit PUSH IMMEDIATE code16
        dd code16   ;
        db 68h                       ; 32bit PUSH IMMEDIATE offset v16_virqdone
        dd large offset v16_virqdone ;
        ; VCPI switch to real mode routine
        mov eax,0de0ch
        mov ds,dx
        call cs:v32_vcpientryaddr  ; n.b. this is a FAR call

;-----------------------------------------------------------------------------
; HARD/XMS STUFF
;-----------------------------------------------------------------------------
                  
; 386POWER API ints
        align byte
; XMS/HARD flavour of "call a real mode far procedure".
; In:  CX:DX = segment:offset of the procedure
; Saves registers and EFLAGS, disables interrupts, packs the target into
; ECX and joins s32_16common with EDI = offset callreal.
s32_ExecReal:                  ; real mode FAR proc: CX:DX=seg:off
        pushad
        pushfd
        cli
        shl ecx,16             ; segment into the high word of ECX
        mov cx,dx              ; offset into the low word
        mov edi,offset callreal
        jmp short s32_16common
        
; XMS/HARD flavour of "execute a real mode interrupt".
; In:  AL = interrupt number
; Saves registers and EFLAGS, disables interrupts, loads the handler
; address from _OldInt into ECX and falls through into s32_16common with
; EDI = offset intreal.
s32_ExecINT:                   ; real mode INT: AL=int num
        pushad
        pushfd
        cli
        and eax,0FFh                   ; EAX = interrupt number as table index
        mov edi,offset intreal
        mov ecx,[eax*4+_OldInt]

; Common part of s32_ExecReal / s32_ExecINT.
; In:  ECX = seg:ofs to call, EDI = offset of the code16 routine to start at
; Saves the current stack pointer in s32_SavStackOfs (old value kept on
; the stack), moves ESP to a fresh mode switch stack frame (32bit form:
; frame + Stack32BaseOffset) and far-calls the mode switch entry point
; with ESI:EDI = code16:destination. Execution resumes at s32_exec_d.
s32_16common: ; int or call to real mode
        push s32_SavStackOfs       ;
        mov s32_SavStackOfs,esp    ; save stack
        mov esp,nextmodestack     ; new stack frame
        sub nextmodestack,(STACKSWTR*16)     ; ss:esp to new stack frame
        add esp,Stack32BaseOffset           ;
        ; (will be converted to 16bit by mode switch code)
        ; ECX = address/vector to call
        mov esi,code16    ; set "switcher" address
                          ; edi  already set to destination offset
        call fword ptr s32_switchentryaddr

; Clean-up half of s32_16common: reloads SS, restores the stack saved in
; s32_SavStackOfs, releases the mode switch stack frame and returns to the
; caller of s32_ExecReal / s32_ExecINT.
; On return the low byte of EFLAGS is taken from the low byte of V86F;
; the general registers are the ones saved by the initial PUSHAD.
s32_exec_d:  ; done with INT32/33 from V86 mode
        mov eax,10h               ; restore stack
        mov ss,ax                 ;
        mov esp,s32_SavStackOfs   ;
        pop s32_SavStackOfs       ;

        add nextmodestack,STACKSWTR*16
        popfd
        mov ah,byte ptr V86F      ; low flags byte stored in V86F...
        sahf                      ; ...becomes the caller's low flags
        popad
        ret
        
;------------------------------------------------------------------------        
; exceptions handlers: some are terminal, others are redirected
; to the irq handler.
        align byte
; No recovery is attempted: control goes straight to _Exit.
s32_exc: ; generic exception handler
        jmp _Exit ; TERMINATE PROGRAM (or at least try to)

;-----------------------------------------------------------------------------
; IRQ redirector between modes
; this is the prot. mode to real mode switch-code

        align byte
; One stub per IRQ. Each stub saves ECX on the stack, loads ECX with the
; real mode interrupt number of its IRQ (08h-0Fh for IRQ 0-7, 70h-77h for
; IRQ 8-15) and reaches s32_irq with a short jump; the last stub simply
; falls through into it. s32_irqret pops the saved ECX before its IRETD.
s32_irq0:
        push ecx
        mov ecx,8
        jmp short s32_irq
s32_irq1:
        push ecx
        mov ecx,9
        jmp short s32_irq
s32_irq2:
        push ecx
        mov ecx,0ah
        jmp short s32_irq
s32_irq3:
        push ecx
        mov ecx,0bh
        jmp short s32_irq
s32_irq4:
        push ecx
        mov ecx,0ch
        jmp short s32_irq
s32_irq5:
        push ecx
        mov ecx,0dh
        jmp short s32_irq
s32_irq6:
        push ecx
        mov ecx,0eh
        jmp short s32_irq
s32_irq7:
        push ecx
        mov ecx,0fh
        jmp short s32_irq
s32_irq8:
        push ecx
        mov ecx,70h
        jmp short s32_irq
s32_irq9:
        push ecx
        mov ecx,71h
        jmp short s32_irq
s32_irqa:
        push ecx
        mov ecx,72h
        jmp short s32_irq
s32_irqb:
        push ecx
        mov ecx,73h
        jmp short s32_irq
s32_irqc:
        push ecx
        mov ecx,74h
        jmp short s32_irq
s32_irqd:
        push ecx
        mov ecx,75h
        jmp short s32_irq
s32_irqe:
        push ecx
        mov ecx,76h
        jmp short s32_irq
s32_irqf:
        push ecx
        mov ecx,77h
        ; no jump: falls through into s32_irq
;-----------------------------------------------------------------------------
; generic IRQ handler, ecx =real mode INT to call
;
; XMS/HARD: reflect an IRQ received in protected mode to real mode.
; In:  ECX = real mode interrupt number (set by the s32_irqN stub, which
;      also left the interrupted ECX on the stack)
; Saves the segment and general registers, saves the stack pointer in
; s32_SavStackOfs, loads EBP with the handler address from _OldInt, moves
; ESP to a fresh mode switch stack frame and jumps to the switch point
; with ESI:EDI = code16:s16_irqreal. Execution resumes at s32_irqret.
s32_irq:
         ; reflect irq: prot. mode -> real mode
         ; this is an IRQ happened while in protected mode
         ; with a real mode interrupt handler, so we must switch back
         ; to real mode, execute the irq and then get back.
        push ds 
        push es 
        push fs 
        push gs     
        pushad
        mov eax,10h ; _SelData
        mov ebx,18h ; _SelZero
        mov ds,ax
        mov gs,bx
        
        push s32_SavStackOfs       ; save stack
        mov s32_SavStackOfs,esp    ;
        ; ecx has been already set with the int number to call
        mov ebp,[ecx*4+_OldInt]  ; set address of interrupt routine

        ; The base offset must be added AFTER loading the new frame (same
        ; sequence as s32_16common and v32_virqserver). It used to be added
        ; first and was then thrown away by the "mov esp", leaving ESP in
        ; 16bit form when entering the switch code.
        mov esp,nextmodestack             ; new stack frame
        sub nextmodestack,(STACKSWTR*16)  ;
        add esp,Stack32BaseOffset         ; 32bit form of the frame address

        mov esi,code16              ; set jump destination
        mov edi,offset s16_irqreal  ;
        db 0EAh                  ; jump to switchpoint
        dd large offset HARD_P2R ;
        dw 20h ; code16 selector ;

; Clean-up half of s32_irq: restores the stack saved in s32_SavStackOfs,
; releases the mode switch stack frame, restores every register saved by
; s32_irq plus the ECX saved by the s32_irqN stub, and ends the interrupt.
; NOTE(review): unlike s32_exec_d, SS and DS are not reloaded here -
; presumably already valid when the switch code jumps here, confirm.
s32_irqret:
        mov esp,s32_SavStackOfs   ; restore saved stack
        pop s32_SavStackOfs       ;
        
        add nextmodestack,STACKSWTR*16
        
        popad
        pop gs 
        pop fs 
        pop es 
        pop ds
        pop ecx                   ; pushed by the s32_irqN stub
        iretd

;------------------------------------------
; hard/xms irq from real mode to protected mode
; "foot" into prot. mode
;------------------------------------------
        align byte
; XMS/HARD: protected mode side of an IRQ reflected from real mode.
; In:  ECX = offset of the handler's entry inside s32_irqtable
; Calls the protected mode handler as an interrupt (PUSHFD + far call),
; then jumps to the switch point with ESI:EDI = code16:s16_virqdone.
s32_virqserver:
         ; irq reflection: real mode -> prot. mode
         ; this is an IRQ happened while in real mode
         ; with a prot. mode interrupt handler, so we must switch back
         ; to prot. mode, execute the irq and then get back.

         ; all segregs are properly set
         ; and stack is already converted to 32bit

         pushfd     ;
         call fword ptr [ecx+s32_irqtable] ; execute the p. mode irq

         mov esi,code16                    ; set return address
         mov edi,large offset s16_virqdone ;
         db 0EAh                  ; jump to switchpoint
         dd large offset HARD_P2R ;
         dw 20h ; code16 selector ;



; Custom (VCPI/XMS/HARD): read the offset of the handler of an IRQ
; In:
;   BL - IRQ num (0-0fh)
; Out:
;   EDX - offset of current IRQ handler
;   EBX and EFLAGS preserved
s32_getirq:
        push ebx
        pushfd
        cli                                      ; keep the lookup atomic
        and ebx,0Fh                              ; EBX = IRQ number only
        movzx ebx,byte ptr intslotnum[ebx]       ; EBX = interrupt number
        mov edx,dword ptr s32_irqtable[ebx*8]    ; offset part of its entry
                                                 ; (entries are 8 bytes apart)
        popfd
        pop ebx
        ret        

                  align word
; Offsets of the real mode IRQ reflector entry points, one word per IRQ
; (index = IRQ number 0-15). s32_setirq picks v16_flexers when running
; under VCPI and s16_flexers otherwise, and installs the selected entry,
; paired with segment code16, as the real mode interrupt vector.
v16_flexers       dw offset v16_virq0,offset v16_virq1,offset v16_virq2
                  dw offset v16_virq3,offset v16_virq4,offset v16_virq5
                  dw offset v16_virq6,offset v16_virq7,offset v16_virq8
                  dw offset v16_virq9,offset v16_virqa,offset v16_virqb
                  dw offset v16_virqc,offset v16_virqd,offset v16_virqe
                  dw offset v16_virqf
s16_flexers       dw offset s16_virq0,offset s16_virq1,offset s16_virq2
                  dw offset s16_virq3,offset s16_virq4,offset s16_virq5
                  dw offset s16_virq6,offset s16_virq7,offset s16_virq8
                  dw offset s16_virq9,offset s16_virqa,offset s16_virqb
                  dw offset s16_virqc,offset s16_virqd,offset s16_virqe
                  dw offset s16_virqf

                  align byte
; Custom set IRQ handler offset
; In:
;   BL  - IRQ num (0-0fh)
;   EDX - offset of new IRQ handler
; Out:
;   all registers and EFLAGS preserved
;
; Three things are updated:
;   1) the s32_irqtable entry used when reflecting from real mode
;   2) the offset fields of the protected mode IDT descriptor
;   3) the real mode interrupt vector of the IRQ (through INT 21h/AH=25h):
;      it is pointed to a reflector (v16_flexers/s16_flexers) or, when EDX
;      is the default handler of the IRQ, back to the vector saved in
;      _OldInt. Under VCPI, IRQ 0 and IRQ 1 never get a reflector.
s32_setirq:
        pushad
        pushfd
        cli
        push ebx
        push edx

        and ebx,0Fh  ; MASK to expand bl into ebx

        mov bl,[ebx+intslotnum] ; get irq int number into prot. mode IDT

        ; SET TABLE for reflection from real mode
        mov [ebx*8+s32_irqtable],edx

        ; SET PROT. MODE IDT ENTRY
        lea ecx,[ebx*8]      ; get location in IDT table
        add ecx,s32_idt32ptr ; get complete pointer to int entry
        mov [ecx],dx         ; modify descriptor for
        shr edx,16           ; new 32bit offset & same segment
        mov [ecx+6],dx       ;

        pop edx
        pop ebx
        and ebx,0Fh  ; EBX = IRQ number again. The popped value is the
                     ; caller's EBX, whose upper 24 bits are undefined;
                     ; every table lookup below uses the whole register.

        ; now it's time to check for the reflection strategy
        ; between real and protected mode

        mov esi,offset s16_idt_default  ; XMS/HARD data
        mov edi,offset s16_flexers      ;
        cmp _386Man,IS_VCPI    ; if not vcpi, force irq reflection
        jne turn_on_reflector  ;

        ; if VCPI
        ;       irq reflection strategy is more complex
        ;       because vcpi does not fully support reflection
        ;       IRQ 0 and IRQ 1 cannot be directly reflected
        ;       because the VCPI server "interacts" with 'em
        ;       ( don't ask me why, maybe it's because the VCPI server
        ;         interacts with the keyboard controller, or maybe
        ;         it reflects itself 'em)

        ; first of all, check if we need to kill this
        mov esi,offset v16_idt_default
        mov edi,offset v16_flexers
        ; then check if this is an irq it is better to not reflect
        ; directly... (irq0 or irq1)
        or bl,bl
        je short s32_sirq_c4d
        cmp bl,1
        jne short turn_on_reflector
s32_sirq_c4d:
        cmp edx,[ebx*4+esi] ;Are we restoring the default handler ?
        je  short check4dos ;Yes! restore the real mode handler to default too!
        popfd
        popad
        ret

turn_on_reflector:
        ; this forces a full reflection from real to protected mode
        mov di,word ptr [ebx*2+edi]  ; set reflector's offset
check4dos:               ;
        mov V86ds,code16 ; irq reflector's code segment
        cmp edx,[ebx*4+esi]     ; are we restoring the default handler ?
        jne  short map_real_int ; no, go set the reflector
control_to_dos: ; yes, restore "real" real mode handler
        ; _OldInt is indexed by INTERRUPT number (see the ExecINT routines
        ; and the irq stubs), so the IRQ number has to be translated first:
        ; IRQ 0-7 -> INT 08h-0Fh, IRQ 8-15 -> INT 70h-77h.
        ; (EBX used to be taken as it is, fetching the vector of INT 0-15.)
        lea eax,[ebx+8]         ; master pic: int = irq + 8
        cmp bl,8
        jb short s32_sirq_oldvec
        add eax,(70h-8-8)       ; slave pic: int = irq - 8 + 70h
s32_sirq_oldvec:
        mov eax,[eax*4+_OldInt]
        mov edi,eax  ; copy seg:offset and avoid to use the "word" override
        shr eax,16
        mov V86ds,ax ; irq reflector's code segment
map_real_int:
        mov ax,02508h  ; set interrupt vectors MS-DOS function
                       ; irq 0 ...  7  (master pic irqs) starts from int 8
        cmp bl,8
        jb enslave
        mov al,(70h-8) ; irq 8 ... 15 (slave pic irqs) starts from int 70h
enslave:
        mov V86dx,di
        add al,bl
        mov V86ax,ax
        mov al,21h
        call _ExecINT
        ; IRQ installation and reflection completed
        popfd
        popad
        ret


        
; physical memory mapper
        align dword
; State of the physical memory mapper (see _MapPhysMem)
_MapMemBase   dd 0  ; next free address of the mapping window (also used as
                    ; scratch for the result on the DPMI path)
_MapMemTop    dd 0  ; end of the mapping window
_PhysMapTable dd 0  ; page table ptr initialized by startup code
        align byte

; XMS/HARD branch of _MapPhysMem.
; In:  EAX = physical (= linear) address
; Out: EAX = the same address as code32 relative offset, CF clear
map_soo_hard:
        ; well, this is a simple conversion from "linear"
        ; to code32 (no paging under XMS and HARD mode)
        sub eax,_Code32Base
        clc
        ret

        public _MapPhysMem
_MapPhysMem:
        ; Make a range of physical memory addressable.
        ; in:
        ; eax = phys mem base , edx = phys mem size
        ; out:
        ; IF CARRY CLEAR eax = code32 offset equivalent to base address
        ; ELSE ERROR
        ; All other registers are preserved.
        ;
        ; DPMI     : INT 31h function 0800h
        ; VCPI     : page table entries are written at _PhysMapTable and the
        ;            matching slice of the window _MapMemBase.._MapMemTop
        ;            is handed out
        ; XMS/HARD : plain address conversion (map_soo_hard)
        cmp _386Man,IS_DPMI
        je dpmimap
        cmp _386Man,IS_VCPI
        jne map_soo_hard
vcpimap:push edx
        push ebx
        push ecx
        push edi
        mov ebx,_MapMemBase
        add edx,4095
        jc nophyspace      ; size so large that the rounding wrapped around
        mov ecx,_MapMemTop
        and edx,0FFFFF000h ; round to pages
        sub ecx,ebx ; available space
        cmp ecx,edx
        jb nophyspace
        mov ecx,edx
        shr edx,12 ; get how many pages to allocate        
        add ecx,ebx 
        mov _MapMemBase,ecx ; set new base
        ; ebx = value to return
        and eax,0FFFFF000h ; page align and set paging bits
        or al,7        ;
        mov edi,_PhysMapTable
        or edx,edx
        jz short physmapped ; zero sized request: nothing to write (the
                            ; loop below would otherwise run 2^32 times)
pluppa:        
        mov [edi],eax
        add edi,4      ; next page table entry. This step was missing:
                       ; every page went into the same entry and
                       ; _PhysMapTable never moved forward.
        add eax,1000h
        dec edx
        jne pluppa
        mov _PhysMapTable,edi ; first entry still free
physmapped:
        mov eax,ebx ; base offset
        clc
gpout:        
        pop edi
        pop ecx
        pop ebx
        pop edx
        ret
nophyspace:
        stc
        jmp short gpout        
        
dpmimap:        
        ; NOTE(review): this branch returns the linear address given by
        ; the DPMI host as it is, while the XMS/HARD branch subtracts
        ; _Code32Base - confirm which form the callers expect under DPMI.
        pushad
        mov _MapMemBase,0
        mov cx,ax          ; BX:CX = physical address
        shr eax,16        
        mov di,dx          ; SI:DI = size
        mov bx,ax
        shr edx,16        
        mov si,dx
        mov ax,800h
        int 31h
        jc soo_bad
        mov ax,bx          ; BX:CX = linear address of the mapping
        shl eax,16
        mov ax,cx
        mov _MapMemBase,eax ; carry the result across the popad
        popad
        mov eax,_MapMemBase
        clc
        ret
soo_bad:stc
        popad
        ret

        align byte
; Builds the VCPI switch frame for the final return to real mode:
; destination code16:v16_retreal, stack codeend:(STACKSIZE-(STACKUSER/2))*16,
; all data segments = code32. Then far-calls the VCPI entry point with
; EAX low word = 0DE0Ch and DS = selector 18h. Never returns.
v32_retreal: ; VCPI return to real mode ( called from _Exit routine)
        mov ebx,((STACKSIZE-(STACKUSER/2))*16)
        mov edx,code32
        mov ecx,codeend
        mov esi,code16
        mov edi, offset v16_retreal
        ; now set-up the return environment
        ; (n.b. 16bit values MUST be pushed as dwords)
        push edx   ; DWORD  GS  = code32
        push edx   ; DWORD  FS  = code32
        push edx   ; DWORD  DS  = code32
        push edx   ; DWORD  ES  = code32
        push ecx   ; DWORD  SS  = codeend
        push ebx   ; DWORD ESP  = translated SP
        sub esp,4  ; make space for eflags
        push esi   ; DWORD  CS  = code16
        mov ax,18h
        push edi   ; DWORD EIP  = offset v16_retreal
        mov ds,ax ; load linear memory descriptor into DS
        mov ax,0de0ch
        call cs:v32_vcpientryaddr ; N.B. this is a FAR call with no return

; Virtual DMA support module (integrated into the dos-extender)
; It uses the Virtual DMA services and directly access the DMA hardware
; to provide an higher level interface to VDS scatter/gather DMA
; As you can see the VDS is called from VIRTUAL 8086 MODE
; the main reason for this is that i'm not sure if ALL the current VDS 
; implementations let you use VDS from protected mode
; (if you know more, please tell me)

; Public VDS information words. They are only declared (as zero) here;
; the code that fills them is not part of this section.
_VDSVersion   dw 0 ; version
_VDSPNumber   dw 0 ; Product number
_VDSPRevision dw 0 ; product revision
_VDSFlags     dw 0 ; Virtual DMA Services flags
                   ; (see the VDS doc on x2ftp.oulu.fi for more info)
            public _VDSVersion,_VDSPNumber,_VDSPRevision,_VDSFlags

; One byte per DMA channel (8 entries).
; NOTE(review): these look like the I/O ports of the DMA page registers of
; channels 0-7 - confirm against the code that reads the table.
DMAPAGE db 87h,83h,81h,82h,8Fh,8Bh,89h,8Ah



; Initializes the VDMA system (nothing to set up in this version)
; Out: CF clear = OK
;      EAX = number of dma channels
VDMAInit:
        clc             ; always succeeds
        mov eax,16
        ret
        
; Reports whether VDMA support is available. This version performs no
; detection at all and always answers "present" (CF clear).
VDMACheck: ; out: carry clear if OK
        ; checks for VDMA support installed
        ; (always present under VCPI and DPMI)
        ; (never present under XMS and "raw 386", direct access to services is
        ;  used instead)
        clc ; present
        ret

; ScatterList structure (built by DScatLock, consumed by DScatUnLock)
; Byte offsets of the fields from the start of the structure:
VDMA_PTR   = 0  ; dd code32 relative pointer
VDMA_SIZE  = 4  ; dd size
VDMA_OFS   = 8  ; dd offset (the current version uses low memory)
VDMA_SEG   = 12 ; dw real mode segment (the current version uses low mem)
VDMA_PAGES = 16 ; dw pages requested
VDMA_USED  = 18 ; dw pages actually allocated (use this to determine
                ; how many (code32,physical) address couples
                ; are present starting from VDMA_SCATS)
VDMA_SCATS = 20 ; start of "couples" of scattered page entries
; Where the VDS server puts only page entries,
; the VDMA system will put COUPLES of dwords:
; the first dword is the code32 relative offset of the page,
; the 2nd dword is the physical address you have to send
; to the dma hardware.
; So at offset VDMA_SCATS    you find the code32 address of the first page,
;          at  VDMA_SCATS+4  you find the equivalent PHYSICAL address
; and so on ...

; Allocates & initializes a VDMA ScatterList into low memory
; IN: EAX= Size of buffer
; OUT: if Carry Clear then
;         EAX = Ptr to Scatterlist, _LoMemBase Updated
;      else EAX= pointer to error string for 386Return
; ECX, EDX, EBX, ESI, EDI are preserved.
;
; Steps: round the size to whole pages, reserve list + page aligned
; buffer from low memory (keeping LOWMIN K free), fill the list header,
; then either ask VDS to lock the region (INT 4Bh, AX=8105h, through
; _ExecINT) or, under XMS/HARD, build the page list by hand (falselock),
; and finally expand the page entries into (offset,entry) couples.
;
; NOTE(review): points to confirm, this routine was left untouched:
;  - no error string is loaded into EAX on any failure path
;  - ESI is turned into a linear address but is then used as a plain
;    DS relative pointer for all the header stores
;  - the expansion loop at falsedone is based on EDI (start of the data
;    region) while VDS and falselock write the page entries at
;    ESI+VDMA_SCATS
;  - VDMA_USED is not consulted, the loop always runs EBX times
DScatLock:
        push ecx
        push edx
        push ebx
        push esi
        push edi
        add eax,000000FFFh ; align size to page boundary
        mov ebx,eax
        mov esi,_LoMemBase
        and eax,0FFFFF000h ; align address to page
        shr ebx,12 ; how many pages ?
        lea ecx,[eax+ebx*8+(20+0FFFh)] ; worst case allocation
                ; eax= size requested (aligned to 4k page)
                ; ebx= size of scatter/gather list
                ; plus "overflow" constants
        mov edx,_LoMemTop
        sub edx,esi              ; edx = free low memory
        sub edx,ecx
        jb FAILURE               ; not enough room (carry is set)
        sub edx,(LOWMIN*1024)
        jb FAILURE               ; less than LOWMIN K would remain
        add esi,ecx
        mov _LoMemBase,esi ; Store NEW low memory base
        sub esi,ecx
        add esi,_Code32Base ; get linear address
        lea edi,[esi+ebx*8+20] ;LINEAR start of dma data region
        mov [esi+VDMA_SIZE],eax ; SET SIZE
        add edi,000000FFFh ; align to page boundary
        and edi,0FFFFF000h ;
        mov [esi+VDMA_PAGES],ebx ; SET AVAILABLE PAGES (and zeroes VDMA_USED)
        mov [esi],edi  ; SET ALIGNED ADDRESS
        mov  ecx,edi
        mov  edx,edi
        and  ecx,0Fh ; ofs
        shr  edx,4   ; seg
        mov [esi+VDMA_OFS],ecx ; SET REAL MODE OFS
        mov [esi+VDMA_SEG],edx ; SET REAL MODE SEG
        cmp _386Man,IS_XMS
        je falselock
        cmp _386Man,IS_HARD
        je falselock
        pushad
        add esi,4                ; esi = address of the VDMA_SIZE field,
        mov ecx,esi              ; passed to VDS as real mode ES:DI
        mov edx,esi
        and ecx,0Fh
        shr edx,4
        mov V86ax,8105h ; Lock
        mov al,4Bh
        mov V86di,cx
        mov V86es,dx
        mov V86dx,0040h ;4k scatter/gather
        call _ExecINT
        popad
        jc FAILURE               ; carry as left by _ExecINT
falsedone:
        ; Well! Now expand the scatter/gather structure with
        ; the code32 relative values
        push esi ; save pointer to table
        lea edx,[edi+eax-4096] ; code32 offset of last page
        lea esi,[edi+ebx*4-4]  ; last page entry
        lea edi,[edi+ebx*8-8]  ; last extended page entry
extrloader:        
        ; walks backwards: the expanded list grows over the packed one
        mov eax,[esi]
        mov [edi],edx
        sub esi,4
        mov [edi+4],eax
        sub edx,4096
        sub edi,8
        dec ebx
        jne extrloader
        pop eax ;get back pointer to scatter/gather table
        ; here carry is clear
FAILURE:        
        pop edi
        pop esi
        pop ebx
        pop edx 
        pop ecx
        ret

; XMS/HARD replacement for the VDS lock call used by DScatLock: writes one
; dword per page at [scatter list + VDMA_SCATS], holding the address of
; each consecutive 4K page of the region, then rejoins DScatLock.
falselock:
        ; In: edi = linear start of region to lock
        ;     esi = scatter list
        ;     ebx = pages to allocate
        ; All registers are preserved.
        pushad
        lea esi,[esi+VDMA_SCATS]   ; esi = first page entry
scatset:
        mov [esi],edi              ; entry = address of this page
        add edi,4096               ; next page
        add esi,4                  ; next entry
        dec ebx
        jnz scatset
        popad
        jmp short falsedone
                     
; Frees (unlocks) a VDMA ScatterList
; IN:  EAX = ScatterList to unlock (table pointer handed back by the lock
;            routine; the VDS descriptor proper starts at EAX+4)
; OUT: Carry clear when running under XMS / HARD (nothing was locked, so
;      nothing has to be released).
;      Otherwise the flags are whatever _ExecINT left (popad does not touch
;      flags) - presumably carry set on a VDS error; confirm in _ExecINT.
;      All general registers are preserved.
;
; The lock routine expands every 4 byte page entry into an 8 byte pair
; { offset, physical page entry }.  Before the descriptor is handed back to
; VDS the physical entries are packed again into consecutive dwords starting
; at VDMA_SCATS.

falseunlock:
        clc
        ret

DScatUnLock:
        ; The host checks come BEFORE pushad: falseunlock is a bare clc/ret,
        ; so reaching it with 32 bytes of saved registers still on the stack
        ; would make the ret pop a saved register as return address.
        cmp _386Man,IS_XMS
        je falseunlock
        cmp _386Man,IS_HARD
        je falseunlock
        pushad
        movzx ebx,word ptr [eax+VDMA_PAGES] ; ebx = number of page entries
        lea ecx,[eax+4]                     ; ecx = start of the VDS descriptor
        lea edi,[eax+(4+VDMA_SCATS)]        ; edi -> physical dword of pair 0 (source)
        lea esi,[eax+VDMA_SCATS]            ; esi -> packed slot 0 (destination)
        mov edx,ecx
        test ebx,ebx                        ; no entries: skip the pack loop
        jz DScatUnLockVDS                   ; (dec/jne would otherwise wrap around)
GatherPack:
        ; Source runs at stride 8, destination at stride 4.  The source is
        ; always ahead of the destination, so packing in place is safe.
        mov eax,[edi]
        mov [esi],eax
        add esi,4
        add edi,8
        dec ebx
        jne GatherPack
DScatUnLockVDS:
        ; Split the descriptor address into a real mode seg:ofs pair
        and ecx,0Fh
        shr edx,4
        mov V86ax,8106h ; Unlock
        mov al,4Bh      ; presumably the interrupt number for _ExecINT (VDS = int 4Bh)
        mov V86di,cx
        mov V86es,dx
        mov V86dx,0040h ;4k scatter/gather
        call _ExecINT
        popad
        ret


; DChanLock - locks a DMA channel
; IN:  EAX = channel number (only AX is forwarded, through V86bx)
; OUT: Carry set if error.  Under XMS / HARD there is nothing to lock and
;      the routine returns at once with carry clear.
;      On the VDS path the flags are the ones _ExecINT returns with.
; NOTE: AL is overwritten with 4Bh on the VDS path.
DChanLock:
        cmp     _386Man,IS_XMS
        je      lok
        cmp     _386Man,IS_HARD
        je      lok
        ; Fill in the register image used by _ExecINT
        mov     V86bx,ax                ; channel number
        mov     V86dx,0000h
        mov     V86ax,0810Bh            ; VDS function 810Bh
        mov     al,4bh                  ; presumably the interrupt number (VDS = int 4Bh)
        call    _ExecINT
        ret

        ; Shared "nothing to do" exit, also used by DChanUnLock
lok:
        clc
        ret

; DChanUnLock - unlocks a DMA channel
; IN:  EAX = channel number (only AX is forwarded, through V86bx)
; OUT: Carry set if error.  Under XMS / HARD nothing was locked, so the
;      routine leaves through the shared "lok" exit with carry clear.
;      On the VDS path the flags are the ones _ExecINT returns with.
; NOTE: AL is overwritten with 4Bh on the VDS path.
DChanUnLock:
        cmp     _386Man,IS_XMS
        je      lok
        cmp     _386Man,IS_HARD
        je      lok
        ; Fill in the register image used by _ExecINT
        mov     V86bx,ax                ; channel number
        mov     V86dx,0000h
        mov     V86ax,0810Ch            ; VDS function 810Ch
        mov     al,4bh                  ; presumably the interrupt number (VDS = int 4Bh)
        call    _ExecINT
        ret

; Sends data TO a device using DMA
; IN:  EAX = channel (0..3 = 8 bit controller, 4..7 = 16 bit controller)
;      EBX = Physical mem ptr
;      ECX = Length in BYTES
; OUT: nothing, all registers preserved (pushad/popad)
; n.b. if you send/receive on a 16bit channel (4..7) remember to keep
;      address and byte count WORD ALIGNED
; Remember that using the scatterlist you have to send data in chunks
; of up to 4k bytes (not a big problem to me, if you use dma to send
; voice data you HAVE to scatter it to perform software mixing)
;
; Programming sequence: mask channel, clear the byte pointer flip-flop,
; write mode, address (LSB,MSB), count (LSB,MSB), page, unmask channel.
;
; Port layout used below:
;   8 bit controller : address = 2*ch      count = 2*ch+1      (ch = 0..3)
;   16 bit controller: address = 0C0h+4*n  count = 0C2h+4*n    (n = ch-4)
;
; NOTE(review): the controller performs (programmed count + 1) transfers and
;   the raw length is programmed here - confirm whether callers pass
;   length-1 or whether a decrement belongs in this routine.
; NOTE(review): the 16 bit path indexes DMAPAGE with the controller relative
;   channel (0..3), the same index range as the 8 bit path - confirm against
;   the layout of the DMAPAGE table.

VDMASend:
        pushad
        test al,4 ; using 16 bit channels ?
        jnz DMA16
DMA8:   mov ah,al  ; ah = channel, kept for mode/port/unmask

        or al,4 ; set mask to disable dma
        out 0Ah,al ; mask reg
        
        xor al,al
        out 0Ch,al ; clear byte ptr

        mov al,48h ; write to device       
        or  al,ah 
        out 0Bh,al ; mode
        
        xor edx,edx
        mov al,bl
        mov dl,ah
        shl edx,1  ; dx = dma address port = 2*channel
        out dx,al  ; address LSB
        mov al,bh
        out dx,al  ; address MSB
        
        inc edx    ;dx = dma counter
        
        shr ebx,16 ; get page number into bl
        
        mov al,cl
        out dx,al  ; counter LSB
        mov al,ch
        out dx,al  ; counter MSB
        
        mov dl,ah  ; edx = channel (dh is still zero): index into DMAPAGE
        mov al,bl
        mov dl,[edx+DMAPAGE]  ; dx = page register port for this channel
        out dx,al             ; send page
        
        mov al,ah  ; UNmask DMA channel
        out 0Ah,al ; 
        popad
        ret
DMA16:  mov ah,al
        xor edx,edx ;@ INTER...
        out 0D4h,al ; mask reg (mask already set because of channel number)
        and ah,3    ; clear higher bit to get chip dma number
        shr ebx,1 ; WORD ALIGN 16BIT DMA ADDR
        xor al,al
        out 0D8h,al ; clear byte ptr
        mov dl,ah   ;@...LEAVE  ....
        mov al,48h ; write to device       
        or  al,ah 
        shl edx,2   ;@... THIS .... (address ports are 4 apart on this chip)
        out 0D6h,al ; mode
        shr ecx,1  ;WORD ALIGN 16BIT DMA COUNTER
        mov al,bl
        add dl,0C0h  ; dx = dma address port = 0C0h + 4*chip channel
        out dx,al    ; address LSB
        mov al,bh
        out dx,al  ; address MSB
        
        add  edx,2 ;dx = dma counter
        
        shr ebx,16 ; get page number into bl cutting out bit 17
        
        mov al,cl
        out dx,al  ; counter LSB
        shl ebx,1  ; PAGE ALIGN INTO STANDARD FORMAT
        mov al,ch
        out dx,al  ; counter MSB
        
        mov dl,ah  ; edx = chip channel (dh is still zero): index into DMAPAGE
        mov al,bl
        mov dl,[edx+DMAPAGE]  ; dx = page register port
        out dx,al             ; send page
        
        mov al,ah   ; UNmask DMA channel
        out 0D4h,al ; 
        popad
        ret

; Receives data FROM a device using DMA
; IN:  EAX = channel (0..3 = 8 bit controller, 4..7 = 16 bit controller)
;      EBX = Physical mem ptr
;      ECX = Length in BYTES
; OUT: nothing, all registers preserved (pushad/popad)
; n.b. if you send/receive on a 16bit channel (4..7) remember to keep
;      address and byte count WORD ALIGNED
; Remember that using the scatterlist you have to receive data in chunks
; of up to 4k bytes (not a big problem to me, if you use dma to transfer
; voice data you HAVE to scatter it to perform software mixing)
;
; Programming sequence: mask channel, clear the byte pointer flip-flop,
; write mode, address (LSB,MSB), count (LSB,MSB), page, unmask channel.
;
; Port layout used below:
;   8 bit controller : address = 2*ch      count = 2*ch+1      (ch = 0..3)
;   16 bit controller: address = 0C0h+4*n  count = 0C2h+4*n    (n = ch-4)
;
; NOTE(review): the controller performs (programmed count + 1) transfers and
;   the raw length is programmed here - confirm whether callers pass
;   length-1 or whether a decrement belongs in this routine.
; NOTE(review): the 16 bit path indexes DMAPAGE with the controller relative
;   channel (0..3), the same index range as the 8 bit path - confirm against
;   the layout of the DMAPAGE table.

VDMAReceive:
        pushad
        test al,4 ; using 16 bit channels ?
        jnz RDMA16
RDMA8:   mov ah,al  ; ah = channel, kept for mode/port/unmask

        or al,4 ; set mask to disable dma
        out 0Ah,al ; mask reg
        
        xor al,al
        out 0Ch,al ; clear byte ptr

        mov al,44h ; read from device
        or  al,ah 
        out 0Bh,al ; mode
        
        xor edx,edx
        mov al,bl
        mov dl,ah
        shl edx,1  ; dx = dma address port = 2*channel
        out dx,al  ; address LSB
        mov al,bh
        out dx,al  ; address MSB
        
        inc edx    ;dx = dma counter
        
        shr ebx,16 ; get page number into bl
        
        mov al,cl
        out dx,al  ; counter LSB
        mov al,ch
        out dx,al  ; counter MSB
        
        mov dl,ah  ; edx = channel (dh is still zero): index into DMAPAGE
        mov al,bl
        mov dl,[edx+DMAPAGE]  ; dx = page register port for this channel
        out dx,al             ; send page
        
        mov al,ah  ; UNmask DMA channel
        out 0Ah,al ; 
        popad
        ret
RDMA16:  mov ah,al
        xor edx,edx ;@ INTER...
        out 0D4h,al ; mask reg (mask already set because of channel number)
        and ah,3    ; clear higher bit to get chip dma number
        shr ebx,1 ; WORD ALIGN 16BIT DMA ADDR
        xor al,al
        out 0D8h,al ; clear byte ptr
        mov dl,ah   ;@...LEAVE  ....
        mov al,44h ; read from device
        or  al,ah 
        shl edx,2   ;@... THIS .... (address ports are 4 apart on this chip)
        out 0D6h,al ; mode
        shr ecx,1  ;WORD ALIGN 16BIT DMA COUNTER
        mov al,bl
        add dl,0C0h  ; dx = dma address port = 0C0h + 4*chip channel
        out dx,al    ; address LSB
        mov al,bh
        out dx,al  ; address MSB
        
        add  edx,2 ;dx = dma counter
        
        shr ebx,16 ; get page number into bl cutting out bit 17
        
        mov al,cl
        out dx,al  ; counter LSB
        shl ebx,1  ; PAGE ALIGN INTO STANDARD FORMAT
        mov al,ch
        out dx,al  ; counter MSB
        
        mov dl,ah  ; edx = chip channel (dh is still zero): index into DMAPAGE
        mov al,bl
        mov dl,[edx+DMAPAGE]  ; dx = page register port
        out dx,al             ; send page
        
        mov al,ah   ; UNmask DMA channel
        out 0D4h,al ; 
        popad
        ret

code32  ends

; End of program 
; (codeend segment must be at end of program or you will get lots of pain)

; Stack segment: page aligned, use32, class 'stack'.
; Reserves STACKSIZE*16 bytes of uninitialized storage (STACKSIZE is
; expressed in paragraphs, see the equates at the top of the file).
codeend segment page stack use32 'stack'
db      STACKSIZE*16 dup(?)
        ; Stack Starts here
codeend ends
        ; Boot16 is program starting point
        end     Boot16

