;---------------------------------------------------
; dmc32 by st0ne
;
; Compress stdin to stdout using DMC
; Done in asm due to too many compatibility
; problems between a C encoder and an asm decoder
; (about floating point precision and rounding mode)
; Some instructions/comments may be pointless since
; I did some wild copy/paste from the decoder
;---------------------------------------------------

.model tiny
.386
Ideal

;Shorthands used throughout the file.
; &     : short spelling of 'offset'
; x...  : operand text for a dword variable addressed relative to ebx;
;         written in brackets at the point of use, e.g. [xmin].
;         'start' zeroes ebx (= & _fpu), so ebx+(& var-& _fpu) is the
;         address of var.
; xval is not referenced anywhere in this file.
&	equ	offset
xval	equ	dword ebx+(& _val-& _fpu)
xmin	equ	dword ebx+(& _min-& _fpu)
xmax	equ	dword ebx+(& _max-& _fpu)
xfpu	equ	dword ebx

;One node of the prediction model (four dwords, 16 bytes).
;The code relies on c1 directly following c0 and n1 directly following
;n0: adding (c1-c0) to a node address selects the bit-1 count and the
;bit-1 successor at once.
struc	Node
c0	dd	?	;count for bit 0 (used as a 32-bit float)
c1	dd	?	;count for bit 1 (used as a 32-bit float)
n0	dd	?	;address of the node to go to after a 0 bit
n1	dd	?	;address of the node to go to after a 1 bit
ends	Node

segment cod32 use32
assume cs:cod32,ds:cod32

;----------------------------------------------------------------------
; Initialization
;
; Layout note: the variables below start at offset 0 (_fpu=00h,
; _esi=04h, _edi=08h, _max=0Ch, _min=10h, _val=14h) and 'org 000Ch'
; then places 'start' at 0Ch, on top of _max/_min/_val. Those variables
; are only written after the start-up code has already executed, so
; the overlap is deliberate. Do not reorder or resize anything here
; without re-checking the offsets.

	org	0000h
_fpu	dd	000000FDh	;fpu temp space; initial value is the
				;control word loaded by fldcw below
_esi	dd	0		;unused
_edi	dd	00006000h	;data destination (not referenced by
				;name in this file; 6000h is also the
				;address of the one-byte I/O buffer)
_max	dd	?
_min	dd	?
_val	dd	?
	org	000Ch		;overlap variables and code

;Init selectors and fpu
;NOTE(review): ax is expected to hold the data selector on entry; it is
;not set in this file - confirm against the loader.
start:	mov	ds,ax
	mov	es,ax
	mov	fs,ax
	mov	gs,ax
	xor	ebx,ebx		;& _fpu
	fldcw	[ebx]		;'float' precision, round even
				;(control word 00FDh: PC=00, RC=00)

;----------------------------------------------------------------------
; Init prediction tree

;Build the initial model in the work area at ebp.
;The area is organised as 256 rows (j) of 256 node slots (4096 bytes
;per row); 255 slots per row are initialised:
;  i=0..126   : binary tree, node i links to nodes 2i+1 and 2i+2
;  i=127..254 : leaves, leaf k=i-127 links to the first node of rows
;               2k and 2k+1
;Every count starts at 0.328.
;NOTE(review): ebp is never set in this file; it is assumed to hold
;the base address of the work area on entry - confirm with the loader.

;Clear memory
@encod:	mov	edi,ebp
	xor	eax,eax
	mov	ecx,00400000h		;4 MB
	rep	stosb			;leaves ecx=0 (needed by 'mov cl' below)

;Init nodes[j][i] (8)
	mov	esi,3EA7EF9Eh		;0.328
	mov	edi,ebp			;edi=&nodes[0][0]
	cdq				;for (j=0;j<256): eax=0, so edx=0

;Init i=0..126
@flush:	mov	eax,edi			;&nodes[j][0]
	mov	cl,7Fh			;for (i=0;i<127)
@fl1:	xchg	eax,esi			;eax=0.328, esi=running address
	stosd				;c0=0.328
	stosd				;c1=0.328
	xchg	eax,esi			;eax=running address again
	add	eax,size Node
	stosd				;n0=&nodes[j][2i+1]
	add	eax,size Node
	stosd				;n1=&nodes[j][2i+2]
	loop	@fl1

;Init i=127..254
	mov	eax,ebp			;&nodes[0][0]
	mov	cl,80h			;for (i=127;i<255)
@fl2:	xchg	eax,esi
	stosd				;c0=0.328
	stosd				;c1=0.328
	xchg	eax,esi
	stosd				;n0=&nodes[2*(i-127)][0]
	add	eax,256*size Node
_row	= dword $-4			;address of the immediate just assembled
	stosd				;n1=&nodes[2*(i-127)+1][0]
	add	eax,[ebx+(& _row-& _fpu)]	;adds the same row size, read
						;back from the instruction above
	loop	@fl2

;Init loop
	add	edi,size Node		;j++ (skips the unused 256th slot)
	dec	dl			;dl wraps 00h->FFh, so 256 passes
	jnz	@flush
	mov	esi,ebp			;current node = nodes[0][0]
					;edi now points at the first free node

;----------------------------------------------------------------------
; Encode using DMC

	mov	[xmin],ecx		;min=0 (ecx is 0 after the init loops)
	mov	[xmax],01000000h	;max=2^24

;Read the next input byte from stdin (DOS handle 0) through the
;one-byte buffer at 6000h.
; Out:  dl=byte read, cl=8 (bits left to code), eax/ebx restored;
;       execution falls through into @e_for
; Exit: end of input  -> jumps to @e_end with eax, ebx, ecx, edx still
;                        on the stack (@e_end pops them)
;       read error    -> terminates with exit code 1. DOS signals an
;                        error with CF=1 and an error code in ax, which
;                        is not a byte count; without this test the
;                        EOF check below never fires and the stale
;                        buffer byte is coded forever.
@e_do:	push	eax
	push	ebx
	push	ecx
	push	edx
	mov	ah,3Fh		;DOS: read from file or device
	mov	bx,0000h	;handle 0 = stdin
	mov	cx,0001h	;one byte
	mov	dx,6000h	;buffer
	int	21h
	jnc	@e_rd		;CF clear: ax=number of bytes read
	mov	ax,4C01h	;CF set: give up, exit code 1
	int	21h
@e_rd:	or	ax,ax
	jz	@e_end		;EOF occurred
	pop	edx
	mov	dl,[byte 6000h]	;c=getchar
	pop	ecx
	pop	ebx
	pop	eax
	mov	cl,08h		;8 bits to code

;Code one bit of c. cl = number of bits left (8..1), esi = current node.
;The interval [min,max) is split at
;    mid = min + (max-min-1)*c0/(c0+c1)
;and the bit selects which side is kept.
;The order of the FPU operations fixes where rounding happens; the file
;header warns that precision/rounding must agree with the decoder, so
;this sequence should not be rearranged.
@e_for:	fld	[dword esi+(Node).c1]	;st0=c1
	fld	[dword esi+(Node).c0]	;st0=c0, st1=c1
	fadd	st(1),st		;st1=c0+c1
	mov	eax,[xmax]
	sub	eax,[xmin]
	dec	eax
	mov	[xfpu],eax
	fild	[xfpu]		;mid=(max-min-1)
	fmulp			;*(p.c0)
	fdivrp	st(1)		;/(p.c0+p.c1)
	fistp	[xfpu]		;to integer; FPU stack is empty again

	mov	eax,[xfpu]
	or	eax,eax
	jnz	@e_if1
	inc	eax		;mid++ (a zero offset is bumped to 1)
@e_if1:	add	eax,[xmin]	;mid+=min

	inc	eax
	cmp	eax,[xmax]	;mid+1==max ?
	jne	@e_if2
	dec	eax		;mid--
@e_if2:	dec	eax		;undo the inc used for the comparison

	mov	dh,dl
	dec	ecx
	shr	dh,cl		;shift by cl-1 (7..0)
	inc	ecx
	and	dh,01h		;bit=(c<<i)&80h, i.e. dh=bit (cl-1) of c,
				;most significant bit first

	cmp	dh,00h
	je	@e_if3
	mov	[xmin],eax	;min=mid
	jmp	@e_el3
@e_if3:	mov	[xmax],eax	;max=mid
@e_el3:	

;Update the model with the bit just coded (dh) and move to the next
;node.
; In:  esi=current node p, dh=bit b, edi=first free node slot
; Out: esi=p->next[b] (possibly a freshly cloned node), edi advanced by
;      one node if a clone was made; ecx (bit counter) preserved
;Counts are floats, but the threshold tests compare their raw bit
;patterns as signed integers against 40000000h (the encoding of 2.0).
;NOTE(review): there is no check that edi stays inside the work area.
@up:	push	ecx
	mov	ecx,esi
	or	dh,dh
	jz	@up1
	add	ecx,& (Node).c1-& (Node).c0	;ecx=&(p->count[b])
						;from here [ecx+c0] is count[b]
						;and [ecx+n0] is next[b]
@up1:	mov	esi,[ecx+(Node).n0]		;esi=(p->next[b])

	mov	eax,40000000h			;2.0 as float bits
	cmp	[dword ecx+(Node).c0],eax
	jl	@up2				;count[b]<2: no cloning
	fld	[dword esi+(Node).c0]	;nx->c0
	fadd	[dword esi+(Node).c1]	;+nx->c1
	fsub	[dword ecx+(Node).c0]	;-p->cx
	fstp	[xfpu]
	cmp	[xfpu],eax
	jl	@up2			;nx total minus count[b] <2: no cloning

;Clone nx into the free slot at edi, splitting its counts by ratio r
	fld	[dword esi+(Node).c0]
	fadd	[dword esi+(Node).c1]
	fdivr	[dword ecx+(Node).c0]	;r=(p->cx)/(nx->c0+nx->c1)
	fld	st			;second copy of r for the c1 pass
	xor	eax,eax
@dup:	fmul	[dword esi+(Node).c0]
	fst	[dword edi+(Node).c0]	;(new->c0)=r*(nx->c0)
	fsubr	[dword esi+(Node).c0]
	fstp	[dword esi+(Node).c0]	;(nx->c0)-=r*(nx->c0)
	cmpsd				;point to c1 (only used to add 4 to
					;esi and edi; assumes DF=0)
	dec	eax			;eax: 0 -> -1 (PF=1) -> -2 (PF=0)
	jp	@dup			;do the same with c1 (body runs twice)
	movsd				;copy n0 and point to n1
	movsd				;copy n1 and point to new++
	mov	[ecx+(Node).n0],edi	;(p->next[b])=new
	sub	[ecx+(Node).n0],size Node	;edi is already one node past it

@up2:	fld1
	fadd	[dword ecx+(Node).c0]
	fstp	[dword ecx+(Node).c0]	;(p->count[b])++
	mov	esi,[ecx+(Node).n0]	;follow next[b] (the clone, if any)
	pop	ecx

	mov	eax,[xmax]
	sub	eax,[xmin]
	cmp	eax,00000100h	;max-min<256
	jge	@e_nxt
	or	dh,dh
	jz	@e_if4
	dec	[xmax]		;max--
@e_if4:	mov	eax,[xmin]
	shr	eax,16
	call	small @stosb
	shl	[xmin],16
	shr	[xmin],8
	shl	[xmax],16
	shr	[xmax],8
	mov	eax,[xmax]
	cmp	eax,[xmin]
	jg	@e_nxt
	mov	[xmax],01000000h
@e_nxt:	dec	ecx
	jnz	@e_for
	jmp	@e_do

;End of input: write the final value (max-1) as three bytes, most
;significant first, then terminate.
;Reached from @e_do with eax, ebx, ecx, edx still on the stack.
@e_end:	pop	edx
	pop	ecx
	pop	ebx			;the ebx that [xmax] is addressed from
	pop	eax
	mov	esi,[xmax]		;esi is free now: the model is done
	dec	esi			;esi=max-1
	mov	eax,esi
	shr	eax,16
	call	small @stosb		;bits 23..16
	mov	eax,esi
	shr	eax,8
	call	small @stosb		;bits 15..8
	mov	eax,esi
	call	small @stosb		;bits 7..0

	mov	ax,4C00h		;DOS: terminate, exit code 0
	int	21h

;----------------------------------------------------------------------
; Write data to stdout

;Write the byte in al to stdout (DOS handle 1).
; In:    al = byte to write
; Out:   eax, ebx, ecx, edx preserved
; Error: if DOS reports an error (CF=1) or does not write exactly one
;        byte (e.g. disk full on redirected output), the program
;        terminates with exit code 1 rather than carrying on and
;        producing a silently truncated stream.
; Notes: callers use 'call small', which pushes a 16-bit return
;        address; the 66h prefix in front of ret matches that.
;        The byte at 6000h serves as the transfer buffer.
@stosb:	push	eax
	push	ebx
	push	ecx
	push	edx
	mov	[byte 6000h],al	;place the byte in the buffer
	mov	ah,40h		;DOS: write to file or device
	mov	bx,0001h	;handle 1 = stdout
	mov	cx,0001h	;one byte
	mov	dx,6000h	;buffer
	int	21h
	jc	@st_er		;CF set: ax=error code
	cmp	ax,0001h	;ax=bytes actually written
	je	@st_ok
@st_er:	mov	ax,4C01h	;DOS: terminate, exit code 1
	int	21h
@st_ok:	pop	edx
	pop	ecx
	pop	ebx
	pop	eax
	db	66h		;operand-size prefix: 16-bit return
	ret

ends	cod32
end	start