; This program is made by Daniel Horchner.
; email: dbjh@gmx.net

; Benchmark variant selection. The test code in 'main' is wrapped in one
; %ifdef block per symbol below; only the fact that a symbol is defined
; matters, not its value. Enable exactly one: every variant defines the
; local label '.next_screen', so enabling several makes the labels clash.
;%define calldirect      0               ; call putpixel func (direct to vid)
;%define direct          0               ; put pixels direct to video memory
;%define buffered        0               ; put pixels in buf and blit to vid
%define physmax         0               ; write vid mem with one color

segment code32 public align=16 use32

; NOTE(review): raw32.inc is not visible here; it presumably declares the
; extender symbols used below (v86r_*, RMCALL_VECT, getvect, setvect,
; gethimem, exit, zerosel, data32sel, code32a, IRQ0_vect) - confirm.
%include "raw32.inc"

;32-bit data
; PIT is tied to 3 clock lines all generating 1.193181 MHz
; A tick counter of 0 means 65536; the rest is normal i.e. 1=1, 2=2 etc.
; Don't use 0 because of the equate '1193181/TICK_COUNT' (division by zero)
; The default BIOS PIT counter is 65536; 1193181/65536 = 18.2 IRQ 0s per second
TICK_COUNT      equ     62799           ; IRQ 0 at 19Hz
                                        ; Integer division at assembly time:
                                        ;  1193181/62799 = 19
USER_FREQ       equ     1193181/TICK_COUNT
N_USER_PER_BIOS equ     USER_FREQ/18    ; times faster our IRQ 0 handler is
                                        ;  called than the BIOS handler
                                        ;  (19/18 = 1 with the values above)
DELAYCNT        equ     USER_FREQ*10    ; Execute test code for 10 seconds
                                        ;  (= 190 ticks of 'timercntr')
                                        ; 6-byte far pointer; 'timer' chains to
                                        ;  it with JMP FAR, so keep the dd and
                                        ;  dw adjacent and in this order
org_IRQ0        dd      0               ; offset and selector of original
                                        ;  IRQ 0 handler
                dw      0
n_IRQ0          dd      N_USER_PER_BIOS ; counter decrements on every IRQ 0;
                                        ;  0 -> chain to original handler
n_IRQ0_2        dd      USER_FREQ       ; second counter; 0 -> one second
                                        ;  passed, 'meanframes' is updated
timercntr       dd      0               ; third counter, incr (never resets
                                        ;  in the handler; 'main' zeroes it
                                        ;  once before the test starts)
counter_mode    db      0               ; PIT counter 0 mode read at startup,
                                        ;  reused when the PIT is reprogrammed
font            dd      0               ; pointer to font table (linear
                                        ;  address, read through gs)
fpsmsg          db      'FPS=',0        ; only referenced by commented-out
                                        ;  code in 'putfps'
donemsg         db      'End of test.',0
frames          dd      0               ; counter of frames drawn; cleared by
                                        ;  'timer' once per second
meanframes      dd      0               ; mean frame rate of last second
screenbuf       dd      0               ; pointer to screen buffer (used by
                                        ;  the 'buffered' variant only)

;
; putpixel x, y, color, tmp1, tmp2                                   (macro)
;
; Plot a single pixel directly in video memory. The frame buffer is taken to
; be 320 bytes per scanline, one byte per pixel, starting at gs:0a0000h.
; No clipping is done.
; In:
;   gs = zerosel
;
;   %1 = x coordinate (dword)
;   %2 = y coordinate (dword)
;   %3 = palette entry (byte)
;   %4 = 1st temporary register (dword)
;   %5 = 2nd temporary register (dword)
; Out:
;   %4 = ? (offset of the pixel: y * 320 + x)
;   %5 = ? (y * 64)
; Note:
;   %5 is allowed to be the same register as %2 ('putchar' does this); %1
;   must not be one of the temporaries.
;
%macro  putpixel 5
        mov %5,%2                       ; Both temporaries start out as y
        mov %4,%2
        shl %5,6                        ; %5 = y * 64
        shl %4,8                        ; %4 = y * 256
        add %4,%5                       ; y * 256 + y * 64 = y * 320
        add %4,%1                       ; %4 = offset of pixel (x,y)
        mov byte [gs:0a0000h+%4],%3     ; Store the palette entry
%endmacro

;
; putpixelbuf x, y, color, bufreg, tmp1, tmp2                        (macro)
;
; Plot a single pixel in an off-screen buffer that has the same layout as
; the screen: 320 bytes per scanline, one byte per pixel. No clipping.
; In:
;   ds:%4 = address of screen buffer (%4 must be a register)
;
;   %1 = x coordinate (dword)
;   %2 = y coordinate (dword)
;   %3 = palette entry (byte)
;   %5 = 1st temporary register (dword)
;   %6 = 2nd temporary register (dword)
; Out:
;   %4 = unchanged
;   %5 = ? (offset of the pixel in the buffer: y * 320 + x)
;   %6 = ? (y * 64)
;
%macro  putpixelbuf 6
        mov %6,%2                       ; Both temporaries start out as y
        mov %5,%2
        shl %6,6                        ; %6 = y * 64
        shl %5,8                        ; %5 = y * 256
        add %5,%6                       ; y * 256 + y * 64 = y * 320
        add %5,%1                       ; %5 = offset of pixel (x,y)
        mov byte [%4+%5],%3             ; Store the palette entry in the buf
%endmacro

;
; vsync                                                              (macro)
;
; Busy-wait until a new vertical retrace starts.
; Out:
;   Retrace just began
;   al = ?
;   dx = ? (3dah)
;
; Note:
;   Only synchronize when the work that follows fits inside the retrace
;   period; otherwise the wait just costs time without preventing tearing.
;   Example: with a 70Hz refresh rate a blitting routine that waits for the
;   retrace has to finish in less than 1/70s. When it is slower, the beam
;   has started tracing again and can overtake the blit while video RAM is
;   still being changed. The top of the screen then shows the new frame and
;   the bottom the old one - the same artefact as without waiting, only at a
;   lower frame rate :(
;
%macro  vsync 0
        mov dx,03dah                    ; Status port polled below
%%still_in_retrace:
        in al,dx
        test al,8                       ; bit 3=1 -> vertical retrace
        jnz short %%still_in_retrace    ; A retrace is already under way ->
                                        ;  let it finish first
%%await_retrace:
        in al,dx
        test al,8
        jz short %%await_retrace        ; Spin until the next retrace starts
%endmacro

;32-bit code
;
; Program entry point: frame rate benchmark for video mode 13h.
;
; Sequence:
;   1. Copy the 8x8 font (setfont) and switch to video mode 13h.
;   2. Save the IRQ 0 vector, install 'timer' and program PIT counter 0 with
;      TICK_COUNT so that 'timer' runs USER_FREQ times per second.
;   3. Make palette entry 255 red (63,0,0).
;   4. Run the test variant selected with %define at the top of the file.
;      Each variant redraws the screen, alternating between color 0 and 255,
;      and calls 'putfps' after every frame. It stops when 'timercntr'
;      reaches DELAYCNT or when its 32-bit frame down-counter reaches 0.
;   5. Print 'End of test.', restore the PIT and the IRQ 0 vector, wait for
;      a key, switch to text mode 3 and jump to 'exit'.
; In:
;   ds = data32sel and gs = zerosel are assumed to be set up by the code
;   that transfers control here (not visible in this file) - TODO confirm.
; Note:
;   es is loaded with zerosel by the 'buffered' and 'physmax' variants and
;   is not restored afterwards.
;
main:
        call setfont                    ; Set font variable (for put funcs)

        mov word [v86r_ax],13h          ; Set video mode 13h (320x200, 1 byte
        mov al,10h                      ;  per pixel) via real mode int 10h
        int RMCALL_VECT

        mov bl,[IRQ0_vect]
        call getvect
        mov [org_IRQ0+4],cx             ; cx:edx=addr of IRQ handler
        mov [org_IRQ0],edx

        cli
        mov bl,[IRQ0_vect]              ; Reload the vector number (as @exit
                                        ;  does) instead of relying on
                                        ;  getvect to preserve bl
        mov cx,cs                       ; Install own handler
        mov edx,timer
        call setvect
                                        ; Save PIT counter mode
        mov al,0e2h                     ; Read back command; bit 5!=0 -> Don't
        out 43h,al                      ;  latch counter; bit 4=0 -> Latch
        in al,40h                       ;  status; bit 1=1 -> Select cntr 0
        and al,0eh                      ; bits 3 to 1 contain counter mode
        shr al,1
        mov [counter_mode],al
                                        ; Initialize PIT
        mov al,30h                      ; bits 7-6=0 -> Select cntr 0; bits
        mov ah,[counter_mode]           ;  5-4=3 -> Read/Write LSB followed
        shl ah,1                        ;  by MSB; bits 3-1=counter mode;
        add al,ah                       ;  bit 0=0 -> 16 binary counter
        out 43h,al
        mov al,TICK_COUNT & 0ffh        ; LSB of tick counter
        out 40h,al
        mov al,TICK_COUNT >> 8          ; MSB of tick counter
        out 40h,al
        sti

        mov al,255                      ; al=base pal entry that rgb values
        mov edx,3c8h                    ;  are written for
        out dx,al
        mov edx,3c9h
        mov al,63
        out dx,al                       ; red
        mov al,0
        out dx,al                       ; green
        out dx,al                       ; blue
;Code to test begins here
        mov dword [timercntr],0         ; Reset counter
        cld                             ; rep stosd/movsd below must count
                                        ;  upwards. Nothing on this path sets
                                        ;  DF: putfps enters 'putchar'
                                        ;  directly and so skips the cld in
                                        ;  gputstr
%ifdef  calldirect                      ; Call putpixel func; direct to video
        mov ebx,-1                      ; ebx=frames left (2^32-1)
        mov eax,0                       ; al=color
.next_screen:
        xor al,0ffh                     ; Switch between color 0 and 255
;        push eax                        ; vsync destroys al
;        vsync
;        pop eax
        mov edx,0                       ; edx=y
.next_y:
        mov ecx,0                       ; ecx=x
.next_x:
        call putpixel2                  ; Destroys esi and edi
        inc ecx
        cmp ecx,320
        jb short .next_x
        inc edx
        cmp edx,200
        jb short .next_y
        inc dword [frames]
        call putfps
        cmp dword [timercntr],DELAYCNT  ; Test period over?
        jae short .done
        dec ebx
        jnz short .next_screen
%endif
%ifdef  direct                          ; Use putpixel macro; direct to vid
        mov ecx,-1                      ; ecx=frames left (2^32-1)
        mov ebx,0                       ; bl=color
.next_screen:
        xor bl,0ffh                     ; Switch between color 0 and 255
;        vsync
        mov edx,0                       ; edx=y
.next_y:
        mov eax,0                       ; eax=x
.next_x:
        putpixel eax,edx,bl, esi,edi
        inc eax
        cmp eax,320
        jb short .next_x
        inc edx
        cmp edx,200
        jb short .next_y
        inc dword [frames]
        call putfps
        cmp dword [timercntr],DELAYCNT  ; Test period over?
        jae short .done
        dec ecx
        jnz short .next_screen
;        loop .next_screen
%endif
%ifdef  buffered                        ; Use putpixel-to-buf macro and blit
        mov eax,320*200                 ; One byte per pixel
        call gethimem
        jc near @exit                   ; No memory for the buffer -> quit
        mov [screenbuf],eax

        mov es,[zerosel]                ; es:edi is the blit destination
        mov ebp,-1                      ; ebp=frames left (2^32-1)
        mov edx,0                       ; dl=color
.next_screen:
        mov esi,[screenbuf]
        mov ecx,0                       ; ecx=y
        xor dl,0ffh                     ; Switch between color 0 and 255
.next_y:
        mov eax,0                       ; eax=x
align 4
.next_x:                                ; Put pixel to ds:esi
        putpixelbuf eax,ecx,dl, esi, ebx,edi
        inc eax
        cmp eax,320
        jb short .next_x
        inc ecx
        cmp ecx,200
        jb short .next_y

        mov edi,0a0000h                 ; esi still points to the buffer
        mov ecx,320*200/4               ; Number of dwords in a frame
;        push edx                        ; vsync destroys dx
;        vsync
;        pop edx
        rep movsd                       ; Blit the buffer to the screen
        inc dword [frames]
        call putfps
        cmp dword [timercntr],DELAYCNT  ; Test period over?
        jae short .done
        dec ebp
        jnz short .next_screen
%endif
%ifdef  physmax                         ; Write entire screen one color
        mov es,[zerosel]                ; es:edi is the fill destination
        mov eax,0                       ; eax=4 pixels of the current color
        mov ebx,-1                      ; ebx=frames left (2^32-1)
.next_screen:
        mov edi,0a0000h
        xor eax,0ffffffffh              ; Switch between color 0 and 255
        mov ecx,320*200/4               ; Number of dwords in a frame
;        push eax                        ; vsync destroys al
;        vsync
;        pop eax
        rep stosd
        inc dword [frames]
        call putfps
        cmp dword [timercntr],DELAYCNT  ; Test period over?
        jae short .done
        dec ebx
        jnz short .next_screen
%endif
.done:
        mov esi,donemsg
        mov ecx,0                       ; x
        mov edx,0                       ; y
        mov bl,14                       ; color
        call gputstr
;Code to test ends here

@exit:
        cli                             ; Restore default PIT counter value
        mov al,30h                      ; Same control word as at startup
        mov ah,[counter_mode]
        shl ah,1
        add al,ah
        out 43h,al
        mov al,0                        ; LSB and MSB 0 -> counter of 65536
        out 40h,al
        out 40h,al
                                        ; Restore original timer handler
        mov bl,[IRQ0_vect]
        mov cx,[org_IRQ0+4]             ; cx:edx=addr of IRQ handler
        mov edx,[org_IRQ0]
        call setvect
        sti

        mov byte [v86r_ah],0            ; ah=0 -> Wait for key and read char
        mov al,16h
        int RMCALL_VECT
        mov word [v86r_ax],3            ; Set video mode 3 (text 80x25x16)
        mov al,10h
        int RMCALL_VECT
        jmp exit                        ; Return to real/V86 mode

;
;
; IRQ 0 (PIT counter 0) interrupt handler
;
; Runs USER_FREQ times per second once 'main' has programmed the PIT.
;   - timercntr is incremented on every tick.
;   - n_IRQ0_2 counts one second: when it reaches 0 the number of frames
;     drawn in that second is copied to meanframes and frames is cleared.
;   - n_IRQ0 decides who acknowledges the interrupt: when it reaches 0 the
;     original handler is entered with the stack exactly as at entry here;
;     on every other tick an EOI is sent and the handler returns itself.
; All registers are preserved.
;
align 4
timer:
        push eax
        push ds

        mov ds,[cs:data32sel]           ; ds of the interrupted code unknown
        inc dword [timercntr]

        dec dword [n_IRQ0_2]            ; 1 second passed since vars reset?
        jnz short .second_running
        mov dword [n_IRQ0_2],USER_FREQ  ; Yes: restart the one second count,
        mov eax,[frames]                ;  publish the frame count of that
        mov [meanframes],eax            ;  second
        mov dword [frames],0            ;  and start counting from 0 again

.second_running:
        dec dword [n_IRQ0]              ; Time to call the original handler?
        jz short .chain_to_original

        mov al,20h                      ; No: acknowledge the IRQ ourselves
        out 20h,al                      ;  (non-specific EOI to master PIC)
        pop ds
        pop eax
        iretd

.chain_to_original:
        mov dword [n_IRQ0],N_USER_PER_BIOS
        pop ds                          ; Only eflags, cs and eip are left on
        pop eax                         ;  the stack, so a JMP is enough and
        jmp far [cs:org_IRQ0]           ;  the original handler does the IRET

;
; Show the frame rate of the last full second (meanframes) as a decimal
; number of at least 3 digits in color 7, near the bottom right corner of
; the screen (x=320-4*8, y=200-16).
; In:
;   ds = data32sel
;   gs = zerosel
; Out:
;   All registers preserved
;
align 4
putfps:
        pushad

;        mov esi,fpsmsg                  ; Disabled: 'FPS=' label in front of
;        mov ecx,320-(5*8+3*8)           ;  the number; x
;        mov edx,200-16                  ; y
;        mov bl,7                        ; color
;        call gputstr

        mov ebx,0307h                   ; bl=color 7; bh=at least 3 digits
        mov edx,200-16                  ; y
        mov ecx,320-4*8                 ; x
        mov eax,[meanframes]            ; Number to print
        call gputnumdec

        popad
        ret

; Callable (non-macro) version of the putpixel macro, used by the
; 'calldirect' test variant.
;
; Plot one pixel directly in video memory (320 bytes per scanline, one byte
; per pixel, starting at gs:0a0000h). No clipping.
; In:
;   gs = zerosel
;   ecx = x coordinate
;   edx = y coordinate
;   al = palette entry
; Out:
;   esi = ? (y * 64)
;   edi = ? (offset of the pixel: y * 320 + x)
;
putpixel2:
        mov esi,edx                     ; Both temporaries start out as y
        mov edi,edx
        shl esi,6                       ; esi = y * 64
        shl edi,8                       ; edi = y * 256
        add edi,esi                     ; y * 256 + y * 64 = y * 320
        add edi,ecx                     ; edi = y * 320 + x
        mov [gs:0a0000h+edi],al         ; Store the palette entry
        ret

;
; Set font variable to linear address of 8x8 font
;
; Asks the video BIOS (int 10h, ax=1130h, bh=3) for the ROM 8x8 character
; table and copies 8*256 bytes of it to memory obtained from gethimem,
; because text is drawn faster from RAM. If gethimem reports failure (carry
; set, the same convention 'main' checks for) the font is not copied and
; 'font' keeps pointing at the table in ROM, so text output still works.
; In:
;   ds = data32sel
; Out:
;   font = linear address of font (RAM copy, or ROM table as fallback)
;   eax = ?
;   ecx = ?
;   esi = ?
;   edi = ?
;
setfont:
        push es
        mov word [v86r_ax],1130h        ; al=30 -> Get cur char table info
        mov byte [v86r_bh],3            ; ROM 8x8 character table pointer
        mov al,10h
        int RMCALL_VECT
        movzx eax,word [v86r_es]        ; es:bp=pointer to table
        shl eax,4
        and dword [v86r_ebp],0ffffh     ; Clear high word for "add eax,..."
        add eax,[v86r_ebp]              ; eax=linear address of font in ROM
        mov [font],eax                  ; Fallback when no memory is available
        sub eax,[code32a]               ; offset of font in ROM from 'code32'
                                        ; Copy char table to RAM -> faster
        mov esi,eax                     ;  access -> faster writing of text
        mov eax,8*256                   ; 256 diff chars; 1 byte per scanline
        push esi                        ; Don't depend on gethimem preserving
        call gethimem                   ;  the copy source
        pop esi                         ; (POP leaves the carry flag alone)
        jc short .keep_rom_font         ; Allocation failed -> use ROM font
        mov ecx,8*256/4                 ; Number of dwords to copy; loaded
                                        ;  after the call so that gethimem
                                        ;  need not preserve ecx
        mov es,[data32sel]
        mov edi,eax
        cld                             ; Copy upwards
        rep movsd
        add eax,[code32a]
        mov [font],eax                  ; Linear address of font in RAM
.keep_rom_font:
        pop es
        ret

;
; Graphically put 0 terminated string to screen
;
; Draws the string with the 8x8 font, one character cell of 8 pixels wide
; per character, left to right. Only the set bits of the font are drawn;
; the background is left untouched. No clipping is done. An empty string
; draws nothing.
; In:
;   ds:esi = address of 0 terminated string
;   gs = zerosel
;   bl = character color
;   cx = x coordinate to put top left corner of string
;   dx = y coordinate to ,,  ,,   ,,    ,,   ,,   ,,
;   font = linear address of 8x8 font
;   video mode = 8 bits per pixel
; Out:
;   All registers preserved; direction flag cleared
;
; Second entry point 'putchar' (jumped to by gputnumdec):
;   [esp]   = address of the string (in ds)
;   [esp+4] = value to load into esp before the final POPAD/RET, i.e. the
;             address of a PUSHAD frame with a return address above it
;   ecx = number of characters to draw (must not be 0)
;   edx = x:y coordinates (x in the high word, y in the low word)
;   ah = character color
;
gputstr:
        pushad
        shl ecx,16
        mov cx,dx                       ; ecx=x:y coordinates to put string
        push ecx
        push es
        mov ax,ds                       ; First, get string length
        mov es,ax                       ; es=ds for scasb
        mov edi,esi
        mov ecx,0ffffffffh              ; Search max 4GB
        mov al,0                        ; Scan for 0 (=end of string)
        cld
        repne scasb                     ; Compare al with es:edi
        not ecx                         ; ecx=bytes scanned, 0 included
        dec ecx                         ; When scasb stops, edi points 1 byte
                                        ;  past the 0; that's 1 byte too far
        pop es                          ; ecx=string length

        pop edx                         ; edx=x:y coordinates to put string
        test ecx,ecx                    ; Empty string? The loop below counts
        jnz short .not_empty            ;  down to 0 and would wrap around
        popad                           ;  to 2^32-1 characters
        ret
.not_empty:
        mov ah,bl                       ; bl=character color

        push esp                        ; Minor adjustment for 'gputnumdec':
                                        ;  pushes the address of the PUSHAD
                                        ;  frame, reloaded with POP ESP below
        push esi
putchar:
        pop esi
        mov al,[esi]                    ; al=ASCII # of character to print
        mov ebp,0                       ; ebp=scanline in character to print
        inc esi
        push esi                        ; Save pointer to string
        push ecx                        ; Save counter of chars left to print
.next_scanline:
        movzx edi,al
        shl edi,3
        add edi,[cs:font]               ; edi=font address + ASCII # * 8
        mov cl,[gs:edi+ebp]             ; cl=1 scanline of char (_8_x8...)
        mov ch,0                        ; ch=pixel # in scanline of char to
.next_pixel:                            ;  print
        test cl,80h                     ; The font is a character bit mask ->
        jz short .pixel_done            ;  Draw only the character itself
        mov edi,edx                     ; edx=x:y coordinates
        and edi,0ffffh                  ; edi=y coordinate
        mov esi,edx
        shr esi,16                      ; esi=x coordinate
        add edi,ebp                     ; edi=y + scanline # in character
        movzx ebx,ch
        add esi,ebx                     ; esi=x + pixel # in scanline of char
        putpixel esi,edi,ah, ebx,edi    ; edi= y==temp reg2 -> Is allowed
.pixel_done:
        shl cl,1                        ; Shift next bit to test position
        inc ch
        cmp ch,8                        ; Last pixel in char scanline printed?
        jb short .next_pixel
        inc ebp
        cmp ebp,8                       ; Last scanline printed?
        jb short .next_scanline
        add edx,80000h                  ; x=x+8 -> following char will be
        pop ecx                         ;  printed next to last one
        dec ecx
        jnz short putchar               ; Print next character

        pop esi                         ; Discard pointer to string
        pop esp                         ; Minor adjustment for 'gputnumdec':
                                        ;  esp=address of PUSHAD frame; this
                                        ;  also drops a string kept on the
                                        ;  stack below that frame
        popad
        ret

;
; Graphically put number in eax to screen in decimal
;
; The number is converted to ASCII digits on the stack (unsigned, padded
; with leading zeros up to bh characters). The routine then jumps into
; gputstr at 'putchar' to draw them; the POPAD and RET at the end of gputstr
; restore the registers saved here and return to the caller of gputnumdec.
; In:
;   eax = number
;   ds = data32sel; Add ds=ss code and ds-restore code if ds!=data32sel
;   gs = zerosel
;   bl = character color
;   bh = minimal number of characters to write (1 dword on stack per char)
;   cx = x coordinate to put top left corner of string
;   dx = y coordinate to ,,  ,,   ,,    ,,   ,,   ,,
;   font = linear address of font
;   video mode = 8 bits per pixel
; Out:
;   All registers preserved (restored by the POPAD in gputstr)
;
gputnumdec:
        pushad
        mov ebp,esp                     ; Save stack pointer (address of the
                                        ;  PUSHAD frame, needed by 'putchar')
        mov esi,ebx                     ; Save bl (color)
        shl ecx,16
        mov cx,dx                       ; ecx=x:y coordinates to put string
        mov edi,ecx                     ; Save coordinates

        mov cl,bh                       ; cl=minimal number of characters
        mov ebx,10                      ; Divide by 10
        xor ch,ch                       ; count of numbers pushed on stack
.push_digit:
        xor edx,edx                     ; Reset edx: eax = _edx_:eax / ebx
        div ebx
        push edx                        ; remainder (=next digit) is in edx;
                                        ;  least significant digit first, so
                                        ;  the most significant one ends up
                                        ;  at the lowest address
        inc ch
        cmp eax,0                       ; Are there any digits left?
        jne short .push_digit
        sub cl,ch                       ; If cl > ch add zero's
        jbe short .start_pop_digit      ; Enough digits already
.extra_digit:
        push dword 0                    ; Leading zero
        inc ch
        dec cl
        jnz short .extra_digit
.start_pop_digit:
                                        ; Compact the digit dwords into a
                                        ;  byte string in place: edx reads a
                                        ;  dword per digit, ebx writes a byte
                                        ;  per digit, both starting at esp.
                                        ;  The read pointer is never behind
                                        ;  the write pointer, so no unread
                                        ;  digit is overwritten
        mov edx,esp                     ; Save esp
        mov ebx,esp                     ; String has to 'grow' up in mem
        movzx eax,ch
        push eax                        ; Save string length
.pop_digit:
        mov eax,[ss:edx]                ; Get next digit, but don't give up
        add edx,4                       ;  stack space (POP would)
        add al,'0'                      ; Convert to ASCII
        mov [ss:ebx],al                 ; Store character on stack
        inc ebx
        dec ch
        jnz short .pop_digit            ; ch=count of numbers pushed on stack

        mov eax,esi
        mov ah,al                       ; ah=color
        mov edx,edi                     ; edi=x:y coordinates
        pop ecx                         ; ecx=string length
        mov esi,esp                     ; esp=start of string
;       mov bx,ss                       ; Already done in extender (ss=ds);
;       mov ds,bx                       ;  str ptr at [esp] must point in ds
                                        ; Avoid redundant POPAD (see note)
        push ebp                        ; Provide ptr to PUSHAD stack frame
        push esi                        ; 'putchar' expects a ptr to the
        jmp putchar                     ;  string at [esp]
                                        ; The RET of gputstr returns to the
                                        ;  calling code
