;THE COMPUTER CODE CONTAINED HEREIN IS THE SOLE PROPERTY OF PARALLAX ;SOFTWARE CORPORATION ("PARALLAX"). PARALLAX, IN DISTRIBUTING THE CODE TO ;END-USERS, AND SUBJECT TO ALL OF THE TERMS AND CONDITIONS HEREIN, GRANTS A ;ROYALTY-FREE, PERPETUAL LICENSE TO SUCH END-USERS FOR USE BY SUCH END-USERS ;IN USING, DISPLAYING, AND CREATING DERIVATIVE WORKS THEREOF, SO LONG AS ;SUCH USE, DISPLAY OR CREATION IS FOR NON-COMMERCIAL, ROYALTY OR REVENUE ;FREE PURPOSES. IN NO EVENT SHALL THE END-USER USE THE COMPUTER CODE ;CONTAINED HEREIN FOR REVENUE-BEARING PURPOSES. THE END-USER UNDERSTANDS ;AND AGREES TO THE TERMS HEREIN AND ACCEPTS THE SAME BY USE OF THIS FILE. ;COPYRIGHT 1993-1998 PARALLAX SOFTWARE CORPORATION. ALL RIGHTS RESERVED. ; ; $Source: f:/miner/source/texmap/rcs/tmap_per.asm $ ; $Revision: 1.26 $ ; $Author: john $ ; $Date: 1995/02/20 18:22:55 $ ; ; Perspective texture mapper inner loop. ; ; $Log: tmap_per.asm $ ; Revision 1.26 1995/02/20 18:22:55 john ; Put all the externs in the assembly modules into tmap_inc.asm. ; Also, moved all the C versions of the inner loops into a new module, ; scanline.c. ; ; Revision 1.25 1995/02/20 17:09:08 john ; Added code so that you can build the tmapper with no assembly! ; ; Revision 1.24 1995/01/10 09:32:07 mike ; mostly fix garbage at end of scanline, but slow down by 1-4%. ; ; Revision 1.23 1994/12/02 23:29:57 mike ; optimizations. ; ; Revision 1.22 1994/11/30 00:57:00 mike ; optimization. ; ; Revision 1.21 1994/11/21 13:57:42 mike ; fix right side shear bug ; ; Revision 1.20 1994/11/12 16:41:09 mike ; jae -> ja. ; ; Revision 1.19 1994/10/27 19:40:00 john ; Made lighting table lookup be _gr_fade_table[eax] instead ; of fs:[eax], which gets rig of a segment override that ; supposedly costs 1 clock on a 486. Mainly, I wanted to verify ; that the only reason we need selectors is for the source texture ; data . ; ; Revision 1.18 1994/05/03 11:08:32 mike ; Trap divide overflows. ; ; Revision 1.17 1994/04/21 15:03:41 mike ; make faster. 
;
; Revision 1.16  1994/04/08  16:46:57  john
; Made 32 fade levels. Hacked.
;
; Revision 1.15  1994/03/31  08:35:18  mike
; Fix quantized-by-4 bug in inner loop.
;
; Revision 1.14  1994/03/14  17:41:14  mike
; Fix bug in unlighted version.
;
; Revision 1.13  1994/03/14  15:45:14  mike
; streamline code.
;
; Revision 1.12  1994/01/14  14:01:58  mike
; *** empty log message ***
;
; Revision 1.11  1993/12/18  14:43:44  john
; Messed around with doing 1/z, the u*(1/z) and v*(1/z)
; (Went from 23 fps to 21 fps... not good! )
;
; Revision 1.10  1993/12/17  16:14:17  john
; Split lighted/nonlighted, so there is no cmp lighting
; in the inner loop.
;
; Revision 1.9  1993/12/17  12:34:29  john
; Made leftover bytes use linear approx instead of correct...
; should save about 8 divides per scanline on average.
; Also, took out anti-aliasing code and rearranged to
; order of some instructions to help on 486 pipelining.
; (The anti-aliasing code did *not* look good, so I
; figure there was no reason to keep it in. )
;
; Revision 1.8  1993/12/16  18:37:52  mike
; Align some stuff on 4 byte boundaries.
;
; Revision 1.7  1993/11/30  08:44:18  john
; Made selector set check for < 64*64 bitmaps.
;
; Revision 1.6  1993/11/23  17:25:26  john
; Added safety "and eax, 0fffh" in lighting lookup.
;
; Revision 1.5  1993/11/23  15:08:52  mike
; Fixed lighting bug.
;
; Revision 1.4  1993/11/23  14:38:50  john
; optimized NORMAL code by switching EBX and ESI, so BH can be used in
; the lighting process.
;
; Revision 1.3  1993/11/23  14:30:53  john
; Made the perspective tmapper do 1/8 divides; added lighting.
;
; Revision 1.2  1993/11/22  10:24:59  mike
; *** empty log message ***
;
; Revision 1.1  1993/09/08  17:29:53  mike
; Initial revision
;
;
;

; 32-bit flat-model code; the only entry point exported for C is asm_tmap_scanline_per_.
        .386

        public  asm_tmap_scanline_per_

; Per revision 1.26 above, the externs used below (_fx_*, _y_pointers, write_buffer,
; _gr_fade_table, _Lighting_on, _transparency_on, _pixel_data_selector, ...) come from
; this include.
        include tmap_inc.asm

        public  _max_ecx,_min_ecx

_DATA   SEGMENT DWORD PUBLIC USE32 'DATA'

        align   4
; Only mentioned in commented-out experimental code further down; not used by live code here.
mem_edx dd      ?

;**_v_window_left       dd      _window_left
;**_v_window_right      dd      _window_right
;**_v_window_top        dd      _window_top
;**_v_window_bottom     dd      _window_bottom

; ---------- These are passed in by the C caller ----------

; ----------^^ These are passed in by the C caller ^^----------

        public  _x,_loop_count, new_end
        align   4
; _x is exported but not referenced by the code visible in this file.
_x              dd      ?
; Remaining pixel count minus one while running the per-pixel loops (they use dec/jns),
; and the number of 2^NBITS-pixel chunks while running the unrolled fast loops (dec/jnz).
_loop_count     dd      ?

; Exported, but neither read nor written by the code visible in this file
; (presumably debugging statistics -- TODO confirm against the C side).
_max_ecx        dd      0
_min_ecx        dd      55555555h

; Zero selects the plain divide-per-pixel loop (tmap_loop); non-zero selects the fast loops.
        extern _per2_flag:dword

new_end dd      1       ; if set, use new, but slower, way of finishing off extra pixels on scanline, 01/10/95 --MK

; Exported flags; not referenced by the code visible in this file.
        public  _scan_doubling_flag, _linear_if_far_flag
_scan_doubling_flag     dd      0
_linear_if_far_flag     dd      0

;---------- local variables
        align   4
; req_base, req_size, U1 and V1 are not referenced by the live code visible in this file.
req_base        dd      ?
req_size        dd      ?
; U0/V0: u/z and v/z (as produced by the PDIV-style divide) at the start of the current
; chunk; the fast loops store the end-of-chunk values back here so the next chunk needs
; only two divides.
U0              dd      ?
U1              dd      ?
V0              dd      ?
V1              dd      ?
; Pixels remaining after the whole 2^NBITS-pixel chunks have been drawn.
num_left_over   dd      ?
; Per-chunk deltas: _fx_du_dx, _fx_dv_dx and _fx_dz_dx shifted left by NBITS.
DU1             dd      ?
DV1             dd      ?
DZ1             dd      ?

;**_fx_dl_dx1   dd      ?
;**_fx_dl_dx2   dd      ?

_DATA   ENDS

DGROUP  GROUP   _DATA

_TEXT   SEGMENT PARA PUBLIC USE32 'CODE'

        ASSUME  DS:_DATA
        ASSUME  CS:_TEXT

; --------------------------------------------------------------------------------------------------
; Enter:
;       _xleft  fixed point left x coordinate
;       _xright fixed point right x coordinate
;       _y      fixed point y coordinate
;       _pixptr address of source pixel map
;       _u      fixed point initial u coordinate
;       _v      fixed point initial v coordinate
;       _z      fixed point initial z coordinate
;       _du_dx  fixed point du/dx
;       _dv_dx  fixed point dv/dx
;       _dz_dx  fixed point dz/dx

;       for (x = (int) xleft; x <= (int) xright; x++) {
;               _setcolor(read_pixel_from_tmap(srcb,((int) (u/z)) & 63,((int) (v/z)) & 63));
;               _setpixel(x,y);
;
;               u += du_dx;
;               v += dv_dx;
;               z += dz_dx;
;       }
;
; Notes on the code as written below (the list above predates some changes):
;   * Inputs are actually read from the globals _fx_y, _fx_xleft, _fx_xright, _fx_u, _fx_v,
;     _fx_z, _fx_du_dx, _fx_dv_dx, _fx_dz_dx, _fx_l and _fx_dl_dx; texels are fetched
;     through ES, loaded from _pixel_data_selector.
;   * ES and all general registers are saved on entry (push es / pusha); every exit goes
;     through _none_to_do, which restores them (popa / pop es) and returns with a near ret.
;   * Side effects on memory: _fx_l and _fx_dl_dx are arithmetically shifted right by 8 in
;     place; _loop_count, num_left_over, U0, V0, DU1, DV1 and DZ1 are overwritten.

;;goto_none_to_do:
;;      int     3       ; thinking this can't get hit, if so, kill compare against _window_bottom
;;      jmp     _none_to_do

        align   16
asm_tmap_scanline_per_:
        push    es
        pusha

;---------------------------- setup for loop ---------------------------------
; Setup for loop:       _loop_count  iterations = (int) xright - (int) xleft
;       esi     source pixel pointer = pixptr
;       edi     initial row pointer = y*320+x
; NOTE: fx_xright and fx_xleft changed from fix to int by mk on 12/01/94.
; set esi = pointer to start of texture map data

; set edi = address of first pixel to modify
        mov     edi,_fx_y
        mov     es,_pixel_data_selector ; selector[0*2]

;;      cmp     edi,_window_bottom
;;      ja      goto_none_to_do

        mov     edi,_y_pointers[edi*4]  ; edi = row offset for scanline y

        mov     ebx,_fx_xleft
        test    ebx, ebx
        jns     ebx_ok
        xor     ebx, ebx                ; clamp a negative left edge to column 0
ebx_ok: add     edi,write_buffer
        add     edi,ebx                 ; edi -> first destination pixel

; set _loop_count = # of iterations
        mov     eax,_fx_xright

;;      cmp     eax,_window_right
;;      jl      eax_ok1
;;      mov     eax,_window_right
;;eax_ok1:
;;      cmp     eax,_window_left
;;      jg      eax_ok2
;;      mov     eax,_window_left
;;eax_ok2:

        sub     eax,ebx
        js      _none_to_do             ; right edge is left of the (clamped) left edge
;;      cmp     eax,_window_width
;;      jbe     _ok_to_do
;;      mov     eax,_window_width
;;_ok_to_do:
        mov     _loop_count,eax         ; pixel count - 1 (loops below use dec/jns)

; lighting values are passed in fixed point, but need to be in 8 bit integer, 8 bit fraction so we can easily
; get the integer by reading %bh
        sar     _fx_l, 8
        sar     _fx_dl_dx,8
        jns     dl_dx_ok
        inc     _fx_dl_dx               ; round towards 0 for negative deltas
dl_dx_ok:

; set initial values
        mov     ebx,_fx_u
        mov     ebp,_fx_v
        mov     ecx,_fx_z

; Dispatch: _per2_flag == 0 -> divide per pixel; otherwise the fast loops, lit or unlit.
        test    _per2_flag,-1
        je      tmap_loop

        test    _Lighting_on, -1
        je      tmap_loop_fast_nolight
        jmp     tmap_loop_fast

;================ PERSPECTIVE TEXTURE MAP INNER LOOPS ========================
;
; Usage in loop:        eax     division, pixel value
;                       ebx     u
;                       ecx     z
;                       edx     division
;                       ebp     v
;                       esi     source pixel pointer
;                       edi     destination pixel pointer

;-------------------- NORMAL PERSPECTIVE TEXTURE MAP LOOP -----------------
; Two idivs per pixel.  Also used by the fast paths when the aligned part of the scanline is
; shorter than one 2^NBITS-pixel chunk.  Expects ebx = u, ebp = v, ecx = z,
; _loop_count = pixels - 1.
tmap_loop:
        mov     esi, ebx                ; esi becomes u coordinate

        align   4
tmap_loop0:

; compute v coordinate
        mov     eax, ebp                ; get v
        mov     edx, eax
        sar     edx, 31                 ; edx:eax = sign-extended v
        idiv    ecx                     ; eax = (v/z)

        and     eax,3fh                 ; mask with height-1
        mov     ebx,eax

; compute u coordinate
        mov     eax, esi                ; get u
        mov     edx, eax
        sar     edx, 31
        idiv    ecx                     ; eax = (u/z)

        shl     eax,26                  ; low 6 bits of u/z to the top of eax
        shld    ebx,eax,6               ; ebx = v*64+u

; read 1 pixel
        xor     eax, eax
        test    _Lighting_on, -1
        mov     al, es:[ebx]            ; get pixel from source bitmap (mov leaves ZF from the test intact)
        je      NoLight1

; LIGHTING CODE
        mov     ebx, _fx_l              ; get temp copy of lighting value
        mov     ah, bh                  ; get lighting level
        add     ebx, _fx_dl_dx          ; update lighting value
        mov     al, _gr_fade_table[eax] ; xlat pixel thru lighting tables
        mov     _fx_l, ebx              ; save temp copy of lighting value

; transparency check
NoLight1:       cmp     al,255
        je      skip1

        mov     [edi],al
skip1:  inc     edi

; update deltas
        add     ebp,_fx_dv_dx
        add     esi,_fx_du_dx
        add     ecx,_fx_dz_dx
        je      _div_0_abort            ; would be dividing by 0, so abort

        dec     _loop_count
        jns     tmap_loop0

; Common exit for every path in this file: restore what the entry saved and return.
_none_to_do:
        popa
        pop     es
        ret

; We detected a z=0 condition, which seems pretty bogus, don't you think?
; So, we abort, but maybe we want to know about it.
_div_0_abort:
;**     int     3
        jmp     _none_to_do

;-------------------------- PER/4 TMAPPER ----------------
;
;       x = x1
;       U0 = u/w; V0 = v/w;
;       while ( 1 )
;               u += du_dx*4; v+= dv_dx*4
;               U1 = u/w; V1 = v/w;
;               DUDX = (U1-U0)/4; DVDX = (V1-V0)/4;
;
;       ; Pixel 0
;               pixels = texmap[V0*64+U0];
;               U0 += DUDX; V0 += DVDX
;       ; Pixel 1
;               pixels = (pixels<<8)+texmap[V0*64+U0];
;               U0 += DUDX; V0 += DVDX
;       ; Pixel 2
;               pixels = (pixels<<8)+texmap[V0*64+U0];
;               U0 += DUDX; V0 += DVDX
;       ; Pixel 3
;               pixels = (pixels<<8)+texmap[V0*64+U0];
;
;               screen[x] = pixel
;               x += 4;
;               U0 = U1; V0 = V1

NBITS = 4       ; 2^NBITS pixels plotted per divide
ZSHIFT = 4      ; precision used in PDIV macro

PDIV    MACRO
; Returns EAX/ECX in 16.16 format in EAX. Trashes EDX
;          sig bits   6.3

        mov     edx,eax
        shl     eax,ZSHIFT
        sar     edx,32-ZSHIFT
        idiv    ecx                     ; eax = (eax << ZSHIFT) / ecx
        shl     eax, 16-ZSHIFT

        ENDM

; Write CL to num[edi] unless it is the transparent colour (255), then rotate the next
; pixel into CL.  Only referenced from commented-out code in this file.
wr_onepix       macro   num
        local   skip
        cmp     cl,255
        je      skip
        mov     num[edi],cl
skip:   ror     ecx,8
        endm

        public  tmap_loop_fast

; -------------------------------------- Start of Getting Dword Aligned ----------------------------------------------
; Lit fast path.  On entry ebx = fx_u, ebp = fx_v, ecx = fx_z.
; Pixels are drawn one at a time (two divides each) until edi is dword aligned.

tmap_loop_fast:
        mov     esi,ebx

        align   4
NotDwordAligned1:
        test    edi, 11b
        jz      DwordAligned1

; compute v coordinate
        mov     eax, ebp                ; get v
        mov     edx, eax
        sar     edx, 31
        idiv    ecx                     ; eax = (v/z)

        and     eax,3fh                 ; mask with height-1
        mov     ebx,eax

; compute u coordinate
        mov     eax, esi                ; get u
        mov     edx, eax
        sar     edx, 31
        idiv    ecx                     ; eax = (u/z)

        shl     eax,26
        shld    ebx,eax,6               ; ebx = v*64+u

; read 1 pixel
        xor     eax, eax
        mov     al, es:[ebx]            ; get pixel from source bitmap

; lighting code
        mov     ebx, _fx_l              ; get temp copy of lighting value
        mov     ah, bh                  ; get lighting level
        add     ebx, _fx_dl_dx          ; update lighting value
        mov     _fx_l, ebx              ; save temp copy of lighting value

; transparency check
        cmp     al,255
        je      skip2                   ; this pixel is transparent, so don't write it (or light it)

        mov     al, _gr_fade_table[eax] ; xlat pixel thru lighting tables

; write 1 pixel
        mov     [edi],al
skip2:  inc     edi

; update deltas
        add     ebp,_fx_dv_dx
        add     esi,_fx_du_dx
        add     ecx,_fx_dz_dx
        je      _div_0_abort            ; would be dividing by 0, so abort

        dec     _loop_count
        jns     NotDwordAligned1
        jmp     _none_to_do

; -------------------------------------- End of Getting Dword Aligned ----------------------------------------------

DwordAligned1:
;--;    mov     ebx,esi ; get fx_u
;--;
;--;    mov     eax, _loop_count
;--;    inc     eax
;--;    mov     num_left_over, eax
;--;    shr     eax, NBITS
;--;
;--;    test    eax, -1
;--;    je      tmap_loop       ; there are no 2^NBITS chunks, do divide/pixel for whole scanline
;--;
;--;    mov     _loop_count, eax        ; _loop_count = pixels / NPIXS
;--;    shl     eax, NBITS
;--;    sub     num_left_over, eax      ; num_left_over = obvious

        mov     eax, _loop_count
        mov     ebx, esi                ; get fx_u [pentium pipelining]
        inc     eax                     ; eax = pixels remaining
        mov     esi, eax
        and     esi, (1 shl NBITS) - 1  ; esi = pixels past the last whole chunk
        sar     eax, NBITS              ; eax = number of whole chunks; sets ZF when there are none
        mov     num_left_over, esi      ; (mov does not disturb ZF)
        je      tmap_loop               ; there are no 2^NBITS chunks, do divide/pixel for whole scanline
        mov     _loop_count, eax        ; _loop_count = pixels / NPIXS

; compute initial v coordinate
        mov     eax,ebp                 ; get v
        mov     edx,ebp
        shl     eax,ZSHIFT
        sar     edx,32-ZSHIFT
        idiv    ecx                     ; eax = (v/z)
        shl     eax, 16-ZSHIFT
        mov     V0, eax

; compute initial u coordinate
        mov     eax,ebx                 ; get u
        mov     edx,ebx
        shl     eax,ZSHIFT
        sar     edx,32-ZSHIFT
        idiv    ecx                     ; eax = (u/z)
        shl     eax, 16-ZSHIFT
        mov     U0, eax

; Set deltas to NPIXS pixel increments
        mov     eax, _fx_du_dx
        shl     eax, NBITS
        mov     DU1, eax
        mov     eax, _fx_dv_dx
        shl     eax, NBITS
        mov     DV1, eax
        mov     eax, _fx_dz_dx
        shl     eax, NBITS
        mov     DZ1, eax

        align   4
; One iteration draws 2^NBITS pixels: two divides give u/z and v/z at the END of the chunk,
; and texels in between are stepped linearly from U0/V0.
TopOfLoop4:
        add     ebx, DU1
        add     ebp, DV1
        add     ecx, DZ1
        je      _div_0_abort            ; would be dividing by 0, so abort

; Done with ebx, ebp, ecx until next iteration
        push    ebx
        push    ecx
        push    ebp
        push    edi

; Find fixed U1
        mov     eax, ebx
        mov     edx,ebx
        shl     eax,ZSHIFT
        sar     edx,32-ZSHIFT
        idiv    ecx                     ; eax = (u/z)
        shl     eax, 16-ZSHIFT
        mov     ebx, eax                ; ebx = U1 until pop's

; Find fixed V1
        mov     eax, ebp
        mov     edx, ebp
        shl     eax,ZSHIFT
        sar     edx,32-ZSHIFT
        idiv    ecx                     ; eax = (v/z)

        mov     ecx, U0                 ; ecx = U0 until pop's
        mov     edi, V0                 ; edi = V0 until pop's

        shl     eax, 16-ZSHIFT
        mov     ebp, eax                ; ebp = V1 until pop's

; Make ESI =  V0:U0 in 6:10,6:10 format
        mov     eax, ecx
        shr     eax, 6
        mov     esi, edi
        shl     esi, 10
        mov     si, ax

; Make EDX = DV:DU in 6:10,6:10 format
        mov     eax, ebx
        sub     eax, ecx
        sar     eax, NBITS+6
        mov     edx, ebp
        sub     edx, edi
        shl     edx, 10-NBITS           ; EDX = (V1-V0) / 2^NBITS in 6:10 int:frac
        mov     dx, ax                  ; put delta u in low word

; Save the U1 and V1 so we don't have to divide on the next iteration
        mov     U0, ebx
        mov     V0, ebp

        pop     edi                     ; Restore EDI before using it

; LIGHTING CODE
        mov     ebx, _fx_l
        mov     ebp, _fx_dl_dx

        test    _transparency_on,-1
        je      no_trans1

; Transparency-aware version: 4 x 2 x 2 = 16 pixels, written one byte at a time.
; The light value is stepped for EVERY pixel, transparent or not, before the transparency
; test.  (The step used to sit after the "je skipaN", so transparent texels did not advance
; the light interpolant, unlike NotDwordAligned1, the left-over loop and NDEP_loop.)
    REPT (1 SHL (NBITS-2))
        local   skip3,no_trans1
        REPT 2
        local   skipa1,skipa2

        mov     eax, esi                ; get u,v
        shr     eax, 26                 ; shift out all but int(v)
        shld    ax,si,6                 ; shift in u, shifting up v
        add     esi, edx                ; inc u,v
        mov     al, es:[eax]            ; get pixel from source bitmap
        mov     ah, bh                  ; form lighting table lookup value
        add     ebx, ebp                ; update lighting value (always, see note above)
        cmp     al,255
        je      skipa1
        mov     al, _gr_fade_table[eax] ; xlat thru lighting table into dest buffer
        mov     [edi],al
skipa1:
        inc     edi

; Do odd pixel
        mov     eax, esi                ; get u,v
        shr     eax, 26                 ; shift out all but int(v)
        shld    ax,si,6                 ; shift in u, shifting up v
        add     esi, edx                ; inc u,v
        mov     al, es:[eax]            ; get pixel from source bitmap
        mov     ah, bh                  ; form lighting table lookup value
        add     ebx, ebp                ; update lighting value (always, see note above)
        cmp     al,255
        je      skipa2
        mov     al, _gr_fade_table[eax] ; xlat thru lighting table into dest buffer
        mov     [edi],al
skipa2:
        inc     edi

        ENDM
    ENDM
        jmp     cont1

; -------------------------------------------------------
; No transparency: pixels are collected four at a time in ECX (the real z is on the stack)
; and written with a single dword store.
no_trans1:
;----------;    push    edx
;----------;    mov     mem_edx, edx
;----------;    sub     edx, edx

    REPT (1 SHL (NBITS-2))
        REPT 2
        mov     eax, esi                ; get u,v
        shr     eax, 26                 ; shift out all but int(v)
        shld    ax,si,6                 ; shift in u, shifting up v
        add     esi, edx                ; inc u,v
        mov     al, es:[eax]            ; get pixel from source bitmap
        mov     ah, bh                  ; form lighting table lookup value
        add     ebx, ebp                ; update lighting value
        mov     cl, _gr_fade_table[eax] ; xlat thru lighting table into dest buffer

; Do odd pixel
        mov     eax, esi                ; get u,v
        shr     eax, 26                 ; shift out all but int(v)
        shld    ax,si,6                 ; shift in u, shifting up v
        add     esi, edx                ; inc u,v
        mov     al, es:[eax]            ; get pixel from source bitmap
        mov     ah, bh                  ; form lighting table lookup value
        add     ebx, ebp                ; update lighting value
        mov     ch, _gr_fade_table[eax] ; xlat thru lighting table into dest buffer

; ----- This is about 1% faster than the above, and could probably be optimized more.
; ----- Problem is, it gets the u,v coordinates backwards.  What you would need to do
; ----- is switch the packing of the u,v coordinates above (about 95 lines up).
;----------;    mov     eax, esi
;----------;    shr     ax, 10
;----------;    rol     eax, 6
;----------;    mov     dx, ax
;----------;    add     esi, mem_edx
;----------;    mov     dl, es:[edx]
;----------;    mov     dh, bh
;----------;    add     ebx, ebp
;----------;    mov     cl, _gr_fade_table[edx]
;----------;
;----------;    mov     eax, esi
;----------;    shr     ax, 10
;----------;    rol     eax, 6
;----------;    mov     dx, ax
;----------;    add     esi, mem_edx
;----------;    mov     dl, es:[edx]
;----------;    mov     dh, bh
;----------;    add     ebx, ebp
;----------;    mov     ch, _gr_fade_table[edx]

        ror     ecx, 16                 ; move to next double dest pixel position
        ENDM

        mov     [edi],ecx               ; Draw 4 pixels to display
        add     edi,4
    ENDM
;;      pop     edx
cont1:

; -------------------------------------------------------

; LIGHTING CODE
        mov     _fx_l, ebx              ; write the stepped light value back
        pop     ebp                     ; ebp = v, ecx = z, ebx = u at the end of this chunk
        pop     ecx
        pop     ebx
        dec     _loop_count
        jnz     TopOfLoop4

EndOfLoop4:
        test    num_left_over, -1
        je      _none_to_do

; ----------------------------------------- Start of LeftOver Pixels ------------------------------------------
; Decide how to draw the last num_left_over (< 2^NBITS) pixels.  The linear approximation
; at okhere is used when z + DZ1 is non-negative and 3*z < 4*(z + DZ1) (signed compare);
; otherwise, if new_end is non-zero, the exact per-pixel loop NewDoEndPixels is used.
DoEndPixels:
        push    ecx

        mov     eax, ecx
        lea     eax, [eax*2+eax]        ; eax = 3*z

        add     ecx, DZ1
        js      notokhere
        shl     ecx,2                   ; ecx = 4*(z + DZ1)
        cmp     eax, ecx
        pop     ecx                     ; pop leaves the flags from the cmp intact
        jl      okhere
        jmp     bah_bah
notokhere:
        pop     ecx
bah_bah:
        test    new_end,-1
        jne     NewDoEndPixels
okhere:

        add     ebx, DU1
        add     ebp, DV1
        add     ecx, DZ1
        je      _div_0_abort
        jns     dep_cont

; z went negative.
; this can happen because we added DZ1 to the current z, but dz1 represents dz for perhaps 16 pixels
; though we might only plot one more pixel.
; Back off the step that made z negative: subtract DU1/DV1/DZ1 >> 1, then >> 2, ... up to
; >> (NBITS-1), stopping as soon as z is no longer negative.
; z is kept in EDX while the loop runs because variable shifts need their count in CL.
; (Using CL as the counter while ECX still held z corrupted both: "mov cl,1" overwrote z's
; low byte and "sub ecx,eax" overwrote the shift count.)  EDX is free here; it is next
; written by PDIV below.
        mov     edx, ecx                ; edx = z (negative on entry to this loop)
        mov     ecx, 1                  ; cl = current back-off shift
dep_loop:
        mov     eax, DU1
        sar     eax, cl
        sub     ebx, eax
        mov     eax, DV1
        sar     eax, cl
        sub     ebp, eax
        mov     eax, DZ1
        sar     eax, cl
        sub     edx, eax
        je      _div_0_abort            ; z == 0: would be dividing by 0, so abort
        jns     dep_done                ; z is non-negative again
        inc     ecx
        cmp     ecx, NBITS
        jne     dep_loop

dep_done:
        mov     ecx, edx                ; z back where the code below expects it

; Reached directly (with ecx = z) when z did not go negative, or by falling through from
; dep_done above.
dep_cont:
        push    edi                     ; use edi as a temporary variable

; Clamp z to at least 1 shl (ZSHIFT+1) before the PDIV divides below.
        cmp     ecx,1 shl (ZSHIFT+1)
        jg      ecx_ok
        mov     ecx, 1 shl (ZSHIFT+1)
ecx_ok:

; Find fixed U1
        mov     eax, ebx
        PDIV
        mov     ebx, eax                ; ebx = U1 until pop's

; Find fixed V1
        mov     eax, ebp
        PDIV
        mov     ebp, eax                ; ebp = V1 until pop's

        mov     ecx, U0                 ; ecx = U0 until pop's
        mov     edi, V0                 ; edi = V0 until pop's

; Make ESI =  V0:U0 in 6:10,6:10 format
        mov     eax, ecx
        shr     eax, 6
        mov     esi, edi
        shl     esi, 10
        mov     si, ax

; Make EDX = DV:DU in 6:10,6:10 format
        mov     eax, ebx
        sub     eax, ecx
        sar     eax, NBITS+6
        mov     edx, ebp
        sub     edx, edi
        shl     edx, 10-NBITS           ; EDX = (V1-V0) / 2^NBITS in 6:10 int:frac
        mov     dx, ax                  ; put delta u in low word

        pop     edi                     ; Restore EDI before using it

        mov     ecx, num_left_over      ; ecx = pixels still to draw (non-zero here)

; LIGHTING CODE
        mov     ebx, _fx_l
        mov     ebp, _fx_dl_dx

; Up to 2^NBITS pixels, two per REPT iteration.  ITERATION is an assembly-time byte offset
; from edi, so edi itself is never advanced; the loop leaves through _none_to_do as soon as
; ecx reaches zero.
        ITERATION = 0
        REPT (1 SHL (NBITS-1))
        local   skip4, skip5
; Do even pixel
        mov     eax, esi                ; get u,v
        shr     eax, 26                 ; shift out all but int(v)
        shld    ax,si,6                 ; shift in u, shifting up v
        mov     al, es:[eax]            ; get pixel from source bitmap
        add     esi, edx                ; inc u,v
        mov     ah, bh                  ; form lighting table lookup value
        add     ebx, ebp                ; update lighting value
        cmp     al,255
        je      skip4
        mov     al, _gr_fade_table[eax] ; xlat thru lighting table into dest buffer
        mov     [edi+ITERATION], al     ; write pixel
skip4:  dec     ecx
        jz      _none_to_do
        ITERATION = ITERATION + 1

; Do odd pixel
        mov     eax, esi                ; get u,v
        shr     eax, 26                 ; shift out all but int(v)
        shld    ax,si,6                 ; shift in u, shifting up v
        mov     al, es:[eax]            ; get pixel from source bitmap
        add     esi, edx                ; inc u,v
        mov     ah, bh                  ; form lighting table lookup value
        add     ebx, ebp                ; update lighting value (ebp holds _fx_dl_dx)
        cmp     al,255
        je      skip5
        mov     al, _gr_fade_table[eax] ; xlat thru lighting table into dest buffer
        mov     [edi+ITERATION], al     ; write pixel
skip5:  dec     ecx
        jz      _none_to_do
        ITERATION = ITERATION + 1
        ENDM

; Should never get here!!!!
; Trap: the unrolled left-over loop exits through _none_to_do once its pixel counter
; reaches zero, so falling out of it means the counter exceeded 2^NBITS.
        int     3
        jmp     _none_to_do

; ----------------------------------------- End of LeftOver Pixels ------------------------------------------

; --BUGGY NEW--NewDoEndPixels:
; --BUGGY NEW-- mov     eax, num_left_over
; --BUGGY NEW-- and     num_left_over, 3
; --BUGGY NEW-- shr     eax, 2
; --BUGGY NEW-- je      NDEP_1
; --BUGGY NEW-- mov     _loop_count, eax
; --BUGGY NEW--
; --BUGGY NEW--; do 4 pixels per hunk, not 16, so div deltas by 4 (16/4=4)
; --BUGGY NEW-- shr     DU1,2
; --BUGGY NEW-- shr     DV1,2
; --BUGGY NEW-- shr     DZ1,2
; --BUGGY NEW--
; --BUGGY NEW--NDEP_TopOfLoop4:
; --BUGGY NEW-- add     ebx, DU1
; --BUGGY NEW-- add     ebp, DV1
; --BUGGY NEW-- add     ecx, DZ1
; --BUGGY NEW-- je      _div_0_abort    ; would be dividing by 0, so abort
; --BUGGY NEW--
; --BUGGY NEW--; Done with ebx, ebp, ecx until next iteration
; --BUGGY NEW-- push    ebx
; --BUGGY NEW-- push    ecx
; --BUGGY NEW-- push    ebp
; --BUGGY NEW-- push    edi
; --BUGGY NEW--
; --BUGGY NEW--; Find fixed U1
; --BUGGY NEW-- mov     eax, ebx
; --BUGGY NEW-- mov     edx,ebx
; --BUGGY NEW-- shl     eax,(ZSHIFT-2)
; --BUGGY NEW-- sar     edx,32-(ZSHIFT-2)
; --BUGGY NEW-- idiv    ecx     ; eax = (v/z)
; --BUGGY NEW-- shl     eax, 16-(ZSHIFT-2)
; --BUGGY NEW-- mov     ebx, eax        ; ebx = U1 until pop's
; --BUGGY NEW--
; --BUGGY NEW--; Find fixed V1
; --BUGGY NEW-- mov     eax, ebp
; --BUGGY NEW-- mov     edx, ebp
; --BUGGY NEW-- shl     eax,(ZSHIFT-2)
; --BUGGY NEW-- sar     edx,32-(ZSHIFT-2)
; --BUGGY NEW-- idiv    ecx     ; eax = (v/z)
; --BUGGY NEW--
; --BUGGY NEW-- mov     ecx, U0 ; ecx = U0 until pop's
; --BUGGY NEW-- mov     edi, V0 ; edi = V0 until pop's
; --BUGGY NEW--
; --BUGGY NEW-- shl     eax, 16-(ZSHIFT-2)
; --BUGGY NEW-- mov     ebp, eax        ; ebp = V1 until pop's
; --BUGGY NEW--
; --BUGGY NEW--; Make ESI =  V0:U0 in 6:10,6:10 format
; --BUGGY NEW-- mov     eax, ecx
; --BUGGY NEW-- shr     eax, 6
; --BUGGY NEW-- mov     esi, edi
; --BUGGY NEW-- shl     esi, 10
; --BUGGY NEW-- mov     si, ax
; --BUGGY NEW--
; --BUGGY NEW--; Make EDX = DV:DU in 6:10,6:10 format
; --BUGGY NEW-- mov     eax, ebx
; --BUGGY NEW-- sub     eax, ecx
; --BUGGY NEW-- sar     eax, (NBITS-2)+6
; --BUGGY NEW-- mov     edx, ebp
; --BUGGY NEW-- sub     edx, edi
; --BUGGY NEW-- shl     edx, 10-(NBITS-2)       ; EDX = V1-V0/ 4 in 6:10 int:frac
; --BUGGY NEW-- mov     dx, ax  ; put delta u in low word
; --BUGGY NEW--
; --BUGGY NEW--; Save the U1 and V1 so we don't have to divide on the next iteration
; --BUGGY NEW-- mov     U0, ebx
; --BUGGY NEW-- mov     V0, ebp
; --BUGGY NEW--
; --BUGGY NEW-- pop     edi     ; Restore EDI before using it
; --BUGGY NEW--
; --BUGGY NEW--; LIGHTING CODE
; --BUGGY NEW-- mov     ebx, _fx_l
; --BUGGY NEW-- mov     ebp, _fx_dl_dx
; --BUGGY NEW--
; --BUGGY NEW--;**      test    _transparency_on,-1
; --BUGGY NEW--;**      je      NDEP_no_trans1
; --BUGGY NEW--
; --BUGGY NEW--        REPT 2
; --BUGGY NEW-- local   NDEP_skipa1, NDEP_skipa2
; --BUGGY NEW--
; --BUGGY NEW-- mov     eax, esi        ; get u,v
; --BUGGY NEW-- shr     eax, 26 ; shift out all but int(v)
; --BUGGY NEW-- shld    ax,si,6 ; shift in u, shifting up v
; --BUGGY NEW-- add     esi, edx        ; inc u,v
; --BUGGY NEW-- mov     al, es:[eax]    ; get pixel from source bitmap
; --BUGGY NEW-- cmp     al,255
; --BUGGY NEW-- je      NDEP_skipa1
; --BUGGY NEW-- mov     ah, bh  ; form lighting table lookup value
; --BUGGY NEW-- add     ebx, ebp        ; update lighting value
; --BUGGY NEW-- mov     al, _gr_fade_table[eax] ; xlat thru lighting table into dest buffer
; --BUGGY NEW-- mov     [edi],al
; --BUGGY NEW--NDEP_skipa1:
; --BUGGY NEW-- inc     edi
; --BUGGY NEW--
; --BUGGY NEW--; Do odd pixel
; --BUGGY NEW-- mov     eax, esi        ; get u,v
; --BUGGY NEW-- shr     eax, 26 ; shift out all but int(v)
; --BUGGY NEW-- shld    ax,si,6 ; shift in u, shifting up v
; --BUGGY NEW-- add     esi, edx        ; inc u,v
; --BUGGY NEW-- mov     al, es:[eax]    ; get pixel from source bitmap
; --BUGGY NEW-- cmp     al,255
; --BUGGY NEW-- je      NDEP_skipa2
; --BUGGY NEW-- mov     ah, bh  ; form lighting table lookup value
; --BUGGY NEW-- add     ebx, ebp        ; update lighting value
; --BUGGY NEW-- mov     al, _gr_fade_table[eax] ; xlat thru lighting table into dest buffer
; --BUGGY NEW-- mov     [edi],al
; --BUGGY NEW--NDEP_skipa2:
; --BUGGY NEW-- inc     edi
; --BUGGY NEW--
; --BUGGY NEW--        ENDM
; --BUGGY NEW--
; --BUGGY NEW-- mov     _fx_l, ebx
; --BUGGY NEW-- pop     ebp
; --BUGGY NEW-- pop     ecx
; --BUGGY NEW-- pop     ebx
; --BUGGY NEW-- dec     _loop_count
; --BUGGY NEW-- jnz     NDEP_TopOfLoop4
; --BUGGY NEW--
; --BUGGY NEW-- test    num_left_over, -1
; --BUGGY NEW-- je      _none_to_do
; --BUGGY NEW--
; --BUGGY NEW--NDEP_1:
; --BUGGY NEW-- mov     esi,ebx
; --BUGGY NEW--
; --BUGGY NEW-- align   4
; --BUGGY NEW--NDEP_loop:
; --BUGGY NEW--
; --BUGGY NEW--; compute v coordinate
; --BUGGY NEW-- mov     eax, ebp        ; get v
; --BUGGY NEW-- mov     edx, eax
; --BUGGY NEW-- sar     edx, 31
; --BUGGY NEW-- idiv    ecx     ; eax = (v/z)
; --BUGGY NEW--
; --BUGGY NEW-- and     eax,3fh ; mask with height-1
; --BUGGY NEW-- mov     ebx,eax
; --BUGGY NEW--
; --BUGGY NEW--; compute u coordinate
; --BUGGY NEW-- mov     eax, esi        ; get u
; --BUGGY NEW-- mov     edx, eax
; --BUGGY NEW-- sar     edx, 31
; --BUGGY NEW-- idiv    ecx     ; eax = (u/z)
; --BUGGY NEW--
; --BUGGY NEW-- shl     eax,26
; --BUGGY NEW-- shld    ebx,eax,6       ; esi = v*64+u
; --BUGGY NEW--
; --BUGGY NEW--; read 1  pixel
; --BUGGY NEW-- xor     eax, eax
; --BUGGY NEW-- mov     al, es:[ebx]    ; get pixel from source bitmap
; --BUGGY NEW--
; --BUGGY NEW--; lighting code
; --BUGGY NEW-- mov     ebx, _fx_l      ; get temp copy of lighting value
; --BUGGY NEW-- mov     ah, bh  ; get lighting level
; --BUGGY NEW-- add     ebx, _fx_dl_dx  ; update lighting value
; --BUGGY NEW-- mov     _fx_l, ebx      ; save temp copy of lighting value
; --BUGGY NEW--
; --BUGGY NEW--; transparency check
; --BUGGY NEW-- cmp     al,255
; --BUGGY NEW-- je      NDEP_skip2      ; this pixel is transparent, so don't write it (or light it)
; --BUGGY NEW--
; --BUGGY NEW-- mov     al, _gr_fade_table[eax] ; xlat pixel thru lighting tables
; --BUGGY NEW--
; --BUGGY NEW--; write 1 pixel
; --BUGGY NEW-- mov     [edi],al
; --BUGGY NEW--NDEP_skip2:      inc     edi
; --BUGGY NEW--
; --BUGGY NEW--; update deltas
; --BUGGY NEW-- add     ebp,_fx_dv_dx
; --BUGGY NEW-- add     esi,_fx_du_dx
; --BUGGY NEW-- add     ecx,_fx_dz_dx
; --BUGGY NEW-- je      _div_0_abort    ; would be dividing by 0, so abort
; --BUGGY NEW--
; --BUGGY NEW-- dec     num_left_over
; --BUGGY NEW-- jne     NDEP_loop
; --BUGGY NEW--
; --BUGGY NEW-- jmp     _none_to_do

; Live replacement for the commented-out code above: draw the remaining num_left_over
; pixels of a lit scanline with two divides per pixel.  Jumped to from the DoEndPixels
; decision code with ebx = u, ebp = v, ecx = z, and only when num_left_over is non-zero
; (the loop below counts with dec/jne).
NewDoEndPixels:
        mov     esi,ebx

        align   4
NDEP_loop:

; compute v coordinate
        mov     eax, ebp        ; get v
        mov     edx, eax
        sar     edx, 31
        idiv    ecx     ; eax = (v/z)

        and     eax,3fh ; mask with height-1
        mov     ebx,eax

; compute u coordinate
        mov     eax, esi        ; get u
        mov     edx, eax
        sar     edx, 31
        idiv    ecx     ; eax = (u/z)

        shl     eax,26
        shld    ebx,eax,6       ; ebx = v*64+u

; read 1  pixel
        xor     eax, eax
        mov     al, es:[ebx]    ; get pixel from source bitmap

; lighting code
        mov     ebx, _fx_l      ; get temp copy of lighting value
        mov     ah, bh  ; get lighting level
        add     ebx, _fx_dl_dx  ; update lighting value
        mov     _fx_l, ebx      ; save temp copy of lighting value

; transparency check
        cmp     al,255
        je      NDEP_skip2      ; this pixel is transparent, so don't write it (or light it)

        mov     al, _gr_fade_table[eax] ; xlat pixel thru lighting tables

; write 1 pixel
        mov     [edi],al
NDEP_skip2:     inc     edi

; update deltas
        add     ebp,_fx_dv_dx
        add     esi,_fx_du_dx
        add     ecx,_fx_dz_dx
        je      _div_0_abort    ; would be dividing by 0, so abort

        dec     num_left_over
        jne     NDEP_loop

        jmp     _none_to_do

; ==================================================== No Lighting Code ======================================================
; Unlit fast path; same structure as the lit one without the fade-table lookup.
; On entry ebx = fx_u, ebp = fx_v, ecx = fx_z, _loop_count = pixels - 1.

        public  tmap_loop_fast_nolight
tmap_loop_fast_nolight:
        mov     esi,ebx

        align   4
; Draw single pixels (two divides each) until edi is dword aligned.
NotDwordAligned1_nolight:
        test    edi, 11b
        jz      DwordAligned1_nolight

; compute v coordinate
        mov     eax,ebp ; get v
        mov     edx, eax
        sar     edx, 31
        idiv    ecx     ; eax = (v/z)

        and     eax,3fh ; mask with height-1
        mov     ebx,eax

; compute u coordinate
        mov     eax, esi        ; get u
        mov     edx, eax
        sar     edx, 31
        idiv    ecx     ; eax = (u/z)

        shl     eax,26
        shld    ebx,eax,6       ; ebx = v*64+u

; read 1  pixel
        mov     al,es:[ebx]     ; get pixel from source bitmap

; write 1 pixel
        cmp     al,255
        je      skip6
        mov     [edi],al
skip6:  inc     edi

; update deltas
        add     ebp,_fx_dv_dx
        add     esi,_fx_du_dx
        add     ecx,_fx_dz_dx
        je      _div_0_abort    ; would be dividing by 0, so abort

        dec     _loop_count
        jns     NotDwordAligned1_nolight
        jmp     _none_to_do

DwordAligned1_nolight:
        mov     ebx,esi         ; ebx = u again

        mov     eax, _loop_count
        inc     eax             ; eax = pixels remaining
        mov     num_left_over, eax
        shr     eax, NBITS      ; eax = number of whole 2^NBITS-pixel chunks

        test    eax, -1
        je      tmap_loop       ; no 2^NBITS chunks, do divide/pixel for whole scanline

        mov     _loop_count, eax        ; _loop_count = pixels / NPIXS
        shl     eax, NBITS
        sub     num_left_over, eax      ; num_left_over = obvious

; compute initial v coordinate
        mov     eax,ebp ; get v
        PDIV
        mov     V0, eax

; compute initial u coordinate
        mov     eax,ebx ; get u
        PDIV
        mov     U0, eax

; Set deltas to NPIXS pixel increments
        mov     eax, _fx_du_dx
        shl     eax, NBITS
        mov     DU1, eax
        mov     eax, _fx_dv_dx
        shl     eax, NBITS
        mov     DV1, eax
        mov     eax, _fx_dz_dx
        shl     eax, NBITS
        mov     DZ1, eax

        align   4
; One iteration draws 2^NBITS pixels using two divides for the end-of-chunk u/z and v/z.
TopOfLoop4_nolight:
        add     ebx, DU1
        add     ebp, DV1
        add     ecx, DZ1
        je      _div_0_abort

; Done with ebx, ebp, ecx until next iteration
        push    ebx
        push    ecx
        push    ebp
        push    edi

; Find fixed U1
        mov     eax, ebx
        PDIV
        mov     ebx, eax        ; ebx = U1 until pop's

; Find fixed V1
        mov     eax, ebp
        PDIV
        mov     ebp, eax        ; ebp = V1 until pop's

        mov     ecx, U0 ; ecx = U0 until pop's
        mov     edi, V0 ; edi = V0 until pop's

; Make ESI =  V0:U0 in 6:10,6:10 format
        mov     eax, ecx
        shr     eax, 6
        mov     esi, edi
        shl     esi, 10
        mov     si, ax

; Make EDX = DV:DU in 6:10,6:10 format
        mov     eax, ebx
        sub     eax, ecx
        sar     eax, NBITS+6
        mov     edx, ebp
        sub     edx, edi
        shl     edx, 10-NBITS   ; EDX = V1-V0/ 4 in 6:10 int:frac
        mov     dx, ax  ; put delta u in low word

; Save the U1 and V1 so we don't have to divide on the next iteration
        mov     U0, ebx
        mov     V0, ebp

        pop     edi     ; Restore EDI before using it

; Four texels are gathered into ECX per REPT iteration (the real z is on the stack) and
; written either as one dword or, when _transparency_on is set, byte by byte skipping 255s.
    REPT (1 SHL (NBITS-2))
        local   skip7, no_trans2, skip1q, skip2q, skip3q, skip4q
; Do 1 pixel
        mov     eax, esi        ; get u,v
        shr     eax, 26 ; shift out all but int(v)
        shld    ax,si,6 ; shift in u, shifting up v
        add     esi, edx        ; inc u,v
        mov     cl, es:[eax]    ; load into buffer register
;;;     ror     ecx, 8  ; move to next dest pixel

        mov     eax, esi        ; get u,v
        shr     eax, 26 ; shift out all but int(v)
        shld    ax,si,6 ; shift in u, shifting up v
        mov     ch, es:[eax]    ; load into buffer register
        add     esi, edx        ; inc u,v
        ror     ecx, 16 ; move to next dest pixel

        mov     eax, esi        ; get u,v
        shr     eax, 26 ; shift out all but int(v)
        shld    ax,si,6 ; shift in u, shifting up v
        mov     cl, es:[eax]    ; load into buffer register
        add     esi, edx        ; inc u,v
;;;     ror     ecx, 8  ; move to next dest pixel

        mov     eax, esi        ; get u,v
        shr     eax, 26 ; shift out all but int(v)
        shld    ax,si,6 ; shift in u, shifting up v
        mov     ch, es:[eax]    ; load into buffer register
        add     esi, edx        ; inc u,v
        ror     ecx, 16 ;-- can get rid of this, just write in different order below -- ; move to next dest pixel

        test    _transparency_on,-1
        je      no_trans2
        cmp     ecx,-1          ; all four texels transparent?
        je      skip7

        cmp     cl,255
        je      skip1q
        mov     [edi],cl
skip1q:

        cmp     ch,255
        je      skip2q
        mov     1[edi],ch
skip2q:
        ror     ecx,16

        cmp     cl,255
        je      skip3q
        mov     2[edi],cl
skip3q:

        cmp     ch,255
        je      skip4q
        mov     3[edi],ch
skip4q:

;;      wr_onepix       0
;;      wr_onepix       1
;;      wr_onepix       2
;;      wr_onepix       3
        jmp     skip7
no_trans2:
        mov     [edi],ecx       ; Draw 4 pixels to display
skip7:  add     edi,4
    ENDM

        pop     ebp             ; ebp = v, ecx = z, ebx = u at the end of this chunk
        pop     ecx
        pop     ebx
        dec     _loop_count
        jnz     TopOfLoop4_nolight

EndOfLoop4_nolight:

        test    num_left_over, -1
        je      _none_to_do

; Left-over pixels, unlit: linear approximation over one more full chunk step.
; NOTE(review): unlike the lit DoEndPixels code, nothing here checks for z going negative
; or clamps it before the two divides -- confirm whether that is intentional.
DoEndPixels_nolight:
        add     ebx, DU1
        add     ebp, DV1
        add     ecx, DZ1
        je      _div_0_abort
        push    edi     ; use edi as a temporary variable

; Find fixed U1
        mov     eax, ebx
        mov     edx,eax
        shl     eax,ZSHIFT
        sar     edx,32-ZSHIFT
        idiv    ecx     ; eax = (u/z)
        shl     eax, 16-ZSHIFT
        mov     ebx, eax        ; ebx = U1 until pop's

; Find fixed V1
        mov     eax, ebp
        mov     edx,eax
        shl     eax,ZSHIFT
        sar     edx,32-ZSHIFT
        idiv    ecx     ; eax = (v/z)
        shl     eax, 16-ZSHIFT
        mov     ebp, eax        ; ebp = V1 until pop's

        mov     ecx, U0 ; ecx = U0 until pop's
        mov     edi, V0 ; edi = V0 until pop's

; Make ESI =  V0:U0 in 6:10,6:10 format
        mov     eax, ecx
        shr     eax, 6
        mov     esi, edi
        shl     esi, 10
        mov     si, ax

; Make EDX = DV:DU in 6:10,6:10 format
        mov     eax, ebx
        sub     eax, ecx
        sar     eax, NBITS+6
        mov     edx, ebp
        sub     edx, edi
        shl     edx, 10-NBITS   ; EDX = V1-V0/ 4 in 6:10 int:frac
        mov     dx, ax  ; put delta u in low word

        pop     edi     ; Restore EDI before using it

        mov     ecx, num_left_over      ; ecx = pixels still to draw (non-zero here)

; Up to 2^NBITS pixels; ITERATION is an assembly-time byte offset from edi.  The loop
; leaves through _none_to_do as soon as ecx reaches zero.
        ITERATION = 0
        REPT (1 SHL NBITS)
        local   skip8
; Do 1 pixel
        mov     eax, esi        ; get u,v
        shr     eax, 26 ; shift out all but int(v)
        shld    ax,si,6 ; shift in u, shifting up v
        mov     al, es:[eax]    ; load into buffer register
        add     esi, edx        ; inc u,v
        cmp     al,255
        je      skip8
        mov     [edi+ITERATION], al     ; write pixel
skip8:  dec     ecx
        jz      _none_to_do
        ITERATION = ITERATION + 1
        ENDM

; Should never get here!!!!!
        int     3
        jmp     _none_to_do

_TEXT   ends

        end