;THE COMPUTER CODE CONTAINED HEREIN IS THE SOLE PROPERTY OF PARALLAX
;SOFTWARE CORPORATION ("PARALLAX"). PARALLAX, IN DISTRIBUTING THE CODE TO
;END-USERS, AND SUBJECT TO ALL OF THE TERMS AND CONDITIONS HEREIN, GRANTS A
;ROYALTY-FREE, PERPETUAL LICENSE TO SUCH END-USERS FOR USE BY SUCH END-USERS
;IN USING, DISPLAYING, AND CREATING DERIVATIVE WORKS THEREOF, SO LONG AS
;SUCH USE, DISPLAY OR CREATION IS FOR NON-COMMERCIAL, ROYALTY OR REVENUE
;FREE PURPOSES. IN NO EVENT SHALL THE END-USER USE THE COMPUTER CODE
;CONTAINED HEREIN FOR REVENUE-BEARING PURPOSES. THE END-USER UNDERSTANDS
;AND AGREES TO THE TERMS HEREIN AND ACCEPTS THE SAME BY USE OF THIS FILE.
;COPYRIGHT 1993-1998 PARALLAX SOFTWARE CORPORATION. ALL RIGHTS RESERVED.
;
;
; $Source: f:/miner/source/texmap/rcs/tmap_lin.asm $
; $Revision: 1.3 $
; $Author: john $
; $Date: 1995/02/20 18:22:57 $
;
; Linearly interpolating texture mapper inner loop
;
; $Log: tmap_lin.asm $
; Revision 1.3 1995/02/20 18:22:57 john
; Put all the externs in the assembly modules into tmap_inc.asm.
; Also, moved all the C versions of the inner loops into a new module,
; scanline.c.
;
; Revision 1.2 1995/02/20 17:09:10 john
; Added code so that you can build the tmapper with no assembly!
;
; Revision 1.1 1994/11/30 12:24:04 mike
; Initial revision
;
; Revision 1.6 1994/11/12 16:39:40 mike
; jae to ja.
;
; Revision 1.5 1994/02/10 21:24:43 matt
; Got rid of debug_on set
;
; Revision 1.4 1994/01/31 15:39:53 mike
; Write additional inner loop for transparency.
;
; Revision 1.3 1994/01/26 11:50:04 john
; Added transparency to linear unlighted texture mapper.
;
; Revision 1.2 1993/11/22 10:24:36 mike
; *** empty log message ***
;
; Revision 1.1 1993/09/08 17:29:50 mike
; Initial revision
;
;
;

        .386

        option  oldstructs

        .nolist
        include psmacros.inc
        .list

        public  asm_tmap_scanline_lin_

        include tmap_inc.asm

_DATA   SEGMENT DWORD PUBLIC USE32 'DATA'

; Number of pixels still to draw.  Kept in memory because every general
; purpose register is in use inside the inner loops.
_loop_count     dd      ?

_DATA   ENDS

DGROUP  GROUP   _DATA

_TEXT   SEGMENT DWORD PUBLIC USE32 'CODE'

        ASSUME  DS:_DATA
        ASSUME  CS:_TEXT

        extn    mprintf_                ; not referenced in the code below

; --------------------------------------------------------------------------------------------------
; asm_tmap_scanline_lin_
;
; Draws one horizontal span of a linearly (non-perspective) interpolated
; texture.  Takes no register arguments; everything is read from globals.
;
; Enter:
;       _fx_xleft       left x coordinate (used directly as a pixel column)
;       _fx_xright      right x coordinate (used directly as a pixel column)
;       _fx_y           y coordinate (used directly as a row number)
;       _pixptr         address of source pixel map
;       _fx_u           initial u coordinate
;       _fx_v           initial v coordinate
;       _fx_du_dx       du/dx
;       _fx_dv_dx       dv/dx
;       _transparency_on  non-zero selects the loop that skips texels equal to 255
;
; Also reads write_buffer, _bytes_per_row, _window_left, _window_right,
; _window_bottom, _window_width and the assembly-time constant num_iters.
;
; Exit:
;       All general registers preserved (pusha/popa).  _loop_count is clobbered.
;
; u, v and their deltas are shifted left by 10 during setup, so the texel
; index is built from bits 16..21 of the incoming u and v values:
;       index = (v6 << 6) | u6          (a 64x64 texel table at esi)
; NOTE(review): this looks like 16.16 fixed point wrapped to 0..63 - confirm
; against the caller.
;
; Original pseudo-code (it mentions z, but no divide is done in this module):
;
; for (x = (int) xleft; x <= (int) xright; x++) {
;    _setcolor(read_pixel_from_tmap(srcb,((int) (u/z)) & 63,((int) (v/z)) & 63));
;    _setpixel(x,y);
;
;    u += du_dx;
;    v += dv_dx;
;    z += dz_dx;
; }

        align   4
asm_tmap_scanline_lin_:
        pusha

; Setup for loop:
;       _loop_count     iterations = (int) xright - (int) xleft
;       esi             source pixel pointer = pixptr
;       edi             first destination pixel = write_buffer + y*_bytes_per_row + x

; set esi = pointer to start of texture map data
        mov     esi,_pixptr

; set edi = address of first pixel to modify
        mov     edi,_fx_y
        cmp     edi,_window_bottom
        ja      _none_to_do             ; unsigned compare, so a negative y is rejected too

        imul    edi,_bytes_per_row
        mov     eax,_fx_xleft
        test    eax, eax
        jns     eax_ok
        sub     eax,eax                 ; clamp a negative left edge to column 0
eax_ok:
        mov     ebx,eax                 ; ebx = column actually used for edi; reused for the
                                        ;  pixel count so pointer and count agree
        add     edi,eax
        add     edi,write_buffer

; set _loop_count = # of iterations
        mov     eax,_fx_xright
        cmp     eax,_window_right
        jb      eax_ok1
        mov     eax,_window_right
eax_ok1:
        cmp     eax,_window_left
        ja      eax_ok2
        mov     eax,_window_left
eax_ok2:

; Count from the clamped start column held in ebx.  The unclamped _fx_xleft
; used to be reloaded here; with a negative left edge that inflated the count
; and drew past xright.
; NOTE(review): u and v are still not advanced for columns skipped by the
; clamp - callers are presumably expected to clip first.
        sub     eax,ebx
        js      _none_to_do
        cmp     eax,_window_width
        jbe     _ok_to_do
        mov     eax,_window_width
_ok_to_do:
        mov     _loop_count,eax

;       edi     destination pixel pointer

        mov     ebx,_fx_u
        mov     ecx,_fx_du_dx
        mov     edx,_fx_dv_dx
        mov     ebp,_fx_v

; Move bits 16..21 of each coordinate into the top 6 bits of its register so
; the inner loop can pull them out with shr/shld.
        shl     ebx,10
        shl     ebp,10
        shl     edx,10
        shl     ecx,10

;       eax     work
;       ebx     u
;       ecx     du_dx
;       edx     dv_dx
;       ebp     v
;       esi     read address
;       edi     write address

        test    _transparency_on,-1
        jne     transparent_texture

; Opaque path: jump into an unrolled run of num_iters pixel writers so that
; exactly _loop_count+1 of them execute.  _size is the byte length of one
; unrolled iteration, computed by the assembler.
_size = (_end1 - _start1)/num_iters
        mov     eax,num_iters-1
        sub     eax,_loop_count         ; eax = number of unrolled iterations to skip
        jns     j_eax_ok1
        inc     eax                     ; sort of a hack, but we can get -1 here and want to be graceful
        jns     j_eax_ok1               ; if we jump, we had -1, which is kind of ok, if not, we int 3
        int     3                       ; oops, going to jump behind _start1, very bad...
        sub     eax,eax                 ; ok to continue
j_eax_ok1:
        imul    eax,eax,dword ptr _size
        add     eax,offset _start1
        jmp     eax

        align   4
_start1:

; "OPTIMIZATIONS" maybe not worth making
;    Getting rid of the esi from the mov al,[esi+eax] instruction.
;       This would require moving into eax at the top of the loop, rather than doing the sub eax,eax.
;       You would have to align your bitmaps so that the two shlds would create the proper base address.
;       In other words, your bitmap data would have to begin at 4096x (for 64x64 bitmaps).
;       I did timings without converting the sub to a mov eax,esi and setting esi to the proper value.
;       There was a speedup of about 1% to 1.5% without converting the sub to a mov.
;    Getting rid of the edi by doing a mov nnnn[edi],al instead of mov [edi],al.
;       The problem with this is you would have a dword offset for nnnn. My timings indicate it is slower. (I think.)
;    Combining u,v and du,dv into single longwords.
;       The problem with this is you then must do a 16 bit operation to extract them, and you don't have enough
;       instructions to separate a destination operand from being used by the next instruction. It shaves out one
;       register instruction (an add reg,reg), but adds a 16 bit operation, and the setup is more complicated.

; usage:
;       eax     work
;       ebx     u coordinate
;       ecx     delta u
;       edx     delta v
;       ebp     v coordinate
;       esi     pointer to source bitmap
;       edi     write address

; Every iteration must assemble to the same number of bytes; the computed
; jump above relies on it.
        rept    num_iters
        mov     eax,ebp                 ; copy v before it is stepped
        add     ebp,edx                 ; update v coordinate
        shr     eax,26                  ; eax = top 6 bits of v
        shld    eax,ebx,6               ; shift in u coordinate while shifting up v coordinate
        add     ebx,ecx                 ; update u coordinate
        mov     al,[esi+eax]            ; get pixel from source bitmap
        mov     [edi],al
        inc     edi                     ; XPARENT ADDED BY JOHN

; inner loop if bitmaps are 256x256
; your register usage is bogus, and you must clear ecx
; fix your setup
; this is only about 10% faster in the inner loop
; this method would adapt to writing two pixels at a time better than
; the 64x64 method because you wouldn't run out of registers
; Note that this method assumes that both dv_dx and du_dx are in edx.
;       edx = vi|vf|ui|uf
;       where each field is 8 bits, vi = integer v coordinate, vf = fractional v coordinate, etc.
;**     add     ebx,edx
;**     mov     cl,bh
;**     shld    cx,bx,8
;**     mov     al,[esi+ecx]
;**     mov     [edi],al
;**     inc     edi
        endm

_end1:

_none_to_do:
        popa
        ret

; ----------------------------------------------------------------------------------------
; if texture map has transparency, use this code.
;
; Texels equal to 255 are skipped, leaving the destination pixel untouched.
;
; The count is biased and capped so this path draws the same number of pixels
; as the opaque path: min(_loop_count + 1, num_iters).  It used to draw only
; _loop_count pixels, which dropped the right-most pixel of every span and
; drew nothing for a one-pixel span.
transparent_texture:
        mov     eax,_loop_count         ; eax is scratch here; the loop reloads it
        inc     eax                     ; span is inclusive of xright
        cmp     eax,num_iters
        jbe     t_count_ok
        mov     eax,num_iters           ; same upper bound as the unrolled opaque loop
t_count_ok:
        mov     _loop_count,eax         ; always >= 1 here, so no empty-span test is needed

loop_transparent:
        mov     eax,ebp                 ; copy v before it is stepped
        add     ebp,edx                 ; update v coordinate
        shr     eax,26                  ; eax = top 6 bits of v
        shld    eax,ebx,6               ; shift in u coordinate while shifting up v coordinate
        add     ebx,ecx                 ; update u coordinate
        mov     al,[esi+eax]            ; get pixel from source bitmap
        cmp     al,255
        je      transp                  ; 255 = transparent: skip the write
        mov     [edi],al
transp:
        inc     edi                     ; XPARENT ADDED BY JOHN

        dec     _loop_count
        jne     loop_transparent

_t_none_to_do:
        popa
        ret

_TEXT   ends

        end

; This is the inner loop to write two pixels at a time
; This is about 2.5% faster overall (on Mike's 66 MHz 80486 DX2, VLB)
; You must write code to even align edi and do half as many iterations, and write
; the beginning and ending extra pixels, if necessary.
;       sub     eax,eax         ; clear for
;       shld    eax,ebp,6       ; shift in v coordinate
;       add     ebp,_fx_dv_dx   ; update v coordinate
;       shld    eax,ebx,6       ; shift in u coordinate while shifting up v coordinate
;       add     ebx,ecx         ; update u coordinate
;       mov     dl,[esi+eax]    ; get pixel from source bitmap
;
;       sub     eax,eax         ; clear for
;       shld    eax,ebp,6       ; shift in v coordinate
;       add     ebp,_fx_dv_dx   ; update v coordinate
;       shld    eax,ebx,6       ; shift in u coordinate while shifting up v coordinate
;       add     ebx,ecx         ; update u coordinate
;       mov     dh,[esi+eax]    ; get pixel from source bitmap
;
;       mov     [edi],dx
;       add     edi,2