;THE COMPUTER CODE CONTAINED HEREIN IS THE SOLE PROPERTY OF PARALLAX
;SOFTWARE CORPORATION ("PARALLAX").  PARALLAX, IN DISTRIBUTING THE CODE TO
;END-USERS, AND SUBJECT TO ALL OF THE TERMS AND CONDITIONS HEREIN, GRANTS A
;ROYALTY-FREE, PERPETUAL LICENSE TO SUCH END-USERS FOR USE BY SUCH END-USERS
;IN USING, DISPLAYING,  AND CREATING DERIVATIVE WORKS THEREOF, SO LONG AS
;SUCH USE, DISPLAY OR CREATION IS FOR NON-COMMERCIAL, ROYALTY OR REVENUE
;FREE PURPOSES.  IN NO EVENT SHALL THE END-USER USE THE COMPUTER CODE
;CONTAINED HEREIN FOR REVENUE-BEARING PURPOSES.  THE END-USER UNDERSTANDS
;AND AGREES TO THE TERMS HEREIN AND ACCEPTS THE SAME BY USE OF THIS FILE.  
;COPYRIGHT 1993-1998 PARALLAX SOFTWARE CORPORATION.  ALL RIGHTS RESERVED.
;
; $Source: f:/miner/source/texmap/rcs/tmap_lin.asm $
; $Revision: 1.3 $
; $Author: john $
; $Date: 1995/02/20 18:22:57 $
;
; Linearly interpolating texture mapper inner loop
;
; $Log: tmap_lin.asm $
; Revision 1.3  1995/02/20  18:22:57  john
; Put all the externs in the assembly modules into tmap_inc.asm.
; Also, moved all the C versions of the inner loops into a new module, 
; scanline.c.
; 
; Revision 1.2  1995/02/20  17:09:10  john
; Added code so that you can build the tmapper with no assembly!
; 
; Revision 1.1  1994/11/30  12:24:04  mike
; Initial revision
; 
; Revision 1.6  1994/11/12  16:39:40  mike
; jae to ja.
; 
; Revision 1.5  1994/02/10  21:24:43  matt
; Got rid of debug_on set
; 
; Revision 1.4  1994/01/31  15:39:53  mike
; Write additional inner loop for transparency.
; 
; Revision 1.3  1994/01/26  11:50:04  john
; Added transparency to linear unlighted texture mapper.
; 
; Revision 1.2  1993/11/22  10:24:36  mike
; *** empty log message ***
; 
; Revision 1.1  1993/09/08  17:29:50  mike
; Initial revision
; 
;
;

	.386

	option	oldstructs

	.nolist
	include	psmacros.inc
	.list


	public	asm_tmap_scanline_lin_

	include	tmap_inc.asm

_DATA	SEGMENT DWORD PUBLIC USE32 'DATA'

_loop_count 	dd	?

_DATA	ENDS

DGROUP	GROUP	_DATA

_TEXT   SEGMENT DWORD PUBLIC USE32 'CODE'

	ASSUME	DS:_DATA
	ASSUME	CS:_TEXT

	extn	mprintf_

; --------------------------------------------------------------------------------------------------
; Enter:
;	_xleft	fixed point left x coordinate
;	_xright	fixed point right x coordinate
;	_y	fixed point y coordinate
;	_pixptr	address of source pixel map
;	_u	fixed point initial u coordinate
;	_v	fixed point initial v coordinate
;	_du_dx	fixed point du/dx
;	_dv_dx	fixed point dv/dx

;   for (x = (int) xleft; x <= (int) xright; x++) {
;      _setcolor(read_pixel_from_tmap(srcb,((int) (u/z)) & 63,((int) (v/z)) & 63));
;      _setpixel(x,y);
;
;      u += du_dx;
;      v += dv_dx;
;      z += dz_dx;
;   }

	align	4
asm_tmap_scanline_lin_:
	pusha

; Setup for loop:	_loop_count  iterations = (int) xright - (int) xleft
;	esi	source pixel pointer = pixptr
;	edi	initial row pointer = y*320+x

; set esi = pointer to start of texture map data
	mov	esi,_pixptr

; set edi = address of first pixel to modify
	mov	edi,_fx_y
	cmp	edi,_window_bottom
	ja	_none_to_do

	imul	edi,_bytes_per_row
	mov	eax,_fx_xleft
	test	eax, eax
	jns	eax_ok
	sub	eax,eax
eax_ok:
	add	edi,eax
	add	edi,write_buffer

; set _loop_count = # of iterations
	mov	eax,_fx_xright
	cmp	eax,_window_right
	jb	eax_ok1
	mov	eax,_window_right
eax_ok1:	cmp	eax,_window_left
	ja	eax_ok2
	mov	eax,_window_left
eax_ok2:

	mov	ebx,_fx_xleft
	sub	eax,ebx
	js	_none_to_do
	cmp	eax,_window_width
	jbe	_ok_to_do
	mov	eax,_window_width
_ok_to_do:
	mov	_loop_count,eax

;	edi	destination pixel pointer


	mov	ebx,_fx_u
	mov	ecx,_fx_du_dx
	mov	edx,_fx_dv_dx
	mov	ebp,_fx_v

	shl	ebx,10
	shl	ebp,10
	shl	edx,10
	shl	ecx,10

; eax	work
; ebx	u
; ecx	du_dx
; edx	dv_dx
; ebp	v
; esi	read address
; edi	write address

	test	_transparency_on,-1
	jne	transparent_texture

_size = (_end1 - _start1)/num_iters
	mov	eax,num_iters-1
	sub	eax,_loop_count
	jns	j_eax_ok1
	inc	eax	; sort of a hack, but we can get -1 here and want to be graceful
	jns	j_eax_ok1	; if we jump, we had -1, which is kind of ok, if not, we int 3
	int	3	; oops, going to jump behind _start1, very bad...
	sub	eax,eax	; ok to continue
j_eax_ok1:	imul	eax,eax,dword ptr _size
	add	eax,offset _start1
	jmp	eax

	align	4
_start1:

; "OPTIMIZATIONS" maybe not worth making
;    Getting rid of the esi from the mov al,[esi+eax] instruction.
;       This would require moving into eax at the top of the loop, rather than doing the sub eax,eax.
;       You would have to align your bitmaps so that the two shlds would create the proper base address.
;       In other words, your bitmap data would have to begin at 4096x (for 64x64 bitmaps).
;       I did timings without converting the sub to a mov eax,esi and setting esi to the proper value.
;       There was a speedup of about 1% to 1.5% without converting the sub to a mov.
;    Getting rid of the edi by doing a mov nnnn[edi],al instead of mov [edi],al.
;       The problem with this is you would have a dword offset for nnnn.  My timings indicate it is slower.  (I think.)
;    Combining u,v and du,dv into single longwords.
;       The problem with this is you then must do a 16 bit operation to extract them, and you don't have enough
;       instructions to separate a destination operand from being used by the next instruction.  It shaves out one
;       register instruction (an add reg,reg), but adds a 16 bit operation, and the setup is more complicated.
; usage:
;	eax	work
;	ebx	u coordinate
;	ecx	delta u
;	edx	delta v
;	ebp	v coordinate
;	esi	pointer to source bitmap
;	edi	write address
 rept num_iters
	mov	eax,ebp	; clear for 
	add	ebp,edx	; update v coordinate
	shr	eax,26	; shift in v coordinate
	shld	eax,ebx,6	; shift in u coordinate while shifting up v coordinate
	add	ebx,ecx	; update u coordinate
	mov	al,[esi+eax]	; get pixel from source bitmap
	mov	[edi],al
	inc	edi		; XPARENT ADDED BY JOHN

; inner loop if bitmaps are 256x256
; your register usage is bogus, and you must clear ecx
; fix your setup
; this is only about 10% faster in the inner loop
; this method would adapt to writing two pixels at a time better than
; the 64x64 method because you wouldn't run out of registers
; Note that this method assumes that both dv_dx and du_dx are in edx.
; edx = vi|vf|ui|uf
; where each field is 8 bits, vi = integer v coordinate, vf = fractional v coordinate, etc.
;** add ebx,edx
;** mov cl,bh
;** shld cx,bx,8
;** mov al,[esi+ecx]
;** mov [edi],al
;** inc edi
 endm

_end1:

_none_to_do:	popa

	ret

; ----------------------------------------------------------------------------------------
; if texture map has transparency, use this code.
transparent_texture:
	test	_loop_count,-1
	je	_t_none_to_do
loop_transparent:
	mov	eax,ebp	; clear for 
	add	ebp,edx	; update v coordinate
	shr	eax,26	; shift in v coordinate
	shld	eax,ebx,6	; shift in u coordinate while shifting up v coordinate
	add	ebx,ecx	; update u coordinate
	mov	al,[esi+eax]	; get pixel from source bitmap
	cmp	al,255
	je	transp
	mov	[edi],al
transp:	inc	edi		; XPARENT ADDED BY JOHN

	dec	_loop_count
	jne	loop_transparent

_t_none_to_do:	popa
	ret

_TEXT	ends

	end



; This is the inner loop to write two pixels at a time
; This is about 2.5% faster overall (on Mike's 66 MHz 80486 DX2, VLB)
; You must write code to even align edi and do half as many iterations, and write
; the beginning and ending extra pixels, if necessary.
;	sub	eax,eax	; clear for 
;	shld	eax,ebp,6	; shift in v coordinate
;	add	ebp,_fx_dv_dx	; update v coordinate
;	shld	eax,ebx,6	; shift in u coordinate while shifting up v coordinate
;	add	ebx,ecx	; update u coordinate
;	mov	dl,[esi+eax]	; get pixel from source bitmap
;
;	sub	eax,eax	; clear for 
;	shld	eax,ebp,6	; shift in v coordinate
;	add	ebp,_fx_dv_dx	; update v coordinate
;	shld	eax,ebx,6	; shift in u coordinate while shifting up v coordinate
;	add	ebx,ecx	; update u coordinate
;	mov	dh,[esi+eax]	; get pixel from source bitmap
;
;	mov	[edi],dx
;	add	edi,2


