*===============================================================================
*
*	TEXAS INSTRUMENTS, INC.		
*
*	
*
*	Revision Date:	04/2/98
*	
*	USAGE	This routine is C Callable and can be called as:
*		
*	   	x = dpinv(v);
*
*		If the routine is not to be used as a C callable function,
*		then you need to initialize values for all of the parameters
*		passed to the function since these are assumed to be in
*		registers as defined by the calling convention of the
*		compiler, (refer to the TMS320C6x Optimizing C Compiler
*		User's Guide).
*
*	C CODE
*		This is the C equivalent for the assembly code.  Note that
*		the assembly code is hand optimized and restrictions may
*		apply.
*
*		double dpinv(double v)
*		{
*		   double x; 
*
*		   x = 1/v;
*		   return x;
*		}
*
*	DESCRIPTION
*
*		This routine calculates the double precision floating point 
*		reciprocal of a double precision floating point number.
*
*	TECHNIQUES
*
*		1.  RCPDP is used to get the reciprocal of the double value
*			held in register pair A5:A4.  However, to increase
*			the precision of the mantissa to the full 52 bits,
*			three iterations of the Newton-Raphson algorithm are
*			required.  Due to the high number of dependencies and the
*			long delay slots required by MPYDP and SUBDP, several NOPs
*			are required in the loop.
*
*       ARGUMENTS PASSED
*
*		v (upper 32 bits) -> A5
*		v (lower 32 bits) -> A4
*
*	CYCLES
*
*		84 cycles  
*
*===============================================================================
	.global _dpinv
	.text

*-------------------------------------------------------------------------------
*	double dpinv(double v)
*
*	In:	A5:A4 = v		B3 = return address
*	Out:	A5:A4 = 1/v
*	Uses:	A0-A2, A4-A9 only.  These are all caller-saved registers in
*		the TI C6000 calling convention, so nothing has to be saved or
*		restored.  (A10-A15 are callee-saved and must not be written
*		by a C-callable routine without preserving them.)
*-------------------------------------------------------------------------------

cntr	.set	A2		;Newton-Raphson passes still to run after this one

two_h	.set	A9		;constant 2.0, high word
two_l	.set	A8		;constant 2.0, low word

x_h	.set	A5		;current estimate x[n] / return value, high word
x_l	.set	A4		;current estimate x[n] / return value, low word

v_h	.set	A7		;copy of the argument v, high word
v_l	.set	A6		;copy of the argument v, low word

* prod and sum deliberately share the pair A1:A0.  SUBDP reads its sources in
* E1/E2 and only writes its result in E6/E7, so the difference can safely
* overwrite the product it was computed from.

prod1	.set	A1		;v*x[n], high word
prod0	.set	A0		;v*x[n], low word

sum1	.set	A1		;2 - v*x[n], high word (overwrites prod1)
sum0	.set	A0		;2 - v*x[n], low word  (overwrites prod0)

_dpinv:

*** BEGIN Benchmark Timing ***

		MVK	.S1	2,cntr			;2 repeats => 3 passes in total

		INTDP	.L1	cntr,two_h:two_l	;two_h:two_l = 2.0 (ready long before SUBDP)
||		MV	.S1	A5,v_h			;keep v: A5:A4 is reused for x
||		MV	.D1	A4,v_l

		RCPDP	.S1	v_h:v_l,x_h:x_l		;x[0] = reciprocal seed for 1/v

loop:								;x[n+1] = x[n]*(2 - v*x[n])
		MPYDP	.M1	v_h:v_l,x_h:x_l,prod1:prod0		;prod = v*x[n]
		NOP		8					;wait for v*x[n]
		SUBDP	.L1	two_h:two_l,prod1:prod0,sum1:sum0	;sum = 2 - v*x[n]
		NOP		5					;wait for 2 - v*x[n]
		MPYDP	.M1	sum1:sum0,x_h:x_l,x_h:x_l		;x[n+1] = x[n]*sum
		NOP		2

* The two exits are issued one cycle apart.  Their predicates are mutually
* exclusive (cntr is only changed in the second packet, after both tests), so
* exactly one of the branches is ever taken.
*
*   - Loop branch: lands 9 cycles after the MPYDP above.  The next MPYDP reads
*     the low word of x first and the high word one cycle later, which matches
*     the E9/E10 write-back of the previous MPYDP.
*   - Return branch: issued one cycle later so that control reaches the caller
*     only after the high word of x (written in E10) is in A5.  Returning a
*     cycle earlier would let the caller's first instruction read a stale A5.

	[cntr]	B	.S1	loop			;more passes to run: iterate
     [!cntr]	B	.S2	B3			;last pass issued: return to caller
||	[cntr]	SUB	.D1	cntr,1,cntr		;one pass fewer remaining
		NOP		4			;delay slots common to both branches
		NOP					;5th delay slot of the return only
							;(the loop branch has been taken by now)


B_END:            	