*===============================================================================
*
*	TEXAS INSTRUMENTS, INC.
*
*	VECTOR MAX
*	
*	Revision Date: 4/14/98
*
*	USAGE
*	
*		This routine is C Callable and can be called as:
*		
*		float vecmax(const float *a, const int n);
*
*		a[]	---	vector array 
*		n	---	number of elements of vector
*
*		If this routine is not to be used as a C callable function then
*		you need to initialize values for all of the values passed
*		as these are assumed to be in registers as defined by the 
*		calling convention of the compiler, (refer to the C compiler
*		reference guide).
*
*       ARGUMENTS PASSED
*
*		a	->	A4
*		n	->	B4
*
*	C CODE
*
*		This is the C equivalent of the assembly code.  Note that
*		the assembly code is hand optimized and restrictions may
*		apply.
*
*		float vecmax(const float *a, const int n )
*		{
*			int i ;
*			float max;
*			max = -3.40282347e+038 ; 
*	/* Most negative finite single-precision floating-point number */
*			for (i = 0; i < n; i++)
*				if (a[i] > max)
*					max = a[i];
*			return max;
*		}
*
*
*	DESCRIPTION
*
*		This routine finds the maximum element of the vector a[]
*		and returns that value.
*		
*	TECHNIQUES
*
*		1.  The loop is unrolled five times
*		2.  Loads are conditional on Load counter value
*	
*	ASSUMPTIONS
*
*		1. n must be at least 10 and a multiple of 5 (10, 15, 20, ...)
*		2. Vector a[] should be aligned on word boundary
*		
*	MEMORY NOTE
*
*		No memory bank hits under any condition.
*
*	CYCLES
*
*		(3*n)/5 + 14
*
*	NOTATIONS
*
*		f = Function Prolog or Epilog
*		o = Outer Loop
*		p = Inner Loop Prolog
*
*=============================================================================
; NOTE: SFPNN 	= Smallest (single-precision) Floating-point Negative Number,
;		  i.e. the most negative finite single-precision value
;				= -3.40282347e+038 (bit pattern 0xff7fffff)

	.text
	.global _vecmax

_vecmax:

	ADD	.L2X	0x4,A4,B5	; f b = a + 4
||	MVK	.S1	0xff7fffff,A5	; f max4 = SFPNN
||	SUB	.D2	B4,10,B1	; f (loop/load counter) lcntr = n - 10
||	MV	.L1	A4,A0		; f setup load pointer

	LDW	.D1T1	*A0++(20),A6	; p @ Load aa0
||	LDW	.D2T2	*B5++(20),B8	; p @ Load aa1
||	MVK	.S1	0xff7fffff,A3	; f max0 = SFPNN
||	MVK	.S2	0xff7fffff,B6	; f max1 = SFPNN

	LDW	.D1T1	*-A0(12),A7	; p @ Load aa2
||	LDW	.D2T2	*-B5(12),B7	; p @ Load aa3
||	MVKH	.S1	0xff7fffff,A3	; f max0 = SFPNN
||	MVKH	.S2	0xff7fffff,B6	; f max1 = SFPNN

	LDW	.D1T1	*-A0(4),A8	; p @ Load aa4
||	MVK	.S1	0xff7fffff,A4	; f max2 = SFPNN
||	MVK	.S2	0xff7fffff,B4	; f max3 = SFPNN

	LDW	.D1T1	*A0++(20),A6	; p @@ Load aa0
||	LDW	.D2T2	*B5++(20),B8	; p @@ Load aa1
||	MVKH	.S1	0xff7fffff,A4	; f max2 = SFPNN
||	B	.S2	LOOP

	LDW	.D1T1	*-A0(12),A7	; p @@ Load aa2
||	LDW	.D2T2	*-B5(12),B7	; p @@ Load aa3
||	MVKH	.S2	0xff7fffff,B4	; f max3 = SFPNN
||	MVKH	.S1	0xff7fffff,A5	; f max4 = SFPNN

	LDW	.D1T1	*-A0(4),A8	; p @@ Load aa4
||	CMPGTSP	.S1	A6,A3,A1	; p Is aa0 > max0 ?
||	CMPGTSP	.S2	B8,B6,B0	; p Is aa1 > max1 ?


;**--------------------------------------------------------------------------*
; KERNEL

LOOP:

[B1]	LDW	.D1T1	*A0++(20),A6	; @@@ if (lcntr) Load aa0
||[B1]	LDW	.D2T2	*B5++(20),B8	; @@@ if (lcntr) Load aa1
||[A1]	MV	.L1	A6,A3		; max0 = aa0 (if aa0 > max0)
||[B0]	MV	.L2	B8,B6		; max1 = aa1 (if aa1 > max1)
||	CMPGTSP	.S1	A7,A4,A1	; Is aa2 > max2 ?
||[B1]	B	.S2	LOOP		; if (lcntr) Branch LOOP

[B1]	LDW	.D1T1	*-A0(12),A7	; @@@ if (lcntr) Load aa2
||[B1]	LDW	.D2T2	*-B5(12),B7	; @@@ if (lcntr) Load aa3
||[A1]	MV	.L1	A7,A4		; max2 = aa2 (if aa2 > max2)
||	CMPGTSP	.S2	B7,B4,B0	; Is aa3 > max3 ?
||	CMPGTSP	.S1	A8,A5,A1	; Is aa4 > max4 ?

[B1]	LDW	.D1T1	*-A0(4),A8	; @@@ if (lcntr) Load aa4
||[B1]	SUB	.D2	B1,0x5,B1	; if (lcntr) lcntr = lcntr - 5
||[B0]	MV	.L2	B7,B4		; max3 = aa3 (if aa3 > max3)
||[A1]	MV	.L1	A8,A5		; max4 = aa4 (if aa4 > max4)
||	CMPGTSP	.S1	A6,A3,A1	; Is aa0 > max0 ?
||	CMPGTSP	.S2	B8,B6,B0	; Is aa1 > max1 ?

;**--------------------------------------------------------------------------*

	CMPGTSP	.S1X	A3,B6,A1	; o Is max0 > max1 ?
||	CMPGTSP	.S2X	A4,B4,B0	; o Is max2 > max3 ?

[!A1]	MV	.S1X	B6,A3		; o max0 = max1 (if max1 > max0)
||	B	.S2	B3

[!B0]	MV	.S1X	B4,A4		; o max2 = max3 (if max3 > max2)

	CMPGTSP	.S1	A3,A4,A1	; o Is max0 > max2 ?

[A1]	MV	.S1	A3,A4		; o max2 = max0 (if max0 > max2)

	CMPGTSP	.S1	A5,A4,A1	; o Is max4 > max2 ?

[A1]	MV	.L1	A5,A4		; o max2 = max4 (if max4 > max2)

; Branch to calling function occures