*================================================================================
*
*	TEXAS INSTRUMENTS, INC.		
*
*	VECTOR SUM OF SQUARES
*
*	Revision Date:  07/17/97
*	
*	USAGE	This routine is C Callable and can be called as:
*		
*		int vecsumsq(short x[], int n)
*
*		 x[] --- pointer to input vector 
*                n   --- number of elements in x
*
*		If the routine is not called from C, the caller must load the
*		argument registers itself before branching here: the code reads
*		the vector address from A4 and the element count from B4,
*		returns through B3 and leaves the result in A4 (refer to the
*		C compiler reference guide for the full calling convention).
*
*	C CODE
*		This is the C equivalent of the assembly code.  Note that
*		the assembly code is hand optimized and restrictions may
*		apply.
*
*		int vecsumsq(short x[], int n)
*		{
*			int i, sum=0;
*			for(i=0; i<n; i++)
*			{
*				sum += x[i]*x[i];
*			}
*			return(sum);
*		}
*
*
*	DESCRIPTION
*
*		This routine takes one vector and the number of elements (N)
*		and calculates the square of each element and accumulates the 
*		results and returns the sum.
*	
*	ASSUMPTIONS
*
*         	Number of elements N >= 2 (even OR odd)
*		
*	CYCLES
*
*		(N/2) + 9		[21 samples => 19 cycles]
*
*===============================================================================
	.global _vecsumsq
	.text

*-------------------------------------------------------------------------------
* _vecsumsq -- sum of the squares of n 16-bit elements
*
*   Entry:  A4 = address of x[0] (elements are read with LDH)
*           B4 = n
*   Exit:   A4 = sum of squares; control leaves through the address in B3
*
*   Register use inside the routine:
*     A1 / B1   read pointers for x[i] / x[i+1], each stepped 2 elements per load
*     A6 / B6   elements loaded through A1 / B1
*     A5 / B5   squares produced by the MPYs
*     A4 / B4   partial sums of the A-side / B-side squares (inputs are consumed
*               in the first packet, so both registers are free for reuse)
*     A2        pair counter, starts at n >> 1
*     B2        n & 1, non-zero when one trailing element is left over
*
*   The loop kernel is a single execute packet, software pipelined with a
*   prolog but no epilog.  It depends on the C6x delay slots (B: 5, LDH: 4,
*   MPY: 1).  The five prolog packets start the loads and branches; LOOP is
*   then executed once by fall-through and once more for every prolog/kernel
*   branch whose condition was true.  Because the first prolog branch tests
*   A2 without decrementing it, LOOP runs (n >> 1) + 2 times; the first two
*   passes add the zeroed A5/B5, so exactly n >> 1 pairs are accumulated.
*
*   NOTE: every LDH is unconditional.  The 5 prolog packets and every pass
*   through LOOP each issue a pair of loads, so elements beyond the ones that
*   are summed are also read.  Their values never reach the result, but the
*   caller should make sure reading past the end of x[] is harmless.
*   NOTE: for n == 1 no branch is taken, LOOP runs once, and the odd-element
*   ADD after the loop issues before the MPY result for x[0] has reached A5.
*   This is why the header requires N >= 2.
*-------------------------------------------------------------------------------
_vecsumsq:

*** BEGIN Benchmark Timing
B_START:	
	MV	.L1	A4,	A1		; A1 = &x[0], A-side read pointer
||	AND	.L2	B4,	1,	B2	; B2 = n & 1 (non-zero when n is odd)
||	SHR	.S1X	B4,	1,	A2	; A2 = n >> 1, number of element pairs
||	ADD	.S2X	A4,	2,	B1	; B1 = &x[1], B-side read pointer (+2 bytes)

* Prolog, packet 1 of 5.  No SUB here: this branch and the next one both test
* the initial pair count.
  [A2]	B	.S2	LOOP			; 1st branch to the kernel
||	LDH	.D2	*B1++[2],	B6	;* load x[i+1], then step B1 by 2 elements
||	LDH	.D1	*A1++[2],	A6	;* load x[i], then step A1 by 2 elements

* Prolog, packet 2 of 5.  The branch sees A2 before this packet's SUB updates it.
  [A2]	B	.S2	LOOP			;* 2nd branch to the kernel
||[A2]	SUB	.S1	A2,	1,	A2	; count down, stopping at zero
||	LDH	.D2	*B1++[2],	B6	;** load next x[i+1]
||	LDH	.D1	*A1++[2],	A6	;** load next x[i]

* Prolog, packet 3 of 5.
  [A2]	B	.S2	LOOP			;** 3rd branch to the kernel
||[A2]	SUB	.S1	A2,	1,	A2	; count down, stopping at zero
||	LDH	.D2	*B1++[2],	B6	;*** load next x[i+1]
||	LDH	.D1	*A1++[2],	A6	;*** load next x[i]

* Prolog, packet 4 of 5.  A5/B5 are cleared so the first kernel ADDs, which run
* before any MPY result exists, add zero.
  [A2]	B	.S2	LOOP			;*** 4th branch to the kernel
||[A2]	SUB	.S1	A2,	1,	A2	; count down, stopping at zero
||	LDH	.D2	*B1++[2],	B6	;**** load next x[i+1]
||	LDH	.D1	*A1++[2],	A6	;**** load next x[i]
||	ZERO	.L2	B5			; B-side square = 0
||	ZERO	.L1	A5			; A-side square = 0

* Prolog, packet 5 of 5.  A4/B4 no longer hold the arguments (copied in the
* first packet), so they become the two accumulators.
  [A2]	B	.S2	LOOP			;**** 5th branch to the kernel
||[A2]	SUB	.S1	A2,	1,	A2	; count down, stopping at zero
||	LDH	.D2	*B1++[2],	B6	;***** load next x[i+1]
||	LDH	.D1	*A1++[2],	A6	;***** load next x[i]
||	ZERO	.L1	A4			; A-side partial sum = 0
||	ZERO	.L2	B4			; B-side partial sum = 0

* Kernel: one packet per element pair.  In any one pass the ADDs consume the
* squares started two passes earlier, the MPYs square the elements whose loads
* were issued five packets earlier, and the LDHs fetch a pair for a later pass.
LOOP:
  	ADD	.L1	A4,	A5,	A4	; A-side sum += previous A-side square
||	ADD	.L2	B4,	B5,	B4	; B-side sum += previous B-side square
||[A2]	SUB	.S1	A2,	1,	A2	; count down, stopping at zero
||	MPY	.M2	B6,	B6,	B5	; B5 = x[i+1] * x[i+1]
||	MPY	.M1	A6,	A6,	A5	; A5 = x[i] * x[i]
||[A2]	B	.S2	LOOP			;***** repeat while pairs remain
||	LDH	.D2	*B1++[2],	B6	;****** load x[i+1] for a later pass
||	LDH	.D1	*A1++[2],	A6	;****** load x[i] for a later pass
						; LOOP END

* At this point A5 holds the square of the A-side element that follows the last
* summed pair, i.e. x[n-1] when n is odd.  It is added only in that case.
  [B2] 	ADD	.L1	A4,	A5,	A4	; odd n: sum += x[n-1] * x[n-1]

	ADD	.S1X	A4,	B4,	A4	; result = A-side sum + B-side sum
B_END:
*** END Benchmark Timing

STOP:
	B	.S2	B3			; branch to the address held in B3
	NOP	5				; fill the 5 branch delay slots
