*===============================================================================
*
*	TEXAS INSTRUMENTS, INC.		
*
*       LATTICE FILTER - FORWARD - SYNTHESIS - FLOATING POINT
*
*	Revision Date:	07/07/98
*	
*	USAGE	This routine is C Callable and can be called as:
*
*            float flattice(float f, int n, float* k, float* b)
*
*       f   --- floating point result of the forward synthesis
*               (also referred to as forward error)
*       n   --- number of coefficients
*       k   --- pointer to an array of floating point filter gain coefficients
*       b   --- pointer to an array of floating point backward error
*               coefficients
*
*		If routine is not to be used as a C callable function then
*		you need to initialize values for all of the values passed
*		as these are assumed to be in registers as defined by the 
*		calling convention of the compiler, (refer to the C compiler
*		reference guide).
*
*	C CODE
*		This is the C equivalent of the assembly code.  Note that
*		the assembly code is hand optimized and restrictions may
*		apply.
*
*               float flattice(float f, int n, float* k, float* b)
*               {
*                       int   i;
*                       f -= b[n - 1] * k[n - 1];
*                       for (i = n - 2; i >= 0; i--)
*                         {
*                              f -= b[i] * k[i];
*                              b[i + 1] = f * k[i] + b[i];
*                         }
*                       return b[0] = f;
*               }
*
*	DESCRIPTION
*	
*               This routine implements a forward synthesis lattice filter
*               and stores the result in f.  The filter consists of n stages.
*		The value of f is calculated by doing a multiply accumulate 
*               on the backward error coefficients, b, and filter gains, k.
*               New backward error coefficients are also calculated.
*		
*	TECHNIQUES
*
*		1.  The algorithm requires both kn and bn to be live too
*                   long (LTL). Therefore, they are copied via the MV
*		    instructions (3 times for bn and 2 times for kn).
*		2.  A load counter is used so that no extraneous loads are
*                   performed.
*		3.  The loop counter and load counter are decremented by
*		    4 prior to entering the loop.
*		4.  The "[B0] SUBSP .L1 A4,A2,A4"  instruction is not
*                   performed in the last two iterations of the loop.
*                   This allows the correct value to be returned in A4.
*		5.  The stores to b[1] and b[0] are done outside the loop.
*		    Therefore, there is one iteration of an epilog
*		    (i.e. the loop is executed n-2 times rather than n-1).
*		6.  Part of the initialization is done in parallel with the
*		    loop prolog.
*
*	ASSUMPTIONS
*
*		1.  Since single assignment of registers is not used,
*		    interrupts should be disabled before this function is 
*                   called.
*		2.  n must be greater than or equal to 4.
*
*       ARGUMENTS PASSED
*
*               f        ->  A4
*               n        ->  B4
*               k        ->  A6 = ptr _k
*               b        ->  B6 = ptr _b
*
*	CYCLES
*
*		24+4n	With C Overhead
*
*	NOTATIONS
*
*		o  = outside of loop
*		fe = function epilog
*		e  = loop epilog
*===============================================================================
        .global _flattice
        .text

_flattice:
;;; initialization

c0:	 sub	 .S1x	B4,1,A7    ; o A7 = n - 1
||	 sub	 .S2	B4,1,B7    ; o B7 = n - 1
||	 zero	 .L2	B2	   ; o sum = 0

c1:	 addaw	 .D2	B6,B7,B6   ; o ptr_b = &b[n-1]
||	 addaw	 .D1	A6,A7,A6   ; o ptr_k = &k[n-1]
||	 sub	 .L1x	B4,4,A1    ; o lcntr = n - 4
||	 sub	 .L2	B4,4,B0    ; o cntr  = n - 4

c2:	 ldw	 .D2	*B6--,B7   ; o load b[n-1]
||	 mv	 .S2	B6,B5	   ; o ptr_b2 = ptr_b

c3:	 ldw	 .D1	*A6--,A7   ; o load k[n-1]

c4_5:	 NOP		2

;prolog for loop

c6:	 ldw	 .D1	*A6--,A7   ; load kn

c7:	 ldw	 .D2	*B6--,B7   ; load bn

c8:	 mpysp	 .M1x	A7,B7,A0   ; o prod1 = k[n-1] * b[n-1]

c9:	 NOP		1

c10:	 ldw	 .D1	*A6--,A7   ; @ load kn

c11:	 ldw	 .D2	*B6--,B7   ; @ load bn
        
c12:	 mpysp	 .M1x	A7,B7,A2   ; prod2 = kn * bn
||	 subsp	 .L1	A4,A0,A4   ; o fn = f - prod1

c13:	 NOP		1

c14:	 ldw	 .D1	*A6--,A7   ; @@ load kn
||	 mv	 .S1	A7,A8	   ; A8 = kn (LTL)

c15:	 ldw	 .D2	*B6--,B7   ; @@ load bn
 ||	 mv	 .S2	B7,B8	   ; B8 = bn (LTL)

c16:	 mpysp	 .M1x	A7,B7,A2   ; @ prod2 = kn * bn
||	 subsp	 .L1	A4,A2,A4   ; fn -= prod2

c17:	 mv	 .S1	A8,A9	   ; A9 = A8 = kn (LTL)

c18:	 mv	 .S2	B8,B9	   ; B9 = B8 = bn (LTL)
 || [A1] ldw	 .D1	*A6--,A7   ; @@@ if(lcntr) load kn
 ||	 mv	 .S1	A7,A8	   ; @ A8 = kn (LTL)

c19:[A1] ldw	 .D2	*B6--,B7   ; @@@ if(lcntr) load bn
 ||	 mv	 .S2	B7,B8	   ; @ B8 = bn (LTL)

c20:	 mpysp	 .M1x	A7,B7,A2   ; @@ prod2 = kn * bn
 ||	 subsp	 .L1	A4,A2,A4   ; @ fn -= prod2
 || [A1] sub	 .S1	A1,1,A1    ; if(lcntr) lcntr--

c21:	 mpysp	 .M1	A4,A9,A3   ; prod3 = fn * kn
 ||	 mv	 .S1	A8,A9	   ; @ A9 = A8 = kn (LTL)

c22:	 mv	 .D2	B9,B1	   ; B1 = B9 = B8 = bn (LTL)
 ||	 mv	 .S2	B8,B9	   ; @ B9 = B8 = bn (LTL)
 || [A1] ldw	 .D1	*A6--,A7   ; @@@@ if(lcntr) load kn
 ||	 mv	 .S1	A7,A8	   ; @@ A8 = kn (LTL)

c23:[A1] ldw	 .D2	*B6--,B7   ; @@@@ if(lcntr) load bn
 ||	 mv	 .S2	B7,B8	   ; @@ B8 = bn (LTL)

c24:	 mpysp	 .M1x	A7,B7,A2   ; @@@ prod2 = kn * bn
 ||	 B	 .S2	loop	   ; branch to loop
 ||	 subsp	 .L1	A4,A2,A4   ; @@ fn -= prod2
 || [A1] sub	 .S1	A1,1,A1    ; @ if(lcntr) lcntr--

c25:	 mpysp	 .M1	A4,A9,A3   ; @ prod3 = fn * kn
 ||	 addsp	 .L2x	A3,B1,B2   ; sum = bn + prod3
 ||	 mv	 .S1	A8,A9	   ; @@ A9 = A8 = kn (LTL)

loop:
	 mv	 .D2	B9,B1	   ; @ B1 = B9 = B8 = bn (LTL)
 ||	 mv	 .S2	B8,B9	   ; @@ B9 = B8 = bn (LTL)
 || [A1] ldw	 .D1	*A6--,A7   ; @@@@@ if(lcntr) load kn
 ||	 mv	 .S1	A7,A8	   ; @@@ A8 = kn (LTL)

    [A1] ldw	 .D2	*B6--,B7   ; @@@@@ if(lcntr) load bn
 ||	 mv	 .L2	B7,B8	   ; @@@ B8 = bn (LTL)

	 mpysp	 .M1x	A7,B7,A2   ; @@@@ prod2 = kn * bn
 || [B0] B	 .S2	loop	   ; @ if(cntr) branch to loop
 || [B0] subsp	 .L1	A4,A2,A4   ; @@@ if(cntr) fn -= prod2
 || [B0] sub	 .D2	B0,1,B0    ; if(cntr) cntr--
 || [A1] sub	 .S1	A1,1,A1    ; @@ if(lcntr) lcntr--

	 mpysp	 .M1	A4,A9,A3   ; @@ prod3 = fn * kn
 ||	 addsp	 .L2x	A3,B1,B2   ; @ sum = bn + prod3
 ||	 stw	 .D2	B2,*B5--   ; store bn
 ||	 mv	 .S1	A8,A9	   ; @@@ A9 = A8 = kn (LTL)

; branch to loop occurs here

********* begin epilog **************
e0:	 B	 .S2	B3	   ; fe return fn
        
e1_3	 NOP		3

	 stw	 .D2	B2,*B5--   ; e store b[1]

	 stw	 .D2	A4,*B5	   ; o store b[0]
