*===============================================================================
*
*	TEXAS INSTRUMENTS, INC.		
*
*       LATTICE FILTER - INVERSE - FLOATING POINT
*
*	Revision Date:	07/07/98
*	
*	USAGE	This routine is C Callable and can be called as:
*	
*            float ilattice(float f, int n, float* k, float* b)
*
*       f   --- floating point forward error input; the function returns
*               the updated value (the result of the inverse analysis)
*       n   --- number of coefficients
*       k   --- pointer to an array of floating point filter gain coefficients
*       b   --- pointer to an array of floating point backward error
*               coefficients
*
*		If the routine is not called from C, the caller must load
*		every argument into the register that the compiler's
*		calling convention assigns to it (see ARGUMENTS PASSED
*		below and the C compiler reference guide), because the
*		code reads its arguments directly from those registers.
*
*	C CODE
*		This is the C equivalent of the assembly code.  Note that
*		the assembly code is hand optimized and restrictions may
*		apply.
*
*               float ilattice(float f, int n, float* k, float* b)
*               {
*                       int   i;
*                       float a, c;
*                       c = f;
*                       for (i = 0; i < n; i++)
*                         {
*                               a  = f * k[i] + b[i];
*                               f += b[i] * k[i];
*                               b[i] = c;
*                               c = a;
*                         }
*                         return f;
*               }
*
*	DESCRIPTION
*	
*		This routine implements an inverse analysis lattice filter
*               and stores the result in f.  The filter consists of n stages.
*		The value of f is calculated by doing a multiply accumulate 
*               on the backward error coefficients, b, and filter gains, k.
*               New backward error coefficients are also calculated.
*		
*	TECHNIQUES
*
*		1.  Both kn and bn are needed for longer than their load
*		    registers stay valid before the next load overwrites
*		    them (live too long, LTL). Therefore, they are copied via
*		    MV instructions (2 times for bn and 1 time for kn).
*		2.  A load counter is used so that an epilog is not needed.
*		    No extraneous loads are performed.
*		3.  The "ADDSP.L1X A4,B7,A4" instruction
*                   does not occur in the last 2 iterations of the loop.
*                   This allows the correct value to be returned in A4.
*
*	ASSUMPTIONS
*
*		1.  Since single assignment of registers is not used,
*		    interrupts should be disabled before this function is 
*                   called.
*		2.  There are no restrictions placed on n. n can be
*                   any integer greater than or equal to 1.
*
*       ARGUMENTS PASSED
*
*               f        ->  A4
*               n        ->  B4
*               k        ->  A6 = ptr _k
*               b        ->  B6 = ptr _b
*
*	CYCLES
*
*		16 + 4n      Without C overhead
*		22 + 4n      With C overhead (return branch)
*
*	NOTATIONS
*
*		i  = initialization
*		fe = Function epilog
*===============================================================================

        .global _ilattice
        .text

_ilattice:
; Inverse lattice filter, hand-scheduled as a software pipeline.
; Register use, as read from the instructions below:
;   A4      = fn, the running forward error; accumulated in place
;   A6      = k read pointer, post-incremented by every kn load
;   B6      = b pointer; the loads address it with fixed word offsets
;             *+B6[0..4] and only the store (STW ... *B6++) advances it
;   A3 / B8 = most recently loaded kn / bn
;   A5      = copy of A3 (kn);  B4, B9 = first and second copy of bn (LTL)
;   B7      = prod2 = kn * bn;  A0 = prod1 = fn * kn
;   B5      = a, the value written back to the b array
;   A1      = cntr  (guards the loop branch and the fn accumulate)
;   B1      = lcntr (guards the loads so none run past the arrays)
;   B4      = n on entry; it is read in c2 and reused for the bn copy from c9
;   B3      = address branched to on exit
; NOTE(review): each "@" in a comment appears to mark the same operation
; issued for a later iteration of the pipeline; the file header does not
; define this notation -- confirm.
*** Loop Prolog ***
; BEGIN BENCHMARK

c0:	   LDW	   .D1	  *A6++,A3    ; load kn (k[0]); advance k pointer

c1:	   LDW	   .D2	  *B6,B8      ; load bn (b[0]); B6 is not modified

; Both counters start at n - 1, so they are already zero when n == 1.
c2:	   SUB	   .S1X   B4,1,A1     ; i cntr	= n - 1
||	   SUB	   .S2	  B4,1,B1     ; i lcntr = n - 1

c3:	   MV	   .L2x   A4,B5       ; i a = fn (c = f in the C code)

c4:  [B1]  LDW	   .D1	  *A6++,A3    ; @ if(lcntr) load kn

c5:  [B1]  LDW	   .D2	  *+B6[1],B8  ; @ if(lcntr) load bn (b[1])

c6:	   MPYSP   .M2X   A3,B8,B7    ; prod2 = kn * bn

c7:  [B1]  SUB	   .S2	  B1,1,B1     ; if(lcntr) lcntr--

c8:	   MV	   .L1	  A3,A5       ; A5 = kn (LTL)
||   [B1]  LDW	   .D1	  *A6++,A3    ; @@ if(lcntr) load kn

c9:	   MV	   .L2	  B8,B4       ; B4 = bn (LTL); overwrites n, already consumed in c2
||   [B1]  LDW	   .D2	  *+B6[2],B8  ; @@ if(lcntr) load bn (b[2])

c10:	   MPYSP   .M2X   A3,B8,B7    ; @ prod2 = kn * bn

; This first fn accumulate is unconditional; the later ones are guarded by cntr.
c11:	   MPYSP   .M1	  A4,A5,A0    ; prod1 = fn * kn
||	   ADDSP   .L1X   A4,B7,A4    ; fn += prod2
||   [B1]  SUB	   .S2	  B1,1,B1     ; @ if(lcntr) lcntr--

c12:	   MV	   .L1	  A3,A5       ; @ A5 = kn (LTL)
||   [B1]  LDW	   .D1	  *A6++,A3    ; @@@ if(lcntr) load kn

c13:	   MV	   .S2	  B4,B9       ; B9 = B4 = bn (LTL)
||	   MV	   .L2	  B8,B4       ; @ B4 = bn (LTL)
||   [B1]  LDW	   .D2	  *+B6[3],B8  ; @@@ if(lcntr) load bn (b[3])

c14: [A1]  B	   .S1	  loop	      ; if(cntr) branch to loop
||	   MPYSP   .M2X   A3,B8,B7    ; @@ prod2 = kn * bn

; ADDSP is an add: this is a = f * k[i] + b[i] from the C reference.
c15:	   ADDSP   .L2X   A0,B9,B5    ; a = prod1 + bn
||	   MPYSP   .M1	  A4,A5,A0    ; @ prod1 = fn * kn
||   [A1]  ADDSP   .L1X   A4,B7,A4    ; @ fn += prod2
||   [B1]  SUB	   .S2	  B1,1,B1     ; @@ if(lcntr) lcntr--

; Loop kernel: four execute packets per pass. It repeats the c12..c15
; pattern and adds the cntr decrement and the store of a.
loop:      
	   MV	   .L1	  A3,A5       ; @@ A5 = kn (LTL)
||   [B1]  LDW	   .D1	  *A6++,A3    ; @@@@ if(lcntr) load kn

	   MV	   .S2	  B4,B9       ; @ B9 = B4 = bn (LTL)
||	   MV	   .L2	  B8,B4       ; @@ B4 = bn (LTL)
||   [B1]  LDW	   .D2	  *+B6[4],B8  ; @@@@ if(lcntr) load bn, 4 words ahead of the store pointer
||   [A1]  SUB	   .S1	  A1,1,A1     ; if(cntr) cntr--

; NOTE(review): on the first pass the store presumably writes the initial
; B5 (= fn from c3), matching b[0] = c in the C code -- confirm against the
; ADDSP result latency.
     [A1]  B	   .S1	  loop	      ; @ if(cntr) branch to loop
||	   MPYSP   .M2X   A3,B8,B7    ; @@@ prod2 = kn * bn
||	   STW	   .D2	  B5,*B6++    ; store a into b[]; post-increment B6

	   ADDSP   .L2X   A0,B9,B5    ; @ a = prod1 + bn
||	   MPYSP   .M1	  A4,A5,A0    ; @@ prod1 = fn * kn
||   [A1]  ADDSP   .L1X   A4,B7,A4    ; @@ fn += prod2 (skipped once cntr reaches 0)
||   [B1]  SUB	   .S2	  B1,1,B1     ; @@@ if(lcntr) lcntr--

;endloop ** 1st branch occurs after 1st iteration of loop
;BENCHMARK ENDS
	   B	   .S2	  B3	      ; fe return fn: branch to the address in B3
	   NOP		  5	      ; fe wait for branch
; return happens here
