*==============================================================================
*
*       TEXAS INSTRUMENTS, INC.
*
*       VITERBI EQUALIZER - GSM
*
*       Revision Date:  06/12/97
*
*       USAGE  This routine is C callable and can be called as
*
*               void viteq(int n, short new_s[], short old[],
*                          short sd[], short pr[], short trans[])
*
*               n       --- decoder input length
*               new_s   --- state metrics at current instant
*               old     --- state metrics at previous instant
*               sd      --- soft decision values of input bit stream
*               pr      --- probability metrics
*               trans   --- path transition of each state
*
*       C CODE
*               This is the C equivalent of the assembly code. Note that
*               the assembly code is hand optimized and restrictions may
*               apply.
*
*               void viteq(int n, short new_s[], short old[],
*                          short sd[], short pr[], short trans[])
*               {
*                int             i, j, k, a, b, alpha0, alpha1;
*                short          *tmp;
*
*                for (i = 0; i < n; i++) {
*                     for (j = 0; j < 8; j++) {
*                          alpha0 = pr[2 * j] - sd[i];
*                          alpha1 = pr[2 * j + 1] - sd[i];
*                          for (k = 0; k < 16; k += 8) {
*                               a = old[2 * j] + alpha0;
*                               b = old[2 * j + 1] + alpha1;
*                               new_s[j + k] = (b > a) ? b : a;
*                               trans[i] = (trans[i] << 1) | (b > a);
*                               alpha0 = -alpha0;
*                               alpha1 = -alpha1;
*                          }
*                     }
*                     tmp = old;
*                     old = new_s;
*                     new_s = tmp;
*                }
*               }
*
*       DESCRIPTION
*
*               This routine is used for channel equalization for the
*               GSM full rate system, with Viterbi algorithm and soft
*               decision.  The code is of rate 1/2 and of constraint
*               length K = 4.  The equalization formula is
*
*                               ___ k = 3
*                        y(n) = \         H(k)x(n-k)
*                               /__ k = 0
*
*
*       TECHNIQUES
*
*               The k loop is completely unrolled.
*
*       ASSUMPTIONS
*               1.  Rate -> 1/2
*               2.  K = 4
*
*       MEMORY NOTE:
*
*               There are no memory hits.
*
*       CYCLES
*               43*n + 6
*
*==============================================================================

        .global _viteq
        .text

*** BEGIN Benchmark Timing ***
*------------------------------------------------------------------------------
* _viteq -- Viterbi equalizer add/compare/select kernel (see C CODE above).
*
* Arguments as this code consumes them:
*       A4 = n      B4 = new_s      A6 = old
*       B6 = sd     A8 = pr         B8 = trans
*
* Register roles inside the loops:
*       A3        write pointer into trans[]
*       A4        outer iterations still to run (decremented once per i)
*       B4        write pointer for new_s[j]
*       A11       write pointer for new_s[j + 8]  (B4 + 16 bytes)
*       B5        base of old[] on entry to ILOOP; reused as alpha1 in JLOOP
*       B10 / A6  read pointers for old[2*j] / old[2*j + 1]
*       A8  / B9  read pointers for pr[2*j]  / pr[2*j + 1]
*       A12       d = sd[i]
*       A14       constant 40 (bytes = 20 halfwords) used to rewind pointers
*       B2        inner (j) loop counter, reloaded with 9 for every i
*       B0        priming count: new_s/trans stores are predicated on B0 == 0
*                 (also a scratch for new_s + 8 in the first ILOOP packet)
*       B8        tr, the transition bits accumulated for the current i
*       A13 / A2  outer-loop branch enable / outer-loop branch condition
*       A0,B11 = old0,old1   A10,B12 = pr0,pr1   A5,B5 = alpha0,alpha1
*       A9,A7  = a0,a8       B13,B7  = b0,b8     A1,B1 = t0,t8
*
* Saved registers: A10-A14 and B10-B13 (nine words) are stored at and below
* the incoming B15; B15 itself ends up 9 words (36 bytes) lower and is
* restored by the epilogue.
*
* NOTE(review): the read pointers advance 2 halfwords per JLOOP pass and are
* rewound by 20 halfwords, so each i appears to load pr[0..19] and old[0..19]
* although only elements 0..15 contribute to stored results.  Presumably the
* caller must make the 4 halfwords past each array readable -- confirm.
*
* NOTE(review): the outer loop is bottom-tested (A4 is only examined after a
* full j sweep), so n >= 1 is presumably required -- confirm with callers.
*------------------------------------------------------------------------------
_viteq:

* ---- Prologue: save A10-A14 / B10-B13, stage loop-invariant values ----------

        ADD     .L1X    -4,     B15,    A1      ; A1 = second save pointer (SP - 4)
||      STW     .D2     A14,    *-B15[8]        ; push A14 on stack (8 words below SP)

        STW     .D1     A10,    *A1--[2]        ; push A10 on stack
||      STW     .D2     B10,    *B15--[2]       ; push B10 on stack

        STW     .D1     A11,    *A1--[2]        ; push A11 on stack
||      STW     .D2     B11,    *B15--[2]       ; push B11 on stack
||      MVK     .S2     3,      B0              ; B0 != 0: no trans store on first ILOOP entry
||      MV      .L2X    A6,     B5              ; B5 = old

        STW     .D1     A12,    *A1--[2]        ; push A12 on stack
||      STW     .D2     B12,    *B15--[2]       ; push B12 on stack

        MVK     .S1     40,     A14             ; 20 halfs: pointer rewind amount in bytes
||      STW     .D1     A13,    *A1             ; push A13 on stack
||      STW     .D2     B13,    *B15--[3]       ; push B13 on stack, B15 now 9 words down
||      MV      .L1X    B8,     A3              ; A3 = trans (B8 becomes tr)

* ---- Outer loop setup for one i: two packets, both overlap the branch to
* ---- JLOOP issued in the first one.

ILOOP:
        ADD     .L2     8,      B4,     B0      ; B0 = new_s + 8 bytes (scratch)
||      B       .S1     JLOOP                   ; for j
||      ADD     .L1X    2,      B5,     A6      ; A6 = &old[1]  (odd elements)
||      MVK     .S2     9,      B2              ; reload j counter
||[!B0] STH     .D1     B8,     *A3++           ; trans[i] = tr (previous i)
||      MV      .D2     B5,     B10             ; B10 = &old[0] (even elements)

        ADD     .L2X    2,      A8,     B9      ; B9 = &pr[1]   (odd elements)
||      LDH     .D2     *B6++,  A12             ; d = sd[i]
||      MVK     .S2     3,      B0              ; priming count
||      ADD     .L1X    B0,     8,      A11     ; A11 = new_s + 16 bytes = &new_s[8]
||      MVK     .S1     1,      A13             ; enables outer loop branch
||      SUB     .D1     A4,     1,      A4      ; i--

* ---- Inner loop kernel: four packets per pass.  Loads issued in one pass
* ---- feed the alpha computation and the add/compare/select of later
* ---- passes; B0 keeps the stores switched off until valid data arrives.

JLOOP:
        ADD     .S1     A0,     A5,     A9      ; a0 = old0 + alpha0,
||      SUB     .L1     A0,     A5,     A7      ; a8 = old0 - alpha0,
||      ADD     .D2     B11,    B5,     B13     ; b0 = old1 + alpha1,
||      SUB     .L2     B11,    B5,     B7      ; b8 = old1 - alpha1,
||[!B0] STH     .D1     B7,     *A11++          ; new_s[j + 8] = max(a8, b8) of previous pass,
||[B2]  SUB     .S2     B2,     1,      B2      ; j++
||      MPY     .M1     0,      A2,     A2      ; if innerloop prevent
                                                ; outerloop branch

        LDH     .D1     *A8++[2],       A10     ;** pr0 = pr[2*j],
||      LDH     .D2     *B9++[2],       B12     ;** pr1 = pr[2* j+1],
||      CMPGT   .L1X    B13,    A9,     A1      ; t0 = (b0 > a0),
||      CMPGT   .L2X    B7,     A7,     B1      ; t8 = (b8 > a8),
||      SHL     .S2     B8,     2,      B8      ; tr <<= 2,
||[!B2] MPY     .M1     A13,    A4,     A2      ; outer loop branch
                                                ; condition

        LDH     .D2     *B10++[2],      A0      ;** old0 = old[2*j],
||      LDH     .D1     *A6++[2],       B11     ;** old1 = old[2*j+1],
||[A1]  MV      .L1X    B13,    A9              ; a0 = (t0) ? b0 : a0,
||[!B1] MPY     .M2X    1,      A7,     B7      ; b8 = (!t8) ? a8 : b8,
||[B1]  OR      .S2     1,      B8,     B8      ; tr |= t8,
||[B2]  B       .S1     JLOOP                   ; for j
||[B0]  SUB     .L2     B0,     1,      B0      ; priming count

        SUB     .D1     A10,    A12,    A5      ;* alpha0 = pr0 - d,
||      SUB     .L2X    B12,    A12,    B5      ;* alpha1 = pr1 - d,
||[!B0] STH     .D2     A9,     *B4++           ; new_s[j]     = max(a0, b0),
||[A1]  OR      .S2     2,      B8,     B8      ; tr |= t0 << 1,
||[A2]  B       .S1     ILOOP                   ; for i
||[A2]  MPY     .M1     0,      A13,    A13     ; disables double outer
                                                ; loop branch

* ---- Drain: final new_s[j + 8] store, then swap the roles of old and new_s
* ---- for the next i and rewind the pr pointer.

JLOOP_END:
        STH     .D1     B7,     *A11++          ; new_s[j + 8] = max(a8, b8), last j
||      SUB     .L2X    B10,    A14,    B4      ; reset/swap old & new_s: new_s = old base
||      ADD     .S2     -16,    B4,     B5      ; old = new_s base (B4 advanced 8 halfs)
||      SUB     .L1     A8,     A14,    A8      ; reset pr

ILOOP_END:

        STH     .D1     B8,     *A3++           ; trans[i] = tr (final i)

*** END Benchmark Timing ***

* ---- Epilogue: restore saved registers in reverse order and return via B3.

        LDW     .D2     *++B15, A14             ; pop A14 off stack
||      ADD     .L1X    0,      B15,    A1      ; copy stack pointer

        LDW     .D1     *++A1[2],       A13     ; pop A13 off stack
||      LDW     .D2     *++B15[2],      B13     ; pop B13 off stack

        LDW     .D1     *++A1[2],       A12     ; pop A12 off stack
||      LDW     .D2     *++B15[2],      B12     ; pop B12 off stack

        LDW     .D1     *++A1[2],       A11     ; pop A11 off stack
||      LDW     .D2     *++B15[2],      B11     ; pop B11 off stack
||      B       .S2     B3                      ; return

        LDW     .D1     *++A1[2],       A10     ; pop A10 off stack
||      LDW     .D2     *++B15[2],      B10     ; pop B10 off stack

        NOP     4                               ; fill the remaining branch delay slots
