;==========================================================================
; FFT_CR2.ASM
; Keith Larson
; TMS320 DSP Applications
; (C) Copyright 1995,1996
; Texas Instruments Incorporated
;
; This is unsupported freeware with no implied warranties or
; liabilities.  See the disclaimer document for details
;
; This application will work with either the DOS executable FFT_XXX.EXE
; or the windows application DSK3WIN.EXE
;==========================================================================
;  NOTES
;
;  Computed Twiddle values
;  -----------------------
;  To keep as much on chip ram free as possible, the FFT twiddle can
;  be computed on the fly.  Twiddles are computed from an initial seed
;  value (complex(1,0)) which is then phase rotated using a complex
;  phasor held in a lookup table.  This computation is relatively fast and
;  occurs during a delayed branch.
;
;  Minimised Twiddle Access
;  ------------------------
;  The number of twiddle accesses during an FFT can also be greatly reduced
;  by noting that when each new stage occurs, half as many twiddles are
;  actually used.  By adding an extra inner loop, the number of twiddle
;  accesses are reduced from N/2*log2(N) to N accesses.
;
;  >>>> The combination of minimized twiddle lookup <<<<
;  >>>> and computed twiddles is a double benefit.  <<<<
;
;  FFT Windowing
;  -------------
;  Windowing is done using a convolution of the window function's frequency
;  response with the FFT frequency data.  A raised cosine window is used
;  which has a very simple frequency domain response of -0.5,+1.0,-0.5
;  centered at the 0th bin (DC).
;
;  Since the convolution is being performed on a REAL window function and
;  the complex FFT frequency domain data, the convolution is performed on
;  the FFT frequency domain REAL and IMAG data separately.
;
;  Bit-reversal of data
;  --------------------
;  After windowing, the data which is still in bit-reversed order, is
;  converted to MAG^2=REAL^2+IMAG^2 and placed back into the REAL data array.
;  The data is then bit-reversed and put into the IMAG data array.
;
;  log2(MAG^2) 'decibel' calculation
;  ---------------------------------
;  The MAG^2 array is then converted to 8 bit log2 pixel offsets which are
;  scaled to fit a decibel graphical display.  The resulting data array is
;  further packed at 4 bytes/long word to minimize host transfers.
;
;  STARTUP STUB
;  ------------
;  The initialization code which is used only on startup is placed inside
;  the volatile data memory array to gain back internal memory.
;
;  AIC SAMPLING RATES
;  ------------------
;  The AIC sampling rate can be adjusted well outside the tested limits found
;  in the data sheet.  Very high sample rates are possible at the expense of
;  signal degradation.  Some values work better than others, and some not
;  at all, so some experimentation is required.
;
;  HOST SYNCHRONIZATION
;  --------------------
;  Since the host should not disturb the ADC while data is being collected
;  an interlock is used to keep the host from timing out.  When the host
;  gains access to the DSK, the restart sequence is as follows
;
;    Set P_STRB=0
;    Wait for HPACK=0 indicating DSP is ready for transfer
;    Update any AIC registers by writing to the appropriate memory locations
;    Write a START to location AICLOAD if any AIC registers needs reloading
;    Write a START to the message box
;
;=========================================================================
; The following constants are used to initialize the AIC plus
; create the Twiddle, FFT and I/O buffer arrays
;=========================================================================
            .include "C3XMMRS.ASM"        ;
TA          .set    12                    ; Packed into A_REG as (TA<<9)
TB          .set    14                    ; Packed into B_REG as (TB<<9)
RA          .set    12                    ; Packed into A_REG as (RA<<2)
RB          .set    14                    ; Packed into B_REG as (RB<<2)
MaxN        .set    512                   ; Largest FFT length; sizes DR/DI and the seed table
log2Max     .set    9         ;log2(MaxN) ; Must be kept in step with MaxN by hand
PI          .set    3.1415926             ;
;=========================================
; Message values exchanged through MSG_BOX
;=========================================
_STOP       .set    1                     ; Written by the DSP once a frame has been accepted
_START      .set    2                     ; Polled for by the DSP before starting a new frame
;=========================================
; Memory map.  Every symbol is defined before it is used so that the
; values resolve regardless of how the assembler treats forward
; references inside .set expressions.
;=========================================
RAM0        .set    0x809800              ;
RAM1        .set    0x809C00              ;
DR          .set    RAM0                  ; DR/DI must be on SIZE boundary
DI          .set    DR+MaxN               ; for bit-reverse addressing
DATA_ARRAY  .set    DR                    ; Alias for the start of the REAL data array
            .start  "TW_SEED",RAM1        ; Section TW_SEED is located at RAM1
            .sect   "TW_SEED"             ;
            ; Twiddle seed table: log2Max (cos, -sin) pairs, two words each.
            ; The first pair uses the angle 2*PI/MaxN and each following
            ; pair uses twice the previous angle.  TW_END labels the word
            ; just past the table; the FFT locates its first seed by
            ; subtracting an offset from TW_END (see T_ADDR).
TWx         .sdef   2.0*PI/MaxN           ; Step angle for first seed
            .loop   log2Max               ; One pair per FFT stage
            .float  cos(TWx)              ; Seed real part
            .float  -sin(TWx)             ; Seed imaginary part (negated sine)
TWx         .sdef   TWx * 2               ; Step angle doubles for each stage
            .endloop                      ;
TW_END                                    ; First address after the seed table
;=========================================
; Run-time variables and address constants.
; The header notes say the host writes the AIC register words, AICLOAD
; and the message box directly in DSP memory, so the order of these
; words presumably must not change -- confirm against the host program
; before moving anything.
;=========================================
TEMP         .word    0                   ; Scratch word; the FFT keeps the current twiddle SIN here
TEMPADDR     .word    TEMP                ; Address of TEMP (loaded into AR0 by the FFT)
MSG_BOX      .word    _STOP               ; Mailbox: polled for _START, then reset to _STOP
A_REG        .word    (TA<<9)+(RA<<2)+0   ; AIC word built from TA/RA; sent last by AIC_INIT
B_REG        .word    (TB<<9)+(RB<<2)+2   ; AIC word built from TB/RB; sent before A_REG
C_REG        .word    00000011b           ; +/- 1.5 V
SIZE         .word    MaxN                ; FFT length used at run time
AICLOAD      .word    0xFFFFFFFF          ; Non-zero requests a call to AIC_INIT on the next restart
;                                         ;
;0_gctrl_val .word    0x0E973300          ; CLKR/X active low
S0_gctrl_val .word    0x0E970300          ; CLKR/X active high
S0_xctrl_val .word    0x00000111          ; Copied to S0_xctrl by INIT_DSK
S0_rctrl_val .word    0x00000111          ; Copied to S0_rctrl by INIT_DSK
;                                         ;
T_ADDR       .word    TW_END              ; Address just past the twiddle seed table
DR_ADDR      .word    DR                  ; Address of the REAL data array
DI_ADDR      .word    DI                  ; Address of the IMAG data array
BFLY0        .word    B_FLY0              ; Program addresses used to
BFLY1        .word    B_FLY1              ; initialize a fast repeat block
FLAGS        .word    0                   ; Bit 0x20 is set by the ADC ISR and cleared by GETADC
RAMP         .word    0                   ; Ramp value advanced and transmitted by the DAC ISR
BYPASS       .word    1                   ; Non-zero makes the DAC ISR return without writing DXR
;------------------------------------------------------------------------
; Set LDC_EVAL to 1 for Logarithmic Differential Compression (LDC)
; evaluation.  The evaluation will cycle from non-compressed to LDC
; compressed FFT data every FRAME_CNT/2 frames.
;
; LDC is a simple compression technique which is ideal for audio and
; video.  LDC is simply the first derivative of the data stream,
; expressed in floating point, and with most of the mantissa bits
; masked off.  LDC quality is surprisingly good for the number of bits
; used, but has the additional advantage of being a linear operation.
; LDC compressed data can be filtered and then decompressed after
; processing.  For example, high quality 24 bit audio can be expressed
; using an 8/8 LDC floating point format.
;-------------------------------------------------------------------------
LDC_EVAL     .set     0                   ; 1 = assemble the LDC code, 0 = leave it out
FRAME_CNT    .set     128                 ; Frame counts in repeat loop
MASK         .word    0xFFFFFFFF          ; How many bits of mantissa to use
FRAME        .word    0                   ; Current frame count
LDC          .float   0.0                 ; Forward compress LDC value
ILDC         .float   0.0                 ; Inverse compress LDC value
schmooze     .float   0.999               ; Leaky integrator coefficient
;========================================================================
; main - start of one acquisition/FFT/display frame.
;
; Clears the serial port, discards a run of ADC samples, then fills the
; REAL array (DR) with SIZE converted samples and the IMAG array (DI)
; with zeros.  Execution falls through into FFT.
;
; Uses: R0, AR0, AR1, RC (plus whatever GETADC uses).
;========================================================================
        ; .start "CODE",RAM1
        ; .sect  "CODE"
main    ; ldi   0x1018,R0          ; Use this to set the DSK external
        ; sti   R0,@p_buscon       ; bus for 0 WS
          ;------------------------
          .if   LDC_EVAL           ;
          ldi   @FRAME,R0          ; Use LDC or full precision float
          subi  1,R0               ; Count the frame down ...
          ldile FRAME_CNT,R0       ; ... and reload once it reaches zero or below
          sti   R0,@FRAME          ;
          .endif                   ;

          ldi   0x30,IE            ; Enable the serial port XINT/RINT interrupts
          ldi   @S0_rdata,R0       ; Clear SP under/overflow
          ldi   0,R0               ;
          sti   R0,@S0_xdata       ;
          ldi   @S0_rdata,R0       ; Second read/write pair, same purpose
          ldi   0,R0               ;
          sti   R0,@S0_xdata       ;
          ;------------------------
          ; Flush first ADC value (trash) plus some more samples.
          ; The C3x block-repeat rules do not allow a CALL to be the last
          ; (or only) instruction of a repeat block, so a NOP closes the
          ; block and the CALL sits safely in front of it.
          ;------------------------
          ldi   32,RC              ; RC+1 = 33 samples are discarded
          rptb  FLUSH              ;
          call  GETADC             ;
FLUSH     nop                      ; Block end; keeps the CALL off the last slot
          ;------------------------
          ldi   @DR_ADDR,AR0       ; AR0 -> REAL array
          ldi   @DI_ADDR,AR1       ; AR1 -> IMAG array
          ldi   @SIZE,RC           ; Now get samples
          subi  1,RC               ; RC+1 repeats
          rptb  samples            ;
          ;------------------------
          call  GETADC             ; R0 = next sample as a float
          stf   R0,*AR0++          ; store to data array
          ldf   0,R0               ; Imaginary part of a real input is zero
samples   stf   R0,*AR1++          ;
          ;------------------------; Put 0 into the DXR when it is not
          sti   R0,@S0_xdata       ; going to be used for awhile
          andn  0x30,IE            ; Turn off ADC interrupts
;===========================================================================
; This FFT is written for code size with reasonably fast speed
;    - The first and last stages are not unrolled
;    - Twiddles are loaded outside the butterfly loop for maximum reuse
;    - Twiddles are computed 'on the fly' to minimize on-chip RAM use
;    - The butterfly can be further optimized (readable as is)
;    - Code is compatible with all versions of the C3x/C4x
;      The extended parallel addressing modes added to PG 6.0 of the
;      TMS320C31 and PG 2.0 of the TMS320C32 are not used.
;
; Register use inside the FFT:
;    AR0 = address of TEMP (holds the current twiddle SIN)
;    AR2 = pointer to the (cos,-sin) seed pair for the current stage
;    AR3 = twiddle-group (BLOCK) down counter for the current stage
;    AR4 = REAL data pointer,  AR5 = IMAG data pointer
;    AR7 = butterfly repeat count per twiddle group (loaded into RC)
;    IR0 = SIZE-1 (pointer wrap-back),  IR1 = top-to-bottom offset
;    R6  = twiddle COS,  R7 = twiddle SIN,  R0-R5 = butterfly temporaries
;===========================================================================
FFT:      andn  0x40,IOF           ; Set XF1=0 to BEGIN oscilloscope benchmark
          ;- - - - - - - - - - - -
          float @SIZE,R0           ; log2(SIZE) is used to set the correct start
          pushf R0                 ; position for the sin/cos generator table
          pop   AR2                ; AR2 = float bit pattern of SIZE
          lsh   -23,AR2            ; shift to integer position (2 words per stage)
          subri @T_ADDR,AR2        ; AR2 = TR/TI base pointer
          ;- - - - - - - - - - - -
          ldi   @SIZE,AR3          ; AR3 = SIZE
          subi  1,AR3              ; AR3 = SIZE -1
          ldi   AR3,IR0            ; IR0=SIZE-1
          lsh   -1,AR3             ; AR3 = (SIZE/2)-1
          ldi   @SIZE,IR1          ; IR1 = SIZE
          lsh   -1,IR1             ; IR1=SIZE/2
          ;- - - - - - - - - - - -
          ldi   @BFLY0,RS          ; Load repeat block start and end addresses
          ldi   @BFLY1,RE          ;
          ldi   @TEMPADDR,AR0      ; AR0 -> TEMP, the in-memory copy of SIN
          ldi   0,AR7              ; Initial Block loop counter is 1 loop
          ;=================================================================
          ; NEW STAGE BEGIN
          ; Data pointers return to the array starts and the twiddle is
          ; reset to (1,0) at the start of every stage.
          ;=================================================================
STAGE     ldi   @DR_ADDR,AR4       ; AR4 = REAL DAT
          ldi   @DI_ADDR,AR5       ; AR5 = IMAG DAT
          LDF   1.0         ,R6    ; R6 = COS
          LDF   0.0         ,R7    ; R7 = SIN
          stf   R7,*AR0            ; Initialize temporary location for R7
          ;=================================================================
          ; BLOCK REPEAT BEGIN
          ; RS/RE were loaded once above, so only RC and the repeat-mode
          ; bit need to be set to start the butterfly loop.
          ;=================================================================
BLOCK     ldi   AR7,RC             ; Load first!  if RC==0 RPTM will turn off
          or    100h,ST            ; Turn on RPTMode bit (fast)
          ;=================================================================
          ; BUTTERFLY LOOP
          ; This is the main loop that consumes the most resources
          ;   The inner loop can be optimal for several things
          ;    - Readability               (nothing parallel)
          ;    - Internal execution speed  (fewest opcodes and pipe conflicts)
          ;    - External execution speed  (minimize read/writes)
          ; A temporary location (*AR0) is used for R7 to enable parallel
          ; codes within the Butterfly when older silicon versions are used
          ;
          ; NOTE(review): with EXEN=0 the first variant below is not
          ; assembled and has not been checked as part of this review.
          ;=================================================================
EXEN      .set  0     ; PG6 and later C31, PG2 and later C32
EXOFF     .set  1     ; Older versions of C3x and all C4x
          .if   EXEN
          .xon
          ldf   *+AR5(IR1)     ,R4 ;= IM1  <- Move outside or parallel loops
B_FLY0    subf  R4,*AR5        ,R5 ;= IM0-IM1
          mpyf  R6,R5          ,R0 ;= TR(IM0-IM1)  I
       || subf  *+AR4(IR1),*AR4,R3 ;= RL0-RL1
          mpyf  R7,R3          ,R1 ;= TI(RL0-RL1)  I
       || addf  *+AR4(IR1),*AR4,R2 ;= RL0+RL1
          addf  R0,R1          ,R2 ;= TR(IM0-IM1)+TI(RL0-RL1)
       || stf   R2,*AR4++(IR1)     ;__RL0__
          mpyf  R6,R3          ,R1 ;= TR(RL0-RL1)  R
       || addf  *AR5,R4        ,R3 ;= IM0+IM1
          mpyf  R7,R5          ,R0 ;= TI(IM0-IM1) -R
       || stf   R3,*AR5++(IR1)     ;__IM0__
          subf  R0,R1          ,R1 ;= TR(RL0-RL1)-TI(IM0-IM1) -R
       || stf   R2,*AR5++(IR1)     ;__IM1__
          ldf   *+AR5(IR1)     ,R4 ;= IM1'
B_FLY1 || stf   R1,*AR4++(IR1)     ;__RL1__
          .endif
          .if    EXOFF
B_FLY0    SUBF  *+AR4(IR1),*AR4,R2 ; R2 = RL0-RL1
          SUBF  *+AR5(IR1),*AR5,R1 ; R1 = IM0-IM1
          MPYF  R2,R6,R0           ; R0 = (RL0-RL1)*COS
       || ADDF  *+AR5(IR1),*AR5,R3 ; R3 = IM0+IM1
          MPYF  R1,*AR0,R3         ; R3 = (IM0-IM1)*SIN
       || STF   R3,*AR5++(IR1)     ; IM0= IM0+IM1
          SUBF  R3,R0,R4           ; R4 = (RL0-RL1)*COS - (IM0-IM1)*SIN
          MPYF  R1,R6,R0           ; R0 = (IM0-IM1)*COS
       || ADDF  *+AR4(IR1),*AR4,R3 ; R3 = RL0+RL1
          MPYF  R2,*AR0,R3         ; R3 = (RL0-RL1)*SIN
       || STF   R3,*AR4++(IR1)     ; RL0= RL0+RL1
          ADDF  R0,R3,R5           ; R5 = (IM0-IM1)*COS + (RL0-RL1)*SIN
          STF   R4,*AR4++(IR1)     ; RL1= (RL0-RL1)*COS - (IM0-IM1)*SIN
B_FLY1 || STF   R5,*AR5++(IR1)     ; IM1= (IM0-IM1)*COS + (RL0-RL1)*SIN
          .endif
          ;=================================================================
          ; NEXT TWIDDLE GROUP
          ; Calculate new twiddles using complex vector phase rotation
          ;   new COS = seedR*COS - seedI*SIN
          ;   new SIN = seedI*COS + seedR*SIN
          ; where (seedR,seedI) is the pair at *AR2.  The ADDF3 halves of
          ; the first two parallel pairs exist only for their pointer
          ; update; the R3 they produce is overwritten before it is used.
          ; The three instructions after DBUD are its delay slots and
          ; execute whether or not the branch is taken.
          ;=================================================================
NxtBlock  mpyf3 *+AR2(1),R6,R1     ; I*Rb
       || addf3 *AR5--(IR0),R3,R3  ; Wrap ptr back to IMAG block start
          mpyf3 *+AR2(0),R7,R0     ; R*Ib
       || addf3 *AR4--(IR0),R3,R3  ; Wrap ptr back to REAL block start
          mpyf3 *+AR2(1),R7,R3     ; I*Ib
          dbud   AR3,BLOCK         ; decrement, test and branch
          addf3 R1,R0,R7           ; TW_IMAG = I*Rb + R*Ib
          mpyf3 *+AR2(0),R6,R0     ; R*Rb
       || stf   R7,*AR0            ; Keep the memory copy of SIN current
          subf3 R3,R0,R6           ; TW_REAL = R*Rb - I*Ib
          ;=================================================================
          ; NEXT STAGE
          ; If there is a next stage,
          ; - The twiddle seed pointer is incremented.
          ; - Butterfly repeat doubles
          ; - Butterfly block counter halves (same as offset)
          ; - Top to Bottom data to bottom offset halves
          ; - DBUD tests for exit and adjusts loop count to N-1
          ; The three instructions after DBUD are delay slots.
          ;=================================================================
NxtStage  lsh   -1,IR1             ; Offset and next outer loop is
          ldi   IR1,AR3            ; 1/2 previous value
          dbud  AR3,STAGE          ; Test if at end, decrement counter by 1
          lsh   1,AR7              ; The innermost loop is 2x
          addi  1,AR7              ; AR7 = 2*AR7+1 (RC counts repeats-1)
          addi  2,AR2              ; Point to next twiddle seed
;===========================================================================
; Windowing is performed by convolving the frequency domain with the
; frequency response of a raised cosine window.  Coefficients are -.5,1.0,-.5
; (CONV_WIN as coded forms X[n] - 2*X[n+1] + X[n+2], i.e. the same kernel
; scaled by -2.)
;===========================================================================
FFT_END   or    0x60,IOF           ; Set XF1=1 to END oscilloscope benchmark
          ldi   @DR_ADDR,AR0       ; Raised cosine convolutional filter is
          call  CONV_WIN           ; done in place on both REAL & IMAG
          ldi   @DI_ADDR,AR0       ;
          call  CONV_WIN           ;
          ;------------------------
          ldi   @DR_ADDR,AR0       ; RL^2+IM^2 is calculated and placed
          ldi   @DI_ADDR,AR1       ; in the RL array.  (still bit reversed)
          ldi   @SIZE,RC           ;
          subi  1,RC               ; RC+1 = SIZE iterations
          rptb  MAG                ;
          mpyf3 *AR0,*AR0,R0       ; R0 = RL^2
          mpyf3 *AR1,*AR1,R1       ; R1 = IM^2
          addf  R1,R0              ; R0 = RL^2 + IM^2
          stf   R0,*AR0++          ; Overwrite RL with the squared magnitude
MAG       ldi   *AR1++,R0          ; Only advances AR1; the loaded R0 is not used
          ;- - - - - - - - - - - -
          ; Reads go through AR0 with bit-reversed stepping (IR0=SIZE/2),
          ; writes go linearly through AR1.  The parallel STF stores the
          ; value loaded by the previous LDF, hence the preload.
          ;- - - - - - - - - - - -
BR_DATA   ldi   @DR_ADDR,AR0       ; Bit reverse copy magnitude in REAL array
          ldi   @DI_ADDR,AR1       ; into the IMAG array
          ldi   @SIZE,IR0          ;
          lsh   -1,IR0             ; IR0 = SIZE/2 for bit-reversed addressing
          ldi   @SIZE,RC           ;
          subi  1,RC               ; RC+1 = SIZE stores
          ldf   *AR0++(IR0)B,R0    ; Preload
          rptb  BRD                ;
          ldf   *AR0++(IR0)B,R0    ; Fetch the next value ...
BRD  ||   stf   R0,*AR1++          ; ... while storing the previous one
          ;--------------------------------------------------
          ; Create short log scaled bytes and pack 4 per word
          ;
          ;  AR0 = IMAG[] = R^2+I^2
          ;  AR1 = REAL[] destination
          ;
          ; Log_Mag returns each value in bits 31..24 of R0.  The first
          ; three values of a group are shifted down into bytes 0, 1
          ; and 2 of R7; the fourth is already in byte 3 and is merged
          ; without a shift.
          ;
          ; NOTE(review): the loop runs SIZE/4 times with four values
          ; per pass, so all SIZE values are converted, although an
          ; earlier comment here said only SIZE/2 (positive
          ; frequencies) are.  Left unchanged; confirm what the host
          ; expects before shortening it.
          ;==================================================
Pack      ldi   @DI_ADDR,AR0       ; Source: squared magnitudes
          ldi   @DR_ADDR,AR1       ; Destination: packed words
          ldi   @SIZE,RC           ;
          lsh   -2,RC              ; Pack 4 short log values (bytes) per word
          subi  1,RC               ; RC+1 = SIZE/4 packed words
          rptb  PACKEND            ;
          ;------------------------
          ldi   0,R7               ; Log_Mag returns with windowed R^2 + I^2
          call  Log_Mag            ; packed into 8 MSBs of R0 which is then
          lsh   -24,R0             ; shifted and packed into 4 samples per word
          or    R0,R7              ; byte 0
          call  Log_Mag            ;
          lsh   -16,R0             ;
          or    R0,R7              ; byte 1
          call  Log_Mag            ;
          lsh   -8 ,R0             ;
          or    R0,R7              ; byte 2
          call  Log_Mag            ; no shift this time
          or    R0,R7              ; byte 3
PACKEND   sti   R7,*AR1++          ; store packed data
          ;------------------------
          ; Host interlock: spin until MSG_BOX holds _START, put _STOP
          ; back, then restart at main.  If AICLOAD is non-zero it is
          ; cleared and the AIC is reprogrammed first.
          ;------------------------
HPI       ldi   0x4,IE             ; Interlock with host only uses INT2
          ldi   _START,R0          ;
NO_START  cmpi  @MSG_BOX,R0        ; Restart when START message is received
          bnz   NO_START           ;
          ldi   _STOP,R0           ; Set MSG box to STOP
          sti   R0,@MSG_BOX        ;
          ;------------------------
          ldi   @AICLOAD,R2        ; Check to see if the host requested an
          bz    main               ; AIC reinitialization
          ldi   0,R2               ; Clear the request ...
          sti   R2,@AICLOAD        ;
          call  AIC_INIT           ; Restart with new AIC setup
          b     main               ; Do it all over again!
;---------------------------------------------------------------
; CONV_WIN - in-place 3-point window convolution over one array
;
; In:   AR0 = start of a SIZE-word array held in bit-reversed order
; Out:  each visited element X[n+1] is replaced by
;       X[n] - 2*X[n+1] + X[n+2], neighbours being taken in
;       bit-reversed sequence
; Uses: R0-R3, AR0 (read pointer), AR1 (write pointer), IR0, RC
;---------------------------------------------------------------
CONV_WIN  ldi   AR0,AR1            ; Write pointer starts where the reads start
          ldi   @SIZE,IR0          ;
          lsh   -1,IR0             ; IR0=SIZE/2 selects bit-reversed stepping
          ldi   @SIZE,RC           ;
          subi  1,RC               ; RC+1 = SIZE outputs
          nop   *AR1++(IR0)B       ; First output goes to the second element
          ldf   *AR0++(IR0)B,R0    ; R0 = X[n]    (preload)
          ldf   *AR0++(IR0)B,R1    ; R1 = X[n+1]  (preload)
          rptb  CW                 ;
          ldf   *AR0++(IR0)B,R2    ; R2 = X[n+2]
          ldf   R0,R3              ; R3 = X[n]
          ldf   R1,R0              ; Rotate: old X[n+1] becomes next X[n]
          subf  R1,R3              ; R3 = X[n] - X[n+1]
          subf  R1,R3              ; R3 = X[n] - 2*X[n+1]
          addf  R2,R3              ; R3 = X[n] - 2*X[n+1] + X[n+2]
          ldf   R2,R1              ; Rotate: X[n+2] becomes next X[n+1]
CW        stf   R3,*AR1++(IR0)B    ; Result replaces the centre sample
          rets                     ;
;======================================================================
; Log_Mag - convert one squared magnitude to a clipped log2 byte
;
; The host shows the data on a logarithmic dB scale.  A cheap log2 of
; a positive float is obtained by shifting the mantissa left one bit
; (dropping the zero sign bit) so that the fraction bits sit directly
; below the exponent, then moving the bit pattern into an integer
; register with PUSHF/POP.  Shifting that pattern right leaves the
; exponent with three fraction bits (a 5.3 value).
;
; The same trick applied to SIZE gives the FFT growth term, which is
; subtracted in the log domain, followed by a fixed display offset.
;
; In:   AR0 = pointer to the next value (post-incremented)
; Out:  R0  = result clipped to -128..+127, placed in bits 31..24
; Uses: R1, two transient stack words
;======================================================================
Log_Mag   float @SIZE,R1           ; Growth term first: R1 = (float)SIZE
          pushf R1                 ; Move its bit pattern to an
          pop   R1                 ; integer register
          lsh   -20,R1             ; log2(SIZE) as a 28.4 value
          ;- - - - - - - - - - - -
          ldf   *AR0++,R0          ; Fetch the value, step to the next one
          lsh   1,R0               ; Butt the mantissa fraction up to the exponent
          pushf R0                 ; and move the pattern across
          pop   R0                 ; to the integer side
          ash   -21,R0             ; 29.3 value (5.3 log in the 8 LSBs)
          subi  R1,R0              ; Remove FFT data growth in the log domain
          subi  96,R0              ; Fixed offset to position data on the display
          ;- - - - - - - - - - - -
          cmpi  -128,R0            ; Clip low ...
          ldile -128,R0            ;
          cmpi  127,R0             ; ... and high
          ldige 127,R0             ;
          lsh   24,R0              ; Return the byte in the 8 MSBs for the packer
          rets                     ;
;***************************************************************************
; GETADC - wait for the next receive interrupt and return the sample
;
; Sleeps in IDLE until an interrupt occurs, then checks the 0x20 bit of
; FLAGS that the ADC ISR sets.  Any other wake-up goes back to IDLE.
; The low 16 bits of the receive register are sign extended and
; converted to float.
;
; Out:  R0 = sample as a float, range -32768 to +32767
; Uses: R0, IE (left at 0x30), FLAGS; R1 as well when LDC_EVAL is set
;***************************************************************************
GETADC    ldi   0x30,IE            ; Allow the serial port interrupts
GA_WAIT   idle                     ; Sleep until something interrupts
          ldi   @FLAGS,R0          ;
          tstb  0x20,R0            ; Was it the receive interrupt?
          bz    GA_WAIT            ; No: go back to sleep
          andn  0x20,R0            ; Yes: consume the flag
          sti   R0,@FLAGS          ;
          ldi   @S0_rdata,R0       ; Raw receive word
          lsh   16,R0              ; Sign extend the low 16 bits ...
          ash   -16,R0             ;
          float R0,R0              ; ... and hand back a float

          .if   LDC_EVAL        ;
          ldi   @FRAME,R1       ; First half of the frame cycle: plain data
          cmpi  FRAME_CNT/2,R1  ; (FRAME itself is stepped in main)
          blt   NOLDC           ;
          ;- - - - - - - - - - -
DO_LDC    subf   @LDC,R0        ; Difference from the running LDC sum
          and    @MASK,R0       ; Keep only the selected mantissa bits
          ldf    R0,R1          ; Rebuild the running sum from the
          addf   @LDC,R1        ; masked difference
          stf    R1,@LDC        ;
          addf   @ILDC,R0       ; Inverse side: integrate the difference
          mpyf   @schmooze,R0   ; with a slight leak
          stf    R0,@ILDC       ;
          .endif

NOLDC     rets                     ;
;-------------------------------------------------------
; XMIT/RECV Serial Port Interrupt Service Routines
;
; ADC: receive interrupt (RINT0 vector).  Sets bit 0x20 in FLAGS
; for GETADC to find.  ST and R0 are saved and restored; only
; integer operations touch R0 in between.
;-------------------------------------------------------
ADC       push  ST                 ; On interrupt, set a software flag to
          push  R0                 ; let the CPU know that RINT occurred
          ldi   @S0_rdata,R0       ; Value is discarded (overwritten below); presumably a dummy read
          ldi   @FLAGS,R0          ;
          or    0x20,R0            ;
          sti   R0,@FLAGS          ;
          pop   R0                 ;
          pop   ST                 ;
          reti                     ;
          ;- - - - - - - - - - - -
          ; DAC: transmit interrupt (XINT0 vector).  While BYPASS is
          ; non-zero nothing is written.  Otherwise RAMP is advanced
          ; by 1024, masked with 0x7FFC and written to the transmit
          ; register.  ST and R1 are saved and restored.
          ;- - - - - - - - - - - -
DAC       push  ST                 ;
          push  R1                 ;
          ldi   @BYPASS,R1         ; Non-zero BYPASS: leave DXR alone
          bnz   DACRET             ;
          ldi   @RAMP,R1           ;
          addi  1024,R1            ; Step the ramp
          and   0x7FFC,R1          ; Wrap the ramp and clear the two LSBs
          sti   R1,@RAMP           ;
          sti   R1,@S0_xdata       ; Transmit the ramp value (not an ADC loopback)
DACRET    pop   R1                 ;
          pop   ST                 ;
          reti                     ;
;----------------------------------
; prog_AIC - send one AIC register word using a secondary
; transmission.
;
; In:   R0 = word to send
; Uses: R1 and IE are saved and restored; the two low bits of the
;       pending DXR value are set to request the secondary
;       transmission and cleared again on exit.
; Each IDLE waits for an interrupt with only the 0x10 enable set,
; so the routine depends on the transmit interrupt firing.
;----------------------------------
prog_AIC  push  R1                 ;
          push  IE                 ;
          ldi   0x10,IE            ; Only the XINT enable while programming
          andn  0x30,IF            ; Drop any pending serial port interrupts
          ldi   @S0_xdata,R1       ; Use original DXR data during 2 ndy
          or    3,R1               ; Request 2 ndy XMIT
          sti   R1,@S0_xdata       ;
          idle                     ; Wait for the request word to go out
          sti   R0,@S0_xdata       ; Send register value
          idle                     ; Wait for the register word to go out
          andn  3,R1               ;
          sti   R1,@S0_xdata       ; Leave with original safe value in DXR
          pop   IE                 ;
          pop   R1                 ;
          rets                     ;
;======================================================;
; This section of code is called by the initialization ;
; code as well as by the main program loop.  It is     ;
; therefore assembled into the regular program RAM     ;
;                                                      ;
; AIC_INIT - reset the AIC through XF0 and program it  ;
; from C_REG, B_REG and A_REG.                         ;
; Uses: R0, IE (left at 0x10), IF, IOF, BYPASS.        ;
; BYPASS is made non-zero during programming so the    ;
; DAC ISR does not write DXR, and is cleared on exit.  ;
;======================================================;
AIC_INIT  LDI   0x10,IE         ; Enable XINT interrupt
          andn  0x34,IF         ; Clear pending flags for bits 0x34
AIC_reset ldi   0,R0            ;
          sti   R0,@S0_xdata    ; Safe 0 in DXR before the reset
          RPTS  0x040           ; Repeat the next load to stretch the reset pulse
          LDI   2,IOF           ; XF0=0 resets AIC
          rpts  0x40            ; Same again to hold the run level
          LDI   6,IOF           ; XF0=1 runs AIC
          sti   IOF,@BYPASS     ; Store a non-zero value so the DAC ISR stays quiet
          ldi   @S0_rdata,R0    ; Read the receive register; the value is not used
          ldi   0,R0            ;
          sti   R0,@S0_xdata    ;
          ;-----------------------------
          ldi   @C_REG,R0       ; Setup control register
          call  prog_AIC        ;
          ldi   0xfffc  ,R0     ; Program the AIC to be real slow
          call  prog_AIC        ;
          ldi   0xfffc|2,R0     ;
          call  prog_AIC        ;
          ldi   @B_REG,R0       ; Bump up the Fs to final rate
          call  prog_AIC        ; (smallest divisor should be last)
          ldi   @A_REG,R0       ;
          call  prog_AIC        ;
          ldi   0,R0            ; Put a safe 0 in DXR
          sti   R0,@BYPASS      ; BYPASS=0 lets the DAC ISR write DXR again
          sti   R0,@S0_xdata    ;
          ldi   @S0_rdata,R0    ; Clear receive underrun
          rets                  ;
;===========================================================================
; Initialization code is used only once and can be safely overwritten
; by assembling it into the stack or volatile data storage.
;
; INIT_DSK is the program entry point (.entry).  It sets the data page,
; configures timer 0 and serial port 0, initializes the AIC and then
; jumps to main.  It never returns.
;===========================================================================
       ;  .start   "INIT",DR    ; Place this code in the data buffer
       ;  .sect    "INIT"       ; area as this is the first to go
          .entry   INIT_DSK     ;
INIT_DSK
          pop   R0 ; Junk pop SP--

          ldp   T0_ctrl         ; Use kernel data page and stack
                                ; NOTE(review): every @ access in this file relies on this
                                ; page also covering the variables in RAM1 -- T0_ctrl comes
                                ; from C3XMMRS.ASM, which is not visible here
          ldi   0,R0            ; Halt the timer (only the T0 registers are written here)
          sti   R0,@T0_ctrl     ;
          sti   R0,@T0_count    ; Set counts to 0
          ldi   1,R0            ; Set periods to 1
          sti   R0,@T0_prd      ;
          ldi   0x2C1,R0        ; Restart the timer
          sti   R0,@T0_ctrl     ;
          ;---------------------
          ldi   @S0_xctrl_val,R0;
          sti   R0,@S0_xctrl    ; transmit control
          ldi   @S0_rctrl_val,R0;
          sti   R0,@S0_rctrl    ; receive control
          ldi   0,R0            ;
          sti   R0,@S0_xdata    ; DXR data value
          ldi   @S0_gctrl_val,R0; Setup serial port
          sti   R0,@S0_gctrl    ; global control
          ;---------------------
          call  AIC_INIT        ; Initialize the AIC
          ldi   0x30,IE         ; Service both RINT/XINT
          ldi   @S0_rdata,R0    ; Read the receive register; the value is not used
          b     main            ;
;======================================================================
; Since the C31 is being used in bootloader mode, interrupts begin
; execution at the secondary branch table in internal SRAM.  A branch
; to XINT/RINT ISR routine is placed directly into these locations
; The two branches occupy consecutive words starting at 0x809FC5.
;======================================================================
          .start   "SP0VECTS",0x809FC5
          .sect    "SP0VECTS"
          B     DAC             ; XINT0
          B     ADC             ; RINT0



