;=========================================================================
; FFT_RR2.ASM
; Keith Larson
; TMS320 DSP Applications
; (C) Copyright 1995,1996
; Texas Instruments Incorporated
;
; This is unsupported freeware with no implied warranties or
; liabilities.  See the disclaimer document for details
;
; This application will work with either the DOS executable FFT_XXX.EXE
; or the windows application DSK3WIN.EXE
;-------------------------------------------------------------------------
;  NOTES
;
;  Computed Twiddle values
;  -----------------------
;  To keep as much on chip ram free as possible, the FFT twiddles can
;  be computed on the fly.  Twiddles are computed from an initial seed
;  value (complex(1,0)) which is then phase rotated using a complex
;  phasor held in a lookup table.  This computation is relatively fast and
;  occurs during a delayed branch.
;
;  Minimised Twiddle Access
;  ------------------------
;  The number of twiddle accesses during an FFT can also be greatly reduced
;  by noting that when each new stage occurs, half as many twiddles are
;  actually used.  By adding an extra inner loop, the number of twiddle
;  accesses are reduced from N/2*log2(N) to N accesses.
;
;  >>>> The combination of minimized twiddle lookup <<<<
;  >>>> and computed twiddles is a double benefit.  <<<<
;
;  FFT Windowing
;  -------------
;  Windowing is done using a convolution of the window functions frequency
;  response with the FFT frequency data.  A raised cosine window is used
;  which has a very simple frequency domain response of -0.5,+1.0,-0.5
;  centered at the 0th bin (DC).
;
;  Since the convolution is being performed on a REAL window function and
;  the complex FFT frequency domain data, the convolution is performed on
;  the FFT frequency domain REAL and IMAG data separately.
;
;  Bit-reversal of data
;  --------------------
;  After windowing, the data which is still in bit-reversed order, is
;  converted to MAG^2=REAL^2+IMAG^2 and placed back into the REAL data array.
;  The data is then bit-reversed and put into the IMAG data array.
;
;  log2(MAG^2) 'decibel' calculation
;  ---------------------------------
;  The MAG^2 array is then converted to 8 bit log2 pixel offsets which are
;  scaled to fit a decibel graphical display.  The resulting data array is
;  further packed at 4 bytes/long word to minimize host transfers.
;
;  STARTUP STUB
;  ------------
;  The initialization code which is used only on startup is placed inside
;  the volatile data memory array to gain back internal memory.
;
;  AIC SAMPLING RATES
;  ------------------
;  The AIC sampling rate can be adjusted well outside the tested limits found
;  in the data sheet.  Very high sample rates are possible at the expense of
;  signal degradation.  Some values work better than others, and some not
;  at all, so some experimentation is required.
;
;  HOST SYNCHRONIZATION
;  --------------------
;  Since the host should not disturb the ADC while data is being collected
;  an interlock is used to keep the host from timing out.  When the host
;  gains access to the DSK, the restart sequence is as follows
;
;    Set P_STRB=0
;    Wait for HPACK=0 indicating DSP is ready for transfer
;    Update any AIC registers by writing to the appropriate memory locations
;    Write a START to location AICLOAD if any AIC registers needs reloading
;    Write a START to the message box
;
;-------------------------------------------------------------------------
; The following constants are used to initialize the AIC plus
; create the Twiddle, FFT and I/O buffer arrays
;=========================================================================
_REAL_      .set    1                     ; 1 = REAL-input FFT packed into a half size complex FFT, 0 = complex FFT
Max         .set    1024                  ; Max on chip REAL FFT size
log2Max     .set    log(2*Max)/log(2)     ; Need 1 extra seed beyond log2(Max)
PI          .set    3.141592654           ; Used for the twiddle seed angles and the SIGGEN step angle
            .if     _REAL_                ; REAL FFT: the complex radix 2 FFT is half the total size
CR2Max      .set    Max/2                 ; Largest complex radix 2 FFT (sizes the DR/DI arrays)
            .if     Max>1024              ; Non-directive text below forces an assembly error
 ERR: Max on chip _REAL_ FFT is 1024      ;
            .endif                        ;
            .else                         ; Complex FFT: the radix 2 FFT is the full size
CR2Max      .set    Max                   ; Largest complex radix 2 FFT (sizes the DR/DI arrays)
            .if     Max>512               ; Non-directive text below forces an assembly error
 ERR: Max on chip FFT is 512 points (REAL up to 1024)
            .endif                        ;
            .endif                        ;
            .include "C3XMMRS.ASM"        ; NOTE(review): presumably defines S0_rdata, S0_xdata etc. - confirm
;-----------------------------------------
TA          .set    12                    ; TA/RA are packed into the A_REG word,
TB          .set    14                    ; TB/RB into the B_REG word (see A_REG/B_REG)
RA          .set    12                    ;
RB          .set    14                    ;
;=========================================
_STOP       .set    1                     ; MSG_BOX value: DSP waits for the host
_START      .set    2                     ; MSG_BOX value: host requests a restart of main
;=========================================
RAM0        .set    0x809800              ; Base address used for the FFT data arrays
RAM1        .set    0x809C00              ; Not referenced in the visible part of this file
DATA_ARRAY  .set    DR                    ; DR/DI must be on SIZE boundary
DR          .set    RAM0                  ; for bit-reverse addressing
DI          .set    DR+CR2Max             ; Buffer size for called CR2 FFT
;-----------------------------------------
BITREV      .set 1                        ; Not referenced in the visible part of this file
            .start  "TW_SEED",DI+CR2Max   ; Seeds, constants and code are all
            .sect   "TW_SEED"             ; placed into second ram block
;-----------------------------------------
; Twiddle seed table: log2Max pairs of (cos,-sin).  The first pair uses the
; angle PI/Max and each following pair doubles the angle.  TW_END marks the
; word after the last pair; FFT and UNPACK index backwards from it.
;-----------------------------------------
TWx         .sdef   PI/Max                ; Step angle for first seed
            .loop   log2Max               ;
            .float   cos(TWx)             ; REAL part of the phasor
            .float  -sin(TWx)             ; IMAG part of the phasor
TWx         .sdef   TWx * 2               ; Step angle doubles for each stage
            .endloop                      ;
TW_END                                    ; End of seed table (loaded via T_ADDR)
;=========================================
TEMP         .word    0                   ; Scratch word: FFT keeps a copy of the SIN twiddle here
TEMPADDR     .word    TEMP                ; Address of TEMP (FFT loads it into AR0)
MSG_BOX      .word    _STOP               ; Host/DSP message box, polled in HPI
A_REG        .word    (TA<<9)+(RA<<2)+0   ; AIC register words; the host may rewrite
B_REG        .word    (TB<<9)+(RB<<2)+2   ; them and request a reload through AICLOAD
C_REG        .word    00000011b           ; +/- 1.5 V
SIZE         .word    Max                 ; Desired FFT size
CR2SIZE      .word    CR2Max              ; Max size of CR2 FFT
AICLOAD      .word    0xFFFFFFFF          ; Non-zero = run AIC_INIT on the next restart (set at load)
;                                         ;
;0_gctrl_val .word    0x0E973300          ; CLKR/X active low
S0_gctrl_val .word    0x0E970300          ; CLKR/X active high
S0_xctrl_val .word    0x00000111          ;
S0_rctrl_val .word    0x00000111          ;
;                                         ;
T_ADDR       .word    TW_END              ; Address of the end of the twiddle seed table
DR_ADDR      .word    DR                  ; Address of the REAL data array
DI_ADDR      .word    DI                  ; Address of the IMAG data array
BFLY0        .word    B_FLY0              ; Program addresses used to
BFLY1        .word    B_FLY1              ; initialize a fast repeat block
FLAGS        .word    0                   ; Software flags; bit 0x20 is set by the ADC ISR
;RAMP         .word    0                   ;
BYPASS       .word    1                   ; Non-zero makes the DAC ISR skip SIGGEN
;=======================================================================
;-----------------------------------------------------------------------
; main - collect one frame of ADC samples, run the FFT and pack the
;        result for the host.  Execution then falls through into HPI,
;        which branches back here when the host posts _START.
;
; Reads  : SIZE, DR_ADDR, DI_ADDR
; Writes : CR2SIZE (SIZE/2 when _REAL_, else SIZE), the DR/DI arrays
; Uses   : R0, AR0, AR1, RC plus whatever GETADC, FFT and Pack use
;-----------------------------------------------------------------------
main      ldi   0x30,IE            ; Same interrupt mask GETADC loads
          ldi   @S0_rdata,R0       ; Clear SP under/overflow
          ldi   0,R0               ;
          sti   R0,@S0_xdata       ;
          ldi   @S0_rdata,R0       ; (done twice)
          ldi   0,R0               ;
          sti   R0,@S0_xdata       ;
          ;------------------------
          ; Flush first ADC value (trash) plus some more samples.
          ; RC=32 gives 33 passes.  The repeat block ends on a NOP because
          ; the C3x repeat-block rules do not allow a CALL as the last
          ; instruction of the block.
          ;------------------------
          ldi   32,RC              ;
          rptb  $+2                ; Block = CALL + NOP
          call  GETADC             ;
          nop                      ; Last instruction of the repeat block
          ;------------------------
          ldi   @SIZE,R0           ;
          .if   _REAL_             ; If REAL, the computed Complex Radix 2
          lsh   -1,R0              ; FFT is half the total size
          .endif                   ;
          sti   R0,@CR2SIZE        ; CR2SIZE = size of the complex FFT to run
          ldi   @DR_ADDR,AR0       ; AR0 -> REAL array
          ldi   @DI_ADDR,AR1       ; AR1 -> IMAG array
          ldi   @CR2SIZE,RC        ; Get SIZE samples
          subi  1,RC               ; RC=CR2SIZE-1 repeats CR2SIZE times
          rptb  samples            ;
          ;------------------------
          .if   _REAL_             ; REAL FFT packs data into both REAL & IMAG
          call  GETADC             ; imag=even samples, real=odd
          .else                    ;
          ldf   0,R0               ; Normal FFT zeros IMAG
          .endif                   ;
          stf   R0,*AR1++          ;
          call  GETADC             ;
samples   stf   R0,*AR0++          ;
          ;================================================================
          ; Set S0_xdata=0 if the serial port is not to be used.  This
          ; drives DX low such that if the serial port is underrun and DX
          ; tri-states, DX remains low.  On re-start, set S0_xdata=0 to wake
          ; up the port and drive DX to an active low state.  A pulldown
          ; resistor on the DX pin holds the pin low while it is tri-state
          ;================================================================
          ldi   0,R0               ; R0 still held the last sample (float); write a real 0
          sti   R0,@S0_xdata       ;
          andn  0x30,IE            ; Mask the interrupts enabled at entry
          andn  0x40,IOF           ; XF1=0 begins benchmark
          call  FFT                ; Call the Application
          or    0x60,IOF           ; XF1=1 ends benchmark
          call  Pack               ; Pack log2 byte data to be sent to host
          ;================================================================
          ; With the application finished, exchange data and messages
          ;  1) The host drives PSTRB low, and waits for an acknowledge
          ;  2) Transfer the data to the host for display using getmem()
          ;  3) If the AIC is to be reprogrammed, first load the
          ;     initialization parameters, such as A_REG, using putmem()
          ;     followed by a non-zero value in AICLOAD.
          ;  4) Write _START to MSG_BOX
          ;  5) The DSP then re-executes main and the host waits
          ;================================================================
HPI       ldi   0x4,IE             ; Interlock with host only uses INT2
          ldi   _START,R0          ; Value to compare the message box against
NO_START  cmpi  @MSG_BOX,R0        ; Restart when START message is received
          bnz   NO_START           ; Spin until MSG_BOX == _START
          ldi   _STOP,R0           ; Set MSG box to STOP
          sti   R0,@MSG_BOX        ;
          ;------------------------
          ldi   @AICLOAD,R2        ; Check to see if the host requested an
          bz    main               ; AIC reinitialization (AICLOAD==0: none)
          ldi   0,R2               ; Clear the request before servicing it
          sti   R2,@AICLOAD        ;
          call  AIC_INIT           ; Restart with new AIC setup
          b     main               ; Do it all over again!
;===========================================================================
; The FFT is written for code size and reasonably fast speed
;    - The first and last stages are not unrolled
;    - Twiddles are loaded outside the butterfly loop for maximum reuse
;    - Twiddles are computed 'on the fly' to minimize on-chip RAM use
;    - The butterfly can be further optimized (readable as is)
;    - Code is compatible with all versions of the C3x/C4x
;      The extended parallel addressing modes added to PG 6.0 of the
;      TMS320C31 and PG 2.0 of the TMS320C32 are not used.
;===========================================================================
          .start "FFTSECT",$       ;
          .sect  "FFTSECT"         ;
;---------------------------------------------------------------------------
; FFT - in-place complex radix 2 FFT of CR2SIZE points on the arrays at
;       DR_ADDR (REAL) and DI_ADDR (IMAG), followed by the post-processing
;       calls UNPACK (only if _REAL_), CONV1, RI2MAGR and BR_R2I.
;
; Register use inside the stage/block loops:
;   AR0     -> TEMP, memory copy of the SIN twiddle (R7)
;   AR2     -> current twiddle seed pair, stepped by 2 words per stage
;   AR3     = block loop counter (DBUD)
;   AR4/AR5 -> REAL / IMAG data
;   AR7     = butterfly repeat count loaded into RC (0, 1, 3, ...)
;   IR0     = SZ-1, used to wrap AR4/AR5 back after each block
;   IR1     = offset between the two butterfly inputs (halves each stage)
;   R6/R7   = current twiddle COS / SIN
;   RS/RE   = B_FLY0 / B_FLY1, repeat block armed by setting the RPTM bit
;---------------------------------------------------------------------------
FFT:      float @CR2SIZE,R0        ; log2(SZ) is used to set the correct start
          pushf R0                 ; position for the sin/cos generator table
          pop   AR2                ;
          lsh   -23,AR2            ; shift to integer position
          subri @T_ADDR,AR2        ; AR2 = TR/TI base pointer
          ldi   @CR2SIZE,AR3       ; AR3 = SZ
          subi  1,AR3              ; AR3 = SZ -1
          ldi   AR3,IR0            ; IR0=SZ-1
          lsh   -1,AR3             ; AR3 = (SZ/2)-1
          ldi   @CR2SIZE,IR1       ; IR1 = SZ
          lsh   -1,IR1             ; IR1=SZ/2
          ldi   @BFLY0,RS          ; Load repeat block start and end addresses
          ldi   @BFLY1,RE          ;
          ldi   @TEMPADDR,AR0      ; AR0 -> TEMP
          ldi   0,AR7              ; Initial Block loop counter is 1 loop
          ;------------------------
          ; NEW STAGE BEGIN
          ;------------------------
STAGE     ldi   @DR_ADDR,AR4       ; AR4 = REAL DAT
          ldi   @DI_ADDR,AR5       ; AR5 = IMAG DAT
          LDF   1.0         ,R6    ; R6 = COS
          LDF   0.0         ,R7    ; R7 = SIN
          stf   R7,*AR0            ; Initialize temporary location for R7
          ;------------------------
          ; BLOCK REPEAT BEGIN
          ;------------------------
BLOCK     ldi   AR7,RC             ; Load first!  if RC==0 RPTM will turn off
          or    100h,ST            ; Turn on RPTMode bit (fast)
          ;-----------------------------------------------------------------
          ; BUTTERFLY LOOP
          ; This is the main loop that consumes the most resources
          ;   The inner loop can be optimal for several things
          ;    - Readability               (nothing parallel)
          ;    - Internal execution speed  (fewest opcodes & pipe conflicts)
          ;    - External execution speed  (minimize read/writes)
          ; A temporary location (*AR0) is used for F7 to enable parallel
          ; codes within the Butterfly when older silicon versions are used
          ;-----------------------------------------------------------------
EXEN      .set  0                  ; 1 = butterfly for PG6 and later C31, PG2 and later C32
          .if   EXEN               ; 0 = butterfly for older versions of C3x and all C4x
          .xon
B_FLY0    subf  *+AR5(IR1),*AR5,R5 ;= IM0-IM1
          mpyf  R6,R5          ,R0 ;= TR(IM0-IM1)
       || subf  *+AR4(IR1),*AR4,R3 ;= RL0-RL1
          mpyf  R7,R3          ,R1 ;= TI(RL0-RL1)
       || addf  *+AR4(IR1),*AR4,R2 ;= RL0+RL1
          addf  R0,R1          ,R2 ;= TR(IM0-IM1)+TI(RL0-RL1)
       || stf   R2,*AR4++(IR1)     ;RL0
          mpyf  R6,R3          ,R1 ;= TR(RL0-RL1)
       || addf  *AR5,*+AR5(IR1),R3 ;= IM0+IM1
          mpyf  R7,R5          ,R0 ;= TI(IM0-IM1)
       || stf   R3,*AR5++(IR1)     ;IM0
          subf  R0,R1          ,R1 ;= TR(RL0-RL1)-TI(IM0-IM1) -R
       || stf   R2,*AR5++(IR1)     ;IM1
B_FLY1    stf   R1,*AR4++(IR1)     ;RL1               (11 mem accesses)

          .else
B_FLY0    SUBF  *+AR4(IR1),*AR4,R2 ; R2 = RL0-RL1     (14 mem accesses)
          SUBF  *+AR5(IR1),*AR5,R1 ; R1 = IM0-IM1
          MPYF  R2,R6          ,R0 ; R0 = (RL0-RL1)COS
       || ADDF  *+AR5(IR1),*AR5,R3 ; R3 = IM0+IM1
          MPYF  R1,*AR0        ,R3 ; R3 = (IM0-IM1)SIN
       || STF   R3,*AR5++(IR1)     ; IM0
          SUBF  R3,R0          ,R4 ; R4 = (RL0-RL1)COS - (IM0-IM1)SIN
          MPYF  R1,R6          ,R0 ; R0 = (IM0-IM1)COS
       || ADDF  *+AR4(IR1),*AR4,R3 ; R3 = RL0+RL1
          MPYF  R2,*AR0        ,R3 ; R3 = (RL0-RL1)SIN
       || STF   R3,*AR4++(IR1)     ; RL0
          ADDF  R0,R3          ,R5 ; R5 = (IM0-IM1)COS + (RL0-RL1)SIN
          STF   R4,*AR4++(IR1)     ; RL1
B_FLY1 || STF   R5,*AR5++(IR1)     ; IM1
          .endif
          ;--------------------------------
          ; Calculate new twiddles using
          ; complex vector phase rotation
          ; (the parallel ADDF3s are dummy
          ; ops that only move AR4/AR5; the
          ; three instructions after DBUD
          ; execute before the branch)
          ;--------------------------------
NxtBlock  mpyf3 *+AR2(1),R6,R1     ; I*Rb
       || addf3 *AR5--(IR0),R3,R3  ; Wrap ptr back to IMAG block start
          mpyf3 *+AR2(0),R7,R0     ; R*Ib
       || addf3 *AR4--(IR0),R3,R3  ; Wrap ptr back to REAL block start
          mpyf3 *+AR2(1),R7,R3     ; I*Ib
          dbud   AR3,BLOCK         ; decrement, test and branch
          addf3 R1,R0,R7           ; TW_IMAG = I*Rb + R*Ib
          mpyf3 *+AR2(0),R6,R0     ; R*Rb
       || stf   R7,*AR0            ; Keep the memory copy of SIN in step with R7
          subf3 R3,R0,R6           ; TW_REAL = R*Rb - I*Ib
          ;-------------------------------------------------------
          ; NEXT STAGE
          ; If there is a next stage,
          ; - The twiddle seed pointer is incremented.
          ; - Butterfly repeat doubles
          ; - Butterfly block counter halfs (same as offset)
          ; - Top to Bottom data to bottom offset halfs
          ; - DBUD tests for exit and adjusts loop count to N-1
          ;-------------------------------------------------------
NxtStage  lsh   -1,IR1             ; Offset and next outer loop is
          ldi   IR1,AR3            ; 1/2 previous value
          dbud  AR3,STAGE          ; Test if at end, decrement counter by 1
          lsh   1,AR7              ; The innermost loop is 2x
          addi  1,AR7              ; AR7 = 2*AR7+1 (repeat count is passes-1)
          addi  2,AR2              ; Point to next twiddle seed
          ;--------------------------------------------------------------
          ; If a REAL FFT has been selected, the output data needs to be
          ; passed through one more stage of fourier transform coding to
          ; generate the full size FFT.
          ;--------------------------------------------------------------
          .if   _REAL_             ;
          call  UNPACK             ; Unpack the REAL FFT into 2x FFT
          .endif                   ;
          call  CONV1              ; Convolve raised cosine window with F data
          call  RI2MAGR            ; Convert to magnitude (into REAL array)
          call  BR_R2I             ; Bit reverse the magnitudes
FFT_END   rets                     ;
;========================================================================
; UNPACK unpacks a complex radix 2 FFT which has been used to perform a
; REAL FFT.  Before calling the FFT, the input data is packed as
; REAL=odd samples and IMAG=even samples.
;
; The unpack operation then applies a series of butterfly operations
; which 'work inward' from the endpoints of the FFT arrays.  That is,
; as one data pointer increments, the other decrements.  As each data
; set is addressed, it is replaced, allowing the operation to occur
; 'in place'.  However, the resulting FFT arrays are bit-reversed and
; would either need to be de bit-reversed (requiring a swap buffer),
; or operated on while stored in bit-reversed order.
;
; However, the C3x/C4x architecture only provides the bit-reverse pointer
; increment operation *AR0++(IR0)B, and not the complimentary decrement
; operation '*AR0--(IR0)B'.
;
; The solution is to reverse only the second half of each arrays and
; then use only bit-reverse increment operations. A careful examination
; of the bit-reverse data ordering shows that
;
; 1) The second half of each array is packed into the odd data slots
; 2) A reversal of the odd samples, will also reverse the ordering
;    if accessed using bit reverse pointer arithmetic
;
; NOTE: Only one call to BR_ODD is actually used.  The second 'call'
;       occurs as a 'fall through' when the UNPACK routine finishes.
;       This saves code size and lowers cycle count.
;========================================================================
          .if   _REAL_             ; Dont assemble UNPACK if not _REAL_
;---------------------------------------------------------------------------
; UNPACK - post-process the CR2SIZE point complex FFT in DR/DI into the
;          spectrum of the packed REAL input.  Works in place on the
;          bit-reversed arrays.  Does not return by itself: it falls through
;          into BR_ODD, whose RETS returns to the caller.
; Uses   : R0-R7, AR2, AR4-AR7, IR0, RC (plus the registers BR_ODD uses)
;---------------------------------------------------------------------------
UNPACK    call  BR_ODD             ; reverse odd samples
          ;----------------------------------------------------------------
          float @CR2SIZE,R0        ; log2(CR2SIZE) sets offset into the
          pushf R0                 ; sin/cos twiddle generator table
          pop   AR2                ;
          lsh   -23,AR2            ; shift to integer position
          subri @T_ADDR,AR2        ; AR2 = TR/TI base pointer
          subi  2,AR2              ; Actual SIZE is 2x bigger (use lower seed)
          ldi   @CR2SIZE,IR0       ;
          lsh   -1,IR0             ; SIZE/2 for Bit Reverse access
          ldi   IR0,RC             ; SIZE/2-1 repeats
          subi  1,RC               ;        (Skip first cell)
          ldi   @DR_ADDR      ,AR4 ; left  REAL
          ldi   @DI_ADDR      ,AR5 ; left  IMAG
          ldi   @DR_ADDR      ,AR6 ; right REAL
          ldi   @DI_ADDR      ,AR7 ; right IMAG
          addf  *AR5,*AR4,R0       ; REAL[0] = REAL[0] + IMAG[0]
          subf  *AR5,*AR4,R1       ; IMAG[0] = REAL[0] - IMAG[0]
          stf   R0,*AR4++(IR0)     ; Save DC component while incrementing
      ||  stf   R1,*AR5++(IR0)     ; pointer to 1st element of first half
          ldi   *AR6++,R0          ; increment to 1st element of second half
      ||  ldi   *AR7++,R0          ; (dummy loads, only the pointers matter)
          ldf   *+AR2(0)      ,R6  ; Initial COS   1st cell (DC) is skipped
      ||  ldf   *+AR2(1)      ,R7  ; Initial SIN   so load TR/TI
          rptb  BEND               ;
          ;------ RFFT UNPACK BUTTERFLY -------
          subf  *AR4,*AR6      ,R4 ; AR6 - AR4
          mpyf  R7,R4          ,R0 ; R7  * R4
      ||  addf  *AR7,*AR5      ,R2 ; AR5 + AR7
          mpyf  R6,R2          ,R1 ; R6  * R2
      ||  addf  *AR6,*AR4      ,R3 ; AR4 + AR6
          addf  R0,R1          ,R2 ; R0  + R1
          subf  R2,R3          ,R5 ; R3  - R2
          addf  R3,R2          ,R0 ; R3  + R2
          stf   R5,*AR6++(IR0)B    ; New right REAL
      ||  stf   R0,*AR4++(IR0)B    ; New left  REAL
          mpyf  R6,R4          ,R1 ; R6  * R4
      ||  addf  *AR7,*AR5      ,R2 ; AR5 + AR7
          mpyf  R7,R2          ,R0 ; R7  * R2
      ||  subf  *AR7,*AR5      ,R3 ; AR5 - AR7
          subf  R0,R1          ,R4 ; R1  - R0
          addf  R3,R4          ,R2 ; R3  + R4
          subf  R3,R4          ,R3 ; R4  - R3
          mpyf3 *+AR2(0),R6,R0     ; Ra*Rb
      ||  stf   R2,*AR5++(IR0)B    ; New left  IMAG
          mpyf3 *+AR2(1),R7,R1     ; Ia*Ib
      ||  stf   R3,*AR7++(IR0)B    ; New right IMAG
          ;----------------------------- Calculate next twiddle
          mpyf3 *+AR2(0),R7,R2     ; Ra*Ib
          mpyf3 *+AR2(1),R6,R3     ; Ia*Rb
          addf3 R2,R3,R7           ; TW_IMAG = Ia*Rb + Ra*Ib
BEND      subf3 R1,R0,R6           ; TW_REAL = Ra*Rb - Ia*Ib
      ;   rets                     ; Fall through to BR_ODD
;===========================================================================
; A bit-reversed array that is reversed in normal order is also reversible
; in the bit reverse domain.  However, only the second half of each array
; (in normal order) needs to be reversed.  This routine reverses the
; odd locations, which correspond to the second half of the array.
;--------------------------------------------------------------------------
;--------------------------------------------------------------------------
; BR_ODD   - reverse the order of the odd-indexed words of the REAL array,
;            then of the IMAG array.  REVERSER is called once for REAL and
;            entered a second time by fall through for IMAG; the second
;            RETS returns to the caller of BR_ODD.
; REVERSER - In: AR0 = array base.  Swaps odd locations working inward
;            from both ends, CR2SIZE/4 swaps in total.
; Uses     : R0, R1, AR0, AR1, IR0, RC
;--------------------------------------------------------------------------
BR_ODD    ldi   @DR_ADDR,AR0       ; Reverse second half of SIZE array
          call  REVERSER           ; REAL array first
          ldi   @DI_ADDR,AR0       ; then fall into REVERSER for IMAG
REVERSER  ldi   @CR2SIZE,RC        ;
          addi  AR0,RC,AR1         ; AR1 = base + CR2SIZE (one past the end)
          ldf   *AR1--,R0          ; Adjust pointers to odd locations
       || ldf   *AR0++,R0          ; (dummy loads, only the pointers matter)
          lsh   -2,RC              ; RC = SIZE/4 (work from both ends)
          subi  1,RC               ;
          ldi   2,IR0              ; Step by 2 parallelizing
          rptb  REV2               ;
          ldf   *AR0,R0            ; Read/write the ends (reverse)
       || ldf   *AR1,R1            ;
          stf   R1,*AR0++(IR0)     ; Swap, then step both pointers inward
REV2   || stf   R0,*AR1--(IR0)     ;
          rets                     ; Return to caller
          .endif                   ;
;===========================================================================
; Windowing is performed by convolving the frequency domain with the
; frequency response of a raised cosine window.  Coefficients are -.5,1.0,-.5
; with the convolution (looks like an FIR) being done in BR data arrays
;===========================================================================
;---------------------------------------------------------------------------
; CONV1    - run CONV_WIN on the REAL array, then on the IMAG array.  The
;            second pass is entered by fall through, so its RETS returns to
;            the caller of CONV1.
; CONV_WIN - In: AR0 = array base.  In-place three point filter over
;            CR2SIZE elements, stepping with bit-reversed addressing.
;            R2 carries the previous (unfiltered) element from pass to pass.
; Uses     : R1, R2, AR0, AR1, IR0, RC
;---------------------------------------------------------------------------
CONV1     ldi   @DR_ADDR,AR0       ; Raised cosine convolutional filter is
          call  CONV_WIN           ; done in place on both REAL & IMAG
          ldi   @DI_ADDR,AR0       ;
CONV_WIN  ldi   @CR2SIZE,IR0       ;
          lsh   -1,IR0             ; IR0=SIZE/2 for bit-reverse access
          ldi   @CR2SIZE,RC        ;
          subi  1,RC               ; RC = CR2SIZE-1 repeats CR2SIZE times
          ldi   AR0,AR1            ; AR1 pointer lags AR0
          ldf   *AR0++(IR0)B,R2    ; Preload first value
          rptb  CW                 ;
          mpyf  *AR1++(IR0)B,R1,R1 ; Update AR1 pointer via dummy math op
      ||  subf  *AR0,R2,R2         ; Y = X[-1] -2*X[0] + X[1]
          subf  *AR0++(IR0)B,R2    ; Second subtraction of X[0], step AR0 to X[1]
          addf  *AR0,R2            ; Add X[1]
          ldf   *AR1,R2            ; Fetch the old X[0] for the next pass while
CW    ||  stf   R2,*AR1            ; store result
          rets                     ;
;========================================================================
; Compute MAG^2=R^2+I^2 placing result in REAL array
;========================================================================
;------------------------------------------------------------------------
; RI2MAGR - replace every REAL[k] by REAL[k]^2 + IMAG[k]^2 for CR2SIZE
;           elements.  Both arrays are walked linearly, so the stored
;           order (still bit reversed) is left as it is.
; Uses    : R0, R1, AR0, AR1, RC
;------------------------------------------------------------------------
RI2MAGR   ldi   @CR2SIZE,RC        ; One pass per element:
          subi  1,RC               ;   RC = CR2SIZE-1
          ldi   @DR_ADDR,AR1       ; AR1 -> REAL (source and destination)
          ldi   @DI_ADDR,AR0       ; AR0 -> IMAG (source only)
          rptb  MAGZ               ;
          mpyf3 *AR1,*AR1,R0       ; R0 = RL*RL
          mpyf3 *AR0++,*AR0++,R1   ; R1 = IM*IM, step IMAG pointer
          addf  R1,R0              ; R0 = RL^2 + IM^2
MAGZ      stf   R0,*AR1++          ; Overwrite RL, step REAL pointer
          rets                     ;
;======================================================================
; Bit reverse REAL array using IMAG array as the destination array
;======================================================================
;----------------------------------------------------------------------
; BR_R2I - copy the REAL array into the IMAG array, reading the source
;          with bit-reversed addressing and writing linearly.
; BR_CPY - same copy with caller supplied pointers.
;          In: AR0 = source base, AR1 = destination base
; Uses   : R0, AR0, AR1, IR0, RC
;----------------------------------------------------------------------
BR_R2I    ldi   @DI_ADDR,AR1       ; Destination = IMAG array
          ldi   @DR_ADDR,AR0       ; Source      = REAL array (magnitudes)
          ;------------------------
BR_CPY    ldi   @CR2SIZE,RC        ; CR2SIZE elements are stored:
          subi  1,RC               ;   RC = CR2SIZE-1
          ldi   @CR2SIZE,IR0       ; Bit-reverse step is
          lsh   -1,IR0             ;   IR0 = CR2SIZE/2
          ldf   *AR0++(IR0)B,R0    ; Prime R0 with the first element
          rptb  BRCPY              ;
          ldf   *AR0++(IR0)B,R0    ; Fetch the next element while the
BRCPY  || stf   R0,*AR1++          ; previous one is written out
          rets                     ;
;----------------------------------------------------------------------
          .start  "ENDFFT",$       ; Open a new section at the current address so
          .sect   "ENDFFT"         ; the section size is reported at end of *.DSK file
;======================================================================
; Data sent to the host is displayed on a logarithmic dB scale.  The
; value of MAG^2=R^2+I^2 is converted to log2 form by concatenating
; the mantissa fraction bits to the exponent bit field using a left
; shift (sign bit was zero).  A PUSHF/POP then moves the data from the
; floating point bit field to a Q24 (integer with 8 integer bits and 24
; fractional bits).  The Q24 value is then shifted to a Q3 format and
; packed as four Q3 (5.3 format) bytes.  Each byte is then used as a
; y offset in the graphical display of 256 pixels at 26 pixels = 10 dB.
;
; NOTE: A repeat loop is contained within the outer repeat block.
;       Rather than using loop control code using a register, a nested
;       call is used to create a 4 times repeat block.
;======================================================================
;----------------------------------------------------------------------
; Pack - convert the values in the IMAG array to log2 bytes and store
;        them four to a word in the REAL array.  CR2SIZE/4 words are
;        written; each pass calls Log_Mag0, which consumes four source
;        values through AR0 and returns the packed word in R0.
; Uses : AR0, AR1, RC plus the registers used by Log_Mag0
;----------------------------------------------------------------------
Pack      ldi   @CR2SIZE,RC        ; Words to write = CR2SIZE/4
          lsh   -2,RC              ;   (4 log bytes per word)
          subi  1,RC               ;   RC = passes-1
          ldi   @DR_ADDR,AR1       ; Destination array
          ldi   @DI_ADDR,AR0       ; Source array
          rptb  PACKEND            ; Each pass:
          call  Log_Mag0           ;   R0 = four packed log2 bytes
PACKEND   sti   R0,*AR1++          ;   store packed data
          rets                     ;
;---------------------------------------------------------------------
;---------------------------------------------------------------------
; Log_Mag0 - build one word of four log2 bytes from the next four values
;            at *AR0.  Out: R0 = packed word, AR0 advanced by 4.
;            The two 'call $+1' instructions each push the address of the
;            instruction that follows them, so Log_Mag runs four times
;            before the final RETS returns to the caller of Log_Mag0.
; Log_Mag  - convert one value and OR it into R0 at the byte position
;            selected by R6 (-24, -16, -8, 0: lowest byte first).
; Uses     : R0, R1, R6, R7, AR0
;---------------------------------------------------------------------
Log_Mag0  ldi   -24,R6             ; Initialize shift value
          ldi   0,R0               ;       and sum register
          ldi   @CR2SIZE,R1        ; FFT data growth is subtracted in
          float R1,R1              ; log domain == log2(BinVal^2)-log2(SIZE^2)
          pushf R1                 ; Data is MAG^2, so SIZE^2 is subtracted
          pop   R1                 ; log2(SIZE^2) = 2*log2(SIZE)
          lsh   -20,R1             ; Float bit pattern shifted down to the correction value
          .if   _REAL_             ;
          addi  96,R1              ; Adjust data up/down to fit display
          .else                    ;
          addi  96-26,R1           ; Adjust is different for FFT and RFFT
          .endif                   ;
          call  $+1                ; Repeat the block 4x and return
          call  $+1                ; >> note use of CALL/CALL/RETS as rptb 4x!
          ;------------------------
Log_Mag   ldf   *AR0++,R7          ; Get value, point to next value
          lsh   1,R7               ; To convert to log2, concatenate mantissa
          pushf R7                 ; to the exponent (shift) and move to an
          pop   R7                 ; integer register using pushf/pop
          ash   -21,R7             ; shift to 29.3 (5.3 log in 8 LSBs)
          subi  R1,R7              ; Adjust for FFT size and display shift
          ;------------------------
          cmpi   -128,R7           ; Clip result to fit 2^8 vertical scale
          ldile  -128,R7           ;
          cmpi   +127,R7           ;
          ldige  +127,R7           ;
          lsh    24,R7             ; Move the 8 bit result into the top byte
          lsh    R6,R7             ; move to correct bitfield
          addi   8,R6              ; Next value goes one byte higher
          or     R7,R0             ; pack into result register
          rets                     ;
;=========================================================================
; SIGGEN uses a complex multiply phase rotate a signal vector.  The
; result is a sampled +/-1 magnitude sine wave which is returned in R0
;
; An error feedback routine is also included to show how a numerical
; error causing an amplitude variation can be quickly corrected.
;=========================================================================
          .if   1                        ; Generate SINE wave
;-------------------------------------------------------------------------
; SIGGEN (sine version) - rotate the vector (SR,SI) by the phasor (STR,STI)
;   and return the new SR in R0.  AR7 is loaded from SGPTR (address of SR)
;   and the other three words are reached relative to it, so the memory
;   order STR, SR, SI, STI must be kept.
;   Out: R0 = new SR.  R2 and AR7 are saved and restored.
;-------------------------------------------------------------------------
SIGGEN    push  R2                       ; Save full register context
          pushf R2                       ; except return register R0
          push  AR7                      ;
          ldi   @SGPTR,AR7               ; AR7 -> SR
          mpyf  *AR7++(1), *-AR7(1),R0   ; STR*SR        (AR7 -> SI afterwards)
          mpyf  *AR7     , *+AR7(1),R2   ; STI*SI
          mpyf  *+AR7(1) ,*--AR7(1),R0   ; STI*SR        (AR7 -> SR afterwards)
      ||  subf  R2,R0,R2      ;R2=RL     ; RL = STR*SR-STI*SI
          mpyf  @MAG_ERR,R2   ;R2*=ERR   ; RL*=ERR
          stf   R2,*AR7                  ; SR = RL
          mpyf  *+AR7(1) , *-AR7(1),R2   ; STR*SI
          addf  R0,R2,R2      ;R2=IM     ; IM = STR*SI+STI*SR
          mpyf  @MAG_ERR,R2   ;R2*=ERR   ; IM*=ERR
          stf   R2,*+AR7(1)              ; SI = IM
          ;- - - - - - - - - - - - - - -
ERR_FBK   mpyf  *+AR7(1),*+AR7(1),R2     ; Compute MAG^2
          mpyf  *+AR7(0),*+AR7(0),R0     ;
          addf  R0,R2                    ; R2=MAG^2
          subrf 3,R2                     ; R2 = 3 - MAG^2
          mpyf  0.5,R2                   ; Feedback R2=1/sqrt(x)~=(3-x)/2
          mpyf  R2,*AR7,R0               ; Correct amplitude and save
          mpyf  R2,*+AR7(1),R0           ; R0 = corrected SI while
      ||  stf   R0,*AR7                  ; the corrected SR is stored
          stf   R0,*+AR7(1)              ; Store corrected SI
          mpyf  R0,R0                    ; These three instructions recompute
          mpyf  *AR7,*AR7,R2             ; MAG^2; the value is lost when R2
          addf  R0,R2                    ; R2 corrected Mag^2 (is restored below)
          ldf   *AR7,R0                  ; Return value = corrected SR
          ;- - - - - - - - - - - - - - -
          pop   AR7                      ; Restore used registers
          popf  R2                       ;
          pop   R2                       ;
          rets                           ;
          ;------------------------------
          .else                          ; Generate RAMP signal
;-------------------------------------------------------------------------
; SIGGEN (ramp version) - add RAMPRATE to RAMP, reload -1 once the value
;   exceeds 1, store it back and return it in R0.
;-------------------------------------------------------------------------
SIGGEN    ldf   @RAMP,R0                 ;
          addf  @RAMPRATE,R0             ;
          cmpf  1,R0                     ;
          ldfgt -1,R0                    ; Wrap from above +1 back to -1
          stf   R0,@RAMP                 ;
          rets                           ;
          .endif                         ;
          ;------------------------------
          ;------------------------------
          ; SIGGEN data.  STR, SR, SI, STI are addressed relative to SR
          ; through AR7, so their order in memory must not change.
          ;------------------------------
Freq      .set    0.1                    ; Signal Frequency as a percentage
STR       .float  cos(Freq*2*PI)         ; of sample rate
SR        .float  1.0                    ; REAL vector
SI        .float  0.0                    ; IMAG vector
STI       .float  sin(Freq*2*PI)         ; IMAG part of the step phasor
SGPTR     .word   SR                     ; pointer to this structure
RAMP      .float  0                      ; Current value of the RAMP generator
RAMPRATE  .float  1/32.0                 ; Increment per call of the RAMP generator
          ;------------------------------; A magnitude errors of 0.667
MAG_ERR   .float  0.9999                 ; will still oscillate
;***************************************************************************
SCALE     .float 2000              ; Gain applied to SIGGEN output in the DAC ISR
; GETADC: wait until the ADC ISR has flagged a new sample, then return it.
;   Out      : R0 = low 16 bits of S0_rdata, sign extended, as a float
;   Modifies : R0, IE, bit 0x20 of FLAGS
GETADC    ldi   0x30,IE            ; Enable RINT and XINT, then
          idle                     ; IDLE until an interrupt occurs
          ldi   @FLAGS,R0          ; Continue if new sample is received
          tstb  0x20,R0            ; Bit 0x20 is set by the ADC ISR
          bz    $-3                ; Not set: branch back 3 words (the idle above)
          andn  0x20,R0            ; Set: clear the flag ...
          sti   R0,@FLAGS          ; ... and write FLAGS back
          ldi   @S0_rdata,R0       ; Return sign extended ADC value
          lsh   16,R0              ; Move bit 15 up to the sign position
          ash   -16,R0             ; Arithmetic shift back replicates the sign
          float R0,R0              ; R0 = float, input range -32768 to +32767
NOLDC     rets                     ; NOLDC: extra label on this return (not referenced in this section)
;-------------------------------------------------------
; XMIT/RECV Serial Port Interrupt Service Routines
;-------------------------------------------------------
; ADC: receive (RINT0) interrupt service routine.
;   Sets bit 0x20 of FLAGS so that GETADC knows a sample has arrived.
;   ST and R0 are saved on entry and restored on exit.
ADC       push  ST                 ; On interrupt, set a software flag to
          push  R0                 ; let the CPU know that RINT occurred
          ldi   @S0_rdata,R0       ; Read the receive register (value is discarded,
          ldi   @FLAGS,R0          ; R0 is overwritten here with FLAGS)
          or    0x20,R0            ; Mark "new sample received"
          sti   R0,@FLAGS          ;
          pop   R0                 ; Restore used registers
          pop   ST                 ;
          reti                     ;
;----------------------------------
; DAC: transmit (XINT0) interrupt service routine.
;   When BYPASS is zero, calls SIGGEN, scales the float result by SCALE,
;   converts it to an integer and writes it to S0_xdata.
;   When BYPASS is non-zero nothing is written to S0_xdata.
;   ST and R0 (integer and float parts) are preserved.
DAC       push  ST                 ;
          push  R0                 ; Preserve register used by SIGGEN
          pushf R0                 ; (both the integer and float parts)
          ldi   @BYPASS,R0         ; Avoid corrupting DAC samples
          bnz   DACRET             ; during initialization
          ;- - - - - - - - - - - -
          call  SIGGEN             ; Call the signal generator
          mpyf  @SCALE,R0          ; return is float scaled to +/-1
          fix   R0,R0              ; Convert to integer
          andn  3,R0               ; Clear the 2 LSBs (3 in the LSBs requests a secondary XMIT)
          ;- - - - - - - - - - - -
          sti   R0,@S0_xdata       ; Send the generated sample to the DAC
DACRET    popf  R0                 ; Restore in reverse order of the pushes
          pop   R0                 ;
          pop   ST                 ;
          reti                     ;
;----------------------------------
; prog_AIC: send one register value to the AIC using a secondary transmission.
;   In        : R0 = value to send
;   Preserves : R1, IE (pushed on entry, popped on exit)
;   Modifies  : IF (bits 0x30 cleared), S0_xdata
;   Each idle waits for an interrupt with only XINT enabled (IE=0x10).
;   NOTE(review): the DAC ISR skips its S0_xdata write only while BYPASS is
;   non-zero; callers presumably have to ensure that - confirm.
prog_AIC  push  R1                 ;
          push  IE                 ;
          ldi   0x10,IE            ; Enable XINT only
          andn  0x30,IF            ; Clear the XINT/RINT bits (0x30) in IF
          ldi   @S0_xdata,R1       ; Use original DXR data during secondary
          or    3,R1               ; Request secondary XMIT (2 LSBs = 3)
          sti   R1,@S0_xdata       ;
          idle                     ; Wait for an interrupt
          sti   R0,@S0_xdata       ; Send register value
          idle                     ; Wait for an interrupt
          andn  3,R1               ; Remove the secondary request bits
          sti   R1,@S0_xdata       ; Leave with original safe value in DXR
          pop   IE                 ; Restore used registers
          pop   R1                 ;
          rets                     ;
;======================================================;
; This section of code is called by the initialization ;
; code as well as by the main program loop.  It is     ;
; therefore assembled into the regular program RAM     ;
;======================================================;
; AIC_INIT: reset the AIC through XF0 and program its registers via prog_AIC.
;   AIC_reset is a second label at the start of the reset sequence.
;   BYPASS is set non-zero while the AIC is being programmed (the DAC ISR
;   then leaves S0_xdata alone) and is cleared again before returning.
;   Reads    : C_REG, B_REG, A_REG (values defined outside this section)
;   Modifies : R0, IE, IF, IOF, BYPASS, S0_xdata
AIC_INIT  LDI   0x10,IE         ; Enable XINT interrupt
          andn  0x34,IF         ; Clear bits 0x34 in IF
AIC_reset ldi   0,R0            ; Put a 0 in DXR
          sti   R0,@S0_xdata    ;
          RPTS  0x040           ; Repeat the next instruction
          LDI   2,IOF           ; XF0=0 resets AIC
          rpts  0x40            ; Repeat the next instruction
          LDI   6,IOF           ; XF0=1 runs AIC
          sti   IOF,@BYPASS     ; BYPASS = IOF value: DAC ISR skips DXR writes when non-zero
          ldi   @S0_rdata,R0    ; Read the receive register (value is discarded)
          ldi   0,R0            ; Put a 0 in DXR
          sti   R0,@S0_xdata    ;
          ;-----------------------------
          ldi   @C_REG,R0       ; Setup control register
          call  prog_AIC        ;
          ldi   0xfffc  ,R0     ; Program the AIC to be real slow
          call  prog_AIC        ;
          ldi   0xfffc|2,R0     ;
          call  prog_AIC        ;
          ldi   @B_REG,R0       ; Bump up the Fs to final rate
          call  prog_AIC        ; (smallest divisor should be last)
          ldi   @A_REG,R0       ;
          call  prog_AIC        ;
          ldi   0,R0            ; Put a safe 0 in DXR
          sti   R0,@BYPASS      ; BYPASS=0: DAC ISR writes samples again
          sti   R0,@S0_xdata    ;
          ldi   @S0_rdata,R0    ; Clear receive underrun
          rets                  ;
;===========================================================================
; Initialization code is used only once and can be safely overwritten
; by assembling it into the stack or volatile data storage.
;===========================================================================
       ;  .start   "INIT",DR    ; Place this code in the data buffer
       ;  .sect    "INIT"       ; area as this is the first to go
          .entry   INIT_DSK     ; Declare INIT_DSK as the entry point
; INIT_DSK: one-time startup code.
;   Adjusts SP, sets the data page, restarts timer 0, loads the serial port 0
;   control registers, initializes the AIC and then branches to main.
;   NOTE(review): the original comments referred to TIM0 & TIM1, but only
;   timer 0 registers are written here - confirm that TIM1 is not needed.
INIT_DSK
          subi  12,SP            ; Make additional stack space

          ldp   T0_ctrl         ; Use kernel data page and stack
          ldi   0,R0            ; Halt timer 0
          sti   R0,@T0_ctrl     ;
          sti   R0,@T0_count    ; Set count to 0
          ldi   1,R0            ; Set period to 1
          sti   R0,@T0_prd      ;
          ldi   0x2C1,R0        ; Restart timer 0 (control word 0x2C1)
          sti   R0,@T0_ctrl     ;
          ;---------------------
          ldi   @S0_xctrl_val,R0;
          sti   R0,@S0_xctrl    ; transmit control
          ldi   @S0_rctrl_val,R0;
          sti   R0,@S0_rctrl    ; receive control
          ldi   0,R0            ;
          sti   R0,@S0_xdata    ; DXR data value
          ldi   @S0_gctrl_val,R0; Setup serial port
          sti   R0,@S0_gctrl    ; global control
          ;---------------------
          call  AIC_INIT        ; Initialize the AIC
          ldi   0x30,IE         ; Service both RINT/XINT
          ldi   @S0_rdata,R0    ; Read the receive register (value is discarded)
          b     main            ; Continue in the main program
;======================================================================
; Since the C31 is being used in bootloader mode, interrupts begin
; execution at the secondary branch table in internal SRAM.  A branch
; to XINT/RINT ISR routine is placed directly into these locations
;======================================================================
          ; Serial port 0 interrupt branches: two consecutive branch
          ; instructions placed at address 0x809FC5 in section SP0VECTS.
          .start   "SP0VECTS",0x809FC5
          .sect    "SP0VECTS"
          B     DAC             ; XINT0 -> transmit ISR (DAC)
          B     ADC             ; RINT0 -> receive ISR (ADC)



