/**** 

  PLEASE DO NOT USE THIS CODE OUTSIDE OF THE BENCHMARK!
  
  Instead, if possible, please download the original package
  (a URL is given in doc/ffts.html).  Thank you!

 ****/

/************* CHANGES by Steven G. Johnson 7/97 ************/
/*
   This file, originally fft4f.c, was modified slightly
   for use in the benchmark.  The modifications we made
   were:

   1) Used FFTW_REAL instead of double for floating-point type.
   
   2) Prepended "ooura_c_" to routine names to prevent
      conflicts with other software in the benchmark (especially
      with the Fortran version of this code).
*/

#include <fftw.h>

/*
Fast Fourier/Cosine/Sine Transform
    dimension   :one
    data length :power of 2
    decimation  :frequency
    radix       :4, 2
    data        :inplace
    table       :use
functions
    ooura_c_cdft: Complex Discrete Fourier Transform
    ooura_c_rdft: Real Discrete Fourier Transform
    ooura_c_ddct: Discrete Cosine Transform
    ooura_c_ddst: Discrete Sine Transform
    ooura_c_dfct: Cosine Transform of OOURA_C_RDFT (Real Symmetric DFT)
    ooura_c_dfst: Sine Transform of OOURA_C_RDFT (Real Anti-symmetric DFT)
function prototypes
    void ooura_c_cdft(int, int, FFTW_REAL *, int *, FFTW_REAL *);
    void ooura_c_rdft(int, int, FFTW_REAL *, int *, FFTW_REAL *);
    void ooura_c_ddct(int, int, FFTW_REAL *, int *, FFTW_REAL *);
    void ooura_c_ddst(int, int, FFTW_REAL *, int *, FFTW_REAL *);
    void ooura_c_dfct(int, FFTW_REAL *, FFTW_REAL *, int *, FFTW_REAL *);
    void ooura_c_dfst(int, FFTW_REAL *, FFTW_REAL *, int *, FFTW_REAL *);


-------- Complex DFT (Discrete Fourier Transform) --------
    [definition]
        <case1>
            X[k] = sum_j=0^n-1 x[j]*exp(2*pi*i*j*k/n), 0<=k<n
        <case2>
            X[k] = sum_j=0^n-1 x[j]*exp(-2*pi*i*j*k/n), 0<=k<n
        (notes: sum_j=0^n-1 is a summation from j=0 to n-1)
    [usage]
        <case1>
            ip[0] = 0; // first time only
            ooura_c_cdft(2*n, 1, a, ip, w);
        <case2>
            ip[0] = 0; // first time only
            ooura_c_cdft(2*n, -1, a, ip, w);
    [parameters]
        2*n            :data length (int)
                        n >= 1, n = power of 2
        a[0...2*n-1]   :input/output data (FFTW_REAL *)
                        input data
                            a[2*j] = Re(x[j]), 
                            a[2*j+1] = Im(x[j]), 0<=j<n
                        output data
                            a[2*k] = Re(X[k]), 
                            a[2*k+1] = Im(X[k]), 0<=k<n
        ip[0...*]      :work area for bit reversal (int *)
                        length of ip >= 2+sqrt(n)  ; if n % 4 == 0
                                        2+sqrt(n/2); otherwise
                        ip[0],ip[1] store the current lengths of the cos/sin tables.
        w[0...n/2-1]   :cos/sin table (FFTW_REAL *)
                        w[],ip[] are initialized if ip[0] == 0.
    [remark]
        Inverse of 
            ooura_c_cdft(2*n, -1, a, ip, w);
        is 
            ooura_c_cdft(2*n, 1, a, ip, w);
            for (j = 0; j <= 2 * n - 1; j++) {
                a[j] *= 1.0 / n;
            }
        .


-------- Real DFT / Inverse of Real DFT --------
    [definition]
        <case1> OOURA_C_RDFT
            R[k] = sum_j=0^n-1 a[j]*cos(2*pi*j*k/n), 0<=k<=n/2
            I[k] = sum_j=0^n-1 a[j]*sin(2*pi*j*k/n), 0<k<n/2
        <case2> IRDFT (excluding scale)
            a[k] = R[0]/2 + R[n/2]/2 + 
                   sum_j=1^n/2-1 R[j]*cos(2*pi*j*k/n) + 
                   sum_j=1^n/2-1 I[j]*sin(2*pi*j*k/n), 0<=k<n
    [usage]
        <case1>
            ip[0] = 0; // first time only
            ooura_c_rdft(n, 1, a, ip, w);
        <case2>
            ip[0] = 0; // first time only
            ooura_c_rdft(n, -1, a, ip, w);
    [parameters]
        n              :data length (int)
                        n >= 2, n = power of 2
        a[0...n-1]     :input/output data (FFTW_REAL *)
                        <case1>
                            output data
                                a[2*k] = R[k], 0<=k<n/2
                                a[2*k+1] = I[k], 0<k<n/2
                                a[1] = R[n/2]
                        <case2>
                            input data
                                a[2*j] = R[j], 0<=j<n/2
                                a[2*j+1] = I[j], 0<j<n/2
                                a[1] = R[n/2]
        ip[0...*]      :work area for bit reversal (int *)
                        length of ip >= 2+sqrt(n/2); if n % 4 == 2
                                        2+sqrt(n/4); otherwise
                        ip[0],ip[1] store the current lengths of the cos/sin tables.
        w[0...n/2-1]   :cos/sin table (FFTW_REAL *)
                        w[],ip[] are initialized if ip[0] == 0.
    [remark]
        Inverse of 
            ooura_c_rdft(n, 1, a, ip, w);
        is 
            ooura_c_rdft(n, -1, a, ip, w);
            for (j = 0; j <= n - 1; j++) {
                a[j] *= 2.0 / n;
            }
        .


-------- DCT (Discrete Cosine Transform) / Inverse of DCT --------
    [definition]
        <case1> IDCT (excluding scale)
            C[k] = sum_j=0^n-1 a[j]*cos(pi*j*(k+1/2)/n), 0<=k<n
        <case2> DCT
            C[k] = sum_j=0^n-1 a[j]*cos(pi*(j+1/2)*k/n), 0<=k<n
    [usage]
        <case1>
            ip[0] = 0; // first time only
            ooura_c_ddct(n, 1, a, ip, w);
        <case2>
            ip[0] = 0; // first time only
            ooura_c_ddct(n, -1, a, ip, w);
    [parameters]
        n              :data length (int)
                        n >= 2, n = power of 2
        a[0...n-1]     :input/output data (FFTW_REAL *)
                        output data
                            a[k] = C[k], 0<=k<n
        ip[0...*]      :work area for bit reversal (int *)
                        length of ip >= 2+sqrt(n/2); if n % 4 == 2
                                        2+sqrt(n/4); otherwise
                        ip[0],ip[1] store the current lengths of the cos/sin tables.
        w[0...n*5/4-1] :cos/sin table (FFTW_REAL *)
                        w[],ip[] are initialized if ip[0] == 0.
    [remark]
        Inverse of 
            ooura_c_ddct(n, -1, a, ip, w);
        is 
            a[0] *= 0.5;
            ooura_c_ddct(n, 1, a, ip, w);
            for (j = 0; j <= n - 1; j++) {
                a[j] *= 2.0 / n;
            }
        .


-------- DST (Discrete Sine Transform) / Inverse of DST --------
    [definition]
        <case1> IDST (excluding scale)
            S[k] = sum_j=1^n A[j]*sin(pi*j*(k+1/2)/n), 0<=k<n
        <case2> DST
            S[k] = sum_j=0^n-1 a[j]*sin(pi*(j+1/2)*k/n), 0<k<=n
    [usage]
        <case1>
            ip[0] = 0; // first time only
            ooura_c_ddst(n, 1, a, ip, w);
        <case2>
            ip[0] = 0; // first time only
            ooura_c_ddst(n, -1, a, ip, w);
    [parameters]
        n              :data length (int)
                        n >= 2, n = power of 2
        a[0...n-1]     :input/output data (FFTW_REAL *)
                        <case1>
                            input data
                                a[j] = A[j], 0<j<n
                                a[0] = A[n]
                            output data
                                a[k] = S[k], 0<=k<n
                        <case2>
                            output data
                                a[k] = S[k], 0<k<n
                                a[0] = S[n]
        ip[0...*]      :work area for bit reversal (int *)
                        length of ip >= 2+sqrt(n/2); if n % 4 == 2
                                        2+sqrt(n/4); otherwise
                        ip[0],ip[1] store the current lengths of the cos/sin tables.
        w[0...n*5/4-1] :cos/sin table (FFTW_REAL *)
                        w[],ip[] are initialized if ip[0] == 0.
    [remark]
        Inverse of 
            ooura_c_ddst(n, -1, a, ip, w);
        is 
            a[0] *= 0.5;
            ooura_c_ddst(n, 1, a, ip, w);
            for (j = 0; j <= n - 1; j++) {
                a[j] *= 2.0 / n;
            }
        .


-------- Cosine Transform of OOURA_C_RDFT (Real Symmetric DFT) --------
    [definition]
        C[k] = sum_j=0^n a[j]*cos(pi*j*k/n), 0<=k<=n
    [usage]
        ip[0] = 0; // first time only
        ooura_c_dfct(n, a, t, ip, w);
    [parameters]
        n              :data length - 1 (int)
                        n >= 2, n = power of 2
        a[0...n]       :input/output data (FFTW_REAL *)
                        output data
                            a[k] = C[k], 0<=k<=n
        t[0...n/2]     :work area (FFTW_REAL *)
        ip[0...*]      :work area for bit reversal (int *)
                        length of ip >= 2+sqrt(n/4); if n % 4 == 0
                                        2+sqrt(n/8); otherwise
                        ip[0],ip[1] store the current lengths of the cos/sin tables.
        w[0...n*5/8-1] :cos/sin table (FFTW_REAL *)
                        w[],ip[] are initialized if ip[0] == 0.
    [remark]
        Inverse of 
            a[0] *= 0.5;
            a[n] *= 0.5;
            ooura_c_dfct(n, a, t, ip, w);
        is 
            a[0] *= 0.5;
            a[n] *= 0.5;
            ooura_c_dfct(n, a, t, ip, w);
            for (j = 0; j <= n; j++) {
                a[j] *= 2.0 / n;
            }
        .


-------- Sine Transform of OOURA_C_RDFT (Real Anti-symmetric DFT) --------
    [definition]
        S[k] = sum_j=1^n-1 a[j]*sin(pi*j*k/n), 0<k<n
    [usage]
        ip[0] = 0; // first time only
        ooura_c_dfst(n, a, t, ip, w);
    [parameters]
        n              :data length + 1 (int)
                        n >= 2, n = power of 2
        a[0...n-1]     :input/output data (FFTW_REAL *)
                        output data
                            a[k] = S[k], 0<k<n
                        (a[0] is used for work area)
        t[0...n/2-1]   :work area (FFTW_REAL *)
        ip[0...*]      :work area for bit reversal (int *)
                        length of ip >= 2+sqrt(n/4); if n % 4 == 0
                                        2+sqrt(n/8); otherwise
                        ip[0],ip[1] store the current lengths of the cos/sin tables.
        w[0...n*5/8-1] :cos/sin table (FFTW_REAL *)
                        w[],ip[] are initialized if ip[0] == 0.
    [remark]
        Inverse of 
            ooura_c_dfst(n, a, t, ip, w);
        is 
            ooura_c_dfst(n, a, t, ip, w);
            for (j = 1; j <= n - 1; j++) {
                a[j] *= 2.0 / n;
            }
        .
*/


/*
 * In-place complex DFT of the interleaved (re, im) data in a[0..n-1].
 *
 *   n    : number of reals in a[] (twice the number of complex points)
 *   isgn : a negative value selects the conjugated transform; it only has
 *          an effect when n > 4
 *   a    : input/output data
 *   ip   : integer work area; ip[0] caches the length of the table in w[]
 *   w    : cos/sin table, rebuilt here whenever ip[0]*4 < n
 */
void ooura_c_cdft(int n, int isgn, FFTW_REAL *a, int *ip, FFTW_REAL *w)
{
    void ooura_c_makewt(int nw, int *ip, FFTW_REAL *w);
    void ooura_c_bitrv2(int n, int *ip, FFTW_REAL *a);
    void ooura_c_cftsub(int n, FFTW_REAL *a, FFTW_REAL *w);
    int idx;
    int conjugate;

    /* The cached table is too short for this length: regenerate it. */
    if ((ip[0] << 2) < n) {
        ooura_c_makewt(n >> 2, ip, w);
    }
    if (n > 4) {
        ooura_c_bitrv2(n, ip + 2, a);
    }

    /* Negating the imaginary parts before and after the butterfly passes
       yields the transform of opposite sign. */
    conjugate = (n > 4 && isgn < 0);
    if (conjugate) {
        for (idx = 1; idx < n; idx += 2) {
            a[idx] = -a[idx];
        }
    }
    ooura_c_cftsub(n, a, w);
    if (conjugate) {
        for (idx = 1; idx < n; idx += 2) {
            a[idx] = -a[idx];
        }
    }
}


/*
 * In-place real DFT (isgn >= 0) or its unscaled inverse (isgn < 0) on
 * a[0..n-1], using the packed layout described in the file header
 * (a[0] = R[0], a[1] = R[n/2], then interleaved R[k], I[k]).
 *
 *   ip : integer work area; ip[0] and ip[1] cache the lengths of the two
 *        tables kept in w[]
 *   w  : table built by ooura_c_makewt, followed at offset ip[0] by the
 *        table built by ooura_c_makect; both are rebuilt here when too short
 */
void ooura_c_rdft(int n, int isgn, FFTW_REAL *a, int *ip, FFTW_REAL *w)
{
    void ooura_c_makewt(int nw, int *ip, FFTW_REAL *w);
    void ooura_c_makect(int nc, int *ip, FFTW_REAL *c);
    void ooura_c_bitrv2(int n, int *ip, FFTW_REAL *a);
    void ooura_c_cftsub(int n, FFTW_REAL *a, FFTW_REAL *w);
    void ooura_c_rftsub(int n, FFTW_REAL *a, int nc, FFTW_REAL *c);
    int idx, wt_len, ct_len;
    FFTW_REAL diff;

    wt_len = ip[0];
    if ((wt_len << 2) < n) {
        wt_len = n >> 2;
        ooura_c_makewt(wt_len, ip, w);
    }
    /* Read ip[1] only after the makewt call above, which resets it. */
    ct_len = ip[1];
    if ((ct_len << 2) < n) {
        ct_len = n >> 2;
        ooura_c_makect(ct_len, ip, w + wt_len);
    }

    if (isgn >= 0) {
        /* Forward: complex FFT of the data, then the real-data fix-up. */
        if (n > 4) {
            ooura_c_bitrv2(n, ip + 2, a);
        }
        ooura_c_cftsub(n, a, w);
        if (n > 4) {
            ooura_c_rftsub(n, a, ct_len, w + wt_len);
        }
        diff = a[0] - a[1];
        a[0] += a[1];
        a[1] = diff;
        return;
    }

    /* Inverse: undo the packing of a[0]/a[1], then run the same stages in
       reverse order with the imaginary parts negated around them. */
    a[1] = 0.5 * (a[1] - a[0]);
    a[0] += a[1];
    for (idx = 3; idx < n; idx += 2) {
        a[idx] = -a[idx];
    }
    if (n > 4) {
        ooura_c_rftsub(n, a, ct_len, w + wt_len);
        ooura_c_bitrv2(n, ip + 2, a);
    }
    ooura_c_cftsub(n, a, w);
    for (idx = 1; idx < n; idx += 2) {
        a[idx] = -a[idx];
    }
}


/*
 * In-place discrete cosine transform of a[0..n-1]; the sign of isgn selects
 * between the two variants listed in the file header.
 *
 *   ip : integer work area; ip[0] and ip[1] cache the table lengths
 *   w  : table built by ooura_c_makewt (length n/4) followed by the table
 *        built by ooura_c_makect (length n); rebuilt here when too short
 */
void ooura_c_ddct(int n, int isgn, FFTW_REAL *a, int *ip, FFTW_REAL *w)
{
    void ooura_c_makewt(int nw, int *ip, FFTW_REAL *w);
    void ooura_c_makect(int nc, int *ip, FFTW_REAL *c);
    void ooura_c_bitrv2(int n, int *ip, FFTW_REAL *a);
    void ooura_c_cftsub(int n, FFTW_REAL *a, FFTW_REAL *w);
    void ooura_c_rftsub(int n, FFTW_REAL *a, int nc, FFTW_REAL *c);
    void ooura_c_dctsub(int n, FFTW_REAL *a, int nc, FFTW_REAL *c);
    int idx, wt_len, ct_len;
    FFTW_REAL edge;

    wt_len = ip[0];
    if ((wt_len << 2) < n) {
        wt_len = n >> 2;
        ooura_c_makewt(wt_len, ip, w);
    }
    /* Read ip[1] only after the makewt call above, which resets it. */
    ct_len = ip[1];
    if (ct_len < n) {
        ct_len = n;
        ooura_c_makect(ct_len, ip, w + wt_len);
    }

    if (isgn < 0) {
        /* Fold neighbouring samples into sum/difference pairs, run the
           inverse real-FFT stages, and finish with the cosine rotation. */
        edge = a[n - 1];
        for (idx = n - 2; idx >= 2; idx -= 2) {
            a[idx + 1] = a[idx - 1] - a[idx];
            a[idx] += a[idx - 1];
        }
        a[1] = edge - a[0];
        a[0] += edge;
        if (n > 4) {
            ooura_c_rftsub(n, a, ct_len, w + wt_len);
            ooura_c_bitrv2(n, ip + 2, a);
        }
        ooura_c_cftsub(n, a, w);
        for (idx = 1; idx < n; idx += 2) {
            a[idx] = -a[idx];
        }
        ooura_c_dctsub(n, a, ct_len, w + wt_len);
        return;
    }

    /* isgn >= 0: cosine rotation first, then the forward real-FFT stages,
       then unfold the sum/difference pairs. */
    ooura_c_dctsub(n, a, ct_len, w + wt_len);
    if (n > 4) {
        ooura_c_bitrv2(n, ip + 2, a);
    }
    ooura_c_cftsub(n, a, w);
    if (n > 4) {
        ooura_c_rftsub(n, a, ct_len, w + wt_len);
    }
    edge = a[0] - a[1];
    a[0] += a[1];
    for (idx = 2; idx <= n - 2; idx += 2) {
        a[idx - 1] = a[idx] - a[idx + 1];
        a[idx] += a[idx + 1];
    }
    a[n - 1] = edge;
}


/*
 * In-place discrete sine transform of a[0..n-1]; the sign of isgn selects
 * between the two variants listed in the file header.
 *
 *   ip : integer work area; ip[0] and ip[1] cache the table lengths
 *   w  : table built by ooura_c_makewt (length n/4) followed by the table
 *        built by ooura_c_makect (length n); rebuilt here when too short
 */
void ooura_c_ddst(int n, int isgn, FFTW_REAL *a, int *ip, FFTW_REAL *w)
{
    void ooura_c_makewt(int nw, int *ip, FFTW_REAL *w);
    void ooura_c_makect(int nc, int *ip, FFTW_REAL *c);
    void ooura_c_bitrv2(int n, int *ip, FFTW_REAL *a);
    void ooura_c_cftsub(int n, FFTW_REAL *a, FFTW_REAL *w);
    void ooura_c_rftsub(int n, FFTW_REAL *a, int nc, FFTW_REAL *c);
    void ooura_c_dstsub(int n, FFTW_REAL *a, int nc, FFTW_REAL *c);
    int idx, wt_len, ct_len;
    FFTW_REAL edge;

    wt_len = ip[0];
    if ((wt_len << 2) < n) {
        wt_len = n >> 2;
        ooura_c_makewt(wt_len, ip, w);
    }
    /* Read ip[1] only after the makewt call above, which resets it. */
    ct_len = ip[1];
    if (ct_len < n) {
        ct_len = n;
        ooura_c_makect(ct_len, ip, w + wt_len);
    }

    if (isgn < 0) {
        /* Fold neighbouring samples into sum/difference pairs, run the
           inverse real-FFT stages, and finish with the sine rotation. */
        edge = a[n - 1];
        for (idx = n - 2; idx >= 2; idx -= 2) {
            a[idx + 1] = a[idx - 1] + a[idx];
            a[idx] -= a[idx - 1];
        }
        a[1] = -edge - a[0];
        a[0] -= edge;
        if (n > 4) {
            ooura_c_rftsub(n, a, ct_len, w + wt_len);
            ooura_c_bitrv2(n, ip + 2, a);
        }
        ooura_c_cftsub(n, a, w);
        for (idx = 1; idx < n; idx += 2) {
            a[idx] = -a[idx];
        }
        ooura_c_dstsub(n, a, ct_len, w + wt_len);
        return;
    }

    /* isgn >= 0: sine rotation first, then the forward real-FFT stages,
       then unfold the sum/difference pairs. */
    ooura_c_dstsub(n, a, ct_len, w + wt_len);
    if (n > 4) {
        ooura_c_bitrv2(n, ip + 2, a);
    }
    ooura_c_cftsub(n, a, w);
    if (n > 4) {
        ooura_c_rftsub(n, a, ct_len, w + wt_len);
    }
    edge = a[0] - a[1];
    a[0] += a[1];
    for (idx = 2; idx <= n - 2; idx += 2) {
        a[idx - 1] = -a[idx] - a[idx + 1];
        a[idx] -= a[idx + 1];
    }
    a[n - 1] = -edge;
}


/*
 * In-place cosine transform of the n+1 values a[0..n] (the "Cosine
 * Transform of RDFT" in the file header).
 *
 *   t  : scratch array; indices 0..n/2 are touched
 *   ip : integer work area; ip[0] and ip[1] cache the table lengths
 *   w  : table built by ooura_c_makewt (length n/8) followed by the table
 *        built by ooura_c_makect (length n/2); rebuilt here when too short
 *
 * The sequence is split into a difference part (kept in a[]) and a sum part
 * (kept in t[]); the sum part is halved repeatedly, each level being
 * transformed and scattered into a[] with a doubling stride.
 */
void ooura_c_dfct(int n, FFTW_REAL *a, FFTW_REAL *t, int *ip, FFTW_REAL *w)
{
    void ooura_c_makewt(int nw, int *ip, FFTW_REAL *w);
    void ooura_c_makect(int nc, int *ip, FFTW_REAL *c);
    void ooura_c_bitrv2(int n, int *ip, FFTW_REAL *a);
    void ooura_c_cftsub(int n, FFTW_REAL *a, FFTW_REAL *w);
    void ooura_c_rftsub(int n, FFTW_REAL *a, int nc, FFTW_REAL *c);
    void ooura_c_dctsub(int n, FFTW_REAL *a, int nc, FFTW_REAL *c);
    int i, mirror, dst, stride, len, len_half, wt_len, ct_len;
    FFTW_REAL s, u;

    wt_len = ip[0];
    if ((wt_len << 3) < n) {
        wt_len = n >> 3;
        ooura_c_makewt(wt_len, ip, w);
    }
    /* Read ip[1] only after the makewt call above, which resets it. */
    ct_len = ip[1];
    if ((ct_len << 1) < n) {
        ct_len = n >> 1;
        ooura_c_makect(ct_len, ip, w + wt_len);
    }

    len = n >> 1;
    s = a[0] + a[n];
    a[0] -= a[n];
    t[0] = s - a[len];
    t[len] = s + a[len];

    if (n <= 2) {
        /* Smallest case: everything needed is already in a[0], t[0], t[1]. */
        a[1] = a[0];
        a[2] = t[0];
        a[0] = t[1];
        return;
    }

    /* Split a[] around its midpoint: differences stay in a[], sums of sums
       and differences of sums go to t[]. */
    len_half = len >> 1;
    for (i = 1; i < len_half; i++) {
        mirror = len - i;
        s = a[i] + a[n - i];
        a[i] -= a[n - i];
        u = a[mirror] + a[n - mirror];
        a[mirror] -= a[n - mirror];
        t[i] = s - u;
        t[mirror] = s + u;
    }
    t[len_half] = a[len_half] + a[n - len_half];
    a[len_half] -= a[n - len_half];

    /* Transform the difference part and spread it over the odd slots. */
    ooura_c_dctsub(len, a, ct_len, w + wt_len);
    if (len > 4) {
        ooura_c_bitrv2(len, ip + 2, a);
    }
    ooura_c_cftsub(len, a, w);
    if (len > 4) {
        ooura_c_rftsub(len, a, ct_len, w + wt_len);
    }
    s = a[0] + a[1];
    a[n - 1] = a[0] - a[1];
    /* Walk downwards so sources are read before they are overwritten. */
    for (i = len - 2; i >= 2; i -= 2) {
        a[(i << 1) + 1] = a[i] + a[i + 1];
        a[(i << 1) - 1] = a[i] - a[i + 1];
    }
    a[1] = s;

    /* Repeat on t[] at half the length each time, doubling the stride. */
    stride = 2;
    for (len = len_half; len >= 2; len = len_half) {
        ooura_c_dctsub(len, t, ct_len, w + wt_len);
        if (len > 4) {
            ooura_c_bitrv2(len, ip + 2, t);
        }
        ooura_c_cftsub(len, t, w);
        if (len > 4) {
            ooura_c_rftsub(len, t, ct_len, w + wt_len);
        }
        a[n - stride] = t[0] - t[1];
        a[stride] = t[0] + t[1];
        for (i = 2, dst = stride << 2; i <= len - 2; i += 2, dst += stride << 2) {
            a[dst - stride] = t[i] - t[i + 1];
            a[dst + stride] = t[i] + t[i + 1];
        }
        stride <<= 1;
        len_half = len >> 1;
        for (i = 0; i < len_half; i++) {
            mirror = len - i;
            t[i] = t[len + mirror] - t[len + i];
            t[mirror] = t[len + mirror] + t[len + i];
        }
        t[len_half] = t[len + len_half];
    }
    a[stride] = t[0];
    a[n] = t[2] - t[1];
    a[0] = t[2] + t[1];
}


/*
 * In-place sine transform of a[1..n-1] (the "Sine Transform of RDFT" in the
 * file header). a[0] is used as scratch and is set to 0 on return.
 *
 *   t  : scratch array
 *   ip : integer work area; ip[0] and ip[1] cache the table lengths
 *   w  : table built by ooura_c_makewt (length n/8) followed by the table
 *        built by ooura_c_makect (length n/2); rebuilt here when too short
 *
 * The sequence is split into a sum part (kept in a[]) and a difference part
 * (kept in t[]); the latter is halved repeatedly, each level being
 * transformed and scattered into a[] with a doubling stride.
 */
void ooura_c_dfst(int n, FFTW_REAL *a, FFTW_REAL *t, int *ip, FFTW_REAL *w)
{
    void ooura_c_makewt(int nw, int *ip, FFTW_REAL *w);
    void ooura_c_makect(int nc, int *ip, FFTW_REAL *c);
    void ooura_c_bitrv2(int n, int *ip, FFTW_REAL *a);
    void ooura_c_cftsub(int n, FFTW_REAL *a, FFTW_REAL *w);
    void ooura_c_rftsub(int n, FFTW_REAL *a, int nc, FFTW_REAL *c);
    void ooura_c_dstsub(int n, FFTW_REAL *a, int nc, FFTW_REAL *c);
    int i, mirror, dst, stride, len, len_half, wt_len, ct_len;
    FFTW_REAL d, e;

    wt_len = ip[0];
    if ((wt_len << 3) < n) {
        wt_len = n >> 3;
        ooura_c_makewt(wt_len, ip, w);
    }
    /* Read ip[1] only after the makewt call above, which resets it. */
    ct_len = ip[1];
    if ((ct_len << 1) < n) {
        ct_len = n >> 1;
        ooura_c_makect(ct_len, ip, w + wt_len);
    }

    if (n <= 2) {
        a[0] = 0;
        return;
    }

    /* Split a[] around its midpoint: sums stay in a[], combinations of the
       differences go to t[]. */
    len = n >> 1;
    len_half = len >> 1;
    for (i = 1; i < len_half; i++) {
        mirror = len - i;
        d = a[i] - a[n - i];
        a[i] += a[n - i];
        e = a[mirror] - a[n - mirror];
        a[mirror] += a[n - mirror];
        t[i] = d + e;
        t[mirror] = d - e;
    }
    t[0] = a[len_half] - a[n - len_half];
    a[len_half] += a[n - len_half];
    a[0] = a[len];

    /* Transform the sum part and spread it over the odd slots. */
    ooura_c_dstsub(len, a, ct_len, w + wt_len);
    if (len > 4) {
        ooura_c_bitrv2(len, ip + 2, a);
    }
    ooura_c_cftsub(len, a, w);
    if (len > 4) {
        ooura_c_rftsub(len, a, ct_len, w + wt_len);
    }
    d = a[0] + a[1];
    a[n - 1] = a[1] - a[0];
    /* Walk downwards so sources are read before they are overwritten. */
    for (i = len - 2; i >= 2; i -= 2) {
        a[(i << 1) + 1] = a[i] - a[i + 1];
        a[(i << 1) - 1] = -a[i] - a[i + 1];
    }
    a[1] = d;

    /* Repeat on t[] at half the length each time, doubling the stride. */
    stride = 2;
    for (len = len_half; len >= 2; len = len_half) {
        ooura_c_dstsub(len, t, ct_len, w + wt_len);
        if (len > 4) {
            ooura_c_bitrv2(len, ip + 2, t);
        }
        ooura_c_cftsub(len, t, w);
        if (len > 4) {
            ooura_c_rftsub(len, t, ct_len, w + wt_len);
        }
        a[n - stride] = t[1] - t[0];
        a[stride] = t[0] + t[1];
        for (i = 2, dst = stride << 2; i <= len - 2; i += 2, dst += stride << 2) {
            a[dst - stride] = -t[i] - t[i + 1];
            a[dst + stride] = t[i] - t[i + 1];
        }
        stride <<= 1;
        len_half = len >> 1;
        for (i = 1; i < len_half; i++) {
            mirror = len - i;
            t[i] = t[len + mirror] + t[len + i];
            t[mirror] = t[len + mirror] - t[len + i];
        }
        t[0] = t[len + len_half];
    }
    a[stride] = t[0];
    a[0] = 0;
}


/* -------- initializing routines -------- */


#include <math.h>

/*
 * Build the cos/sin table w[0..nw-1] used by ooura_c_cftsub.
 *
 * Records nw in ip[0] and resets ip[1] to 1, so callers that keep a second
 * table after this one will regenerate it. For nw <= 2 nothing is written
 * to w[]. Otherwise (cos, sin) pairs for angles that are multiples of
 * (pi/4)/(nw/2) are stored, mirrored about the midpoint, and the table is
 * then permuted with ooura_c_bitrv2 (using ip + 2 as its work area).
 */
void ooura_c_makewt(int nw, int *ip, FFTW_REAL *w)
{
    void ooura_c_bitrv2(int n, int *ip, FFTW_REAL *a);
    int half, i;
    FFTW_REAL step, c, s;

    ip[0] = nw;
    ip[1] = 1;
    if (nw <= 2) {
        return;
    }

    half = nw >> 1;
    step = atan(1.0) / half;
    w[0] = 1;
    w[1] = 0;
    w[half] = cos(step * half);
    w[half + 1] = w[half];
    for (i = 2; i <= half - 2; i += 2) {
        c = cos(step * i);
        s = sin(step * i);
        w[i] = c;
        w[i + 1] = s;
        /* Mirror entry: cos and sin swap roles past the midpoint. */
        w[nw - i] = s;
        w[nw - i + 1] = c;
    }
    ooura_c_bitrv2(nw, ip + 2, w);
}


/*
 * Build the half-scaled cos/sin table c[0..nc-1] used by ooura_c_rftsub,
 * ooura_c_dctsub and ooura_c_dstsub, and record nc in ip[1].
 *
 * With delta = (pi/4)/(nc/2): c[j] = 0.5*cos(delta*j) for 0 <= j <= nc/2
 * and c[nc-j] = 0.5*sin(delta*j) for 0 < j < nc/2. For nc <= 1 nothing is
 * written to c[].
 */
void ooura_c_makect(int nc, int *ip, FFTW_REAL *c)
{
    int half, i;
    FFTW_REAL step;

    ip[1] = nc;
    if (nc <= 1) {
        return;
    }

    half = nc >> 1;
    step = atan(1.0) / half;
    c[0] = 0.5;
    c[half] = 0.5 * cos(step * half);
    for (i = 1; i < half; i++) {
        c[i] = 0.5 * cos(step * i);
        c[nc - i] = 0.5 * sin(step * i);
    }
}


/* -------- child routines -------- */


/*
 * Permute the n reals in a[] as adjacent pairs, using an index table that
 * is built in ip[] on every call (the file header calls ip the work area
 * for bit reversal).
 *
 * The table is grown by doubling: each round appends the existing entries
 * offset by the current span. Pairs are then exchanged for every (lo, hi)
 * with lo < hi. When the table size times four equals the remaining span,
 * each exchange is repeated at an offset of twice the table size.
 */
void ooura_c_bitrv2(int n, int *ip, FFTW_REAL *a)
{
    int lo, hi, src, dst, span, count, shift;
    FFTW_REAL re, im;

    ip[0] = 0;
    span = n;
    for (count = 1; (count << 2) < span; count <<= 1) {
        span >>= 1;
        for (lo = 0; lo < count; lo++) {
            ip[count + lo] = ip[lo] + span;
        }
    }

    if ((count << 2) > span) {
        for (hi = 1; hi < count; hi++) {
            for (lo = 0; lo < hi; lo++) {
                src = (lo << 1) + ip[hi];
                dst = (hi << 1) + ip[lo];
                re = a[src];
                im = a[src + 1];
                a[src] = a[dst];
                a[src + 1] = a[dst + 1];
                a[dst] = re;
                a[dst + 1] = im;
            }
        }
    } else {
        shift = count << 1;
        for (hi = 1; hi < count; hi++) {
            for (lo = 0; lo < hi; lo++) {
                src = (lo << 1) + ip[hi];
                dst = (hi << 1) + ip[lo];
                re = a[src];
                im = a[src + 1];
                a[src] = a[dst];
                a[src + 1] = a[dst + 1];
                a[dst] = re;
                a[dst + 1] = im;
                /* Same exchange again in the shifted region. */
                src += shift;
                dst += shift;
                re = a[src];
                im = a[src + 1];
                a[src] = a[dst];
                a[src + 1] = a[dst + 1];
                a[dst] = re;
                a[dst + 1] = im;
            }
        }
    }
}


/*
 * Butterfly passes of the complex FFT on the interleaved (re, im) data in
 * a[0..n-1], using the cos/sin table w[].
 *
 * The main loop performs 4-point butterflies on elements spaced l apart,
 * with l growing by a factor of four per pass (starting at l = 2). If a
 * factor of two remains afterwards (l < n), a final 2-point pass is applied.
 * For n <= 4 only that final pass (n == 4) or nothing (n <= 2) runs.
 * The callers in this file permute a[] with ooura_c_bitrv2 beforehand
 * whenever n > 4.
 */
void ooura_c_cftsub(int n, FFTW_REAL *a, FFTW_REAL *w)
{
    int j, j1, j2, j3, k, k1, ks, l, m;
    FFTW_REAL wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
    FFTW_REAL x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
    
    l = 2;
    while ((l << 1) < n) {
        /* m is the size of one group of four sub-blocks of length l. */
        m = l << 2;
        /* First group (offset 0): no table multiplications are needed. */
        for (j = 0; j <= l - 2; j += 2) {
            j1 = j + l;
            j2 = j1 + l;
            j3 = j2 + l;
            x0r = a[j] + a[j1];
            x0i = a[j + 1] + a[j1 + 1];
            x1r = a[j] - a[j1];
            x1i = a[j + 1] - a[j1 + 1];
            x2r = a[j2] + a[j3];
            x2i = a[j2 + 1] + a[j3 + 1];
            x3r = a[j2] - a[j3];
            x3i = a[j2 + 1] - a[j3 + 1];
            a[j] = x0r + x2r;
            a[j + 1] = x0i + x2i;
            a[j2] = x0r - x2r;
            a[j2 + 1] = x0i - x2i;
            a[j1] = x1r - x3i;
            a[j1 + 1] = x1i + x3r;
            a[j3] = x1r + x3i;
            a[j3 + 1] = x1i - x3r;
        }
        if (m < n) {
            /* Second group (offset m): a single real factor w[2] suffices;
               the j2 output is a swap of real/imaginary parts with a sign. */
            wk1r = w[2];
            for (j = m; j <= l + m - 2; j += 2) {
                j1 = j + l;
                j2 = j1 + l;
                j3 = j2 + l;
                x0r = a[j] + a[j1];
                x0i = a[j + 1] + a[j1 + 1];
                x1r = a[j] - a[j1];
                x1i = a[j + 1] - a[j1 + 1];
                x2r = a[j2] + a[j3];
                x2i = a[j2 + 1] + a[j3 + 1];
                x3r = a[j2] - a[j3];
                x3i = a[j2 + 1] - a[j3 + 1];
                a[j] = x0r + x2r;
                a[j + 1] = x0i + x2i;
                a[j2] = x2i - x0i;
                a[j2 + 1] = x0r - x2r;
                /* x0r/x0i are reused as temporaries from here on. */
                x0r = x1r - x3i;
                x0i = x1i + x3r;
                a[j1] = wk1r * (x0r - x0i);
                a[j1 + 1] = wk1r * (x0r + x0i);
                x0r = x3i + x1r;
                x0i = x3r - x1i;
                a[j3] = wk1r * (x0i - x0r);
                a[j3 + 1] = wk1r * (x0i + x0r);
            }
            /* Remaining groups: k1 indexes the table and ks alternates
               between +1 and -1 from one group to the next. */
            k1 = 1;
            ks = -1;
            for (k = (m << 1); k <= n - m; k += m) {
                k1++;
                ks = -ks;
                wk1r = w[k1 << 1];
                wk1i = w[(k1 << 1) + 1];
                wk2r = ks * w[k1];
                wk2i = w[k1 + ks];
                /* The third factor is derived from the first two rather
                   than read from the table. */
                wk3r = wk1r - 2 * wk2i * wk1i;
                wk3i = 2 * wk2i * wk1r - wk1i;
                for (j = k; j <= l + k - 2; j += 2) {
                    j1 = j + l;
                    j2 = j1 + l;
                    j3 = j2 + l;
                    x0r = a[j] + a[j1];
                    x0i = a[j + 1] + a[j1 + 1];
                    x1r = a[j] - a[j1];
                    x1i = a[j + 1] - a[j1 + 1];
                    x2r = a[j2] + a[j3];
                    x2i = a[j2 + 1] + a[j3 + 1];
                    x3r = a[j2] - a[j3];
                    x3i = a[j2 + 1] - a[j3 + 1];
                    a[j] = x0r + x2r;
                    a[j + 1] = x0i + x2i;
                    /* Each remaining output is a complex product
                       (wkNr + i*wkNi) * (x0r + i*x0i). */
                    x0r -= x2r;
                    x0i -= x2i;
                    a[j2] = wk2r * x0r - wk2i * x0i;
                    a[j2 + 1] = wk2r * x0i + wk2i * x0r;
                    x0r = x1r - x3i;
                    x0i = x1i + x3r;
                    a[j1] = wk1r * x0r - wk1i * x0i;
                    a[j1 + 1] = wk1r * x0i + wk1i * x0r;
                    x0r = x1r + x3i;
                    x0i = x1i - x3r;
                    a[j3] = wk3r * x0r - wk3i * x0i;
                    a[j3 + 1] = wk3r * x0i + wk3i * x0r;
                }
            }
        }
        l = m;
    }
    /* Leftover 2-point pass: sum and difference of elements l apart. */
    if (l < n) {
        for (j = 0; j <= l - 2; j += 2) {
            j1 = j + l;
            x0r = a[j] - a[j1];
            x0i = a[j + 1] - a[j1 + 1];
            a[j] += a[j1];
            a[j + 1] += a[j1 + 1];
            a[j1] = x0r;
            a[j1 + 1] = x0i;
        }
    }
}


/*
 * Real-data fix-up step applied next to ooura_c_cftsub: updates the (re, im)
 * pairs of a[] at index k and at its mirror n - k, for k = n/2 - 2 down
 * to 2.
 *
 *   nc, c : length and contents of the table built by ooura_c_makect; it is
 *           sampled with a stride of 4*nc/n
 */
void ooura_c_rftsub(int n, FFTW_REAL *a, int nc, FFTW_REAL *c)
{
    int lo, hi, tbl, step;
    FFTW_REAL wr, wi, dr, di, pr, pi;

    step = (nc << 2) / n;
    for (lo = (n >> 1) - 2, tbl = step; lo >= 2; lo -= 2, tbl += step) {
        hi = n - lo;
        wr = 0.5 - c[tbl];
        wi = c[nc - tbl];
        dr = a[lo] - a[hi];
        di = a[lo + 1] + a[hi + 1];
        /* (pr, pi) = (wr + i*wi) * (dr + i*di) */
        pr = wr * dr - wi * di;
        pi = wr * di + wi * dr;
        a[lo] -= pr;
        a[lo + 1] -= pi;
        a[hi] += pr;
        a[hi + 1] -= pi;
    }
}


/*
 * Cosine-transform rotation step: combines a[k] with its mirror a[n - k]
 * for 0 < k < n/2 using weights from the table c[] (built by
 * ooura_c_makect, length nc, sampled with stride nc/n), then scales the
 * middle element a[n/2].
 */
void ooura_c_dctsub(int n, FFTW_REAL *a, int nc, FFTW_REAL *c)
{
    int lo, hi, tbl, step, mid;
    FFTW_REAL wdiff, wsum, tmp;

    step = nc / n;
    mid = n >> 1;
    tbl = step;
    for (lo = 1; lo < mid; lo++) {
        hi = n - lo;
        wdiff = c[tbl] - c[nc - tbl];
        wsum = c[tbl] + c[nc - tbl];
        tbl += step;
        /* Keep the new a[hi] in tmp until a[lo] has been computed from
           the old a[hi]. */
        tmp = wsum * a[lo] - wdiff * a[hi];
        a[lo] = wdiff * a[lo] + wsum * a[hi];
        a[hi] = tmp;
    }
    a[mid] *= 2 * c[tbl];
}


/*
 * Sine-transform rotation step: the same weights as ooura_c_dctsub, but
 * with the roles of a[k] and its mirror a[n - k] exchanged, for
 * 0 < k < n/2; the middle element a[n/2] is then scaled.
 *
 *   nc, c : length and contents of the table built by ooura_c_makect,
 *           sampled with stride nc/n
 */
void ooura_c_dstsub(int n, FFTW_REAL *a, int nc, FFTW_REAL *c)
{
    int lo, hi, tbl, step, mid;
    FFTW_REAL wdiff, wsum, tmp;

    step = nc / n;
    mid = n >> 1;
    tbl = step;
    for (lo = 1; lo < mid; lo++) {
        hi = n - lo;
        wdiff = c[tbl] - c[nc - tbl];
        wsum = c[tbl] + c[nc - tbl];
        tbl += step;
        /* Keep the new a[lo] in tmp until a[hi] has been computed from
           the old a[lo]. */
        tmp = wsum * a[hi] - wdiff * a[lo];
        a[hi] = wdiff * a[hi] + wsum * a[lo];
        a[lo] = tmp;
    }
    a[mid] *= 2 * c[tbl];
}

