/*
 * Copyright (c) 1997 Massachusetts Institute of Technology
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to use, copy, modify, and distribute the Software without
 * restriction, provided the Software, including any modified copies made
 * under this license, is not distributed for a fee, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE MASSACHUSETTS INSTITUTE OF TECHNOLOGY BE LIABLE
 * FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
 * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Except as contained in this notice, the name of the Massachusetts
 * Institute of Technology shall not be used in advertising or otherwise
 * to promote the sale, use or other dealings in this Software without
 * prior written authorization from the Massachusetts Institute of
 * Technology.
 *
 */

#include <stdio.h>
#include <stdlib.h>

#include "bench_utils.h"
#include "bench_1d_protos.h"
#include "bench_ffts.h"

/* Compile-time switches: set either one to 0 to leave the CWP and/or
   Bailey FFT routines out of the benchmark. */
#define USE_CWP 1
#define USE_BAILEY 1

/* WORK_SIZE is the default size of the work array, expressed as a
   multiple of the size of the array being fft'ed.  It should be big
   enough for the work space of any FFT routine.

   For finer control, we recommend adjusting the allocation in the
   "compute max. array sizes" loop below instead.  (That loop contains
   examples.) */
#if !USE_BAILEY
#  define WORK_SIZE 3 /* more than enough for most routines */
#else
#  define WORK_SIZE 5 /* Bailey FFT requires lots of work space */
#endif


short bench_1d(short compute_accuracy, 
	       factor_type allowed_factors, int which_N, double max_MB)
{
     int 
	  n,
	  arr_sizes[] = {
	  0, /* special...print header line */
	  2,4,6,8,9,12,15,16,18,24,32,36,64,80,108,128,210,256,
	  504,512,1000,1024,1960,2048,4096,4725,8192,10368,16384,
	  27000,32768,65536,75600,131072,165375,262144,362880,
	  524288,1048576,2097152,4194304,8388608,16777216,33554432,
	  0 /* end with a zero */
     };
     FFTW_COMPLEX *arr,*work;
     int size_index, size_arr = 0, size_work = 0;
     short is_power_of_two;

     if (which_N != 0)
          arr_sizes[1] = which_N, arr_sizes[2] = 0;

     /*******************************************************************/
     /* Allocate Arrays: */

     log_printf("Benchmarking for sizes:");

     /* compute max. array sizes: */
     for (size_index = 1; arr_sizes[size_index]; ++size_index) {
	  int sz_arr,sz_wrk,sz_arr_2;

	  if (arr_sizes[size_index] > 0 && allowed_factors != ALL_FACTORS) {
	       if (IS_POWER_OF_TWO(arr_sizes[size_index])) {
		   if (allowed_factors == NON_POWERS_OF_TWO_ONLY)
			continue;
	       }
	       else if (allowed_factors == POWERS_OF_TWO_ONLY)
		    continue;
	  }

	  sz_arr = arr_sizes[size_index];
	  sz_wrk = WORK_SIZE*arr_sizes[size_index];
	  if (sz_wrk < 2*arr_sizes[size_index] + 8)
	       sz_wrk = 2*arr_sizes[size_index] + 8; /* for FFTPACK */

	  #ifdef HAVE_F77
	  /* make sure we have enough work space for SCIPORT */
	  if (sz_wrk < 5*arr_sizes[size_index]/2)
	       sz_wrk = 5*arr_sizes[size_index]/2;
	  #endif

	  /* make sure we have enough work space for QFT */
	  if (IS_POWER_OF_TWO(arr_sizes[size_index])) {
	       int m = 0, nn = arr_sizes[size_index];
	       while (nn > 1) {  /* compute = log2 nn */
		    nn /= 2;
		    m += 1;
	       }
	       nn = m * sizeof(FFTW_REAL *)
		    + 3*(arr_sizes[size_index]+m)*sizeof(FFTW_REAL)
		    + 3*m * sizeof(int);
	       nn = (nn + sizeof(FFTW_COMPLEX) - 1) / sizeof(FFTW_COMPLEX);
	       if (sz_wrk < nn)
		    sz_wrk = nn;
	  }

	  #if USE_CWP
	  sz_arr_2 = npfao(sz_arr,1 << 30);
	  if (sz_arr_2 > sz_arr)
	       sz_arr = sz_arr_2;
	  #endif

	  #ifdef HAVE_LIBSCS
	  if (sz_wrk < 2*arr_sizes[size_index] + 15)
	       sz_wrk = 2*arr_sizes[size_index] + 15;
	  #endif

	  #ifdef HAVE_LIBCOMPLIB_SGIMATH
	  if (sz_wrk < 2*arr_sizes[size_index] + 15)
	       sz_wrk = 2*arr_sizes[size_index] + 15;
	  #endif

	  #ifdef HAVE_LIBIMSL
	  if (sz_wrk < 3*arr_sizes[size_index] + 8)
	       sz_wrk = 3*arr_sizes[size_index] + 8;
	  #endif

	  #ifdef HAVE_LIBSCI /* the Cray SCILIB routine needs
			       lots of workspace */
	  if (sz_wrk < 8*arr_sizes[size_index] + 100)
	       sz_wrk = 8*arr_sizes[size_index] + 100;
          #endif

	  if ((sz_arr + sz_wrk) * 
	      sizeof(FFTW_COMPLEX) * 1.0/1048576.0 > max_MB)
	       break;  /* maximum size exceeded */

	  log_printf("  %d (%g MB)",arr_sizes[size_index],
		     (sz_arr + sz_wrk) * sizeof(FFTW_COMPLEX)*1.0/1048576.0);
	  
	  if (sz_arr > size_arr)
	       size_arr = sz_arr;
	  if (sz_wrk > size_work)
	       size_work = sz_wrk;
     }

     if (size_arr == 0) {
          log_printf("No valid sizes found!  Sorry.\n");
	  return 1;
     }

     log_printf("\nMaximum array size = %d\n\n",size_arr);

     arr = fftw_malloc(size_arr * sizeof(FFTW_COMPLEX));
     work = fftw_malloc(size_work * sizeof(FFTW_COMPLEX));

     if (!arr || !work) {
	  printf("Not enough memory!  (Need at least %0.1f MB.)\n",
		 (size_arr + size_work) * 
		 sizeof(FFTW_COMPLEX) * 1.0/1048576.0);
	  log_printf("Not enough memory!  (Need at least %0.1f MB.)\n",
		     (size_arr + size_work) * 
		     sizeof(FFTW_COMPLEX) * 1.0/1048576.0);
	  if (arr)
	       fftw_free(arr);
	  if (work)
	       fftw_free(work);
	  return 1;
     }

     bench_init_array(arr,size_arr);
     bench_init_array(work,size_work);

     /*******************************************************************/

     for (size_index = 0; size_index == 0 || 
	  (arr_sizes[size_index] && arr_sizes[size_index] <= size_arr); 
	  ++size_index) {

	  n = arr_sizes[size_index];

	  if (n > 0 && allowed_factors != ALL_FACTORS) {
	       if (IS_POWER_OF_TWO(n)) {
		   if (allowed_factors == NON_POWERS_OF_TWO_ONLY)
			continue;
	       }
	       else if (allowed_factors == POWERS_OF_TWO_ONLY)
		    continue;
	  }

	  is_power_of_two = (size_index == 0 && 
			     allowed_factors != NON_POWERS_OF_TWO_ONLY) 
	                    || (size_index != 0 && IS_POWER_OF_TWO(n));

	  if (n == 0) {
	       dat_printf("N");
	       log_printf("Benchmarking FFTs:\n");
	  }
	  else {
	       log_printf("\nBenchmarking for array size = %d%s:\n",
			  n, is_power_of_two ? " (power of 2)" : "");
	       dat_printf("%d",n);
	  }

     /*******************************************************************/

	  do_arndt_ffts(1,&n,&n,n,is_power_of_two,arr,work,size_arr,size_work,
			compute_accuracy,allowed_factors);

	  #if USE_BAILEY
	  do_bailey_fft(1,&n,&n,n,is_power_of_two,
			arr,work,size_arr,size_work,
			compute_accuracy,
			allowed_factors);
	  #endif

	  do_beauregard_fft(1,&n,&n,n,is_power_of_two,
			    arr,work,size_arr,size_work,
			    compute_accuracy,allowed_factors);

	  do_bergland_fft(1,&n,&n,n,is_power_of_two,
			  arr,work,size_arr,size_work,
			  compute_accuracy,allowed_factors);

	  do_bernstein_fft(1,&n,&n,n,is_power_of_two,
			   arr,work,size_arr,size_work,
			   compute_accuracy,allowed_factors);

	  do_bloodworth_fft(1,&n,&n,n,is_power_of_two,
			    arr,work,size_arr,size_work,
			    compute_accuracy,allowed_factors);
	  
	  do_brenner_fft(1,&n,&n,n,is_power_of_two,
			 arr,work,size_arr,size_work,
			 compute_accuracy,allowed_factors);
	  
	  do_burrus_fft(1,&n,&n,n,is_power_of_two,
			arr,work,size_arr,size_work,
			compute_accuracy,allowed_factors);
	  
	  #if USE_CWP
	  do_cwp_fft(1,&n,&n,n,is_power_of_two,
		     arr,work,size_arr,size_work,
		     compute_accuracy,allowed_factors);
	  #endif

	  do_edelblute_fft(1,&n,&n,n,is_power_of_two,
			   arr,work,size_arr,size_work,
			   compute_accuracy,allowed_factors);

	  do_emayer_fft(1,&n,&n,n,is_power_of_two,
			arr,work,size_arr,size_work,
			compute_accuracy,allowed_factors);

	  do_fftpack_fft(1,&n,&n,n,is_power_of_two,
			 arr,work,size_arr,size_work,
			 compute_accuracy,allowed_factors);

	  do_fftpack_f2c_fft(1,&n,&n,n,is_power_of_two,
			   arr,work,size_arr,size_work,
			   compute_accuracy,allowed_factors);

	  {
	       int err;

	       if (n != 0) {
		    /* FFTW allocates its own work space; make sure it
		       there is room to do so. */
		    fftw_free(work);
		    work = fftw_malloc(sizeof(FFTW_COMPLEX) * n);
		    if (!work) {
			 printf("\nError reallocating work space "
				"before FFTW!\n");
			 return 1;
		    }
	       }

	       err = do_fftw_1d_fft(1,&n,&n,n,is_power_of_two,
				    arr,work,size_arr,size_work,
				    compute_accuracy,allowed_factors);
	       if (err) return err;

	       if (n != 0) {
		    /* Restore work space to previous size. */
		    fftw_free(work);
		    work = fftw_malloc(sizeof(FFTW_COMPLEX) * size_work);
		    if (!work) {
			 printf("\nError reallocating work space!\n");
			 return 1;
		    }
	       }
	  }

	  do_frigo_fft(1,&n,&n,n,is_power_of_two,
		       arr,work,size_arr,size_work,
		       compute_accuracy,allowed_factors);

	  do_green_fft(1,&n,&n,n,is_power_of_two,
		       arr,work,size_arr,size_work,
		       compute_accuracy,allowed_factors);

	  do_gsl_fft(1,&n,&n,n,is_power_of_two,
		     arr,work,size_arr,size_work,
		     compute_accuracy,allowed_factors);

	  do_krukar_fft(1,&n,&n,n,is_power_of_two,
			arr,work,size_arr,size_work,
			compute_accuracy,allowed_factors);
	  
	  do_monnier_fft(1,&n,&n,n,is_power_of_two,
		       arr,work,size_arr,size_work,
		       compute_accuracy,allowed_factors);
	  
	  do_monro_fft(1,&n,&n,n,is_power_of_two,
		       arr,work,size_arr,size_work,
		       compute_accuracy,allowed_factors);
	  
	  do_napack_fft(1,&n,&n,n,is_power_of_two,
			arr,work,size_arr,size_work,
			compute_accuracy,allowed_factors);
	  
	  do_nielsen_fft(1,&n,&n,n,is_power_of_two,
			 arr,work,size_arr,size_work,
			 compute_accuracy,allowed_factors);
	  
	  do_nrc_1d_fft(1,&n,&n,n,is_power_of_two,
			arr,work,size_arr,size_work,
			compute_accuracy,allowed_factors);
	  
	  do_nrf_1d_fft(1,&n,&n,n,is_power_of_two,
			arr,work,size_arr,size_work,
			compute_accuracy,
			allowed_factors);

	  do_ooura_fft(1,&n,&n,n,is_power_of_two,
		       arr,work,size_arr,size_work,
		       compute_accuracy,allowed_factors);
	  do_qft_fft(1,&n,&n,n,is_power_of_two,
		     arr,work,size_arr,size_work,
		     compute_accuracy,allowed_factors);
	  
	  do_ransom_fft(1,&n,&n,n,is_power_of_two,
			arr,work,size_arr,size_work,
			compute_accuracy,allowed_factors);
	  
	  do_rmayer_fft(1,&n,&n,n,is_power_of_two,
		       arr,work,size_arr,size_work,
		       compute_accuracy,allowed_factors);
	  
	  do_sciport_fft(1,&n,&n,n,is_power_of_two,
			 arr,work,size_arr,size_work,
			 compute_accuracy,allowed_factors);

	  do_singleton_fft(1,&n,&n,n,is_power_of_two,
			   arr,work,size_arr,size_work,
			   compute_accuracy,allowed_factors);
	  do_singleton_f2c_fft(1,&n,&n,n,is_power_of_two,
			       arr,work,size_arr,size_work,
			       compute_accuracy,allowed_factors);

	  do_sorensen_ffts(1,&n,&n,n,is_power_of_two,
			   arr,work,size_arr,size_work,
			   compute_accuracy,allowed_factors);

	  do_sorensen_dit_fft(1,&n,&n,n,is_power_of_two,
			      arr,work,size_arr,size_work,
			      compute_accuracy,allowed_factors);

	  do_temperton_f_fft(1,&n,&n,n,is_power_of_two,
			arr,work,size_arr,size_work,
			compute_accuracy,allowed_factors);

	  do_temperton_fft(1,&n,&n,n,is_power_of_two,
		       arr,work,size_arr,size_work,
		       compute_accuracy,allowed_factors);

	  do_valkenburg_fft(1,&n,&n,n,is_power_of_two,
			    arr,work,size_arr,size_work,
			    compute_accuracy,allowed_factors);

	  do_imsl_fft(1,&n,&n,n,is_power_of_two,
		      arr,work,size_arr,size_work,
		      compute_accuracy,allowed_factors);

	  do_nag_fft(1,&n,&n,n,is_power_of_two,
		     arr,work,size_arr,size_work,
		     compute_accuracy,allowed_factors);

	  do_scilib_1d_fft(1,&n,&n,n,is_power_of_two,
			   arr,work,size_arr,size_work,
			   compute_accuracy,allowed_factors);

	  do_essl_1d_fft(1,&n,&n,n,is_power_of_two,
			 arr,work,size_arr,size_work,
			 compute_accuracy,allowed_factors);

	  do_sunperf_fft(1,&n,&n,n,is_power_of_two,
			 arr,work,size_arr,size_work,
			 compute_accuracy,allowed_factors);

	  do_scsl_fft(1,&n,&n,n,is_power_of_two,
		      arr,work,size_arr,size_work,
		      compute_accuracy,allowed_factors);

	  do_dxml_fft(1,&n,&n,n,is_power_of_two,
		      arr,work,size_arr,size_work,
		      compute_accuracy,allowed_factors);

	  do_sgimath_fft(1,&n,&n,n,is_power_of_two,
			 arr,work,size_arr,size_work,
			 compute_accuracy,allowed_factors);

	  do_asci_red_fft(1,&n,&n,n,is_power_of_two,
			 arr,work,size_arr,size_work,
			 compute_accuracy,allowed_factors);

	  dat_printf("\n");
	  log_printf("\n");

	  compute_normalized_averages();
     }

     dat_printf("\n");
     output_normalized_averages();
     destroy_fft_data();

     fftw_free(arr);
     fftw_free(work);

     return 0;
}


