/*
 * Copyright (c) 1997 Massachusetts Institute of Technology
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to use, copy, modify, and distribute the Software without
 * restriction, provided the Software, including any modified copies made
 * under this license, is not distributed for a fee, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE MASSACHUSETTS INSTITUTE OF TECHNOLOGY BE LIABLE
 * FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
 * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Except as contained in this notice, the name of the Massachusetts
 * Institute of Technology shall not be used in advertising or otherwise
 * to promote the sale, use or other dealings in this Software without
 * prior written authorization from the Massachusetts Institute of
 * Technology.
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <math.h>

#include "bench_utils.h"
#include "bench_ffts.h"

/* Forward declarations of the helper routines defined after bench_3d. */

/* largest value in nums[0..n-1]; -1 when n <= 0 */
int maxn(int n, int nums[]);
/* product of the two largest values in nums[0..n-1]; -1 when n <= 1 */
int max2n(int n, int nums[]);
/* number of work-array elements needed for an n[0] x n[1] x n[2] transform */
int get_work_size(int n[3]);

/*
 * Benchmark the 3D FFT routines over a table of array sizes.
 *
 * compute_accuracy  forwarded unchanged to every do_*_fft routine.
 * allowed_factors   when not ALL_FACTORS, sizes whose total element count N
 *                   is (NON_POWERS_OF_TWO_ONLY) or is not (POWERS_OF_TWO_ONLY)
 *                   a power of two are skipped.
 * which_N           if nonzero, the built-in size table is replaced by the
 *                   single cube which_N x which_N x which_N.
 * max_MB            upper bound, in units of 1048576 bytes, on the combined
 *                   size of the data and work arrays; the size scan stops at
 *                   the first table entry that would exceed it.
 *
 * Returns 0 on success.  Returns 1 if no table entry qualifies, if the
 * arrays cannot be allocated, or if do_fftwnd_fft returns nonzero.  The
 * data and work arrays are released on every path that allocated them.
 */
short bench_3d(short compute_accuracy,
	       factor_type allowed_factors, int which_N, double max_MB)
{
     /*
      * Row 0 is a placeholder that the benchmarking loop visits with N == 0
      * to print the headings; the trailing all-zero row terminates the table.
      */
     int arr_sizes[][3] = {
          { 0, 0, 0 },
          {4,4,4},
          {5,5,5},{6,6,6},{7,7,7},
          {8,8,8},
          {9,9,9},{10,10,10},{11,11,11},{12,12,12},{13,13,13},
          {14,14,14},{15,15,15},
          {16,16,16},
          {24,25,28},
          {32,32,32},
          {48,48,48}, {49,49,49}, {60,60,60}, {72,60,56},
          {64,64,64},
          {75,75,75}, {80,80,80},
          {256,64,32},
          {84,84,84}, {96,96,96},
          {16,1024,64},
          {105,105,105}, {112,112,112}, {120,120,120},
          {128,128,128},
          {144,144,144},
          {512,128,64},
          {180,180,180},
          {256,128,256},
          {240,240,240},
          {256,256,256},
          {512,64,1024},
          {360,360,360},
          {512,512,512},
          { 0, 0, 0 }
     };
     int *n, n_rev[3], N;
     FFTW_COMPLEX *arr, *work;
     int size_index, size_arr = 0, size_work = 0;
     short is_power_of_two;

     if (which_N != 0) {
          /* benchmark a single user-chosen cube: overwrite the first real
             entry and terminate the table right after it */
          arr_sizes[1][0] = arr_sizes[1][1] = arr_sizes[1][2] = which_N;
          arr_sizes[2][0] = 0;
     }

     /*******************************************************************/
     /* Allocate Arrays: */

     log_printf("Benchmarking for sizes:");

     /* compute max. array sizes: */
     for (size_index = 1; arr_sizes[size_index][0]; ++size_index) {
          int sz_arr, sz_wrk;
          double total_MB;

          N = arr_sizes[size_index][0] * arr_sizes[size_index][1]
               * arr_sizes[size_index][2];
          if (N == 0)
               break;

          if (allowed_factors != ALL_FACTORS) {
               if (IS_POWER_OF_TWO(N)) {
                    if (allowed_factors == NON_POWERS_OF_TWO_ONLY)
                         continue;
               }
               else if (allowed_factors == POWERS_OF_TWO_ONLY)
                    continue;
          }

          sz_arr = N;
          sz_wrk = get_work_size(arr_sizes[size_index]);
          total_MB = (sz_arr + sz_wrk) *
               sizeof(FFTW_COMPLEX) * 1.0/1048576.0;

          if (total_MB > max_MB)
               break;  /* maximum size exceeded */

          log_printf("  %dx%dx%d (%g MB)",
                     arr_sizes[size_index][0],
                     arr_sizes[size_index][1],
                     arr_sizes[size_index][2],
                     total_MB);

          if (sz_arr > size_arr)
               size_arr = sz_arr;
          if (sz_wrk > size_work)
               size_work = sz_wrk;
     }

     if (size_arr == 0) {
          log_printf("No valid sizes found!  Sorry.\n");
          return 1;
     }

     log_printf("\nMaximum array size N = %d\n\n",size_arr);

     arr = fftw_malloc(size_arr * sizeof(FFTW_COMPLEX));
     work = fftw_malloc(size_work * sizeof(FFTW_COMPLEX));

     if (!arr || !work) {
          double need_MB = (size_arr + size_work) *
               sizeof(FFTW_COMPLEX) * 1.0/1048576.0;

          /* report on both the console and the log */
          printf("Not enough memory!  (Need at least %0.1f MB.)\n",
                 need_MB);
          log_printf("Not enough memory!  (Need at least %0.1f MB.)\n",
                     need_MB);
          if (arr)
               fftw_free(arr);
          if (work)
               fftw_free(work);
          return 1;
     }

     bench_init_array(arr,size_arr);
     bench_init_array(work,size_work);

     /*******************************************************************/

     for (size_index = 0; ; ++size_index) {

          n = arr_sizes[size_index];
          N = n[0]*n[1]*n[2];
          if (N > size_arr)
               break;   /* larger than anything the arrays were sized for */
          if (size_index != 0 && N == 0)
               break;   /* reached the terminating row */

          /* the same dimensions in reversed order, handed to every routine
             alongside n */
          n_rev[2] = n[0];
          n_rev[1] = n[1];
          n_rev[0] = n[2];

          if (N > 0 && allowed_factors != ALL_FACTORS) {
               if (IS_POWER_OF_TWO(N)) {
                    if (allowed_factors == NON_POWERS_OF_TWO_ONLY)
                         continue;
               }
               else if (allowed_factors == POWERS_OF_TWO_ONLY)
                    continue;
          }

          /* for the heading pass (row 0) the flag only reflects whether
             powers of two are going to be benchmarked at all */
          is_power_of_two = (size_index == 0 &&
                             allowed_factors != NON_POWERS_OF_TWO_ONLY)
                            || (size_index != 0 && IS_POWER_OF_TWO(N));

          if (N == 0) {
               dat_printf("Array Dimensions");
               log_printf("Benchmarking FFTs:\n");
          }
          else {
               log_printf("\nBenchmarking for array size = %dx%dx%d%s:\n",
                          n[0],n[1],n[2],
                          is_power_of_two ? " (power of 2)" : "");
               dat_printf("%dx%dx%d",n[0],n[1],n[2]);
          }

          /*******************************************************************/

          if (do_fftwnd_fft(3,n,n_rev,N,is_power_of_two,
                            arr,work,size_arr,size_work,
                            compute_accuracy,allowed_factors)) {
               /* do not leak the benchmark buffers when bailing out */
               fftw_free(arr);
               fftw_free(work);
               return 1;
          }

          do_green_3d_fft(3,n,n_rev,N,is_power_of_two,
                          arr,work,size_arr,size_work,
                          compute_accuracy,allowed_factors);

          do_harm_fft(3,n,n_rev,N,is_power_of_two,
                      arr,work,size_arr,size_work,
                      compute_accuracy,allowed_factors);

          do_harm_f2c_fft(3,n,n_rev,N,is_power_of_two,
                          arr,work,size_arr,size_work,
                          compute_accuracy,allowed_factors);

          do_mfft_3d_fft(3,n,n_rev,N,is_power_of_two,
                         arr,work,size_arr,size_work,
                         compute_accuracy,allowed_factors);

          do_nrc_fourn_fft(3,n,n_rev,N,is_power_of_two,
                           arr,work,size_arr,size_work,
                           compute_accuracy,allowed_factors);
          do_nrf_fourn_fft(3,n,n_rev,N,is_power_of_two,
                           arr,work,size_arr,size_work,
                           compute_accuracy,allowed_factors);

          do_pda_fft(3,n,n_rev,N,is_power_of_two,
                     arr,work,size_arr,size_work,
                     compute_accuracy,allowed_factors);
          do_pda_f2c_fft(3,n,n_rev,N,is_power_of_two,
                         arr,work,size_arr,size_work,
                         compute_accuracy,allowed_factors);

          do_singleton_3d_fft(3,n,n_rev,N,is_power_of_two,
                              arr,work,size_arr,size_work,
                              compute_accuracy,allowed_factors);
          do_singleton_3d_f2c_fft(3,n,n_rev,N,is_power_of_two,
                                  arr,work,size_arr,size_work,
                                  compute_accuracy,allowed_factors);

          do_temperton_f_3d_fft(3,n,n_rev,N,is_power_of_two,
                                arr,work,size_arr,size_work,
                                compute_accuracy,allowed_factors);

          do_temperton_3d_fft(3,n,n_rev,N,is_power_of_two,
                              arr,work,size_arr,size_work,
                              compute_accuracy,allowed_factors);

          do_imsl_3d_fft(3,n,n_rev,N,is_power_of_two,
                         arr,work,size_arr,size_work,
                         compute_accuracy,allowed_factors);

          do_nag_3d_fft(3,n,n_rev,N,is_power_of_two,
                        arr,work,size_arr,size_work,
                        compute_accuracy,allowed_factors);

          do_essl_3d_fft(3,n,n_rev,N,is_power_of_two,
                         arr,work,size_arr,size_work,
                         compute_accuracy,allowed_factors);

          do_sgimath_3d_fft(3,n,n_rev,N,is_power_of_two,
                            arr,work,size_arr,size_work,
                            compute_accuracy,allowed_factors);

          do_scilib_3d_fft(3,n,n_rev,N,is_power_of_two,
                           arr,work,size_arr,size_work,
                           compute_accuracy,allowed_factors);

          do_scsl_3d_fft(3,n,n_rev,N,is_power_of_two,
                         arr,work,size_arr,size_work,
                         compute_accuracy,allowed_factors);

          do_dxml_3d_fft(3,n,n_rev,N,is_power_of_two,
                         arr,work,size_arr,size_work,
                         compute_accuracy,allowed_factors);

          /*******************************************************************/

          dat_printf("\n");
          log_printf("\n");

          compute_normalized_averages();
     }

     dat_printf("\n");
     output_normalized_averages();
     destroy_fft_data();

     fftw_free(arr);
     fftw_free(work);

     return 0;
}

int maxn(int n, int nums[])
/* Largest value among nums[0] .. nums[n-1].  Yields -1 when n is not
   positive, i.e. when there is nothing to look at. */
{
     int best, i;

     if (n < 1)
          return -1;

     /* seed with the first element, then scan the remainder forwards */
     best = nums[0];
     for (i = 1; i < n; ++i) {
          if (best < nums[i])
               best = nums[i];
     }
     return best;
}

int max2n(int n, int nums[])
/* Product of the two largest entries of nums[0] .. nums[n-1] (a value that
   occurs twice counts twice).  Yields -1 when n is less than 2. */
{
     int top, pos, before, after;

     if (n < 2)
          return -1;

     top = maxn(n, nums);

     /* index of the first occurrence of the overall maximum */
     pos = 0;
     while (nums[pos] != top)
          ++pos;

     /* best candidate on each side of that occurrence; maxn yields -1 for
        an empty side, so the empty cases are decided by position below */
     before = maxn(pos, nums);
     after = maxn(n - pos - 1, nums + pos + 1);

     if (pos == 0)
          return (after * top);      /* nothing to the left */
     if (pos == n - 1)
          return (before * top);     /* nothing to the right */

     if (after > before)
          return (after * top);
     return (before * top);
}

/* larger of a and b (each argument may be evaluated twice) */
#define MAX2(a,b) ((a)>(b) ? (a) : (b))

int get_work_size(int n[3])
/* Number of work-array elements to reserve for transforms of dimensions
   n[0] x n[1] x n[2]: the largest requirement among the baseline and
   whichever vendor libraries are enabled at compile time. */
{
     /* baseline (PDA requires the most work space) */
     int scratch = 3*maxn(3,n) + 8;

#ifdef USE_SCSL
     /* on the Origin2000, the Cray SCSL FFT needs lots of scratch space! */
     scratch = MAX2(scratch,
                    (n[0]+n[1]+n[2]) + 45 + n[0]*n[1]*n[2]);
#endif

#ifdef HAVE_LIBCOMPLIB_SGIMATH
     scratch = MAX2(scratch,
                    (n[0]+n[1]+n[2]) + 45);
#endif

#ifdef USE_SCILIB

#  ifdef _CRAYMPP
     /* on MPP machines, Cray FFT needs tons of scratch space! */
     scratch = MAX2(scratch,
                    6*(n[0]+n[1]+n[2]) + n[0]*n[1]*n[2]);
#  else
     scratch = MAX2(scratch,
                    50+(n[0]+n[1]+n[2]) + 2 * 8 * max2n(3,n));
#  endif

#endif

#ifdef USE_ESSL
     /* extra space is only reserved when FFTW_REAL has the size of float */
     if (sizeof(FFTW_REAL) == sizeof(float))
          scratch = MAX2(scratch,
                         16*max2n(3,n)+1024);
#endif

#ifdef USE_IMSL
     scratch = MAX2(scratch,
                    maxn(3,n) + 2*(n[0] + n[1] + n[2]) + 8*3);
#endif

     return scratch;
}

