/*
 * Copyright (c) 1997 Massachusetts Institute of Technology
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to use, copy, modify, and distribute the Software without
 * restriction, provided the Software, including any modified copies made
 * under this license, is not distributed for a fee, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE MASSACHUSETTS INSTITUTE OF TECHNOLOGY BE LIABLE
 * FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
 * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Except as contained in this notice, the name of the Massachusetts
 * Institute of Technology shall not be used in advertising or otherwise
 * to promote the sale, use or other dealings in this Software without
 * prior written authorization from the Massachusetts Institute of
 * Technology.
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <math.h>

#include "bench_utils.h"
#include "rbench_ffts.h"

/* Helpers defined outside this file.  maxn(2,n) is called by
   get_rwork_size() below (presumably it returns the largest of the
   first n entries of nums[] -- confirm against its definition).
   max2n is declared here but not referenced in the code visible in
   this file. */
extern int maxn(int n, int nums[]);
extern int max2n(int n, int nums[]);
/* Work-array size for a 2d transform; defined at the end of this file.
   NOTE(review): the bound written here ([3]) differs from the one on the
   definition ([2]).  Array parameters are adjusted to pointers, so the
   compiler ignores the bound, but only n[0] and n[1] are ever read. */
int get_rwork_size(int n[3]);

/* Nonzero if a transform with N (> 0) total points passes the
   allowed_factors filter, zero if that size must be skipped. */
static int rbench_2d_size_allowed(int N, factor_type allowed_factors)
{
     if (allowed_factors == ALL_FACTORS)
	  return 1;
     if (IS_POWER_OF_TWO(N))
	  return allowed_factors != NON_POWERS_OF_TWO_ONLY;
     return allowed_factors != POWERS_OF_TWO_ONLY;
}

/*
 * Benchmark the 2d real FFT routines over a table of array sizes.
 *
 * compute_accuracy, allowed_factors: handed unchanged to every
 *     do_*_rfft routine; allowed_factors additionally selects which
 *     table entries are used (all, powers of two only, or
 *     non-powers of two only, judged on the total size n[0]*n[1]).
 * which_N: if nonzero, the table is replaced by the single size
 *     which_N x which_N.
 * max_MB: the size scan stops at the first (allowed) entry whose data
 *     array plus work array would need more than this many megabytes.
 *
 * Returns 0 on success, 1 if no size fits or if allocation fails.
 *
 * The function makes two passes over the table: the first logs the
 * sizes to be used and finds the largest data/work arrays required,
 * the second runs the benchmarks.  The second pass is bounded by the
 * index at which the first pass stopped, so it can never run a size
 * that the first pass rejected (and for which the buffers were
 * therefore not sized).
 */
short rbench_2d(short compute_accuracy, 
		factor_type allowed_factors, int which_N, double max_MB)
{
     int 
	  *n, n_rev[2], N, 
	  arr_sizes[][2] = {
	       { 0, 0 },   /* entry 0: header pass, no transform size */
	       {4,4 },
	       {5,5},{6,6},{7,7},
	       {8,8},
	       {9,9},{10,10},{11,11},{12,12},{13,13},
	       {14,14},{15,15},
	       {16,16},
	       {25,24},
	       {32,32},
	       {48,48}, {49,49}, {60,60}, {72,56},
	       {64,64},
	       {75,75}, {80,80},
	       {84,84},
	       {128,64},
	       {16,512},
	       {96,96}, 
	       {105,105}, {112,112}, {120,120},
	       {128,128},
	       {144,144}, 
	       {180,180}, 
	       {512,64},
	       {256,128},
	       {240,240},
	       {256,256},
	       {64,1024},
	       {360,360},
	       {512,512},
	       {1000,1000},
	       {1024,1024},
	       {1960,1960},
	       {2048,2048},
	       {3360,3360},
	       {4096,4096},
	       {4725,4725},
	       {8192,8192},
	       {10368,10368},
	       {16384,16384},
	       {27000,27000},
	       {32768,32768},
	       { 0, 0 }    /* terminator */
	  };
     FFTW_REAL *arr,*work;
     int size_index, end_index, size_arr = 0, size_work = 0;
     short is_power_of_two;
     double total_MB;

     /* A user-selected size replaces the whole table by one entry
	followed immediately by the terminator. */
     if (which_N != 0) {
	  arr_sizes[1][0] = arr_sizes[1][1] = which_N;
	  arr_sizes[2][0] = 0;
     }

     /*******************************************************************/
     /* Allocate Arrays: */

     log_printf("Benchmarking for sizes:");

     /* compute max. array sizes: */
     for (size_index = 1; arr_sizes[size_index][0]; ++size_index) {
          int sz_arr,sz_wrk;
	  double needed_MB;

	  n = arr_sizes[size_index];
	  N = n[0] * n[1];
	  if (N == 0)
	       break;

	  if (!rbench_2d_size_allowed(N, allowed_factors))
	       continue;

	  /* the data array is padded by 2 in each dimension */
          sz_arr = (n[0]+2) * (n[1]+2);
          sz_wrk = get_rwork_size(n);

	  /* Sum in double: the integer sum/product can overflow for the
	     largest table entries, which would defeat the limit check. */
	  needed_MB = ((double) sz_arr + (double) sz_wrk) *
	       sizeof(FFTW_REAL) / 1048576.0;

          if (needed_MB > max_MB)
               break;  /* maximum size exceeded */

	  log_printf("  %dx%d (%g MB)", n[0], n[1], needed_MB);

          if (sz_arr > size_arr)
               size_arr = sz_arr;
          if (sz_wrk > size_work)
               size_work = sz_wrk;
     }

     /* First table index NOT covered by the scan above (either the
	terminator or the first size that exceeded max_MB). */
     end_index = size_index;

     if (size_arr == 0) {
          log_printf("No valid sizes found!  Sorry.\n");
          return 1;
     }

     log_printf("\nMaximum array size N = %d\n\n",size_arr);

     arr = fftw_malloc(size_arr * sizeof(FFTW_REAL));
     work = fftw_malloc(size_work * sizeof(FFTW_REAL));

     if (!arr || !work) {
	  total_MB = ((double) size_arr + (double) size_work) *
	       sizeof(FFTW_REAL) / 1048576.0;
          printf("Not enough memory!  (Need at least %0.1f MB.)\n",
                 total_MB);
	  log_printf("Not enough memory!  (Need at least %0.1f MB.)\n",
		     total_MB);
          if (arr)
               fftw_free(arr);
          if (work)
               fftw_free(work);
          return 1;
     }

     rbench_init_array(arr,size_arr);
     rbench_init_array(work,size_work);

     /*******************************************************************/

     /* Index 0 is the {0,0} header pass (N == 0); indices 1..end_index-1
	are exactly the entries examined by the sizing pass, so every
	size that gets past the filter here had its memory accounted
	for above. */
     for (size_index = 0; size_index < end_index; ++size_index) {

          n = arr_sizes[size_index];
	  N = n[0]*n[1];
	  n_rev[1] = n[0];
	  n_rev[0] = n[1];

          if (N > 0 && !rbench_2d_size_allowed(N, allowed_factors))
	       continue;

	  /* The header pass counts as "power of two" unless only
	     non-powers of two were requested. */
          is_power_of_two = (size_index == 0 &&
                             allowed_factors != NON_POWERS_OF_TWO_ONLY)
                            || (size_index != 0 && IS_POWER_OF_TWO(N));

          if (N == 0) {
	       dat_printf("Array Dimensions");
	       log_printf("Benchmarking FFTs:\n");
          }
          else {
	       log_printf("\nBenchmarking for array size = %dx%d%s:\n",
			  n[0],n[1],
			  is_power_of_two ? " (power of 2)" : "");
	       dat_printf("%dx%d",n[0],n[1]);
          }

	  /*******************************************************************/

	  do_fftwnd_rfft(2,n,n_rev,N,is_power_of_two,
			 arr,work,size_arr,size_work,
			 compute_accuracy,allowed_factors);

	  do_green_2d_rfft(2,n,n_rev,N,is_power_of_two,
			   arr,work,size_arr,size_work,
			   compute_accuracy,allowed_factors);

	  do_mfft_2d_rfft(2,n,n_rev,N,is_power_of_two,
			  arr,work,size_arr,size_work,
			  compute_accuracy,allowed_factors);

	  do_ooura_2d_rfft(2,n,n_rev,N,is_power_of_two,
			   arr,work,size_arr,size_work,
			   compute_accuracy,allowed_factors);	  

	  do_nrf_2d_rfft(2,n,n_rev,N,is_power_of_two,
			 arr,work,size_arr,size_work,
			 compute_accuracy,allowed_factors);

	  do_sgimath_2d_rfft(2,n,n_rev,N,is_power_of_two,
			 arr,work,size_arr,size_work,
			 compute_accuracy,allowed_factors);

	  do_scsl_2d_rfft(2,n,n_rev,N,is_power_of_two,
			 arr,work,size_arr,size_work,
			 compute_accuracy,allowed_factors);

	  /*******************************************************************/

	  dat_printf("\n");
	  log_printf("\n");

          compute_normalized_averages();
     }

     dat_printf("\n");
     output_normalized_averages();
     destroy_fft_data();

     fftw_free(arr);
     fftw_free(work);

     return 0;
}

#define MAX2(a,b) ((a)>(b) ? (a) : (b))

int get_rwork_size(int n[2])
/* Number of work-array elements to reserve for an n[0] x n[1]
   transform: the largest requirement among the formulas below that
   are compiled in. */
{
     /* baseline, always present: twice maxn(2,n) plus 8
	(original note: PDA requires the most work space) */
     int needed = 8 + 2*maxn(2,n);

#ifdef HAVE_LIBSCS
     {
	  /* requirement used when HAVE_LIBSCS is defined */
	  int scs_needed = n[0]*n[1] + (15+n[1]) + 2*(15+n[0]);

	  needed = MAX2(needed, scs_needed);
     }
#endif

#ifdef HAVE_LIBCOMPLIB_SGIMATH
     {
	  /* requirement used when HAVE_LIBCOMPLIB_SGIMATH is defined */
	  int sgimath_needed = (15+n[1]) + 2*(15+n[0]);

	  needed = MAX2(needed, sgimath_needed);
     }
#endif

     return needed;
}

