#include "block2.h"

#include "block_io.h"
#include "script.h"
#include "utility.h"

/*
 * File-scope state shared by the routines below.  It is all (re)initialised
 * by process_blocks2() / init_arrays() at the start of each run.
 */

/* nstore: capacity of store as given by store_info->nstore; init_arrays()
   uses it to decide into how many slices a chunk has to be split.
   store: plane buffer; work_to_store() scatters block data into it and
   store_to_work() gathers data back out of it.
   plane_data: start of the plane currently being processed (init_plane()).
   work: buffer holding the single disk block being read or written. */
static int nstore;
static float *store;
static float *plane_data;
static float *work;

/* Copied from file_info->have_output; block_process2() only writes blocks
   when this is set. */
static Bool have_output;

/* Total number of dimensions, and how many of them are / are not listed in
   the dims[] of the first script. */
static int ndim;
static int ndim_process;
static int ndim_rest;

/* dim_process[] / dim_rest[]: the processed / remaining dimensions, in
   increasing order.  inverse_dim_process[d]: position of processed
   dimension d within dim_process[].  dim_processed[d]: TRUE if dimension d
   is processed. */
static int dim_process[MAX_NDIM];
static int dim_rest[MAX_NDIM];
static int inverse_dim_process[MAX_NDIM];
static Bool dim_processed[MAX_NDIM];

/* Products of nblocks_in[] / nblocks_out[] over the processed dimensions,
   i.e. how many blocks are read / written per chunk and slice. */
static int nblocks_per_input;
static int nblocks_per_output;

/* Per-dimension state of the block currently handled:
   chunk_blocks[]: block coordinate of the block,
   nblock_points[]: number of valid points in the block along each dimension
   (the last block in a dimension may be only partly filled),
   block_point[]: coordinate inside the block of the point being copied. */
static int chunk_blocks[MAX_NDIM];
static int nblock_points[MAX_NDIM];
static int block_point[MAX_NDIM];

/* nplanes_to_do: number of planes placed in store by the most recent
   work_to_store() call.
   total_nblocks_in: "total" result of CUMULATIVE() over nblocks_in[];
   presumably the number of blocks in the input file -- confirm against the
   macro definition. */
static int nplanes_to_do;
static int total_nblocks_in;

/* The scripts to run on every plane, as passed to process_blocks2(). */
static int nscripts;
static Script *scripts;

/* nchunks: product of nblocks_in[] over the remaining dimensions.
   nslices: number of passes made over each chunk so that the planes of one
   pass fit in store.
   begin_chunk / end_chunk / step_chunk: chunk loop bounds and direction
   (chunks are visited backwards in one in-place case, see init_arrays()). */
static int nchunks;
static int nslices;
static int begin_chunk;
static int end_chunk;
static int step_chunk;

/* size_of_block: "total" result of CUMULATIVE() over block_size[]; used as
   the block_size of both Block_IO descriptors.
   disk_block: index of the current block, computed by INDEX_OF_ARRAY() in
   init_block_in() / init_block_out() and passed to the block I/O calls. */
static int size_of_block;
static int disk_block;

/* npts_proc_block / npts_rest_block: products of block_size[] over the
   processed / remaining dimensions.
   npts_slice: npts_rest_block / nslices, the number of rest-points handled
   per slice.
   npoints_plane: spacing between consecutive planes in store; at least the
   largest npts_max of any script that has dimensions. */
static int npts_proc_block;
static int npts_rest_block;
static int npts_slice;
static int npoints_plane;

/* Point at size_info->block_size and size_info->npoints (not copies). */
static int *block_size;
static int *npoints_in;

/* npoints_out[], nblocks_in[], nblocks_out[] are indexed by dimension.
   base_point[] and plane_point[] are indexed by position in dim_process[]:
   base_point[] is the first point of the current block, plane_point[] the
   coordinate within the plane of the point being copied. */
static int npoints_out[MAX_NDIM];
static int nblocks_in[MAX_NDIM];
static int nblocks_out[MAX_NDIM];
static int base_point[MAX_NDIM];
static int plane_point[MAX_NDIM];

/* Multipliers used with ARRAY_OF_INDEX() / INDEX_OF_ARRAY() to convert
   between a linear index and a coordinate vector.
   cum_points_proc_*: point strides inside a plane, indexed by position in
   dim_process[] and accumulated in the order of scripts[0].dims[].
   cum_blocks_in/out, cum_block_size: filled by CUMULATIVE() over all
   dimensions.
   cum_blocks_proc_*, cum_block_size_proc: indexed by position in
   dim_process[].
   cum_blocks_rest, cum_block_size_rest: indexed by position in dim_rest[]. */
static int cum_points_proc_in[MAX_NDIM];
static int cum_points_proc_out[MAX_NDIM];
static int cum_blocks_in[MAX_NDIM];
static int cum_blocks_out[MAX_NDIM];
static int cum_blocks_proc_in[MAX_NDIM];
static int cum_blocks_proc_out[MAX_NDIM];
static int cum_blocks_rest[MAX_NDIM];
static int cum_block_size[MAX_NDIM];
static int cum_block_size_proc[MAX_NDIM];
static int cum_block_size_rest[MAX_NDIM];

/* Scratch coordinate vector receiving the result of ARRAY_OF_INDEX(). */
static int array[MAX_NDIM];

/* Block I/O descriptors for the input and output files.  block_io_out is
   also used for reading back blocks that are about to be rewritten. */
static Block_IO block_io_in;
static Block_IO block_io_out;

static void init_arrays()
{
    int i, j, k, m, n;

    for (i = 0; i < ndim; i++)
	dim_processed[i] = FALSE;

    for (i = 0; i < scripts[0].ndim; i++)
    {
	j = scripts[0].dims[i];
	dim_processed[j] = TRUE;
    }

    ndim_process = ndim_rest = 0;
    for (i = 0; i < ndim; i++)
    {
	if (dim_processed[i])
	{
	    dim_process[ndim_process] = i;
	    inverse_dim_process[i] = ndim_process;
	    ndim_process++;
	}
	else
	{
	    dim_rest[ndim_rest] = i;
	    ndim_rest++;
	}
    }

    COPY_VECTOR(npoints_out, npoints_in, ndim);

    npoints_plane = 1;

    for (n = 0; n < nscripts; n++)
    {
	for (j = 0; j < scripts[n].ndim; j++)
	{
	    k = scripts[n].dims[j];
	    npoints_plane = MAX(npoints_plane, scripts[n].npts_max);
	    npoints_out[k] = scripts[n].npts[j];
	}
    }

    BLOCKS(nblocks_in, npoints_in, block_size, ndim);
    BLOCKS(nblocks_out, npoints_out, block_size, ndim);

    CUMULATIVE(cum_blocks_in, nblocks_in, total_nblocks_in, ndim);
    CUMULATIVE(cum_blocks_out, nblocks_out, n, ndim);
    CUMULATIVE(cum_block_size, block_size, size_of_block, ndim);

    nblocks_per_input = 1;
    nblocks_per_output = 1;
    npts_proc_block = 1;
    for (i = 0; i < ndim_process; i++)
    {
	j = dim_process[i];

	cum_blocks_proc_in[i] = nblocks_per_input;
	cum_blocks_proc_out[i] = nblocks_per_output;
	cum_block_size_proc[i] = npts_proc_block;

	nblocks_per_input *= nblocks_in[j];
	nblocks_per_output *= nblocks_out[j];
	npts_proc_block *= block_size[j];
    }

    npts_rest_block = 1;
    for (i = 0; i < ndim_rest; i++)
    {
	j = dim_rest[i];

	cum_block_size_rest[i] = npts_rest_block;
	npts_rest_block *= block_size[j];
    }

    m = n = 1;
    for (i = 0; i < scripts[0].ndim; i++)
    {
	j = scripts[0].dims[i];
	k = inverse_dim_process[j];

	cum_points_proc_in[k] = m;
	cum_points_proc_out[k] = n;

	m *= npoints_in[j];
	n *= npoints_out[j];
    }

    nchunks = 1;

    for (i = 0; i < ndim_rest; i++)
    {
	j = dim_rest[i];
	cum_blocks_rest[i] = nchunks;
	nchunks *= nblocks_in[j];
		/* Note: for these j, nblocks_in == nblocks_out */
    }

    if ((nblocks_per_output > nblocks_per_input) &&
				(block_io_in.file == block_io_out.file))
    {
	begin_chunk = nchunks - 1;  /* work from the end of the file */
	end_chunk = -1;
	step_chunk = -1;
    }
    else
    {
	begin_chunk = 0;  /* work from the beginning of the file */
	end_chunk = nchunks;
	step_chunk = 1;
    }

    n = 1 + (npoints_plane*npts_rest_block - 1)/nstore;
 
    for (nslices = n; nslices <= npts_rest_block; nslices++)
    	if ((npts_rest_block % nslices) == 0)
	    break;

    npts_slice = npts_rest_block / nslices;
}

/*
 * Select chunk number `chunk`: record, for every remaining (unprocessed)
 * dimension, the block coordinate of the chunk and how many points of that
 * block are valid.
 */
static void init_chunk(int chunk)
{
    int r, dim, blk;

    ARRAY_OF_INDEX(array, chunk, cum_blocks_rest, ndim_rest);

    for (r = 0; r < ndim_rest; r++)
    {
        dim = dim_rest[r];
        blk = array[r];

        chunk_blocks[dim] = blk;

        /* only the last block in a dimension can be partly filled */
        nblock_points[dim] = (blk == (nblocks_in[dim] - 1))
                ? 1 + (npoints_in[dim] - 1) % block_size[dim]
                : block_size[dim];
    }
}

static void init_block_in(int block)
{
    int j, k;

    ARRAY_OF_INDEX(array, block, cum_blocks_proc_in, ndim_process);

    for (j = 0; j < ndim_process; j++)
    {
	k = dim_process[j];
	chunk_blocks[k] = array[j];

	if (array[j] == (nblocks_in[k]-1))  /* last block */
	    nblock_points[k] = 1 + (npoints_in[k]-1) % block_size[k];
	else
	    nblock_points[k] = block_size[k];

	base_point[j] = block_size[k] * array[j];
    }

    INDEX_OF_ARRAY(disk_block, chunk_blocks, cum_blocks_in, ndim);
}

/*
 * Read block disk_block of the input file into work.  `block` is only used
 * to prefix the error message; the reader's own message is appended after
 * that prefix.
 */
static Status disk_to_work(int block, String error_msg)
{
    char *detail;

    sprintf(error_msg, "block %d: ", block);
    detail = error_msg + strlen(error_msg);

    CHECK_STATUS(read_file_block(&block_io_in, disk_block, work, detail));

    return  OK;
}

static void work_to_store(int slice, int block)
{
    int i, j, k, m, n, base_store, block_pt, pt;
    Bool flag;

    nplanes_to_do = 0;
    base_store = 0;
    n = slice * npts_slice;

    for (i = 0; i < npts_slice; i++, n++)
    {
	ARRAY_OF_INDEX(array, n, cum_block_size_rest, ndim_rest);

	flag = FALSE;
	for (j = 0; j < ndim_rest; j++)
	{
	    k = dim_rest[j];

	    if (array[j] >= nblock_points[k])
	    {
		flag = TRUE;
		break;
	    }

	    block_point[k] = array[j];
	}

	if (flag)  continue;

	for (m = 0; m < npts_proc_block; m++)
	{
	    ARRAY_OF_INDEX(array, m, cum_block_size_proc, ndim_process);

	    flag = FALSE;
	    for (j = 0; j < ndim_process; j++)
	    {
		k = dim_process[j];

	    	if (array[j] >= nblock_points[k])
	    	{
		    flag = TRUE;
		    break;
	    	}

		block_point[k] = array[j];
		plane_point[j] = array[j] + base_point[j];
	    }

	    if (flag)  continue;

	    INDEX_OF_ARRAY(block_pt, block_point, cum_block_size, ndim);
	    INDEX_OF_ARRAY(pt, plane_point, cum_points_proc_in, ndim_process);

	    store[pt+base_store] = work[block_pt];
	}

	nplanes_to_do++;
	base_store += npoints_plane;
    }
}

static void init_block_out(int block)
{
    int j, k;

    ARRAY_OF_INDEX(array, block, cum_blocks_proc_out, ndim_process);

    for (j = 0; j < ndim_process; j++)
    {
	k = dim_process[j];
	chunk_blocks[k] = array[j];

	if (array[j] == (nblocks_out[k]-1))  /* last block */
	    nblock_points[k] = 1 + (npoints_out[k]-1) % block_size[k];
	else
	    nblock_points[k] = block_size[k];

	base_point[j] = block_size[k] * array[j];
    }

    INDEX_OF_ARRAY(disk_block, chunk_blocks, cum_blocks_out, ndim);
}

/*
 * Read block disk_block of the output file back into work.  `block` is only
 * used to prefix the error message; the reader's own message is appended
 * after that prefix.
 */
static Status work_from_disk(int block, String error_msg)
{
    char *detail;

    sprintf(error_msg, "read block %d: ", block);
    detail = error_msg + strlen(error_msg);

    CHECK_STATUS(read_file_block(&block_io_out, disk_block, work, detail));

    return  OK;
}

/*
 * Write work to block disk_block of the output file.  `block` is only used
 * to prefix the error message; the writer's own message is appended after
 * that prefix.
 */
static Status work_to_disk(int block, String error_msg)
{
    char *detail;

    sprintf(error_msg, "block %d: ", block);
    detail = error_msg + strlen(error_msg);

    CHECK_STATUS(write_file_block(&block_io_out, disk_block, work, detail));

    return  OK;
}

static void store_to_work(int slice, int block)
{
    int i, j, k, m, n, base_store, block_pt, pt;
    Bool flag;

    base_store = 0;
    n = slice * npts_slice;

    for (i = 0; i < npts_slice; i++, n++)
    {
	ARRAY_OF_INDEX(array, n, cum_block_size_rest, ndim_rest);

	flag = FALSE;
	for (j = 0; j < ndim_rest; j++)
	{
	    k = dim_rest[j];

	    if (array[j] >= nblock_points[k])
	    {
		flag = TRUE;
		break;
	    }

	    block_point[k] = array[j];
	}

	if (flag)  continue;

	for (m = 0; m < npts_proc_block; m++)
	{
	    ARRAY_OF_INDEX(array, m, cum_block_size_proc, ndim_process);

	    flag = FALSE;
	    for (j = 0; j < ndim_process; j++)
	    {
		k = dim_process[j];

	    	if (array[j] >= nblock_points[k])
	    	{
		    flag = TRUE;
		    break;
	    	}

		block_point[k] = array[j];
		plane_point[j] = array[j] + base_point[j];
	    }

	    if (flag)  continue;

	    INDEX_OF_ARRAY(block_pt, block_point, cum_block_size, ndim);
	    INDEX_OF_ARRAY(pt, plane_point, cum_points_proc_out, ndim_process);

	    work[block_pt] = store[pt+base_store];
	}

	base_store += npoints_plane;
    }
}

/*
 * Point plane_data at plane number `plane` in store; consecutive planes
 * are npoints_plane values apart.
 */
static void init_plane(int plane)
{
    plane_data = &store[plane * npoints_plane];
}

/*
 * Run every command of script number `n`, in order, on the current plane
 * (plane_data).
 */
static void process_script(int n)
{
    int idx;
    const int ncommands = scripts[n].ncommands;

    for (idx = 0; idx < ncommands; idx++)
    {
        Command *cmd = &scripts[n].commands[idx];

        cmd->do_process(cmd->code, plane_data);
    }
}

/*
 * Main loop: for every chunk (in the order fixed by init_arrays()) and every
 * slice of it, read the input blocks into store, run all scripts on every
 * plane in store and, if output is wanted, write the planes back out as
 * output blocks.
 *
 * error_msg receives a "chunk ..., slice ..., " prefix; messages from the
 * block routines are appended after it.  Returns OK when all chunks have
 * been handled; a failing block routine ends the function early through
 * CHECK_STATUS().
 */
static Status block_process2(String error_msg)
{
    int chunk, slice, blk, script, plane;
    int progress = 0;
    char *detail;

    for (chunk = begin_chunk; chunk != end_chunk; chunk += step_chunk)
    {
        progress++;
        printf("\t... working on chunk %d of %d\n", progress, nchunks);
        FLUSH;

        init_chunk(chunk);

        for (slice = 0; slice < nslices; slice++)
        {
            sprintf(error_msg, "chunk %d, slice %d, ", chunk, slice);
            detail = error_msg + strlen(error_msg);

            /* input blocks -> store */
            for (blk = 0; blk < nblocks_per_input; blk++)
            {
                init_block_in(blk);

                CHECK_STATUS(disk_to_work(blk, detail));

                work_to_store(slice, blk);
            }

            /* each script is run over all planes before the next one starts */
            for (script = 0; script < nscripts; script++)
            {
                for (plane = 0; plane < nplanes_to_do; plane++)
                {
                    init_plane(plane);
                    process_script(script);
                }
            }

            if (!have_output)
                continue;

            /* when reading and writing the same file, hand last_done over
               from the input descriptor to the output descriptor ... */
            if (block_io_in.file == block_io_out.file)
                block_io_out.last_done = block_io_in.last_done;

            /* store -> output blocks */
            for (blk = 0; blk < nblocks_per_output; blk++)
            {
                init_block_out(blk);

                /* The block is read back before being modified for every
                   slice after the first, and also when writing to the input
                   file a block that lies within the input block count.
                   (An earlier variant of the second test additionally
                   required nslices > 1.) */
                if ((slice > 0) ||
                        ((block_io_in.file == block_io_out.file)
                            && (disk_block < total_nblocks_in)))
                {
                    CHECK_STATUS(work_from_disk(blk, detail));
                }

                store_to_work(slice, blk);

                CHECK_STATUS(work_to_disk(blk, detail));
            }

            /* ... and hand it back again afterwards */
            if (block_io_in.file == block_io_out.file)
                block_io_in.last_done = block_io_out.last_done;
        }
    }

    return  OK;
}

/*
 * Run a list of scripts over a blocked data file, plane by plane.
 *
 * size_info   ndim, block_size[] and npoints[] of the input data.  When
 *             there is output, npoints[] is overwritten with the output
 *             sizes before returning.
 * store_info  store (plane buffer holding nstore values) and work (buffer
 *             for one disk block).
 * file_info   input/output files and format flags.  The data must be
 *             blocked.  When there is output, the input-describing fields
 *             (input_file, swapped, integer, blocked, header) are updated
 *             to describe the file just written.
 * n, s        number of scripts and the scripts themselves; all scripts
 *             must name the same dimensions in the same order.
 * error_msg   receives a description of the failure.
 *
 * Returns OK on success.  On invalid arguments or a failing I/O routine the
 * function returns early through RETURN_ERROR_MSG() / CHECK_STATUS().
 */
Status process_blocks2(Size_info *size_info, Store_info *store_info,
                        File_info *file_info, int n,
                        Script *s, String error_msg)
{
    int i, j;

    nscripts = n;
    scripts = s;

    ndim = size_info->ndim;
    block_size = size_info->block_size;
    npoints_in = size_info->npoints;

    store = store_info->store;
    nstore = store_info->nstore;
    work = store_info->work;

    have_output = file_info->have_output;

    if (!(file_info->blocked))
    {
        RETURN_ERROR_MSG("must have blocked data in process_blocks2()");
    }

    /* init_arrays() reads scripts[0] unconditionally */
    if (nscripts < 1)
    {
        RETURN_ERROR_MSG("must have at least one script in process_blocks2()");
    }

    /* init_arrays() divides by nstore when working out the slice count */
    if (nstore < 1)
    {
        RETURN_ERROR_MSG("must have positive store size in process_blocks2()");
    }

    for (i = 1; i < nscripts; i++)
    {
        if (scripts[i].ndim != scripts[0].ndim)
        {
            RETURN_ERROR_MSG("must have same ndim for all scripts in process_blocks2");
        }

        for (j = 0; j < scripts[0].ndim; j++)
        {
            if (scripts[i].dims[j] != scripts[0].dims[j])
            {
                RETURN_ERROR_MSG("must have same dims in same order for all scripts in process_blocks2");
            }
        }
    }

    /* the file handles are needed by init_arrays() to pick the chunk order */
    block_io_in.file = file_info->file_in;
    block_io_out.file = file_info->file_out;

    init_arrays();

    /* npts_slice is 0 exactly when a single plane does not fit in store.
       Every slice would then be empty: nothing would be processed, yet
       blocks would still be written from whatever work happens to hold. */
    if (npts_slice < 1)
    {
        RETURN_ERROR_MSG("store too small to hold one plane in process_blocks2()");
    }

    block_io_in.name = file_info->input_file;
    block_io_in.swapped = file_info->swapped;
    block_io_in.integer = file_info->integer;
    block_io_in.deflated = FALSE;
    block_io_in.header = file_info->header;
    block_io_in.dir_size = 0;
    block_io_in.directory = (int *) NULL;
    block_io_in.block_size = size_of_block;
    block_io_in.byte_size = file_info->byte_size;

    block_io_out.name = file_info->output_file;
    block_io_out.block_size = size_of_block;
    block_io_out.deflated = FALSE;

    /* following required because sometimes block_io_out used for input */
    block_io_out.swapped = determine_swapped();
    block_io_out.integer = FALSE;
    block_io_out.header = 0;
    block_io_out.byte_size = BYTES_PER_WORD;

    CHECK_STATUS(init_block_read(&block_io_in, error_msg));
    CHECK_STATUS(init_block_write(&block_io_out, error_msg));

    CHECK_STATUS(block_process2(error_msg));

    if (have_output)
    {
        /* the output of this run becomes the input of the next one;
           npoints_in aliases size_info->npoints */
        COPY_VECTOR(npoints_in, npoints_out, ndim);

        file_info->input_file = file_info->output_file;
        file_info->swapped = determine_swapped();
        file_info->integer = FALSE;
        file_info->blocked = TRUE;
        file_info->header = 0;
    }

    return  OK;
}
