/*  granulate.c

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/

#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

/* Size, in bytes, of the buffer used when copying data into a segment. */
#define BUFSIZE 1024
/* Number of distinct symbols available for each filename-suffix digit. */
#define RADIX 26
/* Character that stands for digit value 0 in a filename suffix. */
#define BASE_SYMBOL 'a'

/* Write prefix followed by the counter digits (as symbols) into name. */
void makename(int *counter, char *name, int n_digits, char *prefix);
/* Step the multi-digit counter down by one. */
void decrement(int *counter, int n_digits);
/* Number of suffix digits to use for the output filenames. */
int calculate_digits(int in_size, int seg_size);

/*
 * Split a file into segments of at most seg_size bytes without needing
 * room for a second full copy.
 *
 * Usage: granulate seg_size infile outfile_prefix
 *
 * Segments are copied from the END of the input file into newly named
 * output files (prefix + a fixed-width suffix that counts downwards),
 * and the input file is truncated after each segment has been written.
 * Once the input has been truncated to zero length it is removed.
 *
 * Returns 0 on success.  Every failure prints a message on stderr and
 * exits with status 1.
 */
int main (int argc, char **argv) {
	struct stat in_stat;
	int in_fd, out_fd;
	ssize_t bytes_read;
	int *counter, n_digits, i, seg_ptr;
	int in_size, seg_size;
	long seg_arg;
	size_t prefix_size;
	char buf[BUFSIZE], *name, *end;

	if (argc != 4) {
		fprintf(stderr, "Usage: granulate seg_size infile outfile_prefix\n");
		exit(1);
	}

	/* This is the maximum size for our output files.  strtol() is used */
	/* so that junk such as "10k" or an out-of-range value is rejected  */
	/* instead of being silently misread.                               */
	errno = 0;
	seg_arg = strtol(argv[1], &end, 10);
	if (end == argv[1] || *end != '\0' || errno == ERANGE || seg_arg > INT_MAX) {
		fprintf(stderr, "invalid seg_size: %s\n", argv[1]);
		exit(1);
	}
	if (seg_arg < 1) {
		fprintf(stderr, "seg_size must be >= 1\n");
		exit(1);
	}
	seg_size = (int) seg_arg;

	/* Get stats on input file.  We need to know the size. */
	if (stat(argv[2], &in_stat) == -1) {
		fprintf(stderr, "couldn't stat() %s\n", argv[2]);
		exit(1);
	}

	/* Offsets are kept in int, so refuse files whose size would not    */
	/* fit rather than silently working with a wrapped-around size.     */
	/* INT_MAX itself is excluded because calculate_digits() adds one   */
	/* to the segment count.                                            */
	if (in_stat.st_size < 0 || in_stat.st_size >= INT_MAX) {
		fprintf(stderr, "error: infile too large\n");
		exit(1);
	}

	/* in_size holds the original size of the input file */
	in_size = (int) in_stat.st_size;

	if (seg_size > in_size) {
		fprintf(stderr, "error: seg_size greater than infile size\n");
		exit(1);
	}

	/* A prefix for output filenames is specified on the command line. */
	prefix_size = strlen(argv[3]);

	/* the number of digits needed for uniqueness of output filenames */
	n_digits = calculate_digits(in_size, seg_size);

	/* Counter is used to generate reverse sequential filenames. */
	if ((counter = malloc((size_t) n_digits * sizeof *counter)) == NULL) {
		fprintf(stderr, "malloc() failed for counter\n");
		exit(1);
	}

	/* Initialize counter to max value.  We'll count down. */
	for (i = 0; i < n_digits; i++) counter[i] = RADIX - 1;

	/* Name is a string buffer for assembling filenames. */
	if ((name = malloc(prefix_size + (size_t) n_digits + 1)) == NULL) {
		fprintf(stderr, "malloc() failed for name\n");
		exit(1);
	}

	/* Open the input file.  Write access is needed for ftruncate(). */
	if ((in_fd = open(argv[2], O_RDWR)) == -1) {
		fprintf(stderr, "open %s failed.\n", argv[2]);
		exit(1);
	}

	/* set the pointer to the end of infile */
	seg_ptr = in_size;

	/* *********************************************************** */
	/* In this loop, we copy segments of seg_size bytes from the   */
	/* end of the input file to (hopefully) uniquely named output  */
	/* files.  A new output file is created on each pass, and they */
	/* are named in reverse-lexicographical order.  Each time we   */
	/* finish writing to the output file, we truncate the input    */
	/* file, reducing its size by seg_size.  Maximum disk usage    */
	/* occurs just before truncating.  This is the size of the     */
	/* input file plus seg_size.  Making seg_size very small is    */
	/* not beneficial because filesystem overhead takes over.      */
	/* *********************************************************** */
	do {
		seg_ptr -= seg_size;

		/* trap the pointer when we get near the end (beginning, actually) */
		if (seg_ptr < 0) seg_ptr = 0;

		if (lseek(in_fd, seg_ptr, SEEK_SET) == -1) {
			fprintf(stderr, "lseek() failed\n");
			exit(1);
		}

		/* after this call, name will be a filename, i.e. "zzxc" */
		makename(counter, name, n_digits, argv[3]);

		/* after this call, counter holds the next name down, i.e. "zzxb" */
		decrement(counter, n_digits);

		/* Open the output file.  O_TRUNC makes sure a pre-existing, */
		/* longer file cannot leave stale bytes after our segment.   */
		if ((out_fd = open(name, O_WRONLY|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR)) == -1) {
			fprintf(stderr, "open %s failed.\n", name);
			exit(1);
		}

		/* Copy from sought position to the end, to the output file. */
		while ((bytes_read = read(in_fd, buf, BUFSIZE)) > 0) {
			if (write(out_fd, buf, (size_t) bytes_read) != bytes_read) {
				fprintf(stderr, "couldn't write to output file\n");
				exit(1);
			}
		}

		/* A read error must stop us BEFORE the input is truncated. */
		if (bytes_read < 0) {
			fprintf(stderr, "couldn't read from input file\n");
			exit(1);
		}

		/* Deferred write errors can surface at close(); the data must */
		/* be safely handed over before the only other copy is cut.    */
		if (close(out_fd) == -1) {
			fprintf(stderr, "couldn't close output file\n");
			exit(1);
		}

		/* Truncate the input file at the point where we copied from. */
		if ((ftruncate(in_fd, seg_ptr)) == -1) {
			perror("ftruncate(): ");
			exit(1);
		}
	} while (seg_ptr != 0);

	close(in_fd);

	free(name);
	free(counter);

	/* The input file has been truncated to nothing.  Now we delete. */
	if (remove(argv[2]) == -1) {
		fprintf(stderr, "remove() failed\n");
		exit(1);
	}
	return 0;
}

/*
 * Assemble an output filename in name: the characters of prefix,
 * followed by one symbol per counter digit, followed by a terminating
 * NUL.  Digit value d is rendered as the character BASE_SYMBOL + d.
 *
 * name must have room for strlen(prefix) + n_digits + 1 bytes.
 */
void makename(int *counter, char *name, int n_digits, char *prefix) {
	char *out = name;
	const char *in = prefix;
	int k;

	/* Copy the prefix, leaving out positioned just past it. */
	while (*in != '\0') {
		*out++ = *in++;
	}

	/* Append the digits, most significant (index 0) first. */
	for (k = 0; k < n_digits; k++) {
		*out++ = (char) counter[k] + BASE_SYMBOL;
	}

	*out = '\0';
}
	
/*
 * Subtract one from a multi-digit base-RADIX counter, in place.
 *
 * Each digit occupies one int.  The least significant digit lives at
 * the highest index (n_digits - 1), the most significant at index 0.
 * A borrow out of the most significant digit is discarded, so the
 * all-zero counter wraps around to all (RADIX - 1).
 */
void decrement(int *counter, int n_digits) {
	int pos = n_digits - 1;
	int borrow = 0;

	/* Take one off the least significant digit ... */
	counter[pos] -= 1;

	/* ... then walk towards the most significant digit, repairing */
	/* any digit that went negative and passing the borrow along.  */
	while (pos >= 0) {
		int digit = counter[pos] - borrow;

		borrow = (digit < 0);
		if (borrow) {
			digit += RADIX;
		}
		counter[pos] = digit;
		pos--;
	}
}

/*
 * Return how many base-RADIX digits the output filename suffix gets.
 *
 * The value counted is (in_size / seg_size) + 1, which is at least the
 * number of output files, so the suffixes are wide enough to be unique.
 * The digit count is found by dividing by RADIX until nothing is left;
 * the result is always at least 1.
 *
 * seg_size must be non-zero.
 */
int calculate_digits(int in_size, int seg_size) {
	int remaining = (in_size / seg_size) + 1;
	int width = 1;

	/* One digit is always needed; add one more for every further */
	/* division by RADIX that still leaves a non-zero quotient.   */
	for (remaining /= RADIX; remaining > 0; remaining /= RADIX) {
		width++;
	}
	return width;
}
