/* ----------------------------------------------------------------------
 * Measure codon deviation of ORFs
 * Copyright (C) 2000 January Weiner III
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307,
 * USA.
 ---------------------------------------------------------------------- */

#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <time.h>

#include "genpak.h"
#include "gp_getopt.h"

/* Version string printed by the -v option and in the help banner. */
#define VERSION "0.1"
/* Canonical program name printed in the help banner. */
#define PROGNAME "gp_cdndev"

/* Name the program was invoked with; set from argv[0] at the start of main(). */
char *progname ;

/*
 * Run-time state shared between main() and the per-sequence routines:
 * the open streams, the genetic code table, the codon usage tables and
 * the output switches chosen on the command line.
 */
typedef struct {
	/* Streams, in this order:
	 *   in       - sequences to analyse (stdin or a file)
	 *   out      - output stream (stdout or a file)
	 *   codon_f  - codon usage of the reference set
	 *   ctable_f - alternative genetic code table, NULL if none was given */
	FILE *in, *out, *codon_f, *ctable_f ;

	/* genetic code table: one entry per codon; each entry is used as an
	 * index into the 128-element amino acid arrays */
	int ctable[64] ;

	/* codon frequencies of the reference set */
	double cusage_r[64] ;

	/* codon frequencies of the input set, followed by the amino acid
	 * frequencies of the input set */
	double cusage_i[64] ;
	double aa_freq_i[128] ;

	/* running total of the counts returned for every sequence read */
	int aa_num_i ;

	/* line width; initialised in main() and not used elsewhere in this file */
	int width ;

	/* output switches: per-sequence bias lines / bias of the whole set */
	int show_orfs, show_total ;
} opt_s ;

/* Forward declarations of the helpers defined after main(). */

/* Fills aa_freq[] with the per-amino-acid sums of the codon values in cusage[]. */
int aa_freqs(int ctable[64], double cusage[64], double aa_freq[128]) ;
/* Returns the bias of codon usage cui[] relative to the reference cur[],
 * weighted by the amino acid frequencies aai[]. */
double calculate_bias(double cur[64], double cui[64], double aai[128], int ct[64]) ;
/* Returns the bias of one sequence and adds its codon counts to the totals in *o. */
double sequence_get_bias(sekw *s, opt_s *o) ;

/*
 *
 */

/*
 * Program entry point.
 *
 * Command line: [options] <codon usage file> [input file] [output file]
 *
 *  1. parse the options with gp_getopt();
 *  2. open the reference codon usage file (mandatory), the input stream
 *     (stdin when no file is given) and the output stream (stdout when no
 *     file is given);
 *  3. load the reference codon usage and convert it from percent to
 *     proportions; load the standard genetic code and, if -c was given,
 *     the alternative one on top of it;
 *  4. read sequences one by one, printing the bias of each when requested
 *     and accumulating the codon counts of the whole input set;
 *  5. print the bias of the whole input set when requested.
 *
 * All results are written to the selected output stream.
 * Returns EXIT_SUCCESS.
 */
int main(int argc, char *argv[])
{
	extern int optind ;
	/* extern char *optarg ;*/
	opt_s op ;
	sekw *inseq ;
	double bias ;
	int c;

	progname = argv[0] ;

	op.width = 70 ;
	op.codon_f = NULL ;
	op.ctable_f = NULL ;
	op.aa_num_i = 0 ;
	op.show_total = FALSE ;
	op.show_orfs = TRUE ;

	while ((c = gp_getopt(argc, argv, "otbc:Hqdvh")) != EOF)
		switch(c) {
		case 'b':
			op.show_orfs = TRUE ;
			op.show_total = TRUE ;
			break ;
		case 't':
			op.show_orfs = FALSE ;
			op.show_total = TRUE ;
			break ;
		case 'o':
			op.show_orfs = TRUE ;
			op.show_total = FALSE ;
			break ;
		case 'c':
			/* a repeated -c replaces the earlier table; do not leak its stream */
			if(op.ctable_f) fclose(op.ctable_f) ;
			op.ctable_f = gp_file_open(optarg, "r") ;
			break ;
		case 'H':
			html = TRUE ;
			break ;
		case 'q':
			quiet = TRUE ;
			break ;
		case 'v':
			fprintf(stderr, "%s version %s\n", progname, VERSION) ;
			exit(EXIT_SUCCESS) ;
			break ;
		case 'd':
			debug = TRUE ;
			gp_warn("Running in debug mode") ;
			break ;
		case 'h':
			Help() ;
			break ;
		default:
			gp_error("Type '%s -h' for help", progname) ;
			break;
		}

	/* one necessary argument: file containing codon usage */
	if(optind >= argc) {
		gp_error("Sorry, you have to specify the file with the codon usage") ;
	} else op.codon_f = gp_file_open(argv[optind], "r") ;

	optind++ ;

	/* open the file pointer to read the sequences 
	 * from: standard input or a file provided? */
	if(optind >= argc) op.in = stdin ;
	else op.in = gp_file_open(argv[optind],"r") ;

	/* opening the file pointer to write the output: 
	 * standard output or file provided? */
	optind++ ;

	if(optind >= argc) op.out = stdout ;
	else op.out = gp_file_open(argv[optind],"wb") ;

	/* load codon usage of the reference set from the provided file */
	gp_codon_load_usage(op.codon_f, op.cusage_r) ;

	/* we need to adjust the codes: codon usage tables use %, we need proportions */
	for(c = 0 ; c < 64 ; c++) op.cusage_r[c] /= 100 ;
	/* clear the codon usage table for the input set */
	for(c = 0 ; c < 64 ; c++) op.cusage_i[c] = 0.0 ;

	/* loading genetic code tables for input and output sequences */
	gp_codon_load_code_standard(op.ctable) ;

	if(op.ctable_f) gp_codon_load_code(op.ctable_f, op.ctable) ;

	/* results go to the selected output stream, not unconditionally to stdout */
	if(op.show_orfs) fprintf(op.out, "#bias\tsequence name\n") ;
	while( (inseq = gp_seq_read(op.in))) {
		bias = sequence_get_bias(inseq, &op) ;
		if(op.show_orfs) fprintf(op.out, "%.3f\t%s\n", bias, inseq->name) ;
		gp_seq_free(inseq) ;
	}

	if(debug) gp_warn("Total aa: %i", op.aa_num_i) ;

	if(op.aa_num_i > 0) {
		/* change absolute numbers into frequencies */
		for(c = 0 ; c < 64 ; c++) op.cusage_i[c] /= op.aa_num_i ;

		aa_freqs(op.ctable, op.cusage_i, op.aa_freq_i) ;
		bias = calculate_bias(op.cusage_r, op.cusage_i, op.aa_freq_i, op.ctable) ;
		if(op.show_total) fprintf(op.out, "#Total bias:\n%.3f\n", bias) ;
	} else if(op.show_total) {
		/* nothing was counted: dividing by zero would only produce NaN */
		gp_warn("No codons were read, total bias cannot be calculated") ;
	}

	if(html) gp_warn_print_all(op.out) ;
	/* closing streams */
	if(op.codon_f) fclose(op.codon_f) ;
	if(op.ctable_f) fclose(op.ctable_f) ;
	fclose(op.out) ;
	fclose(op.in) ;
	return EXIT_SUCCESS ;
}


/* given a reference set and an input set, calculate the codon bias */
/*
 * Codon bias of an input set relative to a reference set.
 *
 * For every amino acid code the absolute differences between reference and
 * input codon frequencies are summed over the codons that map to that code;
 * each sum is then weighted by the frequency of that code in the input set
 * and the weighted sums are added up.
 *
 *   cusage_r  - codon frequencies of the reference set (64 entries)
 *   cusage_i  - codon frequencies of the input set (64 entries)
 *   aa_freq_i - frequencies of the input set indexed by amino acid code
 *               (128 entries)
 *   ctable    - genetic code: amino acid code of each codon (64 entries)
 *
 * Codons whose code lies outside 0..127 are ignored, so a malformed genetic
 * code table cannot cause an out-of-bounds write.
 *
 * Returns the weighted sum.
 */
double calculate_bias(
	double cusage_r[64],
	double cusage_i[64],
	double aa_freq_i[128],
	int ctable[64]) {

	double res = 0.0, part[128] ;
	int i, aa ;

	/* zero the partial sums */
	for(i = 0 ; i < 128 ; i++) part[i] = 0.0 ;

	for(i = 0 ; i < 64 ; i++) {
		aa = ctable[i] ;
		/* skip codes that do not fit the 128-entry tables */
		if(aa < 0 || aa >= 128) continue ;
		part[aa] += fabs(cusage_r[i] - cusage_i[i]) ;
	}

	for(i = 0 ; i < 128 ; i++) res += part[i] * aa_freq_i[i] ;

	return res ;
}
	

/* given a sequence, (1) calculate the codon and aa frequencies (2) update the
 * codon and aa frequencies for the whole input set (3) calculate the predicted
 * bias for the sequence */
/*
 * Per-sequence work, in order:
 *   - obtain the codon usage of sequence s with gp_codon_get_usage()
 *     (presumably per-codon frequencies plus the number of codons counted;
 *     confirm against the library);
 *   - add the returned count to o->aa_num_i and add count * usage to
 *     o->cusage_i, which accumulates the totals of the whole input set;
 *   - derive the amino acid frequencies of this sequence and return its
 *     bias against the reference usage o->cusage_r.
 */
double sequence_get_bias(sekw *s, opt_s *o) {
	double seq_usage[64], seq_aa_freq[128] ;
	int codon ;
	int counted = gp_codon_get_usage(s, seq_usage) ;

	o->aa_num_i += counted ;

	/* scale the per-sequence values by the count before accumulating them */
	codon = 0 ;
	while(codon < 64) {
		o->cusage_i[codon] += counted * seq_usage[codon] ;
		codon++ ;
	}

	/* amino acid frequencies of this sequence only */
	aa_freqs(o->ctable, seq_usage, seq_aa_freq) ;

	return calculate_bias(o->cusage_r, seq_usage, seq_aa_freq, o->ctable) ;
}


/* calculate the aa frequencies when given the codon frequencies */
/*
 * Derive amino acid frequencies from codon frequencies.
 *
 *   ctable  - genetic code: amino acid code of each codon (64 entries)
 *   cusage  - value of each codon (64 entries)
 *   aa_freq - output, 128 entries indexed by amino acid code; cleared first,
 *             then every codon's value is added to the entry of its code
 *
 * Codons whose code lies outside 0..127 are ignored, so a malformed genetic
 * code table cannot cause an out-of-bounds write.
 *
 * Always returns EXIT_SUCCESS.
 */
int aa_freqs(int *ctable, double *cusage, double *aa_freq) {
	int aa, i ;

	for(i = 0 ; i < 128 ; i++) aa_freq[i] = 0.0 ; /* zero the aa_freq table */

	for(i = 0 ; i < 64 ; i++) {
		aa = ctable[i] ;
		/* skip codes that do not fit the 128-entry table */
		if(aa < 0 || aa >= 128) continue ;
		aa_freq[aa] += cusage[i] ;
	}

	return EXIT_SUCCESS ;
}


/* Standard message */
void Help()
{
	/*
	 * Print the usage screen to stdout and terminate the program with
	 * EXIT_SUCCESS; this function does not return.
	 *
	 * The option list mirrors the option string handled in main(),
	 * including -d (debug mode).
	 */
	fprintf(stdout,""
	"\n"
	"%s, v. %s- Measure codon deviation of ORFs\n"
	"\n"
	"  Usage:\n"
	"     %s [options] <codon usage file>  [ input file ] [ output file ]\n"
	"\n"
	"  Options:\n"
	"     -o        : show bias for all ORFs read (default)\n"
	"     -t        : show bias for the total set of read sequences\n"
	"     -b        : both of the above\n"
	"     -c <file> : use alternative gene code table\n"
	"     -H        : output adapted to be used in CGI/HTML\n"
	"     -d        : run in debug mode\n"
	"     -v        : print version information & exit\n"
	"     -h        : print this help screen & exit\n"
	"     -q        : quiet, suppress error messages\n\n",
	PROGNAME,VERSION,progname);
	exit(EXIT_SUCCESS);
}


