/* ----------------------------------------------------------------------
 * Adjust codon usage of an ORF
 * Copyright (C) 2000 January Weiner III
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307,
 * USA.
 ---------------------------------------------------------------------- */

#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include "genpak.h"
#include "gp_getopt.h"

/* version string printed by the -v option and in the help screen */
#define VERSION "0.2"
/* canonical program name printed in the heading of the help screen */
#define PROGNAME "gp_adjust"

/* name the program was invoked with; main() sets it to argv[0] */
char *progname ;

/* Run-time state shared by main(), usage_read() and usage_adjust() */
typedef struct {
	/* stream the sequences are read from (stdin or a named file) */
	FILE *in ;
	/* stream the adjusted sequences are written to (stdout or a named file) */
	FILE *out ; 
	/* codon usage file; handed to gp_codon_load_usage() in usage_read() */
	FILE *codon_f ; 
	/* optional code table file given with -u (NULL if absent); loaded into intable */
	FILE *intable_f ;
	/* optional code table file given with -s (NULL if absent); loaded into outable */
	FILE *outable_f ;
	/* code table, indexed by codon index; usage_read() uses it to find the
	 * amino acid each entry of the usage table belongs to */
	int intable[64] ;
	/* code table, indexed by codon index; usage_adjust() uses it to
	 * translate the codons of the sequences being read.
	 * NOTE(review): the in/out naming is the reverse of how the two tables
	 * are used -- compare with the -u / -s descriptions in Help() */
	int outable[64] ;

	/* stores which codon has the highest freq for a given aa;
	 * indexed by the amino acid code, -1 when no codon was recorded */
	int aa_highest_index[128] ;
	/* stores highest freq values for the given aa's (same indexing) */
	double aa_highest_value[128] ;

	/* codon usage values, indexed by codon index; filled in usage_read() */
	double cusage[64] ;
	/* presumably the line width of the FASTA output; main() initialises it to 70 */
	int width ;
} opt_s ;


/* loads the codon usage table and fills the per-amino-acid "best codon"
 * tables in *o; returns EXIT_SUCCESS */
int usage_read(opt_s *o) ;
/* returns a new sequence in which the codons of s are replaced by the
 * codons recorded in o->aa_highest_index */
sekw* usage_adjust(sekw *s, opt_s *o) ;

/*
 * Program entry point: parses the command line, opens the files, loads
 * the genetic code tables and the codon usage table, then rewrites every
 * sequence read from the input and prints it in FASTA format.
 *
 * Positional arguments, in this order:
 *   1. codon usage file (required)
 *   2. input file  (standard input when omitted)
 *   3. output file (standard output when omitted)
 *
 * Returns EXIT_SUCCESS when all sequences have been processed.
 */

int main(int argc, char *argv[])
{
	extern int optind ;
	opt_s options ;
	sekw *inseq, *outseq ;
	int c;

	progname = argv[0] ;

	options.width = 70 ;
	options.in = NULL ;
	options.out = NULL ;
	options.codon_f = NULL ;
	options.intable_f = NULL ;
	options.outable_f = NULL ;

	while ((c = gp_getopt(argc, argv, "u:s:Hqdvh")) != EOF)
		switch(c) {
		case 'u':
			options.intable_f = gp_file_open(optarg, "r") ;
			break ;
		case 's':
			options.outable_f = gp_file_open(optarg, "r") ;
			break ;
		case 'H':
			html = TRUE ;
			break ;
		case 'q':
			quiet = TRUE ;
			break ;
		case 'v':
			fprintf(stderr, "%s version %s\n", progname, VERSION) ;
			exit(EXIT_SUCCESS) ;
			break ;
		case 'd':
			debug = TRUE ;
			gp_warn("Running in debug mode") ;
			break ;
		case 'h':
			Help() ;
			break ;
		default:
			gp_error("Type '%s -h' for help", progname) ;
			break;
		}

	/* one necessary argument: file containing codon usage */
	if(optind >= argc) {
		gp_error("Sorry, you have to specify the file with the codon usage") ;
		/* gp_error() is not visible from this file; should it ever return,
		 * stop here instead of going on without a usage file */
		exit(EXIT_FAILURE) ;
	}
	options.codon_f = gp_file_open(argv[optind], "r") ;

	optind++ ;

	/* open the file pointer to read the sequences
	 * from: standard input or a file provided? */
	if(optind >= argc) options.in = stdin ;
	else options.in = gp_file_open(argv[optind],"r") ;

	/* opening the file pointer to write the output:
	 * standard output or file provided? */
	optind++ ;

	if(optind >= argc) options.out = stdout ;
	else options.out = gp_file_open(argv[optind],"wb") ;

	/* both tables start out as the standard code; a table given with
	 * -u or -s then replaces the corresponding default */
	gp_codon_load_code_standard(options.intable) ;
	gp_codon_load_code_standard(options.outable) ;

	if(options.intable_f) gp_codon_load_code(options.intable_f, options.intable) ;
	if(options.outable_f) gp_codon_load_code(options.outable_f, options.outable) ;

	usage_read(&options) ;

	while( (inseq = gp_seq_read(options.in)) != NULL) {
		outseq = usage_adjust(inseq, &options) ;
		gp_seq_print_fasta(options.out, outseq, options.width) ;
		/* usage_adjust() hands back a newly created sequence, so it has
		 * to be released here just like the input sequence */
		gp_seq_free(outseq) ;
		gp_seq_free(inseq) ;
	}

	if(html) gp_warn_print_all(options.out) ;
	if(options.intable_f) fclose(options.intable_f) ;
	if(options.outable_f) fclose(options.outable_f) ;
	if(options.codon_f) fclose(options.codon_f) ;
	fclose(options.out) ;
	fclose(options.in) ;
	return EXIT_SUCCESS ;
}


/*
 * Translates a DNA sequence codon by codon and replaces every codon by the
 * codon recorded in o->aa_highest_index for the amino acid it encodes.
 *
 *   s : sequence to convert; it is not modified
 *   o : options holding the code table (o->outable) and the table of
 *       preferred codons (o->aa_highest_index) filled by usage_read()
 *
 * Returns a new sequence obtained from gp_seq_copy_frag(); the caller owns
 * it.  Only whole codons are converted: when the length of s is not a
 * multiple of three the trailing one or two nucleotides are left out.
 * A codon that cannot be looked up (index or amino acid code outside the
 * tables, or no preferred codon recorded) is copied unchanged.
 */
sekw* usage_adjust(sekw *s, opt_s *o) {
	const int n_codons = (int) (sizeof o->outable / sizeof o->outable[0]) ;
	const int n_aa = (int) (sizeof o->aa_highest_index / sizeof o->aa_highest_index[0]) ;
	int i, index, aa, best, aa_num ;
	sekw *res ;
	char *pos, *dst ;
	char in_cdn[4] = "", out_cdn[4] = "" ;

	/* duplicate sequence; the copy provides the buffer the result is
	 * written into (presumably a full copy of s -- confirm in genpak) */
	res = gp_seq_copy_frag(s, 0, 0) ;

	aa_num = (s->leng / 3) ;
	if(debug) gp_warn("%i amino acids", aa_num) ;

	/* write through a moving pointer: appending with strcat() would
	 * rescan the growing result for every single codon */
	dst = res->sequ ;

	for(i = 0, pos = s->sequ ; i < aa_num ; i++, pos += 3, dst += 3) {

		memcpy(in_cdn, pos, 3) ;
		in_cdn[3] = '\0' ;

		/* get the index for the current codon */
		index = gp_codon_get_index(in_cdn) ;

		/* look up the preferred codon, but never index a table with a
		 * value that lies outside of it */
		best = -1 ;
		if(index >= 0 && index < n_codons) {
			/* get the aa encoded by the current codon */
			aa = o->outable[index] ;
			/* get the index of the high freq codon for this aa */
			if(aa >= 0 && aa < n_aa) best = o->aa_highest_index[aa] ;
		}

		if(best < 0 || best >= n_codons) {
			/* no change if there is no frequency recorded */
			memcpy(dst, in_cdn, 3) ;
		} else {
			/* get the most frequent codon for the given amino acid */
			gp_codon_get_codon(out_cdn, best) ;
			memcpy(dst, out_cdn, 3) ;
		}
	}

	*dst = '\0' ;

	/* only whole codons were written, keep the recorded length in step
	 * with the string that is actually stored */
	res->leng = 3 * aa_num ;

	return res ;
}


/*
 * Reads the codon usage table from o->codon_f into o->cusage and records,
 * for every amino acid code found in o->intable, the codon with the
 * highest usage (o->aa_highest_index) together with that usage value
 * (o->aa_highest_value).  Amino acid codes that never occur in the code
 * table keep the index -1.
 *
 * In debug mode the selected codon of every amino acid is reported
 * through gp_warn().
 *
 * Always returns EXIT_SUCCESS.
 */
int usage_read(opt_s *o) {
	const int n_codons = (int) (sizeof o->cusage / sizeof o->cusage[0]) ;
	const int n_aa = (int) (sizeof o->aa_highest_index / sizeof o->aa_highest_index[0]) ;
	double usage ;
	char codon[5] ;
	int aa, i ;

	/* zeroing the 'high score' table */
	for(i = 0 ; i < n_aa ; i++) {
		o->aa_highest_value[i] = 0.0 ;
		o->aa_highest_index[i] = -1 ;
	}

	/* loading codon usage table from file */
	gp_codon_load_usage(o->codon_f, o->cusage) ;

	for(i = 0 ; i < n_codons ; i++) {

		/* what AA is encoded by this codon - use the 'input' table! */
		aa = o->intable[i] ;
		usage = o->cusage[i] ;

		/* the amino acid code is used as an array subscript below; a
		 * code table with a value outside 0..127 must not reach it */
		if(aa < 0 || aa >= n_aa) {
			if(debug) gp_warn("codon %i: amino acid code %i is out of range, ignored", i, aa) ;
			continue ;
		}

		/* storing the highest freq for an aminoacid.  A stored value that
		 * is still below 0.00001 is always replaced, so an amino acid
		 * gets a codon assigned even if all its codons have zero usage
		 * (the last such codon wins) */
		if(o->aa_highest_value[aa] < usage || o->aa_highest_value[aa] < 0.00001) {
			o->aa_highest_value[aa] = usage ;
			o->aa_highest_index[aa] = i ;
		}
	}

	/* the report below is the only purpose of this loop, so it is
	 * skipped entirely unless debugging is switched on */
	if(debug) {
		for(i = 0 ; i < n_aa ; i++) {
			if(o->aa_highest_index[i] < 0) continue ;

			gp_codon_get_codon(codon, o->aa_highest_index[i]) ;
			gp_warn("AA: %c highest freq: %f codon: %s", i, o->aa_highest_value[i], codon) ;
		}

		gp_warn("loading done") ;
	}

	return EXIT_SUCCESS ;
}


/*
 * Standard message: prints the usage screen to standard output and
 * terminates the program with EXIT_SUCCESS.  Every option accepted by
 * main() is listed here.
 */
void Help()
{
	fprintf(stdout,""
	"\n"
	"%s, v. %s- adjust codon usage of a gene\n"
	"\n"
	"  Usage:\n"
	"     %s [options] <codon usage file>  [ input file ] [ output file ]\n"
	"\n"
	"  Options:\n"
	"     -u <file> : codon table for the codon usage table and output sequences\n"
	"     -s <file> : codon table for the input sequences\n"
	"     -H        : output adapted to be used in CGI/HTML\n"
	"     -d        : debug mode, print diagnostic messages\n"
	"     -v        : print version information & exit\n"
	"     -h        : print this help screen & exit\n"
	"     -q        : quiet, suppress error messages\n\n",
	PROGNAME,VERSION,progname);
	exit(EXIT_SUCCESS);
}


