/* ----------------------------------------------------------------------
 * cusage -- determining the codon usage
 * Copyright (C) 2000 January Weiner III
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307,
 * USA.
 ---------------------------------------------------------------------- */

#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>

#include "genpak.h"
#include "gp_getopt.h"

#ifdef NLS
	#include <libintl.h>
	#define _(string) gettext (string)
#else
	#define _(string) (string)
	#define bindtextdomain(string)
	#define textdomain(string)
#endif



#define VERSION "0.3"
#define PROGNAME "gp_cusage"

/* Name under which the program was started; main() sets it to argv[0]. */
char *progname ;

/*
 * Table to convert amino acid codes from one letter codes to three letter
 * codes. It is indexed by the one letter character and each entry is
 * printed as a string by the table printing functions below. Nothing in
 * this file fills it in; presumably gp_codon_init_conversion(), which
 * main() calls first, does -- TODO confirm.
 */
char one2three[128][4] ;

/*
 * Codon usage table: one value per codon, indexed by the first, second and
 * third base of the codon (A = 0, C = 1, G = 2, T or U = 3).
 * CountCodons() accumulates raw occurrence counts in it; main() afterwards
 * rescales the entries to percentages of all codons counted.
 */
typedef struct {
	double tbl[4][4][4] ;
} codonu ;

/* Program options and results, filled in by main(). */
typedef struct {
	FILE *out ;      /* set to NULL by main(), which uses a local stream instead */
	FILE *in ;       /* set to NULL by main(), which uses a local stream instead */
	FILE *opt_ctab ; /* optional codon table file given with -c */
	int onlyprint ;  /* TRUE (-o): print the codon table only, read no sequences */
	int tableformat ; /* 0 (-u): plain list, 1 (-n, the default): formatted table */
	codonu cod ;     /* results of the codon usage analysis */
} opt_s ;

/* Forward declarations of the functions defined further down in this file. */

/* print the codon usage (frequencies) together with the codon assignments */
int PrintCodonUsageTable(FILE* out, codonu *intable, codont *cdtable, int type) ;
/* read codon assignments from a file into outtable, which is also returned */
codont* ReadCodonTable(FILE *in, codont* outtable) ;
/* add the codons of one sequence to outtable */
long CountCodons(sekw *inseq, codont* intable, codonu *outtable) ;
/* print the codon assignments only */
int PrintCodonTable(FILE* out, codont *intable,int type) ;
/* set all entries of a codon usage table to zero */
void ZeroCodonTable(codonu *table) ;



/*
 *
 */

int main(int argc, char *argv[])
{
	extern int optind ;
	extern char *optarg ;
	FILE *in, *out, *codet ;
	sekw *inseq ;
	codont *tabela ;
	int nseq = 0 ;

	opt_s opt ;

	int ncodons = 0;
	int i,j,k,c;
	char message[100] ;
	int errflg = 0 ;

	opt.in = NULL ;
	opt.out = NULL ;
	opt.opt_ctab = NULL ;
	opt.onlyprint = FALSE ;
	opt.tableformat = 1 ;

	textdomain("genpak") ;

	/* Load the translation to three letter AA code */
	gp_codon_init_conversion() ;

	/* Load the standard code table */
	tabela = gp_codon_table_load_std() ;

	/* Initialize the structure holding all warnings */
	allwarnings = NULL ;
	progname = argv[0] ;

	while ((c = gp_getopt(argc, argv, "ounc:Hqvdh")) != EOF)
		switch(c) {
		case 'u':
			opt.tableformat = 0 ;
			break ;
		case 'n':
			opt.tableformat = 1 ;
			break ;
		case 'o':
			opt.onlyprint = TRUE ;
			break ;
		case 'c':
			opt.opt_ctab = gp_file_open(optarg,"r") ;
			ReadCodonTable(opt.opt_ctab,tabela) ;
			if(debug) gp_warn(_("Using optional codon table %s"),optarg) ;
			break ;
		case 'H':
			html = TRUE ;
			break ;
		case 'q':
			quiet = TRUE ;
			break ;
		case 'v':
			fprintf(stderr,"%s version %s\n",progname,VERSION) ;
			return EXIT_SUCCESS ;
			break ;
		case 'd':
			debug = TRUE ;
			gp_warn(_("Running in debug mode")) ;
			break ;
		case 'h':
			Help() ;
			break ;
		default:
			gp_error(_("Type '%s -h' for help"),progname) ;
			break;
		}

	/* open the file pointer to read the sequences 
 	 * from: standard input or a file provided? 
 	 * We must only do it if we _need_ any input */

	if(opt.onlyprint == FALSE) {
		if(optind >= argc)  in = stdin ; 
		else in = gp_file_open(argv[optind], "r") ;
		optind++ ;
	}

/* 
 * opening the file pointer to write the output: 
 * standard output or file provided? 
 */

	if(optind >= argc) out = stdout ;
	else in = gp_file_open(argv[optind],"wb") ;

	if(opt.onlyprint == TRUE) {
		PrintCodonTable(out,tabela,opt.tableformat) ;
		return EXIT_SUCCESS ;
	}

	ZeroCodonTable(&opt.cod) ;

	while( (inseq = gp_seq_read_fragment(in,0,0,0)) != NULL) {
		nseq++ ;
		ncodons = ncodons + CountCodons(inseq,tabela,&opt.cod) ;
		gp_seq_free(inseq) ;
	}

	for(i = 0;i<4;i++)
		for(j = 0;j<4;j++)
			for(k = 0;k<4;k++)
				opt.cod.tbl[i][j][k] = opt.cod.tbl[i][j][k]/ncodons * 100 ;


	if(debug) gp_warn(_("Total %i codons"),ncodons) ;
	if(nseq > 0) PrintCodonUsageTable(out,&opt.cod,tabela,opt.tableformat) ;
	
	if(html) gp_warn_print_all(out) ;
	fclose(out) ;
	fclose(in) ;
	return(EXIT_SUCCESS);
}


/*
 * Count the codons of a sequence.
 *
 * The sequence is read in triplets from its first character. Every triplet
 * read increments the matching entry of outtable; reading stops after the
 * first triplet that intable marks as a stop codon ('0'), at the end of the
 * sequence, or at the first character that is not one of A, C, G, T, U.
 *
 * inseq    - sequence to analyse; must begin with a start codon
 * intable  - codon table used to recognise the stop codon
 * outtable - usage table the counts are added to
 *
 * Returns 0 (and counts nothing) when the sequence is shorter than one
 * codon, does not begin with a start codon, or has an invalid character in
 * its first triplet. Otherwise returns the number of triplets read before
 * the one that ended the loop.
 *
 * NOTE(review): a terminating stop codon is added to outtable but is not
 * included in the returned number -- confirm that this is intended.
 */
long CountCodons(sekw *inseq, codont* intable, codonu* outtable) {

	/* i is a counter for triplet acquirement, j denotes the current 
	 * position in the sequence. aaread denotes the number of triplets
	 * read before the terminating one */

	long dlugosc, i, j = 0, aaread = 0 ;
	char t ;
	unsigned char base ;

	/* 
	 * The Conv matrix converts the nucleic acid letter to the
	 * appropriate coordinate for the codont matrix; 99 marks
	 * characters that are not nucleotides
	 */
	int Conv[128], coord[3] ;
	int check = TRUE ; /* FALSE once the sequence ended before a full triplet */
	int valid = TRUE ; /* FALSE once a non-nucleotide character was met */

	for(i = 0;i<128;i++) Conv[i] = 99 ;
	Conv['A'] = 0 ; Conv['C'] = 1; Conv['G'] = 2 ; Conv['T'] = 3 ; Conv['U'] = 3 ;

	dlugosc = strlen(inseq->sequ) ;

	/* first triplet: is it there at all, and is it a valid start codon? */
	if(dlugosc < 3 || gp_codon_isstart(inseq,0) == FALSE) {
		gp_warn(_("Sequence %s does not start with a start codon!"), inseq->name) ;
		return(0) ;
	}

	for(i = 0;i<3;i++,j++) {
		/* index through unsigned char so that no character can produce a
		 * negative or too large index into Conv */
		base = (unsigned char) inseq->sequ[j] ;
		coord[i] = (base < 128) ? Conv[base] : 99 ;
		if(coord[i] > 3) valid = FALSE ;
	}

	/* never use a coordinate outside 0..3 as an index into the tables */
	if(valid == FALSE) {
		gp_warn(_("Sequence %s contains a character that is not a nucleotide"),
			inseq->name) ;
		return(0) ;
	}

	outtable->tbl[coord[0]][coord[1]][coord[2]]++ ;
	t = intable->tbl[coord[0]][coord[1]][coord[2]] ;

	while(t != '0') {

		aaread++ ;

		/* getting the next triplet. Watch out for the end of the sequence:
		 * j keeps growing inside this loop, so every position at or past
		 * the end must be rejected, not only the end itself */
		for(i = 0;i<3;i++,j++) {
			if(j >= dlugosc) check = FALSE ;
			else {
				base = (unsigned char) inseq->sequ[j] ;
				coord[i] = (base < 128) ? Conv[base] : 99 ;
				if(coord[i] > 3) valid = FALSE ;
			}
		}

		if(check == TRUE && valid == TRUE) {
			outtable->tbl[coord[0]][coord[1]][coord[2]]++ ;
			t = intable->tbl[coord[0]][coord[1]][coord[2]] ;
		}
		else t = '0' ;
	}
	
	if(check == FALSE) 
		gp_warn(_("Preliminary end of sequence %s! No stop codon found"), inseq->name) ;
	else if(valid == FALSE)
		gp_warn(_("Sequence %s contains a character that is not a nucleotide"),
			inseq->name) ;

	return(aaread) ;

}



/*
 * Reset a codon usage table: every one of its 4 x 4 x 4 entries is set
 * to zero.
 */
void ZeroCodonTable(codonu *table) {
	int cell ;

	/* walk the 64 codons with one counter and split it into the
	 * first, second and third base index */
	for(cell = 0; cell < 4 * 4 * 4; cell++) {
		int first  = cell / 16 ;
		int second = (cell / 4) % 4 ;
		int third  = cell % 4 ;

		table->tbl[first][second][third] = 0.0 ;
	}
}


/* 
 * Read the codon table from a file. The provided outtable will be directly
 * modified. Actually, making this procedure codont* makes no sense, but
 * it's because of backward compatibility.
 *
 * File format, one codon per line: three bases (A, C, G, T or U, in upper
 * or lower case) followed by the one letter amino acid code, or 0 for a
 * stop codon. Blanks and tabs before each of these four characters are
 * skipped. Lines starting with '#' and empty lines are ignored.
 *
 * A line that cannot be parsed produces a warning and is skipped; it does
 * not affect the lines that follow. Only the codons found in the file are
 * overwritten in outtable.
 *
 * in       - stream to read from
 * outtable - table to modify
 *
 * Returns outtable.
 */

codont* ReadCodonTable(FILE *in, codont* outtable) {
	int i,j ;
	int t ;
	int coord[3] ;
	char bufor[BUFSIZ] ;
	int codons_read = 0, lines_read = 0, Conv[128] ;
	int check ;

	if(in == NULL) {
		gp_warn(_("Problems reading codon table")) ;
		return outtable ;
	}

	/* 
	 * The Conv matrix converts the nucleic acid letter to the
	 * appropriate coordinate for the codont matrix; 99 marks
	 * characters that are not nucleotides
	 */

	for(i = 0;i<128;i++) Conv[i] = 99 ;
	Conv['A'] = 0 ; Conv['C'] = 1; Conv['G'] = 2 ; Conv['T'] = 3 ; Conv['U'] = 3 ;

	while(fgets(bufor,BUFSIZ,in) != NULL) {

		/* skipping comments and blank lines */
		lines_read++ ;
		if(bufor[0] == '#' || bufor [0] == '\n') continue ; 

		/* every line is judged on its own */
		check = TRUE ;
		t = 0 ;

		/* reading the codon coordinates; stop at the first bad character
		 * so that j never moves past the end of the line */
		for(i = 0,j = 0;i<3 && check == TRUE;i++) {
			/* skipping blanks */
			while(bufor[j] == ' ' || bufor[j] == '\t') j++ ;
			/* toupper() needs a value in the unsigned char range */
			t = toupper((unsigned char) bufor[j]) ;

			/* checking if the bases are in "ATCGU" */
			if(t <= 0 || t > 127 || Conv[t] > 3) {
				check = FALSE ;
			} else {
				coord[i] = Conv[t] ;
				j++ ;
			}
		}
		
		/* reading the corresponding amino acid letter */
		if(check == TRUE) {
			/* skipping blanks */
			while(bufor[j] == ' ' || bufor[j] == '\t') j++ ;
			t = toupper((unsigned char) bufor[j]) ;
			/* strchr() also matches the terminating NUL of the list, so
			 * the end of the line has to be excluded explicitly */
			if(t == '\0' || strchr("0GAVLIPCMFWSTYNQKRHDE",t) == NULL)
				check = FALSE ;
		}

		if(check == FALSE) {
			if(debug)
				fprintf(stderr,"line %i: unexpected character code %i\n",
								lines_read, t) ;
			gp_warn(_("Problems reading codon table")) ;
		} else {
			outtable->tbl[coord[0]][coord[1]][coord[2]] = (char) t ;
			codons_read++ ;
		}

	}

	if(debug) 
		fprintf(stderr,"%i lines read, %i codons read\n",
						lines_read, codons_read) ;

	return outtable ;
}


/*
 * Printing out the codon usage table. Currently supported formats:
 * 0 - one codon per line: amino acid one letter code, three letter code,
 *     codon and frequency
 * 1 - nice table with the bases in the order U, C, A, G; an ASCII table,
 *     or an HTML table when the global html flag is set
 */

/* order in which the bases are laid out in the formatted tables */
static const int usage_order[4] = { 3, 1, 0, 2 } ;
/* letter printed for each base index */
static const char usage_base[4] = { 'A', 'C', 'G', 'U' } ;

/* Format 0: plain list, one codon per line, in index order. */
static void print_usage_list(FILE* out, codonu *intable, codont *cdtable) {
	int i, j, k ;

	fputs(_("Codon table \n"), out) ;
	for(i = 0;i<4;i++)
		for(j = 0;j<4;j++)
			for(k = 0;k<4;k++) {
				fprintf(out,"%c %s %c%c%c %4.2f\n",
					cdtable->tbl[i][j][k],
					one2three[cdtable->tbl[i][j][k]],
					usage_base[i], usage_base[j], usage_base[k], 
					intable->tbl[i][j][k]) ;
			}
}

/* Format 1, text mode: ASCII table, one row per first and third base. */
static void print_usage_ascii(FILE* out, codonu *intable, codont *cdtable) {
	int i, ii, j, jj, k, kk ;

	/* the header has no conversions, so it is written as plain text and
	 * a translation can never be misread as a format string */
	fputs(_(

"\n\n"
"1st   |                             2nd position of codon\n"
"posi- |          U        |          C        |         A         |        G          |\n"
"tion  +-------------------+-------------------+-------------------+-------------------+\n"
""), out) ;

	for(ii = 0;ii<4;ii++) {
		i = usage_order[ii] ;
		for(kk = 0;kk<4;kk++) {
			k = usage_order[kk] ;

			/* the first base labels the second row of its group */
			if(k == 1) fprintf(out, "  %c  ",usage_base[i]) ;
			else       fprintf(out, "     ") ;

			for(jj = 0;jj<4;jj++) {
				j = usage_order[jj] ;

				/* Print the code and frequency */
				if(cdtable->tbl[i][j][k] == '0') {
					fprintf(out, " | STOP %c%c%c  %6.2f ",
						usage_base[i],usage_base[j],usage_base[k],
						intable->tbl[i][j][k]) ;
				} else {
					fprintf(out, " |  %s %c%c%c  %6.2f ",
						one2three[cdtable->tbl[i][j][k]],
						usage_base[i],usage_base[j],usage_base[k],
						intable->tbl[i][j][k]) ;
				}
			}

			fprintf(out," |\n") ; /* end of line */
		}
		fprintf(out,
"    --+-------------------+-------------------+-------------------+-------------------+\n"
		"") ;
	}
	fprintf(out,"\n\n") ;
}

/* Format 1, HTML mode: the same layout as an HTML table. */
static void print_usage_html(FILE* out, codonu *intable, codont *cdtable) {
	int i, ii, j, jj, k, kk ;

	fputs(_(
"<TABLE BORDER = 1>"
"<TR><TH ROWSPAN = 20 VALIGN = center>1st postion</TH>\n"
"<TH></TH><TH></TH><TH>2nd codon</TH>\n"
"<TR><TH></TH><TH>U</TH><TH>C</TH><TH>A</TH><TH>G</TH></TR>\n"
""), out) ;

	for(ii = 0;ii<4;ii++) {
		i = usage_order[ii] ;
		for(kk = 0;kk<4;kk++) {
			k = usage_order[kk] ;

			/* the first base is a header cell spanning its four rows */
			if(kk == 0) fprintf(out, 
			   "<TR><TH BGCOLOR = yellow VALIGN = middle ROWSPAN = 4>%c</TH>",
			   usage_base[i]) ;
			else        fprintf(out, "<TR> ") ;

			for(jj = 0;jj<4;jj++) {
				j = usage_order[jj] ;

				/* Print the code and frequency; the stop codon cell used
				 * to contain a closing FONT tag that was never opened */
				if(cdtable->tbl[i][j][k] == '0') {
					fprintf(out, "<TD STYLE = \"color: red\">STOP %c%c%c  %6.2f</TD> ",
						usage_base[i],usage_base[j],usage_base[k],
						intable->tbl[i][j][k]) ;
				} else {
					fprintf(out, "<TD> %s %c%c%c  %6.2f </TD>",
						one2three[cdtable->tbl[i][j][k]],
						usage_base[i],usage_base[j],usage_base[k],
						intable->tbl[i][j][k]) ;
				}
			}

			fprintf(out,"</TR>\n") ; /* end of line */
		}
	}
	fprintf(out,"</TABLE>\n") ;
}

/*
 * out     - stream to print to
 * intable - frequency of every codon
 * cdtable - amino acid assigned to every codon, '0' for a stop codon
 * type    - 0 for the plain list, anything else for the formatted table
 *
 * Returns EXIT_SUCCESS.
 */
int PrintCodonUsageTable(FILE* out, codonu *intable, codont *cdtable, int type) {

	if(type == 0)  print_usage_list(out, intable, cdtable) ;
	else if(!html) print_usage_ascii(out, intable, cdtable) ;
	else           print_usage_html(out, intable, cdtable) ;

	return(EXIT_SUCCESS) ;
}


/*
 * Print the codon table itself (codon assignments, no frequencies).
 *
 * out      - stream to print to
 * intable  - amino acid one letter code of every codon, '0' for stop
 * type     - 0: one line per codon, the codon followed by the one letter
 *               code (the .cdn format)
 *            anything else: ASCII table with the bases in the order
 *               A, C, G, U
 *
 * Returns EXIT_SUCCESS.
 */

int PrintCodonTable(FILE* out, codont *intable, int type) {

	const char letter[4] = { 'A', 'C', 'G', 'U' } ;
	int first, second, third ;

	/* standard .cdn format, just like the one seq2prot can read */
	if(type == 0) {
		fprintf(out,"# Codon table \n") ;
		for(first = 0; first < 4; first++) {
			for(second = 0; second < 4; second++) {
				for(third = 0; third < 4; third++) {
					fprintf(out,"%c%c%c %c\n",
						letter[first], letter[second], letter[third],
						intable->tbl[first][second][third]) ;
				}
			}
		}
		return(EXIT_SUCCESS) ;
	}

	/* formatted table: heading first */
	fprintf(out,"\n\n") ;
	fprintf(out,
	"                              2nd position of codon\n") ;
	fprintf(out,
	"1st                A              C              G              U\n") ;
	fprintf(out,
	"position ------------------------------------------------------------\n") ;
	fprintf(out, "of codon\n") ;

	/* one group of four rows per first base, one row per third base */
	for(first = 0; first < 4; first++) {
		for(third = 0; third < 4; third++) {

			/* the first base labels the second row of its group */
			if(third != 1) {
				fprintf(out, "       ") ;
			} else {
				fprintf(out, "     %c ",letter[first]) ;
			}

			for(second = 0; second < 4; second++) {
				char aa = intable->tbl[first][second][third] ;

				/* codon followed by STOP or by the amino acid */
				if(aa != '0') {
					fprintf(out, "    %c%c%c  %c(%s)",
						letter[first],letter[second],letter[third],
						aa,
						one2three[aa]) ;
				} else {
					fprintf(out, "    %c%c%c   STOP ",
						letter[first],letter[second],letter[third] ) ;
				}
			}

			fprintf(out,"\n") ; /* end of row */
		}
		fprintf(out,"\n") ;
	}
	fprintf(out,"\n\n") ;

	return(EXIT_SUCCESS) ;
}




/*
 * Print the usage screen to standard output and leave the program
 * with exit status 0.
 */
void Help()
{
	FILE *screen = stdout ;

	fputs("\n", screen);
	fprintf(screen, "%s, version %s - print out codon usage of sequence(s)",PROGNAME,VERSION);
	fputs("\n", screen);
	fputs("  Usage:\n", screen);
	fprintf(screen, "     %s [options] [ input file ] [ output file ]\n",progname);
	fputs("\n", screen);
	fputs("  Options:\n", screen);
	fputs("     -c file    : read the optional codon usage table\n", screen);
	fputs("     -o         : print only the codon table\n", screen) ;
	fputs("     -n         : output is a *N*icely formated codon table \n", screen);
	fputs("     -u         : output is an *U*gly list of codons\n", screen);
	fputs("     -H         : run in HTML mode (see manual)\n", screen);
	fputs("     -q         : run in quiet mode\n", screen);
	fputs("     -v         : print version information & exit\n", screen);
	fputs("     -h         : print this help screen & exit\n\n", screen);
	exit(0);
}


			
