/* ----------------------------------------------------------------------
 * seq2prot -- translating DNA sequence into protein sequence
 * Copyright (C) 2000 January Weiner III
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307,
 * USA.
 ---------------------------------------------------------------------- */

#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>

#include "genpak.h"
#include "gp_getopt.h"

#define VERSION "0.3"
#define PROGNAME "gp_seq2prot"

/* Forward declarations of the codon table helpers defined later in this file */

codont* ReadCodonTable(FILE *in, codont* outtable) ;
int PrintCodonTable(FILE* out, codont *intable,int type) ;

/*
 * Entry point. Parses the command line, then either prints the codon
 * table (-p) and exits, or translates every sequence read from the input
 * into protein and writes the result in FASTA format.
 *
 * Positional arguments: [ input file ] [ output file ]. A missing input
 * file means standard input, a missing output file means standard output.
 */

int main(int argc, char *argv[])
{
	extern int optind ;
	extern char *optarg ;
	int width = 70 ; /* width with which the sequence gets formatted */
	FILE *in = NULL, *out, *codet ;
	sekw *inseq, *outseq ;
	codont *tabela ;

	int onlyprint = FALSE, tableformat = 1 ; 
	int c;
	char message[100] ;
	int errflg = 0 ;
	int fascist = TRUE ; /* checking for start and stop codons? */

	/* initialize structure with all warnings in it */
	allwarnings = NULL ;

	/* Load the translation to three letter AA code */
	gp_codon_init_conversion() ;

	/* Load the standard code table */
	tabela = gp_codon_table_load_std() ;

	progname = argv[0] ;

	while ((c = gp_getopt(argc, argv, "lpc:vHhqd")) != EOF)
		switch(c) {
		case 'p':
			onlyprint = TRUE ;
			/* -p takes an optional keyword; consume it only if it is
			 * one of the two recognized format names */
			if(optind<argc) {
				if(strcasecmp(argv[optind],"ugly") == 0) {
					optind++ ;
					tableformat = 0 ;
				} else if(strcasecmp(argv[optind],"nice") == 0) {
					optind++ ;
					tableformat = 1 ;
				}
			}
			break ;
		case 'l':
			fascist = FALSE ;
			if(debug) gp_warn("Will not care about proper start/stop codons") ;
			break ;
		case 'c':
			/* overlay the user supplied codons on the standard table */
			codet = gp_file_open(optarg,"r") ;
			ReadCodonTable(codet,tabela) ;
			/* the table file is fully consumed; do not leak the stream */
			fclose(codet) ;
			gp_warn("Loaded optional code table from file %s",optarg) ;
			break ;
		case 'q':
			quiet = TRUE ;
			break ;
		case 'v':
			fprintf(stderr,"%s version %s\n",progname,VERSION) ;
			exit(0) ;
			break ;
		case 'H':
			html = TRUE ;
			break ;
		case 'd':
			debug = TRUE ;
			gp_warn("Running in debug mode") ;
			break ;
		case 'h':
			Help() ;
			break ;
		default:
			errflg++ ;
			break;
		}

	if(errflg) {
			/* bounded write: progname comes from argv[0] and may be long */
			snprintf(message,sizeof message,"Type '%s -h' for help",progname) ;
			gp_error(message) ;
	}

	/* open the file pointer to read the sequences 
 	 * from: standard input or a file provided? 
 	 * We must only do it if we _need_ any input */

	if(onlyprint == FALSE) {
		if(optind >= argc) in = stdin ;
		else in = gp_file_open(argv[optind],"r") ;
	}


/* 
 * opening the file pointer to write the output: 
 * standard output or file provided? 
 */

	/* NOTE(review): the first positional argument is skipped here even in
	 * -p mode, where no input file is opened -- confirm this is intended */
	optind++ ;

	if(optind >= argc) out = stdout ;
	else out = gp_file_open(argv[optind],"wb") ;

	if(onlyprint == TRUE) {
		PrintCodonTable(out,tabela,tableformat) ;
		/* close the output on this path too, as the normal path does */
		fclose(out) ;
		return(EXIT_SUCCESS) ;
	}

	while((inseq = gp_seq_read_fragment(in,0,0,0)) != NULL) {
		outseq = gp_seq_dna_to_protein(inseq,tabela,1,fascist) ;
		if(outseq != NULL) gp_seq_print_fasta(out,outseq,width) ;
		else gp_warn("Translation of %s did not succeed",inseq->name) ;
		free(inseq) ;
		free(outseq) ; /* free(NULL) is a no-op when translation failed */
	}

	if(html) gp_warn_print_all(out) ;
	
	fclose(out) ;
	fclose(in) ;
	return(0);
}




/* 
 * Read a codon table from a file and store it in the provided outtable,
 * which is modified in place. Entries not mentioned in the file keep
 * whatever value they had before the call.
 *
 * File format, one entry per line:
 *   - lines starting with '#' and empty lines are skipped
 *   - three bases out of A, C, G, T, U (any letter case; T and U are
 *     equivalent), optionally separated by spaces or tabs
 *   - followed by a one letter amino acid code, or '0' (the value that
 *     PrintCodonTable displays as STOP)
 *
 * Every line is validated on its own; a malformed line produces a warning
 * and is ignored without affecting the lines that follow.
 *
 * Returns outtable. Actually, making this procedure codont* makes no
 * sense, but it's because of backward compatibility.
 */

codont* ReadCodonTable(FILE *in, codont* outtable) {
	int i,j ;
	int t ;          /* current character, already upper-cased */
	int coord[3] ;   /* table indices of the three bases of the codon */
	char bufor[BUFSIZ] ;
	int codons_read = 0, lines_read = 0, Conv[128] ;
	int check ;


	/* 
	 * The Conv matrix converts the nucleic acid letter to 
	 * appropriate coordinate for the codont matrix; 99 marks
	 * every character that is not a valid base
	 */

	for(i = 0;i<128;i++) Conv[i] = 99 ;
	Conv['A'] = 0 ; Conv['C'] = 1; Conv['G'] = 2 ; Conv['T'] = 3 ; Conv['U'] = 3 ;

	while(fgets(bufor,BUFSIZ,in) != NULL) {

		/* skipping comments and blank lines */
		lines_read++ ;
		if(bufor[0] == '#' || bufor [0] == '\n') continue ; 

		/* each line starts out valid; an earlier bad line must not
		 * poison the ones that follow */
		check = TRUE ;
		j = 0 ;

		/* reading the codon coordinates */
		for(i = 0;i<3;i++) {
			/* skipping blanks */
			while(bufor[j] == ' ' || bufor[j] == '\t') j++ ;

			/* never step over the terminator into stale buffer bytes */
			if(bufor[j] == '\0') {
				check = FALSE ;
				break ;
			}

			/* <ctype.h> functions need an unsigned char value */
			t = toupper((unsigned char) bufor[j]) ;
			j++ ;

			/* checking if the bases are in "ATCGU"; bytes outside
			 * the 7 bit range cannot be looked up in Conv at all */
			if(t < 0 || t >= 128 || Conv[t] > 3) {
				check = FALSE ;
			} else {
				coord[i] = Conv[t] ;
			}
		}
		
		/* reading the corresponding amino acid letter */
		t = '\0' ;
		if(check == TRUE) {
			/* skipping blanks */
			while(bufor[j] == ' ' || bufor[j] == '\t') j++ ;
			t = toupper((unsigned char) bufor[j]) ;
		}

		/* strchr() also matches the terminating '\0' of the search
		 * string, so an absent amino acid has to be rejected by hand */
		if(check == FALSE || t == '\0' ||
				strchr("0GAVLIPCMFWSTYNQKRHDE",t) == NULL) {
			/* diagnostics belong on stderr: stdout may carry the output */
			if(debug)
				fprintf(stderr,"line %i of the codon table rejected\n",
								lines_read) ;
			gp_warn("Problems reading codon table") ;
		} else {
			outtable->tbl[coord[0]][coord[1]][coord[2]] = (char) t ;
			codons_read++ ;
		}

	}

	if(debug) 
		fprintf(stderr,"%i lines read, %i codons read\n",
						lines_read, codons_read) ;

	return outtable ;
}


/*
 * Write the codon table to the given stream. The type argument selects
 * the layout:
 * 0     - one codon per line followed by the one letter AA code; this is
 *         the .cdn format that seq2prot itself can read
 * other - nice ASCII table: rows grouped by the first base, columns by
 *         the second base; entries holding '0' are shown as STOP
 * The function always returns EXIT_SUCCESS.
 */

int PrintCodonTable(FILE* out, codont *intable, int type) {

	static const char base[4] = { 'A', 'C', 'G', 'U' } ;
	int first, second, third ;

	/* plain list: handle it up front and leave */
	if(type == 0) {
		fputs("# Codon table \n", out) ;
		for(first = 0; first < 4; first++) {
			for(second = 0; second < 4; second++) {
				for(third = 0; third < 4; third++) {
					fprintf(out, "%c%c%c %c\n",
						base[first], base[second], base[third],
						intable->tbl[first][second][third]) ;
				}
			}
		}
		return(EXIT_SUCCESS) ;
	}

	/* table header */
	fputs("\n\n", out) ;
	fputs("                              2nd position of codon\n", out) ;
	fputs("1st                A              C              G              U\n", out) ;
	fputs("position ------------------------------------------------------------\n", out) ;
	fputs("of codon\n", out) ;

	/* one group of four lines per first base, one line per third base */
	for(first = 0; first < 4; first++) {
		for(third = 0; third < 4; third++) {

			/* the row label goes on the second line of each group */
			if(third != 1) fputs("       ", out) ;
			else fprintf(out, "     %c ", base[first]) ;

			for(second = 0; second < 4; second++) {
				int aa = intable->tbl[first][second][third] ;

				if(aa != '0') {
					fprintf(out, "    %c%c%c  %c(%s)",
						base[first], base[second], base[third],
						aa, one2three[aa]) ;
				} else {
					fprintf(out, "    %c%c%c   STOP ",
						base[first], base[second], base[third]) ;
				}
			}

			fputs("\n", out) ; /* end of line */
		}
		fputs("\n", out) ; /* blank line between groups */
	}
	fputs("\n\n", out) ;

	return(EXIT_SUCCESS) ;

}





/*
 * Print the usage screen to standard output and terminate the program
 * with exit status 0. Every option accepted by the option string in
 * main() is listed here, including -d.
 */
void Help()
{
printf("\n");
printf("%s %s - convert DNA sequence to protein sequence ",PROGNAME,VERSION);
printf("\n");
printf("  Usage:\n");
printf("     %s [options] [ input file ] [ output file ]\n",progname);
printf("\n");
printf("  Options:\n");
printf("     -c file    : read the optional codon usage table\n");
printf("     -l         : be liberal about stop / start codons\n");
printf("     -p [nice]  : will print out a formated codon table & exit(default)\n");
printf("     -p ugly    : will print out the codon table & exit\n");
printf("     -H         : run in HTML mode\n");
printf("     -q         : run in quiet mode\n");
printf("     -d         : run in debug mode\n");
printf("     -v         : print version information & exit\n");
printf("     -h         : print this help screen & exit\n\n");
exit(0);
}
