/* ----------------------------------------------------------------------
 * tm -- program for calculating the melting temperature
 * Copyright (C) 2000 January Weiner III
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307,
 * USA.
 ---------------------------------------------------------------------- */

#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>

#include "genpak.h"
#include "gp_getopt.h"

#define VERSION "0.4"
#define PROGNAME "gp_tm"

/* this structure holds a full set of parameters */

typedef struct {
	char name[10] ;
	double seqpar[4][4] ;
	double init[2] ;
	double symm ;
	double term ;
} params ;

/* run-time options collected from the command line */
typedef struct {
	int symm ;			/* symmetric molecule? (-s) */
	/* use the 4*GC + 2*AT rule of thumb instead of the
	 * nearest-neighbour calculation (-t) */
	int thumb ;
	/* print the sequence name after each Tm (-N) */
	int name ;
	double ions ;		/* Na+K ions concentration, in M (-c) */
	/* concentration of oligonucleotides; -M takes the value in mM and
	 * it is divided by 1000 before being stored here */
	double mols ;		/* concentration of oligonucleotides */
} opt_s ;

/* Tm from the 4*(G+C) + 2*(A+T) rule of thumb */
float ComputateTm(sekw *s) ;
/* sum of the terms of one parameter set (DP) over the sequence s */
double CompParam(sekw *s, params DP, opt_s opt) ;
/* fills the three parameter sets with their built-in default values */
int InitiateParamMatrices(params* DH, params* DS, params* DG) ;

/*
 *
 */

/*
 * Entry point. Parses the command line, opens the input and output
 * streams, then reads sequences one at a time and writes one line per
 * sequence: the melting temperature with one decimal, followed by the
 * sequence name when -N was given.
 *
 * Positional arguments: [ input file [ output file ] ]; standard input
 * and standard output are used for the ones that are omitted.
 *
 * Returns EXIT_SUCCESS after the last sequence has been processed.
 */
int main(int argc, char *argv[])
{
	extern int optind ;
	extern char *optarg ;
	FILE *in, *out ;
	sekw* inseq ;

	double tm = 0.0;
	double value ;	/* scratch for numeric option arguments */
	int c ;
	int errflg = 0 ;
	const double R= 1.987 ; /* gas constant, cal/(K*mol) */
	const double absZero = -273.15 ; /* Celsius to K conversion */
	double dh, ds ;

	opt_s opt ;
	params DS, DH, DG ;

	/* Initialize some parameters */
	progname = argv[0] ;
	allwarnings = NULL ;
	InitiateParamMatrices(&DH,&DS,&DG) ;

	/* every field of opt gets a default: opt.name is tested for each
	 * sequence below even when -N was never given */
	opt.symm = FALSE ;
	opt.thumb = FALSE ;
	opt.name = FALSE ;
	opt.ions = 1 ;
	opt.mols = 0.0001 ;

	while ((c = gp_getopt(argc, argv, "NtsM:c:mvHhqd")) != EOF) {
		switch(c) {
		case 'N':
			opt.name = TRUE ;
			gp_warn("Will show sequence names") ;
			break ;
		case 't':
			opt.thumb = TRUE ;
			gp_warn("Using the old 4*[GC] + 2*[AT] thumb rule.") ;
			break ;
		case 's':
			opt.symm = TRUE ;
			gp_warn("Symmetry correction - self complementing molecule") ;
			break ;
		case 'M':
			/* The value is given in mM and stored in M. Zero and
			 * negative concentrations are rejected because the
			 * logarithm of the concentration is taken below; the
			 * default is kept in that case. */
			if(sscanf(optarg,"%lf", &value) != 1 || value <= 0.0) {
				gp_warn("I don't understand \"%s\"",optarg) ;
			} else {
				gp_warn("Nucleic acid concentration = %f mM",value) ;
				opt.mols = value/1000 ;
			}
			break ;
		case 'c':
			/* same validation as for -M: log10() of this value is
			 * used for the salt correction */
			if(sscanf(optarg,"%lf", &value) != 1 || value <= 0.0) {
				gp_warn("I don't understand \"%s\"",optarg) ;
			} else {
				opt.ions = value ;
				gp_warn("salt concentration = %f M",opt.ions) ;
			}
			break ;
		case 'm':
			/* accepted and announced, but nothing below acts on it */
			gp_warn("Will only computate the mean Tm") ;
			break ;
		case 'H':
			html = TRUE ;
			break ;
		case 'q':
			quiet = TRUE ;
			break ;
		case 'v':
			fprintf(stderr,"%s version %s\n",progname,VERSION) ;
			exit(0) ;
			break ;
		case 'd':
			debug = TRUE ;
			gp_warn("Running in debug mode") ;
			break ;
		case 'h':
			Help() ;
			break ;
		default:
			errflg++ ;
			break;
		}
	}

	if(errflg) gp_error("Type '%s -h' for help",progname) ;

	/* open the file pointer to read the sequences
	 * from: standard input or a file provided? */
	if(optind >= argc) in = stdin ;
	else in = gp_file_open(argv[optind],"r") ;

	/* opening the file pointer to write the output:
	 * standard output or file provided? */
	optind++ ;

	if(optind >= argc) out = stdout ;
	else out = gp_file_open(argv[optind],"wb") ;

	while( (inseq = gp_seq_read_fragment(in,0,0,0)) != NULL) {

		if(opt.thumb) {
			tm = ComputateTm(inseq) ;
		} else {
			/* NOTE(review): CompParam returns -1 for a sequence it
			 * cannot evaluate; that value is not checked here and
			 * simply flows through the formula below. */
			dh = CompParam(inseq, DH, opt) ;
			ds = CompParam(inseq, DS, opt) ;
			if(debug) gp_warn("dh: %f ds: %f\n", dh, ds) ;
			/* dh is scaled by 1000 to match the units of ds and R
			 * (presumably kcal -> cal; confirm against the
			 * parameter source); the result is shifted to Celsius */
			tm = dh * 1000 / (ds + R * log(opt.mols)) + absZero ;
			/* Salt correction */
			tm = tm + 12.5 * log10(opt.ions) ;
			/* negative results are reported as 0.0 */
			if(tm < 0.0) tm = 0.0 ;
		}

		fprintf(out, "%.1f ", tm) ;
		if(opt.name) fprintf(out, "%s", inseq->name) ;
		fprintf(out, "\n") ;

		free(inseq) ;
	}

	if(html) gp_warn_print_all(out) ;

	fclose(out) ;
	fclose(in) ;
	return(EXIT_SUCCESS);
}



/*
 * Computes one thermodynamic parameter of a sequence by summing the
 * entries of one parameter set.
 */

/*
 * Maps a nucleotide letter (either case) to its row/column in
 * params.seqpar: A = 0, C = 1, G = 2, T = 3. Returns -1 for any other
 * character.
 */
static int NucleotideIndex(char ch)
{
	/* <ctype.h> functions need a value in the unsigned char range */
	switch(toupper((unsigned char) ch)) {
	case 'A': return 0 ;
	case 'C': return 1 ;
	case 'G': return 2 ;
	case 'T': return 3 ;
	default:  return -1 ;
	}
}

/*
 * Sums one parameter set over a sequence.
 *
 *   s   - the sequence; the first s->leng characters of s->sequ are used
 *   DP  - the parameter set to apply
 *   opt - only opt.symm is consulted
 *
 * The result is the sum of
 *   - DP.seqpar for every pair of adjacent bases,
 *   - DP.init[0] if the sequence contains a G or C, DP.init[1] otherwise,
 *   - DP.symm when opt.symm is set,
 *   - DP.term once if the first base is T and once if the last base is A.
 *
 * Returns -1 after issuing a warning when the sequence is shorter than
 * four bases or contains a character other than A, C, G or T (such a
 * character has no entry in the parameter table).
 */
double CompParam(sekw *s, params DP, opt_s  opt) {
	long int i, dlugosc ;
	int prev = -1, idx = -1 ;
	int first = -1 ;
	int onlyAT = TRUE ;
	double result = 0 ;

	dlugosc = s->leng ;

	if(dlugosc < 4) {
		gp_warn("sequece %s to short to be evaluated", s->name) ;
		return -1 ;
	}

	/* adding neigbor thermodynamic parameters */
	for(i = 0 ; i < dlugosc ; i++) {
		idx = NucleotideIndex(s->sequ[i]) ;

		if(idx < 0) {
			gp_warn("Sequence %s is of bad type", s->name) ;
			return -1 ;
		}

		/* different initiation values for pure AT sequences */
		if(idx == 1 || idx == 2) onlyAT = FALSE ;

		if(i == 0) first = idx ;
		else result += DP.seqpar[prev][idx] ;

		prev = idx ;
	}

	result += DP.init[onlyAT ? 1 : 0] ;

	/* symmetry correction for self-complementary molecules */
	if(opt.symm) result += DP.symm ;

	/* terminal penalty: once for a leading T, once for a trailing A
	 * (idx still holds the last base here) */
	if(first == 3) result += DP.term ;
	if(idx == 0) result += DP.term ;

	return(result) ;
}

/* This is boring. Just loading the SantaLucia default params */

/*
 * Copies one complete parameter set into *dst: its label, the 4x4
 * dinucleotide table and the four scalar corrections.
 */
static void LoadParamSet(params *dst, const char *label,
		const double table[4][4],
		double init_gc, double init_at, double symm, double term)
{
	int row, col ;

	strcpy(dst->name, label) ;

	for(row = 0 ; row < 4 ; row++)
		for(col = 0 ; col < 4 ; col++)
			dst->seqpar[row][col] = table[row][col] ;

	dst->init[0] = init_gc ;
	dst->init[1] = init_at ;
	dst->symm = symm ;
	dst->term = term ;
}

/*
 * Loads the built-in default parameter sets (SantaLucia, according to
 * the original author's note) into DH, DS and DG, in that order.
 *
 * In the tables below the row is the first base of a dinucleotide and
 * the column the second one, both in the order A, C, G, T. A
 * dinucleotide and its reverse complement share the same value.
 *
 * Always returns EXIT_SUCCESS.
 */
int InitiateParamMatrices(params* DH, params* DS, params* DG) {

	static const double enthalpy[4][4] = {
		/*          A      C      G      T   */
		/* A */ {  -8.4,  -8.6,  -6.1,  -6.5 },
		/* C */ {  -7.4,  -6.7, -10.1,  -6.1 },
		/* G */ {  -7.7, -11.1,  -6.7,  -8.6 },
		/* T */ {  -6.3,  -7.7,  -7.4,  -8.4 }
	} ;

	static const double entropy[4][4] = {
		/*          A      C      G      T   */
		/* A */ { -23.6, -23.0, -16.1, -18.8 },
		/* C */ { -19.3, -15.6, -25.5, -16.1 },
		/* G */ { -20.3, -28.4, -15.6, -23.0 },
		/* T */ { -18.5, -20.3, -19.3, -23.6 }
	} ;

	static const double free_energy[4][4] = {
		/*          A      C      G      T   */
		/* A */ { -1.02, -1.43, -1.16, -0.73 },
		/* C */ { -1.38, -1.77, -2.09, -1.16 },
		/* G */ { -1.46, -2.28, -1.77, -1.43 },
		/* T */ { -0.6,  -1.46, -1.38, -1.02 }
	} ;

	/*                          init[0] init[1] symm  term */
	LoadParamSet(DH, "DH", enthalpy,    0,    0,    0,    0.4) ;
	LoadParamSet(DS, "DS", entropy,     -5.9, -9,   -1.4, 0) ;
	LoadParamSet(DG, "DG", free_energy, 1.82, 2.8,  0.4,  0.4) ;

	return(EXIT_SUCCESS) ;

}


/* Computes the Tm using the 4*GC+2*AT method. Don't use it. */

/*
 * Estimates the melting temperature with the rule of thumb
 * Tm = 4 * (number of G and C) + 2 * (number of A, T and U).
 *
 *   s - the sequence; s->sequ is read up to its terminating NUL
 *
 * Letters are accepted in either case. U is counted together with A
 * and T, so sequences written with U get the same result as their T
 * spelling. A warning is issued for sequences longer than 20 bases.
 *
 * Returns the estimate, or 0 after a warning if the sequence contains
 * any character other than A, C, G, T or U.
 */
float ComputateTm(sekw *s) {
	long int i = 0, dlugosc = 0 ;
	long int strong = 0 ;	/* G and C */
	long int weak = 0 ;	/* A, T and U */
	float tm ;

	dlugosc = strlen(s->sequ) ;

	if(dlugosc>20) 
		gp_warn("Calculations may be inaccurate\n for sequence longer then 20 bases") ;

	for(i = 0;i<dlugosc;i++) {
		/* <ctype.h> functions need a value in the unsigned char range */
		switch(toupper((unsigned char) s->sequ[i])) {
		case 'G':
		case 'C':
			strong++ ;
			break ;
		case 'A':
		case 'T':
		case 'U':
			weak++ ;
			break ;
		default:
			gp_warn("Sequence %s is of bad type",s->name) ;
			return(0);
		}
	}

	tm = 4.0 * strong + 2.0 * weak ;
	return(tm) ;
}

/* Standard help message */

/*
 * Writes the usage summary to standard output and terminates the
 * program with exit status 0; it never returns to the caller.
 *
 * The banner uses the compiled-in PROGNAME and VERSION, the usage line
 * uses the global progname.
 */
void Help()
{
	/* banner, surrounded by a blank line above and a newline after */
	printf("\n%s %s - determining the Tm of a sequence\n", PROGNAME, VERSION) ;

	fputs("  Usage:\n", stdout) ;
	printf("     %s [options] [ input file ] [ output file ]\n", progname) ;

	/* The -m option has no entry in this list; its line is disabled:
	 *   "     -m       : computate only mean for all sequences\n" */
	fputs("\n"
	      "  Options:\n"
	      "     -t       : use the 4*GC+2*AT thumb rule\n"
	      "     -s       : use symmetry correction for self\n"
	      "              : complementary molecules\n"
	      "     -M value : set nucleic acid concentration to [value] mM\n"
	      "     -c value : set salt concentration to [value] M\n"
	      "     -N       : show sequence names\n"
	      "     -H       : run in HTML mode\n"
	      "     -q       : run in quiet mode\n"
	      "     -d       : turn on debugging\n"
	      "     -v       : print version information & exit\n"
	      "     -h       : print this help screen & exit\n\n",
	      stdout) ;

	exit(0) ;
}


			
