/* ----------------------------------------------------------------------
 * find stem/loop structures and dimers in primer sequences
 * Copyright (C) 2000 January Weiner III
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307,
 * USA.
 ---------------------------------------------------------------------- */

#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <time.h>

#include "genpak.h"
#include "gp_getopt.h"

#define VERSION "0.01"
#define PROGNAME "gp_primer"

char *progname ;

/* run-time options, filled in by main() and handed to the search routines */
typedef struct {
	int maxstruct ;   /* number of structures printed per sequence; 0 stands for all */
	int onlyhighest ; /* if set, only one summary line per sequence is written */
	int minloop ;     /* the search for the closing base of a loop starts minloop + 1 positions downstream */
	int maxloop ;     /* limit for that search distance; 0 stands for the sequence length */
	int dimeres ;     /* if set, dimers are searched for */
	int stems ;       /* if set, stem/loop structures are searched for */
	FILE *in ;        /* stream the sequences are read from */
	FILE *out ;       /* stream the results are written to */
} opt_s ;

/* description of a single stem / loop structure; the same record is also
 * used for dimers, which only fill in shift and energy */
typedef struct {
	char *start ;    /* left-hand base of the innermost pair, points into the sequence */
	char *end ;      /* right-hand base of the innermost pair, points into the sequence */
	int shift ;      /* offset of the two strands relative to each other (dimers) */
	int length ;     /* number of matching base pairs */
	int loop ;       /* number of nucleotides between start and end */
	double energy ;  /* delta G in kcal/mol */
} stem_s ;

/* delta G parameters from SantaLucia et al.
 *
 * dg[x][y] holds the value for the dinucleotide xy, where the indices
 * stand for A = 0, C = 1, G = 2, T = 3. */
double dg[4][4] = {
	/*            A      C      G      T   */
	/* A */ { -1.02, -1.43, -1.16, -0.73 },
	/* C */ { -1.38, -1.77, -2.09, -1.16 },
	/* G */ { -1.46, -2.28, -1.77, -1.43 },
	/* T */ { -0.6,  -1.46, -1.38, -1.02 }
} ;

/* additional terms; of these only dgsymm is referenced by stem_energy() */
double dginit[2] = { 1.82, 2.8 } ;
double dgsymm = 0.4 ;
double dgterm = 0.4 ;

/* function prototypes
 *
 * dimer_print() and stem_print() are defined below their first use, so
 * they have to be declared here as well. */
double stem_energy(sekw *s, stem_s *stem) ;
int stem_compare(const void *s1, const void *s2) ;
double dimer_find(sekw *s, opt_s *o) ;
double stem_find(sekw *s, opt_s *o) ;
int dimer_print(sekw *s, stem_s *d, opt_s *o) ;
int stem_print(sekw *s, stem_s *stem, opt_s *o) ;


/*
 *
 */



int main(int argc, char *argv[])
{
	extern int optind ;
	extern char *optarg ;
	int width = 70 ; /* width with which the sequence gets formatted */
	opt_s options ;
	sekw *inseq, *outseq ;
	double dim, stem ;

	int c, i;
	char message[100] ;

	options.maxstruct = 5 ;
	options.onlyhighest = FALSE ;
	options.minloop = 3 ;
	options.maxloop = 0 ;
	options.dimeres = TRUE ;
	options.stems = TRUE ;
	progname = argv[0] ;
	textdomain("genpak") ;

	while ((c = gp_getopt(argc, argv, "im:Hqdvh")) != EOF)
		switch(c) {
		case 'i':
			options.onlyhighest = TRUE ;
			break ;
		case 'm':
			if(sscanf(optarg, "%i", &options.maxstruct) != 1)
				gp_error("main: invalid argument for option -m") ;
			else
				gp_warn("main: showing %i strongest structures", options.maxstruct) ;
			break ;
		case 'H':
			html = TRUE ;
			break ;
		case 'q':
			quiet = TRUE ;
			break ;
		case 'v':
			fprintf(stderr,_("%s version %s\n"),progname,VERSION) ;
			exit(EXIT_SUCCESS) ;
			break ;
		case 'd':
			debug = TRUE ;
			gp_warn(_("Running in debug mode")) ;
			break ;
		case 'h':
			Help() ;
			break ;
		default:
			gp_error(_("Type '%s -h' for help"),progname) ;
			break;
		}


	/* open the file pointer to read the sequences 
 	* from: standard input or a file provided? */
	if(optind >= argc) options.in = stdin ;
	else options.in = gp_file_open(argv[optind],"r") ;

	/* opening the file pointer to write the output: 
 	* standard output or file provided? */
	optind++ ;

	if(optind >= argc) options.out = stdout ;
	else options.out = gp_file_open(argv[optind],"wb") ;

	if(options.onlyhighest) {
		fprintf(options.out, "#no.\t") ;
		if(options.dimeres) fprintf(options.out, "dimeres\t") ;
		if(options.stems) fprintf(options.out, "stems\t") ;
		fprintf(options.out, "name\n") ;
	}

	i = 1 ;

	while( (inseq = gp_seq_read(options.in)) ) {

		if(debug) gp_warn("main: Read %s", inseq->name) ;

		if(!options.onlyhighest) 
			fprintf(options.out, "Sequence %i: %s\n", i, inseq->name) ;

		if(options.dimeres) {
			if(!options.onlyhighest) 
				fprintf(options.out, "Dimeres found:\n") ;
			dim = dimer_find(inseq, &options) ;
		}

		if(options.stems) {
			if(!options.onlyhighest) 
				fprintf(options.out, "Stems found:\n") ;
			stem = stem_find(inseq, &options) ;
		}

		if(options.onlyhighest) {
			fprintf(options.out, "%i\t", i) ;
			if(options.dimeres) fprintf(options.out, "%.2f\t", dim) ;
			if(options.stems) fprintf(options.out, "%.2f\t", stem) ;
			fprintf(options.out, "%s\n", inseq->name) ;
		}

		i++ ;
		gp_seq_free(inseq) ;
	}
	
	if(html) gp_warn_print_all(options.out) ;
	fclose(options.out) ;
	gzclose(options.in) ;
	return EXIT_SUCCESS ;
}


/* finding nucleotide dimeres
 *
 * The sequence is compared with the sequence returned by gp_seq_reverse()
 * at every offset from 0 to s->leng - 3.  For each offset the dg[][]
 * values of all dinucleotides made of two consecutive matching positions
 * are added up.  The results are sorted with stem_compare() and, unless
 * o->onlyhighest is set, the first o->maxstruct of them (all of them if
 * o->maxstruct is 0) are printed with dimer_print().
 *
 * NOTE(review): comparing with Compare() only describes base pairing if
 * gp_seq_reverse() returns the reverse complement - confirm.
 *
 * s : sequence to examine
 * o : options; maxstruct, onlyhighest and out are used
 *
 * Returns the energy of the first entry after sorting, or 0.0 if the
 * sequence is shorter than three nucleotides or memory runs out.
 */
double dimer_find(sekw *s, opt_s *o) {
	sekw *r ;
	char *p, *q ;
	int i, a, b, last, maxstruct ;
	long n ;
	double best ;
	stem_s *dimeres ;
	int cnv[128] ;

	/* nothing to align: avoids a zero or negative element count below */
	if(s->leng < 3) return 0.0 ;
	n = (long) s->leng - 2 ;

	/* symbols without an entry in dg[][] are marked with -1 */
	for(i = 0 ; i < 128 ; i++) cnv[i] = -1 ;
	cnv['A'] = 0 ; cnv['C'] = 1 ; cnv['G'] = 2 ; cnv['T'] = 3 ; cnv['U'] = 3 ; 
	cnv['a'] = 0 ; cnv['c'] = 1 ; cnv['g'] = 2 ; cnv['t'] = 3 ; cnv['u'] = 3 ; 

	r = gp_seq_reverse(s) ;

	if(debug) gp_warn("dimer_find: rev seq = %s", r->sequ) ;

	/* calloc: the fields not used for dimers are left zeroed */
	dimeres = calloc( (size_t) n, sizeof(*dimeres) ) ;
	if(dimeres == NULL) {
		free(r) ;
		gp_error("dimer_find: out of memory") ;
		return 0.0 ;
	}

	for(i = 0 ; i < n ; i++) {
		
		dimeres[i].energy = 0 ;
		dimeres[i].shift = i ;
		last = FALSE ;

		/* both strands advance together; position i + k of the sequence
		 * faces position k of the reversed one */
		for(p = s->sequ + i, q = r->sequ ; p < (s->sequ + s->leng - 1) ; p++, q++) {

			if(Compare(*p, *q)) {
				if(last) {
					a = ((unsigned char) *(p - 1) < 128) ? cnv[(unsigned char) *(p - 1)] : -1 ;
					b = ((unsigned char) *p < 128) ? cnv[(unsigned char) *p] : -1 ;
					/* a match involving a symbol other than A, C, G, T, U
					 * has no table entry and adds nothing */
					if(a >= 0 && b >= 0) dimeres[i].energy += dg[a][b] ;
				}
				last = TRUE ;
			} else last = FALSE ;

		}

	}

	qsort(dimeres, (size_t) n, sizeof(*dimeres), stem_compare) ;

	/* remember the result before the array is released */
	best = dimeres[0].energy ;

	if(o->maxstruct) maxstruct = o->maxstruct ;
	else maxstruct = n ;

	/* printing output */
	if(!o->onlyhighest) {
		for(i = 0 ; i < n && i < maxstruct ; i++ ) 
			dimer_print(s, &dimeres[i], o) ;
	}

	free(dimeres) ;
	/* NOTE(review): main() releases sequences with gp_seq_free(); plain
	 * free() presumably leaks the members of r - confirm and switch */
	free(r) ;

	return best ;
}


/* print a formatted dimere representation and respective values
 *
 * Output consists of the energy, the sequence written 5' -> 3', a line of
 * bars and the same sequence written 3' -> 5'.  A bar is printed wherever
 * nucleotide i of the upper strand matches the complement of nucleotide
 * leng - 1 - i + shift.  To have every bar sit between exactly those two
 * nucleotides the lower strand is indented by d->shift columns, e.g. for a
 * shift of 1:
 *
 *   5'-AATT-3'
 *       |||
 *    3'-TTAA-5'
 *
 * s : sequence to draw
 * d : dimer to draw; shift and energy are used
 * o : options; only o->out is used
 *
 * Always returns EXIT_SUCCESS.
 */
int dimer_print(sekw *s, stem_s *d, opt_s *o) {
	int i ;
	char c ;

	fprintf(o->out, "dG %.2f kcal/mol\n", d->energy) ;

	/* upper strand; the three blanks after the line break skip the "5'-"
	 * label so that the bars start below the first nucleotide */
	fprintf(o->out, "5'-%s-3'\n   ", s->sequ) ;

	/* no pairing is possible left of the overlap */
	for(i = 0 ; i < d->shift ; i++) fprintf(o->out, " ") ;
	for(i = d->shift ; i < (s->leng) ; i++) {

		c = gp_nucl_complement(s->sequ[s->leng - i + d->shift - 1]) ;
		if(Compare(c, s->sequ[i])) fprintf(o->out, "|") ;
		else fprintf(o->out, " ") ;

	}

	/* lower strand, moved to the right by the shift */
	fprintf(o->out, "\n") ;
	for(i = 0 ; i < d->shift ; i++) fprintf(o->out, " ") ;
	fprintf(o->out, "3'-") ;
	for(i = s->leng - 1 ; i >= 0 ; i--) fprintf(o->out, "%c", s->sequ[i]) ;
	fprintf(o->out, "-5'\n\n") ;
	return EXIT_SUCCESS ;
}

/* finding stem/loop structures in a sequence
 *
 * Every position from the third nucleotide up to, but not including, the
 * third from the end is tried as the left-hand base of the innermost
 * pair.  Its partner is the first matching complementary nucleotide at
 * least o->minloop + 1 positions downstream; if there is none, the end of
 * the search range is used instead.  Each candidate is rated with
 * stem_energy(), the candidates are sorted with stem_compare() and,
 * unless o->onlyhighest is set, the first o->maxstruct of them (all of
 * them if o->maxstruct is 0) are printed with stem_print().
 *
 * s : sequence to examine
 * o : options; minloop, maxloop, maxstruct, onlyhighest and out are used
 *
 * Returns the energy of the first entry after sorting, or 0.0 if the
 * sequence is shorter than six nucleotides or memory runs out.
 */
double stem_find(sekw *s, opt_s *o) {

	int i, maxstruct, maxloop ;
	long n ;
	char c, *p, *q, *start, *end ;
	double best ;
	stem_s *stems ;

	/* shorter sequences leave no candidate position; bail out before
	 * start and end are formed outside the sequence */
	if(s->leng < 6) return 0.0 ;

	start = s->sequ + 2 ;
	end = s->sequ + s->leng - 3 ;
	n = end - start ;

	/* reserving memory for the array of stems; calloc keeps the fields
	 * that are not filled in here zeroed */
	stems = calloc( (size_t) n, sizeof(*stems) ) ;
	if(stems == NULL) {
		gp_error("stem_find: out of memory") ;
		return 0.0 ;
	}

	if(o->maxloop) maxloop = o->maxloop ;
	else maxloop = s->leng ;

	/* getting all possible stem/loop combinations */
	for(p = start, i = 0 ; p < end ; i++, p++) {

		stems[i].start = p ;
		stems[i].end = end ;

		/* looking for the first complementary oligonucleotide */
		for(q = p + o->minloop + 1 ; q < end && (q - p) < maxloop ; q++) {
			c = gp_nucl_complement(*p) ;

			if(Compare(c, *q)) {
				stems[i].end = q ;
				break ;
			}
		}

		stems[i].energy = stem_energy(s, &stems[i]) ;
		stems[i].loop = stems[i].end - stems[i].start - 1 ;

	}

	/* sorting stem structures according to energy */
	qsort(stems, (size_t) n, sizeof(*stems), stem_compare) ;

	/* remember the result before the array is released */
	best = stems[0].energy ;

	/* printing output */

	if(o->maxstruct) maxstruct = o->maxstruct ;
	else maxstruct = n ;
	if(!o->onlyhighest) {
		for(i = 0 ; i < n && i < maxstruct ; i++ ) 
			stem_print(s, &stems[i], o) ;
	}

	free(stems) ;
	return best ;
}


/* function used for the qsort function
 *
 * Orders two stem_s records by ascending energy, so that the most
 * negative value comes first.
 *
 * The energies are compared directly instead of returning their
 * difference: converted to int, a difference of less than 1 would be
 * truncated to 0 and the two records reported as equal.
 *
 * Returns -1, 0 or 1.
 */
int stem_compare(const void *s1, const void *s2) {
	const stem_s *a = s1 ;
	const stem_s *b = s2 ;

	if(a->energy < b->energy) return -1 ;
	if(a->energy > b->energy) return 1 ;
	return 0 ;

}


/* calculates energy of a stem
 *
 * Starting with the pair stem->start / stem->end both strands are
 * followed outwards, the left one towards the beginning and the right one
 * towards the end of the sequence, until either border is reached.  A
 * position counts as a pair if the complement of the right-hand
 * nucleotide matches the left-hand one; two pairs in a row add the
 * corresponding entry of dg[][].
 *
 * s    : sequence the stem belongs to
 * stem : stem to rate; start and end have to point into s->sequ.  On
 *        return stem->length holds the number of pairs found.
 *
 * Returns the accumulated energy; the symmetry correction dgsymm is
 * always part of it.
 */
double stem_energy(sekw *s, stem_s *stem) {

	double res ;
	char c, d ;
	int cnv[128] ;
	int i, j, a, b, last ;
	long left, right ;

	/* symbols without an entry in dg[][] are marked with -1 */
	for(i = 0 ; i < 128 ; i++) cnv[i] = -1 ;
	cnv['A'] = 0 ; cnv['C'] = 1 ; cnv['G'] = 2 ; cnv['T'] = 3 ; cnv['U'] = 3 ; 
	cnv['a'] = 0 ; cnv['c'] = 1 ; cnv['g'] = 2 ; cnv['t'] = 3 ; cnv['u'] = 3 ; 

	/* symmetry correction; the initialization parameters (dginit) are
	 * not applied */
	res = dgsymm  ;

	/* indices rather than pointers, so that no pointer in front of the
	 * sequence is ever formed */
	left = stem->start - s->sequ ;
	right = stem->end - s->sequ ;

	for(j = 0, last = FALSE ; left >= 0 && right < s->leng ; left--, right++) {

		c = gp_nucl_complement(s->sequ[right]) ;

		if(Compare(c, s->sequ[left])) {
			j++ ;

			if(last) {
				/* the previous position matched as well, hence
				 * right - 1 is still within the stem */
				d = gp_nucl_complement(s->sequ[right - 1]) ;
				a = ((unsigned char) d < 128) ? cnv[(unsigned char) d] : -1 ;
				b = ((unsigned char) c < 128) ? cnv[(unsigned char) c] : -1 ;
				/* a pair involving a symbol other than A, C, G, T, U
				 * has no table entry and adds nothing */
				if(a >= 0 && b >= 0) res += dg[a][b] ;
			}

			last = TRUE ;
		} else {
			last = FALSE ;
		}

	}

	stem->length = j ;
	return res ;

}


/* prints a stem structure to specified output 
 *
 * The picture consists of three lines: the right-hand strand (ending in
 * "-3'"), the line with the bars and the left-hand strand written
 * backwards (ending in "-5'").  The loop is folded at its middle, which
 * gives two basic forms of output, one for loops with an even and one for
 * loops with an uneven number of nucleotides, e.g.:
 * 
 * even:
 * 
 *  TAGCTATGC
 * (  ||  
 *  AGCGTA
 * 
 * uneven:
 * 
 *  TAAGC
 * A |||
 *  TTCGTACGTA
 * 
 * With an uneven loop the middle nucleotide takes the place of the '('.
 *
 * s    : sequence the stem belongs to
 * stem : stem to draw; start, end, loop, length and energy are used
 * o    : options; only o->out is used
 *
 * Calls gp_error_fatal() if start or end lie outside the sequence or in
 * the wrong order.  Returns EXIT_SUCCESS.
 */

int stem_print(sekw *s, stem_s *stem, opt_s *o) {

	FILE *fp = o->out ;
	char *seq_first = s->sequ ;
	char *seq_past = s->sequ + s->leng ;
	char *right, *left ;
	char partner ;
	long pos ;
	int half, col ;

	/* checking whether parameters OK */
	if( stem->start < seq_first || 
		stem->end > seq_past || 
		stem->start > stem->end) gp_error_fatal() ;

	/* number of loop nucleotides drawn on either strand */
	half = stem->loop / 2 ;

	fprintf(fp, "dG = %.2f kcal/mol, matching %i bp, loop %i bp\n", 
		stem->energy, stem->length, stem->loop) ;

	/* first line: right half of the loop and everything downstream */
	fputc(' ', fp) ;
	right = stem->end - half ;
	while(right < seq_past) {
		fputc(*right, fp) ;
		right++ ;
	}
	fputs("-3'\n", fp) ;

	/* second line: the turn of the loop ... */
	if(stem->loop % 2 != 0) fputc(*(stem->end - half - 1), fp) ;
	else fputc('(', fp) ;

	/* ... blanks below the loop nucleotides ... */
	for(col = 0 ; col < half ; col++) fputc(' ', fp) ;

	/* ... and one column per position from the innermost pair outwards */
	right = stem->end ;
	left = stem->start ;
	while(right < seq_past && left >= seq_first) {
		partner = gp_nucl_complement(*right) ;
		if(Compare(partner, *left)) fputc('|', fp) ;
		else fputc(' ', fp) ;
		right++ ;
		left-- ;
	}
	fputc('\n', fp) ;

	/* third line: left half of the loop and everything upstream,
	 * written backwards */
	fputc(' ', fp) ;
	for(pos = (stem->start + half) - seq_first ; pos >= 0 ; pos--)
		fputc(seq_first[pos], fp) ;
	fputs("-5'\n\n", fp) ;

	return EXIT_SUCCESS ;
}



/* Standard message: writes the usage text to standard output and ends
 * the program with EXIT_SUCCESS */
void Help()
{
	printf(_("\n"
	"%s, v. %s- calculate stem/loop and dimere structures of a primer"
	"\n"
	"  Usage:\n"
	"     %s [options] [ input file ] [ output file ]\n"
	"\n"
	"  Options:\n"
	"     -i       : print only the parameters for the single strongest structure\n"
	"               (for each sequence, you will get the kcal/mol for both the \n"
	"               strongest dimere and the strongest stem & loop structure)\n"
	"     -m value : set the maximal number of shown structures (0 for all)\n"
	"     -v       : print version information & exit\n"
	"     -h       : print this help screen & exit\n"
	"     -q       : quiet, suppress error messages\n\n"),
	PROGNAME, VERSION, progname) ;

	exit(EXIT_SUCCESS) ;
}


			
