/*  
 *  hstruct/rng.c
 * 
 *  $Author: baptiste $, $Date: 2008-05-13 15:33:44 $, $Version$
 *
 *  Libgdl : a C library for statistical genetics
 * 
 *  Copyright (C) 2003-2006  Jean-Baptiste Veyrieras, INRA, France.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA * 
 */
 
#include <gdl/gdl_common.h>
#include <gdl/gdl_rng.h>
#include <gdl/gdl_randist.h>
#include <gdl/gdl_gentity.h>
#include <gdl/gdl_vector.h>
#include <gdl/gdl_gmap.h>
#include <gdl/gdl_hview.h>
#include <gdl/gdl_hstruct_point.h>
#include <gdl/gdl_hstruct_config.h>
#include <gdl/gdl_hstruct_parameters.h>
#include <gdl/gdl_hstruct_rng_model.h>

struct _gdl_hstruct_rng_model
{
	size_t K;
	size_t L;
	gdl_gmap * map;
	gdl_locus ** loci;
	gdl_hstruct_parameters * params;
};

gdl_hstruct_rng_model *
gdl_hstruct_rng_model_alloc (const gdl_gmap * map, size_t k)
{
	size_t i, j, ng, nc;
	gdl_hstruct_rng_model * m;
	
	m = GDL_MALLOC (gdl_hstruct_rng_model, 1);
	
	m->K      = k;
	m->map    = gdl_gmap_clone (map);
	m->params = gdl_hstruct_parameters_alloc (k); 
	
	m->loci = gdl_gmap_loci (m->map, &(m->L));	
	
	return m;
}

void
gdl_hstruct_rng_model_free (gdl_hstruct_rng_model * m)
{
	if (m)
	{
		gdl_gmap_free (m->map);
		gdl_hstruct_parameters_free (m->params);	
	}	
}

size_t
gdl_hstruct_rng_model_locus_size (const gdl_hstruct_rng_model * m)
{
	return m->L;
}

const gdl_locus *
gdl_hstruct_rng_model_get_locus (const gdl_hstruct_rng_model * m, size_t l)
{
	return m->loci[l];
}

const gdl_allele *
gdl_hstruct_rng_model_get_allele (const gdl_hstruct_rng_model * m, size_t locus, size_t allele)
{
	return gdl_locus_get_allele (m->loci[locus], allele);
}

gdl_gmap *
gdl_hstruct_rng_model_map (const gdl_hstruct_rng_model * m)
{
	return m->map;
}

size_t
gdl_hstruct_rng_model_add_parameter (gdl_hstruct_rng_model * m, const gdl_hstruct_point_type * T, void * value)
{
	void ** p;
	
	p = gdl_hstruct_parameters_new_point (m->params, T, 0);
	
	(T->copy)(*p, value);
	
	return gdl_hstruct_parameters_get_point_index (m->params, T, *p);
}

int
gdl_hstruct_rng_model_set_locus_point (gdl_hstruct_rng_model * m, const gdl_locus * locus, const gdl_hstruct_point_type * T, size_t idx)
{
	gdl_hstruct_static_config * conf = (gdl_hstruct_static_config *)locus->extra;
	if (!conf)
	{
		conf = gdl_hstruct_static_config_alloc (m->K);
	}
	if (T == gdl_hstruct_point_f)
	{
		size_t kk = gdl_hstruct_fpoint_size (gdl_hstruct_parameters_get_point (m->params, T, idx));
		
		conf->k     = kk;
		conf->f_idx = idx;
	}
	else if (T == gdl_hstruct_point_rho)
	{
		conf->rho_idx = idx;
	}
	else if (T == gdl_hstruct_point_mu)
	{
		conf->mu_idx = idx;
	}
	else if (T == gdl_hstruct_point_hot)
	{
		conf->hot_idx = idx;
	}
}

void
gdl_hstruct_rng_model_set_locus_ancestral (gdl_hstruct_rng_model * m, const gdl_locus * locus, size_t k, size_t a)
{
	gdl_hstruct_static_config * conf = (gdl_hstruct_static_config *)locus->extra;
	
	conf->ancestral[k]->idx = a;
}

static double
gdl_hstruct_rng_model_recomb (const gdl_hstruct_rng_model * m, const gdl_hstruct_static_config * conf, double d)
{
	double rho, hot = 1.0;
	
	if (conf->rho_idx >=0  && d > 0.0)
	{
		rho = gdl_hstruct_point_value (gdl_hstruct_parameters_get_point (m->params, gdl_hstruct_point_rho, conf->rho_idx));
		
		if (conf->hot_idx >=0)
		{
			hot = gdl_hstruct_point_value (gdl_hstruct_parameters_get_point (m->params, gdl_hstruct_point_hot, conf->hot_idx));
		}
		
		return exp(-rho*hot*d);
	}
	else if (d == 0.0)
	{
		return 1.0;
	}
	else
	{
		return 0.0;
	}
}

static double
gdl_hstruct_rng_model_transition_proba (const gdl_hstruct_rng_model * m, const gdl_hstruct_static_config * c1, const gdl_hstruct_static_config * c2, size_t k1, size_t k2, double distance)
{
	size_t k;
	double q1, q2, t, w, pr;
	const gdl_hstruct_fpoint * f1, * f2;
	
	f1 = (c1->f_idx >= 0) ? gdl_hstruct_parameters_get_point (m->params, gdl_hstruct_point_f, c1->f_idx) : 0;
	f2 = (c2->f_idx >= 0) ? gdl_hstruct_parameters_get_point (m->params, gdl_hstruct_point_f, c2->f_idx) : 0;
	
	q2 = (c2->k > 1) ? gdl_hstruct_fpoint_value (f2, k2) : 1.0;
	
	if (k1 < c2->k)
	{
		t  = gdl_hstruct_rng_model_recomb (m, c1, distance);
		
		pr = (1.0-t)*q2;
		
		if (k1 == k2)
		{
			q1 = (c1->k > 1) ? gdl_hstruct_fpoint_value (f1, k1) : 1.0;
			for (w = 0, k = c1->k; k < c2->k; k++)
			{
				w += gdl_hstruct_fpoint_value (f2, k);
			}
			pr += (1.0-w*q2)*t;
		}
		else if (k2 >= c1->k)
		{
			pr += q1*q2*t;
		}
	}
	else
	{
		pr = q2;
	}
	
	return pr;
}

static void
gdl_hstruct_rng_model_mutation (const gdl_hstruct_rng_model * m, const gdl_locus * locus, gdl_vector_uint * haplotype, size_t h, const gdl_rng * rng)
{
	const gdl_hstruct_static_config * conf;
	const gdl_hstruct_point * mu;
	
	conf = (gdl_hstruct_static_config *)locus->extra; 
	mu   = (conf->mu_idx >= 0) ? gdl_hstruct_parameters_get_point (m->params, gdl_hstruct_point_mu, conf->mu_idx) : 0;
	
	if (mu)
	{
		double p = gdl_hstruct_point_value (mu);
		
		if (gdl_ran_bernoulli (rng, p))
		{
			size_t a = 0, na = gdl_locus_allele (locus);
			while (!gdl_ran_bernoulli (rng, 1.0/na))
			{
				a++;
			}
			a = (a >= na) ? a % na : a;
			//printf ("MUTATION %d ==> %d\n", gdl_vector_uint_get (haplotype, h), a);
			gdl_vector_uint_set (haplotype, h, a);
		}
	}	
}

gdl_vector_uint *
gdl_hstruct_rng_model_generate (const gdl_hstruct_rng_model * m, const gdl_rng * rng)
{
	size_t i, j, k, l, h, ng, nc, nl;
	double * P;
	gdl_vector_uint * haplotype;
	const gdl_locus * locus;
	gdl_gdistance * dist;
	gdl_hstruct_static_config * next, * prev;
	gdl_ran_discrete_t * ran;
	
	P = GDL_MALLOC (double, m->K);
	
	haplotype = gdl_vector_uint_calloc (m->L);
	
	ng = gdl_gmap_size (m->map);
	
	for (h = i = 0; i < ng; i++)
	{
		nc = gdl_gmap_genome_size (m->map, i);
		
		for (j = 0; j < nc; j++)
		{
			nl = gdl_gmap_chromosome_size (m->map, i, j);	
			
			locus = gdl_gmap_get_locus (m->map, i, j, 0);
			prev  = (gdl_hstruct_static_config *)locus->extra;
			
			if (prev->k > 1)
			{
				for (k = 0; k < prev->k; k++)
				{
					P[k] = gdl_hstruct_fpoint_value (gdl_hstruct_parameters_get_point (m->params, gdl_hstruct_point_f, prev->f_idx), k);
				}
				ran = gdl_ran_discrete_preproc (prev->k, P);
				gdl_vector_uint_set (haplotype, h, gdl_ran_discrete (rng, ran));
				gdl_ran_discrete_free (ran);
			}		
			
			for (l = 1;l < nl; l++)
			{
				h++;
				
				locus = gdl_gmap_get_locus (m->map, i, j, l);
				next  = (gdl_hstruct_static_config *)locus->extra;
				
				if (next->k > 1)
				{
					dist = gdl_gmap_get_distance (m->map, i, j, l-1, l);
					for (k = 0; k < next->k; k++)
					{
						P[k] = gdl_hstruct_rng_model_transition_proba (m, prev, next, gdl_vector_uint_get (haplotype, h-1), k, dist->value);
						//printf ("TRANSITION[%d][%d](%g) = %g\n", gdl_vector_uint_get (haplotype, h-1), k, dist->value, P[k]);
					}
					ran = gdl_ran_discrete_preproc (next->k, P);
					gdl_vector_uint_set (haplotype, h, gdl_ran_discrete (rng, ran));
					gdl_ran_discrete_free (ran);
				}
				
				// mutation on prev... !!!
				gdl_hstruct_rng_model_mutation (m, gdl_gmap_get_locus (m->map, i, j, l-1), haplotype, h-1, rng);
				
				prev = next;
			}
			// mutation on prev... !!!
			gdl_hstruct_rng_model_mutation (m, gdl_gmap_get_locus (m->map, i, j, l-1), haplotype, h-1, rng);
			
		}	
	}
	
	GDL_FREE (P);
	
	return haplotype;
}

static gdl_locus *
gdl_hstruct_rng_model_locus_type_sscanf (const gdl_string * str)
{
	size_t i, j, n;
	gdl_string * buf;
	gdl_locus  * loc;
	gdl_allele * all;
	
	n = strlen (str);
	
	for (i = 0; isspace (str[i]) && i < n; i++);
	for (j = i; !isspace (str[j]) && j < n; j++);
	
	buf = gdl_string_alloc (j-i);
	strncpy (buf, &str[i], j-i);
	loc = gdl_locus_new (buf);
	gdl_string_free (buf);
	
	for(;;)
	{
		for (i = j; isspace (str[i]) && i < n; i++);
		if (i == n) break;
		for (j = i; !isspace (str[j]) && j < n; j++);
		buf = gdl_string_alloc (j-i);
		strncpy (buf, &str[i], j-i);
		all = gdl_allele_new (buf);
		gdl_locus_add_allele (loc, &all, 1);
		gdl_string_free (buf);
	}
	
	return loc;
}

static int
gdl_hstruct_rng_model_locus_map_sscanf (const gdl_string * str, gdl_hstruct_rng_model * m, gdl_hashtable * type, gdl_genome ** gptr, gdl_chromosome ** cptr)
{
	size_t i, j = 0, k, n, fidx;
	gdl_boolean push = gdl_false;
	gdl_string * buf, * tmp;
	gdl_locus     * locus;
	gdl_gdistance * dist;
	gdl_hstruct_static_config * conf;
	gdl_hstruct_fpoint * f;
	
	n = strlen (str);

#define TOKEN {for (i = j; isspace (str[i]) && i < n; i++); \
	            for (j = i; !isspace (str[j]) && j < n; j++); \
             	buf = gdl_string_alloc (j-i); \
            	strncpy (buf, &str[i], j-i);}
	
   TOKEN
	if ((*gptr && strcmp (gdl_entity_get_name (*gptr), buf)) || !(*gptr))
	{
		*gptr = gdl_genome_new (buf);
		if (!m->map)
		{
			m->map = gdl_gmap_alloc ();
		}
		gdl_gmap_add (m->map, *gptr);
	}
	gdl_string_free (buf);
	
   TOKEN
	if ((*cptr && strcmp (gdl_entity_get_name (*cptr), buf)) || !(*cptr))
	{
		*cptr = gdl_chromosome_new (buf);
		gdl_genome_add (*gptr, *cptr);
		push = gdl_true;
	}
	gdl_string_free (buf);
	
	TOKEN
	tmp = gdl_string_clone (buf);
	gdl_string_free (buf);
	
	TOKEN
	locus = (gdl_locus *) gdl_hashtable_lookup (type, buf);
	if (!locus)
	{
		GDL_ERROR_VAL (gdl_string_sprintf ("Unknwon locus type [ %s ]", buf), GDL_EINVAL, GDL_FAILURE);	
	}
	
	locus = gdl_entity_clone (locus);
	gdl_entity_set_name (locus, tmp);
	gdl_string_free (tmp);
	gdl_string_free (buf);
	
	TOKEN
	tmp = gdl_string_clone (buf);
	gdl_string_free (buf);
	
	TOKEN
	dist = gdl_gdistance_alloc (gdl_gdistance_type_parse (buf));
	dist->value = (double)atof(tmp);
	gdl_string_free (tmp);
	gdl_string_free (buf);
	
	if (push)
	{
		gdl_chromosome_push (*cptr, locus, 1);
	}
	else
	{
		gdl_chromosome_add (*cptr, locus, dist, 1);
	}
	
	TOKEN
	fidx = atoi (buf) - 1;
	if (fidx >= 0)
	{
		f    = gdl_hstruct_parameters_get_point (m->params, gdl_hstruct_point_f, fidx);
		conf = gdl_hstruct_static_config_alloc (gdl_hstruct_fpoint_size (f));
		conf->f_idx = fidx;
	}
	else
	{
		conf = gdl_hstruct_static_config_alloc (1);
	}
	gdl_string_free (buf);
	
	TOKEN
	conf->rho_idx = atoi (buf) - 1;
	gdl_string_free (buf);
	
	TOKEN
	conf->mu_idx = atoi (buf) - 1;
	gdl_string_free (buf);
	
	TOKEN
	conf->hot_idx = atoi (buf) - 1;
	gdl_string_free (buf);
	
	for (k = 0; k < conf->k; k++)
	{
		TOKEN
		conf->ancestral[k] = gdl_entity_clone (gdl_locus_search_allele (locus, buf));
		gdl_string_free (buf);
	}
	
	locus->extra = conf;
	
	return GDL_SUCCESS;
}
 
gdl_hstruct_rng_model *
gdl_hstruct_rng_model_fscanf (FILE * stream)
{
	if (!stream)
	{
		return NULL;	
	}
	else
	{
		size_t n, k;
		gdl_genome     * gen;
		gdl_chromosome * chr;
		gdl_locus      * loc;
		gdl_string * line = NULL;
		gdl_hashtable * type = gdl_hashtable_alloc (gdl_entity_interface, 0);
		gdl_hstruct_rng_model * m = NULL;

#define CLEAN_LINE {gdl_string_free (line);line=NULL;}
#define CHECK {if (!m){CLEAN_LINE;gdl_hstruct_rng_model_free (m);GDL_ERROR_VAL(gdl_string_sprintf ("Try to add parameter before -K flag", line),GDL_EINVAL,0);}}
		
		gen = chr = 0;
		
		while (gdl_getline (&line, &n, stream)!=-1)
		{
			if (line[0]=='-')
			{	
				switch (line[1])
				{
					case 'F':
						CHECK
						gdl_hstruct_rng_model_add_parameter (m, gdl_hstruct_point_f, gdl_hstruct_fpoint_sscanf (&line[2]));
						break;
					case 'H':
						CHECK
						gdl_hstruct_rng_model_add_parameter (m, gdl_hstruct_point_hot, gdl_hstruct_point_sscanf (&line[2]));
						break;
					case 'K':
						m    = GDL_CALLOC (gdl_hstruct_rng_model, 1);
						m->K = atoi(&line[2]);
						m->params = gdl_hstruct_parameters_alloc (m->K);
						break;
					case 'M':
						CHECK
						gdl_hstruct_rng_model_add_parameter (m, gdl_hstruct_point_mu, gdl_hstruct_point_sscanf (&line[2]));
						break;
					case 'R':
						CHECK
						gdl_hstruct_rng_model_add_parameter (m, gdl_hstruct_point_rho, gdl_hstruct_point_sscanf (&line[2]));
						break;
					case 'L':
						loc = gdl_hstruct_rng_model_locus_type_sscanf (&line[2]);
						gdl_hashtable_add (type, loc->name, loc, 1);
						break;				
					default:
						GDL_ERROR_VAL(gdl_string_sprintf ("Line [ %s ] is not valid", line),GDL_EINVAL,0);
						break;
				}				
			}
			else if (line[0] != '#') 
			{
				gdl_hstruct_rng_model_locus_map_sscanf (line, m, type, &gen, &chr);
			}
			
			CLEAN_LINE
		}

#undef CLEAN_LINE
#undef CHECK

		gdl_hashtable_free (type);
		
		m->loci = gdl_gmap_loci (m->map, &(m->L));
	
		return m;
		
	}
}

int
gdl_hstruct_rng_model_fprintf (FILE * stream, const gdl_hstruct_rng_model * m)
{
	if (!stream || !m)
	{
		return GDL_EINVAL;	
	}
	else
	{
		size_t i, j, k, l, ng, nc, nl;
		gdl_hstruct_point_array * P;
		
		fprintf (stream, "# Parameters\n");
		fprintf (stream, "-K %d\n", m->K);
		P = gdl_hstruct_parameters_get_point_array (m->params, gdl_hstruct_point_rho);
		if (P)
		{
			for (i = 0; i < P->size; i++)
			{
				fprintf (stream, "-R %g\n", gdl_hstruct_point_value (P->points[i]));	
			}
		}
		P = gdl_hstruct_parameters_get_point_array (m->params, gdl_hstruct_point_mu);
		if (P)
		{
			for (i = 0; i < P->size; i++)
			{
				fprintf (stream, "-M %g\n", gdl_hstruct_point_value (P->points[i]));	
			}
		}
		P = gdl_hstruct_parameters_get_point_array (m->params, gdl_hstruct_point_hot);
		if (P)
		{
			for (i = 0; i < P->size; i++)
			{
				fprintf (stream, "-H %g\n", gdl_hstruct_point_value (P->points[i]));	
			}
		}
		P = gdl_hstruct_parameters_get_point_array (m->params, gdl_hstruct_point_f);
		if (P)
		{
			for (i = 0; i < P->size; i++)
			{
				fprintf (stream, "-F %d", gdl_hstruct_fpoint_size (P->points[i]));	
				for (j = 0; j < gdl_hstruct_fpoint_size (P->points[i]); j++)
				{
					fprintf (stream, " %g", gdl_hstruct_fpoint_value (P->points[i], j));	
				}
				fprintf (stream, "\n");
			}
		}
		fprintf (stream, "# Locus Type\n");
		
		fprintf (stream, "# Locus Map\n");
		ng = gdl_gmap_size (m->map);
		for (i = 0; i < ng; i++)
		{
			gdl_genome * gen = gdl_gmap_get (m->map, i);
			nc = gdl_genome_size (gen);
			for (j = 0; j < nc; j++)
			{
				gdl_chromosome * chr = gdl_genome_get (gen, j);
			   nl = gdl_chromosome_size (chr);
			   for (l = 0; l < nl; l++)
			   {
			   	gdl_locus * loc = gdl_chromosome_get (chr, l);
			   	gdl_hstruct_static_config * conf = (gdl_hstruct_static_config *) loc->extra;
			   	gdl_gdistance * dist = 0;
			   	fprintf (stream, "%s\t%s\t%s\t%s\t", gen->name, chr->name, loc->name, loc->name);
			   	if (l)
			   	{
			   		dist = gdl_chromosome_get_distance (chr, l-1, l);
			   	}
			   	gdl_gdistance_fprintf (stream, dist);
			   	gdl_gdistance_free (dist);
			   	fprintf (stream, "\t%d\t%d\t%d\t%d", conf->f_idx+1, conf->rho_idx+1, conf->mu_idx+1, conf->hot_idx+1);
			   	for (k = 0; k < conf->k; k++)
			   	{
			   		fprintf (stream, "\t%s", conf->ancestral[k]->name);
			   	}
			   	fprintf (stream, "\n");
			   }
			}	
		}
		
		return GDL_SUCCESS;
	}
}
