/*  
 *  hstruct/hmm.c
 * 
 *  $Author: baptiste $, $Date: 2008-05-13 15:33:44 $, $Version$
 *
 *  Libgdl : a C library for statistical genetics
 * 
 *  Copyright (C) 2003-2006  Jean-Baptiste Veyrieras, INRA, France.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA * 
 */

#include <float.h>
 
#include <gdl/gdl_common.h>
#include <gdl/gdl_gblock.h>
#include <gdl/gdl_gpoint.h>
#include <gdl/gdl_rng.h>
#include <gdl/gdl_clustering.h>
#include <gdl/gdl_gentity.h>
#include <gdl/gdl_mask.h>
#include <gdl/gdl_gview.h>
#include <gdl/gdl_gview_mask.h>
#include <gdl/gdl_gview_collector.h>
#include <gdl/gdl_hview.h>
#include <gdl/gdl_hstruct.h>
#include <gdl/gdl_hstruct_model.h>
#include <gdl/gdl_hstruct_hmm.h>

void
gdl_hstruct_hmm_free (gdl_hstruct_hmm * hmm)
{
	if (hmm)
	{
		GDL_MATRIX_FREE (hmm->beta, hmm->k);
		GDL_MATRIX_FREE (hmm->alpha, hmm->k);
		GDL_MATRIX_FREE (hmm->phi, hmm->k);
		gdl_gvalues_get_free (hmm->gbuf);
		GDL_FREE (hmm->na);
		GDL_FREE (hmm->lidx);
		GDL_FREE (hmm->delta);
		GDL_FREE (hmm->mdelta);
		GDL_FREE (hmm->rho);
		GDL_FREE (hmm);
	}
}

gdl_hstruct_hmm *
gdl_hstruct_hmm_alloc (gdl_gview_wrapper * data,
                      const gdl_allele_block * seq,
                      const gdl_chromosome    * chrom,
                      const void * haplotypes,
                      gdl_hstruct_get_ancestral get)
{
	size_t i, ii, jj, l;
	gdl_locus * locus, * nlocus;
	const gdl_gdistance * d;
	gdl_hstruct_hmm * hmm;
	
	hmm = GDL_CALLOC (gdl_hstruct_hmm, 1);
	
	// Ancestral haplotype interface
	hmm->hdb  = haplotypes;
   hmm->hget = get;
   // Raw data
   hmm->data  = data;
   hmm->chrom = chrom;
	hmm->p     = gdl_gview_wrapper_ploidy (data);
	hmm->n     = gdl_gview_wrapper_accession_size (hmm->data);
	hmm->l     = gdl_gview_wrapper_locus_size (hmm->data);
	hmm->na    = GDL_MALLOC (size_t, hmm->l);
	hmm->lidx  = GDL_MALLOC (size_t, hmm->l);
	hmm->dist  = GDL_MALLOC (double, hmm->l-1);
	hmm->norm  = GDL_MALLOC (double, hmm->l);
	hmm->rho   = GDL_MALLOC (double, hmm->l-1);
	
	hmm->seq    = seq;
	hmm->nc     = gdl_gview_wrapper_accession_size_c (hmm->data);
   hmm->gbuf   = gdl_gview_wrapper_get_new (hmm->data);
	
	hmm->weights = GDL_MALLOC (double, hmm->nc);
	for (i = 0; i < hmm->nc; i++)
	{
		hmm->weights[i] = gdl_gview_wrapper_accession_mult_c (hmm->data, i);
		hmm->weights[i] /= (hmm->n*hmm->p);
	}
	
	locus = gdl_gview_wrapper_get_locus (hmm->data, 0);
	l     = gdl_chromosome_search (hmm->chrom, locus);
	locus = gdl_chromosome_get (hmm->chrom, l);
	
	for (hmm->k = 0, i = 0; i < hmm->l; i++)
	{
		gdl_hstruct_config * conf = gdl_hstruct_config_get (locus);
		
		hmm->lidx[i]  = l;
		hmm->na[i]    = gdl_locus_allele (locus);
		
		if (hmm->k < conf->k)
		{
			hmm->k = conf->k;
		}
		
		if (i < hmm->l-1)
		{
			nlocus = gdl_gview_wrapper_get_locus (hmm->data, i+1);
			l      = gdl_chromosome_search (hmm->chrom, nlocus);
			nlocus = gdl_chromosome_get (hmm->chrom, l);
			
			d  = gdl_chromosome_search_distance (hmm->chrom, locus, nlocus);
			
			if (gdl_gdistance_is_defined (d))
			{
				hmm->dist[i] = d->value;
			}
			else
			{
				hmm->dist[i] = -1.0;
			}
		}
		
		locus = nlocus;
	}
	
	hmm->alpha = GDL_MATRIX_ALLOC (double, hmm->k, hmm->l);
	hmm->beta  = GDL_MATRIX_ALLOC (double, hmm->k, hmm->l);
	
	return hmm;
}
/*
 * Returns the parameter configutation at a given locus  
 */
gdl_hstruct_config *
gdl_hstruct_hmm_get_locus_config (const gdl_hstruct_hmm * hmm, size_t l)
{
	return gdl_hstruct_config_get (gdl_chromosome_get (hmm->chrom, hmm->lidx[l]));
}
/*
 * Returns the transition probability between states k1 and k2 given the distance and the 
 * parameter configurations at each site 
 */
double
gdl_hstruct_hmm_get_transition_proba (gdl_hstruct_hmm * hmm, const gdl_hstruct_config * c1, const gdl_hstruct_config * c2, size_t k1, size_t k2, double distance)
{
	size_t k;
	double q1, q2, t, w, pr;
	
	q2 = (c2->k > 1) ? gdl_hstruct_fpoint_value (gdl_hstruct_config_get_point (c2, gdl_hstruct_point_f), k2) : 1.0;
	
	if (k1 < c2->k)
	{
		t  = gdl_hstruct_hmm_not_recomb_proba (hmm, c1, distance);
		pr = (1.0-t)*q2;
		if (k1 == k2)
		{
			q1 = (c1->k > 1) ? gdl_hstruct_fpoint_value (gdl_hstruct_config_get_point (c1, gdl_hstruct_point_f), k1) : 1.0;
			for (w = 0, k = c1->k; k < c2->k; k++)
			{
				w += gdl_hstruct_fpoint_value (gdl_hstruct_config_get_point (c2, gdl_hstruct_point_f), k);
			}
			pr += (1.0-w*q2)*t;
		}
		else if (k2 >= c1->k)
		{
			pr += q1*q2*t;
		}
	}
	else
	{
		pr = q2;
	}
	
	return pr;
}
/*
 * Returns the probability to be in state k at positions pos in the interval between k1 and k2,
 * given the parameter configurations at each flanking site. 
 */
double
gdl_hstruct_hmm_get_interval_transition_proba (gdl_hstruct_hmm * hmm, const gdl_hstruct_config * conf1, const gdl_hstruct_config * conf2, size_t k, size_t k1, size_t k2, size_t l, double pos)
{
	size_t n=0;
	double u1, u2, pr;
	
	if (conf1->k == conf2->k)
	{
		u1 = gdl_hstruct_hmm_get_transition_proba (hmm, conf1, conf1, k1, k, pos);
		u2 = gdl_hstruct_hmm_get_transition_proba (hmm, conf2, conf2, k, k2, hmm->dist[l] - pos);
		pr = u1*u2;
	}
	else if (conf1->k != conf2->k)
	{
		pr = 0.0;
		if (k < conf1->k)
		{
			u1 = gdl_hstruct_hmm_get_transition_proba (hmm, conf1, conf1, k1, k, pos);
			u2 = gdl_hstruct_hmm_get_transition_proba (hmm, conf1, conf2, k, k2, hmm->dist[l] - pos);
			pr = u1*u2;
			n++;
		}
		if (k < conf2->k)
		{
			u1 = gdl_hstruct_hmm_get_transition_proba (hmm, conf1, conf2, k1, k, pos);
			u2 = gdl_hstruct_hmm_get_transition_proba (hmm, conf2, conf2, k, k2, hmm->dist[l] - pos);
			pr += u1*u2;
			n++;
		}
		pr/=n;
	}
	
	return pr;
}
/*
 * Returns the probability to not have recombinated between two sites sperated by the 
 * given distance.
 */
double
gdl_hstruct_hmm_not_recomb_proba (const gdl_hstruct_hmm * hmm, const gdl_hstruct_config * conf, double d)
{
	double rho, hot = 1.0;
	
	if (gdl_hstruct_config_get_point (conf, gdl_hstruct_point_rho) && d > 0.0)
	{
		rho = gdl_hstruct_point_value (gdl_hstruct_config_get_point (conf, gdl_hstruct_point_rho));
		
		if (gdl_hstruct_config_get_point (conf, gdl_hstruct_point_hot))
		{
			hot = gdl_hstruct_point_value (gdl_hstruct_config_get_point (conf, gdl_hstruct_point_hot));
		}
		
		return exp(-rho*hot*d);
	}
	else if (d == 0.0)
	{
		return 1.0;
	}
	else
	{
		return 0.0;
	}
}

static int
gdl_hstruct_hmm_fb_update_single (gdl_hstruct_hmm * hmm, size_t c, size_t p)
{
	size_t j, k, kk;
	double z, x, tot;
	gdl_hstruct_point  * mu;
	gdl_hstruct_fpoint * f;
	gdl_hstruct_config * conf, * pconf = 0;
	
	gdl_hstruct_hmm_forward (hmm, c, p);
	gdl_hstruct_hmm_backward (hmm, c, p);
	
	for (j = 0; j < hmm->l; j++)
	{
		conf = gdl_hstruct_hmm_get_locus_config (hmm, j);
		f    = (gdl_hstruct_fpoint *)gdl_hstruct_config_get_point (conf, gdl_hstruct_point_f); 
		//mu   = (gdl_hstruct_point *)gdl_hstruct_config_get_point (conf, gdl_hstruct_point_mu);
		if (conf->k > 1)
		{
			for (tot = k = 0; k < conf->k; k++)
			{
				tot += hmm->alpha[k][j]*hmm->beta[k][j];
			}
			for (k = 0; k < conf->k; k++)
			{
				z = hmm->alpha[k][j]*hmm->beta[k][j]/tot;
				gdl_hstruct_fpoint_collect (f, k, hmm->weights[c]*z);
				//gdl_hstruct_point_collect (mu, z*hmm->weights[c]*(1-gdl_hstruct_hmm_get_site_ancestral (hmm, k, c, p, j)));
//				if (pconf)
//				{
//					for (kk = 0; kk < pconf->k; kk++)
//					{
//						if (kk == k) continue;
//						x  = gdl_hstruct_hmm_get_transition_proba (hmm, pconf, conf, kk, k, hmm->dist[j-1]);
//						x *= gdl_hstruct_hmm_get_site_proba (hmm, k, c, p, j);
//						x *= hmm->alpha[kk][j-1]*hmm->beta[k][j];
//						hmm->rho[j-1] += hmm->weights[c]*x;
//					}
//				}
			}
			//pconf=conf;
		}
	}	
	return GDL_SUCCESS;
}
/*
 * Do a forward-bacward on all observed sequences and potentially updates the parameters
 * that need to be updated
 */
int
gdl_hstruct_hmm_fb_update (gdl_hstruct_hmm * hmm, gdl_boolean * update)
{
	*update=gdl_false;
	if (hmm->k > 1)
	{
		size_t i, j;
		double r=0;
		gdl_hstruct_point  * rho;
   	gdl_hstruct_point  * hot;
   	gdl_hstruct_config * conf;
		
		for (i = 0; i < hmm->nc; i++)
		{
			for (j = 0; j < hmm->p; j++)
			{
				gdl_hstruct_hmm_fb_update_single (hmm, i, j);
			}
		}
//		for (i = 0; i < hmm->l-1; i++)
//		{
//			conf = gdl_hstruct_hmm_get_locus_config (hmm, i);
//			hot  = (gdl_hstruct_point *)gdl_hstruct_config_get_point (conf, gdl_hstruct_point_hot);
//			rho  = (gdl_hstruct_point *)gdl_hstruct_config_get_point (conf, gdl_hstruct_point_rho);
//			if (hmm->rho[i] < 1.0) r += -log(1-hmm->rho[i])/(hmm->dist[i]*(hmm->l-1));
//			printf (">>%d %g\n", i, hmm->rho[i]);
//		}
//		printf ("RHO %g\n", r);
		
		*update=gdl_true;
	}
	return GDL_SUCCESS;
}
/*
 * Impute missing data
 */
static void
gdl_hstruct_hmm_missing_site_update (const gdl_hstruct_hmm * hmm,
                                      size_t im,
                                      size_t p,
                                      size_t l,
                                      size_t j,
                                      double * abs_res,
                                      double * sq_res,
                                      size_t * nres)
{
	size_t k, m, anc, ic;
	double tmp, tot, u, v, mu, * nt;
	gdl_gvalues * t;
	gdl_hstruct_config * conf = gdl_hstruct_hmm_get_locus_config (hmm, l);
	
	ic = gdl_gview_wrapper_missing_accession_idx_c (hmm->data, im);
	
	t = gdl_gview_wrapper_missing_hget (hmm->data, im, p, j);
	
	if (t->size>1)
	{
		nt = GDL_CALLOC (double, t->size);
		mu = gdl_hstruct_point_value (gdl_hstruct_config_get_point (conf, gdl_hstruct_point_mu));
		for (tot = k = 0; k < conf->k; k++)
		{
			anc = hmm->hget(hmm->hdb, k, l);
			u   = gdl_hstruct_hmm_get_alpha(hmm, ic, p, l, k)*gdl_hstruct_hmm_get_beta(hmm, ic, p, l, k);
			for (m = 0; m < t->size; m++)
			{
				if (t->values[m]->idx==(size_t)anc)
				{
					v = u*(1.0-mu);
				}
				else
				{
					v = u*mu/((double)hmm->na[l]-1);
				}
				nt[m]  += v;
				tot    += v;
			}
		}
		for (k = 0; k < t->size; k++)
		{
			u   = nt[k]/tot;
			tmp = (t->values[k]->value-u);
			(*abs_res) += fabs(tmp);
			(*sq_res)  += tmp*tmp;
			(*nres)++;
			t->values[k]->value = u;
		}
		GDL_FREE (nt);
	}
}

int 
gdl_hstruct_hmm_missing_genotype_update (gdl_hstruct_hmm * hmm, size_t im, size_t p, double * abs_res, double * sq_res)
{
	size_t ic, j, k, l, m, nml, nres=0;
	const size_t * lidx;	
	
	ic   = gdl_gview_wrapper_missing_accession_idx_c (hmm->data, im);
	nml  = gdl_gview_wrapper_missing_hlocus_size (hmm->data, im, p);
	lidx = gdl_gview_wrapper_missing_hlocus_idx (hmm->data, im, p);
	
	if (!hmm->abuf)
	{
	   gdl_hstruct_hmm_forward (hmm, ic, p);
	   gdl_hstruct_hmm_backward (hmm, ic, p);
	}
	
	*abs_res = *sq_res = 0;
	
	for (j = 0; j < nml; j++)
	{
		gdl_hstruct_hmm_missing_site_update (hmm, im, p, lidx[j], j, abs_res, sq_res, &nres);
	}
	
	*abs_res /= nres;
	*sq_res  /= nres;
	
	return GDL_SUCCESS;
} 
 
int
gdl_hstruct_hmm_missing_update (gdl_hstruct_hmm * hmm, double * abs_res, double * sq_res)
{
	double abs=0, sq=0;
	
	if (gdl_gview_wrapper_missing_size (hmm->data))
	{
		size_t i, j, k, l, nm, hna;
		
		nm = gdl_gview_wrapper_missing_accession_size (hmm->data);
		
		for (k = i = 0; i < nm; i++)
		{
			for (j = 0; j < hmm->p; j++)
			{
				hna = gdl_gview_wrapper_missing_hlocus_size (hmm->data, i, j);
				gdl_hstruct_hmm_missing_genotype_update (hmm, i, j, &abs, &sq);				
			}
		}
	}
	*abs_res = abs;
	*sq_res  = sq;
}
/*
 * Compute the loglikelihood of the data
 */
double
gdl_hstruct_hmm_get_loglikelihood (gdl_hstruct_hmm * hmm)
{
	size_t i, j, ni, u;
	double v, lo = 0.;
	
	for (i = 0; i < hmm->nc; i++)
	{
		for (j = 0; j < hmm->p; j++)
		{
			gdl_hstruct_hmm_forward (hmm, i, j);
			lo += hmm->log * hmm->weights[i];
			//printf ("LOG[%d] %g\n", i, hmm->log);
		}
	}
	
	return lo*(hmm->n*hmm->p);
}
/*
 * For a given genotype and a given phase returns the probability at each locus
 * to belong to each ancestral haplotype.
 * The returned matrix is [l*k] with l = number of loci and k = max number of
 * ancestral clusters.
 */
gdl_matrix *
gdl_hstruct_hmm_get_ancestral_probs (gdl_hstruct_hmm * hmm, size_t a, size_t p)
{
	if (hmm->k > 1)
	{
		size_t j, k, c;
		double tot;
		gdl_matrix * pr;
		
		// get the cluster index of the genotype
		c = gdl_gview_wrapper_accession_cidx_c (hmm->data, a);
	
		if (!hmm->abuf)
		{
			gdl_hstruct_hmm_forward (hmm, c, p);
			gdl_hstruct_hmm_backward (hmm, c, p);
		}
		
		pr = gdl_matrix_alloc (hmm->l, hmm->k);
		
		for (j = 0; j < hmm->l; j++)
		{
			gdl_hstruct_config * conf = gdl_hstruct_hmm_get_locus_config (hmm, j);
			
			for (tot = k = 0; k < conf->k; k++)
			{
				tot += gdl_hstruct_hmm_get_alpha (hmm, c, p, j, k)*gdl_hstruct_hmm_get_beta (hmm, c, p, j, k);
			}
			for (k = 0; k < conf->k; k++)
			{
				gdl_matrix_set (pr, j, k, gdl_hstruct_hmm_get_alpha (hmm, c, p, j, k)*gdl_hstruct_hmm_get_beta (hmm, c, p, j, k)/tot);
			}
		}
			
		return pr;
	}
	else
	{
		gdl_matrix * m = gdl_matrix_alloc (hmm->l, 1);
		gdl_matrix_set_all (m, 1.0);
		return m;
	}
}
/*
 * For a given genotype and a given phase returns the best ancestral path along
 * the sequence.
 */
gdl_vector_uint *
gdl_hstruct_hmm_get_ancestral_path (gdl_hstruct_hmm * hmm, size_t a, size_t p)
{
	if (hmm->k > 1)
	{
		size_t c;
		gdl_vector_uint * v;
		
		c = gdl_gview_wrapper_accession_cidx_c (hmm->data, a);
		
		v = gdl_vector_uint_alloc (hmm->l);
		
		if (hmm->phi == 0)
		{
			hmm->phi = GDL_MATRIX_ALLOC (size_t, hmm->k, hmm->l);
		}
		if (hmm->delta == 0)
		{
			hmm->delta = GDL_MALLOC (double, hmm->k);
		}
		if (hmm->mdelta == 0)
		{
			hmm->mdelta = GDL_MALLOC (double, hmm->k);
		}
		
		gdl_hstruct_hmm_viterbi (hmm, c, p, hmm->delta, hmm->mdelta, hmm->phi, v);
		
		return v;
	}
	else
	{
		return gdl_vector_uint_calloc (hmm->l);
	}
}
/*
 * Returns the transition matrix between sites l and l+1
 */
gdl_matrix *
gdl_hstruct_hmm_get_transition_matrix (gdl_hstruct_hmm * hmm, size_t l)
{
	if (hmm->k > 1)
	{
		size_t i, j, k, k1, k2, ni;
		double tot, u, x, * xx;
		gdl_matrix * tr;
		gdl_hstruct_config * pconf, * nconf;
	
		pconf = gdl_hstruct_hmm_get_locus_config (hmm, l);
		nconf = gdl_hstruct_hmm_get_locus_config (hmm, l+1);
		
		tr = gdl_matrix_calloc (pconf->k, nconf->k);
		
		for (i = 0; i < hmm->nc; i++)
		{
			for (j = 0; j < hmm->p; j++)
			{
				if (!hmm->abuf)
				{
					gdl_hstruct_hmm_forward (hmm, i, j);
					gdl_hstruct_hmm_backward (hmm, i, j);
				}
				for (k2 = 0; k2 < nconf->k; k2++)
				{
					for (k1 = 0; k1 < pconf->k; k1++)
					{
						x  = gdl_hstruct_hmm_get_transition_proba (hmm, pconf, nconf, k1, k2, hmm->dist[l]);
						x *= gdl_hstruct_hmm_get_site_proba (hmm, k2, i, j, l+1);
						x *= gdl_hstruct_hmm_get_alpha(hmm, i, j, l, k1)*gdl_hstruct_hmm_get_beta(hmm, i, j, l+1, k2);
						x *= hmm->weights[i];
						x += gdl_matrix_get (tr, k1, k2);
						gdl_matrix_set (tr, k1, k2, x);
					}
				}
			}
		}
		// Normalize the proba...
		for (k1 = 0; k1 < pconf->k; k1++)
		{
			for (tot = k2 = 0; k2 < nconf->k; k2++)
			{
				tot += gdl_matrix_get (tr, k1, k2);
			}
			if (tot)
			{
				for (k2 = 0; k2 < nconf->k; k2++)
				{
					xx  = gdl_matrix_ptr (tr, k1, k2);
					(*xx) /= tot;
				}
			}
		}
		return tr;
	}
	else
	{
		gdl_matrix * t = gdl_matrix_alloc (1, 1);
		gdl_matrix_set (t, 0, 0, 1.0);
		return t;
	}
}
/*
 * Returns the transition matrix between sites l and l+1 computed using
 * the best path assignment (viterbi) at each site.
 */
gdl_matrix *
gdl_hstruct_hmm_get_viterbi_transition_matrix (gdl_hstruct_hmm * hmm, size_t l)
{
	if (hmm->k > 1)
	{
		size_t i, j, k, k1, k2, ni;
		double tot, u, x, w1, w2, * xx;
		gdl_matrix * tr;
		gdl_vector_uint * path;
		gdl_hstruct_config * pconf, * nconf;
	
		pconf = gdl_hstruct_hmm_get_locus_config (hmm, l);
		nconf = gdl_hstruct_hmm_get_locus_config (hmm, l+1);
		
		tr = gdl_matrix_calloc (pconf->k, nconf->k);
		
		for (i = 0; i < hmm->nc; i++)
		{
			for (j = 0; j < hmm->p; j++)
			{
				path = gdl_hstruct_hmm_get_ancestral_path (hmm, i, j);
				k1 = gdl_vector_uint_get (path, l);
				w1 = 1;
				if (l > 1)
				{
					for (k = 0; k < l; k++)
					{
						if (gdl_vector_uint_get (path, k) == k1)
						{
							w1+=1;
						}	
					}
					w1 /= (l-1);
				}
				k2 = gdl_vector_uint_get (path, l+1);
				w2 = 1;
				if (l < hmm->l - 2)
				{
					for (k = l+2; k < path->size; k++)
					{
						if (gdl_vector_uint_get (path, k) == k2)
						{
							w2+=1;
						}
					}
					w2 /= (path->size-l-2);
				}
				x = gdl_matrix_get (tr, k1, k2);
				gdl_matrix_set (tr, k1, k2, x+w1*w2*hmm->weights[i]);
				gdl_vector_uint_free (path);
			}
		}
		return tr;
	}
	else
	{
		gdl_matrix * t = gdl_matrix_alloc (1, 1);
		gdl_matrix_set (t, 0, 0, 1.0);
		return t;
	}
}
/*
 * Returns the average frequency of a given ancestral cluster from 
 * site 'from' to site 'to'.
 */
double
gdl_hstruct_hmm_get_ancestral_freq (gdl_hstruct_hmm * hmm, size_t k, size_t from, size_t to)
{
	if (hmm->k > 1)
	{
		size_t i, j, kk, l, ni;
		double tot, x, nl, f = 0;
		gdl_hstruct_config * conf;
		
		nl = (double)(to-from+1);
		
		for (i = 0; i < hmm->nc; i++)
		{
			for (j = 0; j < hmm->p; j++)
			{
				if (!hmm->abuf)
				{
					gdl_hstruct_hmm_forward (hmm, i, j);
					gdl_hstruct_hmm_backward (hmm, i, j);
				}
				for (l = from; l <= to; l++)
				{
					gdl_hstruct_config * conf = gdl_hstruct_hmm_get_locus_config (hmm, l);
					if (k < conf->k)
					{
						for (tot = kk = 0; kk < conf->k; kk++)
						{
							tot += gdl_hstruct_hmm_get_alpha(hmm, i, j, l, kk)*gdl_hstruct_hmm_get_beta(hmm, i, j, l, kk);
						}
						x = gdl_hstruct_hmm_get_alpha(hmm, i, j, l, k)*gdl_hstruct_hmm_get_beta(hmm, i, j, l, k);
						x /= tot*nl;
						f += x*hmm->weights[i];
					}
				}				
			}
		}
		return f;
	}
	else
	{
		return 1.0;	
	}
}
/*
 * Computes and stores the forward-bacward variables for each
 * observed sequences.
 * 
 * see also gdl_hstruct_hmm_get_forward(), gdl_hstruct_hmm_get_backward()
 */
int
gdl_hstruct_hmm_store_fb_probs (gdl_hstruct_hmm * hmm)
{
	size_t i, j, k, l, ni;
	const gdl_hstruct_config * conf;
	
	
	if (!hmm->abuf)
	{
		hmm->abuf = gdl_block_alloc2 (4, hmm->nc, hmm->p, hmm->l, hmm->k);
	}
	if (!hmm->bbuf)
	{
		hmm->bbuf = gdl_block_alloc2 (4, hmm->nc, hmm->p, hmm->l, hmm->k);
	}
	
	for (i = 0; i < hmm->nc; i++)
	{
		for (j = 0; j < hmm->p; j++)
		{
			gdl_hstruct_hmm_forward (hmm, i, j);
			gdl_hstruct_hmm_backward (hmm, i, j);
			for (l = 0; l < hmm->l; l++)
			{
				conf = gdl_hstruct_hmm_get_locus_config (hmm, l);
				for (k = 0; k < conf->k; k++)
				{
					gdl_block_set (hmm->abuf, i, j, l, k, hmm->alpha[k][l]);
					gdl_block_set (hmm->bbuf, i, j, l, k, hmm->beta[k][l]);
				}
			}
		}
	}
	
	return GDL_SUCCESS;
}
/*
 * Clean the memory used by the storage of the forward
 * backward quantities
 * 
 * see gdl_hstruct_hmm_store_fb_probs
 */
void
gdl_hstruct_hmm_clean_fb_probs (gdl_hstruct_hmm * hmm)
{
	gdl_block_free (hmm->abuf);
	gdl_block_free (hmm->bbuf);
	hmm->abuf = hmm->bbuf = NULL;
}

gdl_gdistance *
gdl_hstruct_hmm_get_interval_length (const gdl_hstruct_hmm * hmm, size_t l)
{
	if ((l < hmm->l - 1) && hmm->dist[l] >= 0.0)
	{
			return gdl_chromosome_get_distance (hmm->chrom, hmm->lidx[l], hmm->lidx[l+1]);
	}
	else
	{
		return NULL;
	}
}

gdl_vector *
gdl_hstruct_hmm_get_interval_ancestral_proba (gdl_hstruct_hmm * hmm, size_t a, size_t p, size_t l, double pos)
{
	if (hmm->dist[l]==-1)
	{
		GDL_ERROR_VAL ("Try to get interval proba on an undefined interval", GDL_EINVAL, 0);
	}
	if (pos < 0.0 || pos > hmm->dist[l])
	{
		GDL_ERROR_VAL ("Try to get interval proba out of the interval range", GDL_EINVAL, 0);
	}
	
	size_t i, k, k1, k2;
	double u, pr, tot;
	gdl_vector * proba;
	const gdl_hstruct_config * nconf, * pconf;
	
	pconf = gdl_hstruct_hmm_get_locus_config (hmm, l);
	nconf = gdl_hstruct_hmm_get_locus_config (hmm, l+1);
	
	proba = gdl_vector_alloc (GDL_MAX(pconf->k, nconf->k));
	
	i = gdl_gview_wrapper_accession_cidx_c (hmm->data, a);
	
	if (!hmm->abuf)
	{
		gdl_hstruct_hmm_forward (hmm, i, p);
		gdl_hstruct_hmm_backward (hmm, i, p);
	}
	
	for (tot = k = 0; k < proba->size; k++)
	{
		pr = 0;
		
		for (k1 = 0; k1 < pconf->k; k1++)
		{
			for (k2 = 0; k2 < nconf->k; k2++)
			{
				u  = gdl_hstruct_hmm_get_alpha (hmm, i, p, l, k1) * gdl_hstruct_hmm_get_beta (hmm, i, p, l, k2);
				u *= gdl_hstruct_hmm_get_site_proba (hmm, k2, i, p, l+1);
				u *= gdl_hstruct_hmm_get_interval_transition_proba (hmm, pconf, nconf, k, k1, k2, l, pos);
				pr += u;
			}
		}
		
		gdl_vector_set (proba, k, pr);
		
		tot += pr;
	}
	
	if (tot)
	{
		gdl_vector_scale (proba, 1.0/tot);
	}

	return proba;
}

gdl_matrix *
gdl_hstruct_hmm_get_ancestral_intra_diveristy (const gdl_hstruct_hmm * hmm, gdl_vector_uint ** viterbi)
{
	size_t i, j, k, l, ni, nn;
	double x, f, * y;
	gdl_matrix * pi;
	
	pi = gdl_matrix_calloc (hmm->l+1, hmm->k);
	
	for (i = 0; i < hmm->nc; i++)
	{
		f = hmm->weights[i];
		for (l = 0; l < hmm->l; l++)
		{ 
			for (j = 0; j < hmm->p; j++)
			{
				k = gdl_vector_uint_get (viterbi [i * hmm->p + j], l);
				x = gdl_hstruct_hmm_get_site_ancestral (hmm, k, i, j, l);
				y = gdl_matrix_ptr (pi, 0, k);
				(*y) += f*(1.0-x)/(double)hmm->l;
				y = gdl_matrix_ptr (pi, l+1, k);
				(*y) += f*(1.0-x)/(double)hmm->l;
			}
		}
	}
	
	return pi;
}

static void
gdl_hstruct_hmm_mutation (const gdl_hstruct_hmm * hmm, size_t a, size_t c, size_t p, gdl_vector_uint * viterbi, gdl_vector_uint * mutation)
{
	size_t i, k;
	double x, y;
	const gdl_gvalues * gv;
	
	for (i = 0; i < hmm->l; i++)
	{
		x = gdl_hstruct_hmm_get_site_ancestral (hmm, gdl_vector_uint_get (viterbi, i), c, p, i);
		if (x == 1.)
		{
			continue;
		}
		// mutation
		else if (x == 0.)
		{
			gdl_gview_wrapper_get_allele (hmm->data, a, i, p, hmm->gbuf);
			gv = gdl_gvalues_get_gvalues (hmm->gbuf);
			gdl_vector_uint_set (mutation, i, gv->values[0]->idx+1);
		}
	}
}

gdl_matrix_uint *
gdl_hstruct_hmm_get_ancestral_mosaic (gdl_hstruct_hmm * hmm, size_t a, size_t p)
{
	size_t i, k, c;
	double x, y;
	const gdl_gvalues * gv;
	gdl_matrix_uint * v;
	
	c = gdl_gview_wrapper_accession_cidx_c (hmm->data, a);
	
	v = gdl_matrix_uint_calloc (hmm->l, 2);
	
	if (hmm->phi == 0)
	{
		hmm->phi = GDL_MATRIX_ALLOC (size_t, hmm->k, hmm->l);
	}
	if (hmm->delta == 0)
	{
		hmm->delta = GDL_MALLOC (double, hmm->k);
	}
	if (hmm->mdelta == 0)
	{
		hmm->mdelta = GDL_MALLOC (double, hmm->k);
	}
	
	gdl_vector_uint_view viterbi = gdl_matrix_uint_column (v, 0);
	
	gdl_hstruct_hmm_viterbi (hmm, c, p, hmm->delta, hmm->mdelta, hmm->phi, &(viterbi.vector));
	
	gdl_vector_uint_view mutation = gdl_matrix_uint_column (v, 1);
	
	gdl_hstruct_hmm_mutation (hmm, a, c, p, &(viterbi.vector), &(mutation.vector));
	
	return v;
}

double
gdl_hstruct_hmm_get_site_ancestral (const gdl_hstruct_hmm * hmm, size_t k, size_t c, size_t p, size_t l)
{
	size_t a, anc;
	
	anc = hmm->hget(hmm->hdb, k, l);
	
	a = gdl_allele_block_get (hmm->seq, c, l, p);
	
	if (a)
	{
		return ((a-1)==anc) ? 1.0 : 0.0;
	}
	else
	{
		const gdl_gvalues * gv;
	
		gdl_gview_wrapper_get_allele_c (hmm->data, c, l, p, hmm->gbuf);
		gv = gdl_gvalues_get_gvalues (hmm->gbuf);
		if (gv)
		{
			for (a = 0; a < gv->size; a++)
			{
				if (gv->values[a]->idx == anc)
				{
					return gv->values[a]->value;
				}
			}
		}
	}
	
	return 1.0;
}

double
gdl_hstruct_hmm_get_site_proba (const gdl_hstruct_hmm * hmm, size_t k, size_t c, size_t p, size_t l)
{
	size_t a, anc;
	double mu;
	const gdl_hstruct_config * conf = gdl_hstruct_hmm_get_locus_config (hmm, l);
	
	mu  = gdl_hstruct_point_value (gdl_hstruct_config_get_point (conf, gdl_hstruct_point_mu));
	anc = hmm->hget(hmm->hdb, k, l);
	a   = gdl_allele_block_get (hmm->seq, c, l, p);
	
	if (a)
	{
		return ((a-1)==anc) ? 1-mu : mu/(hmm->na[l]-1);
	}
	else
	{
		const gdl_gvalues * gv;
		double x = 0;
		
		gdl_gview_wrapper_get_allele_c (hmm->data, c, l, p, hmm->gbuf);
		gv = gdl_gvalues_get_gvalues (hmm->gbuf);
		if (gv)
		{
			for (a = 0; a < gv->size; a++)
			{
				if (gv->values[a]->idx == anc)
				{
					x = gv->values[a]->value;
					break;
				}
			}
			return x*(1.0-mu) + (1.0-x)*mu/(hmm->na[l]-1);
		}	
		else
		{
			return 1.0;
		}
	}
}

double
gdl_hstruct_hmm_backward_init (gdl_hstruct_hmm * hmm, size_t c, size_t p)
{
	size_t k, l = hmm->l-1;
	double x, y, pr = 0;
	gdl_hstruct_config * conf = gdl_hstruct_hmm_get_locus_config (hmm, l);
	
	for(k = 0; k < conf->k; k++)
	{
		x = gdl_hstruct_hmm_get_site_proba (hmm, k, c, p, l);
		y = (conf->k > 1) ? gdl_hstruct_fpoint_value (gdl_hstruct_config_get_point (conf, gdl_hstruct_point_f), k) : 1.0;
		hmm->beta[k][l] = 1.0;
		pr += x * y;
	}	
	
	return pr;
}

double
gdl_hstruct_hmm_forward_init (gdl_hstruct_hmm * hmm, size_t c, size_t p)
{
	size_t k;
	double x, y;
	gdl_hstruct_config * conf = gdl_hstruct_hmm_get_locus_config (hmm, 0);
	
	hmm->norm[0] = 0.;
	
	for(k = 0; k < conf->k; k++)
	{
		y  = (conf->k > 1) ? gdl_hstruct_fpoint_value (gdl_hstruct_config_get_point (conf, gdl_hstruct_point_f), k) : 1.0;
		x  = gdl_hstruct_hmm_get_site_proba (hmm, k, c, p, 0);
		hmm->alpha[k][0] = x * y;
		hmm->norm[0] += hmm->alpha[k][0];
	}
	for(k = 0; k < conf->k; k++)
	{
		hmm->alpha[k][0] /= hmm->norm[0];
	}
	
	hmm->log = log(hmm->norm[0]);
	
	return hmm->norm[0];
}

void
gdl_hstruct_hmm_backward_induc (gdl_hstruct_hmm * hmm, size_t c, size_t p, size_t l, double * pr)
{
	size_t k, kk;
	double x, t, u, v, s, w, tot = 0, ss = 0;
	gdl_hstruct_config * conf  = gdl_hstruct_hmm_get_locus_config (hmm, l);
	gdl_hstruct_config * pconf = gdl_hstruct_hmm_get_locus_config (hmm, l+1);
	
	t = gdl_hstruct_hmm_not_recomb_proba (hmm, conf, hmm->dist[l]);
	
	s = *pr;
	
	for (k = 0; k < conf->k; k++)
	{
		if (pconf->k == 1)
		{
			x  = hmm->beta[k][l+1];
			u  = gdl_hstruct_hmm_get_site_proba (hmm, k, c, p, l+1);
			x *= u;
		}
		else if (k < pconf->k)
		{	
			// norec
			x  = hmm->beta[k][l+1];
			u  = gdl_hstruct_hmm_get_site_proba (hmm, k, c, p, l+1);
			for (w = 0, kk = conf->k; kk < pconf->k; kk++)
			{
				v  = gdl_hstruct_fpoint_value (gdl_hstruct_config_get_point (pconf, gdl_hstruct_point_f), kk);
				v *= (conf->k > 1) ? gdl_hstruct_fpoint_value (gdl_hstruct_config_get_point (conf, gdl_hstruct_point_f), k) : 1.0;
				w += v;
			}
			x  *= (1.0-w);
			x  *= t;
			x  *= u;
			for (w = 0, kk = conf->k; kk < pconf->k; kk++)
			{
				v  = gdl_hstruct_fpoint_value (gdl_hstruct_config_get_point (pconf, gdl_hstruct_point_f), kk);
				v *= (conf->k > 1) ? gdl_hstruct_fpoint_value (gdl_hstruct_config_get_point (conf, gdl_hstruct_point_f), k) : 1.0;
				v *= gdl_hstruct_hmm_get_site_proba (hmm, kk, c, p, l+1);
				v *= hmm->beta[kk][l+1];
				w += v;
			}
			x += t*v;
			// rec
			x  += (1.0-t)*s;
		}
		else
		{
			x = s;
		}
		
		hmm->beta[k][l] = x/hmm->norm[l];
		
		u = gdl_hstruct_hmm_get_site_proba (hmm, k, c, p, l);
		
		v = (conf->k > 1) ? gdl_hstruct_fpoint_value (gdl_hstruct_config_get_point (conf, gdl_hstruct_point_f), k) : 1.0;
		
		ss += u*v*hmm->beta[k][l];
	}	
	
	*pr = ss;
}

void
gdl_hstruct_hmm_forward_induc (gdl_hstruct_hmm * hmm, size_t c, size_t p, size_t l, double * pr)
{
	size_t k, kk;
	double x, t, u, v, w, s;
	gdl_hstruct_config * conf  = gdl_hstruct_hmm_get_locus_config (hmm, l);
	gdl_hstruct_config * pconf = gdl_hstruct_hmm_get_locus_config (hmm, l-1);
	
	t = gdl_hstruct_hmm_not_recomb_proba (hmm, pconf, hmm->dist[l-1]);
	
	s = *pr;
	
	hmm->norm[l] = 0.;
	
	for (k = 0; k < conf->k; k++)
	{
		u = (conf->k > 1) ? gdl_hstruct_fpoint_value (gdl_hstruct_config_get_point (conf, gdl_hstruct_point_f), k) : 1.0;
		
		if (conf->k == 1)
		{
			x = hmm->alpha[k][l-1];
		}
		else if (conf->k >= pconf->k && k < pconf->k)
		{
			x = hmm->alpha[k][l-1];
			for (w = 0, kk = pconf->k; kk < conf->k; kk++)
			{
				v  = gdl_hstruct_fpoint_value (gdl_hstruct_config_get_point (conf, gdl_hstruct_point_f), kk);
				v *= (pconf->k > 1) ? gdl_hstruct_fpoint_value (gdl_hstruct_config_get_point (pconf, gdl_hstruct_point_f), k) : 1.0;
				w += v;
			}
			x *= (1.0-w);
			x *= t;
			x += (1.0-t)*s*u;
		}
		else if (conf->k > pconf->k)
		{
			for (w = 0, kk = 0; kk < pconf->k; kk++)
			{
				v = (pconf->k > 1) ? gdl_hstruct_fpoint_value (gdl_hstruct_config_get_point (pconf, gdl_hstruct_point_f), kk) : 1.0;
				w += u*v*t*hmm->alpha[kk][l-1];
			}
			x = w + (1.0-t)*s*u;
		}
		else 
		{
			x  = hmm->alpha[k][l-1];
			x *= t;
			for (kk = 0; kk < conf->k; kk++)
			{
				x += u*(1.0-t)*hmm->alpha[kk][l-1];
			}
			for (kk = conf->k; kk < pconf->k; kk++)
			{
				x += u*hmm->alpha[kk][l-1];
			}
		}
		
		u = gdl_hstruct_hmm_get_site_proba (hmm, k, c, p, l);
		
		x *= u;
		
		hmm->alpha[k][l] = x;
		
		hmm->norm[l] += hmm->alpha[k][l];
	}
	for (k = 0; k < conf->k; k++)
	{
		hmm->alpha[k][l] /= hmm->norm[l];
	}
	
	hmm->log += log(hmm->norm[l]);
	
	*pr = hmm->norm[l];
}

void
gdl_hstruct_hmm_backward (gdl_hstruct_hmm * hmm, size_t c, size_t p)
{
	size_t jj, j;
	double pr;
	
	pr = gdl_hstruct_hmm_backward_init (hmm, c, p);
	
	for (jj = hmm->l - 1; jj > 0; jj--)
	{
		j  = jj - 1;
		gdl_hstruct_hmm_backward_induc (hmm, c, p, j, &pr);
	}	
}

void
gdl_hstruct_hmm_forward (gdl_hstruct_hmm * hmm, size_t c, size_t p)
{
	size_t j;
	double pr;
	
	pr = gdl_hstruct_hmm_forward_init (hmm, c, p);
	
	for (j = 1; j < hmm->l; j++)
	{
		gdl_hstruct_hmm_forward_induc (hmm, c, p, j, &pr);
	}	
}

double
gdl_hstruct_hmm_viterbi (gdl_hstruct_hmm * hmm, size_t c, size_t p, double * d, double * m, size_t ** phi, gdl_vector_uint * v)
{
	size_t k, kk, l;
	double max, u;
	gdl_hstruct_config * conf, * nconf;
	
	// initialization
	conf  = gdl_hstruct_hmm_get_locus_config (hmm, 0);
	for (k = 0; k < conf->k; k++)
	{
		if (conf->k > 1) 
		{
			d[k] = log (gdl_hstruct_fpoint_value (gdl_hstruct_config_get_point (conf, gdl_hstruct_point_f), k));
		}
		else
		{
			d[k] = 0;
		}	
		d[k] += log (gdl_hstruct_hmm_get_site_proba (hmm, k, c, p, 0));
	}
	// recursion
	for (l = 0; l < hmm->l-1; l++)
	{
		nconf = gdl_hstruct_hmm_get_locus_config (hmm, l+1);
		for (kk = 0; kk < nconf->k; kk++)
		{
			m[kk] = GDL_NEGINF; 
			for (k = 0; k < conf->k; k++)
			{
				u = gdl_hstruct_hmm_get_transition_proba (hmm, conf, nconf, k, kk, hmm->dist[l]);
				u = log (u) + d[k];
				if (u > m[kk])
				{
					m[kk]       = u;
					phi[kk][l]  = k;
				}
			}					
		}
		for (k = 0; k < nconf->k; k++)
		{
			d[k]  = m[k];
			d[k] += log (gdl_hstruct_hmm_get_site_proba (hmm, k, c, p, l+1));
		}
		conf = nconf;
	}
	// termination
	for (u = GDL_NEGINF, k = 0; k < conf->k; k++)
	{
		if (d[k] > u)
		{
			u = d[k];
			gdl_vector_uint_set (v, l, k);
		}
	}
	// backtracking
	for (k = hmm->l - 1; k > 0; k--)
	{
		l  = k - 1;
		kk = gdl_vector_uint_get (v, l+1);
		gdl_vector_uint_set (v, l, phi[kk][l]);
	}
	
	return u;
}

double
gdl_hstruct_hmm_get_alpha (const gdl_hstruct_hmm * hmm, size_t i, size_t j, size_t l, size_t k)
{
	return (hmm->abuf) ? gdl_block_get (hmm->abuf,i,j,l,k) : hmm->alpha[k][l];
}

double
gdl_hstruct_hmm_get_beta (const gdl_hstruct_hmm * hmm, size_t i, size_t j, size_t l, size_t k)
{
	return (hmm->bbuf) ? gdl_block_get (hmm->bbuf,i,j,l,k) : hmm->beta[k][l];
}

