/*  
 * 	hstruct/hstruct.c
 * 
 *  $Author: baptiste $, $Date: 2008-05-13 15:33:44 $, $Version$
 *
 *  Libgdl : a C library for statistical genetics
 * 
 *  Copyright (C) 2003-2006  Jean-Baptiste Veyrieras, INRA, France.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA * 
 */

#include <gdl/gdl_common.h>
#include <gdl/gdl_errno.h>
#include <gdl/gdl_rng.h>
#include <gdl/gdl_hstruct_fuzzy.h>
#include <gdl/gdl_hstruct.h>

void gdl_hstruct_hotspot_list_add (gdl_hstruct_hotspot_list * l, const gdl_hstruct_model_partition  * p, size_t from, size_t to);

struct _gdl_hstruct_workspace
{
	size_t step;
	gdl_hstruct_model * model;
  	double res_abs;
  	double res_sq;
  	FILE * logger;
};

gdl_hstruct_workspace *
gdl_hstruct_workspace_alloc (gdl_hstruct_model * model)
{
	size_t i;
	gdl_hstruct_workspace * sw;

	sw = GDL_CALLOC (gdl_hstruct_workspace, 1);
	
	sw->model  = model;
	
	return sw;
}

void
gdl_hstruct_workspace_free (gdl_hstruct_workspace * w )
{
	if (w)
	{
		gdl_hstruct_model_free (w->model);
		GDL_FREE (w);
	}
}

gdl_hstruct_model *
gdl_hstruct_workspace_model (const gdl_hstruct_workspace * w )
{
	return w->model;	
}

size_t
gdl_hstruct_workspace_chromosome_size (const gdl_hstruct_workspace * w)
{
	return gdl_hstruct_model_size (w->model);
}

size_t
gdl_hstruct_workspace_partition_size (const gdl_hstruct_workspace * w, size_t c)
{
	gdl_hstruct_model_chromosome * cm = gdl_hstruct_model_get_chromosome  (w->model, c);
	return gdl_hstruct_model_chromosome_size (cm);
}

int
gdl_hstruct_workspace_fuzzy_init (gdl_hstruct_workspace * w, size_t c, size_t p)
{
	gdl_hstruct_model_chromosome * cm;
	gdl_hstruct_model_partition  * pm;
	gdl_hstruct_fuzzy * f;
	
	cm = gdl_hstruct_model_get_chromosome (w->model, c);
	pm = gdl_hstruct_model_chromosome_get_partition (cm, p);
	f  = gdl_hstruct_model_partition_get_fuzzy (pm);
	
	return gdl_hstruct_fuzzy_init (f);
}

int
gdl_hstruct_workspace_fuzzy_iterate (gdl_hstruct_workspace * w, size_t c, size_t p)
{
	gdl_hstruct_model_chromosome * cm;
	gdl_hstruct_model_partition  * pm;
	gdl_hstruct_fuzzy * f;
	
	w->step = 0;
	
	cm = gdl_hstruct_model_get_chromosome (w->model, c);
	pm = gdl_hstruct_model_chromosome_get_partition (cm, p);
	f  = gdl_hstruct_model_partition_get_fuzzy (pm);
				
	gdl_hstruct_fuzzy_iterate (f);
	
	w->res_abs = gdl_hstruct_fuzzy_residual_abs (f);
  	w->res_sq  = gdl_hstruct_fuzzy_residual_sq (f);
		
	return GDL_SUCCESS;
}

int
gdl_hstruct_workspace_fuzzy2hmm (gdl_hstruct_workspace * w)
{
	return gdl_hstruct_model_fuzzy2hmm (w->model);
}

int
gdl_hstruct_workspace_hmm_iterate (gdl_hstruct_workspace * work)
{
	int status;
	
	work->res_abs = work->res_sq = 0;
	
	status = gdl_hstruct_workspace_hmm_iterate_point (work, gdl_hstruct_point_rho, &(work->res_abs), &(work->res_sq));
	status = gdl_hstruct_workspace_hmm_iterate_point (work, gdl_hstruct_point_hot, &(work->res_abs), &(work->res_sq));
	status = gdl_hstruct_workspace_hmm_iterate_point (work, gdl_hstruct_point_mu, &(work->res_abs), &(work->res_sq));
	status = gdl_hstruct_workspace_hmm_iterate_point (work, gdl_hstruct_point_f, &(work->res_abs), &(work->res_sq));
	
	return status;
}

//static int
//gdl_hstruct_workspace_hmm_update_point (gdl_hstruct_workspace * w, const gdl_hstruct_point_type * T, double * abs, double * sq)
//{
//	size_t nres = 0;
//	double res_abs=0, res_sq=0;
//	gdl_hstruct_parameter_itr * itr;
//	
//	w->step = 1;
//	
//	itr = gdl_hstruct_model_parameter_iterate (w->model, T);
//	if (itr)
//	{
//		do
//		{
//			void * point = gdl_hstruct_parameter_iterate_point (itr);
//			gdl_hstruct_point_update2 (point, T, &res_abs, &res_sq);
//			nres++;
//		}
//		while (gdl_hstruct_parameter_iterate_next (itr));
//		gdl_hstruct_parameter_iterate_free (itr);
//		if (nres)
//		{
//			res_abs /= nres;
//			res_sq  /= nres;
//			//printf ("POINT %s %g %g\n", T->name, res_abs, res_sq);
//			*abs += res_abs;
//			*sq  += res_sq;
//		}
//	}
//	
////	if (w->logger)
////	{
////		fprintf (w->logger, "LOG(%s)::%.1f\n", T->name, gdl_hstruct_workspace_loglikelihood (w));
////	}
//	
//	return GDL_SUCCESS;
//}
//
//int
//gdl_hstruct_workspace_hmm_iterate_em (gdl_hstruct_workspace * w)
//{
//	size_t i, j, nc, np;
//	gdl_boolean up;
//	
//	// E - step...
//	nc = gdl_hstruct_model_size (w->model);
//	for (i = 0; i < nc; i++)
//	{
//		chrom = gdl_hstruct_model_get_chromosome (w->model, i);
//		np    = gdl_hstruct_model_chromosome_size (chrom);
//		for (j = 0; j < np; j++)
//		{
//			part = gdl_hstruct_model_chromosome_get_partition (chrom, j);
//			hmm  = gdl_hstruct_model_partition_get_hmm (part);
//			gdl_hstruct_hmm_fb_update (hmm, &up);
//		}
//	}
//	// M - step
//	w->res_abs = w->res_sq = 0;
//	gdl_hstruct_workspace_hmm_update_point (w, gdl_hstruct_point_rho, &(w->res_abs), &(w->res_sq));
//	gdl_hstruct_workspace_hmm_update_point (w, gdl_hstruct_point_hot, &(w->res_abs), &(w->res_sq));
//	gdl_hstruct_workspace_hmm_update_point (w, gdl_hstruct_point_mu, &(w->res_abs), &(w->res_sq));
//	gdl_hstruct_workspace_hmm_update_point (w, gdl_hstruct_point_f, &(w->res_abs), &(w->res_sq));
//	
//	return GDL_SUCCESS;
//}

int
gdl_hstruct_workspace_hmm_iterate_point (gdl_hstruct_workspace * w, const gdl_hstruct_point_type * T, double * abs, double * sq)
{
	size_t nres = 0;
	double res_abs=0, res_sq=0;
	gdl_hstruct_parameter_itr * itr;
	
	w->step = 1;
	
	itr = gdl_hstruct_model_parameter_iterate (w->model, T);
	if (itr)
	{
		do
		{
			void * point = gdl_hstruct_parameter_iterate_point (itr);
			gdl_hstruct_model_golden_parameter (w->model, T, point, &res_abs, &res_sq);
			nres++;
		}
		while (gdl_hstruct_parameter_iterate_next (itr));
		gdl_hstruct_parameter_iterate_free (itr);
		if (nres)
		{
			res_abs /= nres;
			res_sq  /= nres;
			//printf ("POINT %s %g %g\n", T->name, res_abs, res_sq);
			*abs += res_abs;
			*sq  += res_sq;
		}
	}
	
//	if (w->logger)
//	{
//		fprintf (w->logger, "LOG(%s)::%.1f\n", T->name, gdl_hstruct_workspace_loglikelihood (w));
//	}
	
	return GDL_SUCCESS;
}

int
gdl_hstruct_workspace_hmm_imputation (gdl_hstruct_workspace * w)
{
	size_t i, j, nc, np;
	double abs_res, sq_res;
	gdl_hstruct_model_chromosome * chrom;
	gdl_hstruct_model_partition  * part;
	gdl_hstruct_hmm * hmm;
	
	w->res_abs = w->res_sq = 0;
	
	nc = gdl_hstruct_model_size (w->model);
	for (i = 0; i < nc; i++)
	{
		chrom = gdl_hstruct_model_get_chromosome (w->model, i);
		np    = gdl_hstruct_model_chromosome_size (chrom);
		for (j = 0; j < np; j++)
		{
			if (w->logger)
			{
				fprintf (w->logger, "--\n");
				fprintf (w->logger, "Impute missing data for partition [ %d ] in chromosome [ %d ]\n", j+1, i+1);	
				fprintf (w->logger, "--\n");
			}
			part = gdl_hstruct_model_chromosome_get_partition (chrom, j);
			hmm  = gdl_hstruct_model_partition_get_hmm (part);
			gdl_hstruct_hmm_missing_update (hmm, &abs_res, &sq_res);
			if (w->logger)
			{
				fprintf (w->logger, "\tAbs. Residual %e\n", abs_res);
				fprintf (w->logger, "\tSq. Residual %e\n", sq_res);
			}
			w->res_abs += abs_res/(np*nc);
			w->res_sq += abs_res/(np*nc);
		}
	}
	
	return GDL_SUCCESS;
}

int
gdl_hstruct_workspace_hmm_maximize (gdl_hstruct_workspace * w, double epsilon, size_t max_iter, gdl_boolean imputation)
{
	int status;
	double tol;
	size_t iter = 0;
	
	if (w->logger)
	{
		fprintf (w->logger, "--\n");
		fprintf (w->logger, "Start HMM Maximization\n");
		fprintf (w->logger, "Max number of iterations %d\n", max_iter);
		fprintf (w->logger, "Convergence threshold    %e\n", epsilon);
		fprintf (w->logger, "--\n");
		fprintf (w->logger, "Iteration\tLoglikelihood\tAbs. residual\tSq. residual\n");
	}
	
	do
	{
		iter++;
		
		status = gdl_hstruct_workspace_hmm_iterate (w);
		
		tol = gdl_hstruct_workspace_residual_abs (w);
				
		if (tol < epsilon)
		{
			status = GDL_SUCCESS;
		}
		else if (iter < max_iter)
		{
			status = GDL_CONTINUE;	
		}
		else
		{
			status = GDL_EMAXITER;	
		}
		
		if (w->logger)
		{
			double log, tol2;
			
			log  = gdl_hstruct_workspace_loglikelihood (w);
			
			tol2 = gdl_hstruct_workspace_residual_sq (w);
			
			fprintf (w->logger, "%d\t%.1f\t%e\t%e\n", iter, log, tol, tol2);
			
			fflush (stdout);					
		}
		
		if (imputation)
		{
			gdl_hstruct_workspace_hmm_imputation (w);
		}		
		
	} while (status == GDL_CONTINUE);
	
	if (status == GDL_EMAXITER && w->logger)
	{
		fprintf (w->logger, "WARNING : reaches maximum number of iterations\n");
	}
	
	return status;
}

static void
_gdl_hstruct_workspace_set_point_CI (gdl_hstruct_workspace * w, const gdl_hstruct_point_type * T, double penality)
{
	gdl_hstruct_parameter_itr * itr;
	
	itr = gdl_hstruct_model_parameter_iterate (w->model, T);
	
	if (itr)
	{
		do
		{
			void * point = gdl_hstruct_parameter_iterate_point (itr);
			if (T == gdl_hstruct_point_f)
			{
				size_t k;
				gdl_hstruct_fpoint * f = (gdl_hstruct_fpoint *) point;
				for (k = 0; k < gdl_hstruct_fpoint_size (f); k++)
				{
					printf ("FPOINT CI (%g) %d %g\n", penality, k, gdl_hstruct_fpoint_value (f, k));
				}
			}
			else
			{
				gdl_hstruct_point * p = (gdl_hstruct_point *) point;
				printf ("POINT CI (%g) %g\n", penality, gdl_hstruct_point_value (p));
			}
		}
		while (gdl_hstruct_parameter_iterate_next (itr));
		gdl_hstruct_parameter_iterate_free (itr);
	}
}

//int
//gdl_hstruct_workspace_compute_confidence_interval (gdl_hstruct_workspace * w, double epsilon, size_t max_iter)
//{
//	if (w->logger)
//	{
//		fprintf (w->logger, "--\n");
//		fprintf (w->logger, "Compute Confidence Intervals\n");
//		fprintf (w->logger, "--\n");
//		fprintf (w->logger, "\tBackup Parameter Estimates\n");
//	}
//	
//	gdl_hstruct_model_backup_points (w->model);
//	
//	
//	
//	if (w->logger)
//	{
//		fprintf (w->logger, "\tRestore Parameter Estimates\n");
//	}
//	
//	gdl_hstruct_model_restore_points (w->model);
//}

int
gdl_hstruct_workspace_ligation (gdl_hstruct_workspace * w, const gdl_gligation_type * T)
{
	return gdl_hstruct_model_ligation (w->model, T);
}

double
gdl_hstruct_workspace_loglikelihood (const gdl_hstruct_workspace * w)
{
	return gdl_hstruct_model_loglikelihood (w->model);
}

double
gdl_hstruct_workspace_residual_sq (const gdl_hstruct_workspace * w)
{
	return w->res_sq;
}

double
gdl_hstruct_workspace_residual_abs (const gdl_hstruct_workspace * w)
{
	return w->res_abs;
}

double
gdl_hstruct_workspace_criterion (const gdl_hstruct_workspace * w, const gdl_hstruct_criterion_type * T)
{
	return gdl_hstruct_criterion_eval (T, w->model);
}


int
gdl_hstruct_workspace_hotrec_search (gdl_hstruct_workspace * w, const gdl_hstruct_criterion_type * C, size_t c, size_t p, gdl_hstruct_hotspot_list * hotspots)
{
	size_t i, j, nl, nh, nhot=0;
	double t1, t2, u, v, old;
	gdl_hstruct_model_chromosome * cm;
	gdl_hstruct_model_partition  * pm;
	gdl_hstruct_point_handler    * hot;
	gdl_hstruct_point            * hot_point;
	gdl_hstruct_config_buffer    * search;
	gdl_hstruct_config_block_itr * itr;
	gdl_hstruct_config_template  template;
	
	cm = gdl_hstruct_model_get_chromosome (w->model, c);
	pm = gdl_hstruct_model_chromosome_get_partition (cm, p);
	
	template.rho = gdl_false;
	template.mu  = gdl_false;
	template.f   = gdl_false;
	template.k   = gdl_true;
	template.hot = gdl_true;
	
	itr = gdl_hstruct_model_partition_config_block_iterator (pm, &template);
	
	do
	{
		gdl_hstruct_config_block * b = gdl_hstruct_config_block_iterator_get (itr);
		
		if (!gdl_hstruct_config_block_has_hotspot (b) && gdl_hstruct_config_block_ancestral_size (b) > 1)
		{
			 size_t from = gdl_hstruct_config_block_from (b);
		 	 size_t to   = gdl_hstruct_config_block_to (b);
			 
			 gdl_hstruct_model_backup_partition_points (w->model, pm);
			 
			 if (w->logger)
			 {
			 	 fprintf (w->logger, "--\n");
			 	 fprintf (w->logger, "Try to add hotspot in partition block from locus [ %d ] to [ %d ]\n", from, to+1);
			 	 fprintf (w->logger, "--\n");
			 }
			 
			 hot       = gdl_hstruct_model_new_handler (w->model, gdl_hstruct_point_hot, NULL);
			 hot_point = gdl_hstruct_point_handler_point (hot);
			 gdl_hstruct_point_set_golden_tolerance (hot_point, 1.e-2);
			 search = gdl_hstruct_model_new_buffer (w->model, pm);
			 gdl_hstruct_config_buffer_set (search, gdl_hstruct_point_hot, gdl_hstruct_point_handler_point_ptr (hot));
			 
			 u = gdl_hstruct_criterion_eval (C, w->model);
			 
		 	for (nh = 0, i = from; i <= to; i++)
			{
				if (!nh)
				{
					gdl_hstruct_point_update (hot_point, 10);
				}
				
				gdl_hstruct_model_update (w->model, pm, search, i);
				
				gdl_hstruct_model_golden_parameter (w->model, gdl_hstruct_point_hot, hot_point, &t1, &t2);
				
				v = gdl_hstruct_criterion_eval (C, w->model);
				
				if (w->logger)
				{
					fprintf (w->logger, "\tTest hotspot at locus [ %d ]\n", i);
				}
				if (v < u)
				{
					nh++;
					u = v;
					old = gdl_hstruct_point_value (hot_point);
				}
				else
				{
					gdl_hstruct_model_restore (w->model, pm, search, i);
					if (nh)
					{
						nhot++;
						if (w->logger)
						{
							fprintf (w->logger, "\tAdd hotspot from locus [ %d ] to [ %d ] with intensity [ %.1f ]\n", i-nh, i, old);
						}
						gdl_hstruct_point_set_golden_tolerance (hot_point, 1.e-5);
						gdl_hstruct_point_update (hot_point, old);
						hot = gdl_hstruct_model_new_handler (w->model, gdl_hstruct_point_hot, NULL);
						hot_point = gdl_hstruct_point_handler_point (hot);
						gdl_hstruct_point_set_golden_tolerance (hot_point, 1.e-2);
						gdl_hstruct_config_buffer_realloc (search);
						gdl_hstruct_config_buffer_set (search, gdl_hstruct_point_hot, gdl_hstruct_point_handler_point_ptr (hot));
						if (hotspots)
						{
							gdl_hstruct_hotspot_list_add (hotspots, pm, i-nh, i);
						}
					}
					nh=0;
				}		
			}
			if (nh)
			{
				nhot++;
				if (hotspots)
				{
					gdl_hstruct_hotspot_list_add (hotspots, pm, i-nh, i);
				}
				gdl_hstruct_point_set_golden_tolerance (hot_point, 1.e-5);
				if (w->logger)
				{
					fprintf (w->logger, "\tAdd hotspot from locus [ %d ] to [ %d ] with intensity [ %.1f ]\n", i-nh, i, gdl_hstruct_point_value (hot_point));
				}
			}
			else if (!nh)
			{
				gdl_hstruct_model_clean_handler	(w->model, hot);
			}
			gdl_hstruct_config_buffer_free (search);
		 }
	}
	while (gdl_hstruct_config_block_iterator_next (itr, gdl_true));
	
	gdl_hstruct_config_block_iterator_free (itr);
	
	return nhot;
}

int
gdl_hstruct_workspace_hotrec_fast_search (gdl_hstruct_workspace * w, const gdl_hstruct_criterion_type * C, size_t c, size_t p, gdl_hstruct_hotspot_list * hotspots, const double q, const double f)
{
	size_t i, j, nl, nh, nhot=0;
	double t1, t2, u, v, old;
	gdl_hstruct_model_chromosome * cm;
	gdl_hstruct_model_partition  * pm;
	gdl_hstruct_point_handler    * hot;
	gdl_hstruct_point            * hot_point;
	gdl_hstruct_config_buffer    * search;
	gdl_hstruct_config_block_itr * itr;
	gdl_hstruct_config_template  template;
	gdl_vector * proba;
	
	cm = gdl_hstruct_model_get_chromosome (w->model, c);
	pm = gdl_hstruct_model_chromosome_get_partition (cm, p);
	
	proba = gdl_hstruct_model_partition_cumul_transition_proba (pm);
	
	template.rho = gdl_false;
	template.mu  = gdl_false;
	template.f   = gdl_false;
	template.k   = gdl_true;	
	template.hot = gdl_true;
	
	itr = gdl_hstruct_model_partition_config_block_iterator (pm, &template);
	
	do
	{
		gdl_hstruct_config_block * b = gdl_hstruct_config_block_iterator_get (itr);
		
		if (!gdl_hstruct_config_block_has_hotspot (b) && gdl_hstruct_config_block_ancestral_size (b) > 1)
		{
			 size_t from = gdl_hstruct_config_block_from (b);
		 	 size_t to   = gdl_hstruct_config_block_to (b);
		 	 double * data;
		 	 size_t * outlier;
		 	 size_t nout;
		 	 
		 	 data    = GDL_MALLOC (double, to-from+1);
		 	 outlier = GDL_MALLOC (size_t, to-from+1);
		 	 
		 	 for (i = from; i <= to; i++)
			 {
				 data[i-from] = gdl_vector_get (proba, i);
			 }
		 	 
		 	 nout = gdl_stats_outlier_one_sided (outlier, data, 1, to-from+1, q, f);
		 	 
		 	 GDL_FREE (data);
		 	 
		 	 if (nout)
		 	 {
		 	 	 size_t * idx;
		 	 	 
		 	 	 idx = GDL_MALLOC (size_t, nout);
		 	 	 
		 	 	 for (j = i = 0; i < to-from+1; i++)
		 	 	 {
		 	 	 	 if (outlier[i])
		 	 	 	 {
		 	 	 	 	 idx[j++]=from+i;
		 	 	 	 }
		 	 	 }
		 	 	
			 	 gdl_hstruct_model_backup_partition_points (w->model, pm);
				 
				 if (w->logger)
				 {
				 	 fprintf (w->logger, "--\n");
				 	 fprintf (w->logger, "Try to add hotspot in partition block from locus [ %d ] to [ %d ]\n", from, to+1);
				 	 fprintf (w->logger, "--\n");
				 }
				 
				 hot       = gdl_hstruct_model_new_handler (w->model, gdl_hstruct_point_hot, NULL);
				 hot_point = gdl_hstruct_point_handler_point (hot);
				 gdl_hstruct_point_set_golden_tolerance (hot_point, 1.e-2);
				 search = gdl_hstruct_model_new_buffer (w->model, pm);
				 gdl_hstruct_config_buffer_set (search, gdl_hstruct_point_hot, gdl_hstruct_point_handler_point_ptr (hot));
				 
				 u = gdl_hstruct_criterion_eval (C, w->model);
				 
					for (nh = 0, i = 0; i < nout; i++)
					{
						if (!nh)
						{
							gdl_hstruct_point_update (hot_point, 10);
						}
						
						if (w->logger)
						{
							fprintf (w->logger, "\tTest hotspot at locus [ %d ]\n", idx[i]);
						}
						
						gdl_hstruct_model_update (w->model, pm, search, idx[i]);
						
						gdl_hstruct_model_golden_parameter (w->model, gdl_hstruct_point_hot, hot_point, &t1, &t2);
						
						v = gdl_hstruct_criterion_eval (C, w->model);
						
						if (v < u)
						{
							if (i < nout - 1 && idx[i+1] == idx[i]+1)
							{
								nh++;
								u = v;
								old = gdl_hstruct_point_value (hot_point);
							}
							else
							{
								nhot++;
								if (w->logger)
								{
									fprintf (w->logger, "\tAdd hotspot from locus [ %d ] to [ %d ] with intensity [ %.1f ]\n", idx[i-nh], idx[i], gdl_hstruct_point_value (hot_point));
								}
								gdl_hstruct_point_set_golden_tolerance (hot_point, 1.e-5);
								hot = gdl_hstruct_model_new_handler (w->model, gdl_hstruct_point_hot, NULL);
								hot_point = gdl_hstruct_point_handler_point (hot);
								gdl_hstruct_point_set_golden_tolerance (hot_point, 1.e-2);
								gdl_hstruct_config_buffer_realloc (search);
								gdl_hstruct_config_buffer_set (search, gdl_hstruct_point_hot, gdl_hstruct_point_handler_point_ptr (hot));
								if (hotspots)
								{
									gdl_hstruct_hotspot_list_add (hotspots, pm, idx[i-nh], idx[i]);
								}
								nh=0;
							}
						}
						else
						{
							gdl_hstruct_model_restore (w->model, pm, search, idx[i]);
							if (nh)
							{
								nhot++;
								if (w->logger)
								{
									fprintf (w->logger, "\tAdd hotspot from locus [ %d ] to [ %d ] with intensity [ %.1f ]\n", idx[i-nh], idx[i], old);
								}
								gdl_hstruct_point_set_golden_tolerance (hot_point, 1.e-5);
								gdl_hstruct_point_update (hot_point, old);
								hot = gdl_hstruct_model_new_handler (w->model, gdl_hstruct_point_hot, NULL);
								hot_point = gdl_hstruct_point_handler_point (hot);
								gdl_hstruct_point_set_golden_tolerance (hot_point, 1.e-2);
								gdl_hstruct_config_buffer_realloc (search);
								gdl_hstruct_config_buffer_set (search, gdl_hstruct_point_hot, gdl_hstruct_point_handler_point_ptr (hot));
								if (hotspots)
								{
									gdl_hstruct_hotspot_list_add (hotspots, pm, idx[i-nh], idx[i]);
								}
							}
							nh=0;
						}		
					}
					if (nh)
					{
						nhot++;
						if (hotspots)
						{
							gdl_hstruct_hotspot_list_add (hotspots, pm, idx[i-nh], idx[i]);
						}
						gdl_hstruct_point_set_golden_tolerance (hot_point, 1.e-5);
						if (w->logger)
						{
							fprintf (w->logger, "\tAdd hotspot from locus [ %d ] to [ %d ] with intensity [ %.1f ]\n", idx[i-nh], idx[i], gdl_hstruct_point_value (hot_point));
						}
					}
					else if (!nh)
					{
						gdl_hstruct_model_clean_handler	(w->model, hot);
					}
					gdl_hstruct_config_buffer_free (search);
					
					GDL_FREE (idx);
		 	 }
		 	 
		 	 GDL_FREE (outlier);
			
		 }
	}
	while (gdl_hstruct_config_block_iterator_next (itr, gdl_true));
	
	gdl_hstruct_config_block_iterator_free (itr);
	
	gdl_vector_free (proba);
	
	return nhot;
}

static void
_gdl_hstruct_workspace_set_hotspot_eligible (gdl_hstruct_workspace * w, gdl_hstruct_model_partition  * part, gdl_boolean e)
{
	size_t i, from, to;
	gdl_hstruct_config_block * b;
	gdl_hstruct_config * c;
	gdl_hstruct_config_block_itr * itr;
	gdl_hstruct_config_template template;
	
	template.rho = gdl_false;
	template.mu  = gdl_false;
	template.f   = gdl_false;
	template.k   = gdl_true;
	template.hot = gdl_true;
	
	itr  = gdl_hstruct_model_partition_config_block_iterator (part, &template);
	
	do
	{
		b = gdl_hstruct_config_block_iterator_get (itr);
		if (gdl_hstruct_config_block_has_hotspot (b))
		{
			gdl_hstruct_config_block_set_point_eligible (b, gdl_hstruct_point_hot, e);
		}		
	}
	while (gdl_hstruct_config_block_iterator_next (itr, gdl_true));
	
	gdl_hstruct_config_block_iterator_free (itr);
}

int
gdl_hstruct_workspace_hotrec_check (gdl_hstruct_workspace * w, const gdl_hstruct_criterion_type * C, size_t c, size_t p, double tol, size_t max_iter, gdl_hstruct_hotspot_list * hotspots)
{
	size_t iter, rm = 0;
	double res_abs, res_sq, ocrit, crit;
	gdl_hstruct_model_chromosome * cm;
	gdl_hstruct_model_partition  * pm;
	gdl_hstruct_point_handler    * handler;
	gdl_hstruct_config_block_itr * itr;
	gdl_hstruct_config_template  template;
	
	cm = gdl_hstruct_model_get_chromosome (w->model, c);
	pm = gdl_hstruct_model_chromosome_get_partition (cm, p);
	
	//_gdl_hstruct_workspace_set_hotspot_eligible (w, pm, gdl_false);
	
	template.rho = gdl_false;
	template.mu  = gdl_false;
	template.f   = gdl_false;
	template.k   = gdl_true;
	template.hot = gdl_true;
	
	itr = gdl_hstruct_model_partition_config_block_iterator (pm, &template);
	
	do
	{
		gdl_hstruct_config_block * b = gdl_hstruct_config_block_iterator_get (itr);
		
		//u = gdl_hstruct_criterion_eval (C, w->model);
		
		if (gdl_hstruct_config_block_has_hotspot (b))
		{
			 size_t from = gdl_hstruct_config_block_from (b);
		 	 size_t to   = gdl_hstruct_config_block_to (b);
			 
			 ocrit = gdl_hstruct_criterion_eval (C, w->model);
			 
			 gdl_hstruct_model_backup_partition_points (w->model, pm);
			 
			 if (w->logger)
			 {
			 	 fprintf (w->logger, "--\n");
			 	 fprintf (w->logger, "Try to remove hotspot in partition block from locus [ %d ] to [ %d ]\n", from, to+1);
			 	 fprintf (w->logger, "--\n");
			 	 fprintf (w->logger, "\tRe-estimate the parameters of the partition\n");
				 fprintf (w->logger, "\tIteration\tAbs. Residual\tSq. Residual\n");		 	 
			 }
			 
			 handler = gdl_hstruct_model_remove_hotspot_block (w->model, pm, b);
			 
			 iter=0;
			 do
			 {
			 	iter++;
			 	gdl_hstruct_model_golden_partition (w->model, pm, &res_abs, &res_sq);
			 	if (w->logger)
				{
					fprintf (w->logger, "\t%d\t%e\t%e\n", iter, res_abs, res_sq);
				}
			 } while (res_sq > tol && iter < max_iter);
			 
			 crit = gdl_hstruct_criterion_eval (C, w->model);
			 
			 if (w->logger)
			 {
				 fprintf (w->logger, "\tNew criterion [ %g ] (old = %g)\n\n", crit, ocrit);
			 }
			 
			 if (crit > ocrit)
			 {
			 	if (w->logger)
				{
					fprintf (w->logger, "\tKEEP hotspot from locus [ %d ] to [ %d ]\n", from, to+1);
					fprintf (w->logger, "\tInitial parameter estimates has been restored\n");
				}
			 	gdl_hstruct_model_restore_hotspot_block (w->model, pm, b);
			   gdl_hstruct_model_restore_partition_points (w->model, pm);
			   GDL_FREE (handler);
			 }
			 else
			 {
			 	if (w->logger)
				{
					fprintf (w->logger, "\tREMOVE hotspot from locus [ %d ] to [ %d ]\n", from, to+1);
					fprintf (w->logger, "\tNew parameter estimates are conserved\n");
				}
				rm++;
				if (hotspots)
				{
					gdl_hstruct_hotspot_list_add (hotspots, pm, from, to+1);
				}
			 	gdl_hstruct_model_clean_handler (w->model, handler);
			 }
		}
	}
	while (gdl_hstruct_config_block_iterator_next (itr, gdl_true));
	
	gdl_hstruct_config_block_iterator_free (itr);
	
	//_gdl_hstruct_workspace_set_hotspot_eligible (w, pm, gdl_true);
	
	if (rm)
	{
		gdl_hstruct_model_check_config_block (w->model, pm);
	}
	
	return rm;
}

static int
_collapse_block_ancestral (gdl_hstruct_workspace * w,
                           gdl_hstruct_model_partition  * part,
                           gdl_hstruct_config_block * b,
                           const gdl_hstruct_criterion_type * C,
                           const double tol,
                           const size_t max_iter,
                           const size_t min_length)
{
	int status;
	size_t k, nk, nnk, from, to, iter, * idx;
	double crit, ocrit, res_sq, res_abs, * q;
	gdl_hstruct_config_buffer * buffer;
	gdl_hstruct_point_handler * handler;
	
	nk = gdl_hstruct_config_block_ancestral_size (b);
	
	if (nk == 1)
	{
		return GDL_SUCCESS;
	}
	
	from = gdl_hstruct_config_block_from (b);
	to   = gdl_hstruct_config_block_to (b);
	
	if (to-from+1 <= min_length)
	{
		return GDL_SUCCESS;	
	}
	
	ocrit = gdl_hstruct_criterion_eval (C, w->model);
		
	if (w->logger)
	{
		fprintf (w->logger, "--\n");
		fprintf (w->logger, "Try to collapse ancestral haplotypes from locus %d to locus %d (K = %d)\n", from, to, nk);
		fprintf (w->logger, "--\n");
	}
	// compute the frequency of ancestral haplotypes into the block
	q   = GDL_MALLOC (double, nk);
	idx = GDL_MALLOC (size_t, nk);
	
	for (k = 0; k < nk; k++)
	{
		q[k] = gdl_hstruct_model_partition_ancestral_freq (part, k, from, to);
	}
	// sort the frequencies
	gdl_sort_index (idx, q, 1, nk);
	
	if (w->logger)
	{
		fprintf (w->logger, "\tBackup previous parameter point estimates\n");
	}
	
	gdl_hstruct_model_backup_partition_points (w->model, part);
	
	handler = gdl_hstruct_model_collapse_ancestral_block (w->model, part, b, idx[0]);
   
   if (w->logger)
	{
		fprintf (w->logger, "\tRemove ancestral haplotype [ %d ] with the lowest frequency [ %1.4f ]\n", idx[0], q[idx[0]]);
		fprintf (w->logger, "\tRe-estimate the parameters of the partition\n");
		fprintf (w->logger, "\tIteration\tAbs. Residual\tSq. Residual\n");
	}
   
   iter = 0;
   
    do
    {
    	iter++;
    	gdl_hstruct_model_golden_partition (w->model, part, &res_abs, &res_sq);
    	if (w->logger)
		{
			fprintf (w->logger, "\t%d\t%e\t%e\n", iter, res_abs, res_sq);
		}
    }
    while (res_sq > tol && iter < max_iter);
	
	crit = gdl_hstruct_criterion_eval (C, w->model);
	
	if (w->logger)
	{
		fprintf (w->logger, "\tNew criterion [ %g ] (old = %g)\n\n", crit, ocrit);
	}
	
	if (crit > ocrit)
	{
		gdl_hstruct_model_restore_ancestral_block (w->model, part, b, idx[0]);
      gdl_hstruct_model_restore_partition_points (w->model, part);
		gdl_hstruct_model_clean_handler (w->model, handler);
		if (w->logger)
		{
			fprintf (w->logger, "\tKEEP old ancestral configuration\n");
			fprintf (w->logger, "\tInitial parameter estimates has been restored\n");
		}
		status = GDL_SUCCESS;
	}
	else
	{
		gdl_hstruct_model_config_block_update (w->model, b);
		if (w->logger)
		{
			fprintf (w->logger, "\tREMOVE ancestral haplotype [ %d ]\n", idx[0]);
		}
		status = GDL_CONTINUE;
	}
	GDL_FREE (q);
	GDL_FREE (idx);
	
	return status;
}

static int
_collapse_block_hotspot_ancestral (gdl_hstruct_workspace * w,
			                           gdl_hstruct_model_partition  * part,
			                           gdl_hstruct_config_block * h1,
			                           gdl_hstruct_config_block * b,
			                           gdl_hstruct_config_block * h2,
			                           const gdl_hstruct_criterion_type * C,
			                           const double tol,
			                           const size_t max_iter,
			                           const size_t min_length)
{
	int status;
	size_t k, nk, nnk, from, to, iter, * idx;
	double crit, ocrit, res_sq, res_abs, * q;
	gdl_hstruct_config_buffer * buffer;
	gdl_hstruct_point_handler * handler;
	
	nk = gdl_hstruct_config_block_ancestral_size (b);
	
	if (nk == 1)
	{
		return GDL_SUCCESS;
	}
	
	if (h1)
	{
		from = gdl_hstruct_config_block_from (h1);
	}
	else
	{
		from = gdl_hstruct_config_block_from (b);
	}
	if (h2)
	{
		to = gdl_hstruct_config_block_to (h2);
	}
	else
	{
		to = gdl_hstruct_config_block_to (b);
	}
	
	if (to-from+1 <= min_length)
	{
		return GDL_SUCCESS;	
	}
	
	ocrit = gdl_hstruct_criterion_eval (C, w->model);
		
	if (w->logger)
	{
		fprintf (w->logger, "--\n");
		fprintf (w->logger, "Try to collapse ancestral haplotypes from locus %d to locus %d (K = %d)\n", from, to, nk);
		fprintf (w->logger, "--\n");
	}
	// compute the frequency of ancestral haplotypes into the block
	q   = GDL_MALLOC (double, nk);
	idx = GDL_MALLOC (size_t, nk);
	
	for (k = 0; k < nk; k++)
	{
		q[k] = gdl_hstruct_model_partition_ancestral_freq (part, k, from, to);
	}
	// sort the frequencies
	gdl_sort_index (idx, q, 1, nk);
	
	if (w->logger)
	{
		fprintf (w->logger, "\tBackup previous parameter point estimates\n");
	}
	
	gdl_hstruct_model_backup_partition_points (w->model, part);
	
	handler = gdl_hstruct_model_collapse_ancestral_block2 (w->model, part, h1, b, h2, idx[0]);
   
   if (w->logger)
	{
		fprintf (w->logger, "\tRemove ancestral haplotype [ %d ] with the lowest frequency [ %1.4f ]\n", idx[0], q[idx[0]]);
		fprintf (w->logger, "\tRe-estimate the parameters of the partition\n");
		fprintf (w->logger, "\tIteration\tAbs. Residual\tSq. Residual\n");
	}
   
   iter = 0;
   
    do
    {
    	iter++;
    	gdl_hstruct_model_golden_partition (w->model, part, &res_abs, &res_sq);
    	if (w->logger)
		{
			fprintf (w->logger, "\t%d\t%e\t%e\n", iter, res_abs, res_sq);
		}
    }
    while (res_sq > tol && iter < max_iter);
	
	crit = gdl_hstruct_criterion_eval (C, w->model);
	
	if (w->logger)
	{
		fprintf (w->logger, "\tNew criterion [ %g ] (old = %g)\n\n", crit, ocrit);
	}
	
	if (crit > ocrit)
	{
		gdl_hstruct_model_restore_ancestral_block2 (w->model, part, h1, b, h2, idx[0]);
      gdl_hstruct_model_restore_partition_points (w->model, part);
		gdl_hstruct_model_clean_handler (w->model, handler);
		if (w->logger)
		{
			fprintf (w->logger, "\tKEEP old ancestral configuration\n");
			fprintf (w->logger, "\tInitial parameter estimates has been restored\n");
		}
		status = GDL_SUCCESS;
	}
	else
	{
		gdl_hstruct_model_config_block_update (w->model, b);
		if (h1) gdl_hstruct_model_config_block_update (w->model, h1);
		if (h2) gdl_hstruct_model_config_block_update (w->model, h2);
		if (w->logger)
		{
			fprintf (w->logger, "\tREMOVE ancestral haplotype [ %d ]\n", idx[0]);
		}
		status = GDL_CONTINUE;
	}
	GDL_FREE (q);
	GDL_FREE (idx);
	
	return status;
}

static int
_gdl_hstruct_workspace_collapse_ancestral (gdl_hstruct_workspace * w,
						                           gdl_hstruct_model_partition  * part,
						                           gdl_hstruct_config_block * b1,
						                           gdl_hstruct_config_block * b2,
						                           gdl_hstruct_config_block * b3,
						                           const gdl_hstruct_criterion_type * C,
						                           const double tol,
						                           const size_t max_iter,
						                           const size_t min_length)
{
	size_t rm = 0;
	
	if (b1 == 0 && b3 == 0)
	{
		while (_collapse_block_ancestral (w, part, b2, C, tol, max_iter, min_length) == GDL_CONTINUE)
		{
			rm++;
		};
	}
	else 
	{
		while (_collapse_block_hotspot_ancestral (w, part, b1, b2, b3, C, tol, max_iter, min_length) == GDL_CONTINUE)
		{
			rm++;
		};
	}
	
	return rm;
}

int
gdl_hstruct_workspace_collapse_ancestral (gdl_hstruct_workspace * w,
                                          const gdl_hstruct_criterion_type * C,
                                          size_t c,
                                          size_t p,
                                          const double tol,
                                          const size_t max_iter,
                                          const size_t min_length)
{
	size_t iter, rm = 0, trm;
	gdl_hstruct_model_partition  * part;
	gdl_hstruct_config_block_itr * itr;
	gdl_hstruct_config_block * b1, * b2, * b3;
	gdl_hstruct_config_template template;
	
	template.rho = gdl_false;
	template.mu  = gdl_false;
	template.f   = gdl_false;
	template.k   = gdl_true;
	template.hot = gdl_true;
	
	part = gdl_hstruct_model_chromosome_get_partition (gdl_hstruct_model_get_chromosome (w->model, c), p);
	
	_gdl_hstruct_workspace_set_hotspot_eligible (w, part, gdl_false);
	
	itr  = gdl_hstruct_model_partition_config_block_iterator (part, &template);
	
	iter = 0;
	
	b1   = NULL;
	b2   = gdl_hstruct_config_block_iterator_get (itr);
	b3   = NULL;
	
	do
	{
		if (!gdl_hstruct_config_block_iterator_next (itr, gdl_false)) break;
		
		iter++;
		
		b3 = gdl_hstruct_config_block_iterator_get (itr);
		
		if (gdl_hstruct_config_block_has_hotspot (b3))
		{
			if (b1 && gdl_hstruct_config_block_has_hotspot (b1))
			{
				// collapse b1 + b2 + b3
				trm = _gdl_hstruct_workspace_collapse_ancestral (w, part, b1, b2, b3, C, tol, max_iter, min_length);
				if (trm)
				{
					gdl_hstruct_config_block_free (b1);b1=NULL;
					gdl_hstruct_config_block_free (b2);b2=NULL;
//					if (!gdl_hstruct_config_block_iterator_next (itr, gdl_true)) break;
//					b1 = gdl_hstruct_config_block_iterator_get (itr);
					if (!gdl_hstruct_config_block_iterator_next (itr, gdl_false)) break;
					b2 = gdl_hstruct_config_block_iterator_get (itr);
				}
				else
				{
					gdl_hstruct_config_block_free (b1);b1=NULL;
					gdl_hstruct_config_block_free (b2);b2=NULL;
					b1 = b3;
					if (!gdl_hstruct_config_block_iterator_next (itr, gdl_false)) break;
					b2 = gdl_hstruct_config_block_iterator_get (itr);
				}
			}
			else
			{
				// collapse b2 + b3
				trm = _gdl_hstruct_workspace_collapse_ancestral (w, part, 0, b2, b3, C, tol, max_iter, min_length);
				if (trm)
				{
					gdl_hstruct_config_block_free (b1);b1=NULL;
					gdl_hstruct_config_block_free (b2);b2=NULL;
//					if (!gdl_hstruct_config_block_iterator_next (itr, gdl_true)) break;
//					b1 = gdl_hstruct_config_block_iterator_get (itr);
					if (!gdl_hstruct_config_block_iterator_next (itr, gdl_false)) break;
					b2 = gdl_hstruct_config_block_iterator_get (itr);
				}
				else
				{
					gdl_hstruct_config_block_free (b1);b1=NULL;
					gdl_hstruct_config_block_free (b2);b2=NULL;
					b1 = b3;
					if (!gdl_hstruct_config_block_iterator_next (itr, gdl_false)) break;
					b2 = gdl_hstruct_config_block_iterator_get (itr);
				}
			}
		}
		else if (b1 && gdl_hstruct_config_block_has_hotspot (b1))
		{
			// collapse b1 + b2
			trm = _gdl_hstruct_workspace_collapse_ancestral (w, part, b1, b2, 0, C, tol, max_iter, min_length);
			gdl_hstruct_config_block_free (b1);
			gdl_hstruct_config_block_free (b2);
			b1 = NULL;
			b2 = b3;
		}
		else 
		{
			// collapse b2
			trm = _gdl_hstruct_workspace_collapse_ancestral (w, part, 0, b2, 0, C, tol, max_iter, min_length);
			gdl_hstruct_config_block_free (b1);
			gdl_hstruct_config_block_free (b2);
			b1 = NULL;
			b2 = b3;
		}
		rm += trm;
	}
	while (0==0);
	
	if (b1 && b2)
	{
		rm += _gdl_hstruct_workspace_collapse_ancestral (w, part, b1, b2, 0, C, tol, max_iter, min_length);
		gdl_hstruct_config_block_free (b1);
		gdl_hstruct_config_block_free (b2);
	}
	else
	{
		if (b1 && !gdl_hstruct_config_block_has_hotspot (b1))
		{
			rm += _gdl_hstruct_workspace_collapse_ancestral (w, part, 0, b1, 0, C, tol, max_iter, min_length);
			gdl_hstruct_config_block_free (b1);
		}
		if (b2 && !gdl_hstruct_config_block_has_hotspot (b2))
		{
			rm += _gdl_hstruct_workspace_collapse_ancestral (w, part, 0, b2, 0, C, tol, max_iter, min_length);
			gdl_hstruct_config_block_free (b2);
		}
	}
	
	gdl_hstruct_config_block_iterator_free (itr);
	
	_gdl_hstruct_workspace_set_hotspot_eligible (w, part, gdl_true);
	
	if (rm)
	{
		gdl_hstruct_model_check_config_block (w->model, part);
	}
		
	return rm;
}


FILE *
gdl_hstruct_workspace_set_logger (gdl_hstruct_workspace * w, FILE * logger)
{
	FILE * old = w->logger;
	
	w->logger = logger;
	
	return old;	
}
