/*  
 *  mosaic/tag.c
 * 
 *  $Author: baptiste $, $Date: 2008-05-13 15:22:01 $, $Version$
 *
 *  Libgdl : a C library for statistical genetics
 * 
 *  Copyright (C) 2003-2006  Jean-Baptiste Veyrieras, INRA, France.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA * 
 */

#include <float.h>
 
#include <gdl/gdl_common.h>
#include <gdl/gdl_rng.h>
#include <gdl/gdl_randist.h>
#include <gdl/gdl_gentity.h>
#include <gdl/gdl_mask.h>
#include <gdl/gdl_allele_block.h>
#include <gdl/gdl_gview_wrapper.h>
#include <gdl/gdl_mosaic.h>

/*
 * Pick an allele index at locus j from a vector of per-cluster weights.
 *
 * z holds one weight per cluster (w->clust->K entries).  Each cluster k
 * adds z[k]*(1-mu[j]) to the allele returned by gdl_allele_block_get()
 * for (k, j) and z[k]*mu[j]/(NA[j]-1) to every other allele of the locus.
 *
 * The best allele is tracked while the sums are still being accumulated:
 * the returned index is the last one whose running sum strictly exceeded
 * every running sum seen before it.  Returns 0 when no running sum ever
 * exceeds 0.
 */
size_t
gdl_mosaic_tag_predict_allele (const gdl_mosaic * w, double * z, size_t j)
{
	const size_t K = w->clust->K;
	size_t cluster, allele, best = 0;
	double best_weight = 0;
	double * weight = GDL_CALLOC (double, w->data->NA[j]);
	
	for (cluster = 0; cluster < K; cluster++)
	{
		for (allele = 0; allele < w->data->NA[j]; allele++)
		{
			weight[allele] += (allele == gdl_allele_block_get (w->clust->haplo, cluster, j, 0))
				? z[cluster]*(1-w->param->mu[j])
				: z[cluster]*w->param->mu[j]/(w->data->NA[j]-1);
			
			if (!(weight[allele] > best_weight))
			{
				continue;
			}
			best_weight = weight[allele];
			best        = allele;
		}
	}
	
	GDL_FREE (weight);
	
	return best;
}

/*
 * Predict the allele of row i at locus j from the two flanking loci m1
 * and m2 (the callers in this file always use m1 < j < m2).
 *
 * For every cluster k a weight z[k] is summed over all cluster pairs
 * (k1, k2): the alpha_buffer entry for (k1, i, m1), times the
 * beta_buffer entry for (k2, i, m2), times
 * gdl_mosaic_hmm_obs_proba_haplo() for k2 at m2, times the two
 * transition-like terms x and y.  The weights are then passed to
 * gdl_mosaic_tag_predict_allele(), whose result is returned.
 *
 * NOTE(review): the y term uses f[m2][k] and the test (k2 == k1), and
 * the x term uses f[m1][k], whereas gdl_mosaic_tag_predict1() indexes f
 * by the predicted locus j.  These are kept exactly as they were
 * -- confirm against the model before changing them.
 */
size_t
gdl_mosaic_tag_predict (gdl_mosaic * w, size_t i, size_t m1, size_t m2, size_t j)
{
	const size_t K = w->clust->K;
	size_t k, k1, k2, allele;
	double alpha, beta, u, x, y, t1, t2, pr;
	double * z;
	
	z = GDL_MALLOC (double, K);
	
	/* One minus the "no recombination" probability on (m1,j) and (j,m2). */
	t1 = 1-gdl_mosaic_hmm_norec_proba (w, m1, gdl_mosaic_data_distance (w->data, m1, j));
	/*
	 * The locus argument is the left end of the interval, as in the
	 * one-sided predictors of this file; it used to be m1 here although
	 * the distance is measured from j.
	 */
	t2 = 1-gdl_mosaic_hmm_norec_proba (w, j, gdl_mosaic_data_distance (w->data, j, m2));
	
	for (k = 0; k < K; k++)
	{
		pr = 0;
		for (k1 = 0; k1 < K; k1++)
		{
			/* alpha and x do not depend on k2: evaluate them once per k1. */
			alpha = gdl_loci_block_get (w->util->alpha_buffer, k1, i, m1, 0);
			x  = t1*w->param->f[m1][k];
			if (k1 == k) x += 1-t1;
			for (k2 = 0; k2 < K; k2++)
			{
				beta  = gdl_loci_block_get (w->util->beta_buffer, k2, i, m2, 0);
				u  = alpha*beta;
				u *= gdl_mosaic_hmm_obs_proba_haplo (w, k2, i, 0, m2);
				y  = t2*w->param->f[m2][k];
				if (k2 == k1) y += 1-t2;
				pr += u*x*y;
			}
		}
		z[k]=pr;
	}
	
	allele = gdl_mosaic_tag_predict_allele (w, z, j);
	
	GDL_FREE (z);
	
	return allele;
}

/*
 * Predict the allele of row i at locus j from the single locus m1
 * (gdl_mosaic_tag_score1() calls this with m1 < j).
 *
 * The weight of cluster k is the sum over clusters k1 of the
 * alpha_buffer entry for (k1, i, m1) times rec*f[j][k], plus (1-rec)
 * when k1 == k, where rec is one minus gdl_mosaic_hmm_norec_proba() on
 * the interval (m1, j).  The weights are handed to
 * gdl_mosaic_tag_predict_allele(), whose result is returned.
 */
size_t
gdl_mosaic_tag_predict1 (gdl_mosaic * w, size_t i, size_t m1, size_t j)
{
	const size_t K = w->clust->K;
	double * weight = GDL_MALLOC (double, K);
	const double rec = 1-gdl_mosaic_hmm_norec_proba (w, m1, gdl_mosaic_data_distance (w->data, m1, j));
	size_t to, from, allele;
	
	for (to = 0; to < K; to++)
	{
		double acc = 0;
		
		for (from = 0; from < K; from++)
		{
			const double a = gdl_loci_block_get (w->util->alpha_buffer, from, i, m1, 0);
			double trans   = rec*w->param->f[j][to];
			
			if (from == to)
			{
				trans += 1-rec;
			}
			acc += a*trans;
		}
		weight[to] = acc;
	}
	
	allele = gdl_mosaic_tag_predict_allele (w, weight, j);
	
	GDL_FREE (weight);
	
	return allele;
}

/*
 * Predict the allele of row i at locus j from the single locus m1
 * (gdl_mosaic_tag_score2() calls this with j < m1).
 *
 * The weight of cluster k is the sum over clusters k1 of the
 * beta_buffer entry for (k1, i, m1), times
 * gdl_mosaic_hmm_obs_proba_haplo() for k1 at m1, times rec*f[j][k1],
 * plus (1-rec) when k1 == k, where rec is one minus
 * gdl_mosaic_hmm_norec_proba() on the interval (j, m1).  The weights
 * are handed to gdl_mosaic_tag_predict_allele(), whose result is
 * returned.
 */
size_t
gdl_mosaic_tag_predict2 (gdl_mosaic * w, size_t i, size_t m1, size_t j)
{
	const size_t K = w->clust->K;
	double * weight = GDL_MALLOC (double, K);
	const double rec = 1-gdl_mosaic_hmm_norec_proba (w, j, gdl_mosaic_data_distance (w->data, j, m1));
	size_t here, there, allele;
	
	for (here = 0; here < K; here++)
	{
		double acc = 0;
		
		for (there = 0; there < K; there++)
		{
			double b, trans;
			
			b  = gdl_loci_block_get (w->util->beta_buffer, there, i, m1, 0);
			b *= gdl_mosaic_hmm_obs_proba_haplo (w, there, i, 0, m1);
			
			trans = rec*w->param->f[j][there];
			if (there == here)
			{
				trans += 1-rec;
			}
			acc += b*trans;
		}
		weight[here] = acc;
	}
	
	allele = gdl_mosaic_tag_predict_allele (w, weight, j);
	
	GDL_FREE (weight);
	
	return allele;
}

/*
 * Count prediction errors strictly between the loci m1 and m2.
 *
 * For every row index below w->data->NC and every locus j with
 * m1 < j < m2 that is not flagged missing, the allele predicted by
 * gdl_mosaic_tag_predict() is compared with the stored allele; the
 * number of mismatches is returned.
 */
size_t
gdl_mosaic_tag_score (gdl_mosaic * w, size_t m1, size_t m2)
{
	size_t row, locus;
	size_t mismatches = 0;
	
	for (row = 0; row < w->data->NC; row++)
	{
		for (locus = m1+1; locus < m2; locus++)
		{
			size_t predicted, observed;
			
			/* Missing sites are not scored. */
			if (gdl_mosaic_data_is_missing (w->data, row, locus))
			{
				continue;
			}
			predicted = gdl_mosaic_tag_predict (w, row, m1, m2, locus);
			observed  = gdl_mosaic_data_get_allele (w->data, row, locus, 0);
			if (observed != predicted)
			{
				mismatches++;
			}
		}
	}
	
	return mismatches;
}

/*
 * Count prediction errors between m1 and m2 and after m2.
 *
 * For every row index below w->data->NC:
 *   - loci j with m1 < j < m2 are predicted with
 *     gdl_mosaic_tag_predict() from both m1 and m2;
 *   - loci j with m2 < j < w->data->L are predicted with
 *     gdl_mosaic_tag_predict1() from m2 alone.
 * Loci flagged missing are skipped.  Returns the number of predictions
 * that differ from the stored allele.
 */
size_t
gdl_mosaic_tag_score1 (gdl_mosaic * w, size_t m1, size_t m2)
{
	size_t row, locus;
	size_t mismatches = 0;
	
	for (row = 0; row < w->data->NC; row++)
	{
		size_t predicted, observed;
		
		/* Between the two loci. */
		for (locus = m1+1; locus < m2; locus++)
		{
			if (gdl_mosaic_data_is_missing (w->data, row, locus))
			{
				continue;
			}
			predicted = gdl_mosaic_tag_predict (w, row, m1, m2, locus);
			observed  = gdl_mosaic_data_get_allele (w->data, row, locus, 0);
			if (observed != predicted)
			{
				mismatches++;
			}
		}
		/* Everything after m2. */
		for (locus = m2+1; locus < w->data->L; locus++)
		{
			if (gdl_mosaic_data_is_missing (w->data, row, locus))
			{
				continue;
			}
			predicted = gdl_mosaic_tag_predict1 (w, row, m2, locus);
			observed  = gdl_mosaic_data_get_allele (w->data, row, locus, 0);
			if (observed != predicted)
			{
				mismatches++;
			}
		}
	}
	
	return mismatches;
}

/*
 * Count prediction errors before m1 and between m1 and m2.
 *
 * For every row index below w->data->NC:
 *   - loci j with j < m1 are predicted with gdl_mosaic_tag_predict2()
 *     from m1 alone;
 *   - loci j with m1 < j < m2 are predicted with
 *     gdl_mosaic_tag_predict() from both m1 and m2.
 * Loci flagged missing are skipped.  Returns the number of predictions
 * that differ from the stored allele.
 */
size_t
gdl_mosaic_tag_score2 (gdl_mosaic * w, size_t m1, size_t m2)
{
	size_t row, locus;
	size_t mismatches = 0;
	
	for (row = 0; row < w->data->NC; row++)
	{
		size_t predicted, observed;
		
		/* Everything before m1. */
		for (locus = 0; locus < m1; locus++)
		{
			if (gdl_mosaic_data_is_missing (w->data, row, locus))
			{
				continue;
			}
			predicted = gdl_mosaic_tag_predict2 (w, row, m1, locus);
			observed  = gdl_mosaic_data_get_allele (w->data, row, locus, 0);
			if (observed != predicted)
			{
				mismatches++;
			}
		}
		/* Between the two loci. */
		for (locus = m1+1; locus < m2; locus++)
		{
			if (gdl_mosaic_data_is_missing (w->data, row, locus))
			{
				continue;
			}
			predicted = gdl_mosaic_tag_predict (w, row, m1, m2, locus);
			observed  = gdl_mosaic_data_get_allele (w->data, row, locus, 0);
			if (observed != predicted)
			{
				mismatches++;
			}
		}
	}
	
	return mismatches;
}

/*
 * Search for sets of 3..T tag loci and report them on w->logger.
 *
 * Step 1 fills three tables indexed by locus pairs (i,j), i < j:
 *   s  = gdl_mosaic_tag_score  (w, i, j)
 *   s1 = gdl_mosaic_tag_score1 (w, i, j)
 *   s2 = gdl_mosaic_tag_score2 (w, i, j)
 *
 * Step 2 runs a dynamic program over the work matrix f (M rows, T+4
 * columns).  For row i:
 *   f[i][0]    flag, cleared when the scan for i cannot start below i
 *   f[i][1]    best cost found so far with i as the last locus
 *   f[i][2]    best two-locus cost, min over j < i of s2(j,i)
 *   f[i][3]    cost of the previous level
 *   f[i][4..]  predecessor chosen at each level
 * For each t the level l = t uses s1, the lower levels use s.
 *
 * For every t in 3..T one line "TAG t score: loci..." is written when a
 * logger is set.  No gdl_mask is built here: the function always
 * returns NULL.
 *
 * NOTE(review): cells of f that are never written are read as they come
 * from GDL_MATRIX_ALLOC; this is only well defined if that macro
 * zero-fills -- confirm.
 */
gdl_mask *
gdl_mosaic_tag (gdl_mosaic * w, const size_t T)
{
	const size_t N = w->data->N;
	const size_t M = w->data->L;
	const size_t MM = M*(M-1)/2;
	size_t i, j, l, t, min, minj = 0, mini, mod = 1, prog = 20, x, ** f;
	size_t * s, * s1, * s2;
	
	/*
	 * Fewer than two loci: there is no pair to score, 20/M below would
	 * divide by zero for M == 0 and M-1 would wrap around.
	 */
	if (M < 2)
	{
		return NULL;
	}
	
	/* Position of the pair (i,j), i < j, in a packed strict upper triangle. */
#define INDEX(i,j) ((i)*M-((i)*((i)+1))/2+((j)-(i)-1))
	
	s  = GDL_MALLOC (size_t, MM);
	s1 = GDL_MALLOC (size_t, MM);
	s2 = GDL_MALLOC (size_t, MM);
	f  = GDL_MATRIX_ALLOC (size_t, M, T+4);
	
	if (w->logger)
	{
		/* A literal percent sign must be written %% in a format string. */
		fprintf (w->logger, "Compute Score Matrices : 0%% |                    | 100%%");
		fflush (w->logger);
		mod = (M >= 20) ? M/20 : 20/M;
	}
	for (i = 0; i < M; i++)
	{
		/* Progress bar: at most 20 ticks, one every mod rows. */
		if (w->logger && (i+1)%mod==0 && prog)
		{
			for (j = 0; j < prog+6; j++)
			{
				fprintf (w->logger, "\b");
			}
			fprintf (w->logger, "-");
			for (j = 0; j < prog; j++)
			{
				fprintf (w->logger, " ");
			}
			prog--;
			if (prog) fprintf (w->logger, "| 100%%");
			else fprintf (w->logger, "-| 100%%\n");
			fflush (w->logger);
		}
		for (j = i+1; j < M; j++)
		{
			s[INDEX(i,j)]  = gdl_mosaic_tag_score (w, i, j);
			s1[INDEX(i,j)] = gdl_mosaic_tag_score1 (w, i, j);
			s2[INDEX(i,j)] = gdl_mosaic_tag_score2 (w, i, j);
		}
	}
	
	/* N*M is used as the "no cost found yet" upper bound. */
	for (i = 1; i < M; i++)
	{
		f[i][0]=1;
		f[i][1]=N*M;
	}
	
	/* Two-locus base case: best first locus j for every last locus i. */
	for (i = 1; i < M; i++)
	{
		for (j = 0; j < i; j++)
		{
			if (s2[INDEX(j,i)] < f[i][1])
			{
				f[i][3]=f[i][2]=f[i][1]=s2[INDEX(j,i)];
				f[i][4]=j;
			}
		}
	}
	
	for (t = 3; t <= T; t++)
	{
		for (l = 3; l <= t; l++)
		{
			for (i = t-1; i < M; i++)
			{
				if (f[i][0])
				{
					min = N*M;
					/* Start from the predecessor of the previous level when there is one. */
					for (j = (f[i][l+1]) ? f[i][l+1] : 1; j < i; j++)
					{
						x =  (l==t) ? s1[INDEX(j,i)] : s[INDEX(j,i)];
						x += (l==3) ? f[j][2] : f[j][3];
						if (x < min)
						{
							min  = x;
							minj = j;
						}
					}
					if (j == i)
					{
						f[i][1]   = min;
						f[i][l+2] = minj;
					}
					else
					{
						/* The scan started beyond i: drop i for this t. */
						f[i][0] = 0;	
					}
				}
			}
			for (i = t-1; i < M; i++)
			{
				if (f[i][0]) f[i][3] = f[i][1];
			}
		}	
		min=N*M;
		mini=M;	/* M means "no end point found" */
		for (i = t-1; i < M; i++)
		{
			if (f[i][0])
			{
				if (f[i][1] < min)
				{
					min=f[i][1];
					mini=i;
				}
			}
			f[i][0]=1;
			/*
			 * Reset the previous-level cost of row i to its own
			 * two-locus cost (this used a stale j left over from the
			 * scans above as the row index).
			 */
			f[i][3]=f[i][2];
		}
		if (w->logger)
		{
			fprintf (w->logger, "TAG %zu %1.3f:", t, 1-(double)min/(double)((M-t)*N));
			if (mini < M)
			{
				/* Walk the stored predecessors back from the best end point. */
				fprintf (w->logger, " %zu", mini);
				for (i = t+1; i >= 3; i--)
				{
					mini = f[mini][i+1];
					fprintf (w->logger, " %zu", mini);
				}
			}
			fprintf (w->logger, "\n");
		}
	}
	
#undef INDEX	
	
	GDL_FREE (s);
	GDL_FREE (s1);
	GDL_FREE (s2);
	/*
	 * NOTE(review): f was allocated with M rows but is released with
	 * M-1; this looks like an off-by-one that leaks the last row, kept
	 * as is until the GDL_MATRIX_FREE contract is confirmed.
	 */
	GDL_MATRIX_FREE (f, M-1);
	
	return NULL;
}
