/**  
 * 	hview/ligation.c
 * 
 *  $Author: baptiste $, $Date: 2008-05-13 15:33:43 $, $Version$
 *
 *  Libgdl : a C library for statistical genetics
 * 
 *  Copyright (C) 2003-2006  Jean-Baptiste Veyrieras, INRA, France.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA * 
 */

#include <gdl/gdl_common.h>
#include <gdl/gdl_errno.h>
#include <gdl/gdl_string.h>
#include <gdl/gdl_hash.h>
#include <gdl/gdl_list.h>
#include <gdl/gdl_permutation.h>
#include <gdl/gdl_gview.h>
#include <gdl/gdl_hview.h>

/*
 * Scratch record for the merged haplotype currently being assembled.
 * A single instance is allocated by _hview_merge_alloc() and overwritten
 * by every call to _init_hentry().
 */
typedef struct
{
	/* Key string built from the per-locus values; used to look up and
	   insert entries in the merge hashtable. */
	gdl_string * key;
	/* Per-locus values of the merged haplotype (nl entries, the loci of
	   the first hview followed by those of the second). */
    gdl_haplotype haplo;
} _hentry;

/*
 * Working state for ligating two hviews.
 *
 * Filled by _hview_merge_alloc() and _merge_hview(), then copied into the
 * target hview by _update_hview(). The guniq, hconfigs and nhc pointers
 * are stored in the target hview by _update_hview() and are not released
 * by _hview_merge_free().
 */
typedef struct
{
	/* Number of loci in the first hview (gdl_hview_locus_size (h1)). */
	size_t nl1;
	/* Number of loci in the second hview (gdl_hview_locus_size (h2)). */
	size_t nl2;
	/* Number of loci after the merge: nl1 + nl2. */
	size_t nl;
	/* Number of distinct merged haplotypes inserted into hdict so far. */
	size_t nh;
	/* Number of accession clusters (gdl_clustering_nclust (guniq)). */
	size_t na;
	/* Ploidy reported by gdl_gview_ploidy(). */
	size_t np;
	/* gdl_clustering_size (guniq); used with np as the denominator when
	   haplotype multiplicities are accumulated in _update_hview(). */
	size_t nn;
	/* For each cluster, the accession index used to query the first hview. */
	size_t * aidx1;
	/* For each cluster, the accession index used to query the second hview. */
	size_t * aidx2;
	/* Accession clustering returned by gdl_gview_accession_clustering(). */
	gdl_clustering * guniq;
	/* Maps a haplotype key string to its _gdl_haplo record. */
	gdl_hashtable * hdict;
	/* For each cluster, the array of merged haplotype configurations. */
	gdl_hconfig_ptr ** hconfigs;
	/* For each cluster, the number of entries in hconfigs[i]. */
	size_t * nhc;
	/* Scratch entry reused for every haplotype lookup. */
	_hentry * hentry;
	/* For each merged locus, the offset of its field inside the key string. */
	size_t * hkeyx;
	/* For each merged locus, the format string used to print its field. */
	gdl_string ** hkeyf;
} _hview_merge;

static _hview_merge *
_hview_merge_alloc (const gdl_gview * g, 
                     const gdl_mask * m,
                     const gdl_hview * h1,
                     const gdl_hview * h2,
                     const gdl_accession_mask * ma1,
                     const gdl_accession_mask * ma2)
{
	size_t i, ii, s, na;
	gdl_locus * locus;
	_hview_merge * hm;
	
	hm = GDL_CALLOC (_hview_merge, 1);
	
	hm->nh  = 0;
	hm->np  = gdl_gview_ploidy (g);
	hm->nl1 = gdl_hview_locus_size (h1);
	hm->nl2 = gdl_hview_locus_size (h2);
	hm->nl  = hm->nl1 + hm->nl2;
	
	hm->hkeyx = GDL_MALLOC (size_t, hm->nl);
	hm->hkeyf = GDL_MALLOC (gdl_string *, hm->nl);
	
	for (s = i = 0; i < hm->nl1; i++)
	{
		locus         = gdl_hview_get_locus (h1, i);
		na            = gdl_locus_allele (locus) - 1;
		hm->hkeyx[i]  = s;
		na            = _msd (na);
		s            += na;
		hm->hkeyf[i]  = gdl_string_sprintf ("%%%dd", na);
		//printf ("KEY[%d] %d %s\n", i, hm->hkeyx[i], hm->hkeyf[i]);
	}
	
	for (i = 0; i < hm->nl2; i++)
	{
		locus         = gdl_hview_get_locus (h2, i);
		na            = gdl_locus_allele (locus) - 1;
		hm->hkeyx[i+hm->nl1]  = s;
		na            = _msd (na);
		s             += na;
		hm->hkeyf[i+hm->nl1]  = gdl_string_sprintf ("%%%dd", na);
		//printf ("KEY[%d] %d %s\n", i+hm->nl1, hm->hkeyx[i+hm->nl1], hm->hkeyf[i+hm->nl1]);
	}
	
	hm->hentry        = GDL_MALLOC (_hentry, 1);
	hm->hentry->key   = gdl_string_alloc (s);
	hm->hentry->haplo = GDL_MALLOC (size_t, hm->nl);
	
	hm->hdict = gdl_hashtable_alloc (&_haplo_type, 0);
	hm->guniq = gdl_gview_accession_clustering (g, m);
	
	hm->nn       = gdl_clustering_size (hm->guniq);
	hm->na       = gdl_clustering_nclust (hm->guniq);
	
	hm->hconfigs = GDL_MALLOC (gdl_hconfig_ptr *, hm->na);
	hm->nhc      = GDL_MALLOC (size_t, hm->na);
	hm->aidx1    = GDL_MALLOC (size_t, hm->na);
	hm->aidx2    = GDL_MALLOC (size_t, hm->na);
	
	for (i = 0; i < hm->na; i++)
	{
		ii = gdl_clustering_clust_idx (hm->guniq, i);
		if (ma1 && ma2)
		{
			hm->aidx1[i] = gdl_entity_mask_idx (ma1, ii);
			hm->aidx2[i] = gdl_entity_mask_idx (ma2, ii);
		}
		else
		{
			// means that there is no accession mask
			hm->aidx1[i] = hm->aidx2[i]	= ii;
		}
	}
		
	return hm;
}

/*
 * Release a merge workspace. A null pointer is accepted and ignored.
 *
 * The guniq, hconfigs and nhc members are not released here;
 * _update_hview() stores those pointers in the target hview.
 */
void
_hview_merge_free (_hview_merge * h)
{
	size_t locus;

	if (!h)
	{
		return;
	}

	GDL_FREE (h->aidx1);
	GDL_FREE (h->aidx2);
	gdl_hashtable_free (h->hdict);

	/* the scratch entry owns both its key and its value buffer */
	GDL_FREE (h->hentry->key);
	GDL_FREE (h->hentry->haplo);
	GDL_FREE (h->hentry);

	GDL_FREE (h->hkeyx);

	/* one format string per merged locus */
	locus = 0;
	while (locus < h->nl)
	{
		gdl_string_free (h->hkeyf[locus]);
		locus++;
	}
	GDL_FREE (h->hkeyf);

	GDL_FREE (h);
}

/*
 * Load the scratch entry of hm with the concatenation of two haplotypes.
 *
 * The first nl1 values are taken from *h1 and the remaining ones from *h2.
 * Each value is also printed into the key string at the offset hkeyx[j]
 * using the per-locus format hkeyf[j].
 */
static void
_init_hentry (_hview_merge * hm, const gdl_haplotype * h1, const gdl_haplotype * h2)
{
	size_t j;
	
	for (j = 0; j < hm->nl; j++)
	{	
		if (j < hm->nl1)
		{
			hm->hentry->haplo[j] = (*h1)[j];
		}
		else
		{
			hm->hentry->haplo[j] = (*h2)[j-hm->nl1];
		}
		/* hkeyf[j] is built as a "%<width>d" conversion, which consumes an
		   int; the stored value is a size_t, so convert it explicitly. */
		gdl_string_scat (hm->hentry->key + hm->hkeyx[j], hm->hkeyf[j], (int) hm->hentry->haplo[j]);
	}    
}

/*
 * Return the index of the merged haplotype made of *h1 followed by *h2,
 * registering it in the dictionary of hm when it has not been seen yet.
 *
 * A new haplotype receives the current dictionary size as its index, gets
 * a private copy of the merged values, and increments hm->nh.
 */
static size_t 
_add_haplotype (_hview_merge * hm, const gdl_haplotype * h1, const gdl_haplotype * h2)
{
	_gdl_haplo * entry;

	/* build the key and the value array in the scratch entry */
	_init_hentry (hm, h1, h2);

	entry = (_gdl_haplo *) gdl_hashtable_lookup (hm->hdict, hm->hentry->key);
	if (entry)
	{
		return entry->i;
	}

	entry    = GDL_MALLOC (_gdl_haplo, 1);
	entry->i = gdl_hashtable_size (hm->hdict);
	entry->h = GDL_MALLOC (size_t, hm->nl);
	memcpy (entry->h, hm->hentry->haplo, sizeof(size_t)*hm->nl);
	gdl_hashtable_add (hm->hdict, hm->hentry->key, entry, 0);
	hm->nh += 1;

	return entry->i;
}

/*
 * Append to hm->hconfigs[i] every merged configuration obtained by pairing
 * the haplotypes of hc1 with the haplotypes of hc2.
 *
 * One configuration is produced per permutation of the np slots: slot k of
 * hc1 is joined with slot permut[k] of hc2. Each new configuration gets the
 * probability hc1->pr * hc2->pr.
 *
 *   hm        merge workspace (haplotype dictionary and output arrays)
 *   h1, h2    hviews the configurations belong to
 *   hc1, hc2  configurations to combine
 *   i         cluster index, i.e. the row of hm->hconfigs to append to
 *   m         in/out: next free position in hm->hconfigs[i]
 *
 * Returns the sum of the probabilities of all configurations appended by
 * this call, which the caller uses as a normalisation term.
 */
static double
_add_hconfig (_hview_merge * hm,
                  const gdl_hview * h1,
                  const gdl_hview * h2,
                  const gdl_hconfig * hc1,
                  const gdl_hconfig * hc2,
                  size_t i,
                  size_t * m)
{
	size_t j, k, l;
	double s;
	gdl_permutation * permut;
	gdl_hconfig * hc;
	
	l = *m;
	
	permut = gdl_permutation_alloc (hm->np);
	
	gdl_permutation_init (permut);
	
	/* The accumulator must be cleared once, before the permutation loop.
	   Resetting it per permutation would return only the last term. */
	s = 0;
		
	do
	{
		hc = gdl_hconfig_alloc (hm->np);
		for (k = 0; k < hm->np; k++)
		{
	    	j = gdl_permutation_get (permut, k);
	    	hc->idx[k] = _add_haplotype (hm, 
		                       gdl_hview_get_haplotype (h1, hc1->idx[k]),
		                       gdl_hview_get_haplotype (h2, hc2->idx[j]));
		}
		hc->pr = hc1->pr*hc2->pr;
		s += hc->pr;
		hm->hconfigs[i][l++] = hc;		
	}
	while (gdl_permutation_next (permut) == GDL_SUCCESS);
	
	*m = l;
	
	gdl_permutation_free (permut);
	
	return s;
}                  

/*
 * Build, for every accession cluster, the merged haplotype configurations
 * of h1 and h2 and store them in hm->hconfigs / hm->nhc.
 *
 * Every configuration of h1 is combined with every configuration of h2
 * through _add_hconfig(); the probabilities of the resulting row are then
 * divided by the sum of the values returned by _add_hconfig().
 *
 * Returns hm.
 */
static _hview_merge *
_merge_hview (_hview_merge * hm, const gdl_hview * h1, const gdl_hview * h2)
{
	size_t clust;

	for (clust = 0; clust < hm->na; clust++)
	{
		size_t acc1, acc2, cnt1, cnt2, c1, c2, fill, pos;
		double total;
		gdl_hconfig * left, * right;

		acc1 = hm->aidx1[clust];
		acc2 = hm->aidx2[clust];

		cnt1 = gdl_hview_hconfig_size (h1, acc1);
		cnt2 = gdl_hview_hconfig_size (h2, acc2);

		/* one slot per (config of h1, config of h2, permutation) triple */
		hm->nhc[clust]      = cnt1 * cnt2 * gdl_factorial (hm->np);
		hm->hconfigs[clust] = GDL_MALLOC (gdl_hconfig_ptr, hm->nhc[clust]);

		total = 0;
		fill  = 0;
		for (c1 = 0; c1 < cnt1; c1++)
		{
			left = gdl_hview_get_hconfig (h1, acc1, c1);
			for (c2 = 0; c2 < cnt2; c2++)
			{
				right  = gdl_hview_get_hconfig (h2, acc2, c2);
				total += _add_hconfig (hm, h1, h2, left, right, clust, &fill);
			}
		}

		/* rescale the probabilities of the whole row */
		for (pos = 0; pos < hm->nhc[clust]; pos++)
		{
			hm->hconfigs[clust][pos]->pr /= total;
		}
	}

	return hm;
}

/*
 * Replace the content of h1 with the result held in the merge workspace.
 *
 * h1 is first emptied with _gdl_hview_clean(). It then takes the mask m,
 * the sizes, the clustering and the configuration arrays of hm (pointers
 * are copied, not duplicated), a haplotype table indexed by the haplotype
 * index stored in the dictionary, and a freshly computed multiplicity
 * vector: each configuration adds pr / (nn * np) to every haplotype it
 * references.
 *
 * Always returns GDL_SUCCESS.
 */
static int
_update_hview (_hview_merge * hm, const gdl_mask * m, gdl_hview * h1)
{
	size_t clust, cfg, slot;
	double denom;
	gdl_hconfig * conf;
	gdl_hashtable_itr * it;

	_gdl_hview_clean (h1);

	h1->mask = m;

	h1->na = hm->na;
	h1->nl = hm->nl;
	h1->nh = hm->nh;

	h1->guniq    = hm->guniq;
	h1->hconfigs = hm->hconfigs;
	h1->nhc      = hm->nhc;

	/* place every registered haplotype at the index it was assigned */
	h1->haplotypes = GDL_MALLOC (gdl_haplotype, h1->nh);

	it = gdl_hashtable_iterator (hm->hdict);
	do
	{
		_gdl_haplo * rec = (_gdl_haplo *) gdl_hashtable_iterator_value (it);
		h1->haplotypes[rec->i] = rec->h;
	}
	while (gdl_hashtable_iterator_next (it));
	gdl_hashtable_iterator_free (it);

	h1->mult = GDL_CALLOC (double, h1->nh);

	denom = hm->nn*hm->np;

	for (clust = 0; clust < h1->na; clust++)
	{
		for (cfg = 0; cfg < h1->nhc[clust]; cfg++)
		{
			conf = h1->hconfigs[clust][cfg];
			for (slot = 0; slot < hm->np; slot++)
			{
				h1->mult[conf->idx[slot]] += conf->pr/denom;
			}
		}
	}

	return GDL_SUCCESS;
}

/*
 * Ligate the loci of h2 onto h1, in place.
 *
 * Both hviews must refer to the same data object; otherwise the error
 * "Try to merge two hviews with different data reference" is raised with
 * GDL_EINVAL and 0 is the error value.
 *
 * The merged mask is built with gdl_mask_ligation() from the masks of the
 * two hviews, h1 is rewritten with the merged content, and the merged mask
 * is returned.
 */
gdl_mask *
gdl_hview_locus_ligation (gdl_hview * h1, const gdl_hview * h2)
{
	if (h1->data == h2->data)
	{
		const gdl_gview * gview = h1->data;
		gdl_accession_mask * amask1 = NULL;
		gdl_accession_mask * amask2 = NULL;
		gdl_mask * merged;
		_hview_merge * work;

		merged = gdl_mask_ligation (gdl_hview_get_gmask (h1),
		                            gdl_hview_get_gmask (h2),
		                            GDL_LOCUS,
		                            GDL_ACCESSION,
		                            &amask1,
		                            &amask2);

		work = _hview_merge_alloc (gview, merged, h1, h2, amask1, amask2);

		/* the accession masks are only needed to set up the workspace */
		gdl_entity_mask_free (amask1);
		gdl_entity_mask_free (amask2);

		_merge_hview (work, h1, h2);
		_update_hview (work, merged, h1);
		_hview_merge_free (work);

		return merged;
	}

	GDL_ERROR_VAL ("Try to merge two hviews with different data reference",
	               GDL_EINVAL,
	               0);
}

/*
 * Ligate the loci of h2 onto h1, in place, using a mask and accession
 * masks supplied by the caller.
 *
 *   g         genotype view used for the ploidy and accession clustering
 *   m         mask stored in h1 after the merge
 *   ma1, ma2  accession masks mapping cluster indexes onto h1 and h2; when
 *             either is NULL the cluster index is used unchanged
 *   h1        hview that receives the merged content
 *   h2        hview whose loci are appended
 *
 * Returns the status of the update step on success. When the two hviews do
 * not share the same data object the error is reported with GDL_EINVAL and
 * GDL_EINVAL is returned, so the failure cannot be mistaken for a success
 * status by the caller.
 */
int
gdl_hview_locus_ligation2 (const gdl_gview * g,
                           const gdl_mask * m,
                           const gdl_accession_mask * ma1,
                           const gdl_accession_mask * ma2,
                           gdl_hview * h1,
                           const gdl_hview * h2)
{
	if (h1->data != h2->data)
	{
		/* the error value of an int status function is the error code */
		GDL_ERROR_VAL ("Try to merge two hviews with different data reference",
		               GDL_EINVAL,
		               GDL_EINVAL);
	}
	else
	{
		int status;
		_hview_merge * hm;
		
		hm = _hview_merge_alloc (g, m, h1, h2, ma1, ma2);
		
		_merge_hview (hm, h1, h2);
		
		status = _update_hview (hm, m, h1);
		
		_hview_merge_free (hm);
		
		return status;
	}
}
