/**  
 * 	hview/extract.c
 * 
 *  $Author: baptiste $, $Date: 2008-05-13 15:33:43 $, $Version$
 *
 *  Libgdl : a C library for statistical genetics
 * 
 *  Copyright (C) 2003-2006  Jean-Baptiste Veyrieras, INRA, France.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include <gdl/gdl_common.h>
#include <gdl/gdl_errno.h>
#include <gdl/gdl_string.h>
#include <gdl/gdl_clustering.h>
#include <gdl/gdl_hash.h>
#include <gdl/gdl_list.h>
#include <gdl/gdl_sort.h>
#include <gdl/gdl_gview.h>
#include <gdl/gdl_hview.h>

/**
 * Build a new hview from `h`, restricted to the accession clusters of
 * `gclust` and, when `hclust` is given, with haplotypes collapsed onto
 * their clusters and projected on the loci selected by `ml`.
 *
 * @param h       source hview
 * @param gmask   mask handed to gdl_hview_alloc() for the new view
 * @param gclust  accession clustering; stored in the new view (eh->guniq)
 * @param hclust  haplotype clustering over the haplotypes of `h`, or NULL
 *                when haplotypes are kept as they are; only read here
 * @param ma      maps an accession index of the new view to the
 *                corresponding accession index in `h`
 * @param ml      maps a locus index of the new view to the corresponding
 *                locus index in `h`; only dereferenced when `hclust` is
 *                non-NULL, may be NULL otherwise
 *
 * @return the newly allocated hview
 */
static gdl_hview *
_extract_hview (const gdl_hview * h,
                const gdl_mask * gmask,
                gdl_clustering * gclust,
                gdl_clustering * hclust,
                gdl_accession_mask * ma,
                gdl_locus_mask * ml)
{
	size_t i, ii, j, k, l, hh, nh, np, nhc, enhc;
	size_t * remap;
	double tot, htot, ** freq;
	gdl_hconfig * hc, * ehc;
	gdl_hview * eh;
	const gdl_haplotype * haplo;
	
	np = gdl_hview_ploidy (h);
	
	/* freq[x] != NULL marks haplotype slot x as used and accumulates
	 * the probability mass assigned to it. */
	freq  = GDL_CALLOC (double *, h->nh);
	/* remap[x] = position of slot x in the compacted eh->haplotypes;
	 * only entries with freq[x] != NULL are ever written or read. */
	remap = GDL_MALLOC (size_t, h->nh);
	
	eh = gdl_hview_alloc (gdl_hview_get_gview (h), gmask);
	
	if (ml)
	{
		eh->nl = gdl_entity_mask_size (ml);
	}
	else
	{
		eh->nl = h->nl;
	}
	
	eh->guniq    = gclust;
	eh->na       = gdl_clustering_nclust (gclust);
	eh->hconfigs = GDL_MALLOC (gdl_hconfig_ptr *, eh->na);
	eh->nhc      = GDL_MALLOC (size_t, eh->na);
	
	/* The number of distinct haplotypes is counted across ALL accessions:
	 * it must be initialised once here and never reset inside the loop,
	 * otherwise eh->haplotypes / eh->mult are undersized and overrun. */
	nh   = 0;
	htot = 0;
		
	for (i = 0; i < eh->na; i++)
	{
		/* representative accession of cluster i, expressed in h's indexing */
		ii  = gdl_clustering_clust_idx (gclust, i);
		ii  = gdl_entity_mask_idx (ma, ii);
		nhc = gdl_hview_hconfig_size (h, ii);
		
		eh->hconfigs[i] = GDL_MALLOC (gdl_hconfig_ptr, nhc);
		enhc            = 0;
		tot             = 0;
		
		for (j = 0; j < nhc; j++)
		{
			hc  = gdl_hview_get_hconfig (h, ii, j);
			
			ehc = gdl_hconfig_clone (hc, np);
			
			for (k = 0; k < np; k++)
			{
				hh = gdl_hconfig_get_haplotype (hc, k);
				if (hclust)
				{
					ehc->idx[k] = gdl_clustering_cluster (hclust, hh);
				}
				else
				{
					ehc->idx[k] = hh;
				}
				if (freq[ehc->idx[k]] == NULL)
				{
					freq[ehc->idx[k]] = GDL_CALLOC (double, 1);
					nh++;
				}
				*freq[ehc->idx[k]] += ehc->pr;
				htot += ehc->pr;
			}
			if (hclust)
			{
				/* After collapsing, two configurations may have become
				 * identical: keep only the first occurrence. */
				for (l = 0; l < enhc; l++)
				{
					for (k = 0; k < np; k++)
					{
						if (eh->hconfigs[i][l]->idx[k]!=ehc->idx[k])
						{
							break;	
						}
					}
					if (k == np)
					{
						break;	
					}
				}
				if (l == enhc)
				{
					eh->hconfigs[i][enhc] = ehc;
					tot += ehc->pr;
					enhc++;
				}
				else
				{
					/* NOTE(review): the probability of the duplicate is
					 * dropped (the survivors are renormalised below)
					 * rather than added to the configuration it matches;
					 * confirm this is the intended behaviour. */
					gdl_hconfig_free (ehc);
				}
			}
			else
			{
				eh->hconfigs[i][j] = ehc;
				tot += ehc->pr;
			}
		}
		if (hclust && enhc < nhc)
		{
			/* Shrink the array to the surviving configurations and
			 * renormalise their probabilities. */
			gdl_hconfig_ptr * shrunk = GDL_MALLOC (gdl_hconfig_ptr, enhc);
			memcpy (shrunk, eh->hconfigs[i], sizeof (gdl_hconfig_ptr)*enhc);
			GDL_FREE (eh->hconfigs[i]);
			eh->hconfigs[i] = shrunk;
			eh->nhc[i]      = enhc;
			for (j = 0; j < enhc; j++)
			{
				eh->hconfigs[i][j]->pr /= tot;
			}
		}
		else
		{
			eh->nhc[i] = nhc;
		}
	}
	
	eh->nh         = nh;
	eh->haplotypes = GDL_MALLOC (gdl_haplotype, nh);
	eh->mult       = GDL_MALLOC (double, nh);
	
	/* Compact the used haplotype slots into eh->haplotypes. */
	for (nh = i = 0; i < h->nh; i++)
	{
		if (freq[i])
		{
			if (hclust)
			{
				/* i is a cluster index: take the cluster representative
				 * and project it on the retained loci. */
				haplo = gdl_hview_get_haplotype (h, gdl_clustering_clust_idx (hclust, i));
				eh->haplotypes[nh] = GDL_MALLOC (size_t, eh->nl);
				for (l = 0; l < eh->nl; l++)
				{
					eh->haplotypes[nh][l] = (*haplo)[gdl_entity_mask_idx (ml, l)];
				}
			}
			else
			{
				haplo = gdl_hview_get_haplotype (h, i);
				eh->haplotypes[nh] = gdl_haplotype_clone (haplo, eh->nl);
			}
			eh->mult[nh] = freq[i][0]/htot;
			remap[i]     = nh;
			nh++;
			GDL_FREE (freq[i]);
		}
	}
	GDL_FREE (freq);
	
	/* The configurations still carry indices into the source haplotype
	 * (or cluster) numbering; translate them to positions in the
	 * compacted eh->haplotypes array. */
	for (i = 0; i < eh->na; i++)
	{
		for (j = 0; j < eh->nhc[i]; j++)
		{
			for (k = 0; k < np; k++)
			{
				eh->hconfigs[i][j]->idx[k] = remap[eh->hconfigs[i][j]->idx[k]];
			}
		}
	}
	GDL_FREE (remap);
		
	return eh;
}                

static gdl_clustering * 
_collapse_haplotype (const gdl_hview * h, const gdl_entity_mask * locus)
{
	size_t i, j, l, nh, nl;
	gdl_clustering_workspace * w;
	gdl_clustering * hclust;
	
	nl = gdl_entity_mask_size (locus);
	nh = gdl_hview_haplotype_size (h);
	w  = gdl_clustering_workspace_alloc (nh);
	
	for (i = 0; i < nh; i++)
	{
		for (j = i+1; j < nh; j++)
		{
			for (l = 0; l < nl; l++)
			{
				if (h->haplotypes[i][l] != h->haplotypes[j][l])
				{
					break;
				}
			}
			if (l == nl)
			{
				gdl_clustering_workspace_set (w, i, j);
			}
		}
	}
	
	hclust = gdl_clustering_workspace_done (w);
	
	gdl_clustering_workspace_free (w);
	
	return hclust;
}
 
/**
 * Extract from `h` the sub-view described by `gmask`.
 *
 * The locus and accession masks of `gmask` are compared with those of
 * `h` through gdl_entity_mask_compare(): a result of 0 means nothing
 * changes for that entity, -1 triggers a restriction, and any other
 * value is reported as an error (presumably -1 means "subset of" -
 * confirm against gdl_entity_mask_compare()).
 *
 * @param h      source hview
 * @param gmask  mask describing the loci/accessions to keep
 *
 * @return a clone of `h` carrying `gmask` when both masks compare equal,
 *         otherwise the view built by _extract_hview(); the error
 *         branches go through GDL_ERROR_NULL.
 */
gdl_hview *
gdl_hview_extract (const gdl_hview * h, const gdl_mask * gmask)
{
	int comp;
	const gdl_entity_mask * ml, * eml, * ma, * ema;
	gdl_entity_mask * eml2 = NULL, * ema2 = NULL;
	gdl_clustering * gclust = NULL, * hclust = NULL;
	gdl_hview * eh;
	
	ml  = gdl_mask_get (h->mask, GDL_LOCUS);
	ma  = gdl_mask_get (h->mask, GDL_ACCESSION);
	eml = gdl_mask_get (gmask, GDL_LOCUS);
	ema = gdl_mask_get (gmask, GDL_ACCESSION);
	
	comp = gdl_entity_mask_compare (eml, ml);
	
	if (comp == -1)
	{
		/* fewer loci: haplotypes may collapse onto each other */
		hclust = _collapse_haplotype (h, eml);
		eml2   = gdl_entity_mask_over (ml, eml);
	}
	else if (comp != 0)
	{
		GDL_ERROR_NULL ("Unable to extract a sub hview with unknown locus",
		                GDL_FAILURE);
	}
	
	comp = gdl_entity_mask_compare (ema, ma);
	
	if (comp == -1)
	{
		gclust    = gdl_gview_accession_clustering (gdl_hview_get_gview (h), gmask);
		ema2      = gdl_entity_mask_over (ma, ema);
	} 
	else if (comp == 0)
	{
		if (eml2)
		{
			/* Same accessions but fewer loci: the accession clustering
			 * still has to be recomputed under the new mask.  The
			 * mapping is built from `ema` (the original passed the
			 * still-uninitialised `ema2` here). */
			gclust    = gdl_gview_accession_clustering (gdl_hview_get_gview (h), gmask);
			ema2      = gdl_entity_mask_over (ma, ema);
		}
	}
	else
	{
		/* Do not leak the locus mapping built above.
		 * NOTE(review): hclust, when allocated, should be released here
		 * as well; no clustering destructor is visible in this file. */
		gdl_entity_mask_free (eml2);
		eml2 = NULL;
		GDL_ERROR_NULL ("Unable to extract a sub hview with unknown accession",
		                GDL_FAILURE);
	}
	
	if (ema2 == NULL && eml2 == NULL)
	{
		/* nothing to restrict: plain copy carrying the requested mask */
		eh       = gdl_hview_clone (h);
		eh->mask = gmask;
	}
	else
	{
		eh = _extract_hview (h, gmask, gclust, hclust, ema2, eml2);
		/* eml2 is NULL here when only the accessions were restricted,
		 * exactly as in the original code path. */
		gdl_entity_mask_free (ema2);
		gdl_entity_mask_free (eml2);
		/* NOTE(review): gclust is kept by the new view (eh->guniq), but
		 * hclust is only read by _extract_hview() and is never released
		 * - looks like a leak; free it with the clustering destructor
		 * once confirmed. */
	}
	
	return eh;
}

