/*  
 * 	hview/collect.c
 * 
 *  $Author: baptiste $, $Date: 2008-05-13 15:33:43 $, $Version$
 *
 *  Libgdl : a C library for statistical genetics
 * 
 *  Copyright (C) 2003-2006  Jean-Baptiste Veyrieras, INRA, France.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA * 
 */
 
#include <gdl/gdl_common.h>
#include <gdl/gdl_errno.h>
#include <gdl/gdl_string.h>
#include <gdl/gdl_gview.h>
#include <gdl/gdl_gview_mask.h>
#include <gdl/gdl_hview.h>

/*
 * One collected (ambiguous accession, locus) pair.
 *
 * Points are created by _collect() and consumed by _alloc(), which
 * turns the list of points into the index arrays of the haplotype view.
 */
typedef struct
{
	size_t idx;   /* value of wh->naa when the point was created; used by _alloc() as the slot in aidx/nas/lidx */
	size_t aidx;  /* cluster index (the loop index over the clustering), not the accession index inside the gview */
	size_t lidx;  /* locus index of the ambiguous site */
} _gdl_collect;

/*
 * Scan every cluster of wh->h->guniq and append to `collector` one
 * _gdl_collect point per (cluster, locus) pair that has to be treated
 * as ambiguous.
 *
 * Side effects on `wh`:
 *   - wh->np, wh->na and wh->nl are set to the ploidy, the number of
 *     clusters and the number of loci;
 *   - wh->isa is allocated (GDL_CALLOC, one gdl_boolean per cluster) and
 *     wh->isa[ic] is set to gdl_true for every cluster that contributed
 *     at least one point;
 *   - wh->naa is incremented once per such cluster. It is NOT reset
 *     here, so the caller's initial value is the first slot index used.
 *
 * Rules applied to each cluster representative:
 *   - unphased accession: only non-homozygous loci are considered.
 *     While no plain site is pending (ns == 0) the locus is inspected
 *     allele by allele; if one allele has no gvalues or more than one
 *     value the locus is recorded immediately. Otherwise the locus is
 *     kept as a pending point. Every later non-homozygous locus first
 *     records the pending point and then becomes the pending one (the
 *     allele-level check is no longer performed once ns > 0). After
 *     the last locus the pending point is recorded if at least two such
 *     sites were seen, and discarded if there was exactly one.
 *   - phased accession: every locus flagged by GDL_GVIEW_HAS_MISSING is
 *     recorded.
 *
 * Returns GDL_SUCCESS, or GDL_EINVAL (with wh->error set) when an
 * accession of size > 1 is met. In the error case the points already
 * pushed stay in `collector` and wh->isa stays allocated.
 */
static int
_collect (_gdl_hview * wh,
          const gdl_gview * data,
          const gdl_mask * mask,
          gdl_list * collector)
{
	size_t ic, il, ip, np, nc, nl, na, ns, push;
	gdl_clustering * gclust = wh->h->guniq;
	gdl_gvalues_get * gbuf;
	const gdl_gvalues * gval;
	_gdl_collect * point = NULL;
	
	np   = gdl_gview_ploidy (data);
	nc   = gdl_clustering_nclust (gclust);
	nl   = GDL_GVIEW_LOCUS_SIZE (data, mask);
	gbuf = GDL_GVIEW_GET_NEW (data, mask);
	
	wh->np  = np;
	wh->na  = nc;
	wh->nl  = nl;
	wh->isa = GDL_CALLOC (gdl_boolean, nc);
	
	for (ic = 0; ic < nc; ic++)
	{
		size_t aidx = gdl_clustering_clust_idx (gclust, ic);
		gdl_accession * a 
		   = GDL_GVIEW_GET_ACCESSION (data,
		                              mask,
		                              aidx);
		size_t size  = gdl_accession_get_size (a);
		
		if (size > 1)
		{
			wh->error 
			   = gdl_string_vsprintf ("Cannot extract haplotypes from pooled accession %s", a->name);
			/* Release the allele buffer: it is local to this call and
			 * would otherwise leak on this early exit. */
			gdl_gvalues_get_free (gbuf);
			return GDL_EINVAL;
		}
		
		/* Collector size before this accession; compared below to
		 * detect whether the accession contributed any point. */
		na = gdl_list_size (collector);
		
		/* push: a pending point exists and has not been recorded yet.
		 * ns:   number of plain non-homozygous sites seen so far. */
		push = 0;
		
		for (ns = il = 0; il < nl; il++)
		{
			if (!gdl_accession_is_phased (a))
			{
				if (!GDL_GVIEW_IS_HOMOZYGOUS (data, mask, aidx, il))
				{
					push=1;
					if (!ns)
					{
						/* No pending site yet: look for an allele that
						 * is missing or carries several values. */
						for (ip = 0; ip < np; ip++)
						{
							GDL_GVIEW_GET_ALLELE (data, mask, aidx, il, ip, gbuf);
							gval = gdl_gvalues_get_gvalues (gbuf);
							if (gval == NULL || gval->size > 1)
							{
								/* Record the locus right away and do
								 * not count it as a pending site. */
								point = GDL_MALLOC (_gdl_collect, 1);
								point->idx  = wh->naa;
								point->aidx = ic;
								point->lidx = il;
								gdl_list_push_back (collector, point, 1);
								push=0;
								break;
							}
						}	
					}
					else
					{
						/* A second (or later) site confirms the pending
						 * one: record it before replacing it below. */
						gdl_list_push_back (collector, point, 1);
					}
					if (push)
					{
						/* This locus becomes the new pending point. */
						point = GDL_MALLOC (_gdl_collect, 1);
						point->idx  = wh->naa;
						point->aidx = ic;
						point->lidx = il;			
						ns++;
					}
				}				
			}
			else if (GDL_GVIEW_HAS_MISSING (data, mask, aidx, il))
			{
				point = GDL_MALLOC (_gdl_collect, 1);
				point->idx  = wh->naa;
				point->aidx = ic;
				point->lidx = il;
				gdl_list_push_back (collector, point, 1);
			}
		}
		if (push && ns > 1)
		{
			/* Record the last pending site. */
			gdl_list_push_back (collector, point, 1);
		}
		else if (push && ns == 1)
		{
			/* Single non-homozygous site: ignored, drop the pending point. */
			GDL_FREE (point);
		}
		if (gdl_list_size (collector) > na)
		{
			/* The accession contributed at least one point. */
			(wh->naa)++;
			wh->isa[ic] = gdl_true;
		}
	}
	
	gdl_gvalues_get_free (gbuf);
	
	return GDL_SUCCESS;
}

/*
 * Build the per-slot index arrays of `hw` from the points gathered in
 * `collector`:
 *   - hw->aidx[k] : the aidx field of the points whose idx is k;
 *   - hw->nas[k]  : number of points whose idx is k;
 *   - hw->lidx[k] : array of hw->nas[k] locus indices, in list order.
 * All three arrays have hw->naa entries.
 *
 * Assumptions (not checked here):
 *   - `collector` is not empty: both do/while loops read the iterator
 *     value before testing for the end of the list;
 *   - points sharing the same idx are contiguous in the list, because a
 *     single running cursor is reset only when a new lidx[k] array is
 *     created.
 *
 * Always returns GDL_SUCCESS.
 */
static size_t
_alloc (_gdl_hview * hw, gdl_list * collector)
{
	size_t i = 0;
	gdl_list_itr * itr;
	
	hw->aidx = GDL_CALLOC (size_t, hw->naa);
	hw->nas  = GDL_CALLOC (size_t, hw->naa);
	hw->lidx = GDL_CALLOC (size_t *, hw->naa);
		
	/* First pass: count the points of each slot and remember its aidx. */
	itr = gdl_list_iterator_front (collector);
	
	do
	{
		_gdl_collect * point = 
		         (_gdl_collect *) gdl_list_iterator_value (itr);
		hw->aidx[point->idx] = point->aidx;
		hw->nas[point->idx]++;
	}
	while (gdl_list_iterator_next (itr));
	
	gdl_list_iterator_free (itr);
	
	/* Second pass: allocate each lidx[k] on first sight and fill it. */
	itr = gdl_list_iterator_front (collector);
	
	do
	{
		_gdl_collect * point = 
		         (_gdl_collect *) gdl_list_iterator_value (itr);
		if (hw->lidx[point->idx]==NULL)
		{
			/* New slot: restart the write cursor. */
			i = 0;
			hw->lidx[point->idx]
			   = GDL_CALLOC (size_t, hw->nas[point->idx]);
		}
		hw->lidx[point->idx][i++] = point->lidx;
	}
	while (gdl_list_iterator_next (itr));
	
	gdl_list_iterator_free (itr);
	
	return GDL_SUCCESS;
}

/*
 * Collect the ambiguous (accession, locus) pairs of the view attached
 * to `wh` (wh->h->data restricted by wh->h->mask) and, when at least
 * one pair was found, build the index arrays of `wh` from them.
 *
 * The temporary list is released on every path, including failures.
 *
 * Returns GDL_SUCCESS, or the error code reported by the collection or
 * allocation step.
 */
static int
_collect_ambiguous (_gdl_hview * wh)
{
	int status;
	const gdl_gview * data  = wh->h->data;
	const gdl_mask * mask  = wh->h->mask;
	gdl_list * collector = gdl_list_alloc (gdl_list_default);
	
	status = _collect (wh, data, mask, collector);
	
	/* Only build the index arrays when something was collected. */
	if (status == GDL_SUCCESS && gdl_list_size (collector))
	{
		status = (int) _alloc (wh, collector);
	}
	
	/* Single exit: the collector must not outlive this call. */
	gdl_list_free (collector);
	
	return status;
}
