/*  
 *  gpoint/collect.c
 * 
 *  $Author: baptiste $, $Date: 2008-05-13 15:33:43 $, $Version$
 *
 *  Libgdl : a C library for statistical genetics
 * 
 *  Copyright (C) 2003-2006  Jean-Baptiste Veyrieras, INRA, France.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include <gdl/gdl_common.h>
#include <gdl/gdl_errno.h>
#include <gdl/gdl_string.h>
#include <gdl/gdl_clustering.h>
#include <gdl/gdl_list.h>
#include <gdl/gdl_hash.h>
#include <gdl/gdl_mask.h>
#include <gdl/gdl_gpoint.h>
#include <gdl/gdl_gview.h>
#include <gdl/gdl_gview_mask.h>
#include <gdl/gdl_gview_collector.h>

/*
 * Kinds of collector. The value a collector's `type` field points to
 * selects the collection routine in gdl_gview_collector_perform().
 */
enum _gdl_gview_collector_type
{
	/* collect the points reported as missing by the view */
	gdl_missing_collector,
	/* heterozygous points; the collection routine in this file is a stub */
	gdl_heterozygous_collector
};	

/*
 * Collector state. A "record" below is an accession (or a cluster, when a
 * clustering is supplied) for which at least one point was collected.
 */
struct _gdl_gview_collector
{
	/* selects the collection routine */
	const gdl_gview_collector_type * type;
	
	/* number of records */
	size_t nr;
	/* ploidy, as returned by gdl_gview_ploidy() */
	size_t np;
	/* total number of collected points */
	size_t nc;
	
	/* [nr] accession index of each record */
	size_t * aidx;
	/* [nr] loop index of each record (the cluster index when clustering) */
	size_t * ridx;
	/* [nr] cluster size of each record, or 1.0 without clustering */
	double * weights;
	/* [nr] number of points collected for each record */
	size_t * ngl;
	/* [nr*np] number of points per (record, phase), indexed r*np + h */
	size_t * nhl;
	/* [nr] locus index of every point of the record (ngl[r] entries) */
	size_t ** gl;
	/* [nr*np] locus indices of the points of (record, phase) (nhl[r*np + h] entries) */
	size_t ** hl;
	
	/* temporary list of points built by perform and released by the fill step */
	gdl_list      * collector;
	/* values stored under string keys built with key_format */
	gdl_hashtable * contains;
	/* printf-style format with three integer fields separated by '-' */
	gdl_string    * key_format;
};

/*
 * One collected point, stored in the temporary list while collecting.
 */
typedef struct
{
	/* record number (value of the collector's nr when the point was found) */
	size_t idx;
	/* loop index of the accession/cluster being scanned */
	size_t ridx;
	/* accession index inside the view */
	size_t aidx;
	/* locus index */
	size_t cidx;
	/* phase index (0 .. ploidy-1) */
	size_t hidx;
	/* weight: cluster size, or 1.0 without clustering */
	double w;
	/* not read anywhere in this file */
	void   * extra;
} _gdl_gpoint_collector;

/*
 * Field width used when building the hash-key format.
 *
 * Returns 1 for x < 100; above that, one less than the number of
 * decimal digits of x (100 -> 2, 1000 -> 3, ...).
 */
static size_t
_gdl_gview_collector_msd (size_t x)
{
	size_t width = 1;
	
	/* drop the last digit, then count each further division that still leaves >= 10 */
	for (x /= 10; x >= 10; x /= 10)
	{
		width++;
	}
	
	return width;
}

/*
 * Allocates the per-record arrays and the lookup table of a collector
 * whose counters (nr, np, nc) are already set.
 * Nothing is allocated when nr is zero.
 */
static void
_gdl_gview_collector_init_collector (gdl_gview_collector * gc)
{
	size_t nrec, nslot;
	
	if (!gc->nr)
	{
		return;
	}
	
	nrec  = gc->nr;
	nslot = gc->nr*gc->np;   /* one slot per (record, phase) */
	
	gc->ridx    = GDL_MALLOC (size_t, nrec);
	gc->aidx    = GDL_MALLOC (size_t, nrec);
	gc->weights = GDL_MALLOC (double, nrec);
	/* counters and pointer tables must start zeroed: the fill step
	   increments the counters and tests the pointers against NULL */
	gc->ngl = GDL_CALLOC (size_t, nrec);
	gc->nhl = GDL_CALLOC (size_t, nslot);
	gc->gl  = GDL_CALLOC (size_t * , nrec);
	gc->hl  = GDL_CALLOC (size_t * , nslot);
	
	gc->contains = gdl_hashtable_alloc (gdl_gvalues_interface, gc->nc);
}

/*
 * Releases everything allocated by _gdl_gview_collector_init_collector()
 * plus the per-record locus arrays, then resets the record and point
 * counters. Does nothing when nr is zero.
 *
 * The freed members are reset to NULL because the collector object stays
 * alive and is reused by gdl_gview_collector_perform(); stale pointers
 * must not survive the cleaning. key_format is not touched here.
 */
static void
_gdl_gview_collector_clean_collector (gdl_gview_collector * gc)
{
	size_t i, k;
	
	if (!gc->nr)
	{
		return;
	}
	
	for (i = 0; i < gc->nr; i++)
	{
		GDL_FREE (gc->gl[i]);
		/* hl is laid out as np consecutive slots per record */
		for (k = 0; k < gc->np; k++)
		{
			GDL_FREE (gc->hl[i*gc->np + k]);
		}
	}
	
	GDL_FREE (gc->gl);
	GDL_FREE (gc->hl);
	GDL_FREE (gc->ridx);
	GDL_FREE (gc->aidx);
	GDL_FREE (gc->weights);
	GDL_FREE (gc->ngl);
	GDL_FREE (gc->nhl);
	gdl_hashtable_free (gc->contains);
	
	gc->gl       = NULL;
	gc->hl       = NULL;
	gc->ridx     = NULL;
	gc->aidx     = NULL;
	gc->weights  = NULL;
	gc->ngl      = NULL;
	gc->nhl      = NULL;
	gc->contains = NULL;
	
	gc->nr = 0;
	gc->nc = 0;
}

/*
 * Builds the printf-style format used for hash keys: three "%<w>d"
 * fields separated by '-', e.g. "%1d-%2d-%1d".
 *
 * na, nl, p : the three magnitudes (number of accessions, number of loci,
 *             ploidy) from which each field width is derived with
 *             _gdl_gview_collector_msd().
 *
 * Returns a newly allocated string.
 */
static gdl_string * 
_gdl_gview_collector_create_key_format (size_t na, size_t nl, size_t p)
{
	/* "%d" consumes an int, so the size_t widths are converted explicitly */
	const int wa = (int) _gdl_gview_collector_msd (na);
	const int wl = (int) _gdl_gview_collector_msd (nl);
	const int wp = (int) _gdl_gview_collector_msd (p);
	
	return gdl_string_sprintf ("%%%dd-%%%dd-%%%dd", wa, wl, wp);
}

/*
 * Formats a hash key from two indices and a phase.
 *
 * format : key format with three "%d"-style fields
 * i, j   : first and second key fields
 * p      : third key field; callers in this file pass -1 for keys that are
 *          not tied to a phase
 *
 * Returns the string produced by gdl_string_sprintf().
 */
static gdl_string * 
_gdl_gview_collector_create_key (gdl_string * format, size_t i, size_t j, int p)
{
	/* the format fields are "%d": pass ints, not size_t, through the varargs */
	return gdl_string_sprintf (format, (int) i, (int) j, p);
}

/*
 * Scans the view and pushes one point onto gc->collector for every
 * (accession, locus, phase) reported missing by GDL_GVIEW_IS_MISSING.
 *
 * When a clustering is given, only one accession per cluster is scanned
 * (the one returned by gdl_clustering_clust_idx) and the point weight is
 * the cluster size; otherwise every accession is scanned with weight 1.0.
 *
 * gc->nc is incremented for every point, gc->nr once for every scanned
 * accession that yielded at least one point.
 *
 * Always returns GDL_SUCCESS.
 */
static int
_gdl_gview_collector_collect_missing (gdl_gview_collector * gc, const gdl_gview * gv, const gdl_mask * gm, const gdl_clustering * cl)
{
	size_t ii, ic, il, ip, np, nc, nl, na;
	gdl_gvalues_get * gbuf;
	_gdl_gpoint_collector  * point;
	
	np = gdl_gview_ploidy (gv);
	
	if (cl)
	{
		nc = gdl_clustering_nclust (cl);
	}
	else
	{
		nc = GDL_GVIEW_ACCESSION_SIZE (gv, gm);	
	}
	
	nl   = GDL_GVIEW_LOCUS_SIZE (gv, gm);
	/* NOTE(review): this buffer is never read below; it looks removable,
	   confirm that GDL_GVIEW_GET_NEW has no needed side effect */
	gbuf = GDL_GVIEW_GET_NEW (gv, gm);
	
	for (ic = 0; ic < nc; ic++)
	{
		/* list size before scanning this accession, to detect new points */
		na = gdl_list_size (gc->collector);
		
		if (cl)
		{
			ii = gdl_clustering_clust_idx (cl, ic);
		}
		else
		{
			ii = ic;	
		}
		
		for (il = 0; il < nl; il++)
		{
			for (ip = 0; ip < np; ip++)
			{
				if (GDL_GVIEW_IS_MISSING (gv, gm, ii, il, ip))
				{
					point = GDL_MALLOC (_gdl_gpoint_collector, 1);
					point->idx   = gc->nr;
					point->ridx  = ic;
					point->aidx  = ii;
					point->cidx  = il;
					point->hidx  = ip;
					point->extra = NULL;
					if (cl)
					{
						point->w = gdl_clustering_clust_size (cl, ic);
					}
					else
					{
						point->w = 1.0;
					}
					gdl_list_push_back (gc->collector, point, 1);
					(gc->nc)++;
				}
			}
		}
		
		/* the record counter advances only for accessions that produced points */
		if (gdl_list_size (gc->collector) > na)
		{
			(gc->nr)++;
		}
	}
	
	gdl_gvalues_get_free (gbuf);
	
	return GDL_SUCCESS;
}

/*
 * Placeholder for the heterozygous collector: collects nothing and
 * reports success.
 */
static int
_collect_heterozygous (gdl_gview_collector * gc, const gdl_gview * gv, const gdl_mask * gm, const gdl_clustering * cl)
{
	/* arguments are intentionally unused */
	(void) gc;
	(void) gv;
	(void) gm;
	(void) cl;
	
	return GDL_SUCCESS;
}

/*
 * Transfers the points accumulated in gc->collector into the per-record
 * arrays of the collector, then frees the list.
 *
 * Two passes are made over the list (only when gc->nc is non-zero):
 *   1. copy the record attributes and count the points per record (ngl)
 *      and per (record, phase) (nhl);
 *   2. allocate gl / hl with those counts and store the locus indices.
 *
 * The second pass keeps running write positions (i, j[]) that are reset
 * only when a record's gl array is first allocated, so it relies on all
 * points of a record being consecutive in the list.
 *
 * Always returns GDL_SUCCESS.
 */
static int
_gdl_gview_collector_fill_collector (gdl_gview_collector * gc)
{
	if (gc->nc)
	{
		size_t i, * j, k;
		gdl_list_itr * itr;
		gdl_string * key; /* only referenced by the disabled hashtable code below */
		
		/* j[h] : next write position in hl for phase h of the current record */
		j = GDL_CALLOC (size_t, gc->np);
		
		itr = gdl_list_iterator_front (gc->collector);
		
		/* pass 1: record attributes and counts */
		do
		{
			_gdl_gpoint_collector * point = 
			         (_gdl_gpoint_collector *) gdl_list_iterator_value (itr);
			gc->ridx[point->idx] = point->ridx;
			gc->aidx[point->idx] = point->aidx;
			gc->weights[point->idx] = point->w;
			gc->ngl[point->idx]++;
			gc->nhl[point->idx*gc->np + point->hidx]++;
		}
		while (gdl_list_iterator_next (itr));
		
		gdl_list_iterator_free (itr);
		
		itr = gdl_list_iterator_front (gc->collector);
		
		/* pass 2: allocate and fill the locus index arrays */
		do
		{
			_gdl_gpoint_collector * point = 
			         (_gdl_gpoint_collector *) gdl_list_iterator_value (itr);
			
			/* first point of a record: allocate its arrays, restart the write positions */
			if (gc->gl[point->idx]==NULL)
			{
				i = 0;
				gc->gl[point->idx]
				   = GDL_CALLOC (size_t, gc->ngl[point->idx]);
				for (k = 0; k < gc->np; k++)
				{
					j[k] = 0;
					gc->hl[point->idx*gc->np + k]
				   	  = GDL_CALLOC (size_t, gc->nhl[point->idx*gc->np + k]);
				}
			}
			gc->gl[point->idx][i++] = point->cidx;
			gc->hl[point->idx*gc->np + point->hidx][j[point->hidx]++] = point->cidx;
			
			/* disabled: registration of the point keys in gc->contains */
//			key = _gdl_gview_collector_create_key (gc->key_format, point->ridx, point->cidx, -1);
//			gdl_hashtable_add (gc->contains, key, NULL, 0);
//			gdl_string_free (key);
//			key = _gdl_gview_collector_create_key (gc->key_format, point->ridx, point->cidx, point->hidx);
//			gdl_hashtable_add (gc->contains, key, NULL, 0);
//			gdl_string_free (key);
			//printf ("ACCESSION %d (%d) %d\n", hw->aidx[point->idx], hw->nas[point->idx], hw->lidx[point->idx][i-1]);
		}
		while (gdl_list_iterator_next (itr));
		
		gdl_list_iterator_free (itr);
		
		GDL_FREE (j);
	}
	
	/* the temporary list is released whether or not it held points */
	gdl_list_free (gc->collector);
	
	return GDL_SUCCESS;
}

/*
 * Creates an empty, zero-initialised collector of the given type.
 *
 * T : collector type; the pointer itself is stored, not a copy.
 */
gdl_gview_collector *
gdl_gview_collector_alloc (const gdl_gview_collector_type * T)
{
	gdl_gview_collector * fresh = GDL_CALLOC (gdl_gview_collector, 1);
	
	fresh->type = T;
	
	return fresh;
}

/*
 * Releases a collector and everything it owns. NULL is accepted.
 *
 * The key format string is created by perform / clone / fread and is owned
 * by the collector, so it is released here as well.
 */
void
gdl_gview_collector_free (gdl_gview_collector * gc)
{
	if (gc)
	{
		_gdl_gview_collector_clean_collector (gc);
		if (gc->key_format)
		{
			gdl_string_free (gc->key_format);
			gc->key_format = NULL;
		}
		GDL_FREE (gc);
	}
}

/*
 * Returns a deep copy of collector c, or NULL when c is NULL.
 *
 * The per-record arrays, the locus index arrays, the entries of the
 * lookup table (values cloned through gdl_gvalues_interface->clone) and
 * the key format are duplicated. The type pointer is shared.
 */
gdl_gview_collector *
gdl_gview_collector_clone (const gdl_gview_collector * c)
{
	if (c)
	{
		size_t i, j, k;
		gdl_hashtable_itr * itr;
		gdl_gview_collector * g;
		
		g = GDL_CALLOC (gdl_gview_collector, 1);
		
		g->type = c->type;
		
		g->nr = c->nr;
		g->np = c->np;
		g->nc = c->nc;
		
		_gdl_gview_collector_init_collector (g);
		
		/* the arrays exist only when there is at least one record */
		if (g->nr)
		{
			memcpy (g->ridx, c->ridx, sizeof(size_t)*g->nr);
			memcpy (g->aidx, c->aidx, sizeof(size_t)*g->nr);
			memcpy (g->weights, c->weights, sizeof(double)*g->nr);
			memcpy (g->ngl, c->ngl, sizeof(size_t)*g->nr);
			memcpy (g->nhl, c->nhl, sizeof(size_t)*g->nr*g->np);
			
			for (k = i = 0; i < g->nr; i++)
			{
				g->gl[i] = GDL_MALLOC (size_t, g->ngl[i]);
				memcpy (g->gl[i], c->gl[i], sizeof(size_t)*g->ngl[i]);
				for (j = 0; j < g->np; j++, k++)
				{
					g->hl[k] = GDL_MALLOC (size_t, g->nhl[k]);
					memcpy (g->hl[k], c->hl[k], sizeof(size_t)*g->nhl[k]);
				}
			}
		}
		
		/* init_collector already created the table when nr != 0;
		   allocating a second one would leak the first */
		if (g->contains == NULL)
		{
			g->contains = gdl_hashtable_alloc (gdl_gvalues_interface, g->nc);
		}
		
		/* the source has no table when it never held any record */
		if (c->contains && gdl_hashtable_size (c->contains))
		{
			itr = gdl_hashtable_iterator (c->contains);
			
			do
			{
				const gdl_string * key = gdl_hashtable_iterator_key (itr);
				gdl_hashtable_add (g->contains, key, gdl_gvalues_interface->clone(gdl_hashtable_iterator_value (itr)), 1);
			}
			while (gdl_hashtable_iterator_next (itr));
			
			gdl_hashtable_iterator_free (itr);
		}
		
		/* key_format is NULL on a collector that was never performed */
		if (c->key_format)
		{
			g->key_format = gdl_string_clone (c->key_format);
		}
		
		return g;
	}
	
	return NULL;
}

/*
 * Reads a collector from a binary stream, in the layout produced by
 * gdl_gview_collector_fwrite(): np, nr, nc, then ridx, aidx, weights,
 * ngl, nhl, then for each record its gl array followed by its np hl
 * arrays, then the lookup table and the key format.
 *
 * Returns the new collector, or NULL when stream is NULL.
 *
 * The collector type is not part of the stream, so the `type` field of
 * the returned object is left NULL.
 *
 * NOTE(review): the behaviour of GDL_FREAD_STATUS on a short read is not
 * visible here; if it returns early, the partially built collector is
 * not released -- confirm against the macro definition.
 */
gdl_gview_collector *
gdl_gview_collector_fread (FILE * stream)
{
	if (stream)
	{
		size_t i, j, k;
		int status;
		gdl_gview_collector * g;
		
		g = GDL_CALLOC (gdl_gview_collector, 1);
		
		status = fread (&(g->np), sizeof (size_t), 1, stream);
		GDL_FREAD_STATUS (status, 1);
		status = fread (&(g->nr), sizeof (size_t), 1, stream);
		GDL_FREAD_STATUS (status, 1);
		status = fread (&(g->nc), sizeof (size_t), 1, stream);
		GDL_FREAD_STATUS (status, 1);
		
		_gdl_gview_collector_init_collector (g);
		
		status = fread (g->ridx, sizeof (size_t), g->nr, stream);
		GDL_FREAD_STATUS (status, g->nr);
		status = fread (g->aidx, sizeof (size_t), g->nr, stream);
		GDL_FREAD_STATUS (status, g->nr);
		status = fread (g->weights, sizeof (double), g->nr, stream);
		GDL_FREAD_STATUS (status, g->nr);
		status = fread (g->ngl, sizeof (size_t), g->nr, stream);
		GDL_FREAD_STATUS (status, g->nr);
		status = fread (g->nhl, sizeof (size_t), g->nr*g->np, stream);
		GDL_FREAD_STATUS (status, g->nr*g->np);
		
		/* k walks the (record, phase) slots in step with the nested loops */
		for (k = i = 0; i < g->nr; i++)
		{
			g->gl[i] = GDL_MALLOC (size_t, g->ngl[i]);
			
			status = fread (g->gl[i], sizeof (size_t), g->ngl[i], stream);
			GDL_FREAD_STATUS (status, g->ngl[i]);
			
			for (j = 0; j < g->np; j++, k++)
			{
				g->hl[k] = GDL_MALLOC (size_t, g->nhl[k]);
				
				status = fread (g->hl[k], sizeof (size_t), g->nhl[k], stream);
				GDL_FREAD_STATUS (status, g->nhl[k]);
			}
		}
		
		/* init_collector created a table when nr != 0: release it before
		   installing the one that receives the stored entries, otherwise
		   it is leaked */
		if (g->contains)
		{
			gdl_hashtable_free (g->contains);
		}
		g->contains = gdl_hashtable_alloc (gdl_gvalues_interface, 0);
		
		status = gdl_hashtable_fread (stream, g->contains);
		GDL_FREAD_STATUS (status, GDL_SUCCESS);
		
		g->key_format = gdl_string_fread (stream);
		GDL_FREAD_STATUS (g->key_format!=0, 1);
		
		return g;
	}
	
	return NULL;	
}

int
gdl_gview_collector_fwrite (FILE * stream, const gdl_gview_collector * g)
{
	if (stream && g)
	{
		size_t i, j, k;
		int status;
		
		status = fwrite (&(g->np), sizeof (size_t), 1, stream);
		GDL_FWRITE_STATUS (status, 1);
		status = fwrite (&(g->nr), sizeof (size_t), 1, stream);
		GDL_FWRITE_STATUS (status, 1);
		status = fwrite (&(g->nc), sizeof (size_t), 1, stream);
		GDL_FWRITE_STATUS (status, 1);
		status = fwrite (g->ridx, sizeof (size_t), g->nr, stream);
		GDL_FWRITE_STATUS (status, g->nr);
		status = fwrite (g->aidx, sizeof (size_t), g->nr, stream);
		GDL_FWRITE_STATUS (status, g->nr);
		status = fwrite (g->weights, sizeof (double), g->nr, stream);
		GDL_FWRITE_STATUS (status, g->nr);
		status = fwrite (g->ngl, sizeof (size_t), g->nr, stream);
		GDL_FWRITE_STATUS (status, g->nr);
		status = fwrite (g->nhl, sizeof (size_t), g->nr*g->np, stream);
		GDL_FWRITE_STATUS (status, g->nr*g->np);
		
		for (k = i = 0; i < g->nr; i++)
		{
			status = fwrite (g->gl[i], sizeof (size_t), g->ngl[i], stream);
			GDL_FWRITE_STATUS (status, g->ngl[i]);
			for (j = 0; j < g->np; j++, k++)
			{
				status = fwrite (g->hl[k], sizeof (size_t), g->nhl[k], stream);
				GDL_FWRITE_STATUS (status, g->nhl[k]);
			}
		}
		
		status = gdl_hashtable_fwrite (stream, g->contains);
		GDL_FWRITE_STATUS (status, GDL_SUCCESS);
		
		status = gdl_string_fwrite (stream, g->key_format);
		GDL_FWRITE_STATUS (status, GDL_SUCCESS);
		
		return GDL_SUCCESS;
	}
	
	return GDL_EINVAL;
}

/*
 * Runs the collection selected by the collector type on a view.
 *
 * gc : collector; any previous content is discarded first
 * gv : genotype view to scan
 * gm : mask handed to the GDL_GVIEW_* macros
 * cl : optional clustering; when given, clusters are scanned instead of
 *      individual accessions
 *
 * Returns the status of the fill step for the missing collector, the
 * status of the heterozygous routine for the heterozygous collector, and
 * GDL_EINVAL when gc is NULL, has no type (e.g. an object obtained from
 * gdl_gview_collector_fread) or has an unknown type.
 */
int
gdl_gview_collector_perform (gdl_gview_collector * gc,
                                const gdl_gview * gv,
                                const gdl_mask * gm,
                                const gdl_clustering * cl)
{
	size_t na, nl, np;
	int status;
	
	if (gc == NULL || gc->type == NULL)
	{
		return GDL_EINVAL;
	}
	
	_gdl_gview_collector_clean_collector (gc);
	
	if (cl)
	{
		na = gdl_clustering_nclust (cl);
	}
	else
	{
		na = GDL_GVIEW_ACCESSION_SIZE (gv, gm);
	}
	
	nl = GDL_GVIEW_LOCUS_SIZE (gv, gm);
	np = gdl_gview_ploidy (gv);
	
	/* a previous run left its own format string: release it before replacing it */
	if (gc->key_format)
	{
		gdl_string_free (gc->key_format);
	}
	
	gc->np         = np;
	gc->key_format = _gdl_gview_collector_create_key_format (na, nl, np);
	gc->collector  = gdl_list_alloc (gdl_list_default);
	
	switch (*(gc->type))
	{
		case gdl_missing_collector :
			/* collect_missing always reports success; the fill step
			   releases the temporary list itself */
			_gdl_gview_collector_collect_missing (gc, gv, gm, cl);
			_gdl_gview_collector_init_collector (gc);
			return _gdl_gview_collector_fill_collector (gc);
		case gdl_heterozygous_collector :
			status = _collect_heterozygous (gc, gv, gm, cl);
			break;
		default :
			status = GDL_EINVAL;
			break;
	}
	
	/* no fill step ran on these paths, so the temporary list is released here */
	gdl_list_free (gc->collector);
	gc->collector = NULL;
	
	return status;
}

/* Total number of collected points. */
size_t
gdl_gview_collector_size (const gdl_gview_collector * gc)
{
	const size_t npoint = gc->nc;
	
	return npoint;
}

/* Number of records (accessions or clusters holding at least one point). */
size_t
gdl_gview_collector_accession_size (const gdl_gview_collector * gc)
{
	const size_t nrecord = gc->nr;
	
	return nrecord;
}

/* Number of points collected for record r. No bounds check is made on r. */
size_t
gdl_gview_collector_glocus_size (const gdl_gview_collector * gc, size_t r)
{
	const size_t * counts = gc->ngl;
	
	return counts[r];
}

/* Number of points collected for record r on phase h. No bounds check is made. */
size_t
gdl_gview_collector_hlocus_size (const gdl_gview_collector * gc, size_t r, size_t h)
{
	/* np consecutive slots per record */
	const size_t slot = r*gc->np + h;
	
	return gc->nhl[slot];
}

/* Value of ridx stored for record r. No bounds check is made on r. */
size_t
gdl_gview_collector_accession_idx (const gdl_gview_collector * gc, size_t r)
{
	const size_t * index = gc->ridx;
	
	return index[r];
}

/* Value of aidx stored for record r. No bounds check is made on r. */
size_t
gdl_gview_collector_accession_id (const gdl_gview_collector * gc, size_t r)
{
	const size_t * index = gc->aidx;
	
	return index[r];
}

/* Weight stored for record r. No bounds check is made on r. */
double
gdl_gview_collector_accession_weight (const gdl_gview_collector * gc, size_t r)
{
	const double * w = gc->weights;
	
	return w[r];
}

/*
 * Locus indices of the points of record r. The array is owned by the
 * collector. No bounds check is made on r.
 */
const size_t *
gdl_gview_collector_glocus_idx (const gdl_gview_collector * gc, size_t r)
{
	const size_t * loci = gc->gl[r];
	
	return loci;
}

/*
 * Locus indices of the points of record r on phase h. The array is owned
 * by the collector. No bounds check is made.
 */
const size_t *
gdl_gview_collector_hlocus_idx (const gdl_gview_collector * gc, size_t r, size_t h)
{
	/* np consecutive slots per record */
	const size_t slot = r*gc->np + h;
	
	return gc->hl[slot];
}

/*
 * Stores `extra` under the key of the l-th point of record r, with the
 * phase field of the key set to -1.
 *
 * Returns the status of gdl_hashtable_update().
 */
int
gdl_gview_collector_gset (const gdl_gview_collector * gc, size_t r, size_t l, gdl_gvalues * extra)
{
	int status;
	gdl_string * key;
	
	key = _gdl_gview_collector_create_key (gc->key_format, gc->ridx[r], gc->gl[r][l], -1);
	
	status = gdl_hashtable_update (gc->contains, key, extra, 1);
	
	gdl_string_free (key);
	
	return status;
}

/*
 * Stores `extra` under the key of the l-th point of record r on phase h.
 *
 * Returns the status of gdl_hashtable_update().
 */
int
gdl_gview_collector_hset (const gdl_gview_collector * gc, size_t r, size_t h, size_t l, gdl_gvalues * extra)
{
	int status;
	gdl_string * key;
	
	key = _gdl_gview_collector_create_key (gc->key_format, gc->ridx[r], gc->hl[r*gc->np+h][l], h);
	
	status = gdl_hashtable_update (gc->contains, key, extra, 1);
	
	gdl_string_free (key);
	
	return status;
}

/*
 * Stores `extra` under the key built directly from ridx and lidx, with
 * the phase field of the key set to -1.
 *
 * Returns the status of gdl_hashtable_update().
 */
int
gdl_gview_collector_gset_idx (const gdl_gview_collector * gc, size_t ridx, size_t lidx, gdl_gvalues * extra)
{
	int status;
	gdl_string * key;
	
	key = _gdl_gview_collector_create_key (gc->key_format, ridx, lidx, -1);
	
	status = gdl_hashtable_update (gc->contains, key, extra, 1);
	
	gdl_string_free (key);
	
	return status;
}

/*
 * Stores `extra` under the key built directly from ridx, lidx and phase p.
 *
 * Returns the status of gdl_hashtable_update().
 */
int
gdl_gview_collector_hset_idx (const gdl_gview_collector * gc, size_t ridx, size_t lidx, size_t p, gdl_gvalues * extra)
{
	int status;
	gdl_string * key;
	
	key = _gdl_gview_collector_create_key (gc->key_format, ridx, lidx, p);
	
	status = gdl_hashtable_update (gc->contains, key, extra, 1);
	
	gdl_string_free (key);
	
	return status;
}

/*
 * Looks up the value stored for the l-th point of record r (phase field
 * of the key set to -1). Returns whatever gdl_hashtable_lookup() yields.
 */
gdl_gvalues *
gdl_gview_collector_gget (const gdl_gview_collector * gc, size_t r, size_t l)
{
	gdl_gvalues * found;
	gdl_string  * key;
	
	key   = _gdl_gview_collector_create_key (gc->key_format, gc->ridx[r], gc->gl[r][l], -1);
	found = gdl_hashtable_lookup (gc->contains, key);
	
	gdl_string_free (key);
	
	return found;
}

/*
 * Looks up the value stored for the l-th point of record r on phase h.
 * Returns whatever gdl_hashtable_lookup() yields.
 */
gdl_gvalues *
gdl_gview_collector_hget (const gdl_gview_collector * gc, size_t r, size_t h, size_t l)
{
	gdl_gvalues * found;
	gdl_string  * key;
	
	key   = _gdl_gview_collector_create_key (gc->key_format, gc->ridx[r], gc->hl[r*gc->np+h][l], h);
	found = gdl_hashtable_lookup (gc->contains, key);
	
	gdl_string_free (key);
	
	return found;
}

/*
 * Looks up the value stored under the key built directly from ridx and
 * lidx (phase field set to -1). Returns whatever gdl_hashtable_lookup()
 * yields.
 */
gdl_gvalues *
gdl_gview_collector_gget_idx (const gdl_gview_collector * gc, size_t ridx, size_t lidx)
{
	gdl_gvalues * found;
	gdl_string  * key;
	
	key   = _gdl_gview_collector_create_key (gc->key_format, ridx, lidx, -1);
	found = gdl_hashtable_lookup (gc->contains, key);
	
	gdl_string_free (key);
	
	return found;
}

/*
 * Looks up the value stored under the key built directly from ridx, lidx
 * and phase h. Returns whatever gdl_hashtable_lookup() yields.
 */
gdl_gvalues *
gdl_gview_collector_hget_idx (const gdl_gview_collector * gc, size_t ridx, size_t lidx, size_t h)
{
	gdl_gvalues * found;
	gdl_string  * key;
	
	key   = _gdl_gview_collector_create_key (gc->key_format, ridx, lidx, h);
	found = gdl_hashtable_lookup (gc->contains, key);
	
	gdl_string_free (key);
	
	return found;
}

/*
 * Tells whether a non-NULL value is stored under the key built directly
 * from r and c (phase field set to -1).
 *
 * Returns gdl_false without any lookup when the collector has no record.
 */
gdl_boolean
gdl_gview_collector_gcontains (const gdl_gview_collector * gc, size_t r, size_t c)
{
	gdl_boolean present = gdl_false;
	
	if (gc->nr)
	{
		gdl_string * key = _gdl_gview_collector_create_key (gc->key_format, r, c, -1);
		
		if (gdl_hashtable_lookup (gc->contains, key)!=NULL)
		{
			present = gdl_true;
		}
		
		gdl_string_free (key);
	}
	
	return present;
}

/*
 * Tells whether a non-NULL value is stored under the key built directly
 * from r, c and phase h.
 *
 * Returns gdl_false without any lookup when the collector has no record.
 */
gdl_boolean
gdl_gview_collector_hcontains (const gdl_gview_collector * gc, size_t r, size_t c, size_t h)
{
	gdl_boolean present = gdl_false;
	
	if (gc->nr)
	{
		gdl_string * key = _gdl_gview_collector_create_key (gc->key_format, r, c, h);
		
		if (gdl_hashtable_lookup (gc->contains, key)!=NULL)
		{
			present = gdl_true;
		}
		
		gdl_string_free (key);
	}
	
	return present;
}

/* Storage for the two collector type values; only their addresses are exported. */
static const gdl_gview_collector_type _missing      = gdl_missing_collector;
static const gdl_gview_collector_type _heterozygous = gdl_heterozygous_collector;

/* Public type handles, to be passed to gdl_gview_collector_alloc(). */
const gdl_gview_collector_type * gdl_gview_collector_missing      = &_missing;
const gdl_gview_collector_type * gdl_gview_collector_heterozygous = &_heterozygous;
