/*  
 *  gview/window.c
 * 
 *  $Author: baptiste $, $Date: 2008-05-13 15:33:43 $, $Version$
 *
 *  Libgdl : a C library for statistical genetics
 * 
 *  Copyright (C) 2003-2006  Jean-Baptiste Veyrieras, INRA, France.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include <gdl/gdl_common.h>
#include <gdl/gdl_errno.h>
#include <gdl/gdl_rng.h>
#include <gdl/gdl_matrix_uint.h>
#include <gdl/gdl_gview_collector.h>
#include <gdl/gdl_gview_wrapper.h>

/*
 * Build a randomly ordered list of every missing genotype site.
 *
 * Each row of the returned matrix holds a pair (i, j): i runs over the
 * accessions counted by gdl_gview_wrapper_missing_accession_size() and j
 * over the loci counted for that accession by
 * gdl_gview_wrapper_missing_glocus_size(). Rows are written in enumeration
 * order and then shuffled in place, swapping each row (from the last one
 * down) with a row chosen through gdl_rng_uniform_int().
 *
 * The matrix is allocated here and is released by the caller with
 * gdl_matrix_uint_free(). If the allocator hands back a null pointer, that
 * null pointer is returned as-is.
 */
gdl_matrix_uint *
randomize_missing (gdl_gview_wrapper * data, const gdl_rng * rng)
{
	size_t i, j, k, n, m, last, tmp0, tmp1;
	gdl_matrix_uint * idx;
	
	idx = gdl_matrix_uint_alloc (gdl_gview_wrapper_missing_size (data), 2);
	if (!idx)
	{
		// allocation failed (or was refused): nothing to fill or shuffle
		return idx;
	}
	
	// NOTE(review): assumes gdl_gview_wrapper_missing_size() equals the
	// number of (accession, locus) pairs enumerated below -- confirm.
	n = gdl_gview_wrapper_missing_accession_size (data);
	for (k = i = 0; i < n; i++)
	{
		m = gdl_gview_wrapper_missing_glocus_size (data, i);
		for (j = 0; j < m; j++, k++)
		{
			gdl_matrix_uint_set (idx, k, 0, i);
			gdl_matrix_uint_set (idx, k, 1, j);
		}
	}
	
	// Shuffle the rows. The counter i is "rows still to place", so the loop
	// never computes size1 - 1 on an empty matrix (which would wrap around
	// for an unsigned type and index far out of bounds).
	for (i = idx->size1; i > 1; i--)
	{
		last = i - 1;
		// presumably draws from [0, i-1], i.e. any row up to `last` -- confirm
		j = gdl_rng_uniform_int (rng, i);
		tmp0 = gdl_matrix_uint_get (idx, last, 0);
		tmp1 = gdl_matrix_uint_get (idx, last, 1);
		gdl_matrix_uint_set (idx, last, 0, gdl_matrix_uint_get (idx, j, 0));
		gdl_matrix_uint_set (idx, last, 1, gdl_matrix_uint_get (idx, j, 1));
		gdl_matrix_uint_set (idx, j, 0, tmp0);
		gdl_matrix_uint_set (idx, j, 1, tmp1);
	}
	
	return idx;
}

/*
 * Impute one missing genotype from the accessions that look identical to
 * the target accession in a window of loci around the missing site.
 *
 *   data    wrapper holding the genotypes
 *   a       index of the target in the missing-accession list (handed to
 *           gdl_gview_wrapper_missing_accession_idx_c() and
 *           gdl_gview_wrapper_missing_gget())
 *   l       index of the missing locus for that accession (handed to
 *           gdl_gview_wrapper_missing_gget())
 *   loc     index of the site to impute in the full locus list
 *   length  requested window width, in loci
 *
 * For every accession with a single resolved genotype at `loc`, the loci of
 * the window (other than `loc`) where the target itself has a single
 * resolved genotype are compared; an accession that agrees on all of them
 * adds one count to its genotype at `loc`. The counts are then written,
 * normalised to sum to 1, into the values returned by
 * gdl_gview_wrapper_missing_gget(); if nothing matched, every candidate
 * receives the same weight 1/size.
 *
 * Returns 0 when the weights have been written, -1 if `loc` is not a valid
 * locus index (nothing is modified in that case).
 */
int
window (gdl_gview_wrapper * data, size_t a, size_t l, size_t loc, size_t length)
{
	size_t i, c, j, n, m, f, t, geno, ng;
	size_t * gpat;
	double s, * probs;
	gdl_gvalues_get * gb;
	const gdl_gvalues * gx;
	gdl_gvalues * gt;
	
	n = gdl_gview_wrapper_accession_size (data);
	m = gdl_gview_wrapper_locus_size (data);
	
	if (loc >= m)
	{
		// also covers m == 0; keeps the m-1 arithmetic below from wrapping
		return -1;
	}
	
	// Window bounds [f, t], always kept inside [0, m-1] and never wider
	// than `length` (so that j-f stays a valid index into gpat).
	if (loc < length/2)
	{
		// site close to the first locus: window starts at locus 0
		f = 0;
		t = length-1;
		if (t > m-1)
		{
			t = m-1; // window wider than the data: stop at the last locus
		}
	}
	else if (loc+length/2 >= m)
	{
		// site close to the last locus: window ends at the last locus
		t = m-1;
		f = (length < m) ? m-length : 0;
	}
	else
	{
		// window around loc; for length < 2 this yields f > t (empty window)
		f = loc-length/2+1;
		t = loc+length/2;
	}
	gb = gdl_gview_wrapper_get_new (data);
	// get the genotype pattern around the missing site
	// (stored as idx+1 so that 0 keeps meaning "not resolved at this locus")
	c = gdl_gview_wrapper_missing_accession_idx_c (data, a);
	gpat = GDL_CALLOC (size_t, length);
	for (j = f; j <= t; j++)
	{
		gdl_gview_wrapper_get_genotype_c (data, c, j, gb);
		gx = gdl_gvalues_get_gvalues (gb);
		if (gx && gx->size == 1)
		{
			gpat[j-f] = gx->values[0]->idx+1;
		}
	}
	ng = gdl_locus_genotype (gdl_gview_wrapper_get_locus(data, loc));
	probs = GDL_CALLOC (double, ng);
	// now look at the other genotypes
	for (i = 0; i < n; i++)
	{
		// accessions without a single resolved genotype at the
		// imputation site carry no information: skip them
		gdl_gview_wrapper_get_genotype (data, i, loc, gb);
		gx = gdl_gvalues_get_gvalues (gb);
		if (!(gx && gx->size == 1))
		{
			continue;
		}
		geno = gx->values[0]->idx;
		// check the congruence inside the window between the current
		// accession and the one to impute; stop at the first mismatch
		for (j = f; j <= t; j++)
		{
			if (j == loc || !gpat[j-f])
			{
				continue; // nothing to compare at this locus
			}
			gdl_gview_wrapper_get_genotype (data, i, j, gb);
			gx = gdl_gvalues_get_gvalues (gb);
			if (!(gx && gx->size == 1 && gx->values[0]->idx == gpat[j-f]-1))
			{
				break;
			}
		}
		// j ran past t only if no locus of the window disagreed
		if (j > t && geno < ng)
		{
			probs[geno] += 1.0;
		}
	}
	
	gt = gdl_gview_wrapper_missing_gget (data, a, l);
	
	// copy the raw counts of the candidate genotypes and sum them up
	for (s = 0, i = 0; i < gt->size; i++)
	{
		// same bound as on the counting side: never index probs past ng
		geno = gt->values[i]->idx;
		gt->values[i]->value = (geno < ng) ? probs[geno] : 0.0;
		s += gt->values[i]->value;
	}
	// normalise; with no matching accession fall back to equal weights
	for (i = 0; i < gt->size; i++)
	{
		if (s)
		{
			gt->values[i]->value /= s;
		}
		else
		{
			gt->values[i]->value = 1.0/(double)gt->size;
		}
	}

	GDL_FREE (gpat);
	GDL_FREE (probs);
	gdl_gvalues_get_free (gb);
	
	return 0;
}

/*
 * Impute the missing sites listed in `idx`, in row order.
 *
 *   data         wrapper holding the genotypes
 *   idx          one row per site: column 0 is the missing-accession index,
 *                column 1 the position in that accession's list of missing
 *                loci (translated to a locus index through
 *                gdl_gview_wrapper_missing_glocus_idx())
 *   window_size  window width forwarded to window()
 *
 * Returns 0 once every row has been processed; a null `idx` is treated as
 * an empty list. The value returned by window() is not inspected.
 */
int
imputation (gdl_gview_wrapper * data, const gdl_matrix_uint * idx, const size_t window_size)
{
	size_t i, a, l;
	const size_t * loc;
	
	if (!idx)
	{
		return 0;
	}
	
	for (i = 0; i < idx->size1; i++)
	{
		a = gdl_matrix_uint_get (idx, i, 0);
		l = gdl_matrix_uint_get (idx, i, 1);
		// loc[l] is the locus index of the l-th missing locus of accession a
		loc = gdl_gview_wrapper_missing_glocus_idx (data, a);
		window (data, a, l, loc[l], window_size);
	}
	
	return 0;
}

/*
 * Impute every missing genotype of `data` with the window method.
 *
 *   data         wrapper holding the genotypes; the imputed weights are
 *                written back into it
 *   window_size  width, in loci, of the window used around each site
 *   rng          random number generator used to shuffle the order in
 *                which the missing sites are visited
 *
 * NOTE(review): the name is spelled "gld_" rather than "gdl_"; it is kept
 * unchanged because it is the symbol callers refer to.
 */
void
gld_gview_wrapper_window_imputation (gdl_gview_wrapper * data, const size_t window_size, const gdl_rng * rng)
{
	gdl_matrix_uint * idx;
	
	// No missing site: there is nothing to shuffle or impute, so do not
	// build a zero-row index matrix at all.
	if (gdl_gview_wrapper_missing_size (data) == 0)
	{
		return;
	}
	
	idx = randomize_missing (data, rng);
	if (!idx)
	{
		// the index could not be built; leave the data untouched
		return;
	}
	
	imputation (data, idx, window_size);
	
	gdl_matrix_uint_free (idx);
}
