/**  
 * 	hview/kbest.c
 * 
 *  $Author: baptiste $, $Date: 2008-05-13 15:33:43 $, $Version$
 *
 *  Libgdl : a C library for statistical genetics
 * 
 *  Copyright (C) 2003-2006  Jean-Baptiste Veyrieras, INRA, France.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
 
#include <gdl/gdl_common.h>
#include <gdl/gdl_errno.h>
#include <gdl/gdl_string.h>
#include <gdl/gdl_hash.h>
#include <gdl/gdl_list.h>
#include <gdl/gdl_sort.h>
#include <gdl/gdl_gview.h>
#include <gdl/gdl_hview.h>

/*
 * Tell whether a rank threshold still leaves some cluster unresolved.
 *
 * A configuration is "covered" when every one of its haplotypes x
 * satisfies hidx[x] < r.  A cluster is resolved as soon as one of its
 * configurations is covered.
 *
 * h     the haplotype view to inspect
 * hidx  per-haplotype value compared against r
 * r     exclusive upper bound on hidx[] for a haplotype to be accepted
 *
 * Returns gdl_false when every cluster of h->guniq is resolved and
 * gdl_true when at least one is not (note the inverted sense: "true"
 * means "keep searching").
 */
static gdl_boolean
_kresolve (const gdl_hview * h, size_t * hidx, size_t r)
{
	const size_t ploidy    = gdl_hview_ploidy (h);
	const size_t nclust    = gdl_clustering_nclust (h->guniq);
	size_t       nresolved = 0;
	size_t       c;
	
	for (c = 0; c < nclust; c++)
	{
		const size_t nconf = gdl_hview_hconfig_size_c (h, c);
		int          found = 0;
		size_t       j;
		
		/* Stop scanning this cluster at the first covered configuration. */
		for (j = 0; j < nconf && !found; j++)
		{
			gdl_hconfig * cfg = gdl_hview_get_hconfig_c (h, c, j);
			size_t        p   = 0;
			
			/* Advance while each haplotype of the configuration is accepted. */
			while (p < ploidy && hidx[gdl_hconfig_get_haplotype (cfg, p)] < r)
			{
				p++;
			}
			
			found = (p == ploidy);
		}
		
		if (found)
		{
			nresolved++;
		}
	}
	
	if (nresolved == nclust)
	{
		return gdl_false;
	}
	return gdl_true;
}

/*
 * Fill the per-accession configuration arrays of the reduced view.
 *
 * For every accession of h, each configuration whose haplotypes x all
 * satisfy ihidx[x] < k is cloned, its haplotype indices are rewritten to
 * ihidx[x], and its pr value is rescaled so that the retained
 * configurations of that accession sum to one.
 *
 * kh     view being built; kh->nhc[i] and kh->hconfigs[i] are written
 * h      source view
 * ihidx  maps a haplotype index of h to its index in kh; values >= k
 *        mark haplotypes that are not retained
 * k      number of haplotypes retained in kh
 *
 * Always returns GDL_SUCCESS.
 */
static int
_build_khview_hconfig (gdl_hview * kh, const gdl_hview * h, size_t * ihidx, size_t k)
{
	size_t i, j, l, idx, nh, np;
	double s;
	gdl_hconfig * hc, * khc;
	gdl_list * configs;
	gdl_list_itr * itr;
	
	np = gdl_hview_ploidy (h);
	
	for (i = 0; i < h->na; i++)
	{
		
		nh = gdl_hview_hconfig_size (h, i);
		
		/* Retained configurations are collected first because their
		 * number is not known until the whole accession is scanned. */
		configs = gdl_list_alloc (gdl_list_default);
		
		s = 0;
		
		for (j = 0; j < nh; j++)
		{
			
			hc = gdl_hview_get_hconfig (h, i, j);
			
			/* Reject the configuration at the first dropped haplotype. */
			for (l = 0;	l < np; l++)
			{
				idx = gdl_hconfig_get_haplotype (hc, l);
				if (ihidx[idx] >= k)
				{
					break;
				}
			}
			if (l == np)
			{
				khc = gdl_hconfig_clone (hc, np);
				/* Re-express the clone in the index space of kh. */
				for (l = 0;	l < np; l++)
				{
					idx = gdl_hconfig_get_haplotype (hc, l);
					khc->idx[l] = ihidx[idx];
				}
				/* NOTE(review): the trailing 0 presumably tells the list not
				 * to own the element, since the clones outlive the list in
				 * kh->hconfigs -- confirm against gdl_list. */
				gdl_list_push_front (configs, khc, 0);
				s += khc->pr;
			}
		}
		
		kh->nhc[i]      = gdl_list_size (configs);
		kh->hconfigs[i] = GDL_MALLOC (gdl_hconfig_ptr, kh->nhc[i]);
		
		/* Only drain the list when something was retained: the do/while
		 * below runs its body at least once, which on an empty list would
		 * write past the zero-length array and dereference a missing
		 * element. */
		if (kh->nhc[i] > 0)
		{
			itr = gdl_list_iterator_front (configs);
			j = 0;
			
			do
			{
				khc = (gdl_hconfig *) gdl_list_iterator_value (itr);
				kh->hconfigs[i][j++] = khc;
				khc->pr /= s;
			} while (gdl_list_iterator_next (itr));
			
			gdl_list_iterator_free (itr);
		}
		
		gdl_list_free (configs);
		
	}
	
	return GDL_SUCCESS;
}

/*
 * Fill the haplotype table of the reduced view.
 *
 * hidx is read from its last entry backwards; the haplotype designated
 * by hidx[nh-1] becomes entry 0 of kh, hidx[nh-2] entry 1, and so on,
 * until k entries (or all h->nh, whichever is smaller) have been copied.
 * The copied frequencies are then divided by their sum.
 *
 * kh    view being built; kh->haplotypes[] and kh->mult[] are written
 * h     source view
 * hidx  ordering of the haplotypes of h (presumably ascending frequency,
 *       as produced by gdl_sort_index in the caller -- confirm)
 * k     number of haplotypes to retain
 *
 * Always returns GDL_SUCCESS.
 */
static int
_build_khview_htable (gdl_hview * kh, const gdl_hview * h, size_t * hidx, size_t k)
{
	size_t i, j, n = 0;
	double s = 0;
	gdl_haplotype * haplo;
	
	/* n counts the entries written so far; it is also the next slot. */
	for (i = h->nh; i > 0 && n < k; i--)
	{
		j = n;
		haplo = gdl_hview_get_haplotype(h, hidx[i-1]);
		kh->haplotypes[j] = gdl_haplotype_clone (haplo, h->nl);
		kh->mult[j] = gdl_hview_get_haplotype_freq (h, hidx[i-1]);
		s += kh->mult[j];
		n++;
	}
	/* Normalise every copied entry (index i, not the last slot written),
	 * and only the entries that were actually filled. */
	for (i = 0; i < n; i++)
	{
		kh->mult[i] /= s;
	}
	
	return GDL_SUCCESS;
}

/*
 * Assemble a reduced haplotype view holding k haplotypes.
 *
 * The new view is allocated on the same data and mask as h, initialised
 * with h->na, h->nl and k, given a clone of h->guniq, and then populated
 * by the haplotype-table and configuration builders, in that order.
 *
 * h      source view
 * hidx   haplotype ordering handed to the table builder
 * ihidx  haplotype index mapping handed to the configuration builder
 * k      number of haplotypes in the new view
 *
 * Returns the newly built view.  The status codes of the two builders
 * are not inspected.
 */
static gdl_hview *
_build_khview (const gdl_hview * h, size_t * hidx, size_t * ihidx, size_t k)
{
	gdl_hview * reduced = gdl_hview_alloc (h->data, h->mask);
	
	gdl_hview_init (reduced, h->na, h->nl, k);
	reduced->guniq = gdl_clustering_clone (h->guniq);
	
	(void) _build_khview_htable (reduced, h, hidx, k);
	(void) _build_khview_hconfig (reduced, h, ihidx, k);
	
	return reduced;
}

gdl_hview *
gdl_hview_kbest (const gdl_hview * h, size_t kmin)
{
	gdl_hview * kh = NULL;
	
	if (kmin < h->nh)
	{
		size_t i, k, * hidx, * ihidx;
		gdl_boolean ok = gdl_true;		
		
		hidx  = GDL_MALLOC (size_t, h->nh);
		ihidx = GDL_MALLOC (size_t, h->nh);
		
		gdl_sort_index (hidx, h->mult, 1, h->nh);
		
		for (i = h->nh; i > 0; i--)
		{
			ihidx[hidx[i-1]] = h->nh-i;
		}
		
		k = gdl_hview_ploidy (h);
		
		do
		{
			ok = _kresolve (h, ihidx, k);
			k++;
		}
		while (ok || k-1 < kmin);
		
		kh = _build_khview (h, hidx, ihidx, k-1);
			
		GDL_FREE (hidx);
		GDL_FREE (ihidx);		
	}
	else
	{
		kh = gdl_hview_clone (h);
	}
	
	return kh;
}
