/*  
 *  hsmap/model.c
 * 
 *  $Author: baptiste $, $Date: 2008-05-13 15:33:44 $, $Version$
 *
 *  Libgdl : a C library for statistical genetics
 * 
 *  Copyright (C) 2003-2006  Jean-Baptiste Veyrieras, INRA, France.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA * 
 */

#include <gdl/gdl_common.h>
#include <gdl/gdl_errno.h>
#include <gdl/gdl_util.h>
#include <gdl/gdl_rng.h>
#include <gdl/gdl_gentity.h>
#include <gdl/gdl_mask.h>
#include <gdl/gdl_gview_wrapper.h>
#include <gdl/gdl_gmatrix.h>
#include <gdl/gdl_hview.h>
#include <gdl/gdl_hmap.h>
#include <gdl/gdl_fview_wrapper.h>
#include <gdl/gdl_fmatrix.h>
#include <gdl/gdl_multireg.h>
#include <gdl/gdl_hstruct_model.h>
#include <gdl/gdl_hsmap.h>
#include <gdl/gdl_hsmap_position.h>

struct _gdl_hsmap_model
{
	size_t    np;
	size_t    size;
	size_t    N;
	size_t    O;
	size_t    M0;
	size_t    M1;
	const gdl_mask  * mask;
	gdl_mask  * trait;
	gdl_mask  * factor;
	gdl_vector * y;
	gdl_matrix * X0;
	gdl_matrix * X1;
	gdl_fmatrix             * odb;
	gdl_fmatrix             * tdb;
	const gdl_fview_wrapper * tdata;
	const gdl_fview_wrapper * odata;
	gdl_hsmap_partition ** partitions;
	gdl_multireg_linear_workspace * wreg;
	gdl_boolean locus_update;
	gdl_boolean * exclude;
	size_t      * move;
	gdl_hsmap_model_result * result;
	gdl_hsmap_position_model * position0;
	gdl_hsmap_position_model * position1;
};

static gdl_hsmap_model_result *
gdl_hsmap_model_result_alloc (const gdl_multireg_linear_workspace * w)
{
	size_t i;
	const gdl_vector * c;
	const gdl_matrix * cov;
	gdl_hsmap_model_result * r;
	
	r = GDL_MALLOC (gdl_hsmap_model_result, 1);
	
	r->df    = gdl_multireg_linear_df (w);
	r->df1   = gdl_multireg_linear_eval_df (w);
	r->rsq   = gdl_multireg_linear_rsquare (w);
	r->rsq1  = gdl_multireg_linear_eval_rsquare (w);
	r->fstat = gdl_multireg_linear_eval_fstat (w);
	r->pval  = gdl_multireg_linear_eval_pvalue (w);
	
	c = gdl_multireg_linear_coefficients (w);
	r->c  = gdl_vector_alloc (c->size);
	gdl_vector_memcpy (r->c, c);
	
	r->sd = gdl_vector_alloc (c->size);
	cov = gdl_multireg_linear_covariance (w);
	
	for (i = 0; i < c->size; i++)
	{
		gdl_vector_set (r->sd, i, sqrt (gdl_matrix_get (cov, i, i)));	
	}
	
	r->N = r->df + r->c->size;
	r->M = r->c->size;
	
	return r;
}

void
gdl_hsmap_model_result_free (gdl_hsmap_model_result * r)
{
	if (r)
	{
		gdl_vector_free (r->c);
		gdl_vector_free (r->sd);
		GDL_FREE (r);
	}
}

static void
_gdl_hsmap_model_init_mask (gdl_hsmap_model * m, const gdl_fview_wrapper * tdata, const gdl_fview_wrapper * odata, size_t t)
{
	size_t i;
	gdl_mask * tmp;
	gdl_entity_mask * accession;
	const gdl_hsmap_partition * p;
	
	p = m->partitions[0];
	
	tmp = gdl_mask_alloc ();
	
	gdl_mask_add_idx (tmp, GDL_FACTOR, t);
	gdl_mask_set (tmp, GDL_ACCESSION, gdl_mask_get_clone (p->tmask, GDL_ACCESSION), 1);
	
	m->trait = gdl_fview_wrapper_mask_not_informative (tdata, tmp, GDL_ACCESSION);
	
	gdl_mask_free (tmp);
	
	accession = gdl_mask_get_clone (m->trait, GDL_ACCESSION);
	
	if (odata)
	{
		m->factor = gdl_mask_alloc ();
		gdl_mask_set (m->factor, GDL_ACCESSION, accession, 0);
	}
	
	m->mask = m->trait;
	
}

static void
_gdl_hsmap_model_init_matrix (gdl_hsmap_model * m, const gdl_fview_wrapper * tdata, const gdl_fview_wrapper * odata)
{
	m->tdb  = gdl_fmatrix_wrapper_mask_alloc (tdata, m->trait, gdl_false);
	m->size = gdl_fmatrix_accession_size (m->tdb);
	if (odata)
	{
		m->odb = gdl_fmatrix_wrapper_mask_alloc (odata, m->factor, gdl_false);
		m->O   = gdl_fmatrix_column_size (m->odb);
	}
	m->y  = gdl_vector_alloc (m->size);
	m->M0 = m->M1 = 0;
}

gdl_hsmap_model *
gdl_hsmap_model_alloc (gdl_hsmap_partition ** partitions,
                          size_t np,
                          const gdl_fview_wrapper * tdata,
                          size_t t,
                          const gdl_fview_wrapper * odata)
{
	gdl_hsmap_model * m;
	
	m = GDL_CALLOC (gdl_hsmap_model, 1);
	
	m->np         = np;
	m->partitions = partitions;
	
	_gdl_hsmap_model_init_mask (m, tdata, odata, t);
	
	_gdl_hsmap_model_init_matrix (m, tdata, odata);
	
	m->tdata = tdata;
	m->odata = odata;
	
	m->position0 = gdl_hsmap_position_model_alloc ();
	m->position1 = gdl_hsmap_position_model_alloc ();
	
	return m;
}

void
gdl_hsmap_model_free (gdl_hsmap_model * m)
{
	if (m)
	{
		gdl_mask_free (m->trait);
		gdl_mask_free (m->factor);
	   gdl_vector_free (m->y);
	   gdl_matrix_free (m->X0);
	   gdl_matrix_free (m->X1);
	   gdl_fmatrix_free (m->tdb);
	   gdl_fmatrix_free (m->odb);
	   gdl_multireg_linear_workspace_free (m->wreg);
	   gdl_hsmap_model_result_free (m->result);
	   GDL_FREE (m->exclude);
	   GDL_FREE (m->move);
	   GDL_FREE (m);
	}	
}

size_t
gdl_hsmap_model_partition_size (const gdl_hsmap_model * model)
{
	return model->np;	
}

gdl_hsmap_position *
gdl_hsmap_model_new_ancestral_position (const gdl_hsmap_model * model, size_t p, size_t l, double position)
{
	gdl_hsmap_position * pos;
	
	pos = gdl_hsmap_position_alloc (gdl_hsmap_position_ancestral);
	
	gdl_hsmap_position_init (pos, l, position, model->partitions[p], model->mask, 0);
	
	if (!pos->data)
	{
		gdl_hsmap_position_free (pos);
		pos=NULL;	
	}
	
	return pos;
}

gdl_hsmap_position *
gdl_hsmap_model_new_locus_position (const gdl_hsmap_model * model, size_t p, size_t l)
{
	gdl_hsmap_position * pos;
	
	pos = gdl_hsmap_position_alloc (gdl_hsmap_position_locus);
	
	gdl_hsmap_position_init (pos, l, 0, model->partitions[p], model->mask, 0);
	
	return pos;
}

gdl_hsmap_position *
gdl_hsmap_model_new_mutation_position (const gdl_hsmap_model * model, size_t p, size_t l, double position, size_t k)
{
	gdl_hsmap_position * pos;
	
	pos = gdl_hsmap_position_alloc (gdl_hsmap_position_mutation);
	
	gdl_hsmap_position_init (pos, l, position, model->partitions[p], model->mask, &k);
	
	return pos;
}

size_t
gdl_hsmap_model_add_eligible (gdl_hsmap_model * model, gdl_hsmap_position * position)
{
	return gdl_hsmap_position_model_add (model->position1, position);
}

gdl_hsmap_position *
gdl_hsmap_model_rmv_eligible (gdl_hsmap_model * model, size_t i)
{
	return gdl_hsmap_position_model_remove (model->position1, i);
}

size_t
gdl_hsmap_model_keep_eligible (gdl_hsmap_model * model, size_t i)
{
	gdl_hsmap_position * p = gdl_hsmap_position_model_remove (model->position1, i);
	return gdl_hsmap_position_model_add (model->position0, p);
}

static gdl_hsmap_model_result *
gdl_hsmap_model_multreg (gdl_hsmap_model * model)
{
	const size_t N = model->N;
	const size_t M = model->O + model->M0 + model->M1 + 1;
	gdl_vector * y;
	gdl_matrix * X0, * X1;
	
	if (model->N < model->size)
	{
		gdl_matrix_view VX1, VX0;
		gdl_vector_view vy;
		
		vy  = gdl_vector_subvector (model->y, 0, model->N);
		y   = &(vy.vector);
		
		if (model->M1)
		{
	   	VX0 = gdl_matrix_submatrix (model->X0, 0, 0, model->N, model->O + model->M0 + 1);
			X0  = &(VX0.matrix);
			VX1 = gdl_matrix_submatrix (model->X1, 0, 0, model->N, model->M1);
			X1  = &(VX1.matrix);	
		}
		else
		{
			VX0 = gdl_matrix_submatrix (model->X0, 0, 0, model->N, model->O + 1);
			X0  = &(VX0.matrix);
			VX1 = gdl_matrix_submatrix (model->X1, 0, 0, model->N, model->M0);
			X1  = &(VX1.matrix);
		}
	}
	else
	{
		X0 = model->X0;
		X1 = model->X1;
		y  = model->y;
	}
	
	if (model->wreg==0)
	{
		model->wreg = gdl_multireg_linear_workspace_alloc (N, M);
	}
	else if (   N > gdl_multireg_linear_workspace_size1 (model->wreg)
	         || M > gdl_multireg_linear_workspace_size2 (model->wreg))
	{
		gdl_multireg_linear_workspace_free (model->wreg);
		model->wreg = gdl_multireg_linear_workspace_alloc (N, M);
	}
	
	if (model->M1 || model->M0)
	{
		gdl_multireg_linear_eval (model->wreg, X0, X1, y);
	}
	else
	{
		gdl_multireg_linear_perform (model->wreg, X0, y);	
	}
	
	return gdl_hsmap_model_result_alloc (model->wreg);
}

static int
gdl_hsmap_model_system_position (gdl_hsmap_model * model, size_t i, size_t ii, size_t * jj, size_t mode)
{
	gdl_hsmap_position * position;
	gdl_hsmap_position_model * data;
	gdl_matrix  * X;
	gdl_boolean * exclude;
	
	switch (mode)
	{
		case 0 :
			data    = model->position0;
			X       = (model->M1) ? model->X0 : model->X1;
			exclude = model->exclude;
			break;
		case 1 :
			data    = model->position1;
			X       = model->X1;
			exclude = NULL;
			break;
	}
	
	if (data)
	{
		size_t j, k, nc;
		
		nc = gdl_hsmap_position_model_size (data);
		
		for (j = 0; j < nc; j++)
		{
			if (exclude && exclude[j])
			{
				continue;
			}
			
			position = gdl_hsmap_position_model_get (data, j);
			
			if (gdl_hsmap_position_is_missing (position, i))
			{
				return GDL_CONTINUE;	
			}
			
			for (k = 0; k < position->data->size2; k++)
			{
				gdl_matrix_set (X, ii, *jj + k, gdl_matrix_get (position->data, i, k));
			}
			(*jj) += position->data->size2;
		}
		
		return GDL_SUCCESS;
	}
	else
	{
		return GDL_SUCCESS;	
	}
}

static int
gdl_hsmap_model_system_factor (gdl_hsmap_model * model, size_t i, size_t ii, size_t * jj)
{
	if (model->odb)
	{
		size_t j, k, nc;
		gdl_gvalues * x;
		gdl_gvalue  * v;
		
		nc = gdl_fmatrix_factor_size (model->odb);
		
		for (j = 0; j < nc; j++)
		{
			if (gdl_fmatrix_factor_is_missing (model->odb, i, j))
			{
				return GDL_CONTINUE;
			}
			x = gdl_fmatrix_factor_get (model->odb, i, j);
			for (k = 0; k < x->size; k++)
			{
				v = x->values[k];
				gdl_matrix_set (model->X0, ii, *jj + v->idx, v->value);
			}
			gdl_gvalues_free (x);
			(*jj) += gdl_fmatrix_factor_column_size (model->odb, j);
		}
		
		return GDL_SUCCESS;
	}
	else
	{
		return GDL_SUCCESS;
	}
}

void
gdl_hsmap_model_system (gdl_hsmap_model * model)
{
	size_t i, ii, j, jj, M0 = model->M0;
	
	gdl_matrix_free (model->X0);
	gdl_matrix_free (model->X1);
	
	model->X0 = NULL;
	model->X1 = NULL;
	
	M0 = gdl_hsmap_position_model_column_size (model->position0);
	for (i = 0; i < gdl_hsmap_position_model_size (model->position0); i++)
	{
		if (model->exclude && model->exclude[i])
		{
			gdl_hsmap_position * p = gdl_hsmap_position_model_get (model->position0, i);
			M0 -= p->data->size2;
		}	
	}
	model->M0 = M0;
	model->M1 = gdl_hsmap_position_model_column_size (model->position1);
	if (model->M1)
	{
		model->X0 = gdl_matrix_alloc (model->size, model->O + model->M0 + 1);
		model->X1 = gdl_matrix_alloc (model->size, model->M1);
	}
	else
	{
		model->X0 = gdl_matrix_alloc (model->size, model->O + 1);
		if (model->M0)
		{
			model->X1 = gdl_matrix_alloc (model->size, model->M0);
		}
	}
	
	//printf ("MODEL O %d M0 %d M1 %d\n", model->O, model->M0, model->M1);
	
	for (ii = i = 0; i < model->size; i++)
	{
		gdl_vector_set (model->y, ii, gdl_fmatrix_get (model->tdb, i, 0));
		gdl_matrix_set (model->X0, ii, 0, 1.0); // intercept
		j = 1;
		if (model->O && gdl_hsmap_model_system_factor (model, i, ii, &j)==GDL_CONTINUE)
		{
			continue;
		}
		j = (model->M1) ? j : 0;
		if (model->M0 && gdl_hsmap_model_system_position (model, i, ii, &j, 0)==GDL_CONTINUE)
		{
			continue;
		}
		j = 0;
		if (model->M1 && gdl_hsmap_model_system_position (model, i, ii, &j, 1)==GDL_CONTINUE)
		{
			continue;
		}
		ii++;
	}
	
	model->N = ii;
	
//	for (i = 0; i < model->N; i++)
//	{
//		printf ("[%d] %.1f", i, gdl_vector_get (model->y, i));
//		if (model->M0+model->O)
//		{
//			printf (" [");
//			for (j = 0; j < model->X0->size2; j++)
//			{
//				printf (" %f", gdl_matrix_get (model->X0, i, j));
//			}
//			printf (" ]");
//		}
//		if (model->M1)
//		{
//			printf (" [");
//			for (j = 0; j < model->X1->size2; j++)
//			{
//				printf (" %f", gdl_matrix_get (model->X1, i, j));	
//			}
//			printf (" ]");
//		}
//		printf ("\n");
//	}
	
}

const gdl_hsmap_model_result *
gdl_hsmap_model_eval (gdl_hsmap_model * m)
{
	gdl_hsmap_model_system (m);
	
	gdl_hsmap_model_result_free (m->result);
	
	m->result = gdl_hsmap_model_multreg (m);
	
	return m->result;
}

static gdl_boolean
gdl_hsmap_model_factor_fprintf (FILE * stream, const gdl_hsmap_model * model, gdl_boolean add)
{
	if (model->odata)
	{
		size_t i, j;
		gdl_factor * factor;
		
		for (i = 0; i < gdl_fview_wrapper_factor_size (model->odata); i++)
		{
			if ((!i && add) || i) fprintf (stream, " +"); 
			factor = gdl_fview_wrapper_get_factor (model->odata, i);
			fprintf (stream, " [ %s", gdl_entity_get_name (factor));
			if (gdl_factor_get_type (factor) != gdl_factor_continuous)
			{
				fprintf (stream, ":");
				for (j = 0; j < gdl_factor_size (factor) - 1; j++)
				{
					if (j) fprintf (stream, " +");
					fprintf (stream, " %s", gdl_entity_get_name (gdl_factor_get(factor, j)));
				}
			}
			fprintf (stream, " ]");
		}
		return gdl_true;
	}
	return add;
}

static gdl_boolean
gdl_hsmap_model_position_fprintf (FILE * stream, const gdl_hsmap_model * model, size_t mode, gdl_boolean add)
{
	size_t i, j;
	gdl_hsmap_position_model * mask;
	gdl_hsmap_position * position;
	gdl_boolean        * exclude;
	
	switch (mode)
	{
		case 0 :
		   mask    = model->position0;
		   exclude = model->exclude;
			break;
		case 1 :
		   mask = model->position1;
		   exclude = 0;
			break;
	}
	
	for (i = 0; i < gdl_hsmap_position_model_size (mask); i++)
	{
		if (exclude && exclude[i])
		{
			continue;
		}
		if ((!i && add) || i) fprintf (stream, " +"); 
		position = gdl_hsmap_position_model_get (mask, i);
		if (position->type == gdl_hsmap_position_ancestral)
		{
			fprintf (stream, " [ A:%s:%.3f:", gdl_entity_get_name (position->locus), position->position);
			for (j = 0; j < position->data->size2; j++)
			{
				if (j) fprintf (stream, " +");
				fprintf (stream, " %d", j+1);
			}
		}
		else if (position->type == gdl_hsmap_position_locus)
		{
			fprintf (stream, " [ %s:", gdl_entity_get_name (position->locus));
			for (j = 0; j < gdl_locus_allele (position->locus) - 1; j++)
			{
				if (j) fprintf (stream, " +");
				fprintf (stream, " %s", gdl_entity_get_name (gdl_locus_get_allele (position->locus, j)));
			}
		}
		fprintf (stream, " ]");
	}
	
	return (gdl_hsmap_position_model_size (mask)) ? gdl_true : add;
}

#define PRINT_COEF(a, b)(fprintf (stream, "\t%.3f\t(+/- %.3f)\n", (a), (b)))

static int
gdl_hsmap_model_factor_result_fprintf (FILE * stream, const gdl_hsmap_model * model, const gdl_hsmap_model_result * result, size_t * ii)
{
	if (model->odata)
	{
		size_t i, j;
		gdl_factor * factor;
		
		for (i = 0; i < gdl_fview_wrapper_factor_size (model->odata); i++)
		{
			factor = gdl_fview_wrapper_get_factor (model->odata, i);
			if (gdl_factor_get_type (factor) == gdl_factor_continuous)
			{
				fprintf (stream, "   %s\t-", gdl_entity_get_name (factor));
				PRINT_COEF(gdl_vector_get (result->c, *ii), gdl_vector_get (result->sd, *ii));
				(*ii)++; 
			}
			else
			{
				for (j = 0; j < gdl_factor_size (factor) - 1; j++)
				{
					fprintf (stream, "   %s\t%s", gdl_entity_get_name (factor), gdl_entity_get_name (gdl_factor_get(factor, j)));
					PRINT_COEF (gdl_vector_get (result->c, *ii), gdl_vector_get (result->sd, *ii));
					(*ii)++;
				}
			}
		}
	}
}

static int
gdl_hsmap_model_position_result_fprintf (FILE * stream, const gdl_hsmap_model * model, const gdl_hsmap_model_result * result, size_t * ii, size_t mode)
{
	size_t i, j;
	gdl_hsmap_position_model * mask;
	gdl_hsmap_position * position;
	gdl_boolean * exclude;
	
	switch (mode)
	{
		case 0 :
		   mask    = model->position0;
		   exclude = model->exclude;
			break;
		case 1 :
		   mask    = model->position1;
		   exclude = 0;
			break;
	}
	
	for (i = 0; i < gdl_hsmap_position_model_size (mask); i++)
	{
		if (exclude && exclude[i])
		{
			continue;
		}
		position = gdl_hsmap_position_model_get (mask, i);
		if (position->type == gdl_hsmap_position_ancestral)
		{
			for (j = 0; j < position->data->size2; j++)
			{
				fprintf (stream, "   A:%s:%.3f\t%d", gdl_entity_get_name (position->locus), position->position, j+1);
				PRINT_COEF (gdl_vector_get (result->c, *ii), gdl_vector_get (result->sd, *ii));
				(*ii)++;
			}
		}
		else if (position->type == gdl_hsmap_position_locus)
		{
			for (j = 0; j < gdl_locus_allele (position->locus) - 1; j++)
			{
				fprintf (stream, "   %s\t%s", gdl_entity_get_name (position->locus), gdl_entity_get_name (gdl_locus_get_allele (position->locus, j)));
				PRINT_COEF (gdl_vector_get (result->c, *ii), gdl_vector_get (result->sd, *ii));
				(*ii)++;
			}
		}
	}
}

int
gdl_hsmap_model_fprintf (FILE * stream, const gdl_hsmap_model * model)
{
	if (stream && model)
	{
		size_t i, ii;
		double x;
		gdl_factor * factor;
		const gdl_hsmap_model_result * result = model->result;
		
		fprintf (stream, "--\n");
		factor = gdl_fview_wrapper_get_factor (model->tdata, gdl_mask_get_idx (model->trait, GDL_FACTOR, 0));
		fprintf (stream,"  [ %s ] ~", gdl_entity_get_name (factor));
		gdl_hsmap_model_position_fprintf (stream, model, 1, 
          gdl_hsmap_model_position_fprintf (stream, model, 0,
              gdl_hsmap_model_factor_fprintf (stream, model, gdl_false)));
		fprintf (stream, "\n--\n");
		fprintf (stream, "   Number of individuals      %d\n", result->N);
		fprintf (stream, "   Number of variables        %d\n", result->M);
		fprintf (stream, "   Degrees of freedom (H1)    %d\n", result->df);
		fprintf (stream, "   Degrees of freedom (H1:H0) %d\n", result->df1);
		fprintf (stream, "   R-square (H1)              %1.3f\n", result->rsq);
		fprintf (stream, "   R-square (H1:H0)           %1.3f\n", result->rsq1);
		fprintf (stream, "   F-stat (H1:H0)             %.2f\n", result->fstat);
		fprintf (stream, "   P-value (H1:H0)            %e ", result->pval);
		gdl_significant_star_fprintf (stream, result->pval, 0.05, 100, 3); 
		fprintf (stream, "\n");
		fprintf (stream, "--\n");
		fprintf (stream, "   Intercept\t-");
		PRINT_COEF(gdl_vector_get (result->c, 0), gdl_vector_get (result->sd, 0));
		ii = 1;
		gdl_hsmap_model_factor_result_fprintf (stream, model, result, &ii);
		gdl_hsmap_model_position_result_fprintf (stream, model, result, &ii, 0);
		gdl_hsmap_model_position_result_fprintf (stream, model, result, &ii, 1);
		
		return GDL_SUCCESS;
	}

	return GDL_EINVAL;	
}

#undef PRINT_COEF
