/*  
 * 	lasso/test.c
 * 
 *  $Author: baptiste $, $Date: 2008-05-13 15:22:08 $, $Version$
 *
 *  Libgdl : a C library for statistical genetics
 * 
 *  Copyright (C) 2003-2008  Jean-Baptiste Veyrieras, INRA, France.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA * 
 */

#include <stdio.h>
#include <stdlib.h>

#include <gdl/gdl_common.h>
#include <gdl/gdl_test.h>
#include <gdl/gdl_io.h>
#include <gdl/gdl_string.h>
#include <gdl/gdl_math.h>
#include <gdl/gdl_blas.h>
#include <gdl/gdl_cblas.h>
#include <gdl/gdl_vector.h>
#include <gdl/gdl_matrix.h>
#include <gdl/gdl_lasso.h>
#include <gdl/gdl_elastic_net.h>

/*
 * Fills y and X from a text table.
 *
 * Each line read from filename supplies one observation: its first token is
 * stored in y[k] and the next X->size2 tokens in row k of X, every token
 * being converted with atof().  At most y->size lines are consumed; a file
 * holding fewer lines leaves the remaining entries untouched and is still
 * reported as a success.
 *
 * X is not checked for having at least y->size rows; the caller must
 * allocate y and X with matching row counts.
 *
 * Returns GDL_SUCCESS, or -1 when the file cannot be opened or when a line
 * holds fewer than 1 + X->size2 tokens.
 * NOTE(review): -1 is assumed to be distinct from GDL_SUCCESS; swap it for
 * the library's own failure code if one is available.
 */
static int
read_table (const gdl_string * filename,
            gdl_vector * y,
            gdl_matrix * X)
{
	const int read_failed = -1;
	size_t i, j, k, l, n;
	gdl_string * line, * tok;
	FILE * stream;
	int status = GDL_SUCCESS;

	stream = gdl_fileopen (filename, "r");
	/* NOTE(review): assumes gdl_fileopen reports failure with a null stream. */
	if (stream == NULL)
	{
		return read_failed;
	}

	line = NULL;
	n = 0;
	k = 0;
	/* The row bound is tested before reading so that an empty y is never
	 * written to and no line beyond the last wanted row is consumed. */
	while (status == GDL_SUCCESS
	       && k < y->size
	       && gdl_getline (&line, &n, stream) != -1)
	{
		i = j = 0;

		/* Response value: first token of the line. */
		tok = gdl_string_next_token (line, n, &i, &j);
		if (tok == NULL)
		{
			status = read_failed;
		}
		else
		{
			gdl_vector_set (y, k, atof (tok));
			gdl_string_free (tok);
		}

		/* Predictor values: the following X->size2 tokens. */
		for (l = 0; status == GDL_SUCCESS && l < X->size2; l++)
		{
			tok = gdl_string_next_token (line, n, &i, &j);
			if (tok == NULL)
			{
				/* Short line: stop instead of handing a null pointer to atof. */
				status = read_failed;
				break;
			}
			gdl_matrix_set (X, k, l, atof (tok));
			gdl_string_free (tok);
		}

		/* A fresh buffer is requested from gdl_getline for every line. */
		gdl_string_free (line);
		line = NULL;

		if (status == GDL_SUCCESS)
		{
			k++;
		}
	}
	/* NOTE(review): if gdl_getline allocates a buffer even when it returns -1
	 * (as POSIX getline does), that last buffer is not released here. */

	gdl_fileclose (filename, stream);

	return status;
}

int main(void)
{
	gdl_lasso_workspace * W;
	gdl_vector * y;
	gdl_matrix * X;
	
	//Diabete
//	y = gdl_vector_alloc (442);
//	X = gdl_matrix_alloc (442, 10);
//	read_table ("./diabete.txt", y, X);
//	
//	W = gdl_lasso_workspace_alloc (442, 638);
//	
//	gdl_lasso_perform (W, X, y, gdl_lasso_algorithm_lar, gdl_true, gdl_true, 0, GDL_SQRT_DBL_MIN, 0, gdl_true, gdl_false, stdout);
//	
//	//gdl_lasso_perform (W, X, y, gdl_lasso_algorithm_lasso, gdl_true, gdl_true, 0, 2.2e-16, 0, gdl_true, gdl_false, stdout);
//	
//	gdl_vector_free (y);
//	gdl_matrix_free (X);
	y = gdl_vector_alloc (210);
	X = gdl_matrix_alloc (210, 638);
//	
	read_table ("./MOSC1.txt", y, X);
//	
//	W = gdl_lasso_workspace_alloc (210, 638);
//	
//	gdl_lasso_perform (W, X, y, gdl_lasso_algorithm_lar, gdl_true, gdl_true, 0, 2.2e-16, 0, gdl_false, gdl_false, stderr);
	
	W = gdl_enet_workspace_alloc (210, 638);
	
	size_t i,j,ii,k;
	size_t max_steps = 100;
	double * MSE, syy = 0;
	gdl_vector * yc = gdl_vector_alloc (209);
	gdl_matrix * Xc = gdl_matrix_alloc (209, 638);
	
	MSE = GDL_CALLOC(double, max_steps);
	
	for(i = 0; i < 210; i++)
	{
		for(ii = j = 0; j < 210; j++)
		{
			if (j == i) continue;
			gdl_vector_set (yc, ii, gdl_vector_get (y, j));
			gdl_vector_view       Xcv_row = gdl_matrix_row (Xc, ii);
			gdl_vector_const_view X_row   = gdl_matrix_const_row (X, j);
			gdl_vector_memcpy (&(Xcv_row.vector), &(X_row.vector)); 
			ii++;	
		}
		double yi = gdl_vector_get (y, i);
		
		gdl_enet_perform (W, Xc, yc, 0.01, gdl_true, gdl_true, 2.2e-16, max_steps, gdl_true, 0);
		
		for(k = 0; k < max_steps; k++)
		{
			gdl_vector_view BETA = gdl_matrix_row (W->beta_storage, k);
			double mu = W->muy;
			for(j = 0; j < W->M; j++)
			{
				mu += (gdl_matrix_get (X, i, j)-W->mux[j]) * gdl_vector_get (&(BETA.vector), j);
			}
			//printf ("%d %g %g\n", k, yi, mu);
			mu -= yi;
			mu *= mu;
			MSE[k] += mu;			
		}
		fprintf (stdout, "LOOCV FOLD %d\n", i);
		syy += yi*yi;
	}
	size_t mink;
	double min = GDL_POSINF;
	for(k = 0; k < max_steps; k++)
	{
		if (MSE[k] < min)
		{
			min = MSE[k];
			mink = k;			
		} 
		printf ("error %d %g\n", k, MSE[k]);
	}
	printf ("BEST MODEL %d (%g, %g, %g)\n", mink, MSE[mink], syy, 1.0-MSE[mink]/syy);
	gdl_enet_perform (W, X, y, 0.01, gdl_true, gdl_true, 2.2e-16, mink+1, gdl_true, stdout);
	double rss=0;
	gdl_vector_view BETA = gdl_matrix_row (W->beta_storage, mink);
	for (i = 0; i < 210; i++)
	{
		double mu = 0;
		double yi = gdl_vector_get (y, i);
		for(j = 0; j < W->M; j++)
		{
			mu += (gdl_matrix_get (X, i, j)*W->normx[j]) * gdl_matrix_get (W->beta_storage, mink, j);
		}
		//printf ("%g %g %g\n", yi, mu, yi-mu);
		mu -= yi;
		mu *= mu;
		rss += mu;			
	}
	printf ("RSS = %g (%g) ssy = %g (%g)\n", rss, W->RSS[mink], syy, 1.0-rss/syy);
	// Gene Expression
//	y = gdl_vector_alloc (60);
//	X = gdl_matrix_alloc (60, 1446);
//	read_table ("./GeneExp.txt", y, X);
//	
//	W = gdl_enet_workspace_alloc (60, 1446);
//	
//	gdl_enet_perform (W, X, y, 0.5, gdl_true, gdl_true, 2.2e-16, 3, gdl_false, stdout);
	//gdl_lasso_perform (W, X, y, gdl_lasso_algorithm_adaptive, gdl_true, gdl_true, 0, 2.2e-16, 0, gdl_false, gdl_false, 0, stdout);
	
	gdl_vector_free (y);
	gdl_matrix_free (X);
	gdl_enet_workspace_free (W);
    exit (gdl_test_summary());
}
