/*  
 *  pca/pca.c
 * 
 *  $Author: baptiste $, $Date: 2008-05-13 15:33:42 $, $Version$
 *
 *  Libgdl : a C library for statistical genetics
 * 
 *  Copyright (C) 2003-2006  Jean-Baptiste Veyrieras, INRA, France.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA * 
 */

#include <math.h> 

#include <gdl/gdl_common.h>
#include <gdl/gdl_errno.h>
#include <gdl/gdl_blas.h>
#include <gdl/gdl_cblas.h>
#include <gdl/gdl_matrix.h>
#include <gdl/gdl_math.h>
#include <gdl/gdl_pca.h>

struct _gdl_pca_workspace
{
	const gdl_pca_workspace_type * type;
	size_t _naxe;
	gdl_matrix * U;
	gdl_vector * tw;
	void * state;
};

gdl_pca_workspace *
gdl_pca_workspace_alloc (const gdl_pca_workspace_type * T)
{
	gdl_pca_workspace * p;
	
	p = GDL_CALLOC (gdl_pca_workspace, 1);
	
	p->type  = T;
	
	return p;
}

void
gdl_pca_workspace_free (gdl_pca_workspace * w)
{
	if (w)
	{
		GDL_FREE (w);
	}
}

static int
_gdl_pca_workspace_init (gdl_pca_workspace * w, gdl_matrix * X)
{
	(w->type->free)(w->state);
	GDL_FREE (w->state);
	
	w->state = gdl_malloc (w->type->size);
	
	return (w->type->alloc)(w->state, X->size1, X->size2);
}

static void
_gdl_pca_copy_projection (gdl_pca_workspace * w, const gdl_matrix * X, gdl_boolean transpose)
{
	size_t i, j, n;
	double x;
	
	n = (transpose) ? X->size2 : X->size1;
	
	if (w->U)
	{	
		gdl_matrix_free (w->U);
	}
	
	w->U = gdl_matrix_alloc (n, w->_naxe);
	
	for (i = 0 ; i < n; i++)
	{
		for (j = 0; j < w->_naxe; j++)
		{
			gdl_matrix_set (w->U, i, j, gdl_matrix_get (X, i, j));
		}	
	}
}

static void
_gdl_pca_compute_tracy_widom (gdl_pca_workspace * w, const gdl_matrix * X, gdl_boolean transpose)
{
	const gdl_vector * l = gdl_pca_workspace_weights (w);
	const size_t M       = X->size1;
	const size_t N       = X->size2;
	size_t i, j;
	double u, v, n, x, t, t2;
	
	if (w->tw)
	{
		gdl_vector_free (w->tw);
	}
	
	w->tw = gdl_vector_calloc (l->size);
	
	t=t2=0;
	for(j = 0; j < l->size-1; j++)
	{
		x=gdl_vector_get(l,j);
		t+=x;
		t2+=x*x;
	}
	for (i = 0; i < l->size-2; i++)
	{
		if (X->size1 < X->size2)
		{
		   n=(M+1-i)*t*t/((M-1-i)*t2-t*t);
		   u=sqrt(n-1)+sqrt(M-i-1);
			v=u*pow(1/sqrt(n-1)+1/sqrt(M-i-1),1./3.);
			u*=u;
			x=n*(M-i-1)*gdl_vector_get(l,i)/t;
		}
		else
		{
			n=(N+1-i)*t*t/((N-1-i)*t2-t*t);
			u=sqrt(n-1)+sqrt(N-i-1);
			v=u*pow(1/sqrt(n-1)+1/sqrt(N-i-1),1./3.);
			u*=u;
			x=n*(N-i-1)*gdl_vector_get(l,i)/t;
		}
		gdl_vector_set(w->tw, i, (x-u)/v);
		x=gdl_vector_get(l,i);
		t-=x;
		t2-=x*x;
	}
}

int
gdl_pca_workspace_perform (gdl_pca_workspace * w, gdl_matrix * X)
{
	if (X->size2 > X->size1)
	{
		GDL_ERROR_VAL ("The matrix must have less columns than rows\n", GDL_EINVAL, GDL_EINVAL);	
	}
	
	_gdl_pca_workspace_init (w, X);
	
	w->_naxe = GDL_MIN (X->size1, X->size2);
	
	(w->type->perform)(w->state, X, w->_naxe);
	
	_gdl_pca_copy_projection (w, X, gdl_false);
	
	//_gdl_pca_compute_tracy_widom (w, X, gdl_false);
	
	return GDL_SUCCESS;
}

int
gdl_pca_workspace_wperform (gdl_pca_workspace * pca, gdl_matrix * X, const gdl_vector * w)
{
	if (X->size2 > X->size1)
	{
		GDL_ERROR_VAL ("The matrix must have less columns than rows\n", GDL_EINVAL, GDL_EINVAL);	
	}
	if (w->size < X->size1)
	{
		GDL_ERROR_VAL ("The vector of weights must have a size which matches the number of rows\n", GDL_EINVAL, GDL_EINVAL);	
	}

	_gdl_pca_workspace_init (pca, X);
	
	pca->_naxe = GDL_MIN (X->size1, X->size2);

	(pca->type->wperform)(pca->state, X, w, pca->_naxe);
	
	_gdl_pca_copy_projection (pca, X, gdl_false);
	
	//_gdl_pca_compute_tracy_widom (pca, X, gdl_false);
	
	return GDL_SUCCESS;
}

int
gdl_pca_workspace_const_perform (gdl_pca_workspace * w, const gdl_matrix * X)
{
	int status;
	gdl_matrix * XX;
	
	XX = gdl_matrix_alloc (X->size1, X->size2);
	
	gdl_matrix_memcpy (XX, X);
	
	_gdl_pca_workspace_init (w, XX);
	
	status = gdl_pca_workspace_perform (w, XX);
	
	_gdl_pca_copy_projection (w, XX, gdl_false);
	
	//_gdl_pca_compute_tracy_widom (w, XX, gdl_false);
	
	gdl_matrix_free (XX);
	
	return GDL_SUCCESS;
}

int
gdl_pca_workspace_perform_transpose (gdl_pca_workspace * pca, gdl_matrix * X)
{
	if (X->size2 > X->size1)
	{
		GDL_ERROR_VAL ("The matrix must have less columns than rows\n", GDL_EINVAL, GDL_EINVAL);	
	}
	
	_gdl_pca_workspace_init (pca, X);
	
	pca->_naxe = GDL_MIN (X->size1, X->size2);
	
	(pca->type->perform_transpose)(pca->state, X, pca->_naxe);
	
	_gdl_pca_copy_projection (pca, X, gdl_true);
	
	//_gdl_pca_compute_tracy_widom (pca, X, gdl_true);
	
	return GDL_SUCCESS;
}

int
gdl_pca_workspace_wperform_transpose (gdl_pca_workspace * pca, gdl_matrix * X, const gdl_vector * weights)
{
	if (X->size2 > X->size1)
	{
		GDL_ERROR_VAL ("The matrix must have less columns than rows\n", GDL_EINVAL, GDL_EINVAL);	
	}
	if (weights->size < X->size2)
	{
		GDL_ERROR_VAL ("The vector of weights must have a size which matches the number of columns\n", GDL_EINVAL, GDL_EINVAL);	
	}
	
	_gdl_pca_workspace_init (pca, X);
	
	pca->_naxe = GDL_MIN (X->size1, X->size2);
	
	(pca->type->wperform_transpose)(pca->state, X, weights, pca->_naxe);
	
	_gdl_pca_copy_projection (pca, X, gdl_true);
	
	//_gdl_pca_compute_tracy_widom (pca, X, gdl_true);
		
	return GDL_SUCCESS;
}

int
gdl_pca_workspace_const_perform_transpose (gdl_pca_workspace * pca, const gdl_matrix * X)
{
	
}

int
gdl_pca_workspace_const_wperform_transpose (gdl_pca_workspace * pca, const gdl_matrix * X, const gdl_vector * weights)
{
	
}

const gdl_matrix *
gdl_pca_workspace_projection (const gdl_pca_workspace * w)
{
	return w->U;
}

const gdl_matrix *
gdl_pca_workspace_rotation (const gdl_pca_workspace * w)
{
	return (w->type->rotation)(w->state);
}

const gdl_vector *
gdl_pca_workspace_weights (const gdl_pca_workspace * w)
{
	return (w->type->weights)(w->state);
}

const gdl_vector *
gdl_pca_workspace_tracy_widom (const gdl_pca_workspace * w)
{
	return w->tw;
}

double
gdl_pca_workspace_tot_var (const gdl_pca_workspace * w)
{
	const gdl_vector * we = (w->type->weights)(w->state);
	return gdl_blas_dasum (we);
}

size_t
gdl_pca_workspace_scree_threshold (const gdl_pca_workspace * w, const double threshold)
{
	size_t i;
	double tot, s;
	const gdl_vector * we = (w->type->weights)(w->state);
	
	tot = gdl_blas_dasum (we);
	s   = 0;
	
	for (i = 0; i < we->size; i++)
	{
		s += gdl_vector_get (we, i)/tot;
		if (s >= threshold)
		{
			break;	
		}
	}
	
	return (i < we->size) ? i+1 : we->size;
}

