/*  
 *  pca/cov.c
 * 
 *  $Author: baptiste $, $Date: 2008-05-13 15:33:42 $, $Version$
 *
 *  Libgdl : a C library for statistical genetics
 * 
 *  Copyright (C) 2003-2006  Jean-Baptiste Veyrieras, INRA, France.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA * 
 */

#include <math.h> 

#include <gdl/gdl_common.h>
#include <gdl/gdl_errno.h>
#include <gdl/gdl_vector.h>
#include <gdl/gdl_matrix.h>
#include <gdl/gdl_blas.h>
#include <gdl/gdl_cblas.h>
#include <gdl/gdl_statistics.h>
#include <gdl/gdl_pca.h>

#include <gdl/gdl_linalg_svd.h>

typedef struct
{
	size_t size1;
	size_t size2;
	size_t naxe;
	gdl_linalg_svd_workspace * svd;
	gdl_matrix * E;
	gdl_vector * S;
	double stot;
	double env;
} gdl_pca_cov_t;

static int
gdl_pca_cov_alloc (void * vpca, size_t size1, size_t size2)
{
	if (vpca)
	{
		gdl_pca_cov_t * pca = (gdl_pca_cov_t *) vpca;
		
		pca->size1 = size1;
		pca->size2 = size2;
		pca->svd   = gdl_linalg_svd_workspace_alloc (gdl_linalg_svd_golub_reinsch, size1, size2);
		
		return GDL_SUCCESS;
	}
}

static int
gdl_pca_cov_free (void * vpca)
{
	if (vpca)
	{
		gdl_pca_cov_t * pca = (gdl_pca_cov_t *) vpca;
		
		return GDL_SUCCESS;
	}
} 

static int
gdl_pca_cov_adjust (gdl_pca_cov_t * pca, gdl_matrix * X, const gdl_vector * w)
{
	size_t i, j;
	double x, m;
	
	for (j = 0; j < X->size2; j++)
	{
		gdl_vector_view column = gdl_matrix_column (X, j);
		
		if (w)
		{
			m = gdl_stats_wmean (w->data, w->stride, (column.vector).data, (column.vector).stride, (column.vector).size);
		}
		else
		{
			m = gdl_stats_mean ((column.vector).data, (column.vector).stride, (column.vector).size);
		}
		
		for (i = 0; i < X->size1; i++)
		{
			x = gdl_matrix_get (X, i, j);
			
			if (gdl_isnan (x))
			{
				gdl_matrix_set (X, i, j, 0);
			}
			else if (w)
			{
				gdl_matrix_set (X, i, j, (x-m)*sqrt(gdl_vector_get (w, i)*X->size1/(X->size1-1)));
			}
			else
			{
				gdl_matrix_set (X, i, j, (x-m)/sqrt(X->size1-1));
			}
		}
	}
}

static int
gdl_pca_cov_adjust_transpose (gdl_pca_cov_t * pca, gdl_matrix * X, const gdl_vector * w)
{
	size_t i, j;
	double tot, x, m;
	
	for (j = 0; j < X->size1; j++)
	{
		gdl_vector_view row = gdl_matrix_row (X, j);
		
		if (w)
		{
			m = gdl_stats_wmean (w->data, 1, (row.vector).data, (row.vector).stride, (row.vector).size);
		}
		else
		{
			m = gdl_stats_mean ((row.vector).data, (row.vector).stride, (row.vector).size);
		}
		
		for (tot = i = 0; i < X->size2; i++)
		{
			x = gdl_matrix_get (X, j, i);
			
			if (gdl_isnan (x))
			{
				gdl_matrix_set (X, j, i, 0);
			}
			else if (w)
			{
				gdl_matrix_set (X, j, i, (x-m)*sqrt(gdl_vector_get (w, i)*X->size2/(X->size2-1)));
			}
			else
			{
				gdl_matrix_set (X, j, i, (x-m)/sqrt(X->size2-1));
			}
		}
	}
}

static int
gdl_pca_cov_svd (gdl_pca_cov_t * pca, gdl_matrix * X, size_t naxe)
{
	gdl_linalg_svd_workspace_perform (pca->svd, X, naxe);
}

static void
gdl_pca_cov_scale_projection (gdl_pca_cov_t * pca, gdl_matrix * X, size_t n, size_t naxe)
{
	size_t i, j;
	double * x;
	
	for (i = 0; i < n; i++)
	{
		for (j = 0; j < naxe; j++)
		{
			 x    = gdl_matrix_ptr (X, i, j);
			(*x) *= gdl_vector_get (pca->S, j);
		}	
	}
}

static void
gdl_pca_cov_scale_eigenvalues (gdl_pca_cov_t * pca, size_t naxe)
{
	size_t i;
	double * x;
	
	for (i = 0; i < naxe; i++)
	{
		x = gdl_vector_ptr (pca->S, i);
		(*x) *= (*x);
	}
}

static void
gdl_pca_cov_set_properties (gdl_pca_cov_t * pca, size_t naxe)
{
	size_t i;
	double x, s, s2;
	
	s = s2 = 0;
	
	for (i = 0; i < naxe; i++)
	{
		x  = gdl_vector_get (pca->S, i);
		s  += x;
		s2 += x*x;
	}
	
	pca->stot = s;
	pca->env  = 1.0 + naxe*(1.0-s2/(s*s));
}

static int
gdl_pca_cov_end (gdl_pca_cov_t * pca, gdl_matrix * X, size_t naxe)
{
	pca->E = gdl_linalg_svd_workspace_get_right (pca->svd);
	
	pca->S = gdl_linalg_svd_workspace_get_singular (pca->svd);
	
	gdl_pca_cov_scale_projection (pca, X, X->size1, naxe);
	
	gdl_pca_cov_scale_eigenvalues (pca, naxe);
	
	gdl_pca_cov_set_properties (pca, naxe);
	
	return GDL_SUCCESS;
}

static int
gdl_pca_cov_end_transpose (gdl_pca_cov_t * pca, gdl_matrix * X, size_t naxe)
{
	size_t i, j;
	double x, s;
	gdl_matrix * V;
	
	V      = gdl_linalg_svd_workspace_get_right (pca->svd);
	pca->S = gdl_linalg_svd_workspace_get_singular (pca->svd);
	pca->E = gdl_matrix_alloc (X->size1, naxe);
	
	for (i = 0; i < X->size1; i++)
	{
		for (s = 0, j = 0; j < naxe; j++)
		{
			x = gdl_matrix_get (X, i, j);
			s += x*x;
		}
		s = sqrt(s);
		for (j = 0; j < naxe; j++)
		{
			x = gdl_matrix_get (X, i, j);
			gdl_matrix_set (pca->E, i, j, x/s);
		}
	}
	
	for (i = 0; i < X->size2; i++)
	{
		for (j = 0; j < naxe; j++)
		{
			x = gdl_matrix_get (V, i, j);
			gdl_matrix_set (X, i, j, x);
		}	
	}
	
	gdl_pca_cov_scale_projection (pca, X, X->size2, naxe);
	gdl_pca_cov_scale_eigenvalues (pca, naxe);
	gdl_pca_cov_set_properties (pca, naxe);
	
	return GDL_SUCCESS;
}

static int
gdl_pca_cov_perform (void * vpca, gdl_matrix * X, size_t naxe)
{
	gdl_pca_cov_t * pca = (gdl_pca_cov_t *) vpca;
	
	gdl_pca_cov_adjust (pca, X, 0);
	
	gdl_pca_cov_svd (pca, X, naxe);
	
	gdl_pca_cov_end (pca, X, naxe);	
}

static int
gdl_pca_cov_wperform (void * vpca, gdl_matrix * X, const gdl_vector * w, size_t naxe)
{
	gdl_pca_cov_t * pca = (gdl_pca_cov_t *) vpca;
	
	gdl_pca_cov_adjust (pca, X, w);
	
	gdl_pca_cov_svd (pca, X, naxe);
	
	gdl_pca_cov_end (pca, X, naxe);
}

gdl_pca_cov_perform_transpose (void * vpca, gdl_matrix * X, size_t naxe)
{
	gdl_pca_cov_t * pca = (gdl_pca_cov_t *) vpca;
	
	gdl_pca_cov_adjust_transpose (pca, X, 0);
	gdl_pca_cov_svd (pca, X, naxe);
	gdl_pca_cov_end_transpose (pca, X, naxe);
}

static int
gdl_pca_cov_wperform_transpose (void * vpca, gdl_matrix * X, const gdl_vector * w, size_t naxe)
{
	gdl_pca_cov_t * pca = (gdl_pca_cov_t *) vpca;
	
	gdl_pca_cov_adjust_transpose (pca, X, w);
	gdl_pca_cov_svd (pca, X, naxe);
	gdl_pca_cov_end_transpose (pca, X, naxe);	
}

static gdl_matrix *
gdl_pca_cov_rotation (void * vpca)
{
	gdl_pca_cov_t * pca = (gdl_pca_cov_t *) vpca;
	return pca->E;
}

static gdl_vector *
gdl_pca_cov_weights (void * vpca)
{
	gdl_pca_cov_t * pca = (gdl_pca_cov_t *) vpca;
	return pca->S;
}

static const gdl_pca_workspace_type _gdl_pca_cov =
{
	"gdl_pca_covariance",
	sizeof (gdl_pca_cov_t),
	&gdl_pca_cov_alloc,
	&gdl_pca_cov_free,
	&gdl_pca_cov_perform,
	&gdl_pca_cov_wperform,
	&gdl_pca_cov_perform_transpose,
	&gdl_pca_cov_wperform_transpose,
	&gdl_pca_cov_rotation,
	&gdl_pca_cov_weights
};

const gdl_pca_workspace_type * gdl_pca_covariance = &_gdl_pca_cov;
