/*
 *  eqtl/regadd.c 
 * 
 *  $Author: baptiste $, $Date: 2008-05-13 15:33:47 $, $Version$
 *  
 *  Libgdl : a C library for statistical genetics
 * 
 *  Copyright (C) 2003-2006  Jean-Baptiste Veyrieras, INRA, France.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA * 
 */
#include <math.h>

#include <gdl/gdl_common.h>
#include <gdl/gdl_errno.h>
#include <gdl/gdl_string.h>
#include <gdl/gdl_matrix.h>
#include <gdl/gdl_vector.h>
#include <gdl/gdl_blas.h>
#include <gdl/gdl_cblas.h>
#include <gdl/gdl_math.h>
#include <gdl/gdl_rng.h>
#include <gdl/gdl_randist.h>
#include <gdl/gdl_specfunc.h>
#include <gdl/gdl_sort_double.h>
#include <gdl/gdl_statistics_double.h>
#include <gdl/gdl_bayesian_regression.h>
#include <gdl/gdl_snp_annotation.h>
#include <gdl/gdl_eqtl_chromosome.h>
#include <gdl/gdl_eqtl_bayes.h>

#define VECTOR_SET(X,i,y)(*(X->data+i*X->stride)=y)
#define VECTOR_GET(X,i)(*(X->data+i*X->stride))
#define MATRIX_SET(X,i,j,y)(*(X->data+(i*X->tda+j))=y)
#define MATRIX_GET(X,i,j)(*(X->data+(i*X->tda+j)))

static double *
geneExp2Vector (const gdl_eqtl_chromosome * chrom,
                const gdl_eqtl_probe * probe,
                const gdl_boolean qnorm,
                size_t eligible[])
{
	size_t i, j, k, p, * rk = 0;
	double pr, * y, * x = 0;

	y = GDL_MALLOC (double, chrom->nindiv);
	
	for (k = i = 0; i < chrom->npop; i++)
	{
		const size_t n = chrom->pop_sizes[i];
		if (qnorm)
		{
			x  = GDL_CALLOC (double, n);
			rk = GDL_CALLOC (size_t, n);
			memcpy (x, probe->data[i], sizeof(double)*n);
			gdl_sort_index (rk, x, 1, n);
			for (j = 0; j < n; j++)
			{
				pr = ((double)(j+0.5))/((double)n);
				x[rk[j]] = gdl_ran_ugaussian_quantile (pr);
			}
			for(j = 0; j < n; j++, k++)
				y[k]=x[j];
			GDL_FREE (x);
			GDL_FREE (rk);
		}
		else
		{
			for(j = 0; j < n; j++, k++)
				y[k]=probe->data[i][j];
		}
	}
	
	return y;	
}

static double
AddReg (const gdl_eqtl_chromosome * chrom,
        const double Y[],
        const double X[],
        double * a,
        double * r2,
        double * p)
{
	size_t i, j, kx, ky;
	double l0, l1, chi, e, f, g, sig0=0, sig1=0, x=0, y=0, xx=0, xy=0, yy=0;
	
	for (kx = ky = i = 0; i < chrom->npop; i++)
	{
		for(j = 0; j < chrom->pop_sizes[i]; j++, kx++, ky++)
		{
			e   = X[kx];
			x  += e;
			xx += e*e;
			f   = Y[ky];
			y  += f;
			xy += e*f;
		}
	}
	(*a)=(kx*xy-x*y)/(kx*xx-x*x);
	y/=kx;
	x/=kx;
	for (kx = ky = i = 0; i < chrom->npop; i++)
	{
		for(j = 0; j < chrom->pop_sizes[i]; j++, kx++, ky++)
		{
			 e = X[kx];
			 f = Y[ky];
			 sig0 += (f-y)*(f-y);
			 g = (f-y)-(*a)*(e-x);
			 sig1 += g*g;
		}
	}
	
	chi = (sig0-sig1)*(kx - 2)/sig1;
	chi = (chi >= 0) ? chi : 0;
	*p  = gdl_sf_beta_inc (0.5*(kx - 2), 0.5,  (kx - 2)/((kx - 2) + chi));
	*r2 = 1-sig1/sig0;
	
	//printf ("%d %g %g %g\n", kx, sig0, sig1, -log(*p)/log(10));
	
	return chi;
}

int
gdl_eqtl_chromosome_regadd (gdl_eqtl_chromosome * chrom,
                            const gdl_boolean qnorm,
                            const gdl_string * x_name,
                            const double x[],
                            const gdl_feature_table * gene_set,
                            FILE * stream)
{
	const size_t G   = chrom->ngene;
	const size_t P   = chrom->ploidy;
	const size_t N   = chrom->nindiv;
	const size_t POP = chrom->npop;
	size_t i, j, k, l, p, g, m, n, maxg, npop, msize;
	gdl_genex_gene * gene;
	gdl_genex_block * block;
	gdl_genex_probe * probe;
	gdl_snp ** snps;
	double * y, r2, pval, a, stat;
	
	snps = chrom->snp_data->chrom->snps;
	
	if (chrom->logger)
	{
		fprintf (chrom->logger, ">%s |ngene=%d|npop=%d|nindiv=%d|\n", chrom->name, G, chrom->snp_data->chrom->size, POP, N);
		fflush(chrom->logger);
	}
	
	for (i = 0; i < G; i++)
	{
		gene = chrom->genes[i];
		if (gene_set && !gdl_feature_table_is_inside (gene_set, chrom->name, gene->name))
			continue;
		for (j = 0; j < gene->size; j++)
		{
			block = gene->blocks[j];
			for (p = 0; p < block->size; p++)
			{
				probe = block->probes[p];
				if (probe->ignore == 'y')
					continue;
				y    = geneExp2Vector (chrom, probe, qnorm, 0);
				stat = AddReg (chrom, y, x, &a, &r2, &pval);
				fprintf (stream, "%s %s %s %s %g %g %g %g\n", chrom->name, gene->name, probe->name, x_name, a, r2, stat, pval);
				fflush (stream);
//				if (pval <= 0.000199141) 
//				{
//					for(k = 0; k < chrom->nindiv; k++)
//					{
//						printf ("%s_%s %g %g\n", chrom->name, gene->name, y[k], x[k]);	
//					}
//				}
				GDL_FREE (y);
			} // probe			
		} // block		
		if (chrom->logger)
		{
			gdl_progress_bar (chrom->logger, 25, i, G);
		}
	} // gene
	
	return GDL_SUCCESS;
}
