/*
 *  genex/probe.c
 * 
 *  $Author: baptiste $, $Date: 2008-05-13 15:33:52 $, $Version$
 *  
 *  Libgdl : a C library for statistical genetics
 * 
 *  Copyright (C) 2003-2006  Jean-Baptiste Veyrieras, INRA, France.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
#include <gdl/gdl_common.h>
#include <gdl/gdl_errno.h>
#include <gdl/gdl_string.h>
#include <gdl/gdl_sort.h>
#include <gdl/gdl_statistics.h>
#include <gdl/gdl_math.h>
#include <gdl/gdl_rng.h>
#include <gdl/gdl_randist.h>
#include <gdl/gdl_specfunc.h>
#include <gdl/gdl_sort_double.h>
#include <gdl/gdl_statistics_double.h>
#include <gdl/gdl_genex_probe.h>
#include <gdl/gdl_genex_chromosome.h>

/*
 * Create a probe record from already-built components.
 *
 *   name   - probe name
 *   strand - strand code, stored as given
 *   start  - array of start positions
 *   end    - array of end positions
 *   nunit  - value stored in the probe's size field
 *            (presumably the length of start/end -- confirm with callers)
 *
 * The name, start and end pointers are stored as-is; nothing is copied.
 * The record is obtained from GDL_CALLOC and its ignore flag is set to 'n'.
 *
 * Returns the new probe.
 */
gdl_genex_probe *
gdl_genex_probe_alloc (gdl_string * name, const unsigned char strand, long * start, long * end, const size_t nunit)
{
	gdl_genex_probe * probe = GDL_CALLOC (gdl_genex_probe, 1);

	/* identity */
	probe->name   = name;
	probe->strand = strand;
	/* coordinates */
	probe->size   = nunit;
	probe->start  = start;
	probe->end    = end;
	/* flag */
	probe->ignore = 'n';

	return probe;
}

/*
 * Merge several probes into a single "meta" probe.
 *
 *   probes - array of nprobe probes to merge
 *   nprobe - number of entries in probes
 *   npop   - number of populations; when non-zero, zeroed data, mean and
 *            var arrays of that length are attached to the result
 *
 * Probes whose ignore flag is 'y' are left out of the merge.  When every
 * probe is flagged, all of them are merged instead and the meta probe
 * itself is flagged with ignore = 'y'.
 *
 * The meta probe's name is the names of the merged probes joined by '+',
 * and its start/end arrays are the concatenation of theirs, in input
 * order.  The strand is always taken from probes[0], even when that probe
 * is itself left out of the merge.
 *
 * Returns the new meta probe, or 0 when there is nothing to merge
 * (probes is null or nprobe is 0).
 */
gdl_genex_probe *
gdl_genex_meta_probe_alloc (gdl_genex_probe ** probes, const size_t nprobe, const size_t npop)
{
	size_t p, n, len = 0, size = 0, nkept = 0, nmerged = 0;
	int use_all;
	long * starts, * ends;
	gdl_string * name;
	gdl_genex_probe * meta;

	/* The strand comes from probes[0], so an empty input cannot be merged. */
	if (probes == 0 || nprobe == 0)
	{
		return 0;
	}

	for (p = 0; p < nprobe; p++)
	{
		if (probes[p]->ignore != 'y') nkept++;
	}
	/* No usable probe at all: fall back to merging every probe. */
	use_all = (nkept == 0);

	/* First pass: room needed for the name (one extra char per probe
	 * covers the separators and the terminator) and the coordinates. */
	for (p = 0; p < nprobe; p++)
	{
		if (!use_all && probes[p]->ignore == 'y') continue;
		len  += strlen (probes[p]->name) + 1;
		size += probes[p]->size;
	}

	name   = gdl_string_alloc (len);
	starts = GDL_MALLOC (long, size);
	ends   = GDL_MALLOC (long, size);

	/* Second pass: fill the buffers. */
	for (len = size = p = 0; p < nprobe; p++)
	{
		if (!use_all && probes[p]->ignore == 'y') continue;
		/* Put the separator in front of every name but the first merged
		 * one, so skipped trailing probes never leave a dangling '+'. */
		if (nmerged++) name[len++] = '+';
		n = strlen (probes[p]->name);
		memcpy (name+len, probes[p]->name, sizeof(char)*n);
		len += n;
		if (probes[p]->size)
		{
			memcpy (starts+size, probes[p]->start, sizeof(long)*probes[p]->size);
			memcpy (ends+size, probes[p]->end, sizeof(long)*probes[p]->size);
			size += probes[p]->size;
		}
	}
	/* len is at most (requested length - 1) here, so this stays in bounds. */
	name[len] = '\0';

	meta = gdl_genex_probe_alloc (name, probes[0]->strand, starts, ends, size);
	if (use_all)
	{
		meta->ignore = 'y';
	}
	if (npop)
	{
		meta->data = GDL_CALLOC (double *, npop);
		meta->mean = GDL_CALLOC (double, npop);
		meta->var  = GDL_CALLOC (double, npop);
	}
	return meta;
}

/*
 * Release a probe and everything it holds.
 *
 *   p    - probe to release; a null pointer is accepted and ignored
 *   npop - number of entries in p->data
 *
 * Frees each per-population data array, the data/mean/var arrays, the
 * start/end coordinate arrays, the name and finally the record itself.
 * mean and var are released here because the reader and the meta-probe
 * constructor in this file allocate them together with data.
 */
void
gdl_genex_probe_free (gdl_genex_probe * p, const size_t npop)
{
	if (p)
	{
		if (p->data)
		{
			size_t i;
			for (i = 0; i < npop; i++)
			{
				GDL_FREE (p->data[i]);
			}
			GDL_FREE (p->data);
		}
		GDL_FREE (p->mean);
		GDL_FREE (p->var);
		GDL_FREE (p->start);
		GDL_FREE (p->end);
		gdl_string_free (p->name);
		GDL_FREE (p);
	}
}

/*
 * Variance score of a probe across populations.
 *
 *   p         - probe whose mean, var and data arrays are read
 *   pop_sizes - number of observations in each population
 *   npop      - number of populations
 *
 * Two quantities are averaged:
 *   - the mean of the per-population variances, weighted by population size;
 *   - the variance of all observations around the size-weighted grand mean,
 *     using a (total - 1) denominator.
 *
 * Returns half the sum of the two.  Totals of 0 or 1 observation are not
 * guarded against and lead to a division by zero.
 */
double
gdl_genex_probe_var_score (const gdl_genex_probe * p, const size_t * pop_sizes, const size_t npop)
{
	size_t pop, obs, total = 0;
	double grand_mean = 0, within = 0, overall = 0;

	/* size-weighted grand mean */
	for (pop = 0; pop < npop; pop++)
	{
		grand_mean += p->mean[pop]*pop_sizes[pop];
		total      += pop_sizes[pop];
	}
	grand_mean /= total;

	for (pop = 0; pop < npop; pop++)
	{
		within += p->var[pop]*pop_sizes[pop];
		for (obs = 0; obs < pop_sizes[pop]; obs++)
		{
			const double dev = p->data[pop][obs]-grand_mean;
			overall += dev*dev;
		}
	}
	within  /= total;
	overall /= total-1;

	return 0.5*(within+overall);
}

/*
 * Count the extreme outliers of a probe over all populations pooled.
 *
 *   p         - probe whose data arrays are read
 *   pop_sizes - number of observations in each population
 *   npop      - number of populations
 *
 * The observations of every population are copied into one array and
 * sorted.  With q1 and q3 the 0.25 and 0.75 quantiles of the pooled
 * values, an observation is counted when it lies more than three
 * inter-quartile ranges above q3 or below q1.
 *
 * Returns the number of such observations.
 */
size_t
gdl_genex_probe_extreme_outlier (const gdl_genex_probe * p, const size_t * pop_sizes, const size_t npop)
{
	size_t pop, obs, pos, total, count;
	double * pooled, q1, q3, iqr;

	for (total = 0, pop = 0; pop < npop; pop++)
	{
		total += pop_sizes[pop];
	}

	pooled = GDL_MALLOC (double, total);

	/* flatten the per-population arrays */
	pos = 0;
	for (pop = 0; pop < npop; pop++)
	{
		for (obs = 0; obs < pop_sizes[pop]; obs++)
		{
			pooled[pos++] = p->data[pop][obs];
		}
	}

	gdl_sort (pooled, 1, total);

	q1  = gdl_stats_quantile_from_sorted_data (pooled, 1, total, 0.25);
	q3  = gdl_stats_quantile_from_sorted_data (pooled, 1, total, 0.75);
	iqr = q3-q1;

	for (count = 0, pos = 0; pos < total; pos++)
	{
		const double x = pooled[pos];
		if ((x < q1 - 3*iqr) || (x > q3 + 3*iqr))
		{
			count++;
		}
	}

	GDL_FREE (pooled);

	return count;
}

/*
 * Replace the probe signal by standard-normal quantiles, separately
 * within each population.
 *
 *   probe     - probe whose data arrays are rewritten in place
 *   pop_sizes - number of observations in each population
 *   npop      - number of populations
 *
 * Within a population of n observations, the observation at sorted
 * position j (0-based, as ordered by gdl_sort_index) is replaced by
 * gdl_ran_ugaussian_quantile ((j + 0.5) / n).  Populations with no
 * observation are skipped, so a null data array for an empty population
 * is never touched.
 */
void
gdl_genex_probe_gaussian_quantile_normalize (gdl_genex_probe * probe, const size_t * pop_sizes, const size_t npop)
{
	size_t i, j;

	for (i = 0; i < npop; i++)
	{
		const size_t n = pop_sizes[i];
		double * signal;
		size_t * rk;

		if (n == 0)
		{
			continue;
		}
		signal = probe->data[i];
		rk     = GDL_CALLOC (size_t, n);
		/* rk[j] is the index of the observation at sorted position j.  The
		 * permutation is complete before any value is overwritten, so the
		 * signal can be rewritten in place without a scratch copy. */
		gdl_sort_index (rk, signal, 1, n);
		for (j = 0; j < n; j++)
		{
			const double pr = ((double)(j+0.5))/((double)(n));
			signal[rk[j]] = gdl_ran_ugaussian_quantile (pr);
		}
		GDL_FREE (rk);
	}
}

/*
 * Read one probe record from a binary stream.
 *
 *   stream    - input stream; a null stream yields 0
 *   npop      - number of populations stored with the probe
 *   pop_sizes - number of observations in each population
 *
 * Fields are read in this order:
 *   size (size_t), start[size] (long), end[size] (long),
 *   strand (unsigned char), name (via gdl_string_fread),
 *   then, when npop is non-zero: mean[npop], var[npop] (double) followed
 *   by data[i][pop_sizes[i]] (double) for each non-empty population,
 *   and finally the ignore flag (unsigned char).
 * The data entry of an empty population is left null.
 *
 * Each read is checked with GDL_FREAD_STATUS.
 * NOTE(review): if that macro returns early on a short read, the buffers
 * allocated up to that point are not released here -- confirm against the
 * macro definition.
 *
 * Returns the probe read, or 0 when stream is null.
 */
gdl_genex_probe *
gdl_genex_probe_fread (FILE * stream, const size_t npop, const size_t * pop_sizes)
{
	int status;
	size_t nunit, k;
	long * starts, * ends;
	unsigned char strand;
	gdl_string * name;
	gdl_genex_probe * probe;

	if (!stream)
	{
		return 0;
	}

	/* coordinates */
	status = fread(&nunit, sizeof(size_t), 1, stream);
	GDL_FREAD_STATUS (status, 1);
	starts = GDL_MALLOC (long, nunit);
	status = fread(starts, sizeof(long), nunit, stream);
	GDL_FREAD_STATUS (status, nunit);
	ends = GDL_MALLOC (long, nunit);
	status = fread(ends, sizeof(long), nunit, stream);
	GDL_FREAD_STATUS (status, nunit);

	/* strand and name */
	status = fread(&strand, sizeof(unsigned char), 1, stream);
	GDL_FREAD_STATUS (status, 1);
	name = gdl_string_fread (stream);
	GDL_FREAD_STATUS (name!=0, 1);

	probe = gdl_genex_probe_alloc (name, strand, starts, ends, nunit);

	/* per-population statistics and signal */
	if (npop)
	{
		probe->data = GDL_CALLOC (double *, npop);
		probe->mean = GDL_CALLOC (double, npop);
		probe->var  = GDL_CALLOC (double, npop);
		status = fread(probe->mean, sizeof(double), npop, stream);
		GDL_FREAD_STATUS (status, npop);
		status = fread(probe->var, sizeof(double), npop, stream);
		GDL_FREAD_STATUS (status, npop);
		for (k = 0; k < npop; k++)
		{
			if (!pop_sizes[k])
			{
				continue;
			}
			probe->data[k] = GDL_MALLOC (double, pop_sizes[k]);
			status = fread(probe->data[k], sizeof(double), pop_sizes[k], stream);
			GDL_FREAD_STATUS (status, pop_sizes[k]);
		}
	}

	/* trailing flag */
	status = fread(&probe->ignore, sizeof(unsigned char), 1, stream);
	GDL_FREAD_STATUS (status, 1);

	return probe;
}

/*
 * Write one probe record to a binary stream.
 *
 *   stream    - output stream
 *   p         - probe to write
 *   npop      - number of populations stored with the probe
 *   pop_sizes - number of observations in each population
 *
 * Fields are written in this order:
 *   size (size_t), start[size] (long), end[size] (long),
 *   strand (unsigned char), name (via gdl_string_fwrite),
 *   then, when npop is non-zero: mean[npop], var[npop] (double) followed
 *   by data[i][pop_sizes[i]] (double) for each non-empty population,
 *   and finally the ignore flag (unsigned char).
 *
 * Each write is checked with GDL_FWRITE_STATUS.
 *
 * Returns GDL_SUCCESS once every field has been written, GDL_EINVAL when
 * stream or p is null.
 */
int
gdl_genex_probe_fwrite (FILE * stream, const gdl_genex_probe * p, const size_t npop, const size_t * pop_sizes)
{
	int status;
	size_t k;

	if (!stream || !p)
	{
		return GDL_EINVAL;
	}

	/* coordinates */
	status = fwrite(&p->size, sizeof(size_t), 1, stream);
	GDL_FWRITE_STATUS (status, 1);
	status = fwrite(p->start, sizeof(long), p->size, stream);
	GDL_FWRITE_STATUS (status, p->size);
	status = fwrite(p->end, sizeof(long), p->size, stream);
	GDL_FWRITE_STATUS (status, p->size);

	/* strand and name */
	status = fwrite(&p->strand, sizeof(unsigned char), 1, stream);
	GDL_FWRITE_STATUS (status, 1);
	status = gdl_string_fwrite (stream, p->name);
	GDL_FWRITE_STATUS (status, GDL_SUCCESS);

	/* per-population statistics and signal */
	if (npop)
	{
		status = fwrite(p->mean, sizeof(double), npop, stream);
		GDL_FWRITE_STATUS (status, npop);
		status = fwrite(p->var, sizeof(double), npop, stream);
		GDL_FWRITE_STATUS (status, npop);
		for (k = 0; k < npop; k++)
		{
			if (!pop_sizes[k])
			{
				continue;
			}
			status = fwrite(p->data[k], sizeof(double), pop_sizes[k], stream);
			GDL_FWRITE_STATUS (status, pop_sizes[k]);
		}
	}

	/* trailing flag */
	status = fwrite(&p->ignore, sizeof(unsigned char), 1, stream);
	GDL_FWRITE_STATUS (status, 1);

	return GDL_SUCCESS;
}
