/*
 *  genex/gene.c
 * 
 *  $Author: baptiste $, $Date: 2008-05-13 15:33:52 $, $Version$
 *  
 *  Libgdl : a C library for statistical genetics
 * 
 *  Copyright (C) 2003-2006  Jean-Baptiste Veyrieras, INRA, France.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA * 
 */
#include <stdio.h>

#include <gdl/gdl_common.h>
#include <gdl/gdl_errno.h>
#include <gdl/gdl_string.h>
#include <gdl/gdl_io.h>
#include <gdl/gdl_hash.h>
#include <gdl/gdl_list.h>
#include <gdl/gdl_sort_double.h>
#include <gdl/gdl_genex_chromosome.h>
#include <gdl/gdl_genex_genome.h>

/*
 * Builds the path of the database file of the i-th chromosome of g,
 * formatted as "<dbdir>/<chromosome name>_exp.db".
 *
 * The string is produced by gdl_string_sprintf; the callers in this file
 * release it with gdl_string_free once they are done with it.
 */
static gdl_string *
gdl_genex_genome_chromfile (const gdl_genex_genome * g, size_t i)
{
	const gdl_string * dir  = g->dbdir;
	const gdl_string * name = g->chroms[i];
	
	return gdl_string_sprintf ("%s/%s_exp.db", dir, name);
}

/*
 * Parses a genome configuration stream into a hash table.
 *
 * Every line is split with gdl_string_next_token. The first token is the
 * chromosome name and becomes the hash key; all remaining tokens (the
 * population names) are appended, in order, to a gdl_list stored under
 * that key.
 *
 * Lines that yield no token at all (e.g. blank lines) are skipped, so that
 * a null key is never handed to gdl_hashtable_add and no orphan list is
 * allocated for them.
 *
 * Returns the hash table (chromosome name -> list of population names).
 * The lists are registered with a 0 last argument to gdl_hashtable_add and
 * are freed explicitly by the caller in this file.
 */
static gdl_hashtable *
gdl_genex_genome_read_config (FILE * stream)
{
	size_t i, j, n;
	gdl_string * line = 0, * tok;
	gdl_hashtable * chroms;
	gdl_list * pops;
	
	chroms = gdl_hashtable_alloc (gdl_hash_default, 0);
	
	while (gdl_getline (&line, &n, stream) != -1)
	{
		i=j=0;
		// first the chromosome name
		tok = gdl_string_next_token (line, n, &i, &j);
		if (tok)
		{
			pops = gdl_list_alloc (gdl_string_interface);
			gdl_hashtable_add (chroms, tok, pops, 0);
			// the key token is released right after insertion, as before
			gdl_string_free (tok);
			// then the population names; the tokens are handed to the list
			// (NOTE(review): the trailing 1 presumably transfers ownership - confirm)
			while ((tok = gdl_string_next_token (line, n, &i, &j))!=0)
			{
				gdl_list_push_back (pops, tok, 1);
			}
		}
		// the line buffer is released after every line, tokens or not
		GDL_FREE (line);
		line=0;
	}
	
	return chroms;
}

/*
 * Builds the on-disk chromosome databases of g from a configuration file.
 *
 * The configuration file is parsed with gdl_genex_genome_read_config, then
 * for every chromosome name found:
 *   1 - if probe_dir is given, "<probe_dir>/<name>.prb" is loaded into a
 *       new chromosome;
 *   2 - still only if probe_dir is given, "<pop_dir>/<name>.exp" is loaded
 *       for each population entry of that chromosome;
 *   3 - if refseq_dir is given, "<refseq_dir>/<name>.refseq" is read (into
 *       a freshly allocated chromosome, with the force flag set, when no
 *       probe data was loaded);
 *   4 - the chromosome is written to "<dbdir>/<name>_exp.db" and a clone of
 *       its name is stored in g->chroms.
 *
 * g->dbdir must be set before the call. On return g->nchrom and g->chroms
 * are always initialised.
 *
 * Returns GDL_SUCCESS, or GDL_EINVAL when the configuration file could not
 * be opened (g then describes an empty genome).
 *
 * NOTE(review): when both probe_dir and refseq_dir are null, chrom is still
 * null when it reaches gdl_genex_chromosome_fwrite / _free, exactly as in
 * the previous version - confirm that those helpers accept a null pointer.
 */
static int
gdl_genex_genome_process_config (gdl_genex_genome * g, 
                                 const gdl_string * config_file,
                                 const gdl_string * probe_dir,
                                 const gdl_string * refseq_dir)
{
	size_t i, j;
	gdl_hashtable * chroms;
	gdl_hashtable_itr * chroms_itr;
	gdl_list * pops;
	FILE * stream;

	stream = gdl_fileopen (config_file, "r");
	if (!stream)
	{
		// leave g in a state that gdl_genex_genome_free can handle
		g->nchrom = 0;
		g->chroms = 0;
		return GDL_EINVAL;
	}
	chroms = gdl_genex_genome_read_config (stream);
	gdl_fileclose (config_file, stream);
	
	g->nchrom  = gdl_hashtable_size (chroms);
	g->chroms  = GDL_MALLOC (gdl_string *, g->nchrom);
	
	// an empty table must not enter the do/while below: it would read an
	// entry that does not exist and write past the zero-length g->chroms
	if (g->nchrom > 0)
	{
		chroms_itr = gdl_hashtable_iterator (chroms);
		j = 0;
		do
		{
			size_t npop;
			gdl_string * pop_dir, * file;
			const gdl_string * name;
			gdl_genex_chromosome * chrom=0;
			
			name = gdl_hashtable_iterator_key (chroms_itr);
			pops = (gdl_list *) gdl_hashtable_iterator_value (chroms_itr);
			
			// one expression data set is expected per population entry
			npop = gdl_list_size (pops);
			
			// 1 - Go into probe_dir and load the chromosome
			if (probe_dir)
			{
				file   = gdl_string_sprintf ("%s/%s.prb", probe_dir, name);
				stream = gdl_fileopen (file, "r");
				chrom  = gdl_genex_chromosome_alloc (gdl_string_clone (name), npop);
				gdl_genex_chromosome_fscanf (stream, chrom);
				gdl_fileclose (file, stream);
				gdl_string_free (file);
				// 2 - Go into the pop dirs and load the expression data
				for (i = 0; i < npop; i++) 
				{
					pop_dir = (gdl_string *) gdl_list_get (pops, i);
					file   = gdl_string_sprintf ("%s/%s.exp", pop_dir, name);
					stream = gdl_fileopen (file, "r");
					gdl_genex_chromosome_fscanf_expr (stream, i, chrom);
					gdl_fileclose (file, stream);
					gdl_string_free (file);
				}
			}
			// 3 - Read refseq info if exists
			if (refseq_dir)
			{
				gdl_boolean force = gdl_false;
				if (!chrom)
				{
					// no probe data: start from an empty chromosome and
					// pass force = gdl_true to the refseq reader
					chrom = gdl_genex_chromosome_alloc (gdl_string_clone (name), npop);
					force = gdl_true;
				}
				file = gdl_string_sprintf ("%s/%s.refseq", refseq_dir, name);
				stream = gdl_fileopen (file, "r");
				gdl_genex_chromosome_fscanf_refseq (stream, chrom, force);
				gdl_fileclose (file, stream);
				gdl_string_free (file);
			}
			// 4 - save it
			file = gdl_string_sprintf ("%s/%s_exp.db", g->dbdir, name);
			stream = gdl_fileopen (file, "w");
			gdl_genex_chromosome_fwrite (stream, chrom);
			gdl_fileclose (file, stream);
			gdl_string_free (file);
			
			g->chroms[j++] = gdl_string_clone (name);
			
			// the chromosome only lives on disk from now on; the list was
			// added to the table with a 0 flag, so it is freed by hand here
			gdl_genex_chromosome_free (chrom);		
			gdl_list_free (pops);
		}
		while (gdl_hashtable_iterator_next (chroms_itr));
		
		gdl_hashtable_iterator_free (chroms_itr);
	}
	
	gdl_hashtable_free (chroms);
	
	return GDL_SUCCESS;
}

/*
 * Allocates a genome whose chromosome databases live in dbdir.
 *
 * dbdir is cloned into the new object, then the configuration file is
 * processed with gdl_genex_genome_process_config, which fills in the
 * chromosome names and writes the per-chromosome database files.
 *
 * Returns the new genome; release it with gdl_genex_genome_free.
 */
gdl_genex_genome *
gdl_genex_genome_alloc (const gdl_string * config_file, const gdl_string * probe_dir, const gdl_string * refseq_dir, const gdl_string * dbdir)
{
	gdl_genex_genome * genome = GDL_MALLOC (gdl_genex_genome, 1);
	
	genome->dbdir = gdl_string_clone (dbdir);
	gdl_genex_genome_process_config (genome, config_file, probe_dir, refseq_dir);
	
	return genome;
}

/*
 * Returns the number of chromosomes recorded in g (its nchrom field).
 */
size_t
gdl_genex_genome_size (const gdl_genex_genome * g)
{
	const size_t chromosome_count = g->nchrom;
	
	return chromosome_count;
}

/*
 * Loads the i-th chromosome of g from its database file.
 *
 * The file path is obtained from gdl_genex_genome_chromfile, the file is
 * opened for reading and decoded with gdl_genex_chromosome_fread.
 *
 * Returns whatever gdl_genex_chromosome_fread returned; the callers in this
 * file release it with gdl_genex_chromosome_free.
 */
gdl_genex_chromosome *
gdl_genex_genome_get (const gdl_genex_genome * g, size_t i)
{
	gdl_string * path = gdl_genex_genome_chromfile (g, i);
	FILE * in = gdl_fileopen (path, "r");
	gdl_genex_chromosome * chrom = gdl_genex_chromosome_fread (in);
	
	gdl_fileclose (path, in);
	gdl_string_free (path);
	
	return chrom;
}

/*
 * Writes chromosome c to the database file of the i-th chromosome of g,
 * replacing the previous content (the file is opened in "w" mode).
 *
 * Returns the status reported by gdl_genex_chromosome_fwrite.
 */
int
gdl_genex_genome_set (const gdl_genex_genome * g, size_t i, gdl_genex_chromosome * c)
{
	gdl_string * path = gdl_genex_genome_chromfile (g, i);
	FILE * out = gdl_fileopen (path, "w");
	int write_status = gdl_genex_chromosome_fwrite (out, c);
	
	gdl_fileclose (path, out);
	gdl_string_free (path);
	
	return write_status;
}

/*
 * Flags probes of the genome according to the rank of their variance score.
 *
 * The chromosomes are loaded from disk one at a time, three times over:
 *   1 - to count the probes of the whole genome (n);
 *   2 - to compute gdl_genex_probe_var_score for each probe;
 *   3 - to set ignore='y' on every probe whose rank in the sorted scores is
 *       lower than size.
 *
 * Since gdl_genex_genome_get returns a copy read from the database file, a
 * chromosome whose probes were flagged in pass 3 is written back with
 * gdl_genex_genome_set before being freed; otherwise the flags would be
 * discarded together with the in-memory copy.
 *
 * NOTE(review): the rank comes from gdl_sort_index, presumably ascending
 * like GSL's gsl_sort_index; under that assumption it is the `size` probes
 * with the LOWEST scores that get ignore='y'. Confirm that this matches the
 * intent suggested by the function name.
 */
void
gdl_genex_genome_select_most_variable_probe (gdl_genex_genome * v, size_t size)
{
	size_t i, j, k, n, m;
	size_t * idx;
	double * vpr;
	gdl_genex_chromosome * c;
	gdl_genex_probe ** p;
	
	// pass 1: total number of probes over all chromosomes
	for (n = i = 0; i < v->nchrom; i++)
	{
		c = gdl_genex_genome_get (v, i);
		n += gdl_genex_chromosome_probe_size (c);
		gdl_genex_chromosome_free (c);
	}
	// pass 2: one score per probe, indexed by genome-wide position j
	vpr = GDL_MALLOC (double, n);
	for (j = i = 0; i < v->nchrom; i++)
	{
		c = gdl_genex_genome_get (v, i);
		m = gdl_genex_chromosome_probe_size (c);
		p = gdl_genex_chromosome_probes (c);
		for (k = 0; k < m; k++, j++)
		{
			vpr[j] = gdl_genex_probe_var_score (p[k], c->pop_sizes, c->npop);
		}
		GDL_FREE (p);
		gdl_genex_chromosome_free (c);
	}
	// idx[0..n-1] receives the sort permutation, idx[n..2n-1] its inverse,
	// i.e. idx[n+j] is the rank of probe j among the sorted scores
	idx = GDL_MALLOC (size_t, 2*n);
	gdl_sort_index (idx, vpr, 1, n);
	for (i = 0; i < n; i++) idx[n+idx[i]]=i;
	// pass 3: flag the probes and persist the chromosomes that changed
	for (j = i = 0; i < v->nchrom; i++)
	{
		gdl_boolean changed = gdl_false;
		
		c = gdl_genex_genome_get (v, i);
		m = gdl_genex_chromosome_probe_size (c);
		p = gdl_genex_chromosome_probes (c);
		for (k = 0; k < m; k++, j++)
		{
			if (idx[n+j]<size)
			{
				p[k]->ignore='y';
				changed = gdl_true;
			}
		}
		GDL_FREE (p);
		if (changed)
		{
			// write the flagged chromosome back to its database file
			gdl_genex_genome_set (v, i, c);
		}
		gdl_genex_chromosome_free (c);
	}
	GDL_FREE (idx);
	GDL_FREE (vpr);
}

/*
 * Releases a genome object: every chromosome name, the name array, the
 * database directory string and finally the structure itself.
 *
 * A null pointer is accepted and ignored. The database files on disk are
 * not touched here.
 */
void
gdl_genex_genome_free (gdl_genex_genome * v)
{
	size_t k;
	
	if (!v)
	{
		return;
	}
	
	k = 0;
	while (k < v->nchrom)
	{
		gdl_string_free (v->chroms[k]);
		k++;
	}
	GDL_FREE (v->chroms);
	gdl_string_free (v->dbdir);
	GDL_FREE (v);
}

/*
 * Deletes the database file of every chromosome of v with remove().
 *
 * The result of remove() is not inspected, and the genome object itself is
 * left untouched.
 */
void
gdl_genex_genome_rm (gdl_genex_genome * v)
{
	size_t c = 0;
	
	while (c < v->nchrom)
	{
		gdl_string * path = gdl_genex_genome_chromfile (v, c);
		
		remove (path);
		gdl_string_free (path);
		c++;
	}	
}

/*
 * Reads a genome descriptor from a binary stream.
 *
 * The layout read is: the database directory string, the number of
 * chromosomes as a raw size_t, then one string per chromosome name. Each
 * read is checked with GDL_FREAD_STATUS.
 *
 * Returns the new genome, or 0 when stream is null.
 */
gdl_genex_genome *
gdl_genex_genome_fread (FILE * stream)
{
	int status;
	size_t k;
	gdl_genex_genome * genome;
	
	if (!stream)
	{
		return 0;
	}
	
	genome = GDL_MALLOC (gdl_genex_genome, 1);
	
	// database directory
	genome->dbdir = gdl_string_fread (stream);
	GDL_FREAD_STATUS (genome->dbdir!=0, 1);
	
	// number of chromosomes
	status = fread (&genome->nchrom, sizeof(size_t), 1, stream);
	GDL_FREAD_STATUS (status, 1);
	
	// chromosome names
	genome->chroms = GDL_MALLOC (gdl_string *, genome->nchrom);
	for (k = 0; k < genome->nchrom; k++)
	{
		genome->chroms[k] = gdl_string_fread (stream);
		GDL_FREAD_STATUS (genome->chroms[k]!=0, 1);
	}
	
	return genome;
}

/*
 * Writes a genome descriptor to a binary stream.
 *
 * The layout written is: the database directory string, the number of
 * chromosomes as a raw size_t, then one string per chromosome name. Each
 * write is checked with GDL_FWRITE_STATUS.
 *
 * Returns GDL_SUCCESS when the end of the function is reached, or
 * GDL_EINVAL when stream or v is null.
 */
int
gdl_genex_genome_fwrite (FILE * stream, const gdl_genex_genome * v)
{
	int status;
	size_t k;
	
	if (!stream || !v)
	{
		return GDL_EINVAL;
	}
	
	// database directory
	status = gdl_string_fwrite (stream, v->dbdir);
	GDL_FWRITE_STATUS (status, GDL_SUCCESS);
	
	// number of chromosomes
	status = fwrite (&v->nchrom, sizeof(size_t), 1, stream);
	GDL_FWRITE_STATUS (status, 1);
	
	// chromosome names
	for (k = 0; k < v->nchrom; k++)
	{
		status = gdl_string_fwrite (stream, v->chroms[k]);
		GDL_FWRITE_STATUS (status, GDL_SUCCESS);
	}
	
	return GDL_SUCCESS;
}
