/*
 *  snp/genome.c 
 * 
 *  $Author: baptiste $, $Date: 2008-05-13 15:22:03 $, $Version$
 *  
 *  Libgdl : a C library for statistical genetics
 * 
 *  Copyright (C) 2003-2006  Jean-Baptiste Veyrieras, INRA, France.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include <gdl/gdl_common.h>
#include <gdl/gdl_errno.h>
#include <gdl/gdl_string.h>
#include <gdl/gdl_io.h>
#include <gdl/gdl_hash.h>
#include <gdl/gdl_list.h>
#include <gdl/gdl_snp_data.h>
#include <gdl/gdl_cnv_data.h>
#include <gdl/gdl_snp_map.h>
#include <gdl/gdl_snp_chromosome.h>
#include <gdl/gdl_snp_annotation.h>
#include <gdl/gdl_snp_genome.h>

/*
 * Build the path of the database file that stores chromosome number i of
 * the genome: "<dbdir>/<chromosome name>_snp.db".
 *
 * The string is produced by gdl_string_sprintf; every caller in this file
 * releases it with gdl_string_free once done.
 */
static gdl_string *
gdl_snp_genome_chromfile (const gdl_snp_genome * g, size_t i)
{
	gdl_string * path;

	path = gdl_string_sprintf ("%s/%s_snp.db", g->dbdir, g->chroms[i]);

	return path;
}

/*
 * Read the genome configuration file and create one chromosome database
 * file per configured chromosome.
 *
 * Every line of config_file is tokenised with gdl_string_next_token: the
 * first token is the chromosome name, the following tokens are collected
 * in a list and later handed, as an array, to gdl_snp_chromosome_alloc
 * together with format.  Lines without any token are skipped.
 *
 * For each chromosome the name is cloned into g->chroms, the chromosome
 * object is written to the path given by gdl_snp_genome_chromfile and is
 * then released; only the names and their count (g->nchrom) stay in g.
 *
 * Returns GDL_SUCCESS, or GDL_EINVAL when the configuration file or one of
 * the chromosome files could not be opened.
 */
static int
gdl_snp_genome_process_config (gdl_snp_genome * g, 
                               const gdl_string * config_file,
                               const gdl_snp_data_format * format)
{
	int status = GDL_SUCCESS;
	size_t i, j, n;
	gdl_string * line = 0, * tok;
	gdl_hashtable * chroms;
	gdl_hashtable_itr * chroms_itr;
	gdl_list * pops;
	FILE * stream;

	stream = gdl_fileopen (config_file, "r");
	if (stream == 0)
	{
		return GDL_EINVAL;
	}
	
	chroms = gdl_hashtable_alloc (gdl_hash_default, 0);
	
	while (gdl_getline (&line, &n, stream) != -1)
	{
		i=j=0;
		// first the chromosome name (absent on a blank line)
		tok  = gdl_string_next_token (line, n, &i, &j);
		if (tok != 0)
		{
			pops = gdl_list_alloc (gdl_string_interface);
			gdl_hashtable_add (chroms, tok, pops, 0);
			gdl_string_free (tok);
			// then the population names
			while ((tok = gdl_string_next_token (line, n, &i, &j))!=0)
			{
				gdl_list_push_back (pops, tok, 1);
			}
		}
		GDL_FREE (line);
		line=0;
	}
	gdl_fileclose (config_file, stream);
	
	g->nchrom  = gdl_hashtable_size (chroms);
	g->chroms  = GDL_MALLOC (gdl_string *, g->nchrom);
	
	// an empty table has nothing to iterate over: never touch the
	// iterator in that case
	chroms_itr = (g->nchrom > 0) ? gdl_hashtable_iterator (chroms) : 0;
	
	if (chroms_itr != 0)
	{
		j = 0;
		do
		{
			size_t npop;
			gdl_string ** dirs, * file;
			const gdl_string * name;
			gdl_snp_chromosome * chrom;
			
			name = gdl_hashtable_iterator_key (chroms_itr);
			pops = (gdl_list *) gdl_hashtable_iterator_value (chroms_itr);
			
			// copy the population names to an array
			npop = gdl_list_size (pops);
			dirs = GDL_MALLOC (gdl_string *, npop);
			for (i = 0; i < npop; i++)
			{
				dirs[i] = (gdl_string *) gdl_list_get (pops, i);
			}
			
			// create the chromosome view
			chrom = gdl_snp_chromosome_alloc (name, dirs, npop, format);
			// save it to the dbdir; the name must be stored first because
			// gdl_snp_genome_chromfile reads it back from g->chroms
			g->chroms[j] = gdl_string_clone (name);
			file   = gdl_snp_genome_chromfile (g, j++);
			stream = gdl_fileopen (file, "w");
			if (stream != 0)
			{
				gdl_snp_chromosome_fwrite (stream, chrom);
				gdl_fileclose (file, stream);
			}
			else
			{
				status = GDL_EINVAL;
			}
			// clean it
			gdl_string_free (file);
			gdl_snp_chromosome_free (chrom);
			
			GDL_FREE (dirs);
			// the table was filled with a 0 last argument, so the lists
			// are released here by hand
			gdl_list_free (pops);
		}
		while (gdl_hashtable_iterator_next (chroms_itr));
		
		gdl_hashtable_iterator_free (chroms_itr);
	}
	
	gdl_hashtable_free (chroms);
	
	return status;
}

/*
 * Create a genome object.
 *
 * The object is zero-allocated, receives its own copy of dbdir, and is
 * then populated by gdl_snp_genome_process_config from config_file and
 * format.  The outcome of that step is not inspected here.
 *
 * Returns the new object; release it with gdl_snp_genome_free.
 */
gdl_snp_genome *
gdl_snp_genome_alloc (const gdl_string * config_file, const gdl_snp_data_format * format, const gdl_string * dbdir)
{
	gdl_snp_genome * genome = GDL_CALLOC (gdl_snp_genome, 1);

	genome->dbdir = gdl_string_clone (dbdir);
	gdl_snp_genome_process_config (genome, config_file, format);

	return genome;
}

/*
 * Release a genome object: each chromosome name, the name array, the
 * database directory string, the annotation dictionary and finally the
 * object itself.  A null pointer is accepted and ignored.
 */
void
gdl_snp_genome_free (gdl_snp_genome * v)
{
	size_t k;

	if (v == 0)
	{
		return;
	}

	for (k = 0; k < v->nchrom; ++k)
	{
		gdl_string_free (v->chroms[k]);
	}
	GDL_FREE (v->chroms);

	gdl_string_free (v->dbdir);
	gdl_snp_annot_dico_free (v->dico);

	GDL_FREE (v);
}

/*
 * Return the number of chromosomes recorded in the genome (g->nchrom).
 */
size_t
gdl_snp_genome_size (const gdl_snp_genome * g)
{
	const size_t count = g->nchrom;

	return count;
}

static int
gdl_snp_genome_set (const gdl_snp_genome * g, size_t i, const gdl_snp_chromosome * c)
{
	gdl_string * file;
	FILE * stream;
	
	file = gdl_snp_genome_chromfile (g, i);
	
	stream = gdl_fileopen (file, "w");
	
	gdl_snp_chromosome_fwrite (stream, c);
	
	gdl_fileclose (file, stream);
	
	gdl_string_free (file);
}

/*
 * Load chromosome i of the genome from its database file.
 *
 * The chromosome is read with gdl_snp_chromosome_fread and its dico field
 * is pointed at the genome's annotation dictionary (g->dico); the pointer
 * is shared, not copied.
 *
 * Returns the chromosome, or 0 when the file could not be opened or
 * gdl_snp_chromosome_fread returned 0.  Callers in this file release the
 * result with gdl_snp_chromosome_free.
 */
gdl_snp_chromosome *
gdl_snp_genome_get (const gdl_snp_genome * g, size_t i)
{
	gdl_string * file;
	FILE * stream;
	gdl_snp_chromosome * c = 0;
	
	file = gdl_snp_genome_chromfile (g, i);
	
	stream = gdl_fileopen (file, "r");
	
	if (stream != 0)
	{
		c = gdl_snp_chromosome_fread (stream);
		
		// a failed read yields no object to attach the dictionary to
		if (c != 0)
		{
			c->dico = g->dico;
		}
		
		gdl_fileclose (file, stream);
	}
	
	gdl_string_free (file);
	
	return c;
}

/*
 * Annotate every chromosome of the genome from the files found in
 * annot_dir.
 *
 * For chromosome "name" the file "<annot_dir>/<name>.ano" is looked up.
 * When it exists the chromosome is loaded, the file is parsed into it with
 * a shared gdl_snp_annot_reader, and the chromosome is written back to its
 * database file.  When it does not exist a message is printed on stderr
 * and the chromosome is left untouched.
 *
 * After all chromosomes have been processed, g->dico is set to the
 * dictionary built from the reader by gdl_snp_annot_dico_alloc.
 *
 * Returns GDL_SUCCESS, or GDL_EINVAL when at least one chromosome that has
 * an annotation file could not be loaded.
 */
int
gdl_snp_genome_annotation (gdl_snp_genome * g, const gdl_string * annot_dir)
{
	int status = GDL_SUCCESS;
	size_t i;
	gdl_string * file;
	FILE * stream;
	gdl_snp_annot_reader * reader;
	
	reader = gdl_snp_annot_reader_alloc ();
	
	for (i = 0; i < g->nchrom; i++)
	{
		file = gdl_string_sprintf ("%s/%s.ano", annot_dir, g->chroms[i]);
		
		if (gdl_isfile (file))
		{
			gdl_snp_chromosome * c;
			
			c = gdl_snp_genome_get (g, i);
			
			if (c != 0)
			{
				stream = gdl_fileopen (file, "r");
				
				gdl_snp_annot_reader_parse (reader, c, stream);
				
				gdl_fileclose (file, stream);
				
				gdl_snp_genome_set (g, i, c);
				
				gdl_snp_chromosome_free (c);
			}
			else
			{
				status = GDL_EINVAL;
			}
		}
		else
		{
			fprintf (stderr, "No annotation for chromosome %s\n", g->chroms[i]);	
		}
		
		// the path is rebuilt on every iteration, release the old one
		gdl_string_free (file);
	}
	
	g->dico = gdl_snp_annot_dico_alloc (reader);
	
	gdl_snp_annot_reader_free (reader);
	
	return status;
}

/*
 * Attach a genetic map to every chromosome of the genome from the files
 * found in gmap_dir.
 *
 * For chromosome "name" the file "<gmap_dir>/<name>.gmap" is looked up.
 * When it exists the chromosome is loaded, the file is passed to
 * gdl_snp_chromosome_gmap, and the chromosome is written back to its
 * database file.  When it does not exist a message is printed on stderr
 * and the chromosome is left untouched.
 *
 * Returns GDL_SUCCESS, or GDL_EINVAL when at least one chromosome that has
 * a map file could not be loaded.
 */
int
gdl_snp_genome_gmap (gdl_snp_genome * g, const gdl_string * gmap_dir)
{
	int status = GDL_SUCCESS;
	size_t i;
	gdl_string * file;
	FILE * stream;
	
	for (i = 0; i < g->nchrom; i++)
	{
		file = gdl_string_sprintf ("%s/%s.gmap", gmap_dir, g->chroms[i]);
		
		if (gdl_isfile (file))
		{
			gdl_snp_chromosome * c;
			
			c = gdl_snp_genome_get (g, i);
			
			if (c != 0)
			{
				stream = gdl_fileopen (file, "r");
				
				gdl_snp_chromosome_gmap (c, stream);
				
				gdl_fileclose (file, stream);
				
				gdl_snp_genome_set (g, i, c);
				
				gdl_snp_chromosome_free (c);
			}
			else
			{
				status = GDL_EINVAL;
			}
		}
		else
		{
			fprintf (stderr, "No genetic map for chromosome %s\n", g->chroms[i]);	
		}
		
		// the path is rebuilt on every iteration, release the old one
		gdl_string_free (file);
	}
	
	return status;
}

/*
 * Load CNV data into the chromosomes of the genome.
 *
 * config_file is tokenised line by line: the first token of a line is a
 * chromosome name, the remaining tokens are collected in a list attached
 * to that name.  Lines without any token are skipped.
 *
 * For every chromosome of the genome that is named in the configuration,
 * the chromosome is loaded, a zero-filled cnvs array with one slot per
 * listed entry is installed, and for entry "dir" the file
 * "<dir>/<chromosome name>.cnv" is read with gdl_cnv_data_fscanf when it
 * exists (a message goes to stderr otherwise, leaving the slot at 0).
 * The chromosome is then written back to its database file.
 *
 * Progress lines are printed on stdout.
 *
 * Returns GDL_SUCCESS, or GDL_EINVAL when the configuration file cannot be
 * opened or a configured chromosome cannot be loaded.
 */
int
gdl_snp_genome_cnv (gdl_snp_genome * g, const gdl_string * config_file)
{
	int status = GDL_SUCCESS;
	size_t i, j, c, n;
	gdl_string * file, * line, * tok;
	FILE * stream;
	gdl_hashtable * chroms;
	gdl_hashtable_itr * chroms_itr;
	gdl_list * pops;
	
	stream = gdl_fileopen (config_file, "r");
	if (stream == 0)
	{
		return GDL_EINVAL;
	}
	
	chroms = gdl_hashtable_alloc (gdl_hash_default, 0);
	
	line=0;
	while (gdl_getline (&line, &n, stream) != -1)
	{
		i=j=0;
		// first the chromosome name (absent on a blank line)
		tok  = gdl_string_next_token (line, n, &i, &j);
		if (tok != 0)
		{
			pops = gdl_list_alloc (gdl_string_interface);
			gdl_hashtable_add (chroms, tok, pops, 0);
			gdl_string_free (tok);
			// then the population names
			while ((tok = gdl_string_next_token (line, n, &i, &j))!=0)
			{
				gdl_list_push_back (pops, tok, 1);
			}
		}
		GDL_FREE (line);
		line=0;
	}
	gdl_fileclose (config_file, stream);
	
	for (c = 0; c < g->nchrom; c++)
	{
		printf ("CHROM %s\n", g->chroms[c]);
		
		pops = (gdl_list *) gdl_hashtable_lookup (chroms, g->chroms[c]);
		
		if (pops)
		{
			size_t npop;
			gdl_string ** dirs;
			gdl_snp_chromosome * chrom = gdl_snp_genome_get (g, c);
			
			if (chrom == 0)
			{
				status = GDL_EINVAL;
				continue;
			}
			
			// copy the population names to an array
			npop = gdl_list_size (pops);
			dirs = GDL_MALLOC (gdl_string *, npop);
			for (i = 0; i < npop; i++)
			{
				dirs[i] = (gdl_string *) gdl_list_get (pops, i);
			}
			
			chrom->cnvs = GDL_CALLOC (gdl_cnv_data *, npop);
			
			// read cnv data for each pop
			for (i = 0; i < npop; i++) 
			{
				file = gdl_string_sprintf ("%s/%s.cnv", dirs[i], g->chroms[c]);

				printf ("READ DATA FROM %s\n", file);

				if (gdl_isfile (file))
				{
					stream = gdl_fileopen (file, "r");
					chrom->cnvs[i] = gdl_cnv_data_fscanf (stream);
					gdl_fileclose (file, stream);
				}
				else
				{
					fprintf (stderr, "No CNV data for chromosome %s in popultion %s\n", g->chroms[c], dirs[i]);	
				}
				
				// the path is rebuilt for every population
				gdl_string_free (file);
			}
			
			gdl_snp_genome_set (g, c, chrom);
			
			gdl_snp_chromosome_free (chrom);
			GDL_FREE (dirs);
		}		
	}
	
	// The table was filled with a 0 last argument, so the lists are
	// released by hand: all of them, including those of chromosomes
	// that are not part of this genome.
	if (gdl_hashtable_size (chroms) > 0)
	{
		chroms_itr = gdl_hashtable_iterator (chroms);
		if (chroms_itr != 0)
		{
			do
			{
				gdl_list_free ((gdl_list *) gdl_hashtable_iterator_value (chroms_itr));
			}
			while (gdl_hashtable_iterator_next (chroms_itr));
			
			gdl_hashtable_iterator_free (chroms_itr);
		}
	}
	
	gdl_hashtable_free (chroms);
	
	return status;
}

/*
 * Delete the database file of every chromosome of the genome.
 *
 * Each path is obtained from gdl_snp_genome_chromfile and passed to
 * remove(); the result of remove() is not examined.  The genome object
 * itself is left untouched.
 */
void
gdl_snp_genome_rm (gdl_snp_genome * v)
{
	size_t k = 0;
	
	while (k < v->nchrom)
	{
		gdl_string * path = gdl_snp_genome_chromfile (v, k);
		
		remove (path);
		gdl_string_free (path);
		
		k++;
	}
}

/*
 * Read a genome object from a binary stream.
 *
 * The fields are read in this order: the database directory string, the
 * chromosome count (one size_t), one string per chromosome, a one-byte
 * flag, and, only when that flag is the character '1', the annotation
 * dictionary.  This mirrors what gdl_snp_genome_fwrite emits.
 *
 * Each read is followed by a GDL_FREAD_STATUS check; that macro is defined
 * outside this file (presumably it reports the error and leaves the
 * function - confirm against its definition).
 *
 * Returns the new object, or 0 when stream is null.
 */
gdl_snp_genome *
gdl_snp_genome_fread (FILE * stream)
{
	if (stream)
	{
		int status;
		size_t i;
		unsigned char has;
		gdl_snp_genome * v;
		
		// zero-filled so that dico is a null pointer when the stream
		// carries no dictionary; gdl_snp_genome_free and
		// gdl_snp_genome_fwrite both read that field
		v = GDL_CALLOC (gdl_snp_genome, 1);
		
		v->dbdir = gdl_string_fread (stream);
		GDL_FREAD_STATUS (v->dbdir!=0, 1);
		status = fread (&v->nchrom, sizeof(size_t), 1, stream);
		GDL_FREAD_STATUS (status, 1);
		v->chroms = GDL_MALLOC (gdl_string *, v->nchrom);
		for (i = 0; i < v->nchrom; i++)
		{
			v->chroms[i] = gdl_string_fread (stream);
			GDL_FREAD_STATUS (v->chroms[i]!=0, 1);
		}
		status = fread (&has, sizeof(unsigned char), 1, stream);
		GDL_FREAD_STATUS (status, 1);
		if (has == '1')
		{
			v->dico = gdl_snp_annot_dico_fread (stream);
			GDL_FREAD_STATUS (v->dico!=0, 1);
		}
		
		return v;
	}
	return 0;
}

/*
 * Write a genome object to a binary stream.
 *
 * The fields are emitted in this order: the database directory string,
 * the chromosome count (one size_t), one string per chromosome, a one-byte
 * flag that is the character '1' when an annotation dictionary is present
 * and '0' otherwise, and finally the dictionary itself when present.
 *
 * Each write is followed by a GDL_FWRITE_STATUS check; that macro is
 * defined outside this file.
 *
 * Returns GDL_SUCCESS when the end of the function is reached, GDL_EINVAL
 * when stream or v is null.
 */
int
gdl_snp_genome_fwrite (FILE * stream, const gdl_snp_genome * v)
{
	int rc;
	unsigned char flag;
	size_t k;
	
	if (!stream || !v)
	{
		return GDL_EINVAL;
	}
	
	rc = gdl_string_fwrite (stream, v->dbdir);
	GDL_FWRITE_STATUS (rc, GDL_SUCCESS);
	
	rc = fwrite (&v->nchrom, sizeof(size_t), 1, stream);
	GDL_FWRITE_STATUS (rc, 1);
	
	for (k = 0; k < v->nchrom; ++k)
	{
		rc = gdl_string_fwrite (stream, v->chroms[k]);
		GDL_FWRITE_STATUS (rc, GDL_SUCCESS);
	}
	
	// presence marker for the optional dictionary
	flag = '0';
	if (v->dico)
	{
		flag = '1';
	}
	rc = fwrite (&flag, sizeof(unsigned char), 1, stream);
	GDL_FWRITE_STATUS (rc, 1);
	
	if (v->dico)
	{
		rc = gdl_snp_annot_dico_fwrite (stream, v->dico);
		GDL_FWRITE_STATUS (rc, GDL_SUCCESS);
	}
	
	return GDL_SUCCESS;
}
