/*  
 * 	gmap/relative.c
 * 
 *  $Author: baptiste $, $Date: 2008-05-13 15:33:41 $, $Version$
 *
 *  Libgdl : a C library for statistical genetics
 * 
 *  Copyright (C) 2003-2006  Jean-Baptiste Veyrieras, INRA, France.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
 
#include <gdl/gdl_common.h>
#include <gdl/gdl_errno.h>
#include <gdl/gdl_util.h>
#include <gdl/gdl_io.h>
#include <gdl/gdl_fsheet.h>
#include <gdl/gdl_gmap.h>
#include <gdl/gdl_gmap_reader.h>

/*
 * Field-sheet layout used to parse "relative" map files.
 *
 * The initializer is positional, so the meaning of each value is fixed by
 * the declaration of gdl_fsheet_type, which is not visible in this file.
 * NOTE(review): the two integers look like header flags and the strings
 * like separators (tab between fields, newline between rows) -- confirm
 * against the gdl_fsheet_type declaration before changing any of them.
 */
static const gdl_fsheet_type FSHEET =
{
	0,
	1,
	"",
	"\t",
	"\n",
	"\t",
	"\t"	
};

/*
 * Build the field sheet used to parse the reader's input.
 *
 * A clone of the FSHEET layout is handed to gdl_fsheet_alloc() and the
 * resulting sheet is initialized from reader->stream.
 *
 * Returns the sheet produced by gdl_fsheet_alloc(); _relative_read()
 * releases it with gdl_fsheet_free().
 */
static gdl_fsheet *
_fsheet (gdl_gmap_reader * reader)
{
	gdl_fsheet * sheet = gdl_fsheet_alloc (gdl_fsheet_type_clone (&FSHEET));

	gdl_fsheet_init (sheet, reader->stream);

	return sheet;
}

/*
 * First pass over a single-genome sheet.
 *
 * Walks every cell of the sheet and, each time the first cell of a row
 * differs from the first cell of the previous row, starts a new chromosome
 * named after that cell.  Chromosomes are numbered from 1 (chrom->idx) in
 * order of appearance and collected in a genome named "genome".
 *
 * fs: the field sheet to scan.
 *
 * Returns the newly created genome; the caller is responsible for it.
 */
static gdl_genome * 
_relative_single_init (gdl_fsheet * fs)
{
	/* old is NULL until the first row is seen, so the final
	 * gdl_string_free() never receives an indeterminate pointer. */
	gdl_string * old = NULL, * cell;
	gdl_chromosome * chrom = NULL;
	gdl_genome * genome;
	gdl_fsheet_itr * itr;
	size_t nc = 0, nl = 0;
 	
	genome = gdl_genome_new ("genome");
	
	itr = gdl_fsheet_iterator_r (fs);
	
	do
	{
		/* Incremented on every iterator step since the current
		 * chromosome started; it is the size passed to
		 * gdl_chromosome_init() when that chromosome is closed. */
		nl++;
		
		cell = gdl_fsheet_iterator_cell (itr);
		
		if (gdl_fsheet_iterator_is_new_row (itr))
		{
			if (nc == 0)
			{
				/* Very first row: open the first chromosome. */
				chrom      = gdl_chromosome_new (cell);
				chrom->idx = ++nc;
				old = gdl_string_clone (cell);
			}
			else if (strcmp (cell, old))
			{
				/* Chromosome name changed: close the current
				 * chromosome and open the next one. */
				gdl_chromosome_init (chrom, nl);
				gdl_genome_add (genome, chrom);
				chrom      = gdl_chromosome_new (cell);
				chrom->idx = ++nc;
				gdl_string_free (old);
				old = gdl_string_clone (cell);
				nl  = 0;
			}
		}
	}
	while (gdl_fsheet_iterator_next (itr));
	
	/* NOTE(review): unlike the earlier chromosomes, the last one is added
	 * without a gdl_chromosome_init() call -- confirm whether that is
	 * intentional. */
	if (chrom != NULL)
	{
		gdl_genome_add (genome, chrom);
	}
	
	gdl_string_free (old);
	
	gdl_fsheet_iterator_free (itr);
	
	return genome;
}

/*
 * Read a single-genome map from a four-column sheet.
 *
 * Columns, by iterator column index:
 *   0 (first cell of each row)  chromosome name
 *   1                           locus name
 *   2                           position, or "-" which is read as 0
 *   3                           distance unit
 *
 * The genome skeleton is built by _relative_single_init(); this pass then
 * creates one locus per row and attaches it to its chromosome.  On success
 * a new gmap holding the genome is stored in reader->gmap.
 *
 * reader: receives error messages and, on success, the resulting gmap.
 * fs:     the field sheet to read.
 *
 * Returns GDL_SUCCESS, or GDL_EINVAL when a negative position or an unknown
 * distance unit is found.  On error the genome and the pending locus are
 * freed and reader->gmap is left untouched.
 */
static int 
_relative_single_read (gdl_gmap_reader * reader, gdl_fsheet * fs)
{
	int status = GDL_SUCCESS;
	/* NOTE(review): row, col and ridx are never read; they are kept in
	 * case the iterator accessors have side effects -- confirm and drop. */
	size_t ridx, cidx, new_chrom = 0;
	gdl_string * row, * col, * cell, * cname = NULL;
	gdl_locus * locus = NULL;
	gdl_chromosome * chrom = NULL;
	gdl_genome * genome;
	gdl_gdistance dist, odist;
	gdl_fsheet_itr * itr;
	
	genome = _relative_single_init (fs);
	
	itr = gdl_fsheet_iterator_r (fs);
	
	do
	{
		row  = gdl_fsheet_iterator_row (itr);
		col  = gdl_fsheet_iterator_column (itr);
		cell = gdl_fsheet_iterator_cell (itr);
		ridx = gdl_fsheet_iterator_row_idx (itr);
		cidx = gdl_fsheet_iterator_column_idx (itr);
		
		if (gdl_fsheet_iterator_is_new_row (itr)) // chromosome
		{
			if (cname == NULL || strcmp (cname, cell))
			{
				/* Entering a different chromosome: look it up in
				 * the genome built by the first pass. */
				gdl_string_free (cname);
				cname = gdl_string_clone (cell);
				chrom = gdl_genome_search (genome, cell);
				new_chrom = 1;
			}
		}
		if (cidx == 1) // locus
		{
			locus = gdl_locus_new (cell);
		}
		else if (cidx == 2) // position
		{
			if (strcmp (cell, "-"))
			{
				dist.value = (double) atof (cell);
				if (dist.value < 0.)
				{
					gdl_gmap_reader_error (reader, "Find negative distance at locus %s", locus->name);
					gdl_locus_free (locus);
					gdl_genome_free (genome);
					status = GDL_EINVAL;
					goto cleanup;
				}
			}
			else
			{
				dist.value = 0.;
			}
		}
		else if (cidx == 3) // unit
		{
			dist.type 
				= gdl_gdistance_type_parse (cell);
			if (dist.type == gdl_gdistance_unknown)
			{
				gdl_gmap_reader_error (reader, "Unknown distance unit at locus %s", locus->name);
				gdl_genome_free (genome);
				gdl_locus_free (locus);
				status = GDL_EINVAL;
				goto cleanup;
			}
			if (new_chrom)
			{
				/* First locus of a chromosome: no distance yet. */
				gdl_chromosome_push (chrom, locus, 1);
				new_chrom = 0;
			}
			else
			{
				/* odist still holds the distance read on the
				 * previous row at this point. */
				gdl_chromosome_add (chrom, locus, &odist, 1);
			}
			odist.value = dist.value;
			odist.type  = dist.type;
		}		
	}
	while (gdl_fsheet_iterator_next (itr));
	
cleanup:
	/* Shared by the success and error paths so that the iterator and the
	 * cloned chromosome name are always released. */
	gdl_fsheet_iterator_free (itr);
	
	gdl_string_free (cname);
	
	if (status == GDL_SUCCESS)
	{
		reader->gmap = gdl_gmap_alloc ();
		
		gdl_gmap_add (reader->gmap, genome);
	}
	
	return status;
}

/*
 * Read a multi-genome map from a five-column sheet.
 *
 * Columns, by iterator column index:
 *   0 (first cell of each row)  genome name
 *   1                           chromosome name
 *   2                           locus name
 *   3                           position, or "-" which is read as 0
 *   4                           distance unit
 *
 * A new genome is started whenever the genome name differs from the one on
 * the previous row, and a new chromosome whenever the chromosome name
 * differs or a new genome has just been started.  Everything is collected
 * in a gmap allocated here and stored in reader->gmap.
 *
 * reader: receives error messages and the resulting gmap.
 * fs:     the field sheet to read.
 *
 * Returns GDL_SUCCESS, or GDL_EINVAL when a negative position or an unknown
 * distance unit is found.  On error the gmap and the pending locus are
 * freed and reader->gmap is reset to NULL.
 */
static int 
_relative_multiple_read (gdl_gmap_reader * reader, gdl_fsheet * fs)
{
	int status = GDL_SUCCESS;
	/* NOTE(review): row, col and ridx are never read; they are kept in
	 * case the iterator accessors have side effects -- confirm and drop. */
	size_t ridx, cidx, new_chrom = 0, new_genome = 0;
	gdl_string * row, * col, * cell, * gname = NULL, * cname = NULL;
	gdl_locus * locus = NULL;
	gdl_chromosome * chrom = NULL;
	gdl_genome * genome = NULL;
	gdl_gdistance dist, odist;
	gdl_fsheet_itr * itr;
	
	reader->gmap = gdl_gmap_alloc ();
	
	itr = gdl_fsheet_iterator_r (fs);
	
	do
	{
		row  = gdl_fsheet_iterator_row (itr);
		col  = gdl_fsheet_iterator_column (itr);
		cell = gdl_fsheet_iterator_cell (itr);
		ridx = gdl_fsheet_iterator_row_idx (itr);
		cidx = gdl_fsheet_iterator_column_idx (itr);
		
		if (gdl_fsheet_iterator_is_new_row (itr)) // genome
		{
			if (gname == NULL || strcmp (gname, cell))
			{
				gdl_string_free (gname);
				gname  = gdl_string_clone (cell);
				genome = gdl_genome_new (gname);
				gdl_gmap_add (reader->gmap, genome);
				new_genome = 1;
			}			
		}
		if (cidx == 1) // chromosome
		{
			/* A new genome always opens a new chromosome, even when
			 * its name equals the last chromosome of the previous
			 * genome; otherwise the loci would be attached to the
			 * previous genome's chromosome. */
			if (new_genome || cname == NULL || strcmp (cname, cell))
			{
				gdl_string_free (cname);
				cname = gdl_string_clone (cell);
				chrom = gdl_chromosome_new (cname);
				gdl_genome_add (genome, chrom);
				new_chrom  = 1;
				new_genome = 0;
			}	
		}
		if (cidx == 2) // locus
		{
			locus = gdl_locus_new (cell);
		}
		else if (cidx == 3) // position
		{
			if (strcmp (cell, "-"))
			{
				dist.value = (double) atof (cell);
				if (dist.value < 0.)
				{
					gdl_gmap_reader_error (reader, "Find negative distance at locus %s", locus->name);
					gdl_locus_free (locus);
					gdl_gmap_free (reader->gmap);
					reader->gmap=NULL;					
					status = GDL_EINVAL;
					goto cleanup;
				}
			}
			else
			{
				dist.value = 0.;
			}
		}
		else if (cidx == 4) // unit
		{
			dist.type 
				= gdl_gdistance_type_parse (cell);
			if (dist.type == gdl_gdistance_unknown)
			{
				gdl_gmap_reader_error (reader, "Unknown distance unit at locus %s", locus->name);
				gdl_locus_free (locus);
				gdl_gmap_free (reader->gmap);
				reader->gmap=NULL;
				status = GDL_EINVAL;
				goto cleanup;
			}
			if (new_chrom)
			{
				/* First locus of a chromosome: no distance yet. */
				gdl_chromosome_push (chrom, locus, 1);
				new_chrom = 0;
			}
			else
			{
				/* odist still holds the distance read on the
				 * previous row at this point. */
				gdl_chromosome_add (chrom, locus, &odist, 1);
			}
			odist.value = dist.value;
			odist.type  = dist.type;
		}		
	}
	while (gdl_fsheet_iterator_next (itr));
	
cleanup:
	/* Shared by the success and error paths so that the iterator and both
	 * cloned names are always released. */
	gdl_fsheet_iterator_free (itr);
	
	gdl_string_free (cname);
	
	gdl_string_free (gname);
	
	return status;
}

/*
 * Entry point of the "relative" map reader.
 *
 * Builds a field sheet from the reader's stream and dispatches on its
 * column count: 4 columns are read as a single-genome map, 5 columns as a
 * multi-genome map, anything else is reported as an error.  The sheet is
 * freed before returning in every case.
 *
 * Returns the status of the selected reader, or GDL_EINVAL when the column
 * count is neither 4 nor 5.
 */
static int
_relative_read (gdl_gmap_reader * reader)
{
	gdl_fsheet * sheet = _fsheet (reader);
	int rc;

	switch (gdl_fsheet_ncolumn (sheet))
	{
		case 4:
			rc = _relative_single_read (reader, sheet);
			break;
		case 5:
			rc = _relative_multiple_read (reader, sheet);
			break;
		default:
			gdl_gmap_reader_error (reader, "A map must contain at least 4 columns separated by tabulation");
			rc = GDL_EINVAL;
			break;
	}

	gdl_fsheet_free (sheet);

	return rc;
}

/*
 * Descriptor of the "relative" reader: the string "relative" paired with
 * the _relative_read entry point.  The initializer is positional, so the
 * field meanings are set by the declaration of gdl_gmap_reader_type.
 */
static const gdl_gmap_reader_type _relative =
{
	"relative",
	&_relative_read
};

/* Exported, non-static handle pointing at the descriptor above. */
const gdl_gmap_reader_type * gdl_gmap_reader_relative = &_relative;
