/*  
 * 	gview/standard.c
 * 
 *  $Author: baptiste $, $Date: 2008-05-13 15:33:43 $, $Version$
 *
 *  Libgdl : a C library for statistical genetics
 * 
 *  Copyright (C) 2003-2006  Jean-Baptiste Veyrieras, INRA, France.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA * 
 */
 
#include <gdl/gdl_common.h>
#include <gdl/gdl_errno.h>
#include <gdl/gdl_util.h>
#include <gdl/gdl_io.h>
#include <gdl/gdl_fsheet.h>
#include <gdl/gdl_gview.h>
#include <gdl/gdl_gview_reader.h>

/*
 * Default file-sheet layout used by the standard reader.
 *
 * In this file only the `tsep` and `rsep` members are read (by
 * _fsheet_type); the other members are not referenced here.
 *
 * NOTE(review): this is a positional initializer, so which literal ends up
 * in which member depends on the declaration order of gdl_fsheet_type in
 * gdl_fsheet.h -- confirm there before changing any value.
 */
static const gdl_fsheet_type STANDARD =
{
	1,
	1,
	"",
	"\t",
	"\n",
	"\t",
	"\t"	
};

/*
 * NOTE(review): forward declaration only -- no definition or call of this
 * helper appears in the visible part of this file; confirm whether it is
 * still needed.
 */
static gdl_gview_reader_cell * _gdl_gview_reader_cell_new (gdl_gview_reader * reader, const gdl_string * cell);

/*
 * Derive the file-sheet description that matches a reader type.
 *
 * Starts from gdl_fsheet_type_default() and replaces:
 *   - the row/column name flags with has_acc_name / has_loc_name,
 *   - the field separator with the reader's gfsep,
 *   - both name-field separators with the reader's nfsep,
 *   - the tsep and rsep separators with the values held in STANDARD.
 *
 * Every separator already present in the default description is released
 * with gdl_string_free before its replacement clone is stored.
 *
 * Returns the description obtained from gdl_fsheet_type_default().
 */
static gdl_fsheet_type *
_fsheet_type (const gdl_gview_reader_type * type)
{
	gdl_fsheet_type * sheet_type = gdl_fsheet_type_default ();

	/* Separators that are fixed for the standard format. */
	gdl_string_free (sheet_type->tsep);
	sheet_type->tsep = gdl_string_clone (STANDARD.tsep);

	gdl_string_free (sheet_type->rsep);
	sheet_type->rsep = gdl_string_clone (STANDARD.rsep);

	/* Separators taken from the reader type. */
	gdl_string_free (sheet_type->fsep);
	sheet_type->fsep = gdl_string_clone (type->gfsep);

	gdl_string_free (sheet_type->rnfsep);
	sheet_type->rnfsep = gdl_string_clone (type->nfsep);

	gdl_string_free (sheet_type->cnfsep);
	sheet_type->cnfsep = gdl_string_clone (type->nfsep);

	/* Row names carry accessions, column names carry loci. */
	sheet_type->rname = type->has_acc_name;
	sheet_type->cname = type->has_loc_name;

	return sheet_type;
}

/*
 * Create the file sheet for a reader and initialise it from the reader's
 * input stream.
 *
 * Returns the new sheet, or NULL when gdl_fsheet_alloc fails.  The caller
 * (_standard_read) tests for NULL, so the sheet must not be initialised
 * before that has been ruled out here.
 *
 * NOTE(review): ownership of the gdl_fsheet_type built by _fsheet_type is
 * not visible from this file; it is assumed to be taken over by
 * gdl_fsheet_alloc -- confirm, in particular for the failure path.
 */
static gdl_fsheet *
_fsheet (gdl_gview_reader * reader)
{
	gdl_fsheet_type * ftype = _fsheet_type (reader->type);
	gdl_fsheet * fs;
	
	fs = gdl_fsheet_alloc (ftype);
	
	if (fs == NULL)
	{
		/* Nothing to initialise; let the caller report the failure. */
		return NULL;
	}
	
	gdl_fsheet_init (fs, reader->stream);
	
	return fs;
}

/*
 * Allocate and size the genotype view attached to the reader.
 *
 * The dimensions are the sheet's row count, the sheet's column count and
 * the ploidy of the reader type.  If any of the three is zero nothing is
 * allocated and GDL_EINVAL is returned; otherwise a gdl_gview_standard view
 * is stored in reader->gview, initialised, and GDL_SUCCESS is returned.
 */
static int
_standard_init (gdl_gview_reader * reader, gdl_fsheet * fs)
{
	const size_t ploidy = reader->type->ploidy;
	const size_t nrow   = gdl_fsheet_nrow (fs);
	const size_t ncol   = gdl_fsheet_ncolumn (fs);

	if (ploidy == 0 || nrow == 0 || ncol == 0)
	{
		return GDL_EINVAL;
	}

	reader->gview = gdl_gview_alloc (gdl_gview_standard);
	gdl_gview_init (reader->gview, nrow, ncol, ploidy);

	return GDL_SUCCESS;
}

/*
 * Validation pass over the sheet, walked with the iterator returned by
 * gdl_fsheet_iterator_c.
 *
 * Every locus (column) and accession (row) is registered with the reader,
 * and every distinct cell string of a column is run through
 * gdl_gview_reader_parse_cell.  A per-column cache (pcells) keeps a cell
 * string from being parsed more than once within the same column.
 *
 * Returns GDL_SUCCESS when no problem was found and 1 otherwise; the reason
 * is reported through gdl_gview_reader_error.  The iterator and the cell
 * cache are released on every exit path.
 */
static int
_standard_check (gdl_gview_reader * reader, gdl_fsheet * fs)
{
	char * cell, * row, * col;
	size_t ridx, cidx, first = 1;
	gdl_accession  * va;
	gdl_locus      * vl;
	gdl_gview_reader_cell  * pcell;
	gdl_gview_reader_cells * pcells = NULL;
	
	gdl_fsheet_itr * itr = gdl_fsheet_iterator_c (fs);
	
	do 
	{
		row  = gdl_fsheet_iterator_row (itr);
		col  = gdl_fsheet_iterator_column (itr);
		cell = gdl_fsheet_iterator_cell (itr);
		ridx = gdl_fsheet_iterator_row_idx (itr);
		cidx = gdl_fsheet_iterator_column_idx (itr);
		
		if (gdl_fsheet_iterator_is_new_column (itr))
		{
			/*
			 * NOTE(review): `first` is cleared on the first new column
			 * reported by the iterator, so the accession-uniqueness test
			 * further down only runs for cells visited before that point.
			 * Confirm against the iterator semantics that this is intended.
			 */
			if (first)
			{
				first =	0;
			}
			
			vl = gdl_gview_reader_add_locus (reader, col, cidx);
			
			if (vl->idx != cidx)
			{
				gdl_gview_reader_error (reader, "Locus %s is not unique", col);
				/* The cache of the previous column is still alive here. */
				gdl_gview_reader_cells_free (pcells);
				gdl_fsheet_iterator_free (itr);
				return (1);
			}
			
			/* Start a fresh cell cache for the new column. */
			gdl_gview_reader_cells_free (pcells);
			
			pcells = gdl_gview_reader_cells_alloc ();
					
		}
		
		va = gdl_gview_reader_add_accession (reader, row, ridx);
		
		if (first && va->idx != ridx)
		{
			gdl_gview_reader_error (reader, "Accession %s is not unique", row);
			gdl_gview_reader_cells_free (pcells);
			gdl_fsheet_iterator_free (itr);
			return (1);
		}
		
		pcell = gdl_gview_reader_cells_lookup (pcells, cell);
		
		if (pcell == NULL)
		{
			/* Cell string not seen yet in this column: parse it once. */
			pcell = gdl_gview_reader_parse_cell (reader, cell);
		
			if (pcell == NULL)
			{
				/* The indices are size_t: convert explicitly so that the
				 * arguments match the conversion specifiers. */
				gdl_gview_reader_error (reader, "Invalid cell format [%s] at row [%s,%lu] and column [%s,%lu]", cell, row, (unsigned long) ridx, col, (unsigned long) cidx);
				gdl_gview_reader_cells_free (pcells);
				gdl_fsheet_iterator_free (itr);
				return (1);	
			}
			
			gdl_gview_reader_cells_add (pcells, cell, pcell);			
		}
		
	}
	while (gdl_fsheet_iterator_next (itr));
	
	gdl_fsheet_iterator_free (itr);
	
	gdl_gview_reader_cells_free (pcells);
	
	return GDL_SUCCESS;	
}

/*
 * Loading pass over the sheet, walked with the iterator returned by
 * gdl_fsheet_iterator_c.
 *
 * For every cell the accession and (on a new column) the locus are looked
 * up through the reader, the cell string is parsed unless the per-column
 * cache (pcells) already holds it, and the result is stored with
 * gdl_gview_reader_set_gdatapoint.  gdl_gview_reader_update_locus is called
 * once for each cell string that had to be parsed.
 *
 * Returns GDL_SUCCESS, or GDL_EINVAL if a cell could not be parsed.  The
 * iterator and the cell cache are released on every exit path.
 */
static int
_standard_perform (gdl_gview_reader * reader, gdl_fsheet * fs)
{
	char * cell, * row, * col;
	size_t ridx, cidx;
	gdl_accession  * va;
	gdl_locus      * vl = NULL;
	gdl_gview_reader_cell  * pcell;
	gdl_gview_reader_cells * pcells = NULL;
	
	gdl_fsheet_itr * itr = gdl_fsheet_iterator_c (fs);
	
	do 
	{
		row  = gdl_fsheet_iterator_row (itr);
		col  = gdl_fsheet_iterator_column (itr);
		cell = gdl_fsheet_iterator_cell (itr);
		ridx = gdl_fsheet_iterator_row_idx (itr);
		cidx = gdl_fsheet_iterator_column_idx (itr);
		
		if (gdl_fsheet_iterator_is_new_column (itr))
		{
			/* Start a fresh cell cache for the new column. */
			gdl_gview_reader_cells_free (pcells);
			
			pcells = gdl_gview_reader_cells_alloc ();
			
			vl = gdl_gview_reader_add_locus (reader, col, cidx);
		}
		
		va = gdl_gview_reader_add_accession (reader, row, ridx);
		
		pcell = gdl_gview_reader_cells_lookup (pcells, cell);
		
		if (pcell == NULL)
		{
			pcell = gdl_gview_reader_parse_cell (reader, cell);
		
			if (pcell == NULL)
			{
				/* Never hand a failed parse to the cache or to the view. */
				gdl_gview_reader_cells_free (pcells);
				gdl_fsheet_iterator_free (itr);
				return GDL_EINVAL;
			}
		
			gdl_gview_reader_cells_add (pcells, cell, pcell);
			
			gdl_gview_reader_update_locus (vl, pcell);
		}
		
		gdl_gview_reader_set_gdatapoint (reader, va, vl, pcell);
		
	}
	while (gdl_fsheet_iterator_next (itr));
	
	gdl_fsheet_iterator_free (itr);
	
	gdl_gview_reader_cells_free (pcells);
	
	return GDL_SUCCESS;
}

/*
 * Run the three parsing stages on an initialised sheet:
 *   1. _standard_init    -- allocate reader->gview,
 *   2. _standard_check   -- validate the sheet,
 *   3. _standard_perform -- load the data into the view.
 *
 * Returns GDL_SUCCESS when all stages succeed and GDL_EINVAL otherwise.
 * When stage 2 or 3 fails the view is freed, reader->gview is reset to NULL
 * so that no dangling pointer is left behind, and the error is raised
 * through GDL_ERROR_VAL.
 *
 * NOTE(review): GDL_ERROR_VAL is assumed to return its third argument from
 * the enclosing function (as the GSL macro of the same shape does); the
 * explicit return that follows covers the case where it does not.
 */
static int
_standard_parse (gdl_gview_reader * reader, gdl_fsheet * fs)
{
	int status;
	
	if (_standard_init (reader, fs) != GDL_SUCCESS)
	{
		return GDL_EINVAL;
	}
	
	status = _standard_check (reader, fs);
	
	if (status != GDL_SUCCESS)
	{
		gdl_gview_free (reader->gview);
		reader->gview = NULL;
		GDL_ERROR_VAL (reader->error, GDL_EINVAL, GDL_EINVAL);
		return GDL_EINVAL;
	}
	
	status = _standard_perform (reader, fs);
	
	if (status != GDL_SUCCESS)
	{
		gdl_gview_free (reader->gview);
		reader->gview = NULL;
		GDL_ERROR_VAL (reader->error, GDL_EINVAL, GDL_EINVAL);
		return GDL_EINVAL;
	}
	
	return GDL_SUCCESS;
}

/*
 * Entry point installed as the read callback of the standard reader type.
 *
 * Builds the file sheet for the reader, parses it, and releases the sheet
 * again.  Returns GDL_EINVAL when no sheet could be built, otherwise the
 * status reported by _standard_parse.
 */
static int
_standard_read (gdl_gview_reader * reader)
{
	int rc = GDL_EINVAL;
	gdl_fsheet * sheet = _fsheet (reader);

	if (sheet != NULL)
	{
		rc = _standard_parse (reader, sheet);
		gdl_fsheet_free (sheet);
	}

	return rc;
}

/*
 * Allocate the point that will receive one unit of a cell.
 *
 * When `geno` is zero a point of size 1 is allocated.  Otherwise the value
 * returned by gdl_string_token for the phase separator (type->psep) on
 * `token` must equal the ploidy; if it does a point of size `ploidy` is
 * allocated, if it does not NULL is returned.
 */
static gdl_gview_reader_gpoint * 
_gdl_gview_reader_cell_unit_init (gdl_gview_reader_type * type,
                                const char * token,
                                size_t geno)
{
	size_t token_count;

	if (!geno)
	{
		return gdl_gview_reader_gpoint_alloc (1);
	}

	token_count = gdl_string_token (type->psep, token);

	return (token_count == type->ploidy)
		? gdl_gview_reader_gpoint_alloc (type->ploidy)
		: NULL;
}

/*
 * Record one allele token in slot `i` of a point.
 *
 * A token different from type->missing is cloned into unit->alleles[i];
 * a token equal to type->recessive additionally sets unit->recessives[i].
 * A missing token leaves the allele slot untouched.
 */
static void 
_gdl_gview_reader_cell_unit_fill_allele (gdl_gview_reader_type * type,
                                          const char * token,
                                          gdl_gview_reader_gpoint * unit,
                                          size_t i)
{
	const int is_missing   = (strcmp (token, type->missing) == 0);
	const int is_recessive = (strcmp (token, type->recessive) == 0);

	if (!is_missing)
	{
		unit->alleles[i] = gdl_string_clone (token);
	}

	if (is_recessive)
	{
		unit->recessives[i] = 1;
	}
}                                

/*
 * Fill a point from one unit token.
 *
 * The token is split on the value separator (type->vsep).  Its first part
 * is the allele code: with `geno` set it is split again on the phase
 * separator (type->psep) and each piece goes to the next allele slot,
 * otherwise the whole code goes to slot 0.  The second part, when present,
 * is converted with atof and stored as the unit's value; when absent the
 * value is 1.0.
 *
 * Returns the value stored in unit->value.
 */
static double
_gdl_gview_reader_cell_unit_fill (gdl_gview_reader_type * type,
                                   const char * token,
                                   size_t geno,
                                   gdl_gview_reader_gpoint * unit)
{
	size_t code_pos, allele_pos;
	size_t slot = 0;
	gdl_string * code;
	gdl_string * piece;

	code_pos = gdl_string_token_start (type->vsep, token, &code);

	if (!geno)
	{
		_gdl_gview_reader_cell_unit_fill_allele (type, code, unit, 0);
	}
	else
	{
		allele_pos = gdl_string_token_start (type->psep, code, &piece);

		/* At least one allele is always processed. */
		do
		{
			_gdl_gview_reader_cell_unit_fill_allele (type, piece, unit, slot);
			slot += 1;
			gdl_string_token_next (type->psep, code, &piece, &allele_pos);
		}
		while (piece);
	}

	/* Move on to the optional value part of the token. */
	gdl_string_token_next (type->vsep, token, &code, &code_pos);

	if (!code)
	{
		unit->value = 1.0;
	}
	else
	{
		unit->value = atof (code);
		gdl_string_free (code);
	}

	return unit->value;
}                                                

/*
 * Split `token` on the unit separator (type->gsep) and append one point per
 * piece to `units`.
 *
 * Each piece is turned into a point with _gdl_gview_reader_cell_unit_init
 * and _gdl_gview_reader_cell_unit_fill.  If the values of the points do not
 * add up to exactly 1, every value is divided by their sum.
 *
 * Returns 0 on success and -1 as soon as a piece cannot be initialised
 * (points already appended stay in the list).
 */
static int
_gdl_gview_reader_cell_units_push(gdl_list * units,
                                gdl_gview_reader_type * type,
                                const char * token,
                                size_t geno)
{                               
	size_t pos;
	double total = 0.;
	gdl_gview_reader_gpoint * point;
	gdl_string * piece;
	gdl_list_itr * walker;

	pos = gdl_string_token_start (type->gsep, token, &piece);

	for (;;)
	{
		point = _gdl_gview_reader_cell_unit_init (type, piece, geno);

		if (point == NULL)
		{
			gdl_string_free (piece);
			return (-1);
		}

		total += _gdl_gview_reader_cell_unit_fill (type, piece, geno, point);

		gdl_list_push_back (units, point, 1);

		gdl_string_token_next (type->gsep, token, &piece, &pos);

		if (!piece)
		{
			break;
		}
	}

	/* Nothing to rescale when the values already sum to one. */
	if (total == 1. || gdl_list_empty (units))
	{
		return (0);
	}

	walker = gdl_list_iterator_front (units);

	do
	{
		point = (gdl_gview_reader_gpoint *) gdl_list_iterator_value (walker);
		point->value /= total;
	}
	while (gdl_list_iterator_next (walker));

	gdl_list_iterator_free (walker);

	return (0);
}

/*
 * Classify a cell string.
 *
 * Returns:
 *   - NULL               when `type` or `cell` is null,
 *   - gdl_gpoint_missing when the cell equals type->missing,
 *   - gdl_gpoint_geno    when the cell holds more phase separators
 *                        (type->psep) than ploidy - 1, or when ploidy is 1,
 *                        there is no phase separator and the cell holds at
 *                        least one type->gsep separator,
 *   - gdl_gpoint_haplo   when the cell holds exactly ploidy - 1 phase
 *                        separators (and the previous case does not apply),
 *   - NULL               when it holds fewer than ploidy - 1.
 */
static const gdl_gpoint_type *
_gdl_gview_reader_cell_type (gdl_gview_reader_type * type, const char * cell)
{
	size_t found, expected;

	if (type == 0 || cell == 0)
	{
		return NULL;
	}

	if (strcmp (type->missing, cell) == 0)
	{
		return gdl_gpoint_missing;
	}

	found    = gdl_string_sep (type->psep, cell);
	expected = type->ploidy - 1;

	if (found > expected)
	{
		return gdl_gpoint_geno;
	}

	if (found < expected)
	{
		return NULL;
	}

	/* Exactly ploidy - 1 phase separators from here on. */
	if (type->ploidy == 1 && gdl_string_sep (type->gsep, cell) != 0)
	{
		return gdl_gpoint_geno;
	}

	return gdl_gpoint_haplo;
}

/*
 * Cell parser installed in the standard reader type.
 *
 * The cell string is classified with _gdl_gview_reader_cell_type and then:
 *   - gdl_gpoint_geno:    a cell with one unit list is allocated and the
 *                         whole string is pushed into it,
 *   - gdl_gpoint_haplo:   a cell with `ploidy` unit lists is allocated, its
 *                         is_phased flag is copied from the reader type,
 *                         and each piece between phase separators
 *                         (type->psep) is pushed into the next list,
 *   - gdl_gpoint_missing: a cell without unit list is allocated.
 *
 * Returns the new cell, or NULL when the string cannot be classified or a
 * piece cannot be pushed (the partially built cell is freed in that case).
 */
static gdl_gview_reader_cell *
_standard_cell (gdl_gview_reader * reader, const gdl_string * cell)
{
	const gdl_gpoint_type * kind;
	gdl_gview_reader_cell * parsed;
	gdl_string * piece = NULL;
	size_t pos, slot;

	kind = _gdl_gview_reader_cell_type (reader->type, cell);

	if (kind == gdl_gpoint_geno)
	{
		parsed = gdl_gview_reader_cell_alloc (kind, 1);

		if (_gdl_gview_reader_cell_units_push (parsed->units[0], reader->type, cell, 1) != 0)
		{
			gdl_gview_reader_cell_free (parsed);
			return NULL;
		}

		return parsed;
	}

	if (kind == gdl_gpoint_haplo)
	{
		parsed = gdl_gview_reader_cell_alloc (kind, reader->type->ploidy);
		parsed->is_phased = reader->type->is_phased;

		pos = gdl_string_token_start (reader->type->psep, cell, &piece);

		/* One unit list per piece, in the order the pieces appear. */
		for (slot = 0; ; slot++)
		{
			if (_gdl_gview_reader_cell_units_push (parsed->units[slot], reader->type, piece, 0) != 0)
			{
				gdl_string_free (piece);
				gdl_gview_reader_cell_free (parsed);
				return NULL;
			}

			gdl_string_token_next (reader->type->psep, cell, &piece, &pos);

			if (!piece)
			{
				break;
			}
		}

		return parsed;
	}

	if (kind == gdl_gpoint_missing)
	{
		return gdl_gview_reader_cell_alloc (kind, 0);
	}

	return NULL;
}

/*
 * Descriptor of the standard reader type.
 *
 * The last two initializers install _standard_read and _standard_cell as
 * the type's callbacks.  The preceding ones are seven strings and four
 * integers; this file reads the string members missing, recessive, gfsep,
 * nfsep, psep, gsep and vsep and the integer members ploidy, has_acc_name,
 * has_loc_name and is_phased from a reader type.
 *
 * NOTE(review): this is a positional initializer, so which literal ends up
 * in which member depends on the declaration order of gdl_gview_reader_type
 * in gdl_gview_reader.h -- confirm there before changing any value.
 */
static gdl_gview_reader_type _gdl_gview_reader_standard = 
{
	"?",
	"x",
	"\t",
	"\t",
	"/",
	":",
	";",
	1,
	1,
	1,
	1,
	&_standard_read,
	&_standard_cell
};

/*
 * Externally visible handle on the standard reader type; points at the
 * file-local descriptor _gdl_gview_reader_standard.
 */
const gdl_gview_reader_type * gdl_gview_reader_standard 
                                   = &_gdl_gview_reader_standard;
