/*
 *  snp/fscanf.c 
 * 
 *  $Author: baptiste $, $Date: 2008-05-13 15:22:03 $, $Version$
 *  
 *  Libgdl : a C library for statistical genetics
 * 
 *  Copyright (C) 2003-2006  Jean-Baptiste Veyrieras, INRA, France.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA * 
 */
 
#include <stdio.h>
#include <ctype.h>
#include <math.h>

#include <gdl/gdl_common.h>
#include <gdl/gdl_errno.h>
#include <gdl/gdl_util.h>
#include <gdl/gdl_io.h>
#include <gdl/gdl_string.h>
#include <gdl/gdl_snp_data.h>
#include <gdl/gdl_snp_map.h>

/*
 * Scans STREAM up to EOF to work out the dimensions of a SNP data matrix.
 *
 *  stream : input stream, read from its current position; it is left at
 *           EOF, so the caller has to reposition it before parsing.
 *  N      : receives the number of lines.
 *  L      : receives the number of non-whitespace characters found on the
 *           first line; every following line must hold the same number.
 *
 * A final line that is not terminated by '\n' is counted as a line as soon
 * as it holds at least one non-whitespace character, so that the reader
 * which parses the stream afterwards has a row available for it.
 *
 * Returns 0 when all lines agree on their length (presumably the value of
 * GDL_SUCCESS -- confirm against gdl_errno.h). A line of a different length
 * is reported through GDL_ERROR with the GDL_FAILURE code.
 */
static int
gdl_snp_data_fscanf_dim (FILE * stream, size_t * N, size_t * L)
{
	int c;
	size_t l=0;
	
	*N=*L=0;
	
	for (;;)
	{
		c = fgetc (stream);
		
		/* a line ends on '\n', or on EOF when some data is still pending */
		if (c=='\n' || (c==EOF && l>0))
		{
			if (*N==0) *L=l;
			else if (*L!=l)
			{
				/* size_t values are cast to match the %lu conversions */
				GDL_ERROR (gdl_string_sprintf ("Line %lu contains %lu data (expected %lu)\n", (unsigned long)(*N+1), (unsigned long)l, (unsigned long)*L), GDL_FAILURE);
			}
			(*N)++;
			l=0;
		}
		
		if (c==EOF)
		{
			break;
		}
		
		/* '\n' is itself a space character, so it never adds to the count */
		if (!isspace(c))
		{
			l++;
		}
	}
	
	return 0;
}

/*
 * Reads a SNP data matrix from a text stream.
 *
 * The stream is read twice from its current position: a first pass
 * (gdl_snp_data_fscanf_dim) measures the number of lines N and the number
 * of non-whitespace characters per line L, then the stream is rewound and
 * every non-whitespace character is stored in the matrix. The stream
 * therefore has to be seekable.
 *
 *  stream : input stream.
 *  T      : format descriptor. When T->P==1 or T->haplo is zero, each line
 *           is one row of a genotype matrix (N rows). Otherwise lines are
 *           consumed in pairs, line 2*i and 2*i+1 being stored at index
 *           k=0 and k=1 of row i of a haplotype matrix (N/2 rows), which
 *           requires N to be even.
 *
 * Returns the newly allocated matrix, or 0 when stream or T is null, when
 * the stream cannot be repositioned, when the allocation fails, or when
 * the input does not fit the measured dimensions (those last cases are
 * reported through GDL_ERROR_VAL with the GDL_FAILURE code).
 */
gdl_snp_data *
gdl_snp_data_fscanf (FILE * stream, const gdl_snp_data_format * T)
{
	int c;
	size_t i, j, k, N, L, rows;
	long offset;
	gdl_snp_data * v;
	
	if (stream==0 || T==0)
	{
		return 0;
	}
	
	offset = ftell (stream);
	if (offset<0)
	{
		GDL_ERROR_VAL ("Unable to get the current position of the stream (is it seekable ?)\n", GDL_FAILURE, 0);
	}
	
	/* a line of unexpected length is reported by the helper itself */
	gdl_snp_data_fscanf_dim (stream, &N, &L);
	
	if (fseek (stream, offset, SEEK_SET)!=0)
	{
		GDL_ERROR_VAL ("Unable to rewind the stream after reading its dimensions\n", GDL_FAILURE, 0);
	}
	
	if (T->P==1 || !T->haplo)
	{
		rows = N;
		v = gdl_snp_data_alloc (gdl_snp_data_genotype, rows, L, T->P);
		if (v==0)
		{
			return 0;
		}
		i=j=0;
		while ((c=fgetc(stream))!=EOF)
		{
			if (c=='\n') 
			{
				i++;
				j=0;
			}
			else if (!isspace(c))
			{
				/*
				 * Never write outside the rows x L matrix allocated above.
				 * NOTE(review): v is not released on this path, no
				 * deallocation routine being visible from this file.
				 */
				if (i>=rows || j>=L)
				{
					GDL_ERROR_VAL (gdl_string_sprintf ("Unexpected data at line %lu, position %lu (expected %lu lines of %lu data)\n", (unsigned long)(i+1), (unsigned long)(j+1), (unsigned long)N, (unsigned long)L), GDL_FAILURE, 0);
				}
				gdl_snp_data_set (v, i, j, c);
				j++;	
			}
		}
	}
	else if (N%2==0)
	{
		rows = N/2;
		v = gdl_snp_data_alloc (gdl_snp_data_haplotype, rows, L, T->P);
		if (v==0)
		{
			return 0;
		}
		i=j=k=0;
		while ((c=fgetc(stream))!=EOF)
		{
			if (c=='\n') 
			{
				/* k toggles 0 -> 1 -> 0; the row advances every second line */
				if (k) {k=0;i++;}
				else k++;
				j=0;
			}
			else if (!isspace(c))
			{
				/* same guard (and same NOTE) as in the genotype branch */
				if (i>=rows || j>=L)
				{
					GDL_ERROR_VAL (gdl_string_sprintf ("Unexpected data at line %lu, position %lu (expected %lu lines of %lu data)\n", (unsigned long)(2*i+k+1), (unsigned long)(j+1), (unsigned long)N, (unsigned long)L), GDL_FAILURE, 0);
				}
				gdl_snp_data_hset (v, i, j, k, c);
				j++;	
			}
		}
	}
	else
	{
		/* size_t is cast to match the %lu conversion */
		GDL_ERROR_VAL (gdl_string_sprintf ("Expected an even number of rows (haplotype mode) and I got %lu\n", (unsigned long)N), GDL_FAILURE, 0);	
	}
	
	return v;
}

/*
 * Counts the lines available on STREAM between its current position and
 * EOF.
 *
 *  stream : input stream; it is left at EOF, so the caller has to
 *           reposition it before parsing.
 *  N      : receives the number of lines. A final run of characters that
 *           is not terminated by '\n' counts as one more line, since a
 *           line-based reader will still hand it out.
 *
 * Always returns 0: there is no failure case in this scan.
 */
static int
gdl_snp_map_fscanf_dim (FILE * stream, size_t * N)
{
	int c;
	int pending=0; /* non-zero while characters were read since the last '\n' */
	
	*N=0;
	
	while ((c=fgetc(stream))!=EOF)
	{
		if (c=='\n') 
		{
			(*N)++;
			pending=0;
		}
		else
		{
			pending=1;
		}
	}
	
	if (pending)
	{
		(*N)++;
	}
	
	return 0;
}

gdl_snp_map *
gdl_snp_map_fscanf (FILE * stream)
{
	if (stream)
	{
		int c;
		size_t i, j, n, s, N;
		long offset;
		gdl_string * line = 0;
		gdl_string * rs, * pos, * allele0, * allele1;
		gdl_snp_map * chrom;
		gdl_snp * snp;

#define NEXT_TOKEN {for (j=i;isspace(line[j]) && j<n;j++); \
for (i=j;!isspace(line[i]) && i<n;i++);}	
	
		offset = ftell (stream);
	
		gdl_snp_map_fscanf_dim (stream, &N);
	
		fseek (stream, offset, SEEK_SET);
		
		chrom = gdl_snp_map_alloc (N);
		
		s=i=j=0;
		while(gdl_getline (&line, &n, stream)!=-1)
		{
			NEXT_TOKEN
			rs = gdl_string_alloc (i);
			strncpy (rs, line, i);
			NEXT_TOKEN
			pos = gdl_string_alloc (i-j);
			strncpy (pos, &line[j], i-j);
			NEXT_TOKEN
			allele0 = gdl_string_alloc (i-j);
			strncpy (allele0, &line[j], i-j);
			NEXT_TOKEN
			allele1 = gdl_string_alloc (i-j);
			strncpy (allele1, &line[j], i-j);
			
			snp = gdl_snp_alloc (rs, allele0, allele1, atol(pos));
			gdl_snp_map_set (chrom, s, snp);
			
			GDL_FREE (pos);
			GDL_FREE (line);
			line=0;
			i=j=0;
			s++;
		}
		return chrom;
	}
	return 0;
	
#undef NEXT_TOKEN

}
