/*  
 * 	sheet/private.c
 * 
 *  $Author: baptiste $, $Date: 2008-05-13 15:33:53 $, $Version$
 *
 *  Libgdl : a C library for statistical genetics
 * 
 *  Copyright (C) 2003-2006  Jean-Baptiste Veyrieras, INRA, France.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA * 
 */

#include <stdio.h>
 
#include <gdl/gdl_common.h>
#include <gdl/gdl_errno.h>
#include <gdl/gdl_string.h>
#include <gdl/gdl_fsheet.h>

/*
 * Default sheet format descriptor.
 *
 * NOTE(review): the member order of gdl_fsheet_type is declared in
 * gdl/gdl_fsheet.h and is not visible from this file.  Presumably the two
 * leading integers are the row-name / column-name flags, the three strings
 * are separators (one of them empty, i.e. disabled) and the two trailing
 * NULLs are the optional name-specific field separators -- confirm against
 * the header before relying on this.
 */
static const gdl_fsheet_type _default_type =
{
	1,
	1,
	"",
	"\t",
	"\n",
	NULL,
	NULL
};

/*
 * Reader state for a delimited text sheet.
 *
 * Members prefixed with '_' are scratch/parsing state manipulated by the
 * static helpers of this file.
 */
struct _gdl_fsheet
{
	size_t nr;              /* number of rows, counted by gdl_fsheet_init_size */
	size_t nc;              /* number of columns, counted by gdl_fsheet_init_size */
	long offset;            /* stream position gdl_fsheet_rewind seeks back to */
	gdl_fsheet_type * type; /* format description (separators, cname/rname flags) */
	FILE * stream;          /* stream the sheet is read from */
	size_t _nfsep;          /* strlen of the active field separator (fsep) */
	size_t _ntsep;          /* strlen of type->tsep (text separator) */
	size_t _nrsep;          /* strlen of type->rsep (row separator) */
	size_t _last;           /* set to 1 once the row separator has been matched */
	size_t _first;          /* 1 while the next field is the first one of a row */
	size_t _jump_fsep;      /* non-zero: next_field must not skip a field separator after reading */
	size_t _jump_tsep;      /* non-zero: next_field must not skip a text separator before reading */
	size_t _buffersize;     /* capacity of _buffer, excluding the extra terminating byte */
	size_t _nextrow;        /* set when next_field ended a row; consumed by gdl_fsheet_next_row */
	char * _tsep;           /* scratch (_ntsep+1 bytes): last characters read, compared with type->tsep */
	char * _fsep;           /* scratch (_nfsep+1 bytes): last characters read, compared with fsep */
	char * _rsep;           /* scratch (_nrsep+1 bytes): last characters read, compared with type->rsep */
	char * _buffer;         /* text of the field currently being read */
	const char * fsep;      /* active field separator: type->fsep, type->rnfsep or type->cnfsep */
};

/*
 * Double the capacity of the field buffer, keeping its current content.
 *
 * A fresh zero-filled block of (2 * _buffersize + 1) bytes is allocated,
 * the first _buffersize bytes of the old buffer are copied into it and the
 * old buffer is released.
 *
 * Returns 0 on success; on allocation failure the error is reported through
 * GDL_ERROR_VAL with GDL_ENOMEM and the value -1.
 */
static int
gdl_fsheet_grow_buffer (gdl_fsheet * fs)
{
	const size_t old_size = fs->_buffersize;
	char * grown = GDL_CALLOC (char, old_size*2+1);
	
	if (!grown)
	{
		GDL_ERROR_VAL ("Cannot increase buffer size",
		               GDL_ENOMEM,
		               -1);
	}
	
	memcpy (grown, fs->_buffer, old_size * sizeof (char));
	
	fs->_buffersize = old_size * 2;
	
	GDL_FREE (fs->_buffer);
	
	fs->_buffer = grown;
	
	return (0);
}

/*
 * Consume characters until the row separator or the active field separator
 * has been matched.
 *
 * Every character read is written circularly into both the _fsep and _rsep
 * scratch buffers; a scratch buffer is compared with its separator only
 * when the write position reaches its last slot.  Matching the row
 * separator sets fs->_last.
 *
 * Nothing is read when the active field separator is empty.
 *
 * Returns 0 when a separator was matched (or nothing had to be skipped),
 * -1 when EOF is reached first.
 */
static int
gdl_fsheet_jump_fsep (gdl_fsheet * fs)
{
	size_t count = 0;
	
	if (!fs->_nfsep)
	{
		return (0);
	}
	
	while (1)
	{
		size_t fslot, rslot;
		int c = fgetc (fs->stream);
		
		if (c == EOF)
		{
			return (-1);
		}
		
		fslot = count % fs->_nfsep;
		fs->_fsep[fslot] = c;
		
		rslot = count % fs->_nrsep;
		fs->_rsep[rslot] = c;
		
		if (rslot == fs->_nrsep - 1
		    && strcmp (fs->_rsep, fs->type->rsep) == 0)
		{
			/* end of row reached while looking for the field separator */
			fs->_last = 1;
			return (0);
		}
		
		if (fslot == fs->_nfsep - 1
		    && strcmp (fs->_fsep, fs->fsep) == 0)
		{
			return (0);
		}
		
		count++;
	}
}

/*
 * Consume characters until the row separator or the text separator has
 * been matched.
 *
 * Each character is first stored in the _rsep scratch buffer and the row
 * separator is tested; only if that test fails is the character stored in
 * the _tsep scratch buffer and the text separator tested.  Matching the
 * row separator sets fs->_last.
 *
 * Nothing is read when the text separator is empty.
 *
 * Returns 0 when a separator was matched (or nothing had to be skipped),
 * -1 when EOF is reached first.
 */
static int
gdl_fsheet_jump_tsep (gdl_fsheet * fs)
{
	size_t count;
	
	if (!fs->_ntsep)
	{
		return (0);
	}
	
	for (count = 0; ; count++)
	{
		size_t slot;
		int c = fgetc (fs->stream);
		
		if (c == EOF)
		{
			return (-1);
		}
		
		slot = count % fs->_nrsep;
		fs->_rsep[slot] = c;
		
		if (slot == fs->_nrsep - 1
		    && strcmp (fs->_rsep, fs->type->rsep) == 0)
		{
			fs->_last = 1;
			return (0);
		}
		
		slot = count % fs->_ntsep;
		fs->_tsep[slot] = c;
		
		if (slot == fs->_ntsep - 1
		    && strcmp (fs->_tsep, fs->type->tsep) == 0)
		{
			return (0);
		}
	}
}

/*
 * Read one field when neither a field nor a text separator is configured:
 * the field is a single character, stored in fs->_buffer[0].
 *
 * After the character, _nrsep more characters are read as look-ahead.  If
 * they equal the row separator, fs->_last is set and they stay consumed;
 * otherwise the stream is moved back to just after the field character.
 *
 * Returns 0 on success, -1 if EOF is hit on the field character or during
 * the look-ahead.
 */
static int
gdl_fsheet_read_field_0_0 (gdl_fsheet * fs)
{
	size_t n;
	long mark;
	int c = fgetc (fs->stream);
	
	if (c == EOF)
	{
		return (-1);
	}
	
	fs->_buffer[0] = c;
	
	/* remember where the look-ahead starts so it can be undone */
	mark = ftell (fs->stream);
	
	for (n = 0; n < fs->_nrsep; n++)
	{
		c = fgetc (fs->stream);
		
		if (c == EOF)
		{
			return (-1);
		}
		
		fs->_rsep[n] = c;
	}
	
	if (fs->_nrsep && strcmp (fs->_rsep, fs->type->rsep) == 0)
	{
		fs->_last = 1;
		return (0);
	}
	
	fseek (fs->stream, mark, SEEK_SET);
	
	return (0);
}

/*
 * Read one field when only a text separator is configured (no field
 * separator).
 *
 * Characters are appended to fs->_buffer (grown on demand) until the text
 * separator is matched; the separator characters are then erased from the
 * end of the buffer.  Afterwards gdl_fsheet_jump_tsep is called to skip
 * ahead to the next text separator or row separator, and both jump flags
 * are set so that gdl_fsheet_next_field does not skip again.
 *
 * Returns 0 on success, -1 on EOF before the text separator is matched or
 * when the buffer cannot be grown.
 */
static int
gdl_fsheet_read_field_0_1 (gdl_fsheet * fs)
{
	int ch;
	size_t j, k;
	
	for (j = 0; ; j++)
	{
		ch = fgetc (fs->stream);
		
		if (ch == EOF)
		{
			return (-1);
		}
		
		if (j >= fs->_buffersize)
		{
			if (gdl_fsheet_grow_buffer (fs))
			{
				return (-1);
			}
		}
		
		fs->_buffer[j]            = ch;
		fs->_tsep[j % fs->_ntsep] = ch;
		
		if (j % fs->_ntsep == fs->_ntsep - 1
		    && !strcmp (fs->_tsep, fs->type->tsep))
		{
			break;
		}
	}
	
	/*
	 * Erase the separator from the tail of the buffer.  The loop counts
	 * upward on purpose: the former "k > j - _ntsep" test wrapped around
	 * (size_t) when j == _ntsep - 1, i.e. for an empty field, and left the
	 * separator in the buffer.  A match implies j >= _ntsep - 1, so j - k
	 * never underflows here.
	 */
	for (k = 0; k < fs->_ntsep; k++)
	{
		fs->_buffer[j - k] = '\0';
	}
	
	/*
	 * The return value is not checked: the field is already complete in
	 * the buffer even if EOF is reached while skipping.
	 */
	gdl_fsheet_jump_tsep (fs);
	
	fs->_jump_fsep = 1;
	fs->_jump_tsep = 1;
	
	return (0);
}

/*
 * Read one field when no field separator is configured, dispatching on
 * whether a text separator is configured.
 *
 * Returns the value of the selected reader (0 on success, -1 on failure).
 * The original fell off the end of this non-void function although
 * gdl_fsheet_read_field returns its value.
 */
static int
gdl_fsheet_read_field_0 (gdl_fsheet * fs)
{
	if (!fs->_ntsep)
	{
		return gdl_fsheet_read_field_0_0 (fs);
	}
	
	return gdl_fsheet_read_field_0_1 (fs);
}

/*
 * Read one field when a field separator is configured but no text
 * separator.
 *
 * Characters are appended to fs->_buffer (grown on demand) until either
 * the row separator or the active field separator is matched; the matched
 * separator is then erased from the end of the buffer.
 *
 *   - field separator matched: fs->_jump_fsep is set to 1;
 *   - row separator matched:   fs->_last is set to 1.
 *
 * Returns 0 on success, -1 on EOF before any separator is matched or when
 * the buffer cannot be grown.
 */
static int
gdl_fsheet_read_field_1_0 (gdl_fsheet * fs)
{
	int ch;
	int row_end = 0;
	size_t j, k, nsep;
	
	for (j = 0; ; j++)
	{
		ch = fgetc (fs->stream);
		
		if (ch == EOF)
		{
			return (-1);
		}
		
		if (j >= fs->_buffersize)
		{
			if (gdl_fsheet_grow_buffer (fs))
			{
				return (-1);
			}
		}
		
		fs->_buffer[j]            = ch;
		fs->_fsep[j % fs->_nfsep] = ch;
		fs->_rsep[j % fs->_nrsep] = ch;
		
		/* the row separator is tested first, as in the original */
		if (j % fs->_nrsep == fs->_nrsep - 1
		    && !strcmp (fs->_rsep, fs->type->rsep))
		{
			row_end = 1;
			break;
		}
		if (j % fs->_nfsep == fs->_nfsep - 1
		    && !strcmp (fs->_fsep, fs->fsep))
		{
			break;
		}
	}
	
	/*
	 * Erase the matched separator from the tail of the buffer.  Counting
	 * upward avoids the size_t wrap-around of the former
	 * "k > j - nsep" test, which skipped the erase whenever the field was
	 * empty (j == nsep - 1).  A match implies j >= nsep - 1.
	 */
	nsep = row_end ? fs->_nrsep : fs->_nfsep;
	
	for (k = 0; k < nsep; k++)
	{
		fs->_buffer[j - k] = '\0';
	}
	
	if (row_end)
	{
		fs->_last = 1;
	}
	else
	{
		fs->_jump_fsep = 1;
	}
	
	return (0);
}

/*
 * Read one field when both a field separator and a text separator are
 * configured.
 *
 * Characters are appended to fs->_buffer (grown on demand) until the text
 * separator is matched; the separator is then erased from the end of the
 * buffer.  Both jump flags are cleared, so gdl_fsheet_next_field will skip
 * the following field separator and the next opening text separator.
 *
 * Returns 0 on success, -1 on EOF before the text separator is matched or
 * when the buffer cannot be grown.
 */
static int
gdl_fsheet_read_field_1_1 (gdl_fsheet * fs)
{
	int ch;
	size_t j, k;
	
	for (j = 0; ; j++)
	{
		ch = fgetc (fs->stream);
		
		if (ch == EOF)
		{
			return (-1);
		}
		
		if (j >= fs->_buffersize)
		{
			if (gdl_fsheet_grow_buffer (fs))
			{
				return (-1);
			}
		}
		
		fs->_buffer[j]            = ch;
		fs->_tsep[j % fs->_ntsep] = ch;
		
		if (j % fs->_ntsep == fs->_ntsep - 1
		    && !strcmp (fs->_tsep, fs->type->tsep))
		{
			break;
		}
	}
	
	/*
	 * Erase the text separator from the tail of the buffer.  Counting
	 * upward avoids the size_t wrap-around of the former
	 * "k > j - _ntsep" test, which left the separator in place for an
	 * empty field (j == _ntsep - 1).  A match implies j >= _ntsep - 1.
	 */
	for (k = 0; k < fs->_ntsep; k++)
	{
		fs->_buffer[j - k] = '\0';
	}
	
	fs->_jump_tsep = 0;
	fs->_jump_fsep = 0;
	
	return (0);
}

/*
 * Read one field when a field separator is configured, dispatching on
 * whether a text separator is configured.
 *
 * Returns the value of the selected reader (0 on success, -1 on failure).
 * The original fell off the end of this non-void function although
 * gdl_fsheet_read_field returns its value.
 */
static int
gdl_fsheet_read_field_1 (gdl_fsheet * fs)
{
	if (!fs->_ntsep)
	{
		return gdl_fsheet_read_field_1_0 (fs);
	}
	
	return gdl_fsheet_read_field_1_1 (fs);
}

/*
 * Read one field into fs->_buffer, choosing the reader family according to
 * whether the active field separator is empty (_nfsep == 0) or not.
 *
 * Returns whatever the selected reader returns.
 */
static int
gdl_fsheet_read_field (gdl_fsheet * fs)
{
	return fs->_nfsep
	       ? gdl_fsheet_read_field_1 (fs)   /* a field separator is in use */
	       : gdl_fsheet_read_field_0 (fs);  /* no field separator */
}

/*
 * Read the next field of the current row into fs->_buffer.
 *
 * Sequence:
 *   1. clean the buffer (gdl_string_clean -- presumably clears its content;
 *      the helper is defined elsewhere);
 *   2. unless _jump_tsep is set, skip ahead to the opening text separator;
 *   3. for the first field of a row, when row names are enabled and a
 *      dedicated separator (type->rnfsep) exists, make it the active field
 *      separator;
 *   4. read the field;
 *   5. unless the row ended or _jump_fsep is set, skip the field separator;
 *   6. update the row state.
 *
 * Returns 0 when the field just read was the last one of its row (the row
 * separator was matched), 1 otherwise.
 *
 * NOTE(review): the results of gdl_fsheet_read_field and of the jump
 * helpers are ignored, so EOF is not reported to the caller, and the
 * GDL_CALLOC results are not checked for NULL.
 */
static int
gdl_fsheet_next_field (gdl_fsheet * fs)
{
	gdl_string_clean (fs->_buffer);
	
	if (!fs->_jump_tsep)
	{
		gdl_fsheet_jump_tsep (fs);
	}
	
	/* first field of a row: switch to the row-name separator if there is one */
	if (fs->_first && (fs->type->rname && fs->type->rnfsep != NULL))
	{
		fs->fsep    = fs->type->rnfsep;
		fs->_nfsep	= strlen (fs->fsep);
		GDL_FREE (fs->_fsep);
		fs->_fsep   = GDL_CALLOC (char, fs->_nfsep+1);
	}
	
	gdl_fsheet_read_field (fs);
	
	if (!fs->_last && !fs->_jump_fsep)
	{
		gdl_fsheet_jump_fsep (fs);
	}
	
	//printf (">> BUFFER [ %s ] LAST = %d\n", fs->_buffer, fs->_last);
	
	if (fs->_last)
	{
		/* row finished: re-arm the per-row state and flag the row change */
		fs->_first   = 1;
		fs->_last    = 0;
		fs->_nextrow = 1;
		return (0);
	} 
	else if (fs->_first)
	{
		/* the first field has been read: go back to the regular separator */
		fs->_first = 0;
		if (fs->type->rname && fs->type->rnfsep != NULL)
		{
			fs->fsep    = fs->type->fsep;
			fs->_nfsep	= strlen (fs->fsep);
			GDL_FREE (fs->_fsep);
			fs->_fsep  = GDL_CALLOC (char, fs->_nfsep+1);
		}		
	}	
	return (1);
}

/*
 * Advance to the next row.
 *
 * If the previous gdl_fsheet_next_field call already consumed a row
 * separator (fs->_nextrow set), the flag is cleared and 1 is returned
 * without reading anything.  Otherwise characters are consumed until the
 * row separator is matched; one more character is then read and pushed
 * back to find out whether anything follows.
 *
 * Returns 1 if another row is available, 0 on EOF.
 */
static int
gdl_fsheet_next_row (gdl_fsheet * fs)
{
	size_t count = 0;
	
	if (fs->_nextrow)
	{
		fs->_nextrow = 0;
		return (1);
	}
	
	while (1)
	{
		size_t slot;
		int c = fgetc (fs->stream);
		
		if (c == EOF)
		{
			return (0);
		}
		
		slot = count % fs->_nrsep;
		fs->_rsep[slot] = c;
		
		if (slot == fs->_nrsep - 1
		    && strcmp (fs->_rsep, fs->type->rsep) == 0)
		{
			/* peek one character: ungetc yields EOF when given EOF */
			int peeked = fgetc (fs->stream);
			
			return (ungetc (peeked, fs->stream) != EOF);
		}
		
		count++;
	}
}

/*
 * Skip the header line when the sheet has column names (type->cname).
 *
 * Returns the result of gdl_fsheet_next_row when a header is skipped,
 * 0 when the sheet has no column names.
 */
static int
gdl_fsheet_jump_header (gdl_fsheet * fs)
{
	return fs->type->cname ? gdl_fsheet_next_row (fs) : (0);
}

/*
 * Report the current position of the underlying stream, as given by ftell.
 */
static long
gdl_fsheet_ftell (gdl_fsheet * fs)
{
	const long position = ftell (fs->stream);
	
	return (position);
}

/*
 * Move the underlying stream to the absolute position `offset`
 * (SEEK_SET).  Returns the result of fseek.
 */
static int
gdl_fsheet_fseek (gdl_fsheet * fs, long offset)
{
	const int status = fseek (fs->stream, offset, SEEK_SET);
	
	return (status);
}

/*
 * Reposition the stream at fs->offset and clear the pending "next row"
 * flag.  The flag is cleared even when there is no stream.
 *
 * Returns the result of fseek, or -1 when fs->stream is NULL.
 */
static int
gdl_fsheet_rewind (gdl_fsheet * fs)
{
	fs->_nextrow = 0;
	
	if (!fs->stream)
	{
		return (-1);
	}
	
	return fseek (fs->stream, fs->offset, SEEK_SET);
}

/*
 * Set up the parsing state of `fs` from fs->type.
 *
 * For each of the field, text and row separators the length is cached and
 * a zero-filled scratch buffer of (length + 1) bytes is allocated.  The
 * active field separator is set to type->fsep, the row/jump flags are
 * reset and a field buffer with an initial capacity of 100 characters is
 * allocated.
 *
 * Returns 0 on success; each allocation failure is reported through
 * GDL_ERROR_VAL with GDL_ENOMEM and the value -1.
 */
static int
gdl_fsheet_init_buffer (gdl_fsheet * fs)
{
	/* field separator */
	fs->_nfsep = strlen (fs->type->fsep);
	fs->_fsep  = GDL_CALLOC (char, fs->_nfsep+1);
	
	if (fs->_fsep == NULL)
	{
		GDL_ERROR_VAL ("Unable to initialize gdl_fsheet_init",
		               GDL_ENOMEM,
		               -1);
	}
	
	fs->fsep = fs->type->fsep;
	
	/* text separator */
	fs->_ntsep = strlen (fs->type->tsep);
	fs->_tsep  = GDL_CALLOC (char, fs->_ntsep+1);
	
	if (fs->_tsep == NULL)
	{
		GDL_ERROR_VAL ("Unable to initialize gdl_fsheet_init",
		               GDL_ENOMEM,
		               -1);
	}
	
	/* row separator */
	fs->_nrsep = strlen (fs->type->rsep);
	fs->_rsep  = GDL_CALLOC (char, fs->_nrsep+1);
	
	if (fs->_rsep == NULL)
	{
		GDL_ERROR_VAL ("Unable to initialize gdl_fsheet_init",
		               GDL_ENOMEM,
		               -1);
	}
	
	/* parsing flags: positioned at the first field of a row */
	fs->_first     = 1;
	fs->_last      = 0;
	fs->_jump_fsep = 0;
	fs->_jump_tsep = 0;
	
	/* field buffer */
	fs->_buffersize = 100;
	fs->_buffer     = GDL_CALLOC (char, fs->_buffersize+1);
	
	if (fs->_buffer == NULL)
	{
		GDL_ERROR_VAL ("Unable to initialize gdl_fsheet_init",
		               GDL_ENOMEM,
		               -1);
	}
	
	fs->_nextrow = 0;
	
	return (0);
}

/*
 * Prepare the parsing state for reading the header (column names) line.
 *
 * When column names are enabled and a dedicated separator (type->cnfsep)
 * is given, it becomes the active field separator and the _fsep scratch
 * buffer is reallocated to its length.  All row/jump flags are cleared;
 * _nextrow is left untouched.
 */
static void
gdl_fsheet_parse_header (gdl_fsheet * fs)
{
	fs->_jump_tsep = 0;
	fs->_jump_fsep = 0;
	fs->_last      = 0;
	fs->_first     = 0;
	
	if (fs->type->cname && fs->type->cnfsep != NULL)
	{
		GDL_FREE (fs->_fsep);
		fs->fsep   = fs->type->cnfsep;
		fs->_nfsep = strlen (fs->fsep);
		fs->_fsep  = GDL_CALLOC (char, fs->_nfsep+1);
	}
}

/*
 * Prepare the parsing state for reading cells with the regular field
 * separator (type->fsep), positioned after the first field of a row:
 * _first is cleared, as are _last and both jump flags.  The _fsep scratch
 * buffer is reallocated to the separator length; _nextrow is left
 * untouched.
 */
static void
gdl_fsheet_parse_cell_2 (gdl_fsheet * fs)
{
	GDL_FREE (fs->_fsep);
	
	fs->fsep   = fs->type->fsep;
	fs->_nfsep = strlen (fs->fsep);
	fs->_fsep  = GDL_CALLOC (char, fs->_nfsep+1);
	
	fs->_jump_tsep = 0;
	fs->_jump_fsep = 0;
	fs->_last      = 0;
	fs->_first     = 0;
}

/*
 * Prepare the parsing state for reading cells with the regular field
 * separator (type->fsep), positioned at the first field of a row:
 * _first is set, _last and both jump flags are cleared.  The _fsep scratch
 * buffer is reallocated to the separator length; _nextrow is left
 * untouched.
 */
static void
gdl_fsheet_parse_cell (gdl_fsheet * fs)
{
	GDL_FREE (fs->_fsep);
	
	fs->fsep   = fs->type->fsep;
	fs->_nfsep = strlen (fs->fsep);
	fs->_fsep  = GDL_CALLOC (char, fs->_nfsep+1);
	
	fs->_jump_tsep = 0;
	fs->_jump_fsep = 0;
	fs->_last      = 0;
	fs->_first     = 1;
}

/*
 * Compute the dimensions of the sheet and store them in fs->nc / fs->nr.
 *
 * Columns are counted by reading the fields of the first line until the
 * row ends.  When that line is a data row carrying a row name (no column
 * names, but row names), the row-name field is not a column and the count
 * is decremented.  Rows are then counted by skipping from row separator to
 * row separator until EOF; the header line, if any, is skipped first.
 *
 * The stream is left wherever the row scan stopped.
 *
 * Returns 0.  The original fell off the end of this non-void function,
 * leaving the value seen by callers undefined.
 */
static int
gdl_fsheet_init_size (gdl_fsheet * fs)
{
	fs->nc = 0;
	
	if (fs->type->cname)
	{
		/* set up the state to parse the column names */
		gdl_fsheet_parse_header (fs);
	}
	
	do
	{
		(fs->nc)++;
	}
	while (gdl_fsheet_next_field (fs));
	
	if (!fs->type->cname && fs->type->rname)
	{
		/*
		 * The line just parsed was the first data row, including its
		 * row name: that field is not a column.
		 */
		(fs->nc)--;
	}
	
	if (fs->type->cname)
	{
		gdl_fsheet_jump_header (fs);
	}
	
	/* set up the state to parse the cells */
	gdl_fsheet_parse_cell (fs);
	
	fs->nr = 0;
	
	do
	{
		fs->nr++;
	}
	while (gdl_fsheet_next_row (fs));
	
	return (0);
}

