/* gcoord/gcoord.c
 * 
 * Copyright (C) 2008 Jean-Baptiste Veyrieras
 * 
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or (at
 * your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */
 
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <gdl/gdl_common.h>
#include <gdl/gdl_errno.h>
#include <gdl/gdl_io.h>
#include <gdl/gdl_string.h>
#include <gdl/gdl_list.h>
#include <gdl/gdl_hash.h>
#include <gdl/gdl_math.h>
#include <gdl/gdl_genome_coord.h>

/*
 * Reads FILE line by line and returns the parsed records as a newly
 * allocated array of chunk pointers.
 *
 * Lines whose first character is '#' are skipped.  Every other line is
 * passed to gdl_chunk_coord_sscanf together with SEQID_DICO, and the
 * result is collected in file order.
 *
 * On return *NCHUNK holds the number of entries in the returned array.
 * The function returns 0 with *NCHUNK set to 0 when no stream could be
 * opened, when no record was read, or when the result array could not
 * be allocated.
 */
static gdl_chunk_coord **
_parse_chunk_coord (const gdl_string * file,
                    gdl_dictionary  * seqid_dico,
                    size_t * nchunk)
{
	size_t n = 0;
	gdl_string * line = 0;
	gdl_list * buffer;
	gdl_chunk_coord * chunk;
	gdl_chunk_coord ** chunks = 0;
	FILE * stream;

	/* Give the caller a defined count on every exit path. */
	*nchunk = 0;

	stream = gdl_fileopen (file, "r");
	if (!stream)
	{
		/* No stream to read from: report an empty result. */
		return 0;
	}

	buffer = gdl_list_alloc (gdl_list_default);
	while (gdl_getline (&line, &n, stream) != -1)
	{
		if (line[0] != '#')
		{
			/* NOTE(review): the parsed chunk is stored unchecked;
			 * confirm gdl_chunk_coord_sscanf cannot return a null
			 * pointer for a malformed line. */
			chunk = gdl_chunk_coord_sscanf (line, n, seqid_dico);
			gdl_list_push_back (buffer, chunk, 0);
		}
		/* The line buffer is released and reset after every read. */
		gdl_string_free (line);
		line = 0;
	}

	*nchunk = gdl_list_size (buffer);
	if (*nchunk > 0)
	{
		chunks = GDL_MALLOC (gdl_chunk_coord *, *nchunk);
		if (chunks)
		{
			size_t i = 0;
			gdl_list_itr * itr = gdl_list_iterator_front (buffer);

			/* Copy the list content into the flat array. */
			do
			{
				chunks[i++] = gdl_list_iterator_value (itr);
			}
			while (gdl_list_iterator_next (itr));

			gdl_list_iterator_free (itr);
		}
		else
		{
			/* Do not advertise entries that are not in the result.
			 * NOTE(review): the chunks held by the list are not
			 * released on this path. */
			*nchunk = 0;
		}
	}

	gdl_list_free (buffer);
	gdl_fileclose (file, stream);

	return chunks;
}

/*
 * Builds one cluster per sequence registered under WHICH in SEQID_DICO
 * and distributes CHUNKS among them.
 *
 * WHICH selects the side: with "ref" a chunk goes to the cluster indexed
 * by its ref_id, with "qry" to the cluster indexed by its qry_id.  For
 * any other value the clusters are created but no chunk is added.
 * Every cluster is sorted with gdl_chunk_coord_cluster_sort before the
 * array is returned.
 *
 * *NCLUST receives the number of clusters.  If the cluster array cannot
 * be allocated the function returns 0 and sets *NCLUST to 0.
 */
static gdl_chunk_coord_cluster **
_create_chunk_clusters (gdl_chunk_coord ** chunks,
                        const size_t nchunk,
                        const gdl_dictionary * seqid_dico,
                        const gdl_string * which,
                        size_t * nclust)
{
	size_t i;
	gdl_chunk_coord_cluster ** clusters;
	/* WHICH is constant for the whole call, so classify it only once
	 * instead of comparing strings for every chunk. */
	const int by_ref = !strcmp (which, "ref");
	const int by_qry = !by_ref && !strcmp (which, "qry");

	*nclust  = gdl_dictionary_size (seqid_dico, which);
	clusters = GDL_MALLOC (gdl_chunk_coord_cluster *, *nclust);
	if (!clusters)
	{
		/* Nothing can be stored: keep the count consistent with the
		 * null array handed back to the caller. */
		*nclust = 0;
		return 0;
	}

	for (i = 0; i < *nclust; i++)
	{
		const gdl_string * seq_id = gdl_dictionary_get (seqid_dico, which, i);
		clusters[i] = gdl_chunk_coord_cluster_alloc (seq_id, which);
	}

	if (by_ref || by_qry)
	{
		for (i = 0; i < nchunk; i++)
		{
			gdl_chunk_coord * chunk = chunks[i];

			if (by_ref)
			{
				gdl_chunk_coord_cluster_add (clusters[chunk->ref_id], chunk);
			}
			else
			{
				gdl_chunk_coord_cluster_add (clusters[chunk->qry_id], chunk);
			}
		}
	}

	for (i = 0; i < *nclust; i++)
	{
		gdl_chunk_coord_cluster_sort (clusters[i]);
	}

	return clusters;
}

/*
 * Creates a genome coordinate map from the chunk coordinate file FILE.
 *
 * A sequence-id dictionary is allocated and the keys "ref" and "qry"
 * are added to it.  The file is then parsed into the chunk array, and
 * the chunks are grouped into per-sequence clusters twice: once for
 * the "ref" side and once for the "qry" side.
 *
 * Returns the new object, or 0 if the object itself cannot be
 * allocated.  Release it with gdl_genome_coord_free.
 */
gdl_genome_coord *
gdl_genome_coord_alloc (const gdl_string * file)
{
	gdl_genome_coord * g = GDL_CALLOC (gdl_genome_coord, 1);

	if (!g)
	{
		/* Avoid writing through a null pointer below. */
		return 0;
	}

	g->seqid_dico = gdl_dictionary_alloc ();
	gdl_dictionary_add (g->seqid_dico, "ref");
	gdl_dictionary_add (g->seqid_dico, "qry");

	/* Parsing comes first: the cluster construction reads both the
	 * chunk array and the dictionary passed to the parser. */
	g->chunks = _parse_chunk_coord (file, g->seqid_dico, &g->nchunk);

	g->ref_chunks = _create_chunk_clusters (g->chunks, g->nchunk, g->seqid_dico, "ref", &(g->nref));
	g->qry_chunks = _create_chunk_clusters (g->chunks, g->nchunk, g->seqid_dico, "qry", &(g->nqry));

	return g;
}

/*
 * Releases a genome coordinate map and everything it owns.  A null GC
 * is accepted and ignored.
 *
 * The clusters of both sides are freed first, then the chunks, then
 * the three pointer arrays that held them, and finally the sequence-id
 * dictionary and the object itself.
 */
void
gdl_genome_coord_free (gdl_genome_coord * gc)
{
	size_t i;

	if (!gc)
	{
		return;
	}

	/* Clusters are freed with a second argument of 0 and the chunks are
	 * released separately below.
	 * NOTE(review): presumably 0 means "do not free the chunks held by
	 * the cluster" -- confirm against gdl_chunk_coord_cluster_free. */
	for (i = 0; i < gc->nref; i++)
	{
		gdl_chunk_coord_cluster_free (gc->ref_chunks[i], 0);
	}
	for (i = 0; i < gc->nqry; i++)
	{
		gdl_chunk_coord_cluster_free (gc->qry_chunks[i], 0);
	}
	for (i = 0; i < gc->nchunk; i++)
	{
		gdl_chunk_coord_free (gc->chunks[i]);
	}

	/* The pointer arrays are separate allocations and must be released
	 * as well, otherwise they leak. */
	GDL_FREE (gc->ref_chunks);
	GDL_FREE (gc->qry_chunks);
	GDL_FREE (gc->chunks);

	/* Clusters can carry a pointer to the dictionary, so it is released
	 * only after every cluster is gone. */
	gdl_dictionary_free (gc->seqid_dico);

	GDL_FREE (gc);
}

gdl_chunk_coord_cluster *
gdl_genome_coord_lookup (const gdl_genome_coord * gc, const gdl_string * which, const gdl_string * seq_id)
{
	int seq_idx = gdl_dictionary_lookup (gc->seqid_dico, which, seq_id);
	if (seq_idx==-1)
		return 0;
	gdl_chunk_coord_cluster * cluster = (!strcmp(which, "ref")) ? gc->ref_chunks[seq_idx] : gc->qry_chunks[seq_idx]; 
	cluster->seqid_dico = gc->seqid_dico;
	return cluster;
}

/*
 * Returns the cluster stored at position SEQ_IDX on the side selected
 * by WHICH, or 0 when SEQ_IDX is not smaller than the number of
 * clusters on that side.
 *
 * WHICH equal to "ref" selects the reference clusters; any other value
 * selects the query clusters.  The dictionary pointer of GC is stored
 * in the returned cluster.
 */
gdl_chunk_coord_cluster *
gdl_genome_coord_get (const gdl_genome_coord * gc, const gdl_string * which, const size_t seq_idx)
{
	gdl_chunk_coord_cluster ** table;
	gdl_chunk_coord_cluster * found;
	size_t count;

	/* Pick the array and its length for the requested side. */
	if (strcmp (which, "ref") == 0)
	{
		table = gc->ref_chunks;
		count = gc->nref;
	}
	else
	{
		table = gc->qry_chunks;
		count = gc->nqry;
	}

	if (seq_idx >= count)
	{
		return 0;
	}

	found = table[seq_idx];
	found->seqid_dico = gc->seqid_dico;
	return found;
}
