/* gcoord/cluster.c
 * 
 * Copyright (C) 2008 Jean-Baptiste Veyrieras
 * 
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or (at
 * your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */
 
#include <stdlib.h>
#include <gdl/gdl_common.h>
#include <gdl/gdl_errno.h>
#include <gdl/gdl_io.h>
#include <gdl/gdl_string.h>
#include <gdl/gdl_list.h> 
#include <gdl/gdl_hash.h>
#include <gdl/gdl_math.h> 
#include <gdl/gdl_sort_double.h> 
#include <gdl/gdl_genome_coord.h>

/**
 * Allocate a new chunk cluster with GDL_CALLOC.
 *
 * seq_id  accepted for interface compatibility; not used by this function.
 * which   the cluster is flagged as reference-ordered (is_ref = gdl_true)
 *         only when this string is exactly "ref". Any other value, or a
 *         null pointer, leaves is_ref untouched.
 *
 * Returns the new cluster, or NULL when the allocation failed.
 */
gdl_chunk_coord_cluster *
gdl_chunk_coord_cluster_alloc (const gdl_string * seq_id, const gdl_string * which)
{
	gdl_chunk_coord_cluster * c = GDL_CALLOC (gdl_chunk_coord_cluster, 1);

	(void) seq_id;

	/* Never write through a failed allocation. */
	if (!c)
		return NULL;

	/* strcmp on a null pointer is undefined, so guard it. */
	if (which && strcmp (which, "ref") == 0)
		c->is_ref = gdl_true;

	return c;
}

/**
 * Release a cluster and, optionally, the chunks it references.
 *
 * c      cluster to release; a null pointer is ignored.
 * cfree  when non-null, it is called once on each of the c->nchunk
 *        entries of c->chunks before the array itself is released.
 *        When null, the chunks are left alone and only the array and
 *        the cluster structure are released.
 */
void
gdl_chunk_coord_cluster_free (gdl_chunk_coord_cluster * c, void (*cfree)(gdl_chunk_coord * c))
{
	if (!c)
		return;

	if (cfree)
	{
		size_t i;
		/* Advance the chunk index; the cluster pointer must stay put. */
		for (i = 0; i < c->nchunk; i++)
			(*cfree) (c->chunks[i]);
	}
	GDL_FREE (c->chunks);
	GDL_FREE (c);
}

/**
 * Append a chunk pointer to the end of the cluster's chunk array.
 *
 * A new array one slot larger is allocated, the existing pointers are
 * copied over and the old array is released. The chunk itself is not
 * copied; only the pointer is stored.
 *
 * Returns the index at which the chunk was stored, or -1 when the new
 * array could not be allocated (the cluster is left unchanged in that
 * case).
 */
int
gdl_chunk_coord_cluster_add (gdl_chunk_coord_cluster * cluster, gdl_chunk_coord * chunk)
{
	const size_t old_count = cluster->nchunk;
	gdl_chunk_coord ** grown = GDL_MALLOC (gdl_chunk_coord *, old_count + 1);

	/* Keep the existing array intact if we cannot grow it. */
	if (!grown)
		return -1;

	if (old_count)
	{
		memcpy (grown, cluster->chunks, sizeof (gdl_chunk_coord *) * old_count);
		GDL_FREE (cluster->chunks);
	}

	grown[old_count] = chunk;
	cluster->chunks  = grown;
	cluster->nchunk  = old_count + 1;

	return (int) old_count;
}

/**
 * Reorder the cluster's chunks by position and refresh cluster->start
 * and cluster->end.
 *
 * The sort key of a chunk is the midpoint of its interval on the
 * cluster's own axis (ref_* when is_ref is set, qry_* otherwise), plus
 * a small offset:
 *   - reference axis: +0.25 for chunks reported as deletions;
 *   - query axis:     +0.25 * qry_strand for chunks reported as insertions.
 * The offset presumably breaks ties between an indel and its neighbour --
 * confirm against the chunk coordinate conventions.
 *
 * After sorting, start is taken from the first chunk and end from the
 * last chunk. An empty cluster is left untouched.
 *
 * NOTE(review): results of the scratch allocations are not checked here.
 */
void
gdl_chunk_coord_cluster_sort (gdl_chunk_coord_cluster * cluster)
{
	const size_t N = cluster->nchunk;
	size_t i;
	double s, * x;
	size_t * idx;
	gdl_chunk_coord ** tmp;

	/* Nothing to order; this also keeps tmp[0] and tmp[N-1] in bounds. */
	if (N == 0)
		return;

	x   = GDL_MALLOC (double, N);
	idx = GDL_MALLOC (size_t, N);
	tmp = GDL_MALLOC (gdl_chunk_coord *, N);

	/* Build one sort key per chunk. */
	for (i = 0; i < N; i++)
	{
		if (cluster->is_ref)
		{
			if (gdl_chunk_coord_is_deletion (cluster->chunks[i])) s = 0.25;
			else s = 0;
			x[i] = 0.5*(cluster->chunks[i]->ref_start+cluster->chunks[i]->ref_end)+s;
		}
		else
		{
			if (gdl_chunk_coord_is_insertion (cluster->chunks[i])) s = 0.25*cluster->chunks[i]->qry_strand;
			else s = 0;
			x[i] = 0.5*(cluster->chunks[i]->qry_start+cluster->chunks[i]->qry_end)+s;
		}
	}

	/* idx receives the ordering of the keys; the keys stay in place. */
	gdl_sort_index (idx, x, 1, N);

	for (i = 0; i < N; i++)
		tmp[i] = cluster->chunks[idx[i]];

	GDL_FREE (cluster->chunks);

	cluster->chunks = tmp;

	if (cluster->is_ref)
	{
		cluster->start = tmp[0]->ref_start;
		cluster->end   = tmp[N-1]->ref_end;
	}
	else
	{
		cluster->start = tmp[0]->qry_start;
		cluster->end   = tmp[N-1]->qry_end;
	}

	GDL_FREE (x);
	GDL_FREE (idx);
}

int
gdl_chunk_coord_cluster_goto (const gdl_chunk_coord_cluster * cluster, const long position, const size_t start_idx)
{
	if (position < cluster->start || position > cluster->end)
		return -1;
	if (cluster->is_ref)
	{
		size_t i;
		for(i = start_idx; i < cluster->nchunk; i++)
		{
			if (position >= cluster->chunks[i]->ref_start && position <= cluster->chunks[i]->ref_end)
			{
				if (cluster->chunks[i]->type == 'd')
					 return (i >= 1) ? i-1 : i;
				else
					return i;
			}
		}
		return -1;  
	}
	else
	{
		size_t i;
		for(i = start_idx; i < cluster->nchunk; i++)
		{
			if (position >= cluster->chunks[i]->qry_start && position <= cluster->chunks[i]->qry_end)
			{
				if (cluster->chunks[i]->type == 'i')
				{
					if (cluster->chunks[i]->qry_strand==1)
						return (i >= 1) ? i-1 : i;
					else
						return (i < cluster->nchunk-1) ? i+1 : i;
				}
				else
					return i;	
			}	
		}
		return -1;
	}
}

/**
 * Write every chunk of the cluster to stream, in array order, by calling
 * gdl_chunk_coord_fprintf on each one with the given dictionary.
 *
 * Always returns 0; the results of the per-chunk calls are not inspected.
 */
int
gdl_chunk_coord_cluster_fprintf (FILE * stream, const gdl_chunk_coord_cluster * c, gdl_dictionary * dico)
{
	size_t i;

	for (i = 0; i < c->nchunk; i++)
		gdl_chunk_coord_fprintf (stream, c->chunks[i], dico);

	/* A non-void function must not fall off its end. */
	return 0;
}

/**
 * Tell whether any chunk with index in [from, to] (both included) is
 * reported as an inversion by gdl_chunk_coord_is_inversion.
 *
 * The indices are used as given; no bounds check is performed here.
 */
gdl_boolean
gdl_chunk_coord_cluster_has_inversion (const gdl_chunk_coord_cluster * cluster, const size_t from, const size_t to)
{
	size_t pos = from;

	while (pos <= to)
	{
		if (gdl_chunk_coord_is_inversion (cluster->chunks[pos]))
		{
			return gdl_true;
		}
		pos++;
	}
	return gdl_false;
}

/**
 * Tell whether any chunk with index in [from, to] (both included) is
 * reported as a translocation by gdl_chunk_coord_is_translocation.
 *
 * The indices are used as given; no bounds check is performed here.
 */
gdl_boolean
gdl_chunk_coord_cluster_has_translocation (const gdl_chunk_coord_cluster * cluster, const size_t from, const size_t to)
{
	size_t pos = from;

	while (pos <= to)
	{
		if (gdl_chunk_coord_is_translocation (cluster->chunks[pos]))
		{
			return gdl_true;
		}
		pos++;
	}
	return gdl_false;
}

/**
 * Tell whether any chunk with index in [from, to] (both included) is
 * reported as an inversion or as a translocation.
 *
 * For each chunk the inversion test runs first and the translocation
 * test only when the former fails. No bounds check is performed here.
 */
gdl_boolean
gdl_chunk_coord_cluster_has_rearrangement (const gdl_chunk_coord_cluster * cluster, const size_t from, const size_t to)
{
	size_t pos = from;

	while (pos <= to)
	{
		if (gdl_chunk_coord_is_inversion (cluster->chunks[pos])
		    || gdl_chunk_coord_is_translocation (cluster->chunks[pos]))
		{
			return gdl_true;
		}
		pos++;
	}
	return gdl_false;
}

/**
 * Tell whether every chunk with index in [from, to] (both included) is
 * reported as an inversion by gdl_chunk_coord_is_inversion.
 *
 * Stops at the first chunk that is not. When from > to the range is
 * empty and gdl_true is returned. No bounds check is performed here.
 */
gdl_boolean
gdl_chunk_coord_cluster_is_inversion (const gdl_chunk_coord_cluster * cluster, const size_t from, const size_t to)
{
	size_t pos = from;

	while (pos <= to)
	{
		if (!gdl_chunk_coord_is_inversion (cluster->chunks[pos]))
		{
			return gdl_false;
		}
		pos++;
	}
	return gdl_true;
}

/**
 * Tell whether every chunk with index in [from, to] (both included) is
 * reported as a translocation by gdl_chunk_coord_is_translocation.
 *
 * Stops at the first chunk that is not. When from > to the range is
 * empty and gdl_true is returned. No bounds check is performed here.
 */
gdl_boolean
gdl_chunk_coord_cluster_is_translocation (const gdl_chunk_coord_cluster * cluster, const size_t from, const size_t to)
{
	size_t pos = from;

	while (pos <= to)
	{
		if (!gdl_chunk_coord_is_translocation (cluster->chunks[pos]))
		{
			return gdl_false;
		}
		pos++;
	}
	return gdl_true;
}

/**
 * Fill info with the strand, sequence identifiers and coordinate bounds
 * of the chunk range [from, to] of the cluster.
 *
 * The strand and both sequence ids come from the chunk at index from;
 * the ids are looked up in chunk_cluster->seqid_dico under the "ref"
 * and "qry" keys.
 *
 * Which chunk supplies each bound depends on the strand and the axis:
 *   - qry_strand == 1:        starts from chunks[from], ends from chunks[to];
 *   - otherwise, is_ref set:  reference bounds as above, query start from
 *                             chunks[to] and query end from chunks[from];
 *   - otherwise:              query bounds as in the first case, reference
 *                             start from chunks[to] and end from chunks[from].
 *
 * No bounds check is performed on from and to.
 */
void
gdl_chunk_coord_cluster_get_info (gdl_chunk_coord_cluster * chunk_cluster, const size_t from, const size_t to, gdl_genome_coord_info * info)
{
	gdl_chunk_coord * first = chunk_cluster->chunks[from];
	gdl_chunk_coord * last  = chunk_cluster->chunks[to];
	/* Chunks supplying each bound; forward-strand layout by default. */
	gdl_chunk_coord * ref_lo = first;
	gdl_chunk_coord * ref_hi = last;
	gdl_chunk_coord * qry_lo = first;
	gdl_chunk_coord * qry_hi = last;

	info->qry_strand = first->qry_strand;
	info->ref_seq_id = gdl_dictionary_get (chunk_cluster->seqid_dico, "ref", first->ref_id);
	info->qry_seq_id = gdl_dictionary_get (chunk_cluster->seqid_dico, "qry", first->qry_id);

	if (info->qry_strand != 1)
	{
		if (chunk_cluster->is_ref)
		{
			/* Sorted along the reference: the query bounds swap ends. */
			qry_lo = last;
			qry_hi = first;
		}
		else
		{
			/* Sorted along the query: the reference bounds swap ends. */
			ref_lo = last;
			ref_hi = first;
		}
	}

	info->ref_start = ref_lo->ref_start;
	info->ref_end   = ref_hi->ref_end;
	info->qry_start = qry_lo->qry_start;
	info->qry_end   = qry_hi->qry_end;
}

long
gdl_chunk_coord_cluster_map (const gdl_chunk_coord_cluster * cluster, long position, int start_chunk)
{
	if (start_chunk == -1)
		start_chunk = gdl_chunk_coord_cluster_goto (cluster, position, 0);
	if (start_chunk == -1)
		return -1;
	
	if (cluster->is_ref)
	{
		if (gdl_chunk_coord_is_deletion (cluster->chunks[start_chunk])
			|| gdl_chunk_coord_is_insertion (cluster->chunks[start_chunk]))
		{	
			return cluster->chunks[start_chunk]->qry_start;
		}
		long delta = position-cluster->chunks[start_chunk]->ref_start;
		if (cluster->chunks[start_chunk]->qry_strand == 1)
		{
			return cluster->chunks[start_chunk]->qry_start+delta;
		}
		else
		{
			return cluster->chunks[start_chunk]->qry_end-delta;	
		}
	}
	else
	{	
		if (gdl_chunk_coord_is_insertion (cluster->chunks[start_chunk])
		    || gdl_chunk_coord_is_deletion (cluster->chunks[start_chunk]))
		{    
			return cluster->chunks[start_chunk]->ref_start;
		}
		if (cluster->chunks[start_chunk]->qry_strand==1)
		{
			long delta = position-cluster->chunks[start_chunk]->qry_start;
			return cluster->chunks[start_chunk]->ref_start+delta;
		}
		else
		{
			long delta = cluster->chunks[start_chunk]->qry_end-position;
			return cluster->chunks[start_chunk]->ref_start+delta;
		}
	}
}

/**
 * Accumulate indel sizes over the chunk range [chunk_start, chunk_end]
 * for the region [region_start, region_end] on the cluster's own axis.
 *
 * The totals are added to *del_size and *ins_size, which are never
 * reset here, so the caller has to initialise them.
 *
 * Nothing is done when either chunk index equals (size_t) -1, or when
 * the range is a single chunk that is neither a deletion nor an
 * insertion.
 *
 * With chunk_cluster->is_ref set, for every chunk whose ref_start lies
 * in the region:
 *   - a deletion adds its query length (qry_end - qry_start + 1) to
 *     *del_size;
 *   - an insertion adds its reference length to *ins_size, minus the
 *     part that extends past region_end.
 * Otherwise the same is done with the axes and the insertion/deletion
 * roles swapped: the test uses qry_start, an insertion adds its
 * reference length to *del_size and a deletion adds its clipped query
 * length to *ins_size.
 */
void 
gdl_chunk_coord_cluster_indel_size (const gdl_chunk_coord_cluster * chunk_cluster,
				                      	const size_t chunk_start,
				                      	const size_t chunk_end,
				                      	const long region_start,
				              			 	const long region_end,
				                      	long * del_size,
				                      	long * ins_size)
{
	/* Value an index takes when a lookup result of -1 is passed in. */
	const size_t no_chunk = (size_t) -1;
	size_t k;

	if (chunk_start == no_chunk || chunk_end == no_chunk)
		return;

	if (chunk_start == chunk_end)
	{
		gdl_chunk_coord * only = chunk_cluster->chunks[chunk_start];

		if (!gdl_chunk_coord_is_deletion (only) && !gdl_chunk_coord_is_insertion (only))
			return;
	}

	if (chunk_cluster->is_ref)
	{
		for (k = chunk_start; k <= chunk_end; k++)
		{
			gdl_chunk_coord * cur = chunk_cluster->chunks[k];

			if (gdl_chunk_coord_is_deletion (cur))
			{
				if (cur->ref_start >= region_start && cur->ref_start <= region_end)
					*del_size += cur->qry_end-cur->qry_start+1;
				continue;
			}
			if (!gdl_chunk_coord_is_insertion (cur))
				continue;
			if (cur->ref_start >= region_start && cur->ref_start <= region_end)
			{
				/* Part of the chunk lying beyond the end of the region. */
				long clipped = 0;

				if (cur->ref_end > region_end)
					clipped = cur->ref_end-region_end;
				*ins_size += cur->ref_end-cur->ref_start+1-clipped;
			}
		}
		return;
	}

	for (k = chunk_start; k <= chunk_end; k++)
	{
		gdl_chunk_coord * cur = chunk_cluster->chunks[k];

		if (gdl_chunk_coord_is_insertion (cur))
		{
			if (cur->qry_start >= region_start && cur->qry_start <= region_end)
				*del_size += cur->ref_end-cur->ref_start+1;
			continue;
		}
		if (!gdl_chunk_coord_is_deletion (cur))
			continue;
		if (cur->qry_start >= region_start && cur->qry_start <= region_end)
		{
			/* Part of the chunk lying beyond the end of the region. */
			long clipped = 0;

			if (cur->qry_end > region_end)
				clipped = cur->qry_end-region_end;
			*ins_size += cur->qry_end-cur->qry_start+1-clipped;
		}
	}
}

/**
 * Map the start and end positions of a GFF3 record through the cluster.
 *
 * chunk_start / chunk_end are the indices of the chunks holding
 * rec->start and rec->end. Pass -1 for either one to have it looked up
 * with gdl_chunk_coord_cluster_goto; the lookup of the end position
 * begins at chunk_start.
 *
 * The returned record comes from gdl_gfeatures_gff3_record_alloc and
 * only its start and end positions are initialized by this function.
 * NULL is returned when a position cannot be located in the cluster or
 * when the record cannot be allocated.
 *
 * NOTE(review): the caller presumably owns the returned record --
 * confirm against the gff3 record API.
 */
gdl_gfeatures_gff3_record *
gdl_chunk_coord_cluster_map_gff3_record (const gdl_chunk_coord_cluster * cluster, const gdl_gfeatures_gff3_record * rec, int chunk_start, int chunk_end)
{
	gdl_gfeatures_gff3_record * map_rec;

	if (chunk_start == -1)
		chunk_start = gdl_chunk_coord_cluster_goto (cluster, rec->start, 0);
	if (chunk_start == -1)
		return NULL;

	if (chunk_end == -1)
		chunk_end = gdl_chunk_coord_cluster_goto (cluster, rec->end, chunk_start);
	if (chunk_end == -1)
		return NULL;

	map_rec = gdl_gfeatures_gff3_record_alloc ();
	/* Do not write through a failed allocation. */
	if (!map_rec)
		return NULL;

	map_rec->start = gdl_chunk_coord_cluster_map (cluster, rec->start, chunk_start);
	map_rec->end   = gdl_chunk_coord_cluster_map (cluster, rec->end, chunk_end);

	return map_rec;
}
