/*  
 * 	clust/clust.c
 * 
 *  $Author: baptiste $, $Date: 2008-05-13 15:33:46 $, $Version$
 *
 *  Libgdl : a C library for statistical genetics
 * 
 *  Copyright (C) 2003-2006  Jean-Baptiste Veyrieras, INRA, France.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
 
#include <gdl/gdl_common.h>
#include <gdl/gdl_errno.h>
#include <gdl/gdl_sort_uint.h>
#include <gdl/gdl_clustering.h>

/*
 * Result of a clustering: a partition of `ne` entities into `nc` clusters.
 *
 * Arrays indexed by cluster (ridx, nbc, clusters) hold at least `nc` valid
 * entries; `cidx` is indexed by entity and holds `ne` entries.
 */
struct _gdl_clustering
{
	size_t ne;   // total number of entities
	size_t nc;   // number of clusters
	size_t * nbc;  // number of entities per cluster (starts at 1 for the representative)
	size_t ** clusters;    // per-cluster array of member entity indices; NULL when the cluster has no extra member
	size_t * ridx; // per-cluster index of the representing entity
	size_t * cidx; // per-entity index of the cluster it belongs to
};

/*
 * Creates an empty clustering able to describe `size` entities.
 *
 * Every per-cluster and per-entity array is sized for the worst case of
 * `size` clusters; the cluster count starts at zero.  The arrays are
 * obtained through GDL_CALLOC (presumably zero-filled, calloc-style --
 * confirm against the macro definition).  No allocation result is checked
 * in this function.
 *
 * Returns the new object; the caller owns it.
 */
static gdl_clustering *
gdl_clustering_alloc (size_t size)
{
	gdl_clustering * clust;

	clust = GDL_MALLOC (gdl_clustering, 1);

	/* scalar bookkeeping */
	clust->nc = 0;
	clust->ne = size;

	/* worst-case sized tables */
	clust->cidx     = GDL_CALLOC (size_t, size);
	clust->ridx     = GDL_CALLOC (size_t, size);
	clust->nbc      = GDL_CALLOC (size_t, size);
	clust->clusters = GDL_CALLOC (size_t *, size);

	return clust;
}

/*
 * Materialises the member list of one cluster once its run of entities in
 * the sorted order is complete.
 *
 *   c    clustering under construction
 *   w    workspace holding the raw per-entity labels (w->cidx)
 *   idx  permutation of entity indices used to walk w->cidx in sorted order
 *   i    position in idx one past the last entity of the run
 *   j    workspace label of the run; j-1 is the entity whose cluster the
 *        run is attached to
 *
 * The member array is allocated with c->nbc[cluster] slots and filled from
 * slot 0 upwards while walking idx backwards from position i-1, for as long
 * as the label equals j.  Each visited entity is also mapped to the cluster
 * in c->cidx.
 *
 * NOTE(review): as driven by gdl_clustering_workspace_done, nbc counts the
 * representative plus the run, so the last allocated slot appears to be left
 * unwritten here -- confirm what callers expect in that slot.
 */
static void
_new_cluster (gdl_clustering * c,
              gdl_clustering_workspace * w,
              size_t * idx,
              size_t i,
              size_t j)
{
	/* target cluster; invariant for the whole run, so read it once */
	const size_t cl = c->cidx[j-1];
	size_t pos;
	size_t e;

	c->clusters[cl] = GDL_MALLOC (size_t, c->nbc[cl]);

	/*
	 * Count down with an unsigned index: `pos` is always one past the
	 * element being examined, which avoids narrowing the size_t position
	 * into an int and never needs a negative value to stop at the front.
	 */
	for (pos = i; pos > 0 && w->cidx[idx[pos-1]] == j; pos--)
	{
		e = idx[pos-1];
		c->clusters[cl][i-pos] = e;
		c->cidx[e]             = cl;
	}
}

/*
 * Converts the raw labels accumulated in a workspace into a clustering.
 *
 * Label encoding read from w->cidx (one label per entity):
 *   0      the entity opens its own cluster and becomes its representative;
 *   j > 0  the entity joins the cluster of entity j-1.
 *
 * Entities are visited through the index permutation produced by
 * gdl_sort_uint_index (presumably ascending order of label, so that all
 * representatives are seen before any attached entity -- confirm against
 * the sort routine).  Each time a run of equal non-zero labels ends, its
 * member list is built by _new_cluster.
 *
 * When fewer clusters than entities were created, the per-cluster tables
 * (ridx, nbc, clusters) are shrunk to exactly `nc` entries; cidx keeps its
 * `ne` entries.
 *
 * Returns NULL when w is NULL, otherwise a new clustering owned by the
 * caller (release with gdl_clustering_free).  The workspace itself is not
 * released here.
 */
gdl_clustering *
gdl_clustering_workspace_done (gdl_clustering_workspace * w)
{
	size_t i, j, k;
	size_t * idx;
	gdl_clustering * c;

	if (w == NULL)
	{
		return NULL;
	}

	c   = gdl_clustering_alloc (w->size);
	idx = GDL_CALLOC (size_t, w->size);

	gdl_sort_uint_index (idx, w->cidx, 1, w->size);

	for (j = i = 0; i < w->size; i++)
	{
		k = w->cidx[idx[i]];
		if (j && k != j)
		{
			// the run attached to label j ended at position i-1
			_new_cluster (c, w, idx, i, j);
		}
		j = k;
		if (!j)
		{
			// singleton: the entity represents a brand new cluster
			c->ridx[c->nc]      = idx[i];
			c->cidx[idx[i]]     = c->nc;
			c->nbc[c->nc]       = 1;
			c->clusters[c->nc]  = NULL;
			c->nc++;
		}
		else
		{
			// one more entity in the cluster of entity j-1
			c->nbc[c->cidx[j-1]]++;
		}
	}
	if (j)
	{
		// the last run reaches the end of the sorted order
		_new_cluster (c, w, idx, i, j);
	}

	// the sort permutation is scratch data only: release it
	GDL_FREE (idx);

	if (c->nc < c->ne)
	{
		// shrink the per-cluster tables to the number of clusters found
		size_t * ridx, * nbc, ** clusters;
		ridx = GDL_MALLOC (size_t, c->nc);
		nbc  = GDL_MALLOC (size_t, c->nc);
		clusters = GDL_MALLOC (size_t *, c->nc);
		for (i = 0; i < c->nc; i++)
		{
			ridx[i] = c->ridx[i];
			nbc[i]  = c->nbc[i];
			clusters[i] = c->clusters[i];
		}
		// only the tables are released; the member arrays now live in `clusters`
		GDL_FREE (c->ridx);
		GDL_FREE (c->nbc);
		GDL_FREE (c->clusters);
		c->ridx = ridx;
		c->nbc  = nbc;
		c->clusters = clusters;
	}

	return c;
}

/*
 * Deep-copies a clustering.
 *
 * The copy gets its own per-cluster tables (nc entries), its own per-entity
 * table (ne entries) and its own member array for every cluster whose member
 * array is non-NULL in the source (nbc[k] entries each).
 *
 * Returns NULL when c is NULL, otherwise a new object owned by the caller.
 * No allocation result is checked in this function.
 */
gdl_clustering *
gdl_clustering_clone (const gdl_clustering * c)
{
	size_t k;
	gdl_clustering * copy;

	if (!c)
	{
		return NULL;
	}

	copy = GDL_MALLOC (gdl_clustering, 1);

	copy->ne = c->ne;
	copy->nc = c->nc;

	copy->ridx     = GDL_MALLOC (size_t, c->nc);
	copy->nbc      = GDL_MALLOC (size_t, c->nc);
	copy->clusters = GDL_MALLOC (size_t *, c->nc);
	copy->cidx     = GDL_MALLOC (size_t, c->ne);

	/* flat tables are copied wholesale */
	memcpy (copy->cidx, c->cidx, sizeof(size_t)*c->ne);
	memcpy (copy->ridx, c->ridx, sizeof(size_t)*c->nc);
	memcpy (copy->nbc, c->nbc, sizeof(size_t)*c->nc);

	/* member arrays are duplicated one cluster at a time */
	for (k = 0; k < c->nc; k++)
	{
		const size_t * src = c->clusters[k];
		size_t * dst = NULL;

		if (src)
		{
			dst = GDL_MALLOC (size_t, c->nbc[k]);
			memcpy (dst, src, sizeof(size_t)*c->nbc[k]);
		}
		copy->clusters[k] = dst;
	}

	return copy;
}

/*
 * Releases a clustering and everything it owns: the member array of each of
 * its nc clusters, the per-cluster and per-entity tables, and the object
 * itself.  Does nothing when c is NULL.
 */
void
gdl_clustering_free (gdl_clustering * c)
{
	size_t k;

	if (!c)
	{
		return;
	}

	/* member arrays first, then the table that points to them */
	for (k = 0; k < c->nc; k++)
	{
		GDL_FREE (c->clusters[k]);
	}
	GDL_FREE (c->clusters);

	GDL_FREE (c->nbc);
	GDL_FREE (c->cidx);
	GDL_FREE (c->ridx);

	GDL_FREE (c);
}

/*
 * Returns the total number of entities described by the clustering.
 * c is dereferenced unconditionally and must not be NULL.
 */
size_t
gdl_clustering_size (const gdl_clustering * c)
{
	const size_t n_entities = c->ne;

	return n_entities;
}

/*
 * Returns the index of the cluster that entity i belongs to.
 * Neither c nor i is validated here.
 */
size_t
gdl_clustering_cluster (const gdl_clustering * c, size_t i)
{
	const size_t cluster_of_entity = c->cidx[i];

	return cluster_of_entity;
}

/*
 * Returns the number of clusters.
 * c is dereferenced unconditionally and must not be NULL.
 */
size_t
gdl_clustering_nclust (const gdl_clustering * c)
{
	const size_t n_clusters = c->nc;

	return n_clusters;
}

/*
 * Returns the index of the entity that represents cluster i.
 * Neither c nor i is validated here.
 */
size_t
gdl_clustering_clust_idx (const gdl_clustering * c, size_t i)
{
	const size_t representative = c->ridx[i];

	return representative;
}

/*
 * Returns the number of entities recorded for cluster i.
 * Neither c nor i is validated here.
 */
size_t
gdl_clustering_clust_size (const gdl_clustering * c, size_t i)
{
	const size_t n_members = c->nbc[i];

	return n_members;
}

/*
 * Returns the member array stored for cluster i.
 *
 * The pointer refers to the clustering's own storage (no copy is made) and
 * is NULL for a cluster that never received a member array.  Neither c nor
 * i is validated here.
 */
size_t *
gdl_clustering_clust_members (const gdl_clustering * c, size_t i)
{
	size_t * members = c->clusters[i];

	return members;
}

#include "fread.c"
