#!/usr/bin/env perl

#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# This file is part of G-language Genome Analysis Environment package
#
#     Copyright (C) 2001-2007 Keio University
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# 
#   $Id: Skyline.pm,v 1.1 2002/07/30 17:44:27 gaou Exp $
#
# G-language GAE is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
# 
# G-language GAE is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public
# License along with G-language GAE -- see the file COPYING.
# If not, write to the Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# 
#END_HEADER
#
# written by Kazuharu Arakawa <gaou@sfc.keio.ac.jp> at
# G-language Project, Institute for Advanced Biosciences, Keio University.
#

package Rcmd::Clustering;

use strict;
use Carp qw(croak);

use SubOpt;
use G::Messenger;

#:::::::::::::::::::::::::::::::::
#       Perldoc
#:::::::::::::::::::::::::::::::::


=head1 NAME

  Rcmd::Clustering - Interfaces to clustering algorithms of R language.

=head1 DESCRIPTION

    This class is a part of G-language Genome Analysis Environment, 
    collecting interfaces to clustering algorithms of R language.

=cut


#::::::::::::::::::::::::::::::
#    Let the code begin...
#::::::::::::::::::::::::::::::

sub set_clust_data{
    my $rcmd = shift;

    my @args = opt_get(@_);
    my $label = opt_val("label");

    my $flag = 0;
    foreach (@args){
	if ($flag == 0){
	    $flag ++;

	    $rcmd->array('rclust', @$_);
	}else{
	    $rcmd->array('tmp', @$_);
	    $rcmd->exec('rclust <- cbind(rclust, tmp)');
	}
    }

    if (length $label){
	$rcmd->sarray('label', @$label);
	$rcmd->exec('dimnames(rclust) <- list(label, NULL)');
    }
}

sub sample_data_for_clustering{
    my $rcmd = shift;

    $rcmd->exec(
		'rclust<-rbind(matrix(rnorm(100,sd=0.3),ncol=2),' .
		'matrix(rnorm(100,mean=1,sd=0.3),ncol=2))' 
		);
}


=head2 $rcmd->som()

  Name: $rcmd->som()   -   clustering using Self-Organizing Map

  Description:
    Clustering with Self-Organizing Map (SOM) using R language.
    Installation of GeneSOM library for R language is required.
        run R as a super user - sudo R - and type the following:
        install.packages('GeneSOM')) 
    
    Returns a two-dimensional array correspondingn to the 
    result$visual of som() in R's GeneSOM library.

  Usage:
    $rcmd = new Rcmd();
    @result = $rcmd->som(\@array1, \@array2, \@array3, ..., -label=>\@label);
      or
    @result = $rcmd->som(-sampledata=>1);

    Arrays correspond to the columns (data series), and labels for each of
    these arrays can be given by -label option.

  Options:
   -label        labels or names of the data series.
   -xdim         x-dimension of the map (default: 3)
   -ydim         y-dimension of the map (default: 3)
   -filename     output filename of the graph (default: som.png)
   -output       output toggle option (default: show)
                 "g" to generate graph without displaying.
   -sampledata   use sample data (default: 0)

  Author: 
    Kazuharu Arakawa (gaou@sfc.keio.ac.jp)

  History:
   20070612-01 converted to Rcmd::Clustering
   20030904-01 initial posting (G::Tools::RCluster)

=cut


sub som{
    my $rcmd = shift;
    $rcmd->set_mode('tmp');

    opt_default(filename=>"som.png", output=>"show", xdim=>3, ydim=>3, sampledata=>0);
    my @args = opt_get(@_);
    my $xdim = opt_val("xdim");
    my $ydim = opt_val("ydim");
    my $filename= opt_val("filename");
    my $output = opt_val("output");
    my $sampledata = opt_val("sampledata");
    my $label = opt_val("label");

    if($sampledata){
	$rcmd->sample_data_for_clustering();
    }else{
	$rcmd->set_clust_data(@args, -label=>$label);
    }

    my $message = $rcmd->exec(
			      'require(GeneSOM)'
			      );

    croak(
	"\n\nPackage GeneSOM not installed.\nrun\n\tinstall.packages(\'GeneSOM\')\n" . 
	"in R as root.\n\n"
	) if ($message =~ /FALSE/);

    $rcmd->exec(
		"rclust.som<-som(rclust, $xdim, $ydim)",
		'rclust.som$visual'
		);

    my @result;
    open(FILE, $rcmd->{log}) || ($!);
    while(<FILE>){
	if(/qerror/){
	    @result = ();
	    while(<FILE>){
		chomp;
		if(/^\d/){
		    my (undef, $x, $y, $qerror) = split(/\s+/, $_, 4);
		    push(@result, [$x, $y, $qerror]);
		}
	    }
	}
    }
    close(FILE);

    if($output =~ /g/ || $output =~ /show/){
	$rcmd->exec(
		    'postscript("/tmp/som.ps")',
		    'plot(rclust.som)'
		    );

	mkdir('graph', 0777);
	system("convert -rotate 90 /tmp/som.ps graph/$filename");
	msg_gimv("graph/$filename") if ($output =~ /show/);
    }
    
    $rcmd->set_mode();
    return @result;
}



=head2 $rcmd->hclust()

  Name: $rcmd->hclust()   -   hierarchical clustering analysis

  Description:
    Hierarchical clustering with using R language.
    Distance measure to use can be specified by -dist_method option.
    (euclidean:default, maximum, manhattan, canberra, binary)
    Clustering method can be specified by -hclust_method option. 
    (complete:default, ward, single, average, mcquitty, median, centroid)

    This method automatically runs dist() in R to calculate the dissimilarities
    of the given data and passes the result to hclust() of R.

    Returned value corresponds to result$merge of hclust() in R (n-1 by 2 matrix).
    Row i describes the merging of clusters at step i.
    
  Usage:
    $rcmd = new Rcmd();
    @merge = $rcmd->hclust(\@array1, \@array2, \@array3, ..., -label=>\@label);
      or
    @merge = $rcmd->hclust(-sampledata=>1);

    Arrays correspond to the columns (data series), and labels for each of
    these arrays can be given by -label option.

 Options:
   -label           labels or names of the data series.
   -dist_method     distance measure (euclidean:default, maximum, manhattan, 
                    canberra, binary)
   -hclust_method   clustering method (complete:default, ward, single, average, 
                    mcquitty, median, centroid) 
   -filename        output filename of the graph (default: hclust.png)
   -output          output toggle option (default: show)
                    "g" to generate graph without displaying.
   -sampledata      use sample data (default: 0)

  Author: 
    Kazuharu Arakawa (gaou@sfc.keio.ac.jp)

  History:
   20070612-01 converted to Rcmd::Clustering
   20030904-01 initial posting (G::Tools::RCluster)

=cut

sub hclust{
    my $rcmd = shift;
    $rcmd->set_mode('tmp');

    opt_default("dist_method"=>"euclidean", "hclust_method"=>"com", 
		filename=>"hclust.png", output=>"show", sampledata=>0);
    my @args = opt_get(@_);
    my $dist_method = opt_val("dist_method");
    my $hclust_method = opt_val("hclust_method");
    my $filename= opt_val("filename");
    my $output = opt_val("output");
    my $label = opt_val("label");
    my $sampledata = opt_val("sampledata");

    if($sampledata){
	$rcmd->sample_data_for_clustering();
    }else{
	$rcmd->set_clust_data(@args, -label=>$label);
    }

    $rcmd->exec(
		'require(mva)',
		"rclust.hclust<-hclust(dist(rclust, \"$dist_method\"), \"$hclust_method\")"
		);

    my @result = $rcmd->exec('rclust.hclust$merge');

    if($output =~ /g/ || $output =~ /show/){
	$rcmd->exec(
		    'postscript("/tmp/rclust.ps")',
		    'plot(rclust.hclust,hang=-1)'
		    );

	mkdir('graph', 0777);
	system("convert -rotate 90 /tmp/rclust.ps graph/$filename");
	msg_gimv("graph/$filename") if ($output =~ /show/);
    }

    shift @result;

    $rcmd->set_mode();
    return @result;
}



=head2 $rcmd->kmeans()

  Name: $rcmd->kmeans()   -   clustering with K-means method

  Description:
    
    Clustering with K-means method with using R language.
    Number of cluster centers can be given by -centers option (default: 10)
    and number of iterations is given by -iter.max (default: 10).

    Returned value corresponds to result$cluster of kmeans() in R.
    (a vector of cluster numbers to which each point is allocated)
    
  Usage:
    $rcmd = new Rcmd();
    @cluster = $rcmd->kmeans(\@array1, \@array2, \@array3, ..., -label=>\@label);
      or
    @cluster = $rcmd->kmeans(-sampledata=>1);

    Arrays correspond to the columns (data series), and labels for each of
    these arrays can be given by -label option.

  Options:
   -label           labels or names of the data series.
   -center          number of cluster centers (default: 5)
   -iter.max        number of iterations (default: 10)
   -filename        output filename of the graph (default: hclust.png)
   -output          output toggle option (default: show)
                    "g" to generate graph without displaying.
   -sampledata      use sample data (default: 0)

  Author: 
    Kazuharu Arakawa (gaou@sfc.keio.ac.jp)

  History:
   20070612-01 converted to Rcmd::Clustering
   20030904-01 initial posting (G::Tools::RCluster)

=cut


sub kmeans{
    my $rcmd = shift;
    $rcmd->set_mode('tmp');

    opt_default("iter.max"=>10, filename=>"kmeans.png", output=>"show", sampledata=>0, centers=>5);
    my @args = opt_get(@_);
    my $centers = opt_val("centers");
    my $iter = opt_val("iter.max");
    my $output = opt_val("output");
    my $filename = opt_val("filename");
    my $sampledata = opt_val("sampledata");
    my $label = opt_val("label");

    if($sampledata){
	$rcmd->sample_data_for_clustering();
    }else{
	$rcmd->set_clust_data(@args, -label=>$label);
    }

    $rcmd->exec(
		'require(mva)',
		"rclust.kmeans<-kmeans(rclust,$centers,$iter)"
		);

    my @result = $rcmd->exec('rclust.kmeans$cluster');

    if($output =~ /g/ || $output =~ /show/){
	$rcmd->exec(
		    'postscript("/tmp/kmeans.ps")',
		    'plot(rclust,col=rclust.kmeans$cluster)',
		    "points(rclust.kmeans\$centers, col=1:$centers,pch=8)"
		    );

	mkdir('graph', 0777);
	system("convert -rotate 90 /tmp/kmeans.ps graph/$filename");
	msg_gimv("graph/$filename") if ($output =~ /show/);
    }
    
    $rcmd->set_mode();
    return @result;
}



1;

