#!/usr/bin/env perl
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::
#             Consensus.pm odyssey
#
#     Copyright (C) 2001 Keio University
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# 
#   $Id: Consensus.pm,v 1.12 2001/09/11 17:58:00 s98982km Exp $

package G::Seq::Consensus;

use SubOpt;
use G::Tools::Graph;
use G::Messenger;
use strict;
use Statistics::Descriptive;
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);

require Exporter;

@ISA = qw(Exporter AutoLoader);
# Items to export into callers namespace by default. Note: do not export
# names by default without a very good reason. Use EXPORT_OK instead.
# Do not simply export all your public functions/methods/constants.
@EXPORT = qw(
	     base_counter
	     _base_printer
	     base_entropy
	     base_information_content
	     base_relative_entropy
	     base_z_value
	     base_individual_information_matrix
	     consensus_z
);
$VERSION = '0.01';

#::::::::::::::::::::::::::::::
#        Methods Start
#::::::::::::::::::::::::::::::
sub new{
    my $pkg = shift;
    my $filename = shift;
    my $option = shift;
    my $this;

    return $this;
}


sub _complement {
    my $nuc = reverse(shift);
    
    $nuc =~ tr
        [acgtuACGTU]
        [tgcaaTGCAA];
    return $nuc;
}


#base_counter ver.20010623-01
#scripting by Koya Mori(mory@g-language.org)
#This program analyzes bases around start codon.
#(hash result)=&bun(pointer Genome,  string position,  int pattern_length,  int upstream_limit,  int downstream_limit,  boolean debug);
sub base_counter{
    &opt::default(output=>"stdout",filename=>"consensus.csv",PatLength=>3,upstream=>30,downstream=>30,position=>"end");
    my @args=opt::get(@_);

    my $gb=shift @args;
    my $position=opt::val("position");
    my $PatLen=opt::val("PatLength");
    my $upstream=opt::val("upstream");
    my $downstream=opt::val("downstream");
    my $output=opt::val("output");
    my $filename=opt::val("filename");
    my @hash;
    my $num=1;
    my $start;
    my $i;
    my $nuc;
    my $cnuc;

    $PatLen--;
    $downstream++;

    while(defined(%{$gb->{"FEATURE$num"}})){
	if($gb->{"FEATURE$num"}->{type} eq 'CDS'){
	    if($gb->{"FEATURE$num"}->{direction} eq 'direct'){
		if($position eq "start"){
		    $start = $gb->{"FEATURE$num"}->{start};
		}
		elsif($position eq "end"){
		    $start = $gb->{"FEATURE$num"}->{end};
		}
		else{
		    &msg::error('Invalid parameter.Please enter "start" or "end".',"\n");
		}
#if(getseq($gb,$start-1,$start+1) eq 'atg'){
		for($i=$upstream;$i>-$downstream;$i--){
		    if($start-1-$i>0){
			$hash[$upstream-$i]{kazu}++;
			$nuc=$gb->getseq($start-1-$i,$start-1-$i+$PatLen);
			$hash[$upstream-$i]{$nuc}++;
			$hash[0]{pat}{$nuc}++;
		    }
		}
#}
	    }
	    elsif($gb->{"FEATURE$num"}->{direction} eq 'complement'){
		if($position eq "start"){
		    $start = $gb->{"FEATURE$num"}->{end};
		}
		elsif($position eq "end"){
		    $start = $gb->{"FEATURE$num"}->{start};
		}
		else{
		    &msg::error('Invalid parameter.Please enter "start" or "end".',"\n");
		}
#if(getseq($gb,$start-3,$start-1) eq 'cat'){
		for($i=$upstream;$i>-$downstream;$i--){
		    if($start-1+$i<length($gb->{SEQ})){
			$hash[$upstream-$i]{kazu}++;
			$nuc=$gb->getseq($start-1+$i-$PatLen,$start-1+$i);
			$cnuc=_complement($nuc);
			$hash[$upstream-$i]{$cnuc}++;
			$hash[0]{pat}{$cnuc}++;
		    }
		}
#}
	    }
	}
	$num++;
    }
    $hash[0]{upstream}=$upstream;
    $hash[0]{downstream}=$downstream;
    $hash[0]{PatLen}=$PatLen;
    $hash[0]{gbk}=$gb;
    $hash[0]{pos}=$position;

    if($output eq "f"){
	_base_printer(\@hash,-output=>"f",-filename=>$filename);
    }
    if($output eq "stdout"){
	_base_printer(\@hash,-output=>"stdout");
    }
    return \@hash;
}


#base_printer ver.20010623-01
#scripting by Koya Mori(mory@g-language.org)
#This program prints result of base_counter.
#need &bun.
#&base_print(pointer Genome,  string position,  int pattern_length, int upstream_limit,  int downstream_limit,  boolean debug);
sub _base_printer{
    &opt::default(output=>"stdout",filename=>"consensus.csv");
    my @args=opt::get(@_);

    my $hash=shift @args;
    my $printer=opt::val("output");
    my $filename=opt::val("filename");
    my $PatLen=$$hash[0]{PatLen};
    my $upstream=$$hash[0]{upstream};
    my $downstream=$$hash[0]{downstream};
    my $gb=$$hash[0]{gbk};
    my $position=$$hash[0]{pos};
    my $i;
    my $j;


    if($printer eq "f"){
	open(FILE,">>$filename");
	print FILE "Pattern";
	for(my $i=$upstream;$i>-$downstream;$i--){
	    print FILE ",$i";
	}
	print FILE "\n";
	foreach(sort keys(%{$$hash[0]{pat}})){
	    if($_ ne "PatLen" && $_ ne "upstream" && $_ ne "downstream" && $_ ne "gbk" && $_ ne "kazu" && $_ ne "pos"){
		print FILE $_;
		for(my $i=$upstream;$i>-$downstream;$i--){
		    printf FILE ",%d",$$hash[$i]{$_};
		}
		print FILE "\n";
	    }
	}
	print FILE "\n\n";

	print FILE "Pattern";
	for(my $i=$upstream;$i>-$downstream;$i--){
	    print FILE ",$i";
	}
	print FILE "\n";
	foreach(sort keys(%{$$hash[0]{pat}})){
	    if($_ ne "PatLen" && $_ ne "upstream" && $_ ne "downstream" && $_ ne "gbk" && $_ ne "kazu" && $_ ne "pos"){
		print FILE $_;
		for(my $i=$upstream;$i>-$downstream;$i--){
		    printf FILE ",%03.3f",$$hash[$i]{$_}*100/$$hash[$i]{kazu};
		}
		print FILE "\n";
	    }
	}
	
	close(FILE);
    }

    if($printer eq "stdout"){
	&msg::send("CDS number:",$$hash[$upstream-1]{kazu},"\n");
	for($i=$upstream;$i>-$downstream;$i=$i-12){
	    for(my $c=0;$c<102;$c++){
		&msg::send("-");
	    }
	        
	    &msg::send("\npos\t");
	    for(my $c=0;$c<12 && $i-$c > -$downstream;$c++){
		&msg::send(sprintf("%6d\t",-($i-$c)));
	    }
	    &msg::send("\n");
	        
	    foreach(sort keys(%{$$hash[0]{pat}})){
		if($_ ne "PatLen" && $_ ne "upstream" && $_ ne "downstream" && $_ ne "gbk" && $_ ne "kazu" && $_ ne "pos"){
		    &msg::send("\n $_|\t");
		    for($j=0;$j<12 && $i-$j>-$downstream;$j++){ 
			&msg::send(sprintf("%6d\t",$$hash[$upstream-$i+$j]{$_}));
		    }
		}
	    }
	    &msg::send("\n");
	        
	    foreach(sort keys(%{$$hash[0]{pat}})){
		if($_ ne "PatLen" && $_ ne "upstream" && $_ ne "downstream" && $_ ne "gbk" && $_ ne "kazu" && $_ ne "pos"){
		    &msg::send(sprintf("\n $_|\t"));
		    for($j=0;$j<12 && $i-$j>-$downstream;$j++){        
			&msg::send(sprintf("%03.3f\t",$$hash[$upstream-$i+$j]{$_}*100/$$hash[$upstream-$i+$j]{kazu}));
		    }
		}
	    }
	    &msg::send("\n");
	}
    }
}


#logmod ver.20010714-01
#scripting by Koya Mori(mory@g-language.org)
#This program calculate log2.
#(double result)=&logmod(double number,  boolean debug);
sub logmod{
    my $x=shift;

    if($x == 0){
	return 0;
    } 
    else{
	return log($x);
    }
}


#buns::ent in Atg7
#base_entropy ver.20010623-01
#scripting by Koya Mori(mory@g-language.org)
#This program calculate entropy of base_counter.
#need &base_counter.
#(array result)=&base_entropy(hash base_counter_result,  boolean debug);
sub base_entropy{
    &opt::default(output=>"show",filename=>"consensus.csv",PatLength=>3,upstream=>30,downstream=>30,position=>"end");
    my @args=opt::get(@_);

    my $gb=shift @args;
    my $position=opt::val("position");
    my $PatLen=opt::val("PatLength");
    my $upstream=opt::val("upstream");
    my $downstream=opt::val("downstream");
    my $printer=opt::val("output");
    my $filename=opt::val("filename");
    my $hash;
    my %ratio;
    my @loc_ratio;
    my @loc_total;
    my @entropy;
    my $offset;
    my %seqnum;
    my $tmp;
    my @y;

    $hash=base_counter($gb,-output=>"n",-filename=>$filename,-position=>$position,-PatLength=>$PatLen,-upstream=>$upstream,-downstream=>$downstream);
    $downstream++;
    
    if($printer=~/f/){
	open(FILE,">>$filename");
    }

    for(my $i=$upstream;$i>-$downstream;$i--){
	foreach(keys(%{$$hash[$upstream-$i]})){
	    if($_ ne "PatLen" && $_ ne "upstream" && $_ ne "downstream" && $_ ne "kazu" && $_ ne "pat" && $_ ne "gbk" && $_ ne "pos"){
		unless(defined($seqnum{$_})){
		    $offset=$tmp=0;
		    while($tmp!=-1){
			$tmp=index($gb->{SEQ},$_,$offset);
			$offset=$tmp+1;
			$seqnum{$_}++ if($tmp!=-1);
		    }
		}
		$ratio{$_}=$seqnum{$_}/(length($gb->{SEQ})-length($_)+1);
		if($ratio{$_} != 0){
		    $loc_ratio[$upstream-$i]{$_}=$$hash[$upstream-$i]{$_}/$$hash[$upstream-$i]{kazu};
		    $loc_ratio[$upstream-$i]{$_}=$loc_ratio[$upstream-$i]{$_}/$ratio{$_};
		    $loc_total[$upstream-$i]+=$loc_ratio[$upstream-$i]{$_};
		}
		else{
		    $entropy[$upstream-$i]=0;
		}
	    }
	}
	
	foreach(keys(%{$$hash[$upstream-$i]})){
	    if($_ ne "PatLen" && $_ ne "upstream" && $_ ne "downstream" && $_ ne "kazu" && $_ ne "pat" && $_ ne "gbk" && $_ ne "pos"){
		$entropy[$upstream-$i]-=$loc_ratio[$upstream-$i]{$_}/$loc_total[$upstream-$i]
		    *&logmod($loc_ratio[$upstream-$i]{$_}/$loc_total[$upstream-$i])/&logmod(2.0)/length($_);
	    }
	}
	if($printer=~/f/){
	    printf FILE "%d,%.5f\n",-$i ,$entropy[$upstream-$i];
	}
	if($printer!~/[fn]/){
	    &msg::send(sprintf("%d\t%.5f\n",-$i ,$entropy[$upstream-$i]));
	}
	push(@y,-$i);
    }
    close(FILE);
    
    if($printer=~/g/ || $printer=~/show/){
	&G::Tools::Graph::_UniUniGrapher(\@y,\@entropy,-filename=>"base_entropy.gif",-x=>"position",-y=>"entropy",-title=>"entropy");
    }

    system('gimv','graph/base_entropy.gif') if($printer=~/show/);

    $tmp="entropy";
    @entropy=($position,$tmp,$upstream,$downstream-1,@entropy);
    return \@entropy;
}


#buns::rseq in Atg7
#base_informarion_content ver.20010707-01
#scripting by Koya Mori(mory@g-language.org)
#This program calculate information content of base_counter.
#need &base_counter.
#(array result)=&base_information_content(hash base_counter_result,  boolean debug);
sub base_information_content{
    &opt::default(output=>"show",filename=>"consensus.csv",PatLength=>3,upstream=>30,downstream=>30,position=>"end");
    my @args=opt::get(@_);

    my $gb=shift @args;
    my $position=opt::val("position");
    my $PatLen=opt::val("PatLength");
    my $upstream=opt::val("upstream");
    my $downstream=opt::val("downstream");
    my $printer=opt::val("output");
    my $filename=opt::val("filename");
    my $hash;
    my @IC;
    my $hgenome;
    my %ratio;
    my @loc_ratio;
    my $offset;
    my %seqnum;
    my $tmp;
    my @y;
    
    $hash=base_counter($gb,-output=>"n",-filename=>$filename,-position=>$position,-PatLength=>$PatLen,-upstream=>$upstream,-downstream=>$downstream);
    $downstream++;

    if($printer=~/f/){
	open(FILE,">>$filename");
    }

    for(my $i=$upstream;$i>-$downstream;$i--){
	$hgenome=0;
	foreach(keys(%{$$hash[$upstream-$i]})){
	    if($_ ne "PatLen" && $_ ne "upstream" && $_ ne "downstream" && $_ ne "kazu" && $_ ne "pat" && $_ ne "gbk" && $_ ne "pos"){
		unless(defined($seqnum{$_})){
		    $offset=$tmp=0;
		    while($tmp!=-1){
			$tmp=index($gb->{SEQ},$_,$offset);
			$offset=$tmp+1;
			$seqnum{$_}++ if($tmp!=-1);
		    }
		}
		$ratio{$_}=$seqnum{$_}/(length($gb->{SEQ})-length($_)+1);
		$hgenome-=$ratio{$_} * logmod($ratio{$_})/logmod(2.0);
		$loc_ratio[$upstream-$i]{$_}=$$hash[$upstream-$i]{$_}/$$hash[$upstream-$i]{kazu};
	    }
	}
	
	$IC[$upstream-$i]=$hgenome - (4-1)/(2.0 * logmod(2.0) * $$hash[$upstream-$i]{kazu});

	foreach(keys(%{$$hash[$upstream-$i]})){
	    if($_ ne "PatLen" && $_ ne "upstream" && $_ ne "downstream" && $_ ne "kazu" && $_ ne "pat" && $_ ne "gbk" && $_ ne "pos"){
		$IC[$upstream-$i]+=logmod($loc_ratio[$upstream-$i]{$_})/logmod(2.0)*$loc_ratio[$upstream-$i]{$_};
	    }
	}
	if($printer=~/f/){
	    printf FILE "%d,%.5f\n",-$i ,$IC[$upstream-$i];
	}
	if($printer!~/[fn]/){
	    &msg::send(sprintf("%d\t%.5f\n",-$i ,$IC[$upstream-$i]));
	}
	push(@y,-$i);
    }

    close(FILE);

    if($printer=~/g/ || $printer=~/show/){
	&G::Tools::Graph::_UniUniGrapher(\@y,\@IC,-filename=>"base_information_content.gif",-x=>"position",-y=>"information_content",-title=>"information_content");
    }

    system('gimv','graph/base_information_content.gif') if($printer=~/show/);

    $tmp="IC";
    @IC=($position,$tmp,$upstream,$downstream-1,@IC);
    return \@IC;
}


#buns::ic in Atg7
#base_relative_entropy ver.20010714-01
#scripting by Koya Mori(mory@g-language.org)
#This program calculate relative entropy of base_counter.
#need &base_counter.
#(array result)=&base_information_content(hash base_counter_result,  boolean debug);
sub base_relative_entropy{
    &opt::default(output=>"show",filename=>"consensus.csv",PatLength=>3,upstream=>30,downstream=>30,position=>"end");
    my @args=opt::get(@_);

    my $gb=shift @args;
    my $position=opt::val("position");
    my $PatLen=opt::val("PatLength");
    my $upstream=opt::val("upstream");
    my $downstream=opt::val("downstream");
    my $printer=opt::val("output");
    my $filename=opt::val("filename");
    my $hash;
    my @RE;
    my %ratio;
    my @loc_ratio;
    my $offset;
    my %seqnum;
    my $tmp;
    my @y;
    
    $hash=base_counter($gb,-output=>"n",-filename=>$filename,-position=>$position,-PatLength=>$PatLen,-upstream=>$upstream,-downstream=>$downstream);
    $downstream++;

    if($printer=~/f/){
	open(FILE,">>$filename");
    }

    for(my $i=$upstream;$i>-$downstream;$i--){
	foreach(keys(%{$$hash[$upstream-$i]})){
	    if($_ ne "PatLen" && $_ ne "upstream" && $_ ne "downstream" && $_ ne "kazu" && $_ ne "pat" && $_ ne "gbk" && $_ ne "pos"){
		unless(defined($seqnum{$_})){
		    $offset=$tmp=0;
		    while($tmp!=-1){
			$tmp=index($gb->{SEQ},$_,$offset);
			$offset=$tmp+1;
			$seqnum{$_}++ if($tmp!=-1);
		    }
		}
		$ratio{$_}=$seqnum{$_}/(length($gb->{SEQ})-length($_)+1);
		$loc_ratio[$upstream-$i]{$_}=$$hash[$upstream-$i]{$_}/$$hash[$upstream-$i]{kazu};
		$RE[$upstream-$i]+=logmod($loc_ratio[$upstream-$i]{$_}/$ratio{$_})/logmod(2.0)*$loc_ratio[$upstream-$i]{$_};
	    }
	}
	if($printer=~/f/){
	    printf FILE "%d,%.5f\n",-$i ,$RE[$upstream-$i];
	}
	if($printer!~/[fn]/){
	    &msg::send(sprintf("%d\t%.5f\n",-$i ,$RE[$upstream-$i]));
	}
	push(@y,-$i);
    }
    close(FILE);

     if($printer=~/g/ || $printer=~/show/){
	&G::Tools::Graph::_UniUniGrapher(\@y,\@RE,-filename=>"base_relative_entropy.gif",-x=>"position",-y=>"relative_entropy",-title=>"relative_entropy");
    }

    system('gimv','graph/base_relative_entropy.gif') if($printer=~/show/);
   
    $tmp="RE";
    @RE=($position,$tmp,$upstream,$downstream-1,@RE);
    return \@RE;
}


#base_z_value ver.20010714-01
#scripting by Koya Mori(mory@g-language.org)
#This program calculates Z value of base_counter.
#need &base_counter.
#(array result)=&base_z_value(hash base_counter_result,  int limit,  boolean debug);
sub base_z_value{
    &opt::default(limit=>5,output=>"stdout",filename=>"consensus.csv",PatLength=>3,upstream=>30,downstream=>30,position=>"end");
    my @args=opt::get(@_);

    my $gb=shift @args;
    my $position=opt::val("position");
    my $PatLen=opt::val("PatLength");
    my $upstream=opt::val("upstream");
    my $downstream=opt::val("downstream");
    my $printer=opt::val("output");
    my $filename=opt::val("filename");
    my $limit=opt::val("limit");
    my $hash;
    my @Z;
    my @Z_value;
    my @Z_tmp;
    my %ratio;
    my $offset;
    my %seqnum;
    my $tmp;
    my @tmp;
    my @key;
    my $c;
    
    $hash=base_counter($gb,-output=>"n",-filename=>$filename,-position=>$position,-PatLength=>$PatLen,-upstream=>$upstream,-downstream=>$downstream);
    $downstream++;    

    if($printer eq "f"){
	open(FILE,">>$filename");
    }

    for(my $i=$upstream;$i>-$downstream;$i--){
	foreach(keys(%{$$hash[$upstream-$i]})){
	    if($_ ne "PatLen" && $_ ne "upstream" && $_ ne "downstream" && $_ ne "kazu" && $_ ne "pat" && $_ ne "gbk" && $_ ne "pos"){
		unless(defined($seqnum{$_})){
		    $offset=$tmp=0;
		    while($tmp!=-1){
			$tmp=index($gb->{SEQ},$_,$offset);
			$offset=$tmp+1;
			$seqnum{$_}++ if($tmp!=-1);
		    }
		}
		$ratio{$_}=$seqnum{$_}/(length($gb->{SEQ})-length($_)+1);
		$Z_tmp[$upstream-$i]{$_}=($$hash[$upstream-$i]{$_}-$$hash[$upstream-$i]{kazu}*$ratio{$_})
		    /sqrt($$hash[$upstream-$i]{kazu}*$ratio{$_}*(1-$ratio{$_}));
	    }
	}

	foreach(keys(%{$Z_tmp[$upstream-$i]})){
	    if($_ ne "PatLen" && $_ ne "upstream" && $_ ne "downstream" && $_ ne "kazu" && $_ ne "pat" && $_ ne "gbk" && $_ ne "pos"){
		push(@Z_value,sprintf("%.5f",$Z_tmp[$upstream-$i]{$_})." ".$_);
	    }
	}

	$c=1;
	foreach(sort{$b <=> $a}@Z_value){
	    @tmp=split(/ +/,$_);
	    $Z[$upstream-$i]{$c}{$tmp[1]}=$tmp[0];
	    $c++;
	    last if($c>$limit);
	}

	if($i==0){
	    if($printer eq "f"){
		print FILE "position:0,";
	    }
	    if($printer eq "stdout"){
		&msg::send("position:0\n");
	    }
	}
	else{
	    if($printer eq "f"){
		print FILE "position:",-1*$i,",";
	    }
	    if($printer eq "stdout"){
		&msg::send("position:",-1*$i,"\n");
	    }
	}
	foreach(sort{$a <=> $b} keys(%{$Z[$upstream-$i]})){
	    if($printer eq "f"){
		print FILE $_,",",(keys(%{$Z[$upstream-$i]{$_}}))[0],",",
		$Z[$upstream-$i]{$_}{(keys(%{$Z[$upstream-$i]{$_}}))[0]},"\n";
	    }
	    if($printer eq "stdout"){
		&msg::send($_,"\t",(keys(%{$Z[$upstream-$i]{$_}}))[0],"\t",
		$Z[$upstream-$i]{$_}{(keys(%{$Z[$upstream-$i]{$_}}))[0]},"\n");
	    }
	}
	if($printer eq "f"){
	    print FILE "";
	}
	if($printer eq "stdout"){
	    &msg::send("\n");
	}
	@Z_value=();
    }
    return \@Z;
    close(FILE);
}


#base_individual_information_matrix ver.20010714-01
#scripting by Koya Mori(mory@g-language.org)
#This program calculates individual information matrix of base_counter.
#need &base_counter.
#(array result)=&base_individual_information_matrix(hash base_counter_result,  boolean debug);
sub base_individual_information_matrix{
    &opt::default(output=>"stdout",filename=>"consensus.csv",PatLength=>3,upstream=>30,downstream=>30,position=>"end");
    my @args=opt::get(@_);

    my $gb=shift @args;
    my $position=opt::val("position");
    my $PatLen=opt::val("PatLength");
    my $upstream=opt::val("upstream");
    my $downstream=opt::val("downstream");
    my $printer=opt::val("output");
    my $filename=opt::val("filename");
    my $hash;
    my $hgenome;
    my $tmp;
    my $offset;
    my %seqnum;
    my %ratio;
    my @loc_ratio;
    my @IIM;
    my %q;

    $hash=base_counter($gb,-output=>"n",-filename=>$filename,-position=>$position,-PatLength=>$PatLen,-upstream=>$upstream,-downstream=>$downstream);
    $downstream++;    

    for(my $i=$upstream;$i>-$downstream;$i--){
	$hgenome=0;
	foreach(keys(%{$$hash[$upstream-$i]})){
	    if($_ ne "PatLen" && $_ ne "upstream" && $_ ne "downstream" && $_ ne "kazu" && $_ ne "pat" && $_ ne "gbk" && $_ ne "pos"){
		unless(defined($seqnum{$_})){
		    $offset=$tmp=0;
		    while($tmp!=-1){
			$tmp=index($gb->{SEQ},$_,$offset);
			$offset=$tmp+1;
			$seqnum{$_}++ if($tmp!=-1);
		    }
		}
		$ratio{$_}=$seqnum{$_}/(length($gb->{SEQ})-length($_)+1);
		$hgenome-=$ratio{$_} * logmod($ratio{$_})/logmod(2.0);
		$loc_ratio[$upstream-$i]{$_}=$$hash[$upstream-$i]{$_}/$$hash[$upstream-$i]{kazu};
		$loc_ratio[$upstream-$i]{$_}=1/($$hash[$upstream-$i]{kazu}+2) if($loc_ratio[$upstream-$i]{$_}==0);
	    }
	}
	
	foreach(keys(%{$$hash[$upstream-$i]})){
	    if($_ ne "PatLen" && $_ ne "upstream" && $_ ne "downstream" && $_ ne "kazu" && $_ ne "pat" && $_ ne "gbk" && $_ ne "pos"){
		$IIM[$upstream-$i]{$_}=$hgenome-(4-1)/(2.0*logmod(2.0)*$$hash[$upstream-$i]{kazu})+
		    logmod($loc_ratio[$upstream-$i]{$_})/logmod(2.0);
	    }
	} 
    }

    if($printer eq "f"){
	open(FILE,">>$filename");
	print FILE "Pattern";
	for(my $i=$upstream;$i>-$downstream;$i--){
	    print FILE ",$i";
	}
	print FILE "\n";
	foreach(sort keys(%{$$hash[0]{pat}})){
	    print FILE $_;
	    for(my $i=$upstream;$i>-$downstream;$i--){
		print FILE sprintf(",%.3f",$IIM[$i]{$_});
	    }
	    print FILE "\n";
	}
	close(FILE);
    }
    if($printer eq "stdout"){
	for(my $i=$upstream;$i>-$downstream;$i){
	    for(my $c=0;$c<102;$c++){
		&msg::send("-");
	    }
	    &msg::send("\npos");
	    for(my $c=0;$c<10;$c++){
		&msg::send("\t",-$i+$q{$_}*10);
		$i--;
		last if($i<=-$downstream);    
	    }
	    &msg::send("\n\n");
	    foreach(sort keys(%{$$hash[0]{pat}})){
		&msg::send($_,"|");
		for(my $c=0;$c<10;$c++){
		    &msg::send("\t",sprintf("%.3f",$IIM[$c+$q{$_}*10]{$_}));
		    last if($c+$q{$_}*10>=$downstream+$upstream-1);
		}
		&msg::send("\n");
		$q{$_}++;
	    }
	}
    }
    return \@IIM;
}


# consensus_z ver.20010906-01
# Author: Kazuharu "Gaou" Arakawa (gaou@g-language.org)
# (string consensus) = &consensus_z(pointer array_of_sequences);
#
# pointer array_of_sequences is a reference to an array of sequences.
# The sequences must be of same lengths.
# eg. my @seq = ("atgc","ctaa","tttt","cttg"); &consensus_z(\@seq);
#     returns 'cTt-'
#
# Options:
#     -high      z value greater than which is significant (default: 1)
#     -low       z value less than which is insignificant (default: 0.2)
#     -filename  outfile name (default: consensus_z.gif for -output=>"g",
#                                       consensus_z.csv for -output=>"f")
#     -output    "g" for graph, "f" for file, "show" for display
#
# Consensus with Z value greater than int high_z (default is 1) is 
# capitalized, and consensus with Z value less than int low_z 
# (default is 0.2) is shown as '-'.
# This subroutine requires 
# use Statistics::Descriptive;
# at the beginning of the library.
# History:
#   20010713-01 initial posting
#   20010906-01 update with options

sub consensus_z{
    &opt::default(high=>1, low=>0.2, output=>"show", 
		  filename=>"consensus_z.gif");
    my @args = opt::get(@_);
    my $ref = shift @args;
    my $high_z = opt::val("high");
    my $low_z = opt::val("low");
    my $filename = opt::val("filename");
    $filename = 'consensus_z.csv' if (opt::val("output") eq "f");
    my $output = opt::val("output");
    my @inseq = @$ref;
    my ($i,$tmp,$outseq,@seq,@array,%nuc_table, @out, @pos);
    my $length = length($inseq[0]);
    my $rows = @inseq;
    my $nuc_max = 0;

    foreach $tmp (@inseq){
	for ($i = 0; $i < $length; $i++){
	    $seq[$i]{substr($tmp, $i,1)} += 1/$rows;
	    $nuc_table{substr($tmp, $i,1)} ++;
	    push (@array, $seq[$i]{substr($tmp,$i,1)});
	}
    }

    foreach $tmp (keys %nuc_table){$nuc_max ++;}
    
    for ($i = 0; $i < $length; $i++){
	my $max = 0.0;
	my $max_index;
	my $nuc = '';

	foreach $nuc (keys %{$seq[$i]}){
	    if ($seq[$i]{$nuc} > $max){
		$max_index = lc($nuc);
		$max = $seq[$i]{$nuc};
	    }
	}

	my $stat = Statistics::Descriptive::Full->new();
	$stat->add_data(@array);

	my $z = ($max - $stat->mean())/$stat->standard_deviation();
	$max_index = '-' if ($z <= $low_z);
	$max_index = uc($max_index) if ($z >= $high_z);
	$outseq = $outseq . $max_index;
	push (@out, $z);
	push (@pos, $i);
    }

    if ($output eq 'g' || $output eq 'show'){
	&G::Tools::Graph::_UniUniGrapher(
					 \@pos, \@out,-x=>"position", 
					 -y=>"Z value",
					 -title=>"Consensus by Z value", 
					 -filename=>$filename
					 );

	system("gimv graph/$filename &") if ($output eq "show");
    }elsif ($output eq 'f'){
	$, = ',';
	mkdir ('data', 0777);
	open(OUT, '>data/' . $filename);
	print OUT @out, "\n";
	close(OUT);
	$, = '';
    }
    return $outseq;
}


sub DESTROY {
    my $self = shift;
}

1;
__END__
# Below is the stub of documentation for your module. You better edit it!

=head1 NAME

G::Seq::Consensus - Perl extension for blah blah blah

=head1 SYNOPSIS

  use G::Seq::Consensus;
  blah blah blah

=head1 DESCRIPTION

Stub documentation for G::Seq::Consensus was created by h2xs. It looks like the
author of the extension was negligent enough to leave the stub
unedited.

Blah blah blah.

=head1 AUTHOR

A. U. Thor, a.u.thor@a.galaxy.far.far.away

=head1 SEE ALSO

perl(1).

=cut
