#!/usr/bin/env perl
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::
#             CAI.pm legend
#
#     Copyright (C) 2001 Keio University
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# 
#   $Id: CAI.pm,v 1.5 2001/09/08 08:05:12 s98982km Exp $

package G::Seq::CAI;

use SubOpt;
use G::Messenger;
use strict;
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);

require Exporter;

@ISA = qw(Exporter AutoLoader);
# Items to export into callers namespace by default. Note: do not export
# names by default without a very good reason. Use EXPORT_OK instead.
# Do not simply export all your public functions/methods/constants.
@EXPORT = qw(
	     w_value
	     cai
);
$VERSION = '0.01';

#::::::::::::::::::::::::::::::
#        Methods Start
#::::::::::::::::::::::::::::::
sub new{
    my $pkg = shift;
    my $filename = shift;
    my $option = shift;
    my $this;

    return $this;
}


# w_value ver.20010830-01
# Author: Kazuharu Gaou Arakawa
# Usage: (hash w_value) = &w_value(pointer G instance); 
# Options:
#  none.
# Description:
#  Calculates the W value necessary for CAI analysis.
#  Returned value is a hash of W values.
# Requirements:
#  none.

sub w_value {
    &opt::default(output=>"stdout",filename=>"w_value.csv");
    my @args=opt::get(@_);
    
    my $gb = shift @args;
    my $filename=opt::val("filename");
    my %codon;
    my %w_val;

    foreach my $feature ($gb->feature()){
	next if ($gb->{$feature}->{type} ne 'CDS');
	next if ($gb->{$feature}->{product} !~ 
		 /[Rr]ibosom.*[Pp]rotein/);
	next if ($gb->{$feature}->{product} =~ 
		 /[Mm]itochondrial/);
	msg::send("Found Optimal Gene:\n");
	msg::send("   gene: ", $gb->{$feature}->{gene}, "\n");
	msg::send("   product: ", $gb->{$feature}->{product}, "\n\n");
	my $seq = $gb->get_geneseq($feature);
	my $i;
	for ($i = 0; $i<= length($seq) - 1 - 2; $i += 3){
	    $codon{substr($seq, $i, 3)} ++;
	}
    }
    my $optimal;
    #D
    $optimal = (sort {$b <=> $a} ($codon{gac}, $codon{gat}))[0];
    $w_val{gac} = sprintf "%.3f", $codon{gac} / $optimal;
    $w_val{gat} = sprintf "%.3f", $codon{gat} / $optimal;
    #E
    $optimal = (sort {$b <=> $a} ($codon{gaa}, $codon{gag}))[0];
    $w_val{gaa} = sprintf "%.3f", $codon{gaa} / $optimal;
    $w_val{gag} = sprintf "%.3f", $codon{gag} / $optimal;
    #R
    $optimal = (sort {$b <=> $a} ($codon{aga}, $codon{agg}, $codon{cga},
				  $codon{cgc}, $codon{cgg}, $codon{cgt}))[0];
    $w_val{aga} = sprintf "%.3f", $codon{aga} / $optimal;
    $w_val{agg} = sprintf "%.3f", $codon{agg} / $optimal;
    $w_val{cga} = sprintf "%.3f", $codon{cga} / $optimal;
    $w_val{cgc} = sprintf "%.3f", $codon{cgc} / $optimal;
    $w_val{cgg} = sprintf "%.3f", $codon{cgg} / $optimal;
    $w_val{cgt} = sprintf "%.3f", $codon{cgt} / $optimal;
    #K
    $optimal = (sort {$b <=> $a} ($codon{aaa}, $codon{aag}))[0];
    $w_val{aaa} = sprintf "%.3f", $codon{aaa} / $optimal;
    $w_val{aag} = sprintf "%.3f", $codon{aag} / $optimal;
    #H
    $optimal = (sort {$b <=> $a} ($codon{cac}, $codon{cat}))[0];
    $w_val{cac} = sprintf "%.3f", $codon{cac} / $optimal;
    $w_val{cat} = sprintf "%.3f", $codon{cat} / $optimal;
    #N
    $optimal = (sort {$b <=> $a} ($codon{aac}, $codon{aat}))[0];
    $w_val{aac} = sprintf "%.3f", $codon{aac} / $optimal;
    $w_val{aat} = sprintf "%.3f", $codon{aat} / $optimal;
    #Q
    $optimal = (sort {$b <=> $a} ($codon{caa}, $codon{cag}))[0];
    $w_val{caa} = sprintf "%.3f", $codon{caa} / $optimal;
    $w_val{cag} = sprintf "%.3f", $codon{cag} / $optimal;
    #S
    $optimal = (sort {$b <=> $a} ($codon{agc}, $codon{agt}, $codon{tca},
				  $codon{tcc}, $codon{tcg}, $codon{tct}))[0];
    $w_val{agc} = sprintf "%.3f", $codon{agc} / $optimal;
    $w_val{agt} = sprintf "%.3f", $codon{agt} / $optimal;
    $w_val{tca} = sprintf "%.3f", $codon{tca} / $optimal;
    $w_val{tcc} = sprintf "%.3f", $codon{tcc} / $optimal;
    $w_val{tcg} = sprintf "%.3f", $codon{tcg} / $optimal;
    $w_val{tct} = sprintf "%.3f", $codon{tct} / $optimal;
    #T
    $optimal = (sort {$b <=> $a} ($codon{aca}, $codon{acc}, 
				  $codon{acg}, $codon{act}))[0];
    $w_val{aca} = sprintf "%.3f", $codon{aca} / $optimal;
    $w_val{act} = sprintf "%.3f", $codon{act} / $optimal;
    $w_val{acg} = sprintf "%.3f", $codon{acg} / $optimal;
    $w_val{acc} = sprintf "%.3f", $codon{acc} / $optimal;
    #Y
    $optimal = (sort {$b <=> $a} ($codon{tac}, $codon{tat}))[0];
    $w_val{tac} = sprintf "%.3f", $codon{tac} / $optimal;
    $w_val{tat} = sprintf "%.3f", $codon{tat} / $optimal;
    #F
    $optimal = (sort {$b <=> $a} ($codon{ttc}, $codon{ttt}))[0];
    $w_val{ttc} = sprintf "%.3f", $codon{ttc} / $optimal;
    $w_val{ttt} = sprintf "%.3f", $codon{ttt} / $optimal;
    #A
    $optimal = (sort {$b <=> $a} ($codon{gca}, $codon{gct}, 
				  $codon{gcg}, $codon{gcc}))[0];
    $w_val{gca} = sprintf "%.3f", $codon{gca} / $optimal;
    $w_val{gct} = sprintf "%.3f", $codon{gct} / $optimal;
    $w_val{gcg} = sprintf "%.3f", $codon{gcg} / $optimal;
    $w_val{gcc} = sprintf "%.3f", $codon{gcc} / $optimal;
    #G
    $optimal = (sort {$b <=> $a} ($codon{gga}, $codon{ggc}, 
				  $codon{ggg}, $codon{ggt}))[0];
    $w_val{gga} = sprintf "%.3f", $codon{gga} / $optimal;
    $w_val{ggt} = sprintf "%.3f", $codon{ggt} / $optimal;
    $w_val{ggg} = sprintf "%.3f", $codon{ggg} / $optimal;
    $w_val{ggc} = sprintf "%.3f", $codon{ggc} / $optimal;
    #V
    $optimal = (sort {$b <=> $a} ($codon{gta}, $codon{gtc}, 
				  $codon{gtg}, $codon{gtt}))[0];
    $w_val{gta} = sprintf "%.3f", $codon{gta} / $optimal;
    $w_val{gtt} = sprintf "%.3f", $codon{gtt} / $optimal;
    $w_val{gtg} = sprintf "%.3f", $codon{gtg} / $optimal;
    $w_val{gtc} = sprintf "%.3f", $codon{gtc} / $optimal;
    #L
    $optimal = (sort {$b <=> $a} ($codon{tta}, $codon{ttg}, $codon{cta}, 
				  $codon{ctc}, $codon{ctg}, $codon{ctt}))[0];
    $w_val{tta} = sprintf "%.3f", $codon{tta} / $optimal;
    $w_val{ttg} = sprintf "%.3f", $codon{ttg} / $optimal;
    $w_val{cta} = sprintf "%.3f", $codon{cta} / $optimal;
    $w_val{ctc} = sprintf "%.3f", $codon{ctc} / $optimal;
    $w_val{ctg} = sprintf "%.3f", $codon{ctg} / $optimal;
    $w_val{ctt} = sprintf "%.3f", $codon{ctt} / $optimal;
    #I
    $optimal = (sort {$b <=> $a} ($codon{ata}, $codon{atc}, 
				  $codon{att}))[0];
    $w_val{ata} = sprintf "%.3f", $codon{ata} / $optimal;
    $w_val{att} = sprintf "%.3f", $codon{att} / $optimal;
    $w_val{atc} = sprintf "%.3f", $codon{atc} / $optimal;
    #P
    $optimal = (sort {$b <=> $a} ($codon{cca}, $codon{ccc}, 
				  $codon{ccg}, $codon{cct}))[0];
    $w_val{cca} = sprintf "%.3f", $codon{cca} / $optimal;
    $w_val{cct} = sprintf "%.3f", $codon{cct} / $optimal;
    $w_val{ccg} = sprintf "%.3f", $codon{ccg} / $optimal;
    $w_val{ccc} = sprintf "%.3f", $codon{ccc} / $optimal;
    #C
    $optimal = (sort {$b <=> $a} ($codon{tgc}, $codon{tgt}))[0];
    $w_val{tgc} = sprintf "%.3f", $codon{tgc} / $optimal;
    $w_val{tgt} = sprintf "%.3f", $codon{tgt} / $optimal;

    msg::send("W Values:\n");
    foreach my $key (sort keys %w_val){
	if(&opt::val("output") eq "stdout"){
	    msg::send("$key: ", $w_val{$key}, "\n");
	}
	if(&opt::val("output") eq "f"){
	    open(FILE,">>$filename");
	    print FILE "$key,", $w_val{$key}, "\n";
	}
    }
    close(FILE);
    msg::send("\n");
    
    return \%w_val;
}


# cai ver.20010905-01
# Author: Kazuharu Gaou Arakawa
# Usage: NULL = &cai(pointer G instance); 
# Options:
#  -w_filename  filename to output the W values
#  -w_output    output option for W value (default: stdout)
#  -output      output option (default: stdout)
# Description:
#  Calculates the CAI values for each genes, and inputs in the G instance.
#  i.e. cai values will be accessible at $gb->{"CDS$i"}->{cai};
# Requirements:
#  sub w_value()
# History
#  20010905-01 addition of -output option
#  20010830-01 first release

sub cai {
    &opt::default(output=>"stdout", w_output=>"stdout",
		  w_filename=>"w_value.cvs");
    my @args=opt::get(@_);

    my $gb = shift @args;
    my $w_output=opt::val("w_output");
    my $w_fname=opt::val("w_filename");
    my $w_val = &w_value($gb,-output=>$w_output,$w_fname);

    foreach my $cds ($gb->cds()){
	my $seq = $gb->get_geneseq($cds);
	my ($i);
	my $cai = 1;
	my $count = 0;
	for ($i = 0; $i<= length($seq) - 1 - 2; $i += 3){
	    if ($$w_val{substr($seq, $i, 3)} > 0){
		$cai += log($$w_val{substr($seq, $i, 3)});
	    }
	    $count ++;
	}
	$gb->{$cds}->{cai} = sprintf "%.4f", exp($cai/$count);

	if (opt::val("output") eq "stdout"){ 
	    my $feat = $gb->{$cds}->{feature};
	    msg::send($gb->startcodon($cds), '!');
	    msg::send($gb->{$cds}->{cai}, '!');
	    msg::send(length($gb->get_cdsseq($cds)), '!');
	    msg::send($gb->{"FEATURE$feat"}->{gene}, '!');
	    msg::send($gb->{"FEATURE$feat"}->{product}, "\n");
	}
    }	
}


sub DESTROY {
    my $self = shift;
}

1;
__END__
# Below is the stub of documentation for your module. You better edit it!

=head1 NAME

G::Seq::CAI - Perl extension for blah blah blah

=head1 SYNOPSIS

  use G::Seq::CAI;
  blah blah blah

=head1 DESCRIPTION

Stub documentation for G::Seq::CAI was created by h2xs. It looks like the
author of the extension was negligent enough to leave the stub
unedited.

Blah blah blah.

=head1 AUTHOR

A. U. Thor, a.u.thor@a.galaxy.far.far.away

=head1 SEE ALSO

perl(1).

=cut



