#!/usr/bin/env perl
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::
#             Markov.pm legend
#
#     Copyright (C) 2001 Keio University
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# 
#   $Id: Markov.pm,v 1.3 2001/09/08 13:38:18 s98982km Exp $

package G::Seq::Markov;

use SubOpt;
use strict;
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);

require Exporter;

@ISA = qw(Exporter AutoLoader);
# Items to export into callers namespace by default. Note: do not export
# names by default without a very good reason. Use EXPORT_OK instead.
# Do not simply export all your public functions/methods/constants.
@EXPORT = qw(
	     markov
);
$VERSION = '0.01';

#::::::::::::::::::::::::::::::
#        Methods Start
#::::::::::::::::::::::::::::::
sub new{
    my $pkg = shift;
    my $filename = shift;
    my $option = shift;
    my $this;

    return $this;
}


# markov ver.20010830-01
# Author: Kazuharu Gaou Arakawa
# Usage: (pointer hash_of_O/E_values) = &markov(pointer G instance); 
# Options:
# -length    length of oligomer to analyze (default:6)
# -mincount  minimum number of oligomer count to report (default:10)
# -filename  output filename (default:'markov.csv')
# -output    "f" for file output, "stdout" for STDOUT output
# Description:
#  Calculates the Markov analysis for all oligomers of specified length.
#  Returned value is a reference to a hash with keys as oligomer 
#  sequences, and valus as the O/E values at (length - 2) order Markov.
# Requirements:
#  SubOpt.pm

sub markov {
    &opt::default(length=>6, mincount=>10, filename=>"markov.csv",output=>"stdout");
    my @args = opt::get(@_);
    my $gb = shift @args;
    my @aSortedTable = ( );
    my $iTotalNucs = 0;
    my @ahNucsTable = ( );
    my %oe;

    for (my $iCounter = 0; $iCounter <= 32; $iCounter++) {
	$ahNucsTable[$iCounter] = { };
    }

    my $rhTmp;
    foreach $rhTmp (@ahNucsTable) { undef %$rhTmp; }
    $iTotalNucs = 0;

    my($nucs, $char);
    $nucs = '';
    foreach $char (split(//, $gb->{SEQ})) {
	$iTotalNucs++;
	$nucs .= $char;
	if (opt::val("length") < $iTotalNucs) {
	    substr($nucs, 0, 1) = '';
	}
	;# Now $nucs contains tail of sequence.
	my $iLoopEnd = opt::val("length");
	if ($iTotalNucs < $iLoopEnd) {
	    $iLoopEnd = $iTotalNucs;
	}
	my $iLen;
	for ($iLen = 1; $iLen <= $iLoopEnd; $iLen++) {
	    $ahNucsTable[$iLen - 1]->{substr($nucs, -$iLen, $iLen)}++;
	}
    }

    {
	my @aTmpTable1 = ( );
	my @aTmpTable2 = ( );
	my @aTmpTable3 = ( );
	my $sKey;
	foreach $sKey (keys(%{$ahNucsTable[opt::val("length") - 1]})) {
	    my $iTmp = $ahNucsTable[opt::val("length") - 1]->{$sKey};
	    if (opt::val("mincount") <= $iTmp) {
		my $sTmp = sprintf("%08d %s", $iTmp, $sKey);
		if ($iTmp == 1) {
		    push(@aTmpTable1, $sTmp);
		} elsif ($iTmp == 2) {
		    push(@aTmpTable2, $sTmp);
		} else {
		    push(@aTmpTable3, $sTmp);
		}
	    }
	}
	@aSortedTable = sort {$b cmp $a;} @aTmpTable3;
	push(@aSortedTable, @aTmpTable2);
	push(@aSortedTable, @aTmpTable1);
    }

    if (opt::val("output") eq "f"){
	open(TABLEFILE, '>' . opt::val("filename")) || die;
	print TABLEFILE "oligomer,O-value,E-value,";
	my $i;
	for ($i = 1; $i <= opt::val("length") - 2; $i ++){
	    printf TABLEFILE "%d degree Markov,", $i;
	}
	print TABLEFILE "O/E value\n";
    }

    foreach my $sRecord (@aSortedTable) {
	my($iOVal, $sKey) = split(' ', $sRecord);
	my $klen = length($sKey);
	$iOVal =~ s/^0+//;
	my ($order, $iEVal);

	if (opt::val("output") eq "f"){
	    printf TABLEFILE "%s,%d,", $sKey, $iOVal;
	}elsif(opt::val("output") eq "stdout"){
	    &msg::send(sprintf("%s %5d", $sKey, $iOVal));
	}


	for ($order = 0; $order <= opt::val("length") - 2; $order++) {
	    my $numerator = $iTotalNucs + 1 - $klen;
	    my $denominator = 1.0;
	    my $offset;
	    for ($offset = 0; $offset <= $klen - $order - 1; $offset++) {
		my $key = substr($sKey, $offset, $order + 1);
		my $len = length($key);
		$numerator *=  $ahNucsTable[$len - 1]->{$key} / 
		    ($iTotalNucs + 1 - $len);
	    }
	    if (1 <= $order) {
		for ($offset = 1; $offset <= $klen - $order - 1; $offset++) {
		    my $key = substr($sKey, $offset, $order);
		    my $len = length($key);
		    $denominator *=  $ahNucsTable[$len - 1]->{$key} / 
			($iTotalNucs + 1 - $len);
		}
	    } else {
		$denominator = 1.0;
	    }
	    if ($denominator <= 0.0) {
		$iEVal = 0.0;
	    } else {
		$iEVal = $numerator / $denominator;
	    }

	    if (opt::val("output") eq "f"){
		printf TABLEFILE "%d,", $iEVal if (opt::val("output") eq "f");
	    }elsif(opt::val("output") eq "stdout"){
		&msg::send(sprintf(" %8d", $iEVal));
	    }
	}
	if (opt::val("output") eq "f"){
	    printf TABLEFILE "%.4f\n", $iOVal/$iEVal;
	}elsif(opt::val("output") eq "stdout"){
	    &msg::send(sprintf("   %3.4f\n", $iOVal/$iEVal));
	}
	$oe{$sKey} = $iOVal/$iEVal;
    }
    close(TABLEFILE) if (opt::val("output") eq "f");

    return \%oe;
}


sub DESTROY {
    my $self = shift;
}

1;
__END__
# Below is the stub of documentation for your module. You better edit it!

=head1 NAME

G::Seq::Markov - Perl extension for blah blah blah

=head1 SYNOPSIS

  use G::Seq::Markov;
  blah blah blah

=head1 DESCRIPTION

Stub documentation for G::Seq::Markov was created by h2xs. It looks like the
author of the extension was negligent enough to leave the stub
unedited.

Blah blah blah.

=head1 AUTHOR

A. U. Thor, a.u.thor@a.galaxy.far.far.away

=head1 SEE ALSO

perl(1).

=cut



