#!/usr/bin/env perl
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::
#             PatSearch.pm odyssey
#
#     Copyright (C) 2001 Keio University
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# 
#   $Id: PatSearch.pm,v 1.4 2001/09/08 13:47:24 s98982km Exp $

package G::Seq::PatSearch;

use SubOpt;
use strict;
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);

require Exporter;

@ISA = qw(Exporter AutoLoader);
# Items to export into callers namespace by default. Note: do not export
# names by default without a very good reason. Use EXPORT_OK instead.
# Do not simply export all your public functions/methods/constants.
@EXPORT = qw(
	     oligomer_counter
	     find_seq
	     match_test
	     palindrome
);
$VERSION = '0.01';

#::::::::::::::::::::::::::::::
#        Methods Start
#::::::::::::::::::::::::::::::
sub new{
    my $pkg = shift;
    my $filename = shift;
    my $option = shift;
    my $this;

    return $this;
}


# palindrome ver.20010829-01
# Author: Kazuharu Gaou Arakawa
# Usage: &palindrome(pointer G instance); 
# Options:
# -shortest shortest palindrome to search (default:4)
# -loop     longest stem loop to allow (default: 0)
# -gtmatch  if 1, allows g-t match (default: 0)
# -output   "f" for file output
# Description:
#  Searches palindrome sequences
# Requirements:
#  SubOpt.pm
#  sub _match_test()

sub palindrome {
    &opt::default(gtmatch=>0, loop=>0, shortest=>4);
    my @args = opt::get(@_);
    my $gb = shift @args;
    my $length = int(opt::val("shortest") / 2);
    my $i = $length - 1; 
    my ($len, $j, $k, $stem);

    if (opt::val("output") eq "f"){
	open(OUT, '>palindrome.csv') || &msg::error($!);
	print OUT "Length, start, end, sequence\n";
    }

    while($i <= length($gb->{SEQ}) - 1 - $length - opt::val("loop")){
	$stem = opt::val("loop");

	while($stem >= 0){
	    $j = $i;
	    $k = $stem + 1 + $i;
	    $len = 0;
	    last if ($k > length($gb->{SEQ}) - 1);

	    while(&_match_test(substr($gb->{SEQ}, $j, 1), 
			       substr($gb->{SEQ}, $k, 1),
			       &opt::val("gtmatch")) 
		  )
	    {
		$j --;
		$k ++;
		last if ($j < 0 || $k > length($gb->{SEQ}) - 1);
		$len += 2;
	    }

	    if ($len >= opt::val("shortest")){
		&msg::send(sprintf("Length: %2d Position: %7d %7d Sequence: %s %s %s\n",
		$len, $j + 1, $k + 1, 
		substr($gb->{SEQ}, $j, $len/2),
		substr($gb->{SEQ}, $j + $len/2, $stem),
		substr($gb->{SEQ}, $j + $len/2 + $stem, $len/2)));

		if (opt::val("output") eq "f"){
		    printf OUT "%d,%d,%d,%s %s %s\n",
		    $len, $j + 1, $k + 1, 
		    substr($gb->{SEQ}, $j, $len/2),
		    substr($gb->{SEQ}, $j + $len/2, $stem),
		    substr($gb->{SEQ}, $j + $len/2 + $stem, $len/2);
		}
	    }

	    $stem --;
	}
	$i ++;
    }
    close(OUT) if (opt::val("output") eq "f");
}

sub _match_test {
    my $first = shift;
    my $second = shift;
    my $gtmatch = shift;

    if ($first eq 'a' && $second eq 't' ||
	$first eq 't' && $second eq 'a' ||
	$first eq 'g' && $second eq 'c' ||
	$first eq 'c' && $second eq 'g' ||
	$first eq 't' && $second eq 'g' && $gtmatch ||
	$first eq 'g' && $second eq 't' && $gtmatch
	)
    {
	return 1;
    }else{
	return 0;
    }
}


# oligomer_counter ver.20010829-01
# Author: Kazuharu Gaou Arakawa
# History:
#  1.0.0 5.18.2001 [Gaou] from atg7.wind + gcwind [rsaito]
# Usage: (array count || int count) = &oligomer_counter(pointer G instance, 
#                                                       string seq);
# Options:
# -window int window size.
#         If specified, seeks oligomer in specified windows
#         Method returns an array of numbers at each windows
#         If not specified, seeks oligomer in the genome
#         Method returns the number of oligomers
# -option "f" for file output, "g" for graph output
#         Only available when -window option is specified
# Description:
#  Counts oligomers (by windows optionally)
# Requirements:
#  SubOpt.pm
#  sub UniUniGrapher()

sub oligomer_counter {
    my @args = opt::get(@_);
    my $gb = shift @args;
    my $seq = shift @args;
    my $window = opt::val("window");
    $window = length($gb->{SEQ}) if($window <= 0);

    if (opt::val("window")){
	open(OUT, '>oligo_count.csv') || &msg::error($!)
	    if (opt::val("output") eq "f");

	my $i = 0;
	my @wincount = ();
	my @winnum = ();
	for ($i = 0; $i <= int(length($gb->{SEQ}) / $window); $i ++){
	    my $partial = substr($gb->{SEQ}, $i * $window, $window);
	    last if (length($partial) < $window);
	    my $start = 0;
	    my $count = 0;
	    if (length($seq) == 1 && $seq =~ /a|t|g|c/){
		$count = $partial =~ tr/a/a/ if ($seq eq 'a');
		$count = $partial =~ tr/t/t/ if ($seq eq 't');
		$count = $partial =~ tr/g/g/ if ($seq eq 'g');
		$count = $partial =~ tr/c/c/ if ($seq eq 'c');
	    }else{
		while(0 <= ($start = index($partial, $seq, $start + 1))){
		    $count ++;
		}
	    }
	    push (@wincount, $count);
	    push (@winnum, $i * $window);
	    print OUT "%d,%d\n", $i*$window, $count
		if (opt::val("output") eq "f");
	}
	close(OUT) if (opt::val("output") eq "f");
	if (opt::val("output") eq "g"){
	    UniUniGrapher(\@winnum, \@wincount, -x=>'window(bp)', 
			  -y=>'number of oligomer', 
			  -title=>'oligomer by window',
			  -outfile=>'oligo_count.gif'
			  );
	}
	return (@wincount);
    }else{
	my $start = 0;
	my $count = 0;
	while(0 <= ($start = index($gb->{SEQ}, $seq, $start + 1))){
	    $count ++;
	}
	return $count;
    }
}


# (Int direct, int comp, int total) = &find_seq(pointer GENOME, string Seq, boolean debug);
# Version 1.0.0 3.26.2001 [Gaou]
# Counts an oligomer and its complement
sub find_seq {
    my $ref_Genome = shift;
    my $sSeq = shift;
    my $printer=shift;
    my $sSeq2 = complement($sSeq);
    my $direct = 0;
    my $comp = 0;
    my $iSeqStart = 0;
#    assert(length($sSeq) >= 1);
    
    while(0 <= ($iSeqStart = index($$ref_Genome, $sSeq, $iSeqStart + 1))){
	$direct ++;
    }
    $iSeqStart = 0;
    while(0 <= ($iSeqStart = index($$ref_Genome, $sSeq2, $iSeqStart + 1))){
	$comp ++;
    }

    if($printer eq "f"){
	open(FILE,">>oligomer_count.rst");
	print FILE '--- find_sequence_result ---',"\n";
	print FILE "$sSeq: $direct\n$sSeq2: $comp\nTotal: $direct+$comp\n\n";
	close(FILE);
    }
    return ($direct, $comp, $direct + $comp);
}


sub DESTROY {
    my $self = shift;
}

1;
__END__
# Below is the stub of documentation for your module. You better edit it!

=head1 NAME

G::Seq::PatSearch - Perl extension for blah blah blah

=head1 SYNOPSIS

  use G::Seq::PatSearch;
  blah blah blah

=head1 DESCRIPTION

Stub documentation for G::Seq::PatSearch was created by h2xs. It looks like the
author of the extension was negligent enough to leave the stub
unedited.

Blah blah blah.

=head1 AUTHOR

A. U. Thor, a.u.thor@a.galaxy.far.far.away

=head1 SEE ALSO

perl(1).

=cut
