#!/usr/bin/env perl
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::
#             Eliminate.pm odyssey
#
#     Copyright (C) 2001 Keio University
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# 
#   $Id: Eliminate.pm,v 1.3 2001/09/19 07:06:07 s98982km Exp $

package G::Seq::Eliminate;

use SubOpt;
use strict;
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);

require Exporter;

# Inherit from Exporter so that the names listed in @EXPORT can be imported
# by callers.
# NOTE(review): AutoLoader is listed as a parent as well, but nothing in this
# file loads it -- looks like h2xs boilerplate; confirm before relying on it.
@ISA = qw(Exporter AutoLoader);
# The three CDS filters defined below are exported by default.
@EXPORT = qw(
	     valid_CDS
	     eliminate_atg
	     eliminate_pat
);
# Module version string.
$VERSION = '0.01';

#::::::::::::::::::::::::::::::
#        Methods Start
#::::::::::::::::::::::::::::::
# Constructor stub.
# Accepts (package name, filename, option) but uses none of them: no object
# is built or blessed, so the caller always receives undef.
sub new {
    my ($pkg, $filename, $option) = @_;

    my $this = undef;
    return $this;
}


#valid_CDS ver.20010623-01
#scripting by Koya Mori(mory@g-language.org)
#This program chooses valid CDS.
#A CDS is rejected when it has the same direction as the preceding CDS and
#the distance (its start minus the preceding end) is above Max or below Min.
#Rejected CDS get {on}=0; the names of all other CDS are returned.
#Options read through opt::val: Max (default 10000), Min (default 20).
#(pointer Array)=&valid_CDS(pointer Genome,  options);
sub valid_CDS{
    &opt::default(Max=>10000,Min=>20);
    my @args=opt::get(@_);

    my $gb=shift @args;
    my $max=opt::val("Max");
    my $min=opt::val("Min");
    my $num=1;
    my @result;

    # NOTE(review): features are addressed as CDS1..CDSn by a running counter,
    # which presumes cds() yields one item per consecutively numbered CDS --
    # confirm against the genome class.
    foreach($gb->cds()){
        my $current="CDS$num";
        my $previous="CDS".($num-1);
        my $switch=0;

        # The first CDS has no predecessor.  Checking for the entry first
        # keeps the lookup from autovivifying an empty "CDS0" hash in $gb.
        if(exists $gb->{$previous}){
            my $direction=$gb->{$current}->{direction};
            my $before=$gb->{$previous}->{direction};

            # The spacing rule is identical for both directions; it applies
            # only when the two neighbours share a recognised direction.
            if(defined $direction && defined $before && $direction eq $before
               && ($direction eq 'direct' || $direction eq 'complement')){
                my $gap=$gb->{$current}->{start}-$gb->{$previous}->{end};
                $switch=1 if($gap > $max || $gap < $min);
            }
        }

        if($switch){
            $gb->{$current}->{on}=0;
        }
        else{
            push(@result,$current);
        }
        $num++;
    }

    return \@result;
}


#eliminate_atg ver.20010623-01
#scripting by Koya Mori(mory@g-language.org)
#This program eliminates sequences which have "atg" in the same frame.
#For every CDS the codons in frame with its start codon are scanned from
#"upstream" bases before to "downstream" bases after the start position
#(both rounded down to a multiple of 3).  A CDS with a further "atg" in that
#window ("cat" on the stored strand for complement CDS) gets {on}=0; the
#names of all other CDS are returned.
#Options read through opt::val: upstream (default 15), downstream (default 15).
#(pointer Array)=&eliminate_atg(pointer Genome,  options);
sub eliminate_atg{
    &opt::default(upstream=>15,downstream=>15);
    my @args=opt::get(@_);

    my $gb=shift @args;
    # Window sizes are kept as whole codons so the scan stays in frame.
    my $upstream=int(opt::val("upstream")/3);
    my $downstream=int(opt::val("downstream")/3);
    my $i=1;
    my @result;

    # NOTE(review): windows that reach before the first base produce negative
    # substr offsets, which Perl counts from the end of the string.  Left as
    # is -- confirm whether that wrap-around is wanted.
    foreach($gb->cds()){
        my $name="CDS$i";
        my $direction=$gb->{$name}->{direction};
        my $switch=0;

        if($direction eq 'direct'){
            my $start=$gb->{$name}->{start};
            for(my $j=$start-$upstream*3;$j<=$start+$downstream*3;$j+=3){
                next if($j==$start);    # the CDS's own start codon
                # {start} is used as a 1-based position: codon = bases j..j+2
                if(substr($gb->{SEQ},$j-1,3) eq 'atg'){
                    $switch=1;
                    last;
                }
            }
        }
        elsif($direction eq 'complement'){
            # Assumes {end} is the 1-based position of the last base of the
            # reversed start codon, mirroring how {start} is used above.
            my $start=$gb->{$name}->{end};
            for(my $j=$start+$upstream*3;$j>=$start-$downstream*3;$j-=3){
                next if($j==$start);    # the CDS's own start codon
                # An in-frame codon on this strand ENDS at j (bases j-2..j),
                # so the 0-based offset is j-3; j-1 would read a shifted frame.
                if(substr($gb->{SEQ},$j-3,3) eq 'cat'){
                    $switch=1;
                    last;
                }
            }
        }

        if($switch){
            $gb->{$name}->{on}=0;
        }
        else{
            push(@result,$name);
        }
        $i++;
    }
    return \@result;
}


#eliminate_pat ver.20010625-01
#scripting by Koya Mori(mory@g-language.org)
#This program eliminates sequences which have pattern in the specified range.
#For direct CDS every window beginning between {start}-upstream and
#{start}+downstream is compared with the pattern; for complement CDS every
#window ending between {end}+upstream and {end}-downstream is compared with
#the reverse complement of the pattern.  A CDS with a hit gets {on}=0; the
#names of all other CDS are returned.
#Options read through opt::val: upstream (default 30), downstream (default 30).
#(pointer Array)=&eliminate_pat(pointer Genome,  string pattern,  options);
sub eliminate_pat{
    &opt::default(upstream=>30,downstream=>30);
    my @args=opt::get(@_);

    my $gb=shift @args;
    # The pattern is the next positional argument.  A bare "shift" here would
    # read the untouched @_ and hand back the genome object instead.
    my $pat=shift @args;
    my $uppos=opt::val("upstream");
    my $downpos=opt::val("downstream");
    my $span=length($pat)-1;    # distance from first to last base of a window
    my $i=1;
    my @result;

    # NOTE(review): positions are handed to getseq() unchanged; whether it
    # expects 0- or 1-based coordinates is not visible here -- confirm.
    foreach($gb->cds()){
        my $name="CDS$i";
        my $direction=$gb->{$name}->{direction};
        my $switch=0;

        if($direction eq 'direct'){
            my $start=$gb->{$name}->{start};
            for(my $j=$start-$uppos;$j<=$start+$downpos;$j++){
                if($gb->getseq($j,$j+$span) eq $pat){
                    $switch=1;
                    last;
                }
            }
        }
        elsif($direction eq 'complement'){
            my $start=$gb->{$name}->{end};
            # Reverse-complement once per CDS instead of once per position.
            my $revpat=_complement($pat);
            for(my $j=$start+$uppos;$j>=$start-$downpos;$j--){
                if($gb->getseq($j-$span,$j) eq $revpat){
                    $switch=1;
                    last;
                }
            }
        }

        if($switch){
            $gb->{$name}->{on}=0;
        }
        else{
            push(@result,$name);
        }
        $i++;
    }
    return \@result;
}


# Return the reverse complement of a nucleotide string.
# The string is reversed, then a/c/g/t/u (either case) are swapped for their
# partners; "u" is treated like "t" and maps to "a".  Any other character is
# kept as it is.
sub _complement {
    my ($sequence) = @_;

    my $revcomp = scalar reverse $sequence;
    $revcomp =~ tr/acgtuACGTU/tgcaaTGCAA/;

    return $revcomp;
}


# Destructor: performs no cleanup.
sub DESTROY {
    my ($self) = @_;

    # Hands back its argument, as the implicit result always did when the
    # sub was called directly.
    return $self;
}

1;
__END__
# Below is the stub of documentation for your module. You better edit it!

=head1 NAME

G::Seq::Eliminate - filters that switch off CDS features of a genome object

=head1 SYNOPSIS

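  use G::Seq::Eliminate;

  # $gb is a genome object that provides cds() and per-CDS hash entries
  my $spaced   = valid_CDS($gb);       # ref to array of CDS names that passed
  my $no_extra = eliminate_atg($gb);   # same return shape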

=head1 DESCRIPTION

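G::Seq::Eliminate exports three filters, C<valid_CDS>, C<eliminate_atg> and
C<eliminate_pat>. Each one walks the CDS features of a genome object, sets the
C<on> flag of every rejected feature to 0, and returns a reference to an array
holding the names (C<CDS1>, C<CDS2>, ...) of the features that passed.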

=head1 AUTHOR

Koya Mori, mory@g-language.org

=head1 SEE ALSO

perl(1).

=cut
