#!/usr/bin/env perl

#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# This file is part of G-language Genome Analysis Environment package
#
#     Copyright (C) 2001-2007 Keio University
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# 
#   $Id: Eliminate.pm,v 1.1.1.1 2002/04/02 20:25:41 gaou Exp $
#
# G-language GAE is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
# 
# G-language GAE is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public
# License along with G-language GAE -- see the file COPYING.
# If not, write to the Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# 
#END_HEADER
#

package G::Seq::Eliminate;

use SubOpt;
use G::Messenger;
use G::Seq::Util;

use strict;
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);

use SelfLoader;

require Exporter;

@ISA = qw(Exporter AutoLoader);
# Items to export into callers namespace by default. Note: do not export
# names by default without a very good reason. Use EXPORT_OK instead.
# Do not simply export all your public functions/methods/constants.
@EXPORT = qw(
	     valid_CDS
	     eliminate_atg
	     eliminate_pat
);
$VERSION = '0.01';

__DATA__

#::::::::::::::::::::::::::::::
#        Methods Start
#::::::::::::::::::::::::::::::
sub new{
    my $pkg = shift;
    my $filename = shift;
    my $option = shift;
    my $this;

    return $this;
}


#valid_CDS ver.20010623-01
#scripting by Koya Mori(mory@g-language.org)
#This program choose valid CDS.
#(pointer Genome)=&next_patsp(pointer Genome,  int max,  int min,  boolean debug);
sub valid_CDS{
    &opt_default(Max=>10000,Min=>20);
    my @args=opt_get(@_);

    my $gb=opt_as_gb(shift @args);
    my $max=opt_val("Max");
    my $min=opt_val("Min");
    my $start;
    my $end;
    my $num=1;
    my $tmp;
    my $switch;
    my @result;

    foreach($gb->cds()){
	$switch=0;
	$tmp=$num-1;
	if($gb->{"CDS$num"}->{direction} eq 'direct' && $gb->{"CDS"."$tmp"}->{direction} eq 'direct'){
	    $start = $gb->{"CDS$num"}->{start};
	    $end = $gb->{"CDS"."$tmp"}->{end};
	    $switch=1 if($start-$end > $max || $start-$end < $min);
	}
	elsif($gb->{"CDS$num"}->{direction} eq 'complement' && $gb->{"CDS"."$tmp"}->{direction} eq 'complement'){
	    $start = $gb->{"CDS$num"}->{start};
	    $end = $gb->{"CDS"."$tmp"}->{end};
	    $switch=1 if($start-$end > $max || $start-$end < $min);
	}
	$gb->{"CDS$num"}->{on}=0 if($switch==1);
	push(@result,"CDS$num") if($switch==0);
	$num++;
    }

    return \@result;
}


#eliminate_atg ver.20010623-01
#scripting by Koya Mori(mory@g-language.org)
#This program eliminates sequences which has "atg" in the same frame.
#&bun(pointer Genome,  int upstream,  int downstream,  boolean debug);
sub eliminate_atg{
    &opt_default(upstream=>15,downstream=>15);
    my @args=opt_get(@_);

    my $gb=opt_as_gb(shift @args);
    my $upstream=int(opt_val("upstream")/3);
    my $downstream=int(opt_val("downstream")/3);
    my $start;
    my $i=1;
    my $j;
    my $switch;
    my @result;


    foreach($gb->cds()){
	$switch=0;
	if($gb->{"CDS$i"}->{direction} eq 'direct'){ 
	    $start=$gb->{"CDS$i"}->{start};
	    for($j = -$upstream * 3 + $start;$j <= $downstream * 3 + $start;$j += 3){
		if($j != $start && substr($gb->{SEQ},$j-1,3) eq 'atg'){
		    $switch=1;
		}
	    }
	}
	elsif($gb->{"CDS$i"}->{direction} eq 'complement'){
	    $start=$gb->{"CDS$i"}->{end};
	    for($j = $upstream * 3 + $start;$j >= -$downstream * 3 + $start;$j -= 3){
		if($j != $start && substr($gb->{SEQ},$j-1,3) eq 'cat'){
		    $switch=1;
		}
	    }
	} 
	$gb->{"CDS$i"}->{on}=0 if($switch==1);
	push(@result,"CDS$i") if($switch==0);
	$i++;
    }
    return \@result;
}


#eliminate_pat ver.20010625-01
#scripting by Koya Mori(mory@g-language.org)
#This program eliminates sequences which has pattern in the specified range.
#(pointer Genome)=&eliminate_pat(pointer Genome,  int uppos,  int downpos,  string pattern,  boolean debug);
sub eliminate_pat{
    &opt_default(upstream=>30,downstream=>30);
    my @args=opt_get(@_);

    my $gb=opt_as_gb(shift @args);
    my $uppos=opt_val("upstream");
    my $downpos=opt_val("downstream");
    my $pat=shift;
    my $start;
    my $i=1;
    my $j;
    my $switch;
    my @result;


    foreach($gb->cds()){
	$switch=0;
	if($gb->{"CDS$i"}->{direction} eq 'direct'){ 
	    $start=$gb->{"CDS$i"}->{start};
	    for($j = -$uppos + $start;$j <= $downpos+ $start;$j ++){
		if($gb->getseq($j,$j+length($pat)-1) eq $pat){
		    $switch=1;
		}
	    }
	}
	elsif($gb->{"CDS$i"}->{direction} eq 'complement'){
	    $start=$gb->{"CDS$i"}->{end};
	    for($j = $uppos+ $start;$j >= -$downpos+ $start;$j --){
		if($gb->getseq($j-length($pat)+1,$j) eq _complement($pat)){ 
		    $switch=1;
		}
	    }
	} 
	$gb->{"CDS$i"}->{on}=0 if($switch==1);
	push(@result,"CDS$i") if($switch==0);
	$i++;
    }
    return \@result;
}


sub DESTROY {
    my $self = shift;
}

1;
__END__
# Below is the stub of documentation for your module. You better edit it!

=head1 NAME

G::Seq::Eliminate - Perl extension for blah blah blah

=head1 SYNOPSIS

  use G::Seq::Eliminate;
  blah blah blah

=head1 DESCRIPTION

Stub documentation for G::Seq::Eliminate was created by h2xs. It looks like the
author of the extension was negligent enough to leave the stub
unedited.

Blah blah blah.

=head1 AUTHOR

A. U. Thor, a.u.thor@a.galaxy.far.far.away

=head1 SEE ALSO

perl(1).

=cut
