#!/usr/bin/env perl

#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::
#             G.pm Prelude core
#
#     Copyright (C) 2001 Keio University
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# 
#   $Id: G.pm,v 1.32 2001/09/22 10:23:07 s98982km Exp $
#
# G-language System is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
# 
# G-language System is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public
# License along with G-language System -- see the file COPYING.
# If not, write to the Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# 
#END_HEADER
#
# written by Kazuharu Arakawa <gaou@g-language.org> at
# G-language Project, Institute for Advanced Biosciences, Keio University.
#


package G;

use strict;

# import prelude extensions
use SubOpt;
use Rcmd;
use G::Messenger;

# import odyssey classes
use G::Seq::Codon;
use G::Seq::Consensus;
use G::Seq::Eliminate;
use G::Seq::FreeEnergy;
use G::Seq::GCskew;
use G::Seq::OverLapping;
use G::Seq::PatSearch;
use G::Seq::Tandem;
use G::Seq::Util;
use G::Seq::CAI;
use G::Seq::Markov;
use G::Seq::ORF;
use G::Seq::Align;
use G::Tools::Graph;
use G::Tools::H2v;
use G::Tools::Mapping;
use G::Tools::Blast;
use G::Ecell::Pathway;
use G::Ecell::Reader;
use G::Ecell::KEGG;

use vars qw($VERSION @ISA @EXPORT @EXPORT_OK @INC $AUTOLOAD);

require Exporter;

@ISA = qw(Exporter AutoLoader);
@EXPORT = qw(
	     cds
	     complement
	     del_key
	     feature
	     foreachCDS
	     getseq
	     get_cdsseq
	     get_geneseq
	     get_gbkseq
	     get_intron
	     get_exon
	     hairpin_cut
	     new
	     next_locus
	     next_seq
	     rewind_genome
	     seq_info
	     startcodon
	     stopcodon
	     translate

	     gcskew
	     genomicskew
	     cum_gcskew
	     genome_map
	     _find_bad_substance
	     _find_pathway_gap
	     _eri_reader
	     _eri_extracter
	     alignment
	     diffseq
	     maskseq
	     pasteseq
	     codon_counter
	     amino_counter
	     codon_usage
	     _codon_table
	     _codon_amino_printer
	     _codon_usage_printer
	     base_counter
	     _base_printer
	     base_entropy
	     base_information_content
	     base_relative_entropy
	     base_z_value
	     base_individual_information_matrix
	     consensus_z
	     valid_CDS
	     eliminate_atg
	     eliminate_pat
	     RNAfold
	     foreach_RNAfold
	     find_ori_ter
	     gcskew
	     genomicskew
	     gcwin
	     over_lapping_finder
	     _over_lapping_printer
	     oligomer_counter
	     find_seq
	     find_tandem
	     foreach_tandem
	     _print_tandem
	     find_king_of_gene
	     atcgcon
	     cds_echo
	     print_gene_function_list
	     _R_base_graph
	     _R_RNA_graph
	     _csv_h2v
	     _h2v
	     _mask_repeat_for_mapping
	     _foreach_mask_repeat_for_mapping
	     _cutquery_for_mapping
	     _blast_db_for_mapping
	     _formatdb_for_mapping
	     _blast_for_mapping
	     _blastpointer_for_mapping
	     _foreach_blastpointer_for_mapping
	     _file_list_for_mapping
	     _eri_update_with_kegg
	     _ecell_name2kegg_compound
	     seq2gif
	     cai
	     w_value
	     markov
	     _UniUniGrapher
	     palindrome
	     longest_ORF
	     pseudo_atg
	     find_identical_gene
	     _UniMultiGrapher
	     view_cds
	     graphical_LTR_search
	     _blast_tp_finder
	     _blaster
	     );
$VERSION = '1.0.0 gamma';

#::::::::::::::::::::::::::::::
#          Constants
#::::::::::::::::::::::::::::::

my %CodonTable = (
	       'gac', 'D', 'caa', 'Q', 'gca', 'A', 'ctg', 'L',
	       'gat', 'D', 'cag', 'Q', 'gcc', 'A', 'ctt', 'L',
	       'gaa', 'E', 'agc', 'S', 'gcg', 'A', 'ata', 'I',
	       'gag', 'E', 'agt', 'S', 'gct', 'A', 'atc', 'I',
	       'aga', 'R', 'tca', 'S', 'gga', 'G', 'att', 'I',
	       'agg', 'R', 'tcc', 'S', 'ggc', 'G', 'cca', 'P',
	       'cga', 'R', 'tcg', 'S', 'ggg', 'G', 'ccc', 'P',
	       'cgc', 'R', 'tct', 'S', 'ggt', 'G', 'ccg', 'P',
	       'cgg', 'R', 'aca', 'T', 'gta', 'V', 'cct', 'P',
	       'cgt', 'R', 'acc', 'T', 'gtc', 'V', 'atg', 'M',
	       'aaa', 'K', 'acg', 'T', 'gtg', 'V', 'tgg', 'W',
	       'aag', 'K', 'act', 'T', 'gtt', 'V', 'tgc', 'C',
	       'cac', 'H', 'tac', 'Y', 'tta', 'L', 'tgt', 'C',
	       'cat', 'H', 'tat', 'Y', 'ttg', 'L', 'taa', '/',
	       'aac', 'N', 'ttc', 'F', 'cta', 'L', 'tag', '/',
	       'aat', 'N', 'ttt', 'F', 'ctc', 'L', 'tga', '/'
		  );

#::::::::::::::::::::::::::::::
#          Variables
#::::::::::::::::::::::::::::::

my $outfile = '';
my $loaded = 0;

#::::::::::::::::::::::::::::::
#         IO Methods
#::::::::::::::::::::::::::::::

sub new {
    my $pkg = shift;
    my $filename = shift;
    my $option = shift;
    my $this = {};

    bless $this, $pkg;

    $this->loaded_msg();

    if ($option eq 'Fasta' || $option eq 'EMBL' || $option eq 'swiss' || 
	$option eq 'SCF'   || $option eq 'PIR'  || $option eq 'GCG'   || 
	$option eq 'raw'   || $option eq 'ace'){

	use Bio::SeqIO;

	$outfile = '/tmp/' . time . '.gbk';
	my $in = Bio::SeqIO->newFh(-format => $option, -file => $filename);
	my $out = Bio::SeqIO->newFh(-format => "GenBank", 
				    -file => '>' . $outfile);

	print $out $_ while <$in>;

	open(GENBANK, $outfile) || die("Error at G.pm: $!\n");
	$this->read_locus();
	$this->read_header();
	$this->read_features();
	$this->getnucs();
	$this->seq_info();
    }elsif($option eq 'without annotation'){
	open(GENBANK, $filename) || die("Error at G.pm: $!\n");
	$this->goto_origin();
	$this->getnucs();
	$this->seq_info();
    }elsif($option eq 'long sequence'){
	open(GENBANK, $filename) || die("Error at G.pm: $!\n");
	$this->read_locus();
	$this->read_header();
	$this->read_features();
	$this->{origin} = tell GENBANK;
    }elsif($option eq 'bioperl'){
	$this->bioperl2prelude($filename);
	$this->seq_info();
    }else{
	open(GENBANK, $filename) || die("Error at G.pm: $!\n");
	$this->read_locus();
	$this->read_header();
	$this->read_features();
	$this->getnucs();
	$this->seq_info();
    }

    return $this;
}

sub next_seq{
    my $this = shift;
    my $len = '100';
    my $opt = shift;
    my $char = '1';
    $len = $opt if ($opt);
    $this->{SEQ} = '';

    while($len > 0 && $char ne ''){
	$char = getc(GENBANK);
	next unless ($char =~ /[a-zA-Z]/);
	$len --;
	$this->{SEQ} .= $char;
    }
    $this->{position} = tell GENBANK;

    return $char;
}

sub rewind_genome{
    my $this = shift;
    seek GENBANK, $this->{origin}, 0;
    return 1;
}

sub next_locus{
    my $this = shift;
    undef %{$this};
    $this->read_locus();
    $this->read_header();
    $this->read_features();
    $this->getnucs();

    if (length($this->{SEQ}) > 0){
	return 1;
    }else{
	return 0;
    }
}

sub goto_features{
    my $this = shift;
    while(<GENBANK>){
	last if (/^FEATURES/);
    }
}

sub goto_origin{
    my $this = shift;
    while(<GENBANK>){
	last if (/^ORIGIN/);
    }
}

sub read_locus{
    my $this = shift;
    my $tmp = '';
    local($_);

    while(<GENBANK>){
	split;
	shift;
	if ($#_ == 6){
	    $this->{"LOCUS"}->{"circular"} = 1;
	    ($this->{"LOCUS"}->{"id"},
	     $this->{"LOCUS"}->{"length"},
	     $tmp,
	     $this->{"LOCUS"}->{"nucleotide"},
	     $tmp,
	     $this->{"LOCUS"}->{"type"},
	     $this->{"LOCUS"}->{"date"}) = @_;
	}elsif ($#_ == 5){
	    $this->{"LOCUS"}->{"circular"} = 0;
	    ($this->{"LOCUS"}->{"id"},
	     $this->{"LOCUS"}->{"length"},
	     $tmp,
	     $this->{"LOCUS"}->{"nucleotide"},
	     $this->{"LOCUS"}->{"type"},
	     $this->{"LOCUS"}->{"date"}) = @_;
	}else{
	    msg::error("ERROR: Unknown LOCUS definition\n");
	}
	last;
    }
}
	
sub read_header{
    my $this = shift;
    my $line = '';

    while($line = <GENBANK>){
	if ($line =~ /^COMMENT/){
	    s/COMMENT     //g;
	    while($line = <GENBANK>){
		last if ($line =~ /^FEATURES/);
		last unless (substr($line, 0, 1) eq ' ');
		$line =~ s/  +//g;
		$this->{"COMMENT"} .= $line;
	    }
	}
	last if ($line =~ /^FEATURES/);
	$this->{HEADER} .= $line;
    }
}

sub read_features{
    local($_);
    my $this = shift;
    my $num = -1;
    my $cds = 0;
    $this->{"CDS0"}->{dummy} = 1;

    while(<GENBANK>){
	if (/^BASE COUNT/){
	    s/BASE COUNT //g;
	    $this->{"BASE_COUNT"} = $_;
	}elsif (/^ORIGIN/){
	    last;
	}elsif (/^ {5}(\S+)\s+(.*)$/ && $_ !~ /\//){
	    my $key = $1;
	    my $feature = $2;

	    $num ++;
	    $this->{"FEATURE$num"}->{"feature"} = $num;
	    $this->{"FEATURE$num"}->{"type"} = $1;
	    $this->{"FEATURE$num"}->{"on"} = 1;
	    
	    if ($this->{"FEATURE$num"}->{"type"} eq "CDS"){
		$cds ++;
		$this->{"CDS$cds"}->{"on"} = 1;
		$this->{"FEATURE$num"}->{"cds"} = $cds;
	    }

	    s/\. \./\.\./g; #for (1. .2) type irregular format
	    s/\^/\.\./g;    #for (1^2) type irregular format

	    my $part_left = tr/\<//d;  #for (<1..2) type irregular format
	    my $part_right = tr/\>//d; #for (1..2>) type irregular format
	    $this->{"FEATURE$num"}->{"partial"} = "$part_left $part_right";

	    if (/join/){
		if (/complement\(join/){
		    $this->{"FEATURE$num"}->{"direction"} = "complement";
		    $this->{"CDS$cds"}->{"direction"} = "complement";
		    s/complement//;
		}else{
		    $this->{"FEATURE$num"}->{"direction"} = "direct";
		    $this->{"CDS$cds"}->{"direction"} = "direct";
		}

		my $line = $_;
		my $fth = '';
		my $linenum = tell GENBANK;

		while($line !~ /\//){
		    $fth .= $line;
		    $line = <GENBANK>;
		    $linenum = tell GENBANK;
		    $linenum -= length($_);
		}
		seek GENBANK, $linenum, 0;

		$fth =~ s/CDS//g;
		$fth =~ s/join//g;
		$fth =~ s/\(//g;
		$fth =~ s/\)//g;
		$fth =~ s/ //g;
		$fth =~ s/\n//g;
		$fth =~ s/complement/c/g;

		my $tmpfth = $fth;
		$tmpfth =~ s/c//g;
		my @choparray = split(/\.\./, $tmpfth);
		$this->{"FEATURE$num"}->{"start"} = shift @choparray;
		$this->{"CDS$cds"}->{"start"} = $this->{"FEATURE$num"}->{"start"};
		$this->{"FEATURE$num"}->{"end"} = pop @choparray;
		$this->{"CDS$cds"}->{"end"} = $this->{"FEATURE$num"}->{"end"};
		$this->{"FEATURE$num"}->{"join"} = $fth;
		$this->{"CDS$cds"}->{"join"} = $fth;
		$this->{"CDS$cds"}->{"feature"} = $num;
	    }elsif (/\?/){
		$this->{"FEATURE$num"}->{"type"} = "partial_$key";
		$this->{"FEATURE$num"}->{"partial"} = $feature;
		msg::error("Partial feature: $feature\n");
	    }elsif (/complement\((\d+)\.\.(\d+)\)/){
		$this->{"FEATURE$num"}->{"direction"} = "complement";
		$this->{"FEATURE$num"}->{"start"} = $1;
		$this->{"FEATURE$num"}->{"end"} = $2;
		if ($this->{"FEATURE$num"}->{"type"} eq "CDS"){
		    $this->{"CDS$cds"}->{"direction"} = "complement";
		    $this->{"CDS$cds"}->{"start"} = $1;
		    $this->{"CDS$cds"}->{"end"} = $2;
		    $this->{"CDS$cds"}->{"feature"} = $num;
		}
	    }elsif (/(\d+)\.\.(\d+)/){
		$this->{"FEATURE$num"}->{"direction"} = "direct";
		$this->{"FEATURE$num"}->{"start"} = $1;
		$this->{"FEATURE$num"}->{"end"} = $2;
		if ($this->{"FEATURE$num"}->{"type"} eq "CDS"){
		    $this->{"CDS$cds"}->{"direction"} = "direct";
		    $this->{"CDS$cds"}->{"start"} = $1;
		    $this->{"CDS$cds"}->{"end"} = $2;
		    $this->{"CDS$cds"}->{"feature"} = $num;
		}
	    }elsif (/\s+complement\((\d+)\)/){
		$this->{"FEATURE$num"}->{"direction"} = "complement";
		$this->{"FEATURE$num"}->{"start"} = $1;
		$this->{"FEATURE$num"}->{"end"} = $1;
	    }elsif (/\s+(\d+)/){
		$this->{"FEATURE$num"}->{"direction"} = "direct";
		$this->{"FEATURE$num"}->{"start"} = $1;
		$this->{"FEATURE$num"}->{"end"} = $1;
	    }else{
		msg::error("Irregular location feature: $key $feature\n");
	    }

	    if (/replace/){
		msg::error(qw|Found replace() feature. Please tell Gaou (gaou@g-language.org)\n|);
	    }

	}else{
	    if (/\/(\w+)=\"([^\"]+)\"/){
		$this->{"FEATURE$num"}->{"$1"} = $2;
	    }elsif (/\/(\w+)=\"([^\"]+)/){
		my $tag = $1;
		my $tmp = $2;
		my $line;
		while(<GENBANK>){
		    if (!/\"/){
			$tmp .= $_;
		    }elsif (/([^\"]+)\"/){
			$tmp .= $1;
			last;
		    }
		}
		$tmp =~ s/\s+/ /g;
		$tmp =~ s/ //g if ($tag eq 'translation');
		$this->{"FEATURE$num"}->{$tag} = $tmp;
	    }elsif (/\/(\w+)=([\d|\d+])/){
		$this->{"FEATURE$num"}->{$1} = $2;
	    }
	}
    }


}

sub getnucs {
    my $this = shift;

    while(<GENBANK>){
	last if (/\/\//);
	s/[^A-Za-z]//g;
	$this->{"SEQ"} .= $_;
    }

}

sub bioperl2prelude {
    my $this = shift;
    my $bpobj = shift;

    my $num = -1;
    my $cds = 0;
    my ($feat,$tag);
    $this->{"CDS0"}->{dummy} = 1;

    foreach $feat ($bpobj->all_SeqFeatures()){
	$num ++;
	if ($feat->primary_tag eq 'CDS'){
	    $cds ++;
	    $this->{"CDS$cds"}->{start} = $feat->start;
	    $this->{"CDS$cds"}->{end} = $feat->end;
	    if ($feat->strand == 0 || $feat->strand == 1){
		$this->{"CDS$cds"}->{direction} = "direct";
	    }else{
		$this->{"CDS$cds"}->{direction} = "complement";
	    }
	    $this->{"CDS$cds"}->{feature} = $num;
	}

	$this->{"FEATURE$num"}->{feature} = $num;
	$this->{"FEATURE$num"}->{type} = $feat->primary_tag;
	$this->{"FEATURE$num"}->{start} = $feat->start;
	$this->{"FEATURE$num"}->{end} = $feat->end;
	
	if ($feat->strand == 0 || $feat->strand == 1){
	    $this->{"FEATURE$num"}->{direction} = "direct";
	}else{
	    $this->{"FEATURE$num"}->{direction} = "complement";
	}

	foreach $tag ($feat->all_tags()){
	    $this->{"FEATURE$num"}->{$tag} = 
		join(' ', $feat->each_tag_value($tag));
	}
    }
    
    $this->{"SEQ"} = lc($bpobj->seq());
    
}

sub loaded_msg {
    my $this = shift;

    return if ($loaded);

    my $print =
	qq(
	     __/__/__/__/__/__/__/__/__/__/__/__/__/
                
                     G-language System

	      Version: $VERSION

	      Copyright (C) 2001 G-language Project
	      Institute of Advanced Biosciences,
	      Keio University, JAPAN 

          	 http://www.g-language.org/

	     __/__/__/__/__/__/__/__/__/__/__/__/__/
	   \n);

    &msg::error($print);

    $loaded ++;
}

#::::::::::::::::::::::::::::::
#        Native Methods
#::::::::::::::::::::::::::::::

sub getseq {
    my $this = shift;
    my $start = shift;
    my $end = shift;

    my $seq = substr($this->{SEQ}, $start, $end-$start+1);
    
    return $seq;
}

sub get_gbkseq {
    my $this = shift;
    my $start = shift;
    my $end = shift;

    my $seq = substr($this->{SEQ}, $start -1, $end-$start+1);

    return $seq;
}

sub get_cdsseq {
    my $this = shift;
    my $object = shift;

    my $cdsseq = $this->get_gbkseq($this->{$object}->{start}, 
				   $this->{$object}->{end});
    $cdsseq = &complement($cdsseq) 
	if ($this->{$object}->{direction} eq 'complement');

    return $cdsseq;
}

sub before_startcodon {
    my $this = shift;
    my $object = shift;
    my $length = shift;

    if ($this->{$object}->{direction} eq 'complement'){
	return 
	    complement(substr($this->{SEQ}, 
			      $this->{$object}->{end}, 
			      $length));
    }else{
	return 
	    substr($this->{SEQ}, 
		   $this->{$object}->{start} - 1 - $length, 
		   $length);
    }
}

sub after_startcodon {
    my $this = shift;
    my $object = shift;
    my $length = shift;

    if ($this->{$object}->{direction} eq 'complement'){
	return 
	    complement(substr($this->{SEQ}, 
			      $this->{$object}->{end} - 1 - 3 - $length + 1, 
			      $length));
    }else{
	return 
	    substr($this->{SEQ}, 
		   $this->{$object}->{start} + 3 - 1, 
		   $length);
    }
}

sub before_stopcodon {
    my $this = shift;
    my $object = shift;
    my $length = shift;

    if ($this->{$object}->{direction} eq 'complement'){
	return 
	    complement(substr($this->{SEQ}, 
			      $this->{$object}->{start} + 3 - 1, 
			      $length));
    }else{
	return 
	    substr($this->{SEQ}, 
		   $this->{$object}->{end} - 3 - 1 - $length + 1, 
		   $length);
    }
}

sub after_stopcodon {
    my $this = shift;
    my $object = shift;
    my $length = shift;

    if ($this->{$object}->{direction} eq 'complement'){
	return 
	    complement(substr($this->{SEQ}, 
			      $this->{$object}->{start} - 1 - $length, 
			      $length));
    }else{
	return 
	    substr($this->{SEQ}, 
		   $this->{$object}->{end} +1 - 1, 
		   $length);
    }
}

sub startcodon {
    my $this = shift;
    my $object = shift;

    return substr($this->get_geneseq($object), 0, 3);
}

sub stopcodon {
    my $this = shift;
    my $object = shift;

    return substr($this->get_geneseq($object), -3, 3);
}

sub get_geneseq {
    my $this = shift;
    my $object = shift;

    my $geneseq = $this->get_gbkseq($this->{$object}->{start}, 
				   $this->{$object}->{end});
    if ($this->{$object}->{join}){
	$geneseq = $this->get_exon($object);
    }elsif ($this->{$object}->{direction} eq 'complement'){
	$geneseq = &complement($geneseq);
    }

    return $geneseq;
}

sub get_intron {
    my $this = shift;
    my $cds = shift;

    unless ($this->{$cds}->{join}){
	return -1;
	last;
    }
    my @join = split(/\.\./, $this->{$cds}->{join});
    shift @join;
    pop @join;
    my @seq;
    my $line;

    foreach $line (@join){
	$line =~ s/c//g;
	my ($start, $end) = split(/,/, $line, 2);
	my $tmp = $this->get_gbkseq($start + 1, $end - 1);
	push (@seq, $tmp);
    }

    return @seq;
}

sub get_exon {
    my $this = shift;
    my $cds = shift;

    unless ($this->{$cds}->{join}){
	return -1;
	last;
    }
    my @join = split(/,/, $this->{$cds}->{join});
    my $seq = '';
    my $line;

    foreach $line (@join){
	my $complement = $line =~ tr/c//d;
	my ($start, $end) = split(/\.\./, $line, 2);
	my $tmp = $this->get_gbkseq($start, $end);
	$tmp = complement($tmp) if ($complement);
	$seq .= $tmp;
    }

    $seq = complement($seq) if ($this->{$cds}->{direction} eq 'complement');
    return $seq;
}

sub seq_info {
    my $this = shift;
    my $length = length($this->{SEQ});

    my $a = $this->{SEQ} =~ tr/a/a/;
    my $t = $this->{SEQ} =~ tr/t/t/;
    my $g = $this->{SEQ} =~ tr/g/g/;
    my $c = $this->{SEQ} =~ tr/c/c/;
    my $others = $length - $a - $t - $g - $c;
    my $msg;

    $msg .= sprintf "\n\nAccession Number: %s\n", $this->{LOCUS}->{id};
    $msg .= sprintf "\n  Length of Sequence : %9d\n" , $length;
    $msg .= sprintf "           A Content : %9d (%.2f\%)\n" , 
    $a , $a / $length * 100;
    $msg .= sprintf "           T Content : %9d (%.2f\%)\n" , 
    $t , $t / $length * 100;
    $msg .= sprintf "           G Content : %9d (%.2f\%)\n" , 
    $g , $g / $length * 100;
    $msg .= sprintf "           C Content : %9d (%.2f\%)\n" , 
    $c , $c / $length * 100;
    $msg .= sprintf "              Others : %9d (%.2f\%)\n" , 
    $others,  $others / $length * 100;
    $msg .= sprintf "          AT Content :    %.2f\%\n", 
    ($a + $t) / $length * 100;
    $msg .= sprintf "          GC Content :    %.2f\%\n\n", 
    ($g + $c) / $length * 100;

    &msg::send($msg);

    return ($a, $t, $g, $c);
}

sub feature {
    my $this = shift;
    my $opt = shift;
    my $i = 0;
    my @feature;

    while(defined(%{$this->{"FEATURE$i"}})){
	if ($opt eq 'off'){
	    next if ($this->{"FEATURE$i"}->{on} == 0);
	}
	push (@feature, "FEATURE$i");
	$i ++;
    }

    return @feature;
}

sub cds {
    my $this = shift;
    my $opt = shift;
    my $i = 0;
    my @cds;

    while(defined(%{$this->{"FEATURE$i"}})){
	$i ++;
	next if ($this->{"FEATURE$i"}->{type} ne 'CDS');

	if ($opt eq 'off'){
	    next if ($this->{"FEATURE$i"}->{on} == 0);
	    my $id = $this->{"FEATURE$i"}->{cds};
	    next if ($this->{"CDS$id"}->{on} == 0);
	}

	push (@cds, "FEATURE$i");
    }

    return @cds;
}
	  

sub foreachCDS {
    my $this = shift;
    my $sub = shift;
    my $i = 0;
    local($_);

    while(defined(%{$this->{"FEATURE$i"}})){
	$i ++;
	next unless ($this->{"FEATURE$i"}->{type} eq 'CDS');
	my $start = $this->{"FEATURE$i"}->{start};
	my $end = $this->{"FEATURE$i"}->{end};
	$_ = $this->getseq($start, $end);
	$_ = &complement($_) if ($this->{"FEATURE$i"}->{direction} eq 'complement');
	&$sub();
    }
}

sub hairpin_cut {
    my $nuc = shift;
    my $val = "\n==============\n!!!Afro Man!!!===============\n\n";

    system('netscape http://www2.osk.3web.ne.jp/~e916/');

    return $val;
}

sub complement {
    my $nuc = reverse(shift);
    
    $nuc =~ tr
	[acgtuACGTU]
	[tgcaaTGCAA];
    return $nuc;
}

sub translate {
    my $seq = lc(shift);
    my $amino = '';

    while(3 <= length($seq)){
	my $codon = substr($seq, 0, 3);
	substr($seq, 0, 3) = '';
	if ($codon =~ /[^atgc]/){
	    $amino .= '?';
	}else{
	    $amino .= $CodonTable{$codon};
	}
    }
    if(length($seq)){
	msg::error("Translation: illegal length.\n");
    }

    return $amino;
}

sub del_key {
    my $this = shift;
    my $key = shift;

    delete $this->{$key};

    return 1;
}

sub DESTROY {
    my $self = shift;

    undef %{$self};
    close(GENBANK);   
    unlink($outfile);
}

#::::::::::::::::::::::::::::::
#          Perldoc
#::::::::::::::::::::::::::::::

1;
__END__

=head1 NAME

G - G-language System core module in Perl (Prelude)

=head1 SYNOPSIS

 use G;                          # Imports G module 
   
 $gb = new G("ecoli.gbk");       # Creates G's instance at $gb 
                                 # At the same time, read in ecoli.gbk. 
                                 # Read the annotation and sequence 
                                 # information 
                                 # See DESCRIPTION for details
   
 $gb->seq_info();                # Prints the basic sequence information.

 $find_ori_ter(\$gb->{SEQ});     # Gives sequence as a reference to
                                 # MT package functions

=head1 DESCRIPTION

 The Prelude Core of G-language System fully supports most sequence databases.

=head2 stored annotation information:

=over 4   

 LOCUS  
         $gb->{LOCUS}->{id}              -accession number 
         $gb->{LOCUS}->{length}          -length of sequence  
         $gb->{LOCUS}->{nucleotide}      -type of sequence ex. DNA, RNA  
         $gb->{LOCUS}->{circular}        -1 when the genome is circular.
                                          otherwise 0
         $gb->{LOCUS}->{type}            -type of species ex. BCT, CON  
         $gb->{LOCUS}->{date}            -date of accession 

 HEADER  
    $gb->{HEADER}  

 COMMENT  
    $gb->{COMMENT}  

 FEATURE  
         Each FEATURE is numbered(FEATURE1 .. FEATURE1172), and is a 
         hash structure that contains all the keys of Genbank.   
         In other words,  in most cases, FEATURE$i's hash at least 
         contains informations listed below: 
         $gb->{FEATURE$i}->{start}  
         $gb->{FEATURE$i}->{end}  
         $gb->{FEATURE$i}->{direction}
         $gb->{FEATURE$i}->{join}
         $gb->{FEATURE$i}->{note}  
         $gb->{FEATURE$i}->{type}        -CDS,gene,RNA,etc.

         To analyze each FEATURE, write: 

         $i = 1;  
         while(defined(%{$gb->{FEATURE$i}})){  
   
                 $i ++;  
         }  

         Each CDS is stored in a similar manner.
         There are 
         $gb->{CDS$i}->{start}
         $gb->{CDS$i}->{end}
         $gb->{CDS$i}->{direction}
         $gb->{CDS$i}->{join}
         $gb->{CDS$i}->{feature}         -number $n for $gb->{FEATURE$n}
                                          where "CDS$i" = "FEATURE$n"

         In the same manner, to analyze all CDS, write:  
   
         $i = 1;  
         while(defined(%{$gb->{CDS$i}})){  
   
                 $i ++;  
         }

 BASE COUNT  
         $gb->{BASE_COUNT}  

 SEQ  
         $gb->{SEQ}              -sequence data following "ORIGIN" 

=back

=head2 supported methods

=over 2

=item new()

         Creates a G instance.
         First option is the filename of the database. Default format is
         the GenBank database.
         Second option specifies detailed actions.

           'without annotation' option skips the annotation.
           'long sequence'      option uses a pointer of the filehandle 
                                to read the genome sequence. See 
                                next_seq() method below for details.
           'bioperl'            option creates a G instance from a bioperl
                                object. 
                                eg. $bp = $bp->next_seq();       # bioperl
                                    $gb = new G($bp, "bioperl"); # G

               - following options require bioperl installation -

           'Fasta'              option loads a Fasta format database.
           'EMBL'               option loads a EMBL  format database.
           'swiss'              option loads a swiss format database.
           'SCF'                option loads a SCF   format database.
           'PIR'                option loads a PIR   format database.
           'GCG'                option loads a GCG   format database.
           'raw'                option loads a raw   format database.
           'ace'                option loads a ace   format database.

=item complement()

         Given a sequence, returns its complement.
         eg. complement('atgc');  returns 'gcat'

=item translate()

         Given a sequence, returns its translated sequence.
         Regular codon table is used.
         eg. translate('ctggtg'); returns 'LV'

=item $gb->seq_info()

         Prints the basic information of the genome to STDOUT.

=item $gb->DESTROY()

         Destroys the G instance

=item $gb->del_key()

         Given a object, deletes it from the G instance structure
         eg. $gb->del_key('FEATURE1'); deletes 'FEATURE1' hash

=item $gb->getseq()

         Given the start and end positions (starting from 0 as in Perl),
         returns the sequence specified.
         eg. $gb->getseq(1,3); returns the 2nd, 3rd, and 4th nucleotides.

=item $gb->get_gbkseq()

         Given the start and end positions (starting from 1 as in 
         Genbank), returns the sequence specified.
         eg. $gb->get_gbkseq(1,3); returns the 1st, 2nd, and 3rd 
             nucleotides.

=item $gb->get_cdsseq()

         Given a CDS ID, returns the CDS sequence. 
         'complement' is properly parsed.
         eg. $gb->get_cdsseq('CDS1'); returns the 'CDS1' sequence.

=item $gb->get_geneseq()

         Given a CDS ID, returns the CDS sequence, or the exon sequence
         If introns are present.
         'complement' is properly parsed, and introns are spliced out.
         eg. $gb->get_geneseq('CDS1'); returns the 'CDS1' sequence or 
             exon.

=item $gb->feature()

         Returns the array of all feature object name.
         foreach ($gb->feature()){
             $gb->get_cdsseq($_);
         }
         prints all feature sequences.

=item $gb->cds()

         Returns the array of all cds object name.

         !CAUTION! the object name is actually the FEATURE OBJECT NAME,
         to enable access to all feature values. However, most of the
         time you do not need to be aware of this difference.

         foreach ($gb->cds()){
             $gb->get_geneseq($_);
         }
         prints all gene sequences.

=item $gb->startcodon()

         Given a CDS ID, returns the start codon.
         eg. $gb->startcodon('CDS1'); returns 'atg'

=item $gb->stopcodon()

         Given a CDS ID, returns the stop codon.
         eg. $gb->stopcodon('CDS1'); returns 'tag'

=item $gb->before_startcodon()

         Given a CDS ID and length, returns the sequence upstream of 
         start codon.
         eg. $gb->before_startcodon('CDS1', 100); returns 100 bp  
             sequence upstream of the start codon of 'CDS1'.

=item $gb->after_startcodon()

         Given a CDS ID and length, returns the sequence downstream of 
         start codon.
         eg. $gb->after_startcodon('CDS1', 100); returns 100 bp  
             sequence downstream of the start codon of 'CDS1'.

=item $gb->before_stopcodon()

         Given a CDS ID and length, returns the sequence upstream of 
         stop codon.
         eg. $gb->before_stopcodon('CDS1', 100); returns 100 bp  
             sequence upstream of the stop codon of 'CDS1'.

=item $gb->after_stopcodon()

         Given a CDS ID and length, returns the sequence downstream of 
         stop codon.
         eg. $gb->after_stopcodon('CDS1', 100); returns 100 bp  
             sequence downstream of the stop codon of 'CDS1'.

=item $gb->get_intron()

         Given a CDS ID, returns the intron sequences as array of 
         sequences.
         eg. $gb->get_intron('CDS1'); 
             returns ($1st_intron, $2nd_intron,..)

=item $gb->get_exon()

         Given a CDS ID, returns the exon sequence.
         'complement' is properly parsed, and introns are spliced out.
         eg. $gb->get_exon('CDS1'); returns the 'CDS1' exon.

=item $gb->next_locus()

         Reads the next locus.
         the G instance is then updated.

         do{

         }while($gb->next_locus());

         Enables multiple loci analysis.        

=item $gb->next_seq()

         If G instance is created with 'long sequence' option, 
         $gb->next_seq() method replace the next chunk of sequence 
         to $gb->{SEQ}.

         while($gb->next_seq(100000)){
             print $gb->{SEQ};
         }

         Enables continuous analysis.

=item $gb->rewind_genome()

         If G instance is created with 'long sequence' option, 
         $gb->rewind_genome() method puts the filehandle pointer back 
         to the ORIGIN position.

=back

=head1 AUTHOR

Kazuharu Gaou Arakawa, gaou@g-language.org

=head1 SEE ALSO

perl(1).

=cut



