#!/usr/bin/env perl

#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# This file is part of G-language Genome Analysis Environment package
#
#     Copyright (C) 2001-2007 Keio University
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# 
#   $Id: G.pm,v 1.1 2002/07/30 17:44:27 gaou Exp $
#
# G-language GAE is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
# 
# G-language GAE is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public
# License along with G-language GAE -- see the file COPYING.
# If not, write to the Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# 
#END_HEADER
#
# written by Kazuharu Arakawa <gaou@g-language.org> at
# G-language Project, Institute for Advanced Biosciences, Keio University.
#


package G::G;

use strict;

# import prelude extensions
use SubOpt;
use Rcmd;

# import skyline extensions
use G::Messenger;
use G::Inspire;
use G::IO::Bioperl;

# import odyssey classes
use G::Seq::Codon;
use G::Seq::Consensus;
use G::Seq::Eliminate;
use G::Seq::FreeEnergy;
use G::Seq::GCskew;
use G::Seq::Operon;
use G::Seq::OverLapping;
use G::Seq::PatSearch;
use G::Seq::Tandem;
use G::Seq::Util;
use G::Seq::CAI;
use G::Seq::Markov;
use G::Seq::ORF;
use G::Seq::Align;
use G::Tools::Graph;
use G::Tools::H2v;
use G::Tools::Mapping;
use G::Tools::Blast;
use G::Tools::Fasta;
use G::Tools::Alignment;
use G::Tools::HMMER;
use G::Tools::Repeat;
use G::Tools::EPCR;
use G::Tools::SIM4;
use G::Tools::PBS;
use G::Tools::Cap3;
use G::Ecell::Pathway;
use G::Ecell::Reader;
use G::Ecell::KEGG;

# import presage classes
use G::System::BAS;
use G::System::GEMS;
use G::System::COMGA;
use G::System::STeP;
use G::System::CHI;
use G::System::ReL8;

use vars qw($VERSION @ISA @EXPORT @EXPORT_OK @INC $AUTOLOAD);

require Exporter;

@ISA = qw(Exporter AutoLoader);
@EXPORT = qw(
	     cds
	     complement
	     del_key
	     feature
	     foreachCDS
	     getseq
	     get_cdsseq
	     get_geneseq
	     get_gbkseq
	     get_intron
	     get_exon
	     hairpin_cut
	     new
	     next_locus
	     next_seq
	     output
	     rewind_genome
	     seq_info
	     startcodon
	     stopcodon
	     translate
	     pos2feature
	     pos2gene
	     gene2id

	     opt_default
	     opt_get
	     opt_val
	     opt_as_gb

	     gcskew
	     genomicskew
	     cum_gcskew
	     genome_map
	     leading_strand
	     rep_ori_ter
	     _find_bad_substance
	     _find_pathway_gap
	     _eri_reader
	     _eri_extracter
	     alignment
	     diffseq
	     maskseq
	     pasteseq
	     codon_counter
	     amino_counter
	     codon_usage
	     _codon_table
	     _codon_amino_printer
	     _codon_usage_printer
	     base_counter
	     _base_printer
	     base_entropy
	     base_information_content
	     base_relative_entropy
	     base_z_value
	     base_individual_information_matrix
	     consensus_z
	     valid_CDS
	     eliminate_atg
	     eliminate_pat
	     RNAfold
	     foreach_RNAfold
	     find_ori_ter
	     gcskew
	     genomicskew
	     gcwin
	     over_lapping_finder
	     _over_lapping_printer
	     oligomer_counter
	     _oligomer_translation
	     find_seq
	     find_tandem
	     foreach_tandem
	     _print_tandem
	     find_king_of_gene
	     atcgcon
	     cds_echo
	     print_gene_function_list
	     molecular_weight
	     _R_base_graph
	     _R_RNA_graph
	     _csv_h2v
	     _h2v
	     _mask_repeat_for_mapping
	     _foreach_mask_repeat_for_mapping
	     _cutquery_for_mapping
	     _blast_db_for_mapping
	     _formatdb_for_mapping
	     _blast_for_mapping
	     _blastpointer_for_mapping
	     _foreach_blastpointer_for_mapping
	     _file_list_for_mapping
	     _jstat_for_mapping
	     _jstat_for_STeP
	     _eri_update_with_kegg
	     _ecell_name2kegg_compound
	     seq2png
	     cai
	     w_value
	     markov
	     _UniUniGrapher
	     palindrome
	     query_strand
	     longest_ORF
	     pseudo_atg
	     seqinfo
	     find_identical_gene
	     _UniMultiGrapher
	     view_cds
	     graphical_LTR_search
	     _blast_tp_finder
	     _blast
	     _fasta
	     _formatdb
	     _clustalw
	     _hmmpfam
	     _repeatmasker
	     _trf
	     _STS_modifer_for_STeP
             _STS_divider_for_STeP
             _ePCR_for_STeP
	     _sts2pg_for_STeP
	     _sim4
	     _cap3
	     sim4_parse
	     fasta_parse
	     blast_parse
	     cap3_parse
	     cluster
	     file_maker
	     redundancy
	     redundancy_fasta
	     redundancy_sim4
	     redundancy_cap3
	     Totals
	     output_maker

	     set_operon
	     
	     BAS_engine
	     GEMS_engine
	     COMGA_engine
	     STeP_engine
	     CHI_engine
	     );

$VERSION = '1.1.0';

#::::::::::::::::::::::::::::::
#          Constants
#::::::::::::::::::::::::::::::

my %CodonTable = (
	       'gac', 'D', 'caa', 'Q', 'gca', 'A', 'ctg', 'L',
	       'gat', 'D', 'cag', 'Q', 'gcc', 'A', 'ctt', 'L',
	       'gaa', 'E', 'agc', 'S', 'gcg', 'A', 'ata', 'I',
	       'gag', 'E', 'agt', 'S', 'gct', 'A', 'atc', 'I',
	       'aga', 'R', 'tca', 'S', 'gga', 'G', 'att', 'I',
	       'agg', 'R', 'tcc', 'S', 'ggc', 'G', 'cca', 'P',
	       'cga', 'R', 'tcg', 'S', 'ggg', 'G', 'ccc', 'P',
	       'cgc', 'R', 'tct', 'S', 'ggt', 'G', 'ccg', 'P',
	       'cgg', 'R', 'aca', 'T', 'gta', 'V', 'cct', 'P',
	       'cgt', 'R', 'acc', 'T', 'gtc', 'V', 'atg', 'M',
	       'aaa', 'K', 'acg', 'T', 'gtg', 'V', 'tgg', 'W',
	       'aag', 'K', 'act', 'T', 'gtt', 'V', 'tgc', 'C',
	       'cac', 'H', 'tac', 'Y', 'tta', 'L', 'tgt', 'C',
	       'cat', 'H', 'tat', 'Y', 'ttg', 'L', 'taa', '/',
	       'aac', 'N', 'ttc', 'F', 'cta', 'L', 'tag', '/',
	       'aat', 'N', 'ttt', 'F', 'ctc', 'L', 'tga', '/'
		  );

#::::::::::::::::::::::::::::::
#          Variables
#::::::::::::::::::::::::::::::

my $outfile = '';
my $loaded = 0;

#::::::::::::::::::::::::::::::
#         IO Methods
#::::::::::::::::::::::::::::::

sub new {
    my $pkg = shift;
    my $filename = shift;
    my $option = shift;
    my $this = {};
    my $tmp = {};

    bless $this, $pkg;

    $this->loaded_msg() unless ($option eq 'no msg');

    if ($option eq 'Fasta' || $option eq 'EMBL' || $option eq 'swiss' || 
	$option eq 'SCF'   || $option eq 'PIR'  || $option eq 'GCG'   || 
	$option eq 'raw'   || $option eq 'ace'){

	use Bio::SeqIO;

	$outfile = '/tmp/' . time . '.gbk';
	my $in = Bio::SeqIO->newFh(-format => $option, -file => $filename);
	my $out = Bio::SeqIO->newFh(-format => "GenBank", 
				    -file => '>' . $outfile);

	print $out $_ while <$in>;

	open(GENBANK, $outfile) || die("Error at G.pm: $!\n");
	$this->read_locus("no msg");
	$this->read_header();
	$this->read_features();
	$this->getnucs();
	$this->seq_info();
    }elsif($option eq 'net GenBank'){
	use Bio::DB::GenBank;

	my $db = new Bio::DB::GenBank();
	my $bp;

	eval {
	    $bp = $db->get_Seq_by_acc($filename);
	};
	die("$@ Could not retrieve $filename\n") if $@;

        G::IO::Bioperl::convert($bp, $this);
	$this->seq_info();
    }elsif($option eq 'without annotation'){
	open(GENBANK, $filename) || die("Error at G.pm: $!\n");
	$this->goto_origin();
	$this->getnucs();
	$this->seq_info();
    }elsif($option eq 'long sequence'){
	open(GENBANK, $filename) || die("Error at G.pm: $!\n");
	$this->read_locus();
	$this->read_header();
	$this->read_features();
	$this->{origin} = tell GENBANK;
    }elsif($option eq 'multiple locus'){
	$tmp = new G ($filename, "no msg");
	$this->multi_locus($tmp);
	$this->seq_info();
    }elsif($option eq 'longest ORF annotation'){
	$tmp = new G ($filename, "no msg");
	$this->annotate_with_LORF($tmp);
	$this->seq_info();
    }elsif($option eq 'glimmer annotation'){
	$this->run_glimmer($filename);
	$this->annotate_with_glimmer($filename);
	$this->seq_info();
    }elsif($option eq 'bioperl'){
        G::IO::Bioperl::convert($filename, $this);
	$this->seq_info();
    }elsif($option ne '' && $option ne 'no msg'){
        die ("Error at G.pm: Unknown option\n");
    }else{
	open(GENBANK, $filename) || die("Error at G.pm: $!\n");
	$this->read_locus();
	$this->read_header();
	$this->read_features();
	$this->getnucs();
	$this->seq_info() unless ($option eq 'no msg');
    }

    return $this;
}

sub next_seq{
    my $this = shift;
    my $len = '100';
    my $opt = shift;
    my $char = '1';
    $len = $opt if ($opt);
    $this->{SEQ} = '';

    while($len > 0 && $char ne ''){
	$char = getc(GENBANK);
	next unless ($char =~ /[a-zA-Z]/);
	$len --;
	$this->{SEQ} .= $char;
    }
    $this->{position} = tell GENBANK;

    return $char;
}

sub rewind_genome{
    my $this = shift;
    seek GENBANK, $this->{origin}, 0;
    return 1;
}

sub next_locus{
    my $this = shift;
    my $msg = shift;
    undef %{$this};
    $this->read_locus($msg);
    $this->read_header();
    $this->read_features();
    $this->getnucs();

    if (length($this->{SEQ}) > 0){
	return 1;
    }else{
	return 0;
    }
}

sub goto_features{
    my $this = shift;
    while(<GENBANK>){
	last if (/^FEATURES/);
    }
}

sub goto_origin{
    my $this = shift;
    while(<GENBANK>){
	last if (/^ORIGIN/);
    }
}

sub read_locus{
    my $this = shift;
    my $msg = shift;
    my $tmp = '';
    local($_);

    while(<GENBANK>){
	next unless (/LOCUS/);

	split;
	shift;
	if ($#_ == 6){
	    $this->{"LOCUS"}->{"circular"} = 1;
	    ($this->{"LOCUS"}->{"id"},
	     $this->{"LOCUS"}->{"length"},
	     $tmp,
	     $this->{"LOCUS"}->{"nucleotide"},
	     $tmp,
	     $this->{"LOCUS"}->{"type"},
	     $this->{"LOCUS"}->{"date"}) = @_;
	}elsif ($#_ == 5){
	    $this->{"LOCUS"}->{"circular"} = 0;
	    ($this->{"LOCUS"}->{"id"},
	     $this->{"LOCUS"}->{"length"},
	     $tmp,
	     $this->{"LOCUS"}->{"nucleotide"},
	     $this->{"LOCUS"}->{"type"},
	     $this->{"LOCUS"}->{"date"}) = @_;
	}else{
	    msg_error("ERROR: Unknown LOCUS definition\n") if ($msg ne 'no msg');
	    $this->{"LOCUS"}->{"circular"} = 0;
	    $this->{"LOCUS"}->{"id"} = join('', @_);
	}
	last;
    }
}
	
sub read_header{
    my $this = shift;
    my $line = '';

    while($line = <GENBANK>){
	if ($line =~ /^COMMENT/){
	    s/COMMENT     //g;
	    while($line = <GENBANK>){
		last if ($line =~ /^FEATURES/);
		last unless (substr($line, 0, 1) eq ' ');
		$line =~ s/  +//g;
		$this->{"COMMENT"} .= $line;
	    }
	}
	last if ($line =~ /^FEATURES/);
	$this->{HEADER} .= $line;
    }
}

sub read_features{
    local($_);
    my $this = shift;
    my $num = -1;
    my $cds = 0;
    $this->{"CDS0"}->{dummy} = 1;

    while(<GENBANK>){
	if (/^BASE COUNT/){
	    s/BASE COUNT //g;
	    $this->{"BASE_COUNT"} = $_;
	}elsif (/^ORIGIN/){
	    last;
	}elsif (/^ {5}(\S+)\s+(.*)$/ && $_ !~ /\//){
	    my $key = $1;
	    my $feature = $2;

	    $num ++;
	    $this->{"FEATURE$num"}->{"feature"} = $num;
	    $this->{"FEATURE$num"}->{"type"} = $1;
	    $this->{"FEATURE$num"}->{"on"} = 1;
	    
	    if ($this->{"FEATURE$num"}->{"type"} eq "CDS"){
		$cds ++;
		$this->{"CDS$cds"}->{"on"} = 1;
		$this->{"FEATURE$num"}->{"cds"} = $cds;
	    }

	    s/\. \./\.\./g; #for (1. .2) type irregular format
	    s/\^/\.\./g;    #for (1^2) type irregular format

	    my $part_left = tr/\<//d;  #for (<1..2) type irregular format
	    my $part_right = tr/\>//d; #for (1..2>) type irregular format
	    $this->{"FEATURE$num"}->{"partial"} = "$part_left $part_right";

	    if (/join/){
		if (/complement\(join/){
		    $this->{"FEATURE$num"}->{"direction"} = "complement";
		    $this->{"CDS$cds"}->{"direction"} = "complement";
		    s/complement//;
		}else{
		    $this->{"FEATURE$num"}->{"direction"} = "direct";
		    $this->{"CDS$cds"}->{"direction"} = "direct";
		}

		my $line = $_;
		my $fth = '';
		my $linenum = tell GENBANK;

		while($line !~ /\//){
		    $fth .= $line;
		    $line = <GENBANK>;
		    $linenum = tell GENBANK;
		    $linenum -= length($_);
		}
		seek GENBANK, $linenum, 0;

		substr($fth, 0, 19) = '';
		$fth =~ s/join//g;
		$fth =~ s/\(//g;
		$fth =~ s/\)//g;
		$fth =~ s/ //g;
		$fth =~ s/\n//g;
		$fth =~ s/complement/c/g;

		my $tmpfth = $fth;
		$tmpfth =~ s/c//g;
		my @choparray = split(/\.\./, $tmpfth);
		$this->{"FEATURE$num"}->{"start"} = shift @choparray;
		$this->{"CDS$cds"}->{"start"} = $this->{"FEATURE$num"}->{"start"};
		$this->{"FEATURE$num"}->{"end"} = pop @choparray;
		$this->{"CDS$cds"}->{"end"} = $this->{"FEATURE$num"}->{"end"};
		$this->{"FEATURE$num"}->{"join"} = $fth;
		$this->{"CDS$cds"}->{"join"} = $fth;
		$this->{"CDS$cds"}->{"feature"} = $num;

		if ($line =~ /\:/){
		    $this->{"FEATURE$num"}->{"partial"} = $line;
		    $this->{"CDS$cds"}->{"partial"} = $line;
		}
	    }elsif (/\?/){
		$this->{"FEATURE$num"}->{"type"} = "partial_$key";
		$this->{"FEATURE$num"}->{"partial"} = $feature;
		msg_error("Partial feature: $feature\n");
	    }elsif (/complement\((\d+)\.\.(\d+)\)/){
		$this->{"FEATURE$num"}->{"direction"} = "complement";
		$this->{"FEATURE$num"}->{"start"} = $1;
		$this->{"FEATURE$num"}->{"end"} = $2;
		if ($this->{"FEATURE$num"}->{"type"} eq "CDS"){
		    $this->{"CDS$cds"}->{"direction"} = "complement";
		    $this->{"CDS$cds"}->{"start"} = $1;
		    $this->{"CDS$cds"}->{"end"} = $2;
		    $this->{"CDS$cds"}->{"feature"} = $num;
		}
	    }elsif (/(\d+)\.\.(\d+)/){
		$this->{"FEATURE$num"}->{"direction"} = "direct";
		$this->{"FEATURE$num"}->{"start"} = $1;
		$this->{"FEATURE$num"}->{"end"} = $2;
		if ($this->{"FEATURE$num"}->{"type"} eq "CDS"){
		    $this->{"CDS$cds"}->{"direction"} = "direct";
		    $this->{"CDS$cds"}->{"start"} = $1;
		    $this->{"CDS$cds"}->{"end"} = $2;
		    $this->{"CDS$cds"}->{"feature"} = $num;
		}
	    }elsif (/\s+complement\((\d+)\)/){
		$this->{"FEATURE$num"}->{"direction"} = "complement";
		$this->{"FEATURE$num"}->{"start"} = $1;
		$this->{"FEATURE$num"}->{"end"} = $1;
	    }elsif (/\s+(\d+)/){
		$this->{"FEATURE$num"}->{"direction"} = "direct";
		$this->{"FEATURE$num"}->{"start"} = $1;
		$this->{"FEATURE$num"}->{"end"} = $1;
	    }elsif (/replace\((\d+)\,\"/){
		$this->{"FEATURE$num"}->{"direction"} = "direct";
		$this->{"FEATURE$num"}->{"direction"} = "complement" if (/complement/);
		$this->{"FEATURE$num"}->{"start"} = $1;
		$this->{"FEATURE$num"}->{"end"} = $1;
		$this->{"FEATURE$num"}->{"partial"} = $_;
	    }elsif (/(\d+).*\.\..*(\d+)/){
		$this->{"FEATURE$num"}->{"direction"} = "direct";
		$this->{"FEATURE$num"}->{"direction"} = "complement" if (/complement/);
		$this->{"FEATURE$num"}->{"start"} = $1;
		$this->{"FEATURE$num"}->{"end"} = $2;
		$this->{"FEATURE$num"}->{"partial"} = $_;
	    }else{
		msg_error("Irregular location feature: $key $feature\n");
	    }

	}else{
	    if (/\/(\w+)=\"([^\"]+)\"/){
		$this->{"FEATURE$num"}->{"$1"} = $2;
	    }elsif (/\/(\w+)=\"([^\"]+)/){
		my $tag = $1;
		my $tmp = $2;
		my $line;
		while(<GENBANK>){
		    if (!/\"/){
			$tmp .= $_;
		    }elsif (/([^\"]+)\"/){
			$tmp .= $1;
			last;
		    }
		}
		$tmp =~ s/\s+/ /g;
		$tmp =~ s/ //g if ($tag eq 'translation');
		$this->{"FEATURE$num"}->{$tag} = $tmp;
	    }elsif (/\/(\w+)=([\d|\d+])/){
		$this->{"FEATURE$num"}->{$1} = $2;
	    }
	}
    }


}

sub getnucs {
    my $this = shift;

    while(<GENBANK>){
	last if (/\/\//);
	s/[^A-Za-z]//g;
	$this->{"SEQ"} .= $_;
    }

}

sub multi_locus{
    my $this = shift;
    my $gb = shift;
    my $lng;
    my $i = 1;
    my $f = 1;
    my $c = 1;

    do{
        my $F = 1;
        my $C = 1;
        $lng = length($this->{"SEQ"});
        $this->{"LOCUS$i"} = $gb->{"LOCUS"};
        $this->{"HEADER$i"} = $gb->{"HEADER"};
        $this->{"COMMENT$i"} = $gb->{"COMMENT"};

        while(defined (%{$gb->{"FEATURE$F"}})){
            $this->{"FEATURE$f"}            = $gb->{"FEATURE$F"};
            $this->{"FEATURE$f"}->{"start"} = $gb->{"FEATURE$F"}->{"start"} + $lng;
            $this->{"FEATURE$f"}->{"end"}   = $gb->{"FEATURE$F"}->{"end"} + $lng;
            $this->{"FEATURE$f"}->{"locus"} = $i;

	    if($gb->{"FEATURE$F"}->{"type"} eq "CDS"){
                $this->{"CDS$c"}              = $gb->{"CDS$C"};
                $this->{"CDS$c"}->{"start"}   = $gb->{"CDS$C"}->{"start"} + $lng;
                $this->{"CDS$c"}->{"end"}     = $gb->{"CDS$C"}->{"end"} + $lng;
                $this->{"CDS$c"}->{"feature"} = $f;
                $this->{"CDS$c"}->{"locus"}   = $i;


                if(defined $gb->{"CDS$C"}->{"join"}){
                    my @join = split(/\,/,$gb->{"CDS$C"}->{"join"});
		    my @num = ();
		    my @new_join = ();

                    foreach(@join){
                        if(tr/c/c/){
                            @num = split(/\.\./,$_);
                            push (@new_join, sprintf ("c%d\.\.%d", $num[0] + $lng, $num[1] + $lng));
                        } else {
                            @num = split(/\.\./,$_);
                            push (@new_join, sprintf ("%d\.\.%d",  $num[0] + $lng, $num[1] + $lng));
                        }
                    }
                    $this->{"CDS$c"}->{join} = join(',', @new_join);
                }
		$this->{"FEATURE$f"}->{"cds"} = $c;
                $c++;
                $C++;
            }
            $f++;
            $F++;
        }
        $this->{"SEQ"} .= $gb->{"SEQ"};
        $i++;

    }while($gb->next_locus());

    $this->{"FEATURE0"}->{"type"} = "source";
    $this->{"FEATURE0"}->{"start"} = 1;
    $this->{"FEATURE0"}->{"end"} = length($this->{"SEQ"});
    $this->{"LOCUS"} = $this->{"LOCUS1"};
    $this->{"HEADER"} = $this->{"HEADER1"};
    $this->{"COMMENT"} = $this->{"COMMENT1"};
    $this->{"CDS0"}->{dummy} = 1;
}


sub run_glimmer {
    my $this = shift;
    my $file = shift;

    system("./run-glimmer2 $file");
}


sub annotate_with_glimmer {
    my $this = shift;
    my $file = shift;
    
    open (FASTA, $file);
    while(<FASTA>){
	if (/^\>/){
	    s/\>//;
	    split;
	    $this->{LOCUS}->{id} = $_[0];
	    next;
	}else{
	    s/[^a-zA-Z]//g;
	    $this->{SEQ} .= lc($_);
	}
    }
    close(FASTA);

    $this->{"CDS0"}->{dummy} = 1;
    $this->{"FEATURE0"}->{dummy} = 1;

    my $count = 0;
    open (GLIMMER, 'tmp.coord');
    while(<GLIMMER>){
	$count ++;
	my @line = split;

	$this->{"CDS$count"}->{feature} = $count;
	$this->{"FEATURE$count"}->{feature} = $count;

	if ($line[1] > $line[2]){
	    $this->{"CDS$count"}->{start} = $line[2];
	    $this->{"CDS$count"}->{end} = $line[1];
	    $this->{"CDS$count"}->{direction} = "complement";
	    $this->{"FEATURE$count"}->{start} = $line[2];
	    $this->{"FEATURE$count"}->{end} = $line[1];
	    $this->{"FEATURE$count"}->{direction} = "complement";
	}else{
	    $this->{"CDS$count"}->{start} = $line[1];
	    $this->{"CDS$count"}->{end} = $line[2];
	    $this->{"CDS$count"}->{direction} = "direct";
	    $this->{"FEATURE$count"}->{start} = $line[1];
	    $this->{"FEATURE$count"}->{end} = $line[2];
	    $this->{"FEATURE$count"}->{direction} = "direct";
	}
    }
    close(GLIMMER);
}



sub annotate_with_LORF {
    my $this = shift;
    my $gb = shift;
    my $seq = $gb->{SEQ};
    my ($start,$end,$i);
    my $count = 0;

    $this->{"LOCUS"} = $gb->{"LOCUS"};
    $this->{"HEADER"} = $gb->{"HEADER"};
    $this->{"COMMENT"} = $gb->{"COMMENT"};
    $this->{"CDS0"}->{dummy} = 1;
    $this->{"FEATURE0"} = $gb->{"FEATURE0"};
    $this->{"SEQ"} = $seq;

    for ($i = 0; $i <= 1; $i ++){
	$seq = complement($gb->{SEQ}) if ($i);
	$start = 0;
	$end = 0;
	
	while(0 <= ($start = index($seq, 'atg', $start + 1))){
	    next if ($start < $end && ($start - $end + 1) % 3 == 0);
	    my $tmp = $start;
	    my $stopcodon = '';
	    while(0 <= ($tmp = index($seq, 'tag', $tmp +1))){
		if (($tmp - $start + 1) % 3 == 0 && $tmp - $start > 49){
		    $count ++;
		    $end = $tmp;
		    $stopcodon = 'tag';
		    last;
		}
	    }
	    my $tmp = $start;
	    while(0 <= ($tmp = index($seq, 'taa', $tmp +1))){
		if (($tmp - $start + 1) % 3 == 0 && $tmp - $start > 49){
		    if ($tmp < $end){
			$count ++;
			$end = $tmp;
			$stopcodon = 'taa';
			last;
		    }else{
			last;
		    }
		}
	    }
	    my $tmp = $start;
	    while(0 <= ($tmp = index($seq, 'tga', $tmp +1))){
		if (($tmp - $start + 1) % 3 == 0 && $tmp - $start > 49){
		    if ($tmp < $end){
			$count ++;
			$end = $tmp;
			$stopcodon = 'tga';
			last;
		    }else{
			last;
		    }
		}
	    }
	    if ($i){
		if ($end > 0 && ($end - $start + 1) / 3 > opt::val("length")){
		    $this->{"CDS$count"}->{start} = length($gb->{SEQ}) - $end + 1;
		    $this->{"CDS$count"}->{end} = length($gb->{SEQ}) - $start + 1;
		    $this->{"CDS$count"}->{feature} = $count;
		    $this->{"CDS$count"}->{direction} = "complement";

		    $this->{"FEATURE$count"}->{type} = "CDS";
		    $this->{"FEATURE$count"}->{start} = length($gb->{SEQ}) - $end + 1;
		    $this->{"FEATURE$count"}->{end} = length($gb->{SEQ}) - $start + 1;
		    $this->{"FEATURE$count"}->{feature} = $count;
		    $this->{"FEATURE$count"}->{direction} = "complement";
		}
	    }else{
		if ($end > 0 && ($end - $start + 1) / 3 > opt::val("length")){
		    $this->{"CDS$count"}->{start} = $start + 1;
		    $this->{"CDS$count"}->{end} = $end + 1;
		    $this->{"CDS$count"}->{feature} = $count;
		    $this->{"CDS$count"}->{direction} = "direct";

		    $this->{"FEATURE$count"}->{type} = "CDS";
		    $this->{"FEATURE$count"}->{start} = $start + 1;
		    $this->{"FEATURE$count"}->{end} = $end + 1;
		    $this->{"FEATURE$count"}->{feature} = $count;
		    $this->{"FEATURE$count"}->{direction} = "direct";
		}
	    }	
	}
    }
}


sub loaded_msg {
    my $this = shift;

    $loaded ++;
    return if ($loaded > 1);

    my $print =
	qq(
	     __/__/__/__/__/__/__/__/__/__/__/__/__/
                
             G-language Genome Analysis Environment

	      Version: $VERSION
	      Lisence: GPL
 
	      Copyright (C) 2001-2002 
              G-language Project
	      Institute for Advanced Biosciences,
	      Keio University, JAPAN 

          	 http://www.g-language.org/

	     __/__/__/__/__/__/__/__/__/__/__/__/__/
	   \n);

    &msg_error($print);
}

#::::::::::::::::::::::::::::::
#        Native Methods
#::::::::::::::::::::::::::::::

sub getseq {
    my $this = shift;
    my $start = shift;
    my $end = shift;

    my $seq = substr($this->{SEQ}, $start, $end-$start+1);
    
    return $seq;
}

sub get_gbkseq {
    my $this = shift;
    my $start = shift;
    my $end = shift;

    my $seq = substr($this->{SEQ}, $start -1, $end-$start+1);

    return $seq;
}

sub get_cdsseq {
    my $this = shift;
    my $object = shift;

    my $cdsseq = $this->get_gbkseq($this->{$object}->{start}, 
				   $this->{$object}->{end});
    $cdsseq = &complement($cdsseq) 
	if ($this->{$object}->{direction} eq 'complement');

    return $cdsseq;
}

sub before_startcodon {
    my $this = shift;
    my $object = shift;
    my $length = shift;

    if ($this->{$object}->{direction} eq 'complement'){
	return 
	    complement(substr($this->{SEQ}, 
			      $this->{$object}->{end}, 
			      $length));
    }else{
	return 
	    substr($this->{SEQ}, 
		   $this->{$object}->{start} - 1 - $length, 
		   $length);
    }
}

sub after_startcodon {
    my $this = shift;
    my $object = shift;
    my $length = shift;

    if ($this->{$object}->{direction} eq 'complement'){
	return 
	    complement(substr($this->{SEQ}, 
			      $this->{$object}->{end} - 1 - 3 - $length + 1, 
			      $length));
    }else{
	return 
	    substr($this->{SEQ}, 
		   $this->{$object}->{start} + 3 - 1, 
		   $length);
    }
}

sub before_stopcodon {
    my $this = shift;
    my $object = shift;
    my $length = shift;

    if ($this->{$object}->{direction} eq 'complement'){
	return 
	    complement(substr($this->{SEQ}, 
			      $this->{$object}->{start} + 3 - 1, 
			      $length));
    }else{
	return 
	    substr($this->{SEQ}, 
		   $this->{$object}->{end} - 3 - 1 - $length + 1, 
		   $length);
    }
}

sub after_stopcodon {
    my $this = shift;
    my $object = shift;
    my $length = shift;

    if ($this->{$object}->{direction} eq 'complement'){
	return 
	    complement(substr($this->{SEQ}, 
			      $this->{$object}->{start} - 1 - $length, 
			      $length));
    }else{
	return 
	    substr($this->{SEQ}, 
		   $this->{$object}->{end} +1 - 1, 
		   $length);
    }
}

sub startcodon {
    my $this = shift;
    my $object = shift;

    return substr($this->get_geneseq($object), 0, 3);
}

sub stopcodon {
    my $this = shift;
    my $object = shift;

    return substr($this->get_geneseq($object), -3, 3);
}

sub pos2feature {
    my $this = shift;
    my $pos = shift;

    foreach my $feat ($this->feature()){
	next if ($feat eq 'FEATURE0');

	if ($pos >= $this->{$feat}->{start} && $pos <= $this->{$feat}->{end}){
	    return $feat;
	}elsif ($pos < $this->{$feat}->{start}){
	    return '';
	}
    }
}

sub gene2id {
    my $this = shift;
    my $gene = shift;

    foreach my $feat ($this->cds()){
	return $feat if ($this->{$feat}->{gene} eq $gene);
    }
}

sub pos2gene {
    my $this = shift;
    my $pos = shift;

    foreach my $feat ($this->cds()){
	if ($pos >= $this->{$feat}->{start} && $pos <= $this->{$feat}->{end}){
	    return $feat;
	}elsif ($pos < $this->{$feat}->{start}){
	    return '';
	}
    }
}

sub get_geneseq {
    my $this = shift;
    my $object = shift;

    my $geneseq = $this->get_gbkseq($this->{$object}->{start}, 
				   $this->{$object}->{end});
    if ($this->{$object}->{join}){
	$geneseq = $this->get_exon($object);
    }elsif ($this->{$object}->{direction} eq 'complement'){
	$geneseq = &complement($geneseq);
    }

    return $geneseq;
}

sub get_intron {
    my $this = shift;
    my $cds = shift;

    unless ($this->{$cds}->{join}){
	return -1;
	last;
    }
    my @join = split(/\.\./, $this->{$cds}->{join});
    shift @join;
    pop @join;
    my @seq;
    my $line;

    foreach $line (@join){
	$line =~ s/c//g;
	my ($start, $end) = split(/,/, $line, 2);
	my $tmp = $this->get_gbkseq($start + 1, $end - 1);
	push (@seq, $tmp);
    }

    return @seq;
}

sub get_exon {
    my $this = shift;
    my $cds = shift;

    unless ($this->{$cds}->{join}){
	return -1;
	last;
    }
    my @join = split(/,/, $this->{$cds}->{join});
    my $seq = '';
    my $line;

    foreach $line (@join){
	my $complement = $line =~ tr/c//d;
	my ($start, $end) = split(/\.\./, $line, 2);
	my $tmp = $this->get_gbkseq($start, $end);
	$tmp = complement($tmp) if ($complement);
	$seq .= $tmp;
    }

    $seq = complement($seq) if ($this->{$cds}->{direction} eq 'complement');
    return $seq;
}

sub seq_info {
    my $this = shift;
    my $length = length($this->{SEQ});

    my $a = $this->{SEQ} =~ tr/a/a/;
    my $t = $this->{SEQ} =~ tr/t/t/;
    my $g = $this->{SEQ} =~ tr/g/g/;
    my $c = $this->{SEQ} =~ tr/c/c/;
    my $others = $length - $a - $t - $g - $c;
    my $msg;

    $msg .= sprintf "\n\nAccession Number: %s\n", $this->{LOCUS}->{id};
    $msg .= sprintf "\n  Length of Sequence : %9d\n" , $length;
    $msg .= sprintf "           A Content : %9d (%.2f\%)\n" , 
    $a , $a / $length * 100;
    $msg .= sprintf "           T Content : %9d (%.2f\%)\n" , 
    $t , $t / $length * 100;
    $msg .= sprintf "           G Content : %9d (%.2f\%)\n" , 
    $g , $g / $length * 100;
    $msg .= sprintf "           C Content : %9d (%.2f\%)\n" , 
    $c , $c / $length * 100;
    $msg .= sprintf "              Others : %9d (%.2f\%)\n" , 
    $others,  $others / $length * 100;
    $msg .= sprintf "          AT Content :    %.2f\%\n", 
    ($a + $t) / $length * 100;
    $msg .= sprintf "          GC Content :    %.2f\%\n\n", 
    ($g + $c) / $length * 100;

    &msg_send($msg);

    return ($a, $t, $g, $c);
}

sub feature {
    my $this = shift;
    my $opt = shift;
    my $i = 0;
    my @feature;

    while(defined(%{$this->{"FEATURE$i"}})){
	if ($opt ne 'all'){
	    next if ($this->{"FEATURE$i"}->{on} == 0);
	}
	push (@feature, "FEATURE$i");
	$i ++;
    }

    return @feature;
}

sub cds {
    my $this = shift;
    my $opt = shift;
    my $i = 0;
    my @cds;

    while(defined(%{$this->{"FEATURE$i"}})){
	$i ++;
	next if ($this->{"FEATURE$i"}->{type} ne 'CDS');

	if ($opt ne 'all'){
	    next if ($this->{"FEATURE$i"}->{on} == 0);
	    my $id = $this->{"FEATURE$i"}->{cds};
	    next if ($this->{"CDS$id"}->{on} == 0);
	}

	push (@cds, "FEATURE$i");
    }

    return @cds;
}
	  

sub foreachCDS {
    my $this = shift;
    my $sub = shift;
    my $i = 0;
    local($_);

    while(defined(%{$this->{"FEATURE$i"}})){
	$i ++;
	next unless ($this->{"FEATURE$i"}->{type} eq 'CDS');
	my $start = $this->{"FEATURE$i"}->{start};
	my $end = $this->{"FEATURE$i"}->{end};
	$_ = $this->getseq($start, $end);
	$_ = &complement($_) if ($this->{"FEATURE$i"}->{direction} eq 'complement');
	&$sub();
    }
}

sub hairpin_cut {
    my $nuc = shift;
    my $val = "\n==============\n!!!Afro Man!!!===============\n\n";

    system('netscape http://www2.osk.3web.ne.jp/~e916/');

    return $val;
}

sub complement {
    my $nuc = reverse(shift);
    
    $nuc =~ tr
	[acgturymkdhbvwsnACGTURYMKDHBVWSN]
	[tgcaayrkmhdvbwsnTGCAAYRKMHDVBWSN];
    return $nuc;
}

sub translate {
    my $seq = lc(shift);
    my $amino = '';

    while(3 <= length($seq)){
	my $codon = substr($seq, 0, 3);
	substr($seq, 0, 3) = '';
	if ($codon =~ /[^atgc]/){
	    $amino .= '?';
	}else{
	    $amino .= $CodonTable{$codon};
	}
    }
    if(length($seq)){
	msg_error("Translation: illegal length.\n");
    }

    return $amino;
}

sub make_gb {
    my $gb = shift;
    my $file = shift;
    my $output = shift;
    my ($type, $dummy, $i, $p, $q, $z, $lng);
    
    if ($output eq "attach"){
        open(OUT, '>>' . $file) || die($!);
    }else {
        open(OUT, '>' . $file) || die("hoge", $!);
    }

    if($gb->{LOCUS}->{circular} eq "1"){
	$type = "circular";
    }else{
        $type = "linear";
    }

    my @locus = ("LOCUS", $gb->{LOCUS}->{id}, "$gb->{LOCUS}->{length}bp", 
		 $gb->{LOCUS}->{nucleotide}, $type, $gb->{LOCUS}->{type}, 
		 $gb->{LOCUS}->{date});

    printf OUT "%-11.11s %-10.10s %-12.12s %-5.5s %-9.9s %-9.9s%-10.10s\n",@locus;
    printf OUT "$gb->{HEADER}$gb->{COMMENT}FEATURES %11sLocation/Qualifiers\n";

    foreach my $feat ($gb->feature()){
	if($gb->{$feat}->{"direction"} eq "direct"){
            if($gb->{$feat}->{join}){
		my $join = "join"."(".$gb->{$feat}->{join}.")";
		my $position = rindex($join,',',58);
		for($z = 0; $z <= length($join); $z += 58){
                    my $join_cut = substr($join,$z,58);
                    if($z == 0){
			printf OUT "%-4.4s %-15.15s %-58.58s\n","$dummy","$gb->{$feat}->{type}","$join_cut";
                    }else{
                        printf OUT "%-20.20s %-58.58s\n","$dummy","$join_cut";
                    }
		}
            }else{
                my @partial = split(/ /,$gb->{$feat}->{"partial"});
                if($partial[1] == 1){
                    printf OUT "%-4.4s %-15.15s %-58.58s\n",
			    "$dummy","$gb->{$feat}->{type}",
			    "$gb->{$feat}->{start}..>$gb->{$feat}->{end}";
                }elsif($partial[0] == 1){
                    printf OUT "%-4.4s %-15.15s %-58.58s\n",
			    "$dummy","$gb->{$feat}->{type}",
			    "<$gb->{$feat}->{start}..$gb->{$feat}->{end}";
                }else{
                    printf OUT "%-4.4s %-15.15s %-58.58s\n","$dummy",
			    "$gb->{$feat}->{type}",
			    "$gb->{$feat}->{start}..$gb->{$feat}->{end}";
		}
            }
	}elsif($gb->{$feat}->{"direction"} ne "direct"){
            if($gb->{$feat}->{join}){
		my $join = $gb->{$feat}->{"direction"}."("."join"."(".$gb->{$feat}->{join}.")".")";
		
                for($z = 0; $z < length($join); $z += 58){
                    my $join_cut = substr($join,$z,58);
                    if($z == 0){
                        printf OUT "%-4.4s %-15.15s %-58.58s\n",
			       "$dummy","$gb->{$feat}->{type}","$join_cut";
                    }else{
                        printf OUT "%-20.20s %-58.58s\n","$dummy","$join_cut";
		    }
		}
            }else{
                printf OUT "%-4.4s %-15.15s %-58.58s\n",
			"$dummy","$gb->{$feat}->{type}",
			"$gb->{$feat}->{direction}($gb->{$feat}->{start}..$gb->{$feat}->{end})";
            }
	}

        foreach (keys(%{$gb->{$feat}})){
            next if($_ eq "on" || $_ eq "partial" || $_ eq "start" || $_ eq "end" 
	       || $_ eq "feature" || $_ eq "type" || $_ eq "direction" 
	       || $_ eq "join");

	    my $str = "/".$_."="."\"".$gb->{$feat}->{$_}."\"";
	    $lng = length($str);
	    if($lng > 61 ){
		for($i = 0;$i < $lng;$i += 58){
		    my $pr = substr($str,$i,58);
		    printf OUT "%-20.20s %-58.58s\n","$dummy",$pr;
		}
	    }else{
		printf OUT "%-20.20s %-58.58s\n","$dummy",$str;
	    }
	}
    }

    print OUT "BASE COUNT $gb->{BASE_COUNT}\n";
    print OUT "ORIGIN\n";
    for($p = 0;$p<=length($gb->{SEQ});$p += 60){
	my $seq_prt = "";
        my $seq = substr($gb->{SEQ},$p,60);
	for($q = 0;$q<=60;$q += 10){
            my $seq_splt = substr($seq,$q,10);
            $seq_prt .= $seq_splt." ";
        }
        printf OUT "%9.9s %-66.66s\n",$p+1,"$seq_prt";
    }
    print OUT "//\n";

    close(OUT);

    return 1;
}

sub output {
    my $gb = shift;
    my $file = shift;
    my $option = shift;

    if ($option eq 'GenBank' || length($option) < 1){
	&make_gb($gb, $file);
    }elsif($option eq 'Fasta' || $option eq 'EMBL' || $option eq 'swiss' || 
	$option eq 'SCF'   || $option eq 'PIR'  || $option eq 'GCG'   || 
	$option eq 'raw'   || $option eq 'ace'){

	use Bio::SeqIO;

	my $outfile = '/tmp/' . time . '.gbk';
	&make_gb($gb, $outfile);
	my $in = Bio::SeqIO->newFh(-format => "GenBank", -file => $outfile);
	my $out = Bio::SeqIO->newFh(-format => "$option", 
				    -file => '>' . $file);

	print $out $_ while <$in>;
    }else{
	&msg_error("G::output - Unknown format to output.");
    }
}

sub del_key {
    my $this = shift;
    my $key = shift;

    delete $this->{$key};

    return 1;
}

sub DESTROY {
    my $self = shift;

    undef %{$self};
    close(GENBANK);   
    unlink($outfile);
}



#::::::::::::::::::::::::::::::
#          Perldoc
#::::::::::::::::::::::::::::::

1;

__END__

=head1 NAME

G - G-language Genome Analysis Environment Version 1.x core module (Prelude)

=head1 SYNOPSIS

 use G;                          # Imports G-language GAE module 
   
 $gb = new G("ecoli.gbk");       # Creates G's instance at $gb 
                                 # At the same time, read in ecoli.gbk. 
                                 # Read the annotation and sequence 
                                 # information 
                                 # See DESCRIPTION for details
   
 $gb->seq_info();                # Prints the basic sequence information.

 $find_ori_ter(\$gb->{SEQ});     # Gives sequence as a reference to
                                 # odyssey functions

=head1 DESCRIPTION

 The Prelude Core of G-language GAE fully supports most sequence databases.

=head2 stored annotation information:

=over 4   

 LOCUS  
         $gb->{LOCUS}->{id}              -accession number 
         $gb->{LOCUS}->{length}          -length of sequence  
         $gb->{LOCUS}->{nucleotide}      -type of sequence ex. DNA, RNA  
         $gb->{LOCUS}->{circular}        -1 when the genome is circular.
                                          otherwise 0
         $gb->{LOCUS}->{type}            -type of species ex. BCT, CON  
         $gb->{LOCUS}->{date}            -date of accession 

 HEADER  
    $gb->{HEADER}  

 COMMENT  
    $gb->{COMMENT}  

 FEATURE  
         Each FEATURE is numbered(FEATURE1 .. FEATURE1172), and is a 
         hash structure that contains all the keys of Genbank.   
         In other words,  in most cases, FEATURE$i's hash at least 
         contains informations listed below: 
         $gb->{FEATURE$i}->{start}  
         $gb->{FEATURE$i}->{end}  
         $gb->{FEATURE$i}->{direction}
         $gb->{FEATURE$i}->{join}
         $gb->{FEATURE$i}->{note}  
         $gb->{FEATURE$i}->{type}        -CDS,gene,RNA,etc.

         To analyze each FEATURE, write: 

         $i = 1;  
         while(defined(%{$gb->{FEATURE$i}})){  
   
                 $i ++;  
         }  

         Each CDS is stored in a similar manner.
         There are 
         $gb->{CDS$i}->{start}
         $gb->{CDS$i}->{end}
         $gb->{CDS$i}->{direction}
         $gb->{CDS$i}->{join}
         $gb->{CDS$i}->{feature}         -number $n for $gb->{FEATURE$n}
                                          where "CDS$i" = "FEATURE$n"

         In the same manner, to analyze all CDS, write:  
   
         $i = 1;  
         while(defined(%{$gb->{CDS$i}})){  
   
                 $i ++;  
         }

 BASE COUNT  
         $gb->{BASE_COUNT}  

 SEQ  
         $gb->{SEQ}              -sequence data following "ORIGIN" 

=back

=head2 supported methods of Prelude

=over 2

=item new()

         Creates a G instance.
         First option is the filename of the database. Default format is
         the GenBank database.
         Second option specifies detailed actions.

           'without annotation'      this option skips the annotation.

           'multiple locus'          this option merges multiple loci in the 
                                     database and load the information
                                     as G-language instance.

           'long sequence'           this option uses a pointer of the filehandle 
                                     to read the genome sequence. See 
                                     next_seq() method below for details.

           'bioperl'                 this option creates a G instance from 
                                     a bioperl object. 
                                     eg. $bp = $bp->next_seq();       # bioperl
                                         $gb = new G($bp, "bioperl"); # G

           'longest ORF annotation'  this option predicts genes with longest ORF
                                     algorithm (longest frame from start codon
                                     to stop codon, with more than 17 amino 
                                     acids) and annotates the sequence.

           'glimmer annotation'      this option predicts genes using glimmer2,
                                     a gene prediction software for microbial
                                     genomes available from TIGR.
                                     http://www.tigr.org/softlab/
                                     Local installation of glimmer2 and setting
                                     of PATH environment value is required.

               - following options require bioperl installation -

           'Fasta'              this option loads a Fasta format database.
           'EMBL'               this option loads a EMBL  format database.
           'swiss'              this option loads a swiss format database.
           'SCF'                this option loads a SCF   format database.
           'PIR'                this option loads a PIR   format database.
           'GCG'                this option loads a GCG   format database.
           'raw'                this option loads a raw   format database.
           'ace'                this option loads a ace   format database.
           'net GenBank'        this option loads a GenBank format database from 
                                NCBI database. With this option, the first value to 
                                pass to new() function will be the accession 
                                number of the database.

=item output()

         Given a filename and an option, outputs the G-language data object 
         to the specified file in a flat-file database of a given format.
         The options are the same as those of new().  Default format is 'GenBank'.
         eg. $gb->output("my_genome.embl", "EMBL");
             $gb->output("my_genome.gbk"); # with GenBank you can ommit the option.

=item complement()

         Given a sequence, returns its complement.
         eg. complement('atgc');  returns 'gcat'

=item translate()

         Given a sequence, returns its translated sequence.
         Regular codon table is used.
         eg. translate('ctggtg'); returns 'LV'

=item $gb->seq_info()

         Prints the basic information of the genome to STDOUT.

=item $gb->DESTROY()

         Destroys the G instance

=item $gb->del_key()

         Given a object, deletes it from the G instance structure
         eg. $gb->del_key('FEATURE1'); deletes 'FEATURE1' hash

=item $gb->getseq()

         Given the start and end positions (starting from 0 as in Perl),
         returns the sequence specified.
         eg. $gb->getseq(1,3); returns the 2nd, 3rd, and 4th nucleotides.

=item $gb->get_gbkseq()

         Given the start and end positions (starting from 1 as in 
         Genbank), returns the sequence specified.
         eg. $gb->get_gbkseq(1,3); returns the 1st, 2nd, and 3rd 
             nucleotides.

=item $gb->get_cdsseq()

         Given a CDS ID, returns the CDS sequence. 
         'complement' is properly parsed.
         eg. $gb->get_cdsseq('CDS1'); returns the 'CDS1' sequence.

=item $gb->get_geneseq()

         Given a CDS ID, returns the CDS sequence, or the exon sequence
         If introns are present.
         'complement' is properly parsed, and introns are spliced out.
         eg. $gb->get_geneseq('CDS1'); returns the 'CDS1' sequence or 
             exon.

=item $gb->feature()

         Returns the array of all feature object name.
         foreach ($gb->feature()){
             $gb->get_cdsseq($_);
         }
         prints all feature sequences.

=item $gb->cds()

         Returns the array of all cds object name.

         !CAUTION! the object name is actually the FEATURE OBJECT NAME,
         to enable access to all feature values. However, most of the
         time you do not need to be aware of this difference.

         foreach ($gb->cds()){
             $gb->get_geneseq($_);
         }
         prints all gene sequences.

=item $gb->startcodon()

         Given a CDS ID, returns the start codon.
         eg. $gb->startcodon('CDS1'); returns 'atg'

=item $gb->stopcodon()

         Given a CDS ID, returns the stop codon.
         eg. $gb->stopcodon('CDS1'); returns 'tag'

=item $gb->before_startcodon()

         Given a CDS ID and length, returns the sequence upstream of 
         start codon.
         eg. $gb->before_startcodon('CDS1', 100); returns 100 bp  
             sequence upstream of the start codon of 'CDS1'.

=item $gb->after_startcodon()

         Given a CDS ID and length, returns the sequence downstream of 
         start codon.
         eg. $gb->after_startcodon('CDS1', 100); returns 100 bp  
             sequence downstream of the start codon of 'CDS1'.

=item $gb->before_stopcodon()

         Given a CDS ID and length, returns the sequence upstream of 
         stop codon.
         eg. $gb->before_stopcodon('CDS1', 100); returns 100 bp  
             sequence upstream of the stop codon of 'CDS1'.

=item $gb->after_stopcodon()

         Given a CDS ID and length, returns the sequence downstream of 
         stop codon.
         eg. $gb->after_stopcodon('CDS1', 100); returns 100 bp  
             sequence downstream of the stop codon of 'CDS1'.

=item $gb->get_intron()

         Given a CDS ID, returns the intron sequences as array of 
         sequences.
         eg. $gb->get_intron('CDS1'); 
             returns ($1st_intron, $2nd_intron,..)

=item $gb->pos2feature()

         Given a GenBank position (sequence starting from position 1) 
         returns the G-instance ID (ex. FEATURE123) of the feature at
         the given position. If multiple features exists for the given
         position, the first feature to appear is returned. Returns 
         NULL if no feature exists.

=item $gb->pos2gene()

         Given a GenBank position (sequence starting from position 1) 
         returns the G-instance ID (ex. FEATURE123) of the gene at
         the given position. If multiple genes exists for the given
         position, the first gene to appear is returned. Returns 
         NULL if no gene exists.

=item $gb->gene2id()

         Given a GenBank gene name, returns the G object feature ID
         (ex. FEATURE123). Returns NULL if no gene exists.

=item $gb->get_exon()

         Given a CDS ID, returns the exon sequence.
         'complement' is properly parsed, and introns are spliced out.
         eg. $gb->get_exon('CDS1'); returns the 'CDS1' exon.

=item $gb->next_locus()

         Reads the next locus.
         the G instance is then updated.

         do{

         }while($gb->next_locus());

         Enables multiple loci analysis.        

=item $gb->next_seq()

         If G instance is created with 'long sequence' option, 
         $gb->next_seq() method replace the next chunk of sequence 
         to $gb->{SEQ}.

         while($gb->next_seq(100000)){
             print $gb->{SEQ};
         }

         Enables continuous analysis.

=item $gb->rewind_genome()

         If G instance is created with 'long sequence' option, 
         $gb->rewind_genome() method puts the filehandle pointer back 
         to the ORIGIN position.

=back

=head1 AUTHOR

Kazuharu Gaou Arakawa, gaou@g-language.org

=head1 SEE ALSO

perl(1).

=cut



