#!/usr/bin/env perl
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::
#             KEGG.pm odyssey
#
#     Copyright (C) 2001 Keio University
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# 
#   $Id: KEGG.pm,v 1.6 2001/09/10 04:53:44 t98901ka Exp $

# Helper routines that match E-CELL substance names against a KEGG
# Compound flat file.
package G::Ecell::KEGG;

# NOTE(review): msg::send() is called further down but is not defined in
# this file -- presumably it is made available through SubOpt or by the
# code that loads this module; verify.
use SubOpt;
use strict;
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);

require Exporter;

# NOTE(review): AutoLoader is named as a parent class but is not loaded
# anywhere in the visible code -- confirm whether it is still needed.
@ISA = qw(Exporter AutoLoader);
# Both routines are exported by default, so a plain "use G::Ecell::KEGG;"
# puts them (leading underscore included) into the caller's namespace.
# @EXPORT_OK is declared above but never populated here.
@EXPORT = qw(
	_ecell_name2kegg_compound
	_eri_update_with_kegg
	);
$VERSION = '0.01';

#::::::::::::::::::::::::::::::
#        Methods Start
#::::::::::::::::::::::::::::::
# Constructor stub.
# Arguments: package name, a file name and an option value; all three are
#            accepted and then ignored.
# Returns:   undef -- no object is created or blessed here.
sub new{
    my ($pkg, $filename, $option) = @_;

    # Never assigned, so the caller always receives undef.
    my $this;

    return $this;
}


# ecell_name2kegg_compound ver.20010830-01
# Author: Kazuharu Gaou Arakawa
# Usage: (string name, string formula, int score, string kegg_id) =
#            &_ecell_name2kegg_compound(string compoundfile,
#                                       string ecell_name);
# Options:
#  none.
# Description:
#  Matches the ecell name with KEGG compound by original scoring system.
#  Every name of every entry in the compound file is compared, in lower
#  case, with the ecell name.  An exact match ends the search at once with
#  the fixed score 777; otherwise the best scoring name wins (see
#  _kegg_name_score below).
# Returns:
#  (matched name in lower case, formula, score, KEGG entry id).
#  All four values are undef when the file cannot be opened (a warning is
#  printed) or when no name scores above zero.  The formula is undef when
#  the matched entry has no FORMULA field.
# Requirements:
#  KEGG Compound database file 
#     (ftp://kegg.genome.ad.jp/molecules/ligand/compound)

sub _ecell_name2kegg_compound{
    my $compound_file = shift;
    my $ecell_name = shift;

    $compound_file = '' unless (defined $compound_file);
    $ecell_name = defined($ecell_name) ? lc($ecell_name) : '';

    my ($max_name, $max_formula, $max_score, $max_id);

    # A missing database used to be silently ignored; keep returning the
    # "no match" result, but tell the user why.
    my $infile;
    unless (open($infile, '<', $compound_file)){
        warn "_ecell_name2kegg_compound: cannot open KEGG compound file "
            . "'$compound_file': $!\n";
        return ($max_name, $max_formula, $max_score, $max_id);
    }

    # The word list depends only on the ecell name, so build it once.
    # Hyphens separate words just like blanks; empty words (from doubled
    # separators) carry no information and are dropped.
    (my $spaced_name = $ecell_name) =~ s/-/ /g;
    my @parts = grep { length($_) } split(/ /, $spaced_name);

    my ($kegg_id, $formula, @name);
    my $in_name = 0;    # true while reading continuation lines of NAME

  LINE:
    while (my $line = <$infile>){
        $line =~ s/\r?\n\z//;    # only strip a line ending, never data

        if ($line =~ /^ENTRY\s+(\S+)/){
            $kegg_id = $1;
            $in_name = 0;
        }elsif ($line =~ /^NAME\s+(.*)/){
            $name[0] = lc($1);
            $in_name = 1;
        }elsif ($line =~ /^FORMULA\s+(.*)/){
            $formula = $1;
            $in_name = 0;
        }elsif ($line =~ m{^//}){
            # End of entry: evaluate every name collected for it.
            foreach my $key (@name){
                if ($ecell_name eq $key){
                    ($max_name, $max_formula, $max_score, $max_id)
                        = ($key, $formula, 777, $kegg_id);
                    last LINE;
                }

                my $score = _kegg_name_score($ecell_name, $key, \@parts);
                if ($score > (defined($max_score) ? $max_score : 0)){
                    ($max_name, $max_formula, $max_score, $max_id)
                        = ($key, $formula, $score, $kegg_id);
                }
            }

            # Start the next entry clean.  The formula must be cleared
            # too, or an entry without FORMULA would report the formula
            # of the entry before it.
            @name = ();
            $kegg_id = 0;
            $formula = undef;
            $in_name = 0;
        }elsif ($in_name && $line =~ /^\s+(.*)/){
            # Indented line directly under NAME: an additional name.
            push(@name, lc($1));
        }elsif ($line =~ /^\S/){
            # Any other field keyword ends the NAME field.
            $in_name = 0;
        }
    }
    close($infile);

    return ($max_name, $max_formula, $max_score, $max_id);
}

# Scores one lower-case KEGG name ($key) against the lower-case ecell name.
# $parts is a reference to the list of words of the ecell name.
#   +100 / +10  for each word (longer than 3 characters / shorter) found
#               literally inside the KEGG name;
#   +int(100/length of ecell name) for each character of the ecell name
#               found in the KEGG name while scanning left to right, plus
#               +5 when that character follows a non-letter and +7 when it
#               is a digit; the scan stops at the first hit located after
#               an opening parenthesis of the KEGG name;
#   +10         when both names start with the same character.
sub _kegg_name_score{
    my ($ecell_name, $key, $parts) = @_;
    my $score = 0;

    # Literal substring test: the ecell name is data, not a pattern, and
    # may contain characters such as '(', '[' or '+'.
    foreach my $part (@$parts){
        next if (index($key, $part) < 0);
        $score += (length($part) > 3) ? 100 : 10;
    }

    my $name_length = length($ecell_name);
    my $paren = index($key, '(');    # -1 when the name has no '('
    my $position = -1;
    for (my $i = 0; $i < $name_length; $i++){
        my $char = substr($ecell_name, $i, 1);
        # A miss sets $position back to -1, so the next search restarts
        # from the beginning of the KEGG name.
        $position = index($key, $char, $position + 1);

        # Only a parenthesis that exists can end the scan.
        last if (0 <= $paren && $position > $paren);
        next if ($position < 0);

        $score += int(100 / $name_length);
        # $position > 0 keeps substr() from wrapping round to the last
        # character when the hit is at the very start of the name.
        $score += 5 if ($position > 0 &&
                        substr($key, $position - 1, 1) =~ /[^a-zA-Z]/);
        $score += 7 if ($char =~ /[0-9]/);
    }

    $score += 10 if (substr($ecell_name, 0, 1) eq substr($key, 0, 1));

    return $score;
}


# eri_update_with_kegg ver.20010830-01
# Author: Kazuharu Gaou Arakawa
# Usage: &_eri_update_with_kegg(string erifile, int minimum_score_to_print,
#                               string compoundfile);
# Options:
#  minimum_score_to_print is optional and defaults to 0 (report all).
#  compoundfile is optional and defaults to 'compound' in the current
#  directory.
# Description:
#  Matches the ecell name with KEGG compound and prints the 
#  chemical formulae
#  The fourth tab separated field of every line containing SUBSTANCE is
#  taken as the ecell name.  One report per substance whose best score is
#  not below the minimum is passed to msg::send().
# Requirements:
#  sub _ecell_name2kegg_compound()
#  KEGG Compound database file 
#     (ftp://kegg.genome.ad.jp/molecules/ligand/compound)
#  E-CELL eri file

sub _eri_update_with_kegg {
    my $erifile = shift;
    # The documented minimum score was previously never read from the
    # argument list, so nothing was ever filtered.
    my $minscore = shift;
    my $compound_file = shift;

    $minscore = 0 unless (defined $minscore);
    $compound_file = 'compound'
        unless (defined($compound_file) && length($compound_file));

    my $eri;
    unless (defined($erifile) && open($eri, '<', $erifile)){
        warn "_eri_update_with_kegg: cannot open eri file '"
            . (defined($erifile) ? $erifile : '') . "': $!\n";
        return;
    }

    my @substance = ();
    while (my $line = <$eri>){
        next unless ($line =~ /SUBSTANCE/);
        # Strip the line ending first: when the name is the last field
        # it would otherwise keep the newline and never match exactly.
        $line =~ s/\r?\n\z//;
        my $ecell_name = (split(/\t/, $line, 5))[3];
        next unless (defined($ecell_name) && length($ecell_name));
        push(@substance, $ecell_name);
    }
    close($eri);

    foreach my $key (@substance){
        my ($name, $formula, $score, $id) = 
            _ecell_name2kegg_compound($compound_file, lc($key));

        # No match at all counts as score 0 for the threshold test.
        next if ($minscore > (defined($score) ? $score : 0));

        # Undefined values are reported as empty fields.
        foreach my $field ($name, $formula, $score, $id){
            $field = '' unless (defined $field);
        }

        &msg::send("SCORE: $score \t\tECELL: $key\n\tKEGG ID: $id",
        "\tNAME: $name\n\tFORMULA: $formula\n\n");
    }

    return;
}


# Destructor: there is nothing to release, so the object is only read
# from the argument list and left untouched.
sub DESTROY {
    my $self = $_[0];
}

1;
__END__
# Below is the stub of documentation for your module. You better edit it!

=head1 NAME

G::Ecell::KEGG - match E-CELL substance names to KEGG Compound entries

=head1 SYNOPSIS

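  use G::Ecell::KEGG;

  # Best KEGG match for a single E-CELL substance name
  my ($name, $formula, $score, $kegg_id) =
      _ecell_name2kegg_compound('compound', 'ATP');

  # Report the best KEGG match for every SUBSTANCE line of an eri file
  _eri_update_with_kegg('model.eri');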

=head1 DESCRIPTION

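G::Ecell::KEGG compares E-CELL substance names with the compound names
listed in a KEGG Compound database file and scores each candidate with a
heuristic of its own. C<_ecell_name2kegg_compound> returns the name,
formula, score and KEGG entry ID of the best match for one substance
name; an exact name match is given the fixed score 777.
C<_eri_update_with_kegg> collects the substance names from the SUBSTANCE
lines of an E-CELL eri file and reports the best match for each of them.

Both functions are exported by default.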

=head1 AUTHOR

Kazuharu Gaou Arakawa

=head1 SEE ALSO

perl(1).

=cut
