#!/usr/bin/env perl
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::
#             Util.pm odyssey
#
#     Copyright (C) 2001 Keio University
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# 
#   $Id: Util.pm,v 1.13 2001/10/07 16:29:43 t98901ka Exp $

package G::Seq::Util;

use SubOpt;
use strict;
use GD;

use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);

require Exporter;

@ISA = qw(Exporter AutoLoader);
# Items to export into callers namespace by default. Note: do not export
# names by default without a very good reason. Use EXPORT_OK instead.
# Do not simply export all your public functions/methods/constants.
@EXPORT = qw(
	     find_king_of_gene
	     atcgcon
	     cds_echo
	     print_gene_function_list
	     genome_map
	     seq2gif
	     maskseq
	     pasteseq
	     );
$VERSION = '0.01';

#::::::::::::::::::::::::::::::
#        Methods Start
#::::::::::::::::::::::::::::::
sub new{
    my $pkg = shift;
    my $filename = shift;
    my $option = shift;
    my $this;

    return $this;
}


# genome_map ver.20010906-01
# Author: Kazuharu Gaou Arakawa
# Usage: NULL = &genome_map(pointer G instance); 
# Options:
# -name      print gene name (default: 1)
# -output    "g" for graph, "show" for display (default: "show") 
# Description:
#  This method creates a nice looking map of the genome, showing
#  the partial ATGC contents and genes in either direct and complement
#  strands.
#  A is shown in red, T is shown in green, 
#  G is shown in yellow, and C is shown in blue. 
# Requirements:
#  SubOpt.pm
#  GD.pm

sub genome_map {
    &opt::default(output=>"show", name=>1);
    my @args = opt::get(@_);
    my $gb = shift @args;
    my $acnum = $gb->{LOCUS}->{id};
    my $output = opt::val("output");
    my $name = opt::val("name");
    my $filename;
    my $topmargin = 30;
    my $sidemargin = 80;
    my $hblock = 100;
    my $vblock = 10;
    my $page = 1;
    my $start;
    my $width = 800;
    my $height = 600;
    my $i = 0;
    my $cds = 1;

    mkdir ("graph", 0777);
    for ($start = 1; $start <= length($gb->{SEQ}); $start += 50 * 700 * 10){ 
	my $end = $start + 10 * 50 * 700 - 1;
	
	# GD constant
	my $im = new GD::Image($width, $height);
	my $white = $im->colorAllocate(255,255,255);
	my $black = $im->colorAllocate(0,0,0);
	my $gray = $im->colorAllocate(180,180,180);  
	my $red = $im->colorAllocate(255,0,0);       #A
	my $yellow = $im->colorAllocate(255,255,0);  #T
	my $green = $im->colorAllocate(0,150,0);     #G
	my $blue = $im->colorAllocate(0,0,255);      #C
	my $aqua = $im->colorAllocate(120, 160, 255);

	my $gred = $im->colorAllocate(255,150,150);       #A for graph
	my $gyellow = $im->colorAllocate(255,255,50);     #T for graph
	my $ggreen = $im->colorAllocate(150,150,150);     #G for graph
	my $gblue = $im->colorAllocate(150,150,255);      #C for graph


	# Draw Base Graph
	for ($i = $sidemargin; $i <= $sidemargin + $hblock * 7; $i += $hblock){
	    $im->line($i, $topmargin, $i, $topmargin + 5 * 11 * $vblock, 
		      $gray);
	}
	for ($i = $topmargin; $i <= $topmargin + $vblock * 5 * 11; $i += 
	     $vblock){
	    $im->line($sidemargin, $i, $sidemargin + 7 * $hblock, $i, $gray);
	}
	for ($i = $topmargin + $vblock * 5; 
	     $i < $topmargin + $vblock * 5 * 11; 
	     $i += $vblock * 5){
	    $im->line($sidemargin, $i - 1, $sidemargin + 7 * $hblock, $i - 1, 
		      $black);
	    $im->line($sidemargin, $i + 1, $sidemargin + 7 * $hblock, $i + 1, 
		      $black);
	}
	$im->string(gdSmallFont, $width - 110, 5, 
		    "G-language Project", $black);
	$im->string(gdSmallFont, $width - 110 - 50, 5, "A", $red);
	$im->string(gdSmallFont, $width - 110 - 40, 5, "T", $yellow);
	$im->string(gdSmallFont, $width - 110 - 30, 5, "G", $green);
	$im->string(gdSmallFont, $width - 110 - 20, 5, "C", $blue);

	my $j = 0;
	for ($i = $topmargin + $vblock * 5; 
	     $i <= $topmargin + $vblock * 5 * 10; 
	     $i += $vblock * 5){
	    my $num = $start + $j * 50 * 700;
	    $im->string(gdTinyFont, 10, $i, "$num", $black);
	    $j ++;
	}
	$im->string(gdSmallFont, 5,  5, "$acnum : from $start to $end", 
		    $black);

	my ($pa, $pt, $pg, $pc, $num, $color);
	my $locus = 0;
	for ($i = $start - 1; $i <= $start + 700 * 10 * 50 - 1; $i += 50){
	    last if ($i + 50 >= length($gb->{SEQ}));
	    my $seq = $gb->getseq($i, $i + 50 - 1);
	    my $a = $seq =~ tr/a/a/;
	    my $t = $seq =~ tr/t/t/;
	    my $g = $seq =~ tr/g/g/;
	    my $c = $seq =~ tr/c/c/;

	    # Draw DNA
	    if ($a >= $t && $a >= $g && $a >= $c){
		my $num = int($a / 50 * 100);
		$color = $red;
	    }elsif ($t >= $a && $t >= $g && $t >= $c){
		my $num = int($t / 50 * 100);
		$color = $yellow;
	    }elsif ($g >= $a && $g >= $t && $g >= $c){
		my $num = int($g / 50 * 100);
		$color = $green;
	    }elsif ($c >= $a && $c >= $t && $c >= $g){
		my $num = int($c / 50 * 100);
		$color = $blue;
	    }
	    $im->setPixel($sidemargin + 1 + $locus % (700), 
			  $topmargin + (int($locus / 700) + 1) * $vblock * 5,
			  $color);

	    my $dist = 7;
	    if ($locus % $dist == $dist - 1){
		# Draw A content graph
		$num = int($a / 70 * 100) + 5;
		$im->line($sidemargin - $dist + $locus % (700), $pa,
			  $sidemargin + $locus % (700),
			  $topmargin - $num
			  + (int($locus / 700) + 1) * $vblock * 5,
			  $gred);
		$pa = $topmargin - $num +(int($locus / 700) + 1)
		    * $vblock * 5;

		# Draw T content graph
		$num = int($t / 70 * 100) + 5;
		$im->line($sidemargin - $dist + $locus % (700), $pt,
			  $sidemargin + $locus % (700),
			  $topmargin - $num
			  + (int($locus / 700) + 1) * $vblock * 5,
			  $gyellow);
		$pt = $topmargin - $num +(int($locus / 700) + 1)
		    * $vblock * 5;

		# Draw G content graph
		$num = int($g / 70 * 100) + 5;
		$im->line($sidemargin - $dist + $locus % (700), $pg,
			  $sidemargin + $locus % (700),
			  $topmargin - $num
			  + (int($locus / 700) + 1) * $vblock * 5,
			  $ggreen);
		$pg = $topmargin - $num +(int($locus / 700) + 1)
		    * $vblock * 5;

		# Draw C content graph
		$num = int($c / 70 * 100) + 5;
		$im->line($sidemargin - $dist + $locus % (700), $pc,
			  $sidemargin + $locus % (700),
			  $topmargin - $num
			  + (int($locus / 700) + 1) * $vblock * 5,
			  $gblue);
		$pc = $topmargin - $num +(int($locus / 700) + 1)
		    * $vblock * 5;
	    }elsif($locus % 700 == 0){
		$num = int($a / 70 * 100) + 5;
		$pa = $topmargin - $num +(int($locus / 700) + 1)
		    * $vblock * 5;
		$num = int($t / 70 * 100) + 5;
		$pt = $topmargin - $num +(int($locus / 700) + 1)
		    * $vblock * 5;
		$num = int($g / 70 * 100) + 5;
		$pg = $topmargin - $num +(int($locus / 700) + 1)
		    * $vblock * 5;
		$num = int($c / 70 * 100) + 5;
		$pc = $topmargin - $num +(int($locus / 700) + 1)
		    * $vblock * 5;
	    }
	    $locus ++;
	}

	# Draw Genes
	my $flag = 0;
	my $before = -5000;
	my $before2 = -10000;
	while (defined %{$gb->{"CDS$cds"}}){
	    my $cdsstart = $gb->{"CDS$cds"}->{start};
	    my $cdsend = $gb->{"CDS$cds"}->{end};
	    my $cdsdir = $gb->{"CDS$cds"}->{direction};
	    my $cdsdiff = $cdsstart - $before;
	    my $cdsdiff2 = $cdsstart - $before2;
	    if ($flag == 0){
		if (int($cdsdiff / 50) < 20){
		    $flag = 1;
		}
	    }elsif ($flag == 1){
		if (int($cdsdiff / 50) < 20){
		    if (int($cdsdiff2 / 50) < 20){
			$flag = 2;
		    }else{
			$flag = 0;
		    }
		}else{
		    $flag = 0;
		}
	    }elsif ($flag == 2){
		if (int($cdsdiff2 / 50) < 20){
		    $flag = 1;
		}else{
		    $flag = 0;
		}
	    }
		
	    if ($cdsstart < $start && $cdsend >$start){
		my $dif1 = -3; 
		my $dif2 = -2 - 3;
		if ($cdsdir eq 'complement'){
		    $dif1 *= -1;
		    $dif2 *= -1;
		}
		my $k;
		for ($k = 1; $k <= $cdsend - $start; $k ++){
		    my $l = int ($k / 50); 
		    $im->line($sidemargin + 1 + $l % 700,
			      $topmargin + $dif1 + (int($l/700)+1)*$vblock * 5,
			      $sidemargin + 1 + $l % 700,
			      $topmargin + $dif2 + (int($l/700)+1)*$vblock * 5,
			      $aqua);
		}
		$cds ++;
	    }else{
		last if ($cdsstart >= $end);
		my $feat = $gb->{"CDS$cds"}->{feature};
		my $genename = $gb->{"FEATURE$feat"}->{gene};
		
		my $dif1 = -3; 
		my $dif2 = -2 - 3;
		if ($cdsdir eq 'complement'){
		    $dif1 *= -1;
		    $dif2 *= -1;
		}

		my $k;
		for ($k = $cdsstart-$start; $k <= $cdsend - $start; $k += 50){
		    last if ($k + $start > $end);
		    my $l = int ($k / 50);
		    $im->line($sidemargin + 1 + $l % 700,
			      $topmargin + $dif1 + (int($l/700)+1)*$vblock * 5,
			      $sidemargin + 1 + $l % 700,
			      $topmargin + $dif2 + (int($l/700)+1)*$vblock * 5,
			      $aqua);
		}
		last if ($k + $start > $end);
		
		$k = int(($cdsstart - $start)/50);
		
		$dif1 = -2; 
		$dif2 = -2 - 4;
		my $dif3 = -2 - 4 - 9 + (-9 * $flag);
		if ($cdsdir eq 'complement'){
		    $dif1 *= -1;
		    $dif2 *= -1;
		    $dif3 *= -1;
		    $dif3 -= 7;
		}
		$im->line($sidemargin + 1 + $k % 700,
			  $topmargin + $dif1 + (int($k/700)+1)*$vblock * 5,
			  $sidemargin + 1 + $k % 700,
			  $topmargin + $dif2 + (int($k/700)+1)*$vblock * 5,
			  $black);

		$im->string(gdTinyFont, $sidemargin + 1 + $k %700,
			    $topmargin + $dif3 + (int($k/700)+1)*$vblock * 5,
			    "$genename", $black) if ($name);

		if ($cdsend > $end){
		    last;
		}else{
		    $before = $cdsstart;
		    $before2 = $before;
		    $cds ++;
		}
	    }
	}

	open(OUT, '>graph/' . $acnum . '-'. $page . '.gif');
	binmode OUT;
	print OUT $im->gif;
	close(OUT);
	$page ++;
    }

    system("gimv graph/$acnum-".  '*.gif &') if ($output eq 'show');
}


# seq2gif ver.20010906-01
# Author: Kazuharu Gaou Arakawa
# Usage: NULL = &seq2gif(pointer G instance); 
# Options:
# -width     width of the image (default:640)
# -filename  output filename (default:'seq.gif')
# -output    "g" for graph, "show" for display
# Description:
#  Converts a sequence to a gif image.
#  A is shown in red, T is shown in green, 
#  G is shown in yellow, and C is shown in blue. 
# Requirements:
#  SubOpt.pm
#  GD.pm
# History:
#  20010906 update with options
#  20010830 initial posting

sub seq2gif {
    &opt::default(width=>640, filename=>"seq.gif", output=>"show");
    my @args = opt::get(@_);
    my $gb = shift @args;
    my $width = opt::val("width");
    my $output = opt::val("output");
    my $filename = opt::val("filename");
    my $height = int((length($gb->{SEQ})+1)/$width)+1;
    my $im = new GD::Image($width, $height);

    my $white = $im->colorAllocate(255,255,255);
    my $red = $im->colorAllocate(255,0,0);
    my $yellow = $im->colorAllocate(255,255,0);
    my $green = $im->colorAllocate(0,150,0);
    my $blue = $im->colorAllocate(0,0,255);

    my ($x, $y);
    my $count = 0;
    for ($y = 0; $y <= $height; $y ++){
	for ($x = 0; $x <= $width; $x ++){
	    my $color=$white;
	    $color=$red if (substr($gb->{SEQ}, $count, 1) eq 'a');
	    $color=$yellow if (substr($gb->{SEQ}, $count, 1) eq 'g');
	    $color=$green if (substr($gb->{SEQ}, $count, 1) eq 't');
	    $color=$blue if (substr($gb->{SEQ}, $count, 1) eq 'c');
	    
	    $im->setPixel($x,$y,$color);
	    last if ($count == length($gb->{SEQ}));
	    $count ++;
	}
    }

    mkdir ('graph', 0777);
    open(OUT, '>graph/' . $filename);
    binmode OUT;
    print OUT $im->gif;
    close(OUT);

    system("gimv graph/$filename &") if ($output eq 'show');
}


#find_king_of_gene ver.20010608-01
#scripting by Koya Mori(s98982km@sfc.keio.ac.jp)
#This program finds king of gene.
#(string)=&find_king_of_gene(pointer GENOME,  boolean debug);
sub find_king_of_gene{
    my $nuc=shift;
    my $gene='you have just found the king of AFROs.'."\n";
    
    system('gimv','data/debug.jpg');
    
    return $gene;
}


#atcgcon ver.20010623-01
#scripting by Koya Mori(mory@g-language.org)
#This program counts bases in CDS.
#(hash result)=&bun(pointer Genome,  boolean debug);
sub atcgcon{
    &opt::default(output=>"stdout",filename=>"cds_info.csv");
    my @args=opt::get(@_);

    my $gb=shift @args;
    my $output=opt::val("output");
    my $filename=opt::val("filename");
    my $start;
    my $end;
    my $seq;
    my $num=1;
    my %hash;


    foreach($gb->feature()){
	if($gb->{"FEATURE$num"}->{type} eq 'CDS'){
	    $start=$gb->{"FEATURE$num"}->{start};
	    $end=$gb->{"FEATURE$num"}->{end};
	    $seq=$gb->getseq($start-1,$end-1);
	    $hash{a} += $seq =~tr/a/a/;
	    $hash{t} += $seq =~tr/t/t/;
	    $hash{g} += $seq =~tr/g/g/;
	    $hash{c} += $seq =~tr/c/c/;
	    $hash{total}+=length($seq);
	}
	$num++;
    }

    if($output eq "stdout"){
	&msg::send(sprintf("total:\t%10d base\n",$hash{total}));
	&msg::send(sprintf("a:\t%10d / %2.2f\%\n", $hash{a}, 100.0*$hash{a}/$hash{total}));
	&msg::send(sprintf("t:\t%10d / %2.2f\%\n", $hash{t}, 100.0*$hash{t}/$hash{total}));
	&msg::send(sprintf("c:\t%10d / %2.2f\%\n", $hash{c}, 100.0*$hash{c}/$hash{total}));
	&msg::send(sprintf("g:\t%10d / %2.2f\%\n", $hash{g}, 100.0*$hash{g}/$hash{total}));
	
	&msg::send(sprintf("GC content:\t%.2f\%\n", 100.0*($hash{c} + $hash{g}) / $hash{total}));
    }
    if($output eq "f"){
	open(FILE,">$filename");
	printf FILE "total:\t%10d base\n",$hash{total};
	printf FILE "a:\t%10d / %2.2f\%\n", $hash{a}, 100.0*$hash{a}/$hash{total};
	printf FILE "t:\t%10d / %2.2f\%\n", $hash{t}, 100.0*$hash{t}/$hash{total};
	printf FILE "c:\t%10d / %2.2f\%\n", $hash{c}, 100.0*$hash{c}/$hash{total};
	printf FILE "g:\t%10d / %2.2f\%\n", $hash{g}, 100.0*$hash{g}/$hash{total};
	
	printf FILE "GC content:\t%.2f\%\n", 100.0*($hash{c} + $hash{g}) / $hash{total};
	close(FILE);
    }

    return \%hash;
}


#cds_echo ver.20010623-01
#scripting by Koya Mori(mory@g-language.org)
#This program displayes CDS start and end positions.
#(hash result)=&bun(pointer Genome,  boolean debug);
sub cds_echo{
    my $gb=shift;
    my $start;
    my $end;
    my $i=1;


    foreach($gb->feature()){
        if($gb->{"FEATURE$i"}->{type} eq 'CDS'){
	    if($gb->{"FEATURE$i"}->{direction} eq 'direct'){
		$start = $gb->{"FEATURE$i"}->{start};
		$end = $gb->{"FEATURE$i"}->{end};
		&msg::send(sprintf("%d..%d\n",$start,$end));
	    }
	        
	    elsif($gb->{"FEATURE$i"}->{direction} eq 'complement'){
		$start = $gb->{"FEATURE$i"}->{end};
		$end = $gb->{"FEATURE$i"}->{start};
		&msg::send(sprintf("%d..%d\n",$start,$end));
	    }
	}
	$i++;
    }
}


#print_gene_function_list ver.20010718-01
#Author: Kazuharu "Gaou" Arakawa (gaou@g-language.org)
#1 = &print_gene_function_list (pointer G instance, string sequence)
#This subroutine prints out the number of genes for each functions,
#and the number of specified oligomers in each gene function groups.
#The default for string sequence is 'gctggtgg'.
sub print_gene_function_list {
    my $gb = shift;
    my $seq = shift;
    $seq = 'gctggtgg' unless ($seq);
    my $revseq = complement($seq);
    my $i = 1;
    my %chi;
    my %cds;
    my $key;

    while(defined(%{$gb->{"CDS$i"}})){
        my $id = $gb->{"CDS$i"}->{feature};
        my ($function, $tmp) = split(/;/, $gb->{"FEATURE$id"}->{function}, 2);
        my $cdsseq = $gb->get_cdsseq("CDS$i");
        $cds{$function}++;
        
        my $iStart = -1;
        while(0 <= ($iStart = index($cdsseq, $seq, $iStart +1))){
            $chi{$function}++;
        }
        $iStart = -1;
        while(0 <= ($iStart = index($cdsseq, $revseq, $iStart +1))){
	    $chi{$function}++;
	}
        
        $i++;
    }
    
    &msg::send("=== $seq ===\n");
    my $tot = 0;
    foreach $key (sort keys %chi){
        &msg::send(sprintf("%20s: %8d\n",$key, $chi{$key}));
        $tot += $chi{$key};
    }
    
    &msg::send("total: $tot\n\n");
    
    
    &msg::send("=== CDS ===\n");
    $tot = 0;
    foreach $key (sort keys %cds){
        next if ($key !~ /[a-z]/);
        &msg::send(sprintf("%20s: %8d\n", $key, $cds{$key}));
        $tot += $cds{$key};
    }
    
    &msg::send("total: $tot\n");

    return 1;
}


sub maskseq{
    &opt::default(pattern=>"",start=>1,end=>"");
    my @args=opt::get(@_);

    my $seq=shift @args;
    my $start=opt::val("start");
    my $end=opt::val("end");
    my $pat=opt::val("pattern");
    my $masked;
    my $null;
 

    $$seq=~tr/ \n[0-9]//d;
    $$seq=~tr/A-Z/a-z/;
    
    $end=length($$seq) if($end eq "");

    for(my $i=0;$i<length($pat);$i++){
	$null.="n";
    }

    if($pat){
	$masked=substr($$seq,$start-1,$end-$start+1);
	$masked=~s/$pat/$null/g;
	substr($$seq,$start-1,$end-$start+1)=$masked;
    }
    else{
	$masked=substr($$seq,$start-1,$end-$start+1);
	$masked=~tr/a-zA-Z/n/;
	substr($$seq,$start-1,$end-$start+1)=$masked;
    }

    return $seq;
}


sub pasteseq{
    &opt::default();
    my @args=opt::get(@_);
    
    my $seq=shift @_;
    my $paste=shift @_;
    my $pos=shift @_;
    
    $$seq=~tr/ \n[0-9]//d;
    $$seq=~tr/A-Z/a-z/;
    $$paste=~tr/A-Z/a-z/;

    substr($$seq,$pos-1,0)=$$paste;

    return $seq;
}


sub DESTROY {
    my $self = shift;
}

1;
__END__
# Below is the stub of documentation for your module. You better edit it!

=head1 NAME

G::Seq::Util - Perl extension for blah blah blah

=head1 SYNOPSIS

  use G::Seq::Util;
  blah blah blah

=head1 DESCRIPTION

Stub documentation for G::Seq::Util was created by h2xs. It looks like the
author of the extension was negligent enough to leave the stub
unedited.

Blah blah blah.

=head1 AUTHOR

A. U. Thor, a.u.thor@a.galaxy.far.far.away

=head1 SEE ALSO

perl(1).

=cut
