#!/usr/bin/perl
#
#Codon Usage Analyzer
#Copyright (c) 2004 by Carlo Lapid
#
#This program is free software; you can redistribute it and/or modify 
#it under the terms of the GNU General Public License as published by 
#the Free Software Foundation; either version 2 of the License, or 
#(at your option) any later version.
#
#This program is distributed in the hope that it will be useful, 
#but WITHOUT ANY WARRANTY; without even the implied warranty of 
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 
#General Public License for more details.
#
#You should have received a copy of the GNU General Public License 
#along with this program; if not, write to the Free Software Foundation, Inc., 
#59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
#Contact the author at aximili@tri-isys.com or aximili23@bioinformatics.org.

#CODON.CGI
#Last updated October 22, 2004



use strict;
use warnings;
use CGI qw(:standard);

#send the HTTP header, then the fixed opening markup of the page

print	header();

#single-quoted heredoc: the markup below is static, so nothing in it is interpolated
print	<<'HTML_code_1';

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<HTML>
<HEAD>
<TITLE> Codon Usage Analyzer </TITLE>
<META NAME="Generator" CONTENT="EditPlus">
<META NAME="Author" CONTENT="Carlo Lapid">
<META NAME="Keywords" CONTENT="CUA, Codon Usage Analyzer, codon usage, codon">
<META NAME="Description" CONTENT="Codon Usage Analyzer">

<LINK REL="stylesheet" TYPE="text/css" HREF="../main.css" TITLE="Default">

</HEAD>

<BODY>

<FONT FACE="ARIAL">

HTML_code_1

#read the two form fields from the request; the rest of the script
#decides what to display based on $codon_usage_table

my $codon_usage_table = param("codon_usage_table");
my $organism          = param("organism");

#page title: line break, large font tag, centred banner, horizontal rule

print	br(), font({-size=>6});
print	div({-align=>"center"}, "Codon Usage Analyzer"), hr();

#font tag for the text that follows the title

print	font({-size=>3});

#Main page body.
#
#Reads:  $codon_usage_table (text pasted into the form) and $organism (optional free text).
#Prints: the introduction and input form when no table has been submitted; otherwise a
#        table listing, for each amino acid, its codons ranked by decreasing frequency.
#
#The table text is expected to contain 64 entries of the form "UUU 17.6(714298)", listed
#in the order assumed by the position lists in @amino_acids below.

if (!$codon_usage_table) {			#no table submitted yet: print introductory text and the form

	print	p({-class=>"heading"}, "Overview"),
			p(), "The Codon Usage Analyzer is a web-based program written to process information from the Codon Usage Database and display it in an easy-to-read format. The program ranks the different codons that can encode each amino acid in order of decreasing frequency, so it becomes easy to determine which codon an organism most frequently uses to encode a particular amino acid. This is helpful for laboratory techniques like site-directed mutagenesis, in which a researcher might need to select a particular codon from several choices to encode a specific amino acid in a novel mutation.",
			p({-class=>"heading"}, "Directions:"),
			p(), "1. Go to the Codon Usage Database at ", a({-href=>"http://www.kazusa.or.jp/codon/", -class=>"text_link", -target=>"new_window"}, "http://www.kazusa.or.jp/codon/"), ".",
			p(), "2. Search the database for the codon usage table of your organism of interest.",
			p(), "3. Copy and paste the entire codon usage table from the database into the text area below.";

	#print out form

	print	start_form,
			p(), b("Organism:"), space(3), textfield (-name => "organism", -override => 1, -size => 40, -maxlength =>40), space(3), "(optional)",
			p(), textarea (-name => "codon_usage_table",
						-cols => 74,
						-rows => 19),
			p(), submit("Submit"), reset,
			end_form;

	print	hr;

	#print out copyright & other info

	print <<HTML_CODE_2;

	<FONT SIZE="1">
		Copyright &copy; 2004 by Carlo Lapid, author of <A HREF="../../primerx/index.htm" CLASS="text_link">PrimerX</A>.<BR>
		For comments and suggestions, send email to <A HREF="MAILTO:aximili23\@bioinformatics.org" CLASS="text_link">aximili23\@bioinformatics.org</A><BR>
		PrimerX is kindly hosted at <A HREF="http://bioinformatics.org" CLASS="text_link">Bioinformatics.Org</A>.<BR>
		Last updated: Oct. 21, 2004<BR>
	</FONT>

HTML_CODE_2


} else {			#a codon usage table was submitted: parse it and display the results

	#$organism is free text taken from the request, so it is HTML-escaped before being
	#echoed back; printing it raw would let a crafted value inject markup into the page.
	#font() is given explicit parentheses to show that it wraps the whole heading.
	print	font({-size=>4}, b("Results: "), i(defined $organism ? CGI::escapeHTML($organism) : ()), p());

	#Extract every "codon frequency(count)" entry, in the order it appears in the text.
	#Matching the entries directly (instead of splitting on whitespace and indexing the
	#tokens) means leading blank lines or surrounding text cannot shift codons into the
	#wrong position, and the captured values can only hold U/A/C/G letters, digits and a dot.
	my @fields = $codon_usage_table =~ /([UACG]{3})\s+(\d+\.\d)\(\s*\d*\)/g;

	my @data;				#array of hashes, one {codon, frequency} pair per entry
	while (my ($codon, $frequency) = splice(@fields, 0, 2)) {
		push (@data, {codon => $codon, frequency => $frequency});
	}

	if (@data < 64) {		#incomplete or unrecognised table: explain instead of printing a wrong result

		print	p(b("Error: "), "Only " . scalar(@data) . " of the 64 expected codon entries were found in the submitted text. Please paste the entire codon usage table and try again.");

	} else {

		#array of anonymous arrays that defines each amino acid by one-letter code,
		#three-letter code, full name and the positions of its codons within @data;
		#the rows are listed in the order in which they are displayed
		my @amino_acids = (['A', 'Ala', 'Alanine', 49, 53, 57, 61],
						   ['C', 'Cys', 'Cysteine', 3, 7],
						   ['D', 'Asp', 'Aspartic acid', 50, 54],
						   ['E', 'Glu', 'Glutamic acid', 58, 62],
						   ['F', 'Phe', 'Phenylalanine', 0, 4],
						   ['G', 'Gly', 'Glycine', 51, 55, 59, 63],
						   ['H', 'His', 'Histidine', 18, 22],
						   ['I', 'Ile', 'Isoleucine', 32, 36, 40],
						   ['K', 'Lys', 'Lysine', 42, 46],
						   ['L', 'Leu', 'Leucine', 8, 12, 16, 20, 24, 28],
						   ['M', 'Met', 'Methionine', 44],
						   ['N', 'Asn', 'Asparagine', 34, 38],
						   ['P', 'Pro', 'Proline', 17, 21, 25, 29],
						   ['Q', 'Gln', 'Glutamine', 26, 30],
						   ['R', 'Arg', 'Arginine', 19, 23, 27, 31, 43, 47],
						   ['S', 'Ser', 'Serine', 1, 5, 9, 13, 35, 39],
						   ['T', 'Thr', 'Threonine', 33, 37, 41, 45],
						   ['V', 'Val', 'Valine', 48, 52, 56, 60],
						   ['W', 'Trp', 'Tryptophan', 15],
						   ['Y', 'Tyr', 'Tyrosine', 2, 6],
						   ['', '', 'STOP', 10, 11, 14]);

		#output results in table

		print	"<TABLE BORDER=0 CELLSPACING=0 BGCOLOR=FFCC99>",
				"<TR ALIGN=CENTER BGCOLOR=CCCC99><TD COLSPAN=3 ROWSPAN=2><FONT SIZE=4><B>Amino Acids</B></FONT></TD><TD COLSPAN=12><FONT SIZE=4><B>Codons</B></FONT></TD></TR>",
				"<TR ALIGN=CENTER BGCOLOR=CCCC99><TD COLSPAN=2><B>1</B></TD><TD COLSPAN=2><B>2</B></TD><TD COLSPAN=2><B>3</B></TD><TD COLSPAN=2><B>4</B></TD><TD COLSPAN=2><B>5</B></TD><TD COLSPAN=2><B>6</B></TD></TR>";

		foreach my $amino_acid (@amino_acids) {
			my ($letter, $abbrev, $name, @locations) = @{$amino_acid};

			#this amino acid's codons, most frequently used first
			my @data_set = sort by_frequency @data[@locations];

			print	"<TR HEIGHT=21><TD WIDTH=30 BGCOLOR=FF9999 ALIGN=CENTER>", b($letter), "</TD><TD WIDTH=50 BGCOLOR=CC9999 ALIGN=CENTER>", b($abbrev), "</TD><TD WIDTH=100 BGCOLOR=FFCCCC>", space(2), b($name), space(1), "</TD>";

			#the codon cell is always printed before its frequency cell; the two fields are
			#named explicitly because the order of keys() on a hash is not guaranteed
			foreach my $entry (@data_set) {
				print	"<TD WIDTH=40 BGCOLOR=FFCC99 ALIGN=CENTER>", b($entry->{codon}), "</TD>",
						"<TD WIDTH=45 BGCOLOR=FFCC99>($entry->{frequency})</TD>";
			}
			print	"</TR>";
		}
		print	"</TABLE>";
	}

	print	p(), hr(), font({-size => 2, -face => "arial"}, a( {-href => "codon.cgi", -class => "text_link"}, "Back to homepage"));
}

	#closing markup: the hit-counter snippet, then the end of the document
	#(single-quoted heredoc: the text is emitted verbatim, nothing is interpolated)

	print	<<'HTML_CODE_3';

			<!-- Start of StatCounter Code -->
			<script type="text/javascript" language="javascript">
			var sc_project=429421; 
			var sc_partition=2; 
			var sc_invisible=1; 
			</script>

			<script type="text/javascript" language="javascript" src="http://www.statcounter.com/counter/counter.js"></script><noscript><a href="http://www.statcounter.com/" target="_blank"><img  src="http://c3.statcounter.com/counter.php?sc_project=429421&amp;java=0&amp;invisible=1" alt="free hit counter" border="0"></a> </noscript>
			<!-- End of StatCounter Code -->

			</BODY>
			</HTML>

HTML_CODE_3



# --- Subroutines ---

#Sort comparator: orders hash references by their numeric 'frequency' value, highest first.
#Meant to be used as "sort by_frequency LIST"; it reads the pair being compared from
#sort's $a and $b.
sub by_frequency {
	my ($second, $first) = ($b->{frequency}, $a->{frequency});
	return $second <=> $first;
}

#Build a run of HTML non-breaking spaces.
#
#Parameters: $space_length - number of non-breaking spaces wanted
#Returns:    a string of $space_length "&nbsp;" entities, each terminated with its
#            semicolon; the empty string when the count is missing, zero or negative,
#            so the result can always be printed safely
sub space {
	my ($space_length) = @_;
	return '' unless defined $space_length && $space_length > 0;
	return '&nbsp;' x $space_length;
}

