#!/usr/bin/perl

# Copyright 2003 Sashidhar Gadiraju, Peter K. Rogan
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#


#Program : genvis
#Author : Sashidhar Gadiraju
#version : 2.10
#2.10 : Checking the input ribl file to check if it is a information weight
# matrix or a user defined weight matrix#
# 2.9 : Including the option -S\--SORT to sort the output
# 		First the output BED is generated, and html is generated on this
# 		output file
#2.8.1 : Changing the help message a bit
#2.8 : Changing the SOURCE and NCBI links to conform to other organims like mouse and rat
#		apart from the humans 
#2.7 : Not satisfied with the so-called stable links taken from 2.6, adding trying 
#		some concoctions of my own.
#		Centralized management of the genome browser URLs
#2.6 : Modifying the links to the DNA sequence based on the communications with
#		Jim Kent et al. (the guys who developed UCSC genome browser)
#Yet to modify the MAXLONG variable to include the sizes of various genomes(diff organisms)
#To add the option of specifying the website address of the bed file
#2.5 : Added option of pruning based on the strandedness
#2.4 : Changed the COORDLIST from an array to a hash array, changed the 3 coordlistprocessing
#		subroutines to modular
#2.3 : Adding the '--coordinates' option to specify a chrm and a coordinate range to get sites from
#		Changed the binding site sequence retrieval URL, added correct link for both '+' and '-' strands
#2.2 : Usage() and -h option given
#2.1 : Adding the option of providing an input accession number list file
#1.1 : Added display items like tooltip text, title, coordinate display
#0.2 : Modify with links to genebank, dna sequence, and ucsc browser
#Description :
#This program produces a HTML file from a given BED file produced by promotsite. 
#The HTML file has links to the UCSC genome browser.
#The link on the HTML page is hyperlinked to Accession numbers on the UCSC browser.
#The program input is either the psparams file or the -w option for the wsize
#

use strict;
use Getopt::Long;
Getopt::Long::Configure("no_ignore_case");


my $baseBrowserURL = "http://genome.ucsc.edu";
my $browserURL = "$baseBrowserURL/cgi-bin";



#Permanent (hopefully) declarations
#main subroutine declarations
my $MAXLONG = int (2**31 -1);	#setting an upper limit of 4 byte integer on the coordinate of a site
my %OPT ;	#the options variable
my %COORDLIST;	#Array to hold the coordinate list with --coordinates options
#datapos stores the positions of fields(the first pos is zero) in BED file,"the output of promotsite"
my %datapos;	#the first column starts with 0
$datapos{'chrm'} = 0;
$datapos{'bsst'} = 1;	#the start pos of the site
$datapos{'bsend'} = 2;	#the end position of the site
$datapos{'acc'} = 3;
$datapos{'Ri'} = 4;
$datapos{'dir'} = 5;
my $TRACK="";
#parameters hash
my %PAR; 
my $MATRIXTYPE = 'ribl';	# values -> { ribl, user } to indicate whethere 
# information wgt matrix or user defined weight matrix

#input file descriptors
my $IPSP, my $IBED, my $OBED, my $IACC;
#output file descriptors
my $OBHTML;
#process the command line options
procopts();
openfiles();
# check the wgt matrix file 'ribl' 
checkRibl();
#read the psparams file
readparams();
# start the main processing of the input BED file
# First generate the output BED file
if(! defined getopts('f') )
{	procCoordList();	}
else
{	procAccListBED();	}
#close the open files
closef( $IPSP, $IBED, $OBED);

# Sort the generated BED file
sortOutput(getopts('S')  , getopts('o') ) if( defined getopts('S') );	
# Generate the HTML page on the generated(maybe sorted) BED file
genHTML(getopts('o') );
closef($OBHTML);
#END OF MAIN SUBROUTINE

# check if the ribl file is information weight matrix or user defined
sub checkRibl
{
	my $fribl='ribl';
	if(! -f "$fribl")
	{
		print "Note: Cannot find file $fribl. Not checking for the weight matrix type\n";
		return;
	}
	else
	{
		print "Note: Cannot open file $fribl. Not checking for the weight matrix type\n",
			return if ! open(RI, "$fribl");
		scalar <RI>;	# leave the first line
		my $line2 = <RI>;
		if( "$line2" =~ /^\s*\*.*user.*defined.*matrix/i )
		{
			$MATRIXTYPE = 'user';
			print "Note: ribl is a user defined wgt matrix. Not a information wgt. matrix\n";
		}
	}
}# checkRibl()
	
# sort the generated output BED file based on coordinate or strength
sub sortOutput
{
	my( $sortopt, $bedfile ) = @_;
	if( "$sortopt" eq "" || "$bedfile" eq "" )
	{	
		print "WARNING: Not enough arguments to process sortOutput()\n";
		return;
	}
	my $column = 4;	#BED column(first col is zero), 4->sort by strength, 1->by start coordinate
	if( $sortopt =~ /^s/i )		# sort by strength
	{	$column=4;	}
	elsif( $sortopt =~ /^c/i )	# sort by start coordinate
	{	$column=1;	}
	else
	{	
		print "WARNING: sort option is neither by coordinate nor by strength\n";
		return;	
	}
	# use the sortBED file
	my $tmpBED = "tmp_bed.txt";
	my $res=system("sortBED.pl -c $column -o  $tmpBED $bedfile");
	if( $res )
	{	print "Error in executing sortBED.pl: $!\n";	}
	unlink("$bedfile");
	rename("$tmpBED", "$bedfile"); 
}
	
# print the html based on the created output BED file	
sub genHTML
{	
	my( $bedfile) = @_;
	if( "$bedfile" eq "" )
	{	
		print "WARNING: Not enough arguments to process genHTML()\n";
		return;
	}
	open( IF, "$bedfile" ) || die "Cannot open file $bedfile\n";
	printHTMLheader($OBHTML);
	my $line = getline(\*IF);
	while( $line )
	{
		#print "$line";
		$line = trim( $line  );		
		my @list1 = split(/\s+/, $line);
		printHTMLline($OBHTML, @list1);	
		$line = getline(\*IF);			
	}
	printHTMLtailer($OBHTML);
}

#get the Accession list from the file as a hash array
sub getAccList
{
	my $file = shift;	#the file descriptor reference
	my %AccList;
	my $ipline="";
	if( ! defined $file)
	{	die "Undefined Input file\n";	}
	while ( $ipline = <$file> )
	{	$AccList{ trim($ipline) }="";	}#just make the key exist	
	#print %AccList;
	return %AccList;
}#getAccList()

#This is to process the input accession list
sub procAccListBED
{
	#hash array to hold the input accession list
	my %AccList = getAccList($IACC);
	#print %AccList;
	my $strand = getopts('s');	#strand to search
	
	my $line = getline($IBED, $OBED);
	while( $line )
	{
		$line = trim( $line  );
		my @list1 = split(/\s+/, $line);
		my %data = getdata(@list1);
		#do not process the site if its not on the strand specified with -s
		#process the site if -s is not specified		
		if( (! defined $strand) || (defined $strand and ( $data{'dir'} eq $strand ) ) )
		{
			if( exists ($AccList{ $data{'accname'} }) )
			{	
				print $OBED "$line\n";
			}
		}
		$line = getline($IBED, $OBED);			
	}
}#procAccList()

#This is to process sites within a given set of coordinates with the "--coordinates" option
sub procCoordList
{
	my $line = getline($IBED, $OBED);
	my $crdlistref = getCoordList();	#get reference to the coordlist
	my $strand = getopts('s');	#strand to search
	while( $line )
	{
		$line = trim( $line  );
		my @list1 = split(/\s+/, $line);
		my %data = getdata(@list1);
		#do not process the site if its not on the strand specified with -s
		#process the site if -s is not specified
		if( (! defined $strand) || (defined $strand and ( $data{'dir'} eq $strand ) ) )
		{
			if( requestedSite( \%data , $crdlistref) )
			{	
				print $OBED "$line\n";
			}
		}		
		$line = getline($IBED, $OBED);			
	}
}#procCoordList()
	
#check whether a given BED site is in the requested coordinates
#the first 2 arguments are references to the data and coordinate list
#if an empty hash list of coordinates is passed, the whole BED is converted
sub requestedSite
{
	#print "In requestedSite()\n";
	my %data = %{$_[0]};
	my %crdlist = %{$_[1]};
	#print "coordst = " , $data{'bsst'},"\n";
	my $result = 0;	#indicate whether this is a valid site or not
	#if its an empty list, get sites from the whole BED file
	if( keys(%crdlist) <=0 )	
	{	$result = 1;	}
	else
	{
		my $chname = lc( $data{'chrm'} );	#lower case the chrm name
			
		#first check in the global 'chr' range
		my $chn = "chr";
		if( exists $crdlist{ $chn } )
		{
			my $i = 1;
			#print "[0] in \'chr\' : ", $crdlist{ $chn }[0];
			for($i =1; $i <= $crdlist{ $chn }[0]; $i++ )
			{
				if( exists $data{'bscoord'}  &&
	            	( $data{'bscoord'} >= $crdlist{$chn}[2*$i-1] ) && 
					( $data{'bscoord'} <= $crdlist{$chn}[2*$i] ) )
				   {	$result =1; last;	}
				elsif( ($data{'bsst'} >= $crdlist{$chn}[2*$i-1] && $data{'bsst'} <= $crdlist{$chn}[2*$i] ) ||
				($data{'bsend'} >= $crdlist{$chn}[2*$i-1] && $data{'bsend'} <= $crdlist{$chn}[2*$i] ) )
				{	$result = 1;	last;	}
			}#for: inner 
		}#if: exists 'chr'
		
		#check in the specific chromosome entry
		$chn = $chname; 
		if( $result != 1 && (exists $crdlist{$chname})  )
		{
			my $i = 1;
			#print "[0] in \'chname\' : ", $crdlist{ $chn }[0];
			for($i =1; $i <= $crdlist{ $chn }[0]; $i++ )
			{
				if( exists $data{'bscoord'} && 	           
    	           ( $data{'bscoord'} >= $crdlist{$chn}[2*$i-1] ) &&
				   ( $data{'bscoord'} <= $crdlist{$chn}[2*$i] ) )
				   {	$result =1; last;	}
				elsif( ($data{'bsst'} >= $crdlist{$chn}[2*$i-1] && $data{'bsst'} <= $crdlist{$chn}[2*$i] ) ||
				($data{'bsend'} >= $crdlist{$chn}[2*$i-1] && $data{'bsend'} <= $crdlist{$chn}[2*$i] ) )
				{	$result = 1;	last;	}
			}#for: inner 
		}#if: exists 'chr'

	}#else : -c/--coordinates option is defined
	#print "Leaving requestedSite()\n";
	return $result;
}#requestedSite()

#return the parse coordinate list as a reference
sub getCoordList
{
	return \%COORDLIST;
}
		
#get the coordinate list from the string given with --coordinates option
#The list is returned as a array of multiple (chrmname, coordst, coordend) lists
sub parseCoordList 
{
	my $coordline = shift ;	# the coordinate list line
	my %chrml;	#the hash array holding the parsed coordinates
	my @chlist = split(/;/, $coordline);	#chromosome and coord list
	my $i=0;
	foreach my $ch( @chlist)
	{
		my @temp = split(/:/, $ch);	#split a single chromosome entry 
		my $chname = lc( trim($temp[0]) );	#convert the chrm name to lower case
		$chname="chr" if( $chname eq "");
		if( $chname ne "chr" && $chname ne "chrx" && $chname ne "chry")
		{
			if( $chname =~ /chr(\d+)/ )
			{
				my $chnum = $1;
				#todo - may have to remove the below line to make this work for multiple
				#organisms with their multiple chromosome formats
				if($chnum <1 || $chnum > 22)
				{	print "Ignoring entry $ch : Illegal chrm $chnum\n";	next;	}
			}
			else
			{	print "Ignoring entry $ch : Unrecognized chrm format\n";next;	}
		}
		#temp[1] contains the list of start, stop coordinates
		my @tempcrdlist = split(/,/,$temp[1]);
		foreach my $tempcrd (@tempcrdlist)
		{
			$tempcrd = trim($tempcrd);
			next if( $tempcrd eq "" );	#skip on empty value
			
			my @coordlist = split(/-/, $tempcrd);
			my $coordst = trim($coordlist[0]);
			my $coordend = trim($coordlist[1]);
			
			$coordst=0 if ( (! defined $coordst) || $coordst eq "");
			$coordend=$MAXLONG if ( (! defined $coordend) || $coordend eq "" );
			($coordst, $coordend) = ($coordend, $coordst) if( $coordst > $coordend);
			if(! isPosInt($coordst) )
			{	print "Ignoring entry $ch : Illegal start coordinate $coordst\n"; next;	}
			if(! isPosInt($coordend) )
			{	print "Ignoring entry $ch : Illegal end coordinate $coordend\n"; next;	}
			#$chrml[$i++] = ($chname, $coordst, $coordend);
			#$chrml{$i}[0] = $chname;
			if(! exists $chrml{$chname} )
			{	$chrml{$chname}[0]=1;	}
			else
			{	$chrml{$chname}[0]=$chrml{$chname}[0]+1;	}
			my $c1 = $chrml{$chname}[0];
			$chrml{$chname}[2*$c1 -1 ] = $coordst;
			$chrml{$chname}[2*$c1] = $coordend;
			#$i++;
			#print "$chname:$coordst-$coordend\n";
		}#foreach: inner
	}#foreach
	#print "In parseCoordList : \n";
	#print %chrml;	
	return %chrml;
}#parseCoordList

sub printHTMLheader
{
	my $F = shift;

	#Temporary variables used before providing the actual options through command line
	my $db=getopts('d');	#db number, ex - hg13		
	my $org=lc( getopts('O') );	#organism
	#Note: DB and ORG parameters in the UCSC browser are mutually exclusive in a URL

	#my $baseBEDURL="http://r.faculty.umkc.edu/roganp/Information";#base URL of the BED files, atleast for now
	my $baseBEDURL="";
	my $bedlaunchURL= "$browserURL/hgTracks?db=$db&hgt.customText=";	#the customText is filled by $baseBEDURL

	my $psdist = getpar('psdist');
	my $bedfile = getopts('o');	#output BED file name to use in the html file
		
	
	print $F "<HTML>\n";
	print $F "<HEAD>\n";
	print $F "<SCRIPT language=\"javascript\">\n";
	print $F "var psdist=$psdist;\n"; 
	print $F "var GDB=\"$db\";\n";	#write the global variable DB
	print $F "var GORG=\"$org\";\n";	#write the global variable ORG
	if(defined getopts('U') )
	{	
		$baseBEDURL = getopts('U');
		print $F "var baseBEDURL=\"$baseBEDURL\";\n";
		print $F "var bedlaunchURL = \"$bedlaunchURL\"+baseBEDURL;\n";	
	}
	else
	{
		print $F "var baseBEDURL=\"\";\n";
		print $F "var bedlaunchURL = \"$bedlaunchURL\"+baseBEDURL;\n";
	}
		
	print $F "first =0;
	CURADDR=\"\";
	//launchgb();
	function launchgb()
	{
		var ORG = GORG;
		if(baseBEDURL != \"\")
		{	genbWin = window.open(bedlaunchURL+\"/$bedfile\");		}
		else
		{	genbWin = window.open(\"$browserURL/hgGateway?org=\" + ORG);	}		
		first=1; 
	}

	function go2gbBSCustomTrack( chr, acc, coord1, coord2, track)
	{
		var DB = GDB;
		var addr=\"$browserURL/hgc?o=\" + coord1 + \"&t=\" + coord2 + \"&g=ct_\" + track + \"&i=.+\" + acc + \"&c=\" + chr + \"&l=\"+coord1 + \"&r=\"+coord2 + \"&db=\" + DB;
		if( genbWin.closed ){	
		launchgb();
		first=1;	
		}
		genbWin.location.href=addr;
		genbWin.focus();		
	}
	
	function go2addr( )
	{
		genbWin.location.href =CURADDR;
	}
	function donothing()
	{
		alert('s');
	}
	function go2gbBSseq(chr, acc, coord1, coord2, dir, track)
	{
		//assign the local DB to the global DB 
		//
		var DB = GDB;
		//The below line is required because, in the original track,
		//the lower coordinate is one less than the actual coordinate
		//of the sequence which is needed in the below URL
		c1 = coord1 + 1;	
		//the above line is needed as the start in BED is one less than the original
		if( dir == \"-\" ){
			//The below link (version 2.5) works most of the times
			//CURADDR= \"$browserURL/hgc?g=htcGetDna2&table=&getDnaPos=\" + chr + \"%3A\" + c1 + \"-\" + coord2 + \"&hgSeq.cdsExon=1&hgSeq.padding5=0&hgSeq.padding3=0&hgSeq.casing=upper&boolshad.hgSeq.maskRepeats=1&hgSeq.repMasking=lower&hgSeq.revComp=on&boolshad.hgSeq.revComp=1&submit=Get+DNA\";

			//CURADDR=\"$browserURL/hgc?g=htcGetDna2&table=&db=\" + DB + \"&hgSeq.repMasking=lower&hgSeq.casing=upper&hgSeq.revComp=on&getDnaPos=\" + chr + \"%3A\" + c1 + \"-\" + coord2 + \"&hgSeq.revComp=on\" ;
			//adding both the getDnaPos and the c=,l=,r= in the below url
			CURADDR= \"$browserURL/hgc?g=htcGetDna2&table=&db=\" + DB + \"&hgSeq.repMasking=lower&hgSeq.casing=upper&hgSeq.revComp=on&getDnaPos=\" + chr + \"%3A\" + c1+\"-\"+coord2 + \"&c=\"+chr + \"&l=\"+c1 + \"&r=\"+coord2 + \"&hgSeq.revComp=on\" ;
		}
		else
		{
			//The below link (version 2.5) works most of the times
			//CURADDR= \"$browserURL/hgc?g=htcGetDna2&table=&getDnaPos=\" + chr + \"%3A\" + c1 + \"-\" + coord2 + \"&hgSeq.cdsExon=1&hgSeq.padding5=0&hgSeq.padding3=0&hgSeq.casing=upper&boolshad.hgSeq.maskRepeats=1&hgSeq.repMasking=lower&boolshad.hgSeq.revComp=1&submit=Get+DNA\";
			//CURADDR=\"$browserURL/hgc?g=htcGetDna2&table=&db=\" + DB + \"&hgSeq.repMasking=lower&hgSeq.casing=upper&hgSeq.revComp=on&getDnaPos=\" + chr + \"%3A\" + c1 + \"-\" + coord2 ;
			//adding both the getDnaPos and the c=,l=,r= in the below url
			CURADDR= \"$browserURL/hgc?g=htcGetDna2&table=&db=\" + DB + \"&hgSeq.repMasking=lower&hgSeq.casing=upper&hgSeq.revComp=on&getDnaPos=\" + chr + \"%3A\" + c1+\"-\"+coord2 + \"&c=\"+chr + \"&l=\"+c1 + \"&r=\"+coord2 ;
		}
		genbWin.location.href =CURADDR;
		////setTimeout(\"go2addr()\", 2000);
		genbWin.focus();
		//go2addr( addr );
	}
	
	function go2gb(chr, coord1, coord2)
	{
		var r1 = coord1 - 2*psdist;
		var r2 = coord2 + 2*psdist;
		var DB = GDB;
		var ORG = GORG;
		if( r1 < 1 ) { r1 = 1;	}
		//var gnloc = \"$browserURL/hgTracks?org=\" + ORG + \"&position=\"+chr+\":\"+r1+\"-\"+r2;
		var gnloc = \"$browserURL/hgTracks?db=\" + DB + \"&position=\"+chr+\":\"+r1+\"-\"+r2;
		//if(first==0){	genbWin = window.open(\"$browserURL/hgGateway?org=\" + ORG );first=1;	}
		if( genbWin.closed ){	
		launchgb();
		first=1;	
		}
		//genbWin.location.href=\"$browserURL/hgTracks?org=\" + ORG + \"&position=\"+chr+\":\"+r1+\"-\"+r2;
		genbWin.location.href=\"$browserURL/hgTracks?db=\" + DB + \"&position=\"+chr+\":\"+r1+\"-\"+r2;
		genbWin.focus();
		return false;
	}

	function go2gbAccSource1(acc)
	{
		
		source = window.open(\"sourceSearch.html\");
		source.document.forms[0].criteria.value=acc;
	}
	function go2gbAccSource(acc)
	{
		var ORG = GORG;
		source = window.open(\"\");
		source.document.open();
		source.document.write(\"	<html>\\n\");
		source.document.write(\"	<body>\\n\");
		source.document.write(\"	<form method=\\\"post\\\" action=\\\"http://genome-www5.stanford.edu/cgi-bin/SMD/source/sourceResult\\\" enctype=\\\"application/x-www-form-urlencoded\\\">\\n\");
		source.document.write(\"	<select name=\\\"organism\\\">\\n\");
		if( ORG == \"human\")
		{	source.document.write(\"	<option selected=\\\"1\\\" value=\\\"Hs\\\">Homo sapiens</option>\\n\");	}
		else if( ORG == \"mouse\" )
		{	source.document.write(\"	<option selected=\\\"1\\\" value=\\\"Mm\\\">Mus musculus</option>\\n\");	}
		else if (ORG == \"rat\" )
		{	source.document.write(\"	<option selected=\\\"1\\\" value=\\\"Hs\\\">Rattus norvegicus</option>\\n\");	}
		else
		{	alert( \"Organism not human/mouse/rat\" );	}
		source.document.write(\"	</select>\\n\");
		source.document.write(\"	<select name=\\\"option\\\">\\n\");
		source.document.write(\"	<option  value=\\\"Number\\\">GenBank Accession Num</option>\\n\");
		source.document.write(\"	</select>\\n\");
		source.document.write(\"	<input type=\\\"text\\\" name=\\\"criteria\\\" value=\" + acc + \" size=\\\"25\\\" /> \\n\");
		source.document.write(\"	<input type=\\\"radio\\\" name=\\\"choice\\\" value=\\\"Gene\\\" checked=\\\"1\\\" />GeneReport: Gene Information (limited to those in UniGene)<br /> \\n\");
		source.document.write(\"	<input type=\\\"submit\\\" name=\\\".submit\\\" value=\\\"Submit\\\" /> \\n\");
		source.document.write(\"	</form>\\n\");
		source.document.write(\"	</body>\\n\");
		source.document.write(\"	</html>\\n\");	
		source.document.close();
		source.document.forms[0].submit();
	}

	function go2gbChr()
	{
		var DB = GDB;
		var ORG = GORG;
		var chr= go2gbChr.arguments[0];
		//genbWin.location.href=\"http://www.genome.ucsc.edu/cgi-bin/hgTracks?org=\" + ORG + \"&position=\"+chr;
		genbWin.location.href=\"http://www.genome.ucsc.edu/cgi-bin/hgTracks?db=\" + DB + \"&position=\"+chr;
		return false;
	}

	function go2gbAccGenBank(acc)
	{
		var addr = \"http://www.ncbi.nlm.nih.gov/htbin-post/Entrez/query?db=n&form=6&uid=\" + acc + \"&dopt=g\";
		window.open(addr);
		return false;
	}\n";
	print $F "</SCRIPT>\n";
	print $F "</HEAD>\n";
	print $F "<BODY>\n";
	print $F "<p align=\"center\">
	<table>
	<tr>
		<td>
		<img src=\"tinyround.gif\">
		</td>
		
		<td>
		<i>
		<font size=\"6\" color=\"blue\" face=\"Times\">
		Delila-Genome 1.0
		</font>
		</i>
		</td>

		<td>
		<img src=\"donorwalkersmall.gif\">
		</td>
	</tr>
	<tr>
		<td colspan=\"3\" align=\"center\">
		<font size=\"2\" color=\"blue\">
			Sashidhar Gadiraju <sup>1,2</sup>, Peter K. Rogan <sup>1,2,3</sup><br>
			Laboratory of Human Molecular Genetics, Children's Mercy Hospital and Clinics<sup>1</sup><br>
			SICE, University of Missouri - Kansas City <sup>2</sup><br>
			School of Medicine, University of Missouri - Kansas City <sup>3</sup>
		</font>
		</td>
	</tr>
	</table>
	\n";
	print $F "<\i></p>\n";
	print $F "<p align=\"center\">\n";
	print $F "<input type=\"button\" value=\"Click to launch Genome Browser and upload BED file\" onClick=\"javascript:launchgb()\"><br><br><br>\n";
	print $F "</p>\n";
	print $F "<p align=\"center\">\n";
	print $F "
	<table border=\"1\">
		<tr>
		<th colspan=\"3\" align=\"center\">Binding Site</th>
		<th colspan=\"3\" rowspan=\"1\" align=\"center\">Accession Number</th>
		<th colspan=\"2\" rowspan=\"2\" align=\"center\">UCSC Browser</th>
	</tr>
	<tr>
		<td align=\"center\">Type</td>\n";
	if("$MATRIXTYPE" eq "ribl")
	{	print $F "		<td align=\"center\">Ri(bits)</td>\n";	}
	elsif("$MATRIXTYPE" eq "user")
	{	print $F "		<td align=\"center\">Score</td>\n";	}
	else
	{	print $F "		<td align=\"center\">Score</td>\n";	}
	
	print $F "	
		<td align=\"center\">Seq</td>
		<td align=\"center\">Value</td>
		<td align=\"center\">GenBank</td>
		<td align=\"center\">SOURCE</td>		
	</tr>\n";

}#printHTMLheader()

sub printHTMLtailer
{
	my $F = shift;

	print $F "</table>\n";
	print $F "</p>\n
	</BODY>
	</HTML>";
}#printHTMLheader()

#return the position of an data element
sub getpos
{
	my $elm = shift;
	return $datapos{$elm};
}

#return the input bed line as a hash array
sub getdata
{
	my @l1 = @_;
	my %data;
	$data{'chrm'} = trim( $l1[ getpos('chrm') ] );
	$data{'bsst'} = trim( $l1[ getpos('bsst') ] );
	$data{'bsend'} = trim( $l1[ getpos('bsend') ] );
	$data{'acc'} = trim( $l1[ getpos('acc') ] );
	$data{'Ri'} = trim( $l1[ getpos('Ri') ] );
	$data{'dir'} = trim( $l1[ getpos('dir') ] );
	#the accession number from the bed file is appended with a '_' and info like Ri value is added. So remove it
	my @acclist =  split( /_/,$data{'acc'} );
	$data{'accname'} = $acclist[0];	
	$data{'acctype'} = $acclist[1];
	$data{'accRi'} = $acclist[2];	#shown as R10, R15 etc
	return %data;
}#getdata()

	
sub printHTMLline
{
	my ( $F , @l1 ) = @_;
	my %data = getdata(@l1); 
	#the accession number from the bed file is appended with a '_' and info like Ri value is added. So remove it
	my $acc = $data{'accname'};
	my $type = $data{'acctype'};
	my $value = $data{'accRi'};
	my $dir = $data{'dir'};
	my $track = gettrack();
	if( $value =~ /(\d+)/ )
	{	$value = $1;	}
	my $psdist = getpar('psdist');
	my $r1 = $data{'bsst'} - 2*$psdist;
	my $r2 = $data{'bsend'} + 2*$psdist;		
	if( $r1 < 1 ) { $r1 = 1;	}	
		
	print $F "<tr>\n
	<td > $type </td>
	<td align=\"center\"><A HREF=\"#\" Title=\"Go to the UCSC custom track for this binding site\" onClick=\"go2gbBSCustomTrack(\'$data{'chrm'}\', \'$data{'acc'}\', $data{'bsst'}, $data{'bsend'}, \'$track\')\">$value</A></td>
	<td align=\"center\"><A HREF=\"#\" Title=\"Get the dna sequence\" onClick=\"go2gbBSseq(\'$data{'chrm'}\', \'$data{'acc'}\',$data{'bsst'}, $data{'bsend'}, \'$data{'dir'}\', \'$track\')\">X</A></td>
	<td >$acc</td>
	<td align=\"center\"><A HREF=\"#\" Title=\"Accession Number link to Gene Bank\" onClick=\"go2gbAccGenBank(\'$acc\')\"><img src=\"tinygenbank.gif\" width=\"25\" height=\"25\"></A></td>
	<td align=\"center\"><A HREF=\"#\" Title=\"Accession Number link to Stanford Source\" onClick=\"go2gbAccSource(\'$acc\')\"><img src=\"tinysource.gif\" width=\"25\" height=\"25\"></A></td>
	<td ><A HREF=\"#\" Title=\"Go the UCSC browser with these coordinates\" onClick=\"go2gb(\'$data{'chrm'}\', $data{'bsst'}, $data{'bsend'})\">$data{'chrm'}:$r1-$r2</A></td>\n";
	print $F "</tr>\n";	
	#<td align=\"center\"><A HREF=\"#\" onClick=\"go2gbChr(\'$data{'chrm'}\')\">$data{'chrm'}</A></td>
	#<td align=\"center\"><A HREF=\"#\" onClick=\"go2gbBS(\'$data{'chrm'}\', $data{'bsst'}, $data{'bsend'})\">$data{'bsst'} $data{'bsend'}</A></td>
	#<td align=\"center\"><A HREF=\"#\" onClick=\"go2gbAcc(\'$acc\')\">$data{'acc'}</A></td>
	#<td align=\"center\">$data{'Ri'}</td>
	#<td align=\"center\">$data{'dir'}</td>\n";

	#print $F "<td>$data{'bsst'}</td> ";
	#print $F "<td>$data{'bsend'}</td> ";

}#printHTMLline()
	
sub puttrack
{	$TRACK = shift;	}
sub gettrack
{	return $TRACK;	}

sub getline
{
	my $ifile = shift;	#get the file name
	my $ofile = shift;	#output file name
	my $ipline;
	my $iflag = 1;
	while( $iflag)
	{
		if( $ipline = <$ifile>)
		{
			my $line = &trim($ipline);
			if( $line =~ /^track\s+name=(\S+)\s+/i )
			{	
				puttrack($1);	
				print $ofile "$ipline" if(defined $ofile); 
			}
			elsif(  $line =~ /^browser/i   )	
			{	print $ofile "$ipline" if(defined $ofile); }
			else	{	$iflag = 0; }
		}
		else{ return 0 };
	}
	return $ipline;
}#getline()

	
#remove leading and trailing ws from a line
sub trim
{
	my $ret = shift;
	$ret =~ s/^\s*//;	#remove leading whitespaces
	$ret =~ s/\s*$//;	#remove trailing whitespaces
	return $ret;
}#trim()

#print and error message and exit the program
sub errexit
{
	my $msg = shift;
	print "Error in program : $msg\n";
	closef($IPSP, $IBED, $OBED , $OBHTML);  

	exit;
}
	
sub openfiles()
{
	#open input files
	
	open( IPSP, "<psparams" ) || ( errexit("Cannot open file \'psparams\' ") );
	my $ibedfile = getopts('i');	
	open( IBED, "<$ibedfile" ) || ( errexit("Cannot open file \'$ibedfile\' ") );
	my $obedfile = getopts('o');
	open( OBED, ">${obedfile}") || ( errexit("Cannot open output file \'$obedfile\' ") );	
	my $ohtmlfile = $obedfile.".html";
	open( OBHTML, ">$ohtmlfile" ) || ( errexit("Cannot open file \'$ohtmlfile\' ") );
	my $accfile = getopts('f');
	( open( IACC, "$accfile" ) || ( errexit("Cannot open file \'$accfile\' ") ) ) if ( defined $accfile);
	$IPSP=\*IPSP;	$IBED=\*IBED;	$OBED=\*OBED;	$OBHTML=\*OBHTML;	$IACC=\*IACC;
}

sub procBED
{
	#first print the required html output header text
	#
	printHTMLheader($OBHTML);
	my $line = getline($IBED, $OBED);
	while( $line )
	{
		$line = trim( $line  );
		my @list1 = split(/\s+/, $line);
		printHTMLline($OBHTML, @list1);
		print $OBED "$line\n";
		$line = getline($IBED, $OBED);	
	}
	printHTMLtailer($OBHTML);
}#procBED()

#get the options
sub getopts
{
	my $par = shift;
	return $OPT{$par};
}#getopts()

#get the params
sub getpar
{
	my $parm = shift;
	if( exists( $PAR{$parm} )	)
	{	return $PAR{$parm};	}
	else {	return 0;	}
}	

#put the params
sub putpar
{
	my $parm = shift;
	my $val = shift;
	$PAR{$parm} = $val;
}

#print the usage
sub usage
{
	print "Usage: $0 [-cdfoOsSU] [bedfile]
	
	INPUT OPTIONS
	-c and -f are mutually exclusive options
	-c\\--coordinates Coordinates_range
		Specify the coordinate range and the chromosome names
		list to pull the sites. A site is reported if its 
		start or its end coordinate lies within the given range.
		Multiple ranges and names are separated with a ';'
		Upper limit for a coordinate is 2^31 -1;

		Ex:  
		-c chr10:100-200	#sites on 10 between coordinates 100 and 200	
		-c chrX:1000-		#sites on X above coordinate 1000 
		-c chr4:-4000		#sites on 4 below coordinate 4000
		-c chrX:1000		#same as chrX:1000-
		-c chr:10000-20000	#sites on all chromosomes between coordinates 10000 and 20000
		-c chrY:200-5000;chr1:3000;chr1:5000-10000	#multiple chrm
	
	-d\\-db UCSC_database_name
		Default : hg13
		This is the database name for the genome draft used.
		A few of these mappings are given below.
		Human             Mouse            Rat      
		hg15 -Apr 2003    mm3  -Feb 2003   rn2  -Jan 2003
		hg13 -Nov 2002    mm2  -Feb 2002   rn1  -Nov 2002

		The full list is given in http://genome.ucsc.edu/FAQ.html.
	
	
	-O\\--ORG Organism
		Default: human
		Values supported by UCSC - human/mouse/rat
		The -d/--db option takes precedence over this option in 
		links for the UCSC browser. 
		
			 
	-s\\--strand [+/-]
		Specifies the specific strand to search for sites. 
		If this option is not specified, sites on both the 
		strands are searched for.

		
	-f\\--file AccessionListFile
		If this option is given, then the output html file
		has only those binding sites which are associated
		with an accession number in the file given with 
		this option.

		
	-U\\--URL URL_of_the_BED
		If this option is given, the UCSC genome browser
		loads the BED file automatically from this URL
		when the appropriate link in the generated HTML
		page is clicked.
	
		
	bedfile
		default : psBED.txt
		
	OUTPUT OPTIONS
	-o\\--output filename
		default : sub{bedfile} ( Prefix 'sub' to the input bedfile name)
		This option controls the names of the output BED
		and HTML files.
		Ex: If 'xxx' is the filename, then
		output BED file: xxx
		output html file: xxx.html	
		So, if the input BED file name is psBED.txt(by default) and the
		-o option is not specified, then by default
		output BED file: subpsBED.txt
		output html file: subpsBED.txt.html
		
	-S\\--SORT [coordinate/strength]
		default : the output is not sorted if this option is not specified
		If the option starts with a 'c', output is sorted by coordinate
		If the option starts with a 's', output is sorted by strength
		If the option starts with any other alphabet , output is not sorted 
	\n";	
}


#process the command line options
sub procopts
{

	$OPT{'h'} = 0;	#help option

	GetOptions(	
					"coordinates=s" => \$OPT{'c'},
					"output=s" => \$OPT{'o'},
					"help" => \$OPT{'h'},
					"strand=s" => \$OPT{'s'},
					"SORT=s" => \$OPT{'S'},	
					"file=s" => \$OPT{'f'},
					"ORG=s" => \$OPT{'O'},
					"db=s" => \$OPT{'d'},
					"URL=s" => \$OPT{'U'}
				);
	$OPT{'i'} = "psBED.txt";
	if(@ARGV)	{$OPT{'i'} = shift @ARGV;	}
	if( $OPT{'h'} )
	{	usage() and exit;	} 
	if( defined $OPT{'c'} && defined $OPT{'f'} )
	{	warn("-c and -f are mutually exclusive options\n") and exit;	}
	
	#$OPT{'s'} = trim( $OPT{'s'} );
	my $strand = $OPT{'s'};	
	if( (defined $strand) &&  ($strand ne "+") && ($strand ne "-") )
	{	
		warn "in -s\\--strand option : Strand $strand is not in [+,-]\n";
		$strand = undef;	#for use later in this function
	}
	my $sortopt = "$OPT{'S'}";
	if( $sortopt =~ /^s/i )
	{	$sortopt = 's';	}
	elsif( $sortopt =~ /^c/i )
	{	$sortopt = 'c';	}
	else
	{
		warn "-S\--SORT option does not start with c or with s, taking default \n"
			if(defined $OPT{'S'} );
		$OPT{'S'}=undef;
	}
	if( defined $OPT{'c'} )
	{	%COORDLIST = parseCoordList( $OPT{'c'}	);	}

	if(! defined $OPT{'O'} )
	{
		$OPT{'O'}= "human";
		print "-o\\--ORG option not specified, taking default: $OPT{'O'}\n";

	}
	if(! defined $OPT{'d'} )
	{
		$OPT{'d'} = "hg13";
		print "-d\\--db option not specified, taking default: $OPT{'d'}\n";
	}
	
	$OPT{'o'} = "sub".$OPT{'i'} if( ! defined $OPT{'o'} );
	#print parameters
	print "\nProgram parameters\n\n";
	print "Organism : $OPT{'O'}\n";
	print "database : $OPT{'d'}\n";
	print "Input BED file : \'$OPT{'i'}\' \n";
	print "Output BED file : \'$OPT{'o'}\' \n";
	print "Output HTML file : \'$OPT{'o'}.html\' \n";		
	if( defined( $OPT{'f'} ) )
	{	print "Input Accession List File : \'$OPT{'f'}\' \n";	}
	elsif( defined( $OPT{'c'} ) )
	{
		print "\nSpecified coordinate range(s) \n";
		foreach ( keys %COORDLIST )
		{
			for(my $i=1; $i<=$COORDLIST{$_}[0];	++$i)
			{	print "$_:$COORDLIST{$_}[2*$i-1]-$COORDLIST{$_}[2*$i]\n";	}
		}#foreach
		print "\n";
	}
	else
	{	print "Printing the html file for the whole coordinate range in the input bed file\n";	}
	
	if( defined $strand )
	{
		print "Search direction : positive strand\n" if( $strand eq "+" );
		print "Search direction : antisense strand\n" if( $strand eq "-" );		
	}
	else
	{	print "Search direction : both strands\n";	}
	
	if( "$sortopt" eq  "c")
	{	print "Sort output by coordinate\n";	}
	elsif( "$sortopt" eq  "s")
	{	print "Sort output by strength\n";	}
	else
	{	print "Output is not sorted\n";	}
	
}#procopts()

sub readparams
{
	#open("parf", $_[0]) || die "params file cannot be opened";
	#open( FPARAMS, "psparams") || die "cannot open psparams\n";
	my @paramsf=<$IPSP>;
	my %par = &mystrtok(" ", @paramsf);
	my @parf;
	
	if(!exists($par{"wsize"}) ) { die "parameter wsize not given";	}
	my @wsizelst = split(/\s+/, &trim($par{"wsize"}) );
	if ( ! defined($wsizelst[0]) ) { $wsizelst[0]=0;;	}
	if ( ! defined($wsizelst[1]) ) { $wsizelst[1]=0;;	}
	putpar('wsize1' ,  $wsizelst[0]);
	putpar( 'wsize2' , $wsizelst[1] );
	my $psdist=100000;
	my $ws1 = getpar('wsize1');	my $ws2 = getpar('wsize2');
	if( $ws1 >= $ws2){	$psdist = $ws1;	}
	else	{	$psdist = $ws2;	}
	putpar('psdist', $psdist);
	print  getpar('wsize1'). "  ". getpar('wsize2')."\n" ;
	
	#close FPARAMS;
}

#splits each value in a list into two parts, token and value.
#any leading & trailing whitespaces are removed
sub mystrtok()
{
	my $sep = shift;
	my @qstr = @_;	#the rest of arguments
	my %params;
	foreach ( @qstr )
	{
		print $_ . "\n";
		$_=&trim($_);
		/([^$sep]*)${sep}+(.*)/;
		$params{$1}=$2;
	}
	%params;	#return the params	
}
sub isInt
{	return ($_[0] =~ /^\s*[\+-]?\d+\s*$/)?1:0;	}

sub isPosInt
{	return ($_[0] =~ /^\s*[\+]?\d+\s*$/)?1:0;	}

sub isNegInt
{	return ($_[0] =~ /^\s*[-]?\d+\s*$/)?1:0;	}

# close file handle references if defined 
sub closef
{
	foreach( @_ )
	{	close $_ if( defined $_ );	}
}



