#!/usr/bin/perl

# Copyright 2003 Sashidhar Gadiraju, Peter K. Rogan
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#


#program : promotsite
#version : 1.6.6
#Description:
#This program modifies the input data file from scan and sorts it on the binding site coordinate.
#This ensures that this program will run faster. This is done thru scan2ps.pl
#From version 1.6.0, even the smrna.txt file should be sorted on the mrna start coordinate.
# This is done using the sortmrna.pl
#If the gene has a negative orientation, then its start position is its end position
#if possible, change to read each line of psdatainp and process the smrna.txt for that entry
#add a field min_distance of the mrms at the end of the psdataop so as to process at a later time
#1.6.6: Integrating the calling of scan2ps.pl and sortmrna.pl in promotsite.pl
#1.6.5: Providing filtering option by introducing the --coordinates option
#1.6.4: Providing the upper and lower Ri cutoff value of a binding site
#1.6.3: Adding the capability of assymetric window size
#1.6.2: Removing the preprocessing function scan2psdata function to make promotsite faster 
#1.6.1: To make the processing faster by preprocessing mrna.txt and sorting it by the start mrna position 
#regardless of orientation of the gene
#1.6.0: Changing file so that the whole data file is not read at the same time,incomplete
#1.5.9: reading ri from to wanted correctly, chaning the readparams() to reflect the changes in psparams
#1.5.8: reading ri consensus values from the ribl file instead of as an input parameter
#1.5.7: reduced the accession name in BED output and removed score as it is displayed in the new genome browser
#1.5.6: speed the program by processing fewer data values
#1.5.5: supports the BED format to display
#1.5.3: modified mrms() to work for all u/d/b streams
#1.5.3: processed sitename to remove the "s
#1.5.2: modified the incorrect values of rstart, end for -ve strand
#1.5.2: speeding the program by removing redundant processing of the commented data
#1.5.2: Correcting the start end values of the GFF file
#Added the subroutine to create an output file formatted in the GTF style

#initialize constants
$version = "1.6.6";	#removed a few bugs from previous version
@dayofweek = ( "mon", "tue", "wed", "thu", "fri", "sat", "sun" );
#initiate data structures

@params{ qw(sitename wsize1 wsize2 dir Ricon mrmsdist from to trackcolor) } = ( "PXR", 0, 0,"d", 25, 100, -2, 20, "128,128,128") ;	#parameters read from the params file and the Ribl matrix
@mrna{ qw(name chrm start dir) } = ( "default", "chr", 0, "+" );	#values read from the mrna.txt file
my %COORDLIST;	#Hash to hold the coordinate range to get sites from 
# the below positions are relative to the first field in mrna.txt being considered 
# as position 1
$chrmpos = 14;	#chromosome name from mrna.txt 
$dirpos=9;	#orientation from mrna.txt	
$namepos=10;	#Accession number from mrna.txt
$startpos=16;	#from mrna.txt
$endpos=17;	#from mrna.txt

$coordpos=4;	#from psdatainp
$bsdirpos=5;	#from psdatainp
$ripos = 6;	#from psdatainp

#search for input files

$fparams="psparams";
if(! -f $fparams)
{	die "params file not found";	}
else
{	open(FPARAMS,"<$fparams") || die " Error in opening params file ";	}
	
$fdatatmp="data";
if(! -f $fdatatmp)
{	die "data file not found";	}
$fmrnatmp="mrna.txt";
if(! -f $fmrnatmp)
{	die "$fmrnatmp file not found";	}
$fribl = "ribl";
if(! -f $fribl)
{	die "ribl matrix not found";	}
else
{	open(FRIBL, "<$fribl") || die "cannot open ribl file";	}
&readparams();

# sort the data file on input coordinate rather than strand
my $res=system("scan2ps.pl $fdatatmp");
if( $res)
{	print "ERROR: in execution of scan2ps.pl : $!\n" and exit;	}

# sort the mrna.txt file on the actual start coordinate taken strand
# into consideration
my $res=system("sortmrna.pl $fmrnatmp");
if( $res)
{	print "ERROR: in execution of sortmrna.pl : $!\n" and exit;	}

my $fdata="psdatainp";	# psdatainp is the output of scan2ps.pl
if(! -f $fdata)
{	die "$fdata file not found";	}
else
{	open(FDATA,"<$fdata") || die " Error in opening $fdata file ";	}

$fmrna="smrna.txt";		# smrna.txt is the output of sortmrna.pl
if(! -f $fmrna)
{	die "$fmrna file not found";	}
else
{
	open(FMRNA,"<$fmrna") || die " Error in opening $fmrna file";
	@totalmrna = <FMRNA>;	#read the whole mrna file
}

#open the output files

open(PSDATA,">psdataop") || die "cannot open output file psdataop";
open(BED, ">psBED.txt") || die "cannot open output file psBED.txt";

#first write a signal handler to close all open files before aborting
sub SIGNAL_HANDLER
{
	my $signal = @_;
	closef(\*FPARAMS, \*FMRNA, \*FDATA, \*FRIBL, \*PSDATA, \*BED);
	print "Signal Handler Message: $signal\n";
	exit(0);	#exit the process
}#SIGNAL_HANDLER()

$SIG{__DIE__} = 'SIGNAL_HANDLER';
$SIG{'INT'} = 'SIGNAL_HANDLER';
$SIG{'BREAK'} = 'SIGNAL_HANDLER';
$SIG{'TERM'} = 'SIGNAL_HANDLER';
$SIG{'HUP'} = 'SIGNAL_HANDLER';
$SIG{'ABRT'} = 'SIGNAL_HANDLER';

print PSDATA ("* This file is generated by promotsite\n");
print PSDATA ("* promotosite version: $version\n");
@ctime = localtime($^T);

$curtime=join(":",( $ctime[2], $ctime[1], $ctime[0]) );		#hour:min:sec

$ctime[4] += 1;		#perl gives month as 0..11
$ctime[5] += 1900;	#perl gives year as 1900 + x
 
$curdate=join("/", ( $ctime[4], $ctime[3], $ctime[5] ) );	#month/day/year
print PSDATA ("* Generated Date: $curdate $curtime\n");
if( $params{'dir'} eq "u" ||  $params{'dir'} eq "d" )
{	print PSDATA ("* window size : $params{'wsize1'}\n");	}
else
{	print PSDATA ("* window size : $params{'wsize1'} $params{'wsize2'}\n");	}

$fdir = "";
if ($params{dir} eq "u")
{	$fdir = "upstream";	}
elsif ($params{dir} eq "d")
{	$fdir = "downstream";	}
else
{	$fdir = "up/down streams";	}
print PSDATA ("* Orientation = $fdir\n");
print PSDATA "* RiCutoff : $params{'rilow'} $params{'rihigh'}\n";

&procdatabyline();

closef(\*FPARAMS, \*FMRNA, \*FDATA, \*FRIBL );
#End of main code block

#splits each value in a list into two parts, token and value.
#any leading & trailing whitespaces are removed
sub mystrtok()
{
	my $sep = shift;
	my @qstr = @_;	#the rest of arguments
	my %params;
	foreach ( @qstr )
	{
		print $_ . "\n";
		$_=trim($_);
		/([^$sep]*)${sep}+(.*)/;
		$params{$1}=$2;
	}
	%params;	#return the params	
}

sub isInt
{	return ($_[0] =~ /^\s*[\+-]?\d+\s*$/)?1:0;	}

sub isPosInt
{	return ($_[0] =~ /^\s*[\+]?\d+\s*$/)?1:0;	}

sub isNegInt
{	return ($_[0] =~ /^\s*[-]?\d+\s*$/)?1:0;	}

sub isReal
{	return ($_[0] =~ /^\s*[\+-]?\d+\.?\d*\s*$/)?1:0;	}

sub readparams
{
	#open("parf", $_[0]) || die "params file cannot be opened";
	
	my @paramsf=<FPARAMS>;
	my %par = &mystrtok(" ", @paramsf);
	my @parf;

	if(!exists($par{"direction"}) ) { die "parameter direction not given";	}	
	#check whether direction is up or down
	$parf[1] = lc( &trim ( $par{"direction"} ) );
	if ( $parf[1] ne "u" && $parf[1] ne "d" && $parf[1] ne "b" )
	{	die "Enter a proper direction u/d";	}
	
	if(!exists($par{"wsize"}) ) { die "parameter wsize not given";	}
	my @wsizelst = split(/\s+/, &trim($par{"wsize"}) );
	if ( $parf[1] eq "u" || $parf[1] eq "d" )
	{	die "Window size is not a positive integer\n" unless( &isPosInt($wsizelst[0]) );	}
	elsif( $parf[1] eq "b")
	{	
		#Added 1.6.5
		if (! defined $wsizelst[1] ) {	$wsizelst[1] = $wsizelst[0];	}
		die "Window size boundaries not given as positive integers\n"
		unless( &isPosInt($wsizelst[0]) && &isPosInt($wsizelst[1]));
	}

	$params{'wsize1'} = $wsizelst[0];
	$params{'wsize2'} = $wsizelst[1];
	$/="\n";
	chomp($params{'wsize1'});
	chomp($params{'wsize2'});
	$params{'dir'} = $parf[1];
	
	#include the code for the ri cutoff values
	$params{'rilow'} = -10000;	$params{'rihigh'} = 10000;
	if( exists( $par{'ricutoff'} ) )
	{
		my @ricut= split( /\s+/, &trim($par{'ricutoff'}) );
		if( @ricut >= 1)
		{
			die "RiCutoff value not a valid real number\n" unless( &isReal($ricut[0]) );
			$params{'rilow'} = $ricut[0];
		}
		if( @ricut >= 2)
		{
			die "RiCutoff value not a valid integer\n" unless( &isReal($ricut[1]) );
			$params{'rihigh'} = $ricut[1];
		}
	}
	if( $params{'rilow'} > $params{'rihigh'} )
	{	die"Error: Ricutlow > Ricutoffhigh\n";	}

	#read the mrms distance
	if(! exists($par{"psdistance"}) )	{ die "parameter psdistance not given";	}
	$parf[3] = $par{"psdistance"};
	chomp($parf[3]);
	$params{mrmsdist} = $parf[3];

	#read the track color
	if(! exists($par{"trackcolor"}) )	
	{ warn "parameter psdistance not given\ndefault values being applied";	}
	else	{	$params{"trackcolor"} = $par{"trackcolor"};	}

	#read the coordinates for pulling the binding sites
	if( exists $par{'coordinates'} )
	{	%COORDLIST = parseCoordList( $par{'coordinates'} );	}

	#read the Ri consensus value from the ribl file
	readribl(FRIBL);
	
}#readparams()
	
#read the ribl matrix
sub readribl()
{
	my $file = $_[0];	#ribl file handle
	my $parcnt = 0;	#parameter count 

	while(my $ip = <$file>)
	{
		if ($parcnt >= 3)	{	last;	}	#all ribl params read, so quit
		
		#read sitename
		my $dline = $ip;
		if( $dline =~ /^\s*ribl\s+\"(\S+)\"/ )
		{
			$params{sitename} = trim( $1 );
			$parcnt++;
			next;
		}#if :ribl sitename
		
		#read start and end of ribl window
		$dline = $ip;
		if( $dline =~ /^\s*(\S+)\s+(\S+)\s+frombase.*tobase/i)
		#if( $dline =~ /^\s*\*\s+(\S+)\s+(\S+)\s+from-to/i ) #this reads from the user comments lines
		{
			$params{from} = $1;
			$params{to} = $2;
			$parcnt++;
			next;
		}#if: ribl window
		if(  (($params{'from'} < 0) && ($params{'to'} < 0))  ||
			(($params{'from'} > 0) && ($params{'to'} > 0)) )
		{	die "Error in ribl: Both from,to lie on one side of 0(zero)\n";	}

		#read the Ri consensus value
		$dline = $ip;
		if( $dline =~ /^\s*(\S+)\s+bits\s+.*Ri.*\bconsensus\b.*sequence/i )
		{
			$params{Ricon} = $1;
			$parcnt++;
			next;
		}#if: ribl ri consensus
	}#while: ribl input
	print $params{sitename}." ".$params{from}." ".$params{to}." ".$params{Ricon}."\n";
}#readribl()

#version 6.0
#process the psdatainp and psmrnainp.txt line by line
sub procdatabyline()
{
	my $mrnareturn = 0;	#the inner loop searches above and below this value for apt mrna
	my $oldgenelist="";
	my $oldgeneleader="";
	my $oldbscoord=-10;
	my $topBSRi=-10000;
	my $topBSBEDline="";
	
	
	my %tempmrna = &getmrna(1);
	my $chrmname = $tempmrna{'chrm'};
	printBEDheader(BED, $chrmname);
	my $i = 0;
	while (<FDATA> )
	{
		#print $_."\n";
		#print "$i\n";$i++;
		my @data;	#to hold the elements read from a single data line
		my $genelist="";	#to hold the good mrna list
		my $geneleader="";	#the most relevant mrna from the genelist
		my $mrmsdistance = undef;
		my $curdist = -10;	# this is the temporary mrms distance
		#
		#&resetmrna();	#make the smrna.txt goto the first mrna line
		$dataline = trim($_);
		#if the data is a comment or an empty line, write to output directly
		if( ($dataline =~ /\s*\*/) || ($dataline =~ /^\s*$/) )
		{	printopcommentline($dataline);	} 
		else	#non-comment line
		{
			#print $innercount++."\n";
			@data = split( /\s+/, $dataline);
			my $bscoord = $data[$coordpos-1];	#read the coordinate of the binding site
			my $ri = $data[$ripos-1];	#read the ri value of the binding site
			my $bsdir = $data[$bsdirpos-1];	#read the orientation
	
			#Write the code for checking the upper and lower Ri cutoff here
			#if out of Ri bounds, ignore the current Binding site
			if( $ri<$params{'rilow'} || $ri>$params{'rihigh'}	)
			{	next;	}
			#Write the code for checking the upper and lower coordinate
			#of the BS. If out of bound, ignore the site
			my %data;
			$data{'chrm'} = $chrmname;
			$data{'dir'} = $bsdir;
			$data{'bscoord'} = $bscoord;
			$data{'bsst'} = $bscoord - $params{'from'};
			$data{'bsend'} = $bscoord + $params{'to'};
			if( $bsdir == -1)
			{
				$data{'bsst'} = $bscoord + absval($params{'from'});
				$data{'bsend'} = $bscoord - absval($params{'to'});
			}
			$data{'bsst'} = 1 if ($data{'bsst'}<1);
			$data{'bsend'} = 1 if ($data{'bsend'}<1);
			
			$data{'Ri'} = $ri;

			if( !requestedSite( \%data, getCoordList() )  )
			{	next;	}

			
			if($bsdir == 1)
			{	$bsdir="+";	}
			elsif($bsdir == -1)
			{	$bsdir="-";	}
			else	#direction undefined
			{	warn "direction of binding site not 1/-1";	}
			
			my $loopcount = @totalmrna;
			
			#start the search prior to mrnareturn 
			#NEGSEARCH:
			for(my $i = $mrnareturn; $i>=0; --$i)
			{
				my $sdir = &findGoodMRNA($bscoord, $i, -1);
				if ($sdir == -1)		#no more searches this direction
				{	
					#print "NO MORE SEARCHES\n";
					#last NEGSEARCH;
					last;
				}#if
				elsif ($sdir == 0 )	#not a good mrna
				{	}	#do nothing
				elsif ($sdir == 1 )	#found a good mrna
				{
					 my %mrna = &getmrna($i);					 
					 $genelist.= "$mrna{name}".",";
					 my $tdist = &getmrmsdist($bscoord, $mrna{start},$mrna{end}, $mrna{dir});
					 if ($curdist <0 || $curdist>$tdist)	#new mrms found
					 {
					 	$curdist = $tdist;
						$geneleader = $mrna{name};
					 }
				}
				else
				{	warn "not a good mrna return direction";	}				
				
			}#for: search before
			
			#loopexit is the variable which defines the starting
			#position of the next loop. This is set to the pos
			#of the last found mrna for the current binding site
			#
			my $loopexit = $mrnareturn;
			#print "loopexit = $loopexit\n";
			#start the search after the mrnareturn
			#POSSEARCH:
			for(my $i = $mrnareturn + 1; $i<$loopcount; ++$i)
			{
				my $sdir = &findGoodMRNA($bscoord, $i, 1);
				if ($sdir == -1)		#no more searches this direction
				{
					#print "NO MORE SEARCHES\n";
					$loopexit = $i-1;
					#last POSSEARCH;
					last;
				}#if
				elsif ($sdir == 0 )	#not a good mrna
				{	}
				elsif ($sdir == 1 )	#found a good mrna
				{
					 my %mrna = &getmrna($i);					 
					 $genelist.= "$mrna{name}".",";
					 my $tdist = &getmrmsdist($bscoord, $mrna{start},$mrna{end}, $mrna{dir});
					 if ($curdist <0 || $curdist>$tdist)	#new mrms found
					 {
					 	$curdist = $tdist;
						$geneleader = $mrna{name};
					 }
				}
				else
				{	warn "not a good mrna return direction";	}
			}#for: search before
			#print output line in psdataop
			if( trim($genelist ne "") )
			{	
				printopdataline($dataline, $genelist, $geneleader);
			}

			#print output line in psBED.txt
			#first, the grouping has to be done based on psdistance
			if( $oldbscoord >=0 )#since the initial value is -10
			{
				my $res = groupTransidline($bscoord,$oldbscoord,$genelist,$oldgenelist);
				if($res > 0)	#found a common element
				{	$geneleader = $oldgeneleader;	}
			}

			#print BED if the genelist is not empty
			if( trim($genelist ne "") )
			{
				my $tempBED = printopBEDline( $tempmrna{chrm}, $bscoord, $bsdir, $ri, $geneleader );
				if( $ri > $topBSRi)
				{
					$topBSRi = $ri;
					$topBSBEDline = $tempBED;
				}	
			}
			$mrnareturn = $loopexit;	#setting the search pos for the next interation	  
			$oldbscoord = $bscoord;
			$oldgenelist = $genelist;
			$oldgeneleader = $geneleader;
			#print "mrnareturn=$mrnareturn\n";
		}#else:non-comment line read from FDATA  
	}#while: <FDATA>
	&printtopBSBEDline($topBSBEDline);
	print "completed processing the input data\n";
}#procdatabyline()

#returns the absolute distance between the binding site and the mrna start coordinate
#this distance is the mrms distance
sub getmrmsdist()
{
	my ($bscoord, $mstart, $mend, $mrnadir) = @_;
	my $rstart = $mstart;
	if($mrnadir eq "-")
	{	$rstart = $mend;	}
	elsif( $mrnadir eq "+" )
	{	$rstart = $mstart;	}
	else
	{	warn "direction in $fmrna not +/-";	}
		
	my $ldist = $bscoord - $rstart;

	#$ldist = ($ldist<0)? (-$ldist) : $ldist; #absolute value
	$ldist = absval($ldist);
	return $ldist;
}#getmrmsdis()

#find whether a bs falls in a particular mrna window or not. if it falls, then
#return 1, if it does not fall but the search in the present direction can be 
#successful, return 0, else return -1. In the search, the BS is constant and 
#the MRNA window is moved
sub findGoodMRNA()
{
	my ($bscoord, $mrnanum, $searchdir) = @_;
	my %mrna = &getmrna($mrnanum);	
	#get the mrna from the total mrna
	my ($rstart, $rend) = (0,0);		

	if ($params{dir} eq "u")		#up stream
	{
		if( $mrna{dir} eq "+" )
		{
			$rstart = $mrna{start} - $params{wsize1};
			$rend = $mrna{start};
		}
		elsif( $mrna{dir} eq "-" )
		{
			$rstart = $mrna{end};
			$rend = $mrna{end} + $params{wsize1};
		}
	}
	elsif ($params{dir} eq "d")	#down stream
	{
		if( $mrna{dir} eq "-" )
		{
			$rstart = $mrna{end} - $params{wsize1};
			$rend = $mrna{end};	
		}
		elsif( $mrna{dir} eq "+" )
		{
			$rstart = $mrna{start};
			$rend = $mrna{start} + $params{wsize1};
		}
	}	
	elsif ($params{dir} eq "b")	#both up/down streams
	{
		if( $mrna{dir} eq "-" )
		{
			$rstart = $mrna{end} - $params{wsize2};
			$rend = $mrna{end} + $params{wsize1};	
		}
		elsif( $mrna{dir} eq "+" )
		{
			$rstart = $mrna{start} - $params{wsize1};
			$rend = $mrna{start} + $params{wsize2};
		}
	}
	
	$rstart = ($rstart<0)?0:$rstart;
	$rend = ($rend<0)?0:$rend;

	#print "rstart = $rstart, rend = $rend\n";
	my $retval = 0;	#default to "continue search"
	if($rstart <= $bscoord && $bscoord <= $rend)	#a good mrna
	{	$retval = 1;	}
	elsif ($searchdir == -1)	#upward/-ve search 
	{
		if($bscoord > $rend)
		{			
			$retval = -1;	#no more searches in this direction
		}
	}#elsif: searchdir = -1
	elsif ($searchdir == 1)	#downward/+ve search
	{
		if($bscoord < $rstart)
		{
			$retval = -1;	#no more searches in this direction
		}#if
	}#elsif: searchdir = 1
	#if( $retval == -1)	
	#{print "BadMRNA: bs=$bscoord mrna=$mrna{name} rs=$rstart re=$rend dir=$searchdir rval=$retval\n"; }
	return $retval;
}#findGoodMRNA()
		
#parse and return  a mrna line as a hash
sub getmrna()
{
	my $mrnanum = shift;
	my $ipline = $totalmrna[$mrnanum];
	my %mrna;
	#initialize the mrna struct
	@mrnalist=split(/\s+/, $ipline);
	my $msz = @mrnalist;
	#print "size of mrna list is $msz\n";
	$mrna{name}= $mrnalist[$namepos-1];
	$mrna{dir}= $mrnalist[$dirpos-1];
	$mrna{start}= $mrnalist[$startpos-1];
	$mrna{end} = $mrnalist[$endpos-1];
	$mrna{chrm} = $mrnalist[$chrmpos-1];
	
	#the direction position may be in the 9th column(prior to Aug 2001) or in the 10th column(starting Aug 2001) 
	if( $mrna{dir} ne "+" && $mrna{dir} ne "-" )
	{
		$mrna{dir} = $mrnalist[$dirpos];
		$mrna{chrm} = $mrnalist[$chrmpos];	
		#if direction position is not the 9th column, check for the 10th column
		if ( $mrna{dir} eq "+" || $mrna{dir} eq "-" )
		{
			$mrna{name}= $mrnalist[$namepos];
			$mrna{start}= $mrnalist[$startpos];
			$mrna{end} = $mrnalist[$endpos];

		}
		else
		{
			die "mrna file error: Direction should be +/-";
		}
	}#if:outer

	#print("the read mrna is \n$mrna{chrm} $mrna{name} $mrna{dir} $mrna{start}\n");
	%mrna;	#set the return value of this subroutine to the mrna hash
}#getmrna()

#print a line of data to the output psdataop file
sub printopdataline()
{
	my ($dataline, $genelist, $geneleader) = @_;
		
	$dataline = trim($dataline);
	$genelist = trim($genelist);
	$geneleader = trim($geneleader);
	$/=",";		
	chomp($genelist);				
	$/="\n";
	print PSDATA ("$dataline"."\t$genelist"."\t$geneleader\n");
}#printopdataline()

#print a line of comment to the output psdataop file
sub printopcommentline
{
	my $dataline = shift;
	$dataline = trim($dataline);
	print PSDATA ("$dataline\n");
}#printopcommentline()

#print 1 line of the output BED file
sub printopBEDline
{
	my %rmrms;
	my $opline = "";
	$rmrms{'chrm'} = $_[0];
	$rmrms{'bscoord'} = $_[1];
	$rmrms{'bsdir'} = $_[2];
	$rmrms{'ri'} = $_[3];
	$rmrms{'transid'} = $_[4];	#geneleader
				
	$/="\n";
	
	chomp($rmrms{chrm});		#CHROM
	$opline .= $rmrms{chrm}." ";
	
	my $BEDstart=0;	#BED start
	my $BEDend=0;		#BED end
	my $bsdirec = $rmrms{bsdir};
	#in this scenario, the Binding Site occupies the 0 position
	
	if($bsdirec eq "+")
	{
		$BEDstart = $rmrms{bscoord} - absval($params{from});
		if($BEDstart<0) {	$BEDstart = 0;	}	#lower bound is 0

		$BEDend = $rmrms{bscoord} + absval($params{to});
	}#if: dir "+"
	elsif($bsdirec eq "-")
	{
		$BEDstart = $rmrms{bscoord} - absval($params{to});
		if($BEDstart<0) {	$BEDstart = 0;	}	#lower bound is 0
		
		$BEDend = $rmrms{bscoord} + absval($params{from});
	}#elsif: dir "-"
	else
	{	print ("Binding Site orientation not +/-\n");	}

	($BEDstart>0)?( $BEDstart-- ):$BEDstart;	
	# the above line is in conformation with UCSC browser inp where the
	# first coordinate is 0 rather than 1. but surprisingly the end
	# coordinate does not change
	$opline .= $BEDstart." ";	#ChrmStart coordinate
	$opline .= $BEDend." ";	#ChrmEnd coordinate
	
	my $rival = $rmrms{ri};
	my $score = ($rival * 1000) / $params{Ricon};	

	#Added 1.6.5
	my @sitel = split(/_/, $params{'sitename'});
	my $site = $sitel[0];
	if ($site eq "")	{	$site="site" }
	my $BEDname = $rmrms{transid}."_".$site."_R".int($rival);
	#my $BEDname = $rmrms{transid}."_R".int($rival);
	#Added 1.6.5

	$opline .= $BEDname." ";	#Name
	$opline .= $score." ";		#score
	
	chomp($rmrms{bsdir});	#strand direction
	$opline .= $rmrms{bsdir}." ";
	
	my $mid  = int( ($BEDstart + $BEDend)/2 );
	my $thickStart = ( $bsdirec eq "+")?($BEDstart):($mid);
	my $thickEnd = ( $bsdirec eq "+") ?($mid):($BEDend);

	$opline .= $thickStart." ";	#thickStart
	$opline .=  $thickEnd." ";	#thickEnd
	$opline .= "0"." ";	#reserved
	$opline .= "1"." ";	#blockCount

	my $blockSize = $BEDend - $BEDstart;
	$opline .=  $blockSize." ";
	
	my $blockStart = 0;
	$opline .=  $blockStart;
	$opline .= "\n";
	print BED "$opline";
	return $opline;
}#printopBEDline()

sub printBEDheader()
{
	my ($opfile, $chrm) = @_;
	#print the browser lines
	print $opfile ("browser position ".$chrm."                              \n");
	#print track lines
	$tname = &trim($params{'sitename'});	#track name
	print $opfile ("track name=${tname} description=\"${tname}\" color=$params{trackcolor} useScore=1\n");
}

#function to print the BED line of the BS with the top Ri value
sub printtopBSBEDline()
{
	my $opline = shift;
	open(TOP, ">topBS.txt") || die "Cannot open  output file topBS.txt\n";
	print TOP "$opline";
	close TOP;
}

#now group the geneleaders if they have a common element
#and if they fall within the minimum paralog distance
#which is the psdistance
sub groupTransidline()
{
	my ($newbs,$oldbs, $newglist, $oldglist) = @_;
	my $retval=0;	#default is no common elements
	my $dist = $newbs - $oldbs;
	$dist = absval($dist);
	if($dist <= $params{mrmsdist})
	{
		my $res = ifcommon($newglist, $oldglist);
		$retval = $res;
	}
	else {$retval = 0;}
	return $retval;
}
	
sub ifcommon()
{
	my ($g1, $g2) = @_;
	$g1 = trim($g1);
	$g2 = trim($g2);
	
	my @genel1=split(",",$g1);		
	my @genel2=split(",",$g2);

	my $outer = @genel1;	#outer index
	my $inner = @genel2;	#inner index

	my $retvalue=0;	#default is no common value
	for(my $i=0;$i<$outer;++$i)
	{
		for(my $j=0;$j<$inner;++$j)
		{
			if( $genel1[$i] eq $genel2[$j] )
			{
				$retvalue = 1;
				last;
	 		}
 		}#for:inner

		if($retvalue == 1)
		{	last;	}
	}#for:outer

	return $retvalue;
}#ifcommon()

#trims the start and end from whitespaces
sub trim
{
	my $ret = $_[0];

	$ret =~ s/^\s+//;	#remove leading ws
	$ret =~ s/\s+$//;	#remove trailing ws

	return $ret;
}#trim()

sub absval
{
	my $ret = $_[0];

	$ret = ($ret<0)?(-$ret):$ret;

	return $ret;	#return the absolute value
}#absval()

#getdataline() : get the next data line from the input data file.
#This function is from Version 1.6.2 and remove the preprocessing
#and sorting of the data file on its coordinate. The idea is to have
#two file handles, one for the data start position for the positive
#oriented Binding sites and the other handles for the negative oriented
#Binding sites. In the scan data, the positive sites start first followed
#by the negative sites both sorted on the BS coordinates
#sub getdataline()


#check whether a given BED site is in the requested coordinates
#the first 2 arguments are references to the data and coordinate list
#if an empty hash list of coordinates is passed, the whole BED is converted
sub requestedSite
{
	#print "In requestedSite()\n";
	my %data = %{$_[0]};
	my %crdlist = %{$_[1]};
	#print "coordst = " , $data{'bsst'},"\n";
	my $result = 0;	#indicate whether this is a valid site or not
	#if its an empty list, get sites from the whole BED file
	if( keys(%crdlist) <=0 )	
	{	$result = 1;	}
	else
	{
		my $chname = lc( $data{'chrm'} );	#lower case the chrm name
			
		#first check in the global 'chr' range
		my $chn = "chr";
		if( exists $crdlist{ $chn } )
		{
			my $i = 1;
			#print "[0] in \'chr\' : ", $crdlist{ $chn }[0];
			for($i =1; $i <= $crdlist{ $chn }[0]; $i++ )
			{
				if( exists $data{'bscoord'} &&
					( $data{'bscoord'} >= $crdlist{$chn}[2*$i-1] && $data{'bscoord'} <= $crdlist{$chn}[2*$i] ))
				{	$result =1;	last;	}
				elsif( ($data{'bsst'} >= $crdlist{$chn}[2*$i-1] && $data{'bsst'} <= $crdlist{$chn}[2*$i] ) ||
					($data{'bsend'} >= $crdlist{$chn}[2*$i-1] && $data{'bsend'} <= $crdlist{$chn}[2*$i] ) )
				{	$result = 1;	last;	}
				# the elsif above occurs only if the bscoord is not given
				# which might not happen in data files but may be possible
				# when this function is called from genvis program where
				# there are no binding coords, only starts and ends
			}#for: inner 
		}#if: exists 'chr'
		
		#check in the specific chromosome entry
		$chn = $chname; 
		if( $result != 1 && (exists $crdlist{$chname})  )
		{
			my $i = 1;
			#print "[0] in \'chname\' : ", $crdlist{ $chn }[0];
			for($i =1; $i <= $crdlist{ $chn }[0]; $i++ )
			{
				if( exists $data{'bscoord'} &&
					( $data{'bscoord'} >= $crdlist{$chn}[2*$i-1] && $data{'bscoord'} <= $crdlist{$chn}[2*$i] ))
				{	$result =1;	last;	}
				elsif( ($data{'bsst'} >= $crdlist{$chn}[2*$i-1] && $data{'bsst'} <= $crdlist{$chn}[2*$i] ) ||
					($data{'bsend'} >= $crdlist{$chn}[2*$i-1] && $data{'bsend'} <= $crdlist{$chn}[2*$i] ) )
				{	$result = 1;	last;	}
			}#for: inner 
		}#if: exists 'chr'

	}#else : -c/--coordinates option is defined
	#print "Leaving requestedSite()\n";
	return $result;
}#requestedSite()

#return the parse coordinate list as a reference
sub getCoordList
{
	return \%COORDLIST;
}
		
#get the coordinate list from the string given with --coordinates option
#The list is returned as a array of multiple (chrmname, coordst, coordend) lists
sub parseCoordList 
{
	my $coordline = shift ;	# the coordinate list line
	my %chrml;	#the hash array holding the parsed coordinates
	my @chlist = split(/;/, $coordline);	#chromosome and coord list
	my $i=0;
	foreach my $ch( @chlist)
	{
		my @temp = split(/:/, $ch);	#split a single chromosome entry 
		my $chname = lc( trim($temp[0]) );	#convert the chrm name to lower case
		$chname="chr" if( $chname eq "");
		if( $chname ne "chr" && $chname ne "chrx" && $chname ne "chry")
		{
			if( $chname =~ /chr(\d+)/ )
			{
				my $chnum = $1;
				
				if($chnum <1 || $chnum > 22)
				{	print "Ignoring entry $ch : Illegal chrm $chnum\n";	next;	}
			}
			else
			{	print "Ignoring entry $ch : Unrecognized chrm format\n";next;	}
		}
		#temp[1] contains the list of start, stop coordinates
		my @tempcrdlist = split(/,/,$temp[1]);
		foreach my $tempcrd (@tempcrdlist)
		{
			$tempcrd = trim($tempcrd);
			next if( $tempcrd eq "" );	#skip on empty value
			
			my @coordlist = split(/-/, $tempcrd);
			my $coordst = trim($coordlist[0]);
			my $coordend = trim($coordlist[1]);
			
			$coordst=0 if ( (! defined $coordst) || $coordst eq "");
			$coordend=$MAXLONG if ( (! defined $coordend) || $coordend eq "" );
			($coordst, $coordend) = ($coordend, $coordst) if( $coordst > $coordend);
			if(! isPosInt($coordst) )
			{	print "Ignoring entry $ch : Illegal start coordinate $coordst\n"; next;	}
			if(! isPosInt($coordend) )
			{	print "Ignoring entry $ch : Illegal end coordinate $coordend\n"; next;	}
			#$chrml[$i++] = ($chname, $coordst, $coordend);
			#$chrml{$i}[0] = $chname;
			if(! exists $chrml{$chname} )
			{	$chrml{$chname}[0]=1;	}
			else
			{	$chrml{$chname}[0]=$chrml{$chname}[0]+1;	}
			my $c1 = $chrml{$chname}[0];
			$chrml{$chname}[2*$c1 -1 ] = $coordst;
			$chrml{$chname}[2*$c1] = $coordend;
			#$i++;
			print "$chname, $coordst, $coordend\n";
			#print %chrml;
		}#foreach: inner
	}#foreach
	print "In parseCoordList : \n";
	print %chrml;
	return %chrml;
}#parseCoordList


# close file handle references if defined 
sub closef
{
	foreach( @_ )
	{	close $_ if( defined $_ );	}
}



