#!/usr/bin/perl
#
# Copyright 2003 Sashidhar Gadiraju, Peter K. Rogan
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
#
#
#Program: sortmrna
#version: 1.0
#Description: 
#This script converts an input accession database from UCSC into a file
#(smrna.txt) with the same format plus one extra trailing column holding the
#actual gene start coordinate, sorted numerically on that column.
#The accession database itself is sorted simply by the start position,
#regardless of the gene orientation. The output file from this program takes
#the orientation into consideration and sorts on the actual start coordinate.
#For example: the actual start position of a gene with negative orientation
#is the end coordinate as given in the accession database.

use strict;
use warnings;

#Main script: read the mrna accession file named on the command line, append
#the orientation-aware start coordinate to every non-blank line, and sort the
#result numerically on that appended column into smrna.txt.
#
#The positions below are 0-based indices into the whitespace-split line.
my $mstpos = 16;	#mrna start position
my $mendpos = 17;	#mrna end position
my $mdirpos = 9;	#mrna dir position
my $mrnaop = "mrnaop.tmp";	#the intermediate (unsorted) output file name
my $mrnafieldcnt = 23;	#the number of fields in a mrna line (updated from the input)
my $layoutlocked = 0;	#set once the column layout has been confirmed or shifted

if( @ARGV != 1 )	{	die "usage: sortmrna file\noutput file is smrna.txt\n";	}

#three-argument open so that the file name can never be taken as an open mode
open( my $mrnafh, '<', $ARGV[0] ) or die "cannot open $ARGV[0]: $!\n";	#open input file
open( my $mrnaopfh, '>', $mrnaop ) or die "cannot open $mrnaop: $!\n";	#open output file
while ( my $inputline = <$mrnafh> )
{
	#trim() is called with & so that this works whether or not the sub
	#carries an (empty) prototype
	my $mrnaline = &trim($inputline);
	next if( $mrnaline eq "" );
	my @mrna = split(/\s+/, $mrnaline );

	my $mdir = $mrna[$mdirpos];
	my $dirok = ( defined $mdir && ( $mdir eq "+" || $mdir eq "-" ) );
	if( !$dirok && !$layoutlocked )
	{
		#orientation not +/-, might be a file from pre aug 2000: every
		#column of interest is one position to the left. This is a property
		#of the whole file, so the shift is tried only before any line has
		#been accepted and never more than once.
		$mendpos--;	$mstpos--;	$mdirpos--;
		$mdir = $mrna[$mdirpos];
		$dirok = ( defined $mdir && ( $mdir eq "+" || $mdir eq "-" ) );
	}
	die "mrna orientation not +/- in line $.\n" unless( $dirok );
	$layoutlocked = 1;

	#the actual start of a gene on the - strand is its end coordinate
	my $rstart = ( $mdir eq "+" ) ? $mrna[$mstpos] : $mrna[$mendpos];
	die "mrna start coordinate missing or not numeric in line $.\n"
		unless( defined $rstart && $rstart =~ /^\d+$/ );

	print {$mrnaopfh} "$mrnaline\t$rstart\n"	#print to output
		or die "cannot write to $mrnaop: $!\n";
	$mrnafieldcnt = @mrna;
}
close $mrnafh;
close $mrnaopfh or die "cannot close $mrnaop: $!\n";	#buffered write errors surface here

#now start sorting on the actual gene start coordinate which is in the last field of the file.
#The appended column is field number (original field count + 1) in sort's
#1-based -k notation; the obsolete "+POS" key syntax is rejected by current
#sort implementations. The list form of system() bypasses the shell.
my $sortfield = $mrnafieldcnt + 1;
system( 'sort', '-k', "${sortfield},${sortfield}n", '-o', 'smrna.txt', $mrnaop );	#produce the sorted mrna
if( $? )	{	warn "Error in sorting the file: $?\n";	}
unlink( $mrnaop ) or warn "cannot remove $mrnaop: $!\n";
#END OF MAIN SUBROUTINE

#trim the whitespace at the start and end of a line
#  argument: the string to trim (a missing or undefined value is treated as "")
#  returns:  a copy of the string without leading and trailing whitespace;
#            whitespace inside the string is left untouched
#The sub is declared without a prototype: an empty prototype "()" would
#declare that it takes no arguments and force callers to use &trim(...).
sub trim
{
	my $ret = shift;
	return "" unless defined $ret;
	$ret =~ s/\A\s+//;	#remove leading whitespaces
	$ret =~ s/\s+\z//;	#remove trailing whitespaces (including the newline)
	return $ret;
}
