#!/usr/bin/perl 

# Copyright 2003 Sashidhar Gadiraju, Peter K. Rogan
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#


#Program: f2r
#version 1.6
#Author: Sashidhar Gadiraju
#1.6 : Adding option to run n2rand from inside this to create substituted libraries
#1.5 : Changing some functions to utilize the underlying linux commands to run faster
			#and to make more general
#1.4.2 : removing some temporary files
#1.4.1 : moving all the sub scan results to a 'sub' directory
#1.4 : run catal and delila only when needed
#1.3 : fixed the lower coordinate in 'inst' file to 1 rather than 0

#this very simple program converts a fasta formatted file to a raw sequence ending in a line with a single '.'

use strict;
my %OPT;	#parsed options: S => value given after -S, h => 1 when -h was seen

#Reduce $0 to the program's base name for the usage message. $0 is left
#untouched if the pattern does not match, instead of being set to undef.
$0 = $1 if $0 =~ /([\w\.]+)$/;

#Only two flags exist (-S <min_seq_len> and -h), so they are parsed by hand.
#The loop condition strips the leading '-' in place, so every leading
#argument that starts with a dash is consumed here as a flag.
while( @ARGV && $ARGV[0] =~ s/^-// )
{
	my $flag = shift @ARGV;
	if( $flag =~ /S/ )
	{	$OPT{'S'} = shift @ARGV;	}	#-S takes the following argument as its value
	elsif( $flag =~ /h/ )
	{	$OPT{'h'} = 1;	}
	#any other flag is ignored
}

#One or two positional arguments are accepted; only the first is used below.
#The exit no longer depends on usage()'s return value (a failed print used to
#let the script run on with bad arguments), and an argument error now yields
#a non-zero exit status while an explicit -h still exits with 0.
if( $OPT{'h'} || @ARGV<1 || @ARGV>2 )
{
	usage();
	exit( $OPT{'h'} ? 0 : 1 );
}

runfasta2raw();
runN2rand( $OPT{'S'} ) if( $OPT{'S'} );	#run substitutions if -S flag given
runmakebk();
runcatal();
#end of main()

#Step 1: produce the raw sequence file 'rawseq' (the input of runmakebk())
#from the file named by $ARGV[0].
#  - If the first line starts with '>' (optionally after whitespace) the file
#    is treated as fasta and 'rawseq' becomes a copy without that first line.
#  - Otherwise two warnings are issued and 'rawseq' becomes a symbolic link to
#    the input, which is then used unchanged.
#Dies if the input is not a plain file, cannot be read, or if 'rawseq' cannot
#be created.
sub runfasta2raw()
{
	print "runf2r: ".getdate()." converting fasta to raw sequence\n";
	my $filename = $ARGV[0];
	if(! -f "$filename")
	{	die "Cannot open file $filename\n";	}
	unlink "rawseq";	#remove any preexisting file or link

	#Read the header line in perl: no shell is involved, so file names with
	#spaces or shell metacharacters are handled correctly.
	open(my $in, '<', $filename) or die "Cannot open file $filename\n";
	my $firstline = <$in>;
	close $in;
	$firstline = '' unless defined $firstline;	#empty input file

	if( $firstline !~ /^\s*>/ )
	{
		warn "$filename : not a fasta format file\n";
		warn "Proceeding to makebk\n";
		symlink($filename, "rawseq")
			or die "Cannot link $filename to rawseq : $!\n";
	}
	else
	{
		#The header must only be stripped in the fasta case. Running the
		#redirect after the link above has been made would truncate the
		#input file itself through the 'rawseq' link.
		(my $quoted = $filename) =~ s/'/'\\''/g;	#make the name safe inside single quotes
		system("sed '1d' '$quoted' > rawseq") == 0
			or die "Cannot strip the fasta header from $filename\n";
	}
	print "runf2r: ".getdate()." conversion completed\n";
}#runfasta2raw():
##End of step 1

#Step 1.5 (only when -S is given): run the external program n2rand.pl on
#'rawseq' so that runs of 'N' are replaced by random nucleotides, then make
#its result the new 'rawseq'.
#  $len : value of the -S option, passed to n2rand.pl as its third argument
#n2rand.pl is expected to leave its output in a file named 'subseq'.
#Dies if 'rawseq' is missing, if n2rand.pl fails, or if 'subseq' cannot be
#renamed to 'rawseq'.
sub runN2rand
{
	my ($len) = @_;
	my $ch = 'N';
	my $file = "rawseq";
	if( ! -f $file )
	{	die "Cannot open intermediate file $file\n";	}

	#List form: the arguments reach n2rand.pl verbatim and the command-line
	#value in $len is never interpreted by a shell.
	my $res = system( "n2rand.pl", $file, $ch, $len );
	if( $res != 0 )
	{
		#$! is only meaningful when the program could not be started at all
		my $why = ($res == -1)  ? "could not be started: $!"
		        : ($res & 127) ? "killed by signal ".($res & 127)
		        :                "exit status ".($res >> 8);
		die "Error in execution of n2rand.pl : $why\n";
	}
	unlink "rawseq";	#may be a link to the original input; drop it first
	rename("subseq", "rawseq")
		or die "Cannot rename subseq to rawseq : $!\n";
}

#start of step2 -- run makebk
#Renames 'rawseq' to 'sequ', runs the external program makebk with the text
#"\na" on its standard input (presumably the answers to its prompts) and its
#standard output discarded, then filters makebk's 'changes' file into
#'bkchanges'.
#Filtering: only lines that start with '@' (after leading whitespace is
#removed) are considered; such a line is copied unless the character
#following "was" in its "change:" description is n or N. An '@' line that
#does not have the expected layout is copied as well.
#On success 'changes' and 'sequ' are deleted; if makebk fails a warning is
#issued and both are left in place. 'f2r.tmp' is always deleted.
#Dies if 'rawseq' is missing or cannot be renamed, or if 'changes' /
#'bkchanges' cannot be opened after a successful makebk run.
sub runmakebk()
{
	print "runmkbk: ".getdate()." executing makebk\n";
	if(! -f "rawseq")
	{	die "no input raw file";	}
	#Stop here if the rename fails: a leftover 'sequ' from an earlier run
	#would otherwise be processed silently.
	rename("rawseq", "sequ") or die "unable to rename rawseq to sequ: $!";

	if( open(my $answers, '>', "f2r.tmp") )
	{
		print $answers "\na";
		close $answers;
	}
	else
	{	warn "unable to open a temp file";	}

	unlink "book";	#remove any previous links
	my $result = system("makebk  <f2r.tmp >/dev/null");
	if($result)
	{	warn "execution error in makebk";	}
	else
	{
		open(my $changes, '<', "changes") or die "unable to open changes";
		open(my $bkchanges, '>', "bkchanges") or die "unable to open bkchanges";
		while( my $chline = <$changes> )
		{
			$chline =~ s/^\s+//; #remove leading ws
			next unless $chline =~ /^\@/;

			#$3 is the single character after "was"; the captures are only
			#read when the match succeeded.
			my $was = '';
			if( $chline =~ /\@\s+(\S+)\s+(\S+)\s+.*change:.*was\s+(\S)\s+.*now\s+(\S)/ )
			{	$was = $3;	}
			print $bkchanges $chline if( lc($was) ne "n" );
		}#while: <$changes>
		close $bkchanges or warn "error while writing bkchanges: $!";
		close $changes;
		unlink("changes");
		unlink("sequ");
	}#else
	unlink "f2r.tmp";
	print "runmkbk: ".getdate()." completed makebk\n";
}#runmakebk()
#end of step2

#start step3 -- run catal
#Prepares the working files for the external program catal and runs it:
#  - 'l2', 'l3' and 'catalp' are (re)created empty via crfile()
#  - 'l1' is made a symbolic link to 'book'
#  - any old 'lib1' is removed first; catal is expected to write a new one
#If catal succeeds, 'l1' and 'book' are deleted and 'book' becomes a symbolic
#link to 'lib1'. If catal fails only a warning is issued.
#Dies if 'book' does not exist or if the 'l1' link cannot be created.
sub runcatal()
{
	print "runcatal: ".getdate()." executing catal\n";
	#crfile("l1");
	crfile("l2");
	crfile("l3");
	crfile("catalp");

	if(! -f "book")
	{	die "unable to open file book";	}
	#unlink is silent when a file is absent, unlike a plain 'rm'
	unlink "l1", "lib1";
	symlink("book", "l1") or die "unable to link book to l1: $!";
	my $result = system("catal");
	if( $result != 0)
	{	warn "error in executing catal";	}#if
	else
	{
		#remove the l1 output of makebk
		print "deleting catal inputfile l1\n";
		unlink("l1");
		unlink("book");
		symlink("lib1", "book") or warn "unable to link lib1 to book: $!";
	}
	print "runcatal: ".getdate()." completed catal\n";
}#runcatal():
#end of step3 -- run catal

#Create an empty file, similar to 'touch', backing up an existing file first.
#  $_[0] : name of the file to create
#If a plain file of that name already exists it is renamed to "<name>_old"
#(replacing any earlier backup); a failed backup only produces a warning.
#The file is then created empty. Dies if it cannot be created.
#Declared without a prototype: the sub takes one argument, and an empty
#prototype would reject every call compiled after this definition.
sub crfile
{
	my $file = $_[0];
	print "creating file: $file\n";
	if(-f $file)
	{
		#rename() needs no shell, so names containing spaces or shell
		#metacharacters are backed up correctly
		rename($file, $file."_old")
			or warn "unable to back up $file to ${file}_old: $!\n";
	}

	open(my $fh, '>', $file) or die "unable to create file $file";
	close($fh);
}#crfile()

#Returns the current local date and time as one string: localtime is
#evaluated in scalar context, which yields the readable form
#(e.g. "Thu Sep  9 21:34:45 1999") rather than the list of time fields.
sub getdate()
{
	return scalar( localtime() );
}#getdate()
	
#Returns a copy of the argument with leading and trailing whitespace removed.
#  $_[0] : the string to trim; the caller's variable is not modified
#An undefined argument is returned unchanged (undef).
#Declared without a prototype: the sub takes one argument, and an empty
#prototype would reject every call compiled after this definition.
sub trim
{
	my $ret = $_[0];
	return $ret unless defined $ret;
	$ret =~ s/^\s+//;	#remove leading ws
	$ret =~ s/\s+$//;	#remove trailing ws
	return $ret;
}#trim()

#Returns the absolute value of a number.
#  $_[0] : the number; a value that does not compare below zero is returned
#          unchanged
#Declared without a prototype: the sub takes one argument, and an empty
#prototype would reject every call compiled after this definition.
sub absval
{
	my $ret = $_[0];
	$ret = -$ret if( $ret < 0 );
	return $ret;	#return the absolute value
}#absval()

#Prints the command-line help text, one line at a time, to the currently
#selected output handle. Returns the result of the last print, which is
#true when that print succeeded.
sub usage
{
	my @help = (
		"usage: $0 [-S] fastafile\n",
		"\n",
		"-S min_seq_len\n",
		"	If this option is given, then all undefined nucleotides (stretches of 'N's) of\n",
		"	length >= min_seq_len will be substituted with random nucleotides(a,g,c,t)\n",
	);
	my $printed;
	foreach my $line (@help)
	{	$printed = print $line;	}
	return $printed;
}


