#!/bin/bash

# Copyright 2003 Sashidhar Gadiraju, Peter K. Rogan
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#


#program : runf2r
#version : 2.4
#Description:
# This program runs f2r on a given range of chromosomes to create
# libraries from downloaded FASTA files.
#2.2 : Stores results only in the current directory.
#      Does not parse the orglist and assemblylist.

#First check whether the pertinent Delila Genome environment variables are set

if [ -z "$DELGEN_GENOMES" ]; then
	# A missing prerequisite is a failure: report it on stderr and exit
	# non-zero (a bare `exit` would pass on echo's status, i.e. 0).
	echo "ENV variable DELGEN_GENOMES not set" >&2
	exit 1
fi

PRNAME=${0##*/}		#program name: $0 without any leading directory part
CONP=1	#number of concurrent processes
# NOTE(review): only DELGEN_GENOMES is validated above, yet LIB is taken from
# DELGEN, and LIB is not referenced anywhere else in this file - confirm which
# variable is intended.
LIB=$DELGEN					#Delila Genome directory
CHRPFX=chrm_		#chromosome directory prefix
PARSRC=$(pwd) #the actual destination directory for the output

# Print the usage text for this program and terminate the script.
# Arguments: $1 - optional error message, printed first inside a starred banner
# Globals:   PRNAME (read) - program name shown in the usage line
# Outputs:   the banner (if any) and the usage text on stdout
# Exit:      0 when called without a message (plain help request),
#            1 when a message is given (the caller is reporting an error)
function showusage
{
	local rc=0
	echo
	if [ -n "$1" ]; then
		rc=1
		echo "***************************************"
		# printf with a quoted argument prints the message verbatim
		# (no word splitting, globbing or option parsing).
		printf '%s\n' "$1"
		echo "***************************************"
	fi
	echo "usage: $PRNAME [-S min_seq_len] [-h] [chrmstart [chrmend] ] "
	# bash's echo does not interpret "\n", so these lines carry no escape.
	echo "-S min_seq_len"
	echo "  Substitute all undefined nucleotides (stretches of 'N's) of"
	echo "  length >= min_seq_len(>=1) with random nucleotides(a,g,c,t)"
	echo "-h help menu"
	echo
	echo "The range of chromosomes to be used is specified by"
	echo "chrmstart: default=The smallest chromosome in chrmlist"
	echo "chrmend: default=The largest chromosome  in chrmlist"
	echo
	echo "Sample 'chrmlist' file contents are given below"
	echo "In this file, default chrmstart=21, chrmend=Y"
	echo "chrmend can also be specified as 24"
	echo
	echo "*Chrm file starts here"
	echo "*Comment lines start with a *"
	echo "*ChrDirectoryNumber ChrName"
	echo "	21 21"
	echo "	22 22"
	echo "	23 X"
	echo "	24 Y"
	echo
	exit "$rc"
}

# Test whether the single argument is a non-empty string of decimal digits.
# Arguments: $1 - string to test
# Returns:   0 if $1 consists only of the characters 0-9;
#            1 if it is empty, contains any other character, or the
#              function was not called with exactly one argument
function isdigit
{
	# Return statuses must lie in 0-255; `return -1` is out of range, so a
	# plain 1 is used for every "no" answer.
	[ $# -eq 1 ] || return 1

	case $1 in
		'' | *[!0-9]*) return 1;;
		*) return 0;;
	esac
}
 
# Optional override of the concurrency limit CONP from the environment
# variable DELGEN_CONP. A non-numeric value aborts the script; a value
# below 1 is replaced by 1 with a warning.
if [ -n "$DELGEN_CONP" ]; then
	CONP=$DELGEN_CONP
	# Quoted so the value is validated exactly as given, without word
	# splitting or filename expansion.
	if ! isdigit "$CONP"; then
		echo "Environment variable DELGEN_CONP not an integer" >&2
		exit 1
	fi
	if [ "$CONP" -lt 1 ]; then
		echo "Environment variable DELGEN_CONP < 1, setting it to 1" >&2
		CONP=1
	fi
fi

#Minimum sequence length for substitution (0 = no substitution is requested)
SUBST=0

# The leading ':' in the option string selects getopts' silent mode: a missing
# option argument yields Option=':' and an unknown option yields Option='?',
# with the offending option letter in OPTARG in both cases.
while getopts ":S:h" Option
do
	case "$Option" in
	S )	SUBST=$OPTARG
			if ! isdigit "$SUBST"; then
				showusage "-S argument not a positive number"
			fi;;
	h )	showusage;;
	: )	showusage "Option -$OPTARG requires an argument";;
	* )	showusage "Unknown option -$OPTARG";;
	esac
done

# Drop the parsed options so that $1/$2 are chrmstart/chrmend.
shift $((OPTIND - 1))

# The chromosome list is always read from the current directory.
if [ ! -f "chrmlist" ]; then
	showusage "File 'chrmlist' not found"
fi

#initialize the chromosome range with the values taken from chrmlist

# Set CHRSTCNT and CHRENDCNT to the numerically smallest and largest value
# found in the first column of ./chrmlist.
# Globals: CHRSTCNT, CHRENDCNT (written); both end up empty when chrmlist
#          has no usable entries or cannot be read.
function chrmrange
{
	local numbers
	# Empty lines and comment lines (beginning with a *) are removed; two -e
	# patterns are used instead of the GNU-only `\|` alternation.
	# `sort -n` replaces the obsolete `sort +0n` key syntax, which current
	# sort implementations treat as a file name.
	numbers=$(grep -v -e '^ *$' -e '^ *\*' chrmlist | awk '{print $1}' | sort -n)
	CHRSTCNT=$(printf '%s\n' "$numbers" | head -n 1)
	CHRENDCNT=$(printf '%s\n' "$numbers" | tail -n 1)
}
chrmrange
#echo "chrmst=$CHRSTCNT"
#echo "chrmend=$CHRENDCNT"

# Look up a chromosome in ./chrmlist by directory number or by name.
# Arguments: $1 - text to search for; it is matched as a whole word anywhere
#                 on a line and is still interpreted by grep as a regular
#                 expression
# Globals:   CHRMNUM, CHRMNAME (written) - first and second field of the last
#            matching non-comment line, or empty when nothing matches
# Returns:   the status of `isdigit` on CHRMNUM, i.e. 0 only when a matching
#            line with a numeric first column was found
function findCHRM
{
	local entry rest
	# The pattern is passed to grep directly. The previous implementation
	# built a command string around $1 and ran it through eval (twice), so
	# quotes or ';' in the argument were executed as shell code.
	entry=$(grep -e "\<$1\>" chrmlist | grep -v '^ *\*' | tail -n 1)
	# Split on blanks/tabs: first field is the directory number, second the
	# chromosome name; anything further is discarded.
	IFS=$' \t' read -r CHRMNUM CHRMNAME rest <<<"$entry"
	#return the value of isdigit function
	isdigit "$CHRMNUM"
}

# Optional positional arguments override the default range. Each one may be a
# chromosome number or name; findCHRM translates it to the directory number.
# Lookup failures exit non-zero (a bare `exit` would return echo's status, 0).
if [ $# -ge 1 ]; then
	if ! findCHRM "$1"; then
		echo "chrmstart $1 not listed in  chrmlist" >&2
		exit 1
	fi
	CHRSTCNT=$CHRMNUM
	echo "chrmstart=$CHRSTCNT"
fi

if [ $# -ge 2 ]; then
	if ! findCHRM "$2"; then
		echo "chrmend $2 not listed in  chrmlist" >&2
		exit 1
	fi
	CHRENDCNT=$CHRMNUM
	echo "chrmend=$CHRENDCNT"
fi
###
# The defaults come straight from chrmlist and may be empty or non-numeric;
# check them here so the numeric comparison below cannot fail with a
# "[: integer expression expected" error.
if ! isdigit "$CHRSTCNT" || ! isdigit "$CHRENDCNT"; then
	showusage "No numeric chromosome range could be determined from 'chrmlist'"
fi
if [ "$CHRSTCNT" -gt "$CHRENDCNT" ]; then
	showusage "chrmstart > chrmend"
fi

#store the list of  chromosomes to_be_processed in an array
# Every number from CHRSTCNT to CHRENDCNT is looked up with findCHRM; numbers
# without an entry in chrmlist are reported and left out of CHRML.
count=$CHRSTCNT
cnt=0
while [ "$count" -le "$CHRENDCNT" ]
do
	if findCHRM "$count"; then
		CHRML[$cnt]=$count
		cnt=$((cnt + 1))
	else
		# chrmlist is always read from the current directory, so the
		# message names it without a directory prefix.
		echo "Skipping chromosome $count : not listed in chrmlist "
	fi

	# 10# forces base 10 so a value with a leading zero is not read as octal.
	count=$((10#$count + 1))
done

if [ "${#CHRML[@]}" -le 0 ]; then
	# Nothing to do is an error: report on stderr and exit non-zero.
	printf 'No valid chromosomes to process in the range %s to\t%s\n' \
		"$CHRSTCNT" "$CHRENDCNT" >&2
	exit 1
fi

# Run f2r.pl on the fsequ file of one chromosome directory.
# Arguments: $1 - chromosome number (used only in the log heading)
#            $2 - chromosome directory that is expected to contain fsequ
# Globals:   SUBST (read) - when greater than 0, f2r.pl is called with
#            "-S $SUBST"
# Outputs:   progress and error text on stdout, plus whatever f2r.pl prints
# Returns:   1 if the directory cannot be entered or fsequ is missing,
#            otherwise the exit status of f2r.pl
# Note:      changes the working directory of the calling shell; the caller
#            in this file invokes it inside a subshell.
function dof2r
{
	local curcnt=$1
	local curdir=$2
	# Stop here if the directory is unusable; otherwise f2r.pl would run on
	# whatever fsequ happens to be in the current directory.
	if ! cd -- "$curdir"; then
		echo "Cannot change to directory $curdir"
		return 1
	fi
	echo "Chromosome $curcnt : f2r log "
	if [ ! -f fsequ ]; then
		echo "Cannot find file fsequ"
		return 1
	fi
	if [ "${SUBST:-0}" -gt 0 ]; then
		echo "Performing randomized substitutions"
		f2r.pl -S "$SUBST" fsequ
	else
		f2r.pl fsequ
	fi
}

echo "Starting $PRNAME on chromosomes $CHRSTCNT TO $CHRENDCNT"

# Send stdout to runf2r.log for the duration of the run. The current stdout is
# parked on fd 3 so it can be restored afterwards even when it is not a
# terminal (restoring through the name printed by `tty` fails in that case).
exec 3>&1
if ! exec >"$PARSRC/runf2r.log"; then
	echo "Cannot write $PARSRC/runf2r.log" >&2
	exit 1
fi
#echo "$PRNAME"

date
loopcnt=0
# Start one background dof2r per entry of CHRML, keeping fewer than CONP of
# them running at any time.
while [ "$loopcnt" -lt "${#CHRML[@]}" ]
do
	count=${CHRML[$loopcnt]}
	#calculate the number of parallel f2r processes running
	# Counted from this shell's own job table; a count based on
	# `ps -C $PRNAME` depends on the process name and also includes other
	# instances of the script.
	pnum=$(( $(jobs -rp | wc -l) ))
	if [ "$pnum" -lt "$CONP" ]; then
		echo "$count"
		#run background function
		cur=${PARSRC}/${CHRPFX}$count
		if [ ! -d "$cur" ]; then
			echo "Cannot find directory ${CHRPFX}$count : skipping"
		else
			echo "cur=$cur count=$count "
			#executing dof2r
			# The subshell keeps dof2r's directory change local; its
			# stdout and stderr go to the per-chromosome log and the
			# saved fd 3 is closed for it.
			( dof2r "$count" "$cur" ) >"$cur/f2r.log" 2>&1 3>&- &
		fi
		loopcnt=$((loopcnt + 1))
		# Short pause between launches, kept from the original loop.
		sleep 1
	else
		#check for process completion after 1 min
		sleep 60
	fi
done

#waiting for the child processes created by this shell
wait
date #this is stored in runf2r.log
# Restore the original stdout and release the spare descriptor.
exec >&3 3>&-
echo
echo
echo "Printing the execution logs"
cat "$PARSRC/runf2r.log"
echo "****************************"
for count in "${CHRML[@]}"
do
	# findCHRM sets CHRMNAME for the heading.
	findCHRM "$count"
	echo "Printing chromosome $CHRMNAME log"
	cat "${CHRPFX}${count}/f2r.log"
	echo "****************************"
done
exit 0

