#!/bin/bash
# paianalysis - Bash script to coordinate predictions of PAIs.
#
#
# Written by: Siomar C. Soares, Federal University of Minas Gerais (UFMG), 
#   Laboratory of Cellular and Molecular Genetics, Brazil
#
# Date Written: Jun 02, 2011


# Name this script was invoked as, without any directory part; shown in the
# usage banner. "$0" is quoted so an install path containing whitespace still
# yields a single, correct name.
software=$(basename -- "$0")

# Print the version banner and the option summary, then terminate the script.
#
# Arguments:
#   $1 - optional exit status (decimal, 0-255). Anything else -- including the
#        words handed over by call sites written as "usage exit 1" -- is
#        ignored and the historical status 255 is used.
# Globals:
#   software (read) - program name shown in the banner.
# Outputs:
#   Help text on stdout.
# Returns:
#   Never returns; always exits.
function usage() {
  # 255 is what the former "exit -1" evaluated to in bash; spelled portably.
  local status=255
  if [[ "${1-}" =~ ^[0-9]{1,3}$ ]] && (( 10#$1 <= 255 )); then
    status=$((10#$1))
  fi

  # The e-value defaults listed below mirror the values the script actually
  # falls back to (0.000001) when the option is not given.
  echo -e "$software 0.1 (June 2011)"
  echo "
  Usage:
  $software [options] -p <Pathogenic bacterium to be analysed> -n <Non-pathogenic bacterium to be compared to>
	
	OPTIONS	Default	FUNCTION
	-a	1e-6	: e-value used during creation of comparison files for act, i.e., tblastx against Non-pathogenic bacterium.
	-c	0.95	: sensitivity used during identification of regions with Codon Usage deviation.
	-d	embl	: format of the input file of your non-pathogenic bacterium (embl/gbk).
	-e	1e-6	: e-value used during identification of similarity regions for plasticity analysis, i.e., blastp against Non-pathogenic bacterium.
	-f 	embl	: format of the input file of your pathogenic bacterium (embl/gbk).
	-g	1.5	: multiple of Standard Deviations used during identification of regions with G+C content deviation.
	-m	1e-6	: e-value used during identification of virulence factors, i.e., blastp against mVIRdb.		
	-t	1e-6	: e-value used during identification of transposases, i.e., hmmsearch against pfam database of transposases.
  	-h 	----	: print this help

	Any question? Contact <siomars@gmail.com>
  "
  exit "$status"
  }

# Parse the command-line switches into the global settings used by the rest
# of the script. Every option except -h takes one argument:
#   -p/-n  input files (pathogenic / non-pathogenic genome)
#   -f/-d  their formats
#   -m/-t/-a/-e  e-value thresholds, -c sensitivity, -g G+C deviation multiple
# Plain assignments cannot fail, so no status check follows them. An unknown
# option or a missing argument (getopts has already printed the reason) now
# stops the run with the help text instead of being silently skipped.
while getopts "p:n:d:f:m:c:g:t:a:e:h" OPT; do
	case "$OPT" in
		p) patho=$OPTARG ;;
		n) nonpatho=$OPTARG ;;
		d) format_nonpatho=$OPTARG ;;
		f) format_patho=$OPTARG ;;
		m) mvirdb_evalue=$OPTARG ;;
		c) colombo_sensitivity=$OPTARG ;;
		g) gcdeviation=$OPTARG ;;
		t) transposase_evalue=$OPTARG ;;
		a) act_evalue=$OPTARG ;;
		e) plasticity_evalue=$OPTARG ;;
		h)
			# Asking for help is not an error; usage terminates the script itself.
			usage 0
			exit 0
			;;
		*)
			usage
			exit 1
			;;
	esac
done
	# Both genome files are mandatory: report which one is missing, show the
	# help text and stop. The operands are quoted so that a file name containing
	# whitespace is tested as one string instead of being re-parsed by the test.
	if [[ -z "$patho" ]]; then
		echo -e "\n\n ERROR: No embl/gbk file provided for the Pathogenic Bacterium. \n\n"
		usage
		exit 1
	fi
	if [[ -z "$nonpatho" ]]; then
		echo -e "\n\n ERROR: No embl/gbk file provided for the Non-Pathogenic related Bacterium. \n\n"
		usage
		exit 1
	fi
	# Input formats default to EMBL; only "embl" and "gbk" are accepted.
	# Quoted comparisons make sure a value containing whitespace is rejected
	# instead of raising a test syntax error and slipping through validation.
	format_patho=${format_patho:-embl}
	if [[ "$format_patho" != "embl" && "$format_patho" != "gbk" ]]; then
		echo -e "\n\n ERROR: Format option (-f) must be embl/gbk. \n\n"
		usage
		exit 1
	fi
	format_nonpatho=${format_nonpatho:-embl}
	if [[ "$format_nonpatho" != "embl" && "$format_nonpatho" != "gbk" ]]; then
		echo -e "\n\n ERROR: Format option (-d) must be embl/gbk. \n\n"
		usage
		exit 1
	fi
	# Fill in the built-in thresholds for every option the user did not supply
	# (unset or empty). Parameter expansion never re-parses the value, so a
	# user-supplied string is kept verbatim whatever characters it contains.
	mvirdb_evalue=${mvirdb_evalue:-0.000001}
	colombo_sensitivity=${colombo_sensitivity:-0.95}
	gcdeviation=${gcdeviation:-1.5}
	transposase_evalue=${transposase_evalue:-0.000001}
	act_evalue=${act_evalue:-0.000001}
	plasticity_evalue=${plasticity_evalue:-0.000001}


####################################################################################################


# Create a private scratch directory for all intermediate files and store its
# path in $tmp. mktemp picks the unique suffix and creates the directory in a
# single step; the umask keeps it accessible to the current user only. An
# unset or empty TMPDIR falls back to /tmp.
tmp=$(umask 077 && mktemp -d "${TMPDIR:-/tmp}/$patho-vs-$nonpatho.XXXXXXXXXX") || {
	echo "Could not create temporary directory! Exiting." 1>&2
	exit 1
}
# Installation root. The script takes the current directory as that root, so
# it must be started from the directory holding bin/, blastdb/,
# other_softwares/ and result/.
pips_dir=$(pwd)

###### Check the parameters, then stage the input files in the scratch directory ######
# errorhandling.pl receives every threshold and both file/format pairs; a
# non-zero status prints the help text and aborts.
# NOTE(review): presumably it validates the values -- confirm in bin/errorhandling.pl.
perl "$pips_dir/bin/errorhandling.pl" "$mvirdb_evalue" "$gcdeviation" "$transposase_evalue" "$act_evalue" "$plasticity_evalue" "$colombo_sensitivity" "$patho" "$format_patho" "$nonpatho" "$format_nonpatho" || { usage; exit 1; }

# Work on copies so the user's files are never modified; a failed copy stops
# the run here rather than surfacing later as a missing-file error.
cp -- "$nonpatho" "$tmp/$nonpatho" || exit 1
cp -- "$patho" "$tmp/$patho" || exit 1

# GenBank input is converted to EMBL; from here on the variable names the
# converted copy (original name plus ".embl").
if [[ "$format_patho" == "gbk" ]]; then
	perl "$pips_dir/bin/gbk2embl.pl" "$tmp/$patho" > "$tmp/$patho.embl" || { usage; exit 1; }
	patho="$patho.embl"
fi
if [[ "$format_nonpatho" == "gbk" ]]; then
	perl "$pips_dir/bin/gbk2embl.pl" "$tmp/$nonpatho" > "$tmp/$nonpatho.embl" || { usage; exit 1; }
	nonpatho="$nonpatho.embl"
fi

# Derive the working files from each EMBL record, first for the pathogenic
# genome under analysis, then for the non-pathogenic reference:
#   <genome>.list   written by embl2product.pl
#   <genome>.fasta  written by embl2fasta.pl
#   <genome>.fna    embl2faafna.pl output
#   <genome>.faa    embl2faafna.pl -p output
# Any failing step aborts the run. All paths are quoted so that an install or
# TMPDIR path containing spaces does not split into several arguments.
for genome in "$patho" "$nonpatho"; do
	echo "Creating files $genome"
	perl "$pips_dir/bin/embl2product.pl" "$tmp/$genome" "$tmp/$genome.list" || exit 1
	perl "$pips_dir/bin/embl2fasta.pl" "$tmp/$genome" > "$tmp/$genome.fasta" || exit 1
	perl "$pips_dir/bin/embl2faafna.pl" "$tmp/$genome.fasta" "$tmp/$genome.list" > "$tmp/$genome.fna" || exit 1
	perl "$pips_dir/bin/embl2faafna.pl" -p "$tmp/$genome.fasta" "$tmp/$genome.list" > "$tmp/$genome.faa" || exit 1
done


########################### Creating database for Blast ############################
# Copy the pathogenic genome's .faa file into blastdb/ and index it with
# formatdb (-p T) so it can be used as a BLAST database later on. formatdb is
# run from inside blastdb/; each directory change is checked so that nothing
# runs in the wrong place if one fails.
echo "Formatting Blast database $patho"
cp "$tmp/$patho.faa" "$pips_dir/blastdb/$patho.faa" || exit 1
cd "$pips_dir/blastdb" || exit 1
"$pips_dir/other_softwares/blast/formatdb" -p T -i "$pips_dir/blastdb/$patho.faa" || exit 1
cd "$pips_dir" || exit 1

########## Execute Colombo-SIGIHMM to generate Codon Usage deviation data ###########
# Identify regions with codon usage deviation, first on the pathogenic genome
# and then on the non-pathogenic reference. For each genome:
#   1. okSigiHMM is run from the Colombo directory on a ".embl"-suffixed copy
#      of the record and writes <genome>.gff.
#   2. Text from '#' to the end of the line (newline included) is stripped
#      from the GFF.
#   3. codus2sysid.pl combines the GFF with the product list into
#      <genome>.gff.codonusage.
# The sensitivity comes from the -c option; 0.95 is used if it is unset.
for genome in "$patho" "$nonpatho"; do
	echo "Codon Usage Deviation Analysis $genome"

	cd "$pips_dir/other_softwares/Colombo_3.8/" || exit 1
	cp "$tmp/$genome" "$tmp/$genome.embl" || exit 1

	java okSigiHMM input="$tmp/$genome.embl" gff="$tmp/$genome.gff" sens="${colombo_sensitivity:-0.95}" || exit 1

	cd "$pips_dir/" || exit 1

	perl -pi -e 's/#.+\n//g' "$tmp/$genome.gff"
	perl -pi -e 's/#\n//g' "$tmp/$genome.gff"

	perl "$pips_dir/bin/codus2sysid.pl" "$tmp/$genome.gff" "$tmp/$genome.list" > "$tmp/$genome.gff.codonusage" || exit 1
done


############ Predict tRNA genes in the pathogenic bacterium ###################
# tRNAscan-SE (-B) scans the genome FASTA; its report is then handed to
# trna2embl.pl. Paths are quoted so directories containing spaces work.
echo "Predicting Tranfer RNA $patho"
"$pips_dir/other_softwares/tRNAscan-SE-1.23/tRNAscan-SE" -B "$tmp/$patho.fasta" > "$tmp/$patho.fasta.trna.out" || exit 1
perl "$pips_dir/bin/trna2embl.pl" "$tmp/$patho.fasta.trna.out" || exit 1

########### Execute gccontent.pl script to generate GC content analysis ##########
# Identify regions with G+C content deviation on the pathogenic genome:
# gccontent.pl reads the genome FASTA and the .fna file and writes
# <patho>.fna.gc.
# NOTE(review): the -g value ($gcdeviation) is not passed to this step;
# confirm where the downstream scripts pick it up.
echo "GC Content analysis $patho"

perl "$pips_dir/bin/gccontent.pl" "$tmp/$patho.fasta" "$tmp/$patho.fna" > "$tmp/$patho.fna.gc" || exit 1

########## Execute hmmsearch against transposase database to identify transposase genes #######
# Identify transposase genes on the pathogenic genome: hmmsearch scans the
# .faa file with the transposase HMMs and transpo2table.pl turns the report
# into a table.
# The e-value cut-off now honours the -t option, which was parsed but never
# used here (0.0001 was hard-coded). Pass "-t 0.0001" to reproduce the
# earlier cut-off; that value is also the fallback if the variable is unset.
echo "Transposase identification $patho"

hmmsearch -E "${transposase_evalue:-0.0001}" "$pips_dir/blastdb/transposase.hmm" "$tmp/$patho.faa" > "$tmp/$patho.transposase.out" || exit 1
perl "$pips_dir/bin/transpo2table.pl" "$tmp/$patho.transposase.out" "$tmp/$patho.list" > "$tmp/$patho.faa--vs--transposase.out.tab" || exit 1

########## Generate comparison file against a non-pathogenic species of the same genus (or a related species) to use in ACT #######
# Similarity search between the pathogenic genome and the non-pathogenic
# reference, for manual inspection in ACT:
#   1. splits.pl rewrites the pathogenic genome FASTA into the query file.
#   2. The reference genome FASTA is copied to blastdb/ and indexed (-p F).
#   3. tblastx is run with the -a e-value (0.000001 if unset), which was
#      previously hard-coded here.
#   4. blast2table.pl -s converts the report into the tabular comparison file.
echo ACT

echo Split
perl "$pips_dir/bin/splits.pl" "$tmp/$patho.fasta" > "$tmp/$patho.fasta.splitted" || exit 1

echo formatdb
cp "$tmp/$nonpatho.fasta" "$pips_dir/blastdb/" || exit 1
"$pips_dir/other_softwares/blast/formatdb" -p F -i "$pips_dir/blastdb/$nonpatho.fasta" || exit 1

echo Blast
"$pips_dir/other_softwares/blast/blastall" -p tblastx -i "$tmp/$patho.fasta.splitted" -d "$pips_dir/blastdb/$nonpatho.fasta" -e "${act_evalue:-0.000001}" -o "$tmp/$patho.fasta--vs--$nonpatho.fasta.ff.out" -F F || exit 1

echo Creating Tabular File
perl "$pips_dir/bin/blast2table.pl" -s "$tmp/$patho.fasta--vs--$nonpatho.fasta.ff.out" > "$tmp/$patho.fasta--vs--$nonpatho.fasta.ff.out.tab" || exit 1


########## Generate comparison file against a non-pathogenic species of the same genus (or a related species) for automatic plasticity analysis #######
# blastp of the reference proteins against the pathogenic protein database,
# used by the automatic analysis. The e-value comes from the -e option
# (0.000001 if unset); it was previously hard-coded here. Every '*' in the
# report is replaced by 'A' before blast2table.pl -p builds the table.
echo Blast
"$pips_dir/other_softwares/blast/blastall" -p blastp -i "$tmp/$nonpatho.faa" -d "$pips_dir/blastdb/$patho.faa" -e "${plasticity_evalue:-0.000001}" -o "$tmp/$nonpatho--vs--$patho.ff.out" -F F || exit 1
perl -pi -e 's/\*/A/g' "$tmp/$nonpatho--vs--$patho.ff.out" || exit 1

echo Creating Tabular File
perl "$pips_dir/bin/blast2table.pl" -p "$tmp/$nonpatho--vs--$patho.ff.out" > "$tmp/$nonpatho--vs--$patho.ff.out.tab" || exit 1


###################### Execute Blast and generate Hyperlinked XLS ###########################
# Build the hyperlinked .xls file used for manual analysis, together with the
# directory of per-entry text files it refers to.

# Directory that receives the per-protein files and the virulence BLAST output.
vir_dir="$tmp/$patho.faa-vs-virulenceDB"

mkdir "$vir_dir/" || exit 1

echo Blast2xls
cp "$tmp/$patho.faa" "$vir_dir/$patho.faa" || exit 1

# all2many.pl and blast2many.pl are run from inside $vir_dir.
# NOTE(review): presumably they write their per-entry output into the current
# directory (blast2many.pl into a blastp/ sub-directory) -- confirm.
cd "$vir_dir/" || exit 1

perl "$pips_dir/bin/all2many.pl" "$vir_dir/$patho.faa" 1 || exit 1

"$pips_dir/other_softwares/blast/blastall" -p blastp -i "$vir_dir/$patho.faa" -d "$pips_dir/blastdb/virulenceDB.protein.fasta" -b 40 -v 40 -e "$mvirdb_evalue" -o "$vir_dir/$patho.faa.vs.virulenceDB.out" || exit 1

perl "$pips_dir/bin/blast2many.pl" "$vir_dir/$patho.faa.vs.virulenceDB.out" || exit 1

perl "$pips_dir/bin/blast2table.pl" -f "$vir_dir/$patho.faa.vs.virulenceDB.out" > "$tmp/$patho.faa.vs.virulenceDB.out.tab" || exit 1

# Record the directory listing (the list file names itself too, because the
# redirection creates it before ls runs) and give every listed entry a .txt
# suffix. The listing is kept as produced by ls because it is later passed to
# tab2xls.pl; it is read line by line so names are never word-split or
# glob-expanded. Renaming the list while it is being read is safe: the open
# file descriptor keeps pointing at the same file.
ls > "$vir_dir/$patho.protein.list"

while IFS= read -r entry; do
	mv "$vir_dir/$entry" "$vir_dir/$entry.txt"
done < "$vir_dir/$patho.protein.list"

# The list moves up into $tmp under its original name, and the blastp
# directory gets its name back.
mv "$vir_dir/$patho.protein.list.txt" "$tmp/$patho.protein.list"
mv "$vir_dir/blastp.txt" "$vir_dir/blastp"

# Same .txt renaming inside blastp/; skipped when that directory is missing.
if cd "$vir_dir/blastp/"; then
	ls > "$vir_dir/blastp/blast.list"
	while IFS= read -r entry; do
		mv "$vir_dir/blastp/$entry" "$vir_dir/blastp/$entry.txt"
	done < "$vir_dir/blastp/blast.list"
fi

# Drop the helper files that must not appear among the per-entry pages.
rm -rf "$vir_dir/$patho.faa.txt" "$vir_dir/$patho.faa.vs.virulenceDB.out.tab.txt" "$vir_dir/$patho.faa.vs.virulenceDB.out.txt" "$vir_dir/blastp/blast.list.txt"

# The following steps use file names relative to the scratch directory.
cd "$tmp/" || exit 1

perl "$pips_dir/bin/mergefiles.pl" "$tmp/$patho.faa.vs.virulenceDB.out.tab" "$tmp/$patho.fna.gc" "$tmp/$patho.gff.codonusage" "$tmp/$patho.faa--vs--transposase.out.tab" > "$tmp/$patho.faa.vs.virulenceDB.txt" || exit 1
perl "$pips_dir/bin/tab2xls.pl" "$tmp/$patho.faa.vs.virulenceDB.txt" "$tmp/$patho.protein.list" "$patho.faa-vs-virulenceDB" > "$tmp/$patho.faa.vs.virulenceDB.xls" || exit 1


################### Verify plasticity between pathogenic and non-pathogenic bacterium ###############################
# Automatic analysis that combines the files produced so far. The scripts are
# given file names relative to the current directory, which at this point is
# the scratch directory. Arguments are quoted so a name containing whitespace
# stays a single argument.
echo "Verifying Plasticity between $patho and $nonpatho"

perl "$pips_dir/bin/plasticity.pl" "$nonpatho.faa" "$patho.faa" "$nonpatho--vs--$patho.ff.out.tab" "$patho.gff.codonusage" "$nonpatho.gff.codonusage" || exit 1
perl "$pips_dir/bin/plasticity2.pl" "$patho" || exit 1
perl "$pips_dir/bin/paifinder.pl" "$patho" || exit 1
perl "$pips_dir/bin/paifinder2.pl" "$patho" || exit 1


###################################################################################################################
# Collect the deliverables in result/ and remove the scratch directory.
mkdir -p "$pips_dir/result" || exit 1

# A virulence directory left over from an earlier run of the same genome is
# replaced. ${pips_dir:?} aborts instead of deleting under "/" should the
# variable ever be empty.
if [[ -d "$pips_dir/result/$patho.faa-vs-virulenceDB" ]]; then
	rm -rf -- "${pips_dir:?}/result/$patho.faa-vs-virulenceDB"
fi

# This cd must succeed: the moves below target ".", and the scratch directory
# is deleted right afterwards. If the cd failed silently, the results would
# stay in the scratch directory and be deleted together with it.
cd "$pips_dir/result" || exit 1
mv "$tmp/$patho.fasta--vs--$nonpatho.fasta.ff.out.tab" .
mv "$tmp/$patho.faa-vs-virulenceDB" .
mv "$tmp/$patho.faa.vs.virulenceDB.xls" .
mv "$tmp/$patho.Putative_Islands" .
mv "$tmp/$patho.PAI.tab" .
rm -rf -- "${tmp:?}"
exit

