#!/usr/bin/perl
# blast2table - perl script that parses the results from the blast searches creating tabular files.
#
#
# Written by: Siomar C. Soares, Federal University of Minas Gerais (UFMG), 
#   Laboratory of Cellular and Molecular Genetics, Brazil
#
# Date Written: Sep 27, 2010
#
#
#Usage = ./blast2table.pl -a <blast_outfile> > <outfile> (Tabular file with all the alignments.)
#Usage = ./blast2table.pl -f <blast_outfile> > <outfile> (Tabular file with first alignment of each search only.)
#Usage = ./blast2table.pl -t <blast_outfile> > <outfile> (Tabular file with first alignment of each search only. Only the name of the protein with hit.)
#Usage = ./blast2table.pl -s <blast_outfile> > <outfile> (Tabular file with all alignments from a tblastx search to use on ACT.)
#Usage = ./blast2table.pl -p <blast_outfile> > <outfile> (Tabular file with the first hit of each search only, hit name cut at its first space. Searches without hits are omitted.)

use Getopt::Std;

# Mode switches filled in by getopts() below; every mode block tests its
# switch with defined().
our ($opt_a, $opt_f, $opt_t, $opt_s, $opt_p);

# Scratch variables shared by the mode blocks further down.
my $locus_tag;
my $line;

# Historically the report name was simply the second command-line word, read
# before option parsing.  Remember it so existing invocations resolve to the
# same file as before.
my $positional_file = $ARGV[1];

# -a/-f/-t/-s take the report name as their value; -p is a plain switch, so
# its report name is left behind in @ARGV.
getopts('a:f:t:s:p');

# Name of the BLAST report to parse.  Fall back to what getopts() found when
# the second word is missing or is itself a switch (e.g. "-p -f report" or
# "-areport"), which previously selected the wrong name.
my $vf = $positional_file;
if (!defined($vf) || $vf =~ m/^-/) {
    ($vf) = grep { defined } ($opt_a, $opt_f, $opt_t, $opt_s, $ARGV[0]);
}

# Without a mode or a report none of the blocks below can produce output, so
# say so instead of exiting silently.
my $mode_given = grep { defined } ($opt_a, $opt_f, $opt_t, $opt_s, $opt_p);
if (!$mode_given || !defined($vf)) {
    die <<"END_USAGE";
Usage: $0 -a <blast_outfile> > <outfile>   (all alignments)
       $0 -f <blast_outfile> > <outfile>   (first alignment of each search)
       $0 -t <blast_outfile> > <outfile>   (name of the first hit of each search)
       $0 -s <blast_outfile> > <outfile>   (tblastx alignments for ACT)
       $0 -p <blast_outfile> > <outfile>   (first hit of each search, short hit name)
END_USAGE
}
# -a mode: print one tab-separated row per alignment (each " Score = "
# section) of every hit of every query in the report named by $vf.
#
# Columns: query id, hit description, % identity, alignment length,
# non-identical positions (length - identities), gap openings, query start,
# query end, subject start, subject end, e-value, bit score.
# A query reported as "No hits found" yields a two-column row instead.
#
# Layout the parser relies on: the first "Query:" row is the second line after
# the " Identities = " line, alignment rows are 60 columns wide, and alignment
# blocks are Query:/match/Sbjct: triples separated by one line.  When the
# "Query:" row is not where expected, the coordinate and gap columns are left
# empty.
if (defined($opt_a)) {
    my $query_re   = qr/Query\=[ ]/;
    my $hitlist_re = qr/Sequences producing significant alignments/;
    my $score_re   = qr/ Score \=[ ]+/;

    # Number of runs of '-' in one alignment row; each run is one gap opening.
    my $count_gap_runs = sub {
        my ($row) = @_;
        my $runs = () = $row =~ m/\-+/g;
        return $runs;
    };

    my $in;
    if (!defined($vf)) {
        warn "blast2table: no BLAST report given\n";
    }
    elsif (!open($in, '<', $vf)) {
        warn "blast2table: cannot open BLAST report '$vf': $!\n";
    }
    else {
        # Next line of the report, or '' once the report is exhausted.
        my $next_row = sub {
            my $row = <$in>;
            return defined($row) ? $row : '';
        };

        my $line;
        my $locus_tag;
        my @hit_names;      # hit descriptions of the current query, in report order
        my $hit_idx = 0;    # position in @hit_names of the hit being parsed

        LINE: while ($line = <$in>) {
            if ($line =~ m/$query_re/) {
                # "Query= <id> <description>": keep the id only.
                $locus_tag = $line;
                $locus_tag =~ s/$query_re//g;
                $locus_tag =~ s/[ ].+//g;
                chomp $locus_tag;
            }
            elsif ($line =~ m/No hits found/) {
                $hit_idx   = 0;
                @hit_names = (" ***** No hits found ******");
                print "$locus_tag\t$hit_names[0]\n";
            }
            elsif ($line =~ m/$hitlist_re/) {
                # One-line hit summaries start two lines below the header and
                # run up to the next empty line.
                @hit_names = ();
                $line = <$in>;
                $line = <$in>;
                while (defined($line) && $line =~ m/.+/) {
                    chomp $line;
                    $line =~ s/\.\.\..+//g;    # drop everything from a "..." truncation mark on
                    push @hit_names, $line;
                    $line = <$in>;
                }
                $hit_idx = 0;
            }
            elsif ($line =~ m/^\>/) {
                # Alignment section of one hit: runs until the next ">" header,
                # the "Reference" line of the next query, or end of file.
                # The defined() test stops the loop at end of file; without it
                # the loop never terminates when the report ends here.
                $line = <$in>;
                while (defined($line) && $line !~ m/^\>/ && $line !~ m/^Reference/) {
                    if ($line =~ m/$score_re/) {
                        my ($score) = $line =~ m/ Score \=[ ]+([^ \n]*)/;
                        # "Expect(2) = ..." appears as well as "Expect = ...".
                        my ($expect) = $line =~ m/Expect(?:\([0-9]+\))? \=[ ]+([^,\n]*)/;

                        my $id_row = $next_row->();
                        my ($identical, $aln_len, $pct_id) =
                            $id_row =~ m{ Identities \=[ ]+([0-9]+)/([0-9]+) \(([0-9]+)\%};
                        my $diff = defined($aln_len) ? $aln_len - $identical : '';

                        $next_row->();                  # line between header and alignment
                        my $q_row = $next_row->();      # expected: first "Query:" row
                        my ($gaps, $q_start, $q_end, $s_start, $s_end) = ('') x 5;

                        if ($q_row =~ m/Query\:/) {
                            $next_row->();              # match line
                            my $s_row = $next_row->();  # first "Sbjct:" row
                            $next_row->();              # separator line

                            $q_start = $q_row;
                            $q_start =~ s/Query\: //g;
                            $q_start =~ s/[ ].+//g;
                            chomp $q_start;
                            $s_start = $s_row;
                            $s_start =~ s/Sbjct\: //g;
                            $s_start =~ s/[ ].+//g;
                            chomp $s_start;

                            $gaps = $count_gap_runs->($q_row) + $count_gap_runs->($s_row);

                            # One block per 60 alignment columns; walk the
                            # remaining blocks to reach the end coordinates.
                            my $blocks = defined($aln_len) ? $aln_len / 60 : 0;
                            my $block  = 1;
                            while ($block < $blocks) {
                                $block++;
                                my $q_next = $next_row->();
                                $next_row->();          # match line
                                my $s_next = $next_row->();
                                $next_row->();          # separator line

                                $gaps += $count_gap_runs->($q_next) + $count_gap_runs->($s_next);

                                # A gap that ends one row and starts the next
                                # is a single opening, not two.
                                if ($q_row =~ m/\-[ ][0-9]+/ && $q_next =~ m/Query\:[ ][0-9]+[ ]+\-/) {
                                    $gaps--;
                                }
                                if ($s_row =~ m/\-[ ][0-9]+/ && $s_next =~ m/Sbjct\:[ ][0-9]+[ ]+\-/) {
                                    $gaps--;
                                }
                                $q_row = $q_next;
                                $s_row = $s_next;
                            }

                            # End coordinate = last space-separated field of
                            # the final row.
                            $q_end = $q_row;
                            $q_end =~ s/.+[ ]//g;
                            chomp $q_end;
                            $s_end = $s_row;
                            $s_end =~ s/.+[ ]//g;
                            chomp $s_end;
                        }

                        print join("\t",
                            $locus_tag, $hit_names[$hit_idx], $pct_id, $aln_len,
                            $diff, $gaps, $q_start, $q_end, $s_start, $s_end,
                            $expect, $score), "\n";
                    }
                    $line = <$in>;
                }

                # One description per hit, however many alignments it had.
                $hit_idx++;

                # The line that ended the loop may be the header of the next
                # hit; hand it back to the dispatcher instead of dropping it,
                # otherwise every second hit is lost.
                redo LINE if defined($line) && $line =~ m/^\>/;
            }
        }
        close($in);
    }
}

# -f / -t modes: print one row per query, describing its first hit only.
#
#   -t        query id and the description of the first hit (or the
#             "No hits found" marker).
#   -f only   as -t for queries without hits; otherwise twelve columns:
#             query id, hit description, % identity, alignment length,
#             non-identical positions (length - identities), gap openings,
#             query start, query end, subject start, subject end, e-value,
#             bit score -- all taken from the first alignment of the first hit.
#
# When -p is given as well, hit descriptions are cut at their first space.
#
# The report is read twice: the first pass collects the data per query id, the
# second pass prints one row for every "Query= " line in report order.
# Results are keyed by the query id alone, so ids such as X1 and X11 cannot
# overwrite each other's entries.
#
# Layout the parser relies on: the first "Query:" row is the second line after
# the " Identities = " line, alignment rows are 60 columns wide, and alignment
# blocks are Query:/match/Sbjct: triples separated by one line.  When the
# "Query:" row is not where expected, the coordinate and gap columns are left
# empty.
if (defined($opt_f) || defined($opt_t)) {
    my $query_re   = qr/Query\=[ ]/;
    my $hitlist_re = qr/Sequences producing significant alignments/;
    my $score_re   = qr/ Score \=[ ]+/;

    # Number of runs of '-' in one alignment row; each run is one gap opening.
    my $count_gap_runs = sub {
        my ($row) = @_;
        my $runs = () = $row =~ m/\-+/g;
        return $runs;
    };

    my %first_hit;    # query id => description of its first hit, or the no-hits marker
    my %first_row;    # query id => [ %id, length, diff, gaps, qstart, qend, sstart, send, e-value, score ]

    my $in;
    if (!defined($vf)) {
        warn "blast2table: no BLAST report given\n";
    }
    elsif (!open($in, '<', $vf)) {
        warn "blast2table: cannot open BLAST report '$vf': $!\n";
    }
    else {
        # ---- pass 1: collect the first hit of every query ----

        # Next line of the report, or '' once the report is exhausted.
        my $next_row = sub {
            my $row = <$in>;
            return defined($row) ? $row : '';
        };

        my $line;
        my $locus_tag;
        my $first_hit_done = 0;    # true once the first hit of the current query was read

        while ($line = <$in>) {
            if ($line =~ m/$query_re/) {
                # "Query= <id> <description>": keep the id only.
                $first_hit_done = 0;
                $locus_tag = $line;
                $locus_tag =~ s/$query_re//g;
                $locus_tag =~ s/[ ].+//g;
                chomp $locus_tag;
            }
            elsif ($line =~ m/No hits found/) {
                $first_hit{$locus_tag} = " ***** No hits found ******";
            }
            elsif ($line =~ m/$hitlist_re/) {
                # One-line hit summaries start two lines below the header and
                # run up to the next empty line; only the first is kept.
                $line = <$in>;
                $line = <$in>;
                my $rank = 0;
                while (defined($line) && $line =~ m/.+/) {
                    if ($rank == 0) {
                        chomp $line;
                        if (defined($opt_p)) {
                            $line =~ s/[ ].+//g;        # identifier only
                        }
                        else {
                            $line =~ s/\.\.\..+//g;     # drop everything from "..." on
                        }
                        $first_hit{$locus_tag} = $line;
                    }
                    $rank++;
                    $line = <$in>;
                }
                $first_hit_done = 0;
            }
            elsif (!$first_hit_done && $line =~ m/^\>/) {
                # Alignment section of the first hit.  The defined() test
                # stops the loop at end of file; without it the loop never
                # terminates when the report ends inside this section.
                $line = <$in>;
                while (defined($line) && $line !~ m/^\>/ && $line !~ m/^Reference/) {
                    if ($line =~ m/$score_re/) {
                        my ($score) = $line =~ m/ Score \=[ ]+([^ \n]*)/;
                        # "Expect(2) = ..." appears as well as "Expect = ...".
                        my ($expect) = $line =~ m/Expect(?:\([0-9]+\))? \=[ ]+([^,\n]*)/;

                        my $id_row = $next_row->();
                        my ($identical, $aln_len, $pct_id) =
                            $id_row =~ m{ Identities \=[ ]+([0-9]+)/([0-9]+) \(([0-9]+)\%};
                        my $diff = defined($aln_len) ? $aln_len - $identical : '';

                        $next_row->();                  # line between header and alignment
                        my $q_row = $next_row->();      # expected: first "Query:" row
                        my ($gaps, $q_start, $q_end, $s_start, $s_end) = ('') x 5;

                        if ($q_row =~ m/Query\:/) {
                            $next_row->();              # match line
                            my $s_row = $next_row->();  # first "Sbjct:" row
                            $next_row->();              # separator line

                            $q_start = $q_row;
                            $q_start =~ s/Query\: //g;
                            $q_start =~ s/[ ].+//g;
                            chomp $q_start;
                            $s_start = $s_row;
                            $s_start =~ s/Sbjct\: //g;
                            $s_start =~ s/[ ].+//g;
                            chomp $s_start;

                            $gaps = $count_gap_runs->($q_row) + $count_gap_runs->($s_row);

                            # One block per 60 alignment columns; walk the
                            # remaining blocks to reach the end coordinates.
                            my $blocks = defined($aln_len) ? $aln_len / 60 : 0;
                            my $block  = 1;
                            while ($block < $blocks) {
                                $block++;
                                my $q_next = $next_row->();
                                $next_row->();          # match line
                                my $s_next = $next_row->();
                                $next_row->();          # separator line

                                $gaps += $count_gap_runs->($q_next) + $count_gap_runs->($s_next);

                                # A gap that ends one row and starts the next
                                # is a single opening, not two.
                                if ($q_row =~ m/\-[ ][0-9]+/ && $q_next =~ m/Query\:[ ][0-9]+[ ]+\-/) {
                                    $gaps--;
                                }
                                if ($s_row =~ m/\-[ ][0-9]+/ && $s_next =~ m/Sbjct\:[ ][0-9]+[ ]+\-/) {
                                    $gaps--;
                                }
                                $q_row = $q_next;
                                $s_row = $s_next;
                            }

                            # End coordinate = last space-separated field of
                            # the final row.
                            $q_end = $q_row;
                            $q_end =~ s/.+[ ]//g;
                            chomp $q_end;
                            $s_end = $s_row;
                            $s_end =~ s/.+[ ]//g;
                            chomp $s_end;
                        }

                        $first_row{$locus_tag} = [
                            $pct_id, $aln_len, $diff, $gaps, $q_start,
                            $q_end, $s_start, $s_end, $expect, $score,
                        ];

                        # Only the first alignment is reported; later ones of
                        # the same hit must not replace it.
                        last;
                    }
                    $line = <$in>;
                }
                $first_hit_done = 1;
            }
        }
        close($in);

        # ---- pass 2: one output row per query, in report order ----
        if (open($in, '<', $vf)) {
            while (my $query_line = <$in>) {
                next unless $query_line =~ m/$query_re/;

                my $tag = $query_line;
                $tag =~ s/$query_re//g;
                $tag =~ s/[ ].+//g;
                chomp $tag;

                my $hit = $first_hit{$tag};
                $hit = '' unless defined($hit);

                if (defined($opt_t) || $hit =~ m/No hits found/) {
                    print "$tag\t$hit\n";
                }
                else {
                    my @row = @{ $first_row{$tag} || [] };
                    print join("\t", $tag, $hit, @row[0 .. 9]), "\n";
                }
            }
            close($in);
        }
        else {
            warn "blast2table: cannot re-open BLAST report '$vf': $!\n";
        }
    }
}

# -s mode: print one tab-separated row per alignment of a tblastx report, with
# the first two columns fixed to "all_bases".
#
# Columns: all_bases, all_bases, % identity, alignment length, non-identical
# positions (length - identities), 0 (gaps are not counted in this mode),
# query start, query end, subject start, subject end, e-value, bit score.
#
# Query coordinates are shifted by the number taken from the "Query= " line:
# the text between "{" and ".." when the line contains "{start..end}".  The
# printed value is (coordinate + start) - 1.
#
# The report is split into per-query chunks on the word "TBLASTX".
if (defined($opt_s)) {
    my $in;
    if (!defined($vf)) {
        warn "blast2table: no BLAST report given\n";
    }
    elsif (!open($in, '<', $vf)) {
        warn "blast2table: cannot open BLAST report '$vf': $!\n";
    }
    else {
        my $report = do { local $/; <$in> };
        close($in);
        $report = '' unless defined($report);

        # Values carried from one report line to the next.  They are kept in
        # variables private to this mode so the tables used by the other
        # modes are not overwritten.
        my ($offset, $score, $expect, $pct_id, $aln_len, $diff);

        for my $chunk (split(/TBLASTX/, $report)) {
            my @lines = split(/\n/, $chunk);

            # Consume the lines as a queue.  Iterating with foreach over an
            # array that is shifted inside the loop body ends the loop early
            # and leaves the tail of every chunk unparsed.
            while (@lines) {
                my $line = shift(@lines);

                if ($line =~ m/Query\=[ ]/) {
                    $offset = $line;
                    $offset =~ s/Query\=[ ]//g;
                    $offset =~ s/.+\{//g;
                    $offset =~ s/\.\..+\}//g;
                }
                elsif ($line =~ m/Score \=[ ]+/) {
                    $score = $line;
                    $score =~ s/ Score \=[ ]+//g;
                    $score =~ s/[ ].+//g;
                    $expect = $line;
                    $expect =~ s/.+Expect.+\=[ ]+//g;

                    # The " Identities = a/b (p%)" line follows the score line.
                    my $id_line = shift(@lines);
                    my $ratio   = $id_line;
                    $ratio =~ s/ Identities \=[ ]+//g;
                    $ratio =~ s/ \(.+//g;
                    my $identical = $ratio;
                    $identical =~ s/\/.+//g;
                    $aln_len = $ratio;
                    $aln_len =~ s/.+\///g;
                    $diff   = $aln_len - $identical;
                    $pct_id = $id_line;
                    $pct_id =~ s/ Identities \= [0-9]+\/[0-9]+ \(//g;
                    $pct_id =~ s/\%.+//g;
                }
                elsif ($line =~ m/Query\:/) {
                    my $q_first = $line;
                    my $q_last  = $line;
                    shift(@lines);                      # match line
                    my $s_first = shift(@lines);        # "Sbjct:" row
                    my $s_last  = $s_first;
                    shift(@lines);                      # line after the block

                    # One block per 60 alignment columns.  Each further block
                    # is read as: skip one line, Query row, skip one line,
                    # Sbjct row, skip one line -- i.e. two non-alignment lines
                    # are expected between blocks.
                    # NOTE(review): the other modes expect a single separator
                    # line; confirm this count against the tblastx reports
                    # this mode is used with.
                    my $blocks = $aln_len / 60;
                    my $block  = 1;
                    while ($block < $blocks) {
                        $block++;
                        shift(@lines);
                        $q_last = shift(@lines);
                        shift(@lines);
                        $s_last = shift(@lines);
                        shift(@lines);
                    }

                    $q_first =~ s/Query\: //g;
                    $q_first =~ s/[ ].+//g;
                    $s_first =~ s/Sbjct\: //g;
                    $s_first =~ s/[ ].+//g;
                    $q_last  =~ s/.+[ ]//g;
                    $s_last  =~ s/.+[ ]//g;

                    my $q_start = ($q_first + $offset) - 1;
                    my $q_end   = ($q_last + $offset) - 1;

                    print "all_bases\tall_bases\t$pct_id\t$aln_len\t$diff\t0\t$q_start\t$q_end\t$s_first\t$s_last\t$expect\t$score\n";
                }
            }
        }
    }
}
# -p mode: print one twelve-column row per query that has hits, describing the
# first alignment of its first hit; queries reported as "No hits found" are
# omitted.
#
# Columns: query id, hit description cut at its first space, % identity,
# alignment length, non-identical positions (length - identities), gap
# openings, query start, query end, subject start, subject end, e-value,
# bit score.
#
# The report is read twice: the first pass collects the data per query id, the
# second pass prints one row for every "Query= " line in report order.
# Results are keyed by the query id alone, so ids such as X1 and X11 cannot
# overwrite each other's entries.
#
# Layout the parser relies on: the first "Query:" row is the second line after
# the " Identities = " line, alignment rows are 60 columns wide, and alignment
# blocks are Query:/match/Sbjct: triples separated by one line.  When the
# "Query:" row is not where expected, the coordinate and gap columns are left
# empty.
if (defined($opt_p)) {
    my $query_re   = qr/Query\=[ ]/;
    my $hitlist_re = qr/Sequences producing significant alignments/;
    my $score_re   = qr/ Score \=[ ]+/;

    # Number of runs of '-' in one alignment row; each run is one gap opening.
    my $count_gap_runs = sub {
        my ($row) = @_;
        my $runs = () = $row =~ m/\-+/g;
        return $runs;
    };

    my %first_hit;    # query id => identifier of its first hit, or the no-hits marker
    my %first_row;    # query id => [ %id, length, diff, gaps, qstart, qend, sstart, send, e-value, score ]

    my $in;
    if (!defined($vf)) {
        warn "blast2table: no BLAST report given\n";
    }
    elsif (!open($in, '<', $vf)) {
        warn "blast2table: cannot open BLAST report '$vf': $!\n";
    }
    else {
        # ---- pass 1: collect the first hit of every query ----

        # Next line of the report, or '' once the report is exhausted.
        my $next_row = sub {
            my $row = <$in>;
            return defined($row) ? $row : '';
        };

        my $line;
        my $locus_tag;
        my $first_hit_done = 0;    # true once the first hit of the current query was read

        while ($line = <$in>) {
            if ($line =~ m/$query_re/) {
                # "Query= <id> <description>": keep the id only.
                $first_hit_done = 0;
                $locus_tag = $line;
                $locus_tag =~ s/$query_re//g;
                $locus_tag =~ s/[ ].+//g;
                chomp $locus_tag;
            }
            elsif ($line =~ m/No hits found/) {
                $first_hit{$locus_tag} = " ***** No hits found ******";
            }
            elsif ($line =~ m/$hitlist_re/) {
                # One-line hit summaries start two lines below the header and
                # run up to the next empty line; only the first is kept.
                $line = <$in>;
                $line = <$in>;
                my $rank = 0;
                while (defined($line) && $line =~ m/.+/) {
                    if ($rank == 0) {
                        chomp $line;
                        $line =~ s/[ ].+//g;            # identifier only
                        $first_hit{$locus_tag} = $line;
                    }
                    $rank++;
                    $line = <$in>;
                }
                $first_hit_done = 0;
            }
            elsif (!$first_hit_done && $line =~ m/^\>/) {
                # Alignment section of the first hit: runs until the next ">"
                # header, the "Reference" line of the next query, or end of
                # file.
                $line = <$in>;
                while (defined($line) && $line !~ m/^\>/ && $line !~ m/^Reference/) {
                    if ($line =~ m/$score_re/) {
                        my ($score) = $line =~ m/ Score \=[ ]+([^ \n]*)/;
                        # "Expect(2) = ..." appears as well as "Expect = ...".
                        my ($expect) = $line =~ m/Expect(?:\([0-9]+\))? \=[ ]+([^,\n]*)/;

                        my $id_row = $next_row->();
                        my ($identical, $aln_len, $pct_id) =
                            $id_row =~ m{ Identities \=[ ]+([0-9]+)/([0-9]+) \(([0-9]+)\%};
                        my $diff = defined($aln_len) ? $aln_len - $identical : '';

                        $next_row->();                  # line between header and alignment
                        my $q_row = $next_row->();      # expected: first "Query:" row
                        my ($gaps, $q_start, $q_end, $s_start, $s_end) = ('') x 5;

                        if ($q_row =~ m/Query\:/) {
                            $next_row->();              # match line
                            my $s_row = $next_row->();  # first "Sbjct:" row
                            $next_row->();              # separator line

                            $q_start = $q_row;
                            $q_start =~ s/Query\: //g;
                            $q_start =~ s/[ ].+//g;
                            chomp $q_start;
                            $s_start = $s_row;
                            $s_start =~ s/Sbjct\: //g;
                            $s_start =~ s/[ ].+//g;
                            chomp $s_start;

                            $gaps = $count_gap_runs->($q_row) + $count_gap_runs->($s_row);

                            # One block per 60 alignment columns; walk the
                            # remaining blocks to reach the end coordinates.
                            my $blocks = defined($aln_len) ? $aln_len / 60 : 0;
                            my $block  = 1;
                            while ($block < $blocks) {
                                $block++;
                                my $q_next = $next_row->();
                                $next_row->();          # match line
                                my $s_next = $next_row->();
                                $next_row->();          # separator line

                                $gaps += $count_gap_runs->($q_next) + $count_gap_runs->($s_next);

                                # A gap that ends one row and starts the next
                                # is a single opening, not two.
                                if ($q_row =~ m/\-[ ][0-9]+/ && $q_next =~ m/Query\:[ ][0-9]+[ ]+\-/) {
                                    $gaps--;
                                }
                                if ($s_row =~ m/\-[ ][0-9]+/ && $s_next =~ m/Sbjct\:[ ][0-9]+[ ]+\-/) {
                                    $gaps--;
                                }
                                $q_row = $q_next;
                                $s_row = $s_next;
                            }

                            # End coordinate = last space-separated field of
                            # the final row.
                            $q_end = $q_row;
                            $q_end =~ s/.+[ ]//g;
                            chomp $q_end;
                            $s_end = $s_row;
                            $s_end =~ s/.+[ ]//g;
                            chomp $s_end;
                        }

                        $first_row{$locus_tag} = [
                            $pct_id, $aln_len, $diff, $gaps, $q_start,
                            $q_end, $s_start, $s_end, $expect, $score,
                        ];

                        # Only the first alignment is reported; later ones of
                        # the same hit must not replace it.
                        last;
                    }
                    $line = <$in>;
                }
                $first_hit_done = 1;
            }
        }
        close($in);

        # ---- pass 2: one output row per query with hits, in report order ----
        if (open($in, '<', $vf)) {
            while (my $query_line = <$in>) {
                next unless $query_line =~ m/$query_re/;

                my $tag = $query_line;
                $tag =~ s/$query_re//g;
                $tag =~ s/[ ].+//g;
                chomp $tag;

                my $hit = $first_hit{$tag};
                $hit = '' unless defined($hit);
                next if $hit =~ m/No hits found/;

                my @row = @{ $first_row{$tag} || [] };
                print join("\t", $tag, $hit, @row[0 .. 9]), "\n";
            }
            close($in);
        }
        else {
            warn "blast2table: cannot re-open BLAST report '$vf': $!\n";
        }
    }
}

