#!/usr/bin/perl

# A script to parse the output of pfam_scan.pl
#
# Every data line of <infile> is split on whitespace and written to
# <outfile> as one "FT gene" feature.  The feature location is not taken
# from the match columns but from the sequence id, which carries the
# coordinates of the original ORF (see feature_location below).

use strict;
use warnings;
use File::Basename;

my $USAGE = "Usage: ./parse_pfam.pl <infile> <outfile>\n";

# Run only when executed as a script, so that the subs below can also be
# loaded from another file (e.g. with require) without touching any files.
main(@ARGV) unless caller;

# Entry point: read <infile>, convert every data line, write <outfile>.
#   $infile  - path of the pfam_scan.pl output to read
#   $outfile - path of the feature table to write (created/truncated)
# Dies if either file cannot be opened or the output cannot be written.
sub main {
    my ($infile, $outfile) = @_;

    # With no arguments at all, show the usage text and exit quietly;
    # a missing <outfile> is reported as an error instead of being passed
    # on to open().
    unless (@_) {
        print $USAGE;
        exit;
    }
    die $USAGE if @_ < 2;

    # Three-argument open with lexical handles: the file name is never
    # interpreted as part of the open mode.
    open(my $in, '<', $infile) or die "Can't open $infile: $!";

    # The input is consumed completely before the output is opened, so
    # using the same path for both arguments does not truncate the input
    # before it has been read.
    my @features;
    while (my $line = <$in>) {
        next if is_ignorable_line($line);

        my $feature = pfam_line_to_feature($line);
        if (defined $feature) {
            push @features, $feature;
        }
        else {
            warn "Skipping line $. of $infile: cannot parse a feature from it\n";
        }
    }
    close $in;

    open(my $out, '>', $outfile) or die "Can't open $outfile: $!";
    print {$out} @features or die "Can't write to $outfile: $!";

    # Buffered write errors only surface when the handle is closed.
    close $out or die "Can't close $outfile: $!";

    return;
}

# True for lines that carry no data: blank lines and '#' comment lines.
sub is_ignorable_line {
    my ($line) = @_;

    return 1 if !defined $line;
    return 1 if $line !~ /\S/;
    return 1 if $line =~ /\A\s*#/;
    return 0;
}

# Convert one line of pfam_scan.pl output into the text of one feature.
#   $line - a single input line (a trailing newline is allowed)
# Returns the feature text (six "FT" lines, newline-terminated), or undef
# when the line is blank, a comment, has fewer than nine whitespace
# separated fields, or its sequence id carries no usable coordinates.
sub pfam_line_to_feature {
    my ($line) = @_;

    return if is_ignorable_line($line);

    # split ' ' ignores leading whitespace, so an indented line does not
    # shift every field by one position.
    my @parts = split ' ', $line;
    return if @parts < 9;

    # Fields 1 and 2 are not used by this script.
    my ($seq_id, $hmm_acc, $hmm_start, $hmm_end, $bit_score, $evalue, $hmm_name)
        = @parts[0, 3 .. 8];

    my $location = feature_location($seq_id);
    return unless defined $location;

    # NOTE(review): the single-space layout below is reproduced exactly as
    # it appears in the reviewed source.  Feature-table readers commonly
    # expect fixed-column padding - confirm the spacing was not collapsed
    # before this file was reviewed.
    return <<"EOF";
FT gene $location
FT /note="PFAM match to $hmm_acc $hmm_start..$hmm_end bit score $bit_score evalue $evalue"
FT /label=$seq_id
FT /label=$hmm_name
FT /score=$bit_score
FT /colour=10 155 100
EOF
}

# Work out the feature location from a sequence id.
#
# The start and stop locations are those of the original ORF, which are
# kept in the "trans" file name: the second to last dot-separated part of
# the id is expected to look like "<start>-<end>".
#   $seq_id - sequence id, e.g. "contig1.100-400.trans"
# Returns "start..end" when start < end, "complement(end..start)"
# otherwise, and undef when no such coordinate pair is present.
sub feature_location {
    my ($seq_id) = @_;

    return unless defined $seq_id;

    my @id_parts = split /\./, $seq_id;
    return if @id_parts < 2;

    # Only plain non-negative integers are accepted, so the numeric
    # comparison below never runs on undefined or non-numeric text.
    return unless $id_parts[-2] =~ /\A([0-9]+)-([0-9]+)\z/;
    my ($seq_start, $seq_end) = ($1, $2);

    return $seq_start < $seq_end
        ? sprintf('%s..%s', $seq_start, $seq_end)
        : sprintf('complement(%s..%s)', $seq_end, $seq_start);
}

1;

__END__