ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/scripts/btabflt.pl
Revision: 24
Committed: Tue Jul 26 21:46:39 2011 UTC (8 years, 1 month ago) by gpertea
File size: 2774 byte(s)
Log Message:
Line File contents
1 #!/usr/bin/perl
2 use strict;
3 use Getopt::Std;
4 #use FindBin;use lib $FindBin::Bin;
5
6 my $usage = q/Usage:
7 btabflt.pl [-p <min_sim>] [-c <min_cov>] [-i <max_intron>] <input_btab..>
8
9 It assumes that the input has the extended format which includes
10 segment information (i.e. as generated by blast2btab)
11
12 It will show only those alignments of at least <min_sim> percent similarity
13 (default: 60), at least <min_cov> percent coverage of query (default 50)
14 and not having a genomic gap (intron) larger than <max_intron>
15 (default: 500000)
16
17 /;
18
19 getopts('p:c:i:') || die ($usage."\n");
20 my $mincov=$Getopt::Std::opt_c || 50;
21 my $minpsim=$Getopt::Std::opt_p || 70;
22 my $maxintron=$Getopt::Std::opt_i || 500000;
23
24 while (<>) {
25 next if m/^\s*#/;
26 my $line=$_;
27 chomp;
28 # 0 1 2 3 4 5 6 7 8 9
29 my ($qname, $date, $qlen, $method, $db, $sname, $q_5, $q_3, $s_5, $s_3,
30 # 10 11 12 13 14 15 16 17 18
31 $pid, $psim, $score, $fofsb, $fofse, $sdescr, $frame, $strand, $slen,
32 # 19 20 21
33 $e_val, $scov, $hsps)=split(/\t/);
34 next unless $psim>=$minpsim;
35 my @hsp=split(/\~/,$hsps);
36 my @qh; #current line HSPs on query: list of [ql, qr]
37 my @sh; #current line HSPS on subj
38 foreach my $h (@hsp) {
39 my ($q5,$q3, $h5, $h3, $p)=($h=~/(\d+)\-(\d+)\:(\d+)\-(\d+)\|([\d\.]+)/);
40 die("Error parsing segment data $h for btab line:\n$_\n") unless $p>1;
41 ($q5, $q3)=($q3,$q5) if $q5>$q3;
42 push(@qh, [$q5, $q3]);
43 ($h5, $h3)=($h3,$h5) if $h5>$h3;
44 push(@sh, [$h5, $h3]);
45 }
46 @sh = sort { $main::a->[0] <=> $main::b->[0] } @sh;
47 my $prev_end=0;
48 my $badintron=0;
49 foreach my $ed (@sh) {
50 if ($prev_end) {
51 if ($$ed[0]-$prev_end>$maxintron) {
52 $badintron=1;
53 last;
54 }
55 }
56 $prev_end=$$ed[1];
57 }
58 next if $badintron;
59 my $cov=getCov($qlen, \@qh);
60 next unless $cov>=$mincov;
61 chomp($line);
62 print $line."\t$cov\n";
63 }
64
65 sub getCov {
66 # merge overlapping segments (yes, blast can produce such segments)
67 my ($len, $r)=@_;
68 my @a=sort { $main::a->[0]<=>$main::b->[0] } @$r;
69 my $wasovl=1;
70 my @m = map { [$_->[0], $_->[1]] } @a;
71 WASOVL: while ($wasovl) {
72 $wasovl=0;
73 for (my $i=0;$i<@m-1;$i++) {
74 for (my $j=$i+1;$j<@m;$j++) {
75 next if $i==$j;
76 my ($l1, $r1)=($m[$i]->[0], $m[$i]->[1]);
77 my ($l2, $r2)=($m[$j]->[0], $m[$j]->[1]);
78 #we know that l2>=l1 for sure
79 if ($l2<=$r1) {
80 #intersection detected;
81 $wasovl=1;
82 $m[$i]->[1] = ($r1>$r2)?$r1:$r2; #max right coord
83 splice(@m, $j);
84 next WASOVL;
85 } #intersection
86 } #j
87 } #i
88 }#while wasovl
89 my $cov=0;
90 map { $cov+=$_->[1]-$_->[0]+1 } @m;
91 return sprintf('%d',($cov*100.00)/$len);
92 }

Properties

Name Value
svn:executable *