ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/scripts/sam_ovlcheck.pl
Revision: 24
Committed: Tue Jul 26 21:46:39 2011 UTC (8 years, 1 month ago) by gpertea
File size: 2733 byte(s)
Log Message:
Line File contents
1 #!/usr/bin/perl
2 use strict;
3 use Getopt::Std;
4 use FindBin;use lib $FindBin::Bin;
5
6 my $usage = q/Usage:
7 sam_ovlcheck.pl <bam_file> <interval_list>
8
9 Reports SAM formatted mappings from <bam_file> that truly overlap
10 (not through a gap) any of the intervals given in <interval_list>
11
12 <interval_list> has the following format:
13
14 <chromosome>[<strand>]:<start1>-<end1>[,<start2>-<end2>,...]
15
16 Note:
17 <bam_file> must have an index file present (<bam_file>.bai)
18 /;
19 umask 0002;
20 getopts('o:') || die($usage."\n");
21 my $outfile=$Getopt::Std::opt_o;
22 if ($outfile) {
23 open(OUTF, '>'.$outfile) || die("Error creating output file $outfile\n");
24 select(OUTF);
25 }
26 # --
27 my ($fbam, $intv)=@ARGV;
28 die("Error: no index file for $fbam?\n") unless -f $fbam.'.bai';
29 my ($chr,$rlst)=split(/\:/,$intv);
30 die("$usage Incorrect format for the interval list!\n") unless $chr && $rlst;
31 my $strand=chop($chr);
32 if ($strand ne '-' && $strand ne '+') {
33 $chr.=$strand;
34 $strand=undef;
35 }
36 my @rdata=map { [split(/[\-\.]+/)] } (split(/[\,\;\s]+/,$rlst));
37 foreach my $d (@rdata) {
38 ($$d[0], $$d[1])=($$d[1], $$d[0]) if $$d[0]>$$d[1];
39 }
40 my @ex = sort { $a->[0] <=> $b->[0] } @rdata;
41 my $range=$chr.':'.$ex[0]->[0].'-'.$ex[-1]->[1];
42 # -- assumes samtools and bam index
43 #my $pipecmd="samtools view $fbam $range |";
44 #print STDERR $pipecmd."\n";
45 open(SAMPIPE, "samtools view $fbam $range |") || die ("Error opening samtools pipe ($!)\n");
46 while(<SAMPIPE>) {
47 my $samline=$_;
48 chomp;
49 my ($qname, $flags, $gseq, $pos, $mapq, $cigar, $rnext, $pnext, $tlen, $seq, $qual, @extra)=
50 split(/\t/);
51 if ($strand) {
52 my $mstrand= (($flags & 0x10)==0) ? '+' : '-';
53 next if $mstrand ne $strand;
54 }
55 #now extract the CIGAR segments
56 my @cigdata=($cigar=~m/(\d+[A-Z,=])/g);
57 my ($mstart, $mend);
58 my $hasOvl=0;
59 my $curpos=$pos;
60 $mstart=$pos;
61 foreach my $cd (@cigdata) {
62 my $code=chop($cd);
63 if ($code eq 'N') { #gap
64 #process previous interval
65 if ($mend && checkOverlap($mstart, $mend, \@ex)) {
66 $hasOvl=1;
67 last;
68 }
69 $mstart=$curpos+$cd;
70 $mend=undef;
71 next;
72 }
73 $mend=$curpos+$cd-1;
74 $curpos+=$cd;
75 }
76 unless ($hasOvl) { #check the last interval
77 if ($mend && checkOverlap($mstart, $mend, \@ex)) {
78 $hasOvl=1;
79 }
80 }
81 if ($hasOvl) {
82 print $samline;
83 }
84 } # while <SAMPIPE>
85
86
87 close(SAMPIPE);
88
89 sub checkOverlap {
90 my ($a, $b, $rx)=@_;
91 return 0 if ($a>$$rx[-1]->[1] || $b<$$rx[0]->[0]); # not overlapping the whole exon chain
92 foreach my $x (@$rx) {
93 #return (start<=d->end && end>=d->start)
94 return 1 if ($a<=$$x[1] && $b>=$$x[0]);
95 return 0 if $b<$$x[0];
96 }
97 }
98
99
100 # --
101 if ($outfile) {
102 select(STDOUT);
103 close(OUTF);
104 }
105
106 #************ Subroutines **************

Properties

Name Value
svn:executable *