ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/scripts/eqfasta_check.pl
Revision: 24
Committed: Tue Jul 26 21:46:39 2011 UTC (8 years, 1 month ago) by gpertea
File size: 1073 byte(s)
Log Message:
Line File contents
1 #!/usr/bin/perl
2 use strict;
3
# Usage text printed when the arguments are missing or are not regular files.
my $usage = q/Usage:
eqfasta_check.pl <qryfasta.fa> <tgtfasta.fa.cidx>
Checks if each sequence in qryfasta.fa is the same with
the sequence with the same ID in <tgtfasta.fa>
/;
umask 0002;


# $qfile: query multi-FASTA file; $fcdb: cdbyank index of the target FASTA.
my ($qfile, $fcdb) = @ARGV;
die("$usage\n") unless defined($qfile) && defined($fcdb)
                    && -f $qfile && -f $fcdb;

# Three-argument open with a lexical handle: the file name is never
# interpreted as a mode or a command, whatever characters it contains.
open(my $qfh, '<', $qfile) || die("Error opening $qfile: $!\n");

# Counters updated by checkRec().
my ($numdiff, $totalchecked) = (0, 0);
# Current record (ID and accumulated sequence), read by checkRec().
my ($qid, $qseq);

while (my $line = <$qfh>) {
    if ($line =~ m/^>(\S+)/) {
        # Save the capture before calling checkRec(), which runs its own
        # regex match and would overwrite $1.
        my $newqid = $1;
        # Flush the previous record (if any sequence was collected).
        checkRec() if $qseq;
        $qid  = $newqid;
        $qseq = '';
        next;
    }
    chomp($line);
    $qseq .= $line;
}
# Flush the last record of the file.
checkRec() if $qseq;
close($qfh);

print STDERR "Checked $totalchecked records, $numdiff are different.\n";
# checkRec: compare the current query record with the record of the same ID
# fetched from the target database through cdbyank.
#
# Takes no arguments and returns nothing useful; it works on the file-level
# lexicals:
#   $qid, $qseq            - ID and sequence of the query record (read)
#   $fcdb                  - cdbyank index file of the target FASTA (read)
#   $totalchecked          - incremented for every record compared
#   $numdiff               - incremented when the sequences differ
#
# The comparison is case-insensitive and ignores line breaks.
# Dies if cdbyank cannot be run, exits non-zero, returns fewer than 10
# characters, or returns a record without sequence data.
sub checkRec {
    # Shell-style rendering of the command, used only in messages.
    my $syscmd = "cdbyank -a '$qid' $fcdb";

    # List-form pipe open: arguments go straight to cdbyank without a shell,
    # so quotes, spaces or metacharacters in the ID or path are harmless.
    open(my $pipe, '-|', 'cdbyank', '-a', $qid, $fcdb)
        || die("Error at $syscmd");
    my $r = do { local $/; <$pipe> };   # slurp the whole FASTA record
    $r = '' unless defined($r);
    close($pipe);                       # sets $? to cdbyank's exit status
    die("Error at $syscmd") if $? || length($r) < 10;

    # Everything after the defline is sequence data (possibly multi-line).
    my ($tseq) = ($r =~ m/^>[^\n]+\n(.+)/s);
    die("Error: $syscmd returned empty sequence!\n") unless defined($tseq);
    $tseq =~ tr/\r\n//d;
    # length() rather than truth, so a sequence that is literally "0"
    # is not mistaken for an empty one.
    die("Error: $syscmd returned empty sequence!\n") unless length($tseq);

    # Normalize a copy of the query: drop stray carriage returns so that
    # CRLF input does not produce spurious differences.
    my $qnorm = uc($qseq);
    $qnorm =~ tr/\r//d;

    $totalchecked++;
    if ($qnorm ne uc($tseq)) {
        print STDERR "Warning: difference found for $qid\n";
        $numdiff++;
    }
}

Properties

Name Value
svn:executable *