ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/scripts/fqdust
Revision: 24
Committed: Tue Jul 26 21:46:39 2011 UTC (8 years, 2 months ago) by gpertea
File size: 655 byte(s)
Log Message:
Line File contents
1 #!/usr/bin/perl
2 #simple dust filter for short reads:
3 # if more than 50% of the sequence is masked, don't print the read
4 use strict;
5 while (<>) {
6 if (m/^@(\S+)/) {
7 my $seqname=$1;
8 my $seq=<>;
9 $seq=~tr/\n\r//d;
10 my $qh=<>;
11 #chomp($qh);
12 $qh=~tr/\n\r//d;
13 die("Error: 3rd line of $seqname must start with '+' !\n")
14 unless ($qh eq '+') || ($qh eq '+'.$seqname);
15 my $quals=<>;
16 my $mseq=$seq;
17 $mseq =~ s/((\w+?)\2{5,})/'N' x length $1/oeg;
18 my $masked=($mseq=~tr/N//);
19 if ($masked<(length($seq)>>1)) {
20 print "\@$seqname\n$seq\n+\n$quals";
21 }
22 #else {
23 # print STDERR "Discarding: $seq\n";
24 # }
25 }
26 }

Properties

Name Value
svn:executable *