ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/scripts/fsize
Revision: 24
Committed: Tue Jul 26 21:46:39 2011 UTC (8 years, 1 month ago) by gpertea
File size: 1223 byte(s)
Log Message:
Line File contents
#!/usr/bin/perl
#
#pipe a multi-fasta into this and get the sequence size for each fasta record
#
use strict;
use warnings;

my $usage=q/
Reports the sequence length for each record in a multi fasta file
The output line format is (tab delimited):
<seq_name> <seq_len> [<seq_description>]

Use '-S' to get a summary of the sequences read.
/;

# Run the command-line driver only when executed as a script, so the helper
# subs below can be loaded (require/do) without consuming any input.
main() unless caller;

# main()
# Command-line driver. Reads multi-FASTA records from the files named in
# @ARGV (or STDIN when none are given), prints one tab-delimited line per
# record to STDOUT and, when the first argument is '-S', a summary to STDERR.
# Dies with the usage text when the first argument looks like -h / --help.
sub main {
    die $usage."\n" if (@ARGV && $ARGV[0]=~m/^\-+h/);
    my $summary = (@ARGV && $ARGV[0] eq '-S');
    shift(@ARGV) if $summary;

    # Read one FASTA record per iteration: records are separated by a newline
    # immediately followed by the '>' that starts the next header.
    local $/ = "\n>";

    my %stats = (count => 0, total => 0);
    while (my $record = <>) {
        chomp $record;    # removes the trailing "\n>" separator, if present
        my @fields = fasta_record_fields($record)
            or next;      # blank chunk (e.g. leading empty lines): not a record
        my ($seqname, $len) = @fields;

        $stats{count}++;
        $stats{total} += $len;
        # Strict comparisons: on ties the first record seen is reported.
        if (!defined $stats{minlen} || $len < $stats{minlen}) {
            @stats{qw(minlen min)} = ($len, $seqname);
        }
        if (!defined $stats{maxlen} || $len > $stats{maxlen}) {
            @stats{qw(maxlen max)} = ($len, $seqname);
        }
        print join("\t", @fields), "\n";
    }

    print_summary(\%stats) if $summary;
    return;
}

# fasta_record_fields($record)
# Parses the text of a single FASTA record (header line followed by zero or
# more sequence lines; a leading '>' is optional) and returns the output
# fields as a list: (name, length) or (name, length, description).
# Returns an empty list when $record contains nothing but whitespace.
sub fasta_record_fields {
    my ($record) = @_;
    $record =~ s/^>//;
    return unless $record =~ /\S/;

    # Header is everything up to the first newline; the sequence part is
    # optional so that header-only records still report their name with
    # length 0. This pattern matches any string, so the captures are safe.
    my ($seqname, $ann, $seq) =
        ($record =~ m/\A(\S*)[ \t\x01]*([^\n]*)(?:\n(.*))?\z/s);
    $seq = '' unless defined $seq;

    # Headers may carry several \x01-separated deflines concatenated
    # together; only the first description is reported.
    $ann =~ s/\x01.*//s;
    # Drop trailing whitespace, notably the "\r" left by CRLF line endings.
    $ann =~ s/\s+\z//;

    $seq =~ tr/\t \n\r//d;    # length counts residues only, not layout
    my @fields = ($seqname, length($seq));
    # Test length rather than truth so a description of "0" is kept.
    push(@fields, $ann) if length($ann);
    return @fields;
}

# print_summary(\%stats)
# Writes the '-S' summary to STDERR. %stats holds count and total, plus
# minlen/min and maxlen/max (length and sequence name) once at least one
# record was read. With no records only the totals line is printed, which
# avoids dividing by zero and reporting meaningless min/max values.
sub print_summary {
    my ($st) = @_;
    #TODO: calculate variance
    print STDERR "Total $st->{total} bases in $st->{count} sequences.\n";
    return if $st->{count} == 0;
    print STDERR "Max length: $st->{maxlen} (sequence $st->{max})\n";
    print STDERR "Min length: $st->{minlen} (sequence $st->{min})\n";
    print STDERR "Average : ".int($st->{total}/$st->{count})."\n";
    return;
}

Properties

Name Value
svn:executable *