ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/scripts/ace2info
Revision: 24
Committed: Tue Jul 26 21:46:39 2011 UTC (9 years, 2 months ago) by gpertea
File size: 2193 byte(s)
Log Message:
Line User Rev File contents
1 gpertea 23 #!/usr/bin/perl
2     use strict;
3     my $usage=q/
4     Converts ace files to the more familiar TIGR assembler info format.
5     Usage:
6     ace2info <ace_files...>
7     Output format:
8     /;
9     die $usage if $ARGV[0]=~m/\-\-?h/i;
10     my $cmd='aceconv';
11    
12     die "Conversion program not found as '$cmd'\n" unless -x $cmd;
13    
14     foreach (@ARGV) {
15     die "Error: input ace file '$_' not found.'\n" unless -e $_;
16     }
17     #=========== global data used by write_asm_info:
18     my $asmno=0;
19     my $asm_numseqs;
20     my $asm_len;
21     my @asmseqs;
22     #===================
23     my $total_seqcount; #overall count of sequences assembled.
24     my $total_asmcount; #overall count of sequences assembled.
25     my ($seq_name,$seq_lend, $seq_rend, $asm_lend, $asm_rend);
26     foreach my $ace (@ARGV) {
27     open (INCOMING, "$cmd $ace |") || die "Error opening aceconv stream!\n";
28     while (<INCOMING>) {
29     if (m/^sequence\t(\S+)/) {
30     my $seq=$1;
31     $asm_len=length($seq);
32     $asmno++;
33     $asm_numseqs=0;
34     @asmseqs=();
35     }
36     elsif (m/^seq#\t(\d+)/) {
37     $asm_numseqs=$1;
38     }
39     elsif (m/^seq_name\t(\S+)/) {
40     $seq_name=$1;
41     #this is the first in the seq_name block
42     }
43     elsif (m/^asm_lend\t(\d+)/) {
44     $asm_lend=$1;
45     }
46     elsif (m/^asm_rend\t(\d+)/) {
47     $asm_rend=$1;
48     }
49     elsif (m/^seq_lend\t(\d+)/) {
50     $seq_lend=$1;
51     }
52     elsif (m/^seq_rend\t(\d+)/) {
53     $seq_rend=$1;
54     }
55     elsif (m/^offset\t/) {
56     #end of sequence data signal
57     #--------------- 0 1 2 3 4
58     push(@asmseqs, [$seq_name, $asm_lend, $asm_rend, $seq_lend, $seq_rend]);
59     }
60     elsif (m/^\|$/) {
61     #end of assembly data signal:
62     die("Invalid sequence count for asm $asmno") if scalar(@asmseqs)!=$asm_numseqs;
63     &write_asm_info();
64     }
65     }
66     close(INCOMING);
67     }
68     #===============================
69     sub write_asm_info {
70     @asmseqs = sort { $main::a->[0] cmp $main::b->[0] } @asmseqs;
71     my $first=1;
72     # compute redundancy:
73     my $ovlsum;
74     map { $ovlsum += $$_[2]-$$_[1]+1; } @asmseqs;
75     my $redundancy=sprintf("%4.2f",$ovlsum/$asm_len);
76     foreach my $d (@asmseqs) {
77     if ($first) {
78     print join("\t",$asmno,$asm_len, $asm_numseqs, $redundancy);
79     $first=0;
80     }
81     else {
82     print "\t\t\t";
83     }
84     print "\t".join("\t",@$d)."\n";
85     }
86     }

Properties

Name Value
svn:executable *