ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/scripts/ace2info
Revision: 24
Committed: Tue Jul 26 21:46:39 2011 UTC (8 years, 2 months ago) by gpertea
File size: 2193 byte(s)
Log Message:
Line File contents
1 #!/usr/bin/perl
2 use strict;
3 my $usage=q/
4 Converts ace files to the more familiar TIGR assembler info format.
5 Usage:
6 ace2info <ace_files...>
7 Output format:
8 /;
9 die $usage if $ARGV[0]=~m/\-\-?h/i;
10 my $cmd='aceconv';
11
12 die "Conversion program not found as '$cmd'\n" unless -x $cmd;
13
14 foreach (@ARGV) {
15 die "Error: input ace file '$_' not found.'\n" unless -e $_;
16 }
17 #=========== global data used by write_asm_info:
18 my $asmno=0;
19 my $asm_numseqs;
20 my $asm_len;
21 my @asmseqs;
22 #===================
23 my $total_seqcount; #overall count of sequences assembled.
24 my $total_asmcount; #overall count of sequences assembled.
25 my ($seq_name,$seq_lend, $seq_rend, $asm_lend, $asm_rend);
26 foreach my $ace (@ARGV) {
27 open (INCOMING, "$cmd $ace |") || die "Error opening aceconv stream!\n";
28 while (<INCOMING>) {
29 if (m/^sequence\t(\S+)/) {
30 my $seq=$1;
31 $asm_len=length($seq);
32 $asmno++;
33 $asm_numseqs=0;
34 @asmseqs=();
35 }
36 elsif (m/^seq#\t(\d+)/) {
37 $asm_numseqs=$1;
38 }
39 elsif (m/^seq_name\t(\S+)/) {
40 $seq_name=$1;
41 #this is the first in the seq_name block
42 }
43 elsif (m/^asm_lend\t(\d+)/) {
44 $asm_lend=$1;
45 }
46 elsif (m/^asm_rend\t(\d+)/) {
47 $asm_rend=$1;
48 }
49 elsif (m/^seq_lend\t(\d+)/) {
50 $seq_lend=$1;
51 }
52 elsif (m/^seq_rend\t(\d+)/) {
53 $seq_rend=$1;
54 }
55 elsif (m/^offset\t/) {
56 #end of sequence data signal
57 #--------------- 0 1 2 3 4
58 push(@asmseqs, [$seq_name, $asm_lend, $asm_rend, $seq_lend, $seq_rend]);
59 }
60 elsif (m/^\|$/) {
61 #end of assembly data signal:
62 die("Invalid sequence count for asm $asmno") if scalar(@asmseqs)!=$asm_numseqs;
63 &write_asm_info();
64 }
65 }
66 close(INCOMING);
67 }
68 #===============================
69 sub write_asm_info {
70 @asmseqs = sort { $main::a->[0] cmp $main::b->[0] } @asmseqs;
71 my $first=1;
72 # compute redundancy:
73 my $ovlsum;
74 map { $ovlsum += $$_[2]-$$_[1]+1; } @asmseqs;
75 my $redundancy=sprintf("%4.2f",$ovlsum/$asm_len);
76 foreach my $d (@asmseqs) {
77 if ($first) {
78 print join("\t",$asmno,$asm_len, $asm_numseqs, $redundancy);
79 $first=0;
80 }
81 else {
82 print "\t\t\t";
83 }
84 print "\t".join("\t",@$d)."\n";
85 }
86 }

Properties

Name Value
svn:executable *