ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/yamap/parse_transterm.pl
Revision: 1.3
Committed: Thu Sep 28 13:14:52 2006 UTC (10 years ago) by knirirr
Branch: MAIN
CVS Tags: HEAD
Changes since 1.2: +1 -0 lines
Log Message:
Updated labels in parser scripts.

Line File contents
#!/usr/bin/perl

# A script to parse the output of TransTerm.
#
# Reads a TransTerm report from <infile>, collects every "TERM" entry
# together with the sequence line that follows it, and writes one
# "FT stem_loop" feature (plus /label, /colour and /note qualifiers)
# per terminator to <outfile>, ordered by terminator number.
#
# Usage: ./parse_transterm.pl <infile> <outfile>
# Either file name may be "-" to mean standard input / standard output.

use strict;
use warnings;
use File::Basename;

# Parse the lines of a TransTerm report.
#
# Each terminator entry starts in column 3 and is of the form:
#   TERM # start - end +/- regionconf hp tail | notes
# followed by a line holding the sequence of the 5' tail, 5' stem, loop,
# 3' stem and 3' tail.
#
# Takes:   a reference to an array of input lines (line endings optional).
# Returns: a reference to a hash keyed by terminator number; each value is
#          a hash with the keys number, direction, regionconf, conf, notes,
#          lineout and (once the sequence line has been seen) tail_5,
#          stem_5, loop, stem_3 and tail_3.
sub parse_terminators {
    my ($lines) = @_;

    # Because the interesting stuff is on two lines, it is necessary
    # to store it and hand it back for printing later.
    my %lineinfo;

    # Number of the TERM entry that is still waiting for its sequence line.
    # Undefined before the first TERM line and again once the sequence has
    # been attached, so that stray lines are never mistaken for sequence.
    my $pending;

    foreach my $raw (@$lines) {
        my $line = $raw;
        $line =~ s/\r?\n\z//;

        if ($line =~ /^\s+TERM/) {
            $pending = undef;
            $line =~ s/^\s+//;
            my @parts = split(/\s+/, $line);

            # Need at least: TERM, number, start, '-', end, strand,
            # regionconf and conf. Anything shorter is not a real entry.
            next if @parts < 8;

            my $number    = $parts[1];
            my $seq_start = $parts[2];
            my $seq_end   = $parts[4];

            # Notes are whatever follows the last '|'; empty when there is
            # no '|' at all (rather than the whole line).
            my $notes = $line =~ /\|([^|]*)$/ ? $1 : '';

            $lineinfo{$number} = {
                number     => $number,
                direction  => "$parts[5]ve",
                regionconf => $parts[6],
                conf       => $parts[7],
                notes      => $notes,
                lineout    => $seq_start <= $seq_end
                    ? "$seq_start..$seq_end"
                    : "complement($seq_end..$seq_start)",
            };
            $pending = $number;
        }
        elsif (defined $pending && $line =~ /^\s+[GATC]+/) {
            # Only the first sequence-looking line after a TERM line is
            # taken. Later indented lines that merely begin with G, A, T
            # or C (e.g. gene names) must not overwrite the stored parts.
            $line =~ s/^\s+//;
            my @parts = split(/\s+/, $line);
            @{ $lineinfo{$pending} }{qw(tail_5 stem_5 loop stem_3 tail_3)}
                = @parts[0 .. 4];
            $pending = undef;
        }
    }

    return \%lineinfo;
}

# Format one terminator record (as built by parse_terminators) as the
# block of FT lines written to the output file. Fields that were never
# filled in (for example when no sequence line followed the TERM line)
# are printed as empty strings.
#
# NOTE(review): the single-space layout of the FT lines is reproduced as it
# appears in this copy of the script; confirm the column alignment expected
# by whatever consumes the output.
sub format_feature {
    my ($term) = @_;

    my %f = map { $_ => (defined $term->{$_} ? $term->{$_} : '') }
        qw(lineout number direction regionconf conf notes
           tail_5 stem_5 loop stem_3 tail_3);

    return <<EOF;
FT stem_loop $f{lineout}
FT /label="stem_loop $f{number}"
FT /colour=12 153 210
FT /note="detected with transterm"
FT /note="direction: $f{direction}, Loc: $f{regionconf}, confidence: $f{conf}, notes: $f{notes}"
FT /note="5-prime tail seq $f{tail_5}"
FT /note="5-prime stem seq $f{stem_5}"
FT /note="loop seq $f{loop}"
FT /note="3-prime stem seq $f{stem_3}"
FT /note="3-prime tail seq $f{tail_3}"
EOF
}

# Command-line entry point: read <infile>, parse it, write <outfile>.
sub main {
    # usage
    unless (@ARGV) {
        print "Usage: ./parse_transterm.pl <infile> <outfile>\n";
        exit;
    }

    my $infile  = shift @ARGV;
    my $outfile = shift @ARGV;
    die "Usage: ./parse_transterm.pl <infile> <outfile>\n"
        unless defined $outfile;

    # "-" keeps meaning stdin/stdout, as it did with the two-argument
    # form of open; every other name is treated as a literal path.
    my $in;
    if ($infile eq '-') {
        $in = \*STDIN;
    }
    else {
        open($in, '<', $infile) or die "Can't open $infile: $!";
    }
    my @lines = <$in>;
    close $in;

    my $out;
    if ($outfile eq '-') {
        $out = \*STDOUT;
    }
    else {
        open($out, '>', $outfile) or die "Can't open $outfile: $!";
    }

    my $lineinfo = parse_terminators(\@lines);

    # Now print each terminator out, in numerical order, to the tab file.
    foreach my $num (sort { $a <=> $b } keys %$lineinfo) {
        print {$out} format_feature($lineinfo->{$num});
    }

    # Buffered write errors only surface at close, so check it.
    close $out or die "Can't close $outfile: $!";
    return;
}

# Run only when executed as a script, so the helpers can be loaded
# (e.g. by a test) without touching @ARGV or any files.
main() unless caller;

__END__