ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/scripts/jigsaw2gff.pl
Revision: 24
Committed: Tue Jul 26 21:46:39 2011 UTC (8 years, 1 month ago) by gpertea
File size: 2447 byte(s)
Log Message:
Line File contents
1 #!/usr/bin/perl
2 use strict;
3 use Getopt::Std;
4
5 my $usage = q/Usage:
6 jigsaw2gff.pl [-N] [-p <IDbase>] <jigsaw_output>
7
8 Converts jigsaw output to gff3 format. The transcript ID
9 is by default built from 1st column (chromosome ID) followed by the suffix
10 .jsm.<N> (where <N> is the gene number in the jigsaw output).
11
12 Options:
13 -p use given base name instead of 1st column for transcript ID base name
14 and simply append .jsm<N> to it
15 -N use a simple .jsm<N> suffix (without the second '.' before the gene #)
16 /;
17 umask 0002;
18 getopts('Np:o:') || die($usage."\n");
19 my $outfile=$Getopt::Std::opt_o;
20 my $jbase=$Getopt::Std::opt_p;
21 my $jsuf='.jsm';
22 $jsuf.='.' unless ($jbase || $Getopt::Std::opt_N);
23
24 my $curmodel; #current model name
25 my ($curtag, $curstrand, $curchr, $genescore); #chromosome and strand for current model
26 my @exd; #exons for current model
27
28 while (<>) {
29 next if m/^\s*#/;
30 chomp;
31 my ($chr, $jsver, $exontype, $exonstart, $exonend, $jscore,
32 $strand, $frame, $lnum)=split(/\t/);
33 #next unless $lnum>0;
34 my ($modelnum, $gscore);
35 if ($lnum=~m/^(\d+)$/) { #old version
36 $modelnum=$1;
37 }
38 else {
39 my @a=split(/\s*;\s*/, $lnum);
40 ($modelnum)=($a[0]=~m/\.(\d+)$/);
41 ($gscore)=($a[1]=~m/gene_score=([\-\d\.]+)/);
42 }
43 next unless $modelnum;
44 ($exonstart, $exonend)=($exonend, $exonstart) if $exonend<$exonstart;
45 my $base=$jbase || $chr;
46 my $locus=$base.$jsuf.$modelnum;
47 if ($locus ne $curmodel) {
48 &writeModel() if $curmodel;
49 $curmodel=$locus;
50 $curtag=$chr.$strand;
51 $curchr=$chr;
52 $curstrand=$strand;
53 $genescore=$gscore;
54 @exd=([$exonstart, $exonend, $frame]);
55 next;
56 }
57 push(@exd, [$exonstart, $exonend, $frame]);
58 }
59
60 &writeModel() if $curmodel;
61
62 sub writeModel {
63 my @ex= sort { $main::a->[0] <=> $main::b->[0] } @exd;
64 my ($mstart, $mend)=($ex[0]->[0], $ex[-1]->[1]);
65 my $attrs="ID=$curmodel;Name=$curmodel";
66 if ($genescore) {
67 $attrs.=";gene_score=$genescore";
68 }
69 print STDOUT join("\t",$curchr, 'jigsaw', 'mRNA', $mstart, $mend, '.',
70 $curstrand, '.', $attrs)."\n";
71 my $i=1;
72 foreach my $exon (@ex) {
73 my ($estart, $eend, $eframe)=@$exon;
74 print STDOUT join("\t",$curchr, 'jigsaw', 'CDS', $estart, $eend, '.',
75 $curstrand, $eframe, "Parent=$curmodel")."\n";
76 $i++;
77 }
78 #my @exw = map { $_->[0].'-'.$_->[1] } @ex;
79 #print STDOUT ">$curmodel $mstart $mend $curtag\n";
80 #print STDOUT join(',',@exw)."\n";
81 $genescore='';
82 }

Properties

Name Value
svn:executable *