ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/scripts/gff_add_descr.pl
Revision: 23
Committed: Tue Jul 26 21:44:38 2011 UTC (8 years, 8 months ago) by gpertea
Original Path: ann_bin/gff_add_descr.pl
File size: 1476 byte(s)
Log Message:
adding misc scripts

Line User Rev File contents
#!/usr/bin/perl
use strict;
use warnings;

my $usage=q/
gff_add_descr.pl <cdbyank_file.cidx> <gfflines..>

Adds an extra "Descr" attribute to the main gff lines
(mRNA or gene) of the input, by querying the ID
from the <cdbyank_file.cidx> file.

/;

my %cache; # ID => descr (consulted by fetchDescr)
my $cdbidx=shift(@ARGV) || die("$usage\nError: no cidx file given\n");
my $skipped=0; # mRNA/gene lines passed through because they already had a description

# Stream the GFF input (remaining file arguments, or STDIN) to STDOUT:
#  * comment lines are copied unchanged;
#  * lines without a 9th (attributes) column are dropped;
#  * mRNA/gene lines get their ID suffix rewritten (".N" -> ".mN") and a
#    descr="..." attribute appended, unless they already carry a Descr/Info
#    value longer than 6 characters (those are printed untouched);
#  * on every other line a trailing "Parent=<id>.N" becomes "Parent=<id>.mN".
while (my $line=<>) {
    if ($line=~m/^\s*#/) { print $line; next; }
    my @t=split(/\t/, $line);
    next unless($t[8]); #can't be a valid gff3 line
    my $f=lc($t[2]);
    if ($f eq 'mrna' || $f eq 'gene') {
        if ($t[8]=~m/(?:Descr|Info)\s*=\s*"?([^;"]+)/i) {
            my $descr=$1;
            if (length($descr)>6) {
                # existing description looks real: keep the line as it is
                $skipped++;
                print $line;
                next;
            }
            # Drop the short placeholder description, including the closing
            # quote of a quoted value, then tidy the separators left behind.
            $t[8]=~s/(?:Descr|Info)\s*=\s*"?[^;"]+"?//i;
            $t[8]=~tr/;/;/s;
            $t[8]=~s/^;//;
        } #has descr/info already
        # -- retrieve the description here..
        my ($id)=($t[8]=~m/\bID\s*=\s*"?([^;" ]+)/i);
        die("Error: no ID found for gff $f line: $line\n") unless $id;
        $id=~s/\.(\d+)$/.m$1/;
        # Same case-insensitivity as the match above, and consume an optional
        # closing quote so a quoted ID does not leave a stray '"' behind.
        $t[8]=~s/\bID\s*=\s*"?[^;" ]+"?/ID=$id/i;
        # the lookup key is the ID without its trailing ".mN" style suffix
        $id=~s/\.[a-z]*\d+$//;
        my $descr=fetchDescr($id);
        # Strip the line terminator and any dangling ';' so the appended
        # attribute is separated by exactly one ';'.
        $t[8]=~s/[;\s]+$//;
        $t[8].=';descr="'.$descr.'"';
        $line=join("\t",@t)."\n";
    } #mRNA/gene line
    else { #
        $line=~s/(Parent=[\w\|]+)\.(\d+)$/$1.m$2/;
    }
    print $line;
} #while
48    
# fetchDescr($id)
# Returns the description text for $id by running
#   cdbyank -a $id -F $cdbidx
# and removing the first whitespace-delimited token (the ID) from its output.
# Results are memoized in the file-level %cache, so each distinct ID triggers
# at most one external call. Returns '' when cdbyank produces no output or
# cannot be started (a warning is printed in the latter case).
sub fetchDescr {
    my $id=$_[0];
    return $cache{$id} if exists $cache{$id};
    #print STDERR "fetching: cdbyank -a '$id' -F $cdbidx\n";
    my $def='';
    # List-form pipe open runs cdbyank directly, without a shell, so quotes,
    # spaces or metacharacters in the ID or index path cannot break or
    # subvert the command.
    if (open(my $yank, '-|', 'cdbyank', '-a', $id, '-F', $cdbidx)) {
        local $/; # slurp the whole output, as backticks in scalar context do
        my $out=<$yank>;
        $def=$out if defined $out;
        close($yank);
    }
    else {
        warn("Warning: could not run cdbyank for '$id': $!\n");
    }
    chomp($def);
    $def=~s/^\S+\s*//; #remove first token (the ID)
    $cache{$id}=$def; # remember the answer (even an empty one)
    return $def;
}

Properties

Name Value
svn:executable *