ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/scripts/qtaxon
Revision: 24
Committed: Tue Jul 26 21:46:39 2011 UTC (8 years, 2 months ago) by gpertea
File size: 6259 byte(s)
Log Message:
Line File contents
1 #!/usr/bin/perl
2 use strict;
3 use FindBin;use lib $FindBin::Bin;
4 use dbSession;
5 use Getopt::Std;
6
7 my $usage = q/Usage:
8 qtaxon [-q] [-p <#lvls>] [-b <db>[@<server>]] [-U [-x<maxpe>]]
9 <taxon_id>|<organism-name>
10
11 Default operation (without any options) is to show a list of
12 taxon records belonging to the whole taxonomic tree of the
13 organism (class) query.
14
15 The organism class query can be given as either a numeric NCBI
16 taxon ID or a scientific or "common" name for the organism\/class
17 (which has to resolve into a NCBI taxon ID eventually).
18
19 Options:
20 -b : database\/server locator to override the default (which is the
21 'common' database); has the format <db>[@<server>][:<user>]
22 -q : query only the first level (root) of the taxononmic tree(s) that
23 can be found for the provided organism class(es) or taxon ID(s).
24 This query mode also accepts a SQL pattern to be used for the
25 <organism-name> string (e.g. 'Mus%') and it also accepts a
26 comma delimited list of numeric taxon IDs
27 -p : also shows parent nodes, going <#lvls> levels up from query taxon
28 -F : do not format the display (default is to use indentation based on
29 various levels in the (local) taxonomy tree shown)
30 -U : returns a list of UniProt entries corresponding to the taxon IDs
31 queried (and its whole subtree unless -q is used). In conjunction
32 with the -q option the list of proteins is limited to only those
33 belonging to the first level (root) of the taxa being queried.
34 -x : with -U option, only returns entries having a PE code
35 lower or equal to <maxpe>
36 /;
37
38 umask 0002;
39
40 getopts('qDUFb:x:p:o:') || die($usage."\n");
41
42 my $outfile=$Getopt::Std::opt_o;
43 my $debug=$Getopt::Std::opt_D;
44 my $notree = $Getopt::Std::opt_q;
45 my $maxpe = $Getopt::Std::opt_x;
46 my $getprot = $Getopt::Std::opt_U;
47 my $fmtout = !$Getopt::Std::opt_F;
48 my $numparents = $Getopt::Std::opt_p;
49 my $tdb = $Getopt::Std::opt_b || 'common';
50 my @auth = &db_perm($tdb);
51
52
53 die("$usage\nError: no query organism given!\n") unless @ARGV>=1;
54
55 my @arg;
56
57 foreach my $qstr (@ARGV) {
58 my @a=split(/\,/,$qstr);
59 push(@arg, @a);
60 }
61
62 #print STDERR "auth: ".join(",",@auth)."\n";
63 my $ds = dbSession->new(@auth);
64
65 #my $ds = dbSession->new("$user"."@"."$mysqlhost");
66 my %qdata; # list of main taxon IDs queried
67 # tax_id => [parent_id, name, c_name]
68
69 foreach my $qstr (@arg) {
70 print ">query:$qstr\n" if @arg>1 && !$getprot;
71 my $numeric=($qstr=~/^\d+$/);
72 my $sth;
73 if ($numeric) {
74 ($sth, my $ret)=$ds->exec("select tax_id, parent_id, name, c_name from ".
75 "taxon where tax_id=$qstr");
76 }
77 else {
78 #string query (name or c_name)
79 my $qcname=lc($qstr);
80 my $qname=$qcname;
81 $qname=~s/^(\w)/\U$1/;
82 my $op = ($qstr=~m/\%/) ? 'like' : '=';
83 ($sth, my $ret)=$ds->exec("select tax_id, parent_id, name, c_name from ".
84 "taxon where name $op '$qname' or c_name $op '$qcname'");
85 }
86 while (my $r=sth_fetch($sth)) {
87 $qdata{$$r[0]}=[1, $$r[1], $$r[2], $$r[3]];
88 } #for each query taxon
89
90 $sth->finish();
91
92 my @pt=keys(%qdata); #parent trees to expand
93 foreach my $t (@pt) {
94 print STDERR "running sql_get_tree with $t..\n" if $debug;
95 my $qd=$qdata{$t};
96
97 my $ar=&sql_get_tree($ds, $t, $qd, $numparents, $notree);
98
99 # -- print the results here:
100 foreach my $r (@$ar) {
101 my @d=@$r;
102 my $depth=pop(@d);
103 unless ($getprot) {
104 print ' ' x $depth if $fmtout;
105 my $tpointer=($fmtout && ($d[0]==$t) && !$notree)?"\t<-":"";
106 print join("\t",@d)."$tpointer\n";
107 }
108 $qdata{$$r[0]}=[0,$$r[1], $$r[2], $$r[3]]
109 if (@$r>1 && !exists($qdata{$$r[0]}));
110 }#while rows
111 } #for each query taxon
112 }
113
114 exit(0) unless $getprot;
115
116 # show corresponding uniref proteins
117 my $pqry='select tax_id, up_id from uniprot where tax_id=?';
118 $pqry.= ' and xcode<='.$maxpe if $maxpe;
119 my $sth=$ds->prep($pqry);
120 #my %uniref; #to collapse the results
121 foreach my $t (keys(%qdata)) {
122 sth_exec($sth, $t);
123 while (my $r=sth_fetch($sth)) {
124 #$uniref{$$r[0]}=$t;
125 print $$r[0]."\t".$$r[1]."\n";
126 }
127 }
128 $sth->finish();
129
130 sub sql_get_tree {
131 my ($ds, $t, $td, $pnum, $notree)=@_;
132 my ($sth, @ar);
133 my $srvtype=$ds->servertype();
134 if ( $srvtype eq 'sybase') {
135 #our sybase, we have a stored procedure for this
136 my $rt;
137 ($sth, $rt)=$ds->exec("get_taxon_tree $t");
138 while ((my $r=$sth->fetch())) {
139 push(@ar, [@$r]);
140 }
141 $sth->finish();
142 return \@ar;
143 }
144 else { # elsif ($srvtype eq 'mysql') { #must be mysql
145
146 my $qt=$t;
147 if ($pnum>0) { # parents requested
148 $qt=nav2parent($$td[1], $pnum, \@ar);
149 }
150 # ---
151 # print STDERR "parent qt = $qt\n" if $debug;
152
153 my $sth=$ds->prepare('select tax_id, parent_id, name, c_name from taxon where parent_id=?');
154 my $rcount=0;
155 if ($notree) {
156 @ar=reverse(@ar);
157 my $depth=scalar(@ar);
158 push(@ar, [$t, $$td[1], $$td[2], $$td[3], $depth]);
159 }
160 else { # tree required
161 if (@ar) { #parents requested earlier
162 my $ad=$ar[-1];
163 @ar=( $ad ); #keep only the first entry
164 }
165 else {
166 @ar = ( [$t, $$td[1], $$td[2], $$td[3], 0] );
167 }
168 get_taxchildren(\@ar, $sth, $qt, 0); #recursive call to populate the results array
169 }
170 $sth->finish();
171 return \@ar;
172 }
173 }
174
175 sub get_taxchildren {
176 my ($ar, $sth, $t, $depth)=@_;
177 sth_exec($sth, $t);
178 my @crs; # child data
179 my $accnt=scalar(@$ar);
180 # print STDERR " children list for $t (total count so far: $accnt)\n" if $debug;
181 $depth++;
182 while ((my $r=sth_fetch($sth))) {
183 #push(@$ar, [@$r, $depth]);
184 #print STDERR "\t".join("\t", @$r)."\n" if $debug;
185 push(@crs, [@$r]);
186 }
187 foreach my $cr (@crs) {
188 push(@$ar, [@$cr, $depth]);
189 get_taxchildren($ar, $sth, $$cr[0], $depth);
190 }
191 }
192
193 sub nav2parent {
194 my ($t, $dist, $ar)=@_;
195 my $sth=$ds->prepare('select tax_id, parent_id, name, c_name from taxon where tax_id=?');
196 my $rt=$t; #target parent
197 my $st=$t;
198 while ($st && ($dist>0)) {
199 &sth_exec($sth, $st);
200 #fetch parent
201 undef($st);
202 while ((my $r=sth_fetch($sth))) {
203 $st=$$r[1];
204 $rt=$$r[0];
205 push(@$ar, [@$r, $dist-1]) if $ar;
206 }
207 $dist--;
208 }
209 $sth->finish();
210 return $rt;
211 }

Properties

Name Value
svn:executable *