ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/scripts/qexo_p2g
Revision: 23
Committed: Tue Jul 26 21:44:38 2011 UTC (8 years, 2 months ago) by gpertea
Original Path: ann_bin/qexo_p2g
File size: 3562 byte(s)
Log Message:
adding misc scripts

Line User Rev File contents
1 gpertea 23 #!/usr/bin/perl
2     use strict;
3     use Getopt::Std;
4     use POSIX "sys_wait_h"; # mostly for the handy uname() function
5    
6     my $usage = q/Usage:
7     qexo_p2g [-o <output.gff3>] {-q '<qprotname>' | -m <ctgname>:<hitmap_fadb.cidx>}
8     [-c <mincov> -p <minpid>] {qprot.fa|protdb.cidx} genomic.fa
9     If <qprot.fa> is a multifasta file then exonerate p2g will be run for each of the
10     proteins in that file.
11     The output is converted to gff and optionally appended to file <output.gff3>
12     (if -o is not given, the gff output is sent to stdout)
13     /;
14    
15     umask 0002;
16     getopts('Do:m:n:c:p:q:') || die($usage."\n");
17     #die($usage."\n") unless -f $ARGV[1];
18     my ($fappend, $qpull)=($Getopt::Std::opt_o, $Getopt::Std::opt_q);
19     my ($mincov, $minpid)=($Getopt::Std::opt_c,$Getopt::Std::opt_p);
20     $mincov=1 unless $mincov;
21     $minpid=10 unless $minpid;
22     my $debug=$Getopt::Std::opt_D;
23     my ($hitmapdta)=($Getopt::Std::opt_m);
24     my ($ctg, $hitmapcidx);
25     if ($hitmapdta) { ($ctg, $hitmapcidx)=split(/:/,$hitmapdta); }
26     my $numaln=1; # ID suffix for each mapping
27     my $pulling=0; # temporary qry file created through db pulling
28     my ($qf, $sfile)=@ARGV;
29     die("$usage\n") unless $qf && $sfile;
30     die("$usage Error: cannot locate query file ('$qf')!\n") unless -f $qf;
31     die("$usage Error: cannot locate genomic fasta file ('$sfile')!\n") unless -f $sfile;
32     my $host=lc((&POSIX::uname)[1]);
33     my $counter=time();
34     chomp($host);
35     ($host)=($host=~m/^([\w\-]+)/);
36     my $qrf=getfname($qf);
37     my $gfn=getfname($sfile);
38     my $pnum=$ENV{GRID_TASK} || $counter;
39     my $qfexo='exop2g.'.$host.'_'.$$.".$pnum.$qrf.$gfn";
40     my $qfexout=$qfexo.'.exo';
41     if ($qpull || $ctg) {
42     $pulling=1;
43     #my $qrf=$qf;
44     #$qrf=$1 if $qf=~m/\/([^\/]+)$/;
45     #$qrf=~s/\.cidx$//;
46     my $qfqry=$qfexo.'.qry';
47     if ($qpull) { #just pull one protein sequence
48     my $r=system("cdbyank -a '$qpull' $qf > $qfqry");
49     }
50     else { # pull a set of proteins -- in two steps
51     die("Error: the hitmap cdb index not found!\n") unless -f $hitmapcidx;
52     my $r=system("cdbyank -a '$ctg' $hitmapcidx | cdbyank $qf > $qfqry");
53     }
54     runExonerate($qfqry);
55     }
56     else {
57     runExonerate($qf, $qfexout);
58     }
59    
60     sub runExonerate {
61     my ($qfqry, $fexout)=@_;
62     print STDERR ("Warning: 0 length protein input, mistake?\n") unless -s $qfqry;
63     #my $CDSextracted;
64     #write sequence to a tmp file for searching
65     $fexout=$qfqry.'.exo' unless $fexout;
66     # use exonerate.icc ?
67     my $cmd="exonerate $qfqry $sfile -V 0 --model p2g --percent 20 -n 20 --showalignment no --saturatethreshold 80 ".
68     "--ryo '".'%ti\t%qi\tmap_end\t%r\t%ql|%qab-%qae\t%ps\t%g\t%qd\n'."' --geneseed 60 -x 50 --proteinwordlen 4 ".
69     "--seedrepeat 2 -i -40 -M 512 --showtargetgff yes --showvulgar no > $fexout ";
70    
71     #$cmd.=' >> '.$fappend if ($fappend);
72     my $r=system($cmd);
73     if ($r) {
74     die("Error at running: \n $cmd\n");
75     }
76     else {
77     if ($fappend && -s $fexout) {
78     my $excat=$fappend;
79     $excat=~s/\.gff\d*$//i;
80     $excat.='.exout';
81     system("cat $fexout >> $excat");
82     system("fltexonerate -c $mincov -p $minpid < $fexout >> $fappend");
83     unlink($fexout);
84     }
85     unlink($qfqry) if $pulling && !$debug;
86     }
87     }
88    
89     #------------
90     sub runCmd {
91     my ($docmd, @todel) = @_;
92     my $errmsg = `( $docmd ) 2>&1`;
93     my $errcode=$?;
94     my @cores=<core.*>;
95     if ($errcode || @cores>0 || ($errmsg=~/error|segmentation|violation/si) || ($errmsg=~/\bfail/si)) {
96     print STDERR "!Error at:\n$docmd\n";
97     print STDERR "$errmsg\n";
98     foreach (@todel) {
99     unlink($_);
100     }
101     exit(1);
102     }
103     }
104    
105     sub getfname {
106     my $f=$_[0];
107     $f=$1 if $f=~m/\/([^\/]+)$/;
108     $f=~s/\.cidx$//;
109     $f=~s/\.\w+$//;
110     return $f
111     }

Properties

Name Value
svn:executable *