ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/scripts/qexo_p2g
Revision: 23
Committed: Tue Jul 26 21:44:38 2011 UTC (8 years, 2 months ago) by gpertea
Original Path: ann_bin/qexo_p2g
File size: 3562 byte(s)
Log Message:
adding misc scripts

Line File contents
1 #!/usr/bin/perl
2 use strict;
3 use Getopt::Std;
4 use POSIX "sys_wait_h"; # mostly for the handy uname() function
5
6 my $usage = q/Usage:
7 qexo_p2g [-o <output.gff3>] {-q '<qprotname>' | -m <ctgname>:<hitmap_fadb.cidx>}
8 [-c <mincov> -p <minpid>] {qprot.fa|protdb.cidx} genomic.fa
9 If <qprot.fa> is a multifasta file then exonerate p2g will be run for each of the
10 proteins in that file.
11 The output is converted to gff and optionally appended to file <output.gff3>
12 (if -o is not given, the gff output is sent to stdout)
13 /;
14
15 umask 0002;
16 getopts('Do:m:n:c:p:q:') || die($usage."\n");
17 #die($usage."\n") unless -f $ARGV[1];
18 my ($fappend, $qpull)=($Getopt::Std::opt_o, $Getopt::Std::opt_q);
19 my ($mincov, $minpid)=($Getopt::Std::opt_c,$Getopt::Std::opt_p);
20 $mincov=1 unless $mincov;
21 $minpid=10 unless $minpid;
22 my $debug=$Getopt::Std::opt_D;
23 my ($hitmapdta)=($Getopt::Std::opt_m);
24 my ($ctg, $hitmapcidx);
25 if ($hitmapdta) { ($ctg, $hitmapcidx)=split(/:/,$hitmapdta); }
26 my $numaln=1; # ID suffix for each mapping
27 my $pulling=0; # temporary qry file created through db pulling
28 my ($qf, $sfile)=@ARGV;
29 die("$usage\n") unless $qf && $sfile;
30 die("$usage Error: cannot locate query file ('$qf')!\n") unless -f $qf;
31 die("$usage Error: cannot locate genomic fasta file ('$sfile')!\n") unless -f $sfile;
32 my $host=lc((&POSIX::uname)[1]);
33 my $counter=time();
34 chomp($host);
35 ($host)=($host=~m/^([\w\-]+)/);
36 my $qrf=getfname($qf);
37 my $gfn=getfname($sfile);
38 my $pnum=$ENV{GRID_TASK} || $counter;
39 my $qfexo='exop2g.'.$host.'_'.$$.".$pnum.$qrf.$gfn";
40 my $qfexout=$qfexo.'.exo';
41 if ($qpull || $ctg) {
42 $pulling=1;
43 #my $qrf=$qf;
44 #$qrf=$1 if $qf=~m/\/([^\/]+)$/;
45 #$qrf=~s/\.cidx$//;
46 my $qfqry=$qfexo.'.qry';
47 if ($qpull) { #just pull one protein sequence
48 my $r=system("cdbyank -a '$qpull' $qf > $qfqry");
49 }
50 else { # pull a set of proteins -- in two steps
51 die("Error: the hitmap cdb index not found!\n") unless -f $hitmapcidx;
52 my $r=system("cdbyank -a '$ctg' $hitmapcidx | cdbyank $qf > $qfqry");
53 }
54 runExonerate($qfqry);
55 }
56 else {
57 runExonerate($qf, $qfexout);
58 }
59
60 sub runExonerate {
61 my ($qfqry, $fexout)=@_;
62 print STDERR ("Warning: 0 length protein input, mistake?\n") unless -s $qfqry;
63 #my $CDSextracted;
64 #write sequence to a tmp file for searching
65 $fexout=$qfqry.'.exo' unless $fexout;
66 # use exonerate.icc ?
67 my $cmd="exonerate $qfqry $sfile -V 0 --model p2g --percent 20 -n 20 --showalignment no --saturatethreshold 80 ".
68 "--ryo '".'%ti\t%qi\tmap_end\t%r\t%ql|%qab-%qae\t%ps\t%g\t%qd\n'."' --geneseed 60 -x 50 --proteinwordlen 4 ".
69 "--seedrepeat 2 -i -40 -M 512 --showtargetgff yes --showvulgar no > $fexout ";
70
71 #$cmd.=' >> '.$fappend if ($fappend);
72 my $r=system($cmd);
73 if ($r) {
74 die("Error at running: \n $cmd\n");
75 }
76 else {
77 if ($fappend && -s $fexout) {
78 my $excat=$fappend;
79 $excat=~s/\.gff\d*$//i;
80 $excat.='.exout';
81 system("cat $fexout >> $excat");
82 system("fltexonerate -c $mincov -p $minpid < $fexout >> $fappend");
83 unlink($fexout);
84 }
85 unlink($qfqry) if $pulling && !$debug;
86 }
87 }
88
89 #------------
90 sub runCmd {
91 my ($docmd, @todel) = @_;
92 my $errmsg = `( $docmd ) 2>&1`;
93 my $errcode=$?;
94 my @cores=<core.*>;
95 if ($errcode || @cores>0 || ($errmsg=~/error|segmentation|violation/si) || ($errmsg=~/\bfail/si)) {
96 print STDERR "!Error at:\n$docmd\n";
97 print STDERR "$errmsg\n";
98 foreach (@todel) {
99 unlink($_);
100 }
101 exit(1);
102 }
103 }
104
105 sub getfname {
106 my $f=$_[0];
107 $f=$1 if $f=~m/\/([^\/]+)$/;
108 $f=~s/\.cidx$//;
109 $f=~s/\.\w+$//;
110 return $f
111 }

Properties

Name Value
svn:executable *