ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/scripts/grid_blastall.psx
Revision: 24
Committed: Tue Jul 26 21:46:39 2011 UTC (8 years, 1 month ago) by gpertea
File size: 2243 byte(s)
Log Message:
Line File contents
1 #!/usr/bin/perl
2 use strict;
3 use FindBin;
4
5 umask 0002;
6 #$ENV{'PATH'}=$FindBin::Bin.':'.$ENV{'PATH'};
7
8 my $usage=q{ gridx/psx slice processing script for running a blastall search
9 on the grid (given a large multi-fasta query file). Cannot be used by itself!
10
11 Usage:
12
13 gridx [gridx_opts] -i <qfile> grid_blastall.psx <blastdb> <blastall_options>
14
15 <blastall_options> must at least specify the -p option, but should not
16 include -i and -d options as these will be set by gridx
17 <qfile> is a multi-fasta file to be sliced and searched against <blastdb>
18 <blastdb> should be the full path to the blast database
19 (and should have been already formatted with formatdb)
20
21 The output will be found as ./gridx-*/wrk_*/<slice_file>.blout files.
22
23 Usage example (using 10 grid CPUs with 1000 sequences per slice):
24
25 cd /fs/sztmpscratch/operons/Sphingomonas_wittichii_RW1
26
27 gridx -p 10 -m your_e-mail -n 1000 -i all.faa grid_blastall.psx \
28 /fs/sztmpscratch/operons/proteins.faa \
29 -p blastp -e 1e-15 -m 8 -v 0 -F f
30 };
31
32 #==============
33 # 1 is the name of the fasta sequence input file
34 # 2 is the # of sequences in ${1}
35 # 3 is the slice no. being processed by sx
36 # 4 is 0 if not the last file, 1 if the last file
37 # 5 is the # of sequences skipped initially
38 # 6 is the # of sequences to be processed (-1 = ALL)
39 # 7 user parameter
40 # 1 2 3 4 5 6
# Positional arguments handed over by gridx/psx for each slice:
#   $file      - name of the fasta slice file to search
#   $numseqs   - number of sequences in $file
#   $slice_num - number of the slice being processed
#   $last      - 0 if this is not the last slice, 1 if it is
#   $skipped   - number of sequences skipped initially
#   $total     - number of sequences to be processed (-1 = ALL)
#   $bldbpath  - first user parameter: the blast database
#   @blopts    - remaining user parameters, passed through to blastall
# Several of these are unused here but must be named to keep the
# positional unpacking aligned.
my ($file, $numseqs, $slice_num, $last, $skipped, $total, $bldbpath, @blopts)=@ARGV;

die "\n$usage" unless $bldbpath;
#die "\n$usage\nCannot find $bldbpath!\n" unless -f $bldbpath;

# Append everything this script prints to local log files.
# STDOUT is redirected first so that a failure to open it can still be
# reported on the original STDERR; a failure to open the error log is
# reported through the (already redirected) STDOUT log instead, because
# a failed reopen leaves STDERR closed.
my $log_file='log_std';
my $err_file='err_log';
open(STDOUT, '>>', $log_file)
    or die "Cannot open $log_file for appending: $!\n";
open(STDERR, '>>', $err_file) or do {
    print STDOUT "Cannot open $err_file for appending: $!\n";
    exit(1);
};

# Search results for this slice are written next to the slice file.
my $bl_res=$file.".blout";
# The command is run through the shell; user options are passed verbatim.
my $cmd="blastall ".join(' ',@blopts)." -i $file -d $bldbpath -o $bl_res";
my $slno=sprintf("slice:%09d",$slice_num);
print STDERR ">>$slno: $cmd\n";
# On failure runCmd removes the partial result file and exits the script.
runCmd($cmd, $bl_res);

print STDERR "<<$slno: done.\n";

# The slice input file is no longer needed once the search succeeded.
unlink($file);
exit 0;
62
# Run a command through the shell, capturing its combined stdout/stderr,
# and abort the whole script if the command appears to have failed.
#
#   $docmd - command line to execute
#   @todel - files to delete if the command fails (e.g. partial output)
#
# The command is considered failed when its wait status is non-zero or
# when its output contains one of the error keywords matched below
# (case-insensitive). On failure the command, its decoded exit status and
# its output are printed to STDERR, the @todel files are unlinked and the
# script exits with status 1. On success the sub simply returns.
sub runCmd {
    my ($docmd, @todel) = @_;
    my $errmsg = `($docmd) 2>&1`;
    # Save the raw wait status and errno before anything can reset them.
    my $status = $?;
    my $oserr  = "$!";
    # Backticks return undef when the shell could not be started.
    $errmsg = '' unless defined $errmsg;

    return unless $status
        || $errmsg =~ /Error|Segmentation|Fail|Invalid|Cannot/si;

    # $? is a wait status, not an exit code: the exit code lives in the
    # high byte and the terminating signal (if any) in the low 7 bits.
    my $exitcode;
    if ($status == -1) {
        $exitcode = "-1 (failed to execute: $oserr)";
    }
    elsif ($status & 127) {
        $exitcode = ($status >> 8) . ' (killed by signal ' . ($status & 127) . ')';
    }
    else {
        $exitcode = $status >> 8;
    }

    print STDERR "!Error at:\n$docmd\n";
    print STDERR "Exit code: $exitcode, message:\n$errmsg\n";
    unlink(@todel);
    exit(1);
}

Properties

Name Value
svn:executable *