ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/scripts/glimmer_run.psx
Revision: 24
Committed: Tue Jul 26 21:46:39 2011 UTC (8 years, 1 month ago) by gpertea
File size: 2462 byte(s)
Log Message:
Line File contents
#!/usr/bin/perl
use strict;
use warnings;
use FindBin;
#use Cwd qw(abs_path cwd);

# glimmer_run.psx - gridx/psx slice worker: runs glimmerhmm on one FASTA slice.
#
# The slice file is removed only after glimmerhmm finished successfully;
# a slice file that is still present afterwards marks the task as failed.

umask 0002;
#the line below is needed if pvmsx is used
# also, the error condition is set only by the presence of $file
#$ENV{'PATH'}=$FindBin::Bin.':'.$ENV{'PATH'};

my $usage=q{
gridx/psx slice processing script - never use by itself.


gridx -U -p20 -n1 -i <input_genomicseqs.fasta> glimmer_run.psx <train_data_dir> <protein_domains_dir>

The directories should be given as a full, absolute path

Usage examples:

gridx -U -p20 -n1 -i allChr_mrg.fa glimmer_run.psx \
/fs/szannotation/zebrafish/genefinding/training \
/fs/szannotation/zebrafish/genefinding/domains
};


#so for pvmsx to consider the task was successful, $file must be deleted!
#==============
# Positional arguments received by this script:
# 1 is the name of the fasta sequence input file
# 2 is the # of sequences in ${1} should = 1 for this script
# 3 is the slice no. being processed by sx
# 4 is 0 if not the last file, 1 if the last file
# 5 is the # of sequences skipped initially
# 6 is the # of sequences to be processed (-1 = ALL)
# 7 user parameter: training data directory
# 8 user parameter: protein domains directory
my ($file, $numpass, $slice_num, $last, $skipped, $total, $traindir, $pdomdir)=@ARGV;

# Eight values are unpacked above, so fewer than eight arguments is a usage
# error; checking the count first keeps undef out of the directory tests.
die "$usage\n Expected at least 8 arguments, got ".scalar(@ARGV)."\n" unless @ARGV>=8;
die "$usage\n Cannot find $traindir or $pdomdir\n" unless -d $traindir && -d $pdomdir;

#$traindir=abs_path($traindir);
#$pdomdir=abs_path($pdomdir);
my $log_file='log_std';
my $err_file='err_log';
# From here on all diagnostics are appended to the log files in the
# current directory; stop if they cannot be opened.
open(STDERR, '>>', $err_file) or die "Cannot append to $err_file: $!\n";
open(STDOUT, '>>', $log_file) or die "Cannot append to $log_file: $!\n";

# Take the sequence name from the first FASTA header line of the slice.
open(my $infile, '<', $file) or die("Cannot open input file $file: $!\n");
my $seqname;
while (my $line = <$infile>) {
    if ($line =~ m/^>(\S+)/) {
        # Accept any non-empty name, including the literal name "0".
        $seqname = $1;
        last;
    }
}
close($infile);

unless (defined $seqname) {
    # The slice file is left in place, so the task is reported as failed.
    print STDERR "ERROR: no FASTA header line found in $file! Genefinding aborted.\n";
    exit(1);
}

# Replace characters that are awkward in file names and shell commands.
$seqname=~tr/,|:;/____/;

unless (-f "$pdomdir/$seqname.domains") {
    print STDERR "WARNING: protein domains info file ($seqname.domains) not found! Genefinding aborted.\n";
    exit;
}

#my $toskip=($file =~ m/_\@(\d+)_v\d+\.\d+/) ? $1 : $skipped+$numpass*($slice_num-1);
my $outprefix=$seqname.'.glimmerhmm_pred';
my $cmd="glimmerhmm $file $traindir -p $pdomdir -g -f -o $outprefix -n 5";
my $slno=sprintf("slice:%09d",$slice_num);
print STDERR ">>$slno: $cmd\n";
# runCmd exits the script with status 1 if the command fails.
runCmd($cmd);

print STDERR "<<$slno: done.\n";

# Removing the slice file is the success signal, so report a failure to do so.
unlink($file) or print STDERR "WARNING: could not remove $file: $!\n";
exit 0;
74
# runCmd($docmd, @todel)
# Runs $docmd through the shell, capturing stdout and stderr together.
# The run counts as failed when the exit status is non-zero or when the
# captured output contains one of the known error markers. On failure the
# command and its output are written to STDERR, the files in @todel are
# removed and the whole script exits with status 1.
sub runCmd {
    my ($docmd, @todel) = @_;
    my $errmsg = `($docmd) 2>&1`;
    my $status = $?;

    # A non-zero status is enough; otherwise scan the output for markers.
    my $failed = $status ? 1 : 0;
    unless ($failed) {
        foreach my $marker (qr/ERROR/si, qr/Segmentation/si, qr/Failed/s, qr/Invalid/s) {
            if ($errmsg =~ $marker) {
                $failed = 1;
                last;
            }
        }
    }
    return unless $failed;

    print STDERR "!Error at:\n$docmd\n", "$errmsg\n";
    unlink(@todel);
    exit(1);
}

Properties

Name Value
svn:executable *