ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/yamap/quickmine.pl
Revision: 1.1
Committed: Wed Dec 13 10:48:57 2006 UTC (9 years, 9 months ago) by gawi79
Branch: MAIN
CVS Tags: HEAD
Log Message:
A QuickMine script

Line File contents
1 #!/usr/bin/perl
2
3 #
4 # quickmine.pl
5 #
6
7 # Gareth Wilson v 1.0.2
8
9 use strict;
10 use warnings;
11 use Config::Simple;
12 use Getopt::Std;
13
# Command-line handling.
#   -c FILE  quickmine configuration file (required)
#   -d       delete existing QuickMine output before running
#   -h       print the usage text and exit
my $config_file;

my %opts;
getopts('hc:d', \%opts);

# A single usage text serves both the -h and the "missing -c" paths.
# The full path is requested because this script later changes into the
# output directory and still passes the same config file name on to the
# helper scripts, so a relative name would stop resolving.
my $usage = <<'USAGE';

Usage:
-c quickmine configuration file (please provide full path)
-d delete existing QuickMine output
-h list these options

USAGE

if (defined $opts{h}) {
    print $usage;
    exit;
}

# No configuration file means nothing can run: report on STDERR and exit
# with a failure status so calling shells can detect the problem.
if (!defined $opts{c}) {
    print STDERR $usage;
    exit 1;
}

$config_file = $opts{c};
49
50
# Load the configuration file. Config::Simple->new returns false on
# failure, so stop here with the module's own error text rather than
# failing later on the first param() call.
my $cfg = Config::Simple->new($config_file)
    or die "Can't read config file $config_file: " . Config::Simple->error() . "\n";

# Values shared with the config file, addressed as BLOCK.key.
my $path2proteins     = $cfg->param('PATHS.path2proteins');
my $ext               = $cfg->param('LEAVE_ALONE.ext');
my $path2output       = $cfg->param('PATHS.path2output');
my $path2scripts      = $cfg->param('PATHS.path2scripts');
my $formatdb          = $cfg->param('PARAMS.formatdb');
my $end               = $cfg->param('ENDINGS.end');
my $count_end         = $cfg->param('ENDINGS.count_end');
my $time_end          = $cfg->param('ENDINGS.time_end');
my $matrix_end        = $cfg->param('ENDINGS.matrix_end');
my $condor_output     = $cfg->param('LEAVE_ALONE.condor_output');
my $record_separator  = $cfg->param('PARAMS.record_separator');
my $self_hit          = $cfg->param('PARAMS.self_hit');
my $fasta_file_ending = $cfg->param('LEAVE_ALONE.fasta_file_ending');
my $write_fasta_files = $cfg->param('LEAVE_ALONE.write_fasta_files');
my $blast_programme   = $cfg->param('PARAMS.blast_programme');
my $blast_command     = $cfg->param('PARAMS.blast_command');

# RUN.* switches: each one enables the pipeline step of the same name
# further down in this script.
my $parse           = $cfg->param('RUN.parse');
my $format          = $cfg->param('RUN.format');
my $quickmine       = $cfg->param('RUN.quickmine');
my $split           = $cfg->param('RUN.split');
my $orphans         = $cfg->param('RUN.orphans');
my $hits            = $cfg->param('RUN.hits');
my $genetable       = $cfg->param('RUN.genetable');
my $orphan_count    = $cfg->param('RUN.orphan_count');
my $orphan_size     = $cfg->param('RUN.orphan_size');
my $paralogue_count = $cfg->param('RUN.paralogue_count');
my $increment       = $cfg->param('RUN.increment');
my $time            = $cfg->param('RUN.time');
my $binary          = $cfg->param('RUN.binary');
my $plots           = $cfg->param('RUN.plots');
my $indiv_plot      = $cfg->param('RUN.indiv_plot');
my $dot_plot        = $cfg->param('RUN.dot_plot');
my $summarizer      = $cfg->param('RUN.summarizer');

# Fixed first argument handed to orphan_size.pl.
my $para_check = 0;
91
# -d: remove the output of a previous run from $path2output.
if ($opts{d})
{
    # Without a directory name every pattern below would expand relative
    # to the filesystem root (e.g. "/help*"), so refuse to continue.
    die "Cannot delete old reports: PATHS.path2output is not set in $config_file\n"
        unless defined $path2output && length $path2output;

    print "Deleting old reports.\n";

    # Glob patterns, relative to $path2output, in the order they are removed.
    my @stale_patterns = (
        'condor_mine.cmd',
        '*plotter*.dat',
        'gene_table.html',
        'index.html',
        '*.complete',
        '*.blast[pn]',
        '*.SELF_blast[pn]',
        '*_orphan_increment.html',
        '*orphan_plot*',
        '*_errors',
        '*_matrix.html',
        '*_orphans*',
        '*_overview*',
        '*_rank.html',
        '*_scores.html',
        '*orphan_count.html',
        '*orphan_time.html',
        '*SELF_blast_database*',
        '*time_error.txt',
        'help*',
        '*.dir/*',
    );

    foreach my $pattern (@stale_patterns)
    {
        # A pattern that matches nothing expands to an empty list, which
        # makes unlink a no-op.
        unlink glob("$path2output/$pattern");
    }
    #exit;
}
118
# A "\t" written in the config file comes through as the two literal
# characters, so the word "tab" is used there instead and converted here.
# The defined() guard avoids an "uninitialized value" warning when the
# PARAMS.record_separator key is absent.
if (defined $record_separator && $record_separator =~ /tab/)
{
    $record_separator = "\t";
}

# Kept as a package variable, following the original note that it is
# "in the quickmine file".
# NOTE(review): not referenced again in this script - confirm before removing.
our @cmd = ();

# Remember the directory the script was started from.
# NOTE(review): $home is not used again in this script - confirm before removing.
my $home = `pwd`;
chomp($home);

# Tell the user which directories this run will use.
print "Your path2proteins is $path2proteins\n";
print "Your path2output is $path2output\n";
print <<TEXT;
You should delete all the files in $path2output before rerunning the pipeline from the start....
TEXT
139
# Step "parse": build the fasta files with 2qmfasta.pl.
if ($parse)
{
    print "Parsing all $ext files to create new fasta files...\n";
    system("perl $path2scripts/2qmfasta.pl $config_file") == 0
        or warn "2qmfasta.pl returned status $?\n";
}

# Read the abbreviations (one per line) from abbr.list in the output
# directory. Every later step iterates over @abbr_list.
my @abbr_list = ();
{
    my $abbr_file = "$path2output/abbr.list";
    open(my $abbr_fh, '<', $abbr_file)
        or die "Can't reopen $abbr_file for reading: $!";
    while (my $line = <$abbr_fh>)
    {
        chomp $line;
        # A blank line would otherwise become an empty abbreviation and
        # produce malformed file names in the steps below.
        next if $line eq '';
        push @abbr_list, $line;
    }
    close $abbr_fh;
}

# print them to screen
print "The abbreviations created from your input files: @abbr_list\n";

# Optional HTML rendering of the fasta files; only done as part of the
# parse step and only when LEAVE_ALONE.write_fasta_files is 1.
if ($parse && defined $write_fasta_files && $write_fasta_files == 1)
{
    foreach my $abbr (@abbr_list)
    {
        print "fasta_html.pl $abbr $fasta_file_ending $path2output\n";
        system("perl $path2scripts/fasta_html.pl $abbr $fasta_file_ending $path2output") == 0
            or warn "fasta_html.pl returned status $? for $abbr\n";
    }
}
172
173
# Step "format": run the configured formatdb command line, which formats
# the SELF_blast_database of all sequences.
if ($format)
{
    my $formatdb_cmd = "$formatdb";
    print "Running command: $formatdb_cmd\n";
    system($formatdb_cmd);
}
179
180
181
182
# Step "quickmine": run the configured blast command once per abbreviation.
if ($quickmine)
{
    # Input and output names below are relative, so blast must run inside
    # the output directory; stop if it cannot be entered.
    chdir "$path2output/"
        or die "Can't chdir to $path2output/: $!";

    foreach my $abbr (@abbr_list)
    {
        print "Running blast on $abbr using the following commmand:\n$blast_command\n";

        # Reads <abbr><ext>.complete and writes <abbr><ext>.complete.blast.
        my $cmd = "$blast_command -i ${abbr}${ext}.complete -o ${abbr}${ext}.complete.blast";
        system($cmd) == 0
            or warn "blast returned status $? for $abbr\n";
    }
} # end if $quickmine
200
# Step "split": break the blast report into individual files, then write
# abbr_extra.list into the output directory.
if ($split)
{
    my $split_cmd = "$path2scripts/quick_splitblast.pl $path2output $ext $config_file";
    print "$split_cmd\n";
    system($split_cmd) == 0
        or warn "quick_splitblast.pl returned status $?\n";

    # abbr_extra.list holds one abbreviation per line when self_hit is 1,
    # otherwise the single entry ".*" (written without a trailing newline).
    my $extra_file = "$path2output/abbr_extra.list";
    open(my $extra_fh, '>', $extra_file)
        or die "Can't open $extra_file for writing: $!";

    if (defined $self_hit && $self_hit == 1)
    {
        foreach my $abbr (@abbr_list)
        {
            print {$extra_fh} "$abbr\n";
        }
    }
    else
    {
        print {$extra_fh} ".*";
    }

    # Close in both cases so the file is complete on disk before any later
    # step reads it; write errors surface here.
    close($extra_fh)
        or die "Can't close $extra_file: $!";
}
223
# Step "orphans": run get_orphans.pl once per abbreviation, rewriting
# tax.list in the output directory before each run.
if ($orphans)
{
    # change path to where we will write all the output files
    chdir "$path2output/"
        or die "Can't chdir to $path2output/: $!";

    # needs to get each glob run by quickmine - currently hardcoded!
    print "Running get_orphans with the following commands...\n";
    foreach my $abbr (@abbr_list)
    {
        # An abbreviation may be a comma-separated group: each member goes
        # on its own line. A single abbreviation is written without a
        # trailing newline.
        my $tax_file = "$path2output/tax.list";
        open(my $tax_fh, '>', $tax_file)
            or die "Can't open $tax_file for writing: $!";
        if ($abbr =~ /,/)
        {
            foreach my $tax (split /,/, $abbr)
            {
                print {$tax_fh} "$tax\n";
            }
        }
        else
        {
            print {$tax_fh} "$abbr";
        }
        # Fail loudly rather than let get_orphans.pl read a stale or
        # truncated tax.list.
        close($tax_fh)
            or die "Can't close $tax_file: $!";

        my $cmd = "$path2scripts/get_orphans.pl YES $abbr SELF_$blast_programme $condor_output $ext $config_file";
        print "Running $cmd...\n";
        system($cmd) == 0
            or warn "get_orphans.pl returned status $? for $abbr\n";
    }
} # end get_orphans
255
256
# Step "hits": run hitsparser.pl on every *overview.html file in the
# output directory, writing <overview>.hits.html next to it.
if ($hits)
{
    # The glob below is relative, so it only finds the right files when
    # the output directory has actually been entered.
    chdir "$path2output"
        or die "Can't chdir to $path2output: $!";

    my @overview_files = glob('*overview.html');
    print "overviews: @overview_files\n";

    foreach my $overview (@overview_files)
    {
        # The shell accepts a redirection anywhere in a simple command, so
        # $config_file after the ">file" part is still passed to
        # hitsparser.pl as its second argument.
        my $cmd = "$path2scripts/hitsparser.pl $path2output/$overview >$path2output/$overview.hits.html $config_file";
        print "Running cmd $cmd ...\n";
        system($cmd) == 0
            or warn "hitsparser.pl returned status $? for $overview\n";
    }
} # end ($hits)
272
# Report steps: genetable, orphan_count, orphan_size, paralogue_count,
# increment, time and binary. Each one changes into the output directory
# and runs one helper script as
#     <path2scripts>/<script> <leading args> <config file>
{
    # [ RUN switch, script name, leading arguments, optional refusal text ]
    # A refusal text marks a step that cannot run when self_hit is 0; the
    # text is printed instead of running the script.
    my @report_steps = (
        [ $genetable,       'genetable.pl',          [ $end ] ],
        [ $orphan_count,    'orphan_count.pl',       [ $self_hit, $count_end ] ],
        [ $orphan_size,     'orphan_size.pl',        [ $para_check ] ],
        [ $paralogue_count, 'paralogue_count.pl',    [ $count_end ],
          "Sorry, paralogue_count.pl cannot be run as you are not searching against a SELF_blast database\n" ],
        [ $increment,       'incremental_orphan.pl', [ $count_end ] ],
        [ $time,            'orphan_time.pl',        [ $time_end ] ],
        [ $binary,          'binary_matrix.pl',      [ $count_end ] ],
    );

    foreach my $step (@report_steps)
    {
        my ($enabled, $script, $leading_args, $refusal) = @$step;
        next unless $enabled;

        if (defined $refusal && $self_hit == 0)
        {
            print $refusal;
            next;
        }

        # The helper scripts are started from inside the output directory;
        # stop if it cannot be entered instead of running them elsewhere.
        chdir "$path2output"
            or die "Can't chdir to $path2output: $!";

        my $cmd = join ' ', "$path2scripts/$script", @$leading_args, $config_file;
        print "Running command $cmd\n";
        system($cmd) == 0
            or warn "$script returned status $?\n";
    }
} # end report steps
354
###########################################################
# The following perl scripts require gnuplot to be installed
###########################################################

# Steps "plots" and "indiv_plot": each runs a pair of plotting scripts,
# in the order listed, from inside the output directory.
{
    # [ RUN switch, scripts to run ]
    my @plot_steps = (
        [ $plots,      [ 'gnu_plotter.pl', 'gnu_percent_plotter.pl' ] ],
        [ $indiv_plot, [ 'genome_plot.pl', 'genome_percent_plot.pl' ] ],
    );

    foreach my $step (@plot_steps)
    {
        my ($enabled, $scripts) = @$step;
        next unless $enabled;

        chdir "$path2output"
            or die "Can't chdir to $path2output: $!";

        foreach my $script (@$scripts)
        {
            my $cmd = "$path2scripts/$script $config_file";
            print "Running command $cmd\n";
            system($cmd) == 0
                or warn "$script returned status $?\n";
        }
    }
}

# Step "dot_plot": refused with a message when self_hit is 0, i.e. when
# the search was not run against a SELF_blast database.
if ($dot_plot)
{
    if ($self_hit == 0)
    {
        print "Sorry, dotplots can't be created as you are not searching against a SELF_blast database\n";
    }
    else
    {
        # Run dot_plot.pl
        chdir "$path2output"
            or die "Can't chdir to $path2output: $!";
        my $cmd = "$path2scripts/dot_plot.pl $matrix_end $config_file";
        print "Running command $cmd\n";
        system($cmd) == 0
            or warn "dot_plot.pl returned status $?\n";
    }
} # end dot_plot
402
403 ##############################################################################
404
# Step "summarizer": run summarizer.pl and capture its standard output
# as index.html in the output directory.
if ($summarizer)
{
    my $index_page = "$path2output/index.html";
    my $summary_cmd = "$path2scripts/summarizer.pl $config_file>$index_page";
    system($summary_cmd);
} # end ($summarizer)