ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/msatfinder/msatfinder.cgi
Revision: 1.1
Committed: Tue Feb 13 16:21:44 2007 UTC (9 years, 5 months ago) by confused
Branch: MAIN
CVS Tags: HEAD
Log Message:
New version of msatfinder, new engines

Line File contents
1 #!/usr/bin/perl
2
3 ##############################################
4 ##############################################
5 # SCRIPT NAME: msatfinder.cgi
6 # FUNCTION: act as a wrapper to msatfinder
7 ##############################################
8
9
10 # MINE: Molecular INformation Explorer
11
# Put the site-specific library directories at the front of the module
# search path at compile time, so the "use" statements that follow can
# find modules installed there.
BEGIN {
    my @site_lib_dirs = (
        '/var/apache/cgi-bin/msatfinder',
        '/nerc/packages/perl/lib/5.9.2',
        '/nerc/packages/perl/lib/5.9.2/sun4-solaris',
    );
    unshift @INC, @site_lib_dirs;
}
15
16 use strict;
17 use CGI;
18 use Mine;
19 use CGI qw/:standard :html3/;
20 use CGI::Carp qw(fatalsToBrowser set_message);
21 use Config::Simple;
22 use Bio::SeqIO;
23 use Mail::Mailer;
24 $CGI::POST_MAX = 100000000;
25 $SIG{__DIE__} = sub { print "<script>n=document.getElementById(\"PLEASEWAIT\"); n.parentNode.removeChild(n)</script> <img src=\"http://www.bioinf.ceh.ac.uk/msatfinder/msatfinder_error.png\" border=\"0\">\n<br><b>Msatfinder failed to run!</b> <p>Please click <a href=\"http://www.genomics.ceh.ac.uk/msatfinder/msatfinder_manual.html#errors\" target=\"_blank\">here</a> for more information.<br><br>"};
26
27 #################
28 # START EACH MINE CGI SCRIPT
29 #################
30 # this redirects the error messages to the user's screen
31 # and is useful for debugging CGI scripts!
32 open (STDERR, ">&STDOUT"); # print errors to screen
33 $| = 1; # flush the print buffer continuously
34 # make a new query object using CGI.pm module
35 my $query = new CGI;
36
37 # print the required header and start the web page
38 print $query->header;
39
40 my $ip = $query->remote_addr();
41 my $tarip = $ip;
42 $tarip =~ s/\./_/g;
43
44 # some example data to be left on the form by default
45 # in case users want to test it. N.B. the clear button
46 # does not work if this is used!
47 #my $sample = "gtttacatctatcaataataataataataataataataataataataataataataataataataataataataataataataataataataataataataataataataataataataataataataataataataataataatttagatctacagatcgtcatggcgtcaaataacaataacaataacaataacaataacaataacaataacaataacaataacaataacaataacaataacaataacaataacaataacaataacaataacaataacaataacaataacaataacaataacttgatcctatgcgtacagcatagacaataacaagaataacaagaataacaagaataacaagaataacaagaataacaagaataacaagaataacaagaataacaagaataacaagaataacaagtggttagatcgatcgatcgatcgatcgatcgatcgatcgatcgatcgatcgatc";
48 my $sample = "";
49
# check seq is not too long
# If the declared request body size exceeds $CGI::POST_MAX, show an error
# page with a link back to the form and stop; nothing else is processed.
my $content_length = defined $ENV{'CONTENT_LENGTH'} ? $ENV{'CONTENT_LENGTH'} : 0;
if ( ($CGI::POST_MAX > 0) && ($content_length > $CGI::POST_MAX) )
{
    # remove please wait
    print "<script>n=document.getElementById(\"PLEASEWAIT\"); n.parentNode.removeChild(n)</script>";
    # print error picture
    print "<img src=\"http://www.bioinf.ceh.ac.uk/msatfinder/msatfinder_error.png\" border=\"0\">\n";
    # closing tags are emitted in the correct order (</b> before </p>)
    print "<p><b>Your sequence exceeds 100 MB (megabytes) in length. Please use a shorter sequence, or download msatfinder to use on your own system.</b></p>\n";
    print "<p><b>Length supplied:</b> $content_length.</p>\n";
    print "<p><font size=\"+1\"><a href=\"http://www.bioinf.ceh.ac.uk/cgi-bin/msatfinder/msatfinder.cgi\">BACK</a></font><br>\n";
    exit 0;
}
63
64
# Timestamps taken from the system "date" command: $datestring
# (day-of-year followed by HHMMSS) is used further down to name the per-run
# output directory and archive, $time is written to the job log.
my $datestring = `date +%j%H%M%S`;
chomp($datestring);
my $time = `date`;
chomp $time;

# read the mine.config file to get the location of the stylesheet and the
# other site settings. Fail with a readable message if the file cannot be
# loaded, rather than dying later on an undefined $cfg.
my $cfg = Config::Simple->new('mine.config')
    or die "Could not read mine.config: " . Config::Simple->error() . ", stopped";
my $stylesheet = $cfg->param('stylesheet');
my $baseURL = $cfg->param('baseURL');
my $absPATH = $cfg->param('absPATH');
my $tempdir = $absPATH; # to allow on-line viewing
my $execdir = "/usr/local/bin/";
my $eprimer3 = $cfg->param('eprimer3');
my $eprimer_args = $cfg->param('eprimer_args');
my $p3_core = $cfg->param('primer3_core_path');
79 print $query->start_html(-title=>'Msatfinder on-line - microsatellite detection',
80 -style=>{-src=>$stylesheet}
81 );
82 #-script=>$JSCRIPT
83 my $file;
# button labels for msat type: motif names in display order, each labelled
# with its own name and mapped to its repeat-unit length (mono=1 .. hexa=6)
my @s_names = qw(mono di tri tetra penta hexa);
my %s_labels = map { $_ => $_ } @s_names;
my %s_values;
@s_values{@s_names} = (1 .. 6);

# button labels for download type
my @d_names = qw(zip tar);
my %d_labels = (
    zip => 'a zip file',
    tar => 'a tar.gz file',
);

# file format
my @f_names = qw(tab gff vertical);
my %f_labels = (
    tab      => 'tab-delimited',
    gff      => 'gff3',
    vertical => 'vertical-line delimited',
);

# labels for interrupt type (single unlabelled "yes" checkbox)
my @i_names = ('yes');
my %i_labels = (yes => '');

# labels for randomising: number of randomisations offered in the menu
# (this used to be a single "yes" checkbox, hence the leftover label hash)
my @r_names = qw(0 1 5 10 50 100);
my %r_labels = (yes => '');

# Markov chain lengths offered in the menu
my @ml_names = qw(1 2 3 4 5);

# labels for calculating Markov thresh
my @p_names = ('yes');
my %p_labels = (yes => '');

# labels for using Markov thresh
my @puse_names = ('yes');
my %puse_labels = (yes => '');

# labels for calculating poly thresh
my @poly_names = ('yes');
my %poly_labels = (yes => '');

# labels for using poly thresh
my @poluse_names = ('yes');
my %poluse_labels = (yes => '');

# labels for advanced options (output files that can be switched on/off)
my @a_names = qw(sumswitch fasta primers atab btab mine);
my %a_labels = (
    sumswitch => 'summary files of the search',
    fasta     => 'FASTA files of each repeat with flanking sequences',
    primers   => 'primer files',
    atab      => 'feature tables for artemis',
    btab      => 'extended feature tables artemis with flanking regions',
    mine      => 'MINE files for each repeats for setting up a MINE website',
);

# labels for engines: engine number => engine name
my @e_names = qw(1 2 3 4 5 6);
my %e_labels;
@e_labels{@e_names} = qw(regex multipass iterative sputnik dust seg);

# labels for viewing artemis
my @art_names = ('yes');
my %art_labels = (yes => '');
159 # check value of $action
160 my $action = $query->param('action');
161 # &nbsp;Sputnik courtesy <a href=\"http://espressosoftware.com/pages/sputnik.jsp\">Chris Abajian</a>",
162
163 #######
164 # START THE WEBPAGE
165 #######
166 # print the MINE menu
167 &Mine::finder_header;
168
169
170 print "
171 <br><a href=\"http://www.bioinf.ceh.ac.uk/msatfinder/testseq.txt\" target=\"_blank\">Sample fasta file</a></span>&nbsp;|&nbsp;\n
172 <span class=\"not_entered\"><a href=\"http://www.bioinf.ceh.ac.uk/msatfinder/sample/\" target=\"_blank\">Sample output</a>&nbsp;|&nbsp;";
173 print "<a href=\"http://www.genomics.ceh.ac.uk/cgi-bin/msatfinder/msatfinder_v_1_0.cgi/\">Msatfinder v.1.0</a></span><br>";
174 #print "<p><span class=\"not_entered\">Click <a href=\"http://www.bioinf.ceh.ac.uk/msatfinder/\">here</a> to return to the home page.</span>\n";
175 print "<br>\n";
176 print "\n<script language=\'javascript\'>
177 function checkMe(remove)
178 {
179 if (document.myform.markov.checked==false && document.myform.markovuse.checked==true)
180 {
181 document.myform.markov.checked=true;
182 }
183 if (document.myform.poly.checked==false && document.myform.polyuse.checked==true)
184 {
185 document.myform.poly.checked=true;
186 }
187 if (document.myform.markovuse.checked==true)
188 {
189 document.myform.markov.checked=true;
190 }
191 if (document.myform.polyuse.checked==true)
192 {
193 document.myform.poly.checked=true;
194 }
195 if (document.myform.polyuse.checked==true && document.myform.markovuse.checked==true)
196 {
197 document.myform.polyuse.checked=false;
198 document.myform.markovuse.checked=false;
199 alert(\"You can't implement both poly and markov thresholds. Please just choose one\");
200 }
201 }
202 </script>\n";
203 print "<table VALIGN=TOP HALIGN=LEFT BORDER=3 CELLSPACING=5 CELLPADDING=5 COLS=1 WIDTH=600 BGCOLOR=white class=\"text\">\n";
204 print "<tr><td VALIGN=TOP>";
205 #my $JSCRIPT=<<END;
206 #END
207 #document.myform.markov.checked = true;
208
209 ############
210 # START FORM
211 ############
212 # actual form
213 print $query->startform(-enctype=>'multipart/form-data',
214 -method=>'post',
215 -name=>'myform');
216
217 my $agent = $query->user_agent();
218 print "<p><b>Click on the help icons alongside each option for more information.</b><p>";
219
220
221
222 # subtable start here
223 print "<table VALIGN=TOP HALIGN=LEFT BORDER=0 CELLSPACING=5 CELLPADDING=5 COLS=1 WIDTH=300 BGCOLOR=white class=\"text\">\n";
224 print "<tr><td VALIGN=TOP>\n";
225
226 # msat motifs
227 print "<p><a href=\"http://www.genomics.ceh.ac.uk/msatfinder/msatfinder_manual.html#motif\" target=\"_blank\"><img src=\"http://www.bioinf.ceh.ac.uk/msatfinder/msatfinder_help.png\" border=\"0\"></a>&nbsp;<b>Choose which microsatellite motifs to search for: </b><br>", $query->checkbox_group(-name=>'motifs',
228 -values=>[@s_names],
229 -linebreak=>'true',
230 -defaults=>[@s_names],
231 -labels=>\%s_labels);
232 print "</td><td VALIGN=TOP>\n";
233 #<a href=\"http://www.genomics.ceh.ac.uk/msatfinder/msatfinder_manual.html#threshold\" target=\"_blank\"><img src=\"http://www.bioinf.ceh.ac.uk/msatfinder/msatfinder_help.png\" border=\"0\"></a>
234
235 # thresholds
236 print "<p>
237 &nbsp;<b>Set thresholds (minimum 3 repeat units):</b><br>";
238 print $query->textfield(-name=>'mono_t', -default=>"12", -maxlength=>'2', -size=>'2'), " mono<br>\n";
239 print $query->textfield(-name=>'di_t', -default=>"5", -maxlength=>'2', -size=>'2'), " di<br>\n";
240 print $query->textfield(-name=>'tri_t', -default=>"5", -maxlength=>'2', -size=>'2'), " tri<br>\n";
241 print $query->textfield(-name=>'tetra_t', -default=>"5", -maxlength=>'2', -size=>'2'), " tetra<br>\n";
242 print $query->textfield(-name=>'penta_t', -default=>"5", -maxlength=>'2', -size=>'2'), " penta<br>\n";
243 print $query->textfield(-name=>'hexa_t', -default=>"5", -maxlength=>'2', -size=>'2'), " hexa \n";
244 # subtable end here
245
246 print "</td></tr></table>\n";
247
248 # downloads
249 my $dldef;
250 if ($agent =~ /MSIE/ or $agent =~ /Windows/)
251 {
252 $dldef = "zip";
253 }
254 else
255 {
256 $dldef = "tar";
257 }
258
259
260 # advanced options
261 print "<P><a href=\"http://www.genomics.ceh.ac.uk/msatfinder/msatfinder_manual.html#advanced\" target=\"_blank\"><img src=\"http://www.bioinf.ceh.ac.uk/msatfinder/msatfinder_help.png\" border=\"0\"></a>&nbsp;";
262 print $query->submit('action', 'Advanced options +');
263 print "<p>";
264
265 if ($action =~ "Advanced options") {
266
267 # select engine.
268 print "<b>Select the engine to be used:</b><br>\n";
269 print $query->radio_group(-name=>'engine',
270 -values=>[@e_names],
271 -linebreak=>'true',
272 -default=>"1",
273 -labels=>\%e_labels);
274 # select a box here to upload the file instead
275 print "<b>&nbsp;seg values:</b><br>";
276 print "Window size: ";
277 print $query->textfield(-name=>'segwindow',
278 -default=>'12',
279 -size=>2,
280 -maxlength=>10);
281 print "Locut: ";
282 print $query->textfield(-name=>'seglocut',
283 -default=>'2.2',
284 -size=>2,
285 -maxlength=>10);
286 print "Highcut: ";
287 print $query->textfield(-name=>'seghighcut',
288 -default=>'2.5',
289 -size=>2,
290 -maxlength=>10);
291 print "<br>\n";
292 print "<p>Sputnik courtesy of <a href=\"http://espressosoftware.com/pages/sputnik.jsp\">Chris Abajian</a></p>";
293 print "<p>This option selects the method used to search for microsatellites. The default setting is suitable for most purposes &mdash; please read the manual before trying another.";
294 print "<hr>";
295 print "<table VALIGN=TOP HALIGN=LEFT BGCOLOR=white class=\"text\">\n<tr><td>";
296 # click to detect interrupted msats
297 print "
298 <span class=\"not_entered\">
299
300 <a href=\"http://www.genomics.ceh.ac.uk/msatfinder/msatfinder_manual.html#interrupts\" target=\"_blank\"><img src=\"http://www.bioinf.ceh.ac.uk/msatfinder/msatfinder_help.png\" border=\"0\"></a></td><td><b>&nbsp;Search for interrupted microsatellites </b>&nbsp;</td><td>", $query->checkbox_group(-name=>'interrupts',
301 -values=>[@i_names],
302 -linebreak=>'true',
303 -defaults=>[],
304 -labels=>\%i_labels);
305 print "</td></tr><tr><td>\n";
306
307 # click to randomise your file
308 #print "<a href=\"http://www.genomics.ceh.ac.uk/msatfinder/msatfinder_manual.html#random\" target=\"_blank\"><img src=\"http://www.bioinf.ceh.ac.uk/msatfinder/msatfinder_help.png\" border=\"0\"></a></td><td><b>&nbsp;Randomise the sequence&nbsp;</b></td><td>", $query->checkbox_group(-name=>'random',
309 # -values=>[@r_names],
310 # -linebreak=>'true',
311 # -defaults=>[],
312 # -labels=>\%r_labels);
313 print "<a href=\"http://www.genomics.ceh.ac.uk/msatfinder/msatfinder_manual.html#random\" target=\"_blank\"><img src=\"http://www.bioinf.ceh.ac.uk/msatfinder/msatfinder_help.png\" border=\"0\"></a></td><td><b>&nbsp;Number of times to randomise sequence&nbsp;</b></td><td>", $query->popup_menu(-name=>'random',
314 -values=>[@r_names],
315 -default=>[],
316 -label=>'times');
317 print "</td></tr><tr><td>\n";
318
319 # choose markov chain length
320 print "<a href=\"http://www.genomics.ceh.ac.uk/msatfinder/msatfinder_manual.html#markov\" target=\"_blank\"><img src=\"http://www.bioinf.ceh.ac.uk/msatfinder/msatfinder_help.png\" border=\"0\"></a><br></td><td><b>&nbsp;Choose Markov chain length&nbsp;</b></td><td>", $query->popup_menu(-name=>'markovlength',
321 -values=>[@ml_names],
322 -default=>'5');
323 print "</td></tr><tr><td>\n";
324
325 # click to pimp your file
326 print "</td><td><b>&nbsp;Calculate Markov thresholds</b></td><td>", $query->checkbox(-name=>'markov',
327 -value=>'yes',
328 -onClick=>'javascript:checkMe(markovuse)',
329 -checked=>'',
330 -label=>''
331 );
332 #print "<b><a href=\"http://www.genomics.ceh.ac.uk/msatfinder/msatfinder_manual.html#markov\" target=\"_blank\"><img src=\"http://www.bioinf.ceh.ac.uk/msatfinder/msatfinder_help.png\" border=\"0\"></a>&nbsp;Calculate Markov thresholds</b></td><td>", $query->checkbox_group(-name=>'markov',
333 # -values=>[@p_names],
334 # -linebreak=>'true',
335 # -onClick=>'javascript:checkMe(markovuse)',
336 # -defaults=>[],
337 # -labels=>\%p_labels);
338 print "</td></tr><tr><td>\n";
339
340 # click to pimp your file
341 print "</td><td><b>&nbsp;Implement Markov thresholds</b></td><td>", $query->checkbox(-name=>'markovuse',
342 -value=>'yes',
343 -label=>'',
344 -onClick=>'checkMe(markov)'
345 );
346 #print "<b><a href=\"http://www.genomics.ceh.ac.uk/msatfinder/msatfinder_manual.html#markovuse\" target=\"_blank\"><img src=\"http://www.bioinf.ceh.ac.uk/msatfinder/msatfinder_help.png\" border=\"0\"></a>&nbsp;Implement Markov thresholds</b></td><td>", $query->checkbox_group(-name=>'markovuse',
347 # -values=>[@puse_names],
348 # -linebreak=>'true',
349 # -defaults=>[],
350 # -onClick=>'checkMe(markov)',
351 # -labels=>\%puse_labels);
352 print "</td></tr><tr><td>\n";
353
354 # click to get poly thresholds
355 print "<a href=\"http://www.genomics.ceh.ac.uk/msatfinder/msatfinder_manual.html#poly\" target=\"_blank\"><img src=\"http://www.bioinf.ceh.ac.uk/msatfinder/msatfinder_help.png\" border=\"0\"></a></td><td><b>&nbsp;Calculate Poly thresholds</b>
356 </td><td>", $query->checkbox(-name=>'poly',
357 -value=>'yes',
358 -onClick=>'javascript:checkMe(poly)',
359 -label=>'');
360 #print "<b><a href=\"http://www.genomics.ceh.ac.uk/msatfinder/msatfinder_manual.html#markovuse\" target=\"_blank\"><img src=\"http://www.bioinf.ceh.ac.uk/msatfinder/msatfinder_help.png\" border=\"0\"></a>&nbsp;Calculate Poly thresholds</b>
361 #</td><td>", $query->checkbox_group(-name=>'poly',
362 # -values=>[@poly_names],
363 # -linebreak=>'true',
364 # -defaults=>[],
365 # -labels=>\%poly_labels);
366 print "</td></tr><tr><td>\n";
367
368 # click to implement poly thresholds
369 print "</td><td><b>&nbsp;Implement Poly thresholds</b>
370 </td><td>", $query->checkbox(-name=>'polyuse',
371 -value=>'yes',
372 -onClick=>'checkMe(poly)',
373 -label=>'');
374 print "</td></tr>";
375 print "<tr><td></td><td>&nbsp;&nbsp;&nbsp;Poly courtesy of <a href=\"http://www.biomedcentral.com/1471-2105/4/22\">Jeff Bizzaro</a></td></tr></span></table>";
376
377
378 print "<hr>By default all of the following files are generated. If you are searching with very low thresholds, or expect to find a very large number of repeats you may want to turn off all but the summary file. Turn the MINE files on only if you plan to set up an online MINE database";
379 print "<p><b>Select output files to be generated: </b><br>", $query->checkbox_group(-name=>'advanced',
380 -values=>[@a_names],
381 -linebreak=>'true',
382 -defaults=>[("sumswitch", "fasta","atab","btab")],
383 -labels=>\%a_labels);
384 print "<br>\n";
385
386 print "<p>The flanking regions to either side of a repeat can be stored in the files above (fasta, extended feature table, and MINE) for blasting and other uses.<P>";
387
388 # flanks
389 print "<b>Enter the required flank size in bp:</b> ",$query->textfield(-name=>'flank_size',
390 -default=>"300",
391 -maxlength=>'4',
392 -size=>'4'),"<br>\n";
393 print "<br>\n";
394
395 # product size
396 print "<b>Optimum PCR product size (0 = no optimum):</b> ",$query->textfield(-name=>'prod_size',
397 -default=>"0",
398 -maxlength=>'4',
399 -size=>'4'),"<br>\n";
400 print "<br>\n";
401
402
403
404 } # end if ($action =~ "advanced")
405
406
407
408 # table row
409 print "</td><td VALIGN=TOP>";
410
411 my $hidden;
412 $hidden = $query->param(-name=>'hidden_file');
413
414
415 # select a box here to upload the file instead
416 print "<b><a href=\"http://www.genomics.ceh.ac.uk/msatfinder/msatfinder_manual.html#upload\" target=\"_blank\"><img src=\"http://www.bioinf.ceh.ac.uk/msatfinder/msatfinder_help.png\" border=\"0\"></a>&nbsp;Select a sequence file</b>";
417 print $query->filefield(-name=>'uploaded_file',
418 -default=>$hidden,
419 -size=>30,
420 -maxlength=>80);
421 print "<br>\n";
422 #'starting value',
423
424 # sequence paste
425 print "<br>\n";
426 print "<b><a href=\"http://www.genomics.ceh.ac.uk/msatfinder/msatfinder_manual.html#paste\" target=\"_blank\"><img src=\"http://www.bioinf.ceh.ac.uk/msatfinder/msatfinder_help.png\" border=\"0\"></a>&nbsp;...or paste a sequence</b><P>";
427 print $query->textarea(-name=>'paste',
428 -default=>"$sample",
429 -rows=>5,
430 -columns=>30);
431
432 print "<font size=\"-2\"><p><b>Accepted formats:</b><br></font>";
433 print "<font size=\"-2\">GenBank, EMBL, Swissprot, FASTA, ASCII.<br><br></font><hr>";
434 #print "<font size=\"-2\"><b>Sequences are limited to 10Mb.</b><br><hr><br></font>";
435
436 print "<b><a href=\"http://www.genomics.ceh.ac.uk/msatfinder/msatfinder_manual.html#download\" target=\"_blank\"><img src=\"http://www.bioinf.ceh.ac.uk/msatfinder/msatfinder_help.png\" border=\"0\"></a>&nbsp;Compress results into: </b><br>", $query->radio_group(-name=>'download',
437 -values=>[@d_names],
438 # -linebreak=>'true',
439 -default=>"$dldef",
440 -labels=>\%d_labels);
441 print "<p>\n";
442
443 # download as tab or gff3?
444 #print "<b><a href=\"http://www.genomics.ceh.ac.uk/msatfinder/msatfinder_manual.html#format\" target=\"_blank\"><img src=\"http://www.bioinf.ceh.ac.uk/msatfinder/msatfinder_help.png\" border=\"0\"></a>&nbsp;Output file format: </b><br>", $query->radio_group(-name=>'format',
445 # -values=>[@f_names],
446 # -linebreak=>'true',
447 # -default=>"vertical",
448 # -labels=>\%f_labels);
449 #print "<p>\n";
450
451 # whether or not you want Artemis output
452 #print "<b><a href=\"http://www.genomics.ceh.ac.uk/msatfinder/msatfinder_manual.html#artemis\" target=\"_blank\"><img src=\"http://www.bioinf.ceh.ac.uk/msatfinder/msatfinder_help.png\" border=\"0\"></a>View results using artemis</b></td><td>", $query->checkbox_group(-name=>'viewartemis',
453 # -values=>[@art_names],
454 # -linebreak=>'true',
455 # -defaults=>[],
456 # -labels=>\%art_labels);
457
458 # select a box here to upload the file instead
459 print "<b><a href=\"http://www.genomics.ceh.ac.uk/msatfinder/msatfinder_manual.html#upload\" target=\"_blank\"><img src=\"http://www.bioinf.ceh.ac.uk/msatfinder/msatfinder_help.png\" border=\"0\"></a>&nbsp;Email:</b><br>";
460 print $query->textfield(-name=>'email',
461 -default=>'',
462 -size=>30,
463 -maxlength=>100);
464 print "<br>Required if file is larger than 7 megabytes\n";
465
466 # print some buttons
467 #print "<p><b>Click here to search:</b> ";
468 print "<br><br><center>";
469 if ($file)
470 {
471 print "got $file<br>";
472 print $query->hidden( -name => 'hidden_file',
473 -value => $file,
474 -override=>1);
475 }
476 print $query->submit('action', 'SEARCH');
477 print "&nbsp;&nbsp;";
478
479 # print a 'clear' button at end of form: script self-calls
480 #print "<p><b>Clear the form:</b> ";
481 print "&nbsp;&nbsp;";
482 print $query->defaults('Clear');
483 print "</center>";
484 print $query->endform();
485 #################
486 # END FORM
487 #################
488
489 #################
490 # PROCESS FORM
491 #################
492 # get the values
493 my @action = $query->param('action');
494 my $paste = $query->param('paste');
495
496 ################ IF Primers ################
497 # select primers and show results
498 if ($action =~ /SEARCH/)
499 {
500 my $sillythresh = 0; # silliness check...
501 print "</td><td VALIGN=TOP>";
502 print "<p><b id=\"PLEASEWAIT\">Running msatfinder. Please wait...<br><br><img src=\"http://www.bioinf.ceh.ac.uk/msatfinder/msatfinder_searching.gif\"></b></p>\n";
503
504 # set flag for running
505 my $run = 0;
506
507
508 # set thresholds
509 my @thresholds;
510 $thresholds[0] = $query->param('mono_t');
511 $thresholds[1] = $query->param('di_t');
512 $thresholds[2] = $query->param('tri_t');
513 $thresholds[3] = $query->param('tetra_t');
514 $thresholds[4] = $query->param('penta_t');
515 $thresholds[5] = $query->param('hexa_t');
516
# check no silly thresholds are supplied
# Each threshold must consist of decimal digits only and be at least 3.
# A purely numeric comparison (int($t) != $t) is not enough: Perl
# numifies strings such as "5a" or "5\nfoo" to 5, so they would pass and
# then be written verbatim into msatfinder.rc via the motif_threshold line.
foreach my $thresh (@thresholds)
{
    next if (defined $thresh and $thresh =~ /\A[0-9]+\z/ and $thresh >= 3);

    # remove wait message
    print "<script>n=document.getElementById(\"PLEASEWAIT\"); n.parentNode.removeChild(n)</script>";
    # print error picture (only once, however many thresholds are wrong)
    unless ($sillythresh == 1)
    {
        print "<img src=\"http://www.bioinf.ceh.ac.uk/msatfinder/msatfinder_error.png\" border=\"0\">\n";
    }
    print "<p><b>The threshold values must be integers of 3 or greater. </b>\n";
    $sillythresh = 1;
}
533
534 # advanced options
535 my $flank_size;
536 my $prod_size;
537 my $primers = 0;
538 my $artemis = 0;
539 my $mine = 0;
540 my $fasta = 0;
541 my $sumswitch = 0;
542 my @adv_select = $query->param(-name=>'advanced');
543 if (@adv_select)
544 {
545 my @adv_select_sorted = sort { $a cmp $b } @adv_select;
546 foreach my $adv (@adv_select_sorted)
547 {
548 if ($adv eq 'primers') { $primers = 1; }
549 if ($adv eq 'mine') { $mine = 1; }
550 if ($adv eq 'fasta') { $fasta = 1; }
551 if ($adv eq 'sumswitch') { $sumswitch = 1; }
552 if ($adv eq 'atab') { $artemis = 1; }
553 if ($adv eq 'btab') { $artemis = 2; }
554 }
555 $flank_size = $query->param(-name=>'flank_size');
556 $prod_size = $query->param(-name=>'prod_size');
557 }
558 else
559 {
560 $primers = 1;
561 $fasta = 1;
562 $sumswitch = 1;
563 $artemis = 2;
564 $mine = 0;
565 $flank_size = 300;
566 }
567 $prod_size = 0 unless (defined $prod_size);
568
# check no silly flank sizes are supplied
# The flank size is written into msatfinder.rc, so accept only a string of
# decimal digits (0 is allowed). The previous numeric test let through an
# empty value, negative numbers and strings with trailing junk ("30x").
unless (defined $flank_size and $flank_size =~ /\A[0-9]+\z/)
{
    # remove wait message
    print "<script>n=document.getElementById(\"PLEASEWAIT\"); n.parentNode.removeChild(n)</script>";
    # print error picture
    print "<img src=\"http://www.bioinf.ceh.ac.uk/msatfinder/msatfinder_error.png\" border=\"0\">\n";
    print "<p><b>The flank size must be an integer, or 0.</b>\n";
    $sillythresh = 1;
}
579 # print error message
580 if ($sillythresh == 1)
581 {
582 print "<p><b>Please try again.</b>\n";
583 }
584
# msats to search for: one "unit_length,threshold" pair per selected motif,
# joined with "|". Motif names that are not on the form are skipped, since
# they have no unit length and would otherwise yield a bogus ",<n>" entry.
my @mranges;
my @selected = $query->param(-name=>"motifs");
foreach my $motif (@selected)
{
    next unless exists $s_values{$motif};
    my $unit_length = $s_values{$motif};
    push(@mranges, $unit_length . "," . $thresholds[$unit_length-1]);
}
my $mrange = join("|", @mranges);

# Everything collected below is interpolated into msatfinder.rc and/or the
# msatfinder command line further down, so only values the form itself can
# produce are accepted. A value that fails its check is treated exactly as
# if the field had not been submitted (undef).
my $one_of = sub {
    my ($value, @permitted) = @_;
    return undef unless defined $value;
    return (grep { $_ eq $value } @permitted) ? $value : undef;
};
my $plain_number = sub {
    my ($value) = @_;
    return undef unless defined $value;
    return ($value =~ /\A(?:[0-9]+\.?[0-9]*|\.[0-9]+)\z/) ? $value : undef;
};

# randomise the sequence first?
my $random = $one_of->(scalar $query->param(-name=>'random'), @r_names);

# get markov length?
my $markovlength = $one_of->(scalar $query->param(-name=>'markovlength'), @ml_names);

# get pimp results?
my $pimp = $one_of->(scalar $query->param(-name=>'markov'), 'yes');

# get pimpuse results?
my $pimpuse = $one_of->(scalar $query->param(-name=>'markovuse'), 'yes');

# get poly thresholds
my $poly = $one_of->(scalar $query->param(-name=>'poly'), 'yes');

# use poly thresholds
my $polyuse = $one_of->(scalar $query->param(-name=>'polyuse'), 'yes');

# get seg results?
my $segwindow = $plain_number->(scalar $query->param(-name=>'segwindow'));
my $seglocut = $plain_number->(scalar $query->param(-name=>'seglocut'));
my $seghighcut = $plain_number->(scalar $query->param(-name=>'seghighcut'));

# get artemis
my $viewartemis = $one_of->(scalar $query->param(-name=>'viewartemis'), 'yes');

# get email. The address is later appended to a shell command line
# ("-g $email"), so anything that is not a conservative user@host string
# (letters, digits and . _ + - only, starting with a letter or digit) is
# discarded and the user is told.
my $email = $query->param(-name=>'email');
if (defined $email and $email ne ''
    and $email !~ /\A[A-Za-z0-9][A-Za-z0-9._+-]*\@[A-Za-z0-9][A-Za-z0-9.-]*\z/)
{
    print "<p><b>The email address supplied was not recognised and has been ignored.</b></p>\n";
    $email = '';
}

# what style to download in?
my $dl = $one_of->(scalar $query->param(-name=>'download'), @d_names);

# what style of file format?
# my $format = $query->param(-name=>'format');


# check for interrupts
my $int = $one_of->(scalar $query->param(-name=>'interrupts'), 'yes');
634
635 # a handy config file to be created so that the script
636 # will run, as msatfinder needs one of these in $PWD
637 my $config = <<EOF;
638 [COMMON]
639 debug = 0
640 flank_size = $flank_size
641 mine_dir = "MINE/"
642 repeat_dir = "Repeats/"
643 tab_dir = "Msat_tabs/"
644 bigtab_dir = "Flank_tabs/"
645 fasta_dir = "Fasta/"
646 prime_dir = "Primers/"
647 align_dir = "Aligner/"
648 anno_dir = "Annotations/"
649 count_dir = "Counts/"
650 gff_dir = "GFF/"
651 [DEPENDENCIES]
652 run_eprimer = $primers
653 eprimer_args = "$eprimer_args -productosize $prod_size"
654 eprimer = "$eprimer3"
655 primer3core = "$p3_core"
656 [FINDER]
657 override = 0
658 motif_threshold="$mrange"
659 artemis = $artemis
660 mine = $mine
661 fastafile = $fasta
662 sumswitch = $sumswitch
663 screendump = 0
664 [CONTACT]
665 ip = $tarip
666 download = $dl
667 viewart = $viewartemis
668 markovlength = $markovlength
669 segwindow = $segwindow
670 seglocut = $seglocut
671 seghighcut= $seghighcut
672 poly = $poly
673 polyuse = $polyuse
674 random = $random
675 EOF
676
677 # run msatfinder
678 $ENV{'PATH'} = "/usr/bin:/usr/sbin:$p3_core:" . $ENV{'PATH'};
679 $ENV{'LD_LIBRARY_PATH'} = "/nerc/packages/gcc/3.1.1/lib:/nerc/packages/gcc/3.1.1/lib/sparcv9:" . $ENV{'LD_LIBRARY_PATH'};
680 #$ENV{'LD_PRELOAD'} = "/nerc/packages/gcc/3.1.1/lib:/nerc/packages/gcc/3.1.1/lib/sparcv9:" . $ENV{'LD_PRELOAD'};
681 $ENV{'SETUP'} = "gcc/current";
682 my $outdir = "$tempdir/msatfinder.$datestring";
683 umask(0000);
684 system("mkdir $outdir") == 0 or die "Could not mkdir $outdir: $!, stopped";
685 open (CONF,">$outdir/msatfinder.rc") or die "Could not create config file in $outdir: $!, stopped";
686 print CONF $config;
687 close CONF;
688 my $outfile = "$outdir/msatfinder.infile";
689
# engine to be used. The value is interpolated into the msatfinder shell
# command ("-e $engine"), so it must be one of the engine numbers offered on
# the form (the keys of %e_labels); anything else falls back to engine 1.
my $engine = $query->param(-name=>'engine');
$engine = 1 unless (defined $engine and exists $e_labels{$engine});
692
693 ###########################
694 # pasted or uploaded file #
695 ###########################
696 if ($paste)
697 {
698 $run = &checkseq($paste,$outfile,$sillythresh);
699 }
700 else # uploaded
701 {
702 $file = $query->upload("uploaded_file");
703 my $data;
704 while (<$file>)
705 {
706 $data .= $_;
707 }
708 $run = &checkseq($data,$outfile);
709 }
710
711 ##################################
712 # if sequence is OK, get running #
713 ##################################
714 if ($run == 1 and $sillythresh == 0)
715 {
716 # run the code
717 # on normal systems
718 my $random_input;
719 if ($random != 0)
720 {
721 $random_input = "-r";
722 }
723 my $pimp_input=0;
724 if ($pimp)
725 {
726 $pimp_input = 1;
727 }
728 if ($pimpuse)
729 {
730 $pimp_input = 2;
731 }
732 my $email_input;
733 if ($email)
734 {
735 $email_input = "-g $email";
736 }
737 if ($content_length > 7000000)
738 {
739 if ($email)
740 {
741 print "<script>n=document.getElementById(\"PLEASEWAIT\"); n.parentNode.removeChild(n)</script>";
742 print "<p><b>This job will take some time, I'm afraid. Once it's complete, we'll email you a link to the results. Cheers.</b></p>";
743 open (LOG, ">>$absPATH/msatfinder.log");
744 print LOG "$ip\t$time\n";
745 close LOG;
746
747 if ($int eq "yes")
748 {
749 #system("cd $outdir; $execdir/msatfinder_24Jan06 -e $engine -i $random_input -p $pimp_input $email_input $outfile > /dev/null 2>&1") == 0 or die "Could not run msatfinder: $!, stopped";
750
751 #this is just for testing
752 #system("cd $outdir; /usr/local/bioinf/msatfinder/msatfinder-2.0.8/msatfinder_24Jan06 $random_input -p $pimp_input $email_input -e $engine $outfile > /dev/null 2>&1") == 0 or die "Could not run msatfinder: $!, stopped";
753 exec("cd $outdir; /usr/local/bioinf/msatfinder/msatfinder-2.0.8/msatfinder $random_input -p $pimp_input $email_input -e $engine $outfile > /dev/null 2>&1") == 0 or die "Could not run msatfinder: $!, stopped";
754 exit;
755 }
756 else
757 {
758 #print "running msatfinder ...<br>";
759 chop $execdir;
760 #print "$execdir/msatfinder $random_input $pimp_input -e $engine $outfile<br>";
761 #system("cd $outdir; $execdir/msatfinder_24Jan06 -e $engine $random_input -p $pimp_input $outfile > /dev/null 2>&1") == 0 or die "Could not run msatfinder: $!, stopped";
762
763 #this is just for testing
764 #print "system(\"cd $outdir; /usr/local/bioinf/msatfinder/msatfinder-2.0.8/msatfinder_24Jan06 $random_input -p $pimp_input $email_input -e $engine $outfile > /dev/null 2>&1\") == 0 or die \"Could not run msatfinder: $!, stopped";
765 #system("cd $outdir; /usr/local/bioinf/msatfinder/msatfinder-2.0.8/msatfinder_24Jan06 $random_input -p $pimp_input $email_input -e $engine $outfile > /dev/null 2>&1") == 0 or die "Could not run msatfinder: $!, stopped";
766 exec("cd $outdir; /usr/local/bioinf/msatfinder/msatfinder-2.0.8/msatfinder $random_input -p $pimp_input $email_input -e $engine $outfile > /dev/null 2>&1") == 0 or die "Could not run msatfinder: $!, stopped";
767 exit;
768 #$execdir.="/";
769 }
770 }
771
772
773 else #need email, but don't have it
774 {
775 print "<script>n=document.getElementById(\"PLEASEWAIT\"); n.parentNode.removeChild(n)</script>";
776 print "<p><b>Sorry, this is a big job, so we'll have to email you the results. However, you haven't entered an email address.</b></p>";
777 }
778 }
779
780 else #don't need email, will return results to user
781 {
782 if ($int eq "yes")
783 {
784 #system("cd $outdir; $execdir/msatfinder_24Jan06 -e $engine -i $random_input -p $pimp_input $email_input $outfile > /dev/null 2>&1") == 0 or die "Could not run msatfinder: $!, stopped";
785
786 #this is just for testing
787 system("cd $outdir; /usr/local/bioinf/msatfinder/msatfinder-2.0.8/msatfinder $random_input -p $pimp_input $email_input -e $engine $outfile > /dev/null 2>&1") == 0 or die "Could not run msatfinder: $!, stopped";
788 }
789 else
790 {
791 #print "running msatfinder ...<br>";
792 chop $execdir;
793 #print "$execdir/msatfinder_24Jan06 $random_input $pimp_input -e $engine $outfile<br>";
794 #system("cd $outdir; $execdir/msatfinder_24Jan06 -e $engine $random_input -p $pimp_input $outfile > /dev/null 2>&1") == 0 or die "Could not run msatfinder: $!, stopped";
795
796 #this is just for testing
797 # print "system(\"cd $outdir; /usr/local/bioinf/msatfinder/msatfinder-2.0.8/msatfinder_24Jan06 $random_input -p $pimp_input $email_input -e $engine $outfile > /dev/null 2>&1\") == 0 or die \"Could not run msatfinder: $!, stopped";
798 #testing!
799 system("cd $outdir; /usr/local/bioinf/msatfinder/msatfinder-2.0.8/msatfinder $random_input -p $pimp_input $email_input -e $engine $outfile > /dev/null 2>&1") == 0 or die "Could not run msatfinder: $!, stopped";
800 #system ("/usr/local/bioinf/msatfinder/msatfinder-2.0.8/msatfinder $outfile > /dev/null 2>&1") == 0 or die "Could not run msatfinder: $!, stopped";
801 $execdir.="/";
802 }
803
804 #all this stuff has to be done by msatfinder if email
805 system("chmod -R a+r $outdir") == 0 or die "Could not cd to output dir: $!, stopped";
806 # tar/zip up the output
807 my $suffix;
808 if ($dl eq "zip")
809 {
810 $suffix = "zip";
811 system("cd $tempdir; zip -r $absPATH/$tarip.$datestring.$suffix msatfinder.$datestring > /dev/null 2>&1") == 0 or die; # "<script>n=document.getElementById(\"PLEASEWAIT\"); n.parentNode.removeChild(n)</script>Can't zip files: $!";
812 }
813 elsif ($dl eq "tar")
814 {
815 $suffix = "tar.gz";
816 system("cd $tempdir; tar cvf $absPATH/$tarip.$datestring.tar msatfinder.$datestring > /dev/null 2>&1; gzip $tarip.$datestring.tar") == 0 or die "<script>n=document.getElementById(\"PLEASEWAIT\"); n.parentNode.removeChild(n)</script>Can't tar files: $!";
817 }
818 print "<script>n=document.getElementById(\"PLEASEWAIT\"); n.parentNode.removeChild(n)</script>"; # remove wait message
819 open (SUM,"<$outdir/Repeats/msatfinder.index") or die; # "<script>n=document.getElementById(\"PLEASEWAIT\"); n.parentNode.removeChild(n)</script>Aieeee, $! !";
820 print "<p><a href=\"http://www.genomics.ceh.ac.uk/msatfinder/msatfinder_manual.html#output\" target=\"_blank\"><img src=\"http://www.bioinf.ceh.ac.uk/msatfinder/msatfinder_help.png\" border=\"0\"></a>&nbsp;<b>Results:</b>\n";
821 print "<p><a href=\"$baseURL/msatfinder.$datestring/results.html\" target=\"_blank\">View</a>. <br><a href=\"$baseURL/$tarip.$datestring.$suffix\">Download</a>. <br> All files will be deleted off our servers after 36 hours.</p>";
822 # show results on line (delete after two hours)
823 print "<p><b>Summary of results</b></p>";
824 print "<pre>";
825 print "<pre>";
826 my @things = <SUM>;
827 foreach my $stuff (@things) { print $stuff; }
828 print "</pre>";
829 close SUM;
830 # show downloadable archive (delete after two hours)
831 # print "<p>You can also view the generated tab files using <a href=\"$baseURL/jnlp.php?ID=$datestring\">Artemis</a> - NB Not working yet when multiple sequences are searched</p>";
832 #system("rm -rf $outdir");
833
834
835 open (LOG, ">>$absPATH/msatfinder.log");
836 print LOG "$ip\t$time\n";
837 close LOG;
838 }
839 }
840 } # if $action = search
841
842 #################
843 # END PROCESS FORM
844 #################
845
846 # PRINT BOTTOM OF EACH WEB PAGE
847 # if $show_source is set to 1 show a link
848 # at the bottom of each script to the source
849 # code - pass the name of this script to the
850 # function in CGI-MINE.pl
851 #if ($show_source)
852 #{
853 # $script_name = $query->script_name();
854 # &source ($script_name);
855 #}
856
# end table: emit the closing tags for the current cell/row and then the
# table itself (the matching opening tags are printed earlier in the script)
print for "</td></tr>", "</table>";
860
861 # attach the MINE copyright
862 #&Mine::mine_cp;
863 #################
864 # END WEBPAGE
865 #################
866
867 ###############
868 # subroutines #
869 ###############
# checkseq($seq, $outfile, $sillythresh)
#
# Decide whether the submitted sequence text is in a recognised format and,
# if it is, save it verbatim to $outfile.
#
#   $seq         - raw sequence text (undef is treated as the empty string)
#   $outfile     - path of the file to write; it is created/truncated even
#                  when the sequence is rejected
#   $sillythresh - 1 suppresses the error image; 0 selects the stand-alone
#                  "sequence was invalid" message, any other value selects
#                  the "In addition, ..." wording
#                  (presumably set by the caller when an earlier threshold
#                  error was already reported -- TODO confirm against caller)
#
# A sequence is accepted when it starts with ">", with optional whitespace
# followed by "LOCUS" or "ID", or with a letter.
#
# Returns 1 if the sequence was accepted and written, 0 if it was rejected.
# On rejection an HTML error report (including at most the first 500
# characters of the input, HTML-escaped) is printed to the currently
# selected output handle.
#
# Dies if $outfile cannot be opened or closed.
sub checkseq # check seq is of correct type
{
    my $seq         = shift;
    my $outfile     = shift;
    my $sillythresh = shift;

    $seq = "" unless defined $seq;

    # Three-argument open with a lexical handle; a failure here must be
    # fatal, otherwise msatfinder would later be run on a missing file.
    open(my $out, '>', $outfile) or die "can't open $outfile: $!";

    my $is_genbank = ($seq =~ /^\s*LOCUS/) ? 1 : 0;
    my $run = (   $seq =~ /^>/
               || $is_genbank
               || $seq =~ /^\s*ID/
               || $seq =~ /^[A-Za-z]+/) ? 1 : 0;

    print {$out} $seq if $run;

    # Always close the file we opened (and only that file), whichever way
    # the validation went; write errors on buffered output surface here.
    close($out) or die "can't close $outfile: $!";

    if ($run)
    {
        if ($is_genbank)
        {
            my $filein = Bio::SeqIO->new(-file   => "$outfile",
                                         -format => 'genbank');
            # Read every record back through BioPerl.
            # NOTE(review): the original comment said this was meant to
            # total the sequence length for a limit check, but nothing
            # computed here ever left this sub. The loop is kept so that
            # any error BioPerl raises while reading the file still
            # surfaces at this point.
            while (my $record = $filein->next_seq())
            {
                $record->seq();
            }
        }
        return $run;
    }

    # ---- rejected: report the problem to the user ----

    # remove wait message
    print "<script>n=document.getElementById(\"PLEASEWAIT\"); n.parentNode.removeChild(n)</script>";

    # print error picture
    unless ($sillythresh == 1)
    {
        print "<img src=\"http://www.bioinf.ceh.ac.uk/msatfinder/msatfinder_error.png\" border=\"0\">\n";
    }

    if ($sillythresh == 0)
    {
        print "<p><b>The sequence was invalid. Please try again.</b></p>\n";
    }
    else
    {
        print "<p><b>In addition, the sequence was invalid.</b></p>\n";
    }

    if ($seq eq "")
    {
        print "<p><b>In fact, you didn't supply <i>any</i> sequence.</b><br>\n";
    }
    else
    {
        # show at most the first 500 characters of a dodgy sequence,
        # wrapped at 50 characters per line
        my $shortseq = substr($seq, 0, 500);

        print "<p>You supplied the sequence:<br>\n";
        print "<pre>\n";
        for (my $pos = 0; $pos < length $shortseq; $pos += 50)
        {
            # Escape after chunking so the 50-column wrapping is based on
            # the user's characters, not on the entity-expanded text.
            print _checkseq_escape_html(substr($shortseq, $pos, 50)) . "\n";
        }
        print "&lt;snipped&gt;" if (length $seq > 500);
        print "</pre>\n";
    }

    return $run;
}

# Private helper for checkseq: make arbitrary text safe to embed in HTML by
# replacing the characters that are significant to the markup.
sub _checkseq_escape_html
{
    my $text = shift;
    $text =~ s/&/&amp;/g;    # must come first so later entities survive
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}