ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/msatfinder/msatdbmaker
Revision: 1.1.1.1 (vendor branch)
Committed: Mon Mar 7 15:34:44 2005 UTC (11 years, 5 months ago) by knirirr
Branch: MAIN
CVS Tags: HEAD, HEAD
Changes since 1.1: +0 -0 lines
Log Message:
First import

Line File contents
1 #!/usr/bin/perl
2
3 ####################################################
4 # SCRIPT_NAME: msatdbmaker #
5 # FUNCTION: loads genomes and repeats files into #
6 # an existing database #
7 ####################################################
8
9 use strict;
10 use warnings;
11 use Cwd;
12 use Config::Simple;
13 use DBI;
14 use File::Basename;
15 use File::Copy;
16 use Getopt::Std;
17 use Term::ANSIColor;
18 use Term::ReadLine;
19 use Msatminer;
20
################
# identify cwd #
################
# $cwd is the directory the script was started in; $install is taken from
# the MSATMINER_HOME environment variable and is undef when that is unset.
my $cwd = getcwd;
my $install = $ENV{MSATMINER_HOME};
my $user = $ENV{USER};
print "Dbmaker starting in: $cwd\n";

#####################
# usage information #
#####################
# Printed for -h (and by HELP_MESSAGE). The command shown is this script,
# msatdbmaker; the text previously named msatfinder by mistake.
my $usage="
Usage:

msatdbmaker [options]
msatdbmaker -h
msatdbmaker -a
msatdbmaker -c

Options:

-h list these options
-a don't pause to ask for user confirmation
-c install cgi scripts

";

####################################
# import settings from config file #
####################################
# Msatminer::getconfig is handed both the install root and the cwd; the
# result is used below as a hash reference keyed by "SECTION.name".
my $config = &Msatminer::getconfig($install,$cwd);

#######################
# essential variables #
#######################
# VIEWER section: database and web front-end settings.
my $cgiloc      = $config->{'VIEWER.cgiloc'};
my $dbname      = $config->{'VIEWER.dbname'};
my $dbtype      = $config->{'VIEWER.dbtype'};
my $dbusername  = $config->{'VIEWER.gdbusername'};
my $dbusername2 = $config->{'VIEWER.sdbusername'};
my $dbpassword  = $config->{'VIEWER.gdbpassword'};
my $baseURL     = $config->{'VIEWER.baseURL'};
my $msatview    = $config->{'VIEWER.msatview'};
# COMMON section: names of the data directories.
my $align_dir   = $config->{'COMMON.align_dir'};
my $annodir     = $config->{'COMMON.anno_dir'};
my $mine_dir    = $config->{'COMMON.mine_dir'};
my $repeat_dir  = $config->{'COMMON.repeat_dir'};
my $tab_dir     = $config->{'COMMON.tab_dir'};
my $fasta_dir   = $config->{'COMMON.fasta_dir'};
my $prime_dir   = $config->{'COMMON.prime_dir'};

# stop pattern matches from breaking
# later in the script: drop one trailing "/" from each directory name.
# ($align_dir and $annodir are deliberately left untouched here, exactly
# as before.)
foreach my $dirname ($mine_dir, $repeat_dir, $tab_dir, $fasta_dir, $prime_dir)
{
    # $dirname aliases the variable itself, so this edits it in place
    $dirname =~ s/\/$//;
}
79
###########
# getopts #
###########
# Three single-letter switches are recognised, none taking a value:
# -a, -c and -h. Whatever was given ends up as a true entry in %opts.
my %opts;
getopts('ach', \%opts);

# -h: show the usage text and stop right away
if ($opts{h}) {
    print $usage;
    exit;
}

#########################################
# let the user know that all is working #
#########################################
# Without -a, the main settings are passed to Msatminer::areyousure
# (the usage text describes -a as "don't pause to ask for user
# confirmation").
if (!$opts{a}) {
    &Msatminer::areyousure(
        $dbname, $dbtype, $dbusername, $dbusername2, $cgiloc, $msatview
    );
}
99
###################
# find some files #
###################
# Pick the schema to load: a readable schema.sql in the current directory
# wins, otherwise the copy under the install tree is used. The second
# path is only looked at when the first is not readable.
my $schema = (-R "$cwd/schema.sql")         ? "$cwd/schema.sql"
           : (-R "$install/etc/schema.sql") ? "$install/etc/schema.sql"
           :                                  undef;

# Neither location had a readable schema: explain and stop.
if (!defined $schema) {
    print "Could not find suitable schema to create database. \n";
    print "Please check that there is a \"schema.sql\" file in $cwd and try again.\n";
    exit;
}
118
#####################
# load the database #
#####################
# File-scoped on purpose: filltables() and loadfiles() further down use
# this handle directly instead of receiving it as an argument.
my $dbh;

# read in schema info (one array element per line of the schema file)
my $lcname = lc(basename($cwd));
open(my $scheme_fh, '<', $schema) or die "Can't open $schema $!";
my @scheme = <$scheme_fh>;
close $scheme_fh;

# die if in wrong directory
unless (-d "$repeat_dir")
{
    print "The required \"$repeat_dir\" directory is not available.\n";
    print "Please run msatfinder and try again.\n";
    exit;
}

# data to load
my @dirs = ("$mine_dir","$repeat_dir","$tab_dir","$fasta_dir","$prime_dir");
# NOTE(review): "Repeats" is hard-coded here while the check above uses the
# configured $repeat_dir -- confirm the two are meant to be the same place.
my @datafiles = ("$cwd/Repeats/$lcname.genomes","$cwd/Repeats/$lcname.repeats");

# Connect according to the configured driver. DBI reports the reason for a
# failed connect in $DBI::errstr ($! is not set by DBI), so that is what
# the error message shows.
if ($dbtype eq "Pg")
{
    ############
    # postgres #
    ############
    $dbh = DBI->connect("dbi:Pg:host=localhost dbname=$dbname",
                        $dbusername,
                        $dbpassword,
                        {RaiseError=>1,
                         AutoCommit=>1,
                         Taint=>1}) or die "Can't connect: $DBI::errstr";
}
elsif ($dbtype eq "mysql")
{
    #########
    # mysql #
    #########
    $dbh = DBI->connect("dbi:mysql:$dbname",
                        "$dbusername",
                        "$dbpassword") or die "Can't connect: $DBI::errstr";
}
else
{
    print "No database type was specified, so the database could not be loaded.\n";
    exit;
}

# The loading steps are the same for both drivers.

# create tables
print "Creating tables...\n";
createtables($dbh,\@scheme);

# load data
print "Loading repeat and genome data...\n";
foreach my $file (@datafiles)
{
    filltables($file);
}

# do other files
print "Loading data files...\n";
foreach my $dir (@dirs)
{
    loadfiles($dir);
}

# Nothing after this point touches the database, so release the connection.
$dbh->disconnect or warn "Can't disconnect cleanly: $DBI::errstr";

# so far, so good
print "Database loading complete.\n";
208
###############################
# begin install of msatviewer #
###############################
# Runs only with -c: copies the CGI script and its support files to
# $cgiloc, then copies result files into the web-visible $msatview.
# Paths are concatenated directly (e.g. "$msatview$align_dir"), so
# $msatview, $align_dir and $annodir are expected to end in "/".
if ($opts{c})
{
    # Reason for the most recent failure seen by $run_shell.
    my $why = '';

    # Run one command line through the shell (globs and "~" are expanded
    # by the shell, as before). Returns true on success; on failure returns
    # false and describes the failure in $why. $! is only meaningful when
    # the command could not be started at all; otherwise the outcome is
    # in $?.
    my $run_shell = sub
    {
        my ($cmd) = @_;
        my $rc = system($cmd);
        return 1 if ($rc == 0);
        if ($rc == -1)
        {
            $why = "could not run command: $!";
        }
        elsif ($? & 127)
        {
            $why = "command died with signal " . ($? & 127);
        }
        else
        {
            $why = "command exited with status " . ($? >> 8);
        }
        return 0;
    };

    print "Installing cgi-front end...\n";
    if (-R "$install/bin/msatviewer.cgi")
    {
        copy("$install/bin/msatviewer.cgi","$cgiloc/") or die "Can't copy msatviewer: $!";
        $run_shell->("chmod 755 $cgiloc/msatviewer.cgi") or die "Can't make msatviewer executable: $why";
    }
    else
    {
        print "Sorry, msatviewer.cgi can't be found, or is not readable. Please check your installation.\n";
        exit;
    }

    # copy actions file &c. to right location
    my @cgistuff = ("$install/etc/related_links.txt",
                    "$install/etc/actions.txt",
                    "$install/lib/Msatminer.pm",
                    "$cwd/msatminer.rc");
    my $edit = 0;   # number of support files that were readable
    foreach my $src (@cgistuff)
    {
        if (-R $src)
        {
            copy($src,"$cgiloc") or warn "Can't copy $src to $cgiloc : $!";
            $edit++;
        }
        else
        {
            print "$src could not be copied to $cgiloc. Please check your installation.\n";
        }
    }
    if ($edit > 0)
    {
        print <<EDIT;
Please note that you will need to edit the following files in $cgiloc,
if you'd like to customise the pages and sql queries performed by msatviewer.cgi:

actions.txt
related_links.txt

The first of these contains the sql queries that msatviewer will run, and the
second will allow you to add any extra links to your site that you may need.
EDIT
    }

    ###################################################
    # set up msataligner files in a suitable location #
    ###################################################
    unless ($opts{a})
    {
        print "Press <return> to copy the results of msatfinder and msataligner to a web viewable directory,\nor press X <return> to exit.\n";
        my $choices = <STDIN>;
        # end-of-input on STDIN is treated like a plain <return>
        $choices = '' unless (defined $choices);
        chomp($choices);
        exit if ($choices =~ /^[Xx]/);
    }
    unless (-e $msatview)
    {
        $run_shell->("mkdir -p $msatview") or die "Can't create $msatview: $why";
    }
    $run_shell->("chmod 755 $msatview") or warn "Can't set permissions of $msatview $why";

    # copy output from msataligner
    $run_shell->("cp -r $align_dir $msatview") or warn "Can't copy $align_dir to $msatview: $why";
    copy("$install/etc/quickmineoutput.css","$msatview") or warn "Can't copy stylesheet: $!";
    $run_shell->("chmod 755 $msatview$align_dir") or warn "Can't set permissions of $msatview$align_dir $why";
    $run_shell->("chmod 644 $msatview$align_dir*") or warn "Can't set permissions of $msatview$align_dir contents $why";
    $run_shell->("chmod 644 $msatview$align_dir*.jar") or warn "Can't set permissions of jar files $why";

    # copy the html and gbk files found in the working directory
    my @htmlfiles = glob "$cwd/*.html $cwd/*.gbk";
    if (@htmlfiles)
    {
        $run_shell->("cp @htmlfiles $msatview") or warn "Can't copy @htmlfiles to $msatview: $why";
        $run_shell->("chmod 644 $msatview*.html $msatview*.gbk") or warn "Can't set permissions of html files: $why";
    }
    else
    {
        # running "cp" with no source files would only produce a confusing error
        warn "No .html or .gbk files found in $cwd to copy to $msatview\n";
    }

    # copy output from msatannotator
    if (-e "$annodir")
    {
        $run_shell->("cp -r $annodir $msatview") or warn "Can't copy $annodir to $msatview: $why";
        $run_shell->("cp annotated.csv $msatview$annodir") or warn "Can't copy annotated.csv to $msatview: $why";
        $run_shell->("chmod -R 755 $msatview$annodir") or warn "Can't set permissions of $msatview$annodir contents $why";
    }

    # hopefully that worked...
    print "You should now be able to see your data at: $baseURL" . "msatviewer.cgi\n";
    print "In case of problems, please check your database and httpd setup, or see your system administrator.\n";

}

print "Finished!\n";
298
299 ########################
300 # exciting subroutines #
301 ########################
# createtables($dbh, \@lines)
#
# Runs the SQL schema held in @lines (one element per line of the schema
# file) against the database handle $dbh. Lines are accumulated until one
# ends in ";", and the accumulated text is then executed as a single
# statement.
#
# When the file-level $dbtype is "Pg", the text "if exists" is removed from
# each line, and a statement whose terminating line contains "drop" is run
# inside eval so that a failure does not abort the load.
#
# The caller's array is left unmodified. No meaningful return value.
sub createtables
{
    my $dbh = shift;
    my $lines = shift;
    my $statement = "";

    # source the schema
    foreach my $raw (@$lines)
    {
        # Work on a copy: the loop variable aliases the caller's array, so
        # chomp and s/// on it would rewrite the caller's data.
        my $line = $raw;
        chomp($line);
        if ($dbtype eq "Pg")
        {
            $line =~ s/if exists//;
        }

        # Keep a line break between source lines so that tokens on adjacent
        # lines do not fuse and a "--" comment ends where its line ends.
        $statement .= "$line\n";

        # A ";" at the end of the line (trailing whitespace or a stray CR
        # tolerated) completes the statement.
        next unless ($line =~ /;\s*$/);

        $statement =~ s/;\s*$//;
        if ($dbtype eq "Pg" and $line =~ /drop/)
        {
            # the eval deliberately discards any error from this statement
            eval { $dbh->do($statement,{PrintError=>0}); };
        }
        else
        {
            $dbh->do($statement) or warn $dbh->errstr;
        }
        $statement = "";
    }
}
332
# loadfiles($dir)
#
# Inserts the contents of the files in directory $dir into a database
# table, one row per file, using the file-level handle $dbh.
#
#   * The table is named after the lower-cased directory name, except for
#     the configured $repeat_dir, whose files go into "repeatfiles".
#   * In $tab_dir only *.tab files are loaded; in $repeat_dir only *.index
#     and *.matrix files. Dot-files and anything that is not a plain file
#     are skipped everywhere.
#   * Rows for $repeat_dir are (filename, contents); all other rows are
#     (filename, genome, contents), where genome is the part of the file
#     name before the first ".".
#
# Dies if the directory or one of its files cannot be opened.
sub loadfiles
{
    my $dir = shift;
    my $is_repeat_dir = ($dir eq "$repeat_dir");
    my $table = $is_repeat_dir ? "repeatfiles" : lc $dir;

    opendir(my $dirhandle, $dir) or die "Can't read contents of $dir $!";
    my @files = readdir($dirhandle);
    closedir $dirhandle;

    my $sth;    # prepared on first use, then reused for every file
    foreach my $filename (@files)
    {
        # only load the file types expected in these two directories
        if ($dir eq "$tab_dir") { next unless ($filename =~ /\.tab$/); }
        if ($is_repeat_dir)
        {
            next unless ($filename =~ /\.index$/ or $filename =~ /\.matrix$/);
        }
        next if ($filename =~ /^\./);
        # a subdirectory cannot be read as data
        next unless (-f "$dir/$filename");

        # get data (3-arg open: the name from readdir is never treated as
        # an open mode)
        open(my $in, '<', "$dir/$filename") or die "Can't open $dir/$filename $!";
        my @data = <$in>;
        close $in;

        # concatenate the data array, dropping blank lines
        my $joineddata;
        foreach my $dp (@data)
        {
            $dp =~ s/^\n//;
            $joineddata .= $dp;
        }

        # load into database
        if ($is_repeat_dir)
        {
            $sth = $dbh->prepare("insert into $table values (?,?)") unless ($sth);
            $sth->execute($filename,$joineddata);
        }
        else
        {
            my $genome = [split(/\./, $filename)]->[0];
            $sth = $dbh->prepare("insert into $table values (?,?,?)") unless ($sth);
            $sth->execute($filename,$genome,$joineddata);
        }
    }
}
378
# get_qms($file)
#
# Builds the placeholder list for an insert statement: reads the first line
# of $file, splits it on "*" (a single trailing "*" is ignored) and returns
# one "?" per column, joined with ", " -- e.g. "?, ?, ?" for three columns.
# Returns the empty string when the file has no first line.
#
# Dies if $file cannot be opened. Leaves $_ untouched and closes the file
# before returning.
sub get_qms
{
    my $file = shift;

    open(my $datafile, '<', $file) or die "Can't open $file $!";
    my $row = <$datafile>;
    close $datafile;

    return "" unless (defined $row);

    chomp($row);
    $row =~ s/\*$//;
    # limit -1 keeps trailing empty columns so that they are counted too
    my @fields = split(/\*/, $row, -1);
    return join(", ", ("?") x @fields);
}
394
395
# filltables($file)
#
# Loads a "*"-separated data file into the table named after the file's
# extension (the text after the last "." in $file), using the file-level
# handle $dbh. The number of placeholders comes from get_qms($file).
#
# Rows that start with the word "genome" or "repeat" are treated as header
# rows and skipped. (The previous pattern /^[genome|repeat]/ was a
# character class and so skipped every row starting with any one of those
# letters.) A single trailing "*" on a row is ignored; empty trailing
# columns are kept so the number of bound values matches the placeholders.
#
# Dies if $file cannot be opened.
sub filltables
{
    my $file = shift;
    my $tablename = [split(/\./, $file)]->[-1];
    my $qms = get_qms($file);
    my $sth = $dbh->prepare("insert into $tablename values ($qms)");

    open(my $datafile, '<', $file) or die "Can't open $file $!";
    while (my $row = <$datafile>)
    {
        chomp $row;
        next if ($row =~ /^(?:genome|repeat)/);
        $row =~ s/\*$//;
        # limit -1: do not drop empty columns at the end of the row
        my @inserts = split(/\*/, $row, -1);
        $sth->execute(@inserts);
    }
    close $datafile;
}
412
413 ###############
414 # help, help! #
415 ###############
# HELP_MESSAGE()
#
# Writes the usage text held in the file-level $usage and ends the program
# with status 0. Any arguments are ignored.
# NOTE(review): nothing in this file calls it directly; the name matches
# the hook Getopt::Std looks for when handling --help -- confirm.
sub HELP_MESSAGE {
    print($usage);
    exit(0);
}