#!/usr/bin/perl 

# Script to setup the wwwPartiGene package.
#Main function is to create the organism.info file.
#last update 11/01/05 Ralf Schmid
#Author - Alasdair Anthony, University of Edinburgh
#this version - 0.1 

# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#

use warnings;	
use Term::ANSIColor;
use File::Path;	#for rmtree
use File::Copy;
use Cwd; 	#see sub storependingEST
use Term::ReadLine;	#makes user input a bit more friendly
use DBI;
  use DBD::Pg; #for database interaction
use strict;


#declare the settings and state shared by the rest of the script
my $read_gnu;	#set to 1 when the GNU readline backend is in use, 0 otherwise
my $max_blasts = 3;	#number of blast db/program pairs written per species for the main cluster window
my %species;	#holds species name and cluster id info for insertion into html page
my $db_title ;	#name used for the web directory and printed into the generated pages
my @blast_sub_dirs;	#names of the individual blast sub directories of the species being processed

#set up readline and ask it to complete file names
my $term = Term::ReadLine->new('sample');	#direct method call instead of indirect object syntax
$term->ornaments(0);	#stops prompt getting underlined
my $attribs = $term->Attribs;
$attribs->{completion_entry_function} =
 $attribs->{filename_completion_function};

#ReadLine() returns the name of the package that actually executes the commands - we need gnu
if ($term->ReadLine() =~ /Gnu$/) {
	$read_gnu = 1;
}
else {
	print "The readline package Term::ReadLine::Gnu was not found.\nInstalling this package will make this program more friendly.\nContinuing...\n";
	$read_gnu = 0;
}

#print the start-up banner
print colored("\t\t####################################################\n","bold");
print colored("\t\t###                                              ###\n","bold");
print colored("\t\t###       WebPartiGene_setup Version 0.1         ###\n","bold");
print colored("\t\t###                                              ###\n","bold");
print colored("\t\t####################################################\n","bold");

#make sure a PostgreSQL server process is running before asking the user anything
print "Performing system checks...";
#the server process has been named both "postmaster" and "postgres", so look for either
#NOTE(review): confirm the process name reported by ps on the target systems
my $postmaster=`ps -e|grep -E 'postmaster|postgres'`;
if(!$postmaster)  {
    print colored("\n\t#### Postmaster is not running ####\n","red bold");
    print colored("Please ensure that postgreSQL is correctly installed and running\n","red bold");
    exit(1);	#non-zero status so a calling shell can tell that the check failed
}
else {
	print colored (" Done\n", "green bold");
	print "\n";
}

#ask which database is to be published and which user to connect as
my $db_name = &get_input("\nEnter the name of the database you want to webify:\n");
my $db_user = &get_input("\nEnter valid username for this databse:\n");

#ask where the web pages live and what the site is to be called,
#then keep asking for a name until one is found that is not already taken
my $public_html = &get_input ("\nEnter the directory that your web pages are served from\n(e.g. /home/username/public_html/):\n");
$db_title = &get_input ("\nHow would you like the database to be named on the web pages?\n");
my $dir_flag = 1;	#stays set until the directories have been created
until ($dir_flag == 0) {

	print "\nSetting up directories for $db_title...";
	my $site_dir = "$public_html/$db_title";
	unless (-e $site_dir) {
		#the name is free: create the site directory and its blast sub directory
		mkdir $site_dir or die "Error! could not make $site_dir\n$!\n";
		mkdir "$site_dir/blast" or die "Error! could not make $site_dir/blast\n$!\n";
		print colored ("Done\n", "green bold");
		$dir_flag = 0;
		next;
	}
	#the name is already in use: say so and ask for another one
	print colored ("Failed", "red"), "\n";
	$db_title = &get_input ("A $db_title directory already exists, please enter an alternative name:\n");
}


#connect to database
print "Connecting to database...";
my $conn=DBI->connect("dbi:Pg:dbname=$db_name", "$db_user", "", {PrintError => 0}); #second and third values are user and password
if (! $conn)   { ### Couldn't connect to the database
	print colored("ERROR! \nCouldn't connect to the database!\n","red bold");
	exit(1);	#non-zero status so a calling shell can tell that the connection failed
} else {
	print colored ("Done", "green bold");
	print "\n";
}

#collect the cluster ids in database
#PrintError is off, so failures have to be checked for explicitly
my $query = "select clus_id from blast";
my $db_cluster_ids = $conn->prepare("$query")
	or die "Could not prepare '$query': $DBI::errstr\n";
$db_cluster_ids->execute()
	or die "Could not execute '$query': $DBI::errstr\n";
my @clus_ids;	#array to hold all unique three letter cluster id's, in the order first seen
my %seen_clus_id;	#cluster id => number of times it has been met so far
my $cluster_id;
#list assignment in the condition keeps the loop going for rows holding an empty or null value
while (($cluster_id) = $db_cluster_ids->fetchrow_array){
	next unless defined $cluster_id;
	#extract cluster id portion of cluster name; names that do not fit the pattern are skipped
	next unless $cluster_id =~ /^(\w\w\w)\d\d\d\d\d/;
	my $clus_prefix = $1;
	push @clus_ids, $clus_prefix unless $seen_clus_id{$clus_prefix}++;
}

#open/create organism.info
#One line is written for every cluster id the user gives a species name to:
#  cluster id, species name, database name, database user, "blast", phrap directory,
#  $max_blasts pairs of blast database/blast program ("null null" where unused), web title
open (my $org_info_fh, '>', './organism.info') or die "couldn't open ./organism.info! $!\n";

#the cluster id is bound as a placeholder rather than pasted into the statement
my $blast_query = "select distinct db, prog from blast where clus_id like ?";

foreach my $cid (@clus_ids) {
	print "\n### The cluster identifier $cid was found in the database\n";
	my $species_name =  &get_input("If you want this species to be available on the website enter\nits binomial name, otherwise hit enter: ");
	next unless $species_name;	#nothing entered, so this cluster id is left off the website

	#query database to find out what blasts were done for this species
	my $blast_rows = $conn->selectall_arrayref($blast_query, undef, "$cid%")
		or die "Could not look up the blasts for $cid: $DBI::errstr\n";
	my @blasts = @{$blast_rows};	#each element is a [db, prog] pair
	my $counter = scalar @blasts;

	$species_name =~ s/\s/_/g;	#swap spaces for _ use g in case there is more than 2 words
	#make species specific sub dir in blast
	mkdir "$public_html/$db_title/blast/$species_name" or die "failed to mkdir $public_html/$db_title/blast/$species_name, $!\n";

	my $blast_string = "";	#holds all the blast db/blast prog info until it can be put into organism.info
	my $blast_count = 0;	#number of db/prog pairs placed in $blast_string
	if ($counter > $max_blasts) {
		print "$counter Blasts have been found in the database for $species_name. You may choose up to\n";
		print "$max_blasts blasts for displayed in the main cluster window (other blasts\nwill be linked below this).\n";
		foreach my $blast (@blasts) {
			if ($blast_count == $max_blasts) {
				print "You have selected $max_blasts blasts - the others will be linked below this\n";
				last;
			}
			my ($blast_db, $blast_prog) = @{$blast};
			my $input = &get_input("Include $blast_db $blast_prog in main window?(y/n): ");
			if ($input =~ /^y$/i) {
				$blast_string = $blast_string . " " . $blast_db . " " . $blast_prog;
				$blast_count++;
			}
		}
	} else {
		foreach my $blast (@blasts) {
			my ($blast_db, $blast_prog) = @{$blast};
			$blast_string = $blast_string . " " . $blast_db . " " . $blast_prog;
			$blast_count++;
		}
	}
	#every line must carry exactly $max_blasts pairs, so "null" is added to make up the length.
	#This also covers the user picking fewer than $max_blasts from a longer list.
	while ($blast_count < $max_blasts) {
		$blast_count++;
		$blast_string = $blast_string . " null null";
	}

	#now get locations for blast dirs ...
	my $blast_ok = 1;	#flag to allow re-entering of blast dir
	my $blast_dir = &get_input("Enter the location of the blast directory created by PartiGene for\nthis species (e.g. /data/partigene/blast):\n");
	while ($blast_ok == 1) {
		@blast_sub_dirs = ();	#the blast sub directories will be stored here for copying later
		opendir (my $blasts_dh, $blast_dir) or die "Cannot open $blast_dir: $!";
		print "This directory contains the following individual blast directories:\n";
		foreach my $file (readdir $blasts_dh) {
			next if $file eq "." or $file eq ".." or $file eq "passed";
			next unless -d "$blast_dir/$file";	#plain files cannot be opened as a blast directory below
			print "$file ";
			push @blast_sub_dirs, $file;
		}
		closedir $blasts_dh;
		print "\n";
		my $input = &get_input("Is this correct (y/n)? ");
		if ($input =~ /^n$/i) {
			$blast_dir = &get_input ("Ok, enter a different directory:\n");
		} else {
			$blast_ok =0;
		}
	}
	$|=1;	#flush the buffer
	print "Copying BLAST reports to $public_html/$db_title/blast/$species_name...";

	foreach my $dir (@blast_sub_dirs) {
		my $dest_dir = "$public_html/$db_title/blast/$species_name/$dir";
		mkdir $dest_dir or die "mkdir failed $dest_dir, $!\n";
		opendir (my $blastdir_dh, "$blast_dir/$dir") or die "couldn't open $blast_dir/$dir, $!\n";
		foreach my $file (readdir $blastdir_dh) {
			next if $file eq "." or $file eq "..";
			#might be better to use softlinks here - some blast dirs might be very big.
			copy ("$blast_dir/$dir/$file", "$dest_dir/$file")
			 or die "Error! Could not copy $blast_dir/$dir/$file to $dest_dir/$file\n$!\n";
		}
		closedir $blastdir_dh;
	}
	print colored ("Done","green bold");
	print "\n";

	#...and phrap .ace files
	my $phrap_ok = 1;	#flag to allow re-entering of phrap dir
	my $phrap_dir = &get_input("Enter the location of the phrap directory created by PartiGene for\nthis species (e.g. /data/partigene/phrap):\n");
	while ($phrap_ok == 1) {
		opendir (my $phrap_dh, $phrap_dir) or die "Cannot open $phrap_dir: $!";
		#at least one phrap ace file found in this dir, so assume all is well
		$phrap_ok = 0 if grep { /\.ace/ } readdir $phrap_dh;
		closedir $phrap_dh;
		if ($phrap_ok ==1) {
			print "No phrap .ace files were found in $phrap_dir\n";
			my $answer = &get_input ("To try again, enter a different directory, or to continue type 'continue':\n");
			if ($answer =~ /^continue$/i) {
				$phrap_ok = 0;	#carry on with the directory as entered
			} elsif ($answer ne "") {
				$phrap_dir = $answer;	#the reply is the new phrap directory to look in
			}
		}
	}

	#write the whole line in one go so a failure part way through never leaves half a line behind.
	#"blast" is the name of the directory the reports were copied into, relative to the web directory;
	#no space is needed in front of $blast_string because it already starts with one;
	#$db_title at the end allows the cgi file to be identified
	print {$org_info_fh} "$cid $species_name $db_name $db_user blast $phrap_dir$blast_string $db_title\n";
	#save cluster id and species name for the html file (below)
	$species{$cid} = $species_name;
}

#organism.info is moved later on, so everything has to be on disk before then
close $org_info_fh or die "couldn't finish writing ./organism.info! $!\n";

$conn->disconnect or warn "Disconnection failed: $DBI::errstr\n";

#now edit wwwPartiGene.html
#The template is copied line by line; a line holding only one of the three markers
#below is replaced by the species list, the site title or the keywords meta tag.
print "\n";
my $species_insert = "<!SpeciesGoHere!>";
my $db_name_insert = "<!TitleGoesHere!>";
my $keywords_insert = "<!keywords!>";
open (my $html_in, '<', './wwwPartiGene.temp') or die "couldn't open ./wwwPartiGene.temp!\n$!";	#original template file
open (my $html_out, '>', './temp.html') or die "couldn't open ./temp.html!\n$!";	#temp file which will have new info in it
while (my $line = <$html_in>) {	#read in template line by line
	#\Q..\E makes sure the markers are matched literally
	if ($line =~ m/^\s*\Q$species_insert\E\s*$/) {
		print {$html_out} "$species_insert\n";	#the marker itself is kept in the output
		foreach my $cid (sort keys %species) {	#sorted so the option list comes out in a stable order
			my $species_name = $species{$cid};
			$species_name =~ s/\_/ /g;	#replace underscore with space
			print {$html_out} "<OPTION VALUE=\"$cid\">$species_name</OPTION>\n";
		}
	}
	elsif ($line =~ m/^\s*\Q$db_name_insert\E\s*$/) {
		print {$html_out} "$db_title\n";
	}
	elsif ($line =~ m/^\s*\Q$keywords_insert\E\s*$/) {
		print {$html_out} "<meta name=\"keywords\" content=\"partigene, EST, $db_title\">\n";
	}
	else {
		print {$html_out} $line;	#transcribe template line to temp file
	}
}
close $html_in;
#the file is moved below; anything still sitting in the output buffer would be missing from a copy
close $html_out or die "couldn't finish writing ./temp.html!\n$!";

#now write temp html to final html file name
move ("temp.html", "wwwPartiGene.html") or die "couldn't rename temp.html to wwwPartiGene.html\n$!";

#move all files to the web serving directory
move  ("organism.info", "$public_html/$db_title/") or die "couldn't move organism.info to $public_html/$db_title\n$!";
move  ("wwwPartiGene.html", "$public_html/$db_title/") or die "couldn't move wwwPartiGene.html to $public_html/$db_title\n$!";

#now edit wwwPartiGene_align.cgi
#The template is copied line by line; the line starting with the marker below is
#replaced by an assignment pointing the script at this site's organism.info.
my $org_info_insert = "##organism_info";
open (my $cgi_in, '<', './wwwPartiGene_align.cgi.temp') or die "couldn't open wwwPartiGene_align.cgi.temp!\n$!";	#original template file
open (my $cgi_out, '>', './temp.cgi') or die "couldn't open ./temp.cgi!\n$!";	#temp file which will have new info in it
while (my $line = <$cgi_in>) {	#read in template line by line
	if ($line =~ m/^\Q$org_info_insert\E/) {	#\Q..\E matches the marker literally
		print {$cgi_out} "\$organism_info = \"$public_html/$db_title/organism.info\";\n";
	} else {
		print {$cgi_out} $line;	#transcribe template line to temp file
	}
}
close $cgi_in;
#the file is moved below; anything still sitting in the output buffer would be missing from a copy
close $cgi_out or die "couldn't finish writing ./temp.cgi!\n$!";

#now write temp cgi to final cgi file name
my $cgi_name = "wwwPartiGene_align_$db_title.cgi";
move ("temp.cgi", $cgi_name) or die "couldn't rename temp.cgi to $cgi_name\n$!";

#move the script to the cgi-bin of the web serving directory, offering to create one if it is missing
my $cgi_bin = "$public_html/cgi-bin";
my $have_cgi_bin = -d $cgi_bin;	#check they have a cgi_bin
if (!$have_cgi_bin) {
	print "$public_html does not contain a cgi-bin directory. This is necessary for web PartiGene.\n";
	my $input = &get_input ("Would you like a cgi-bin directory to be created? (y/n): ");
	if ($input =~ /^y$/i) {
		mkdir $cgi_bin or die "Error! Couldn't make $cgi_bin/\n$!\n";
		$have_cgi_bin = 1;
	}
}
if ($have_cgi_bin) {
	move  ($cgi_name, "$cgi_bin/") or die "couldn't move $cgi_name to $cgi_bin\n$!";
} else {
	print "The $cgi_name script has NOT been moved to an appropriate directory.\n";
	print "This will cause loss of function in web PartiGene.\n";
}

#last bit - every species gets a <cluster id>.jpg in the web directory, either a
#picture supplied by the user or the bundled wpg.jpg; one picture also becomes the
#front page image wpg.jpg
$| =1;	#flush the buffer
print "If you have an image for the species below, enter its location - otherwise just hit return:\n";
my $front_flag =1;	#stays 1 until the user has picked a picture for the front page
my $image_dest_dir = "$public_html/$db_title";
foreach my $cid (keys %species) {
	(my $label = $species{$cid}) =~ tr/_/ /;	#replace underscore with space
	PROMPT: while (1) {
		my $image = &get_input("$label - ");

		#nothing entered: fall back to the default picture and move on to the next species
		if (!$image) {
			print "OK, a default image will be used\n";
			copy ("./wpg.jpg", "$image_dest_dir/$cid.jpg") or die "copy failed! $!\n";
			last PROMPT;
		}

		#a location that does not exist: ask again for the same species
		if (!-e $image) {
			print "Error! This file does not exist! try again.\n";
			next PROMPT;
		}

		copy ("$image", "$image_dest_dir/$cid.jpg") or die "copy failed! $!\n";

		#offer the picture for the front page only while none has been chosen yet
		last PROMPT if $front_flag != 1;
		my $answer = &get_input("Use this picture on the front page? (y/n): ");
		if ($answer =~ /^y$/i) {	#wpg.jpg is the name given to the pic used on the wwwPg.html page
			copy ("$image", "$image_dest_dir/wpg.jpg") or die "copy failed! $!\n";
			$front_flag =0;
		}
		last PROMPT;
	}
}
if ($front_flag ==1) {	#no pics selected for front
	copy ("./wpg.jpg", "$image_dest_dir/wpg.jpg") or die "copy failed! $!\n";
}	

#Finally, copy all the php files to the web directory
foreach my $php_file (qw(wwwPartiGene.php wwwPartiGene_cluster.php wwwPartiGene_clusimg.php wwwPartiGene_download.php)) {
	copy ("./$php_file","$public_html/$db_title/") or die "./$php_file copy failed! $!\n";
}

#make the align script executable. It is only in cgi-bin when a cgi-bin directory
#existed or was created earlier; when it is not there the run is allowed to finish
#instead of dying on the chmod.
my $installed_cgi = "$public_html/cgi-bin/wwwPartiGene_align_$db_title.cgi";
if (-e $installed_cgi) {
	chmod (0755, $installed_cgi) or die "chmod failed $!\n";
} else {
	print "$installed_cgi was not found, so its permissions have not been changed.\n";
}


print "All done!! Go to http://your.domain/your.webspace/$db_title/wwwPartiGene.html to enjoy wwwPartiGene\n";
exit();




												#######n##################
######################################################/#\#####}#####################################################
#################### - - SUB ROUTINES - - ###########/###\####}####################+++++++#########################
####################################################/#####\###}~~~################################################
												##########################				

##########################################################################################################################

sub get_input {
#Prints a question and returns the user's reply.
#Uses the readline module when the GNU backend is available ($read_gnu set),
#otherwise prints the question and reads a line from STDIN.
#  argument: the question/prompt text
#  returns:  the reply with the trailing newline and trailing white space removed;
#            an empty string when nothing was entered or the input has ended
#Declared without a prototype: the sub takes one argument, which "()" would deny.

	my $input;
	my $question = shift (@_);

	if ($read_gnu) {	#true if gnu readline module installed
		$input = $term->readline("$question");	#print question and get user input
	} else {	#do it the old way, without readline
		local $| = 1;	#make sure a prompt without a newline is on screen before waiting for the reply
		print "$question";
		$input = <STDIN>;	#STDIN explicitly: a bare <> would read from files named on the command line
	}

	$input = "" unless defined $input;	#end of input gives undef
	chomp $input;	#remove trailing newline
	$input =~ s/\s*$//;	#remove trailing space
	return $input;
}

####################################################### the - end #########################################################
