package Admin::createBlastDB;
use strict;
use Cwd;
use DBI;
use GQ::Client::Utilities;
require Exporter;

# Package globals used by Exporter; declared with `use vars` so they are
# accepted under `use strict`.
use vars qw(@ISA @EXPORT);

# Inherit import() from Exporter so callers get the names listed in @EXPORT.
@ISA = qw(Exporter);
# Subs exported into the caller's namespace by default.
# NOTE(review): getRedundantContigSequence is listed here, but no sub with that
# name is defined in the part of this file that is visible -- confirm it is
# provided elsewhere, otherwise callers get an undefined sub at call time.
@EXPORT= qw(createBlastDatabase getNonRedundantContigSequence getRedundantContigSequence);


###This assumes a database connection!

# createBlastDatabase($QueryScreen)
#
# Dumps the contig sequences of the organism/assembly version held in
# $QueryScreen->context into a FASTA file and runs the BLAST "formatdb"
# program on it.
#
# Steps:
#   1. Creates a directory "<organism>_version_<version>" below
#      $QueryScreen->WORKING_DIR (the organism's latin name is stripped of
#      every non-word character first).
#   2. For every contig name returned by context->get_contigs_by_name, queries
#      the database and writes one FASTA record per returned row.  Only the
#      VC_start/VC_length window of the stored sequence is kept; the positions
#      outside that window are written as 'N' so the record keeps the full
#      contig length.
#   3. Runs "<BLAST_BINARIES>/formatdb" on the FASTA file.
#
# Progress and failures are reported through the $QueryScreen->lblSysMess
# label.  The process' current directory is changed while working and is
# restored on every exit path, including the early return and the die below.
#
# Parameters:
#   $QueryScreen - object providing WORKING_DIR, BLAST_BINARIES, lblSysMess
#                  and context (organism, version, dbh, get_contigs_by_name).
#
# Returns: nothing (empty list) if the database directory could not be
#          created, a true value once processing ran to completion.
# Dies:    if the FASTA output file cannot be opened.
sub createBlastDatabase {
    my ($QueryScreen) = @_;
    my $WD = $QueryScreen->WORKING_DIR;
    my $orgid = $QueryScreen->context->organism->id;
    my $version = $QueryScreen->context->version;
    my $orgname = $QueryScreen->context->organism->latin;
    $orgname =~ s/\s+//g;
    $orgname =~ s/\W//g;

    my $db_name = $orgname . "_version_" . $version;
    my $fasta_file = "$db_name.fas";

    # Show a status text in the GUI label and force an immediate redraw.
    my $show_status = sub {
        my ($text) = @_;
        $QueryScreen->lblSysMess->configure(-text => $text);
        $QueryScreen->lblSysMess->update;
    };

    my $ORIG_DIR = cwd;

    # Every exit path must put the process back where it started.
    my $restore_dir = sub {
        chdir $ORIG_DIR or warn "couldn't return to original working folder.  This may be a problem...";
    };

    chdir $WD or warn "can't change to working directory $!; this is a big problem!!";
    unless (mkdir $db_name, 0777) {
        $show_status->("Can't create database (already exists??)");
        $restore_dir->();
        return;
    }

    # Best effort: if the new directory cannot be entered the files are
    # written next to it instead.
    chdir "./$db_name" or warn "can't change to directory $!; will write database to your WORKING_DIRECTORY location";
    $show_status->("Querying Database for sequences (slow!)");

    my $dbh = $QueryScreen->context->dbh;

####abstract this into an adaptor!
    my $sth = $dbh->prepare("SELECT SQL_BIG_RESULT
    							seq,
								length,
    							VC_start,
    							VC_length
					        FROM
    							Sequence,
    							Tiling_Path,
    							ContigAssembly,
    							Assembly,
    							Contig
    						WHERE
    						    Assembly.version = ?
    						AND Assembly.organism = ?
    						AND Contig.name=?
                                                AND Sequence.contig_id = ContigAssembly.id
    						AND Contig.id = ContigAssembly.contig_id
    						AND ContigAssembly.assembly = Assembly.id
    						AND Tiling_Path.contig_id = ContigAssembly.id
    						AND Assembly.version = ContigAssembly.version
    ");

    my $out;
    unless (open $out, '>', $fasta_file) {
        my $open_error = $!;    # saved: chdir below may overwrite $!
        $restore_dir->();
        die "cant open the output FASTA file for database creation ($fasta_file: $open_error).  Aborted.";
    }

    foreach my $contigname ($QueryScreen->context->get_contigs_by_name) {
        unless ($sth->execute($version, $orgid, $contigname)) {
            warn ("problem with SQL query in retrieving contig sequences");
            next;    # nothing to fetch from a statement that failed to execute
        }

        while (my ($seq, $length, $VC_start, $VC_length) = $sth->fetchrow_array()) {
            $show_status->("Writing contig $contigname");

            # Keep only the VC window (VC_start is 1-based) ...
            my $thisseq = substr($seq, $VC_start-1, $VC_length);
            $thisseq =~ s/\W//g;

            # ... and mask everything before and after it with 'N'.
            # A negative repeat count simply yields an empty string.
            $thisseq = ('N' x ($VC_start-1)) . $thisseq;
            $thisseq .= 'N' x ($length-($VC_start+$VC_length-1));

            # Wrap the sequence at 70 characters per line.
            $thisseq =~ s/([^\n]{70})/$1\n/g;
            print {$out} ">$contigname\n$thisseq\n\n\n";
        }
    }
    # Buffered write errors only surface at close time.
    close $out or warn "problem closing $fasta_file: $!";

    my $BLASTDIR = _addSlash($QueryScreen->BLAST_BINARIES);
    $show_status->("Running Blast FormatDB program");

    # List form of system(): no shell involved, so spaces or shell
    # metacharacters in the path or database name cannot break the command.
    my @formatdb = ($BLASTDIR . "formatdb", '-t', $db_name, '-i', $fasta_file, '-p', 'F', '-o', 'T');
    if (system(@formatdb) == 0) {
        $show_status->("Blast database of $db_name ");
    }
    else {
        warn "formatdb failed for $db_name (status $?)";
        $show_status->("Blast FormatDB program failed for $db_name");
    }

    $restore_dir->();
    return 1;
}

# getNonRedundantContigSequence($QueryScreen, $contigname)
#
# Looks up the named contig for the organism and assembly version held in
# $QueryScreen->context and returns only the VC_start/VC_length window of its
# stored sequence, with every non-word character removed.  Unlike the FASTA
# export, the window is NOT padded with 'N' to the full contig length.
#
# Parameters:
#   $QueryScreen - object whose context supplies dbh, organism->id and version.
#   $contigname  - value matched against Contig.name.
#
# Returns: the trimmed sequence string; if the query yields several rows the
#          one from the last row wins; undef if no row was returned.
sub getNonRedundantContigSequence {
    my ($QueryScreen, $contigname) = @_;

    my $handle      = $QueryScreen->context->dbh;
    my $organism_id = $QueryScreen->context->organism->id;
    my $assembly_version = $QueryScreen->context->version;

####abstract this into an adaptor!
    my $sql = "SELECT SQL_BIG_RESULT
    							seq,
								length,
    							VC_start,
    							VC_length
					        FROM
    							Sequence,
    							Tiling_Path,
    							ContigAssembly,
    							Assembly,
    							Contig
    						WHERE
    						    Assembly.version = ?
    						AND Assembly.organism = ?
    						AND Contig.name=?
                            AND Sequence.contig_id = ContigAssembly.id
    						AND Contig.id = ContigAssembly.contig_id
    						AND ContigAssembly.assembly = Assembly.id
    						AND Tiling_Path.contig_id = ContigAssembly.id
    						AND Assembly.version = ContigAssembly.version
    ";
    my $query = $handle->prepare($sql);

    $query->execute($assembly_version, $organism_id, $contigname)
        or warn ("problem with SQL query in retrieving contig sequences");

    my $window;
    while (my @row = $query->fetchrow_array()) {
        # Columns: seq, length (unused here), VC_start (1-based), VC_length.
        my ($full_seq, undef, $first, $span) = @row;
        ($window = substr($full_seq, $first - 1, $span)) =~ s/\W//g;
    }

    return $window;
}
	

1;
