package GQ::Server::DB::createVirtContig;
#$Id: createVirtContig.pm,v 1.3 2003/04/23 19:37:09 markw Exp $;
use strict;
use DBI;
require Exporter;
use vars qw(@ISA @EXPORT); #keep 'use strict' happy
@ISA = qw(Exporter); 
@EXPORT = qw(create_sequence create_subsequence);

=pod

=head1 create_sequence

 Title       : create_sequence
 Usage       : create_sequence($seq,$name,$orgid,$dbh,$dominate,$parent,$start);
 Returns     : Nothing
 Accomplishes: creates Sequence, Contig and Tiling_Path table entries for the given sequence
 Args        : $seq: the sequence of the contig
               $name: the name associated with this sequence (from the fasta header, usually)
               $orgid: the Organism table id of the organism the sequence is from
               $dbh: a valid database handle to the database you want to insert the data into

               $dominate: 1 if this contig is more valid than the parent,
                          0 if the parent is more valid than this contig
               $parent: the name of the parent sequence
               $start: the bp position on the parent sequence where this sequence starts

               $dominate, $parent, and $start are optional parameters.  If not sent, the
               package assumes you want a contig with no parent contig (i.e. not a part of
               a tiling path, or the first contig in a tiling path).

=cut

sub create_sequence {
    my ($newseq,$name,$context,$chr_id,$dominate,$parent,$start_rel_to_parent)=@_;

    my $orgid=$context->organism->id;
    my $dbh=$context->dbobj->dbh;
    my $version=$context->version;
    

    $start_rel_to_parent ||=1;  #given by caller, or assumed to be 1
    my $VC_start=1;             #default is for VC to start on 1 of sequence - with overlaps this can change
    my ($contigName,            #name of Contig holding the start of this sequence
	$contigStart,           #abs start of $contigName
	$real_abs_start,        #chromosomal position of the start of this sequence
	$parent_assembly,       #assembly id (effectively tiling path id)
	$parent_chr,            #chr_id of parent
	$parent_VC_length,      #length of parent sequence, to determine overlap    #problem with proper semantics - hard to find VC to chop!!!
	$parent_VC_start,       #selfstart of parent
	$parent_abs_start,      #abs start of $parent_seqname
       );

    if ($parent) {                                                                    #Only if we have a parent

	my $sth_parent=$dbh->prepare("SELECT VC_start,assembly,VC_length,abs_start,chr_id ".
				     "FROM Tiling_Path,ContigAssembly,Assembly WHERE ".
				     "Tiling_Path.contig_id=ContigAssembly.id AND ".
				     "ContigAssembly.id=? AND ContigAssembly.version=? ".
				     "AND ContigAssembly.assembly=Assembly.id");
	$sth_parent->execute($parent,$version);
	($parent_VC_start,$parent_assembly,$parent_VC_length,$parent_abs_start,$parent_chr)=$sth_parent->fetchrow_array;

#	print $parent_abs_start,"\n";

	unless ($dominate) {                                             #case where parent's sequence is more reliable than this one in overlap
	    $VC_start = $parent_VC_start + $parent_VC_length - $start_rel_to_parent + 1;      #not dominating, so gives up overlapped bp's to parent
	    if ($VC_start<1) {
		$dominate=0;         #if sequence starts after the end of the VC, there's no conflict
	    }
	}

	$real_abs_start = $parent_abs_start + $start_rel_to_parent - 1;   #Now we know where we are in chromosomal coords

#	print $real_abs_start,"\n";

    } else {              ##No parent case- must create VC with start 1
	$contigName=$name;
	$contigStart=1;
	$real_abs_start=1;
    }

    my $length = length($newseq);
    my $VC_length = $length - $VC_start + 1;      #trim the length
    my $VCseq=substr($newseq,($VC_start-1));  #trim the front of the sequence of overlap


    if ($dominate) {                       #case where this contig is more reliable than parent sequence
	my $overlap = $parent_VC_start + $parent_VC_length - $start_rel_to_parent;
	my $sth=$dbh->prepare("UPDATE Tiling_Path SET VC_length = VC_length - ? ".
			      "WHERE contig_id = ?");
	$sth->execute($overlap, $parent);                         #trim the end of the VC - leave Sequence table alone
	$context->commit;
    }

    $real_abs_start ||= 1;                             #if no parent, then start at 1, else defined above

    $chr_id ||= $parent_chr;   #either chr_id was given, or we get it from parent,

    unless ($parent_assembly) {
		unless ($chr_id) {         # or we create one
			print STDERR "SELECT MAX(chr_id) FROM Assembly WHERE organism=$orgid AND version=$version\n";
			my $sth_chr=$dbh->prepare("SELECT MAX(chr_id) FROM Assembly WHERE organism=? AND version=?");
			$sth_chr->execute($orgid,$version);                 #if they don't supply a chromosome, make one up to avoid conflicts
			my ($prev_chr)=$sth_chr->fetchrow_array;
			$chr_id=$prev_chr+1;
			print STDERR "Chromosome $chr_id\n";
		}
		my $sth_assembly=$dbh->prepare("SELECT MAX(id) FROM Assembly");
		$sth_assembly->execute();
		my ($prev_assembly)=$sth_assembly->fetchrow_array;
		$parent_assembly=$prev_assembly+1; # we need this number later
		$sth_assembly=$dbh->prepare("INSERT INTO Assembly (id,version,organism,chr_id) ".
						"VALUES (?,?,?,?)");
		$sth_assembly->execute($parent_assembly,$version,$orgid,$chr_id);
		$context->commit;
    }

    my $sth_contig=$dbh->prepare("INSERT INTO Contig SET name=?");
    my $sth_CA=    $dbh->prepare("INSERT INTO ContigAssembly (contig_id,version,assembly) VALUES (?,?,?)");
    my $sth_TP=    $dbh->prepare("INSERT INTO Tiling_Path (contig_id,abs_start,length,VC_start,VC_length) VALUES (?,?,?,?,?)");
    my $sth_seq=   $dbh->prepare("INSERT INTO Sequence (contig_id,seq) VALUES (?,?)");

    $sth_contig->execute($name);
    my $contig=$dbh->{mysql_insertid};

    $sth_CA->execute( $contig,
		      $version,
		      $parent_assembly,
		    );
    my $contig_id=$dbh->{mysql_insertid};

    $sth_TP->execute( $contig_id,
		      $real_abs_start,
		      $length,
		      $VC_start,
		      $VC_length,
		    );

    $sth_seq->execute($contig_id,
		      $newseq,
		     );
    $context->commit;
    return $contig_id;
}

sub create_subsequence {
    my ($QS,$name,$seq,$parent,$start,$length)=@_;
    my $dbh=$QS->context->dbh;
    my $sth=$dbh->prepare("INSERT INTO Contig (name) VALUES (?)");
    $sth->execute($name);
    my ($id)=$dbh->{mysql_insertid};
    $sth=$dbh->prepare("INSERT INTO ContigAssembly (contig_id,version,assembly) VALUES (?,?,?)");
    $sth->execute($id,$QS->context->version,$QS->context->find_assembly($parent));
    my ($cid)=$dbh->{mysql_insertid};
    $sth=$dbh->prepare("INSERT INTO Tiling_Path (contig_id,abs_start,length,VC_start,VC_length) ".
		       "VALUES (?,?,?,?,?)");
    $sth->execute($cid,$seq->{$parent}[1]+$start-1,$length,1,$length);
    $sth=$dbh->prepare("SELECT seq FROM Sequence WHERE contig_id = ?");
    $sth->execute($seq->{$parent}[0]);
    my ($sequence)=$sth->fetchrow_array;
    $sequence=substr($sequence,$start-1,$length);
    $sth=$dbh->prepare("INSERT INTO Sequence (contig_id,seq) VALUES (?,?)");
    $sth->execute($cid,$sequence);
    $QS->context->commit;
}

1;







