# Sequence module for Genquire I

# by David Block <dblock@gene.pbir.nrc.ca>

# Copyright David Block and NRC-PBI

# POD Documentation - main docs before the code

=head1 NAME

Sequence - Sequence Object for Genquire I

=head1 SYNOPSIS

=head2 Loading a Sequence from the database

Sequences are meant to be loaded from the database.  They are
containers for everything the feature table contains within the
boundaries of the sequence.

 use Sequence;

$sequence=Sequence->new(context=>$context,
                        start=>$start,
                        stop=>$stop,
                        chr_id=>$chr_id,
                       );

=head2 Finding out about a Sequence:

$start=$sequence->start;

returns the start of the sequence on its chromosome.

$length=$sequence->length;

returns the sequence's length.

=head2 Finding out about a Sequence's Features:

The List of features in a hash:

$hash_of_feautures=$sequence->features;

An individual feature:

$feature=$sequence->feature($feature_id);

Adding a feature:

$sequence->features($feature);

=head1 DESCRIPTION

Sequence is a container for only the features from the Feature table, with
no ability to add annotation.  However, Sequence::Annot adds that functionality
by inheriting from Sequence.

=head1 FEEDBACK

Like it or lump it, report to dblock@gene.pbi.nrc.ca.  Feel free to add
to the docs available at
http://bioinfo.pbi.nrc.ca/dblock/wiki

=head2 Reporting Bugs

Email the author with any bug reports.

=head1 AUTHOR - David Block

email dblock@gene.pbi.nrc.ca

=cut

package GQ::Server::Sequence;
use strict;
use vars qw(@ISA $AUTOLOAD ); #keep 'use strict' happy
use Carp;
use Bio::UpdateableSeqI;
use Bio::SeqFeatureI;
use GQ::Server::Object;
use GQ::Root;

use GQ::Server::Gene;
use GQ::Server::Transcript;
use GQ::Server::Feature;

@ISA=qw(GQ::Server::Object Bio::UpdateableSeqI Bio::PrimarySeqI);

{
    #Encapsulated class data

                                      #     DEFAULT    ACCESSIBILITY
    GQ::Root->create(	  lockid        =>  [undef,        'read'], #needed to confirm annotation lock
			  base_contig   =>  ['',           'read'], #seqname of origin contig
			  base_contig_id=>  [0,            'read'], #id of said contig
			  bc_start      =>  [0,            'read'], #assembly coords of base_contig
			  dir           =>  ['',           'read/write'],
			  features      =>  [{},           'read/write'],
			  genes         =>  [{},           'read/write'],
			  seq           =>  ['',           'read/write'], #actual sequence 'ATTCTGA...'
			  start         =>  [undef,        'read/write'], #start in base_contig coords
			  length        =>  [undef,        'read/write'],
			  moltype       =>  ['dna',        'read'],
			  date          =>  [[],           'read/write'],
			  accession     =>  ['',           'read/write'],
			  secondary_accession=>[[],        'read'],
			  sv            =>  ['',           'read/write'],
			  keywords      =>  ['',           'read/write'],
			  display_id    =>  ['',           'read/write'],
			  primary_id    =>  ['',           'read/write'],
			  desc          =>  ['',           'read/write'],
		    );

}

sub DESTROY {
    my ($self)=@_;
    $self->context->lock(release=>$self->lockid);
}

sub new {
    my ($caller, %arg) = @_;
    my $caller_is_obj = ref($caller);
    my $class = $caller_is_obj || $caller;
    my $self = $class->SUPER::new(%arg);  #give it the Object initiation
	bless $self,$class;
    my %massaged_arg;
    foreach (keys %arg) {
	my $old=$_;
	s/^-//;  #take out the leading dashes
	s/accession_number/accession/;
	$massaged_arg{$_}=$arg{$old};
    }
    %arg=%massaged_arg;
    foreach my $attrname ( $self->_standard_keys() ) {
	if (exists $arg{$attrname}) {
            $self->{$attrname} = $arg{$attrname} }
	elsif ($caller_is_obj) {
            $self->{$attrname} = $caller->{$attrname} }
	else {
            $self->{$attrname} = _default_for($attrname) }
    }
    if ($self->moltype eq 'protein') {    #we just do dna for now - punt
		return Bio::PrimarySeq->new('-seq' => $self->seq,
				    '-display_id'  => $self->display_id,
				    '-accession_number' => $self->accession,
				    '-moltype' => 'protein'
				   );
    }
    $self->adaptor($self->context->adaptor($self));
###  Must have a base contig to work off of
    if (exists $arg{contig}) {
		unless ($self->accession){$self->accession($arg{contig})};
		my $contigobj=$arg{context}->contig($arg{contig});
		$self->{base_contig_id}=$contigobj->id;
		$self->{bc_start}=$contigobj->abs_start;
		$self->{base_contig}=$arg{contig};
		$self->{length}=$contigobj->length unless defined $self->length;
		$self->{start}=1 unless defined $self->start;
    }

	#print "sequence created ",
	#$self->{start}, "  ",
	#$self->{length}, "  ",
	#$arg{context}->contig($arg{contig})->name, "\n\n";
	#
	# this call loads the features into $self->{features} as a hash
    $self->adaptor->_features($self);
    return $self;
}

sub assembly {
    my ($self)=@_;
    unless ($self->{assembly}) {
	$self->{assembly}=$self->context->find_assembly($self->base_contig);
    }
    return $self->{assembly};
}

sub flush_SeqFeatures {
	return;
}

sub where_am_i {
    my ($self,$loc)=@_;
    return $self->adaptor->where_am_i($self,$loc);
}

sub add_feature {
    my ($self,$feature)=@_;
    return unless $feature->isa("GQ::Server::GenericFeature");
    if ($feature->id) {
	if ($self->feature($feature->id) eq $feature) {
	    return $feature;
	} else {
	    $self->{features}{$feature->id}=$feature;
	    return $feature;
	}
    } else {
	my $index=$feature->id || $feature->source_tag.$feature->contig_id.":".
	  $feature->start."-".$feature->end;
	$self->{features}{$index}=$feature;
    }
    return $feature;
}

sub _importFeature {
    my ($self,$feature)=@_;
	
	
}

sub add_gene {
    my ($self,$gene)=@_;
    return unless $gene->isa("GQ::Server::Gene");
    my $index=$gene->id or die "Tried to add invalid gene";
    $self->{genes}{$index}=$gene;
    return $gene;
}

sub get_contig_names {
    my ($self)=@_;
    return $self->adaptor->get_contig_names($self);
}

#### SeqI Bioperl emulation here!!

sub seq {
    my ($self,$new)=@_;
    if ($new) {
	$self->{seq}=$new;
    }
    return $self->getseq;
}

sub top_SeqFeatures {
    return (values %{$_[0]->genes}, values %{$_[0]->features});
}

sub all_SeqFeatures {
    my ($self)=@_;
    my @list=(values %{$self->genes},       #all the genes
	      values %{$self->features});   #all the non-gene exon-type stuff
    foreach (values %{$self->genes}) {
	push @list, $_->transcripts;        #all the transcripts
	foreach my $trans($_->transcripts) {
	    push @list,$trans->features;    #all the gene exon-type stuff
	}
    }
    return @list;
}

sub add_SeqFeature {
    my ($self,$feature)=@_;
    if ($feature->isa("GQ::Server::GenericFeature")) {
	return $self->add_feature($feature);
    } elsif ($feature->isa("GQ::Server::Gene")) {
	return $self->add_gene($feature);
    } elsif ($feature->isa("Bio::SeqFeature::Generic")) {
	return $self->import_feature($feature);
    }
    return;
}

sub import_feature {
    my ($self,$feature)=@_;
    if ($feature->isa("Bio::SeqFeature::Gene::GeneStructureI")) {
		#we're going to assume that the gene's coordinates are relative to this sequence
			my ($contig_id,$contig_start)=$self->where_am_i($feature->start);
		my ($contig_stop)= $contig_start+$feature->length-1;
		my $new_gene=GQ::Server::Gene->new(contig_id    =>$contig_id,
						   contig_start =>$contig_start,
						   contig_stop  =>$contig_stop,
						   length       =>$feature->length,
						   context      =>$self->context,
						   par_pos      =>$self->start+$self->bc_start-1,
						   lockid       =>$self->lockid,
						   source_tag   =>$feature->source_tag,
						   primary_tag  =>'Gene',
						   score        =>$feature->score,
						   strand       =>$feature->strand,
						   frame        =>$feature->frame,
						  );
		$new_gene->attach_seq($self);
		foreach my $trans ($feature->transcripts) {
			$new_gene->add_transcript_as_features($trans->features);
		}
		$self->add_SeqFeature($new_gene);
		return $new_gene;
    } elsif ($feature->isa("Bio::SeqFeature::Gene::Transcript")) {
		$self->throw("Must add transcript to a gene, not directly to a sequence.");
    } else {
		my ($contig_id,$contig_start)=$self->where_am_i($feature->start);
		my ($contig_stop)= $contig_start+$feature->length-1;
		my $newfea=GQ::Server::Feature->new(context=>$self->context,
						 lockid=>$self->lockid,
						 par_pos=>$self->start+$self->bc_start-1,
						 contig_id=>$contig_id,
						 source_tag=>$feature->source_tag,
						 primary_tag=>$feature->primary_tag,
						 contig_start=>$contig_start,
						 contig_stop=>$contig_stop,
						 score=>$feature->score,
						 strand=>$feature->strand,
						 length=>$feature->length,
						 frame=>$feature->frame,
						);
		$self->add_SeqFeature($newfea);
		return $newfea;
    }
}

sub export_GFF {           #writes a GFF2 string in the coordinates of the Virtual Contigs
    my ($self,$fh) = @_;

    $fh || do { $fh = \*STDOUT; };

    $self->write_GFF($fh);
}

sub write_GFF {   #writes a GFF2 string in the coordinates of $self
    my ($self,$fh) = @_;

    $fh || do { $fh = \*STDOUT; };

    foreach my $sf ( $self->all_SeqFeatures() ) {
        print $fh $sf->gff_string, "\n";
    }
}

sub primary_id {
    my $self=shift;
    return $self->base_contig."::".$self->start."-".$self->length;
}

#### End of Bioperl SeqI emulation!

#### PrimarySeqI Bioperl emulation here!!

sub subseq {
    my ($self,$start,$stop)=@_;
    return $self->getseq($start,$stop);
}

sub display_id {
    my ($self)=@_;
    return "Sequence from ".$self->organism->latin." Sequence ".$self->base_contig;
}

sub accession_number {
    return $_[0]->accession;
}

sub can_call_new {1}

sub moltype { return $_[0]->{moltype} }

sub revcom {
    my ($self)=@_;
    my $seq=$self->seq;
    $seq =~ tr/ACTGactg/CAGTcagt/;
    $seq = scalar reverse $seq;
    $self->new(context=>$self->context,
		seq=>$seq);
}

sub trunc {
    my ($self,$start,$end)=@_;
    if( !$end ) {
	if( $self->can('throw')  ) {
            $self->throw("trunc start,end");
	} else {
            confess("[$self] trunc start,end");
	}
    }
    if( $end < $start ) {
	if( $self->can('throw')  ) {
            $self->throw("$end is smaller than $start. if you want to truncated and reverse complement, you must call trunc followed by revcom. Sorry.");
	} else {
            confess("[$self] $end is smaller than $start. If you want to truncated and reverse complement, you must call trunc followed by revcom. Sorry.");
	}
    }

    my $str = $self->subseq($start,$end);
    $start=$start+$self->start + $self->bc_start-2;
    $end  =$end  +$self->start + $self->bc_start-2;

    $self->new(context  =>$self->context,
		start=>$start,
		stop =>$end,
		seq  =>$str,
              );
}

sub feature_count {return $#{$_[0]->top_SeqFeatures}};

sub _retrieve_subSeqFeature {
    my ($arrayref,$feat) = @_;

    foreach my $sub ( $feat->sub_SeqFeature() ) {
	push(@$arrayref,$sub);
	&_retrieve_subSeqFeature($arrayref,$sub);
    }

}

sub species {return $_[0]->organism->latin}

sub sub_species {return $_[0]->organism->latin}

#### EMBL/GENBANK/DDBJ emulation

sub add_date {push @{$_[0]->date},$_[1]}

sub each_date {return @{$_[0]->date}}

sub add_secondary_accession {push @{$_[0]->secondary_accession},$_[1]}

sub each_secondary_accession {return @{$_[0]->secondary_accession}}

##### Done Emulation layer

sub feature { return $_[0]->{features}{$_[1]} }

sub gene { return $_[0]->{genes}{$_[1]} }

sub getseq {
    my ($self,$start,$end)=@_;
    return $self->adaptor->getseq($self,$start,$end);
}

sub SeqHash {
    my ($self,$start,$end,$seq)=@_;
    if ($seq) {
	$self->{SeqHash}{$start}{$end}=$seq;
    }
	return $self->{SeqHash}{$start}{$end};
}

sub delete_feature {
    my ($self,$feature,$transcript,$gene)=@_;
    return unless $feature;
    my @orphanlist;
    if ($gene) {
	push @orphanlist, grep {!$_->isa('GQ::Server::Container')} $gene->sub_SeqFeature;
    }
    if ($transcript) {
	$transcript=$transcript->remove_feature($feature);
	$gene->flesh_out_details if $gene;
    } elsif ($gene && !$transcript) {
	$gene=$gene->remove_feature($feature);
    } elsif ($feature->delete_from_db) {
	delete $self->{features}{$feature->id};
	if ($feature->isa('GQ::Server::Gene')) {
	    push @orphanlist, grep {!$_->isa('GQ::Server::Container')} $feature->sub_SeqFeature;
	}
    } else { #unsuccessful deletion - don't return anything
	return;
    }
    if ($gene) {
	for (reverse (0..$#orphanlist)) {
	    foreach my $exon ($gene->sub_SeqFeature) {
		if ($exon eq $orphanlist[$_]) {
		    splice @orphanlist, $_, 1;  #remove the exon from the orphan list as it is not an orphan
		}
	    }
	}
    }
    return \@orphanlist;
}

1;











