#!/usr/bin/perl

my $MAXSEP = 12000; # if HSP is more than MAXSEP characters away from of any HSP in any current group it will become a new group.  This is to allow exons separated by large introns to be combined together.  NOTE: that <10% of introns in the human genome are more than 11,000 bp in length.

my $MAXSEP_CONTIGUOUS = 500; # This variable is for determing whether or not to combine two HSPs that overlap on the query sequence (see MAXOL variable also).  They can be greater than $MAXOL as long as they are less than $MAXSEP_CONTIGUOUS characters from each other.

my $MAXOL = 0.20; # if more than MAXOL percent of HSP overlaps with any HSP in any current group it will become a new group.  If the HSPs are near each other in the database sequence overlapping HSPs in TBLASTN searches tend to be from frame shift mutations.  We don't really want to separate these so we check $MAXSEP_CONTIGUOUS.  $MAXOL is mainly for separating homologous genes that hit the same region of query sequence.

my $MAX_PERCENT_DIFF = 70; # if the sequence identity of an HSP is is more than $MAX_PERCENT_DIFF percentage points lower than that of the first HSP of it's closest group it will become a new group.  The idea here is that such an HSP, even if part of the gene or pseudogene is following a different model of evolution and may bias phylogenetic analysis.  Made this pretty high now because a TBLASTN HSP could get a low average percent id from a frame shift error that extends into junk region.  However since higher id HSPs get priority on determining the sequence to use often these bad regions will be replaced.

my $MIN_LEN = 10; # after the first HSP an HSP has to be greater than or equal to this length

my $PCNT_ID_MIN = 10; # after the first HSP and HSP has to have greater than or equal to this percent id to the query

use warnings;
use strict;

my @args = qw(BLAST-FILE MAX-EXPECT);
die "usage: $0 @args\n" unless $#ARGV == $#args;

my $file = shift;
my $MAX_EXPECT = shift;
$MAX_EXPECT != 0 and $MAX_EXPECT <= 1e-300 and
  die "ERROR: MAX_EXPECT must be either zero or greater than 1e-300\n";


my ($i, $pos, $qpos, $spos, $query, $querylength, $strand);
my $verbose = 0;
my ($report);
my ($hit, $id, $sbjct_length);
my ($hsp, @qa, @sa);
my @inc = (1, -1);

my $multi_report = new BPlite::Multi(GetFileHandle($file));
while ( $report = $multi_report->nextReport) {

  $query = $report->query;
  $query =~ s/ \(\d+ letters\)//;
  $querylength = $report->queryLength;

  while( $hit = $report->nextSbjct ) {

    $hsp = $hit->nextHSP;
    $hsp->P <= $MAX_EXPECT or last;

    my $id = $hit->name;
    $id =~ s/^>+//;
    my $sbjct_length = $hit->length;
    printf STDERR "%s\n", $id;
    my @SPLICE_GROUPS = ([$hsp]);

    while( $hsp = $hit->nextHSP ) {
      ($hsp->percent >= $PCNT_ID_MIN and $hsp->length >= $MIN_LEN) or next;
      FindGroup($id, $hsp, \@SPLICE_GROUPS);
    }

    my @gene;
    my $group = shift @SPLICE_GROUPS;
    foreach $hsp (sort {$b->percent <=> $a->percent} @$group) {
      ImposeHsp(\@gene, $hsp);
    }
    PrintGene(\@gene, $querylength, $id);

    my $group_cnt = 2;
    foreach $group (@SPLICE_GROUPS) {
      my @gene;
      foreach $hsp (sort {$b->percent <=> $a->percent} @$group) {
	ImposeHsp(\@gene, $hsp);
      }
      PrintGene(\@gene, $querylength, "$group_cnt.$id");
      $group_cnt++;
    }
  }
}

sub FindGroup {
  my $id = shift;
  my $hsp = shift;
  my $groups = shift;
  my ($qb, $qe, $sb, $se) = ($hsp->qb, $hsp->qe, $hsp->sb, $hsp->se);

  # Use this to check strands of groups
  my $strand = ($sb < $se) ? 0 : 1;

  my ($d, $group, $group_hsp, $group_strand);

  my %potential_groups;
  foreach $group (@$groups) {
    $group_strand = ($group->[0]->sb < $group->[0]->se) ? 0 : 1;
    $strand == $group_strand or next;
    $potential_groups{$group} = $group;
  }

  if (not %potential_groups) {
    print STDERR "INFO: No current group is on same strand. CREATING NEW GROUP\n";
    push @$groups, [$hsp];
    return;
  }

  # find closest group that is within MAXSEP
  # note that sb and se are in the same orientation or else the above would have returned
  my $mind = 1e100;
  my $group_mind;
  my $closest_group;
  foreach $group (values %potential_groups) {
    $group_mind = 1e100;
    foreach $group_hsp (@$group) {
      if ($strand == 0) { # forward
	$d = max($sb, $group_hsp->sb) - min($se, $group_hsp->se);
      } else {
	$d = max($se, $group_hsp->se) - min($sb, $group_hsp->sb);
      }
      $d < $group_mind and $group_mind = $d;
    }
    if ($group_mind > $MAXSEP) {
      delete $potential_groups{$group};
    }
    if ($group_mind < $mind) {
      $closest_group = $group;
      $mind = $group_mind;
    }
  }

  my @groups = keys %potential_groups;
  if (not @groups) {
    print STDERR "INFO: HSP is $mind characters away from closest previous HSP in database sequence. CREATING NEW GROUP\n";
    push @$groups, [$hsp];
    return;
  }

  my $DO_OL_CHECK = 0;
  # and that they don't hit same region of query sequence
  if ($DO_OL_CHECK) {
  my $maxol = 0;
  my $ol;
  foreach $group_hsp (@$closest_group) {
    $ol = min($qe, $group_hsp->qe) - max($qb, $group_hsp->qb);
    $ol > $maxol and $maxol = $ol;
  }
  my $hsplen = $hsp->length;
  $maxol = $maxol / $hsplen;
  if ($maxol > $MAXOL) {
    if ($mind > $MAXSEP_CONTIGUOUS) {
      printf STDERR "INFO: HSP has (%.2f%% > %.2f%%) matches to same region of query as a previous HSP and is $mind characters away from closest previous HSP in database sequence. CREATING NEW GROUP\n", $maxol * 100, $MAXOL * 100;
      push @$groups, [$hsp];
      return;
    } else {
      printf STDERR "INFO: HSP has overlap problem with other HSP (%.2f%% > %.2f%%) but is only $mind characters away from closest previous HSP in database sequence. OK\n", $maxol * 100, $MAXOL * 100;
    }
  } elsif ($maxol > 0) {
    printf STDERR "INFO: HSP has (%.2f%% < %.2f%%) matches to same region of query as a previous HSP. OK\n", $maxol * 100, $MAXOL * 100;
  }
}
  # check that a percent identity is similar to first HSP
#   if ( ($closest_group->[0]->percent - $hsp->percent) > $MAX_PERCENT_DIFF ) {
#     printf STDERR "INFO: HSP identity = %.1f%% first HSP identity = %.1f%%. CREATING NEW GROUP\n", $hsp->percent, $closest_group->[0]->percent;
#     push @$groups, [$hsp];
#     return;
#   } else {
#     printf STDERR "INFO: HSP identity = %.1f%% first HSP identity = %.1f%%. OK\n", $hsp->percent, $closest_group->[0]->percent;
#   }

  push @$closest_group, $hsp;

  return 0; # will be combined with first HSP
}

sub ImposeHsp {
  my $gene = shift;
  my $hsp = shift;

  my $len = $hsp->length;
  my @qa = split '', $hsp->qa;
  my @sa = split '', $hsp->sa;
  my $qpos = $hsp->qb;

  for (my $i = 0; $i < $len; $i++) {
    if ($qa[$i] ne '-') {
      if (not defined $gene->[$qpos]) {
	$gene->[$qpos] = $sa[$i];
      }
      $qpos++;
    }
  }
}

sub max {
  my $max = shift(@_);
  foreach my $foo (@_) {
    $foo > $max and $max = $foo;
  }
  return $max;
}

sub min {
  my $min = shift(@_);
  foreach my $foo (@_) {
    $foo < $min and $min = $foo;
  }
  return $min;
}

sub PrintGene {
  my $gene = shift;
  my $querylength = shift;
  my $def = shift;
  print ">$def\n";
  # print out template (contiged gene)
  for (my $i = 1; $i <= $querylength; $i++) {
    if (defined $gene->[$i]) {
      $gene->[$i] eq '*' and $gene->[$i] = 'X';
      print $gene->[$i];
    } else {
      print '-';
    }
  }
  print "\n";
}

sub GetFileHandle {
  my $file = shift;
  `zcat -t $file >& /dev/null`;
  if ($?) {
    open IN, $file or die "ERROR: can't open $file\n";
  } else {
    my $tmpfile = `mktemp`;
    chomp $tmpfile;
    `zcat $file > $tmpfile`;
    open IN, $tmpfile or die "ERROR: can't open $tmpfile\n";
  }
  return \*IN;
}


#my @groups = qw{Cryptophyta Haptophyceae stramenopiles Dinophyceae Apicomplexa Ciliophora Metazoa Gammaproteobacteria Firmicutes Thermotogae Epsilonproteobacteria Deltaproteobacteria Alphaproteobacteria Actinobacteria Deinococcus Betaproteobacteria magnoliids asterids rosids eudicotyledons Liliopsida Magnoliophyta Cyanobacteria Rhodophyta Fusobacteria Chlamydiae Chloroflexi Aquificae Chlorobi Bacteroidetes Planctomycetes Spirochaetes Chlorophyta Fungi Euglenozoa Archaea Entamoebidae Ginkgophyta Coniferopsida Bryophyta Tracheophyta Mycetozoa Proteobacteria Diplomonadida Trichomonada Choanoflagellida Glaucocystophyceae Marchantiophyta Anthocerotophyta Coleochaetales Mesostigmatophyceae Streptophyta Acidobacteria};

__END__

=head1 NAME

blast2malign

=head1 SYNOPSIS

blast2malign blast-output-file expectation-cutoff > malign.fas

=head1 DESCRIPTION

This will take all HSPs from a hit and give an alignment relative to the query sequence.

HSPs for a given database sequence are separated into groups that tend to correspond to individual genes.  For example a database sequence could be a chromosome that contains many homologs that match the query sequence.

The script also attempts to take into account the possibility of introns and pseudogenes.

See TWEAKABLE VARIABLES and SUBROUTINES for a better explanation of how groups are determined.

This should work on protein or dna BLAST output

=cut

=head1 TWEAKABLE VARIABLES

The following variables can be found at the top of the script and modified for specific purposes.  See SUBROUTINES for more information.

my $MAXSEP = 12000;

If HSP is more than MAXSEP characters away from of any HSP in any current group it will become a new group.  This is to allow exons separated by large introns to be combined together.  NOTE: that <10% of introns in the human genome are more than 11,000 bp in length.

my $MAXSEP_CONTIGUOUS = 100; CURRENTLY DIABLED

This variable is for determing whether or not to combine two HSPs that overlap on the query sequence (see MAXOL variable also).  They can be greater than $MAXOL as long as they are less than $MAXSEP_CONTIGUOUS characters from each other.

my $MAXOL = 0.20; CURRENTLY DIABLED

If more than MAXOL percent of HSP overlaps with any HSP in any current group it will become a new group.  If the HSPs are near each other in the database sequence overlapping HSPs in TBLASTN searches tend to be from frame shift mutations.  We don't really want to separate these so we check $MAXSEP_CONTIGUOUS.  $MAXOL is mainly for separating homologous genes that hit the same region of query sequence.

my $MAX_PERCENT_DIFF = 70; CURRENTLY DIABLED

If the sequence identity of an HSP is is more than $MAX_PERCENT_DIFF percentage points lower than that of the first HSP of it's closest group it will become a new group.  The idea here is that such an HSP, even if part of the gene or pseudogene is following a different model of evolution and may bias phylogenetic analysis.  Made this pretty high now because a TBLASTN HSP could get a low average percent id from a frame shift error that extends into junk region.  However since higher id HSPs get priority on determining the sequence to use often these bad regions will be replaced.

my $MIN_LEN = 10;

After the first HSP an HSP has to be greater than or equal to this length

my $PCNT_ID_MIN = 10;

After the first HSP and HSP has to have greater than or equal to this percent id to the query

=cut

=head1 SUBROUTINES

FindGroup

The main goal of this routine is to figure out if two HSPs are part of the same gene or part of a duplicated homologous gene on the same database sequence.  HSPs far away on the database seqeunce are more likely to be duplicates but may be exons separated by introns.  HSPs hiting same regions of query may be homologs or repeats in same gene or from frame shift erros with TBLASTN searches.

If an HSP is further away than $MAXSEP characters from any HSP in any current group this HSP is used to start a brand new group.

If the $MAXOL percent of the HSP hits the same region of the query sequence as an HSP in a current group and if the HSP is futher than $MAXSEP_CONTIGUOUS from any HSP in that group this HSP is used to start a brand new group.

NOTE the following screen has been removed for now.

If the sequence identity of an HSP is is more than $MAX_PERCENT_DIFF percentage points lower than that of the first HSP of it's closest group it will become a new group.  The idea here is that such an HSP, even if part of the gene or pseudogene, is following a different model of evolution and may bias phylogenetic analysis.  Made this pretty high now because a TBLASTN HSP could get a low average percent id from a frame shift error that extends into junk region.  However since higher id HSPs get priority on determining the sequence to use often these bad regions will be replaced.

=head1 AUTHOR

Danny Rice

You can use this or modify it.  If you want you can give me credit or sue me whichever you prefer.

=cut

BEGIN {
package BPlite;
use strict;
use overload '""' => '_overload';
###############################################################################
# BPlite
###############################################################################
sub new {
  my ($class, $fh, $multi_report) = @_;
  if (ref $fh !~ /GLOB/) {
    die "BPlite error: new expects a GLOB reference not $fh\n";
  }
  my $this = bless {};
  $this->{FH} = $fh;
  $this->{DATABASE} = $multi_report->{DATABASE};
  $this->{LASTLINE} = "";
  if ($this->_parseHeader) {
    # there are alignments
    $this->{REPORT_DONE} = 0;
    die if not defined $this->{QUERY};
    die if not defined $this->{DATABASE};
  } else {
    # empty report
    $this->{REPORT_DONE} = 1;
  }
  $multi_report->{DATABASE} = $this->{DATABASE};
  return $this;
}
sub query       {shift->{QUERY}}
sub queryLength {shift->{QUERY_LENGTH}}
sub database    {shift->{DATABASE}}
sub nextSbjct {
  my ($this) = @_;
  $this->_fastForward or return 0;
	
  #######################
  # get all sbjct lines #
  #######################
  if ($this->{LASTLINE} =~ /^Parameters:|^\s+Database:|^Matrix:/) {
    return 0;
  }
  my $def = $this->{LASTLINE};
  my $FH = $this->{FH};
  while (<$FH>) {
    if ($_ !~ /\S/) {
      $this->{LASTLINE} = <$FH>; last;
    } elsif ($_ =~ /Strand HSP/) {
      next;
    }				# WU-BLAST non-data
    #		elsif ($_ =~ /^\s{0,2}Score/) {$this->{LASTLINE} = $_; last} # removed this because sometimes the definition will have a line that matches this.  Thus we are relying on there being a blank line after the definition.
    else {
      $def .= $_;
    }
  }
  return 0 unless $def =~ /^>/;
  $def =~ s/\s+/ /g;
  $def =~ s/\s+$//g;
  # my ($sbjct_length) = $def =~ /Length = ([\d,]+)$/;
  my ($sbjct_length) = $def =~ /Length\s*=\s*([\d,]+)/; # danny rice Feb 2006
  $sbjct_length =~ s/,//g;
  # $def =~ s/Length = [\d,]+$//g;
  $def =~ s/Length\s*=\s*[\d,]+.*//g; # danny rice Feb 2006
	
  ####################
  # the Sbjct object #
  ####################
  my $sbjct = BPlite::Sbjct::new($def, $sbjct_length,
				 $this->{FH}, $this->{LASTLINE}, $this);
  return $sbjct;
}
sub _parseHeader {
  my ($this) = @_;
  my $FH = $this->{FH};
  while (<$FH>) {
    if ($_ =~ /^Query=\s+(.*)/) {
      my $query = $1;
      while (<$FH>) {
	last if $_ !~ /\S/;
	$query .= " $_";
      }
      $query =~ s/\s+/ /g;
      $this->{QUERY} = $query;
      # Danny Rice May 2006
      if ($query =~ /([\d,]+) letters/) {
	$this->{QUERY_LENGTH} = $1;
      } elsif ($query =~ /Length\s*=\s*([\d,]+)/) {
	$this->{QUERY_LENGTH} = $1
      }
      #			($this->{QUERY_LENGTH}) = $query =~ /([\d,]+) letters/;
      $this->{QUERY_LENGTH} =~ s/\D//g;
    } elsif ($_ =~ /^Database:\s+(.+)/) {
      $this->{DATABASE} = $1;
    } elsif ($_ =~ /^>/) {
      $this->{LASTLINE} = $_; return 1;
    } elsif ($_ =~ /^Parameters|^\s+Database:|^Matrix:/) {
      $this->{LASTLINE} = $_;
      $this->{REPORT_DONE} = 1;
      return 0;			# there's nothing in the report
    }
  }
}
sub _fastForward {
  my ($this) = @_;
  return 0 if $this->{REPORT_DONE};
  return 1 if $this->{LASTLINE} =~ /^>/;
  if ($this->{LASTLINE} =~ /^Parameters|^\s+Database:|^Matrix:/) {
    $this->{REPORT_DONE} = 1;
    return 1;
  }
  my $FH = $this->{FH};
  while (<$FH>) {
    die if /^BLAST/;
    if ($_ =~ /^>/) {
      $this->{LASTLINE} = $_;
      return 1;
    } elsif ($_ =~ /^Parameters|^\s+Database:|^Matrix:/) {
      $this->{LASTLINE} = $_;
      $this->{REPORT_DONE} = 1;
      return 1;
    }
  }
  warn "Possible parse error in _fastForward in BPlite.pm\n";
}
sub _overload {
  my ($this) = @_;
  return $this->{QUERY} . " vs. " . $this->{DATABASE};
}

###############################################################################
# BPlite::Sbjct
###############################################################################
package BPlite::Sbjct;
use overload '""' => 'name';
sub new {
  my $sbjct = bless {};
  ($sbjct->{NAME}, $sbjct->{LENGTH}, $sbjct->{FH},$sbjct->{LASTLINE},
   $sbjct->{PARENT}) = @_;
  $sbjct->{HSP_ALL_PARSED} = 0;
  return $sbjct;
}
sub name {shift->{NAME}}
sub length {shift->{LENGTH}}
sub nextHSP {
  my ($sbjct) = @_;
  return 0 if $sbjct->{HSP_ALL_PARSED};
	
  ############################
  # get and parse scorelines #
  ############################
  my $scoreline = $sbjct->{LASTLINE};
  my $FH = $sbjct->{FH};
  if (not $scoreline =~ /Score =/) {
    while (<$FH>) {
      if (/Score =/) {
	$scoreline = $_;
	last;
      }
    }
  }
  my $nextline = <$FH>;
  return undef if not defined $nextline;
  $scoreline .= $nextline;
  my ($score, $bits);
  if ($scoreline =~ /\d bits\)/) {
    ($score, $bits) = $scoreline =~
      /Score = (\d+) \((\S+) bits\)/; # WU-BLAST
  } else {
    ($bits, $score) = $scoreline =~
      /Score =\s+(\S+) bits \((\d+)/; # NCBI-BLAST
  }
  my ($match, $length) = $scoreline =~ /Identities = (\d+)\/(\d+)/;
  my ($positive) = $scoreline =~ /Positives = (\d+)/;
  $positive = $match if not defined $positive;
  my ($p)       = $scoreline =~ /[Sum ]*P[\(\d+\)]* = (\S+)/;
  if (not defined $p) {
    ($p) = $scoreline =~ /Expect\S*\s+=\s+(\S+)/; # thanks Keith Allen
  }
  $p =~ s/,//g;
  $p =~ /^e/ and $p = "1$p";
	
  die "parse error $scoreline\n" if not defined $score;

  #######################
  # get alignment lines #
  #######################
  my @hspline;
  my ($qstrand, $sstrand) = ("+", "+");
  while (<$FH>) {
    die if /^BLAST/;
    if ($_ =~ /^WARNING:|^NOTE:|^ERROR:|^FATAL:/) {
      while (<$FH>) {
	last if $_ !~ /\S/;
      }
    } elsif ($_ !~ /\S/) {
      next;
    } elsif ($_ =~ /^\s*Score/) {
      $sbjct->{LASTLINE} = $_; last;
    } elsif ($_ =~ /Strand HSP/) {
      next;
    }				# not capturing
    elsif ($_ =~ /^\s*Frame/) {
      next;
    }				# not capturing
    elsif ($_ =~ /^\s*Links/) {
      next;
    }				# not capturing
    #		elsif ($_ =~ /^\s*Strand/)    {next} # not capturing
    elsif ($_ =~ /^\s*Strand = (\S+) \/ (\S+)/) {
      $qstrand = $1;
      $sstrand = $2;
      $qstrand =~ /Plus/i and $qstrand = '+';
      $qstrand =~ /Minus/i and $qstrand = '-';
      $sstrand =~ /Plus/i and $sstrand = '+';
      $sstrand =~ /Minus/i and $sstrand = '-';
    }				# not capturing
    elsif ($_ =~ /^EXIT CODE/) {
      last;
    } elsif ($_ =~ /^>|^Parameters|^\s+Database:|^Matrix:/) {
      $sbjct->{LASTLINE} = $_;
      $sbjct->{PARENT}->{LASTLINE} = $_;
      $sbjct->{HSP_ALL_PARSED} = 1;
      last;
    }
    #		elsif ($_ =~ /^Query:/) {
    elsif ($_ =~ /^Query:?/) {	# changed by Danny Rice June 2005
      push @hspline, $_;	# store the query line
      my $l1 = <$FH>;		# either alignment line or sbjct line
      if ($l1 =~ /^Sbjct/) {
	push @hspline, "";	# dummy line, this is a -noseq option
	push @hspline, $l1;	# so store a fake alignment and real sbjct
	next;
      }
      push @hspline, $l1;	# grab/store the alignment line
      my $l2 = <$FH>; push @hspline, $l2; # grab/store the sbjct line
    }
  }
	
  #########################
  # parse alignment lines #
  #########################
  my ($ql, $sl, $as) = ("", "", "");
  my ($qb, $qe, $sb, $se) = (0,0,0,0);
  my (@QL, @SL, @AS);		# for better memory management
			
  for (my $i=0;$i<@hspline;$i+=3) {
    #warn $hspline[$i], $hspline[$i+2];
    #		$hspline[$i]   =~ /^Query:\s+(\d+)\s*(\S+)\s+(\d+)/;
    $hspline[$i]   =~ /^Query:?\s+(\d+)\s*(\S+)\s+(\d+)/; # changed by Danny Rice June 2005
    return 0 unless defined $3;
    $ql = $2; $qb = $1 unless $qb; $qe = $3;
		
    my $offset = index($hspline[$i], $ql);
    $as = substr($hspline[$i+1], $offset, CORE::length($ql))
      if $hspline[$i+1];
		
    $hspline[$i+2] =~ /^Sbjct:?\s+(\d+)\s*(\S+)\s+(\d+)/;
    return 0 unless defined $3;
    $sl = $2; $sb = $1 unless $sb; $se = $3;
		
    push @QL, $ql; push @SL, $sl; push @AS, $as;
  }

  ##################
  # the HSP object #
  ##################
  $ql = join("", @QL);
  $sl = join("", @SL);
  $as = join("", @AS);
  my $qgaps = $ql =~ tr/-/-/;
  my $sgaps = $sl =~ tr/-/-/;
  my $hsp = BPlite::HSP::new( $score, $bits, $match, $positive, $length, $p,
			      #		$qb, $qe, $sb, $se, $ql, $sl, $as, $qgaps, $sgaps);
			      $qb, $qe, $sb, $se, $ql, $sl, $as, $qgaps, $sgaps, $qstrand, $sstrand);
  return $hsp;
}

###############################################################################
# BPlite::HSP
###############################################################################
package BPlite::HSP;
use overload '""' => '_overload';
sub new {
  my $hsp = bless {};
  ($hsp->{SCORE}, $hsp->{BITS},
   $hsp->{MATCH}, $hsp->{POSITIVE}, $hsp->{LENGTH},$hsp->{P},
   $hsp->{QB}, $hsp->{QE}, $hsp->{SB}, $hsp->{SE},
   #		$hsp->{QL}, $hsp->{SL}, $hsp->{AS}, $hsp->{QG}, $hsp->{SG}) = @_;
   $hsp->{QL}, $hsp->{SL}, $hsp->{AS}, $hsp->{QG}, $hsp->{SG}, $hsp->{QSTRAND}, $hsp->{SSTRAND}) = @_;
  $hsp->{PERCENT} = int(1000 * $hsp->{MATCH}/$hsp->{LENGTH})/10;
  return $hsp;
}
sub _overload {
  my $hsp = shift;
  return $hsp->queryBegin."..".$hsp->queryEnd." ".$hsp->bits;
}
sub score           {shift->{SCORE}}
sub bits            {shift->{BITS}}
sub percent         {shift->{PERCENT}}
sub match           {shift->{MATCH}}
sub positive        {shift->{POSITIVE}}
sub length          {shift->{LENGTH}}
sub P               {shift->{P}}
sub queryBegin      {shift->{QB}}
sub queryEnd        {shift->{QE}}
sub sbjctBegin      {shift->{SB}}
sub sbjctEnd        {shift->{SE}}
sub queryAlignment  {shift->{QL}}
sub sbjctAlignment  {shift->{SL}}
sub alignmentString {shift->{AS}}
sub queryGaps       {shift->{QG}}
sub sbjctGaps       {shift->{SG}}
sub qb              {shift->{QB}}
sub qe              {shift->{QE}}
sub sb              {shift->{SB}}
sub se              {shift->{SE}}
sub qa              {shift->{QL}}
sub sa              {shift->{SL}}
sub as              {shift->{AS}}
sub qg              {shift->{QG}}
sub sg              {shift->{SG}}
sub qs              {shift->{QSTRAND}}
sub ss              {shift->{SSTRAND}}

###############################################################################
# BPlite::Multi
###############################################################################
package BPlite::Multi;
sub new {
  my ($class, $fh) = @_;
  if (ref $fh !~ /GLOB/) {
    die "BPlite error: new expects a GLOB reference not $fh\n";
  }
  my $this = bless {};
  $this->{FH} = $fh;
  return $this;
}
sub nextReport {
  my ($this) = @_;
  my $FH = $this->{FH};
  my $t = $FH;
  while (<$FH>) {
    if (/^T?BLAST[NPX]/) {
      last;
    } elsif (/^Query=/) {
      seek $this->{FH}, $t, "SEEK_SET";
      last;
    }
    $t = tell $FH;
  }
  return 0 if not defined $_;
  # Passing database from previous Report cause NCBI multi format only prints database once from netblast search with mutiple queries.
  my $blast = new BPlite($this->{FH}, $this);
  return $blast;
}

1;

}
