#!/usr/bin/perl -w


###################     REannotate     ###################		

# author:   Vini Pereira
# email :   vini_moll@rocketmail.com

# REannotate.   Automated re-annotation and evolutionary analysis of repetitive DNA.
# Copyright (C) 2003-2007 Vini Pereira.


#    This program is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 2 of the License, or
#    (at your option) any later version.

#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.

#    You should have received a copy of the GNU General Public License
#    along with this program; if not, write to the Free Software
#    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA


###################  version 26.11.2007 ################
#
# Re-annotates RepeatMasker annotation for complex repeats,
# in order to infer insertion events, i.e. defragment multiple hits
# that originated in the same insertion event.
# If required it writes the sequence of identified, defragmented
# elements to disk in two formats, the raw genomic sequence and
# a gapped sequence of regions matching, and aligned to, a library sequence
# (used in the RepeatMasker search).
#
# For LTR-elements the full element plus LTR and internal region sequences
# are saved in separate files (fasta format). If required intra-element
# LTR pairs can be automatedly aligned using clustalW, and the
# number of nucleotide substitutions between them estimated, so that
# the element age of insertion can also be estimated.
#
# The script also resolves nesting patterns of complex repeat elements.
#
# Obligatory command line argument:
# RepeatMasker annotation (.out) file.
# Second optional argument is a FASTA  file
# contaning second the query sequence(s) that were annotated by RepeatMasker
# and reported in its annotatation file (the first argument). The latter
# argument is necessary if sequences of elements identified by REannotate
# are to be written to disk, and sequence analyses of LTR-elements performed.

use strict;
use Getopt::Long;
use File::Basename;
use English;
use FileHandle;


my %parameter = ( fragmentSearchRange  => 40000,  	      # Max distance in base pairs considered by the LTR pairing algorithm
		  # maxIndel           => 15000,  	      # Max tolerated LTR fragment overlap/gap in base pairs
		  boundaryTolerance    => 40,      	      # Discrepancy (in num of bases) tolerated when dealing sequence boundaries
		  minDistanceToSoloLTR => 15000,	      # Minimum distance between an unpaired LTR and the nearest internal region 
		                                                #     of same family and orientation necessary for such an unpaired LTR to be
		                                                #     classified as a 'solo' LTR
		  doAlign              => 0,                  # Align intra-element LTR pairs (set if alignment -k option is TRUE)
		  pathToClustalW       => 0,                  # Full path to clustalW (set if "alignment -c" option is TRUE)) 
		  arabidopsis          => 0,                  # TRUE if query sequences come from Arabidopsis thaliana, FALSE otherwise 
		                                                #     (optional)
		  chromo               => 0,                  # Chromosome number will be set if "arabidopsis -a" option is TRUE
		  rateOfEvolution      => 0,                  # Rate of nucleotide substitution in substitutions per site per million years
		                                                #     (set if "kalign -k" option is TRUE)
		  outDir               => "REannotate_output",  # Basename of directory within the directory from which this script is run,
		                                                #     where all output of REannotate will be written
		  LTRoutDir            => "LTR_elements",       # Basename of directory within the REannotate output directory
		                                                #     where output LTR element sequences will be stored
		  otherOutDir          => "non-LTR_elements",   # Basename of directory within the REannotate output directory
		                                                #     where output non-LTR element sequences will be stored
		  completeDir          => "complete",           # Basename of dir within LTRoutDir where sequences of complete elements 
		                                                #     will be stored
		  LTRdir               => "LTRs",               # Basename of directory within completeDir where intra-element LTR pair 
		                                                #     sequences will be stored
		  INTdir               => "internal",           # Basename of directory within completeDir where internal region sequences 
		                                                #     will be stored
		  fullDir              => "full",               # Basename of directory within completeDir where full sequences will be stored
		  soloDir              => "solo",               # Basename of dir within LTRoutDir where solo LTR sequences will be stored
		  truncatedDir         => "truncated",          # Basename of directory within LTRoutDir where truncated element sequences 
                                                                #     will be stored
		  alignDir             => "alignments",         # Basename of direcotry within LTRdir where intra-element LTR pair 
		                                                #     alignments would be stored
		  isFASTAinput         => 1,                    # This flag stays TRUE if input sequence file is in fasta format
		  noSeqOutput          => 0,                    # This flag stays FALSE unless user supresses sequence output at command line
		  maxRecursion         => 6,                    # Max number of recursive calls for sub 'defragmentation' when assembling 
		                                                #     multiple hits associated with each element
		  repeatClassColumn    => 1,                    # TRUE if the input RepeatMasker annotation file has a 'repeat class' column
		  fuzzynames           => [],                   # this array ref will store arrays of equivalent lib seq names if option -f
		  atomicfuzzynames     => [],                   # this array ref will store an atomic array with all the names in equivalent lib seq names if option -f
		  truncatedNesting     => 0,                    # resolve truncated nesting if TRUE (i.e. if set with the -t option)
		  inputError           => "",                   # Will store error messages related to improper input data
		  UCSCinput            => 0,                    # Will indicate whether input annotation is from UCSC Genome Browser table
		  ungapped             => 0                     # If option "u|ungapped" is used this will be set to TRUE and ungapped sequences output
		);

my ($script) = ($0 =~ m|([^/]*)$|);
my $version = "26.11.2007";
my $USAGE = "\nUSAGE:


(perl) ./REannotate [-OPTIONS] <RepeatMasker annotation file> [<sequence file>]


options:

-h  (-help)
   : Displays command line options

-v  (-version)
   : Displays version of REannotate

-n  (-noseq)
   : This option suppresses all sequence output, so that
    REannotate will only generate annotation.
    If this option is used no sequence input is required.

-u  (-ungapped)
   : This option outputs the chromosomal sequence spanned
    by a defragmented element (from the first to the last
    fragment), in addition to the default gapped element
    sequence that is aligned to a reference element.

-k  (-kalign)   <path to clustalW>
   : Aligns intra-element LTR pairs using the clustalW program and
	  estimates K (the number of nucleotide substitutions per site
	  between intra-element LTRs) using the Kimura 2-parameter model,
    for each 'complete' element found.
    Required argument is the full path to the clustalW programme.

-r  (-rate)     <rate of evolution>
	: Estimates the time elapsed since the insertion of structurally
	  'complete' LTR-elements, if the -k option is being used.
    Required argument is the rate of evolution
	  in number of substitutions per site per million years.

-g  (-gff)
  : Outputs extra annotation in GFF format (e.g. for
   visualisation in the Apollo Genome Browser).

-f  (-fuzzy)  <filename>
   : Allows defragmentation of chromosomal repeat fragments that match
    different reference elements. The required argument is the
    (path to) the filename of a text file containing lists of
    'related' (equivalent) reference sequence names.

-t  (-tnest)
  : Allows 'truncated nesting' to be annotated,
   i.e. elements that interrupt one terminus of another
   element (but which are not contained within the latter) will be annotated
   as nested.

-d  (-drange)  <distance in nucleotides>
	: Sets the 'search range' for defragmentation, the maximum distance
    (in number of nucleotides) that is allowed between two repeat fragments
    to be considered as a candidate fragments of the same element.
    Required argument is the search range in nucleotides.
	  Default = 40000.

-b  (-boundary) <distance in nucleotides>
	: Sets the maximum tolerated overlap
    (in number of nucleotides along a reference element sequence)
    between chromosomal matches to the same reference element
    to be considered as fragments of the same chromosomal element.
    Required argument is the alignment overlap in nucleotides along
    a reference sequence.
	  Default = 40.

-s  (-solo)  <distance in nucleotides>
	: Sets the minimum distance required (in number of nucleotides)
	  between a given LTR and any other LTR or internal region
	  (belonging to the same family and in the same orientation on the
	  host chromosome) for the given LTR to be considered as a
	  'solo' LTR. Required argument is the distance in nucleotides.
    Default = 15000.

-m  (-stream)
  : Outputs re-annotation to STDOUT
    (all messages saved in file
    <REannotate_output/re.messages>)

-l  (-long)
  : Outputs ASCII representation of the order and
   nesting of defragmented elements along a query
   sequence. (NOT RECOMMENDED FOR LARGE QUERIES!)

-c  (-class)
  : Outputs re-annotation to four separate files
   (instead of the one file by default), one file for
   each class: non-LTR-elements, complete LTR-elements,
   truncated LTR-elements, and solo LTRs.
";
#-a  (-arabidopsis)  <chromosome number>
#	: RepeatMasked queries assumed to come from an A. thaliana
#	  chromosome, extra information (distance from centromere,
#	  superfamily) is generated.

#options list
my ($opt_m,$opt_h,$opt_v,$opt_k,$opt_r,$opt_a,$opt_d,$opt_i,$opt_s,$opt_b,$opt_n,$opt_f,$opt_t,$opt_g,$opt_l,$opt_c,$opt_u)=(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0);

# get the supplied command line options, and set flags
&GetOptions('m|stream!'=>\$opt_m,'h|help!'=>\$opt_h,'v|version!'=>\$opt_v,'k|kalign=s'=>\$opt_k,'r|rate=f'=>\$opt_r,'a|arabidopsis=i'=>\$opt_a,'d|drange=i'=>\$opt_d,'i|indelmax=i'=>\$opt_i,'s|solo=i'=>\$opt_s,'b|boundary=i'=>\$opt_b,'n|noseq!'=>\$opt_n,'f|fuzzy=s'=>\$opt_f,'t|tnest!'=>\$opt_t,'g|gff!'=>\$opt_g,'l|long!'=>\$opt_l,'c|class!'=>\$opt_c,'u|ungapped!'=>\$opt_u);
my $MESSAGES = *STDOUT; # file handle for messages

my $outPath=$ENV{'PWD'}."/";	        # set output path to current directory
# check whether the output directory already exists:
# if not create it, if it does rename existing structure before creating new one
open (TIME, "date +%T-%F |");
my $timeStamp = <TIME>;
chomp $timeStamp;
close (TIME);

if (-e $outPath.$parameter{outDir})
{
  # output directory exists from a previous run
  # add time and date stamp when renaming previous output directory
  system("mv ".$parameter{outDir}." ".$outPath."previous_".$parameter{outDir}."_renamed_".$timeStamp);
}

$outPath =~ s/\/$//;  # remove trailing '/'
my $outDirPath = createDirectory($outPath, $parameter{outDir});


if ($#ARGV == -1 && !($opt_h|$opt_v))
  {
  die messageOUT("Required command line argument: <RepeatMasker .out file>; optionally:  <query sequence file>\n\n".$USAGE);
  }
if ($opt_h)
  {
    print messageOUT($USAGE);
    exit;
  }
if ($opt_v)
  {
    print messageOUT("version $version");
    exit;
  }
if ($opt_m)
  { # stream mode
  $MESSAGES = *MESSAGES; # handle for file where messages will be stored if in stream mode
  my $messagesFile = ">".$outDirPath."/re.messages";
  open (MESSAGES, $messagesFile) || die "can't open file: $!";
}
if ($opt_n)
{
  print $MESSAGES messageOUT("No sequence output is being generated on this run");
  $parameter{"noSeqOutput"} = 1;
}
else
{
  unless ($ARGV[1])
    {
#      print $MESSAGES messageOUT($USAGE);
      print $MESSAGES messageOUT("Query sequence(s) file is required as last command line argument\n".
				 "in order to generate sequence output.\n".
				 "Use option -n (-noseq) if sequence output is not required."
				);
      exit;
    }
}
if ($opt_k) {
        if ($opt_n)
         {
           print messageOUT("option -k (-kalign) cannot be used with no sequence output option -n (-noseq)");
           exit;
         }
        unless ( $opt_k =~ /clustal/i )
         {
           print messageOUT("option -k (-kalign) requires full path to ClustalW");
           exit;
         }
	 unless ( -e $opt_k )
         {
           print messageOUT("file $opt_k does not exist!\n...exiting now...");
           exit;
         }
	print $MESSAGES messageOUT("intra-element LTR pairs will be aligned using ClustalW");
	$parameter{"doAlign"}=1;
	$parameter{"pathToClustalW"} = $opt_k;
}
else { print $MESSAGES messageOUT("intra-element LTR pair alignments and \nK (number of substitutions per site) estimates \nare NOT being generated on this run"); }
if ($opt_r) {
        unless ($opt_k) {
		       print messageOUT("Option -r (-rate) only relevant when option -k (-kalign) is used");
		       die messageOUT($USAGE);
		      }
	$parameter{"rateOfEvolution"} = $opt_r;
}
if ($opt_a) {
        $parameter{"arabidopsis"} = 1;
	$parameter{"chromo"} = $opt_a;
}
if ($opt_d) {
        $parameter{"fragmentSearchRange"} = $opt_d;
}
if ($opt_i) {
        $parameter{"maxIndel"} = $opt_i;
}
if ($opt_s) {
        $parameter{"minDistanceToSoloLTR"} = $opt_s;
}
if ($opt_b) {
        $parameter{"boundaryTolerance"} = $opt_b;
}
if ($opt_u) {
        $parameter{"ungapped"} = 1;
}
if ($opt_f)
  {
    getFuzzyNames($opt_f, \%parameter, $MESSAGES);
  }
if ($opt_t)
{
  print $MESSAGES messageOUT("'truncated nesting' algorithm is on:\nan element interrupting one end of,\nbut not contained within, another element\nwill be considered 'nested'");
  $parameter{truncatedNesting} = 1;
}


# get command line arguments
my $chr = "";		                # chromosome number
my $chromoID = "";		        # chromosome sequence ID
my $RM_file = $ARGV[0];			# input file 1 (RepeatMasker output)
my $DNAfile = $ARGV[1];	                # input file 2 (MIPS chromosome sequence file)

# append current directory path to input file only if necessary
$RM_file = $outPath."/".$RM_file if ($RM_file!~/^\//);
if ($DNAfile)
{
  $DNAfile = $outPath."/".$DNAfile if ($DNAfile!~/^\//);
}

my $SEQ = *SEQ;		# DNA sequence file handle
my $RepeatMaskerAnnotationFile = *RepeatMaskerAnnotationFile; 	# input file handle (for the RepeatMasker output file)
my $PAIRS = *PAIRS;     # output file handle for saving annotation on paired LTRs
my $TRUNC = *TRUNC;     # output file handle for saving annotation on truncated elements
my $SOLO = *SOLO;       # output file handle for saving annotation on solo LTRs
my $OTHER = *OTHER;     # output file handle for saving annotation on non-LTR elements
my $ALL = *ALL;         # output file handle for saving annotation on all types of elements together
my $RECORD = *RECORD;  	# handle $LTRrecordFile
my $GFF = *GFF;         # handle for GFF output file
my $GFFplus = *GFFp;

my $LTRdir;
if ($opt_c || !$parameter{noSeqOutput}) {

  my $LTRfilePath = createDirectory($outDirPath, $parameter{LTRoutDir});
  my $otherFilePath = createDirectory($outDirPath, $parameter{otherOutDir});

  unless ( $parameter{"noSeqOutput"} )
    {
      createDirectory($LTRfilePath, $parameter{"soloDir"});
      my $truncatedDirectory = createDirectory($LTRfilePath, $parameter{"truncatedDir"});
      createDirectory($truncatedDirectory, $parameter{"LTRdir"});
      createDirectory($truncatedDirectory,  $parameter{"INTdir"});
      createDirectory($truncatedDirectory,  $parameter{"fullDir"});
      my $pairDirectory = createDirectory($LTRfilePath, $parameter{"completeDir"});
      $LTRdir = createDirectory($pairDirectory, $parameter{"LTRdir"});
      createDirectory($pairDirectory,  $parameter{"INTdir"});
      createDirectory($pairDirectory,  $parameter{"fullDir"});
      createDirectory($LTRdir, $parameter{"alignDir"}) if $parameter{doAlign};
    }

  if($opt_c) {

    my $pairs_outfile = ">".$LTRfilePath."/complete.REannotation";	# will store annotation on paired LTRs (complete elements)
    my $solos_outfile = ">".$LTRfilePath."/solos.REannotation";		# will store annotation on solo LTRs
    my $truncated_outfile = ">".$LTRfilePath."/truncated.REannotation";	# will store annotation on truncated elements
    my $otherOutfile = ">".$otherFilePath."/nonLTR.REannotation";         # will store annotation on non-LTR elements

    open (PAIRS, $pairs_outfile) || die "can't open file: $!";
    open (SOLO, $solos_outfile) || die "can't open file: $!";
    open (TRUNC, $truncated_outfile) || die "can't open file: $!";
    open (OTHER, $otherOutfile) || die "can't open file: $!";

  }

}

my $rm_filename = basename($RM_file,".out");
my $allOutfile = ">".$outDirPath."/".$rm_filename.".REannotation";           # will store annotation on all types of elements
my $recordFile = ">".$outDirPath."/REannotate.summary";

open (SEQ, "<".$DNAfile) || die "can't open file: $!" unless ( $parameter{noSeqOutput} );
open (RepeatMaskerAnnotationFile, "<".$RM_file) || die "can't open input file $RM_file: $!";
open (ALL, $allOutfile) || die "can't open file: $!";
open (RECORD, $recordFile) || die "can't open file: $!";


# If sequence output is required,
# read in (multiple) sequence(s) from fasta file, or entire chromosome sequence from MIPS file:
my %FASTAentries;  # this will store fastaIDs (keys)/ sequences (values)
unless ( $parameter{noSeqOutput} )
  {
    unless ( getFASTAseqs($SEQ, \%FASTAentries, $MESSAGES) )
      {
	$parameter{"isFASTAinput"} = 0;
	# Sequence file not in fasta format.
	# Assume it is a MIPS chromosome file:
	close(SEQ) || die "can't close file: $!";
	# count the number of lines in the MIPS file (assuming line terminator is "\n")
	my $numLines = 0;
	open (SEQ, $DNAfile) || die "can't open file: $!";
	$numLines += tr/\n/\n/ while sysread($SEQ, $_, 2 ** 16);
	# close and re-open file handle
	close (SEQ) || die "can't close file: $!";
	open (SEQ, $DNAfile) || die "can't open file: $!";
	# get chromosome sequence
	my ($chromosomeSequence,@coordRange) = getSequenceFromMIPSchromo($SEQ, $numLines);
	$FASTAentries{"chr".$parameter{"chromo"}} = $chromosomeSequence;
      }
  }


# parse RepeatMasker (RM) annotation output to create records of all hits to complex repeats
my @record;  # Will hold the record of each element in the RepeatMasker output file ;
	     # a record typically holds information on (a fragment of) the LTR or internal region of
	     # an LTR retrotransposon. The 11 record fields are:
	     # "score" (alignment score to ref sequence), "divergenceFromRef",
	     # "chromoStart" (chromosome coordinate of the start of the match to ref seq),
	     # "chromoEnd", "orientation" (strand orientation), "refName" (name of matching
	     # ref seq), "refStart" (start coord of ref seq match), "refEnd",
	     # "leftAfterRefEnd" (no.bases left in ref seq after match), "query", "superfamily", "id", and "index" (, "distCen").
@record = getRecordsfromRMoutput( $RepeatMaskerAnnotationFile, \%parameter, \%FASTAentries, $MESSAGES );
# (further fields will be generated if alingment option is chosen)
print $MESSAGES messageOUT("'Repeat Class/Family field is missing for some or all of the records\nin the RepeatMasker annotation file")
  unless ($parameter{repeatClassColumn});

close(SEQ) || die "can't close file: $!" unless ( $parameter{noSeqOutput} );
close (RepeatMaskerAnnotationFile) || die "can't close file: $!";


# Info on each LTR pair is output to file $query.LTRpairs on a separate line. The sequence of values on each line corresponds to:
# pair id,chromosome#,family name(s),div from ref 1,div from ref 2,start1,end1,start2,end2,hits#1,hits#2,hitsIR,
# numLTRfrags1,-2,numInternalFrags, orientation,K,K.sd,timeK,timeK.sd,numSites,T,V,indels,T/V.
# output header line containing field names

# select filehandle + format, and output header lines of annotation files
my $annotationFORMAT;
if ($parameter{"doAlign"})
  {
    $annotationFORMAT = ($parameter{"arabidopsis"}) ? "FIELD_NAMES_optAK" : "FIELD_NAMES_optK";
  }
else
  {
    $annotationFORMAT = ($parameter{"arabidopsis"}) ? "FIELD_NAMES_optA" : "FIELD_NAMES";
  }
if ($opt_c) {
  select(PAIRS);
  $FORMAT_NAME = $annotationFORMAT;
  write;
  select(OTHER);
  $FORMAT_NAME = $annotationFORMAT;
  write;
  select(TRUNC);
  $FORMAT_NAME = $annotationFORMAT;
  write;
  select(SOLO);
  $FORMAT_NAME = $annotationFORMAT;
  write;
}
select(ALL);
$FORMAT_NAME = $annotationFORMAT;
write;
select(STDOUT);


################   ANALYSE SEQUENCES AND RE-ANNOTATE REPEATS
my ($annotationByQuery_,$annotHashTableByQuery_,$sortedTermini_) = annotateElements(\%FASTAentries, $outDirPath."/", \@record, \%parameter, $MESSAGES);
################


# empty sequence variable and clean up
%FASTAentries = ();
if ($parameter{doAlign})
  {
    my $clustalExecutable = basename($parameter{pathToClustalW});
    system("rm -f $LTRdir/$clustalExecutable");
  }

# load RepeatMasker annotation file to re-annotate element IDs
open (RepeatMaskerAnnotationFile, "<".$RM_file) || die "can't open file $RM_file: $! \n";
my @RM_lines;
push @RM_lines, $_ while (<RepeatMaskerAnnotationFile>);
close (RepeatMaskerAnnotationFile) || die "can't close file $RM_file: $! \n";

# establish whether the RepeatMasker annotation file has an ID column
my $IDline = 0;
$IDline++ while ( $IDline < scalar(@RM_lines) -1 && $RM_lines[$IDline] !~ /left/i );
my $RMid = ( $RM_lines[$IDline] =~ /\sID\s/i );

# open file for re-annotated RepeatMasker output
open (RepeatMaskerAnnotationFile, ">".$outDirPath."/".$rm_filename.".REannotated") || print $MESSAGES "can't open file ".($outDirPath."/".(basename($RM_file,"")).".REannotated").": $! \n";

# WRITE ANNOTATION TO FILES (optionally including GFF output)
print $MESSAGES messageOUT("...outputting annotation...");
my @annotation;
# $annotationByQuery_ is a ref to a hash whose keys are the query seq names found in the RM annotation,
# the hash element value for each query is a ref to an array of annotation records for each identified element in that query
my $numNestedElements = 0;
my $lowComplexityCounter = 0;
foreach my $query (sort keys %{$annotationByQuery_})
  {
    if($opt_g) {
      # output GFF header line
      open (GFF, ">".$outDirPath."/".$query.".REannotate.gff") || die "can't open file ".($outDirPath."/".$query.".REannotate.gff").": $! \n";
      open (GFFp, ">".$outDirPath."/+".$query.".REannotate.gff") || die "can't open file ".($outDirPath."/+".$query.".REannotate.gff").": $! \n";
      foreach my $FILE ($GFF, $GFFplus)
	{
	  print $FILE "##gff-version 2 \n##date $timeStamp \n";
	  print $FILE "##sequence $query ".( ($parameter{noSeqOutput})? "" : "(in file $DNAfile)" )."\n";
	  print $FILE "## GFF file generated by REannotate. If this is to be viewed in Apollo append file <REannotate.tiers> to your tiers file.\n";
	}
    }
    foreach my $element_ (@{$annotationByQuery_->{$query}})
      {
	@annotation = getAnnotationRecord($element_, \%parameter);

	my $id = $element_->{id};
	my $nestingLevel = $element_->{nest};
	$numNestedElements++ if ( $nestingLevel );  # count nested elements

	# output detailed annotation
	if(!$opt_m)
	  {
	    if(!$opt_c) {
	      select(ALL)
	    } else {
	      select(PAIRS) if ( $id =~ /i|u/ );
	      select(TRUNC) if ( $id =~ /t/ );
	      select(SOLO) if ( $id =~ /s/ && $id !~ /t/);
	      select(OTHER) if ( $id =~ /n/ );
	    }
	  }
	if ($parameter{"doAlign"})
	  {
	    $FORMAT_NAME = ($parameter{"arabidopsis"}) ? "FIELD_VALUES_optAK" : "FIELD_VALUES_optK";
	  }
	else
	  {
	    $FORMAT_NAME = ($parameter{"arabidopsis"}) ? "FIELD_VALUES_optA" : "FIELD_VALUES";
	  }
	write;
	select(STDOUT);

	# Output GFF and re-annotate RepeatMasker element IDs
	my $elementLineNumbers = "";
	if ( !$element_->{DNArearrangement} || $element_->{DNArearrangement} eq "NA" || $element_->{DNArearrangement} =~ /\*/ )
	  {
	    $elementLineNumbers .= $element_->{hits1}."-" unless (!($element_->{hits1}) || $element_->{hits1} eq "NA");
	    $elementLineNumbers .= $element_->{hitsI}."-" unless (!($element_->{hitsI}) || $element_->{hitsI} eq "NA");
	    $elementLineNumbers .= $element_->{hits2}."-" unless (!($element_->{hits2}) || $element_->{hits2} eq "NA");
	  }
	else
	  {
	    $elementLineNumbers = $element_->{DNArearrangement};
	  }
	    $elementLineNumbers =~ s/^(.+)-+$/$1/;  # remove trailing hyphen(s) from series
	my @elementLineNumbers = split("-",$elementLineNumbers);

	foreach my $line (@elementLineNumbers)
	  {
	    if($opt_g) {
	      # output GFF
	      outputGFF($GFF, $element_, $line, \@record, \%parameter, 0);
	      outputGFF($GFFplus, $element_, $line, \@record, \%parameter, 1);
	    }
	    # re-annotate IDs in the RepeatMasker annotation
	    if ($RMid)
	      {
		    $RM_lines[$line-1] =~ s/\d+((\s|\*)+)$/ $id$2/;
	      }
	    else
	      {
		$RM_lines[$line-1] =~ s/(\d|\))((\s|\*)+)$/$1   $id$3/;
	      }
	  }
      }
    if($opt_g) 
      {
      # output GFF for the unmasked sequence and simple/low complexity repeats
      my $unmaskedStart = 1;
      my $unmaskedEnd;
      my $unmaskedSeq;
      for (my $recNum = 0; $recNum<scalar(@record); $recNum++)
	{
	  $unmaskedEnd = $record[$recNum]->{chromoStart} - 1;
	  if ( $unmaskedEnd - $unmaskedStart > 0 )
	    {
	      # output GFF for unmasked sequence between current and previous repeats
	      $unmaskedSeq = $query."_".$unmaskedStart."-".$unmaskedEnd;
	      print $GFF "$unmaskedSeq\tun-RepeatMasked_sequence\tsimilarity\t$unmaskedStart\t$unmaskedEnd\t0\t+\t.\t$unmaskedSeq\t$unmaskedStart\t$unmaskedEnd\n";
	      print $GFFplus "$unmaskedSeq\tun-RepeatMasked_sequence\tsimilarity\t$unmaskedStart\t$unmaskedEnd\t0\t+\t.\t$unmaskedSeq\t$unmaskedStart\t$unmaskedEnd\n";
	    }
	  $unmaskedStart = $record[$recNum]->{chromoEnd} + 1;
	  # output GFF if current record refers to a simple or low complexity repeat
	  if ( $record[$recNum]->{superfamily} =~ /(Low_)|(Simple_)/i )
	    {
	      $lowComplexityCounter++;
	      outputGFF($GFF, $record[$recNum], $recNum+$parameter{RMheaderLines}+1, \@record, \%parameter, 0);
	      outputGFF($GFFplus, $record[$recNum], $recNum+$parameter{RMheaderLines}+1, \@record, \%parameter, 1);
	    }
	}
      close (GFF) || die "can't close file: $!";
    }
  }


# write re-annotated RepeatMasker output
$RM_lines[$IDline] =~ s/(\(left\))(\s*)$/ $1   ID$2/;  # include 'ID' (column) in header if it's not there
foreach my $line (@RM_lines)
  {
    # remove old IDs if present
    $line =~ s/(\s+)\d+((\s|\*)+)$/$1$3/ if ($RMid);
    print RepeatMaskerAnnotationFile $line;
  }


# OUTPUT NESTING info IF option 'l|long' has been chosen
my $nestingInfo = "";
if($opt_l)
  {
    foreach my $query (sort keys %{$annotationByQuery_})
      {
	$nestingInfo .= "\##### query  $query :\n\n";
	$nestingInfo .= sprintf("%-10s  %-12s  %-36s \n\n","end coords","family","IDs");
	my $numQueryElements = scalar(@{$annotationByQuery_->{$query}});
	foreach my $repeatTerminus_ (@{$sortedTermini_})
	  { # repeat termini in $sortedTermini are in ascending query sequence coordinate order
	    if ($repeatTerminus_->{query} eq $query)
	      {
		my $repeatID = $repeatTerminus_->{id};
		my $repeatIndex = $annotHashTableByQuery_->{$query}->{$repeatID};
		my $nest = $annotationByQuery_->{$query}->[$repeatIndex]->{nestIDs}."-";
		$nest = "" if ( $nest =~ m|NA| );
		$nestingInfo .= sprintf("%-10d  %-12s  %-36s\n",
					$repeatTerminus_->{coord},
					$annotationByQuery_->{$query}->[$repeatIndex]->{family},
					$nest.$repeatID
				       );
	      }
	  }
	$nestingInfo .= "\n\n";
      }
    #print $MESSAGES messageOUT($nestingInfo);
  }

# OUTPUT SUMMARY
print $RECORD "\t\t\tREannotate summary ( run started at $timeStamp ):\n\n\n";
my $numNonLTRelements;
my $numDNAtransposons;
my $numNonLTRretrotransposons;
my $numSatellites;
my $numRollingCircles;
my $numLTRelements;
my $numTruncatedLTRelements;
my $numCompleteLTRelements;
my $numSoloLTRelements;
my $summary;
my $recordSummary;
my $numNestedElementsInQuery;
if ( $parameter{repeatClassColumn} )
  {
    foreach my $query ( sort keys %{$annotationByQuery_} )
      {
	$numNonLTRelements = grep { $_->{id} =~ /n/ } @{$annotationByQuery_->{$query}};
	$numDNAtransposons = grep { $_->{superfamily} =~ /DNA/i } @{$annotationByQuery_->{$query}};
	$numNonLTRretrotransposons = grep { $_->{superfamily} =~ /(LINE)|(SINE)|(non-LTR)/i } @{$annotationByQuery_->{$query}};
	$numSatellites = grep { $_->{superfamily} =~ /satellite/i } @{$annotationByQuery_->{$query}};
	$numRollingCircles = grep { $_->{superfamily} =~ /(RC|rolling|circle)/ } @{$annotationByQuery_->{$query}};
	$numLTRelements = grep { $_->{id} =~ /[stui]/ } @{$annotationByQuery_->{$query}};
	$numCompleteLTRelements = grep { $_->{id} =~ /[ui]/ } @{$annotationByQuery_->{$query}};
	$numTruncatedLTRelements = grep { $_->{id} =~ /t/ } @{$annotationByQuery_->{$query}};
	$numSoloLTRelements = grep { $_->{id} =~ /s/ && $_->{id} !~ /t/ } @{$annotationByQuery_->{$query}};
        $numNestedElementsInQuery = grep { $_->{nest} > 0 } @{$annotationByQuery_->{$query}};
	$summary = "\n##### query  $query :\n\n".
	  "\t##   Number of identified  NON-LTR ELEMENTS = $numNonLTRelements\n\n".
	    "\t\t##  Number of DNA transposons = $numDNAtransposons\n".
	      "\t\t##  Number of non-LTR retrotransposons = $numNonLTRretrotransposons\n".
		"\t\t##  Number of rolling circle transposons = $numRollingCircles\n".
		  "\t\t##  Number of satellite repeats = $numSatellites\n".
		    "\t\t##  Number of unclassified elements = ".
		      ($numNonLTRelements-$numDNAtransposons-$numNonLTRretrotransposons-$numRollingCircles-$numSatellites)."\n\n\n".
			"\t##  Number of identified  LTR ELEMENTS = $numLTRelements\n\n".
			  "\t\t##  Number of 'COMPLETE' LTR elements = $numCompleteLTRelements\n".
			    "\t\t##  Number of 'SOLO' LTR elements = $numSoloLTRelements\n".
			      "\t\t##  Number of 'TRUNCATED' LTR elements = $numTruncatedLTRelements\n\n\n".
				"\t##  Number of NESTED ELEMENTS = $numNestedElementsInQuery\n\n\n";
	
	$recordSummary = $summary.
	  "\ttotal number of hits to non-LTR elements (excluding low complexity repeats) = ".
	    ( grep { $_->{query} eq $query && $_->{superfamily}!~/LTR/i && $_->{superfamily}!~/(low|simple)_/ } @record )."\n".
	      "\ttotal number of hits to LTRs = ".
		( grep { $_->{query} eq $query && LTRname($_->{refName}, $_->{superfamily}) } @record )."\n".
                  "\ttotal number of hits to internal regions of LTR elements = ".
		    ( grep { $_->{query} eq $query && INTname($_->{refName}, $_->{superfamily}) } @record )."\n\n\n";

	print $MESSAGES messageOUT($summary);
	print $RECORD $recordSummary;
      }
  }
else
  {
    foreach my $query ( sort keys %{$annotationByQuery_} )
      {
	$numNonLTRelements = grep { $_->{id} =~ /n/ } @{$annotationByQuery_->{$query}};
	$numLTRelements = grep { $_->{id} =~ /[stui]/ } @{$annotationByQuery_->{$query}};
	$numCompleteLTRelements = grep { $_->{id} =~ /[ui]/ } @{$annotationByQuery_->{$query}};
	$numTruncatedLTRelements = grep { $_->{id} =~ /t/ } @{$annotationByQuery_->{$query}};
	$numSoloLTRelements = grep { $_->{id} =~ /s[^t]/ } @{$annotationByQuery_->{$query}};
	$numNestedElementsInQuery = grep { $_->{nest} > 0 } @{$annotationByQuery_->{$query}};
	$summary .= "\n##### query  $query :\n\n".
	  "\t##   Number of identified  NON-LTR ELEMENTS = $numNonLTRelements\n\n".
	    "\t##  Number of identified  LTR ELEMENTS = $numLTRelements\n\n".
	      "\t\t##  Number of 'COMPLETE' LTR elements = $numCompleteLTRelements\n".
		"\t\t##  Number of 'SOLO' LTR elements = $numSoloLTRelements\n".
		  "\t\t##  Number of 'TRUNCATED' LTR elements = $numTruncatedLTRelements\n\n\n".
		    "\t##  Number of NESTED ELEMENTS = $numNestedElementsInQuery\n\n\n";
	
	$recordSummary .= $summary.
	  "\ttotal number of hits to non-LTR elements (excluding low complexity repeats) = ".
	    ( grep { $_->{query} eq $query && !LTRname($_->{refName}, $_->{superfamily}) && !INTname($_->{refName}, $_->{superfamily}) && $_->{superfamily}!~/(low|simple)_/ } @record )."\n".
	      "\ttotal number of hits to LTRs = ".
		( grep { $_->{query} eq $query && LTRname($_->{refName}, $_->{superfamily}) } @record )."\n".
                  "\ttotal number of hits to internal regions of LTR elements = ".
		    ( grep { $_->{query} eq $query && INTname($_->{refName}, $_->{superfamily}) } @record )."\n\n\n";
      }
  }

print $MESSAGES messageOUT("Total number of nested elements in all query sequences = $numNestedElements\n\n\n");

print $RECORD $nestingInfo if($opt_l);

# output error log if necessary
if ($parameter{inputError})
  {
    open(ERROR,">".$outDirPath."/ERROR.log");
    print ERROR $parameter{inputError};
    close(ERROR);
  }

# CLOSE FILES
if($opt_c) {
  close (PAIRS) || die "can't close file: $!";
  close (SOLO) || die "can't close file: $!";
  close (TRUNC) || die "can't close file: $!";
  close (OTHER) || die "can't close file: $!";
}
close (ALL) || die "can't close file: $!";
close (RECORD) || die "can't close file: $!";
close (RepeatMaskerAnnotationFile) || die "can't close file: $!";

print $MESSAGES messageOUT("DONE!");


format FIELD_NAMES =
id	   query	      family	            div1           divI           div2           start      end1        start2     end        hits1             hitsI             hits2                nhits1 nhitsI nhits2 ref1 refI ref2 lenR orient superfamily nest nestIDs                            DNArearrangement
.
format FIELD_VALUES =
@<<<<<<<<< @<<<<<<<<<<<<<<<<< @<<<<<<<<<<<<<<<<<<<< @<<<<<<<<<<<<< @<<<<<<<<<<<<< @<<<<<<<<<<<<< @<<<<<<<<< @<<<<<<<<<  @<<<<<<<<< @<<<<<<<<< @<<<<<<<<<<<<<<<< @<<<<<<<<<<<<<<<< @<<<<<<<<<<<<<<<<<<< @<<<<< @<<<<< @<<<<< @<<< @<<< @<<< @.## @<<<<< @<<<<<<<<<< @<<< @<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< @<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
@annotation
.
format FIELD_NAMES_optK =
id	   query	      family	            div1           divI           div2           start      end1        start2     end        hits1             hitsI             hits2                nhits1 nhitsI nhits2 ref1 refI ref2 lenR orient superfamily nest nestIDs                            DNArearrangement                   K              K.sd           time         time.sd      numSites T       V
.
format FIELD_VALUES_optK =
@<<<<<<<<< @<<<<<<<<<<<<<<<<< @<<<<<<<<<<<<<<<<<<<< @<<<<<<<<<<<<< @<<<<<<<<<<<<< @<<<<<<<<<<<<< @<<<<<<<<< @<<<<<<<<<  @<<<<<<<<< @<<<<<<<<< @<<<<<<<<<<<<<<<< @<<<<<<<<<<<<<<<< @<<<<<<<<<<<<<<<<<<< @<<<<< @<<<<< @<<<<< @<<< @<<< @<<< @.## @<<<<< @<<<<<<<<<< @<<< @<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< @<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< @<<<<<<<<<<<<< @<<<<<<<<<<<<< @<<<<<<<<<<< @<<<<<<<<<<< @<<<<<<< @<<<<<< @<<<<<<
@annotation
.
format FIELD_NAMES_optA =
id	   query	      family	            div1           divI           div2           start      end1        start2     end        hits1             hitsI             hits2                nhits1 nhitsI nhits2 ref1 refI ref2 lenR orient superfamily nest nestIDs                            DNArearrangement                   distCen
.
format FIELD_VALUES_optA =
@<<<<<<<<< @<<<<<<<<<<<<<<<<< @<<<<<<<<<<<<<<<<<<<< @<<<<<<<<<<<<< @<<<<<<<<<<<<< @<<<<<<<<<<<<< @<<<<<<<<< @<<<<<<<<<  @<<<<<<<<< @<<<<<<<<< @<<<<<<<<<<<<<<<< @<<<<<<<<<<<<<<<< @<<<<<<<<<<<<<<<<<<< @<<<<< @<<<<< @<<<<< @<<< @<<< @<<< @.## @<<<<< @<<<<<<<<<< @<<< @<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< @<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< @<<<<<<
@annotation
.
format FIELD_NAMES_optAK =
id	   query	      family	            div1           divI           div2           start      end1        start2     end        hits1             hitsI             hits2                nhits1 nhitsI nhits2 ref1 refI ref2 lenR orient superfamily nest nestIDs                            DNArearrangement                   K              K.sd           time         time.sd      numSites T       V       distCen
.
format FIELD_VALUES_optAK =
@<<<<<<<<< @<<<<<<<<<<<<<<<<< @<<<<<<<<<<<<<<<<<<<< @<<<<<<<<<<<<< @<<<<<<<<<<<<< @<<<<<<<<<<<<< @<<<<<<<<< @<<<<<<<<<  @<<<<<<<<< @<<<<<<<<< @<<<<<<<<<<<<<<<< @<<<<<<<<<<<<<<<< @<<<<<<<<<<<<<<<<<<< @<<<<< @<<<<< @<<<<< @<<< @<<< @<<< @.## @<<<<< @<<<<<<<<<< @<<< @<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< @<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< @<<<<<<<<<<<<< @<<<<<<<<<<<<< @<<<<<<<<<<< @<<<<<<<<<<< @<<<<<<< @<<<<<< @<<<<<< @<<<<<<
@annotation
.
#################################################   end of script main  ##############


###################
# Takes the full path to a file containing names of lib seqs for fuzzy name maching,
# plus a ref to the parameter hash. The file must contain one set of fuzzy names per
# line, with names separated by space(s) or tab(s). Empty lines and lines starting
# with # will be ignored.

sub getFuzzyNames
  {
    my ($file, $parameter_, $MESSAGES) = @_;

    my $path=$ENV{'PWD'}."/";  # set path to current directory
    # append current directory path to input file only if necessary
    $file = $path.$file if ($file!~/\w+.*\//);

    open(IN, "<".$file) || die "can't open file $file: $! \n";
    my @lines;
    while (<IN>)
      {
	chomp;
	push @lines, $_;
      }
    close(IN) || die "can't close file $! \n";

    my @names = grep { /^[^#].*\w/ } @lines; # get non-empty lines excluding comments starting with #
    my @fuzzyNameList;
    foreach my $line (@names)
      {
	@fuzzyNameList = split(/\s+/, $line);
	shift @fuzzyNameList if $fuzzyNameList[0] !~ /\w/;
	push @{$parameter_->{fuzzynames}}, [@fuzzyNameList];
#	shift @{$parameter_->{fuzzynames}->[-1]} if $parameter_->{fuzzynames}->[-1]->[0] !~ /\w/;
	push @{$parameter_->{atomicfuzzynames}}, @fuzzyNameList;
      }

  }


###################
# Takes a ref to an annotation record and a ref to the parameter hash. 
# Returns a list of field values to be printed using the format FIELD_VALUES
# and its option variants

sub getAnnotationRecord
  {
    my ($rec_, $parameter_) = @_;
    my @annotation;
    push @annotation, $rec_->{id};                                    # id
    push @annotation, $rec_->{query};                                 # query
    push @annotation, $rec_->{family};                                # family
    push @annotation, ($rec_->{div1})? $rec_->{div1} : "NA";         # div1
    push @annotation, ($rec_->{divI})? $rec_->{divI} : "NA";         # divI
    push @annotation, ($rec_->{div2})? $rec_->{div2} : "NA";         # div2
    push @annotation, $rec_->{start};                                 # start
    push @annotation, ($rec_->{end1})? $rec_->{end1} : "NA";         # end1
    push @annotation, ($rec_->{start2})? $rec_->{start2} : "NA";     # start2
    push @annotation, $rec_->{end};                                   # end
    push @annotation, ($rec_->{hits1})? $rec_->{hits1} : "NA";       # RM line number of hits1
    push @annotation, ($rec_->{hitsI})? $rec_->{hitsI} : "NA";       # RM line number of hitsI
    push @annotation, ($rec_->{hits2})? $rec_->{hits2} : "NA";       # RM line number of hits2
    push @annotation, ($rec_->{frags1})? $rec_->{frags1} : 0;         # nhits1
    push @annotation, ($rec_->{fragsI})? $rec_->{fragsI} : 0;         # nhitsI
    push @annotation, ($rec_->{frags2})? $rec_->{frags2} : 0;         # nhits2
    push @annotation, ($rec_->{ref1})? $rec_->{ref1} : 0;             # fraction of ref seq matched 1
    push @annotation, ($rec_->{refI})? $rec_->{refI} : 0;             # fraction of ref seq matched I
    push @annotation, ($rec_->{ref2})? $rec_->{ref2} : 0;             # fraction of ref seq matched 2
    push @annotation, $rec_->{lenR};                                  # length ratio of element chromosomal to ref seq
    push @annotation, $rec_->{orientation};                           # orientation
    push @annotation, $rec_->{superfamily};                           # superfamily
    push @annotation, $rec_->{nest};                                  # nesting level
    push @annotation, $rec_->{nestIDs};                               # RM line numbers of nests
    push @annotation, ($rec_->{DNArearrangement})? $rec_->{DNArearrangement} : "NA"; # RM line numbers of hits involved in rearrangement
    if ( $parameter_->{doAlign} )
      {
	push @annotation, (defined($rec_->{K}) && $rec_->{K}=~/\d/)? $rec_->{K} : "NA";                 # K
	push @annotation, (defined($rec_->{Ksd}))? $rec_->{Ksd} : "NA";        # K.sd
	push @annotation, (defined($rec_->{time}))? $rec_->{time} : "NA";        # time
	push @annotation, (defined($rec_->{timesd}))? $rec_->{timesd} : "NA";  # time.sd
	push @annotation, ($rec_->{numSites})? $rec_->{numSites} : "NA";# number of nucleotide sites in intra-element LTR alignment
	push @annotation, (defined($rec_->{T}))? $rec_->{T} : "NA";              # transitions
	push @annotation, (defined($rec_->{V}))? $rec_->{V} : "NA";              # transversions
      }
    if ($parameter{"arabidopsis"})
      {
	push @annotation, $rec_->{distCen};                           # relative distance from centromere
      }

    return @annotation;
  }


###################
# Output GFF annotation (especially for visualization in Apollo viewer)

sub outputGFF
  {
    # 1st arg the output file handle, 2nd argument a ref to a hash containing annotation for a particular element,
    # 3rd arg the RepeatMasker annotation file line number for one of the hits correponding to this element,
    # 4th a ref to the (RM) records array, next a ref to the parameter hash. Next arg a boolean value, if true
    # output "plus" style for visualisation in Apollo (i.e. as if all features were on forward strand, plus
    # additional arrows under elements of reverse strand; otherwise if false output more standard gff.
   my ($GFF, $annotationRec_, $RMlineNum, $record_, $parameter_, $plusStyle) = @_;

   my $rec = $RMlineNum - 1 - $parameter_->{RMheaderLines};
   my $orientation = ($annotationRec_->{orientation} eq "+")? "+" : "-";
   my $GFFrecord = "";
   my $low_complexity = 0;  # will be set to TRUE (1) if this is a simple/low complexity repeat

   # output first field <seqname>
   my $id = $annotationRec_->{id};
   if ($id)
     {
       $id .= $orientation."_".$annotationRec_->{family};
     }
   else
     {
       # this is a low complexity repeat
       $low_complexity = 1;
       $id = $annotationRec_->{refName}."_".$RMlineNum;
     }
   $GFFrecord =  $id."\t";

   # write next fields: <source>, which in this case will contain info on the nesting level of the element, and <feature>
   my $prefix = "REannotate_";
   my $nest = $annotationRec_->{nest};
   my $tier = ($nest)? $nest."-nested_" : "";
   $prefix .= $tier;
   my $resulttype = $prefix."repeat";

   my $feature = "repeat";
   my $superfamily = $annotationRec_->{superfamily};
   my $refName = $record_->[$rec]->{refName};
   if ( $superfamily =~ /LTR/i )
     {
       # feature is part of an LTR element
       if ( LTRname($refName, $superfamily) )
	 {
	   $feature = "LTR";
	   $resulttype = $prefix.$feature;
	 }
       elsif ( INTname($refName, $superfamily) )
	 {
	   $feature = "LTR_internal";
	   $resulttype = $prefix.$feature;
	 }
     }
   elsif ( $superfamily =~ /DNA/i )
     {
       $feature = "DNA_transposon";
       $resulttype = $prefix.$feature;
     }
   elsif ($superfamily =~ /(LINE)|(SINE)|(non-LTR)/i)
     {
       $feature = "non-LTR_retrotransposon";
       $resulttype = $prefix.$feature;
     }
   elsif ($superfamily =~ /satellite/i)
     {
       $feature = "satellite";
       $resulttype = $prefix.$feature;
     }
   elsif ( $superfamily !~ m%NA% )
     {
       $feature = $superfamily;
     }
#   else
#     {
#       $resulttype .= "repeat";
#     }
   $resulttype = "RepeatMasker_low_complexity" if ($low_complexity);
   $GFFrecord .=  $resulttype."\tsimilarity\t";
   # output next fields <start> and <end> and <score>
   $GFFrecord .=  ($record_->[$rec]->{chromoStart})."\t".($record_->[$rec]->{chromoEnd})."\t".($record_->[$rec]->{score})."\t";
   # output <strand> and <frame>
   if ($plusStyle)
     {
       # for Apollo visualisation purposes all elements are printed as if on forward strand
       $GFFrecord .=  "+"."\t.\t";
     }
   else
     {
       # conventional gff
       $GFFrecord .= ( ($orientation eq "+")? "+" : "-" )."\t.\t";
     }
   # output [attributes]
   $GFFrecord .=  $id."\t".$record_->[$rec]->{refStart}."\t".$record_->[$rec]->{refEnd};
   $GFFrecord .=  "\n";
   print $GFF $GFFrecord;
  }


###################
# Takes a filehandle for an open fasta file, and a reference to a hash to contain
# the fasta IDs (hash keys)/sequences (hash values). Stores IDs and sequences in
# such hash.

sub getFASTAseqs {
	my($SEQ,$FASTAentries_, my $MESSAGES) = @_;
	my $sequence;
	my $query;
	my $numNucleotides;
	my $lineCounter = 0;

	if ( defined($_=<$SEQ>) ) {	# read first line from SEQ file
	  $_=<$SEQ> while ($_!~/\w/);      # 'discard' any empty lines at top of file

		if ($_ =~ /^>(\S+)/) {	# first non-empty line should be a fasta ID
			$query = $1;	# store fasta ID

			while (defined ($_=<$SEQ>)) {	        # read another line from file
				unless ($_ =~ /^>(\S+)/) {	# current line contains sequence data
					chomp;	                # remove new line character
					$sequence .= $_;	# append line sequence
				}
				else {	# current line is a fasta ID
					$FASTAentries_ -> {$query} = $sequence;	        # store previous ID and sequence
					$sequence = "";	                                # clear variable
					$query = $1;	                                # store new ID
				}
			}
			$FASTAentries_ -> {$query} = $sequence;	# store last fasta ID/sequence
		}
		else {
			print $MESSAGES messageOUT("::getFASTAseqs: cannot process file \n  < $ARGV[1] > : \n  first (non-empty) line should start with '>'. \n\n  Assuming that sequence file is instead a MIPS chromosome");
			return 0;
		}

	}
	else {
		print $MESSAGES messageOUT ("::getFASTAseqs: cannot read from file \n  < $ARGV[1] >");
		return 0;
	}
	return 1;
 }


###################
# Takes a path and a name for a new directory to be created in that path, if the directory doesn't exist yet. Returns the path to the
# new directory.

sub createDirectory {
	my($path, $dirName) = @_;
	chdir $path;
	return $path."/".$dirName if ( chdir $dirName );		    # directory already exists
	mkdir $dirName, 0755 or die "can't create directory $path : $! \n"; # create directory if it doesn't
	return $path."/".$dirName;
}


###################
# Takes a filehandle for an open FASTA chromosome sequence file.
# Returns the sequence (excluding 'new line' characters).
# NOTE: when the end of the input file has been reached the function will return zero!

sub getFASTAseq {
	my($SEQ,$numLines) = @_;
 	my $sequence;
  	my $initialCoord;
  	my $numNucleotides;
 	my $lineCounter;

 	<$SEQ>;		# read FASTA id line
  	while (defined ($_=<$SEQ>)) {
		chomp;
		$sequence .= $_;
     }
		return $sequence;
}


###################
# Takes a filehandle for an open MIPS chromosome file, and a number of lines.
# Returns a 3-element list consisting of:
# a) sequence (excluding 'new line' characters) from the specified number of lines
# (starting from current input file position);
# b) initial chromosome coordinate for the sequence;
# c) end chromosome coordinate for the sequence.
# NOTE: when the end of the input file has been reached the function will return zero!

sub getSequenceFromMIPSchromo {
	my($MIPS,$numLines) = @_;
 	my $sequence;
  	my $initialCoord;
  	my $numNucleotides;
 	my $lineCounter;

  	if (defined ($_=<$MIPS>)) {

		$lineCounter = 1;
  		# get initial coordinate and sequence
   		($initialCoord,$sequence) = /(\d+)\s*([a-zA-Z]+)\n/;
    		$numNucleotides = length($sequence);

     		# get remaining sequence and number of nucleotides
			while ( ($lineCounter < $numLines) && defined ($_=<$MIPS>) ) {
			$lineCounter++;
  			/[a-zA-Z]+/;
	 		$numNucleotides += length($&);
  			$sequence .= $&;
     	  	}
		return ($sequence,$initialCoord,$initialCoord+$numNucleotides-1);
	}
	return 0;  # if end of input file has been reached
 }


###################
# Decides whether input annotation is in the original RepeatMasker
# format or in UCSC genome browser table format.
# Reads records from the RepeatMasker output file (1 record per line),
# returns array of processed records. Returned records contain

sub getRecordsfromRMoutput {
	# argument is a file handle to a RepeatMasker output (.out) file
	my($RM,$parameter_,$chromoSeq_, $MESSAGES) = @_;
	my $ucsc = 0; # will be set to 1 (TRUE) if input annotation is a UCSC genome browser table
                            #print $MESSAGES messageOUT("entering getRecordsfromRMoutput");
	# count number of header lines in the RepeatMasker annotation file
	my $headerLines = 0;
	my $head = <$RM>;
	while ( $head !~ /^\s*\d/ )
	  {  # first data field of the first data line should be a numerical score value
	    $headerLines++;
	    $head = <$RM>;
	  }
	$parameter_->{"RMheaderLines"} = $headerLines;  # this parameter will store the number of lines in the RM annotation file header

	# get all record fields (separated by spaces) from current input line

	my @field = split (/\s+/, $head);
	shift( @field ) if ($field[0] !~ /\w/ );  # remove first (0) field if it's empty
	# Check whether the library used with RepeatMasker was especially formatted for it 
	# ( format of such library entry IDs: entryName#elementType ),
	# in which case field[10] will contain the 'repeat class';
	# if the library entry names weren't especially formatted (do not contain character '#'),
	# then a column for 'repeat class' will be missing from the RepeatMasker output, and
	# so field[10] will contain the start or end position of a match on the reference (library) sequence.
	$parameter_->{repeatClassColumn} = 0 if ( $field[10]=~/^\d+$/ | $field[10]=~/^\(\d+\)$/ );

	# step back to begining of file
	seek($RM, 0, 0) or die "can't step back: $!";
	$. = 0;  # reset current input file line number
	# discard header lines of the RM output annotation file
	for (my $line=1; $line<=$headerLines; $line++) 
	  {
	    my $headerline = <$RM>;
	    $ucsc = 1 if($headerline =~ /bin/);
	  }

	unless($ucsc)
	  {
	    return readRMout($RM, $parameter_, $chromoSeq_, $MESSAGES)
	  }
	else
	  {
	    $parameter_->{UCSCinput} = 1;
	    return readUCSC($RM,$parameter_,$chromoSeq_, $MESSAGES)
	  }
}


###################
# Reads records from the RepeatMasker output file (1 record per line),
# returns array of processed records. Returned records contain

sub readRMout {

  # argument is a file handle to a RepeatMasker output (.out) file
  my($RM,$parameter_,$chromoSeq_, $MESSAGES) = @_;
  my @record;

  my ($shiftIndex, $superfamily);
  if($parameter_->{repeatClassColumn})
    {
      $shiftIndex = 0;
    }
  else
    {
      $superfamily = "NA";
      $shiftIndex = 1;
    }

	my $chromoStart;
	my $chromoEnd;
	my $query;
	# if sequence input file is not in FASTA format (Mips chromosome file is then assumed), 
		# set query name to a standard
	$query = "chr".$parameter_->{"chromo"} unless ( $parameter_->{"isFASTAinput"} );

	my @field;
	while ( defined ($_=<$RM>) )
	  {
	    if ($_=~/\w/)
	      { # non-empty line:
		# get all record fields (separated by spaces) from RM output file
		@field = split (/\s+/, $_);
		shift( @field ) if ($field[0] !~ /\w/ );  # remove first (0) field if it's empty

		# field[4] contains query sequence FASTA ID
		# (if either script 'splitMIPSchromo.pl' (Additional data file 7 in Genome Biology 5(10), R79)
		# or routine 'sub createFragmentFromFASTAchromosome' is used, then field[4] contains the coordinates
		# of a chunk of the query sequence, in the format CHUNK_START::CHUNK_END).
		my $query = $field[4];

		if ( $field[4] !~ /^\d+::\d+$/ ) {
		  $chromoStart = $field[5];
		  $chromoEnd = $field[6];
		}
                else {
		  $query =~ /(\d+)::\d+/;
		  my $coordOffset = $1;
		  $chromoStart = $coordOffset + $field[5] - 1;
		  $chromoEnd = $coordOffset + $field[6] - 1;
	        }


		# exit if queries do not correspond between command line INPUT FILES
		unless ( $parameter_->{noSeqOutput} )
		  {
		    die printINPUT_ERROR() unless ( defined($chromoSeq_->{$query}) );
		  }
		
		$superfamily = $field[10];
		# subtract $shiftIndex from all field indices > 10 (see function 

# ARABIDOPSIS ONLY:      ####################################################################
		my $distCen = "";
		if ($parameter_->{arabidopsis}) {
		  $distCen = getDistFromCentromere($parameter_->{chromo},($chromoStart+$chromoEnd)/2);
		  if ( $field[10] =~ /^LTR/i )
		    { # this is an LTR-element
		      $superfamily = "LTR/TRIM" if ($field[10] =~ /trim/i | $field[9] =~ /katydid/i);
		      $superfamily = "LTR/athila" if ($field[9] =~ /athila/i);
		    }
		}
#############################################################################################

		# $field[11-$shiftIndex] (C strand) or $field[13-$shiftIndex] (+ strand) contains number of bases left in repeat seq after end of match;
		# it is read in with enclosing parentheses, removed below:
		my $negCoordinate = 0;  # this will be flagged TRUE if the RepeatMasker annotation record contains negative coordinates!
		( $field[8] eq "C" ? $field[11-$shiftIndex] : $field[13-$shiftIndex] ) =~ s/(\(-?)(\d+)\)/$2/;
		$negCoordinate++ if ( defined($1) && $1 =~ /-/ );

		my $family = $field[9];
		my $ltrN = LTRname($family, $superfamily);#s/[-_](I|int|LTR)$//i;
		if($ltrN) {
		  $family = $ltrN;
		} else {
		  my $intN = INTname($family, $superfamily);
		  $family = $intN if($intN);
		}
		$family = fuzzyNames($family,$parameter_);


		# create new record
		push @record, {	"score"			=> $field[0],   	# Score of match to reference element
				"divergenceFromRef"	=> $field[1],   	# Percent divergence from ref element
				"chromoStart"		=> $chromoStart,	# Start chromo coordinate of match
				"chromoEnd"	 	=> $chromoEnd,  	# End chromo coordinate of match
				"orientation"		=> $field[8],		# Strand orientation
				"refName"		=> $field[9],   	# Name of matching reference sequence
				"family"                => $family,             # used for hit name matching if ref lib is redundant
				"refStart"		=> $field[8] eq "C" ? $field[13-$shiftIndex] : $field[11-$shiftIndex], # Start coord 
				                                                #   of reference match
				"query"                 => $query,              # Fasta ID of the query sequence
				"refEnd"		=> $field[12-$shiftIndex],  # end coord of reference match
				"leftAfterRefEnd"	=> $field[8] eq "C" ? $field[11-$shiftIndex] : $field[13-$shiftIndex], # No. of bases
				                                                #   left in ref seq after match
				"id"			=> 0,			# Id numbers will be assigned to identified elements
				"superfamily"		=> ($parameter_->{repeatClassColumn})? $superfamily : "NA",
#				"distCen"		=> $distCen,
				"index"                 => -1,                  # In case of an LTR or IR this will be set during 
				                                                #   defragmentation
				"rearrangement"         => 0,                   # Will be set to RM annotation line numbers if hit is judged to have been involved
				                                                #   in DNA rearrangement other than retrotransposisition
				"nesting"               => 0,                   # Will store level of nesting
				"nest"                  => "NA",                # Will store IDs of other elements within which this hit is
#				"negCoord"              => 0                    # will be flagged TRUE if this record contain negative coordinates
			      };
		# RepeatMasker version Open 3.1.0 sometimes reports NEGATIVE (!) values for refStart and refEnd (on 1 occasion 
		# the correct values were respectively be "0" and "abs(refStart-refEnd)" !! Better check...
		$negCoordinate++ if ( $record[-1]->{refStart} < 0 || $record[-1]->{refEnd} < 0 );
		if ($negCoordinate)
		  {
		    my $error = "WARNING: the RepeatMasker annotation file contains NEGATIVE\ncoordinate values for a match to the consensus sequence on line number $. !!\nInput line:\n$_\n";
		    print $MESSAGES messageOUT($error);
		    $parameter_->{inputError} .= $error;
		  }
		$record[-1]->{negCoord} = $negCoordinate;
		if ( $record[-1]->{refStart} < 0 )
		  {
		    $record[-1]->{refEnd} = abs( $record[-1]->{refEnd} - $record[-1]->{refStart} );
		    $record[-1]->{refStart} = 0;
		  }
	      }
	  }
print $MESSAGES messageOUT("...RepeatMasker annotation successfully read into memory...");
	return @record;
}


###################
# Computes relative distance from centromere
# First arg is a chromosome number, second a chromosomal coordinate

sub getDistFromCentromere {
	my ($chr,$coord) = @_;
	my @centromereCoord = (14700000,3700000,13700000,3100000,11800000);		# in bp
	my @chromoLength = (30136242,19847292,23775479,17790890,26992358);		# in bp

	return ($coord - $centromereCoord[$chr-1] < 0) ? ($centromereCoord[$chr-1]-$coord)/$centromereCoord[$chr-1] :
			                                 ($coord-$centromereCoord[$chr-1])/($chromoLength[$chr-1]-$centromereCoord[$chr-1]);
}


###################
# Reads records from the RepeatMasker output file (1 record per line),
# returns array of processed records. Returned records contain

sub readUCSC {

  # argument is a file handle to a RepeatMasker output (.out) file
  my($RM,$parameter_,$chromoSeq_, $MESSAGES) = @_;
  my @record;

	my $chromoStart;
	my $chromoEnd;
	my $query;
	# if sequence input file is not in FASTA format (Mips chromosome file is then assumed), 
		# set query name to a standard
	$query = "chr".$parameter_->{"chromo"} unless ( $parameter_->{"isFASTAinput"} );

	my @field;
	while ( defined ($_=<$RM>) )
	  {

	    if ($_=~/\w/)
	      { # non-empty line:
		# get all record fields (separated by spaces) from RM output file
		@field = split (/\s+/, $_);
		shift( @field ) if ($field[0] !~ /\w/ );  # remove first (0) field if it's empty
		shift( @field );                          # remove "#bin' field

		# field[4] contains query sequence FASTA ID
		# (if either script 'splitMIPSchromo.pl' (Additional data file 7 in Genome Biology 5(10), R79)
		# or routine 'sub createFragmentFromFASTAchromosome' is used, then field[4] contains the coordinates
		# of a chunk of the query sequence, in the format CHUNK_START::CHUNK_END).
		my $query = $field[4];

		  $chromoStart = $field[5] + 1;
		  $chromoEnd = $field[6];

		# exit if queries do not correspond between command line INPUT FILES
		unless ( $parameter_->{noSeqOutput} )
		  {
		    die printINPUT_ERROR() unless ( defined($chromoSeq_->{$query}) );
		  }

		# Check whether the library used with RepeatMasker was especially formatted for it 
		# ( format of such library entry IDs: entryName#elementType ),
		# in which case field[10] will contain the 'repeat class';
		# if the library entry names weren't especially formatted (do not contain character '#'),
		# then a column for 'repeat class' will be missing from the RepeatMasker output, and
		# so field[10] will contain the start or end position of a match on the reference (library) sequence.
		my $superfamily = $field[10]."/".$field[11];

		# $field[11] (C strand) or $field[13] (+ strand) contains number of bases left in repeat seq after end of match;
		# it is read in with enclosing parentheses, removed below:
		my $negCoordinate = 0;  # this will be flagged TRUE if the RepeatMasker annotation record contains negative coordinates!
		( $field[8] eq "-" ? $field[12] : $field[14] ) =~ s/(-?)(\d+)/$2/;
		$negCoordinate++ if ( defined($1) && $1 =~ /-/ );

		my $family = $field[9];
		my $ltrN = LTRname($family, $superfamily);#s/[-_](I|int|LTR)$//i;
		if($ltrN) {
		  $family = $ltrN;
		} else {
		  my $intN = INTname($family, $superfamily);
		  $family = $intN if($intN);
		}
		$family = fuzzyNames($family,$parameter_);

		my ($refStart, $leftAfterRefEnd);
		if($field[8] eq "-")
		  {
		    $refStart = $field[14] + 1;
		    ($leftAfterRefEnd) = $field[12] =~ /(\d+)$/;
		  }
		else
		  {
		    $refStart = $field[12] + 1;
		    ($leftAfterRefEnd) = $field[14] =~ /(\d+)$/;
		  }

		# create new record
		push @record, {	"score"			=> $field[0],   	# Score of match to reference element
				"divergenceFromRef"	=> $field[1]/10,   	# Percent divergence from ref element
				"chromoStart"		=> $chromoStart,	# Start chromo coordinate of match
				"chromoEnd"	 	=> $chromoEnd,  	# End chromo coordinate of match
				"orientation"		=> ($field[8] eq "-")? "C" : $field[8],		# Strand orientation
				"refName"		=> $field[9],   	# Name of matching reference sequence
				"family"                => $family,             # used for hit name matching if ref lib is redundant
				"refStart"		=> $refStart,           # Start coord
				                                                #   of reference match
				"query"                 => $query,              # Fasta ID of the query sequence
				"refEnd"		=> $field[13],          # end coord of reference match
				"leftAfterRefEnd"	=> $leftAfterRefEnd,    # No. of bases
				                                                #   left in ref seq after match
				"id"			=> 0,			# Id numbers will be assigned to identified elements
				"superfamily"		=> $superfamily,
				"index"                 => -1,                  # In case of an LTR or IR this will be set during 
				                                                #   defragmentation
				"rearrangement"         => 0,                   # Will be set to RM annotation line numbers if hit is judged to have been involved
				                                                #   in DNA rearrangement other than retrotransposisition
				"nesting"               => 0,                   # Will store level of nesting
				"nest"                  => "NA",                # Will store IDs of other elements within which this hit is
			      };
	      }
	}
print $MESSAGES messageOUT("...RepeatMasker annotation successfully read into memory...");
	return @record;
}


###################
# Checks whether any hits between two *ordered* hit record numbers passed as first two arguments
# are part of elements that have any associated hits with record numbers outside the
# range defined by the first two arguments. Last 4 args are refs to the LTR-index, I-index,
# nonLTR-index, and all hits-record arrays.
sub interleavedHits
  {
    my ($rec1, $rec2, $LTRrecord_, $Irecord_, $nonLTRrecord_, $record_) = @_;

    my $interleavedHits = 0; # will be set to TRUE if hits belonging to the same element are found interleaved with with rec1 and rec2

    my $name1 = $record_->[$rec1]->{refName};
    my $name2 = $record_->[$rec2]->{refName};
    my $firstRec1 = $rec1;
    my $lastRec2 = $rec2;

    my ($nestedHitIndexRecord_, $nestedHitName, $nestedHitIndex, $nestedHitID, $nestedHitSuperfamily);
    for (my $nestedHitRec = $rec1+1; $nestedHitRec < $rec2; $nestedHitRec++)
      {
	$nestedHitName = $record_->[$nestedHitRec]->{refName};
	$nestedHitSuperfamily = $record_->[$nestedHitRec]->{superfamily};
	$nestedHitID = $record_->[$nestedHitRec]->{id};
	$nestedHitIndex = $record_->[$nestedHitRec]->{index};
	# check for interleaved hits
	unless ( $record_->[$nestedHitRec]->{superfamily} =~ /(low_)|(simple_)/i )
	  { # not a low complexity hit, so check where the first and last hits associated with this nested element are

	    if ( LTRname($nestedHitName, $nestedHitSuperfamily) )
	      { # this is a hit to an LTR
		$nestedHitIndexRecord_ = $LTRrecord_;
		$nestedHitID =~ s/(\d)[a-b]$/$1/;  # remove 'a' or 'b' from the LTR id label
	      }
	    elsif ( INTname($nestedHitName, $nestedHitSuperfamily) )
	      { # this is a hit to the internal region of an LTR element
		$nestedHitIndexRecord_ = $Irecord_;
	      }
	    else
	      { # this is a hit to a non-LTR element
		$nestedHitIndexRecord_ = $nonLTRrecord_;
	      }

	    $interleavedHits++ if ( getLastRec($nestedHitRec, $record_, $nestedHitIndexRecord_) > $rec2 ||
				    getFirstRec($nestedHitRec, $record_, $nestedHitIndexRecord_) < $rec1
				  );
	  }
      }
    return $interleavedHits;
  }
##### end of &interleavedHits


###################
# "Defragments" multiple hits that originated with the same element insertion.
# Takes an index (first argument) and its type and labels all other indices belonging to the same element.
# 3rd, 4th and 5th arguments are references to the respective type array of indices.
# 6th argument a ref to array of all records (hits).
# 7th argument a ref to the parameter hash, last arg the maximum number of further recursive calls to defragment.
sub defragment 
  {
    my ($index, $type, $LTRrecord_, $Irecord_, $nonLTRrecord_, $record_, $parameter_,$maxRecursion) = @_;
    my $indexRecord_;
    if ($type =~ /_I/)
      {
	# this is a hit to the internal region of an LTR element
	$indexRecord_ = $Irecord_;
      }
    elsif ($type =~ /_LTR/)
      {
	# this is a hit to an LTR
	$indexRecord_ = $LTRrecord_;
      }
    else
      {
	# this is a hit to a non-LTR element
	$indexRecord_ = $nonLTRrecord_;
      }

    my $label = $indexRecord_->[$index][1];
    # return 0 (FALSE) if this hit has already been labeled as part of a multi-hit element
    return 0 if ($maxRecursion < 1 || $label);
    my $rec = $indexRecord_->[$index][0];
    my $query = $record_->[$rec]->{query};
    my $orientation = $record_->[$rec]->{orientation};
    my $superfamily  = $record_->[$rec]->{superfamily};
    my $refLength = $record_->[$rec]->{refEnd} + $record_->[$rec]->{leftAfterRefEnd};

    # Half search range in case element is located in satellite array
    my $searchRange = $parameter_->{fragmentSearchRange};
    my $inSatelliteArray = 0;
    if ( $superfamily =~ /Satellite/i ||
	 ( $rec - 1 >= 0 && $record_->[$rec-1]->{superfamily} =~ /Satellite/ ) ||
	 ( $rec + 1 < scalar(@{$record_}) && $record_->[$rec+1]->{superfamily} =~ /Satellite/ )
       )
      {
	$inSatelliteArray = 1;
	$searchRange /= 2;
      }

    # look for fragments
    my $anotherIndex = $index + 1;
    my $numIndexRecs = scalar(@{$indexRecord_});
    # search (within given range) for hits that could be fragments of the same element,
    # making sure we haven't already found all hits by checking whether the last hit
    # associated with the element matches the end of the ref seq
    while ( $anotherIndex < $numIndexRecs &&
	    $record_->[$indexRecord_->[$anotherIndex][0]]->{chromoStart} - $record_->[$rec]->{chromoEnd} < $searchRange &&
	    $query eq $record_->[ $indexRecord_->[$anotherIndex][0] ]->{query} &&
	    ( ($orientation eq "+")?
	      $record_->[ $indexRecord_->[getLastFragment($index,$indexRecord_)][0] ]->{leftAfterRefEnd} :
	      $record_->[ $indexRecord_->[getLastFragment($index,$indexRecord_)][0] ]->{refStart} - 1
	    )
	  )
      {
	# check whether combined length doesn't exceed that of the reference, that the hits are colinear with (match consecutive
	# regions of) the reference, whether the next hit is as yet unlabeled, and perform name/orientation/ref length checks,
	
	if ( !$indexRecord_->[$anotherIndex][1] &&
	     ( getRefLength($index, $indexRecord_, $record_) + getRefLength($anotherIndex, $indexRecord_, $record_) 
	       <= $refLength + $parameter_->{boundaryTolerance}
	     ) &&
	     sameElement($indexRecord_, $record_, $parameter_, getLastFragment($index,$indexRecord_), $anotherIndex)
	   )
	  {
	    # hit labeled by $anotherIndex could be part of the same element hit $index,
	    # but first check whether there are any nested elements whose last hits have indices higher than $anotherIndex or,
	    # in other words, DO NOT ASSIGN INTERLEAVED HITS TO DIFFERENT ELEMENTS
	    my $lastRec = $indexRecord_->[getLastFragment($index,$indexRecord_)][0];
	    my $anotherRec = $indexRecord_->[$anotherIndex][0];
	    unless ( interleavedHits($lastRec, $anotherRec, $LTRrecord_, $Irecord_, $nonLTRrecord_, $record_) )
	      {
		# flag current hits as fragments of a multiple-hit element
		$indexRecord_->[$index][1] = ($label) ? $label : "f".$index.$type;
		$indexRecord_->[$anotherIndex][1] = $indexRecord_->[$index][1];
		unshift @{$indexRecord_->[$anotherIndex][2]}, @{$indexRecord_->[$index][2]};
                foreach my $frag (@{$indexRecord_->[$index][2]})
		  {
		    @{$indexRecord_->[$frag][2]} = @{$indexRecord_->[$anotherIndex][2]};
		  }
	      }
	  }
	elsif ( !$inSatelliteArray && $record_->[ $indexRecord_->[$anotherIndex][0] ]->{superfamily} =~ /Satellite/i )
	  { # we're possibly in a satellite array!
	    $inSatelliteArray = 1;
	    $searchRange /= 2;
#	    # allow only one level of recursion if current element is in satellite array
#	    my ($package, $filename, $line, $subroutine, $hasargs,$wantarray, $evaltext, $is_require, $hints, $bitmask) = caller(1);
#	    $maxRecursion = 0 if ( $subroutine =~ /defragment/i);
	  }
	# (temporarily) exit search for further fragments if the last candidate hit belongs to the same family,
	# in order to avoid "complex (re-)arrangements" of elements and search for nested elements of the same family (same orientation)
	elsif ( !$indexRecord_->[$anotherIndex][1] &&
		nameAndOrientationMatch($indexRecord_->[$index][0],$indexRecord_->[$anotherIndex][0],$record_,$parameter_)
	      )
	  {
            # call ourselves RECURSIVELY, decreasing the maximum level of allowed recursion by one
            defragment($anotherIndex, $type, $LTRrecord_, $Irecord_, $nonLTRrecord_, $record_, $parameter_, $maxRecursion-1);
          }
	$anotherIndex++;
      }
  # if this hit has no other fragments (starting at a higher coordinate along the query), label it as a single hit
  $label = $indexRecord_->[$index][1];
  $indexRecord_->[$index][1] = ($label) ? $label : "only".$index.$type;

}  # end of &defragment


##e#################
# Annotates hits to LTR elements

sub annotateElements {
	# The last argument is a ref to the parameter hash, the penultimate one a ref to the array of (references to) all records (hashes);
	# first is a reference to the entire chromosome sequence; third the path to the directory to contain annotation and sequences

	my($chromoSeq_, $FASTAFilePath, $record_, $parameter_, $MESSAGES) = @_;

	my $fragmentSearchRange = $parameter_->{"fragmentSearchRange"};
#	my $maxIndel = $parameter_->{"maxIndel"};
	my $boundaryTolerance = $parameter_->{"boundaryTolerance"};
	my $minDistanceToSoloLTR = $parameter_->{"minDistanceToSoloLTR"};
	my $LTRfilePath = $FASTAFilePath.$parameter_->{LTRoutDir};

	my ($numProcessedLTRrecords, $numProcessedIRrecords) = (0,0);
	my @LTRrecord;  	# Stores record numbers of all LTR-sequence records, and a flag value that informs whether
			      	# this LTR sequence has been paired ('0' for unpaired, the local_pair_ID for paired, or (temporarily)
				# 'f' for records that are fragments of the same LTR), in the array form:
			      	# [record number, flag].
	my @Irecord;     # Stores record numbers of all internal region sequence records,
	                 # and a flag value that informs whether this internal region sequence has been associated with LTRs:
			 # 'u' or 'i' +local_element_ID if it's part of 'complete' element,
	                 # 't' +local_element_ID if it's part of a truncated element,
	                 # or (temporarily) 'f' +local_element_ID for records that are fragments of the same IR).
	                 # Each array element has itself the array form:  [record number, flag].
	my @nonLTRrecord;       # Stores record numbers of all non-LTR hit records, and a flag value that informs whether
			      	# this hit is part of a multi-hit element, in the array form:
			      	# [record number, flag].

	my @completeLTRannotation;   # Stores annotation on all 'complete' elements identified. Each array element will contain an
	                        # annotation record (a hash) on each identified 'complete' element.
	my %completeLTRannotHashTable;

	my @truncatedLTRannotation;  # Stores annotation on all truncated elements identified. Each array element will contain an
	                        # annotation record (a hash) on each identified truncated element.

	my @soloLTRannotation;           # Stores annotation on all 'solo' LTR elements identified. Each array element will contain an
	                        # annotation record (a hash) on each identified 'solo LTR' element.
	my %soloLTRannotHashTable;

	my @nonLTRannotation;      # Stores annotation on all non-LTR elements identified. Each array element will contain an
	                        # annotation record (a hash) on each identified 'non-LTR' element.


	my $numRecords = scalar(@{$record_});

	# store indices of hits to LTRs, to internal regions of LTR-elements, and to non-LTR elements (sequence similarity hits)
	my $numLTRrecords = 0;
	my $numIrecords = 0;
	my $numNonLTRrecords = 0;

	for (my $recordNum = 0; $recordNum < $numRecords; $recordNum++) 
	  {
	    my $family = $record_->[$recordNum]->{"refName"};
	    my $superfamily = $record_->[$recordNum]->{"superfamily"};
	    if ( LTRname($family, $superfamily) )
	      { # LTR sequence!
		$numLTRrecords++;
		push @LTRrecord, [ $recordNum, 0, [$numLTRrecords-1] ];
		$record_->[$recordNum]->{"index"} = $numLTRrecords - 1;
	      }
	    if ( INTname($family, $superfamily) )
	      { # internal region sequence!
		$numIrecords++;
		push @Irecord, [ $recordNum, 0, [$numIrecords-1] ];
		$record_->[$recordNum]->{"index"} = $numIrecords - 1;
	      }
	    if ( $superfamily !~ /LTR/i && $superfamily !~ /simple_repeat/i && $superfamily !~ /low_complexity/i ) 
	      { # hit to non-LTR element
		$numNonLTRrecords++;
		push @nonLTRrecord, [ $recordNum, 0, [$numNonLTRrecords-1] ];
		$record_->[$recordNum]->{"index"} = $numNonLTRrecords - 1;
	      }
	  }

	# DEFRAGMENT MULTIPLE HITS TO THE SAME ELEMENT

	# label LTRs which correspond to more than one record (hit)
	print $MESSAGES messageOUT("...defragmenting hits to LTR elements...");
	my $type;
	# label LTRs that correspond to more than one record (hit)
	$type = "_LTR";
	for (my $ltr = 0; $ltr < scalar(@LTRrecord); $ltr++)
	  {
	    defragment($ltr, $type, \@LTRrecord, \@Irecord, \@nonLTRrecord, $record_, $parameter_, $parameter_->{maxRecursion});
	  }
	# label internal regions which correspond to more than one record (hit)
	$type = "_I";
	for (my $int = 0; $int < scalar(@Irecord); $int++)
	  {
	    defragment($int, $type, \@LTRrecord, \@Irecord, \@nonLTRrecord, $record_, $parameter_, $parameter_->{maxRecursion});
	  }
	# label non-LTR elements that correspond to more than one record (hit)
	print $MESSAGES messageOUT("...defragmenting hits to other repetitive elements...");
	$type = "_non-LTR";
	for (my $nonLTR = 0; $nonLTR < scalar(@nonLTRrecord); $nonLTR++)
	  {
	    defragment($nonLTR, $type, \@LTRrecord, \@Irecord, \@nonLTRrecord, $record_, $parameter_, $parameter_->{maxRecursion});
	  }


	# FIND ALL LTR PAIRS IN COMPLETE, UNINTERRUPTED ELEMENTS

	print $MESSAGES messageOUT("...resolving structure of LTR elements...");
	my $uElementCounter = 0;
	my $ltr = (@LTRrecord)? getLastFragment(0, \@LTRrecord) : -1;  # last hit associated with candidate LTR1

	while ( $ltr + 1 < $numLTRrecords )
	  {
	    my $LTR1 = $ltr;
	    my $LTR2 = $LTR1 + 1;  # first hit associated with candidate LTR2
	    # check if LTRs are akin and flank akin internal region (uninterrupted element)
	    my ($uElementFound, $uIRindices, $uLTRindices, $uIRpairIndex_, @uLTRpairIndex) =
	      uninterruptedElement($LTR1,$LTR2,\@LTRrecord,\@Irecord,$record_,$parameter_);
	    if ($uElementFound)
	      { # uninterrupted element found
		$uElementCounter++;
		my $id = 'u'.$uElementCounter;
		processPair(@uLTRpairIndex,$uLTRindices,$uIRpairIndex_,$uIRindices,\@LTRrecord,\@Irecord,$record_, $id,
			    $chromoSeq_,$LTRfilePath,$parameter_,\@completeLTRannotation);
		$completeLTRannotHashTable{$id} = $uElementCounter - 1;
		my @uLTRindices = split("-",$uLTRindices);
		my @uIRindices = split("-",$uIRindices);
		$numProcessedLTRrecords += scalar( @uLTRindices );
		$numProcessedIRrecords += scalar( @uIRindices );
	      }
	    $ltr = getLastFragment($LTR2, \@LTRrecord);  # last hit associated with previous candidate LTR2 becomes candidate LTR1
	  }
	#####################end of FIND UNINTERRUPTED, COMPLETE LTR ELEMENTS


	# FIND ALL LTR PAIRS IN INTERRUPTED ELEMENTS, AND UNPAIRED LTRs
	my $loneLTRcounter = 0;
	my $iElementCounter = 0;
	my $soloLTRcounter = 0;
	# get the indices of first two unprocessed LTR-records
	my($LTR1,$LTR2) = get2UnprocessedLTRindices(\@LTRrecord, $numLTRrecords, $record_, $boundaryTolerance);

	GET_INTERRUPTED: until ( $numProcessedLTRrecords eq $numLTRrecords )
	    {

	      while ( $LTR2 eq -1 )
		{   # no LTRs left as a candidate to pair up with LTR1, process LTR1 as unpaired
		  $loneLTRcounter++;
		  my $id =  's'.$loneLTRcounter;
		  if ( processSingle($LTR1, \@LTRrecord, $record_, $id, $parameter_, $LTRfilePath, $chromoSeq_,\@soloLTRannotation) )
		    { # this unpaired LTR has been classified as a 'solo' LTR
		      $soloLTRcounter++;
		      $soloLTRannotHashTable{$id} = $soloLTRcounter -1;
		    }
		  $numProcessedLTRrecords += scalar( @{$LTRrecord[$LTR1]->[2]} );

		  ($LTR1, $LTR2) = get2UnprocessedLTRindices(\@LTRrecord, $numLTRrecords, $record_, $boundaryTolerance);
		  last GET_INTERRUPTED if ( $numProcessedLTRrecords eq scalar(@LTRrecord) || $LTR1 eq -1 ); #all LTR hits tested!
		}

		my $LTRrec1 = $LTRrecord[ getLastFragment($LTR1,\@LTRrecord) ][0];
		my $LTRrec2 = $LTRrecord[ getFirstFragment($LTR2,\@LTRrecord) ][0];
		# check that LTRs are hits to the same query and within pairing range
		if ( $record_->[$LTRrec1]->{"query"} eq $record_->[$LTRrec2]->{"query"} &&
		     ($record_->[$LTRrec2]->{"chromoStart"} - $record_->[$LTRrec1]->{"chromoEnd"}) <= $fragmentSearchRange
		   )
		  {
		    # check if LTRs could flank an interrupted element
		    my ($iElementFound, $iIRindices, $iLTRindices, $iIRpairIndex_, @iLTRpairIndex) = 
		      interruptedElement($LTR1, $LTR2, \@LTRrecord,\@Irecord,\@nonLTRrecord, $record_, $parameter_);

		    if ( $iElementFound )
		      {  # interrupted element found

			# process and save this pair
			$iElementCounter++;
			my $id =  'i'.$iElementCounter;
			processPair(@iLTRpairIndex,$iLTRindices,$iIRpairIndex_,$iIRindices,\@LTRrecord,\@Irecord,$record_,
				   $id,$chromoSeq_,$LTRfilePath,$parameter_,\@completeLTRannotation);
			$completeLTRannotHashTable{$id} = $uElementCounter + $iElementCounter - 1;
			my @iLTRindices = split("-",$iLTRindices);
			my @iIRindices = split("-",$iIRindices);
			$numProcessedLTRrecords += scalar( @iLTRindices );
			$numProcessedIRrecords += scalar( @iIRindices );

			# search for another pair
			($LTR1,$LTR2) = get2UnprocessedLTRindices(\@LTRrecord, $numLTRrecords, $record_, $boundaryTolerance);
		      }
		    else
		      {  # Intra- (interrupted) element pair not found
			# try to match yet another LTR hit to LTR1
			$LTR2 = get1UnprocessedLTRindex(\@LTRrecord, $numLTRrecords, $record_, $LTR2+1);
		      }
		  }
		else 
		  { # the two LTR hits are either outside pairing range or not in the same query sequence 
		    $loneLTRcounter++;
		    my $id =  's'.$loneLTRcounter;
		    # mark LTR1 as an unpaired LTR sequence
		    if ( processSingle($LTR1, \@LTRrecord, $record_, $id, $parameter_, $LTRfilePath, $chromoSeq_,\@soloLTRannotation) )
		      { # this unpaired LTR has been classified as a 'solo' LTR
			$soloLTRcounter++;
			$soloLTRannotHashTable{$id} = $soloLTRcounter - 1;
		      }
		    $numProcessedLTRrecords += scalar( @{$LTRrecord[$LTR1]->[2]} );

		    ($LTR1,$LTR2) = get2UnprocessedLTRindices(\@LTRrecord, $numLTRrecords, $record_, $boundaryTolerance);
		  }

	      }  # end of GET_INTERRUPTED
	#####################end of FIND COMPLETE INTERRUPTED and SOLO LTR ELEMENTS


	# FIND TRUNCATED LTR ELEMENTS
	my $tElementCounter = 0;

	# collect unpaired LTRs excluding solos and those putatively involved in DNA rearrangements other than transposition
	my @unpairedLTR = grep { $record_->[$_->[0]]->{id} =~ /^s\d+$/ } @LTRrecord;

	my ($tElementFound, $tLTRindices, $tLTRindex, $tIRindices, @tIRindex, $singleLTRprefix, $id);
	my $numtLTRs = scalar(@unpairedLTR);
	for(my $tLTR=0; $tLTR < $numtLTRs; $tLTR++)
	  {
	    # erase any previous DNA rearrangement flags suggested for this LTR (if it hasn't been classified into an element)
	    $record_->[ $unpairedLTR[$tLTR][0] ]->{rearrangement} = 0 if ($record_->[ $unpairedLTR[$tLTR][0] ]->{id} =~ /^s\d+$/);

	    ($tElementFound, $tLTRindices, $tLTRindex, $tIRindices, @tIRindex) =
	      truncatedElement($tLTR,$numtLTRs,\@LTRrecord,\@Irecord,\@nonLTRrecord,$record_,\@unpairedLTR,$parameter_);
	    if ($tElementFound)
	      {
		$tElementCounter++;
		my @LTRindices =  split("-",$tLTRindices);
		$numProcessedLTRrecords += scalar(@LTRindices) - scalar( @{$LTRrecord[$record_->[$unpairedLTR[$tLTR][0]]->{index}]->[2]} );
		$singleLTRprefix = "s";
		if ( @tIRindex )
		  {
		    $singleLTRprefix = "";
		    my @IRindices = split("-",$tIRindices);
		    $numProcessedIRrecords += scalar(@IRindices);
		  }
		else
		  {
		    $tIRindex[0] = -1;
		  }
		$id = $singleLTRprefix."t".$tElementCounter;
		processTruncated($tLTRindex,$tLTRindices,$tIRindex[0],$tIRindices,\@LTRrecord,\@Irecord,$record_,
				 $id,$chromoSeq_,$LTRfilePath,$parameter_,\@truncatedLTRannotation);
	      }
	  }

	# process all IRs unassociated with LTRs as truncated elements
	my @bareIR = grep { !$record_->[$_->[0]]->{"id"} } @Irecord;  # store all IR indices of as yet unprocessed IRs
	foreach my $tIR (@bareIR)
	  {
	    unless ( $record_->[$tIR->[0]]->{"id"} )
	      {  # this hit has not been processed
		$tElementCounter++;
		my $IRindex = $record_->[$tIR->[0]]->{"index"};
		my @IRindex = @{$Irecord[$IRindex]->[2]};
		processTruncated(-1, "", $IRindex, hyphenate(@IRindex), \@LTRrecord, \@Irecord, $record_,
				 "t".$tElementCounter, $chromoSeq_, $LTRfilePath, $parameter_,\@truncatedLTRannotation);
		$numProcessedIRrecords += scalar(@IRindex);
	      }
	  }

	################### end of truncated element annotation


	print $MESSAGES messageOUT("...resolving structure of non-LTR elements...");
	# FIND NON-LTR ELEMENTS
	my $nElementCounter = 0;

	for (my $hit=0; $hit < $numNonLTRrecords; $hit++ )
	  {
	    my $rec = $nonLTRrecord[$hit][0];
	    my $id = $record_->[$rec]->{id};
	    unless ($id)
	      {
		$nElementCounter++;
		$id = "n".$nElementCounter;
		# process this element as it hasn't been processed yet, and store annotation
		my $annotationRec_ = processNonLTR($hit,\@nonLTRrecord,$record_,$id,$outDirPath,$chromoSeq_,$parameter_, $MESSAGES);
		push @nonLTRannotation, $annotationRec_;  # this annotation record is a ref to a hash
	      }
	  }


	# RESOLVE NESTING  ######################

	print $MESSAGES messageOUT("...resolving nesting of repetitive elements...");

	my %nestingStacksByQuery;

	# sort element terminal coordinates in ascending order
	my @sortedTermini = sortTermini($record_,\@LTRrecord,\@Irecord,\@nonLTRrecord,$numRecords,
					\@completeLTRannotation,\@truncatedLTRannotation,\%completeLTRannotHashTable,\%soloLTRannotHashTable);
	my $numTermini = scalar(@sortedTermini);

	# resolve nesting
	my %annotationByQuery;  # will store all element records split by query and sorted by start chromosomal coordinate
	my %annotHashTableByQuery; # will store hash table for element records (indexed by id) split by query
	my %queryCounter;       # will count the number of elements in each query
	
	RESOLVE_NESTING: 
	for (my $terminusIndex=0; $terminusIndex < $numTermini; $terminusIndex++)
	  {
	    my $query = $sortedTermini[$terminusIndex]->{query};
	    my $id = $sortedTermini[$terminusIndex]->{id};
	    my $rec = $sortedTermini[$terminusIndex]->{rec};

	    # create nesting stack for query sequence in which the current element was found, 
	    # if it doesn't exist yet (in which case we push the current element onto stack)
	    $nestingStacksByQuery{$query} = [] unless ($nestingStacksByQuery{$query});
	    my $stackTop = ($nestingStacksByQuery{$query}->[-1]) ? $nestingStacksByQuery{$query}->[-1] : "stack bottom";

	    # check whether current element terminus differs from the top of the stack
	    my $element_;
	    if ( $id ne $stackTop )
	      {
		# first check that there is no boundary conflict (i.e. that the RepeatMasker annotation does not report
		# interleaved elements such that the end coordinate of the first element is higher than the start coordinated of
		# the consecutive element

		unless ($stackTop eq "stack bottom")
		  {
		    # there is a previous element in the stack, make sure there is no boundary conflict (interleaved termini)
		    # with any other element
		    my $nextTerminusIndex = $terminusIndex;
		    my %interleavedWithStackTopTermini; # will store ID and first terminus index of elements whose termini
                                                        # are interleaved with the beginning and end of current $stackTop
		    while ( $nextTerminusIndex < $numTermini - 1 && $sortedTermini[$nextTerminusIndex]->{id} ne $stackTop )
		      {
			if ( $sortedTermini[$nextTerminusIndex]->{query} eq $query )
			  {
			    my $nextID = $sortedTermini[$nextTerminusIndex]->{id};
			    if ( $interleavedWithStackTopTermini{$nextID} )
			      { # found two termini for this element, NOT interleaved with stack top
				delete $interleavedWithStackTopTermini{$nextID};
			      }
			    else
			      {
				# first terminal for this element
				$interleavedWithStackTopTermini{$nextID} = $nextTerminusIndex;
			      }
			  }
			$nextTerminusIndex++;
		      }
		    my $numberOfInterleavedIDs = scalar(keys %interleavedWithStackTopTermini);
		    if ( $numberOfInterleavedIDs > 0 )
		      {
			my $stackTopEndIndex = $nextTerminusIndex;
			my $stackTopEnd = $sortedTermini[$stackTopEndIndex]->{coord};

			# sort interleavedIDs in ascending order of their start termini
			my @interleavedID =
			  ( sort { $interleavedWithStackTopTermini{$a}<=>$interleavedWithStackTopTermini{$b} }
			    keys %interleavedWithStackTopTermini
			  );
			my $interleavedID = $interleavedID[0];  # pick the interleaved id with lowest start coord
			my $interleavedIDendIndex = $stackTopEndIndex + 1;
			$interleavedIDendIndex++ while( $interleavedIDendIndex < $numTermini - 1 &&
							$sortedTermini[$interleavedIDendIndex]->{id} ne $interleavedID
						      );
			my $start = $sortedTermini[$interleavedWithStackTopTermini{$interleavedID}]->{coord};
			my $end = $sortedTermini[$interleavedIDendIndex]->{coord};

			if ( $stackTopEnd - $start < $end - $stackTopEnd )
			  { # swap stack top end terminus position with position of interleavedID start
			    $sortedTermini[$stackTopEndIndex]->{coord} = $start;
			    $sortedTermini[$stackTopEndIndex]->{id} = $interleavedID;
			    $sortedTermini[$interleavedWithStackTopTermini{$interleavedID}]->{coord} = $stackTopEnd;
			    $sortedTermini[$interleavedWithStackTopTermini{$interleavedID}]->{id} = $stackTop;
			    # pop nesting stack if we have moved the stack top end terminus to the current id position
			    if ( $interleavedWithStackTopTermini{$interleavedID} eq $terminusIndex )
			      {
				pop @{$nestingStacksByQuery{$query}};
				next RESOLVE_NESTING;
			      }
			  }
			else
			  { # swap stack top end terminus position with position of interleavedID end
			    $sortedTermini[$stackTopEndIndex]->{coord} = $end;
			    $sortedTermini[$stackTopEndIndex]->{id} = $interleavedID;
			    $sortedTermini[$interleavedIDendIndex]->{coord} = $stackTopEnd;
			    $sortedTermini[$interleavedIDendIndex]->{id} = $stackTop;
			  }
		      }
		  }

		# determine whether this is a non-LTR element, or a complete, truncated or solo LTR element

		my $elementIndex;
		if ($id =~/i|u/)
		  {
		    $element_ = \@completeLTRannotation;
		    $elementIndex = $completeLTRannotHashTable{$id};
		  }
		elsif ($id =~/t(\d+)/)
		  {
		    $element_ = \@truncatedLTRannotation;
		    $elementIndex = $1 - 1;
		  }
		elsif ($id =~/s[^t]/)
		  {
		    $element_ = \@soloLTRannotation;
		    $elementIndex = $soloLTRannotHashTable{$id};
		  }
		elsif ($id =~/n/)
		  {
		    $element_ = \@nonLTRannotation;
		    $id =~ /(\d+)$/;
		    $elementIndex = $1 - 1;
		  }

		# store element record in array in respective query key
		# (in which case this is the end terminus)
		unless ($annotationByQuery{$query})
		  { # initialise this query key with array ref, and its hash table with a hash ref
		    $annotationByQuery{$query} = [];
		    $annotHashTableByQuery{$query} = {};
		    $queryCounter{$query} = 0;
		  }

		push @{$annotationByQuery{$query}}, $element_->[$elementIndex];
		$annotHashTableByQuery{$query}->{$id} = $queryCounter{$query};
		$queryCounter{$query}++;
		# determine nesting level and nesting elements
		$annotationByQuery{$query}->[-1]->{nest} = scalar(@{$nestingStacksByQuery{$query}});
		my $nestIDs =  ( $nestingStacksByQuery{$query}->[-1] ) ? hyphenate(@{$nestingStacksByQuery{$query}}) : "NA";
		$annotationByQuery{$query}->[-1]->{nestIDs} = $nestIDs;
		# push this element onto the stack
		push @{$nestingStacksByQuery{$query}}, $id;
	      }
	    else
	      {
		# matches top of stack, this is the end terminus of current element, so pop it off the stack
		pop @{$nestingStacksByQuery{$query}};
	      }
	  }


	# resolve truncated nesting if required
	if ($parameter_->{truncatedNesting})
	  {
	    for(my $terminus=0; $terminus<$numTermini-1; $terminus++)
	      { # look for complete element terminus truncating another element's terminus

		# pick two consecutive elements along the query
		my $query1 = $sortedTermini[$terminus]->{query};
		my $query2 = $sortedTermini[$terminus+1]->{query};
		my $rec1 = $sortedTermini[$terminus]->{rec};
		my $rec2 = $sortedTermini[$terminus+1]->{rec};
		my $id1 = $sortedTermini[$terminus]->{id};
		my $id2 = $sortedTermini[$terminus+1]->{id};
		my $annotIndex1 = $annotHashTableByQuery{$query1}->{$id1};
		my $annotIndex2 = $annotHashTableByQuery{$query2}->{$id2};
		my $queryElements_ = $annotationByQuery{$query1};

		if ( $query1 eq $query2 && $id1 ne $id2 &&
		     $queryElements_->[$annotIndex2]->{nestIDs} !~ /\b$id1\b/ &&
		     $queryElements_->[$annotIndex1]->{nestIDs} !~ /\b$id2\b/
		   )
		  { # same query, different elements, and second element hasn't as yet been annotated as nested within the first one

		    #  check whether one interrupts the other (unless they're in same family/orientation)
		    unless ( nameAndOrientationMatch($rec1,$rec2,$record_,$parameter_) )
		      {
			if ( interruption($rec1,$rec2,$record_,$parameter_->{boundaryTolerance}) &&
			     !interruption($rec2,$rec1,$record_,$parameter_->{boundaryTolerance})
			   )
			  { # second element interrupts first
			    annotateTruncatedNesting($annotIndex1, $annotIndex2, $queryElements_);
			  }
			elsif ( interruption($rec2,$rec1,$record_,$parameter_->{boundaryTolerance}) &&
				!interruption($rec1,$rec2,$record_,$parameter_->{boundaryTolerance})
			      )
			  { # first element interrupts second
			    annotateTruncatedNesting($annotIndex2, $annotIndex1, $queryElements_);
			  }
		      }
		  }
	      }
	  }
	################ end of resolve nesting


	# place upper and lower bounds on the ages of non-LTR elements in nested structures involving complete LTR-elements
	if ($parameter_->{doAlign})
	  { # ages (K between intra-element LTRs) have been estimated for complete LTR-elements
	    foreach my $query (sort keys %annotationByQuery)
	      {
		my $queryElements_ = $annotationByQuery{$query};  # ref to array of elements in current query
		my $queryHash_ = $annotHashTableByQuery{$query}; # ref to hash table (by id) for $queryElements_
		foreach my $element_ (@{$queryElements_})
		  {
		    if ( $element_->{id} !~ /u|i/ )
		      { # this is not a complete LTR-element
			if ($element_->{nest})
			  { # this element is nested
			    # place upper bound on element's age if nested within complete LTR-element
			    ageUpperBound($element_, $queryElements_, $queryHash_);
			  }
		      }
		    else
		      { # this is a complete LTR-element
			if ($element_->{nest})
			  { # this element is nested
			    # place upper bound on nesting elements' ages (unless they're complete LTR-elements)
			    ageLowerBound($element_, $queryElements_, $queryHash_);
			  }
		      }
		  }
	      }
	  }


	# return annotation
	return (\%annotationByQuery, \%annotHashTableByQuery, \@sortedTermini);
}


###################
# 
sub sortTermini
  {
    my ($record_, $LTRrec_, $IRrec_, $nonLTRrec_, $numRecords, $cLTRannot_, $tLTRannot_, $cLTRannotHashTable_, $sLTRannotHashTable_) = @_;
    my @sortedTermini;

    my $currentHighestCoord;
    my $currentQuery = "";
    my $annot_;    # will be set accordingly to repeat type
    my $indexRec_; # will be set accordingly to repeat type
    # solo LTRs and non-LTR elements will be counted, as their annotation indices already ordered by the start coord of the element
    my ($sCounter, $nCounter) = (0,0);
    my $annotIndex;
    my $index;
    my $id;
    my $eStart = 0; # will be set to TRUE (and hold a coordinate value) if this record marks the start of a repeat element
    my $eEnd = 0; # will be set to TRUE (and hold a coordinate value) if this record marks the end of a repeat element

    for (my $rec=0; $rec < $numRecords; $rec++)
      {
	unless ( $record_->[$rec]->{superfamily} =~ /(low_complexity|simple_repeat)/i )
	  {
	    $index = $record_->[$rec]->{index};
	    $id = $record_->[$rec]->{id};

	    if ( $record_->[$rec]->{query} ne $currentQuery )
	      { # new query
		$currentHighestCoord = 0;
		$currentQuery = $record_->[$rec]->{query};
	      }

	    if ( $id =~ /^n/ )
	      { # rec is part of a non-LTR element
		$indexRec_ = $nonLTRrec_;
		$eStart = $record_->[$rec]->{chromoStart} if ( $index ne -1 && $index eq getFirstFragment($index, $indexRec_) );
		$eEnd = $record_->[$rec]->{chromoEnd} if ( $index ne -1 && $index eq getLastFragment($index, $indexRec_) );
	      }
	    elsif ( $id =~ /^[iu]/ )
	      { # rec is part of 'complete' LTR element
		$annot_ = $cLTRannot_;
		$annotIndex = $cLTRannotHashTable_->{$id};

		if ( LTRname($record_->[$rec]->{refName}, $record_->[$rec]->{superfamily}) )
		  { # this is an LTR
		    $indexRec_ = $LTRrec_;
		    if ( $index ne -1 && $index eq  getFirstFragment($index, $indexRec_) )
		      { # this record marks the beginning of an LTR
			if ( defined($annotIndex) && $record_->[$rec]->{chromoStart} eq $annot_->[$annotIndex]->{start} )
			    { # this LTR marks the beginning of the element
			      $eStart = $record_->[$rec]->{chromoStart};
			    }
		      }
		    if ( $index ne -1 && $index eq  getLastFragment($index, $indexRec_) )
		      { # this record marks the end of an LTR
			if ( defined($annotIndex) && $record_->[$rec]->{chromoEnd} eq $annot_->[$annotIndex]->{end} )
			    { # this LTR marks the end of the element
			      $eEnd = $record_->[$rec]->{chromoEnd};
			    }
		      }
		  }
		elsif ( INTname($record_->[$rec]->{refName}, $record_->[$rec]->{superfamily}) )
		  { # this is an IR
		    $indexRec_ = $IRrec_;
		    if ( $index ne -1 && $index eq  getFirstFragment($index, $indexRec_) )
		      { # this record marks the beginning of an IR
			if ( defined($annotIndex) && $record_->[$rec]->{chromoStart} eq $annot_->[$annotIndex]->{start} )
			    { # this IR marks the beginning of the element
			      $eStart = $record_->[$rec]->{chromoStart};
			    }
		      }
		    if ( $index ne -1 && $index eq  getLastFragment($index, $indexRec_) )
		      { # this record marks the end of an IR
			if ( defined($annotIndex) && $record_->[$rec]->{chromoEnd} eq $annot_->[$annotIndex]->{end} )
			    { # this IR marks the end of the element
			      $eEnd = $record_->[$rec]->{chromoEnd};
			    }
		      }
		  }
	      }
	    elsif ( $id =~ /t(\d+)/ )
	      { # rec is part of truncated LTR element
		$annot_ = $tLTRannot_;
		$annotIndex = $1 - 1;
		if ( LTRname($record_->[$rec]->{refName}, $record_->[$rec]->{superfamily}) )
		  { # this is an LTR
		    $indexRec_ = $LTRrec_;
		    $id =~ s/[ab]$//;  # remove 'a' or 'b' label from id
		    if ( $index ne -1 && $index eq getFirstFragment($index, $indexRec_) )
		      { # this record marks the beginning of an LTR
			if ( defined($annotIndex) && $record_->[$rec]->{chromoStart} eq $annot_->[$annotIndex]->{start} )
			    { # this LTR marks the beginning of the element
			      $eStart = $record_->[$rec]->{chromoStart};
			    }
		      }
		    if ( $index ne -1 && $index eq  getLastFragment($index, $indexRec_) )
		      { # this record marks the end of an LTR
			if ( defined($annotIndex) && $record_->[$rec]->{chromoEnd} eq $annot_->[$annotIndex]->{end} )
			    { # this LTR marks the end of the element
			      $eEnd = $record_->[$rec]->{chromoEnd};
			    }
		      }
		  }
		elsif ( INTname($record_->[$rec]->{refName}, $record_->[$rec]->{superfamily}) )
		  { # this is an IR
		    $indexRec_ = $IRrec_;
		    if ( $index ne -1 && $index eq  getFirstFragment($index, $indexRec_) )
		      { # this record marks the beginning of an IR
			if ( defined($annotIndex) && $record_->[$rec]->{chromoStart} eq $annot_->[$annotIndex]->{start} )
			    { # this IR marks the beginning of the element
			      $eStart = $record_->[$rec]->{chromoStart};
			    }
		      }
		    if ( $index ne -1 && $index eq  getLastFragment($index, $indexRec_) )
		      { # this record marks the end of an IR
			if ( defined($annotIndex) && $record_->[$rec]->{chromoEnd} eq $annot_->[$annotIndex]->{end} )
			    { # this IR marks the end of the element
			      $eEnd = $record_->[$rec]->{chromoEnd};
			    }
		      }
		  }
	      }
	    elsif ( $id =~ /^s/ )
	      { # rec is part of solo LTR element
		$indexRec_ = $LTRrec_;
		$eStart = $record_->[$rec]->{chromoStart} if ( $index ne -1 && $index eq getFirstFragment($index, $indexRec_) );
		$eEnd = $record_->[$rec]->{chromoEnd} if ( $index ne -1 && $index eq getLastFragment($index, $indexRec_) );
		$id =~ s/^solo-//;  # remove prefix from solo id (in record array) to match the id in the solo annotation array
	      }

	    # as we are scanning along the query sequences, termini should be automatically sorted
	    push @sortedTermini, { query=>$currentQuery, id=>$id, coord=>$eStart, rec=>$rec } if ( $eStart );
	    push @sortedTermini, { query=>$currentQuery, id=>$id, coord=>$eEnd, rec=>$rec } if ( $eEnd );
	  }
	$eStart = 0;
	$eEnd = 0;
      }
    return @sortedTermini;
  }


###################
# updates "truncated nesting" annotation to include second argument (plus any elements nested within it) as nested in first argument
sub annotateTruncatedNesting
  {
    # last arg is a ref to an array of element models ordered along a given query sequence
    my ($nestIndex, $nestedIndex, $queryElements_) = @_;
    my $nestedID = $queryElements_->[$nestedIndex]->{id};
    my $nestID = $queryElements_->[$nestIndex]->{id};
    my $numQueryElements = scalar(@{$queryElements_});

    # annotate truncated nesting
    $queryElements_->[$nestedIndex]->{nest}++;
    $queryElements_->[$nestedIndex]->{nestIDs} = ($queryElements_->[$nestedIndex]->{nestIDs}=~/\d/)? 
                                                  $queryElements_->[$nestedIndex]->{nestIDs}."-".$nestID : $nestID;
    # update nesting annotation of any elements nested within the interrupting element
    my $i=1;
    while ( $nestedIndex + $i < $numQueryElements && $queryElements_->[$nestedIndex + $i]->{nestIDs} =~ /\b$nestedID\b/ )
      { # current element nested within interrupting element
	$queryElements_->[$nestedIndex + $i]->{nest}++;
	$queryElements_->[$nestedIndex + $i]->{nestIDs} = $nestID."-".$queryElements_->[$nestedIndex + $i]->{nestIDs};
	$i++;
      }
    while ( $nestedIndex - $i >= 0 && $queryElements_->[$nestedIndex - $i]->{nestIDs} =~ /\b$nestedID\b/ )
      { # current element nested within interrupting element
	$queryElements_->[$nestedIndex - $i]->{nest}++;
	$queryElements_->[$nestedIndex - $i]->{nestIDs} = $nestID."-".$queryElements_->[$nestedIndex - $i]->{nestIDs};
	$i++;
      }
  }


###################
# Calculates lower bound on the age (K) of an incomplete LTR- or of a non-LTR element if nested within a complete LTR-element.
# Last arg is a ref to an array of annotation records in a given query sequence, first arg a ref to an element of such hash.
sub ageLowerBound
  {
    my ($element_, $queryElements_, $queryHash_) = @_;
    if ($element_->{K})
      { # LTRs for this element have been aligned (matched overlapping regions of the ref LTR seq)

	# get IDs of any elements nesting this element (excluding *complete* LTR-elements)
	my @nest = grep {/^[^ui]/} split("-", $element_->{nestIDs});
	foreach my $nest (@nest)
	  {
	    my $nestIndex = $queryHash_->{$nest};
#	    $nestIndex++ while ( $queryElements_->[$nestIndex]->{id} ne $nest );
	    # get previous value of the bound K in case there are more than one complete LTR-element nested within,
	    # we want the oldest complete LTR-element insertion value to provide the bound
	    my ($previousKlowerBound) = ($queryElements_->[$nestIndex]->{K})? $queryElements_->[$nestIndex]->{K} =~ />(\d\.\d+)/ : (0);
	    my $Kbound =  sprintf("%.4f",$element_->{K});
	    my $K_sdBound = sprintf("%.4f",$element_->{Ksd});
	    my $timeBound = sprintf("%.3f",$element_->{time});
	    my $time_sdBound = sprintf("%.3f",$element_->{timesd});
	    unless ($previousKlowerBound)
	      {
		$queryElements_->[$nestIndex]->{K} = (defined($queryElements_->[$nestIndex]->{K}))?
		  ">".$Kbound.$queryElements_->[$nestIndex]->{K} : ">".$Kbound;
		$queryElements_->[$nestIndex]->{Ksd} = (defined($queryElements_->[$nestIndex]->{Ksd}))?
		  ">".$K_sdBound.$queryElements_->[$nestIndex]->{Ksd} : ">".$K_sdBound;
		$queryElements_->[$nestIndex]->{time} = (defined($queryElements_->[$nestIndex]->{time}))?
		  ">".$timeBound.$queryElements_->[$nestIndex]->{time} : ">".$timeBound;
		$queryElements_->[$nestIndex]->{timesd} = (defined($queryElements_->[$nestIndex]->{timesd}))?
		  ">".$time_sdBound.$queryElements_->[$nestIndex]->{timesd} : ">".$time_sdBound;
	      }
	    elsif ( $element_->{K} > $previousKlowerBound )
	      { # we can increase the lower bound
		$queryElements_->[$nestIndex]->{K} =~ s/>\d\.\d+/>$Kbound/;
		$queryElements_->[$nestIndex]->{Ksd} =~ s/>\d\.\d+/>$K_sdBound/;
		$queryElements_->[$nestIndex]->{time} =~ s/>\d\.\d+/>$timeBound/;
		$queryElements_->[$nestIndex]->{timesd} =~ s/>\d\.\d+/>$time_sdBound/;
	      }
	  }
      }
  }


###################
# Calculates upper bound on the age (K) of an incomplete LTR- or of a non-LTR element if nested within a complete LTR-element.
# Last arg is a ref to an array of annotation records in a given query sequence, first arg a ref to a complete LTR-element of such hash.
sub ageUpperBound
  {
    my ($element_, $queryElements_, $queryHash_) = @_;
    # get IDs of any *complete* LTR elements nesting this element
    my @nest = grep {/u|i/} split("-", $element_->{nestIDs});
    if (@nest)
      { # there is at least one complete LTR element nesting this one
	my $nest = $nest[-1];
	my $nestIndex = $queryHash_->{$nest};
#	$nestIndex++ while ( $queryElements_->[$nestIndex]->{id} ne $nest );
	my $K = $queryElements_->[$nestIndex]->{K};
	my $Kbound = ( $K && $K ne "NA")? sprintf("%.4f",$K) : "";
	my $Ksd = $queryElements_->[$nestIndex]->{Ksd};
	my $K_sdBound = ($Ksd && $Ksd ne "NA")? sprintf("%.4f",$Ksd) : "";
	my $time = $queryElements_->[$nestIndex]->{time};
	my $timeBound = ($time && $time ne "NA")? sprintf("%.3f",$time) : "";
	my $timesd = $queryElements_->[$nestIndex]->{timesd};
	my $time_sdBound = ($timesd && $timesd ne "NA")? sprintf("%.3f",$timesd) : "";
	if ($Kbound)
	  { # the nest LTRs have been aligned (had overlapping matches to the ref seq)
	    $element_->{K} = (defined($element_->{K}))? $element_->{K}."<".$Kbound : "<".$Kbound;
	    $element_->{Ksd} = (defined($element_->{Ksd}))? $element_->{Ksd}."<".$K_sdBound : "<".$K_sdBound;
	    $element_->{time} = (defined($element_->{time}))? $element_->{time}."<".$timeBound : "<".$timeBound;
	    $element_->{timesd} = (defined($element_->{timesd}))? $element_->{timesd}."<".$time_sdBound : "<".$time_sdBound;
	  }
      }
  }


###################
# Returns the record number of the last hit associated with the given element.
# (note that element here refers to an element proper, so in case of LTRelements
# this can span both LTR and IR hits.

sub getLastRec
{
  # First arg is the record number of a hit, second is a reference to the all hits-array, last an index array.
  my($rec, $record_, $indexRec_) = @_;
  return undef if ( !defined($rec) || $rec < 0 );  # rec number out of range
  my $numRecs = scalar(@{$record_});
  return undef if ( $rec > $numRecs - 1 );  # rec number out of range

  my $LTRelement = ($record_->[$rec]->{superfamily} =~ /LTR/i)? 1:0;
  # if this record refers to a non-LTR element or hasn't yet been processed, return last fragment from index
  # (note that in case of an LTRelment IR/LTR any associated LTRs/IR-LTR cannot not be considered if it record hasn't been processed)
  my $label = $record_->[$rec]->{id};
  return $indexRec_->[ getLastFragment($record_->[$rec]->{index},$indexRec_) ][0] if ( !$LTRelement || ($LTRelement && !$label) );

  $label =~ s/[a-b]$//;  # remove 'a' or 'b' in case of an LTR hit
  for (my $hitRec = $numRecs-1;  $hitRec >= $rec; $hitRec--)
    {
      return $hitRec if ( $record_->[$hitRec]->{id} =~ /($label)[a-b]?$/ );
    }
}


###################
# Returns the record number of the first hit associated with the given element.
# (note that element here refers to an element proper, so in case of LTRelements
# this can span both LTR and IR hits.

sub getFirstRec
{
  # First arg is the record number of a hit, second is a reference to the all hits-array, last an index array.
  my($rec, $record_, $indexRec_) = @_;
  return undef if ( !defined($rec) || $rec < 0 );  # rec number out of range
  my $numRecs = scalar(@{$record_});
  return undef if ( $rec > $numRecs - 1 );  # rec number out of range

  my $LTRelement = ($record_->[$rec]->{superfamily} =~ /LTR/i)? 1:0;
  # if this record refers to a non-LTR element or hasn't yet been processed, return first fragment from index
  # (note that in case of an LTRelement IR/LTR any associated LTRs/IR-LTR cannot not be considered if its record hasn't been processed)
  my $label = $record_->[$rec]->{id};
  return $indexRec_->[ getFirstFragment($record_->[$rec]->{index},$indexRec_) ][0] if ( !$LTRelement || ($LTRelement && !$label) );

  $label =~ s/[a-b]$//;  # remove 'a' or 'b' in case of an LTR hit
  for (my $hitRec = 0;  $hitRec <= $rec; $hitRec++)
    {
      return $hitRec if ( $record_->[$hitRec]->{id} =~ /($label)[a-b]?$/ );
    }
}


###################
# Returns the index of the last hit associated with the given element.
# (note that as far as this subroutine is concerned, an LTR or IR of an LTRelement
# is an "element", as opposed to the whole retrotransposon or endogenous retrovirus).

sub getLastFragment
{
  # First arg is the index of a hit, second is a reference to the Index-array.
  my($index, $indexrecord_) = @_;
  return -1 if ( $index eq -1 );  # index out of range
  my $numIndexRecs =  scalar(@{$indexrecord_});
  return -1 if ( $index > $numIndexRecs - 1 );  # index out of range

#  sort {$a<=>$b} @{$indexrecord_->[$index][2]};

  return $indexrecord_->[$index][2]->[-1];
}


###################
# Returns the index of the first hit associated with this "element"
# (note that as far as this subroutine is concerned, an LTR or IR of an LTRelement
# is an "element", as opposed to the whole retrotransposon or endogenous retrovirus).

sub getFirstFragment
  {
    # First arg is the index of a hit, second is a reference to the Index-array.
    my($index, $indexrecord_) = @_;
    return -1 if ( $index eq -1 );  # index out of range
    my $numIndexRecs = scalar(@{$indexrecord_});
    return -1 if ( $index > $numIndexRecs - 1 );  # index out of range

    #  sort {$a<=>$b} @{$indexrecord_->[$index][2]};
    return $indexrecord_->[$index][2]->[0];
  }


###################
# Returns an array of indices associated with all the hits to a given element (whose index is passed as the first arg).
# (note that as far as this subroutine is concerned, an LTR or IR of an LTRelement
# is an "element", as opposed to the whole retrotransposon or endogenous retrovirus).

sub getFrags {
	# First arg is the index of a hit, second is a reference to the Index-array.
	my($index, $indexrecord_) = @_;

	return () if (!defined($index) || $index eq -1);
	my $numIndexRecs =  scalar(@{$indexrecord_});
	return () if ( $index > $numIndexRecs-1 );  # index out of range

	my @frags;
	my $label = $indexrecord_->[$index][1];

	# return input index if this hasn't been labeled yet
	return ($index) if ($label eq 0);

	for (my $indexindex=0;  $indexindex < $numIndexRecs; $indexindex++)
	  {
		push @frags, $indexindex if ( $indexrecord_->[$indexindex][1] eq $label );
	  }
	return @frags;
}


###################
# Returns the combined length of reference sequence matches to query hits labeled as fragments of same LTR or internal region

sub getRefLength {
	# First arg is an index, second a reference to the index array, last a ref to the all-hits records array
	my($hitIndex, $indexRec_, $record_) = @_;

	my $totalLength = 0;
	my $label = $indexRec_->[$hitIndex][1];
	my $rec = $indexRec_->[$hitIndex][0];

	# return ref length for hit passed as argument if it hasn't been labeled
	return $record_->[$rec]->{"refEnd"} - $record_->[$rec]->{"refStart"} + 1 if ($label eq 0);

	foreach my $i (@{$indexRec_->[$hitIndex][2]})
	  {
	    $rec = $indexRec_->[$i][0];
	    $totalLength += $record_->[$rec]->{refEnd} - $record_->[$rec]->{refStart} + 1;
	  }
	return $totalLength;
}


###################
# Returns the combined (chromosomal) length of hits labeled as fragments of same LTR or internal region

sub getLength {
	# First argument is an LTR or IR index, second a ref to the index array, last a ref to the records array
	my($hit, $indexRec_, $record_) = @_;

	my $totalLength = 0;
	my $label = $indexRec_->[$hit][1];
	my $rec = $indexRec_->[$hit][0];

	# return length of hit passed as argument if it hasn't been labeled
	return $record_->[$rec]->{"chromoEnd"} - $record_->[$rec]->{"chromoStart"} + 1 if ($label eq 0);
	foreach my $i (@{$indexRec_->[$hit][2]})
	  {
	    $rec = $indexRec_->[$i][0];
	    $totalLength += $record_->[$rec]->{"chromoEnd"} - $record_->[$rec]->{"chromoStart"} + 1;
	  }
	return $totalLength;
}


###################
# Returns the chromosomal sequence length between the beginning and end of the first and last hits, respectively,
# which are labeled as fragments of same element

sub getChromoLength {
	# First argument is an LTR or IR index, second a ref to the index array, last a ref to the records array
	my($hit, $indexrec_, $record_) = @_;

	my $totalLength = 0;
	my $label = $indexrec_->[$hit][1];
	my $rec = $indexrec_->[$hit][0];

	# return length of hit passed as argument if it hasn't been labeled
	return $record_->[$rec]->{"chromoEnd"} - $record_->[$rec]->{"chromoStart"} + 1 if ($label eq 0);

	my @indexR = @{$indexrec_->[$hit][2]};
	my $firstRec = $indexrec_->[ $indexR[0] ][0];
	my $lastRec = $indexrec_->[ $indexR[-1] ][0];
        return $record_->[$lastRec]->{"chromoEnd"} - $record_->[$firstRec]->{"chromoStart"} + 1;
}


###################
# If option -f has been used, a file containing the names of EQUIVALENT (i.e. closely related) entries
# in the reference library has been read in, and the equivalent names stored in parameter {fuzzynames}.
# Takes a ref seq name and returns the first equivalent name if any.

sub fuzzyNames
  {
    # Args are a reference seq name and a ref to the parameter hash
    my($seqName, $parameter_) = @_;

    return($seqName) unless( grep(/$seqName/i, @{$parameter_->{atomicfuzzynames}}) );

    # hash will store the first name and the matching name of a given equivalence group
    my %nameMatch = (family=>"", name=>"");
    foreach my $nameList_ (@{$parameter_->{fuzzynames}})
      {
	my $family = $nameList_->[0];
	foreach my $name (@{$nameList_})
	  {
	    return $family if($name eq $seqName);
	    if ( (grep {/$name/i} $seqName) && length($name) > length($nameMatch{name}) )
	      {
		$nameMatch{name} = $name;
		$nameMatch{family} = $family
	      }
	  }
      }

    #return (length($nameMatch{name}) > 0)? $nameMatch{family} : $seqName;
    return $nameMatch{family};
}


###################
# Returns TRUE if the ends two sequences, whose records are passed (in order) as first two arguments,
# match apart from a maximum gap set by the last argument. The third argument is a reference to the
# (all) records array.

sub fuzzyCoordinateMatch {
	# First two args are the records of two sequences
	my($rec1, $rec2, $record_, $tolerance) = @_;
	my $chromoEnd1 = $record_->[$rec1]->{"chromoEnd"};
	my $chromoStart2 = $record_->[$rec2]->{"chromoStart"};
	my $refLength = $record_->[$rec1]->{refEnd} + $record_->[$rec1]->{leftAfterRefEnd};

	# make sure maximum boundary tolerance is 10% of reference library sequence length
	$tolerance = $refLength/10 if ( $tolerance > $refLength/10 );

	# return TRUE if sequence ends are within tolerated distance
	return 1 if ( abs($chromoStart2 - $chromoEnd1) <= $tolerance );
	return 0;  # if sequence ends are farther apart than tolerated distance
}


###################
# Returns TRUE if two hits (whose recs are ordered in the first two arguments) match consecutive
# (with some boundary tolerance) segments of the reference (library) sequence.
# The third argument is a reference to the (all) records array.
# (It assumes that the 2nd hit has the same family name, orientation, and host query name as
# the 1st record.)

sub consecutiveReferenceMatch {
	# First two args are the records of two hits
	my($rec1, $rec2, $record_, $tolerance) = @_;
	my $orientation = $record_->[$rec1]->{orientation};
	my $refLength = $record_->[$rec1]->{refEnd} + $record_->[$rec1]->{leftAfterRefEnd};

	# make sure maximum boundary tolerance is 10% of reference library sequence length
	$tolerance = $refLength/10 if ( $tolerance > $refLength/10 );

	my $prevRefEnd = ($orientation eq "+") ? $record_->[$rec1]->{refEnd} : $record_->[$rec2]->{refEnd};
	my $nextRefStart = ($orientation eq "+") ? $record_->[$rec2]->{refStart} : $record_->[$rec1]->{refStart};

	# return TRUE if matches to the ref seq are consecutive within tolerance
	return 1 if ( ($nextRefStart - $prevRefEnd) >= (0 - $tolerance) );
	return 0;  # if sequence ends overlap by more than tolerated distance
}


###################
# Returns the (LTR-record) index of first as yet unpaired LTR sequence

sub get1UnprocessedLTRindex {
	# First and second args are references to the LTR- and all- records arrays. Last (optional) argument is a lower boundary
	# for the returned index.
	my($LTRrecord_, $numLTRrecords, $record_, $minIndex) = @_;
	my $LTRindex = ( defined($minIndex) ) ? $minIndex : 0;

	# get first unprocessed LTR index
	$LTRindex++ while ( $LTRindex < ($numLTRrecords-1) && $record_->[ $LTRrecord_->[$LTRindex][0] ]->{"id"} );

	return ( $LTRindex < $numLTRrecords ) ? $LTRindex : -1;
}


###################
# Returns the (LTR-record) indices of first two as yet unprocessed LTR sequences. It makes sure that
# index 1 is the innermost (highest record number) in case LTR1 corresponds to multiple records.

sub get2UnprocessedLTRindices {
	# Args 1 and 3 are references to the LTR- and all- records arrays. The last argument
	# is an excess length tolerance parameter used when finding the innermost index1. The
	# second arg is the total number of LTR records.
	my($LTRrecord_, $numLTRrecords, $record_, $tolerance) = @_;
	return (-1,-1) unless (@{$LTRrecord_});

	# get first unprocessed LTR index
	my $index1 = get1UnprocessedLTRindex($LTRrecord_, $numLTRrecords, $record_);  # get index for LTR1
	# get innermost record if LTR1 correspond to multiple records
	$index1 = getLastFragment($index1, $LTRrecord_);

	# get second unprocessed LTR index
	my $index2 = get1UnprocessedLTRindex($LTRrecord_, $numLTRrecords, $record_, $index1+1);  # get index for LTR2

	return ($index1, $index2);
}


###################
# Returns true if record indices in given list are hits to the same LTR or IR

sub sameElement {
  # First two args are references to a hit-index and the all-records arrays. Third argument is a ref
  # to the parameter hash. Last argument is a list of indices of the putative element fragments.
	my($indexRecord_, $record_, $parameter_, @elementFrag) = @_;
	my $numFragments = scalar(@elementFrag);
	my $tolerance = $parameter_->{boundaryTolerance}; # max tolerated fragment overlap (in number of bases)

#	my $continuityCheck = 1; # records can be considered as hits to the same LTR (e.g. when another retrotransposon insertion within the LTR)
	# check if LTRorIR records have the same name and orientation,
	# and whether they match consecutive regions of the reference sequence
	my $nameAndOrientationCheck = 1;
	my $queryNameCheck = 1;
	my $consecutiveReferenceMatchCheck = 1;
	for (my $frag=1; $frag < $numFragments; $frag++) 
	  {
	    my $LTRorIRrec1 = $indexRecord_->[$elementFrag[$frag-1]][0];
	    my $LTRorIRrec2 = $indexRecord_->[$elementFrag[$frag]][0];
	    $nameAndOrientationCheck++ if ( nameAndOrientationMatch($LTRorIRrec1, $LTRorIRrec2, $record_, $parameter_) );
	    #		$continuityCheck++ if ( fuzzyCoordinateMatch($LTRrec1, $LTRrec2, $record_, $tolerance) );
	    $queryNameCheck++ if ( $record_->[$LTRorIRrec1]->{"query"} eq $record_->[$LTRorIRrec2]->{"query"} );
	    $consecutiveReferenceMatchCheck++ if ( consecutiveReferenceMatch($LTRorIRrec1, $LTRorIRrec2, $record_, $tolerance) );
	  }
	
	return 1 if ( # ($indexCheck eq $numFragments) &&
		      ($nameAndOrientationCheck eq $numFragments) &&
		      # ($continuityCheck eq $numFragments) &&
		      ($queryNameCheck eq $numFragments) &&
		      ($consecutiveReferenceMatchCheck eq $numFragments) );  # possibly fragments of same LTRorIR!

	return 0;  # not fragments of same LTR or IR
}


###################
# Processes all records (hits) associated with a 'complete' LTR element.

sub processPair 
  {
  # First two args are the indices of the (element's flanking) LTRs in the LTR-records array,
  # followed by a string containing all LTR indices associated with the element separated by hyphens.
  # The fourth is a ref to the indices of the IR in the IR-records array, followed by a string containing all IR indices 
  # associated with the element separated by hyphens.
  # The sixth is a reference to the LTR-records array, seventh a reference to the IR-records array. The eighth is
  # a reference to the records array. Nineth arg is this element's id. The tenth is a reference to the entire chromosome
  # sequence. The ninth arg is the path to the output files to contain the sequences of the LTR pairs.

    my($LTR1,$LTR2,$LTRindices,$IR_,$IRindices,$LTRrecord_,$IRrecord_,$record_,$id,$chromoSeq_,$FASTAFilePath,$parameter_,$completeLTRannotation_) = @_;
    my ($numProcessedLTRrecords, $numProcessedIRrecords) = (0,0);

    my @LTR1index =  @{$LTRrecord_->[$LTR1]->[2]};
    my @LTR2index =  @{$LTRrecord_->[$LTR2]->[2]};
    my @IRindex = @{$IR_};
    my @IRindices = split("-", $IRindices);
    my @LTRindices = split("-", $LTRindices);
    my $numLTRfrags1 = scalar(@LTR1index);
    my $numLTRfrags2 = scalar(@LTR2index);
    my $numInternalFrags = scalar(@IRindex);

    my $orientation = $record_->[ $LTRrecord_->[$LTR1][0] ]->{orientation};
    my $name = LTRname($record_->[ $LTRrecord_->[$LTR1][0] ]->{refName}, $record_->[ $LTRrecord_->[$LTR1][0] ]->{superfamily});
#    $name =~ s/(.*)_LTR.*/$1/;  # remove '_LTR' from ref name of LTRs
    my $label1 = $LTRrecord_->[$LTR1][1];
    my $label2 = $LTRrecord_->[$LTR2][1];
    my $id1 = $id."a";
    my $id2 = $id."b";
    my @internalRecs;      # will store the record numbers of matches to the internal region of this element

    # label all records (hits) associated with LTRs and the IR  as part of the element
    foreach my $hit (@IRindices)
      {
	my $rec = $IRrecord_->[$hit][0];
	$record_->[$rec]->{"id"} = $id;
	push @internalRecs, $rec if ( grep /^$hit$/,@IRindex );
      }
    foreach my $hit (@LTRindices)
      {
	my $rec = $LTRrecord_->[$hit][0];
	$record_->[$rec]->{"id"} = $id;
      }

    # store record numbers and info on chromosomal hits (fragments) associated with this single element insertion
    my @allLTRrecs = ();
    my @allIRrecs = ();
    foreach my $index (@LTRindices)
      {
	push @allLTRrecs, $LTRrecord_->[$index][0];
      }
    foreach my $index (@IRindices)
      {
	push @allIRrecs, $IRrecord_->[$index][0];
      }
    my @pairFragInfo = (\@allLTRrecs, \@allIRrecs, $numLTRfrags1, $numLTRfrags2, $numInternalFrags);

     my $firstLTR1rec = $LTRrecord_->[ $LTR1index[0] ][0];
     my $innermostLTR1rec = $LTRrecord_->[ $LTR1index[-1] ][0];
     my $innermostLTR2rec = $LTRrecord_->[ $LTR2index[0] ][0];
     my $lastLTR2rec = $LTRrecord_->[ $LTR2index[-1] ][0];
     my $firstLTR1start= $record_->[$firstLTR1rec]->{"refStart"};
     my $firstLTR1end= $record_->[$firstLTR1rec]->{"refEnd"};
     my $innerStart1= $record_->[$innermostLTR1rec]->{"refStart"};
     my $innerEnd1= $record_->[$innermostLTR1rec]->{"refEnd"};
     my $innerStart2= $record_->[$innermostLTR2rec]->{"refStart"};
     my $innerEnd2= $record_->[$innermostLTR2rec]->{"refEnd"};
     my $lastLTR2end= $record_->[$lastLTR2rec]->{"refEnd"};
     my $lastLTR2start= $record_->[$lastLTR2rec]->{"refStart"};

      savePair($firstLTR1rec, $innermostLTR1rec, $innermostLTR2rec, $lastLTR2rec, $record_, $id, $chromoSeq_,
	       $FASTAFilePath, @pairFragInfo, $parameter_, $completeLTRannotation_, $MESSAGES, @internalRecs);
  }


###################
# Processes all records (hits) associated with a 'complete' LTR element.

sub processTruncated 
  {
    # First arg is an index of the element's LTR in the LTR-records array (if no LTR the index = -1),
    # followed by a string containing all LTR indices associated with the element separated by hyphens.
    # The third arg is an index of the IR in the IR-records array, followed by a string containing all IR indices 
    # associated with the element separated by hyphens.
    # The fifth is a reference to the LTR-records array, sixth a reference to the IR-records array. The seventh is
    # a reference to the records array. The eighth arg is this element's id. The tenth is a reference to the entire chromosome
    # sequence. The ninth arg is the path to the output files to contain the sequences of the LTR pairs.
    my($LTR1,$LTRindices,$IR,$IRindices,$LTRrecord_,$IRrecord_,$record_,$id,$chromoSeq_,$FASTAFilePath,$parameter_,$truncatedLTRannotation_) = @_;

	my @LTRindex =  ($LTR1 ne -1)? @{$LTRrecord_->[$LTR1]->[2]} : ();
	my @IRindex = ($IR ne -1)? @{$IRrecord_->[$IR]->[2]} : ();
	my @IRindices;
	my @LTRindices;
	@IRindices = ($IR ne -1)? split("-", $IRindices) : ();
	@LTRindices = ($LTR1 ne -1)? split("-", $LTRindices) : ();
	my $numLTRfrags1 = (@LTRindex)? scalar(@LTRindex) : 0;
	my $numInternalFrags = (@IRindex)? scalar(@IRindex) : 0;

	my $orientation = ($LTR1 ne -1)? $record_->[ $LTRrecord_->[$LTR1][0] ]->{"orientation"} :
	                                 $record_->[ $IRrecord_->[$IR][0] ]->{"orientation"};
	my $name = ($LTR1 ne -1)? LTRname($record_->[ $LTRrecord_->[$LTR1][0] ]->{refName}, $record_->[ $LTRrecord_->[$LTR1][0] ]->{superfamily}) :
	                          INTname($record_->[ $IRrecord_->[$IR][0] ]->{refName}, $record_->[ $IRrecord_->[$IR][0] ]->{superfamily});
#	$name =~ s/^(.+)_((LTR)|(I)).*/$1/;  # remove '_LTR' from ref name of LTRs
	my $LTRlabel =  ($LTR1 ne -1)? $LTRrecord_->[$LTR1][1] : "";
	my $IRlabel = ($IR ne -1)? $IRrecord_->[$IR][1] : "";
	my $id1 = $id."a";
	my @internalRecs;      # will store the record numbers of matches to the internal region of this element


	# label all records (hits) associated with LTRs and the IR  as part of the element
	foreach my $hit (@IRindices)
	  {
#	    $IRrecord_->[$hit][2] = \@IRindices;
	    my $rec = $IRrecord_->[$hit][0];
	    $record_->[$rec]->{"id"} = $id;
	    push @internalRecs, $rec;
	  }
	foreach my $hit (@LTRindices)
	  {
#	    $LTRrecord_->[$hit][2] = \@LTRindices;
	    my $rec = $LTRrecord_->[$hit][0];
	    $record_->[$rec]->{"id"} = $id;
	  }
	foreach my $hit (@LTRindex)
	  {
	    $record_->[ $LTRrecord_->[$hit][0] ]->{"id"} = $id1;
	  }

	# store info on number of chromosomal hits (fragments) associated with this single element insertion
	my @fragInfo = ($numLTRfrags1, $numInternalFrags);

	saveTruncated($LTR1,$LTRindices,$IR,$IRindices,$LTRrecord_,$IRrecord_,$record_,$id,$chromoSeq_,$FASTAFilePath,@fragInfo,
		      $parameter_,$truncatedLTRannotation_,$MESSAGES,@internalRecs
		     );
}


###################
# Writes sequence(s) of the internal region of an element whose both LTRs have
# been identified to a fasta format file. Returns the
# gapped sequence correponding to a consensus element
# (and possibly the ungapped chromosomal
# sequence, including inserted, intervening sequences that probably don't belong to original element).

sub saveInternalRegion {
	# First arg is a ref to the all-records array.
	# 2nd arg is the path to the output file. 3rd is the element ID. 4th is a ref to the chromosomal sequence.
	# Last arg is an array storing the record indices associated with the internal region.

	my($record_, $path, $id, $chromoSeq_, $MESSAGES, $parameter_, @internalRecs) = @_;
	my $outFile = $id.".INT.fasta";
	$outFile =~ tr/\//\./;
	my $FASTAFilePath = $path."internal/";
	my $sequence = "";
	my @family;

	# store the chromosome coordinates of and other info on the fragment sequences to be written to the out file
	my @startCoord;
	my @endCoord;
	my @startRef;
	my @endRef;
	my @leftAfterRefEnd;
	my @div;
	my $numFrags = scalar(@internalRecs);
	for (my $frag=0; $frag<$numFrags; $frag++) {
		push @startCoord, $record_->[$internalRecs[$frag]]->{"chromoStart"};
		push @endCoord, $record_->[$internalRecs[$frag]]->{"chromoEnd"};
		push @startRef, $record_->[$internalRecs[$frag]]->{"refStart"};
		push @endRef, $record_->[$internalRecs[$frag]]->{"refEnd"};
		push @leftAfterRefEnd, $record_->[$internalRecs[$frag]]->{"leftAfterRefEnd"};
		push @div, $record_->[$internalRecs[$frag]]->{"divergenceFromRef"};
	}

	# get all reference names associated with these LTR sequences (if different from each other)
	my $recRefName;
	foreach my $rec (@internalRecs) {
		$recRefName = INTname($record_->[$rec]->{refName}, $record_->[$rec]->{superfamily});
#		$recRefName =~ s/(.+)_I.*$/$1/i;
		push(@family, $recRefName) if ( !grep(/$recRefName/, @family) );
	}

	my $orientation = $record_->[$internalRecs[0]]->{"orientation"};	# store orientation of the element on the chromosome
	my $query = $record_->[$internalRecs[0]]->{"query"};
	my @elementData = (\@startCoord,\@endCoord,\@startRef,\@endRef,\@leftAfterRefEnd,$query,$orientation);
	my $FASTAid = $id."_INT";
	my $FASTA = *FASTA;
	
	# SAVE GAPPED SEQUENCE IN FASTA FORMAT
	open(FASTA, ">".$FASTAFilePath.$outFile) || print $MESSAGES "can't open file ".$FASTAFilePath.$outFile.": $! \n";
	$sequence = writeGappedSeq($FASTAid, $FASTA, $chromoSeq_, $MESSAGES, @elementData);
	close(FASTA) || print $MESSAGES "can't close file ".$FASTAFilePath.$outFile.": $! \n";

	my $ungappedSeq="";
	if($parameter_->{ungapped}) {
	  # SAVE UNGAPPED (CHROMOSOMAL) SEQUENCE FOR THE REGION AS WELL
	  $outFile = $id.".INT.ungapped.fasta";
	  $outFile =~ tr/\//\./;
	  open(FASTA, ">".$FASTAFilePath.$outFile) || print $MESSAGES "can't open file ".$FASTAFilePath.$outFile.": $! \n";
	  $ungappedSeq = substr( $chromoSeq_->{$query}, $startCoord[0] - 1, $endCoord[-1] - $startCoord[0] + 1 );
	  print FASTA ">".$id."_INT_ungapped \n";
	  $FASTA = *FASTA;
	  printStringIntoLines($FASTA,60,$ungappedSeq);
	  close(FASTA) || die "can't close file ".$FASTAFilePath.$outFile.": $! \n";
	}

	return ($sequence,$ungappedSeq);
}


###################
# Takes a filehandle and a string. Divides string into
# chunks of so many characters ($lineSize) separated by new lines, then
# writes result to the filehandle

sub printStringIntoLines {

	my ($OUT, $lineSize, $sequence) = @_;
	my $FASTAtemplate;
	my $numChunks = length($sequence)/$lineSize;
	for (my $chunk=1; $chunk <= $numChunks; $chunk++) {
		$FASTAtemplate .= "A".$lineSize." ";
	}
	$FASTAtemplate .= "A*";   # append last line if shorter than $lineSize nucleotides
	my @chunksOfNucleotides = unpack($FASTAtemplate, $sequence);
	$sequence = (join "\n", @chunksOfNucleotides)."\n";
	print $OUT $sequence;
}


####################
# Processes a non-LTR element. Returns an annotation record on the element.

sub processNonLTR {
	# First arg is an index of the element in the otherIndex- array. The second is a reference to the
	# otherIndex- array, third a reference to the records array. The fourth is this element's
	# id. The 5th arg is the path to the directory to contain annotation and sequences.
	# 6th and 7th respectively refs to the hash containing query seqs.
	# The last arg is a ref to the parameter hash.
	my($index,$nonLTRrecord_,$record_,$id,$FASTAFilePath,$chromoSeq_,$parameter_, $MESSAGES) = @_;

	my @index = @{$nonLTRrecord_->[$index]->[2]};
	my $numfrags = scalar(@index);

	my $nestingLevel = 0;
	my $nestIDs = "NA";
	my $firstrec = $nonLTRrecord_->[ $index[0] ][0];
	my $lastrec = $nonLTRrecord_->[ $index[-1] ][0];
	my $family = $record_->[$firstrec]->{"refName"};
	my $query = $record_->[$firstrec]->{"query"};
	my $orientation = $record_->[$firstrec]->{"orientation"};

	my @startCoord;
	my @endCoord;
	my @startRef;
	my @endRef;
	my @leftAfterRefEnd;
	my @div;
	my @RMrec;

	# process all hits associated with the element
	foreach my $hit (@index)
	  {
	    my $rec = $nonLTRrecord_->[$hit][0];
	    $record_->[ $rec ]->{"id"} = $id;  # flag current  record as processed single

	    # store the chromosome coordinates of and other info on the fragment sequences to be written to the annotation file
	    push @startCoord, $record_->[$rec]->{"chromoStart"};
	    push @endCoord, $record_->[$rec]->{"chromoEnd"};
	    push @startRef, $record_->[$rec]->{"refStart"};
	    push @endRef, $record_->[$rec]->{"refEnd"};
	    push @leftAfterRefEnd, $record_->[$rec]->{"leftAfterRefEnd"};
	    push @div, $record_->[$rec]->{"divergenceFromRef"};
	    push @RMrec, $rec + $parameter_->{RMheaderLines} + 1;
	  }

	# write  sequence to a fasta file (unless sequence output is not required)
	unless ($parameter_->{"noSeqOutput"} )
	  {
	    my @elementData = (\@startCoord,\@endCoord,\@startRef,\@endRef,\@leftAfterRefEnd,$query,$orientation);
	    $FASTAFilePath .= "/".$parameter_->{otherOutDir}."/";
	    my $FASTAid = $id."_".$query."_".$orientation."_".$family;
	    $FASTAid =~ tr/\//\./;  # remove any forward slashes to avoid problems with shell file operations
	    my $outFile = $FASTAid.".fasta";
	    my $FASTA = *FASTA;

	    # SAVE GAPPED SEQUENCE IN FASTA FORMAT
	    open(FASTA, ">".$FASTAFilePath.$outFile) || print $MESSAGES "can't open file ".$FASTAFilePath.$outFile.": $! \n";
	    writeGappedSeq($FASTAid, $FASTA, $chromoSeq_, $MESSAGES, @elementData);
	    close(FASTA) || print $MESSAGES "can't close file ".$FASTAFilePath.$outFile.": $! \n";

	    if($parameter_->{ungapped}) {
	      # SAVE UNGAPPED CHROMOSOMAL SEQUENCE AS WELL
	      my $ungappedSeq = substr( $chromoSeq_->{$query}, $startCoord[0] - 1, $endCoord[-1] - $startCoord[0] + 1 );
	      $outFile = $FASTAid.".ungapped.fasta";
	      open(FASTA, ">".$FASTAFilePath.$outFile) || print $MESSAGES "can't open file ".$FASTAFilePath.$outFile.": $! \n";
	      print FASTA ">".$FASTAid."_ungapped \n";
	      printStringIntoLines($FASTA,60,$ungappedSeq);
	      close(FASTA) || print $MESSAGES "can't close file ".$FASTAFilePath.$outFile.": $! \n";
	    }

	  }

	# create additional annotation for this element
	my $score = $record_->[$lastrec]->{"score"};
	my $div;
	for (my $i=0; $i<scalar(@div); $i++) { $div .= defined($div)? "-".$div[$i] : $div[$i]; }
	my $reflength = $record_->[$firstrec]->{"refEnd"} + $record_->[$firstrec]->{"leftAfterRefEnd"};
	my $lengthRatio;  # ratio of element/reference length
	my $ref = 0;  # this will store the fraction of the reference  sequence matched by this
	if ( defined($reflength) && $reflength > 0 )
	  {
	    for (my $i=0; $i<scalar(@startRef); $i++) { $ref +=  ($endRef[$i]-$startRef[$i]+1)/$reflength; }
	    $lengthRatio = ( $endCoord[-1] - $startCoord[0] + 1 )/$reflength;
	  }
	else
	  {
	    $ref = "NA";
	    $lengthRatio = "NA";
	  }
	my $superfamily = $record_->[$lastrec]->{"superfamily"};
	my $distCen = $record_->[$lastrec]->{"distCen"};

	# output annotation record
	return { 'id'=>$id,
		 'query'=>$query,
		 'family'=>$family,
		 'divI'=>$div,
		 'start'=>$startCoord[0],
		 'end'=>$endCoord[-1],
		 'hitsI'=> hyphenate(@RMrec),
		 'fragsI'=>$numfrags,
		 'refI'=>$ref,
		 'lenR'=>$lengthRatio,
		 'orientation'=>$orientation,
		 'superfamily'=>$superfamily,
		 'distCen'=>$distCen,
		 'nest'=>$nestingLevel,
		 'nestIDs'=>$nestIDs,
	       };
}


##################
#  Writes to file in FASTA format the sequence of an element. Only the chromosomal sequence of the element that
# matches the reference sequence is written. For any chromosomal regions within the element where its sequence
# does not match the reference, gaps are written instead of the chromosomal sequence. If the element has truncated
# ends (relative to the reference seq), gaps are written at the ends of the sequence. The number of gaps written
# always corresponds to the number of "unmatched" (by sequence similarity search) nucleotides in the reference
# sequence.
sub writeGappedSeq
  {
    # First arg is the FASTA id to be written to file, 2nd a file handle, 3rd a ref to the query seqs hash,
    # last a list of data on the sequence element. Except for the last two items of this list (query,orientation),
    # these items are references to arrays of as many items as hits to this element
	my ($FASTAid, $FASTA, $chromoSeq_, $MESSAGES, @elementData) = @_;

	my ($startCoord_,$endCoord_,$startRef_,$endRef_,$leftAfterRefEnd_,$query,$orientation) = @elementData;
	my $numfrags = scalar(@{$startCoord_});  # number of hits associated with this element

	my $sequence = "";
	# get sequence for the element from the chromosome sequence
	my $seqOffset;		# if the chromosomal coordinates of consecutive hits to a given element overlap,
				#   this will store the length of the overlap (in number of nucleotide positions)
	my $fragGap;		#this will store the gap (in number of nucleotides) between two consecutive hits to the same element

	# add as many gaps to the beginning of the  sequence as bases missing before the match to the reference starts
	if ( $orientation eq "+" && $startRef_->[0] > 1 ) {
		for (my $gap=1; $gap<$startRef_->[0]; $gap++) { $sequence .= "-"; }
	}
	elsif ( $orientation eq "C" && $leftAfterRefEnd_->[0] > 0 ) {
		for (my $gap=1; $gap<=$leftAfterRefEnd_->[0]; $gap++) { $sequence .= "-"; }
	}
	# loop through all fragments (hits) and append their sequences
	$seqOffset = 0;
	for (my $frag=0; $frag<$numfrags; $frag++) {
		if ( $frag>0 && ($startCoord_->[$frag]-$endCoord_->[$frag-1] < 1) ) { # fragments overlap on the chromosome
			$seqOffset = abs($startCoord_->[$frag]-$endCoord_->[$frag-1]) + 1;
		}

		$sequence .= substr($chromoSeq_->{$query},$startCoord_->[$frag]+$seqOffset-1,$endCoord_->[$frag]-$startCoord_->[$frag]+1 );
		print $MESSAGES messageOUT("ERROR !!\nQuery coordinates of repeat in the RepeatMasker annotation file \nfall outside range of input sequence\n****************") unless ($sequence);
		if ( $frag+1 <= $numfrags-1 ) {
			if ( $orientation eq "+") {		# element on + strand
				$fragGap = $startRef_->[$frag+1] - $endRef_->[$frag] - 1;
		    }
		    else {							# element on complementary strand
			    $fragGap = $startRef_->[$frag] - $endRef_->[$frag+1] - 1;
			}
			if ($fragGap > 0) {
				for (my $gap=1; $gap<=$fragGap; $gap++) { $sequence .= "-"; }
			}
		}
		$seqOffset = 0;
	}
	# add as many gaps to the end of the element sequence as bases missing after the match to the reference ends
	if ( $orientation eq "+" && $leftAfterRefEnd_->[-1] > 0 ) {
		for (my $gap=1; $gap<=$leftAfterRefEnd_->[-1]; $gap++) { $sequence .= "-"; }
	}
	elsif ( $orientation eq "C" && $startRef_->[-1] > 1 ) {
		for (my $gap=1; $gap<$startRef_->[-1]; $gap++) { $sequence .= "-"; }
	}

	# write sequences to file in fastA format with lines of 60 nucleotides
	my $FASTAtemplate;  # this is a template for the unpack function, to specify chunks of 60 residues + trailing sequence

	# fastA id for solo :
	print $FASTA ">".$FASTAid."\n";
	#  sequence:
	printStringIntoLines($FASTA,60,$sequence);

	return $sequence

}


####################
# Processes an unpaired LTR-sequence. Returns the number of processed records, as the LTR may
# correspond to multiple records.

sub processSingle {
	# First arg is the index of the LTR in LTR-records array. The second is a reference to the
	# LTR-records array, third a reference to the records array. The fourth is this unpaired LTR's
	# number (id). The 5th a ref to the parameter hash.
	# The 6th is the path to the directory to contain the sequences of the solo LTRs.
	# 7th and 8th respectively refs to the hash containing contig seqs and start coordinates.
	# The penultimate arg is the solo LTRs output file handle. Last arg a ref to the solo LTR annotation.
	my($LTRindex,$LTRrecord_,$record_,$id,$parameter_,$FASTAFilePath,$chromoSeq_,$soloLTRannotation_) = @_;

	my @LTRindex = @{$LTRrecord_->[$LTRindex]->[2]};
	my $lastLTRindex = $LTRindex[-1];
	my $lastLTRrec = $LTRrecord_->[$lastLTRindex][0];	# if the LTR consists of multiple fragments,
	my $label = $LTRrecord_->[$lastLTRindex][1];			# this is the innermost fragment

	my $numLTRfrags = scalar(@LTRindex);
	my $numProcessedLTRrecords = 0;

	# check if LTR consists of multiple records
	foreach my $hit (@LTRindex)
	  {
	    # flag current LTR record as processed single
	    $record_->[ $LTRrecord_->[$hit][0] ]->{"id"} = $id;
	    $numProcessedLTRrecords++;
	  }

	my $firstLTRindex = $LTRindex[0];
	my $firstLTRrec = $LTRrecord_->[ $firstLTRindex ][0];
	# check if this LTR is a solo LTR and save its sequence if it is
	if ( isSolo($firstLTRindex,$lastLTRindex,$LTRrecord_,$record_,$parameter_,$id) ) {	# this is a solo LTR
		# write LTR sequence to a fasta file
		saveSolo($firstLTRrec, $lastLTRrec, $record_, $id, $chromoSeq_, $FASTAFilePath, 
			 $numLTRfrags, $parameter_,$soloLTRannotation_, $MESSAGES
			);
		return 1;
	}

	return 0;
}


###################
# Checks whether an unpaired LTR-sequence is a solo LTR. Returns TRUE if it is, FALSE otherwise.

sub isSolo 
  {
    # First arg is the index of the first LTR fragment, in LTR-records array.
    # 2nd arg is the index of the last LTR fragment, in LTR-records array.
    # The 3rd is a reference to the
    # LTR-records array, 4th a reference to the records array.
    # The 5th the minimum distance to the nearest internal region of same family
    # and orientation necessary for this LTR to be classified as 'solo'.
    # The last arg is this unpaired LTR's id number.
    my($firstLTRindex,$lastLTRindex,$LTRrecord_,$record_,$parameter_,$id) = @_;
    my $minIntDistanceToSoloLTR = $parameter_->{minDistanceToSoloLTR};

	my $firstLTRrec = $LTRrecord_->[$firstLTRindex][0];
	my $lastLTRrec = $LTRrecord_->[$lastLTRindex][0];
	my $LTRstart = $record_->[$firstLTRrec]->{"chromoStart"};
	my $LTRend =  $record_->[$lastLTRrec]->{"chromoEnd"};
	my $LTRorientation = $record_->[$lastLTRrec]->{"orientation"};
	my $LTRfamily = LTRname($record_->[$lastLTRrec]->{refName}, $record_->[$lastLTRrec]->{superfamily});
#	$LTRfamily =~ s/^(.+)_LTR.*$/$1/;	# remove '_LTR' from family name

	my $solo = 1; # flag will remain TRUE if the LTR is deemed to be solo

	my $rec = $firstLTRrec - 1;	# this is the record immediately preceding the LTR
	# Look for an internal region or LTR before the current LTR that could be part of the same retroelement insertion,
	# within the search range:
	while ( $rec >= 0 && ($LTRstart-$record_->[$rec]->{"chromoEnd"}) < $minIntDistanceToSoloLTR && $solo ) 
	  {
	    # check if this is an internal region is of same family and orientation as the LTR, and that it hasn't been processed
	    my $name = $record_->[$rec]->{"refName"};
	    my $orient = $record_->[$rec]->{"orientation"};
	    my $idI = $record_->[$rec]->{"id"};
	    $solo = 0 if ( nameAndOrientationMatch($firstLTRrec, $rec, $record_, $parameter_) );
	    $rec--;
	  }

	if ($solo) 
	  {	
	    # this could still be a solo LTR
	    $rec = $lastLTRrec + 1;	# this is the record immediately after the LTR
	    # Look for an internal region after the LTR that could be part of the same retroelement insertion,
	    # within the search range:
	    my $numRecords = scalar(@{$record_});
	    while ( $rec < $numRecords && ($record_->[$rec]->{"chromoStart"}-$LTRend) < $minIntDistanceToSoloLTR && $solo )
	      {
		# check if this is an internal region is of same family and orientation as the LTR, and that it hasn't been processed
		my $name = $record_->[$rec]->{"refName"};
		my $orient = $record_->[$rec]->{"orientation"};
		my $idI = $record_->[$rec]->{"id"};
		$solo = 0 if ( nameAndOrientationMatch($lastLTRrec, $rec, $record_, $parameter_) );
		$rec++;
	      }
	  }

	if ($solo) 
	  {	
	    # this is a solo LTR!
	    # flag LTR records as solo
	    my $label = $LTRrecord_->[$firstLTRindex][1];
	    for (my $index=$firstLTRindex; $index<=$lastLTRindex; $index++) 
	      {
		$record_->[ $LTRrecord_->[$index][0] ]->{"id"} = "solo-".$id if ($LTRrecord_->[$index][1] eq $label);
	      }
	  }
	return $solo;
}


###################
# Runs clustalW on all LTR pair fasta file.
# Must be run from the same directory as ClustalW itself.
# Command line arguments: clustalW directory path, destination directory path, path to fasta file with sequences to be aligned.


sub runClustalW {

  my($clustalPath, $outDir, $infile) = @_;
  #$infile =~ /\/(.+)$/;
  my $fileName = basename($infile,"");
  my $dirName = dirname($infile);
  my $clustalw = basename($clustalPath,"");
  my $clustalWdir = dirname($clustalPath);

  system("cp ".$clustalPath." ".$dirName."/") unless (-e $dirName."/clustalw");
  my $command = $dirName."/".$clustalw." /PWGAPOPEN=0.5 /PWGAPEXT=0.01 /INFILE=".$fileName;
  system($command);

  $fileName =~ s/(.*)\.fasta$/$1/;	# remove .fasta extension from string in $fileName
  # move alignment file to alignments (out) directory
  system("mv ".$dirName."/".$fileName.".aln ".$outDir.$fileName.".aln");
  # clean up
  system("rm -f *.dnd");

}


###################
# Writes sequence of a truncated element to fasta format files:
# full sequence to 1(2) gapped (and ungapped) file(s), internal sequence to another (2) file(s), LTR sequences (gapped) to another file.
# Outputs annotation on the truncated element.

sub saveTruncated {
	# First and third args are indices of the LTR and IR, respectively. The
	# second and fourth are strings containing all indices (separated by hyphens) associated with the LTR and IR, respectively.
        # Fifth, sixth and 7th are refs to the LTR-, IR-, and all- records arrays, next arg the element id.
	# Next is a reference to the entire chromosomal sequence.
	# Next is the path to the output files to contain the sequences of the elements.
	# Next is the file handle to the output file containing info on paired LTRs.
	# The next 2 args are: num of fragments associated with LTR,
	# num of fragments associated with IR.
        # The penultimate arg is a ref to the parameter hash, last arg a list of the IR record numbers.

	my($LTR,$LTRindices,$IR,$IRindices,$LTRrecord_,$IRrecord_,$record_,$id,$chromoSeq_,
	   $FASTAFilePath,$numLTRfrags1,$numInternalFrags,$parameter_,$truncatedLTRannotation_,$MESSAGES,@internalRecs
	  ) = @_;

	my $LTRrec = ($LTR ne -1)? $LTRrecord_->[$LTR][0] : -1;
	my $IRrec = ($IR ne -1)? $IRrecord_->[$IR][0] : -1;
	my $query = ($LTR ne -1)? $record_->[$LTRrec]->{"query"} : $record_->[$IRrec]->{"query"};
	my $nestingLevel = 0;
	my $nestIDs = "NA";
	my $DNArearrangement = ($LTR ne -1)? $record_->[$LTRrec]->{"rearrangement"} : $record_->[$IRrec]->{"rearrangement"};
	my $orientation = ($LTR ne -1)? $record_->[$LTRrec]->{"orientation"} :
	                                $record_->[$IRrec]->{"orientation"};	# store orientation on the chromosome

	my @names = ( ($LTR ne -1)? LTRname($record_->[$LTRrec]->{refName}, $record_->[$LTRrec]->{superfamily}) :
		                    INTname($record_->[$IRrec]->{refName}, $record_->[$IRrec]->{superfamily})
		    );

	# remove 'LTR' from name (RepBase Update naming scheme)
#	$names[0] =~ s/^(.+)_((LTR)|(I)).*$/$1/i;

	my $familyID = $names[0];
	$familyID =~ s/-//;
	my $ID = $id."_".$query."_".$orientation."_".$familyID;
	my $outfile = $ID.".fasta";
	my $truncatedDir = $FASTAFilePath."/".$parameter_->{"truncatedDir"}."/";
	my $LTRdir = $truncatedDir.$parameter_->{"LTRdir"}."/";

	my $sequence1 = ""; 
	my $ungappedSeq1 = "";

	my $refLTRlength = ($LTR ne -1)? $record_->[$LTRrec]->{"refEnd"} + $record_->[$LTRrec]->{"leftAfterRefEnd"} : -1;
	my $refIRlength = ($IR ne -1)? $record_->[$internalRecs[0]]->{"refEnd"} + $record_->[$internalRecs[0]]->{"leftAfterRefEnd"} : -1;

	# store the chromosome coordinates of and other info on the fragment sequences to be written to the out file
	my $label = ($LTR ne -1)? $record_->[$LTRrec]->{"id"} : $record_->[$IRrec]->{"id"};
	my @LTR1startCoord = ();
	my @LTR1endCoord = ();
	my @LTR1startRef = ();
	my @LTR1endRef = ();
	my @LTR1leftAfterRefEnd = ();
	my @LTR1div = ();
	my @LTRlineNumbers;
	my @LTR = ();
	unless ($LTR eq -1)
	  {
	    @LTR = @{$LTRrecord_->[$LTR]->[2]};
	    foreach my $index (@LTR)
	      {
		my $rec = $LTRrecord_->[$index][0];
		push @LTR1startCoord, $record_->[$rec]->{"chromoStart"};
		push @LTR1endCoord, $record_->[$rec]->{"chromoEnd"};
		push @LTR1startRef, $record_->[$rec]->{"refStart"};
		push @LTR1endRef, $record_->[$rec]->{"refEnd"};
		push @LTR1leftAfterRefEnd, $record_->[$rec]->{"leftAfterRefEnd"};
		push @LTR1div, $record_->[$rec]->{"divergenceFromRef"};
		push @LTRlineNumbers, $LTRrecord_->[$index][0] + $parameter_->{"RMheaderLines"} + 1;
	      }

	    unless ( $parameter_->{"noSeqOutput"} )
	      { # output sequence
		
		# get sequences for LTR from the chromosome sequence
		my $seqOffset;		# if the chromosomal coordinates of consecutive LTR frags of a given LTR overlap,
		                        # this will store the length of the overlap (in number of nucleotide positions)
		my $fragGap;		# this will store the gap (in number of nucleotides) between two consecutive fragments of the same LTR
		# LTR1
		# add as many gaps to the beginning of the LTR1 sequence as bases missing before the match to the reference starts
		if ( $orientation eq "+" && $LTR1startRef[0] > 1 ) {
		  for (my $gap=1; $gap<$LTR1startRef[0]; $gap++) { $sequence1 .= "-"; }
		}
		elsif ( $orientation eq "C" && $LTR1leftAfterRefEnd[0] > 0 ) {
		  for (my $gap=1; $gap<=$LTR1leftAfterRefEnd[0]; $gap++) { $sequence1 .= "-"; }
		}
		# loop through all LTR1 fragments and append their sequences
		$seqOffset = 0;
		my $numLTR1frags = scalar(@LTR1endRef);
		for (my $frag=0; $frag<$numLTR1frags; $frag++) {
		  if ( $frag>0 && ($LTR1startCoord[$frag]-$LTR1endCoord[$frag-1] < 1) ) { # fragments overlap on the chromosome
		    $seqOffset = abs($LTR1startCoord[$frag]-$LTR1endCoord[$frag-1]) + 1;
		  }
		  $sequence1 .= substr($chromoSeq_->{$query},$LTR1startCoord[$frag]+$seqOffset-1,$LTR1endCoord[$frag]-$LTR1startCoord[$frag]+1 );
		  if ( $frag+1 <= $numLTR1frags-1 ) {
		    # write gaps between fragments
		    if ( $orientation eq "+") {		# element on + strand
		      $fragGap = $LTR1startRef[$frag+1] - $LTR1endRef[$frag] - 1;
		    }
		    else {										# element on complementary strand
		      $fragGap = $LTR1startRef[$frag] - $LTR1endRef[$frag+1] - 1;
		    }
		    if ($fragGap > 0) {
		      for (my $gap=1; $gap<=$fragGap; $gap++) { $sequence1 .= "-"; }
		    }
		  }
		  $seqOffset = 0;
		}
		# add as many gaps to the end of the LTR1 sequence as bases missing after the match to the reference ends
		if ( $orientation eq "+" && $LTR1leftAfterRefEnd[-1] > 0 ) {
		  for (my $gap=1; $gap<=$LTR1leftAfterRefEnd[-1]; $gap++) { $sequence1 .= "-"; }
		}
		elsif ( $orientation eq "C" && $LTR1startRef[-1] > 1 ) {
		  for (my $gap=1; $gap<$LTR1startRef[-1]; $gap++) { $sequence1 .= "-"; }
		}
	

		# write LTR sequences to be aligned to fasta file
		saveLTRs($LTRdir.$outfile, $ID."_ltr", $orientation, $sequence1, "", \@names, "", $MESSAGES);
		
		if($parameter_->{ungapped}) {
		  # SAVE UNGAPPED CHROMOSOMAL SEQUENCES AS WELL
		  $ungappedSeq1 = substr( $chromoSeq_->{$query}, $LTR1startCoord[0] - 1, $LTR1endCoord[-1] - $LTR1startCoord[0] + 1 );
		  saveLTRs($LTRdir.$ID.".ungapped.fasta", $ID."_ltr_ungapped", $orientation, 
			   $ungappedSeq1, "", \@names, "", $MESSAGES
			  );
		}

	      }
	  }  # end of unless ($LTR eq -1)
	
	my $intSeq = "";
	my $ungappedIntSeq = "";
	my @internalStartCoord = ();
	my @internalEndCoord = ();
	my @internalStartRef = ();
	my @internalEndRef = ();
	my @internalLeftAfterRefEnd = ();
	my @internalDiv = ();
	my @IRlineNumbers = ();
	unless ($IR eq -1)
	  {
	    foreach my $rec (@internalRecs)
	      {
		push @internalStartCoord, $record_->[$rec]->{"chromoStart"};
		push @internalEndCoord, $record_->[$rec]->{"chromoEnd"};
		push @internalLeftAfterRefEnd, $record_->[$rec]->{"leftAfterRefEnd"};
		push @internalStartRef, $record_->[$rec]->{"refStart"};
		push @internalEndRef, $record_->[$rec]->{"refEnd"};
		push @internalDiv, $record_->[$rec]->{"divergenceFromRef"};
		push @IRlineNumbers, $rec + $parameter_->{"RMheaderLines"} + 1;
	      }
	  }

	unless ( $parameter_->{"noSeqOutput"} )
	      {
		# save INTERNAL REGION
		($intSeq,$ungappedIntSeq) = saveInternalRegion($record_,$truncatedDir,$ID,$chromoSeq_,$MESSAGES,$parameter_,@internalRecs) unless ($IR eq -1);

		# save FULL SEQUENCE (both the chromosomal seq with possible indels and the gapped sequence homologous to consensus)
		my ($fullSeq, $fullUngappedSeq, $intStart, $intEnd, $ungappedSeq1);
		if ($LTRrec < $IRrec )
		  {
		    $intStart = ($IR ne -1)? $internalStartCoord[0] : $LTR1endCoord[-1] + 1;
		    $intEnd   = ($IR ne -1)? $internalEndCoord[-1] : $LTR1endCoord[-1] + 1;
		    $ungappedSeq1 = ($LTR ne -1)? substr($chromoSeq_->{$query}, $LTR1startCoord[0] - 1, $intStart - $LTR1startCoord[0] ) : "";
		    $fullSeq = $sequence1.$intSeq;
		    $fullUngappedSeq = $ungappedSeq1.$ungappedIntSeq;
		  }
		else
		  {
		    $intStart = ($IR ne -1)? $internalStartCoord[0] : $LTR1startCoord[0] - 1;
		    $intEnd   = ($IR ne -1)? $internalEndCoord[-1] : $LTR1startCoord[0] - 1;
		    $ungappedSeq1 = ($LTR ne -1)? substr($chromoSeq_->{$query}, $intEnd, $LTR1endCoord[0] - $intEnd ) : "";
		    $fullSeq = $intSeq.$sequence1;
		    $fullUngappedSeq = $ungappedIntSeq.$ungappedSeq1;
		  }

		my $FASTA = *FASTA;
		$ID =~ tr/\//\./;
		open($FASTA, ">".$truncatedDir."full/".$ID.".FULL.fasta") || print $MESSAGES "can't open file ".($truncatedDir."full/".$ID.".FULL.ungapped.fasta").": $! \n";
		print $FASTA ">".$ID."_full \n";
		printStringIntoLines($FASTA,60,$fullSeq);
		close($FASTA) || print $MESSAGES "can't close file: $! \n";

		if($parameter_->{ungapped}) {
		  open($FASTA, ">".$truncatedDir."full/".$ID.".FULL.ungapped.fasta") || print $MESSAGES "can't open file ".($truncatedDir."full/".$ID.".FULL.ungapped.fasta").": $! \n";
		  print $FASTA ">".$ID."_full_ungapped\n";
		  printStringIntoLines($FASTA,60,$fullUngappedSeq);
		  close($FASTA) || print $MESSAGES "can't close file: $! \n";
		}

	      }

	# save additional annotation for this pair
	my @allLTRrecs = ();
	my @allIRrecs = ();
	my ($div1, $divIR, $div2, $ref1, $refIR, $ref2, $hits1, $hitsI, $hits2, $frags1, $fragsI, $frags2, $lengthRatio,$start,$end);
	$div2 = "NA";
	$ref1 = 0;
	$ref2 = 0;
	$refIR= 0;
	$hits2 = 0;
	$frags2 = 0;
	if ($LTR ne -1)
	  {
	    # store all LTR rec numbers...(including possible hits involved in rearrangement)
	    foreach my $index ( sort {$a<=>$b} split("-",$LTRindices) )
	      {
		push @allLTRrecs, $LTRrecord_->[$index][0];
	      }

	    # store RMannotation line numbers and div
	    $hits1 = hyphenate(@LTRlineNumbers);
	    $div1 = hyphenate(@LTR1div);
	    $frags1 = scalar(@LTRlineNumbers);
	    # store fraction of ref seq matched
	    if ( defined($refLTRlength) && $refLTRlength > 0 )
	      {
		for (my $i=0; $i<scalar(@LTR1startRef); $i++) { $ref1 +=  ($LTR1endRef[$i]-$LTR1startRef[$i]+1)/$refLTRlength; }
	      }
	    if ($IR ne -1)
	      {
		if ( defined($refLTRlength) && defined($refIRlength) && $refLTRlength > 0 && $refIRlength > 0 )
		  {
		    $lengthRatio = ( $internalEndCoord[-1] - $internalStartCoord[0] + $LTR1endCoord[-1] - $LTR1startCoord[0] + 2 )/
		      ( $refLTRlength + $refIRlength );
		  }
		else
		  {
		    $lengthRatio = "NA";
		  }
		if ($LTRrec > $IRrec)
		  {
		    $div2 = $div1;
		    $div1 = "NA";
		    $ref2 = $ref1;
		    $ref1 = 0;
		    $hits2 = $hits1;
		    $hits1 = "NA";
		    $frags2 = $frags1;
		    $frags1 = 0;
		  }
		# store all IR rec numbers associated with element, including possible hits involved in rearrangements (if any)
		foreach my $index ( sort {$a<=>$b} split("-",$IRindices) )
		  {
		    push @allIRrecs, $IRrecord_->[$index][0];
		  }
		$start = ($allLTRrecs[0] < $allIRrecs[0])? $record_->[$allLTRrecs[0]]->{chromoStart} : $record_->[$allIRrecs[0]]->{chromoStart};
		$end = ($allLTRrecs[-1] > $allIRrecs[-1])? $record_->[$allLTRrecs[-1]]->{chromoEnd} : $record_->[$allIRrecs[-1]]->{chromoEnd};
	      }
	    else
	      {
		$start = $record_->[$allLTRrecs[0]]->{chromoStart};
		$end = $record_->[$allLTRrecs[-1]]->{chromoEnd};
	      }
	  }
	else
	  {
	    $lengthRatio = (defined($refIRlength) && $refIRlength > 0)? 
	      ( $internalEndCoord[-1] - $internalStartCoord[0] + 1 )/( $refIRlength ) : "NA";
	  }

	if ($IR ne -1)
	  {
	    # store all IR rec numbers associated with element, including possible hits involved in rearrangements (if any)
	    unless (@allIRrecs)
	      {
		foreach my $index ( sort {$a<=>$b} split("-",$IRindices) )
		  {
		    push @allIRrecs, $IRrecord_->[$index][0];
		  }
	      }
	    $start = $record_->[$allIRrecs[0]]->{chromoStart} unless (defined($start));
	    $end = $record_->[$allIRrecs[-1]]->{chromoEnd} unless (defined($end));

	    # store div
	    $divIR = hyphenate(@internalDiv);
	    $hitsI = hyphenate(@IRlineNumbers);
	    $fragsI = scalar(@IRlineNumbers);
	    # store the fraction of the reference IR sequence matched by this internal region
	    if ( defined($refIRlength) && $refIRlength > 0 )
	      {
		for (my $i=0; $i<scalar(@internalEndRef); $i++) { $refIR +=  ($internalEndRef[$i]-$internalStartRef[$i]+1)/$refIRlength; }
	      }
	  }
	else
	  {
	    $divIR = "NA";
	    $hitsI = "NA";
	    $fragsI = 0;
	    $lengthRatio = ( defined($refLTRlength) && $refLTRlength > 0 )?
	    ( $LTR1endCoord[-1] - $LTR1startCoord[0] + 1 )/( $refLTRlength ) : "NA";
	  }

	my $superfamily = ($LTR ne -1)? $record_->[$LTRrec]->{"superfamily"} : $record_->[$IRrec]->{"superfamily"};
	my $distCen;

	if ($parameter_->{"arabidopsis"}) {
	  $superfamily = ($LTR ne -1)? $record_->[$LTRrec]->{"superfamily"} : $record_->[$IRrec]->{"superfamily"};
	  $distCen = ($LTR ne -1)? $record_->[$LTRrec]->{"distCen"} : $record_->[$IRrec]->{"distCen"};
        }

	# output ANNOTATION

        # when annotating the start and end coords of this element, extend to the fragments involved in rearrangements

	my $end1 = "NA";
	my $start2 = "NA";
	unless ($LTR eq -1) {  # truncated element contains an LTR
	  $end1 = $LTR1endCoord[-1] if ($IRrec eq -1 || $LTRrec < $IRrec);    # no IR or LTR precedes IR
	  $start2 = $LTR1startCoord[0] if ($IRrec ne -1 && $LTRrec > $IRrec); # LTR follow IR
	}

	my %record = ( id=>$id,
		       'query'=>$query,
		       'family'=>$familyID,
		       'div1'=> $div1,
		       'div2'=> $div2,
		       'divI'=> $divIR,
		       'start'=> $start,
		       end1=> $end1,
		       start2=> $start2,
		       'end'=> $end,
		       'hits1'=> $hits1,
		       'hits2'=> $hits2,
		       'hitsI'=> $hitsI,
		       'frags1'=> $frags1,
		       'frags2'=> $frags2,
		       'fragsI'=> $numInternalFrags,
		       ref1=> $ref1,
		       ref2=> $ref2,
		       'refI'=> $refIR,
		       'lenR'=> $lengthRatio,
		       'orientation'=>$orientation,
		       'superfamily'=>$superfamily,
		       'nest'=>$nestingLevel,
		       'nestIDs'=>$nestIDs,
		       'DNArearrangement'=>$DNArearrangement
		     );
	if ($parameter{"arabidopsis"})
	  {
	    $record{"distCen"} = $distCen;
	  }
	push @{$truncatedLTRannotation_}, \%record;

}


###################
# Writes sequence of a complete element to a fasta format files:
# full sequence to gapped (and ungapped) file(s), internal sequence to another (2) file(s), LTR sequences (gapped) to another file.
# Creates intra-element LTR alignment, computes intra-element LTR divergence, saves record containing
# info on the complete element.

sub savePair
  {
    # First two args are the record numbers of the first and last records associated with LTR1. The
    # third and fourth are the record numbers of the first and last records associated with LTR2.
    # The fifth arg is a reference to the records array, the sixth arg the element id. The 6th
    # is a reference to the entire query sequence hash.
    # The 7th arg is the path to the output files to contain the sequences of the LTR pairs.
    # The 8th and 9th args are refs to arrays of record numbers for all LTR and IR hits associated with
    # the element, respectively.
    # The next 3 args are: num of fragments associated with LTR1,
    # num of fragments associated with LTR2, num of fragments associated with the internal region).
    # The penultimate arg is a ref to the parameter hash, last arg a list of the IR record numbers.

    my($firstLTR1rec, $lastLTR1rec, $firstLTR2rec, $lastLTR2rec, $record_, $pairID, $chromoSeq_, $FASTAFilePath,
       $LTRrecs_, $IRrecs_,$numLTRfrags1, $numLTRfrags2, $numInternalFrags, $parameter_, $completeLTRannotation_, $MESSAGES,
       @internalRecs
      ) = @_;

    my $query = $record_->[$firstLTR1rec]->{"query"};
    my $ID = $pairID;

    my $nestingLevel = 0;
    my $nestIDs = "NA";
    my $DNArearrangement = $record_->[$internalRecs[0]]->{rearrangement};

    # if possible DNA rearrangement involves only the IR, flag it with a "*" to indicate that it may be chimaeric element 
    # relatively to the ref seq (instead of a post-integration event)
    $DNArearrangement .= "-*" if ($DNArearrangement && !$record_->[$firstLTR1rec]->{rearrangement});

	my $orientation = $record_->[$firstLTR1rec]->{"orientation"};	# store orientation of the element on the chromosome

	my @names1 = ( LTRname($record_->[$lastLTR1rec]->{refName}, $record_->[$lastLTR1rec]->{superfamily}) );
	my @names2 = ( LTRname($record_->[$firstLTR2rec]->{refName}, $record_->[$firstLTR2rec]->{superfamily}) );

	my $familyID = $names1[0];
	$pairID .= "_".$query."_".$orientation."_".$familyID;

	my $outfile = $ID.".fasta";
	# avoid problems with the unix shell: remove any "|" or "/" characters from filenames
	$outfile =~ s/\|//g;
              $outfile =~ tr/\//\./;
	$pairID =~ s/\|//g;
              $pairID =~ tr/\//\./;
	$FASTAFilePath .=  "/".$parameter_->{"completeDir"}."/";
	my $LTRdir = $FASTAFilePath.$parameter_->{"LTRdir"}."/";

	my $sequence1 = ""; my $ungappedSeq1 = "";
	my $sequence2 = ""; my $ungappedSeq2 = "";

	my $refLTRlength = $record_->[$firstLTR1rec]->{"refEnd"} + $record_->[$firstLTR1rec]->{"leftAfterRefEnd"};
	my $refIRlength = $record_->[$internalRecs[0]]->{"refEnd"} + $record_->[$internalRecs[0]]->{"leftAfterRefEnd"};

	# store the chromosome coordinates of and other info on the fragment sequences to be written to the out file
	my $label1 = $record_->[$firstLTR1rec]->{"id"};
	my $label2 = $record_->[$firstLTR2rec]->{"id"};
	my @LTR1startCoord;
	my @LTR2startCoord;
	my @LTR1endCoord;
	my @LTR2endCoord;
	my @LTR1startRef;
	my @LTR2startRef;
	my @LTR1endRef;
	my @LTR2endRef;
	my @LTR1leftAfterRefEnd;
	my @LTR2leftAfterRefEnd;
	my @LTR1div;
	my @LTR2div;
	my @RMrec1;
	my @RMrec2;
	my $rec1range = $lastLTR1rec - $firstLTR1rec;
	my $rec2range = $lastLTR2rec - $firstLTR2rec;
	for (my $frag=0; $frag <= $rec1range; $frag++)
{
	  if ( $record_->[$firstLTR1rec + $frag]->{"id"} eq $label1 )
	    {
		push @LTR1startCoord, $record_->[$firstLTR1rec + $frag]->{"chromoStart"};
		push @LTR1endCoord, $record_->[$firstLTR1rec + $frag]->{"chromoEnd"};
		push @LTR1startRef, $record_->[$firstLTR1rec + $frag]->{"refStart"};
		push @LTR1endRef, $record_->[$firstLTR1rec + $frag]->{"refEnd"};
		push @LTR1leftAfterRefEnd, $record_->[$firstLTR1rec + $frag]->{"leftAfterRefEnd"};
		push @LTR1div, $record_->[$firstLTR1rec + $frag]->{"divergenceFromRef"};
		push @RMrec1, $firstLTR1rec + $frag + $parameter_->{RMheaderLines} + 1;
	      }
	}
	for (my $frag=0; $frag <= $rec2range; $frag++) {
	  if ( $record_->[$firstLTR2rec + $frag]->{"id"} eq $label2 )
	    {
		push @LTR2startCoord, $record_->[$firstLTR2rec + $frag]->{"chromoStart"};
		push @LTR2endCoord, $record_->[$firstLTR2rec + $frag]->{"chromoEnd"};
		push @LTR2startRef, $record_->[$firstLTR2rec + $frag]->{"refStart"};
		push @LTR2endRef, $record_->[$firstLTR2rec + $frag]->{"refEnd"};
		push @LTR2leftAfterRefEnd, $record_->[$firstLTR2rec + $frag]->{"leftAfterRefEnd"};
		push @LTR2div, $record_->[$firstLTR2rec + $frag]->{"divergenceFromRef"};
		push @RMrec2, $firstLTR2rec + $frag + $parameter_->{RMheaderLines} + 1;
	      }
	}
	my @internalStartRef;
	my @internalEndRef;
        my @divI;
	foreach my $rec (@internalRecs)
	  {
	    push @internalStartRef, $record_->[$rec]->{"refStart"};
	    push @internalEndRef, $record_->[$rec]->{"refEnd"};
	    push @divI, $record_->[$rec]->{divergenceFromRef};
	  }

	my ($K,$K_sd,$K_TajimaNei,$K_TNsd,$timeK,$timeKsd,$timeTN,$numComparedSites,$transitions,$transversions,$recRefName);

# get all reference names associated with these LTR sequences (if different from each other)
	
#	for ( my $rec = $firstLTR1rec; $rec <= $lastLTR1rec; $rec++) {
#		$recRefName = $record_->[$rec]->{"refName"};
#		$recRefName =~ s/(.+)_LTR.*$/$1/i;
#		push(@names1, $recRefName) if ( !grep(/$recRefName/, @names1) );
#	}
#	for (my $rec = $firstLTR2rec; $rec <= $lastLTR2rec; $rec++) {
#		$recRefName = $record_->[$rec]->{"refName"};
#		$recRefName =~ s/(.+)_LTR.*$/$1/i;
#		push(@names2, $recRefName) if ( !grep(/$recRefName/, @names2) );
#	}


      unless ( $parameter_->{"noSeqOutput"} )
	{ # output sequences

	  # write LTR sequences to be aligned to fasta file (file name kept short to avoid problems running clustalW)
	  open(FASTA, ">".$LTRdir.$outfile) || print $MESSAGES "can't open file ".$LTRdir.$outfile.": $! \n";
	  my $FASTA = *FASTA;
	  my $LTR1terminus = ($orientation eq "+")? "5'_" : "3'_";
	  my $LTR2terminus = ($orientation eq "+")? "3'_" : "5'_";
	  my @elementData = (\@LTR1startCoord,\@LTR1endCoord,\@LTR1startRef,\@LTR1endRef,\@LTR1leftAfterRefEnd,$query,$orientation);
	  $sequence1 = writeGappedSeq($LTR1terminus.$pairID."_ltr", $FASTA, $chromoSeq_, $MESSAGES, @elementData);
	  @elementData = (\@LTR2startCoord,\@LTR2endCoord,\@LTR2startRef,\@LTR2endRef,\@LTR2leftAfterRefEnd,$query,$orientation);
	  $sequence2 = writeGappedSeq($LTR2terminus.$pairID."_ltr", $FASTA, $chromoSeq_, $MESSAGES, @elementData);
	  close(FASTA) || print $MESSAGES "can't close file: $! \n";

	  # align LTRs (using clustalW) if alignments are required

	  if ($parameter_->{"doAlign"} &&
	       # Before trying to align the pair of LTRs, check that this element's LTRs match overlapping regions of the ref seq
	      (
	       ( $orientation eq "+" && $LTR1endRef[-1] > $LTR2startRef[0] && $LTR2endRef[-1] > $LTR1startRef[0] ) ||
		    # the element is on the + strand of the chromosome and its LTR1 and LTR2 matches to the ref seq overlap
	       ( $orientation eq "C" && $LTR2endRef[0] > $LTR1startRef[-1] && $LTR1endRef[0] > $LTR2startRef[-1] )
		    # the element is on the C strand of the chromosome and its LTR1 and LTR2 matches to the ref seq overlap
	      )
	     )
	    {
	      my $alignDir = $FASTAFilePath.$parameter_->{"LTRdir"}."/".$parameter_->{"alignDir"}."/";
	      my $clustalPath = $parameter_->{"pathToClustalW"};
	      runClustalW($clustalPath, $alignDir, $LTRdir.$outfile);

	      # compute intra-element LTR divergence (K , Kimura 2-parameter model) and related quantities
	      ($K,$K_sd,$K_TajimaNei,$K_TNsd,$timeK,$timeKsd,$numComparedSites,$transitions,$transversions,$timeTN) =
		align2K($pairID, $alignDir.$ID.".aln", $parameter_, $MESSAGES);
	
	      # rename LTR files (adding intra-element LTR divergence to file names)
	      $pairID .= sprintf ("_K%.3f",$K);
	      system("mv ".$alignDir.$ID.".aln ".$alignDir.$pairID.".aln");
	    }
	  else { $K="NA"; }

	  # write sequences to file again, this time with a more informative filename
	  open(FASTA, ">".$LTRdir.$pairID.".fasta") || print $MESSAGES "can't open file ".($LTRdir.$pairID.".fasta").": $! \n";
	  @elementData = (\@LTR1startCoord,\@LTR1endCoord,\@LTR1startRef,\@LTR1endRef,\@LTR1leftAfterRefEnd,$query,$orientation);
	  writeGappedSeq($LTR1terminus.$pairID."_ltr", $FASTA, $chromoSeq_, $MESSAGES, @elementData);
	  @elementData = (\@LTR2startCoord,\@LTR2endCoord,\@LTR2startRef,\@LTR2endRef,\@LTR2leftAfterRefEnd,$query,$orientation);
	  writeGappedSeq($LTR2terminus.$pairID."_ltr", $FASTA, $chromoSeq_, $MESSAGES, @elementData);
	  close(FASTA) || print $MESSAGES "can't close file: $! \n";
	  # remove sequence file with short filename
	  unlink($LTRdir.$ID.".fasta");

	  if($parameter_->{ungapped}) {
	    # SAVE UNGAPPED CHROMOSOMAL SEQUENCES AS WELL
	    $ungappedSeq1 = substr( $chromoSeq_->{$query}, $LTR1startCoord[0] - 1, $LTR1endCoord[-1] - $LTR1startCoord[0] + 1 );
	    $ungappedSeq2 = substr( $chromoSeq_->{$query}, $LTR2startCoord[0] - 1, $LTR2endCoord[-1] - $LTR2startCoord[0] + 1 );
	    saveLTRs($FASTAFilePath.$parameter_->{"LTRdir"}."/".$pairID.".ungapped.fasta", $pairID."_ltr_ungapped", $orientation, 
		     $ungappedSeq1, $ungappedSeq2, \@names1, \@names2, $MESSAGES
		    );
	  }
	

	  # save INTERNAL REGION
	  my ($intSeq, $ungappedIntSeq) = saveInternalRegion($record_, $FASTAFilePath, $pairID, $chromoSeq_, $MESSAGES, $parameter_, @internalRecs);

	  # save FULL SEQUENCE (both the chromosomal seq with possible indels and the gapped sequence homologous to consensus)
	  my $intStart = $record_->[$internalRecs[0]]->{"chromoStart"};
	  my $intEnd   = $record_->[$internalRecs[-1]]->{"chromoEnd"};
	  $ungappedSeq1 = substr($chromoSeq_->{$query}, $LTR1startCoord[0] - 1, $intStart - $LTR1startCoord[0] );
	  $ungappedSeq2 = substr($chromoSeq_->{$query}, $intEnd, $LTR2endCoord[-1] - $intEnd );
	
	  my $fullSeq = $sequence1.$intSeq.$sequence2;
	  my $fullUngappedSeq = $ungappedSeq1.$ungappedIntSeq.$ungappedSeq2;
	  open($FASTA, ">".$FASTAFilePath."full/".$pairID.".FULL.fasta") || print $MESSAGES "can't open file ".($FASTAFilePath."full/".$pairID.".FULL.fasta")." : $! \n";
	  print $FASTA ">".$pairID."_full \n";
	  printStringIntoLines($FASTA,60,$fullSeq);
	  close($FASTA) || print $MESSAGES "can't close file: $!";

	  if($parameter_->{ungapped}) {
	    open($FASTA, ">".$FASTAFilePath."full/".$pairID.".FULL.ungapped.fasta") || print $MESSAGES "can't open file ".($FASTAFilePath."full/".$pairID.".FULL.ungapped.fasta").": $! \n";
	    print $FASTA ">".$pairID."_full_ungapped\n";
	    printStringIntoLines($FASTA,60,$fullUngappedSeq);
	    close($FASTA) || print $MESSAGES "can't close file: $! \n";
	  }

	}

	# save additional annotation for this pair

#	my $score1 = $record_->[$firstLTR1rec]->{"score"};
#	my $score2 = $record_->[$lastLTR2rec]->{"score"};
	my $div1;
	for (my $i=0; $i<scalar(@LTR1div); $i++) { $div1 .= defined($div1)? "-".$LTR1div[$i] : $LTR1div[$i]; }
	my $div2;
	for (my $i=0; $i<scalar(@LTR2div); $i++) { $div2 .= defined($div2)? "-".$LTR2div[$i] : $LTR2div[$i]; }
	my $divI;
	for (my $i=0; $i<scalar(@divI); $i++) { $divI .= defined($divI)? "-".$divI[$i] : $divI[$i]; }
	my $ref1 = 0;  # this will store the fraction of the reference LTR sequence matched by LTR1
	my $ref2 = 0;  # this will store the fraction of the reference LTR sequence matched by LTR2

        if ( defined($refLTRlength) && $refLTRlength > 0 )
         {
	   for (my $i=0; $i<scalar(@LTR1startRef); $i++) { $ref1 +=  ($LTR1endRef[$i]-$LTR1startRef[$i]+1)/$refLTRlength; }
	   for (my $i=0; $i<scalar(@LTR2startRef); $i++) { $ref2 +=  ($LTR2endRef[$i]-$LTR2startRef[$i]+1)/$refLTRlength; }
	 }

	my $refIR = 0;  # this will store the fraction of the reference IR sequence matched by this internal region
        if ( defined($refIRlength) && $refIRlength > 0 )
         {
	   for (my $i=0; $i<scalar(@internalEndRef); $i++) { $refIR +=  ($internalEndRef[$i]-$internalStartRef[$i]+1)/$refIRlength; }
	 }

        # ratio of element/reference length
	my $lengthRatio = ( defined($refLTRlength) && defined($refIRlength) && $refLTRlength > 0 && $refIRlength > 0 )?
                          ( $LTR2endCoord[-1] - $LTR1startCoord[0] + 1 )/( 2*$refLTRlength + $refIRlength ) : "NA";
	my $superfamily = $record_->[$firstLTR1rec]->{"superfamily"};
	my $distCen;
#	my ($ID) = $pairID =~ /^([^_]+)_/;   # save as Id only the beginning of the element identifier used on file names
        if ($parameter_->{"arabidopsis"}) 
        {
	  $superfamily = $record_->[$firstLTR1rec]->{"superfamily"};
	  $distCen = ($record_->[$firstLTR1rec]->{"distCen"} + $record_->[$lastLTR2rec]->{"distCen"})/2;
        }

	# output ANNOTATION

        # when annotating the start and end coords of this element, extend to the fragments involved in rearrangements
        my @allLTRrecs = sort {$a<=>$b} @{$LTRrecs_};
        my @allIRrecs = sort {$a<=>$b} @{$IRrecs_};
        my $start = ($allLTRrecs[0] < $allIRrecs[0])? $record_->[$allLTRrecs[0]]->{chromoStart} : $record_->[$allIRrecs[0]]->{chromoStart};
        my $end = ($allLTRrecs[-1] > $allIRrecs[-1])? $record_->[$allLTRrecs[-1]]->{chromoEnd} : $record_->[$allIRrecs[-1]]->{chromoEnd};

       # make internal records equal to the line numbers in the RepeatMasker annotation file
       for (my $rec=0; $rec < scalar(@internalRecs); $rec++ )
	  {
	    $internalRecs[$rec] += $parameter_->{"RMheaderLines"} + 1;
	  }

        my %record =  ( 'id'=>$ID,
			'query'=>$query,
			'family'=>$familyID,
			'div1'=>$div1,
			'div2'=>$div2,
			'divI'=>$divI,
			'start'=>$start,
			'end1'=>$LTR1endCoord[-1],
			'start2'=>$LTR2startCoord[0],
			'end'=>$end,
			'hits1'=> hyphenate(@RMrec1),
			'hits2'=> hyphenate(@RMrec2),
			'hitsI'=>hyphenate(@internalRecs),
			'frags1'=>$numLTRfrags1,
			'frags2'=>$numLTRfrags2,
			'fragsI'=>$numInternalFrags,
			'ref1'=>$ref1,
			'ref2'=>$ref2,
			'refI'=>$refIR,
			'lenR'=>$lengthRatio,
			'orientation'=>$orientation,
			'superfamily'=>$superfamily,
			'nest'=>$nestingLevel,
			'nestIDs'=>$nestIDs,
			'DNArearrangement'=>$DNArearrangement
		      );
	if ($parameter{"doAlign"})
	  {
	    $record{K} = $K;
	    $record{Ksd} = $K_sd;
	    $record{time} = $timeK;
	    $record{timesd} = $timeKsd;
	    $record{numSites} = $numComparedSites;
	    $record{T} = $transitions;
	    $record{V} = $transversions;
            if ($parameter{arabidopsis})
              {
		$record{distCen} = $distCen;
              }
	  }
	else
	  {
	    if ($parameter{arabidopsis})
              {
		$record{distCen} = $distCen;
              }
	  }

        push @{$completeLTRannotation_}, \%record;
}


###################
# Writes sequences of a pair of LTRs into a fasta format file.

sub saveLTRs {

  my($outfile, $pairID, $orientation, $sequence1, $sequence2, $names1_, $names2_, $MESSAGES) = @_;

	open(FASTA, ">".$outfile) || print $MESSAGES "can't open file $outfile : $! \n";

	my $terminal;
	# fastA id for LTR1:
	$terminal = ($orientation eq "+") ? "5'_" : "3'_";
	my $familyID = $names1_->[0];
	for (my $i=1; $i < scalar(@{$names1_}); $i++) {
		$familyID .= "-".$names1_->[$i];
	}
	print FASTA ">".$terminal.$pairID."\n";
	# LTR1 sequence:
	my $FASTA = *FASTA;
	printStringIntoLines($FASTA,60,$sequence1);

  if ($sequence2)
    {
      # fastA id for LTR2:
      $terminal = ($orientation eq "+") ? "3'_" : "5'_";
      $familyID = $names2_->[0];
      for (my $i=1; $i < scalar(@{$names2_}); $i++) 
	{
	  $familyID .= "_".$names2_->[$i];
	}
      print FASTA "\n>".$terminal.$pairID."\n";
      # LTR2 sequence:
      printStringIntoLines($FASTA,60,$sequence2);
    }

  close(FASTA) || print $MESSAGES "can't close file: $! \n";

}


###################
# Calculates distance between aligned pairs of LTRs (stored in clustalW format). K (the number of nucleotide substitutions per site)
# is calculated according to both the Kimura and Tajima & Nei methods.

sub align2K {

my ($pairID, $alignFile, $parameter_, $MESSAGES) = @_;

# calculate distances for all pairs of LTRs

	# store all the lines in alignment file
	my @lines = ();

	open(IN, $alignFile) || die "\ncan't open alignment file $alignFile: $! \n\n********* EXITING NOW *********\n";
	push (@lines, $_) while ( defined($_ = <IN>) );
	close(IN) || print $MESSAGES "can't close file: $! \n";

        my $clustalwOFFSET = 0;	  # will store the number of lines in the header of clustalw .aln files (= no.lines before alignment proper)
        $clustalwOFFSET++ while ( $clustalwOFFSET <= scalar(@lines) && $lines[$clustalwOFFSET] !~ /(5'|3')/ );

	# store lines corresponding to the LTR1 and LTR2 sequences
	my @LTR1lines = grep /5'/ , @lines;
	my @LTR2lines = grep /3'/ , @lines;

	# remove 'newline' characters and sequence identifier to leave only the nucleotide sequences on each "line"
	grep { chomp; s/^\S+\s+([\w-]+)$/$1/ } @LTR1lines;
	grep { chomp; s/^\S+\s+([\w-]+)$/$1/ } @LTR2lines;

	# store the lines corresponding to site identity/difference (asterisks/spaces)
	my @consensusLines;
	for (my $lineNum=1; $lineNum <= scalar(@LTR1lines); $lineNum++) {
		push(@consensusLines, $lines[($clustalwOFFSET-2) + 4 * $lineNum]);  # offset @lines to avoid the clustalW header, then get every fourth line
		# remove 'newline' and keep only the characters that refer to the LTR sequences
		chomp($consensusLines[$lineNum-1]);
		my $numSitesOnThisLine = length($LTR1lines[$lineNum-1]);
		$consensusLines[$lineNum-1] =~ s/.*(.{$numSitesOnThisLine})$/$1/;
	}

	# Concatenate LTR and consensus lines into single strings
	my $LTR1 = join '', @LTR1lines;
	my $LTR2 = join '', @LTR2lines;
	my $consensus = join '', @consensusLines;

	# get distances (Kimura and Tajima&Nei, plus standard deviations) and other statistics (
	# proportion of transitions and transversions, number of sites compared between the 2 seqs,
	# number of indels, transition/transversion ratio).
	return getDistances($LTR1, $LTR2, $consensus, $parameter_);

}


###################
# Calculates the number of transitions, transversions, and indels (end gaps not included) between two sequences.
# It then calls functions that compute estimates of the number of nucleotide substitutions (K) between the
# two sequences, using the Kimura 2-parameter method and Tajima & Nei's method (both assume that all sites
# evolve at the same rate). Returns a record (hash) containing all these values (plus the number of sites
# compared).

sub getDistances {
	# First two args are the sequences (previously made the same length by a clustalW alignment (extra hifens added if necessary),
	# third the consensus line (clustalW format). Fourth is the rate of synonymous nucleotide substitutions per site per million years.
	# Last argument is the output file handle (key in parameter hash).
	my($seq1, $seq2, $consensus, $parameter_) = @_;
	my $rateOfEvolution = $parameter_->{"rateOfEvolution"};
	my $arabidopsisRateOfEvolution = 1.5 * 10**(-2);  # number of synonymous nucleotide substitutions per site per million years
# my $humanLowRateOfEvolution = 1.1 * 10**(-3);   	#low HUMAN number of synonymous nucleotide substitutions per site per million years
# my $humanHighRateOfEvolution = 2.1 * 10**(-3);   	#high HUMAN number of synonymous nucleotide substitutions per site per million years
	$rateOfEvolution = $arabidopsisRateOfEvolution if ($parameter_->{"arabidopsis"});

# translate sequences into numbers: A=0, C=1, G=2, T=3 (keep gaps (hyfens) as they are)
#	$seq1 =~ tr/ACGTacgt/01230123/;
#	$seq2 =~ tr/ACGTacgt/01230123/;

	# split sequences and consensus into arrays of single characters
	my @seq1 = split(//, $seq1);
	my @seq2 = split(//, $seq2);
	my @consensus = split(//, $consensus);

	my $numComparedSites = 0;
	my $transitions = 0;
	my $transversions = 0;
	my $indels = 0;
	my @ACGT_content = (0,0,0,0);
	my %ACGT_pairs = ("AC", 0, "AG", 0, "AT", 0, "CG", 0, "CT", 0, "GT", 0);

	# find site corresponding to the beginning of the alignment in case there is an initial gap
	my $initialSite = 0;
	$initialSite++ while ( $seq1[$initialSite] eq "-" || $seq2[$initialSite] eq "-" );
	# find site corresponding to the end of the alignment in case there is a final gap
	my $finalSite = length($consensus) - 1;
	$finalSite-- while ( $seq1[$finalSite] eq "-" || $seq2[$finalSite] eq "-" );

	$numComparedSites = $finalSite - $initialSite + 1;

	# get differences between sequences
	my $basesAtThisSite;
	my $lastIndelSite = $initialSite - 1;
	for (my $site = $initialSite; $site <= $finalSite; $site++) {
		$basesAtThisSite = $seq1[$site].$seq2[$site];

		if ( $consensus[$site] eq " " ) {  # this site is different between the two sequences

			if ( $basesAtThisSite =~ /-/ ) {  			# indel!
				$indels++ if ( $lastIndelSite < $site - 2 );  # do not count 2 indels if they're separated by only one base match
				$lastIndelSite = $site;
				$numComparedSites--;  # do not include gap in site comparisons
			}
			elsif ( $basesAtThisSite =~ /AC|CA/i ) {  	# A and C at this site
				$ACGT_content[0]++; $ACGT_content[1]++;
				$ACGT_pairs{"AC"}++;
				$transversions++;
			}
			elsif ( $basesAtThisSite =~ /AG|GA/i ) {  	# A and G at this site
				$ACGT_content[0]++; $ACGT_content[2]++;
				$ACGT_pairs{"AG"}++;
				$transitions++;
			}
			elsif ( $basesAtThisSite =~ /AT|TA/i ) {  	# A and C at this site
				$ACGT_content[0]++; $ACGT_content[3]++;
				$ACGT_pairs{"AT"}++;
				$transversions++;
			}
			elsif ( $basesAtThisSite =~ /CG|GC/i ) {  	# C and G at this site
				$ACGT_content[1]++; $ACGT_content[2]++;
				$ACGT_pairs{"CG"}++;
				$transversions++;
			}
			elsif ( $basesAtThisSite =~ /CT|TC/i ) {  	# C and T at this site
				$ACGT_content[1]++; $ACGT_content[3]++;
				$ACGT_pairs{"CT"}++;
				$transitions++;
			}
			elsif ( $basesAtThisSite =~ /GT|TG/i ) {  	# G and T at this site
				$ACGT_content[2]++; $ACGT_content[3]++;
				$ACGT_pairs{"GT"}++;
				$transversions++;
			}
		}

		else {  # this site is identical between the two sequences
			$ACGT_content[0]+=2 if ( $basesAtThisSite =~ /A/i );  	# A and A at this site
			$ACGT_content[1]+=2 if ( $basesAtThisSite =~ /C/i );  	# C and C at this site
			$ACGT_content[2]+=2 if ( $basesAtThisSite =~ /G/i );  	# G and G at this site
			$ACGT_content[3]+=2 if ( $basesAtThisSite =~ /T/i );  	# T and T at this site
		}
	}

	# convert numbers of nucleotides into frquencies
	foreach my $nucleotideNum (@ACGT_content) {
		$nucleotideNum /= 2*$numComparedSites;
	}
	# convert numbers of mis-matched nucleotides into frequencies
	my @ACGT_pairs;
	foreach my $key (sort keys %ACGT_pairs) {
		push(@ACGT_pairs, $ACGT_pairs{$key}/$numComparedSites);
	}
	# get K (and its variance) using Kimura's 2-parameter method
	my($K_Kimura, $V_Kimura) = getK_Kimura($numComparedSites, $transitions, $transversions);

	# get K (and its variance) using Tajima and Nei's method
	my($K_TajimaNei, $V_TajimaNei) = getK_TajimaNei($numComparedSites, $transitions+$transversions,
										@ACGT_pairs, @ACGT_content);

	my $K_Ksd = sqrt($V_Kimura);  # standard deviation
	my $K_TNsd = sqrt($V_TajimaNei);

	# time of divergence betwen two seqs in millions years
	my $timeK = "NA";
	my $timeTN = "NA";
	my $timeKsd = "NA";
	if ($rateOfEvolution) {
	  $timeTN = $K_TajimaNei/(2*$rateOfEvolution);
	  $timeK = $K_Kimura/(2*$rateOfEvolution);
	  # compute variance of time estimate including variance for the Poisson process of nucleotide substitutions
	  $timeKsd = sqrt( ($V_Kimura*$numComparedSites**2 + $K_Kimura*$numComparedSites)/(2*$rateOfEvolution*$numComparedSites)**2 );
	}

	# return field values
	return $K_Kimura, $K_Ksd, $K_TajimaNei, $K_TNsd, $timeK, $timeKsd, $numComparedSites,
	       $transitions/$numComparedSites, $transversions/$numComparedSites, $timeTN;

}


###################
# Returns the number of nucleotide substitutions (K) between two sequences (and its variance), using the
# Kimura 2-parameter method (which assumes that all sites evolve at the same rate, and that nucleotide
# frequencies are the same).

sub getK_Kimura {
	# Last two args are the number of transitions and transversions between two sequences. First argument
	# is the number of sites compared between the two sequences.
	my($length, $transitions, $transversions) = @_;
	$transitions /= $length;		# the proportion of transitions
	$transversions /= $length;	# the proportion of transversions

	my $a = 1/(1 - 2*$transitions -$transversions);
	my $b = 1/(1 - 2*$transversions);
	my $c = ($a + $b)/2;

	my $K = log($a)/2 + log($b)/4;  	# K distance

	# approximate sampling variance
	my $V = ($transitions*$a**2 + $transversions*$c**2 - ($a*$transitions + $c*$transversions)**2)/$length;

	return ($K, $V);
}


###################
# Returns the number of nucleotide substitutions (K) between two sequences (and its variance), using
# Tajima and Nei's method (which assumes that all sites evolve at the same rate, but doesn't assume
# equal frequencies of the 4 nucleotides).

sub getK_TajimaNei {
	# First argument is the number of sites compared between the two sequences.
	# Second is the number if different nucleotides when the two seqs are aligned.
	# Next six args are the frequencies of non-matching pairs of nucleotides in the order:
	# "AC", "AG", "AT", "CG", "CT", and "GT".
	# Last arg is a list containing the frequencies of each nucleotide in the aligned seqs.
	my($length, $diffs, $fAC, $fAG, $fAT, $fCG, $fCT, $fGT, @ACGT_f) = @_;
	$diffs /= $length; 	# the proportion of different sites

	my @ACGT_pairs = ([0,$fAC,$fAG,$fAT],[0,0,$fCG,$fCT],[0,0,0,$fGT]);

	my $b1 = 1 - $ACGT_f[0]**2 - $ACGT_f[1]**2 - $ACGT_f[2]**2 - $ACGT_f[3]**2;
	my $h = 0;
	for (my $i = 0; $i < 3; $i++) {
		for (my $j= $i + 1; $j <= 3; $j++) {
			$h += ($ACGT_f[$i] && $ACGT_f[$j])? $ACGT_pairs[$i][$j]**2/(2*$ACGT_f[$i]*$ACGT_f[$j]) : 0;
		}
	}
	my $b = ($h) ? ( $b1 + $diffs**2/$h )/2 : 1;  # K will be zero if no diffs between seqs

	# K distance
	my $K = (-1) * $b * log(1 - $diffs/$b);

	# approximate sampling variance
	my $V = $b**2 * $diffs * (1-$diffs)/( ($b-$diffs)**2 * $length );

	return ($K, $V);
}


##################
#
sub saveSolo {
	# First two args are the record indices of the first and last records associated with the solo LTR.
	# The 3rd arg is a reference to the records array, the 4th arg the element id. The 5th
	# is a reference to the hash containing the fasta IDs/sequences.
	# The 6th arg is the path to the output files to contain the sequences of the LTR pairs.
	# The 7th arg is the num of fragments associated with the LTR.
        # The last arg is a reference to the parameter hash.
	my($firstLTRrec, $lastLTRrec, $record_, $ID, $chromoSeq_, $FASTAFilePath,
	   $numLTRfrags, $parameter_, $soloLTRannotation_, $MESSAGES
	  ) = @_;

	my $nestingLevel = 0;
	my $nestIDs = "NA";

	my @names = ( LTRname($record_->[$lastLTRrec]->{refName}, $record_->[$lastLTRrec]->{superfamily}) );
	# remove 'LTR' from names (RepBase Update naming scheme)
#	$names[0] =~ s/(.+)_LTR.*$/$1/i;

	my $family = $names[0];
	my $query = $record_->[$firstLTRrec]->{"query"};
	my $orientation = $record_->[$firstLTRrec]->{"orientation"};
	$FASTAFilePath .= "/solo/";
#	$chromoID =~ s/chromo(\d).*/$1/;   # keep only the chromosome number

	my $sequence = "";
	# store the chromosome coordinates of and other info on the fragment sequences to be written to the out file
	my $label = $record_->[$firstLTRrec]->{"id"};
	my @LTRstartCoord;
	my @LTRendCoord;
	my @LTRstartRef;
	my @LTRendRef;
	my @LTRleftAfterRefEnd;
	my @LTRdiv;
	my @RMrec;
	my $recRange = $lastLTRrec - $firstLTRrec;
	for (my $frag=0; $frag <= $recRange; $frag++) {
	  if ( $record_->[$firstLTRrec + $frag]->{"id"} eq $label )
	    {
		push @LTRstartCoord, $record_->[$firstLTRrec + $frag]->{"chromoStart"};
		push @LTRendCoord, $record_->[$firstLTRrec + $frag]->{"chromoEnd"};
		push @LTRstartRef, $record_->[$firstLTRrec + $frag]->{"refStart"};
		push @LTRendRef, $record_->[$firstLTRrec + $frag]->{"refEnd"};
		push @LTRleftAfterRefEnd, $record_->[$firstLTRrec + $frag]->{"leftAfterRefEnd"};
		push @LTRdiv, $record_->[$firstLTRrec + $frag]->{"divergenceFromRef"};
		push @RMrec, $firstLTRrec + $frag + $parameter_->{RMheaderLines} + 1;
	      }
	}

	# get all reference names associated with these LTR sequences (if different from each other)
#	my $recRefName;
#	for ( my $rec = $firstLTRrec; $rec <= $lastLTRrec; $rec++) {
#		$recRefName = $record_->[$rec]->{"refName"};
#		$recRefName =~ s/(.+)_LTR.*$/$1/i;
#		push(@names, $recRefName) if ( !grep(/$recRefName/, @names) );
#	}

	my $familyID = $names[0];

	unless ($parameter_->{"noSeqOutput"} )
	  { # output sequence
	        my $FASTA = *FASTA;
		# output GAPPED LTR SEQUENCEs IN FASTA FORMAT
		
		my $FASTAid = $ID."_".$query."_".$orientation."_".$family."_solo";
		$FASTAid =~ s/\|//g;
		$FASTAid =~ tr/\//\./;
		my $outFile = $FASTAid.".fasta";
		
	        open(FASTA, ">".$FASTAFilePath.$outFile) || print $MESSAGES "can't open file ".$FASTAFilePath.$outFile.": $! \n";
		my @elementData = (\@LTRstartCoord,\@LTRendCoord,\@LTRstartRef,\@LTRendRef,\@LTRleftAfterRefEnd,$query,$orientation);
		writeGappedSeq($FASTAid, $FASTA, $chromoSeq_, $MESSAGES, @elementData);
		close(FASTA) || print $MESSAGES "can't close file: $! \n";

		if($parameter_->{ungapped}) {
		  # SAVE UNGAPPED CHROMOSOMAL SEQUENCE AS WELL
		  my $ungappedSeq = substr( $chromoSeq_->{$query}, $LTRstartCoord[0] - 1, $LTRendCoord[-1] - $LTRstartCoord[0] + 1 );
		  $outFile = $FASTAid.".ungapped.fasta";
		  open(FASTA, ">".$FASTAFilePath.$outFile) || print $MESSAGES "can't open file ".$FASTAFilePath.$outFile.": $! \n";
		  print FASTA ">".$ID."_".$query."_".$orientation."_solo_ungapped \n";
		  printStringIntoLines($FASTA,60,$ungappedSeq);
		  close(FASTA) || print $MESSAGES "can't close file: $! \n";
		}

	      }
	
	# save additional annotation on this solo LTR
	my $score = $record_->[$lastLTRrec]->{"score"};
	my $div;
	for (my $i=0; $i<scalar(@LTRdiv); $i++) { $div .= defined($div)? "-".$LTRdiv[$i] : $LTRdiv[$i]; }
	my $refLTRlength = $record_->[$firstLTRrec]->{"refEnd"} + $record_->[$firstLTRrec]->{"leftAfterRefEnd"};
	my $ref = 0;  # this will store the fraction of the reference LTR sequence matched by this LTR
	my $lengthRatio;
	if ( defined($refLTRlength) && $refLTRlength > 0 )
	  {
	    for (my $i=0; $i<scalar(@LTRstartRef); $i++) { $ref +=  ($LTRendRef[$i]-$LTRstartRef[$i]+1)/$refLTRlength; }
	    $lengthRatio = ( $LTRendCoord[-1] - $LTRstartCoord[0] + 1 )/$refLTRlength;  # ratio of element/reference length
	  }
	else
	  {
	    $ref = "NA";
	    $lengthRatio = "NA";
	  }
	my $superfamily = $record_->[$lastLTRrec]->{"superfamily"};
	my $distCen;
	if ($parameter_->{"arabidopsis"}) {
	  $superfamily = $record_->[$lastLTRrec]->{"superfamily"};
	  $distCen = $record_->[$lastLTRrec]->{"distCen"};
	}

	# Info on the solo LTR is output to file 1.solos on a single line. The sequence of values on the line corresponds to:
	# solo id,chromosome#,family name(s),div from ref,start,end, rec#, numLTRfrags, score, orientation.

	my %record = ( 'id'=>$ID,
		       'query'=>$query,
		       'family'=>$familyID,
		       'div1'=>$div,
		       'start'=>$LTRstartCoord[0],
		       'end'=>$LTRendCoord[-1],
		       'hits1'=> hyphenate(@RMrec),
		       'frags1'=>$numLTRfrags,
		       'ref1'=>$ref,
		       'lenR'=>$lengthRatio,
		       'orientation'=>$orientation,
		       'superfamily'=>$superfamily,
		       'nest'=>$nestingLevel,
		       'nestIDs'=>$nestIDs,
		     );
	if ($parameter{"arabidopsis"})
	  {
	    $record{"distCen"} = $distCen;
	  }
	push @{$soloLTRannotation_}, \%record;

	return 1;
}


###################
# Checks whether an LTR or IR is uninterrupted. Returns true or false

sub uninterruptedLTRorIR {
  # first arg is an LTRorIR index, second is a ref to the index array, third a ref to the records array, last is the boundary tolerance.
  my ($index, $index_, $record_, $tolerance) = @_;

  my @LTRorIR = @{$index_->[$index]->[2]};
  my $firstRec = $index_->[ $LTRorIR[0] ][0];
  my $lastRec = $index_->[ $LTRorIR[-1] ][0];

  # check whether LTR or IRs are uninterrupted by insertions (excepting Low_complexity and Simple_repeat regions)
  # compute number and total length of low complexity and simple repeat hits
  my $countLow = 0;
  my $lengthLow = 0;
  for (my $rec = $firstRec; $rec <= $lastRec; $rec++)
    {
      if ( $record_->[$rec]->{"superfamily"} =~ /(Low_)|(Simple_)/i )
	{
	  $countLow++;
	  $lengthLow += abs($record_->[$rec]->{"chromoEnd"} - $record_->[$rec]->{"chromoStart"} + 1);
	}
    }
  my $refLength = $record_->[$lastRec]->{"refEnd"} + $record_->[$lastRec]->{"leftAfterRefEnd"};
  # make sure maximum boundary tolerance is 10% of reference library sequence length
  $tolerance = $refLength/10 if ( $tolerance > $refLength/10 );

  return 1 unless ( ($lastRec - $firstRec + 1) > scalar(@LTRorIR) + $countLow ||
		    getChromoLength($index,$index_,$record_) - $lengthLow > $refLength + $tolerance*scalar(@LTRorIR)
		  );  # uninterrupted!
  return 0;  # probably interrupted by insertions
}


###################
# Checks whether a given LTR is part of a truncated element. Returns a list:
# (boolean for TRUE or FALSE, an index of the IR, an index of the LTR).
sub truncatedElement
  {
    # First arg is the index of an LTR hit in the @{$unpairedLTR_} array, second the number of elements in that array.
    # Next three are refs to the LTR-, IR-, and all- records arrays,
    # penultimate arg a ref to the unpaired LTR record array, last a ref to the parameter hash
    my ($tLTRindex, $numtLTRs, $LTRrecord_, $Irecord_, $nonLTRrecord_, $record_, $unpairedLTR_, $parameter_) = @_;
    return 0 if ($tLTRindex > $numtLTRs - 1);  # index out of range!!
    my $LTRrec = $unpairedLTR_->[$tLTRindex][0];

    return 0 if ( $LTRrec eq -1 || $record_->[$LTRrec]->{"id"} =~ /^[uit]/i );  # record out of range or this LTR has already been processed as part of an element

    my @LTRindex = @{$LTRrecord_->[$record_->[$LTRrec]->{index}]->[2]}; # this is the index in @{LTRrecord_}
    my $allLTRindices = hyphenate(@LTRindex);
    my $allIRindices = "";
    my $firstLTRrec = $LTRrecord_->[ $LTRindex[0] ][0];
    my $lastLTRrec = $LTRrecord_->[ $LTRindex[-1] ][0];

    my $maxDist = $parameter_->{minDistanceToSoloLTR};
    my $LTRstart = $record_->[$firstLTRrec]->{chromoStart};
    my $LTRend = $record_->[$lastLTRrec]->{chromoEnd};

    # look for IR preceding current LTR
    my $IRrec = $firstLTRrec - 1;
    my $prevIRrec = -1;
    PREVir: while ( $IRrec >= 0 && $LTRstart - $record_->[$IRrec]->{chromoEnd} <= $maxDist )
      {
	if ( !$record_->[$IRrec]->{id} &&
	     INTname($record_->[$IRrec]->{refName}, $record_->[$IRrec]->{superfamily}) &&
	     nameAndOrientationMatch($IRrec,$firstLTRrec,$record_,$parameter_)
	   )
	  {
	    $prevIRrec = $IRrec;
	    last PREVir;
	  }
	$IRrec--;
      }

    # look for IR following current LTR
    my $numRecords = scalar(@{$record_});
    $IRrec = $lastLTRrec + 1;
    my $nextIRrec = -1;
    NEXTir: while ( $IRrec < $numRecords && $record_->[$IRrec]->{chromoStart} - $LTRend <= $maxDist )
      {
	if ( !$record_->[$IRrec]->{id} &&
	     INTname($record_->[$IRrec]->{refName}, $record_->[$IRrec]->{superfamily}) &&
	     nameAndOrientationMatch($IRrec,$lastLTRrec,$record_,$parameter_)
	   )
	  {
	    $nextIRrec = $IRrec;
	    last NEXTir;
	  }
	$IRrec++;
      }

    my $prevIRindex = $record_->[$prevIRrec]->{"index"};
    my $nextIRindex = $record_->[$nextIRrec]->{"index"};

    # check that that IRs exist
    my $prevIR = ($prevIRrec eq -1 ) ? 0 : 1;
    my $nextIR = ($nextIRrec eq -1 ) ? 0 : 1;

    # CHECK WHETHER THERE ARE REPEAT ELEMENTS WITH HITS INTERLEAVED WITH THE LTR AND PREV/NEXT IR
    $prevIR = 0 if ( $prevIR && interleavedHits($prevIRrec, $firstLTRrec, $LTRrecord_, $Irecord_, $nonLTRrecord_, $record_) );
    $nextIR = 0 if ( $nextIR && interleavedHits($lastLTRrec, $nextIRrec, $LTRrecord_, $Irecord_, $nonLTRrecord_, $record_) );

    my @prevIRindex = ($prevIR)? @{$Irecord_->[$prevIRindex]->[2]} : (-1);
    my @nextIRindex = ($nextIR)? @{$Irecord_->[$nextIRindex]->[2]} : (-1);

    my $LTRindex = $LTRindex[0];
    my @IRindex = ();

    my $prevIRlastRec = ($prevIR)? $Irecord_->[$prevIRindex][0] : -1;
    my $nextIRfirstRec = ($nextIR)? $Irecord_->[$nextIRindex][0] : -1;
    my $prevDist = ($prevIR)? dist($prevIRlastRec, $firstLTRrec, $record_) : 2*$maxDist;
    my $nextDist = ($nextIR)? dist($lastLTRrec, $nextIRfirstRec, $record_) : 2*$maxDist;


    # check that LTR is not nested in IRs, and close enough to be considered part of the same element as LTR
    $prevIR = ( $prevDist > $maxDist )? 0 : $prevIR if ($prevIR);
    $nextIR = ( $nextDist > $maxDist )? 0 : $nextIR if ($nextIR);
#    $prevIR = ( $prevDist > $maxDist || $prevIRindex ne $prevIRindex[-1] )? 0 : $prevIR if ($prevIR);
#    $nextIR = ( $nextDist > $maxDist || $nextIRindex ne $nextIRindex[0] )? 0 : $nextIR if ($nextIR);

    my $anotherLTRrec = -1;  # this will be set to next unpaired LTR record number that will 'compete' with the current one
                             # for being classified with the 'next' IR as part of a truncated element
    my ($anotherElement, $anotherLTRindices, $anotherLTR, $anotherIRindices, @anotherIR);
    $anotherElement = 0;     # this will be set to TRUE if there is a possibility that another LTR is part of truncated element
                             # with candidate IR considered for current LTR


    # get next unpaired LTR of same family and orientation (that hasn't already been classified as a truncated element)

    my $uLTRindex = $tLTRindex;  # note that this index refers to @unpairedLTR and *not* to @LTRrecord.
    $uLTRindex++ while ( $uLTRindex < $numtLTRs && $unpairedLTR_->[$uLTRindex][0] <= $lastLTRrec );
    # uLTRindex should now be the @unpairedLTR index immediately after the index for current LTR
    if ( $uLTRindex < $numtLTRs )
      {
	# new index is still valid
	$uLTRindex++ while ( $uLTRindex < $numtLTRs - 1 &&
			     !( nameAndOrientationMatch($firstLTRrec, $unpairedLTR_->[$uLTRindex][0], $record_,$parameter_) &&
				!$record_->[ $unpairedLTR_->[$uLTRindex][0] ]->{id}
			      )
			   );
	$anotherLTRrec = $unpairedLTR_->[$uLTRindex][0] unless ($record_->[ $unpairedLTR_->[$uLTRindex][0] ]->{id});
      }


    # check if any IRs are adjacent to the LTR
    if ( $prevIR &&
	 consecutive($prevIRrec, $firstLTRrec, $record_, $parameter_) && !consecutive($lastLTRrec, $nextIRrec, $record_, $parameter_) 
       )
      {
	# previous IR and this LTR are part of a truncated element
	push @IRindex, $prevIRindex;
      }
    elsif ( $nextIR &&
	    !consecutive($prevIRrec, $firstLTRrec, $record_, $parameter_) && consecutive($lastLTRrec, $nextIRrec, $record_, $parameter_)
	  )
      {
	# next IR and this LTR are part of a truncated element
	push @IRindex, $nextIRindex;
      }
    elsif ( $prevIR && $nextIR &&
	    consecutive($prevIRrec, $firstLTRrec, $record_, $parameter_) && consecutive($lastLTRrec, $nextIRrec, $record_, $parameter_)
	  )
      {
	# flag these hits as possibly involved in a DNA rearrangement other than transposition,
	# and return both IRs and this LTR as part of a truncated element
	my @bothIRindices = (@prevIRindex, @nextIRindex);
	flagLTR_IRrearrangement( $LTRrecord_, $Irecord_, $record_, \@LTRindex, \@bothIRindices, $parameter_ );
	push @IRindex, ($prevIRindex, $nextIRindex);
      }
    else
      {
	if ( $prevIR && !$nextIR )
	  {
	    # need only consider previous IR
	    my @prevIRindex = @{$Irecord_->[$record_->[$prevIRrec]->{index}]->[2]};
	    my $prevIRlastRec = $Irecord_->[$prevIRindex[-1]][0];
	    push @IRindex, $prevIRindex[0] if ( dist($prevIRlastRec, $firstLTRrec, $record_) < $parameter_->{"minDistanceToSoloLTR"} );
	  }
	elsif ( !$prevIR && $nextIR )
	  {
	    # need only consider next IR
	    my @nextIRindex = @{$Irecord_->[$record_->[$nextIRrec]->{index}]->[2]};
	    my $nextIRfirstRec = $Irecord_->[$nextIRindex[0]][0];
	    push @IRindex, $nextIRindex[0] if ( dist($lastLTRrec, $nextIRfirstRec, $record_) < $parameter_->{"minDistanceToSoloLTR"} );
	    # but consider the possibility that the next IR is part of another truncated element
	    ($anotherElement, $anotherLTRindices, $anotherLTR, $anotherIRindices, @anotherIR) = 
	      truncatedElement($uLTRindex,$numtLTRs,$LTRrecord_,$Irecord_,$nonLTRrecord_,$record_,$unpairedLTR_,$parameter_);  # RECURSION
	  }
	elsif ( !$prevIR && !$nextIR )
	  {
	    # return just the LTR as a truncated element
	  }
	else  # 2 IRs within maxdist, neither consecutive to the LTR?
	  {
	    # check whether one of the IRs has a smaller ref seq distance to the LTR (within a significant threshold difference)
	    my $prevRefDist = refDist($prevIRlastRec, $firstLTRrec, $record_);
	    my $nextRefDist = refDist($lastLTRrec, $nextIRfirstRec, $record_);
	    if ($prevRefDist < $nextRefDist - $parameter_->{"boundaryTolerance"})
	      {
		# previous IR and this LTR could be part of a truncated element
		push @IRindex, $prevIRindex[0];
	      }
	    elsif ($nextRefDist < $prevRefDist - $parameter_->{"boundaryTolerance"})
	      {
		# next IR and this LTR could be part of a truncated element
		push @IRindex, $nextIRindex[0];
		($anotherElement, $anotherLTRindices, $anotherLTR, $anotherIRindices, @anotherIR) =
		  truncatedElement($uLTRindex,$numtLTRs,$LTRrecord_,$Irecord_,$nonLTRrecord_,$record_,$unpairedLTR_,$parameter_);  # RECURSION
	      }
	    else  # ref seq distances not significantly different (within tolerance)
	      {
		# check whether one of the IRs has a smaller chromosomal distance to the LTR (within a significant threshold difference)
		my $significance = 4 * $parameter_->{"boundaryTolerance"};
		if ( $prevDist < $parameter_->{"minDistToSoloLTR"} && $prevDist < $nextDist - $significance )
		  {
		    # previous IR and this LTR could be part of a truncated element
		    push @IRindex, $prevIRindex[0];
		  }
		elsif ( $nextDist < $parameter_->{"minDistanceToSoloLTR"} && $nextDist < $prevDist - $significance )
		  {
		    # next IR and this LTR could be part of a truncated element
		    push @IRindex, $nextIRindex[0];
		    ($anotherElement, $anotherLTRindices, $anotherLTR, $anotherIRindices, @anotherIR) =
		      truncatedElement($uLTRindex,$numtLTRs,$LTRrecord_,$Irecord_,$nonLTRrecord_,$record_,$unpairedLTR_,$parameter_);  # RECURSION
		  }
		else  # query seq distances not significantly different (within tolerance)
		  {
		    # we're lacking criteria to decide! check whether the next IR and next unpaired LTR could form an element
		    ($anotherElement, $anotherLTRindices, $anotherLTR, $anotherIRindices, @anotherIR) =
		      truncatedElement($uLTRindex,$numtLTRs,$LTRrecord_,$Irecord_,$nonLTRrecord_,$record_,$unpairedLTR_,$parameter_);  # RECURSION
		    my @anotherIRindex = @{$Irecord_->[ $anotherIR[0] ]->[2]} if ($anotherElement);
		    if ($anotherElement  && defined($nextIRindex[0]) && defined($anotherIRindex[0]) && $nextIRindex[0] eq $anotherIRindex[0])
		      {
			# next IR seems to be part of a truncated element that includes the *next* LTR,
			# return current LTR and the previous IR as a truncated element
			push @IRindex, $prevIRindex[0];
			my @IRindices;
			foreach my $index (@IRindex)
			  {
			    push @IRindices,  @{$Irecord_->[$index]->[2]};
			  }
			return (1, $allLTRindices, $LTRindex, hyphenate(@IRindices), @IRindex);
		      }
		    else
		      {
			# flag possibility of DNA rearrangement other than transposition, return both previous and next IRs as part
			# of a truncated element including the current LTR
			my @bothIRindices = (@prevIRindex, @nextIRindex);
			flagLTR_IRrearrangement($LTRrecord_, $Irecord_, $record_, \@LTRindex, \@bothIRindices, $parameter_);
			push @IRindex, (getRefLength($prevIRindex[0],$Irecord_,$record_) > getRefLength($nextIRindex[0],$Irecord_,$record_))?
			               ($prevIRindex[0], $nextIRindex[0]) : ($nextIRindex[0], $prevIRindex[0]);
			return (1, $allLTRindices, $LTRindex, hyphenate(@bothIRindices), @IRindex);
		      }
		  }
	      }
	  }
      }
    # in case the next IR is the one considered to be part of a truncated element including the current LTR,
    # check whether the next IR is possibly associated with another LTR
    my @anotherLTR = @{$LTRrecord_->[$anotherLTR]->[2]} if ($anotherElement);
    my @anotherIRindex = (defined($anotherIR[0]))? @{$Irecord_->[ $anotherIR[0] ]->[2]} : ();
    my @IR = (defined($IRindex[0]))? @{$Irecord_->[ $IRindex[0] ]->[2]} : ();
    if ($anotherElement && defined($IR[0]) && defined($anotherIRindex[0]) && $anotherIRindex[0] eq $IR[0])
      {
	my $nextIRlastRec = $LTRrecord_->[ $nextIRindex[-1] ][0];
	my $nextRefDist = refDist($lastLTRrec, $nextIRfirstRec, $record_);
	my $firstAnotherLTRrec = $LTRrecord_->[ $anotherLTR[0] ][0];
	my $anotherRefDist = refDist($nextIRlastRec, $firstAnotherLTRrec, $record_);
	my $anotherDist = dist($nextIRlastRec, $firstAnotherLTRrec, $record_);
	if ($anotherRefDist < $nextRefDist - $parameter_->{"boundaryTolerance"})
	  {
	    # return next IR and next LTR as a truncated element
	    @IRindex = @anotherIR;
	    $LTRindex = $anotherLTR;
	    $allLTRindices = hyphenate(@anotherLTR);
	  }
	elsif ($nextRefDist < $anotherRefDist - $parameter_->{"boundaryTolerance"})
	  {
	    # the current LTR is classified as a truncated element including the next IR
	  }
	elsif ($anotherDist < $nextDist - 4*$parameter_->{"boundaryTolerance"})
	  {
	    # return next IR and *next* LTR as a truncated element
	    @IRindex = @anotherIR;
	    $LTRindex = $anotherLTR;
	    $allLTRindices = hyphenate(@anotherLTR);
	  }
	elsif ($nextDist < $anotherDist - 4*$parameter_->{"boundaryTolerance"})
	  {
	    # the current LTR is classified as a truncated element including the next IR
	  }
	else
	  {
	    # flag possibility of DNA rearrangement other than transposition
	    my @bothLTRindices = (@LTRindex, @anotherLTR);

	    flagLTR_IRrearrangement($LTRrecord_, $Irecord_, $record_, \@bothLTRindices, \@nextIRindex, $parameter_);
	    $allLTRindices = hyphenate(@bothLTRindices);
	    $LTRindex = $anotherLTR if ( getRefLength($LTRindex,$LTRrecord_,$record_) < getRefLength($anotherLTR,$LTRrecord_,$record_) );
	  }
      }
    my @IRindices;
    foreach my $index (@IRindex)
      {
	push @IRindices, @{$Irecord_->[$index]->[2]};
      }

    return (1, $allLTRindices, $LTRindex, hyphenate(@IRindices), @IRindex);

}


###################
# Calculates the number of nucleotides the neighbouring ends of two hits (whose records are passed as the first two args).

sub dist
  {
	# First two args are the record numbers of the two hits, third argument is a ref to the array of (refs to) all records.
	my($rec1, $rec2, $record_) = @_;
	return 0 if ($rec1 eq -1 || $rec2 eq -1);             # record number out of range

	if ($rec1 > $rec2)
	  {
	    my $temp = $rec1;
	    $rec1 = $rec2;
	    $rec2 = $temp;
	  }

	# calculate number of nucleotides between the neighbouring ends of the hits
	return abs( $record_->[$rec2]->{"chromoStart"} - $record_->[$rec1]->{"chromoEnd"} );
  }


###################
# Calculates the number of nucleotides missing from the ref sequence in respective matches to the neighbouring ends of two hits
# (whose records are passed as the first two args).
# THIS SUBROUTINE ASSUMES THAT THE HITS WHOSE REC NUMBERS ARE PASSED AS ARGS ARE IN THE SAME ORIENTATION.

sub refDist {
	# First two args are the record numbers of the two hits, third argument is a ref to the array of (refs to) all records.
	my($rec1, $rec2, $record_) = @_;
	return 0 if ($rec1 eq -1 || $rec2 eq -1);             # record number out of range

	my $refMissing1;                                      # will store number of nuclotides missing from the
	                                                      #    end of the first hit (relative to reference seq)
	my $refMissing2;                                      # will store number of nuclotides missing from the
	                                                      #    beginning of the second hit (relative to reference seq)
	my $orientation = $record_->[$rec1]->{"orientation"};

	# calculate number of missing nucleotides from matches to the reference sequence at the neighbouring ends of the hits
	$refMissing1 = ( $orientation eq "C") ? ($record_->[$rec1]->{"refStart"} - 1) : $record_->[$rec1]->{"leftAfterRefEnd"};
	$refMissing2 = ( $orientation eq "C") ? $record_->[$rec2]->{"leftAfterRefEnd"} : ($record_->[$rec2]->{"refStart"} - 1);

	return $refMissing1 + $refMissing2;
}


###################
# Checks whether two LTR sequences belong to the same complete retrotransposon, uninterrupted by other retrotransposon insertions.
# Returns a list in the form: (true or false boolean value, LTR1 index, LTR2 index, IR index).
sub uninterruptedElement {
	# First two arguments are the LTR-record indices of two LTR sequences,  third and fourth and fifth are references
	# to the LTR-, IR-, and all- records arrays. Sixth is the value of higher level counter of uninterrupted elements.
        # Last arg is a ref to the parameter hash
	my($LTR1, $LTR2, $LTRrecord_, $Irecord_, $record_, $parameter_) = @_;
	my $tolerance = $parameter_->{"boundaryTolerance"};

	my @LTRpairIndex = ($LTR1, $LTR2);  # will store a pair of LTR indices to be returned in case an element is identified
	my $IRpairIndex = 0;
	my @IRindex = ();       # will store an IR indices, a ref to it will be returned in case in case an element is identified
	my $allLTRindices = ""; # will store all the LTR indices associated with the element, separated by hyphens
	my $allIRindices = "";  # will store all the IR indices associated with the element, separated by hyphens
	
	# get hits associated with LTRs
	my @LTR1 = @{$LTRrecord_->[$LTR1]->[2]};
	my $firstRec1 = $LTRrecord_->[$LTR1[0]][0];
	my $lastRec1 = $LTRrecord_->[$LTR1[-1]][0];
	my @LTR2 = @{$LTRrecord_->[$LTR2]->[2]};
	my $firstRec2 = $LTRrecord_->[$LTR2[0]][0];
	my $lastRec2 = $LTRrecord_->[$LTR2[-1]][0];


	# make sure the hits are in the same query sequence
	return (0,0,0,0) if ($record_->[$lastRec1]->{"query"} ne $record_->[$firstRec2]->{"query"});  # LTRs not in the same query sequence!

	# make sure the records haven't been classified
	my $classified1 = $record_ ->[$lastRec1]->{"id"};
	my $classified2 = $record_ ->[$firstRec2]->{"id"};
	return (0,0,0,0) if ( $classified1 || $classified2 );  # LTRs have already been classified!

	# do not consider LTR2 as a candidate pair with LTR1 if LTR1 is 'contiguously' preceded by an 
	# internal region of same family/orientation, and not contiguously followed by a smilar IR 
	# (in which case we probably have a complex rearrangement)
	my $IRcontiguousLTR1 = 0;
	my @fuzzyIindex = ();  # this will store extra IR indices if there is a preceding IR contiguous to LTR1
	if ( nameAndOrientationMatch($firstRec1-1, $firstRec1, $record_, $parameter_) &&
	     INTname($record_->[$firstRec1-1]->{refName}, $record_->[$firstRec1-1]->{superfamily}) &&
	     consecutive($firstRec1-1,$firstRec1,$record_,$parameter_)
	   )
	  {
#	    return (0) unless (  nameAndOrientationMatch($lastRec1, $lastRec1+1, $record_,$parameter_) &&
#				 $record_->[$lastRec1+1]->{refName} =~ /_I/ &&
#				 consecutive($lastRec1,$lastRec1+1,$record_,$parameter_) &&
#				 !$record_->[$firstRec1-1]->{id}
#			      );
	    $IRcontiguousLTR1 = 1; # LTR1 has contiguous IRs on both sides
	    @fuzzyIindex = @{$Irecord_->[$record_->[$firstRec1-1]->{index}]->[2]};
	  }

	# check if same family, orientation, not adjacent
	if ( nameAndOrientationMatch($lastRec1, $firstRec2, $record_,$parameter_) && ($firstRec2-$lastRec1-1) )
	  {
	    # check whether LTRs are uninterrupted by insertions
	    return (0,0,0,0) unless ( uninterruptedLTRorIR($LTR1[0],$LTRrecord_,$record_,$tolerance) &&
				      uninterruptedLTRorIR($LTR2[0],$LTRrecord_,$record_,$tolerance)
				    );
		# check whether all (if any) records between the two LTRs correspond to internal sequences of the same family,
	        # apart from low_complexity and simple_repeats
		my $countI = 0;
		my $countLow = 0;
		my $labelI = 0;
		my $nameI; my $orientationI; my $index; my $superfamily;
		for (my $internalRecNum = $lastRec1+1; $internalRecNum < $firstRec2; $internalRecNum++) 
		  {
			$nameI = $record_->[$internalRecNum]->{"refName"};
			$orientationI = $record_->[$internalRecNum]->{"orientation"};
			$index = $record_->[$internalRecNum]->{"index"};
			$superfamily = $record_->[$internalRecNum]->{superfamily};
			# get index of the first record between LTR1 and LTR2 that corresponds to an IR
			$IRpairIndex = $index if ( INTname($nameI, $superfamily) && !$IRpairIndex);
			# get label of the first record between LTR1 and LTR2 that corresponds to an IR
			$labelI = $Irecord_->[$index][1] if ( INTname($nameI, $superfamily) && !$labelI );
			$countI++ if ( INTname($nameI, $superfamily) &&                                                  # this is an IR
				       nameAndOrientationMatch($lastRec1, $internalRecNum, $record_,$parameter_) && # LTR and IR have same name/orient
				       $Irecord_->[$index][1] eq $labelI                                  # hit to the same IR
				     );
			$countLow++ if ( $record_->[$internalRecNum]->{"superfamily"} =~ /(Low_)|(Simple_)/i );
		  }
		# Return TRUE if all records between LTRs correspond to hits to the same IR (apart from 
		# 'Low_complexity' and 'Simple_repeat' hits), if the first and last of these
		# are the first and last records classified as fragments of the same internal region, and if
		# LTRs are adjacent (within tolerance) to them.
		if ( $countI eq ($firstRec2-$lastRec1-1-$countLow) &&
		     $countI eq scalar(@{$Irecord_->[$IRpairIndex]->[2]}) &&
		     consecutive($lastRec1, $lastRec1+1, $record_, $parameter_) &&
		     consecutive($firstRec2-1, $firstRec2, $record_, $parameter_)
		   )
		  { # this could be an uninterrupted 'complete' element!
		    my @IRindex = sort {$a<=>$b} @{$Irecord_->[$IRpairIndex]->[2]};
		    my $last = -1;  # this will be used to store and weed out duplicate indices
		    $allIRindices = hyphenate( grep { ($last eq $_)? 0 : ($last=$_, 1) } sort{$a<=>$b}(@IRindex,@fuzzyIindex) );
		    $allLTRindices = hyphenate( sort {$a<=>$b} (@LTR1, @LTR2) );

		    # But RECURSIVELY check possibility of nested (consecutive) insertion of a complete element of same family/orientation:
		    # get outermost index if LTR2 corresponds to more than one hit
		    my $outerLTR2 = getLastFragment($LTR2, $LTRrecord_);
		    my $LTR3 = $outerLTR2 + 1;

		    if ( $LTR3 < scalar(@{$LTRrecord_}) && !$record_->[ $LTRrecord_->[$LTR3][0] ]->{"id"} )
		      {
			my ($nestedPair, $nestedIRindices, $nestedLTRindices, $nestedIRpairIndex_, @nestedLTRpairIndex) = 
			  uninterruptedElement($outerLTR2, $LTR3, $LTRrecord_, $Irecord_, $record_, $parameter_); # RECURSION

			my @nestedLTRindices = split("-", $nestedLTRindices) if ($nestedPair);
			if ( $nestedPair && ( grep /^$LTR2$/,@nestedLTRindices) )
			  { # possibly a nested element!
			    my @nestedIRindex = @{$nestedIRpairIndex_};
			    # check whether one of these elements is interrupted by the other
			    my $lastIRrec = $Irecord_->[ $IRindex[-1] ][0];
			    my $firstNestedIRrec = $Irecord_->[ $nestedIRindex[0] ][0];

			    if ( interruption($lastIRrec, $firstRec2, $record_, $tolerance) &&
				 !interruption($firstNestedIRrec, $lastRec2, $record_, $tolerance)
			       )
			      { # a nested pair! (nested element (LTR2) interrupts previous element)
			        @LTRpairIndex = @nestedLTRpairIndex;
				$IRpairIndex = $nestedIRindex[0];
				@IRindex = @nestedIRindex;
				$allIRindices = $nestedIRindices;
				$allLTRindices = $nestedLTRindices;
			      }
			    elsif ( !interruption($lastIRrec, $firstRec2, $record_, $tolerance) &&
				    interruption($firstNestedIRrec, $lastRec2, $record_, $tolerance)
				  )
			      { # a nested pair! (LTR2 interrupts next element)
				removeRearrangementFlags( $Irecord_, $record_, split("-", $nestedIRindices) );
				removeRearrangementFlags( $LTRrecord_, $record_, split("-", $nestedLTRindices) );
			      }
			    else
			      { # not enough evidence for nesting, this could be a result of DNA rearrangement other than transposition
				# flag all relevant hits as possibly involved in DNA rearrangements
				my $last = -1;
				my @rearrangementLTRindex = grep { ($last eq $_)? 0 : ($last=$_, 1) } sort {$a<=>$b} ( @LTR1, @nestedLTRindices );
				$last = -1;
				my @rearrangementIRindex = grep { ($last eq $_)? 0 : ($last=$_, 1) } sort {$a<=>$b} ( @IRindex, split("-",$nestedIRindices) );
				flagLTR_IRrearrangement($LTRrecord_,$Irecord_,$record_,\@rearrangementLTRindex,\@rearrangementIRindex,$parameter_);
				$allIRindices = hyphenate(@rearrangementIRindex);
				$allLTRindices = hyphenate(@rearrangementLTRindex);
				
				# pick a pair if one IR is contiguous to LTR2 (and there's no deletion in boundary),
				# and if LTR2 is not contiguous to the other IR
				my $IR = contiguityTest($IRpairIndex,$LTR2,$nestedIRindex[0],$Irecord_,$LTRrecord_,$record_,$parameter_);
				if ($IR)
				  { # one of the IRs is contiguous to LTR2 and the other isn't, choose contiguous pair
				    if ( getFirstFragment($IR,$Irecord_) eq $nestedIRindex[0] )
				      {
					@LTRpairIndex = @nestedLTRpairIndex;
					@IRindex = @nestedIRindex;
					$IRpairIndex = $IRindex[0];
				      }
				  }
				else
				  { # pick the pair with lowest intra-pair LTR divergence from the ref seq (if any)
				    my ($divDiff, @LTRdivLowest) = LTRdivTest(@LTRpairIndex,@nestedLTRpairIndex,$LTRrecord_,$record_);
				    if (@LTRdivLowest = @nestedLTRpairIndex)
				      {
					@LTRpairIndex = @nestedLTRpairIndex;
					@IRindex = @nestedIRindex;
					$IRpairIndex = $IRindex[0];
				      }
				  }
			      }
			  }
		      }
		    else
		      { # no nested element

			# check whether the second LTR is immediately followed by an IR of same family/orientation 
			# (and that there's no deletion at the boundary)
			if ( consecutiveINT($lastRec2, $record_, $parameter_) )
			  { # possibility of a DNA rearrangement other than transposition
			    my $last = - 1; # will store indices (but here initialized with a value out of range) to weed out duplicate ones
			                    # (using grep) see Glover, Humphreys & Weiss "Perl 5 How To" p.456:
			    my @rearrangementLTRindex = split("-", $allLTRindices);
			    my @rearrangementIRindex = grep { ($last eq $_)? 0 : ($last=$_, 1) }
			                               sort{$a<=>$b}( split("-",$allIRindices), 
								      @{$Irecord_->[$record_->[$lastRec2+1]->{index}]->[2]}
								    );
			    flagLTR_IRrearrangement($LTRrecord_,$Irecord_,$record_,\@rearrangementLTRindex,\@rearrangementIRindex,$parameter_);
			    $allIRindices = hyphenate(@rearrangementIRindex);
			  }
			else
			  { # there is no consecutive IR
			    # flag DNA rearrangement other than transposition if LTR1 is preceded by a contiguous IR
			    my @rearrangementLTRindex = split("-", $allLTRindices);
			    my @rearrangementIRindex = split("-", $allIRindices);
			    flagLTR_IRrearrangement($LTRrecord_,$Irecord_,$record_,\@rearrangementLTRindex,\@rearrangementIRindex,$parameter_) 
			      if ($IRcontiguousLTR1);
			  }
		      }

		    # return complete element
		    return (1, $allIRindices, $allLTRindices, \@IRindex, @LTRpairIndex);
		  }
	  }
	return 0;  # not a complete, uninterrupted LTR-retrotransposon insertion!
}


###################
# Takes a list and returns a string containing the elements of the list separated by hyphens
sub hyphenate
  {
    my @list = @_;
    my $hyphenatedItemsOfList = "";
    foreach my $item (@list)
      {
	$hyphenatedItemsOfList .= "-".$item;
      }
    $hyphenatedItemsOfList =~ s/^-//;
    return $hyphenatedItemsOfList;
  }


###################
# Checks whether an LTR is followed or preceded by an internal region without intervening indels (and not both).
# First arg is an index of the putatively preceding IR, second an index of the query LTR, third an index of the 
# putatively following IR, fourth a ref to the IR records array, 5th a ref to the LTRrecords array, next a ref to the records array,
# last a ref to the parameter hash.
# Returns the index of an IR if that IR is contiguous the LTR AND the LTR is not contiguous to the other IR. Otherwise returns 0.
sub contiguityTest
  {
    my ($IRindex, $LTRindex, $nextIRindex, $Irecord_, $LTRrecord_, $record_, $parameter_) = @_;
    my $IRconsecutiveLTR = consecutive( $Irecord_->[getLastFragment($IRindex,$Irecord_)][0], 
					 $LTRrecord_->[getFirstFragment($LTRindex,$LTRrecord_)][0], $record_, $parameter_ );
    my $LTRconsecutiveNextIR = consecutive( $LTRrecord_->[getLastFragment($LTRindex,$LTRrecord_)][0], 
					    $Irecord_->[getFirstFragment($nextIRindex,$Irecord_)][0], $record_, $parameter_ );
    my $IR = 0;

    if ($IRconsecutiveLTR && !$LTRconsecutiveNextIR) 
      { # classify previous element
	$IR = $IRindex;
      }
    elsif (!$IRconsecutiveLTR && $LTRconsecutiveNextIR)
      { # classify next element
	$IR = $nextIRindex;
      }
    return $IR;
}


###################
# Compares intra-pair divergences (to the ref seq) between two LTR pairs. Returns 1 and the indices of the pair with lower intra-pair
# divergence, returns 0 if the divergences do not differ significantly.
# First two args are the indices of the first pair, next two args those of the second pair, 5th a ref to the LTRrecords array,
# last a ref to the records array.
sub LTRdivTest
  {
    my ($ltr1, $ltr2, $nextLTR1, $nextLTR2, $LTRrecord_, $record_) = @_;
    my @LTRpair1 = ($ltr1,$ltr2);
    my @LTRpair2 = ($nextLTR1,$nextLTR2);
    my $significance = 0.5;

    # Choose LTR pair whose divergences from the reference are most similar to each other
    my ($div1, $div2, $divNext1, $divNext2) = (0,0,0,0); # mean div for hits to LTR1, LTR2, nextLTR1 and nextLTR2
    my @LTR1 = @{$LTRrecord_->[$LTRpair1[0]]->[2]};
    my @LTR2 = @{$LTRrecord_->[$LTRpair1[1]]->[2]};
    my @nextLTR1 = @{$LTRrecord_->[$LTRpair2[0]]->[2]};
    my @nextLTR2 = @{$LTRrecord_->[$LTRpair2[1]]->[2]};
    foreach my $hit (@LTR1)
      {
	$div1 += $record_->[ $LTRrecord_->[$hit][0] ]->{"divergenceFromRef"};
      }
    foreach my $hit (@LTR2)
      {
	$div2 += $record_->[ $LTRrecord_->[$hit][0] ]->{"divergenceFromRef"};
      }
    foreach my $hit (@nextLTR1)
      {
	$divNext1 += $record_->[ $LTRrecord_->[$hit][0] ]->{"divergenceFromRef"};
      }
    foreach my $hit (@nextLTR2)
      {
	$divNext2 += $record_->[ $LTRrecord_->[$hit][0] ]->{"divergenceFromRef"};
      }
    $div1 = $div1/scalar(@LTR1);
    $div2 = $div2/scalar(@LTR2);
    $divNext1 = $divNext1/scalar(@nextLTR1);
    $divNext2 = $divNext2/scalar(@nextLTR2);

    return (1, @LTRpair1) if ( abs($div1-$div2) < abs($divNext1-$divNext2) - $significance );
    return (1, @LTRpair2) if ( abs($div1-$div2) - $significance > abs($divNext1-$divNext2) );
    return 0;
}


###################
# Flags the records hits possibly involved in a DNA rearrangement other than transposition.
# First two args are refs to the LTR and IR record number arrays, third a ref to the records array.
# The next two args are refs to lists of LTR and IR indices. The last arg a ref to the
# parameter hash.

sub flagLTR_IRrearrangement
{
  my ($LTRrec_, $IRrec_, $record_, $LTRindex_, $IRindex_, $parameter_) = @_;
  my $recList = "";  # will store a hyphenated list of RM line numbers corresponding to hits putatively involved
                     # in a DNA rearrangement other than transposition

  if (defined($LTRindex_))
    {
      foreach my $index (@{$LTRindex_})
        {
          $recList .= ($LTRrec_->[$index][0] + $parameter_->{"RMheaderLines"} +1)."-";
        }
    }
  if (defined($IRindex_))
    {
      foreach my $index (@{$IRindex_})
        {
          $recList .= ($IRrec_->[$index][0] + $parameter_->{"RMheaderLines"} +1)."-";
        }
    }
  $recList =~ s/^(.+)-$/$1/;  # remove last '-'

  # write list to the record of every hit involved
  if (defined($LTRindex_))
    {
      foreach my $index (@{$LTRindex_})
        {
          $record_->[ $LTRrec_->[$index][0] ]->{"rearrangement"} = $recList;
        }
    }
  if (defined($IRindex_))
    {
      foreach my $index (@{$IRindex_})
        {
          $record_->[ $IRrec_->[$index][0] ]->{"rearrangement"} = $recList;
        }
    }

}


###################
# Flags the records hits possibly involved in a DNA rearrangement other than transposition.
# First two args are refs to an index array the all hits- records array.
# The next arg is a ref to the parameter hash, then the list of indices of hits involved in
# the rearrangement

sub flagRearrangement 
  {
    my ($indexRec_, $record_, $parameter_, @index) = @_;
    my $recList = "";  # will store a hyphenated list of RM line numbers corresponding to hits putatively involved
    # in a DNA rearrangement other than transposition

    foreach my $index (@index)
      {
	$recList .= ($indexRec_->[$index][0] + $parameter_->{RMheaderLines} +1)."-";
      }
    $recList =~ s/^(.+)-$/$1/;  # remove last '-'

    foreach my $index (@index)
      {
	$record_->[ $indexRec_->[$index][0] ]->{rearrangement} = $recList;
      }

  }


###################
# Removes rearrangement flags of hits passed as arguments (indices)

sub removeRearrangementFlags
  {
    my ($indexRec_, $record_, @index) = @_;

    foreach my $index (@index)
      {
	$record_->[ $indexRec_->[$index][0] ]->{rearrangement} = 0;
      }

  }


###################
# Checks whether two LTR sequences belong to the same (interrupted by insertions) retrotransposon.
# Returns true or false, plus a  string containing all indices of the IR associated with the element
# (there can be more than one in case of a DNA rearrangement other than transposition) separated by hyphens,
# plus a string with all the indices of LTRs associated with the element separated by hyphens,
# plus an a ref to "internal" IR indices between LTR1 and LTR2, plus an index of each LTR in a two list.
sub interruptedElement
  {
    # First two arguments are the LTR-record indices of two LTR sequences,
    # third, fourth, fifth and sixty  are references to the LTR-, IR-, non-LTR- indices and all-hits arrays. 
    # Last a ref to the parameter hash.
	my($LTR1, $LTR2, $LTRrecord_, $Irecord_, $nonLTRrecord_, $record_, $parameter_) = @_;
	my $fragmentSearchRange = $parameter_->{"fragmentSearchRange"};
	my @LTRpairIndex;  # will store a pair of LTR indices to be returned in case an element is identified
	my $IRpairIndex;   # will store an IR index to be returned in case in case an element is identified
	my $allLTRindices; # will store all the LTR indices associated with the element, separated by hyphens
	my $allIRindices;  # will store all the IR indices associated with the element, separated by hyphens

	# get hits associated with LTRs
	my @LTR1 = @{$LTRrecord_->[$LTR1]->[2]};
	my $firstRec1 = $LTRrecord_->[$LTR1[0]][0];
	my $lastRec1 = $LTRrecord_->[$LTR1[-1]][0];
	my @LTR2 = @{$LTRrecord_->[$LTR2]->[2]};
	my $firstRec2 = $LTRrecord_->[$LTR2[0]][0];
	my $lastRec2 = $LTRrecord_->[$LTR2[-1]][0];

	my $classified1 = $record_->[$firstRec1]->{"id"};
	my $classified2 = $record_->[$firstRec2]->{"id"};

	# make sure the hits are in the same query sequence
	return (0,0,0,0) if ($record_->[$firstRec1]->{"query"} ne $record_->[$firstRec2]->{"query"});  # LTRs not in the same query sequence!

	# make sure the records haven't been classified
	return (0,0,0,0) if ( $classified1 || $classified2 );

	# check if distance between LTRs is within pairing range
	return (0,0,0,0) if ( ($record_->[$firstRec2]->{"chromoStart"} - $record_->[$lastRec1]->{"chromoEnd"}) >= $fragmentSearchRange );

	# check if LTR1 is 'contiguously' preceded by an 
	# internal region of same family/orientation, and not (*CURRENTLY COMMENTED OUT-> contiguously) followed by a similar IR
	# (in which case we may have a complex rearrangement)
	my $IRcontiguousLTR1 = 0;
	my @fuzzyIindex = ();  # this will store extra IR indices if there is a preceding IR contiguous to LTR1
	if ( nameAndOrientationMatch($firstRec1-1, $firstRec1, $record_,$parameter_) &&
	     INTname($record_->[$firstRec1-1]->{refName}, $record_->[$firstRec1-1]->{superfamily}) &&
	     consecutive($firstRec1-1,$firstRec1,$record_,$parameter_)
	   )
	  {
	    return (0) unless (  nameAndOrientationMatch($lastRec1, $lastRec1+1, $record_,$parameter_) &&
				 $record_->[$lastRec1+1]->{refName} =~ /_I/ &&
#				 consecutive($lastRec1,$lastRec1+1,$record_,$parameter_) &&
				 !$record_->[$firstRec1-1]->{id}
			      );
	    $IRcontiguousLTR1 = 1; # LTR1 has contiguous IRs on both sides
	    @fuzzyIindex = @{$Irecord_->[$record_->[$firstRec1-1]->{index}]->[2]};
	  }

	if ( nameAndOrientationMatch($lastRec1, $firstRec2, $record_,$parameter_) ) 
	  { # same orientation and element family

	    # exit if LTRs are consecutive records (do not count them as a pair)
		return (0,0,0,0) if ( $firstRec2 eq $lastRec1 + 1);

		# CHECK THAT THERE ARE NO REPEAT ELEMENTS WITH HITS INTERLEAVED WITH LTR1 AND LTR2.

		return (0,0,0,0) if ( interleavedHits($lastRec1, $firstRec2, $LTRrecord_, $Irecord_, $nonLTRrecord_, $record_) );

		# Count any records between LTRs that correspond to internal sequences of the same family and have same orientation as LTRs,
		# as long as they are unclassified (not previously identified as part of an element).

		# Also count number of intervening LTRs that have already been paired (NOTE: this probably redundant given the
                #                                                                            interleavedHits check above.)
		my $countI = 0;
		my $countLTR = 0;
		my %IRstate = (); # keys will store (defrag) labels of IRs that have at least 1 hit between LTR1 and LTR2
                                  # (and as yet unclassified), values will store a 2-list (array ref):
		                  # [number of hits of each such IR found between LTR1 and LTR2, first index encountered]
		my %nestedLTRstate = ();  # keys will store IDs of LTRs (found in between LTR1 and LTR2) already classified into a pair,
                                          # values will store a 2-list (array ref):
                                          # [number of hits of each such LTR found between LTR1 and LTR2, first index encountered]
		my $nameI; my $orientI; my $index; my $idI; my $label; my $superfamily;

		for (my $intRecNum = $lastRec1+1; $intRecNum < $firstRec2; $intRecNum++)
		  {
			$nameI = $record_->[$intRecNum]->{"refName"};
			$orientI = $record_->[$intRecNum]->{"orientation"};
			$idI = $record_->[$intRecNum]->{"id"};
			$index = $record_->[$intRecNum]->{"index"};
			$superfamily = $record_->[$intRecNum]->{superfamily};
			# store index of the first record between LTR1 and LTR2 that corresponds to an a NEW unclassified IR
			if ( INTname($nameI, $superfamily) && !$idI  &&  nameAndOrientationMatch($lastRec1, $intRecNum, $record_,$parameter_) )
			  {
			    $label = $Irecord_->[$index][1];
			    $IRstate{$label} = [0,$index] unless ( $IRstate{$label} );
			    $IRstate{$label}->[0]++;  # count this hit
			  }
			if ( $idI && $idI !~ /s/ && LTRname($nameI, $superfamily) )
			  {
			    $label = $LTRrecord_->[$index][1];
			    $nestedLTRstate{$label} = [0,$index] unless ( $nestedLTRstate{$label} );
			    $nestedLTRstate{$label}->[0]++;  # count this hit
			  }
		   }

		# look for an IR with all its associate hits between LTR1 and LTR2; in case there are more than one,
		# flag a chimaeric IR (relative to the ref seq).
		# Identify the longest one (in terms of matches to the reference sequence) as well
		my @IRindex = ();
		my $LTR1contiguousIR = 0;  #
		my $IRcontiguousLTR2 = 0;  # these will be set to the labels contigous IRs if they exist
		my $LTR1recFollowedByIRrec = 0;
		my $LTR2recPrecededByIRrec = 0;  # these will be set to the labels of IRs with adjacent records if they exist
		my $chimaericIR = (scalar(keys %IRstate) > 1)? 1 : 0;  # will flag possibility that this element's IR is chimaeric
		                                                       # relative to the reference seq, if two IRs are different
		                                                       # (according to defragmentation of hits relative to ref seqs) are found
		foreach my $IRlabel (keys %IRstate)
		  {
		    my $index = $IRstate{$IRlabel}->[1];
		    my $count = $IRstate{$IRlabel}->[0];
		    # check if this IR is consecutive to either LTR
		    $LTR1contiguousIR = $IRlabel if ( consecutive($lastRec1,$Irecord_->[$index][0],$record_,$parameter_) );
		    $IRcontiguousLTR2 = $IRlabel if ( consecutive($Irecord_->[getLastFragment($index,$Irecord_)][0],
								  $firstRec2,$record_,$parameter_)
						    );
		    $LTR1recFollowedByIRrec = $IRlabel if ( $Irecord_->[getFirstFragment($index,$Irecord_)][0] == $lastRec1 + 1 );
		    $LTR2recPrecededByIRrec = $IRlabel if ( $Irecord_->[getLastFragment($index,$Irecord_)][0] == $firstRec2 - 1 );
		    # store the index of the longest IR between LTRs 1 and 2
		    $IRpairIndex = $index if ( scalar(@{$Irecord_->[$index]->[2]}) eq $count &&
					       ( !$IRpairIndex ||
						 getRefLength($index,$Irecord_,$record_) > getRefLength($IRpairIndex,$Irecord_,$record_) 
					       )
					     );
		    @IRindex = ( @IRindex, @{$Irecord_->[$index]->[2]} );
		  }
		@IRindex = sort {$a<=>$b} @IRindex;

		if ( $LTR1contiguousIR && $IRcontiguousLTR2 && $LTR1contiguousIR eq $IRcontiguousLTR2 )
		  { # same IR has ends contiguous to both LTRs
#		    @IRindex = @{$Irecord_->[$IRstate{$LTR1contiguousIR}->[1]]->[2]};
		  }
		elsif ( $LTR1contiguousIR && $IRcontiguousLTR2 && $LTR1contiguousIR ne $IRcontiguousLTR2 )
		  { # include both IRs if there is no IR preceding and contiguous to LTR1 and reference names match exactly
		    my $Irec1 = $Irecord_->[ $IRstate{$LTR1contiguousIR}->[-1] ][0];
		    my $Irec2 = $Irecord_->[ getLastFragment($IRstate{$IRcontiguousLTR2}->[-1],$Irecord_) ][0];
		    return 0 if ($IRcontiguousLTR1 && $record_->[$Irec1]->{refName} ne $record_->[$Irec2]->{refName});
		    my $I1refLength = getRefLength($IRstate{$LTR1contiguousIR}->[1],$Irecord_,$record_);
		    my $I2refLength = getRefLength($IRstate{$IRcontiguousLTR2}->[1],$Irecord_,$record_);
		    my $refLength1 = $record_->[$Irec1]->{refEnd} + $record_->[$Irec1]->{leftAfterRefEnd};
		    my $refLength2 = $record_->[$Irec2]->{refEnd} + $record_->[$Irec1]->{leftAfterRefEnd};
		    #if ( $I1refLength + $I2refLength < $refLength1 + $refLength2 )
		    #  {
		    $IRpairIndex = ($I1refLength > $I2refLength)? $IRstate{$LTR1contiguousIR}->[-1] : $IRstate{$IRcontiguousLTR2}->[-1];
		    # flag possibility of chimaeric IR (relative to ref seq)
		    my @Ifrags1 = @{$Irecord_->[$IRstate{$LTR1contiguousIR}->[-1]]->[2]};
		    my @Ifrags2 = @{$Irecord_->[$IRstate{$IRcontiguousLTR2}->[-1]]->[2]};
#		    @IRindex = sort {$a<=>$b} (@Ifrags1,@Ifrags2);
		    $chimaericIR = 1;
		    #  }
		  }
		elsif ( ($LTR1contiguousIR)# || $LTR1recFollowedByIRrec)
			&& !($IRcontiguousLTR2 || $LTR2recPrecededByIRrec)
		      )
		  { # choose IR contiguous to LTR1
		    my $ir = ($LTR1contiguousIR)? $LTR1contiguousIR : $LTR1recFollowedByIRrec;
		    @IRindex = getFrags($IRstate{$ir}->[1],$Irecord_);
		  }
		elsif ( !($LTR1contiguousIR  || $LTR1recFollowedByIRrec)
			&& ($IRcontiguousLTR2)# || $LTR2recPrecededByIRrec)
		      )
		  { # choose IR contiguous to LTR2
		    my $ir = ($IRcontiguousLTR2)? $IRcontiguousLTR2 : $LTR2recPrecededByIRrec;
		    @IRindex = getFrags($IRstate{$ir}->[1],$Irecord_);
		  }

		# check that the total number of already classified LTRs between LTR1 and LTR2 is even,
		# and that all hits associated with those LTRs are found between LTR1 and LTR2.
		my $singleNestedLTR = scalar(keys(%nestedLTRstate)) % 2;
		foreach my $LTRi (keys %nestedLTRstate)
		  {
		    my $index = $nestedLTRstate{$LTRi}->[1];
		    my $count = $nestedLTRstate{$LTRi}->[0];
		    $singleNestedLTR++ unless ( scalar(@{$LTRrecord_->[$index]->[2]}) eq $count );
		  }

		# If there is at least 1 corresponding internal region, if the number of LTRs
		# previously classified as 'paired' in between is even, and if all the hits associated with
		# these LTRs are between LTR1 and LTR2, then this is possibly a pair!
		if ( defined($IRpairIndex) && !$singleNestedLTR )
		  { # this could be a pair!

                    @LTRpairIndex = ($LTR1, $LTR2);

		    # store all IR indices so far associated with the element (separated by hyphens)
		    my $last = -1;  # this will be used to store and weed out duplicate indices
		    $allIRindices = hyphenate( grep { ($last eq $_)? 0 : ($last=$_, 1) } sort{$a<=>$b}(@IRindex,@fuzzyIindex) );
		    my @LTR1 = @{$LTRrecord_->[$LTR1]->[2]};
		    my @LTR2 = @{$LTRrecord_->[$LTR2]->[2]};

		    $allLTRindices = hyphenate( sort{$a<=>$b}(@LTR1, @LTR2) );

		    # BUT before classifying LTR1 and LTR2 as flanking an interrupted element,
		    # RECURSIVELY check for as yet unclassified, nested elements of same family and orientation

		    my $nestedLTR = $LTR1[-1] + 1;
                    $nestedLTR++ while ( $nestedLTR < $LTR2[0] - 1  &&
					 ( !nameAndOrientationMatch($LTRrecord_->[$nestedLTR][0], $LTRrecord_->[$LTR2[0]][0], $record_,$parameter_) ||
					   ( nameAndOrientationMatch($LTRrecord_->[$nestedLTR][0], $LTRrecord_->[$LTR2[0]][0], $record_,$parameter_) &&
					     $record_->[ $LTRrecord_->[$nestedLTR][0] ]->{id}
					   )
					 )
				       ); # pick an yet unclassified nested LTR of same family and orientation (if any)
		    my $nestedLTRrec = $LTRrecord_->[$nestedLTR][0];
		    if ( $nestedLTR < $LTR2[0] && !$record_->[ $LTRrecord_->[$nestedLTR][0] ]->{id} )
		      {
			# RECURSION
			my ($nestedPair, $nestedIRindices, $nestedLTRindices, $nestedIRpairIndex_, @nestedLTRpairIndex) =
			  interruptedElement($nestedLTR,$LTR2,$LTRrecord_,$Irecord_,$nonLTRrecord_,$record_,$parameter_); # RECURSION
			if ($nestedPair)
			  { # nested element found!
			    ($allIRindices, $allLTRindices, @LTRpairIndex) = ($nestedIRindices, $nestedLTRindices, @nestedLTRpairIndex);
			    @IRindex = @{$nestedIRpairIndex_};
			  }
		      }

                    # Before returning this element check whether the second LTR could be part of a consecutive interrupted element
		    my $nextLTR = ($LTR2[-1] < scalar(@{$LTRrecord_}) - 1)? $LTR2[-1] + 1 : $LTR2[-1];
                    $nextLTR++ while ( $nextLTR < scalar(@{$LTRrecord_}) - 1  &&
				       !( nameAndOrientationMatch($LTRrecord_->[$nextLTR][0], $LTRrecord_->[$LTR2[0]][0], $record_,$parameter_) &&
					  !$record_->[$LTRrecord_->[$nextLTR][0]]->{"id"}
					)
				     ); # pick an yet unclassified LTR of same family and orientation (if any)

		    # RECURSION
                    my ($consecutivePair,$consecutiveIRindices,$consecutiveLTRindices,$consecutiveIRpairIndex_,@consecutiveLTRpairIndex) =
		      interruptedElement($LTR2,$nextLTR,$LTRrecord_,$Irecord_,$nonLTRrecord_,$record_,$parameter_);

		    my $firstLTR2index = $LTR2[0];
		    my @consecutiveLTRindices = split("-", $consecutiveLTRindices) if ($consecutivePair);

		    if ( $consecutivePair && (grep /^$firstLTR2index$/,@consecutiveLTRindices) )
		      { # LTR2 could also be part of an element pairing with nextLTR (or this is a region of DNA rearrangement)

			my @nextLTR =  @{$LTRrecord_->[$consecutiveLTRpairIndex[1]]->[2]};
			my @nextIR = @{$consecutiveIRpairIndex_};

			my $lastIRrec = $Irecord_->[ $IRindex[-1] ][0];
			my $firstNextIRrec = $Irecord_->[ $nextIR[0] ][0];
			my $firstRec2 = $LTRrecord_->[ $LTR2[0] ][0];
			my $lastRec2 = $LTRrecord_->[ $LTR2[-1] ][0];
			my $tolerance = $parameter_->{"boundaryTolerance"};
			# check whether LTR2 interrupts either the previous or the next IR
			if ( interruption($lastIRrec, $firstRec2, $record_, $tolerance) &&
			     !interruption($firstNextIRrec, $lastRec2, $record_, $tolerance)
			   )
			  { # next element (LTR2) interrupts previous element! Choose next element as the complete element to be returned:
			    ($allIRindices,$allLTRindices,@LTRpairIndex) = ($consecutiveIRindices,$consecutiveLTRindices,@consecutiveLTRpairIndex);
			    @IRindex = @nextIR;
			  }
			elsif ( !interruption($lastIRrec, $firstRec2, $record_, $tolerance) &&
				interruption($firstNextIRrec, $lastRec2, $record_, $tolerance)
			      )
			  { # previous element (LTR2) interrupts next element! Choose previous element to be returned.
			    # Remove any rearrangement flags from recursive call
			    removeRearrangementFlags( $Irecord_, $record_, split("-", $consecutiveIRindices) );
			    removeRearrangementFlags( $LTRrecord_, $record_, split("-", $consecutiveLTRindices) );
			  }
			elsif ( ( consecutive($lastRec2, $firstNextIRrec, $record_, $parameter_) )#|| $firstNextIRrec == $lastRec2 + 1 )
				&&
			       !( consecutive($lastIRrec,$firstRec2,$record_,$parameter_)  || $lastIRrec == $firstRec2 - 1 )
			      )
			  { # LTR2 is contiguous to (or its rec followed by the rec of) the next IR, and not contiguous to (or its rec preceded by the rec of) 
			    # the previous IR: 
			    # choose next element
			    ($allIRindices,$allLTRindices,@LTRpairIndex) = ($consecutiveIRindices,$consecutiveLTRindices,@consecutiveLTRpairIndex);
			    @IRindex = @nextIR;
			  }
			elsif (!( consecutive($lastRec2, $firstNextIRrec, $record_, $parameter_) || $firstNextIRrec == $lastRec2 + 1 )
			       &&
				( consecutive($lastIRrec,$firstRec2,$record_,$parameter_) )#|| $lastIRrec == $firstRec2 - 1 )
			      )
			  { # LTR2 is contiguous to (or its rec preceded by the rec of) the previous IR, and not contiguous to (or its rec followed by the rec of) 
			    # the next IR:
			    # choose current element.
			    # Remove any rearrangement flags from recursive call
			    removeRearrangementFlags( $Irecord_, $record_, split("-", $consecutiveIRindices) );
			    removeRearrangementFlags( $LTRrecord_, $record_, split("-", $consecutiveLTRindices) );
			  }
			else
			  {
			    # flag both previous and consecutive "elements" as a region of possible DNA rearrangement
			    my $last = - 1; # will store indices (but here initialized with a value out of range) to weed out duplicate ones
			                    # (using grep) see Glover, Humphreys & Weiss "Perl 5 How To" p.456:

			    my @rearrangementLTRindex = grep { ($last eq $_)? 0 : ($last=$_, 1) } 
			                                sort{$a<=>$b}(@LTR1, @consecutiveLTRindices);

			    $last = -1;
			    my @rearrangementIRindex = grep { ($last eq $_)? 0 : ($last=$_, 1) } 
			                               sort{$a<=>$b}(@IRindex, @fuzzyIindex, split("-",$consecutiveIRindices));
			    flagLTR_IRrearrangement($LTRrecord_,$Irecord_,$record_,\@rearrangementLTRindex,\@rearrangementIRindex,$parameter_);

                            # store all IR indices so far associated with the element (separated by hyphens)
                            $allIRindices = hyphenate(@rearrangementIRindex);
		            $allLTRindices = hyphenate(@rearrangementLTRindex);

			    # pick a pair (just for the sake of uniformity of annotation appearance, as this is a rearrangement other than
			    # transposition) if one IR is contiguous to LTR2 (and there's no deletion in boundary),
			    # and if LTR2 is not contiguous to the other IR
			    @IRindex = sort {$a<=>$b} @IRindex;
			    @nextIR = sort {$a<=>$b} @nextIR;
			    my $IR = contiguityTest($IRindex[-1],$LTRpairIndex[1],$nextIR[0],
						    $Irecord_,$LTRrecord_,$record_,$parameter_);
			    if ($IR)
			      { # one of the IRs is contiguous to LTR2 and the other isn't, choose contiguous pair
				if ( getFirstFragment($IR,$Irecord_) eq $nextIR[0] )
				  {
				    @LTRpairIndex = @consecutiveLTRpairIndex;
				    @IRindex = @nextIR;
				  }
			      }
			    else
			      { # pick the pair with lowest intra-pair LTR divergence from the ref seq (if any)
				my ($divDiff, @LTRdivLowest) = LTRdivTest(@LTRpairIndex,@consecutiveLTRpairIndex,$LTRrecord_,$record_);
				if (@LTRdivLowest = @consecutiveLTRpairIndex)
				  {
				    @LTRpairIndex = @consecutiveLTRpairIndex;
				    @IRindex = @nextIR;
				  }
			      }
			  }
		      }
		    else # no consecutive possibility of another LTR pair
		      {
			# Before returning this element check whether it's immediately followed by an IR of same family and orientation,
			# (and that there's no deletion at the boundary)
			@LTR2 = @{$LTRrecord_->[$LTRpairIndex[1]]->[2]};
			@IRindex = sort {$a<=>$b} @IRindex;
			$lastRec2 = $LTRrecord_->[ $LTR2[-1] ][0];
			$firstRec2 = $LTRrecord_->[ $LTR2[0] ][0];
			my $lastIRrec = $Irecord_->[ $IRindex[-1] ][0];
#			if ( consecutiveINT($lastRec2, $record_, $parameter_) && !consecutive($lastIRrec,$firstRec2,$record_,$parameter_) )
#			  { # LTR2 probably originated with the consecutive internal region
#			    return 0;
#			  }
			if ( consecutiveINT($lastRec2, $record_, $parameter_) )
			  { # flag identified (interrupted) 'complete' element and the next IR as possibly involved in DNA rearrangements
			    # other than transposition
			    my $last = - 1; # will store indices (but here initialized with a value out of range) to weed out duplicate ones
			                    # (using grep) see Glover, Humphreys & Weiss "Perl 5 How To" p.456:
			    my @rearrangementLTRindex = split("-", $allLTRindices);
			    my @rearrangementIRindex = grep { ($last eq $_)? 0 : ($last=$_, 1) }
			                               sort{$a<=>$b}( split("-",$allIRindices), 
								      @{$Irecord_->[$record_->[$lastRec2+1]->{index}]->[2]}
								    );
			    flagLTR_IRrearrangement($LTRrecord_,$Irecord_,$record_,\@rearrangementLTRindex,\@rearrangementIRindex,$parameter_);
			    $allIRindices = hyphenate(@rearrangementIRindex);
			  }
			else
			  { # there is no consecutive IR
			    # flag DNA rearrangement other than transposition if LTR1 is preceded by a contiguous IR
			    my @rearrangementLTRindex = split("-", $allLTRindices);
			    my @rearrangementIRindex = split("-", $allIRindices);
			    flagLTR_IRrearrangement($LTRrecord_,$Irecord_,$record_,\@rearrangementLTRindex,\@rearrangementIRindex,$parameter_) 
			      if ($IRcontiguousLTR1);
			  }
		      }

		    # flag rearrangement of internal region, only, if the possibility of a chimaeric IR
		    # has been flagged and no other rearrangements have been detected
		    flagRearrangement($Irecord_,$record_,$parameter_,@IRindex) 
		      if ( $chimaericIR && !$record_->[ $Irecord_->[ $IRindex[0] ][0] ]->{rearrangement} );

		    return (1, $allIRindices, $allLTRindices, \@IRindex, @LTRpairIndex);
		  }
	}

	return 0;  # LTRs not part of the same element!
}


###################
# Checks if a hit (whose record number is passed as the second arg) interrupts the sequence of another hit (rec no = first arg).

sub interruption {
	# First two args are record numbers of two hits, third argument is a reference to the array of (references to) all records,
	# fourth is the max tolerated discrepancy between sequence boundaries.
	my ($hitRec1, $hitRec2, $record_, $tolerance) = @_;
	my ($refMissing1, $refMissing2);                               # will store number of nuclotides missing from the
	                                                               #   end/beginning of the hit 1/2 (relative to reference seq)
	my $refLength1 = $record_->[$hitRec1]->{refEnd} + $record_->[$hitRec1]->{leftAfterRefEnd};
	my $refLength2 = $record_->[$hitRec2]->{refEnd} + $record_->[$hitRec2]->{leftAfterRefEnd};

	# make sure maximum boundary tolerance is 10% of reference library sequence length
	my $tolerance1 = ( $tolerance > $refLength1/10 )? $refLength1/10 : $tolerance;
	my $tolerance2 = ( $tolerance > $refLength2/10 )? $refLength2/10 : $tolerance;

	my $interruptingCoord;
	my $interruptedCoord;

	if ($hitRec2 > $hitRec1)
	  {
	    $interruptingCoord = $record_->[$hitRec2]->{"chromoStart"};  # chromosome coordinate for start of (putatively) interrupting hit 2
	    $interruptedCoord = $record_->[$hitRec1]->{"chromoEnd"};      # chromosome coordinate for end of (putatively) interrupted hit 1
	    $refMissing1 = ($record_->[$hitRec1]->{"orientation"} eq "C") ?
	                    ($record_->[$hitRec1]->{"refStart"} - 1) : $record_->[$hitRec1]->{"leftAfterRefEnd"};
	    $refMissing2 = ($record_->[$hitRec2]->{"orientation"} eq "C") ?
	                    $record_->[$hitRec2]->{"leftAfterRefEnd"} : ($record_->[$hitRec2]->{"refStart"} - 1);
	  }
	else
	  {
	    $interruptingCoord = $record_->[$hitRec2]->{"chromoEnd"};    # chromo coordinate for the end of (putatively) interrupting hit 2
	    $interruptedCoord = $record_->[$hitRec1]->{"chromoStart"};    # chromo coordinate for the start of (putatively) interrupted hit 1
	    $refMissing1 = ($record_->[$hitRec1]->{"orientation"} eq "C") ?
	                    $record_->[$hitRec1]->{"leftAfterRefEnd"} : ($record_->[$hitRec1]->{"refStart"} - 1);
	    $refMissing2 = ($record_->[$hitRec2]->{"orientation"} eq "C") ?
	                    ($record_->[$hitRec2]->{"refStart"} - 1) : $record_->[$hitRec2]->{"leftAfterRefEnd"};
	  }
	# return TRUE if the putatively interrupting hit end corresponds to the end of the ref seq,
	#  if there is sequence missing (of length at least 2*tolerance) from the putatively interrupted end of the other hit,
	#  and if the two hits are adjacent
	return 1 if ( $refMissing2 <= $tolerance2 && $refMissing1 > 2*$tolerance1 && (abs($interruptingCoord-$interruptedCoord) <= $tolerance) );
	return 0;
}


###################
# Checks whether two hits orderered along the query (whose records are passed as the first two args) are adjacent 
# and of the same family and orientation,
# and whether their neighbouring ends correspond to their respective reference sequence ends

sub consecutive {
	# First two args are the record numbers of the two hits (ordered along the query),
        # second argument is a ref to the array of (refs to) all records,
	# third is a ref to parameter hash (pointing to max tolerated discrepancy between sequence boundaries).
	my($rec1, $rec2, $record_, $parameter_) = @_;
	return 0 if ($rec1 eq -1 || $rec2 eq -1);             # record number out of range

	my $rec1end = $record_->[$rec1]->{"chromoEnd"};       # chromosome coordinate for end of first hit
	my $rec2start = $record_->[$rec2]->{"chromoStart"};   # chromosome coordinate for the start of next hit
	my $tolerance = $parameter_->{"boundaryTolerance"};
	my $refLength1 = $record_->[$rec1]->{refEnd} + $record_->[$rec1]->{leftAfterRefEnd};
	my $refLength2 = $record_->[$rec2]->{refEnd} + $record_->[$rec2]->{leftAfterRefEnd};

	# make sure maximum boundary tolerance is 10% of reference library sequence length
	my $tolerance1 = ( $tolerance > $refLength1/10 )? $refLength1/10 : $tolerance;
	my $tolerance2 = ( $tolerance > $refLength2/10 )? $refLength2/10 : $tolerance;

	my $refMissing1;                                      # will store number of nuclotides missing from the
	                                                      #    end of the first hit (relative to reference seq)
	my $refMissing2;                                      # will store number of nuclotides missing from the
	                                                      #    beginning of the second hit (relative to reference seq)

	# check whether the next record is a hit to internal region of same family and orientation
	if ( nameAndOrientationMatch($rec1, $rec2, $record_,$parameter_) )
	  {
	    $refMissing1 = ($record_->[$rec1]->{"orientation"} eq "C") ?
	                    ($record_->[$rec1]->{"refStart"} - 1) : $record_->[$rec1]->{"leftAfterRefEnd"};
	    $refMissing2 = ($record_->[$rec2]->{"orientation"} eq "C") ?
	                    $record_->[$rec2]->{"leftAfterRefEnd"} : ($record_->[$rec2]->{"refStart"} - 1);

	    # return TRUE if the two hits are contiguous (within tolerance), if the end of hit 1 corresponds to one terminal of ref seq 1,
	    # and if the beginning of hit 2 correponds to one terminal of ref seq 2....
	    # OR
	    # ... if the distance between the neighbouring ends of two hits corresponds to the the length of unmatched ref seq between them
	    # (the last criterion is useful to pick elements whose termini are diverged from the reference)
	    return 1 if ( ( $rec2start - $rec1end <= $tolerance && $refMissing1 + $refMissing2 <= $tolerance1+$tolerance2 ) ||
			  ( abs( $rec2start - $rec1end - ($refMissing1 + $refMissing2) ) <= $tolerance )
			);
	  }
	return 0;
}


###################
# Checks if an LTR sequence (whose record number is passed as the first arg) immediately precedes an internal region of
# the same family and orientation.

sub consecutiveINT {
	# First arg is a record index (immediately after LTR hit), second argument is a ref to the array of (refs to) all records,
	# third is a ref to parameter hash (pointing to max tolerated discrepancy between sequence boundaries).
	my($LTRrec, $record_, $parameter_) = @_;
	return 0 if ( $LTRrec > scalar(@{$record_}) - 2 );  # return 0 if this LTR is the last record in RM annotation
	my $tolerance = $parameter_->{"boundaryTolerance"};
	my $nextRec = $LTRrec + 1;                                  # record number for the hit following LTR
	my $LTRend = $record_->[$LTRrec]->{"chromoEnd"};            # chromosome coordinate for start of LTR sequence
	my $nextRecStart = $record_->[$nextRec]->{"chromoStart"};   # chromosome coordinate for end of previous sequence

	my $refMissingLTR;                                    # will store number of nuclotides missing from the
	                                                      #    end of the LTR hit (relative to reference seq)
	my $refMissingIR;                                     # will store number of nuclotides missing from the
	                                                      #    beginning of the IR hit (relative to reference seq)
	
	# check whether the next record is a hit to an internal region of same family and orientation
	if ( nameAndOrientationMatch($LTRrec, $nextRec, $record_,$parameter_) &&
	     INTname($record_->[$nextRec]->{"refName"}, $record_->[$nextRec]->{superfamily})
	   )
	  {
	    $refMissingLTR = ($record_->[$LTRrec]->{"orientation"} eq "C") ?
	                     ($record_->[$LTRrec]->{"refStart"} - 1) : $record_->[$LTRrec]->{"leftAfterRefEnd"};
	    $refMissingIR = ($record_->[$nextRec]->{"orientation"} eq "C") ?
	                    $record_->[$nextRec]->{"leftAfterRefEnd"} : ($record_->[$nextRec]->{"refStart"} - 1);

	    # return TRUE if the two hits are contiguous (within tolerance) and their neighbouring ends correspond to
	    # the ends of their respective reference sequences (within tolerance)
	    return 1 if ( ($nextRecStart-$LTRend) <= $tolerance && $refMissingLTR + $refMissingIR < 2*$tolerance );
	  }
	return 0;
}


###################
# Returns TRUE if the two records passed as arguments have the same orientation and name.

sub nameAndOrientationMatch {
  # First two args are the records of two sequences, 3rdarg a reference to the (all) records array,
  # last arg a ref to the parameter hash.
	my($rec1, $rec2, $record_,$parameter_) = @_;

	# first make sure both records correspond to hits in the same query sequence
	return 0 if ($record_->[$rec1]->{"query"} ne $record_->[$rec2]->{"query"});  # LTRs not in the same query sequence!

	my $name1 = $record_->[$rec1]->{family};
	my $name2 = $record_->[$rec2]->{family};
	my $orient1 = $record_->[$rec1]->{"orientation"};
	my $orient2 = $record_->[$rec2]->{"orientation"};

	# return TRUE if same element family and orientation
	return 1 if ( $name1 eq $name2 && $orient1 eq $orient2 );
	return 0;  # otherwise
}


###################
# Returns INPUT ERROR message.

sub printINPUT_ERROR {
  return "*                                                                      *
*********************   REannotate   ***********************************
*                                                                      *
*  ERROR: query sequence names in files                                *
*         < $ARGV[0] >
*         and                                                          *
*         < $ARGV[1] >
*         do not seem to correspond.                                   *
*                                                                      *
*  Exiting NOW.                                                        *
*                                                                      *
************************************************************************

";
}


###################
# Return framed input message.

sub messageOUT {
  my ($message) = @_;

  my $messageOUT = "

*********************   REannotate
*
*";

  # split message into lines
  my @messageLine = split(/\n/,$message);
  foreach my $line (@messageLine)
    {
      $messageOUT .= "\n*     ".$line;
    }
  $messageOUT .= "\n\n";

  return $messageOUT;
}


###################
# Takes the name of a repeat annotated by RepeatMasker
# and checks whether it corresponds to the LTR
# of an LTR-element, which could be marked by
# one of the suffixes
# { '-LTR', '_LTR'};
# the superfamily of an LTR-element should
# indicate so.
# Returns 0 if this name does not correspond to
# an LTR-element or if it corresponds to the internal
# region of an LTR-element.
# Otherwise it returns the repeat family name WITHOUT any suffixes.

sub LTRname
  {
    my ($name, $superfamily) = @_;
    if( $superfamily =~ /LTR/i && $name !~ /[_-](I|int)$/i )
      {
	$name =~ s/[_-]LTR$//i;
	return $name;
      }
    return 0;
  }


###################
# Takes the name of a repeat annotated by RepeatMasker
# and checks whether it corresponds to the internal region
# of an LTR-element, which should be marked by
# one of the suffixes
# { '-int', '_int', '_I', '-I' } .
# Returns 0 if no such suffix is found,
# or the repeat name WITHOUT the suffix otherwise.

sub INTname
  {
    my ($name, $superfamily) = @_;
    if ( $superfamily =~ /LTR/i && $name =~ /^(.+)[_-](I|int)$/i )
      { # this is the name of an LTR-element internal region
	my $family = $1;
	$family =~ s/([_-]LTR)$//i; # remove LTR suffix if there is one
	return $family;
      }
    return 0;
  }