#!/usr/bin/perl -w

#######################################################################
#######################################################################
#  Copyright 2008 Roney S. Coimbra

#  This file is part of genealiases

#  genealiases is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 3 of the License.
#  genealiases is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more details.

# You should have received a copy of the GNU General Public License along with genealiases (file: COPYING).  If not, see <http://www.gnu.org/licenses/>.

#######################################################################
#######################################################################


# Path of the tab-separated comparison table, taken from the first
# command-line argument.
my $input = $ARGV[0];

die "usage: $0 <input_file>\n" unless (defined($input) && length($input));


# Record strings for canonical-vs-alias and canonical-vs-unrelated rows
# (file-scoped so the read loop below can use them).
my $entry_alias;
my $entry_unrelated;

# Canonical name (column 0) of the gene group currently being accumulated;
# undefined until the first group starts.
my $canonical_name;

# Records collected so far for the current gene group.
my @aliases = ();
my @unrelated = ();

# The result file is named after the input file minus its last four
# characters, plus ".res" (presumably this strips a three-letter extension
# such as ".txt" -- TODO confirm against how the script is invoked).
my $short_input = substr ($input, 0, -4);

# Open the input first, so that an unreadable input does not leave behind a
# freshly created (or truncated) result file.
open (INPUT, "<", $input)
  or die "can't open input file '$input': $!\n";

open (OUTPUT, ">", "$short_input.res")
  or die "can't open output file '$short_input.res': $!\n";

# Header line of the result table.
print OUTPUT "LOCUS_LINK_NAME\tALIAS_OR_UNRELATED\tJACCARD\tVOCABULARY\tTYPE\tCLASS\n";

# Read the comparison table line by line.  Only rows whose first compared
# name (column 4) is the canonical name (column 0) are kept.  They are
# collected per gene group, and a group is classified and written to OUTPUT
# as soon as a row with a different canonical name is seen.  The last group
# is still held in @aliases / @unrelated when the loop ends.
while (<INPUT>){
  next if (/^\s*$/);            # skip blank and whitespace-only lines
  next if (/CANONICAL_NAME/);   # skip the header line

  chomp;

  my @line = split "\t";

  # A row without column 4 cannot be a comparison row.
  next if (@line < 5);

  # retrieve only comparisons between canonical and alias, or canonical and
  # unrelated.  Every other row is skipped outright, so it can neither
  # re-use the record built for an earlier row nor close the current group.
  next if ($line[4] ne $line[0]);

  # The second compared name (column 5) is "unrelated" when it equals one of
  # columns 1-3; otherwise it is treated as an alias.
  my $add2;
  if (($line[5] eq $line[1])||($line[5] eq $line[2])||($line[5] eq $line[3])) {
    $add2 = "unrelated";
  } else {
    $add2 = "alias";
  }

  # Output record: both names, columns 6 and 7, and the comparison type.
  my $entry = "$line[4]\t$line[5]\t$line[6]\t$line[7]\tcanonical_vs_$add2";

  if (defined($canonical_name) && ($canonical_name ne $line[0])){
    # END OF A GENE GROUP

    # A group is only reported when it has at least one alias record and at
    # least one unrelated record; otherwise it is dropped silently.
    if ((@aliases > 0) && (@unrelated > 0)){

      # Smallest value in the third record field among the unrelated
      # records.  It starts at 1, so a record whose value exceeds 1 is never
      # selected; on ties the last such record wins.
      my $distance = 1;
      my $final_string;

      for my $record (@unrelated){
        my @columns1 = split "\t", $record;

        if ($columns1[2] <= $distance){
          $final_string = $record;
          $distance = $columns1[2];
        }
      }
      print OUTPUT "$final_string\tunrelated\n";

      # An alias whose value is not below that threshold is "ambiguous";
      # otherwise it is a "synonym".
      for my $record (@aliases){
        my @columns2 = split "\t", $record;

        if ($columns2[2] >= $distance){
          print OUTPUT "$record\tambiguous\n";
        } else {
          print OUTPUT "$record\tsynonym\n";
        }
      }
    }

    # Start the next group from scratch.
    @aliases = ();
    @unrelated = ();
  }

  # BEGINNING OF A NEW GENE GROUP, or continuation of the current one
  $canonical_name = $line[0];

  if ($add2 eq "alias"){
    push @aliases , $entry;
  } else {
    push @unrelated , $entry;
  }
}


# Classify and write the last gene group, which is still held in @aliases
# and @unrelated once the input is exhausted.  As for every other group, it
# is only reported when it has at least one alias record and at least one
# unrelated record.
if ((@aliases > 0) && (@unrelated > 0)){

  # Smallest value in the third record field among the unrelated records.
  # It starts at 1, so a record whose value exceeds 1 is never selected; on
  # ties the last such record wins.
  my $distance = 1;
  my $final_string;

  for my $record (@unrelated){
    my @columns1 = split "\t", $record;

    if ($columns1[2] <= $distance){
      $final_string = $record;
      $distance = $columns1[2];
    }
  }
  print OUTPUT "$final_string\tunrelated\n";

  # An alias whose value is not below that threshold is "ambiguous";
  # otherwise it is a "synonym".
  for my $record (@aliases){
    my @columns2 = split "\t", $record;

    if ($columns2[2] >= $distance){
      print OUTPUT "$record\tambiguous\n";
    } else {
      print OUTPUT "$record\tsynonym\n";
    }
  }
}

close (INPUT);

# Buffered write errors (e.g. a full disk) only surface when the handle is
# closed, so the result of close is checked here.
close (OUTPUT) or die "can't close output file: $!\n";
