#!/usr/bin/perl

# Copyright 2003 Sashidhar Gadiraju, Peter K. Rogan
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#


use strict;

# Compare the gene names found in two whitespace-delimited data files.
#
# Usage: <this script> file1 file2
#
# Every line that does not start with '*' contributes its 11th
# whitespace-separated column as a gene name.  The script prints how many
# distinct names occur only in file1, only in file2, and in both files.
#
# The files are read directly in Perl rather than through a
# grep | awk | sort | uniq shell pipeline.  That avoids interpolating the
# file names into a shell command, avoids fixed-name temporary files in the
# current directory, and removes the dependency on the locale's sort order
# matching Perl's string comparison.

# Zero-based index of the gene-name column (the 11th field, awk's $11).
my $GENE_FIELD = 10;

if(@ARGV < 2)
{
	print "Insufficient number of arguments to $0\n";
	exit 1;
}
my $data1 = $ARGV[0];
my $data2 = $ARGV[1];

# Report the first argument that is not a plain file and stop.
foreach my $path ($data1, $data2)
{
	next if( -f $path );
	print "Cannot find $path\n";
	exit 1;
}

my $genes1 = read_gene_set($data1, $GENE_FIELD);
my $genes2 = read_gene_set($data2, $GENE_FIELD);

# $cnteq : distinct names present in both files
# $cnt1  : distinct names present only in the first file
# $cnt2  : distinct names present only in the second file
my $cnteq = 0;
my $cnt1 = 0;
foreach my $gene (keys %$genes1)
{
	if( exists $genes2->{$gene} )
	{
		$cnteq++;
	}
	else
	{
		$cnt1++;
	}
}
# Whatever in file2 was not matched above is unique to file2.
my $cnt2 = scalar(keys %$genes2) - $cnteq;

# NOTE: despite the labels, the first two numbers count the names that are
# unique to each file, not the total per file.  The wording is kept
# unchanged so that anything parsing this output keeps working.
print "genes in file1 : $cnt1\n";
print "genes in file2 : $cnt2\n";
print "common genes : $cnteq\n";

# Read $path and return a reference to a hash whose keys are the distinct
# values found in column $column (zero-based) of every line that does not
# begin with '*'.
# Fields are separated by runs of spaces/tabs; leading whitespace on a line
# is ignored.  A line with too few columns contributes the empty string,
# which is what awk prints for a missing field.
# Dies with a message naming the file if it cannot be opened.
sub read_gene_set
{
	my ($path, $column) = @_;

	open(my $fh, '<', $path) or die "Error : cannot open $path: $!\n";

	my %seen;
	while (my $line = <$fh>)
	{
		next if( $line =~ /^\*/ );	# skip lines flagged with a leading '*'

		# trim() drops the newline and any leading blanks so that split
		# does not produce an empty first field.
		my @fields = split /[ \t]+/, trim($line);
		my $gene = defined $fields[$column] ? $fields[$column] : '';
		$seen{$gene} = 1;
	}
	close($fh);

	return \%seen;
}

# Return a copy of the argument with whitespace removed from the start and
# the end; whitespace inside the string is left untouched.
# An undefined argument yields the empty string.
#
# No prototype is declared: an empty "()" prototype would tell Perl that
# the function takes no arguments and make any call compiled after this
# definition a compile-time error.
sub trim
{
	my $ret = shift;
	return '' unless defined $ret;
	$ret =~ s/^\s+//;	#remove leading whitespaces
	$ret =~ s/\s+$//;	#remove trailing whitespaces
	return $ret;
}
