#! /usr/bin/perl -w

# Released under the terms of the BiO Licence.
# http://biomatics.kaist.ac.kr/Research/Biolicense/
# 
# Script uses the SCOP database [1] and its associated parsable files [2].
# 
# [1] Murzin A. G., Brenner S. E., Hubbard T., Chothia C. (1995).
#     SCOP: a structural classification of proteins database for 
#     the investigation of sequences and structures.
#     J. Mol. Biol. 247, 536-540
#     http://scop.mrc-lmb.cam.ac.uk/scop/ref/1995-jmb-scop.pdf
# 
# [2] Lo Conte L., Brenner S. E., Hubbard T.J.P., Chothia C., Murzin A. (2002).
#     SCOP database in 2002: refinements accommodate structural genomics.
#     Nucl. Acid Res.  30(1), 264-267.
#     http://scop.mrc-lmb.cam.ac.uk/scop/ref/nar2002.pdf
#

use 5.005;
use strict;
use Getopt::Long;

my $PDB = '';
my $DES = '/project/StruPPi/BiO/DBd/SCOP/def/dir.des.scop.txt_1.71';

GetOptions (
  "pdb=s"	=> \$PDB,
  "des=s"	=> \$DES,
);

die( "BAD PDB\n". &usage ) unless $PDB =~ /^\d\w{3}$/;
die( "BAD DES\n". &usage ) unless -s $DES;

$PDB = lc($PDB);

# Is there another way?
my @color = qw(
  Red
  Green
  Blue
  Cyan
  Magenta
  Yellow
  Orange
  SeaGreen
  Pink
  SkyBlue
  Gold
  Brown
  HotPink
  Purple
  BlueTint
  Grey
  GreenBlue
  PinkTint
  GreenTint
  RedOrange
  Violet
  YellowTint
);


# Get domain data.
open( DES, "<$DES" ) or die "$DES:$!\n";

# List of domains in the pdb given
my @domain;

while(<DES>){
  if ( /^\d{5}\tpx\t.+$PDB /o ){
    my (
      $sunid,
      undef,
      $sccs,
      $sid,
      $pdb,
      $domainDef ) = split( /\t|\s/, $_ );
    
    #print join( "\t", split( /\t|\s/, $_ ) ), "\n";
    
    # Format must match below...
    push @domain, [ $pdb, $sunid, $sid, $sccs, $domainDef ];
  }
}
die "$PDB NOT FOUND IN $DES\n" unless @domain;


my $time = localtime();

print "
  load inline
  echo pdbHighlight(SCOP): $PDB\t$time
  echo Script Author:      DB
  echo 
  echo SCOP
  echo Murzin A. G., Brenner S. E., Hubbard T., Chothia C. (1995).
  echo SCOP: a structural classification of proteins database for
  echo the investigation of sequences and structures.
  echo J. Mol. Biol. 247, 536-540
  echo http://scop.mrc-lmb.cam.ac.uk/scop/ref/1995-jmb-scop.pdf
  echo 
  echo PARSED
  echo Lo Conte L., Brenner S. E., Hubbard T.J.P., Chothia C., Murzin A. (2002).
  echo SCOP database in 2002: refinements accommodate structural genomics.
  echo Nucl. Acid Res.  30(1), 264-267.
  echo http://scop.mrc-lmb.cam.ac.uk/scop/ref/nar2002.pdf
  echo 
  echo \n\n
  show info
";

my %sccs;	# Holders for
my %color;	# multi groups.

# For each domain

for (my $i=0; $i<@domain; $i++){
  
  my (
      $pdb,
      $sunid,
      $sid,
      $sccs,
      $domainDef
     ) = @{$domain[$i]};
  
  # Select a color...
  my $color = $color[$i%@color];
  
  # Format SCCS for rasmol
  my $sccs_name = $sccs;
     $sccs_name =~ tr/\./_/;
  
  # Format display text
  my $details =
    sprintf(
	    "%-12s %-5s %-5s %-9s %-15s %-15s %-8s\n",
	    ("DOMAIN($i):", $pdb, $sunid, $sid, $sccs_name, $domainDef, $color)
	   );
  
  # Convert SCOP domain definition into rasmol format.
  my $selectDomain
     = &scop2rasmol( $domainDef );
  
  # Treet groups explicitly!
  
  push @{$sccs{$sccs_name}}, "no$i";
  push @{$color{$color}},    "no$i";
  
  # Do the main
  
  print "
    echo $details
    
    select $selectDomain				# Select domain
    color  $color					# Color domain.
    
    define px$sunid		 selected		# Define alias...
    define No$i			 selected		# ...
    
#    select within(5.0,selected)			# Select domain contacts!
    
#    define surf_px$sunid	 selected		# Define alias...
#    define surf_No$i		 selected		# ...
    
    select !*						# Reset selection.
    \n";
}


foreach (keys %sccs){
  
  my $select = join( ",", @{$sccs{$_}} );
  
  print "
    echo Selecting $_
    select $select
    define $_ selected
  ";
}

foreach (keys %color){
  
  my $select = join( ",", @{$color{$_}} );
  
  print "
    #echo Selecting x$_
    select $select
    define x$_ selected
  ";
}


print "
  select *
  \n
  echo 
  echo Each domain is defined as...
  echo px00000		(scop sunid)
  echo a_1_1_1_1	(scop sccs)
  echo xCOLOR	 	(COLOR = rasmol color, eg xRed)
  echo No1		(n = domain number, eg No1)
  echo 
  \n
  exit					# These newlines are important!
  \n\n";				# But I don't know why.

warn "OK\n";




sub scop2rasmol {
  my $scopDomainDefinition = shift;
  my @rasmolSelect;
  
  # Split the domain definition into components.
  
  foreach ( split(/,/, $scopDomainDefinition) ){
    
    if    ( /^(.{1}):$/o                                      ){ push @rasmolSelect,      ":$1" }	# X:
    elsif (        /^((?:-|)\d+)(?:\D|)-((?:-|)\d+)(?:\D|)$/o ){ push @rasmolSelect, "$1-$2:"   }	#   40-90
    elsif ( /^(.{1}):((?:-|)\d+)(?:\D|)-((?:-|)\d+)(?:\D|)$/o ){ push @rasmolSelect, "$2-$3:$1" }	# X:40-90
    elsif ( /^-$/o                                            ){ push @rasmolSelect, "*"        }	# -
    else  {
      die "$_:BAD DOMAIN DEF!\n"
    }
  }
  my $rasmolSelectString = join(",", @rasmolSelect);
  
  return $rasmolSelectString;
}

sub usage{
  warn <<"EOS";
  
  Create a rasmol script for defining the SCOP domain structure of a PDB.

USAGE:
  $0 -pdb <PDB code> -des <dir.des.scop.txt file>
  
  PDB : $PDB
  DES : $DES
  
EOS
}

