#! /usr/bin/perl -w # Released under the terms of the BiO Licence. # http://biomatics.kaist.ac.kr/Research/Biolicense/ # # Script uses the SCOP database [1] and its associated parsable files [2]. # # [1] Murzin A. G., Brenner S. E., Hubbard T., Chothia C. (1995). # SCOP: a structural classification of proteins database for # the investigation of sequences and structures. # J. Mol. Biol. 247, 536-540 # http://scop.mrc-lmb.cam.ac.uk/scop/ref/1995-jmb-scop.pdf # # [2] Lo Conte L., Brenner S. E., Hubbard T.J.P., Chothia C., Murzin A. (2002). # SCOP database in 2002: refinements accommodate structural genomics. # Nucl. Acid Res. 30(1), 264-267. # http://scop.mrc-lmb.cam.ac.uk/scop/ref/nar2002.pdf # use 5.005; use strict; use Getopt::Long; my $PDB = ''; my $DES = '/project/StruPPi/BiO/DBd/SCOP/def/dir.des.scop.txt_1.71'; GetOptions ( "pdb=s" => \$PDB, "des=s" => \$DES, ); die( "BAD PDB\n". &usage ) unless $PDB =~ /^\d\w{3}$/; die( "BAD DES\n". &usage ) unless -s $DES; $PDB = lc($PDB); # Is there another way? my @color = qw( Red Green Blue Cyan Magenta Yellow Orange SeaGreen Pink SkyBlue Gold Brown HotPink Purple BlueTint Grey GreenBlue PinkTint GreenTint RedOrange Violet YellowTint ); # Get domain data. open( DES, "<$DES" ) or die "$DES:$!\n"; # List of domains in the pdb given my @domain; while(){ if ( /^\d{5}\tpx\t.+$PDB /o ){ my ( $sunid, undef, $sccs, $sid, $pdb, $domainDef ) = split( /\t|\s/, $_ ); #print join( "\t", split( /\t|\s/, $_ ) ), "\n"; # Format must match below... push @domain, [ $pdb, $sunid, $sid, $sccs, $domainDef ]; } } die "$PDB NOT FOUND IN $DES\n" unless @domain; my $time = localtime(); print " load inline echo pdbHighlight(SCOP): $PDB\t$time echo Script Author: DB echo echo SCOP echo Murzin A. G., Brenner S. E., Hubbard T., Chothia C. (1995). echo SCOP: a structural classification of proteins database for echo the investigation of sequences and structures. echo J. Mol. Biol. 247, 536-540 echo http://scop.mrc-lmb.cam.ac.uk/scop/ref/1995-jmb-scop.pdf echo echo PARSED echo Lo Conte L., Brenner S. E., Hubbard T.J.P., Chothia C., Murzin A. (2002). echo SCOP database in 2002: refinements accommodate structural genomics. echo Nucl. Acid Res. 30(1), 264-267. echo http://scop.mrc-lmb.cam.ac.uk/scop/ref/nar2002.pdf echo echo \n\n show info "; my %sccs; # Holders for my %color; # multi groups. # For each domain for (my $i=0; $i<@domain; $i++){ my ( $pdb, $sunid, $sid, $sccs, $domainDef ) = @{$domain[$i]}; # Select a color... my $color = $color[$i%@color]; # Format SCCS for rasmol my $sccs_name = $sccs; $sccs_name =~ tr/\./_/; # Format display text my $details = sprintf( "%-12s %-5s %-5s %-9s %-15s %-15s %-8s\n", ("DOMAIN($i):", $pdb, $sunid, $sid, $sccs_name, $domainDef, $color) ); # Convert SCOP domain definition into rasmol format. my $selectDomain = &scop2rasmol( $domainDef ); # Treet groups explicitly! push @{$sccs{$sccs_name}}, "no$i"; push @{$color{$color}}, "no$i"; # Do the main print " echo $details select $selectDomain # Select domain color $color # Color domain. define px$sunid selected # Define alias... define No$i selected # ... # select within(5.0,selected) # Select domain contacts! # define surf_px$sunid selected # Define alias... # define surf_No$i selected # ... select !* # Reset selection. \n"; } foreach (keys %sccs){ my $select = join( ",", @{$sccs{$_}} ); print " echo Selecting $_ select $select define $_ selected "; } foreach (keys %color){ my $select = join( ",", @{$color{$_}} ); print " #echo Selecting x$_ select $select define x$_ selected "; } print " select * \n echo echo Each domain is defined as... echo px00000 (scop sunid) echo a_1_1_1_1 (scop sccs) echo xCOLOR (COLOR = rasmol color, eg xRed) echo No1 (n = domain number, eg No1) echo \n exit # These newlines are important! \n\n"; # But I don't know why. warn "OK\n"; sub scop2rasmol { my $scopDomainDefinition = shift; my @rasmolSelect; # Split the domain definition into components. foreach ( split(/,/, $scopDomainDefinition) ){ if ( /^(.{1}):$/o ){ push @rasmolSelect, ":$1" } # X: elsif ( /^((?:-|)\d+)(?:\D|)-((?:-|)\d+)(?:\D|)$/o ){ push @rasmolSelect, "$1-$2:" } # 40-90 elsif ( /^(.{1}):((?:-|)\d+)(?:\D|)-((?:-|)\d+)(?:\D|)$/o ){ push @rasmolSelect, "$2-$3:$1" } # X:40-90 elsif ( /^-$/o ){ push @rasmolSelect, "*" } # - else { die "$_:BAD DOMAIN DEF!\n" } } my $rasmolSelectString = join(",", @rasmolSelect); return $rasmolSelectString; } sub usage{ warn <<"EOS"; Create a rasmol script for defining the SCOP domain structure of a PDB. USAGE: $0 -pdb -des PDB : $PDB DES : $DES EOS }