ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/pdbHighlight/scopHighlight_using_flat_file.plx
Revision: 1.3
Committed: Tue Apr 10 09:16:48 2007 UTC (9 years, 2 months ago) by dmb
Branch: MAIN
CVS Tags: HEAD
Changes since 1.2: +58 -55 lines
Log Message:
Slug fix.

Don't forget to visit http://www.bioinformatics.org/pdbHighlight/

Line File contents
1 #! /usr/bin/perl -w
2
3 # Released under the terms of the BiO Licence.
4 # http://biomatics.kaist.ac.kr/Research/Biolicense/
5 #
6 # Script uses the SCOP database [1] and its associated parsable files [2].
7 #
8 # [1] Murzin A. G., Brenner S. E., Hubbard T., Chothia C. (1995).
9 # SCOP: a structural classification of proteins database for
10 # the investigation of sequences and structures.
11 # J. Mol. Biol. 247, 536-540
12 # http://scop.mrc-lmb.cam.ac.uk/scop/ref/1995-jmb-scop.pdf
13 #
14 # [2] Lo Conte L., Brenner S. E., Hubbard T.J.P., Chothia C., Murzin A. (2002).
15 # SCOP database in 2002: refinements accommodate structural genomics.
16 # Nucl. Acid Res. 30(1), 264-267.
17 # http://scop.mrc-lmb.cam.ac.uk/scop/ref/nar2002.pdf
18 #
19
20 use 5.005;
21 use strict;
22 use Getopt::Long;
23
# Command-line configuration.  Both values may be overridden with the
# -pdb and -des options; $DES defaults to a site-specific SCOP file.
my $PDB = '';
my $DES = '/project/StruPPi/BiO/DBd/SCOP/def/dir.des.scop.txt_1.71';

GetOptions(
    "pdb=s" => \$PDB,
    "des=s" => \$DES,
);

# The PDB code must be one digit followed by three alphanumerics.
# \A...\z is used instead of ^...$ so that a value with a trailing
# newline is rejected, and the explicit class rejects '_' (which \w
# would accept).  The value is later interpolated into a regex and into
# the generated rasmol script, so it has to be clean.
die( "BAD PDB\n" . usage() ) unless $PDB =~ /\A\d[A-Za-z0-9]{3}\z/;

# The description file must exist and be non-empty.
die( "BAD DES\n" . usage() ) unless -s $DES;

# Lower-cased before it is matched against the lines of the DES file.
$PDB = lc($PDB);
36
# Palette of rasmol colour names.  Domains take their colour from this
# list in order, wrapping around when there are more domains than entries.
# Is there another way?
my @color = qw(
    Red         Green       Blue        Cyan
    Magenta     Yellow      Orange      SeaGreen
    Pink        SkyBlue     Gold        Brown
    HotPink     Purple      BlueTint    Grey
    GreenBlue   PinkTint    GreenTint   RedOrange
    Violet      YellowTint
);
62
63
# Get domain data.
#
# The bareword handle and two-argument open are kept on purpose: the
# script declares "use 5.005", which predates lexical filehandles and
# three-argument open.
open( DES, "<$DES" ) or die "$DES:$!\n";

# List of domains in the pdb given
my @domain;

while ( defined( my $line = <DES> ) ) {

    # Keep only 'px' records that mention this PDB code followed by a
    # space.  The sunid is matched as any run of digits: a fixed width
    # of five would silently skip records whose sunid is longer or
    # shorter.  \Q..\E keeps the PDB code literal inside the pattern.
    next unless $line =~ /^\d+\tpx\t.+\Q$PDB\E /;

    # Fields are separated by single whitespace characters (tabs between
    # columns, a space between the PDB code and the domain definition).
    my (
        $sunid,
        undef,          # record type, always 'px' here
        $sccs,
        $sid,
        $pdb,
        $domainDef
    ) = split( /\s/, $line );

    # Format must match the unpacking in the per-domain loop below...
    push @domain, [ $pdb, $sunid, $sid, $sccs, $domainDef ];
}

# Finished with the description file; release the handle.
close(DES);

die "$PDB NOT FOUND IN $DES\n" unless @domain;
87
88
# Human-readable timestamp (localtime in scalar context) for the banner.
my $time = localtime();

# Opening section of the rasmol script: load the structure inline, echo
# a banner with the PDB code, timestamp and the SCOP citations, then
# show the structure information.
print <<"EOS";

load inline
echo pdbHighlight(SCOP): $PDB\t$time
echo Script Author: DB
echo
echo SCOP
echo Murzin A. G., Brenner S. E., Hubbard T., Chothia C. (1995).
echo SCOP: a structural classification of proteins database for
echo the investigation of sequences and structures.
echo J. Mol. Biol. 247, 536-540
echo http://scop.mrc-lmb.cam.ac.uk/scop/ref/1995-jmb-scop.pdf
echo
echo PARSED
echo Lo Conte L., Brenner S. E., Hubbard T.J.P., Chothia C., Murzin A. (2002).
echo SCOP database in 2002: refinements accommodate structural genomics.
echo Nucl. Acid Res. 30(1), 264-267.
echo http://scop.mrc-lmb.cam.ac.uk/scop/ref/nar2002.pdf
echo
echo \n\n
show info
EOS
112
# Group holders, filled while the domains are emitted and used afterwards
# to define one alias per group.
my %sccs;     # rasmol-formatted sccs => list of "no<i>" domain aliases
my %color;    # colour name           => list of "no<i>" domain aliases

# Emit one block of rasmol commands per domain.
foreach my $i ( 0 .. $#domain ) {

    # Field order matches the record pushed onto @domain by the reader.
    my ( $pdb, $sunid, $sid, $sccs, $domainDef ) = @{ $domain[$i] };

    # Select a color, wrapping around the palette when it runs out.
    my $color = $color[ $i % @color ];

    # Format SCCS for rasmol: every '.' becomes '_'.
    ( my $sccs_name = $sccs ) =~ tr/./_/;

    # Format display text: one left-justified, fixed-width row per domain.
    my $details = sprintf(
        "%-12s %-5s %-5s %-9s %-15s %-15s %-8s\n",
        "DOMAIN($i):", $pdb, $sunid, $sid, $sccs_name, $domainDef, $color
    );

    # Convert SCOP domain definition into rasmol format.
    my $selectDomain = scop2rasmol($domainDef);

    # Treat groups explicitly: remember which domains share an sccs
    # and which share a colour.
    push @{ $sccs{$sccs_name} }, "no$i";
    push @{ $color{$color} },    "no$i";

    # Do the main work: echo the details, select and colour the domain,
    # define its aliases, then clear the selection.
    print <<"EOS";

echo $details

select $selectDomain # Select domain
color $color # Color domain.

define px$sunid selected # Define alias...
define No$i selected # ...

# select within(5.0,selected) # Select domain contacts!

# define surf_px$sunid selected # Define alias...
# define surf_No$i selected # ...

select !* # Reset selection.

EOS
}
170
171
# Define one alias per sccs, selecting every domain recorded under it.
foreach my $class ( keys %sccs ) {

    my $members = join( ",", @{ $sccs{$class} } );

    print "\n",
        "echo Selecting $class\n",
        "select $members\n",
        "define $class selected\n";
}

# Define one alias per colour (prefixed with 'x'), selecting every
# domain that was drawn in that colour.
foreach my $name ( keys %color ) {

    my $members = join( ",", @{ $color{$name} } );

    print "\n",
        "#echo Selecting x$name\n",
        "select $members\n",
        "define x$name selected\n";
}
193
194
# Closing section of the rasmol script: select everything, echo a short
# legend of the aliases defined above, and leave script mode.
# The blank lines around 'exit' are important (reason unknown).
print <<"EOS";

select *
\n
echo
echo Each domain is defined as...
echo px00000 (scop sunid)
echo a_1_1_1_1 (scop sccs)
echo xCOLOR (COLOR = rasmol color, eg xRed)
echo No1 (n = domain number, eg No1)
echo
\n
exit # These newlines are important!
\n
EOS

# Progress note on STDERR so it stays out of the generated script.
warn "OK\n";
210
211
212
213
# Convert a SCOP domain definition into a rasmol selection expression.
#
# The definition is a comma-separated list of regions.  Each region is
# translated on its own and the results are joined with commas again:
#
#   X:         chain X                    ->  :X
#   40-90      residue range, no chain    ->  40-90:
#   X:40-90    residue range on chain X   ->  40-90:X
#   -          everything                 ->  *
#
# Residue numbers may be negative.  Each number may be followed by one
# extra character (presumably a PDB insertion code), which is dropped.
#
# Takes:   the SCOP domain definition string.
# Returns: the rasmol selection string.
# Dies:    with "<region>:BAD DOMAIN DEF!" if a region matches no form.
sub scop2rasmol {
    my $scopDomainDefinition = shift;
    my @rasmolSelect;

    # A residue number: optional minus sign, then digits.
    my $resno = qr/-?\d+/;

    # The optional trailing character after a residue number.  It must
    # not be '-': otherwise the sign of a negative end residue would be
    # taken for this character and "-10--1" would come out as "-10-1".
    my $icode = qr/[^\d-]?/;

    # Split the domain definition into components.
    foreach my $region ( split( /,/, $scopDomainDefinition ) ) {

        if ( $region =~ /^(.):$/ ) {                                  # X:
            push @rasmolSelect, ":$1";
        }
        elsif ( $region =~ /^($resno)$icode-($resno)$icode$/ ) {      # 40-90
            push @rasmolSelect, "$1-$2:";
        }
        elsif ( $region =~ /^(.):($resno)$icode-($resno)$icode$/ ) {  # X:40-90
            push @rasmolSelect, "$2-$3:$1";
        }
        elsif ( $region =~ /^-$/ ) {                                  # -
            push @rasmolSelect, "*";
        }
        else {
            die "$region:BAD DOMAIN DEF!\n";
        }
    }

    return join( ",", @rasmolSelect );
}
234
# Build the usage/help text, including the current values of the
# file-level $PDB and $DES settings.
#
# The text is returned rather than passed to warn(): the callers
# concatenate the result onto a die() message, and warn()'s own return
# value (1) would otherwise be appended to that message in place of the
# help text.  Because the text ends in a newline, die() adds no
# "at ... line N" suffix.
#
# Returns: the usage text as a single string.
sub usage {
    return <<"EOS";

Create a rasmol script for defining the SCOP domain structure of a PDB.

USAGE:
$0 -pdb <PDB code> -des <dir.des.scop.txt file>

PDB : $PDB
DES : $DES

EOS
}