[OpenMMS] TURN table issue in pdbase
zia mohades
zia.simab at gmail.com
Wed Jan 7 23:06:46 EST 2009
Hi everyone,
To follow up on my previous email, I am sending the script I used, the
structure of my table, as well as the querry I use to search in my new
table:
The Script used to create the Table Turn:
USE PdBase;
###############This is the main script which creates the _TURN
table#############
DROP TABLE IF EXISTS `mohaddez_HE`.`_TURNS_tmp_CAN`;
CREATE TEMPORARY TABLE `mohaddez_HE`.`_TURNS_tmp_CAN`
(
SELECT MMS_ENTRY.id AS pdb,
STRUCT_CONF.struct_conf_key AS id,
STRUCT_CONF.id AS name,
ENTITY_POLY.pdbx_seq_one_letter_code_can AS sequence,
MMS_ENTRY.entry_key,
STRUCT_CONF.beg_label_seq_id AS begin,
STRUCT_CONF.end_label_seq_id AS end,
STRUCT_CONF.end_label_seq_id-STRUCT_CONF.beg_label_seq_id+1 AS length,
STRUCT_CONF.beg_auth_comp_id AS first_residu,
STRUCT_CONF.beg_label_asym_id AS chain
FROM ENTITY_POLY, STRUCT_CONF, MMS_ENTRY, STRUCT_ASYM
WHERE
MMS_ENTRY.entry_key = ENTITY_POLY.entry_key AND
MMS_ENTRY.entry_key = STRUCT_CONF.entry_key AND
MMS_ENTRY.entry_key = STRUCT_ASYM.entry_key AND
STRUCT_CONF.conf_type_id = 'TURN_P' AND
STRUCT_ASYM.id = STRUCT_CONF.beg_label_asym_id AND
STRUCT_ASYM.entity_id = ENTITY_POLY.entity_id
);
###########The rest of the script makes modification to the original
script########################
UPDATE mohaddez_HE._TURNS_tmp_CAN SET Sequence = REPLACE(Sequence,'\n','');
DROP TABLE IF EXISTS `mohaddez_HE`.`_TURNS_tmp_CAN_RESTRICTED`;
##############################################################################
######create the new table TURNS_tmp_CAN_RESTRICTED#####
Sequence = between Begin and ENd
####################################################################################
CREATE TABLE `mohaddez_HE`.`_TURNS_tmp_CAN_RESTRICTED`
(
SELECT mohaddez_HE._TURNS_tmp_CAN.pdb,
mohaddez_HE._TURNS_tmp_CAN.id,
mohaddez_HE._TURNS_tmp_CAN.name,
SUBSTRING( mohaddez_HE._TURNS_tmp_CAN.sequence,
mohaddez_HE._TURNS_tmp_CAN.begin,
mohaddez_HE._TURNS_tmp_CAN.end -
mohaddez_HE._TURNS_tmp_CAN.begin + 1
) AS sequence,
mohaddez_HE._TURNS_tmp_CAN.entry_key,
mohaddez_HE._TURNS_tmp_CAN.begin,
mohaddez_HE._TURNS_tmp_CAN.end,
mohaddez_HE._TURNS_tmp_CAN.length,
mohaddez_HE._TURNS_tmp_CAN.first_residu,
mohaddez_HE._TURNS_tmp_CAN.chain
FROM mohaddez_HE._TURNS_tmp_CAN
);
############################################################################################
DROP TABLE `mohaddez_HE`.`_TURNS_tmp_CAN`;
DROP TABLE IF EXISTS `mohaddez_HE`.`_TURN`;
RENAME TABLE `mohaddez_HE`.`_TURNS_tmp_CAN_RESTRICTED` TO
`mohaddez_HE`.`_TURN`;
####################################################################################
######changing the types of the fields#####
####################################################################################
ALTER TABLE `mohaddez_HE`.`_TURN` CHANGE `begin` `begin` SMALLINT NULL
DEFAULT NULL;
ALTER TABLE `mohaddez_HE`.`_TURN` CHANGE `end` `end` SMALLINT NULL DEFAULT
NULL;
ALTER TABLE `mohaddez_HE`.`_TURN` CHANGE `length` `length` SMALLINT NULL
DEFAULT NULL;
ALTER TABLE `mohaddez_HE`.`_TURN` CHANGE `pdb` `pdb` VARCHAR( 4 ) CHARACTER
SET ascii COLLATE ascii_general_ci NOT NULL;
ALTER TABLE `mohaddez_HE`.`_TURN` CHANGE `name` `name` VARCHAR( 11 )
CHARACTER SET ascii COLLATE ascii_general_ci NULL DEFAULT NULL;
ALTER TABLE `mohaddez_HE`.`_TURN` CHANGE `first_residu` `first_residu`
VARCHAR( 3 ) CHARACTER SET ascii COLLATE ascii_general_ci NULL DEFAULT NULL;
ALTER TABLE `mohaddez_HE`.`_TURN` CHANGE `sequence` `sequence` LONGTEXT
CHARACTER SET ascii COLLATE ascii_general_ci NULL DEFAULT NULL ;
ALTER TABLE `mohaddez_HE`.`_TURN` ADD `is_in_surface` BINARY DEFAULT '0' NOT
NULL ;
ALTER TABLE `mohaddez_HE`.`_TURN` ADD `distance_to_surface` FLOAT NOT NULL
DEFAULT '0';
######################
##############################################################
###################order the table by pdb#########################
CREATE TEMPORARY TABLE `mohaddez_HE`.`_TURN_PDB_tmp` AS
SELECT sequence, GROUP_CONCAT( DISTINCT (pdb) ) AS pdbs, count(
* ) AS pdbscount
FROM mohaddez_HE._TURN
GROUP BY sequence;
ALTER TABLE `mohaddez_HE`.`_TURN_PDB_tmp` ADD INDEX ( `sequence` ( 10 ) );
##########################################################
#######
DROP TABLE IF EXISTS `mohaddez_HE`.`_TURN_WITH_PDBS_tmp`;
CREATE TABLE `mohaddez_HE`.`_TURN_WITH_PDBS_tmp`
AS SELECT mohaddez_HE._TURN.*, mohaddez_HE._TURN_PDB_tmp.pdbs,
mohaddez_HE._TURN_PDB_tmp.pdbscount AS pdbcount
FROM mohaddez_HE._TURN, mohaddez_HE._TURN_PDB_tmp
WHERE mohaddez_HE._TURN.sequence=mohaddez_HE._TURN_PDB_tmp.sequence;
DROP TABLE IF EXISTS `mohaddez_HE`.`_TURN`;
RENAME TABLE `mohaddez_HE`.`_TURN_WITH_PDBS_tmp` TO `mohaddez_HE`.`_TURN`;
###############################################################################################################################
My Turn Table looks like this:
+---------------------+--------------+------+-----+---------+-------+
| Field | Type | Null | Key | Default | Extra |
+---------------------+--------------+------+-----+---------+-------+
| pdb | varchar(4) | | | | |
| id | int(11) | | PRI | 0 | |
| name | varchar(11) | YES | | NULL | |
| sequence | longtext | YES | | NULL | |
| entry_key | int(11) | | | 0 | |
| begin | smallint(6) | YES | | NULL | |
| end | smallint(6) | YES | | NULL | |
| length | smallint(6) | YES | | NULL | |
| first_residu | char(3) | YES | | NULL | |
| chain | varchar(255) | YES | | NULL | |
| is_in_surface | binary(1) | | | 0 | |
| distance_to_surface | float | | | 0 | |
| pdbs | longtext | YES | | NULL | |
| pdbcount | bigint(21) | | | 0 | |
+---------------------+--------------+------+-----+---------+-------+
Th Querry:
select pdb,id, name, sequence, entry_key, begin, end, length,first_residue,
chain from _TURN where pdb ="2pab";
The result:
+------+--------+---------+----------+-----------+-------+------+--------+--------------+-------+
| pdb | id | name | sequence | entry_key | begin | end | length |
first_residu | chain |
+------+--------+---------+----------+-----------+-------+------+--------+--------------+-------+
| 2pab | 609473 | TURN_P1 | SESG | 30320 | 50 | 53 | 4 |
SER | A |
| 2pab | 609475 | TURN_P2 | SESG | 30320 | 50 | 53 | 4 |
SER | C |
+------+--------+---------+----------+-----------+-------+------+--------+--------------+-------+
However after I check the pdb site mysql, I have noticed that there are
definitely more than 1 turn there:
http://www.rcsb.org/pdb/explore/remediatedSequence.do?structureId=2PAB. I
have found other pdbs will similar problem
Also from the result above it can be observed that both rows are the
identical, however this is normal , since there are 2 chains.
I definitely appreciate your help with this, since all my work will be based
on this table. I will also investigate the _BETA table to see if I do find
similar inconsistencies.
Thank you for your time!
best regards,
zia
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://www.bioinformatics.org/pipermail/openmmsusers-general/attachments/20090107/707a1d89/attachment.html>
More information about the OpenMMSusers-general
mailing list