[OpenMMS] TURN table issue in pdbase

zia mohades zia.simab at gmail.com
Wed Jan 7 23:06:46 EST 2009


Hi everyone,

To follow up on my previous email, I am sending the script I used, the
structure of my table, as well as the querry I use to search in my new
table:

The Script used to create the Table Turn:


USE PdBase;

###############This is the main script which creates the _TURN
table#############

DROP TABLE IF EXISTS `mohaddez_HE`.`_TURNS_tmp_CAN`;

CREATE TEMPORARY TABLE `mohaddez_HE`.`_TURNS_tmp_CAN`
(
SELECT     MMS_ENTRY.id AS pdb,
    STRUCT_CONF.struct_conf_key AS id,
    STRUCT_CONF.id AS name,
    ENTITY_POLY.pdbx_seq_one_letter_code_can AS sequence,
    MMS_ENTRY.entry_key,
    STRUCT_CONF.beg_label_seq_id AS begin,
    STRUCT_CONF.end_label_seq_id AS end,
    STRUCT_CONF.end_label_seq_id-STRUCT_CONF.beg_label_seq_id+1 AS length,
    STRUCT_CONF.beg_auth_comp_id AS first_residu,
    STRUCT_CONF.beg_label_asym_id AS chain
FROM     ENTITY_POLY, STRUCT_CONF, MMS_ENTRY, STRUCT_ASYM
WHERE
    MMS_ENTRY.entry_key = ENTITY_POLY.entry_key AND
    MMS_ENTRY.entry_key = STRUCT_CONF.entry_key AND
    MMS_ENTRY.entry_key = STRUCT_ASYM.entry_key AND
    STRUCT_CONF.conf_type_id = 'TURN_P' AND
    STRUCT_ASYM.id = STRUCT_CONF.beg_label_asym_id AND
    STRUCT_ASYM.entity_id = ENTITY_POLY.entity_id
);

###########The  rest of the script makes modification to the original
script########################

UPDATE mohaddez_HE._TURNS_tmp_CAN SET Sequence = REPLACE(Sequence,'\n','');
DROP TABLE IF EXISTS `mohaddez_HE`.`_TURNS_tmp_CAN_RESTRICTED`;
##############################################################################
######create the new table TURNS_tmp_CAN_RESTRICTED#####
Sequence = between Begin and ENd
####################################################################################


CREATE TABLE `mohaddez_HE`.`_TURNS_tmp_CAN_RESTRICTED`
(
SELECT     mohaddez_HE._TURNS_tmp_CAN.pdb,
    mohaddez_HE._TURNS_tmp_CAN.id,
    mohaddez_HE._TURNS_tmp_CAN.name,
    SUBSTRING(     mohaddez_HE._TURNS_tmp_CAN.sequence,
            mohaddez_HE._TURNS_tmp_CAN.begin,
            mohaddez_HE._TURNS_tmp_CAN.end -
mohaddez_HE._TURNS_tmp_CAN.begin + 1
         ) AS sequence,
    mohaddez_HE._TURNS_tmp_CAN.entry_key,
    mohaddez_HE._TURNS_tmp_CAN.begin,
    mohaddez_HE._TURNS_tmp_CAN.end,
    mohaddez_HE._TURNS_tmp_CAN.length,
    mohaddez_HE._TURNS_tmp_CAN.first_residu,
    mohaddez_HE._TURNS_tmp_CAN.chain
FROM     mohaddez_HE._TURNS_tmp_CAN
);
############################################################################################



DROP TABLE `mohaddez_HE`.`_TURNS_tmp_CAN`;
DROP TABLE IF EXISTS `mohaddez_HE`.`_TURN`;
RENAME TABLE `mohaddez_HE`.`_TURNS_tmp_CAN_RESTRICTED` TO
`mohaddez_HE`.`_TURN`;

####################################################################################
######changing the types of the fields#####
####################################################################################
ALTER TABLE `mohaddez_HE`.`_TURN` CHANGE `begin` `begin` SMALLINT NULL
DEFAULT NULL;
ALTER TABLE `mohaddez_HE`.`_TURN` CHANGE `end` `end` SMALLINT NULL DEFAULT
NULL;
ALTER TABLE `mohaddez_HE`.`_TURN` CHANGE `length` `length` SMALLINT NULL
DEFAULT NULL;
ALTER TABLE `mohaddez_HE`.`_TURN` CHANGE `pdb` `pdb` VARCHAR( 4 ) CHARACTER
SET ascii COLLATE ascii_general_ci NOT NULL;
ALTER TABLE `mohaddez_HE`.`_TURN` CHANGE `name` `name` VARCHAR( 11 )
CHARACTER SET ascii COLLATE ascii_general_ci NULL DEFAULT NULL;
ALTER TABLE `mohaddez_HE`.`_TURN` CHANGE `first_residu` `first_residu`
VARCHAR( 3 ) CHARACTER SET ascii COLLATE ascii_general_ci NULL DEFAULT NULL;

ALTER TABLE `mohaddez_HE`.`_TURN` CHANGE `sequence` `sequence` LONGTEXT
CHARACTER SET ascii COLLATE ascii_general_ci NULL DEFAULT NULL ;
ALTER TABLE `mohaddez_HE`.`_TURN` ADD `is_in_surface` BINARY DEFAULT '0' NOT
NULL ;
ALTER TABLE `mohaddez_HE`.`_TURN` ADD `distance_to_surface` FLOAT NOT NULL
DEFAULT '0';

######################
##############################################################
###################order the table by pdb#########################

CREATE TEMPORARY TABLE `mohaddez_HE`.`_TURN_PDB_tmp` AS
    SELECT         sequence, GROUP_CONCAT( DISTINCT (pdb) ) AS pdbs, count(
* ) AS pdbscount
    FROM             mohaddez_HE._TURN
    GROUP BY         sequence;
ALTER TABLE `mohaddez_HE`.`_TURN_PDB_tmp` ADD INDEX ( `sequence` ( 10 ) );
##########################################################
#######

DROP TABLE IF EXISTS `mohaddez_HE`.`_TURN_WITH_PDBS_tmp`;
CREATE TABLE `mohaddez_HE`.`_TURN_WITH_PDBS_tmp`
AS SELECT mohaddez_HE._TURN.*, mohaddez_HE._TURN_PDB_tmp.pdbs,
mohaddez_HE._TURN_PDB_tmp.pdbscount AS pdbcount
FROM mohaddez_HE._TURN, mohaddez_HE._TURN_PDB_tmp
WHERE mohaddez_HE._TURN.sequence=mohaddez_HE._TURN_PDB_tmp.sequence;


DROP TABLE IF EXISTS `mohaddez_HE`.`_TURN`;
RENAME TABLE `mohaddez_HE`.`_TURN_WITH_PDBS_tmp` TO `mohaddez_HE`.`_TURN`;

###############################################################################################################################


My Turn Table looks like this:

+---------------------+--------------+------+-----+---------+-------+
| Field               | Type         | Null | Key | Default | Extra |
+---------------------+--------------+------+-----+---------+-------+
| pdb                 | varchar(4)   |      |     |         |       |
| id                  | int(11)      |      | PRI | 0       |       |
| name                | varchar(11)  | YES  |     | NULL    |       |
| sequence            | longtext     | YES  |     | NULL    |       |
| entry_key           | int(11)      |      |     | 0       |       |
| begin               | smallint(6)  | YES  |     | NULL    |       |
| end                 | smallint(6)  | YES  |     | NULL    |       |
| length              | smallint(6)  | YES  |     | NULL    |       |
| first_residu        | char(3)      | YES  |     | NULL    |       |
| chain               | varchar(255) | YES  |     | NULL    |       |
| is_in_surface       | binary(1)    |      |     | 0       |       |
| distance_to_surface | float        |      |     | 0       |       |
| pdbs                | longtext     | YES  |     | NULL    |       |
| pdbcount            | bigint(21)   |      |     | 0       |       |
+---------------------+--------------+------+-----+---------+-------+



Th Querry:
select pdb,id, name, sequence, entry_key, begin, end, length,first_residue,
chain from _TURN where pdb ="2pab";
The result:

+------+--------+---------+----------+-----------+-------+------+--------+--------------+-------+
| pdb  | id     | name    | sequence | entry_key | begin | end  | length |
first_residu | chain |
+------+--------+---------+----------+-----------+-------+------+--------+--------------+-------+
| 2pab | 609473 | TURN_P1 | SESG     |     30320 |    50 |   53 |      4 |
SER          | A     |
| 2pab | 609475 | TURN_P2 | SESG     |     30320 |    50 |   53 |      4 |
SER          | C     |
+------+--------+---------+----------+-----------+-------+------+--------+--------------+-------+


However after I check the pdb site mysql, I have noticed that there are
definitely more than 1 turn there:
http://www.rcsb.org/pdb/explore/remediatedSequence.do?structureId=2PAB. I
have found other pdbs will similar problem

Also from the result above it can be observed that both rows are the
identical, however this is normal , since there are 2 chains.


I definitely appreciate your help with this, since all my work will be based
on this table. I will also investigate the _BETA table to see if I do find
similar inconsistencies.

Thank you for your time!

best regards,

zia
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://www.bioinformatics.org/pipermail/openmmsusers-general/attachments/20090107/707a1d89/attachment.html>


More information about the OpenMMSusers-general mailing list