ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/scripts/updateUniprot.sh
Revision: 1053
Committed: Wed Apr 28 10:58:18 2010 UTC (11 years, 5 months ago) by jmduarteg
File size: 2929 byte(s)
Log Message:
New script to maintain a local copy of blastable uniprot sequences db.
Line File contents
#!/bin/sh
# Script to update a local uniprot copy with sequence files and generate
# blast dbs for them (formatdb).
#
# Usage: updateUniprot.sh <base local dir>
#
# Layout maintained under <base local dir>:
#   download/           scratch directory, wiped and recreated on every run
#   uniprot_<release>/  one directory per downloaded release
#   current             symlink to the most recently installed release
#
# External tools used: curl, gzip, formatdb.
#
# Exit status: 0 when a new release was installed or when no newer release
# is available; 1 on bad usage or on any failure.

# Print the given message to stderr and abort with exit status 1.
die() {
    echo "$*" >&2
    exit 1
}

if [ -z "$1" ]
then
    echo "Usage: $0 <base local dir>"
    exit 1
fi

# Resolve the base dir to an absolute path. The script later changes its
# working directory (see the formatdb section), so a relative base dir would
# make the log file path and the final rename point to the wrong place.
# CDPATH is cleared so that cd never prints the directory on its own.
LOCALDIR=$(CDPATH= cd -- "$1" && pwd) || die "Cannot access base local dir: $1"
CURRENT="$LOCALDIR/current"
DOWNLOAD="$LOCALDIR/download"

#SITE="ftp://ftp.uniprot.org/pub" # US main ftp
#SITE="ftp://ftp.ebi.ac.uk/pub" # UK mirror
SITE="ftp://ftp.expasy.org" # swiss mirror

COMPLETEKBDIR="databases/uniprot/current_release/knowledgebase/complete"

SIFTSPDB2UNIPROTFTP="ftp://ftp.ebi.ac.uk/pub/databases/msd/sifts/text/pdb_chain_uniprot.lst"

SPROT="uniprot_sprot.fasta"
SPROTGZ="${SPROT}.gz"
TREMBL="uniprot_trembl.fasta"
TREMBLGZ="${TREMBL}.gz"
ALL="uniprot_all.fasta"
RELDATEFILE="reldate.txt"
SIFTSPDB2UNIPROT="pdb_chain_uniprot.lst"

sproturl="$SITE/$COMPLETEKBDIR/$SPROTGZ"
tremblurl="$SITE/$COMPLETEKBDIR/$TREMBLGZ"
reldateurl="$SITE/$COMPLETEKBDIR/$RELDATEFILE"

# remove existing download directory if there was one
rm -rf "$DOWNLOAD"
# create the download dir
mkdir "$DOWNLOAD" || die "Cannot create download dir $DOWNLOAD"

# getting the release date file if newer available
# (-z makes curl transfer the file only if the remote copy is newer than the
# local reference file; an empty result therefore means "nothing new")
release=""
if ! curl -f -z "$CURRENT/$RELDATEFILE" "$reldateurl" > "$DOWNLOAD/$RELDATEFILE"
then
    # A failed transfer also leaves an empty file; report it as an error
    # instead of mistaking it for "no new release".
    rm -rf "$DOWNLOAD"
    die "Failed to download $reldateurl. Exiting."
fi
if [ -s "$DOWNLOAD/$RELDATEFILE" ]
then
    # -n/p: print only when the pattern matched, so an unexpected file format
    # yields an empty release id instead of the whole first line.
    release=$(head -1 "$DOWNLOAD/$RELDATEFILE" | sed -n "s/UniProt Knowledgebase Release \(...._..\).*/\1/p")
    if [ -z "$release" ]
    then
        die "Could not parse release id from $DOWNLOAD/$RELDATEFILE. Exiting."
    fi
    if [ -e "$LOCALDIR/uniprot_$release" ]
    then
        # mv would otherwise silently nest the download dir inside the
        # existing release directory.
        rm -rf "$DOWNLOAD"
        die "$LOCALDIR/uniprot_$release already exists. Remove it and rerun. Exiting."
    fi
    echo "New uniprot release $release available. Downloading files."
else
    echo "No new uniprot release available. Exiting"
    rm -rf "$DOWNLOAD"
    exit 0
fi

# download if newer available
curl -f -z "$CURRENT/$TREMBL" "$tremblurl" > "$DOWNLOAD/$TREMBLGZ" \
    || die "Failed to download $tremblurl. Exiting."
if [ -s "$DOWNLOAD/$TREMBLGZ" ]
then
    echo "New trembl version downloaded"
else
    die "Remote trembl file not newer than local one. Something wrong. Exiting."
fi

curl -f -z "$CURRENT/$SPROT" "$sproturl" > "$DOWNLOAD/$SPROTGZ" \
    || die "Failed to download $sproturl. Exiting."
if [ -s "$DOWNLOAD/$SPROTGZ" ]
then
    echo "New sprot version downloaded"
else
    die "Remote sprot file not newer than local one. Something wrong. Exiting."
fi

# getting the SIFTS PDB to UNIPROT mapping file
# (kept best-effort: a failure is reported but does not abort the update)
curl -f "$SIFTSPDB2UNIPROTFTP" > "$DOWNLOAD/$SIFTSPDB2UNIPROT" \
    || echo "Warning: failed to download $SIFTSPDB2UNIPROTFTP" >&2

# uncompressing
gzip -df "$DOWNLOAD/$SPROTGZ" || die "Failed to uncompress $DOWNLOAD/$SPROTGZ"
gzip -df "$DOWNLOAD/$TREMBLGZ" || die "Failed to uncompress $DOWNLOAD/$TREMBLGZ"
# creating the "all" file
cat "$DOWNLOAD/$TREMBL" "$DOWNLOAD/$SPROT" > "$DOWNLOAD/$ALL" \
    || die "Failed to create $DOWNLOAD/$ALL"

# run formatdb
# formatdb appends the path used to run it to the .pal index file,
# thus if the path used is an absolute path it's effectively hard coding
# them making the directory not movable. That's why we have to cd to the
# DOWNLOAD dir first, so that there's no hard-coded paths in the .pal file

echo "Running formatdb..."

#formatdb log file
logfile="$DOWNLOAD/formatdb.log"

cd "$DOWNLOAD" || die "Cannot cd to $DOWNLOAD"
for fasta in "$SPROT" "$TREMBL" "$ALL"
do
    # Abort before touching the "current" symlink if any db fails to build.
    formatdb -p T -o T -l "$logfile" -i "$fasta" || die "formatdb failed for $fasta"
done

#renaming DOWNLOAD dir to uniprot version and updating current symlink
echo "Creating new symlink..."
mv "$DOWNLOAD" "$LOCALDIR/uniprot_$release" \
    || die "Cannot rename $DOWNLOAD to $LOCALDIR/uniprot_$release"
rm -f "$CURRENT"
cd "$LOCALDIR" || die "Cannot cd to $LOCALDIR"
# relative link target, so the whole base dir stays movable
ln -s "uniprot_$release" current || die "Cannot create symlink $CURRENT"

echo "Done"

Properties

Name Value
svn:executable *