#!/bin/sh
#
# Script to update a local uniprot copy with sequence files and generate
# blast dbs for them (formatdb).
#
# Usage: <this script> <base local dir>
#   <base local dir>  existing directory under which the "download" working
#                     directory, the "uniprot_<release>" directories and the
#                     "current" symlink are kept.

if [ -z "$1" ]
then
    # Usage errors are diagnostics, so they go to stderr rather than stdout.
    echo "Usage: $0 <base local dir>" >&2
    exit 1
fi

# The rest of the script creates and deletes paths below the base dir, so
# refuse to go on when it is not an existing directory.
if [ ! -d "$1" ]
then
    echo "$0: base local dir '$1' is not a directory" >&2
    exit 1
fi


# ---- local layout ----------------------------------------------------------
# Base directory, taken from the first command line argument.
LOCALDIR=$1
# Path of the "current" entry inside the base directory.
CURRENT="${LOCALDIR}/current"
# Working directory the new files are downloaded into.
DOWNLOAD="${LOCALDIR}/download"

# ---- remote locations ------------------------------------------------------
# Exactly one SITE is active; the alternatives are kept for quick switching.
#SITE="ftp://ftp.uniprot.org/pub" # US main ftp
#SITE="ftp://ftp.ebi.ac.uk/pub" # UK mirror
SITE="ftp://ftp.expasy.org" # swiss mirror

# Directory, relative to SITE, holding the complete knowledgebase files.
COMPLETEKBDIR="databases/uniprot/current_release/knowledgebase/complete"

# Full URL of the SIFTS PDB to UniProt mapping file.
SIFTSPDB2UNIPROTFTP="ftp://ftp.ebi.ac.uk/pub/databases/msd/sifts/text/pdb_chain_uniprot.lst"

# ---- file names ------------------------------------------------------------
SPROT="uniprot_sprot.fasta"
TREMBL="uniprot_trembl.fasta"
SPROTGZ="${SPROT}.gz"
TREMBLGZ="${TREMBL}.gz"
ALL="uniprot_all.fasta"
RELDATEFILE="reldate.txt"
SIFTSPDB2UNIPROT="pdb_chain_uniprot.lst"

# ---- download URLs ---------------------------------------------------------
# All three files live in the same remote directory.
kbbaseurl="${SITE}/${COMPLETEKBDIR}"
sproturl="${kbbaseurl}/${SPROTGZ}"
tremblurl="${kbbaseurl}/${TREMBLGZ}"
reldateurl="${kbbaseurl}/${RELDATEFILE}"

# remove existing download directory if there was one
# (${DOWNLOAD:?} aborts instead of expanding to nothing, guarding the rm -rf)
rm -rf -- "${DOWNLOAD:?}"
# create the download dir
if ! mkdir -- "$DOWNLOAD"
then
    echo "Could not create download directory $DOWNLOAD. Exiting." >&2
    exit 1
fi

# getting the release date file if newer available
# curl -z only transfers the remote file when it is newer than the local
# reference file; when it is not, curl succeeds and the output stays empty.
release=""
if ! curl -z "$CURRENT/$RELDATEFILE" "$reldateurl" > "$DOWNLOAD/$RELDATEFILE"
then
    # A failed transfer must not be mistaken for "no new release".
    echo "Could not retrieve $reldateurl. Exiting." >&2
    rm -rf -- "${DOWNLOAD:?}"
    exit 1
fi

if [ -s "$DOWNLOAD/$RELDATEFILE" ]
then
    # The first line is expected to read
    # "UniProt Knowledgebase Release XXXX_XX ..."; keep only the XXXX_XX part.
    release=$(head -n 1 "$DOWNLOAD/$RELDATEFILE" | sed "s/UniProt Knowledgebase Release \(...._..\).*/\1/")
    # sed leaves a non-matching line untouched. The value is later used to
    # build directory names, so reject anything empty or containing
    # characters that do not belong in a release identifier.
    case "$release" in
        ''|*[!A-Za-z0-9_.-]*)
            echo "Could not parse the release from $DOWNLOAD/$RELDATEFILE. Exiting." >&2
            exit 1
            ;;
    esac
    echo "New uniprot release $release available. Downloading files."
else
    echo "No new uniprot release available. Exiting"
    rm -rf -- "${DOWNLOAD:?}"
    exit 0
fi


# fetch_if_newer <label> <local reference file> <url> <destination>
#
# Downloads <url> into <destination>, but only if the remote file is newer
# than <local reference file> (curl -z). At this point a new release has
# already been detected, so an empty result is treated as an error.
# Exits the script with status 1 when the transfer fails or nothing was
# downloaded; <label> is only used in the messages.
fetch_if_newer() {
    if ! curl -z "$2" "$3" > "$4"
    then
        echo "Download of $3 failed. Exiting." >&2
        exit 1
    fi
    if [ -s "$4" ]
    then
        echo "New $1 version downloaded"
    else
        echo "Remote $1 file not newer than local one. Something wrong. Exiting." >&2
        exit 1
    fi
}

# download if newer available
fetch_if_newer trembl "$CURRENT/$TREMBL" "$tremblurl" "$DOWNLOAD/${TREMBL}.gz"

fetch_if_newer sprot "$CURRENT/$SPROT" "$sproturl" "$DOWNLOAD/${SPROT}.gz"


# getting the SIFTS PDB to UNIPROT mapping file
# A problem here is reported but does not abort the update, matching the
# previous behaviour of carrying on regardless.
curl "$SIFTSPDB2UNIPROTFTP" > "$DOWNLOAD/$SIFTSPDB2UNIPROT"
if [ $? -ne 0 ] || [ ! -s "$DOWNLOAD/$SIFTSPDB2UNIPROT" ]
then
    echo "Warning: could not download $SIFTSPDB2UNIPROTFTP, $SIFTSPDB2UNIPROT is missing or incomplete." >&2
fi


# uncompressing
# -d decompresses, -f overwrites an already existing output file.
# Stop on failure so that a broken archive is never indexed and published.
if ! gzip -df "$DOWNLOAD/${SPROT}.gz"
then
    echo "Could not uncompress $DOWNLOAD/${SPROT}.gz. Exiting." >&2
    exit 1
fi
if ! gzip -df "$DOWNLOAD/${TREMBL}.gz"
then
    echo "Could not uncompress $DOWNLOAD/${TREMBL}.gz. Exiting." >&2
    exit 1
fi
# creating the "all" file (trembl sequences followed by the sprot ones)
if ! cat "$DOWNLOAD/$TREMBL" "$DOWNLOAD/$SPROT" > "$DOWNLOAD/$ALL"
then
    echo "Could not create $DOWNLOAD/$ALL. Exiting." >&2
    exit 1
fi

# run formatdb
# formatdb writes the path it was given into the .pal index file, so using
# an absolute path would effectively hard-code it and make the directory
# impossible to move. That is why we cd to the DOWNLOAD dir first and pass
# bare file names: no hard-coded paths end up in the .pal file.


echo "Running formatdb..."

#formatdb log file
# Given relative to the DOWNLOAD dir, because formatdb runs from inside it.
# Building it from $DOWNLOAD would point to the wrong place whenever the base
# dir was passed as a relative path.
logfile="formatdb.log"

# The subshell keeps the cd from changing the working directory of the rest
# of the script, and the first failing step makes the whole group fail.
(
    cd "$DOWNLOAD" || exit 1
    formatdb -p T -o T -l "$logfile" -i "$SPROT" || exit 1
    formatdb -p T -o T -l "$logfile" -i "$TREMBL" || exit 1
    formatdb -p T -o T -l "$logfile" -i "$ALL" || exit 1
) || {
    echo "formatdb failed, see $DOWNLOAD/$logfile. Exiting." >&2
    exit 1
}

#renaming DOWNLOAD dir to uniprot version and updating current symlink
echo "Creating new symlink..."

# Without a release identifier the target directory name would be bogus.
if [ -z "$release" ]
then
    echo "Release identifier is empty, cannot name the new directory. Exiting." >&2
    exit 1
fi

newdir="$LOCALDIR/uniprot_$release"

# mv onto an existing directory would move DOWNLOAD *inside* it instead of
# renaming it, so refuse to continue in that case.
if [ -e "$newdir" ]
then
    echo "$newdir already exists. Exiting." >&2
    exit 1
fi

if ! mv -- "$DOWNLOAD" "$newdir"
then
    echo "Could not rename $DOWNLOAD to $newdir. Exiting." >&2
    exit 1
fi

rm -f -- "$CURRENT"
# rm -f cannot remove a real directory; ln -s would then silently create the
# link inside that directory instead of replacing it.
if [ -e "$CURRENT" ] || [ -L "$CURRENT" ]
then
    echo "Could not remove $CURRENT. Exiting." >&2
    exit 1
fi

# The link target is relative to the directory containing the link, so the
# base dir can be moved as a whole and no cd is needed to create it.
if ! ln -s "uniprot_$release" "$CURRENT"
then
    echo "Could not create symlink $CURRENT. Exiting." >&2
    exit 1
fi

echo "Done"