ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/scripts/updateUniprot.sh
Revision: 1485
Committed: Tue Nov 15 10:24:40 2011 UTC (10 years ago) by jmduarteg
File size: 3525 byte(s)
Log Message:
Adding downloading of uniref100
Line File contents
#!/bin/sh
# Script to update a local uniprot copy with sequence files and generate
# blast dbs for them (formatdb).
#
# Usage: updateUniprot.sh <base local dir>
#   <base local dir>  existing directory in which the download and release
#                     sub-directories are created

# The base directory argument is mandatory.
if [ -z "$1" ]; then
  echo "Usage: $0 <base local dir>" >&2
  exit 1
fi

# Stop right away when the base directory is missing instead of letting every
# later step fail one by one: the script only creates sub-directories of it.
if [ ! -d "$1" ]; then
  echo "$0: base local dir '$1' does not exist or is not a directory" >&2
  exit 1
fi

# Base directory given on the command line. It is turned into an absolute
# path because the script changes its working directory later on; paths built
# from a relative base would stop resolving after that.
LOCALDIR=$1
case "$LOCALDIR" in
  /*) ;;
  *) LOCALDIR="$PWD/$LOCALDIR" ;;
esac
CURRENT="$LOCALDIR/current"
DOWNLOAD="$LOCALDIR/download"

FORMATDB=/usr/bin/formatdb

#SITE="ftp://ftp.uniprot.org/pub" # US main ftp
SITE="ftp://ftp.ebi.ac.uk/pub" # UK mirror
# the swiss mirror doesn't seem to update properly, not using it anymore
#SITE="ftp://ftp.expasy.org" # swiss mirror

# Remote directories, relative to $SITE
COMPLETEKBDIR="databases/uniprot/current_release/knowledgebase/complete"
UNIREFDIR="databases/uniprot/uniref/uniref100"

# Full URL of the SIFTS PDB chain to UniProt mapping file
SIFTSPDB2UNIPROTFTP="ftp://ftp.ebi.ac.uk/pub/databases/msd/sifts/text/pdb_chain_uniprot.lst"

# File names; each *GZ name is the compressed form of the name above it
SPROT="uniprot_sprot.fasta"
SPROTGZ="${SPROT}.gz"
TREMBL="uniprot_trembl.fasta"
TREMBLGZ="${TREMBL}.gz"
UNIREF100="uniref100.fasta"
UNIREF100GZ="${UNIREF100}.gz"
ALL="uniprot_all.fasta"
RELDATEFILE="reldate.txt"
SIFTSPDB2UNIPROT="pdb_chain_uniprot.lst"

# Download URLs
sproturl="$SITE/$COMPLETEKBDIR/$SPROTGZ"
tremblurl="$SITE/$COMPLETEKBDIR/$TREMBLGZ"
uref100url="$SITE/$UNIREFDIR/$UNIREF100GZ"
reldateurl="$SITE/$COMPLETEKBDIR/$RELDATEFILE"

# Start from an empty download directory: remove what a previous run may have
# left behind, then create it again. ${DOWNLOAD:?} makes the shell abort
# instead of running rm -rf with an empty path.
rm -rf -- "${DOWNLOAD:?}"
if ! mkdir -- "$DOWNLOAD"; then
  echo "$0: cannot create download directory $DOWNLOAD" >&2
  exit 1
fi

# getting the release date file if newer available
# curl -z only transfers the file when the remote copy is newer than the
# local one, so an empty result means "nothing new". A curl error is handled
# separately so that a failed transfer is not reported as "no new release".
release=""
if ! curl -z "$CURRENT/$RELDATEFILE" "$reldateurl" > "$DOWNLOAD/$RELDATEFILE"; then
  echo "$0: could not retrieve $reldateurl" >&2
  rm -rf -- "${DOWNLOAD:?}"
  exit 1
fi

if [ -s "$DOWNLOAD/$RELDATEFILE" ]; then
  # sed -n ... p prints only when the first line matches, so an unexpected
  # file yields an empty string instead of the raw line.
  release=$(head -n 1 "$DOWNLOAD/$RELDATEFILE" \
    | sed -n 's/UniProt Knowledgebase Release \(...._..\).*/\1/p')
  if [ -z "$release" ]; then
    echo "$0: cannot read a release number from $DOWNLOAD/$RELDATEFILE" >&2
    exit 1
  fi
  echo "New uniprot release $release available. Downloading files."
else
  echo "No new uniprot release available. Exiting"
  rm -rf -- "${DOWNLOAD:?}"
  exit 0
fi

# download if newer available

# fetch_if_newer <reference file> <url> <destination>
# Downloads <url> into <destination> with curl -z <reference file>, i.e. the
# transfer only happens when the remote file is newer than the reference.
# Returns 0 if <destination> ends up non-empty, non-zero if nothing was
# downloaded. Terminates the script when curl itself reports an error, so
# that a failed transfer is not mistaken for "remote file not newer".
fetch_if_newer() {
  if ! curl -z "$1" "$2" > "$3"; then
    echo "$0: download of $2 failed" >&2
    exit 1
  fi
  [ -s "$3" ]
}

# A new release date file was seen, so each of the sequence files is expected
# to be newer as well; anything else is treated as an error.
if fetch_if_newer "$CURRENT/$TREMBL" "$tremblurl" "$DOWNLOAD/$TREMBLGZ"; then
  echo "New trembl version downloaded"
else
  echo "Remote trembl file not newer than local one. Something wrong. Exiting."
  exit 1
fi

if fetch_if_newer "$CURRENT/$SPROT" "$sproturl" "$DOWNLOAD/$SPROTGZ"; then
  echo "New sprot version downloaded"
else
  echo "Remote sprot file not newer than local one. Something wrong. Exiting."
  exit 1
fi

if fetch_if_newer "$CURRENT/$UNIREF100" "$uref100url" "$DOWNLOAD/$UNIREF100GZ"; then
  echo "New Uniref100 version downloaded"
else
  echo "Remote Uniref100 file not newer than local one. Something wrong. Exiting"
  exit 1
fi

# getting the SIFTS PDB to UNIPROT mapping file
# Downloaded unconditionally (no -z); a failed transfer stops the update so
# that the new release directory never contains a missing or partial mapping.
if ! curl "$SIFTSPDB2UNIPROTFTP" > "$DOWNLOAD/$SIFTSPDB2UNIPROT"; then
  echo "$0: download of $SIFTSPDB2UNIPROTFTP failed" >&2
  exit 1
fi

# uncompressing
# gzip -d replaces each .gz file with the uncompressed file of the same name
# minus the suffix; -f overwrites an existing output file. Any failure stops
# the script so that no blast db is built from incomplete data.
for gz in "$SPROTGZ" "$TREMBLGZ" "$UNIREF100GZ"; do
  if ! gzip -df "$DOWNLOAD/$gz"; then
    echo "$0: could not uncompress $DOWNLOAD/$gz" >&2
    exit 1
  fi
done

# creating the "all" file (trembl followed by sprot)
if ! cat "$DOWNLOAD/$TREMBL" "$DOWNLOAD/$SPROT" > "$DOWNLOAD/$ALL"; then
  echo "$0: could not create $DOWNLOAD/$ALL" >&2
  exit 1
fi

# run formatdb
# formatdb appends the path used to run it to the .pal index file, so an
# absolute path would effectively be hard-coded there and the directory could
# no longer be moved. That is why we cd to the DOWNLOAD dir first and pass
# bare file names only: no hard-coded paths end up in the .pal file.

echo "Running formatdb..."

# formatdb log file; a bare name, because formatdb runs with DOWNLOAD as its
# working directory, so the log lands in DOWNLOAD/formatdb.log
logfile="formatdb.log"

# Without this check a failed cd would make formatdb run in the wrong place.
if ! cd "$DOWNLOAD"; then
  echo "$0: cannot change directory to $DOWNLOAD" >&2
  exit 1
fi

#$FORMATDB -p T -o T -l $logfile -i $SPROT
#$FORMATDB -p T -o T -l $logfile -i $TREMBL
for db in "$ALL" "$UNIREF100"; do
  if ! "$FORMATDB" -p T -o T -l "$logfile" -i "$db"; then
    echo "$0: formatdb failed for $db, see $DOWNLOAD/$logfile" >&2
    exit 1
  fi
done

# renaming DOWNLOAD dir to uniprot version and updating current symlink
echo "Creating new symlink..."

if [ -z "$release" ]; then
  echo "$0: release number is empty, not publishing" >&2
  exit 1
fi

newdir="$LOCALDIR/uniprot_$release"

# mv into an existing directory would nest the download inside it instead of
# renaming it, so refuse to continue in that case.
if [ -e "$newdir" ]; then
  echo "$0: $newdir already exists, not overwriting it" >&2
  exit 1
fi

if ! mv -- "$DOWNLOAD" "$newdir"; then
  echo "$0: cannot rename $DOWNLOAD to $newdir" >&2
  exit 1
fi

# Replace the "current" link. rm -f fails if "current" is a real directory;
# in that case ln -s would create the link inside it, so stop instead.
if ! rm -f -- "$CURRENT"; then
  echo "$0: cannot remove $CURRENT" >&2
  exit 1
fi

# The link is created from inside LOCALDIR with a relative target, so the
# link itself contains no absolute path.
if ! cd "$LOCALDIR"; then
  echo "$0: cannot change directory to $LOCALDIR" >&2
  exit 1
fi
if ! ln -s "uniprot_$release" current; then
  echo "$0: cannot create symlink current -> uniprot_$release" >&2
  exit 1
fi

echo "Done"

Properties

Name Value
svn:executable *