ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/scripts/updateUniprot.sh
Revision: 1076
Committed: Wed May 26 10:10:06 2010 UTC (12 years ago) by jmduarteg
File size: 3002 byte(s)
Log Message:
Now using UK mirror
Line File contents
#!/bin/sh
# Script to update a local uniprot copy with sequence files and generate blast dbs for them (formatdb)
#
# Usage: updateUniprot.sh <base local dir>
#
# Layout maintained under <base local dir>:
#   download/           scratch directory, wiped and recreated on every run
#   uniprot_<release>/  one directory per installed release
#   current             symlink to the most recently installed uniprot_<release>
#
# Exit status:
#   0  a new release was installed, or no newer release is available
#   1  usage error or any failure while downloading/building the release
#
# On failure the download directory is left in place for inspection; it is
# removed at the start of the next run.

set -u

# Print the given message to stderr and abort the script with status 1.
die() {
  echo "$*" >&2
  exit 1
}

# Download a URL only if it is newer than a local reference file.
#   $1 - local reference file used for the time condition (curl -z)
#   $2 - remote URL
#   $3 - destination file (always created; left empty if nothing was fetched)
# Returns curl's exit status.
fetch_if_newer() {
  curl -f -z "$1" "$2" > "$3"
}

if [ -z "${1:-}" ]
then
  echo "Usage: $0 <base local dir>" >&2
  exit 1
fi

# Resolve the base dir to an absolute path: the script changes directory
# later on, so every path derived from LOCALDIR must stay valid regardless
# of the current working directory.
LOCALDIR=$(CDPATH= cd -- "$1" && pwd) \
  || die "Base local dir '$1' does not exist or is not accessible. Exiting."
CURRENT="$LOCALDIR/current"
DOWNLOAD="$LOCALDIR/download"

#SITE="ftp://ftp.uniprot.org/pub" # US main ftp
SITE="ftp://ftp.ebi.ac.uk/pub" # UK mirror
# the swiss mirror doesn't seem to update properly, not using it anymore
#SITE="ftp://ftp.expasy.org" # swiss mirror

COMPLETEKBDIR="databases/uniprot/current_release/knowledgebase/complete"

SIFTSPDB2UNIPROTFTP="ftp://ftp.ebi.ac.uk/pub/databases/msd/sifts/text/pdb_chain_uniprot.lst"

SPROT="uniprot_sprot.fasta"
SPROTGZ="${SPROT}.gz"
TREMBL="uniprot_trembl.fasta"
TREMBLGZ="${TREMBL}.gz"
ALL="uniprot_all.fasta"
RELDATEFILE="reldate.txt"
SIFTSPDB2UNIPROT="pdb_chain_uniprot.lst"

sproturl="$SITE/$COMPLETEKBDIR/$SPROTGZ"
tremblurl="$SITE/$COMPLETEKBDIR/$TREMBLGZ"
reldateurl="$SITE/$COMPLETEKBDIR/$RELDATEFILE"

# remove existing download directory if there was one
rm -rf -- "$DOWNLOAD" || die "Could not remove old download dir $DOWNLOAD. Exiting."
# create the download dir
mkdir -- "$DOWNLOAD" || die "Could not create download dir $DOWNLOAD. Exiting."

# getting the release date file if newer available
# A transfer error must not be mistaken for "no new release": an empty file
# only means "not newer" when curl itself succeeded.
fetch_if_newer "$CURRENT/$RELDATEFILE" "$reldateurl" "$DOWNLOAD/$RELDATEFILE" \
  || die "Failed to download $reldateurl. Exiting."
if [ -s "$DOWNLOAD/$RELDATEFILE" ]
then
  # -n ... p prints only when the pattern matched, so a file with an
  # unexpected first line yields an empty release instead of the raw line
  release=$(sed -n "1s/.*UniProt Knowledgebase Release \(...._..\).*/\1/p" "$DOWNLOAD/$RELDATEFILE")
else
  echo "No new uniprot release available. Exiting"
  rm -rf -- "$DOWNLOAD"
  exit 0
fi

# the release string becomes part of a directory name: refuse anything that
# is empty or contains characters other than letters, digits, '_' and '.'
case "$release" in
  '' | *[!0-9A-Za-z_.]*)
    die "Could not parse the release from $DOWNLOAD/$RELDATEFILE. Exiting."
    ;;
esac

# mv would move the download dir *inside* an already existing target dir
if [ -e "$LOCALDIR/uniprot_$release" ]
then
  die "$LOCALDIR/uniprot_$release already exists. Exiting."
fi

echo "New uniprot release $release available. Downloading files."

# download if newer available
fetch_if_newer "$CURRENT/$TREMBL" "$tremblurl" "$DOWNLOAD/$TREMBLGZ" \
  || die "Failed to download $tremblurl. Exiting."
if [ -s "$DOWNLOAD/$TREMBLGZ" ]
then
  echo "New trembl version downloaded"
else
  die "Remote trembl file not newer than local one. Something wrong. Exiting."
fi

fetch_if_newer "$CURRENT/$SPROT" "$sproturl" "$DOWNLOAD/$SPROTGZ" \
  || die "Failed to download $sproturl. Exiting."
if [ -s "$DOWNLOAD/$SPROTGZ" ]
then
  echo "New sprot version downloaded"
else
  die "Remote sprot file not newer than local one. Something wrong. Exiting."
fi

# getting the SIFTS PDB to UNIPROT mapping file
curl -f "$SIFTSPDB2UNIPROTFTP" > "$DOWNLOAD/$SIFTSPDB2UNIPROT" \
  || die "Failed to download $SIFTSPDB2UNIPROTFTP. Exiting."
[ -s "$DOWNLOAD/$SIFTSPDB2UNIPROT" ] \
  || die "Downloaded SIFTS mapping file is empty. Exiting."

# uncompressing
gzip -df "$DOWNLOAD/$SPROTGZ" || die "Failed to uncompress $SPROTGZ. Exiting."
gzip -df "$DOWNLOAD/$TREMBLGZ" || die "Failed to uncompress $TREMBLGZ. Exiting."
# creating the "all" file
cat "$DOWNLOAD/$TREMBL" "$DOWNLOAD/$SPROT" > "$DOWNLOAD/$ALL" \
  || die "Failed to create $ALL. Exiting."

# run formatdb
# formatdb appends the path used to run it to the .pal index file,
# thus if the path used is an absolute path it's effectively hard coding
# them making the directory not movable. That's why we have to cd to the
# DOWNLOAD dir first, so that there's no hard-coded paths in the .pal file

echo "Running formatdb..."

#formatdb log file
logfile="$DOWNLOAD/formatdb.log"

cd "$DOWNLOAD" || die "Could not change to $DOWNLOAD. Exiting."
# the new release only becomes "current" if every database was built
for fasta in "$SPROT" "$TREMBL" "$ALL"
do
  formatdb -p T -o T -l "$logfile" -i "$fasta" \
    || die "formatdb failed for $fasta (see $logfile). Exiting."
done

#renaming DOWNLOAD dir to uniprot version and updating current symlink
echo "Creating new symlink..."
# leave the download dir before renaming it; the symlink target is kept
# relative so that the whole base dir stays movable
cd "$LOCALDIR" || die "Could not change to $LOCALDIR. Exiting."
mv -- "$DOWNLOAD" "$LOCALDIR/uniprot_$release" \
  || die "Could not rename $DOWNLOAD to uniprot_$release. Exiting."
rm -f -- "$CURRENT" || die "Could not remove old symlink $CURRENT. Exiting."
ln -s "uniprot_$release" current || die "Could not create the current symlink. Exiting."

echo "Done"

Properties

Name Value
svn:executable *