ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/scripts/updateUniprot.sh
Revision: 1289
Committed: Wed Dec 15 11:57:49 2010 UTC (10 years, 11 months ago) by jmduarteg
File size: 3033 byte(s)
Log Message:
Introduced constant for formatdb executable
Line File contents
#!/bin/sh
# Script to update a local uniprot copy with sequence files and generate
# blast dbs for them (formatdb).
#
# Usage: updateUniprot.sh <base local dir>
#   <base local dir>  existing directory under which the download directory,
#                     the release directories and the "current" symlink live

if [ -z "$1" ]; then
  # diagnostics belong on stderr so they never mix with regular output
  echo "Usage: $0 <base local dir>" >&2
  exit 1
fi

# Without an existing base dir nothing below can succeed; fail early and
# loudly instead of reporting that no new release is available.
if [ ! -d "$1" ]; then
  echo "Base local dir '$1' does not exist or is not a directory" >&2
  exit 1
fi
9
10
# base directory of the local uniprot copy, taken from the first argument
LOCALDIR=$1
# path of the "current" entry inside the base dir
CURRENT="$LOCALDIR/current"
# directory the new files are downloaded into
DOWNLOAD="$LOCALDIR/download"

# formatdb executable; a FORMATDB value already set in the environment wins
FORMATDB="${FORMATDB:-/usr/bin/formatdb}"

# download site; a SITE value already set in the environment wins
#SITE="ftp://ftp.uniprot.org/pub" # US main ftp
# the swiss mirror doesn't seem to update properly, not using it anymore
#SITE="ftp://ftp.expasy.org" # swiss mirror
SITE="${SITE:-ftp://ftp.ebi.ac.uk/pub}" # UK mirror (default)


# directory (below SITE) holding the complete knowledgebase files
COMPLETEKBDIR="databases/uniprot/current_release/knowledgebase/complete"

# full URL of the SIFTS PDB chain to UniProt mapping list
SIFTSPDB2UNIPROTFTP="ftp://ftp.ebi.ac.uk/pub/databases/msd/sifts/text/pdb_chain_uniprot.lst"


# local/remote file names
SPROT="uniprot_sprot.fasta"
SPROTGZ="${SPROT}.gz"
TREMBL="uniprot_trembl.fasta"
TREMBLGZ="${TREMBL}.gz"
ALL="uniprot_all.fasta"
RELDATEFILE="reldate.txt"
SIFTSPDB2UNIPROT="pdb_chain_uniprot.lst"

# remote locations of the files to fetch
sproturl="$SITE/$COMPLETEKBDIR/$SPROTGZ"
tremblurl="$SITE/$COMPLETEKBDIR/$TREMBLGZ"
reldateurl="$SITE/$COMPLETEKBDIR/$RELDATEFILE"
39
# remove existing download directory if there was one
# (${DOWNLOAD:?} aborts the script instead of expanding to an empty path)
rm -rf -- "${DOWNLOAD:?}"
# create the download dir; every later step writes into it
if ! mkdir -- "$DOWNLOAD"; then
  echo "Could not create download directory $DOWNLOAD. Exiting." >&2
  exit 1
fi
44
# getting the release date file if newer available
# curl -z uses the local file as time condition: when the remote file is not
# newer nothing is written and the downloaded file stays empty.
release=""
if ! curl -z "$CURRENT/$RELDATEFILE" "$reldateurl" > "$DOWNLOAD/$RELDATEFILE"; then
  # a transfer error must not be mistaken for "no new release"
  echo "Could not retrieve $reldateurl. Exiting." >&2
  rm -rf -- "${DOWNLOAD:?}"
  exit 1
fi

if [ -s "$DOWNLOAD/$RELDATEFILE" ]; then
  # sed -n ... p prints only when the release pattern matched, so an
  # unexpected first line yields an empty string instead of the raw line
  release=$(head -n 1 "$DOWNLOAD/$RELDATEFILE" \
    | sed -n "s/UniProt Knowledgebase Release \(...._..\).*/\1/p")
  if [ -z "$release" ]; then
    echo "Could not parse the release from $DOWNLOAD/$RELDATEFILE. Exiting." >&2
    exit 1
  fi
  echo "New uniprot release $release available. Downloading files."
else
  echo "No new uniprot release available. Exiting"
  rm -rf -- "${DOWNLOAD:?}"
  exit 0
fi
57
58
# download_if_newer LABEL FASTA URL
# Fetches URL into $DOWNLOAD/FASTA.gz, using $CURRENT/FASTA as the curl time
# condition. Exits the script when the transfer fails or when nothing was
# downloaded (at this point a new release is known to exist, so an empty
# result means something is wrong).
#   LABEL  short name used in the messages (trembl, sprot)
#   FASTA  name of the uncompressed fasta file
#   URL    remote location of the gzipped fasta file
download_if_newer() {
  if ! curl -z "$CURRENT/$2" "$3" > "$DOWNLOAD/$2.gz"; then
    echo "Download of $3 failed. Exiting." >&2
    exit 1
  fi
  if [ -s "$DOWNLOAD/$2.gz" ]; then
    echo "New $1 version downloaded"
  else
    echo "Remote $1 file not newer than local one. Something wrong. Exiting." >&2
    exit 1
  fi
}

# download if newer available
download_if_newer trembl "$TREMBL" "$tremblurl"
download_if_newer sprot "$SPROT" "$sproturl"
77
78
# getting the SIFTS PDB to UNIPROT mapping file
# (a failure is reported but does not stop the uniprot update itself)
if ! curl "$SIFTSPDB2UNIPROTFTP" > "$DOWNLOAD/$SIFTSPDB2UNIPROT"; then
  echo "Warning: could not download $SIFTSPDB2UNIPROTFTP" >&2
fi


# uncompressing; a corrupt or truncated archive must stop the update before
# any blast db is built from it
for gzfile in "$DOWNLOAD/${SPROT}.gz" "$DOWNLOAD/${TREMBL}.gz"; do
  if ! gzip -df -- "$gzfile"; then
    echo "Could not uncompress $gzfile. Exiting." >&2
    exit 1
  fi
done

# creating the "all" file
if ! cat -- "$DOWNLOAD/$TREMBL" "$DOWNLOAD/$SPROT" > "$DOWNLOAD/$ALL"; then
  echo "Could not create $DOWNLOAD/$ALL. Exiting." >&2
  exit 1
fi
88
# run formatdb
# formatdb appends the path used to run it to the .pal index file,
# thus if the path used is an absolute path it's effectively hard coding
# them making the directory not movable. That's why we have to cd to the
# DOWNLOAD dir first, so that there's no hard-coded paths in the .pal file


echo "Running formatdb..."

#formatdb log file, relative to the DOWNLOAD dir formatdb is run from
logfile="formatdb.log"

# The cd happens inside a subshell so the working directory of the rest of
# the script is untouched; otherwise a relative base dir would no longer
# resolve after this step.
(
  cd "$DOWNLOAD" || exit 1
  for fasta in "$SPROT" "$TREMBL" "$ALL"; do
    "$FORMATDB" -p T -o T -l "$logfile" -i "$fasta" || exit 1
  done
) || {
  echo "formatdb failed, see $DOWNLOAD/$logfile. Exiting." >&2
  exit 1
}
105
#renaming DOWNLOAD dir to uniprot version and updating current symlink
echo "Creating new symlink..."
newdir="uniprot_$release"

# mv would move the download dir *into* an already existing directory
if [ -e "$LOCALDIR/$newdir" ]; then
  echo "$LOCALDIR/$newdir already exists. Exiting." >&2
  exit 1
fi

# only touch the current symlink once the new release dir is in place
if ! mv -- "$DOWNLOAD" "$LOCALDIR/$newdir"; then
  echo "Could not rename $DOWNLOAD to $LOCALDIR/$newdir. Exiting." >&2
  exit 1
fi

# the link is created from inside the base dir with a relative target
if ! cd "$LOCALDIR"; then
  echo "Could not change to $LOCALDIR. Exiting." >&2
  exit 1
fi
rm -f -- current
if ! ln -s -- "$newdir" current; then
  echo "Could not create the current symlink in $LOCALDIR. Exiting." >&2
  exit 1
fi

echo "Done"

Properties

Name Value
svn:executable *