ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/trunk/scripts/updateUniprot.sh
Revision: 1076
Committed: Wed May 26 10:10:06 2010 UTC (12 years, 2 months ago) by jmduarteg
File size: 3002 byte(s)
Log Message:
Now using UK mirror
Line User Rev File contents
#!/bin/sh
# Script to update a local uniprot copy with sequence files and generate
# blast dbs for them (formatdb).
#
# Usage: updateUniprot.sh <base local dir>
#
# Layout maintained under <base local dir>:
#   download/          scratch dir, recreated on every run
#   uniprot_<release>/ one dir per downloaded release
#   current            symlink to the most recent uniprot_<release> dir
#
# Exit status: 0 if a new release was installed or none was available,
# 1 on usage error or if any step fails.

# Print a message on stderr and abort the script with status 1.
die() {
  echo "$*" >&2
  exit 1
}

if [ -z "$1" ]
then
  echo "Usage: $0 <base local dir>"
  exit 1
fi

# Resolve the base dir to an absolute path. The script later changes into the
# download dir, so any path derived from a relative base dir would stop
# resolving from there on.
LOCALDIR=$(CDPATH= cd -- "$1" && pwd) || die "Cannot access base local dir '$1'. Exiting."
CURRENT="$LOCALDIR/current"
DOWNLOAD="$LOCALDIR/download"

#SITE="ftp://ftp.uniprot.org/pub" # US main ftp
SITE="ftp://ftp.ebi.ac.uk/pub" # UK mirror
# the swiss mirror doesn't seem to update properly, not using it anymore
#SITE="ftp://ftp.expasy.org" # swiss mirror

COMPLETEKBDIR="databases/uniprot/current_release/knowledgebase/complete"

SIFTSPDB2UNIPROTFTP="ftp://ftp.ebi.ac.uk/pub/databases/msd/sifts/text/pdb_chain_uniprot.lst"

SPROT="uniprot_sprot.fasta"
SPROTGZ="${SPROT}.gz"
TREMBL="uniprot_trembl.fasta"
TREMBLGZ="${TREMBL}.gz"
ALL="uniprot_all.fasta"
RELDATEFILE="reldate.txt"
SIFTSPDB2UNIPROT="pdb_chain_uniprot.lst"

sproturl="$SITE/$COMPLETEKBDIR/$SPROTGZ"
tremblurl="$SITE/$COMPLETEKBDIR/$TREMBLGZ"
reldateurl="$SITE/$COMPLETEKBDIR/$RELDATEFILE"

# remove existing download directory if there was one, then recreate it
rm -rf "$DOWNLOAD" || die "Could not remove old download dir $DOWNLOAD. Exiting."
mkdir "$DOWNLOAD" || die "Could not create download dir $DOWNLOAD. Exiting."

# getting the release date file if newer available: curl -z only transfers
# the remote file when it is newer than the local reference file, so an empty
# result means "nothing new" (provided curl itself succeeded)
release=""
curl -z "$CURRENT/$RELDATEFILE" "$reldateurl" > "$DOWNLOAD/$RELDATEFILE" \
  || die "Could not retrieve $reldateurl. Exiting."
if [ -s "$DOWNLOAD/$RELDATEFILE" ]
then
  # sed -n ... p prints nothing when the header line does not match, instead
  # of passing the whole unparsed line through as the release name
  release=$(head -n 1 "$DOWNLOAD/$RELDATEFILE" \
    | sed -n 's/UniProt Knowledgebase Release \(...._..\).*/\1/p')
  case "$release" in
    ????_??) ;;
    *) die "Could not parse the release from $DOWNLOAD/$RELDATEFILE. Exiting." ;;
  esac
  echo "New uniprot release $release available. Downloading files."
else
  echo "No new uniprot release available. Exiting"
  rm -rf "$DOWNLOAD"
  exit 0
fi

NEWDIR="$LOCALDIR/uniprot_$release"
# Refuse to go on if the target exists: mv would otherwise silently move the
# download dir *inside* the existing release dir.
if [ -e "$NEWDIR" ]
then
  die "$NEWDIR already exists. Exiting."
fi

# download if newer available
curl -z "$CURRENT/$TREMBL" "$tremblurl" > "$DOWNLOAD/$TREMBLGZ" \
  || die "Could not retrieve $tremblurl. Exiting."
if [ -s "$DOWNLOAD/$TREMBLGZ" ]
then
  echo "New trembl version downloaded"
else
  die "Remote trembl file not newer than local one. Something wrong. Exiting."
fi

curl -z "$CURRENT/$SPROT" "$sproturl" > "$DOWNLOAD/$SPROTGZ" \
  || die "Could not retrieve $sproturl. Exiting."
if [ -s "$DOWNLOAD/$SPROTGZ" ]
then
  echo "New sprot version downloaded"
else
  die "Remote sprot file not newer than local one. Something wrong. Exiting."
fi

# getting the SIFTS PDB to UNIPROT mapping file
curl "$SIFTSPDB2UNIPROTFTP" > "$DOWNLOAD/$SIFTSPDB2UNIPROT" \
  || die "Could not retrieve $SIFTSPDB2UNIPROTFTP. Exiting."

# uncompressing
gzip -df "$DOWNLOAD/$SPROTGZ" || die "Could not uncompress $SPROTGZ. Exiting."
gzip -df "$DOWNLOAD/$TREMBLGZ" || die "Could not uncompress $TREMBLGZ. Exiting."
# creating the "all" file
cat "$DOWNLOAD/$TREMBL" "$DOWNLOAD/$SPROT" > "$DOWNLOAD/$ALL" \
  || die "Could not create $DOWNLOAD/$ALL. Exiting."

# run formatdb
# formatdb appends the path used to run it to the .pal index file,
# thus if the path used is an absolute path it's effectively hard coding
# them making the directory not movable. That's why we have to cd to the
# DOWNLOAD dir first, so that there's no hard-coded paths in the .pal file

echo "Running formatdb..."

#formatdb log file
logfile="$DOWNLOAD/formatdb.log"

cd "$DOWNLOAD" || die "Could not change to $DOWNLOAD. Exiting."
for fastafile in "$SPROT" "$TREMBL" "$ALL"
do
  formatdb -p T -o T -l "$logfile" -i "$fastafile" \
    || die "formatdb failed for $fastafile. Exiting."
done

#renaming DOWNLOAD dir to uniprot version and updating current symlink
echo "Creating new symlink..."
# leave the download dir before renaming it
cd "$LOCALDIR" || die "Could not change to $LOCALDIR. Exiting."
mv "$DOWNLOAD" "$NEWDIR" || die "Could not rename $DOWNLOAD to $NEWDIR. Exiting."
rm -f "$CURRENT" || die "Could not remove old symlink $CURRENT. Exiting."
# relative link target, so the whole base dir stays movable
ln -s "uniprot_$release" current || die "Could not create symlink $CURRENT. Exiting."

echo "Done"

Properties

Name Value
svn:executable *