ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/owl/tags/owl-1.9.0/scripts/updateUniprot.sh
Revision: 1303
Committed: Fri Jan 14 20:21:03 2011 UTC (11 years ago) by hstehr
File size: 3033 byte(s)
Log Message:
tagging version 1.9.0 matching CMView 1.9.0 released as 1.1rc3
Line User Rev File contents
#!/bin/sh
# Script to update a local uniprot copy with sequence files and generate
# blast dbs for them (formatdb).
#
# Usage: updateUniprot.sh <base local dir>
#
# Layout maintained under <base local dir>:
#   download/            scratch dir, recreated on every run
#   uniprot_<release>/   one directory per downloaded release
#   current              symlink to the most recent uniprot_<release>
#
# Exit status: 0 when a new release was installed or when no newer release
# is available; 1 on bad usage or on any failure.

# Print a message to stderr and abort with status 1.
die() {
    echo "$*" >&2
    exit 1
}

# Download URL $2 into file $3, but only if the remote copy is newer than the
# local reference file $1 (curl -z). When the remote copy is not newer curl
# succeeds and writes nothing, so callers test the output with [ -s ].
fetch_if_newer() {
    curl -z "$1" "$2" > "$3" || die "Download of $2 failed. Exiting."
}

if [ -z "$1" ]
then
    echo "Usage: $0 <base local dir>" >&2
    exit 1
fi

# Resolve the base dir to an absolute path: the script changes directory
# further down, which would silently invalidate every relative path.
LOCALDIR=$(CDPATH= cd -- "$1" && pwd) || die "Cannot access base local dir '$1'. Exiting."
CURRENT="$LOCALDIR/current"
DOWNLOAD="$LOCALDIR/download"

FORMATDB=/usr/bin/formatdb

#SITE="ftp://ftp.uniprot.org/pub" # US main ftp
SITE="ftp://ftp.ebi.ac.uk/pub" # UK mirror
# the swiss mirror doesn't seem to update properly, not using it anymore
#SITE="ftp://ftp.expasy.org" # swiss mirror

COMPLETEKBDIR="databases/uniprot/current_release/knowledgebase/complete"

SIFTSPDB2UNIPROTFTP="ftp://ftp.ebi.ac.uk/pub/databases/msd/sifts/text/pdb_chain_uniprot.lst"

SPROT="uniprot_sprot.fasta"
SPROTGZ="${SPROT}.gz"
TREMBL="uniprot_trembl.fasta"
TREMBLGZ="${TREMBL}.gz"
ALL="uniprot_all.fasta"
RELDATEFILE="reldate.txt"
SIFTSPDB2UNIPROT="pdb_chain_uniprot.lst"

sproturl="$SITE/$COMPLETEKBDIR/$SPROTGZ"
tremblurl="$SITE/$COMPLETEKBDIR/$TREMBLGZ"
reldateurl="$SITE/$COMPLETEKBDIR/$RELDATEFILE"

# fail before downloading anything if the blast db formatter is not usable
[ -x "$FORMATDB" ] || die "formatdb executable not found at $FORMATDB. Exiting."

# remove existing download directory if there was one
rm -rf "$DOWNLOAD"
# create the download dir
mkdir "$DOWNLOAD" || die "Could not create download dir $DOWNLOAD. Exiting."

# getting the release date file if newer available
release=""
fetch_if_newer "$CURRENT/$RELDATEFILE" "$reldateurl" "$DOWNLOAD/$RELDATEFILE"
if [ -s "$DOWNLOAD/$RELDATEFILE" ]
then
    # -n ... p: print only when the pattern matched, so an unexpected file
    # format yields an empty string instead of the whole first line
    release=$(head -n 1 "$DOWNLOAD/$RELDATEFILE" | sed -n "s/.*UniProt Knowledgebase Release \(...._..\).*/\1/p")
    # the release string becomes part of a directory name: accept only
    # characters that are safe in a single path component
    case "$release" in
        ""|*[!A-Za-z0-9_.-]*)
            die "Could not parse release from $DOWNLOAD/$RELDATEFILE. Exiting."
            ;;
    esac
    echo "New uniprot release $release available. Downloading files."
else
    echo "No new uniprot release available. Exiting"
    rm -rf "$DOWNLOAD"
    exit 0
fi

# mv-ing onto an existing directory would nest the download inside it,
# so refuse before the large downloads start
RELEASEDIR="$LOCALDIR/uniprot_$release"
if [ -e "$RELEASEDIR" ]
then
    die "$RELEASEDIR already exists. Exiting."
fi

# download if newer available
fetch_if_newer "$CURRENT/$TREMBL" "$tremblurl" "$DOWNLOAD/$TREMBLGZ"
if [ -s "$DOWNLOAD/$TREMBLGZ" ]
then
    echo "New trembl version downloaded"
else
    die "Remote trembl file not newer than local one. Something wrong. Exiting."
fi

fetch_if_newer "$CURRENT/$SPROT" "$sproturl" "$DOWNLOAD/$SPROTGZ"
if [ -s "$DOWNLOAD/$SPROTGZ" ]
then
    echo "New sprot version downloaded"
else
    die "Remote sprot file not newer than local one. Something wrong. Exiting."
fi

# getting the SIFTS PDB to UNIPROT mapping file
curl "$SIFTSPDB2UNIPROTFTP" > "$DOWNLOAD/$SIFTSPDB2UNIPROT" \
    || die "Download of $SIFTSPDB2UNIPROTFTP failed. Exiting."

# uncompressing
gzip -df "$DOWNLOAD/$SPROTGZ" || die "Could not uncompress $SPROTGZ. Exiting."
gzip -df "$DOWNLOAD/$TREMBLGZ" || die "Could not uncompress $TREMBLGZ. Exiting."
# creating the "all" file
cat "$DOWNLOAD/$TREMBL" "$DOWNLOAD/$SPROT" > "$DOWNLOAD/$ALL" \
    || die "Could not create $ALL. Exiting."

# run formatdb
# formatdb writes the path it was given into the .pal index file, so an
# absolute path would be hard-coded there and the directory could no longer
# be moved. That's why we cd to the DOWNLOAD dir first and pass bare file
# names: no hard-coded paths end up in the .pal file.

echo "Running formatdb..."

#formatdb log file
logfile="$DOWNLOAD/formatdb.log"

cd "$DOWNLOAD" || die "Could not cd to $DOWNLOAD. Exiting."
for fasta in "$SPROT" "$TREMBL" "$ALL"
do
    "$FORMATDB" -p T -o T -l "$logfile" -i "$fasta" \
        || die "formatdb failed for $fasta (see $logfile). Exiting."
done

#renaming DOWNLOAD dir to uniprot version and updating current symlink
echo "Creating new symlink..."
# leave the download dir before renaming it
cd "$LOCALDIR" || die "Could not cd to $LOCALDIR. Exiting."
mv "$DOWNLOAD" "$RELEASEDIR" || die "Could not rename $DOWNLOAD to $RELEASEDIR. Exiting."
# rm -f fails if 'current' is a real directory rather than a symlink; stop
# then, otherwise ln would create the new link inside that directory
rm -f "$CURRENT" || die "Could not remove old $CURRENT. Exiting."
# relative link target keeps the whole base dir relocatable
ln -s "uniprot_$release" current || die "Could not create symlink $CURRENT. Exiting."

echo "Done"

Properties

Name Value
svn:executable *