#!/bin/bash -u
# **************************************** #
# CONFIGURATION                        *** #
# 1. Select PDB file server              * #
# 2. Select your program for downloading * #
# **************************************** #
# PDB mirror given as host/path without a URL scheme.
#declare -r PDB_SERVER=ftp.wwpdb.org/pub/pdb/data/structures/
declare -r PDB_SERVER=ftp.ebi.ac.uk/pub/databases/rcsb/pdb/data/structures
# Download program; the alternatives are kept commented out below.
declare -r PRG=wget_rcsb
#declare -r PRG=ncftp
#declare -r PRG=wget
#declare -r PRG=wget_rcsb
#declare -r PRG=curl
# ******************************************************** #
# PROGRAM PARAMETERS                                     * #
#  $1 FASTA files, separated by commas (,)               * #
#  $2 Comma-separated directories holding already        * #
#     downloaded CIF files (optional).                   * #
# ******************************************************** #
declare -r FASTA=$1
declare -r SKIP=${2:-}
# Local directory that receives the downloaded files.
declare -r DOWNLOAD_DIR=pdb/divided
# Set of the IDs that are already on disk.
# E.g. if 3ni0.cif.gz is found, then already[3ni0] will be set to 1.
declare -A already=()

# Pause until a single character has been read from stdin.
# The prompt is written by `read -p`, which only shows it when stdin is a
# terminal.
# Returns: the exit status of `read` (non-zero at end of input).
anyKey(){
    # The pressed key is of no interest to callers; keep it out of the
    # global namespace.
    local key='' status=0
    IFS= read -n1 -r -p 'Press any key to continue ...' key || status=$?
    # A key other than Enter leaves the cursor on the prompt line; finish it
    # so that following output starts on a fresh line.
    if [[ -t 0 && -n $key ]]; then
        echo >&2
    fi
    return "$status"
}
# Record the IDs of all *.cif.gz files found below a directory.
# Arguments: $1 - directory to search (default: pdb/divided)
# Globals:   already (written) - already[<id>] is set to 1 for each file
#            <id>.cif.gz that was found
# Outputs:   the total number of IDs known so far, on stdout
# Side effect: *.cif.gz files matching find's `-size -2k` test are DELETED
#            first (presumably remnants of failed downloads - confirm).
searchCifFiles(){
    local root=${1:-pdb/divided}
    local id
    find "$root" -name '*.cif.gz' -size -2k -delete
    # `< <(...)` keeps the loop in the current shell, so the assignments to
    # the global array survive.
    while IFS= read -r id; do
        # An empty subscript would be an error for an associative array.
        if [[ -n $id ]]; then
            already[$id]=1
        fi
    done < <(find "$root" -name '*.cif.gz' -not -empty \
                | sed 's|.*/||;s|\.cif\.gz$||')
    echo "Number of identified cif-files after searching $root: ${#already[@]}"
}
# Register the CIF files of every directory listed in $SKIP, then those of
# the download directory itself, and wait for a key press.
# The list is split on commas (and, through word splitting, on whitespace).
for i in ${SKIP//,/ }; do
    # Drop everything from the first literal '$' of the entry.
    # NOTE(review): the reason for this trimming is not visible here - confirm.
    i=${i%%\$*}
    if [[ -d $i ]]; then
        searchCifFiles "$i"
    else
        # Name the offending entry, not the whole $SKIP list.
        echo "WARNING: $i should be the parent directory of an already existing CIF-file collection. However, $i is not a directory" >&2
    fi
done
searchCifFiles "$DOWNLOAD_DIR"
anyKey

# Note: the `< <(...)` process-substitution syntax used by the read loops of
# this script avoids running those loops in a subshell.
#
# Create the scratch directory tmp and one download sub-directory for each
# two-character combination of [0-9a-z] (00 .. zz).
# ${DOWNLOAD_DIR:?} aborts instead of creating the directories below / when
# the variable is unset or empty; nothing can be downloaded without these
# directories, so a failing mkdir ends the script.
mkdir -p tmp "${DOWNLOAD_DIR:?}"/{{0..9},{a..z}}{{0..9},{a..z}} || exit 1
# Download the CIF file of every ID read from stdin (one ID per line).
# IDs already recorded in the global set `already` are skipped ("y" is
# printed for each of them).
# Arguments: $1 - remote directory below $PDB_SERVER, e.g. divided/mmCIF
# Globals:   PRG, PDB_SERVER, DOWNLOAD_DIR (read); already (read here,
#            refreshed through searchCifFiles); ANSI_INVERSE, ANSI_GREEN,
#            ANSI_FG_GREEN, ANSI_RESET (read, optional - not defined in this
#            script, treated as empty when unset)
# Files:     tmp/ftp.txt is rewritten; downloads are stored as
#            $DOWNLOAD_DIR/<2nd+3rd character of id>/<id>.cif.gz
# Returns:   1 if $PRG is not a supported download program
lineByLine() {
    local pdb_dir=$1
    # Tolerate a trailing slash in PDB_SERVER so that URLs and ftp paths are
    # always joined by exactly one '/'.
    local server=${PDB_SERVER%/}
    local id divisionSlashFile f url
    local isFTP=0
    # The ANSI_* variables default to empty: the script runs with `-u`, and
    # an unset colour variable must not abort the download.
    echo "${ANSI_INVERSE:-} lineByLine ${ANSI_RESET:-}"
    searchCifFiles "$DOWNLOAD_DIR"
    sleep 2
    echo >tmp/ftp.txt
    while IFS= read -r id; do
        # An empty line cannot be used as an array subscript.
        [[ -n $id ]] || continue
        if [[ ${already[$id]:-} == 1 ]]; then
            printf y
            continue
        fi
        printf '%s%s%s ' "${ANSI_GREEN:-}" "$id" "${ANSI_RESET:-}"
        divisionSlashFile=${id:1:2}/$id.cif.gz
        f=$DOWNLOAD_DIR/$divisionSlashFile
        url=ftp://$server/$pdb_dir/$divisionSlashFile
        # A failed transfer may leave an empty or truncated file behind that
        # a later run could count as already downloaded, so it is removed.
        case $PRG in
            echo) echo "$f" ;;
            lftp | ncftp) isFTP=1 ;;
            wget_rcsb) wget -O "$f" "http://files.rcsb.org/download/$id.cif.gz" || rm -f -- "$f" ;;
            wget) wget -O "$f" "$url" || rm -f -- "$f" ;;
            curl) curl -o "$f" "$url" || rm -f -- "$f" ;;
            *)
                echo "lineByLine: unsupported download program '$PRG'" >&2
                return 1
                ;;
        esac
        # The ftp clients get one batch file with a `get` command per ID.
        if ((isFTP)); then
            echo "get ${server#*/}/$pdb_dir/$divisionSlashFile -o $f" >>tmp/ftp.txt
        fi
    done
    if ((isFTP)); then
        echo "${ANSI_FG_GREEN:-}"
        ls -l tmp/ftp.txt
        echo "${ANSI_RESET:-}"
        set -x
        # The host part of PDB_SERVER is the argument, the batch file is fed
        # on stdin.
        "$PRG" "${server%%/*}" <tmp/ftp.txt
        set +x
    fi
}
# Extract the four-character IDs from the header lines of every FASTA file
# listed in $FASTA (plain or gzip-compressed) and hand them, lower-cased and
# de-duplicated, to lineByLine.
# The list is split on commas (and, through word splitting, on whitespace).
for pdb_dir in divided/mmCIF models/current/pdb models/obsolete/pdb obsolete/mmCIF; do
    for fa in ${FASTA//,/ }; do
        if [[ ! -r $fa ]]; then
            echo "WARNING: cannot read FASTA file $fa - skipped" >&2
            continue
        fi
        # `gzip -dc` instead of zcat: some zcat implementations only accept
        # .Z files.
        if [[ $fa == *.gz ]]; then
            decompress=(gzip -dc)
        else
            decompress=(cat)
        fi
        lineByLine "$pdb_dir" < <("${decompress[@]}" "$fa" | grep '>' | tr 'A-Z' 'a-z' | sed -n 's|>\([a-z0-9]\{4\}\).*|\1|p' | sort -u)
    done
    # Only the first remote directory (divided/mmCIF) is processed; the
    # remaining entries of the list are never reached because of this break.
    break
done
