package charite.christo.aaSetup;
import java.io.*;
import java.util.*;
import charite.christo.*;
import charite.christo.strap.*;
import static charite.christo.ChUtils.*;
import static charite.christo.strap.Strap.*;
import static java.lang.Runtime.getRuntime;
#if 0
/*
  java test.Taa 2>&1 | head -n 30
cd ~/m1/aaTest; source Source.sh
aa_setup -testPdbID=211L_A  # PAR_TEST_RO_ARCHIVE_PDB

  source ~/strapSrc/web/toHTML/myUtils/AAupload.sh
  aa_setup -mkPdbFA=tmp/pdbseq/pdbaanr.gz,tmp/pdbseq/pdb_seqres.txt.gz # PAR_MK_PDB
  aa_setup -mkPdb=pdb/short.mfa # PAR_MK_PDB

  ## COPY TO SERVER
  rsync -av  compressed $SERVERAA:$DIRAA/

*/
#endif //0
#define _vProblems setNoClr(242)
public final class AASetup implements Runnable{
    OVERRIDE_PUBLIC void run(){
        setPrprtyB(-IS_CACHE_READ);
        setPrprtyB(-IS_CACHE_WRITE);
        String v;
        try{
            if((v=prprty(iPAR_MK_PDB))!=null) mkPdb(splitTkns(',',v));
            else{
                if(prgOptOn(iPAR_FORMATDB)){
                    if(prprty(iPAR_LOCAL_DB)==null) baOut(RED_ERROR).aln("Missing parameter "+PAR_LOCAL_DB); else LocalSequenceDBFormater.format();
                }else if(null!=(v=prprty(iPAR_MK_CSA))){ /*X csa_dot_txt */
                    final File f=fileDel(0,iFile(F_CATALYTIC_SITE_ATLAS));
                    final Map<String,BA>m=new HashMap(),mInv=new HashMap();
                    final BA txt=toBA(rtExecOutput(0,new Object[]{splitTkns('|',"sort|-k|5|-t|,"),file(v)}));
                    if(txt==null) {baOut(PAR_MK_CSA).aFile(v).special(EXIT_NOW); return;}
                    final byte[]T=txt.bytes();
                    final BA tmp=new BA(99);
                    {
                        final int eol[]=txt.eol(),TABS[]=new int[7];
                        FORiL(0,eol.length){
                            final int b=BOL0(iL,eol),e=eol[iL];
                            if(tabulatrs(',',T,b,e,TABS)<7 || strEquAt(0,"PDB ",T,b)) continue;
                            char chain=TABS[3]-TABS[2]==1?'A': (char)T[TABS[2]+1];
                            if(chain=='_'||chain>127) chain='A';
                            if(!chrClas(LETTR_DIGT)[chain]) baOut("Chain character! ").aFT(txt,b,e).special(EXIT_NOW);
                            final String id=s(clr(tmp).aFilter(FILTER_TO_UPPER,T,b,TABS[0]).aa('_',chain));
                            BA data=m.get(id);
                            if(data==null) m.put(id,data=new BA(99));
                            final char rt;
                            {
                                int i=TABS[1]+1;
                                final int aaa=(T[i]&~32)|((T[i+1]&~32)<<8)|((T[i+1]&~32)<<16);
                                i+=3;
                                if(!iThBool(rt=TABS[2]-TABS[1]-1!=3?0:(char)toOneLetterCode(aaa),chrClas(LETTR))){
                                    if(aaa==('P'|('Y'<<8)|('R'<<16))) baOut(RED_WARNING+"CSA Skipped\n").aFT(T,b,e).aln();
                                    else baOut(" Three letter code! ").aFT(txt,b,e).special(EXIT_NOW);
                                }
                            }
#define evid T[TABS[5]+1]
                            data.a(evid=='L'?rt:(char)(rt|32)).aFT(T,TABS[3]+1,TABS[4]).a(' ');
#undef evid
                        }
                    }
                    for(Map.Entry<String,BA>e:entryArry(m)){
                        final String id=e.getKey(),data=s(clr(tmp).a(e.getValue()).del(' '));
                        m.put(id,null);
                        BA ids=mInv.get(data);
                        if(ids==null) mInv.put(data,ids=new BA(22));
                        ids.aa(id,' ');
                    }
                    final BA out=new BA(700*1000);
                    for(Map.Entry<String,BA>e:entryArry(mInv)) out.a(e.getValue()).del(' ').a('\t').aln(e.getKey());
                    putln(sze(wrte(WRTE_REPORT_STDOUT,f,out))>0?GREEN_SUCCESS:RED_FAILED);
                }else if((v=prprty(iPAR_MK_PDB_FA))!=null){
                    final String[]ff=splitTkns(',',v);
                    final Collection vHC=new HashSet();
                    final BA LINE=new BA(90),SEQ=new BA(999),HEADER=new BA(99);
                    final OutputStream os=fOutStrm(0,iFile(F_PDB_FA));
                    for(int j=0,i=0;i<ff.length;i++){
                        final ChInStream cis=new ChInStream(ff[i],4048);
                        while(true){
                            final boolean read=cis.readLine(clr(LINE)),gt=LINE.charAt(0)=='>';
                            if(j++%10000==0) baOut(ff[i]).aa(' ',j).aln();
                            if((gt||!read) && sze(SEQ)>0){
                                if(vHC.add(new Long(hashCdLUC(SEQ,0,MAX_INT)))){
                                    HEADER.aln().aln(SEQ).writeTxt(os);
                                    clr(HEADER);
                                    clr(SEQ);
                                }
                            }
                            if(gt){
                                LINE.delBlanksR().setEnd(strchr(' '|STR_E,LINE));
                                if(strchr('_',LINE)<0){/*X pdbaanr.gz has no underscore preceeding the Chain ID */
                                    HEADER.aFT(LINE.bytes(),0,5).toUpperOrLowerE(FILTER_TO_LOWER,0).a('_').aFT(LINE,5,MAX_INT);
                                }else{/*X pdb_seqres.txt.gz*/
                                    HEADER.a(LINE);
                                }
                            }else SEQ.a(LINE);
                            if(!read) break;
                        }
                    }
                    closeStrm(os);
                }else if(prprty(iPAR_TEST_SEQ4ID)!=null){
                    for(String id:splitTkns(prprty(iPAR_TEST_SEQ4ID))) baOut(null).an('=',40).a("\nText for ").aln(id).aln(LocalSequenceDB.getTextForID(id));
                }else if((v=orS(prprty(iPAR_TEST_PDBSEQ),prprty(iPAR_TEST_ID4SEQ)))!=null){
                    final Protein[]pp=newProteinInstances(PROTEIN_INSTANCE_FILE_OR_SEQUENCE,splitTkns(v));
                    if(prprty(iPAR_TEST_PDBSEQ)!=null){
                        FindPdbByBlat.compute(pp);
                        for(Protein p:pp){
                            final String id=FindPdbByBlat.id(p);
                            baOut(PAR_TEST_ID4SEQ).aa(p,'\t').a0(p.getResType()).aa(" id=",id,' ').aln(id==null?RED_FAILED:GREEN_SUCCESS);
                        }
                    }
                    if(prprty(iPAR_TEST_ID4SEQ)!=null){
                        ExactMatch.search(pp);
                        for(Protein p:pp){
                            if(p!=null) baOut("query sequence=").aa(p.getResTypeUC()," #annotations=",p.residueAnnotations().length," refs=").aln(p.getRefs(REFS_BY_IDENT)).aln();
                        }
                    }
                }else if((v=prprty(iPAR_TEST_RO_ARCHIVE_PDB))!=null){
                    //baOut("DIR_THIS_JAR_PARENT=").aFile(iFile(DIR_THIS_JAR_PARENT)).aln();
                    //baOut("F_TAXONDATA_ZIP=").aFile(iFile(F_TAXONDATA_ZIP)).aln();
                    //baOut("F_RO_ARCHIVE_PDB=").aFile(iFile(F_RO_ARCHIVE_PDB)).a(" test ID=").aln(v);
                    wrte(WRTE_REPORT_STDOUT,iFile(F_TEST_ARCHIVE_STRAPFORMAT),xyzArchiveGetTxt(F_RO_ARCHIVE_PDB,v));
                    wrte(WRTE_REPORT_STDOUT,iFile(F_TEST_ARCHIVE_STRAPFORMAT_XYZ),xyzArchiveGetTxt(F_RO_ARCHIVE_PDB_XYZ,v));
                    final Protein p=xyzArchiveGetProtein(v,null,null);
                    if(p==null) baOut(RED_FAILED).aln(" p is null");
                    else{
                        //if (DEBUG_NOW==DEBUG_NOW)System.exit(9);
                        baOut(GREEN_SUCCESS).aa(" #residues=",p.countRes()).aln();
                        FORk(0,3){
                            if(DEBUG_NOW==DEBUG_NOW && k!=1) continue;
                            final Collection out=strapInstance(SEQWRITER_PDB).getProteinText(k==0?SEQW_SEQRES|SEQW_HELIX_SHEET:
                                                                                             k==1?SEQW_SEQRES|SEQW_HELIX_SHEET|SEQW_SIDE_CHAIN_ATOMS:
                                                                                             SEQW_SEQRES|SEQW_HELIX_SHEET|SEQW_HETEROS,
                                                                                             p,null,null,null);
                            baOut("pdb-text=\n").join0(out).aln();

                            //wrte(WRTE_REPORT_STDOUT,iFile(F_TEST_ARCHIVE_PDB+k),out);
                        }
                    }
                }
            }
        }catch(Exception iox){
            stckTrc(96,iox);
        }
        closeStrm(_saveProblem);
    }
/* <<< run <<< */
/* ---------------------------------------- */
/* >>> PDB FASTA>>> */
    /**
     * Collects the identifiers found in the header lines of the given FASTA files.
     *
     * A line counts as a header when it is longer than LENGTH_PDBIDS and starts with the greater-than sign.
     * The header is accepted when the position returned by strchr for the underscore
     * (searched from index 1) equals LENGTH_PDBIDS+1; otherwise the line is reported as an error.
     * For accepted headers the text from index 1 up to the position found by nxt() for the
     * character class -LETTR_DIGT_US is stored (presumably the first character that is
     * neither letter, digit nor underscore - TODO confirm).
     *
     * An error is printed for every file that yields no identifier, which includes
     * files for which sze(file(fn)) is not positive.
     *
     * Parameter fileNames: names of the FASTA files to scan.
     * Returns the collected identifiers in reading order, repeated entries included.
     */
    private static String[]aaPdbIDandChainFromFasta(String[]fileNames){
        final String logPrefix=ANSI_MAGENTA+"AASetup.aaPdbIDandChainFromFasta "+ANSI_RESET;
        final Collection collectedIds=new ArrayList();
        final BA lineBuf=new BA(99);
        for(String fastaName:fileNames){
            int idsInFile=0;
            if(sze(file(fastaName))>0){
                final ChInStream reader=new ChInStream(fastaName,4048);
                while(reader.readLine(clr(lineBuf))){
                    final byte[]raw=lineBuf.bytes();
                    /* Only sufficiently long header lines are of interest. */
                    if(sze(lineBuf)<=LENGTH_PDBIDS || raw[0]!='>') continue;
                    final boolean underscoreInPlace=strchr('_'|STR_E,raw,1,sze(lineBuf))==LENGTH_PDBIDS+1;
                    if(underscoreInPlace){
                        collectedIds.add(lineBuf.newString(1,nxt(STR_E,chrClas(-LETTR_DIGT_US),raw,1,MAX_INT)));
                        idsInFile++;
                    }else{
                        baOut(RED_ERROR).a(logPrefix).aln(lineBuf);
                    }
                }
            }
            if(idsInFile==0) baOut(RED_ERROR).a(logPrefix).aln(" No IDs found in FASTA files ").aln(fastaName);
        }
        return toStrgArray(0,collectedIds);
    }
/* <<< PDB FASTA <<< */
/* ---------------------------------------- */
/* >>> PDB >>> */
    /* One slot per file constant. A slot holds the value returned by fOutStrm() for that file,
       or the empty string once the slot has been visited and was still null afterwards. */
    private final static Object[]_saveProblem=new Object[F_DIR_URL_ZZZ];
    /**
     * Appends a problem report to the log file denoted by a file constant and echoes it with putln().
     *
     * Parameter f: file constant. A positive value always reports. A value that is not positive
     *              reports only when the hash code of the text could be added to the collection
     *              behind the macro _vProblems, and its absolute value is then used as the file constant.
     * Parameter txt: the report; nothing happens when sze(txt) is not positive.
     *
     * Exceptions raised while opening or writing are passed to errorEx() and not rethrown.
     */
    private static void saveProblem(int f,BA txt){
        if(sze(txt)>0 && (f>0||_vProblems.add(longObjct(hashCdL(txt))))){
            if(f<0) f=-f;
            /* NOTE(review): onlyOnce(11) is not defined in this file; if it is true only on the first call, only the first problem is ever logged - confirm. */
            if(onlyOnce(11)){
                try{
                    /* The stream is opened on first use of this slot. */
                    if(_saveProblem[f]==null) _saveProblem[f]=fOutStrm(0,iFile(f));
                    /* NOTE(review): fileDel() is used elsewhere in this file for its returned File; whether it removes the freshly opened log file here should be confirmed. */
                    baOut(RED_WARNING).aBYTES('L','o','g',' ').aln(fileDel(0,iFile(f)));
                    txt.writeTxt(derefZ(_saveProblem[f],OutputStream.class));
                }catch(Exception ex){errorEx("saveProblem ",ex);}
                /* Mark the slot as visited so that the null test above does not trigger again. */
                if(_saveProblem[f]==null) _saveProblem[f]="";
                putln(txt);
            }
        }
    }
    /**
     * Builds the archive file F_RO_ARCHIVE_PDB and afterwards its index via mkPdbIndex().
     *
     * The chain identifiers are taken from the headers of the given FASTA files
     * (see aaPdbIDandChainFromFasta). For every distinct value of pdbID(id) the structure file
     * returned by pdbFileIfAlreadyDownloaded() is read, a header line listing the chains by
     * type is written, and each requested peptide chain is parsed and appended in STRAP format.
     * Entries that cannot be read or parsed are reported through saveProblem().
     *
     * Parameter fileNames: FASTA files providing the identifiers.
     * Throws IOException as declared; the output stream is not closed when an exception escapes.
     */
    private static void mkPdb(String[]fileNames)throws IOException{
        /* fileDel is applied to the index file first - presumably so that a stale index is not kept. */
        fileDel(0,iFile(F_RO_ARCHIVE_PDB|FILEC_DOT_INDEX));
        /* map: value of pdbID(id) to the list of full identifiers.   v4: the distinct pdbID values in first seen order. */
        final Map<String,Collection>map=new HashMap();
        final Collection v4=new ArrayList();
        { /*X Order by pdb ID,Remove duplicates */
            final Collection set6=new HashSet(),set4=new HashSet();
            for(String id:aaPdbIDandChainFromFasta(fileNames)){
                if(id!=null&&set6.add(id)){
                    final String id4=pdbID(id);
                    Collection v=map.get(id4);
                    if(v==null) map.put(id4,v=new ArrayList());
                    v.add(id);
                    if(set4.add(id4)) v4.add(id4);
                }
            }
            baOut(PAR_MK_PDB).a(fileNames).aBYTES(' ','#','4','=').a(sze(v4)).aBYTES(' ','#','6','=').a(sze(set6)).aln();
        }
        final OutputStream os=fOutStrm(0,iFile(F_RO_ARCHIVE_PDB));
#define sbErr() baClr(27)
        /* sb collects the text of one archive entry and is flushed to os at the end of each iteration. */
        final BA sb=new BA(3333);
        FORi(0,sze(v4)){
            final String id4=iThStrg(i,v4);
            final int time=timeOn();
            final BA txt=readBytes(pdbFileIfAlreadyDownloaded(id4),baClr(28));
            if(sze(txt)==0){saveProblem(-F_RO_ARCHIVE_PDB_PROBLEMS,sbErr().aa(id4,"Read file",' '));continue;}
            final String[][]chainsByType=chainsInPdbOrCif(txt);
            if(chainsByType==null){saveProblem(-F_RO_ARCHIVE_PDB_PROBLEMS,sbErr().aa(id4,"chains=null",' '));continue;}
            /* Entry header line: hash, less-than sign, the ID, then one key=chains group per chain type that has chains. */
            sb.aBYTES('#','<').aa(id4,' ');
@*SARRAYeq_CHAINTYPE
P=CHAINTYPE_PEP R=CHAINTYPE_RNA D=CHAINTYPE_DNA H=CHAINTYPE_HET
*@
            ROFj0(SOBJECT_ZZZ) if(iConst(SARRAYeq_CHAINTYPE,j)!=null&&sze(chainsByType[j])>0) sb.aa(iConst(SARRAYeq_CHAINTYPE,j),'=').joinSpc(chainsByType[j]).a(' ');
            sb.delBlanksR().aln();
#define this_vID6 map.get(id4)
        /* For each requested chain identifier of this entry, look for the matching peptide chain. */
        nextID:
            FORj(0,sze(this_vID6)){
                final String id6=iThStrg(j,this_vID6);
                ROFt0(CHAINTYPE_HET){
                    for(String cn:chainsByType[t]){
                        /* Only a peptide chain whose name equals the chain part of id6 is processed. */
                        if(t!=CHAINTYPE_PEP || !eqStr(cn,pdbChain(id6))) continue;
                        /* NOTE(review): the chain marker line is appended before parsing, so it stays in sb even when one of the continue statements below is taken - confirm intent. */
                        sb.aBYTES('#',STRAPFORMAT_HASH_CHAIN).aln(pdbChain(id6));
                        final Protein p=new Protein();
                        /* A fresh log buffer is installed so that text logged during parsing can be saved below. */
                        _baLog[LOG_FILES]=sbErr();
                        new SequenceParserImpl3D(SPARSER_CIF).parse(0,cn,txt,p);
                        if(!p.residueHasXYZ(true,0)) continue;
                        if(sze(_baLog[LOG_FILES])>0){
                            saveProblem(F_RO_ARCHIVE_PDB_PROBLEMS,baClr(29).aln().a(ANSI_GREEN).an('=',3).aa(' ',id6,':',cn,' ',txt.getFile(),' ').an('=',3).aln(ANSI_RESET));
                            saveProblem(F_RO_ARCHIVE_PDB_PROBLEMS,_baLog[LOG_FILES].delBlanksL());
                        }
                        //baOut("p=").a(p).a(' ').a0(p.getResType()).aln();
                        if(p.countRes()==0){
                            saveProblem(F_RO_ARCHIVE_PDB_PROBLEMS,sbErr().aa("NO_AMINO ",id4,':',cn," isSubset=",p.isSubset(),' ').aFromDashTo(p.subsetStart(),p.subsetEnd()).a(' '));
                            continue;
                        }
                        p.sequenceInStrapFormat(SEQW_DIRECTLY_INTO_SB|SEQW_SIDE_CHAIN_ATOMS|SEQW_CALPHA_ATOMS,sb,null);
                        /* Chain written successfully: go on with the next identifier and skip the problem report below. */
                        continue nextID;
                    }
                }
                /* Reached whenever no chain was written for id6, including the continue cases above. */
                saveProblem(-F_RO_ARCHIVE_PDB_PROBLEMS,sbErr().aa(id4," CHAIN NOT IN chainsByType ",id6));
            }
            /* Progress output every 100 entries and memory figures (in kb) every 1000 entries. */
            if(i%100==0) baOut(ANSI_CURSOR_LEFT+ANSI_CLR_FROM_CURSOR+"Processing ").format10(i,0).a('/').format10(sze(v4),0).aln();
            if(i%1000==0) baOut(ANSI_YELLOW+"Total Free  Maximum memory (max+free-total) [kb]:")
                              .format10((int)(getRuntime().totalMemory()>>>10),16)
                              .format10((int)(getRuntime().freeMemory()>>>10),16)
                              .format10((int)(getRuntime().maxMemory()>>>10),20)
                              .format10((int)((getRuntime().maxMemory()-getRuntime().totalMemory()+getRuntime().freeMemory())>>>10),20)
                              .aln(ANSI_RESET);

            baOut("id=").aa(id4," time=",timeOn()-time).aln();
            /* The entry is closed with a line made of the hash sign and STRAPFORMAT_HASH_ID; mkPdbIndex() looks for that line. */
            wrteOS(os,sb.aBYTES('#',STRAPFORMAT_HASH_ID,'\n'));
            clr(sb);
        }
        closeStrm(os);
        mkPdbIndex();
    }
    /**
     * Writes the index file that belongs to the archive F_RO_ARCHIVE_PDB.
     *
     * Nothing is done when fileNewr() reports true for the pair index file, archive file.
     * Otherwise the archive is read line by line while a running position is kept,
     * advanced by the line length plus one for every line.
     * A line starting with the hash and less-than signs opens an entry and supplies its ID.
     * The first line starting with SCRIPT_xyz inside an entry marks where its xyz part starts.
     * A line starting with the hash sign followed by STRAPFORMAT_HASH_ID closes the entry and
     * records id=begin,xyzOffset,xyzLength in hexadecimal. When no xyz line was seen,
     * the xyz part is taken to start at the closing line.
     *
     * Entries are grouped by hashCdUC(id) modulo RO_ARCHIVE_PDB_SECTIONS. The index file gets
     * one line per section, in section order, with the entries of a section separated by blanks.
     *
     * Throws IOException as declared.
     */
    private static void mkPdbIndex()throws IOException{
        if(fileNewr(iFile(F_RO_ARCHIVE_PDB|FILEC_DOT_INDEX),iFile(F_RO_ARCHIVE_PDB))) return;
        final BA lineBuf=new BA(99);
        final ChInStream reader=new ChInStream(iFile(F_RO_ARCHIVE_PDB),4048);
        final Map idxBySection=new HashMap();
        String curId=null;
        int filePos=0;
        int entryBegin=-1;
        int xyzBegin=-1;
        while(reader.readLine(clr(lineBuf))){
            final byte[]raw=lineBuf.bytes();
            final int lineLen=lineBuf.end();
            final boolean hashLine=raw[0]=='#';
            if(hashLine && raw[1]=='<'){
                /* Start of an entry. */
                entryBegin=filePos;
                curId=wordAt(raw,2);
            }else if(hashLine){
                if(curId!=null && raw[1]==STRAPFORMAT_HASH_ID){
                    /* End of an entry: append its record to the section chosen by the hash of the ID. */
                    final Object sectionKey=io(hashCdUC(curId)%RO_ARCHIVE_PDB_SECTIONS);
                    BA sectionTxt=(BA)idxBySection.get(sectionKey);
                    if(sectionTxt!=null) sectionTxt.a(' ');
                    else idxBySection.put(sectionKey,sectionTxt=new BA(99));
                    if(xyzBegin<0) xyzBegin=filePos;
                    sectionTxt.aa(curId,'=').aHex(entryBegin).a(',').aHex(xyzBegin-entryBegin).a(',').aHex(filePos-xyzBegin);
                    curId=null;
                    entryBegin=-1;
                    xyzBegin=-1;
                }
            }else if(xyzBegin==-1 && raw[0]=='x' && strStarts(0,SCRIPT_xyz,raw)){
                xyzBegin=filePos;
            }
            filePos+=lineLen+1;
        }
        final OutputStream indexOut=fOutStrm(0,iFile(F_RO_ARCHIVE_PDB|FILEC_DOT_INDEX));
        FORi(0,RO_ARCHIVE_PDB_SECTIONS){
            final BA sectionTxt=(BA)idxBySection.get(io(i));
            if(sectionTxt!=null) sectionTxt.writeTxt(indexOut);
            /* Sections without entries still get their own empty line. */
            indexOut.write('\n');
        }
        closeStrm(indexOut);
    }
/* <<< PDB  <<< */
/* ---------------------------------------- */
/* >>> Catalytic Site Atlas >>> */
#if 0
/*
      s6stdout -formatdb -CSA=/local/bioinf/DB/CSA_2_0_121113.txt.sorted
      s6stdout -testCSA=pdb1sbc.ent -CSA=/local/bioinf/DB/CSA_2_0_121113.txt

      typePdb(){
      wget -O /dev/stdout  http://files.rcsb.org/download/$1.pdb.gz | zcat | fgrep $2
      fgrep -i -A 4 $1 /local/bioinf/DB/pdb_seqres.txt
      }
*/
#endif //0

/* <<< Catalytic Site Atlas  <<< */
/* ---------------------------------------- */
/* >>>  >>> */
}
