package charite.christo.strap;
import charite.christo.*;
import java.util.*;
import static charite.christo.ChUtils.*;
import static charite.christo.strap.Strap.*;

/* http://biowulf.nih.gov/apps/bfast.html */
/* http://linusben.net/wiki/index.php?title=BFAST&redirect=no&printable=yes */

public final class ExactMatch{
    /** The local sequence database that this matcher queries. */
    private final LocalSequenceDB _sdb;

    /**
     * Creates a matcher bound to one local sequence database.
     * @param db the database used by all instance methods of this object
     */
    ExactMatch(LocalSequenceDB db){
        this._sdb=db;
    }
    /**
     * Appends a four letter (A, C, T, G) encoding of a residue sequence to a buffer.
     * Each byte accepted by IS_LOWER is replaced by the letter selected by its value modulo 4
     * (0 gives A, 1 gives C, 2 gives T, 3 gives G); all other bytes are skipped.
     * Scanning stops at the first zero byte.
     * @param aa residue bytes to encode
     * @param f  index of the first byte to read
     * @param t  index after the last byte to read; values beyond aa.length are clamped
     * @param sb buffer that receives the encoded letters; its end is advanced accordingly
     */
    public static void reduceAlphabet(byte[]aa,int f,int t,BA sb){
        final int stop= t>aa.length ? aa.length : t;
        int dst=sb.end();
        /* Reserve room for the worst case that every byte in the range is written. */
        final byte[]out=sb.ensureCapacity(dst+stop-f);
#define _ACTG ('A'|('C'<<8)|('T'<<16)|('G'<<24))
        FORi(f,stop){
            final int c=aa[i];
            if(c==0) break;
            if(IS_LOWER(c)){
                /* The four letters are packed into one int, one letter per byte. */
                final int shift=(c%4)*8;
                out[dst++]=(byte)((_ACTG>>>shift)&127);
            }
        }
        sb.setEnd(dst);
    }
    /**
     * Runs the exact match search for the given proteins against every registered local sequence database.
     * Every database is queried, regardless of the result of the preceding ones.
     * @param pp the proteins to look up
     * @return true if the search on at least one database reported a change
     */
    public static boolean search(Protein[]pp){
        boolean anyChanged=false;
        for(LocalSequenceDB seqDb:LocalSequenceDB.instances()){
            final boolean changedHere=seqDb.exactMatch()._search(pp);
            if(changedHere) anyChanged=true;
        }
        return anyChanged;
    }

    /**
     * Finds database identifiers for the given proteins by exact sequence match against the database of this instance.
     * Proteins whose identifier is already known (see alreadyKnownID) are not searched again.
     * The remaining sequences are written in reduced alphabet to the file F_BLAT_IN and matched by,
     * in this order: a local gfClient call, the web service aaGfClient.php, a local blat call.
     * The identifiers found are stored in the cache and all collected (id, protein) pairs are finally
     * passed to getSeqAndAnnoTextMulti of the database.
     *
     * NOTE(review): the local variable changed is never assigned after its initialization, so this
     * method always returns false as written. Confirm whether callers rely on the return value.
     *
     * @param pp proteins to look up; null entries are skipped
     * @return the value of changed, see the note above
     */
    private boolean _search(Protein[]pp){
        boolean changed=false;
        /* Query text for blat; created lazily, stays null if no protein needs a search. */
        BA sb=null;
        final BA buffer=new BA(999),bufID=new BA(99);
        /* Collects all (id, protein) pairs, both already known and newly found. */
        /* NOTE(review): ArrayList is used as a raw type here. */
        final Collection<LocalSequenceDB.SeqWithID>v=new ArrayList();
        final boolean[]needCompute=new boolean[pp.length];
        /* ROFiP0 presumably iterates iP over all indices of pp in descending order. TODO confirm against the macro definition. */
        ROFiP0(pp.length){
            final Protein p=pp[iP];
            if(p!=null && !alreadyKnownID(p,buffer,v)){
                needCompute[iP]=true;
                if(sb==null) sb=new BA(999);
                /* One record per protein: a header line with the text >s followed by the index iP, then the reduced sequence. */
                reduceAlphabet(p.getResType(),0,MAX_INT,sb.aa(">s",iP).aln());
                sb.a('\n');
            }
        }
        if(sb!=null){
            final int time=timeOn();
            boolean ok=false;
            BA output=null;
            wrte(iFile(F_BLAT_IN),sb);
            /* First attempt: gfClient against a server on localhost, only if a port is configured. */
            if(prprty(iPAR_GF_SERVER_UP_PORT)!=null){
                ok=0==rtExecV(0,new Object[]{fileFindExecutable("gfClient"),"-minIdentity=100","localhost",prprty(iPAR_GF_SERVER_UP_PORT),addSfx(XMATCH_SFX_DIR,_sdb.f()),iFile(F_BLAT_IN),iFile(F_BLAT_OUT) });
                output=readBytes(iFile(F_BLAT_OUT));
            }
            /* Second attempt: send the query to the web service and poll for the response. */
            if(!ok){
                final String url=s(new BA(99).a(prprty(iPAR_GF_SERVER_UP)).del('/').a("/aaGfClient.php"));
                final BA[]response=new BA[1];
                startThrd(thread_serverRspns(url,new BA(0).aWWW("fasta",sb),response));
                /* Wait in steps of 44 ms, at most about 4444 ms plus 1 ms per 200 units of sze(sb), until response[0] is set. */
                for(int msec=4444+sze(sb)/200; (msec-=44)>=0 && response[0]==null;) sleepMS(44);
                if(sze(output=response[0])>0){
                    /* Accepted only if deleting the text "\nend" changed the length and the output contains a tab. */
                    /* Presumably the service terminates a complete response with a line end. TODO confirm. */
                    ok=output.delBlanksR().end()!=output.del("\nend").end() && strchr('\t',output)>=0;
                    output.aln();
                }
                baOut(ANSI_STYLE_DL).aa(url,'?').aWWW("fasta",sb).aa(ANSI_RESET,ok?GREEN_SUCCESS:RED_FAILED).an(' ',4).formatSize(sze(output)).aln();
            }
            /* Last attempt: local blat executable. Its exit status is not evaluated; only the output file is read. */
            if(!ok){
                rtExecV(0,new Object[]{fileFindExecutable("blat"),"-minIdentity=100","-noHead","-fastMap",addSfx(XMATCH_SFX_2BIT,_sdb.f()),iFile(F_BLAT_IN),iFile(F_BLAT_OUT) });
                output=readBytes(iFile(F_BLAT_OUT));
            }
            if(output==null)  baOut(RED_FAILED).aFile(iFile(F_BLAT_OUT)).aln();
            else{
                /* allID is indexed like pp, see the use of allID[iP] below. */
                final String[]allID=FindPdbByBlat.idsFromBlat(true,pp,output),ID_BUFFER={null};
                baOut(GREEN_SUCCESS).a("ExactMatch blat ").a(timeOn()-time).a("ms IDs=").joinSpc(allID).aln();
                final BA sbKey=new BA(222);
                baOut("LocalSequenceDB.getText pp=").a(pp.length).a('\t').joinSpc(pp).aln().aln();
                ROFiP0(pp.length){
                    /* Only proteins that were part of the query are processed and cached. */
                    if(!needCompute[iP]) continue;
                    //baOut("").a0(pp[iP].getResType()).a(' ').joinSpc(allID[iP]).aln('\n');
                    clr(bufID);
                    IF_MEIN_DEBUG(assert XMATCH_SEP_SYN_ID==XMATCH_SEP_ID);;
                    for(String id:splitTkns1(allID[iP],0,MAX_INT,XMATCH_SEP_SYN_ID,ID_BUFFER)){
                        bufID.a(id).a(XMATCH_SEP_ID);
                        v.add(new LocalSequenceDB.SeqWithID(id,pp[iP]));
                    }
                    /* A single question mark is cached when no identifier was found; alreadyKnownID ignores that token. */
                    if(bufID.del(XMATCH_SEP_ID).end()==0) bufID.a('?');
                    /* NOTE(review): the key is built with cacheKeyForSeqAsStrg here but with cacheKeyForSeq in alreadyKnownID. Confirm both yield the same key text. */
                    clr(sbKey).aa(_sdb.db(),'_',cacheKeyForSeqAsStrg(pp[iP],null));
                    IF_AA_CACHE(aaCachePut(ExactMatch.class,sbKey,bufID));;
                    UNLESS_AA_CACHE(CacheResult.putValue(CACHE_NOT_RAM,ExactMatch.class,s(sbKey),bufID));;
                }
            }
        }
        if(sze(v)>0){
            _sdb.getSeqAndAnnoTextMulti(false,toArry(v,LocalSequenceDB.SeqWithID.class),_main);/*X RUN_SEQDB_FOUND_SEQ4ID*/
        }
        return changed;
    }
/* matchSize=0, s0=9, id=13 */
/* 176 0 0 0 0 0 0 0 + s10 176 0 176 Q5PK45 176 0 176 1 176, 0, 0, */

    /**
     * Checks whether identifiers for a protein are available without running a new search.
     * Two sources are consulted: the sequence id that the protein reports for BLAST4ID_DB_UNIPROT and,
     * if the property IS_CACHE_READ is set, the cached result of an earlier exact match.
     * Every identifier found is added to v together with the protein. The cached token consisting of
     * a single question mark is not added.
     * @param p      the protein to check
     * @param buffer scratch buffer, cleared and passed to the cache lookup
     * @param v      receives one SeqWithID per identifier found
     * @return true if the protein has a sequence id or a non-empty cache entry, false otherwise
     */
    private boolean alreadyKnownID(Protein p,BA buffer,Collection<LocalSequenceDB.SeqWithID>v){
        final String knownId=p.seqId(BLAST4ID_DB_UNIPROT);
        if(knownId!=null){
            v.add(new LocalSequenceDB.SeqWithID(knownId,p));
            return true;
        }
        if(!isPrprty(IS_CACHE_READ)) return false;
        /* The cache key consists of the database name, an underscore and the key derived from the sequence. */
        final BA key=new BA(222).aa(_sdb.db(),'_',cacheKeyForSeq(p,null));
        final BA cached=IF_AA_CACHE(aaCacheGet(ExactMatch.class,key,clr(buffer))) UNLESS_AA_CACHE(CacheResult.getValue(ExactMatch.class,s(key),clr(buffer)));
        if(sze(cached)<=0) return false;
        for(String token:splitTkns(XMATCH_SEP_ID,cached)){
            if("?".equals(token)) continue;
            v.add(new LocalSequenceDB.SeqWithID(token,p));
        }
        return true;
    }

    /**
     * Applies a database text record to a protein.
     * The code reads the record as follows: the first line starts with a greater-than sign followed by
     * identifiers, the second line (up to eol[1]) is searched for the protein sequence, and the
     * lines from eol[1]+1 onwards are feature or annotation lines.
     *
     * On the first call for a protein (int property PROTEINI_DONE_EXACT_MATCH is 0) the protein
     * sequence is located in the record and each identifier of the header line is added as sequence
     * reference. The feature lines are only processed in a call where identifiers were added, because
     * id0 is set nowhere else.
     *
     * @param p    the protein that receives references, properties and feature text
     * @param txt  the record text; null yields false
     * @param buf1 passed on to splitTkns1; presumably a reusable token buffer. TODO confirm
     * @return false if txt is null, if the record is malformed, if the protein has fewer than
     *         3 residues, or if the protein sequence is not found in the record; true otherwise
     */
    public boolean gffTextToProtein(Protein p,BA txt,String[]buf1){
        if(txt==null) return false;
        final byte[]T=txt.bytes();
        /* eol presumably holds the positions of the line ends in T. TODO confirm against BA.eol(). */
        final int[]eol=txt.eol();
        if(eol.length<2 || T[0]!='>') {assrt(); return false;}
        final int E=txt.end();
        /* Last identifier of the header line; stays null unless the matching branch below runs to completion. */
        String id0=null;
        final String db=_sdb.db();
        if(p.getIntProperty(PROTEINI_DONE_EXACT_MATCH)==0){
            /* Marked as done before the checks below, so an early return false is not retried for this protein. */
            p.setIntProperty(PROTEINI_DONE_EXACT_MATCH,1);
            final int seqStart=eol[0]+1;
            int n=p.countRes();
            final byte[]aa=p.getResType();
            if(n<3) return false;
            /* NOTE(review): n is decremented here but not read afterwards; strstr below receives the whole array aa. */
            /* Confirm whether the trailing x is really excluded from the comparison. */
            if((aa[n-1]|32)=='x') n--;/*X  Get rid of Stop codon  */
            /* Position of the protein sequence within the second line, relative to the start of that line. */
            final int offset=strstr(STR_IC|STR_X_MATCHES_ANY,aa,T,seqStart,eol[1])-seqStart;
            if(offset<0) return false;
            /* Header tokens are taken from byte 1, that is after the leading greater-than sign, up to the first line end. */
            for(String id:splitTkns1(T,1,eol[0],XMATCH_SEP_SYN_ID,buf1)){
                p.addSeqRef(ADD_SEQREF_BY_IDENT|offset,addPfx(db,id0=id));
            }
        }
        /* Start of the third line. */
        final int gff=eol[1]+1;
        Collection vIds=(Collection)p.getProperty(PROTEINO_vIdsOfGffText);
        if(vIds==null) p.setProperty(PROTEINO_vIdsOfGffText,vIds=new ArrayList());
        /* Proceeds only if the third line starts with a tab or a hash sign and adUniq returns true. */
        /* Presumably adUniq returns true only when the id was not yet contained in vIds. TODO confirm. */
        if(id0!=null && E>gff && (T[gff]=='\t'||T[gff]=='#') && adUniq(addPfx(db,id0),vIds)){
            BA sb=null;
            FORiL(2,eol.length){
                /* b and e delimit the current line: b is the first byte, e the position of its line end. */
                final int b=eol[iL-1]+1,e=eol[iL];
                if(e-b<4) continue;
                if(T[b]=='#'){
                    /* Lines starting with hash, O, equals set the organism mnemonic; hash, D, equals set the title. */
                    /* PROTEINO_FLAG_IF_EMPTY presumably restricts this to properties that are still empty. TODO confirm. */
                    if(T[b+1]=='O' && T[b+2]=='=') p.setProperty(PROTEINO_FLAG_IF_EMPTY|P_ORGANISM_MNEMONIC,T,b+3,e);
                    if(T[b+1]=='D' && T[b+2]=='=') p.setProperty(PROTEINO_FLAG_IF_EMPTY|P_TITLE,T,b+3,e);
                }else{
                    if(sb==null) sb=new BA(txt.length()).a(SEQFEAT_PFX_DATA_SRC_EQ).aln("GFF_UNIPROT:*");
                    /* Output line: id0, a tab, db, then the original line from its second byte on. */
                    /* del(':') presumably removes a trailing colon of db. TODO confirm against BA.del(). */
                    sb.aa(id0,'\t',db).del(':').aFT(T,b+1,eol[iL]).a('\n');
                }
            }
            if(sb!=null){
                Collection vGFF=(Collection)p.getProperty(PROTEINO_vGFF);
                if(vGFF==null) p.setProperty(PROTEINO_vGFF,vGFF=new ArrayList());
                adUniq(sb.trimSize(),vGFF);
            }
        }
        return true;
    }

#if CPP_DEACTIVATED_MAIN
/*
       cd /var/www/aa
       rm -r DB/short.fasta.d
       java -ea -Xmx555M charite.christo.strap.Strap -stdout -stderr -formatdb -localSequenceDB=UNIPROT:DB/short.fasta -GFF=UNIPROT:DB/uniprot-reviewed.gff
       fgrep  Q6GZX0 DB/short.fasta.d/hash/*
       fgrep  005R_FRG3G DB/short.fasta.d/hash/*

       java -ea charite.christo.strap.ExactMatchTest ~/m1/Q6GZX0.fasta  -localSequenceDB=UNIPROT:/var/www/aa/DB/short.fasta

       java -ea charite.christo.strap.ExactMatchTest Q6GZX0        -localSequenceDB=UNIPROT:/var/www/aa/DB/short.fasta
       java -ea charite.christo.strap.ExactMatchTest 005R_FRG3G    -localSequenceDB=UNIPROT:/var/www/aa/DB/short.fasta

       Q6GZX0|005R_FRG3G

       ==================================================================================================
       cd /var/www/aa
       rm -r DB/uniprot_sprot.fasta.gz.d
       java -ea -Xmx555M charite.christo.strap.Strap -stdout -stderr  -formatdb -localSequenceDB=UNIPROT:DB/uniprot_sprot.fasta.gz -GFF=UNIPROT:DB/uniprot-reviewed.gff
       java -ea charite.christo.strap.ExactMatchTest ~/m1/a2_DrosophilaMelanogaster.swiss -localSequenceDB=UNIPROT:/var/www/aa/DB/uniprot_sprot.fasta.gz

       P40301=PSA2_DROME
       java -ea charite.christo.strap.ExactMatchTest P40301 -localSequenceDB=UNIPROT:/var/www/aa/DB/uniprot_sprot.fasta.gz
       java -ea charite.christo.strap.ExactMatchTest ~/m1/Q6GZX0.fasta  -localSequenceDB=UNIPROT:/var/www/aa/DB/uniprot_sprot.fasta.gz

*/
    /**
     * Command line entry point for manual testing.
     * If the first argument names an existing file, test1 is run, otherwise test2.
     * Before that, setPrprtyB is called with the negated constants IS_CACHE_READ and IS_CACHE_WRITE;
     * presumably this switches the cache off. TODO confirm against setPrprtyB.
     * @param argv command line arguments; argv[0] is a sequence file or a database identifier
     */
    public static void main(String[]argv)throws Exception{
        setNoGui(argv);
        setPrprtyB(-IS_CACHE_READ);
        setPrprtyB(-IS_CACHE_WRITE);
        final boolean argIsFile=fileExsts(argv[0]);
        if(argIsFile){
            test1(argv);
        }else{
            test2(argv);
        }
    }
    /**
     * Loads a protein from the file argv[0], copies its residue types into a fresh Protein,
     * runs the exact match search on that copy, prints the references found and exits the JVM.
     * @param argv command line arguments; argv[0] is the path of the sequence file
     */
    public static void test1(String[]argv){
        final Protein loaded=newProteinInstance(0,file(argv[0]));
        final Protein p=new Protein();
        p.setResidueType(loaded.getResType());
        final Protein[]query={p};
        ExactMatch.search(query);
        baOut("IDs=").aln(p.getRefs(REFS_BY_IDENT));
        System.exit(0);/*MAIN*/
    }

    /**
     * Prints the database text for the identifier argv[0], looked up with the prefix UNIPROT:.
     * @param argv command line arguments; argv[0] is the identifier without database prefix
     */
    public static void test2(String[]argv){
        final String prefixedId="UNIPROT:"+argv[0];
        putln("Text="+LocalSequenceDB.getTextForID(prefixedId));
    }

#endif //CPP_DEACTIVATED_MAIN
}
