/*  s8stdout -ckURLs  */
/* --- ~/strapSrc/src/noGui/charite/christo/rsc_Customize_Prot.rsc  hrefEnsemblUrl()  --- */
/* baLog(LOG_CK_URL).a(ANSI_INVERSE+"addAlternativeUrl "+ANSI_RESET+" "); */
/* addAlternativeUrl(addPfx(URL_PFX_DOWNLOAD_PDB,"3rec.pdb.gz"),addPfx(URL_PFX_DOWNLOAD_PDB,"xxxx.pdb.gz")); */
/* TODO  NCBI_NEIGHBOR: GO:   UNIREF90: IPI:  http://www.rcsb.org/pdb/images/1iav_bio_r_80.jpg HTTP 301 */
/*
  Identifiers of the check categories of checkURLs().
  Each id is paired with a display name in SARRAYeq_CHECKURLS_NAME and is passed to
  _checkURLsYesNo(id) to decide whether the category runs.
  NOTE(review): CHECKURLS_SEQUENCE_DB is named in the table but no visible code tests it - confirm it is still needed.
*/
#define CHECKURLS_PDB 1
#define CHECKURLS_PDB_OBSOLETE 2
#define CHECKURLS_SEQUENCE_DB 3
#define CHECKURLS_SEQ_DB_FETCH 4
#define CHECKURLS_HREF 5
#define CHECKURLS_URLS 6
#define CHECKURLS_PREDICTION 7
#define CHECKURLS_BLAST 8
#define CHECKURLS_BLAST3D 9
/* Display name per check category. _checkURLsYesNo() compares the first character of the name with the ckURLs option. */
@*SARRAYeq_CHECKURLS_NAME
 P PDB=CHECKURLS_PDB
 O Obsolete PDB=CHECKURLS_PDB_OBSOLETE
 S Sequence-DB=CHECKURLS_SEQUENCE_DB
 F Sequence-DB-Fetch=CHECKURLS_SEQ_DB_FETCH
 H Customizable URLs=CHECKURLS_HREF
 U URLs=CHECKURLS_URLS
 D Prediction=CHECKURLS_PREDICTION
 B BLAST=CHECKURLS_BLAST
 3 BLAST 3D=CHECKURLS_BLAST3D
*@
/* PDB test entries. checkURLs() splits each line into tokens: token 0 is the PDB id put into the URL, token 1 the pattern expected in the download. */
@*SARRAY_CHECKURLS_PDBIDS
 3rec RecA protein
 163d _pdbx_struct_assembly.id
*@
/* Patterns expected in a structure file fetched from the obsolete-PDB locations. */
@*SARRAY_CHECKURLS_MATCHES_STRUCTURE_FILE
 ^REMARK\b|^_pdbx_struct_assembly.id
*@
/* dbfetch test entries. checkURLs() splits each line at the tilde into "DB:ID" and the pattern expected in the fetched record. */
@*SARRAY_CHECKURLS_DBFETCH
 UNIPROT:P25787~^AC |
 UNIPROT:CLPQ_BACSU~^AC |
 EMBL:X56734~^AC |
 EMBL:AL450380~^AC |
 NCBI:AAA63473~^AC |
 NCBI:213940~LOCUS
*@
/* Patterns expected in the XML returned by a sequence BLAST. */
@*SARRAY_CHECKURLS_MATCHES_BLASTXML
 <hit number|<Hit_hsps>
*@
/* Query sequence used for the BLAST and BLAST 3D checks. */
@*~RSC_CHECKURLS_MATCHES_BLAST_SEQ
 RNGHVVIAGDGQATLGNTVMKGNVKKVRR
*@
/* Test sequence per prediction interface; checkURLs() looks the sequence up by interface id. */
/* NOTE(review): the third entry has an empty right-hand side - presumably the secondary structure interface; confirm. */
@*SARRAYeq_CHECKURLS_SEQS_FOR_PREDICT
 LLKSAEREKEMASMKEEFTRLKEALEKSEARRKE=INTRFC_PredictionFromSequenceCoiledCoil
 VFVPVAYSLIFLLGVIGNVLVLVILERHRQTRSSTETFLFHLAVADLLLVFILPFAV=INTRFC_PredictionFromSequenceTmHelix
 KLVQIEYALAAVAAGAPSVGIKATNGVVLATEKKQKSILYDEQ=
*@

/* Introductory messages written to the log at the start of checkURLs(). */
@*RSC_CHECKURLS_MSG_PARA1
 The following checks will be performed:
*@
@*RSC_CHECKURLS_MSG_PARA2
 All checks will be performed unless one particular is selected with the option CPP_UNSTRINGIZE(PAR_ckURLs)=<character>.
*@

/**
 * Prints the name of a check category and reports whether that category should run.
 * A category runs when the ckURLs option is empty or contains the first character
 * of the category name; otherwise "Not selected" is printed.
 *
 * @param id one of the CHECKURLS_* category ids
 * @return true if the category is to be executed
 */
private static boolean _checkURLsYesNo(int id){
    final String selection=prprty(iPAR_ckURLs);
    final String categoryName=iConst(SARRAYeq_CHECKURLS_NAME,id);
    /* The heading is printed in every case, selected or not. */
    baOut(ANSI_INVERSE).a(categoryName).aln(ANSI_RESET);
    final boolean selected= sze(selection)==0 || selection.indexOf(chrAt(0,categoryName))>=0;
    if(!selected){
        putln("Not selected");
    }
    return selected;
}
/**
 * Self-test of the external web resources; the outcome of every check is written to the LOG_CK_URL log.
 * <p>
 * The checks are grouped in categories (PDB, obsolete PDB, dbfetch, URLs, customizable hrefs,
 * sequence predictions, BLAST, BLAST 3D). Each category is gated by _checkURLsYesNo(), i.e. it runs
 * when the ckURLs option is empty or names the category.
 * <p>
 * The IS_CACHE_READ property is cleared for the duration of the run and its previous state is
 * restored in a finally clause, so that an exception thrown by one of the checks does not leave
 * the property cleared.
 */
@SuppressWarnings("all")
REFLECTION_PUBLIC_STATIC_VOID checkURLs(){
    final BA ba=baLog(LOG_CK_URL).aln(rsc(RSC_CHECKURLS_MSG_PARA1)).joinLns(arry(SARRAYeq_CHECKURLS_NAME)).aln().aln(rsc(RSC_CHECKURLS_MSG_PARA2));
    /* The previous state is kept as a sign (+1 set, -1 not set) and re-applied by multiplication below. */
    final int save=isPrprty(IS_CACHE_READ)?1:-1;
    setPrprtyB(-IS_CACHE_READ);
    try{
        /* Wait until _internetOK holds a value other than INT_NAN. */
        while(_internetOK==INT_NAN) sleepMS(222);
        if(_checkURLsYesNo(CHECKURLS_PDB)){
            for(String url:custSettings(CUSTOM_pdbFiles)){
                for(String id_match:arry(SARRAY_CHECKURLS_PDBIDS)){
                    /* ss[0] is the PDB id, ss[1] the pattern expected in the downloaded file. */
                    final String[]ss=splitTkns(id_match);
                    /* "$id" is replaced by the id, "$id23" by its 2nd and 3rd character. */
                    /* NOTE(review): the first argument of the outer strplc is CHECKURLS_PDB whereas the inner call passes 0 - confirm this option value is intended. */
                    _checkUrl(strplc(CHECKURLS_PDB,"$id23",ss[0].substring(1,3),strplc(0,"$id",ss[0],url)),ss[1]);
                }
            }
        }
        if(_checkURLsYesNo(CHECKURLS_PDB_OBSOLETE)){
            for(String structuresSlash:custSettings(CUSTOM_obsoletePdbFiles)){
                /* All candidate locations of the entry 200d; _checkUrl() uses the first one that can be read. */
                final Collection v=new ArrayList();
                for(String sub:arry(SARRAY_PDB_FTP_SECTIONS)) v.add(s(new BA(99).aa(structuresSlash,sub).PDB_APPEND_13_1234("200d",sub.indexOf("/mmCIF/")>0)));
                _checkUrl(v,arry(SARRAY_CHECKURLS_MATCHES_STRUCTURE_FILE));
            }
        }
/* wget -O - 'http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/EMBL/X56734/annot?style=raw' | nl   The DNA sequence is missing here */
/* wget -O - 'http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/EMBL/X56734/embl?style=raw' | nl */
        if(_checkURLsYesNo(CHECKURLS_SEQ_DB_FETCH)){
            for(String line:arry(SARRAY_CHECKURLS_DBFETCH)){
                /* line is "DB:ID~pattern" */
                final String ss[]=splitTkns('~',line),id=ss[0];
                final int colon=id.indexOf(':');
                final File f=dbfetchId2file(id.substring(0,colon),id.substring(colon+1),"");
                ba.aa(id,' ');
                _checkUrlAndTxt(readBytes(f),ss[1]);
            }
        }
        ba.aln();
#undef FORMAT
        if(_checkURLsYesNo(CHECKURLS_URLS)){
            ROFi0(F_DIR_URL_ZZZ){
                /* check is "prefix~pattern"; entries without a check specification are skipped. */
                final String check=iConst(SARRAYeq_URLS_CHECK,i);
                if(check!=null){
                    final int tilde=check.indexOf('~');
                    if(tilde<0){
                        /* Malformed entry: report it, as done for the HREF checks, instead of failing in substring(). */
                        ba.aa(RED_WARNING,"checkURLs",' ').aa(iConst(SARRAYeq_CHECKURLS_NAME,CHECKURLS_URLS),' ').aln(check);
                    }else{
                        _checkUrl(addSfx(check.substring(0,tilde),iUrl(i)),check.substring(tilde+1));
                    }
                }
            }
        }
        if(_checkURLsYesNo(CHECKURLS_HREF)){
            for(int t:new int[]{HREF_PROTEIN_FILE,HREF_PLAIN_TEXT,HREF_WEB_LINK}){
                for(String d:hrefGetDB(t)[0]){
                    if(sze(d)==0)continue;
                    /* A well-formed check has two parts: check[0] is prefixed with d to build the reference, check[1] is the expected pattern. */
                    final String[]check=splitTkns('~',mapStrStr(MAPSTR_HREF_CHECK).get(d));
                    if(check.length==2){
                        final String url=hrefToUrlString(t,addPfx(d,check[0]));
                        /* NOTE(review): setNoClr(189) presumably is a set that prevents checking the same URL twice - confirm. */
                        if(setNoClr(189).add(url)) _checkUrl(url,check[1]);
                    }else if(check.length!=0) ba.aa(RED_WARNING,"checkURLs",' ',iConst(SARRAYeq_CHECKURLS_NAME,CHECKURLS_HREF),' ',d,' ').aln(check);
                }
            }
        }
        /* The loop below relies on the three prediction interface ids being consecutive. */
        IF_MEIN_DEBUG(if(INTRFC_PredictionFromSequenceSecstru!=INTRFC_PredictionFromSequenceCoiledCoil-1 || INTRFC_PredictionFromSequenceSecstru!=INTRFC_PredictionFromSequenceTmHelix-2) assrt());;
        // PredictionFromSequenceImpl_computeInc
        if(_checkURLsYesNo(CHECKURLS_PREDICTION)){
            FORi(INTRFC_PredictionFromSequenceSecstru,INTRFC_PredictionFromSequenceTmHelix+1){
                for(int c:sclImplementationsForInterface(i)){
                    final PredictionFromSequence p=(PredictionFromSequence)mkInstanceBid(c);
                    ba.aa(ANSI_INVERSE,shrtClasNamOrAlias(p)).aln(" ...");
                    p.setGappedSequences(new byte[][]{toByts(iConst(SARRAYeq_CHECKURLS_SEQS_FOR_PREDICT,i))});
                    final byte[][]pred=p.getPrediction();
                    ba.aa(ANSI_INVERSE,shrtClasNamOrAlias(p)).aln(ANSI_RESET).aln(iConst(SARRAYeq_CHECKURLS_SEQS_FOR_PREDICT,i));
                    if(sze(pred)==0||sze(pred[0])==0) ba.a(RED_FAILED).aln("NULL");
                    else{
                        /* Success means the prediction contains 'X' for the coiled coil interface and 'H' for the others. */
                        ba.aa(strchr(i==INTRFC_PredictionFromSequenceCoiledCoil?'X':'H',pred[0])>=0?GREEN_SUCCESS:RED_FAILED,' ')
                            .a(" Output=")
                            .aFilter(LETTR|FILTER_NO_MATCH_TO_SPACE,pred[0]).formatSize(sze(pred[0])).aln();
                    }
                }
            }
        }
        {
            final String Q=toStrgTrim(rsc(RSC_CHECKURLS_MATCHES_BLAST_SEQ)),par=prprty(iPAR_ckURLs);
            /* Without GUI and without a selection, wait for ENTER before the BLAST checks start. */
            if(!withGui() && sze(par)==0){
                putln(ANSI_FG_GRAY+"Press ENTER to continue with Sequence Blast"+ANSI_RESET);
                try{System.in.read();}catch(Exception ex){}
            }
            final int end=ba.end(),bb[]={SEQ_BLASTER_WEB_EBI,SEQ_BLASTER_WEB_NCBI};
            /* NOTE(review): 'n' disables the EBI entry and 'e' the NCBI entry - presumably the letter names the blaster to keep; confirm. */
            if(strchr('n',par)>=0) bb[0]=-1;
            if(strchr('e',par)>=0) bb[1]=-1;

            if(_checkURLsYesNo(CHECKURLS_BLAST)){
                for(int i:bb){
                    if(i>=0){
                        final SequenceBlaster b=new SequenceBlaster(0,i,Q,"uniprot",Q);
                        ba.aa("\n"+ANSI_INVERSE+"BLAST UniProt ",shrtClasNamOrAlias(b)," ...").aln(ANSI_RESET);
                        b.computeBlast();
                        /* The XML result is written to a temporary file whose path is logged. */
                        ba.a(' ')
                            .aFile(wrte(file(NEWFILE_TMPFILE_WITH_EXT,new BA(99).aa('_',shrtClasNamOrAlias(b),".uniprot.xml")),b.blastXML()))
                            .aln();
                        final BA xml=b.blastXML();
                        ba.a(" Output=").aFT(xml,0,1000).aln(" ...");
                        _checkUrlAndTxt(xml,arry(SARRAY_CHECKURLS_MATCHES_BLASTXML));
                    }
                }
            }
            if(_checkURLsYesNo(CHECKURLS_BLAST3D)){
                for(int i:bb){
                    if(i>=0){
                        final Protein p=new Protein();
                        p.setResidueType(Q);
                        ba.aa("\n"+ANSI_INVERSE+"BLAST PDB "," ...").aln(ANSI_RESET);
                        blast4id(((BLAST4ID_DB_RECURS_PDB<<BLAST4ID_DB_SHIFT)|(i<<BLAST4ID_BLASTER_SHIFT)),spp(p),null);
                        final Object v=p.getProperty(PROTEINO_RECURS_PDB_RESULT);
                        FORj(0,sze(v)) ba.aa(iThEl(RECURS_PDB_PDBID,iThEl(j,v)),' ');
                        /* The check counts as successful when more than 5 results were found. */
                        ba.aln(sze(v)>5?GREEN_SUCCESS:RED_FAILED);
                    }
                }
            }
            /* Passes the log text starting at offset end, i.e. what was appended during the BLAST checks, to baOut(null). */
            baOut(null).aFT(ba,end,MAX_INT).aln();
        }
    }finally{
        /* Restore the property also when a check has thrown. */
        setPrprtyB(IS_CACHE_READ*save);
    }
    ba.aln().aln(GREEN_DONE).an('=',80).aln();
    REFLECTION_RETURN;
}
/**
 * Downloads the first readable candidate of url, logs size and elapsed time,
 * and hands the text to _checkUrlAndTxt() for pattern matching.
 *
 * @param url     one URL or several alternative URLs; converted with oo()
 * @param pattern a String or a String[] of expected patterns
 */
private static void _checkUrl(Object url,Object pattern){
    url=oo(url);
    baLog(LOG_CK_URL).aa("checkURLs",' ').join(JOINGLUE_LF_SPC4,url).a(' ').send();
    final int started=timeOn();
    BA content=null;
    /* Try the alternatives in order and stop at the first one that yields data. */
    for(Object candidate:oo(url)){
        content=readBytes(candidate);
        if(content!=null){
            break;
        }
    }
    baLog(LOG_CK_URL).formatSize(sze(content)).aa(' ',timeOn()-started," ms ").send();
    _checkUrlAndTxt(content,pattern);
}
/**
 * Judges a downloaded text and logs SUCCESS or FAILED to the LOG_CK_URL log.
 * <ul>
 * <li>Text shorter than 2 is reported as NO_TEXT_RETURNED.</li>
 * <li>If pattern ends with SFX_CHECKURLS_BIGGER_THAN, the text size must exceed
 *     the leading number of the pattern multiplied by 1024.</li>
 * <li>Otherwise at least one of the patterns must be found as a multi-line regular expression.</li>
 * </ul>
 *
 * @param txt     the downloaded text, may be null
 * @param pattern a String or a String[] of expected patterns
 */
private static void _checkUrlAndTxt(BA txt,Object pattern){
    if(sze(txt)<2){
        baLog(LOG_CK_URL).aa("checkURLs",": NO_TEXT_RETURNED ").aln(RED_FAILED);
        return;
    }
    boolean matched=false;
    if(strEnds(SFX_CHECKURLS_BIGGER_THAN,pattern)){
        /* Size criterion instead of a textual match. */
        matched= sze(txt)>xatoi(pattern)*1024;
    }else{
        for(String regex:toStrgArray(0,pattern)){
            if(java.util.regex.Pattern.compile(regex,Pattern_MULTILINE).matcher(txt).find(0)){
                matched=true;
                break;
            }
        }
    }
    baLog(LOG_CK_URL).a(' ');
    if(!matched){
        baLog(LOG_CK_URL).aa("DOES_NOT_CONTAIN ",pattern,' ').aln(RED_FAILED);
        return;
    }
    baLog(LOG_CK_URL).aln(GREEN_SUCCESS);
}
