// -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- // Copyright (C) 2007 Author: Fathi Elloumi. // This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or any later version. // This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. // You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- #include #include #include #include #include "utilities2.h" //-------------------------------------------------------------------------------------- void concat(char * tab, char c1, int nbpoints, char c2) { //char * tab; int i; //tab = new char[nbpoints+3]; //tab = NULL; //if (tab==NULL) throw(Cherror()); tab[0]=c1; tab[nbpoints+1]=c2; tab[nbpoints+2]='\0'; for(i=1;it2) return t1 ; else return t2; } int matching(char * tab,char * s1, char * s2) { //char * tab; int t1,t2,min, max,i; bool stop; t1=strlen(s1); t2=strlen(s2); if ((t1==0) || (t2==0) ) { return -1;}; if (t1>t2) {min=t2;max=t1; }else {min=t1;max=t2; } ; //tab = NULL; //tab = new char[max+1]; //if (tab==NULL) throw(Cherror()); stop=false;i=0; while( (it2) while (i(overlap)/100); // printf("Coverage: %f",coverage); result = fopen(sim,"w") ; //fprintf(result,"Pattern\tLength\tDensity\tsupport\ttotal\tzs-sup"); for (i=1;i<=numpat;i++) fprintf(result,"\tpatt%i ",i); fprintf(result,"Pattern\tLength\tDensity\tsupport\ttotal\tzs-sup\tzs-tot"); for (i=1;i<=numpat;i++) fprintf(result,"\tpatt%i ",i); fprintf(result,"\n"); revcomp = fopen(rev,"w") ; //fprintf(revcomp,"Pattern\tLength\tDensity\tsupport\ttotal\tzs-sup\t#revcomp"); fprintf(revcomp,"Pattern\tLength\tDensity\tsupport\ttotal\tzs-sup\tzs-tot\t#revcomp"); fprintf(revcomp,"\n"); // copy the known sites to array SITES ASSUMED ORDERED -------------------------------------------------- // fscanf(known,"%i %i",&klen,&knnbre); // length and number of sites fscanf(known,"%s %s %s %s %s %s %s %s",s1,s2,s3,s4,s5,s6,s7,s8); // printf("%s %s %s %s %s %s %s %s\n",s1,s2,s3,s4,s5,s6,s7,s8); while ( !feof(known) ) { fscanf(known,"%s %i %i %i %i %Le %Le",kpat,&klen,&kden,&ksup,&knnbre,&eproba,&epvalue); if ( feof(known) ) break; kntab= new elem[knnbre+1]; knboo= new bool[knnbre]; // loading the known sites i=1; while ( i<=knnbre ) { fscanf(known,"%i %c %i %c",&seq,&c1,&pos,&c2); kntab[i-1].seq=seq; kntab[i-1].pos=pos; i++; }; kntab[knnbre].seq=1000; // fprintf(result,"%s\t%i\t%i\t%i\t%i\t%Le",kpat,klen,kden,ksup,knnbre,eproba); // fprintf(revcomp,"%s\t%i\t%i\t%i\t%i\t%Le",kpat,klen,kden,ksup,knnbre,eproba); fprintf(result,"%s\t%i\t%i\t%i\t%i\t%Le\t%Le",kpat,klen,kden,ksup,knnbre,eproba,epvalue); fprintf(revcomp,"%s\t%i\t%i\t%i\t%i\t%Le\t%Le",kpat,klen,kden,ksup,knnbre,eproba,epvalue); // -------------------------------------------------------------------------------------------------------// // For each predicted pattern read and process the predicted sites predic = fopen(best,"r") ; ///////////////////////////////////// opening if (predic==NULL) {printf("Error in reading some files");return 0;}; if ( feof(predic) ){printf("Error : input file %s is empty",best);return 0;}; fscanf(predic,"%s %s %s %s %s %s %s %s",s1,s2,s3,s4,s5,s6,s7,s8); // printf("%s %s %s %s %s %s %s %s\n",s1,s2,s3,s4,s5,s6,s7,s8); revnumber=0; comptage=0; while ( !feof(predic) ) { fscanf(predic,"%s %i %i %i %i %Le %Le",epat,&elen,&eden,&esup,&etot,&eproba,&epvalue); if ( feof(predic) ) break; comptage++; ///---------------rev comp test --------------------------- if ( klen==elen && kden==eden && ksup==esup && knnbre==etot) { if (testrev(kpat,epat,klen)) revnumber= comptage; }; ///-------------------------------------------------------- prboo= new bool[etot]; for (i=0;ikntab[j].seq) j++; jprec=j; // maj while (seq==kntab[j].seq) { // check overlapping // printf("---Check:\n"); // printf("seq %i pos %i posinf %i possup %i ", seq, pos ,kntab[j].pos - static_cast (floor((klen*0.75)+0.5))+klen-elen,kntab[j].pos + static_cast (floor((klen*0.75)+0.5)) ); if ( pos >= kntab[j].pos - static_cast (floor((klen*coverage)+0.5))+klen-elen && pos <= kntab[j].pos + static_cast (floor((klen*coverage)+0.5)) ) { knboo[j]=true;prboo[i-1]=true; }; /*else { if ( pos+elen-1 >= kntab[j].pos+ static_cast (floor((elen*0.25)+0.5))-1 && pos+elen-1 <= kntab[j].pos+elen-1 ) { knboo[j]=true;prboo[i-1]=true; }; };*/ j++; }; seqprec=seq; //maj i++; }; stp=0;sfp=0; //printf("next\n"); //for (i=0;i (stp) /(stp+sfn); sppv=static_cast (stp)/(stp+sfp); sasp=(ssn+sppv)/2; // spc= static_cast (stp) /(stp+sfn+sfp); fprintf(result,"\t%f",sasp);delete [] prboo; }; fprintf(revcomp,"\t%i\n",revnumber);fprintf(result,"\n"); fclose(predic);delete [] knboo; delete [] kntab; }; //new fclose(result);fclose(known);fclose(predic);fclose(revcomp); } catch (exception &e) { printf("exception: %s\n",e.what()) ; } return 1; } int prostat(double ppa,double ppc,double ppg,double ppt,char *fin, char *fpro, char *fstat) { char nucleotide[4]={'A','C','G','T'}; char notpresent[4]={'B','D','H','V'}; char codes[4][4]={{'A','M','R','W'},{'M','C','S','Y'},{'R','S','G','K'},{'W','Y','K','T'}}; double tabproba[4]; // ={0.196432,0.304403,0.313149,0.186016}; typedef struct {double f; int position;} enreg; enreg tabtrie[4]; double ic,freq,vlog; FILE *inlogo,*outic,*outstat; char ligne[101]; int tabfreq[4][100]; char epat[41]; //,chaine[41],c1,c2; char s1[10],s2[10],s3[10],s4[10],s5[10],s6[10],s7[10]; int i,j,k,c,v; int elen,eden,esup,etot; long double eproba,epvalue; tabproba[0]=ppa;tabproba[1]=ppc;tabproba[2]=ppg;tabproba[3]=ppt; inlogo = fopen(fin,"r") ; outic = fopen(fpro,"w") ; outstat = fopen(fstat,"w") ; if (inlogo==NULL) {printf("Error in reading input file");return 0;}; if ( feof(inlogo) ){printf("Error : input file is empty");return 0;}; fprintf(outstat,"Pattern\tConsensus\tLength\tDensity\tSupport\tTotal\tZssup\tZstot\tMIC\n"); // fprintf(outstat,"Pattern-Consensus\tLen-Den\tSup-Tot\tZs-sup\tZs-tot\tMIC\n"); // traitement du fichier logo pour extraire la matrice des frequences du pattern while ( !feof( inlogo) ) { fscanf(inlogo,"%s %s %s %i %s %i %s %i %s %i %s %Le %s %Le",s1,epat,s2,&elen,s3,&eden,s4,&esup,s5,&etot,s6,&eproba,s7,&epvalue); if ( feof(inlogo) ) break; // fprintf(outmatrix,"Pattern %s %i %i %i %i %i %Lf \n",epat,elen,eden,esup,etot,esbd,epvalue); fprintf(outic,"Pattern %s Length %i Density %i Support %i Total %i Zssup %Le Zstot %Le \n",epat,elen,eden,esup,etot,eproba,epvalue); // mise a zero for (k=0;k<4;k++)for (c=0;c<100;c++)tabfreq[k][c]=0; i=1; while ( i<=etot ) { fscanf(inlogo,"%s",ligne); for (j=0;j=i;v--) tabtrie[v]=tabtrie[v-1]; tabtrie[i].f=freq;tabtrie[i].position=k; }; /* printf("freq %f position %i", tabtrie[0].f,tabtrie[0].position); printf("freq %f position %i", tabtrie[1].f,tabtrie[1].position); printf("freq %f position %i", tabtrie[2].f,tabtrie[2].position); printf("freq %f position %i", tabtrie[3].f,tabtrie[3].position); */ if ( (tabtrie[0].f>0.5) && (tabtrie[0].f>=(2*tabtrie[1].f)) ) ligne[j]=nucleotide[tabtrie[0].position]; else if ( (tabtrie[0].f<=0.5) && (tabtrie[1].f<=0.5) && ((tabtrie[0].f+tabtrie[1].f)>0.75) ) ligne[j]=codes[tabtrie[0].position][tabtrie[1].position]; else if ( (tabtrie[3].f<=0.0) && (tabtrie[2].f>0.0) ) ligne[j]=notpresent[tabtrie[3].position]; else ligne[j]='N'; }; // end for ligne[elen]='\0'; // affichage for (k=0;k<4;k++) { fprintf(outic,"%c ",nucleotide[k]); for (j=0;ji;j--) T[j]=T[j-1]; strcpy(T[i].pat,epat); strcpy(T[i].listpos,elistpos); T[i].len=elen; T[i].den=eden; T[i].sup=esup; T[i].tot=etot; T[i].proba=eproba; T[i].pvalue=epvalue; }; }; // while fclose(infresult); return 1; } //-------- int besttot(elt T[], int & nbelts,int taille,char *fich) { FILE *infresult; char s1[20],s2[20],s3[20],s4[20],s5[20],s6[20],s7[20],s8[25]; char epat[41],elistpos[25000]; int i,j,pos; int elen,eden,esup,etot; long double eproba,epvalue; infresult= fopen(fich,"r") ; if (infresult==NULL) {printf("Error in reading file : %s",fich);return 0;}; if ( feof(infresult) ){printf("Error : input file %s is empty",fich);return 0;}; fscanf(infresult,"%s %s %s %s %s %s %s %s",s1,s2,s3,s4,s5,s6,s7,s8); //printf("%s %s %s %s %s %s %s %s %s\n",s1,s2,s3,s4,s5,s6,s7,s8,s9); while ( !feof( infresult) ) { fscanf(infresult,"%s %i %i %i %i %Le %Le %s",epat,&elen,&eden,&esup,&etot,&eproba,&epvalue,elistpos); // printf("%s %s %i %i %i %i %Le %Le %s",epat,emat,&elen,&eden,&esup,&etot,&eproba,&epvalue,elistpos); if ( feof(infresult) ) break; if (nbelts==0)//if1 { nbelts=1; strcpy(T[0].pat,epat); strcpy(T[0].listpos,elistpos); T[0].len=elen; T[0].den=eden; T[0].sup=esup; T[0].tot=etot; T[0].proba=eproba; T[0].pvalue=epvalue; } else //if1 if (epvalue <= T[nbelts-1].pvalue )//if2 { if (nbelts < taille) //if3 { strcpy(T[nbelts].pat,epat); strcpy(T[nbelts].listpos,elistpos); T[nbelts].len=elen; T[nbelts].den=eden; T[nbelts].sup=esup; T[nbelts].tot=etot; T[nbelts].proba=eproba; T[nbelts].pvalue=epvalue; nbelts++; };//if3 } else //if2 { i=0; while (epvalue <=T[i].pvalue) i++; if (nbelts==taille) pos=taille-1; else { pos=nbelts; nbelts++; }; for (j=pos;j>i;j--) T[j]=T[j-1]; strcpy(T[i].pat,epat); strcpy(T[i].listpos,elistpos); T[i].len=elen; T[i].den=eden; T[i].sup=esup; T[i].tot=etot; T[i].proba=eproba; T[i].pvalue=epvalue; }; }; // while fclose(infresult); return 1; } //--------- int pospatt(char *fin,char *fpos,char *flog) { FILE *infrev,*infinfo,*inftot, *outmatrix,*infresult,*outlogo; //char * tseq; char * tnum; //char ligne[500],lignum[50] char epat[41],chaine[41],c1,c2; char s1[20],s2[20],s3[20],s4[20],s5[20],s6[20],s7[20],s8[25]; /// typedef char tligne[500]; typedef char tlignum[50]; tligne * tl; tlignum * tlg; int * tsize; //// int i,j,nbre,vtrash; int elen,eden,esup,etot,seq,pos,vraipos; long double eproba,epvalue; infrev = fopen("frev.txt","r") ; infinfo = fopen("finfo.txt","r") ; inftot = fopen("ftot.txt","r") ; infresult = fopen(fin,"r") ; if ((infrev==NULL)||(infinfo==NULL)||(inftot==NULL)||(infresult==NULL)) {printf("Error in reading some files");return 0;}; if ( feof(infresult) ){printf("Error : input file is empty");return 0;}; outmatrix = fopen(fpos,"w") ; outlogo = fopen(flog,"w") ; // // prepa tableau des sequences et tableau des numeros de sequences fscanf(inftot,"%i",&nbre); // tseq=new char [nbre][500];tnum=new char[nbre][50]; tl =new tligne[2*nbre];tlg =new tlignum[2*nbre];tsize= new int[2*nbre]; printf("current nbre of sequences to be scanned:%i ...\n",nbre); i=0; while ( i < (2*nbre) ) { //fscanf(infrev,"%s", tseq[i]); //fscanf(infinfo,"%i %s",vtrash, tnum[i]); fscanf(infrev,"%s", tl[i]); //printf("seq:%s\n",tl[i]); fscanf(infinfo,"%i %s %i",&vtrash, tlg[i],&tsize[i]); //printf("trash:%i info:%s size:%i\n",vtrash,tlg[i],tsize[i]); i++; }; fclose(infrev); fclose(infinfo);fclose(inftot); // fin prepa tableaux // traitement du fichier entree pour extraire la matrice du pattern // lire ligne head = labels fscanf(infresult,"%s %s %s %s %s %s %s %s",s1,s2,s3,s4,s5,s6,s7,s8); // printf("%s %s %s %s %s %s %s %s\n",s1,s2,s3,s4,s5,s6,s7,s8); while ( !feof( infresult) ) { fscanf(infresult,"%s %i %i %i %i %Le %Le",epat,&elen,&eden,&esup,&etot,&eproba,&epvalue); if ( feof(infresult) ) break; // fprintf(outmatrix,"Pattern %s %i %i %i %i %i %Lf \n",epat,elen,eden,esup,etot,esbd,epvalue); fprintf(outmatrix,"Pattern %s Length %i Density %i Support %i Total %i Zssup %Le Zstot %Le \n",epat,elen,eden,esup,etot,eproba,epvalue); fprintf(outlogo,"Pattern %s Length %i Density %i Support %i Total %i Zssup %Le Zstot %Le \n",epat,elen,eden,esup,etot,eproba,epvalue); i=1; while ( i<=etot ) { fscanf(infresult,"%i %c %i %c",&seq,&c1,&pos,&c2); if (seq%2==0) vraipos=pos-tsize[seq]; else vraipos= -1*(pos+1); fprintf(outmatrix,"ns:%3i ps:%3i seq:%20s pos:%4i ",seq,pos,tlg[seq],vraipos); for (j=pos;j