// -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- // Copyright (C) 2007 Author: Fathi Elloumi. // This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or any later version. // This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. // You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- #include "slist.h" #include int best(long double T[], int & nbelts,int taille,char *fich) { FILE *infresult; char s1[20],s2[20],s3[20],s4[20],s5[20],s6[20],s7[20],s8[25]; char epat[41],elistpos[20000]; int i,j,pos; int elen,eden,esup,etot; long double eproba,epvalue; infresult= fopen(fich,"r") ; if (infresult==NULL) {printf("Error in reading file : %s",fich);return 0;}; if ( feof(infresult) ){printf("Error : input file %s is empty",fich);return 0;}; fscanf(infresult,"%s %s %s %s %s %s %s %s",s1,s2,s3,s4,s5,s6,s7,s8); //printf("%s %s %s %s %s %s %s %s %s\n",s1,s2,s3,s4,s5,s6,s7,s8,s9); while ( !feof( infresult) ) { fscanf(infresult,"%s %i %i %i %i %Le %Le %s",epat,&elen,&eden,&esup,&etot,&eproba,&epvalue,elistpos); // printf("%s %s %i %i %i %i %Le %Le %s",epat,emat,&elen,&eden,&esup,&etot,&eproba,&epvalue,elistpos); if ( feof(infresult) ) break; if (nbelts==0)//if1 { nbelts=1; T[0]=eproba; } else //if1 if (eproba <= T[nbelts-1] )//if2 { if (nbelts < taille) //if3 { T[nbelts]=eproba; nbelts++; };//if3 } else //if2 { i=0; while (eproba <=T[i]) i++; if (nbelts==taille) pos=taille-1; else { pos=nbelts; nbelts++; }; for (j=pos;j>i;j--) T[j]=T[j-1]; T[i]=eproba; }; }; // while fclose(infresult); //////////// return 1; } //// function MAIN ///////////////////////////////////////////////////// int main(int nbre, char *arg[]) { char nucleotide[4]={'A','C','G','T'}; long double tabproba[4]={0.328008,0.167476,0.193845,0.310671}; // bacillus subtilis nucleotides frequencies // long double tabproba[4]={0.328008,0.167476,0.193845,0.310671}; char fich[4][9]={"resA.txt","resC.txt","resG.txt","resT.txt"}; long double pa,pc,pg,pt,totpatt,tab[2000];//------------------------/// maj char reponse; char chtotbd[7]; FILE * in1, *in2,*outrec; int i,j,nbretotal,k,vsup,vlen,TOTBD,taille_seqs,indice,nombre,voutput; int tabtailles[100]; tabtablist super; Slist matrice_lst_patterns[4][4]; Slist L; bool finish,stop; time_t start,end; double dif; char namefile[20],buffer[33]; try { //----------------------- parameters ----------------------------- if ((nbre!=5) && (nbre!=7)&& (nbre!=10) && (nbre!=12)) { printf("Error: Invalid number of parameters\n"); printf("\nSYNTAX: search_best_random_scores minsupport maxlength [stats ][output ] \n"); printf("\nDEFAULTS PARAMETERS: [stats with nucleotides Bacillus Subtilis probs] [output=40]"); return 0; }; stop=false; if ((nbre>=5) && ((strcmp(arg[1],"minsupport")!=0) || (strcmp(arg[3],"maxlength")!=0))) {printf("Error: Invalid key words: should be -minsupport- or -maxlength-\n"); return 0;}; ////39 if ( (nbre==7) && (strcmp(arg[5],"output")!=0)) {printf("Error: Invalid key word: should be -output-\n"); return 0;}; if ( (nbre>=10) && (strcmp(arg[5],"stats")!=0)) {printf("Error: Invalid key word: should be -stats-\n"); return 0;}; if ( (nbre==12) && (strcmp(arg[10],"output")!=0)) {printf("Error: Invalid key word: should be -output-\n"); return 0;}; vsup=atoi(arg[2]);vlen=atoi(arg[4]); if (vsup==0) vsup=2; if (vlen==0) vlen=12; if (vsup<2) {printf ("Error: Invalid value for minsupport\n");stop=true;}; if ((vlen<2) || (vlen>40)) {printf ("Error: maxlength value must be between 2 and 40\n");stop=true;}; if (stop) return 0; voutput=40; if (nbre==7) { voutput=atoi(arg[6]) ; if ((voutput<10) || (voutput>2000)) {printf ("Error: output must be between 10 and 2000\n");return 0;}; } else { if (nbre>=10) { pa=atof(arg[6]);pc=atof(arg[7]);pg=atof(arg[8]);pt=atof(arg[9]); if (((pa <=0.0) || (pa >=1.0)) ||((pc <=0.0) || (pc >=1.0)) ||((pg <=0.0) || (pg >=1.0)) ||((pt <=0.0) || (pt >=1.0))) {printf("Error: Invalid probablity values\n"); return 0;} printf ("Stats with probability values: %Lf & %Lf & %Lf & %Lf\n",pa,pc,pg,pt); tabproba[0]=pa;tabproba[1]=pc;tabproba[2]=pg;tabproba[3]=pt; if (nbre==12) { voutput=atoi(arg[11]) ; if ((voutput<10) || (voutput>2000)) {printf ("Error: output must be between 10 and 2000\n");return 0;}; }; }; }; system("cls"); printf("PARAMETERS:\n"); printf("input_promo_file: 1000 samples"); printf ("min_support: %i max_lenght:%i\n",vsup,vlen); printf ("Stats with probability values: %Lf & %Lf & %Lf & %Lf\n",tabproba[0],tabproba[1],tabproba[2],tabproba[3]); printf ("Output number: %i \n",voutput); // printf("\nCONTINUE ? (if yes type Y):");scanf("%c",&reponse); if (reponse!='Y') {printf("\n BYE !"); return 0;}; //---------------------------------------------------------------- outrec= fopen("random-scores.txt","w"); //---------------------------------- maj fprintf(outrec,"Totpatt\tTime");for (i=1;i<=voutput;i++) fprintf(outrec,"\tzs%i ",i);fprintf(outrec,"\n"); //for (indice=1;indice<=1000;indice++) for (indice=1;indice<=1000;indice++) { strcpy(namefile,"sample"); itoa (indice,buffer,10); strcat(namefile,buffer);strcat(namefile,".txt"); printf("\n%s",namefile); /// START ........................................................................///// time (&start); j= revcomp(namefile,taille_seqs,tabtailles); // reverse complement if (j==0) { printf("Error in input file: %s\n",namefile); return 0; }; if (j==1) { printf("Error: number of sequences must be greater than 1\n"); return 0; }; if (vsup>j) { printf("Error: min_support must be less or equal than the number of sequences:%i\n",j); return 0; }; //printf("number of sequences:%i\n",j); int l,c; // for (l=0;l[2*j]; // for nucleotide A super[1] = new Listgen[2*j]; // for nucleotide C super[2] = new Listgen[2*j]; // for nucleotide G super[3] = new Listgen[2*j]; // for nucleotide T k= create(super); // creation of 4 nucleotides arrays of positions if (k==0) {printf("Error in processing file that contains sequences and their reverses"); return 0;}; totpatt=0.0; for (l=0;l<4;l++) { printf("Processing family: %c ...\n",nucleotide[l]); for (c=0;c<4;c++) { matrice_lst_patterns[l][c].join(super[l],super[c],2*j,nucleotide[l],nucleotide[c],vsup,vlen-2); // matrice_lst_patterns[l][c].affich(); }; matrice_lst_patterns[l][0].append_v2( matrice_lst_patterns[l][1]); matrice_lst_patterns[l][0].append_v2( matrice_lst_patterns[l][2]); matrice_lst_patterns[l][0].append_v2( matrice_lst_patterns[l][3]); //printf("Liste des patterns -----------------------\n"); // matrice_lst_patterns[l][0].affich(); matrice_lst_patterns[l][0].morespecific_v2(vsup,vlen-2); // printf("Statistics:...\n"); totpatt=totpatt + matrice_lst_patterns[l][0].affichjmp2(fich[l],tabtailles,j,tabproba); matrice_lst_patterns[l][0].delete_list_pattern(); }; //printf("\nTotal patterns for 4 families= %12.0Lf\n",totpatt) ; time (&end); dif = difftime (end,start); //printf ("\nRunTime = %.2lf seconds.\n", dif ); ////////////////////////////// fprintf(outrec,"%12.0Lf\t",totpatt); fprintf(outrec,"%.2lf",dif) ; nombre=0; printf ("\nStatus:"); printf ("\nFamily A: %i",best(tab,nombre,voutput,"resA.txt")); printf ("\nFamily C: %i",best(tab,nombre,voutput,"resC.txt")); printf ("\nFamily G: %i",best(tab,nombre,voutput,"resG.txt")); printf ("\nFamily T: %i",best(tab,nombre,voutput,"resT.txt")); for (i=0;i