ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/fqtrim/fqtrim.cpp
(Generate patch)
# Line 112 | Line 112
112   void convertQ64(GStr& q);
113  
114   int main(int argc, char * const argv[]) {
115 <  GArgs args(argc, argv, "QRDCl:d:3:5:n:r:o:");
115 >  GArgs args(argc, argv, "YQRDCl:d:3:5:n:r:o:");
116    int e;
117    int icounter=0; //counter for input reads
118    int outcounter=0; //counter for output reads
# Line 120 | Line 120
120        GMessage("%s\nInvalid argument: %s\n", USAGE, argv[e]);
121        exit(224);
122        }
123 <  debug=(args.getOpt('D')!=NULL);
123 >  debug=(args.getOpt('Y')!=NULL);
124    phred64=(args.getOpt('Q')!=NULL);
125    doCollapse=(args.getOpt('C')!=NULL);
126    doDust=(args.getOpt('D')!=NULL);
# Line 155 | Line 155
155      }
156  
157    openfw(f_out, args, 'o');
158 +  if (f_out==NULL) f_out=stdout;
159    if (trashReport)
160      openfw(freport, args, 'r');
161    char* infile=NULL;
# Line 333 | Line 334
334           }
335        if (isfasta) {
336          if (prefix.is_empty()) {
337 <          fprintf(f_out, "@%s_x%d\n%s\n", qd->firstname, qd->count,
337 >          fprintf(f_out, ">%s_x%d\n%s\n", qd->firstname, qd->count,
338                          rseq.chars());
339            }
340          else { //use custom read name
341 <          fprintf(f_out, "@%s%08d_x%d\n%s\n", prefix.chars(), outcounter,
341 >          fprintf(f_out, ">%s%08d_x%d\n%s\n", prefix.chars(), outcounter,
342                       qd->count, rseq.chars());
343            }
344          }
# Line 601 | Line 602
602   //     be at coordinate 0
603  
604   bool extendMatch(const char* a, int alen, int ai,
605 <                 const char* b, int blen, int bi, int mlen, int& l5, int& l3) {
605 >                 const char* b, int blen, int bi, int mlen, int& l5, int& l3, bool end5=false) {
606   //so the alignment starts at ai in a, bi in b, with a perfect match of length mlen
607 < //GStr dbg(a);
607 > //if (debug)
608 > //  GStr dbg(b);
609   //GMessage(">> in %s\n\textending hit: %s at position %d\n", a, (dbg.substr(ai, mlen)).chars(), ai);
610   int a_l=ai; //alignment coordinates on a
611   int a_r=ai+mlen-1;
# Line 613 | Line 615
615   int bi_maxscore=bi;
616   int score=mlen*a_m_score;
617   int maxscore=score;
618 < int mism5score=a_mis_score-2; //increase penalty for mismatches at 5' end
618 > int mism5score=a_mis_score;
619 > if (end5 && ai<(alen>>1)) mism5score-=2; // increase penalty for mismatches at 5' end
620   //try to extend to the left first, if possible
621   while (ai>0 && bi>0) {
622     ai--;
# Line 628 | Line 631
631     }
632   a_l=ai_maxscore;
633   b_l=bi_maxscore;
634 + //if (debug) GMessage("  after l-extend: %*s%s\t\t(score=%d)\n",a_l," ",dbg.substr(b_l,b_r-b_l+1).chars(),maxscore);
635   //now extend to the right
636   ai_maxscore=a_r;
637   bi_maxscore=b_r;
# Line 661 | Line 665
665     }
666    a_r=ai_maxscore;
667    b_r=bi_maxscore;
668 <  if (maxscore>=a_min_score && (alen-a_r<3 || blen-b_r<3 || a_l<2 || b_l<2)) {
669 <     if (a_l<alen-a_r-1) {
668 >  int a_ovh3=alen-a_r-1;
669 >  int b_ovh3=blen-b_r-1;
670 >  int mmovh3=(a_ovh3<b_ovh3)? a_ovh3 : b_ovh3;
671 >  int mmovh5=(a_l<b_l)? a_l : b_l;
672 >  //if (debug) GMessage("  after r-extend: %*s%s\t\t(score=%d)\n",a_l," ",dbg.substr(b_l,b_r-b_l+1).chars(),maxscore);
673 >  if (maxscore>=a_min_score && mmovh3<2 && mmovh5<2) {
674 >     if (a_l<a_ovh3) {
675          //adapter closer to the left end (typical for 5' adapter)
676          l5=a_r+1;
677          l3=alen-1;
# Line 674 | Line 683
683          }
684       return true;
685       }
686 +  //do not trim:
687 +  l5=0;
688 +  l3=alen-1;
689    return false;
690   }
691  
# Line 725 | Line 737
737   }
738  
739   bool trim_adapter5(GStr& seq, int&l5, int &l3) {
740 + //if (debug) GMessage("trim_adapter5 on: %s\n", seq.chars());
741   int rlen=seq.length();
742   l5=0;
743   l3=rlen-1;
# Line 749 | Line 762
762     //l5=a5len-4;
763     //while (fi+l5<a5len && l5<l3 && adapter5[fi+l5]==seq[l5]) l5++;
764     //return true;
765 +   //if (debug) GMessage(" first 12char of the read match adaptor!\n");
766     extendMatch(seq.chars(), rlen, 1,
767 <                 adapter5.chars(), a5len, fi,  12, l5,l3);
767 >                 adapter5.chars(), a5len, fi,  12, l5,l3, true);
768     return true;
769     }
770    
# Line 760 | Line 774
774   if (apstart<0) { apstart=0; aplen=a5len-1; }
775   GStr bstr=adapter5.substr(apstart, aplen);
776   if ((fi=seq.index(bstr))>=0) {
777 +   //if (debug) GMessage(" last 12char of adaptor match the read!\n");
778     extendMatch(seq.chars(), rlen, fi,
779 <                 adapter5.chars(), a5len, apstart,  aplen, l5,l3);
779 >                 adapter5.chars(), a5len, apstart,  aplen, l5,l3,true);
780     return true;
781     }
782   //no easy cases, find a triplet match as a seed for alignment extension
# Line 770 | Line 785
785     apstart=a5len-iw-4;
786     GStr aword=adapter5.substr(apstart,3);
787     if ((fi=seq.index(aword))>=0) {
788 +      //if (debug) GMessage("extending wmatch :%*s\n", fi+3, aword.chars());
789        if (extendMatch(seq.chars(), rlen, fi, adapter5.chars(),
790 <                   a5len, apstart, 3, l5,l3)) {
790 >                   a5len, apstart, 3, l5,l3,true)) {
791                       return true;
792                       }
793        }

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines