ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/fqtrim/fqtrim.cpp
(Generate patch)
# Line 112 | Line 112
112   void convertQ64(GStr& q);
113  
114   int main(int argc, char * const argv[]) {
115 <  GArgs args(argc, argv, "QRDCl:d:3:5:n:r:o:");
115 >  GArgs args(argc, argv, "YQRDCl:d:3:5:n:r:o:");
116    int e;
117    int icounter=0; //counter for input reads
118    int outcounter=0; //counter for output reads
# Line 120 | Line 120
120        GMessage("%s\nInvalid argument: %s\n", USAGE, argv[e]);
121        exit(224);
122        }
123 <  debug=(args.getOpt('D')!=NULL);
123 >  debug=(args.getOpt('Y')!=NULL);
124    phred64=(args.getOpt('Q')!=NULL);
125    doCollapse=(args.getOpt('C')!=NULL);
126    doDust=(args.getOpt('D')!=NULL);
# Line 601 | Line 601
601   //     be at coordinate 0
602  
603   bool extendMatch(const char* a, int alen, int ai,
604 <                 const char* b, int blen, int bi, int mlen, int& l5, int& l3) {
604 >                 const char* b, int blen, int bi, int mlen, int& l5, int& l3, bool end5=false) {
605   //so the alignment starts at ai in a, bi in b, with a perfect match of length mlen
606 < //GStr dbg(a);
606 > //if (debug)
607 > //  GStr dbg(b);
608   //GMessage(">> in %s\n\textending hit: %s at position %d\n", a, (dbg.substr(ai, mlen)).chars(), ai);
609   int a_l=ai; //alignment coordinates on a
610   int a_r=ai+mlen-1;
# Line 613 | Line 614
614   int bi_maxscore=bi;
615   int score=mlen*a_m_score;
616   int maxscore=score;
617 < int mism5score=a_mis_score-2; //increase penalty for mismatches at 5' end
617 > int mism5score=a_mis_score;
618 > if (end5 && ai<(alen>>1)) mism5score-=2; // increase penalty for mismatches at 5' end
619   //try to extend to the left first, if possible
620   while (ai>0 && bi>0) {
621     ai--;
# Line 628 | Line 630
630     }
631   a_l=ai_maxscore;
632   b_l=bi_maxscore;
633 + //if (debug) GMessage("  after l-extend: %*s%s\t\t(score=%d)\n",a_l," ",dbg.substr(b_l,b_r-b_l+1).chars(),maxscore);
634   //now extend to the right
635   ai_maxscore=a_r;
636   bi_maxscore=b_r;
# Line 661 | Line 664
664     }
665    a_r=ai_maxscore;
666    b_r=bi_maxscore;
667 <  if (maxscore>=a_min_score && (alen-a_r<3 || blen-b_r<3 || a_l<2 || b_l<2)) {
668 <     if (a_l<alen-a_r-1) {
667 >  int a_ovh3=alen-a_r-1;
668 >  int b_ovh3=blen-b_r-1;
669 >  int mmovh3=(a_ovh3<b_ovh3)? a_ovh3 : b_ovh3;
670 >  int mmovh5=(a_l<b_l)? a_l : b_l;
671 >  //if (debug) GMessage("  after r-extend: %*s%s\t\t(score=%d)\n",a_l," ",dbg.substr(b_l,b_r-b_l+1).chars(),maxscore);
672 >  if (maxscore>=a_min_score && mmovh3<2 && mmovh5<2) {
673 >     if (a_l<a_ovh3) {
674          //adapter closer to the left end (typical for 5' adapter)
675          l5=a_r+1;
676          l3=alen-1;
# Line 725 | Line 733
733   }
734  
735   bool trim_adapter5(GStr& seq, int&l5, int &l3) {
736 + //if (debug) GMessage("trim_adapter5 on: %s\n", seq.chars());
737   int rlen=seq.length();
738   l5=0;
739   l3=rlen-1;
# Line 749 | Line 758
758     //l5=a5len-4;
759     //while (fi+l5<a5len && l5<l3 && adapter5[fi+l5]==seq[l5]) l5++;
760     //return true;
761 +   //if (debug) GMessage(" first 12char of the read match adaptor!\n");
762     extendMatch(seq.chars(), rlen, 1,
763 <                 adapter5.chars(), a5len, fi,  12, l5,l3);
763 >                 adapter5.chars(), a5len, fi,  12, l5,l3, true);
764     return true;
765     }
766    
# Line 760 | Line 770
770   if (apstart<0) { apstart=0; aplen=a5len-1; }
771   GStr bstr=adapter5.substr(apstart, aplen);
772   if ((fi=seq.index(bstr))>=0) {
773 +   //if (debug) GMessage(" last 12char of adaptor match the read!\n");
774     extendMatch(seq.chars(), rlen, fi,
775 <                 adapter5.chars(), a5len, apstart,  aplen, l5,l3);
775 >                 adapter5.chars(), a5len, apstart,  aplen, l5,l3,true);
776     return true;
777     }
778   //no easy cases, find a triplet match as a seed for alignment extension
# Line 770 | Line 781
781     apstart=a5len-iw-4;
782     GStr aword=adapter5.substr(apstart,3);
783     if ((fi=seq.index(aword))>=0) {
784 +      //if (debug) GMessage("extending wmatch :%*s\n", fi+3, aword.chars());
785        if (extendMatch(seq.chars(), rlen, fi, adapter5.chars(),
786 <                   a5len, apstart, 3, l5,l3)) {
786 >                   a5len, apstart, 3, l5,l3,true)) {
787                       return true;
788                       }
789        }

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines