ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/fqtrim/fqtrim.cpp
(Generate patch)
# Line 94 | Line 94
94   int a3len=0;
95   int a5len=0;
96   // adaptor matching metrics -- for extendMatch() function
97 + const int match_score=2; //match score
98 + const int mismatch_score=-3; //mismatch
99 +
100   const int a_m_score=2; //match score
101   const int a_mis_score=-3; //mismatch
102   const int a_dropoff_score=7;
103 + const int poly_dropoff_score=7;
104   int a_min_score=12; //an exact match of 6 bases at the proper ends WILL be trimmed
105   const int a_min_chain_score=15; //for gapped alignments
106  
# Line 327 | Line 331
331   //returns 0 if the read was untouched, 1 if it was trimmed and a trash code if it was trashed
332  
333   bool ntrim(GStr& rseq, int &l5, int &l3); //returns true if any trimming occured
334 + bool polytrim(GStr& rseq, int &l5, int &l3); //returns true if any trimming occured
335   bool qtrim(GStr& qvs, int &l5, int &l3); //return true if any trimming occured
336   int dust(GStr& seq);
337   bool trim_adapter3(GStr& seq, int &l5, int &l3); //returns true if any trimming occured
# Line 1024 | Line 1029
1029     }
1030   }
1031   #endif
1032 + const char *pA_seed="AAAA";
1033 + const char *pT_seed="TTTT";
1034 + const int polyA_seed=*(int*)pA_seed;
1035 + const int polyT_seed=*(int*)pT_seed;
1036 +
1037 + bool trim_poly3(GStr &seq, int &l5, int &l3) {
1038 + int rlen=seq.length();
1039 + l5=0;
1040 + l3=rlen-1;
1041 + //assumes N trimming was already done
1042 + //so a poly match should be very close to the end of the read
1043 + //TODO: should we attempt polyA and polyT at the same time, same end?
1044 + // -- find the initial match (seed)
1045 + int rlo=GMAX((rlen-10), 0);
1046 + int si;
1047 + for (si=rlen-5;si>rlo;si--) {
1048 +   if (polyA_seed==*(int*)&(seq.chars()+si)) {
1049 +      break;
1050 +      }
1051 +   }
1052 + if (si<=rlo) return false;
1053 + //seed found, try to extend it to left and right
1054 + int score=4*match_score;
1055 + max_rlo=rlo;
1056 + rlo--;
1057 + while (rlo>=0) {
1058 +    rlo--;
1059 +    score+=
1060 +    
1061 +    }
1062 + }
1063  
1064   bool trim_adapter3(GStr& seq, int&l5, int &l3) {
1065   int rlen=seq.length();

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines