ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/gclib/GAlnExtend.cpp
(Generate patch)
# Line 230 | Line 230
230         nificantly better than this */
231  
232      max_dist = GMIN(GREEDY_MAX_COST,
233 <                   len2 / GREEDY_MAX_COST_FRACTION + 1);
233 >                   (len2/GREEDY_MAX_COST_FRACTION + 1));
234  
235      /* the main loop assumes that the index of all diagonals is
236         biased to lie in the middle of allocated bookkeeping
# Line 726 | Line 726
726            GXSeed* newseed=new GXSeed(ai,bi,len);
727            seeds.Add(newseed);
728            diagstrips->addSeed(newseed);//add it to all 3 adjacent diagonals
729 +          //special last resort terminal match to be used if no better alignment is there
730 +          if (len>4 && ai==0 && (bi+len==b_len || bi+len==b_len-1)) diagstrips->tmatch=newseed;
731            }
732         }
733      }//for each 4-mer at the beginning of seqa
# Line 785 | Line 787
787            GXSeed* newseed=new GXSeed(ai,bi,len);
788            seeds.Add(newseed);
789            diagstrips->addSeed(newseed);//add it to all 3 adjacent diagonals
790 +          //special last resort terminal match to be used if no better alignment is there
791 +          if (len>4 && bi==0 && ai+len==a_len) diagstrips->tmatch=newseed;
792            }
793         }
794      }//for each 4-mer at the end of seqa
# Line 884 | Line 888
888                              reward, penalty, s_ext_r, q_ext_r, *gxmem, ed_script_fwd);
889      //check extension here and bail out if not a good right extension
890      //assuming s_max is really at the right end of s_seq
891 <    if (trim!=NULL && trim->type==galn_TrimRight && s_alnstart+s_ext_r<trim->boundary) {
891 >    if (trim!=NULL && trim->type==galn_TrimRight &&
892 >           s_alnstart+s_ext_r<trim->boundary) {
893        delete ed_script_fwd;
894        if (freeAlnMem) delete gxmem;
895        return NULL;
# Line 906 | Line 911
911      alninfo->score=retscore;
912      alninfo->pid = 100 * (1 - ((double) numdiffs) / hsp_length);
913   #ifdef GDEBUG
914 <    if (ed_script_rev) {
915 <       GMessage("Final Edit script ::: ");
916 <       printEditScript(ed_script_rev);
917 <       }
914 >    //if (ed_script_rev) {
915 >    //   GMessage("Final Edit script ::: ");
916 >    //   printEditScript(ed_script_rev);
917 >    //   }
918   #endif
919      alninfo->editscript=ed_script_rev;
920      alninfo->gapinfo = new CAlnGapInfo(ed_script_rev, alninfo->ql-1, alninfo->sl-1);
# Line 944 | Line 949
949    bool editscript=false;
950    #ifdef GDEBUG
951     editscript=true;
952 <   GMessage("==========> matching Right (3') end ========\n");
952 >  // GMessage("==========> matching Right (3') end ========\n");
953    #endif
954  
955    CAlnTrim trimInfo(galn_TrimRight, seqb, seqb_len);
# Line 953 | Line 958
958    GList<GXSeed> anchor_seeds(cmpSeedDiag, NULL, true); //stores unique seeds per diagonal
959    //did we find a shortcut?
960    if (alnbands->qmatch) {
961 <    #ifdef GDEBUG
962 <     GMessage("::: Found a quick long match -- using it instead of diagonal bands\n");
963 <    #endif
961 >    //#ifdef GDEBUG
962 >    // GMessage("::: Found a quick long match -- using it instead of diagonal bands\n");
963 >    //#endif
964      anchor_seeds.Add(alnbands->qmatch);
965      }
966    else {
# Line 963 | Line 968
968      int top_band_count=0;
969      for (int b=0;b<alnbands->Count();b++) {
970         if (alnbands->Get(b)->score<4) break;
971 <       #ifdef GDEBUG
972 <       GMessage("\tBand %d score: %d\n", b, alnbands->Get(b)->score);
973 <       #endif
971 >       //#ifdef GDEBUG
972 >       //GMessage("\tBand %d score: %d\n", b, alnbands->Get(b)->score);
973 >       //#endif
974         top_band_count++;
975         GXBand& band=*(alnbands->Get(b));
976         band.seeds.setSorted(cmpSeedScore);
# Line 973 | Line 978
978         band.tested=true;
979         if (anchor_seeds.Count()>2 || top_band_count>max_top_bands) break;
980         }
981 <    #ifdef GDEBUG
982 <    GMessage("::: Collected %d anchor seeds.\n",anchor_seeds.Count());
983 <    #endif
981 >    //#ifdef GDEBUG
982 >    //GMessage("::: Collected %d anchor seeds.\n",anchor_seeds.Count());
983 >    //#endif
984      }
985  
986    GList<GXAlnInfo> galns(true,true,false);
# Line 989 | Line 994
994               galns.AddIfNew(alninfo, true);
995          else delete alninfo;
996      }
997 <  //special case: due to the scoring scheme bias towards the 5' end of the read,
998 <  //also try some seeds at the 3' end
997 >  //special 3' end case: due to the seed scoring scheme being biased towards the 5' end of the read,
998 >  //we should also try some seeds closer to the 3' end
999    if (galns.Count()==0) {
1000 +    anchor_seeds.Clear();
1001      alnbands->setSorted(cmpDiagBands_R);
1002      int max_top_bands=4;
1003      int top_band_count=0;
1004 <    #ifdef GDEBUG
1005 <    GMessage(":::>> Retrying adjusting sort order.\n");
1006 <    #endif
1004 >    //#ifdef GDEBUG
1005 >    //GMessage(":::>> Retrying adjusting sort order.\n");
1006 >    //#endif
1007 >    if (alnbands->tmatch) {
1008 >      //anchor_seeds.setSorted(false);
1009 >      anchor_seeds.Add(alnbands->tmatch);
1010 >      }
1011      for (int b=0;b<alnbands->Count();b++) {
1012         if (alnbands->Get(b)->score<4) break;
1013 <       #ifdef GDEBUG
1014 <       GMessage("\tBand %d score: %d\n", b, alnbands->Get(b)->score);
1015 <       #endif
1013 >       //#ifdef GDEBUG
1014 >       //GMessage("\tBand %d score: %d\n", b, alnbands->Get(b)->score);
1015 >       //#endif
1016         if (alnbands->Get(b)->tested) continue;
1017         top_band_count++;
1018         GXBand& band=*(alnbands->Get(b));
# Line 1010 | Line 1020
1020         anchor_seeds.Add(band.seeds.First());
1021         if (anchor_seeds.Count()>2 || top_band_count>max_top_bands) break;
1022         }
1023 <    #ifdef GDEBUG
1024 <    GMessage("::: Collected %d anchor seeds.\n",anchor_seeds.Count());
1025 <    #endif
1023 >    //#ifdef GDEBUG
1024 >    //GMessage("::: Collected %d anchor seeds.\n",anchor_seeds.Count());
1025 >    //#endif
1026      for (int i=0;i<anchor_seeds.Count();i++) {
1027        GXSeed& aseed=*anchor_seeds[i];
1028        int a1=aseed.a_ofs+(aseed.len>>1)+1;
# Line 1024 | Line 1034
1034            else delete alninfo;
1035        }
1036      }
1027  #ifdef GDEBUG
1028  for (int i=0;i<galns.Count();i++) {
1029    GXAlnInfo* alninfo=galns[i];
1030    GMessage("a(%d..%d) align to b(%d..%d), score=%d, pid=%4.2f\n", alninfo->ql, alninfo->qr,
1031                         alninfo->sl, alninfo->sr, alninfo->score, alninfo->pid);
1032    if (alninfo->gapinfo!=NULL) {
1033      GMessage("Alignment:\n");
1034      alninfo->gapinfo->printAlignment(stderr, seqa, seqa_len, seqb, seqb_len);
1035      }
1036    }
1037  #endif
1037    //---- done
1038    delete alnbands;
1039    if (galns.Count()) {
1040      GXAlnInfo* bestaln=galns.Shift();
1041 +    #ifdef GDEBUG
1042 +      GMessage("Best alignment: a(%d..%d) align to b(%d..%d), score=%d, pid=%4.2f\n", bestaln->ql, bestaln->qr,
1043 +          bestaln->sl, bestaln->sr, bestaln->score, bestaln->pid);
1044 +      if (bestaln->gapinfo!=NULL) {
1045 +        bestaln->gapinfo->printAlignment(stderr, seqa, seqa_len, seqb, seqb_len);
1046 +        }
1047 +    #endif
1048 +
1049      return bestaln;
1050      }
1051    else return NULL;
# Line 1049 | Line 1056
1056    bool editscript=false;
1057    #ifdef GDEBUG
1058     editscript=true;
1059 <   GMessage("==========> matching Left (5') end ========\n");
1059 >   //GMessage("==========> matching Left (5') end ========\n");
1060    #endif
1061    CAlnTrim trimInfo(galn_TrimLeft, seqb, seqb_len);
1062    GList<GXSeed> rseeds(true,true,false);
# Line 1058 | Line 1065
1065    GList<GXSeed> anchor_seeds(cmpSeedDiag, NULL, true); //stores unique seeds per diagonal
1066  
1067    if (alnbands->qmatch) {
1068 <    #ifdef GDEBUG
1069 <     GMessage("::: Found a quick long match -- using it instead of diagonal bands\n");
1070 <    #endif
1068 >    //#ifdef GDEBUG
1069 >    // GMessage("::: Found a quick long match -- using it instead of diagonal bands\n");
1070 >    //#endif
1071      anchor_seeds.Add(alnbands->qmatch);
1072      }
1073    else {
# Line 1068 | Line 1075
1075      int top_band_count=0;
1076      for (int b=0;b<alnbands->Count();b++) {
1077         if (alnbands->Get(b)->score<4) break;
1078 <       #ifdef GDEBUG
1079 <       GMessage("\tBand %d score: %d\n", b, alnbands->Get(b)->score);
1080 <       #endif
1078 >       //#ifdef GDEBUG
1079 >       //GMessage("\tBand %d score: %d\n", b, alnbands->Get(b)->score);
1080 >       //#endif
1081         top_band_count++;
1082         GXBand& band=*(alnbands->Get(b));
1083         band.seeds.setSorted(cmpSeedScore);
1084         anchor_seeds.Add(band.seeds.First());
1085         if (anchor_seeds.Count()>2 || top_band_count>max_top_bands) break;
1086         }
1087 <    #ifdef GDEBUG
1088 <    GMessage("::: Collected %d anchor seeds.\n",anchor_seeds.Count());
1089 <    #endif
1087 >    //#ifdef GDEBUG
1088 >    //GMessage("::: Collected %d anchor seeds.\n",anchor_seeds.Count());
1089 >    //#endif
1090      }
1091    for (int i=0;i<anchor_seeds.Count();i++) {
1092      GXSeed& aseed=*anchor_seeds[i];
# Line 1092 | Line 1099
1099              galns.AddIfNew(alninfo, true);
1100         else delete alninfo;
1101      }
1102 <
1102 >  if (galns.Count()==0 && alnbands->tmatch) {
1103 >      GXSeed& aseed=*alnbands->tmatch;
1104 >      int a1=aseed.a_ofs+(aseed.len>>1)+1;
1105 >      int a2=aseed.b_ofs+(aseed.len>>1)+1;
1106 >      GXAlnInfo* alninfo=GreedyAlignRegion(seqa, a1, seqa_len,
1107 >                              seqb, a2, seqb_len, gxmem, &trimInfo, editscript);
1108 >      if (alninfo) galns.Add(alninfo);
1109 >      }
1110    #ifdef GDEBUG
1111 +  /*
1112    for (int i=0;i<galns.Count();i++) {
1113      GXAlnInfo* alninfo=galns[i];
1114      GMessage("a(%d..%d) align to b(%d..%d), score=%d, pid=%4.2f\n", alninfo->ql, alninfo->qr,
# Line 1103 | Line 1118
1118        alninfo->gapinfo->printAlignment(stderr, seqa, seqa_len, seqb, seqb_len);
1119        }
1120      }
1121 +  */
1122    #endif
1123    //---- done
1124    delete alnbands;
1125    if (galns.Count()) {
1126      GXAlnInfo* bestaln=galns.Shift();
1127 +    #ifdef GDEBUG
1128 +      GMessage("Best alignment: a(%d..%d) align to b(%d..%d), score=%d, pid=%4.2f\n", bestaln->ql, bestaln->qr,
1129 +          bestaln->sl, bestaln->sr, bestaln->score, bestaln->pid);
1130 +      if (bestaln->gapinfo!=NULL) {
1131 +        bestaln->gapinfo->printAlignment(stderr, seqa, seqa_len, seqb, seqb_len);
1132 +        }
1133 +    #endif
1134      return bestaln;
1135      }
1136    else return NULL;

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines