ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/gclib/GAlnExtend.cpp
(Generate patch)
# Line 726 | Line 726
726            GXSeed* newseed=new GXSeed(ai,bi,len);
727            seeds.Add(newseed);
728            diagstrips->addSeed(newseed);//add it to all 3 adjacent diagonals
729 +          //special last resort terminal match to be used if no better alignment is there
730 +          if (len>4 && ai==0 && (bi+len==b_len || bi+len==b_len-1)) diagstrips->tmatch=newseed;
731            }
732         }
733      }//for each 4-mer at the beginning of seqa
# Line 785 | Line 787
787            GXSeed* newseed=new GXSeed(ai,bi,len);
788            seeds.Add(newseed);
789            diagstrips->addSeed(newseed);//add it to all 3 adjacent diagonals
790 +          //special last resort terminal match to be used if no better alignment is there
791 +          if (len>4 && bi==0 && ai+len==a_len) diagstrips->tmatch=newseed;
792            }
793         }
794      }//for each 4-mer at the end of seqa
# Line 906 | Line 910
910      alninfo->score=retscore;
911      alninfo->pid = 100 * (1 - ((double) numdiffs) / hsp_length);
912   #ifdef GDEBUG
913 <    if (ed_script_rev) {
914 <       GMessage("Final Edit script ::: ");
915 <       printEditScript(ed_script_rev);
916 <       }
913 >    //if (ed_script_rev) {
914 >    //   GMessage("Final Edit script ::: ");
915 >    //   printEditScript(ed_script_rev);
916 >    //   }
917   #endif
918      alninfo->editscript=ed_script_rev;
919      alninfo->gapinfo = new CAlnGapInfo(ed_script_rev, alninfo->ql-1, alninfo->sl-1);
# Line 944 | Line 948
948    bool editscript=false;
949    #ifdef GDEBUG
950     editscript=true;
951 <   GMessage("==========> matching Right (3') end ========\n");
951 >  // GMessage("==========> matching Right (3') end ========\n");
952    #endif
953  
954    CAlnTrim trimInfo(galn_TrimRight, seqb, seqb_len);
# Line 953 | Line 957
957    GList<GXSeed> anchor_seeds(cmpSeedDiag, NULL, true); //stores unique seeds per diagonal
958    //did we find a shortcut?
959    if (alnbands->qmatch) {
960 <    #ifdef GDEBUG
961 <     GMessage("::: Found a quick long match -- using it instead of diagonal bands\n");
962 <    #endif
960 >    //#ifdef GDEBUG
961 >    // GMessage("::: Found a quick long match -- using it instead of diagonal bands\n");
962 >    //#endif
963      anchor_seeds.Add(alnbands->qmatch);
964      }
965    else {
# Line 963 | Line 967
967      int top_band_count=0;
968      for (int b=0;b<alnbands->Count();b++) {
969         if (alnbands->Get(b)->score<4) break;
970 <       #ifdef GDEBUG
971 <       GMessage("\tBand %d score: %d\n", b, alnbands->Get(b)->score);
972 <       #endif
970 >       //#ifdef GDEBUG
971 >       //GMessage("\tBand %d score: %d\n", b, alnbands->Get(b)->score);
972 >       //#endif
973         top_band_count++;
974         GXBand& band=*(alnbands->Get(b));
975         band.seeds.setSorted(cmpSeedScore);
# Line 973 | Line 977
977         band.tested=true;
978         if (anchor_seeds.Count()>2 || top_band_count>max_top_bands) break;
979         }
980 <    #ifdef GDEBUG
981 <    GMessage("::: Collected %d anchor seeds.\n",anchor_seeds.Count());
982 <    #endif
980 >    //#ifdef GDEBUG
981 >    //GMessage("::: Collected %d anchor seeds.\n",anchor_seeds.Count());
982 >    //#endif
983      }
984  
985    GList<GXAlnInfo> galns(true,true,false);
# Line 989 | Line 993
993               galns.AddIfNew(alninfo, true);
994          else delete alninfo;
995      }
996 <  //special case: due to the scoring scheme bias towards the 5' end of the read,
997 <  //also try some seeds at the 3' end
996 >  //special 3' end case: due to the seed scoring scheme being biased towards the 5' end of the read,
997 >  //we should also try some seeds closer to the 3' end
998    if (galns.Count()==0) {
999 +    anchor_seeds.Clear();
1000      alnbands->setSorted(cmpDiagBands_R);
1001      int max_top_bands=4;
1002      int top_band_count=0;
1003 <    #ifdef GDEBUG
1004 <    GMessage(":::>> Retrying adjusting sort order.\n");
1005 <    #endif
1003 >    //#ifdef GDEBUG
1004 >    //GMessage(":::>> Retrying adjusting sort order.\n");
1005 >    //#endif
1006 >    if (alnbands->tmatch) {
1007 >      //anchor_seeds.setSorted(false);
1008 >      anchor_seeds.Add(alnbands->tmatch);
1009 >      }
1010      for (int b=0;b<alnbands->Count();b++) {
1011         if (alnbands->Get(b)->score<4) break;
1012 <       #ifdef GDEBUG
1013 <       GMessage("\tBand %d score: %d\n", b, alnbands->Get(b)->score);
1014 <       #endif
1012 >       //#ifdef GDEBUG
1013 >       //GMessage("\tBand %d score: %d\n", b, alnbands->Get(b)->score);
1014 >       //#endif
1015         if (alnbands->Get(b)->tested) continue;
1016         top_band_count++;
1017         GXBand& band=*(alnbands->Get(b));
# Line 1010 | Line 1019
1019         anchor_seeds.Add(band.seeds.First());
1020         if (anchor_seeds.Count()>2 || top_band_count>max_top_bands) break;
1021         }
1022 <    #ifdef GDEBUG
1023 <    GMessage("::: Collected %d anchor seeds.\n",anchor_seeds.Count());
1024 <    #endif
1022 >    //#ifdef GDEBUG
1023 >    //GMessage("::: Collected %d anchor seeds.\n",anchor_seeds.Count());
1024 >    //#endif
1025      for (int i=0;i<anchor_seeds.Count();i++) {
1026        GXSeed& aseed=*anchor_seeds[i];
1027        int a1=aseed.a_ofs+(aseed.len>>1)+1;
# Line 1024 | Line 1033
1033            else delete alninfo;
1034        }
1035      }
1027  #ifdef GDEBUG
1028  for (int i=0;i<galns.Count();i++) {
1029    GXAlnInfo* alninfo=galns[i];
1030    GMessage("a(%d..%d) align to b(%d..%d), score=%d, pid=%4.2f\n", alninfo->ql, alninfo->qr,
1031                         alninfo->sl, alninfo->sr, alninfo->score, alninfo->pid);
1032    if (alninfo->gapinfo!=NULL) {
1033      GMessage("Alignment:\n");
1034      alninfo->gapinfo->printAlignment(stderr, seqa, seqa_len, seqb, seqb_len);
1035      }
1036    }
1037  #endif
1036    //---- done
1037    delete alnbands;
1038    if (galns.Count()) {
1039      GXAlnInfo* bestaln=galns.Shift();
1040 +    #ifdef GDEBUG
1041 +      GMessage("Best alignment: a(%d..%d) align to b(%d..%d), score=%d, pid=%4.2f\n", bestaln->ql, bestaln->qr,
1042 +          bestaln->sl, bestaln->sr, bestaln->score, bestaln->pid);
1043 +      if (bestaln->gapinfo!=NULL) {
1044 +        bestaln->gapinfo->printAlignment(stderr, seqa, seqa_len, seqb, seqb_len);
1045 +        }
1046 +    #endif
1047 +
1048      return bestaln;
1049      }
1050    else return NULL;
# Line 1049 | Line 1055
1055    bool editscript=false;
1056    #ifdef GDEBUG
1057     editscript=true;
1058 <   GMessage("==========> matching Left (5') end ========\n");
1058 >   //GMessage("==========> matching Left (5') end ========\n");
1059    #endif
1060    CAlnTrim trimInfo(galn_TrimLeft, seqb, seqb_len);
1061    GList<GXSeed> rseeds(true,true,false);
# Line 1058 | Line 1064
1064    GList<GXSeed> anchor_seeds(cmpSeedDiag, NULL, true); //stores unique seeds per diagonal
1065  
1066    if (alnbands->qmatch) {
1067 <    #ifdef GDEBUG
1068 <     GMessage("::: Found a quick long match -- using it instead of diagonal bands\n");
1069 <    #endif
1067 >    //#ifdef GDEBUG
1068 >    // GMessage("::: Found a quick long match -- using it instead of diagonal bands\n");
1069 >    //#endif
1070      anchor_seeds.Add(alnbands->qmatch);
1071      }
1072    else {
# Line 1068 | Line 1074
1074      int top_band_count=0;
1075      for (int b=0;b<alnbands->Count();b++) {
1076         if (alnbands->Get(b)->score<4) break;
1077 <       #ifdef GDEBUG
1078 <       GMessage("\tBand %d score: %d\n", b, alnbands->Get(b)->score);
1079 <       #endif
1077 >       //#ifdef GDEBUG
1078 >       //GMessage("\tBand %d score: %d\n", b, alnbands->Get(b)->score);
1079 >       //#endif
1080         top_band_count++;
1081         GXBand& band=*(alnbands->Get(b));
1082         band.seeds.setSorted(cmpSeedScore);
1083         anchor_seeds.Add(band.seeds.First());
1084         if (anchor_seeds.Count()>2 || top_band_count>max_top_bands) break;
1085         }
1086 <    #ifdef GDEBUG
1087 <    GMessage("::: Collected %d anchor seeds.\n",anchor_seeds.Count());
1088 <    #endif
1086 >    //#ifdef GDEBUG
1087 >    //GMessage("::: Collected %d anchor seeds.\n",anchor_seeds.Count());
1088 >    //#endif
1089      }
1090    for (int i=0;i<anchor_seeds.Count();i++) {
1091      GXSeed& aseed=*anchor_seeds[i];
# Line 1092 | Line 1098
1098              galns.AddIfNew(alninfo, true);
1099         else delete alninfo;
1100      }
1101 <
1101 >  if (galns.Count()==0 && alnbands->tmatch) {
1102 >      GXSeed& aseed=*alnbands->tmatch;
1103 >      int a1=aseed.a_ofs+(aseed.len>>1)+1;
1104 >      int a2=aseed.b_ofs+(aseed.len>>1)+1;
1105 >      GXAlnInfo* alninfo=GreedyAlignRegion(seqa, a1, seqa_len,
1106 >                              seqb, a2, seqb_len, gxmem, &trimInfo, editscript);
1107 >      if (alninfo) galns.Add(alninfo);
1108 >      }
1109    #ifdef GDEBUG
1110 +  /*
1111    for (int i=0;i<galns.Count();i++) {
1112      GXAlnInfo* alninfo=galns[i];
1113      GMessage("a(%d..%d) align to b(%d..%d), score=%d, pid=%4.2f\n", alninfo->ql, alninfo->qr,
# Line 1103 | Line 1117
1117        alninfo->gapinfo->printAlignment(stderr, seqa, seqa_len, seqb, seqb_len);
1118        }
1119      }
1120 +  */
1121    #endif
1122    //---- done
1123    delete alnbands;
1124    if (galns.Count()) {
1125      GXAlnInfo* bestaln=galns.Shift();
1126 +    #ifdef GDEBUG
1127 +      GMessage("Best alignment: a(%d..%d) align to b(%d..%d), score=%d, pid=%4.2f\n", bestaln->ql, bestaln->qr,
1128 +          bestaln->sl, bestaln->sr, bestaln->score, bestaln->pid);
1129 +      if (bestaln->gapinfo!=NULL) {
1130 +        bestaln->gapinfo->printAlignment(stderr, seqa, seqa_len, seqb, seqb_len);
1131 +        }
1132 +    #endif
1133      return bestaln;
1134      }
1135    else return NULL;

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines