ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/seqalign/GXAlign.h
(Generate patch)
# Line 5 | Line 5
5  
6   #define GDEBUG 1
7  
8 #define MAX_SPACE 2000000
9
8   enum {
9      EDIT_OP_MASK = 0x3,
10      EDIT_OP_ERR  = 0x0,
# Line 136 | Line 134
134   };
135  
136  
137 + /** Bookkeeping structure for greedy alignment. When aligning
138 +    two sequences, the members of this structure store the
139 +    largest offset into the second sequence that leads to a
140 +    high-scoring alignment for a given start point */
141 + struct SGreedyOffset {
142 +    int insert_off;    // Best offset for a path ending in an insertion
143 +    int match_off;     // Best offset for a path ending in a match
144 +    int delete_off;    // Best offset for a path ending in a deletion
145 + };
146 +
147   // ----- pool allocator -----
148   // works as a linked list of allocated memory blocks
149   struct GXMemPool {
150 <    char* memblock;
150 >    SGreedyOffset* memblock;
151      int used, size;
152      GXMemPool *next;
153 +    static int kMinSpace;
154      //methods
155 <    GXMemPool() { //by default allocate a large block here (10M)
156 <        uint32 amount= MAX_SPACE<<3;
157 <        GCALLOC(memblock, amount);
155 >    GXMemPool(int num_offsp=0) { //by default allocate a large block here (10M)
156 >        num_offsp=GMAX(kMinSpace, num_offsp);
157 >        GMALLOC(memblock, num_offsp*sizeof(SGreedyOffset));
158          if (memblock == NULL) {
159 <           GError("Failed to allocated GXAlnSpace for greedy extension!\n");
159 >           GError("Failed to allocated GXMemPool(%d) for greedy extension!\n",num_offsp);
160             return;
161             }
162 <        used = 0;
163 <        size = amount;
162 >        used = 0;
163 >        size = num_offsp;
164          next = NULL;
165          }
166 +
167      void refresh() {
168         GXMemPool* sp=this;
169         while (sp) {
# Line 173 | Line 183
183        GFREE(memblock);
184        }
185  
186 <  void* getSpace(int amount) { //amount to use or allocate memory, in bytes
186 >  SGreedyOffset* getSpace(int num_alloc) { // SGreedyOffset[num_alloc] array
187       //can use the first found memory block with enough room,
188      // or allocate a new large block
189 <    char* v;
190 <     if (amount < 0) return NULL;
191 <     GXMemPool* S=this;
192 <     while (used+amount > S->size) {
189 >    SGreedyOffset* v;
190 >    if (num_alloc < 0) return NULL;
191 >    GXMemPool* S=this;
192 >    while (used+num_alloc > S->size) {
193          //no room in current block, get a new mem block
194          if (next == NULL) {
195 <           next=new GXMemPool(); //allocates a large contiguous memory block
195 >           next=new GXMemPool(num_alloc); //allocates a large contiguous memory block
196             }
197          S = S->next;
198          }
199 <     v = S->memblock+S->used;
200 <     S->used += amount;
201 <     //align to next 8-byte boundary
202 <     int m8 = S->used & 7; //modulo 8
203 <     if (m8)
199 >    v = S->memblock+S->used;
200 >    S->used += num_alloc;
201 >    //align to first 8-byte boundary
202 >    int m8 = S->used & 7; //modulo 8
203 >    if (m8)
204           S->used += 8 - m8;
205 <     return (void*)v;
206 <     }
205 >    return v;
206 >    }
207 >
208 >  void* getByteSpace(int byte_size) { //amount to use or allocate memory, in bytes
209 >    return (void*)getSpace(byte_size/sizeof(SGreedyOffset));
210 >    }
211 >
212   };
213  
214 < #define ERROR_FRACTION 5  // 1/this : the higher it is, the smaller the diagonal band
214 > #define GREEDY_MAX_COST_FRACTION 5
215 > /* (was 2) sequence_length / (this number) is a measure of how hard the
216 >  alignment code will work to find the optimal alignment; in fact
217 >  this gives a worst case bound on the number of loop iterations */
218 >
219 > #define GREEDY_MAX_COST 1000
220 > // The largest diff distance (max indels+mismatches) to be examined for an optimal alignment
221 > // (should be increased for large sequences)
222 >
223 > #define GX_GALLOC_ERROR "Error: failed to allocate memory for greedy alignment!\n"
224 >
225 > // all auxiliary memory needed for the greedy extension algorithm
226 > struct SGreedyAlignMem {
227 >   int** last_seq2_off;              // 2-D array of distances
228 >   int* max_score;                   // array of maximum scores
229 >   GXMemPool* space;                // local memory pool for SGreedyOffset structs
230 >
231 >   // Allocate memory for the greedy gapped alignment algorithm
232 >   SGreedyAlignMem(int reward, int penalty, int Xdrop) {
233 >      int max_d, d_diff;
234 >      if (penalty<0) penalty=-penalty;
235 >      if (reward % 2) {
236 >         //scale params
237 >         reward = reward << 1;
238 >         penalty = (penalty << 1);
239 >         Xdrop = Xdrop<<1;
240 >         }
241 >      //if (gap_open == 0 && gap_extend == 0)
242 >      //   gap_extend = (reward >> 1) + penalty;
243 >      const int max_dbseq_length=255; //adjust this accordingly
244 >      max_d = GMIN(GREEDY_MAX_COST,
245 >                  (max_dbseq_length/GREEDY_MAX_COST_FRACTION + 1));
246 >
247 >      last_seq2_off=NULL;   // 2-D array of distances
248 >      max_score=NULL;       // array of maximum scores
249 >      space=NULL;           // local memory pool for SGreedyOffset structs
250 >      //if (score_params.gap_open==0 && score_params.gap_extend==0) {
251 >         //non-affine, simpler Greedy algorithm
252 >       d_diff = (Xdrop+reward/2)/(penalty+reward)+1;
253 >       GMALLOC(last_seq2_off, ((max_d + 2) * sizeof(int*)));
254 >       if (!last_seq2_off)
255 >         GError(GX_GALLOC_ERROR);
256 >       GCALLOC(last_seq2_off[0], ((max_d + max_d + 6) * sizeof(int) * 2));
257 >       //allocates contiguous memory for 2 rows here
258 >       if (!last_seq2_off[0])
259 >               GError(GX_GALLOC_ERROR);
260 >       last_seq2_off[1] = last_seq2_off[0] + max_d + max_d + 6; //memory allocated already for this row
261 >
262 >      GCALLOC(max_score, (sizeof(int) * (max_d + 1 + d_diff)));
263 >      space = new GXMemPool();
264 >      if (!max_score || !space)
265 >         GError(GX_GALLOC_ERROR);
266 >    } //consructor
267 >
268 >   ~SGreedyAlignMem() {
269 >     if (last_seq2_off) {
270 >         GFREE(last_seq2_off[0]);
271 >         GFREE(last_seq2_off);
272 >         }
273 >     GFREE(max_score);
274 >     delete space;
275 >     }
276  
201 struct GXAlnMem { //GreedyAlignMem
202  int** flast_d; //flast_d[][]
203  int* max_row_free;
204  int* uplow_free;
205  GXMemPool* space;
206  int MAX_D;
207  GXAlnMem(int max_len, int reward, int penalty, int Xdrop) {
208    flast_d=NULL;
209    max_row_free=NULL;
210    MAX_D=0;
211    uplow_free=NULL;
212    space=NULL;
213    MAX_D = (int)(max_len / ERROR_FRACTION + 1);
214    //MAX_D = 4;
215    int d_diff = (Xdrop+reward/2)/(penalty+reward) + 1;
216    GCALLOC(flast_d, (MAX_D+2)* sizeof(int*));
217    if (flast_d == NULL) {
218       GError("Error: cannot allocate GXAlnMem structure!\n");
219       }
220    flast_d[0] = (int*)malloc(((MAX_D<<1) + 6) * sizeof(int) * 2);
221    //allocates two int rows - flast_d[1] too
222    if (flast_d[0] == NULL) {
223        GError("Error: failed to allocate flast_d[0] in GXAlnMem!\n");
224        }
225    flast_d[1] = flast_d[0] + (MAX_D<<1) + 6; //int array for next row
226    uplow_free = NULL;
227    max_row_free = (int*) malloc(sizeof(int) * (MAX_D + 1 + d_diff));
228    space = new GXMemPool();
229    }
230  ~GXAlnMem() {
231    delete space;
232    GFREE(max_row_free);
233    GFREE(flast_d[0]);
234    GFREE(flast_d);
235    }
277   };
278  
238 int GXGreedyAlign(const char* s1, int len1,
239        const char* s2, int len2,
240        bool reverse, int xdrop_threshold,
241        int match_cost, int mismatch_cost,
242        int* e1, int* e2,
243        GXAlnMem* abmp, GXEditScript *S=NULL);
279  
280   #define GAPALIGN_SUB ((unsigned char)0)  /*op types within the edit script*/
281   #define GAPALIGN_INS ((unsigned char)1)
# Line 383 | Line 418
418   int score;
419   double pid;
420   GXEditScript* editscript;
421 < GXAlnMem* abmp;
421 > //GXAlnMem* abmp;
422 > SGreedyAlignMem* alnmem;
423   CAlnGapInfo* gapinfo;
424   GXAlnInfo(const char* q, int q_l, int q_r, const char* s, int s_l, int s_r,
425        int sc=0, double percid=0) {
# Line 400 | Line 436
436      }
437    ~GXAlnInfo() {
438      delete editscript;
439 <    delete abmp;
439 >    delete alnmem;
440      delete gapinfo;
441      }
442    bool operator<(GXAlnInfo& d) {
# Line 494 | Line 530
530       }
531  
532    //bands will be sorted by decreasing score eventually, after all seeds are added
533 +  //more seeds better than one longer seed?
534    bool operator<(GXBand& d){
535       return ((score==d.score) ? seeds.Count()>d.seeds.Count() : score>d.score);
536       }
# Line 501 | Line 538
538       return ((score==d.score) ? seeds.Count()<d.seeds.Count() : score<d.score);
539       }
540    bool operator==(GXBand& d){
541 <     return (score==d.score && seeds.Count()==d.seeds.Count());
541 >     //return (score==d.score && seeds.Count()==d.seeds.Count());
542 >    return (score==d.score && seeds.Count()==d.seeds.Count());
543       }
544  
545   };
# Line 536 | Line 574
574       }
575   };
576  
577 +
578   GXBandSet* collectSeeds_R(GList<GXSeed>& seeds, const char* seqa, int a_len, const char* seqb, int b_len); //overlap of 3' end of seqb
579   //void collectSeeds_L(GList<GXSeed>& seeds, const char* seqa, const char* seqb); //overlap of 5' end of seqb
580  
581   void printEditScript(GXEditScript* ed_script);
582  
583  
584 + int GXGreedyExtend(const char* seq1, int len1,
585 +                       const char* seq2, int len2,
586 +                       bool reverse, int xdrop_threshold,
587 +                       int match_cost, int mismatch_cost,
588 +                       int& seq1_align_len, int& seq2_align_len,
589 +                       SGreedyAlignMem& aux_data,
590 +                       GXEditScript *edit_block);
591 +
592   // reward MUST be >1, always
593   GXAlnInfo* GreedyAlignRegion(const char* q_seq, int q_alnstart, int q_max,
594                    const char* s_seq, int s_alnstart, int s_max,
595 <                     bool editscript=false, int reward=2, int penalty=-2, int xdrop=14);
595 >                     bool editscript=false, int reward=2, int penalty=2, int xdrop=14);
596   GXAlnInfo* GreedyAlign(const char* q_seq,  int q_alnstart, const char* s_seq, int s_alnstart,
597 <        bool editscript=false, int reward=2, int penalty=-2, int xdrop=14);
597 >        bool editscript=false, int reward=2, int penalty=2, int xdrop=14);
598  
599   #endif

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines