ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/gffread/gff_utils.h
Revision: 26
Committed: Wed Jul 27 19:39:27 2011 UTC (8 years, 2 months ago) by gpertea
File size: 21622 byte(s)
Log Message:
added missing gff_utils source

Line File contents
1 #ifndef GFF_UTILS_H
2 #define GFF_UTILS_H
3 #include "gff.h"
4 #include "GStr.h"
5 #include "GFastaIndex.h"
6 #include "GFaSeqGet.h"
7
8
9 typedef bool GFValidateFunc(GffObj* gf, GList<GffObj>* gfadd);
10
11 class GeneInfo { //for Ensembl GTF conversion
12 public:
13 int flag;
14 GffObj* gf;
15 GList<GStr> gene_names;
16 GList<GStr> transcripts; //list of transcript IDs
17 GeneInfo():gene_names(true, true, true), transcripts(true,true,true) {
18 gf=NULL;
19 flag=0;
20 }
21 GeneInfo(GffObj* gfrec, bool ensembl_convert=false):gene_names(true, true, true),
22 transcripts(true,true,true) {
23 flag=0;
24 if (gfrec->getGeneName())
25 gene_names.Add(new GStr(gfrec->getGeneName()));
26 transcripts.Add(new GStr(gfrec->getID()));
27 create_gf(gfrec, ensembl_convert);
28 }
29
30 void create_gf(GffObj* gfrec, bool ensembl_convert) {
31 gf=new GffObj(gfrec->getGeneID());
32 gf->gseq_id=gfrec->gseq_id;
33 gf->track_id=gfrec->track_id;
34 gf->start=gfrec->start;
35 gf->end=gfrec->end;
36 gf->strand=gfrec->strand;
37 gf->setFeatureName("gene");
38 gf->isGene(true);
39 gf->isUsed(true);
40 gf->uptr=this;
41 gfrec->incLevel();
42 gfrec->parent=gf;
43 gf->children.Add(gfrec);
44 if (ensembl_convert) {
45 gf->addAttr("type", gf->getTrackName());
46 }
47 //gf->children.Add(gfrec);
48 }
49 //~GeneInfo() {
50 // }
51 void update(GffObj* gfrec) {
52 if (transcripts.AddedIfNew(new GStr(gfrec->getID()))<0)
53 return;
54 gene_names.AddedIfNew(new GStr(gfrec->getGeneName()));
55 if (gf==NULL) {
56 GError("GeneInfo::update() called on uninitialized gf!\n");
57 //create_gf(gfrec);
58 //return;
59 }
60 gfrec->parent=gf;
61 gf->children.Add(gfrec);
62 gfrec->incLevel();
63 if (gf->start>gfrec->start)
64 gf->start=gfrec->start;
65 if (gf->end<gfrec->end)
66 gf->end=gfrec->end;
67 }
68 void finalize() {
69 //prepare attributes for printing
70 //must be called right before printing
71 if (gf==NULL || transcripts.Count()==0) return;
72 if (gene_names.Count()>0) {
73 gf->addAttr("Name", gene_names[0]->chars());
74 /*
75 GStr s(gene_names[0]->chars());
76 for (int i=1;i<gene_names.Count();i++) {
77 s.append(",");
78 s.append(gene_names[i]->chars());
79 }
80 gf->addAttr("genes", s.chars());
81 */
82 } //has gene names
83 GStr t(transcripts[0]->chars());
84 for (int i=1;i<transcripts.Count();i++) {
85 t.append(",");
86 t.append(transcripts[i]->chars());
87 }
88 gf->addAttr("transcripts", t.chars());
89 }
90 };
91
92 //genomic fasta sequence handling
93 class GFastaDb {
94 public:
95 char* fastaPath;
96 GFastaIndex* faIdx; //could be a cdb .cidx file
97 int last_fetchid;
98 GFaSeqGet* faseq;
99 //GCdbYank* gcdb;
100 char* getFastaFile(int gseq_id) {
101 if (fastaPath==NULL) return NULL;
102 GStr s(fastaPath);
103 s.trimR('/');
104 s.appendfmt("/%s",GffObj::names->gseqs.getName(gseq_id));
105 GStr sbase(s);
106 if (!fileExists(s.chars())) s.append(".fa");
107 if (!fileExists(s.chars())) s.append("sta");
108 if (fileExists(s.chars())) return Gstrdup(s.chars());
109 else {
110 GMessage("Warning: cannot find genomic sequence file %s{.fa,.fasta}\n",sbase.chars());
111 return NULL;
112 }
113 }
114
115 GFastaDb(const char* fpath=NULL) {
116 //gcdb=NULL;
117 fastaPath=NULL;
118 faseq=NULL;
119 faIdx=NULL;
120 init(fpath);
121 }
122
123 void init(const char* fpath) {
124 if (fpath==NULL || fpath[0]==0) return;
125 last_fetchid=-1;
126 if (!fileExists(fpath))
127 GError("Error: file/directory %s does not exist!\n",fpath);
128 fastaPath=Gstrdup(fpath);
129 GStr gseqpath(fpath);
130 if (fileExists(fastaPath)>1) { //exists and it's not a directory
131 GStr fainame(fastaPath);
132 if (fainame.rindex(".fai")==fainame.length()-4) {
133 //.fai index file given directly
134 fastaPath[fainame.length()-4]=0;
135 if (!fileExists(fastaPath))
136 GError("Error: cannot find fasta file for index %s !\n", fastaPath);
137 }
138 else fainame.append(".fai");
139 //GMessage("creating GFastaIndex with fastaPath=%s, fainame=%s\n", fastaPath, fainame.chars());
140 faIdx=new GFastaIndex(fastaPath,fainame.chars());
141 GStr fainamecwd(fainame);
142 int ip=-1;
143 if ((ip=fainamecwd.rindex(CHPATHSEP))>=0)
144 fainamecwd.cut(0,ip+1);
145 if (!faIdx->hasIndex()) { //could not load index
146 //try current directory
147 if (fainame!=fainamecwd) {
148 if (fileExists(fainamecwd.chars())>1) {
149 faIdx->loadIndex(fainamecwd.chars());
150 }
151 }
152 } //tried to load index
153 if (!faIdx->hasIndex()) {
154 GMessage("No fasta index found for %s. Rebuilding, please wait..\n",fastaPath);
155 faIdx->buildIndex();
156 if (faIdx->getCount()==0) GError("Error: no fasta records found!\n");
157 GMessage("Fasta index rebuilt.\n");
158 FILE* fcreate=fopen(fainame.chars(), "w");
159 if (fcreate==NULL) {
160 GMessage("Warning: cannot create fasta index %s! (permissions?)\n", fainame.chars());
161 if (fainame!=fainamecwd) fcreate=fopen(fainamecwd.chars(), "w");
162 if (fcreate==NULL)
163 GError("Error: cannot create fasta index %s!\n", fainamecwd.chars());
164 }
165 if (faIdx->storeIndex(fcreate)<faIdx->getCount())
166 GMessage("Warning: error writing the index file!\n");
167 } //index created and attempted to store it
168 } //multi-fasta
169 }
170 GFaSeqGet* fetch(int gseq_id, bool checkFasta=false) {
171 if (fastaPath==NULL) return NULL;
172 if (gseq_id==last_fetchid && faseq!=NULL) return faseq;
173 delete faseq;
174 faseq=NULL;
175 last_fetchid=-1;
176 char* gseqname=GffObj::names->gseqs.getName(gseq_id);
177 // DEBUG:
178 //GMessage("..processing transcripts on: %s\n",gseqname);
179 //genomic sequence given
180 /*
181 if (gcdb!=NULL) {
182 uint32 reclen=0;
183 off_t rpos=gcdb->getRecordPos(gseqname, &reclen);
184 if (rpos<0) // genomic sequence not found
185 GError("Error: cannot find genomic sequence '%s' in %s\n",gseqname, fastaPath);
186 // WARNING: does not validate FASTA line-len uniformity!
187 faseq=new GFaSeqGet(fastaPath,rpos, false);
188 faseq->loadall(reclen); //load the whole sequence, it's faster
189 last_fetchid=gseq_id;
190 return faseq;
191 }
192 */
193 if (faIdx!=NULL) { //fastaPath was the multi-fasta file name
194 GFastaRec* farec=faIdx->getRecord(gseqname);
195 if (farec!=NULL) {
196 faseq=new GFaSeqGet(fastaPath,farec->seqlen, farec->fpos,
197 farec->line_len, farec->line_blen);
198 faseq->loadall(); //just cache the whole sequence, it's faster
199 last_fetchid=gseq_id;
200 }
201 else {
202 GMessage("Warning: couldn't find fasta record for '%s'!\n",gseqname);
203 return NULL;
204 }
205 }
206 else {
207 char* sfile=getFastaFile(gseq_id);
208 if (sfile!=NULL) {
209 faseq=new GFaSeqGet(sfile,checkFasta);
210 faseq->loadall();
211 last_fetchid=gseq_id;
212 GFREE(sfile);
213 }
214 } //one fasta file per contig
215 return faseq;
216 }
217
218 ~GFastaDb() {
219 GFREE(fastaPath);
220 //delete gcdb;
221 delete faIdx;
222 delete faseq;
223 }
224 };
225
226 class GffLocus;
227
228 class GTData { //transcript associated data
229 public:
230 GffObj* rna;
231 GffLocus* locus;
232 GffObj* replaced_by;
233 GeneInfo* geneinfo;
234 int flag;
235 GTData(GffObj* t=NULL) {
236 rna=t;
237 flag=0;
238 locus=NULL;
239 replaced_by=NULL;
240 geneinfo=NULL;
241 if (rna!=NULL) {
242 geneinfo=(GeneInfo*)rna->uptr; //take over geneinfo, if there
243 rna->uptr=this;
244 }
245 }
246 bool operator>(GTData& b) { return (rna > b.rna); }
247 bool operator<(GTData& b) { return (rna < b.rna); }
248 bool operator==(GTData& b) { return (rna==b.rna); }
249 };
250
251 class CGeneSym {
252 public:
253 GStr name;
254 int freq;
255 CGeneSym(const char* n=NULL, int f=0):name(n) {
256 freq=f;
257 }
258 bool operator>(CGeneSym& b) {
259 return (freq==b.freq) ? ( (name.length()==b.name.length()) ? (name>b.name) :
260 (name.length()>b.name.length()) ) : ( freq<b.freq );
261 }
262 bool operator<(CGeneSym& b) {
263 return (freq==b.freq)? ( (name.length()==b.name.length()) ? (name<b.name) :
264 (name.length()<b.name.length()) ) : ( freq>b.freq );
265 }
266 bool operator==(CGeneSym& b) { return name==b.name; }
267 };
268
269 const char* getGeneDescr(const char* gsym);
270
271 class GffLocus:public GSeg {
272 public:
273 int gseq_id; //id of underlying genomic sequence
274 int locus_num;
275 bool is_mrna;
276 char strand;
277 GffObj* t_maxcov; //transcript with maximum coverage (for main "ref" transcript)
278 GList<GffObj> rnas; //list of transcripts (isoforms) for this locus
279 GArray<GSeg> mexons; //list of merged exons in this region
280 GList<CGeneSym> gene_names;
281 GList<CGeneSym> gene_ids;
282 int v; //user flag/data
283 bool operator==(GffLocus& d){
284 return (gseq_id==d.gseq_id && strand==d.strand && start==d.start && end==d.end);
285 }
286 bool operator>(GffLocus& d){
287 if (gseq_id!=d.gseq_id) return (gseq_id>d.gseq_id);
288 if (start==d.start) {
289 if (end==d.end) return (strand>d.strand);
290 else return (end>d.end);
291 } else return (start>d.start);
292 }
293 bool operator<(GffLocus& d){
294 if (gseq_id!=d.gseq_id) return (gseq_id<d.gseq_id);
295 if (start==d.start) {
296 if (end==d.end) return strand<d.strand;
297 else return end<d.end;
298 } else return (start<d.start);
299 }
300
301 const char* getGeneName() {
302 if (gene_names.Count()==0) return NULL;
303 return gene_names.First()->name.chars();
304 }
305 const char* get_tmax_id() {
306 return t_maxcov->getID();
307 }
308 const char* get_descr() {
309 if (gene_names.Count()>0) {
310 for (int i=0;i<gene_names.Count();i++) {
311 const char* gn=getGeneDescr(gene_names.First()->name.chars());
312 if (gn!=NULL) return gn;
313 }
314 }
315 char* s=t_maxcov->getAttr("product");
316 if (s!=NULL) return s;
317 s=t_maxcov->getAttr("descr");
318 if (s!=NULL) return s;
319 s=t_maxcov->getAttr("description");
320 if (s!=NULL) return s;
321 s=t_maxcov->getAttr("info");
322 if (s!=NULL) return s;
323 return NULL;
324 }
325
326 GffLocus(GffObj* t=NULL):rnas(true,false,false),mexons(true,true),
327 gene_names(true,true,false), gene_ids(true,true,false) {
328 //this will NOT free rnas!
329 t_maxcov=NULL;
330 gseq_id=-1;
331 v=0;
332 locus_num=0;
333 start=0;
334 end=0;
335 strand=0;
336 is_mrna=false;
337 if (t!=NULL) {
338 start=t->exons.First()->start;
339 end=t->exons.Last()->end;;
340 gseq_id=t->gseq_id;
341 GSeg seg;
342 for (int i=0;i<t->exons.Count();i++) {
343 seg.start=t->exons[i]->start;
344 seg.end=t->exons[i]->end;
345 mexons.Add(seg);
346 }
347 rnas.Add(t);
348 ((GTData*)(t->uptr))->locus=this;
349 t_maxcov=t;
350 strand=t->strand;
351 if (t->ftype_id==gff_fid_mRNA) {
352 is_mrna=true;
353 }
354 }
355 }
356
357 void addMerge(GffLocus& locus, GffObj* lnkrna) {
358 //add all the elements of the other locus (merging)
359 //-- merge mexons
360 GArray<int> ovlexons(true,true); //list of locus.mexons indexes overlapping existing mexons
361 int i=0; //index of first mexons with a merge
362 int j=0; //index current mrna exon
363 while (i<mexons.Count() && j<locus.mexons.Count()) {
364 uint istart=mexons[i].start;
365 uint iend=mexons[i].end;
366 uint jstart=locus.mexons[j].start;
367 uint jend=locus.mexons[j].end;
368 if (iend<jstart) { i++; continue; }
369 if (jend<istart) { j++; continue; }
370 //if (mexons[i].overlap(jstart, jend)) {
371 //exon overlap was found :
372 ovlexons.Add(j);
373 //extend mexons[i] as needed
374 if (jstart<istart) mexons[i].start=jstart;
375 if (jend>iend) { //mexons[i] end extend
376 mexons[i].end=jend;
377 //now this could overlap the next mexon(s), so we have to merge them all
378 while (i<mexons.Count()-1 && mexons[i].end>mexons[i+1].start) {
379 uint nextend=mexons[i+1].end;
380 mexons.Delete(i+1);
381 if (nextend>mexons[i].end) {
382 mexons[i].end=nextend;
383 break; //no need to check next mexons
384 }
385 } //while next mexons merge
386 } // mexons[i] end extend
387 // } //exon overlap
388 j++; //check the next locus.mexon
389 }
390 //-- add the rest of the non-overlapping mexons:
391 GSeg seg;
392 for (int i=0;i<locus.mexons.Count();i++) {
393 seg.start=locus.mexons[i].start;
394 seg.end=locus.mexons[i].end;
395 if (!ovlexons.Exists(i)) mexons.Add(seg);
396 }
397 // -- add locus.rnas
398 for (int i=0;i<locus.rnas.Count();i++) {
399 ((GTData*)(locus.rnas[i]->uptr))->locus=this;
400 if (locus.rnas[i]!=lnkrna) rnas.Add(locus.rnas[i]);
401 }
402 // -- adjust start/end as needed
403 if (start>locus.start) start=locus.start;
404 if (end<locus.end) end=locus.end;
405 if (locus.is_mrna) is_mrna=true;
406 if (t_maxcov->covlen<locus.t_maxcov->covlen)
407 t_maxcov=locus.t_maxcov;
408 }
409
410
411 bool exonOverlap(GffLocus& loc) {
412 //check if any mexons overlap!
413 if (strand!=loc.strand || loc.start>end || start>loc.end) return false;
414 int i=0;
415 int j=0;
416 while (i<mexons.Count() && j<loc.mexons.Count()) {
417 uint istart=mexons[i].start;
418 uint iend=mexons[i].end;
419 uint jstart=loc.mexons[j].start;
420 uint jend=loc.mexons[j].end;
421 if (iend<jstart) { i++; continue; }
422 if (jend<istart) { j++; continue; }
423 //exon overlap found if we're here:
424 return true;
425 }
426 return false;
427 }
428
429 bool add_RNA(GffObj* t) {
430 //if (rnas.Count()==0) return true; //? should never be called on an empty locus
431 if (t->gseq_id!=gseq_id || t->strand!=strand || t->start>end || start>t->end)
432 return false; //rna must be on the same genomic seq
433 //check for exon overlap with existing mexons
434 //also update mexons accordingly if t is to be added
435 bool hasovl=false;
436 int i=0; //index of first mexons with a merge
437 int j=0; //index current t exon
438 GArray<int> ovlexons(true,true); //list of mrna exon indexes overlapping mexons
439 while (i<mexons.Count() && j<t->exons.Count()) {
440 uint istart=mexons[i].start;
441 uint iend=mexons[i].end;
442 uint jstart=t->exons[j]->start;
443 uint jend=t->exons[j]->end;
444 if (iend<jstart) { i++; continue; }
445 if (jend<istart) { j++; continue; }
446 //exon overlap found if we're here:
447 ovlexons.Add(j);
448 hasovl=true;
449 //extend mexons[i] as needed
450 if (jstart<istart) mexons[i].start=jstart;
451 if (jend>iend) { //mexon stretch up
452 mexons[i].end=jend;
453 //now this could overlap the next mexon(s), so we have to merge them all
454 while (i<mexons.Count()-1 && mexons[i].end>mexons[i+1].start) {
455 uint nextend=mexons[i+1].end;
456 mexons.Delete(i+1);
457 if (nextend>mexons[i].end) {
458 mexons[i].end=nextend;
459 break; //no need to check next mexons
460 }
461 } //while next mexons merge
462 } //possible mexons merge
463
464 j++; //check the next t exon
465 }//all vs all exon check loop
466 if (hasovl) {
467 GSeg seg;
468 //add the rest of the non-overlapping exons
469 for (int i=0;i<t->exons.Count();i++) {
470 seg.start=t->exons[i]->start;
471 seg.end=t->exons[i]->end;
472 if (!ovlexons.Exists(i)) mexons.Add(seg);
473 }
474 rnas_add(t);
475 // add to rnas
476 ((GTData*)t->uptr)->locus=this;
477 gseq_id=t->gseq_id;
478 }
479 return hasovl;
480 }
481
482 //simpler,basic adding of a mrna
483 void rnas_add(GffObj* t) {
484 rnas.Add(t);
485 // adjust start/end
486 if (start>t->start || start==0) start=t->start;
487 if (end<t->end) end=t->end;
488 if (t_maxcov->covlen<t->covlen) t_maxcov=t;
489 if (strand==0) strand=t->strand;
490 if (t->ftype_id==gff_fid_mRNA) is_mrna=true;
491 }
492 };
493
494 class GenomicSeqData {
495 int gseq_id;
496 public:
497 const char* gseq_name;
498 GList<GffObj> gfs; //all non-transcript features -> usually gene features
499 GList<GffObj> rnas; //all transcripts on this genomic sequence
500 GList<GffLocus> loci; //all loci clusters
501 GList<GTData> tdata; //transcript data (uptr holder for all rnas loaded here)
502 //GenomicSeqData(int gid=-1):rnas(true,true,false),loci(true,true,true),
503 GenomicSeqData(int gid=-1):gfs(true, true, false),rnas((GCompareProc*)gfo_cmpByLoc),loci(true,true,false),
504 tdata(false,true,false) {
505 gseq_id=gid;
506 if (gseq_id>=0)
507 gseq_name=GffObj::names->gseqs.getName(gseq_id);
508
509 }
510 bool operator==(GenomicSeqData& d){
511 return gseq_id==d.gseq_id;
512 }
513 bool operator>(GenomicSeqData& d){
514 return (gseq_id>d.gseq_id);
515 }
516 bool operator<(GenomicSeqData& d){
517 return (gseq_id<d.gseq_id);
518 }
519 };
520
521 int gseqCmpName(const pointer p1, const pointer p2);
522
523 class GSpliceSite {
524 public:
525 char nt[3];
526 GSpliceSite(const char* c, bool revc=false) {
527 nt[2]=0;
528 if (c==NULL) {
529 nt[0]=0;
530 nt[1]=0;
531 return;
532 }
533 if (revc) {
534 nt[0]=toupper(ntComplement(c[1]));
535 nt[1]=toupper(ntComplement(c[0]));
536 }
537 else {
538 nt[0]=toupper(c[0]);
539 nt[1]=toupper(c[1]);
540 }
541 }
542
543 GSpliceSite(const char* intron, int intronlen, bool getAcceptor, bool revc=false) {
544 nt[2]=0;
545 if (intron==NULL || intronlen==0)
546 GError("Error: invalid intron or intron len for GSpliceSite()!\n");
547 const char* c=intron;
548 if (revc) {
549 if (!getAcceptor) c+=intronlen-2;
550 nt[0]=toupper(ntComplement(c[1]));
551 nt[1]=toupper(ntComplement(c[0]));
552 }
553 else { //on forward strand
554 if (getAcceptor) c+=intronlen-2;
555 nt[0]=toupper(c[0]);
556 nt[1]=toupper(c[1]);
557 }//forward strand
558 }
559
560 GSpliceSite(const char n1, const char n2) {
561 nt[2]=0;
562 nt[0]=toupper(n1);
563 nt[1]=toupper(n2);
564 }
565 bool canonicalDonor() {
566 return (nt[0]=='G' && (nt[1]=='C' || nt[1]=='T'));
567 }
568 bool operator==(GSpliceSite& c) {
569 return (c.nt[0]==nt[0] && c.nt[1]==nt[1]);
570 }
571 bool operator==(GSpliceSite* c) {
572 return (c->nt[0]==nt[0] && c->nt[1]==nt[1]);
573 }
574 bool operator==(const char* c) {
575 //return (nt[0]==toupper(c[0]) && nt[1]==toupper(c[1]));
576 //assumes given const nucleotides are uppercase already!
577 return (nt[0]==c[0] && nt[1]==c[1]);
578 }
579 bool operator!=(const char* c) {
580 //assumes given const nucleotides are uppercase already!
581 return (nt[0]!=c[0] || nt[1]!=c[1]);
582 }
583 };
584
585 struct GffLoader {
586 GStr fname;
587 FILE* f;
588 bool transcriptsOnly;
589 bool fullAttributes;
590 bool noExonAttrs;
591 bool mergeCloseExons;
592 bool showWarnings;
593 void load(GList<GenomicSeqData>&seqdata, GFValidateFunc* gf_validate=NULL,
594 bool doCluster=true, bool doCollapseRedundant=true, bool matchAllIntrons=true, bool fuzzSpan=false);
595 GffLoader(const char* filename):fname(filename) {
596 f=NULL;
597 transcriptsOnly=true;
598 fullAttributes=false;
599 noExonAttrs=false;
600 mergeCloseExons=false;
601 showWarnings=false;
602 if (fname=="-") {
603 f=stdin;
604 fname="stdin";
605 }
606 else {
607 if ((f=fopen(fname.chars(), "r"))==NULL) {
608 GError("Error: cannot open file %s!\n",fname.chars());
609 }
610 }
611 }
612 ~GffLoader() {
613 if (f!=NULL && f!=stdin) fclose(f);
614 }
615 };
616
617 void printFasta(FILE* f, GStr& defline, char* seq, int seqlen=-1);
618
619 //"position" a given coordinate x within a list of transcripts sorted by their start (lowest)
620 //coordinate, using quick-search; the returned int is the list index of the closest *higher*
621 //GffObj - i.e. starting right *ABOVE* the given coordinate
622 //Convention: returns -1 if there is no such GffObj (i.e. last GffObj starts below x)
623 int qsearch_rnas(uint x, GList<GffObj>& rnas);
624 int qsearch_gloci(uint x, GList<GffLocus>& loci);
625
626 GffObj* redundantTranscripts(GffObj& ti, GffObj& tj, bool matchAllIntrons=true, bool fuzzSpan=false);
627
628 void placeGf(GffObj* t, GenomicSeqData* gdata, bool doCluster=true, bool collapseRedundant=true,
629 bool matchAllIntrons=true, bool fuzzSpan=false);
630 //void loadGFF(FILE* f, GList<GenomicSeqData>& seqdata, const char* fname);
631
632 void collectLocusData(GList<GenomicSeqData>& ref_data);
633
634 #endif