ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/gclib/gff.cpp
Revision: 2
Committed: Mon Mar 22 22:03:27 2010 UTC (9 years, 6 months ago) by gpertea
File size: 40309 byte(s)
Log Message:
added my gclib source files

Line File contents
1 #include "gff.h"
2
3 //GffNames* GffReader::names=NULL;
4 GffNames* GffObj::names=NULL;
5 //global set of feature names, attribute names etc.
6 // -- common for all GffObjs in current application!
7
8 const uint GFF_MAX_LOCUS = 4000000; //longest known gene in human is ~2.2M, UCSC claims a gene for mouse of ~ 3.1 M
9 const uint GFF_MAX_EXON = 20000; //longest known exon in human is ~11K
10 const uint GFF_MAX_INTRON= 1600000;
11
12 const int gff_fid_mRNA=0;
13 const int gff_fid_exon=1;
14 const int gff_fid_CDS=2;
15 bool gff_warns=true;
16
17 void gffnames_ref(GffNames* &n) {
18 if (n==NULL) n=new GffNames();
19 n->numrefs++;
20 }
21
22 void gffnames_unref(GffNames* &n) {
23 if (n==NULL) GError("Error: attempt to remove reference to null GffNames object!\n");
24 n->numrefs--;
25 if (n->numrefs==0) { delete n; n=NULL; }
26 }
27
28 int gfo_cmpByLoc(const pointer p1, const pointer p2) {
29
30 GffObj& g1=*((GffObj*)p1);
31 GffObj& g2=*((GffObj*)p2);
32 if (g1.gseq_id==g2.gseq_id) return (int)(g1.start-g2.start);
33 else return (int)(g1.gseq_id-g2.gseq_id);
34 }
35
36 static char fnamelc[32];
37
38 GffLine::GffLine(GffReader* reader, const char* l) {
39 line=Gstrdup(l);
40 skip=true;
41 gseqname=NULL;
42 track=NULL;
43 ftype=NULL;
44 info=NULL;
45 Parent=NULL;
46 is_cds=false;
47 is_mrna=false;
48 is_exon=false;
49 exontype=0;
50 gname=NULL;
51 qstart=0;
52 qend=0;
53 qlen=0;
54 exontype=0;
55 ID=NULL;
56 char* t[9];
57 int i=0;
58 int tidx=1;
59 t[0]=line;
60
61 while (line[i]!=0) {
62 if (line[i]=='\t') {
63 line[i]=0;
64 t[tidx]=line+i+1;
65 tidx++;
66 if (tidx>8) break;
67 }
68 i++;
69 }
70
71 if (tidx<8) { // ignore non-GFF lines
72 // GMessage("Warning: error parsing GFF/GTF line:\n%s\n", l);
73 return;
74 }
75 gseqname=t[0];
76 track=t[1];
77 ftype=t[2];
78 info=t[8];
79 char* p=t[3];
80 if (!parseUInt(p,fstart))
81 GError("Error parsing start coordinate from GFF line:\n%s\n",l);
82 p=t[4];
83 if (!parseUInt(p,fend))
84 GError("Error parsing end coordinate from GFF line:\n%s\n",l);
85 if (fend<fstart) swap(fend,fstart);
86 p=t[5];
87 if (p[0]=='.' && p[1]==0) {
88 score=0;
89 }
90 else {
91 if (!parseDouble(p,score))
92 GError("Error parsing feature score from GFF line:\n%s\n",l);
93 }
94 strand=*t[6];
95 if (strand!='+' && strand!='-' && strand!='.')
96 GError("Error parsing strand (%c) from GFF line:\n%s\n",strand,l);
97 phase=*t[7];
98 ID=NULL;
99 Parent=NULL;
100 // exon/CDS/mrna filter
101 strncpy(fnamelc, ftype, 31);
102 fnamelc[31]=0;
103 strlower(fnamelc); //convert to lower case
104 if (strstr(fnamelc, "utr")!=NULL) {
105 exontype=exgffUTR;
106 is_exon=true;
107 }
108 else if (strstr(fnamelc, "exon")!=NULL) {
109 exontype=exgffExon;
110 is_exon=true;
111 }
112 else if (startsWith(fnamelc, "stop") && strstr(fnamelc, "codon")!=NULL) {
113 exontype=exgffStop;
114 is_cds=true;
115 }
116 else if (strcmp(fnamelc, "cds")==0) {
117 exontype=exgffCDS;
118 is_cds=true;
119 }
120 else { //is_mrna is set only if the *current* line is a mRNA or transcript
121 is_mrna=(strcmp(fnamelc,"mrna")==0 ||
122 strcmp(fnamelc,"transcript")==0);
123 }
124
125 if (reader->mrnaOnly) {
126 if (!is_mrna && !is_cds && !is_exon)
127 return; //skip this line, unwanted feature name
128 }
129 p=strstr(info,"ID=");
130 if (p!=NULL) { //has ID attr
131 ID=p+3;
132 p=ID;
133 while (*p!=';' && *p!=0) p++;
134 ID=Gstrdup(ID, p-1);
135 //look for a name attr too:
136 p=strstr(info,"Name=");
137 if (p!=NULL) {
138 gname=p+5;
139 p=gname;
140 while (*p!=';' && *p!=0) p++;
141 gname=Gstrdup(gname, p-1);
142 }
143 }
144 p=NULL;
145 if (!is_mrna)
146 p=strifind(info,"Parent="); //don't care about the parent for mRNA features..
147 if (p!=NULL) { //has Parent attr
148 Parent=p+7;
149 p=Parent;
150 while (*p!=';' && *p!=0) p++;
151 Parent=Gstrdup(Parent, p-1);
152 }
153 else if (ID==NULL) { //no "Parent=" and no "ID=", attempt GTF parsing instead
154 p=strstr(info,"transcript_id");
155 if (p!=NULL) { //GTF detected
156 p+=13;
157 //requires quotes
158 while (*p!='"' && *p!=0) p++;
159 if (*p==0) GError("Error parsing transcript_id (double quotes expected) at GTF line:\n%s\n",l);
160 p++;
161 Parent=p;
162 while (*p!='"' && *p!=0) p++;
163 if (*p==0) GError("Error parsing transcript_id (ending double quotes expected) at GTF line:\n%s\n",l);
164 if (is_mrna) { // RGASP GTF exception: a parent "transcript" feature preceding exon/CDS subfeatures
165 ID=Gstrdup(Parent, p-1);
166 Parent=NULL;
167 }
168 else {
169 Parent=Gstrdup(Parent, p-1);
170 }
171 //check for gene_name or gene_id
172 //p=strstr(info, "gene_name");// this is preferred over gene_id
173 //if (p==NULL)
174 p=strstr(info,"gene_id");
175 if (p!=NULL) {
176 p+=7;//skip 'gene_id'
177 while (*p!='"' && *p!=0) p++;
178 if (*p==0) GError("Error parsing gene_id (double quotes expected) at GTF line:\n%s\n",l);
179 p++;
180 gname=p;
181 while (*p!='"' && *p!=0) p++;
182 if (*p==0) GError("Error parsing gene_id (ending double quotes expected) at GTF line:\n%s\n",l);
183 gname=Gstrdup(gname, p-1);
184 }
185 //prepare for parseAttr by adding '=' character instead of spaces for all attributes
186 //after the attribute name
187 p=info;
188 bool noed=true; //not edited after the last delim
189 bool nsp=false; //non-space found after last delim
190 while (*p!=0) {
191 if (*p==' ') {
192 if (nsp && noed) {
193 *p='=';
194 noed=false;
195 p++;
196 continue;
197 }
198 }
199 else nsp=true;
200 if (*p==';') { noed=true; nsp=false; }
201 p++;
202 }
203 } //gtf detected
204 else {//check for jigsaw or cufflinks format
205 char* fexon=strstr(fnamelc, "exon");
206 if (fexon!=NULL) {
207 if (startsWith(track,"jigsaw")) {
208 is_cds=true;
209 strcpy(track,"jigsaw");
210 p=strchr(info,';');
211 if (p==NULL) Parent=Gstrdup(info);
212 else { Parent=Gstrdup(info,p-1); info=p+1; }
213 }
214 else if ((i=strcspn(info,"; \t\n\r"))<=(int)(strlen(info)+1)) {//one word ID
215 Parent=Gstrdup(info,info+i-1);
216 }
217
218 }
219 else GError("Error parsing Parent/ID at input line:\n%s\n",l);
220 }
221 }
222 p=strstr(info,"Target=");
223 if (p!=NULL) { //has Target attr
224 p+=7;
225 while (*p!=';' && *p!=0 && *p!=' ') p++;
226 if (*p!=' ') {
227 GError("Error parsing target coordinates from GFF line:\n%s\n",l);
228 }
229 if (!parseUInt(p,qstart))
230 GError("Error parsing target start coordinate from GFF line:\n%s\n",l);
231 if (*p!=' ') {
232 GError("Error parsing next target coordinate from GFF line:\n%s\n",l);
233 }
234 p++;
235 if (!parseUInt(p,qend))
236 GError("Error parsing target end coordinate from GFF line:\n%s\n",l);
237 }
238 else {
239 p=strifind(info,"Qreg=");
240 if (p!=NULL) { //has Qreg attr
241 p+=5;
242 if (!parseUInt(p,qstart))
243 GError("Error parsing target start coordinate from GFF line:\n%s\n",l);
244 if (*p!='-') {
245 GError("Error parsing next target coordinate from GFF line:\n%s\n",l);
246 }
247 p++;
248 if (!parseUInt(p,qend))
249 GError("Error parsing target end coordinate from GFF line:\n%s\n",l);
250 if (*p=='|') {
251 p++;
252 if (!parseUInt(p,qlen))
253 GError("Error parsing target length from GFF Qreg|: \n%s\n",l);
254 }
255 }//has Qreg attr
256 }
257 if (qlen==0 && (p=strifind(info,"Qlen="))!=NULL) {
258 p+=5;
259 if (!parseUInt(p,qlen))
260 GError("Error parsing target length from GFF Qlen:\n%s\n",l);
261 }
262 skip=false;
263 }
264
265 int GffObj::addExon(GffLine* gl, bool keepAttr, bool noExonAttr) {
266 //this will make sure we have the right subftype_id!
267 int subf_id=-1;
268 if (ftype_id==gff_fid_mRNA) {
269 if (subftype_id<0) subftype_id=gff_fid_exon;
270 //any recognized mRNA segment gets the generic "exon" type (also applies to CDS)
271 if (!gl->is_cds && !gl->is_exon)
272 //extraneous mRNA feature, will discard for now
273 return -1;
274 }
275 else { //other kind of parent feature, check this subf type
276 subf_id=names->feats.addName(gl->ftype);
277 if (subftype_id<0)
278 subftype_id=subf_id;
279 else {
280 if (subftype_id!=subf_id)
281 GMessage("GFF Warning: multiple subfeatures (%s and %s) found for %s, only %s is kept\n",
282 names->feats.getName(subftype_id), names->feats.getName(subf_id),
283 gffID,names->feats.getName(subftype_id));
284 return -1; //skip this 2nd subfeature type for this parent!
285 }
286 }
287 if (gl->exontype==exgffUTR || gl->exontype==exgffStop)
288 udata=1;//merge 0-distance segments
289 int eidx=addExon(gl->fstart, gl->fend, gl->score, gl->phase,
290 gl->qstart,gl->qend, gl->is_cds, gl->exontype);
291 if (eidx>=0 && keepAttr) {
292 parseAttrs(exons[eidx]->attrs, gl->info, noExonAttr);
293 }
294 return eidx;
295 }
296
297
298 void GffObj::expandExon(int oi, uint segstart, uint segend, char exontype, double sc, char fr, int qs, int qe) {
299 //oi is the index of the *first* overlapping segment found that must be enlarged
300 covlen-=exons[oi]->len();
301 if (segstart<exons[oi]->start)
302 exons[oi]->start=segstart;
303 if (qs<exons[oi]->qstart) exons[oi]->qstart=qs;
304 if (segend>exons[oi]->end)
305 exons[oi]->end=segend;
306 if (qe>exons[oi]->qend) exons[oi]->qend=qe;
307 //warning: score cannot be properly adjusted! e.g. if it's a p-value it's just going to get worse
308 if (sc!=0) exons[oi]->score=sc;
309 covlen+=exons[oi]->len();
310 //if (exons[oi]->exontype< exontype) -- always true
311 exons[oi]->exontype = exontype;
312 if (exontype==exgffCDS) exons[oi]->phase=fr;
313 //we must check if any more exons are also overlapping this
314 int ni=oi+1; //next exon index after oi
315 while (ni<exons.Count() && segend>=exons[ni]->start) { // next segment overlaps new enlarged segment
316 //only allow this if next segment is fully included, and a subordinate
317 if (exons[ni]->exontype<exontype && exons[ni]->end<=segend) {
318 if (exons[ni]->qstart<exons[oi]->qstart) exons[oi]->qstart=exons[ni]->qstart;
319 if (exons[ni]->qend>exons[oi]->qend) exons[oi]->qend=exons[ni]->qend;
320 exons.Delete(ni);
321 }
322 else {
323 GMessage("GFF Warning: overlapping feature segment (%d-%d) for GFF ID %s\n", segstart, segend, gffID);
324 hasErrors=true;
325 break;
326 }
327 }
328 // -- make sure any other related boundaries are updated:
329 start=exons.First()->start;
330 end=exons.Last()->end;
331 if (uptr!=NULL) { //collect stats about the underlying genomic sequence
332 GSeqStat* gsd=(GSeqStat*)uptr;
333 if (start<gsd->mincoord) gsd->mincoord=start;
334 if (end>gsd->maxcoord) gsd->maxcoord=end;
335 }
336 }
337
338 int GffObj::addExon(uint segstart, uint segend, double sc, char fr, int qs, int qe, bool iscds, char exontype) {
339 if (exons.Count()==0) {
340 if (iscds) isCDS=true; //for now, assume CDS only if first "exon" given is a CDS
341 if (subftype_id<0) {
342 subftype_id = (ftype_id==gff_fid_mRNA) ? gff_fid_exon : ftype_id;
343 }
344 }
345 if (iscds) { //update CDS anchors:
346 if (CDstart==0 || segstart<CDstart) {
347 CDstart=segstart;
348 if (exontype==exgffCDS && strand=='+') CDphase=fr;
349 }
350 if (segend>CDend) {
351 if (exontype==exgffCDS && strand=='-') CDphase=fr;
352 CDend=segend;
353 }
354 }
355 else { // !iscds
356 isCDS=false;
357 }
358 if (qs || qe) {
359 if (qs>qe) swap(qs,qe);
360 if (qs==0) qs=1;
361 }
362 int ovlen=0;
363 int oi=exonOverlapIdx(segstart, segend, &ovlen);
364 if (oi>=0) { //overlap existing segment
365 //only allow this for CDS within exon, stop_codon within exon, stop_codon within UTR,
366 // or stop_codon within CDS
367 if (exons[oi]->exontype>exontype &&
368 exons[oi]->start<=segstart && exons[oi]->end>=segend &&
369 !(exons[oi]->exontype==exgffUTR && exontype==exgffCDS)) {
370 //larger segment given first, now the smaller included one
371 return oi; //only used to store attributes from current GffLine
372 }
373 if (exontype>exons[oi]->exontype &&
374 segstart<=exons[oi]->start && segend>=exons[oi]->end &&
375 !(exontype==exgffUTR && exons[oi]->exontype==exgffCDS)) {
376 //smaller segment given first, so we have to enlarge it
377 expandExon(oi, segstart, segend, exontype, sc, fr, qs, qe);
378 //this must also check for overlapping next exon (oi+1)
379 return oi;
380 }
381 //there is also the special case of "ribosomal slippage exception" (programmed frameshift)
382 //where two CDS segments may actually overlap for 1 or 2 bases, but there should be only one encompassing exon
383 if (ovlen>2 || exons[oi]->exontype!=exgffCDS || exontype!=exgffCDS) {
384 GMessage("GFF Warning: discarded overlapping feature segment (%d-%d) for GFF ID %s\n",
385 segstart, segend, gffID);
386 hasErrors=true;
387 return false; //segment NOT added
388 }
389 /* --
390 else {
391 // else add the segment if the overlap is small and between two CDS segments
392 //we might want to add an attribute here with the slippage coordinate and size?
393 }
394 */
395 }//overlap of existing segment
396
397 // --- no overlap, or accepted micro-overlap (ribosomal slippage)
398 // create & add the new segment
399 GffExon* enew=new GffExon(segstart, segend, sc, fr, qs, qe, exontype);
400 int eidx=exons.Add(enew);
401 if (eidx<0) {
402 GMessage("GFF Warning: duplicate segment %d-%d not added for GFF ID %s!\n",
403 segstart, segend, gffID);
404 return -1;
405 }
406 covlen+=(int)(exons[eidx]->end-exons[eidx]->start)+1;
407 start=exons.First()->start;
408 end=exons.Last()->end;
409 if (uptr!=NULL) { //collect stats about the underlying genomic sequence
410 GSeqStat* gsd=(GSeqStat*)uptr;
411 if (start<gsd->mincoord) gsd->mincoord=start;
412 if (end>gsd->maxcoord) gsd->maxcoord=end;
413 }
414 return eidx;
415 }
416
417 void GffObj::removeExon(int idx) {
418 /*
419 if (idx==0 && segs[0].start==gstart)
420 gstart=segs[1].start;
421 if (idx==segcount && segs[segcount].end==gend)
422 gend=segs[segcount-1].end;
423 */
424 if (idx<0 || idx>=exons.Count()) return;
425 int segstart=exons[idx]->start;
426 int segend=exons[idx]->end;
427 exons.Delete(idx);
428 covlen -= (int)(segend-segstart)+1;
429 start=exons.First()->start;
430 end=exons.Last()->end;
431 if (isCDS) { CDstart=start; CDend=end; }
432 }
433
434 GffObj::GffObj(GffReader *gfrd, GffLine* gffline, bool keepAttr, bool noExonAttr):
435 GSeg(0,0), exons(true,true,true) {
436 xstart=0;
437 xend=0;
438 xstatus=0;
439 partial=false;
440 isCDS=false;
441 uptr=NULL;
442 ulink=NULL;
443 udata=0;
444 CDstart=0;
445 CDend=0;
446 gname=NULL;
447 attrs=NULL;
448 gffID=NULL;
449 track_id=-1;
450 gseq_id=-1;
451 ftype_id=-1;
452 subftype_id=-1;
453 hasErrors=false;
454 if (gfrd==NULL)
455 GError("Cannot use this GffObj constructor with a NULL GffReader!\n");
456 gffnames_ref(names);
457 if (gfrd->names==NULL) gfrd->names=names;
458 qlen=0;qstart=0;qend=0;
459 gscore=0;
460 uscore=0;
461 covlen=0;
462 qcov=0;
463 if (gffline->Parent!=NULL) {
464 //GTF style -- subfeature given directly
465 if (gffline->is_cds || gffline->is_exon)
466 ftype_id=gff_fid_mRNA;
467 else {
468 //group of other subfeatures of type ftype:
469 ftype_id=names->feats.addName(gffline->ftype);
470 }
471 gffID=gffline->Parent;
472 gffline->Parent=NULL; //just take over
473 if (gffline->gname!=NULL) {
474 gname=gffline->gname;
475 gffline->gname=NULL;
476 }
477 gseq_id=names->gseqs.addName(gffline->gseqname);
478 track_id=names->tracks.addName(gffline->track);
479 strand=gffline->strand;
480 qlen=gffline->qlen;
481 start=gffline->fstart;
482 end=gffline->fend;
483 isCDS=gffline->is_cds; //for now
484 addExon(gffline, keepAttr, noExonAttr);
485 if (keepAttr && noExonAttr) {
486 //simply move the attrs from this first exon
487 //to the transcript
488 if (exons.First()->attrs!=NULL) {
489 attrs=exons.First()->attrs;
490 exons.First()->attrs=NULL;
491 }
492 }
493 } //GTF line
494 else { //GffReader made sure this is a parent line (no parent)
495 //even for a mRNA with a Parent= line
496 gscore=gffline->score;
497 if (gffline->ID==NULL || gffline->ID[0]==0)
498 GError("Error: no ID found for GFF record start\n");
499 gffID=gffline->ID; //there must be an ID here
500 if (gffline->is_mrna) ftype_id=gff_fid_mRNA;
501 else ftype_id=names->feats.addName(gffline->ftype);
502 gffline->ID=NULL; //steal it
503 if (gffline->gname!=NULL) {
504 gname=gffline->gname;
505 gffline->gname=NULL;
506 }
507 start=gffline->fstart;
508 end=gffline->fend;
509 gseq_id=names->gseqs.addName(gffline->gseqname);
510 track_id=names->tracks.addName(gffline->track);
511 qlen=gffline->qlen;
512 qstart=gffline->qstart;
513 qend=gffline->qend;
514 strand=gffline->strand;
515 if (keepAttr) this->parseAttrs(attrs, gffline->info, noExonAttr);
516 }
517 GSeqStat* gsd=gfrd->gseqstats.AddIfNew(new GSeqStat(gseq_id,names->gseqs.lastNameUsed()),true);
518 uptr=gsd;
519 gsd->gflst.Add(this);
520 if (start<gsd->mincoord) gsd->mincoord=start;
521 if (end>gsd->maxcoord) gsd->maxcoord=end;
522 gfrd->gfoAdd(gffID, gffline->gseqname, this);
523 }
524
525
526 GffLine* GffReader::nextGffLine() {
527 if (gffline!=NULL) return gffline; //caller should free gffline after processing
528 while (gffline==NULL) {
529 //const char* l=linebuf->getLine();
530 int llen=0;
531 buflen=GFF_LINELEN-1;
532 char* l=fgetline(linebuf, buflen, fh, &fpos, &llen);
533 if (l==NULL) {
534 return NULL; //end of file
535 }
536 int ns=0; //first nonspace position
537 while (l[ns]!=0 && isspace(l[ns])) ns++;
538 if (l[ns]=='#' || llen<10) continue;
539 gffline=new GffLine(this, l);
540 if (gffline->skip) {
541 delete gffline;
542 gffline=NULL;
543 }
544 }
545 return gffline;
546 }
547
548
549 GffObj* GffReader::parse(bool keepAttr, bool noExonAttr) { //<- do not use this!
550 //parses one parent feature at a time, assuming absolute grouping of subfeatures
551 //!ASSUMES that subfeatures (exons) are properly grouped together
552 //any interleaving exons from other transcripts will terminate the parsing of the current feature!
553 GffObj* gfo=NULL;
554 while (nextGffLine()!=NULL) {
555 if (gfo==NULL) {//record starts fresh here
556 gfo=new GffObj(this, gffline, keepAttr, noExonAttr);
557 delete gffline; gffline=NULL;
558 continue;
559 }
560 // -- gfo is not NULL from here --
561 if (gffline->Parent==NULL) {// new parent feature starting here
562 //new record start, return what we have so far,
563 //gffline was NOT deleted, so it will be used for the next parse() call
564 return ((gfo==NULL) ? NULL : gfo->finalize());
565 }
566 //-- has a Parent so it's a subfeature segment (exon/CDS/other subfeature)
567 // is it a subfeature of the current gf?
568 if (strcmp(gffline->Parent, gfo->gffID)==0) {
569 //yes, add it
570 gfo->addExon(gffline, !noExonAttr, noExonAttr);
571 delete gffline; gffline=NULL;
572 continue;
573 }
574 // is it a subfeature of a previously loaded gfo?
575 GffObj* prevgfo=gfoFind(gffline->Parent, gffline->gseqname);
576 if (prevgfo==NULL)
577 return ((gfo==NULL) ? NULL : gfo->finalize()); // new subfeature, gffline will be used for the next parse()
578 //this is for an earlier parent
579 prevgfo->addExon(gffline, !noExonAttr, noExonAttr);
580 delete gffline;
581 gffline=NULL;
582 } //while reading gfflines
583 return ((gfo==NULL) ? NULL : gfo->finalize());
584 }
585 char* GffReader::gfoBuildId(const char* id, const char* ctg) {
586 //caller must free the returned pointer
587 char* buf=NULL;
588 int idlen=strlen(id);
589 GMALLOC(buf, idlen+strlen(ctg)+2);
590 strcpy(buf, id);
591 buf[idlen]='~';
592 strcpy(buf+idlen+1, ctg);
593 return buf;
594 }
595
596 void GffReader::gfoRemove(const char* id, const char* ctg) {
597 char* buf=gfoBuildId(id,ctg);
598 phash.Remove(buf);
599 GFREE(buf);
600 }
601
602 void GffReader::gfoAdd(const char* id, const char* ctg, GffObj* gfo) {
603 char* buf=gfoBuildId(id,ctg);
604 phash.Add(buf, gfo);
605 GFREE(buf);
606 }
607 GffObj* GffReader::gfoFind(const char* id, const char* ctg) {
608 char* buf=gfoBuildId(id,ctg);
609 GffObj* r=phash.Find(buf);
610 GFREE(buf);
611 return r;
612 }
613
614
615 void GffReader::parseAll(GffRecFunc* gproc, bool keepAttr, bool noExonAttr, void* userptr1, void* userptr2) {
616 //WARNING: this is all messed up if the GFF lines are NOT grouped by parent feature
617 //!!! do not use this !!!
618 //iterates through all mappings in the input file
619 //calling gproc with each parsed mapping
620 GffObj* gfo;
621 while ((gfo=this->parse(keepAttr,noExonAttr))!=NULL) { //a valid gff record was parsed
622 if (gfo->empty()) { //shouldn't happen!
623 delete gfo;
624 gfo=NULL;
625 continue;
626 }
627 if (gproc(gfo, userptr1, userptr2)) {
628 //true returned from GfProcFunc means no longer needed
629 gfoRemove(gfo->gffID, gfo->getGSeqName());
630 delete gfo;
631 }
632 else {
633 gflst.Add(gfo);
634 }
635 gfo=NULL;
636 } //while records are parsed
637 phash.Clear();
638 }
639
640
641 void GffReader::readAll(bool keepAttr, bool mergeCloseExons, bool noExonAttr) {
642 while (nextGffLine()!=NULL) {
643 if (gffline->Parent==NULL) {//no parent, new GFF3-like record starting
644 //check for uniqueness of gffline->ID in phash !
645 GffObj* f=gfoFind(gffline->ID, gffline->gseqname);
646 if (f!=NULL) {
647 GError("Error: duplicate GFF ID '%s' encountered!\n",gffline->ID);
648 }
649 gflst.Add(new GffObj(this, gffline, keepAttr, noExonAttr));
650 }
651 else { //--- it's a subfeature (exon/CDS/other):
652 GffObj* prevgfo=gfoFind(gffline->Parent, gffline->gseqname);
653 if (prevgfo!=NULL) { //exon of a previously seen GffObj
654 if (gffline->strand!=prevgfo->strand) {
655 GError("Error: duplicate GFF ID '%s' (exons found on different strands of %s)\n",
656 prevgfo->gffID, prevgfo->getGSeqName());
657 }
658 int gdist=(gffline->fstart>prevgfo->end) ? gffline->fstart-prevgfo->end :
659 ((gffline->fend<prevgfo->start)? prevgfo->start-gffline->fend :
660 0 );
661 if (gdist>(int)GFF_MAX_LOCUS) { //too far apart, most likely this is a duplicate ID
662 GError("Error: duplicate GFF ID '%s' (or exons too far apart)!\n",prevgfo->gffID);
663 }
664 prevgfo->addExon(gffline, !noExonAttr, noExonAttr);
665 }
666 else {//new GTF-like record starting here with a subfeature
667 gflst.Add(new GffObj(this, gffline, keepAttr, noExonAttr));
668 //even those with errors will be added here!
669 }
670 } //subfeature
671 //--
672 delete gffline;
673 gffline=NULL;
674 }//while
675 for (int i=0;i<gflst.Count();i++) {
676 gflst[i]->finalize(mergeCloseExons); //finalize the parsing - also merge close exons/features if so requested
677 }
678 // all gff records are now loaded in GList gflst
679 // so we can free the hash
680 phash.Clear();
681 }
682
683 //this may be called prematurely if exon records are not grouped by parent
684 GffObj* GffObj::finalize(bool mergeCloseExons) {
685 udata=0;
686 uptr=NULL;
687 //TODO: merge adjacent or close segments - for mRNAs
688 //must merge adjacent features, and optionally small gaps
689 if (ftype_id==gff_fid_mRNA) {
690 int mindist=mergeCloseExons ? 5:1;
691 for (int i=0;i<exons.Count()-1;i++) {
692 int ni=i+1;
693 uint mend=exons[i]->end;
694 while (ni<exons.Count()) {
695 int dist=(int)(exons[ni]->start-mend);
696 if (dist<0 || dist>mindist) break; //no merging with next segment
697 mend=exons[ni]->end;
698 exons[i]->end=mend;
699 if (exons[ni]->attrs!=NULL && (exons[i]->attrs==NULL ||
700 exons[i]->attrs->Count()<exons[ni]->attrs->Count())) {
701 //use the other exon attributes, if more
702 delete(exons[i]->attrs);
703 exons[i]->attrs=exons[ni]->attrs;
704 exons[ni]->attrs=NULL;
705 }
706 exons.Delete(ni);
707 } //check for merge with next exon
708 } //for each exon
709 }
710 return this;
711 }
712
713 void GffObj::parseAttrs(GffAttrs*& atrlist, char* info, bool noExonAttr) {
714 if (names==NULL)
715 GError(ERR_NULL_GFNAMES, "parseAttrs()");
716 if (atrlist==NULL)
717 atrlist=new GffAttrs();
718 char* endinfo=info+strlen(info);
719 char* start=info;
720 char* pch=start;
721 while (start<endinfo) {
722 //skip spaces
723 while (*start==' ' && start<endinfo) start++;
724 pch=strchr(start, ';');
725 if (pch==NULL) pch=endinfo;
726 else {
727 *pch='\0';
728 pch++;
729 }
730 char* ech=strchr(start,'=');
731 if (ech!=NULL) { // attr=value format found
732 *ech='\0';
733 if (strcmp(start, "ID")==0 || strcmp(start,"Parent")==0 || strcmp(start,"Name")==0 ||
734 strcmp(start,"transcript_id")==0 || strcmp(start,"gene_id")==0)
735 { start=pch; continue; } //skip this already recognized and stored attribute
736 if (noExonAttr && (strcmp(start, "exon_number")==0 || strcmp(start, "exon")==0)) { start=pch; continue; }
737 ech++;
738 while (*ech==' ' && ech<endinfo) ech++;//skip extra spaces after the '='
739 atrlist->Add(new GffAttr(names->attrs.addName(start),ech));
740 }
741 /*
742 else { //not an attr=value format
743 atrlist->Add(new GffAttr(names->attrs.addName(start),"1"));
744 }
745 */
746 start=pch;
747 }
748 if (atrlist->Count()==0) { delete atrlist; atrlist=NULL; }
749 }
750
751 void GffObj::addAttr(const char* attrname, char* attrvalue) {
752 if (this->attrs==NULL)
753 this->attrs=new GffAttrs();
754 this->attrs->Add(new GffAttr(names->attrs.addName(attrname),attrvalue));
755 }
756
757 void GffObj::getCDS_ends(uint& cds_start, uint& cds_end) {
758 cds_start=0;
759 cds_end=0;
760 if (CDstart==0 || CDend==0) return; //no CDS info
761 int cdsadj=0;
762 if (CDphase=='1' || CDphase=='2') {
763 cdsadj=CDphase-'0';
764 }
765 cds_start=CDstart;
766 cds_end=CDend;
767 if (strand=='-') cds_end-=cdsadj;
768 else cds_start+=cdsadj;
769 }
770
771 void GffObj::mRNA_CDS_coords(uint& cds_mstart, uint& cds_mend) {
772 //sets cds_start and cds_end to the CDS start,end coordinates on the spliced mRNA transcript
773 cds_mstart=0;
774 cds_mend=0;
775 if (CDstart==0 || CDend==0) return; //no CDS info
776 //restore normal coordinates, just in case
777 unxcoord();
778 int cdsadj=0;
779 if (CDphase=='1' || CDphase=='2') {
780 cdsadj=CDphase-'0';
781 }
782 /*
783 uint seqstart=CDstart;
784 uint seqend=CDend;
785 */
786 uint seqstart=exons.First()->start;
787 uint seqend=exons.Last()->end;
788 int s=0; //resulting nucleotide counter
789 if (strand=='-') {
790 for (int x=exons.Count()-1;x>=0;x--) {
791 uint sgstart=exons[x]->start;
792 uint sgend=exons[x]->end;
793 if (seqend<sgstart || seqstart>sgend) continue;
794 if (seqstart>=sgstart && seqstart<=sgend)
795 sgstart=seqstart; //seqstart within this segment
796 if (seqend>=sgstart && seqend<=sgend)
797 sgend=seqend; //seqend within this segment
798 s+=(int)(sgend-sgstart)+1;
799 if (CDstart>=sgstart && CDstart<=sgend) {
800 //CDstart in this segment
801 //and we are getting the whole transcript
802 cds_mend=s-(int)(CDstart-sgstart);
803 //GMessage("Setting cds_mend to %d\n",cds_mend);
804 }
805 if (CDend>=sgstart && CDend<=sgend) {
806 //CDstart in this segment
807 //and we are getting the whole transcript
808 cds_mstart=s-(int)(CDend-cdsadj-sgstart);
809 //GMessage("Setting cds_mstart to %d\n",cds_mstart);
810 }
811 } //for each exon
812 } // - strand
813 else { // + strand
814 for (int x=0;x<exons.Count();x++) {
815 uint sgstart=exons[x]->start;
816 uint sgend=exons[x]->end;
817 if (seqend<sgstart || seqstart>sgend) continue;
818 if (seqstart>=sgstart && seqstart<=sgend)
819 sgstart=seqstart; //seqstart within this segment
820 if (seqend>=sgstart && seqend<=sgend)
821 sgend=seqend; //seqend within this segment
822 s+=(int)(sgend-sgstart)+1;
823 /* for (uint i=sgstart;i<=sgend;i++) {
824 spliced[s]=gsubseq[i-gstart];
825 s++;
826 }//for each nt
827 */
828 if (CDstart>=sgstart && CDstart<=sgend) {
829 //CDstart in this segment
830 cds_mstart=s-(int)(sgend-CDstart-cdsadj);
831 }
832 if (CDend>=sgstart && CDend<=sgend) {
833 //CDend in this segment
834 cds_mend=s-(int)(sgend-CDend);
835 }
836 } //for each exon
837 } // + strand
838 //spliced[s]=0;
839 //if (rlen!=NULL) *rlen=s;
840 //return spliced;
841 }
842
843 char* GffObj::getSpliced(GFaSeqGet* faseq, bool CDSonly, int* rlen, uint* cds_start, uint* cds_end,
844 GList<GSeg>* seglst) {
845 if (CDSonly && CDstart==0) return NULL;
846 if (faseq==NULL) { GMessage("Warning: getSpliced(NULL,.. ) called!\n");
847 return NULL;
848 }
849 //restore normal coordinates:
850 unxcoord();
851 if (exons.Count()==0) return NULL;
852 int fspan=end-start+1;
853 const char* gsubseq=faseq->subseq(start, fspan);
854 if (gsubseq==NULL) {
855 GError("Error getting subseq for %s (%d..%d)!\n", gffID, start, end);
856 }
857 char* spliced=NULL;
858 GMALLOC(spliced, covlen+1); //allocate more here
859 uint seqstart, seqend;
860 int cdsadj=0;
861 if (CDphase=='1' || CDphase=='2') {
862 cdsadj=CDphase-'0';
863 }
864 if (CDSonly) {
865 seqstart=CDstart;
866 seqend=CDend;
867 if (strand=='-') seqend-=cdsadj;
868 else seqstart+=cdsadj;
869 }
870 else {
871 seqstart=exons.First()->start;
872 seqend=exons.Last()->end;
873 }
874 int s=0; //resulting nucleotide counter
875 if (strand=='-') {
876 for (int x=exons.Count()-1;x>=0;x--) {
877 uint sgstart=exons[x]->start;
878 uint sgend=exons[x]->end;
879 if (seqend<sgstart || seqstart>sgend) continue;
880 if (seqstart>=sgstart && seqstart<=sgend)
881 sgstart=seqstart; //seqstart within this segment
882 if (seqend>=sgstart && seqend<=sgend)
883 sgend=seqend; //seqend within this segment
884 if (seglst!=NULL)
885 seglst->Add(new GSeg(s+1,s+1+sgend-sgstart));
886 for (uint i=sgend;i>=sgstart;i--) {
887 spliced[s] = ntComplement(gsubseq[i-start]);
888 s++;
889 }//for each nt
890
891 if (!CDSonly && cds_start!=NULL && CDstart>0) {
892 if (CDstart>=sgstart && CDstart<=sgend) {
893 //CDstart in this segment
894 //and we are getting the whole transcript
895 *cds_end=s-(CDstart-sgstart);
896 }
897 if (CDend>=sgstart && CDend<=sgend) {
898 //CDstart in this segment
899 //and we are getting the whole transcript
900 *cds_start=s-(CDend-cdsadj-sgstart);
901 }
902 }//update local CDS coordinates
903 } //for each exon
904 } // - strand
905 else { // + strand
906 for (int x=0;x<exons.Count();x++) {
907 uint sgstart=exons[x]->start;
908 uint sgend=exons[x]->end;
909 if (seqend<sgstart || seqstart>sgend) continue;
910 if (seqstart>=sgstart && seqstart<=sgend)
911 sgstart=seqstart; //seqstart within this segment
912 if (seqend>=sgstart && seqend<=sgend)
913 sgend=seqend; //seqend within this segment
914 if (seglst!=NULL)
915 seglst->Add(new GSeg(s+1,s+1+sgend-sgstart));
916 for (uint i=sgstart;i<=sgend;i++) {
917 spliced[s]=gsubseq[i-start];
918 s++;
919 }//for each nt
920 if (!CDSonly && cds_start!=NULL && CDstart>0) {
921 if (CDstart>=sgstart && CDstart<=sgend) {
922 //CDstart in this segment
923 //and we are getting the whole transcript
924 *cds_start=s-(sgend-CDstart-cdsadj);
925 }
926 if (CDend>=sgstart && CDend<=sgend) {
927 //CDstart in this segment
928 //and we are getting the whole transcript
929 *cds_end=s-(sgend-CDend);
930 }
931 }//update local CDS coordinates
932 } //for each exon
933 } // + strand
934 spliced[s]=0;
935 if (rlen!=NULL) *rlen=s;
936 return spliced;
937 }
938
939 char* GffObj::getSplicedTr(GFaSeqGet* faseq, bool CDSonly, int* rlen) {
940 if (CDSonly && CDstart==0) return NULL;
941 //restore normal coordinates:
942 unxcoord();
943 if (exons.Count()==0) return NULL;
944 int fspan=end-start+1;
945 const char* gsubseq=faseq->subseq(start, fspan);
946 if (gsubseq==NULL) {
947 GError("Error getting subseq for %s (%d..%d)!\n", gffID, start, end);
948 }
949
950 char* translation=NULL;
951 GMALLOC(translation, (int)(covlen/3)+1);
952 uint seqstart, seqend;
953 int cdsadj=0;
954 if (CDphase=='1' || CDphase=='2') {
955 cdsadj=CDphase-'0';
956 }
957 if (CDSonly) {
958 seqstart=CDstart;
959 seqend=CDend;
960 if (strand=='-') seqend-=cdsadj;
961 else seqstart+=cdsadj;
962 }
963 else {
964 seqstart=exons.First()->start;
965 seqend=exons.Last()->end;
966 }
967 Codon codon;
968 int nt=0; //codon nucleotide counter (0..2)
969 int aa=0; //aminoacid count
970 if (strand=='-') {
971 for (int x=exons.Count()-1;x>=0;x--) {
972 uint sgstart=exons[x]->start;
973 uint sgend=exons[x]->end;
974 if (seqend<sgstart || seqstart>sgend) continue;
975 if (seqstart>=sgstart && seqstart<=sgend)
976 sgstart=seqstart; //seqstart within this segment
977 if (seqend>=sgstart && seqend<=sgend) {
978 sgend=seqend; //seqend within this segment
979 }
980 for (uint i=sgend;i>=sgstart;i--) {
981 codon.nuc[nt]=ntComplement(gsubseq[i-start]);
982 nt++;
983 if (nt==3) {
984 nt=0;
985 translation[aa]=codon.translate();
986 aa++;
987 }
988 }//for each nt
989 } //for each exon
990 } // - strand
991 else { // + strand
992 for (int x=0;x<exons.Count();x++) {
993 uint sgstart=exons[x]->start;
994 uint sgend=exons[x]->end;
995 if (seqend<sgstart || seqstart>sgend) continue;
996 if (seqstart>=sgstart && seqstart<=sgend)
997 sgstart=seqstart; //seqstart within this segment
998 if (seqend>=sgstart && seqend<=sgend)
999 sgend=seqend; //seqend within this segment
1000 for (uint i=sgstart;i<=sgend;i++) {
1001 codon.nuc[nt]=gsubseq[i-start];
1002 nt++;
1003 if (nt==3) {
1004 nt=0;
1005 translation[aa]=codon.translate();
1006 aa++;
1007 }
1008 }//for each nt
1009 } //for each exon
1010 } // + strand
1011 translation[aa]=0;
1012 if (rlen!=NULL) *rlen=aa;
1013 return translation;
1014 }
1015
1016 void GffObj::printSummary(FILE* fout) {
1017 if (fout==NULL) fout=stdout;
1018 fprintf(fout, "%s\t%c\t%d\t%d\t%4.2f\t%4.1f\n", gffID,
1019 strand, start, end, gscore, (float)qcov/10.0);
1020 }
1021
1022 void GffObj::printGxfLine(FILE* fout, char* tlabel, char* gseqname, bool iscds,
1023 uint segstart, uint segend, int exidx, char phase, bool gff3) {
1024 static char scorestr[14];
1025 strcpy(scorestr,".");
1026 GffAttrs* xattrs=NULL;
1027 if (exidx>=0) {
1028 if (exons[exidx]->score) sprintf(scorestr,"%.2f", exons[exidx]->score);
1029 xattrs=exons[exidx]->attrs;
1030 }
1031 char* geneid=(gname!=NULL)? gname : gffID;
1032 if (phase==0 || !iscds) phase='.';
1033 const char* ftype=iscds ? "CDS" : getSubfName();
1034 if (gff3) {
1035 fprintf(fout,
1036 "%s\t%s\t%s\t%d\t%d\t%s\t%c\t%c\tParent=%s",
1037 gseqname, tlabel, ftype, segstart, segend, scorestr, strand,
1038 phase, gffID);
1039 if (xattrs!=NULL) {
1040 for (int i=0;i<xattrs->Count();i++)
1041 fprintf(fout, ";%s=%s",names->attrs.getName(xattrs->Get(i)->attr_id),
1042 xattrs->Get(i)->attr_val);
1043 }
1044 fprintf(fout, "\n");
1045 } //GFF
1046 else {//for GTF -- we can only print mRNAs here
1047 fprintf(fout, "%s\t%s\t%s\t%d\t%d\t%s\t%c\t%c\t",
1048 gseqname, tlabel, ftype, segstart, segend, scorestr, strand, phase);
1049 if (ismRNA())
1050 fprintf(fout,"gene_id \"%s\"; transcript_id \"%s\";", geneid, gffID);
1051 if (xattrs!=NULL) {
1052 for (int i=0;i<xattrs->Count();i++) {
1053 if (xattrs->Get(i)->attr_val==NULL) continue;
1054 fprintf(fout, " %s ",names->attrs.getName(xattrs->Get(i)->attr_id));
1055 if (xattrs->Get(i)->attr_val[0]=='"')
1056 fprintf(fout, "%s;",xattrs->Get(i)->attr_val);
1057 else fprintf(fout, "\"%s\";",xattrs->Get(i)->attr_val);
1058 }
1059 }
1060 fprintf(fout, "\n");
1061 }//GTF
1062 }
1063
1064 void GffObj::printGxf(FILE* fout, GffPrintMode gffp, char* tlabel) {
1065 static char tmpstr[255];
1066 if (tlabel==NULL) {
1067 tlabel=track_id>=0 ? names->tracks.Get(track_id)->name :
1068 (char*)"gffobj" ;
1069 }
1070
1071 unxcoord();
1072 if (exons.Count()==0) return;
1073 char* gseqname=names->gseqs.Get(gseq_id)->name;
1074 bool gff3 = (gffp>=pgffAny);
1075 bool showCDS = (gffp==pgtfAny || gffp==pgtfCDS || gffp==pgffCDS || gffp==pgffAny || gffp==pgffBoth);
1076 bool showExon = (gffp<=pgtfExon || gffp==pgffAny || gffp==pgffExon || gffp==pgffBoth);
1077 if (gff3) {
1078 //print GFF3 mRNA line:
1079 if (gscore>0.0) sprintf(tmpstr,"%.2f", gscore);
1080 else strcpy(tmpstr,".");
1081 uint pstart, pend;
1082 if (gffp==pgffCDS) {
1083 pstart=CDstart;
1084 pend=CDend;
1085 }
1086 else { pstart=start;pend=end; }
1087 const char* ftype=ismRNA() ? "mRNA" : getFeatureName();
1088 fprintf(fout,
1089 "%s\t%s\t%s\t%d\t%d\t%s\t%c\t.\tID=%s",
1090 gseqname, tlabel, ftype, pstart, pend, tmpstr, strand, gffID);
1091 if (gname!=NULL)
1092 fprintf(fout, ";Name=%s",gname);
1093 if (CDstart>0 && !showCDS && !isCDS) fprintf(fout,";CDS=%d:%d",CDstart,CDend);
1094 if (attrs!=NULL) {
1095 for (int i=0;i<attrs->Count();i++) {
1096 fprintf(fout,";%s=%s", names->attrs.getName(attrs->Get(i)->attr_id),
1097 attrs->Get(i)->attr_val);
1098 }
1099 }
1100 fprintf(fout,"\n");
1101 }// gff3 mRNA line
1102 if (showExon) {
1103 //print exons
1104 for (int i=0;i<exons.Count();i++) {
1105 printGxfLine(fout, tlabel, gseqname, isCDS, exons[i]->start, exons[i]->end, i, exons[i]->phase, gff3);
1106 }
1107 }//printing exons
1108 if (showCDS && !isCDS && CDstart>0) {
1109 GArray<GffCDSeg> cds(true,true);
1110 getCDSegs(cds);
1111 for (int i=0;i<cds.Count();i++) {
1112 printGxfLine(fout, tlabel, gseqname, true, cds[i].start, cds[i].end, -1, cds[i].phase, gff3);
1113 }
1114 } //showCDS
1115 }
1116
1117
1118 void GffObj::getCDSegs(GArray<GffCDSeg>& cds) {
1119 GffCDSeg cdseg;
1120 int cdsacc=0;
1121 if (CDphase=='1' || CDphase=='2') {
1122 cdsacc+= 3-(CDphase-'0');
1123 }
1124 if (strand=='-') {
1125 for (int x=exons.Count()-1;x>=0;x--) {
1126 uint sgstart=exons[x]->start;
1127 uint sgend=exons[x]->end;
1128 if (CDend<sgstart || CDstart>sgend) continue;
1129 if (CDstart>=sgstart && CDstart<=sgend)
1130 sgstart=CDstart; //cdstart within this segment
1131 if (CDend>=sgstart && CDend<=sgend)
1132 sgend=CDend; //cdend within this segment
1133 cdseg.start=sgstart;
1134 cdseg.end=sgend;
1135 cdseg.exonidx=x;
1136 //cdseg.phase='0'+(cdsacc>0 ? (3-cdsacc%3)%3 : 0);
1137 cdseg.phase='0'+ (3-cdsacc%3)%3;
1138 cdsacc+=sgend-sgstart+1;
1139 cds.Add(cdseg);
1140 } //for each exon
1141 } // - strand
1142 else { // + strand
1143 for (int x=0;x<exons.Count();x++) {
1144 uint sgstart=exons[x]->start;
1145 uint sgend=exons[x]->end;
1146 if (CDend<sgstart || CDstart>sgend) continue;
1147 if (CDstart>=sgstart && CDstart<=sgend)
1148 sgstart=CDstart; //seqstart within this segment
1149 if (CDend>=sgstart && CDend<=sgend)
1150 sgend=CDend; //seqend within this segment
1151 cdseg.start=sgstart;
1152 cdseg.end=sgend;
1153 cdseg.exonidx=x;
1154 //cdseg.phase='0'+(cdsacc>0 ? (3-cdsacc%3)%3 : 0);
1155 cdseg.phase='0' + (3-cdsacc%3)%3 ;
1156 cdsacc+=sgend-sgstart+1;
1157 cds.Add(cdseg);
1158 } //for each exon
1159 } // + strand
1160 }
1161 /*
1162 #ifdef DEBUG
1163 void GffObj::dbgPrint(const char* msg) {
1164 if (msg!=NULL) fprintf(stdout, ">> %s\n",msg);
1165 char* tlabel=track_id>=0 ? names->tracks.Get(track_id)->name :
1166 (char*)"gmapobj" ;
1167 char scorestr[14];
1168 char strand=revstrand?'-':'+';
1169 unxcoord();
1170 char* gseqname=names->gseqs.Get(gseq_id)->name;
1171 char* fname=f_id>=0 ? names->feats.Get(f_id)->name : (char*)"nofeatname";
1172
1173 fprintf(stdout, "%s\t%s\t%s\t%d\t%d\t.\t%c\t.\tID=%s;Name=%s\n",
1174 gseqname, tlabel, fname, start, end, strand, gffID, gffID);
1175
1176 for (int fi=0;fi<features->Count();fi++) {
1177 GFeature* feature=features->Get(fi);
1178 fname=names->feats.Get(feature->name_id)->name;
1179 GffExon* segs=feature->segs;
1180 int segcount=feature->segcount;
1181 if (segcount==0 || segs==NULL) continue;
1182 for (int i=0;i<segcount;i++) {
1183 if (segs[i].start==0) continue;
1184 if (segs[i].score) sprintf(scorestr,"%.2f", segs[i].score/100.00);
1185 else strcpy(scorestr,".");
1186 fprintf(stdout,
1187 "%s\t%s\t%s\t%d\t%d\t%s\t%c\t.\tParent=%s\n",
1188 gseqname, tlabel, fname, segs[i].start, segs[i].end, scorestr, strand, gffID);
1189 }
1190 }
1191 fflush(stdout);
1192 }
1193 #endif
1194 */