ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/gclib/gff.cpp
Revision: 2
Committed: Mon Mar 22 22:03:27 2010 UTC (9 years, 6 months ago) by gpertea
File size: 40309 byte(s)
Log Message:
added my gclib source files

Line User Rev File contents
1 gpertea 2 #include "gff.h"
2    
3     //GffNames* GffReader::names=NULL;
4     GffNames* GffObj::names=NULL;
5     //global set of feature names, attribute names etc.
6     // -- common for all GffObjs in current application!
7    
8     const uint GFF_MAX_LOCUS = 4000000; //longest known gene in human is ~2.2M, UCSC claims a gene for mouse of ~ 3.1 M
9     const uint GFF_MAX_EXON = 20000; //longest known exon in human is ~11K
10     const uint GFF_MAX_INTRON= 1600000;
11    
12     const int gff_fid_mRNA=0;
13     const int gff_fid_exon=1;
14     const int gff_fid_CDS=2;
15     bool gff_warns=true;
16    
17     void gffnames_ref(GffNames* &n) {
18     if (n==NULL) n=new GffNames();
19     n->numrefs++;
20     }
21    
22     void gffnames_unref(GffNames* &n) {
23     if (n==NULL) GError("Error: attempt to remove reference to null GffNames object!\n");
24     n->numrefs--;
25     if (n->numrefs==0) { delete n; n=NULL; }
26     }
27    
28     int gfo_cmpByLoc(const pointer p1, const pointer p2) {
29    
30     GffObj& g1=*((GffObj*)p1);
31     GffObj& g2=*((GffObj*)p2);
32     if (g1.gseq_id==g2.gseq_id) return (int)(g1.start-g2.start);
33     else return (int)(g1.gseq_id-g2.gseq_id);
34     }
35    
36     static char fnamelc[32];
37    
38     GffLine::GffLine(GffReader* reader, const char* l) {
39     line=Gstrdup(l);
40     skip=true;
41     gseqname=NULL;
42     track=NULL;
43     ftype=NULL;
44     info=NULL;
45     Parent=NULL;
46     is_cds=false;
47     is_mrna=false;
48     is_exon=false;
49     exontype=0;
50     gname=NULL;
51     qstart=0;
52     qend=0;
53     qlen=0;
54     exontype=0;
55     ID=NULL;
56     char* t[9];
57     int i=0;
58     int tidx=1;
59     t[0]=line;
60    
61     while (line[i]!=0) {
62     if (line[i]=='\t') {
63     line[i]=0;
64     t[tidx]=line+i+1;
65     tidx++;
66     if (tidx>8) break;
67     }
68     i++;
69     }
70    
71     if (tidx<8) { // ignore non-GFF lines
72     // GMessage("Warning: error parsing GFF/GTF line:\n%s\n", l);
73     return;
74     }
75     gseqname=t[0];
76     track=t[1];
77     ftype=t[2];
78     info=t[8];
79     char* p=t[3];
80     if (!parseUInt(p,fstart))
81     GError("Error parsing start coordinate from GFF line:\n%s\n",l);
82     p=t[4];
83     if (!parseUInt(p,fend))
84     GError("Error parsing end coordinate from GFF line:\n%s\n",l);
85     if (fend<fstart) swap(fend,fstart);
86     p=t[5];
87     if (p[0]=='.' && p[1]==0) {
88     score=0;
89     }
90     else {
91     if (!parseDouble(p,score))
92     GError("Error parsing feature score from GFF line:\n%s\n",l);
93     }
94     strand=*t[6];
95     if (strand!='+' && strand!='-' && strand!='.')
96     GError("Error parsing strand (%c) from GFF line:\n%s\n",strand,l);
97     phase=*t[7];
98     ID=NULL;
99     Parent=NULL;
100     // exon/CDS/mrna filter
101     strncpy(fnamelc, ftype, 31);
102     fnamelc[31]=0;
103     strlower(fnamelc); //convert to lower case
104     if (strstr(fnamelc, "utr")!=NULL) {
105     exontype=exgffUTR;
106     is_exon=true;
107     }
108     else if (strstr(fnamelc, "exon")!=NULL) {
109     exontype=exgffExon;
110     is_exon=true;
111     }
112     else if (startsWith(fnamelc, "stop") && strstr(fnamelc, "codon")!=NULL) {
113     exontype=exgffStop;
114     is_cds=true;
115     }
116     else if (strcmp(fnamelc, "cds")==0) {
117     exontype=exgffCDS;
118     is_cds=true;
119     }
120     else { //is_mrna is set only if the *current* line is a mRNA or transcript
121     is_mrna=(strcmp(fnamelc,"mrna")==0 ||
122     strcmp(fnamelc,"transcript")==0);
123     }
124    
125     if (reader->mrnaOnly) {
126     if (!is_mrna && !is_cds && !is_exon)
127     return; //skip this line, unwanted feature name
128     }
129     p=strstr(info,"ID=");
130     if (p!=NULL) { //has ID attr
131     ID=p+3;
132     p=ID;
133     while (*p!=';' && *p!=0) p++;
134     ID=Gstrdup(ID, p-1);
135     //look for a name attr too:
136     p=strstr(info,"Name=");
137     if (p!=NULL) {
138     gname=p+5;
139     p=gname;
140     while (*p!=';' && *p!=0) p++;
141     gname=Gstrdup(gname, p-1);
142     }
143     }
144     p=NULL;
145     if (!is_mrna)
146     p=strifind(info,"Parent="); //don't care about the parent for mRNA features..
147     if (p!=NULL) { //has Parent attr
148     Parent=p+7;
149     p=Parent;
150     while (*p!=';' && *p!=0) p++;
151     Parent=Gstrdup(Parent, p-1);
152     }
153     else if (ID==NULL) { //no "Parent=" and no "ID=", attempt GTF parsing instead
154     p=strstr(info,"transcript_id");
155     if (p!=NULL) { //GTF detected
156     p+=13;
157     //requires quotes
158     while (*p!='"' && *p!=0) p++;
159     if (*p==0) GError("Error parsing transcript_id (double quotes expected) at GTF line:\n%s\n",l);
160     p++;
161     Parent=p;
162     while (*p!='"' && *p!=0) p++;
163     if (*p==0) GError("Error parsing transcript_id (ending double quotes expected) at GTF line:\n%s\n",l);
164     if (is_mrna) { // RGASP GTF exception: a parent "transcript" feature preceding exon/CDS subfeatures
165     ID=Gstrdup(Parent, p-1);
166     Parent=NULL;
167     }
168     else {
169     Parent=Gstrdup(Parent, p-1);
170     }
171     //check for gene_name or gene_id
172     //p=strstr(info, "gene_name");// this is preferred over gene_id
173     //if (p==NULL)
174     p=strstr(info,"gene_id");
175     if (p!=NULL) {
176     p+=7;//skip 'gene_id'
177     while (*p!='"' && *p!=0) p++;
178     if (*p==0) GError("Error parsing gene_id (double quotes expected) at GTF line:\n%s\n",l);
179     p++;
180     gname=p;
181     while (*p!='"' && *p!=0) p++;
182     if (*p==0) GError("Error parsing gene_id (ending double quotes expected) at GTF line:\n%s\n",l);
183     gname=Gstrdup(gname, p-1);
184     }
185     //prepare for parseAttr by adding '=' character instead of spaces for all attributes
186     //after the attribute name
187     p=info;
188     bool noed=true; //not edited after the last delim
189     bool nsp=false; //non-space found after last delim
190     while (*p!=0) {
191     if (*p==' ') {
192     if (nsp && noed) {
193     *p='=';
194     noed=false;
195     p++;
196     continue;
197     }
198     }
199     else nsp=true;
200     if (*p==';') { noed=true; nsp=false; }
201     p++;
202     }
203     } //gtf detected
204     else {//check for jigsaw or cufflinks format
205     char* fexon=strstr(fnamelc, "exon");
206     if (fexon!=NULL) {
207     if (startsWith(track,"jigsaw")) {
208     is_cds=true;
209     strcpy(track,"jigsaw");
210     p=strchr(info,';');
211     if (p==NULL) Parent=Gstrdup(info);
212     else { Parent=Gstrdup(info,p-1); info=p+1; }
213     }
214     else if ((i=strcspn(info,"; \t\n\r"))<=(int)(strlen(info)+1)) {//one word ID
215     Parent=Gstrdup(info,info+i-1);
216     }
217    
218     }
219     else GError("Error parsing Parent/ID at input line:\n%s\n",l);
220     }
221     }
222     p=strstr(info,"Target=");
223     if (p!=NULL) { //has Target attr
224     p+=7;
225     while (*p!=';' && *p!=0 && *p!=' ') p++;
226     if (*p!=' ') {
227     GError("Error parsing target coordinates from GFF line:\n%s\n",l);
228     }
229     if (!parseUInt(p,qstart))
230     GError("Error parsing target start coordinate from GFF line:\n%s\n",l);
231     if (*p!=' ') {
232     GError("Error parsing next target coordinate from GFF line:\n%s\n",l);
233     }
234     p++;
235     if (!parseUInt(p,qend))
236     GError("Error parsing target end coordinate from GFF line:\n%s\n",l);
237     }
238     else {
239     p=strifind(info,"Qreg=");
240     if (p!=NULL) { //has Qreg attr
241     p+=5;
242     if (!parseUInt(p,qstart))
243     GError("Error parsing target start coordinate from GFF line:\n%s\n",l);
244     if (*p!='-') {
245     GError("Error parsing next target coordinate from GFF line:\n%s\n",l);
246     }
247     p++;
248     if (!parseUInt(p,qend))
249     GError("Error parsing target end coordinate from GFF line:\n%s\n",l);
250     if (*p=='|') {
251     p++;
252     if (!parseUInt(p,qlen))
253     GError("Error parsing target length from GFF Qreg|: \n%s\n",l);
254     }
255     }//has Qreg attr
256     }
257     if (qlen==0 && (p=strifind(info,"Qlen="))!=NULL) {
258     p+=5;
259     if (!parseUInt(p,qlen))
260     GError("Error parsing target length from GFF Qlen:\n%s\n",l);
261     }
262     skip=false;
263     }
264    
265     int GffObj::addExon(GffLine* gl, bool keepAttr, bool noExonAttr) {
266     //this will make sure we have the right subftype_id!
267     int subf_id=-1;
268     if (ftype_id==gff_fid_mRNA) {
269     if (subftype_id<0) subftype_id=gff_fid_exon;
270     //any recognized mRNA segment gets the generic "exon" type (also applies to CDS)
271     if (!gl->is_cds && !gl->is_exon)
272     //extraneous mRNA feature, will discard for now
273     return -1;
274     }
275     else { //other kind of parent feature, check this subf type
276     subf_id=names->feats.addName(gl->ftype);
277     if (subftype_id<0)
278     subftype_id=subf_id;
279     else {
280     if (subftype_id!=subf_id)
281     GMessage("GFF Warning: multiple subfeatures (%s and %s) found for %s, only %s is kept\n",
282     names->feats.getName(subftype_id), names->feats.getName(subf_id),
283     gffID,names->feats.getName(subftype_id));
284     return -1; //skip this 2nd subfeature type for this parent!
285     }
286     }
287     if (gl->exontype==exgffUTR || gl->exontype==exgffStop)
288     udata=1;//merge 0-distance segments
289     int eidx=addExon(gl->fstart, gl->fend, gl->score, gl->phase,
290     gl->qstart,gl->qend, gl->is_cds, gl->exontype);
291     if (eidx>=0 && keepAttr) {
292     parseAttrs(exons[eidx]->attrs, gl->info, noExonAttr);
293     }
294     return eidx;
295     }
296    
297    
298     void GffObj::expandExon(int oi, uint segstart, uint segend, char exontype, double sc, char fr, int qs, int qe) {
299     //oi is the index of the *first* overlapping segment found that must be enlarged
300     covlen-=exons[oi]->len();
301     if (segstart<exons[oi]->start)
302     exons[oi]->start=segstart;
303     if (qs<exons[oi]->qstart) exons[oi]->qstart=qs;
304     if (segend>exons[oi]->end)
305     exons[oi]->end=segend;
306     if (qe>exons[oi]->qend) exons[oi]->qend=qe;
307     //warning: score cannot be properly adjusted! e.g. if it's a p-value it's just going to get worse
308     if (sc!=0) exons[oi]->score=sc;
309     covlen+=exons[oi]->len();
310     //if (exons[oi]->exontype< exontype) -- always true
311     exons[oi]->exontype = exontype;
312     if (exontype==exgffCDS) exons[oi]->phase=fr;
313     //we must check if any more exons are also overlapping this
314     int ni=oi+1; //next exon index after oi
315     while (ni<exons.Count() && segend>=exons[ni]->start) { // next segment overlaps new enlarged segment
316     //only allow this if next segment is fully included, and a subordinate
317     if (exons[ni]->exontype<exontype && exons[ni]->end<=segend) {
318     if (exons[ni]->qstart<exons[oi]->qstart) exons[oi]->qstart=exons[ni]->qstart;
319     if (exons[ni]->qend>exons[oi]->qend) exons[oi]->qend=exons[ni]->qend;
320     exons.Delete(ni);
321     }
322     else {
323     GMessage("GFF Warning: overlapping feature segment (%d-%d) for GFF ID %s\n", segstart, segend, gffID);
324     hasErrors=true;
325     break;
326     }
327     }
328     // -- make sure any other related boundaries are updated:
329     start=exons.First()->start;
330     end=exons.Last()->end;
331     if (uptr!=NULL) { //collect stats about the underlying genomic sequence
332     GSeqStat* gsd=(GSeqStat*)uptr;
333     if (start<gsd->mincoord) gsd->mincoord=start;
334     if (end>gsd->maxcoord) gsd->maxcoord=end;
335     }
336     }
337    
338     int GffObj::addExon(uint segstart, uint segend, double sc, char fr, int qs, int qe, bool iscds, char exontype) {
339     if (exons.Count()==0) {
340     if (iscds) isCDS=true; //for now, assume CDS only if first "exon" given is a CDS
341     if (subftype_id<0) {
342     subftype_id = (ftype_id==gff_fid_mRNA) ? gff_fid_exon : ftype_id;
343     }
344     }
345     if (iscds) { //update CDS anchors:
346     if (CDstart==0 || segstart<CDstart) {
347     CDstart=segstart;
348     if (exontype==exgffCDS && strand=='+') CDphase=fr;
349     }
350     if (segend>CDend) {
351     if (exontype==exgffCDS && strand=='-') CDphase=fr;
352     CDend=segend;
353     }
354     }
355     else { // !iscds
356     isCDS=false;
357     }
358     if (qs || qe) {
359     if (qs>qe) swap(qs,qe);
360     if (qs==0) qs=1;
361     }
362     int ovlen=0;
363     int oi=exonOverlapIdx(segstart, segend, &ovlen);
364     if (oi>=0) { //overlap existing segment
365     //only allow this for CDS within exon, stop_codon within exon, stop_codon within UTR,
366     // or stop_codon within CDS
367     if (exons[oi]->exontype>exontype &&
368     exons[oi]->start<=segstart && exons[oi]->end>=segend &&
369     !(exons[oi]->exontype==exgffUTR && exontype==exgffCDS)) {
370     //larger segment given first, now the smaller included one
371     return oi; //only used to store attributes from current GffLine
372     }
373     if (exontype>exons[oi]->exontype &&
374     segstart<=exons[oi]->start && segend>=exons[oi]->end &&
375     !(exontype==exgffUTR && exons[oi]->exontype==exgffCDS)) {
376     //smaller segment given first, so we have to enlarge it
377     expandExon(oi, segstart, segend, exontype, sc, fr, qs, qe);
378     //this must also check for overlapping next exon (oi+1)
379     return oi;
380     }
381     //there is also the special case of "ribosomal slippage exception" (programmed frameshift)
382     //where two CDS segments may actually overlap for 1 or 2 bases, but there should be only one encompassing exon
383     if (ovlen>2 || exons[oi]->exontype!=exgffCDS || exontype!=exgffCDS) {
384     GMessage("GFF Warning: discarded overlapping feature segment (%d-%d) for GFF ID %s\n",
385     segstart, segend, gffID);
386     hasErrors=true;
387     return false; //segment NOT added
388     }
389     /* --
390     else {
391     // else add the segment if the overlap is small and between two CDS segments
392     //we might want to add an attribute here with the slippage coordinate and size?
393     }
394     */
395     }//overlap of existing segment
396    
397     // --- no overlap, or accepted micro-overlap (ribosomal slippage)
398     // create & add the new segment
399     GffExon* enew=new GffExon(segstart, segend, sc, fr, qs, qe, exontype);
400     int eidx=exons.Add(enew);
401     if (eidx<0) {
402     GMessage("GFF Warning: duplicate segment %d-%d not added for GFF ID %s!\n",
403     segstart, segend, gffID);
404     return -1;
405     }
406     covlen+=(int)(exons[eidx]->end-exons[eidx]->start)+1;
407     start=exons.First()->start;
408     end=exons.Last()->end;
409     if (uptr!=NULL) { //collect stats about the underlying genomic sequence
410     GSeqStat* gsd=(GSeqStat*)uptr;
411     if (start<gsd->mincoord) gsd->mincoord=start;
412     if (end>gsd->maxcoord) gsd->maxcoord=end;
413     }
414     return eidx;
415     }
416    
417     void GffObj::removeExon(int idx) {
418     /*
419     if (idx==0 && segs[0].start==gstart)
420     gstart=segs[1].start;
421     if (idx==segcount && segs[segcount].end==gend)
422     gend=segs[segcount-1].end;
423     */
424     if (idx<0 || idx>=exons.Count()) return;
425     int segstart=exons[idx]->start;
426     int segend=exons[idx]->end;
427     exons.Delete(idx);
428     covlen -= (int)(segend-segstart)+1;
429     start=exons.First()->start;
430     end=exons.Last()->end;
431     if (isCDS) { CDstart=start; CDend=end; }
432     }
433    
434     GffObj::GffObj(GffReader *gfrd, GffLine* gffline, bool keepAttr, bool noExonAttr):
435     GSeg(0,0), exons(true,true,true) {
436     xstart=0;
437     xend=0;
438     xstatus=0;
439     partial=false;
440     isCDS=false;
441     uptr=NULL;
442     ulink=NULL;
443     udata=0;
444     CDstart=0;
445     CDend=0;
446     gname=NULL;
447     attrs=NULL;
448     gffID=NULL;
449     track_id=-1;
450     gseq_id=-1;
451     ftype_id=-1;
452     subftype_id=-1;
453     hasErrors=false;
454     if (gfrd==NULL)
455     GError("Cannot use this GffObj constructor with a NULL GffReader!\n");
456     gffnames_ref(names);
457     if (gfrd->names==NULL) gfrd->names=names;
458     qlen=0;qstart=0;qend=0;
459     gscore=0;
460     uscore=0;
461     covlen=0;
462     qcov=0;
463     if (gffline->Parent!=NULL) {
464     //GTF style -- subfeature given directly
465     if (gffline->is_cds || gffline->is_exon)
466     ftype_id=gff_fid_mRNA;
467     else {
468     //group of other subfeatures of type ftype:
469     ftype_id=names->feats.addName(gffline->ftype);
470     }
471     gffID=gffline->Parent;
472     gffline->Parent=NULL; //just take over
473     if (gffline->gname!=NULL) {
474     gname=gffline->gname;
475     gffline->gname=NULL;
476     }
477     gseq_id=names->gseqs.addName(gffline->gseqname);
478     track_id=names->tracks.addName(gffline->track);
479     strand=gffline->strand;
480     qlen=gffline->qlen;
481     start=gffline->fstart;
482     end=gffline->fend;
483     isCDS=gffline->is_cds; //for now
484     addExon(gffline, keepAttr, noExonAttr);
485     if (keepAttr && noExonAttr) {
486     //simply move the attrs from this first exon
487     //to the transcript
488     if (exons.First()->attrs!=NULL) {
489     attrs=exons.First()->attrs;
490     exons.First()->attrs=NULL;
491     }
492     }
493     } //GTF line
494     else { //GffReader made sure this is a parent line (no parent)
495     //even for a mRNA with a Parent= line
496     gscore=gffline->score;
497     if (gffline->ID==NULL || gffline->ID[0]==0)
498     GError("Error: no ID found for GFF record start\n");
499     gffID=gffline->ID; //there must be an ID here
500     if (gffline->is_mrna) ftype_id=gff_fid_mRNA;
501     else ftype_id=names->feats.addName(gffline->ftype);
502     gffline->ID=NULL; //steal it
503     if (gffline->gname!=NULL) {
504     gname=gffline->gname;
505     gffline->gname=NULL;
506     }
507     start=gffline->fstart;
508     end=gffline->fend;
509     gseq_id=names->gseqs.addName(gffline->gseqname);
510     track_id=names->tracks.addName(gffline->track);
511     qlen=gffline->qlen;
512     qstart=gffline->qstart;
513     qend=gffline->qend;
514     strand=gffline->strand;
515     if (keepAttr) this->parseAttrs(attrs, gffline->info, noExonAttr);
516     }
517     GSeqStat* gsd=gfrd->gseqstats.AddIfNew(new GSeqStat(gseq_id,names->gseqs.lastNameUsed()),true);
518     uptr=gsd;
519     gsd->gflst.Add(this);
520     if (start<gsd->mincoord) gsd->mincoord=start;
521     if (end>gsd->maxcoord) gsd->maxcoord=end;
522     gfrd->gfoAdd(gffID, gffline->gseqname, this);
523     }
524    
525    
526     GffLine* GffReader::nextGffLine() {
527     if (gffline!=NULL) return gffline; //caller should free gffline after processing
528     while (gffline==NULL) {
529     //const char* l=linebuf->getLine();
530     int llen=0;
531     buflen=GFF_LINELEN-1;
532     char* l=fgetline(linebuf, buflen, fh, &fpos, &llen);
533     if (l==NULL) {
534     return NULL; //end of file
535     }
536     int ns=0; //first nonspace position
537     while (l[ns]!=0 && isspace(l[ns])) ns++;
538     if (l[ns]=='#' || llen<10) continue;
539     gffline=new GffLine(this, l);
540     if (gffline->skip) {
541     delete gffline;
542     gffline=NULL;
543     }
544     }
545     return gffline;
546     }
547    
548    
549     GffObj* GffReader::parse(bool keepAttr, bool noExonAttr) { //<- do not use this!
550     //parses one parent feature at a time, assuming absolute grouping of subfeatures
551     //!ASSUMES that subfeatures (exons) are properly grouped together
552     //any interleaving exons from other transcripts will terminate the parsing of the current feature!
553     GffObj* gfo=NULL;
554     while (nextGffLine()!=NULL) {
555     if (gfo==NULL) {//record starts fresh here
556     gfo=new GffObj(this, gffline, keepAttr, noExonAttr);
557     delete gffline; gffline=NULL;
558     continue;
559     }
560     // -- gfo is not NULL from here --
561     if (gffline->Parent==NULL) {// new parent feature starting here
562     //new record start, return what we have so far,
563     //gffline was NOT deleted, so it will be used for the next parse() call
564     return ((gfo==NULL) ? NULL : gfo->finalize());
565     }
566     //-- has a Parent so it's a subfeature segment (exon/CDS/other subfeature)
567     // is it a subfeature of the current gf?
568     if (strcmp(gffline->Parent, gfo->gffID)==0) {
569     //yes, add it
570     gfo->addExon(gffline, !noExonAttr, noExonAttr);
571     delete gffline; gffline=NULL;
572     continue;
573     }
574     // is it a subfeature of a previously loaded gfo?
575     GffObj* prevgfo=gfoFind(gffline->Parent, gffline->gseqname);
576     if (prevgfo==NULL)
577     return ((gfo==NULL) ? NULL : gfo->finalize()); // new subfeature, gffline will be used for the next parse()
578     //this is for an earlier parent
579     prevgfo->addExon(gffline, !noExonAttr, noExonAttr);
580     delete gffline;
581     gffline=NULL;
582     } //while reading gfflines
583     return ((gfo==NULL) ? NULL : gfo->finalize());
584     }
585     char* GffReader::gfoBuildId(const char* id, const char* ctg) {
586     //caller must free the returned pointer
587     char* buf=NULL;
588     int idlen=strlen(id);
589     GMALLOC(buf, idlen+strlen(ctg)+2);
590     strcpy(buf, id);
591     buf[idlen]='~';
592     strcpy(buf+idlen+1, ctg);
593     return buf;
594     }
595    
596     void GffReader::gfoRemove(const char* id, const char* ctg) {
597     char* buf=gfoBuildId(id,ctg);
598     phash.Remove(buf);
599     GFREE(buf);
600     }
601    
602     void GffReader::gfoAdd(const char* id, const char* ctg, GffObj* gfo) {
603     char* buf=gfoBuildId(id,ctg);
604     phash.Add(buf, gfo);
605     GFREE(buf);
606     }
607     GffObj* GffReader::gfoFind(const char* id, const char* ctg) {
608     char* buf=gfoBuildId(id,ctg);
609     GffObj* r=phash.Find(buf);
610     GFREE(buf);
611     return r;
612     }
613    
614    
615     void GffReader::parseAll(GffRecFunc* gproc, bool keepAttr, bool noExonAttr, void* userptr1, void* userptr2) {
616     //WARNING: this is all messed up if the GFF lines are NOT grouped by parent feature
617     //!!! do not use this !!!
618     //iterates through all mappings in the input file
619     //calling gproc with each parsed mapping
620     GffObj* gfo;
621     while ((gfo=this->parse(keepAttr,noExonAttr))!=NULL) { //a valid gff record was parsed
622     if (gfo->empty()) { //shouldn't happen!
623     delete gfo;
624     gfo=NULL;
625     continue;
626     }
627     if (gproc(gfo, userptr1, userptr2)) {
628     //true returned from GfProcFunc means no longer needed
629     gfoRemove(gfo->gffID, gfo->getGSeqName());
630     delete gfo;
631     }
632     else {
633     gflst.Add(gfo);
634     }
635     gfo=NULL;
636     } //while records are parsed
637     phash.Clear();
638     }
639    
640    
641     void GffReader::readAll(bool keepAttr, bool mergeCloseExons, bool noExonAttr) {
642     while (nextGffLine()!=NULL) {
643     if (gffline->Parent==NULL) {//no parent, new GFF3-like record starting
644     //check for uniqueness of gffline->ID in phash !
645     GffObj* f=gfoFind(gffline->ID, gffline->gseqname);
646     if (f!=NULL) {
647     GError("Error: duplicate GFF ID '%s' encountered!\n",gffline->ID);
648     }
649     gflst.Add(new GffObj(this, gffline, keepAttr, noExonAttr));
650     }
651     else { //--- it's a subfeature (exon/CDS/other):
652     GffObj* prevgfo=gfoFind(gffline->Parent, gffline->gseqname);
653     if (prevgfo!=NULL) { //exon of a previously seen GffObj
654     if (gffline->strand!=prevgfo->strand) {
655     GError("Error: duplicate GFF ID '%s' (exons found on different strands of %s)\n",
656     prevgfo->gffID, prevgfo->getGSeqName());
657     }
658     int gdist=(gffline->fstart>prevgfo->end) ? gffline->fstart-prevgfo->end :
659     ((gffline->fend<prevgfo->start)? prevgfo->start-gffline->fend :
660     0 );
661     if (gdist>(int)GFF_MAX_LOCUS) { //too far apart, most likely this is a duplicate ID
662     GError("Error: duplicate GFF ID '%s' (or exons too far apart)!\n",prevgfo->gffID);
663     }
664     prevgfo->addExon(gffline, !noExonAttr, noExonAttr);
665     }
666     else {//new GTF-like record starting here with a subfeature
667     gflst.Add(new GffObj(this, gffline, keepAttr, noExonAttr));
668     //even those with errors will be added here!
669     }
670     } //subfeature
671     //--
672     delete gffline;
673     gffline=NULL;
674     }//while
675     for (int i=0;i<gflst.Count();i++) {
676     gflst[i]->finalize(mergeCloseExons); //finalize the parsing - also merge close exons/features if so requested
677     }
678     // all gff records are now loaded in GList gflst
679     // so we can free the hash
680     phash.Clear();
681     }
682    
683     //this may be called prematurely if exon records are not grouped by parent
684     GffObj* GffObj::finalize(bool mergeCloseExons) {
685     udata=0;
686     uptr=NULL;
687     //TODO: merge adjacent or close segments - for mRNAs
688     //must merge adjacent features, and optionally small gaps
689     if (ftype_id==gff_fid_mRNA) {
690     int mindist=mergeCloseExons ? 5:1;
691     for (int i=0;i<exons.Count()-1;i++) {
692     int ni=i+1;
693     uint mend=exons[i]->end;
694     while (ni<exons.Count()) {
695     int dist=(int)(exons[ni]->start-mend);
696     if (dist<0 || dist>mindist) break; //no merging with next segment
697     mend=exons[ni]->end;
698     exons[i]->end=mend;
699     if (exons[ni]->attrs!=NULL && (exons[i]->attrs==NULL ||
700     exons[i]->attrs->Count()<exons[ni]->attrs->Count())) {
701     //use the other exon attributes, if more
702     delete(exons[i]->attrs);
703     exons[i]->attrs=exons[ni]->attrs;
704     exons[ni]->attrs=NULL;
705     }
706     exons.Delete(ni);
707     } //check for merge with next exon
708     } //for each exon
709     }
710     return this;
711     }
712    
713     void GffObj::parseAttrs(GffAttrs*& atrlist, char* info, bool noExonAttr) {
714     if (names==NULL)
715     GError(ERR_NULL_GFNAMES, "parseAttrs()");
716     if (atrlist==NULL)
717     atrlist=new GffAttrs();
718     char* endinfo=info+strlen(info);
719     char* start=info;
720     char* pch=start;
721     while (start<endinfo) {
722     //skip spaces
723     while (*start==' ' && start<endinfo) start++;
724     pch=strchr(start, ';');
725     if (pch==NULL) pch=endinfo;
726     else {
727     *pch='\0';
728     pch++;
729     }
730     char* ech=strchr(start,'=');
731     if (ech!=NULL) { // attr=value format found
732     *ech='\0';
733     if (strcmp(start, "ID")==0 || strcmp(start,"Parent")==0 || strcmp(start,"Name")==0 ||
734     strcmp(start,"transcript_id")==0 || strcmp(start,"gene_id")==0)
735     { start=pch; continue; } //skip this already recognized and stored attribute
736     if (noExonAttr && (strcmp(start, "exon_number")==0 || strcmp(start, "exon")==0)) { start=pch; continue; }
737     ech++;
738     while (*ech==' ' && ech<endinfo) ech++;//skip extra spaces after the '='
739     atrlist->Add(new GffAttr(names->attrs.addName(start),ech));
740     }
741     /*
742     else { //not an attr=value format
743     atrlist->Add(new GffAttr(names->attrs.addName(start),"1"));
744     }
745     */
746     start=pch;
747     }
748     if (atrlist->Count()==0) { delete atrlist; atrlist=NULL; }
749     }
750    
751     void GffObj::addAttr(const char* attrname, char* attrvalue) {
752     if (this->attrs==NULL)
753     this->attrs=new GffAttrs();
754     this->attrs->Add(new GffAttr(names->attrs.addName(attrname),attrvalue));
755     }
756    
757     void GffObj::getCDS_ends(uint& cds_start, uint& cds_end) {
758     cds_start=0;
759     cds_end=0;
760     if (CDstart==0 || CDend==0) return; //no CDS info
761     int cdsadj=0;
762     if (CDphase=='1' || CDphase=='2') {
763     cdsadj=CDphase-'0';
764     }
765     cds_start=CDstart;
766     cds_end=CDend;
767     if (strand=='-') cds_end-=cdsadj;
768     else cds_start+=cdsadj;
769     }
770    
771     void GffObj::mRNA_CDS_coords(uint& cds_mstart, uint& cds_mend) {
772     //sets cds_start and cds_end to the CDS start,end coordinates on the spliced mRNA transcript
773     cds_mstart=0;
774     cds_mend=0;
775     if (CDstart==0 || CDend==0) return; //no CDS info
776     //restore normal coordinates, just in case
777     unxcoord();
778     int cdsadj=0;
779     if (CDphase=='1' || CDphase=='2') {
780     cdsadj=CDphase-'0';
781     }
782     /*
783     uint seqstart=CDstart;
784     uint seqend=CDend;
785     */
786     uint seqstart=exons.First()->start;
787     uint seqend=exons.Last()->end;
788     int s=0; //resulting nucleotide counter
789     if (strand=='-') {
790     for (int x=exons.Count()-1;x>=0;x--) {
791     uint sgstart=exons[x]->start;
792     uint sgend=exons[x]->end;
793     if (seqend<sgstart || seqstart>sgend) continue;
794     if (seqstart>=sgstart && seqstart<=sgend)
795     sgstart=seqstart; //seqstart within this segment
796     if (seqend>=sgstart && seqend<=sgend)
797     sgend=seqend; //seqend within this segment
798     s+=(int)(sgend-sgstart)+1;
799     if (CDstart>=sgstart && CDstart<=sgend) {
800     //CDstart in this segment
801     //and we are getting the whole transcript
802     cds_mend=s-(int)(CDstart-sgstart);
803     //GMessage("Setting cds_mend to %d\n",cds_mend);
804     }
805     if (CDend>=sgstart && CDend<=sgend) {
806     //CDstart in this segment
807     //and we are getting the whole transcript
808     cds_mstart=s-(int)(CDend-cdsadj-sgstart);
809     //GMessage("Setting cds_mstart to %d\n",cds_mstart);
810     }
811     } //for each exon
812     } // - strand
813     else { // + strand
814     for (int x=0;x<exons.Count();x++) {
815     uint sgstart=exons[x]->start;
816     uint sgend=exons[x]->end;
817     if (seqend<sgstart || seqstart>sgend) continue;
818     if (seqstart>=sgstart && seqstart<=sgend)
819     sgstart=seqstart; //seqstart within this segment
820     if (seqend>=sgstart && seqend<=sgend)
821     sgend=seqend; //seqend within this segment
822     s+=(int)(sgend-sgstart)+1;
823     /* for (uint i=sgstart;i<=sgend;i++) {
824     spliced[s]=gsubseq[i-gstart];
825     s++;
826     }//for each nt
827     */
828     if (CDstart>=sgstart && CDstart<=sgend) {
829     //CDstart in this segment
830     cds_mstart=s-(int)(sgend-CDstart-cdsadj);
831     }
832     if (CDend>=sgstart && CDend<=sgend) {
833     //CDend in this segment
834     cds_mend=s-(int)(sgend-CDend);
835     }
836     } //for each exon
837     } // + strand
838     //spliced[s]=0;
839     //if (rlen!=NULL) *rlen=s;
840     //return spliced;
841     }
842    
843     char* GffObj::getSpliced(GFaSeqGet* faseq, bool CDSonly, int* rlen, uint* cds_start, uint* cds_end,
844     GList<GSeg>* seglst) {
845     if (CDSonly && CDstart==0) return NULL;
846     if (faseq==NULL) { GMessage("Warning: getSpliced(NULL,.. ) called!\n");
847     return NULL;
848     }
849     //restore normal coordinates:
850     unxcoord();
851     if (exons.Count()==0) return NULL;
852     int fspan=end-start+1;
853     const char* gsubseq=faseq->subseq(start, fspan);
854     if (gsubseq==NULL) {
855     GError("Error getting subseq for %s (%d..%d)!\n", gffID, start, end);
856     }
857     char* spliced=NULL;
858     GMALLOC(spliced, covlen+1); //allocate more here
859     uint seqstart, seqend;
860     int cdsadj=0;
861     if (CDphase=='1' || CDphase=='2') {
862     cdsadj=CDphase-'0';
863     }
864     if (CDSonly) {
865     seqstart=CDstart;
866     seqend=CDend;
867     if (strand=='-') seqend-=cdsadj;
868     else seqstart+=cdsadj;
869     }
870     else {
871     seqstart=exons.First()->start;
872     seqend=exons.Last()->end;
873     }
874     int s=0; //resulting nucleotide counter
875     if (strand=='-') {
876     for (int x=exons.Count()-1;x>=0;x--) {
877     uint sgstart=exons[x]->start;
878     uint sgend=exons[x]->end;
879     if (seqend<sgstart || seqstart>sgend) continue;
880     if (seqstart>=sgstart && seqstart<=sgend)
881     sgstart=seqstart; //seqstart within this segment
882     if (seqend>=sgstart && seqend<=sgend)
883     sgend=seqend; //seqend within this segment
884     if (seglst!=NULL)
885     seglst->Add(new GSeg(s+1,s+1+sgend-sgstart));
886     for (uint i=sgend;i>=sgstart;i--) {
887     spliced[s] = ntComplement(gsubseq[i-start]);
888     s++;
889     }//for each nt
890    
891     if (!CDSonly && cds_start!=NULL && CDstart>0) {
892     if (CDstart>=sgstart && CDstart<=sgend) {
893     //CDstart in this segment
894     //and we are getting the whole transcript
895     *cds_end=s-(CDstart-sgstart);
896     }
897     if (CDend>=sgstart && CDend<=sgend) {
898     //CDstart in this segment
899     //and we are getting the whole transcript
900     *cds_start=s-(CDend-cdsadj-sgstart);
901     }
902     }//update local CDS coordinates
903     } //for each exon
904     } // - strand
905     else { // + strand
906     for (int x=0;x<exons.Count();x++) {
907     uint sgstart=exons[x]->start;
908     uint sgend=exons[x]->end;
909     if (seqend<sgstart || seqstart>sgend) continue;
910     if (seqstart>=sgstart && seqstart<=sgend)
911     sgstart=seqstart; //seqstart within this segment
912     if (seqend>=sgstart && seqend<=sgend)
913     sgend=seqend; //seqend within this segment
914     if (seglst!=NULL)
915     seglst->Add(new GSeg(s+1,s+1+sgend-sgstart));
916     for (uint i=sgstart;i<=sgend;i++) {
917     spliced[s]=gsubseq[i-start];
918     s++;
919     }//for each nt
920     if (!CDSonly && cds_start!=NULL && CDstart>0) {
921     if (CDstart>=sgstart && CDstart<=sgend) {
922     //CDstart in this segment
923     //and we are getting the whole transcript
924     *cds_start=s-(sgend-CDstart-cdsadj);
925     }
926     if (CDend>=sgstart && CDend<=sgend) {
927     //CDstart in this segment
928     //and we are getting the whole transcript
929     *cds_end=s-(sgend-CDend);
930     }
931     }//update local CDS coordinates
932     } //for each exon
933     } // + strand
934     spliced[s]=0;
935     if (rlen!=NULL) *rlen=s;
936     return spliced;
937     }
938    
939     char* GffObj::getSplicedTr(GFaSeqGet* faseq, bool CDSonly, int* rlen) {
940     if (CDSonly && CDstart==0) return NULL;
941     //restore normal coordinates:
942     unxcoord();
943     if (exons.Count()==0) return NULL;
944     int fspan=end-start+1;
945     const char* gsubseq=faseq->subseq(start, fspan);
946     if (gsubseq==NULL) {
947     GError("Error getting subseq for %s (%d..%d)!\n", gffID, start, end);
948     }
949    
950     char* translation=NULL;
951     GMALLOC(translation, (int)(covlen/3)+1);
952     uint seqstart, seqend;
953     int cdsadj=0;
954     if (CDphase=='1' || CDphase=='2') {
955     cdsadj=CDphase-'0';
956     }
957     if (CDSonly) {
958     seqstart=CDstart;
959     seqend=CDend;
960     if (strand=='-') seqend-=cdsadj;
961     else seqstart+=cdsadj;
962     }
963     else {
964     seqstart=exons.First()->start;
965     seqend=exons.Last()->end;
966     }
967     Codon codon;
968     int nt=0; //codon nucleotide counter (0..2)
969     int aa=0; //aminoacid count
970     if (strand=='-') {
971     for (int x=exons.Count()-1;x>=0;x--) {
972     uint sgstart=exons[x]->start;
973     uint sgend=exons[x]->end;
974     if (seqend<sgstart || seqstart>sgend) continue;
975     if (seqstart>=sgstart && seqstart<=sgend)
976     sgstart=seqstart; //seqstart within this segment
977     if (seqend>=sgstart && seqend<=sgend) {
978     sgend=seqend; //seqend within this segment
979     }
980     for (uint i=sgend;i>=sgstart;i--) {
981     codon.nuc[nt]=ntComplement(gsubseq[i-start]);
982     nt++;
983     if (nt==3) {
984     nt=0;
985     translation[aa]=codon.translate();
986     aa++;
987     }
988     }//for each nt
989     } //for each exon
990     } // - strand
991     else { // + strand
992     for (int x=0;x<exons.Count();x++) {
993     uint sgstart=exons[x]->start;
994     uint sgend=exons[x]->end;
995     if (seqend<sgstart || seqstart>sgend) continue;
996     if (seqstart>=sgstart && seqstart<=sgend)
997     sgstart=seqstart; //seqstart within this segment
998     if (seqend>=sgstart && seqend<=sgend)
999     sgend=seqend; //seqend within this segment
1000     for (uint i=sgstart;i<=sgend;i++) {
1001     codon.nuc[nt]=gsubseq[i-start];
1002     nt++;
1003     if (nt==3) {
1004     nt=0;
1005     translation[aa]=codon.translate();
1006     aa++;
1007     }
1008     }//for each nt
1009     } //for each exon
1010     } // + strand
1011     translation[aa]=0;
1012     if (rlen!=NULL) *rlen=aa;
1013     return translation;
1014     }
1015    
1016     void GffObj::printSummary(FILE* fout) {
1017     if (fout==NULL) fout=stdout;
1018     fprintf(fout, "%s\t%c\t%d\t%d\t%4.2f\t%4.1f\n", gffID,
1019     strand, start, end, gscore, (float)qcov/10.0);
1020     }
1021    
1022     void GffObj::printGxfLine(FILE* fout, char* tlabel, char* gseqname, bool iscds,
1023     uint segstart, uint segend, int exidx, char phase, bool gff3) {
1024     static char scorestr[14];
1025     strcpy(scorestr,".");
1026     GffAttrs* xattrs=NULL;
1027     if (exidx>=0) {
1028     if (exons[exidx]->score) sprintf(scorestr,"%.2f", exons[exidx]->score);
1029     xattrs=exons[exidx]->attrs;
1030     }
1031     char* geneid=(gname!=NULL)? gname : gffID;
1032     if (phase==0 || !iscds) phase='.';
1033     const char* ftype=iscds ? "CDS" : getSubfName();
1034     if (gff3) {
1035     fprintf(fout,
1036     "%s\t%s\t%s\t%d\t%d\t%s\t%c\t%c\tParent=%s",
1037     gseqname, tlabel, ftype, segstart, segend, scorestr, strand,
1038     phase, gffID);
1039     if (xattrs!=NULL) {
1040     for (int i=0;i<xattrs->Count();i++)
1041     fprintf(fout, ";%s=%s",names->attrs.getName(xattrs->Get(i)->attr_id),
1042     xattrs->Get(i)->attr_val);
1043     }
1044     fprintf(fout, "\n");
1045     } //GFF
1046     else {//for GTF -- we can only print mRNAs here
1047     fprintf(fout, "%s\t%s\t%s\t%d\t%d\t%s\t%c\t%c\t",
1048     gseqname, tlabel, ftype, segstart, segend, scorestr, strand, phase);
1049     if (ismRNA())
1050     fprintf(fout,"gene_id \"%s\"; transcript_id \"%s\";", geneid, gffID);
1051     if (xattrs!=NULL) {
1052     for (int i=0;i<xattrs->Count();i++) {
1053     if (xattrs->Get(i)->attr_val==NULL) continue;
1054     fprintf(fout, " %s ",names->attrs.getName(xattrs->Get(i)->attr_id));
1055     if (xattrs->Get(i)->attr_val[0]=='"')
1056     fprintf(fout, "%s;",xattrs->Get(i)->attr_val);
1057     else fprintf(fout, "\"%s\";",xattrs->Get(i)->attr_val);
1058     }
1059     }
1060     fprintf(fout, "\n");
1061     }//GTF
1062     }
1063    
1064     void GffObj::printGxf(FILE* fout, GffPrintMode gffp, char* tlabel) {
1065     static char tmpstr[255];
1066     if (tlabel==NULL) {
1067     tlabel=track_id>=0 ? names->tracks.Get(track_id)->name :
1068     (char*)"gffobj" ;
1069     }
1070    
1071     unxcoord();
1072     if (exons.Count()==0) return;
1073     char* gseqname=names->gseqs.Get(gseq_id)->name;
1074     bool gff3 = (gffp>=pgffAny);
1075     bool showCDS = (gffp==pgtfAny || gffp==pgtfCDS || gffp==pgffCDS || gffp==pgffAny || gffp==pgffBoth);
1076     bool showExon = (gffp<=pgtfExon || gffp==pgffAny || gffp==pgffExon || gffp==pgffBoth);
1077     if (gff3) {
1078     //print GFF3 mRNA line:
1079     if (gscore>0.0) sprintf(tmpstr,"%.2f", gscore);
1080     else strcpy(tmpstr,".");
1081     uint pstart, pend;
1082     if (gffp==pgffCDS) {
1083     pstart=CDstart;
1084     pend=CDend;
1085     }
1086     else { pstart=start;pend=end; }
1087     const char* ftype=ismRNA() ? "mRNA" : getFeatureName();
1088     fprintf(fout,
1089     "%s\t%s\t%s\t%d\t%d\t%s\t%c\t.\tID=%s",
1090     gseqname, tlabel, ftype, pstart, pend, tmpstr, strand, gffID);
1091     if (gname!=NULL)
1092     fprintf(fout, ";Name=%s",gname);
1093     if (CDstart>0 && !showCDS && !isCDS) fprintf(fout,";CDS=%d:%d",CDstart,CDend);
1094     if (attrs!=NULL) {
1095     for (int i=0;i<attrs->Count();i++) {
1096     fprintf(fout,";%s=%s", names->attrs.getName(attrs->Get(i)->attr_id),
1097     attrs->Get(i)->attr_val);
1098     }
1099     }
1100     fprintf(fout,"\n");
1101     }// gff3 mRNA line
1102     if (showExon) {
1103     //print exons
1104     for (int i=0;i<exons.Count();i++) {
1105     printGxfLine(fout, tlabel, gseqname, isCDS, exons[i]->start, exons[i]->end, i, exons[i]->phase, gff3);
1106     }
1107     }//printing exons
1108     if (showCDS && !isCDS && CDstart>0) {
1109     GArray<GffCDSeg> cds(true,true);
1110     getCDSegs(cds);
1111     for (int i=0;i<cds.Count();i++) {
1112     printGxfLine(fout, tlabel, gseqname, true, cds[i].start, cds[i].end, -1, cds[i].phase, gff3);
1113     }
1114     } //showCDS
1115     }
1116    
1117    
1118     void GffObj::getCDSegs(GArray<GffCDSeg>& cds) {
1119     GffCDSeg cdseg;
1120     int cdsacc=0;
1121     if (CDphase=='1' || CDphase=='2') {
1122     cdsacc+= 3-(CDphase-'0');
1123     }
1124     if (strand=='-') {
1125     for (int x=exons.Count()-1;x>=0;x--) {
1126     uint sgstart=exons[x]->start;
1127     uint sgend=exons[x]->end;
1128     if (CDend<sgstart || CDstart>sgend) continue;
1129     if (CDstart>=sgstart && CDstart<=sgend)
1130     sgstart=CDstart; //cdstart within this segment
1131     if (CDend>=sgstart && CDend<=sgend)
1132     sgend=CDend; //cdend within this segment
1133     cdseg.start=sgstart;
1134     cdseg.end=sgend;
1135     cdseg.exonidx=x;
1136     //cdseg.phase='0'+(cdsacc>0 ? (3-cdsacc%3)%3 : 0);
1137     cdseg.phase='0'+ (3-cdsacc%3)%3;
1138     cdsacc+=sgend-sgstart+1;
1139     cds.Add(cdseg);
1140     } //for each exon
1141     } // - strand
1142     else { // + strand
1143     for (int x=0;x<exons.Count();x++) {
1144     uint sgstart=exons[x]->start;
1145     uint sgend=exons[x]->end;
1146     if (CDend<sgstart || CDstart>sgend) continue;
1147     if (CDstart>=sgstart && CDstart<=sgend)
1148     sgstart=CDstart; //seqstart within this segment
1149     if (CDend>=sgstart && CDend<=sgend)
1150     sgend=CDend; //seqend within this segment
1151     cdseg.start=sgstart;
1152     cdseg.end=sgend;
1153     cdseg.exonidx=x;
1154     //cdseg.phase='0'+(cdsacc>0 ? (3-cdsacc%3)%3 : 0);
1155     cdseg.phase='0' + (3-cdsacc%3)%3 ;
1156     cdsacc+=sgend-sgstart+1;
1157     cds.Add(cdseg);
1158     } //for each exon
1159     } // + strand
1160     }
1161     /*
1162     #ifdef DEBUG
1163     void GffObj::dbgPrint(const char* msg) {
1164     if (msg!=NULL) fprintf(stdout, ">> %s\n",msg);
1165     char* tlabel=track_id>=0 ? names->tracks.Get(track_id)->name :
1166     (char*)"gmapobj" ;
1167     char scorestr[14];
1168     char strand=revstrand?'-':'+';
1169     unxcoord();
1170     char* gseqname=names->gseqs.Get(gseq_id)->name;
1171     char* fname=f_id>=0 ? names->feats.Get(f_id)->name : (char*)"nofeatname";
1172    
1173     fprintf(stdout, "%s\t%s\t%s\t%d\t%d\t.\t%c\t.\tID=%s;Name=%s\n",
1174     gseqname, tlabel, fname, start, end, strand, gffID, gffID);
1175    
1176     for (int fi=0;fi<features->Count();fi++) {
1177     GFeature* feature=features->Get(fi);
1178     fname=names->feats.Get(feature->name_id)->name;
1179     GffExon* segs=feature->segs;
1180     int segcount=feature->segcount;
1181     if (segcount==0 || segs==NULL) continue;
1182     for (int i=0;i<segcount;i++) {
1183     if (segs[i].start==0) continue;
1184     if (segs[i].score) sprintf(scorestr,"%.2f", segs[i].score/100.00);
1185     else strcpy(scorestr,".");
1186     fprintf(stdout,
1187     "%s\t%s\t%s\t%d\t%d\t%s\t%c\t.\tParent=%s\n",
1188     gseqname, tlabel, fname, segs[i].start, segs[i].end, scorestr, strand, gffID);
1189     }
1190     }
1191     fflush(stdout);
1192     }
1193     #endif
1194     */