5 |
|
#include "GFastaIndex.h" |
6 |
|
#include "GFaSeqGet.h" |
7 |
|
|
8 |
– |
|
8 |
|
// Function type: takes a GffObj* and a GList<GffObj>* and returns bool.
// NOTE(review): presumably a record-validation hook (per the name), with
// gfadd receiving any additional records -- the meaning of the return value
// and of gfadd is not visible here; confirm against the code that calls it.
typedef bool GFValidateFunc(GffObj* gf, GList<GffObj>* gfadd); |
9 |
|
|
10 |
|
class GeneInfo { //for Ensembl GTF conversion |
173 |
|
faseq=NULL; |
174 |
|
last_fetchid=-1; |
175 |
|
char* gseqname=GffObj::names->gseqs.getName(gseq_id); |
177 |
– |
// DEBUG: |
178 |
– |
//GMessage("..processing transcripts on: %s\n",gseqname); |
179 |
– |
//genomic sequence given |
180 |
– |
/* |
181 |
– |
if (gcdb!=NULL) { |
182 |
– |
uint32 reclen=0; |
183 |
– |
off_t rpos=gcdb->getRecordPos(gseqname, &reclen); |
184 |
– |
if (rpos<0) // genomic sequence not found |
185 |
– |
GError("Error: cannot find genomic sequence '%s' in %s\n",gseqname, fastaPath); |
186 |
– |
// WARNING: does not validate FASTA line-len uniformity! |
187 |
– |
faseq=new GFaSeqGet(fastaPath,rpos, false); |
188 |
– |
faseq->loadall(reclen); //load the whole sequence, it's faster |
189 |
– |
last_fetchid=gseq_id; |
190 |
– |
return faseq; |
191 |
– |
} |
192 |
– |
*/ |
176 |
|
if (faIdx!=NULL) { //fastaPath was the multi-fasta file name |
177 |
|
GFastaRec* farec=faIdx->getRecord(gseqname); |
178 |
|
if (farec!=NULL) { |
251 |
|
|
252 |
|
// Declaration only: takes a C string (gsym) and returns a const char*.
// NOTE(review): presumably looks up a description for the gene symbol gsym;
// whether NULL can be returned and who owns the returned string cannot be
// determined from this declaration -- confirm against the definition.
const char* getGeneDescr(const char* gsym); |
253 |
|
|
254 |
+ |
// Declaration only: takes a GffLocus* and an optional C string `pre`
// (defaults to NULL); returns nothing.
// NOTE(review): presumably a debugging aid that prints the locus with `pre`
// as a leading label -- output destination and format are not visible here;
// confirm against the definition.
void printLocus(GffLocus* loc, const char* pre=NULL); |
255 |
+ |
|
256 |
|
class GffLocus:public GSeg { |
257 |
|
public: |
258 |
|
int gseq_id; //id of underlying genomic sequence |
259 |
|
int locus_num; |
260 |
|
bool is_mrna; |
261 |
+ |
bool startChanged; //prompt reordering of container list |
262 |
|
char strand; |
263 |
|
GffObj* t_maxcov; //transcript with maximum coverage (for main "ref" transcript) |
264 |
|
GList<GffObj> rnas; //list of transcripts (isoforms) for this locus |
266 |
|
GList<CGeneSym> gene_names; |
267 |
|
GList<CGeneSym> gene_ids; |
268 |
|
int v; //user flag/data |
269 |
+ |
/* |
270 |
|
bool operator==(GffLocus& d){ |
271 |
|
return (gseq_id==d.gseq_id && strand==d.strand && start==d.start && end==d.end); |
272 |
|
} |
284 |
|
else return end<d.end; |
285 |
|
} else return (start<d.start); |
286 |
|
} |
287 |
< |
|
287 |
> |
*/ |
288 |
|
const char* getGeneName() { |
289 |
|
if (gene_names.Count()==0) return NULL; |
290 |
|
return gene_names.First()->name.chars(); |
321 |
|
end=0; |
322 |
|
strand=0; |
323 |
|
is_mrna=false; |
324 |
+ |
startChanged=false; |
325 |
|
if (t!=NULL) { |
326 |
|
start=t->exons.First()->start; |
327 |
|
end=t->exons.Last()->end;; |
355 |
|
uint jend=locus.mexons[j].end; |
356 |
|
if (iend<jstart) { i++; continue; } |
357 |
|
if (jend<istart) { j++; continue; } |
370 |
– |
//if (mexons[i].overlap(jstart, jend)) { |
371 |
– |
//exon overlap was found : |
358 |
|
ovlexons.Add(j); |
359 |
|
//extend mexons[i] as needed |
360 |
|
if (jstart<istart) mexons[i].start=jstart; |
370 |
|
} |
371 |
|
} //while next mexons merge |
372 |
|
} // mexons[i] end extend |
387 |
– |
// } //exon overlap |
373 |
|
j++; //check the next locus.mexon |
374 |
|
} |
375 |
|
//-- add the rest of the non-overlapping mexons: |
385 |
|
if (locus.rnas[i]!=lnkrna) rnas.Add(locus.rnas[i]); |
386 |
|
} |
387 |
|
// -- adjust start/end as needed |
388 |
< |
if (start>locus.start) start=locus.start; |
388 |
> |
if (start>locus.start) { |
389 |
> |
startChanged=true; |
390 |
> |
start=locus.start; |
391 |
> |
} |
392 |
|
if (end<locus.end) end=locus.end; |
393 |
|
if (locus.is_mrna) is_mrna=true; |
394 |
|
if (t_maxcov->covlen<locus.t_maxcov->covlen) |
395 |
|
t_maxcov=locus.t_maxcov; |
396 |
|
} |
397 |
|
|
410 |
– |
|
398 |
|
bool exonOverlap(GffLocus& loc) { |
399 |
|
//check if any mexons overlap! |
400 |
|
if (strand!=loc.strand || loc.start>end || start>loc.end) return false; |
470 |
|
void rnas_add(GffObj* t) { |
471 |
|
rnas.Add(t); |
472 |
|
// adjust start/end |
473 |
< |
if (start>t->start || start==0) start=t->start; |
473 |
> |
//if (start==0 || start>t->start) start=t->start; |
474 |
> |
if (start==0) start=t->start; |
475 |
> |
else if (start>t->start) { |
476 |
> |
startChanged=true; |
477 |
> |
start=t->start; |
478 |
> |
} |
479 |
|
if (end<t->end) end=t->end; |
480 |
|
if (t_maxcov->covlen<t->covlen) t_maxcov=t; |
481 |
|
if (strand==0) strand=t->strand; |