ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/gclib/gff.cpp
(Generate patch)
# Line 788 | Line 788
788    isTranscript(gffline->is_transcript || gffline->exontype!=0);
789    //fromGff3(gffline->is_gff3);
790  
791 <  if (gffline->parents!=NULL) {
791 >  if (gffline->parents!=NULL && !gffline->is_transcript) {
792      //GTF style -- create a GffObj directly by subfeature
793      //(also possible orphan GFF3 exon line, or an exon given before its parent (chado))
794      if (gffline->exontype!=0) { //recognized exon-like feature
# Line 805 | Line 805
805          //this is likely the first exon/segment of the feature
806          addExon(gfrd, gffline, keepAttr, noExonAttr);
807          }
808 <      else { //a parented feature with an ID -- probably an orphan or premature GFF3 subfeature line
808 >      else { //a parented feature with an ID: orphan or premature GFF3 subfeature line
809          if (gffline->is_gff3 && gffline->exontype!=0) {
810               //premature exon given before its parent transcript
811               //create the transcript entry here
# Line 814 | Line 814
814               //this is the first exon/segment of the transcript
815               addExon(gfrd, gffline, keepAttr, noExonAttr);
816               }
817 <            else { //unrecognized non-exon feature ? use the ID instead
817 >        else { //unrecognized non-exon feature ? use the ID instead
818 >             this->hasGffID(true);
819               gffID=Gstrdup(gffline->ID);
820               if (keepAttr) this->parseAttrs(attrs, gffline->info);
821               }
822          }
823 <    } //subfeature given directly
824 <  else { //gffline->parents==NULL
823 >    } //non-transcript parented subfeature given directly
824 >  else {
825 >        //non-parented feature OR a recognizable transcript
826      //create a parent feature in its own right
827      gscore=gffline->score;
828      if (gffline->ID==NULL || gffline->ID[0]==0)
# Line 832 | Line 834
834      ftype_id=names->feats.addName(gffline->ftype);
835      if (gffline->is_transcript)
836        exon_ftype_id=gff_fid_exon;
835 -
837      if (keepAttr) this->parseAttrs(attrs, gffline->info);
838      }//no parent
839  
840    if (gffline->gene_name!=NULL) {
841       gene_name=Gstrdup(gffline->gene_name);
842       }
843 <  if (gffline->gene_id!=NULL) {
843 >  if (gffline->gene_id) {
844       geneID=Gstrdup(gffline->gene_id);
845       }
846 +  else if (gffline->is_transcript && gffline->parents) {
847 +         geneID=Gstrdup(gffline->parents[0]);
848 +     }
849  
850    GSeqStat* gsd=gfrd->gseqstats.AddIfNew(new GSeqStat(gseq_id,names->gseqs.lastNameUsed()),true);
851    uptr=gsd;
# Line 901 | Line 905
905   GFREE(buf);
906   }
907   */
904 - //Warning: if gflst gets altered, idx becomes obsolete
908   GfoHolder* GffReader::gfoAdd(GffObj* gfo, int idx) {
909 < //TODO: must make sure the gfo ID isn't there already.
907 <
909 > //Warning: if gflst gets altered, idx becomes obsolete
910   GVec<GfoHolder>* glst=phash.Find(gfo->gffID);
911   if (glst==NULL)
912           glst=new GVec<GfoHolder>(1);
# Line 929 | Line 931
931        GfoHolder& gfo = gl->Get(i);
932        if (ctg!=NULL && strcmp(ctg, gfo.gffobj->getGSeqName())!=0)
933             continue;
934 <      if (strand && strand != gfo.gffobj->strand)
934 >      if (strand && gfo.gffobj->strand!='.' && strand != gfo.gffobj->strand)
935             continue;
936        if (start>0) {
937             if (abs((int)start-(int)gfo.gffobj->start)>GFF_MAX_LOCUS)
# Line 1109 | Line 1111
1111       GfoHolder* prevseen=NULL;
1112       GVec<GfoHolder>* prevgflst=NULL;
1113       if (gffline->ID && gffline->exontype==0) {
1114 <         //>>>>> for a parent-like IDed feature (mRNA, gene, etc.)
1114 >         //>> for a parent-like IDed feature (mRNA, gene, etc.)
1115                   //look for same ID on the same chromosome/strand/locus
1116                   prevseen=gfoFind(gffline->ID, gffline->gseqname, &prevgflst, gffline->strand, gffline->fstart);
1117                   if (prevseen!=NULL) {
# Line 1145 | Line 1147
1147         if (!prevseen) newGffRec(gffline, keepAttr, noExonAttr, NULL, NULL, prevgflst);
1148         }
1149      else { //--- it's a child feature (exon/CDS but could still be a mRNA with gene(s) as parent)
1150 +       //updates all the declared parents with this child
1151         bool found_parent=false;
1152         GfoHolder* newgfo=prevseen;
1153         GVec<GfoHolder>* newgflst=NULL;
1154         for (int i=0;i<gffline->num_parents;i++) {
1155              if (transcriptsOnly && discarded_ids.Find(gffline->parents[i])!=NULL)
1156                  continue; //skipping discarded parent feature
1157 <            GfoHolder* parentgfo=gfoFind(gffline->parents[i], gffline->gseqname,
1158 <                                          &newgflst, gffline->strand, gffline->fstart, gffline->fend);
1157 >            GfoHolder* parentgfo=NULL;
1158 >            if (gffline->is_transcript || gffline->exontype==0) //possibly a transcript
1159 >              parentgfo=gfoFind(gffline->parents[i], gffline->gseqname,
1160 >                                          &newgflst, gffline->strand, gffline->fstart, gffline->fend);
1161 >            else
1162 >              parentgfo=gfoFind(gffline->parents[i], gffline->gseqname,
1163 >                                          &newgflst, gffline->strand, gffline->fstart);
1164              if (parentgfo!=NULL) { //parent GffObj parsed earlier
1165 <                   found_parent=true;
1165 >
1166 >                   //found_parent=true;
1167                     if (parentgfo->gffobj->isGene() && gffline->is_transcript
1168                                     && gffline->exontype==0) {
1169                         //not an exon, but a transcript parented by a gene
# Line 1172 | Line 1181
1181                     } //overlapping parent feature found
1182              } //for each parsed parent Id
1183         if (!found_parent) { //new GTF-like record starting here with a subfeature directly
1184 <             //or it could be some chado GFF3 barf with exons declared BEFORE their parent :(
1184 >             //or it could be some chado GFF3 barf with exons coming BEFORE their parent :(
1185              //check if this feature isn't parented by a previously stored "exon" subfeature
1186              char* subp_name=NULL;
1187              CNonExon* subp=subfPoolCheck(gffline, pex, subp_name);
# Line 1183 | Line 1192
1192                 if (!addExonFeature(gfoh, gffline, pex, noExonAttr))
1193                        validation_errors=true;
1194                 }
1195 <              else { //no parent seen before, create one directly with this exon
1195 >              else { //no parent seen before,
1196                 //loc_debug=true;
1197                 GfoHolder* ngfo=prevseen;
1198 <               if (ngfo==NULL)
1198 >               if (ngfo==NULL) {
1199 >                   //if it's an exon type, create directly the parent with this exon
1200 >                   //but if it's recognized as a transcript, the object itself is created
1201                     ngfo=newGffRec(gffline, keepAttr, noExonAttr, NULL, NULL, newgflst);
1202 <               if (gffline->ID!=NULL && gffline->exontype==0)
1202 >                   }
1203 >               if (!ngfo->gffobj->isTranscript() &&
1204 >                     gffline->ID!=NULL && gffline->exontype==0)
1205                       subfPoolAdd(pex, ngfo);
1206                 //even those with errors will be added here!
1207                 }

Diff Legend

- Removed lines
+ Added lines
< Changed lines
> Changed lines