ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/gclib/gff.cpp
(Generate patch)
# Line 788 | Line 788
788    isTranscript(gffline->is_transcript || gffline->exontype!=0);
789    //fromGff3(gffline->is_gff3);
790  
791 <  if (gffline->parents!=NULL) {
791 >  if (gffline->parents!=NULL && !gffline->is_transcript) {
792      //GTF style -- create a GffObj directly by subfeature
793      //(also possible orphan GFF3 exon line, or an exon given before its parent (chado))
794      if (gffline->exontype!=0) { //recognized exon-like feature
# Line 805 | Line 805
805          //this is likely the first exon/segment of the feature
806          addExon(gfrd, gffline, keepAttr, noExonAttr);
807          }
808 <      else { //a parented feature with an ID -- probably an orphan or premature GFF3 subfeature line
808 >      else { //a parented feature with an ID: orphan or premature GFF3 subfeature line
809          if (gffline->is_gff3 && gffline->exontype!=0) {
810               //premature exon given before its parent transcript
811               //create the transcript entry here
# Line 814 | Line 814
814               //this is the first exon/segment of the transcript
815               addExon(gfrd, gffline, keepAttr, noExonAttr);
816               }
817 <            else { //unrecognized non-exon feature ? use the ID instead
817 >        else { //unrecognized non-exon feature ? use the ID instead
818 >             this->hasGffID(true);
819               gffID=Gstrdup(gffline->ID);
820               if (keepAttr) this->parseAttrs(attrs, gffline->info);
821               }
822          }
823 <    } //subfeature given directly
824 <  else { //gffline->parents==NULL
823 >    } //non-transcript parented subfeature given directly
824 >  else {
825 >        //non-parented feature OR a recognizable transcript
826      //create a parent feature in its own right
827      gscore=gffline->score;
828      if (gffline->ID==NULL || gffline->ID[0]==0)
# Line 832 | Line 834
834      ftype_id=names->feats.addName(gffline->ftype);
835      if (gffline->is_transcript)
836        exon_ftype_id=gff_fid_exon;
835
837      if (keepAttr) this->parseAttrs(attrs, gffline->info);
838      }//no parent
839  
840    if (gffline->gene_name!=NULL) {
841       gene_name=Gstrdup(gffline->gene_name);
842       }
843 <  if (gffline->gene_id!=NULL) {
843 >  if (gffline->gene_id) {
844       geneID=Gstrdup(gffline->gene_id);
845       }
846 +  else if (gffline->is_transcript && gffline->parents) {
847 +         geneID=Gstrdup(gffline->parents[0]);
848 +     }
849  
850    GSeqStat* gsd=gfrd->gseqstats.AddIfNew(new GSeqStat(gseq_id,names->gseqs.lastNameUsed()),true);
851    uptr=gsd;
# Line 901 | Line 905
905   GFREE(buf);
906   }
907   */
904 //Warning: if gflst gets altered, idx becomes obsolete
908   GfoHolder* GffReader::gfoAdd(GffObj* gfo, int idx) {
909 < //TODO: must make sure the gfo ID isn't there already.
907 <
909 > //Warning: if gflst gets altered, idx becomes obsolete
910   GVec<GfoHolder>* glst=phash.Find(gfo->gffID);
911   if (glst==NULL)
912           glst=new GVec<GfoHolder>(1);
# Line 929 | Line 931
931        GfoHolder& gfo = gl->Get(i);
932        if (ctg!=NULL && strcmp(ctg, gfo.gffobj->getGSeqName())!=0)
933             continue;
934 <      if (strand && strand != gfo.gffobj->strand)
934 >      if (strand && gfo.gffobj->strand!='.' && strand != gfo.gffobj->strand)
935             continue;
936        if (start>0) {
937             if (abs((int)start-(int)gfo.gffobj->start)>GFF_MAX_LOCUS)
# Line 1109 | Line 1111
1111       GfoHolder* prevseen=NULL;
1112       GVec<GfoHolder>* prevgflst=NULL;
1113       if (gffline->ID && gffline->exontype==0) {
1114 <         //>>>>> for a parent-like IDed feature (mRNA, gene, etc.)
1114 >         //>> for a parent-like IDed feature (mRNA, gene, etc.)
1115                   //look for same ID on the same chromosome/strand/locus
1116                   prevseen=gfoFind(gffline->ID, gffline->gseqname, &prevgflst, gffline->strand, gffline->fstart);
1117                   if (prevseen!=NULL) {
# Line 1145 | Line 1147
1147         if (!prevseen) newGffRec(gffline, keepAttr, noExonAttr, NULL, NULL, prevgflst);
1148         }
1149      else { //--- it's a child feature (exon/CDS but could still be a mRNA with gene(s) as parent)
1150 +       //updates all the declared parents with this child
1151         bool found_parent=false;
1152         GfoHolder* newgfo=prevseen;
1153         GVec<GfoHolder>* newgflst=NULL;
1154         for (int i=0;i<gffline->num_parents;i++) {
1155              if (transcriptsOnly && discarded_ids.Find(gffline->parents[i])!=NULL)
1156                  continue; //skipping discarded parent feature
1157 <            GfoHolder* parentgfo=gfoFind(gffline->parents[i], gffline->gseqname,
1158 <                                          &newgflst, gffline->strand, gffline->fstart, gffline->fend);
1157 >            GfoHolder* parentgfo=NULL;
1158 >            if (gffline->is_transcript || gffline->exontype==0) {//possibly a transcript
1159 >               parentgfo=gfoFind(gffline->parents[i], gffline->gseqname,
1160 >                                    &newgflst, gffline->strand, gffline->fstart, gffline->fend);
1161 >               }
1162 >            else {
1163 >               //for exon-like entities we only need a parent to be in locus distance,
1164 >               //on the same strand
1165 >               parentgfo=gfoFind(gffline->parents[i], gffline->gseqname,
1166 >                                     &newgflst, gffline->strand, gffline->fstart);
1167 >               }
1168              if (parentgfo!=NULL) { //parent GffObj parsed earlier
1169                     found_parent=true;
1170                     if (parentgfo->gffobj->isGene() && gffline->is_transcript
# Line 1172 | Line 1184
1184                     } //overlapping parent feature found
1185              } //for each parsed parent Id
1186         if (!found_parent) { //new GTF-like record starting here with a subfeature directly
1187 <             //or it could be some chado GFF3 barf with exons declared BEFORE their parent :(
1187 >             //or it could be some chado GFF3 barf with exons coming BEFORE their parent :(
1188              //check if this feature isn't parented by a previously stored "exon" subfeature
1189              char* subp_name=NULL;
1190              CNonExon* subp=subfPoolCheck(gffline, pex, subp_name);
# Line 1183 | Line 1195
1195                 if (!addExonFeature(gfoh, gffline, pex, noExonAttr))
1196                        validation_errors=true;
1197                 }
1198 <              else { //no parent seen before, create one directly with this exon
1198 >              else { //no parent seen before,
1199                 //loc_debug=true;
1200                 GfoHolder* ngfo=prevseen;
1201 <               if (ngfo==NULL)
1201 >               if (ngfo==NULL) {
1202 >                   //if it's an exon type, create directly the parent with this exon
1203 >                   //but if it's recognized as a transcript, the object itself is created
1204                     ngfo=newGffRec(gffline, keepAttr, noExonAttr, NULL, NULL, newgflst);
1205 <               if (gffline->ID!=NULL && gffline->exontype==0)
1205 >                   }
1206 >               if (!ngfo->gffobj->isTranscript() &&
1207 >                     gffline->ID!=NULL && gffline->exontype==0)
1208                       subfPoolAdd(pex, ngfo);
1209                 //even those with errors will be added here!
1210                 }
# Line 1222 | Line 1238
1238      ftype_id=gff_fid_mRNA;
1239      //exon_ftype_id=gff_fid_exon;
1240      }
1241 < //if (ftype_id==gff_fid_mRNA || exon_ftype_id==gff_fid_exon || mergeCloseExons) {
1242 < if (isTranscript() || exon_ftype_id==gff_fid_exon || mergeCloseExons) {
1243 <   int mindist=mergeCloseExons ? 5:1;
1244 <   for (int i=0;i<exons.Count()-1;i++) {
1245 <     int ni=i+1;
1246 <     uint mend=exons[i]->end;
1247 <     while (ni<exons.Count()) {
1248 <       int dist=(int)(exons[ni]->start-mend);
1249 <       if (dist>mindist) break; //no merging with next segment
1250 <       if (gfr!=NULL && gfr->gff_warns && dist!=0 && (exons[ni]->exontype!=exgffUTR && exons[i]->exontype!=exgffUTR)) {
1251 <          GMessage("GFF warning: merging adjacent/overlapping segments of %s on %s (%d-%d, %d-%d)\n",
1252 <               gffID, getGSeqName(), exons[i]->start, exons[i]->end,exons[ni]->start, exons[ni]->end);
1253 <          }
1254 <       mend=exons[ni]->end;
1255 <       covlen-=exons[i]->len();
1256 <       exons[i]->end=mend;
1257 <       covlen+=exons[i]->len();
1258 <       covlen-=exons[ni]->len();
1259 <       if (exons[ni]->attrs!=NULL && (exons[i]->attrs==NULL ||
1260 <            exons[i]->attrs->Count()<exons[ni]->attrs->Count())) {
1261 <              //use the other exon attributes, if more
1262 <              delete(exons[i]->attrs);
1263 <              exons[i]->attrs=exons[ni]->attrs;
1264 <              exons[ni]->attrs=NULL;
1265 <              }
1266 <       exons.Delete(ni);
1267 <       } //check for merge with next exon
1241 > if (exons.Count()>0 && (isTranscript() || exon_ftype_id==gff_fid_exon)) {
1242 >   if (mergeCloseExons) {
1243 >     int mindist=mergeCloseExons ? 5:1;
1244 >     for (int i=0;i<exons.Count()-1;i++) {
1245 >       int ni=i+1;
1246 >       uint mend=exons[i]->end;
1247 >       while (ni<exons.Count()) {
1248 >         int dist=(int)(exons[ni]->start-mend);
1249 >         if (dist>mindist) break; //no merging with next segment
1250 >         if (gfr!=NULL && gfr->gff_warns && dist!=0 && (exons[ni]->exontype!=exgffUTR && exons[i]->exontype!=exgffUTR)) {
1251 >            GMessage("GFF warning: merging adjacent/overlapping segments of %s on %s (%d-%d, %d-%d)\n",
1252 >                 gffID, getGSeqName(), exons[i]->start, exons[i]->end,exons[ni]->start, exons[ni]->end);
1253 >            }
1254 >         mend=exons[ni]->end;
1255 >         covlen-=exons[i]->len();
1256 >         exons[i]->end=mend;
1257 >         covlen+=exons[i]->len();
1258 >         covlen-=exons[ni]->len();
1259 >         if (exons[ni]->attrs!=NULL && (exons[i]->attrs==NULL ||
1260 >              exons[i]->attrs->Count()<exons[ni]->attrs->Count())) {
1261 >                //use the other exon attributes, if more
1262 >                delete(exons[i]->attrs);
1263 >                exons[i]->attrs=exons[ni]->attrs;
1264 >                exons[ni]->attrs=NULL;
1265 >                }
1266 >         exons.Delete(ni);
1267 >         } //check for merge with next exon
1268       } //for each exon
1269 <   }
1269 >   } //merge close exons
1270 >   //shrink transcript to the exons' span
1271 >   this->start=exons.First()->start;
1272 >   this->end=exons.Last()->end;
1273 > }
1274   //attribute reduction for GTF records
1275   if (keepAttrs && !noExonAttr && !hasGffID()
1276            && exons.Count()>0 && exons[0]->attrs!=NULL) {

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines