ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/gclib/gff.cpp
(Generate patch)
# Line 17 | Line 17
17   const uint gfo_flag_CHILDREN_PROMOTED= 0x00000002;
18   const uint gfo_flag_IS_GENE          = 0x00000004;
19   const uint gfo_flag_IS_TRANSCRIPT    = 0x00000008;
20 < const uint gfo_flag_FROM_GFF3        = 0x00000010;
20 > const uint gfo_flag_HAS_GFF_ID       = 0x00000010; //found GFF3 feature line with its own ID
21   const uint gfo_flag_BY_EXON          = 0x00000020; //created by subfeature (exon) directly
22   const uint gfo_flag_DISCARDED        = 0x00000100;
23   const uint gfo_flag_LST_KEEP         = 0x00000200;
# Line 786 | Line 786
786    isCDS=gffline->is_cds; //for now
787    isGene(gffline->is_gene);
788    isTranscript(gffline->is_transcript || gffline->exontype!=0);
789 <  fromGff3(gffline->is_gff3);
789 >  //fromGff3(gffline->is_gff3);
790  
791    if (gffline->parents!=NULL) {
792      //GTF style -- create a GffObj directly by subfeature
# Line 799 | Line 799
799         //make this GffObj of the same feature type
800         ftype_id=names->feats.addName(gffline->ftype);
801         }
802 <    if (gffline->ID==NULL) { //typical GTF
802 >    if (gffline->ID==NULL) { //typical GTF2 without "transcript" line
803          gffID=Gstrdup(gffline->parents[0]);
804          this->createdByExon(true);
805          //this is likely the first exon/segment of the feature
806          addExon(gfrd, gffline, keepAttr, noExonAttr);
807          }
808 <      else { //a parented feature with an ID -- probably an orphan GFF3 line
808 >      else { //a parented feature with an ID -- probably an orphan or premature GFF3 subfeature line
809          if (gffline->is_gff3 && gffline->exontype!=0) {
810               //premature exon given before its parent transcript
811               //create the transcript entry here
# Line 825 | Line 825
825      gscore=gffline->score;
826      if (gffline->ID==NULL || gffline->ID[0]==0)
827        GError("Error: no ID found for GFF record start\n");
828 +    this->hasGffID(true);
829      gffID=Gstrdup(gffline->ID); //there must be an ID here
830      //if (gffline->is_transcript) ftype_id=gff_fid_mRNA;
831        //else
# Line 947 | Line 948
948       int gfoidx=gflst.Add(newgfo);
949       r=gfoAdd(newgfo, gfoidx);
950       }
951 +  /*
952    if (gff_warns) {
953      int* pcount=tids.Find(newgfo->gffID);
954      if (pcount!=NULL) {
# Line 957 | Line 959
959         tids.Add(newgfo->gffID,new int(1));
960         }
961      }
962 +   */
963    return r;
964   }
965  
# Line 986 | Line 989
989      updateParent(r, parent);
990      if (pexon!=NULL) parent->removeExon(pexon);
991      }
992 +  /*
993    if (gff_warns) {
994      int* pcount=tids.Find(newgfo->gffID);
995      if (pcount!=NULL) {
# Line 996 | Line 1000
1000         tids.Add(newgfo->gffID,new int(1));
1001         }
1002      }
1003 +  */
1004    return r;
1005   }
1006  
1007   GfoHolder* GffReader::updateGffRec(GfoHolder* prevgfo, GffLine* gffline,
1008                                           bool keepAttr) {
1009   if (prevgfo==NULL) return NULL;
1010 < prevgfo->gffobj->createdByExon(false);
1010 > //prevgfo->gffobj->createdByExon(false);
1011   prevgfo->gffobj->ftype_id=prevgfo->gffobj->names->feats.addName(gffline->ftype);
1012   prevgfo->gffobj->start=gffline->fstart;
1013   prevgfo->gffobj->end=gffline->fend;
1014   prevgfo->gffobj->isGene(gffline->is_gene);
1015   prevgfo->gffobj->isTranscript(gffline->is_transcript || gffline->exontype!=0);
1016 < prevgfo->gffobj->fromGff3(gffline->is_gff3);
1016 > prevgfo->gffobj->hasGffID(gffline->ID!=NULL);
1017   if (keepAttr) {
1018     if (prevgfo->gffobj->attrs!=NULL) prevgfo->gffobj->attrs->Clear();
1019     prevgfo->gffobj->parseAttrs(prevgfo->gffobj->attrs, gffline->info);
# Line 1085 | Line 1090
1090   }
1091  
1092   //have to parse the whole file because exons can be scattered all over
1093 + //trans-splicing and fusions are only accepted in proper GFF3 format, with a single parent feature ID entry
1094   void GffReader::readAll(bool keepAttr, bool mergeCloseExons, bool noExonAttr) {
1095    bool validation_errors = false;
1096    //loc_debug=false;
# Line 1094 | Line 1100
1100         //seen this gff ID before?
1101       GfoHolder* prevseen=NULL;
1102       if (gffline->ID && gffline->exontype==0) //GFF3 parent-like feature (mRNA, gene, etc.)
1103 +         //look for same ID on the same chromosome
1104           prevseen=gfoFind(gffline->ID, gffline->gseqname);
1105       if (prevseen!=NULL) {
1106 <            if (prevseen->gffobj->createdByExon()) {
1107 <                //just in case the exon was found before (shouldn't happen)
1106 >            //found same ID/chromosome combo
1107 >            if (prevseen->gffobj->createdByExon() &&
1108 >                  prevseen->gffobj->start>=gffline->fstart && prevseen->gffobj->end<=gffline->fend) {
1109 >                //an exon of this ID was given before
1110 >                //this line has the main attributes for this ID
1111                  updateGffRec(prevseen, gffline, keepAttr);
1112                  }
1113               else {
# Line 1168 | Line 1178
1178   // all gff records are now loaded in GList gflst
1179   // so we can free the hash
1180    phash.Clear();
1181 <  tids.Clear();
1181 >  //tids.Clear();
1182    if (validation_errors) {
1183      exit(1);
1184      }
# Line 1217 | Line 1227
1227       } //for each exon
1228     }
1229   //attribute reduction for GTF records
1230 < if (keepAttrs && !noExonAttr && !fromGff3()
1230 > if (keepAttrs && !noExonAttr && !hasGffID()
1231            && exons.Count()>0 && exons[0]->attrs!=NULL) {
1232     bool attrs_discarded=false;
1233     for (int a=0;a<exons[0]->attrs->Count();a++) {

Diff Legend

- Removed lines
+ Added lines
< Changed lines
> Changed lines