ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/gclib/gff.cpp
(Generate patch)
# Line 882 | Line 882
882   return gffline;
883   }
884  
885 +
886   char* GffReader::gfoBuildId(const char* id, const char* ctg) {
887   //caller must free the returned pointer
888   char* buf=NULL;
# Line 892 | Line 893
893   strcpy(buf+idlen+1, ctg);
894   return buf;
895   }
896 <
896 > /*
897   void GffReader::gfoRemove(const char* id, const char* ctg) { //drops the phash entry stored under the key built from (id, ctg)
898   char* buf=gfoBuildId(id,ctg); //temporary key string; gfoBuildId requires its caller to free it
899   phash.Remove(buf);
900   GFREE(buf); //release the temporary key
901   }
902 <
902 > */
903   //Warning: if gflst gets altered, idx becomes obsolete
904 < GfoHolder* GffReader::gfoAdd(const char* id, const char* ctg, GffObj* gfo, int idx) {
905 < char* buf=gfoBuildId(id,ctg);
906 < GfoHolder* r=new GfoHolder(gfo,idx);
907 < phash.Add(buf, r);
908 < GFREE(buf);
909 < return r;
910 < }
911 <
912 < GfoHolder* GffReader::gfoFind(const char* id, const char* ctg) {
913 < char* buf=gfoBuildId(id,ctg);
914 < GfoHolder* r=phash.Find(buf);
915 < GFREE(buf);
916 < return r;
904 > GfoHolder* GffReader::gfoAdd(GffObj* gfo, int idx) { //creates a new holder list for gfo, stores it in phash under gfo->gffID, returns the new holder
905 > GVec<GfoHolder>* glst=new GVec<GfoHolder>(1); //NOTE(review): the argument is presumably the initial capacity -- confirm against GVec
906 > GfoHolder gh(gfo,idx); //pairs the object with its gflst index (idx becomes obsolete if gflst is altered)
907 > int i=glst->Add(gh); //the value returned by Add is used below as the element's position
908 > phash.Add(gfo->gffID, glst); //keyed by the GFF ID alone; NOTE(review): confirm what happens to a list already stored under this ID
909 > return &(glst->Get(i)); //points at the element held by glst; NOTE(review): may not stay valid after later additions to glst -- confirm
910 > }
911 >
912 > GfoHolder* GffReader::gfoAdd(GVec<GfoHolder>& glst, GffObj* gfo, int idx) { //appends a holder for gfo to an existing list; phash is not touched here
913 > GfoHolder gh(gfo,idx); //pairs the object with its gflst index (idx becomes obsolete if gflst is altered)
914 > int i=glst.Add(gh); //the value returned by Add is used below as the element's position
915 > return &(glst[i]); //points at the element held by glst; NOTE(review): may not stay valid after later additions to glst -- confirm
916 > }
917 >
918 > GfoHolder* GffReader::gfoFind(const char* id, const char* ctg, char strand, uint start, GVec<GfoHolder>** glst) { //returns the first holder stored under id that passes the optional ctg/strand/start filters, or NULL
919 > GVec<GfoHolder>* gl=phash.Find(id); //list of holders registered under this ID; tested for NULL below
920 > GfoHolder* gh=NULL; //stays NULL when nothing matches
921 > if (gl) {
922 >   for (int i=0;i<gl->Count();i++) {
923 >      GfoHolder& gfo = gl->Get(i);
924 >      if (ctg!=NULL && strcmp(ctg,gfo.gffobj->getGSeqName())!=0) //ctg==NULL disables the sequence-name check
925 >           continue;
926 >      if (strand && strand != gfo.gffobj->strand) //strand==0 disables the strand check
927 >           continue;
928 >      if (start>0 && abs((int)start-(int)gfo.gffobj->start)>GFF_MAX_LOCUS) //start==0 disables the distance check
929 >           continue;
930 >      //must be the same transcript, according to given comparison criteria
931 >      gh=&gfo;
932 >      break; //first match wins
933 >      }
934 >   }
935 > if (glst) *glst=gl; //when requested, also hand back the whole list for this ID (NULL if the ID is unknown)
936 > return gh;
937   }
938  
939   GfoHolder* GffReader::replaceGffRec(GffLine* gffline, bool keepAttr, bool noExonAttr, int replaceidx) {
# Line 920 | Line 941
941    GfoHolder* r=NULL;
942    if (replaceidx>=0) {
943       gflst.Put(replaceidx,newgfo);
944 <     r=gfoAdd(newgfo->gffID, gffline->gseqname, newgfo, replaceidx);
944 >     r=gfoAdd(newgfo, replaceidx);
945       }
946     else {
947       int gfoidx=gflst.Add(newgfo);
948 <     r=gfoAdd(newgfo->gffID, gffline->gseqname, newgfo, gfoidx);
948 >     r=gfoAdd(newgfo, gfoidx);
949       }
950    if (gff_warns) {
951      int* pcount=tids.Find(newgfo->gffID);
952      if (pcount!=NULL) {
953 <       if (gff_warns) GMessage("Warning: duplicate GFF ID: %s\n", newgfo->gffID);
953 >      if (gff_warns) GMessage("Warning: duplicate GFF ID: %s\n", newgfo->gffID);
954         (*pcount)++;
955         }
956       else {
# Line 956 | Line 977
977   }
978  
979   GfoHolder* GffReader::newGffRec(GffLine* gffline, bool keepAttr, bool noExonAttr,
980 <                          GffObj* parent, GffExon* pexon) {
980 >                          GffObj* parent, GffExon* pexon, GVec<GfoHolder>* glst) {
981    GffObj* newgfo=new GffObj(this, gffline, keepAttr, noExonAttr);
982    GfoHolder* r=NULL;
983    int gfoidx=gflst.Add(newgfo);
984 <  r=gfoAdd(newgfo->gffID, gffline->gseqname, newgfo, gfoidx);
984 >  r=(glst) ? gfoAdd(*glst, newgfo, gfoidx) :gfoAdd(newgfo, gfoidx);
985    if (parent!=NULL) {
986      updateParent(r, parent);
987      if (pexon!=NULL) parent->removeExon(pexon);
# Line 1072 | Line 1093
1093    while (nextGffLine()!=NULL) {
1094         //seen this gff ID before?
1095       GfoHolder* prevseen=NULL;
1096 <     if (gffline->ID) //GFF3
1096 >     if (gffline->ID) //GFF3 parent-like feature (mRNA, gene, etc.)
1097           prevseen=gfoFind(gffline->ID, gffline->gseqname);
1098       if (prevseen!=NULL) {
1099              if (prevseen->gffobj->createdByExon()) {
1100 +                //just in case the exon was found before (shouldn't happen)
1101                  updateGffRec(prevseen, gffline, keepAttr);
1102                  }
1103               else {
# Line 1090 | Line 1112
1112      if (gffline->parents==NULL) {//start GFF3-like record with no parent (mRNA, gene)
1113         if (!prevseen) newGffRec(gffline, keepAttr, noExonAttr);
1114         }
1115 <    else { //--- it's a parented feature (could still be a mRNA)
1115 >    else { //--- it's a parented feature (exon/CDS, but might still be a mRNA)
1116         bool found_parent=false;
1117         GfoHolder* newgfo=prevseen;
1118 +       GVec<GfoHolder>* newgflst=NULL;
1119         for (int i=0;i<gffline->num_parents;i++) {
1120              if (transcriptsOnly && discarded_ids.Find(gffline->parents[i])!=NULL)
1121                  continue; //skipping discarded parent feature
1122 <            GfoHolder* parentgfo=gfoFind(gffline->parents[i], gffline->gseqname);
1122 >            GfoHolder* parentgfo=gfoFind(gffline->parents[i], gffline->gseqname, gffline->strand, gffline->fstart, &newgflst);
1123              if (parentgfo!=NULL) { //parent GffObj parsed earlier
1124                     found_parent=true;
1125                     if (parentgfo->gffobj->isGene() && gffline->is_transcript
# Line 1129 | Line 1152
1152                 }
1153                else { //no parent seen before, create one directly with this exon
1154                 //loc_debug=true;
1155 <               GfoHolder* newgfo=prevseen ? prevseen : newGffRec(gffline, keepAttr, noExonAttr);
1155 >               GfoHolder* newgfo=prevseen ? prevseen : newGffRec(gffline, keepAttr, noExonAttr, NULL, NULL, newgflst);
1156                 if (gffline->ID!=NULL && gffline->exontype==0)
1157                       subfPoolAdd(pex, newgfo);
1158                 //even those with errors will be added here!

Diff Legend

Removed lines
+ Added lines
< Changed lines (old revision)
> Changed lines (new revision)