ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/gclib/gff.h
(Generate patch)
# Line 29 | Line 29
29   extern const uint gfo_flag_CHILDREN_PROMOTED;
30   extern const uint gfo_flag_HAS_ERRORS;
31   extern const uint gfo_flag_IS_GENE;
32 < extern const uint gfo_flag_FROM_GFF3; //parsed from GFF3 formatted record
32 > extern const uint gfo_flag_HAS_GFF_ID; //found a GFF3 formatted main feature with its own ID
33   extern const uint gfo_flag_BY_EXON;  //created by subfeature (exon) directly
34                        //(GTF2 and some chado gff3 dumps with exons given before their mRNA)
35   extern const uint gfo_flag_IS_TRANSCRIPT; //recognized as '*RNA' or '*transcript'
# Line 98 | Line 98
98         num_parents=0;
99         parents=NULL;
100         }
101 <    char* extractAttr(const char* pre, bool caseStrict=true, bool enforce_GTF2=false);
101 >    char* extractAttr(const char* pre, bool caseStrict=false, bool enforce_GTF2=false);
102      GffLine(GffLine* l) { //a copy constructor
103        memcpy((void*)this, (void*)l, sizeof(GffLine));
104        line=NULL;
# Line 235 | Line 235
235     bool operator==(GffNameInfo& d){
236         return (strcmp(this->name, d.name)==0);
237         }
238 - bool operator>(GffNameInfo& d){
239 -    return (strcmp(this->name, d.name)>0);
240 -    }
238     bool operator<(GffNameInfo& d){
239       return (strcmp(this->name, d.name)<0);
240       }
# Line 472 | Line 469
469        if (v) flags |= gfo_flag_HAS_ERRORS;
470          else flags &= ~gfo_flag_HAS_ERRORS;
471        }
472 <  bool fromGff3() { return ((flags & gfo_flag_FROM_GFF3)!=0); }
473 <  void fromGff3(bool v) {
474 <      if (v) flags |= gfo_flag_FROM_GFF3;
475 <        else flags &= ~gfo_flag_FROM_GFF3;
472 >  bool hasGffID() { return ((flags & gfo_flag_HAS_GFF_ID)!=0); }
473 >  void hasGffID(bool v) {
474 >      if (v) flags |= gfo_flag_HAS_GFF_ID;
475 >        else flags &= ~gfo_flag_HAS_GFF_ID;
476        }
477    bool createdByExon() { return ((flags & gfo_flag_BY_EXON)!=0); }
478    void createdByExon(bool v) {
# Line 612 | Line 609
609    
610     bool monoFeature() {
611       return (exons.Count()==0 ||
612 <          (exons.Count()==1 && exon_ftype_id==ftype_id &&
612 >          (exons.Count()==1 &&  //exon_ftype_id==ftype_id &&
613                exons[0]->end==this->end && exons[0]->start==this->start));
614       }
615  
# Line 674 | Line 671
671       }
672     bool exonOverlap(uint s, uint e) {//check if ANY exon overlaps given segment
673        //ignores strand!
674 <      if (s>e) swap(s,e);
674 >      if (s>e) Gswap(s,e);
675        for (int i=0;i<exons.Count();i++) {
676           if (exons[i]->overlap(s,e)) return true;
677           }
# Line 697 | Line 694
694      int exonOverlapIdx(uint s, uint e, int* ovlen=NULL) {
695        //return the exons' index for the overlapping OR ADJACENT exon
696        //ovlen, if given, will return the overlap length
697 <      if (s>e) swap(s,e);
697 >      if (s>e) Gswap(s,e);
698        s--;e++; //to also catch adjacent exons
699        for (int i=0;i<exons.Count();i++) {
700              if (exons[i]->start>e) break;
# Line 975 | Line 972
972  
973   };
974  
975 < class GfoHolder {
979 < public:
975 > struct GfoHolder {
976     int idx; //position in GffReader::gflst array
977 <   GffObj* gffobj;
977 >   GffObj* gffobj;
978     GfoHolder(GffObj* gfo=NULL, int i=0) {
979       idx=i;
980       gffobj=gfo;
# Line 1018 | Line 1014
1014    bool transcriptsOnly; //keep only transcripts w/ their exon/CDS features
1015    GHash<int> discarded_ids; //for transcriptsOnly mode, keep track
1016                              // of discarded parent IDs
1017 <  GHash<GfoHolder> phash; //transcript_id+contig (Parent~Contig) => [gflst index, GffObj]
1018 <  GHash<int> tids; //transcript_id uniqueness
1017 >  GHash< GVec<GfoHolder> > phash; //transcript_id+contig (Parent~Contig) => [gflst index, GffObj]
1018 >  //GHash<int> tids; //just for transcript_id uniqueness
1019    char* gfoBuildId(const char* id, const char* ctg);
1020 <  void gfoRemove(const char* id, const char* ctg);
1021 <  GfoHolder* gfoAdd(const char* id, const char* ctg, GffObj* gfo, int idx);
1022 <  GfoHolder* gfoFind(const char* id, const char* ctg);
1020 >  //void gfoRemove(const char* id, const char* ctg);
1021 >  GfoHolder* gfoAdd(GffObj* gfo, int idx);
1022 >  GfoHolder* gfoAdd(GVec<GfoHolder>& glst, GffObj* gfo, int idx);
1023 >  // const char* id, const char* ctg, char strand, GVec<GfoHolder>** glst, uint start, uint end
1024 >  GfoHolder* gfoFind(const char* id, const char* ctg=NULL, GVec<GfoHolder>** glst=NULL,
1025 >                                                 char strand=0, uint start=0, uint end=0);
1026    CNonExon* subfPoolCheck(GffLine* gffline, GHash<CNonExon>& pex, char*& subp_name);
1027    void subfPoolAdd(GHash<CNonExon>& pex, GfoHolder* newgfo);
1028    GfoHolder* promoteFeature(CNonExon* subp, char*& subp_name, GHash<CNonExon>& pex,
# Line 1031 | Line 1030
1030   public:
1031    GfList gflst; //accumulate GffObjs being read
1032    GfoHolder* newGffRec(GffLine* gffline, bool keepAttr, bool noExonAttr,
1033 <                               GffObj* parent=NULL, GffExon* pexon=NULL);
1033 >                               GffObj* parent=NULL, GffExon* pexon=NULL, GVec<GfoHolder>* glst=NULL);
1034    GfoHolder* replaceGffRec(GffLine* gffline, bool keepAttr, bool noExonAttr, int replaceidx);
1035    GfoHolder* updateGffRec(GfoHolder* prevgfo, GffLine* gffline,
1036                                           bool keepAttr);
# Line 1039 | Line 1038
1038    bool addExonFeature(GfoHolder* prevgfo, GffLine* gffline, GHash<CNonExon>& pex, bool noExonAttr);
1039    GList<GSeqStat> gseqstats; //list of all genomic sequences seen by this reader, accumulates stats
1040    GffReader(FILE* f=NULL, bool t_only=false, bool sortbyloc=false):discarded_ids(true),
1041 <                       phash(true), tids(true), gflst(sortbyloc), gseqstats(true,true,true) {
1041 >                       phash(true), gflst(sortbyloc), gseqstats(true,true,true) {
1042        gff_warns=gff_show_warnings;
1043        names=NULL;
1044        gffline=NULL;
# Line 1059 | Line 1058
1058        gflst.sortedByLoc(sortbyloc);
1059        }
1060    GffReader(char* fn, bool t_only=false, bool sort=false):discarded_ids(true), phash(true),
1061 <                             tids(true),gflst(sort),gseqstats(true,true,true) {
1061 >                             gflst(sort),gseqstats(true,true,true) {
1062        gff_warns=gff_show_warnings;
1063        names=NULL;
1064        fname=Gstrdup(fn);

Diff Legend

- Removed lines
+ Added lines
< Changed lines
> Changed lines