ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/gclib/GBase.h
Revision: 2
Committed: Mon Mar 22 22:03:27 2010 UTC (9 years, 6 months ago) by gpertea
File size: 12211 byte(s)
Log Message:
added my gclib source files

Line User Rev File contents
1 gpertea 2 #ifndef G_BASE_DEFINED
2     #define G_BASE_DEFINED
3    
4     #include <string.h>
5     #include <stdlib.h>
6     #include <stdio.h>
7     #include <math.h>
8     #include <limits.h>
9     #include <sys/types.h>
10     #include <sys/stat.h>
11     #if defined __WIN32__ || defined _WIN32
12     #include <windows.h>
13     #endif
14    
// A DEBUG build always keeps assertions enabled.
#if defined(DEBUG)
#  undef NDEBUG
#endif

// Short names for basic integer and byte types.
typedef unsigned int  uint32;
typedef int           int32;
typedef unsigned char uchar;
typedef unsigned char byte;

// When long is natively 64 bit the plain fseek/ftell are sufficient.
#if defined(_NATIVE_64)
#  define ftello ftell
#  define fseeko fseek
#endif

// Largest value representable in an unsigned int.
#if !defined(MAXUINT)
#  define MAXUINT ((unsigned int)-1)
#endif

// 64-bit integer types: long long unless long is known to be 64 bit wide.
#if !defined(_NATIVE_64) && !defined(_LP64) && !defined(__LP64__)
typedef long long          int64;
typedef unsigned long long uint64;
#else
typedef long          int64;
typedef unsigned long uint64;
#endif

/****************************************************************************/

// Fallbacks for environments whose <stdlib.h> lacks the exit status macros.
#if !defined(EXIT_FAILURE)
#  define EXIT_FAILURE 1
#endif

#if !defined(EXIT_SUCCESS)
#  define EXIT_SUCCESS 0
#endif
52    
53     /****************************************************************************/
// Message handed to GError() by the allocation macros when a request fails.
#define ERR_ALLOC "Error allocating memory.\n"

// Platform-dependent path separator and low-level I/O header.
// _WIN32 is tested in addition to __WIN32__ and WIN32 so that this check
// agrees with the one guarding <windows.h> near the top of this header.
#if defined(__WIN32__) || defined(_WIN32) || defined(WIN32)
 #define CHPATHSEP '\\'
 #include <io.h>
 // ftello/fseeko are mapped onto the plain ftell/fseek on this platform
 #define ftello ftell
 #define fseeko fseek
#else
 #define CHPATHSEP '/'
 #include <unistd.h>
#endif
64    
65     //-------------------
66    
// Debug helpers
// GASSERT(exp): unless NDEBUG is defined, calls GAssert() with the text of
//               the expression, the file name and the line number when exp
//               evaluates to false; otherwise it expands to a no-op.
#if defined(NDEBUG)
 #define GASSERT(exp) ((void)0)
#else
 #define GASSERT(exp) ((exp)?((void)0):(void)GAssert(#exp,__FILE__,__LINE__))
#endif

// GTRACE((args)): forwards the parenthesized argument list to GMessage(),
//                 but only when TRACE is defined and NDEBUG is not.
#if !defined(NDEBUG) && defined(TRACE)
 #define GTRACE(exp) (GMessage exp)
#else
 #define GTRACE(exp) ((void)0)
#endif

// GERROR((args)): forwards the parenthesized argument list to GError().
#define GERROR(exp) (GError exp)
81     /********************************** Macros ***********************************/
// NOTE: every macro in this group may evaluate its arguments more than
// once, so do not pass expressions with side effects.

// Absolute value
#define GABS(val)  (((val)>=0) ? (val) : -(val))

// Larger / smaller of two values (on a tie GMAX yields b, GMIN yields a)
#define GMAX(a,b)  (((a)>(b)) ? (a) : (b))
#define GMIN(a,b)  (((a)>(b)) ? (b) : (a))

// Smallest of three values
#define GMIN3(x,y,z)  ((x)<(y) ? GMIN(x,z) : GMIN(y,z))

// Largest of three values
#define GMAX3(x,y,z)  ((x)>(y) ? GMAX(x,z) : GMAX(y,z))

// Store the smaller of a, b into lo and the larger into hi
#define GMINMAX(lo,hi,a,b)  ((a)<(b) ? ((lo)=(a),(hi)=(b)) : ((lo)=(b),(hi)=(a)))

// Limit x to the closed range [lo..hi]
#define GCLAMP(lo,x,hi)  ((x)<(lo) ? (lo) : ((x)>(hi) ? (hi) : (x)))
100    
// Generic untyped pointer and a short name for unsigned int.
typedef void* pointer;
typedef unsigned int uint;

// Item comparison callback.
// NOTE: 'const pointer' means 'void* const' (a constant pointer), not
// 'const void*' -- the pointed-to item itself is not const here.
// NOTE(review): presumably returns <0, 0 or >0 like strcmp -- confirm with
// the code that invokes these callbacks.
typedef int GCompareProc(const pointer item1, const pointer item2);
typedef void GFreeProc(pointer item); //usually just delete,
//but may also support structures with embedded dynamic members

// Allocation wrappers: each calls the matching G* function with the address
// of ptr and calls GError(ERR_ALLOC) when that function returns false.
// They are wrapped in do { } while (0) so that each use is one statement:
// with a bare 'if' expansion, code such as
//     if (cond) GMALLOC(p,n); else other();
// would attach the 'else' to the macro's hidden 'if'.
#define GMALLOC(ptr,size) do { \
    if (!GMalloc((pointer*)(&(ptr)),(size))) GError(ERR_ALLOC); \
  } while (0)
#define GCALLOC(ptr,size) do { \
    if (!GCalloc((pointer*)(&(ptr)),(size))) GError(ERR_ALLOC); \
  } while (0)
#define GREALLOC(ptr,size) do { \
    if (!GRealloc((pointer*)(&(ptr)),(size))) GError(ERR_ALLOC); \
  } while (0)
// Releases ptr through GFree(), passing the address of ptr.
#define GFREE(ptr) GFree((pointer*)(&(ptr)))
115    
// Returns whichever of the two C strings orders first under strcmp();
// when they compare equal, arg2 is returned. Neither argument may be NULL.
inline char* min(char *arg1, char *arg2) {
  if (strcmp(arg1, arg2) < 0) return arg1;
  return arg2;
}
119    
// Rounds x to the nearest integer by taking floor(x + 0.5), so exact
// halves go towards +infinity (2.5 -> 3, -2.5 -> -2).
inline int iround(double x) {
  const double shifted = x + 0.5;
  return (int)floor(shifted);
}
123    
124    
125     /****************************************************************************/
126    
// Returns whichever of the two C strings orders last under strcmp();
// when they compare equal, arg2 is returned. Neither argument may be NULL.
inline char* max(char *arg1, char *arg2) {
  if (strcmp(arg2, arg1) < 0) return arg1;
  return arg2;
}
130    
// Three-way comparison of two ints: returns -1 when a<b, 0 when a==b and
// 1 when a>b.
// The result is built from the two comparisons rather than from a-b,
// because the subtraction overflows (undefined behaviour, and in practice
// a result with the wrong sign) when the operands are far apart,
// e.g. Gintcmp(INT_MAX, -1).
inline int Gintcmp(int a, int b) {
  return (a > b) - (a < b);
}
135    
136     int Gstrcmp(char* a, char* b);
137     //same as strcmp but doesn't crash on NULL pointers
        //NOTE(review): how a NULL argument orders against a non-NULL string is
        //decided in the definition, which is not part of this header -- check it
        //before relying on a particular ordering
138    
139     int Gstricmp(const char* a, const char* b);
        //NOTE(review): presumably the case-insensitive counterpart of Gstrcmp;
        //whether it tolerates NULL as well is not stated here -- confirm
140    
// swap() overloads: exchange the values of the two referenced objects.
//
// A temporary is used instead of the chained XOR form
// (a ^= b ^= a ^= b): that expression modifies 'a' twice without
// sequencing before C++17, and it zeroes the value whenever both
// arguments refer to the same object. With a temporary, swap(x, x)
// leaves x unchanged.

inline void swap(int &arg1, int &arg2) {
  int tmp = arg1;
  arg1 = arg2;
  arg2 = tmp;
}

inline void swap(char* &arg1, char* &arg2) {
  char* tmp = arg1;
  arg1 = arg2;
  arg2 = tmp;
}

inline void swap(unsigned int &arg1, unsigned int &arg2) {
  unsigned int tmp = arg1;
  arg1 = arg2;
  arg2 = tmp;
}

inline void swap(short &arg1, short &arg2) {
  short tmp = arg1;
  arg1 = arg2;
  arg2 = tmp;
}

inline void swap(unsigned short &arg1, unsigned short &arg2) {
  unsigned short tmp = arg1;
  arg1 = arg2;
  arg2 = tmp;
}

inline void swap(long &arg1, long &arg2) {
  long tmp = arg1;
  arg1 = arg2;
  arg2 = tmp;
}

inline void swap(unsigned long &arg1, unsigned long &arg2) {
  unsigned long tmp = arg1;
  arg1 = arg2;
  arg2 = tmp;
}

inline void swap(char &arg1, char &arg2) {
  char tmp = arg1;
  arg1 = arg2;
  arg2 = tmp;
}

inline void swap(unsigned char &arg1, unsigned char &arg2) {
  unsigned char tmp = arg1;
  arg1 = arg2;
  arg2 = tmp;
}

inline void swap(bool &arg1, bool &arg2) {
  bool tmp = arg1;
  arg1 = arg2;
  arg2 = tmp;
}
173    
174    
175     /**************** Memory management ***************************/
        // Low-level allocation helpers, normally reached through the GMALLOC,
        // GCALLOC, GREALLOC and GFREE macros of this header, which pass the
        // address of the caller's pointer variable as 'ptr'.
        // The three allocating functions report their outcome through the bool
        // result; the wrapper macros call GError(ERR_ALLOC) when it is false.
        // NOTE(review): 'size' is presumably a byte count -- confirm in the
        // definitions.
176    
177     bool GMalloc(pointer* ptr, unsigned long size); // Allocate memory
178     bool GCalloc(pointer* ptr, unsigned long size); // Allocate and initialize memory
179     bool GRealloc(pointer* ptr,unsigned long size); // Resize memory
180     void GFree(pointer* ptr); // Free memory, resets ptr to NULL
181    
182     /********************* debug functions *********************/
        // GError and GMessage take a format string followed by a variable
        // argument list. The GERROR and GTRACE macros of this header forward a
        // parenthesized argument list to them, e.g. GERROR(("bad value\n")).
        // NOTE(review): the format is presumably printf-style -- confirm.
183    
184     void GError(const char* format,...); // Error routine (aborts program)
185     void GMessage(const char* format,...);// Log message to stderr
186     // Assert failed routine:- usually not called directly but through GASSERT,
        // which supplies the text of the failed expression, __FILE__ and __LINE__
187     void GAssert(const char* expression, const char* filename, unsigned int lineno);
188    
189    
190     // ****************** string manipulation *************************
        // NOTE(review): several functions below return newly allocated strings;
        // this header does not say how they must be released (presumably with
        // GFREE/GFree) -- confirm in the definitions.
191     char *Gstrdup(const char* str);
192     //duplicates a string by allocating a copy of it and returning the copy
193     char* Gstrdup(const char* sfrom, const char* sto);
194     //same as Gstrdup(str), but with an early termination (e.g. on delimiter)
        //NOTE(review): whether the character at 'sto' is included in the copy is
        //not stated here -- confirm
195    
196     char* Gsubstr(const char* str, char* from, char* to=NULL);
197     //extracts a substring, allocating it, including boundaries (from/to)
198    
        //strsplit: splits str into at most maxfields entries stored in 'fields'.
        //NOTE(review): str is not const, so it is presumably modified in place,
        //and the result is presumably the number of fields found -- confirm
199     int strsplit(char* str, char** fields, int maxfields, const char* delim);
200     int strsplit(char* str, char** fields, int maxfields, const char delim);
201     int strsplit(char* str, char** fields, int maxfields); //splits by tab or space
202    
203     char* replaceStr(char* &str, char* newvalue);
        //NOTE(review): presumably releases the old value of str and makes str
        //refer to a copy of newvalue -- confirm
204    
205     //conversion: to Lower/Upper case
206     // creating a new string:
207     char* upCase(const char* str);
208     char* loCase(const char* str);
209     // changing string in place:
210     char* strlower(char * str);
211     char* strupper(char * str);
212    
213     //strstr but for memory zones: scans a memory region of 'len' bytes
214     //for the 'partlen' bytes found at 'part':
215     void* Gmemscan(void *mem, unsigned int len,
216     void *part, unsigned int partlen);
217    
218     // test if a char is in a string:
219     bool chrInStr(char c, char* str);
220    
221     char* rstrchr(char* str, char ch);
222     /* returns a pointer to the rightmost
        occurrence of ch in str - like rindex for platforms missing it */
224    
225     char* strchrs(char* s, const char* chrs);
226     //strchr but with a set of chars instead of only one
227    
228     char* rstrfind(char* str, const char *substr); /* like rindex() but for strings,
        or like the right-side version of strstr() */
231     //reverses a character string
        //NOTE(review): presumably in place, with slen=0 meaning "use strlen(str)"
        //-- confirm in the definition
232     char* reverseChars(char* str, int slen=0);
233    
234     char* rstrstr(char* rstart, char *lend, char* substr);
235     /* the reversed, right-side equivalent of strstr: starts searching
        from the right end (rstart), going back to the left end (lend), and
        returns a pointer to the last (right) matching character in str */
238    
239     char* strifind(char* str, const char* substr);
240     // the case insensitive version of strstr -- finding a string within a string
241    
242    
243     //Determines if a string begins with a given prefix
244     //(returns false when any of the params is NULL,
245     // but true when prefix is '' (empty string)!)
246     bool startsWith(char* s, const char* prefix);
247    
248     // ELF hash function for strings
249     int strhash(const char* str);
250    
251    
252    
//---- generic base GSeg : genomic segment (interval) --
// Coordinates are considered 1-based (so 0 is invalid) and both ends are
// inclusive: len() is end-start+1.
// All query methods are const, so they can be used on const segments.
class GSeg {
 public:
  uint start; // start<=end after construction (the constructor orders them)
  uint end;

  // Builds a segment from two coordinates given in either order.
  GSeg(uint s=0, uint e=0) {
    if (s>e) { start=e; end=s; }
    else     { start=s; end=e; }
  }

  // Number of positions covered, both ends included.
  uint len() const { return end-start+1; }

  //check for overlap with other segment
  // True when the two segments share at least one position.
  // d must not be NULL.
  bool overlap(GSeg* d) const {
    return start<d->start ? (d->start<=end) : (start<=d->end);
  }

  bool overlap(GSeg& d) const {
    return start<d.start ? (d.start<=end) : (start<=d.end);
  }

  // Overlap test with tolerance: fuzz is added to the end coordinate of the
  // segment that starts first (of d when both start at the same position),
  // so segments separated by a gap of up to fuzz still count as overlapping.
  bool overlap(GSeg& d, int fuzz) const {
    return start<d.start ? (d.start<=end+fuzz) : (start<=d.end+fuzz);
  }

  // Overlap test against the interval [s..e]; s and e may be given in
  // either order.
  bool overlap(uint s, uint e) const {
    if (s>e) { uint t=s; s=e; e=t; }
    return start<s ? (s<=end) : (start<=e);
  }

  //return the length of overlap between two segments
  // (0 when they do not overlap); r must not be NULL.
  int overlapLen(GSeg* r) const {
    if (start<r->start) {
      if (r->start>end) return 0;
      return (r->end>end) ? end-r->start+1 : r->end-r->start+1;
    }
    else { //r->start<=start
      if (start>r->end) return 0;
      return (r->end<end)? r->end-start+1 : end-start+1;
    }
  }

  // Same as above for the interval [rstart..rend], whose ends may be given
  // in either order.
  int overlapLen(uint rstart, uint rend) const {
    if (rstart>rend) { uint t=rstart; rstart=rend; rend=t; }
    if (start<rstart) {
      if (rstart>end) return 0;
      return (rend>end) ? end-rstart+1 : rend-rstart+1;
    }
    else { //rstart<=start
      if (start>rend) return 0;
      return (rend<end)? rend-start+1 : end-start+1;
    }
  }

  //fuzzy coordinate matching:
  // true when both the start and the end coordinates of *s differ from this
  // segment's by at most fuzz (exact match required when fuzz is 0).
  bool coordMatch(GSeg* s, uint fuzz=0) const {
    if (fuzz==0) return (start==s->start && end==s->end);
    uint sd = (start>s->start) ? start-s->start : s->start-start;
    uint ed = (end>s->end) ? end-s->end : s->end-end;
    return (sd<=fuzz && ed<=fuzz);
  }

  //comparison operators required for sorting
  // (ordering is by start, then by end)
  bool operator==(GSeg& d) const {
    return (start==d.start && end==d.end);
  }
  bool operator>(GSeg& d) const {
    return (start==d.start)?(end>d.end):(start>d.start);
  }
  bool operator<(GSeg& d) const {
    return (start==d.start)?(end<d.end):(start<d.start);
  }
};
323    
324    
325    
326     //--------------------------------------------------------
327     // ************** simple line reading class for text files
328    
329     //GLineReader -- text line reading/buffering class
330     class GLineReader {
331     int len;
332     int allocated;
333     char* buf;
334     bool isEOF;
335     FILE* file;
336     off_t filepos; //current position
337     bool pushed; //pushed back
338     int lcount; //line counter (read lines)
339     public:
340     char* chars() { return buf; }
341     char* line() { return buf; }
342     int readcount() { return lcount; } //number of lines read
343     int length() { return len; }
344     int size() { return len; } //same as size();
345     bool isEof() {return isEOF; }
346     bool eof() { return isEOF; }
347     off_t getfpos() { return filepos; }
348     off_t getFpos() { return filepos; }
349     char* nextLine() { return getLine(); }
350     char* getLine() { if (pushed) { pushed=false; return buf; }
351     else return getLine(file); }
352     char* getLine(FILE* stream) {
353     if (pushed) { pushed=false; return buf; }
354     else return getLine(stream, filepos); }
355     char* getLine(FILE* stream, off_t& f_pos); //read a line from a stream and update
356     // the given file position
357     void pushBack() { if (lcount>0) pushed=true; } // "undo" the last getLine request
358     // so the next call will in fact return the same line
359     GLineReader(FILE* stream=NULL, off_t fpos=0) {
360     len=0;
361     isEOF=false;
362     allocated=1024;
363     GMALLOC(buf,allocated);
364     lcount=0;
365     buf[0]=0;
366     file=stream;
367     filepos=fpos;
368     pushed=false;
369     }
370     ~GLineReader() {
371     GFREE(buf);
372     }
373     };
374    
375    
376     /* extended fgets() - to read one full line from a file and
        update the file position correctly!
        buf will be reallocated as necessary to fit the whole line; buf and
        buflen are passed by reference so the caller sees the new buffer and size.
        f_pos and linelen are optional (both default to NULL).
        NOTE(review): the return value at end of file (presumably NULL) and
        whether the line terminator is kept in buf are not stated here -- confirm
        in the definition.
        */
380     char* fgetline(char* & buf, int& buflen, FILE* stream, off_t* f_pos=NULL, int* linelen=NULL);
381    
382     /*********************** File management functions *********************/
383    
384     // removes the directory part from a full-path file name
385     // this is a destructive operation for the given string!
        // NOTE(review): the function name suggests that it is the file name part
        // that gets removed (leaving the directory), which contradicts the comment
        // above -- confirm against the definition
386     void delFileName(char* filepath);
387    
388     // returns a pointer to the file name part in a full-path filename
389     char* getFileName(char* filepath);
390    
391     int fileExists(const char* fname);
392     //returns 0 if file entry doesn't exist
393     // 1 if it's a directory
394     // 2 if it's a regular file
395     // 3 otherwise (?)
396    
397     off_t fileSize(const char* fpath);
        // NOTE(review): the result for a missing or unreadable file is not stated
        // here -- confirm in the definition
398    
399     //parses the next number found in a string at the current position
400     //until a non-digit (and not a '.', 'e','E','-','+') is encountered;
401     //updates the char* pointer to be after the last digit parsed
        //NOTE(review): the bool result presumably tells whether a number was
        //parsed -- confirm
402     bool parseNumber(char* &p, double& v);
403     bool parseDouble(char* &p, double& v); //just an alias for parseNumber
404    
        //NOTE(review): the integer parsers below presumably follow the same
        //convention as parseNumber (advance p, return success) -- confirm
405     bool parseInt(char* &p, int& i);
406     bool parseUInt(char* &p, uint& i);
407     bool parseHex(char* &p, uint& i);
408    
409     #endif /* G_BASE_DEFINED */