ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/gclib/GBase.h
Revision: 90
Committed: Mon Oct 3 18:45:22 2011 UTC (8 years, 1 month ago) by gpertea
File size: 13996 byte(s)
Log Message:
reverseChars() minor edit

Line User Rev File contents
1 gpertea 2 #ifndef G_BASE_DEFINED
2     #define G_BASE_DEFINED
3 gpertea 16 #ifndef _POSIX_SOURCE
4     //mostly for MinGW
5     #define _POSIX_SOURCE
6     #endif
7     #ifdef HAVE_CONFIG_H
8     #include "config.h"
9     #endif
10 gpertea 2 #include <string.h>
11     #include <stdlib.h>
12     #include <stdio.h>
13     #include <math.h>
14     #include <limits.h>
15     #include <sys/types.h>
16     #include <sys/stat.h>
17 gpertea 16 #include <stdint.h>
18    
// Platform setup. On Windows builds: make sure __WIN32__ is defined, use
// '\\' as the path separator, redefine off_t as int64_t and map
// fseeko/ftello. On every other platform: use '/' and pull in <unistd.h>.
19     #if defined __WIN32__ || defined WIN32 || defined _WIN32 || defined _WIN32_
20     #ifndef __WIN32__
21     #define __WIN32__
22     #endif
23 gpertea 2 #include <windows.h>
24 gpertea 16 #include <io.h>
25     #define CHPATHSEP '\\'
26     #undef off_t
27     #define off_t int64_t
// NOTE(review): #ifdef only sees preprocessor macros. If the C runtime
// provides _fseeki64 as an ordinary function, the #else branch (plain
// fseek) is taken even though off_t was just made 64-bit -- confirm.
28     #ifdef _fseeki64
29     #define fseeko(stream, offset, origin) _fseeki64(stream, offset, origin)
30     #else
31     /*
32     #define _DEFINE_WIN32_FSEEKO
33     int fseeko(FILE *stream, off_t offset, int whence);
34     */
35     #define fseeko fseek
36     #endif
// NOTE(review): same macro-only test as above, here for _ftelli64.
37     #ifdef _ftelli64
38     #define ftello(stream) _ftelli64(stream)
39     #else
40     /*
41     #define _DEFINE_WIN32_FTELLO
42     off_t ftello(FILE *stream);
43     */
44     #define ftello ftell
45     #endif
46     #else
47     #define CHPATHSEP '/'
48     #include <unistd.h>
49 gpertea 2 #endif
50    
// Fallback: whenever fseeko/ftello are not defined as macros at this point,
// alias them to fseek/ftell.
// NOTE(review): this also fires on platforms that provide fseeko/ftello as
// real functions rather than macros -- confirm that is intended.
51 gpertea 36 #ifndef fseeko
52     #define fseeko fseek
53     #endif
54     #ifndef ftello
55     #define ftello ftell
56     #endif
57 gpertea 16
// Defining DEBUG removes NDEBUG, which keeps the GASSERT/GTRACE helpers
// further down in this header active.
58 gpertea 2 #ifdef DEBUG
59     #undef NDEBUG
60     #endif
61    
// Short aliases for the fixed-width 32-bit integer types from <stdint.h>.
62 gpertea 16 typedef int32_t int32;
63     typedef uint32_t uint32;
64    
// Two names for the same unsigned 8-bit-or-wider character type.
65 gpertea 2 typedef unsigned char uchar;
66     typedef unsigned char byte;
67    
// Largest unsigned int value (-1 converted to unsigned), unless already defined.
68     #ifndef MAXUINT
69     #define MAXUINT ((unsigned int)-1)
70     #endif
71    
// Largest int value, unless already defined.
72 gpertea 16 #ifndef MAXINT
73     #define MAXINT INT_MAX
74 gpertea 2 #endif
75    
// Underscore-spelled duplicates of MAXUINT / MAXINT.
76 gpertea 16 #ifndef MAX_UINT
77     #define MAX_UINT ((unsigned int)-1)
78     #endif
79    
80     #ifndef MAX_INT
81     #define MAX_INT INT_MAX
82     #endif
83    
// Short aliases for the fixed-width 64-bit integer types from <stdint.h>.
84     typedef int64_t int64;
85     typedef uint64_t uint64;
86    
87 gpertea 2 /****************************************************************************/
88    
// Process exit codes, supplied only if <stdlib.h> did not define them.
89     #ifndef EXIT_FAILURE
90     #define EXIT_FAILURE 1
91     #endif
92    
93     #ifndef EXIT_SUCCESS
94     #define EXIT_SUCCESS 0
95     #endif
96    
97     /****************************************************************************/
// Message passed to GError() by the GMALLOC/GCALLOC/GREALLOC macros.
98     #define ERR_ALLOC "Error allocating memory.\n"
99    
100     //-------------------
101    
102     // Debug helpers
// With NDEBUG undefined, GASSERT(exp) calls GAssert() with the expression
// text, file and line when exp is false; GTRACE forwards to GMessage() only
// if TRACE is also defined. With NDEBUG defined both expand to a no-op.
// GTRACE and GERROR paste their argument directly after the function name,
// so the argument must be a parenthesized list: GTRACE(("x=%d\n", x)).
103     #ifndef NDEBUG
104     #define GASSERT(exp) ((exp)?((void)0):(void)GAssert(#exp,__FILE__,__LINE__))
105     #ifdef TRACE
106     #define GTRACE(exp) (GMessage exp)
107     #else
108     #define GTRACE(exp) ((void)0)
109     #endif
110     #else
111     #define GASSERT(exp) ((void)0)
112     #define GTRACE(exp) ((void)0)
113     #endif
114    
115     #define GERROR(exp) (GError exp)
116     /********************************** Macros ***********************************/
// All macros below are plain textual expansions: an argument may be
// evaluated more than once, so avoid arguments with side effects (i++, f()).
117     // Absolute value
118     #define GABS(val) (((val)>=0)?(val):-(val))
119    
120     // Min and Max
121     #define GMAX(a,b) (((a)>(b))?(a):(b))
122     #define GMIN(a,b) (((a)>(b))?(b):(a))
123    
124     // Min of three
125     #define GMIN3(x,y,z) ((x)<(y)?GMIN(x,z):GMIN(y,z))
126    
127     // Max of three
128     #define GMAX3(x,y,z) ((x)>(y)?GMAX(x,z):GMAX(y,z))
129    
130     // Store the smaller of a, b in lo and the larger in hi
131     #define GMINMAX(lo,hi,a,b) ((a)<(b)?((lo)=(a),(hi)=(b)):((lo)=(b),(hi)=(a)))
132    
133     // Clamp value x to range [lo..hi]
134     #define GCLAMP(lo,x,hi) ((x)<(lo)?(lo):((x)>(hi)?(hi):(x)))
135    
// Generic untyped pointer and a short name for unsigned int.
136     typedef void* pointer;
137     typedef unsigned int uint;
138    
// Callback types for item comparison and item disposal.
// Note: "const pointer" is a constant pointer (void* const), not a pointer
// to const data, so the pointed-to items are still writable through it.
// NOTE(review): the sign convention of GCompareProc's result is not stated
// here -- presumably strcmp-like; confirm against the containers using it.
139     typedef int GCompareProc(const pointer item1, const pointer item2);
140     typedef void GFreeProc(pointer item); //usually just delete,
141     //but may also support structures with embedded dynamic members
142    
// Allocation wrappers around GMalloc/GCalloc/GRealloc: each passes the
// address of ptr to the matching routine and reports ERR_ALLOC through
// GError() when that routine returns false.
// The expansion is wrapped in do { ... } while (0) so that "GMALLOC(p,n);"
// is exactly one statement: a bare "if" expansion would capture a following
// "else" when the macro is used inside an if/else without braces.
// ptr is parenthesized so that any lvalue expression can be passed.
#define GMALLOC(ptr,size) do { \
    if (!GMalloc((pointer*)(&(ptr)),(size))) GError(ERR_ALLOC); \
  } while (0)
#define GCALLOC(ptr,size) do { \
    if (!GCalloc((pointer*)(&(ptr)),(size))) GError(ERR_ALLOC); \
  } while (0)
#define GREALLOC(ptr,size) do { \
    if (!GRealloc((pointer*)(&(ptr)),(size))) GError(ERR_ALLOC); \
  } while (0)
// Releases ptr through GFree(), which receives the address of ptr.
#define GFREE(ptr) GFree((pointer*)(&(ptr)))
150    
// Returns whichever of the two C strings orders first under strcmp().
// When the strings compare equal, arg2 is returned.
// Neither argument may be NULL (both are passed straight to strcmp).
inline char* strMin(char *arg1, char *arg2) {
  if (strcmp(arg1, arg2) < 0) {
    return arg1;
  }
  return arg2;
}
154    
// Returns whichever of the two C strings orders last under strcmp().
// When the strings compare equal, arg2 is returned.
// Neither argument may be NULL (both are passed straight to strcmp).
inline char* strMax(char *arg1, char *arg2) {
  if (strcmp(arg2, arg1) < 0) {
    return arg1;
  }
  return arg2;
}
158    
// Rounds x to the nearest integer by taking floor(x + 0.5), so exact halves
// go towards +infinity (2.5 -> 3, -2.5 -> -2).
// The result is converted to int without a range check.
inline int iround(double x) {
  const double shifted = x + 0.5;
  return (int)floor(shifted);
}
162    
163     /****************************************************************************/
164    
// Three-way integer comparison: negative when a<b, zero when a==b, positive
// when a>b. The value is a-b whenever that difference fits in an int.
// The subtraction is carried out in 64 bits and saturated to the int range:
// a plain int "a-b" overflows for distant operands (e.g. INT_MAX and -1),
// which is undefined behaviour and in practice yields the wrong sign.
inline int Gintcmp(int a, int b) {
  const int64_t diff = (int64_t)a - (int64_t)b;
  if (diff > INT_MAX) return INT_MAX;
  if (diff < INT_MIN) return INT_MIN;
  return (int)diff;
}
169    
// NULL-tolerant string comparison (defined outside this header).
// NOTE(review): the parameters are non-const char*, so string literals and
// const strings need a cast at the call site.
170     int Gstrcmp(char* a, char* b);
171     //same as strcmp but doesn't crash on NULL pointers
172    
// NOTE(review): presumably the case-insensitive counterpart of Gstrcmp; its
// NULL handling cannot be determined from this header -- confirm.
173     int Gstricmp(const char* a, const char* b);
174    
// Exchanges the values of two ints in place.
// A plain temporary is used: it also behaves correctly when both references
// name the same object. The 'register' storage class was dropped because it
// is deprecated in C++11 and ill-formed from C++17 on.
inline void swap(int &arg1, int &arg2) {
  int swp = arg1;
  arg1 = arg2;
  arg2 = swp;
}
182    
// Exchanges two char pointers (the pointers themselves, not the characters
// they point to). The 'register' storage class was dropped because it is
// deprecated in C++11 and ill-formed from C++17 on.
inline void swap(char* &arg1, char* &arg2) {
  char* swp = arg1;
  arg1 = arg2;
  arg2 = swp;
}
187    
188 gpertea 16 inline void swap(uint &arg1, uint &arg2) {
189     register uint swp=arg1;
190     arg1=arg2; arg2=swp;
191     }
192 gpertea 2
// Exchanges the values of two shorts in place.
// The 'register' storage class was dropped because it is deprecated in
// C++11 and ill-formed from C++17 on.
inline void swap(short &arg1, short &arg2) {
  short swp = arg1;
  arg1 = arg2;
  arg2 = swp;
}
197 gpertea 2
// Exchanges the values of two unsigned shorts in place.
// The 'register' storage class was dropped because it is deprecated in
// C++11 and ill-formed from C++17 on.
inline void swap(unsigned short &arg1, unsigned short &arg2) {
  unsigned short swp = arg1;
  arg1 = arg2;
  arg2 = swp;
}
202 gpertea 2
// Exchanges the values of two longs in place.
// The 'register' storage class was dropped because it is deprecated in
// C++11 and ill-formed from C++17 on.
inline void swap(long &arg1, long &arg2) {
  long swp = arg1;
  arg1 = arg2;
  arg2 = swp;
}
207 gpertea 2
// Exchanges the values of two unsigned longs in place.
// The 'register' storage class was dropped because it is deprecated in
// C++11 and ill-formed from C++17 on.
inline void swap(unsigned long &arg1, unsigned long &arg2) {
  unsigned long swp = arg1;
  arg1 = arg2;
  arg2 = swp;
}
212 gpertea 2
213    
// Exchanges the values of two chars in place.
// The 'register' storage class was dropped because it is deprecated in
// C++11 and ill-formed from C++17 on.
inline void swap(char &arg1, char &arg2) {
  char swp = arg1;
  arg1 = arg2;
  arg2 = swp;
}
218 gpertea 2
// Exchanges the values of two unsigned chars in place.
// The 'register' storage class was dropped because it is deprecated in
// C++11 and ill-formed from C++17 on.
inline void swap(unsigned char &arg1, unsigned char &arg2) {
  unsigned char swp = arg1;
  arg1 = arg2;
  arg2 = swp;
}
223 gpertea 2
224     /**************** Memory management ***************************/
225    
// Each routine works on the pointer variable whose address is passed in ptr.
// The GMALLOC/GCALLOC/GREALLOC macros in this header treat a false return
// value as an allocation failure and report it through GError().
226     bool GMalloc(pointer* ptr, unsigned long size); // Allocate memory
227     bool GCalloc(pointer* ptr, unsigned long size); // Allocate and initialize memory
228     bool GRealloc(pointer* ptr,unsigned long size); // Resize memory
229     void GFree(pointer* ptr); // Free memory, resets ptr to NULL
230    
231    
// NOTE(review): presumably an sprintf() variant that allocates the result
// and stores it in *retp -- confirm ownership and return value against the
// implementation.
232 gpertea 16 int saprintf(char **retp, const char *fmt, ...);
233    
// printf-style reporting routines.
234 gpertea 2 void GError(const char* format,...); // Error routine (aborts program)
235     void GMessage(const char* format,...);// Log message to stderr
236     // Assert failed routine:- usually not called directly but through GASSERT
237     void GAssert(const char* expression, const char* filename, unsigned int lineno);
238    
239     // ****************** string manipulation *************************
240     char *Gstrdup(const char* str);
241     //duplicate a string by allocating a copy for it and returning it
242     char* Gstrdup(const char* sfrom, const char* sto);
243     //same as Gstrdup, but with an early termination (e.g. on delimiter)
244    
245     char* Gsubstr(const char* str, char* from, char* to=NULL);
246     //extracts a substring, allocating it, including boundaries (from/to)
247    
// NOTE(review): str is non-const in all three overloads -- presumably the
// input is cut up in place with fields[] pointing into it; confirm before
// passing read-only strings.
248     int strsplit(char* str, char** fields, int maxfields, const char* delim);
249     int strsplit(char* str, char** fields, int maxfields, const char delim);
250     int strsplit(char* str, char** fields, int maxfields); //splits by tab or space
251    
// NOTE(review): takes the pointer by reference, so it can re-point str;
// whether the old string is freed is not visible here -- confirm.
252     char* replaceStr(char* &str, char* newvalue);
253    
254     //conversion: to Lower/Upper case
255     // creating a new string:
256     char* upCase(const char* str);
257     char* loCase(const char* str);
258     // changing string in place:
259     char* strlower(char * str);
260     char* strupper(char * str);
261    
262     //strstr but for memory zones: scans a memory region
263     //for a substring:
264     void* Gmemscan(void *mem, unsigned int len,
265     void *part, unsigned int partlen);
266    
267     // test if a char is in a string:
268 gpertea 16 bool chrInStr(char c, const char* str);
269 gpertea 2
270     char* rstrchr(char* str, char ch);
271     /* returns a pointer to the rightmost
272     occurrence of ch in str - like rindex for platforms missing it*/
273    
274 gpertea 16 char* strchrs(const char* s, const char* chrs);
275 gpertea 2 //strchr but with a set of chars instead of only one
276    
277 gpertea 90 char* rstrfind(const char* str, const char *substr);
278     // like rindex() but for strings; right side version of strstr()
279 gpertea 2
// NOTE(review): slen defaults to 0 -- presumably meaning "compute the length
// with strlen"; confirm against the implementation.
280 gpertea 90 char* reverseChars(char* str, int slen=0); //in place reversal of string
281    
282 gpertea 16 char* rstrstr(const char* rstart, const char *lend, const char* substr);
283 gpertea 2 /*the reversed, rightside equivalent of strstr: starts searching
284     from right end (rstart), going back to left end (lend) and returns
285     a pointer to the last (right) matching character in str */
286    
287 gpertea 16 char* strifind(const char* str, const char* substr);
288 gpertea 2 // the case insensitive version of strstr -- finding a string within a string
289    
290    
291     //Determines if a string begins with a given prefix
292     //(returns false when any of the params is NULL,
293     // but true when prefix is '' (empty string)!)
294 gpertea 16 bool startsWith(const char* s, const char* prefix);
295 gpertea 2
296 gpertea 16 bool endsWith(const char* s, const char* suffix);
297     //Note: returns true if suffix is empty string, but false if it's NULL
298    
299    
300 gpertea 2 // ELF hash function for strings
301     int strhash(const char* str);
302    
303    
304    
305     //---- generic base GSeg : genomic segment (interval) --
306     // coordinates are considered 1-based (so 0 is invalid)
307     class GSeg {
308     public:
309     uint start; //start<end always!
310     uint end;
311     GSeg(uint s=0,uint e=0) {
312     if (s>e) { start=e;end=s; }
313     else { start=s;end=e; }
314     }
315     //check for overlap with other segment
316     uint len() { return end-start+1; }
317     bool overlap(GSeg* d) {
318 gpertea 16 //return start<d->start ? (d->start<=end) : (start<=d->end);
319     return (start<=d->end && end>=d->start);
320 gpertea 2 }
321    
322     bool overlap(GSeg& d) {
323 gpertea 16 //return start<d.start ? (d.start<=end) : (start<=d.end);
324     return (start<=d.end && end>=d.start);
325 gpertea 2 }
326    
327     bool overlap(GSeg& d, int fuzz) {
328 gpertea 16 //return start<d.start ? (d.start<=end+fuzz) : (start<=d.end+fuzz);
329     return (start<=d.end+fuzz && end+fuzz>=d.start);
330 gpertea 2 }
331    
332     bool overlap(uint s, uint e) {
333 gpertea 16 if (s>e) { swap(s,e); }
334     //return start<s ? (s<=end) : (start<=e);
335     return (start<=e && end>=s);
336 gpertea 2 }
337    
338     //return the length of overlap between two segments
339     int overlapLen(GSeg* r) {
340     if (start<r->start) {
341     if (r->start>end) return 0;
342     return (r->end>end) ? end-r->start+1 : r->end-r->start+1;
343     }
344     else { //r->start<=start
345     if (start>r->end) return 0;
346     return (r->end<end)? r->end-start+1 : end-start+1;
347     }
348     }
349     int overlapLen(uint rstart, uint rend) {
350     if (rstart>rend) { swap(rstart,rend); }
351     if (start<rstart) {
352     if (rstart>end) return 0;
353     return (rend>end) ? end-rstart+1 : rend-rstart+1;
354     }
355     else { //rstart<=start
356     if (start>rend) return 0;
357     return (rend<end)? rend-start+1 : end-start+1;
358     }
359     }
360    
361     //fuzzy coordinate matching:
362     bool coordMatch(GSeg* s, uint fuzz=0) {
363     if (fuzz==0) return (start==s->start && end==s->end);
364     uint sd = (start>s->start) ? start-s->start : s->start-start;
365     uint ed = (end>s->end) ? end-s->end : s->end-end;
366     return (sd<=fuzz && ed<=fuzz);
367     }
368     //comparison operators required for sorting
369     bool operator==(GSeg& d){
370     return (start==d.start && end==d.end);
371     }
372     bool operator>(GSeg& d){
373     return (start==d.start)?(end>d.end):(start>d.start);
374     }
375     bool operator<(GSeg& d){
376     return (start==d.start)?(end<d.end):(start<d.start);
377     }
378     };
379    
380    
381    
382     //--------------------------------------------------------
383     // ************** simple line reading class for text files
384    
385     //GLineReader -- text line reading/buffering class
386     class GLineReader {
387 gpertea 16 bool closeFile;
388 gpertea 2 int len;
389     int allocated;
390     char* buf;
391     bool isEOF;
392     FILE* file;
393     off_t filepos; //current position
394     bool pushed; //pushed back
395     int lcount; //line counter (read lines)
396     public:
397     char* chars() { return buf; }
398     char* line() { return buf; }
399     int readcount() { return lcount; } //number of lines read
400 gpertea 16 void setFile(FILE* stream) { file=stream; }
401 gpertea 2 int length() { return len; }
402     int size() { return len; } //same as size();
403     bool isEof() {return isEOF; }
404     bool eof() { return isEOF; }
405     off_t getfpos() { return filepos; }
406     off_t getFpos() { return filepos; }
407     char* nextLine() { return getLine(); }
408     char* getLine() { if (pushed) { pushed=false; return buf; }
409     else return getLine(file); }
410     char* getLine(FILE* stream) {
411     if (pushed) { pushed=false; return buf; }
412     else return getLine(stream, filepos); }
413     char* getLine(FILE* stream, off_t& f_pos); //read a line from a stream and update
414     // the given file position
415     void pushBack() { if (lcount>0) pushed=true; } // "undo" the last getLine request
416     // so the next call will in fact return the same line
417 gpertea 16 GLineReader(const char* fname) {
418     FILE* f=fopen(fname, "rb");
419     if (f==NULL) GError("Error opening file '%s'!\n",fname);
420     closeFile=true;
421     init(f);
422     }
423 gpertea 2 GLineReader(FILE* stream=NULL, off_t fpos=0) {
424 gpertea 16 closeFile=false;
425     init(stream,fpos);
426     }
427     void init(FILE* stream, off_t fpos=0) {
428 gpertea 2 len=0;
429     isEOF=false;
430     allocated=1024;
431     GMALLOC(buf,allocated);
432     lcount=0;
433     buf[0]=0;
434     file=stream;
435     filepos=fpos;
436     pushed=false;
437     }
438     ~GLineReader() {
439     GFREE(buf);
440 gpertea 16 if (closeFile) fclose(file);
441 gpertea 2 }
442     };
443    
444    
445     /* extended fgets() - to read one full line from a file and
446     update the file position correctly !
447     buf will be reallocated as necessary, to fit the whole line
448     */
// buf and buflen are passed by reference so the caller sees the reallocated
// buffer; f_pos and linelen are optional outputs (NULL by default).
// NOTE(review): the return value at end of file is not documented here --
// presumably NULL, as with fgets(); confirm against the implementation.
449     char* fgetline(char* & buf, int& buflen, FILE* stream, off_t* f_pos=NULL, int* linelen=NULL);
450    
451 gpertea 16
452     //print int/values nicely formatted in 3-digit groups
// NOTE(review): returns char* -- who owns the returned buffer is not visible
// from this header; confirm before freeing or keeping the result.
453     char* commaprint(uint64 n);
454    
455 gpertea 2 /*********************** File management functions *********************/
456    
457 gpertea 16 // removes the last part (file or directory name) of a full path
458     // WARNING: this is a destructive operation for the given string!
459 gpertea 2 void delFileName(char* filepath);
460    
461 gpertea 16 // returns a pointer to the last file or directory name in a full path
462     const char* getFileName(const char* filepath);
463     // returns a pointer to the file "extension" part in a filename
464     const char* getFileExt(const char* filepath);
465 gpertea 2
466 gpertea 16
467 gpertea 2 int fileExists(const char* fname);
468     //returns 0 if file entry doesn't exist
469     // 1 if it's a directory
470     // 2 if it's a regular file
471     // 3 otherwise (?)
472    
// NOTE(review): the result for a missing or unreadable file is not stated
// here -- confirm against the implementation.
473 gpertea 16 int64 fileSize(const char* fpath);
474 gpertea 2
475 gpertea 16 //write a FASTA-formatted record: seqid, optional description, sequence
// linelen defaults to 60 and seqlen to 0.
// NOTE(review): seqlen==0 presumably means "use strlen(seq)" -- confirm.
476     void writeFasta(FILE *fw, const char* seqid, const char* descr,
477     const char* seq, int linelen=60, int seqlen=0);
478    
479 gpertea 2 //parses the next number found in a string at the current position
480     //until a non-digit (and not a '.', 'e','E','-','+') is encountered;
481     //updates the char* pointer to be after the last digit parsed
482     bool parseNumber(char* &p, double& v);
483     bool parseDouble(char* &p, double& v); //just an alias for parseNumber
484    
// Integer counterparts; p is passed by reference like in parseNumber.
// NOTE(review): presumably they return false when no number could be parsed
// and leave the output untouched -- confirm against the implementation.
485     bool parseInt(char* &p, int& i);
486     bool parseUInt(char* &p, uint& i);
487     bool parseHex(char* &p, uint& i);
488    
489     #endif /* G_BASE_DEFINED */