ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/gclib/GBase.h
Revision: 310
Committed: Fri Mar 22 20:06:27 2013 UTC (6 years, 3 months ago) by gpertea
File size: 13372 byte(s)
Log Message:
sync with igm repo

Line File contents
1 #ifndef G_BASE_DEFINED
2 #define G_BASE_DEFINED
3 #ifndef _POSIX_SOURCE
4 //mostly for MinGW
5 #define _POSIX_SOURCE
6 #endif
7 #ifdef HAVE_CONFIG_H
8 #include "config.h"
9 #endif
10 #include <string.h>
11 #include <stdlib.h>
12 #include <stdio.h>
13 #include <math.h>
14 #include <limits.h>
15 #include <sys/types.h>
16 #include <sys/stat.h>
17 #include <stdint.h>
18
// Platform compatibility section.
// Windows builds: make sure __WIN32__ is defined, use '\\' as the path
// separator, replace off_t with int64_t, map popen to _popen and provide
// fseeko/ftello macros. All other builds: use '/' and include <unistd.h>.
19 #if defined __WIN32__ || defined WIN32 || defined _WIN32 || defined _WIN32_
20 #ifndef __WIN32__
21 #define __WIN32__
22 #endif
23 #include <windows.h>
24 #include <io.h>
25 #define CHPATHSEP '\\'
// from here on every use of off_t in this translation unit means int64_t
26 #undef off_t
27 #define off_t int64_t
28 #ifndef popen
29 #define popen _popen
30 #endif
31 #ifndef fseeko
// NOTE(review): #ifdef only detects preprocessor macros; if the C runtime
// declares _fseeki64 as a plain function this test fails and the fseek
// fallback below is selected -- confirm this is intended.
32 #ifdef _fseeki64
33 #define fseeko(stream, offset, origin) _fseeki64(stream, offset, origin)
34 #else
35 /*
36 #define _DEFINE_WIN32_FSEEKO
37 int fseeko(FILE *stream, off_t offset, int whence);
38 */
39 #define fseeko fseek
40 #endif
41 #endif
42 #ifndef ftello
// NOTE(review): same macro-only detection caveat as for _fseeki64 above.
43 #ifdef _ftelli64
44 #define ftello(stream) _ftelli64(stream)
45 #else
46 /*
47 #define _DEFINE_WIN32_FTELLO
48 off_t ftello(FILE *stream);
49 */
50 #define ftello ftell
51 #endif
52 #endif
53 #else
54 #define CHPATHSEP '/'
55 #include <unistd.h>
56 #endif
57
// Final fallback for every platform: when fseeko/ftello are not defined as
// macros at this point, alias them to fseek/ftell.
// NOTE(review): this also triggers when the C library provides fseeko/ftello
// as functions rather than macros -- confirm that remapping is wanted.
58 #ifndef fseeko
59 #define fseeko fseek
60 #endif
61 #ifndef ftello
62 #define ftello ftell
63 #endif
64
// A DEBUG build never runs with NDEBUG set, so the debug helpers stay active.
#if defined(DEBUG)
#undef NDEBUG
#endif

// Short aliases for the fixed-width integer types from <stdint.h>.
typedef int16_t  int16;
typedef uint16_t uint16;
typedef int32_t  int32;
typedef uint32_t uint32;
typedef int64_t  int64;
typedef uint64_t uint64;

// Aliases for raw byte values.
typedef unsigned char uchar;
typedef unsigned char byte;

// Largest unsigned int / int values, under both spellings in use
// (only defined here when the platform headers did not define them).
#if !defined(MAXUINT)
#define MAXUINT ((unsigned int)-1)
#endif

#if !defined(MAX_UINT)
#define MAX_UINT ((unsigned int)-1)
#endif

#if !defined(MAXINT)
#define MAXINT INT_MAX
#endif

#if !defined(MAX_INT)
#define MAX_INT INT_MAX
#endif
95
96 /****************************************************************************/
97
// Process exit codes, for platforms whose headers do not provide them.
#if !defined(EXIT_FAILURE)
#define EXIT_FAILURE 1
#endif

#if !defined(EXIT_SUCCESS)
#define EXIT_SUCCESS 0
#endif

/****************************************************************************/
// Message text used to report a failed memory allocation.
#define ERR_ALLOC "Error allocating memory.\n"
108
109 //-------------------
110
// Debug helpers
//  GASSERT(exp) - calls GAssert() with the expression text, file and line
//                 when exp is false; expands to a no-op when NDEBUG is set.
//  GTRACE(exp)  - forwards a parenthesized argument list to GMessage(), but
//                 only when TRACE is defined and NDEBUG is not; otherwise
//                 a no-op (the arguments are not evaluated).
#ifdef NDEBUG
#define GASSERT(exp) ((void)0)
#define GTRACE(exp)  ((void)0)
#else
#define GASSERT(exp) ((exp)?((void)0):(void)GAssert(#exp,__FILE__,__LINE__))
#if defined(TRACE)
#define GTRACE(exp) (GMessage exp)
#else
#define GTRACE(exp) ((void)0)
#endif
#endif

//  GERROR(exp)  - forwards a parenthesized argument list to GError(),
//                 in every build configuration.
#define GERROR(exp) (GError exp)
125 /********************************** Macros ***********************************/
// Absolute value
// (like all macros in this group, arguments may be evaluated more than once)
#define GABS(v) (((v)>=0)?(v):-(v))

// Larger / smaller of two values
#define GMAX(p,q) (((p)>(q))?(p):(q))
#define GMIN(p,q) (((p)>(q))?(q):(p))

// Smallest of three values
#define GMIN3(p,q,r) ((p)<(q)?GMIN(p,r):GMIN(q,r))

// Largest of three values
#define GMAX3(p,q,r) ((p)>(q)?GMAX(p,r):GMAX(q,r))

// Store the smaller of a, b into lo and the larger into hi
#define GMINMAX(lo,hi,a,b) ((a)<(b)?((lo)=(a),(hi)=(b)):((lo)=(b),(hi)=(a)))

// Restrict v to the closed range [lo..hi]
#define GCLAMP(lo,v,hi) ((v)<(lo)?(lo):((v)>(hi)?(hi):(v)))
144
// Generic untyped pointer and a short name for unsigned int.
typedef void* pointer;
typedef unsigned int uint;

// Callback signatures used by generic containers:
//  GCompareProc - compares two items and returns an int
//  GFStoreProc  - writes an item to a FILE (serialization)
//  GFLoadProc   - reads an item back from a FILE (deserialization)
//  GFreeProc    - releases an item
typedef int GCompareProc(const pointer item1, const pointer item2);
typedef long GFStoreProc(const pointer item1, FILE* fstorage); //for serialization
typedef pointer GFLoadProc(FILE* fstorage); //for deserialization

typedef void GFreeProc(pointer item); //usually just delete,
      //but may also support structures with embedded dynamic members

// Checked allocation helpers: call GMalloc/GCalloc/GRealloc on the address of
// ptr and report ERR_ALLOC through GError() when the call returns false.
// Each macro is a single void expression (same shape as GASSERT), so it is
// safe in unbraced if/else bodies; the previous bare `if (...) GError(...)`
// expansion would silently capture a following `else`.
// The ptr argument must be an lvalue; it is parenthesized inside the macro.
#define GMALLOC(ptr,size) \
   (GMalloc((pointer*)(&(ptr)),(size)) ? (void)0 : (void)GError(ERR_ALLOC))
#define GCALLOC(ptr,size) \
   (GCalloc((pointer*)(&(ptr)),(size)) ? (void)0 : (void)GError(ERR_ALLOC))
#define GREALLOC(ptr,size) \
   (GRealloc((pointer*)(&(ptr)),(size)) ? (void)0 : (void)GError(ERR_ALLOC))
// Releases ptr through GFree(), which receives the pointer's address.
#define GFREE(ptr) GFree((pointer*)(&(ptr)))
162
// Returns whichever argument sorts first according to strcmp();
// when the two strings compare equal, arg2 is returned.
inline char* strMin(char *arg1, char *arg2) {
 const int order = strcmp(arg1, arg2);
 if (order < 0) return arg1;
 return arg2;
}
166
// Returns whichever argument sorts last according to strcmp();
// when the two strings compare equal, arg2 is returned.
inline char* strMax(char *arg1, char *arg2) {
 const int order = strcmp(arg2, arg1);
 if (order < 0) return arg1;
 return arg2;
}
170
// Rounds x to the nearest integer by taking floor(x + 0.5),
// so exact halves go toward positive infinity (2.5 -> 3, -2.5 -> -2).
inline int iround(double x) {
 const double shifted = x + 0.5;
 return (int)floor(shifted);
}
174
175 /****************************************************************************/
176
// Three-way comparison of two ints.
// Returns -1 when a<b, 0 when a==b and 1 when a>b.
// The result is built from the two comparisons instead of a-b, because the
// subtraction overflows (undefined behavior, and in practice the wrong sign)
// when the operands are far apart, e.g. Gintcmp(INT_MIN, 1).
inline int Gintcmp(int a, int b) {
 return (a>b) - (a<b);
}
181
// String comparison in the style of strcmp() that tolerates NULL arguments.
// n defaults to -1; presumably a positive n limits the comparison to the
// first n characters -- TODO confirm against the implementation.
182 int Gstrcmp(const char* a, const char* b, int n=-1);
183 //same as strcmp but does not crash on NULL pointers
184
// Presumably the case-insensitive counterpart of Gstrcmp (same parameters)
// -- TODO confirm against the implementation.
185 int Gstricmp(const char* a, const char* b, int n=-1);
186
//basic swap template function: exchanges the values of lhs and rhs
template<class T> void Gswap(T& lhs, T& rhs) {
 //register T saved=lhs;
 T saved=lhs; //T must be copyable and assignable
 lhs=rhs;
 rhs=saved;
}
194
195
196 /**************** Memory management ***************************/
197
// Allocation primitives. Each takes the ADDRESS of the caller's pointer; the
// GMALLOC/GCALLOC/GREALLOC macros treat a false return value as failure.
198 bool GMalloc(pointer* ptr, unsigned long size); // Allocate memory
199 bool GCalloc(pointer* ptr, unsigned long size); // Allocate and initialize memory
200 bool GRealloc(pointer* ptr,unsigned long size); // Resize memory
201 void GFree(pointer* ptr); // Free memory, resets ptr to NULL
202
203
204 //int saprintf(char **retp, const char *fmt, ...);
205
// Reporting routines; both take a format string followed by variadic
// arguments (presumably printf-style -- TODO confirm).
206 void GError(const char* format,...); // Error routine (aborts program)
207 void GMessage(const char* format,...);// Log message to stderr
208 // Assertion-failure routine: normally reached through the GASSERT macro rather than called directly
209 void GAssert(const char* expression, const char* filename, unsigned int lineno);
210
211 // ****************** string manipulation *************************
// NOTE(review): functions below that return a newly allocated string do not
// state here how the result must be released -- confirm against GBase.cpp.
212 char *Gstrdup(const char* str);
213 //duplicates a string: allocates a copy and returns it
214 char* Gstrdup(const char* sfrom, const char* sto);
215 //same as Gstrdup, but with an early termination (e.g. on delimiter)
216
217 char* Gsubstr(const char* str, char* from, char* to=NULL);
218 //extracts a substring into newly allocated memory, boundaries (from/to) included
219
// strsplit overloads: the delimiter is a string, a single character, or
// (third form) tab/space. Presumably the return value is the number of
// fields stored in fields[] -- TODO confirm.
220 int strsplit(char* str, char** fields, int maxfields, const char* delim);
221 int strsplit(char* str, char** fields, int maxfields, const char delim);
222 int strsplit(char* str, char** fields, int maxfields); //splits by tab or space
223
// NOTE(review): str is taken by reference, so the caller's pointer can be
// reassigned; exact ownership rules are not visible here -- confirm.
224 char* replaceStr(char* &str, char* newvalue);
225
226 //case conversion: to lower/upper case
227 // creating a new string:
228 char* upCase(const char* str);
229 char* loCase(const char* str);
230 // changing the string in place:
231 char* strlower(char * str);
232 char* strupper(char * str);
233
234 //like strstr but for memory zones: scans a memory region (mem, len)
235 //for a byte sequence (part, partlen):
236 void* Gmemscan(void *mem, unsigned int len,
237 void *part, unsigned int partlen);
238
239 // tests if a char is in a string:
240 bool chrInStr(char c, const char* str);
241
242 char* rstrchr(char* str, char ch);
243 /* returns a pointer to the rightmost
244 occurrence of ch in str - like rindex for platforms missing it*/
245
246 char* strchrs(const char* s, const char* chrs);
247 //like strchr but with a set of chars instead of only one
248
249 char* rstrfind(const char* str, const char *substr);
250 // like rindex() but for strings; right side version of strstr()
251
252 char* reverseChars(char* str, int slen=0); //in place reversal of string
253
254 char* rstrstr(const char* rstart, const char *lend, const char* substr);
255 /*the reversed, rightside equivalent of strstr: starts searching
256 from right end (rstart), going back to left end (lend) and returns
257 a pointer to the last (right) matching character in str */
258
259 char* strifind(const char* str, const char* substr);
260 // the case insensitive version of strstr -- finding a string within a string
261
262
263 //Determines if a string begins with a given prefix
264 //(returns false when any of the params is NULL,
265 // but true when prefix is '' (empty string)!)
266 bool startsWith(const char* s, const char* prefix);
267
268 bool endsWith(const char* s, const char* suffix);
269 //Note: returns true if suffix is an empty string, but false if it is NULL
270
271
272 // ELF hash function for strings
273 int strhash(const char* str);
274
275
276
277 //---- generic base GSeg : genomic segment (interval) --
278 // coordinates are considered 1-based (so 0 is invalid)
279 class GSeg {
280 public:
281 uint start; //start<end always!
282 uint end;
283 GSeg(uint s=0,uint e=0) {
284 if (s>e) { start=e;end=s; }
285 else { start=s;end=e; }
286 }
287 //check for overlap with other segment
288 uint len() { return end-start+1; }
289 bool overlap(GSeg* d) {
290 //return start<d->start ? (d->start<=end) : (start<=d->end);
291 return (start<=d->end && end>=d->start);
292 }
293
294 bool overlap(GSeg& d) {
295 //return start<d.start ? (d.start<=end) : (start<=d.end);
296 return (start<=d.end && end>=d.start);
297 }
298
299 bool overlap(GSeg& d, int fuzz) {
300 //return start<d.start ? (d.start<=end+fuzz) : (start<=d.end+fuzz);
301 return (start<=d.end+fuzz && end+fuzz>=d.start);
302 }
303
304 bool overlap(uint s, uint e) {
305 if (s>e) { Gswap(s,e); }
306 //return start<s ? (s<=end) : (start<=e);
307 return (start<=e && end>=s);
308 }
309
310 //return the length of overlap between two segments
311 int overlapLen(GSeg* r) {
312 if (start<r->start) {
313 if (r->start>end) return 0;
314 return (r->end>end) ? end-r->start+1 : r->end-r->start+1;
315 }
316 else { //r->start<=start
317 if (start>r->end) return 0;
318 return (r->end<end)? r->end-start+1 : end-start+1;
319 }
320 }
321 int overlapLen(uint rstart, uint rend) {
322 if (rstart>rend) { Gswap(rstart,rend); }
323 if (start<rstart) {
324 if (rstart>end) return 0;
325 return (rend>end) ? end-rstart+1 : rend-rstart+1;
326 }
327 else { //rstart<=start
328 if (start>rend) return 0;
329 return (rend<end)? rend-start+1 : end-start+1;
330 }
331 }
332
333 //fuzzy coordinate matching:
334 bool coordMatch(GSeg* s, uint fuzz=0) {
335 if (fuzz==0) return (start==s->start && end==s->end);
336 uint sd = (start>s->start) ? start-s->start : s->start-start;
337 uint ed = (end>s->end) ? end-s->end : s->end-end;
338 return (sd<=fuzz && ed<=fuzz);
339 }
340 //comparison operators required for sorting
341 bool operator==(GSeg& d){
342 return (start==d.start && end==d.end);
343 }
344 bool operator<(GSeg& d){
345 return (start==d.start)?(end<d.end):(start<d.start);
346 }
347 };
348
349
350
351 //--------------------------------------------------------
352 // ************** simple line reading class for text files
353
354 //GLineReader -- text line reading/buffering class
355 class GLineReader {
356 bool closeFile;
357 int len;
358 int allocated;
359 char* buf;
360 bool isEOF;
361 FILE* file;
362 off_t filepos; //current position
363 bool pushed; //pushed back
364 int lcount; //line counter (read lines)
365 public:
366 char* chars() { return buf; }
367 char* line() { return buf; }
368 int readcount() { return lcount; } //number of lines read
369 void setFile(FILE* stream) { file=stream; }
370 int length() { return len; }
371 int size() { return len; } //same as size();
372 bool isEof() {return isEOF; }
373 bool eof() { return isEOF; }
374 off_t getfpos() { return filepos; }
375 off_t getFpos() { return filepos; }
376 char* nextLine() { return getLine(); }
377 char* getLine() { if (pushed) { pushed=false; return buf; }
378 else return getLine(file); }
379 char* getLine(FILE* stream) {
380 if (pushed) { pushed=false; return buf; }
381 else return getLine(stream, filepos); }
382 char* getLine(FILE* stream, off_t& f_pos); //read a line from a stream and update
383 // the given file position
384 void pushBack() { if (lcount>0) pushed=true; } // "undo" the last getLine request
385 // so the next call will in fact return the same line
386 GLineReader(const char* fname) {
387 FILE* f=fopen(fname, "rb");
388 if (f==NULL) GError("Error opening file '%s'!\n",fname);
389 closeFile=true;
390 init(f);
391 }
392 GLineReader(FILE* stream=NULL, off_t fpos=0) {
393 closeFile=false;
394 init(stream,fpos);
395 }
396 void init(FILE* stream, off_t fpos=0) {
397 len=0;
398 isEOF=false;
399 allocated=1024;
400 GMALLOC(buf,allocated);
401 lcount=0;
402 buf[0]=0;
403 file=stream;
404 filepos=fpos;
405 pushed=false;
406 }
407 ~GLineReader() {
408 GFREE(buf);
409 if (closeFile) fclose(file);
410 }
411 };
412
413
414 /* extended fgets() - reads one full line from a file and
415 updates the file position correctly!
416 buf will be reallocated as necessary, to fit the whole line;
    buf and buflen are passed by reference so the caller sees the new buffer.
    f_pos and linelen are optional (default NULL).
417 */
418 char* fgetline(char* & buf, int& buflen, FILE* stream, off_t* f_pos=NULL, int* linelen=NULL);
419
420
421 //prints an integer value nicely formatted in 3-digit groups
// NOTE(review): where the returned string is stored is not visible here -- confirm before keeping the pointer
422 char* commaprint(uint64 n);
423
424 /*********************** File management functions *********************/
425
426 // removes the last part (file or directory name) of a full path
427 // WARNING: this is a destructive operation for the given string!
428 void delFileName(char* filepath);
429
430 // returns a pointer to the last file or directory name in a full path
431 const char* getFileName(const char* filepath);
432 // returns a pointer to the file "extension" part in a filename
433 const char* getFileExt(const char* filepath);
434
435
436 int fileExists(const char* fname);
437 //returns 0 if the file entry does not exist
438 // 1 if it is a directory
439 // 2 if it is a regular file
440 // 3 otherwise (?)
441
// presumably the size in bytes of the file at fpath -- TODO confirm (error value not visible here)
442 int64 fileSize(const char* fpath);
443
444 //writes a FASTA formatted record to fw
// linelen defaults to 60 and seqlen to 0; presumably linelen is the number of
// sequence characters per output line and seqlen==0 means "compute the
// length" -- TODO confirm
445 void writeFasta(FILE *fw, const char* seqid, const char* descr,
446 const char* seq, int linelen=60, int seqlen=0);
447
448 //parses the next number found in a string at the current position
449 //until a non-digit (and not a '.', 'e','E','-','+') is encountered;
450 //updates the char* pointer to be after the last digit parsed
451 bool parseNumber(char* &p, double& v);
452 bool parseDouble(char* &p, double& v); //just an alias for parseNumber
453
// integer variants: same calling convention as parseNumber (p is advanced by
// reference, the value is returned through the second argument); the meaning
// of the bool result is presumably "a value was parsed" -- TODO confirm
454 bool parseInt(char* &p, int& i);
455 bool parseUInt(char* &p, uint& i);
456 bool parseHex(char* &p, uint& i);
457
458 #endif /* G_BASE_DEFINED */