1 |
gpertea |
2 |
#ifndef G_BASE_DEFINED |
2 |
|
|
#define G_BASE_DEFINED |
3 |
gpertea |
16 |
#ifndef _POSIX_SOURCE |
4 |
|
|
//mostly for MinGW |
5 |
|
|
#define _POSIX_SOURCE |
6 |
|
|
#endif |
7 |
|
|
#ifdef HAVE_CONFIG_H |
8 |
|
|
#include "config.h" |
9 |
|
|
#endif |
10 |
gpertea |
2 |
#include <string.h> |
11 |
|
|
#include <stdlib.h> |
12 |
|
|
#include <stdio.h> |
13 |
|
|
#include <math.h> |
14 |
|
|
#include <limits.h> |
15 |
|
|
#include <sys/types.h> |
16 |
|
|
#include <sys/stat.h> |
17 |
gpertea |
16 |
#include <stdint.h> |
18 |
|
|
|
19 |
|
|
// Platform section: path separator character and file positioning calls.
//  - Windows: CHPATHSEP is '\\', off_t is forced to int64_t and fseeko/ftello
//    are mapped onto the 64-bit CRT calls when those are available.
//  - elsewhere: CHPATHSEP is '/' and <unistd.h> is included.
#if defined __WIN32__ || defined WIN32 || defined _WIN32 || defined _WIN32_
  #ifndef __WIN32__
    #define __WIN32__
  #endif
  #include <windows.h>
  #include <io.h>
  #define CHPATHSEP '\\'
  #undef off_t
  #define off_t int64_t
  // _fseeki64/_ftelli64 are CRT functions, not macros, so a plain #ifdef on
  // their names never succeeds; they are also detected through the compiler
  // version (Microsoft CRT, Visual C++ 2005 and later).
  #if defined(_fseeki64) || (defined(_MSC_VER) && _MSC_VER >= 1400)
    #define fseeko(stream, offset, origin) _fseeki64(stream, offset, origin)
  #else
    /*
    #define _DEFINE_WIN32_FSEEKO
    int fseeko(FILE *stream, off_t offset, int whence);
    */
    #define fseeko fseek
  #endif
  #if defined(_ftelli64) || (defined(_MSC_VER) && _MSC_VER >= 1400)
    #define ftello(stream) _ftelli64(stream)
  #else
    /*
    #define _DEFINE_WIN32_FTELLO
    off_t ftello(FILE *stream);
    */
    #define ftello ftell
  #endif
#else
  #define CHPATHSEP '/'
  #include <unistd.h>
#endif

// Last resort: when no fseeko/ftello macro exists, use plain fseek/ftell.
// NOTE(review): this also triggers on systems that provide fseeko/ftello as
// functions rather than macros -- confirm that replacing them is intended.
#ifndef fseeko
  #define fseeko fseek
#endif
#ifndef ftello
  #define ftello ftell
#endif
57 |
gpertea |
16 |
|
58 |
gpertea |
2 |
// A DEBUG build never has NDEBUG defined
#if defined(DEBUG)
  #undef NDEBUG
#endif

// short names for the fixed-width 32 bit integer types
typedef int32_t  int32;
typedef uint32_t uint32;

// single byte unsigned types
typedef unsigned char uchar;
typedef unsigned char byte;

// largest unsigned int / int values; each is available under two spellings
// and is only defined here when the name is not already taken
#if !defined(MAXUINT)
  #define MAXUINT ((unsigned int)-1)
#endif

#if !defined(MAXINT)
  #define MAXINT INT_MAX
#endif

#if !defined(MAX_UINT)
  #define MAX_UINT ((unsigned int)-1)
#endif

#if !defined(MAX_INT)
  #define MAX_INT INT_MAX
#endif

// short names for the fixed-width 64 bit integer types
typedef int64_t  int64;
typedef uint64_t uint64;
86 |
|
|
|
87 |
gpertea |
2 |
/****************************************************************************/ |
88 |
|
|
|
89 |
|
|
// process exit codes, defined here only if the C library did not provide them
#if !defined(EXIT_FAILURE)
  #define EXIT_FAILURE 1
#endif

#if !defined(EXIT_SUCCESS)
  #define EXIT_SUCCESS 0
#endif

/****************************************************************************/
// message passed to GError() by the GMALLOC/GCALLOC/GREALLOC macros
#define ERR_ALLOC "Error allocating memory.\n"
99 |
|
|
|
100 |
|
|
//------------------- |
101 |
|
|
|
102 |
|
|
// Debug helpers
//  GASSERT(exp) : calls GAssert() with the expression text, file and line
//                 when exp is false; expands to nothing if NDEBUG is defined
//  GTRACE(exp)  : forwards a parenthesized argument list to GMessage(), e.g.
//                 GTRACE(("x=%d\n", x)); active only when TRACE is defined
//                 and NDEBUG is not
#if defined(NDEBUG)
  #define GASSERT(exp) ((void)0)
  #define GTRACE(exp)  ((void)0)
#else
  #define GASSERT(exp) ((exp)?((void)0):(void)GAssert(#exp,__FILE__,__LINE__))
  #if defined(TRACE)
    #define GTRACE(exp)  (GMessage exp)
  #else
    #define GTRACE(exp)  ((void)0)
  #endif
#endif

//  GERROR(exp) : forwards a parenthesized argument list to GError()
#define GERROR(exp) (GError exp)
116 |
|
|
/********************************** Macros ***********************************/
// These are plain function-like macros: an argument can be evaluated more
// than once, so do not pass expressions with side effects.

// Absolute value
#define GABS(val)  (((val)>=0) ? (val) : -(val))

// Larger / smaller of two values
#define GMAX(a,b)  (((a)>(b)) ? (a) : (b))
#define GMIN(a,b)  (((a)>(b)) ? (b) : (a))

// Smallest of three values
#define GMIN3(x,y,z)  ((x)<(y) ? GMIN(x,z) : GMIN(y,z))

// Largest of three values
#define GMAX3(x,y,z)  ((x)>(y) ? GMAX(x,z) : GMAX(y,z))

// Stores the smaller of a, b in lo and the larger one in hi
#define GMINMAX(lo,hi,a,b)  ((a)<(b) ? ((lo)=(a),(hi)=(b)) : ((lo)=(b),(hi)=(a)))

// Limits x to the range [lo..hi]
#define GCLAMP(lo,x,hi)  ((x)<(lo) ? (lo) : ((x)>(hi) ? (hi) : (x)))
135 |
|
|
|
136 |
|
|
// untyped pointer and a short name for unsigned int
typedef void*        pointer;
typedef unsigned int uint;

// function type: compares two items and returns an int
typedef int  GCompareProc(const pointer item1, const pointer item2);
// function type: disposes of an item -- usually just delete, but may also
// support structures with embedded dynamic members
typedef void GFreeProc(pointer item);
142 |
|
|
|
143 |
|
|
// Allocation helpers: each passes the address of ptr to the matching G*
// routine and calls GError(ERR_ALLOC) when that routine returns false.
// The bodies are wrapped in do { } while (0) so a macro call followed by ';'
// is a single statement (a bare "if" would capture a following "else"), and
// ptr is parenthesized so any lvalue expression can be passed.
#define GMALLOC(ptr,size)  do { if (!GMalloc((pointer*)(&(ptr)),size)) \
                                    GError(ERR_ALLOC); } while (0)
#define GCALLOC(ptr,size)  do { if (!GCalloc((pointer*)(&(ptr)),size)) \
                                    GError(ERR_ALLOC); } while (0)
#define GREALLOC(ptr,size) do { if (!GRealloc((pointer*)(&(ptr)),size)) \
                                    GError(ERR_ALLOC); } while (0)
// Releases ptr through GFree()
#define GFREE(ptr)       GFree((pointer*)(&(ptr)))
150 |
|
|
|
151 |
gpertea |
16 |
// Returns the argument that orders first according to strcmp();
// arg2 is returned when the two strings compare equal.
inline char* strMin(char *arg1, char *arg2) {
  if (strcmp(arg1, arg2) < 0) return arg1;
  return arg2;
}
154 |
|
|
|
155 |
gpertea |
16 |
// Returns the argument that orders last according to strcmp();
// arg2 is returned when the two strings compare equal.
inline char* strMax(char *arg1, char *arg2) {
  if (strcmp(arg2, arg1) < 0) return arg1;
  return arg2;
}
158 |
|
|
|
159 |
gpertea |
2 |
// Rounds x to the nearest integer by taking floor(x + 0.5),
// so halfway values go up: 2.5 -> 3, -2.5 -> -2.
inline int iround(double x) {
  const double shifted = x + 0.5;
  return (int)floor(shifted);
}
162 |
|
|
|
163 |
|
|
/****************************************************************************/ |
164 |
|
|
|
165 |
|
|
// Three-way comparison of two ints.
// Returns -1 when a<b, 0 when a==b and 1 when a>b.
// The result is built from comparisons rather than the subtraction a-b,
// which overflows (undefined behavior for signed int) when the operands are
// far apart, e.g. Gintcmp(INT_MAX, -1).
inline int Gintcmp(int a, int b) {
  return (a>b) - (a<b);
}
169 |
|
|
|
170 |
gpertea |
150 |
// same as strcmp but doesn't crash on NULL pointers
// NOTE(review): the meaning of n (default -1) is not visible in this header;
// presumably a limit on the number of characters compared -- confirm.
int Gstrcmp(const char *a, const char *b, int n = -1);

// NOTE(review): presumably the case-insensitive counterpart of Gstrcmp() -- confirm.
int Gstricmp(const char *a, const char *b, int n = -1);
174 |
gpertea |
2 |
|
175 |
gpertea |
144 |
// Basic swap template function: exchanges the values of lhs and rhs
// through a temporary copy (T must be copyable and assignable).
template<class T> void Gswap(T& lhs, T& rhs) {
  T saved = lhs;
  lhs = rhs;
  rhs = saved;
}
182 |
gpertea |
2 |
|
183 |
gpertea |
144 |
/// bitCount_32 - counts the bits that are set in a 32 bit value.
/// Ex. bitCount_32(0xF000F000) = 8
/// Returns 0 if the word is zero.
inline uint bitCount_32(uint32_t Value) {
#if __GNUC__ >= 4
  return __builtin_popcount(Value);
#else
  // portable fallback: sum the bits in 2 bit, then 4 bit, then 8 bit fields
  uint32_t pairs = Value - ((Value >> 1) & 0x55555555);
  uint32_t nibbles = (pairs & 0x33333333) + ((pairs >> 2) & 0x33333333);
  uint32_t bytes = (nibbles + (nibbles >> 4)) & 0xF0F0F0F;
  // the multiplication accumulates the four byte counts in the top byte
  return (bytes * 0x1010101) >> 24;
#endif
}
195 |
gpertea |
2 |
|
196 |
gpertea |
144 |
/// bitCount_64 - counts the bits that are set in a value
/// (64 bit edition.)
inline uint bitCount_64(uint64_t Value) {
#if __GNUC__ >= 4
  return __builtin_popcountll(Value);
#else
  // portable fallback: sum the bits in 2 bit, then 4 bit, then 8 bit fields
  uint64_t pairs = Value - ((Value >> 1) & 0x5555555555555555ULL);
  uint64_t nibbles = (pairs & 0x3333333333333333ULL) +
                     ((pairs >> 2) & 0x3333333333333333ULL);
  uint64_t bytes = (nibbles + (nibbles >> 4)) & 0x0F0F0F0F0F0F0F0FULL;
  // the multiplication accumulates the eight byte counts in the top byte
  return uint((uint64_t)(bytes * 0x0101010101010101ULL) >> 56);
#endif
}
208 |
gpertea |
2 |
|
209 |
gpertea |
144 |
/// bitCountTrailingZeros_32 - counts the zero bits found from the least
/// significant bit up to the first one bit.
/// Ex. bitCountTrailingZeros_32(0xFF00FF00) == 8.
/// Returns 32 if the word is zero.
inline unsigned bitCountTrailingZeros_32(uint32_t Value) {
#if __GNUC__ >= 4
  if (Value == 0) return 32;
  return __builtin_ctz(Value);
#else
  // maps (lowest set bit) % 37 to the position of that bit; entry 0 is the
  // result for a zero word
  static const unsigned bitPosByMod37[] = {
    32, 0, 1, 26, 2, 23, 27, 0, 3, 16, 24, 30, 28, 11, 0, 13,
    4, 7, 17, 0, 25, 22, 31, 15, 29, 10, 12, 6, 0, 21, 14, 9,
    5, 20, 8, 19, 18
  };
  // Value & -Value keeps only the lowest set bit
  const uint32_t lowestBit = Value & ((uint32_t)0 - Value);
  return bitPosByMod37[lowestBit % 37];
#endif
}
225 |
gpertea |
2 |
|
226 |
gpertea |
144 |
/// bitCountTrailingZeros_64 - counts the zero bits found from the least
/// significant bit up to the first one bit (64 bit edition.)
/// Returns 64 if the word is zero.
inline unsigned bitCountTrailingZeros_64(uint64_t Value) {
#if __GNUC__ >= 4
  if (Value == 0) return 64;
  return __builtin_ctzll(Value);
#else
  // maps (lowest set bit) % 67 to the position of that bit; entry 0 is the
  // result for a zero word
  static const unsigned bitPosByMod67[] = {
    64, 0, 1, 39, 2, 15, 40, 23, 3, 12, 16, 59, 41, 19, 24, 54,
    4, 64, 13, 10, 17, 62, 60, 28, 42, 30, 20, 51, 25, 44, 55,
    47, 5, 32, 65, 38, 14, 22, 11, 58, 18, 53, 63, 9, 61, 27,
    29, 50, 43, 46, 31, 37, 21, 57, 52, 8, 26, 49, 45, 36, 56,
    7, 48, 35, 6, 34, 33, 0
  };
  // Value & -Value keeps only the lowest set bit
  const uint64_t lowestBit = Value & ((uint64_t)0 - Value);
  return bitPosByMod67[lowestBit % 67];
#endif
}
244 |
gpertea |
2 |
|
245 |
|
|
/**************** Memory management ***************************/ |
246 |
|
|
|
247 |
|
|
bool GMalloc(pointer* ptr, unsigned long size); // Allocate memory |
248 |
|
|
bool GCalloc(pointer* ptr, unsigned long size); // Allocate and initialize memory |
249 |
|
|
bool GRealloc(pointer* ptr,unsigned long size); // Resize memory |
250 |
|
|
void GFree(pointer* ptr); // Free memory, resets ptr to NULL |
251 |
|
|
|
252 |
|
|
|
253 |
gpertea |
16 |
int saprintf(char **retp, const char *fmt, ...); |
254 |
|
|
|
255 |
gpertea |
2 |
void GError(const char* format,...); // Error routine (aborts program) |
256 |
|
|
void GMessage(const char* format,...);// Log message to stderr |
257 |
|
|
// Assert failed routine:- usually not called directly but through GASSERT |
258 |
|
|
void GAssert(const char* expression, const char* filename, unsigned int lineno); |
259 |
|
|
|
260 |
|
|
// ****************** string manipulation ************************* |
261 |
|
|
char *Gstrdup(const char* str); |
262 |
|
|
//duplicate a string by allocating a copy for it and returning it |
263 |
|
|
char* Gstrdup(const char* sfrom, const char* sto); |
264 |
|
|
//same as GStrdup, but with an early termination (e.g. on delimiter) |
265 |
|
|
|
266 |
|
|
char* Gsubstr(const char* str, char* from, char* to=NULL); |
267 |
|
|
//extracts a substring, allocating it, including boundaries (from/to) |
268 |
|
|
|
269 |
|
|
int strsplit(char* str, char** fields, int maxfields, const char* delim); |
270 |
|
|
int strsplit(char* str, char** fields, int maxfields, const char delim); |
271 |
|
|
int strsplit(char* str, char** fields, int maxfields); //splits by tab or space |
272 |
|
|
|
273 |
|
|
char* replaceStr(char* &str, char* newvalue); |
274 |
|
|
|
275 |
|
|
//conversion: to Lower/Upper case |
276 |
|
|
// creating a new string: |
277 |
|
|
char* upCase(const char* str); |
278 |
|
|
char* loCase(const char* str); |
279 |
|
|
// changing string in place: |
280 |
|
|
char* strlower(char * str); |
281 |
|
|
char* strupper(char * str); |
282 |
|
|
|
283 |
|
|
// strstr but for memory zones: scans a memory region (mem, len bytes)
// for a substring (part, partlen bytes)
void* Gmemscan(void *mem, unsigned int len,
               void *part, unsigned int partlen);

// tests if a char is in a string
bool chrInStr(char c, const char *str);

// returns a pointer to the rightmost occurrence of ch in str
// - like rindex for platforms missing it
char* rstrchr(char *str, char ch);

// strchr but with a set of chars instead of only one
char* strchrs(const char *s, const char *chrs);

// like rindex() but for strings; right side version of strstr()
char* rstrfind(const char *str, const char *substr);

// in place reversal of string
// NOTE(review): slen==0 presumably means "use the string's own length" -- confirm.
char* reverseChars(char *str, int slen = 0);

// the reversed, rightside equivalent of strstr: starts searching
// from right end (rstart), going back to left end (lend) and returns
// a pointer to the last (right) matching character in str
char* rstrstr(const char *rstart, const char *lend, const char *substr);

// the case insensitive version of strstr -- finding a string within a string
char* strifind(const char *str, const char *substr);

// Determines if a string begins with a given prefix
// (returns false when any of the params is NULL,
//  but true when prefix is '' (empty string)!)
bool startsWith(const char *s, const char *prefix);

// Note: returns true if suffix is empty string, but false if it's NULL
bool endsWith(const char *s, const char *suffix);

// ELF hash function for strings
int strhash(const char *str);
323 |
|
|
|
324 |
|
|
|
325 |
|
|
|
326 |
|
|
//---- generic base GSeg : genomic segment (interval) --
// coordinates are considered 1-based (so 0 is invalid)
// All read-only members are const, so they can be used on const segments.
class GSeg {
 public:
  uint start; //start<=end always (the constructor swaps reversed coordinates)
  uint end;

  //builds the segment [s..e]; the two values are exchanged when s>e
  GSeg(uint s=0,uint e=0) {
    if (s>e) { start=e;end=s; }
        else { start=s;end=e; }
  }

  //number of positions covered by the segment, both ends included
  uint len() const { return end-start+1; }

  //check for overlap with other segment
  bool overlap(GSeg* d) const {
     return (start<=d->end && end>=d->start);
  }

  bool overlap(GSeg& d) const {
     return (start<=d.end && end>=d.start);
  }

  //overlap test with fuzz added to the end coordinate of both segments
  bool overlap(GSeg& d, int fuzz) const {
     return (start<=d.end+fuzz && end+fuzz>=d.start);
  }

  //overlap test against the interval delimited by s and e (in either order)
  bool overlap(uint s, uint e) const {
     if (s>e) { uint t=s; s=e; e=t; }
     return (start<=e && end>=s);
  }

  //return the length of overlap between two segments (0 if they are disjoint)
  int overlapLen(GSeg* r) const {
     if (start<r->start) {
        if (r->start>end) return 0;
        return (r->end>end) ? end-r->start+1 : r->end-r->start+1;
        }
       else { //r->start<=start
        if (start>r->end) return 0;
        return (r->end<end)? r->end-start+1 : end-start+1;
        }
  }

  //same, for the interval delimited by rstart and rend (in either order)
  int overlapLen(uint rstart, uint rend) const {
     if (rstart>rend) { uint t=rstart; rstart=rend; rend=t; }
     if (start<rstart) {
        if (rstart>end) return 0;
        return (rend>end) ? end-rstart+1 : rend-rstart+1;
        }
       else { //rstart<=start
        if (start>rend) return 0;
        return (rend<end)? rend-start+1 : end-start+1;
        }
  }

  //fuzzy coordinate matching: true when both the start and the end
  //coordinates differ by at most fuzz (exact match required for fuzz==0)
  bool coordMatch(GSeg* s, uint fuzz=0) const {
    if (fuzz==0) return (start==s->start && end==s->end);
    uint sd = (start>s->start) ? start-s->start : s->start-start;
    uint ed = (end>s->end) ? end-s->end : s->end-end;
    return (sd<=fuzz && ed<=fuzz);
  }

  //comparison operators required for sorting
  bool operator==(GSeg& d) const {
      return (start==d.start && end==d.end);
  }
  //orders by start coordinate, then by end coordinate
  bool operator<(GSeg& d) const {
     return (start==d.start)?(end<d.end):(start<d.start);
  }
};
397 |
|
|
|
398 |
|
|
|
399 |
|
|
|
400 |
|
|
//-------------------------------------------------------- |
401 |
|
|
// ************** simple line reading class for text files |
402 |
|
|
|
403 |
|
|
//GLineReader -- text line reading/buffering class |
404 |
|
|
class GLineReader { |
405 |
gpertea |
16 |
bool closeFile; |
406 |
gpertea |
2 |
int len; |
407 |
|
|
int allocated; |
408 |
|
|
char* buf; |
409 |
|
|
bool isEOF; |
410 |
|
|
FILE* file; |
411 |
|
|
off_t filepos; //current position |
412 |
|
|
bool pushed; //pushed back |
413 |
|
|
int lcount; //line counter (read lines) |
414 |
|
|
public: |
415 |
|
|
char* chars() { return buf; } |
416 |
|
|
char* line() { return buf; } |
417 |
|
|
int readcount() { return lcount; } //number of lines read |
418 |
gpertea |
16 |
void setFile(FILE* stream) { file=stream; } |
419 |
gpertea |
2 |
int length() { return len; } |
420 |
|
|
int size() { return len; } //same as size(); |
421 |
|
|
bool isEof() {return isEOF; } |
422 |
|
|
bool eof() { return isEOF; } |
423 |
|
|
off_t getfpos() { return filepos; } |
424 |
|
|
off_t getFpos() { return filepos; } |
425 |
|
|
char* nextLine() { return getLine(); } |
426 |
|
|
char* getLine() { if (pushed) { pushed=false; return buf; } |
427 |
|
|
else return getLine(file); } |
428 |
|
|
char* getLine(FILE* stream) { |
429 |
|
|
if (pushed) { pushed=false; return buf; } |
430 |
|
|
else return getLine(stream, filepos); } |
431 |
|
|
char* getLine(FILE* stream, off_t& f_pos); //read a line from a stream and update |
432 |
|
|
// the given file position |
433 |
|
|
void pushBack() { if (lcount>0) pushed=true; } // "undo" the last getLine request |
434 |
|
|
// so the next call will in fact return the same line |
435 |
gpertea |
16 |
GLineReader(const char* fname) { |
436 |
|
|
FILE* f=fopen(fname, "rb"); |
437 |
|
|
if (f==NULL) GError("Error opening file '%s'!\n",fname); |
438 |
|
|
closeFile=true; |
439 |
|
|
init(f); |
440 |
|
|
} |
441 |
gpertea |
2 |
GLineReader(FILE* stream=NULL, off_t fpos=0) { |
442 |
gpertea |
16 |
closeFile=false; |
443 |
|
|
init(stream,fpos); |
444 |
|
|
} |
445 |
|
|
void init(FILE* stream, off_t fpos=0) { |
446 |
gpertea |
2 |
len=0; |
447 |
|
|
isEOF=false; |
448 |
|
|
allocated=1024; |
449 |
|
|
GMALLOC(buf,allocated); |
450 |
|
|
lcount=0; |
451 |
|
|
buf[0]=0; |
452 |
|
|
file=stream; |
453 |
|
|
filepos=fpos; |
454 |
|
|
pushed=false; |
455 |
|
|
} |
456 |
|
|
~GLineReader() { |
457 |
|
|
GFREE(buf); |
458 |
gpertea |
16 |
if (closeFile) fclose(file); |
459 |
gpertea |
2 |
} |
460 |
|
|
}; |
461 |
|
|
|
462 |
|
|
|
463 |
|
|
/* extended fgets() - to read one full line from a file and |
464 |
|
|
update the file position correctly ! |
465 |
|
|
buf will be reallocated as necessary, to fit the whole line |
466 |
|
|
*/ |
467 |
|
|
char* fgetline(char* & buf, int& buflen, FILE* stream, off_t* f_pos=NULL, int* linelen=NULL); |
468 |
|
|
|
469 |
gpertea |
16 |
|
470 |
|
|
//print int/values nicely formatted in 3-digit groups |
471 |
|
|
char* commaprint(uint64 n); |
472 |
|
|
|
473 |
gpertea |
2 |
/*********************** File management functions *********************/ |
474 |
|
|
|
475 |
gpertea |
16 |
// removes the last part (file or directory name) of a full path |
476 |
|
|
// WARNING: this is a destructive operation for the given string! |
477 |
gpertea |
2 |
void delFileName(char* filepath); |
478 |
|
|
|
479 |
gpertea |
16 |
// returns a pointer to the last file or directory name in a full path |
480 |
|
|
const char* getFileName(const char* filepath); |
481 |
|
|
// returns a pointer to the file "extension" part in a filename |
482 |
|
|
const char* getFileExt(const char* filepath); |
483 |
gpertea |
2 |
|
484 |
gpertea |
16 |
|
485 |
gpertea |
2 |
int fileExists(const char* fname); |
486 |
|
|
//returns 0 if file entry doesn't exist |
487 |
|
|
// 1 if it's a directory |
488 |
|
|
// 2 if it's a regular file |
489 |
|
|
// 3 otherwise (?) |
490 |
|
|
|
491 |
gpertea |
16 |
int64 fileSize(const char* fpath); |
492 |
gpertea |
2 |
|
493 |
gpertea |
16 |
//write a formatted fasta record, fasta formatted |
494 |
|
|
void writeFasta(FILE *fw, const char* seqid, const char* descr, |
495 |
|
|
const char* seq, int linelen=60, int seqlen=0); |
496 |
|
|
|
497 |
gpertea |
2 |
//parses the next number found in a string at the current position |
498 |
|
|
//until a non-digit (and not a '.', 'e','E','-','+') is encountered; |
499 |
|
|
//updates the char* pointer to be after the last digit parsed |
500 |
|
|
bool parseNumber(char* &p, double& v); |
501 |
|
|
bool parseDouble(char* &p, double& v); //just an alias for parseNumber |
502 |
|
|
|
503 |
|
|
bool parseInt(char* &p, int& i); |
504 |
|
|
bool parseUInt(char* &p, uint& i); |
505 |
|
|
bool parseHex(char* &p, uint& i); |
506 |
|
|
|
507 |
|
|
#endif /* G_BASE_DEFINED */ |