ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/gclib/GBase.h
(Generate patch)
# Line 1 | Line 1
1   #ifndef G_BASE_DEFINED
2   #define G_BASE_DEFINED
3 <
3 > #ifndef _POSIX_SOURCE
4 > //mostly for MinGW
5 > #define _POSIX_SOURCE
6 > #endif
7 > #ifdef HAVE_CONFIG_H
8 > #include "config.h"
9 > #endif
10   #include <string.h>
11   #include <stdlib.h>
12   #include <stdio.h>
# Line 8 | Line 14
14   #include <limits.h>
15   #include <sys/types.h>
16   #include <sys/stat.h>
17 < #if defined __WIN32__ || defined _WIN32
17 > #include <stdint.h>
18 >
19 > #if defined __WIN32__ || defined WIN32 || defined _WIN32 || defined _WIN32_
20 >  #ifndef __WIN32__
21 >    #define __WIN32__
22 >  #endif
23    #include <windows.h>
24 +  #include <io.h>
25 +  #define CHPATHSEP '\\'
26 +  #undef off_t
27 +  #define off_t int64_t
28 +  #ifndef popen
29 +   #define popen _popen
30 +  #endif
31 +  #ifdef _fseeki64
32 +    #define fseeko(stream, offset, origin) _fseeki64(stream, offset, origin)
33 +  #else
34 +    /*
35 +    #define _DEFINE_WIN32_FSEEKO
36 +    int fseeko(FILE *stream, off_t offset, int whence);
37 +    */
38 +    #define fseeko fseek
39 +  #endif
40 +  #ifdef _ftelli64
41 +    #define ftello(stream) _ftelli64(stream)
42 +  #else
43 +    /*
44 +    #define _DEFINE_WIN32_FTELLO
45 +    off_t ftello(FILE *stream);
46 +    */
47 +    #define ftello ftell
48 +  #endif
49 + #else
50 +  #define CHPATHSEP '/'
51 +  #include <unistd.h>
52 + #endif
53 +
54 + #ifndef fseeko
55 + #define fseeko fseek
56 + #endif
57 + #ifndef ftello
58 + #define ftello ftell
59   #endif
60  
61   #ifdef DEBUG
62   #undef NDEBUG
63   #endif
64  
65 < typedef unsigned int uint32;
66 < typedef int int32;
65 > typedef int32_t int32;
66 > typedef uint32_t uint32;
67 > typedef int16_t int16;
68 > typedef uint16_t uint16;
69 >
70   typedef unsigned char uchar;
71   typedef unsigned char byte;
72  
24 // If long is natively 64 bit, use the regular fseek and ftell
25 #ifdef _NATIVE_64
26 #define ftello ftell
27 #define fseeko fseek
28 #endif
29
73   #ifndef MAXUINT
74   #define MAXUINT ((unsigned int)-1)
75   #endif
76  
77 < #if defined(_NATIVE_64) || defined(_LP64) || defined(__LP64__)
78 < typedef long int64;
79 < typedef unsigned long uint64;
80 < #else
81 < //assume 32bit environment with long long for int64 stuff
82 < typedef long long int64;
40 < typedef unsigned long long uint64;
77 > #ifndef MAXINT
78 > #define MAXINT INT_MAX
79 > #endif
80 >
81 > #ifndef MAX_UINT
82 > #define MAX_UINT ((unsigned int)-1)
83   #endif
84  
85 + #ifndef MAX_INT
86 + #define MAX_INT INT_MAX
87 + #endif
88 +
89 + typedef int64_t int64;
90 + typedef uint64_t uint64;
91 +
92   /****************************************************************************/
93  
94   #ifndef EXIT_FAILURE
# Line 52 | Line 101
101  
102   /****************************************************************************/
103   #define ERR_ALLOC "Error allocating memory.\n"
55 #if defined (__WIN32__) || defined (WIN32)
56  #define CHPATHSEP '\\'
57  #include <io.h>
58  #define ftello ftell
59  #define fseeko fseek
60 #else
61  #define CHPATHSEP '/'
62  #include <unistd.h>
63 #endif
104  
105   //-------------------
106  
# Line 113 | Line 153
153                                       GError(ERR_ALLOC)
154   #define GFREE(ptr)       GFree((pointer*)(&ptr))
155  
156 < inline char* min(char *arg1, char *arg2) {
// Returns whichever of the two C strings orders first according to strcmp().
// When the strings compare equal, arg2 is returned.
// Neither argument may be NULL (both are passed straight to strcmp).
inline char* strMin(char *arg1, char *arg2) {
  if (strcmp(arg1, arg2) < 0) {
    return arg1;
  }
  return arg2;
}
159  
// Returns whichever of the two C strings orders last according to strcmp().
// When the strings compare equal, arg2 is returned.
// Neither argument may be NULL (both are passed straight to strcmp).
inline char* strMax(char *arg1, char *arg2) {
  if (strcmp(arg2, arg1) < 0) {
    return arg1;
  }
  return arg2;
}
163 +
// Rounds x to the nearest integer and returns it as an int.
// Implemented as floor(x + 0.5), so halfway cases go toward positive
// infinity (2.5 -> 3, -2.5 -> -2).
inline int iround(double x) {
  double lowered = floor(x + 0.5);
  return (int)lowered;
}
167  
124
168   /****************************************************************************/
169  
127 inline char* max(char *arg1, char *arg2) {
128    return (strcmp(arg2, arg1) < 0)? arg1 : arg2;
129 }
130
// Three-way comparison of two ints, usable as a sort/search comparator:
// returns a negative value if a<b, 0 if a==b, and a positive value if a>b.
// The result is built from the two comparisons instead of computing a-b,
// because that subtraction overflows (undefined behavior for signed ints)
// when the operands are far apart, e.g. Gintcmp(INT_MIN, 1).
inline int Gintcmp(int a, int b) {
  return (a>b) - (a<b);
}
174  
175 < int Gstrcmp(char* a, char* b);
175 > int Gstrcmp(const char* a, const char* b, int n=-1);
176   //same as strcmp but doesn't crash on NULL pointers
177  
178 < int Gstricmp(const char* a, const char* b);
140 <
141 < inline void swap(int &arg1, int &arg2){
142 < arg1 ^= arg2 ^= arg1 ^= arg2;
143 < }
144 <
145 < inline void swap(char* &arg1, char* &arg2){
146 < register char* swp=arg1;
147 < arg1=arg2; arg2=swp;
148 < }
149 <
150 < inline void swap(unsigned int &arg1, unsigned int &arg2)
151 < { arg1 ^= arg2 ^= arg1 ^= arg2; }
152 <
153 < inline void swap(short &arg1, short &arg2)
154 < { arg1 ^= arg2 ^= arg1 ^= arg2; }
155 <
156 < inline void swap(unsigned short &arg1, unsigned short &arg2)
157 < { arg1 ^= arg2 ^= arg1 ^= arg2; }
178 > int Gstricmp(const char* a, const char* b, int n=-1);
179  
180 < inline void swap(long &arg1, long &arg2)
181 < { arg1 ^= arg2 ^= arg1 ^= arg2; }
182 <
183 < inline void swap(unsigned long &arg1, unsigned long &arg2)
184 < { arg1 ^= arg2 ^= arg1 ^= arg2; }
// Generic swap: exchanges the values of lhs and rhs through a temporary
// copy of lhs. T must be copy-constructible and copy-assignable.
template<class T> void Gswap(T& lhs, T& rhs) {
  T saved = lhs;
  lhs = rhs;
  rhs = saved;
}
187  
188 < inline void swap(char &arg1, char &arg2)
189 < { arg1 ^= arg2 ^= arg1 ^= arg2; }
/// bitCount_32 - counts the number of set bits in a 32-bit value.
/// Ex. bitCount_32(0xF000F000) = 8
/// Returns 0 if the word is zero.
inline uint bitCount_32(uint32_t Value) {
#if __GNUC__ >= 4
  return __builtin_popcount(Value);
#else
  // portable fallback: add up the bits within 2-bit, 4-bit and 8-bit groups
  uint32_t pairs = Value - ((Value >> 1) & 0x55555555);
  uint32_t nibbles = (pairs & 0x33333333) + ((pairs >> 2) & 0x33333333);
  uint32_t bytes = (nibbles + (nibbles >> 4)) & 0xF0F0F0F;
  // the multiplication sums the four per-byte counts into the top byte
  return (bytes * 0x1010101) >> 24;
#endif
}
200  
201 < inline void swap(unsigned char &arg1, unsigned char &arg2)
202 < { arg1 ^= arg2 ^= arg1 ^= arg2; }
/// bitCount_64 - counts the number of set bits in a 64-bit value.
/// Returns 0 if the word is zero.
inline uint bitCount_64(uint64_t Value) {
#if __GNUC__ >= 4
  return __builtin_popcountll(Value);
#else
  // portable fallback: add up the bits within 2-bit, 4-bit and 8-bit groups
  uint64_t pairs = Value - ((Value >> 1) & 0x5555555555555555ULL);
  uint64_t nibbles = (pairs & 0x3333333333333333ULL) +
                     ((pairs >> 2) & 0x3333333333333333ULL);
  uint64_t bytes = (nibbles + (nibbles >> 4)) & 0x0F0F0F0F0F0F0F0FULL;
  // the multiplication sums the eight per-byte counts into the top byte
  return uint((uint64_t)(bytes * 0x0101010101010101ULL) >> 56);
#endif
}
213  
214 < inline void swap(bool &arg1, bool &arg2)
215 < { arg1 ^= arg2 ^= arg1 ^= arg2; }
/// bitCountTrailingZeros_32 - counts the number of zero bits from the least
/// significant bit up to the first one bit.
/// Ex. bitCountTrailingZeros_32(0xFF00FF00) == 8.
/// Returns 32 if the word is zero.
inline unsigned bitCountTrailingZeros_32(uint32_t Value) {
#if __GNUC__ >= 4
  if (Value == 0) {
    return 32;
  }
  return __builtin_ctz(Value);
#else
  // Lookup table indexed by (lowest set bit) % 37; entry 0 is the answer
  // for Value==0.
  static const unsigned Mod37BitPosition[] = {
    32, 0, 1, 26, 2, 23, 27, 0, 3, 16, 24, 30, 28, 11, 0, 13,
    4, 7, 17, 0, 25, 22, 31, 15, 29, 10, 12, 6, 0, 21, 14, 9,
    5, 20, 8, 19, 18
  };
  // -Value & Value keeps only the lowest set bit of Value
  const uint32_t lowestBit = -Value & Value;
  return Mod37BitPosition[lowestBit % 37];
#endif
}
230  
/// bitCountTrailingZeros_64 - counts the number of zero bits from the least
/// significant bit up to the first one bit (64 bit edition.)
/// Returns 64 if the word is zero.
inline unsigned bitCountTrailingZeros_64(uint64_t Value) {
#if __GNUC__ >= 4
  if (Value == 0) {
    return 64;
  }
  return __builtin_ctzll(Value);
#else
  // Lookup table indexed by (lowest set bit) % 67; entry 0 is the answer
  // for Value==0.
  static const unsigned Mod67Position[] = {
    64, 0, 1, 39, 2, 15, 40, 23, 3, 12, 16, 59, 41, 19, 24, 54,
    4, 64, 13, 10, 17, 62, 60, 28, 42, 30, 20, 51, 25, 44, 55,
    47, 5, 32, 65, 38, 14, 22, 11, 58, 18, 53, 63, 9, 61, 27,
    29, 50, 43, 46, 31, 37, 21, 57, 52, 8, 26, 49, 45, 36, 56,
    7, 48, 35, 6, 34, 33, 0
  };
  // -Value & Value keeps only the lowest set bit of Value
  const uint64_t lowestBit = -Value & Value;
  return Mod67Position[lowestBit % 67];
#endif
}
249  
250   /**************** Memory management ***************************/
251  
# Line 179 | Line 254
254   bool GRealloc(pointer* ptr,unsigned long size); // Resize memory
255   void GFree(pointer* ptr); // Free memory, resets ptr to NULL
256  
257 < /********************* debug functions *********************/
257 >
258 > int saprintf(char **retp, const char *fmt, ...);
259  
260   void GError(const char* format,...); // Error routine (aborts program)
261   void GMessage(const char* format,...);// Log message to stderr
262   // Assert failed routine:- usually not called directly but through GASSERT
263   void GAssert(const char* expression, const char* filename, unsigned int lineno);
264  
189
265   // ****************** string manipulation *************************
266   char *Gstrdup(const char* str);
267   //duplicate a string by allocating a copy for it and returning it
# Line 216 | Line 291
291                    void *part, unsigned int partlen);
292  
293   // test if a char is in a string:
294 < bool chrInStr(char c, char* str);
294 > bool chrInStr(char c, const char* str);
295  
296   char* rstrchr(char* str, char ch);
297   /* returns a pointer to the rightmost
298    occurrence of ch in str - like rindex for platforms missing it*/
299  
300 < char* strchrs(char* s, const char* chrs);
300 > char* strchrs(const char* s, const char* chrs);
301   //strchr but with a set of chars instead of only one
302  
303 < char* rstrfind(char* str, const char *substr); /* like rindex() but for strings
304 < or like the right side version of strstr()
230 < */
231 < //reverse character string or
232 < char* reverseChars(char* str, int slen=0);
303 > char* rstrfind(const char* str, const char *substr);
304 > // like rindex() but for strings;  right side version of strstr()
305  
306 < char* rstrstr(char* rstart, char *lend, char* substr);
306 > char* reverseChars(char* str, int slen=0); //in place reversal of string
307 >
308 > char* rstrstr(const char* rstart, const char *lend, const char* substr);
309   /*the reversed, rightside equivalent of strstr: starts searching
310   from right end (rstart), going back to left end (lend) and returns
311   a pointer to the last (right) matching character in str */
312  
313 < char* strifind(char* str,  const char* substr);
313 > char* strifind(const char* str,  const char* substr);
314   // the case insensitive version of strstr -- finding a string within a string
315  
316  
317   //Determines if a string begins with a given prefix
318   //(returns false when any of the params is NULL,
319   // but true when prefix is '' (empty string)!)
320 < bool startsWith(char* s, const char* prefix);
320 > bool startsWith(const char* s, const char* prefix);
321 >
322 > bool endsWith(const char* s, const char* suffix);
323 > //Note: returns true if suffix is empty string, but false if it's NULL
324 >
325  
326   // ELF hash function for strings
327   int strhash(const char* str);
# Line 263 | Line 341
341    //check for overlap with other segment
342    uint len() { return end-start+1; }
343    bool overlap(GSeg* d) {
344 <     return start<d->start ? (d->start<=end) : (start<=d->end);
344 >     //return start<d->start ? (d->start<=end) : (start<=d->end);
345 >     return (start<=d->end && end>=d->start);
346       }
347  
348    bool overlap(GSeg& d) {
349 <     return start<d.start ? (d.start<=end) : (start<=d.end);
349 >     //return start<d.start ? (d.start<=end) : (start<=d.end);
350 >     return (start<=d.end && end>=d.start);
351       }
352  
353    bool overlap(GSeg& d, int fuzz) {
354 <     return start<d.start ? (d.start<=end+fuzz) : (start<=d.end+fuzz);
354 >     //return start<d.start ? (d.start<=end+fuzz) : (start<=d.end+fuzz);
355 >     return (start<=d.end+fuzz && end+fuzz>=d.start);
356       }
357  
358    bool overlap(uint s, uint e) {
359 <    if (s>e) { swap(s,e); }
360 <     return start<s ? (s<=end) : (start<=e);
359 >     if (s>e) { Gswap(s,e); }
360 >     //return start<s ? (s<=end) : (start<=e);
361 >     return (start<=e && end>=s);
362       }
363  
364    //return the length of overlap between two segments
# Line 291 | Line 373
373          }
374       }
375    int overlapLen(uint rstart, uint rend) {
376 <     if (rstart>rend) { swap(rstart,rend); }
376 >     if (rstart>rend) { Gswap(rstart,rend); }
377       if (start<rstart) {
378          if (rstart>end) return 0;
379          return (rend>end) ? end-rstart+1 : rend-rstart+1;
# Line 313 | Line 395
395    bool operator==(GSeg& d){
396        return (start==d.start && end==d.end);
397        }
316  bool operator>(GSeg& d){
317     return (start==d.start)?(end>d.end):(start>d.start);
318     }
398    bool operator<(GSeg& d){
399       return (start==d.start)?(end<d.end):(start<d.start);
400       }
# Line 328 | Line 407
407  
408   //GLineReader -- text line reading/buffering class
409   class GLineReader {
410 +   bool closeFile;
411     int len;
412     int allocated;
413     char* buf;
# Line 340 | Line 420
420     char* chars() { return buf; }
421     char* line() { return buf; }
422     int readcount() { return lcount; } //number of lines read
423 +   void setFile(FILE* stream) { file=stream; }
424     int length() { return len; }
425     int size() { return len; } //same as length()
426     bool isEof() {return isEOF; }
# Line 356 | Line 437
437                             // the given file position
438     void pushBack() { if (lcount>0) pushed=true; } // "undo" the last getLine request
439              // so the next call will in fact return the same line
440 +   GLineReader(const char* fname) {
441 +      FILE* f=fopen(fname, "rb");
442 +      if (f==NULL) GError("Error opening file '%s'!\n",fname);
443 +      closeFile=true;
444 +      init(f);
445 +      }
446     GLineReader(FILE* stream=NULL, off_t fpos=0) {
447 +     closeFile=false;
448 +     init(stream,fpos);
449 +     }
450 +   void init(FILE* stream, off_t fpos=0) {
451       len=0;
452       isEOF=false;
453       allocated=1024;
# Line 369 | Line 460
460       }
461     ~GLineReader() {
462       GFREE(buf);
463 +     if (closeFile) fclose(file);
464       }
465   };
466  
# Line 379 | Line 471
471    */
472   char* fgetline(char* & buf, int& buflen, FILE* stream, off_t* f_pos=NULL, int* linelen=NULL);
473  
474 +
475 + //print int/values nicely formatted in 3-digit groups
476 + char* commaprint(uint64 n);
477 +
478   /*********************** File management functions *********************/
479  
480 < // removes the directory part from a full-path file name
481 < // this is a destructive operation for the given string!
480 > // removes the last part (file or directory name) of a full path
481 > // WARNING: this is a destructive operation for the given string!
482   void delFileName(char* filepath);
483  
484 < // returns a pointer to the file name part in a full-path filename
485 < char* getFileName(char* filepath);
484 > // returns a pointer to the last file or directory name in a full path
485 > const char* getFileName(const char* filepath);
486 > // returns a pointer to the file "extension" part in a filename
487 > const char* getFileExt(const char* filepath);
488 >
489  
490   int fileExists(const char* fname);
491   //returns 0 if file entry doesn't exist
# Line 394 | Line 493
493   //        2 if it's a regular file
494   //        3 otherwise (?)
495  
496 < off_t fileSize(const char* fpath);
496 > int64 fileSize(const char* fpath);
497 >
498 > //write a FASTA-formatted record to fw
499 > void writeFasta(FILE *fw, const char* seqid, const char* descr,
500 >        const char* seq, int linelen=60, int seqlen=0);
501  
502   //parses the next number found in a string at the current position
503   //until a non-digit (and not a '.', 'e','E','-','+') is encountered;

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines