1 |
kmr |
1 |
/* |
2 |
|
|
This file contains the implementation of the BIOSEG PostgreSQL type. The |
3 |
|
|
implementation is a copy of the SEG type from the contrib directory in the |
4 |
|
|
PostgreSQL source, which has been changed to represent ranges of integers. |
5 |
|
|
The anticipated use of this type is to represent contiguous intervals of a |
6 |
|
|
biological sequence. |
7 |
|
|
|
8 |
|
|
SEG code written Gene Selkov, Jr. |
9 |
|
|
|
10 |
kmr |
19 |
Changes for BIOSEG written by Kim Rutherford <kmr@xenu.org.uk>. |
11 |
kmr |
1 |
*/ |
12 |
|
|
|
13 |
|
|
#include "postgres.h" |
14 |
|
|
|
15 |
|
|
#include <stdlib.h> |
16 |
|
|
#include <errno.h> |
17 |
|
|
#include <limits.h> |
18 |
|
|
|
19 |
|
|
#include "access/gist.h" |
20 |
|
|
#include "access/skey.h" |
21 |
|
|
#include "utils/builtins.h" |
22 |
|
|
|
23 |
|
|
#include "biosegdata.h" |
24 |
|
|
|
25 |
|
|
#ifdef INTERBASE_COORDS |
26 |
|
|
#define BIOSEG bioseg0 |
27 |
|
|
#define BIOSEG_PREFIX(x) bioseg0 ## x |
28 |
|
|
#else |
29 |
|
|
#define BIOSEG bioseg |
30 |
|
|
#define BIOSEG_PREFIX(x) bioseg ## x |
31 |
|
|
#endif |
32 |
|
|
|
33 |
|
|
PG_MODULE_MAGIC; |
34 |
|
|
|
35 |
|
|
extern int BIOSEG_PREFIX(_yyparse)(); |
36 |
|
|
extern void BIOSEG_PREFIX(_yyerror)(const char *message); |
37 |
|
|
extern void BIOSEG_PREFIX(_scanner_init)(const char *str); |
38 |
|
|
extern void BIOSEG_PREFIX(_scanner_finish)(void); |
39 |
|
|
|
40 |
|
|
/* |
41 |
|
|
** Input/Output routines |
42 |
|
|
*/ |
43 |
|
|
SEG *BIOSEG_PREFIX(_in)(char *str); |
44 |
|
|
char *BIOSEG_PREFIX(_out)(SEG * seg); |
45 |
|
|
SEG *BIOSEG_PREFIX(_create)(int32 lower, int32 upper); |
46 |
|
|
int32 BIOSEG_PREFIX(_lower)(SEG * seg); |
47 |
|
|
int32 BIOSEG_PREFIX(_upper)(SEG * seg); |
48 |
|
|
int32 BIOSEG_PREFIX(_center)(SEG * seg); |
49 |
|
|
|
50 |
|
|
/* |
51 |
|
|
** GiST support methods |
52 |
|
|
*/ |
53 |
|
|
bool BIOSEG_PREFIX(_gist_consistent)(GISTENTRY *entry, SEG * query, StrategyNumber strategy); |
54 |
|
|
GISTENTRY *BIOSEG_PREFIX(_gist_compress)(GISTENTRY *entry); |
55 |
|
|
GISTENTRY *BIOSEG_PREFIX(_gist_decompress)(GISTENTRY *entry); |
56 |
|
|
float *BIOSEG_PREFIX(_gist_penalty)(GISTENTRY *origentry, GISTENTRY *newentry, float *result); |
57 |
|
|
GIST_SPLITVEC *BIOSEG_PREFIX(_gist_picksplit)(GistEntryVector *entryvec, GIST_SPLITVEC *v); |
58 |
|
|
bool BIOSEG_PREFIX(_gist_leaf_consistent)(SEG * key, SEG * query, StrategyNumber strategy); |
59 |
|
|
bool BIOSEG_PREFIX(_gist_internal_consistent)(SEG * key, SEG * query, StrategyNumber strategy); |
60 |
|
|
SEG *BIOSEG_PREFIX(_gist_union)(GistEntryVector *entryvec, int *sizep); |
61 |
|
|
SEG *BIOSEG_PREFIX(_gist_binary_union)(SEG * r1, SEG * r2, int *sizep); |
62 |
|
|
bool *BIOSEG_PREFIX(_gist_same)(SEG * b1, SEG * b2, bool *result); |
63 |
|
|
|
64 |
|
|
|
65 |
|
|
/* |
66 |
|
|
** R-tree support functions |
67 |
|
|
*/ |
68 |
|
|
bool BIOSEG_PREFIX(_same)(SEG * a, SEG * b); |
69 |
|
|
bool BIOSEG_PREFIX(_contains_int)(SEG * a, int *b); |
70 |
|
|
bool BIOSEG_PREFIX(_contains)(SEG * a, SEG * b); |
71 |
|
|
bool BIOSEG_PREFIX(_contained)(SEG * a, SEG * b); |
72 |
|
|
bool BIOSEG_PREFIX(_overlap)(SEG * a, SEG * b); |
73 |
|
|
bool BIOSEG_PREFIX(_left)(SEG * a, SEG * b); |
74 |
|
|
bool BIOSEG_PREFIX(_over_left)(SEG * a, SEG * b); |
75 |
|
|
bool BIOSEG_PREFIX(_right)(SEG * a, SEG * b); |
76 |
|
|
bool BIOSEG_PREFIX(_over_right)(SEG * a, SEG * b); |
77 |
|
|
void BIOSEG_PREFIX(_rt_size)(SEG * a, int32 *sz); |
78 |
|
|
int32 BIOSEG_PREFIX(_size)(SEG * a); |
79 |
|
|
|
80 |
|
|
static SEG *BIOSEG_PREFIX(_inter)(SEG * a, SEG * b); |
81 |
|
|
static SEG *BIOSEG_PREFIX(_union)(SEG * a, SEG * b); |
82 |
|
|
|
83 |
|
|
/* |
84 |
|
|
** Various operators |
85 |
|
|
*/ |
86 |
|
|
int32 BIOSEG_PREFIX(_cmp)(SEG * a, SEG * b); |
87 |
|
|
bool BIOSEG_PREFIX(_lt)(SEG * a, SEG * b); |
88 |
|
|
bool BIOSEG_PREFIX(_le)(SEG * a, SEG * b); |
89 |
|
|
bool BIOSEG_PREFIX(_gt)(SEG * a, SEG * b); |
90 |
|
|
bool BIOSEG_PREFIX(_ge)(SEG * a, SEG * b); |
91 |
|
|
bool BIOSEG_PREFIX(_different)(SEG * a, SEG * b); |
92 |
|
|
Datum BIOSEG_PREFIX(_joinsel)(PG_FUNCTION_ARGS); |
93 |
|
|
Datum BIOSEG_PREFIX(_sel)(PG_FUNCTION_ARGS); |
94 |
|
|
Datum BIOSEG_PREFIX(_contsel)(PG_FUNCTION_ARGS); |
95 |
|
|
Datum BIOSEG_PREFIX(_contjoinsel)(PG_FUNCTION_ARGS); |
96 |
|
|
|
97 |
|
|
|
98 |
|
|
static int get_dots(char **str); |
99 |
|
|
static int get_int(char **str, int32 *result); |
100 |
|
|
|
101 |
|
|
|
102 |
|
|
/***************************************************************************** |
103 |
|
|
* Input/Output functions |
104 |
|
|
*****************************************************************************/ |
105 |
|
|
|
106 |
|
|
int MAX_DIGITS = 10; |
107 |
|
|
|
108 |
|
|
SEG * |
109 |
|
|
BIOSEG_PREFIX(_in)(char *str) |
110 |
|
|
{ |
111 |
|
|
int32 lower; |
112 |
|
|
int32 upper; |
113 |
|
|
|
114 |
|
|
if (!get_int(&str, &lower)) { |
115 |
|
|
return NULL; |
116 |
|
|
} |
117 |
|
|
|
118 |
|
|
if (str[0] == 0) { |
119 |
|
|
upper = lower; |
120 |
|
|
} else { |
121 |
|
|
if (!get_dots(&str)) { |
122 |
|
|
ereport(ERROR, |
123 |
|
|
(errcode(ERRCODE_SYNTAX_ERROR), |
124 |
|
|
errmsg("bad bioseg representation"), |
125 |
|
|
errdetail("number followed by something other than ..: %s", str))); |
126 |
|
|
return NULL; |
127 |
|
|
} |
128 |
|
|
|
129 |
|
|
if (!get_int(&str, &upper)) { |
130 |
|
|
ereport(ERROR, |
131 |
|
|
(errcode(ERRCODE_SYNTAX_ERROR), |
132 |
|
|
errmsg("bad bioseg representation"), |
133 |
|
|
errdetail("number\"..\" followed by something other than a number: %s", str))); |
134 |
|
|
return NULL; |
135 |
|
|
} |
136 |
|
|
|
137 |
|
|
if (lower > upper) { |
138 |
|
|
ereport(ERROR, |
139 |
|
|
(errcode(ERRCODE_SYNTAX_ERROR), |
140 |
|
|
errmsg("bad bioseg representation"), |
141 |
|
|
errdetail("lower limit of range greater than upper"))); |
142 |
|
|
return NULL; |
143 |
|
|
} |
144 |
|
|
|
145 |
|
|
if (str[0] != 0) { |
146 |
|
|
ereport(ERROR, |
147 |
|
|
(errcode(ERRCODE_SYNTAX_ERROR), |
148 |
|
|
errmsg("bad bioseg representation"), |
149 |
|
|
errdetail("garbage at end of string: %s", str))); |
150 |
|
|
return NULL; |
151 |
|
|
} |
152 |
|
|
} |
153 |
|
|
|
154 |
|
|
{ |
155 |
|
|
SEG *result = palloc(sizeof(SEG)); |
156 |
|
|
|
157 |
|
|
result->lower = lower; |
158 |
|
|
result->upper = upper; |
159 |
|
|
return result; |
160 |
|
|
} |
161 |
|
|
} |
162 |
|
|
|
163 |
|
|
#ifdef INTERBASE_COORDS |
164 |
kmr |
32 |
#define MIN_LOWER INT_MIN |
165 |
kmr |
1 |
#else |
166 |
|
|
#define MIN_LOWER 1l |
167 |
|
|
#endif |
168 |
|
|
|
169 |
|
|
int get_int(char **strp, int32 *result) { |
170 |
|
|
char *return_pos; |
171 |
|
|
long int long_result = -1; |
172 |
|
|
|
173 |
|
|
if (!(*strp)[0]) { |
174 |
|
|
ereport(ERROR, |
175 |
|
|
(errcode(ERRCODE_SYNTAX_ERROR), |
176 |
|
|
errmsg("bad bioseg representation"), |
177 |
|
|
errdetail("end of string found when expecting an integer"))); |
178 |
|
|
return 0; |
179 |
|
|
} |
180 |
|
|
|
181 |
|
|
errno = 0; |
182 |
|
|
|
183 |
|
|
long_result = strtol(*strp, &return_pos, 0); |
184 |
|
|
|
185 |
|
|
if (errno == ERANGE && (long_result == LONG_MAX || long_result == LONG_MIN)) { |
186 |
|
|
ereport(ERROR, |
187 |
|
|
(errcode(ERRCODE_SYNTAX_ERROR), |
188 |
|
|
errmsg("bad bioseg representation"), |
189 |
|
|
errdetail("integer at: %s is out of range", *strp))); |
190 |
|
|
return 0; |
191 |
|
|
} |
192 |
|
|
|
193 |
|
|
if (errno != 0 && long_result == 0) { |
194 |
|
|
ereport(ERROR, |
195 |
|
|
(errcode(ERRCODE_SYNTAX_ERROR), |
196 |
|
|
errmsg("bad bioseg representation"), |
197 |
|
|
errdetail("unable to read an integer: %s", strerror(errno)))); |
198 |
|
|
return 0; |
199 |
|
|
} |
200 |
|
|
|
201 |
|
|
if (*strp == return_pos) { |
202 |
|
|
ereport(ERROR, |
203 |
|
|
(errcode(ERRCODE_SYNTAX_ERROR), |
204 |
|
|
errmsg("bad bioseg representation"), |
205 |
|
|
errdetail("no integer found at: %s", *strp))); |
206 |
|
|
return 0; |
207 |
|
|
} |
208 |
|
|
|
209 |
|
|
if (long_result < MIN_LOWER) { |
210 |
kmr |
32 |
// when INTERBASE_COORDS is set, on machines where |
211 |
|
|
// sizeof(long int) == sizeof(int32) this block won't be executed because |
212 |
|
|
// strtol() will return ERANGE |
213 |
kmr |
1 |
ereport(ERROR, |
214 |
|
|
(errcode(ERRCODE_SYNTAX_ERROR), |
215 |
|
|
errmsg("bad bioseg representation"), |
216 |
kmr |
44 |
errdetail("integer %ld at: %s is out of range - must be >= %ld", |
217 |
kmr |
32 |
long_result, *strp, MIN_LOWER))); |
218 |
kmr |
1 |
return 0; |
219 |
|
|
} |
220 |
|
|
|
221 |
|
|
if (long_result > INT_MAX) { |
222 |
kmr |
32 |
// on machines where sizeof(long int) == sizeof(int32) this won't happen |
223 |
|
|
// because strtol() will return ERANGE |
224 |
kmr |
1 |
ereport(ERROR, |
225 |
|
|
(errcode(ERRCODE_SYNTAX_ERROR), |
226 |
|
|
errmsg("bad bioseg representation"), |
227 |
kmr |
32 |
errdetail("integer %ld at: %s is out of range - must be <= %d", |
228 |
|
|
long_result, *strp, INT_MAX))); |
229 |
kmr |
1 |
return 0; |
230 |
|
|
} |
231 |
|
|
|
232 |
|
|
*strp = return_pos; |
233 |
|
|
*result = long_result; |
234 |
|
|
|
235 |
|
|
return 1; |
236 |
|
|
} |
237 |
|
|
|
238 |
|
|
int |
239 |
|
|
get_dots(char **strp) |
240 |
|
|
{ |
241 |
|
|
if ((*strp)[0] == '.') { |
242 |
|
|
(*strp)++; |
243 |
|
|
if ((*strp)[0] == '.') { |
244 |
|
|
(*strp)++; |
245 |
|
|
if ((*strp)[0] == '.') { |
246 |
|
|
// allow for "10...20" |
247 |
|
|
(*strp)++; |
248 |
|
|
} |
249 |
|
|
return 1; |
250 |
|
|
} else { |
251 |
|
|
return 0; |
252 |
|
|
} |
253 |
|
|
} else { |
254 |
|
|
return 0; |
255 |
|
|
} |
256 |
|
|
} |
257 |
|
|
|
258 |
|
|
char * |
259 |
|
|
BIOSEG_PREFIX(_out)(SEG * bioseg) |
260 |
|
|
{ |
261 |
|
|
char *result; |
262 |
|
|
char *p; |
263 |
|
|
|
264 |
|
|
if (bioseg == NULL) |
265 |
|
|
return (NULL); |
266 |
|
|
|
267 |
|
|
p = result = (char *) palloc(40); |
268 |
|
|
|
269 |
|
|
if (bioseg->lower == bioseg->upper) |
270 |
|
|
{ |
271 |
|
|
/* |
272 |
|
|
* indicates that this interval was built by bioseg_in off a single point |
273 |
|
|
*/ |
274 |
|
|
sprintf(p, "%d", bioseg->lower); |
275 |
|
|
} |
276 |
|
|
else |
277 |
|
|
{ |
278 |
|
|
sprintf(p, "%d..%d", bioseg->lower, bioseg->upper); |
279 |
|
|
} |
280 |
|
|
|
281 |
|
|
return (result); |
282 |
|
|
} |
283 |
|
|
|
284 |
|
|
SEG * |
285 |
|
|
BIOSEG_PREFIX(_create)(int32 lower, int32 upper) |
286 |
|
|
{ |
287 |
|
|
SEG *result = palloc(sizeof(SEG)); |
288 |
|
|
|
289 |
|
|
if (lower > upper) { |
290 |
|
|
ereport(ERROR, |
291 |
|
|
(errcode(ERRCODE_SYNTAX_ERROR), |
292 |
|
|
errmsg("bad bioseg representation"), |
293 |
|
|
errdetail("lower limit of range greater than upper"))); |
294 |
|
|
return NULL; |
295 |
|
|
} |
296 |
|
|
|
297 |
kmr |
41 |
if (lower < MIN_LOWER) { |
298 |
|
|
ereport(ERROR, |
299 |
|
|
(errcode(ERRCODE_SYNTAX_ERROR), |
300 |
|
|
errmsg("bad bioseg representation"), |
301 |
|
|
errdetail("lower bound (%ld) must be >= %ld", |
302 |
kmr |
44 |
(long int) lower, MIN_LOWER))); |
303 |
kmr |
41 |
return 0; |
304 |
|
|
} |
305 |
|
|
|
306 |
kmr |
1 |
result->lower = lower; |
307 |
|
|
result->upper = upper; |
308 |
|
|
|
309 |
|
|
return result; |
310 |
|
|
} |
311 |
|
|
|
312 |
|
|
|
313 |
|
|
|
314 |
|
|
int32 |
315 |
|
|
BIOSEG_PREFIX(_center)(SEG * bioseg) |
316 |
|
|
{ |
317 |
|
|
return ((int32) bioseg->lower + (int32) bioseg->upper) / 2; |
318 |
|
|
} |
319 |
|
|
|
320 |
|
|
int32 |
321 |
|
|
BIOSEG_PREFIX(_lower)(SEG * bioseg) |
322 |
|
|
{ |
323 |
|
|
return bioseg->lower; |
324 |
|
|
} |
325 |
|
|
|
326 |
|
|
int32 |
327 |
|
|
BIOSEG_PREFIX(_upper)(SEG * bioseg) |
328 |
|
|
{ |
329 |
|
|
return bioseg->upper; |
330 |
|
|
} |
331 |
|
|
|
332 |
|
|
|
333 |
|
|
/***************************************************************************** |
334 |
|
|
* GiST functions |
335 |
|
|
*****************************************************************************/ |
336 |
|
|
|
337 |
|
|
/* |
338 |
|
|
** The GiST Consistent method for biosegments |
339 |
|
|
** Should return false if for all data items x below entry, |
340 |
|
|
** the predicate x op query == FALSE, where op is the oper |
341 |
|
|
** corresponding to strategy in the pg_amop table. |
342 |
|
|
*/ |
343 |
|
|
bool |
344 |
|
|
BIOSEG_PREFIX(_gist_consistent)(GISTENTRY *entry, |
345 |
|
|
SEG * query, |
346 |
|
|
StrategyNumber strategy) |
347 |
|
|
{ |
348 |
|
|
/* |
349 |
|
|
* if entry is not leaf, use bioseg_gist_internal_consistent, else use |
350 |
|
|
* bioseg_gist_leaf_consistent |
351 |
|
|
*/ |
352 |
|
|
if (GIST_LEAF(entry)) |
353 |
|
|
return (BIOSEG_PREFIX(_gist_leaf_consistent)((SEG *) DatumGetPointer(entry->key), query, strategy)); |
354 |
|
|
else |
355 |
|
|
return (BIOSEG_PREFIX(_gist_internal_consistent)((SEG *) DatumGetPointer(entry->key), query, strategy)); |
356 |
|
|
} |
357 |
|
|
|
358 |
|
|
/* |
359 |
|
|
** The GiST Union method for biosegments |
360 |
|
|
** returns the minimal bounding bioseg that encloses all the entries in entryvec |
361 |
|
|
*/ |
362 |
|
|
SEG * |
363 |
|
|
BIOSEG_PREFIX(_gist_union)(GistEntryVector *entryvec, int *sizep) |
364 |
|
|
{ |
365 |
|
|
int numranges; |
366 |
|
|
int i; |
367 |
|
|
SEG *out = (SEG *) NULL; |
368 |
|
|
SEG *tmp; |
369 |
|
|
|
370 |
|
|
#ifdef GIST_DEBUG |
371 |
|
|
fprintf(stderr, "union\n"); |
372 |
|
|
#endif |
373 |
|
|
|
374 |
|
|
numranges = entryvec->n; |
375 |
|
|
tmp = (SEG *) DatumGetPointer(entryvec->vector[0].key); |
376 |
|
|
*sizep = sizeof(SEG); |
377 |
|
|
|
378 |
|
|
for (i = 1; i < numranges; i++) |
379 |
|
|
{ |
380 |
|
|
out = BIOSEG_PREFIX(_gist_binary_union)(tmp, (SEG *) |
381 |
|
|
DatumGetPointer(entryvec->vector[i].key), |
382 |
|
|
sizep); |
383 |
|
|
tmp = out; |
384 |
|
|
} |
385 |
|
|
|
386 |
|
|
return (out); |
387 |
|
|
} |
388 |
|
|
|
389 |
|
|
/* |
390 |
|
|
** GiST Compress and Decompress methods for biosegments |
391 |
|
|
** do not do anything. |
392 |
|
|
*/ |
393 |
|
|
GISTENTRY * |
394 |
|
|
BIOSEG_PREFIX(_gist_compress)(GISTENTRY *entry) |
395 |
|
|
{ |
396 |
|
|
return (entry); |
397 |
|
|
} |
398 |
|
|
|
399 |
|
|
GISTENTRY * |
400 |
|
|
BIOSEG_PREFIX(_gist_decompress)(GISTENTRY *entry) |
401 |
|
|
{ |
402 |
|
|
return (entry); |
403 |
|
|
} |
404 |
|
|
|
405 |
|
|
/* |
406 |
|
|
** The GiST Penalty method for biosegments |
407 |
|
|
** As in the R-tree paper, we use change in area as our penalty metric |
408 |
|
|
*/ |
409 |
|
|
float * |
410 |
|
|
BIOSEG_PREFIX(_gist_penalty)(GISTENTRY *origentry, GISTENTRY *newentry, float *result) |
411 |
|
|
{ |
412 |
|
|
SEG *ud; |
413 |
|
|
int32 tmp1; |
414 |
|
|
int32 tmp2; |
415 |
|
|
|
416 |
|
|
ud = BIOSEG_PREFIX(_union)((SEG *) DatumGetPointer(origentry->key), |
417 |
|
|
(SEG *) DatumGetPointer(newentry->key)); |
418 |
|
|
BIOSEG_PREFIX(_rt_size)(ud, &tmp1); |
419 |
|
|
BIOSEG_PREFIX(_rt_size)((SEG *) DatumGetPointer(origentry->key), &tmp2); |
420 |
|
|
*result = tmp1 - tmp2; |
421 |
|
|
|
422 |
|
|
#ifdef GIST_DEBUG |
423 |
|
|
fprintf(stderr, "penalty\n"); |
424 |
|
|
fprintf(stderr, "\t%g\n", *result); |
425 |
|
|
#endif |
426 |
|
|
|
427 |
|
|
return (result); |
428 |
|
|
} |
429 |
|
|
|
430 |
|
|
|
431 |
|
|
/* |
432 |
|
|
** The GiST PickSplit method for segments |
433 |
|
|
** We use Guttman's poly time split algorithm |
434 |
|
|
*/ |
435 |
|
|
GIST_SPLITVEC * |
436 |
|
|
BIOSEG_PREFIX(_gist_picksplit)(GistEntryVector *entryvec, |
437 |
|
|
GIST_SPLITVEC *v) |
438 |
|
|
{ |
439 |
|
|
OffsetNumber i; |
440 |
|
|
OffsetNumber j; |
441 |
|
|
SEG *datum_alpha; |
442 |
|
|
SEG *datum_beta; |
443 |
|
|
SEG *datum_l; |
444 |
|
|
SEG *datum_r; |
445 |
|
|
SEG *union_d; |
446 |
|
|
SEG *union_dl; |
447 |
|
|
SEG *union_dr; |
448 |
|
|
SEG *inter_d; |
449 |
|
|
bool firsttime; |
450 |
|
|
int32 size_alpha; |
451 |
|
|
int32 size_beta; |
452 |
|
|
int32 size_union; |
453 |
|
|
int32 size_inter; |
454 |
|
|
int32 size_waste; |
455 |
|
|
int32 waste; |
456 |
|
|
int32 size_l; |
457 |
|
|
int32 size_r; |
458 |
|
|
int nbytes; |
459 |
|
|
OffsetNumber seed_1 = 1; |
460 |
|
|
OffsetNumber seed_2 = 2; |
461 |
|
|
OffsetNumber *left; |
462 |
|
|
OffsetNumber *right; |
463 |
|
|
OffsetNumber maxoff; |
464 |
|
|
|
465 |
|
|
#ifdef GIST_DEBUG |
466 |
|
|
fprintf(stderr, "picksplit\n"); |
467 |
|
|
#endif |
468 |
|
|
|
469 |
|
|
maxoff = entryvec->n - 2; |
470 |
|
|
nbytes = (maxoff + 2) * sizeof(OffsetNumber); |
471 |
|
|
v->spl_left = (OffsetNumber *) palloc(nbytes); |
472 |
|
|
v->spl_right = (OffsetNumber *) palloc(nbytes); |
473 |
|
|
|
474 |
|
|
firsttime = true; |
475 |
|
|
waste = 0.0; |
476 |
|
|
|
477 |
|
|
for (i = FirstOffsetNumber; i < maxoff; i = OffsetNumberNext(i)) |
478 |
|
|
{ |
479 |
|
|
datum_alpha = (SEG *) DatumGetPointer(entryvec->vector[i].key); |
480 |
|
|
for (j = OffsetNumberNext(i); j <= maxoff; j = OffsetNumberNext(j)) |
481 |
|
|
{ |
482 |
|
|
datum_beta = (SEG *) DatumGetPointer(entryvec->vector[j].key); |
483 |
|
|
|
484 |
|
|
/* compute the wasted space by unioning these guys */ |
485 |
|
|
/* size_waste = size_union - size_inter; */ |
486 |
|
|
union_d = BIOSEG_PREFIX(_union)(datum_alpha, datum_beta); |
487 |
|
|
BIOSEG_PREFIX(_rt_size)(union_d, &size_union); |
488 |
|
|
inter_d = BIOSEG_PREFIX(_inter)(datum_alpha, datum_beta); |
489 |
|
|
BIOSEG_PREFIX(_rt_size)(inter_d, &size_inter); |
490 |
|
|
size_waste = size_union - size_inter; |
491 |
|
|
|
492 |
|
|
/* |
493 |
|
|
* are these a more promising split that what we've already seen? |
494 |
|
|
*/ |
495 |
|
|
if (size_waste > waste || firsttime) |
496 |
|
|
{ |
497 |
|
|
waste = size_waste; |
498 |
|
|
seed_1 = i; |
499 |
|
|
seed_2 = j; |
500 |
|
|
firsttime = false; |
501 |
|
|
} |
502 |
|
|
} |
503 |
|
|
} |
504 |
|
|
|
505 |
|
|
left = v->spl_left; |
506 |
|
|
v->spl_nleft = 0; |
507 |
|
|
right = v->spl_right; |
508 |
|
|
v->spl_nright = 0; |
509 |
|
|
|
510 |
|
|
datum_alpha = (SEG *) DatumGetPointer(entryvec->vector[seed_1].key); |
511 |
|
|
datum_l = BIOSEG_PREFIX(_union)(datum_alpha, datum_alpha); |
512 |
|
|
BIOSEG_PREFIX(_rt_size)(datum_l, &size_l); |
513 |
|
|
datum_beta = (SEG *) DatumGetPointer(entryvec->vector[seed_2].key); |
514 |
|
|
datum_r = BIOSEG_PREFIX(_union)(datum_beta, datum_beta); |
515 |
|
|
BIOSEG_PREFIX(_rt_size)(datum_r, &size_r); |
516 |
|
|
|
517 |
|
|
/* |
518 |
|
|
* Now split up the regions between the two seeds. An important property |
519 |
|
|
* of this split algorithm is that the split vector v has the indices of |
520 |
|
|
* items to be split in order in its left and right vectors. We exploit |
521 |
|
|
* this property by doing a merge in the code that actually splits the |
522 |
|
|
* page. |
523 |
|
|
* |
524 |
|
|
* For efficiency, we also place the new index tuple in this loop. This is |
525 |
|
|
* handled at the very end, when we have placed all the existing tuples |
526 |
|
|
* and i == maxoff + 1. |
527 |
|
|
*/ |
528 |
|
|
|
529 |
|
|
maxoff = OffsetNumberNext(maxoff); |
530 |
|
|
for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) |
531 |
|
|
{ |
532 |
|
|
/* |
533 |
|
|
* If we've already decided where to place this item, just put it on |
534 |
|
|
* the right list. Otherwise, we need to figure out which page needs |
535 |
|
|
* the least enlargement in order to store the item. |
536 |
|
|
*/ |
537 |
|
|
|
538 |
|
|
if (i == seed_1) |
539 |
|
|
{ |
540 |
|
|
*left++ = i; |
541 |
|
|
v->spl_nleft++; |
542 |
|
|
continue; |
543 |
|
|
} |
544 |
|
|
else if (i == seed_2) |
545 |
|
|
{ |
546 |
|
|
*right++ = i; |
547 |
|
|
v->spl_nright++; |
548 |
|
|
continue; |
549 |
|
|
} |
550 |
|
|
|
551 |
|
|
/* okay, which page needs least enlargement? */ |
552 |
|
|
datum_alpha = (SEG *) DatumGetPointer(entryvec->vector[i].key); |
553 |
|
|
union_dl = BIOSEG_PREFIX(_union)(datum_l, datum_alpha); |
554 |
|
|
union_dr = BIOSEG_PREFIX(_union)(datum_r, datum_alpha); |
555 |
|
|
BIOSEG_PREFIX(_rt_size)(union_dl, &size_alpha); |
556 |
|
|
BIOSEG_PREFIX(_rt_size)(union_dr, &size_beta); |
557 |
|
|
|
558 |
|
|
/* pick which page to add it to */ |
559 |
|
|
if (size_alpha - size_l < size_beta - size_r) |
560 |
|
|
{ |
561 |
|
|
datum_l = union_dl; |
562 |
|
|
size_l = size_alpha; |
563 |
|
|
*left++ = i; |
564 |
|
|
v->spl_nleft++; |
565 |
|
|
} |
566 |
|
|
else |
567 |
|
|
{ |
568 |
|
|
datum_r = union_dr; |
569 |
|
|
size_r = size_alpha; |
570 |
|
|
*right++ = i; |
571 |
|
|
v->spl_nright++; |
572 |
|
|
} |
573 |
|
|
} |
574 |
|
|
*left = *right = FirstOffsetNumber; /* sentinel value, see dosplit() */ |
575 |
|
|
|
576 |
|
|
v->spl_ldatum = PointerGetDatum(datum_l); |
577 |
|
|
v->spl_rdatum = PointerGetDatum(datum_r); |
578 |
|
|
|
579 |
|
|
return v; |
580 |
|
|
} |
581 |
|
|
|
582 |
|
|
/* |
583 |
|
|
** Equality methods |
584 |
|
|
*/ |
585 |
|
|
bool * |
586 |
|
|
BIOSEG_PREFIX(_gist_same)(SEG * b1, SEG * b2, bool *result) |
587 |
|
|
{ |
588 |
|
|
if (BIOSEG_PREFIX(_same)(b1, b2)) |
589 |
|
|
*result = TRUE; |
590 |
|
|
else |
591 |
|
|
*result = FALSE; |
592 |
|
|
|
593 |
|
|
#ifdef GIST_DEBUG |
594 |
|
|
fprintf(stderr, "same: %s\n", (*result ? "TRUE" : "FALSE")); |
595 |
|
|
#endif |
596 |
|
|
|
597 |
|
|
return (result); |
598 |
|
|
} |
599 |
|
|
|
600 |
|
|
/* |
601 |
|
|
** SUPPORT ROUTINES |
602 |
|
|
*/ |
603 |
|
|
bool |
604 |
|
|
BIOSEG_PREFIX(_gist_leaf_consistent)(SEG * key, |
605 |
|
|
SEG * query, |
606 |
|
|
StrategyNumber strategy) |
607 |
|
|
{ |
608 |
|
|
bool retval; |
609 |
|
|
|
610 |
|
|
#ifdef GIST_QUERY_DEBUG |
611 |
|
|
fprintf(stderr, "leaf_consistent, %d\n", strategy); |
612 |
|
|
#endif |
613 |
|
|
|
614 |
|
|
switch (strategy) |
615 |
|
|
{ |
616 |
|
|
case RTLeftStrategyNumber: |
617 |
|
|
retval = (bool) BIOSEG_PREFIX(_left)(key, query); |
618 |
|
|
break; |
619 |
|
|
case RTOverLeftStrategyNumber: |
620 |
|
|
retval = (bool) BIOSEG_PREFIX(_over_left)(key, query); |
621 |
|
|
break; |
622 |
|
|
case RTOverlapStrategyNumber: |
623 |
|
|
retval = (bool) BIOSEG_PREFIX(_overlap)(key, query); |
624 |
|
|
break; |
625 |
|
|
case RTOverRightStrategyNumber: |
626 |
|
|
retval = (bool) BIOSEG_PREFIX(_over_right)(key, query); |
627 |
|
|
break; |
628 |
|
|
case RTRightStrategyNumber: |
629 |
|
|
retval = (bool) BIOSEG_PREFIX(_right)(key, query); |
630 |
|
|
break; |
631 |
|
|
case RTSameStrategyNumber: |
632 |
|
|
retval = (bool) BIOSEG_PREFIX(_same)(key, query); |
633 |
|
|
break; |
634 |
|
|
case RTContainsStrategyNumber: |
635 |
|
|
case RTOldContainsStrategyNumber: |
636 |
|
|
retval = (bool) BIOSEG_PREFIX(_contains)(key, query); |
637 |
|
|
break; |
638 |
|
|
case RTContainedByStrategyNumber: |
639 |
|
|
case RTOldContainedByStrategyNumber: |
640 |
|
|
retval = (bool) BIOSEG_PREFIX(_contained)(key, query); |
641 |
|
|
break; |
642 |
|
|
default: |
643 |
|
|
retval = FALSE; |
644 |
|
|
} |
645 |
|
|
return (retval); |
646 |
|
|
} |
647 |
|
|
|
648 |
|
|
bool |
649 |
|
|
BIOSEG_PREFIX(_gist_internal_consistent)(SEG * key, |
650 |
|
|
SEG * query, |
651 |
|
|
StrategyNumber strategy) |
652 |
|
|
{ |
653 |
|
|
bool retval; |
654 |
|
|
|
655 |
|
|
#ifdef GIST_QUERY_DEBUG |
656 |
|
|
fprintf(stderr, "internal_consistent, %d\n", strategy); |
657 |
|
|
#endif |
658 |
|
|
|
659 |
|
|
switch (strategy) |
660 |
|
|
{ |
661 |
|
|
case RTLeftStrategyNumber: |
662 |
|
|
retval = (bool) !BIOSEG_PREFIX(_over_right)(key, query); |
663 |
|
|
break; |
664 |
|
|
case RTOverLeftStrategyNumber: |
665 |
|
|
retval = (bool) !BIOSEG_PREFIX(_right)(key, query); |
666 |
|
|
break; |
667 |
|
|
case RTOverlapStrategyNumber: |
668 |
|
|
retval = (bool) BIOSEG_PREFIX(_overlap)(key, query); |
669 |
|
|
break; |
670 |
|
|
case RTOverRightStrategyNumber: |
671 |
|
|
retval = (bool) !BIOSEG_PREFIX(_left)(key, query); |
672 |
|
|
break; |
673 |
|
|
case RTRightStrategyNumber: |
674 |
|
|
retval = (bool) !BIOSEG_PREFIX(_over_left)(key, query); |
675 |
|
|
break; |
676 |
|
|
case RTSameStrategyNumber: |
677 |
|
|
case RTContainsStrategyNumber: |
678 |
|
|
case RTOldContainsStrategyNumber: |
679 |
|
|
retval = (bool) BIOSEG_PREFIX(_contains)(key, query); |
680 |
|
|
break; |
681 |
|
|
case RTContainedByStrategyNumber: |
682 |
|
|
case RTOldContainedByStrategyNumber: |
683 |
|
|
retval = (bool) BIOSEG_PREFIX(_overlap)(key, query); |
684 |
|
|
break; |
685 |
|
|
default: |
686 |
|
|
retval = FALSE; |
687 |
|
|
} |
688 |
|
|
return (retval); |
689 |
|
|
} |
690 |
|
|
|
691 |
|
|
SEG * |
692 |
|
|
BIOSEG_PREFIX(_gist_binary_union)(SEG * r1, SEG * r2, int *sizep) |
693 |
|
|
{ |
694 |
|
|
SEG *retval; |
695 |
|
|
|
696 |
|
|
retval = BIOSEG_PREFIX(_union)(r1, r2); |
697 |
|
|
*sizep = sizeof(SEG); |
698 |
|
|
|
699 |
|
|
return (retval); |
700 |
|
|
} |
701 |
|
|
|
702 |
|
|
|
703 |
|
|
bool |
704 |
|
|
BIOSEG_PREFIX(_contains)(SEG * a, SEG * b) |
705 |
|
|
{ |
706 |
|
|
#ifdef INTERBASE_COORDS |
707 |
|
|
return ((a->lower <= b->lower) && (a->upper >= b->upper) && |
708 |
|
|
a->lower != b->upper && a->upper != b->lower); |
709 |
|
|
#else |
710 |
|
|
return ((a->lower <= b->lower) && (a->upper >= b->upper)); |
711 |
|
|
#endif |
712 |
|
|
} |
713 |
|
|
|
714 |
|
|
bool |
715 |
|
|
BIOSEG_PREFIX(_contained)(SEG * a, SEG * b) |
716 |
|
|
{ |
717 |
|
|
return (BIOSEG_PREFIX(_contains)(b, a)); |
718 |
|
|
} |
719 |
|
|
|
720 |
|
|
/***************************************************************************** |
721 |
|
|
* Operator class for R-tree indexing |
722 |
|
|
*****************************************************************************/ |
723 |
|
|
|
724 |
|
|
bool |
725 |
|
|
BIOSEG_PREFIX(_same)(SEG * a, SEG * b) |
726 |
|
|
{ |
727 |
|
|
return BIOSEG_PREFIX(_cmp)(a, b) == 0; |
728 |
|
|
} |
729 |
|
|
|
730 |
|
|
/* bioseg_overlap -- does a overlap b? |
731 |
|
|
*/ |
732 |
|
|
bool |
733 |
|
|
BIOSEG_PREFIX(_overlap)(SEG * a, SEG * b) |
734 |
|
|
{ |
735 |
|
|
#ifdef INTERBASE_COORDS |
736 |
|
|
return ( |
737 |
|
|
((a->upper >= b->upper) && (a->lower < b->upper) && |
738 |
|
|
b->lower != a->upper) |
739 |
|
|
|| |
740 |
|
|
((b->upper >= a->upper) && (b->lower < a->upper) && |
741 |
|
|
a->lower != b->upper) |
742 |
|
|
); |
743 |
|
|
#else |
744 |
|
|
return |
745 |
|
|
((a->upper >= b->upper) && (a->lower <= b->upper)) |
746 |
|
|
|| |
747 |
|
|
((b->upper >= a->upper) && (b->lower <= a->upper)); |
748 |
|
|
#endif |
749 |
|
|
} |
750 |
|
|
|
751 |
|
|
/* bioseg_overleft -- is the right edge of (a) located at or left of the right edge of (b)? |
752 |
|
|
*/ |
753 |
|
|
bool |
754 |
|
|
BIOSEG_PREFIX(_over_left)(SEG * a, SEG * b) |
755 |
|
|
{ |
756 |
|
|
return (a->upper <= b->upper); |
757 |
|
|
} |
758 |
|
|
|
759 |
|
|
/* bioseg_left -- is (a) entirely on the left of (b)? |
760 |
|
|
*/ |
761 |
|
|
bool |
762 |
|
|
BIOSEG_PREFIX(_left)(SEG * a, SEG * b) |
763 |
|
|
{ |
764 |
|
|
return (a->upper < b->lower); |
765 |
|
|
} |
766 |
|
|
|
767 |
|
|
/* bioseg_right -- is (a) entirely on the right of (b)? |
768 |
|
|
*/ |
769 |
|
|
bool |
770 |
|
|
BIOSEG_PREFIX(_right)(SEG * a, SEG * b) |
771 |
|
|
{ |
772 |
|
|
return (a->lower > b->upper); |
773 |
|
|
} |
774 |
|
|
|
775 |
|
|
/* bioseg_overright -- is the left edge of (a) located at or right of the left edge of (b)? |
776 |
|
|
*/ |
777 |
|
|
bool |
778 |
|
|
BIOSEG_PREFIX(_over_right)(SEG * a, SEG * b) |
779 |
|
|
{ |
780 |
|
|
return (a->lower >= b->lower); |
781 |
|
|
} |
782 |
|
|
|
783 |
|
|
|
784 |
|
|
static SEG * |
785 |
|
|
BIOSEG_PREFIX(_union)(SEG * a, SEG * b) |
786 |
|
|
{ |
787 |
|
|
SEG *n; |
788 |
|
|
|
789 |
|
|
n = (SEG *) palloc(sizeof(*n)); |
790 |
|
|
|
791 |
|
|
/* take max of upper endpoints */ |
792 |
|
|
if (a->upper > b->upper) |
793 |
|
|
{ |
794 |
|
|
n->upper = a->upper; |
795 |
|
|
} |
796 |
|
|
else |
797 |
|
|
{ |
798 |
|
|
n->upper = b->upper; |
799 |
|
|
} |
800 |
|
|
|
801 |
|
|
/* take min of lower endpoints */ |
802 |
|
|
if (a->lower < b->lower) |
803 |
|
|
{ |
804 |
|
|
n->lower = a->lower; |
805 |
|
|
} |
806 |
|
|
else |
807 |
|
|
{ |
808 |
|
|
n->lower = b->lower; |
809 |
|
|
} |
810 |
|
|
|
811 |
|
|
return (n); |
812 |
|
|
} |
813 |
|
|
|
814 |
|
|
|
815 |
|
|
static SEG * |
816 |
|
|
BIOSEG_PREFIX(_inter)(SEG * a, SEG * b) |
817 |
|
|
{ |
818 |
|
|
SEG *n; |
819 |
|
|
|
820 |
|
|
n = (SEG *) palloc(sizeof(*n)); |
821 |
|
|
|
822 |
|
|
/* take min of upper endpoints */ |
823 |
|
|
if (a->upper < b->upper) |
824 |
|
|
{ |
825 |
|
|
n->upper = a->upper; |
826 |
|
|
} |
827 |
|
|
else |
828 |
|
|
{ |
829 |
|
|
n->upper = b->upper; |
830 |
|
|
} |
831 |
|
|
|
832 |
|
|
/* take max of lower endpoints */ |
833 |
|
|
if (a->lower > b->lower) |
834 |
|
|
{ |
835 |
|
|
n->lower = a->lower; |
836 |
|
|
} |
837 |
|
|
else |
838 |
|
|
{ |
839 |
|
|
n->lower = b->lower; |
840 |
|
|
} |
841 |
|
|
|
842 |
|
|
return (n); |
843 |
|
|
} |
844 |
|
|
|
845 |
|
|
void |
846 |
|
|
BIOSEG_PREFIX(_rt_size)(SEG * a, int32 *size) |
847 |
|
|
{ |
848 |
|
|
if (a == (SEG *) NULL) |
849 |
|
|
*size = 0; |
850 |
|
|
else |
851 |
|
|
#ifdef INTERBASE_COORDS |
852 |
|
|
*size = (int32) (a->upper - a->lower); |
853 |
|
|
#else |
854 |
|
|
*size = (int32) (a->upper - a->lower + 1); |
855 |
|
|
#endif |
856 |
|
|
} |
857 |
|
|
|
858 |
|
|
int32 |
859 |
|
|
BIOSEG_PREFIX(_size)(SEG * a) |
860 |
|
|
{ |
861 |
|
|
#ifdef INTERBASE_COORDS |
862 |
|
|
return a->upper - a->lower; |
863 |
|
|
#else |
864 |
|
|
return a->upper - a->lower + 1; |
865 |
|
|
#endif |
866 |
|
|
} |
867 |
|
|
|
868 |
|
|
|
869 |
|
|
/***************************************************************************** |
870 |
|
|
* Miscellaneous operators |
871 |
|
|
*****************************************************************************/ |
872 |
|
|
int32 |
873 |
|
|
BIOSEG_PREFIX(_cmp)(SEG * a, SEG * b) |
874 |
|
|
{ |
875 |
|
|
if (a->lower < b->lower) |
876 |
|
|
return -1; |
877 |
|
|
if (a->lower > b->lower) |
878 |
|
|
return 1; |
879 |
|
|
|
880 |
|
|
if (a->upper < b->upper) |
881 |
|
|
return -1; |
882 |
|
|
if (a->upper > b->upper) |
883 |
|
|
return 1; |
884 |
|
|
|
885 |
|
|
return 0; |
886 |
|
|
} |
887 |
|
|
|
888 |
|
|
bool |
889 |
|
|
BIOSEG_PREFIX(_lt)(SEG * a, SEG * b) |
890 |
|
|
{ |
891 |
|
|
return BIOSEG_PREFIX(_cmp)(a, b) < 0; |
892 |
|
|
} |
893 |
|
|
|
894 |
|
|
bool |
895 |
|
|
BIOSEG_PREFIX(_le)(SEG * a, SEG * b) |
896 |
|
|
{ |
897 |
|
|
return BIOSEG_PREFIX(_cmp)(a, b) <= 0; |
898 |
|
|
} |
899 |
|
|
|
900 |
|
|
bool |
901 |
|
|
BIOSEG_PREFIX(_gt)(SEG * a, SEG * b) |
902 |
|
|
{ |
903 |
|
|
return BIOSEG_PREFIX(_cmp)(a, b) > 0; |
904 |
|
|
} |
905 |
|
|
|
906 |
|
|
bool |
907 |
|
|
BIOSEG_PREFIX(_ge)(SEG * a, SEG * b) |
908 |
|
|
{ |
909 |
|
|
return BIOSEG_PREFIX(_cmp)(a, b) >= 0; |
910 |
|
|
} |
911 |
|
|
|
912 |
|
|
bool |
913 |
|
|
BIOSEG_PREFIX(_different)(SEG * a, SEG * b) |
914 |
|
|
{ |
915 |
|
|
return BIOSEG_PREFIX(_cmp)(a, b) != 0; |
916 |
|
|
} |
917 |
|
|
|
918 |
|
|
bool |
919 |
|
|
BIOSEG_PREFIX(_contains_int)(SEG * a, int *b) |
920 |
|
|
{ |
921 |
|
|
#ifdef INTERBASE_COORDS |
922 |
|
|
return ((a->lower < *b) && (a->upper > *b)); |
923 |
|
|
#else |
924 |
|
|
return ((a->lower <= *b) && (a->upper >= *b)); |
925 |
|
|
#endif |
926 |
|
|
} |
927 |
|
|
|
928 |
|
|
/* |
929 |
kmr |
40 |
** These values lower than those in geo_selfuncs.c to encourage the planner to |
930 |
|
|
** choose to use the GIST index - found by trail and error. |
931 |
kmr |
1 |
*/ |
932 |
|
|
Datum |
933 |
|
|
BIOSEG_PREFIX(_sel)(PG_FUNCTION_ARGS) |
934 |
|
|
{ |
935 |
|
|
PG_RETURN_FLOAT8(2.0e-4); |
936 |
|
|
} |
937 |
|
|
|
938 |
|
|
Datum |
939 |
|
|
BIOSEG_PREFIX(_joinsel)(PG_FUNCTION_ARGS) |
940 |
|
|
{ |
941 |
|
|
PG_RETURN_FLOAT8(1.0e-5); |
942 |
|
|
} |
943 |
|
|
|
944 |
|
|
Datum |
945 |
|
|
BIOSEG_PREFIX(_contsel)(PG_FUNCTION_ARGS) |
946 |
|
|
{ |
947 |
|
|
PG_RETURN_FLOAT8(1.0e-4); |
948 |
|
|
} |
949 |
|
|
|
950 |
|
|
Datum |
951 |
|
|
BIOSEG_PREFIX(_contjoinsel)(PG_FUNCTION_ARGS) |
952 |
|
|
{ |
953 |
|
|
PG_RETURN_FLOAT8(5e-6); |
954 |
|
|
} |