1 |
|
|
/* $OpenBSD: bt_seq.c,v 1.11 2005/08/05 13:03:00 espie Exp $ */ |
2 |
|
|
|
3 |
|
|
/*- |
4 |
|
|
* Copyright (c) 1990, 1993, 1994 |
5 |
|
|
* The Regents of the University of California. All rights reserved. |
6 |
|
|
* |
7 |
|
|
* This code is derived from software contributed to Berkeley by |
8 |
|
|
* Mike Olson. |
9 |
|
|
* |
10 |
|
|
* Redistribution and use in source and binary forms, with or without |
11 |
|
|
* modification, are permitted provided that the following conditions |
12 |
|
|
* are met: |
13 |
|
|
* 1. Redistributions of source code must retain the above copyright |
14 |
|
|
* notice, this list of conditions and the following disclaimer. |
15 |
|
|
* 2. Redistributions in binary form must reproduce the above copyright |
16 |
|
|
* notice, this list of conditions and the following disclaimer in the |
17 |
|
|
* documentation and/or other materials provided with the distribution. |
18 |
|
|
* 3. Neither the name of the University nor the names of its contributors |
19 |
|
|
* may be used to endorse or promote products derived from this software |
20 |
|
|
* without specific prior written permission. |
21 |
|
|
* |
22 |
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
23 |
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
24 |
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
25 |
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
26 |
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
27 |
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
28 |
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
29 |
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
30 |
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
31 |
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
32 |
|
|
* SUCH DAMAGE. |
33 |
|
|
*/ |
34 |
|
|
|
35 |
|
|
#include <sys/types.h> |
36 |
|
|
|
37 |
|
|
#include <errno.h> |
38 |
|
|
#include <stddef.h> |
39 |
|
|
#include <stdio.h> |
40 |
|
|
#include <stdlib.h> |
41 |
|
|
|
42 |
|
|
#include <db.h> |
43 |
|
|
#include "btree.h" |
44 |
|
|
|
45 |
|
|
static int __bt_first(BTREE *, const DBT *, EPG *, int *); |
46 |
|
|
static int __bt_seqadv(BTREE *, EPG *, int); |
47 |
|
|
static int __bt_seqset(BTREE *, EPG *, DBT *, int); |
48 |
|
|
|
49 |
|
|
/* |
50 |
|
|
* Sequential scan support. |
51 |
|
|
* |
52 |
|
|
* The tree can be scanned sequentially, starting from either end of the |
53 |
|
|
* tree or from any specific key. A scan request before any scanning is |
54 |
|
|
* done is initialized as starting from the least node. |
55 |
|
|
*/ |
56 |
|
|
|
57 |
|
|
/* |
58 |
|
|
* __bt_seq -- |
59 |
|
|
* Btree sequential scan interface. |
60 |
|
|
* |
61 |
|
|
* Parameters: |
62 |
|
|
* dbp: pointer to access method |
63 |
|
|
* key: key for positioning and return value |
64 |
|
|
* data: data return value |
65 |
|
|
* flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV. |
66 |
|
|
* |
67 |
|
|
* Returns: |
68 |
|
|
* RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. |
69 |
|
|
*/ |
70 |
|
|
int |
71 |
|
|
__bt_seq(const DB *dbp, DBT *key, DBT *data, u_int flags) |
72 |
|
|
{ |
73 |
|
|
BTREE *t; |
74 |
|
|
EPG e; |
75 |
|
|
int status; |
76 |
|
|
|
77 |
|
|
t = dbp->internal; |
78 |
|
|
|
79 |
|
|
/* Toss any page pinned across calls. */ |
80 |
|
|
if (t->bt_pinned != NULL) { |
81 |
|
|
mpool_put(t->bt_mp, t->bt_pinned, 0); |
82 |
|
|
t->bt_pinned = NULL; |
83 |
|
|
} |
84 |
|
|
|
85 |
|
|
/* |
86 |
|
|
* If scan unitialized as yet, or starting at a specific record, set |
87 |
|
|
* the scan to a specific key. Both __bt_seqset and __bt_seqadv pin |
88 |
|
|
* the page the cursor references if they're successful. |
89 |
|
|
*/ |
90 |
|
|
switch (flags) { |
91 |
|
|
case R_NEXT: |
92 |
|
|
case R_PREV: |
93 |
|
|
if (F_ISSET(&t->bt_cursor, CURS_INIT)) { |
94 |
|
|
status = __bt_seqadv(t, &e, flags); |
95 |
|
|
break; |
96 |
|
|
} |
97 |
|
|
/* FALLTHROUGH */ |
98 |
|
|
case R_FIRST: |
99 |
|
|
case R_LAST: |
100 |
|
|
case R_CURSOR: |
101 |
|
|
status = __bt_seqset(t, &e, key, flags); |
102 |
|
|
break; |
103 |
|
|
default: |
104 |
|
|
errno = EINVAL; |
105 |
|
|
return (RET_ERROR); |
106 |
|
|
} |
107 |
|
|
|
108 |
|
|
if (status == RET_SUCCESS) { |
109 |
|
|
__bt_setcur(t, e.page->pgno, e.index); |
110 |
|
|
|
111 |
|
|
status = |
112 |
|
|
__bt_ret(t, &e, key, &t->bt_rkey, data, &t->bt_rdata, 0); |
113 |
|
|
|
114 |
|
|
/* |
115 |
|
|
* If the user is doing concurrent access, we copied the |
116 |
|
|
* key/data, toss the page. |
117 |
|
|
*/ |
118 |
|
|
if (F_ISSET(t, B_DB_LOCK)) |
119 |
|
|
mpool_put(t->bt_mp, e.page, 0); |
120 |
|
|
else |
121 |
|
|
t->bt_pinned = e.page; |
122 |
|
|
} |
123 |
|
|
return (status); |
124 |
|
|
} |
125 |
|
|
|
126 |
|
|
/* |
127 |
|
|
* __bt_seqset -- |
128 |
|
|
* Set the sequential scan to a specific key. |
129 |
|
|
* |
130 |
|
|
* Parameters: |
131 |
|
|
* t: tree |
132 |
|
|
* ep: storage for returned key |
133 |
|
|
* key: key for initial scan position |
134 |
|
|
* flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV |
135 |
|
|
* |
136 |
|
|
* Side effects: |
137 |
|
|
* Pins the page the cursor references. |
138 |
|
|
* |
139 |
|
|
* Returns: |
140 |
|
|
* RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. |
141 |
|
|
*/ |
142 |
|
|
static int |
143 |
|
|
__bt_seqset(BTREE *t, EPG *ep, DBT *key, int flags) |
144 |
|
|
{ |
145 |
|
|
PAGE *h; |
146 |
|
|
pgno_t pg; |
147 |
|
|
int exact; |
148 |
|
|
|
149 |
|
|
/* |
150 |
|
|
* Find the first, last or specific key in the tree and point the |
151 |
|
|
* cursor at it. The cursor may not be moved until a new key has |
152 |
|
|
* been found. |
153 |
|
|
*/ |
154 |
|
|
switch (flags) { |
155 |
|
|
case R_CURSOR: /* Keyed scan. */ |
156 |
|
|
/* |
157 |
|
|
* Find the first instance of the key or the smallest key |
158 |
|
|
* which is greater than or equal to the specified key. |
159 |
|
|
*/ |
160 |
|
|
if (key->data == NULL || key->size == 0) { |
161 |
|
|
errno = EINVAL; |
162 |
|
|
return (RET_ERROR); |
163 |
|
|
} |
164 |
|
|
return (__bt_first(t, key, ep, &exact)); |
165 |
|
|
case R_FIRST: /* First record. */ |
166 |
|
|
case R_NEXT: |
167 |
|
|
/* Walk down the left-hand side of the tree. */ |
168 |
|
|
for (pg = P_ROOT;;) { |
169 |
|
|
if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) |
170 |
|
|
return (RET_ERROR); |
171 |
|
|
|
172 |
|
|
/* Check for an empty tree. */ |
173 |
|
|
if (NEXTINDEX(h) == 0) { |
174 |
|
|
mpool_put(t->bt_mp, h, 0); |
175 |
|
|
return (RET_SPECIAL); |
176 |
|
|
} |
177 |
|
|
|
178 |
|
|
if (h->flags & (P_BLEAF | P_RLEAF)) |
179 |
|
|
break; |
180 |
|
|
pg = GETBINTERNAL(h, 0)->pgno; |
181 |
|
|
mpool_put(t->bt_mp, h, 0); |
182 |
|
|
} |
183 |
|
|
ep->page = h; |
184 |
|
|
ep->index = 0; |
185 |
|
|
break; |
186 |
|
|
case R_LAST: /* Last record. */ |
187 |
|
|
case R_PREV: |
188 |
|
|
/* Walk down the right-hand side of the tree. */ |
189 |
|
|
for (pg = P_ROOT;;) { |
190 |
|
|
if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) |
191 |
|
|
return (RET_ERROR); |
192 |
|
|
|
193 |
|
|
/* Check for an empty tree. */ |
194 |
|
|
if (NEXTINDEX(h) == 0) { |
195 |
|
|
mpool_put(t->bt_mp, h, 0); |
196 |
|
|
return (RET_SPECIAL); |
197 |
|
|
} |
198 |
|
|
|
199 |
|
|
if (h->flags & (P_BLEAF | P_RLEAF)) |
200 |
|
|
break; |
201 |
|
|
pg = GETBINTERNAL(h, NEXTINDEX(h) - 1)->pgno; |
202 |
|
|
mpool_put(t->bt_mp, h, 0); |
203 |
|
|
} |
204 |
|
|
|
205 |
|
|
ep->page = h; |
206 |
|
|
ep->index = NEXTINDEX(h) - 1; |
207 |
|
|
break; |
208 |
|
|
} |
209 |
|
|
return (RET_SUCCESS); |
210 |
|
|
} |
211 |
|
|
|
212 |
|
|
/* |
213 |
|
|
* __bt_seqadvance -- |
214 |
|
|
* Advance the sequential scan. |
215 |
|
|
* |
216 |
|
|
* Parameters: |
217 |
|
|
* t: tree |
218 |
|
|
* flags: R_NEXT, R_PREV |
219 |
|
|
* |
220 |
|
|
* Side effects: |
221 |
|
|
* Pins the page the new key/data record is on. |
222 |
|
|
* |
223 |
|
|
* Returns: |
224 |
|
|
* RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. |
225 |
|
|
*/ |
226 |
|
|
static int |
227 |
|
|
__bt_seqadv(BTREE *t, EPG *ep, int flags) |
228 |
|
|
{ |
229 |
|
|
CURSOR *c; |
230 |
|
|
PAGE *h; |
231 |
|
|
indx_t idx; |
232 |
|
|
pgno_t pg; |
233 |
|
|
int exact; |
234 |
|
|
|
235 |
|
|
/* |
236 |
|
|
* There are a couple of states that we can be in. The cursor has |
237 |
|
|
* been initialized by the time we get here, but that's all we know. |
238 |
|
|
*/ |
239 |
|
|
c = &t->bt_cursor; |
240 |
|
|
|
241 |
|
|
/* |
242 |
|
|
* The cursor was deleted where there weren't any duplicate records, |
243 |
|
|
* so the key was saved. Find out where that key would go in the |
244 |
|
|
* current tree. It doesn't matter if the returned key is an exact |
245 |
|
|
* match or not -- if it's an exact match, the record was added after |
246 |
|
|
* the delete so we can just return it. If not, as long as there's |
247 |
|
|
* a record there, return it. |
248 |
|
|
*/ |
249 |
|
|
if (F_ISSET(c, CURS_ACQUIRE)) |
250 |
|
|
return (__bt_first(t, &c->key, ep, &exact)); |
251 |
|
|
|
252 |
|
|
/* Get the page referenced by the cursor. */ |
253 |
|
|
if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL) |
254 |
|
|
return (RET_ERROR); |
255 |
|
|
|
256 |
|
|
/* |
257 |
|
|
* Find the next/previous record in the tree and point the cursor at |
258 |
|
|
* it. The cursor may not be moved until a new key has been found. |
259 |
|
|
*/ |
260 |
|
|
switch (flags) { |
261 |
|
|
case R_NEXT: /* Next record. */ |
262 |
|
|
/* |
263 |
|
|
* The cursor was deleted in duplicate records, and moved |
264 |
|
|
* forward to a record that has yet to be returned. Clear |
265 |
|
|
* that flag, and return the record. |
266 |
|
|
*/ |
267 |
|
|
if (F_ISSET(c, CURS_AFTER)) |
268 |
|
|
goto usecurrent; |
269 |
|
|
idx = c->pg.index; |
270 |
|
|
if (++idx == NEXTINDEX(h)) { |
271 |
|
|
pg = h->nextpg; |
272 |
|
|
mpool_put(t->bt_mp, h, 0); |
273 |
|
|
if (pg == P_INVALID) |
274 |
|
|
return (RET_SPECIAL); |
275 |
|
|
if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) |
276 |
|
|
return (RET_ERROR); |
277 |
|
|
idx = 0; |
278 |
|
|
} |
279 |
|
|
break; |
280 |
|
|
case R_PREV: /* Previous record. */ |
281 |
|
|
/* |
282 |
|
|
* The cursor was deleted in duplicate records, and moved |
283 |
|
|
* backward to a record that has yet to be returned. Clear |
284 |
|
|
* that flag, and return the record. |
285 |
|
|
*/ |
286 |
|
|
if (F_ISSET(c, CURS_BEFORE)) { |
287 |
|
|
usecurrent: F_CLR(c, CURS_AFTER | CURS_BEFORE); |
288 |
|
|
ep->page = h; |
289 |
|
|
ep->index = c->pg.index; |
290 |
|
|
return (RET_SUCCESS); |
291 |
|
|
} |
292 |
|
|
idx = c->pg.index; |
293 |
|
|
if (idx == 0) { |
294 |
|
|
pg = h->prevpg; |
295 |
|
|
mpool_put(t->bt_mp, h, 0); |
296 |
|
|
if (pg == P_INVALID) |
297 |
|
|
return (RET_SPECIAL); |
298 |
|
|
if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) |
299 |
|
|
return (RET_ERROR); |
300 |
|
|
idx = NEXTINDEX(h) - 1; |
301 |
|
|
} else |
302 |
|
|
--idx; |
303 |
|
|
break; |
304 |
|
|
} |
305 |
|
|
|
306 |
|
|
ep->page = h; |
307 |
|
|
ep->index = idx; |
308 |
|
|
return (RET_SUCCESS); |
309 |
|
|
} |
310 |
|
|
|
311 |
|
|
/* |
312 |
|
|
* __bt_first -- |
313 |
|
|
* Find the first entry. |
314 |
|
|
* |
315 |
|
|
* Parameters: |
316 |
|
|
* t: the tree |
317 |
|
|
* key: the key |
318 |
|
|
* erval: return EPG |
319 |
|
|
* exactp: pointer to exact match flag |
320 |
|
|
* |
321 |
|
|
* Returns: |
322 |
|
|
* The first entry in the tree greater than or equal to key, |
323 |
|
|
* or RET_SPECIAL if no such key exists. |
324 |
|
|
*/ |
325 |
|
|
static int |
326 |
|
|
__bt_first(BTREE *t, const DBT *key, EPG *erval, int *exactp) |
327 |
|
|
{ |
328 |
|
|
PAGE *h; |
329 |
|
|
EPG *ep, save; |
330 |
|
|
pgno_t pg; |
331 |
|
|
|
332 |
|
|
/* |
333 |
|
|
* Find any matching record; __bt_search pins the page. |
334 |
|
|
* |
335 |
|
|
* If it's an exact match and duplicates are possible, walk backwards |
336 |
|
|
* in the tree until we find the first one. Otherwise, make sure it's |
337 |
|
|
* a valid key (__bt_search may return an index just past the end of a |
338 |
|
|
* page) and return it. |
339 |
|
|
*/ |
340 |
|
|
if ((ep = __bt_search(t, key, exactp)) == NULL) |
341 |
|
|
return (0); |
342 |
|
|
if (*exactp) { |
343 |
|
|
if (F_ISSET(t, B_NODUPS)) { |
344 |
|
|
*erval = *ep; |
345 |
|
|
return (RET_SUCCESS); |
346 |
|
|
} |
347 |
|
|
|
348 |
|
|
/* |
349 |
|
|
* Walk backwards, as long as the entry matches and there are |
350 |
|
|
* keys left in the tree. Save a copy of each match in case |
351 |
|
|
* we go too far. |
352 |
|
|
*/ |
353 |
|
|
save = *ep; |
354 |
|
|
h = ep->page; |
355 |
|
|
do { |
356 |
|
|
if (save.page->pgno != ep->page->pgno) { |
357 |
|
|
mpool_put(t->bt_mp, save.page, 0); |
358 |
|
|
save = *ep; |
359 |
|
|
} else |
360 |
|
|
save.index = ep->index; |
361 |
|
|
|
362 |
|
|
/* |
363 |
|
|
* Don't unpin the page the last (or original) match |
364 |
|
|
* was on, but make sure it's unpinned if an error |
365 |
|
|
* occurs. |
366 |
|
|
*/ |
367 |
|
|
if (ep->index == 0) { |
368 |
|
|
if (h->prevpg == P_INVALID) |
369 |
|
|
break; |
370 |
|
|
if (h->pgno != save.page->pgno) |
371 |
|
|
mpool_put(t->bt_mp, h, 0); |
372 |
|
|
if ((h = mpool_get(t->bt_mp, |
373 |
|
|
h->prevpg, 0)) == NULL) { |
374 |
|
|
if (h->pgno == save.page->pgno) |
375 |
|
|
mpool_put(t->bt_mp, |
376 |
|
|
save.page, 0); |
377 |
|
|
return (RET_ERROR); |
378 |
|
|
} |
379 |
|
|
ep->page = h; |
380 |
|
|
ep->index = NEXTINDEX(h); |
381 |
|
|
} |
382 |
|
|
--ep->index; |
383 |
|
|
} while (__bt_cmp(t, key, ep) == 0); |
384 |
|
|
|
385 |
|
|
/* |
386 |
|
|
* Reach here with the last page that was looked at pinned, |
387 |
|
|
* which may or may not be the same as the last (or original) |
388 |
|
|
* match page. If it's not useful, release it. |
389 |
|
|
*/ |
390 |
|
|
if (h->pgno != save.page->pgno) |
391 |
|
|
mpool_put(t->bt_mp, h, 0); |
392 |
|
|
|
393 |
|
|
*erval = save; |
394 |
|
|
return (RET_SUCCESS); |
395 |
|
|
} |
396 |
|
|
|
397 |
|
|
/* If at the end of a page, find the next entry. */ |
398 |
|
|
if (ep->index == NEXTINDEX(ep->page)) { |
399 |
|
|
h = ep->page; |
400 |
|
|
pg = h->nextpg; |
401 |
|
|
mpool_put(t->bt_mp, h, 0); |
402 |
|
|
if (pg == P_INVALID) |
403 |
|
|
return (RET_SPECIAL); |
404 |
|
|
if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) |
405 |
|
|
return (RET_ERROR); |
406 |
|
|
ep->index = 0; |
407 |
|
|
ep->page = h; |
408 |
|
|
} |
409 |
|
|
*erval = *ep; |
410 |
|
|
return (RET_SUCCESS); |
411 |
|
|
} |
412 |
|
|
|
413 |
|
|
/* |
414 |
|
|
* __bt_setcur -- |
415 |
|
|
* Set the cursor to an entry in the tree. |
416 |
|
|
* |
417 |
|
|
* Parameters: |
418 |
|
|
* t: the tree |
419 |
|
|
* pgno: page number |
420 |
|
|
* idx: page index |
421 |
|
|
*/ |
422 |
|
|
void |
423 |
|
|
__bt_setcur(BTREE *t, pgno_t pgno, u_int idx) |
424 |
|
|
{ |
425 |
|
|
/* Lose any already deleted key. */ |
426 |
|
|
if (t->bt_cursor.key.data != NULL) { |
427 |
|
|
free(t->bt_cursor.key.data); |
428 |
|
|
t->bt_cursor.key.size = 0; |
429 |
|
|
t->bt_cursor.key.data = NULL; |
430 |
|
|
} |
431 |
|
|
F_CLR(&t->bt_cursor, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE); |
432 |
|
|
|
433 |
|
|
/* Update the cursor. */ |
434 |
|
|
t->bt_cursor.pg.pgno = pgno; |
435 |
|
|
t->bt_cursor.pg.index = idx; |
436 |
|
|
F_SET(&t->bt_cursor, CURS_INIT); |
437 |
|
|
} |