1 |
|
|
/* $OpenBSD: bt_put.c,v 1.13 2005/08/05 13:02:59 espie Exp $ */ |
2 |
|
|
|
3 |
|
|
/*- |
4 |
|
|
* Copyright (c) 1990, 1993, 1994 |
5 |
|
|
* The Regents of the University of California. All rights reserved. |
6 |
|
|
* |
7 |
|
|
* This code is derived from software contributed to Berkeley by |
8 |
|
|
* Mike Olson. |
9 |
|
|
* |
10 |
|
|
* Redistribution and use in source and binary forms, with or without |
11 |
|
|
* modification, are permitted provided that the following conditions |
12 |
|
|
* are met: |
13 |
|
|
* 1. Redistributions of source code must retain the above copyright |
14 |
|
|
* notice, this list of conditions and the following disclaimer. |
15 |
|
|
* 2. Redistributions in binary form must reproduce the above copyright |
16 |
|
|
* notice, this list of conditions and the following disclaimer in the |
17 |
|
|
* documentation and/or other materials provided with the distribution. |
18 |
|
|
* 3. Neither the name of the University nor the names of its contributors |
19 |
|
|
* may be used to endorse or promote products derived from this software |
20 |
|
|
* without specific prior written permission. |
21 |
|
|
* |
22 |
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
23 |
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
24 |
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
25 |
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
26 |
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
27 |
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
28 |
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
29 |
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
30 |
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
31 |
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
32 |
|
|
* SUCH DAMAGE. |
33 |
|
|
*/ |
34 |
|
|
|
35 |
|
|
#include <sys/types.h> |
36 |
|
|
|
37 |
|
|
#include <errno.h> |
38 |
|
|
#include <stdio.h> |
39 |
|
|
#include <stdlib.h> |
40 |
|
|
#include <string.h> |
41 |
|
|
|
42 |
|
|
#include <db.h> |
43 |
|
|
#include "btree.h" |
44 |
|
|
|
45 |
|
|
static EPG *bt_fast(BTREE *, const DBT *, const DBT *, int *); |
46 |
|
|
|
47 |
|
|
/* |
48 |
|
|
* __BT_PUT -- Add a btree item to the tree. |
49 |
|
|
* |
50 |
|
|
* Parameters: |
51 |
|
|
* dbp: pointer to access method |
52 |
|
|
* key: key |
53 |
|
|
* data: data |
54 |
|
|
* flag: R_NOOVERWRITE |
55 |
|
|
* |
56 |
|
|
* Returns: |
57 |
|
|
* RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key is already in the |
58 |
|
|
* tree and R_NOOVERWRITE specified. |
59 |
|
|
*/ |
60 |
|
|
int |
61 |
|
|
__bt_put(const DB *dbp, DBT *key, const DBT *data, u_int flags) |
62 |
|
|
{ |
63 |
|
|
BTREE *t; |
64 |
|
|
DBT tkey, tdata; |
65 |
|
|
EPG *e; |
66 |
|
|
PAGE *h; |
67 |
|
|
indx_t idx, nxtindex; |
68 |
|
|
pgno_t pg; |
69 |
|
|
u_int32_t nbytes, size32; |
70 |
|
|
int dflags, exact, status; |
71 |
|
|
char *dest, db[NOVFLSIZE], kb[NOVFLSIZE]; |
72 |
|
|
|
73 |
|
|
t = dbp->internal; |
74 |
|
|
|
75 |
|
|
/* Toss any page pinned across calls. */ |
76 |
|
|
if (t->bt_pinned != NULL) { |
77 |
|
|
mpool_put(t->bt_mp, t->bt_pinned, 0); |
78 |
|
|
t->bt_pinned = NULL; |
79 |
|
|
} |
80 |
|
|
|
81 |
|
|
/* Check for change to a read-only tree. */ |
82 |
|
|
if (F_ISSET(t, B_RDONLY)) { |
83 |
|
|
errno = EPERM; |
84 |
|
|
return (RET_ERROR); |
85 |
|
|
} |
86 |
|
|
|
87 |
|
|
switch (flags) { |
88 |
|
|
case 0: |
89 |
|
|
case R_NOOVERWRITE: |
90 |
|
|
break; |
91 |
|
|
case R_CURSOR: |
92 |
|
|
/* |
93 |
|
|
* If flags is R_CURSOR, put the cursor. Must already |
94 |
|
|
* have started a scan and not have already deleted it. |
95 |
|
|
*/ |
96 |
|
|
if (F_ISSET(&t->bt_cursor, CURS_INIT) && |
97 |
|
|
!F_ISSET(&t->bt_cursor, |
98 |
|
|
CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE)) |
99 |
|
|
break; |
100 |
|
|
/* FALLTHROUGH */ |
101 |
|
|
default: |
102 |
|
|
errno = EINVAL; |
103 |
|
|
return (RET_ERROR); |
104 |
|
|
} |
105 |
|
|
|
106 |
|
|
/* |
107 |
|
|
* If the key/data pair won't fit on a page, store it on overflow |
108 |
|
|
* pages. Only put the key on the overflow page if the pair are |
109 |
|
|
* still too big after moving the data to an overflow page. |
110 |
|
|
* |
111 |
|
|
* XXX |
112 |
|
|
* If the insert fails later on, the overflow pages aren't recovered. |
113 |
|
|
*/ |
114 |
|
|
dflags = 0; |
115 |
|
|
if (key->size + data->size > t->bt_ovflsize) { |
116 |
|
|
if (key->size > t->bt_ovflsize) { |
117 |
|
|
storekey: if (__ovfl_put(t, key, &pg) == RET_ERROR) |
118 |
|
|
return (RET_ERROR); |
119 |
|
|
tkey.data = kb; |
120 |
|
|
tkey.size = NOVFLSIZE; |
121 |
|
|
memmove(kb, &pg, sizeof(pgno_t)); |
122 |
|
|
size32 = key->size; |
123 |
|
|
memmove(kb + sizeof(pgno_t), |
124 |
|
|
&size32, sizeof(u_int32_t)); |
125 |
|
|
dflags |= P_BIGKEY; |
126 |
|
|
key = &tkey; |
127 |
|
|
} |
128 |
|
|
if (key->size + data->size > t->bt_ovflsize) { |
129 |
|
|
if (__ovfl_put(t, data, &pg) == RET_ERROR) |
130 |
|
|
return (RET_ERROR); |
131 |
|
|
tdata.data = db; |
132 |
|
|
tdata.size = NOVFLSIZE; |
133 |
|
|
memmove(db, &pg, sizeof(pgno_t)); |
134 |
|
|
size32 = data->size; |
135 |
|
|
memmove(db + sizeof(pgno_t), |
136 |
|
|
&size32, sizeof(u_int32_t)); |
137 |
|
|
dflags |= P_BIGDATA; |
138 |
|
|
data = &tdata; |
139 |
|
|
} |
140 |
|
|
if (key->size + data->size > t->bt_ovflsize) |
141 |
|
|
goto storekey; |
142 |
|
|
} |
143 |
|
|
|
144 |
|
|
/* Replace the cursor. */ |
145 |
|
|
if (flags == R_CURSOR) { |
146 |
|
|
if ((h = mpool_get(t->bt_mp, t->bt_cursor.pg.pgno, 0)) == NULL) |
147 |
|
|
return (RET_ERROR); |
148 |
|
|
idx = t->bt_cursor.pg.index; |
149 |
|
|
goto delete; |
150 |
|
|
} |
151 |
|
|
|
152 |
|
|
/* |
153 |
|
|
* Find the key to delete, or, the location at which to insert. |
154 |
|
|
* Bt_fast and __bt_search both pin the returned page. |
155 |
|
|
*/ |
156 |
|
|
if (t->bt_order == NOT || (e = bt_fast(t, key, data, &exact)) == NULL) |
157 |
|
|
if ((e = __bt_search(t, key, &exact)) == NULL) |
158 |
|
|
return (RET_ERROR); |
159 |
|
|
h = e->page; |
160 |
|
|
idx = e->index; |
161 |
|
|
|
162 |
|
|
/* |
163 |
|
|
* Add the key/data pair to the tree. If an identical key is already |
164 |
|
|
* in the tree, and R_NOOVERWRITE is set, an error is returned. If |
165 |
|
|
* R_NOOVERWRITE is not set, the key is either added (if duplicates are |
166 |
|
|
* permitted) or an error is returned. |
167 |
|
|
*/ |
168 |
|
|
switch (flags) { |
169 |
|
|
case R_NOOVERWRITE: |
170 |
|
|
if (!exact) |
171 |
|
|
break; |
172 |
|
|
mpool_put(t->bt_mp, h, 0); |
173 |
|
|
return (RET_SPECIAL); |
174 |
|
|
default: |
175 |
|
|
if (!exact || !F_ISSET(t, B_NODUPS)) |
176 |
|
|
break; |
177 |
|
|
/* |
178 |
|
|
* !!! |
179 |
|
|
* Note, the delete may empty the page, so we need to put a |
180 |
|
|
* new entry into the page immediately. |
181 |
|
|
*/ |
182 |
|
|
delete: if (__bt_dleaf(t, key, h, idx) == RET_ERROR) { |
183 |
|
|
mpool_put(t->bt_mp, h, 0); |
184 |
|
|
return (RET_ERROR); |
185 |
|
|
} |
186 |
|
|
break; |
187 |
|
|
} |
188 |
|
|
|
189 |
|
|
/* |
190 |
|
|
* If not enough room, or the user has put a ceiling on the number of |
191 |
|
|
* keys permitted in the page, split the page. The split code will |
192 |
|
|
* insert the key and data and unpin the current page. If inserting |
193 |
|
|
* into the offset array, shift the pointers up. |
194 |
|
|
*/ |
195 |
|
|
nbytes = NBLEAFDBT(key->size, data->size); |
196 |
|
|
if (h->upper - h->lower < nbytes + sizeof(indx_t)) { |
197 |
|
|
if ((status = __bt_split(t, h, key, |
198 |
|
|
data, dflags, nbytes, idx)) != RET_SUCCESS) |
199 |
|
|
return (status); |
200 |
|
|
goto success; |
201 |
|
|
} |
202 |
|
|
|
203 |
|
|
if (idx < (nxtindex = NEXTINDEX(h))) |
204 |
|
|
memmove(h->linp + idx + 1, h->linp + idx, |
205 |
|
|
(nxtindex - idx) * sizeof(indx_t)); |
206 |
|
|
h->lower += sizeof(indx_t); |
207 |
|
|
|
208 |
|
|
h->linp[idx] = h->upper -= nbytes; |
209 |
|
|
dest = (char *)h + h->upper; |
210 |
|
|
WR_BLEAF(dest, key, data, dflags); |
211 |
|
|
|
212 |
|
|
/* If the cursor is on this page, adjust it as necessary. */ |
213 |
|
|
if (F_ISSET(&t->bt_cursor, CURS_INIT) && |
214 |
|
|
!F_ISSET(&t->bt_cursor, CURS_ACQUIRE) && |
215 |
|
|
t->bt_cursor.pg.pgno == h->pgno && t->bt_cursor.pg.index >= idx) |
216 |
|
|
++t->bt_cursor.pg.index; |
217 |
|
|
|
218 |
|
|
if (t->bt_order == NOT) { |
219 |
|
|
if (h->nextpg == P_INVALID) { |
220 |
|
|
if (idx == NEXTINDEX(h) - 1) { |
221 |
|
|
t->bt_order = FORWARD; |
222 |
|
|
t->bt_last.index = idx; |
223 |
|
|
t->bt_last.pgno = h->pgno; |
224 |
|
|
} |
225 |
|
|
} else if (h->prevpg == P_INVALID) { |
226 |
|
|
if (idx == 0) { |
227 |
|
|
t->bt_order = BACK; |
228 |
|
|
t->bt_last.index = 0; |
229 |
|
|
t->bt_last.pgno = h->pgno; |
230 |
|
|
} |
231 |
|
|
} |
232 |
|
|
} |
233 |
|
|
|
234 |
|
|
mpool_put(t->bt_mp, h, MPOOL_DIRTY); |
235 |
|
|
|
236 |
|
|
success: |
237 |
|
|
if (flags == R_SETCURSOR) |
238 |
|
|
__bt_setcur(t, e->page->pgno, e->index); |
239 |
|
|
|
240 |
|
|
F_SET(t, B_MODIFIED); |
241 |
|
|
return (RET_SUCCESS); |
242 |
|
|
} |
243 |
|
|
|
244 |
|
|
#ifdef STATISTICS |
245 |
|
|
u_long bt_cache_hit, bt_cache_miss; |
246 |
|
|
#endif |
247 |
|
|
|
248 |
|
|
/* |
249 |
|
|
* BT_FAST -- Do a quick check for sorted data. |
250 |
|
|
* |
251 |
|
|
* Parameters: |
252 |
|
|
* t: tree |
253 |
|
|
* key: key to insert |
254 |
|
|
* |
255 |
|
|
* Returns: |
256 |
|
|
* EPG for new record or NULL if not found. |
257 |
|
|
*/ |
258 |
|
|
static EPG * |
259 |
|
|
bt_fast(BTREE *t, const DBT *key, const DBT *data, int *exactp) |
260 |
|
|
{ |
261 |
|
|
PAGE *h; |
262 |
|
|
u_int32_t nbytes; |
263 |
|
|
int cmp; |
264 |
|
|
|
265 |
|
|
if ((h = mpool_get(t->bt_mp, t->bt_last.pgno, 0)) == NULL) { |
266 |
|
|
t->bt_order = NOT; |
267 |
|
|
return (NULL); |
268 |
|
|
} |
269 |
|
|
t->bt_cur.page = h; |
270 |
|
|
t->bt_cur.index = t->bt_last.index; |
271 |
|
|
|
272 |
|
|
/* |
273 |
|
|
* If won't fit in this page or have too many keys in this page, |
274 |
|
|
* have to search to get split stack. |
275 |
|
|
*/ |
276 |
|
|
nbytes = NBLEAFDBT(key->size, data->size); |
277 |
|
|
if (h->upper - h->lower < nbytes + sizeof(indx_t)) |
278 |
|
|
goto miss; |
279 |
|
|
|
280 |
|
|
if (t->bt_order == FORWARD) { |
281 |
|
|
if (t->bt_cur.page->nextpg != P_INVALID) |
282 |
|
|
goto miss; |
283 |
|
|
if (t->bt_cur.index != NEXTINDEX(h) - 1) |
284 |
|
|
goto miss; |
285 |
|
|
if ((cmp = __bt_cmp(t, key, &t->bt_cur)) < 0) |
286 |
|
|
goto miss; |
287 |
|
|
t->bt_last.index = cmp ? ++t->bt_cur.index : t->bt_cur.index; |
288 |
|
|
} else { |
289 |
|
|
if (t->bt_cur.page->prevpg != P_INVALID) |
290 |
|
|
goto miss; |
291 |
|
|
if (t->bt_cur.index != 0) |
292 |
|
|
goto miss; |
293 |
|
|
if ((cmp = __bt_cmp(t, key, &t->bt_cur)) > 0) |
294 |
|
|
goto miss; |
295 |
|
|
t->bt_last.index = 0; |
296 |
|
|
} |
297 |
|
|
*exactp = cmp == 0; |
298 |
|
|
#ifdef STATISTICS |
299 |
|
|
++bt_cache_hit; |
300 |
|
|
#endif |
301 |
|
|
return (&t->bt_cur); |
302 |
|
|
|
303 |
|
|
miss: |
304 |
|
|
#ifdef STATISTICS |
305 |
|
|
++bt_cache_miss; |
306 |
|
|
#endif |
307 |
|
|
t->bt_order = NOT; |
308 |
|
|
mpool_put(t->bt_mp, h, 0); |
309 |
|
|
return (NULL); |
310 |
|
|
} |