1 |
|
|
/* $OpenBSD: icdb.c,v 1.8 2016/09/04 16:56:02 nicm Exp $ */ |
2 |
|
|
/* |
3 |
|
|
* Copyright (c) 2015 Ted Unangst <tedu@openbsd.org> |
4 |
|
|
* |
5 |
|
|
* Permission to use, copy, modify, and distribute this software for any |
6 |
|
|
* purpose with or without fee is hereby granted, provided that the above |
7 |
|
|
* copyright notice and this permission notice appear in all copies. |
8 |
|
|
* |
9 |
|
|
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
10 |
|
|
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
11 |
|
|
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR |
12 |
|
|
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
13 |
|
|
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
14 |
|
|
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
15 |
|
|
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
16 |
|
|
*/ |
17 |
|
|
|
18 |
|
|
#include <errno.h> |
19 |
|
|
#include <fcntl.h> |
20 |
|
|
#include <icdb.h> |
21 |
|
|
#include <stddef.h> |
22 |
|
|
#include <stdint.h> |
23 |
|
|
#include <stdio.h> |
24 |
|
|
#include <stdlib.h> |
25 |
|
|
#include <string.h> |
26 |
|
|
#include <unistd.h> |
27 |
|
|
|
28 |
|
|
#include <sys/mman.h> |
29 |
|
|
#include <sys/stat.h> |
30 |
|
|
|
31 |
|
|
#include <siphash.h> |
32 |
|
|
|
33 |
|
|
/* |
34 |
|
|
* Creating a new icdb: icdb_new |
35 |
|
|
* Opening existing icdb: icdb_open |
36 |
|
|
* |
37 |
|
|
* Adding new entries: icdb_add |
38 |
|
|
* Adding entries does not update the disk or indices. |
39 |
|
|
* |
40 |
|
|
* Save to disk: icdb_save |
41 |
|
|
* Update indices: icdb_rehash |
42 |
|
|
* icdb_save will call rehash. |
43 |
|
|
* |
44 |
|
|
* Change an existing entry: icdb_update |
45 |
|
|
* Changing entries does write to disk. |
46 |
|
|
* |
47 |
|
|
* Find an entry: icdb_lookup |
48 |
|
|
* Looking up an entry is only defined when the indices are synced. |
49 |
|
|
* |
50 |
|
|
* Close and free resources: icdb_close |
51 |
|
|
*/ |
52 |
|
|
|
53 |
|
|
/* |
54 |
|
|
* There are two major modes of operation. |
55 |
|
|
* |
56 |
|
|
* Existing databases use the mmap codepath. The entire database is mapped |
57 |
|
|
* into the address space for quick access. Individual entries may be updated, |
58 |
|
|
* but no new entries added. |
59 |
|
|
* |
60 |
|
|
* New databases use malloc backed memory instead. The database may be saved |
61 |
|
|
* with icdb_save. It should be saved to a new file to avoid corrupting any |
62 |
|
|
* open databases in other processes. |
63 |
|
|
*/ |
64 |
|
|
|
65 |
|
|
/* |
66 |
|
|
* An icdb has the following format: |
67 |
|
|
* struct icbinfo header |
68 |
|
|
* indexes [ uint32_t * indexsize * nkeys ] |
69 |
|
|
* entries [ entrysize * nentries ] |
70 |
|
|
* |
71 |
|
|
* To find an entry in the file, the user specifies which key to use. |
72 |
|
|
* The key is hashed and looked up in the index. The index contains the |
73 |
|
|
* position of the entry in the entries array. -1 identifies not found. |
74 |
|
|
* Chaining is done by rehashing the hash. All keys are fixed size byte arrays. |
75 |
|
|
*/ |
76 |
|
|
|
77 |
|
|
/* |
78 |
|
|
* Header info for icdb. This struct is stored on disk. |
79 |
|
|
*/ |
80 |
|
|
struct icdbinfo { |
81 |
|
|
uint32_t magic; /* magic */ |
82 |
|
|
uint32_t version; /* user specified version */ |
83 |
|
|
uint32_t nentries; /* number of entries stored */ |
84 |
|
|
uint32_t entrysize; /* size of each entry */ |
85 |
|
|
uint32_t indexsize; /* number of entries in hash index */ |
86 |
|
|
uint32_t nkeys; /* number of keys defined */ |
87 |
|
|
uint32_t keysize[8]; /* size of each key */ |
88 |
|
|
uint32_t keyoffset[8]; /* offset of each key in entry */ |
89 |
|
|
SIPHASH_KEY siphashkey; /* random hash key */ |
90 |
|
|
}; |
91 |
|
|
|
92 |
|
|
/* |
93 |
|
|
* In memory representation with auxiliary data. |
94 |
|
|
* idxdata and entries will be written to disk after info. |
95 |
|
|
*/ |
96 |
|
|
struct icdb { |
97 |
|
|
struct icdbinfo *info; |
98 |
|
|
void *idxdata[8]; |
99 |
|
|
void *entries; |
100 |
|
|
size_t maplen; |
101 |
|
|
uint32_t allocated; |
102 |
|
|
int fd; |
103 |
|
|
}; |
104 |
|
|
|
105 |
|
|
static const uint32_t magic = 0x1ca9d0b7; |
106 |
|
|
|
107 |
|
|
static uint32_t |
108 |
|
|
roundup(uint32_t num) |
109 |
|
|
{ |
110 |
|
|
uint32_t r = 2; |
111 |
|
|
|
112 |
|
|
while (r < num * 3 / 2) |
113 |
|
|
r *= 2; |
114 |
|
|
return r; |
115 |
|
|
} |
116 |
|
|
|
117 |
|
|
struct icdb * |
118 |
|
|
icdb_new(uint32_t version, uint32_t nentries, uint32_t entrysize, |
119 |
|
|
uint32_t nkeys, const uint32_t *keysizes, const uint32_t *keyoffsets) |
120 |
|
|
{ |
121 |
|
|
struct icdb *db; |
122 |
|
|
struct icdbinfo *info; |
123 |
|
|
int i; |
124 |
|
|
|
125 |
|
|
if (entrysize == 0 || entrysize > 1048576 || nkeys > 8) { |
126 |
|
|
errno = EINVAL; |
127 |
|
|
return NULL; |
128 |
|
|
} |
129 |
|
|
|
130 |
|
|
if (!(db = calloc(1, sizeof(*db)))) |
131 |
|
|
return NULL; |
132 |
|
|
if (!(info = calloc(1, sizeof(*info)))) { |
133 |
|
|
free(db); |
134 |
|
|
return NULL; |
135 |
|
|
} |
136 |
|
|
db->info = info; |
137 |
|
|
db->fd = -1; |
138 |
|
|
info->magic = magic; |
139 |
|
|
info->version = version; |
140 |
|
|
if (nentries) |
141 |
|
|
if ((db->entries = reallocarray(NULL, nentries, entrysize))) |
142 |
|
|
db->allocated = nentries; |
143 |
|
|
info->entrysize = entrysize; |
144 |
|
|
info->nkeys = nkeys; |
145 |
|
|
for (i = 0; i < nkeys; i++) { |
146 |
|
|
info->keysize[i] = keysizes[i]; |
147 |
|
|
info->keyoffset[i] = keyoffsets[i]; |
148 |
|
|
} |
149 |
|
|
return db; |
150 |
|
|
} |
151 |
|
|
DEF_WEAK(icdb_new); |
152 |
|
|
|
153 |
|
|
struct icdb * |
154 |
|
|
icdb_open(const char *name, int flags, uint32_t version) |
155 |
|
|
{ |
156 |
|
|
struct icdb *db = NULL; |
157 |
|
|
struct icdbinfo *info; |
158 |
|
|
struct stat sb; |
159 |
|
|
uint8_t *ptr = MAP_FAILED; |
160 |
|
|
uint32_t baseoff, indexsize, idxmask, idxlen; |
161 |
|
|
int fd, i, saved_errno; |
162 |
|
|
|
163 |
|
|
if ((fd = open(name, flags | O_CLOEXEC)) == -1) |
164 |
|
|
return NULL; |
165 |
|
|
if (fstat(fd, &sb) != 0) |
166 |
|
|
goto fail; |
167 |
|
|
if (sb.st_size < sizeof(struct icdbinfo)) |
168 |
|
|
goto fail; |
169 |
|
|
ptr = mmap(NULL, sb.st_size, PROT_READ | |
170 |
|
|
((flags & O_RDWR) ? PROT_WRITE : 0), MAP_SHARED, fd, 0); |
171 |
|
|
if (ptr == MAP_FAILED) |
172 |
|
|
goto fail; |
173 |
|
|
info = (struct icdbinfo *)ptr; |
174 |
|
|
if (info->magic != magic || info->version != version) { |
175 |
|
|
errno = ENOENT; |
176 |
|
|
goto fail; |
177 |
|
|
} |
178 |
|
|
|
179 |
|
|
if (!(db = calloc(1, sizeof(*db)))) |
180 |
|
|
goto fail; |
181 |
|
|
db->info = info; |
182 |
|
|
|
183 |
|
|
indexsize = info->indexsize; |
184 |
|
|
idxmask = indexsize - 1; |
185 |
|
|
idxlen = indexsize * sizeof(uint32_t); |
186 |
|
|
baseoff = sizeof(*info) + idxlen * info->nkeys; |
187 |
|
|
|
188 |
|
|
for (i = 0; i < info->nkeys; i++) |
189 |
|
|
db->idxdata[i] = ptr + sizeof(*info) + i * idxlen; |
190 |
|
|
db->entries = ptr + baseoff; |
191 |
|
|
db->maplen = sb.st_size; |
192 |
|
|
db->fd = fd; |
193 |
|
|
return db; |
194 |
|
|
|
195 |
|
|
fail: |
196 |
|
|
saved_errno = errno; |
197 |
|
|
if (ptr != MAP_FAILED) |
198 |
|
|
munmap(ptr, sb.st_size); |
199 |
|
|
if (fd != -1) |
200 |
|
|
close(fd); |
201 |
|
|
free(db); |
202 |
|
|
errno = saved_errno; |
203 |
|
|
return NULL; |
204 |
|
|
} |
205 |
|
|
DEF_WEAK(icdb_open); |
206 |
|
|
|
207 |
|
|
int |
208 |
|
|
icdb_get(struct icdb *db, void *entry, uint32_t idx) |
209 |
|
|
{ |
210 |
|
|
uint32_t entrysize = db->info->entrysize; |
211 |
|
|
|
212 |
|
|
memcpy(entry, (uint8_t *)db->entries + idx * entrysize, entrysize); |
213 |
|
|
return 0; |
214 |
|
|
} |
215 |
|
|
DEF_WEAK(icdb_get); |
216 |
|
|
|
217 |
|
|
int |
218 |
|
|
icdb_lookup(struct icdb *db, int keynum, const void *key, void *entry, |
219 |
|
|
uint32_t *idxp) |
220 |
|
|
{ |
221 |
|
|
struct icdbinfo *info = db->info; |
222 |
|
|
uint32_t offset; |
223 |
|
|
uint64_t hash; |
224 |
|
|
uint32_t indexsize, idxmask, idxlen; |
225 |
|
|
uint32_t *idxdata; |
226 |
|
|
|
227 |
|
|
indexsize = info->indexsize; |
228 |
|
|
idxmask = indexsize - 1; |
229 |
|
|
idxlen = indexsize * sizeof(uint32_t); |
230 |
|
|
|
231 |
|
|
idxdata = db->idxdata[keynum]; |
232 |
|
|
|
233 |
|
|
hash = SipHash24(&info->siphashkey, key, info->keysize[keynum]); |
234 |
|
|
while ((offset = idxdata[hash & idxmask]) != -1) { |
235 |
|
|
if (icdb_get(db, entry, offset) != 0) { |
236 |
|
|
errno = ENOENT; |
237 |
|
|
return -1; |
238 |
|
|
} |
239 |
|
|
if (memcmp((uint8_t *)entry + info->keyoffset[keynum], key, |
240 |
|
|
info->keysize[keynum]) == 0) { |
241 |
|
|
if (idxp) |
242 |
|
|
*idxp = offset; |
243 |
|
|
return 0; |
244 |
|
|
} |
245 |
|
|
hash = SipHash24(&info->siphashkey, &hash, sizeof(hash)); |
246 |
|
|
} |
247 |
|
|
return 1; |
248 |
|
|
} |
249 |
|
|
DEF_WEAK(icdb_lookup); |
250 |
|
|
|
251 |
|
|
int |
252 |
|
|
icdb_nentries(struct icdb *db) |
253 |
|
|
{ |
254 |
|
|
return db->info->nentries; |
255 |
|
|
} |
256 |
|
|
DEF_WEAK(icdb_nentries); |
257 |
|
|
|
258 |
|
|
const void * |
259 |
|
|
icdb_entries(struct icdb *db) |
260 |
|
|
{ |
261 |
|
|
return db->entries; |
262 |
|
|
} |
263 |
|
|
DEF_WEAK(icdb_entries); |
264 |
|
|
|
265 |
|
|
int |
266 |
|
|
icdb_update(struct icdb *db, const void *entry, int offset) |
267 |
|
|
{ |
268 |
|
|
struct icdbinfo *info = db->info; |
269 |
|
|
uint32_t entrysize = info->entrysize; |
270 |
|
|
uint32_t baseoff; |
271 |
|
|
uint32_t indexsize, idxmask, idxlen; |
272 |
|
|
|
273 |
|
|
indexsize = info->indexsize; |
274 |
|
|
idxmask = indexsize - 1; |
275 |
|
|
idxlen = indexsize * sizeof(uint32_t); |
276 |
|
|
baseoff = sizeof(*info) + idxlen * info->nkeys; |
277 |
|
|
|
278 |
|
|
memcpy((uint8_t *)db->entries + offset * entrysize, entry, entrysize); |
279 |
|
|
if (db->fd != -1) { |
280 |
|
|
msync((uint8_t *)db->entries + offset * entrysize, entrysize, |
281 |
|
|
MS_SYNC); |
282 |
|
|
} |
283 |
|
|
return 0; |
284 |
|
|
} |
285 |
|
|
DEF_WEAK(icdb_update); |
286 |
|
|
|
287 |
|
|
int |
288 |
|
|
icdb_add(struct icdb *db, const void *entry) |
289 |
|
|
{ |
290 |
|
|
struct icdbinfo *info = db->info; |
291 |
|
|
size_t entrysize = info->entrysize; |
292 |
|
|
|
293 |
|
|
if (db->allocated == info->nentries) { |
294 |
|
|
void *p; |
295 |
|
|
size_t amt = db->allocated ? db->allocated * 2 : 63; |
296 |
|
|
if (!(p = reallocarray(db->entries, amt, entrysize))) |
297 |
|
|
return -1; |
298 |
|
|
db->allocated = amt; |
299 |
|
|
db->entries = p; |
300 |
|
|
} |
301 |
|
|
memcpy((uint8_t *)db->entries + info->nentries * entrysize, |
302 |
|
|
entry, entrysize); |
303 |
|
|
info->nentries++; |
304 |
|
|
return 0; |
305 |
|
|
} |
306 |
|
|
DEF_WEAK(icdb_add); |
307 |
|
|
|
308 |
|
|
int |
309 |
|
|
icdb_rehash(struct icdb *db) |
310 |
|
|
{ |
311 |
|
|
struct icdbinfo *info = db->info; |
312 |
|
|
uint32_t entrysize = info->entrysize; |
313 |
|
|
uint32_t indexsize, idxmask, idxlen; |
314 |
|
|
int i, j; |
315 |
|
|
|
316 |
|
|
indexsize = info->indexsize = roundup(info->nentries); |
317 |
|
|
idxmask = indexsize - 1; |
318 |
|
|
idxlen = sizeof(uint32_t) * indexsize; |
319 |
|
|
|
320 |
|
|
arc4random_buf(&info->siphashkey, sizeof(info->siphashkey)); |
321 |
|
|
|
322 |
|
|
for (i = 0; i < info->nkeys; i++) { |
323 |
|
|
uint32_t *idxdata = reallocarray(db->idxdata[i], |
324 |
|
|
indexsize, sizeof(uint32_t)); |
325 |
|
|
if (!idxdata) |
326 |
|
|
return -1; |
327 |
|
|
memset(idxdata, 0xff, idxlen); |
328 |
|
|
db->idxdata[i] = idxdata; |
329 |
|
|
} |
330 |
|
|
for (j = 0; j < info->nentries; j++) { |
331 |
|
|
for (i = 0; i < info->nkeys; i++) { |
332 |
|
|
uint32_t *idxdata = db->idxdata[i]; |
333 |
|
|
uint64_t hash = SipHash24(&info->siphashkey, |
334 |
|
|
(uint8_t *)db->entries + j * entrysize + |
335 |
|
|
info->keyoffset[i], info->keysize[i]); |
336 |
|
|
while (idxdata[hash & idxmask] != -1) |
337 |
|
|
hash = SipHash24(&info->siphashkey, &hash, sizeof(hash)); |
338 |
|
|
idxdata[hash & idxmask] = j; |
339 |
|
|
} |
340 |
|
|
} |
341 |
|
|
return 0; |
342 |
|
|
} |
343 |
|
|
DEF_WEAK(icdb_rehash); |
344 |
|
|
|
345 |
|
|
int |
346 |
|
|
icdb_save(struct icdb *db, int fd) |
347 |
|
|
{ |
348 |
|
|
struct icdbinfo *info = db->info; |
349 |
|
|
uint32_t entrysize = info->entrysize; |
350 |
|
|
uint32_t indexsize, idxlen; |
351 |
|
|
int i; |
352 |
|
|
|
353 |
|
|
if (icdb_rehash(db) != 0) |
354 |
|
|
return -1; |
355 |
|
|
|
356 |
|
|
indexsize = info->indexsize; |
357 |
|
|
idxlen = sizeof(uint32_t) * indexsize; |
358 |
|
|
|
359 |
|
|
if (ftruncate(fd, 0) != 0) |
360 |
|
|
return -1; |
361 |
|
|
if (write(fd, info, sizeof(*info)) != sizeof(*info)) |
362 |
|
|
return -1; |
363 |
|
|
for (i = 0; i < info->nkeys; i++) { |
364 |
|
|
if (write(fd, db->idxdata[i], idxlen) != idxlen) |
365 |
|
|
return -1; |
366 |
|
|
} |
367 |
|
|
if (write(fd, db->entries, info->nentries * entrysize) != |
368 |
|
|
info->nentries * entrysize) |
369 |
|
|
return -1; |
370 |
|
|
return 0; |
371 |
|
|
} |
372 |
|
|
DEF_WEAK(icdb_save); |
373 |
|
|
|
374 |
|
|
int |
375 |
|
|
icdb_close(struct icdb *db) |
376 |
|
|
{ |
377 |
|
|
int i; |
378 |
|
|
|
379 |
|
|
if (db->fd == -1) { |
380 |
|
|
for (i = 0; i < db->info->nkeys; i++) |
381 |
|
|
free(db->idxdata[i]); |
382 |
|
|
free(db->entries); |
383 |
|
|
free(db->info); |
384 |
|
|
} else { |
385 |
|
|
munmap(db->info, db->maplen); |
386 |
|
|
close(db->fd); |
387 |
|
|
} |
388 |
|
|
free(db); |
389 |
|
|
return 0; |
390 |
|
|
} |
391 |
|
|
DEF_WEAK(icdb_close); |