1 |
|
|
/* $OpenBSD: mansearch.c,v 1.50 2016/07/09 15:23:36 schwarze Exp $ */ |
2 |
|
|
/* |
3 |
|
|
* Copyright (c) 2012 Kristaps Dzonsons <kristaps@bsd.lv> |
4 |
|
|
* Copyright (c) 2013, 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> |
5 |
|
|
* |
6 |
|
|
* Permission to use, copy, modify, and distribute this software for any |
7 |
|
|
* purpose with or without fee is hereby granted, provided that the above |
8 |
|
|
* copyright notice and this permission notice appear in all copies. |
9 |
|
|
* |
10 |
|
|
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES |
11 |
|
|
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
12 |
|
|
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR |
13 |
|
|
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
14 |
|
|
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
15 |
|
|
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
16 |
|
|
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
17 |
|
|
*/ |
18 |
|
|
|
19 |
|
|
#include <sys/mman.h> |
20 |
|
|
#include <sys/types.h> |
21 |
|
|
|
22 |
|
|
#include <assert.h> |
23 |
|
|
#include <err.h> |
24 |
|
|
#include <errno.h> |
25 |
|
|
#include <fcntl.h> |
26 |
|
|
#include <glob.h> |
27 |
|
|
#include <limits.h> |
28 |
|
|
#include <regex.h> |
29 |
|
|
#include <stdio.h> |
30 |
|
|
#include <stdint.h> |
31 |
|
|
#include <stddef.h> |
32 |
|
|
#include <stdlib.h> |
33 |
|
|
#include <string.h> |
34 |
|
|
#include <unistd.h> |
35 |
|
|
|
36 |
|
|
#include <sqlite3.h> |
37 |
|
|
|
38 |
|
|
#include "mandoc.h" |
39 |
|
|
#include "mandoc_aux.h" |
40 |
|
|
#include "mandoc_ohash.h" |
41 |
|
|
#include "manconf.h" |
42 |
|
|
#include "mansearch.h" |
43 |
|
|
|
44 |
|
|
extern int mansearch_keymax; |
45 |
|
|
extern const char *const mansearch_keynames[]; |
46 |
|
|
|
47 |
|
|
/*
 * Bind the NUL-terminated string _v (length -1, SQLITE_STATIC) to
 * parameter number _i of the prepared statement _s, then advance _i.
 * A bind failure is fatal: the SQLite error message of _db is printed
 * and the program exits with MANDOCLEVEL_SYSERR.
 * Note that _i is modified, so it must be an lvalue.
 */
#define	SQL_BIND_TEXT(_db, _s, _i, _v) \
	do { \
		if (sqlite3_bind_text((_s), (_i)++, (_v), -1, \
		    SQLITE_STATIC) != SQLITE_OK) \
			errx((int)MANDOCLEVEL_SYSERR, "%s", \
			    sqlite3_errmsg((_db))); \
	} while (0)
52 |
|
|
/*
 * Bind the 64-bit integer _v to parameter number _i of the prepared
 * statement _s, then advance _i (which must be an lvalue).
 * A bind failure is fatal: the SQLite error message of _db is printed
 * and the program exits with MANDOCLEVEL_SYSERR.
 */
#define	SQL_BIND_INT64(_db, _s, _i, _v) \
	do { \
		if (sqlite3_bind_int64((_s), (_i)++, (_v)) != SQLITE_OK) \
			errx((int)MANDOCLEVEL_SYSERR, "%s", \
			    sqlite3_errmsg((_db))); \
	} while (0)
57 |
|
|
/*
 * Bind the object _v itself (its address and sizeof, SQLITE_STATIC)
 * as a blob to parameter number _i of the prepared statement _s,
 * then advance _i (which must be an lvalue).
 * _v must be an lvalue because its address is taken.
 * A bind failure is fatal: the SQLite error message of _db is printed
 * and the program exits with MANDOCLEVEL_SYSERR.
 */
#define	SQL_BIND_BLOB(_db, _s, _i, _v) \
	do { \
		if (sqlite3_bind_blob((_s), (_i)++, (&_v), sizeof(_v), \
		    SQLITE_STATIC) != SQLITE_OK) \
			errx((int)MANDOCLEVEL_SYSERR, "%s", \
			    sqlite3_errmsg((_db))); \
	} while (0)
62 |
|
|
|
63 |
|
|
/*
 * One term of a compiled search expression.
 * Terms are chained through "next" in the order they were given.
 * A term is either a plain string (substr != NULL) or a regular
 * expression (substr == NULL, regexp compiled).
 */
struct	expr {
	regex_t		 regexp;  /* compiled regexp, used if substr is NULL */
	const char	*substr;  /* string to search for, or NULL */
	struct expr	*next;    /* following term, or NULL at the end */
	uint64_t	 bits;    /* mask of macro key types to search */
	int		 equal;   /* exact equality, not substring match */
	int		 open;    /* number of "(" preceding this term */
	int		 and;     /* joined to the previous term by AND */
	int		 close;   /* number of ")" following this term */
};
73 |
|
|
|
74 |
|
|
/*
 * One page found by the main search query.
 * Entries are kept in a hash table keyed on "pageid" while the
 * results of one database are being collected.
 */
struct	match {
	uint64_t	 pageid;  /* page identifier in the database */
	uint64_t	 bits;    /* name type mask */
	char		*desc;    /* manual page description, or NULL */
	int		 form;    /* bit field: formatted, zipped? */
};
80 |
|
|
|
81 |
|
|
static void buildnames(const struct mansearch *, |
82 |
|
|
struct manpage *, sqlite3 *, |
83 |
|
|
sqlite3_stmt *, uint64_t, |
84 |
|
|
const char *, int form); |
85 |
|
|
static char *buildoutput(sqlite3 *, sqlite3_stmt *, |
86 |
|
|
uint64_t, uint64_t); |
87 |
|
|
static struct expr *exprcomp(const struct mansearch *, |
88 |
|
|
int, char *[]); |
89 |
|
|
static void exprfree(struct expr *); |
90 |
|
|
static struct expr *exprterm(const struct mansearch *, char *, int); |
91 |
|
|
static int manpage_compare(const void *, const void *); |
92 |
|
|
static void sql_append(char **sql, size_t *sz, |
93 |
|
|
const char *newstr, int count); |
94 |
|
|
static void sql_match(sqlite3_context *context, |
95 |
|
|
int argc, sqlite3_value **argv); |
96 |
|
|
static void sql_regexp(sqlite3_context *context, |
97 |
|
|
int argc, sqlite3_value **argv); |
98 |
|
|
static char *sql_statement(const struct expr *); |
99 |
|
|
|
100 |
|
|
|
101 |
|
|
int |
102 |
|
|
mansearch_setup(int start) |
103 |
|
|
{ |
104 |
|
|
static void *pagecache; |
105 |
|
|
int c; |
106 |
|
|
|
107 |
|
|
#define PC_PAGESIZE 1280 |
108 |
|
|
#define PC_NUMPAGES 256 |
109 |
|
|
|
110 |
|
|
if (start) { |
111 |
|
|
if (NULL != pagecache) { |
112 |
|
|
warnx("pagecache already enabled"); |
113 |
|
|
return (int)MANDOCLEVEL_BADARG; |
114 |
|
|
} |
115 |
|
|
|
116 |
|
|
pagecache = mmap(NULL, PC_PAGESIZE * PC_NUMPAGES, |
117 |
|
|
PROT_READ | PROT_WRITE, |
118 |
|
|
MAP_SHARED | MAP_ANON, -1, 0); |
119 |
|
|
|
120 |
|
|
if (MAP_FAILED == pagecache) { |
121 |
|
|
warn("mmap"); |
122 |
|
|
pagecache = NULL; |
123 |
|
|
return (int)MANDOCLEVEL_SYSERR; |
124 |
|
|
} |
125 |
|
|
|
126 |
|
|
c = sqlite3_config(SQLITE_CONFIG_PAGECACHE, |
127 |
|
|
pagecache, PC_PAGESIZE, PC_NUMPAGES); |
128 |
|
|
|
129 |
|
|
if (SQLITE_OK == c) |
130 |
|
|
return (int)MANDOCLEVEL_OK; |
131 |
|
|
|
132 |
|
|
warnx("pagecache: %s", sqlite3_errstr(c)); |
133 |
|
|
|
134 |
|
|
} else if (NULL == pagecache) { |
135 |
|
|
warnx("pagecache missing"); |
136 |
|
|
return (int)MANDOCLEVEL_BADARG; |
137 |
|
|
} |
138 |
|
|
|
139 |
|
|
if (-1 == munmap(pagecache, PC_PAGESIZE * PC_NUMPAGES)) { |
140 |
|
|
warn("munmap"); |
141 |
|
|
pagecache = NULL; |
142 |
|
|
return (int)MANDOCLEVEL_SYSERR; |
143 |
|
|
} |
144 |
|
|
|
145 |
|
|
pagecache = NULL; |
146 |
|
|
return (int)MANDOCLEVEL_OK; |
147 |
|
|
} |
148 |
|
|
|
149 |
|
|
int |
150 |
|
|
mansearch(const struct mansearch *search, |
151 |
|
|
const struct manpaths *paths, |
152 |
|
|
int argc, char *argv[], |
153 |
|
|
struct manpage **res, size_t *sz) |
154 |
|
|
{ |
155 |
|
|
int64_t pageid; |
156 |
|
|
uint64_t outbit, iterbit; |
157 |
|
|
char buf[PATH_MAX]; |
158 |
|
|
char *sql; |
159 |
|
|
struct manpage *mpage; |
160 |
|
|
struct expr *e, *ep; |
161 |
|
|
sqlite3 *db; |
162 |
|
|
sqlite3_stmt *s, *s2; |
163 |
|
|
struct match *mp; |
164 |
|
|
struct ohash htab; |
165 |
|
|
unsigned int idx; |
166 |
|
|
size_t i, j, cur, maxres; |
167 |
|
|
int c, chdir_status, getcwd_status, indexbit; |
168 |
|
|
|
169 |
|
|
if (argc == 0 || (e = exprcomp(search, argc, argv)) == NULL) { |
170 |
|
|
*sz = 0; |
171 |
|
|
return 0; |
172 |
|
|
} |
173 |
|
|
|
174 |
|
|
cur = maxres = 0; |
175 |
|
|
*res = NULL; |
176 |
|
|
|
177 |
|
|
if (NULL != search->outkey) { |
178 |
|
|
outbit = TYPE_Nd; |
179 |
|
|
for (indexbit = 0, iterbit = 1; |
180 |
|
|
indexbit < mansearch_keymax; |
181 |
|
|
indexbit++, iterbit <<= 1) { |
182 |
|
|
if (0 == strcasecmp(search->outkey, |
183 |
|
|
mansearch_keynames[indexbit])) { |
184 |
|
|
outbit = iterbit; |
185 |
|
|
break; |
186 |
|
|
} |
187 |
|
|
} |
188 |
|
|
} else |
189 |
|
|
outbit = 0; |
190 |
|
|
|
191 |
|
|
/* |
192 |
|
|
* Remember the original working directory, if possible. |
193 |
|
|
* This will be needed if the second or a later directory |
194 |
|
|
* is given as a relative path. |
195 |
|
|
* Do not error out if the current directory is not |
196 |
|
|
* searchable: Maybe it won't be needed after all. |
197 |
|
|
*/ |
198 |
|
|
|
199 |
|
|
if (getcwd(buf, PATH_MAX) == NULL) { |
200 |
|
|
getcwd_status = 0; |
201 |
|
|
(void)strlcpy(buf, strerror(errno), sizeof(buf)); |
202 |
|
|
} else |
203 |
|
|
getcwd_status = 1; |
204 |
|
|
|
205 |
|
|
sql = sql_statement(e); |
206 |
|
|
|
207 |
|
|
/* |
208 |
|
|
* Loop over the directories (containing databases) for us to |
209 |
|
|
* search. |
210 |
|
|
* Don't let missing/bad databases/directories phase us. |
211 |
|
|
* In each, try to open the resident database and, if it opens, |
212 |
|
|
* scan it for our match expression. |
213 |
|
|
*/ |
214 |
|
|
|
215 |
|
|
chdir_status = 0; |
216 |
|
|
for (i = 0; i < paths->sz; i++) { |
217 |
|
|
if (chdir_status && paths->paths[i][0] != '/') { |
218 |
|
|
if ( ! getcwd_status) { |
219 |
|
|
warnx("%s: getcwd: %s", paths->paths[i], buf); |
220 |
|
|
continue; |
221 |
|
|
} else if (chdir(buf) == -1) { |
222 |
|
|
warn("%s", buf); |
223 |
|
|
continue; |
224 |
|
|
} |
225 |
|
|
} |
226 |
|
|
if (chdir(paths->paths[i]) == -1) { |
227 |
|
|
warn("%s", paths->paths[i]); |
228 |
|
|
continue; |
229 |
|
|
} |
230 |
|
|
chdir_status = 1; |
231 |
|
|
|
232 |
|
|
c = sqlite3_open_v2(MANDOC_DB, &db, |
233 |
|
|
SQLITE_OPEN_READONLY, NULL); |
234 |
|
|
|
235 |
|
|
if (SQLITE_OK != c) { |
236 |
|
|
warn("%s/%s", paths->paths[i], MANDOC_DB); |
237 |
|
|
sqlite3_close(db); |
238 |
|
|
continue; |
239 |
|
|
} |
240 |
|
|
|
241 |
|
|
/* |
242 |
|
|
* Define the SQL functions for substring |
243 |
|
|
* and regular expression matching. |
244 |
|
|
*/ |
245 |
|
|
|
246 |
|
|
c = sqlite3_create_function(db, "match", 2, |
247 |
|
|
SQLITE_UTF8 | SQLITE_DETERMINISTIC, |
248 |
|
|
NULL, sql_match, NULL, NULL); |
249 |
|
|
assert(SQLITE_OK == c); |
250 |
|
|
c = sqlite3_create_function(db, "regexp", 2, |
251 |
|
|
SQLITE_UTF8 | SQLITE_DETERMINISTIC, |
252 |
|
|
NULL, sql_regexp, NULL, NULL); |
253 |
|
|
assert(SQLITE_OK == c); |
254 |
|
|
|
255 |
|
|
j = 1; |
256 |
|
|
c = sqlite3_prepare_v2(db, sql, -1, &s, NULL); |
257 |
|
|
if (SQLITE_OK != c) |
258 |
|
|
errx((int)MANDOCLEVEL_SYSERR, |
259 |
|
|
"%s", sqlite3_errmsg(db)); |
260 |
|
|
|
261 |
|
|
for (ep = e; NULL != ep; ep = ep->next) { |
262 |
|
|
if (NULL == ep->substr) { |
263 |
|
|
SQL_BIND_BLOB(db, s, j, ep->regexp); |
264 |
|
|
} else |
265 |
|
|
SQL_BIND_TEXT(db, s, j, ep->substr); |
266 |
|
|
if (0 == ((TYPE_Nd | TYPE_Nm) & ep->bits)) |
267 |
|
|
SQL_BIND_INT64(db, s, j, ep->bits); |
268 |
|
|
} |
269 |
|
|
|
270 |
|
|
mandoc_ohash_init(&htab, 4, offsetof(struct match, pageid)); |
271 |
|
|
|
272 |
|
|
/* |
273 |
|
|
* Hash each entry on its [unique] document identifier. |
274 |
|
|
* This is a uint64_t. |
275 |
|
|
* Instead of using a hash function, simply convert the |
276 |
|
|
* uint64_t to a uint32_t, the hash value's type. |
277 |
|
|
* This gives good performance and preserves the |
278 |
|
|
* distribution of buckets in the table. |
279 |
|
|
*/ |
280 |
|
|
while (SQLITE_ROW == (c = sqlite3_step(s))) { |
281 |
|
|
pageid = sqlite3_column_int64(s, 2); |
282 |
|
|
idx = ohash_lookup_memory(&htab, |
283 |
|
|
(char *)&pageid, sizeof(uint64_t), |
284 |
|
|
(uint32_t)pageid); |
285 |
|
|
|
286 |
|
|
if (NULL != ohash_find(&htab, idx)) |
287 |
|
|
continue; |
288 |
|
|
|
289 |
|
|
mp = mandoc_calloc(1, sizeof(struct match)); |
290 |
|
|
mp->pageid = pageid; |
291 |
|
|
mp->form = sqlite3_column_int(s, 1); |
292 |
|
|
mp->bits = sqlite3_column_int64(s, 3); |
293 |
|
|
if (TYPE_Nd == outbit) |
294 |
|
|
mp->desc = mandoc_strdup((const char *) |
295 |
|
|
sqlite3_column_text(s, 0)); |
296 |
|
|
ohash_insert(&htab, idx, mp); |
297 |
|
|
} |
298 |
|
|
|
299 |
|
|
if (SQLITE_DONE != c) |
300 |
|
|
warnx("%s", sqlite3_errmsg(db)); |
301 |
|
|
|
302 |
|
|
sqlite3_finalize(s); |
303 |
|
|
|
304 |
|
|
c = sqlite3_prepare_v2(db, |
305 |
|
|
"SELECT sec, arch, name, pageid FROM mlinks " |
306 |
|
|
"WHERE pageid=? ORDER BY sec, arch, name", |
307 |
|
|
-1, &s, NULL); |
308 |
|
|
if (SQLITE_OK != c) |
309 |
|
|
errx((int)MANDOCLEVEL_SYSERR, |
310 |
|
|
"%s", sqlite3_errmsg(db)); |
311 |
|
|
|
312 |
|
|
c = sqlite3_prepare_v2(db, |
313 |
|
|
"SELECT bits, key, pageid FROM keys " |
314 |
|
|
"WHERE pageid=? AND bits & ?", |
315 |
|
|
-1, &s2, NULL); |
316 |
|
|
if (SQLITE_OK != c) |
317 |
|
|
errx((int)MANDOCLEVEL_SYSERR, |
318 |
|
|
"%s", sqlite3_errmsg(db)); |
319 |
|
|
|
320 |
|
|
for (mp = ohash_first(&htab, &idx); |
321 |
|
|
NULL != mp; |
322 |
|
|
mp = ohash_next(&htab, &idx)) { |
323 |
|
|
if (cur + 1 > maxres) { |
324 |
|
|
maxres += 1024; |
325 |
|
|
*res = mandoc_reallocarray(*res, |
326 |
|
|
maxres, sizeof(struct manpage)); |
327 |
|
|
} |
328 |
|
|
mpage = *res + cur; |
329 |
|
|
mpage->ipath = i; |
330 |
|
|
mpage->bits = mp->bits; |
331 |
|
|
mpage->sec = 10; |
332 |
|
|
mpage->form = mp->form; |
333 |
|
|
buildnames(search, mpage, db, s, mp->pageid, |
334 |
|
|
paths->paths[i], mp->form); |
335 |
|
|
if (mpage->names != NULL) { |
336 |
|
|
mpage->output = TYPE_Nd & outbit ? |
337 |
|
|
mp->desc : outbit ? |
338 |
|
|
buildoutput(db, s2, mp->pageid, outbit) : |
339 |
|
|
NULL; |
340 |
|
|
cur++; |
341 |
|
|
} |
342 |
|
|
free(mp); |
343 |
|
|
} |
344 |
|
|
|
345 |
|
|
sqlite3_finalize(s); |
346 |
|
|
sqlite3_finalize(s2); |
347 |
|
|
sqlite3_close(db); |
348 |
|
|
ohash_delete(&htab); |
349 |
|
|
|
350 |
|
|
/* |
351 |
|
|
* In man(1) mode, prefer matches in earlier trees |
352 |
|
|
* over matches in later trees. |
353 |
|
|
*/ |
354 |
|
|
|
355 |
|
|
if (cur && search->firstmatch) |
356 |
|
|
break; |
357 |
|
|
} |
358 |
|
|
qsort(*res, cur, sizeof(struct manpage), manpage_compare); |
359 |
|
|
if (chdir_status && getcwd_status && chdir(buf) == -1) |
360 |
|
|
warn("%s", buf); |
361 |
|
|
exprfree(e); |
362 |
|
|
free(sql); |
363 |
|
|
*sz = cur; |
364 |
|
|
return 1; |
365 |
|
|
} |
366 |
|
|
|
367 |
|
|
void |
368 |
|
|
mansearch_free(struct manpage *res, size_t sz) |
369 |
|
|
{ |
370 |
|
|
size_t i; |
371 |
|
|
|
372 |
|
|
for (i = 0; i < sz; i++) { |
373 |
|
|
free(res[i].file); |
374 |
|
|
free(res[i].names); |
375 |
|
|
free(res[i].output); |
376 |
|
|
} |
377 |
|
|
free(res); |
378 |
|
|
} |
379 |
|
|
|
380 |
|
|
static int |
381 |
|
|
manpage_compare(const void *vp1, const void *vp2) |
382 |
|
|
{ |
383 |
|
|
const struct manpage *mp1, *mp2; |
384 |
|
|
int diff; |
385 |
|
|
|
386 |
|
|
mp1 = vp1; |
387 |
|
|
mp2 = vp2; |
388 |
|
|
return (diff = mp2->bits - mp1->bits) ? diff : |
389 |
|
|
(diff = mp1->sec - mp2->sec) ? diff : |
390 |
|
|
strcasecmp(mp1->names, mp2->names); |
391 |
|
|
} |
392 |
|
|
|
393 |
|
|
static void |
394 |
|
|
buildnames(const struct mansearch *search, struct manpage *mpage, |
395 |
|
|
sqlite3 *db, sqlite3_stmt *s, |
396 |
|
|
uint64_t pageid, const char *path, int form) |
397 |
|
|
{ |
398 |
|
|
glob_t globinfo; |
399 |
|
|
char *firstname, *newnames, *prevsec, *prevarch; |
400 |
|
|
const char *oldnames, *sep1, *name, *sec, *sep2, *arch, *fsec; |
401 |
|
|
size_t i; |
402 |
|
|
int c, globres; |
403 |
|
|
|
404 |
|
|
mpage->file = NULL; |
405 |
|
|
mpage->names = NULL; |
406 |
|
|
firstname = prevsec = prevarch = NULL; |
407 |
|
|
i = 1; |
408 |
|
|
SQL_BIND_INT64(db, s, i, pageid); |
409 |
|
|
while (SQLITE_ROW == (c = sqlite3_step(s))) { |
410 |
|
|
|
411 |
|
|
/* Decide whether we already have some names. */ |
412 |
|
|
|
413 |
|
|
if (NULL == mpage->names) { |
414 |
|
|
oldnames = ""; |
415 |
|
|
sep1 = ""; |
416 |
|
|
} else { |
417 |
|
|
oldnames = mpage->names; |
418 |
|
|
sep1 = ", "; |
419 |
|
|
} |
420 |
|
|
|
421 |
|
|
/* Fetch the next name, rejecting sec/arch mismatches. */ |
422 |
|
|
|
423 |
|
|
sec = (const char *)sqlite3_column_text(s, 0); |
424 |
|
|
if (search->sec != NULL && strcasecmp(sec, search->sec)) |
425 |
|
|
continue; |
426 |
|
|
arch = (const char *)sqlite3_column_text(s, 1); |
427 |
|
|
if (search->arch != NULL && *arch != '\0' && |
428 |
|
|
strcasecmp(arch, search->arch)) |
429 |
|
|
continue; |
430 |
|
|
name = (const char *)sqlite3_column_text(s, 2); |
431 |
|
|
|
432 |
|
|
/* Remember the first section found. */ |
433 |
|
|
|
434 |
|
|
if (9 < mpage->sec && '1' <= *sec && '9' >= *sec) |
435 |
|
|
mpage->sec = (*sec - '1') + 1; |
436 |
|
|
|
437 |
|
|
/* If the section changed, append the old one. */ |
438 |
|
|
|
439 |
|
|
if (NULL != prevsec && |
440 |
|
|
(strcmp(sec, prevsec) || |
441 |
|
|
strcmp(arch, prevarch))) { |
442 |
|
|
sep2 = '\0' == *prevarch ? "" : "/"; |
443 |
|
|
mandoc_asprintf(&newnames, "%s(%s%s%s)", |
444 |
|
|
oldnames, prevsec, sep2, prevarch); |
445 |
|
|
free(mpage->names); |
446 |
|
|
oldnames = mpage->names = newnames; |
447 |
|
|
free(prevsec); |
448 |
|
|
free(prevarch); |
449 |
|
|
prevsec = prevarch = NULL; |
450 |
|
|
} |
451 |
|
|
|
452 |
|
|
/* Save the new section, to append it later. */ |
453 |
|
|
|
454 |
|
|
if (NULL == prevsec) { |
455 |
|
|
prevsec = mandoc_strdup(sec); |
456 |
|
|
prevarch = mandoc_strdup(arch); |
457 |
|
|
} |
458 |
|
|
|
459 |
|
|
/* Append the new name. */ |
460 |
|
|
|
461 |
|
|
mandoc_asprintf(&newnames, "%s%s%s", |
462 |
|
|
oldnames, sep1, name); |
463 |
|
|
free(mpage->names); |
464 |
|
|
mpage->names = newnames; |
465 |
|
|
|
466 |
|
|
/* Also save the first file name encountered. */ |
467 |
|
|
|
468 |
|
|
if (mpage->file != NULL) |
469 |
|
|
continue; |
470 |
|
|
|
471 |
|
|
if (form & FORM_SRC) { |
472 |
|
|
sep1 = "man"; |
473 |
|
|
fsec = sec; |
474 |
|
|
} else { |
475 |
|
|
sep1 = "cat"; |
476 |
|
|
fsec = "0"; |
477 |
|
|
} |
478 |
|
|
sep2 = *arch == '\0' ? "" : "/"; |
479 |
|
|
mandoc_asprintf(&mpage->file, "%s/%s%s%s%s/%s.%s", |
480 |
|
|
path, sep1, sec, sep2, arch, name, fsec); |
481 |
|
|
if (access(mpage->file, R_OK) != -1) |
482 |
|
|
continue; |
483 |
|
|
|
484 |
|
|
/* Handle unusual file name extensions. */ |
485 |
|
|
|
486 |
|
|
if (firstname == NULL) |
487 |
|
|
firstname = mpage->file; |
488 |
|
|
else |
489 |
|
|
free(mpage->file); |
490 |
|
|
mandoc_asprintf(&mpage->file, "%s/%s%s%s%s/%s.*", |
491 |
|
|
path, sep1, sec, sep2, arch, name); |
492 |
|
|
globres = glob(mpage->file, 0, NULL, &globinfo); |
493 |
|
|
free(mpage->file); |
494 |
|
|
mpage->file = globres ? NULL : |
495 |
|
|
mandoc_strdup(*globinfo.gl_pathv); |
496 |
|
|
globfree(&globinfo); |
497 |
|
|
} |
498 |
|
|
if (c != SQLITE_DONE) |
499 |
|
|
warnx("%s", sqlite3_errmsg(db)); |
500 |
|
|
sqlite3_reset(s); |
501 |
|
|
|
502 |
|
|
/* If none of the files is usable, use the first name. */ |
503 |
|
|
|
504 |
|
|
if (mpage->file == NULL) |
505 |
|
|
mpage->file = firstname; |
506 |
|
|
else if (mpage->file != firstname) |
507 |
|
|
free(firstname); |
508 |
|
|
|
509 |
|
|
/* Append one final section to the names. */ |
510 |
|
|
|
511 |
|
|
if (prevsec != NULL) { |
512 |
|
|
sep2 = *prevarch == '\0' ? "" : "/"; |
513 |
|
|
mandoc_asprintf(&newnames, "%s(%s%s%s)", |
514 |
|
|
mpage->names, prevsec, sep2, prevarch); |
515 |
|
|
free(mpage->names); |
516 |
|
|
mpage->names = newnames; |
517 |
|
|
free(prevsec); |
518 |
|
|
free(prevarch); |
519 |
|
|
} |
520 |
|
|
} |
521 |
|
|
|
522 |
|
|
static char * |
523 |
|
|
buildoutput(sqlite3 *db, sqlite3_stmt *s, uint64_t pageid, uint64_t outbit) |
524 |
|
|
{ |
525 |
|
|
char *output, *newoutput; |
526 |
|
|
const char *oldoutput, *sep1, *data; |
527 |
|
|
size_t i; |
528 |
|
|
int c; |
529 |
|
|
|
530 |
|
|
output = NULL; |
531 |
|
|
i = 1; |
532 |
|
|
SQL_BIND_INT64(db, s, i, pageid); |
533 |
|
|
SQL_BIND_INT64(db, s, i, outbit); |
534 |
|
|
while (SQLITE_ROW == (c = sqlite3_step(s))) { |
535 |
|
|
if (NULL == output) { |
536 |
|
|
oldoutput = ""; |
537 |
|
|
sep1 = ""; |
538 |
|
|
} else { |
539 |
|
|
oldoutput = output; |
540 |
|
|
sep1 = " # "; |
541 |
|
|
} |
542 |
|
|
data = (const char *)sqlite3_column_text(s, 1); |
543 |
|
|
mandoc_asprintf(&newoutput, "%s%s%s", |
544 |
|
|
oldoutput, sep1, data); |
545 |
|
|
free(output); |
546 |
|
|
output = newoutput; |
547 |
|
|
} |
548 |
|
|
if (SQLITE_DONE != c) |
549 |
|
|
warnx("%s", sqlite3_errmsg(db)); |
550 |
|
|
sqlite3_reset(s); |
551 |
|
|
return output; |
552 |
|
|
} |
553 |
|
|
|
554 |
|
|
/* |
555 |
|
|
* Implement substring match as an application-defined SQL function. |
556 |
|
|
* Using the SQL LIKE or GLOB operators instead would be a bad idea |
557 |
|
|
* because that would require escaping metacharacters in the string |
558 |
|
|
* being searched for. |
559 |
|
|
*/ |
560 |
|
|
static void |
561 |
|
|
sql_match(sqlite3_context *context, int argc, sqlite3_value **argv) |
562 |
|
|
{ |
563 |
|
|
|
564 |
|
|
assert(2 == argc); |
565 |
|
|
sqlite3_result_int(context, NULL != strcasestr( |
566 |
|
|
(const char *)sqlite3_value_text(argv[1]), |
567 |
|
|
(const char *)sqlite3_value_text(argv[0]))); |
568 |
|
|
} |
569 |
|
|
|
570 |
|
|
/* |
571 |
|
|
* Implement regular expression match |
572 |
|
|
* as an application-defined SQL function. |
573 |
|
|
*/ |
574 |
|
|
static void |
575 |
|
|
sql_regexp(sqlite3_context *context, int argc, sqlite3_value **argv) |
576 |
|
|
{ |
577 |
|
|
|
578 |
|
|
assert(2 == argc); |
579 |
|
|
sqlite3_result_int(context, !regexec( |
580 |
|
|
(regex_t *)sqlite3_value_blob(argv[0]), |
581 |
|
|
(const char *)sqlite3_value_text(argv[1]), |
582 |
|
|
0, NULL, 0)); |
583 |
|
|
} |
584 |
|
|
|
585 |
|
|
/*
 * Append to the allocated string *sql, whose current length is *sz,
 * and keep it NUL-terminated.
 * If count is greater than 1, the first character of newstr is
 * appended count times; otherwise all of newstr is appended once.
 * *sql and *sz are updated to the grown buffer and the new length.
 */
static void
sql_append(char **sql, size_t *sz, const char *newstr, int count)
{
	size_t	 addlen;
	char	*tail;

	addlen = count > 1 ? (size_t)count : strlen(newstr);
	*sql = mandoc_realloc(*sql, *sz + addlen + 1);
	tail = *sql + *sz;
	if (count > 1)
		memset(tail, *newstr, addlen);
	else
		memcpy(tail, newstr, addlen);
	tail[addlen] = '\0';
	*sz += addlen;
}
599 |
|
|
|
600 |
|
|
/* |
601 |
|
|
* Prepare the search SQL statement. |
602 |
|
|
*/ |
603 |
|
|
static char * |
604 |
|
|
sql_statement(const struct expr *e) |
605 |
|
|
{ |
606 |
|
|
char *sql; |
607 |
|
|
size_t sz; |
608 |
|
|
int needop; |
609 |
|
|
|
610 |
|
|
sql = mandoc_strdup(e->equal ? |
611 |
|
|
"SELECT desc, form, pageid, bits " |
612 |
|
|
"FROM mpages NATURAL JOIN names WHERE " : |
613 |
|
|
"SELECT desc, form, pageid, 0 FROM mpages WHERE "); |
614 |
|
|
sz = strlen(sql); |
615 |
|
|
|
616 |
|
|
for (needop = 0; NULL != e; e = e->next) { |
617 |
|
|
if (e->and) |
618 |
|
|
sql_append(&sql, &sz, " AND ", 1); |
619 |
|
|
else if (needop) |
620 |
|
|
sql_append(&sql, &sz, " OR ", 1); |
621 |
|
|
if (e->open) |
622 |
|
|
sql_append(&sql, &sz, "(", e->open); |
623 |
|
|
sql_append(&sql, &sz, |
624 |
|
|
TYPE_Nd & e->bits |
625 |
|
|
? (NULL == e->substr |
626 |
|
|
? "desc REGEXP ?" |
627 |
|
|
: "desc MATCH ?") |
628 |
|
|
: TYPE_Nm == e->bits |
629 |
|
|
? (NULL == e->substr |
630 |
|
|
? "pageid IN (SELECT pageid FROM names " |
631 |
|
|
"WHERE name REGEXP ?)" |
632 |
|
|
: e->equal |
633 |
|
|
? "name = ? " |
634 |
|
|
: "pageid IN (SELECT pageid FROM names " |
635 |
|
|
"WHERE name MATCH ?)") |
636 |
|
|
: (NULL == e->substr |
637 |
|
|
? "pageid IN (SELECT pageid FROM keys " |
638 |
|
|
"WHERE key REGEXP ? AND bits & ?)" |
639 |
|
|
: "pageid IN (SELECT pageid FROM keys " |
640 |
|
|
"WHERE key MATCH ? AND bits & ?)"), 1); |
641 |
|
|
if (e->close) |
642 |
|
|
sql_append(&sql, &sz, ")", e->close); |
643 |
|
|
needop = 1; |
644 |
|
|
} |
645 |
|
|
|
646 |
|
|
return sql; |
647 |
|
|
} |
648 |
|
|
|
649 |
|
|
/* |
650 |
|
|
* Compile a set of string tokens into an expression. |
651 |
|
|
* Tokens in "argv" are assumed to be individual expression atoms (e.g., |
652 |
|
|
* "(", "foo=bar", etc.). |
653 |
|
|
*/ |
654 |
|
|
static struct expr * |
655 |
|
|
exprcomp(const struct mansearch *search, int argc, char *argv[]) |
656 |
|
|
{ |
657 |
|
|
uint64_t mask; |
658 |
|
|
int i, toopen, logic, igncase, toclose; |
659 |
|
|
struct expr *first, *prev, *cur, *next; |
660 |
|
|
|
661 |
|
|
first = cur = NULL; |
662 |
|
|
logic = igncase = toopen = toclose = 0; |
663 |
|
|
|
664 |
|
|
for (i = 0; i < argc; i++) { |
665 |
|
|
if (0 == strcmp("(", argv[i])) { |
666 |
|
|
if (igncase) |
667 |
|
|
goto fail; |
668 |
|
|
toopen++; |
669 |
|
|
toclose++; |
670 |
|
|
continue; |
671 |
|
|
} else if (0 == strcmp(")", argv[i])) { |
672 |
|
|
if (toopen || logic || igncase || NULL == cur) |
673 |
|
|
goto fail; |
674 |
|
|
cur->close++; |
675 |
|
|
if (0 > --toclose) |
676 |
|
|
goto fail; |
677 |
|
|
continue; |
678 |
|
|
} else if (0 == strcmp("-a", argv[i])) { |
679 |
|
|
if (toopen || logic || igncase || NULL == cur) |
680 |
|
|
goto fail; |
681 |
|
|
logic = 1; |
682 |
|
|
continue; |
683 |
|
|
} else if (0 == strcmp("-o", argv[i])) { |
684 |
|
|
if (toopen || logic || igncase || NULL == cur) |
685 |
|
|
goto fail; |
686 |
|
|
logic = 2; |
687 |
|
|
continue; |
688 |
|
|
} else if (0 == strcmp("-i", argv[i])) { |
689 |
|
|
if (igncase) |
690 |
|
|
goto fail; |
691 |
|
|
igncase = 1; |
692 |
|
|
continue; |
693 |
|
|
} |
694 |
|
|
next = exprterm(search, argv[i], !igncase); |
695 |
|
|
if (NULL == next) |
696 |
|
|
goto fail; |
697 |
|
|
if (NULL == first) |
698 |
|
|
first = next; |
699 |
|
|
else |
700 |
|
|
cur->next = next; |
701 |
|
|
prev = cur = next; |
702 |
|
|
|
703 |
|
|
/* |
704 |
|
|
* Searching for descriptions must be split out |
705 |
|
|
* because they are stored in the mpages table, |
706 |
|
|
* not in the keys table. |
707 |
|
|
*/ |
708 |
|
|
|
709 |
|
|
for (mask = TYPE_Nm; mask <= TYPE_Nd; mask <<= 1) { |
710 |
|
|
if (mask & cur->bits && ~mask & cur->bits) { |
711 |
|
|
next = mandoc_calloc(1, |
712 |
|
|
sizeof(struct expr)); |
713 |
|
|
memcpy(next, cur, sizeof(struct expr)); |
714 |
|
|
prev->open = 1; |
715 |
|
|
cur->bits = mask; |
716 |
|
|
cur->next = next; |
717 |
|
|
cur = next; |
718 |
|
|
cur->bits &= ~mask; |
719 |
|
|
} |
720 |
|
|
} |
721 |
|
|
prev->and = (1 == logic); |
722 |
|
|
prev->open += toopen; |
723 |
|
|
if (cur != prev) |
724 |
|
|
cur->close = 1; |
725 |
|
|
|
726 |
|
|
toopen = logic = igncase = 0; |
727 |
|
|
} |
728 |
|
|
if ( ! (toopen || logic || igncase || toclose)) |
729 |
|
|
return first; |
730 |
|
|
|
731 |
|
|
fail: |
732 |
|
|
if (NULL != first) |
733 |
|
|
exprfree(first); |
734 |
|
|
return NULL; |
735 |
|
|
} |
736 |
|
|
|
737 |
|
|
static struct expr * |
738 |
|
|
exprterm(const struct mansearch *search, char *buf, int cs) |
739 |
|
|
{ |
740 |
|
|
char errbuf[BUFSIZ]; |
741 |
|
|
struct expr *e; |
742 |
|
|
char *key, *val; |
743 |
|
|
uint64_t iterbit; |
744 |
|
|
int i, irc; |
745 |
|
|
|
746 |
|
|
if ('\0' == *buf) |
747 |
|
|
return NULL; |
748 |
|
|
|
749 |
|
|
e = mandoc_calloc(1, sizeof(struct expr)); |
750 |
|
|
|
751 |
|
|
if (search->argmode == ARG_NAME) { |
752 |
|
|
e->bits = TYPE_Nm; |
753 |
|
|
e->substr = buf; |
754 |
|
|
e->equal = 1; |
755 |
|
|
return e; |
756 |
|
|
} |
757 |
|
|
|
758 |
|
|
/* |
759 |
|
|
* Separate macro keys from search string. |
760 |
|
|
* If needed, request regular expression handling |
761 |
|
|
* by setting e->substr to NULL. |
762 |
|
|
*/ |
763 |
|
|
|
764 |
|
|
if (search->argmode == ARG_WORD) { |
765 |
|
|
e->bits = TYPE_Nm; |
766 |
|
|
e->substr = NULL; |
767 |
|
|
mandoc_asprintf(&val, "[[:<:]]%s[[:>:]]", buf); |
768 |
|
|
cs = 0; |
769 |
|
|
} else if ((val = strpbrk(buf, "=~")) == NULL) { |
770 |
|
|
e->bits = TYPE_Nm | TYPE_Nd; |
771 |
|
|
e->substr = buf; |
772 |
|
|
} else { |
773 |
|
|
if (val == buf) |
774 |
|
|
e->bits = TYPE_Nm | TYPE_Nd; |
775 |
|
|
if ('=' == *val) |
776 |
|
|
e->substr = val + 1; |
777 |
|
|
*val++ = '\0'; |
778 |
|
|
if (NULL != strstr(buf, "arch")) |
779 |
|
|
cs = 0; |
780 |
|
|
} |
781 |
|
|
|
782 |
|
|
/* Compile regular expressions. */ |
783 |
|
|
|
784 |
|
|
if (NULL == e->substr) { |
785 |
|
|
irc = regcomp(&e->regexp, val, |
786 |
|
|
REG_EXTENDED | REG_NOSUB | (cs ? 0 : REG_ICASE)); |
787 |
|
|
if (search->argmode == ARG_WORD) |
788 |
|
|
free(val); |
789 |
|
|
if (irc) { |
790 |
|
|
regerror(irc, &e->regexp, errbuf, sizeof(errbuf)); |
791 |
|
|
warnx("regcomp: %s", errbuf); |
792 |
|
|
free(e); |
793 |
|
|
return NULL; |
794 |
|
|
} |
795 |
|
|
} |
796 |
|
|
|
797 |
|
|
if (e->bits) |
798 |
|
|
return e; |
799 |
|
|
|
800 |
|
|
/* |
801 |
|
|
* Parse out all possible fields. |
802 |
|
|
* If the field doesn't resolve, bail. |
803 |
|
|
*/ |
804 |
|
|
|
805 |
|
|
while (NULL != (key = strsep(&buf, ","))) { |
806 |
|
|
if ('\0' == *key) |
807 |
|
|
continue; |
808 |
|
|
for (i = 0, iterbit = 1; |
809 |
|
|
i < mansearch_keymax; |
810 |
|
|
i++, iterbit <<= 1) { |
811 |
|
|
if (0 == strcasecmp(key, |
812 |
|
|
mansearch_keynames[i])) { |
813 |
|
|
e->bits |= iterbit; |
814 |
|
|
break; |
815 |
|
|
} |
816 |
|
|
} |
817 |
|
|
if (i == mansearch_keymax) { |
818 |
|
|
if (strcasecmp(key, "any")) { |
819 |
|
|
free(e); |
820 |
|
|
return NULL; |
821 |
|
|
} |
822 |
|
|
e->bits |= ~0ULL; |
823 |
|
|
} |
824 |
|
|
} |
825 |
|
|
|
826 |
|
|
return e; |
827 |
|
|
} |
828 |
|
|
|
829 |
|
|
static void |
830 |
|
|
exprfree(struct expr *p) |
831 |
|
|
{ |
832 |
|
|
struct expr *pp; |
833 |
|
|
|
834 |
|
|
while (NULL != p) { |
835 |
|
|
pp = p->next; |
836 |
|
|
free(p); |
837 |
|
|
p = pp; |
838 |
|
|
} |
839 |
|
|
} |