GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: usr.bin/mandoc/mandocdb.c Lines: 608 1003 60.6 %
Date: 2017-11-07 Branches: 354 774 45.7 %

Line Branch Exec Source
1
/*	$OpenBSD: mandocdb.c,v 1.205 2017/08/26 20:38:09 schwarze Exp $ */
2
/*
3
 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4
 * Copyright (c) 2011-2017 Ingo Schwarze <schwarze@openbsd.org>
5
 * Copyright (c) 2016 Ed Maste <emaste@freebsd.org>
6
 *
7
 * Permission to use, copy, modify, and distribute this software for any
8
 * purpose with or without fee is hereby granted, provided that the above
9
 * copyright notice and this permission notice appear in all copies.
10
 *
11
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
12
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
14
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18
 */
19
#include <sys/types.h>
20
#include <sys/mman.h>
21
#include <sys/stat.h>
22
23
#include <assert.h>
24
#include <ctype.h>
25
#include <err.h>
26
#include <errno.h>
27
#include <fcntl.h>
28
#include <fts.h>
29
#include <limits.h>
30
#include <stdarg.h>
31
#include <stddef.h>
32
#include <stdio.h>
33
#include <stdint.h>
34
#include <stdlib.h>
35
#include <string.h>
36
#include <unistd.h>
37
38
#include "mandoc_aux.h"
39
#include "mandoc_ohash.h"
40
#include "mandoc.h"
41
#include "roff.h"
42
#include "mdoc.h"
43
#include "man.h"
44
#include "manconf.h"
45
#include "mansearch.h"
46
#include "dba_array.h"
47
#include "dba.h"
48
49
extern const char *const mansearch_keynames[];
50
51
/*
 * Operating mode of the program.
 * OP_DEFAULT must stay zero: it is the value the option parser
 * compares against to detect conflicting mode options.
 */
enum	op {
	OP_DEFAULT = 0,	/* build new databases from a directory list
			 * or from the default configuration */
	OP_CONFFILE,	/* build new databases from a custom
			 * configuration file */
	OP_UPDATE,	/* delete and add entries in an existing database */
	OP_DELETE,	/* only delete entries from an existing database */
	OP_TEST		/* touch no database, only report potential
			 * problems */
};
58
59
/*
 * One string stored in a hash table: the text itself is kept in the
 * flexible array member at the end of the structure.
 */
struct	str {
	const struct mpage *mpage;	/* owning parse, if set */
	uint64_t	 mask;		/* bitmask in sequence */
	char		 key[];		/* the rendered text */
};
64
65
/*
 * Inode number and device number of a file; used as the hash key
 * of a manual page.
 */
struct	inodev {
	ino_t		 st_ino;	/* inode number */
	dev_t		 st_dev;	/* device number */
};
69
70
struct	mpage {
71
	struct inodev	 inodev;  /* used for hashing routine */
72
	struct dba_array *dba;
73
	char		*sec;     /* section from file content */
74
	char		*arch;    /* architecture from file content */
75
	char		*title;   /* title from file content */
76
	char		*desc;    /* description from file content */
77
	struct mpage	*next;    /* singly linked list */
78
	struct mlink	*mlinks;  /* singly linked list */
79
	int		 name_head_done;
80
	enum form	 form;    /* format from file content */
81
};
82
83
struct	mlink {
84
	char		 file[PATH_MAX]; /* filename rel. to manpath */
85
	char		*dsec;    /* section from directory */
86
	char		*arch;    /* architecture from directory */
87
	char		*name;    /* name from file name (not empty) */
88
	char		*fsec;    /* section from file name suffix */
89
	struct mlink	*next;    /* singly linked list */
90
	struct mpage	*mpage;   /* parent */
91
	int		 gzip;	  /* filename has a .gz suffix */
92
	enum form	 dform;   /* format from directory */
93
	enum form	 fform;   /* format from file name suffix */
94
};
95
96
/* Signature shared by all per-macro mdoc(7) handler functions. */
typedef	int (*mdoc_fp)(struct mpage *, const struct roff_meta *,
			const struct roff_node *);

/* Describes how one mdoc(7) macro is handled; one entry per macro. */
struct	mdoc_handler {
	mdoc_fp		 fp;	/* handler function, optional */
	uint64_t	 mask;	/* set unless the handler returns 0 */
	int		 taboo;	/* node flags that must not be set */
};
104
105
106
int		 mandocdb(int, char *[]);
107
108
static	void	 dbadd(struct dba *, struct mpage *);
109
static	void	 dbadd_mlink(const struct mlink *mlink);
110
static	void	 dbprune(struct dba *);
111
static	void	 dbwrite(struct dba *);
112
static	void	 filescan(const char *);
113
static	int	 fts_compare(const FTSENT **, const FTSENT **);
114
static	void	 mlink_add(struct mlink *, const struct stat *);
115
static	void	 mlink_check(struct mpage *, struct mlink *);
116
static	void	 mlink_free(struct mlink *);
117
static	void	 mlinks_undupe(struct mpage *);
118
static	void	 mpages_free(void);
119
static	void	 mpages_merge(struct dba *, struct mparse *);
120
static	void	 parse_cat(struct mpage *, int);
121
static	void	 parse_man(struct mpage *, const struct roff_meta *,
122
			const struct roff_node *);
123
static	void	 parse_mdoc(struct mpage *, const struct roff_meta *,
124
			const struct roff_node *);
125
static	int	 parse_mdoc_head(struct mpage *, const struct roff_meta *,
126
			const struct roff_node *);
127
static	int	 parse_mdoc_Fd(struct mpage *, const struct roff_meta *,
128
			const struct roff_node *);
129
static	void	 parse_mdoc_fname(struct mpage *, const struct roff_node *);
130
static	int	 parse_mdoc_Fn(struct mpage *, const struct roff_meta *,
131
			const struct roff_node *);
132
static	int	 parse_mdoc_Fo(struct mpage *, const struct roff_meta *,
133
			const struct roff_node *);
134
static	int	 parse_mdoc_Nd(struct mpage *, const struct roff_meta *,
135
			const struct roff_node *);
136
static	int	 parse_mdoc_Nm(struct mpage *, const struct roff_meta *,
137
			const struct roff_node *);
138
static	int	 parse_mdoc_Sh(struct mpage *, const struct roff_meta *,
139
			const struct roff_node *);
140
static	int	 parse_mdoc_Va(struct mpage *, const struct roff_meta *,
141
			const struct roff_node *);
142
static	int	 parse_mdoc_Xr(struct mpage *, const struct roff_meta *,
143
			const struct roff_node *);
144
static	void	 putkey(const struct mpage *, char *, uint64_t);
145
static	void	 putkeys(const struct mpage *, char *, size_t, uint64_t);
146
static	void	 putmdockey(const struct mpage *,
147
			const struct roff_node *, uint64_t, int);
148
static	int	 render_string(char **, size_t *);
149
static	void	 say(const char *, const char *, ...)
150
			__attribute__((__format__ (__printf__, 2, 3)));
151
static	int	 set_basedir(const char *, int);
152
static	int	 treescan(void);
153
static	size_t	 utf8(unsigned int, char [7]);
154
155
static	int		 nodb; /* no database changes */
156
static	int		 mparse_options; /* abort the parse early */
157
static	int		 use_all; /* use all found files */
158
static	int		 debug; /* print what we're doing */
159
static	int		 warnings; /* warn about crap */
160
static	int		 write_utf8; /* write UTF-8 output; else ASCII */
161
static	int		 exitcode; /* to be returned by main */
162
static	enum op		 op; /* operational mode */
163
static	char		 basedir[PATH_MAX]; /* current base directory */
164
static	struct mpage	*mpage_head; /* list of distinct manual pages */
165
static	struct ohash	 mpages; /* table of distinct manual pages */
166
static	struct ohash	 mlinks; /* table of directory entries */
167
static	struct ohash	 names; /* table of all names */
168
static	struct ohash	 strings; /* table of all strings */
169
static	uint64_t	 name_mask;
170
171
static	const struct mdoc_handler __mdocs[MDOC_MAX - MDOC_Dd] = {
172
	{ NULL, 0, NODE_NOPRT },  /* Dd */
173
	{ NULL, 0, NODE_NOPRT },  /* Dt */
174
	{ NULL, 0, NODE_NOPRT },  /* Os */
175
	{ parse_mdoc_Sh, TYPE_Sh, 0 }, /* Sh */
176
	{ parse_mdoc_head, TYPE_Ss, 0 }, /* Ss */
177
	{ NULL, 0, 0 },  /* Pp */
178
	{ NULL, 0, 0 },  /* D1 */
179
	{ NULL, 0, 0 },  /* Dl */
180
	{ NULL, 0, 0 },  /* Bd */
181
	{ NULL, 0, 0 },  /* Ed */
182
	{ NULL, 0, 0 },  /* Bl */
183
	{ NULL, 0, 0 },  /* El */
184
	{ NULL, 0, 0 },  /* It */
185
	{ NULL, 0, 0 },  /* Ad */
186
	{ NULL, TYPE_An, 0 },  /* An */
187
	{ NULL, 0, 0 },  /* Ap */
188
	{ NULL, TYPE_Ar, 0 },  /* Ar */
189
	{ NULL, TYPE_Cd, 0 },  /* Cd */
190
	{ NULL, TYPE_Cm, 0 },  /* Cm */
191
	{ NULL, TYPE_Dv, 0 },  /* Dv */
192
	{ NULL, TYPE_Er, 0 },  /* Er */
193
	{ NULL, TYPE_Ev, 0 },  /* Ev */
194
	{ NULL, 0, 0 },  /* Ex */
195
	{ NULL, TYPE_Fa, 0 },  /* Fa */
196
	{ parse_mdoc_Fd, 0, 0 },  /* Fd */
197
	{ NULL, TYPE_Fl, 0 },  /* Fl */
198
	{ parse_mdoc_Fn, 0, 0 },  /* Fn */
199
	{ NULL, TYPE_Ft, 0 },  /* Ft */
200
	{ NULL, TYPE_Ic, 0 },  /* Ic */
201
	{ NULL, TYPE_In, 0 },  /* In */
202
	{ NULL, TYPE_Li, 0 },  /* Li */
203
	{ parse_mdoc_Nd, 0, 0 },  /* Nd */
204
	{ parse_mdoc_Nm, 0, 0 },  /* Nm */
205
	{ NULL, 0, 0 },  /* Op */
206
	{ NULL, 0, 0 },  /* Ot */
207
	{ NULL, TYPE_Pa, NODE_NOSRC },  /* Pa */
208
	{ NULL, 0, 0 },  /* Rv */
209
	{ NULL, TYPE_St, 0 },  /* St */
210
	{ parse_mdoc_Va, TYPE_Va, 0 },  /* Va */
211
	{ parse_mdoc_Va, TYPE_Vt, 0 },  /* Vt */
212
	{ parse_mdoc_Xr, 0, 0 },  /* Xr */
213
	{ NULL, 0, 0 },  /* %A */
214
	{ NULL, 0, 0 },  /* %B */
215
	{ NULL, 0, 0 },  /* %D */
216
	{ NULL, 0, 0 },  /* %I */
217
	{ NULL, 0, 0 },  /* %J */
218
	{ NULL, 0, 0 },  /* %N */
219
	{ NULL, 0, 0 },  /* %O */
220
	{ NULL, 0, 0 },  /* %P */
221
	{ NULL, 0, 0 },  /* %R */
222
	{ NULL, 0, 0 },  /* %T */
223
	{ NULL, 0, 0 },  /* %V */
224
	{ NULL, 0, 0 },  /* Ac */
225
	{ NULL, 0, 0 },  /* Ao */
226
	{ NULL, 0, 0 },  /* Aq */
227
	{ NULL, TYPE_At, 0 },  /* At */
228
	{ NULL, 0, 0 },  /* Bc */
229
	{ NULL, 0, 0 },  /* Bf */
230
	{ NULL, 0, 0 },  /* Bo */
231
	{ NULL, 0, 0 },  /* Bq */
232
	{ NULL, TYPE_Bsx, NODE_NOSRC },  /* Bsx */
233
	{ NULL, TYPE_Bx, NODE_NOSRC },  /* Bx */
234
	{ NULL, 0, 0 },  /* Db */
235
	{ NULL, 0, 0 },  /* Dc */
236
	{ NULL, 0, 0 },  /* Do */
237
	{ NULL, 0, 0 },  /* Dq */
238
	{ NULL, 0, 0 },  /* Ec */
239
	{ NULL, 0, 0 },  /* Ef */
240
	{ NULL, TYPE_Em, 0 },  /* Em */
241
	{ NULL, 0, 0 },  /* Eo */
242
	{ NULL, TYPE_Fx, NODE_NOSRC },  /* Fx */
243
	{ NULL, TYPE_Ms, 0 },  /* Ms */
244
	{ NULL, 0, 0 },  /* No */
245
	{ NULL, 0, 0 },  /* Ns */
246
	{ NULL, TYPE_Nx, NODE_NOSRC },  /* Nx */
247
	{ NULL, TYPE_Ox, NODE_NOSRC },  /* Ox */
248
	{ NULL, 0, 0 },  /* Pc */
249
	{ NULL, 0, 0 },  /* Pf */
250
	{ NULL, 0, 0 },  /* Po */
251
	{ NULL, 0, 0 },  /* Pq */
252
	{ NULL, 0, 0 },  /* Qc */
253
	{ NULL, 0, 0 },  /* Ql */
254
	{ NULL, 0, 0 },  /* Qo */
255
	{ NULL, 0, 0 },  /* Qq */
256
	{ NULL, 0, 0 },  /* Re */
257
	{ NULL, 0, 0 },  /* Rs */
258
	{ NULL, 0, 0 },  /* Sc */
259
	{ NULL, 0, 0 },  /* So */
260
	{ NULL, 0, 0 },  /* Sq */
261
	{ NULL, 0, 0 },  /* Sm */
262
	{ NULL, 0, 0 },  /* Sx */
263
	{ NULL, TYPE_Sy, 0 },  /* Sy */
264
	{ NULL, TYPE_Tn, 0 },  /* Tn */
265
	{ NULL, 0, NODE_NOSRC },  /* Ux */
266
	{ NULL, 0, 0 },  /* Xc */
267
	{ NULL, 0, 0 },  /* Xo */
268
	{ parse_mdoc_Fo, 0, 0 },  /* Fo */
269
	{ NULL, 0, 0 },  /* Fc */
270
	{ NULL, 0, 0 },  /* Oo */
271
	{ NULL, 0, 0 },  /* Oc */
272
	{ NULL, 0, 0 },  /* Bk */
273
	{ NULL, 0, 0 },  /* Ek */
274
	{ NULL, 0, 0 },  /* Bt */
275
	{ NULL, 0, 0 },  /* Hf */
276
	{ NULL, 0, 0 },  /* Fr */
277
	{ NULL, 0, 0 },  /* Ud */
278
	{ NULL, TYPE_Lb, NODE_NOSRC },  /* Lb */
279
	{ NULL, 0, 0 },  /* Lp */
280
	{ NULL, TYPE_Lk, 0 },  /* Lk */
281
	{ NULL, TYPE_Mt, NODE_NOSRC },  /* Mt */
282
	{ NULL, 0, 0 },  /* Brq */
283
	{ NULL, 0, 0 },  /* Bro */
284
	{ NULL, 0, 0 },  /* Brc */
285
	{ NULL, 0, 0 },  /* %C */
286
	{ NULL, 0, 0 },  /* Es */
287
	{ NULL, 0, 0 },  /* En */
288
	{ NULL, TYPE_Dx, NODE_NOSRC },  /* Dx */
289
	{ NULL, 0, 0 },  /* %Q */
290
	{ NULL, 0, 0 },  /* %U */
291
	{ NULL, 0, 0 },  /* Ta */
292
};
293
static	const struct mdoc_handler *const mdocs = __mdocs - MDOC_Dd;
294
295
296
int
297
mandocdb(int argc, char *argv[])
298
{
299
192
	struct manconf	  conf;
300
	struct mparse	 *mp;
301
	struct dba	 *dba;
302
	const char	 *path_arg, *progname;
303
	size_t		  j, sz;
304
	int		  ch, i;
305
306
96
	if (pledge("stdio rpath wpath cpath flock", NULL) == -1) {
307
		warn("pledge");
308
		return (int)MANDOCLEVEL_SYSERR;
309
	}
310
311
96
	memset(&conf, 0, sizeof(conf));
312
313
	/*
314
	 * We accept a few different invocations.
315
	 * The CHECKOP macro makes sure that invocation styles don't
316
	 * clobber each other.
317
	 */
318
#define	CHECKOP(_op, _ch) do \
319
	if (OP_DEFAULT != (_op)) { \
320
		warnx("-%c: Conflicting option", (_ch)); \
321
		goto usage; \
322
	} while (/*CONSTCOND*/0)
323
324
	path_arg = NULL;
325
96
	op = OP_DEFAULT;
326
327
195
	while (-1 != (ch = getopt(argc, argv, "aC:Dd:npQT:tu:v")))
328



6
		switch (ch) {
329
		case 'a':
330
			use_all = 1;
331
			break;
332
		case 'C':
333
			CHECKOP(op, ch);
334
			path_arg = optarg;
335
			op = OP_CONFFILE;
336
			break;
337
		case 'D':
338
			debug++;
339
			break;
340
		case 'd':
341
3
			CHECKOP(op, ch);
342
3
			path_arg = optarg;
343
3
			op = OP_UPDATE;
344
3
			break;
345
		case 'n':
346
			nodb = 1;
347
			break;
348
		case 'p':
349
			warnings = 1;
350
			break;
351
		case 'Q':
352
			mparse_options |= MPARSE_QUICK;
353
			break;
354
		case 'T':
355
			if (strcmp(optarg, "utf8")) {
356
				warnx("-T%s: Unsupported output format",
357
				    optarg);
358
				goto usage;
359
			}
360
			write_utf8 = 1;
361
			break;
362
		case 't':
363
			CHECKOP(op, ch);
364
			dup2(STDOUT_FILENO, STDERR_FILENO);
365
			op = OP_TEST;
366
			nodb = warnings = 1;
367
			break;
368
		case 'u':
369
			CHECKOP(op, ch);
370
			path_arg = optarg;
371
			op = OP_DELETE;
372
			break;
373
		case 'v':
374
			/* Compatibility with espie@'s makewhatis. */
375
			break;
376
		default:
377
			goto usage;
378
		}
379
380
96
	argc -= optind;
381
96
	argv += optind;
382
383
96
	if (nodb) {
384
		if (pledge("stdio rpath flock cpath wpath", NULL) == -1) {
385
			warn("pledge");
386
			return (int)MANDOCLEVEL_SYSERR;
387
		}
388
	}
389
390
96
	if (OP_CONFFILE == op && argc > 0) {
391
		warnx("-C: Too many arguments");
392
		goto usage;
393
	}
394
395
96
	exitcode = (int)MANDOCLEVEL_OK;
396
96
	mchars_alloc();
397
96
	mp = mparse_alloc(mparse_options, MANDOCERR_MAX, NULL,
398
	    MANDOC_OS_OTHER, NULL);
399
96
	mandoc_ohash_init(&mpages, 6, offsetof(struct mpage, inodev));
400
96
	mandoc_ohash_init(&mlinks, 6, offsetof(struct mlink, file));
401
402
96
	if (OP_UPDATE == op || OP_DELETE == op || OP_TEST == op) {
403
404
		/*
405
		 * Most of these deal with a specific directory.
406
		 * Jump into that directory first.
407
		 */
408

6
		if (OP_TEST != op && 0 == set_basedir(path_arg, 1))
409
			goto out;
410
411
9
		dba = nodb ? dba_new(128) : dba_read(MANDOC_DB);
412
3
		if (dba != NULL) {
413
			/*
414
			 * The existing database is usable.  Process
415
			 * all files specified on the command-line.
416
			 */
417
3
			use_all = 1;
418
1136
			for (i = 0; i < argc; i++)
419
565
				filescan(argv[i]);
420
3
			if (nodb == 0)
421
3
				dbprune(dba);
422
		} else {
423
			/* Database missing or corrupt. */
424
			if (op != OP_UPDATE || errno != ENOENT)
425
				say(MANDOC_DB, "%s: Automatically recreating"
426
				    " from scratch", strerror(errno));
427
			exitcode = (int)MANDOCLEVEL_OK;
428
			op = OP_DEFAULT;
429
			if (0 == treescan())
430
				goto out;
431
			dba = dba_new(128);
432
		}
433
3
		if (OP_DELETE != op)
434
3
			mpages_merge(dba, mp);
435
3
		if (nodb == 0)
436
3
			dbwrite(dba);
437
3
		dba_free(dba);
438
3
	} else {
439
		/*
440
		 * If we have arguments, use them as our manpaths.
441
		 * If we don't, use man.conf(5).
442
		 */
443
93
		if (argc > 0) {
444
93
			conf.manpath.paths = mandoc_reallocarray(NULL,
445
93
			    argc, sizeof(char *));
446
93
			conf.manpath.sz = (size_t)argc;
447
372
			for (i = 0; i < argc; i++)
448
93
				conf.manpath.paths[i] = mandoc_strdup(argv[i]);
449
		} else
450
			manconf_parse(&conf, path_arg, NULL, NULL);
451
452
93
		if (conf.manpath.sz == 0) {
453
			exitcode = (int)MANDOCLEVEL_BADARG;
454
			say("", "Empty manpath");
455
		}
456
457
		/*
458
		 * First scan the tree rooted at a base directory, then
459
		 * build a new database and finally move it into place.
460
		 * Ignore zero-length directories and strip trailing
461
		 * slashes.
462
		 */
463
372
		for (j = 0; j < conf.manpath.sz; j++) {
464
93
			sz = strlen(conf.manpath.paths[j]);
465

186
			if (sz && conf.manpath.paths[j][sz - 1] == '/')
466
				conf.manpath.paths[j][--sz] = '\0';
467
93
			if (0 == sz)
468
				continue;
469
470
93
			if (j) {
471
				mandoc_ohash_init(&mpages, 6,
472
				    offsetof(struct mpage, inodev));
473
				mandoc_ohash_init(&mlinks, 6,
474
				    offsetof(struct mlink, file));
475
			}
476
477
93
			if ( ! set_basedir(conf.manpath.paths[j], argc > 0))
478
				continue;
479
93
			if (0 == treescan())
480
				continue;
481
93
			dba = dba_new(128);
482
93
			mpages_merge(dba, mp);
483
93
			if (nodb == 0)
484
93
				dbwrite(dba);
485
93
			dba_free(dba);
486
487
93
			if (j + 1 < conf.manpath.sz) {
488
				mpages_free();
489
				ohash_delete(&mpages);
490
				ohash_delete(&mlinks);
491
			}
492
		}
493
	}
494
out:
495
96
	manconf_free(&conf);
496
96
	mparse_free(mp);
497
96
	mchars_free();
498
96
	mpages_free();
499
96
	ohash_delete(&mpages);
500
96
	ohash_delete(&mlinks);
501
96
	return exitcode;
502
usage:
503
	progname = getprogname();
504
	fprintf(stderr, "usage: %s [-aDnpQ] [-C file] [-Tutf8]\n"
505
			"       %s [-aDnpQ] [-Tutf8] dir ...\n"
506
			"       %s [-DnpQ] [-Tutf8] -d dir [file ...]\n"
507
			"       %s [-Dnp] -u dir [file ...]\n"
508
			"       %s [-Q] -t file ...\n",
509
		        progname, progname, progname, progname, progname);
510
511
	return (int)MANDOCLEVEL_BADARG;
512
96
}
513
514
/*
 * Comparison callback for fts_open(3): orders directory entries in
 * reverse alphabetical order.  Entries are inserted at the beginning
 * of a singly linked list, so processing them in reverse yields a
 * list in alphabetical order.
 */
static int
fts_compare(const FTSENT **a, const FTSENT **b)
{
	const char	*lhs, *rhs;
	int		 order;

	lhs = (*a)->fts_name;
	rhs = (*b)->fts_name;
	order = strcmp(lhs, rhs);
	return -order;
}
523
524
/*
525
 * Scan a directory tree rooted at "basedir" for manpages.
526
 * We use fts(), scanning directory parts along the way for clues to our
527
 * section and architecture.
528
 *
529
 * If use_all has been specified, grok all files.
530
 * If not, sanitise paths to the following:
531
 *
532
 *   [./]man*[/<arch>]/<name>.<section>
533
 *   or
534
 *   [./]cat<section>[/<arch>]/<name>.0
535
 *
536
 * TODO: accommodate for multi-language directories.
537
 */
538
static int
539
treescan(void)
540
{
541
186
	char		 buf[PATH_MAX];
542
	FTS		*f;
543
	FTSENT		*ff;
544
	struct mlink	*mlink;
545
	int		 gzip;
546
	enum form	 dform;
547
	char		*dsec, *arch, *fsec, *cp;
548
	const char	*path;
549
93
	const char	*argv[2];
550
551
93
	argv[0] = ".";
552
93
	argv[1] = NULL;
553
554
93
	f = fts_open((char * const *)argv, FTS_PHYSICAL | FTS_NOCHDIR,
555
	    fts_compare);
556
93
	if (f == NULL) {
557
		exitcode = (int)MANDOCLEVEL_SYSERR;
558
		say("", "&fts_open");
559
		return 0;
560
	}
561
562
	dsec = arch = NULL;
563
	dform = FORM_NONE;
564
565
12447
	while ((ff = fts_read(f)) != NULL) {
566
12261
		path = ff->fts_path + 2;
567

12261
		switch (ff->fts_info) {
568
569
		/*
570
		 * Symbolic links require various sanity checks,
571
		 * then get handled just like regular files.
572
		 */
573
		case FTS_SL:
574
			if (realpath(path, buf) == NULL) {
575
				if (warnings)
576
					say(path, "&realpath");
577
				continue;
578
			}
579
			if (strstr(buf, basedir) != buf) {
580
				if (warnings) say("",
581
				    "%s: outside base directory", buf);
582
				continue;
583
			}
584
			/* Use logical inode to avoid mpages dupe. */
585
			if (stat(path, ff->fts_statp) == -1) {
586
				if (warnings)
587
					say(path, "&stat");
588
				continue;
589
			}
590
			/* FALLTHROUGH */
591
592
		/*
593
		 * If we're a regular file, add an mlink by using the
594
		 * stored directory data and handling the filename.
595
		 */
596
		case FTS_F:
597
11569
			if ( ! strcmp(path, MANDOC_DB))
598
				continue;
599

23132
			if ( ! use_all && ff->fts_level < 2) {
600
3
				if (warnings)
601
					say(path, "Extraneous file");
602
				continue;
603
			}
604
			gzip = 0;
605
			fsec = NULL;
606
23126
			while (fsec == NULL) {
607
11563
				fsec = strrchr(ff->fts_name, '.');
608

23126
				if (fsec == NULL || strcmp(fsec+1, "gz"))
609
					break;
610
				gzip = 1;
611
				*fsec = '\0';
612
				fsec = NULL;
613
			}
614
11563
			if (fsec == NULL) {
615
				if ( ! use_all) {
616
					if (warnings)
617
						say(path,
618
						    "No filename suffix");
619
					continue;
620
				}
621
11563
			} else if ( ! strcmp(++fsec, "html")) {
622
				if (warnings)
623
					say(path, "Skip html");
624
				continue;
625
11563
			} else if ( ! strcmp(fsec, "ps")) {
626
				if (warnings)
627
					say(path, "Skip ps");
628
				continue;
629
11563
			} else if ( ! strcmp(fsec, "pdf")) {
630
				if (warnings)
631
					say(path, "Skip pdf");
632
				continue;
633

11563
			} else if ( ! use_all &&
634
11563
			    ((dform == FORM_SRC &&
635
11563
			      strncmp(fsec, dsec, strlen(dsec))) ||
636
11563
			     (dform == FORM_CAT && strcmp(fsec, "0")))) {
637
				if (warnings)
638
					say(path, "Wrong filename suffix");
639
				continue;
640
			} else
641
11563
				fsec[-1] = '\0';
642
643
11563
			mlink = mandoc_calloc(1, sizeof(struct mlink));
644
23126
			if (strlcpy(mlink->file, path,
645
11563
			    sizeof(mlink->file)) >=
646
			    sizeof(mlink->file)) {
647
				say(path, "Filename too long");
648
				free(mlink);
649
				continue;
650
			}
651
11563
			mlink->dform = dform;
652
11563
			mlink->dsec = dsec;
653
11563
			mlink->arch = arch;
654
11563
			mlink->name = ff->fts_name;
655
11563
			mlink->fsec = fsec;
656
11563
			mlink->gzip = gzip;
657
11563
			mlink_add(mlink, ff->fts_statp);
658
11563
			continue;
659
660
		case FTS_D:
661
		case FTS_DP:
662
			break;
663
664
		default:
665
			if (warnings)
666
				say(path, "Not a regular file");
667
			continue;
668
		}
669
670

1198
		switch (ff->fts_level) {
671
		case 0:
672
			/* Ignore the root directory. */
673
			break;
674
		case 1:
675
			/*
676
			 * This might contain manX/ or catX/.
677
			 * Try to infer this from the name.
678
			 * If we're not in use_all, enforce it.
679
			 */
680
240
			cp = ff->fts_name;
681
240
			if (ff->fts_info == FTS_DP) {
682
				dform = FORM_NONE;
683
				dsec = NULL;
684
120
				break;
685
			}
686
687
120
			if ( ! strncmp(cp, "man", 3)) {
688
				dform = FORM_SRC;
689
120
				dsec = cp + 3;
690
120
			} else if ( ! strncmp(cp, "cat", 3)) {
691
				dform = FORM_CAT;
692
				dsec = cp + 3;
693
			} else {
694
				dform = FORM_NONE;
695
				dsec = NULL;
696
			}
697
698
120
			if (dsec != NULL || use_all)
699
				break;
700
701
			if (warnings)
702
				say(path, "Unknown directory part");
703
			fts_set(f, ff, FTS_SKIP);
704
			break;
705
		case 2:
706
			/*
707
			 * Possibly our architecture.
708
			 * If we're descending, keep tabs on it.
709
			 */
710
266
			if (ff->fts_info != FTS_DP && dsec != NULL)
711
133
				arch = ff->fts_name;
712
			else
713
				arch = NULL;
714
			break;
715
		default:
716
			if (ff->fts_info == FTS_DP || use_all)
717
				break;
718
			if (warnings)
719
				say(path, "Extraneous directory part");
720
			fts_set(f, ff, FTS_SKIP);
721
			break;
722
		}
723
	}
724
725
93
	fts_close(f);
726
93
	return 1;
727
93
}
728
729
/*
730
 * Add a file to the mlinks table.
731
 * Do not verify that it's a "valid" looking manpage (we'll do that
732
 * later).
733
 *
734
 * Try to infer the manual section, architecture, and page name from the
735
 * path, assuming it looks like
736
 *
737
 *   [./]man*[/<arch>]/<name>.<section>
738
 *   or
739
 *   [./]cat<section>[/<arch>]/<name>.0
740
 *
741
 * See treescan() for the fts(3) version of this.
742
 */
743
static void
744
filescan(const char *file)
745
{
746
1130
	char		 buf[PATH_MAX];
747
565
	struct stat	 st;
748
	struct mlink	*mlink;
749
	char		*p, *start;
750
751
565
	assert(use_all);
752
753
565
	if (0 == strncmp(file, "./", 2))
754
		file += 2;
755
756
	/*
757
	 * We have to do lstat(2) before realpath(3) loses
758
	 * the information whether this is a symbolic link.
759
	 * We need to know that because for symbolic links,
760
	 * we want to use the orginal file name, while for
761
	 * regular files, we want to use the real path.
762
	 */
763
565
	if (-1 == lstat(file, &st)) {
764
		exitcode = (int)MANDOCLEVEL_BADARG;
765
		say(file, "&lstat");
766
		return;
767
565
	} else if (0 == ((S_IFREG | S_IFLNK) & st.st_mode)) {
768
		exitcode = (int)MANDOCLEVEL_BADARG;
769
		say(file, "Not a regular file");
770
		return;
771
	}
772
773
	/*
774
	 * We have to resolve the file name to the real path
775
	 * in any case for the base directory check.
776
	 */
777
565
	if (NULL == realpath(file, buf)) {
778
		exitcode = (int)MANDOCLEVEL_BADARG;
779
		say(file, "&realpath");
780
		return;
781
	}
782
783
565
	if (OP_TEST == op)
784
		start = buf;
785
565
	else if (strstr(buf, basedir) == buf)
786
565
		start = buf + strlen(basedir);
787
	else {
788
		exitcode = (int)MANDOCLEVEL_BADARG;
789
		say("", "%s: outside base directory", buf);
790
		return;
791
	}
792
793
	/*
794
	 * Now we are sure the file is inside our tree.
795
	 * If it is a symbolic link, ignore the real path
796
	 * and use the original name.
797
	 * This implies passing stuff like "cat1/../man1/foo.1"
798
	 * on the command line won't work.  So don't do that.
799
	 * Note the stat(2) can still fail if the link target
800
	 * doesn't exist.
801
	 */
802
565
	if (S_IFLNK & st.st_mode) {
803
565
		if (-1 == stat(buf, &st)) {
804
			exitcode = (int)MANDOCLEVEL_BADARG;
805
			say(file, "&stat");
806
			return;
807
		}
808
565
		if (strlcpy(buf, file, sizeof(buf)) >= sizeof(buf)) {
809
			say(file, "Filename too long");
810
			return;
811
		}
812
		start = buf;
813

1130
		if (OP_TEST != op && strstr(buf, basedir) == buf)
814
565
			start += strlen(basedir);
815
	}
816
817
565
	mlink = mandoc_calloc(1, sizeof(struct mlink));
818
565
	mlink->dform = FORM_NONE;
819
565
	if (strlcpy(mlink->file, start, sizeof(mlink->file)) >=
820
	    sizeof(mlink->file)) {
821
		say(start, "Filename too long");
822
		free(mlink);
823
		return;
824
	}
825
826
	/*
827
	 * In test mode or when the original name is absolute
828
	 * but outside our tree, guess the base directory.
829
	 */
830
831

1130
	if (op == OP_TEST || (start == buf && *start == '/')) {
832
		if (strncmp(buf, "man/", 4) == 0)
833
			start = buf + 4;
834
		else if ((start = strstr(buf, "/man/")) != NULL)
835
			start += 5;
836
		else
837
			start = buf;
838
	}
839
840
	/*
841
	 * First try to guess our directory structure.
842
	 * If we find a separator, try to look for man* or cat*.
843
	 * If we find one of these and what's underneath is a directory,
844
	 * assume it's an architecture.
845
	 */
846
565
	if (NULL != (p = strchr(start, '/'))) {
847
565
		*p++ = '\0';
848
565
		if (0 == strncmp(start, "man", 3)) {
849
565
			mlink->dform = FORM_SRC;
850
565
			mlink->dsec = start + 3;
851
565
		} else if (0 == strncmp(start, "cat", 3)) {
852
			mlink->dform = FORM_CAT;
853
			mlink->dsec = start + 3;
854
		}
855
856
565
		start = p;
857

1130
		if (NULL != mlink->dsec && NULL != (p = strchr(start, '/'))) {
858
			*p++ = '\0';
859
			mlink->arch = start;
860
			start = p;
861
		}
862
	}
863
864
	/*
865
	 * Now check the file suffix.
866
	 * Suffix of `.0' indicates a catpage, `.1-9' is a manpage.
867
	 */
868
565
	p = strrchr(start, '\0');
869

4522
	while (p-- > start && '/' != *p && '.' != *p)
870
		/* Loop. */ ;
871
872
565
	if ('.' == *p) {
873
565
		*p++ = '\0';
874
565
		mlink->fsec = p;
875
565
	}
876
877
	/*
878
	 * Now try to parse the name.
879
	 * Use the filename portion of the path.
880
	 */
881
565
	mlink->name = start;
882
565
	if (NULL != (p = strrchr(start, '/'))) {
883
		mlink->name = p + 1;
884
		*p = '\0';
885
	}
886
565
	mlink_add(mlink, &st);
887
1130
}
888
889
static void
890
mlink_add(struct mlink *mlink, const struct stat *st)
891
{
892
24256
	struct inodev	 inodev;
893
	struct mpage	*mpage;
894
	unsigned int	 slot;
895
896
12128
	assert(NULL != mlink->file);
897
898
36384
	mlink->dsec = mandoc_strdup(mlink->dsec ? mlink->dsec : "");
899
25508
	mlink->arch = mandoc_strdup(mlink->arch ? mlink->arch : "");
900
36384
	mlink->name = mandoc_strdup(mlink->name ? mlink->name : "");
901
36384
	mlink->fsec = mandoc_strdup(mlink->fsec ? mlink->fsec : "");
902
903
12128
	if ('0' == *mlink->fsec) {
904
		free(mlink->fsec);
905
		mlink->fsec = mandoc_strdup(mlink->dsec);
906
		mlink->fform = FORM_CAT;
907

24256
	} else if ('1' <= *mlink->fsec && '9' >= *mlink->fsec)
908
12128
		mlink->fform = FORM_SRC;
909
	else
910
		mlink->fform = FORM_NONE;
911
912
12128
	slot = ohash_qlookup(&mlinks, mlink->file);
913
12128
	assert(NULL == ohash_find(&mlinks, slot));
914
12128
	ohash_insert(&mlinks, slot, mlink);
915
916
12128
	memset(&inodev, 0, sizeof(inodev));  /* Clear padding. */
917
12128
	inodev.st_ino = st->st_ino;
918
12128
	inodev.st_dev = st->st_dev;
919
12128
	slot = ohash_lookup_memory(&mpages, (char *)&inodev,
920
12128
	    sizeof(struct inodev), inodev.st_ino);
921
12128
	mpage = ohash_find(&mpages, slot);
922
12128
	if (NULL == mpage) {
923
11872
		mpage = mandoc_calloc(1, sizeof(struct mpage));
924
11872
		mpage->inodev.st_ino = inodev.st_ino;
925
11872
		mpage->inodev.st_dev = inodev.st_dev;
926
11872
		mpage->form = FORM_NONE;
927
11872
		mpage->next = mpage_head;
928
11872
		mpage_head = mpage;
929
11872
		ohash_insert(&mpages, slot, mpage);
930
11872
	} else
931
256
		mlink->next = mpage->mlinks;
932
12128
	mpage->mlinks = mlink;
933
12128
	mlink->mpage = mpage;
934
12128
}
935
936
static void
937
mlink_free(struct mlink *mlink)
938
{
939
940
24256
	free(mlink->dsec);
941
12128
	free(mlink->arch);
942
12128
	free(mlink->name);
943
12128
	free(mlink->fsec);
944
12128
	free(mlink);
945
12128
}
946
947
static void
948
mpages_free(void)
949
{
950
	struct mpage	*mpage;
951
	struct mlink	*mlink;
952
953
24032
	while ((mpage = mpage_head) != NULL) {
954
36128
		while ((mlink = mpage->mlinks) != NULL) {
955
12128
			mpage->mlinks = mlink->next;
956
12128
			mlink_free(mlink);
957
		}
958
11872
		mpage_head = mpage->next;
959
11872
		free(mpage->sec);
960
11872
		free(mpage->arch);
961
11872
		free(mpage->title);
962
11872
		free(mpage->desc);
963
11872
		free(mpage);
964
	}
965
96
}
966
967
/*
968
 * For each mlink to the mpage, check whether the path looks like
969
 * it is formatted, and if it does, check whether a source manual
970
 * exists by the same name, ignoring the suffix.
971
 * If both conditions hold, drop the mlink.
972
 */
973
static void
974
mlinks_undupe(struct mpage *mpage)
975
{
976
23744
	char		  buf[PATH_MAX];
977
	struct mlink	**prev;
978
	struct mlink	 *mlink;
979
	char		 *bufp;
980
981
11872
	mpage->form = FORM_CAT;
982
11872
	prev = &mpage->mlinks;
983
48146
	while (NULL != (mlink = *prev)) {
984
12201
		if (FORM_CAT != mlink->dform) {
985
12201
			mpage->form = FORM_NONE;
986
12201
			goto nextlink;
987
		}
988
		(void)strlcpy(buf, mlink->file, sizeof(buf));
989
		bufp = strstr(buf, "cat");
990
		assert(NULL != bufp);
991
		memcpy(bufp, "man", 3);
992
		if (NULL != (bufp = strrchr(buf, '.')))
993
			*++bufp = '\0';
994
		(void)strlcat(buf, mlink->dsec, sizeof(buf));
995
		if (NULL == ohash_find(&mlinks,
996
		    ohash_qlookup(&mlinks, buf)))
997
			goto nextlink;
998
		if (warnings)
999
			say(mlink->file, "Man source exists: %s", buf);
1000
		if (use_all)
1001
			goto nextlink;
1002
		*prev = mlink->next;
1003
		mlink_free(mlink);
1004
		continue;
1005
nextlink:
1006
12201
		prev = &(*prev)->next;
1007
	}
1008
11872
}
1009
1010
static void
1011
mlink_check(struct mpage *mpage, struct mlink *mlink)
1012
{
1013
	struct str	*str;
1014
	unsigned int	 slot;
1015
1016
	/*
1017
	 * Check whether the manual section given in a file
1018
	 * agrees with the directory where the file is located.
1019
	 * Some manuals have suffixes like (3p) on their
1020
	 * section number either inside the file or in the
1021
	 * directory name, some are linked into more than one
1022
	 * section, like encrypt(1) = makekey(8).
1023
	 */
1024
1025
	if (FORM_SRC == mpage->form &&
1026
	    strcasecmp(mpage->sec, mlink->dsec))
1027
		say(mlink->file, "Section \"%s\" manual in %s directory",
1028
		    mpage->sec, mlink->dsec);
1029
1030
	/*
1031
	 * Manual page directories exist for each kernel
1032
	 * architecture as returned by machine(1).
1033
	 * However, many manuals only depend on the
1034
	 * application architecture as returned by arch(1).
1035
	 * For example, some (2/ARM) manuals are shared
1036
	 * across the "armish" and "zaurus" kernel
1037
	 * architectures.
1038
	 * A few manuals are even shared across completely
1039
	 * different architectures, for example fdformat(1)
1040
	 * on amd64, i386, and sparc64.
1041
	 */
1042
1043
	if (strcasecmp(mpage->arch, mlink->arch))
1044
		say(mlink->file, "Architecture \"%s\" manual in "
1045
		    "\"%s\" directory", mpage->arch, mlink->arch);
1046
1047
	/*
1048
	 * XXX
1049
	 * parse_cat() doesn't set NAME_TITLE yet.
1050
	 */
1051
1052
	if (FORM_CAT == mpage->form)
1053
		return;
1054
1055
	/*
1056
	 * Check whether this mlink
1057
	 * appears as a name in the NAME section.
1058
	 */
1059
1060
	slot = ohash_qlookup(&names, mlink->name);
1061
	str = ohash_find(&names, slot);
1062
	assert(NULL != str);
1063
	if ( ! (NAME_TITLE & str->mask))
1064
		say(mlink->file, "Name missing in NAME section");
1065
}
1066
1067
/*
1068
 * Run through the files in the global vector "mpages"
1069
 * and add them to the database specified in "basedir".
1070
 *
1071
 * This handles the parsing scheme itself, using the cues of directory
1072
 * and filename to determine whether the file is parsable or not.
1073
 */
1074
static void
1075
mpages_merge(struct dba *dba, struct mparse *mp)
1076
{
1077
	struct mpage		*mpage, *mpage_dest;
1078
	struct mlink		*mlink, *mlink_dest;
1079
192
	struct roff_man		*man;
1080
96
	char			*sodest;
1081
96
	char			*cp;
1082
	int			 fd;
1083
1084
23936
	for (mpage = mpage_head; mpage != NULL; mpage = mpage->next) {
1085
11872
		mlinks_undupe(mpage);
1086
11872
		if ((mlink = mpage->mlinks) == NULL)
1087
			continue;
1088
1089
11872
		name_mask = NAME_MASK;
1090
11872
		mandoc_ohash_init(&names, 4, offsetof(struct str, key));
1091
11872
		mandoc_ohash_init(&strings, 6, offsetof(struct str, key));
1092
11872
		mparse_reset(mp);
1093
11872
		man = NULL;
1094
11872
		sodest = NULL;
1095
1096
11872
		if ((fd = mparse_open(mp, mlink->file)) == -1) {
1097
			say(mlink->file, "&open");
1098
			goto nextpage;
1099
		}
1100
1101
		/*
1102
		 * Interpret the file as mdoc(7) or man(7) source
1103
		 * code, unless it is known to be formatted.
1104
		 */
1105

11872
		if (mlink->dform != FORM_CAT || mlink->fform != FORM_CAT) {
1106
11872
			mparse_readfd(mp, fd, mlink->file);
1107
11872
			close(fd);
1108
			fd = -1;
1109
11872
			mparse_result(mp, &man, &sodest);
1110
11872
		}
1111
1112
11872
		if (sodest != NULL) {
1113
94
			mlink_dest = ohash_find(&mlinks,
1114
94
			    ohash_qlookup(&mlinks, sodest));
1115
94
			if (mlink_dest == NULL) {
1116
				mandoc_asprintf(&cp, "%s.gz", sodest);
1117
				mlink_dest = ohash_find(&mlinks,
1118
				    ohash_qlookup(&mlinks, cp));
1119
				free(cp);
1120
			}
1121
94
			if (mlink_dest != NULL) {
1122
1123
				/* The .so target exists. */
1124
1125
94
				mpage_dest = mlink_dest->mpage;
1126
94
				while (1) {
1127
94
					mlink->mpage = mpage_dest;
1128
1129
					/*
1130
					 * If the target was already
1131
					 * processed, add the links
1132
					 * to the database now.
1133
					 * Otherwise, this will
1134
					 * happen when we come
1135
					 * to the target.
1136
					 */
1137
1138
94
					if (mpage_dest->dba != NULL)
1139
21
						dbadd_mlink(mlink);
1140
1141
94
					if (mlink->next == NULL)
1142
						break;
1143
					mlink = mlink->next;
1144
				}
1145
1146
				/* Move all links to the target. */
1147
1148
94
				mlink->next = mlink_dest->next;
1149
94
				mlink_dest->next = mpage->mlinks;
1150
94
				mpage->mlinks = NULL;
1151
94
			}
1152
			goto nextpage;
1153

23556
		} else if (man != NULL && man->macroset == MACROSET_MDOC) {
1154
8794
			mdoc_validate(man);
1155
8794
			mpage->form = FORM_SRC;
1156
8794
			mpage->sec = man->meta.msec;
1157
8794
			mpage->sec = mandoc_strdup(
1158
26382
			    mpage->sec == NULL ? "" : mpage->sec);
1159
8794
			mpage->arch = man->meta.arch;
1160
8794
			mpage->arch = mandoc_strdup(
1161
18719
			    mpage->arch == NULL ? "" : mpage->arch);
1162
8794
			mpage->title = mandoc_strdup(man->meta.title);
1163

14762
		} else if (man != NULL && man->macroset == MACROSET_MAN) {
1164
2984
			man_validate(man);
1165

2985
			if (*man->meta.msec != '\0' ||
1166
1
			    *man->meta.title != '\0') {
1167
2983
				mpage->form = FORM_SRC;
1168
2983
				mpage->sec = mandoc_strdup(man->meta.msec);
1169
2983
				mpage->arch = mandoc_strdup(mlink->arch);
1170
2983
				mpage->title = mandoc_strdup(man->meta.title);
1171
2983
			} else
1172
1
				man = NULL;
1173
		}
1174
1175
11778
		assert(mpage->desc == NULL);
1176
11778
		if (man == NULL) {
1177
1
			mpage->form = FORM_CAT;
1178
1
			mpage->sec = mandoc_strdup(mlink->dsec);
1179
1
			mpage->arch = mandoc_strdup(mlink->arch);
1180
1
			mpage->title = mandoc_strdup(mlink->name);
1181
1
			parse_cat(mpage, fd);
1182
11778
		} else if (man->macroset == MACROSET_MDOC)
1183
8794
			parse_mdoc(mpage, &man->meta, man->first);
1184
		else
1185
2983
			parse_man(mpage, &man->meta, man->first);
1186
11778
		if (mpage->desc == NULL) {
1187
1
			mpage->desc = mandoc_strdup(mlink->name);
1188
1
			if (warnings)
1189
				say(mlink->file, "No one-line description, "
1190
				    "using filename \"%s\"", mlink->name);
1191
		}
1192
1193
47770
		for (mlink = mpage->mlinks;
1194
23885
		     mlink != NULL;
1195
12107
		     mlink = mlink->next) {
1196
12107
			putkey(mpage, mlink->name, NAME_FILE);
1197
12107
			if (warnings && !use_all)
1198
				mlink_check(mpage, mlink);
1199
		}
1200
1201
11778
		dbadd(dba, mpage);
1202
1203
nextpage:
1204
11872
		ohash_delete(&strings);
1205
11872
		ohash_delete(&names);
1206
11872
	}
1207
96
}
1208
1209
static void
1210
parse_cat(struct mpage *mpage, int fd)
1211
{
1212
	FILE		*stream;
1213
	struct mlink	*mlink;
1214
2
	char		*line, *p, *title, *sec;
1215
1
	size_t		 linesz, plen, titlesz;
1216
	ssize_t		 len;
1217
	int		 offs;
1218
1219
1
	mlink = mpage->mlinks;
1220
3
	stream = fd == -1 ? fopen(mlink->file, "r") : fdopen(fd, "r");
1221
1
	if (stream == NULL) {
1222
		if (fd != -1)
1223
			close(fd);
1224
		if (warnings)
1225
			say(mlink->file, "&fopen");
1226
		return;
1227
	}
1228
1229
1
	line = NULL;
1230
1
	linesz = 0;
1231
1232
	/* Parse the section number from the header line. */
1233
1234
2
	while (getline(&line, &linesz, stream) != -1) {
1235
		if (*line == '\n')
1236
			continue;
1237
		if ((sec = strchr(line, '(')) == NULL)
1238
			break;
1239
		if ((p = strchr(++sec, ')')) == NULL)
1240
			break;
1241
		free(mpage->sec);
1242
		mpage->sec = mandoc_strndup(sec, p - sec);
1243
		if (warnings && *mlink->dsec != '\0' &&
1244
		    strcasecmp(mpage->sec, mlink->dsec))
1245
			say(mlink->file,
1246
			    "Section \"%s\" manual in %s directory",
1247
			    mpage->sec, mlink->dsec);
1248
		break;
1249
	}
1250
1251
	/* Skip to first blank line. */
1252
1253

1
	while (line == NULL || *line != '\n')
1254
1
		if (getline(&line, &linesz, stream) == -1)
1255
			break;
1256
1257
	/*
1258
	 * Assume the first line that is not indented
1259
	 * is the first section header.  Skip to it.
1260
	 */
1261
1262
1
	while (getline(&line, &linesz, stream) != -1)
1263
		if (*line != '\n' && *line != ' ')
1264
			break;
1265
1266
	/*
1267
	 * Read up until the next section into a buffer.
1268
	 * Strip the leading and trailing newline from each read line,
1269
	 * appending a trailing space.
1270
	 * Ignore empty (whitespace-only) lines.
1271
	 */
1272
1273
	titlesz = 0;
1274
	title = NULL;
1275
1276
2
	while ((len = getline(&line, &linesz, stream)) != -1) {
1277
		if (*line != ' ')
1278
			break;
1279
		offs = 0;
1280
		while (isspace((unsigned char)line[offs]))
1281
			offs++;
1282
		if (line[offs] == '\0')
1283
			continue;
1284
		title = mandoc_realloc(title, titlesz + len - offs);
1285
		memcpy(title + titlesz, line + offs, len - offs);
1286
		titlesz += len - offs;
1287
		title[titlesz - 1] = ' ';
1288
	}
1289
1
	free(line);
1290
1291
	/*
1292
	 * If no page content can be found, or the input line
1293
	 * is already the next section header, or there is no
1294
	 * trailing newline, reuse the page title as the page
1295
	 * description.
1296
	 */
1297
1298

1
	if (NULL == title || '\0' == *title) {
1299
1
		if (warnings)
1300
			say(mlink->file, "Cannot find NAME section");
1301
1
		fclose(stream);
1302
1
		free(title);
1303
1
		return;
1304
	}
1305
1306
	title[titlesz - 1] = '\0';
1307
1308
	/*
1309
	 * Skip to the first dash.
1310
	 * Use the remaining line as the description (no more than 70
1311
	 * bytes).
1312
	 */
1313
1314
	if (NULL != (p = strstr(title, "- "))) {
1315
		for (p += 2; ' ' == *p || '\b' == *p; p++)
1316
			/* Skip to next word. */ ;
1317
	} else {
1318
		if (warnings)
1319
			say(mlink->file, "No dash in title line, "
1320
			    "reusing \"%s\" as one-line description", title);
1321
		p = title;
1322
	}
1323
1324
	plen = strlen(p);
1325
1326
	/* Strip backspace-encoding from line. */
1327
1328
	while (NULL != (line = memchr(p, '\b', plen))) {
1329
		len = line - p;
1330
		if (0 == len) {
1331
			memmove(line, line + 1, plen--);
1332
			continue;
1333
		}
1334
		memmove(line - 1, line + 1, plen - len);
1335
		plen -= 2;
1336
	}
1337
1338
	mpage->desc = mandoc_strdup(p);
1339
	fclose(stream);
1340
	free(title);
1341
1
}
1342
1343
/*
 * Convenience wrapper around putkeys() for NUL-terminated strings:
 * record the whole of "value" under the type bits "type".
 */
static void
putkey(const struct mpage *mpage, char *value, uint64_t type)
{
	size_t	 len;

	len = strlen(value);
	putkeys(mpage, value, len, type);
}
1351
1352
/*
1353
 * Grok all nodes at or below a certain mdoc node into putkey().
1354
 */
1355
static void
1356
putmdockey(const struct mpage *mpage,
1357
	const struct roff_node *n, uint64_t m, int taboo)
1358
{
1359
1360
1828810
	for ( ; NULL != n; n = n->next) {
1361
378614
		if (n->flags & taboo)
1362
			continue;
1363
368465
		if (NULL != n->child)
1364
216
			putmdockey(mpage, n->child, m, taboo);
1365
368465
		if (n->type == ROFFT_TEXT)
1366
368021
			putkey(mpage, n->string, m);
1367
	}
1368
357194
}
1369
1370
static void
1371
parse_man(struct mpage *mpage, const struct roff_meta *meta,
1372
	const struct roff_node *n)
1373
{
1374
	const struct roff_node *head, *body;
1375
29830
	char		*start, *title;
1376
	char		 byte;
1377
	size_t		 sz;
1378
1379
14915
	if (n == NULL)
1380
		return;
1381
1382
	/*
1383
	 * We're only searching for one thing: the first text child in
1384
	 * the BODY of a NAME section.  Since we don't keep track of
1385
	 * sections in -man, run some hoops to find out whether we're in
1386
	 * the correct section or not.
1387
	 */
1388
1389

17898
	if (n->type == ROFFT_BODY && n->tok == MAN_SH) {
1390
		body = n;
1391

5966
		if ((head = body->parent->head) != NULL &&
1392
2983
		    (head = head->child) != NULL &&
1393
2983
		    head->next == NULL &&
1394
2983
		    head->type == ROFFT_TEXT &&
1395
2983
		    strcmp(head->string, "NAME") == 0 &&
1396
2983
		    body->child != NULL) {
1397
1398
			/*
1399
			 * Suck the entire NAME section into memory.
1400
			 * Yes, we might run away.
1401
			 * But too many manuals have big, spread-out
1402
			 * NAME sections over many lines.
1403
			 */
1404
1405
2983
			title = NULL;
1406
2983
			deroff(&title, body);
1407
2983
			if (NULL == title)
1408
				return;
1409
1410
			/*
1411
			 * Go through a special heuristic dance here.
1412
			 * Conventionally, one or more manual names are
1413
			 * comma-specified prior to a whitespace, then a
1414
			 * dash, then a description.  Try to puzzle out
1415
			 * the name parts here.
1416
			 */
1417
1418
			start = title;
1419
2983
			for ( ;; ) {
1420
4636
				sz = strcspn(start, " ,");
1421
4636
				if ('\0' == start[sz])
1422
					break;
1423
1424
				byte = start[sz];
1425
4636
				start[sz] = '\0';
1426
1427
				/*
1428
				 * Assume a stray trailing comma in the
1429
				 * name list if a name begins with a dash.
1430
				 */
1431
1432

6339
				if ('-' == start[0] ||
1433
6339
				    ('\\' == start[0] && '-' == start[1]))
1434
					break;
1435
1436
4635
				putkey(mpage, start, NAME_TITLE);
1437

9191
				if ( ! (mpage->name_head_done ||
1438
4556
				    strcasecmp(start, meta->title))) {
1439
2519
					putkey(mpage, start, NAME_HEAD);
1440
2519
					mpage->name_head_done = 1;
1441
2519
				}
1442
1443
4635
				if (' ' == byte) {
1444
2982
					start += sz + 1;
1445
2982
					break;
1446
				}
1447
1448
1653
				assert(',' == byte);
1449
1653
				start += sz + 1;
1450
6612
				while (' ' == *start)
1451
1653
					start++;
1452
			}
1453
1454
2983
			if (start == title) {
1455
				putkey(mpage, start, NAME_TITLE);
1456
				if ( ! (mpage->name_head_done ||
1457
				    strcasecmp(start, meta->title))) {
1458
					putkey(mpage, start, NAME_HEAD);
1459
					mpage->name_head_done = 1;
1460
				}
1461
				free(title);
1462
				return;
1463
			}
1464
1465
2983
			while (isspace((unsigned char)*start))
1466
				start++;
1467
1468
2983
			if (0 == strncmp(start, "-", 1))
1469
331
				start += 1;
1470
2652
			else if (0 == strncmp(start, "\\-\\-", 4))
1471
128
				start += 4;
1472
2524
			else if (0 == strncmp(start, "\\-", 2))
1473
2522
				start += 2;
1474
2
			else if (0 == strncmp(start, "\\(en", 4))
1475
				start += 4;
1476
2
			else if (0 == strncmp(start, "\\(em", 4))
1477
				start += 4;
1478
1479
8953
			while (' ' == *start)
1480
2985
				start++;
1481
1482
2983
			mpage->desc = mandoc_strdup(start);
1483
2983
			free(title);
1484
2983
			return;
1485
		}
1486
	}
1487
1488
47728
	for (n = n->child; n; n = n->next) {
1489
14895
		if (NULL != mpage->desc)
1490
			break;
1491
11932
		parse_man(mpage, meta, n);
1492
	}
1493
26847
}
1494
1495
static void
1496
parse_mdoc(struct mpage *mpage, const struct roff_meta *meta,
1497
	const struct roff_node *n)
1498
{
1499
1500
9698587
	for (n = n->child; n != NULL; n = n->next) {
1501

4451364
		if (n->tok == TOKEN_NONE ||
1502
1440139
		    n->tok < ROFF_MAX ||
1503
1439692
		    n->flags & mdocs[n->tok].taboo)
1504
			continue;
1505

2826620
		assert(n->tok >= MDOC_Dd && n->tok < MDOC_MAX);
1506
1413310
		switch (n->type) {
1507
		case ROFFT_ELEM:
1508
		case ROFFT_BLOCK:
1509
		case ROFFT_HEAD:
1510
		case ROFFT_BODY:
1511
		case ROFFT_TAIL:
1512

1840591
			if (mdocs[n->tok].fp != NULL &&
1513
427281
			    (*mdocs[n->tok].fp)(mpage, meta, n) == 0)
1514
				break;
1515
1018472
			if (mdocs[n->tok].mask)
1516
663540
				putmdockey(mpage, n->child,
1517
331770
				    mdocs[n->tok].mask, mdocs[n->tok].taboo);
1518
			break;
1519
		default:
1520
			continue;
1521
		}
1522
1413310
		if (NULL != n->child)
1523
1216287
			parse_mdoc(mpage, meta, n);
1524
	}
1525
1225081
}
1526
1527
static int
1528
parse_mdoc_Fd(struct mpage *mpage, const struct roff_meta *meta,
1529
	const struct roff_node *n)
1530
{
1531
	char		*start, *end;
1532
	size_t		 sz;
1533
1534

519
	if (SEC_SYNOPSIS != n->sec ||
1535
173
	    NULL == (n = n->child) ||
1536
173
	    n->type != ROFFT_TEXT)
1537
		return 0;
1538
1539
	/*
1540
	 * Only consider those `Fd' macro fields that begin with an
1541
	 * "inclusion" token (versus, e.g., #define).
1542
	 */
1543
1544
173
	if (strcmp("#include", n->string))
1545
168
		return 0;
1546
1547

10
	if ((n = n->next) == NULL || n->type != ROFFT_TEXT)
1548
		return 0;
1549
1550
	/*
1551
	 * Strip away the enclosing angle brackets and make sure we're
1552
	 * not zero-length.
1553
	 */
1554
1555
5
	start = n->string;
1556

5
	if ('<' == *start || '"' == *start)
1557
5
		start++;
1558
1559
5
	if (0 == (sz = strlen(start)))
1560
		return 0;
1561
1562
5
	end = &start[(int)sz - 1];
1563

5
	if ('>' == *end || '"' == *end)
1564
5
		end--;
1565
1566
5
	if (end > start)
1567
5
		putkeys(mpage, start, end - start + 1, TYPE_In);
1568
5
	return 0;
1569
173
}
1570
1571
static void
1572
parse_mdoc_fname(struct mpage *mpage, const struct roff_node *n)
1573
{
1574
	char	*cp;
1575
	size_t	 sz;
1576
1577
126200
	if (n->type != ROFFT_TEXT)
1578
		return;
1579
1580
	/* Skip function pointer punctuation. */
1581
1582
63100
	cp = n->string;
1583

253477
	while (*cp == '(' || *cp == '*')
1584
303
		cp++;
1585
63100
	sz = strcspn(cp, "()");
1586
1587
63100
	putkeys(mpage, cp, sz, TYPE_Fn);
1588
63100
	if (n->sec == SEC_SYNOPSIS)
1589
16186
		putkeys(mpage, cp, sz, NAME_SYN);
1590
126200
}
1591
1592
static int
1593
parse_mdoc_Fn(struct mpage *mpage, const struct roff_meta *meta,
1594
	const struct roff_node *n)
1595
{
1596
1597
114364
	if (n->child == NULL)
1598
		return 0;
1599
1600
57182
	parse_mdoc_fname(mpage, n->child);
1601
1602
167606
	for (n = n->child->next; n != NULL; n = n->next)
1603
26621
		if (n->type == ROFFT_TEXT)
1604
26621
			putkey(mpage, n->string, TYPE_Fa);
1605
1606
57182
	return 0;
1607
57182
}
1608
1609
static int
1610
parse_mdoc_Fo(struct mpage *mpage, const struct roff_meta *meta,
1611
	const struct roff_node *n)
1612
{
1613
1614
35508
	if (n->type != ROFFT_HEAD)
1615
11836
		return 1;
1616
1617
5918
	if (n->child != NULL)
1618
5918
		parse_mdoc_fname(mpage, n->child);
1619
1620
5918
	return 0;
1621
17754
}
1622
1623
static int
1624
parse_mdoc_Va(struct mpage *mpage, const struct roff_meta *meta,
1625
	const struct roff_node *n)
1626
{
1627
31174
	char *cp;
1628
1629

15971
	if (n->type != ROFFT_ELEM && n->type != ROFFT_BODY)
1630
256
		return 0;
1631
1632

30110
	if (n->child != NULL &&
1633
15331
	    n->child->next == NULL &&
1634
14779
	    n->child->type == ROFFT_TEXT)
1635
14779
		return 1;
1636
1637
552
	cp = NULL;
1638
552
	deroff(&cp, n);
1639
552
	if (cp != NULL) {
1640
1534
		putkey(mpage, cp, TYPE_Vt | (n->tok == MDOC_Va ||
1641
430
		    n->type == ROFFT_BODY ? TYPE_Va : 0));
1642
552
		free(cp);
1643
552
	}
1644
1645
552
	return 0;
1646
15587
}
1647
1648
static int
1649
parse_mdoc_Xr(struct mpage *mpage, const struct roff_meta *meta,
1650
	const struct roff_node *n)
1651
{
1652
142330
	char	*cp;
1653
1654
71165
	if (NULL == (n = n->child))
1655
		return 0;
1656
1657
71165
	if (NULL == n->next) {
1658
5
		putkey(mpage, n->string, TYPE_Xr);
1659
5
		return 0;
1660
	}
1661
1662
71160
	mandoc_asprintf(&cp, "%s(%s)", n->string, n->next->string);
1663
71160
	putkey(mpage, cp, TYPE_Xr);
1664
71160
	free(cp);
1665
71160
	return 0;
1666
71165
}
1667
1668
static int
1669
parse_mdoc_Nd(struct mpage *mpage, const struct roff_meta *meta,
1670
	const struct roff_node *n)
1671
{
1672
1673
52764
	if (n->type == ROFFT_BODY)
1674
8794
		deroff(&mpage->desc, n);
1675
26382
	return 0;
1676
}
1677
1678
static int
1679
parse_mdoc_Nm(struct mpage *mpage, const struct roff_meta *meta,
1680
	const struct roff_node *n)
1681
{
1682
1683
117572
	if (SEC_NAME == n->sec)
1684
22518
		putmdockey(mpage, n->child, NAME_TITLE, 0);
1685

44341
	else if (n->sec == SEC_SYNOPSIS && n->type == ROFFT_HEAD) {
1686
2690
		if (n->child == NULL)
1687
			putkey(mpage, meta->name, NAME_SYN);
1688
		else
1689
2690
			putmdockey(mpage, n->child, NAME_SYN, 0);
1690
	}
1691

68813
	if ( ! (mpage->name_head_done ||
1692

20072
	    n->child == NULL || n->child->string == NULL ||
1693
10027
	    strcasecmp(n->child->string, meta->title))) {
1694
8754
		putkey(mpage, n->child->string, NAME_HEAD);
1695
8754
		mpage->name_head_done = 1;
1696
8754
	}
1697
58786
	return 0;
1698
}
1699
1700
static int
1701
parse_mdoc_Sh(struct mpage *mpage, const struct roff_meta *meta,
1702
	const struct roff_node *n)
1703
{
1704
1705
525324
	return n->sec == SEC_CUSTOM && n->type == ROFFT_HEAD;
1706
}
1707
1708
static int
1709
parse_mdoc_head(struct mpage *mpage, const struct roff_meta *meta,
1710
	const struct roff_node *n)
1711
{
1712
1713
16458
	return n->type == ROFFT_HEAD;
1714
}
1715
1716
/*
1717
 * Add a string to the hash table for the current manual.
1718
 * Each string has a bitmask telling which macros it belongs to.
1719
 * When we finish the manual, we'll dump the table.
1720
 */
1721
static void
1722
putkeys(const struct mpage *mpage, char *cp, size_t sz, uint64_t v)
1723
{
1724
	struct ohash	*htab;
1725
	struct str	*s;
1726
573665
	const char	*end;
1727
	unsigned int	 slot;
1728
	int		 i, mustfree;
1729
1730
573665
	if (0 == sz)
1731
6
		return;
1732
1733
573659
	mustfree = render_string(&cp, &sz);
1734
1735
573659
	if (TYPE_Nm & v) {
1736
		htab = &names;
1737
69421
		v &= name_mask;
1738
69421
		if (v & NAME_FIRST)
1739
11777
			name_mask &= ~NAME_FIRST;
1740
69421
		if (debug > 1)
1741
			say(mpage->mlinks->file,
1742
			    "Adding name %*s, bits=0x%llx", (int)sz, cp,
1743
			    (unsigned long long)v);
1744
	} else {
1745
		htab = &strings;
1746
504238
		if (debug > 1)
1747
		    for (i = 0; i < KEY_MAX; i++)
1748
			if ((uint64_t)1 << i & v)
1749
			    say(mpage->mlinks->file,
1750
				"Adding key %s=%*s",
1751
				mansearch_keynames[i], (int)sz, cp);
1752
	}
1753
1754
573659
	end = cp + sz;
1755
573659
	slot = ohash_qlookupi(htab, cp, &end);
1756
573659
	s = ohash_find(htab, slot);
1757
1758

850949
	if (NULL != s && mpage == s->mpage) {
1759
277290
		s->mask |= v;
1760
277290
		return;
1761
296369
	} else if (NULL == s) {
1762
296369
		s = mandoc_calloc(1, sizeof(struct str) + sz + 1);
1763
296369
		memcpy(s->key, cp, sz);
1764
296369
		ohash_insert(htab, slot, s);
1765
296369
	}
1766
296369
	s->mpage = mpage;
1767
296369
	s->mask = v;
1768
1769
296369
	if (mustfree)
1770
9211
		free(cp);
1771
870034
}
1772
1773
/*
 * Encode the code point "cp" in the original (up to six byte)
 * UTF-8 scheme, storing the bytes followed by a NUL into "out".
 * Returns the number of bytes written, not counting the NUL,
 * or 0 without touching "out" if "cp" is above 0x7FFFFFFF.
 *
 * A sequence of len > 1 bytes starts with a byte having the top
 * len bits set, followed by a zero bit and the most significant
 * payload bits; each continuation byte is 10xxxxxx and carries
 * six payload bits.
 */
static size_t
utf8(unsigned int cp, char out[7])
{
	unsigned int	 lead_mask, lead_bits;
	size_t		 len, i;

	if (cp <= 0x0000007F)
		len = 1;
	else if (cp <= 0x000007FF)
		len = 2;
	else if (cp <= 0x0000FFFF)
		len = 3;
	else if (cp <= 0x001FFFFF)
		len = 4;
	else if (cp <= 0x03FFFFFF)
		len = 5;
	else if (cp <= 0x7FFFFFFF)
		len = 6;
	else
		return 0;

	if (len == 1)
		out[0] = (char)cp;
	else {
		/* Fill the continuation bytes from the back. */
		for (i = len - 1; i > 0; i--) {
			out[i] = (cp & 63) | 128;
			cp >>= 6;
		}
		/* len = 2..6 yields 31|192, 15|224, 7|240, 3|248, 1|252. */
		lead_mask = (1U << (7 - len)) - 1;
		lead_bits = (0xFFU << (8 - len)) & 0xFFU;
		out[0] = (cp & lead_mask) | lead_bits;
	}

	out[len] = '\0';
	return len;
}
1824
1825
/*
1826
 * If the string contains escape sequences,
1827
 * replace it with an allocated rendering and return 1,
1828
 * such that the caller can free it after use.
1829
 * Otherwise, do nothing and return 0.
1830
 */
1831
static int
1832
render_string(char **public, size_t *psz)
1833
{
1834
1170874
	const char	*src, *scp, *addcp, *seq;
1835
	char		*dst;
1836
585437
	size_t		 ssz, dsz, addsz;
1837
585437
	char		 utfbuf[7], res[6];
1838
585437
	int		 seqlen, unicode;
1839
1840
585437
	res[0] = '\\';
1841
585437
	res[1] = '\t';
1842
585437
	res[2] = ASCII_NBRSP;
1843
585437
	res[3] = ASCII_HYPH;
1844
585437
	res[4] = ASCII_BREAK;
1845
585437
	res[5] = '\0';
1846
1847
585437
	src = scp = *public;
1848
585437
	ssz = *psz;
1849
	dst = NULL;
1850
	dsz = 0;
1851
1852
6813656
	while (scp < src + *psz) {
1853
1854
		/* Leave normal characters unchanged. */
1855
1856
5629455
		if (strchr(res, *scp) == NULL) {
1857
5611192
			if (dst != NULL)
1858
100571
				dst[dsz++] = *scp;
1859
5611192
			scp++;
1860
5611192
			continue;
1861
		}
1862
1863
		/*
1864
		 * Found something that requires replacing,
1865
		 * make sure we have a destination buffer.
1866
		 */
1867
1868
18263
		if (dst == NULL) {
1869
12601
			dst = mandoc_malloc(ssz + 1);
1870
12601
			dsz = scp - src;
1871
12601
			memcpy(dst, src, dsz);
1872
12601
		}
1873
1874
		/* Handle single-char special characters. */
1875
1876

19287
		switch (*scp) {
1877
		case '\\':
1878
			break;
1879
		case '\t':
1880
		case ASCII_NBRSP:
1881
96
			dst[dsz++] = ' ';
1882
96
			scp++;
1883
96
			continue;
1884
		case ASCII_HYPH:
1885
1024
			dst[dsz++] = '-';
1886
			/* FALLTHROUGH */
1887
		case ASCII_BREAK:
1888
1024
			scp++;
1889
1024
			continue;
1890
		default:
1891
			abort();
1892
		}
1893
1894
		/*
1895
		 * Found an escape sequence.
1896
		 * Read past the slash, then parse it.
1897
		 * Ignore everything except characters.
1898
		 */
1899
1900
17143
		scp++;
1901
17143
		if (mandoc_escape(&scp, &seq, &seqlen) != ESCAPE_SPECIAL)
1902
			continue;
1903
1904
		/*
1905
		 * Render the special character
1906
		 * as either UTF-8 or ASCII.
1907
		 */
1908
1909
13327
		if (write_utf8) {
1910
			unicode = mchars_spec2cp(seq, seqlen);
1911
			if (unicode <= 0)
1912
				continue;
1913
			addsz = utf8(unicode, utfbuf);
1914
			if (addsz == 0)
1915
				continue;
1916
			addcp = utfbuf;
1917
		} else {
1918
13327
			addcp = mchars_spec2str(seq, seqlen, &addsz);
1919
13327
			if (addcp == NULL)
1920
				continue;
1921
13327
			if (*addcp == ASCII_NBRSP) {
1922
				addcp = " ";
1923
1989
				addsz = 1;
1924
1989
			}
1925
		}
1926
1927
		/* Copy the rendered glyph into the stream. */
1928
1929
13327
		ssz += addsz;
1930
13327
		dst = mandoc_realloc(dst, ssz + 1);
1931
13327
		memcpy(dst + dsz, addcp, addsz);
1932
13327
		dsz += addsz;
1933
	}
1934
585437
	if (dst != NULL) {
1935
12601
		*public = dst;
1936
12601
		*psz = dsz;
1937
12601
	}
1938
1939
	/* Trim trailing whitespace and NUL-terminate. */
1940
1941

1173780
	while (*psz > 0 && (*public)[*psz - 1] == ' ')
1942
975
		--*psz;
1943
585437
	if (dst != NULL) {
1944
12601
		(*public)[*psz] = '\0';
1945
12601
		return 1;
1946
	} else
1947
572836
		return 0;
1948
585437
}
1949
1950
static void
1951
dbadd_mlink(const struct mlink *mlink)
1952
{
1953
24256
	dba_page_alias(mlink->mpage->dba, mlink->name, NAME_FILE);
1954
12128
	dba_page_add(mlink->mpage->dba, DBP_SECT, mlink->dsec);
1955
12128
	dba_page_add(mlink->mpage->dba, DBP_SECT, mlink->fsec);
1956
12128
	dba_page_add(mlink->mpage->dba, DBP_ARCH, mlink->arch);
1957
12128
	dba_page_add(mlink->mpage->dba, DBP_FILE, mlink->file);
1958
12128
}
1959
1960
/*
1961
 * Flush the current page's terms (and their bits) into the database.
1962
 * Also, handle escape sequences at the last possible moment.
1963
 */
1964
static void
1965
dbadd(struct dba *dba, struct mpage *mpage)
1966
{
1967
	struct mlink	*mlink;
1968
	struct str	*key;
1969
23556
	char		*cp;
1970
	uint64_t	 mask;
1971
11778
	size_t		 i;
1972
11778
	unsigned int	 slot;
1973
	int		 mustfree;
1974
1975
11778
	mlink = mpage->mlinks;
1976
1977
11778
	if (nodb) {
1978
		for (key = ohash_first(&names, &slot); NULL != key;
1979
		     key = ohash_next(&names, &slot))
1980
			free(key);
1981
		for (key = ohash_first(&strings, &slot); NULL != key;
1982
		     key = ohash_next(&strings, &slot))
1983
			free(key);
1984
		if (0 == debug)
1985
			return;
1986
		while (NULL != mlink) {
1987
			fputs(mlink->name, stdout);
1988
			if (NULL == mlink->next ||
1989
			    strcmp(mlink->dsec, mlink->next->dsec) ||
1990
			    strcmp(mlink->fsec, mlink->next->fsec) ||
1991
			    strcmp(mlink->arch, mlink->next->arch)) {
1992
				putchar('(');
1993
				if ('\0' == *mlink->dsec)
1994
					fputs(mlink->fsec, stdout);
1995
				else
1996
					fputs(mlink->dsec, stdout);
1997
				if ('\0' != *mlink->arch)
1998
					printf("/%s", mlink->arch);
1999
				putchar(')');
2000
			}
2001
			mlink = mlink->next;
2002
			if (NULL != mlink)
2003
				fputs(", ", stdout);
2004
		}
2005
		printf(" - %s\n", mpage->desc);
2006
		return;
2007
	}
2008
2009
11778
	if (debug)
2010
		say(mlink->file, "Adding to database");
2011
2012
11778
	cp = mpage->desc;
2013
11778
	i = strlen(cp);
2014
11778
	mustfree = render_string(&cp, &i);
2015
23556
	mpage->dba = dba_page_new(dba->pages,
2016
11778
	    *mpage->arch == '\0' ? mlink->arch : mpage->arch,
2017
11778
	    cp, mlink->file, mpage->form);
2018
11778
	if (mustfree)
2019
1128
		free(cp);
2020
11778
	dba_page_add(mpage->dba, DBP_SECT, mpage->sec);
2021
2022
47770
	while (mlink != NULL) {
2023
12107
		dbadd_mlink(mlink);
2024
12107
		mlink = mlink->next;
2025
	}
2026
2027
80272
	for (key = ohash_first(&names, &slot); NULL != key;
2028
28358
	     key = ohash_next(&names, &slot)) {
2029
28358
		assert(key->mpage == mpage);
2030
28358
		dba_page_alias(mpage->dba, key->key, key->mask);
2031
28358
		free(key);
2032
	}
2033
559578
	for (key = ohash_first(&strings, &slot); NULL != key;
2034
268011
	     key = ohash_next(&strings, &slot)) {
2035
268011
		assert(key->mpage == mpage);
2036
268011
		i = 0;
2037
19832814
		for (mask = TYPE_Xr; mask <= TYPE_Lb; mask *= 2) {
2038
9648396
			if (key->mask & mask)
2039
548604
				dba_macro_add(dba->macros, i,
2040
274302
				    key->key, mpage->dba);
2041
9648396
			i++;
2042
		}
2043
268011
		free(key);
2044
	}
2045
23556
}
2046
2047
static void
2048
dbprune(struct dba *dba)
2049
{
2050
	struct dba_array	*page, *files;
2051
	char			*file;
2052
2053
3348
	dba_array_FOREACH(dba->pages, page) {
2054
3339
		files = dba_array_get(page, DBP_FILE);
2055
10598
		dba_array_FOREACH(files, file) {
2056
3920
			if (*file < ' ')
2057
3339
				file++;
2058
7840
			if (ohash_find(&mlinks, ohash_qlookup(&mlinks,
2059
3920
			    file)) != NULL) {
2060
				if (debug)
2061
					say(file, "Deleting from database");
2062
				dba_array_del(dba->pages);
2063
				break;
2064
			}
2065
		}
2066
	}
2067
3
}
2068
2069
/*
2070
 * Write the database from memory to disk.
2071
 */
2072
static void
2073
dbwrite(struct dba *dba)
2074
{
2075
192
	struct stat	 sb1, sb2;
2076
96
	char		 tfn[33], *cp1, *cp2;
2077
	off_t		 i;
2078
	int		 fd1, fd2;
2079
2080
	/*
2081
	 * Do not write empty databases, and delete existing ones
2082
	 * when makewhatis -u causes them to become empty.
2083
	 */
2084
2085
96
	dba_array_start(dba->pages);
2086
96
	if (dba_array_next(dba->pages) == NULL) {
2087

10
		if (unlink(MANDOC_DB) == -1 && errno != ENOENT)
2088
			say(MANDOC_DB, "&unlink");
2089
5
		return;
2090
	}
2091
2092
	/*
2093
	 * Build the database in a temporary file,
2094
	 * then atomically move it into place.
2095
	 */
2096
2097
91
	if (dba_write(MANDOC_DB "~", dba) != -1) {
2098
91
		if (rename(MANDOC_DB "~", MANDOC_DB) == -1) {
2099
			exitcode = (int)MANDOCLEVEL_SYSERR;
2100
			say(MANDOC_DB, "&rename");
2101
			unlink(MANDOC_DB "~");
2102
		}
2103
91
		return;
2104
	}
2105
2106
	/*
2107
	 * We lack write permission and cannot replace the database
2108
	 * file, but let's at least check whether the data changed.
2109
	 */
2110
2111
	(void)strlcpy(tfn, "/tmp/mandocdb.XXXXXXXX", sizeof(tfn));
2112
	if (mkdtemp(tfn) == NULL) {
2113
		exitcode = (int)MANDOCLEVEL_SYSERR;
2114
		say("", "&%s", tfn);
2115
		return;
2116
	}
2117
	cp1 = cp2 = MAP_FAILED;
2118
	fd1 = fd2 = -1;
2119
	(void)strlcat(tfn, "/" MANDOC_DB, sizeof(tfn));
2120
	if (dba_write(tfn, dba) == -1) {
2121
		say(tfn, "&dba_write");
2122
		goto err;
2123
	}
2124
	if ((fd1 = open(MANDOC_DB, O_RDONLY, 0)) == -1) {
2125
		say(MANDOC_DB, "&open");
2126
		goto err;
2127
	}
2128
	if ((fd2 = open(tfn, O_RDONLY, 0)) == -1) {
2129
		say(tfn, "&open");
2130
		goto err;
2131
	}
2132
	if (fstat(fd1, &sb1) == -1) {
2133
		say(MANDOC_DB, "&fstat");
2134
		goto err;
2135
	}
2136
	if (fstat(fd2, &sb2) == -1) {
2137
		say(tfn, "&fstat");
2138
		goto err;
2139
	}
2140
	if (sb1.st_size != sb2.st_size)
2141
		goto err;
2142
	if ((cp1 = mmap(NULL, sb1.st_size, PROT_READ, MAP_PRIVATE,
2143
	    fd1, 0)) == MAP_FAILED) {
2144
		say(MANDOC_DB, "&mmap");
2145
		goto err;
2146
	}
2147
	if ((cp2 = mmap(NULL, sb2.st_size, PROT_READ, MAP_PRIVATE,
2148
	    fd2, 0)) == MAP_FAILED) {
2149
		say(tfn, "&mmap");
2150
		goto err;
2151
	}
2152
	for (i = 0; i < sb1.st_size; i++)
2153
		if (cp1[i] != cp2[i])
2154
			goto err;
2155
	goto out;
2156
2157
err:
2158
	exitcode = (int)MANDOCLEVEL_SYSERR;
2159
	say(MANDOC_DB, "Data changed, but cannot replace database");
2160
2161
out:
2162
	if (cp1 != MAP_FAILED)
2163
		munmap(cp1, sb1.st_size);
2164
	if (cp2 != MAP_FAILED)
2165
		munmap(cp2, sb2.st_size);
2166
	if (fd1 != -1)
2167
		close(fd1);
2168
	if (fd2 != -1)
2169
		close(fd2);
2170
	unlink(tfn);
2171
	*strrchr(tfn, '/') = '\0';
2172
	rmdir(tfn);
2173
96
}
2174
2175
static int
2176
set_basedir(const char *targetdir, int report_baddir)
2177
{
2178
	static char	 startdir[PATH_MAX];
2179
	static int	 getcwd_status;  /* 1 = ok, 2 = failure */
2180
	static int	 chdir_status;  /* 1 = changed directory */
2181
	char		*cp;
2182
2183
	/*
2184
	 * Remember the original working directory, if possible.
2185
	 * This will be needed if the second or a later directory
2186
	 * on the command line is given as a relative path.
2187
	 * Do not error out if the current directory is not
2188
	 * searchable: Maybe it won't be needed after all.
2189
	 */
2190
192
	if (0 == getcwd_status) {
2191
96
		if (NULL == getcwd(startdir, sizeof(startdir))) {
2192
			getcwd_status = 2;
2193
			(void)strlcpy(startdir, strerror(errno),
2194
			    sizeof(startdir));
2195
		} else
2196
96
			getcwd_status = 1;
2197
	}
2198
2199
	/*
2200
	 * We are leaving the old base directory.
2201
	 * Do not use it any longer, not even for messages.
2202
	 */
2203
96
	*basedir = '\0';
2204
2205
	/*
2206
	 * If and only if the directory was changed earlier and
2207
	 * the next directory to process is given as a relative path,
2208
	 * first go back, or bail out if that is impossible.
2209
	 */
2210

96
	if (chdir_status && '/' != *targetdir) {
2211
		if (2 == getcwd_status) {
2212
			exitcode = (int)MANDOCLEVEL_SYSERR;
2213
			say("", "getcwd: %s", startdir);
2214
			return 0;
2215
		}
2216
		if (-1 == chdir(startdir)) {
2217
			exitcode = (int)MANDOCLEVEL_SYSERR;
2218
			say("", "&chdir %s", startdir);
2219
			return 0;
2220
		}
2221
	}
2222
2223
	/*
2224
	 * Always resolve basedir to the canonicalized absolute
2225
	 * pathname and append a trailing slash, such that
2226
	 * we can reliably check whether files are inside.
2227
	 */
2228
96
	if (NULL == realpath(targetdir, basedir)) {
2229
		if (report_baddir || errno != ENOENT) {
2230
			exitcode = (int)MANDOCLEVEL_BADARG;
2231
			say("", "&%s: realpath", targetdir);
2232
		}
2233
		return 0;
2234
96
	} else if (-1 == chdir(basedir)) {
2235
		if (report_baddir || errno != ENOENT) {
2236
			exitcode = (int)MANDOCLEVEL_BADARG;
2237
			say("", "&chdir");
2238
		}
2239
		return 0;
2240
	}
2241
96
	chdir_status = 1;
2242
96
	cp = strchr(basedir, '\0');
2243
96
	if ('/' != cp[-1]) {
2244
96
		if (cp - basedir >= PATH_MAX - 1) {
2245
			exitcode = (int)MANDOCLEVEL_SYSERR;
2246
			say("", "Filename too long");
2247
			return 0;
2248
		}
2249
96
		*cp++ = '/';
2250
96
		*cp = '\0';
2251
96
	}
2252
96
	return 1;
2253
96
}
2254
2255
static void
2256
say(const char *file, const char *format, ...)
2257
{
2258
	va_list		 ap;
2259
	int		 use_errno;
2260
2261
	if ('\0' != *basedir)
2262
		fprintf(stderr, "%s", basedir);
2263
	if ('\0' != *basedir && '\0' != *file)
2264
		fputc('/', stderr);
2265
	if ('\0' != *file)
2266
		fprintf(stderr, "%s", file);
2267
2268
	use_errno = 1;
2269
	if (NULL != format) {
2270
		switch (*format) {
2271
		case '&':
2272
			format++;
2273
			break;
2274
		case '\0':
2275
			format = NULL;
2276
			break;
2277
		default:
2278
			use_errno = 0;
2279
			break;
2280
		}
2281
	}
2282
	if (NULL != format) {
2283
		if ('\0' != *basedir || '\0' != *file)
2284
			fputs(": ", stderr);
2285
		va_start(ap, format);
2286
		vfprintf(stderr, format, ap);
2287
		va_end(ap);
2288
	}
2289
	if (use_errno) {
2290
		if ('\0' != *basedir || '\0' != *file || NULL != format)
2291
			fputs(": ", stderr);
2292
		perror(NULL);
2293
	} else
2294
		fputc('\n', stderr);
2295
}