GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: usr.bin/mandoc/read.c Lines: 246 307 80.1 %
Date: 2017-11-07 Branches: 139 200 69.5 %

Line Branch Exec Source
1
/*	$OpenBSD: read.c,v 1.164 2017/07/20 14:36:32 schwarze Exp $ */
2
/*
3
 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4
 * Copyright (c) 2010-2017 Ingo Schwarze <schwarze@openbsd.org>
5
 * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
6
 *
7
 * Permission to use, copy, modify, and distribute this software for any
8
 * purpose with or without fee is hereby granted, provided that the above
9
 * copyright notice and this permission notice appear in all copies.
10
 *
11
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
12
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
14
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18
 */
19
#include <sys/types.h>
20
#include <sys/mman.h>
21
#include <sys/stat.h>
22
23
#include <assert.h>
24
#include <ctype.h>
25
#include <errno.h>
26
#include <fcntl.h>
27
#include <stdarg.h>
28
#include <stdio.h>
29
#include <stdlib.h>
30
#include <string.h>
31
#include <unistd.h>
32
#include <zlib.h>
33
34
#include "mandoc_aux.h"
35
#include "mandoc.h"
36
#include "roff.h"
37
#include "mdoc.h"
38
#include "man.h"
39
#include "libmandoc.h"
40
41
#define	REPARSE_LIMIT	1000
42
43
/*
 * State of one parser instance.  It is allocated by mparse_alloc(),
 * released by mparse_free(), and threaded through every function in
 * this file.
 */
struct	mparse {
44
	struct roff	 *roff; /* roff parser (!NULL) */
45
	struct roff_man	 *man; /* man parser */
46
	char		 *sodest; /* filename pointed to by .so */
47
	const char	 *file; /* filename of current input file */
48
	struct buf	 *primary; /* buffer currently being parsed */
49
	struct buf	 *secondary; /* copy of preprocessed lines; set up by mparse_keep() */
50
	const char	 *os_s; /* default operating system */
51
	mandocmsg	  mmsg; /* warning/error message handler */
52
	enum mandoclevel  file_status; /* highest message level seen; raised by mandoc_msg() */
53
	enum mandocerr	  mmin; /* ignore messages below this */
54
	int		  options; /* parser options */
55
	int		  gzip; /* current input file is gzipped */
56
	int		  filenc; /* encoding of the current file */
57
	int		  reparse_count; /* reparses of the current line, capped at REPARSE_LIMIT */
58
	int		  line; /* line number in the file */
59
};
60
61
static	void	  choose_parser(struct mparse *);
62
static	void	  resize_buf(struct buf *, size_t);
63
static	int	  mparse_buf_r(struct mparse *, struct buf, size_t, int);
64
static	int	  read_whole_file(struct mparse *, const char *, int,
65
				struct buf *, int *);
66
static	void	  mparse_end(struct mparse *);
67
static	void	  mparse_parse_buffer(struct mparse *, struct buf,
68
			const char *);
69
70
/*
 * Indexed by enum mandoclevel: the threshold error code for each level.
 * mandoc_msg() starts at MANDOCLEVEL_UNSUPP and steps the level down
 * while the error code is below the entry for that level.
 */
static	const enum mandocerr	mandoclimits[MANDOCLEVEL_MAX] = {
71
	MANDOCERR_OK,
72
	MANDOCERR_OK,
73
	MANDOCERR_WARNING,
74
	MANDOCERR_ERROR,
75
	MANDOCERR_UNSUPP,
76
	MANDOCERR_MAX,
77
	MANDOCERR_MAX
78
};
79
80
/*
 * Message text for each error code.  mparse_strerror() indexes this
 * table directly with an enum mandocerr value, so the entries have to
 * stay in the same order as the enumeration.
 * NOTE(review): one slot holds NULL instead of a string; presumably
 * that code carries its own text at the call site - confirm against
 * the enum declaration.
 */
static	const char * const	mandocerrs[MANDOCERR_MAX] = {
81
	"ok",
82
83
	"base system convention",
84
85
	"Mdocdate found",
86
	"Mdocdate missing",
87
	"unknown architecture",
88
	"operating system explicitly specified",
89
	"RCS id missing",
90
	"referenced manual not found",
91
92
	"generic style suggestion",
93
94
	"legacy man(7) date format",
95
	"lower case character in document title",
96
	"duplicate RCS id",
97
	"typo in section name",
98
	"unterminated quoted argument",
99
	"useless macro",
100
	"consider using OS macro",
101
	"errnos out of order",
102
	"duplicate errno",
103
	"trailing delimiter",
104
	"no blank before trailing delimiter",
105
	"fill mode already enabled, skipping",
106
	"fill mode already disabled, skipping",
107
	"function name without markup",
108
	"whitespace at end of input line",
109
	"bad comment style",
110
111
	"generic warning",
112
113
	/* related to the prologue */
114
	"missing manual title, using UNTITLED",
115
	"missing manual title, using \"\"",
116
	"missing manual section, using \"\"",
117
	"unknown manual section",
118
	"missing date, using today's date",
119
	"cannot parse date, using it verbatim",
120
	"date in the future, using it anyway",
121
	"missing Os macro, using \"\"",
122
	"late prologue macro",
123
	"prologue macros out of order",
124
125
	/* related to document structure */
126
	".so is fragile, better use ln(1)",
127
	"no document body",
128
	"content before first section header",
129
	"first section is not \"NAME\"",
130
	"NAME section without Nm before Nd",
131
	"NAME section without description",
132
	"description not at the end of NAME",
133
	"bad NAME section content",
134
	"missing comma before name",
135
	"missing description line, using \"\"",
136
	"description line outside NAME section",
137
	"sections out of conventional order",
138
	"duplicate section title",
139
	"unexpected section",
140
	"cross reference to self",
141
	"unusual Xr order",
142
	"unusual Xr punctuation",
143
	"AUTHORS section without An macro",
144
145
	/* related to macros and nesting */
146
	"obsolete macro",
147
	"macro neither callable nor escaped",
148
	"skipping paragraph macro",
149
	"moving paragraph macro out of list",
150
	"skipping no-space macro",
151
	"blocks badly nested",
152
	"nested displays are not portable",
153
	"moving content out of list",
154
	"first macro on line",
155
	"line scope broken",
156
	"skipping blank line in line scope",
157
158
	/* related to missing macro arguments */
159
	"skipping empty request",
160
	"conditional request controls empty scope",
161
	"skipping empty macro",
162
	"empty block",
163
	"empty argument, using 0n",
164
	"missing display type, using -ragged",
165
	"list type is not the first argument",
166
	"missing -width in -tag list, using 6n",
167
	"missing utility name, using \"\"",
168
	"missing function name, using \"\"",
169
	"empty head in list item",
170
	"empty list item",
171
	"missing argument, using next line",
172
	"missing font type, using \\fR",
173
	"unknown font type, using \\fR",
174
	"nothing follows prefix",
175
	"empty reference block",
176
	"missing section argument",
177
	"missing -std argument, adding it",
178
	"missing option string, using \"\"",
179
	"missing resource identifier, using \"\"",
180
	"missing eqn box, using \"\"",
181
182
	/* related to bad macro arguments */
183
	"duplicate argument",
184
	"skipping duplicate argument",
185
	"skipping duplicate display type",
186
	"skipping duplicate list type",
187
	"skipping -width argument",
188
	"wrong number of cells",
189
	"unknown AT&T UNIX version",
190
	"comma in function argument",
191
	"parenthesis in function name",
192
	"unknown library name",
193
	"invalid content in Rs block",
194
	"invalid Boolean argument",
195
	"unknown font, skipping request",
196
	"odd number of characters in request",
197
198
	/* related to plain text */
199
	"blank line in fill mode, using .sp",
200
	"tab in filled text",
201
	"new sentence, new line",
202
	"invalid escape sequence",
203
	"undefined string, using \"\"",
204
205
	/* related to tables */
206
	"tbl line starts with span",
207
	"tbl column starts with span",
208
	"skipping vertical bar in tbl layout",
209
210
	"generic error",
211
212
	/* related to tables */
213
	"non-alphabetic character in tbl options",
214
	"skipping unknown tbl option",
215
	"missing tbl option argument",
216
	"wrong tbl option argument size",
217
	"empty tbl layout",
218
	"invalid character in tbl layout",
219
	"unmatched parenthesis in tbl layout",
220
	"tbl without any data cells",
221
	"ignoring data in spanned tbl cell",
222
	"ignoring extra tbl data cells",
223
	"data block open at end of tbl",
224
225
	/* related to document structure and macros */
226
	NULL,
227
	"duplicate prologue macro",
228
	"skipping late title macro",
229
	"input stack limit exceeded, infinite loop?",
230
	"skipping bad character",
231
	"skipping unknown macro",
232
	"skipping insecure request",
233
	"skipping item outside list",
234
	"skipping column outside column list",
235
	"skipping end of block that is not open",
236
	"fewer RS blocks open, skipping",
237
	"inserting missing end of block",
238
	"appending missing end of block",
239
240
	/* related to request and macro arguments */
241
	"escaped character not allowed in a name",
242
	"NOT IMPLEMENTED: Bd -file",
243
	"skipping display without arguments",
244
	"missing list type, using -item",
245
	"argument is not numeric, using 1",
246
	"missing manual name, using \"\"",
247
	"uname(3) system call failed, using UNKNOWN",
248
	"unknown standard specifier",
249
	"skipping request without numeric argument",
250
	"NOT IMPLEMENTED: .so with absolute path or \"..\"",
251
	".so request failed",
252
	"skipping all arguments",
253
	"skipping excess arguments",
254
	"divide by zero",
255
256
	"unsupported feature",
257
	"input too large",
258
	"unsupported control character",
259
	"unsupported roff request",
260
	"eqn delim option in tbl",
261
	"unsupported tbl layout modifier",
262
	"ignoring macro in table",
263
};
264
265
/*
 * Name of each message level.  mparse_strlevel() indexes this table
 * directly with an enum mandoclevel value.
 */
static	const char * const	mandoclevels[MANDOCLEVEL_MAX] = {
266
	"SUCCESS",
267
	"STYLE",
268
	"WARNING",
269
	"ERROR",
270
	"UNSUPP",
271
	"BADARG",
272
	"SYSERR"
273
};
274
275
276
static void
277
resize_buf(struct buf *buf, size_t initial)
278
{
279
280
1991256
	buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
281
497814
	buf->buf = mandoc_realloc(buf->buf, buf->sz);
282
497814
}
283
284
static void
285
choose_parser(struct mparse *curp)
286
{
287
	char		*cp, *ep;
288
	int		 format;
289
290
	/*
291
	 * If neither command line arguments -mdoc or -man select
292
	 * a parser nor the roff parser found a .Dd or .TH macro
293
	 * yet, look ahead in the main input buffer.
294
	 */
295
296
44372
	if ((format = roff_getformat(curp->roff)) == 0) {
297
13
		cp = curp->primary->buf;
298
13
		ep = cp + curp->primary->sz;
299
404
		while (cp < ep) {
300

218
			if (*cp == '.' || *cp == '\'') {
301
168
				cp++;
302

168
				if (cp[0] == 'D' && cp[1] == 'd') {
303
					format = MPARSE_MDOC;
304
					break;
305
				}
306

172
				if (cp[0] == 'T' && cp[1] == 'H') {
307
					format = MPARSE_MAN;
308
4
					break;
309
				}
310
			}
311
189
			cp = memchr(cp, '\n', ep - cp);
312
189
			if (cp == NULL)
313
				break;
314
189
			cp++;
315
		}
316
	}
317
318
22186
	if (format == MPARSE_MDOC) {
319
17016
		curp->man->macroset = MACROSET_MDOC;
320
17016
		if (curp->man->mdocmac == NULL)
321
8311
			curp->man->mdocmac = roffhash_alloc(MDOC_Dd, MDOC_MAX);
322
	} else {
323
5170
		curp->man->macroset = MACROSET_MAN;
324
5170
		if (curp->man->manmac == NULL)
325
2193
			curp->man->manmac = roffhash_alloc(MAN_TH, MAN_MAX);
326
	}
327
22186
	curp->man->first->tok = TOKEN_NONE;
328
22186
}
329
330
/*
331
 * Main parse routine for a buffer.
332
 * It assumes encoding and line numbering are already set up.
333
 * It can recurse directly (for invocations of user-defined
334
 * macros, inline equations, and input line traps)
335
 * and indirectly (for .so file inclusion).
336
 */
337
static int
338
mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start)
339
{
340
162102
	struct buf	 ln;
341
	const char	*save_file;
342
162102
	char		*cp;
343
162102
	size_t		 pos; /* byte number in the ln buffer */
344
	enum rofferr	 rr;
345
162102
	int		 of;
346
	int		 lnn; /* line number in the real file */
347
	int		 fd;
348
	unsigned char	 c;
349
350
162102
	memset(&ln, 0, sizeof(ln));
351
352
162102
	lnn = curp->line;
353
162102
	pos = 0;
354
355
6859344
	while (i < blk.sz) {
356

7833813
		if (0 == pos && '\0' == blk.buf[i])
357
			break;
358
359
3838031
		if (start) {
360
3543000
			curp->line = lnn;
361
3543000
			curp->reparse_count = 0;
362
363

3568102
			if (lnn < 3 &&
364
48750
			    curp->filenc & MPARSE_UTF8 &&
365
25102
			    curp->filenc & MPARSE_LATIN1)
366
25102
				curp->filenc = preconv_cue(&blk, i);
367
		}
368
369

342551767
		while (i < blk.sz && (start || blk.buf[i] != '\0')) {
370
371
			/*
372
			 * When finding an unescaped newline character,
373
			 * leave the character loop to process the line.
374
			 * Skip a preceding carriage return, if any.
375
			 */
376
377

114691897
			if ('\r' == blk.buf[i] && i + 1 < blk.sz &&
378
			    '\n' == blk.buf[i + 1])
379
				++i;
380
114691897
			if ('\n' == blk.buf[i]) {
381
3837929
				++i;
382
3837929
				++lnn;
383
3837929
				break;
384
			}
385
386
			/*
387
			 * Make sure we have space for the worst
388
			 * case of 11 bytes: "\\[u10ffff]\0"
389
			 */
390
391
110853968
			if (pos + 11 > ln.sz)
392
497787
				resize_buf(&ln, 256);
393
394
			/*
395
			 * Encode 8-bit input.
396
			 */
397
398
110853968
			c = blk.buf[i];
399
110853968
			if (c & 0x80) {
400

24416
				if ( ! (curp->filenc && preconv_encode(
401
				    &blk, &i, &ln, &pos, &curp->filenc))) {
402
21527
					mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
403
21527
					    curp->line, pos, "0x%x", c);
404
21527
					ln.buf[pos++] = '?';
405
21527
					i++;
406
21527
				}
407
				continue;
408
			}
409
410
			/*
411
			 * Exclude control characters.
412
			 */
413
414

221728280
			if (c == 0x7f || (c < 0x20 && c != 0x09)) {
415

351
				mandoc_vmsg(c == 0x00 || c == 0x04 ||
416
54
				    c > 0x0a ? MANDOCERR_CHAR_BAD :
417
				    MANDOCERR_CHAR_UNSUPP,
418
81
				    curp, curp->line, pos, "0x%x", c);
419
81
				i++;
420
81
				if (c != '\r')
421
81
					ln.buf[pos++] = '?';
422
				continue;
423
			}
424
425
110831874
			ln.buf[pos++] = blk.buf[i++];
426
		}
427
428
3838031
		if (pos + 1 >= ln.sz)
429
26
			resize_buf(&ln, 256);
430
431

7651640
		if (i == blk.sz || blk.buf[i] == '\0')
432
144101
			ln.buf[pos++] = '\n';
433
3838031
		ln.buf[pos] = '\0';
434
435
		/*
436
		 * A significant amount of complexity is contained by
437
		 * the roff preprocessor.  It's line-oriented but can be
438
		 * expressed on one line, so we need at times to
439
		 * readjust our starting point and re-run it.  The roff
440
		 * preprocessor can also readjust the buffers with new
441
		 * data, so we pass them in wholesale.
442
		 */
443
444
3838031
		of = 0;
445
446
		/*
447
		 * Maintain a lookaside buffer of all parsed lines.  We
448
		 * only do this if mparse_keep() has been invoked (the
449
		 * buffer may be accessed with mparse_getkeep()).
450
		 */
451
452
3838031
		if (curp->secondary) {
453
47466
			curp->secondary->buf = mandoc_realloc(
454
47466
			    curp->secondary->buf,
455
47466
			    curp->secondary->sz + pos + 2);
456
142398
			memcpy(curp->secondary->buf +
457
47466
			    curp->secondary->sz,
458
47466
			    ln.buf, pos);
459
47466
			curp->secondary->sz += pos;
460
94932
			curp->secondary->buf
461
94932
				[curp->secondary->sz] = '\n';
462
47466
			curp->secondary->sz++;
463
94932
			curp->secondary->buf
464
94932
				[curp->secondary->sz] = '\0';
465
47466
		}
466
rerun:
467
7825445
		rr = roff_parseln(curp->roff, curp->line, &ln, &of);
468
469

7825445
		switch (rr) {
470
		case ROFF_REPARSE:
471
137697
			if (++curp->reparse_count > REPARSE_LIMIT)
472
18
				mandoc_msg(MANDOCERR_ROFFLOOP, curp,
473
18
				    curp->line, pos, NULL);
474
275358
			else if (mparse_buf_r(curp, ln, of, 0) == 1 ||
475
137679
			    start == 1) {
476
119697
				pos = 0;
477
119697
				continue;
478
			}
479
18000
			free(ln.buf);
480
18000
			return 0;
481
		case ROFF_APPEND:
482
140662
			pos = strlen(ln.buf);
483
140662
			continue;
484
		case ROFF_RERUN:
485
			goto rerun;
486
		case ROFF_IGN:
487
773673
			pos = 0;
488
773673
			continue;
489
		case ROFF_SO:
490

94
			if ( ! (curp->options & MPARSE_SO) &&
491
94
			    (i >= blk.sz || blk.buf[i] == '\0')) {
492
94
				curp->sodest = mandoc_strdup(ln.buf + of);
493
94
				free(ln.buf);
494
94
				return 1;
495
			}
496
			/*
497
			 * We remove `so' clauses from our lookaside
498
			 * buffer because we're going to descend into
499
			 * the file recursively.
500
			 */
501
			if (curp->secondary)
502
				curp->secondary->sz -= pos + 1;
503
			save_file = curp->file;
504
			if ((fd = mparse_open(curp, ln.buf + of)) != -1) {
505
				mparse_readfd(curp, fd, ln.buf + of);
506
				close(fd);
507
				curp->file = save_file;
508
			} else {
509
				curp->file = save_file;
510
				mandoc_vmsg(MANDOCERR_SO_FAIL,
511
				    curp, curp->line, pos,
512
				    ".so %s", ln.buf + of);
513
				ln.sz = mandoc_asprintf(&cp,
514
				    ".sp\nSee the file %s.\n.sp",
515
				    ln.buf + of);
516
				free(ln.buf);
517
				ln.buf = cp;
518
				of = 0;
519
				mparse_buf_r(curp, ln, of, 0);
520
			}
521
			pos = 0;
522
			continue;
523
		default:
524
			break;
525
		}
526
527
2785905
		if (curp->man->macroset == MACROSET_NONE)
528
22186
			choose_parser(curp);
529
530

11143620
		if ((curp->man->macroset == MACROSET_MDOC ?
531
1592461
		    mdoc_parseln(curp->man, curp->line, ln.buf, of) :
532
3979349
		    man_parseln(curp->man, curp->line, ln.buf, of)) == 2)
533
				break;
534
535
		/* Temporary buffers typically are not full. */
536
537

2858140
		if (0 == start && '\0' == blk.buf[i])
538
			break;
539
540
		/* Start the next input line. */
541
542
2750554
		pos = 0;
543
	}
544
545
144008
	free(ln.buf);
546
144008
	return 1;
547
162102
}
548
549
static int
550
read_whole_file(struct mparse *curp, const char *file, int fd,
551
		struct buf *fb, int *with_mmap)
552
{
553
48846
	struct stat	 st;
554
	gzFile		 gz;
555
	size_t		 off;
556
	ssize_t		 ssz;
557
558
24423
	if (fstat(fd, &st) == -1) {
559
		mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
560
		    "fstat: %s", strerror(errno));
561
		return 0;
562
	}
563
564
	/*
565
	 * If we're a regular file, try just reading in the whole entry
566
	 * via mmap().  This is faster than reading it into blocks, and
567
	 * since each file is only a few bytes to begin with, I'm not
568
	 * concerned that this is going to tank any machines.
569
	 */
570
571

48846
	if (curp->gzip == 0 && S_ISREG(st.st_mode)) {
572
24423
		if (st.st_size > 0x7fffffff) {
573
			mandoc_msg(MANDOCERR_TOOLARGE, curp, 0, 0, NULL);
574
			return 0;
575
		}
576
24423
		*with_mmap = 1;
577
24423
		fb->sz = (size_t)st.st_size;
578
24423
		fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
579
24423
		if (fb->buf != MAP_FAILED)
580
24422
			return 1;
581
	}
582
583
1
	if (curp->gzip) {
584
		if ((gz = gzdopen(fd, "rb")) == NULL) {
585
			mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
586
			    "gzdopen: %s", strerror(errno));
587
			return 0;
588
		}
589
	} else
590
		gz = NULL;
591
592
	/*
593
	 * If this isn't a regular file (like, say, stdin), then we must
594
	 * go the old way and just read things in bit by bit.
595
	 */
596
597
1
	*with_mmap = 0;
598
	off = 0;
599
1
	fb->sz = 0;
600
1
	fb->buf = NULL;
601
1
	for (;;) {
602
1
		if (off == fb->sz) {
603
1
			if (fb->sz == (1U << 31)) {
604
				mandoc_msg(MANDOCERR_TOOLARGE, curp,
605
				    0, 0, NULL);
606
				break;
607
			}
608
1
			resize_buf(fb, 65536);
609
1
		}
610
3
		ssz = curp->gzip ?
611
		    gzread(gz, fb->buf + (int)off, fb->sz - off) :
612
1
		    read(fd, fb->buf + (int)off, fb->sz - off);
613
1
		if (ssz == 0) {
614
1
			fb->sz = off;
615
1
			return 1;
616
		}
617
		if (ssz == -1) {
618
			mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
619
			    "read: %s", strerror(errno));
620
			break;
621
		}
622
		off += (size_t)ssz;
623
	}
624
625
	free(fb->buf);
626
	fb->buf = NULL;
627
	return 0;
628
24423
}
629
630
static void
631
mparse_end(struct mparse *curp)
632
{
633
48846
	if (curp->man->macroset == MACROSET_NONE)
634
95
		curp->man->macroset = MACROSET_MAN;
635
24423
	if (curp->man->macroset == MACROSET_MDOC)
636
17250
		mdoc_endparse(curp->man);
637
	else
638
7173
		man_endparse(curp->man);
639
24423
	roff_endparse(curp->roff);
640
24423
}
641
642
static void
643
mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
644
{
645
	struct buf	*svprimary;
646
	const char	*svfile;
647
	size_t		 offset;
648
	static int	 recursion_depth;
649
650
24423
	if (64 < recursion_depth) {
651
		mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
652
		return;
653
	}
654
655
	/* Line number is per-file. */
656
24423
	svfile = curp->file;
657
24423
	curp->file = file;
658
24423
	svprimary = curp->primary;
659
24423
	curp->primary = &blk;
660
24423
	curp->line = 1;
661
24423
	recursion_depth++;
662
663
	/* Skip an UTF-8 byte order mark. */
664

36974
	if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 &&
665
12551
	    (unsigned char)blk.buf[0] == 0xef &&
666
	    (unsigned char)blk.buf[1] == 0xbb &&
667
	    (unsigned char)blk.buf[2] == 0xbf) {
668
		offset = 3;
669
		curp->filenc &= ~MPARSE_LATIN1;
670
	} else
671
		offset = 0;
672
673
24423
	mparse_buf_r(curp, blk, offset, 1);
674
675
24423
	if (--recursion_depth == 0)
676
24423
		mparse_end(curp);
677
678
24423
	curp->primary = svprimary;
679
24423
	curp->file = svfile;
680
48846
}
681
682
/*
683
 * Read the whole file into memory and call the parsers.
684
 * Called recursively when an .so request is encountered.
685
 */
686
enum mandoclevel
687
mparse_readfd(struct mparse *curp, int fd, const char *file)
688
{
689
48846
	struct buf	 blk;
690
24423
	int		 with_mmap;
691
	int		 save_filenc;
692
693
24423
	if (read_whole_file(curp, file, fd, &blk, &with_mmap)) {
694
24423
		save_filenc = curp->filenc;
695
24423
		curp->filenc = curp->options &
696
		    (MPARSE_UTF8 | MPARSE_LATIN1);
697
24423
		mparse_parse_buffer(curp, blk, file);
698
24423
		curp->filenc = save_filenc;
699
24423
		if (with_mmap)
700
24422
			munmap(blk.buf, blk.sz);
701
		else
702
1
			free(blk.buf);
703
	}
704
48846
	return curp->file_status;
705
24423
}
706
707
int
708
mparse_open(struct mparse *curp, const char *file)
709
{
710
48846
	char		 *cp;
711
	int		  fd;
712
713
24423
	curp->file = file;
714
24423
	cp = strrchr(file, '.');
715
73269
	curp->gzip = (cp != NULL && ! strcmp(cp + 1, "gz"));
716
717
	/* First try to use the filename as it is. */
718
719
24423
	if ((fd = open(file, O_RDONLY)) != -1)
720
24423
		return fd;
721
722
	/*
723
	 * If that doesn't work and the filename doesn't
724
	 * already  end in .gz, try appending .gz.
725
	 */
726
727
	if ( ! curp->gzip) {
728
		mandoc_asprintf(&cp, "%s.gz", file);
729
		fd = open(cp, O_RDONLY);
730
		free(cp);
731
		if (fd != -1) {
732
			curp->gzip = 1;
733
			return fd;
734
		}
735
	}
736
737
	/* Neither worked, give up. */
738
739
	mandoc_msg(MANDOCERR_FILE, curp, 0, 0, strerror(errno));
740
	return -1;
741
24423
}
742
743
struct mparse *
744
mparse_alloc(int options, enum mandocerr mmin, mandocmsg mmsg,
745
    enum mandoc_os os_e, const char *os_s)
746
{
747
	struct mparse	*curp;
748
749
25294
	curp = mandoc_calloc(1, sizeof(struct mparse));
750
751
12647
	curp->options = options;
752
12647
	curp->mmin = mmin;
753
12647
	curp->mmsg = mmsg;
754
12647
	curp->os_s = os_s;
755
756
12647
	curp->roff = roff_alloc(curp, options);
757
25294
	curp->man = roff_man_alloc(curp->roff, curp, curp->os_s,
758
12647
		curp->options & MPARSE_QUICK ? 1 : 0);
759
12647
	if (curp->options & MPARSE_MDOC) {
760
234
		curp->man->macroset = MACROSET_MDOC;
761
234
		if (curp->man->mdocmac == NULL)
762
234
			curp->man->mdocmac = roffhash_alloc(MDOC_Dd, MDOC_MAX);
763
12413
	} else if (curp->options & MPARSE_MAN) {
764
1908
		curp->man->macroset = MACROSET_MAN;
765
1908
		if (curp->man->manmac == NULL)
766
1908
			curp->man->manmac = roffhash_alloc(MAN_TH, MAN_MAX);
767
	}
768
12647
	curp->man->first->tok = TOKEN_NONE;
769
12647
	curp->man->meta.os_e = os_e;
770
12647
	return curp;
771
}
772
773
void
774
mparse_reset(struct mparse *curp)
775
{
776
23744
	roff_reset(curp->roff);
777
11872
	roff_man_reset(curp->man);
778
779
11872
	free(curp->sodest);
780
11872
	curp->sodest = NULL;
781
782
11872
	if (curp->secondary)
783
		curp->secondary->sz = 0;
784
785
11872
	curp->file_status = MANDOCLEVEL_OK;
786
11872
	curp->gzip = 0;
787
11872
}
788
789
void
790
mparse_free(struct mparse *curp)
791
{
792
793
25294
	roffhash_free(curp->man->mdocmac);
794
12647
	roffhash_free(curp->man->manmac);
795
12647
	roff_man_free(curp->man);
796
12647
	roff_free(curp->roff);
797
12647
	if (curp->secondary)
798
1899
		free(curp->secondary->buf);
799
800
12647
	free(curp->secondary);
801
12647
	free(curp->sodest);
802
12647
	free(curp);
803
12647
}
804
805
void
806
mparse_result(struct mparse *curp, struct roff_man **man,
807
	char **sodest)
808
{
809
810

60718
	if (sodest && NULL != (*sodest = curp->sodest)) {
811
94
		*man = NULL;
812
94
		return;
813
	}
814
24329
	if (man)
815
24329
		*man = curp->man;
816
24423
}
817
818
void
819
mparse_updaterc(struct mparse *curp, enum mandoclevel *rc)
820
{
821
25102
	if (curp->file_status > *rc)
822
945
		*rc = curp->file_status;
823
12551
}
824
825
void
826
mandoc_vmsg(enum mandocerr t, struct mparse *m,
827
		int ln, int pos, const char *fmt, ...)
828
{
829
72962
	char		 buf[256];
830
36481
	va_list		 ap;
831
832
36481
	va_start(ap, fmt);
833
36481
	(void)vsnprintf(buf, sizeof(buf), fmt, ap);
834
36481
	va_end(ap);
835
836
36481
	mandoc_msg(t, m, ln, pos, buf);
837
36481
}
838
839
void
840
mandoc_msg(enum mandocerr er, struct mparse *m,
841
		int ln, int col, const char *msg)
842
{
843
	enum mandoclevel level;
844
845
138812
	if (er < m->mmin && er != MANDOCERR_FILE)
846
60496
		return;
847
848
	level = MANDOCLEVEL_UNSUPP;
849
51300
	while (er < mandoclimits[level])
850
16740
		level--;
851
852
8910
	if (m->mmsg)
853
8910
		(*m->mmsg)(er, level, m->file, ln, col, msg);
854
855
8910
	if (m->file_status < level)
856
2223
		m->file_status = level;
857
78316
}
858
859
const char *
860
mparse_strerror(enum mandocerr er)
861
{
862
863
17838
	return mandocerrs[er];
864
}
865
866
const char *
867
mparse_strlevel(enum mandoclevel lvl)
868
{
869
17838
	return mandoclevels[lvl];
870
}
871
872
void
873
mparse_keep(struct mparse *p)
874
{
875
876
3798
	assert(NULL == p->secondary);
877
1899
	p->secondary = mandoc_calloc(1, sizeof(struct buf));
878
1899
}
879
880
const char *
881
mparse_getkeep(const struct mparse *p)
882
{
883
884
	assert(p->secondary);
885
	return p->secondary->sz ? p->secondary->buf : NULL;
886
}