GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: usr.bin/mandoc/read.c Lines: 233 309 75.4 %
Date: 2017-11-13 Branches: 130 200 65.0 %

Line Branch Exec Source
1
/*	$OpenBSD: read.c,v 1.165 2017/11/10 22:48:05 jca Exp $ */
2
/*
3
 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4
 * Copyright (c) 2010-2017 Ingo Schwarze <schwarze@openbsd.org>
5
 * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
6
 *
7
 * Permission to use, copy, modify, and distribute this software for any
8
 * purpose with or without fee is hereby granted, provided that the above
9
 * copyright notice and this permission notice appear in all copies.
10
 *
11
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
12
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
14
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18
 */
19
#include <sys/types.h>
20
#include <sys/mman.h>
21
#include <sys/stat.h>
22
23
#include <assert.h>
24
#include <ctype.h>
25
#include <errno.h>
26
#include <fcntl.h>
27
#include <stdarg.h>
28
#include <stdio.h>
29
#include <stdlib.h>
30
#include <string.h>
31
#include <unistd.h>
32
#include <zlib.h>
33
34
#include "mandoc_aux.h"
35
#include "mandoc.h"
36
#include "roff.h"
37
#include "mdoc.h"
38
#include "man.h"
39
#include "libmandoc.h"
40
41
#define	REPARSE_LIMIT	1000
42
43
struct	mparse {
44
	struct roff	 *roff; /* roff parser (!NULL) */
45
	struct roff_man	 *man; /* man parser */
46
	char		 *sodest; /* filename pointed to by .so */
47
	const char	 *file; /* filename of current input file */
48
	struct buf	 *primary; /* buffer currently being parsed */
49
	struct buf	 *secondary; /* preprocessed copy of input */
50
	const char	 *os_s; /* default operating system */
51
	mandocmsg	  mmsg; /* warning/error message handler */
52
	enum mandoclevel  file_status; /* status of current parse */
53
	enum mandocerr	  mmin; /* ignore messages below this */
54
	int		  options; /* parser options */
55
	int		  gzip; /* current input file is gzipped */
56
	int		  filenc; /* encoding of the current file */
57
	int		  reparse_count; /* finite interp. stack */
58
	int		  line; /* line number in the file */
59
};
60
61
/* File-local helpers, defined further down. */
static	void	  choose_parser(struct mparse *curp);
static	void	  resize_buf(struct buf *buf, size_t initial);
static	int	  mparse_buf_r(struct mparse *curp, struct buf blk,
			size_t i, int start);
static	int	  read_whole_file(struct mparse *curp, const char *file,
			int fd, struct buf *fb, int *with_mmap);
static	void	  mparse_end(struct mparse *curp);
static	void	  mparse_parse_buffer(struct mparse *curp, struct buf blk,
			const char *file);
69
70
static	const enum mandocerr	mandoclimits[MANDOCLEVEL_MAX] = {
71
	MANDOCERR_OK,
72
	MANDOCERR_OK,
73
	MANDOCERR_WARNING,
74
	MANDOCERR_ERROR,
75
	MANDOCERR_UNSUPP,
76
	MANDOCERR_MAX,
77
	MANDOCERR_MAX
78
};
79
80
/*
 * Human-readable message text, indexed by enum mandocerr and
 * returned by mparse_strerror().  The order must match the
 * enumeration exactly.
 * NOTE(review): the entries standing alone between blank lines
 * ("ok", "generic warning", "generic error", ...) presumably line
 * up with the level-boundary constants used in mandoclimits[];
 * confirm against the declaration of enum mandocerr.
 */
static	const char * const	mandocerrs[MANDOCERR_MAX] = {
	"ok",

	"base system convention",

	"Mdocdate found",
	"Mdocdate missing",
	"unknown architecture",
	"operating system explicitly specified",
	"RCS id missing",
	"referenced manual not found",

	"generic style suggestion",

	"legacy man(7) date format",
	"lower case character in document title",
	"duplicate RCS id",
	"possible typo in section name",
	"unterminated quoted argument",
	"useless macro",
	"consider using OS macro",
	"errnos out of order",
	"duplicate errno",
	"trailing delimiter",
	"no blank before trailing delimiter",
	"fill mode already enabled, skipping",
	"fill mode already disabled, skipping",
	"function name without markup",
	"whitespace at end of input line",
	"bad comment style",

	"generic warning",

	/* warnings: prologue */
	"missing manual title, using UNTITLED",
	"missing manual title, using \"\"",
	"missing manual section, using \"\"",
	"unknown manual section",
	"missing date, using today's date",
	"cannot parse date, using it verbatim",
	"date in the future, using it anyway",
	"missing Os macro, using \"\"",
	"late prologue macro",
	"prologue macros out of order",

	/* warnings: document structure */
	".so is fragile, better use ln(1)",
	"no document body",
	"content before first section header",
	"first section is not \"NAME\"",
	"NAME section without Nm before Nd",
	"NAME section without description",
	"description not at the end of NAME",
	"bad NAME section content",
	"missing comma before name",
	"missing description line, using \"\"",
	"description line outside NAME section",
	"sections out of conventional order",
	"duplicate section title",
	"unexpected section",
	"cross reference to self",
	"unusual Xr order",
	"unusual Xr punctuation",
	"AUTHORS section without An macro",

	/* warnings: macros and nesting */
	"obsolete macro",
	"macro neither callable nor escaped",
	"skipping paragraph macro",
	"moving paragraph macro out of list",
	"skipping no-space macro",
	"blocks badly nested",
	"nested displays are not portable",
	"moving content out of list",
	"first macro on line",
	"line scope broken",
	"skipping blank line in line scope",

	/* warnings: missing macro arguments */
	"skipping empty request",
	"conditional request controls empty scope",
	"skipping empty macro",
	"empty block",
	"empty argument, using 0n",
	"missing display type, using -ragged",
	"list type is not the first argument",
	"missing -width in -tag list, using 6n",
	"missing utility name, using \"\"",
	"missing function name, using \"\"",
	"empty head in list item",
	"empty list item",
	"missing argument, using next line",
	"missing font type, using \\fR",
	"unknown font type, using \\fR",
	"nothing follows prefix",
	"empty reference block",
	"missing section argument",
	"missing -std argument, adding it",
	"missing option string, using \"\"",
	"missing resource identifier, using \"\"",
	"missing eqn box, using \"\"",

	/* warnings: bad macro arguments */
	"duplicate argument",
	"skipping duplicate argument",
	"skipping duplicate display type",
	"skipping duplicate list type",
	"skipping -width argument",
	"wrong number of cells",
	"unknown AT&T UNIX version",
	"comma in function argument",
	"parenthesis in function name",
	"unknown library name",
	"invalid content in Rs block",
	"invalid Boolean argument",
	"unknown font, skipping request",
	"odd number of characters in request",

	/* warnings: plain text */
	"blank line in fill mode, using .sp",
	"tab in filled text",
	"new sentence, new line",
	"invalid escape sequence",
	"undefined string, using \"\"",

	/* warnings: tables */
	"tbl line starts with span",
	"tbl column starts with span",
	"skipping vertical bar in tbl layout",

	"generic error",

	/* errors: tables */
	"non-alphabetic character in tbl options",
	"skipping unknown tbl option",
	"missing tbl option argument",
	"wrong tbl option argument size",
	"empty tbl layout",
	"invalid character in tbl layout",
	"unmatched parenthesis in tbl layout",
	"tbl without any data cells",
	"ignoring data in spanned tbl cell",
	"ignoring extra tbl data cells",
	"data block open at end of tbl",

	/*
	 * errors: document structure and macros
	 * NOTE(review): the NULL slot has no fixed text; presumably
	 * it is MANDOCERR_FILE, whose callers pass the text
	 * themselves -- confirm against enum mandocerr.
	 */
	NULL,
	"duplicate prologue macro",
	"skipping late title macro",
	"input stack limit exceeded, infinite loop?",
	"skipping bad character",
	"skipping unknown macro",
	"skipping insecure request",
	"skipping item outside list",
	"skipping column outside column list",
	"skipping end of block that is not open",
	"fewer RS blocks open, skipping",
	"inserting missing end of block",
	"appending missing end of block",

	/* errors: request and macro arguments */
	"escaped character not allowed in a name",
	"NOT IMPLEMENTED: Bd -file",
	"skipping display without arguments",
	"missing list type, using -item",
	"argument is not numeric, using 1",
	"missing manual name, using \"\"",
	"uname(3) system call failed, using UNKNOWN",
	"unknown standard specifier",
	"skipping request without numeric argument",
	"NOT IMPLEMENTED: .so with absolute path or \"..\"",
	".so request failed",
	"skipping all arguments",
	"skipping excess arguments",
	"divide by zero",

	"unsupported feature",
	"input too large",
	"unsupported control character",
	"unsupported roff request",
	"eqn delim option in tbl",
	"unsupported tbl layout modifier",
	"ignoring macro in table",
};
264
265
/*
 * Display names of the message levels, indexed by
 * enum mandoclevel and returned by mparse_strlevel().
 */
static	const char * const	mandoclevels[MANDOCLEVEL_MAX] = {
	"SUCCESS",	/* index 0 */
	"STYLE",	/* index 1 */
	"WARNING",	/* index 2 */
	"ERROR",	/* index 3 */
	"UNSUPP",	/* index 4 */
	"BADARG",	/* index 5 */
	"SYSERR"	/* index 6 */
};
274
275
276
static void
277
resize_buf(struct buf *buf, size_t initial)
278
{
279
280
621696
	buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
281
155424
	buf->buf = mandoc_realloc(buf->buf, buf->sz);
282
155424
}
283
284
static void
285
choose_parser(struct mparse *curp)
286
{
287
	char		*cp, *ep;
288
	int		 format;
289
290
	/*
291
	 * If neither command line arguments -mdoc or -man select
292
	 * a parser nor the roff parser found a .Dd or .TH macro
293
	 * yet, look ahead in the main input buffer.
294
	 */
295
296
14620
	if ((format = roff_getformat(curp->roff)) == 0) {
297
3
		cp = curp->primary->buf;
298
3
		ep = cp + curp->primary->sz;
299
36
		while (cp < ep) {
300

21
			if (*cp == '.' || *cp == '\'') {
301
9
				cp++;
302

9
				if (cp[0] == 'D' && cp[1] == 'd') {
303
					format = MPARSE_MDOC;
304
					break;
305
				}
306

9
				if (cp[0] == 'T' && cp[1] == 'H') {
307
					format = MPARSE_MAN;
308
					break;
309
				}
310
			}
311
15
			cp = memchr(cp, '\n', ep - cp);
312
15
			if (cp == NULL)
313
				break;
314
15
			cp++;
315
		}
316
	}
317
318
7310
	if (format == MPARSE_MDOC) {
319
5684
		curp->man->macroset = MACROSET_MDOC;
320
5684
		if (curp->man->mdocmac == NULL)
321
2776
			curp->man->mdocmac = roffhash_alloc(MDOC_Dd, MDOC_MAX);
322
	} else {
323
1626
		curp->man->macroset = MACROSET_MAN;
324
1626
		if (curp->man->manmac == NULL)
325
744
			curp->man->manmac = roffhash_alloc(MAN_TH, MAN_MAX);
326
	}
327
7310
	curp->man->first->tok = TOKEN_NONE;
328
7310
}
329
330
/*
331
 * Main parse routine for a buffer.
332
 * It assumes encoding and line numbering are already set up.
333
 * It can recurse directly (for invocations of user-defined
334
 * macros, inline equations, and input line traps)
335
 * and indirectly (for .so file inclusion).
336
 */
337
static int
338
mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start)
339
{
340
51315
	struct buf	 ln;
341
	const char	*save_file;
342
51315
	char		*cp;
343
51315
	size_t		 pos; /* byte number in the ln buffer */
344
	enum rofferr	 rr;
345
51315
	int		 of;
346
	int		 lnn; /* line number in the real file */
347
	int		 fd;
348
	unsigned char	 c;
349
350
51315
	memset(&ln, 0, sizeof(ln));
351
352
51315
	lnn = curp->line;
353
51315
	pos = 0;
354
355
2208921
	while (i < blk.sz) {
356

2517956
		if (0 == pos && '\0' == blk.buf[i])
357
			break;
358
359
1234526
		if (start) {
360
1142021
			curp->line = lnn;
361
1142021
			curp->reparse_count = 0;
362
363

1150413
			if (lnn < 3 &&
364
16048
			    curp->filenc & MPARSE_UTF8 &&
365
8392
			    curp->filenc & MPARSE_LATIN1)
366
8392
				curp->filenc = preconv_cue(&blk, i);
367
		}
368
369

110589546
		while (i < blk.sz && (start || blk.buf[i] != '\0')) {
370
371
			/*
372
			 * When finding an unescaped newline character,
373
			 * leave the character loop to process the line.
374
			 * Skip a preceding carriage return, if any.
375
			 */
376
377

36622378
			if ('\r' == blk.buf[i] && i + 1 < blk.sz &&
378
			    '\n' == blk.buf[i + 1])
379
				++i;
380
36622378
			if ('\n' == blk.buf[i]) {
381
1234497
				++i;
382
1234497
				++lnn;
383
1234497
				break;
384
			}
385
386
			/*
387
			 * Make sure we have space for the worst
388
			 * case of 11 bytes: "\\[u10ffff]\0"
389
			 */
390
391
35387881
			if (pos + 11 > ln.sz)
392
155422
				resize_buf(&ln, 256);
393
394
			/*
395
			 * Encode 8-bit input.
396
			 */
397
398
35387881
			c = blk.buf[i];
399
35387881
			if (c & 0x80) {
400

8024
				if ( ! (curp->filenc && preconv_encode(
401
				    &blk, &i, &ln, &pos, &curp->filenc))) {
402
6847
					mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
403
6847
					    curp->line, pos, "0x%x", c);
404
6847
					ln.buf[pos++] = '?';
405
6847
					i++;
406
6847
				}
407
7045
				continue;
408
			}
409
410
			/*
411
			 * Exclude control characters.
412
			 */
413
414

70783555
			if (c == 0x7f || (c < 0x20 && c != 0x09)) {
415

143
				mandoc_vmsg(c == 0x00 || c == 0x04 ||
416
22
				    c > 0x0a ? MANDOCERR_CHAR_BAD :
417
				    MANDOCERR_CHAR_UNSUPP,
418
33
				    curp, curp->line, pos, "0x%x", c);
419
33
				i++;
420
33
				if (c != '\r')
421
33
					ln.buf[pos++] = '?';
422
33
				continue;
423
			}
424
425
35380803
			ln.buf[pos++] = blk.buf[i++];
426
		}
427
428
1234526
		if (pos + 1 >= ln.sz)
429
2
			resize_buf(&ln, 256);
430
431

2461026
		if (i == blk.sz || blk.buf[i] == '\0')
432
45315
			ln.buf[pos++] = '\n';
433
1234526
		ln.buf[pos] = '\0';
434
435
		/*
436
		 * A significant amount of complexity is contained by
437
		 * the roff preprocessor.  It's line-oriented but can be
438
		 * expressed on one line, so we need at times to
439
		 * readjust our starting point and re-run it.  The roff
440
		 * preprocessor can also readjust the buffers with new
441
		 * data, so we pass them in wholesale.
442
		 */
443
444
1234526
		of = 0;
445
446
		/*
447
		 * Maintain a lookaside buffer of all parsed lines.  We
448
		 * only do this if mparse_keep() has been invoked (the
449
		 * buffer may be accessed with mparse_getkeep()).
450
		 */
451
452
2469052
		if (curp->secondary) {
453
15822
			curp->secondary->buf = mandoc_realloc(
454
15822
			    curp->secondary->buf,
455
15822
			    curp->secondary->sz + pos + 2);
456
47466
			memcpy(curp->secondary->buf +
457
15822
			    curp->secondary->sz,
458
15822
			    ln.buf, pos);
459
15822
			curp->secondary->sz += pos;
460
31644
			curp->secondary->buf
461
31644
				[curp->secondary->sz] = '\n';
462
15822
			curp->secondary->sz++;
463
31644
			curp->secondary->buf
464
31644
				[curp->secondary->sz] = '\0';
465
15822
		}
466
rerun:
467
1283979
		rr = roff_parseln(curp->roff, curp->line, &ln, &of);
468
469

1283979
		switch (rr) {
470
		case ROFF_REPARSE:
471
43295
			if (++curp->reparse_count > REPARSE_LIMIT)
472
6
				mandoc_msg(MANDOCERR_ROFFLOOP, curp,
473
6
				    curp->line, pos, NULL);
474
86578
			else if (mparse_buf_r(curp, ln, of, 0) == 1 ||
475
43289
			    start == 1) {
476
37295
				pos = 0;
477
37295
				continue;
478
			}
479
6000
			free(ln.buf);
480
6000
			return 0;
481
		case ROFF_APPEND:
482
43828
			pos = strlen(ln.buf);
483
43828
			continue;
484
		case ROFF_RERUN:
485
49453
			goto rerun;
486
		case ROFF_IGN:
487
247328
			pos = 0;
488
247328
			continue;
489
		case ROFF_SO:
490

4
			if ( ! (curp->options & MPARSE_SO) &&
491
4
			    (i >= blk.sz || blk.buf[i] == '\0')) {
492
4
				curp->sodest = mandoc_strdup(ln.buf + of);
493
4
				free(ln.buf);
494
4
				return 1;
495
			}
496
			/*
497
			 * We remove `so' clauses from our lookaside
498
			 * buffer because we're going to descend into
499
			 * the file recursively.
500
			 */
501
			if (curp->secondary)
502
				curp->secondary->sz -= pos + 1;
503
			save_file = curp->file;
504
			if ((fd = mparse_open(curp, ln.buf + of)) != -1) {
505
				mparse_readfd(curp, fd, ln.buf + of);
506
				close(fd);
507
				curp->file = save_file;
508
			} else {
509
				curp->file = save_file;
510
				mandoc_vmsg(MANDOCERR_SO_FAIL,
511
				    curp, curp->line, pos,
512
				    ".so %s", ln.buf + of);
513
				ln.sz = mandoc_asprintf(&cp,
514
				    ".sp\nSee the file %s.\n.sp",
515
				    ln.buf + of);
516
				free(ln.buf);
517
				ln.buf = cp;
518
				of = 0;
519
				mparse_buf_r(curp, ln, of, 0);
520
			}
521
			pos = 0;
522
			continue;
523
		default:
524
			break;
525
		}
526
527
900071
		if (curp->man->macroset == MACROSET_NONE)
528
7310
			choose_parser(curp);
529
530

3600284
		if ((curp->man->macroset == MACROSET_MDOC ?
531
530564
		    mdoc_parseln(curp->man, curp->line, ln.buf, of) :
532
1269578
		    man_parseln(curp->man, curp->line, ln.buf, of)) == 2)
533
				break;
534
535
		/* Temporary buffers typically are not full. */
536
537

922885
		if (0 == start && '\0' == blk.buf[i])
538
			break;
539
540
		/* Start the next input line. */
541
542
888920
		pos = 0;
543
	}
544
545
45311
	free(ln.buf);
546
45311
	return 1;
547
51315
}
548
549
static int
550
read_whole_file(struct mparse *curp, const char *file, int fd,
551
		struct buf *fb, int *with_mmap)
552
{
553
16052
	struct stat	 st;
554
	gzFile		 gz;
555
	size_t		 off;
556
	ssize_t		 ssz;
557
558
8026
	if (fstat(fd, &st) == -1) {
559
		mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
560
		    "fstat: %s", strerror(errno));
561
		return 0;
562
	}
563
564
	/*
565
	 * If we're a regular file, try just reading in the whole entry
566
	 * via mmap().  This is faster than reading it into blocks, and
567
	 * since each file is only a few bytes to begin with, I'm not
568
	 * concerned that this is going to tank any machines.
569
	 */
570
571

16052
	if (curp->gzip == 0 && S_ISREG(st.st_mode)) {
572
8026
		if (st.st_size > 0x7fffffff) {
573
			mandoc_msg(MANDOCERR_TOOLARGE, curp, 0, 0, NULL);
574
			return 0;
575
		}
576
8026
		*with_mmap = 1;
577
8026
		fb->sz = (size_t)st.st_size;
578
8026
		fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
579
8026
		if (fb->buf != MAP_FAILED)
580
8026
			return 1;
581
	}
582
583
	if (curp->gzip) {
584
		if ((gz = gzdopen(fd, "rb")) == NULL) {
585
			mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
586
			    "gzdopen: %s", strerror(errno));
587
			return 0;
588
		}
589
	} else
590
		gz = NULL;
591
592
	/*
593
	 * If this isn't a regular file (like, say, stdin), then we must
594
	 * go the old way and just read things in bit by bit.
595
	 */
596
597
	*with_mmap = 0;
598
	off = 0;
599
	fb->sz = 0;
600
	fb->buf = NULL;
601
	for (;;) {
602
		if (off == fb->sz) {
603
			if (fb->sz == (1U << 31)) {
604
				mandoc_msg(MANDOCERR_TOOLARGE, curp,
605
				    0, 0, NULL);
606
				break;
607
			}
608
			resize_buf(fb, 65536);
609
		}
610
		ssz = curp->gzip ?
611
		    gzread(gz, fb->buf + (int)off, fb->sz - off) :
612
		    read(fd, fb->buf + (int)off, fb->sz - off);
613
		if (ssz == 0) {
614
			fb->sz = off;
615
			return 1;
616
		}
617
		if (ssz == -1) {
618
			mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
619
			    "read: %s", strerror(errno));
620
			break;
621
		}
622
		off += (size_t)ssz;
623
	}
624
625
	free(fb->buf);
626
	fb->buf = NULL;
627
	return 0;
628
8026
}
629
630
static void
631
mparse_end(struct mparse *curp)
632
{
633
16052
	if (curp->man->macroset == MACROSET_NONE)
634
4
		curp->man->macroset = MACROSET_MAN;
635
8026
	if (curp->man->macroset == MACROSET_MDOC)
636
5761
		mdoc_endparse(curp->man);
637
	else
638
2265
		man_endparse(curp->man);
639
8026
	roff_endparse(curp->roff);
640
8026
}
641
642
static void
643
mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
644
{
645
	struct buf	*svprimary;
646
	const char	*svfile;
647
	size_t		 offset;
648
	static int	 recursion_depth;
649
650
8026
	if (64 < recursion_depth) {
651
		mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
652
		return;
653
	}
654
655
	/* Line number is per-file. */
656
8026
	svfile = curp->file;
657
8026
	curp->file = file;
658
8026
	svprimary = curp->primary;
659
8026
	curp->primary = &blk;
660
8026
	curp->line = 1;
661
8026
	recursion_depth++;
662
663
	/* Skip an UTF-8 byte order mark. */
664

12222
	if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 &&
665
4196
	    (unsigned char)blk.buf[0] == 0xef &&
666
	    (unsigned char)blk.buf[1] == 0xbb &&
667
	    (unsigned char)blk.buf[2] == 0xbf) {
668
		offset = 3;
669
		curp->filenc &= ~MPARSE_LATIN1;
670
	} else
671
		offset = 0;
672
673
8026
	mparse_buf_r(curp, blk, offset, 1);
674
675
8026
	if (--recursion_depth == 0)
676
8026
		mparse_end(curp);
677
678
8026
	curp->primary = svprimary;
679
8026
	curp->file = svfile;
680
16052
}
681
682
/*
683
 * Read the whole file into memory and call the parsers.
684
 * Called recursively when an .so request is encountered.
685
 */
686
enum mandoclevel
687
mparse_readfd(struct mparse *curp, int fd, const char *file)
688
{
689
16052
	struct buf	 blk;
690
8026
	int		 with_mmap;
691
	int		 save_filenc;
692
693
8026
	if (read_whole_file(curp, file, fd, &blk, &with_mmap)) {
694
8026
		save_filenc = curp->filenc;
695
8026
		curp->filenc = curp->options &
696
		    (MPARSE_UTF8 | MPARSE_LATIN1);
697
8026
		mparse_parse_buffer(curp, blk, file);
698
8026
		curp->filenc = save_filenc;
699
8026
		if (with_mmap)
700
8026
			munmap(blk.buf, blk.sz);
701
		else
702
			free(blk.buf);
703
	}
704
16052
	return curp->file_status;
705
8026
}
706
707
int
708
mparse_open(struct mparse *curp, const char *file)
709
{
710
16052
	char		 *cp;
711
	int		  fd;
712
713
8026
	curp->file = file;
714
8026
	cp = strrchr(file, '.');
715
24078
	curp->gzip = (cp != NULL && ! strcmp(cp + 1, "gz"));
716
717
	/* First try to use the filename as it is. */
718
719
8026
	if ((fd = open(file, O_RDONLY)) != -1)
720
8026
		return fd;
721
722
	/*
723
	 * If that doesn't work and the filename doesn't
724
	 * already  end in .gz, try appending .gz.
725
	 */
726
727
	if ( ! curp->gzip) {
728
		mandoc_asprintf(&cp, "%s.gz", file);
729
		fd = open(cp, O_RDONLY);
730
		free(cp);
731
		if (fd != -1) {
732
			curp->gzip = 1;
733
			return fd;
734
		}
735
	}
736
737
	/* Neither worked, give up. */
738
739
	mandoc_msg(MANDOCERR_FILE, curp, 0, 0, strerror(errno));
740
	return -1;
741
8026
}
742
743
struct mparse *
744
mparse_alloc(int options, enum mandocerr mmin, mandocmsg mmsg,
745
    enum mandoc_os os_e, const char *os_s)
746
{
747
	struct mparse	*curp;
748
749
8466
	curp = mandoc_calloc(1, sizeof(struct mparse));
750
751
4233
	curp->options = options;
752
4233
	curp->mmin = mmin;
753
4233
	curp->mmsg = mmsg;
754
4233
	curp->os_s = os_s;
755
756
4233
	curp->roff = roff_alloc(curp, options);
757
8466
	curp->man = roff_man_alloc(curp->roff, curp, curp->os_s,
758
4233
		curp->options & MPARSE_QUICK ? 1 : 0);
759
4233
	if (curp->options & MPARSE_MDOC) {
760
77
		curp->man->macroset = MACROSET_MDOC;
761
77
		if (curp->man->mdocmac == NULL)
762
77
			curp->man->mdocmac = roffhash_alloc(MDOC_Dd, MDOC_MAX);
763
4156
	} else if (curp->options & MPARSE_MAN) {
764
635
		curp->man->macroset = MACROSET_MAN;
765
635
		if (curp->man->manmac == NULL)
766
635
			curp->man->manmac = roffhash_alloc(MAN_TH, MAN_MAX);
767
	}
768
4233
	curp->man->first->tok = TOKEN_NONE;
769
4233
	curp->man->meta.os_e = os_e;
770
4233
	return curp;
771
}
772
773
void
774
mparse_reset(struct mparse *curp)
775
{
776
7660
	roff_reset(curp->roff);
777
3830
	roff_man_reset(curp->man);
778
779
3830
	free(curp->sodest);
780
3830
	curp->sodest = NULL;
781
782
3830
	if (curp->secondary)
783
		curp->secondary->sz = 0;
784
785
3830
	curp->file_status = MANDOCLEVEL_OK;
786
3830
	curp->gzip = 0;
787
3830
}
788
789
void
790
mparse_free(struct mparse *curp)
791
{
792
793
8466
	roffhash_free(curp->man->mdocmac);
794
4233
	roffhash_free(curp->man->manmac);
795
4233
	roff_man_free(curp->man);
796
4233
	roff_free(curp->roff);
797
4233
	if (curp->secondary)
798
633
		free(curp->secondary->buf);
799
800
4233
	free(curp->secondary);
801
4233
	free(curp->sodest);
802
4233
	free(curp);
803
4233
}
804
805
void
806
mparse_result(struct mparse *curp, struct roff_man **man,
807
	char **sodest)
808
{
809
810

19882
	if (sodest && NULL != (*sodest = curp->sodest)) {
811
4
		*man = NULL;
812
4
		return;
813
	}
814
8022
	if (man)
815
8022
		*man = curp->man;
816
8026
}
817
818
void
819
mparse_updaterc(struct mparse *curp, enum mandoclevel *rc)
820
{
821
8392
	if (curp->file_status > *rc)
822
315
		*rc = curp->file_status;
823
4196
}
824
825
void
826
mandoc_vmsg(enum mandocerr t, struct mparse *m,
827
		int ln, int pos, const char *fmt, ...)
828
{
829
23468
	char		 buf[256];
830
11734
	va_list		 ap;
831
832
11734
	va_start(ap, fmt);
833
11734
	(void)vsnprintf(buf, sizeof(buf), fmt, ap);
834
11734
	va_end(ap);
835
836
11734
	mandoc_msg(t, m, ln, pos, buf);
837
11734
}
838
839
void
840
mandoc_msg(enum mandocerr er, struct mparse *m,
841
		int ln, int col, const char *msg)
842
{
843
	enum mandoclevel level;
844
845
44410
	if (er < m->mmin && er != MANDOCERR_FILE)
846
19235
		return;
847
848
	level = MANDOCLEVEL_UNSUPP;
849
17100
	while (er < mandoclimits[level])
850
5580
		level--;
851
852
2970
	if (m->mmsg)
853
2970
		(*m->mmsg)(er, level, m->file, ln, col, msg);
854
855
2970
	if (m->file_status < level)
856
741
		m->file_status = level;
857
25175
}
858
859
const char *
860
mparse_strerror(enum mandocerr er)
861
{
862
863
5946
	return mandocerrs[er];
864
}
865
866
const char *
867
mparse_strlevel(enum mandoclevel lvl)
868
{
869
5946
	return mandoclevels[lvl];
870
}
871
872
void
873
mparse_keep(struct mparse *p)
874
{
875
876
1266
	assert(NULL == p->secondary);
877
633
	p->secondary = mandoc_calloc(1, sizeof(struct buf));
878
633
}
879
880
const char *
881
mparse_getkeep(const struct mparse *p)
882
{
883
884
	assert(p->secondary);
885
	return p->secondary->sz ? p->secondary->buf : NULL;
886
}