GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: usr.bin/mandoc/read.c Lines: 0 275 0.0 %
Date: 2016-12-06 Branches: 0 220 0.0 %

Line Branch Exec Source
1
/*	$OpenBSD: read.c,v 1.124 2016/07/19 16:22:34 schwarze Exp $ */
2
/*
3
 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4
 * Copyright (c) 2010-2016 Ingo Schwarze <schwarze@openbsd.org>
5
 * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
6
 *
7
 * Permission to use, copy, modify, and distribute this software for any
8
 * purpose with or without fee is hereby granted, provided that the above
9
 * copyright notice and this permission notice appear in all copies.
10
 *
11
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
12
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
14
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18
 */
19
#include <sys/types.h>
20
#include <sys/mman.h>
21
#include <sys/stat.h>
22
23
#include <assert.h>
24
#include <ctype.h>
25
#include <err.h>
26
#include <errno.h>
27
#include <fcntl.h>
28
#include <stdarg.h>
29
#include <stdio.h>
30
#include <stdlib.h>
31
#include <string.h>
32
#include <unistd.h>
33
#include <zlib.h>
34
35
#include "mandoc_aux.h"
36
#include "mandoc.h"
37
#include "roff.h"
38
#include "mdoc.h"
39
#include "man.h"
40
#include "libmandoc.h"
41
#include "roff_int.h"
42
43
/*
 * Maximum number of ROFF_REPARSE results mparse_buf_r() accepts before
 * it reports MANDOCERR_ROFFLOOP instead of recursing again.  The counter
 * (reparse_count in struct mparse) is reset for every line of a file.
 */
#define	REPARSE_LIMIT	1000
44
45
struct	mparse {
46
	struct roff_man	 *man; /* man parser */
47
	struct roff	 *roff; /* roff parser (!NULL) */
48
	char		 *sodest; /* filename pointed to by .so */
49
	const char	 *file; /* filename of current input file */
50
	struct buf	 *primary; /* buffer currently being parsed */
51
	struct buf	 *secondary; /* preprocessed copy of input */
52
	const char	 *defos; /* default operating system */
53
	mandocmsg	  mmsg; /* warning/error message handler */
54
	enum mandoclevel  file_status; /* status of current parse */
55
	enum mandoclevel  wlevel; /* ignore messages below this */
56
	int		  options; /* parser options */
57
	int		  gzip; /* current input file is gzipped */
58
	int		  filenc; /* encoding of the current file */
59
	int		  reparse_count; /* finite interp. stack */
60
	int		  line; /* line number in the file */
61
};
62
63
/* Forward declarations of the file-local helpers defined below. */
static	void	  choose_parser(struct mparse *curp);
static	void	  resize_buf(struct buf *buf, size_t initial);
static	void	  mparse_buf_r(struct mparse *curp, struct buf blk,
			size_t i, int start);
static	int	  read_whole_file(struct mparse *curp, const char *file,
			int fd, struct buf *fb, int *with_mmap);
static	void	  mparse_end(struct mparse *curp);
static	void	  mparse_parse_buffer(struct mparse *curp, struct buf blk,
			const char *file);
71
72
static	const enum mandocerr	mandoclimits[MANDOCLEVEL_MAX] = {
73
	MANDOCERR_OK,
74
	MANDOCERR_WARNING,
75
	MANDOCERR_WARNING,
76
	MANDOCERR_ERROR,
77
	MANDOCERR_UNSUPP,
78
	MANDOCERR_MAX,
79
	MANDOCERR_MAX
80
};
81
82
/*
 * Message texts, indexed by enum mandocerr and handed out by
 * mparse_strerror().  The order of the entries must match the order
 * of the enum.  One slot is NULL: that code has no fixed text here.
 */
static	const char * const	mandocerrs[MANDOCERR_MAX] = {
	"ok",

	"generic warning",

	/* Prologue. */
	"missing manual title, using UNTITLED",
	"missing manual title, using \"\"",
	"lower case character in document title",
	"missing manual section, using \"\"",
	"unknown manual section",
	"missing date, using today's date",
	"cannot parse date, using it verbatim",
	"missing Os macro, using \"\"",
	"duplicate prologue macro",
	"late prologue macro",
	"skipping late title macro",
	"prologue macros out of order",

	/* Document structure. */
	".so is fragile, better use ln(1)",
	"no document body",
	"content before first section header",
	"first section is not \"NAME\"",
	"NAME section without name",
	"NAME section without description",
	"description not at the end of NAME",
	"bad NAME section content",
	"missing description line, using \"\"",
	"sections out of conventional order",
	"duplicate section title",
	"unexpected section",
	"unusual Xr order",
	"unusual Xr punctuation",
	"AUTHORS section without An macro",

	/* Macros and nesting. */
	"obsolete macro",
	"macro neither callable nor escaped",
	"skipping paragraph macro",
	"moving paragraph macro out of list",
	"skipping no-space macro",
	"blocks badly nested",
	"nested displays are not portable",
	"moving content out of list",
	"fill mode already enabled, skipping",
	"fill mode already disabled, skipping",
	"line scope broken",

	/* Missing macro arguments. */
	"skipping empty request",
	"conditional request controls empty scope",
	"skipping empty macro",
	"empty block",
	"empty argument, using 0n",
	"missing display type, using -ragged",
	"list type is not the first argument",
	"missing -width in -tag list, using 8n",
	"missing utility name, using \"\"",
	"missing function name, using \"\"",
	"empty head in list item",
	"empty list item",
	"missing font type, using \\fR",
	"unknown font type, using \\fR",
	"nothing follows prefix",
	"empty reference block",
	"missing -std argument, adding it",
	"missing option string, using \"\"",
	"missing resource identifier, using \"\"",
	"missing eqn box, using \"\"",

	/* Bad macro arguments. */
	"unterminated quoted argument",
	"duplicate argument",
	"skipping duplicate argument",
	"skipping duplicate display type",
	"skipping duplicate list type",
	"skipping -width argument",
	"wrong number of cells",
	"unknown AT&T UNIX version",
	"comma in function argument",
	"parenthesis in function name",
	"invalid content in Rs block",
	"invalid Boolean argument",
	"unknown font, skipping request",
	"odd number of characters in request",

	/* Plain text. */
	"blank line in fill mode, using .sp",
	"tab in filled text",
	"whitespace at end of input line",
	"bad comment style",
	"invalid escape sequence",
	"undefined string, using \"\"",

	/* Tables. */
	"tbl line starts with span",
	"tbl column starts with span",
	"skipping vertical bar in tbl layout",

	"generic error",

	/* Tables. */
	"non-alphabetic character in tbl options",
	"skipping unknown tbl option",
	"missing tbl option argument",
	"wrong tbl option argument size",
	"empty tbl layout",
	"invalid character in tbl layout",
	"unmatched parenthesis in tbl layout",
	"tbl without any data cells",
	"ignoring data in spanned tbl cell",
	"ignoring extra tbl data cells",
	"data block open at end of tbl",

	/* Document structure and macros. */
	NULL,	/* no fixed message text for this code */
	"input stack limit exceeded, infinite loop?",
	"skipping bad character",
	"skipping unknown macro",
	"skipping insecure request",
	"skipping item outside list",
	"skipping column outside column list",
	"skipping end of block that is not open",
	"fewer RS blocks open, skipping",
	"inserting missing end of block",
	"appending missing end of block",

	/* Request and macro arguments. */
	"escaped character not allowed in a name",
	"NOT IMPLEMENTED: Bd -file",
	"skipping display without arguments",
	"missing list type, using -item",
	"missing manual name, using \"\"",
	"uname(3) system call failed, using UNKNOWN",
	"unknown standard specifier",
	"skipping request without numeric argument",
	"NOT IMPLEMENTED: .so with absolute path or \"..\"",
	".so request failed",
	"skipping all arguments",
	"skipping excess arguments",
	"divide by zero",

	"unsupported feature",
	"input too large",
	"unsupported control character",
	"unsupported roff request",
	"eqn delim option in tbl",
	"unsupported tbl layout modifier",
	"ignoring macro in table",
};
233
234
/*
 * Printable names of the message levels, indexed by enum mandoclevel
 * and handed out by mparse_strlevel().
 */
static	const char * const	mandoclevels[MANDOCLEVEL_MAX] = {
	"SUCCESS", "RESERVED", "WARNING", "ERROR",
	"UNSUPP", "BADARG", "SYSERR"
};
243
244
245
static void
246
resize_buf(struct buf *buf, size_t initial)
247
{
248
249
	buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
250
	buf->buf = mandoc_realloc(buf->buf, buf->sz);
251
}
252
253
static void
254
choose_parser(struct mparse *curp)
255
{
256
	char		*cp, *ep;
257
	int		 format;
258
259
	/*
260
	 * If neither command line arguments -mdoc or -man select
261
	 * a parser nor the roff parser found a .Dd or .TH macro
262
	 * yet, look ahead in the main input buffer.
263
	 */
264
265
	if ((format = roff_getformat(curp->roff)) == 0) {
266
		cp = curp->primary->buf;
267
		ep = cp + curp->primary->sz;
268
		while (cp < ep) {
269
			if (*cp == '.' || *cp == '\'') {
270
				cp++;
271
				if (cp[0] == 'D' && cp[1] == 'd') {
272
					format = MPARSE_MDOC;
273
					break;
274
				}
275
				if (cp[0] == 'T' && cp[1] == 'H') {
276
					format = MPARSE_MAN;
277
					break;
278
				}
279
			}
280
			cp = memchr(cp, '\n', ep - cp);
281
			if (cp == NULL)
282
				break;
283
			cp++;
284
		}
285
	}
286
287
	if (format == MPARSE_MDOC) {
288
		mdoc_hash_init();
289
		curp->man->macroset = MACROSET_MDOC;
290
		curp->man->first->tok = TOKEN_NONE;
291
	} else {
292
		man_hash_init();
293
		curp->man->macroset = MACROSET_MAN;
294
		curp->man->first->tok = TOKEN_NONE;
295
	}
296
}
297
298
/*
299
 * Main parse routine for a buffer.
300
 * It assumes encoding and line numbering are already set up.
301
 * It can recurse directly (for invocations of user-defined
302
 * macros, inline equations, and input line traps)
303
 * and indirectly (for .so file inclusion).
304
 */
305
static void
306
mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start)
307
{
308
	const struct tbl_span	*span;
309
	struct buf	 ln;
310
	const char	*save_file;
311
	char		*cp;
312
	size_t		 pos; /* byte number in the ln buffer */
313
	enum rofferr	 rr;
314
	int		 of;
315
	int		 lnn; /* line number in the real file */
316
	int		 fd;
317
	unsigned char	 c;
318
319
	memset(&ln, 0, sizeof(ln));
320
321
	lnn = curp->line;
322
	pos = 0;
323
324
	while (i < blk.sz) {
325
		if (0 == pos && '\0' == blk.buf[i])
326
			break;
327
328
		if (start) {
329
			curp->line = lnn;
330
			curp->reparse_count = 0;
331
332
			if (lnn < 3 &&
333
			    curp->filenc & MPARSE_UTF8 &&
334
			    curp->filenc & MPARSE_LATIN1)
335
				curp->filenc = preconv_cue(&blk, i);
336
		}
337
338
		while (i < blk.sz && (start || blk.buf[i] != '\0')) {
339
340
			/*
341
			 * When finding an unescaped newline character,
342
			 * leave the character loop to process the line.
343
			 * Skip a preceding carriage return, if any.
344
			 */
345
346
			if ('\r' == blk.buf[i] && i + 1 < blk.sz &&
347
			    '\n' == blk.buf[i + 1])
348
				++i;
349
			if ('\n' == blk.buf[i]) {
350
				++i;
351
				++lnn;
352
				break;
353
			}
354
355
			/*
356
			 * Make sure we have space for the worst
357
			 * case of 11 bytes: "\\[u10ffff]\0"
358
			 */
359
360
			if (pos + 11 > ln.sz)
361
				resize_buf(&ln, 256);
362
363
			/*
364
			 * Encode 8-bit input.
365
			 */
366
367
			c = blk.buf[i];
368
			if (c & 0x80) {
369
				if ( ! (curp->filenc && preconv_encode(
370
				    &blk, &i, &ln, &pos, &curp->filenc))) {
371
					mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
372
					    curp->line, pos, "0x%x", c);
373
					ln.buf[pos++] = '?';
374
					i++;
375
				}
376
				continue;
377
			}
378
379
			/*
380
			 * Exclude control characters.
381
			 */
382
383
			if (c == 0x7f || (c < 0x20 && c != 0x09)) {
384
				mandoc_vmsg(c == 0x00 || c == 0x04 ||
385
				    c > 0x0a ? MANDOCERR_CHAR_BAD :
386
				    MANDOCERR_CHAR_UNSUPP,
387
				    curp, curp->line, pos, "0x%x", c);
388
				i++;
389
				if (c != '\r')
390
					ln.buf[pos++] = '?';
391
				continue;
392
			}
393
394
			/* Trailing backslash = a plain char. */
395
396
			if (blk.buf[i] != '\\' || i + 1 == blk.sz) {
397
				ln.buf[pos++] = blk.buf[i++];
398
				continue;
399
			}
400
401
			/*
402
			 * Found escape and at least one other character.
403
			 * When it's a newline character, skip it.
404
			 * When there is a carriage return in between,
405
			 * skip that one as well.
406
			 */
407
408
			if ('\r' == blk.buf[i + 1] && i + 2 < blk.sz &&
409
			    '\n' == blk.buf[i + 2])
410
				++i;
411
			if ('\n' == blk.buf[i + 1]) {
412
				i += 2;
413
				++lnn;
414
				continue;
415
			}
416
417
			if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) {
418
				i += 2;
419
				/* Comment, skip to end of line */
420
				for (; i < blk.sz; ++i) {
421
					if ('\n' == blk.buf[i]) {
422
						++i;
423
						++lnn;
424
						break;
425
					}
426
				}
427
428
				/* Backout trailing whitespaces */
429
				for (; pos > 0; --pos) {
430
					if (ln.buf[pos - 1] != ' ')
431
						break;
432
					if (pos > 2 && ln.buf[pos - 2] == '\\')
433
						break;
434
				}
435
				break;
436
			}
437
438
			/* Catch escaped bogus characters. */
439
440
			c = (unsigned char) blk.buf[i+1];
441
442
			if ( ! (isascii(c) &&
443
			    (isgraph(c) || isblank(c)))) {
444
				mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
445
				    curp->line, pos, "0x%x", c);
446
				i += 2;
447
				ln.buf[pos++] = '?';
448
				continue;
449
			}
450
451
			/* Some other escape sequence, copy & cont. */
452
453
			ln.buf[pos++] = blk.buf[i++];
454
			ln.buf[pos++] = blk.buf[i++];
455
		}
456
457
		if (pos >= ln.sz)
458
			resize_buf(&ln, 256);
459
460
		ln.buf[pos] = '\0';
461
462
		/*
463
		 * A significant amount of complexity is contained by
464
		 * the roff preprocessor.  It's line-oriented but can be
465
		 * expressed on one line, so we need at times to
466
		 * readjust our starting point and re-run it.  The roff
467
		 * preprocessor can also readjust the buffers with new
468
		 * data, so we pass them in wholesale.
469
		 */
470
471
		of = 0;
472
473
		/*
474
		 * Maintain a lookaside buffer of all parsed lines.  We
475
		 * only do this if mparse_keep() has been invoked (the
476
		 * buffer may be accessed with mparse_getkeep()).
477
		 */
478
479
		if (curp->secondary) {
480
			curp->secondary->buf = mandoc_realloc(
481
			    curp->secondary->buf,
482
			    curp->secondary->sz + pos + 2);
483
			memcpy(curp->secondary->buf +
484
			    curp->secondary->sz,
485
			    ln.buf, pos);
486
			curp->secondary->sz += pos;
487
			curp->secondary->buf
488
				[curp->secondary->sz] = '\n';
489
			curp->secondary->sz++;
490
			curp->secondary->buf
491
				[curp->secondary->sz] = '\0';
492
		}
493
rerun:
494
		rr = roff_parseln(curp->roff, curp->line, &ln, &of);
495
496
		switch (rr) {
497
		case ROFF_REPARSE:
498
			if (REPARSE_LIMIT >= ++curp->reparse_count)
499
				mparse_buf_r(curp, ln, of, 0);
500
			else
501
				mandoc_msg(MANDOCERR_ROFFLOOP, curp,
502
				    curp->line, pos, NULL);
503
			pos = 0;
504
			continue;
505
		case ROFF_APPEND:
506
			pos = strlen(ln.buf);
507
			continue;
508
		case ROFF_RERUN:
509
			goto rerun;
510
		case ROFF_IGN:
511
			pos = 0;
512
			continue;
513
		case ROFF_SO:
514
			if ( ! (curp->options & MPARSE_SO) &&
515
			    (i >= blk.sz || blk.buf[i] == '\0')) {
516
				curp->sodest = mandoc_strdup(ln.buf + of);
517
				free(ln.buf);
518
				return;
519
			}
520
			/*
521
			 * We remove `so' clauses from our lookaside
522
			 * buffer because we're going to descend into
523
			 * the file recursively.
524
			 */
525
			if (curp->secondary)
526
				curp->secondary->sz -= pos + 1;
527
			save_file = curp->file;
528
			if ((fd = mparse_open(curp, ln.buf + of)) != -1) {
529
				mparse_readfd(curp, fd, ln.buf + of);
530
				close(fd);
531
				curp->file = save_file;
532
			} else {
533
				curp->file = save_file;
534
				mandoc_vmsg(MANDOCERR_SO_FAIL,
535
				    curp, curp->line, pos,
536
				    ".so %s", ln.buf + of);
537
				ln.sz = mandoc_asprintf(&cp,
538
				    ".sp\nSee the file %s.\n.sp",
539
				    ln.buf + of);
540
				free(ln.buf);
541
				ln.buf = cp;
542
				of = 0;
543
				mparse_buf_r(curp, ln, of, 0);
544
			}
545
			pos = 0;
546
			continue;
547
		default:
548
			break;
549
		}
550
551
		if (curp->man->macroset == MACROSET_NONE)
552
			choose_parser(curp);
553
554
		/*
555
		 * Lastly, push down into the parsers themselves.
556
		 * If libroff returns ROFF_TBL, then add it to the
557
		 * currently open parse.  Since we only get here if
558
		 * there does exist data (see tbl_data.c), we're
559
		 * guaranteed that something's been allocated.
560
		 * Do the same for ROFF_EQN.
561
		 */
562
563
		if (rr == ROFF_TBL)
564
			while ((span = roff_span(curp->roff)) != NULL)
565
				roff_addtbl(curp->man, span);
566
		else if (rr == ROFF_EQN)
567
			roff_addeqn(curp->man, roff_eqn(curp->roff));
568
		else if ((curp->man->macroset == MACROSET_MDOC ?
569
		    mdoc_parseln(curp->man, curp->line, ln.buf, of) :
570
		    man_parseln(curp->man, curp->line, ln.buf, of)) == 2)
571
				break;
572
573
		/* Temporary buffers typically are not full. */
574
575
		if (0 == start && '\0' == blk.buf[i])
576
			break;
577
578
		/* Start the next input line. */
579
580
		pos = 0;
581
	}
582
583
	free(ln.buf);
584
}
585
586
static int
587
read_whole_file(struct mparse *curp, const char *file, int fd,
588
		struct buf *fb, int *with_mmap)
589
{
590
	struct stat	 st;
591
	gzFile		 gz;
592
	size_t		 off;
593
	ssize_t		 ssz;
594
595
	if (fstat(fd, &st) == -1)
596
		err((int)MANDOCLEVEL_SYSERR, "%s", file);
597
598
	/*
599
	 * If we're a regular file, try just reading in the whole entry
600
	 * via mmap().  This is faster than reading it into blocks, and
601
	 * since each file is only a few bytes to begin with, I'm not
602
	 * concerned that this is going to tank any machines.
603
	 */
604
605
	if (curp->gzip == 0 && S_ISREG(st.st_mode)) {
606
		if (st.st_size > 0x7fffffff) {
607
			mandoc_msg(MANDOCERR_TOOLARGE, curp, 0, 0, NULL);
608
			return 0;
609
		}
610
		*with_mmap = 1;
611
		fb->sz = (size_t)st.st_size;
612
		fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
613
		if (fb->buf != MAP_FAILED)
614
			return 1;
615
	}
616
617
	if (curp->gzip) {
618
		if ((gz = gzdopen(fd, "rb")) == NULL)
619
			err((int)MANDOCLEVEL_SYSERR, "%s", file);
620
	} else
621
		gz = NULL;
622
623
	/*
624
	 * If this isn't a regular file (like, say, stdin), then we must
625
	 * go the old way and just read things in bit by bit.
626
	 */
627
628
	*with_mmap = 0;
629
	off = 0;
630
	fb->sz = 0;
631
	fb->buf = NULL;
632
	for (;;) {
633
		if (off == fb->sz) {
634
			if (fb->sz == (1U << 31)) {
635
				mandoc_msg(MANDOCERR_TOOLARGE, curp,
636
				    0, 0, NULL);
637
				break;
638
			}
639
			resize_buf(fb, 65536);
640
		}
641
		ssz = curp->gzip ?
642
		    gzread(gz, fb->buf + (int)off, fb->sz - off) :
643
		    read(fd, fb->buf + (int)off, fb->sz - off);
644
		if (ssz == 0) {
645
			fb->sz = off;
646
			return 1;
647
		}
648
		if (ssz == -1)
649
			err((int)MANDOCLEVEL_SYSERR, "%s", file);
650
		off += (size_t)ssz;
651
	}
652
653
	free(fb->buf);
654
	fb->buf = NULL;
655
	return 0;
656
}
657
658
static void
659
mparse_end(struct mparse *curp)
660
{
661
	if (curp->man->macroset == MACROSET_NONE)
662
		curp->man->macroset = MACROSET_MAN;
663
	if (curp->man->macroset == MACROSET_MDOC)
664
		mdoc_endparse(curp->man);
665
	else
666
		man_endparse(curp->man);
667
	roff_endparse(curp->roff);
668
}
669
670
static void
671
mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
672
{
673
	struct buf	*svprimary;
674
	const char	*svfile;
675
	size_t		 offset;
676
	static int	 recursion_depth;
677
678
	if (64 < recursion_depth) {
679
		mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
680
		return;
681
	}
682
683
	/* Line number is per-file. */
684
	svfile = curp->file;
685
	curp->file = file;
686
	svprimary = curp->primary;
687
	curp->primary = &blk;
688
	curp->line = 1;
689
	recursion_depth++;
690
691
	/* Skip an UTF-8 byte order mark. */
692
	if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 &&
693
	    (unsigned char)blk.buf[0] == 0xef &&
694
	    (unsigned char)blk.buf[1] == 0xbb &&
695
	    (unsigned char)blk.buf[2] == 0xbf) {
696
		offset = 3;
697
		curp->filenc &= ~MPARSE_LATIN1;
698
	} else
699
		offset = 0;
700
701
	mparse_buf_r(curp, blk, offset, 1);
702
703
	if (--recursion_depth == 0)
704
		mparse_end(curp);
705
706
	curp->primary = svprimary;
707
	curp->file = svfile;
708
}
709
710
/*
711
 * Read the whole file into memory and call the parsers.
712
 * Called recursively when an .so request is encountered.
713
 */
714
enum mandoclevel
715
mparse_readfd(struct mparse *curp, int fd, const char *file)
716
{
717
	struct buf	 blk;
718
	int		 with_mmap;
719
	int		 save_filenc;
720
721
	if (read_whole_file(curp, file, fd, &blk, &with_mmap)) {
722
		save_filenc = curp->filenc;
723
		curp->filenc = curp->options &
724
		    (MPARSE_UTF8 | MPARSE_LATIN1);
725
		mparse_parse_buffer(curp, blk, file);
726
		curp->filenc = save_filenc;
727
		if (with_mmap)
728
			munmap(blk.buf, blk.sz);
729
		else
730
			free(blk.buf);
731
	}
732
	return curp->file_status;
733
}
734
735
int
736
mparse_open(struct mparse *curp, const char *file)
737
{
738
	char		 *cp;
739
	int		  fd;
740
741
	curp->file = file;
742
	cp = strrchr(file, '.');
743
	curp->gzip = (cp != NULL && ! strcmp(cp + 1, "gz"));
744
745
	/* First try to use the filename as it is. */
746
747
	if ((fd = open(file, O_RDONLY)) != -1)
748
		return fd;
749
750
	/*
751
	 * If that doesn't work and the filename doesn't
752
	 * already  end in .gz, try appending .gz.
753
	 */
754
755
	if ( ! curp->gzip) {
756
		mandoc_asprintf(&cp, "%s.gz", file);
757
		fd = open(cp, O_RDONLY);
758
		free(cp);
759
		if (fd != -1) {
760
			curp->gzip = 1;
761
			return fd;
762
		}
763
	}
764
765
	/* Neither worked, give up. */
766
767
	mandoc_msg(MANDOCERR_FILE, curp, 0, 0, strerror(errno));
768
	return -1;
769
}
770
771
struct mparse *
772
mparse_alloc(int options, enum mandoclevel wlevel, mandocmsg mmsg,
773
    const char *defos)
774
{
775
	struct mparse	*curp;
776
777
	curp = mandoc_calloc(1, sizeof(struct mparse));
778
779
	curp->options = options;
780
	curp->wlevel = wlevel;
781
	curp->mmsg = mmsg;
782
	curp->defos = defos;
783
784
	curp->roff = roff_alloc(curp, options);
785
	curp->man = roff_man_alloc( curp->roff, curp, curp->defos,
786
		curp->options & MPARSE_QUICK ? 1 : 0);
787
	if (curp->options & MPARSE_MDOC) {
788
		mdoc_hash_init();
789
		curp->man->macroset = MACROSET_MDOC;
790
	} else if (curp->options & MPARSE_MAN) {
791
		man_hash_init();
792
		curp->man->macroset = MACROSET_MAN;
793
	}
794
	curp->man->first->tok = TOKEN_NONE;
795
	return curp;
796
}
797
798
void
799
mparse_reset(struct mparse *curp)
800
{
801
	roff_reset(curp->roff);
802
	roff_man_reset(curp->man);
803
	if (curp->secondary)
804
		curp->secondary->sz = 0;
805
806
	curp->file_status = MANDOCLEVEL_OK;
807
808
	free(curp->sodest);
809
	curp->sodest = NULL;
810
}
811
812
void
813
mparse_free(struct mparse *curp)
814
{
815
816
	roff_man_free(curp->man);
817
	if (curp->roff)
818
		roff_free(curp->roff);
819
	if (curp->secondary)
820
		free(curp->secondary->buf);
821
822
	free(curp->secondary);
823
	free(curp->sodest);
824
	free(curp);
825
}
826
827
void
828
mparse_result(struct mparse *curp, struct roff_man **man,
829
	char **sodest)
830
{
831
832
	if (sodest && NULL != (*sodest = curp->sodest)) {
833
		*man = NULL;
834
		return;
835
	}
836
	if (man)
837
		*man = curp->man;
838
}
839
840
void
841
mandoc_vmsg(enum mandocerr t, struct mparse *m,
842
		int ln, int pos, const char *fmt, ...)
843
{
844
	char		 buf[256];
845
	va_list		 ap;
846
847
	va_start(ap, fmt);
848
	(void)vsnprintf(buf, sizeof(buf), fmt, ap);
849
	va_end(ap);
850
851
	mandoc_msg(t, m, ln, pos, buf);
852
}
853
854
void
855
mandoc_msg(enum mandocerr er, struct mparse *m,
856
		int ln, int col, const char *msg)
857
{
858
	enum mandoclevel level;
859
860
	level = MANDOCLEVEL_UNSUPP;
861
	while (er < mandoclimits[level])
862
		level--;
863
864
	if (level < m->wlevel && er != MANDOCERR_FILE)
865
		return;
866
867
	if (m->mmsg)
868
		(*m->mmsg)(er, level, m->file, ln, col, msg);
869
870
	if (m->file_status < level)
871
		m->file_status = level;
872
}
873
874
const char *
875
mparse_strerror(enum mandocerr er)
876
{
877
878
	return mandocerrs[er];
879
}
880
881
const char *
882
mparse_strlevel(enum mandoclevel lvl)
883
{
884
	return mandoclevels[lvl];
885
}
886
887
void
888
mparse_keep(struct mparse *p)
889
{
890
891
	assert(NULL == p->secondary);
892
	p->secondary = mandoc_calloc(1, sizeof(struct buf));
893
}
894
895
const char *
896
mparse_getkeep(const struct mparse *p)
897
{
898
899
	assert(p->secondary);
900
	return p->secondary->sz ? p->secondary->buf : NULL;
901
}