1 |
|
|
/* $OpenBSD: read.c,v 1.124 2016/07/19 16:22:34 schwarze Exp $ */ |
2 |
|
|
/* |
3 |
|
|
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> |
4 |
|
|
* Copyright (c) 2010-2016 Ingo Schwarze <schwarze@openbsd.org> |
5 |
|
|
* Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org> |
6 |
|
|
* |
7 |
|
|
* Permission to use, copy, modify, and distribute this software for any |
8 |
|
|
* purpose with or without fee is hereby granted, provided that the above |
9 |
|
|
* copyright notice and this permission notice appear in all copies. |
10 |
|
|
* |
11 |
|
|
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES |
12 |
|
|
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
13 |
|
|
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR |
14 |
|
|
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
15 |
|
|
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
16 |
|
|
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
17 |
|
|
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
18 |
|
|
*/ |
19 |
|
|
#include <sys/types.h> |
20 |
|
|
#include <sys/mman.h> |
21 |
|
|
#include <sys/stat.h> |
22 |
|
|
|
23 |
|
|
#include <assert.h> |
24 |
|
|
#include <ctype.h> |
25 |
|
|
#include <err.h> |
26 |
|
|
#include <errno.h> |
27 |
|
|
#include <fcntl.h> |
28 |
|
|
#include <stdarg.h> |
29 |
|
|
#include <stdio.h> |
30 |
|
|
#include <stdlib.h> |
31 |
|
|
#include <string.h> |
32 |
|
|
#include <unistd.h> |
33 |
|
|
#include <zlib.h> |
34 |
|
|
|
35 |
|
|
#include "mandoc_aux.h" |
36 |
|
|
#include "mandoc.h" |
37 |
|
|
#include "roff.h" |
38 |
|
|
#include "mdoc.h" |
39 |
|
|
#include "man.h" |
40 |
|
|
#include "libmandoc.h" |
41 |
|
|
#include "roff_int.h" |
42 |
|
|
|
43 |
|
|
#define REPARSE_LIMIT 1000 |
44 |
|
|
|
45 |
|
|
/*
 * State of one parser instance.
 * Allocated by mparse_alloc(), cleared between files by mparse_reset()
 * and released by mparse_free().
 */
struct mparse {
	struct roff_man	 *man; /* man parser */
	struct roff	 *roff; /* roff parser (!NULL) */
	char		 *sodest; /* filename pointed to by .so */
	const char	 *file; /* filename of current input file */
	struct buf	 *primary; /* buffer currently being parsed */
	struct buf	 *secondary; /* preprocessed copy of input */
	const char	 *defos; /* default operating system */
	mandocmsg	  mmsg; /* warning/error message handler */
	enum mandoclevel  file_status; /* status of current parse */
	enum mandoclevel  wlevel; /* ignore messages below this */
	int		  options; /* parser options */
	int		  gzip; /* current input file is gzipped */
	int		  filenc; /* encoding of the current file */
	int		  reparse_count; /* finite interp. stack */
	int		  line; /* line number in the file */
};
62 |
|
|
|
63 |
|
|
/* Forward declarations of the helpers that are private to this file. */
static void choose_parser(struct mparse *);
static void resize_buf(struct buf *, size_t);
static void mparse_buf_r(struct mparse *, struct buf, size_t, int);
static int read_whole_file(struct mparse *, const char *, int,
		struct buf *, int *);
static void mparse_end(struct mparse *);
static void mparse_parse_buffer(struct mparse *, struct buf,
		const char *);
71 |
|
|
|
72 |
|
|
/*
 * Indexed by enum mandoclevel: the smallest mandocerr value that is
 * classified at that level.  mandoc_msg() starts at MANDOCLEVEL_UNSUPP
 * and steps downward until the error code is no longer smaller than
 * the entry, which yields the level of the message.
 */
static const enum mandocerr mandoclimits[MANDOCLEVEL_MAX] = {
	MANDOCERR_OK,
	MANDOCERR_WARNING,
	MANDOCERR_WARNING,
	MANDOCERR_ERROR,
	MANDOCERR_UNSUPP,
	MANDOCERR_MAX,
	MANDOCERR_MAX
};
81 |
|
|
|
82 |
|
|
/*
 * Message text for each mandocerr value, as returned by
 * mparse_strerror().  The table is indexed directly by the error
 * code, so the entries have to stay in the same order as the
 * enumerators of enum mandocerr (declared outside this file).
 */
static const char * const mandocerrs[MANDOCERR_MAX] = {
	"ok",

	"generic warning",

	/* related to the prologue */
	"missing manual title, using UNTITLED",
	"missing manual title, using \"\"",
	"lower case character in document title",
	"missing manual section, using \"\"",
	"unknown manual section",
	"missing date, using today's date",
	"cannot parse date, using it verbatim",
	"missing Os macro, using \"\"",
	"duplicate prologue macro",
	"late prologue macro",
	"skipping late title macro",
	"prologue macros out of order",

	/* related to document structure */
	".so is fragile, better use ln(1)",
	"no document body",
	"content before first section header",
	"first section is not \"NAME\"",
	"NAME section without name",
	"NAME section without description",
	"description not at the end of NAME",
	"bad NAME section content",
	"missing description line, using \"\"",
	"sections out of conventional order",
	"duplicate section title",
	"unexpected section",
	"unusual Xr order",
	"unusual Xr punctuation",
	"AUTHORS section without An macro",

	/* related to macros and nesting */
	"obsolete macro",
	"macro neither callable nor escaped",
	"skipping paragraph macro",
	"moving paragraph macro out of list",
	"skipping no-space macro",
	"blocks badly nested",
	"nested displays are not portable",
	"moving content out of list",
	"fill mode already enabled, skipping",
	"fill mode already disabled, skipping",
	"line scope broken",

	/* related to missing macro arguments */
	"skipping empty request",
	"conditional request controls empty scope",
	"skipping empty macro",
	"empty block",
	"empty argument, using 0n",
	"missing display type, using -ragged",
	"list type is not the first argument",
	"missing -width in -tag list, using 8n",
	"missing utility name, using \"\"",
	"missing function name, using \"\"",
	"empty head in list item",
	"empty list item",
	"missing font type, using \\fR",
	"unknown font type, using \\fR",
	"nothing follows prefix",
	"empty reference block",
	"missing -std argument, adding it",
	"missing option string, using \"\"",
	"missing resource identifier, using \"\"",
	"missing eqn box, using \"\"",

	/* related to bad macro arguments */
	"unterminated quoted argument",
	"duplicate argument",
	"skipping duplicate argument",
	"skipping duplicate display type",
	"skipping duplicate list type",
	"skipping -width argument",
	"wrong number of cells",
	"unknown AT&T UNIX version",
	"comma in function argument",
	"parenthesis in function name",
	"invalid content in Rs block",
	"invalid Boolean argument",
	"unknown font, skipping request",
	"odd number of characters in request",

	/* related to plain text */
	"blank line in fill mode, using .sp",
	"tab in filled text",
	"whitespace at end of input line",
	"bad comment style",
	"invalid escape sequence",
	"undefined string, using \"\"",

	/* related to tables (entries following "generic warning") */
	"tbl line starts with span",
	"tbl column starts with span",
	"skipping vertical bar in tbl layout",

	"generic error",

	/* related to tables (entries following "generic error") */
	"non-alphabetic character in tbl options",
	"skipping unknown tbl option",
	"missing tbl option argument",
	"wrong tbl option argument size",
	"empty tbl layout",
	"invalid character in tbl layout",
	"unmatched parenthesis in tbl layout",
	"tbl without any data cells",
	"ignoring data in spanned tbl cell",
	"ignoring extra tbl data cells",
	"data block open at end of tbl",

	/* related to document structure and macros */
	/*
	 * NOTE(review): the NULL slot presumably belongs to
	 * MANDOCERR_FILE, for which mparse_open() passes the
	 * strerror(3) text as the message -- confirm against
	 * enum mandocerr.
	 */
	NULL,
	"input stack limit exceeded, infinite loop?",
	"skipping bad character",
	"skipping unknown macro",
	"skipping insecure request",
	"skipping item outside list",
	"skipping column outside column list",
	"skipping end of block that is not open",
	"fewer RS blocks open, skipping",
	"inserting missing end of block",
	"appending missing end of block",

	/* related to request and macro arguments */
	"escaped character not allowed in a name",
	"NOT IMPLEMENTED: Bd -file",
	"skipping display without arguments",
	"missing list type, using -item",
	"missing manual name, using \"\"",
	"uname(3) system call failed, using UNKNOWN",
	"unknown standard specifier",
	"skipping request without numeric argument",
	"NOT IMPLEMENTED: .so with absolute path or \"..\"",
	".so request failed",
	"skipping all arguments",
	"skipping excess arguments",
	"divide by zero",

	"unsupported feature",
	"input too large",
	"unsupported control character",
	"unsupported roff request",
	"eqn delim option in tbl",
	"unsupported tbl layout modifier",
	"ignoring macro in table",
};
233 |
|
|
|
234 |
|
|
/*
 * Name of each mandoclevel value, as returned by mparse_strlevel().
 * The table is indexed directly by the level.
 */
static const char * const mandoclevels[MANDOCLEVEL_MAX] = {
	"SUCCESS",
	"RESERVED",
	"WARNING",
	"ERROR",
	"UNSUPP",
	"BADARG",
	"SYSERR"
};
243 |
|
|
|
244 |
|
|
|
245 |
|
|
static void |
246 |
|
|
resize_buf(struct buf *buf, size_t initial) |
247 |
|
|
{ |
248 |
|
|
|
249 |
|
|
buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial; |
250 |
|
|
buf->buf = mandoc_realloc(buf->buf, buf->sz); |
251 |
|
|
} |
252 |
|
|
|
253 |
|
|
static void |
254 |
|
|
choose_parser(struct mparse *curp) |
255 |
|
|
{ |
256 |
|
|
char *cp, *ep; |
257 |
|
|
int format; |
258 |
|
|
|
259 |
|
|
/* |
260 |
|
|
* If neither command line arguments -mdoc or -man select |
261 |
|
|
* a parser nor the roff parser found a .Dd or .TH macro |
262 |
|
|
* yet, look ahead in the main input buffer. |
263 |
|
|
*/ |
264 |
|
|
|
265 |
|
|
if ((format = roff_getformat(curp->roff)) == 0) { |
266 |
|
|
cp = curp->primary->buf; |
267 |
|
|
ep = cp + curp->primary->sz; |
268 |
|
|
while (cp < ep) { |
269 |
|
|
if (*cp == '.' || *cp == '\'') { |
270 |
|
|
cp++; |
271 |
|
|
if (cp[0] == 'D' && cp[1] == 'd') { |
272 |
|
|
format = MPARSE_MDOC; |
273 |
|
|
break; |
274 |
|
|
} |
275 |
|
|
if (cp[0] == 'T' && cp[1] == 'H') { |
276 |
|
|
format = MPARSE_MAN; |
277 |
|
|
break; |
278 |
|
|
} |
279 |
|
|
} |
280 |
|
|
cp = memchr(cp, '\n', ep - cp); |
281 |
|
|
if (cp == NULL) |
282 |
|
|
break; |
283 |
|
|
cp++; |
284 |
|
|
} |
285 |
|
|
} |
286 |
|
|
|
287 |
|
|
if (format == MPARSE_MDOC) { |
288 |
|
|
mdoc_hash_init(); |
289 |
|
|
curp->man->macroset = MACROSET_MDOC; |
290 |
|
|
curp->man->first->tok = TOKEN_NONE; |
291 |
|
|
} else { |
292 |
|
|
man_hash_init(); |
293 |
|
|
curp->man->macroset = MACROSET_MAN; |
294 |
|
|
curp->man->first->tok = TOKEN_NONE; |
295 |
|
|
} |
296 |
|
|
} |
297 |
|
|
|
298 |
|
|
/*
 * Main parse routine for a buffer.
 * It assumes encoding and line numbering are already set up.
 * It can recurse directly (for invocations of user-defined
 * macros, inline equations, and input line traps)
 * and indirectly (for .so file inclusion).
 *
 * curp:  parse state; curp->line supplies the initial line number.
 * blk:   the buffer to parse, passed by value.
 * i:     byte offset in blk at which parsing starts.
 * start: non-zero when called from mparse_parse_buffer(); at the
 *        beginning of each line, curp->line is then updated,
 *        curp->reparse_count is reset and preconv_cue() may be
 *        consulted.  Zero in the recursive calls below, where blk
 *        is a temporary buffer and a NUL byte ends the input.
 */
static void
mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start)
{
	const struct tbl_span	*span;
	struct buf	 ln;
	const char	*save_file;
	char		*cp;
	size_t		 pos; /* byte number in the ln buffer */
	enum rofferr	 rr;
	int		 of;
	int		 lnn; /* line number in the real file */
	int		 fd;
	unsigned char	 c;

	/* The line buffer starts out empty; resize_buf() allocates it. */
	memset(&ln, 0, sizeof(ln));

	lnn = curp->line;
	pos = 0;

	while (i < blk.sz) {
		/* A NUL byte at the start of a fresh line ends the input. */
		if (0 == pos && '\0' == blk.buf[i])
			break;

		if (start) {
			curp->line = lnn;
			curp->reparse_count = 0;

			/*
			 * While both encodings are still enabled and
			 * we are on line 1 or 2, let preconv_cue()
			 * provide the new value of filenc.
			 */
			if (lnn < 3 &&
			    curp->filenc & MPARSE_UTF8 &&
			    curp->filenc & MPARSE_LATIN1)
				curp->filenc = preconv_cue(&blk, i);
		}

		/* Copy one logical input line from blk into ln. */
		while (i < blk.sz && (start || blk.buf[i] != '\0')) {

			/*
			 * When finding an unescaped newline character,
			 * leave the character loop to process the line.
			 * Skip a preceding carriage return, if any.
			 */

			if ('\r' == blk.buf[i] && i + 1 < blk.sz &&
			    '\n' == blk.buf[i + 1])
				++i;
			if ('\n' == blk.buf[i]) {
				++i;
				++lnn;
				break;
			}

			/*
			 * Make sure we have space for the worst
			 * case of 11 bytes: "\\[u10ffff]\0"
			 */

			if (pos + 11 > ln.sz)
				resize_buf(&ln, 256);

			/*
			 * Encode 8-bit input.
			 * If no encoding is enabled or preconv_encode()
			 * returns zero, report the byte and replace it
			 * with a question mark.
			 */

			c = blk.buf[i];
			if (c & 0x80) {
				if ( ! (curp->filenc && preconv_encode(
				    &blk, &i, &ln, &pos, &curp->filenc))) {
					mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
					    curp->line, pos, "0x%x", c);
					ln.buf[pos++] = '?';
					i++;
				}
				continue;
			}

			/*
			 * Exclude control characters.
			 * All except the tab (0x09) are reported;
			 * all except the carriage return are
			 * replaced with a question mark.
			 */

			if (c == 0x7f || (c < 0x20 && c != 0x09)) {
				mandoc_vmsg(c == 0x00 || c == 0x04 ||
				    c > 0x0a ? MANDOCERR_CHAR_BAD :
				    MANDOCERR_CHAR_UNSUPP,
				    curp, curp->line, pos, "0x%x", c);
				i++;
				if (c != '\r')
					ln.buf[pos++] = '?';
				continue;
			}

			/* Trailing backslash = a plain char. */

			if (blk.buf[i] != '\\' || i + 1 == blk.sz) {
				ln.buf[pos++] = blk.buf[i++];
				continue;
			}

			/*
			 * Found escape and at least one other character.
			 * When it's a newline character, skip it.
			 * When there is a carriage return in between,
			 * skip that one as well.
			 */

			if ('\r' == blk.buf[i + 1] && i + 2 < blk.sz &&
			    '\n' == blk.buf[i + 2])
				++i;
			if ('\n' == blk.buf[i + 1]) {
				i += 2;
				++lnn;
				continue;
			}

			if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) {
				i += 2;
				/* Comment, skip to end of line */
				for (; i < blk.sz; ++i) {
					if ('\n' == blk.buf[i]) {
						++i;
						++lnn;
						break;
					}
				}

				/*
				 * Backout trailing whitespaces,
				 * but keep a blank that directly
				 * follows a backslash.
				 */
				for (; pos > 0; --pos) {
					if (ln.buf[pos - 1] != ' ')
						break;
					if (pos > 2 && ln.buf[pos - 2] == '\\')
						break;
				}
				break;
			}

			/* Catch escaped bogus characters. */

			c = (unsigned char) blk.buf[i+1];

			if ( ! (isascii(c) &&
			    (isgraph(c) || isblank(c)))) {
				mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
				    curp->line, pos, "0x%x", c);
				i += 2;
				ln.buf[pos++] = '?';
				continue;
			}

			/* Some other escape sequence, copy & cont. */

			ln.buf[pos++] = blk.buf[i++];
			ln.buf[pos++] = blk.buf[i++];
		}

		/* Make room for the terminating NUL, then terminate. */
		if (pos >= ln.sz)
			resize_buf(&ln, 256);

		ln.buf[pos] = '\0';

		/*
		 * A significant amount of complexity is contained by
		 * the roff preprocessor.  It's line-oriented but can be
		 * expressed on one line, so we need at times to
		 * readjust our starting point and re-run it.  The roff
		 * preprocessor can also readjust the buffers with new
		 * data, so we pass them in wholesale.
		 */

		of = 0;

		/*
		 * Maintain a lookaside buffer of all parsed lines.  We
		 * only do this if mparse_keep() has been invoked (the
		 * buffer may be accessed with mparse_getkeep()).
		 * Each line is appended followed by a newline, and the
		 * buffer is kept NUL-terminated; the NUL is not counted
		 * in secondary->sz.
		 */

		if (curp->secondary) {
			curp->secondary->buf = mandoc_realloc(
			    curp->secondary->buf,
			    curp->secondary->sz + pos + 2);
			memcpy(curp->secondary->buf +
			    curp->secondary->sz,
			    ln.buf, pos);
			curp->secondary->sz += pos;
			curp->secondary->buf
				[curp->secondary->sz] = '\n';
			curp->secondary->sz++;
			curp->secondary->buf
				[curp->secondary->sz] = '\0';
		}
rerun:
		rr = roff_parseln(curp->roff, curp->line, &ln, &of);

		switch (rr) {
		case ROFF_REPARSE:
			/*
			 * Parse the resulting ln buffer recursively,
			 * unless that already happened more than
			 * REPARSE_LIMIT times for this input line.
			 */
			if (REPARSE_LIMIT >= ++curp->reparse_count)
				mparse_buf_r(curp, ln, of, 0);
			else
				mandoc_msg(MANDOCERR_ROFFLOOP, curp,
				    curp->line, pos, NULL);
			pos = 0;
			continue;
		case ROFF_APPEND:
			/* Keep ln and append the next input line to it. */
			pos = strlen(ln.buf);
			continue;
		case ROFF_RERUN:
			goto rerun;
		case ROFF_IGN:
			/* Nothing to hand on; start the next line. */
			pos = 0;
			continue;
		case ROFF_SO:
			/*
			 * Without MPARSE_SO, a .so request that is
			 * the last thing in the buffer is not followed;
			 * its target is saved in sodest instead and
			 * parsing of this buffer ends.
			 */
			if ( ! (curp->options & MPARSE_SO) &&
			    (i >= blk.sz || blk.buf[i] == '\0')) {
				curp->sodest = mandoc_strdup(ln.buf + of);
				free(ln.buf);
				return;
			}
			/*
			 * We remove `so' clauses from our lookaside
			 * buffer because we're going to descend into
			 * the file recursively.
			 */
			if (curp->secondary)
				curp->secondary->sz -= pos + 1;
			/* mparse_open() overwrites curp->file. */
			save_file = curp->file;
			if ((fd = mparse_open(curp, ln.buf + of)) != -1) {
				mparse_readfd(curp, fd, ln.buf + of);
				close(fd);
				curp->file = save_file;
			} else {
				/*
				 * The file cannot be opened: report it
				 * and parse a replacement text naming
				 * the file instead.
				 */
				curp->file = save_file;
				mandoc_vmsg(MANDOCERR_SO_FAIL,
				    curp, curp->line, pos,
				    ".so %s", ln.buf + of);
				ln.sz = mandoc_asprintf(&cp,
				    ".sp\nSee the file %s.\n.sp",
				    ln.buf + of);
				free(ln.buf);
				ln.buf = cp;
				of = 0;
				mparse_buf_r(curp, ln, of, 0);
			}
			pos = 0;
			continue;
		default:
			break;
		}

		/* No macro set selected yet: decide between mdoc and man. */
		if (curp->man->macroset == MACROSET_NONE)
			choose_parser(curp);

		/*
		 * Lastly, push down into the parsers themselves.
		 * If libroff returns ROFF_TBL, then add it to the
		 * currently open parse.  Since we only get here if
		 * there does exist data (see tbl_data.c), we're
		 * guaranteed that something's been allocated.
		 * Do the same for ROFF_EQN.
		 * A return value of 2 from the macro parser ends
		 * the processing of this buffer.
		 */

		if (rr == ROFF_TBL)
			while ((span = roff_span(curp->roff)) != NULL)
				roff_addtbl(curp->man, span);
		else if (rr == ROFF_EQN)
			roff_addeqn(curp->man, roff_eqn(curp->roff));
		else if ((curp->man->macroset == MACROSET_MDOC ?
		    mdoc_parseln(curp->man, curp->line, ln.buf, of) :
		    man_parseln(curp->man, curp->line, ln.buf, of)) == 2)
				break;

		/* Temporary buffers typically are not full. */

		if (0 == start && '\0' == blk.buf[i])
			break;

		/* Start the next input line. */

		pos = 0;
	}

	free(ln.buf);
}
585 |
|
|
|
586 |
|
|
static int |
587 |
|
|
read_whole_file(struct mparse *curp, const char *file, int fd, |
588 |
|
|
struct buf *fb, int *with_mmap) |
589 |
|
|
{ |
590 |
|
|
struct stat st; |
591 |
|
|
gzFile gz; |
592 |
|
|
size_t off; |
593 |
|
|
ssize_t ssz; |
594 |
|
|
|
595 |
|
|
if (fstat(fd, &st) == -1) |
596 |
|
|
err((int)MANDOCLEVEL_SYSERR, "%s", file); |
597 |
|
|
|
598 |
|
|
/* |
599 |
|
|
* If we're a regular file, try just reading in the whole entry |
600 |
|
|
* via mmap(). This is faster than reading it into blocks, and |
601 |
|
|
* since each file is only a few bytes to begin with, I'm not |
602 |
|
|
* concerned that this is going to tank any machines. |
603 |
|
|
*/ |
604 |
|
|
|
605 |
|
|
if (curp->gzip == 0 && S_ISREG(st.st_mode)) { |
606 |
|
|
if (st.st_size > 0x7fffffff) { |
607 |
|
|
mandoc_msg(MANDOCERR_TOOLARGE, curp, 0, 0, NULL); |
608 |
|
|
return 0; |
609 |
|
|
} |
610 |
|
|
*with_mmap = 1; |
611 |
|
|
fb->sz = (size_t)st.st_size; |
612 |
|
|
fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0); |
613 |
|
|
if (fb->buf != MAP_FAILED) |
614 |
|
|
return 1; |
615 |
|
|
} |
616 |
|
|
|
617 |
|
|
if (curp->gzip) { |
618 |
|
|
if ((gz = gzdopen(fd, "rb")) == NULL) |
619 |
|
|
err((int)MANDOCLEVEL_SYSERR, "%s", file); |
620 |
|
|
} else |
621 |
|
|
gz = NULL; |
622 |
|
|
|
623 |
|
|
/* |
624 |
|
|
* If this isn't a regular file (like, say, stdin), then we must |
625 |
|
|
* go the old way and just read things in bit by bit. |
626 |
|
|
*/ |
627 |
|
|
|
628 |
|
|
*with_mmap = 0; |
629 |
|
|
off = 0; |
630 |
|
|
fb->sz = 0; |
631 |
|
|
fb->buf = NULL; |
632 |
|
|
for (;;) { |
633 |
|
|
if (off == fb->sz) { |
634 |
|
|
if (fb->sz == (1U << 31)) { |
635 |
|
|
mandoc_msg(MANDOCERR_TOOLARGE, curp, |
636 |
|
|
0, 0, NULL); |
637 |
|
|
break; |
638 |
|
|
} |
639 |
|
|
resize_buf(fb, 65536); |
640 |
|
|
} |
641 |
|
|
ssz = curp->gzip ? |
642 |
|
|
gzread(gz, fb->buf + (int)off, fb->sz - off) : |
643 |
|
|
read(fd, fb->buf + (int)off, fb->sz - off); |
644 |
|
|
if (ssz == 0) { |
645 |
|
|
fb->sz = off; |
646 |
|
|
return 1; |
647 |
|
|
} |
648 |
|
|
if (ssz == -1) |
649 |
|
|
err((int)MANDOCLEVEL_SYSERR, "%s", file); |
650 |
|
|
off += (size_t)ssz; |
651 |
|
|
} |
652 |
|
|
|
653 |
|
|
free(fb->buf); |
654 |
|
|
fb->buf = NULL; |
655 |
|
|
return 0; |
656 |
|
|
} |
657 |
|
|
|
658 |
|
|
static void |
659 |
|
|
mparse_end(struct mparse *curp) |
660 |
|
|
{ |
661 |
|
|
if (curp->man->macroset == MACROSET_NONE) |
662 |
|
|
curp->man->macroset = MACROSET_MAN; |
663 |
|
|
if (curp->man->macroset == MACROSET_MDOC) |
664 |
|
|
mdoc_endparse(curp->man); |
665 |
|
|
else |
666 |
|
|
man_endparse(curp->man); |
667 |
|
|
roff_endparse(curp->roff); |
668 |
|
|
} |
669 |
|
|
|
670 |
|
|
static void |
671 |
|
|
mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file) |
672 |
|
|
{ |
673 |
|
|
struct buf *svprimary; |
674 |
|
|
const char *svfile; |
675 |
|
|
size_t offset; |
676 |
|
|
static int recursion_depth; |
677 |
|
|
|
678 |
|
|
if (64 < recursion_depth) { |
679 |
|
|
mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL); |
680 |
|
|
return; |
681 |
|
|
} |
682 |
|
|
|
683 |
|
|
/* Line number is per-file. */ |
684 |
|
|
svfile = curp->file; |
685 |
|
|
curp->file = file; |
686 |
|
|
svprimary = curp->primary; |
687 |
|
|
curp->primary = &blk; |
688 |
|
|
curp->line = 1; |
689 |
|
|
recursion_depth++; |
690 |
|
|
|
691 |
|
|
/* Skip an UTF-8 byte order mark. */ |
692 |
|
|
if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 && |
693 |
|
|
(unsigned char)blk.buf[0] == 0xef && |
694 |
|
|
(unsigned char)blk.buf[1] == 0xbb && |
695 |
|
|
(unsigned char)blk.buf[2] == 0xbf) { |
696 |
|
|
offset = 3; |
697 |
|
|
curp->filenc &= ~MPARSE_LATIN1; |
698 |
|
|
} else |
699 |
|
|
offset = 0; |
700 |
|
|
|
701 |
|
|
mparse_buf_r(curp, blk, offset, 1); |
702 |
|
|
|
703 |
|
|
if (--recursion_depth == 0) |
704 |
|
|
mparse_end(curp); |
705 |
|
|
|
706 |
|
|
curp->primary = svprimary; |
707 |
|
|
curp->file = svfile; |
708 |
|
|
} |
709 |
|
|
|
710 |
|
|
/* |
711 |
|
|
* Read the whole file into memory and call the parsers. |
712 |
|
|
* Called recursively when an .so request is encountered. |
713 |
|
|
*/ |
714 |
|
|
enum mandoclevel |
715 |
|
|
mparse_readfd(struct mparse *curp, int fd, const char *file) |
716 |
|
|
{ |
717 |
|
|
struct buf blk; |
718 |
|
|
int with_mmap; |
719 |
|
|
int save_filenc; |
720 |
|
|
|
721 |
|
|
if (read_whole_file(curp, file, fd, &blk, &with_mmap)) { |
722 |
|
|
save_filenc = curp->filenc; |
723 |
|
|
curp->filenc = curp->options & |
724 |
|
|
(MPARSE_UTF8 | MPARSE_LATIN1); |
725 |
|
|
mparse_parse_buffer(curp, blk, file); |
726 |
|
|
curp->filenc = save_filenc; |
727 |
|
|
if (with_mmap) |
728 |
|
|
munmap(blk.buf, blk.sz); |
729 |
|
|
else |
730 |
|
|
free(blk.buf); |
731 |
|
|
} |
732 |
|
|
return curp->file_status; |
733 |
|
|
} |
734 |
|
|
|
735 |
|
|
int |
736 |
|
|
mparse_open(struct mparse *curp, const char *file) |
737 |
|
|
{ |
738 |
|
|
char *cp; |
739 |
|
|
int fd; |
740 |
|
|
|
741 |
|
|
curp->file = file; |
742 |
|
|
cp = strrchr(file, '.'); |
743 |
|
|
curp->gzip = (cp != NULL && ! strcmp(cp + 1, "gz")); |
744 |
|
|
|
745 |
|
|
/* First try to use the filename as it is. */ |
746 |
|
|
|
747 |
|
|
if ((fd = open(file, O_RDONLY)) != -1) |
748 |
|
|
return fd; |
749 |
|
|
|
750 |
|
|
/* |
751 |
|
|
* If that doesn't work and the filename doesn't |
752 |
|
|
* already end in .gz, try appending .gz. |
753 |
|
|
*/ |
754 |
|
|
|
755 |
|
|
if ( ! curp->gzip) { |
756 |
|
|
mandoc_asprintf(&cp, "%s.gz", file); |
757 |
|
|
fd = open(cp, O_RDONLY); |
758 |
|
|
free(cp); |
759 |
|
|
if (fd != -1) { |
760 |
|
|
curp->gzip = 1; |
761 |
|
|
return fd; |
762 |
|
|
} |
763 |
|
|
} |
764 |
|
|
|
765 |
|
|
/* Neither worked, give up. */ |
766 |
|
|
|
767 |
|
|
mandoc_msg(MANDOCERR_FILE, curp, 0, 0, strerror(errno)); |
768 |
|
|
return -1; |
769 |
|
|
} |
770 |
|
|
|
771 |
|
|
struct mparse * |
772 |
|
|
mparse_alloc(int options, enum mandoclevel wlevel, mandocmsg mmsg, |
773 |
|
|
const char *defos) |
774 |
|
|
{ |
775 |
|
|
struct mparse *curp; |
776 |
|
|
|
777 |
|
|
curp = mandoc_calloc(1, sizeof(struct mparse)); |
778 |
|
|
|
779 |
|
|
curp->options = options; |
780 |
|
|
curp->wlevel = wlevel; |
781 |
|
|
curp->mmsg = mmsg; |
782 |
|
|
curp->defos = defos; |
783 |
|
|
|
784 |
|
|
curp->roff = roff_alloc(curp, options); |
785 |
|
|
curp->man = roff_man_alloc( curp->roff, curp, curp->defos, |
786 |
|
|
curp->options & MPARSE_QUICK ? 1 : 0); |
787 |
|
|
if (curp->options & MPARSE_MDOC) { |
788 |
|
|
mdoc_hash_init(); |
789 |
|
|
curp->man->macroset = MACROSET_MDOC; |
790 |
|
|
} else if (curp->options & MPARSE_MAN) { |
791 |
|
|
man_hash_init(); |
792 |
|
|
curp->man->macroset = MACROSET_MAN; |
793 |
|
|
} |
794 |
|
|
curp->man->first->tok = TOKEN_NONE; |
795 |
|
|
return curp; |
796 |
|
|
} |
797 |
|
|
|
798 |
|
|
void |
799 |
|
|
mparse_reset(struct mparse *curp) |
800 |
|
|
{ |
801 |
|
|
roff_reset(curp->roff); |
802 |
|
|
roff_man_reset(curp->man); |
803 |
|
|
if (curp->secondary) |
804 |
|
|
curp->secondary->sz = 0; |
805 |
|
|
|
806 |
|
|
curp->file_status = MANDOCLEVEL_OK; |
807 |
|
|
|
808 |
|
|
free(curp->sodest); |
809 |
|
|
curp->sodest = NULL; |
810 |
|
|
} |
811 |
|
|
|
812 |
|
|
void |
813 |
|
|
mparse_free(struct mparse *curp) |
814 |
|
|
{ |
815 |
|
|
|
816 |
|
|
roff_man_free(curp->man); |
817 |
|
|
if (curp->roff) |
818 |
|
|
roff_free(curp->roff); |
819 |
|
|
if (curp->secondary) |
820 |
|
|
free(curp->secondary->buf); |
821 |
|
|
|
822 |
|
|
free(curp->secondary); |
823 |
|
|
free(curp->sodest); |
824 |
|
|
free(curp); |
825 |
|
|
} |
826 |
|
|
|
827 |
|
|
void |
828 |
|
|
mparse_result(struct mparse *curp, struct roff_man **man, |
829 |
|
|
char **sodest) |
830 |
|
|
{ |
831 |
|
|
|
832 |
|
|
if (sodest && NULL != (*sodest = curp->sodest)) { |
833 |
|
|
*man = NULL; |
834 |
|
|
return; |
835 |
|
|
} |
836 |
|
|
if (man) |
837 |
|
|
*man = curp->man; |
838 |
|
|
} |
839 |
|
|
|
840 |
|
|
void |
841 |
|
|
mandoc_vmsg(enum mandocerr t, struct mparse *m, |
842 |
|
|
int ln, int pos, const char *fmt, ...) |
843 |
|
|
{ |
844 |
|
|
char buf[256]; |
845 |
|
|
va_list ap; |
846 |
|
|
|
847 |
|
|
va_start(ap, fmt); |
848 |
|
|
(void)vsnprintf(buf, sizeof(buf), fmt, ap); |
849 |
|
|
va_end(ap); |
850 |
|
|
|
851 |
|
|
mandoc_msg(t, m, ln, pos, buf); |
852 |
|
|
} |
853 |
|
|
|
854 |
|
|
void |
855 |
|
|
mandoc_msg(enum mandocerr er, struct mparse *m, |
856 |
|
|
int ln, int col, const char *msg) |
857 |
|
|
{ |
858 |
|
|
enum mandoclevel level; |
859 |
|
|
|
860 |
|
|
level = MANDOCLEVEL_UNSUPP; |
861 |
|
|
while (er < mandoclimits[level]) |
862 |
|
|
level--; |
863 |
|
|
|
864 |
|
|
if (level < m->wlevel && er != MANDOCERR_FILE) |
865 |
|
|
return; |
866 |
|
|
|
867 |
|
|
if (m->mmsg) |
868 |
|
|
(*m->mmsg)(er, level, m->file, ln, col, msg); |
869 |
|
|
|
870 |
|
|
if (m->file_status < level) |
871 |
|
|
m->file_status = level; |
872 |
|
|
} |
873 |
|
|
|
874 |
|
|
const char * |
875 |
|
|
mparse_strerror(enum mandocerr er) |
876 |
|
|
{ |
877 |
|
|
|
878 |
|
|
return mandocerrs[er]; |
879 |
|
|
} |
880 |
|
|
|
881 |
|
|
const char * |
882 |
|
|
mparse_strlevel(enum mandoclevel lvl) |
883 |
|
|
{ |
884 |
|
|
return mandoclevels[lvl]; |
885 |
|
|
} |
886 |
|
|
|
887 |
|
|
void |
888 |
|
|
mparse_keep(struct mparse *p) |
889 |
|
|
{ |
890 |
|
|
|
891 |
|
|
assert(NULL == p->secondary); |
892 |
|
|
p->secondary = mandoc_calloc(1, sizeof(struct buf)); |
893 |
|
|
} |
894 |
|
|
|
895 |
|
|
const char * |
896 |
|
|
mparse_getkeep(const struct mparse *p) |
897 |
|
|
{ |
898 |
|
|
|
899 |
|
|
assert(p->secondary); |
900 |
|
|
return p->secondary->sz ? p->secondary->buf : NULL; |
901 |
|
|
} |