GCC Code Coverage Report | |||||||||||||||||||||
|
|||||||||||||||||||||
Line | Branch | Exec | Source |
1 |
/* $OpenBSD: read.c,v 1.164 2017/07/20 14:36:32 schwarze Exp $ */ |
||
2 |
/* |
||
3 |
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> |
||
4 |
* Copyright (c) 2010-2017 Ingo Schwarze <schwarze@openbsd.org> |
||
5 |
* Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org> |
||
6 |
* |
||
7 |
* Permission to use, copy, modify, and distribute this software for any |
||
8 |
* purpose with or without fee is hereby granted, provided that the above |
||
9 |
* copyright notice and this permission notice appear in all copies. |
||
10 |
* |
||
11 |
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES |
||
12 |
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
||
13 |
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR |
||
14 |
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
||
15 |
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
||
16 |
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
||
17 |
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
||
18 |
*/ |
||
19 |
#include <sys/types.h> |
||
20 |
#include <sys/mman.h> |
||
21 |
#include <sys/stat.h> |
||
22 |
|||
23 |
#include <assert.h> |
||
24 |
#include <ctype.h> |
||
25 |
#include <errno.h> |
||
26 |
#include <fcntl.h> |
||
27 |
#include <stdarg.h> |
||
28 |
#include <stdio.h> |
||
29 |
#include <stdlib.h> |
||
30 |
#include <string.h> |
||
31 |
#include <unistd.h> |
||
32 |
#include <zlib.h> |
||
33 |
|||
34 |
#include "mandoc_aux.h" |
||
35 |
#include "mandoc.h" |
||
36 |
#include "roff.h" |
||
37 |
#include "mdoc.h" |
||
38 |
#include "man.h" |
||
39 |
#include "libmandoc.h" |
||
40 |
|||
41 |
#define REPARSE_LIMIT 1000 |
||
42 |
|||
43 |
struct mparse { |
||
44 |
struct roff *roff; /* roff parser (!NULL) */ |
||
45 |
struct roff_man *man; /* man parser */ |
||
46 |
char *sodest; /* filename pointed to by .so */ |
||
47 |
const char *file; /* filename of current input file */ |
||
48 |
struct buf *primary; /* buffer currently being parsed */ |
||
49 |
struct buf *secondary; /* preprocessed copy of input */ |
||
50 |
const char *os_s; /* default operating system */ |
||
51 |
mandocmsg mmsg; /* warning/error message handler */ |
||
52 |
enum mandoclevel file_status; /* status of current parse */ |
||
53 |
enum mandocerr mmin; /* ignore messages below this */ |
||
54 |
int options; /* parser options */ |
||
55 |
int gzip; /* current input file is gzipped */ |
||
56 |
int filenc; /* encoding of the current file */ |
||
57 |
int reparse_count; /* finite interp. stack */ |
||
58 |
int line; /* line number in the file */ |
||
59 |
}; |
||
60 |
|||
61 |
static void choose_parser(struct mparse *); |
||
62 |
static void resize_buf(struct buf *, size_t); |
||
63 |
static int mparse_buf_r(struct mparse *, struct buf, size_t, int); |
||
64 |
static int read_whole_file(struct mparse *, const char *, int, |
||
65 |
struct buf *, int *); |
||
66 |
static void mparse_end(struct mparse *); |
||
67 |
static void mparse_parse_buffer(struct mparse *, struct buf, |
||
68 |
const char *); |
||
69 |
|||
70 |
static const enum mandocerr mandoclimits[MANDOCLEVEL_MAX] = { |
||
71 |
MANDOCERR_OK, |
||
72 |
MANDOCERR_OK, |
||
73 |
MANDOCERR_WARNING, |
||
74 |
MANDOCERR_ERROR, |
||
75 |
MANDOCERR_UNSUPP, |
||
76 |
MANDOCERR_MAX, |
||
77 |
MANDOCERR_MAX |
||
78 |
}; |
||
79 |
|||
80 |
static const char * const mandocerrs[MANDOCERR_MAX] = { |
||
81 |
"ok", |
||
82 |
|||
83 |
"base system convention", |
||
84 |
|||
85 |
"Mdocdate found", |
||
86 |
"Mdocdate missing", |
||
87 |
"unknown architecture", |
||
88 |
"operating system explicitly specified", |
||
89 |
"RCS id missing", |
||
90 |
"referenced manual not found", |
||
91 |
|||
92 |
"generic style suggestion", |
||
93 |
|||
94 |
"legacy man(7) date format", |
||
95 |
"lower case character in document title", |
||
96 |
"duplicate RCS id", |
||
97 |
"typo in section name", |
||
98 |
"unterminated quoted argument", |
||
99 |
"useless macro", |
||
100 |
"consider using OS macro", |
||
101 |
"errnos out of order", |
||
102 |
"duplicate errno", |
||
103 |
"trailing delimiter", |
||
104 |
"no blank before trailing delimiter", |
||
105 |
"fill mode already enabled, skipping", |
||
106 |
"fill mode already disabled, skipping", |
||
107 |
"function name without markup", |
||
108 |
"whitespace at end of input line", |
||
109 |
"bad comment style", |
||
110 |
|||
111 |
"generic warning", |
||
112 |
|||
113 |
/* related to the prologue */ |
||
114 |
"missing manual title, using UNTITLED", |
||
115 |
"missing manual title, using \"\"", |
||
116 |
"missing manual section, using \"\"", |
||
117 |
"unknown manual section", |
||
118 |
"missing date, using today's date", |
||
119 |
"cannot parse date, using it verbatim", |
||
120 |
"date in the future, using it anyway", |
||
121 |
"missing Os macro, using \"\"", |
||
122 |
"late prologue macro", |
||
123 |
"prologue macros out of order", |
||
124 |
|||
125 |
/* related to document structure */ |
||
126 |
".so is fragile, better use ln(1)", |
||
127 |
"no document body", |
||
128 |
"content before first section header", |
||
129 |
"first section is not \"NAME\"", |
||
130 |
"NAME section without Nm before Nd", |
||
131 |
"NAME section without description", |
||
132 |
"description not at the end of NAME", |
||
133 |
"bad NAME section content", |
||
134 |
"missing comma before name", |
||
135 |
"missing description line, using \"\"", |
||
136 |
"description line outside NAME section", |
||
137 |
"sections out of conventional order", |
||
138 |
"duplicate section title", |
||
139 |
"unexpected section", |
||
140 |
"cross reference to self", |
||
141 |
"unusual Xr order", |
||
142 |
"unusual Xr punctuation", |
||
143 |
"AUTHORS section without An macro", |
||
144 |
|||
145 |
/* related to macros and nesting */ |
||
146 |
"obsolete macro", |
||
147 |
"macro neither callable nor escaped", |
||
148 |
"skipping paragraph macro", |
||
149 |
"moving paragraph macro out of list", |
||
150 |
"skipping no-space macro", |
||
151 |
"blocks badly nested", |
||
152 |
"nested displays are not portable", |
||
153 |
"moving content out of list", |
||
154 |
"first macro on line", |
||
155 |
"line scope broken", |
||
156 |
"skipping blank line in line scope", |
||
157 |
|||
158 |
/* related to missing macro arguments */ |
||
159 |
"skipping empty request", |
||
160 |
"conditional request controls empty scope", |
||
161 |
"skipping empty macro", |
||
162 |
"empty block", |
||
163 |
"empty argument, using 0n", |
||
164 |
"missing display type, using -ragged", |
||
165 |
"list type is not the first argument", |
||
166 |
"missing -width in -tag list, using 6n", |
||
167 |
"missing utility name, using \"\"", |
||
168 |
"missing function name, using \"\"", |
||
169 |
"empty head in list item", |
||
170 |
"empty list item", |
||
171 |
"missing argument, using next line", |
||
172 |
"missing font type, using \\fR", |
||
173 |
"unknown font type, using \\fR", |
||
174 |
"nothing follows prefix", |
||
175 |
"empty reference block", |
||
176 |
"missing section argument", |
||
177 |
"missing -std argument, adding it", |
||
178 |
"missing option string, using \"\"", |
||
179 |
"missing resource identifier, using \"\"", |
||
180 |
"missing eqn box, using \"\"", |
||
181 |
|||
182 |
/* related to bad macro arguments */ |
||
183 |
"duplicate argument", |
||
184 |
"skipping duplicate argument", |
||
185 |
"skipping duplicate display type", |
||
186 |
"skipping duplicate list type", |
||
187 |
"skipping -width argument", |
||
188 |
"wrong number of cells", |
||
189 |
"unknown AT&T UNIX version", |
||
190 |
"comma in function argument", |
||
191 |
"parenthesis in function name", |
||
192 |
"unknown library name", |
||
193 |
"invalid content in Rs block", |
||
194 |
"invalid Boolean argument", |
||
195 |
"unknown font, skipping request", |
||
196 |
"odd number of characters in request", |
||
197 |
|||
198 |
/* related to plain text */ |
||
199 |
"blank line in fill mode, using .sp", |
||
200 |
"tab in filled text", |
||
201 |
"new sentence, new line", |
||
202 |
"invalid escape sequence", |
||
203 |
"undefined string, using \"\"", |
||
204 |
|||
205 |
/* related to tables */ |
||
206 |
"tbl line starts with span", |
||
207 |
"tbl column starts with span", |
||
208 |
"skipping vertical bar in tbl layout", |
||
209 |
|||
210 |
"generic error", |
||
211 |
|||
212 |
/* related to tables */ |
||
213 |
"non-alphabetic character in tbl options", |
||
214 |
"skipping unknown tbl option", |
||
215 |
"missing tbl option argument", |
||
216 |
"wrong tbl option argument size", |
||
217 |
"empty tbl layout", |
||
218 |
"invalid character in tbl layout", |
||
219 |
"unmatched parenthesis in tbl layout", |
||
220 |
"tbl without any data cells", |
||
221 |
"ignoring data in spanned tbl cell", |
||
222 |
"ignoring extra tbl data cells", |
||
223 |
"data block open at end of tbl", |
||
224 |
|||
225 |
/* related to document structure and macros */ |
||
226 |
NULL, |
||
227 |
"duplicate prologue macro", |
||
228 |
"skipping late title macro", |
||
229 |
"input stack limit exceeded, infinite loop?", |
||
230 |
"skipping bad character", |
||
231 |
"skipping unknown macro", |
||
232 |
"skipping insecure request", |
||
233 |
"skipping item outside list", |
||
234 |
"skipping column outside column list", |
||
235 |
"skipping end of block that is not open", |
||
236 |
"fewer RS blocks open, skipping", |
||
237 |
"inserting missing end of block", |
||
238 |
"appending missing end of block", |
||
239 |
|||
240 |
/* related to request and macro arguments */ |
||
241 |
"escaped character not allowed in a name", |
||
242 |
"NOT IMPLEMENTED: Bd -file", |
||
243 |
"skipping display without arguments", |
||
244 |
"missing list type, using -item", |
||
245 |
"argument is not numeric, using 1", |
||
246 |
"missing manual name, using \"\"", |
||
247 |
"uname(3) system call failed, using UNKNOWN", |
||
248 |
"unknown standard specifier", |
||
249 |
"skipping request without numeric argument", |
||
250 |
"NOT IMPLEMENTED: .so with absolute path or \"..\"", |
||
251 |
".so request failed", |
||
252 |
"skipping all arguments", |
||
253 |
"skipping excess arguments", |
||
254 |
"divide by zero", |
||
255 |
|||
256 |
"unsupported feature", |
||
257 |
"input too large", |
||
258 |
"unsupported control character", |
||
259 |
"unsupported roff request", |
||
260 |
"eqn delim option in tbl", |
||
261 |
"unsupported tbl layout modifier", |
||
262 |
"ignoring macro in table", |
||
263 |
}; |
||
264 |
|||
265 |
static const char * const mandoclevels[MANDOCLEVEL_MAX] = { |
||
266 |
"SUCCESS", |
||
267 |
"STYLE", |
||
268 |
"WARNING", |
||
269 |
"ERROR", |
||
270 |
"UNSUPP", |
||
271 |
"BADARG", |
||
272 |
"SYSERR" |
||
273 |
}; |
||
274 |
|||
275 |
|||
276 |
static void |
||
277 |
resize_buf(struct buf *buf, size_t initial) |
||
278 |
{ |
||
279 |
|||
280 |
✓✓ | 1991256 |
buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial; |
281 |
497814 |
buf->buf = mandoc_realloc(buf->buf, buf->sz); |
|
282 |
497814 |
} |
|
283 |
|||
284 |
static void |
||
285 |
choose_parser(struct mparse *curp) |
||
286 |
{ |
||
287 |
char *cp, *ep; |
||
288 |
int format; |
||
289 |
|||
290 |
/* |
||
291 |
* If neither command line arguments -mdoc or -man select |
||
292 |
* a parser nor the roff parser found a .Dd or .TH macro |
||
293 |
* yet, look ahead in the main input buffer. |
||
294 |
*/ |
||
295 |
|||
296 |
✓✓ | 44372 |
if ((format = roff_getformat(curp->roff)) == 0) { |
297 |
13 |
cp = curp->primary->buf; |
|
298 |
13 |
ep = cp + curp->primary->sz; |
|
299 |
✓✓ | 404 |
while (cp < ep) { |
300 |
✓✓✗✓ |
218 |
if (*cp == '.' || *cp == '\'') { |
301 |
168 |
cp++; |
|
302 |
✗✓✗✗ |
168 |
if (cp[0] == 'D' && cp[1] == 'd') { |
303 |
format = MPARSE_MDOC; |
||
304 |
break; |
||
305 |
} |
||
306 |
✓✓✓✗ |
172 |
if (cp[0] == 'T' && cp[1] == 'H') { |
307 |
format = MPARSE_MAN; |
||
308 |
4 |
break; |
|
309 |
} |
||
310 |
} |
||
311 |
189 |
cp = memchr(cp, '\n', ep - cp); |
|
312 |
✓✗ | 189 |
if (cp == NULL) |
313 |
break; |
||
314 |
189 |
cp++; |
|
315 |
} |
||
316 |
} |
||
317 |
|||
318 |
✓✓ | 22186 |
if (format == MPARSE_MDOC) { |
319 |
17016 |
curp->man->macroset = MACROSET_MDOC; |
|
320 |
✓✓ | 17016 |
if (curp->man->mdocmac == NULL) |
321 |
8311 |
curp->man->mdocmac = roffhash_alloc(MDOC_Dd, MDOC_MAX); |
|
322 |
} else { |
||
323 |
5170 |
curp->man->macroset = MACROSET_MAN; |
|
324 |
✓✓ | 5170 |
if (curp->man->manmac == NULL) |
325 |
2193 |
curp->man->manmac = roffhash_alloc(MAN_TH, MAN_MAX); |
|
326 |
} |
||
327 |
22186 |
curp->man->first->tok = TOKEN_NONE; |
|
328 |
22186 |
} |
|
329 |
|||
330 |
/* |
||
331 |
* Main parse routine for a buffer. |
||
332 |
* It assumes encoding and line numbering are already set up. |
||
333 |
* It can recurse directly (for invocations of user-defined |
||
334 |
* macros, inline equations, and input line traps) |
||
335 |
* and indirectly (for .so file inclusion). |
||
336 |
*/ |
||
337 |
static int |
||
338 |
mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start) |
||
339 |
{ |
||
340 |
162102 |
struct buf ln; |
|
341 |
const char *save_file; |
||
342 |
162102 |
char *cp; |
|
343 |
162102 |
size_t pos; /* byte number in the ln buffer */ |
|
344 |
enum rofferr rr; |
||
345 |
162102 |
int of; |
|
346 |
int lnn; /* line number in the real file */ |
||
347 |
int fd; |
||
348 |
unsigned char c; |
||
349 |
|||
350 |
162102 |
memset(&ln, 0, sizeof(ln)); |
|
351 |
|||
352 |
162102 |
lnn = curp->line; |
|
353 |
162102 |
pos = 0; |
|
354 |
|||
355 |
✓✓ | 6859344 |
while (i < blk.sz) { |
356 |
✓✓✓✓ |
7833813 |
if (0 == pos && '\0' == blk.buf[i]) |
357 |
break; |
||
358 |
|||
359 |
✓✓ | 3838031 |
if (start) { |
360 |
3543000 |
curp->line = lnn; |
|
361 |
3543000 |
curp->reparse_count = 0; |
|
362 |
|||
363 |
✓✓✓✗ |
3568102 |
if (lnn < 3 && |
364 |
✓✓ | 48750 |
curp->filenc & MPARSE_UTF8 && |
365 |
25102 |
curp->filenc & MPARSE_LATIN1) |
|
366 |
25102 |
curp->filenc = preconv_cue(&blk, i); |
|
367 |
} |
||
368 |
|||
369 |
✓✓✓✓ ✓✓ |
342551767 |
while (i < blk.sz && (start || blk.buf[i] != '\0')) { |
370 |
|||
371 |
/* |
||
372 |
* When finding an unescaped newline character, |
||
373 |
* leave the character loop to process the line. |
||
374 |
* Skip a preceding carriage return, if any. |
||
375 |
*/ |
||
376 |
|||
377 |
✗✓✗✗ ✗✗ |
114691897 |
if ('\r' == blk.buf[i] && i + 1 < blk.sz && |
378 |
'\n' == blk.buf[i + 1]) |
||
379 |
++i; |
||
380 |
✓✓ | 114691897 |
if ('\n' == blk.buf[i]) { |
381 |
3837929 |
++i; |
|
382 |
3837929 |
++lnn; |
|
383 |
3837929 |
break; |
|
384 |
} |
||
385 |
|||
386 |
/* |
||
387 |
* Make sure we have space for the worst |
||
388 |
* case of 11 bytes: "\\[u10ffff]\0" |
||
389 |
*/ |
||
390 |
|||
391 |
✓✓ | 110853968 |
if (pos + 11 > ln.sz) |
392 |
497787 |
resize_buf(&ln, 256); |
|
393 |
|||
394 |
/* |
||
395 |
* Encode 8-bit input. |
||
396 |
*/ |
||
397 |
|||
398 |
110853968 |
c = blk.buf[i]; |
|
399 |
✓✓ | 110853968 |
if (c & 0x80) { |
400 |
✓✓✓✓ |
24416 |
if ( ! (curp->filenc && preconv_encode( |
401 |
&blk, &i, &ln, &pos, &curp->filenc))) { |
||
402 |
21527 |
mandoc_vmsg(MANDOCERR_CHAR_BAD, curp, |
|
403 |
21527 |
curp->line, pos, "0x%x", c); |
|
404 |
21527 |
ln.buf[pos++] = '?'; |
|
405 |
21527 |
i++; |
|
406 |
21527 |
} |
|
407 |
continue; |
||
408 |
} |
||
409 |
|||
410 |
/* |
||
411 |
* Exclude control characters. |
||
412 |
*/ |
||
413 |
|||
414 |
✓✓✓✓ ✓✓ |
221728280 |
if (c == 0x7f || (c < 0x20 && c != 0x09)) { |
415 |
✓✓✓✗ |
351 |
mandoc_vmsg(c == 0x00 || c == 0x04 || |
416 |
54 |
c > 0x0a ? MANDOCERR_CHAR_BAD : |
|
417 |
MANDOCERR_CHAR_UNSUPP, |
||
418 |
81 |
curp, curp->line, pos, "0x%x", c); |
|
419 |
81 |
i++; |
|
420 |
✗✓ | 81 |
if (c != '\r') |
421 |
81 |
ln.buf[pos++] = '?'; |
|
422 |
continue; |
||
423 |
} |
||
424 |
|||
425 |
110831874 |
ln.buf[pos++] = blk.buf[i++]; |
|
426 |
} |
||
427 |
|||
428 |
✓✓ | 3838031 |
if (pos + 1 >= ln.sz) |
429 |
26 |
resize_buf(&ln, 256); |
|
430 |
|||
431 |
✓✓✓✓ |
7651640 |
if (i == blk.sz || blk.buf[i] == '\0') |
432 |
144101 |
ln.buf[pos++] = '\n'; |
|
433 |
3838031 |
ln.buf[pos] = '\0'; |
|
434 |
|||
435 |
/* |
||
436 |
* A significant amount of complexity is contained by |
||
437 |
* the roff preprocessor. It's line-oriented but can be |
||
438 |
* expressed on one line, so we need at times to |
||
439 |
* readjust our starting point and re-run it. The roff |
||
440 |
* preprocessor can also readjust the buffers with new |
||
441 |
* data, so we pass them in wholesale. |
||
442 |
*/ |
||
443 |
|||
444 |
3838031 |
of = 0; |
|
445 |
|||
446 |
/* |
||
447 |
* Maintain a lookaside buffer of all parsed lines. We |
||
448 |
* only do this if mparse_keep() has been invoked (the |
||
449 |
* buffer may be accessed with mparse_getkeep()). |
||
450 |
*/ |
||
451 |
|||
452 |
✓✓ | 3838031 |
if (curp->secondary) { |
453 |
47466 |
curp->secondary->buf = mandoc_realloc( |
|
454 |
47466 |
curp->secondary->buf, |
|
455 |
47466 |
curp->secondary->sz + pos + 2); |
|
456 |
142398 |
memcpy(curp->secondary->buf + |
|
457 |
47466 |
curp->secondary->sz, |
|
458 |
47466 |
ln.buf, pos); |
|
459 |
47466 |
curp->secondary->sz += pos; |
|
460 |
94932 |
curp->secondary->buf |
|
461 |
94932 |
[curp->secondary->sz] = '\n'; |
|
462 |
47466 |
curp->secondary->sz++; |
|
463 |
94932 |
curp->secondary->buf |
|
464 |
94932 |
[curp->secondary->sz] = '\0'; |
|
465 |
47466 |
} |
|
466 |
rerun: |
||
467 |
7825445 |
rr = roff_parseln(curp->roff, curp->line, &ln, &of); |
|
468 |
|||
469 |
✓✓✓✓ ✓✓ |
7825445 |
switch (rr) { |
470 |
case ROFF_REPARSE: |
||
471 |
✓✓ | 137697 |
if (++curp->reparse_count > REPARSE_LIMIT) |
472 |
18 |
mandoc_msg(MANDOCERR_ROFFLOOP, curp, |
|
473 |
18 |
curp->line, pos, NULL); |
|
474 |
✓✓ | 275358 |
else if (mparse_buf_r(curp, ln, of, 0) == 1 || |
475 |
137679 |
start == 1) { |
|
476 |
119697 |
pos = 0; |
|
477 |
119697 |
continue; |
|
478 |
} |
||
479 |
18000 |
free(ln.buf); |
|
480 |
18000 |
return 0; |
|
481 |
case ROFF_APPEND: |
||
482 |
140662 |
pos = strlen(ln.buf); |
|
483 |
140662 |
continue; |
|
484 |
case ROFF_RERUN: |
||
485 |
goto rerun; |
||
486 |
case ROFF_IGN: |
||
487 |
773673 |
pos = 0; |
|
488 |
773673 |
continue; |
|
489 |
case ROFF_SO: |
||
490 |
✓✗✗✗ |
94 |
if ( ! (curp->options & MPARSE_SO) && |
491 |
✗✓ | 94 |
(i >= blk.sz || blk.buf[i] == '\0')) { |
492 |
94 |
curp->sodest = mandoc_strdup(ln.buf + of); |
|
493 |
94 |
free(ln.buf); |
|
494 |
94 |
return 1; |
|
495 |
} |
||
496 |
/* |
||
497 |
* We remove `so' clauses from our lookaside |
||
498 |
* buffer because we're going to descend into |
||
499 |
* the file recursively. |
||
500 |
*/ |
||
501 |
if (curp->secondary) |
||
502 |
curp->secondary->sz -= pos + 1; |
||
503 |
save_file = curp->file; |
||
504 |
if ((fd = mparse_open(curp, ln.buf + of)) != -1) { |
||
505 |
mparse_readfd(curp, fd, ln.buf + of); |
||
506 |
close(fd); |
||
507 |
curp->file = save_file; |
||
508 |
} else { |
||
509 |
curp->file = save_file; |
||
510 |
mandoc_vmsg(MANDOCERR_SO_FAIL, |
||
511 |
curp, curp->line, pos, |
||
512 |
".so %s", ln.buf + of); |
||
513 |
ln.sz = mandoc_asprintf(&cp, |
||
514 |
".sp\nSee the file %s.\n.sp", |
||
515 |
ln.buf + of); |
||
516 |
free(ln.buf); |
||
517 |
ln.buf = cp; |
||
518 |
of = 0; |
||
519 |
mparse_buf_r(curp, ln, of, 0); |
||
520 |
} |
||
521 |
pos = 0; |
||
522 |
continue; |
||
523 |
default: |
||
524 |
break; |
||
525 |
} |
||
526 |
|||
527 |
✓✓ | 2785905 |
if (curp->man->macroset == MACROSET_NONE) |
528 |
22186 |
choose_parser(curp); |
|
529 |
|||
530 |
✓✓✓✗ |
11143620 |
if ((curp->man->macroset == MACROSET_MDOC ? |
531 |
1592461 |
mdoc_parseln(curp->man, curp->line, ln.buf, of) : |
|
532 |
3979349 |
man_parseln(curp->man, curp->line, ln.buf, of)) == 2) |
|
533 |
break; |
||
534 |
|||
535 |
/* Temporary buffers typically are not full. */ |
||
536 |
|||
537 |
✓✓✓✓ |
2858140 |
if (0 == start && '\0' == blk.buf[i]) |
538 |
break; |
||
539 |
|||
540 |
/* Start the next input line. */ |
||
541 |
|||
542 |
2750554 |
pos = 0; |
|
543 |
} |
||
544 |
|||
545 |
144008 |
free(ln.buf); |
|
546 |
144008 |
return 1; |
|
547 |
162102 |
} |
|
548 |
|||
549 |
static int |
||
550 |
read_whole_file(struct mparse *curp, const char *file, int fd, |
||
551 |
struct buf *fb, int *with_mmap) |
||
552 |
{ |
||
553 |
48846 |
struct stat st; |
|
554 |
gzFile gz; |
||
555 |
size_t off; |
||
556 |
ssize_t ssz; |
||
557 |
|||
558 |
✗✓ | 24423 |
if (fstat(fd, &st) == -1) { |
559 |
mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0, |
||
560 |
"fstat: %s", strerror(errno)); |
||
561 |
return 0; |
||
562 |
} |
||
563 |
|||
564 |
/* |
||
565 |
* If we're a regular file, try just reading in the whole entry |
||
566 |
* via mmap(). This is faster than reading it into blocks, and |
||
567 |
* since each file is only a few bytes to begin with, I'm not |
||
568 |
* concerned that this is going to tank any machines. |
||
569 |
*/ |
||
570 |
|||
571 |
✓✗✓✗ |
48846 |
if (curp->gzip == 0 && S_ISREG(st.st_mode)) { |
572 |
✗✓ | 24423 |
if (st.st_size > 0x7fffffff) { |
573 |
mandoc_msg(MANDOCERR_TOOLARGE, curp, 0, 0, NULL); |
||
574 |
return 0; |
||
575 |
} |
||
576 |
24423 |
*with_mmap = 1; |
|
577 |
24423 |
fb->sz = (size_t)st.st_size; |
|
578 |
24423 |
fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0); |
|
579 |
✓✓ | 24423 |
if (fb->buf != MAP_FAILED) |
580 |
24422 |
return 1; |
|
581 |
} |
||
582 |
|||
583 |
✗✓ | 1 |
if (curp->gzip) { |
584 |
if ((gz = gzdopen(fd, "rb")) == NULL) { |
||
585 |
mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0, |
||
586 |
"gzdopen: %s", strerror(errno)); |
||
587 |
return 0; |
||
588 |
} |
||
589 |
} else |
||
590 |
gz = NULL; |
||
591 |
|||
592 |
/* |
||
593 |
* If this isn't a regular file (like, say, stdin), then we must |
||
594 |
* go the old way and just read things in bit by bit. |
||
595 |
*/ |
||
596 |
|||
597 |
1 |
*with_mmap = 0; |
|
598 |
off = 0; |
||
599 |
1 |
fb->sz = 0; |
|
600 |
1 |
fb->buf = NULL; |
|
601 |
1 |
for (;;) { |
|
602 |
✓✗ | 1 |
if (off == fb->sz) { |
603 |
✗✓ | 1 |
if (fb->sz == (1U << 31)) { |
604 |
mandoc_msg(MANDOCERR_TOOLARGE, curp, |
||
605 |
0, 0, NULL); |
||
606 |
break; |
||
607 |
} |
||
608 |
1 |
resize_buf(fb, 65536); |
|
609 |
1 |
} |
|
610 |
✗✓ | 3 |
ssz = curp->gzip ? |
611 |
gzread(gz, fb->buf + (int)off, fb->sz - off) : |
||
612 |
1 |
read(fd, fb->buf + (int)off, fb->sz - off); |
|
613 |
✓✗ | 1 |
if (ssz == 0) { |
614 |
1 |
fb->sz = off; |
|
615 |
1 |
return 1; |
|
616 |
} |
||
617 |
if (ssz == -1) { |
||
618 |
mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0, |
||
619 |
"read: %s", strerror(errno)); |
||
620 |
break; |
||
621 |
} |
||
622 |
off += (size_t)ssz; |
||
623 |
} |
||
624 |
|||
625 |
free(fb->buf); |
||
626 |
fb->buf = NULL; |
||
627 |
return 0; |
||
628 |
24423 |
} |
|
629 |
|||
630 |
static void |
||
631 |
mparse_end(struct mparse *curp) |
||
632 |
{ |
||
633 |
✓✓ | 48846 |
if (curp->man->macroset == MACROSET_NONE) |
634 |
95 |
curp->man->macroset = MACROSET_MAN; |
|
635 |
✓✓ | 24423 |
if (curp->man->macroset == MACROSET_MDOC) |
636 |
17250 |
mdoc_endparse(curp->man); |
|
637 |
else |
||
638 |
7173 |
man_endparse(curp->man); |
|
639 |
24423 |
roff_endparse(curp->roff); |
|
640 |
24423 |
} |
|
641 |
|||
642 |
static void |
||
643 |
mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file) |
||
644 |
{ |
||
645 |
struct buf *svprimary; |
||
646 |
const char *svfile; |
||
647 |
size_t offset; |
||
648 |
static int recursion_depth; |
||
649 |
|||
650 |
✗✓ | 24423 |
if (64 < recursion_depth) { |
651 |
mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL); |
||
652 |
return; |
||
653 |
} |
||
654 |
|||
655 |
/* Line number is per-file. */ |
||
656 |
24423 |
svfile = curp->file; |
|
657 |
24423 |
curp->file = file; |
|
658 |
24423 |
svprimary = curp->primary; |
|
659 |
24423 |
curp->primary = &blk; |
|
660 |
24423 |
curp->line = 1; |
|
661 |
24423 |
recursion_depth++; |
|
662 |
|||
663 |
/* Skip an UTF-8 byte order mark. */ |
||
664 |
✓✓✓✗ ✗✗ |
36974 |
if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 && |
665 |
✗✓ | 12551 |
(unsigned char)blk.buf[0] == 0xef && |
666 |
(unsigned char)blk.buf[1] == 0xbb && |
||
667 |
(unsigned char)blk.buf[2] == 0xbf) { |
||
668 |
offset = 3; |
||
669 |
curp->filenc &= ~MPARSE_LATIN1; |
||
670 |
} else |
||
671 |
offset = 0; |
||
672 |
|||
673 |
24423 |
mparse_buf_r(curp, blk, offset, 1); |
|
674 |
|||
675 |
✓✗ | 24423 |
if (--recursion_depth == 0) |
676 |
24423 |
mparse_end(curp); |
|
677 |
|||
678 |
24423 |
curp->primary = svprimary; |
|
679 |
24423 |
curp->file = svfile; |
|
680 |
48846 |
} |
|
681 |
|||
682 |
/* |
||
683 |
* Read the whole file into memory and call the parsers. |
||
684 |
* Called recursively when an .so request is encountered. |
||
685 |
*/ |
||
686 |
enum mandoclevel |
||
687 |
mparse_readfd(struct mparse *curp, int fd, const char *file) |
||
688 |
{ |
||
689 |
48846 |
struct buf blk; |
|
690 |
24423 |
int with_mmap; |
|
691 |
int save_filenc; |
||
692 |
|||
693 |
✓✗ | 24423 |
if (read_whole_file(curp, file, fd, &blk, &with_mmap)) { |
694 |
24423 |
save_filenc = curp->filenc; |
|
695 |
24423 |
curp->filenc = curp->options & |
|
696 |
(MPARSE_UTF8 | MPARSE_LATIN1); |
||
697 |
24423 |
mparse_parse_buffer(curp, blk, file); |
|
698 |
24423 |
curp->filenc = save_filenc; |
|
699 |
✓✓ | 24423 |
if (with_mmap) |
700 |
24422 |
munmap(blk.buf, blk.sz); |
|
701 |
else |
||
702 |
1 |
free(blk.buf); |
|
703 |
} |
||
704 |
48846 |
return curp->file_status; |
|
705 |
24423 |
} |
|
706 |
|||
707 |
int |
||
708 |
mparse_open(struct mparse *curp, const char *file) |
||
709 |
{ |
||
710 |
48846 |
char *cp; |
|
711 |
int fd; |
||
712 |
|||
713 |
24423 |
curp->file = file; |
|
714 |
24423 |
cp = strrchr(file, '.'); |
|
715 |
✓✗ | 73269 |
curp->gzip = (cp != NULL && ! strcmp(cp + 1, "gz")); |
716 |
|||
717 |
/* First try to use the filename as it is. */ |
||
718 |
|||
719 |
✓✗ | 24423 |
if ((fd = open(file, O_RDONLY)) != -1) |
720 |
24423 |
return fd; |
|
721 |
|||
722 |
/* |
||
723 |
* If that doesn't work and the filename doesn't |
||
724 |
* already end in .gz, try appending .gz. |
||
725 |
*/ |
||
726 |
|||
727 |
if ( ! curp->gzip) { |
||
728 |
mandoc_asprintf(&cp, "%s.gz", file); |
||
729 |
fd = open(cp, O_RDONLY); |
||
730 |
free(cp); |
||
731 |
if (fd != -1) { |
||
732 |
curp->gzip = 1; |
||
733 |
return fd; |
||
734 |
} |
||
735 |
} |
||
736 |
|||
737 |
/* Neither worked, give up. */ |
||
738 |
|||
739 |
mandoc_msg(MANDOCERR_FILE, curp, 0, 0, strerror(errno)); |
||
740 |
return -1; |
||
741 |
24423 |
} |
|
742 |
|||
743 |
struct mparse * |
||
744 |
mparse_alloc(int options, enum mandocerr mmin, mandocmsg mmsg, |
||
745 |
enum mandoc_os os_e, const char *os_s) |
||
746 |
{ |
||
747 |
struct mparse *curp; |
||
748 |
|||
749 |
25294 |
curp = mandoc_calloc(1, sizeof(struct mparse)); |
|
750 |
|||
751 |
12647 |
curp->options = options; |
|
752 |
12647 |
curp->mmin = mmin; |
|
753 |
12647 |
curp->mmsg = mmsg; |
|
754 |
12647 |
curp->os_s = os_s; |
|
755 |
|||
756 |
12647 |
curp->roff = roff_alloc(curp, options); |
|
757 |
25294 |
curp->man = roff_man_alloc(curp->roff, curp, curp->os_s, |
|
758 |
12647 |
curp->options & MPARSE_QUICK ? 1 : 0); |
|
759 |
✓✓ | 12647 |
if (curp->options & MPARSE_MDOC) { |
760 |
234 |
curp->man->macroset = MACROSET_MDOC; |
|
761 |
✓✗ | 234 |
if (curp->man->mdocmac == NULL) |
762 |
234 |
curp->man->mdocmac = roffhash_alloc(MDOC_Dd, MDOC_MAX); |
|
763 |
✓✓ | 12413 |
} else if (curp->options & MPARSE_MAN) { |
764 |
1908 |
curp->man->macroset = MACROSET_MAN; |
|
765 |
✓✗ | 1908 |
if (curp->man->manmac == NULL) |
766 |
1908 |
curp->man->manmac = roffhash_alloc(MAN_TH, MAN_MAX); |
|
767 |
} |
||
768 |
12647 |
curp->man->first->tok = TOKEN_NONE; |
|
769 |
12647 |
curp->man->meta.os_e = os_e; |
|
770 |
12647 |
return curp; |
|
771 |
} |
||
772 |
|||
773 |
void |
||
774 |
mparse_reset(struct mparse *curp) |
||
775 |
{ |
||
776 |
23744 |
roff_reset(curp->roff); |
|
777 |
11872 |
roff_man_reset(curp->man); |
|
778 |
|||
779 |
11872 |
free(curp->sodest); |
|
780 |
11872 |
curp->sodest = NULL; |
|
781 |
|||
782 |
✗✓ | 11872 |
if (curp->secondary) |
783 |
curp->secondary->sz = 0; |
||
784 |
|||
785 |
11872 |
curp->file_status = MANDOCLEVEL_OK; |
|
786 |
11872 |
curp->gzip = 0; |
|
787 |
11872 |
} |
|
788 |
|||
789 |
void |
||
790 |
mparse_free(struct mparse *curp) |
||
791 |
{ |
||
792 |
|||
793 |
25294 |
roffhash_free(curp->man->mdocmac); |
|
794 |
12647 |
roffhash_free(curp->man->manmac); |
|
795 |
12647 |
roff_man_free(curp->man); |
|
796 |
12647 |
roff_free(curp->roff); |
|
797 |
✓✓ | 12647 |
if (curp->secondary) |
798 |
1899 |
free(curp->secondary->buf); |
|
799 |
|||
800 |
12647 |
free(curp->secondary); |
|
801 |
12647 |
free(curp->sodest); |
|
802 |
12647 |
free(curp); |
|
803 |
12647 |
} |
|
804 |
|||
805 |
void |
||
806 |
mparse_result(struct mparse *curp, struct roff_man **man, |
||
807 |
char **sodest) |
||
808 |
{ |
||
809 |
|||
810 |
✓✓✓✓ |
60718 |
if (sodest && NULL != (*sodest = curp->sodest)) { |
811 |
94 |
*man = NULL; |
|
812 |
94 |
return; |
|
813 |
} |
||
814 |
✓✗ | 24329 |
if (man) |
815 |
24329 |
*man = curp->man; |
|
816 |
24423 |
} |
|
817 |
|||
818 |
void |
||
819 |
mparse_updaterc(struct mparse *curp, enum mandoclevel *rc) |
||
820 |
{ |
||
821 |
✓✓ | 25102 |
if (curp->file_status > *rc) |
822 |
945 |
*rc = curp->file_status; |
|
823 |
12551 |
} |
|
824 |
|||
825 |
void |
||
826 |
mandoc_vmsg(enum mandocerr t, struct mparse *m, |
||
827 |
int ln, int pos, const char *fmt, ...) |
||
828 |
{ |
||
829 |
72962 |
char buf[256]; |
|
830 |
36481 |
va_list ap; |
|
831 |
|||
832 |
36481 |
va_start(ap, fmt); |
|
833 |
36481 |
(void)vsnprintf(buf, sizeof(buf), fmt, ap); |
|
834 |
36481 |
va_end(ap); |
|
835 |
|||
836 |
36481 |
mandoc_msg(t, m, ln, pos, buf); |
|
837 |
36481 |
} |
|
838 |
|||
839 |
void |
||
840 |
mandoc_msg(enum mandocerr er, struct mparse *m, |
||
841 |
int ln, int col, const char *msg) |
||
842 |
{ |
||
843 |
enum mandoclevel level; |
||
844 |
|||
845 |
✓✓ | 138812 |
if (er < m->mmin && er != MANDOCERR_FILE) |
846 |
60496 |
return; |
|
847 |
|||
848 |
level = MANDOCLEVEL_UNSUPP; |
||
849 |
✓✓ | 51300 |
while (er < mandoclimits[level]) |
850 |
16740 |
level--; |
|
851 |
|||
852 |
✓✗ | 8910 |
if (m->mmsg) |
853 |
8910 |
(*m->mmsg)(er, level, m->file, ln, col, msg); |
|
854 |
|||
855 |
✓✓ | 8910 |
if (m->file_status < level) |
856 |
2223 |
m->file_status = level; |
|
857 |
78316 |
} |
|
858 |
|||
859 |
const char * |
||
860 |
mparse_strerror(enum mandocerr er) |
||
861 |
{ |
||
862 |
|||
863 |
17838 |
return mandocerrs[er]; |
|
864 |
} |
||
865 |
|||
866 |
const char * |
||
867 |
mparse_strlevel(enum mandoclevel lvl) |
||
868 |
{ |
||
869 |
17838 |
return mandoclevels[lvl]; |
|
870 |
} |
||
871 |
|||
872 |
void |
||
873 |
mparse_keep(struct mparse *p) |
||
874 |
{ |
||
875 |
|||
876 |
✗✓ | 3798 |
assert(NULL == p->secondary); |
877 |
1899 |
p->secondary = mandoc_calloc(1, sizeof(struct buf)); |
|
878 |
1899 |
} |
|
879 |
|||
880 |
const char * |
||
881 |
mparse_getkeep(const struct mparse *p) |
||
882 |
{ |
||
883 |
|||
884 |
assert(p->secondary); |
||
885 |
return p->secondary->sz ? p->secondary->buf : NULL; |
||
886 |
} |
Generated by: GCOVR (Version 3.3) |