GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: usr.bin/mandoc/mandoc.c Lines: 0 219 0.0 %
Date: 2016-12-06 Branches: 0 199 0.0 %

Line Branch Exec Source
1
/*	$OpenBSD: mandoc.c,v 1.66 2015/11/12 22:43:30 schwarze Exp $ */
2
/*
3
 * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4
 * Copyright (c) 2011-2015 Ingo Schwarze <schwarze@openbsd.org>
5
 *
6
 * Permission to use, copy, modify, and distribute this software for any
7
 * purpose with or without fee is hereby granted, provided that the above
8
 * copyright notice and this permission notice appear in all copies.
9
 *
10
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17
 */
18
#include <sys/types.h>
19
20
#include <assert.h>
21
#include <ctype.h>
22
#include <errno.h>
23
#include <limits.h>
24
#include <stdlib.h>
25
#include <stdio.h>
26
#include <string.h>
27
#include <time.h>
28
29
#include "mandoc.h"
30
#include "mandoc_aux.h"
31
#include "libmandoc.h"
32
33
/*
 * Parse a date string (third argument) according to a strptime(3)
 * format (second argument) and store the result through the first
 * argument; yields 1 if the whole string matched, 0 otherwise.
 */
static	int	 a2time(time_t *, const char *, const char *);
34
/*
 * Format a time as "Month D, YYYY" in a buffer obtained from
 * mandoc_malloc(); yields NULL if the time cannot be formatted.
 */
static	char	*time2a(time_t);
35
36
37
enum mandoc_esc
38
mandoc_escape(const char **end, const char **start, int *sz)
39
{
40
	const char	*local_start;
41
	int		 local_sz;
42
	char		 term;
43
	enum mandoc_esc	 gly;
44
45
	/*
46
	 * When the caller doesn't provide return storage,
47
	 * use local storage.
48
	 */
49
50
	if (NULL == start)
51
		start = &local_start;
52
	if (NULL == sz)
53
		sz = &local_sz;
54
55
	/*
56
	 * Beyond the backslash, at least one input character
57
	 * is part of the escape sequence.  With one exception
58
	 * (see below), that character won't be returned.
59
	 */
60
61
	gly = ESCAPE_ERROR;
62
	*start = ++*end;
63
	*sz = 0;
64
	term = '\0';
65
66
	switch ((*start)[-1]) {
67
	/*
68
	 * First the glyphs.  There are several different forms of
69
	 * these, but each eventually returns a substring of the glyph
70
	 * name.
71
	 */
72
	case '(':
73
		gly = ESCAPE_SPECIAL;
74
		*sz = 2;
75
		break;
76
	case '[':
77
		gly = ESCAPE_SPECIAL;
78
		term = ']';
79
		break;
80
	case 'C':
81
		if ('\'' != **start)
82
			return ESCAPE_ERROR;
83
		*start = ++*end;
84
		gly = ESCAPE_SPECIAL;
85
		term = '\'';
86
		break;
87
88
	/*
89
	 * Escapes taking no arguments at all.
90
	 */
91
	case 'd':
92
	case 'u':
93
	case ',':
94
	case '/':
95
		return ESCAPE_IGNORE;
96
97
	/*
98
	 * The \z escape is supposed to output the following
99
	 * character without advancing the cursor position.
100
	 * Since we are mostly dealing with terminal mode,
101
	 * let us just skip the next character.
102
	 */
103
	case 'z':
104
		return ESCAPE_SKIPCHAR;
105
106
	/*
107
	 * Handle all triggers matching \X(xy, \Xx, and \X[xxxx], where
108
	 * 'X' is the trigger.  These have opaque sub-strings.
109
	 */
110
	case 'F':
111
	case 'g':
112
	case 'k':
113
	case 'M':
114
	case 'm':
115
	case 'n':
116
	case 'V':
117
	case 'Y':
118
		gly = ESCAPE_IGNORE;
119
		/* FALLTHROUGH */
120
	case 'f':
121
		if (ESCAPE_ERROR == gly)
122
			gly = ESCAPE_FONT;
123
		switch (**start) {
124
		case '(':
125
			*start = ++*end;
126
			*sz = 2;
127
			break;
128
		case '[':
129
			*start = ++*end;
130
			term = ']';
131
			break;
132
		default:
133
			*sz = 1;
134
			break;
135
		}
136
		break;
137
138
	/*
139
	 * These escapes are of the form \X'Y', where 'X' is the trigger
140
	 * and 'Y' is any string.  These have opaque sub-strings.
141
	 * The \B and \w escapes are handled in roff.c, roff_res().
142
	 */
143
	case 'A':
144
	case 'b':
145
	case 'D':
146
	case 'R':
147
	case 'X':
148
	case 'Z':
149
		gly = ESCAPE_IGNORE;
150
		/* FALLTHROUGH */
151
	case 'o':
152
		if (**start == '\0')
153
			return ESCAPE_ERROR;
154
		if (gly == ESCAPE_ERROR)
155
			gly = ESCAPE_OVERSTRIKE;
156
		term = **start;
157
		*start = ++*end;
158
		break;
159
160
	/*
161
	 * These escapes are of the form \X'N', where 'X' is the trigger
162
	 * and 'N' resolves to a numerical expression.
163
	 */
164
	case 'h':
165
	case 'H':
166
	case 'L':
167
	case 'l':
168
	case 'S':
169
	case 'v':
170
	case 'x':
171
		if (strchr(" %&()*+-./0123456789:<=>", **start)) {
172
			if ('\0' != **start)
173
				++*end;
174
			return ESCAPE_ERROR;
175
		}
176
		gly = ESCAPE_IGNORE;
177
		term = **start;
178
		*start = ++*end;
179
		break;
180
181
	/*
182
	 * Special handling for the numbered character escape.
183
	 * XXX Do any other escapes need similar handling?
184
	 */
185
	case 'N':
186
		if ('\0' == **start)
187
			return ESCAPE_ERROR;
188
		(*end)++;
189
		if (isdigit((unsigned char)**start)) {
190
			*sz = 1;
191
			return ESCAPE_IGNORE;
192
		}
193
		(*start)++;
194
		while (isdigit((unsigned char)**end))
195
			(*end)++;
196
		*sz = *end - *start;
197
		if ('\0' != **end)
198
			(*end)++;
199
		return ESCAPE_NUMBERED;
200
201
	/*
202
	 * Sizes get a special category of their own.
203
	 */
204
	case 's':
205
		gly = ESCAPE_IGNORE;
206
207
		/* See +/- counts as a sign. */
208
		if ('+' == **end || '-' == **end || ASCII_HYPH == **end)
209
			*start = ++*end;
210
211
		switch (**end) {
212
		case '(':
213
			*start = ++*end;
214
			*sz = 2;
215
			break;
216
		case '[':
217
			*start = ++*end;
218
			term = ']';
219
			break;
220
		case '\'':
221
			*start = ++*end;
222
			term = '\'';
223
			break;
224
		case '3':
225
		case '2':
226
		case '1':
227
			*sz = (*end)[-1] == 's' &&
228
			    isdigit((unsigned char)(*end)[1]) ? 2 : 1;
229
			break;
230
		default:
231
			*sz = 1;
232
			break;
233
		}
234
235
		break;
236
237
	/*
238
	 * Anything else is assumed to be a glyph.
239
	 * In this case, pass back the character after the backslash.
240
	 */
241
	default:
242
		gly = ESCAPE_SPECIAL;
243
		*start = --*end;
244
		*sz = 1;
245
		break;
246
	}
247
248
	assert(ESCAPE_ERROR != gly);
249
250
	/*
251
	 * Read up to the terminating character,
252
	 * paying attention to nested escapes.
253
	 */
254
255
	if ('\0' != term) {
256
		while (**end != term) {
257
			switch (**end) {
258
			case '\0':
259
				return ESCAPE_ERROR;
260
			case '\\':
261
				(*end)++;
262
				if (ESCAPE_ERROR ==
263
				    mandoc_escape(end, NULL, NULL))
264
					return ESCAPE_ERROR;
265
				break;
266
			default:
267
				(*end)++;
268
				break;
269
			}
270
		}
271
		*sz = (*end)++ - *start;
272
	} else {
273
		assert(*sz > 0);
274
		if ((size_t)*sz > strlen(*start))
275
			return ESCAPE_ERROR;
276
		*end += *sz;
277
	}
278
279
	/* Run post-processors. */
280
281
	switch (gly) {
282
	case ESCAPE_FONT:
283
		if (2 == *sz) {
284
			if ('C' == **start) {
285
				/*
286
				 * Treat constant-width font modes
287
				 * just like regular font modes.
288
				 */
289
				(*start)++;
290
				(*sz)--;
291
			} else {
292
				if ('B' == (*start)[0] && 'I' == (*start)[1])
293
					gly = ESCAPE_FONTBI;
294
				break;
295
			}
296
		} else if (1 != *sz)
297
			break;
298
299
		switch (**start) {
300
		case '3':
301
		case 'B':
302
			gly = ESCAPE_FONTBOLD;
303
			break;
304
		case '2':
305
		case 'I':
306
			gly = ESCAPE_FONTITALIC;
307
			break;
308
		case 'P':
309
			gly = ESCAPE_FONTPREV;
310
			break;
311
		case '1':
312
		case 'R':
313
			gly = ESCAPE_FONTROMAN;
314
			break;
315
		}
316
		break;
317
	case ESCAPE_SPECIAL:
318
		if (1 == *sz && 'c' == **start)
319
			gly = ESCAPE_NOSPACE;
320
		/*
321
		 * Unicode escapes are defined in groff as \[u0000]
322
		 * to \[u10FFFF], where the contained value must be
323
		 * a valid Unicode codepoint.  Here, however, only
324
		 * check the length and range.
325
		 */
326
		if (**start != 'u' || *sz < 5 || *sz > 7)
327
			break;
328
		if (*sz == 7 && ((*start)[1] != '1' || (*start)[2] != '0'))
329
			break;
330
		if (*sz == 6 && (*start)[1] == '0')
331
			break;
332
		if (*sz == 5 && (*start)[1] == 'D' &&
333
		    strchr("89ABCDEF", (*start)[2]) != NULL)
334
			break;
335
		if ((int)strspn(*start + 1, "0123456789ABCDEFabcdef")
336
		    + 1 == *sz)
337
			gly = ESCAPE_UNICODE;
338
		break;
339
	default:
340
		break;
341
	}
342
343
	return gly;
344
}
345
346
/*
347
 * Parse a quoted or unquoted roff-style request or macro argument.
348
 * Return a pointer to the parsed argument, which is either the original
349
 * pointer or advanced by one byte in case the argument is quoted.
350
 * NUL-terminate the argument in place.
351
 * Collapse pairs of quotes inside quoted arguments.
352
 * Advance the argument pointer to the next argument,
353
 * or to the NUL byte terminating the argument line.
354
 */
355
char *
356
mandoc_getarg(struct mparse *parse, char **cpp, int ln, int *pos)
357
{
358
	char	 *start, *cp;
359
	int	  quoted, pairs, white;
360
361
	/* Quoting can only start with a new word. */
362
	start = *cpp;
363
	quoted = 0;
364
	if ('"' == *start) {
365
		quoted = 1;
366
		start++;
367
	}
368
369
	pairs = 0;
370
	white = 0;
371
	for (cp = start; '\0' != *cp; cp++) {
372
373
		/*
374
		 * Move the following text left
375
		 * after quoted quotes and after "\\" and "\t".
376
		 */
377
		if (pairs)
378
			cp[-pairs] = cp[0];
379
380
		if ('\\' == cp[0]) {
381
			/*
382
			 * In copy mode, translate double to single
383
			 * backslashes and backslash-t to literal tabs.
384
			 */
385
			switch (cp[1]) {
386
			case 't':
387
				cp[0] = '\t';
388
				/* FALLTHROUGH */
389
			case '\\':
390
				pairs++;
391
				cp++;
392
				break;
393
			case ' ':
394
				/* Skip escaped blanks. */
395
				if (0 == quoted)
396
					cp++;
397
				break;
398
			default:
399
				break;
400
			}
401
		} else if (0 == quoted) {
402
			if (' ' == cp[0]) {
403
				/* Unescaped blanks end unquoted args. */
404
				white = 1;
405
				break;
406
			}
407
		} else if ('"' == cp[0]) {
408
			if ('"' == cp[1]) {
409
				/* Quoted quotes collapse. */
410
				pairs++;
411
				cp++;
412
			} else {
413
				/* Unquoted quotes end quoted args. */
414
				quoted = 2;
415
				break;
416
			}
417
		}
418
	}
419
420
	/* Quoted argument without a closing quote. */
421
	if (1 == quoted)
422
		mandoc_msg(MANDOCERR_ARG_QUOTE, parse, ln, *pos, NULL);
423
424
	/* NUL-terminate this argument and move to the next one. */
425
	if (pairs)
426
		cp[-pairs] = '\0';
427
	if ('\0' != *cp) {
428
		*cp++ = '\0';
429
		while (' ' == *cp)
430
			cp++;
431
	}
432
	*pos += (int)(cp - start) + (quoted ? 1 : 0);
433
	*cpp = cp;
434
435
	if ('\0' == *cp && (white || ' ' == cp[-1]))
436
		mandoc_msg(MANDOCERR_SPACE_EOL, parse, ln, *pos, NULL);
437
438
	return start;
439
}
440
441
/*
 * Convert the date string p to a time_t using the strptime(3)
 * format fmt.  The format has to match the complete string.
 * On success, store the result of mktime(3) in *t and return 1.
 * Otherwise, return 0 without touching *t.
 */
static int
a2time(time_t *t, const char *fmt, const char *p)
{
	struct tm	 parsed;
	char		*rest;

	/* Fields the format does not mention remain zero. */
	memset(&parsed, 0, sizeof(parsed));

	rest = strptime(p, fmt, &parsed);

	/* Reject both parse failures and trailing garbage. */
	if (rest == NULL || *rest != '\0')
		return 0;

	*t = mktime(&parsed);
	return 1;
}
457
458
/*
 * Format the time t as local time in the form "Month D, YYYY".
 * Return a buffer obtained from mandoc_malloc() on success,
 * or NULL if localtime(3) or any of the formatting steps fails.
 */
static char *
time2a(time_t t)
{
	struct tm	*local;
	char		*out, *cursor;
	size_t		 month_len;
	int		 day_len;

	local = localtime(&t);
	if (local == NULL)
		return NULL;

	/*
	 * Space needed:
	 * 10 bytes for the month name (up to "September") and a blank,
	 *  4 bytes for the day (up to two digits), a comma, and a blank,
	 *  4 bytes for the year, and 1 byte for the terminating '\0'.
	 */

	out = mandoc_malloc(10 + 4 + 4 + 1);
	cursor = out;

	/*
	 * The three parts are formatted one by one because the day
	 * is printed with a plain "%d", without padding.
	 * Each step gets its own size limit.
	 */

	month_len = strftime(cursor, 10 + 1, "%B ", local);
	if (month_len != 0) {
		cursor += (int)month_len;
		day_len = snprintf(cursor, 4 + 1, "%d, ", local->tm_mday);
		if (day_len != -1) {
			cursor += day_len;
			if (strftime(cursor, 4 + 1, "%Y", local) != 0)
				return out;
		}
	}

	/* One of the steps failed: discard the partial result. */
	free(out);
	return NULL;
}
504
505
char *
506
mandoc_normdate(struct mparse *parse, char *in, int ln, int pos)
507
{
508
	time_t		 t;
509
510
	/* No date specified: use today's date. */
511
512
	if (in == NULL || *in == '\0' || strcmp(in, "$" "Mdocdate$") == 0) {
513
		mandoc_msg(MANDOCERR_DATE_MISSING, parse, ln, pos, NULL);
514
		return time2a(time(NULL));
515
	}
516
517
	/* Valid mdoc(7) date format. */
518
519
	if (a2time(&t, "$" "Mdocdate: %b %d %Y $", in) ||
520
	    a2time(&t, "%b %d, %Y", in))
521
		return time2a(t);
522
523
	/* Do not warn about the legacy man(7) format. */
524
525
	if ( ! a2time(&t, "%Y-%m-%d", in))
526
		mandoc_msg(MANDOCERR_DATE_BAD, parse, ln, pos, in);
527
528
	/* Use any non-mdoc(7) date verbatim. */
529
530
	return mandoc_strdup(in);
531
}
532
533
/*
 * Decide whether the sz bytes starting at p end a sentence.
 * Scanning backwards from the last byte, closing delimiters
 * (double quote, single quote, ']' and ')') and sentence punctuation
 * ('.', '!' and '?') are skipped until some other byte is found.
 * Return 1 if sentence punctuation was seen and either no closing
 * delimiter followed it or the byte preceding the whole run is
 * alphanumeric.  If the buffer consists of such characters only,
 * return 1 if it contains punctuation but no trailing delimiter.
 * Return 0 in all other cases, including sz == 0.
 */
int
mandoc_eos(const char *p, size_t sz)
{
	size_t		 i;
	int		 enclosed, found;

	if (0 == sz)
		return 0;

	/*
	 * End-of-sentence recognition must include situations where
	 * some symbols, such as `)', allow prior EOS punctuation to
	 * propagate outward.
	 *
	 * Iterate with an index counting down to zero rather than
	 * with a pointer, such that no pointer to before the start
	 * of the buffer is ever formed.
	 */

	enclosed = found = 0;
	for (i = sz; i > 0; i--) {
		switch (p[i - 1]) {
		case '\"':
		case '\'':
		case ']':
		case ')':
			/* Only delimiters after the punctuation count. */
			if (0 == found)
				enclosed = 1;
			break;
		case '.':
		case '!':
		case '?':
			found = 1;
			break;
		default:
			return found &&
			    (!enclosed || isalnum((unsigned char)p[i - 1]));
		}
	}

	return found && !enclosed;
}
571
572
/*
 * Convert the first sz bytes of p to an int with strtol(3),
 * using the given base.
 * Return -1 if sz exceeds 31 bytes, if the input is empty,
 * or if it contains anything strtol(3) cannot parse as part
 * of the number.
 * Otherwise, return the value clamped to the range from INT_MIN
 * to INT_MAX.  Negative numbers are not rejected, so a return
 * value of -1 can also result from the valid input "-1".
 */
int
mandoc_strntoi(const char *p, size_t sz, int base)
{
	char		 buf[32];
	char		*ep;
	long		 v;

	/* Leave room for the terminating NUL byte. */
	if (sz >= sizeof(buf))
		return -1;

	/* strtol(3) needs a NUL-terminated string. */
	memcpy(buf, p, sz);
	buf[sz] = '\0';

	/*
	 * Overflow is not detected by way of errno,
	 * but handled by clamping the result below.
	 */
	errno = 0;
	v = strtol(buf, &ep, base);

	if (buf[0] == '\0' || *ep != '\0')
		return -1;

	if (v > INT_MAX)
		v = INT_MAX;
	if (v < INT_MIN)
		v = INT_MIN;

	return (int)v;
}