Head

GCC Code Coverage Report

Directory:	./		Exec	Total	Coverage
File:	usr.bin/mandoc/mandoc.c	Lines:	209	223	93.7 %
Date:	2017-11-13	Branches:	182	240	75.8 %


/*	$OpenBSD: mandoc.c,v 1.71 2017/07/03 13:40:00 schwarze Exp $ */
/*
 * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
 * Copyright (c) 2011-2015, 2017 Ingo Schwarze <schwarze@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
#include <sys/types.h>

#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <time.h>

#include "mandoc_aux.h"
#include "mandoc.h"
#include "roff.h"
#include "libmandoc.h"

static	int	 a2time(time_t *, const char *, const char *);
static	char	*time2a(time_t);


/*
 * Parse one roff escape sequence and classify it.
 *
 * On entry, *end points at the escape name character, i.e. at the
 * first character after the backslash.  On a successful return,
 * *end has been advanced past the whole sequence, and *start and
 * *sz describe the argument substring of the sequence where one
 * exists.  Both start and sz may be NULL when the caller is not
 * interested in the argument.
 *
 * Returns the kind of escape found, or ESCAPE_ERROR when the
 * sequence is malformed or the input ends prematurely.
 */
enum mandoc_esc
mandoc_escape(const char **end, const char **start, int *sz)
{
	const char	*local_start;
	int		 local_sz;
	char		 term;
	enum mandoc_esc	 gly;

	/*
	 * When the caller doesn't provide return storage,
	 * use local storage.
	 */

	if (NULL == start)
		start = &local_start;
	if (NULL == sz)
		sz = &local_sz;

	/*
	 * Beyond the backslash, at least one input character
	 * is part of the escape sequence.  With one exception
	 * (see below), that character won't be returned.
	 */

	gly = ESCAPE_ERROR;
	/* Step over the escape name; from now on it is (*start)[-1]. */
	*start = ++*end;
	*sz = 0;
	/* A non-NUL term selects delimiter mode, otherwise *sz is used. */
	term = '\0';

	switch ((*start)[-1]) {
	/*
	 * First the glyphs.  There are several different forms of
	 * these, but each eventually returns a substring of the glyph
	 * name.
	 */
	case '(':
		gly = ESCAPE_SPECIAL;
		*sz = 2;
		break;
	case '[':
		gly = ESCAPE_SPECIAL;
		term = ']';
		break;
	case 'C':
		if ('\'' != **start)
			return ESCAPE_ERROR;
		*start = ++*end;
		gly = ESCAPE_SPECIAL;
		term = '\'';
		break;

	/*
	 * Escapes taking no arguments at all.
	 */
	case 'd':
	case 'u':
	case ',':
	case '/':
		return ESCAPE_IGNORE;
	case 'p':
		return ESCAPE_BREAK;

	/*
	 * The \z escape is supposed to output the following
	 * character without advancing the cursor position.
	 * Since we are mostly dealing with terminal mode,
	 * let us just skip the next character.
	 */
	case 'z':
		return ESCAPE_SKIPCHAR;

	/*
	 * Handle all triggers matching \X(xy, \Xx, and \X[xxxx], where
	 * 'X' is the trigger.  These have opaque sub-strings.
	 */
	case 'F':
	case 'g':
	case 'k':
	case 'M':
	case 'm':
	case 'n':
	case 'V':
	case 'Y':
		gly = ESCAPE_IGNORE;
		/* FALLTHROUGH */
	case 'f':
		/* Still ESCAPE_ERROR means we entered directly at 'f'. */
		if (ESCAPE_ERROR == gly)
			gly = ESCAPE_FONT;
		switch (**start) {
		case '(':
			*start = ++*end;
			*sz = 2;
			break;
		case '[':
			*start = ++*end;
			term = ']';
			break;
		default:
			*sz = 1;
			break;
		}
		break;

	/*
	 * These escapes are of the form \X'Y', where 'X' is the trigger
	 * and 'Y' is any string.  These have opaque sub-strings.
	 * The \B and \w escapes are handled in roff.c, roff_res().
	 */
	case 'A':
	case 'b':
	case 'D':
	case 'R':
	case 'X':
	case 'Z':
		gly = ESCAPE_IGNORE;
		/* FALLTHROUGH */
	case 'o':
		if (**start == '\0')
			return ESCAPE_ERROR;
		/* Still ESCAPE_ERROR means we entered directly at 'o'. */
		if (gly == ESCAPE_ERROR)
			gly = ESCAPE_OVERSTRIKE;
		/* Whatever character comes first serves as the delimiter. */
		term = **start;
		*start = ++*end;
		break;

	/*
	 * These escapes are of the form \X'N', where 'X' is the trigger
	 * and 'N' resolves to a numerical expression.
	 */
	case 'h':
	case 'H':
	case 'L':
	case 'l':
	case 'S':
	case 'v':
	case 'x':
		/*
		 * Reject characters that cannot serve as a delimiter.
		 * strchr() also matches the terminating NUL, so the
		 * end of the input is rejected here as well; in that
		 * case, *end is not advanced beyond the NUL.
		 */
		if (strchr(" %&()*+-./0123456789:<=>", **start)) {
			if ('\0' != **start)
				++*end;
			return ESCAPE_ERROR;
		}
		switch ((*start)[-1]) {
		case 'h':
			gly = ESCAPE_HORIZ;
			break;
		case 'l':
			gly = ESCAPE_HLINE;
			break;
		default:
			gly = ESCAPE_IGNORE;
			break;
		}
		term = **start;
		*start = ++*end;
		break;

	/*
	 * Special handling for the numbered character escape.
	 * XXX Do any other escapes need similar handling?
	 */
	case 'N':
		if ('\0' == **start)
			return ESCAPE_ERROR;
		(*end)++;
		/* A digit directly after \N: consume it and ignore. */
		if (isdigit((unsigned char)**start)) {
			*sz = 1;
			return ESCAPE_IGNORE;
		}
		/*
		 * Otherwise the first character acts as an opening
		 * delimiter; the run of digits behind it is returned.
		 */
		(*start)++;
		while (isdigit((unsigned char)**end))
			(*end)++;
		*sz = *end - *start;
		/*
		 * Consume one more character, if there is any, without
		 * checking that it matches the opening delimiter.
		 */
		if ('\0' != **end)
			(*end)++;
		return ESCAPE_NUMBERED;

	/*
	 * Sizes get a special category of their own.
	 */
	case 's':
		gly = ESCAPE_IGNORE;

		/* See +/- counts as a sign. */
		if ('+' == **end || '-' == **end || ASCII_HYPH == **end)
			*start = ++*end;

		switch (**end) {
		case '(':
			*start = ++*end;
			*sz = 2;
			break;
		case '[':
			*start = ++*end;
			term = ']';
			break;
		case '\'':
			*start = ++*end;
			term = '\'';
			break;
		case '3':
		case '2':
		case '1':
			/*
			 * Take two digits only if no sign was skipped
			 * (the previous character is still the 's')
			 * and a second digit actually follows.
			 */
			*sz = (*end)[-1] == 's' &&
			    isdigit((unsigned char)(*end)[1]) ? 2 : 1;
			break;
		default:
			*sz = 1;
			break;
		}

		break;

	/*
	 * Anything else is assumed to be a glyph.
	 * In this case, pass back the character after the backslash.
	 */
	default:
		gly = ESCAPE_SPECIAL;
		*start = --*end;
		*sz = 1;
		break;
	}

	assert(ESCAPE_ERROR != gly);

	/*
	 * Read up to the terminating character,
	 * paying attention to nested escapes.
	 */

	if ('\0' != term) {
		while (**end != term) {
			switch (**end) {
			case '\0':
				return ESCAPE_ERROR;
			case '\\':
				/* Recurse with *end on the nested escape name. */
				(*end)++;
				if (ESCAPE_ERROR ==
				    mandoc_escape(end, NULL, NULL))
					return ESCAPE_ERROR;
				break;
			default:
				(*end)++;
				break;
			}
		}
		/* Record the argument length, then step over the delimiter. */
		*sz = (*end)++ - *start;
	} else {
		/* Fixed-size argument: fail if the input ends too early. */
		assert(*sz > 0);
		if ((size_t)*sz > strlen(*start))
			return ESCAPE_ERROR;
		*end += *sz;
	}

	/* Run post-processors. */

	switch (gly) {
	case ESCAPE_FONT:
		if (2 == *sz) {
			if ('C' == **start) {
				/*
				 * Treat constant-width font modes
				 * just like regular font modes.
				 */
				(*start)++;
				(*sz)--;
			} else {
				/* Of the other two-letter names, only BI is mapped. */
				if ('B' == (*start)[0] && 'I' == (*start)[1])
					gly = ESCAPE_FONTBI;
				break;
			}
		} else if (1 != *sz)
			break;

		/* Map the one-character font name; others stay ESCAPE_FONT. */
		switch (**start) {
		case '3':
		case 'B':
			gly = ESCAPE_FONTBOLD;
			break;
		case '2':
		case 'I':
			gly = ESCAPE_FONTITALIC;
			break;
		case 'P':
			gly = ESCAPE_FONTPREV;
			break;
		case '1':
		case 'R':
			gly = ESCAPE_FONTROMAN;
			break;
		}
		break;
	case ESCAPE_SPECIAL:
		if (1 == *sz && 'c' == **start)
			gly = ESCAPE_NOSPACE;
		/*
		 * Unicode escapes are defined in groff as \[u0000]
		 * to \[u10FFFF], where the contained value must be
		 * a valid Unicode codepoint.  Here, however, only
		 * check the length and range.
		 */
		if (**start != 'u' || *sz < 5 || *sz > 7)
			break;
		/* Six hex digits are only accepted in the form 10XXXX. */
		if (*sz == 7 && ((*start)[1] != '1' || (*start)[2] != '0'))
			break;
		/* Five hex digits must not have a leading zero. */
		if (*sz == 6 && (*start)[1] == '0')
			break;
		/* Four hex digits must not fall into D800 to DFFF. */
		if (*sz == 5 && (*start)[1] == 'D' &&
		    strchr("89ABCDEF", (*start)[2]) != NULL)
			break;
		/* Everything after the 'u' has to be a hex digit. */
		if ((int)strspn(*start + 1, "0123456789ABCDEFabcdef")
		    + 1 == *sz)
			gly = ESCAPE_UNICODE;
		break;
	default:
		break;
	}

	return gly;
}

/*
 * Parse a quoted or unquoted roff-style request or macro argument.
 * Return a pointer to the parsed argument, which is either the original
 * pointer or advanced by one byte in case the argument is quoted.
 * NUL-terminate the argument in place.
 * Collapse pairs of quotes inside quoted arguments, translate "\\"
 * to a single backslash and "\t" to a literal tab.
 * Advance the argument pointer *cpp to the next argument,
 * or to the NUL byte terminating the argument line,
 * and advance the column *pos by the number of bytes consumed.
 * Report an unterminated quoted argument and trailing white space
 * at the end of the line with mandoc_msg(), using parse and ln.
 */
char *
mandoc_getarg(struct mparse *parse, char **cpp, int ln, int *pos)
{
	char	 *start, *cp;
	int	  quoted, pairs, white;

	/* Quoting can only start with a new word. */
	start = *cpp;
	quoted = 0;
	if ('"' == *start) {
		quoted = 1;
		start++;
	}

	/*
	 * pairs counts the bytes dropped so far; the output position
	 * corresponding to the input position cp is cp - pairs.
	 */
	pairs = 0;
	white = 0;
	for (cp = start; '\0' != *cp; cp++) {

		/*
		 * Move the following text left
		 * after quoted quotes and after "\\" and "\t".
		 */
		if (pairs)
			cp[-pairs] = cp[0];

		if ('\\' == cp[0]) {
			/*
			 * In copy mode, translate double to single
			 * backslashes and backslash-t to literal tabs.
			 */
			switch (cp[1]) {
			case 't':
				/*
				 * Store the tab at the output position.
				 * Writing it to cp[0] would lose it as
				 * soon as earlier bytes were dropped,
				 * because cp[0] is overwritten when the
				 * following text is moved left.
				 */
				cp[-pairs] = '\t';
				/* FALLTHROUGH */
			case '\\':
				pairs++;
				cp++;
				break;
			case ' ':
				/* Skip escaped blanks. */
				if (0 == quoted)
					cp++;
				break;
			default:
				break;
			}
		} else if (0 == quoted) {
			if (' ' == cp[0]) {
				/* Unescaped blanks end unquoted args. */
				white = 1;
				break;
			}
		} else if ('"' == cp[0]) {
			if ('"' == cp[1]) {
				/* Quoted quotes collapse. */
				pairs++;
				cp++;
			} else {
				/* Unquoted quotes end quoted args. */
				quoted = 2;
				break;
			}
		}
	}

	/* Quoted argument without a closing quote. */
	if (1 == quoted)
		mandoc_msg(MANDOCERR_ARG_QUOTE, parse, ln, *pos, NULL);

	/* NUL-terminate this argument and move to the next one. */
	if (pairs)
		cp[-pairs] = '\0';
	if ('\0' != *cp) {
		*cp++ = '\0';
		while (' ' == *cp)
			cp++;
	}
	/* Account for the opening quote that start has skipped. */
	*pos += (int)(cp - start) + (quoted ? 1 : 0);
	*cpp = cp;

	if ('\0' == *cp && (white || ' ' == cp[-1]))
		mandoc_msg(MANDOCERR_SPACE_EOL, parse, ln, *pos, NULL);

	return start;
}

/*
 * Convert the string p to a time value according to the
 * strptime(3) format fmt.  The whole string has to match.
 * On success, store the result of mktime(3) in *t and return 1;
 * otherwise, leave *t alone and return 0.
 */
static int
a2time(time_t *t, const char *fmt, const char *p)
{
	struct tm	 parsed;
	const char	*rest;

	/* Fields not set by the format start out as zero. */
	memset(&parsed, 0, sizeof(parsed));

	rest = strptime(p, fmt, &parsed);

	/* Reject both a mismatch and trailing garbage. */
	if (rest == NULL || *rest != '\0')
		return 0;

	*t = mktime(&parsed);
	return 1;
}

/*
 * Format a time value in the form "Month D, YYYY", using local time.
 * The result is obtained from mandoc_malloc() or mandoc_strdup()
 * and is passed on to the caller.
 * If the time cannot be converted or formatted, return an empty
 * string rather than NULL, because mandoc_normdate() hands the
 * result on to its own callers without inspecting it.
 */
static char *
time2a(time_t t)
{
	struct tm	*tm;
	char		*buf, *p;
	size_t		 ssz;
	int		 isz;

	/* Nothing is allocated yet if localtime(3) fails. */
	buf = NULL;

	tm = localtime(&t);
	if (tm == NULL)
		goto fail;

	/*
	 * Reserve space:
	 * up to 9 characters for the month (September) + blank
	 * up to 2 characters for the day + comma + blank
	 * 4 characters for the year and a terminating '\0'
	 */

	p = buf = mandoc_malloc(10 + 4 + 4 + 1);

	if ((ssz = strftime(p, 10 + 1, "%B ", tm)) == 0)
		goto fail;
	p += ssz;

	/*
	 * The output format is just "%d" here, not "%2d" or "%02d".
	 * That's also the reason why we can't just format the
	 * date as a whole with "%B %e, %Y" or "%B %d, %Y".
	 * Besides, the present approach is less prone to buffer
	 * overflows, in case anybody should ever introduce the bug
	 * of looking at LC_TIME.
	 */

	/*
	 * snprintf(3) returns the length it would have needed;
	 * never advance beyond the space reserved for the day.
	 */
	isz = snprintf(p, 4 + 1, "%d, ", tm->tm_mday);
	if (isz < 0 || isz > 4)
		goto fail;
	p += isz;

	/* This fails for years that need more than four digits. */
	if (strftime(p, 4 + 1, "%Y", tm) == 0)
		goto fail;
	return buf;

fail:
	free(buf);
	return mandoc_strdup("");
}

/*
 * Normalize the date string of a manual page.
 * A missing date is replaced by today's date, and dates in one of
 * the two mdoc(7) formats are reformatted with time2a().
 * Anything else is copied verbatim with mandoc_strdup().
 * Problems are reported with mandoc_msg() or mandoc_vmsg()
 * for line ln, column pos.
 */
char *
mandoc_normdate(struct roff_man *man, char *in, int ln, int pos)
{
	time_t		 parsed;
	char		*out;
	int		 have_date, is_mdoc_date;

	/* No date specified: use today's date. */

	have_date = in != NULL && *in != '\0' &&
	    strcmp(in, "$" "Mdocdate$") != 0;
	if ( ! have_date) {
		mandoc_msg(MANDOCERR_DATE_MISSING, man->parse, ln, pos, NULL);
		return time2a(time(NULL));
	}

	/* Valid mdoc(7) date format. */

	is_mdoc_date = a2time(&parsed, "$" "Mdocdate: %b %d %Y $", in);
	if ( ! is_mdoc_date)
		is_mdoc_date = a2time(&parsed, "%b %d, %Y", in);

	if (is_mdoc_date) {
		out = time2a(parsed);
		/* Allow one day of slack before calling it the future. */
		if (parsed > time(NULL) + 86400) {
			mandoc_msg(MANDOCERR_DATE_FUTURE, man->parse,
			    ln, pos, out);
		}
		return out;
	}

	/* In man(7), do not warn about the legacy format. */

	if ( ! a2time(&parsed, "%Y-%m-%d", in)) {
		mandoc_msg(MANDOCERR_DATE_BAD, man->parse, ln, pos, in);
	} else if (parsed > time(NULL) + 86400) {
		mandoc_msg(MANDOCERR_DATE_FUTURE, man->parse, ln, pos, in);
	} else if (man->macroset == MACROSET_MDOC) {
		mandoc_vmsg(MANDOCERR_DATE_LEGACY, man->parse,
		    ln, pos, "Dd %s", in);
	}

	/* Use any non-mdoc(7) date verbatim. */

	return mandoc_strdup(in);
}

/*
 * Decide whether the sz bytes starting at p end a sentence.
 * Scan backwards over trailing closing delimiters (quotes, brackets,
 * parentheses) and end-of-sentence punctuation (period, exclamation
 * mark, question mark).  Return 1 if the text ends a sentence,
 * 0 otherwise.
 */
int
mandoc_eos(const char *p, size_t sz)
{
	size_t		 i;
	int		 enclosed, found;

	if (0 == sz)
		return 0;

	/*
	 * End-of-sentence recognition must include situations where
	 * some symbols, such as `)', allow prior EOS punctuation to
	 * propagate outward.
	 */

	/*
	 * Walk backwards with an index rather than a pointer,
	 * such that no pointer to before the start of the buffer
	 * is ever formed and sz is not truncated to an int.
	 */
	enclosed = found = 0;
	for (i = sz; i > 0; i--) {
		switch (p[i - 1]) {
		case '\"':
		case '\'':
		case ']':
		case ')':
			/* Only delimiters after the punctuation count. */
			if (0 == found)
				enclosed = 1;
			break;
		case '.':
		case '!':
		case '?':
			found = 1;
			break;
		default:
			/*
			 * The first other character decides: enclosed
			 * punctuation only ends a sentence when it
			 * directly follows a letter or a digit.
			 */
			return found && (!enclosed ||
			    isalnum((unsigned char)p[i - 1]));
		}
	}

	/* Nothing but punctuation and delimiters. */
	return found && !enclosed;
}

/*
 * Convert the first sz bytes of p to an integer in the given base.
 * Return -1 if sz exceeds 31, if the input is empty, or if it is
 * not completely consumed by strtol(3).
 * Otherwise, return the value, clamped to the range of an int.
 * Note that negative numbers are returned as such, so a result
 * of -1 does not necessarily indicate an error.
 */
int
mandoc_strntoi(const char *p, size_t sz, int base)
{
	char		 digits[32];
	char		*stop;
	long		 val;

	/* Leave room for the terminating NUL byte. */
	if (sz >= sizeof(digits))
		return -1;

	memcpy(digits, p, sz);
	digits[sz] = '\0';

	errno = 0;
	val = strtol(digits, &stop, base);

	if (digits[0] == '\0' || *stop != '\0')
		return -1;

	if (val > INT_MAX)
		return INT_MAX;
	if (val < INT_MIN)
		return INT_MIN;
	return (int)val;
}


Generated by: GCOVR (Version 3.3)

Line	Branch	Exec	Source
1			/* $OpenBSD: mandoc.c,v 1.71 2017/07/03 13:40:00 schwarze Exp $ */
2			/*
3			* Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4			* Copyright (c) 2011-2015, 2017 Ingo Schwarze <schwarze@openbsd.org>
5			*
6			* Permission to use, copy, modify, and distribute this software for any
7			* purpose with or without fee is hereby granted, provided that the above
8			* copyright notice and this permission notice appear in all copies.
9			*
10			* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11			* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12			* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13			* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14			* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15			* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16			* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17			*/
18			#include <sys/types.h>
19
20			#include <assert.h>
21			#include <ctype.h>
22			#include <errno.h>
23			#include <limits.h>
24			#include <stdlib.h>
25			#include <stdio.h>
26			#include <string.h>
27			#include <time.h>
28
29			#include "mandoc_aux.h"
30			#include "mandoc.h"
31			#include "roff.h"
32			#include "libmandoc.h"
33
34			static int a2time(time_t , const char , const char *);
35			static char *time2a(time_t);
36
37
38			enum mandoc_esc
39			mandoc_escape(const char end, const char start, int *sz)
40			{
41		2652546	const char *local_start;
42		1326273	int local_sz;
43			char term;
44			enum mandoc_esc gly;
45
46			/*
47			* When the caller doesn't provide return storage,
48			* use local storage.
49			*/
50
51	✓✓	1326273	if (NULL == start)
52		752111	start = &local_start;
53	✓✓	1326273	if (NULL == sz)
54		752111	sz = &local_sz;
55
56			/*
57			* Beyond the backslash, at least one input character
58			* is part of the escape sequence. With one exception
59			* (see below), that character won't be returned.
60			*/
61
62			gly = ESCAPE_ERROR;
63		1326369	start = ++end;
64		1326369	*sz = 0;
65			term = '\0';
66
67	✓✓✓✗ ✗✗✓✓ ✓✗✗✗ ✗✗✗✗ ✓✓✗✗ ✗✗✗✓ ✓✗✗✗ ✗✗✗✓ ✓✓✓	1326369	switch ((*start)[-1]) {
68			/*
69			* First the glyphs. There are several different forms of
70			* these, but each eventually returns a substring of the glyph
71			* name.
72			*/
73			case '(':
74			gly = ESCAPE_SPECIAL;
75		86390	*sz = 2;
76		86390	break;
77			case '[':
78			gly = ESCAPE_SPECIAL;
79			term = ']';
80		14718	break;
81			case 'C':
82	✗✓	162	if ('\'' != **start)
83			return ESCAPE_ERROR;
84		162	start = ++end;
85			gly = ESCAPE_SPECIAL;
86			term = '\'';
87		162	break;
88
89			/*
90			* Escapes taking no arguments at all.
91			*/
92			case 'd':
93			case 'u':
94			case ',':
95			case '/':
96		60	return ESCAPE_IGNORE;
97			case 'p':
98		36	return ESCAPE_BREAK;
99
100			/*
101			* The \z escape is supposed to output the following
102			* character without advancing the cursor position.
103			* Since we are mostly dealing with terminal mode,
104			* let us just skip the next character.
105			*/
106			case 'z':
107		96	return ESCAPE_SKIPCHAR;
108
109			/*
110			* Handle all triggers matching \X(xy, \Xx, and \X[xxxx], where
111			* 'X' is the trigger. These have opaque sub-strings.
112			*/
113			case 'F':
114			case 'g':
115			case 'k':
116			case 'M':
117			case 'm':
118			case 'n':
119			case 'V':
120			case 'Y':
121		45	gly = ESCAPE_IGNORE;
122			/* FALLTHROUGH */
123			case 'f':
124	✓✓	597757	if (ESCAPE_ERROR == gly)
125		597712	gly = ESCAPE_FONT;
126	✓✓✓	597757	switch (**start) {
127			case '(':
128		168875	start = ++end;
129		168875	*sz = 2;
130		168875	break;
131			case '[':
132		60	start = ++end;
133			term = ']';
134		60	break;
135			default:
136		428822	*sz = 1;
137		428822	break;
138			}
139			break;
140
141			/*
142			* These escapes are of the form \X'Y', where 'X' is the trigger
143			* and 'Y' is any string. These have opaque sub-strings.
144			* The \B and \w escapes are handled in roff.c, roff_res().
145			*/
146			case 'A':
147			case 'b':
148			case 'D':
149			case 'R':
150			case 'X':
151			case 'Z':
152		51	gly = ESCAPE_IGNORE;
153			/* FALLTHROUGH */
154			case 'o':
155	✗✓	120	if (**start == '\0')
156			return ESCAPE_ERROR;
157	✓✓	120	if (gly == ESCAPE_ERROR)
158		69	gly = ESCAPE_OVERSTRIKE;
159		120	term = **start;
160		120	start = ++end;
161		120	break;
162
163			/*
164			* These escapes are of the form \X'N', where 'X' is the trigger
165			* and 'N' resolves to a numerical expression.
166			*/
167			case 'h':
168			case 'H':
169			case 'L':
170			case 'l':
171			case 'S':
172			case 'v':
173			case 'x':
174	✓✓	14902	if (strchr(" %&()+-./0123456789:<=>", *start)) {
175	✓✗	30	if ('\0' != **start)
176		30	++*end;
177		30	return ESCAPE_ERROR;
178			}
179	✓✓✓	14872	switch ((*start)[-1]) {
180			case 'h':
181			gly = ESCAPE_HORIZ;
182		11294	break;
183			case 'l':
184			gly = ESCAPE_HLINE;
185		105	break;
186			default:
187			gly = ESCAPE_IGNORE;
188		3473	break;
189			}
190		14872	term = **start;
191		14872	start = ++end;
192		14872	break;
193
194			/*
195			* Special handling for the numbered character escape.
196			* XXX Do any other escapes need similar handling?
197			*/
198			case 'N':
199	✗✓	1293	if ('\0' == **start)
200			return ESCAPE_ERROR;
201		1293	(*end)++;
202	✓✓	1293	if (isdigit((unsigned char)**start)) {
203		9	*sz = 1;
204		9	return ESCAPE_IGNORE;
205			}
206			(*start)++;
207	✓✓	8082	while (isdigit((unsigned char)**end))
208			(*end)++;
209		1284	sz = end - *start;
210	✓✗	1284	if ('\0' != **end)
211		1284	(*end)++;
212		1284	return ESCAPE_NUMBERED;
213
214			/*
215			* Sizes get a special category of their own.
216			*/
217			case 's':
218			gly = ESCAPE_IGNORE;
219
220			/* See +/- counts as a sign. */
221	✓✓✓✓ ✗✓	385158	if ('+' == end \|\| '-' == end \|\| ASCII_HYPH == **end)
222		77143	start = ++end;
223
224	✓✓✓✗ ✗✓✓	154115	switch (**end) {
225			case '(':
226		30	start = ++end;
227		30	*sz = 2;
228		30	break;
229			case '[':
230		30	start = ++end;
231			term = ']';
232		30	break;
233			case '\'':
234		60	start = ++end;
235			term = '\'';
236		60	break;
237			case '3':
238			case '2':
239			case '1':
240	✗✓	154106	sz = (end)[-1] == 's' &&
241			isdigit((unsigned char)(*end)[1]) ? 2 : 1;
242		77053	break;
243			default:
244		76942	*sz = 1;
245		76942	break;
246			}
247
248			break;
249
250			/*
251			* Anything else is assumed to be a glyph.
252			* In this case, pass back the character after the backslash.
253			*/
254			default:
255			gly = ESCAPE_SPECIAL;
256		456624	start = --end;
257		456624	*sz = 1;
258		456624	break;
259			}
260
261	✗✓	1324758	assert(ESCAPE_ERROR != gly);
262
263			/*
264			* Read up to the terminating character,
265			* paying attention to nested escapes.
266			*/
267
268	✓✓	1324758	if ('\0' != term) {
269	✓✓	322862	while (**end != term) {
270	✗✓✓	131409	switch (**end) {
271			case '\0':
272			return ESCAPE_ERROR;
273			case '\\':
274		126	(*end)++;
275	✗✓	126	if (ESCAPE_ERROR ==
276		126	mandoc_escape(end, NULL, NULL))
277			return ESCAPE_ERROR;
278			break;
279			default:
280		131283	(*end)++;
281		131283	break;
282			}
283			}
284		30022	sz = (end)++ - *start;
285		30022	} else {
286	✗✓	1294736	assert(*sz > 0);
287	✓✓	1294736	if ((size_t)sz > strlen(start))
288		16	return ESCAPE_ERROR;
289		1294720	end += sz;
290			}
291
292			/* Run post-processors. */
293
294	✓✓✓	1883701	switch (gly) {
295			case ESCAPE_FONT:
296	✓✓	597711	if (2 == *sz) {
297	✓✓	168869	if ('C' == **start) {
298			/*
299			* Treat constant-width font modes
300			* just like regular font modes.
301			*/
302		167804	(*start)++;
303		167804	(*sz)--;
304			} else {
305	✓✗✓✗	2130	if ('B' == (start)[0] && 'I' == (start)[1])
306		1065	gly = ESCAPE_FONTBI;
307			break;
308			}
309	✓✗	596646	} else if (1 != *sz)
310			break;
311
312	✗✓✗✓ ✓✗✓✓	1029642	switch (**start) {
313			case '3':
314			case 'B':
315			gly = ESCAPE_FONTBOLD;
316		55623	break;
317			case '2':
318			case 'I':
319			gly = ESCAPE_FONTITALIC;
320		62372	break;
321			case 'P':
322			gly = ESCAPE_FONTPREV;
323		7186	break;
324			case '1':
325			case 'R':
326			gly = ESCAPE_FONTROMAN;
327		307815	break;
328			}
329			break;
330			case ESCAPE_SPECIAL:
331	✓✓✓✓	1014518	if (1 == sz && 'c' == *start)
332		228	gly = ESCAPE_NOSPACE;
333			/*
334			* Unicode escapes are defined in groff as \[u0000]
335			* to \[u10FFFF], where the contained value must be
336			* a valid Unicode codepoint. Here, however, only
337			* check the length and range.
338			*/
339	✓✓✓✓ ✓✓	584704	if (*start != 'u' \|\| sz < 5 \|\| *sz > 7)
340			break;
341	✓✓✓✓ ✓✓	14130	if (sz == 7 && ((start)[1] != '1' \|\| (*start)[2] != '0'))
342			break;
343	✓✓✓✓	13349	if (sz == 6 && (start)[1] == '0')
344			break;
345	✓✓✓✓ ✓✓	25019	if (sz == 5 && (start)[1] == 'D' &&
346		210	strchr("89ABCDEF", (*start)[2]) != NULL)
347			break;
348	✓✓	25428	if ((int)strspn(*start + 1, "0123456789ABCDEFabcdef")
349		12714	+ 1 == *sz)
350		12675	gly = ESCAPE_UNICODE;
351			break;
352			default:
353			break;
354			}
355
356		1324742	return gly;
357		1326273	}
358
359			/*
360			* Parse a quoted or unquoted roff-style request or macro argument.
361			* Return a pointer to the parsed argument, which is either the original
362			* pointer or advanced by one byte in case the argument is quoted.
363			* NUL-terminate the argument in place.
364			* Collapse pairs of quotes inside quoted arguments.
365			* Advance the argument pointer to the next argument,
366			* or to the NUL byte terminating the argument line.
367			*/
368			char *
369			mandoc_getarg(struct mparse parse, char cpp, int ln, int pos)
370			{
371			char start, cp;
372			int quoted, pairs, white;
373
374			/* Quoting can only start with a new word. */
375		1364256	start = *cpp;
376			quoted = 0;
377	✓✓	682128	if ('"' == *start) {
378			quoted = 1;
379		94215	start++;
380		94215	}
381
382			pairs = 0;
383			white = 0;
384	✓✓	10081468	for (cp = start; '\0' != *cp; cp++) {
385
386			/*
387			* Move the following text left
388			* after quoted quotes and after "\\" and "\t".
389			*/
390	✓✓	4757608	if (pairs)
391		85599	cp[-pairs] = cp[0];
392
393	✓✓	4757608	if ('\\' == cp[0]) {
394			/*
395			* In copy mode, translate double to single
396			* backslashes and backslash-t to literal tabs.
397			*/
398	✓✓✓✓	3935581	switch (cp[1]) {
399			case 't':
400		12	cp[0] = '\t';
401			/* FALLTHROUGH */
402			case '\\':
403		676	pairs++;
404		676	cp++;
405		676	break;
406			case ' ':
407			/* Skip escaped blanks. */
408	✓✓	263	if (0 == quoted)
409		226	cp++;
410			break;
411			default:
412			break;
413			}
414	✓✓	4704023	} else if (0 == quoted) {
415	✓✓	2975797	if (' ' == cp[0]) {
416			/* Unescaped blanks end unquoted args. */
417			white = 1;
418		304984	break;
419			}
420	✓✓	1728226	} else if ('"' == cp[0]) {
421	✓✓	102956	if ('"' == cp[1]) {
422			/* Quoted quotes collapse. */
423		8938	pairs++;
424		8938	cp++;
425			} else {
426			/* Unquoted quotes end quoted args. */
427			quoted = 2;
428		94018	break;
429			}
430		8938	}
431			}
432
433			/* Quoted argument without a closing quote. */
434	✓✓	682128	if (1 == quoted)
435		197	mandoc_msg(MANDOCERR_ARG_QUOTE, parse, ln, *pos, NULL);
436
437			/* NUL-terminate this argument and move to the next one. */
438	✓✓	682128	if (pairs)
439		4493	cp[-pairs] = '\0';
440	✓✓	682128	if ('\0' != *cp) {
441		399002	*cp++ = '\0';
442	✓✓	890236	while (' ' == *cp)
443		46116	cp++;
444			}
445		682128	*pos += (int)(cp - start) + (quoted ? 1 : 0);
446		682128	*cpp = cp;
447
448	✓✓✓✓ ✓✓	1359016	if ('\0' == *cp && (white \|\| ' ' == cp[-1]))
449		215	mandoc_msg(MANDOCERR_SPACE_EOL, parse, ln, *pos, NULL);
450
451		682128	return start;
452			}
453
454			static int
455			a2time(time_t t, const char fmt, const char *p)
456			{
457		22000	struct tm tm;
458			char *pp;
459
460		11000	memset(&tm, 0, sizeof(struct tm));
461
462		11000	pp = strptime(p, fmt, &tm);
463	✓✓✓✗	18831	if (NULL != pp && '\0' == *pp) {
464		7831	*t = mktime(&tm);
465		7831	return 1;
466			}
467
468		3169	return 0;
469		11000	}
470
471			static char *
472			time2a(time_t t)
473			{
474			struct tm *tm;
475			char buf, p;
476			size_t ssz;
477			int isz;
478
479		7062	tm = localtime(&t);
480	✗✓	7062	if (tm == NULL)
481			return NULL;
482
483			/*
484			* Reserve space:
485			* up to 9 characters for the month (September) + blank
486			* up to 2 characters for the day + comma + blank
487			* 4 characters for the year and a terminating '\0'
488			*/
489
490		7062	p = buf = mandoc_malloc(10 + 4 + 4 + 1);
491
492	✓✗	7062	if ((ssz = strftime(p, 10 + 1, "%B ", tm)) == 0)
493			goto fail;
494		7062	p += (int)ssz;
495
496			/*
497			* The output format is just "%d" here, not "%2d" or "%02d".
498			* That's also the reason why we can't just format the
499			* date as a whole with "%B %e, %Y" or "%B %d, %Y".
500			* Besides, the present approach is less prone to buffer
501			* overflows, in case anybody should ever introduce the bug
502			* of looking at LC_TIME.
503			*/
504
505	✓✗	7062	if ((isz = snprintf(p, 4 + 1, "%d, ", tm->tm_mday)) == -1)
506			goto fail;
507		7062	p += isz;
508
509	✓✗	7062	if (strftime(p, 4 + 1, "%Y", tm) == 0)
510			goto fail;
511		7062	return buf;
512
513			fail:
514			free(buf);
515			return NULL;
516		7062	}
517
518			char *
519			mandoc_normdate(struct roff_man man, char in, int ln, int pos)
520			{
521			char *cp;
522		15806	time_t t;
523
524			/* No date specified: use today's date. */
525
526	✓✓✓✗ ✗✓	23697	if (in == NULL \|\| *in == '\0' \|\| strcmp(in, "$" "Mdocdate$") == 0) {
527		6	mandoc_msg(MANDOCERR_DATE_MISSING, man->parse, ln, pos, NULL);
528		6	return time2a(time(NULL));
529			}
530
531			/* Valid mdoc(7) date format. */
532
533	✓✓✓✓	10159	if (a2time(&t, "$" "Mdocdate: %b %d %Y $", in) \|\|
534		2262	a2time(&t, "%b %d, %Y", in)) {
535		7056	cp = time2a(t);
536	✗✓	7056	if (t > time(NULL) + 86400)
537			mandoc_msg(MANDOCERR_DATE_FUTURE, man->parse,
538			ln, pos, cp);
539		7056	return cp;
540			}
541
542			/* In man(7), do not warn about the legacy format. */
543
544	✓✓	841	if (a2time(&t, "%Y-%m-%d", in) == 0)
545		66	mandoc_msg(MANDOCERR_DATE_BAD, man->parse, ln, pos, in);
546	✗✓	775	else if (t > time(NULL) + 86400)
547			mandoc_msg(MANDOCERR_DATE_FUTURE, man->parse, ln, pos, in);
548	✓✓	775	else if (man->macroset == MACROSET_MDOC)
549		12	mandoc_vmsg(MANDOCERR_DATE_LEGACY, man->parse,
550			ln, pos, "Dd %s", in);
551
552			/* Use any non-mdoc(7) date verbatim. */
553
554		841	return mandoc_strdup(in);
555		7903	}
556
557			int
558			mandoc_eos(const char *p, size_t sz)
559			{
560			const char *q;
561			int enclosed, found;
562
563	✓✓	908238	if (0 == sz)
564		119	return 0;
565
566			/*
567			* End-of-sentence recognition must include situations where
568			* some symbols, such as `)', allow prior EOS punctuation to
569			* propagate outward.
570			*/
571
572			enclosed = found = 0;
573	✓✓	1214552	for (q = p + (int)sz - 1; q >= p; q--) {
574	✗✗✗✓ ✗✗✓✓	587666	switch (*q) {
575			case '\"':
576			case '\'':
577			case ']':
578			case ')':
579	✓✓	16020	if (0 == found)
580		9095	enclosed = 1;
581			break;
582			case '.':
583			case '!':
584			case '?':
585			found = 1;
586		137256	break;
587			default:
588	✓✓	868780	return found &&
589	✓✓	120401	(!enclosed \|\| isalnum((unsigned char)*q));
590			}
591			}
592
593	✓✓	56587	return found && !enclosed;
594		454119	}
595
596			/*
597			* Convert a string to a long that may not be <0.
598			* If the string is invalid, or is less than 0, return -1.
599			*/
600			int
601			mandoc_strntoi(const char *p, size_t sz, int base)
602			{
603		10204	char buf[32];
604		5102	char *ep;
605			long v;
606
607	✗✓	5102	if (sz > 31)
608			return -1;
609
610		5102	memcpy(buf, p, sz);
611		5102	buf[(int)sz] = '\0';
612
613		5102	errno = 0;
614		5102	v = strtol(buf, &ep, base);
615
616	✓✓✗✓	10198	if (buf[0] == '\0' \|\| *ep != '\0')
617		6	return -1;
618
619	✗✓	5096	if (v > INT_MAX)
620			v = INT_MAX;
621	✗✓	5096	if (v < INT_MIN)
622			v = INT_MIN;
623
624		5096	return (int)v;
625		5102	}