GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: usr.bin/mandoc/roff.c Lines: 1386 1556 89.1 %
Date: 2017-11-07 Branches: 846 1003 84.3 %

Line Branch Exec Source
1
/*	$OpenBSD: roff.c,v 1.196 2017/07/14 17:16:13 schwarze Exp $ */
2
/*
3
 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4
 * Copyright (c) 2010-2015, 2017 Ingo Schwarze <schwarze@openbsd.org>
5
 *
6
 * Permission to use, copy, modify, and distribute this software for any
7
 * purpose with or without fee is hereby granted, provided that the above
8
 * copyright notice and this permission notice appear in all copies.
9
 *
10
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17
 */
18
#include <sys/types.h>
19
20
#include <assert.h>
21
#include <ctype.h>
22
#include <limits.h>
23
#include <stddef.h>
24
#include <stdint.h>
25
#include <stdio.h>
26
#include <stdlib.h>
27
#include <string.h>
28
29
#include "mandoc.h"
30
#include "mandoc_aux.h"
31
#include "mandoc_ohash.h"
32
#include "roff.h"
33
#include "libmandoc.h"
34
#include "roff_int.h"
35
#include "libroff.h"
36
37
/* Maximum number of string expansions per line, to break infinite loops. */
38
#define	EXPAND_LIMIT	1000
39
40
/* Types of definitions of macros and strings. */
41
#define	ROFFDEF_USER	(1 << 1)  /* User-defined. */
42
#define	ROFFDEF_PRE	(1 << 2)  /* Predefined. */
43
#define	ROFFDEF_REN	(1 << 3)  /* Renamed standard macro. */
44
#define	ROFFDEF_STD	(1 << 4)  /* mdoc(7) or man(7) macro. */
45
#define	ROFFDEF_ANY	(ROFFDEF_USER | ROFFDEF_PRE | \
46
			 ROFFDEF_REN | ROFFDEF_STD)
47
48
/* --- data types --------------------------------------------------------- */
49
50
/*
 * A minimal counted string: a heap buffer together with its cached
 * length, so that users do not have to call strlen() again.
 */
53
struct	roffstr {
54
	char		*p; /* NUL-terminated buffer */
55
	size_t		 sz; /* saved strlen(p), terminator not counted */
56
};
57
58
/*
 * A key-value pair of roffstr objects, used as one element of a
 * singly-linked list (the strtab, rentab and xmbtab members of
 * struct roff all point to lists of this type).
 */
61
struct	roffkv {
62
	struct roffstr	 key; /* lookup name */
63
	struct roffstr	 val; /* associated value */
64
	struct roffkv	*next; /* next in list */
65
};
66
67
/*
 * A single number register, used as one element of a singly-linked
 * list (the regtab member of struct roff points to such a list).
 */
70
struct	roffreg {
71
	struct roffstr	 key; /* register name */
72
	int		 val; /* current register value */
73
	struct roffreg	*next; /* next in list */
74
};
75
76
/*
 * Association of request and macro names with token IDs.
 * These are the objects stored in the hash table that roffhash_alloc()
 * builds; the table is keyed on the name member.
 */
79
struct	roffreq {
80
	enum roff_tok	 tok; /* token ID the name maps to */
81
	char		 name[]; /* NUL-terminated name, allocated inline */
82
};
83
84
/*
 * State of the roff parser.
 * roff_alloc() creates one zero-filled instance and fills in parse,
 * reqtab, options, format, rstackpos and escape.
 * roff_free1() tears down the block stack, the register, string,
 * rename and translation tables and the table and equation parsers;
 * reqtab is only released by roff_free().
 */
struct	roff {
85
	struct mparse	*parse; /* parse point */
86
	struct roff_man	*man; /* mdoc or man parser */
87
	struct roffnode	*last; /* leaf of stack */
88
	int		*rstack; /* stack of inverted `ie' values */
89
	struct ohash	*reqtab; /* request lookup table */
90
	struct roffreg	*regtab; /* number registers */
91
	struct roffkv	*strtab; /* user-defined strings & macros */
92
	struct roffkv	*rentab; /* renamed strings & macros */
93
	struct roffkv	*xmbtab; /* multi-byte trans table (`tr') */
94
	struct roffstr	*xtab; /* single-byte trans table (`tr'), 128 entries */
95
	const char	*current_string; /* value of last called user macro */
96
	struct tbl_node	*first_tbl; /* first table parsed */
97
	struct tbl_node	*last_tbl; /* last table parsed */
98
	struct tbl_node	*tbl; /* current table being parsed */
99
	struct eqn_node	*last_eqn; /* equation parser */
100
	struct eqn_node	*eqn; /* active equation parser */
101
	int		 eqn_inline; /* current equation is inline */
102
	int		 options; /* parse options */
103
	int		 rstacksz; /* current size limit of rstack */
104
	int		 rstackpos; /* position in rstack, -1 after alloc/reset */
105
	int		 format; /* current file in mdoc or man format */
106
	int		 argc; /* number of args of the last macro */
107
	char		 control; /* control character */
108
	char		 escape; /* escape character, initially a backslash */
109
};
110
111
/*
 * One entry on the stack of pending roff blocks.
 * Entries are created by roffnode_push() and destroyed by
 * roffnode_pop(), which also frees the name and end strings.
 */
struct	roffnode {
112
	enum roff_tok	 tok; /* type of node */
113
	struct roffnode	*parent; /* up one in stack */
114
	int		 line; /* parse line */
115
	int		 col; /* parse col */
116
	char		*name; /* node name, e.g. macro name */
117
	char		*end; /* end-rules: custom token */
118
	int		 endspan; /* end-rules: next-line or infty */
119
	int		 rule; /* current evaluation rule, inherited from parent */
120
};
121
122
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
123
			 enum roff_tok tok, /* tok of macro */ \
124
			 struct buf *buf, /* input buffer */ \
125
			 int ln, /* parse line */ \
126
			 int ppos, /* original pos in buffer */ \
127
			 int pos, /* current pos in buffer */ \
128
			 int *offs /* reset offset of buffer data */
129
130
typedef	enum rofferr (*roffproc)(ROFF_ARGS);
131
132
/*
 * The handler functions and flags for one request.
 * The roffs[] table holds one of these per request and initializes
 * them positionally, so the member order must not change.
 * NOTE(review): roffs[] is presumably indexed by enum roff_tok;
 * confirm against the dispatch code.
 */
struct	roffmac {
133
	roffproc	 proc; /* process new macro */
134
	roffproc	 text; /* process as child text of macro */
135
	roffproc	 sub; /* process as child of macro */
136
	int		 flags; /* bitwise OR of ROFFMAC_* values */
137
#define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
138
};
139
140
/*
 * One predefined string: an input name and the text replacing it.
 * The PREDEF() macro expands to a positional initializer for this
 * structure, so the member order must not change.
 */
struct	predef {
141
	const char	*name; /* predefined input name */
142
	const char	*str; /* replacement symbol */
143
};
144
145
#define	PREDEF(__name, __str) \
146
	{ (__name), (__str) },
147
148
/* --- function prototypes ------------------------------------------------ */
149
150
static	void		 roffnode_cleanscope(struct roff *);
151
static	void		 roffnode_pop(struct roff *);
152
static	void		 roffnode_push(struct roff *, enum roff_tok,
153
				const char *, int, int);
154
static	void		 roff_addtbl(struct roff_man *, struct tbl_node *);
155
static	enum rofferr	 roff_als(ROFF_ARGS);
156
static	enum rofferr	 roff_block(ROFF_ARGS);
157
static	enum rofferr	 roff_block_text(ROFF_ARGS);
158
static	enum rofferr	 roff_block_sub(ROFF_ARGS);
159
static	enum rofferr	 roff_br(ROFF_ARGS);
160
static	enum rofferr	 roff_cblock(ROFF_ARGS);
161
static	enum rofferr	 roff_cc(ROFF_ARGS);
162
static	void		 roff_ccond(struct roff *, int, int);
163
static	enum rofferr	 roff_cond(ROFF_ARGS);
164
static	enum rofferr	 roff_cond_text(ROFF_ARGS);
165
static	enum rofferr	 roff_cond_sub(ROFF_ARGS);
166
static	enum rofferr	 roff_ds(ROFF_ARGS);
167
static	enum rofferr	 roff_ec(ROFF_ARGS);
168
static	enum rofferr	 roff_eo(ROFF_ARGS);
169
static	enum rofferr	 roff_eqndelim(struct roff *, struct buf *, int);
170
static	int		 roff_evalcond(struct roff *r, int, char *, int *);
171
static	int		 roff_evalnum(struct roff *, int,
172
				const char *, int *, int *, int);
173
static	int		 roff_evalpar(struct roff *, int,
174
				const char *, int *, int *, int);
175
static	int		 roff_evalstrcond(const char *, int *);
176
static	void		 roff_free1(struct roff *);
177
static	void		 roff_freereg(struct roffreg *);
178
static	void		 roff_freestr(struct roffkv *);
179
static	size_t		 roff_getname(struct roff *, char **, int, int);
180
static	int		 roff_getnum(const char *, int *, int *, int);
181
static	int		 roff_getop(const char *, int *, char *);
182
static	int		 roff_getregn(const struct roff *,
183
				const char *, size_t);
184
static	int		 roff_getregro(const struct roff *,
185
				const char *name);
186
static	const char	*roff_getstrn(const struct roff *,
187
				const char *, size_t, int *);
188
static	int		 roff_hasregn(const struct roff *,
189
				const char *, size_t);
190
static	enum rofferr	 roff_insec(ROFF_ARGS);
191
static	enum rofferr	 roff_it(ROFF_ARGS);
192
static	enum rofferr	 roff_line_ignore(ROFF_ARGS);
193
static	void		 roff_man_alloc1(struct roff_man *);
194
static	void		 roff_man_free1(struct roff_man *);
195
static	enum rofferr	 roff_manyarg(ROFF_ARGS);
196
static	enum rofferr	 roff_nr(ROFF_ARGS);
197
static	enum rofferr	 roff_onearg(ROFF_ARGS);
198
static	enum roff_tok	 roff_parse(struct roff *, char *, int *,
199
				int, int);
200
static	enum rofferr	 roff_parsetext(struct roff *, struct buf *,
201
				int, int *);
202
static	enum rofferr	 roff_renamed(ROFF_ARGS);
203
static	enum rofferr	 roff_res(struct roff *, struct buf *, int, int);
204
static	enum rofferr	 roff_rm(ROFF_ARGS);
205
static	enum rofferr	 roff_rn(ROFF_ARGS);
206
static	enum rofferr	 roff_rr(ROFF_ARGS);
207
static	void		 roff_setstr(struct roff *,
208
				const char *, const char *, int);
209
static	void		 roff_setstrn(struct roffkv **, const char *,
210
				size_t, const char *, size_t, int);
211
static	enum rofferr	 roff_so(ROFF_ARGS);
212
static	enum rofferr	 roff_tr(ROFF_ARGS);
213
static	enum rofferr	 roff_Dd(ROFF_ARGS);
214
static	enum rofferr	 roff_TE(ROFF_ARGS);
215
static	enum rofferr	 roff_TS(ROFF_ARGS);
216
static	enum rofferr	 roff_EQ(ROFF_ARGS);
217
static	enum rofferr	 roff_EN(ROFF_ARGS);
218
static	enum rofferr	 roff_T_(ROFF_ARGS);
219
static	enum rofferr	 roff_unsupp(ROFF_ARGS);
220
static	enum rofferr	 roff_userdef(ROFF_ARGS);
221
222
/* --- constant data ------------------------------------------------------ */
223
224
#define	ROFFNUM_SCALE	(1 << 0)  /* Honour scaling in roff_getnum(). */
225
#define	ROFFNUM_WHITE	(1 << 1)  /* Skip whitespace in roff_evalnum(). */
226
227
const char *__roff_name[MAN_MAX + 1] = {
228
	"br",		"ce",		"ft",		"ll",
229
	"mc",		"po",		"rj",		"sp",
230
	"ta",		"ti",		NULL,
231
	"ab",		"ad",		"af",		"aln",
232
	"als",		"am",		"am1",		"ami",
233
	"ami1",		"as",		"as1",		"asciify",
234
	"backtrace",	"bd",		"bleedat",	"blm",
235
        "box",		"boxa",		"bp",		"BP",
236
	"break",	"breakchar",	"brnl",		"brp",
237
	"brpnl",	"c2",		"cc",
238
	"cf",		"cflags",	"ch",		"char",
239
	"chop",		"class",	"close",	"CL",
240
	"color",	"composite",	"continue",	"cp",
241
	"cropat",	"cs",		"cu",		"da",
242
	"dch",		"Dd",		"de",		"de1",
243
	"defcolor",	"dei",		"dei1",		"device",
244
	"devicem",	"di",		"do",		"ds",
245
	"ds1",		"dwh",		"dt",		"ec",
246
	"ecr",		"ecs",		"el",		"em",
247
	"EN",		"eo",		"EP",		"EQ",
248
	"errprint",	"ev",		"evc",		"ex",
249
	"fallback",	"fam",		"fc",		"fchar",
250
	"fcolor",	"fdeferlig",	"feature",	"fkern",
251
	"fl",		"flig",		"fp",		"fps",
252
	"fschar",	"fspacewidth",	"fspecial",	"ftr",
253
	"fzoom",	"gcolor",	"hc",		"hcode",
254
	"hidechar",	"hla",		"hlm",		"hpf",
255
	"hpfa",		"hpfcode",	"hw",		"hy",
256
	"hylang",	"hylen",	"hym",		"hypp",
257
	"hys",		"ie",		"if",		"ig",
258
	"index",	"it",		"itc",		"IX",
259
	"kern",		"kernafter",	"kernbefore",	"kernpair",
260
	"lc",		"lc_ctype",	"lds",		"length",
261
	"letadj",	"lf",		"lg",		"lhang",
262
	"linetabs",	"lnr",		"lnrf",		"lpfx",
263
	"ls",		"lsm",		"lt",
264
	"mediasize",	"minss",	"mk",		"mso",
265
	"na",		"ne",		"nh",		"nhychar",
266
	"nm",		"nn",		"nop",		"nr",
267
	"nrf",		"nroff",	"ns",		"nx",
268
	"open",		"opena",	"os",		"output",
269
	"padj",		"papersize",	"pc",		"pev",
270
	"pi",		"PI",		"pl",		"pm",
271
	"pn",		"pnr",		"ps",
272
	"psbb",		"pshape",	"pso",		"ptr",
273
	"pvs",		"rchar",	"rd",		"recursionlimit",
274
	"return",	"rfschar",	"rhang",
275
	"rm",		"rn",		"rnn",		"rr",
276
	"rs",		"rt",		"schar",	"sentchar",
277
	"shc",		"shift",	"sizes",	"so",
278
	"spacewidth",	"special",	"spreadwarn",	"ss",
279
	"sty",		"substring",	"sv",		"sy",
280
	"T&",		"tc",		"TE",
281
	"TH",		"tkf",		"tl",
282
	"tm",		"tm1",		"tmc",		"tr",
283
	"track",	"transchar",	"trf",		"trimat",
284
	"trin",		"trnt",		"troff",	"TS",
285
	"uf",		"ul",		"unformat",	"unwatch",
286
	"unwatchn",	"vpt",		"vs",		"warn",
287
	"warnscale",	"watch",	"watchlength",	"watchn",
288
	"wh",		"while",	"write",	"writec",
289
	"writem",	"xflag",	".",		NULL,
290
	NULL,		"text",
291
	"Dd",		"Dt",		"Os",		"Sh",
292
	"Ss",		"Pp",		"D1",		"Dl",
293
	"Bd",		"Ed",		"Bl",		"El",
294
	"It",		"Ad",		"An",		"Ap",
295
	"Ar",		"Cd",		"Cm",		"Dv",
296
	"Er",		"Ev",		"Ex",		"Fa",
297
	"Fd",		"Fl",		"Fn",		"Ft",
298
	"Ic",		"In",		"Li",		"Nd",
299
	"Nm",		"Op",		"Ot",		"Pa",
300
	"Rv",		"St",		"Va",		"Vt",
301
	"Xr",		"%A",		"%B",		"%D",
302
	"%I",		"%J",		"%N",		"%O",
303
	"%P",		"%R",		"%T",		"%V",
304
	"Ac",		"Ao",		"Aq",		"At",
305
	"Bc",		"Bf",		"Bo",		"Bq",
306
	"Bsx",		"Bx",		"Db",		"Dc",
307
	"Do",		"Dq",		"Ec",		"Ef",
308
	"Em",		"Eo",		"Fx",		"Ms",
309
	"No",		"Ns",		"Nx",		"Ox",
310
	"Pc",		"Pf",		"Po",		"Pq",
311
	"Qc",		"Ql",		"Qo",		"Qq",
312
	"Re",		"Rs",		"Sc",		"So",
313
	"Sq",		"Sm",		"Sx",		"Sy",
314
	"Tn",		"Ux",		"Xc",		"Xo",
315
	"Fo",		"Fc",		"Oo",		"Oc",
316
	"Bk",		"Ek",		"Bt",		"Hf",
317
	"Fr",		"Ud",		"Lb",		"Lp",
318
	"Lk",		"Mt",		"Brq",		"Bro",
319
	"Brc",		"%C",		"Es",		"En",
320
	"Dx",		"%Q",		"%U",		"Ta",
321
	NULL,
322
	"TH",		"SH",		"SS",		"TP",
323
	"LP",		"PP",		"P",		"IP",
324
	"HP",		"SM",		"SB",		"BI",
325
	"IB",		"BR",		"RB",		"R",
326
	"B",		"I",		"IR",		"RI",
327
	"nf",		"fi",
328
	"RE",		"RS",		"DT",		"UC",
329
	"PD",		"AT",		"in",
330
	"OP",		"EX",		"EE",		"UR",
331
	"UE",		"MT",		"ME",		NULL
332
};
333
const	char *const *roff_name = __roff_name;
334
335
static	struct roffmac	 roffs[TOKEN_NONE] = {
336
	{ roff_br, NULL, NULL, 0 },  /* br */
337
	{ roff_onearg, NULL, NULL, 0 },  /* ce */
338
	{ roff_onearg, NULL, NULL, 0 },  /* ft */
339
	{ roff_onearg, NULL, NULL, 0 },  /* ll */
340
	{ roff_onearg, NULL, NULL, 0 },  /* mc */
341
	{ roff_onearg, NULL, NULL, 0 },  /* po */
342
	{ roff_onearg, NULL, NULL, 0 },  /* rj */
343
	{ roff_onearg, NULL, NULL, 0 },  /* sp */
344
	{ roff_manyarg, NULL, NULL, 0 },  /* ta */
345
	{ roff_onearg, NULL, NULL, 0 },  /* ti */
346
	{ NULL, NULL, NULL, 0 },  /* ROFF_MAX */
347
	{ roff_unsupp, NULL, NULL, 0 },  /* ab */
348
	{ roff_line_ignore, NULL, NULL, 0 },  /* ad */
349
	{ roff_line_ignore, NULL, NULL, 0 },  /* af */
350
	{ roff_unsupp, NULL, NULL, 0 },  /* aln */
351
	{ roff_als, NULL, NULL, 0 },  /* als */
352
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am */
353
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am1 */
354
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami */
355
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami1 */
356
	{ roff_ds, NULL, NULL, 0 },  /* as */
357
	{ roff_ds, NULL, NULL, 0 },  /* as1 */
358
	{ roff_unsupp, NULL, NULL, 0 },  /* asciify */
359
	{ roff_line_ignore, NULL, NULL, 0 },  /* backtrace */
360
	{ roff_line_ignore, NULL, NULL, 0 },  /* bd */
361
	{ roff_line_ignore, NULL, NULL, 0 },  /* bleedat */
362
	{ roff_unsupp, NULL, NULL, 0 },  /* blm */
363
	{ roff_unsupp, NULL, NULL, 0 },  /* box */
364
	{ roff_unsupp, NULL, NULL, 0 },  /* boxa */
365
	{ roff_line_ignore, NULL, NULL, 0 },  /* bp */
366
	{ roff_unsupp, NULL, NULL, 0 },  /* BP */
367
	{ roff_unsupp, NULL, NULL, 0 },  /* break */
368
	{ roff_line_ignore, NULL, NULL, 0 },  /* breakchar */
369
	{ roff_line_ignore, NULL, NULL, 0 },  /* brnl */
370
	{ roff_br, NULL, NULL, 0 },  /* brp */
371
	{ roff_line_ignore, NULL, NULL, 0 },  /* brpnl */
372
	{ roff_unsupp, NULL, NULL, 0 },  /* c2 */
373
	{ roff_cc, NULL, NULL, 0 },  /* cc */
374
	{ roff_insec, NULL, NULL, 0 },  /* cf */
375
	{ roff_line_ignore, NULL, NULL, 0 },  /* cflags */
376
	{ roff_line_ignore, NULL, NULL, 0 },  /* ch */
377
	{ roff_unsupp, NULL, NULL, 0 },  /* char */
378
	{ roff_unsupp, NULL, NULL, 0 },  /* chop */
379
	{ roff_line_ignore, NULL, NULL, 0 },  /* class */
380
	{ roff_insec, NULL, NULL, 0 },  /* close */
381
	{ roff_unsupp, NULL, NULL, 0 },  /* CL */
382
	{ roff_line_ignore, NULL, NULL, 0 },  /* color */
383
	{ roff_unsupp, NULL, NULL, 0 },  /* composite */
384
	{ roff_unsupp, NULL, NULL, 0 },  /* continue */
385
	{ roff_line_ignore, NULL, NULL, 0 },  /* cp */
386
	{ roff_line_ignore, NULL, NULL, 0 },  /* cropat */
387
	{ roff_line_ignore, NULL, NULL, 0 },  /* cs */
388
	{ roff_line_ignore, NULL, NULL, 0 },  /* cu */
389
	{ roff_unsupp, NULL, NULL, 0 },  /* da */
390
	{ roff_unsupp, NULL, NULL, 0 },  /* dch */
391
	{ roff_Dd, NULL, NULL, 0 },  /* Dd */
392
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de */
393
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de1 */
394
	{ roff_line_ignore, NULL, NULL, 0 },  /* defcolor */
395
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei */
396
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei1 */
397
	{ roff_unsupp, NULL, NULL, 0 },  /* device */
398
	{ roff_unsupp, NULL, NULL, 0 },  /* devicem */
399
	{ roff_unsupp, NULL, NULL, 0 },  /* di */
400
	{ roff_unsupp, NULL, NULL, 0 },  /* do */
401
	{ roff_ds, NULL, NULL, 0 },  /* ds */
402
	{ roff_ds, NULL, NULL, 0 },  /* ds1 */
403
	{ roff_unsupp, NULL, NULL, 0 },  /* dwh */
404
	{ roff_unsupp, NULL, NULL, 0 },  /* dt */
405
	{ roff_ec, NULL, NULL, 0 },  /* ec */
406
	{ roff_unsupp, NULL, NULL, 0 },  /* ecr */
407
	{ roff_unsupp, NULL, NULL, 0 },  /* ecs */
408
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* el */
409
	{ roff_unsupp, NULL, NULL, 0 },  /* em */
410
	{ roff_EN, NULL, NULL, 0 },  /* EN */
411
	{ roff_eo, NULL, NULL, 0 },  /* eo */
412
	{ roff_unsupp, NULL, NULL, 0 },  /* EP */
413
	{ roff_EQ, NULL, NULL, 0 },  /* EQ */
414
	{ roff_line_ignore, NULL, NULL, 0 },  /* errprint */
415
	{ roff_unsupp, NULL, NULL, 0 },  /* ev */
416
	{ roff_unsupp, NULL, NULL, 0 },  /* evc */
417
	{ roff_unsupp, NULL, NULL, 0 },  /* ex */
418
	{ roff_line_ignore, NULL, NULL, 0 },  /* fallback */
419
	{ roff_line_ignore, NULL, NULL, 0 },  /* fam */
420
	{ roff_unsupp, NULL, NULL, 0 },  /* fc */
421
	{ roff_unsupp, NULL, NULL, 0 },  /* fchar */
422
	{ roff_line_ignore, NULL, NULL, 0 },  /* fcolor */
423
	{ roff_line_ignore, NULL, NULL, 0 },  /* fdeferlig */
424
	{ roff_line_ignore, NULL, NULL, 0 },  /* feature */
425
	{ roff_line_ignore, NULL, NULL, 0 },  /* fkern */
426
	{ roff_line_ignore, NULL, NULL, 0 },  /* fl */
427
	{ roff_line_ignore, NULL, NULL, 0 },  /* flig */
428
	{ roff_line_ignore, NULL, NULL, 0 },  /* fp */
429
	{ roff_line_ignore, NULL, NULL, 0 },  /* fps */
430
	{ roff_unsupp, NULL, NULL, 0 },  /* fschar */
431
	{ roff_line_ignore, NULL, NULL, 0 },  /* fspacewidth */
432
	{ roff_line_ignore, NULL, NULL, 0 },  /* fspecial */
433
	{ roff_line_ignore, NULL, NULL, 0 },  /* ftr */
434
	{ roff_line_ignore, NULL, NULL, 0 },  /* fzoom */
435
	{ roff_line_ignore, NULL, NULL, 0 },  /* gcolor */
436
	{ roff_line_ignore, NULL, NULL, 0 },  /* hc */
437
	{ roff_line_ignore, NULL, NULL, 0 },  /* hcode */
438
	{ roff_line_ignore, NULL, NULL, 0 },  /* hidechar */
439
	{ roff_line_ignore, NULL, NULL, 0 },  /* hla */
440
	{ roff_line_ignore, NULL, NULL, 0 },  /* hlm */
441
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpf */
442
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfa */
443
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfcode */
444
	{ roff_line_ignore, NULL, NULL, 0 },  /* hw */
445
	{ roff_line_ignore, NULL, NULL, 0 },  /* hy */
446
	{ roff_line_ignore, NULL, NULL, 0 },  /* hylang */
447
	{ roff_line_ignore, NULL, NULL, 0 },  /* hylen */
448
	{ roff_line_ignore, NULL, NULL, 0 },  /* hym */
449
	{ roff_line_ignore, NULL, NULL, 0 },  /* hypp */
450
	{ roff_line_ignore, NULL, NULL, 0 },  /* hys */
451
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* ie */
452
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* if */
453
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ig */
454
	{ roff_unsupp, NULL, NULL, 0 },  /* index */
455
	{ roff_it, NULL, NULL, 0 },  /* it */
456
	{ roff_unsupp, NULL, NULL, 0 },  /* itc */
457
	{ roff_line_ignore, NULL, NULL, 0 },  /* IX */
458
	{ roff_line_ignore, NULL, NULL, 0 },  /* kern */
459
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernafter */
460
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernbefore */
461
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernpair */
462
	{ roff_unsupp, NULL, NULL, 0 },  /* lc */
463
	{ roff_unsupp, NULL, NULL, 0 },  /* lc_ctype */
464
	{ roff_unsupp, NULL, NULL, 0 },  /* lds */
465
	{ roff_unsupp, NULL, NULL, 0 },  /* length */
466
	{ roff_line_ignore, NULL, NULL, 0 },  /* letadj */
467
	{ roff_insec, NULL, NULL, 0 },  /* lf */
468
	{ roff_line_ignore, NULL, NULL, 0 },  /* lg */
469
	{ roff_line_ignore, NULL, NULL, 0 },  /* lhang */
470
	{ roff_unsupp, NULL, NULL, 0 },  /* linetabs */
471
	{ roff_unsupp, NULL, NULL, 0 },  /* lnr */
472
	{ roff_unsupp, NULL, NULL, 0 },  /* lnrf */
473
	{ roff_unsupp, NULL, NULL, 0 },  /* lpfx */
474
	{ roff_line_ignore, NULL, NULL, 0 },  /* ls */
475
	{ roff_unsupp, NULL, NULL, 0 },  /* lsm */
476
	{ roff_line_ignore, NULL, NULL, 0 },  /* lt */
477
	{ roff_line_ignore, NULL, NULL, 0 },  /* mediasize */
478
	{ roff_line_ignore, NULL, NULL, 0 },  /* minss */
479
	{ roff_line_ignore, NULL, NULL, 0 },  /* mk */
480
	{ roff_insec, NULL, NULL, 0 },  /* mso */
481
	{ roff_line_ignore, NULL, NULL, 0 },  /* na */
482
	{ roff_line_ignore, NULL, NULL, 0 },  /* ne */
483
	{ roff_line_ignore, NULL, NULL, 0 },  /* nh */
484
	{ roff_line_ignore, NULL, NULL, 0 },  /* nhychar */
485
	{ roff_unsupp, NULL, NULL, 0 },  /* nm */
486
	{ roff_unsupp, NULL, NULL, 0 },  /* nn */
487
	{ roff_unsupp, NULL, NULL, 0 },  /* nop */
488
	{ roff_nr, NULL, NULL, 0 },  /* nr */
489
	{ roff_unsupp, NULL, NULL, 0 },  /* nrf */
490
	{ roff_line_ignore, NULL, NULL, 0 },  /* nroff */
491
	{ roff_line_ignore, NULL, NULL, 0 },  /* ns */
492
	{ roff_insec, NULL, NULL, 0 },  /* nx */
493
	{ roff_insec, NULL, NULL, 0 },  /* open */
494
	{ roff_insec, NULL, NULL, 0 },  /* opena */
495
	{ roff_line_ignore, NULL, NULL, 0 },  /* os */
496
	{ roff_unsupp, NULL, NULL, 0 },  /* output */
497
	{ roff_line_ignore, NULL, NULL, 0 },  /* padj */
498
	{ roff_line_ignore, NULL, NULL, 0 },  /* papersize */
499
	{ roff_line_ignore, NULL, NULL, 0 },  /* pc */
500
	{ roff_line_ignore, NULL, NULL, 0 },  /* pev */
501
	{ roff_insec, NULL, NULL, 0 },  /* pi */
502
	{ roff_unsupp, NULL, NULL, 0 },  /* PI */
503
	{ roff_line_ignore, NULL, NULL, 0 },  /* pl */
504
	{ roff_line_ignore, NULL, NULL, 0 },  /* pm */
505
	{ roff_line_ignore, NULL, NULL, 0 },  /* pn */
506
	{ roff_line_ignore, NULL, NULL, 0 },  /* pnr */
507
	{ roff_line_ignore, NULL, NULL, 0 },  /* ps */
508
	{ roff_unsupp, NULL, NULL, 0 },  /* psbb */
509
	{ roff_unsupp, NULL, NULL, 0 },  /* pshape */
510
	{ roff_insec, NULL, NULL, 0 },  /* pso */
511
	{ roff_line_ignore, NULL, NULL, 0 },  /* ptr */
512
	{ roff_line_ignore, NULL, NULL, 0 },  /* pvs */
513
	{ roff_unsupp, NULL, NULL, 0 },  /* rchar */
514
	{ roff_line_ignore, NULL, NULL, 0 },  /* rd */
515
	{ roff_line_ignore, NULL, NULL, 0 },  /* recursionlimit */
516
	{ roff_unsupp, NULL, NULL, 0 },  /* return */
517
	{ roff_unsupp, NULL, NULL, 0 },  /* rfschar */
518
	{ roff_line_ignore, NULL, NULL, 0 },  /* rhang */
519
	{ roff_rm, NULL, NULL, 0 },  /* rm */
520
	{ roff_rn, NULL, NULL, 0 },  /* rn */
521
	{ roff_unsupp, NULL, NULL, 0 },  /* rnn */
522
	{ roff_rr, NULL, NULL, 0 },  /* rr */
523
	{ roff_line_ignore, NULL, NULL, 0 },  /* rs */
524
	{ roff_line_ignore, NULL, NULL, 0 },  /* rt */
525
	{ roff_unsupp, NULL, NULL, 0 },  /* schar */
526
	{ roff_line_ignore, NULL, NULL, 0 },  /* sentchar */
527
	{ roff_line_ignore, NULL, NULL, 0 },  /* shc */
528
	{ roff_unsupp, NULL, NULL, 0 },  /* shift */
529
	{ roff_line_ignore, NULL, NULL, 0 },  /* sizes */
530
	{ roff_so, NULL, NULL, 0 },  /* so */
531
	{ roff_line_ignore, NULL, NULL, 0 },  /* spacewidth */
532
	{ roff_line_ignore, NULL, NULL, 0 },  /* special */
533
	{ roff_line_ignore, NULL, NULL, 0 },  /* spreadwarn */
534
	{ roff_line_ignore, NULL, NULL, 0 },  /* ss */
535
	{ roff_line_ignore, NULL, NULL, 0 },  /* sty */
536
	{ roff_unsupp, NULL, NULL, 0 },  /* substring */
537
	{ roff_line_ignore, NULL, NULL, 0 },  /* sv */
538
	{ roff_insec, NULL, NULL, 0 },  /* sy */
539
	{ roff_T_, NULL, NULL, 0 },  /* T& */
540
	{ roff_unsupp, NULL, NULL, 0 },  /* tc */
541
	{ roff_TE, NULL, NULL, 0 },  /* TE */
542
	{ roff_Dd, NULL, NULL, 0 },  /* TH */
543
	{ roff_line_ignore, NULL, NULL, 0 },  /* tkf */
544
	{ roff_unsupp, NULL, NULL, 0 },  /* tl */
545
	{ roff_line_ignore, NULL, NULL, 0 },  /* tm */
546
	{ roff_line_ignore, NULL, NULL, 0 },  /* tm1 */
547
	{ roff_line_ignore, NULL, NULL, 0 },  /* tmc */
548
	{ roff_tr, NULL, NULL, 0 },  /* tr */
549
	{ roff_line_ignore, NULL, NULL, 0 },  /* track */
550
	{ roff_line_ignore, NULL, NULL, 0 },  /* transchar */
551
	{ roff_insec, NULL, NULL, 0 },  /* trf */
552
	{ roff_line_ignore, NULL, NULL, 0 },  /* trimat */
553
	{ roff_unsupp, NULL, NULL, 0 },  /* trin */
554
	{ roff_unsupp, NULL, NULL, 0 },  /* trnt */
555
	{ roff_line_ignore, NULL, NULL, 0 },  /* troff */
556
	{ roff_TS, NULL, NULL, 0 },  /* TS */
557
	{ roff_line_ignore, NULL, NULL, 0 },  /* uf */
558
	{ roff_line_ignore, NULL, NULL, 0 },  /* ul */
559
	{ roff_unsupp, NULL, NULL, 0 },  /* unformat */
560
	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatch */
561
	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatchn */
562
	{ roff_line_ignore, NULL, NULL, 0 },  /* vpt */
563
	{ roff_line_ignore, NULL, NULL, 0 },  /* vs */
564
	{ roff_line_ignore, NULL, NULL, 0 },  /* warn */
565
	{ roff_line_ignore, NULL, NULL, 0 },  /* warnscale */
566
	{ roff_line_ignore, NULL, NULL, 0 },  /* watch */
567
	{ roff_line_ignore, NULL, NULL, 0 },  /* watchlength */
568
	{ roff_line_ignore, NULL, NULL, 0 },  /* watchn */
569
	{ roff_unsupp, NULL, NULL, 0 },  /* wh */
570
	{ roff_unsupp, NULL, NULL, 0 },  /* while */
571
	{ roff_insec, NULL, NULL, 0 },  /* write */
572
	{ roff_insec, NULL, NULL, 0 },  /* writec */
573
	{ roff_insec, NULL, NULL, 0 },  /* writem */
574
	{ roff_line_ignore, NULL, NULL, 0 },  /* xflag */
575
	{ roff_cblock, NULL, NULL, 0 },  /* . */
576
	{ roff_renamed, NULL, NULL, 0 },
577
	{ roff_userdef, NULL, NULL, 0 }
578
};
579
580
/*
 * Array of injected predefined strings.
 * The initializers come from predefs.in.
 * NOTE(review): PREDEFS_MAX is a hard-coded array size; presumably it
 * has to match the number of PREDEF() entries in predefs.in - confirm
 * whenever that file changes.
 */
581
#define	PREDEFS_MAX	 38
582
static	const struct predef predefs[PREDEFS_MAX] = {
583
#include "predefs.in"
584
};
585
586
/*
 * File-scope parser state that is not part of struct roff.
 * roff_reset() sets all four variables back to 0 or NULL.
 * NOTE(review): judging by the names these belong to the `ce' and
 * `it' requests; confirm against the request handlers.
 */
static	int	 roffce_lines;	/* number of input lines to center */
587
static	struct roff_node *roffce_node;  /* active request */
588
static	int	 roffit_lines;  /* number of lines to delay */
589
static	char	*roffit_macro;  /* NUL-terminated macro line */
590
591
592
/* --- request table ------------------------------------------------------ */
593
594
struct ohash *
595
roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok)
596
{
597
	struct ohash	*htab;
598
	struct roffreq	*req;
599
	enum roff_tok	 tok;
600
	size_t		 sz;
601
	unsigned int	 slot;
602
603
50586
	htab = mandoc_malloc(sizeof(*htab));
604
25293
	mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name));
605
606
8492512
	for (tok = mintok; tok < maxtok; tok++) {
607
4220963
		if (roff_name[tok] == NULL)
608
			continue;
609
4195669
		sz = strlen(roff_name[tok]);
610
4195669
		req = mandoc_malloc(sizeof(*req) + sz + 1);
611
4195669
		req->tok = tok;
612
4195669
		memcpy(req->name, roff_name[tok], sz + 1);
613
4195669
		slot = ohash_qlookup(htab, req->name);
614
4195669
		ohash_insert(htab, slot, req);
615
4195669
	}
616
25293
	return htab;
617
}
618
619
/*
 * Release a table created by roffhash_alloc(): first every stored
 * entry, then the table internals, then the table object itself.
 * Passing NULL is allowed and does nothing.
 */
void
roffhash_free(struct ohash *htab)
{
	struct roffreq	*entry;
	unsigned int	 iter;

	if (htab != NULL) {
		entry = ohash_first(htab, &iter);
		while (entry != NULL) {
			free(entry);
			entry = ohash_next(htab, &iter);
		}
		ohash_delete(htab);
		free(htab);
	}
}
633
634
enum roff_tok
635
roffhash_find(struct ohash *htab, const char *name, size_t sz)
636
{
637
	struct roffreq	*req;
638
8324296
	const char	*end;
639
640
4162148
	if (sz) {
641
3108522
		end = name + sz;
642
3108522
		req = ohash_find(htab, ohash_qlookupi(htab, name, &end));
643
3108522
	} else
644
1053626
		req = ohash_find(htab, ohash_qlookup(htab, name));
645
14506919
	return req == NULL ? TOKEN_NONE : req->tok;
646
4162148
}
647
648
/* --- stack of request blocks -------------------------------------------- */
649
650
/*
651
 * Pop the current node off of the stack of roff instructions currently
652
 * pending.
653
 */
654
static void
655
roffnode_pop(struct roff *r)
656
{
657
	struct roffnode	*p;
658
659
344428
	assert(r->last);
660
	p = r->last;
661
662
172214
	r->last = r->last->parent;
663
172214
	free(p->name);
664
172214
	free(p->end);
665
172214
	free(p);
666
172214
}
667
668
/*
669
 * Push a roff node onto the instruction stack.  This must later be
670
 * removed with roffnode_pop().
671
 */
672
static void
673
roffnode_push(struct roff *r, enum roff_tok tok, const char *name,
674
		int line, int col)
675
{
676
	struct roffnode	*p;
677
678
344428
	p = mandoc_calloc(1, sizeof(struct roffnode));
679
172214
	p->tok = tok;
680
172214
	if (name)
681
10027
		p->name = mandoc_strdup(name);
682
172214
	p->parent = r->last;
683
172214
	p->line = line;
684
172214
	p->col = col;
685
352793
	p->rule = p->parent ? p->parent->rule : 0;
686
687
172214
	r->last = p;
688
172214
}
689
690
/* --- roff parser state data management ---------------------------------- */
691
692
static void
693
roff_free1(struct roff *r)
694
{
695
	struct tbl_node	*tbl;
696
	int		 i;
697
698
87395
	while (NULL != (tbl = r->first_tbl)) {
699
6919
		r->first_tbl = tbl->next;
700
6919
		tbl_free(tbl);
701
	}
702
24519
	r->first_tbl = r->last_tbl = r->tbl = NULL;
703
704
24519
	if (r->last_eqn != NULL)
705
390
		eqn_free(r->last_eqn);
706
24519
	r->last_eqn = r->eqn = NULL;
707
708
49110
	while (r->last)
709
36
		roffnode_pop(r);
710
711
24519
	free (r->rstack);
712
24519
	r->rstack = NULL;
713
24519
	r->rstacksz = 0;
714
24519
	r->rstackpos = -1;
715
716
24519
	roff_freereg(r->regtab);
717
24519
	r->regtab = NULL;
718
719
24519
	roff_freestr(r->strtab);
720
24519
	roff_freestr(r->rentab);
721
24519
	roff_freestr(r->xmbtab);
722
24519
	r->strtab = r->rentab = r->xmbtab = NULL;
723
724
24519
	if (r->xtab)
725
6192
		for (i = 0; i < 128; i++)
726
3072
			free(r->xtab[i].p);
727
24519
	free(r->xtab);
728
24519
	r->xtab = NULL;
729
24519
}
730
731
void
732
roff_reset(struct roff *r)
733
{
734
23744
	roff_free1(r);
735
11872
	r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
736
11872
	r->control = '\0';
737
11872
	r->escape = '\\';
738
11872
	roffce_lines = 0;
739
11872
	roffce_node = NULL;
740
11872
	roffit_lines = 0;
741
11872
	roffit_macro = NULL;
742
11872
}
743
744
void
745
roff_free(struct roff *r)
746
{
747
25294
	roff_free1(r);
748
12647
	roffhash_free(r->reqtab);
749
12647
	free(r);
750
12647
}
751
752
struct roff *
753
roff_alloc(struct mparse *parse, int options)
754
{
755
	struct roff	*r;
756
757
25294
	r = mandoc_calloc(1, sizeof(struct roff));
758
12647
	r->parse = parse;
759
12647
	r->reqtab = roffhash_alloc(0, ROFF_USERDEF);
760
12647
	r->options = options;
761
12647
	r->format = options & (MPARSE_MDOC | MPARSE_MAN);
762
12647
	r->rstackpos = -1;
763
12647
	r->escape = '\\';
764
12647
	return r;
765
}
766
767
/* --- syntax tree state data management ---------------------------------- */
768
769
static void
770
roff_man_free1(struct roff_man *man)
771
{
772
773
49038
	if (man->first != NULL)
774
24519
		roff_node_delete(man, man->first);
775
24519
	free(man->meta.msec);
776
24519
	free(man->meta.vol);
777
24519
	free(man->meta.os);
778
24519
	free(man->meta.arch);
779
24519
	free(man->meta.title);
780
24519
	free(man->meta.name);
781
24519
	free(man->meta.date);
782
24519
}
783
784
static void
785
roff_man_alloc1(struct roff_man *man)
786
{
787
788
49038
	memset(&man->meta, 0, sizeof(man->meta));
789
24519
	man->first = mandoc_calloc(1, sizeof(*man->first));
790
24519
	man->first->type = ROFFT_ROOT;
791
24519
	man->last = man->first;
792
24519
	man->last_es = NULL;
793
24519
	man->flags = 0;
794
24519
	man->macroset = MACROSET_NONE;
795
24519
	man->lastsec = man->lastnamed = SEC_NONE;
796
24519
	man->next = ROFF_NEXT_CHILD;
797
24519
}
798
799
/*
 * Discard the current syntax tree and metadata and start over
 * with an empty tree, keeping the roff_man structure itself.
 */
void
roff_man_reset(struct roff_man *man)
{
	roff_man_free1(man);	/* drop the old tree and strings */
	roff_man_alloc1(man);	/* set up a fresh root node */
}
806
807
/*
 * Destroy a structure created with roff_man_alloc():
 * first its tree and metadata, then the structure itself.
 */
void
roff_man_free(struct roff_man *man)
{
	roff_man_free1(man);	/* tree and metadata strings */
	free(man);
}
814
815
struct roff_man *
816
roff_man_alloc(struct roff *roff, struct mparse *parse,
817
	const char *os_s, int quick)
818
{
819
	struct roff_man *man;
820
821
25294
	man = mandoc_calloc(1, sizeof(*man));
822
12647
	man->parse = parse;
823
12647
	man->roff = roff;
824
12647
	man->os_s = os_s;
825
12647
	man->quick = quick;
826
12647
	roff_man_alloc1(man);
827
12647
	roff->man = man;
828
12647
	return man;
829
}
830
831
/* --- syntax tree handling ----------------------------------------------- */
832
833
struct roff_node *
834
roff_node_alloc(struct roff_man *man, int line, int pos,
835
	enum roff_type type, int tok)
836
{
837
	struct roff_node	*n;
838
839
11761760
	n = mandoc_calloc(1, sizeof(*n));
840
5880880
	n->line = line;
841
5880880
	n->pos = pos;
842
5880880
	n->tok = tok;
843
5880880
	n->type = type;
844
5880880
	n->sec = man->lastsec;
845
846
11761760
	if (man->flags & MDOC_SYNOPSIS)
847
5880880
		n->flags |= NODE_SYNPRETTY;
848
	else
849
5880880
		n->flags &= ~NODE_SYNPRETTY;
850
5880880
	if (man->flags & MDOC_NEWLINE)
851
2731839
		n->flags |= NODE_LINE;
852
5880880
	man->flags &= ~MDOC_NEWLINE;
853
854
5880880
	return n;
855
}
856
857
void
858
roff_node_append(struct roff_man *man, struct roff_node *n)
859
{
860
861
11765840
	switch (man->next) {
862
	case ROFF_NEXT_SIBLING:
863
3629246
		if (man->last->next != NULL) {
864
3726
			n->next = man->last->next;
865
3726
			man->last->next->prev = n;
866
3726
		} else
867
3625520
			man->last->parent->last = n;
868
3629246
		man->last->next = n;
869
3629246
		n->prev = man->last;
870
3629246
		n->parent = man->last->parent;
871
3629246
		break;
872
	case ROFF_NEXT_CHILD:
873
2253674
		if (man->last->child != NULL) {
874
8958
			n->next = man->last->child;
875
8958
			man->last->child->prev = n;
876
8958
		} else
877
2244716
			man->last->last = n;
878
2253674
		man->last->child = n;
879
2253674
		n->parent = man->last;
880
2253674
		break;
881
	default:
882
		abort();
883
	}
884
7083108
	man->last = n;
885
886

7083108
	switch (n->type) {
887
	case ROFFT_HEAD:
888
585860
		n->parent->head = n;
889
585860
		break;
890
	case ROFFT_BODY:
891
613839
		if (n->end != ENDBODY_NOT)
892
			return;
893
612192
		n->parent->body = n;
894
612192
		break;
895
	case ROFFT_TAIL:
896
489
		n->parent->tail = n;
897
489
		break;
898
	default:
899
		return;
900
	}
901
902
	/*
903
	 * Copy over the normalised-data pointer of our parent.  Not
904
	 * everybody has one, but copying a null pointer is fine.
905
	 */
906
907
1198541
	n->norm = n->parent->norm;
908
1198541
	assert(n->parent->type == ROFFT_BLOCK);
909
5882920
}
910
911
void
912
roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
913
{
914
	struct roff_node	*n;
915
916
6217428
	n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
917
3108714
	n->string = roff_strdup(man->roff, word);
918
3108714
	roff_node_append(man, n);
919
3108714
	n->flags |= NODE_VALID | NODE_ENDED;
920
3108714
	man->next = ROFF_NEXT_SIBLING;
921
3108714
}
922
923
void
924
roff_word_append(struct roff_man *man, const char *word)
925
{
926
	struct roff_node	*n;
927
298370
	char			*addstr, *newstr;
928
929
149185
	n = man->last;
930
149185
	addstr = roff_strdup(man->roff, word);
931
149185
	mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
932
149185
	free(addstr);
933
149185
	free(n->string);
934
149185
	n->string = newstr;
935
149185
	man->next = ROFF_NEXT_SIBLING;
936
149185
}
937
938
void
939
roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
940
{
941
	struct roff_node	*n;
942
943
508408
	n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
944
254204
	roff_node_append(man, n);
945
254204
	man->next = ROFF_NEXT_CHILD;
946
254204
}
947
948
struct roff_node *
949
roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
950
{
951
	struct roff_node	*n;
952
953
543694
	n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
954
271847
	roff_node_append(man, n);
955
271847
	man->next = ROFF_NEXT_CHILD;
956
271847
	return n;
957
}
958
959
struct roff_node *
960
roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
961
{
962
	struct roff_node	*n;
963
964
1171720
	n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
965
585860
	roff_node_append(man, n);
966
585860
	man->next = ROFF_NEXT_CHILD;
967
585860
	return n;
968
}
969
970
struct roff_node *
971
roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
972
{
973
	struct roff_node	*n;
974
975
1224384
	n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
976
612192
	roff_node_append(man, n);
977
612192
	man->next = ROFF_NEXT_CHILD;
978
612192
	return n;
979
}
980
981
static void
982
roff_addtbl(struct roff_man *man, struct tbl_node *tbl)
983
{
984
	struct roff_node	*n;
985
	const struct tbl_span	*span;
986
987
114100
	if (man->macroset == MACROSET_MAN)
988
13622
		man_breakscope(man, ROFF_TS);
989
110208
	while ((span = tbl_span(tbl)) != NULL) {
990
26579
		n = roff_node_alloc(man, tbl->line, 0, ROFFT_TBL, TOKEN_NONE);
991
26579
		n->span = span;
992
26579
		roff_node_append(man, n);
993
26579
		n->flags |= NODE_VALID | NODE_ENDED;
994
26579
		man->next = ROFF_NEXT_SIBLING;
995
	}
996
57050
}
997
998
void
999
roff_node_unlink(struct roff_man *man, struct roff_node *n)
1000
{
1001
1002
	/* Adjust siblings. */
1003
1004
11814878
	if (n->prev)
1005
3761
		n->prev->next = n->next;
1006
5907439
	if (n->next)
1007
3635524
		n->next->prev = n->prev;
1008
1009
	/* Adjust parent. */
1010
1011
5907439
	if (n->parent != NULL) {
1012
5882920
		if (n->parent->child == n)
1013
5879159
			n->parent->child = n->next;
1014
5882920
		if (n->parent->last == n)
1015
2247396
			n->parent->last = n->prev;
1016
	}
1017
1018
	/* Adjust parse point. */
1019
1020
5907439
	if (man == NULL)
1021
		return;
1022
5907361
	if (man->last == n) {
1023
36600
		if (n->prev == NULL) {
1024
35194
			man->last = n->parent;
1025
			man->next = ROFF_NEXT_CHILD;
1026
35194
		} else {
1027
1406
			man->last = n->prev;
1028
			man->next = ROFF_NEXT_SIBLING;
1029
		}
1030
36600
	}
1031
5907361
	if (man->first == n)
1032
24519
		man->first = NULL;
1033
5907439
}
1034
1035
void
1036
roff_node_free(struct roff_node *n)
1037
{
1038
1039
11810798
	if (n->args != NULL)
1040
33748
		mdoc_argv_free(n->args);
1041

11224938
	if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
1042
1544844
		free(n->norm);
1043
5905399
	if (n->eqn != NULL)
1044
555
		eqn_box_free(n->eqn);
1045
5905399
	free(n->string);
1046
5905399
	free(n);
1047
5905399
}
1048
1049
void
1050
roff_node_delete(struct roff_man *man, struct roff_node *n)
1051
{
1052
1053
29452477
	while (n->child != NULL)
1054
5868140
		roff_node_delete(man, n->child);
1055
5905399
	roff_node_unlink(man, n);
1056
5905399
	roff_node_free(n);
1057
5905399
}
1058
1059
void
1060
deroff(char **dest, const struct roff_node *n)
1061
{
1062
668164
	char	*cp;
1063
	size_t	 sz;
1064
1065
334082
	if (n->type != ROFFT_TEXT) {
1066
669082
		for (n = n->child; n != NULL; n = n->next)
1067
193880
			deroff(dest, n);
1068
140661
		return;
1069
	}
1070
1071
	/* Skip leading whitespace. */
1072
1073
387264
	for (cp = n->string; *cp != '\0'; cp++) {
1074

196852
		if (cp[0] == '\\' && cp[1] != '\0' &&
1075
1613
		    strchr(" %&0^|~", cp[1]) != NULL)
1076
90
			cp++;
1077
193536
		else if ( ! isspace((unsigned char)*cp))
1078
			break;
1079
	}
1080
1081
	/* Skip trailing backslash. */
1082
1083
193421
	sz = strlen(cp);
1084

386836
	if (sz > 0 && cp[sz - 1] == '\\')
1085
		sz--;
1086
1087
	/* Skip trailing whitespace. */
1088
1089
193421
	for (; sz; sz--)
1090
193415
		if ( ! isspace((unsigned char)cp[sz-1]))
1091
			break;
1092
1093
	/* Skip empty strings. */
1094
1095
193421
	if (sz == 0)
1096
6
		return;
1097
1098
193415
	if (*dest == NULL) {
1099
122899
		*dest = mandoc_strndup(cp, sz);
1100
122899
		return;
1101
	}
1102
1103
70516
	mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
1104
70516
	free(*dest);
1105
70516
	*dest = cp;
1106
404598
}
1107
1108
/* --- main functions of the roff parser ---------------------------------- */
1109
1110
/*
 * In the current line, expand escape sequences that tend to get
 * used in numerical expressions and conditional requests.
 * Also check the syntax of the remaining escape sequences.
 *
 * The function works in two passes over buf->buf, starting at pos:
 * a forward pass that finds and cuts off a comment (\" or \#) and
 * records RCS ids found in it, then a backward pass that expands
 * \* (strings), \n (registers), \B and \w, and only syntax-checks
 * every other escape with mandoc_escape().  Each expansion
 * replaces buf->buf by a newly allocated string and updates buf->sz.
 *
 * Returns ROFF_CONT when the line is ready for further parsing,
 * ROFF_APPEND when the line ends in a lone escape character and
 * no end of input was seen, and ROFF_IGN after reporting
 * MANDOCERR_ROFFLOOP (too many expansions or oversized result).
 */
static enum rofferr
1116
roff_res(struct roff *r, struct buf *buf, int ln, int pos)
1117
{
1118
8000040
	char		 ubuf[24]; /* buffer to print the number */
1119
	const char	*start;	/* start of the string to process */
1120
	char		*stesc;	/* start of an escape sequence ('\\') */
1121
4000020
	const char	*stnam;	/* start of the name, after "[(*" */
1122
4000020
	const char	*cp;	/* end of the name, e.g. before ']' */
1123
	const char	*res;	/* the string to be substituted */
1124
4000020
	char		*nbuf;	/* new buffer to copy buf->buf to */
1125
	size_t		 maxl;  /* expected length of the escape name */
1126
	size_t		 naml;	/* actual length of the escape name */
1127
	enum mandoc_esc	 esc;	/* type of the escape sequence */
1128
4000020
	int		 inaml;	/* length returned from mandoc_escape() */
1129
	int		 expand_count;	/* to avoid infinite loops */
1130
4000020
	int		 npos;	/* position in numeric expression */
1131
	int		 arg_complete; /* argument not interrupted by eol */
1132
	int		 done;	/* no more input available */
1133
4000020
	int		 deftype; /* type of definition to paste */
1134
	int		 rcsid;	/* kind of RCS id seen */
1135
	char		 term;	/* character terminating the escape */
1136
1137
	/* Search forward for comments. */
1138
1139
	done = 0;
1140
4000020
	start = buf->buf + pos;
1141
195662214
	for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) {
1142

96511784
		if (stesc[0] != r->escape || stesc[1] == '\0')
1143
			continue;
1144
2324215
		stesc++;
1145

4302853
		if (*stesc != '"' && *stesc != '#')
1146
			continue;
1147
1148
		/* Comment found, look for RCS id. */
1149
1150
		rcsid = 0;
1151
345577
		if ((cp = strstr(stesc, "$" "OpenBSD")) != NULL) {
1152
			rcsid = 1 << MANDOC_OS_OPENBSD;
1153
19670
			cp += 8;
1154
345577
		} else if ((cp = strstr(stesc, "$" "NetBSD")) != NULL) {
1155
			rcsid = 1 << MANDOC_OS_NETBSD;
1156
2000
			cp += 7;
1157
2000
		}
1158

367247
		/*
		 * Accept the id only if the keyword is not followed by
		 * an alphanumeric character and a closing '$' follows.
		 * Seeing the same kind of id twice is reported.
		 */
		if (cp != NULL &&
1159
21670
		    isalnum((unsigned char)*cp) == 0 &&
1160
21670
		    strchr(cp, '$') != NULL) {
1161
21670
			if (r->man->meta.rcsids & rcsid)
1162
6
				mandoc_msg(MANDOCERR_RCS_REP, r->parse,
1163
3
				    ln, stesc + 1 - buf->buf, stesc + 1);
1164
21670
			r->man->meta.rcsids |= rcsid;
1165
21670
		}
1166
1167
		/* Handle trailing whitespace. */
1168
1169
345577
		/*
		 * cp is set to the last character of the line; the
		 * post-decrement moves stesc back onto the escape
		 * character that introduced the comment.
		 */
		cp = strchr(stesc--, '\0') - 1;
1170
345577
		if (*cp == '\n') {
1171
			done = 1;
1172
506
			cp--;
1173
506
		}
1174

690657
		if (*cp == ' ' || *cp == '\t')
1175
994
			mandoc_msg(MANDOCERR_SPACE_EOL, r->parse,
1176
497
			    ln, cp - buf->buf, NULL);
1177

1067587
		/* Cut off the comment and the blanks preceding it. */
		while (stesc > start && stesc[-1] == ' ')
1178
7714
			stesc--;
1179
345577
		*stesc = '\0';
1180
345577
		break;
1181
	}
1182
4000020
	/* Nothing is left to process: empty line or comment only. */
	if (stesc == start)
1183
6333
		return ROFF_CONT;
1184
3993687
	stesc--;
1185
1186
	/* Notice the end of the input. */
1187
1188
3993687
	if (*stesc == '\n') {
1189
143577
		*stesc-- = '\0';
1190
		done = 1;
1191
143577
	}
1192
1193
	expand_count = 0;
1194
104911929
	while (stesc >= start) {
1195
1196
		/* Search backwards for the next backslash. */
1197
1198
96533645
		if (*stesc != r->escape) {
1199
94342377
			/*
			 * A backslash that is not the current escape
			 * character is rewritten as "\e" in a new buffer.
			 */
			if (*stesc == '\\') {
1200
				*stesc = '\0';
1201
				buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s",
1202
				    buf->buf, stesc + 1) + 1;
1203
				start = nbuf + pos;
1204
				stesc = nbuf + (stesc - buf->buf);
1205
				free(buf->buf);
1206
				buf->buf = nbuf;
1207
			}
1208
94342377
			stesc--;
1209
94342377
			continue;
1210
		}
1211
1212
		/* If it is escaped, skip it. */
1213
1214
4408972
		for (cp = stesc - 1; cp >= start; cp--)
1215
1927168
			if (*cp != r->escape)
1216
				break;
1217
1218
2191268
		/*
		 * stesc - cp is the length of the run of escape
		 * characters ending at stesc.  An even run contains
		 * escaped escape characters only and is normalised to
		 * backslashes.  Otherwise the last one starts an
		 * escape sequence, unless the line ends right after it.
		 */
		if ((stesc - cp) % 2 == 0) {
1219
65928
			while (stesc > cp)
1220
26382
				*stesc-- = '\\';
1221
			continue;
1222
2178104
		} else if (stesc[1] != '\0') {
1223
2167745
			*stesc = '\\';
1224
		} else {
1225
10959
			*stesc-- = '\0';
1226
10959
			if (done)
1227
				continue;
1228
			else
1229
10905
				return ROFF_APPEND;
1230
		}
1231
1232
		/* Decide whether to expand or to check only. */
1233
1234
		term = '\0';
1235
2167745
		cp = stesc + 1;
1236

2167745
		switch (*cp) {
1237
		case '*':
1238
			res = NULL;
1239
383150
			break;
1240
		case 'B':
1241
		case 'w':
1242
600
			/* The argument is delimited by the next character. */
			term = cp[1];
1243
			/* FALLTHROUGH */
1244
		case 'n':
1245
18683
			res = ubuf;
1246
18683
			break;
1247
		default:
1248
1765312
			esc = mandoc_escape(&cp, &stnam, &inaml);
1249

2438496
			if (esc == ESCAPE_ERROR ||
1250
1765258
			    (esc == ESCAPE_SPECIAL &&
1251
673184
			     mchars_spec2cp(stnam, inaml) < 0))
1252
567
				mandoc_vmsg(MANDOCERR_ESC_BAD,
1253
567
				    r->parse, ln, (int)(stesc - buf->buf),
1254
567
				    "%.*s", (int)(cp - stesc), stesc);
1255
1765312
			stesc--;
1256
1765312
			continue;
1257
		}
1258
1259
401833
		if (EXPAND_LIMIT < ++expand_count) {
1260
18
			mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
1261
9
			    ln, (int)(stesc - buf->buf), NULL);
1262
9
			return ROFF_IGN;
1263
		}
1264
1265
		/*
1266
		 * The third character decides the length
1267
		 * of the name of the string or register.
1268
		 * Save a pointer to the name.
1269
		 */
1270
1271
401824
		if (term == '\0') {
1272

401224
			switch (*++cp) {
1273
			case '\0':
1274
				maxl = 0;
1275
				break;
1276
			case '(':
1277
380924
				cp++;
1278
				maxl = 2;
1279
380924
				break;
1280
			case '[':
1281
12303
				cp++;
1282
				term = ']';
1283
				maxl = 0;
1284
12303
				break;
1285
			default:
1286
				maxl = 1;
1287
7997
				break;
1288
			}
1289
		} else {
1290
600
			cp += 2;
1291
			maxl = 0;
1292
		}
1293
401824
		stnam = cp;
1294
1295
		/* Advance to the end of the name. */
1296
1297
		naml = 0;
1298
		arg_complete = 1;
1299

2816422
		/* maxl == 0 means: read up to the terminating character. */
		while (maxl == 0 || naml < maxl) {
1300
866911
			if (*cp == '\0') {
1301
150
				mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
1302
75
				    ln, (int)(stesc - buf->buf), stesc);
1303
				arg_complete = 0;
1304
75
				break;
1305
			}
1306

963827
			if (maxl == 0 && *cp == term) {
1307
12828
				cp++;
1308
12828
				break;
1309
			}
1310

854271
			/* Only inside \w are nested escapes parsed as units. */
			if (*cp++ != '\\' || stesc[1] != 'w') {
1311
853913
				naml++;
1312
853913
				continue;
1313
			}
1314

201
			switch (mandoc_escape(&cp, NULL, NULL)) {
1315
			case ESCAPE_SPECIAL:
1316
			case ESCAPE_UNICODE:
1317
			case ESCAPE_NUMBERED:
1318
			case ESCAPE_OVERSTRIKE:
1319
93
				naml++;
1320
93
				break;
1321
			default:
1322
				break;
1323
			}
1324
		}
1325
1326
		/*
1327
		 * Retrieve the replacement string; if it is
1328
		 * undefined, resume searching for escapes.
1329
		 */
1330
1331

803648
		switch (stesc[1]) {
1332
		case '*':
1333
383141
			if (arg_complete) {
1334
383123
				deftype = ROFFDEF_USER | ROFFDEF_PRE;
1335
383123
				res = roff_getstrn(r, stnam, naml, &deftype);
1336
383123
			}
1337
			break;
1338
		case 'B':
1339
273
			/*
			 * Yields "1" if the complete argument, up to the
			 * terminating character, evaluates as a numeric
			 * expression, and "0" otherwise.
			 */
			npos = 0;
1340
546
			ubuf[0] = arg_complete &&
1341
252
			    roff_evalnum(r, ln, stnam, &npos,
1342
252
			      NULL, ROFFNUM_SCALE) &&
1343
126
			    stnam + npos + 1 == cp ? '1' : '0';
1344
273
			ubuf[1] = '\0';
1345
273
			break;
1346
		case 'n':
1347
18083
			if (arg_complete)
1348
36130
				(void)snprintf(ubuf, sizeof(ubuf), "%d",
1349
18065
				    roff_getregn(r, stnam, naml));
1350
			else
1351
18
				ubuf[0] = '\0';
1352
			break;
1353
		case 'w':
1354
			/* use even incomplete args */
1355
654
			/* The width is 24 units per counted character. */
			(void)snprintf(ubuf, sizeof(ubuf), "%d",
1356
327
			    24 * (int)naml);
1357
327
			break;
1358
		}
1359
1360
401824
		if (res == NULL) {
1361
225
			mandoc_vmsg(MANDOCERR_STR_UNDEF,
1362
225
			    r->parse, ln, (int)(stesc - buf->buf),
1363
225
			    "%.*s", (int)naml, stnam);
1364
			res = "";
1365
401824
		/* Refuse expansions growing the buffer beyond SHRT_MAX. */
		} else if (buf->sz + strlen(res) > SHRT_MAX) {
1366
			mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
1367
			    ln, (int)(stesc - buf->buf), NULL);
1368
			return ROFF_IGN;
1369
		}
1370
1371
		/* Replace the escape sequence by the string. */
1372
1373
401824
		*stesc = '\0';
1374
803648
		buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
1375
803648
		    buf->buf, res, cp) + 1;
1376
1377
		/* Prepare for the next replacement. */
1378
1379
401824
		/*
		 * Resume at the end of the inserted text, such that
		 * escape sequences contained in it get processed, too.
		 */
		start = nbuf + pos;
1380
401824
		stesc = nbuf + (stesc - buf->buf) + strlen(res);
1381
401824
		free(buf->buf);
1382
401824
		buf->buf = nbuf;
1383
	}
1384
3982773
	return ROFF_CONT;
1385
4000020
}
1386
1387
/*
1388
 * Process text streams.
1389
 */
1390
static enum rofferr
1391
roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs)
1392
{
1393
	size_t		 sz;
1394
	const char	*start;
1395
2999078
	char		*p;
1396
	int		 isz;
1397
	enum mandoc_esc	 esc;
1398
1399
	/* Spring the input line trap. */
1400
1401
1499539
	if (roffit_lines == 1) {
1402
45
		isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1403
45
		free(buf->buf);
1404
45
		buf->buf = p;
1405
45
		buf->sz = isz + 1;
1406
45
		*offs = 0;
1407
45
		free(roffit_macro);
1408
45
		roffit_lines = 0;
1409
45
		return ROFF_REPARSE;
1410
1499494
	} else if (roffit_lines > 1)
1411
18
		--roffit_lines;
1412
1413

1499494
	if (roffce_node != NULL && buf->buf[pos] != '\0') {
1414
		if (roffce_lines < 1) {
1415
			r->man->last = roffce_node;
1416
			r->man->next = ROFF_NEXT_SIBLING;
1417
			roffce_lines = 0;
1418
			roffce_node = NULL;
1419
		} else
1420
			roffce_lines--;
1421
	}
1422
1423
	/* Convert all breakable hyphens into ASCII_HYPH. */
1424
1425
1499494
	start = p = buf->buf + pos;
1426
1427
4396540
	while (*p != '\0') {
1428
2737921
		sz = strcspn(p, "-\\");
1429
2737921
		p += sz;
1430
1431
2737921
		if (*p == '\0')
1432
			break;
1433
1434
1304905
		if (*p == '\\') {
1435
			/* Skip over escapes. */
1436
1211817
			p++;
1437
1211817
			esc = mandoc_escape((const char **)&p, NULL, NULL);
1438
1211817
			if (esc == ESCAPE_ERROR)
1439
				break;
1440
1216275
			while (*p == '-')
1441
2256
				p++;
1442
			continue;
1443
93088
		} else if (p == start) {
1444
387
			p++;
1445
387
			continue;
1446
		}
1447
1448

171527
		if (isalpha((unsigned char)p[-1]) &&
1449
78826
		    isalpha((unsigned char)p[1]))
1450
75560
			*p = ASCII_HYPH;
1451
92701
		p++;
1452
	}
1453
1499494
	return ROFF_CONT;
1454
1499539
}
1455
1456
enum rofferr
1457
roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs)
1458
{
1459
	enum roff_tok	 t;
1460
	enum rofferr	 e;
1461
8000112
	int		 pos;	/* parse point */
1462
	int		 spos;	/* saved parse point for messages */
1463
	int		 ppos;	/* original offset in buf->buf */
1464
	int		 ctl;	/* macro line (boolean) */
1465
1466
4000056
	ppos = pos = *offs;
1467
1468
	/* Handle in-line equation delimiters. */
1469
1470

4000128
	if (r->tbl == NULL &&
1471

3939903
	    r->last_eqn != NULL && r->last_eqn->delim &&
1472
420
	    (r->eqn == NULL || r->eqn_inline)) {
1473
294
		e = roff_eqndelim(r, buf, pos);
1474
294
		if (e == ROFF_REPARSE)
1475
36
			return e;
1476
258
		assert(e == ROFF_CONT);
1477
	}
1478
1479
	/* Expand some escape sequences. */
1480
1481
4000020
	e = roff_res(r, buf, ln, pos);
1482
4000020
	if (e == ROFF_IGN || e == ROFF_APPEND)
1483
10914
		return e;
1484
3989106
	assert(e == ROFF_CONT);
1485
1486
3989106
	ctl = roff_getcontrol(r, buf->buf, &pos);
1487
1488
	/*
1489
	 * First, if a scope is open and we're not a macro, pass the
1490
	 * text through the macro's filter.
1491
	 * Equations process all content themselves.
1492
	 * Tables process almost all content themselves, but we want
1493
	 * to warn about macros before passing it there.
1494
	 */
1495
1496
3989106
	if (r->last != NULL && ! ctl) {
1497
3200
		t = r->last->tok;
1498
3200
		e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
1499
3200
		if (e == ROFF_IGN)
1500
2064
			return e;
1501
1136
		assert(e == ROFF_CONT);
1502
	}
1503

3989112
	if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) {
1504
1515
		eqn_read(r->eqn, buf->buf + ppos);
1505
1515
		return ROFF_IGN;
1506
	}
1507

4056535
	if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) {
1508
57023
		tbl_read(r->tbl, ln, buf->buf, ppos);
1509
57023
		roff_addtbl(r->man, r->tbl);
1510
57023
		return ROFF_IGN;
1511
	}
1512
3928504
	if ( ! ctl)
1513
1499539
		return roff_parsetext(r, buf, pos, offs);
1514
1515
	/* Skip empty request lines. */
1516
1517
2428965
	if (buf->buf[pos] == '"') {
1518
36
		mandoc_msg(MANDOCERR_COMMENT_BAD, r->parse,
1519
		    ln, pos, NULL);
1520
36
		return ROFF_IGN;
1521
2428929
	} else if (buf->buf[pos] == '\0')
1522
333714
		return ROFF_IGN;
1523
1524
	/*
1525
	 * If a scope is open, go to the child handler for that macro,
1526
	 * as it may want to preprocess before doing anything with it.
1527
	 * Don't do so if an equation is open.
1528
	 */
1529
1530
2095215
	if (r->last) {
1531
239623
		t = r->last->tok;
1532
239623
		return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
1533
	}
1534
1535
	/* No scope is open.  This is a new request or macro. */
1536
1537
	spos = pos;
1538
1855592
	t = roff_parse(r, buf->buf, &pos, ln, ppos);
1539
1540
	/* Tables ignore most macros. */
1541
1542

1890502
	if (r->tbl != NULL && (t == TOKEN_NONE || t == ROFF_TS ||
1543
27928
	    t == ROFF_br || t == ROFF_ce || t == ROFF_rj || t == ROFF_sp)) {
1544
90
		mandoc_msg(MANDOCERR_TBLMACRO, r->parse,
1545
45
		    ln, pos, buf->buf + spos);
1546
45
		if (t != TOKEN_NONE)
1547
18
			return ROFF_IGN;
1548

216
		while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
1549
54
			pos++;
1550
81
		while (buf->buf[pos] == ' ')
1551
27
			pos++;
1552
27
		tbl_read(r->tbl, ln, buf->buf, pos);
1553
27
		roff_addtbl(r->man, r->tbl);
1554
27
		return ROFF_IGN;
1555
	}
1556
1557
	/* For now, let high level macros abort .ce mode. */
1558
1559
1855547
	if (ctl && roffce_node != NULL &&
1560
	    (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ ||
1561
	     t == ROFF_TH || t == ROFF_TS)) {
1562
		r->man->last = roffce_node;
1563
		r->man->next = ROFF_NEXT_SIBLING;
1564
		roffce_lines = 0;
1565
		roffce_node = NULL;
1566
	}
1567
1568
	/*
1569
	 * This is neither a roff request nor a user-defined macro.
1570
	 * Let the standard macro set parsers handle it.
1571
	 */
1572
1573
1855547
	if (t == TOKEN_NONE)
1574
1243212
		return ROFF_CONT;
1575
1576
	/* Execute a roff request or a user defined macro. */
1577
1578
612335
	return (*roffs[t].proc)(r, t, buf, ln, spos, pos, offs);
1579
4000056
}
1580
1581
/*
 * Finish parsing at the end of the input: report a roff block,
 * equation or table that is still open as MANDOCERR_BLK_NOEND.
 * An open equation is then handed to eqn_parse() and an open
 * table to tbl_end(), and the respective pointer is cleared.
 * An open roff block is only reported, not popped, here.
 */
void
1582
roff_endparse(struct roff *r)
1583
{
1584
48846
	if (r->last != NULL)
1585
72
		mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1586
36
		    r->last->line, r->last->col,
1587
36
		    roff_name[r->last->tok]);
1588
1589
24423
	if (r->eqn != NULL) {
1590
		mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1591
		    r->eqn->node->line, r->eqn->node->pos, "EQ");
1592
		eqn_parse(r->eqn);
1593
		r->eqn = NULL;
1594
	}
1595
1596
24423
	if (r->tbl != NULL) {
1597
		mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1598
		    r->tbl->line, r->tbl->pos, "TS");
1599
		tbl_end(r->tbl);
1600
		r->tbl = NULL;
1601
	}
1602
24423
}
1603
1604
/*
1605
 * Parse a roff node's type from the input buffer.  This must be in the
1606
 * form of ".foo xxx" in the usual way.
1607
 */
1608
static enum roff_tok
1609
roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
1610
{
1611
4190430
	char		*cp;
1612
	const char	*mac;
1613
	size_t		 maclen;
1614
2095215
	int		 deftype;
1615
	enum roff_tok	 t;
1616
1617
2095215
	cp = buf + *pos;
1618
1619


8364504
	if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
1620
5452
		return TOKEN_NONE;
1621
1622
	mac = cp;
1623
2089763
	maclen = roff_getname(r, &cp, ln, ppos);
1624
1625
2089763
	deftype = ROFFDEF_USER | ROFFDEF_REN;
1626
2089763
	r->current_string = roff_getstrn(r, mac, maclen, &deftype);
1627
2089763
	switch (deftype) {
1628
	case ROFFDEF_USER:
1629
		t = ROFF_USERDEF;
1630
267571
		break;
1631
	case ROFFDEF_REN:
1632
		t = ROFF_RENAMED;
1633
18
		break;
1634
	default:
1635
1822174
		t = roffhash_find(r->reqtab, mac, maclen);
1636
1822174
		break;
1637
	}
1638
2089763
	if (t != TOKEN_NONE)
1639
803903
		*pos = cp - buf;
1640
2089763
	return t;
1641
2095215
}
1642
1643
/* --- handling of request blocks ----------------------------------------- */
1644
1645
static enum rofferr
1646
roff_cblock(ROFF_ARGS)
1647
{
1648
1649
	/*
1650
	 * A block-close `..' should only be invoked as a child of an
1651
	 * ignore macro, otherwise raise a warning and just ignore it.
1652
	 */
1653
1654
20180
	if (r->last == NULL) {
1655
18
		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1656
		    ln, ppos, "..");
1657
18
		return ROFF_IGN;
1658
	}
1659
1660

10072
	switch (r->last->tok) {
1661
	case ROFF_am:
1662
		/* ROFF_am1 is remapped to ROFF_am in roff_block(). */
1663
	case ROFF_ami:
1664
	case ROFF_de:
1665
		/* ROFF_de1 is remapped to ROFF_de in roff_block(). */
1666
	case ROFF_dei:
1667
	case ROFF_ig:
1668
		break;
1669
	default:
1670
		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1671
		    ln, ppos, "..");
1672
		return ROFF_IGN;
1673
	}
1674
1675
10072
	if (buf->buf[pos] != '\0')
1676
		mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
1677
		    ".. %s", buf->buf + pos);
1678
1679
10072
	roffnode_pop(r);
1680
10072
	roffnode_cleanscope(r);
1681
10072
	return ROFF_IGN;
1682
1683
10090
}
1684
1685
static void
1686
roffnode_cleanscope(struct roff *r)
1687
{
1688
1689
1008254
	while (r->last) {
1690
222689
		if (--r->last->endspan != 0)
1691
			break;
1692
151252
		roffnode_pop(r);
1693
	}
1694
235250
}
1695
1696
static void
1697
roff_ccond(struct roff *r, int ln, int ppos)
1698
{
1699
1700
21510
	if (NULL == r->last) {
1701
		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1702
		    ln, ppos, "\\}");
1703
		return;
1704
	}
1705
1706

10755
	switch (r->last->tok) {
1707
	case ROFF_el:
1708
	case ROFF_ie:
1709
	case ROFF_if:
1710
		break;
1711
	default:
1712
		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1713
		    ln, ppos, "\\}");
1714
		return;
1715
	}
1716
1717
10755
	if (r->last->endspan > -1) {
1718
		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1719
		    ln, ppos, "\\}");
1720
		return;
1721
	}
1722
1723
10755
	roffnode_pop(r);
1724
10755
	roffnode_cleanscope(r);
1725
10755
	return;
1726
10755
}
1727
1728
static enum rofferr
1729
roff_block(ROFF_ARGS)
1730
{
1731
	const char	*name, *value;
1732
20486
	char		*call, *cp, *iname, *rname;
1733
	size_t		 csz, namesz, rsz;
1734
10243
	int		 deftype;
1735
1736
	/* Ignore groff compatibility mode for now. */
1737
1738
10243
	if (tok == ROFF_de1)
1739
12
		tok = ROFF_de;
1740
10231
	else if (tok == ROFF_dei1)
1741
		tok = ROFF_dei;
1742
10231
	else if (tok == ROFF_am1)
1743
		tok = ROFF_am;
1744
10231
	else if (tok == ROFF_ami1)
1745
		tok = ROFF_ami;
1746
1747
	/* Parse the macro name argument. */
1748
1749
10243
	cp = buf->buf + pos;
1750
10243
	if (tok == ROFF_ig) {
1751
		iname = NULL;
1752
		namesz = 0;
1753
162
	} else {
1754
		iname = cp;
1755
10081
		namesz = roff_getname(r, &cp, ln, ppos);
1756
10081
		iname[namesz] = '\0';
1757
	}
1758
1759
	/* Resolve the macro name argument if it is indirect. */
1760
1761

20288
	if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
1762
63
		deftype = ROFFDEF_USER;
1763
63
		name = roff_getstrn(r, iname, namesz, &deftype);
1764
63
		if (name == NULL) {
1765
18
			mandoc_vmsg(MANDOCERR_STR_UNDEF,
1766
18
			    r->parse, ln, (int)(iname - buf->buf),
1767
18
			    "%.*s", (int)namesz, iname);
1768
			namesz = 0;
1769
18
		} else
1770
45
			namesz = strlen(name);
1771
	} else
1772
		name = iname;
1773
1774
10243
	if (namesz == 0 && tok != ROFF_ig) {
1775
108
		mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse,
1776
54
		    ln, ppos, roff_name[tok]);
1777
54
		return ROFF_IGN;
1778
	}
1779
1780
10189
	roffnode_push(r, tok, name, ln, ppos);
1781
1782
	/*
1783
	 * At the beginning of a `de' macro, clear the existing string
1784
	 * with the same name, if there is one.  New content will be
1785
	 * appended from roff_block_text() in multiline mode.
1786
	 */
1787
1788
10189
	if (tok == ROFF_de || tok == ROFF_dei) {
1789
9991
		roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
1790
9991
		roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
1791
10189
	} else if (tok == ROFF_am || tok == ROFF_ami) {
1792
54
		deftype = ROFFDEF_ANY;
1793
54
		value = roff_getstrn(r, iname, namesz, &deftype);
1794

54
		switch (deftype) {  /* Before appending, ... */
1795
		case ROFFDEF_PRE: /* copy predefined to user-defined. */
1796
			roff_setstrn(&r->strtab, name, namesz,
1797
			    value, strlen(value), 0);
1798
			break;
1799
		case ROFFDEF_REN: /* call original standard macro. */
1800
9
			csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
1801
9
			    (int)strlen(value), value);
1802
9
			roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
1803
9
			roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
1804
9
			free(call);
1805
9
			break;
1806
		case ROFFDEF_STD:  /* rename and call standard macro. */
1807
9
			rsz = mandoc_asprintf(&rname, "__%s_renamed", name);
1808
9
			roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0);
1809
9
			csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
1810
9
			    (int)rsz, rname);
1811
9
			roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
1812
9
			free(call);
1813
9
			free(rname);
1814
9
			break;
1815
		default:
1816
			break;
1817
		}
1818
	}
1819
1820
10189
	if (*cp == '\0')
1821
10054
		return ROFF_IGN;
1822
1823
	/* Get the custom end marker. */
1824
1825
	iname = cp;
1826
135
	namesz = roff_getname(r, &cp, ln, ppos);
1827
1828
	/* Resolve the end marker if it is indirect. */
1829
1830

270
	if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
1831
45
		deftype = ROFFDEF_USER;
1832
45
		name = roff_getstrn(r, iname, namesz, &deftype);
1833
45
		if (name == NULL) {
1834
18
			mandoc_vmsg(MANDOCERR_STR_UNDEF,
1835
18
			    r->parse, ln, (int)(iname - buf->buf),
1836
18
			    "%.*s", (int)namesz, iname);
1837
			namesz = 0;
1838
18
		} else
1839
27
			namesz = strlen(name);
1840
	} else
1841
		name = iname;
1842
1843
135
	if (namesz)
1844
117
		r->last->end = mandoc_strndup(name, namesz);
1845
1846
135
	if (*cp != '\0')
1847
72
		mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
1848
36
		    ln, pos, ".%s ... %s", roff_name[tok], cp);
1849
1850
135
	return ROFF_IGN;
1851
10243
}
1852
1853
static enum rofferr
1854
roff_block_sub(ROFF_ARGS)
1855
{
1856
	enum roff_tok	t;
1857
	int		i, j;
1858
1859
	/*
1860
	 * First check whether a custom macro exists at this level.  If
1861
	 * it does, then check against it.  This is some of groff's
1862
	 * stranger behaviours.  If we encountered a custom end-scope
1863
	 * tag and that tag also happens to be a "real" macro, then we
1864
	 * need to try interpreting it again as a real macro.  If it's
1865
	 * not, then return ignore.  Else continue.
1866
	 */
1867
1868
27725
	if (r->last->end) {
1869
1026
		for (i = pos, j = 0; r->last->end[j]; j++, i++)
1870
414
			if (buf->buf[i] != r->last->end[j])
1871
				break;
1872
1873

144
		if (r->last->end[j] == '\0' &&
1874
99
		    (buf->buf[i] == '\0' ||
1875
18
		     buf->buf[i] == ' ' ||
1876
		     buf->buf[i] == '\t')) {
1877
99
			roffnode_pop(r);
1878
99
			roffnode_cleanscope(r);
1879
1880

450
			while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
1881
18
				i++;
1882
1883
99
			pos = i;
1884
99
			if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
1885
			    TOKEN_NONE)
1886
				return ROFF_RERUN;
1887
99
			return ROFF_IGN;
1888
		}
1889
	}
1890
1891
	/*
1892
	 * If we have no custom end-query or lookup failed, then try
1893
	 * pulling it out of the hashtable.
1894
	 */
1895
1896
27626
	t = roff_parse(r, buf->buf, &pos, ln, ppos);
1897
1898
27626
	if (t != ROFF_cblock) {
1899
17554
		if (tok != ROFF_ig)
1900
17194
			roff_setstr(r, r->last->name, buf->buf + ppos, 2);
1901
17554
		return ROFF_IGN;
1902
	}
1903
1904
10072
	return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
1905
27725
}
1906
1907
static enum rofferr
1908
roff_block_text(ROFF_ARGS)
1909
{
1910
1911
1548
	if (tok != ROFF_ig)
1912
414
		roff_setstr(r, r->last->name, buf->buf + pos, 2);
1913
1914
774
	return ROFF_IGN;
1915
}
1916
1917
static enum rofferr
1918
roff_cond_sub(ROFF_ARGS)
1919
{
1920
	enum roff_tok	 t;
1921
	char		*ep;
1922
	int		 rr;
1923
1924
211898
	rr = r->last->rule;
1925
211898
	roffnode_cleanscope(r);
1926
1927
	/*
1928
	 * If `\}' occurs on a macro line without a preceding macro,
1929
	 * drop the line completely.
1930
	 */
1931
1932
211898
	ep = buf->buf + pos;
1933

217341
	if (ep[0] == '\\' && ep[1] == '}')
1934
5443
		rr = 0;
1935
1936
	/* Always check for the closing delimiter `\}'. */
1937
1938
353502
	while ((ep = strchr(ep, '\\')) != NULL) {
1939
141604
		switch (ep[1]) {
1940
		case '}':
1941
10413
			memmove(ep, ep + 2, strlen(ep + 2) + 1);
1942
10413
			roff_ccond(r, ln, ep - buf->buf);
1943
10413
			break;
1944
		case '\0':
1945
			++ep;
1946
			break;
1947
		default:
1948
131191
			ep += 2;
1949
131191
			break;
1950
		}
1951
	}
1952
1953
	/*
1954
	 * Fully handle known macros when they are structurally
1955
	 * required or when the conditional evaluated to true.
1956
	 */
1957
1958
211898
	t = roff_parse(r, buf->buf, &pos, ln, ppos);
1959

892426
	return t != TOKEN_NONE && (rr || roffs[t].flags & ROFFMAC_STRUCT)
1960
211898
	    ? (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs) : rr
1961
	    ? ROFF_CONT : ROFF_IGN;
1962
}
1963
1964
static enum rofferr
1965
roff_cond_text(ROFF_ARGS)
1966
{
1967
	char		*ep;
1968
	int		 rr;
1969
1970
4852
	rr = r->last->rule;
1971
2426
	roffnode_cleanscope(r);
1972
1973
2426
	ep = buf->buf + pos;
1974
5266
	while ((ep = strchr(ep, '\\')) != NULL) {
1975
414
		if (*(++ep) == '}') {
1976
342
			*ep = '&';
1977
342
			roff_ccond(r, ln, ep - buf->buf - 1);
1978
342
		}
1979
414
		if (*ep != '\0')
1980
414
			++ep;
1981
	}
1982
2426
	return rr ? ROFF_CONT : ROFF_IGN;
1983
}
1984
1985
/* --- handling of numeric and conditional expressions -------------------- */
1986
1987
/*
1988
 * Parse a single signed integer number.  Stop at the first non-digit.
1989
 * If there is at least one digit, return success and advance the
1990
 * parse point, else return failure and leave the parse point unchanged.
1991
 * Ignore overflows, treat them just like the C language.
1992
 */
1993
static int
1994
roff_getnum(const char *v, int *pos, int *res, int flags)
1995
{
1996
85888
	int	 myres, scaled, n, p;
1997
1998
42944
	if (NULL == res)
1999
252
		res = &myres;
2000
2001
42944
	p = *pos;
2002
42944
	n = v[p] == '-';
2003

85834
	if (n || v[p] == '+')
2004
63
		p++;
2005
2006
42944
	if (flags & ROFFNUM_WHITE)
2007
19559
		while (isspace((unsigned char)v[p]))
2008
9
			p++;
2009
2010
193756
	for (*res = 0; isdigit((unsigned char)v[p]); p++)
2011
53934
		*res = 10 * *res + v[p] - '0';
2012
42944
	if (p == *pos + n)
2013
306
		return 0;
2014
2015
42638
	if (n)
2016
54
		*res = -*res;
2017
2018
	/* Each number may be followed by one optional scaling unit. */
2019
2020


42638
	switch (v[p]) {
2021
	case 'f':
2022
9
		scaled = *res * 65536;
2023
9
		break;
2024
	case 'i':
2025
36
		scaled = *res * 240;
2026
36
		break;
2027
	case 'c':
2028
27
		scaled = *res * 240 / 2.54;
2029
27
		break;
2030
	case 'v':
2031
	case 'P':
2032
36
		scaled = *res * 40;
2033
36
		break;
2034
	case 'm':
2035
	case 'n':
2036
4808
		scaled = *res * 24;
2037
4808
		break;
2038
	case 'p':
2039
9
		scaled = *res * 10 / 3;
2040
9
		break;
2041
	case 'u':
2042
9589
		scaled = *res;
2043
9589
		break;
2044
	case 'M':
2045
9
		scaled = *res * 6 / 25;
2046
9
		break;
2047
	default:
2048
28115
		scaled = *res;
2049
28115
		p--;
2050
28115
		break;
2051
	}
2052
42638
	if (flags & ROFFNUM_SCALE)
2053
42575
		*res = scaled;
2054
2055
42638
	*pos = p + 1;
2056
42638
	return 1;
2057
42944
}
2058
2059
/*
2060
 * Evaluate a string comparison condition.
2061
 * The first character is the delimiter.
2062
 * Succeed if the string up to its second occurrence
2063
 * matches the string up to its third occurrence.
2064
 * Advance the cursor after the third occurrence
2065
 * or lacking that, to the end of the line.
2066
 */
2067
static int
2068
roff_evalstrcond(const char *v, int *pos)
2069
{
2070
	const char	*s1, *s2, *s3;
2071
	int		 match;
2072
2073
	match = 0;
2074
180
	s1 = v + *pos;		/* initial delimiter */
2075
90
	s2 = s1 + 1;		/* for scanning the first string */
2076
90
	s3 = strchr(s2, *s1);	/* for scanning the second string */
2077
2078
90
	if (NULL == s3)		/* found no middle delimiter */
2079
		goto out;
2080
2081
405
	while ('\0' != *++s3) {
2082
234
		if (*s2 != *s3) {  /* mismatch */
2083
36
			s3 = strchr(s3, *s1);
2084
36
			break;
2085
		}
2086
198
		if (*s3 == *s1) {  /* found the final delimiter */
2087
			match = 1;
2088
36
			break;
2089
		}
2090
162
		s2++;
2091
	}
2092
2093
out:
2094
90
	if (NULL == s3)
2095
9
		s3 = strchr(s2, '\0');
2096
81
	else if (*s3 != '\0')
2097
72
		s3++;
2098
90
	*pos = s3 - v;
2099
90
	return match;
2100
}
2101
2102
/*
2103
 * Evaluate an optionally negated single character, numerical,
2104
 * or string condition.
2105
 */
2106
static int
2107
roff_evalcond(struct roff *r, int ln, char *v, int *pos)
2108
{
2109
275884
	char	*cp, *name;
2110
	size_t	 sz;
2111
137942
	int	 deftype, number, savepos, istrue, wanttrue;
2112
2113
137942
	if ('!' == v[*pos]) {
2114
		wanttrue = 0;
2115
4870
		(*pos)++;
2116
4870
	} else
2117
		wanttrue = 1;
2118
2119


137942
	switch (v[*pos]) {
2120
	case '\0':
2121
18
		return 0;
2122
	case 'n':
2123
	case 'o':
2124
73645
		(*pos)++;
2125
73645
		return wanttrue;
2126
	case 'c':
2127
	case 'e':
2128
	case 't':
2129
	case 'v':
2130
48749
		(*pos)++;
2131
48749
		return !wanttrue;
2132
	case 'd':
2133
	case 'r':
2134
118
		cp = v + *pos + 1;
2135
416
		while (*cp == ' ')
2136
90
			cp++;
2137
		name = cp;
2138
118
		sz = roff_getname(r, &cp, ln, cp - v);
2139
118
		if (sz == 0)
2140
			istrue = 0;
2141
118
		else if (v[*pos] == 'r')
2142
28
			istrue = roff_hasregn(r, name, sz);
2143
		else {
2144
90
			deftype = ROFFDEF_ANY;
2145
90
		        roff_getstrn(r, name, sz, &deftype);
2146
90
			istrue = !!deftype;
2147
		}
2148
118
		*pos = cp - v;
2149
118
		return istrue == wanttrue;
2150
	default:
2151
		break;
2152
	}
2153
2154
	savepos = *pos;
2155
15412
	if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE))
2156
15295
		return (number > 0) == wanttrue;
2157
117
	else if (*pos == savepos)
2158
90
		return roff_evalstrcond(v, pos) == wanttrue;
2159
	else
2160
27
		return 0;
2161
137942
}
2162
2163
static enum rofferr
2164
roff_line_ignore(ROFF_ARGS)
2165
{
2166
2167
98906
	return ROFF_IGN;
2168
}
2169
2170
static enum rofferr
2171
roff_insec(ROFF_ARGS)
2172
{
2173
2174
483
	mandoc_msg(MANDOCERR_REQ_INSEC, r->parse,
2175
161
	    ln, ppos, roff_name[tok]);
2176
161
	return ROFF_IGN;
2177
}
2178
2179
static enum rofferr
2180
roff_unsupp(ROFF_ARGS)
2181
{
2182
2183
	mandoc_msg(MANDOCERR_REQ_UNSUPP, r->parse,
2184
	    ln, ppos, roff_name[tok]);
2185
	return ROFF_IGN;
2186
}
2187
2188
static enum rofferr
2189
roff_cond(ROFF_ARGS)
2190
{
2191
2192
162025
	roffnode_push(r, tok, NULL, ln, ppos);
2193
2194
	/*
2195
	 * An `.el' has no conditional body: it will consume the value
2196
	 * of the current rstack entry set in prior `ie' calls or
2197
	 * defaults to DENY.
2198
	 *
2199
	 * If we're not an `el', however, then evaluate the conditional.
2200
	 */
2201
2202
461992
	r->last->rule = tok == ROFF_el ?
2203
48139
	    (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
2204
137942
	    roff_evalcond(r, ln, buf->buf, &pos);
2205
2206
	/*
2207
	 * An if-else will put the NEGATION of the current evaluated
2208
	 * conditional into the stack of rules.
2209
	 */
2210
2211
162025
	if (tok == ROFF_ie) {
2212
24056
		if (r->rstackpos + 1 == r->rstacksz) {
2213
2450
			r->rstacksz += 16;
2214
4900
			r->rstack = mandoc_reallocarray(r->rstack,
2215
2450
			    r->rstacksz, sizeof(int));
2216
2450
		}
2217
24056
		r->rstack[++r->rstackpos] = !r->last->rule;
2218
24056
	}
2219
2220
	/* If the parent has false as its rule, then so do we. */
2221
2222

170390
	if (r->last->parent && !r->last->parent->rule)
2223
2459
		r->last->rule = 0;
2224
2225
	/*
2226
	 * Determine scope.
2227
	 * If there is nothing on the line after the conditional,
2228
	 * not even whitespace, use next-line scope.
2229
	 */
2230
2231
162025
	if (buf->buf[pos] == '\0') {
2232
		r->last->endspan = 2;
2233
90
		goto out;
2234
	}
2235
2236
437349
	while (buf->buf[pos] == ' ')
2237
137707
		pos++;
2238
2239
	/* An opening brace requests multiline scope. */
2240
2241

172714
	if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
2242
10773
		r->last->endspan = -1;
2243
10773
		pos += 2;
2244
21690
		while (buf->buf[pos] == ' ')
2245
72
			pos++;
2246
		goto out;
2247
	}
2248
2249
	/*
2250
	 * Anything else following the conditional causes
2251
	 * single-line scope.  Warn if the scope contains
2252
	 * nothing but trailing whitespace.
2253
	 */
2254
2255
151162
	if (buf->buf[pos] == '\0')
2256
72
		mandoc_msg(MANDOCERR_COND_EMPTY, r->parse,
2257
36
		    ln, ppos, roff_name[tok]);
2258
2259
151162
	r->last->endspan = 1;
2260
2261
out:
2262
313277
	*offs = pos;
2263
162025
	return ROFF_RERUN;
2264
}
2265
2266
static enum rofferr
2267
roff_ds(ROFF_ARGS)
2268
{
2269
39856
	char		*string;
2270
	const char	*name;
2271
	size_t		 namesz;
2272
2273
	/* Ignore groff compatibility mode for now. */
2274
2275
19928
	if (tok == ROFF_ds1)
2276
		tok = ROFF_ds;
2277
19928
	else if (tok == ROFF_as1)
2278
		tok = ROFF_as;
2279
2280
	/*
2281
	 * The first word is the name of the string.
2282
	 * If it is empty or terminated by an escape sequence,
2283
	 * abort the `ds' request without defining anything.
2284
	 */
2285
2286
19928
	name = string = buf->buf + pos;
2287
19928
	if (*name == '\0')
2288
		return ROFF_IGN;
2289
2290
19928
	namesz = roff_getname(r, &string, ln, pos);
2291
19928
	if (name[namesz] == '\\')
2292
45
		return ROFF_IGN;
2293
2294
	/* Read past the initial double-quote, if any. */
2295
19883
	if (*string == '"')
2296
9634
		string++;
2297
2298
	/* The rest is the value. */
2299
39766
	roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
2300
19883
	    ROFF_as == tok);
2301
19883
	roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2302
19883
	return ROFF_IGN;
2303
19928
}
2304
2305
/*
2306
 * Parse a single operator, one or two characters long.
2307
 * If the operator is recognized, return success and advance the
2308
 * parse point, else return failure and leave the parse point unchanged.
2309
 */
2310
static int
2311
roff_getop(const char *v, int *pos, char *res)
2312
{
2313
2314
116821
	*res = v[*pos];
2315
2316


64439
	switch (*res) {
2317
	case '+':
2318
	case '-':
2319
	case '*':
2320
	case '/':
2321
	case '%':
2322
	case '&':
2323
	case ':':
2324
		break;
2325
	case '<':
2326

135
		switch (v[*pos + 1]) {
2327
		case '=':
2328
27
			*res = 'l';
2329
27
			(*pos)++;
2330
27
			break;
2331
		case '>':
2332
			*res = '!';
2333
			(*pos)++;
2334
			break;
2335
		case '?':
2336
18
			*res = 'i';
2337
18
			(*pos)++;
2338
18
			break;
2339
		default:
2340
			break;
2341
		}
2342
		break;
2343
	case '>':
2344
3048
		switch (v[*pos + 1]) {
2345
		case '=':
2346
27
			*res = 'g';
2347
27
			(*pos)++;
2348
27
			break;
2349
		case '?':
2350
18
			*res = 'a';
2351
18
			(*pos)++;
2352
18
			break;
2353
		default:
2354
			break;
2355
		}
2356
		break;
2357
	case '=':
2358
12057
		if ('=' == v[*pos + 1])
2359
2441
			(*pos)++;
2360
		break;
2361
	default:
2362
31019
		return 0;
2363
	}
2364
21363
	(*pos)++;
2365
2366
21363
	return *res;
2367
52382
}
2368
2369
/*
2370
 * Evaluate either a parenthesized numeric expression
2371
 * or a single signed integer number.
2372
 */
2373
static int
2374
roff_evalpar(struct roff *r, int ln,
2375
	const char *v, int *pos, int *res, int flags)
2376
{
2377
2378
105538
	if ('(' != v[*pos])
2379
42944
		return roff_getnum(v, pos, res, flags);
2380
2381
9825
	(*pos)++;
2382
9825
	if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE))
2383
63
		return 0;
2384
2385
	/*
2386
	 * Omission of the closing parenthesis
2387
	 * is an error in validation mode,
2388
	 * but ignored in evaluation mode.
2389
	 */
2390
2391
9762
	if (')' == v[*pos])
2392
9717
		(*pos)++;
2393
45
	else if (NULL == res)
2394
18
		return 0;
2395
2396
9744
	return 1;
2397
52769
}
2398
2399
/*
2400
 * Evaluate a complete numeric expression.
2401
 * Proceed left to right, there is no concept of precedence.
2402
 */
2403
static int
2404
roff_evalnum(struct roff *r, int ln, const char *v,
2405
	int *pos, int *res, int flags)
2406
{
2407
62812
	int		 mypos, operand2;
2408
31406
	char		 operator;
2409
2410
31406
	if (NULL == pos) {
2411
5827
		mypos = 0;
2412
		pos = &mypos;
2413
5827
	}
2414
2415
31406
	if (flags & ROFFNUM_WHITE)
2416
9879
		while (isspace((unsigned char)v[*pos]))
2417
27
			(*pos)++;
2418
2419
31406
	if ( ! roff_evalpar(r, ln, v, pos, res, flags))
2420
297
		return 0;
2421
2422
	while (1) {
2423
52382
		if (flags & ROFFNUM_WHITE)
2424
19578
			while (isspace((unsigned char)v[*pos]))
2425
36
				(*pos)++;
2426
2427
52382
		if ( ! roff_getop(v, pos, &operator))
2428
			break;
2429
2430
21363
		if (flags & ROFFNUM_WHITE)
2431
9780
			while (isspace((unsigned char)v[*pos]))
2432
18
				(*pos)++;
2433
2434
21363
		if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags))
2435
90
			return 0;
2436
2437
21273
		if (flags & ROFFNUM_WHITE)
2438
9780
			while (isspace((unsigned char)v[*pos]))
2439
18
				(*pos)++;
2440
2441
21273
		if (NULL == res)
2442
			continue;
2443
2444




21111
		switch (operator) {
2445
		case '+':
2446
153
			*res += operand2;
2447
153
			break;
2448
		case '-':
2449
432
			*res -= operand2;
2450
432
			break;
2451
		case '*':
2452
459
			*res *= operand2;
2453
459
			break;
2454
		case '/':
2455
36
			if (operand2 == 0) {
2456
18
				mandoc_msg(MANDOCERR_DIVZERO,
2457
18
					r->parse, ln, *pos, v);
2458
18
				*res = 0;
2459
18
				break;
2460
			}
2461
18
			*res /= operand2;
2462
18
			break;
2463
		case '%':
2464
27
			if (operand2 == 0) {
2465
18
				mandoc_msg(MANDOCERR_DIVZERO,
2466
18
					r->parse, ln, *pos, v);
2467
18
				*res = 0;
2468
18
				break;
2469
			}
2470
9
			*res %= operand2;
2471
9
			break;
2472
		case '<':
2473
45
			*res = *res < operand2;
2474
45
			break;
2475
		case '>':
2476
2958
			*res = *res > operand2;
2477
2958
			break;
2478
		case 'l':
2479
27
			*res = *res <= operand2;
2480
27
			break;
2481
		case 'g':
2482
27
			*res = *res >= operand2;
2483
27
			break;
2484
		case '=':
2485
12048
			*res = *res == operand2;
2486
12048
			break;
2487
		case '!':
2488
			*res = *res != operand2;
2489
			break;
2490
		case '&':
2491
4826
			*res = *res && operand2;
2492
4826
			break;
2493
		case ':':
2494
37
			*res = *res || operand2;
2495
37
			break;
2496
		case 'i':
2497
18
			if (operand2 < *res)
2498
9
				*res = operand2;
2499
			break;
2500
		case 'a':
2501
18
			if (operand2 > *res)
2502
9
				*res = operand2;
2503
			break;
2504
		default:
2505
			abort();
2506
		}
2507
	}
2508
31019
	return 1;
2509
31406
}
2510
2511
/* --- register management ------------------------------------------------ */
2512
2513
void
2514
roff_setreg(struct roff *r, const char *name, int val, char sign)
2515
{
2516
	struct roffreg	*reg;
2517
2518
	/* Search for an existing register with the same name. */
2519
572944
	reg = r->regtab;
2520
2521

1154734
	while (reg && strcmp(name, reg->key.p))
2522
7417
		reg = reg->next;
2523
2524
286472
	if (NULL == reg) {
2525
		/* Create a new register. */
2526
20822
		reg = mandoc_malloc(sizeof(struct roffreg));
2527
20822
		reg->key.p = mandoc_strdup(name);
2528
20822
		reg->key.sz = strlen(name);
2529
20822
		reg->val = 0;
2530
20822
		reg->next = r->regtab;
2531
20822
		r->regtab = reg;
2532
20822
	}
2533
2534
286472
	if ('+' == sign)
2535
9729
		reg->val += val;
2536
276743
	else if ('-' == sign)
2537
9639
		reg->val -= val;
2538
	else
2539
		reg->val = val;
2540
286472
}
2541
2542
/*
2543
 * Handle some predefined read-only number registers.
2544
 * For now, return -1 if the requested register is not predefined;
2545
 * in case a predefined read-only register having the value -1
2546
 * were to turn up, another special value would have to be chosen.
2547
 */
2548
static int
2549
roff_getregro(const struct roff *r, const char *name)
2550
{
2551
2552


14566
	switch (*name) {
2553
	case '$':  /* Number of arguments of the last macro evaluated. */
2554
27
		return r->argc;
2555
	case 'A':  /* ASCII approximation mode is always off. */
2556
9
		return 0;
2557
	case 'g':  /* Groff compatibility mode is always on. */
2558
2397
		return 1;
2559
	case 'H':  /* Fixed horizontal resolution. */
2560
4811
		return 24;
2561
	case 'j':  /* Always adjust left margin only. */
2562
9
		return 0;
2563
	case 'T':  /* Some output device is always defined. */
2564
9
		return 1;
2565
	case 'V':  /* Fixed vertical resolution. */
2566
21
		return 40;
2567
	default:
2568
		return -1;
2569
	}
2570
7283
}
2571
2572
int
2573
roff_getreg(const struct roff *r, const char *name)
2574
{
2575
	struct roffreg	*reg;
2576
	int		 val;
2577
2578

3204664
	if ('.' == name[0] && '\0' != name[1] && '\0' == name[2]) {
2579
		val = roff_getregro(r, name + 1);
2580
		if (-1 != val)
2581
			return val;
2582
	}
2583
2584
3214218
	for (reg = r->regtab; reg; reg = reg->next)
2585
1536935
		if (0 == strcmp(name, reg->key.p))
2586
1532158
			return reg->val;
2587
2588
70174
	return 0;
2589
1602332
}
2590
2591
static int
2592
roff_getregn(const struct roff *r, const char *name, size_t len)
2593
{
2594
	struct roffreg	*reg;
2595
	int		 val;
2596
2597
36130
	if ('.' == name[0] && 2 == len) {
2598
7283
		val = roff_getregro(r, name + 1);
2599
7283
		if (-1 != val)
2600
7283
			return val;
2601
	}
2602
2603
36694
	for (reg = r->regtab; reg; reg = reg->next)
2604

25228
		if (len == reg->key.sz &&
2605
9421
		    0 == strncmp(name, reg->key.p, len))
2606
8242
			return reg->val;
2607
2608
2540
	return 0;
2609
18065
}
2610
2611
static int
2612
roff_hasregn(const struct roff *r, const char *name, size_t len)
2613
{
2614
	struct roffreg	*reg;
2615
	int		 val;
2616
2617
56
	if ('.' == name[0] && 2 == len) {
2618
		val = roff_getregro(r, name + 1);
2619
		if (-1 != val)
2620
			return 1;
2621
	}
2622
2623
58
	for (reg = r->regtab; reg; reg = reg->next)
2624

37
		if (len == reg->key.sz &&
2625
18
		    0 == strncmp(name, reg->key.p, len))
2626
18
			return 1;
2627
2628
10
	return 0;
2629
28
}
2630
2631
static void
2632
roff_freereg(struct roffreg *reg)
2633
{
2634
	struct roffreg	*old_reg;
2635
2636
115073
	while (NULL != reg) {
2637
20758
		free(reg->key.p);
2638
		old_reg = reg;
2639
20758
		reg = reg->next;
2640
20758
		free(old_reg);
2641
	}
2642
24519
}
2643
2644
static enum rofferr
2645
roff_nr(ROFF_ARGS)
2646
{
2647
11690
	char		*key, *val;
2648
	size_t		 keysz;
2649
5845
	int		 iv;
2650
	char		 sign;
2651
2652
5845
	key = val = buf->buf + pos;
2653
5845
	if (*key == '\0')
2654
		return ROFF_IGN;
2655
2656
5845
	keysz = roff_getname(r, &val, ln, pos);
2657
5845
	if (key[keysz] == '\\')
2658
18
		return ROFF_IGN;
2659
5827
	key[keysz] = '\0';
2660
2661
5827
	sign = *val;
2662

11162
	if (sign == '+' || sign == '-')
2663
984
		val++;
2664
2665
5827
	if (roff_evalnum(r, ln, val, NULL, &iv, ROFFNUM_SCALE))
2666
5782
		roff_setreg(r, key, iv, sign);
2667
2668
5827
	return ROFF_IGN;
2669
5845
}
2670
2671
static enum rofferr
2672
roff_rr(ROFF_ARGS)
2673
{
2674
	struct roffreg	*reg, **prev;
2675
128
	char		*name, *cp;
2676
	size_t		 namesz;
2677
2678
64
	name = cp = buf->buf + pos;
2679
64
	if (*name == '\0')
2680
		return ROFF_IGN;
2681
64
	namesz = roff_getname(r, &cp, ln, pos);
2682
64
	name[namesz] = '\0';
2683
2684
64
	prev = &r->regtab;
2685
154
	while (1) {
2686
154
		reg = *prev;
2687

308
		if (reg == NULL || !strcmp(name, reg->key.p))
2688
			break;
2689
90
		prev = &reg->next;
2690
	}
2691
64
	if (reg != NULL) {
2692
64
		*prev = reg->next;
2693
64
		free(reg->key.p);
2694
64
		free(reg);
2695
64
	}
2696
64
	return ROFF_IGN;
2697
64
}
2698
2699
/* --- handler functions for roff requests -------------------------------- */
2700
2701
static enum rofferr
2702
roff_rm(ROFF_ARGS)
2703
{
2704
	const char	 *name;
2705
150
	char		 *cp;
2706
	size_t		  namesz;
2707
2708
75
	cp = buf->buf + pos;
2709
330
	while (*cp != '\0') {
2710
		name = cp;
2711
198
		namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
2712
198
		roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
2713
198
		roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2714
198
		if (name[namesz] == '\\')
2715
			break;
2716
	}
2717
75
	return ROFF_IGN;
2718
75
}
2719
2720
static enum rofferr
2721
roff_it(ROFF_ARGS)
2722
{
2723
90
	int		 iv;
2724
2725
	/* Parse the number of lines. */
2726
2727
90
	if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) {
2728
72
		mandoc_msg(MANDOCERR_IT_NONUM, r->parse,
2729
36
		    ln, ppos, buf->buf + 1);
2730
36
		return ROFF_IGN;
2731
	}
2732
2733
108
	while (isspace((unsigned char)buf->buf[pos]))
2734
27
		pos++;
2735
2736
	/*
2737
	 * Arm the input line trap.
2738
	 * Special-casing "an-trap" is an ugly workaround to cope
2739
	 * with DocBook stupidly fiddling with man(7) internals.
2740
	 */
2741
2742
54
	roffit_lines = iv;
2743

198
	roffit_macro = mandoc_strdup(iv != 1 ||
2744
36
	    strcmp(buf->buf + pos, "an-trap") ?
2745
54
	    buf->buf + pos : "br");
2746
54
	return ROFF_IGN;
2747
90
}
2748
2749
static enum rofferr
2750
roff_Dd(ROFF_ARGS)
2751
{
2752
	int		 mask;
2753
	enum roff_tok	 t, te;
2754
2755
48782
	switch (tok) {
2756
	case ROFF_Dd:
2757
		tok = MDOC_Dd;
2758
		te = MDOC_MAX;
2759
17322
		if (r->format == 0)
2760
17016
			r->format = MPARSE_MDOC;
2761
		mask = MPARSE_MDOC | MPARSE_QUICK;
2762
17322
		break;
2763
	case ROFF_TH:
2764
		tok = MAN_TH;
2765
		te = MAN_MAX;
2766
7069
		if (r->format == 0)
2767
5161
			r->format = MPARSE_MAN;
2768
		mask = MPARSE_QUICK;
2769
7069
		break;
2770
	default:
2771
		abort();
2772
	}
2773
24391
	if ((r->options & mask) == 0)
2774
4640978
		for (t = tok; t < te; t++)
2775
2296404
			roff_setstr(r, roff_name[t], NULL, 0);
2776
24391
	return ROFF_CONT;
2777
}
2778
2779
static enum rofferr
2780
roff_TE(ROFF_ARGS)
2781
{
2782
13838
	if (r->tbl == NULL) {
2783
		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
2784
		    ln, ppos, "TE");
2785
		return ROFF_IGN;
2786
	}
2787
6919
	if (tbl_end(r->tbl) == 0) {
2788
		r->tbl = NULL;
2789
18
		free(buf->buf);
2790
18
		buf->buf = mandoc_strdup(".sp");
2791
18
		buf->sz = 4;
2792
18
		return ROFF_REPARSE;
2793
	}
2794
	r->tbl = NULL;
2795
6901
	return ROFF_IGN;
2796
6919
}
2797
2798
static enum rofferr
2799
roff_T_(ROFF_ARGS)
2800
{
2801
2802
36
	if (NULL == r->tbl)
2803
		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
2804
		    ln, ppos, "T&");
2805
	else
2806
18
		tbl_restart(ln, ppos, r->tbl);
2807
2808
18
	return ROFF_IGN;
2809
}
2810
2811
/*
2812
 * Handle in-line equation delimiters.
2813
 */
2814
static enum rofferr
2815
roff_eqndelim(struct roff *r, struct buf *buf, int pos)
2816
{
2817
588
	char		*cp1, *cp2;
2818
	const char	*bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
2819
2820
	/*
2821
	 * Outside equations, look for an opening delimiter.
2822
	 * If we are inside an equation, we already know it is
2823
	 * in-line, or this function wouldn't have been called;
2824
	 * so look for a closing delimiter.
2825
	 */
2826
2827
294
	cp1 = buf->buf + pos;
2828
882
	cp2 = strchr(cp1, r->eqn == NULL ?
2829
294
	    r->last_eqn->odelim : r->last_eqn->cdelim);
2830
294
	if (cp2 == NULL)
2831
258
		return ROFF_CONT;
2832
2833
36
	*cp2++ = '\0';
2834
	bef_pr = bef_nl = aft_nl = aft_pr = "";
2835
2836
	/* Handle preceding text, protecting whitespace. */
2837
2838
36
	if (*buf->buf != '\0') {
2839
36
		if (r->eqn == NULL)
2840
18
			bef_pr = "\\&";
2841
		bef_nl = "\n";
2842
36
	}
2843
2844
	/*
2845
	 * Prepare replacing the delimiter with an equation macro
2846
	 * and drop leading white space from the equation.
2847
	 */
2848
2849
36
	if (r->eqn == NULL) {
2850
18
		while (*cp2 == ' ')
2851
			cp2++;
2852
		mac = ".EQ";
2853
18
	} else
2854
		mac = ".EN";
2855
2856
	/* Handle following text, protecting whitespace. */
2857
2858
36
	if (*cp2 != '\0') {
2859
		aft_nl = "\n";
2860
36
		if (r->eqn != NULL)
2861
18
			aft_pr = "\\&";
2862
	}
2863
2864
	/* Do the actual replacement. */
2865
2866
72
	buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
2867
36
	    bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
2868
36
	free(buf->buf);
2869
36
	buf->buf = cp1;
2870
2871
	/* Toggle the in-line state of the eqn subsystem. */
2872
2873
36
	r->eqn_inline = r->eqn == NULL;
2874
36
	return ROFF_REPARSE;
2875
294
}
2876
2877
static enum rofferr
2878
roff_EQ(ROFF_ARGS)
2879
{
2880
	struct roff_node	*n;
2881
2882
1110
	if (r->man->macroset == MACROSET_MAN)
2883
		man_breakscope(r->man, ROFF_EQ);
2884
555
	n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE);
2885
555
	if (ln > r->man->last->line)
2886
537
		n->flags |= NODE_LINE;
2887
555
	n->eqn = mandoc_calloc(1, sizeof(*n->eqn));
2888
555
	n->eqn->expectargs = UINT_MAX;
2889
555
	roff_node_append(r->man, n);
2890
555
	r->man->next = ROFF_NEXT_SIBLING;
2891
2892
555
	assert(r->eqn == NULL);
2893
555
	if (r->last_eqn == NULL)
2894
390
		r->last_eqn = eqn_alloc(r->parse);
2895
	else
2896
165
		eqn_reset(r->last_eqn);
2897
555
	r->eqn = r->last_eqn;
2898
555
	r->eqn->node = n;
2899
2900
555
	if (buf->buf[pos] != '\0')
2901
		mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
2902
		    ".EQ %s", buf->buf + pos);
2903
2904
555
	return ROFF_IGN;
2905
}
2906
2907
static enum rofferr
2908
roff_EN(ROFF_ARGS)
2909
{
2910
1110
	if (r->eqn != NULL) {
2911
555
		eqn_parse(r->eqn);
2912
555
		r->eqn = NULL;
2913
555
	} else
2914
		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, ln, ppos, "EN");
2915
555
	if (buf->buf[pos] != '\0')
2916
		mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
2917
		    "EN %s", buf->buf + pos);
2918
555
	return ROFF_IGN;
2919
}
2920
2921
static enum rofferr
2922
roff_TS(ROFF_ARGS)
2923
{
2924
13838
	if (r->tbl != NULL) {
2925
		mandoc_msg(MANDOCERR_BLK_BROKEN, r->parse,
2926
		    ln, ppos, "TS breaks TS");
2927
		tbl_end(r->tbl);
2928
	}
2929
6919
	r->tbl = tbl_alloc(ppos, ln, r->parse);
2930
6919
	if (r->last_tbl)
2931
6489
		r->last_tbl->next = r->tbl;
2932
	else
2933
430
		r->first_tbl = r->tbl;
2934
6919
	r->last_tbl = r->tbl;
2935
6919
	return ROFF_IGN;
2936
}
2937
2938
static enum rofferr
2939
roff_onearg(ROFF_ARGS)
2940
{
2941
	struct roff_node	*n;
2942
	char			*cp;
2943
253036
	int			 npos;
2944
2945
126518
	if (r->man->flags & (MAN_BLINE | MAN_ELINE) &&
2946
	    (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp ||
2947
	     tok == ROFF_ti))
2948
		man_breakscope(r->man, tok);
2949
2950

126518
	if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) {
2951
		r->man->last = roffce_node;
2952
		r->man->next = ROFF_NEXT_SIBLING;
2953
	}
2954
2955
126518
	roff_elem_alloc(r->man, ln, ppos, tok);
2956
126518
	n = r->man->last;
2957
2958
126518
	cp = buf->buf + pos;
2959
126518
	if (*cp != '\0') {
2960

404481
		while (*cp != '\0' && *cp != ' ')
2961
110633
			cp++;
2962
72690
		while (*cp == ' ')
2963
108
			*cp++ = '\0';
2964
72474
		if (*cp != '\0')
2965
108
			mandoc_vmsg(MANDOCERR_ARG_EXCESS,
2966
108
			    r->parse, ln, cp - buf->buf,
2967
108
			    "%s ... %s", roff_name[tok], cp);
2968
72474
		roff_word_alloc(r->man, ln, pos, buf->buf + pos);
2969
72474
	}
2970
2971
126518
	if (tok == ROFF_ce || tok == ROFF_rj) {
2972
		if (r->man->last->type == ROFFT_ELEM) {
2973
			roff_word_alloc(r->man, ln, pos, "1");
2974
			r->man->last->flags |= NODE_NOSRC;
2975
		}
2976
		npos = 0;
2977
		if (roff_evalnum(r, ln, r->man->last->string, &npos,
2978
		    &roffce_lines, 0) == 0) {
2979
			mandoc_vmsg(MANDOCERR_CE_NONUM,
2980
			    r->parse, ln, pos, "ce %s", buf->buf + pos);
2981
			roffce_lines = 1;
2982
		}
2983
		if (roffce_lines < 1) {
2984
			r->man->last = r->man->last->parent;
2985
			roffce_node = NULL;
2986
			roffce_lines = 0;
2987
		} else
2988
			roffce_node = r->man->last->parent;
2989
	} else {
2990
126518
		n->flags |= NODE_VALID | NODE_ENDED;
2991
126518
		r->man->last = n;
2992
	}
2993
126518
	n->flags |= NODE_LINE;
2994
126518
	r->man->next = ROFF_NEXT_SIBLING;
2995
126518
	return ROFF_IGN;
2996
126518
}
2997
2998
static enum rofferr
2999
roff_manyarg(ROFF_ARGS)
3000
{
3001
	struct roff_node	*n;
3002
	char			*sp, *ep;
3003
3004
196
	roff_elem_alloc(r->man, ln, ppos, tok);
3005
98
	n = r->man->last;
3006
3007
616
	for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) {
3008

1814
		while (*ep != '\0' && *ep != ' ')
3009
491
			ep++;
3010
472
		while (*ep == ' ')
3011
131
			*ep++ = '\0';
3012
210
		roff_word_alloc(r->man, ln, sp - buf->buf, sp);
3013
	}
3014
3015
98
	n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3016
98
	r->man->last = n;
3017
98
	r->man->next = ROFF_NEXT_SIBLING;
3018
98
	return ROFF_IGN;
3019
}
3020
3021
static enum rofferr
3022
roff_als(ROFF_ARGS)
3023
{
3024
	char		*oldn, *newn, *end, *value;
3025
	size_t		 oldsz, newsz, valsz;
3026
3027
	newn = oldn = buf->buf + pos;
3028
	if (*newn == '\0')
3029
		return ROFF_IGN;
3030
3031
	newsz = roff_getname(r, &oldn, ln, pos);
3032
	if (newn[newsz] == '\\' || *oldn == '\0')
3033
		return ROFF_IGN;
3034
3035
	end = oldn;
3036
	oldsz = roff_getname(r, &end, ln, oldn - buf->buf);
3037
	if (oldsz == 0)
3038
		return ROFF_IGN;
3039
3040
	valsz = mandoc_asprintf(&value, ".%.*s \\$*\\\"\n",
3041
	    (int)oldsz, oldn);
3042
	roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0);
3043
	roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3044
	free(value);
3045
	return ROFF_IGN;
3046
}
3047
3048
static enum rofferr
3049
roff_br(ROFF_ARGS)
3050
{
3051
25162
	if (r->man->flags & (MAN_BLINE | MAN_ELINE))
3052
		man_breakscope(r->man, ROFF_br);
3053
12581
	roff_elem_alloc(r->man, ln, ppos, ROFF_br);
3054
12581
	if (buf->buf[pos] != '\0')
3055
108
		mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
3056
54
		    "%s %s", roff_name[tok], buf->buf + pos);
3057
12581
	r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3058
12581
	r->man->next = ROFF_NEXT_SIBLING;
3059
12581
	return ROFF_IGN;
3060
}
3061
3062
static enum rofferr
3063
roff_cc(ROFF_ARGS)
3064
{
3065
	const char	*p;
3066
3067
108
	p = buf->buf + pos;
3068
3069

90
	if (*p == '\0' || (r->control = *p++) == '.')
3070
18
		r->control = '\0';
3071
3072
54
	if (*p != '\0')
3073
36
		mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
3074
18
		    ln, p - buf->buf, "cc ... %s", p);
3075
3076
54
	return ROFF_IGN;
3077
}
3078
3079
static enum rofferr
3080
roff_ec(ROFF_ARGS)
3081
{
3082
	const char	*p;
3083
3084
	p = buf->buf + pos;
3085
	if (*p == '\0')
3086
		r->escape = '\\';
3087
	else {
3088
		r->escape = *p;
3089
		if (*++p != '\0')
3090
			mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
3091
			    ln, p - buf->buf, "ec ... %s", p);
3092
	}
3093
	return ROFF_IGN;
3094
}
3095
3096
static enum rofferr
3097
roff_eo(ROFF_ARGS)
3098
{
3099
	r->escape = '\0';
3100
	if (buf->buf[pos] != '\0')
3101
		mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse,
3102
		    ln, pos, "eo %s", buf->buf + pos);
3103
	return ROFF_IGN;
3104
}
3105
3106
static enum rofferr
3107
roff_tr(ROFF_ARGS)
3108
{
3109
4934
	const char	*p, *first, *second;
3110
	size_t		 fsz, ssz;
3111
	enum mandoc_esc	 esc;
3112
3113
2467
	p = buf->buf + pos;
3114
3115
2467
	if (*p == '\0') {
3116
18
		mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse, ln, ppos, "tr");
3117
18
		return ROFF_IGN;
3118
	}
3119
3120
5000
	while (*p != '\0') {
3121
		fsz = ssz = 1;
3122
3123
2473
		first = p++;
3124
2473
		if (*first == '\\') {
3125
2395
			esc = mandoc_escape(&p, NULL, NULL);
3126
2395
			if (esc == ESCAPE_ERROR) {
3127
				mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
3128
				    ln, (int)(p - buf->buf), first);
3129
				return ROFF_IGN;
3130
			}
3131
2395
			fsz = (size_t)(p - first);
3132
2395
		}
3133
3134
2473
		second = p++;
3135
2473
		if (*second == '\\') {
3136
6
			esc = mandoc_escape(&p, NULL, NULL);
3137
6
			if (esc == ESCAPE_ERROR) {
3138
				mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
3139
				    ln, (int)(p - buf->buf), second);
3140
				return ROFF_IGN;
3141
			}
3142
6
			ssz = (size_t)(p - second);
3143
2473
		} else if (*second == '\0') {
3144
72
			mandoc_vmsg(MANDOCERR_TR_ODD, r->parse,
3145
36
			    ln, first - buf->buf, "tr %s", first);
3146
			second = " ";
3147
36
			p--;
3148
36
		}
3149
3150
2473
		if (fsz > 1) {
3151
2395
			roff_setstrn(&r->xmbtab, first, fsz,
3152
			    second, ssz, 0);
3153
2395
			continue;
3154
		}
3155
3156
78
		if (r->xtab == NULL)
3157
24
			r->xtab = mandoc_calloc(128,
3158
			    sizeof(struct roffstr));
3159
3160
78
		free(r->xtab[(int)*first].p);
3161
78
		r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
3162
78
		r->xtab[(int)*first].sz = ssz;
3163
	}
3164
3165
2449
	return ROFF_IGN;
3166
2467
}
3167
3168
static enum rofferr
3169
roff_rn(ROFF_ARGS)
3170
{
3171
	const char	*value;
3172
72
	char		*oldn, *newn, *end;
3173
	size_t		 oldsz, newsz;
3174
36
	int		 deftype;
3175
3176
36
	oldn = newn = buf->buf + pos;
3177
36
	if (*oldn == '\0')
3178
		return ROFF_IGN;
3179
3180
36
	oldsz = roff_getname(r, &newn, ln, pos);
3181

72
	if (oldn[oldsz] == '\\' || *newn == '\0')
3182
		return ROFF_IGN;
3183
3184
36
	end = newn;
3185
36
	newsz = roff_getname(r, &end, ln, newn - buf->buf);
3186
36
	if (newsz == 0)
3187
		return ROFF_IGN;
3188
3189
36
	deftype = ROFFDEF_ANY;
3190
36
	value = roff_getstrn(r, oldn, oldsz, &deftype);
3191

36
	switch (deftype) {
3192
	case ROFFDEF_USER:
3193
		roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3194
		roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0);
3195
		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3196
		break;
3197
	case ROFFDEF_PRE:
3198
		roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3199
		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3200
		break;
3201
	case ROFFDEF_REN:
3202
		roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0);
3203
		roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0);
3204
		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3205
		break;
3206
	case ROFFDEF_STD:
3207
36
		roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0);
3208
36
		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3209
36
		break;
3210
	default:
3211
		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3212
		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3213
		break;
3214
	}
3215
36
	return ROFF_IGN;
3216
36
}
3217
3218
static enum rofferr
3219
roff_so(ROFF_ARGS)
3220
{
3221
188
	char *name, *cp;
3222
3223
94
	name = buf->buf + pos;
3224
94
	mandoc_vmsg(MANDOCERR_SO, r->parse, ln, ppos, "so %s", name);
3225
3226
	/*
3227
	 * Handle `so'.  Be EXTREMELY careful, as we shouldn't be
3228
	 * opening anything that's not in our cwd or anything beneath
3229
	 * it.  Thus, explicitly disallow traversing up the file-system
3230
	 * or using absolute paths.
3231
	 */
3232
3233

282
	if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
3234
		mandoc_vmsg(MANDOCERR_SO_PATH, r->parse, ln, ppos,
3235
		    ".so %s", name);
3236
		buf->sz = mandoc_asprintf(&cp,
3237
		    ".sp\nSee the file %s.\n.sp", name) + 1;
3238
		free(buf->buf);
3239
		buf->buf = cp;
3240
		*offs = 0;
3241
		return ROFF_REPARSE;
3242
	}
3243
3244
94
	*offs = pos;
3245
94
	return ROFF_SO;
3246
94
}
3247
3248
/* --- user defined strings and macros ------------------------------------ */
3249
3250
static enum rofferr
3251
roff_userdef(ROFF_ARGS)
3252
{
3253
267373
	const char	 *arg[16], *ap;
3254
267373
	char		 *cp, *n1, *n2;
3255
	int		  expand_count, i, ib, ie;
3256
	size_t		  asz, rsz;
3257
3258
	/*
3259
	 * Collect pointers to macro argument strings
3260
	 * and NUL-terminate them.
3261
	 */
3262
3263
267373
	r->argc = 0;
3264
267373
	cp = buf->buf + pos;
3265
9090682
	for (i = 0; i < 16; i++) {
3266
4277968
		if (*cp == '\0')
3267
3981780
			arg[i] = "";
3268
		else {
3269
296188
			arg[i] = mandoc_getarg(r->parse, &cp, ln, &pos);
3270
296188
			r->argc = i + 1;
3271
		}
3272
	}
3273
3274
	/*
3275
	 * Expand macro arguments.
3276
	 */
3277
3278
267373
	buf->sz = strlen(r->current_string) + 1;
3279
267373
	n1 = n2 = cp = mandoc_malloc(buf->sz);
3280
267373
	memcpy(n1, r->current_string, buf->sz);
3281
	expand_count = 0;
3282
3136896
	while (*cp != '\0') {
3283
3284
		/* Scan ahead for the next argument invocation. */
3285
3286
2602168
		if (*cp++ != '\\')
3287
			continue;
3288
60842
		if (*cp++ != '$')
3289
			continue;
3290
56366
		if (*cp == '*') {  /* \\$* inserts all arguments */
3291
			ib = 0;
3292
45
			ie = r->argc - 1;
3293
45
		} else {  /* \\$1 .. \\$9 insert one argument */
3294
56321
			ib = ie = *cp - '1';
3295
56321
			if (ib < 0 || ib > 8)
3296
				continue;
3297
		}
3298
56366
		cp -= 2;
3299
3300
		/*
3301
		 * Prevent infinite recursion.
3302
		 */
3303
3304
56366
		if (cp >= n2)
3305
38348
			expand_count = 1;
3306
18018
		else if (++expand_count > EXPAND_LIMIT) {
3307
36
			mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
3308
18
			    ln, (int)(cp - n1), NULL);
3309
18
			free(buf->buf);
3310
18
			buf->buf = n1;
3311
18
			return ROFF_IGN;
3312
		}
3313
3314
		/*
3315
		 * Determine the size of the expanded argument,
3316
		 * taking escaping of quotes into account.
3317
		 */
3318
3319
112705
		asz = ie > ib ? ie - ib : 0;  /* for blanks */
3320
225374
		for (i = ib; i <= ie; i++) {
3321
315042
			for (ap = arg[i]; *ap != '\0'; ap++) {
3322
101182
				asz++;
3323
101182
				if (*ap == '"')
3324
90
					asz += 3;
3325
			}
3326
		}
3327
56348
		if (asz != 3) {
3328
3329
			/*
3330
			 * Determine the size of the rest of the
3331
			 * unexpanded macro, including the NUL.
3332
			 */
3333
3334
37397
			rsz = buf->sz - (cp - n1) - 3;
3335
3336
			/*
3337
			 * When shrinking, move before
3338
			 * releasing the storage.
3339
			 */
3340
3341
37397
			if (asz < 3)
3342
36737
				memmove(cp + asz, cp + 3, rsz);
3343
3344
			/*
3345
			 * Resize the storage for the macro
3346
			 * and readjust the parse pointer.
3347
			 */
3348
3349
37397
			buf->sz += asz - 3;
3350
37397
			n2 = mandoc_realloc(n1, buf->sz);
3351
37397
			cp = n2 + (cp - n1);
3352
			n1 = n2;
3353
3354
			/*
3355
			 * When growing, make room
3356
			 * for the expanded argument.
3357
			 */
3358
3359
37397
			if (asz > 3)
3360
660
				memmove(cp + asz, cp + 3, rsz);
3361
		}
3362
3363
		/* Copy the expanded argument, escaping quotes. */
3364
3365
56348
		n2 = cp;
3366
225374
		for (i = ib; i <= ie; i++) {
3367
315042
			for (ap = arg[i]; *ap != '\0'; ap++) {
3368
101182
				if (*ap == '"') {
3369
90
					memcpy(n2, "\\(dq", 4);
3370
90
					n2 += 4;
3371
90
				} else
3372
101092
					*n2++ = *ap;
3373
			}
3374
56339
			if (i < ie)
3375
9
				*n2++ = ' ';
3376
		}
3377
	}
3378
3379
	/*
3380
	 * Replace the macro invocation
3381
	 * by the expanded macro.
3382
	 */
3383
3384
267355
	free(buf->buf);
3385
267355
	buf->buf = n1;
3386
267355
	*offs = 0;
3387
3388
672308
	return buf->sz > 1 && buf->buf[buf->sz - 2] == '\n' ?
3389
	   ROFF_REPARSE : ROFF_APPEND;
3390
267373
}
3391
3392
/*
3393
 * Calling a high-level macro that was renamed with .rn.
3394
 * r->current_string has already been set up by roff_parse().
3395
 */
3396
static enum rofferr
3397
roff_renamed(ROFF_ARGS)
3398
{
3399
36
	char	*nbuf;
3400
3401
54
	buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string,
3402
36
	    buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1;
3403
18
	free(buf->buf);
3404
18
	buf->buf = nbuf;
3405
18
	return ROFF_CONT;
3406
18
}
3407
3408
static size_t
3409
roff_getname(struct roff *r, char **cpp, int ln, int pos)
3410
{
3411
4252408
	char	 *name, *cp;
3412
	size_t	  namesz;
3413
3414
2126204
	name = *cpp;
3415
2126204
	if ('\0' == *name)
3416
36
		return 0;
3417
3418
	/* Read until end of name and terminate it with NUL. */
3419
6409733
	for (cp = name; 1; cp++) {
3420

12247165
		if ('\0' == *cp || ' ' == *cp) {
3421
2123557
			namesz = cp - name;
3422
2123557
			break;
3423
		}
3424
4286176
		if ('\\' != *cp)
3425
			continue;
3426
2746
		namesz = cp - name;
3427

3088
		if ('{' == cp[1] || '}' == cp[1])
3428
			break;
3429
324
		cp++;
3430
324
		if ('\\' == *cp)
3431
			continue;
3432
378
		mandoc_vmsg(MANDOCERR_NAMESC, r->parse, ln, pos,
3433
189
		    "%.*s", (int)(cp - name + 1), name);
3434
189
		mandoc_escape((const char **)&cp, NULL, NULL);
3435
189
		break;
3436
	}
3437
3438
	/* Read past spaces. */
3439
5259874
	while (' ' == *cp)
3440
1566853
		cp++;
3441
3442
2126168
	*cpp = cp;
3443
2126168
	return namesz;
3444
2126204
}
3445
3446
/*
3447
 * Store *string into the user-defined string called *name.
3448
 * To clear an existing entry, call with (*r, *name, NULL, 0).
3449
 * append == 0: replace mode
3450
 * append == 1: single-line append mode
3451
 * append == 2: multiline append mode, append '\n' after each call
3452
 */
3453
static void
3454
roff_setstr(struct roff *r, const char *name, const char *string,
3455
	int append)
3456
{
3457
	size_t	 namesz;
3458
3459
4628024
	namesz = strlen(name);
3460
4628024
	roff_setstrn(&r->strtab, name, namesz, string,
3461
4645632
	    string ? strlen(string) : 0, append);
3462
2314012
	roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
3463
2314012
}
3464
3465
static void
3466
roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
3467
		const char *string, size_t stringsz, int append)
3468
{
3469
	struct roffkv	*n;
3470
	char		*c;
3471
	int		 i;
3472
	size_t		 oldch, newch;
3473
3474
	/* Search for an existing string with the same name. */
3475
9381342
	n = *r;
3476
3477

1268345052
	while (n && (namesz != n->key.sz ||
3478
236508970
			strncmp(n->key.p, name, namesz)))
3479
254431974
		n = n->next;
3480
3481
4690671
	if (NULL == n) {
3482
		/* Create a new string table entry. */
3483
4654498
		n = mandoc_malloc(sizeof(struct roffkv));
3484
4654498
		n->key.p = mandoc_strndup(name, namesz);
3485
4654498
		n->key.sz = namesz;
3486
4654498
		n->val.p = NULL;
3487
4654498
		n->val.sz = 0;
3488
4654498
		n->next = *r;
3489
4654498
		*r = n;
3490
4690671
	} else if (0 == append) {
3491
18547
		free(n->val.p);
3492
18547
		n->val.p = NULL;
3493
18547
		n->val.sz = 0;
3494
18547
	}
3495
3496
4690671
	if (NULL == string)
3497
4640731
		return;
3498
3499
	/*
3500
	 * One additional byte for the '\n' in multiline mode,
3501
	 * and one for the terminating '\0'.
3502
	 */
3503
49940
	newch = stringsz + (1 < append ? 2u : 1u);
3504
3505
49940
	if (NULL == n->val.p) {
3506
32314
		n->val.p = mandoc_malloc(newch);
3507
32314
		*n->val.p = '\0';
3508
		oldch = 0;
3509
32314
	} else {
3510
17626
		oldch = n->val.sz;
3511
17626
		n->val.p = mandoc_realloc(n->val.p, oldch + newch);
3512
	}
3513
3514
	/* Skip existing content in the destination buffer. */
3515
49940
	c = n->val.p + (int)oldch;
3516
3517
	/* Append new content to the destination buffer. */
3518
	i = 0;
3519
655432
	while (i < (int)stringsz) {
3520
		/*
3521
		 * Rudimentary roff copy mode:
3522
		 * Handle escaped backslashes.
3523
		 */
3524

303136
		if ('\\' == string[i] && '\\' == string[i + 1])
3525
2923
			i++;
3526
277776
		*c++ = string[i++];
3527
	}
3528
3529
	/* Append terminating bytes. */
3530
49940
	if (1 < append)
3531
17608
		*c++ = '\n';
3532
3533
49940
	*c = '\0';
3534
49940
	n->val.sz = (int)(c - n->val.p);
3535
4740611
}
3536
3537
static const char *
3538
roff_getstrn(const struct roff *r, const char *name, size_t len,
3539
    int *deftype)
3540
{
3541
	const struct roffkv	*n;
3542
	int			 i;
3543
	enum roff_tok		 tok;
3544
3545
4946312
	if (*deftype & ROFFDEF_USER) {
3546
347544766
		for (n = r->strtab; n != NULL; n = n->next) {
3547

173877572
			if (strncmp(name, n->key.p, len) == 0 &&
3548
1952653
			    n->key.p[len] == '\0' &&
3549
1928983
			    n->val.p != NULL) {
3550
649362
				*deftype = ROFFDEF_USER;
3551
649362
				return n->val.p;
3552
			}
3553
		}
3554
	}
3555
1823794
	if (*deftype & ROFFDEF_PRE) {
3556
43158
		for (i = 0; i < PREDEFS_MAX; i++) {
3557

22536
			if (strncmp(name, predefs[i].name, len) == 0 &&
3558
1281
			    predefs[i].name[len] == '\0') {
3559
1242
				*deftype = ROFFDEF_PRE;
3560
1242
				return predefs[i].str;
3561
			}
3562
		}
3563
	}
3564
1822552
	if (*deftype & ROFFDEF_REN) {
3565
296784936
		for (n = r->rentab; n != NULL; n = n->next) {
3566

147849753
			if (strncmp(name, n->key.p, len) == 0 &&
3567
1302994
			    n->key.p[len] == '\0' &&
3568
1279549
			    n->val.p != NULL) {
3569
45
				*deftype = ROFFDEF_REN;
3570
45
				return n->val.p;
3571
			}
3572
		}
3573
	}
3574
1822507
	if (*deftype & ROFFDEF_STD) {
3575
90
		if (r->man->macroset != MACROSET_MAN) {
3576
4230
			for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) {
3577

2124
				if (strncmp(name, roff_name[tok], len) == 0 &&
3578
18
				    roff_name[tok][len] == '\0') {
3579
18
					*deftype = ROFFDEF_STD;
3580
18
					return NULL;
3581
				}
3582
			}
3583
		}
3584
72
		if (r->man->macroset != MACROSET_MDOC) {
3585
3186
			for (tok = MAN_TH; tok < MAN_MAX; tok++) {
3586

1602
				if (strncmp(name, roff_name[tok], len) == 0 &&
3587
36
				    roff_name[tok][len] == '\0') {
3588
36
					*deftype = ROFFDEF_STD;
3589
36
					return NULL;
3590
				}
3591
			}
3592
		}
3593
	}
3594
1822453
	*deftype = 0;
3595
1822453
	return NULL;
3596
2473156
}
3597
3598
static void
3599
roff_freestr(struct roffkv *r)
3600
{
3601
	struct roffkv	 *n, *nn;
3602
3603
9529667
	for (n = r; n; n = nn) {
3604
4654498
		free(n->key.p);
3605
4654498
		free(n->val.p);
3606
4654498
		nn = n->next;
3607
4654498
		free(n);
3608
	}
3609
73557
}
3610
3611
/* --- accessors and utility functions ------------------------------------ */
3612
3613
/*
3614
 * Duplicate an input string, making the appropriate character
3615
 * conversations (as stipulated by `tr') along the way.
3616
 * Returns a heap-allocated string with all the replacements made.
3617
 */
3618
char *
3619
roff_strdup(const struct roff *r, const char *p)
3620
{
3621
	const struct roffkv *cp;
3622
	char		*res;
3623
	const char	*pp;
3624
	size_t		 ssz, sz;
3625
	enum mandoc_esc	 esc;
3626
3627

5479504
	if (NULL == r->xmbtab && NULL == r->xtab)
3628
2221497
		return mandoc_strdup(p);
3629
1036402
	else if ('\0' == *p)
3630
3
		return mandoc_strdup("");
3631
3632
	/*
3633
	 * Step through each character looking for term matches
3634
	 * (remember that a `tr' can be invoked with an escape, which is
3635
	 * a glyph but the escape is multi-character).
3636
	 * We only do this if the character hash has been initialised
3637
	 * and the string is >0 length.
3638
	 */
3639
3640
	res = NULL;
3641
	ssz = 0;
3642
3643
40888545
	while ('\0' != *p) {
3644
37594981
		assert((unsigned int)*p < 128);
3645

75321892
		if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) {
3646
177
			sz = r->xtab[(int)*p].sz;
3647
177
			res = mandoc_realloc(res, ssz + sz + 1);
3648
177
			memcpy(res + ssz, r->xtab[(int)*p].p, sz);
3649
			ssz += sz;
3650
177
			p++;
3651
177
			continue;
3652
37594804
		} else if ('\\' != *p) {
3653
36372450
			res = mandoc_realloc(res, ssz + 2);
3654
36372450
			res[ssz++] = *p++;
3655
36372450
			continue;
3656
		}
3657
3658
		/* Search for term matches. */
3659
4886240
		for (cp = r->xmbtab; cp; cp = cp->next)
3660
1222354
			if (0 == strncmp(p, cp->key.p, cp->key.sz))
3661
				break;
3662
3663
1222354
		if (NULL != cp) {
3664
			/*
3665
			 * A match has been found.
3666
			 * Append the match to the array and move
3667
			 * forward by its keysize.
3668
			 */
3669
1588
			res = mandoc_realloc(res,
3670
1588
			    ssz + cp->val.sz + 1);
3671
1588
			memcpy(res + ssz, cp->val.p, cp->val.sz);
3672
1588
			ssz += cp->val.sz;
3673
1588
			p += (int)cp->key.sz;
3674
1588
			continue;
3675
		}
3676
3677
		/*
3678
		 * Handle escapes carefully: we need to copy
3679
		 * over just the escape itself, or else we might
3680
		 * do replacements within the escape itself.
3681
		 * Make sure to pass along the bogus string.
3682
		 */
3683
1220766
		pp = p++;
3684
1220766
		esc = mandoc_escape(&p, NULL, NULL);
3685
1220766
		if (ESCAPE_ERROR == esc) {
3686
			sz = strlen(pp);
3687
			res = mandoc_realloc(res, ssz + sz + 1);
3688
			memcpy(res + ssz, pp, sz);
3689
			break;
3690
		}
3691
		/*
3692
		 * We bail out on bad escapes.
3693
		 * No need to warn: we already did so when
3694
		 * roff_res() was called.
3695
		 */
3696
1220766
		sz = (int)(p - pp);
3697
1220766
		res = mandoc_realloc(res, ssz + sz + 1);
3698
1220766
		memcpy(res + ssz, pp, sz);
3699
		ssz += sz;
3700
	}
3701
3702
1036399
	res[(int)ssz] = '\0';
3703
1036399
	return res;
3704
3257899
}
3705
3706
int
3707
roff_getformat(const struct roff *r)
3708
{
3709
3710
44372
	return r->format;
3711
}
3712
3713
/*
3714
 * Find out whether a line is a macro line or not.
3715
 * If it is, adjust the current position and return one; if it isn't,
3716
 * return zero and don't change the current position.
3717
 * If the control character has been set with `.cc', then let that grain
3718
 * precedence.
3719
 * This is slighly contrary to groff, where using the non-breaking
3720
 * control character when `cc' has been invoked will cause the
3721
 * non-breaking macro contents to be printed verbatim.
3722
 */
3723
int
3724
roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
3725
{
3726
	int		pos;
3727
3728
13550022
	pos = *ppos;
3729
3730

6775227
	if (r->control != '\0' && cp[pos] == r->control)
3731
72
		pos++;
3732
6774939
	else if (r->control != '\0')
3733
144
		return 0;
3734

7328273
	else if ('\\' == cp[pos] && '.' == cp[pos + 1])
3735
36
		pos += 2;
3736

9839177
	else if ('.' == cp[pos] || '\'' == cp[pos])
3737
3715289
		pos++;
3738
	else
3739
3059470
		return 0;
3740
3741

11830785
	while (' ' == cp[pos] || '\t' == cp[pos])
3742
228198
		pos++;
3743
3744
3715397
	*ppos = pos;
3745
3715397
	return 1;
3746
6775011
}