GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: usr.bin/mandoc/roff.c Lines: 1389 1564 88.8 %
Date: 2017-11-13 Branches: 841 1003 83.8 %

Line Branch Exec Source
1
/*	$OpenBSD: roff.c,v 1.196 2017/07/14 17:16:13 schwarze Exp $ */
2
/*
3
 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4
 * Copyright (c) 2010-2015, 2017 Ingo Schwarze <schwarze@openbsd.org>
5
 *
6
 * Permission to use, copy, modify, and distribute this software for any
7
 * purpose with or without fee is hereby granted, provided that the above
8
 * copyright notice and this permission notice appear in all copies.
9
 *
10
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17
 */
18
#include <sys/types.h>
19
20
#include <assert.h>
21
#include <ctype.h>
22
#include <limits.h>
23
#include <stddef.h>
24
#include <stdint.h>
25
#include <stdio.h>
26
#include <stdlib.h>
27
#include <string.h>
28
29
#include "mandoc.h"
30
#include "mandoc_aux.h"
31
#include "mandoc_ohash.h"
32
#include "roff.h"
33
#include "libmandoc.h"
34
#include "roff_int.h"
35
#include "libroff.h"
36
37
/*
 * Upper bound on the number of string expansions performed for a single
 * input line; it exists to break infinite expansion loops.
 */
38
#define	EXPAND_LIMIT	1000
39
40
/*
 * Types of definitions of macros and strings.
 * Each type is a distinct bit so that several can be OR-ed into one mask;
 * note that the numbering starts at bit 1, bit 0 is not used here.
 */
41
#define	ROFFDEF_USER	(1 << 1)  /* User-defined. */
42
#define	ROFFDEF_PRE	(1 << 2)  /* Predefined. */
43
#define	ROFFDEF_REN	(1 << 3)  /* Renamed standard macro. */
44
#define	ROFFDEF_STD	(1 << 4)  /* mdoc(7) or man(7) macro. */
45
#define	ROFFDEF_ANY	(ROFFDEF_USER | ROFFDEF_PRE | \
46
			 ROFFDEF_REN | ROFFDEF_STD)  /* Mask of all four types. */
47
48
/* --- data types --------------------------------------------------------- */
49
50
/*
51
 * An incredibly-simple string buffer.
52
 */
53
struct	roffstr {
54
	char		*p; /* NUL-terminated buffer */
55
	size_t		 sz; /* cached strlen(p), i.e. without the terminator */
56
};
57
58
/*
59
 * A key-value roffstr pair as part of a singly-linked list.
60
 */
61
struct	roffkv {
62
	struct roffstr	 key; /* name the entry is stored under */
63
	struct roffstr	 val; /* value associated with the key */
64
	struct roffkv	*next; /* next in list */
65
};
66
67
/*
68
 * A single number register as part of a singly-linked list.
69
 */
70
struct	roffreg {
71
	struct roffstr	 key; /* name of the register */
72
	int		 val; /* integer value of the register */
73
	struct roffreg	*next; /* next register in the list */
74
};
75
76
/*
77
 * Association of request and macro names with token IDs.
78
 */
79
struct	roffreq {
80
	enum roff_tok	 tok; /* token ID the name maps to */
81
	/*
	 * Flexible array member holding the NUL-terminated name.
	 * roffhash_alloc() sizes each allocation to fit the name and
	 * registers offsetof(struct roffreq, name) as the hash key offset.
	 */
	char		 name[];
82
};
83
84
/*
 * State of one roff parser instance.
 * Created by roff_alloc(), returned to its initial state by roff_reset()
 * and released by roff_free().
 */
struct	roff {
85
	struct mparse	*parse; /* parse point */
86
	struct roff_man	*man; /* mdoc or man parser */
87
	struct roffnode	*last; /* leaf of stack */
88
	int		*rstack; /* stack of inverted `ie' values */
89
	struct ohash	*reqtab; /* request lookup table */
90
	struct roffreg	*regtab; /* number registers */
91
	struct roffkv	*strtab; /* user-defined strings & macros */
92
	struct roffkv	*rentab; /* renamed strings & macros */
93
	struct roffkv	*xmbtab; /* multi-byte trans table (`tr') */
94
	struct roffstr	*xtab; /* single-byte trans table (`tr'); */
				/* roff_free1() frees 128 entries of it */
95
	const char	*current_string; /* value of last called user macro */
96
	struct tbl_node	*first_tbl; /* first table parsed */
97
	struct tbl_node	*last_tbl; /* last table parsed */
98
	struct tbl_node	*tbl; /* current table being parsed */
99
	struct eqn_node	*last_eqn; /* equation parser */
100
	struct eqn_node	*eqn; /* active equation parser */
101
	int		 eqn_inline; /* current equation is inline */
102
	int		 options; /* parse options */
103
	int		 rstacksz; /* current size limit of rstack */
104
	int		 rstackpos; /* position in rstack; initialised to -1 */
105
	int		 format; /* current file in mdoc or man format */
106
	int		 argc; /* number of args of the last macro */
107
	char		 control; /* control character; '\0' after a reset */
108
	char		 escape; /* escape character; initialised to '\\' */
109
};
110
111
/*
 * One entry on the stack of pending roff blocks.
 * struct roff.last points at the top entry and "parent" at the entry
 * below it.  Entries are created by roffnode_push(), which copies the
 * name and inherits "rule" from the parent, and destroyed by
 * roffnode_pop(), which frees both "name" and "end".
 */
struct	roffnode {
112
	enum roff_tok	 tok; /* type of node */
113
	struct roffnode	*parent; /* up one in stack */
114
	int		 line; /* parse line */
115
	int		 col; /* parse col */
116
	char		*name; /* node name, e.g. macro name */
117
	char		*end; /* end-rules: custom token */
118
	int		 endspan; /* end-rules: next-line or infty */
119
	int		 rule; /* current evaluation rule */
120
};
121
122
/*
 * Common parameter list shared by all request handlers, so that every
 * handler has the signature required by the roffproc function pointer
 * type declared right below.
 */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
123
			 enum roff_tok tok, /* tok of macro */ \
124
			 struct buf *buf, /* input buffer */ \
125
			 int ln, /* parse line */ \
126
			 int ppos, /* original pos in buffer */ \
127
			 int pos, /* current pos in buffer */ \
128
			 int *offs /* reset offset of buffer data */
129
130
/* Pointer to a request handler taking ROFF_ARGS and returning a rofferr. */
typedef	enum rofferr (*roffproc)(ROFF_ARGS);
131
132
/*
 * Handlers and flags for one request.
 * The roffs[] table holds one of these per token and initializes them
 * positionally, so the member order must not change.
 */
struct	roffmac {
133
	roffproc	 proc; /* process new macro */
134
	roffproc	 text; /* process as child text of macro */
135
	roffproc	 sub; /* process as child of macro */
136
	int		 flags; /* zero or ROFFMAC_STRUCT */
137
#define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
138
};
139
140
/*
 * One predefined string: a name and the text that replaces it.
 * Initialized positionally by the PREDEF() macro, so the member
 * order must not change.
 */
struct	predef {
141
	const char	*name; /* predefined input name */
142
	const char	*str; /* replacement symbol */
143
};
144
145
/*
 * Expand to one positional struct predef initializer, including the
 * trailing comma, so that invocations can simply be listed one after
 * another inside an array initializer.
 */
#define	PREDEF(__name, __str) \
146
	{ (__name), (__str) },
147
148
/* --- function prototypes ------------------------------------------------ */
149
150
/*
 * Forward declarations of the file-local functions.
 * All functions declared with ROFF_ARGS have the roffproc signature
 * and are referenced as handlers from the roffs[] table.
 */
static	void		 roffnode_cleanscope(struct roff *);
151
static	void		 roffnode_pop(struct roff *);
152
static	void		 roffnode_push(struct roff *, enum roff_tok,
153
				const char *, int, int);
154
static	void		 roff_addtbl(struct roff_man *, struct tbl_node *);
155
static	enum rofferr	 roff_als(ROFF_ARGS);
156
static	enum rofferr	 roff_block(ROFF_ARGS);
157
static	enum rofferr	 roff_block_text(ROFF_ARGS);
158
static	enum rofferr	 roff_block_sub(ROFF_ARGS);
159
static	enum rofferr	 roff_br(ROFF_ARGS);
160
static	enum rofferr	 roff_cblock(ROFF_ARGS);
161
static	enum rofferr	 roff_cc(ROFF_ARGS);
162
static	void		 roff_ccond(struct roff *, int, int);
163
static	enum rofferr	 roff_cond(ROFF_ARGS);
164
static	enum rofferr	 roff_cond_text(ROFF_ARGS);
165
static	enum rofferr	 roff_cond_sub(ROFF_ARGS);
166
static	enum rofferr	 roff_ds(ROFF_ARGS);
167
static	enum rofferr	 roff_ec(ROFF_ARGS);
168
static	enum rofferr	 roff_eo(ROFF_ARGS);
169
static	enum rofferr	 roff_eqndelim(struct roff *, struct buf *, int);
170
static	int		 roff_evalcond(struct roff *r, int, char *, int *);
171
static	int		 roff_evalnum(struct roff *, int,
172
				const char *, int *, int *, int);
173
static	int		 roff_evalpar(struct roff *, int,
174
				const char *, int *, int *, int);
175
static	int		 roff_evalstrcond(const char *, int *);
176
static	void		 roff_free1(struct roff *);
177
static	void		 roff_freereg(struct roffreg *);
178
static	void		 roff_freestr(struct roffkv *);
179
static	size_t		 roff_getname(struct roff *, char **, int, int);
180
static	int		 roff_getnum(const char *, int *, int *, int);
181
static	int		 roff_getop(const char *, int *, char *);
182
static	int		 roff_getregn(const struct roff *,
183
				const char *, size_t);
184
static	int		 roff_getregro(const struct roff *,
185
				const char *name);
186
static	const char	*roff_getstrn(const struct roff *,
187
				const char *, size_t, int *);
188
static	int		 roff_hasregn(const struct roff *,
189
				const char *, size_t);
190
static	enum rofferr	 roff_insec(ROFF_ARGS);
191
static	enum rofferr	 roff_it(ROFF_ARGS);
192
static	enum rofferr	 roff_line_ignore(ROFF_ARGS);
193
static	void		 roff_man_alloc1(struct roff_man *);
194
static	void		 roff_man_free1(struct roff_man *);
195
static	enum rofferr	 roff_manyarg(ROFF_ARGS);
196
static	enum rofferr	 roff_nr(ROFF_ARGS);
197
static	enum rofferr	 roff_onearg(ROFF_ARGS);
198
static	enum roff_tok	 roff_parse(struct roff *, char *, int *,
199
				int, int);
200
static	enum rofferr	 roff_parsetext(struct roff *, struct buf *,
201
				int, int *);
202
static	enum rofferr	 roff_renamed(ROFF_ARGS);
203
static	enum rofferr	 roff_res(struct roff *, struct buf *, int, int);
204
static	enum rofferr	 roff_rm(ROFF_ARGS);
205
static	enum rofferr	 roff_rn(ROFF_ARGS);
206
static	enum rofferr	 roff_rr(ROFF_ARGS);
207
static	void		 roff_setstr(struct roff *,
208
				const char *, const char *, int);
209
static	void		 roff_setstrn(struct roffkv **, const char *,
210
				size_t, const char *, size_t, int);
211
static	enum rofferr	 roff_so(ROFF_ARGS);
212
static	enum rofferr	 roff_tr(ROFF_ARGS);
213
static	enum rofferr	 roff_Dd(ROFF_ARGS);
214
static	enum rofferr	 roff_TE(ROFF_ARGS);
215
static	enum rofferr	 roff_TS(ROFF_ARGS);
216
static	enum rofferr	 roff_EQ(ROFF_ARGS);
217
static	enum rofferr	 roff_EN(ROFF_ARGS);
218
static	enum rofferr	 roff_T_(ROFF_ARGS);
219
static	enum rofferr	 roff_unsupp(ROFF_ARGS);
220
static	enum rofferr	 roff_userdef(ROFF_ARGS);
221
222
/* --- constant data ------------------------------------------------------ */
223
224
/*
 * Flag bits for the numeric helpers named in the comments below.
 * NOTE(review): presumably passed as the trailing int argument of
 * roff_getnum() and roff_evalnum() - confirm at the call sites.
 */
#define	ROFFNUM_SCALE	(1 << 0)  /* Honour scaling in roff_getnum(). */
225
#define	ROFFNUM_WHITE	(1 << 1)  /* Skip whitespace in roff_evalnum(). */
226
227
/*
 * Names of all requests and macros, indexed by token number.
 * The array has MAN_MAX + 1 slots.  Slots without a name are NULL;
 * roffhash_alloc() skips those when building a lookup table.
 * The roff request names come first, in the same order as the rows of
 * the roffs[] table; the NULL after "ti" lines up with the row marked
 * ROFF_MAX there, and the two NULLs after "." line up with the
 * roff_renamed and roff_userdef rows.
 * NOTE(review): the groups that follow appear to be the mdoc(7) and
 * man(7) macro names, each closed by a NULL sentinel slot; the order
 * presumably has to match enum roff_tok - confirm against roff.h.
 */
const char *__roff_name[MAN_MAX + 1] = {
228
	"br",		"ce",		"ft",		"ll",
229
	"mc",		"po",		"rj",		"sp",
230
	"ta",		"ti",		NULL,
231
	"ab",		"ad",		"af",		"aln",
232
	"als",		"am",		"am1",		"ami",
233
	"ami1",		"as",		"as1",		"asciify",
234
	"backtrace",	"bd",		"bleedat",	"blm",
235
        "box",		"boxa",		"bp",		"BP",
236
	"break",	"breakchar",	"brnl",		"brp",
237
	"brpnl",	"c2",		"cc",
238
	"cf",		"cflags",	"ch",		"char",
239
	"chop",		"class",	"close",	"CL",
240
	"color",	"composite",	"continue",	"cp",
241
	"cropat",	"cs",		"cu",		"da",
242
	"dch",		"Dd",		"de",		"de1",
243
	"defcolor",	"dei",		"dei1",		"device",
244
	"devicem",	"di",		"do",		"ds",
245
	"ds1",		"dwh",		"dt",		"ec",
246
	"ecr",		"ecs",		"el",		"em",
247
	"EN",		"eo",		"EP",		"EQ",
248
	"errprint",	"ev",		"evc",		"ex",
249
	"fallback",	"fam",		"fc",		"fchar",
250
	"fcolor",	"fdeferlig",	"feature",	"fkern",
251
	"fl",		"flig",		"fp",		"fps",
252
	"fschar",	"fspacewidth",	"fspecial",	"ftr",
253
	"fzoom",	"gcolor",	"hc",		"hcode",
254
	"hidechar",	"hla",		"hlm",		"hpf",
255
	"hpfa",		"hpfcode",	"hw",		"hy",
256
	"hylang",	"hylen",	"hym",		"hypp",
257
	"hys",		"ie",		"if",		"ig",
258
	"index",	"it",		"itc",		"IX",
259
	"kern",		"kernafter",	"kernbefore",	"kernpair",
260
	"lc",		"lc_ctype",	"lds",		"length",
261
	"letadj",	"lf",		"lg",		"lhang",
262
	"linetabs",	"lnr",		"lnrf",		"lpfx",
263
	"ls",		"lsm",		"lt",
264
	"mediasize",	"minss",	"mk",		"mso",
265
	"na",		"ne",		"nh",		"nhychar",
266
	"nm",		"nn",		"nop",		"nr",
267
	"nrf",		"nroff",	"ns",		"nx",
268
	"open",		"opena",	"os",		"output",
269
	"padj",		"papersize",	"pc",		"pev",
270
	"pi",		"PI",		"pl",		"pm",
271
	"pn",		"pnr",		"ps",
272
	"psbb",		"pshape",	"pso",		"ptr",
273
	"pvs",		"rchar",	"rd",		"recursionlimit",
274
	"return",	"rfschar",	"rhang",
275
	"rm",		"rn",		"rnn",		"rr",
276
	"rs",		"rt",		"schar",	"sentchar",
277
	"shc",		"shift",	"sizes",	"so",
278
	"spacewidth",	"special",	"spreadwarn",	"ss",
279
	"sty",		"substring",	"sv",		"sy",
280
	"T&",		"tc",		"TE",
281
	"TH",		"tkf",		"tl",
282
	"tm",		"tm1",		"tmc",		"tr",
283
	"track",	"transchar",	"trf",		"trimat",
284
	"trin",		"trnt",		"troff",	"TS",
285
	"uf",		"ul",		"unformat",	"unwatch",
286
	"unwatchn",	"vpt",		"vs",		"warn",
287
	"warnscale",	"watch",	"watchlength",	"watchn",
288
	"wh",		"while",	"write",	"writec",
289
	"writem",	"xflag",	".",		NULL,
290
	NULL,		"text",
291
	"Dd",		"Dt",		"Os",		"Sh",
292
	"Ss",		"Pp",		"D1",		"Dl",
293
	"Bd",		"Ed",		"Bl",		"El",
294
	"It",		"Ad",		"An",		"Ap",
295
	"Ar",		"Cd",		"Cm",		"Dv",
296
	"Er",		"Ev",		"Ex",		"Fa",
297
	"Fd",		"Fl",		"Fn",		"Ft",
298
	"Ic",		"In",		"Li",		"Nd",
299
	"Nm",		"Op",		"Ot",		"Pa",
300
	"Rv",		"St",		"Va",		"Vt",
301
	"Xr",		"%A",		"%B",		"%D",
302
	"%I",		"%J",		"%N",		"%O",
303
	"%P",		"%R",		"%T",		"%V",
304
	"Ac",		"Ao",		"Aq",		"At",
305
	"Bc",		"Bf",		"Bo",		"Bq",
306
	"Bsx",		"Bx",		"Db",		"Dc",
307
	"Do",		"Dq",		"Ec",		"Ef",
308
	"Em",		"Eo",		"Fx",		"Ms",
309
	"No",		"Ns",		"Nx",		"Ox",
310
	"Pc",		"Pf",		"Po",		"Pq",
311
	"Qc",		"Ql",		"Qo",		"Qq",
312
	"Re",		"Rs",		"Sc",		"So",
313
	"Sq",		"Sm",		"Sx",		"Sy",
314
	"Tn",		"Ux",		"Xc",		"Xo",
315
	"Fo",		"Fc",		"Oo",		"Oc",
316
	"Bk",		"Ek",		"Bt",		"Hf",
317
	"Fr",		"Ud",		"Lb",		"Lp",
318
	"Lk",		"Mt",		"Brq",		"Bro",
319
	"Brc",		"%C",		"Es",		"En",
320
	"Dx",		"%Q",		"%U",		"Ta",
321
	NULL,
322
	"TH",		"SH",		"SS",		"TP",
323
	"LP",		"PP",		"P",		"IP",
324
	"HP",		"SM",		"SB",		"BI",
325
	"IB",		"BR",		"RB",		"R",
326
	"B",		"I",		"IR",		"RI",
327
	"nf",		"fi",
328
	"RE",		"RS",		"DT",		"UC",
329
	"PD",		"AT",		"in",
330
	"OP",		"EX",		"EE",		"UR",
331
	"UE",		"MT",		"ME",		NULL
332
};
333
/* Read-only view of the __roff_name[] table; roffhash_alloc() reads names through it. */
const	char *const *roff_name = __roff_name;
334
335
/*
 * Handler table with TOKEN_NONE rows, one struct roffmac per request.
 * Each row is initialized positionally as { proc, text, sub, flags }.
 * The comment at the end of a row names the request it belongs to; the
 * rows follow the same order as the leading entries of __roff_name[].
 * The last two rows carry no comment: they install roff_renamed() and
 * roff_userdef().
 * NOTE(review): the row order presumably has to match enum roff_tok,
 * which is declared elsewhere - confirm before inserting or moving rows.
 */
static	struct roffmac	 roffs[TOKEN_NONE] = {
336
	{ roff_br, NULL, NULL, 0 },  /* br */
337
	{ roff_onearg, NULL, NULL, 0 },  /* ce */
338
	{ roff_onearg, NULL, NULL, 0 },  /* ft */
339
	{ roff_onearg, NULL, NULL, 0 },  /* ll */
340
	{ roff_onearg, NULL, NULL, 0 },  /* mc */
341
	{ roff_onearg, NULL, NULL, 0 },  /* po */
342
	{ roff_onearg, NULL, NULL, 0 },  /* rj */
343
	{ roff_onearg, NULL, NULL, 0 },  /* sp */
344
	{ roff_manyarg, NULL, NULL, 0 },  /* ta */
345
	{ roff_onearg, NULL, NULL, 0 },  /* ti */
346
	{ NULL, NULL, NULL, 0 },  /* ROFF_MAX */
347
	{ roff_unsupp, NULL, NULL, 0 },  /* ab */
348
	{ roff_line_ignore, NULL, NULL, 0 },  /* ad */
349
	{ roff_line_ignore, NULL, NULL, 0 },  /* af */
350
	{ roff_unsupp, NULL, NULL, 0 },  /* aln */
351
	{ roff_als, NULL, NULL, 0 },  /* als */
352
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am */
353
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am1 */
354
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami */
355
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami1 */
356
	{ roff_ds, NULL, NULL, 0 },  /* as */
357
	{ roff_ds, NULL, NULL, 0 },  /* as1 */
358
	{ roff_unsupp, NULL, NULL, 0 },  /* asciify */
359
	{ roff_line_ignore, NULL, NULL, 0 },  /* backtrace */
360
	{ roff_line_ignore, NULL, NULL, 0 },  /* bd */
361
	{ roff_line_ignore, NULL, NULL, 0 },  /* bleedat */
362
	{ roff_unsupp, NULL, NULL, 0 },  /* blm */
363
	{ roff_unsupp, NULL, NULL, 0 },  /* box */
364
	{ roff_unsupp, NULL, NULL, 0 },  /* boxa */
365
	{ roff_line_ignore, NULL, NULL, 0 },  /* bp */
366
	{ roff_unsupp, NULL, NULL, 0 },  /* BP */
367
	{ roff_unsupp, NULL, NULL, 0 },  /* break */
368
	{ roff_line_ignore, NULL, NULL, 0 },  /* breakchar */
369
	{ roff_line_ignore, NULL, NULL, 0 },  /* brnl */
370
	{ roff_br, NULL, NULL, 0 },  /* brp */
371
	{ roff_line_ignore, NULL, NULL, 0 },  /* brpnl */
372
	{ roff_unsupp, NULL, NULL, 0 },  /* c2 */
373
	{ roff_cc, NULL, NULL, 0 },  /* cc */
374
	{ roff_insec, NULL, NULL, 0 },  /* cf */
375
	{ roff_line_ignore, NULL, NULL, 0 },  /* cflags */
376
	{ roff_line_ignore, NULL, NULL, 0 },  /* ch */
377
	{ roff_unsupp, NULL, NULL, 0 },  /* char */
378
	{ roff_unsupp, NULL, NULL, 0 },  /* chop */
379
	{ roff_line_ignore, NULL, NULL, 0 },  /* class */
380
	{ roff_insec, NULL, NULL, 0 },  /* close */
381
	{ roff_unsupp, NULL, NULL, 0 },  /* CL */
382
	{ roff_line_ignore, NULL, NULL, 0 },  /* color */
383
	{ roff_unsupp, NULL, NULL, 0 },  /* composite */
384
	{ roff_unsupp, NULL, NULL, 0 },  /* continue */
385
	{ roff_line_ignore, NULL, NULL, 0 },  /* cp */
386
	{ roff_line_ignore, NULL, NULL, 0 },  /* cropat */
387
	{ roff_line_ignore, NULL, NULL, 0 },  /* cs */
388
	{ roff_line_ignore, NULL, NULL, 0 },  /* cu */
389
	{ roff_unsupp, NULL, NULL, 0 },  /* da */
390
	{ roff_unsupp, NULL, NULL, 0 },  /* dch */
391
	{ roff_Dd, NULL, NULL, 0 },  /* Dd */
392
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de */
393
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de1 */
394
	{ roff_line_ignore, NULL, NULL, 0 },  /* defcolor */
395
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei */
396
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei1 */
397
	{ roff_unsupp, NULL, NULL, 0 },  /* device */
398
	{ roff_unsupp, NULL, NULL, 0 },  /* devicem */
399
	{ roff_unsupp, NULL, NULL, 0 },  /* di */
400
	{ roff_unsupp, NULL, NULL, 0 },  /* do */
401
	{ roff_ds, NULL, NULL, 0 },  /* ds */
402
	{ roff_ds, NULL, NULL, 0 },  /* ds1 */
403
	{ roff_unsupp, NULL, NULL, 0 },  /* dwh */
404
	{ roff_unsupp, NULL, NULL, 0 },  /* dt */
405
	{ roff_ec, NULL, NULL, 0 },  /* ec */
406
	{ roff_unsupp, NULL, NULL, 0 },  /* ecr */
407
	{ roff_unsupp, NULL, NULL, 0 },  /* ecs */
408
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* el */
409
	{ roff_unsupp, NULL, NULL, 0 },  /* em */
410
	{ roff_EN, NULL, NULL, 0 },  /* EN */
411
	{ roff_eo, NULL, NULL, 0 },  /* eo */
412
	{ roff_unsupp, NULL, NULL, 0 },  /* EP */
413
	{ roff_EQ, NULL, NULL, 0 },  /* EQ */
414
	{ roff_line_ignore, NULL, NULL, 0 },  /* errprint */
415
	{ roff_unsupp, NULL, NULL, 0 },  /* ev */
416
	{ roff_unsupp, NULL, NULL, 0 },  /* evc */
417
	{ roff_unsupp, NULL, NULL, 0 },  /* ex */
418
	{ roff_line_ignore, NULL, NULL, 0 },  /* fallback */
419
	{ roff_line_ignore, NULL, NULL, 0 },  /* fam */
420
	{ roff_unsupp, NULL, NULL, 0 },  /* fc */
421
	{ roff_unsupp, NULL, NULL, 0 },  /* fchar */
422
	{ roff_line_ignore, NULL, NULL, 0 },  /* fcolor */
423
	{ roff_line_ignore, NULL, NULL, 0 },  /* fdeferlig */
424
	{ roff_line_ignore, NULL, NULL, 0 },  /* feature */
425
	{ roff_line_ignore, NULL, NULL, 0 },  /* fkern */
426
	{ roff_line_ignore, NULL, NULL, 0 },  /* fl */
427
	{ roff_line_ignore, NULL, NULL, 0 },  /* flig */
428
	{ roff_line_ignore, NULL, NULL, 0 },  /* fp */
429
	{ roff_line_ignore, NULL, NULL, 0 },  /* fps */
430
	{ roff_unsupp, NULL, NULL, 0 },  /* fschar */
431
	{ roff_line_ignore, NULL, NULL, 0 },  /* fspacewidth */
432
	{ roff_line_ignore, NULL, NULL, 0 },  /* fspecial */
433
	{ roff_line_ignore, NULL, NULL, 0 },  /* ftr */
434
	{ roff_line_ignore, NULL, NULL, 0 },  /* fzoom */
435
	{ roff_line_ignore, NULL, NULL, 0 },  /* gcolor */
436
	{ roff_line_ignore, NULL, NULL, 0 },  /* hc */
437
	{ roff_line_ignore, NULL, NULL, 0 },  /* hcode */
438
	{ roff_line_ignore, NULL, NULL, 0 },  /* hidechar */
439
	{ roff_line_ignore, NULL, NULL, 0 },  /* hla */
440
	{ roff_line_ignore, NULL, NULL, 0 },  /* hlm */
441
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpf */
442
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfa */
443
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfcode */
444
	{ roff_line_ignore, NULL, NULL, 0 },  /* hw */
445
	{ roff_line_ignore, NULL, NULL, 0 },  /* hy */
446
	{ roff_line_ignore, NULL, NULL, 0 },  /* hylang */
447
	{ roff_line_ignore, NULL, NULL, 0 },  /* hylen */
448
	{ roff_line_ignore, NULL, NULL, 0 },  /* hym */
449
	{ roff_line_ignore, NULL, NULL, 0 },  /* hypp */
450
	{ roff_line_ignore, NULL, NULL, 0 },  /* hys */
451
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* ie */
452
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* if */
453
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ig */
454
	{ roff_unsupp, NULL, NULL, 0 },  /* index */
455
	{ roff_it, NULL, NULL, 0 },  /* it */
456
	{ roff_unsupp, NULL, NULL, 0 },  /* itc */
457
	{ roff_line_ignore, NULL, NULL, 0 },  /* IX */
458
	{ roff_line_ignore, NULL, NULL, 0 },  /* kern */
459
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernafter */
460
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernbefore */
461
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernpair */
462
	{ roff_unsupp, NULL, NULL, 0 },  /* lc */
463
	{ roff_unsupp, NULL, NULL, 0 },  /* lc_ctype */
464
	{ roff_unsupp, NULL, NULL, 0 },  /* lds */
465
	{ roff_unsupp, NULL, NULL, 0 },  /* length */
466
	{ roff_line_ignore, NULL, NULL, 0 },  /* letadj */
467
	{ roff_insec, NULL, NULL, 0 },  /* lf */
468
	{ roff_line_ignore, NULL, NULL, 0 },  /* lg */
469
	{ roff_line_ignore, NULL, NULL, 0 },  /* lhang */
470
	{ roff_unsupp, NULL, NULL, 0 },  /* linetabs */
471
	{ roff_unsupp, NULL, NULL, 0 },  /* lnr */
472
	{ roff_unsupp, NULL, NULL, 0 },  /* lnrf */
473
	{ roff_unsupp, NULL, NULL, 0 },  /* lpfx */
474
	{ roff_line_ignore, NULL, NULL, 0 },  /* ls */
475
	{ roff_unsupp, NULL, NULL, 0 },  /* lsm */
476
	{ roff_line_ignore, NULL, NULL, 0 },  /* lt */
477
	{ roff_line_ignore, NULL, NULL, 0 },  /* mediasize */
478
	{ roff_line_ignore, NULL, NULL, 0 },  /* minss */
479
	{ roff_line_ignore, NULL, NULL, 0 },  /* mk */
480
	{ roff_insec, NULL, NULL, 0 },  /* mso */
481
	{ roff_line_ignore, NULL, NULL, 0 },  /* na */
482
	{ roff_line_ignore, NULL, NULL, 0 },  /* ne */
483
	{ roff_line_ignore, NULL, NULL, 0 },  /* nh */
484
	{ roff_line_ignore, NULL, NULL, 0 },  /* nhychar */
485
	{ roff_unsupp, NULL, NULL, 0 },  /* nm */
486
	{ roff_unsupp, NULL, NULL, 0 },  /* nn */
487
	{ roff_unsupp, NULL, NULL, 0 },  /* nop */
488
	{ roff_nr, NULL, NULL, 0 },  /* nr */
489
	{ roff_unsupp, NULL, NULL, 0 },  /* nrf */
490
	{ roff_line_ignore, NULL, NULL, 0 },  /* nroff */
491
	{ roff_line_ignore, NULL, NULL, 0 },  /* ns */
492
	{ roff_insec, NULL, NULL, 0 },  /* nx */
493
	{ roff_insec, NULL, NULL, 0 },  /* open */
494
	{ roff_insec, NULL, NULL, 0 },  /* opena */
495
	{ roff_line_ignore, NULL, NULL, 0 },  /* os */
496
	{ roff_unsupp, NULL, NULL, 0 },  /* output */
497
	{ roff_line_ignore, NULL, NULL, 0 },  /* padj */
498
	{ roff_line_ignore, NULL, NULL, 0 },  /* papersize */
499
	{ roff_line_ignore, NULL, NULL, 0 },  /* pc */
500
	{ roff_line_ignore, NULL, NULL, 0 },  /* pev */
501
	{ roff_insec, NULL, NULL, 0 },  /* pi */
502
	{ roff_unsupp, NULL, NULL, 0 },  /* PI */
503
	{ roff_line_ignore, NULL, NULL, 0 },  /* pl */
504
	{ roff_line_ignore, NULL, NULL, 0 },  /* pm */
505
	{ roff_line_ignore, NULL, NULL, 0 },  /* pn */
506
	{ roff_line_ignore, NULL, NULL, 0 },  /* pnr */
507
	{ roff_line_ignore, NULL, NULL, 0 },  /* ps */
508
	{ roff_unsupp, NULL, NULL, 0 },  /* psbb */
509
	{ roff_unsupp, NULL, NULL, 0 },  /* pshape */
510
	{ roff_insec, NULL, NULL, 0 },  /* pso */
511
	{ roff_line_ignore, NULL, NULL, 0 },  /* ptr */
512
	{ roff_line_ignore, NULL, NULL, 0 },  /* pvs */
513
	{ roff_unsupp, NULL, NULL, 0 },  /* rchar */
514
	{ roff_line_ignore, NULL, NULL, 0 },  /* rd */
515
	{ roff_line_ignore, NULL, NULL, 0 },  /* recursionlimit */
516
	{ roff_unsupp, NULL, NULL, 0 },  /* return */
517
	{ roff_unsupp, NULL, NULL, 0 },  /* rfschar */
518
	{ roff_line_ignore, NULL, NULL, 0 },  /* rhang */
519
	{ roff_rm, NULL, NULL, 0 },  /* rm */
520
	{ roff_rn, NULL, NULL, 0 },  /* rn */
521
	{ roff_unsupp, NULL, NULL, 0 },  /* rnn */
522
	{ roff_rr, NULL, NULL, 0 },  /* rr */
523
	{ roff_line_ignore, NULL, NULL, 0 },  /* rs */
524
	{ roff_line_ignore, NULL, NULL, 0 },  /* rt */
525
	{ roff_unsupp, NULL, NULL, 0 },  /* schar */
526
	{ roff_line_ignore, NULL, NULL, 0 },  /* sentchar */
527
	{ roff_line_ignore, NULL, NULL, 0 },  /* shc */
528
	{ roff_unsupp, NULL, NULL, 0 },  /* shift */
529
	{ roff_line_ignore, NULL, NULL, 0 },  /* sizes */
530
	{ roff_so, NULL, NULL, 0 },  /* so */
531
	{ roff_line_ignore, NULL, NULL, 0 },  /* spacewidth */
532
	{ roff_line_ignore, NULL, NULL, 0 },  /* special */
533
	{ roff_line_ignore, NULL, NULL, 0 },  /* spreadwarn */
534
	{ roff_line_ignore, NULL, NULL, 0 },  /* ss */
535
	{ roff_line_ignore, NULL, NULL, 0 },  /* sty */
536
	{ roff_unsupp, NULL, NULL, 0 },  /* substring */
537
	{ roff_line_ignore, NULL, NULL, 0 },  /* sv */
538
	{ roff_insec, NULL, NULL, 0 },  /* sy */
539
	{ roff_T_, NULL, NULL, 0 },  /* T& */
540
	{ roff_unsupp, NULL, NULL, 0 },  /* tc */
541
	{ roff_TE, NULL, NULL, 0 },  /* TE */
542
	{ roff_Dd, NULL, NULL, 0 },  /* TH */
543
	{ roff_line_ignore, NULL, NULL, 0 },  /* tkf */
544
	{ roff_unsupp, NULL, NULL, 0 },  /* tl */
545
	{ roff_line_ignore, NULL, NULL, 0 },  /* tm */
546
	{ roff_line_ignore, NULL, NULL, 0 },  /* tm1 */
547
	{ roff_line_ignore, NULL, NULL, 0 },  /* tmc */
548
	{ roff_tr, NULL, NULL, 0 },  /* tr */
549
	{ roff_line_ignore, NULL, NULL, 0 },  /* track */
550
	{ roff_line_ignore, NULL, NULL, 0 },  /* transchar */
551
	{ roff_insec, NULL, NULL, 0 },  /* trf */
552
	{ roff_line_ignore, NULL, NULL, 0 },  /* trimat */
553
	{ roff_unsupp, NULL, NULL, 0 },  /* trin */
554
	{ roff_unsupp, NULL, NULL, 0 },  /* trnt */
555
	{ roff_line_ignore, NULL, NULL, 0 },  /* troff */
556
	{ roff_TS, NULL, NULL, 0 },  /* TS */
557
	{ roff_line_ignore, NULL, NULL, 0 },  /* uf */
558
	{ roff_line_ignore, NULL, NULL, 0 },  /* ul */
559
	{ roff_unsupp, NULL, NULL, 0 },  /* unformat */
560
	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatch */
561
	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatchn */
562
	{ roff_line_ignore, NULL, NULL, 0 },  /* vpt */
563
	{ roff_line_ignore, NULL, NULL, 0 },  /* vs */
564
	{ roff_line_ignore, NULL, NULL, 0 },  /* warn */
565
	{ roff_line_ignore, NULL, NULL, 0 },  /* warnscale */
566
	{ roff_line_ignore, NULL, NULL, 0 },  /* watch */
567
	{ roff_line_ignore, NULL, NULL, 0 },  /* watchlength */
568
	{ roff_line_ignore, NULL, NULL, 0 },  /* watchn */
569
	{ roff_unsupp, NULL, NULL, 0 },  /* wh */
570
	{ roff_unsupp, NULL, NULL, 0 },  /* while */
571
	{ roff_insec, NULL, NULL, 0 },  /* write */
572
	{ roff_insec, NULL, NULL, 0 },  /* writec */
573
	{ roff_insec, NULL, NULL, 0 },  /* writem */
574
	{ roff_line_ignore, NULL, NULL, 0 },  /* xflag */
575
	{ roff_cblock, NULL, NULL, 0 },  /* . */
576
	{ roff_renamed, NULL, NULL, 0 },
577
	{ roff_userdef, NULL, NULL, 0 }
578
};
579
580
/*
 * Array of injected predefined strings.
 * The initializers are pulled in from predefs.in at compile time.
 * NOTE(review): predefs.in presumably consists of PREDEF(name, str)
 * invocations, and PREDEFS_MAX presumably has to be kept in sync with
 * the number of entries in that file - confirm when editing either.
 */
581
#define	PREDEFS_MAX	 38
582
static	const struct predef predefs[PREDEFS_MAX] = {
583
#include "predefs.in"
584
};
585
586
/*
 * File-scope parser state.  It lives outside struct roff and is
 * cleared by roff_reset() (counters to 0, pointers to NULL).
 */
static	int	 roffce_lines;	/* number of input lines to center */
587
static	struct roff_node *roffce_node;  /* active request */
588
static	int	 roffit_lines;  /* number of lines to delay */
589
static	char	*roffit_macro;  /* NUL-terminated macro line */
590
591
592
/* --- request table ------------------------------------------------------ */
593
594
struct ohash *
595
roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok)
596
{
597
	struct ohash	*htab;
598
	struct roffreq	*req;
599
	enum roff_tok	 tok;
600
	size_t		 sz;
601
	unsigned int	 slot;
602
603
16930
	htab = mandoc_malloc(sizeof(*htab));
604
8465
	mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name));
605
606
2841244
	for (tok = mintok; tok < maxtok; tok++) {
607
1412157
		if (roff_name[tok] == NULL)
608
			continue;
609
1403691
		sz = strlen(roff_name[tok]);
610
1403691
		req = mandoc_malloc(sizeof(*req) + sz + 1);
611
1403691
		req->tok = tok;
612
1403691
		memcpy(req->name, roff_name[tok], sz + 1);
613
1403691
		slot = ohash_qlookup(htab, req->name);
614
1403691
		ohash_insert(htab, slot, req);
615
1403691
	}
616
8465
	return htab;
617
}
618
619
/*
 * Release a table built by roffhash_alloc(): free every entry, then
 * the hash structure and finally the table object itself.
 * Passing NULL is allowed and does nothing.
 */
void
roffhash_free(struct ohash *htab)
{
	struct roffreq	*entry;
	unsigned int	 cursor;

	if (htab != NULL) {
		entry = ohash_first(htab, &cursor);
		while (entry != NULL) {
			free(entry);
			entry = ohash_next(htab, &cursor);
		}
		ohash_delete(htab);
		free(htab);
	}
}
633
634
enum roff_tok
635
roffhash_find(struct ohash *htab, const char *name, size_t sz)
636
{
637
	struct roffreq	*req;
638
2714386
	const char	*end;
639
640
1357193
	if (sz) {
641
1006059
		end = name + sz;
642
1006059
		req = ohash_find(htab, ohash_qlookupi(htab, name, &end));
643
1006059
	} else
644
351134
		req = ohash_find(htab, ohash_qlookup(htab, name));
645
4724244
	return req == NULL ? TOKEN_NONE : req->tok;
646
1357193
}
647
648
/* --- stack of request blocks -------------------------------------------- */
649
650
/*
651
 * Pop the current node off of the stack of roff instructions currently
652
 * pending.
653
 */
654
static void
655
roffnode_pop(struct roff *r)
656
{
657
	struct roffnode	*p;
658
659
105188
	assert(r->last);
660
	p = r->last;
661
662
52594
	r->last = r->last->parent;
663
52594
	free(p->name);
664
52594
	free(p->end);
665
52594
	free(p);
666
52594
}
667
668
/*
669
 * Push a roff node onto the instruction stack.  This must later be
670
 * removed with roffnode_pop().
671
 */
672
static void
673
roffnode_push(struct roff *r, enum roff_tok tok, const char *name,
674
		int line, int col)
675
{
676
	struct roffnode	*p;
677
678
105188
	p = mandoc_calloc(1, sizeof(struct roffnode));
679
52594
	p->tok = tok;
680
52594
	if (name)
681
3087
		p->name = mandoc_strdup(name);
682
52594
	p->parent = r->last;
683
52594
	p->line = line;
684
52594
	p->col = col;
685
107786
	p->rule = p->parent ? p->parent->rule : 0;
686
687
52594
	r->last = p;
688
52594
}
689
690
/* --- roff parser state data management ---------------------------------- */
691
692
static void
693
roff_free1(struct roff *r)
694
{
695
	struct tbl_node	*tbl;
696
	int		 i;
697
698
28817
	while (NULL != (tbl = r->first_tbl)) {
699
2314
		r->first_tbl = tbl->next;
700
2314
		tbl_free(tbl);
701
	}
702
8063
	r->first_tbl = r->last_tbl = r->tbl = NULL;
703
704
8063
	if (r->last_eqn != NULL)
705
130
		eqn_free(r->last_eqn);
706
8063
	r->last_eqn = r->eqn = NULL;
707
708
16150
	while (r->last)
709
12
		roffnode_pop(r);
710
711
8063
	free (r->rstack);
712
8063
	r->rstack = NULL;
713
8063
	r->rstacksz = 0;
714
8063
	r->rstackpos = -1;
715
716
8063
	roff_freereg(r->regtab);
717
8063
	r->regtab = NULL;
718
719
8063
	roff_freestr(r->strtab);
720
8063
	roff_freestr(r->rentab);
721
8063
	roff_freestr(r->xmbtab);
722
8063
	r->strtab = r->rentab = r->xmbtab = NULL;
723
724
8063
	if (r->xtab)
725
2064
		for (i = 0; i < 128; i++)
726
1024
			free(r->xtab[i].p);
727
8063
	free(r->xtab);
728
8063
	r->xtab = NULL;
729
8063
}
730
731
void
732
roff_reset(struct roff *r)
733
{
734
7660
	roff_free1(r);
735
3830
	r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
736
3830
	r->control = '\0';
737
3830
	r->escape = '\\';
738
3830
	roffce_lines = 0;
739
3830
	roffce_node = NULL;
740
3830
	roffit_lines = 0;
741
3830
	roffit_macro = NULL;
742
3830
}
743
744
void
745
roff_free(struct roff *r)
746
{
747
8466
	roff_free1(r);
748
4233
	roffhash_free(r->reqtab);
749
4233
	free(r);
750
4233
}
751
752
struct roff *
753
roff_alloc(struct mparse *parse, int options)
754
{
755
	struct roff	*r;
756
757
8466
	r = mandoc_calloc(1, sizeof(struct roff));
758
4233
	r->parse = parse;
759
4233
	r->reqtab = roffhash_alloc(0, ROFF_USERDEF);
760
4233
	r->options = options;
761
4233
	r->format = options & (MPARSE_MDOC | MPARSE_MAN);
762
4233
	r->rstackpos = -1;
763
4233
	r->escape = '\\';
764
4233
	return r;
765
}
766
767
/* --- syntax tree state data management ---------------------------------- */
768
769
static void
770
roff_man_free1(struct roff_man *man)
771
{
772
773
16126
	if (man->first != NULL)
774
8063
		roff_node_delete(man, man->first);
775
8063
	free(man->meta.msec);
776
8063
	free(man->meta.vol);
777
8063
	free(man->meta.os);
778
8063
	free(man->meta.arch);
779
8063
	free(man->meta.title);
780
8063
	free(man->meta.name);
781
8063
	free(man->meta.date);
782
8063
}
783
784
static void
785
roff_man_alloc1(struct roff_man *man)
786
{
787
788
16126
	memset(&man->meta, 0, sizeof(man->meta));
789
8063
	man->first = mandoc_calloc(1, sizeof(*man->first));
790
8063
	man->first->type = ROFFT_ROOT;
791
8063
	man->last = man->first;
792
8063
	man->last_es = NULL;
793
8063
	man->flags = 0;
794
8063
	man->macroset = MACROSET_NONE;
795
8063
	man->lastsec = man->lastnamed = SEC_NONE;
796
8063
	man->next = ROFF_NEXT_CHILD;
797
8063
}
798
799
/*
 * Prepare a struct roff_man for parsing another document: discard the
 * current tree and meta data, then set up a fresh empty document.
 */
void
roff_man_reset(struct roff_man *man)
{
	/* Tear down first; the setup step overwrites the stale pointers. */
	roff_man_free1(man);
	roff_man_alloc1(man);
}
806
807
/*
 * Destroy a struct roff_man created by roff_man_alloc(): release the
 * document it holds and then the object itself.
 */
void
roff_man_free(struct roff_man *man)
{
	/* Contents first, container last. */
	roff_man_free1(man);
	free(man);
}
814
815
struct roff_man *
816
roff_man_alloc(struct roff *roff, struct mparse *parse,
817
	const char *os_s, int quick)
818
{
819
	struct roff_man *man;
820
821
8466
	man = mandoc_calloc(1, sizeof(*man));
822
4233
	man->parse = parse;
823
4233
	man->roff = roff;
824
4233
	man->os_s = os_s;
825
4233
	man->quick = quick;
826
4233
	roff_man_alloc1(man);
827
4233
	roff->man = man;
828
4233
	return man;
829
}
830
831
/* --- syntax tree handling ----------------------------------------------- */
832
833
struct roff_node *
834
roff_node_alloc(struct roff_man *man, int line, int pos,
835
	enum roff_type type, int tok)
836
{
837
	struct roff_node	*n;
838
839
3819112
	n = mandoc_calloc(1, sizeof(*n));
840
1909556
	n->line = line;
841
1909556
	n->pos = pos;
842
1909556
	n->tok = tok;
843
1909556
	n->type = type;
844
1909556
	n->sec = man->lastsec;
845
846
3819112
	if (man->flags & MDOC_SYNOPSIS)
847
1909556
		n->flags |= NODE_SYNPRETTY;
848
	else
849
1909556
		n->flags &= ~NODE_SYNPRETTY;
850
1909556
	if (man->flags & MDOC_NEWLINE)
851
882237
		n->flags |= NODE_LINE;
852
1909556
	man->flags &= ~MDOC_NEWLINE;
853
854
1909556
	return n;
855
}
856
857
void
858
roff_node_append(struct roff_man *man, struct roff_node *n)
859
{
860
861
3820470
	switch (man->next) {
862
	case ROFF_NEXT_SIBLING:
863
1175980
		if (man->last->next != NULL) {
864
1241
			n->next = man->last->next;
865
1241
			man->last->next->prev = n;
866
1241
		} else
867
1174739
			man->last->parent->last = n;
868
1175980
		man->last->next = n;
869
1175980
		n->prev = man->last;
870
1175980
		n->parent = man->last->parent;
871
1175980
		break;
872
	case ROFF_NEXT_CHILD:
873
734255
		if (man->last->child != NULL) {
874
3017
			n->next = man->last->child;
875
3017
			man->last->child->prev = n;
876
3017
		} else
877
731238
			man->last->last = n;
878
734255
		man->last->child = n;
879
734255
		n->parent = man->last;
880
734255
		break;
881
	default:
882
		abort();
883
	}
884
2299353
	man->last = n;
885
886

2299353
	switch (n->type) {
887
	case ROFFT_HEAD:
888
189801
		n->parent->head = n;
889
189801
		break;
890
	case ROFFT_BODY:
891
199154
		if (n->end != ENDBODY_NOT)
892
			return;
893
198605
		n->parent->body = n;
894
198605
		break;
895
	case ROFFT_TAIL:
896
163
		n->parent->tail = n;
897
163
		break;
898
	default:
899
		return;
900
	}
901
902
	/*
903
	 * Copy over the normalised-data pointer of our parent.  Not
904
	 * everybody has one, but copying a null pointer is fine.
905
	 */
906
907
388569
	n->norm = n->parent->norm;
908
388569
	assert(n->parent->type == ROFFT_BLOCK);
909
1910235
}
910
911
void
912
roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
913
{
914
	struct roff_node	*n;
915
916
2015922
	n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
917
1007961
	n->string = roff_strdup(man->roff, word);
918
1007961
	roff_node_append(man, n);
919
1007961
	n->flags |= NODE_VALID | NODE_ENDED;
920
1007961
	man->next = ROFF_NEXT_SIBLING;
921
1007961
}
922
923
void
924
roff_word_append(struct roff_man *man, const char *word)
925
{
926
	struct roff_node	*n;
927
99174
	char			*addstr, *newstr;
928
929
49587
	n = man->last;
930
49587
	addstr = roff_strdup(man->roff, word);
931
49587
	mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
932
49587
	free(addstr);
933
49587
	free(n->string);
934
49587
	n->string = newstr;
935
49587
	man->next = ROFF_NEXT_SIBLING;
936
49587
}
937
938
void
939
roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
940
{
941
	struct roff_node	*n;
942
943
157262
	n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
944
78631
	roff_node_append(man, n);
945
78631
	man->next = ROFF_NEXT_CHILD;
946
78631
}
947
948
struct roff_node *
949
roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
950
{
951
	struct roff_node	*n;
952
953
170404
	n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
954
85202
	roff_node_append(man, n);
955
85202
	man->next = ROFF_NEXT_CHILD;
956
85202
	return n;
957
}
958
959
struct roff_node *
960
roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
961
{
962
	struct roff_node	*n;
963
964
379602
	n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
965
189801
	roff_node_append(man, n);
966
189801
	man->next = ROFF_NEXT_CHILD;
967
189801
	return n;
968
}
969
970
struct roff_node *
971
roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
972
{
973
	struct roff_node	*n;
974
975
397210
	n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
976
198605
	roff_node_append(man, n);
977
198605
	man->next = ROFF_NEXT_CHILD;
978
198605
	return n;
979
}
980
981
static void
982
roff_addtbl(struct roff_man *man, struct tbl_node *tbl)
983
{
984
	struct roff_node	*n;
985
	const struct tbl_span	*span;
986
987
38228
	if (man->macroset == MACROSET_MAN)
988
4638
		man_breakscope(man, ROFF_TS);
989
56112
	while ((span = tbl_span(tbl)) != NULL) {
990
8942
		n = roff_node_alloc(man, tbl->line, 0, ROFFT_TBL, TOKEN_NONE);
991
8942
		n->span = span;
992
8942
		roff_node_append(man, n);
993
8942
		n->flags |= NODE_VALID | NODE_ENDED;
994
8942
		man->next = ROFF_NEXT_SIBLING;
995
	}
996
19114
}
997
998
void
999
roff_node_unlink(struct roff_man *man, struct roff_node *n)
1000
{
1001
1002
	/* Adjust siblings. */
1003
1004
3836596
	if (n->prev)
1005
1230
		n->prev->next = n->next;
1006
1918298
	if (n->next)
1007
1178121
		n->next->prev = n->prev;
1008
1009
	/* Adjust parent. */
1010
1011
1918298
	if (n->parent != NULL) {
1012
1910235
		if (n->parent->child == n)
1013
1909005
			n->parent->child = n->next;
1014
1910235
		if (n->parent->last == n)
1015
732114
			n->parent->last = n->prev;
1016
	}
1017
1018
	/* Adjust parse point. */
1019
1020
1918298
	if (man == NULL)
1021
		return;
1022
1918272
	if (man->last == n) {
1023
11892
		if (n->prev == NULL) {
1024
11446
			man->last = n->parent;
1025
			man->next = ROFF_NEXT_CHILD;
1026
11446
		} else {
1027
446
			man->last = n->prev;
1028
			man->next = ROFF_NEXT_SIBLING;
1029
		}
1030
11892
	}
1031
1918272
	if (man->first == n)
1032
8063
		man->first = NULL;
1033
1918298
}
1034
1035
void
1036
roff_node_free(struct roff_node *n)
1037
{
1038
1039
3835238
	if (n->args != NULL)
1040
11238
		mdoc_argv_free(n->args);
1041

3645437
	if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
1042
503350
		free(n->norm);
1043
1917619
	if (n->eqn != NULL)
1044
185
		eqn_box_free(n->eqn);
1045
1917619
	free(n->string);
1046
1917619
	free(n);
1047
1917619
}
1048
1049
void
1050
roff_node_delete(struct roff_man *man, struct roff_node *n)
1051
{
1052
1053
9563867
	while (n->child != NULL)
1054
1905505
		roff_node_delete(man, n->child);
1055
1917619
	roff_node_unlink(man, n);
1056
1917619
	roff_node_free(n);
1057
1917619
}
1058
1059
void
1060
deroff(char **dest, const struct roff_node *n)
1061
{
1062
222470
	char	*cp;
1063
	size_t	 sz;
1064
1065
111235
	if (n->type != ROFFT_TEXT) {
1066
222660
		for (n = n->child; n != NULL; n = n->next)
1067
64512
			deroff(dest, n);
1068
46818
		return;
1069
	}
1070
1071
	/* Skip leading whitespace. */
1072
1073
128890
	for (cp = n->string; *cp != '\0'; cp++) {
1074

65499
		if (cp[0] == '\\' && cp[1] != '\0' &&
1075
527
		    strchr(" %&0^|~", cp[1]) != NULL)
1076
21
			cp++;
1077
64424
		else if ( ! isspace((unsigned char)*cp))
1078
			break;
1079
	}
1080
1081
	/* Skip trailing backslash. */
1082
1083
64417
	sz = strlen(cp);
1084

128834
	if (sz > 0 && cp[sz - 1] == '\\')
1085
		sz--;
1086
1087
	/* Skip trailing whitespace. */
1088
1089
128834
	for (; sz; sz--)
1090
64417
		if ( ! isspace((unsigned char)cp[sz-1]))
1091
			break;
1092
1093
	/* Skip empty strings. */
1094
1095
64417
	if (sz == 0)
1096
		return;
1097
1098
64417
	if (*dest == NULL) {
1099
40944
		*dest = mandoc_strndup(cp, sz);
1100
40944
		return;
1101
	}
1102
1103
23473
	mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
1104
23473
	free(*dest);
1105
23473
	*dest = cp;
1106
134708
}
1107
1108
/* --- main functions of the roff parser ---------------------------------- */
1109
1110
/*
1111
 * In the current line, expand escape sequences that tend to get
1112
 * used in numerical expressions and conditional requests.
1113
 * Also check the syntax of the remaining escape sequences.
1114
 */
1115
static enum rofferr
1116
roff_res(struct roff *r, struct buf *buf, int ln, int pos)
1117
{
1118
2567934
	char		 ubuf[24]; /* buffer to print the number */
1119
	const char	*start;	/* start of the string to process */
1120
	char		*stesc;	/* start of an escape sequence ('\\') */
1121
1283967
	const char	*stnam;	/* start of the name, after "[(*" */
1122
1283967
	const char	*cp;	/* end of the name, e.g. before ']' */
1123
	const char	*res;	/* the string to be substituted */
1124
1283967
	char		*nbuf;	/* new buffer to copy buf->buf to */
1125
	size_t		 maxl;  /* expected length of the escape name */
1126
	size_t		 naml;	/* actual length of the escape name */
1127
	enum mandoc_esc	 esc;	/* type of the escape sequence */
1128
1283967
	int		 inaml;	/* length returned from mandoc_escape() */
1129
	int		 expand_count;	/* to avoid infinite loops */
1130
1283967
	int		 npos;	/* position in numeric expression */
1131
	int		 arg_complete; /* argument not interrupted by eol */
1132
	int		 done;	/* no more input available */
1133
1283967
	int		 deftype; /* type of definition to paste */
1134
	int		 rcsid;	/* kind of RCS id seen */
1135
	char		 term;	/* character terminating the escape */
1136
1137
	/* Search forward for comments. */
1138
1139
	done = 0;
1140
1283967
	start = buf->buf + pos;
1141
62177954
	for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) {
1142

30641075
		if (stesc[0] != r->escape || stesc[1] == '\0')
1143
			continue;
1144
720142
		stesc++;
1145

1327733
		if (*stesc != '"' && *stesc != '#')
1146
			continue;
1147
1148
		/* Comment found, look for RCS id. */
1149
1150
		rcsid = 0;
1151
112551
		if ((cp = strstr(stesc, "$" "OpenBSD")) != NULL) {
1152
			rcsid = 1 << MANDOC_OS_OPENBSD;
1153
6573
			cp += 8;
1154
112551
		} else if ((cp = strstr(stesc, "$" "NetBSD")) != NULL) {
1155
			rcsid = 1 << MANDOC_OS_NETBSD;
1156
667
			cp += 7;
1157
667
		}
1158

119791
		if (cp != NULL &&
1159
7240
		    isalnum((unsigned char)*cp) == 0 &&
1160
7240
		    strchr(cp, '$') != NULL) {
1161
7240
			if (r->man->meta.rcsids & rcsid)
1162
2
				mandoc_msg(MANDOCERR_RCS_REP, r->parse,
1163
1
				    ln, stesc + 1 - buf->buf, stesc + 1);
1164
7240
			r->man->meta.rcsids |= rcsid;
1165
7240
		}
1166
1167
		/* Handle trailing whitespace. */
1168
1169
112551
		cp = strchr(stesc--, '\0') - 1;
1170
112551
		if (*cp == '\n') {
1171
			done = 1;
1172
167
			cp--;
1173
167
		}
1174

225030
		if (*cp == ' ' || *cp == '\t')
1175
144
			mandoc_msg(MANDOCERR_SPACE_EOL, r->parse,
1176
72
			    ln, cp - buf->buf, NULL);
1177

459648
		while (stesc > start && stesc[-1] == ' ')
1178
2361
			stesc--;
1179
112551
		*stesc = '\0';
1180
112551
		break;
1181
	}
1182
1283967
	if (stesc == start)
1183
1849
		return ROFF_CONT;
1184
1282118
	stesc--;
1185
1186
	/* Notice the end of the input. */
1187
1188
1282118
	if (*stesc == '\n') {
1189
45142
		*stesc-- = '\0';
1190
		done = 1;
1191
45142
	}
1192
1193
	expand_count = 0;
1194
33314976
	while (stesc >= start) {
1195
1196
		/* Search backwards for the next backslash. */
1197
1198
30632246
		if (*stesc != r->escape) {
1199
29960172
			if (*stesc == '\\') {
1200
				*stesc = '\0';
1201
				buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s",
1202
				    buf->buf, stesc + 1) + 1;
1203
				start = nbuf + pos;
1204
				stesc = nbuf + (stesc - buf->buf);
1205
				free(buf->buf);
1206
				buf->buf = nbuf;
1207
			}
1208
29960172
			stesc--;
1209
29960172
			continue;
1210
		}
1211
1212
		/* If it is escaped, skip it. */
1213
1214
1352324
		for (cp = stesc - 1; cp >= start; cp--)
1215
589046
			if (*cp != r->escape)
1216
				break;
1217
1218
672074
		if ((stesc - cp) % 2 == 0) {
1219
24472
			while (stesc > cp)
1220
8162
				*stesc-- = '\\';
1221
4074
			continue;
1222
668000
		} else if (stesc[1] != '\0') {
1223
664790
			*stesc = '\\';
1224
		} else {
1225
3390
			*stesc-- = '\0';
1226
3390
			if (done)
1227
18
				continue;
1228
			else
1229
3372
				return ROFF_APPEND;
1230
		}
1231
1232
		/* Decide whether to expand or to check only. */
1233
1234
		term = '\0';
1235
664790
		cp = stesc + 1;
1236

664790
		switch (*cp) {
1237
		case '*':
1238
			res = NULL;
1239
116047
			break;
1240
		case 'B':
1241
		case 'w':
1242
180
			term = cp[1];
1243
			/* FALLTHROUGH */
1244
		case 'n':
1245
5825
			res = ubuf;
1246
5825
			break;
1247
		default:
1248
542738
			esc = mandoc_escape(&cp, &stnam, &inaml);
1249

754598
			if (esc == ESCAPE_ERROR ||
1250
542720
			    (esc == ESCAPE_SPECIAL &&
1251
211860
			     mchars_spec2cp(stnam, inaml) < 0))
1252
221
				mandoc_vmsg(MANDOCERR_ESC_BAD,
1253
221
				    r->parse, ln, (int)(stesc - buf->buf),
1254
221
				    "%.*s", (int)(cp - stesc), stesc);
1255
542738
			stesc--;
1256
542738
			continue;
1257
		}
1258
1259
121872
		if (EXPAND_LIMIT < ++expand_count) {
1260
6
			mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
1261
3
			    ln, (int)(stesc - buf->buf), NULL);
1262
3
			return ROFF_IGN;
1263
		}
1264
1265
		/*
1266
		 * The third character decides the length
1267
		 * of the name of the string or register.
1268
		 * Save a pointer to the name.
1269
		 */
1270
1271
121869
		if (term == '\0') {
1272

121689
			switch (*++cp) {
1273
			case '\0':
1274
				maxl = 0;
1275
				break;
1276
			case '(':
1277
115112
				cp++;
1278
				maxl = 2;
1279
115112
				break;
1280
			case '[':
1281
4101
				cp++;
1282
				term = ']';
1283
				maxl = 0;
1284
4101
				break;
1285
			default:
1286
				maxl = 1;
1287
2476
				break;
1288
			}
1289
		} else {
1290
180
			cp += 2;
1291
			maxl = 0;
1292
		}
1293
121869
		stnam = cp;
1294
1295
		/* Advance to the end of the name. */
1296
1297
		naml = 0;
1298
		arg_complete = 1;
1299

854505
		while (maxl == 0 || naml < maxl) {
1300
264731
			if (*cp == '\0') {
1301
50
				mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
1302
25
				    ln, (int)(stesc - buf->buf), stesc);
1303
				arg_complete = 0;
1304
25
				break;
1305
			}
1306

296712
			if (maxl == 0 && *cp == term) {
1307
4256
				cp++;
1308
4256
				break;
1309
			}
1310

260535
			if (*cp++ != '\\' || stesc[1] != 'w') {
1311
260421
				naml++;
1312
260421
				continue;
1313
			}
1314

58
			switch (mandoc_escape(&cp, NULL, NULL)) {
1315
			case ESCAPE_SPECIAL:
1316
			case ESCAPE_UNICODE:
1317
			case ESCAPE_NUMBERED:
1318
			case ESCAPE_OVERSTRIKE:
1319
29
				naml++;
1320
29
				break;
1321
			default:
1322
				break;
1323
			}
1324
		}
1325
1326
		/*
1327
		 * Retrieve the replacement string; if it is
1328
		 * undefined, resume searching for escapes.
1329
		 */
1330
1331

243738
		switch (stesc[1]) {
1332
		case '*':
1333
116044
			if (arg_complete) {
1334
116038
				deftype = ROFFDEF_USER | ROFFDEF_PRE;
1335
116038
				res = roff_getstrn(r, stnam, naml, &deftype);
1336
116038
			}
1337
			break;
1338
		case 'B':
1339
91
			npos = 0;
1340
182
			ubuf[0] = arg_complete &&
1341
84
			    roff_evalnum(r, ln, stnam, &npos,
1342
84
			      NULL, ROFFNUM_SCALE) &&
1343
42
			    stnam + npos + 1 == cp ? '1' : '0';
1344
91
			ubuf[1] = '\0';
1345
91
			break;
1346
		case 'n':
1347
5645
			if (arg_complete)
1348
11278
				(void)snprintf(ubuf, sizeof(ubuf), "%d",
1349
5639
				    roff_getregn(r, stnam, naml));
1350
			else
1351
6
				ubuf[0] = '\0';
1352
			break;
1353
		case 'w':
1354
			/* use even incomplete args */
1355
178
			(void)snprintf(ubuf, sizeof(ubuf), "%d",
1356
89
			    24 * (int)naml);
1357
89
			break;
1358
		}
1359
1360
121869
		if (res == NULL) {
1361
75
			mandoc_vmsg(MANDOCERR_STR_UNDEF,
1362
75
			    r->parse, ln, (int)(stesc - buf->buf),
1363
75
			    "%.*s", (int)naml, stnam);
1364
			res = "";
1365
121869
		} else if (buf->sz + strlen(res) > SHRT_MAX) {
1366
			mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
1367
			    ln, (int)(stesc - buf->buf), NULL);
1368
			return ROFF_IGN;
1369
		}
1370
1371
		/* Replace the escape sequence by the string. */
1372
1373
121869
		*stesc = '\0';
1374
243738
		buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
1375
243738
		    buf->buf, res, cp) + 1;
1376
1377
		/* Prepare for the next replacement. */
1378
1379
121869
		start = nbuf + pos;
1380
121869
		stesc = nbuf + (stesc - buf->buf) + strlen(res);
1381
121869
		free(buf->buf);
1382
121869
		buf->buf = nbuf;
1383
	}
1384
1278743
	return ROFF_CONT;
1385
1283967
}
1386
1387
/*
1388
 * Process text streams.
1389
 */
1390
static enum rofferr
1391
roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs)
1392
{
1393
	size_t		 sz;
1394
	const char	*start;
1395
959926
	char		*p;
1396
	int		 isz;
1397
	enum mandoc_esc	 esc;
1398
1399
	/* Spring the input line trap. */
1400
1401
479963
	if (roffit_lines == 1) {
1402
15
		isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1403
15
		free(buf->buf);
1404
15
		buf->buf = p;
1405
15
		buf->sz = isz + 1;
1406
15
		*offs = 0;
1407
15
		free(roffit_macro);
1408
15
		roffit_lines = 0;
1409
15
		return ROFF_REPARSE;
1410
479948
	} else if (roffit_lines > 1)
1411
6
		--roffit_lines;
1412
1413

479948
	if (roffce_node != NULL && buf->buf[pos] != '\0') {
1414
		if (roffce_lines < 1) {
1415
			r->man->last = roffce_node;
1416
			r->man->next = ROFF_NEXT_SIBLING;
1417
			roffce_lines = 0;
1418
			roffce_node = NULL;
1419
		} else
1420
			roffce_lines--;
1421
	}
1422
1423
	/* Convert all breakable hyphens into ASCII_HYPH. */
1424
1425
479948
	start = p = buf->buf + pos;
1426
1427
1394992
	while (*p != '\0') {
1428
864452
		sz = strcspn(p, "-\\");
1429
864452
		p += sz;
1430
1431
864452
		if (*p == '\0')
1432
			break;
1433
1434
405092
		if (*p == '\\') {
1435
			/* Skip over escapes. */
1436
374945
			p++;
1437
374945
			esc = mandoc_escape((const char **)&p, NULL, NULL);
1438
374945
			if (esc == ESCAPE_ERROR)
1439
				break;
1440
751352
			while (*p == '-')
1441
749
				p++;
1442
374927
			continue;
1443
30147
		} else if (p == start) {
1444
125
			p++;
1445
125
			continue;
1446
		}
1447
1448

55533
		if (isalpha((unsigned char)p[-1]) &&
1449
25511
		    isalpha((unsigned char)p[1]))
1450
24421
			*p = ASCII_HYPH;
1451
30022
		p++;
1452
	}
1453
479948
	return ROFF_CONT;
1454
479963
}
1455
1456
enum rofferr
1457
roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs)
1458
{
1459
	enum roff_tok	 t;
1460
	enum rofferr	 e;
1461
2567958
	int		 pos;	/* parse point */
1462
	int		 spos;	/* saved parse point for messages */
1463
	int		 ppos;	/* original offset in buf->buf */
1464
	int		 ctl;	/* macro line (boolean) */
1465
1466
1283979
	ppos = pos = *offs;
1467
1468
	/* Handle in-line equation delimiters. */
1469
1470

1284003
	if (r->tbl == NULL &&
1471

1263823
	    r->last_eqn != NULL && r->last_eqn->delim &&
1472
140
	    (r->eqn == NULL || r->eqn_inline)) {
1473
98
		e = roff_eqndelim(r, buf, pos);
1474
98
		if (e == ROFF_REPARSE)
1475
12
			return e;
1476
86
		assert(e == ROFF_CONT);
1477
	}
1478
1479
	/* Expand some escape sequences. */
1480
1481
1283967
	e = roff_res(r, buf, ln, pos);
1482
1283967
	if (e == ROFF_IGN || e == ROFF_APPEND)
1483
3375
		return e;
1484
1280592
	assert(e == ROFF_CONT);
1485
1486
1280592
	ctl = roff_getcontrol(r, buf->buf, &pos);
1487
1488
	/*
1489
	 * First, if a scope is open and we're not a macro, pass the
1490
	 * text through the macro's filter.
1491
	 * Equations process all content themselves.
1492
	 * Tables process almost all content themselves, but we want
1493
	 * to warn about macros before passing it there.
1494
	 */
1495
1496
1280592
	if (r->last != NULL && ! ctl) {
1497
1064
		t = r->last->tok;
1498
1064
		e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
1499
1064
		if (e == ROFF_IGN)
1500
686
			return e;
1501
378
		assert(e == ROFF_CONT);
1502
	}
1503

1280596
	if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) {
1504
505
		eqn_read(r->eqn, buf->buf + ppos);
1505
505
		return ROFF_IGN;
1506
	}
1507

1303183
	if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) {
1508
19105
		tbl_read(r->tbl, ln, buf->buf, ppos);
1509
19105
		roff_addtbl(r->man, r->tbl);
1510
19105
		return ROFF_IGN;
1511
	}
1512
1260296
	if ( ! ctl)
1513
479963
		return roff_parsetext(r, buf, pos, offs);
1514
1515
	/* Skip empty request lines. */
1516
1517
780333
	if (buf->buf[pos] == '"') {
1518
12
		mandoc_msg(MANDOCERR_COMMENT_BAD, r->parse,
1519
		    ln, pos, NULL);
1520
12
		return ROFF_IGN;
1521
780321
	} else if (buf->buf[pos] == '\0')
1522
108888
		return ROFF_IGN;
1523
1524
	/*
1525
	 * If a scope is open, go to the child handler for that macro,
1526
	 * as it may want to preprocess before doing anything with it.
1527
	 * Don't do so if an equation is open.
1528
	 */
1529
1530
671433
	if (r->last) {
1531
73316
		t = r->last->tok;
1532
73316
		return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
1533
	}
1534
1535
	/* No scope is open.  This is a new request or macro. */
1536
1537
	spos = pos;
1538
598117
	t = roff_parse(r, buf->buf, &pos, ln, ppos);
1539
1540
	/* Tables ignore most macros. */
1541
1542

609792
	if (r->tbl != NULL && (t == TOKEN_NONE || t == ROFF_TS ||
1543
9340
	    t == ROFF_br || t == ROFF_ce || t == ROFF_rj || t == ROFF_sp)) {
1544
30
		mandoc_msg(MANDOCERR_TBLMACRO, r->parse,
1545
15
		    ln, pos, buf->buf + spos);
1546
15
		if (t != TOKEN_NONE)
1547
6
			return ROFF_IGN;
1548

81
		while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
1549
18
			pos++;
1550
36
		while (buf->buf[pos] == ' ')
1551
9
			pos++;
1552
9
		tbl_read(r->tbl, ln, buf->buf, pos);
1553
9
		roff_addtbl(r->man, r->tbl);
1554
9
		return ROFF_IGN;
1555
	}
1556
1557
	/* For now, let high level macros abort .ce mode. */
1558
1559
598102
	if (ctl && roffce_node != NULL &&
1560
	    (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ ||
1561
	     t == ROFF_TH || t == ROFF_TS)) {
1562
		r->man->last = roffce_node;
1563
		r->man->next = ROFF_NEXT_SIBLING;
1564
		roffce_lines = 0;
1565
		roffce_node = NULL;
1566
	}
1567
1568
	/*
1569
	 * This is neither a roff request nor a user-defined macro.
1570
	 * Let the standard macro set parsers handle it.
1571
	 */
1572
1573
598102
	if (t == TOKEN_NONE)
1574
406429
		return ROFF_CONT;
1575
1576
	/* Execute a roff request or a user defined macro. */
1577
1578
191673
	return (*roffs[t].proc)(r, t, buf, ln, spos, pos, offs);
1579
1283979
}
1580
1581
void
1582
roff_endparse(struct roff *r)
1583
{
1584
16052
	if (r->last != NULL)
1585
24
		mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1586
12
		    r->last->line, r->last->col,
1587
12
		    roff_name[r->last->tok]);
1588
1589
8026
	if (r->eqn != NULL) {
1590
		mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1591
		    r->eqn->node->line, r->eqn->node->pos, "EQ");
1592
		eqn_parse(r->eqn);
1593
		r->eqn = NULL;
1594
	}
1595
1596
8026
	if (r->tbl != NULL) {
1597
		mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1598
		    r->tbl->line, r->tbl->pos, "TS");
1599
		tbl_end(r->tbl);
1600
		r->tbl = NULL;
1601
	}
1602
8026
}
1603
1604
/*
1605
 * Parse a roff node's type from the input buffer.  This must be in the
1606
 * form of ".foo xxx" in the usual way.
1607
 */
1608
static enum roff_tok
1609
roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
1610
{
1611
1342866
	char		*cp;
1612
	const char	*mac;
1613
	size_t		 maclen;
1614
671433
	int		 deftype;
1615
	enum roff_tok	 t;
1616
1617
671433
	cp = buf + *pos;
1618
1619


2680692
	if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
1620
1680
		return TOKEN_NONE;
1621
1622
	mac = cp;
1623
669753
	maclen = roff_getname(r, &cp, ln, ppos);
1624
1625
669753
	deftype = ROFFDEF_USER | ROFFDEF_REN;
1626
669753
	r->current_string = roff_getstrn(r, mac, maclen, &deftype);
1627
669753
	switch (deftype) {
1628
	case ROFFDEF_USER:
1629
		t = ROFF_USERDEF;
1630
83790
		break;
1631
	case ROFFDEF_REN:
1632
		t = ROFF_RENAMED;
1633
6
		break;
1634
	default:
1635
585957
		t = roffhash_find(r->reqtab, mac, maclen);
1636
585957
		break;
1637
	}
1638
669753
	if (t != TOKEN_NONE)
1639
250469
		*pos = cp - buf;
1640
669753
	return t;
1641
671433
}
1642
1643
/* --- handling of request blocks ----------------------------------------- */
1644
1645
static enum rofferr
1646
roff_cblock(ROFF_ARGS)
1647
{
1648
1649
	/*
1650
	 * A block-close `..' should only be invoked as a child of an
1651
	 * ignore macro, otherwise raise a warning and just ignore it.
1652
	 */
1653
1654
6216
	if (r->last == NULL) {
1655
6
		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1656
		    ln, ppos, "..");
1657
6
		return ROFF_IGN;
1658
	}
1659
1660

3102
	switch (r->last->tok) {
1661
	case ROFF_am:
1662
		/* ROFF_am1 is remapped to ROFF_am in roff_block(). */
1663
	case ROFF_ami:
1664
	case ROFF_de:
1665
		/* ROFF_de1 is remapped to ROFF_de in roff_block(). */
1666
	case ROFF_dei:
1667
	case ROFF_ig:
1668
		break;
1669
	default:
1670
		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1671
		    ln, ppos, "..");
1672
		return ROFF_IGN;
1673
	}
1674
1675
3102
	if (buf->buf[pos] != '\0')
1676
		mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
1677
		    ".. %s", buf->buf + pos);
1678
1679
3102
	roffnode_pop(r);
1680
3102
	roffnode_cleanscope(r);
1681
3102
	return ROFF_IGN;
1682
1683
3108
}
1684
1685
static void
1686
roffnode_cleanscope(struct roff *r)
1687
{
1688
1689
308363
	while (r->last) {
1690
68179
		if (--r->last->endspan != 0)
1691
			break;
1692
46126
		roffnode_pop(r);
1693
	}
1694
72037
}
1695
1696
static void
1697
roff_ccond(struct roff *r, int ln, int ppos)
1698
{
1699
1700
6642
	if (NULL == r->last) {
1701
		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1702
		    ln, ppos, "\\}");
1703
		return;
1704
	}
1705
1706

3321
	switch (r->last->tok) {
1707
	case ROFF_el:
1708
	case ROFF_ie:
1709
	case ROFF_if:
1710
		break;
1711
	default:
1712
		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1713
		    ln, ppos, "\\}");
1714
		return;
1715
	}
1716
1717
3321
	if (r->last->endspan > -1) {
1718
		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1719
		    ln, ppos, "\\}");
1720
		return;
1721
	}
1722
1723
3321
	roffnode_pop(r);
1724
3321
	roffnode_cleanscope(r);
1725
3321
	return;
1726
3321
}
1727
1728
static enum rofferr
1729
roff_block(ROFF_ARGS)
1730
{
1731
	const char	*name, *value;
1732
6318
	char		*call, *cp, *iname, *rname;
1733
	size_t		 csz, namesz, rsz;
1734
3159
	int		 deftype;
1735
1736
	/* Ignore groff compatibility mode for now. */
1737
1738
3159
	if (tok == ROFF_de1)
1739
4
		tok = ROFF_de;
1740
3155
	else if (tok == ROFF_dei1)
1741
		tok = ROFF_dei;
1742
3155
	else if (tok == ROFF_am1)
1743
		tok = ROFF_am;
1744
3155
	else if (tok == ROFF_ami1)
1745
		tok = ROFF_ami;
1746
1747
	/* Parse the macro name argument. */
1748
1749
3159
	cp = buf->buf + pos;
1750
3159
	if (tok == ROFF_ig) {
1751
		iname = NULL;
1752
		namesz = 0;
1753
54
	} else {
1754
		iname = cp;
1755
3105
		namesz = roff_getname(r, &cp, ln, ppos);
1756
3105
		iname[namesz] = '\0';
1757
	}
1758
1759
	/* Resolve the macro name argument if it is indirect. */
1760
1761

6252
	if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
1762
21
		deftype = ROFFDEF_USER;
1763
21
		name = roff_getstrn(r, iname, namesz, &deftype);
1764
21
		if (name == NULL) {
1765
6
			mandoc_vmsg(MANDOCERR_STR_UNDEF,
1766
6
			    r->parse, ln, (int)(iname - buf->buf),
1767
6
			    "%.*s", (int)namesz, iname);
1768
			namesz = 0;
1769
6
		} else
1770
15
			namesz = strlen(name);
1771
	} else
1772
		name = iname;
1773
1774
3159
	if (namesz == 0 && tok != ROFF_ig) {
1775
36
		mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse,
1776
18
		    ln, ppos, roff_name[tok]);
1777
18
		return ROFF_IGN;
1778
	}
1779
1780
3141
	roffnode_push(r, tok, name, ln, ppos);
1781
1782
	/*
1783
	 * At the beginning of a `de' macro, clear the existing string
1784
	 * with the same name, if there is one.  New content will be
1785
	 * appended from roff_block_text() in multiline mode.
1786
	 */
1787
1788
3141
	if (tok == ROFF_de || tok == ROFF_dei) {
1789
3075
		roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
1790
3075
		roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
1791
3141
	} else if (tok == ROFF_am || tok == ROFF_ami) {
1792
18
		deftype = ROFFDEF_ANY;
1793
18
		value = roff_getstrn(r, iname, namesz, &deftype);
1794

18
		switch (deftype) {  /* Before appending, ... */
1795
		case ROFFDEF_PRE: /* copy predefined to user-defined. */
1796
			roff_setstrn(&r->strtab, name, namesz,
1797
			    value, strlen(value), 0);
1798
			break;
1799
		case ROFFDEF_REN: /* call original standard macro. */
1800
3
			csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
1801
3
			    (int)strlen(value), value);
1802
3
			roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
1803
3
			roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
1804
3
			free(call);
1805
3
			break;
1806
		case ROFFDEF_STD:  /* rename and call standard macro. */
1807
3
			rsz = mandoc_asprintf(&rname, "__%s_renamed", name);
1808
3
			roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0);
1809
3
			csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
1810
3
			    (int)rsz, rname);
1811
3
			roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
1812
3
			free(call);
1813
3
			free(rname);
1814
3
			break;
1815
		default:
1816
			break;
1817
		}
1818
	}
1819
1820
3141
	if (*cp == '\0')
1821
3096
		return ROFF_IGN;
1822
1823
	/* Get the custom end marker. */
1824
1825
	iname = cp;
1826
45
	namesz = roff_getname(r, &cp, ln, ppos);
1827
1828
	/* Resolve the end marker if it is indirect. */
1829
1830

90
	if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
1831
15
		deftype = ROFFDEF_USER;
1832
15
		name = roff_getstrn(r, iname, namesz, &deftype);
1833
15
		if (name == NULL) {
1834
6
			mandoc_vmsg(MANDOCERR_STR_UNDEF,
1835
6
			    r->parse, ln, (int)(iname - buf->buf),
1836
6
			    "%.*s", (int)namesz, iname);
1837
			namesz = 0;
1838
6
		} else
1839
9
			namesz = strlen(name);
1840
	} else
1841
		name = iname;
1842
1843
45
	if (namesz)
1844
39
		r->last->end = mandoc_strndup(name, namesz);
1845
1846
45
	if (*cp != '\0')
1847
24
		mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
1848
12
		    ln, pos, ".%s ... %s", roff_name[tok], cp);
1849
1850
45
	return ROFF_IGN;
1851
3159
}
1852
1853
static enum rofferr
1854
roff_block_sub(ROFF_ARGS)
1855
{
1856
	enum roff_tok	t;
1857
	int		i, j;
1858
1859
	/*
1860
	 * First check whether a custom macro exists at this level.  If
1861
	 * it does, then check against it.  This is some of groff's
1862
	 * stranger behaviours.  If we encountered a custom end-scope
1863
	 * tag and that tag also happens to be a "real" macro, then we
1864
	 * need to try interpreting it again as a real macro.  If it's
1865
	 * not, then return ignore.  Else continue.
1866
	 */
1867
1868
8542
	if (r->last->end) {
1869
342
		for (i = pos, j = 0; r->last->end[j]; j++, i++)
1870
138
			if (buf->buf[i] != r->last->end[j])
1871
				break;
1872
1873

48
		if (r->last->end[j] == '\0' &&
1874
33
		    (buf->buf[i] == '\0' ||
1875
6
		     buf->buf[i] == ' ' ||
1876
		     buf->buf[i] == '\t')) {
1877
33
			roffnode_pop(r);
1878
33
			roffnode_cleanscope(r);
1879
1880

150
			while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
1881
6
				i++;
1882
1883
33
			pos = i;
1884
33
			if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
1885
			    TOKEN_NONE)
1886
				return ROFF_RERUN;
1887
33
			return ROFF_IGN;
1888
		}
1889
	}
1890
1891
	/*
1892
	 * If we have no custom end-query or lookup failed, then try
1893
	 * pulling it out of the hashtable.
1894
	 */
1895
1896
8509
	t = roff_parse(r, buf->buf, &pos, ln, ppos);
1897
1898
8509
	if (t != ROFF_cblock) {
1899
5407
		if (tok != ROFF_ig)
1900
5287
			roff_setstr(r, r->last->name, buf->buf + ppos, 2);
1901
5407
		return ROFF_IGN;
1902
	}
1903
1904
3102
	return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
1905
8542
}
1906
1907
static enum rofferr
1908
roff_block_text(ROFF_ARGS)
1909
{
1910
1911
514
	if (tok != ROFF_ig)
1912
137
		roff_setstr(r, r->last->name, buf->buf + pos, 2);
1913
1914
257
	return ROFF_IGN;
1915
}
1916
1917
static enum rofferr
1918
roff_cond_sub(ROFF_ARGS)
1919
{
1920
	enum roff_tok	 t;
1921
	char		*ep;
1922
	int		 rr;
1923
1924
64774
	rr = r->last->rule;
1925
64774
	roffnode_cleanscope(r);
1926
1927
	/*
1928
	 * If `\}' occurs on a macro line without a preceding macro,
1929
	 * drop the line completely.
1930
	 */
1931
1932
64774
	ep = buf->buf + pos;
1933

66451
	if (ep[0] == '\\' && ep[1] == '}')
1934
1677
		rr = 0;
1935
1936
	/* Always check for the closing delimiter `\}'. */
1937
1938
213268
	while ((ep = strchr(ep, '\\')) != NULL) {
1939
41860
		switch (ep[1]) {
1940
		case '}':
1941
3207
			memmove(ep, ep + 2, strlen(ep + 2) + 1);
1942
3207
			roff_ccond(r, ln, ep - buf->buf);
1943
3207
			break;
1944
		case '\0':
1945
			++ep;
1946
			break;
1947
		default:
1948
38653
			ep += 2;
1949
38653
			break;
1950
		}
1951
	}
1952
1953
	/*
1954
	 * Fully handle known macros when they are structurally
1955
	 * required or when the conditional evaluated to true.
1956
	 */
1957
1958
64774
	t = roff_parse(r, buf->buf, &pos, ln, ppos);
1959

273044
	return t != TOKEN_NONE && (rr || roffs[t].flags & ROFFMAC_STRUCT)
1960
64774
	    ? (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs) : rr
1961
	    ? ROFF_CONT : ROFF_IGN;
1962
}
1963
1964
static enum rofferr
1965
roff_cond_text(ROFF_ARGS)
1966
{
1967
	char		*ep;
1968
	int		 rr;
1969
1970
1614
	rr = r->last->rule;
1971
807
	roffnode_cleanscope(r);
1972
1973
807
	ep = buf->buf + pos;
1974
1886
	while ((ep = strchr(ep, '\\')) != NULL) {
1975
136
		if (*(++ep) == '}') {
1976
114
			*ep = '&';
1977
114
			roff_ccond(r, ln, ep - buf->buf - 1);
1978
114
		}
1979
136
		if (*ep != '\0')
1980
136
			++ep;
1981
	}
1982
807
	return rr ? ROFF_CONT : ROFF_IGN;
1983
}
1984
1985
/* --- handling of numeric and conditional expressions -------------------- */
1986
1987
/*
1988
 * Parse a single signed integer number.  Stop at the first non-digit.
1989
 * If there is at least one digit, return success and advance the
1990
 * parse point, else return failure and let the parse point unchanged.
1991
 * Ignore overflows, treat them just like the C language.
1992
 */
1993
static int
1994
roff_getnum(const char *v, int *pos, int *res, int flags)
1995
{
1996
26720
	int	 myres, scaled, n, p;
1997
1998
13360
	if (NULL == res)
1999
84
		res = &myres;
2000
2001
13360
	p = *pos;
2002
13360
	n = v[p] == '-';
2003

26702
	if (n || v[p] == '+')
2004
21
		p++;
2005
2006
13360
	if (flags & ROFFNUM_WHITE)
2007
12018
		while (isspace((unsigned char)v[p]))
2008
3
			p++;
2009
2010
60252
	for (*res = 0; isdigit((unsigned char)v[p]); p++)
2011
16766
		*res = 10 * *res + v[p] - '0';
2012
13360
	if (p == *pos + n)
2013
102
		return 0;
2014
2015
13258
	if (n)
2016
18
		*res = -*res;
2017
2018
	/* Each number may be followed by one optional scaling unit. */
2019
2020


13258
	switch (v[p]) {
2021
	case 'f':
2022
3
		scaled = *res * 65536;
2023
3
		break;
2024
	case 'i':
2025
12
		scaled = *res * 240;
2026
12
		break;
2027
	case 'c':
2028
9
		scaled = *res * 240 / 2.54;
2029
9
		break;
2030
	case 'v':
2031
	case 'P':
2032
12
		scaled = *res * 40;
2033
12
		break;
2034
	case 'm':
2035
	case 'n':
2036
1476
		scaled = *res * 24;
2037
1476
		break;
2038
	case 'p':
2039
3
		scaled = *res * 10 / 3;
2040
3
		break;
2041
	case 'u':
2042
2943
		scaled = *res;
2043
2943
		break;
2044
	case 'M':
2045
3
		scaled = *res * 6 / 25;
2046
3
		break;
2047
	default:
2048
8797
		scaled = *res;
2049
8797
		p--;
2050
8797
		break;
2051
	}
2052
13258
	if (flags & ROFFNUM_SCALE)
2053
13237
		*res = scaled;
2054
2055
13258
	*pos = p + 1;
2056
13258
	return 1;
2057
13360
}
2058
2059
/*
 * Evaluate a string comparison condition.
 * The character at v[*pos] serves as the delimiter.
 * Return 1 if the text between its first and second occurrence
 * equals the text between its second and third occurrence,
 * or 0 otherwise.
 * Move *pos behind the third occurrence or, if there is none,
 * to the end of the line.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*delim, *lhs, *rhs;
	int		 equal;

	equal = 0;
	delim = v + *pos;		/* opening delimiter */
	lhs = delim + 1;		/* cursor in the first string */
	rhs = strchr(lhs, *delim);	/* cursor in the second string */

	/* Without a middle delimiter, there is nothing to compare. */
	if (rhs != NULL) {
		for (rhs++; *rhs != '\0'; rhs++, lhs++) {
			if (*lhs != *rhs) {
				/* Mismatch: seek the final delimiter. */
				rhs = strchr(rhs, *delim);
				break;
			}
			if (*rhs == *delim) {
				/* Both strings ended together. */
				equal = 1;
				break;
			}
		}
	}

	if (rhs == NULL)
		rhs = strchr(lhs, '\0');
	else if (*rhs != '\0')
		rhs++;
	*pos = rhs - v;
	return equal;
}
2101
2102
/*
2103
 * Evaluate an optionally negated single character, numerical,
2104
 * or string condition.
2105
 */
2106
static int
2107
roff_evalcond(struct roff *r, int ln, char *v, int *pos)
2108
{
2109
84334
	char	*cp, *name;
2110
	size_t	 sz;
2111
42167
	int	 deftype, number, savepos, istrue, wanttrue;
2112
2113
42167
	if ('!' == v[*pos]) {
2114
		wanttrue = 0;
2115
1497
		(*pos)++;
2116
1497
	} else
2117
		wanttrue = 1;
2118
2119


42167
	switch (v[*pos]) {
2120
	case '\0':
2121
6
		return 0;
2122
	case 'n':
2123
	case 'o':
2124
22461
		(*pos)++;
2125
22461
		return wanttrue;
2126
	case 'c':
2127
	case 'e':
2128
	case 't':
2129
	case 'v':
2130
14906
		(*pos)++;
2131
14906
		return !wanttrue;
2132
	case 'd':
2133
	case 'r':
2134
39
		cp = v + *pos + 1;
2135
138
		while (*cp == ' ')
2136
30
			cp++;
2137
		name = cp;
2138
39
		sz = roff_getname(r, &cp, ln, cp - v);
2139
39
		if (sz == 0)
2140
			istrue = 0;
2141
39
		else if (v[*pos] == 'r')
2142
9
			istrue = roff_hasregn(r, name, sz);
2143
		else {
2144
30
			deftype = ROFFDEF_ANY;
2145
30
		        roff_getstrn(r, name, sz, &deftype);
2146
30
			istrue = !!deftype;
2147
		}
2148
39
		*pos = cp - v;
2149
39
		return istrue == wanttrue;
2150
	default:
2151
		break;
2152
	}
2153
2154
	savepos = *pos;
2155
4755
	if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE))
2156
4716
		return (number > 0) == wanttrue;
2157
39
	else if (*pos == savepos)
2158
30
		return roff_evalstrcond(v, pos) == wanttrue;
2159
	else
2160
9
		return 0;
2161
42167
}
2162
2163
static enum rofferr
2164
roff_line_ignore(ROFF_ARGS)
2165
{
2166
2167
31450
	return ROFF_IGN;
2168
}
2169
2170
static enum rofferr
2171
roff_insec(ROFF_ARGS)
2172
{
2173
2174
	mandoc_msg(MANDOCERR_REQ_INSEC, r->parse,
2175
	    ln, ppos, roff_name[tok]);
2176
	return ROFF_IGN;
2177
}
2178
2179
static enum rofferr
2180
roff_unsupp(ROFF_ARGS)
2181
{
2182
2183
	mandoc_msg(MANDOCERR_REQ_UNSUPP, r->parse,
2184
	    ln, ppos, roff_name[tok]);
2185
	return ROFF_IGN;
2186
}
2187
2188
static enum rofferr
2189
roff_cond(ROFF_ARGS)
2190
{
2191
2192
49453
	roffnode_push(r, tok, NULL, ln, ppos);
2193
2194
	/*
2195
	 * An `.el' has no conditional body: it will consume the value
2196
	 * of the current rstack entry set in prior `ie' calls or
2197
	 * defaults to DENY.
2198
	 *
2199
	 * If we're not an `el', however, then evaluate the conditional.
2200
	 */
2201
2202
141073
	r->last->rule = tok == ROFF_el ?
2203
14563
	    (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
2204
42167
	    roff_evalcond(r, ln, buf->buf, &pos);
2205
2206
	/*
2207
	 * An if-else will put the NEGATION of the current evaluated
2208
	 * conditional into the stack of rules.
2209
	 */
2210
2211
49453
	if (tok == ROFF_ie) {
2212
7277
		if (r->rstackpos + 1 == r->rstacksz) {
2213
753
			r->rstacksz += 16;
2214
1506
			r->rstack = mandoc_reallocarray(r->rstack,
2215
753
			    r->rstacksz, sizeof(int));
2216
753
		}
2217
7277
		r->rstack[++r->rstackpos] = !r->last->rule;
2218
7277
	}
2219
2220
	/* If the parent has false as its rule, then so do we. */
2221
2222

52051
	if (r->last->parent && !r->last->parent->rule)
2223
756
		r->last->rule = 0;
2224
2225
	/*
2226
	 * Determine scope.
2227
	 * If there is nothing on the line after the conditional,
2228
	 * not even whitespace, use next-line scope.
2229
	 */
2230
2231
49453
	if (buf->buf[pos] == '\0') {
2232
		r->last->endspan = 2;
2233
30
		goto out;
2234
	}
2235
2236
183024
	while (buf->buf[pos] == ' ')
2237
42089
		pos++;
2238
2239
	/* An opening brace requests multiline scope. */
2240
2241

52752
	if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
2242
3327
		r->last->endspan = -1;
2243
3327
		pos += 2;
2244
6702
		while (buf->buf[pos] == ' ')
2245
24
			pos++;
2246
		goto out;
2247
	}
2248
2249
	/*
2250
	 * Anything else following the conditional causes
2251
	 * single-line scope.  Warn if the scope contains
2252
	 * nothing but trailing whitespace.
2253
	 */
2254
2255
46096
	if (buf->buf[pos] == '\0')
2256
24
		mandoc_msg(MANDOCERR_COND_EMPTY, r->parse,
2257
12
		    ln, ppos, roff_name[tok]);
2258
2259
46096
	r->last->endspan = 1;
2260
2261
out:
2262
95579
	*offs = pos;
2263
49453
	return ROFF_RERUN;
2264
}
2265
2266
static enum rofferr
2267
roff_ds(ROFF_ARGS)
2268
{
2269
12214
	char		*string;
2270
	const char	*name;
2271
	size_t		 namesz;
2272
2273
	/* Ignore groff compatibility mode for now. */
2274
2275
6107
	if (tok == ROFF_ds1)
2276
		tok = ROFF_ds;
2277
6107
	else if (tok == ROFF_as1)
2278
		tok = ROFF_as;
2279
2280
	/*
2281
	 * The first word is the name of the string.
2282
	 * If it is empty or terminated by an escape sequence,
2283
	 * abort the `ds' request without defining anything.
2284
	 */
2285
2286
6107
	name = string = buf->buf + pos;
2287
6107
	if (*name == '\0')
2288
		return ROFF_IGN;
2289
2290
6107
	namesz = roff_getname(r, &string, ln, pos);
2291
6107
	if (name[namesz] == '\\')
2292
15
		return ROFF_IGN;
2293
2294
	/* Read past the initial double-quote, if any. */
2295
6092
	if (*string == '"')
2296
2958
		string++;
2297
2298
	/* The rest is the value. */
2299
12184
	roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
2300
6092
	    ROFF_as == tok);
2301
6092
	roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2302
6092
	return ROFF_IGN;
2303
6107
}
2304
2305
/*
 * Parse one operator, consisting of one or two characters,
 * at v[*pos].
 * If it is recognized, store its one-character code in *res,
 * move *pos behind it, and return the code.
 * Otherwise, return 0 and leave *pos alone; *res then holds
 * the unrecognized character.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	int	 oplen;

	oplen = 1;
	*res = v[*pos];

	switch (*res) {
	case '+':
	case '-':
	case '*':
	case '/':
	case '%':
	case '&':
	case ':':
		break;
	case '<':
		/* May be followed by '=', '>', or '?'. */
		if (v[*pos + 1] == '=') {
			*res = 'l';
			oplen = 2;
		} else if (v[*pos + 1] == '>') {
			*res = '!';
			oplen = 2;
		} else if (v[*pos + 1] == '?') {
			*res = 'i';
			oplen = 2;
		}
		break;
	case '>':
		/* May be followed by '=' or '?'. */
		if (v[*pos + 1] == '=') {
			*res = 'g';
			oplen = 2;
		} else if (v[*pos + 1] == '?') {
			*res = 'a';
			oplen = 2;
		}
		break;
	case '=':
		/* A doubled equal sign means the same as a single one. */
		if (v[*pos + 1] == '=')
			oplen = 2;
		break;
	default:
		return 0;
	}

	*pos += oplen;
	return *res;
}
2368
2369
/*
2370
 * Evaluate either a parenthesized numeric expression
2371
 * or a single signed integer number.
2372
 */
2373
static int
2374
roff_evalpar(struct roff *r, int ln,
2375
	const char *v, int *pos, int *res, int flags)
2376
{
2377
2378
32762
	if ('(' != v[*pos])
2379
13360
		return roff_getnum(v, pos, res, flags);
2380
2381
3021
	(*pos)++;
2382
3021
	if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE))
2383
21
		return 0;
2384
2385
	/*
2386
	 * Omission of the closing parenthesis
2387
	 * is an error in validation mode,
2388
	 * but ignored in evaluation mode.
2389
	 */
2390
2391
3000
	if (')' == v[*pos])
2392
2985
		(*pos)++;
2393
15
	else if (NULL == res)
2394
6
		return 0;
2395
2396
2994
	return 1;
2397
16381
}
2398
2399
/*
2400
 * Evaluate a complete numeric expression.
2401
 * Proceed left to right, there is no concept of precedence.
2402
 */
2403
static int
2404
roff_evalnum(struct roff *r, int ln, const char *v,
2405
	int *pos, int *res, int flags)
2406
{
2407
19538
	int		 mypos, operand2;
2408
9769
	char		 operator;
2409
2410
9769
	if (NULL == pos) {
2411
1879
		mypos = 0;
2412
		pos = &mypos;
2413
1879
	}
2414
2415
9769
	if (flags & ROFFNUM_WHITE)
2416
6060
		while (isspace((unsigned char)v[*pos]))
2417
9
			(*pos)++;
2418
2419
9769
	if ( ! roff_evalpar(r, ln, v, pos, res, flags))
2420
99
		return 0;
2421
2422
16198
	while (1) {
2423
16252
		if (flags & ROFFNUM_WHITE)
2424
12012
			while (isspace((unsigned char)v[*pos]))
2425
12
				(*pos)++;
2426
2427
16252
		if ( ! roff_getop(v, pos, &operator))
2428
			break;
2429
2430
6612
		if (flags & ROFFNUM_WHITE)
2431
6000
			while (isspace((unsigned char)v[*pos]))
2432
6
				(*pos)++;
2433
2434
6612
		if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags))
2435
30
			return 0;
2436
2437
6582
		if (flags & ROFFNUM_WHITE)
2438
6000
			while (isspace((unsigned char)v[*pos]))
2439
6
				(*pos)++;
2440
2441
6582
		if (NULL == res)
2442
54
			continue;
2443
2444




6528
		switch (operator) {
2445
		case '+':
2446
51
			*res += operand2;
2447
51
			break;
2448
		case '-':
2449
144
			*res -= operand2;
2450
144
			break;
2451
		case '*':
2452
153
			*res *= operand2;
2453
153
			break;
2454
		case '/':
2455
12
			if (operand2 == 0) {
2456
6
				mandoc_msg(MANDOCERR_DIVZERO,
2457
6
					r->parse, ln, *pos, v);
2458
6
				*res = 0;
2459
6
				break;
2460
			}
2461
6
			*res /= operand2;
2462
6
			break;
2463
		case '%':
2464
9
			if (operand2 == 0) {
2465
6
				mandoc_msg(MANDOCERR_DIVZERO,
2466
6
					r->parse, ln, *pos, v);
2467
6
				*res = 0;
2468
6
				break;
2469
			}
2470
3
			*res %= operand2;
2471
3
			break;
2472
		case '<':
2473
15
			*res = *res < operand2;
2474
15
			break;
2475
		case '>':
2476
921
			*res = *res > operand2;
2477
921
			break;
2478
		case 'l':
2479
9
			*res = *res <= operand2;
2480
9
			break;
2481
		case 'g':
2482
9
			*res = *res >= operand2;
2483
9
			break;
2484
		case '=':
2485
3699
			*res = *res == operand2;
2486
3699
			break;
2487
		case '!':
2488
			*res = *res != operand2;
2489
			break;
2490
		case '&':
2491
1482
			*res = *res && operand2;
2492
1482
			break;
2493
		case ':':
2494
12
			*res = *res || operand2;
2495
12
			break;
2496
		case 'i':
2497
6
			if (operand2 < *res)
2498
3
				*res = operand2;
2499
			break;
2500
		case 'a':
2501
6
			if (operand2 > *res)
2502
3
				*res = operand2;
2503
			break;
2504
		default:
2505
			abort();
2506
		}
2507
	}
2508
9640
	return 1;
2509
9769
}
2510
2511
/* --- register management ------------------------------------------------ */
2512
2513
void
2514
roff_setreg(struct roff *r, const char *name, int val, char sign)
2515
{
2516
	struct roffreg	*reg;
2517
2518
	/* Search for an existing register with the same name. */
2519
190878
	reg = r->regtab;
2520
2521

384717
	while (reg && strcmp(name, reg->key.p))
2522
2458
		reg = reg->next;
2523
2524
95439
	if (NULL == reg) {
2525
		/* Create a new register. */
2526
6871
		reg = mandoc_malloc(sizeof(struct roffreg));
2527
6871
		reg->key.p = mandoc_strdup(name);
2528
6871
		reg->key.sz = strlen(name);
2529
6871
		reg->val = 0;
2530
6871
		reg->next = r->regtab;
2531
6871
		r->regtab = reg;
2532
6871
	}
2533
2534
95439
	if ('+' == sign)
2535
3172
		reg->val += val;
2536
92267
	else if ('-' == sign)
2537
3142
		reg->val -= val;
2538
	else
2539
		reg->val = val;
2540
95439
}
2541
2542
/*
2543
 * Handle some predefined read-only number registers.
2544
 * For now, return -1 if the requested register is not predefined;
2545
 * in case a predefined read-only register having the value -1
2546
 * were to turn up, another special value would have to be chosen.
2547
 */
2548
static int
2549
roff_getregro(const struct roff *r, const char *name)
2550
{
2551
2552


4470
	switch (*name) {
2553
	case '$':  /* Number of arguments of the last macro evaluated. */
2554
9
		return r->argc;
2555
	case 'A':  /* ASCII approximation mode is always off. */
2556
3
		return 0;
2557
	case 'g':  /* Groff compatibility mode is always on. */
2558
735
		return 1;
2559
	case 'H':  /* Fixed horizontal resolution. */
2560
1476
		return 24;
2561
	case 'j':  /* Always adjust left margin only. */
2562
3
		return 0;
2563
	case 'T':  /* Some output device is always defined. */
2564
3
		return 1;
2565
	case 'V':  /* Fixed vertical resolution. */
2566
6
		return 40;
2567
	default:
2568
		return -1;
2569
	}
2570
2235
}
2571
2572
int
2573
roff_getreg(const struct roff *r, const char *name)
2574
{
2575
	struct roffreg	*reg;
2576
	int		 val;
2577
2578

1067566
	if ('.' == name[0] && '\0' != name[1] && '\0' == name[2]) {
2579
		val = roff_getregro(r, name + 1);
2580
		if (-1 != val)
2581
			return val;
2582
	}
2583
2584
1070722
	for (reg = r->regtab; reg; reg = reg->next)
2585
511943
		if (0 == strcmp(name, reg->key.p))
2586
510365
			return reg->val;
2587
2588
23418
	return 0;
2589
533783
}
2590
2591
static int
2592
roff_getregn(const struct roff *r, const char *name, size_t len)
2593
{
2594
	struct roffreg	*reg;
2595
	int		 val;
2596
2597
11278
	if ('.' == name[0] && 2 == len) {
2598
2235
		val = roff_getregro(r, name + 1);
2599
2235
		if (-1 != val)
2600
2235
			return val;
2601
	}
2602
2603
11850
	for (reg = r->regtab; reg; reg = reg->next)
2604

8156
		if (len == reg->key.sz &&
2605
3014
		    0 == strncmp(name, reg->key.p, len))
2606
2621
			return reg->val;
2607
2608
783
	return 0;
2609
5639
}
2610
2611
static int
2612
roff_hasregn(const struct roff *r, const char *name, size_t len)
2613
{
2614
	struct roffreg	*reg;
2615
	int		 val;
2616
2617
18
	if ('.' == name[0] && 2 == len) {
2618
		val = roff_getregro(r, name + 1);
2619
		if (-1 != val)
2620
			return 1;
2621
	}
2622
2623
18
	for (reg = r->regtab; reg; reg = reg->next)
2624

12
		if (len == reg->key.sz &&
2625
6
		    0 == strncmp(name, reg->key.p, len))
2626
6
			return 1;
2627
2628
3
	return 0;
2629
9
}
2630
2631
static void
2632
roff_freereg(struct roffreg *reg)
2633
{
2634
	struct roffreg	*old_reg;
2635
2636
37889
	while (NULL != reg) {
2637
6850
		free(reg->key.p);
2638
		old_reg = reg;
2639
6850
		reg = reg->next;
2640
6850
		free(old_reg);
2641
	}
2642
8063
}
2643
2644
static enum rofferr
2645
roff_nr(ROFF_ARGS)
2646
{
2647
3770
	char		*key, *val;
2648
	size_t		 keysz;
2649
1885
	int		 iv;
2650
	char		 sign;
2651
2652
1885
	key = val = buf->buf + pos;
2653
1885
	if (*key == '\0')
2654
		return ROFF_IGN;
2655
2656
1885
	keysz = roff_getname(r, &val, ln, pos);
2657
1885
	if (key[keysz] == '\\')
2658
6
		return ROFF_IGN;
2659
1879
	key[keysz] = '\0';
2660
2661
1879
	sign = *val;
2662

3594
	if (sign == '+' || sign == '-')
2663
328
		val++;
2664
2665
1879
	if (roff_evalnum(r, ln, val, NULL, &iv, ROFFNUM_SCALE))
2666
1864
		roff_setreg(r, key, iv, sign);
2667
2668
1879
	return ROFF_IGN;
2669
1885
}
2670
2671
static enum rofferr
2672
roff_rr(ROFF_ARGS)
2673
{
2674
	struct roffreg	*reg, **prev;
2675
42
	char		*name, *cp;
2676
	size_t		 namesz;
2677
2678
21
	name = cp = buf->buf + pos;
2679
21
	if (*name == '\0')
2680
		return ROFF_IGN;
2681
21
	namesz = roff_getname(r, &cp, ln, pos);
2682
21
	name[namesz] = '\0';
2683
2684
21
	prev = &r->regtab;
2685
51
	while (1) {
2686
51
		reg = *prev;
2687

102
		if (reg == NULL || !strcmp(name, reg->key.p))
2688
			break;
2689
30
		prev = &reg->next;
2690
	}
2691
21
	if (reg != NULL) {
2692
21
		*prev = reg->next;
2693
21
		free(reg->key.p);
2694
21
		free(reg);
2695
21
	}
2696
21
	return ROFF_IGN;
2697
21
}
2698
2699
/* --- handler functions for roff requests -------------------------------- */
2700
2701
static enum rofferr
2702
roff_rm(ROFF_ARGS)
2703
{
2704
	const char	 *name;
2705
48
	char		 *cp;
2706
	size_t		  namesz;
2707
2708
24
	cp = buf->buf + pos;
2709
156
	while (*cp != '\0') {
2710
		name = cp;
2711
60
		namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
2712
60
		roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
2713
60
		roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2714
60
		if (name[namesz] == '\\')
2715
			break;
2716
	}
2717
24
	return ROFF_IGN;
2718
24
}
2719
2720
static enum rofferr
2721
roff_it(ROFF_ARGS)
2722
{
2723
30
	int		 iv;
2724
2725
	/* Parse the number of lines. */
2726
2727
30
	if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) {
2728
24
		mandoc_msg(MANDOCERR_IT_NONUM, r->parse,
2729
12
		    ln, ppos, buf->buf + 1);
2730
12
		return ROFF_IGN;
2731
	}
2732
2733
54
	while (isspace((unsigned char)buf->buf[pos]))
2734
9
		pos++;
2735
2736
	/*
2737
	 * Arm the input line trap.
2738
	 * Special-casing "an-trap" is an ugly workaround to cope
2739
	 * with DocBook stupidly fiddling with man(7) internals.
2740
	 */
2741
2742
18
	roffit_lines = iv;
2743

66
	roffit_macro = mandoc_strdup(iv != 1 ||
2744
12
	    strcmp(buf->buf + pos, "an-trap") ?
2745
18
	    buf->buf + pos : "br");
2746
18
	return ROFF_IGN;
2747
30
}
2748
2749
static enum rofferr
2750
roff_Dd(ROFF_ARGS)
2751
{
2752
	int		 mask;
2753
	enum roff_tok	 t, te;
2754
2755
16086
	switch (tok) {
2756
	case ROFF_Dd:
2757
		tok = MDOC_Dd;
2758
		te = MDOC_MAX;
2759
5785
		if (r->format == 0)
2760
5684
			r->format = MPARSE_MDOC;
2761
		mask = MPARSE_MDOC | MPARSE_QUICK;
2762
5785
		break;
2763
	case ROFF_TH:
2764
		tok = MAN_TH;
2765
		te = MAN_MAX;
2766
2258
		if (r->format == 0)
2767
1623
			r->format = MPARSE_MAN;
2768
		mask = MPARSE_QUICK;
2769
2258
		break;
2770
	default:
2771
		abort();
2772
	}
2773
8043
	if ((r->options & mask) == 0)
2774
1542620
		for (t = tok; t < te; t++)
2775
763368
			roff_setstr(r, roff_name[t], NULL, 0);
2776
8043
	return ROFF_CONT;
2777
}
2778
2779
static enum rofferr
2780
roff_TE(ROFF_ARGS)
2781
{
2782
4628
	if (r->tbl == NULL) {
2783
		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
2784
		    ln, ppos, "TE");
2785
		return ROFF_IGN;
2786
	}
2787
2314
	if (tbl_end(r->tbl) == 0) {
2788
		r->tbl = NULL;
2789
6
		free(buf->buf);
2790
6
		buf->buf = mandoc_strdup(".sp");
2791
6
		buf->sz = 4;
2792
6
		return ROFF_REPARSE;
2793
	}
2794
	r->tbl = NULL;
2795
2308
	return ROFF_IGN;
2796
2314
}
2797
2798
static enum rofferr
2799
roff_T_(ROFF_ARGS)
2800
{
2801
2802
12
	if (NULL == r->tbl)
2803
		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
2804
		    ln, ppos, "T&");
2805
	else
2806
6
		tbl_restart(ln, ppos, r->tbl);
2807
2808
6
	return ROFF_IGN;
2809
}
2810
2811
/*
2812
 * Handle in-line equation delimiters.
2813
 */
2814
static enum rofferr
2815
roff_eqndelim(struct roff *r, struct buf *buf, int pos)
2816
{
2817
196
	char		*cp1, *cp2;
2818
	const char	*bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
2819
2820
	/*
2821
	 * Outside equations, look for an opening delimiter.
2822
	 * If we are inside an equation, we already know it is
2823
	 * in-line, or this function wouldn't have been called;
2824
	 * so look for a closing delimiter.
2825
	 */
2826
2827
98
	cp1 = buf->buf + pos;
2828
294
	cp2 = strchr(cp1, r->eqn == NULL ?
2829
98
	    r->last_eqn->odelim : r->last_eqn->cdelim);
2830
98
	if (cp2 == NULL)
2831
86
		return ROFF_CONT;
2832
2833
12
	*cp2++ = '\0';
2834
	bef_pr = bef_nl = aft_nl = aft_pr = "";
2835
2836
	/* Handle preceding text, protecting whitespace. */
2837
2838
12
	if (*buf->buf != '\0') {
2839
12
		if (r->eqn == NULL)
2840
6
			bef_pr = "\\&";
2841
		bef_nl = "\n";
2842
12
	}
2843
2844
	/*
2845
	 * Prepare replacing the delimiter with an equation macro
2846
	 * and drop leading white space from the equation.
2847
	 */
2848
2849
12
	if (r->eqn == NULL) {
2850
12
		while (*cp2 == ' ')
2851
			cp2++;
2852
		mac = ".EQ";
2853
6
	} else
2854
		mac = ".EN";
2855
2856
	/* Handle following text, protecting whitespace. */
2857
2858
12
	if (*cp2 != '\0') {
2859
		aft_nl = "\n";
2860
12
		if (r->eqn != NULL)
2861
6
			aft_pr = "\\&";
2862
	}
2863
2864
	/* Do the actual replacement. */
2865
2866
24
	buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
2867
12
	    bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
2868
12
	free(buf->buf);
2869
12
	buf->buf = cp1;
2870
2871
	/* Toggle the in-line state of the eqn subsystem. */
2872
2873
12
	r->eqn_inline = r->eqn == NULL;
2874
12
	return ROFF_REPARSE;
2875
98
}
2876
2877
static enum rofferr
2878
roff_EQ(ROFF_ARGS)
2879
{
2880
	struct roff_node	*n;
2881
2882
370
	if (r->man->macroset == MACROSET_MAN)
2883
		man_breakscope(r->man, ROFF_EQ);
2884
185
	n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE);
2885
185
	if (ln > r->man->last->line)
2886
179
		n->flags |= NODE_LINE;
2887
185
	n->eqn = mandoc_calloc(1, sizeof(*n->eqn));
2888
185
	n->eqn->expectargs = UINT_MAX;
2889
185
	roff_node_append(r->man, n);
2890
185
	r->man->next = ROFF_NEXT_SIBLING;
2891
2892
185
	assert(r->eqn == NULL);
2893
185
	if (r->last_eqn == NULL)
2894
130
		r->last_eqn = eqn_alloc(r->parse);
2895
	else
2896
55
		eqn_reset(r->last_eqn);
2897
185
	r->eqn = r->last_eqn;
2898
185
	r->eqn->node = n;
2899
2900
185
	if (buf->buf[pos] != '\0')
2901
		mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
2902
		    ".EQ %s", buf->buf + pos);
2903
2904
185
	return ROFF_IGN;
2905
}
2906
2907
static enum rofferr
2908
roff_EN(ROFF_ARGS)
2909
{
2910
370
	if (r->eqn != NULL) {
2911
185
		eqn_parse(r->eqn);
2912
185
		r->eqn = NULL;
2913
185
	} else
2914
		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, ln, ppos, "EN");
2915
185
	if (buf->buf[pos] != '\0')
2916
		mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
2917
		    "EN %s", buf->buf + pos);
2918
185
	return ROFF_IGN;
2919
}
2920
2921
static enum rofferr
2922
roff_TS(ROFF_ARGS)
2923
{
2924
4628
	if (r->tbl != NULL) {
2925
		mandoc_msg(MANDOCERR_BLK_BROKEN, r->parse,
2926
		    ln, ppos, "TS breaks TS");
2927
		tbl_end(r->tbl);
2928
	}
2929
2314
	r->tbl = tbl_alloc(ppos, ln, r->parse);
2930
2314
	if (r->last_tbl)
2931
2169
		r->last_tbl->next = r->tbl;
2932
	else
2933
145
		r->first_tbl = r->tbl;
2934
2314
	r->last_tbl = r->tbl;
2935
2314
	return ROFF_IGN;
2936
}
2937
2938
static enum rofferr
2939
roff_onearg(ROFF_ARGS)
2940
{
2941
	struct roff_node	*n;
2942
	char			*cp;
2943
78986
	int			 npos;
2944
2945
39493
	if (r->man->flags & (MAN_BLINE | MAN_ELINE) &&
2946
	    (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp ||
2947
	     tok == ROFF_ti))
2948
		man_breakscope(r->man, tok);
2949
2950

39493
	if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) {
2951
		r->man->last = roffce_node;
2952
		r->man->next = ROFF_NEXT_SIBLING;
2953
	}
2954
2955
39493
	roff_elem_alloc(r->man, ln, ppos, tok);
2956
39493
	n = r->man->last;
2957
2958
39493
	cp = buf->buf + pos;
2959
39493
	if (*cp != '\0') {
2960

150549
		while (*cp != '\0' && *cp != ' ')
2961
34937
			cp++;
2962
45774
		while (*cp == ' ')
2963
36
			*cp++ = '\0';
2964
22851
		if (*cp != '\0')
2965
36
			mandoc_vmsg(MANDOCERR_ARG_EXCESS,
2966
36
			    r->parse, ln, cp - buf->buf,
2967
36
			    "%s ... %s", roff_name[tok], cp);
2968
22851
		roff_word_alloc(r->man, ln, pos, buf->buf + pos);
2969
22851
	}
2970
2971
39493
	if (tok == ROFF_ce || tok == ROFF_rj) {
2972
		if (r->man->last->type == ROFFT_ELEM) {
2973
			roff_word_alloc(r->man, ln, pos, "1");
2974
			r->man->last->flags |= NODE_NOSRC;
2975
		}
2976
		npos = 0;
2977
		if (roff_evalnum(r, ln, r->man->last->string, &npos,
2978
		    &roffce_lines, 0) == 0) {
2979
			mandoc_vmsg(MANDOCERR_CE_NONUM,
2980
			    r->parse, ln, pos, "ce %s", buf->buf + pos);
2981
			roffce_lines = 1;
2982
		}
2983
		if (roffce_lines < 1) {
2984
			r->man->last = r->man->last->parent;
2985
			roffce_node = NULL;
2986
			roffce_lines = 0;
2987
		} else
2988
			roffce_node = r->man->last->parent;
2989
	} else {
2990
39493
		n->flags |= NODE_VALID | NODE_ENDED;
2991
39493
		r->man->last = n;
2992
	}
2993
39493
	n->flags |= NODE_LINE;
2994
39493
	r->man->next = ROFF_NEXT_SIBLING;
2995
39493
	return ROFF_IGN;
2996
39493
}
2997
2998
static enum rofferr
2999
roff_manyarg(ROFF_ARGS)
3000
{
3001
	struct roff_node	*n;
3002
	char			*sp, *ep;
3003
3004
64
	roff_elem_alloc(r->man, ln, ppos, tok);
3005
32
	n = r->man->last;
3006
3007
202
	for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) {
3008

664
		while (*ep != '\0' && *ep != ' ')
3009
161
			ep++;
3010
224
		while (*ep == ' ')
3011
43
			*ep++ = '\0';
3012
69
		roff_word_alloc(r->man, ln, sp - buf->buf, sp);
3013
	}
3014
3015
32
	n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3016
32
	r->man->last = n;
3017
32
	r->man->next = ROFF_NEXT_SIBLING;
3018
32
	return ROFF_IGN;
3019
}
3020
3021
static enum rofferr
3022
roff_als(ROFF_ARGS)
3023
{
3024
	char		*oldn, *newn, *end, *value;
3025
	size_t		 oldsz, newsz, valsz;
3026
3027
	newn = oldn = buf->buf + pos;
3028
	if (*newn == '\0')
3029
		return ROFF_IGN;
3030
3031
	newsz = roff_getname(r, &oldn, ln, pos);
3032
	if (newn[newsz] == '\\' || *oldn == '\0')
3033
		return ROFF_IGN;
3034
3035
	end = oldn;
3036
	oldsz = roff_getname(r, &end, ln, oldn - buf->buf);
3037
	if (oldsz == 0)
3038
		return ROFF_IGN;
3039
3040
	valsz = mandoc_asprintf(&value, ".%.*s \\$*\\\"\n",
3041
	    (int)oldsz, oldn);
3042
	roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0);
3043
	roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3044
	free(value);
3045
	return ROFF_IGN;
3046
}
3047
3048
static enum rofferr
3049
roff_br(ROFF_ARGS)
3050
{
3051
8076
	if (r->man->flags & (MAN_BLINE | MAN_ELINE))
3052
		man_breakscope(r->man, ROFF_br);
3053
4038
	roff_elem_alloc(r->man, ln, ppos, ROFF_br);
3054
4038
	if (buf->buf[pos] != '\0')
3055
36
		mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
3056
18
		    "%s %s", roff_name[tok], buf->buf + pos);
3057
4038
	r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3058
4038
	r->man->next = ROFF_NEXT_SIBLING;
3059
4038
	return ROFF_IGN;
3060
}
3061
3062
static enum rofferr
3063
roff_cc(ROFF_ARGS)
3064
{
3065
	const char	*p;
3066
3067
36
	p = buf->buf + pos;
3068
3069

30
	if (*p == '\0' || (r->control = *p++) == '.')
3070
6
		r->control = '\0';
3071
3072
18
	if (*p != '\0')
3073
12
		mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
3074
6
		    ln, p - buf->buf, "cc ... %s", p);
3075
3076
18
	return ROFF_IGN;
3077
}
3078
3079
static enum rofferr
3080
roff_ec(ROFF_ARGS)
3081
{
3082
	const char	*p;
3083
3084
	p = buf->buf + pos;
3085
	if (*p == '\0')
3086
		r->escape = '\\';
3087
	else {
3088
		r->escape = *p;
3089
		if (*++p != '\0')
3090
			mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
3091
			    ln, p - buf->buf, "ec ... %s", p);
3092
	}
3093
	return ROFF_IGN;
3094
}
3095
3096
static enum rofferr
3097
roff_eo(ROFF_ARGS)
3098
{
3099
	r->escape = '\0';
3100
	if (buf->buf[pos] != '\0')
3101
		mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse,
3102
		    ln, pos, "eo %s", buf->buf + pos);
3103
	return ROFF_IGN;
3104
}
3105
3106
static enum rofferr
3107
roff_tr(ROFF_ARGS)
3108
{
3109
1518
	const char	*p, *first, *second;
3110
	size_t		 fsz, ssz;
3111
	enum mandoc_esc	 esc;
3112
3113
759
	p = buf->buf + pos;
3114
3115
759
	if (*p == '\0') {
3116
6
		mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse, ln, ppos, "tr");
3117
6
		return ROFF_IGN;
3118
	}
3119
3120
2293
	while (*p != '\0') {
3121
		fsz = ssz = 1;
3122
3123
761
		first = p++;
3124
761
		if (*first == '\\') {
3125
735
			esc = mandoc_escape(&p, NULL, NULL);
3126
735
			if (esc == ESCAPE_ERROR) {
3127
				mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
3128
				    ln, (int)(p - buf->buf), first);
3129
				return ROFF_IGN;
3130
			}
3131
735
			fsz = (size_t)(p - first);
3132
735
		}
3133
3134
761
		second = p++;
3135
761
		if (*second == '\\') {
3136
2
			esc = mandoc_escape(&p, NULL, NULL);
3137
2
			if (esc == ESCAPE_ERROR) {
3138
				mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
3139
				    ln, (int)(p - buf->buf), second);
3140
				return ROFF_IGN;
3141
			}
3142
2
			ssz = (size_t)(p - second);
3143
761
		} else if (*second == '\0') {
3144
24
			mandoc_vmsg(MANDOCERR_TR_ODD, r->parse,
3145
12
			    ln, first - buf->buf, "tr %s", first);
3146
			second = " ";
3147
12
			p--;
3148
12
		}
3149
3150
761
		if (fsz > 1) {
3151
735
			roff_setstrn(&r->xmbtab, first, fsz,
3152
			    second, ssz, 0);
3153
735
			continue;
3154
		}
3155
3156
26
		if (r->xtab == NULL)
3157
8
			r->xtab = mandoc_calloc(128,
3158
			    sizeof(struct roffstr));
3159
3160
26
		free(r->xtab[(int)*first].p);
3161
26
		r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
3162
26
		r->xtab[(int)*first].sz = ssz;
3163
	}
3164
3165
753
	return ROFF_IGN;
3166
759
}
3167
3168
static enum rofferr
3169
roff_rn(ROFF_ARGS)
3170
{
3171
	const char	*value;
3172
24
	char		*oldn, *newn, *end;
3173
	size_t		 oldsz, newsz;
3174
12
	int		 deftype;
3175
3176
12
	oldn = newn = buf->buf + pos;
3177
12
	if (*oldn == '\0')
3178
		return ROFF_IGN;
3179
3180
12
	oldsz = roff_getname(r, &newn, ln, pos);
3181

24
	if (oldn[oldsz] == '\\' || *newn == '\0')
3182
		return ROFF_IGN;
3183
3184
12
	end = newn;
3185
12
	newsz = roff_getname(r, &end, ln, newn - buf->buf);
3186
12
	if (newsz == 0)
3187
		return ROFF_IGN;
3188
3189
12
	deftype = ROFFDEF_ANY;
3190
12
	value = roff_getstrn(r, oldn, oldsz, &deftype);
3191

12
	switch (deftype) {
3192
	case ROFFDEF_USER:
3193
		roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3194
		roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0);
3195
		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3196
		break;
3197
	case ROFFDEF_PRE:
3198
		roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3199
		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3200
		break;
3201
	case ROFFDEF_REN:
3202
		roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0);
3203
		roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0);
3204
		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3205
		break;
3206
	case ROFFDEF_STD:
3207
12
		roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0);
3208
12
		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3209
12
		break;
3210
	default:
3211
		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3212
		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3213
		break;
3214
	}
3215
12
	return ROFF_IGN;
3216
12
}
3217
3218
static enum rofferr
3219
roff_so(ROFF_ARGS)
3220
{
3221
8
	char *name, *cp;
3222
3223
4
	name = buf->buf + pos;
3224
4
	mandoc_vmsg(MANDOCERR_SO, r->parse, ln, ppos, "so %s", name);
3225
3226
	/*
3227
	 * Handle `so'.  Be EXTREMELY careful, as we shouldn't be
3228
	 * opening anything that's not in our cwd or anything beneath
3229
	 * it.  Thus, explicitly disallow traversing up the file-system
3230
	 * or using absolute paths.
3231
	 */
3232
3233

12
	if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
3234
		mandoc_vmsg(MANDOCERR_SO_PATH, r->parse, ln, ppos,
3235
		    ".so %s", name);
3236
		buf->sz = mandoc_asprintf(&cp,
3237
		    ".sp\nSee the file %s.\n.sp", name) + 1;
3238
		free(buf->buf);
3239
		buf->buf = cp;
3240
		*offs = 0;
3241
		return ROFF_REPARSE;
3242
	}
3243
3244
4
	*offs = pos;
3245
4
	return ROFF_SO;
3246
4
}
3247
3248
/* --- user defined strings and macros ------------------------------------ */
3249
3250
static enum rofferr
3251
roff_userdef(ROFF_ARGS)
3252
{
3253
83724
	const char	 *arg[16], *ap;
3254
83724
	char		 *cp, *n1, *n2;
3255
	int		  expand_count, i, ib, ie;
3256
	size_t		  asz, rsz;
3257
3258
	/*
3259
	 * Collect pointers to macro argument strings
3260
	 * and NUL-terminate them.
3261
	 */
3262
3263
83724
	r->argc = 0;
3264
83724
	cp = buf->buf + pos;
3265
2846616
	for (i = 0; i < 16; i++) {
3266
1339584
		if (*cp == '\0')
3267
1247079
			arg[i] = "";
3268
		else {
3269
92505
			arg[i] = mandoc_getarg(r->parse, &cp, ln, &pos);
3270
92505
			r->argc = i + 1;
3271
		}
3272
	}
3273
3274
	/*
3275
	 * Expand macro arguments.
3276
	 */
3277
3278
83724
	buf->sz = strlen(r->current_string) + 1;
3279
83724
	n1 = n2 = cp = mandoc_malloc(buf->sz);
3280
83724
	memcpy(n1, r->current_string, buf->sz);
3281
	expand_count = 0;
3282
1003117
	while (*cp != '\0') {
3283
3284
		/* Scan ahead for the next argument invocation. */
3285
3286
817524
		if (*cp++ != '\\')
3287
797875
			continue;
3288
19649
		if (*cp++ != '$')
3289
1492
			continue;
3290
18157
		if (*cp == '*') {  /* \\$* inserts all arguments */
3291
			ib = 0;
3292
15
			ie = r->argc - 1;
3293
15
		} else {  /* \\$1 .. \\$9 insert one argument */
3294
18142
			ib = ie = *cp - '1';
3295
18142
			if (ib < 0 || ib > 8)
3296
				continue;
3297
		}
3298
18157
		cp -= 2;
3299
3300
		/*
3301
		 * Prevent infinite recursion.
3302
		 */
3303
3304
18157
		if (cp >= n2)
3305
12151
			expand_count = 1;
3306
6006
		else if (++expand_count > EXPAND_LIMIT) {
3307
12
			mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
3308
6
			    ln, (int)(cp - n1), NULL);
3309
6
			free(buf->buf);
3310
6
			buf->buf = n1;
3311
6
			return ROFF_IGN;
3312
		}
3313
3314
		/*
3315
		 * Determine the size of the expanded argument,
3316
		 * taking escaping of quotes into account.
3317
		 */
3318
3319
36305
		asz = ie > ib ? ie - ib : 0;  /* for blanks */
3320
72598
		for (i = ib; i <= ie; i++) {
3321
102442
			for (ap = arg[i]; *ap != '\0'; ap++) {
3322
33073
				asz++;
3323
33073
				if (*ap == '"')
3324
30
					asz += 3;
3325
			}
3326
		}
3327
18151
		if (asz != 3) {
3328
3329
			/*
3330
			 * Determine the size of the rest of the
3331
			 * unexpanded macro, including the NUL.
3332
			 */
3333
3334
11834
			rsz = buf->sz - (cp - n1) - 3;
3335
3336
			/*
3337
			 * When shrinking, move before
3338
			 * releasing the storage.
3339
			 */
3340
3341
11834
			if (asz < 3)
3342
11614
				memmove(cp + asz, cp + 3, rsz);
3343
3344
			/*
3345
			 * Resize the storage for the macro
3346
			 * and readjust the parse pointer.
3347
			 */
3348
3349
11834
			buf->sz += asz - 3;
3350
11834
			n2 = mandoc_realloc(n1, buf->sz);
3351
11834
			cp = n2 + (cp - n1);
3352
			n1 = n2;
3353
3354
			/*
3355
			 * When growing, make room
3356
			 * for the expanded argument.
3357
			 */
3358
3359
11834
			if (asz > 3)
3360
220
				memmove(cp + asz, cp + 3, rsz);
3361
		}
3362
3363
		/* Copy the expanded argument, escaping quotes. */
3364
3365
18151
		n2 = cp;
3366
72598
		for (i = ib; i <= ie; i++) {
3367
102442
			for (ap = arg[i]; *ap != '\0'; ap++) {
3368
33073
				if (*ap == '"') {
3369
30
					memcpy(n2, "\\(dq", 4);
3370
30
					n2 += 4;
3371
30
				} else
3372
33043
					*n2++ = *ap;
3373
			}
3374
18148
			if (i < ie)
3375
3
				*n2++ = ' ';
3376
		}
3377
	}
3378
3379
	/*
3380
	 * Replace the macro invocation
3381
	 * by the expanded macro.
3382
	 */
3383
3384
83718
	free(buf->buf);
3385
83718
	buf->buf = n1;
3386
83718
	*offs = 0;
3387
3388
210698
	return buf->sz > 1 && buf->buf[buf->sz - 2] == '\n' ?
3389
	   ROFF_REPARSE : ROFF_APPEND;
3390
83724
}
3391
3392
/*
3393
 * Calling a high-level macro that was renamed with .rn.
3394
 * r->current_string has already been set up by roff_parse().
3395
 */
3396
static enum rofferr
3397
roff_renamed(ROFF_ARGS)
3398
{
3399
12
	char	*nbuf;
3400
3401
18
	buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string,
3402
12
	    buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1;
3403
6
	free(buf->buf);
3404
6
	buf->buf = nbuf;
3405
6
	return ROFF_CONT;
3406
6
}
3407
3408
static size_t
3409
roff_getname(struct roff *r, char **cpp, int ln, int pos)
3410
{
3411
1362078
	char	 *name, *cp;
3412
	size_t	  namesz;
3413
3414
681039
	name = *cpp;
3415
681039
	if ('\0' == *name)
3416
12
		return 0;
3417
3418
	/* Read until end of name and terminate it with NUL. */
3419
2054556
	for (cp = name; 1; cp++) {
3420

3926922
		if ('\0' == *cp || ' ' == *cp) {
3421
680220
			namesz = cp - name;
3422
680220
			break;
3423
		}
3424
1374336
		if ('\\' != *cp)
3425
			continue;
3426
852
		namesz = cp - name;
3427

966
		if ('{' == cp[1] || '}' == cp[1])
3428
			break;
3429
108
		cp++;
3430
108
		if ('\\' == *cp)
3431
			continue;
3432
126
		mandoc_vmsg(MANDOCERR_NAMESC, r->parse, ln, pos,
3433
63
		    "%.*s", (int)(cp - name + 1), name);
3434
63
		mandoc_escape((const char **)&cp, NULL, NULL);
3435
63
		break;
3436
	}
3437
3438
	/* Read past spaces. */
3439
2367746
	while (' ' == *cp)
3440
502846
		cp++;
3441
3442
681027
	*cpp = cp;
3443
681027
	return namesz;
3444
681039
}
3445
3446
/*
3447
 * Store *string into the user-defined string called *name.
3448
 * To clear an existing entry, call with (*r, *name, NULL, 0).
3449
 * append == 0: replace mode
3450
 * append == 1: single-line append mode
3451
 * append == 2: multiline append mode, append '\n' after each call
3452
 */
3453
static void
3454
roff_setstr(struct roff *r, const char *name, const char *string,
3455
	int append)
3456
{
3457
	size_t	 namesz;
3458
3459
1537584
	namesz = strlen(name);
3460
1537584
	roff_setstrn(&r->strtab, name, namesz, string,
3461
1543008
	    string ? strlen(string) : 0, append);
3462
768792
	roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
3463
768792
}
3464
3465
static void
3466
roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
3467
		const char *string, size_t stringsz, int append)
3468
{
3469
	struct roffkv	*n;
3470
	char		*c;
3471
	int		 i;
3472
	size_t		 oldch, newch;
3473
3474
	/* Search for an existing string with the same name. */
3475
3113618
	n = *r;
3476
3477

422689432
	while (n && (namesz != n->key.sz ||
3478
78838516
			strncmp(n->key.p, name, namesz)))
3479
84792338
		n = n->next;
3480
3481
1556809
	if (NULL == n) {
3482
		/* Create a new string table entry. */
3483
1545672
		n = mandoc_malloc(sizeof(struct roffkv));
3484
1545672
		n->key.p = mandoc_strndup(name, namesz);
3485
1545672
		n->key.sz = namesz;
3486
1545672
		n->val.p = NULL;
3487
1545672
		n->val.sz = 0;
3488
1545672
		n->next = *r;
3489
1545672
		*r = n;
3490
1556809
	} else if (0 == append) {
3491
5707
		free(n->val.p);
3492
5707
		n->val.p = NULL;
3493
5707
		n->val.sz = 0;
3494
5707
	}
3495
3496
1556809
	if (NULL == string)
3497
1541462
		return;
3498
3499
	/*
3500
	 * One additional byte for the '\n' in multiline mode,
3501
	 * and one for the terminating '\0'.
3502
	 */
3503
15347
	newch = stringsz + (1 < append ? 2u : 1u);
3504
3505
15347
	if (NULL == n->val.p) {
3506
9917
		n->val.p = mandoc_malloc(newch);
3507
9917
		*n->val.p = '\0';
3508
		oldch = 0;
3509
9917
	} else {
3510
5430
		oldch = n->val.sz;
3511
5430
		n->val.p = mandoc_realloc(n->val.p, oldch + newch);
3512
	}
3513
3514
	/* Skip existing content in the destination buffer. */
3515
15347
	c = n->val.p + (int)oldch;
3516
3517
	/* Append new content to the destination buffer. */
3518
	i = 0;
3519
201294
	while (i < (int)stringsz) {
3520
		/*
3521
		 * Rudimentary roff copy mode:
3522
		 * Handle escaped backslashes.
3523
		 */
3524

93047
		if ('\\' == string[i] && '\\' == string[i + 1])
3525
900
			i++;
3526
85300
		*c++ = string[i++];
3527
	}
3528
3529
	/* Append terminating bytes. */
3530
15347
	if (1 < append)
3531
5424
		*c++ = '\n';
3532
3533
15347
	*c = '\0';
3534
15347
	n->val.sz = (int)(c - n->val.p);
3535
1572156
}
3536
3537
static const char *
3538
roff_getstrn(const struct roff *r, const char *name, size_t len,
3539
    int *deftype)
3540
{
3541
	const struct roffkv	*n;
3542
	int			 i;
3543
	enum roff_tok		 tok;
3544
3545
1571762
	if (*deftype & ROFFDEF_USER) {
3546
112643104
		for (n = r->strtab; n != NULL; n = n->next) {
3547

56351777
			if (strncmp(name, n->key.p, len) == 0 &&
3548
623094
			    n->key.p[len] == '\0' &&
3549
616722
			    n->val.p != NULL) {
3550
199384
				*deftype = ROFFDEF_USER;
3551
199384
				return n->val.p;
3552
			}
3553
		}
3554
	}
3555
586497
	if (*deftype & ROFFDEF_PRE) {
3556
14386
		for (i = 0; i < PREDEFS_MAX; i++) {
3557

7512
			if (strncmp(name, predefs[i].name, len) == 0 &&
3558
427
			    predefs[i].name[len] == '\0') {
3559
414
				*deftype = ROFFDEF_PRE;
3560
414
				return predefs[i].str;
3561
			}
3562
		}
3563
	}
3564
586083
	if (*deftype & ROFFDEF_REN) {
3565
97134186
		for (n = r->rentab; n != NULL; n = n->next) {
3566

48398420
			if (strncmp(name, n->key.p, len) == 0 &&
3567
423611
			    n->key.p[len] == '\0' &&
3568
417314
			    n->val.p != NULL) {
3569
15
				*deftype = ROFFDEF_REN;
3570
15
				return n->val.p;
3571
			}
3572
		}
3573
	}
3574
586068
	if (*deftype & ROFFDEF_STD) {
3575
30
		if (r->man->macroset != MACROSET_MAN) {
3576
1410
			for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) {
3577

708
				if (strncmp(name, roff_name[tok], len) == 0 &&
3578
6
				    roff_name[tok][len] == '\0') {
3579
6
					*deftype = ROFFDEF_STD;
3580
6
					return NULL;
3581
				}
3582
			}
3583
		}
3584
24
		if (r->man->macroset != MACROSET_MDOC) {
3585
1062
			for (tok = MAN_TH; tok < MAN_MAX; tok++) {
3586

534
				if (strncmp(name, roff_name[tok], len) == 0 &&
3587
12
				    roff_name[tok][len] == '\0') {
3588
12
					*deftype = ROFFDEF_STD;
3589
12
					return NULL;
3590
				}
3591
			}
3592
		}
3593
	}
3594
586050
	*deftype = 0;
3595
586050
	return NULL;
3596
785881
}
3597
3598
static void
3599
roff_freestr(struct roffkv *r)
3600
{
3601
	struct roffkv	 *n, *nn;
3602
3603
3163911
	for (n = r; n; n = nn) {
3604
1545672
		free(n->key.p);
3605
1545672
		free(n->val.p);
3606
1545672
		nn = n->next;
3607
1545672
		free(n);
3608
	}
3609
24189
}
3610
3611
/* --- accessors and utility functions ------------------------------------ */
3612
3613
/*
3614
 * Duplicate an input string, making the appropriate character
3615
 * conversations (as stipulated by `tr') along the way.
3616
 * Returns a heap-allocated string with all the replacements made.
3617
 */
3618
char *
3619
roff_strdup(const struct roff *r, const char *p)
3620
{
3621
	const struct roffkv *cp;
3622
	char		*res;
3623
	const char	*pp;
3624
	size_t		 ssz, sz;
3625
	enum mandoc_esc	 esc;
3626
3627

1791038
	if (NULL == r->xmbtab && NULL == r->xtab)
3628
733454
		return mandoc_strdup(p);
3629
324094
	else if ('\0' == *p)
3630
1
		return mandoc_strdup("");
3631
3632
	/*
3633
	 * Step through each character looking for term matches
3634
	 * (remember that a `tr' can be invoked with an escape, which is
3635
	 * a glyph but the escape is multi-character).
3636
	 * We only do this if the character hash has been initialised
3637
	 * and the string is >0 length.
3638
	 */
3639
3640
	res = NULL;
3641
	ssz = 0;
3642
3643
12673630
	while ('\0' != *p) {
3644
11649323
		assert((unsigned int)*p < 128);
3645

23373432
		if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) {
3646
59
			sz = r->xtab[(int)*p].sz;
3647
59
			res = mandoc_realloc(res, ssz + sz + 1);
3648
59
			memcpy(res + ssz, r->xtab[(int)*p].p, sz);
3649
			ssz += sz;
3650
59
			p++;
3651
59
			continue;
3652
11649264
		} else if ('\\' != *p) {
3653
11272622
			res = mandoc_realloc(res, ssz + 2);
3654
11272622
			res[ssz++] = *p++;
3655
11272622
			continue;
3656
		}
3657
3658
		/* Search for term matches. */
3659
1505526
		for (cp = r->xmbtab; cp; cp = cp->next)
3660
376642
			if (0 == strncmp(p, cp->key.p, cp->key.sz))
3661
				break;
3662
3663
376642
		if (NULL != cp) {
3664
			/*
3665
			 * A match has been found.
3666
			 * Append the match to the array and move
3667
			 * forward by its keysize.
3668
			 */
3669
521
			res = mandoc_realloc(res,
3670
521
			    ssz + cp->val.sz + 1);
3671
521
			memcpy(res + ssz, cp->val.p, cp->val.sz);
3672
521
			ssz += cp->val.sz;
3673
521
			p += (int)cp->key.sz;
3674
521
			continue;
3675
		}
3676
3677
		/*
3678
		 * Handle escapes carefully: we need to copy
3679
		 * over just the escape itself, or else we might
3680
		 * do replacements within the escape itself.
3681
		 * Make sure to pass along the bogus string.
3682
		 */
3683
376121
		pp = p++;
3684
376121
		esc = mandoc_escape(&p, NULL, NULL);
3685
376121
		if (ESCAPE_ERROR == esc) {
3686
			sz = strlen(pp);
3687
			res = mandoc_realloc(res, ssz + sz + 1);
3688
			memcpy(res + ssz, pp, sz);
3689
			break;
3690
		}
3691
		/*
3692
		 * We bail out on bad escapes.
3693
		 * No need to warn: we already did so when
3694
		 * roff_res() was called.
3695
		 */
3696
376121
		sz = (int)(p - pp);
3697
376121
		res = mandoc_realloc(res, ssz + sz + 1);
3698
376121
		memcpy(res + ssz, pp, sz);
3699
		ssz += sz;
3700
	}
3701
3702
324093
	res[(int)ssz] = '\0';
3703
324093
	return res;
3704
1057548
}
3705
3706
int
3707
roff_getformat(const struct roff *r)
3708
{
3709
3710
14620
	return r->format;
3711
}
3712
3713
/*
3714
 * Find out whether a line is a macro line or not.
3715
 * If it is, adjust the current position and return one; if it isn't,
3716
 * return zero and don't change the current position.
3717
 * If the control character has been set with `.cc', then let that grain
3718
 * precedence.
3719
 * This is slighly contrary to groff, where using the non-breaking
3720
 * control character when `cc' has been invoked will cause the
3721
 * non-breaking macro contents to be printed verbatim.
3722
 */
3723
int
3724
roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
3725
{
3726
	int		pos;
3727
3728
4361326
	pos = *ppos;
3729
3730

2180735
	if (r->control != '\0' && cp[pos] == r->control)
3731
24
		pos++;
3732
2180639
	else if (r->control != '\0')
3733
48
		return 0;
3734

2354437
	else if ('\\' == cp[pos] && '.' == cp[pos + 1])
3735
12
		pos += 2;
3736

3162253
	else if ('.' == cp[pos] || '\'' == cp[pos])
3737
1200427
		pos++;
3738
	else
3739
980152
		return 0;
3740
3741

5012926
	while (' ' == cp[pos] || '\t' == cp[pos])
3742
70358
		pos++;
3743
3744
1200463
	*ppos = pos;
3745
1200463
	return 1;
3746
2180663
}