GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: usr.bin/mandoc/roff.c Lines: 0 1140 0.0 %
Date: 2016-12-06 Branches: 0 816 0.0 %

Line Branch Exec Source
1
/*	$OpenBSD: roff.c,v 1.156 2016/01/08 17:48:04 schwarze Exp $ */
2
/*
3
 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4
 * Copyright (c) 2010-2015 Ingo Schwarze <schwarze@openbsd.org>
5
 *
6
 * Permission to use, copy, modify, and distribute this software for any
7
 * purpose with or without fee is hereby granted, provided that the above
8
 * copyright notice and this permission notice appear in all copies.
9
 *
10
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17
 */
18
#include <sys/types.h>
19
20
#include <assert.h>
21
#include <ctype.h>
22
#include <limits.h>
23
#include <stdio.h>
24
#include <stdlib.h>
25
#include <string.h>
26
27
#include "mandoc.h"
28
#include "mandoc_aux.h"
29
#include "roff.h"
30
#include "libmandoc.h"
31
#include "roff_int.h"
32
#include "libroff.h"
33
34
/* Maximum number of string expansions per line, to break infinite loops. */
35
#define	EXPAND_LIMIT	1000
36
37
/* --- data types --------------------------------------------------------- */
38
39
/*
 * Tokens for the roff requests handled in this file.
 *
 * The enumerators are used as indices into the roffs[] table:
 * roffhash_find() turns a table entry back into a token by subtracting
 * the table base, so the order here has to stay in step with the order
 * of the initialisers in roffs[].  The three trailing values are
 * special: ROFF_cblock pairs with the "." table entry, ROFF_USERDEF
 * pairs with the entry that has no name, and ROFF_MAX is the table
 * size as well as the "not found" result of roffhash_find().
 */
enum	rofft {
40
	ROFF_ab,
41
	ROFF_ad,
42
	ROFF_af,
43
	ROFF_aln,
44
	ROFF_als,
45
	ROFF_am,
46
	ROFF_am1,
47
	ROFF_ami,
48
	ROFF_ami1,
49
	ROFF_as,
50
	ROFF_as1,
51
	ROFF_asciify,
52
	ROFF_backtrace,
53
	ROFF_bd,
54
	ROFF_bleedat,
55
	ROFF_blm,
56
	ROFF_box,
57
	ROFF_boxa,
58
	ROFF_bp,
59
	ROFF_BP,
60
	/* MAN_br, MDOC_br */
61
	ROFF_break,
62
	ROFF_breakchar,
63
	ROFF_brnl,
64
	ROFF_brp,
65
	ROFF_brpnl,
66
	ROFF_c2,
67
	ROFF_cc,
68
	ROFF_ce,
69
	ROFF_cf,
70
	ROFF_cflags,
71
	ROFF_ch,
72
	ROFF_char,
73
	ROFF_chop,
74
	ROFF_class,
75
	ROFF_close,
76
	ROFF_CL,
77
	ROFF_color,
78
	ROFF_composite,
79
	ROFF_continue,
80
	ROFF_cp,
81
	ROFF_cropat,
82
	ROFF_cs,
83
	ROFF_cu,
84
	ROFF_da,
85
	ROFF_dch,
86
	ROFF_Dd,
87
	ROFF_de,
88
	ROFF_de1,
89
	ROFF_defcolor,
90
	ROFF_dei,
91
	ROFF_dei1,
92
	ROFF_device,
93
	ROFF_devicem,
94
	ROFF_di,
95
	ROFF_do,
96
	ROFF_ds,
97
	ROFF_ds1,
98
	ROFF_dwh,
99
	ROFF_dt,
100
	ROFF_ec,
101
	ROFF_ecr,
102
	ROFF_ecs,
103
	ROFF_el,
104
	ROFF_em,
105
	ROFF_EN,
106
	ROFF_eo,
107
	ROFF_EP,
108
	ROFF_EQ,
109
	ROFF_errprint,
110
	ROFF_ev,
111
	ROFF_evc,
112
	ROFF_ex,
113
	ROFF_fallback,
114
	ROFF_fam,
115
	ROFF_fc,
116
	ROFF_fchar,
117
	ROFF_fcolor,
118
	ROFF_fdeferlig,
119
	ROFF_feature,
120
	/* MAN_fi; ignored in mdoc(7) */
121
	ROFF_fkern,
122
	ROFF_fl,
123
	ROFF_flig,
124
	ROFF_fp,
125
	ROFF_fps,
126
	ROFF_fschar,
127
	ROFF_fspacewidth,
128
	ROFF_fspecial,
129
	/* MAN_ft; ignored in mdoc(7) */
130
	ROFF_ftr,
131
	ROFF_fzoom,
132
	ROFF_gcolor,
133
	ROFF_hc,
134
	ROFF_hcode,
135
	ROFF_hidechar,
136
	ROFF_hla,
137
	ROFF_hlm,
138
	ROFF_hpf,
139
	ROFF_hpfa,
140
	ROFF_hpfcode,
141
	ROFF_hw,
142
	ROFF_hy,
143
	ROFF_hylang,
144
	ROFF_hylen,
145
	ROFF_hym,
146
	ROFF_hypp,
147
	ROFF_hys,
148
	ROFF_ie,
149
	ROFF_if,
150
	ROFF_ig,
151
	/* MAN_in; ignored in mdoc(7) */
152
	ROFF_index,
153
	ROFF_it,
154
	ROFF_itc,
155
	ROFF_IX,
156
	ROFF_kern,
157
	ROFF_kernafter,
158
	ROFF_kernbefore,
159
	ROFF_kernpair,
160
	ROFF_lc,
161
	ROFF_lc_ctype,
162
	ROFF_lds,
163
	ROFF_length,
164
	ROFF_letadj,
165
	ROFF_lf,
166
	ROFF_lg,
167
	ROFF_lhang,
168
	ROFF_linetabs,
169
	/* MAN_ll, MDOC_ll */
170
	ROFF_lnr,
171
	ROFF_lnrf,
172
	ROFF_lpfx,
173
	ROFF_ls,
174
	ROFF_lsm,
175
	ROFF_lt,
176
	ROFF_mc,
177
	ROFF_mediasize,
178
	ROFF_minss,
179
	ROFF_mk,
180
	ROFF_mso,
181
	ROFF_na,
182
	ROFF_ne,
183
	/* MAN_nf; ignored in mdoc(7) */
184
	ROFF_nh,
185
	ROFF_nhychar,
186
	ROFF_nm,
187
	ROFF_nn,
188
	ROFF_nop,
189
	ROFF_nr,
190
	ROFF_nrf,
191
	ROFF_nroff,
192
	ROFF_ns,
193
	ROFF_nx,
194
	ROFF_open,
195
	ROFF_opena,
196
	ROFF_os,
197
	ROFF_output,
198
	ROFF_padj,
199
	ROFF_papersize,
200
	ROFF_pc,
201
	ROFF_pev,
202
	ROFF_pi,
203
	ROFF_PI,
204
	ROFF_pl,
205
	ROFF_pm,
206
	ROFF_pn,
207
	ROFF_pnr,
208
	ROFF_po,
209
	ROFF_ps,
210
	ROFF_psbb,
211
	ROFF_pshape,
212
	ROFF_pso,
213
	ROFF_ptr,
214
	ROFF_pvs,
215
	ROFF_rchar,
216
	ROFF_rd,
217
	ROFF_recursionlimit,
218
	ROFF_return,
219
	ROFF_rfschar,
220
	ROFF_rhang,
221
	ROFF_rj,
222
	ROFF_rm,
223
	ROFF_rn,
224
	ROFF_rnn,
225
	ROFF_rr,
226
	ROFF_rs,
227
	ROFF_rt,
228
	ROFF_schar,
229
	ROFF_sentchar,
230
	ROFF_shc,
231
	ROFF_shift,
232
	ROFF_sizes,
233
	ROFF_so,
234
	/* MAN_sp, MDOC_sp */
235
	ROFF_spacewidth,
236
	ROFF_special,
237
	ROFF_spreadwarn,
238
	ROFF_ss,
239
	ROFF_sty,
240
	ROFF_substring,
241
	ROFF_sv,
242
	ROFF_sy,
243
	ROFF_T_,
244
	ROFF_ta,
245
	ROFF_tc,
246
	ROFF_TE,
247
	ROFF_TH,
248
	ROFF_ti,
249
	ROFF_tkf,
250
	ROFF_tl,
251
	ROFF_tm,
252
	ROFF_tm1,
253
	ROFF_tmc,
254
	ROFF_tr,
255
	ROFF_track,
256
	ROFF_transchar,
257
	ROFF_trf,
258
	ROFF_trimat,
259
	ROFF_trin,
260
	ROFF_trnt,
261
	ROFF_troff,
262
	ROFF_TS,
263
	ROFF_uf,
264
	ROFF_ul,
265
	ROFF_unformat,
266
	ROFF_unwatch,
267
	ROFF_unwatchn,
268
	ROFF_vpt,
269
	ROFF_vs,
270
	ROFF_warn,
271
	ROFF_warnscale,
272
	ROFF_watch,
273
	ROFF_watchlength,
274
	ROFF_watchn,
275
	ROFF_wh,
276
	ROFF_while,
277
	ROFF_write,
278
	ROFF_writec,
279
	ROFF_writem,
280
	ROFF_xflag,
281
	ROFF_cblock,
282
	ROFF_USERDEF,
283
	ROFF_MAX
284
};
285
286
/*
 * A minimal string buffer: a pointer to the text together with its
 * cached length, so users need not call strlen() again.
 */
289
struct	roffstr {
290
	char		*p; /* nil-terminated buffer */
291
	size_t		 sz; /* saved strlen(p) */
292
};
293
294
/*
 * One key-value pair of roffstr buffers, linked into a singly-linked
 * list.  struct roff keeps two such lists: strtab and xmbtab.
 */
297
struct	roffkv {
298
	struct roffstr	 key;
299
	struct roffstr	 val;
300
	struct roffkv	*next; /* next in list */
301
};
302
303
/*
 * One number register (name plus integer value), linked into the
 * singly-linked list that hangs off struct roff's regtab member.
 */
306
struct	roffreg {
307
	struct roffstr	 key; /* register name */
308
	int		 val; /* current register value */
309
	struct roffreg	*next; /* next in list */
310
};
311
312
/*
 * State of one roff parser instance.  Allocated zero-filled by
 * roff_alloc(); roff_free1() releases everything the pointer members
 * below refer to (tables, equations, the node stack, rstack, the
 * register and string lists and the translation tables).
 */
struct	roff {
313
	struct mparse	*parse; /* parse point */
314
	struct roffnode	*last; /* leaf of stack */
315
	int		*rstack; /* stack of inverted `ie' values */
316
	struct roffreg	*regtab; /* number registers */
317
	struct roffkv	*strtab; /* user-defined strings & macros */
318
	struct roffkv	*xmbtab; /* multi-byte trans table (`tr') */
319
	struct roffstr	*xtab; /* single-byte trans table (`tr'); roff_free1() treats it as 128 entries */
320
	const char	*current_string; /* value of last called user macro */
321
	struct tbl_node	*first_tbl; /* first table parsed */
322
	struct tbl_node	*last_tbl; /* last table parsed */
323
	struct tbl_node	*tbl; /* current table being parsed */
324
	struct eqn_node	*last_eqn; /* last equation parsed */
325
	struct eqn_node	*first_eqn; /* first equation parsed */
326
	struct eqn_node	*eqn; /* current equation being parsed */
327
	int		 eqn_inline; /* current equation is inline */
328
	int		 options; /* parse options */
329
	int		 rstacksz; /* current size limit of rstack */
330
	int		 rstackpos; /* position in rstack; set to -1 on alloc and reset */
331
	int		 format; /* current file in mdoc or man format; starts as options & (MPARSE_MDOC | MPARSE_MAN) */
332
	int		 argc; /* number of args of the last macro */
333
	char		 control; /* control character; roff_reset() sets it to 0 */
334
};
335
336
/*
 * One entry on the stack of pending roff blocks (r->last is the top).
 * Entries are created by roffnode_push() and destroyed by
 * roffnode_pop(), which frees both name and end along with the node.
 */
struct	roffnode {
337
	enum rofft	 tok; /* type of node */
338
	struct roffnode	*parent; /* up one in stack */
339
	int		 line; /* parse line */
340
	int		 col; /* parse col */
341
	char		*name; /* node name, e.g. macro name */
342
	char		*end; /* end-rules: custom token */
343
	int		 endspan; /* end-rules: next-line or infty */
344
	int		 rule; /* current evaluation rule; inherited from the parent on push */
345
};
346
347
/*
 * Parameter list shared by every request handler, so that all handlers
 * have one signature and can be stored in roffproc pointers.
 */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
348
			 enum rofft tok, /* tok of macro */ \
349
			 struct buf *buf, /* input buffer */ \
350
			 int ln, /* parse line */ \
351
			 int ppos, /* original pos in buffer */ \
352
			 int pos, /* current pos in buffer */ \
353
			 int *offs /* reset offset of buffer data */
354
355
/* Pointer to a request handler taking ROFF_ARGS. */
typedef	enum rofferr (*roffproc)(ROFF_ARGS);
356
357
/*
 * Description of one request: its name and up to three handlers.
 * The roffs[] table initialises these members positionally, so their
 * order must not change without updating that table.
 */
struct	roffmac {
358
	const char	*name; /* macro name */
359
	roffproc	 proc; /* process new macro */
360
	roffproc	 text; /* process as child text of macro */
361
	roffproc	 sub; /* process as child of macro */
362
	int		 flags; /* 0 or ROFFMAC_STRUCT */
363
#define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
364
	struct roffmac	*next; /* next entry in the same hash bucket, set by roffhash_init() */
365
};
366
367
/*
 * One predefined string: a name and the text that replaces it.
 */
struct	predef {
368
	const char	*name; /* predefined input name */
369
	const char	*str; /* replacement symbol */
370
};
371
372
/*
 * Expands to one struct predef initialiser followed by a comma.
 * NOTE(review): presumably invoked once per entry by "predefs.in",
 * which is not visible here -- confirm.
 */
#define	PREDEF(__name, __str) \
373
	{ (__name), (__str) },
374
375
/* --- function prototypes ------------------------------------------------ */
376
377
static	enum rofft	 roffhash_find(const char *, size_t);
378
static	void		 roffhash_init(void);
379
static	void		 roffnode_cleanscope(struct roff *);
380
static	void		 roffnode_pop(struct roff *);
381
static	void		 roffnode_push(struct roff *, enum rofft,
382
				const char *, int, int);
383
static	enum rofferr	 roff_block(ROFF_ARGS);
384
static	enum rofferr	 roff_block_text(ROFF_ARGS);
385
static	enum rofferr	 roff_block_sub(ROFF_ARGS);
386
static	enum rofferr	 roff_brp(ROFF_ARGS);
387
static	enum rofferr	 roff_cblock(ROFF_ARGS);
388
static	enum rofferr	 roff_cc(ROFF_ARGS);
389
static	void		 roff_ccond(struct roff *, int, int);
390
static	enum rofferr	 roff_cond(ROFF_ARGS);
391
static	enum rofferr	 roff_cond_text(ROFF_ARGS);
392
static	enum rofferr	 roff_cond_sub(ROFF_ARGS);
393
static	enum rofferr	 roff_ds(ROFF_ARGS);
394
static	enum rofferr	 roff_eqndelim(struct roff *, struct buf *, int);
395
static	int		 roff_evalcond(struct roff *r, int, char *, int *);
396
static	int		 roff_evalnum(struct roff *, int,
397
				const char *, int *, int *, int);
398
static	int		 roff_evalpar(struct roff *, int,
399
				const char *, int *, int *, int);
400
static	int		 roff_evalstrcond(const char *, int *);
401
static	void		 roff_free1(struct roff *);
402
static	void		 roff_freereg(struct roffreg *);
403
static	void		 roff_freestr(struct roffkv *);
404
static	size_t		 roff_getname(struct roff *, char **, int, int);
405
static	int		 roff_getnum(const char *, int *, int *, int);
406
static	int		 roff_getop(const char *, int *, char *);
407
static	int		 roff_getregn(const struct roff *,
408
				const char *, size_t);
409
static	int		 roff_getregro(const struct roff *,
410
				const char *name);
411
static	const char	*roff_getstrn(const struct roff *,
412
				const char *, size_t);
413
static	int		 roff_hasregn(const struct roff *,
414
				const char *, size_t);
415
static	enum rofferr	 roff_insec(ROFF_ARGS);
416
static	enum rofferr	 roff_it(ROFF_ARGS);
417
static	enum rofferr	 roff_line_ignore(ROFF_ARGS);
418
static	void		 roff_man_alloc1(struct roff_man *);
419
static	void		 roff_man_free1(struct roff_man *);
420
static	enum rofferr	 roff_nr(ROFF_ARGS);
421
static	enum rofft	 roff_parse(struct roff *, char *, int *,
422
				int, int);
423
static	enum rofferr	 roff_parsetext(struct buf *, int, int *);
424
static	enum rofferr	 roff_res(struct roff *, struct buf *, int, int);
425
static	enum rofferr	 roff_rm(ROFF_ARGS);
426
static	enum rofferr	 roff_rr(ROFF_ARGS);
427
static	void		 roff_setstr(struct roff *,
428
				const char *, const char *, int);
429
static	void		 roff_setstrn(struct roffkv **, const char *,
430
				size_t, const char *, size_t, int);
431
static	enum rofferr	 roff_so(ROFF_ARGS);
432
static	enum rofferr	 roff_tr(ROFF_ARGS);
433
static	enum rofferr	 roff_Dd(ROFF_ARGS);
434
static	enum rofferr	 roff_TH(ROFF_ARGS);
435
static	enum rofferr	 roff_TE(ROFF_ARGS);
436
static	enum rofferr	 roff_TS(ROFF_ARGS);
437
static	enum rofferr	 roff_EQ(ROFF_ARGS);
438
static	enum rofferr	 roff_EN(ROFF_ARGS);
439
static	enum rofferr	 roff_T_(ROFF_ARGS);
440
static	enum rofferr	 roff_unsupp(ROFF_ARGS);
441
static	enum rofferr	 roff_userdef(ROFF_ARGS);
442
443
/* --- constant data ------------------------------------------------------ */
444
445
/*
 * Request names are hashed on their first character only, which must
 * lie in the printable range ASCII_LO..ASCII_HI; HASHWIDTH is the
 * number of characters in that range and hence the number of buckets.
 * See roffhash_find().
 */
446
447
#define	ASCII_HI	 126
448
#define	ASCII_LO	 33
449
#define	HASHWIDTH	(ASCII_HI - ASCII_LO + 1)
450
451
#define	ROFFNUM_SCALE	(1 << 0)  /* Honour scaling in roff_getnum(). */
452
#define	ROFFNUM_WHITE	(1 << 1)  /* Skip whitespace in roff_evalnum(). */
453
454
/* Bucket heads; each bucket is a chain of roffs[] entries linked via ->next. */
static	struct roffmac	*hash[HASHWIDTH];
455
456
/*
 * Table of all requests, indexed by enum rofft: the initialisers below
 * appear in the same order as the enumerators, and roffhash_find()
 * relies on that when it converts a table entry into a token.
 * Columns are: name, proc, text, sub, flags, next.  The next column is
 * always NULL here and is filled in by roffhash_init().  The final
 * entry has no name and is not entered into the hash table; it belongs
 * to ROFF_USERDEF.
 */
static	struct roffmac	 roffs[ROFF_MAX] = {
457
	{ "ab", roff_unsupp, NULL, NULL, 0, NULL },
458
	{ "ad", roff_line_ignore, NULL, NULL, 0, NULL },
459
	{ "af", roff_line_ignore, NULL, NULL, 0, NULL },
460
	{ "aln", roff_unsupp, NULL, NULL, 0, NULL },
461
	{ "als", roff_unsupp, NULL, NULL, 0, NULL },
462
	{ "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
463
	{ "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
464
	{ "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
465
	{ "ami1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
466
	{ "as", roff_ds, NULL, NULL, 0, NULL },
467
	{ "as1", roff_ds, NULL, NULL, 0, NULL },
468
	{ "asciify", roff_unsupp, NULL, NULL, 0, NULL },
469
	{ "backtrace", roff_line_ignore, NULL, NULL, 0, NULL },
470
	{ "bd", roff_line_ignore, NULL, NULL, 0, NULL },
471
	{ "bleedat", roff_line_ignore, NULL, NULL, 0, NULL },
472
	{ "blm", roff_unsupp, NULL, NULL, 0, NULL },
473
	{ "box", roff_unsupp, NULL, NULL, 0, NULL },
474
	{ "boxa", roff_unsupp, NULL, NULL, 0, NULL },
475
	{ "bp", roff_line_ignore, NULL, NULL, 0, NULL },
476
	{ "BP", roff_unsupp, NULL, NULL, 0, NULL },
477
	{ "break", roff_unsupp, NULL, NULL, 0, NULL },
478
	{ "breakchar", roff_line_ignore, NULL, NULL, 0, NULL },
479
	{ "brnl", roff_line_ignore, NULL, NULL, 0, NULL },
480
	{ "brp", roff_brp, NULL, NULL, 0, NULL },
481
	{ "brpnl", roff_line_ignore, NULL, NULL, 0, NULL },
482
	{ "c2", roff_unsupp, NULL, NULL, 0, NULL },
483
	{ "cc", roff_cc, NULL, NULL, 0, NULL },
484
	{ "ce", roff_line_ignore, NULL, NULL, 0, NULL },
485
	{ "cf", roff_insec, NULL, NULL, 0, NULL },
486
	{ "cflags", roff_line_ignore, NULL, NULL, 0, NULL },
487
	{ "ch", roff_line_ignore, NULL, NULL, 0, NULL },
488
	{ "char", roff_unsupp, NULL, NULL, 0, NULL },
489
	{ "chop", roff_unsupp, NULL, NULL, 0, NULL },
490
	{ "class", roff_line_ignore, NULL, NULL, 0, NULL },
491
	{ "close", roff_insec, NULL, NULL, 0, NULL },
492
	{ "CL", roff_unsupp, NULL, NULL, 0, NULL },
493
	{ "color", roff_line_ignore, NULL, NULL, 0, NULL },
494
	{ "composite", roff_unsupp, NULL, NULL, 0, NULL },
495
	{ "continue", roff_unsupp, NULL, NULL, 0, NULL },
496
	{ "cp", roff_line_ignore, NULL, NULL, 0, NULL },
497
	{ "cropat", roff_line_ignore, NULL, NULL, 0, NULL },
498
	{ "cs", roff_line_ignore, NULL, NULL, 0, NULL },
499
	{ "cu", roff_line_ignore, NULL, NULL, 0, NULL },
500
	{ "da", roff_unsupp, NULL, NULL, 0, NULL },
501
	{ "dch", roff_unsupp, NULL, NULL, 0, NULL },
502
	{ "Dd", roff_Dd, NULL, NULL, 0, NULL },
503
	{ "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
504
	{ "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
505
	{ "defcolor", roff_line_ignore, NULL, NULL, 0, NULL },
506
	{ "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
507
	{ "dei1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
508
	{ "device", roff_unsupp, NULL, NULL, 0, NULL },
509
	{ "devicem", roff_unsupp, NULL, NULL, 0, NULL },
510
	{ "di", roff_unsupp, NULL, NULL, 0, NULL },
511
	{ "do", roff_unsupp, NULL, NULL, 0, NULL },
512
	{ "ds", roff_ds, NULL, NULL, 0, NULL },
513
	{ "ds1", roff_ds, NULL, NULL, 0, NULL },
514
	{ "dwh", roff_unsupp, NULL, NULL, 0, NULL },
515
	{ "dt", roff_unsupp, NULL, NULL, 0, NULL },
516
	{ "ec", roff_unsupp, NULL, NULL, 0, NULL },
517
	{ "ecr", roff_unsupp, NULL, NULL, 0, NULL },
518
	{ "ecs", roff_unsupp, NULL, NULL, 0, NULL },
519
	{ "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
520
	{ "em", roff_unsupp, NULL, NULL, 0, NULL },
521
	{ "EN", roff_EN, NULL, NULL, 0, NULL },
522
	{ "eo", roff_unsupp, NULL, NULL, 0, NULL },
523
	{ "EP", roff_unsupp, NULL, NULL, 0, NULL },
524
	{ "EQ", roff_EQ, NULL, NULL, 0, NULL },
525
	{ "errprint", roff_line_ignore, NULL, NULL, 0, NULL },
526
	{ "ev", roff_unsupp, NULL, NULL, 0, NULL },
527
	{ "evc", roff_unsupp, NULL, NULL, 0, NULL },
528
	{ "ex", roff_unsupp, NULL, NULL, 0, NULL },
529
	{ "fallback", roff_line_ignore, NULL, NULL, 0, NULL },
530
	{ "fam", roff_line_ignore, NULL, NULL, 0, NULL },
531
	{ "fc", roff_unsupp, NULL, NULL, 0, NULL },
532
	{ "fchar", roff_unsupp, NULL, NULL, 0, NULL },
533
	{ "fcolor", roff_line_ignore, NULL, NULL, 0, NULL },
534
	{ "fdeferlig", roff_line_ignore, NULL, NULL, 0, NULL },
535
	{ "feature", roff_line_ignore, NULL, NULL, 0, NULL },
536
	{ "fkern", roff_line_ignore, NULL, NULL, 0, NULL },
537
	{ "fl", roff_line_ignore, NULL, NULL, 0, NULL },
538
	{ "flig", roff_line_ignore, NULL, NULL, 0, NULL },
539
	{ "fp", roff_line_ignore, NULL, NULL, 0, NULL },
540
	{ "fps", roff_line_ignore, NULL, NULL, 0, NULL },
541
	{ "fschar", roff_unsupp, NULL, NULL, 0, NULL },
542
	{ "fspacewidth", roff_line_ignore, NULL, NULL, 0, NULL },
543
	{ "fspecial", roff_line_ignore, NULL, NULL, 0, NULL },
544
	{ "ftr", roff_line_ignore, NULL, NULL, 0, NULL },
545
	{ "fzoom", roff_line_ignore, NULL, NULL, 0, NULL },
546
	{ "gcolor", roff_line_ignore, NULL, NULL, 0, NULL },
547
	{ "hc", roff_line_ignore, NULL, NULL, 0, NULL },
548
	{ "hcode", roff_line_ignore, NULL, NULL, 0, NULL },
549
	{ "hidechar", roff_line_ignore, NULL, NULL, 0, NULL },
550
	{ "hla", roff_line_ignore, NULL, NULL, 0, NULL },
551
	{ "hlm", roff_line_ignore, NULL, NULL, 0, NULL },
552
	{ "hpf", roff_line_ignore, NULL, NULL, 0, NULL },
553
	{ "hpfa", roff_line_ignore, NULL, NULL, 0, NULL },
554
	{ "hpfcode", roff_line_ignore, NULL, NULL, 0, NULL },
555
	{ "hw", roff_line_ignore, NULL, NULL, 0, NULL },
556
	{ "hy", roff_line_ignore, NULL, NULL, 0, NULL },
557
	{ "hylang", roff_line_ignore, NULL, NULL, 0, NULL },
558
	{ "hylen", roff_line_ignore, NULL, NULL, 0, NULL },
559
	{ "hym", roff_line_ignore, NULL, NULL, 0, NULL },
560
	{ "hypp", roff_line_ignore, NULL, NULL, 0, NULL },
561
	{ "hys", roff_line_ignore, NULL, NULL, 0, NULL },
562
	{ "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
563
	{ "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
564
	{ "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
565
	{ "index", roff_unsupp, NULL, NULL, 0, NULL },
566
	{ "it", roff_it, NULL, NULL, 0, NULL },
567
	{ "itc", roff_unsupp, NULL, NULL, 0, NULL },
568
	{ "IX", roff_line_ignore, NULL, NULL, 0, NULL },
569
	{ "kern", roff_line_ignore, NULL, NULL, 0, NULL },
570
	{ "kernafter", roff_line_ignore, NULL, NULL, 0, NULL },
571
	{ "kernbefore", roff_line_ignore, NULL, NULL, 0, NULL },
572
	{ "kernpair", roff_line_ignore, NULL, NULL, 0, NULL },
573
	{ "lc", roff_unsupp, NULL, NULL, 0, NULL },
574
	{ "lc_ctype", roff_unsupp, NULL, NULL, 0, NULL },
575
	{ "lds", roff_unsupp, NULL, NULL, 0, NULL },
576
	{ "length", roff_unsupp, NULL, NULL, 0, NULL },
577
	{ "letadj", roff_line_ignore, NULL, NULL, 0, NULL },
578
	{ "lf", roff_insec, NULL, NULL, 0, NULL },
579
	{ "lg", roff_line_ignore, NULL, NULL, 0, NULL },
580
	{ "lhang", roff_line_ignore, NULL, NULL, 0, NULL },
581
	{ "linetabs", roff_unsupp, NULL, NULL, 0, NULL },
582
	{ "lnr", roff_unsupp, NULL, NULL, 0, NULL },
583
	{ "lnrf", roff_unsupp, NULL, NULL, 0, NULL },
584
	{ "lpfx", roff_unsupp, NULL, NULL, 0, NULL },
585
	{ "ls", roff_line_ignore, NULL, NULL, 0, NULL },
586
	{ "lsm", roff_unsupp, NULL, NULL, 0, NULL },
587
	{ "lt", roff_line_ignore, NULL, NULL, 0, NULL },
588
	{ "mc", roff_line_ignore, NULL, NULL, 0, NULL },
589
	{ "mediasize", roff_line_ignore, NULL, NULL, 0, NULL },
590
	{ "minss", roff_line_ignore, NULL, NULL, 0, NULL },
591
	{ "mk", roff_line_ignore, NULL, NULL, 0, NULL },
592
	{ "mso", roff_insec, NULL, NULL, 0, NULL },
593
	{ "na", roff_line_ignore, NULL, NULL, 0, NULL },
594
	{ "ne", roff_line_ignore, NULL, NULL, 0, NULL },
595
	{ "nh", roff_line_ignore, NULL, NULL, 0, NULL },
596
	{ "nhychar", roff_line_ignore, NULL, NULL, 0, NULL },
597
	{ "nm", roff_unsupp, NULL, NULL, 0, NULL },
598
	{ "nn", roff_unsupp, NULL, NULL, 0, NULL },
599
	{ "nop", roff_unsupp, NULL, NULL, 0, NULL },
600
	{ "nr", roff_nr, NULL, NULL, 0, NULL },
601
	{ "nrf", roff_unsupp, NULL, NULL, 0, NULL },
602
	{ "nroff", roff_line_ignore, NULL, NULL, 0, NULL },
603
	{ "ns", roff_line_ignore, NULL, NULL, 0, NULL },
604
	{ "nx", roff_insec, NULL, NULL, 0, NULL },
605
	{ "open", roff_insec, NULL, NULL, 0, NULL },
606
	{ "opena", roff_insec, NULL, NULL, 0, NULL },
607
	{ "os", roff_line_ignore, NULL, NULL, 0, NULL },
608
	{ "output", roff_unsupp, NULL, NULL, 0, NULL },
609
	{ "padj", roff_line_ignore, NULL, NULL, 0, NULL },
610
	{ "papersize", roff_line_ignore, NULL, NULL, 0, NULL },
611
	{ "pc", roff_line_ignore, NULL, NULL, 0, NULL },
612
	{ "pev", roff_line_ignore, NULL, NULL, 0, NULL },
613
	{ "pi", roff_insec, NULL, NULL, 0, NULL },
614
	{ "PI", roff_unsupp, NULL, NULL, 0, NULL },
615
	{ "pl", roff_line_ignore, NULL, NULL, 0, NULL },
616
	{ "pm", roff_line_ignore, NULL, NULL, 0, NULL },
617
	{ "pn", roff_line_ignore, NULL, NULL, 0, NULL },
618
	{ "pnr", roff_line_ignore, NULL, NULL, 0, NULL },
619
	{ "po", roff_line_ignore, NULL, NULL, 0, NULL },
620
	{ "ps", roff_line_ignore, NULL, NULL, 0, NULL },
621
	{ "psbb", roff_unsupp, NULL, NULL, 0, NULL },
622
	{ "pshape", roff_unsupp, NULL, NULL, 0, NULL },
623
	{ "pso", roff_insec, NULL, NULL, 0, NULL },
624
	{ "ptr", roff_line_ignore, NULL, NULL, 0, NULL },
625
	{ "pvs", roff_line_ignore, NULL, NULL, 0, NULL },
626
	{ "rchar", roff_unsupp, NULL, NULL, 0, NULL },
627
	{ "rd", roff_line_ignore, NULL, NULL, 0, NULL },
628
	{ "recursionlimit", roff_line_ignore, NULL, NULL, 0, NULL },
629
	{ "return", roff_unsupp, NULL, NULL, 0, NULL },
630
	{ "rfschar", roff_unsupp, NULL, NULL, 0, NULL },
631
	{ "rhang", roff_line_ignore, NULL, NULL, 0, NULL },
632
	{ "rj", roff_line_ignore, NULL, NULL, 0, NULL },
633
	{ "rm", roff_rm, NULL, NULL, 0, NULL },
634
	{ "rn", roff_unsupp, NULL, NULL, 0, NULL },
635
	{ "rnn", roff_unsupp, NULL, NULL, 0, NULL },
636
	{ "rr", roff_rr, NULL, NULL, 0, NULL },
637
	{ "rs", roff_line_ignore, NULL, NULL, 0, NULL },
638
	{ "rt", roff_line_ignore, NULL, NULL, 0, NULL },
639
	{ "schar", roff_unsupp, NULL, NULL, 0, NULL },
640
	{ "sentchar", roff_line_ignore, NULL, NULL, 0, NULL },
641
	{ "shc", roff_line_ignore, NULL, NULL, 0, NULL },
642
	{ "shift", roff_unsupp, NULL, NULL, 0, NULL },
643
	{ "sizes", roff_line_ignore, NULL, NULL, 0, NULL },
644
	{ "so", roff_so, NULL, NULL, 0, NULL },
645
	{ "spacewidth", roff_line_ignore, NULL, NULL, 0, NULL },
646
	{ "special", roff_line_ignore, NULL, NULL, 0, NULL },
647
	{ "spreadwarn", roff_line_ignore, NULL, NULL, 0, NULL },
648
	{ "ss", roff_line_ignore, NULL, NULL, 0, NULL },
649
	{ "sty", roff_line_ignore, NULL, NULL, 0, NULL },
650
	{ "substring", roff_unsupp, NULL, NULL, 0, NULL },
651
	{ "sv", roff_line_ignore, NULL, NULL, 0, NULL },
652
	{ "sy", roff_insec, NULL, NULL, 0, NULL },
653
	{ "T&", roff_T_, NULL, NULL, 0, NULL },
654
	{ "ta", roff_unsupp, NULL, NULL, 0, NULL },
655
	{ "tc", roff_unsupp, NULL, NULL, 0, NULL },
656
	{ "TE", roff_TE, NULL, NULL, 0, NULL },
657
	{ "TH", roff_TH, NULL, NULL, 0, NULL },
658
	{ "ti", roff_unsupp, NULL, NULL, 0, NULL },
659
	{ "tkf", roff_line_ignore, NULL, NULL, 0, NULL },
660
	{ "tl", roff_unsupp, NULL, NULL, 0, NULL },
661
	{ "tm", roff_line_ignore, NULL, NULL, 0, NULL },
662
	{ "tm1", roff_line_ignore, NULL, NULL, 0, NULL },
663
	{ "tmc", roff_line_ignore, NULL, NULL, 0, NULL },
664
	{ "tr", roff_tr, NULL, NULL, 0, NULL },
665
	{ "track", roff_line_ignore, NULL, NULL, 0, NULL },
666
	{ "transchar", roff_line_ignore, NULL, NULL, 0, NULL },
667
	{ "trf", roff_insec, NULL, NULL, 0, NULL },
668
	{ "trimat", roff_line_ignore, NULL, NULL, 0, NULL },
669
	{ "trin", roff_unsupp, NULL, NULL, 0, NULL },
670
	{ "trnt", roff_unsupp, NULL, NULL, 0, NULL },
671
	{ "troff", roff_line_ignore, NULL, NULL, 0, NULL },
672
	{ "TS", roff_TS, NULL, NULL, 0, NULL },
673
	{ "uf", roff_line_ignore, NULL, NULL, 0, NULL },
674
	{ "ul", roff_line_ignore, NULL, NULL, 0, NULL },
675
	{ "unformat", roff_unsupp, NULL, NULL, 0, NULL },
676
	{ "unwatch", roff_line_ignore, NULL, NULL, 0, NULL },
677
	{ "unwatchn", roff_line_ignore, NULL, NULL, 0, NULL },
678
	{ "vpt", roff_line_ignore, NULL, NULL, 0, NULL },
679
	{ "vs", roff_line_ignore, NULL, NULL, 0, NULL },
680
	{ "warn", roff_line_ignore, NULL, NULL, 0, NULL },
681
	{ "warnscale", roff_line_ignore, NULL, NULL, 0, NULL },
682
	{ "watch", roff_line_ignore, NULL, NULL, 0, NULL },
683
	{ "watchlength", roff_line_ignore, NULL, NULL, 0, NULL },
684
	{ "watchn", roff_line_ignore, NULL, NULL, 0, NULL },
685
	{ "wh", roff_unsupp, NULL, NULL, 0, NULL },
686
	{ "while", roff_unsupp, NULL, NULL, 0, NULL },
687
	{ "write", roff_insec, NULL, NULL, 0, NULL },
688
	{ "writec", roff_insec, NULL, NULL, 0, NULL },
689
	{ "writem", roff_insec, NULL, NULL, 0, NULL },
690
	{ "xflag", roff_line_ignore, NULL, NULL, 0, NULL },
691
	{ ".", roff_cblock, NULL, NULL, 0, NULL },
692
	{ NULL, roff_userdef, NULL, NULL, 0, NULL },
693
};
694
695
/*
 * NULL-terminated list of macro names, exported with external linkage.
 * Not currently implemented: Ds em Eq LP Me PP pp Or Rd Sf SH
 * NOTE(review): presumably the mdoc(7) names that must not be
 * redefined by user macros; the users of this array are not visible
 * here -- confirm.
 * NOTE(review): identifiers starting with two underscores are reserved
 * for the implementation in ISO C; renaming needs every user updated.
 */
696
const	char *const __mdoc_reserved[] = {
697
	"Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
698
	"Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq",
699
	"Brc", "Bro", "Brq", "Bsx", "Bt", "Bx",
700
	"Cd", "Cm", "Db", "Dc", "Dd", "Dl", "Do", "Dq",
701
	"Dt", "Dv", "Dx", "D1",
702
	"Ec", "Ed", "Ef", "Ek", "El", "Em",
703
	"En", "Eo", "Er", "Es", "Ev", "Ex",
704
	"Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
705
	"Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp",
706
	"Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx",
707
	"Oc", "Oo", "Op", "Os", "Ot", "Ox",
708
	"Pa", "Pc", "Pf", "Po", "Pp", "Pq",
709
	"Qc", "Ql", "Qo", "Qq", "Re", "Rs", "Rv",
710
	"Sc", "Sh", "Sm", "So", "Sq",
711
	"Ss", "St", "Sx", "Sy",
712
	"Ta", "Tn", "Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr",
713
	"%A", "%B", "%C", "%D", "%I", "%J", "%N", "%O",
714
	"%P", "%Q", "%R", "%T", "%U", "%V",
715
	NULL
716
};
717
718
/*
 * NULL-terminated list of macro names, exported with external linkage.
 * Not currently implemented: BT DE DS ME MT PT SY TQ YS
 * NOTE(review): presumably the man(7) counterpart of the mdoc(7) list;
 * the users of this array are not visible here -- confirm.
 * NOTE(review): identifiers starting with two underscores are reserved
 * for the implementation in ISO C; renaming needs every user updated.
 */
719
const	char *const __man_reserved[] = {
720
	"AT", "B", "BI", "BR", "DT",
721
	"EE", "EN", "EQ", "EX", "HP", "I", "IB", "IP", "IR",
722
	"LP", "OP", "P", "PD", "PP",
723
	"R", "RB", "RE", "RI", "RS", "SB", "SH", "SM", "SS",
724
	"TE", "TH", "TP", "TS", "T&", "UC", "UE", "UR",
725
	NULL
726
};
727
728
/*
 * Array of injected predefined strings.  The initialisers are pulled
 * in from "predefs.in".
 * NOTE(review): PREDEFS_MAX is a hand-maintained count.  If the
 * included file supplies fewer entries, the remaining elements are
 * zero-initialised (NULL name and str); if it supplies more, the
 * build fails.  Confirm it matches the number of PREDEF() lines.
 */
729
#define	PREDEFS_MAX	 38
730
static	const struct predef predefs[PREDEFS_MAX] = {
731
#include "predefs.in"
732
};
733
734
/*
 * Bucket index for the name p: its first character, rebased so that
 * ASCII_LO maps to bucket 0.  The caller must have checked that the
 * character lies in ASCII_LO..ASCII_HI; see roffhash_find().
 * The parameter is parenthesised so that an expression argument such
 * as ROFF_HASH(s + 1) indexes the intended pointer.
 */
#define	ROFF_HASH(p)	((p)[0] - ASCII_LO)
736
737
static	int	 roffit_lines;  /* number of lines to delay */
738
static	char	*roffit_macro;  /* nil-terminated macro line */
739
740
741
/* --- request table ------------------------------------------------------ */
742
743
static void
744
roffhash_init(void)
745
{
746
	struct roffmac	 *n;
747
	int		  buc, i;
748
749
	for (i = 0; i < (int)ROFF_USERDEF; i++) {
750
		assert(roffs[i].name[0] >= ASCII_LO);
751
		assert(roffs[i].name[0] <= ASCII_HI);
752
753
		buc = ROFF_HASH(roffs[i].name);
754
755
		if (NULL != (n = hash[buc])) {
756
			for ( ; n->next; n = n->next)
757
				/* Do nothing. */ ;
758
			n->next = &roffs[i];
759
		} else
760
			hash[buc] = &roffs[i];
761
	}
762
}
763
764
/*
765
 * Look up a roff token by its name.  Returns ROFF_MAX if no macro by
766
 * the nil-terminated string name could be found.
767
 */
768
static enum rofft
769
roffhash_find(const char *p, size_t s)
770
{
771
	int		 buc;
772
	struct roffmac	*n;
773
774
	/*
775
	 * libroff has an extremely simple hashtable, for the time
776
	 * being, which simply keys on the first character, which must
777
	 * be printable, then walks a chain.  It works well enough until
778
	 * optimised.
779
	 */
780
781
	if (p[0] < ASCII_LO || p[0] > ASCII_HI)
782
		return ROFF_MAX;
783
784
	buc = ROFF_HASH(p);
785
786
	if (NULL == (n = hash[buc]))
787
		return ROFF_MAX;
788
	for ( ; n; n = n->next)
789
		if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
790
			return (enum rofft)(n - roffs);
791
792
	return ROFF_MAX;
793
}
794
795
/* --- stack of request blocks -------------------------------------------- */
796
797
/*
798
 * Pop the current node off of the stack of roff instructions currently
799
 * pending.
800
 */
801
static void
802
roffnode_pop(struct roff *r)
803
{
804
	struct roffnode	*p;
805
806
	assert(r->last);
807
	p = r->last;
808
809
	r->last = r->last->parent;
810
	free(p->name);
811
	free(p->end);
812
	free(p);
813
}
814
815
/*
816
 * Push a roff node onto the instruction stack.  This must later be
817
 * removed with roffnode_pop().
818
 */
819
static void
820
roffnode_push(struct roff *r, enum rofft tok, const char *name,
821
		int line, int col)
822
{
823
	struct roffnode	*p;
824
825
	p = mandoc_calloc(1, sizeof(struct roffnode));
826
	p->tok = tok;
827
	if (name)
828
		p->name = mandoc_strdup(name);
829
	p->parent = r->last;
830
	p->line = line;
831
	p->col = col;
832
	p->rule = p->parent ? p->parent->rule : 0;
833
834
	r->last = p;
835
}
836
837
/* --- roff parser state data management ---------------------------------- */
838
839
static void
840
roff_free1(struct roff *r)
841
{
842
	struct tbl_node	*tbl;
843
	struct eqn_node	*e;
844
	int		 i;
845
846
	while (NULL != (tbl = r->first_tbl)) {
847
		r->first_tbl = tbl->next;
848
		tbl_free(tbl);
849
	}
850
	r->first_tbl = r->last_tbl = r->tbl = NULL;
851
852
	while (NULL != (e = r->first_eqn)) {
853
		r->first_eqn = e->next;
854
		eqn_free(e);
855
	}
856
	r->first_eqn = r->last_eqn = r->eqn = NULL;
857
858
	while (r->last)
859
		roffnode_pop(r);
860
861
	free (r->rstack);
862
	r->rstack = NULL;
863
	r->rstacksz = 0;
864
	r->rstackpos = -1;
865
866
	roff_freereg(r->regtab);
867
	r->regtab = NULL;
868
869
	roff_freestr(r->strtab);
870
	roff_freestr(r->xmbtab);
871
	r->strtab = r->xmbtab = NULL;
872
873
	if (r->xtab)
874
		for (i = 0; i < 128; i++)
875
			free(r->xtab[i].p);
876
	free(r->xtab);
877
	r->xtab = NULL;
878
}
879
880
void
881
roff_reset(struct roff *r)
882
{
883
884
	roff_free1(r);
885
	r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
886
	r->control = 0;
887
}
888
889
/*
 * Destroy a parser instance created by roff_alloc(), including all
 * data it owns.  Like free(3), passing NULL is allowed and does
 * nothing, so callers on error paths need not check first.
 */
void
roff_free(struct roff *r)
{

	if (r == NULL)
		return;
	roff_free1(r);
	free(r);
}
896
897
struct roff *
898
roff_alloc(struct mparse *parse, int options)
899
{
900
	struct roff	*r;
901
902
	r = mandoc_calloc(1, sizeof(struct roff));
903
	r->parse = parse;
904
	r->options = options;
905
	r->format = options & (MPARSE_MDOC | MPARSE_MAN);
906
	r->rstackpos = -1;
907
908
	roffhash_init();
909
910
	return r;
911
}
912
913
/* --- syntax tree state data management ---------------------------------- */
914
915
static void
916
roff_man_free1(struct roff_man *man)
917
{
918
919
	if (man->first != NULL)
920
		roff_node_delete(man, man->first);
921
	free(man->meta.msec);
922
	free(man->meta.vol);
923
	free(man->meta.os);
924
	free(man->meta.arch);
925
	free(man->meta.title);
926
	free(man->meta.name);
927
	free(man->meta.date);
928
}
929
930
static void
931
roff_man_alloc1(struct roff_man *man)
932
{
933
934
	memset(&man->meta, 0, sizeof(man->meta));
935
	man->first = mandoc_calloc(1, sizeof(*man->first));
936
	man->first->type = ROFFT_ROOT;
937
	man->last = man->first;
938
	man->last_es = NULL;
939
	man->flags = 0;
940
	man->macroset = MACROSET_NONE;
941
	man->lastsec = man->lastnamed = SEC_NONE;
942
	man->next = ROFF_NEXT_CHILD;
943
}
944
945
/*
 * Discard the current syntax tree and metadata of man and start over
 * with an empty tree.  The order matters: the old contents must be
 * released before the pointers to them are overwritten.
 */
void
roff_man_reset(struct roff_man *man)
{
	roff_man_free1(man);	/* release old tree and metadata */
	roff_man_alloc1(man);	/* then set up a fresh root */
}
952
953
/*
 * Destroy a structure created by roff_man_alloc(), including its
 * syntax tree and metadata.  Like free(3), passing NULL is allowed
 * and does nothing.
 */
void
roff_man_free(struct roff_man *man)
{

	if (man == NULL)
		return;
	roff_man_free1(man);
	free(man);
}
960
961
struct roff_man *
962
roff_man_alloc(struct roff *roff, struct mparse *parse,
963
	const char *defos, int quick)
964
{
965
	struct roff_man *man;
966
967
	man = mandoc_calloc(1, sizeof(*man));
968
	man->parse = parse;
969
	man->roff = roff;
970
	man->defos = defos;
971
	man->quick = quick;
972
	roff_man_alloc1(man);
973
	return man;
974
}
975
976
/* --- syntax tree handling ----------------------------------------------- */
977
978
struct roff_node *
979
roff_node_alloc(struct roff_man *man, int line, int pos,
980
	enum roff_type type, int tok)
981
{
982
	struct roff_node	*n;
983
984
	n = mandoc_calloc(1, sizeof(*n));
985
	n->line = line;
986
	n->pos = pos;
987
	n->tok = tok;
988
	n->type = type;
989
	n->sec = man->lastsec;
990
991
	if (man->flags & MDOC_SYNOPSIS)
992
		n->flags |= MDOC_SYNPRETTY;
993
	else
994
		n->flags &= ~MDOC_SYNPRETTY;
995
	if (man->flags & MDOC_NEWLINE)
996
		n->flags |= MDOC_LINE;
997
	man->flags &= ~MDOC_NEWLINE;
998
999
	return n;
1000
}
1001
1002
void
1003
roff_node_append(struct roff_man *man, struct roff_node *n)
1004
{
1005
1006
	switch (man->next) {
1007
	case ROFF_NEXT_SIBLING:
1008
		if (man->last->next != NULL) {
1009
			n->next = man->last->next;
1010
			man->last->next->prev = n;
1011
		} else
1012
			man->last->parent->last = n;
1013
		man->last->next = n;
1014
		n->prev = man->last;
1015
		n->parent = man->last->parent;
1016
		break;
1017
	case ROFF_NEXT_CHILD:
1018
		man->last->child = n;
1019
		n->parent = man->last;
1020
		n->parent->last = n;
1021
		break;
1022
	default:
1023
		abort();
1024
	}
1025
	man->last = n;
1026
1027
	switch (n->type) {
1028
	case ROFFT_HEAD:
1029
		n->parent->head = n;
1030
		break;
1031
	case ROFFT_BODY:
1032
		if (n->end != ENDBODY_NOT)
1033
			return;
1034
		n->parent->body = n;
1035
		break;
1036
	case ROFFT_TAIL:
1037
		n->parent->tail = n;
1038
		break;
1039
	default:
1040
		return;
1041
	}
1042
1043
	/*
1044
	 * Copy over the normalised-data pointer of our parent.  Not
1045
	 * everybody has one, but copying a null pointer is fine.
1046
	 */
1047
1048
	n->norm = n->parent->norm;
1049
	assert(n->parent->type == ROFFT_BLOCK);
1050
}
1051
1052
void
1053
roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
1054
{
1055
	struct roff_node	*n;
1056
1057
	n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
1058
	n->string = roff_strdup(man->roff, word);
1059
	roff_node_append(man, n);
1060
	if (man->macroset == MACROSET_MDOC)
1061
		n->flags |= MDOC_VALID | MDOC_ENDED;
1062
	else
1063
		n->flags |= MAN_VALID;
1064
	man->next = ROFF_NEXT_SIBLING;
1065
}
1066
1067
void
1068
roff_word_append(struct roff_man *man, const char *word)
1069
{
1070
	struct roff_node	*n;
1071
	char			*addstr, *newstr;
1072
1073
	n = man->last;
1074
	addstr = roff_strdup(man->roff, word);
1075
	mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
1076
	free(addstr);
1077
	free(n->string);
1078
	n->string = newstr;
1079
	man->next = ROFF_NEXT_SIBLING;
1080
}
1081
1082
void
1083
roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
1084
{
1085
	struct roff_node	*n;
1086
1087
	n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
1088
	roff_node_append(man, n);
1089
	man->next = ROFF_NEXT_CHILD;
1090
}
1091
1092
struct roff_node *
1093
roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
1094
{
1095
	struct roff_node	*n;
1096
1097
	n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
1098
	roff_node_append(man, n);
1099
	man->next = ROFF_NEXT_CHILD;
1100
	return n;
1101
}
1102
1103
struct roff_node *
1104
roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
1105
{
1106
	struct roff_node	*n;
1107
1108
	n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
1109
	roff_node_append(man, n);
1110
	man->next = ROFF_NEXT_CHILD;
1111
	return n;
1112
}
1113
1114
struct roff_node *
1115
roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
1116
{
1117
	struct roff_node	*n;
1118
1119
	n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
1120
	roff_node_append(man, n);
1121
	man->next = ROFF_NEXT_CHILD;
1122
	return n;
1123
}
1124
1125
void
1126
roff_addeqn(struct roff_man *man, const struct eqn *eqn)
1127
{
1128
	struct roff_node	*n;
1129
1130
	n = roff_node_alloc(man, eqn->ln, eqn->pos, ROFFT_EQN, TOKEN_NONE);
1131
	n->eqn = eqn;
1132
	if (eqn->ln > man->last->line)
1133
		n->flags |= MDOC_LINE;
1134
	roff_node_append(man, n);
1135
	man->next = ROFF_NEXT_SIBLING;
1136
}
1137
1138
void
1139
roff_addtbl(struct roff_man *man, const struct tbl_span *tbl)
1140
{
1141
	struct roff_node	*n;
1142
1143
	if (man->macroset == MACROSET_MAN)
1144
		man_breakscope(man, TOKEN_NONE);
1145
	n = roff_node_alloc(man, tbl->line, 0, ROFFT_TBL, TOKEN_NONE);
1146
	n->span = tbl;
1147
	roff_node_append(man, n);
1148
	if (man->macroset == MACROSET_MDOC)
1149
		n->flags |= MDOC_VALID | MDOC_ENDED;
1150
	else
1151
		n->flags |= MAN_VALID;
1152
	man->next = ROFF_NEXT_SIBLING;
1153
}
1154
1155
void
1156
roff_node_unlink(struct roff_man *man, struct roff_node *n)
1157
{
1158
1159
	/* Adjust siblings. */
1160
1161
	if (n->prev)
1162
		n->prev->next = n->next;
1163
	if (n->next)
1164
		n->next->prev = n->prev;
1165
1166
	/* Adjust parent. */
1167
1168
	if (n->parent != NULL) {
1169
		if (n->parent->child == n)
1170
			n->parent->child = n->next;
1171
		if (n->parent->last == n)
1172
			n->parent->last = n->prev;
1173
	}
1174
1175
	/* Adjust parse point. */
1176
1177
	if (man == NULL)
1178
		return;
1179
	if (man->last == n) {
1180
		if (n->prev == NULL) {
1181
			man->last = n->parent;
1182
			man->next = ROFF_NEXT_CHILD;
1183
		} else {
1184
			man->last = n->prev;
1185
			man->next = ROFF_NEXT_SIBLING;
1186
		}
1187
	}
1188
	if (man->first == n)
1189
		man->first = NULL;
1190
}
1191
1192
void
1193
roff_node_free(struct roff_node *n)
1194
{
1195
1196
	if (n->args != NULL)
1197
		mdoc_argv_free(n->args);
1198
	if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
1199
		free(n->norm);
1200
	free(n->string);
1201
	free(n);
1202
}
1203
1204
void
1205
roff_node_delete(struct roff_man *man, struct roff_node *n)
1206
{
1207
1208
	while (n->child != NULL)
1209
		roff_node_delete(man, n->child);
1210
	roff_node_unlink(man, n);
1211
	roff_node_free(n);
1212
}
1213
1214
void
1215
deroff(char **dest, const struct roff_node *n)
1216
{
1217
	char	*cp;
1218
	size_t	 sz;
1219
1220
	if (n->type != ROFFT_TEXT) {
1221
		for (n = n->child; n != NULL; n = n->next)
1222
			deroff(dest, n);
1223
		return;
1224
	}
1225
1226
	/* Skip leading whitespace and escape sequences. */
1227
1228
	cp = n->string;
1229
	while (*cp != '\0') {
1230
		if ('\\' == *cp) {
1231
			cp++;
1232
			mandoc_escape((const char **)&cp, NULL, NULL);
1233
		} else if (isspace((unsigned char)*cp))
1234
			cp++;
1235
		else
1236
			break;
1237
	}
1238
1239
	/* Skip trailing whitespace. */
1240
1241
	for (sz = strlen(cp); sz; sz--)
1242
		if ( ! isspace((unsigned char)cp[sz-1]))
1243
			break;
1244
1245
	/* Skip empty strings. */
1246
1247
	if (sz == 0)
1248
		return;
1249
1250
	if (*dest == NULL) {
1251
		*dest = mandoc_strndup(cp, sz);
1252
		return;
1253
	}
1254
1255
	mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
1256
	free(*dest);
1257
	*dest = cp;
1258
}
1259
1260
/* --- main functions of the roff parser ---------------------------------- */
1261
1262
/*
1263
 * In the current line, expand escape sequences that tend to get
1264
 * used in numerical expressions and conditional requests.
1265
 * Also check the syntax of the remaining escape sequences.
1266
 */
1267
static enum rofferr
1268
roff_res(struct roff *r, struct buf *buf, int ln, int pos)
1269
{
1270
	char		 ubuf[24]; /* buffer to print the number */
1271
	const char	*start;	/* start of the string to process */
1272
	char		*stesc;	/* start of an escape sequence ('\\') */
1273
	const char	*stnam;	/* start of the name, after "[(*" */
1274
	const char	*cp;	/* end of the name, e.g. before ']' */
1275
	const char	*res;	/* the string to be substituted */
1276
	char		*nbuf;	/* new buffer to copy buf->buf to */
1277
	size_t		 maxl;  /* expected length of the escape name */
1278
	size_t		 naml;	/* actual length of the escape name */
1279
	enum mandoc_esc	 esc;	/* type of the escape sequence */
1280
	int		 inaml;	/* length returned from mandoc_escape() */
1281
	int		 expand_count;	/* to avoid infinite loops */
1282
	int		 npos;	/* position in numeric expression */
1283
	int		 arg_complete; /* argument not interrupted by eol */
1284
	char		 term;	/* character terminating the escape */
1285
1286
	expand_count = 0;
1287
	start = buf->buf + pos;
1288
	stesc = strchr(start, '\0') - 1;
1289
	while (stesc-- > start) {
1290
1291
		/* Search backwards for the next backslash. */
1292
1293
		if (*stesc != '\\')
1294
			continue;
1295
1296
		/* If it is escaped, skip it. */
1297
1298
		for (cp = stesc - 1; cp >= start; cp--)
1299
			if (*cp != '\\')
1300
				break;
1301
1302
		if ((stesc - cp) % 2 == 0) {
1303
			stesc = (char *)cp;
1304
			continue;
1305
		}
1306
1307
		/* Decide whether to expand or to check only. */
1308
1309
		term = '\0';
1310
		cp = stesc + 1;
1311
		switch (*cp) {
1312
		case '*':
1313
			res = NULL;
1314
			break;
1315
		case 'B':
1316
		case 'w':
1317
			term = cp[1];
1318
			/* FALLTHROUGH */
1319
		case 'n':
1320
			res = ubuf;
1321
			break;
1322
		default:
1323
			esc = mandoc_escape(&cp, &stnam, &inaml);
1324
			if (esc == ESCAPE_ERROR ||
1325
			    (esc == ESCAPE_SPECIAL &&
1326
			     mchars_spec2cp(stnam, inaml) < 0))
1327
				mandoc_vmsg(MANDOCERR_ESC_BAD,
1328
				    r->parse, ln, (int)(stesc - buf->buf),
1329
				    "%.*s", (int)(cp - stesc), stesc);
1330
			continue;
1331
		}
1332
1333
		if (EXPAND_LIMIT < ++expand_count) {
1334
			mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
1335
			    ln, (int)(stesc - buf->buf), NULL);
1336
			return ROFF_IGN;
1337
		}
1338
1339
		/*
1340
		 * The third character decides the length
1341
		 * of the name of the string or register.
1342
		 * Save a pointer to the name.
1343
		 */
1344
1345
		if (term == '\0') {
1346
			switch (*++cp) {
1347
			case '\0':
1348
				maxl = 0;
1349
				break;
1350
			case '(':
1351
				cp++;
1352
				maxl = 2;
1353
				break;
1354
			case '[':
1355
				cp++;
1356
				term = ']';
1357
				maxl = 0;
1358
				break;
1359
			default:
1360
				maxl = 1;
1361
				break;
1362
			}
1363
		} else {
1364
			cp += 2;
1365
			maxl = 0;
1366
		}
1367
		stnam = cp;
1368
1369
		/* Advance to the end of the name. */
1370
1371
		naml = 0;
1372
		arg_complete = 1;
1373
		while (maxl == 0 || naml < maxl) {
1374
			if (*cp == '\0') {
1375
				mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
1376
				    ln, (int)(stesc - buf->buf), stesc);
1377
				arg_complete = 0;
1378
				break;
1379
			}
1380
			if (maxl == 0 && *cp == term) {
1381
				cp++;
1382
				break;
1383
			}
1384
			if (*cp++ != '\\' || stesc[1] != 'w') {
1385
				naml++;
1386
				continue;
1387
			}
1388
			switch (mandoc_escape(&cp, NULL, NULL)) {
1389
			case ESCAPE_SPECIAL:
1390
			case ESCAPE_UNICODE:
1391
			case ESCAPE_NUMBERED:
1392
			case ESCAPE_OVERSTRIKE:
1393
				naml++;
1394
				break;
1395
			default:
1396
				break;
1397
			}
1398
		}
1399
1400
		/*
1401
		 * Retrieve the replacement string; if it is
1402
		 * undefined, resume searching for escapes.
1403
		 */
1404
1405
		switch (stesc[1]) {
1406
		case '*':
1407
			if (arg_complete)
1408
				res = roff_getstrn(r, stnam, naml);
1409
			break;
1410
		case 'B':
1411
			npos = 0;
1412
			ubuf[0] = arg_complete &&
1413
			    roff_evalnum(r, ln, stnam, &npos,
1414
			      NULL, ROFFNUM_SCALE) &&
1415
			    stnam + npos + 1 == cp ? '1' : '0';
1416
			ubuf[1] = '\0';
1417
			break;
1418
		case 'n':
1419
			if (arg_complete)
1420
				(void)snprintf(ubuf, sizeof(ubuf), "%d",
1421
				    roff_getregn(r, stnam, naml));
1422
			else
1423
				ubuf[0] = '\0';
1424
			break;
1425
		case 'w':
1426
			/* use even incomplete args */
1427
			(void)snprintf(ubuf, sizeof(ubuf), "%d",
1428
			    24 * (int)naml);
1429
			break;
1430
		}
1431
1432
		if (res == NULL) {
1433
			mandoc_vmsg(MANDOCERR_STR_UNDEF,
1434
			    r->parse, ln, (int)(stesc - buf->buf),
1435
			    "%.*s", (int)naml, stnam);
1436
			res = "";
1437
		} else if (buf->sz + strlen(res) > SHRT_MAX) {
1438
			mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
1439
			    ln, (int)(stesc - buf->buf), NULL);
1440
			return ROFF_IGN;
1441
		}
1442
1443
		/* Replace the escape sequence by the string. */
1444
1445
		*stesc = '\0';
1446
		buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
1447
		    buf->buf, res, cp) + 1;
1448
1449
		/* Prepare for the next replacement. */
1450
1451
		start = nbuf + pos;
1452
		stesc = nbuf + (stesc - buf->buf) + strlen(res);
1453
		free(buf->buf);
1454
		buf->buf = nbuf;
1455
	}
1456
	return ROFF_CONT;
1457
}
1458
1459
/*
1460
 * Process text streams.
1461
 */
1462
static enum rofferr
1463
roff_parsetext(struct buf *buf, int pos, int *offs)
1464
{
1465
	size_t		 sz;
1466
	const char	*start;
1467
	char		*p;
1468
	int		 isz;
1469
	enum mandoc_esc	 esc;
1470
1471
	/* Spring the input line trap. */
1472
1473
	if (roffit_lines == 1) {
1474
		isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1475
		free(buf->buf);
1476
		buf->buf = p;
1477
		buf->sz = isz + 1;
1478
		*offs = 0;
1479
		free(roffit_macro);
1480
		roffit_lines = 0;
1481
		return ROFF_REPARSE;
1482
	} else if (roffit_lines > 1)
1483
		--roffit_lines;
1484
1485
	/* Convert all breakable hyphens into ASCII_HYPH. */
1486
1487
	start = p = buf->buf + pos;
1488
1489
	while (*p != '\0') {
1490
		sz = strcspn(p, "-\\");
1491
		p += sz;
1492
1493
		if (*p == '\0')
1494
			break;
1495
1496
		if (*p == '\\') {
1497
			/* Skip over escapes. */
1498
			p++;
1499
			esc = mandoc_escape((const char **)&p, NULL, NULL);
1500
			if (esc == ESCAPE_ERROR)
1501
				break;
1502
			while (*p == '-')
1503
				p++;
1504
			continue;
1505
		} else if (p == start) {
1506
			p++;
1507
			continue;
1508
		}
1509
1510
		if (isalpha((unsigned char)p[-1]) &&
1511
		    isalpha((unsigned char)p[1]))
1512
			*p = ASCII_HYPH;
1513
		p++;
1514
	}
1515
	return ROFF_CONT;
1516
}
1517
1518
enum rofferr
1519
roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs)
1520
{
1521
	enum rofft	 t;
1522
	enum rofferr	 e;
1523
	int		 pos;	/* parse point */
1524
	int		 spos;	/* saved parse point for messages */
1525
	int		 ppos;	/* original offset in buf->buf */
1526
	int		 ctl;	/* macro line (boolean) */
1527
1528
	ppos = pos = *offs;
1529
1530
	/* Handle in-line equation delimiters. */
1531
1532
	if (r->tbl == NULL &&
1533
	    r->last_eqn != NULL && r->last_eqn->delim &&
1534
	    (r->eqn == NULL || r->eqn_inline)) {
1535
		e = roff_eqndelim(r, buf, pos);
1536
		if (e == ROFF_REPARSE)
1537
			return e;
1538
		assert(e == ROFF_CONT);
1539
	}
1540
1541
	/* Expand some escape sequences. */
1542
1543
	e = roff_res(r, buf, ln, pos);
1544
	if (e == ROFF_IGN)
1545
		return e;
1546
	assert(e == ROFF_CONT);
1547
1548
	ctl = roff_getcontrol(r, buf->buf, &pos);
1549
1550
	/*
1551
	 * First, if a scope is open and we're not a macro, pass the
1552
	 * text through the macro's filter.
1553
	 * Equations process all content themselves.
1554
	 * Tables process almost all content themselves, but we want
1555
	 * to warn about macros before passing it there.
1556
	 */
1557
1558
	if (r->last != NULL && ! ctl) {
1559
		t = r->last->tok;
1560
		assert(roffs[t].text);
1561
		e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
1562
		assert(e == ROFF_IGN || e == ROFF_CONT);
1563
		if (e != ROFF_CONT)
1564
			return e;
1565
	}
1566
	if (r->eqn != NULL)
1567
		return eqn_read(&r->eqn, ln, buf->buf, ppos, offs);
1568
	if (r->tbl != NULL && ( ! ctl || buf->buf[pos] == '\0'))
1569
		return tbl_read(r->tbl, ln, buf->buf, ppos);
1570
	if ( ! ctl)
1571
		return roff_parsetext(buf, pos, offs);
1572
1573
	/* Skip empty request lines. */
1574
1575
	if (buf->buf[pos] == '"') {
1576
		mandoc_msg(MANDOCERR_COMMENT_BAD, r->parse,
1577
		    ln, pos, NULL);
1578
		return ROFF_IGN;
1579
	} else if (buf->buf[pos] == '\0')
1580
		return ROFF_IGN;
1581
1582
	/*
1583
	 * If a scope is open, go to the child handler for that macro,
1584
	 * as it may want to preprocess before doing anything with it.
1585
	 * Don't do so if an equation is open.
1586
	 */
1587
1588
	if (r->last) {
1589
		t = r->last->tok;
1590
		assert(roffs[t].sub);
1591
		return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
1592
	}
1593
1594
	/* No scope is open.  This is a new request or macro. */
1595
1596
	spos = pos;
1597
	t = roff_parse(r, buf->buf, &pos, ln, ppos);
1598
1599
	/* Tables ignore most macros. */
1600
1601
	if (r->tbl != NULL && (t == ROFF_MAX || t == ROFF_TS)) {
1602
		mandoc_msg(MANDOCERR_TBLMACRO, r->parse,
1603
		    ln, pos, buf->buf + spos);
1604
		if (t == ROFF_TS)
1605
			return ROFF_IGN;
1606
		while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
1607
			pos++;
1608
		while (buf->buf[pos] != '\0' && buf->buf[pos] == ' ')
1609
			pos++;
1610
		return tbl_read(r->tbl, ln, buf->buf, pos);
1611
	}
1612
1613
	/*
1614
	 * This is neither a roff request nor a user-defined macro.
1615
	 * Let the standard macro set parsers handle it.
1616
	 */
1617
1618
	if (t == ROFF_MAX)
1619
		return ROFF_CONT;
1620
1621
	/* Execute a roff request or a user defined macro. */
1622
1623
	assert(roffs[t].proc);
1624
	return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
1625
}
1626
1627
void
1628
roff_endparse(struct roff *r)
1629
{
1630
1631
	if (r->last)
1632
		mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1633
		    r->last->line, r->last->col,
1634
		    roffs[r->last->tok].name);
1635
1636
	if (r->eqn) {
1637
		mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1638
		    r->eqn->eqn.ln, r->eqn->eqn.pos, "EQ");
1639
		eqn_end(&r->eqn);
1640
	}
1641
1642
	if (r->tbl) {
1643
		mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1644
		    r->tbl->line, r->tbl->pos, "TS");
1645
		tbl_end(&r->tbl);
1646
	}
1647
}
1648
1649
/*
1650
 * Parse a roff node's type from the input buffer.  This must be in the
1651
 * form of ".foo xxx" in the usual way.
1652
 */
1653
static enum rofft
1654
roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
1655
{
1656
	char		*cp;
1657
	const char	*mac;
1658
	size_t		 maclen;
1659
	enum rofft	 t;
1660
1661
	cp = buf + *pos;
1662
1663
	if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
1664
		return ROFF_MAX;
1665
1666
	mac = cp;
1667
	maclen = roff_getname(r, &cp, ln, ppos);
1668
1669
	t = (r->current_string = roff_getstrn(r, mac, maclen))
1670
	    ? ROFF_USERDEF : roffhash_find(mac, maclen);
1671
1672
	if (ROFF_MAX != t)
1673
		*pos = cp - buf;
1674
1675
	return t;
1676
}
1677
1678
/* --- handling of request blocks ----------------------------------------- */
1679
1680
static enum rofferr
1681
roff_cblock(ROFF_ARGS)
1682
{
1683
1684
	/*
1685
	 * A block-close `..' should only be invoked as a child of an
1686
	 * ignore macro, otherwise raise a warning and just ignore it.
1687
	 */
1688
1689
	if (r->last == NULL) {
1690
		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1691
		    ln, ppos, "..");
1692
		return ROFF_IGN;
1693
	}
1694
1695
	switch (r->last->tok) {
1696
	case ROFF_am:
1697
		/* ROFF_am1 is remapped to ROFF_am in roff_block(). */
1698
	case ROFF_ami:
1699
	case ROFF_de:
1700
		/* ROFF_de1 is remapped to ROFF_de in roff_block(). */
1701
	case ROFF_dei:
1702
	case ROFF_ig:
1703
		break;
1704
	default:
1705
		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1706
		    ln, ppos, "..");
1707
		return ROFF_IGN;
1708
	}
1709
1710
	if (buf->buf[pos] != '\0')
1711
		mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
1712
		    ".. %s", buf->buf + pos);
1713
1714
	roffnode_pop(r);
1715
	roffnode_cleanscope(r);
1716
	return ROFF_IGN;
1717
1718
}
1719
1720
static void
1721
roffnode_cleanscope(struct roff *r)
1722
{
1723
1724
	while (r->last) {
1725
		if (--r->last->endspan != 0)
1726
			break;
1727
		roffnode_pop(r);
1728
	}
1729
}
1730
1731
static void
1732
roff_ccond(struct roff *r, int ln, int ppos)
1733
{
1734
1735
	if (NULL == r->last) {
1736
		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1737
		    ln, ppos, "\\}");
1738
		return;
1739
	}
1740
1741
	switch (r->last->tok) {
1742
	case ROFF_el:
1743
	case ROFF_ie:
1744
	case ROFF_if:
1745
		break;
1746
	default:
1747
		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1748
		    ln, ppos, "\\}");
1749
		return;
1750
	}
1751
1752
	if (r->last->endspan > -1) {
1753
		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1754
		    ln, ppos, "\\}");
1755
		return;
1756
	}
1757
1758
	roffnode_pop(r);
1759
	roffnode_cleanscope(r);
1760
	return;
1761
}
1762
1763
static enum rofferr
1764
roff_block(ROFF_ARGS)
1765
{
1766
	const char	*name;
1767
	char		*iname, *cp;
1768
	size_t		 namesz;
1769
1770
	/* Ignore groff compatibility mode for now. */
1771
1772
	if (tok == ROFF_de1)
1773
		tok = ROFF_de;
1774
	else if (tok == ROFF_dei1)
1775
		tok = ROFF_dei;
1776
	else if (tok == ROFF_am1)
1777
		tok = ROFF_am;
1778
	else if (tok == ROFF_ami1)
1779
		tok = ROFF_ami;
1780
1781
	/* Parse the macro name argument. */
1782
1783
	cp = buf->buf + pos;
1784
	if (tok == ROFF_ig) {
1785
		iname = NULL;
1786
		namesz = 0;
1787
	} else {
1788
		iname = cp;
1789
		namesz = roff_getname(r, &cp, ln, ppos);
1790
		iname[namesz] = '\0';
1791
	}
1792
1793
	/* Resolve the macro name argument if it is indirect. */
1794
1795
	if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
1796
		if ((name = roff_getstrn(r, iname, namesz)) == NULL) {
1797
			mandoc_vmsg(MANDOCERR_STR_UNDEF,
1798
			    r->parse, ln, (int)(iname - buf->buf),
1799
			    "%.*s", (int)namesz, iname);
1800
			namesz = 0;
1801
		} else
1802
			namesz = strlen(name);
1803
	} else
1804
		name = iname;
1805
1806
	if (namesz == 0 && tok != ROFF_ig) {
1807
		mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse,
1808
		    ln, ppos, roffs[tok].name);
1809
		return ROFF_IGN;
1810
	}
1811
1812
	roffnode_push(r, tok, name, ln, ppos);
1813
1814
	/*
1815
	 * At the beginning of a `de' macro, clear the existing string
1816
	 * with the same name, if there is one.  New content will be
1817
	 * appended from roff_block_text() in multiline mode.
1818
	 */
1819
1820
	if (tok == ROFF_de || tok == ROFF_dei)
1821
		roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
1822
1823
	if (*cp == '\0')
1824
		return ROFF_IGN;
1825
1826
	/* Get the custom end marker. */
1827
1828
	iname = cp;
1829
	namesz = roff_getname(r, &cp, ln, ppos);
1830
1831
	/* Resolve the end marker if it is indirect. */
1832
1833
	if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
1834
		if ((name = roff_getstrn(r, iname, namesz)) == NULL) {
1835
			mandoc_vmsg(MANDOCERR_STR_UNDEF,
1836
			    r->parse, ln, (int)(iname - buf->buf),
1837
			    "%.*s", (int)namesz, iname);
1838
			namesz = 0;
1839
		} else
1840
			namesz = strlen(name);
1841
	} else
1842
		name = iname;
1843
1844
	if (namesz)
1845
		r->last->end = mandoc_strndup(name, namesz);
1846
1847
	if (*cp != '\0')
1848
		mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
1849
		    ln, pos, ".%s ... %s", roffs[tok].name, cp);
1850
1851
	return ROFF_IGN;
1852
}
1853
1854
static enum rofferr
1855
roff_block_sub(ROFF_ARGS)
1856
{
1857
	enum rofft	t;
1858
	int		i, j;
1859
1860
	/*
1861
	 * First check whether a custom macro exists at this level.  If
1862
	 * it does, then check against it.  This is some of groff's
1863
	 * stranger behaviours.  If we encountered a custom end-scope
1864
	 * tag and that tag also happens to be a "real" macro, then we
1865
	 * need to try interpreting it again as a real macro.  If it's
1866
	 * not, then return ignore.  Else continue.
1867
	 */
1868
1869
	if (r->last->end) {
1870
		for (i = pos, j = 0; r->last->end[j]; j++, i++)
1871
			if (buf->buf[i] != r->last->end[j])
1872
				break;
1873
1874
		if (r->last->end[j] == '\0' &&
1875
		    (buf->buf[i] == '\0' ||
1876
		     buf->buf[i] == ' ' ||
1877
		     buf->buf[i] == '\t')) {
1878
			roffnode_pop(r);
1879
			roffnode_cleanscope(r);
1880
1881
			while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
1882
				i++;
1883
1884
			pos = i;
1885
			if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
1886
			    ROFF_MAX)
1887
				return ROFF_RERUN;
1888
			return ROFF_IGN;
1889
		}
1890
	}
1891
1892
	/*
1893
	 * If we have no custom end-query or lookup failed, then try
1894
	 * pulling it out of the hashtable.
1895
	 */
1896
1897
	t = roff_parse(r, buf->buf, &pos, ln, ppos);
1898
1899
	if (t != ROFF_cblock) {
1900
		if (tok != ROFF_ig)
1901
			roff_setstr(r, r->last->name, buf->buf + ppos, 2);
1902
		return ROFF_IGN;
1903
	}
1904
1905
	assert(roffs[t].proc);
1906
	return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
1907
}
1908
1909
static enum rofferr
1910
roff_block_text(ROFF_ARGS)
1911
{
1912
1913
	if (tok != ROFF_ig)
1914
		roff_setstr(r, r->last->name, buf->buf + pos, 2);
1915
1916
	return ROFF_IGN;
1917
}
1918
1919
static enum rofferr
1920
roff_cond_sub(ROFF_ARGS)
1921
{
1922
	enum rofft	 t;
1923
	char		*ep;
1924
	int		 rr;
1925
1926
	rr = r->last->rule;
1927
	roffnode_cleanscope(r);
1928
	t = roff_parse(r, buf->buf, &pos, ln, ppos);
1929
1930
	/*
1931
	 * Fully handle known macros when they are structurally
1932
	 * required or when the conditional evaluated to true.
1933
	 */
1934
1935
	if ((t != ROFF_MAX) &&
1936
	    (rr || roffs[t].flags & ROFFMAC_STRUCT)) {
1937
		assert(roffs[t].proc);
1938
		return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
1939
	}
1940
1941
	/*
1942
	 * If `\}' occurs on a macro line without a preceding macro,
1943
	 * drop the line completely.
1944
	 */
1945
1946
	ep = buf->buf + pos;
1947
	if (ep[0] == '\\' && ep[1] == '}')
1948
		rr = 0;
1949
1950
	/* Always check for the closing delimiter `\}'. */
1951
1952
	while ((ep = strchr(ep, '\\')) != NULL) {
1953
		if (*(++ep) == '}') {
1954
			*ep = '&';
1955
			roff_ccond(r, ln, ep - buf->buf - 1);
1956
		}
1957
		if (*ep != '\0')
1958
			++ep;
1959
	}
1960
	return rr ? ROFF_CONT : ROFF_IGN;
1961
}
1962
1963
static enum rofferr
1964
roff_cond_text(ROFF_ARGS)
1965
{
1966
	char		*ep;
1967
	int		 rr;
1968
1969
	rr = r->last->rule;
1970
	roffnode_cleanscope(r);
1971
1972
	ep = buf->buf + pos;
1973
	while ((ep = strchr(ep, '\\')) != NULL) {
1974
		if (*(++ep) == '}') {
1975
			*ep = '&';
1976
			roff_ccond(r, ln, ep - buf->buf - 1);
1977
		}
1978
		if (*ep != '\0')
1979
			++ep;
1980
	}
1981
	return rr ? ROFF_CONT : ROFF_IGN;
1982
}
1983
1984
/* --- handling of numeric and conditional expressions -------------------- */
1985
1986
/*
1987
 * Parse a single signed integer number.  Stop at the first non-digit.
1988
 * If there is at least one digit, return success and advance the
1989
 * parse point, else return failure and let the parse point unchanged.
1990
 * Ignore overflows, treat them just like the C language.
1991
 */
1992
static int
1993
roff_getnum(const char *v, int *pos, int *res, int flags)
1994
{
1995
	int	 myres, scaled, n, p;
1996
1997
	if (NULL == res)
1998
		res = &myres;
1999
2000
	p = *pos;
2001
	n = v[p] == '-';
2002
	if (n || v[p] == '+')
2003
		p++;
2004
2005
	if (flags & ROFFNUM_WHITE)
2006
		while (isspace((unsigned char)v[p]))
2007
			p++;
2008
2009
	for (*res = 0; isdigit((unsigned char)v[p]); p++)
2010
		*res = 10 * *res + v[p] - '0';
2011
	if (p == *pos + n)
2012
		return 0;
2013
2014
	if (n)
2015
		*res = -*res;
2016
2017
	/* Each number may be followed by one optional scaling unit. */
2018
2019
	switch (v[p]) {
2020
	case 'f':
2021
		scaled = *res * 65536;
2022
		break;
2023
	case 'i':
2024
		scaled = *res * 240;
2025
		break;
2026
	case 'c':
2027
		scaled = *res * 240 / 2.54;
2028
		break;
2029
	case 'v':
2030
	case 'P':
2031
		scaled = *res * 40;
2032
		break;
2033
	case 'm':
2034
	case 'n':
2035
		scaled = *res * 24;
2036
		break;
2037
	case 'p':
2038
		scaled = *res * 10 / 3;
2039
		break;
2040
	case 'u':
2041
		scaled = *res;
2042
		break;
2043
	case 'M':
2044
		scaled = *res * 6 / 25;
2045
		break;
2046
	default:
2047
		scaled = *res;
2048
		p--;
2049
		break;
2050
	}
2051
	if (flags & ROFFNUM_SCALE)
2052
		*res = scaled;
2053
2054
	*pos = p + 1;
2055
	return 1;
2056
}
2057
2058
/*
 * Evaluate a string comparison condition.
 * The character at v[*pos] is the delimiter.
 * Return 1 if the string between its first and second occurrence
 * equals the string between its second and third occurrence,
 * and 0 otherwise.
 * Advance the cursor *pos after the third occurrence
 * or, lacking that, to the end of the line.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*delim;	/* the initial delimiter */
	const char	*lhs;	/* scans the first string */
	const char	*rhs;	/* scans the second string */
	int		 equal;

	equal = 0;
	delim = v + *pos;
	lhs = delim + 1;
	rhs = strchr(lhs, *delim);

	/* Without a middle delimiter, there is nothing to compare. */

	if (rhs != NULL) {
		for (rhs++; *rhs != '\0'; rhs++, lhs++) {
			if (*lhs != *rhs) {
				/* Mismatch: skip to the final delimiter. */
				rhs = strchr(rhs, *delim);
				break;
			}
			if (*rhs == *delim) {
				/* Both strings ended simultaneously. */
				equal = 1;
				break;
			}
		}
	}

	if (rhs == NULL)
		rhs = strchr(lhs, '\0');
	else if (*rhs != '\0')
		rhs++;
	*pos = rhs - v;
	return equal;
}
2100
2101
/*
2102
 * Evaluate an optionally negated single character, numerical,
2103
 * or string condition.
2104
 */
2105
static int
2106
roff_evalcond(struct roff *r, int ln, char *v, int *pos)
2107
{
2108
	char	*cp, *name;
2109
	size_t	 sz;
2110
	int	 number, savepos, wanttrue;
2111
2112
	if ('!' == v[*pos]) {
2113
		wanttrue = 0;
2114
		(*pos)++;
2115
	} else
2116
		wanttrue = 1;
2117
2118
	switch (v[*pos]) {
2119
	case '\0':
2120
		return 0;
2121
	case 'n':
2122
	case 'o':
2123
		(*pos)++;
2124
		return wanttrue;
2125
	case 'c':
2126
	case 'd':
2127
	case 'e':
2128
	case 't':
2129
	case 'v':
2130
		(*pos)++;
2131
		return !wanttrue;
2132
	case 'r':
2133
		cp = name = v + ++*pos;
2134
		sz = roff_getname(r, &cp, ln, *pos);
2135
		*pos = cp - v;
2136
		return (sz && roff_hasregn(r, name, sz)) == wanttrue;
2137
	default:
2138
		break;
2139
	}
2140
2141
	savepos = *pos;
2142
	if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE))
2143
		return (number > 0) == wanttrue;
2144
	else if (*pos == savepos)
2145
		return roff_evalstrcond(v, pos) == wanttrue;
2146
	else
2147
		return 0;
2148
}
2149
2150
static enum rofferr
2151
roff_line_ignore(ROFF_ARGS)
2152
{
2153
2154
	return ROFF_IGN;
2155
}
2156
2157
static enum rofferr
2158
roff_insec(ROFF_ARGS)
2159
{
2160
2161
	mandoc_msg(MANDOCERR_REQ_INSEC, r->parse,
2162
	    ln, ppos, roffs[tok].name);
2163
	return ROFF_IGN;
2164
}
2165
2166
static enum rofferr
2167
roff_unsupp(ROFF_ARGS)
2168
{
2169
2170
	mandoc_msg(MANDOCERR_REQ_UNSUPP, r->parse,
2171
	    ln, ppos, roffs[tok].name);
2172
	return ROFF_IGN;
2173
}
2174
2175
static enum rofferr
2176
roff_cond(ROFF_ARGS)
2177
{
2178
2179
	roffnode_push(r, tok, NULL, ln, ppos);
2180
2181
	/*
2182
	 * An `.el' has no conditional body: it will consume the value
2183
	 * of the current rstack entry set in prior `ie' calls or
2184
	 * defaults to DENY.
2185
	 *
2186
	 * If we're not an `el', however, then evaluate the conditional.
2187
	 */
2188
2189
	r->last->rule = tok == ROFF_el ?
2190
	    (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
2191
	    roff_evalcond(r, ln, buf->buf, &pos);
2192
2193
	/*
2194
	 * An if-else will put the NEGATION of the current evaluated
2195
	 * conditional into the stack of rules.
2196
	 */
2197
2198
	if (tok == ROFF_ie) {
2199
		if (r->rstackpos + 1 == r->rstacksz) {
2200
			r->rstacksz += 16;
2201
			r->rstack = mandoc_reallocarray(r->rstack,
2202
			    r->rstacksz, sizeof(int));
2203
		}
2204
		r->rstack[++r->rstackpos] = !r->last->rule;
2205
	}
2206
2207
	/* If the parent has false as its rule, then so do we. */
2208
2209
	if (r->last->parent && !r->last->parent->rule)
2210
		r->last->rule = 0;
2211
2212
	/*
2213
	 * Determine scope.
2214
	 * If there is nothing on the line after the conditional,
2215
	 * not even whitespace, use next-line scope.
2216
	 */
2217
2218
	if (buf->buf[pos] == '\0') {
2219
		r->last->endspan = 2;
2220
		goto out;
2221
	}
2222
2223
	while (buf->buf[pos] == ' ')
2224
		pos++;
2225
2226
	/* An opening brace requests multiline scope. */
2227
2228
	if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
2229
		r->last->endspan = -1;
2230
		pos += 2;
2231
		while (buf->buf[pos] == ' ')
2232
			pos++;
2233
		goto out;
2234
	}
2235
2236
	/*
2237
	 * Anything else following the conditional causes
2238
	 * single-line scope.  Warn if the scope contains
2239
	 * nothing but trailing whitespace.
2240
	 */
2241
2242
	if (buf->buf[pos] == '\0')
2243
		mandoc_msg(MANDOCERR_COND_EMPTY, r->parse,
2244
		    ln, ppos, roffs[tok].name);
2245
2246
	r->last->endspan = 1;
2247
2248
out:
2249
	*offs = pos;
2250
	return ROFF_RERUN;
2251
}
2252
2253
static enum rofferr
2254
roff_ds(ROFF_ARGS)
2255
{
2256
	char		*string;
2257
	const char	*name;
2258
	size_t		 namesz;
2259
2260
	/* Ignore groff compatibility mode for now. */
2261
2262
	if (tok == ROFF_ds1)
2263
		tok = ROFF_ds;
2264
	else if (tok == ROFF_as1)
2265
		tok = ROFF_as;
2266
2267
	/*
2268
	 * The first word is the name of the string.
2269
	 * If it is empty or terminated by an escape sequence,
2270
	 * abort the `ds' request without defining anything.
2271
	 */
2272
2273
	name = string = buf->buf + pos;
2274
	if (*name == '\0')
2275
		return ROFF_IGN;
2276
2277
	namesz = roff_getname(r, &string, ln, pos);
2278
	if (name[namesz] == '\\')
2279
		return ROFF_IGN;
2280
2281
	/* Read past the initial double-quote, if any. */
2282
	if (*string == '"')
2283
		string++;
2284
2285
	/* The rest is the value. */
2286
	roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
2287
	    ROFF_as == tok);
2288
	return ROFF_IGN;
2289
}
2290
2291
/*
 * Parse a single operator, one or two characters long, at v[*pos].
 * The character found at the parse point is always stored in *res.
 * If it starts a recognized operator, *res is set to the operator
 * code, the parse point is advanced past the operator, and the
 * (non-zero) code is returned; otherwise, zero is returned and the
 * parse point is left unchanged.
 * One-character operators are their own code.  Two-character
 * operators are encoded as follows:
 * "<=" as 'l', "<>" as '!', "<?" as 'i',
 * ">=" as 'g', ">?" as 'a', "==" as '='.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	char	 code, next;
	int	 len;

	code = v[*pos];
	*res = code;
	len = 1;

	if (code == '<' || code == '>' || code == '=') {
		/* Possibly a two-character operator: look ahead. */
		next = v[*pos + 1];
		len = 2;
		if (code == '<' && next == '=')
			code = 'l';
		else if (code == '<' && next == '>')
			code = '!';
		else if (code == '<' && next == '?')
			code = 'i';
		else if (code == '>' && next == '=')
			code = 'g';
		else if (code == '>' && next == '?')
			code = 'a';
		else if (code != '=' || next != '=')
			len = 1;
	} else if (code == '\0' || strchr("+-*/%&:", code) == NULL)
		return 0;

	*res = code;
	*pos += len;
	return code;
}
2354
2355
/*
2356
 * Evaluate either a parenthesized numeric expression
2357
 * or a single signed integer number.
2358
 */
2359
static int
2360
roff_evalpar(struct roff *r, int ln,
2361
	const char *v, int *pos, int *res, int flags)
2362
{
2363
2364
	if ('(' != v[*pos])
2365
		return roff_getnum(v, pos, res, flags);
2366
2367
	(*pos)++;
2368
	if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE))
2369
		return 0;
2370
2371
	/*
2372
	 * Omission of the closing parenthesis
2373
	 * is an error in validation mode,
2374
	 * but ignored in evaluation mode.
2375
	 */
2376
2377
	if (')' == v[*pos])
2378
		(*pos)++;
2379
	else if (NULL == res)
2380
		return 0;
2381
2382
	return 1;
2383
}
2384
2385
/*
2386
 * Evaluate a complete numeric expression.
2387
 * Proceed left to right, there is no concept of precedence.
2388
 */
2389
static int
2390
roff_evalnum(struct roff *r, int ln, const char *v,
2391
	int *pos, int *res, int flags)
2392
{
2393
	int		 mypos, operand2;
2394
	char		 operator;
2395
2396
	if (NULL == pos) {
2397
		mypos = 0;
2398
		pos = &mypos;
2399
	}
2400
2401
	if (flags & ROFFNUM_WHITE)
2402
		while (isspace((unsigned char)v[*pos]))
2403
			(*pos)++;
2404
2405
	if ( ! roff_evalpar(r, ln, v, pos, res, flags))
2406
		return 0;
2407
2408
	while (1) {
2409
		if (flags & ROFFNUM_WHITE)
2410
			while (isspace((unsigned char)v[*pos]))
2411
				(*pos)++;
2412
2413
		if ( ! roff_getop(v, pos, &operator))
2414
			break;
2415
2416
		if (flags & ROFFNUM_WHITE)
2417
			while (isspace((unsigned char)v[*pos]))
2418
				(*pos)++;
2419
2420
		if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags))
2421
			return 0;
2422
2423
		if (flags & ROFFNUM_WHITE)
2424
			while (isspace((unsigned char)v[*pos]))
2425
				(*pos)++;
2426
2427
		if (NULL == res)
2428
			continue;
2429
2430
		switch (operator) {
2431
		case '+':
2432
			*res += operand2;
2433
			break;
2434
		case '-':
2435
			*res -= operand2;
2436
			break;
2437
		case '*':
2438
			*res *= operand2;
2439
			break;
2440
		case '/':
2441
			if (operand2 == 0) {
2442
				mandoc_msg(MANDOCERR_DIVZERO,
2443
					r->parse, ln, *pos, v);
2444
				*res = 0;
2445
				break;
2446
			}
2447
			*res /= operand2;
2448
			break;
2449
		case '%':
2450
			if (operand2 == 0) {
2451
				mandoc_msg(MANDOCERR_DIVZERO,
2452
					r->parse, ln, *pos, v);
2453
				*res = 0;
2454
				break;
2455
			}
2456
			*res %= operand2;
2457
			break;
2458
		case '<':
2459
			*res = *res < operand2;
2460
			break;
2461
		case '>':
2462
			*res = *res > operand2;
2463
			break;
2464
		case 'l':
2465
			*res = *res <= operand2;
2466
			break;
2467
		case 'g':
2468
			*res = *res >= operand2;
2469
			break;
2470
		case '=':
2471
			*res = *res == operand2;
2472
			break;
2473
		case '!':
2474
			*res = *res != operand2;
2475
			break;
2476
		case '&':
2477
			*res = *res && operand2;
2478
			break;
2479
		case ':':
2480
			*res = *res || operand2;
2481
			break;
2482
		case 'i':
2483
			if (operand2 < *res)
2484
				*res = operand2;
2485
			break;
2486
		case 'a':
2487
			if (operand2 > *res)
2488
				*res = operand2;
2489
			break;
2490
		default:
2491
			abort();
2492
		}
2493
	}
2494
	return 1;
2495
}
2496
2497
/* --- register management ------------------------------------------------ */
2498
2499
void
2500
roff_setreg(struct roff *r, const char *name, int val, char sign)
2501
{
2502
	struct roffreg	*reg;
2503
2504
	/* Search for an existing register with the same name. */
2505
	reg = r->regtab;
2506
2507
	while (reg && strcmp(name, reg->key.p))
2508
		reg = reg->next;
2509
2510
	if (NULL == reg) {
2511
		/* Create a new register. */
2512
		reg = mandoc_malloc(sizeof(struct roffreg));
2513
		reg->key.p = mandoc_strdup(name);
2514
		reg->key.sz = strlen(name);
2515
		reg->val = 0;
2516
		reg->next = r->regtab;
2517
		r->regtab = reg;
2518
	}
2519
2520
	if ('+' == sign)
2521
		reg->val += val;
2522
	else if ('-' == sign)
2523
		reg->val -= val;
2524
	else
2525
		reg->val = val;
2526
}
2527
2528
/*
2529
 * Handle some predefined read-only number registers.
2530
 * For now, return -1 if the requested register is not predefined;
2531
 * in case a predefined read-only register having the value -1
2532
 * were to turn up, another special value would have to be chosen.
2533
 */
2534
static int
2535
roff_getregro(const struct roff *r, const char *name)
2536
{
2537
2538
	switch (*name) {
2539
	case '$':  /* Number of arguments of the last macro evaluated. */
2540
		return r->argc;
2541
	case 'A':  /* ASCII approximation mode is always off. */
2542
		return 0;
2543
	case 'g':  /* Groff compatibility mode is always on. */
2544
		return 1;
2545
	case 'H':  /* Fixed horizontal resolution. */
2546
		return 24;
2547
	case 'j':  /* Always adjust left margin only. */
2548
		return 0;
2549
	case 'T':  /* Some output device is always defined. */
2550
		return 1;
2551
	case 'V':  /* Fixed vertical resolution. */
2552
		return 40;
2553
	default:
2554
		return -1;
2555
	}
2556
}
2557
2558
int
2559
roff_getreg(const struct roff *r, const char *name)
2560
{
2561
	struct roffreg	*reg;
2562
	int		 val;
2563
2564
	if ('.' == name[0] && '\0' != name[1] && '\0' == name[2]) {
2565
		val = roff_getregro(r, name + 1);
2566
		if (-1 != val)
2567
			return val;
2568
	}
2569
2570
	for (reg = r->regtab; reg; reg = reg->next)
2571
		if (0 == strcmp(name, reg->key.p))
2572
			return reg->val;
2573
2574
	return 0;
2575
}
2576
2577
static int
2578
roff_getregn(const struct roff *r, const char *name, size_t len)
2579
{
2580
	struct roffreg	*reg;
2581
	int		 val;
2582
2583
	if ('.' == name[0] && 2 == len) {
2584
		val = roff_getregro(r, name + 1);
2585
		if (-1 != val)
2586
			return val;
2587
	}
2588
2589
	for (reg = r->regtab; reg; reg = reg->next)
2590
		if (len == reg->key.sz &&
2591
		    0 == strncmp(name, reg->key.p, len))
2592
			return reg->val;
2593
2594
	return 0;
2595
}
2596
2597
static int
2598
roff_hasregn(const struct roff *r, const char *name, size_t len)
2599
{
2600
	struct roffreg	*reg;
2601
	int		 val;
2602
2603
	if ('.' == name[0] && 2 == len) {
2604
		val = roff_getregro(r, name + 1);
2605
		if (-1 != val)
2606
			return 1;
2607
	}
2608
2609
	for (reg = r->regtab; reg; reg = reg->next)
2610
		if (len == reg->key.sz &&
2611
		    0 == strncmp(name, reg->key.p, len))
2612
			return 1;
2613
2614
	return 0;
2615
}
2616
2617
static void
2618
roff_freereg(struct roffreg *reg)
2619
{
2620
	struct roffreg	*old_reg;
2621
2622
	while (NULL != reg) {
2623
		free(reg->key.p);
2624
		old_reg = reg;
2625
		reg = reg->next;
2626
		free(old_reg);
2627
	}
2628
}
2629
2630
static enum rofferr
2631
roff_nr(ROFF_ARGS)
2632
{
2633
	char		*key, *val;
2634
	size_t		 keysz;
2635
	int		 iv;
2636
	char		 sign;
2637
2638
	key = val = buf->buf + pos;
2639
	if (*key == '\0')
2640
		return ROFF_IGN;
2641
2642
	keysz = roff_getname(r, &val, ln, pos);
2643
	if (key[keysz] == '\\')
2644
		return ROFF_IGN;
2645
	key[keysz] = '\0';
2646
2647
	sign = *val;
2648
	if (sign == '+' || sign == '-')
2649
		val++;
2650
2651
	if (roff_evalnum(r, ln, val, NULL, &iv, ROFFNUM_SCALE))
2652
		roff_setreg(r, key, iv, sign);
2653
2654
	return ROFF_IGN;
2655
}
2656
2657
static enum rofferr
2658
roff_rr(ROFF_ARGS)
2659
{
2660
	struct roffreg	*reg, **prev;
2661
	char		*name, *cp;
2662
	size_t		 namesz;
2663
2664
	name = cp = buf->buf + pos;
2665
	if (*name == '\0')
2666
		return ROFF_IGN;
2667
	namesz = roff_getname(r, &cp, ln, pos);
2668
	name[namesz] = '\0';
2669
2670
	prev = &r->regtab;
2671
	while (1) {
2672
		reg = *prev;
2673
		if (reg == NULL || !strcmp(name, reg->key.p))
2674
			break;
2675
		prev = &reg->next;
2676
	}
2677
	if (reg != NULL) {
2678
		*prev = reg->next;
2679
		free(reg->key.p);
2680
		free(reg);
2681
	}
2682
	return ROFF_IGN;
2683
}
2684
2685
/* --- handler functions for roff requests -------------------------------- */
2686
2687
static enum rofferr
2688
roff_rm(ROFF_ARGS)
2689
{
2690
	const char	 *name;
2691
	char		 *cp;
2692
	size_t		  namesz;
2693
2694
	cp = buf->buf + pos;
2695
	while (*cp != '\0') {
2696
		name = cp;
2697
		namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
2698
		roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
2699
		if (name[namesz] == '\\')
2700
			break;
2701
	}
2702
	return ROFF_IGN;
2703
}
2704
2705
static enum rofferr
2706
roff_it(ROFF_ARGS)
2707
{
2708
	int		 iv;
2709
2710
	/* Parse the number of lines. */
2711
2712
	if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) {
2713
		mandoc_msg(MANDOCERR_IT_NONUM, r->parse,
2714
		    ln, ppos, buf->buf + 1);
2715
		return ROFF_IGN;
2716
	}
2717
2718
	while (isspace((unsigned char)buf->buf[pos]))
2719
		pos++;
2720
2721
	/*
2722
	 * Arm the input line trap.
2723
	 * Special-casing "an-trap" is an ugly workaround to cope
2724
	 * with DocBook stupidly fiddling with man(7) internals.
2725
	 */
2726
2727
	roffit_lines = iv;
2728
	roffit_macro = mandoc_strdup(iv != 1 ||
2729
	    strcmp(buf->buf + pos, "an-trap") ?
2730
	    buf->buf + pos : "br");
2731
	return ROFF_IGN;
2732
}
2733
2734
static enum rofferr
2735
roff_Dd(ROFF_ARGS)
2736
{
2737
	const char *const	*cp;
2738
2739
	if ((r->options & (MPARSE_MDOC | MPARSE_QUICK)) == 0)
2740
		for (cp = __mdoc_reserved; *cp; cp++)
2741
			roff_setstr(r, *cp, NULL, 0);
2742
2743
	if (r->format == 0)
2744
		r->format = MPARSE_MDOC;
2745
2746
	return ROFF_CONT;
2747
}
2748
2749
static enum rofferr
2750
roff_TH(ROFF_ARGS)
2751
{
2752
	const char *const	*cp;
2753
2754
	if ((r->options & MPARSE_QUICK) == 0)
2755
		for (cp = __man_reserved; *cp; cp++)
2756
			roff_setstr(r, *cp, NULL, 0);
2757
2758
	if (r->format == 0)
2759
		r->format = MPARSE_MAN;
2760
2761
	return ROFF_CONT;
2762
}
2763
2764
static enum rofferr
2765
roff_TE(ROFF_ARGS)
2766
{
2767
2768
	if (NULL == r->tbl)
2769
		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
2770
		    ln, ppos, "TE");
2771
	else if ( ! tbl_end(&r->tbl)) {
2772
		free(buf->buf);
2773
		buf->buf = mandoc_strdup(".sp");
2774
		buf->sz = 4;
2775
		return ROFF_REPARSE;
2776
	}
2777
	return ROFF_IGN;
2778
}
2779
2780
static enum rofferr
2781
roff_T_(ROFF_ARGS)
2782
{
2783
2784
	if (NULL == r->tbl)
2785
		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
2786
		    ln, ppos, "T&");
2787
	else
2788
		tbl_restart(ppos, ln, r->tbl);
2789
2790
	return ROFF_IGN;
2791
}
2792
2793
/*
2794
 * Handle in-line equation delimiters.
2795
 */
2796
static enum rofferr
2797
roff_eqndelim(struct roff *r, struct buf *buf, int pos)
2798
{
2799
	char		*cp1, *cp2;
2800
	const char	*bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
2801
2802
	/*
2803
	 * Outside equations, look for an opening delimiter.
2804
	 * If we are inside an equation, we already know it is
2805
	 * in-line, or this function wouldn't have been called;
2806
	 * so look for a closing delimiter.
2807
	 */
2808
2809
	cp1 = buf->buf + pos;
2810
	cp2 = strchr(cp1, r->eqn == NULL ?
2811
	    r->last_eqn->odelim : r->last_eqn->cdelim);
2812
	if (cp2 == NULL)
2813
		return ROFF_CONT;
2814
2815
	*cp2++ = '\0';
2816
	bef_pr = bef_nl = aft_nl = aft_pr = "";
2817
2818
	/* Handle preceding text, protecting whitespace. */
2819
2820
	if (*buf->buf != '\0') {
2821
		if (r->eqn == NULL)
2822
			bef_pr = "\\&";
2823
		bef_nl = "\n";
2824
	}
2825
2826
	/*
2827
	 * Prepare replacing the delimiter with an equation macro
2828
	 * and drop leading white space from the equation.
2829
	 */
2830
2831
	if (r->eqn == NULL) {
2832
		while (*cp2 == ' ')
2833
			cp2++;
2834
		mac = ".EQ";
2835
	} else
2836
		mac = ".EN";
2837
2838
	/* Handle following text, protecting whitespace. */
2839
2840
	if (*cp2 != '\0') {
2841
		aft_nl = "\n";
2842
		if (r->eqn != NULL)
2843
			aft_pr = "\\&";
2844
	}
2845
2846
	/* Do the actual replacement. */
2847
2848
	buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
2849
	    bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
2850
	free(buf->buf);
2851
	buf->buf = cp1;
2852
2853
	/* Toggle the in-line state of the eqn subsystem. */
2854
2855
	r->eqn_inline = r->eqn == NULL;
2856
	return ROFF_REPARSE;
2857
}
2858
2859
static enum rofferr
2860
roff_EQ(ROFF_ARGS)
2861
{
2862
	struct eqn_node *e;
2863
2864
	assert(r->eqn == NULL);
2865
	e = eqn_alloc(ppos, ln, r->parse);
2866
2867
	if (r->last_eqn) {
2868
		r->last_eqn->next = e;
2869
		e->delim = r->last_eqn->delim;
2870
		e->odelim = r->last_eqn->odelim;
2871
		e->cdelim = r->last_eqn->cdelim;
2872
	} else
2873
		r->first_eqn = r->last_eqn = e;
2874
2875
	r->eqn = r->last_eqn = e;
2876
2877
	if (buf->buf[pos] != '\0')
2878
		mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
2879
		    ".EQ %s", buf->buf + pos);
2880
2881
	return ROFF_IGN;
2882
}
2883
2884
static enum rofferr
2885
roff_EN(ROFF_ARGS)
2886
{
2887
2888
	mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, ln, ppos, "EN");
2889
	return ROFF_IGN;
2890
}
2891
2892
static enum rofferr
2893
roff_TS(ROFF_ARGS)
2894
{
2895
	struct tbl_node	*tbl;
2896
2897
	if (r->tbl) {
2898
		mandoc_msg(MANDOCERR_BLK_BROKEN, r->parse,
2899
		    ln, ppos, "TS breaks TS");
2900
		tbl_end(&r->tbl);
2901
	}
2902
2903
	tbl = tbl_alloc(ppos, ln, r->parse);
2904
2905
	if (r->last_tbl)
2906
		r->last_tbl->next = tbl;
2907
	else
2908
		r->first_tbl = r->last_tbl = tbl;
2909
2910
	r->tbl = r->last_tbl = tbl;
2911
	return ROFF_IGN;
2912
}
2913
2914
static enum rofferr
2915
roff_brp(ROFF_ARGS)
2916
{
2917
2918
	buf->buf[pos - 1] = '\0';
2919
	return ROFF_CONT;
2920
}
2921
2922
static enum rofferr
2923
roff_cc(ROFF_ARGS)
2924
{
2925
	const char	*p;
2926
2927
	p = buf->buf + pos;
2928
2929
	if (*p == '\0' || (r->control = *p++) == '.')
2930
		r->control = 0;
2931
2932
	if (*p != '\0')
2933
		mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
2934
		    ln, p - buf->buf, "cc ... %s", p);
2935
2936
	return ROFF_IGN;
2937
}
2938
2939
static enum rofferr
2940
roff_tr(ROFF_ARGS)
2941
{
2942
	const char	*p, *first, *second;
2943
	size_t		 fsz, ssz;
2944
	enum mandoc_esc	 esc;
2945
2946
	p = buf->buf + pos;
2947
2948
	if (*p == '\0') {
2949
		mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse, ln, ppos, "tr");
2950
		return ROFF_IGN;
2951
	}
2952
2953
	while (*p != '\0') {
2954
		fsz = ssz = 1;
2955
2956
		first = p++;
2957
		if (*first == '\\') {
2958
			esc = mandoc_escape(&p, NULL, NULL);
2959
			if (esc == ESCAPE_ERROR) {
2960
				mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
2961
				    ln, (int)(p - buf->buf), first);
2962
				return ROFF_IGN;
2963
			}
2964
			fsz = (size_t)(p - first);
2965
		}
2966
2967
		second = p++;
2968
		if (*second == '\\') {
2969
			esc = mandoc_escape(&p, NULL, NULL);
2970
			if (esc == ESCAPE_ERROR) {
2971
				mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
2972
				    ln, (int)(p - buf->buf), second);
2973
				return ROFF_IGN;
2974
			}
2975
			ssz = (size_t)(p - second);
2976
		} else if (*second == '\0') {
2977
			mandoc_vmsg(MANDOCERR_TR_ODD, r->parse,
2978
			    ln, first - buf->buf, "tr %s", first);
2979
			second = " ";
2980
			p--;
2981
		}
2982
2983
		if (fsz > 1) {
2984
			roff_setstrn(&r->xmbtab, first, fsz,
2985
			    second, ssz, 0);
2986
			continue;
2987
		}
2988
2989
		if (r->xtab == NULL)
2990
			r->xtab = mandoc_calloc(128,
2991
			    sizeof(struct roffstr));
2992
2993
		free(r->xtab[(int)*first].p);
2994
		r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
2995
		r->xtab[(int)*first].sz = ssz;
2996
	}
2997
2998
	return ROFF_IGN;
2999
}
3000
3001
static enum rofferr
3002
roff_so(ROFF_ARGS)
3003
{
3004
	char *name, *cp;
3005
3006
	name = buf->buf + pos;
3007
	mandoc_vmsg(MANDOCERR_SO, r->parse, ln, ppos, "so %s", name);
3008
3009
	/*
3010
	 * Handle `so'.  Be EXTREMELY careful, as we shouldn't be
3011
	 * opening anything that's not in our cwd or anything beneath
3012
	 * it.  Thus, explicitly disallow traversing up the file-system
3013
	 * or using absolute paths.
3014
	 */
3015
3016
	if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
3017
		mandoc_vmsg(MANDOCERR_SO_PATH, r->parse, ln, ppos,
3018
		    ".so %s", name);
3019
		buf->sz = mandoc_asprintf(&cp,
3020
		    ".sp\nSee the file %s.\n.sp", name) + 1;
3021
		free(buf->buf);
3022
		buf->buf = cp;
3023
		*offs = 0;
3024
		return ROFF_REPARSE;
3025
	}
3026
3027
	*offs = pos;
3028
	return ROFF_SO;
3029
}
3030
3031
/* --- user defined strings and macros ------------------------------------ */
3032
3033
static enum rofferr
3034
roff_userdef(ROFF_ARGS)
3035
{
3036
	const char	 *arg[9], *ap;
3037
	char		 *cp, *n1, *n2;
3038
	int		  i, ib, ie;
3039
	size_t		  asz, rsz;
3040
3041
	/*
3042
	 * Collect pointers to macro argument strings
3043
	 * and NUL-terminate them.
3044
	 */
3045
3046
	r->argc = 0;
3047
	cp = buf->buf + pos;
3048
	for (i = 0; i < 9; i++) {
3049
		if (*cp == '\0')
3050
			arg[i] = "";
3051
		else {
3052
			arg[i] = mandoc_getarg(r->parse, &cp, ln, &pos);
3053
			r->argc = i + 1;
3054
		}
3055
	}
3056
3057
	/*
3058
	 * Expand macro arguments.
3059
	 */
3060
3061
	buf->sz = strlen(r->current_string) + 1;
3062
	n1 = cp = mandoc_malloc(buf->sz);
3063
	memcpy(n1, r->current_string, buf->sz);
3064
	while (*cp != '\0') {
3065
3066
		/* Scan ahead for the next argument invocation. */
3067
3068
		if (*cp++ != '\\')
3069
			continue;
3070
		if (*cp++ != '$')
3071
			continue;
3072
		if (*cp == '*') {  /* \\$* inserts all arguments */
3073
			ib = 0;
3074
			ie = r->argc - 1;
3075
		} else {  /* \\$1 .. \\$9 insert one argument */
3076
			ib = ie = *cp - '1';
3077
			if (ib < 0 || ib > 8)
3078
				continue;
3079
		}
3080
		cp -= 2;
3081
3082
		/*
3083
		 * Determine the size of the expanded argument,
3084
		 * taking escaping of quotes into account.
3085
		 */
3086
3087
		asz = ie > ib ? ie - ib : 0;  /* for blanks */
3088
		for (i = ib; i <= ie; i++) {
3089
			for (ap = arg[i]; *ap != '\0'; ap++) {
3090
				asz++;
3091
				if (*ap == '"')
3092
					asz += 3;
3093
			}
3094
		}
3095
		if (asz != 3) {
3096
3097
			/*
3098
			 * Determine the size of the rest of the
3099
			 * unexpanded macro, including the NUL.
3100
			 */
3101
3102
			rsz = buf->sz - (cp - n1) - 3;
3103
3104
			/*
3105
			 * When shrinking, move before
3106
			 * releasing the storage.
3107
			 */
3108
3109
			if (asz < 3)
3110
				memmove(cp + asz, cp + 3, rsz);
3111
3112
			/*
3113
			 * Resize the storage for the macro
3114
			 * and readjust the parse pointer.
3115
			 */
3116
3117
			buf->sz += asz - 3;
3118
			n2 = mandoc_realloc(n1, buf->sz);
3119
			cp = n2 + (cp - n1);
3120
			n1 = n2;
3121
3122
			/*
3123
			 * When growing, make room
3124
			 * for the expanded argument.
3125
			 */
3126
3127
			if (asz > 3)
3128
				memmove(cp + asz, cp + 3, rsz);
3129
		}
3130
3131
		/* Copy the expanded argument, escaping quotes. */
3132
3133
		n2 = cp;
3134
		for (i = ib; i <= ie; i++) {
3135
			for (ap = arg[i]; *ap != '\0'; ap++) {
3136
				if (*ap == '"') {
3137
					memcpy(n2, "\\(dq", 4);
3138
					n2 += 4;
3139
				} else
3140
					*n2++ = *ap;
3141
			}
3142
			if (i < ie)
3143
				*n2++ = ' ';
3144
		}
3145
	}
3146
3147
	/*
3148
	 * Replace the macro invocation
3149
	 * by the expanded macro.
3150
	 */
3151
3152
	free(buf->buf);
3153
	buf->buf = n1;
3154
	*offs = 0;
3155
3156
	return buf->sz > 1 && buf->buf[buf->sz - 2] == '\n' ?
3157
	   ROFF_REPARSE : ROFF_APPEND;
3158
}
3159
3160
static size_t
3161
roff_getname(struct roff *r, char **cpp, int ln, int pos)
3162
{
3163
	char	 *name, *cp;
3164
	size_t	  namesz;
3165
3166
	name = *cpp;
3167
	if ('\0' == *name)
3168
		return 0;
3169
3170
	/* Read until end of name and terminate it with NUL. */
3171
	for (cp = name; 1; cp++) {
3172
		if ('\0' == *cp || ' ' == *cp) {
3173
			namesz = cp - name;
3174
			break;
3175
		}
3176
		if ('\\' != *cp)
3177
			continue;
3178
		namesz = cp - name;
3179
		if ('{' == cp[1] || '}' == cp[1])
3180
			break;
3181
		cp++;
3182
		if ('\\' == *cp)
3183
			continue;
3184
		mandoc_vmsg(MANDOCERR_NAMESC, r->parse, ln, pos,
3185
		    "%.*s", (int)(cp - name + 1), name);
3186
		mandoc_escape((const char **)&cp, NULL, NULL);
3187
		break;
3188
	}
3189
3190
	/* Read past spaces. */
3191
	while (' ' == *cp)
3192
		cp++;
3193
3194
	*cpp = cp;
3195
	return namesz;
3196
}
3197
3198
/*
3199
 * Store *string into the user-defined string called *name.
3200
 * To clear an existing entry, call with (*r, *name, NULL, 0).
3201
 * append == 0: replace mode
3202
 * append == 1: single-line append mode
3203
 * append == 2: multiline append mode, append '\n' after each call
3204
 */
3205
static void
3206
roff_setstr(struct roff *r, const char *name, const char *string,
3207
	int append)
3208
{
3209
3210
	roff_setstrn(&r->strtab, name, strlen(name), string,
3211
	    string ? strlen(string) : 0, append);
3212
}
3213
3214
static void
3215
roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
3216
		const char *string, size_t stringsz, int append)
3217
{
3218
	struct roffkv	*n;
3219
	char		*c;
3220
	int		 i;
3221
	size_t		 oldch, newch;
3222
3223
	/* Search for an existing string with the same name. */
3224
	n = *r;
3225
3226
	while (n && (namesz != n->key.sz ||
3227
			strncmp(n->key.p, name, namesz)))
3228
		n = n->next;
3229
3230
	if (NULL == n) {
3231
		/* Create a new string table entry. */
3232
		n = mandoc_malloc(sizeof(struct roffkv));
3233
		n->key.p = mandoc_strndup(name, namesz);
3234
		n->key.sz = namesz;
3235
		n->val.p = NULL;
3236
		n->val.sz = 0;
3237
		n->next = *r;
3238
		*r = n;
3239
	} else if (0 == append) {
3240
		free(n->val.p);
3241
		n->val.p = NULL;
3242
		n->val.sz = 0;
3243
	}
3244
3245
	if (NULL == string)
3246
		return;
3247
3248
	/*
3249
	 * One additional byte for the '\n' in multiline mode,
3250
	 * and one for the terminating '\0'.
3251
	 */
3252
	newch = stringsz + (1 < append ? 2u : 1u);
3253
3254
	if (NULL == n->val.p) {
3255
		n->val.p = mandoc_malloc(newch);
3256
		*n->val.p = '\0';
3257
		oldch = 0;
3258
	} else {
3259
		oldch = n->val.sz;
3260
		n->val.p = mandoc_realloc(n->val.p, oldch + newch);
3261
	}
3262
3263
	/* Skip existing content in the destination buffer. */
3264
	c = n->val.p + (int)oldch;
3265
3266
	/* Append new content to the destination buffer. */
3267
	i = 0;
3268
	while (i < (int)stringsz) {
3269
		/*
3270
		 * Rudimentary roff copy mode:
3271
		 * Handle escaped backslashes.
3272
		 */
3273
		if ('\\' == string[i] && '\\' == string[i + 1])
3274
			i++;
3275
		*c++ = string[i++];
3276
	}
3277
3278
	/* Append terminating bytes. */
3279
	if (1 < append)
3280
		*c++ = '\n';
3281
3282
	*c = '\0';
3283
	n->val.sz = (int)(c - n->val.p);
3284
}
3285
3286
static const char *
3287
roff_getstrn(const struct roff *r, const char *name, size_t len)
3288
{
3289
	const struct roffkv *n;
3290
	int i;
3291
3292
	for (n = r->strtab; n; n = n->next)
3293
		if (0 == strncmp(name, n->key.p, len) &&
3294
		    '\0' == n->key.p[(int)len])
3295
			return n->val.p;
3296
3297
	for (i = 0; i < PREDEFS_MAX; i++)
3298
		if (0 == strncmp(name, predefs[i].name, len) &&
3299
				'\0' == predefs[i].name[(int)len])
3300
			return predefs[i].str;
3301
3302
	return NULL;
3303
}
3304
3305
static void
3306
roff_freestr(struct roffkv *r)
3307
{
3308
	struct roffkv	 *n, *nn;
3309
3310
	for (n = r; n; n = nn) {
3311
		free(n->key.p);
3312
		free(n->val.p);
3313
		nn = n->next;
3314
		free(n);
3315
	}
3316
}
3317
3318
/* --- accessors and utility functions ------------------------------------ */
3319
3320
const struct tbl_span *
3321
roff_span(const struct roff *r)
3322
{
3323
3324
	return r->tbl ? tbl_span(r->tbl) : NULL;
3325
}
3326
3327
const struct eqn *
3328
roff_eqn(const struct roff *r)
3329
{
3330
3331
	return r->last_eqn ? &r->last_eqn->eqn : NULL;
3332
}
3333
3334
/*
3335
 * Duplicate an input string, making the appropriate character
3336
 * conversations (as stipulated by `tr') along the way.
3337
 * Returns a heap-allocated string with all the replacements made.
3338
 */
3339
char *
3340
roff_strdup(const struct roff *r, const char *p)
3341
{
3342
	const struct roffkv *cp;
3343
	char		*res;
3344
	const char	*pp;
3345
	size_t		 ssz, sz;
3346
	enum mandoc_esc	 esc;
3347
3348
	if (NULL == r->xmbtab && NULL == r->xtab)
3349
		return mandoc_strdup(p);
3350
	else if ('\0' == *p)
3351
		return mandoc_strdup("");
3352
3353
	/*
3354
	 * Step through each character looking for term matches
3355
	 * (remember that a `tr' can be invoked with an escape, which is
3356
	 * a glyph but the escape is multi-character).
3357
	 * We only do this if the character hash has been initialised
3358
	 * and the string is >0 length.
3359
	 */
3360
3361
	res = NULL;
3362
	ssz = 0;
3363
3364
	while ('\0' != *p) {
3365
		if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) {
3366
			sz = r->xtab[(int)*p].sz;
3367
			res = mandoc_realloc(res, ssz + sz + 1);
3368
			memcpy(res + ssz, r->xtab[(int)*p].p, sz);
3369
			ssz += sz;
3370
			p++;
3371
			continue;
3372
		} else if ('\\' != *p) {
3373
			res = mandoc_realloc(res, ssz + 2);
3374
			res[ssz++] = *p++;
3375
			continue;
3376
		}
3377
3378
		/* Search for term matches. */
3379
		for (cp = r->xmbtab; cp; cp = cp->next)
3380
			if (0 == strncmp(p, cp->key.p, cp->key.sz))
3381
				break;
3382
3383
		if (NULL != cp) {
3384
			/*
3385
			 * A match has been found.
3386
			 * Append the match to the array and move
3387
			 * forward by its keysize.
3388
			 */
3389
			res = mandoc_realloc(res,
3390
			    ssz + cp->val.sz + 1);
3391
			memcpy(res + ssz, cp->val.p, cp->val.sz);
3392
			ssz += cp->val.sz;
3393
			p += (int)cp->key.sz;
3394
			continue;
3395
		}
3396
3397
		/*
3398
		 * Handle escapes carefully: we need to copy
3399
		 * over just the escape itself, or else we might
3400
		 * do replacements within the escape itself.
3401
		 * Make sure to pass along the bogus string.
3402
		 */
3403
		pp = p++;
3404
		esc = mandoc_escape(&p, NULL, NULL);
3405
		if (ESCAPE_ERROR == esc) {
3406
			sz = strlen(pp);
3407
			res = mandoc_realloc(res, ssz + sz + 1);
3408
			memcpy(res + ssz, pp, sz);
3409
			break;
3410
		}
3411
		/*
3412
		 * We bail out on bad escapes.
3413
		 * No need to warn: we already did so when
3414
		 * roff_res() was called.
3415
		 */
3416
		sz = (int)(p - pp);
3417
		res = mandoc_realloc(res, ssz + sz + 1);
3418
		memcpy(res + ssz, pp, sz);
3419
		ssz += sz;
3420
	}
3421
3422
	res[(int)ssz] = '\0';
3423
	return res;
3424
}
3425
3426
int
3427
roff_getformat(const struct roff *r)
3428
{
3429
3430
	return r->format;
3431
}
3432
3433
/*
3434
 * Find out whether a line is a macro line or not.
3435
 * If it is, adjust the current position and return one; if it isn't,
3436
 * return zero and don't change the current position.
3437
 * If the control character has been set with `.cc', then let that grain
3438
 * precedence.
3439
 * This is slighly contrary to groff, where using the non-breaking
3440
 * control character when `cc' has been invoked will cause the
3441
 * non-breaking macro contents to be printed verbatim.
3442
 */
3443
int
3444
roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
3445
{
3446
	int		pos;
3447
3448
	pos = *ppos;
3449
3450
	if (0 != r->control && cp[pos] == r->control)
3451
		pos++;
3452
	else if (0 != r->control)
3453
		return 0;
3454
	else if ('\\' == cp[pos] && '.' == cp[pos + 1])
3455
		pos += 2;
3456
	else if ('.' == cp[pos] || '\'' == cp[pos])
3457
		pos++;
3458
	else
3459
		return 0;
3460
3461
	while (' ' == cp[pos] || '\t' == cp[pos])
3462
		pos++;
3463
3464
	*ppos = pos;
3465
	return 1;
3466
}