GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: usr.bin/mandoc/eqn.c Lines: 389 448 86.8 %
Date: 2017-11-13 Branches: 246 341 72.1 %

Line Branch Exec Source
1
/*	$OpenBSD: eqn.c,v 1.41 2017/07/15 15:06:31 bentley Exp $ */
2
/*
3
 * Copyright (c) 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4
 * Copyright (c) 2014, 2015, 2017 Ingo Schwarze <schwarze@openbsd.org>
5
 *
6
 * Permission to use, copy, modify, and distribute this software for any
7
 * purpose with or without fee is hereby granted, provided that the above
8
 * copyright notice and this permission notice appear in all copies.
9
 *
10
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17
 */
18
#include <sys/types.h>
19
20
#include <assert.h>
21
#include <ctype.h>
22
#include <limits.h>
23
#include <stdio.h>
24
#include <stdlib.h>
25
#include <string.h>
26
#include <time.h>
27
28
#include "mandoc_aux.h"
29
#include "mandoc.h"
30
#include "roff.h"
31
#include "libmandoc.h"
32
#include "libroff.h"
33
34
/*
 * EQN_NEST_MAX limits how many define substitutions eqn_next() chains
 * before it gives up with MANDOCERR_ROFFLOOP.
 * STRNEQ() is true when both sizes are equal and the first sz1 bytes
 * of p1 and p2 compare equal; note that sz1 is evaluated twice.
 */
#define	EQN_NEST_MAX	 128 /* maximum nesting of defines */
35
#define	STRNEQ(p1, sz1, p2, sz2) \
36
	((sz1) == (sz2) && 0 == strncmp((p1), (p2), (sz1)))
37
38
/*
 * Token types returned by eqn_next().
 * All values below EQN_TOK__MAX are keywords and index eqn_toks[];
 * the values after it classify input that is not a keyword.
 */
enum	eqn_tok {
	/* Marks placed on the preceding box. */
	EQN_TOK_DYAD = 0,
	EQN_TOK_VEC,
	EQN_TOK_UNDER,
	EQN_TOK_BAR,
	EQN_TOK_TILDE,
	EQN_TOK_HAT,
	EQN_TOK_DOT,
	EQN_TOK_DOTDOT,

	/* Motions. */
	EQN_TOK_FWD,
	EQN_TOK_BACK,
	EQN_TOK_DOWN,
	EQN_TOK_UP,

	/* Fonts and sizes. */
	EQN_TOK_FAT,
	EQN_TOK_ROMAN,
	EQN_TOK_ITALIC,
	EQN_TOK_BOLD,
	EQN_TOK_SIZE,

	/* Positioning. */
	EQN_TOK_SUB,
	EQN_TOK_SUP,
	EQN_TOK_SQRT,
	EQN_TOK_OVER,
	EQN_TOK_FROM,
	EQN_TOK_TO,

	/* Grouping and global settings. */
	EQN_TOK_BRACE_OPEN,
	EQN_TOK_BRACE_CLOSE,
	EQN_TOK_GSIZE,
	EQN_TOK_GFONT,
	EQN_TOK_MARK,
	EQN_TOK_LINEUP,
	EQN_TOK_LEFT,
	EQN_TOK_RIGHT,

	/* Piles and matrices. */
	EQN_TOK_PILE,
	EQN_TOK_LPILE,
	EQN_TOK_RPILE,
	EQN_TOK_CPILE,
	EQN_TOK_MATRIX,
	EQN_TOK_CCOL,
	EQN_TOK_LCOL,
	EQN_TOK_RCOL,

	/* Control statements. */
	EQN_TOK_DELIM,
	EQN_TOK_DEFINE,
	EQN_TOK_TDEFINE,
	EQN_TOK_NDEFINE,
	EQN_TOK_UNDEF,
	EQN_TOK_ABOVE,

	/* Number of keywords; also returned for unclassified text. */
	EQN_TOK__MAX,

	/* Classifications of input that is not a keyword. */
	EQN_TOK_FUNC,
	EQN_TOK_QUOTED,
	EQN_TOK_SYM,
	EQN_TOK_EOF
};
90
91
/*
 * Keyword spellings, indexed by enum eqn_tok.
 * The table is only ever read, so both the strings and the
 * pointers are const, like in eqn_func[].
 */
static	const char *const eqn_toks[EQN_TOK__MAX] = {
	"dyad", /* EQN_TOK_DYAD */
	"vec", /* EQN_TOK_VEC */
	"under", /* EQN_TOK_UNDER */
	"bar", /* EQN_TOK_BAR */
	"tilde", /* EQN_TOK_TILDE */
	"hat", /* EQN_TOK_HAT */
	"dot", /* EQN_TOK_DOT */
	"dotdot", /* EQN_TOK_DOTDOT */
	"fwd", /* EQN_TOK_FWD */
	"back", /* EQN_TOK_BACK */
	"down", /* EQN_TOK_DOWN */
	"up", /* EQN_TOK_UP */
	"fat", /* EQN_TOK_FAT */
	"roman", /* EQN_TOK_ROMAN */
	"italic", /* EQN_TOK_ITALIC */
	"bold", /* EQN_TOK_BOLD */
	"size", /* EQN_TOK_SIZE */
	"sub", /* EQN_TOK_SUB */
	"sup", /* EQN_TOK_SUP */
	"sqrt", /* EQN_TOK_SQRT */
	"over", /* EQN_TOK_OVER */
	"from", /* EQN_TOK_FROM */
	"to", /* EQN_TOK_TO */
	"{", /* EQN_TOK_BRACE_OPEN */
	"}", /* EQN_TOK_BRACE_CLOSE */
	"gsize", /* EQN_TOK_GSIZE */
	"gfont", /* EQN_TOK_GFONT */
	"mark", /* EQN_TOK_MARK */
	"lineup", /* EQN_TOK_LINEUP */
	"left", /* EQN_TOK_LEFT */
	"right", /* EQN_TOK_RIGHT */
	"pile", /* EQN_TOK_PILE */
	"lpile", /* EQN_TOK_LPILE */
	"rpile", /* EQN_TOK_RPILE */
	"cpile", /* EQN_TOK_CPILE */
	"matrix", /* EQN_TOK_MATRIX */
	"ccol", /* EQN_TOK_CCOL */
	"lcol", /* EQN_TOK_LCOL */
	"rcol", /* EQN_TOK_RCOL */
	"delim", /* EQN_TOK_DELIM */
	"define", /* EQN_TOK_DEFINE */
	"tdefine", /* EQN_TOK_TDEFINE */
	"ndefine", /* EQN_TOK_NDEFINE */
	"undef", /* EQN_TOK_UNDEF */
	"above", /* EQN_TOK_ABOVE */
};
138
139
/*
 * Names that eqn_next() reports as EQN_TOK_FUNC.
 */
static	const char *const eqn_func[] = {
	"acos", "acsc", "and", "arc",
	"asec", "asin", "atan", "cos",
	"cosh", "coth", "csc", "det",
	"exp", "for", "if", "lim",
	"ln", "log", "max", "min",
	"sec", "sin", "sinh", "tan",
	"tanh", "Im", "Re",
};
145
146
/*
 * Indices into eqnsyms[]; EQNSYM__MAX is the number of entries.
 * The constant for "psi" is spelled EQNSYM_psi, matching the name
 * of the symbol it stands for.
 */
enum	eqn_symt {
	EQNSYM_alpha = 0,
	EQNSYM_beta,
	EQNSYM_chi,
	EQNSYM_delta,
	EQNSYM_epsilon,
	EQNSYM_eta,
	EQNSYM_gamma,
	EQNSYM_iota,
	EQNSYM_kappa,
	EQNSYM_lambda,
	EQNSYM_mu,
	EQNSYM_nu,
	EQNSYM_omega,
	EQNSYM_omicron,
	EQNSYM_phi,
	EQNSYM_pi,
	EQNSYM_psi,
	EQNSYM_rho,
	EQNSYM_sigma,
	EQNSYM_tau,
	EQNSYM_theta,
	EQNSYM_upsilon,
	EQNSYM_xi,
	EQNSYM_zeta,
	EQNSYM_DELTA,
	EQNSYM_GAMMA,
	EQNSYM_LAMBDA,
	EQNSYM_OMEGA,
	EQNSYM_PHI,
	EQNSYM_PI,
	EQNSYM_PSI,
	EQNSYM_SIGMA,
	EQNSYM_THETA,
	EQNSYM_UPSILON,
	EQNSYM_XI,
	EQNSYM_inter,
	EQNSYM_union,
	EQNSYM_prod,
	EQNSYM_int,
	EQNSYM_sum,
	EQNSYM_grad,
	EQNSYM_del,
	EQNSYM_times,
	EQNSYM_cdot,
	EQNSYM_nothing,
	EQNSYM_approx,
	EQNSYM_prime,
	EQNSYM_half,
	EQNSYM_partial,
	EQNSYM_inf,
	EQNSYM_muchgreat,
	EQNSYM_muchless,
	EQNSYM_larrow,
	EQNSYM_rarrow,
	EQNSYM_pm,
	EQNSYM_nequal,
	EQNSYM_equiv,
	EQNSYM_lessequal,
	EQNSYM_moreequal,
	EQNSYM_minus,
	EQNSYM__MAX
};
209
210
/*
 * One row of the symbol table.
 */
struct	eqnsym {
	const char	*str;	/* name as matched against the input */
	const char	*sym;	/* name placed inside "\[...]" on output */
};
214
215
/*
 * Symbol names recognised by eqn_next() in MODE_TOK, each paired
 * with the character name that gets wrapped in "\[...]".
 * The rows are in the order of enum eqn_symt.
 */
static	const struct eqnsym eqnsyms[EQNSYM__MAX] = {
	/* Lower case Greek letters. */
	{ "alpha",	"*a" },
	{ "beta",	"*b" },
	{ "chi",	"*x" },
	{ "delta",	"*d" },
	{ "epsilon",	"*e" },
	{ "eta",	"*y" },
	{ "gamma",	"*g" },
	{ "iota",	"*i" },
	{ "kappa",	"*k" },
	{ "lambda",	"*l" },
	{ "mu",		"*m" },
	{ "nu",		"*n" },
	{ "omega",	"*w" },
	{ "omicron",	"*o" },
	{ "phi",	"*f" },
	{ "pi",		"*p" },
	{ "psi",	"*q" },
	{ "rho",	"*r" },
	{ "sigma",	"*s" },
	{ "tau",	"*t" },
	{ "theta",	"*h" },
	{ "upsilon",	"*u" },
	{ "xi",		"*c" },
	{ "zeta",	"*z" },

	/* Upper case Greek letters. */
	{ "DELTA",	"*D" },
	{ "GAMMA",	"*G" },
	{ "LAMBDA",	"*L" },
	{ "OMEGA",	"*W" },
	{ "PHI",	"*F" },
	{ "PI",		"*P" },
	{ "PSI",	"*Q" },
	{ "SIGMA",	"*S" },
	{ "THETA",	"*H" },
	{ "UPSILON",	"*U" },
	{ "XI",		"*C" },

	/* Named operators and miscellaneous symbols. */
	{ "inter",	"ca" },
	{ "union",	"cu" },
	{ "prod",	"product" },
	{ "int",	"integral" },
	{ "sum",	"sum" },
	{ "grad",	"gr" },
	{ "del",	"gr" },
	{ "times",	"mu" },
	{ "cdot",	"pc" },
	{ "nothing",	"&" },
	{ "approx",	"~~" },
	{ "prime",	"fm" },
	{ "half",	"12" },
	{ "partial",	"pd" },
	{ "inf",	"if" },

	/* Punctuation sequences. */
	{ ">>",		">>" },
	{ "<<",		"<<" },
	{ "<-",		"<-" },
	{ "->",		"->" },
	{ "+-",		"+-" },
	{ "!=",		"!=" },
	{ "==",		"==" },
	{ "<=",		"<=" },
	{ ">=",		">=" },
	{ "-",		"mi" },
};
277
278
/*
 * How eqn_next() delimits and interprets the next token.
 */
enum	parse_mode {
	MODE_QUOTED,	/* delimited by its first character, uninterpreted */
	MODE_NOSUB,	/* delimited by whitespace or braces, uninterpreted */
	MODE_SUB,	/* like MODE_NOSUB, with define substitution */
	MODE_TOK	/* like MODE_SUB, with token classification */
};
284
285
/* File-local helpers, defined further down. */
static	struct eqn_box	*eqn_box_alloc(struct eqn_node *ep,
				struct eqn_box *parent);
static	struct eqn_box	*eqn_box_makebinary(struct eqn_node *ep,
				struct eqn_box *parent);
static	void		 eqn_def(struct eqn_node *ep);
static	struct eqn_def	*eqn_def_find(struct eqn_node *ep);
static	void		 eqn_delim(struct eqn_node *ep);
static	enum eqn_tok	 eqn_next(struct eqn_node *ep, enum parse_mode mode);
static	void		 eqn_undef(struct eqn_node *ep);
293
294
295
struct eqn_node *
296
eqn_alloc(struct mparse *parse)
297
{
298
	struct eqn_node *ep;
299
300
260
	ep = mandoc_calloc(1, sizeof(*ep));
301
130
	ep->parse = parse;
302
130
	ep->gsize = EQN_DEFSIZE;
303
130
	return ep;
304
}
305
306
void
307
eqn_reset(struct eqn_node *ep)
308
{
309
110
	free(ep->data);
310
55
	ep->data = ep->start = ep->end = NULL;
311
55
	ep->sz = ep->toksz = 0;
312
55
}
313
314
void
315
eqn_read(struct eqn_node *ep, const char *p)
316
{
317
1010
	char		*cp;
318
319
505
	if (ep->data == NULL) {
320
185
		ep->sz = strlen(p);
321
185
		ep->data = mandoc_strdup(p);
322
185
	} else {
323
320
		ep->sz = mandoc_asprintf(&cp, "%s %s", ep->data, p);
324
320
		free(ep->data);
325
320
		ep->data = cp;
326
	}
327
505
	ep->sz += 1;
328
505
}
329
330
/*
331
 * Find the key "key" of the give size within our eqn-defined values.
332
 */
333
static struct eqn_def *
334
eqn_def_find(struct eqn_node *ep)
335
{
336
	int		 i;
337
338
12579
	for (i = 0; i < (int)ep->defsz; i++)
339

5223
		if (ep->defs[i].keysz && STRNEQ(ep->defs[i].key,
340
		    ep->defs[i].keysz, ep->start, ep->toksz))
341
1626
			return &ep->defs[i];
342
343
2433
	return NULL;
344
4059
}
345
346
/*
347
 * Parse a token from the input text.  The modes are:
348
 * MODE_QUOTED: Use *ep->start as the delimiter; the token ends
349
 *   before its next occurence.  Do not interpret the token in any
350
 *   way and return EQN_TOK_QUOTED.  All other modes behave like
351
 *   MODE_QUOTED when *ep->start is '"'.
352
 * MODE_NOSUB: If *ep->start is a curly brace, the token ends after it;
353
 *   otherwise, it ends before the next whitespace or brace.
354
 *   Do not interpret the token and return EQN_TOK__MAX.
355
 * MODE_SUB: Like MODE_NOSUB, but try to interpret the token as an
356
 *   alias created with define.  If it is an alias, replace it with
357
 *   its string value and reparse.
358
 * MODE_TOK: Like MODE_SUB, but also check the token against the list
359
 *   of tokens, and if there is a match, return that token.  Otherwise,
360
 *   if the token matches a symbol, return EQN_TOK_SYM; if it matches
361
 *   a function name, EQN_TOK_FUNC, or else EQN_TOK__MAX.  Except for
362
 *   a token match, *ep->start is set to an allocated string that the
363
 *   caller is expected to free.
364
 * All modes skip whitespace following the end of the token.
365
 */
366
static enum eqn_tok
367
eqn_next(struct eqn_node *ep, enum parse_mode mode)
368
{
369
	static int	 last_len, lim;
370
371
	struct eqn_def	*def;
372
	size_t		 start;
373
	int		 diff, i, quoted;
374
	enum eqn_tok	 tok;
375
376
	/*
377
	 * Reset the recursion counter after advancing
378
	 * beyond the end of the previous substitution.
379
	 */
380
6028
	if (ep->end - ep->data >= last_len)
381
2843
		lim = 0;
382
383
3014
	ep->start = ep->end;
384
3014
	quoted = mode == MODE_QUOTED;
385
4580
	for (;;) {
386
4850
		switch (*ep->start) {
387
		case '\0':
388
191
			ep->toksz = 0;
389
191
			return EQN_TOK_EOF;
390
		case '"':
391
			quoted = 1;
392
270
			break;
393
		default:
394
			break;
395
		}
396
4389
		if (quoted) {
397
324
			ep->end = strchr(ep->start + 1, *ep->start);
398
324
			ep->start++;  /* Skip opening quote. */
399
324
			if (ep->end == NULL) {
400
				mandoc_msg(MANDOCERR_ARG_QUOTE, ep->parse,
401
				    ep->node->line, ep->node->pos, NULL);
402
				ep->end = strchr(ep->start, '\0');
403
			}
404
		} else {
405
4065
			ep->end = ep->start + 1;
406

8042
			if (*ep->start != '{' && *ep->start != '}')
407
3889
				ep->end += strcspn(ep->end, " ^~\"{}\t");
408
		}
409
4389
		ep->toksz = ep->end - ep->start;
410

4713
		if (quoted && *ep->end != '\0')
411
324
			ep->end++;  /* Skip closing quote. */
412

22508
		while (*ep->end != '\0' && strchr(" \t^~", *ep->end) != NULL)
413
3433
			ep->end++;
414
4389
		if (quoted)  /* Cannot return, may have to strndup. */
415
			break;
416
4065
		if (mode == MODE_NOSUB)
417
84
			return EQN_TOK__MAX;
418
3981
		if ((def = eqn_def_find(ep)) == NULL)
419
			break;
420
1590
		if (++lim > EQN_NEST_MAX) {
421
48
			mandoc_msg(MANDOCERR_ROFFLOOP, ep->parse,
422
24
			    ep->node->line, ep->node->pos, NULL);
423
24
			return EQN_TOK_EOF;
424
		}
425
426
		/* Replace a defined name with its string value. */
427
1566
		if ((diff = def->valsz - ep->toksz) > 0) {
428
771
			start = ep->start - ep->data;
429
771
			ep->sz += diff;
430
771
			ep->data = mandoc_realloc(ep->data, ep->sz + 1);
431
771
			ep->start = ep->data + start;
432
771
		}
433
1566
		if (diff)
434
1590
			memmove(ep->start + def->valsz, ep->start + ep->toksz,
435
795
			    strlen(ep->start + ep->toksz) + 1);
436
1566
		memcpy(ep->start, def->val, def->valsz);
437
1566
		last_len = ep->start - ep->data + def->valsz;
438
	}
439
2715
	if (mode != MODE_TOK)
440
141
		return quoted ? EQN_TOK_QUOTED : EQN_TOK__MAX;
441
2574
	if (quoted) {
442
270
		ep->start = mandoc_strndup(ep->start, ep->toksz);
443
270
		return EQN_TOK_QUOTED;
444
	}
445
157246
	for (tok = 0; tok < EQN_TOK__MAX; tok++)
446

88152
		if (STRNEQ(ep->start, ep->toksz,
447
		    eqn_toks[tok], strlen(eqn_toks[tok])))
448
1142
			return tok;
449
450
137830
	for (i = 0; i < EQNSYM__MAX; i++) {
451

72402
		if (STRNEQ(ep->start, ep->toksz,
452
		    eqnsyms[i].str, strlen(eqnsyms[i].str))) {
453
62
			mandoc_asprintf(&ep->start,
454
62
			    "\\[%s]", eqnsyms[i].sym);
455
62
			return EQN_TOK_SYM;
456
		}
457
	}
458
1100
	ep->start = mandoc_strndup(ep->start, ep->toksz);
459
56716
	for (i = 0; i < (int)(sizeof(eqn_func)/sizeof(*eqn_func)); i++)
460

29426
		if (STRNEQ(ep->start, ep->toksz,
461
		    eqn_func[i], strlen(eqn_func[i])))
462
193
			return EQN_TOK_FUNC;
463
907
	return EQN_TOK__MAX;
464
3014
}
465
466
void
467
eqn_box_free(struct eqn_box *bp)
468
{
469
470
5012
	if (bp->first)
471
962
		eqn_box_free(bp->first);
472
2506
	if (bp->next)
473
1359
		eqn_box_free(bp->next);
474
475
2506
	free(bp->text);
476
2506
	free(bp->left);
477
2506
	free(bp->right);
478
2506
	free(bp->top);
479
2506
	free(bp->bottom);
480
2506
	free(bp);
481
2506
}
482
483
/*
484
 * Allocate a box as the last child of the parent node.
485
 */
486
static struct eqn_box *
487
eqn_box_alloc(struct eqn_node *ep, struct eqn_box *parent)
488
{
489
	struct eqn_box	*bp;
490
491
4642
	bp = mandoc_calloc(1, sizeof(struct eqn_box));
492
2321
	bp->parent = parent;
493
2321
	bp->parent->args++;
494
2321
	bp->expectargs = UINT_MAX;
495
2321
	bp->font = bp->parent->font;
496
2321
	bp->size = ep->gsize;
497
498
2321
	if (NULL != parent->first) {
499
1641
		parent->last->next = bp;
500
1641
		bp->prev = parent->last;
501
1641
	} else
502
680
		parent->first = bp;
503
504
2321
	parent->last = bp;
505
2321
	return bp;
506
}
507
508
/*
509
 * Reparent the current last node (of the current parent) under a new
510
 * EQN_SUBEXPR as the first element.
511
 * Then return the new parent.
512
 * The new EQN_SUBEXPR will have a two-child limit.
513
 */
514
static struct eqn_box *
515
eqn_box_makebinary(struct eqn_node *ep, struct eqn_box *parent)
516
{
517
	struct eqn_box	*b, *newb;
518
519
864
	assert(NULL != parent->last);
520
	b = parent->last;
521
432
	if (parent->last == parent->first)
522
151
		parent->first = NULL;
523
432
	parent->args--;
524
432
	parent->last = b->prev;
525
432
	b->prev = NULL;
526
432
	newb = eqn_box_alloc(ep, parent);
527
432
	newb->type = EQN_SUBEXPR;
528
432
	newb->expectargs = 2;
529
432
	newb->args = 1;
530
432
	newb->first = newb->last = b;
531
432
	newb->first->next = NULL;
532
432
	b->parent = newb;
533
432
	return newb;
534
}
535
536
/*
537
 * Parse the "delim" control statement.
538
 */
539
static void
540
eqn_delim(struct eqn_node *ep)
541
{
542

30
	if (ep->end[0] == '\0' || ep->end[1] == '\0') {
543
		mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
544
		    ep->node->line, ep->node->pos, "delim");
545
		if (ep->end[0] != '\0')
546
			ep->end++;
547
10
	} else if (strncmp(ep->end, "off", 3) == 0) {
548
3
		ep->delim = 0;
549
3
		ep->end += 3;
550
10
	} else if (strncmp(ep->end, "on", 2) == 0) {
551

6
		if (ep->odelim && ep->cdelim)
552
3
			ep->delim = 1;
553
3
		ep->end += 2;
554
3
	} else {
555
4
		ep->odelim = *ep->end++;
556
4
		ep->cdelim = *ep->end++;
557
4
		ep->delim = 1;
558
	}
559
10
}
560
561
/*
562
 * Undefine a previously-defined string.
563
 */
564
static void
565
eqn_undef(struct eqn_node *ep)
566
{
567
	struct eqn_def	*def;
568
569
48
	if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) {
570
12
		mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
571
6
		    ep->node->line, ep->node->pos, "undef");
572
6
		return;
573
	}
574
18
	if ((def = eqn_def_find(ep)) == NULL)
575
		return;
576
18
	free(def->key);
577
18
	free(def->val);
578
18
	def->key = def->val = NULL;
579
18
	def->keysz = def->valsz = 0;
580
42
}
581
582
static void
583
eqn_def(struct eqn_node *ep)
584
{
585
	struct eqn_def	*def;
586
	int		 i;
587
588
132
	if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) {
589
12
		mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
590
6
		    ep->node->line, ep->node->pos, "define");
591
6
		return;
592
	}
593
594
	/*
595
	 * Search for a key that already exists.
596
	 * Create a new key if none is found.
597
	 */
598
60
	if ((def = eqn_def_find(ep)) == NULL) {
599
		/* Find holes in string array. */
600
96
		for (i = 0; i < (int)ep->defsz; i++)
601
24
			if (0 == ep->defs[i].keysz)
602
				break;
603
604
42
		if (i == (int)ep->defsz) {
605
24
			ep->defsz++;
606
24
			ep->defs = mandoc_reallocarray(ep->defs,
607
			    ep->defsz, sizeof(struct eqn_def));
608
24
			ep->defs[i].key = ep->defs[i].val = NULL;
609
24
		}
610
611
42
		def = ep->defs + i;
612
42
		free(def->key);
613
42
		def->key = mandoc_strndup(ep->start, ep->toksz);
614
42
		def->keysz = ep->toksz;
615
42
	}
616
617
60
	if (eqn_next(ep, MODE_QUOTED) == EQN_TOK_EOF) {
618
12
		mandoc_vmsg(MANDOCERR_REQ_EMPTY, ep->parse,
619
6
		    ep->node->line, ep->node->pos, "define %s", def->key);
620
6
		free(def->key);
621
6
		free(def->val);
622
6
		def->key = def->val = NULL;
623
6
		def->keysz = def->valsz = 0;
624
6
		return;
625
	}
626
54
	free(def->val);
627
54
	def->val = mandoc_strndup(ep->start, ep->toksz);
628
54
	def->valsz = ep->toksz;
629
120
}
630
631
/*
 * Parse the text collected in ep->data into a tree of boxes below
 * ep->node->eqn.  Tokens are fetched one at a time with eqn_next()
 * in MODE_TOK; "parent" is the box that the next box gets appended
 * to and is moved up and down the tree as tokens open and close
 * constructs.  Returns when eqn_next() reports EQN_TOK_EOF.
 */
void
631
eqn_parse(struct eqn_node *ep)
632
{
633
	struct eqn_box	*cur, *nbox, *parent, *split;
634
370
	const char	*cp, *cpn;
635
	char		*p;
636
	enum eqn_tok	 tok;
637
	enum { CCL_LET, CCL_DIG, CCL_PUN } ccl, ccln;
638
	int		 size;
639
640
185
	parent = ep->node->eqn;
641
185
	assert(parent != NULL);
642
643
	/*
644
	 * Empty equation.
645
	 * Do not add it to the high-level syntax tree.
646
	 */
647
648
185
	if (ep->data == NULL)
649
		return;
650
651
	/* Start scanning after leading blanks, '^' and '~'. */
185
	ep->start = ep->end = ep->data + strspn(ep->data, " ^~");
652
653
next_tok:
654
3389
	tok = eqn_next(ep, MODE_TOK);
655












3389
	switch (tok) {
656
	case EQN_TOK_UNDEF:
657
24
		eqn_undef(ep);
658
24
		break;
659
	case EQN_TOK_NDEFINE:
660
	case EQN_TOK_DEFINE:
661
66
		eqn_def(ep);
662
66
		break;
663
	/* tdefine: the name and the value are consumed but not stored. */
	case EQN_TOK_TDEFINE:
664

18
		if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF ||
665
6
		    eqn_next(ep, MODE_QUOTED) == EQN_TOK_EOF)
666
24
			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
667
12
			    ep->node->line, ep->node->pos, "tdefine");
668
		break;
669
	case EQN_TOK_DELIM:
670
10
		eqn_delim(ep);
671
10
		break;
672
	/* gfont: the argument is consumed but not used. */
	case EQN_TOK_GFONT:
673
		if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF)
674
			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
675
			    ep->node->line, ep->node->pos, eqn_toks[tok]);
676
		break;
677
	case EQN_TOK_MARK:
678
	case EQN_TOK_LINEUP:
679
		/* Ignore these. */
680
		break;
681
	/*
	 * Marks on the preceding box: wrap that box in a list expecting
	 * one argument and store the mark as its top or bottom string.
	 * If there is no preceding box, an empty text box is created.
	 */
	case EQN_TOK_DYAD:
682
	case EQN_TOK_VEC:
683
	case EQN_TOK_UNDER:
684
	case EQN_TOK_BAR:
685
	case EQN_TOK_TILDE:
686
	case EQN_TOK_HAT:
687
	case EQN_TOK_DOT:
688
	case EQN_TOK_DOTDOT:
689
132
		if (parent->last == NULL) {
690
			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse,
691
			    ep->node->line, ep->node->pos, eqn_toks[tok]);
692
			cur = eqn_box_alloc(ep, parent);
693
			cur->type = EQN_TEXT;
694
			cur->text = mandoc_strdup("");
695
		}
696
132
		parent = eqn_box_makebinary(ep, parent);
697
132
		parent->type = EQN_LIST;
698
132
		parent->expectargs = 1;
699
132
		parent->font = EQNFONT_ROMAN;
700


132
		switch (tok) {
701
		case EQN_TOK_DOTDOT:
702
6
			parent->top = mandoc_strdup("\\[ad]");
703
6
			break;
704
		case EQN_TOK_VEC:
705
6
			parent->top = mandoc_strdup("\\[->]");
706
6
			break;
707
		case EQN_TOK_DYAD:
708
12
			parent->top = mandoc_strdup("\\[<>]");
709
12
			break;
710
		case EQN_TOK_TILDE:
711
24
			parent->top = mandoc_strdup("\\[a~]");
712
24
			break;
713
		case EQN_TOK_UNDER:
714
18
			parent->bottom = mandoc_strdup("\\[ul]");
715
18
			break;
716
		case EQN_TOK_BAR:
717
12
			parent->top = mandoc_strdup("\\[rn]");
718
12
			break;
719
		case EQN_TOK_DOT:
720
6
			parent->top = mandoc_strdup("\\[a.]");
721
6
			break;
722
		case EQN_TOK_HAT:
723
48
			parent->top = mandoc_strdup("\\[ha]");
724
48
			break;
725
		default:
726
			abort();
727
		}
728
132
		parent = parent->parent;
729
132
		break;
730
	/* Motions: the argument is consumed but not used. */
	case EQN_TOK_FWD:
731
	case EQN_TOK_BACK:
732
	case EQN_TOK_DOWN:
733
	case EQN_TOK_UP:
734
		if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF)
735
			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
736
			    ep->node->line, ep->node->pos, eqn_toks[tok]);
737
		break;
738
	case EQN_TOK_FAT:
739
	case EQN_TOK_ROMAN:
740
	case EQN_TOK_ITALIC:
741
	case EQN_TOK_BOLD:
742
332
		while (parent->args == parent->expectargs)
743
12
			parent = parent->parent;
744
		/*
745
		 * These values apply to the next word or sequence of
746
		 * words; thus, we mark that we'll have a child with
747
		 * exactly one of those.
748
		 */
749
154
		parent = eqn_box_alloc(ep, parent);
750
154
		parent->type = EQN_LIST;
751
154
		parent->expectargs = 1;
752

154
		switch (tok) {
753
		case EQN_TOK_FAT:
754
			parent->font = EQNFONT_FAT;
755
			break;
756
		case EQN_TOK_ROMAN:
757
			parent->font = EQNFONT_ROMAN;
758
76
			break;
759
		case EQN_TOK_ITALIC:
760
			parent->font = EQNFONT_ITALIC;
761
18
			break;
762
		case EQN_TOK_BOLD:
763
			parent->font = EQNFONT_BOLD;
764
60
			break;
765
		default:
766
			abort();
767
		}
768
154
		break;
769
	/*
	 * size sets the size of a new one-argument list;
	 * gsize only changes the global size used for later boxes.
	 */
	case EQN_TOK_SIZE:
770
	case EQN_TOK_GSIZE:
771
		/* Accept two values: integral size and a single. */
772
9
		if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) {
773
			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
774
			    ep->node->line, ep->node->pos, eqn_toks[tok]);
775
			break;
776
		}
777
9
		size = mandoc_strntoi(ep->start, ep->toksz, 10);
778
9
		if (-1 == size) {
779
			mandoc_msg(MANDOCERR_IT_NONUM, ep->parse,
780
			    ep->node->line, ep->node->pos, eqn_toks[tok]);
781
			break;
782
		}
783
9
		if (EQN_TOK_GSIZE == tok) {
784
			ep->gsize = size;
785
			break;
786
		}
787
30
		while (parent->args == parent->expectargs)
788
6
			parent = parent->parent;
789
9
		parent = eqn_box_alloc(ep, parent);
790
9
		parent->type = EQN_LIST;
791
9
		parent->expectargs = 1;
792
9
		parent->size = size;
793
9
		break;
794
	case EQN_TOK_FROM:
795
	case EQN_TOK_TO:
796
	case EQN_TOK_SUB:
797
	case EQN_TOK_SUP:
798
		/*
799
		 * We have a left-right-associative expression.
800
		 * Repivot under a positional node, open a child scope
801
		 * and keep on reading.
802
		 */
803
300
		if (parent->last == NULL) {
804
			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse,
805
			    ep->node->line, ep->node->pos, eqn_toks[tok]);
806
			cur = eqn_box_alloc(ep, parent);
807
			cur->type = EQN_TEXT;
808
			cur->text = mandoc_strdup("");
809
		}
810

708
		while (parent->expectargs == 1 && parent->args == 1)
811
36
			parent = parent->parent;
812
300
		if (tok == EQN_TOK_FROM || tok == EQN_TOK_TO)  {
813
320
			for (cur = parent; cur != NULL; cur = cur->parent)
814

192
				if (cur->pos == EQNPOS_SUB ||
815
102
				    cur->pos == EQNPOS_SUP ||
816
102
				    cur->pos == EQNPOS_SUBSUP ||
817
90
				    cur->pos == EQNPOS_SQRT ||
818
90
				    cur->pos == EQNPOS_OVER)
819
					break;
820
70
			if (cur != NULL)
821
12
				parent = cur->parent;
822
		}
823

		/* "sup" after "sub" extends the existing node to three arguments. */
404
		if (tok == EQN_TOK_SUP && parent->pos == EQNPOS_SUB) {
824
42
			parent->expectargs = 3;
825
42
			parent->pos = EQNPOS_SUBSUP;
826
42
			break;
827
		}
828

		/* Likewise for "to" after "from". */
296
		if (tok == EQN_TOK_TO && parent->pos == EQNPOS_FROM) {
829
32
			parent->expectargs = 3;
830
32
			parent->pos = EQNPOS_FROMTO;
831
32
			break;
832
		}
833
226
		parent = eqn_box_makebinary(ep, parent);
834

226
		switch (tok) {
835
		case EQN_TOK_FROM:
836
			parent->pos = EQNPOS_FROM;
837
32
			break;
838
		case EQN_TOK_TO:
839
			parent->pos = EQNPOS_TO;
840
6
			break;
841
		case EQN_TOK_SUP:
842
			parent->pos = EQNPOS_SUP;
843
62
			break;
844
		case EQN_TOK_SUB:
845
			parent->pos = EQNPOS_SUB;
846
126
			break;
847
		default:
848
			abort();
849
		}
850
226
		break;
851
	case EQN_TOK_SQRT:
852
86
		while (parent->args == parent->expectargs)
853
			parent = parent->parent;
854
		/*
855
		 * Accept a left-right-associative set of arguments just
856
		 * like sub and sup and friends but without rebalancing
857
		 * under a pivot.
858
		 */
859
43
		parent = eqn_box_alloc(ep, parent);
860
43
		parent->type = EQN_SUBEXPR;
861
43
		parent->pos = EQNPOS_SQRT;
862
43
		parent->expectargs = 1;
863
43
		break;
864
	case EQN_TOK_OVER:
865
		/*
866
		 * We have a right-left-associative fraction.
867
		 * Close out anything that's currently open, then
868
		 * rebalance and continue reading.
869
		 */
870
74
		if (parent->last == NULL) {
871
18
			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse,
872
9
			    ep->node->line, ep->node->pos, eqn_toks[tok]);
873
9
			cur = eqn_box_alloc(ep, parent);
874
9
			cur->type = EQN_TEXT;
875
9
			cur->text = mandoc_strdup("");
876
9
		}
877
196
		while (parent->args == parent->expectargs)
878
24
			parent = parent->parent;
879
166
		while (EQN_SUBEXPR == parent->type)
880
9
			parent = parent->parent;
881
74
		parent = eqn_box_makebinary(ep, parent);
882
74
		parent->pos = EQNPOS_OVER;
883
74
		break;
884
	case EQN_TOK_RIGHT:
885
	case EQN_TOK_BRACE_CLOSE:
886
		/*
887
		 * Close out the existing brace.
888
		 * FIXME: this is a poor sentinel: we should really
889
		 * have a native EQN_BRACE type or whatnot.
890
		 */
891
326
		for (cur = parent; cur != NULL; cur = cur->parent)
892

202
			if (cur->type == EQN_LIST &&
893
126
			    cur->expectargs > 1 &&
894
126
			    (tok == EQN_TOK_BRACE_CLOSE ||
895
39
			     cur->left != NULL))
896
				break;
897
126
		if (cur == NULL) {
898
			mandoc_msg(MANDOCERR_BLK_NOTOPEN, ep->parse,
899
			    ep->node->line, ep->node->pos, eqn_toks[tok]);
900
			break;
901
		}
902
		parent = cur;
903
126
		if (EQN_TOK_RIGHT == tok) {
904
39
			if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) {
905
				mandoc_msg(MANDOCERR_REQ_EMPTY,
906
				    ep->parse, ep->node->line,
907
				    ep->node->pos, eqn_toks[tok]);
908
				break;
909
			}
910
			/* Handling depends on right/left. */
911

39
			if (STRNEQ(ep->start, ep->toksz, "ceiling", 7))
912
				parent->right = mandoc_strdup("\\[rc]");
913

39
			else if (STRNEQ(ep->start, ep->toksz, "floor", 5))
914
				parent->right = mandoc_strdup("\\[rf]");
915
			else
916
				parent->right =
917
39
				    mandoc_strndup(ep->start, ep->toksz);
918
39
		}
919
126
		parent = parent->parent;
920

189
		if (tok == EQN_TOK_BRACE_CLOSE &&
921
87
		    (parent->type == EQN_PILE ||
922
63
		     parent->type == EQN_MATRIX))
923
36
			parent = parent->parent;
924
		/* Close out any "singleton" lists. */
925

288
		while (parent->type == EQN_LIST &&
926
51
		    parent->expectargs == 1 &&
927
12
		    parent->args == 1)
928
12
			parent = parent->parent;
929
		break;
930
	case EQN_TOK_BRACE_OPEN:
931
	case EQN_TOK_LEFT:
932
		/*
933
		 * If we already have something in the stack and we're
934
		 * in an expression, then rewind til we're not any more
935
		 * (just like with the text node).
936
		 */
937
254
		while (parent->args == parent->expectargs)
938
1
			parent = parent->parent;
939

165
		if (EQN_TOK_LEFT == tok &&
940
39
		    eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) {
941
			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
942
			    ep->node->line, ep->node->pos, eqn_toks[tok]);
943
			break;
944
		}
945
126
		parent = eqn_box_alloc(ep, parent);
946
126
		parent->type = EQN_LIST;
947
126
		if (EQN_TOK_LEFT == tok) {
948

39
			if (STRNEQ(ep->start, ep->toksz, "ceiling", 7))
949
				parent->left = mandoc_strdup("\\[lc]");
950

39
			else if (STRNEQ(ep->start, ep->toksz, "floor", 5))
951
				parent->left = mandoc_strdup("\\[lf]");
952
			else
953
				parent->left =
954
39
				    mandoc_strndup(ep->start, ep->toksz);
955
39
		}
956
		break;
957
	/* All pile and column keywords open the same kind of EQN_PILE box. */
	case EQN_TOK_PILE:
958
	case EQN_TOK_LPILE:
959
	case EQN_TOK_RPILE:
960
	case EQN_TOK_CPILE:
961
	case EQN_TOK_CCOL:
962
	case EQN_TOK_LCOL:
963
	case EQN_TOK_RCOL:
964
48
		while (parent->args == parent->expectargs)
965
			parent = parent->parent;
966
24
		parent = eqn_box_alloc(ep, parent);
967
24
		parent->type = EQN_PILE;
968
24
		parent->expectargs = 1;
969
24
		break;
970
	/* above: start a new list below the innermost enclosing pile. */
	case EQN_TOK_ABOVE:
971
132
		for (cur = parent; cur != NULL; cur = cur->parent)
972
66
			if (cur->type == EQN_PILE)
973
				break;
974
24
		if (cur == NULL) {
975
			mandoc_msg(MANDOCERR_IT_STRAY, ep->parse,
976
			    ep->node->line, ep->node->pos, eqn_toks[tok]);
977
			break;
978
		}
979
24
		parent = eqn_box_alloc(ep, cur);
980
24
		parent->type = EQN_LIST;
981
24
		break;
982
	case EQN_TOK_MATRIX:
983
36
		while (parent->args == parent->expectargs)
984
			parent = parent->parent;
985
18
		parent = eqn_box_alloc(ep, parent);
986
18
		parent->type = EQN_MATRIX;
987
18
		parent->expectargs = 1;
988
18
		break;
989
	case EQN_TOK_EOF:
990
185
		return;
991
	/*
	 * Text of any kind becomes an EQN_TEXT box that takes over
	 * the string left in ep->start by eqn_next().
	 */
	case EQN_TOK__MAX:
992
	case EQN_TOK_FUNC:
993
	case EQN_TOK_QUOTED:
994
	case EQN_TOK_SYM:
995
1432
		p = ep->start;
996
1432
		assert(p != NULL);
997
		/*
998
		 * If we already have something in the stack and we're
999
		 * in an expression, then rewind til we're not any more.
1000
		 */
1001
3312
		while (parent->args == parent->expectargs)
1002
224
			parent = parent->parent;
1003
2802
		cur = eqn_box_alloc(ep, parent);
1004
2802
		cur->type = EQN_TEXT;
1005
2802
		cur->text = p;
1006

2802
		switch (tok) {
1007
		case EQN_TOK_FUNC:
1008
193
			cur->font = EQNFONT_ROMAN;
1009
193
			break;
1010
		case EQN_TOK_QUOTED:
1011
270
			if (cur->font == EQNFONT_NONE)
1012
195
				cur->font = EQNFONT_ITALIC;
1013
			break;
1014
		case EQN_TOK_SYM:
1015
			break;
1016
		default:
1017

			/*
			 * Unclassified text without an explicit font is
			 * split where the character class (letter, digit,
			 * other) changes and at commas; letters are set
			 * in italic and everything else in roman.
			 */
1741
			if (cur->font != EQNFONT_NONE || *p == '\0')
1018
				break;
1019
834
			cpn = p - 1;
1020
			ccln = CCL_LET;
1021
			split = NULL;
1022
877
			for (;;) {
1023
				/* Advance to next character. */
1024
2011
				cp = cpn++;
1025
				ccl = ccln;
1026
5335
				ccln = isalpha((unsigned char)*cpn) ? CCL_LET :
1027
2626
				    isdigit((unsigned char)*cpn) ||
1028

1078
				    (*cpn == '.' && (ccl == CCL_DIG ||
1029
				     isdigit((unsigned char)cpn[1]))) ?
1030
				    CCL_DIG : CCL_PUN;
1031
				/* No boundary before first character. */
1032
2011
				if (cp < p)
1033
877
					continue;
1034
1134
				cur->font = ccl == CCL_LET ?
1035
				    EQNFONT_ITALIC : EQNFONT_ROMAN;
1036
1134
				if (*cp == '\\')
1037
42
					mandoc_escape(&cpn, NULL, NULL);
1038
				/* No boundary after last character. */
1039
1134
				if (*cpn == '\0')
1040
					break;
1041

814
				if (ccln == ccl && *cp != ',' && *cpn != ',')
1042
257
					continue;
1043
				/* Boundary found, split the text. */
1044
43
				if (parent->args == parent->expectargs) {
1045
					/* Remove the text from the tree. */
1046
7
					if (cur->prev == NULL)
1047
6
						parent->first = cur->next;
1048
					else
1049
1
						cur->prev->next = NULL;
1050
7
					parent->last = cur->prev;
1051
7
					parent->args--;
1052
					/* Set up a list instead. */
1053
7
					split = eqn_box_alloc(ep, parent);
1054
7
					split->type = EQN_LIST;
1055
					/* Insert the word into the list. */
1056
7
					split->first = split->last = cur;
1057
7
					cur->parent = split;
1058
7
					cur->prev = NULL;
1059
					parent = split;
1060
7
				}
1061
				/* Append a new text box. */
1062
43
				nbox = eqn_box_alloc(ep, parent);
1063
43
				nbox->type = EQN_TEXT;
1064
43
				nbox->text = mandoc_strdup(cpn);
1065
				/* Truncate the old box. */
1066
86
				p = mandoc_strndup(cur->text,
1067
43
				    cpn - cur->text);
1068
43
				free(cur->text);
1069
43
				cur->text = p;
1070
				/* Setup to process the new box. */
1071
				cur = nbox;
1072
43
				p = nbox->text;
1073
43
				cpn = p - 1;
1074
				ccln = CCL_LET;
1075
			}
1076
834
			if (split != NULL)
1077
7
				parent = split->parent;
1078
			break;
1079
		}
1080
		break;
1081
	default:
1082
		abort();
1083
	}
1084
2574
	goto next_tok;
1085
185
}
1087
1088
void
1089
eqn_free(struct eqn_node *p)
1090
{
1091
	int		 i;
1092
1093
438
	for (i = 0; i < (int)p->defsz; i++) {
1094
24
		free(p->defs[i].key);
1095
24
		free(p->defs[i].val);
1096
	}
1097
1098
130
	free(p->data);
1099
130
	free(p->defs);
1100
130
	free(p);
1101
130
}