GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: usr.bin/awk/b.c Lines: 277 516 53.7 %
Date: 2016-12-06 Branches: 174 416 41.8 %

Line Branch Exec Source
1
/*	$OpenBSD: b.c,v 1.18 2014/12/19 19:28:55 deraadt Exp $	*/
2
/****************************************************************
3
Copyright (C) Lucent Technologies 1997
4
All Rights Reserved
5
6
Permission to use, copy, modify, and distribute this software and
7
its documentation for any purpose and without fee is hereby
8
granted, provided that the above copyright notice appear in all
9
copies and that both that the copyright notice and this
10
permission notice and warranty disclaimer appear in supporting
11
documentation, and that the name Lucent Technologies or any of
12
its entities not be used in advertising or publicity pertaining
13
to distribution of the software without specific, written prior
14
permission.
15
16
LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
18
IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
19
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
21
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
22
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23
THIS SOFTWARE.
24
****************************************************************/
25
26
/* lasciate ogne speranza, voi ch'intrate. */
27
28
#define	DEBUG
29
30
#include <ctype.h>
31
#include <stdio.h>
32
#include <string.h>
33
#include <stdlib.h>
34
#include "awk.h"
35
#include "ytab.h"
36
37
#define	HAT	(NCHARS+2)	/* matches ^ in regular expr */
38
				/* NCHARS is 2**n */
39
#define MAXLIN 22
40
41
#define type(v)		(v)->nobj	/* badly overloaded here */
42
#define info(v)		(v)->ntype	/* badly overloaded here */
43
#define left(v)		(v)->narg[0]
44
#define right(v)	(v)->narg[1]
45
#define parent(v)	(v)->nnext
46
47
#define LEAF	case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
48
#define ELEAF	case EMPTYRE:		/* empty string in regexp */
49
#define UNARY	case STAR: case PLUS: case QUEST:
50
51
/* encoding in tree Nodes:
52
	leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL, EMPTYRE):
53
		left is index, right contains value or pointer to value
54
	unary (STAR, PLUS, QUEST): left is child, right is null
55
	binary (CAT, OR): left and right are children
56
	parent contains pointer to parent
57
*/
58
59
60
int	*setvec;
61
int	*tmpset;
62
int	maxsetvec = 0;
63
64
int	rtok;		/* next token in current re */
65
int	rlxval;
66
static uschar	*rlxstr;
67
static uschar	*prestr;	/* current position in current re */
68
static uschar	*lastre;	/* origin of last re */
69
70
static	int setcnt;
71
static	int poscnt;
72
73
char	*patbeg;
74
int	patlen;
75
76
#define	NFA	20	/* cache this many dynamic fa's */
77
fa	*fatab[NFA];
78
int	nfatab	= 0;	/* entries in fatab */
79
80
fa *makedfa(const char *s, int anchor)	/* returns dfa for reg expr s */
81
5
{
82
	int i, use, nuse;
83
	fa *pfa;
84
	static int now = 1;
85
86
5
	if (setvec == 0) {	/* first time through any RE */
87
3
		maxsetvec = MAXLIN;
88
3
		setvec = (int *) calloc(maxsetvec, sizeof(int));
89
3
		tmpset = (int *) calloc(maxsetvec, sizeof(int));
90

3
		if (setvec == 0 || tmpset == 0)
91
			overflo("out of space initializing makedfa");
92
	}
93
94
5
	if (compile_time)	/* a constant for sure */
95
5
		return mkdfa(s, anchor);
96
	for (i = 0; i < nfatab; i++)	/* is it there already? */
97
		if (fatab[i]->anchor == anchor
98
		  && strcmp((const char *) fatab[i]->restr, s) == 0) {
99
			fatab[i]->use = now++;
100
			return fatab[i];
101
		}
102
	pfa = mkdfa(s, anchor);
103
	if (nfatab < NFA) {	/* room for another */
104
		fatab[nfatab] = pfa;
105
		fatab[nfatab]->use = now++;
106
		nfatab++;
107
		return pfa;
108
	}
109
	use = fatab[0]->use;	/* replace least-recently used */
110
	nuse = 0;
111
	for (i = 1; i < nfatab; i++)
112
		if (fatab[i]->use < use) {
113
			use = fatab[i]->use;
114
			nuse = i;
115
		}
116
	freefa(fatab[nuse]);
117
	fatab[nuse] = pfa;
118
	pfa->use = now++;
119
	return pfa;
120
}
121
122
fa *mkdfa(const char *s, int anchor)	/* does the real work of making a dfa */
123
				/* anchor = 1 for anchored matches, else 0 */
124
5
{
125
	Node *p, *p1;
126
	fa *f;
127
128
5
	p = reparse(s);
129
5
	p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
130
		/* put ALL STAR in front of reg.  exp. */
131
5
	p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
132
		/* put FINAL after reg.  exp. */
133
134
5
	poscnt = 0;
135
5
	penter(p1);	/* enter parent pointers and leaf indices */
136
5
	if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL)
137
		overflo("out of space for fa");
138
5
	f->accept = poscnt-1;	/* penter has computed number of positions in re */
139
5
	cfoll(f, p1);	/* set up follow sets */
140
5
	freetr(p1);
141
5
	if ((f->posns[0] = (int *) calloc(*(f->re[0].lfollow), sizeof(int))) == NULL)
142
			overflo("out of space in makedfa");
143
5
	if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
144
		overflo("out of space in makedfa");
145
5
	*f->posns[1] = 0;
146
5
	f->initstat = makeinit(f, anchor);
147
5
	f->anchor = anchor;
148
5
	f->restr = (uschar *) tostring(s);
149
5
	return f;
150
}
151
152
int makeinit(fa *f, int anchor)
153
5
{
154
	int i, k;
155
156
5
	f->curstat = 2;
157
5
	f->out[2] = 0;
158
5
	f->reset = 0;
159
5
	k = *(f->re[0].lfollow);
160
5
	xfree(f->posns[2]);
161
5
	if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
162
		overflo("out of space in makeinit");
163
22
	for (i=0; i <= k; i++) {
164
17
		(f->posns[2])[i] = (f->re[0].lfollow)[i];
165
	}
166
5
	if ((f->posns[2])[1] == f->accept)
167
		f->out[2] = 1;
168
1300
	for (i=0; i < NCHARS; i++)
169
1295
		f->gototab[2][i] = 0;
170
5
	f->curstat = cgoto(f, 2, HAT);
171
5
	if (anchor) {
172
		*f->posns[2] = k-1;	/* leave out position 0 */
173
		for (i=0; i < k; i++) {
174
			(f->posns[0])[i] = (f->posns[2])[i];
175
		}
176
177
		f->out[0] = f->out[2];
178
		if (f->curstat != 2)
179
			--(*f->posns[f->curstat]);
180
	}
181
5
	return f->curstat;
182
}
183
184
void penter(Node *p)	/* set up parent pointers and leaf indices */
185
142
{
186

142
	switch (type(p)) {
187
	ELEAF
188
	LEAF
189
67
		info(p) = poscnt;
190
67
		poscnt++;
191
67
		break;
192
	UNARY
193
13
		penter(left(p));
194
13
		parent(left(p)) = p;
195
13
		break;
196
	case CAT:
197
	case OR:
198
62
		penter(left(p));
199
62
		penter(right(p));
200
62
		parent(left(p)) = p;
201
62
		parent(right(p)) = p;
202
62
		break;
203
	default:	/* can't happen */
204
		FATAL("can't happen: unknown type %d in penter", type(p));
205
		break;
206
	}
207
142
}
208
209
void freetr(Node *p)	/* free parse tree */
210
142
{
211

142
	switch (type(p)) {
212
	ELEAF
213
	LEAF
214
67
		xfree(p);
215
		break;
216
	UNARY
217
13
		freetr(left(p));
218
13
		xfree(p);
219
		break;
220
	case CAT:
221
	case OR:
222
62
		freetr(left(p));
223
62
		freetr(right(p));
224
62
		xfree(p);
225
		break;
226
	default:	/* can't happen */
227
		FATAL("can't happen: unknown type %d in freetr", type(p));
228
		break;
229
	}
230
142
}
231
232
/* in the parsing of regular expressions, metacharacters like . have */
233
/* to be seen literally;  \056 is not a metacharacter. */
234
235
int hexstr(uschar **pp)	/* find and eval hex string at pp, return new p */
236
{			/* only pick up one 8-bit byte (2 chars) */
237
	uschar *p;
238
	int n = 0;
239
	int i;
240
241
	for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) {
242
		if (isdigit(*p))
243
			n = 16 * n + *p - '0';
244
		else if (*p >= 'a' && *p <= 'f')
245
			n = 16 * n + *p - 'a' + 10;
246
		else if (*p >= 'A' && *p <= 'F')
247
			n = 16 * n + *p - 'A' + 10;
248
	}
249
	*pp = (uschar *) p;
250
	return n;
251
}
252
253
#define isoctdigit(c) ((c) >= '0' && (c) <= '7')	/* multiple use of arg */
254
255
int quoted(uschar **pp)	/* pick up next thing after a \\ */
256
			/* and increment *pp */
257
4
{
258
4
	uschar *p = *pp;
259
	int c;
260
261
4
	if ((c = *p++) == 't')
262
4
		c = '\t';
263
	else if (c == 'n')
264
		c = '\n';
265
	else if (c == 'f')
266
		c = '\f';
267
	else if (c == 'r')
268
		c = '\r';
269
	else if (c == 'b')
270
		c = '\b';
271
	else if (c == '\\')
272
		c = '\\';
273
	else if (c == 'x') {	/* hexadecimal goo follows */
274
		c = hexstr(&p);	/* this adds a null if number is invalid */
275
	} else if (isoctdigit(c)) {	/* \d \dd \ddd */
276
		int n = c - '0';
277
		if (isoctdigit(*p)) {
278
			n = 8 * n + *p++ - '0';
279
			if (isoctdigit(*p))
280
				n = 8 * n + *p++ - '0';
281
		}
282
		c = n;
283
	} /* else */
284
		/* c = c; */
285
4
	*pp = p;
286
4
	return c;
287
}
288
289
char *cclenter(const char *argp)	/* add a character class */
290
6
{
291
	int i, c, c2;
292
6
	uschar *p = (uschar *) argp;
293
	uschar *op, *bp;
294
	static uschar *buf = 0;
295
	static int bufsz = 100;
296
297
6
	op = p;
298

6
	if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
299
		FATAL("out of space for character class [%.10s...] 1", p);
300
6
	bp = buf;
301
137
	for (i = 0; (c = *p++) != 0; ) {
302
125
		if (c == '\\') {
303
4
			c = quoted(&p);
304

121
		} else if (c == '-' && i > 0 && bp[-1] != 0) {
305
			if (*p != 0) {
306
				c = bp[-1];
307
				c2 = *p++;
308
				if (c2 == '\\')
309
					c2 = quoted(&p);
310
				if (c > c2) {	/* empty; ignore */
311
					bp--;
312
					i--;
313
					continue;
314
				}
315
				while (c < c2) {
316
					if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter1"))
317
						FATAL("out of space for character class [%.10s...] 2", p);
318
					*bp++ = ++c;
319
					i++;
320
				}
321
				continue;
322
			}
323
		}
324
125
		if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter2"))
325
			FATAL("out of space for character class [%.10s...] 3", p);
326
125
		*bp++ = c;
327
125
		i++;
328
	}
329
6
	*bp = 0;
330
6
	dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) );
331
6
	xfree(op);
332
6
	return (char *) tostring((char *) buf);
333
}
334
335
void overflo(const char *s)
336
{
337
	FATAL("regular expression too big: %.30s...", s);
338
}
339
340
void cfoll(fa *f, Node *v)	/* enter follow set of each leaf of vertex v into lfollow[leaf] */
341
142
{
342
	int i;
343
	int *p;
344
345

142
	switch (type(v)) {
346
	ELEAF
347
	LEAF
348
67
		f->re[info(v)].ltype = type(v);
349
67
		f->re[info(v)].lval.np = right(v);
350
134
		while (f->accept >= maxsetvec) {	/* guessing here! */
351
			setvec = reallocarray(setvec, maxsetvec,
352
			    4 * sizeof(int));
353
			tmpset = reallocarray(tmpset, maxsetvec,
354
			    4 * sizeof(int));
355
			if (setvec == 0 || tmpset == 0)
356
				overflo("out of space in cfoll()");
357
			maxsetvec *= 4;
358
		}
359
1064
		for (i = 0; i <= f->accept; i++)
360
997
			setvec[i] = 0;
361
67
		setcnt = 0;
362
67
		follow(v);	/* computes setvec and setcnt */
363
67
		if ((p = (int *) calloc(setcnt+1, sizeof(int))) == NULL)
364
			overflo("out of space building follow set");
365
67
		f->re[info(v)].lfollow = p;
366
67
		*p = setcnt;
367
1064
		for (i = f->accept; i >= 0; i--)
368
997
			if (setvec[i] == 1)
369
86
				*++p = i;
370
		break;
371
	UNARY
372
13
		cfoll(f,left(v));
373
13
		break;
374
	case CAT:
375
	case OR:
376
62
		cfoll(f,left(v));
377
62
		cfoll(f,right(v));
378
62
		break;
379
	default:	/* can't happen */
380
		FATAL("can't happen: unknown type %d in cfoll", type(v));
381
	}
382
142
}
383
384
int first(Node *p)	/* collects initially active leaves of p into setvec */
385
			/* returns 0 if p matches empty string */
386
158
{
387
	int b, lp;
388
389

158
	switch (type(p)) {
390
	ELEAF
391
	LEAF
392
86
		lp = info(p);	/* look for high-water mark of subscripts */
393

172
		while (setcnt >= maxsetvec || lp >= maxsetvec) {	/* guessing here! */
394
			setvec = reallocarray(setvec, maxsetvec,
395
			    4 * sizeof(int));
396
			tmpset = reallocarray(tmpset, maxsetvec,
397
			    4 * sizeof(int));
398
			if (setvec == 0 || tmpset == 0)
399
				overflo("out of space in first()");
400
			maxsetvec *= 4;
401
		}
402
86
		if (type(p) == EMPTYRE) {
403
			setvec[lp] = 0;
404
			return(0);
405
		}
406
86
		if (setvec[lp] != 1) {
407
86
			setvec[lp] = 1;
408
86
			setcnt++;
409
		}
410

86
		if (type(p) == CCL && (*(char *) right(p)) == '\0')
411
			return(0);		/* empty CCL */
412
86
		else return(1);
413
	case PLUS:
414
2
		if (first(left(p)) == 0) return(0);
415
2
		return(1);
416
	case STAR:
417
	case QUEST:
418
7
		first(left(p));
419
7
		return(0);
420
	case CAT:
421

59
		if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
422
59
		return(1);
423
	case OR:
424
4
		b = first(right(p));
425

4
		if (first(left(p)) == 0 || b == 0) return(0);
426
4
		return(1);
427
	}
428
	FATAL("can't happen: unknown type %d in first", type(p));	/* can't happen */
429
	return(-1);
430
}
431
432
void follow(Node *v)	/* collects leaves that can follow v into setvec */
433
149
{
434
	Node *p;
435
436
149
	if (type(v) == FINAL)
437
5
		return;
438
144
	p = parent(v);
439

144
	switch (type(p)) {
440
	case STAR:
441
	case PLUS:
442
13
		first(v);
443
13
		follow(p);
444
13
		return;
445
446
	case OR:
447
	case QUEST:
448
4
		follow(p);
449
4
		return;
450
451
	case CAT:
452
127
		if (v == left(p)) {	/* v is left child of p */
453
67
			if (first(right(p)) == 0) {
454
5
				follow(p);
455
5
				return;
456
			}
457
		} else		/* v is right child */
458
60
			follow(p);
459
		return;
460
	}
461
}
462
463
int member(int c, const char *sarg)	/* is c in s? */
464
194
{
465
194
	uschar *s = (uschar *) sarg;
466
467
3416
	while (*s)
468
3117
		if (c == *s++)
469
89
			return(1);
470
105
	return(0);
471
}
472
473
int match(fa *f, const char *p0)	/* shortest match ? */
474
1955
{
475
	int s, ns;
476
1955
	uschar *p = (uschar *) p0;
477
478
1955
	s = f->reset ? makeinit(f,0) : f->initstat;
479
1955
	if (f->out[s])
480
		return(1);
481
	do {
482
		/* assert(*p < NCHARS); */
483
59594
		if ((ns = f->gototab[s][*p]) != 0)
484
59155
			s = ns;
485
		else
486
439
			s = cgoto(f, s, *p);
487
59594
		if (f->out[s])
488
229
			return(1);
489
59365
	} while (*p++ != 0);
490
1726
	return(0);
491
}
492
493
int pmatch(fa *f, const char *p0)	/* longest match, for sub */
494
{
495
	int s, ns;
496
	uschar *p = (uschar *) p0;
497
	uschar *q;
498
	int i, k;
499
500
	/* s = f->reset ? makeinit(f,1) : f->initstat; */
501
	if (f->reset) {
502
		f->initstat = s = makeinit(f,1);
503
	} else {
504
		s = f->initstat;
505
	}
506
	patbeg = (char *) p;
507
	patlen = -1;
508
	do {
509
		q = p;
510
		do {
511
			if (f->out[s])		/* final state */
512
				patlen = q-p;
513
			/* assert(*q < NCHARS); */
514
			if ((ns = f->gototab[s][*q]) != 0)
515
				s = ns;
516
			else
517
				s = cgoto(f, s, *q);
518
			if (s == 1) {	/* no transition */
519
				if (patlen >= 0) {
520
					patbeg = (char *) p;
521
					return(1);
522
				}
523
				else
524
					goto nextin;	/* no match */
525
			}
526
		} while (*q++ != 0);
527
		if (f->out[s])
528
			patlen = q-p-1;	/* don't count $ */
529
		if (patlen >= 0) {
530
			patbeg = (char *) p;
531
			return(1);
532
		}
533
	nextin:
534
		s = 2;
535
		if (f->reset) {
536
			for (i = 2; i <= f->curstat; i++)
537
				xfree(f->posns[i]);
538
			k = *f->posns[0];
539
			if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
540
				overflo("out of space in pmatch");
541
			for (i = 0; i <= k; i++)
542
				(f->posns[2])[i] = (f->posns[0])[i];
543
			f->initstat = f->curstat = 2;
544
			f->out[2] = f->out[0];
545
			for (i = 0; i < NCHARS; i++)
546
				f->gototab[2][i] = 0;
547
		}
548
	} while (*p++ != 0);
549
	return (0);
550
}
551
552
int nematch(fa *f, const char *p0)	/* non-empty match, for sub */
553
{
554
	int s, ns;
555
	uschar *p = (uschar *) p0;
556
	uschar *q;
557
	int i, k;
558
559
	/* s = f->reset ? makeinit(f,1) : f->initstat; */
560
	if (f->reset) {
561
		f->initstat = s = makeinit(f,1);
562
	} else {
563
		s = f->initstat;
564
	}
565
	patlen = -1;
566
	while (*p) {
567
		q = p;
568
		do {
569
			if (f->out[s])		/* final state */
570
				patlen = q-p;
571
			/* assert(*q < NCHARS); */
572
			if ((ns = f->gototab[s][*q]) != 0)
573
				s = ns;
574
			else
575
				s = cgoto(f, s, *q);
576
			if (s == 1) {	/* no transition */
577
				if (patlen > 0) {
578
					patbeg = (char *) p;
579
					return(1);
580
				} else
581
					goto nnextin;	/* no nonempty match */
582
			}
583
		} while (*q++ != 0);
584
		if (f->out[s])
585
			patlen = q-p-1;	/* don't count $ */
586
		if (patlen > 0 ) {
587
			patbeg = (char *) p;
588
			return(1);
589
		}
590
	nnextin:
591
		s = 2;
592
		if (f->reset) {
593
			for (i = 2; i <= f->curstat; i++)
594
				xfree(f->posns[i]);
595
			k = *f->posns[0];
596
			if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
597
				overflo("out of state space");
598
			for (i = 0; i <= k; i++)
599
				(f->posns[2])[i] = (f->posns[0])[i];
600
			f->initstat = f->curstat = 2;
601
			f->out[2] = f->out[0];
602
			for (i = 0; i < NCHARS; i++)
603
				f->gototab[2][i] = 0;
604
		}
605
		p++;
606
	}
607
	return (0);
608
}
609
610
Node *reparse(const char *p)	/* parses regular expression pointed to by p */
611
5
{			/* uses relex() to scan regular expression */
612
	Node *np;
613
614
5
	dprintf( ("reparse <%s>\n", p) );
615
5
	lastre = prestr = (uschar *) p;	/* prestr points to string to be parsed */
616
5
	rtok = relex();
617
	/* GNU compatibility: an empty regexp matches anything */
618
5
	if (rtok == '\0') {
619
		/* FATAL("empty regular expression"); previous */
620
		return(op2(EMPTYRE, NIL, NIL));
621
	}
622
5
	np = regexp();
623
5
	if (rtok != '\0')
624
		FATAL("syntax error in regular expression %s at %s", lastre, prestr);
625
5
	return(np);
626
}
627
628
Node *regexp(void)	/* top-level parse of reg expr */
629
7
{
630
7
	return (alt(concat(primary())));
631
}
632
633
Node *primary(void)
634
59
{
635
	Node *np;
636
637


59
	switch (rtok) {
638
	case CHAR:
639
45
		np = op2(CHAR, NIL, itonp(rlxval));
640
45
		rtok = relex();
641
45
		return (unary(np));
642
	case ALL:
643
		rtok = relex();
644
		return (unary(op2(ALL, NIL, NIL)));
645
	case EMPTYRE:
646
		rtok = relex();
647
		return (unary(op2(ALL, NIL, NIL)));
648
	case DOT:
649
4
		rtok = relex();
650
4
		return (unary(op2(DOT, NIL, NIL)));
651
	case CCL:
652
6
		np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr));
653
6
		rtok = relex();
654
6
		return (unary(np));
655
	case NCCL:
656
		np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr));
657
		rtok = relex();
658
		return (unary(np));
659
	case '^':
660
1
		rtok = relex();
661
1
		return (unary(op2(CHAR, NIL, itonp(HAT))));
662
	case '$':
663
1
		rtok = relex();
664
1
		return (unary(op2(CHAR, NIL, NIL)));
665
	case '(':
666
2
		rtok = relex();
667
2
		if (rtok == ')') {	/* special pleading for () */
668
			rtok = relex();
669
			return unary(op2(CCL, NIL, (Node *) tostring("")));
670
		}
671
2
		np = regexp();
672
2
		if (rtok == ')') {
673
2
			rtok = relex();
674
2
			return (unary(np));
675
		}
676
		else
677
			FATAL("syntax error in regular expression %s at %s", lastre, prestr);
678
	default:
679
		FATAL("illegal primary in regular expression %s at %s", lastre, prestr);
680
	}
681
	return 0;	/*NOTREACHED*/
682
}
683
684
Node *concat(Node *np)
685
59
{
686
59
	switch (rtok) {
687
	case CHAR: case DOT: case ALL: case EMPTYRE: case CCL: case NCCL: case '$': case '(':
688
50
		return (concat(op2(CAT, np, primary())));
689
	}
690
9
	return (np);
691
}
692
693
Node *alt(Node *np)
694
9
{
695
9
	if (rtok == OR) {
696
2
		rtok = relex();
697
2
		return (alt(op2(OR, np, concat(primary()))));
698
	}
699
7
	return (np);
700
}
701
702
Node *unary(Node *np)
703
67
{
704

67
	switch (rtok) {
705
	case STAR:
706
7
		rtok = relex();
707
7
		return (unary(op2(STAR, np, NIL)));
708
	case PLUS:
709
1
		rtok = relex();
710
1
		return (unary(op2(PLUS, np, NIL)));
711
	case QUEST:
712
		rtok = relex();
713
		return (unary(op2(QUEST, np, NIL)));
714
	default:
715
59
		return (np);
716
	}
717
}
718
719
/*
720
 * Character class definitions conformant to the POSIX locale as
721
 * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source
722
 * and operating character sets are both ASCII (ISO646) or supersets
723
 * thereof.
724
 *
725
 * Note that to avoid overflowing the temporary buffer used in
726
 * relex(), the expanded character class (prior to range expansion)
727
 * must be less than twice the size of their full name.
728
 */
729
730
/* Because isblank doesn't show up in any of the header files on any
731
 * system i use, it's defined here.  if some other locale has a richer
732
 * definition of "blank", define HAS_ISBLANK and provide your own
733
 * version.
734
 * the parentheses here are an attempt to find a path through the maze
735
 * of macro definition and/or function and/or version provided.  thanks
736
 * to nelson beebe for the suggestion; let's see if it works everywhere.
737
 */
738
739
#ifndef HAS_ISBLANK
740
741
int (xisblank)(int c)
742
{
743
	return c==' ' || c=='\t';
744
}
745
746
#endif
747
748
struct charclass {
749
	const char *cc_name;
750
	int cc_namelen;
751
	int (*cc_func)(int);
752
} charclasses[] = {
753
	{ "alnum",	5,	isalnum },
754
	{ "alpha",	5,	isalpha },
755
#ifndef HAS_ISBLANK
756
	{ "blank",	5,	isspace }, /* was isblank */
757
#else
758
	{ "blank",	5,	isblank },
759
#endif
760
	{ "cntrl",	5,	iscntrl },
761
	{ "digit",	5,	isdigit },
762
	{ "graph",	5,	isgraph },
763
	{ "lower",	5,	islower },
764
	{ "print",	5,	isprint },
765
	{ "punct",	5,	ispunct },
766
	{ "space",	5,	isspace },
767
	{ "upper",	5,	isupper },
768
	{ "xdigit",	6,	isxdigit },
769
	{ NULL,		0,	NULL },
770
};
771
772
773
int relex(void)		/* lexical analyzer for reparse */
774
76
{
775
	int c, n;
776
	int cflag;
777
	static uschar *buf = 0;
778
	static int bufsz = 100;
779
	uschar *bp;
780
	struct charclass *cc;
781
	int i;
782
783


76
	switch (c = *prestr++) {
784
2
	case '|': return OR;
785
7
	case '*': return STAR;
786
1
	case '+': return PLUS;
787
	case '?': return QUEST;
788
4
	case '.': return DOT;
789
5
	case '\0': prestr--; return '\0';
790
	case '^':
791
	case '$':
792
	case '(':
793
	case ')':
794
6
		return c;
795
	case '\\':
796
		rlxval = quoted(&prestr);
797
		return CHAR;
798
	default:
799
45
		rlxval = c;
800
45
		return CHAR;
801
	case '[':
802

6
		if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
803
			FATAL("out of space in reg expr %.10s..", lastre);
804
6
		bp = buf;
805
6
		if (*prestr == '^') {
806
			cflag = 1;
807
			prestr++;
808
		}
809
		else
810
6
			cflag = 0;
811
6
		n = 2 * strlen((const char *) prestr)+1;
812
6
		if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, "relex1"))
813
			FATAL("out of space for reg expr %.10s...", lastre);
814
		for (; ; ) {
815
131
			if ((c = *prestr++) == '\\') {
816
4
				*bp++ = '\\';
817
4
				if ((c = *prestr++) == '\0')
818
					FATAL("nonterminated character class %.20s...", lastre);
819
4
				*bp++ = c;
820
			/* } else if (c == '\n') { */
821
			/* 	FATAL("newline in character class %.20s...", lastre); */
822

127
			} else if (c == '[' && *prestr == ':') {
823
				/* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */
824
				for (cc = charclasses; cc->cc_name; cc++)
825
					if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0)
826
						break;
827
				if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
828
				    prestr[2 + cc->cc_namelen] == ']') {
829
					prestr += cc->cc_namelen + 3;
830
					for (i = 0; i < NCHARS; i++) {
831
						if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, "relex2"))
832
						    FATAL("out of space for reg expr %.10s...", lastre);
833
						if (cc->cc_func(i)) {
834
							*bp++ = i;
835
							n++;
836
						}
837
					}
838
				} else
839
					*bp++ = c;
840
127
			} else if (c == '\0') {
841
				FATAL("nonterminated character class %.20s", lastre);
842
127
			} else if (bp == buf) {	/* 1st char is special */
843
2
				*bp++ = c;
844
125
			} else if (c == ']') {
845
6
				*bp++ = 0;
846
6
				rlxstr = (uschar *) tostring((char *) buf);
847
6
				if (cflag == 0)
848
6
					return CCL;
849
				else
850
					return NCCL;
851
			} else
852
119
				*bp++ = c;
853
		}
854
	}
855
}
856
857
int cgoto(fa *f, int s, int c)
858
444
{
859
	int i, j, k;
860
	int *p, *q;
861
862
	assert(c == HAT || c < NCHARS);
863
888
	while (f->accept >= maxsetvec) {	/* guessing here! */
864
		setvec = reallocarray(setvec, maxsetvec, 4 * sizeof(int));
865
		tmpset = reallocarray(tmpset, maxsetvec, 4 * sizeof(int));
866
		if (setvec == 0 || tmpset == 0)
867
			overflo("out of space in cgoto()");
868
		maxsetvec *= 4;
869
	}
870
7850
	for (i = 0; i <= f->accept; i++)
871
7406
		setvec[i] = 0;
872
444
	setcnt = 0;
873
	/* compute positions of gototab[s,c] into setvec */
874
444
	p = f->posns[s];
875
1696
	for (i = 1; i <= *p; i++) {
876
1252
		if ((k = f->re[p[i]].ltype) != FINAL) {
877





1252
			if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np))
878
			 || (k == DOT && c != 0 && c != HAT)
879
			 || (k == ALL && c != 0)
880
			 || (k == EMPTYRE && c != 0)
881
			 || (k == CCL && member(c, (char *) f->re[p[i]].lval.up))
882
			 || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
883
559
				q = f->re[p[i]].lfollow;
884
1728
				for (j = 1; j <= *q; j++) {
885
1169
					if (q[j] >= maxsetvec) {
886
						setvec = reallocarray(setvec,
887
						    maxsetvec, 4 * sizeof(int));
888
						tmpset = reallocarray(tmpset,
889
						    maxsetvec, 4 * sizeof(int));
890
						if (setvec == 0 || tmpset == 0)
891
							overflo("cgoto overflow");
892
						maxsetvec *= 4;
893
					}
894
1169
					if (setvec[q[j]] == 0) {
895
1169
						setcnt++;
896
1169
						setvec[q[j]] = 1;
897
					}
898
				}
899
			}
900
		}
901
	}
902
	/* determine if setvec is a previous state */
903
444
	tmpset[0] = setcnt;
904
444
	j = 1;
905
7850
	for (i = f->accept; i >= 0; i--)
906
7406
		if (setvec[i]) {
907
1169
			tmpset[j++] = i;
908
		}
909
	/* tmpset == previous state? */
910
1870
	for (i = 1; i <= f->curstat; i++) {
911
1837
		p = f->posns[i];
912
1837
		if ((k = tmpset[0]) != p[0])
913
1220
			goto different;
914
1680
		for (j = 1; j <= k; j++)
915
1269
			if (tmpset[j] != p[j])
916
206
				goto different;
917
		/* setvec is state i */
918
411
		f->gototab[s][c] = i;
919
411
		return i;
920
1426
	  different:;
921
	}
922
923
	/* add tmpset to current set of states */
924
33
	if (f->curstat >= NSTATES-1) {
925
		f->curstat = 2;
926
		f->reset = 1;
927
		for (i = 2; i < NSTATES; i++)
928
			xfree(f->posns[i]);
929
	} else
930
33
		++(f->curstat);
931
8580
	for (i = 0; i < NCHARS; i++)
932
8547
		f->gototab[f->curstat][i] = 0;
933
33
	xfree(f->posns[f->curstat]);
934
33
	if ((p = (int *) calloc(setcnt+1, sizeof(int))) == NULL)
935
		overflo("out of space in cgoto");
936
937
33
	f->posns[f->curstat] = p;
938
33
	f->gototab[s][c] = f->curstat;
939
172
	for (i = 0; i <= setcnt; i++)
940
139
		p[i] = tmpset[i];
941
33
	if (setvec[f->accept])
942
2
		f->out[f->curstat] = 1;
943
	else
944
31
		f->out[f->curstat] = 0;
945
33
	return f->curstat;
946
}
947
948
949
void freefa(fa *f)	/* free a finite automaton */
950
{
951
	int i;
952
953
	if (f == NULL)
954
		return;
955
	for (i = 0; i <= f->curstat; i++)
956
		xfree(f->posns[i]);
957
	for (i = 0; i <= f->accept; i++) {
958
		xfree(f->re[i].lfollow);
959
		if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
960
			xfree((f->re[i].lval.np));
961
	}
962
	xfree(f->restr);
963
	xfree(f);
964
}