GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: usr.bin/awk/b.c Lines: 275 507 54.2 %
Date: 2017-11-13 Branches: 165 454 36.3 %

Line Branch Exec Source
1
/*	$OpenBSD: b.c,v 1.19 2017/10/09 14:51:31 deraadt Exp $	*/
2
/****************************************************************
3
Copyright (C) Lucent Technologies 1997
4
All Rights Reserved
5
6
Permission to use, copy, modify, and distribute this software and
7
its documentation for any purpose and without fee is hereby
8
granted, provided that the above copyright notice appear in all
9
copies and that both that the copyright notice and this
10
permission notice and warranty disclaimer appear in supporting
11
documentation, and that the name Lucent Technologies or any of
12
its entities not be used in advertising or publicity pertaining
13
to distribution of the software without specific, written prior
14
permission.
15
16
LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
18
IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
19
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
21
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
22
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23
THIS SOFTWARE.
24
****************************************************************/
25
26
/* lasciate ogne speranza, voi ch'intrate. */
27
28
#define	DEBUG
29
30
#include <ctype.h>
31
#include <stdio.h>
32
#include <string.h>
33
#include <stdlib.h>
34
#include "awk.h"
35
#include "ytab.h"
36
37
#define	HAT	(NCHARS+2)	/* matches ^ in regular expr */
38
				/* NCHARS is 2**n */
39
#define MAXLIN 22
40
41
#define type(v)		(v)->nobj	/* badly overloaded here */
42
#define info(v)		(v)->ntype	/* badly overloaded here */
43
#define left(v)		(v)->narg[0]
44
#define right(v)	(v)->narg[1]
45
#define parent(v)	(v)->nnext
46
47
#define LEAF	case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
48
#define ELEAF	case EMPTYRE:		/* empty string in regexp */
49
#define UNARY	case STAR: case PLUS: case QUEST:
50
51
/* encoding in tree Nodes:
52
	leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL, EMPTYRE):
53
		left is index, right contains value or pointer to value
54
	unary (STAR, PLUS, QUEST): left is child, right is null
55
	binary (CAT, OR): left and right are children
56
	parent contains pointer to parent
57
*/
58
59
60
int	*setvec;
61
int	*tmpset;
62
int	maxsetvec = 0;
63
64
int	rtok;		/* next token in current re */
65
int	rlxval;
66
static uschar	*rlxstr;
67
static uschar	*prestr;	/* current position in current re */
68
static uschar	*lastre;	/* origin of last re */
69
70
static	int setcnt;
71
static	int poscnt;
72
73
char	*patbeg;
74
int	patlen;
75
76
#define	NFA	20	/* cache this many dynamic fa's */
77
fa	*fatab[NFA];
78
int	nfatab	= 0;	/* entries in fatab */
79
80
fa *makedfa(const char *s, int anchor)	/* returns dfa for reg expr s */
81
{
82
	int i, use, nuse;
83
	fa *pfa;
84
	static int now = 1;
85
86
172
	if (setvec == 0) {	/* first time through any RE */
87
62
		maxsetvec = MAXLIN;
88
62
		setvec = (int *) calloc(maxsetvec, sizeof(int));
89
62
		tmpset = (int *) calloc(maxsetvec, sizeof(int));
90
62
		if (setvec == 0 || tmpset == 0)
91
			overflo("out of space initializing makedfa");
92
	}
93
94
86
	if (compile_time)	/* a constant for sure */
95
86
		return mkdfa(s, anchor);
96
	for (i = 0; i < nfatab; i++)	/* is it there already? */
97
		if (fatab[i]->anchor == anchor
98
		  && strcmp((const char *) fatab[i]->restr, s) == 0) {
99
			fatab[i]->use = now++;
100
			return fatab[i];
101
		}
102
	pfa = mkdfa(s, anchor);
103
	if (nfatab < NFA) {	/* room for another */
104
		fatab[nfatab] = pfa;
105
		fatab[nfatab]->use = now++;
106
		nfatab++;
107
		return pfa;
108
	}
109
	use = fatab[0]->use;	/* replace least-recently used */
110
	nuse = 0;
111
	for (i = 1; i < nfatab; i++)
112
		if (fatab[i]->use < use) {
113
			use = fatab[i]->use;
114
			nuse = i;
115
		}
116
	freefa(fatab[nuse]);
117
	fatab[nuse] = pfa;
118
	pfa->use = now++;
119
	return pfa;
120
86
}
121
122
fa *mkdfa(const char *s, int anchor)	/* does the real work of making a dfa */
123
				/* anchor = 1 for anchored matches, else 0 */
124
{
125
	Node *p, *p1;
126
	fa *f;
127
128
172
	p = reparse(s);
129
86
	p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
130
		/* put ALL STAR in front of reg.  exp. */
131
86
	p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
132
		/* put FINAL after reg.  exp. */
133
134
86
	poscnt = 0;
135
86
	penter(p1);	/* enter parent pointers and leaf indices */
136
86
	if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL)
137
		overflo("out of space for fa");
138
86
	f->accept = poscnt-1;	/* penter has computed number of positions in re */
139
86
	cfoll(f, p1);	/* set up follow sets */
140
86
	freetr(p1);
141
86
	if ((f->posns[0] = (int *) calloc(*(f->re[0].lfollow), sizeof(int))) == NULL)
142
			overflo("out of space in makedfa");
143
86
	if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
144
		overflo("out of space in makedfa");
145
86
	*f->posns[1] = 0;
146
86
	f->initstat = makeinit(f, anchor);
147
86
	f->anchor = anchor;
148
86
	f->restr = (uschar *) tostring(s);
149
86
	return f;
150
}
151
152
int makeinit(fa *f, int anchor)
153
{
154
	int i, k;
155
156
172
	f->curstat = 2;
157
86
	f->out[2] = 0;
158
86
	f->reset = 0;
159
86
	k = *(f->re[0].lfollow);
160
86
	xfree(f->posns[2]);
161
86
	if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
162
		overflo("out of space in makeinit");
163
688
	for (i=0; i <= k; i++) {
164
258
		(f->posns[2])[i] = (f->re[0].lfollow)[i];
165
	}
166
86
	if ((f->posns[2])[1] == f->accept)
167
		f->out[2] = 1;
168
44720
	for (i=0; i < NCHARS; i++)
169
22274
		f->gototab[2][i] = 0;
170
86
	f->curstat = cgoto(f, 2, HAT);
171
86
	if (anchor) {
172
6
		*f->posns[2] = k-1;	/* leave out position 0 */
173
36
		for (i=0; i < k; i++) {
174
12
			(f->posns[0])[i] = (f->posns[2])[i];
175
		}
176
177
6
		f->out[0] = f->out[2];
178
6
		if (f->curstat != 2)
179
6
			--(*f->posns[f->curstat]);
180
	}
181
86
	return f->curstat;
182
}
183
184
void penter(Node *p)	/* set up parent pointers and leaf indices */
185
{
186



2872
	switch (type(p)) {
187
	ELEAF
188
	LEAF
189
718
		info(p) = poscnt;
190
718
		poscnt++;
191
718
		break;
192
	UNARY
193
86
		penter(left(p));
194
86
		parent(left(p)) = p;
195
86
		break;
196
	case CAT:
197
	case OR:
198
632
		penter(left(p));
199
632
		penter(right(p));
200
632
		parent(left(p)) = p;
201
632
		parent(right(p)) = p;
202
632
		break;
203
	default:	/* can't happen */
204
		FATAL("can't happen: unknown type %d in penter", type(p));
205
		break;
206
	}
207
1436
}
208
209
void freetr(Node *p)	/* free parse tree */
210
{
211



2872
	switch (type(p)) {
212
	ELEAF
213
	LEAF
214
1436
		xfree(p);
215
		break;
216
	UNARY
217
86
		freetr(left(p));
218
172
		xfree(p);
219
		break;
220
	case CAT:
221
	case OR:
222
632
		freetr(left(p));
223
632
		freetr(right(p));
224
1264
		xfree(p);
225
		break;
226
	default:	/* can't happen */
227
		FATAL("can't happen: unknown type %d in freetr", type(p));
228
		break;
229
	}
230
1436
}
231
232
/* in the parsing of regular expressions, metacharacters like . have */
233
/* to be seen literally;  \056 is not a metacharacter. */
234
235
int hexstr(uschar **pp)	/* find and eval hex string at pp, return new p */
236
{			/* only pick up one 8-bit byte (2 chars) */
237
	uschar *p;
238
	int n = 0;
239
	int i;
240
241
	for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) {
242
		if (isdigit(*p))
243
			n = 16 * n + *p - '0';
244
		else if (*p >= 'a' && *p <= 'f')
245
			n = 16 * n + *p - 'a' + 10;
246
		else if (*p >= 'A' && *p <= 'F')
247
			n = 16 * n + *p - 'A' + 10;
248
	}
249
	*pp = (uschar *) p;
250
	return n;
251
}
252
253
#define isoctdigit(c) ((c) >= '0' && (c) <= '7')	/* multiple use of arg */
254
255
int quoted(uschar **pp)	/* pick up next thing after a \\ */
256
			/* and increment *pp */
257
{
258
36
	uschar *p = *pp;
259
	int c;
260
261
18
	if ((c = *p++) == 't')
262
		c = '\t';
263
18
	else if (c == 'n')
264
		c = '\n';
265
18
	else if (c == 'f')
266
		c = '\f';
267
18
	else if (c == 'r')
268
		c = '\r';
269
18
	else if (c == 'b')
270
		c = '\b';
271
18
	else if (c == '\\')
272
		c = '\\';
273
18
	else if (c == 'x') {	/* hexadecimal goo follows */
274
		c = hexstr(&p);	/* this adds a null if number is invalid */
275
18
	} else if (isoctdigit(c)) {	/* \d \dd \ddd */
276
		int n = c - '0';
277
		if (isoctdigit(*p)) {
278
			n = 8 * n + *p++ - '0';
279
			if (isoctdigit(*p))
280
				n = 8 * n + *p++ - '0';
281
		}
282
		c = n;
283
	} /* else */
284
		/* c = c; */
285
18
	*pp = p;
286
18
	return c;
287
18
}
288
289
char *cclenter(const char *argp)	/* add a character class */
290
{
291
	int i, c, c2;
292
12
	uschar *p = (uschar *) argp;
293
6
	uschar *op, *bp;
294
	static uschar *buf = 0;
295
	static int bufsz = 100;
296
297
6
	op = p;
298

12
	if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
299
		FATAL("out of space for character class [%.10s...] 1", p);
300
6
	bp = buf;
301
24
	for (i = 0; (c = *p++) != 0; ) {
302
6
		if (c == '\\') {
303
			c = quoted(&p);
304

6
		} else if (c == '-' && i > 0 && bp[-1] != 0) {
305
			if (*p != 0) {
306
				c = bp[-1];
307
				c2 = *p++;
308
				if (c2 == '\\')
309
					c2 = quoted(&p);
310
				if (c > c2) {	/* empty; ignore */
311
					bp--;
312
					i--;
313
					continue;
314
				}
315
				while (c < c2) {
316
					if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter1"))
317
						FATAL("out of space for character class [%.10s...] 2", p);
318
					*bp++ = ++c;
319
					i++;
320
				}
321
				continue;
322
			}
323
		}
324
6
		if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter2"))
325
			FATAL("out of space for character class [%.10s...] 3", p);
326
6
		*bp++ = c;
327
6
		i++;
328
	}
329
6
	*bp = 0;
330
6
	DPRINTF( ("cclenter: in = |%s|, out = |%s|\n", op, buf) );
331
12
	xfree(op);
332
12
	return (char *) tostring((char *) buf);
333
6
}
334
335
void overflo(const char *s)
336
{
337
	FATAL("regular expression too big: %.30s...", s);
338
}
339
340
void cfoll(fa *f, Node *v)	/* enter follow set of each leaf of vertex v into lfollow[leaf] */
341
{
342
	int i;
343
	int *p;
344
345



2872
	switch (type(v)) {
346
	ELEAF
347
	LEAF
348
718
		f->re[info(v)].ltype = type(v);
349
718
		f->re[info(v)].lval.np = right(v);
350
1436
		while (f->accept >= maxsetvec) {	/* guessing here! */
351
			setvec = reallocarray(setvec, maxsetvec,
352
			    4 * sizeof(int));
353
			tmpset = reallocarray(tmpset, maxsetvec,
354
			    4 * sizeof(int));
355
			if (setvec == 0 || tmpset == 0)
356
				overflo("out of space in cfoll()");
357
			maxsetvec *= 4;
358
		}
359
15620
		for (i = 0; i <= f->accept; i++)
360
7092
			setvec[i] = 0;
361
718
		setcnt = 0;
362
718
		follow(v);	/* computes setvec and setcnt */
363
718
		if ((p = (int *) calloc(setcnt+1, sizeof(int))) == NULL)
364
			overflo("out of space building follow set");
365
718
		f->re[info(v)].lfollow = p;
366
718
		*p = setcnt;
367
15620
		for (i = f->accept; i >= 0; i--)
368
7092
			if (setvec[i] == 1)
369
718
				*++p = i;
370
		break;
371
	UNARY
372
86
		cfoll(f,left(v));
373
86
		break;
374
	case CAT:
375
	case OR:
376
632
		cfoll(f,left(v));
377
632
		cfoll(f,right(v));
378
632
		break;
379
	default:	/* can't happen */
380
		FATAL("can't happen: unknown type %d in cfoll", type(v));
381
	}
382
1436
}
383
384
int first(Node *p)	/* collects initially active leaves of p into setvec */
385
			/* returns 0 if p matches empty string */
386
{
387
	int b, lp;
388
389



2356
	switch (type(p)) {
390
	ELEAF
391
	LEAF
392
718
		lp = info(p);	/* look for high-water mark of subscripts */
393

2154
		while (setcnt >= maxsetvec || lp >= maxsetvec) {	/* guessing here! */
394
			setvec = reallocarray(setvec, maxsetvec,
395
			    4 * sizeof(int));
396
			tmpset = reallocarray(tmpset, maxsetvec,
397
			    4 * sizeof(int));
398
			if (setvec == 0 || tmpset == 0)
399
				overflo("out of space in first()");
400
			maxsetvec *= 4;
401
		}
402
718
		if (type(p) == EMPTYRE) {
403
			setvec[lp] = 0;
404
			return(0);
405
		}
406
718
		if (setvec[lp] != 1) {
407
718
			setvec[lp] = 1;
408
718
			setcnt++;
409
718
		}
410

718
		if (type(p) == CCL && (*(char *) right(p)) == '\0')
411
			return(0);		/* empty CCL */
412
718
		else return(1);
413
	case PLUS:
414
		if (first(left(p)) == 0) return(0);
415
		return(1);
416
	case STAR:
417
	case QUEST:
418
		first(left(p));
419
		return(0);
420
	case CAT:
421

460
		if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
422
460
		return(1);
423
	case OR:
424
		b = first(right(p));
425
		if (first(left(p)) == 0 || b == 0) return(0);
426
		return(1);
427
	}
428
	FATAL("can't happen: unknown type %d in first", type(p));	/* can't happen */
429
	return(-1);
430
1178
}
431
432
void follow(Node *v)	/* collects leaves that can follow v into setvec */
433
{
434
	Node *p;
435
436
2700
	if (type(v) == FINAL)
437
86
		return;
438
1264
	p = parent(v);
439

1264
	switch (type(p)) {
440
	case STAR:
441
	case PLUS:
442
86
		first(v);
443
86
		follow(p);
444
86
		return;
445
446
	case OR:
447
	case QUEST:
448
		follow(p);
449
		return;
450
451
	case CAT:
452
1178
		if (v == left(p)) {	/* v is left child of p */
453
632
			if (first(right(p)) == 0) {
454
				follow(p);
455
				return;
456
			}
457
		} else		/* v is right child */
458
546
			follow(p);
459
1178
		return;
460
	}
461
1350
}
462
463
int member(int c, const char *sarg)	/* is c in s? */
464
{
465
	uschar *s = (uschar *) sarg;
466
467
528
	while (*s)
468
108
		if (c == *s++)
469
6
			return(1);
470
102
	return(0);
471
108
}
472
473
int match(fa *f, const char *p0)	/* shortest match ? */
474
{
475
	int s, ns;
476
	uschar *p = (uschar *) p0;
477
478
157728
	s = f->reset ? makeinit(f,0) : f->initstat;
479
39432
	if (f->out[s])
480
		return(1);
481
39432
	do {
482
		/* assert(*p < NCHARS); */
483
890118
		if ((ns = f->gototab[s][*p]) != 0)
484
884132
			s = ns;
485
		else
486
5986
			s = cgoto(f, s, *p);
487
890118
		if (f->out[s])
488
9018
			return(1);
489
881100
	} while (*p++ != 0);
490
30414
	return(0);
491
39432
}
492
493
int pmatch(fa *f, const char *p0)	/* longest match, for sub */
494
{
495
	int s, ns;
496
	uschar *p = (uschar *) p0;
497
	uschar *q;
498
	int i, k;
499
500
	/* s = f->reset ? makeinit(f,1) : f->initstat; */
501
2424
	if (f->reset) {
502
		f->initstat = s = makeinit(f,1);
503
	} else {
504
1212
		s = f->initstat;
505
	}
506
1212
	patbeg = (char *) p;
507
1212
	patlen = -1;
508
1212
	do {
509
		q = p;
510
61554
		do {
511
61626
			if (f->out[s])		/* final state */
512
18
				patlen = q-p;
513
			/* assert(*q < NCHARS); */
514
61626
			if ((ns = f->gototab[s][*q]) != 0)
515
61248
				s = ns;
516
			else
517
378
				s = cgoto(f, s, *q);
518
61626
			if (s == 1) {	/* no transition */
519
61554
				if (patlen >= 0) {
520
18
					patbeg = (char *) p;
521
18
					return(1);
522
				}
523
				else
524
					goto nextin;	/* no match */
525
			}
526
72
		} while (*q++ != 0);
527
		if (f->out[s])
528
			patlen = q-p-1;	/* don't count $ */
529
		if (patlen >= 0) {
530
			patbeg = (char *) p;
531
			return(1);
532
		}
533
	nextin:
534
		s = 2;
535
61536
		if (f->reset) {
536
			for (i = 2; i <= f->curstat; i++)
537
				xfree(f->posns[i]);
538
			k = *f->posns[0];
539
			if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
540
				overflo("out of space in pmatch");
541
			for (i = 0; i <= k; i++)
542
				(f->posns[2])[i] = (f->posns[0])[i];
543
			f->initstat = f->curstat = 2;
544
			f->out[2] = f->out[0];
545
			for (i = 0; i < NCHARS; i++)
546
				f->gototab[2][i] = 0;
547
		}
548
61536
	} while (*p++ != 0);
549
1194
	return (0);
550
1212
}
551
552
int nematch(fa *f, const char *p0)	/* non-empty match, for sub */
553
{
554
	int s, ns;
555
	uschar *p = (uschar *) p0;
556
	uschar *q;
557
	int i, k;
558
559
	/* s = f->reset ? makeinit(f,1) : f->initstat; */
560
	if (f->reset) {
561
		f->initstat = s = makeinit(f,1);
562
	} else {
563
		s = f->initstat;
564
	}
565
	patlen = -1;
566
	while (*p) {
567
		q = p;
568
		do {
569
			if (f->out[s])		/* final state */
570
				patlen = q-p;
571
			/* assert(*q < NCHARS); */
572
			if ((ns = f->gototab[s][*q]) != 0)
573
				s = ns;
574
			else
575
				s = cgoto(f, s, *q);
576
			if (s == 1) {	/* no transition */
577
				if (patlen > 0) {
578
					patbeg = (char *) p;
579
					return(1);
580
				} else
581
					goto nnextin;	/* no nonempty match */
582
			}
583
		} while (*q++ != 0);
584
		if (f->out[s])
585
			patlen = q-p-1;	/* don't count $ */
586
		if (patlen > 0 ) {
587
			patbeg = (char *) p;
588
			return(1);
589
		}
590
	nnextin:
591
		s = 2;
592
		if (f->reset) {
593
			for (i = 2; i <= f->curstat; i++)
594
				xfree(f->posns[i]);
595
			k = *f->posns[0];
596
			if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
597
				overflo("out of state space");
598
			for (i = 0; i <= k; i++)
599
				(f->posns[2])[i] = (f->posns[0])[i];
600
			f->initstat = f->curstat = 2;
601
			f->out[2] = f->out[0];
602
			for (i = 0; i < NCHARS; i++)
603
				f->gototab[2][i] = 0;
604
		}
605
		p++;
606
	}
607
	return (0);
608
}
609
610
Node *reparse(const char *p)	/* parses regular expression pointed to by p */
611
{			/* uses relex() to scan regular expression */
612
	Node *np;
613
614
172
	DPRINTF( ("reparse <%s>\n", p) );
615
86
	lastre = prestr = (uschar *) p;	/* prestr points to string to be parsed */
616
86
	rtok = relex();
617
	/* GNU compatibility: an empty regexp matches anything */
618
86
	if (rtok == '\0') {
619
		/* FATAL("empty regular expression"); previous */
620
		return(op2(EMPTYRE, NIL, NIL));
621
	}
622
86
	np = regexp();
623
86
	if (rtok != '\0')
624
		FATAL("syntax error in regular expression %s at %s", lastre, prestr);
625
86
	return(np);
626
86
}
627
628
Node *regexp(void)	/* top-level parse of reg expr */
629
{
630
172
	return (alt(concat(primary())));
631
}
632
633
Node *primary(void)
634
{
635
	Node *np;
636
637


1092
	switch (rtok) {
638
	case CHAR:
639
474
		np = op2(CHAR, NIL, itonp(rlxval));
640
474
		rtok = relex();
641
474
		return (unary(np));
642
	case ALL:
643
		rtok = relex();
644
		return (unary(op2(ALL, NIL, NIL)));
645
	case EMPTYRE:
646
		rtok = relex();
647
		return (unary(op2(ALL, NIL, NIL)));
648
	case DOT:
649
		rtok = relex();
650
		return (unary(op2(DOT, NIL, NIL)));
651
	case CCL:
652
		np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr));
653
		rtok = relex();
654
		return (unary(np));
655
	case NCCL:
656
6
		np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr));
657
6
		rtok = relex();
658
6
		return (unary(np));
659
	case '^':
660
66
		rtok = relex();
661
66
		return (unary(op2(CHAR, NIL, itonp(HAT))));
662
	case '$':
663
		rtok = relex();
664
		return (unary(op2(CHAR, NIL, NIL)));
665
	case '(':
666
		rtok = relex();
667
		if (rtok == ')') {	/* special pleading for () */
668
			rtok = relex();
669
			return unary(op2(CCL, NIL, (Node *) tostring("")));
670
		}
671
		np = regexp();
672
		if (rtok == ')') {
673
			rtok = relex();
674
			return (unary(np));
675
		}
676
		else
677
			FATAL("syntax error in regular expression %s at %s", lastre, prestr);
678
	default:
679
		FATAL("illegal primary in regular expression %s at %s", lastre, prestr);
680
	}
681
	return 0;	/*NOTREACHED*/
682
546
}
683
684
Node *concat(Node *np)
685
{
686


1092
	switch (rtok) {
687
	case CHAR: case DOT: case ALL: case EMPTYRE: case CCL: case NCCL: case '$': case '(':
688
460
		return (concat(op2(CAT, np, primary())));
689
	}
690
86
	return (np);
691
546
}
692
693
Node *alt(Node *np)
694
{
695
172
	if (rtok == OR) {
696
		rtok = relex();
697
		return (alt(op2(OR, np, concat(primary()))));
698
	}
699
86
	return (np);
700
86
}
701
702
Node *unary(Node *np)
703
{
704

1092
	switch (rtok) {
705
	case STAR:
706
		rtok = relex();
707
		return (unary(op2(STAR, np, NIL)));
708
	case PLUS:
709
		rtok = relex();
710
		return (unary(op2(PLUS, np, NIL)));
711
	case QUEST:
712
		rtok = relex();
713
		return (unary(op2(QUEST, np, NIL)));
714
	default:
715
546
		return (np);
716
	}
717
546
}
718
719
/*
720
 * Character class definitions conformant to the POSIX locale as
721
 * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source
722
 * and operating character sets are both ASCII (ISO646) or supersets
723
 * thereof.
724
 *
725
 * Note that to avoid overflowing the temporary buffer used in
726
 * relex(), the expanded character class (prior to range expansion)
727
 * must be less than twice the size of their full name.
728
 */
729
730
/* Because isblank doesn't show up in any of the header files on any
731
 * system i use, it's defined here.  if some other locale has a richer
732
 * definition of "blank", define HAS_ISBLANK and provide your own
733
 * version.
734
 * the parentheses here are an attempt to find a path through the maze
735
 * of macro definition and/or function and/or version provided.  thanks
736
 * to nelson beebe for the suggestion; let's see if it works everywhere.
737
 */
738
739
#ifndef HAS_ISBLANK
740
741
int (xisblank)(int c)
742
{
743
	return c==' ' || c=='\t';
744
}
745
746
#endif
747
748
struct charclass {
749
	const char *cc_name;
750
	int cc_namelen;
751
	int (*cc_func)(int);
752
} charclasses[] = {
753
	{ "alnum",	5,	isalnum },
754
	{ "alpha",	5,	isalpha },
755
#ifndef HAS_ISBLANK
756
	{ "blank",	5,	isspace }, /* was isblank */
757
#else
758
	{ "blank",	5,	isblank },
759
#endif
760
	{ "cntrl",	5,	iscntrl },
761
	{ "digit",	5,	isdigit },
762
	{ "graph",	5,	isgraph },
763
	{ "lower",	5,	islower },
764
	{ "print",	5,	isprint },
765
	{ "punct",	5,	ispunct },
766
	{ "space",	5,	isspace },
767
	{ "upper",	5,	isupper },
768
	{ "xdigit",	6,	isxdigit },
769
	{ NULL,		0,	NULL },
770
};
771
772
773
int relex(void)		/* lexical analyzer for reparse */
774
{
775
	int c, n;
776
	int cflag;
777
	static uschar *buf = 0;
778
	static int bufsz = 100;
779
1264
	uschar *bp;
780
	struct charclass *cc;
781
	int i;
782
783



632
	switch (c = *prestr++) {
784
	case '|': return OR;
785
	case '*': return STAR;
786
	case '+': return PLUS;
787
	case '?': return QUEST;
788
	case '.': return DOT;
789
86
	case '\0': prestr--; return '\0';
790
	case '^':
791
	case '$':
792
	case '(':
793
	case ')':
794
66
		return c;
795
	case '\\':
796
18
		rlxval = quoted(&prestr);
797
18
		return CHAR;
798
	default:
799
456
		rlxval = c;
800
456
		return CHAR;
801
	case '[':
802

12
		if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
803
			FATAL("out of space in reg expr %.10s..", lastre);
804
6
		bp = buf;
805
6
		if (*prestr == '^') {
806
			cflag = 1;
807
6
			prestr++;
808
6
		}
809
		else
810
			cflag = 0;
811
6
		n = 2 * strlen((const char *) prestr)+1;
812
6
		if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, "relex1"))
813
			FATAL("out of space for reg expr %.10s...", lastre);
814
12
		for (; ; ) {
815
12
			if ((c = *prestr++) == '\\') {
816
				*bp++ = '\\';
817
				if ((c = *prestr++) == '\0')
818
					FATAL("nonterminated character class %.20s...", lastre);
819
				*bp++ = c;
820
			/* } else if (c == '\n') { */
821
			/* 	FATAL("newline in character class %.20s...", lastre); */
822

12
			} else if (c == '[' && *prestr == ':') {
823
				/* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */
824
				for (cc = charclasses; cc->cc_name; cc++)
825
					if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0)
826
						break;
827
				if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
828
				    prestr[2 + cc->cc_namelen] == ']') {
829
					prestr += cc->cc_namelen + 3;
830
					for (i = 0; i < NCHARS; i++) {
831
						if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, "relex2"))
832
						    FATAL("out of space for reg expr %.10s...", lastre);
833
						if (cc->cc_func(i)) {
834
							*bp++ = i;
835
							n++;
836
						}
837
					}
838
				} else
839
					*bp++ = c;
840
12
			} else if (c == '\0') {
841
				FATAL("nonterminated character class %.20s", lastre);
842
12
			} else if (bp == buf) {	/* 1st char is special */
843
6
				*bp++ = c;
844
12
			} else if (c == ']') {
845
6
				*bp++ = 0;
846
6
				rlxstr = (uschar *) tostring((char *) buf);
847
6
				if (cflag == 0)
848
					return CCL;
849
				else
850
6
					return NCCL;
851
			} else
852
				*bp++ = c;
853
		}
854
	}
855
632
}
856
857
int cgoto(fa *f, int s, int c)
858
{
859
	int i, j, k;
860
	int *p, *q;
861
862
	assert(c == HAT || c < NCHARS);
863
19350
	while (f->accept >= maxsetvec) {	/* guessing here! */
864
		setvec = reallocarray(setvec, maxsetvec, 4 * sizeof(int));
865
		tmpset = reallocarray(tmpset, maxsetvec, 4 * sizeof(int));
866
		if (setvec == 0 || tmpset == 0)
867
			overflo("out of space in cgoto()");
868
		maxsetvec *= 4;
869
	}
870
116920
	for (i = 0; i <= f->accept; i++)
871
52010
		setvec[i] = 0;
872
6450
	setcnt = 0;
873
	/* compute positions of gototab[s,c] into setvec */
874
6450
	p = f->posns[s];
875
40380
	for (i = 1; i <= *p; i++) {
876
13740
		if ((k = f->re[p[i]].ltype) != FINAL) {
877
21288
			if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np))
878

20968
			 || (k == DOT && c != 0 && c != HAT)
879
13414
			 || (k == ALL && c != 0)
880
7482
			 || (k == EMPTYRE && c != 0)
881
7482
			 || (k == CCL && member(c, (char *) f->re[p[i]].lval.up))
882

7590
			 || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
883
6350
				q = f->re[p[i]].lfollow;
884
37264
				for (j = 1; j <= *q; j++) {
885
12282
					if (q[j] >= maxsetvec) {
886
						setvec = reallocarray(setvec,
887
						    maxsetvec, 4 * sizeof(int));
888
						tmpset = reallocarray(tmpset,
889
						    maxsetvec, 4 * sizeof(int));
890
						if (setvec == 0 || tmpset == 0)
891
							overflo("cgoto overflow");
892
						maxsetvec *= 4;
893
					}
894
12282
					if (setvec[q[j]] == 0) {
895
12282
						setcnt++;
896
12282
						setvec[q[j]] = 1;
897
12282
					}
898
				}
899
			}
900
		}
901
	}
902
	/* determine if setvec is a previous state */
903
6450
	tmpset[0] = setcnt;
904
	j = 1;
905
116920
	for (i = f->accept; i >= 0; i--)
906
52010
		if (setvec[i]) {
907
12282
			tmpset[j++] = i;
908
12282
		}
909
	/* tmpset == previous state? */
910
27856
	for (i = 1; i <= f->curstat; i++) {
911
13614
		p = f->posns[i];
912
13614
		if ((k = tmpset[0]) != p[0])
913
			goto different;
914
37256
		for (j = 1; j <= k; j++)
915
12492
			if (tmpset[j] != p[j])
916
				goto different;
917
		/* setvec is state i */
918
6136
		f->gototab[s][c] = i;
919
6136
		return i;
920
	  different:;
921
	}
922
923
	/* add tmpset to current set of states */
924
314
	if (f->curstat >= NSTATES-1) {
925
		f->curstat = 2;
926
		f->reset = 1;
927
		for (i = 2; i < NSTATES; i++)
928
			xfree(f->posns[i]);
929
	} else
930
314
		++(f->curstat);
931
163280
	for (i = 0; i < NCHARS; i++)
932
81326
		f->gototab[f->curstat][i] = 0;
933
314
	xfree(f->posns[f->curstat]);
934
314
	if ((p = (int *) calloc(setcnt+1, sizeof(int))) == NULL)
935
		overflo("out of space in cgoto");
936
937
314
	f->posns[f->curstat] = p;
938
314
	f->gototab[s][c] = f->curstat;
939
3044
	for (i = 0; i <= setcnt; i++)
940
1208
		p[i] = tmpset[i];
941
314
	if (setvec[f->accept])
942
		f->out[f->curstat] = 1;
943
	else
944
		f->out[f->curstat] = 0;
945
314
	return f->curstat;
946
6450
}
947
948
949
void freefa(fa *f)	/* free a finite automaton */
950
{
951
	int i;
952
953
	if (f == NULL)
954
		return;
955
	for (i = 0; i <= f->curstat; i++)
956
		xfree(f->posns[i]);
957
	for (i = 0; i <= f->accept; i++) {
958
		xfree(f->re[i].lfollow);
959
		if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
960
			xfree((f->re[i].lval.np));
961
	}
962
	xfree(f->restr);
963
	xfree(f);
964
}