GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: usr.bin/awk/b.c Lines: 314 504 62.3 %
Date: 2017-11-07 Branches: 201 454 44.3 %

Line Branch Exec Source
1
/*	$OpenBSD: b.c,v 1.18 2014/12/19 19:28:55 deraadt Exp $	*/
2
/****************************************************************
3
Copyright (C) Lucent Technologies 1997
4
All Rights Reserved
5
6
Permission to use, copy, modify, and distribute this software and
7
its documentation for any purpose and without fee is hereby
8
granted, provided that the above copyright notice appear in all
9
copies and that both that the copyright notice and this
10
permission notice and warranty disclaimer appear in supporting
11
documentation, and that the name Lucent Technologies or any of
12
its entities not be used in advertising or publicity pertaining
13
to distribution of the software without specific, written prior
14
permission.
15
16
LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
18
IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
19
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
21
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
22
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23
THIS SOFTWARE.
24
****************************************************************/
25
26
/* lasciate ogne speranza, voi ch'intrate. */
27
28
#define	DEBUG
29
30
#include <ctype.h>
31
#include <stdio.h>
32
#include <string.h>
33
#include <stdlib.h>
34
#include "awk.h"
35
#include "ytab.h"
36
37
#define	HAT	(NCHARS+2)	/* matches ^ in regular expr */
38
				/* NCHARS is 2**n */
39
#define MAXLIN 22
40
41
#define type(v)		(v)->nobj	/* badly overloaded here */
42
#define info(v)		(v)->ntype	/* badly overloaded here */
43
#define left(v)		(v)->narg[0]
44
#define right(v)	(v)->narg[1]
45
#define parent(v)	(v)->nnext
46
47
#define LEAF	case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
48
#define ELEAF	case EMPTYRE:		/* empty string in regexp */
49
#define UNARY	case STAR: case PLUS: case QUEST:
50
51
/* encoding in tree Nodes:
52
	leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL, EMPTYRE):
53
		left is index, right contains value or pointer to value
54
	unary (STAR, PLUS, QUEST): left is child, right is null
55
	binary (CAT, OR): left and right are children
56
	parent contains pointer to parent
57
*/
58
59
60
int	*setvec;
61
int	*tmpset;
62
int	maxsetvec = 0;
63
64
int	rtok;		/* next token in current re */
65
int	rlxval;
66
static uschar	*rlxstr;
67
static uschar	*prestr;	/* current position in current re */
68
static uschar	*lastre;	/* origin of last re */
69
70
static	int setcnt;
71
static	int poscnt;
72
73
char	*patbeg;
74
int	patlen;
75
76
#define	NFA	20	/* cache this many dynamic fa's */
77
fa	*fatab[NFA];
78
int	nfatab	= 0;	/* entries in fatab */
79
80
fa *makedfa(const char *s, int anchor)	/* returns dfa for reg expr s */
81
{
82
	int i, use, nuse;
83
	fa *pfa;
84
	static int now = 1;
85
86
240
	if (setvec == 0) {	/* first time through any RE */
87
93
		maxsetvec = MAXLIN;
88
93
		setvec = (int *) calloc(maxsetvec, sizeof(int));
89
93
		tmpset = (int *) calloc(maxsetvec, sizeof(int));
90
93
		if (setvec == 0 || tmpset == 0)
91
			overflo("out of space initializing makedfa");
92
	}
93
94
120
	if (compile_time)	/* a constant for sure */
95
120
		return mkdfa(s, anchor);
96
	for (i = 0; i < nfatab; i++)	/* is it there already? */
97
		if (fatab[i]->anchor == anchor
98
		  && strcmp((const char *) fatab[i]->restr, s) == 0) {
99
			fatab[i]->use = now++;
100
			return fatab[i];
101
		}
102
	pfa = mkdfa(s, anchor);
103
	if (nfatab < NFA) {	/* room for another */
104
		fatab[nfatab] = pfa;
105
		fatab[nfatab]->use = now++;
106
		nfatab++;
107
		return pfa;
108
	}
109
	use = fatab[0]->use;	/* replace least-recently used */
110
	nuse = 0;
111
	for (i = 1; i < nfatab; i++)
112
		if (fatab[i]->use < use) {
113
			use = fatab[i]->use;
114
			nuse = i;
115
		}
116
	freefa(fatab[nuse]);
117
	fatab[nuse] = pfa;
118
	pfa->use = now++;
119
	return pfa;
120
120
}
121
122
fa *mkdfa(const char *s, int anchor)	/* does the real work of making a dfa */
123
				/* anchor = 1 for anchored matches, else 0 */
124
{
125
	Node *p, *p1;
126
	fa *f;
127
128
240
	p = reparse(s);
129
120
	p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
130
		/* put ALL STAR in front of reg.  exp. */
131
120
	p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
132
		/* put FINAL after reg.  exp. */
133
134
120
	poscnt = 0;
135
120
	penter(p1);	/* enter parent pointers and leaf indices */
136
120
	if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL)
137
		overflo("out of space for fa");
138
120
	f->accept = poscnt-1;	/* penter has computed number of positions in re */
139
120
	cfoll(f, p1);	/* set up follow sets */
140
120
	freetr(p1);
141
120
	if ((f->posns[0] = (int *) calloc(*(f->re[0].lfollow), sizeof(int))) == NULL)
142
			overflo("out of space in makedfa");
143
120
	if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
144
		overflo("out of space in makedfa");
145
120
	*f->posns[1] = 0;
146
120
	f->initstat = makeinit(f, anchor);
147
120
	f->anchor = anchor;
148
120
	f->restr = (uschar *) tostring(s);
149
120
	return f;
150
}
151
152
int makeinit(fa *f, int anchor)
153
{
154
	int i, k;
155
156
240
	f->curstat = 2;
157
120
	f->out[2] = 0;
158
120
	f->reset = 0;
159
120
	k = *(f->re[0].lfollow);
160
120
	xfree(f->posns[2]);
161
120
	if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
162
		overflo("out of space in makeinit");
163
964
	for (i=0; i <= k; i++) {
164
362
		(f->posns[2])[i] = (f->re[0].lfollow)[i];
165
	}
166
120
	if ((f->posns[2])[1] == f->accept)
167
		f->out[2] = 1;
168
62400
	for (i=0; i < NCHARS; i++)
169
31080
		f->gototab[2][i] = 0;
170
120
	f->curstat = cgoto(f, 2, HAT);
171
120
	if (anchor) {
172
6
		*f->posns[2] = k-1;	/* leave out position 0 */
173
36
		for (i=0; i < k; i++) {
174
12
			(f->posns[0])[i] = (f->posns[2])[i];
175
		}
176
177
6
		f->out[0] = f->out[2];
178
6
		if (f->curstat != 2)
179
6
			--(*f->posns[f->curstat]);
180
	}
181
120
	return f->curstat;
182
}
183
184
void penter(Node *p)	/* set up parent pointers and leaf indices */
185
{
186



4112
	switch (type(p)) {
187
	ELEAF
188
	LEAF
189
1024
		info(p) = poscnt;
190
1024
		poscnt++;
191
1024
		break;
192
	UNARY
193
128
		penter(left(p));
194
128
		parent(left(p)) = p;
195
128
		break;
196
	case CAT:
197
	case OR:
198
904
		penter(left(p));
199
904
		penter(right(p));
200
904
		parent(left(p)) = p;
201
904
		parent(right(p)) = p;
202
904
		break;
203
	default:	/* can't happen */
204
		FATAL("can't happen: unknown type %d in penter", type(p));
205
		break;
206
	}
207
2056
}
208
209
void freetr(Node *p)	/* free parse tree */
210
{
211



4112
	switch (type(p)) {
212
	ELEAF
213
	LEAF
214
2048
		xfree(p);
215
		break;
216
	UNARY
217
128
		freetr(left(p));
218
256
		xfree(p);
219
		break;
220
	case CAT:
221
	case OR:
222
904
		freetr(left(p));
223
904
		freetr(right(p));
224
1808
		xfree(p);
225
		break;
226
	default:	/* can't happen */
227
		FATAL("can't happen: unknown type %d in freetr", type(p));
228
		break;
229
	}
230
2056
}
231
232
/* in the parsing of regular expressions, metacharacters like . have */
233
/* to be seen literally;  \056 is not a metacharacter. */
234
235
int hexstr(uschar **pp)	/* find and eval hex string at pp, return new p */
236
{			/* only pick up one 8-bit byte (2 chars) */
237
	uschar *p;
238
	int n = 0;
239
	int i;
240
241
	for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) {
242
		if (isdigit(*p))
243
			n = 16 * n + *p - '0';
244
		else if (*p >= 'a' && *p <= 'f')
245
			n = 16 * n + *p - 'a' + 10;
246
		else if (*p >= 'A' && *p <= 'F')
247
			n = 16 * n + *p - 'A' + 10;
248
	}
249
	*pp = (uschar *) p;
250
	return n;
251
}
252
253
#define isoctdigit(c) ((c) >= '0' && (c) <= '7')	/* multiple use of arg */
254
255
int quoted(uschar **pp)	/* pick up next thing after a \\ */
256
			/* and increment *pp */
257
{
258
62
	uschar *p = *pp;
259
	int c;
260
261
31
	if ((c = *p++) == 't')
262
4
		c = '\t';
263
27
	else if (c == 'n')
264
		c = '\n';
265
27
	else if (c == 'f')
266
		c = '\f';
267
27
	else if (c == 'r')
268
		c = '\r';
269
27
	else if (c == 'b')
270
		c = '\b';
271
27
	else if (c == '\\')
272
		c = '\\';
273
27
	else if (c == 'x') {	/* hexadecimal goo follows */
274
		c = hexstr(&p);	/* this adds a null if number is invalid */
275
27
	} else if (isoctdigit(c)) {	/* \d \dd \ddd */
276
		int n = c - '0';
277
		if (isoctdigit(*p)) {
278
			n = 8 * n + *p++ - '0';
279
			if (isoctdigit(*p))
280
				n = 8 * n + *p++ - '0';
281
		}
282
		c = n;
283
	} /* else */
284
		/* c = c; */
285
31
	*pp = p;
286
31
	return c;
287
31
}
288
289
char *cclenter(const char *argp)	/* add a character class */
290
{
291
	int i, c, c2;
292
28
	uschar *p = (uschar *) argp;
293
14
	uschar *op, *bp;
294
	static uschar *buf = 0;
295
	static int bufsz = 100;
296
297
14
	op = p;
298

23
	if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
299
		FATAL("out of space for character class [%.10s...] 1", p);
300
14
	bp = buf;
301
294
	for (i = 0; (c = *p++) != 0; ) {
302
133
		if (c == '\\') {
303
4
			c = quoted(&p);
304

133
		} else if (c == '-' && i > 0 && bp[-1] != 0) {
305
			if (*p != 0) {
306
				c = bp[-1];
307
				c2 = *p++;
308
				if (c2 == '\\')
309
					c2 = quoted(&p);
310
				if (c > c2) {	/* empty; ignore */
311
					bp--;
312
					i--;
313
					continue;
314
				}
315
				while (c < c2) {
316
					if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter1"))
317
						FATAL("out of space for character class [%.10s...] 2", p);
318
					*bp++ = ++c;
319
					i++;
320
				}
321
				continue;
322
			}
323
		}
324
133
		if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter2"))
325
			FATAL("out of space for character class [%.10s...] 3", p);
326
133
		*bp++ = c;
327
133
		i++;
328
	}
329
14
	*bp = 0;
330
14
	dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) );
331
28
	xfree(op);
332
28
	return (char *) tostring((char *) buf);
333
14
}
334
335
void overflo(const char *s)
336
{
337
	FATAL("regular expression too big: %.30s...", s);
338
}
339
340
void cfoll(fa *f, Node *v)	/* enter follow set of each leaf of vertex v into lfollow[leaf] */
341
{
342
	int i;
343
	int *p;
344
345



4112
	switch (type(v)) {
346
	ELEAF
347
	LEAF
348
1024
		f->re[info(v)].ltype = type(v);
349
1024
		f->re[info(v)].lval.np = right(v);
350
2048
		while (f->accept >= maxsetvec) {	/* guessing here! */
351
			setvec = reallocarray(setvec, maxsetvec,
352
			    4 * sizeof(int));
353
			tmpset = reallocarray(tmpset, maxsetvec,
354
			    4 * sizeof(int));
355
			if (setvec == 0 || tmpset == 0)
356
				overflo("out of space in cfoll()");
357
			maxsetvec *= 4;
358
		}
359
22816
		for (i = 0; i <= f->accept; i++)
360
10384
			setvec[i] = 0;
361
1024
		setcnt = 0;
362
1024
		follow(v);	/* computes setvec and setcnt */
363
1024
		if ((p = (int *) calloc(setcnt+1, sizeof(int))) == NULL)
364
			overflo("out of space building follow set");
365
1024
		f->re[info(v)].lfollow = p;
366
1024
		*p = setcnt;
367
22816
		for (i = f->accept; i >= 0; i--)
368
10384
			if (setvec[i] == 1)
369
1043
				*++p = i;
370
		break;
371
	UNARY
372
128
		cfoll(f,left(v));
373
128
		break;
374
	case CAT:
375
	case OR:
376
904
		cfoll(f,left(v));
377
904
		cfoll(f,right(v));
378
904
		break;
379
	default:	/* can't happen */
380
		FATAL("can't happen: unknown type %d in cfoll", type(v));
381
	}
382
2056
}
383
384
int first(Node *p)	/* collects initially active leaves of p into setvec */
385
			/* returns 0 if p matches empty string */
386
{
387
	int b, lp;
388
389



3454
	switch (type(p)) {
390
	ELEAF
391
	LEAF
392
1043
		lp = info(p);	/* look for high-water mark of subscripts */
393

3129
		while (setcnt >= maxsetvec || lp >= maxsetvec) {	/* guessing here! */
394
			setvec = reallocarray(setvec, maxsetvec,
395
			    4 * sizeof(int));
396
			tmpset = reallocarray(tmpset, maxsetvec,
397
			    4 * sizeof(int));
398
			if (setvec == 0 || tmpset == 0)
399
				overflo("out of space in first()");
400
			maxsetvec *= 4;
401
		}
402
1043
		if (type(p) == EMPTYRE) {
403
			setvec[lp] = 0;
404
			return(0);
405
		}
406
1043
		if (setvec[lp] != 1) {
407
1043
			setvec[lp] = 1;
408
1043
			setcnt++;
409
1043
		}
410

1055
		if (type(p) == CCL && (*(char *) right(p)) == '\0')
411
			return(0);		/* empty CCL */
412
1043
		else return(1);
413
	case PLUS:
414
2
		if (first(left(p)) == 0) return(0);
415
2
		return(1);
416
	case STAR:
417
	case QUEST:
418
7
		first(left(p));
419
7
		return(0);
420
	case CAT:
421

673
		if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
422
671
		return(1);
423
	case OR:
424
4
		b = first(right(p));
425
4
		if (first(left(p)) == 0 || b == 0) return(0);
426
4
		return(1);
427
	}
428
	FATAL("can't happen: unknown type %d in first", type(p));	/* can't happen */
429
	return(-1);
430
1727
}
431
432
void follow(Node *v)	/* collects leaves that can follow v into setvec */
433
{
434
	Node *p;
435
436
3896
	if (type(v) == FINAL)
437
120
		return;
438
1828
	p = parent(v);
439

1828
	switch (type(p)) {
440
	case STAR:
441
	case PLUS:
442
128
		first(v);
443
128
		follow(p);
444
128
		return;
445
446
	case OR:
447
	case QUEST:
448
4
		follow(p);
449
4
		return;
450
451
	case CAT:
452
1696
		if (v == left(p)) {	/* v is left child of p */
453
909
			if (first(right(p)) == 0) {
454
5
				follow(p);
455
5
				return;
456
			}
457
		} else		/* v is right child */
458
787
			follow(p);
459
1691
		return;
460
	}
461
1948
}
462
463
int member(int c, const char *sarg)	/* is c in s? */
464
{
465
	uschar *s = (uschar *) sarg;
466
467
4242
	while (*s)
468
3277
		if (c == *s++)
469
97
			return(1);
470
257
	return(0);
471
354
}
472
473
int match(fa *f, const char *p0)	/* shortest match ? */
474
{
475
	int s, ns;
476
	uschar *p = (uschar *) p0;
477
478
207668
	s = f->reset ? makeinit(f,0) : f->initstat;
479
51917
	if (f->out[s])
480
		return(1);
481
	do {
482
		/* assert(*p < NCHARS); */
483
1182152
		if ((ns = f->gototab[s][*p]) != 0)
484
1173602
			s = ns;
485
		else
486
8550
			s = cgoto(f, s, *p);
487
1182152
		if (f->out[s])
488
11119
			return(1);
489
1171033
	} while (*p++ != 0);
490
40798
	return(0);
491
51917
}
492
493
int pmatch(fa *f, const char *p0)	/* longest match, for sub */
494
{
495
	int s, ns;
496
	uschar *p = (uschar *) p0;
497
	uschar *q;
498
	int i, k;
499
500
	/* s = f->reset ? makeinit(f,1) : f->initstat; */
501
2424
	if (f->reset) {
502
		f->initstat = s = makeinit(f,1);
503
	} else {
504
1212
		s = f->initstat;
505
	}
506
1212
	patbeg = (char *) p;
507
1212
	patlen = -1;
508
1212
	do {
509
		q = p;
510
61554
		do {
511
61626
			if (f->out[s])		/* final state */
512
18
				patlen = q-p;
513
			/* assert(*q < NCHARS); */
514
61626
			if ((ns = f->gototab[s][*q]) != 0)
515
61248
				s = ns;
516
			else
517
378
				s = cgoto(f, s, *q);
518
61626
			if (s == 1) {	/* no transition */
519
61554
				if (patlen >= 0) {
520
18
					patbeg = (char *) p;
521
18
					return(1);
522
				}
523
				else
524
					goto nextin;	/* no match */
525
			}
526
72
		} while (*q++ != 0);
527
		if (f->out[s])
528
			patlen = q-p-1;	/* don't count $ */
529
		if (patlen >= 0) {
530
			patbeg = (char *) p;
531
			return(1);
532
		}
533
	nextin:
534
		s = 2;
535
61536
		if (f->reset) {
536
			for (i = 2; i <= f->curstat; i++)
537
				xfree(f->posns[i]);
538
			k = *f->posns[0];
539
			if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
540
				overflo("out of space in pmatch");
541
			for (i = 0; i <= k; i++)
542
				(f->posns[2])[i] = (f->posns[0])[i];
543
			f->initstat = f->curstat = 2;
544
			f->out[2] = f->out[0];
545
			for (i = 0; i < NCHARS; i++)
546
				f->gototab[2][i] = 0;
547
		}
548
61536
	} while (*p++ != 0);
549
1194
	return (0);
550
1212
}
551
552
int nematch(fa *f, const char *p0)	/* non-empty match, for sub */
553
{
554
	int s, ns;
555
	uschar *p = (uschar *) p0;
556
	uschar *q;
557
	int i, k;
558
559
	/* s = f->reset ? makeinit(f,1) : f->initstat; */
560
	if (f->reset) {
561
		f->initstat = s = makeinit(f,1);
562
	} else {
563
		s = f->initstat;
564
	}
565
	patlen = -1;
566
	while (*p) {
567
		q = p;
568
		do {
569
			if (f->out[s])		/* final state */
570
				patlen = q-p;
571
			/* assert(*q < NCHARS); */
572
			if ((ns = f->gototab[s][*q]) != 0)
573
				s = ns;
574
			else
575
				s = cgoto(f, s, *q);
576
			if (s == 1) {	/* no transition */
577
				if (patlen > 0) {
578
					patbeg = (char *) p;
579
					return(1);
580
				} else
581
					goto nnextin;	/* no nonempty match */
582
			}
583
		} while (*q++ != 0);
584
		if (f->out[s])
585
			patlen = q-p-1;	/* don't count $ */
586
		if (patlen > 0 ) {
587
			patbeg = (char *) p;
588
			return(1);
589
		}
590
	nnextin:
591
		s = 2;
592
		if (f->reset) {
593
			for (i = 2; i <= f->curstat; i++)
594
				xfree(f->posns[i]);
595
			k = *f->posns[0];
596
			if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
597
				overflo("out of state space");
598
			for (i = 0; i <= k; i++)
599
				(f->posns[2])[i] = (f->posns[0])[i];
600
			f->initstat = f->curstat = 2;
601
			f->out[2] = f->out[0];
602
			for (i = 0; i < NCHARS; i++)
603
				f->gototab[2][i] = 0;
604
		}
605
		p++;
606
	}
607
	return (0);
608
}
609
610
Node *reparse(const char *p)	/* parses regular expression pointed to by p */
611
{			/* uses relex() to scan regular expression */
612
	Node *np;
613
614
240
	dprintf( ("reparse <%s>\n", p) );
615
120
	lastre = prestr = (uschar *) p;	/* prestr points to string to be parsed */
616
120
	rtok = relex();
617
	/* GNU compatibility: an empty regexp matches anything */
618
120
	if (rtok == '\0') {
619
		/* FATAL("empty regular expression"); previous */
620
		return(op2(EMPTYRE, NIL, NIL));
621
	}
622
120
	np = regexp();
623
120
	if (rtok != '\0')
624
		FATAL("syntax error in regular expression %s at %s", lastre, prestr);
625
120
	return(np);
626
120
}
627
628
Node *regexp(void)	/* top-level parse of reg expr */
629
{
630
244
	return (alt(concat(primary())));
631
}
632
633
Node *primary(void)
634
{
635
	Node *np;
636
637


1572
	switch (rtok) {
638
	case CHAR:
639
675
		np = op2(CHAR, NIL, itonp(rlxval));
640
675
		rtok = relex();
641
675
		return (unary(np));
642
	case ALL:
643
		rtok = relex();
644
		return (unary(op2(ALL, NIL, NIL)));
645
	case EMPTYRE:
646
		rtok = relex();
647
		return (unary(op2(ALL, NIL, NIL)));
648
	case DOT:
649
4
		rtok = relex();
650
4
		return (unary(op2(DOT, NIL, NIL)));
651
	case CCL:
652
6
		np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr));
653
6
		rtok = relex();
654
6
		return (unary(np));
655
	case NCCL:
656
8
		np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr));
657
8
		rtok = relex();
658
8
		return (unary(np));
659
	case '^':
660
90
		rtok = relex();
661
90
		return (unary(op2(CHAR, NIL, itonp(HAT))));
662
	case '$':
663
1
		rtok = relex();
664
1
		return (unary(op2(CHAR, NIL, NIL)));
665
	case '(':
666
2
		rtok = relex();
667
2
		if (rtok == ')') {	/* special pleading for () */
668
			rtok = relex();
669
			return unary(op2(CCL, NIL, (Node *) tostring("")));
670
		}
671
2
		np = regexp();
672
2
		if (rtok == ')') {
673
2
			rtok = relex();
674
2
			return (unary(np));
675
		}
676
		else
677
			FATAL("syntax error in regular expression %s at %s", lastre, prestr);
678
	default:
679
		FATAL("illegal primary in regular expression %s at %s", lastre, prestr);
680
	}
681
	return 0;	/*NOTREACHED*/
682
786
}
683
684
Node *concat(Node *np)
685
{
686


1572
	switch (rtok) {
687
	case CHAR: case DOT: case ALL: case EMPTYRE: case CCL: case NCCL: case '$': case '(':
688
662
		return (concat(op2(CAT, np, primary())));
689
	}
690
124
	return (np);
691
786
}
692
693
Node *alt(Node *np)
694
{
695
248
	if (rtok == OR) {
696
2
		rtok = relex();
697
2
		return (alt(op2(OR, np, concat(primary()))));
698
	}
699
122
	return (np);
700
124
}
701
702
Node *unary(Node *np)
703
{
704

1588
	switch (rtok) {
705
	case STAR:
706
7
		rtok = relex();
707
7
		return (unary(op2(STAR, np, NIL)));
708
	case PLUS:
709
1
		rtok = relex();
710
1
		return (unary(op2(PLUS, np, NIL)));
711
	case QUEST:
712
		rtok = relex();
713
		return (unary(op2(QUEST, np, NIL)));
714
	default:
715
786
		return (np);
716
	}
717
794
}
718
719
/*
720
 * Character class definitions conformant to the POSIX locale as
721
 * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source
722
 * and operating character sets are both ASCII (ISO646) or supersets
723
 * thereof.
724
 *
725
 * Note that to avoid overflowing the temporary buffer used in
726
 * relex(), the expanded character class (prior to range expansion)
727
 * must be less than twice the size of their full name.
728
 */
729
730
/* Because isblank doesn't show up in any of the header files on any
731
 * system i use, it's defined here.  if some other locale has a richer
732
 * definition of "blank", define HAS_ISBLANK and provide your own
733
 * version.
734
 * the parentheses here are an attempt to find a path through the maze
735
 * of macro definition and/or function and/or version provided.  thanks
736
 * to nelson beebe for the suggestion; let's see if it works everywhere.
737
 */
738
739
#ifndef HAS_ISBLANK
740
741
int (xisblank)(int c)
742
{
743
	return c==' ' || c=='\t';
744
}
745
746
#endif
747
748
struct charclass {
749
	const char *cc_name;
750
	int cc_namelen;
751
	int (*cc_func)(int);
752
} charclasses[] = {
753
	{ "alnum",	5,	isalnum },
754
	{ "alpha",	5,	isalpha },
755
#ifndef HAS_ISBLANK
756
	{ "blank",	5,	isspace }, /* was isblank */
757
#else
758
	{ "blank",	5,	isblank },
759
#endif
760
	{ "cntrl",	5,	iscntrl },
761
	{ "digit",	5,	isdigit },
762
	{ "graph",	5,	isgraph },
763
	{ "lower",	5,	islower },
764
	{ "print",	5,	isprint },
765
	{ "punct",	5,	ispunct },
766
	{ "space",	5,	isspace },
767
	{ "upper",	5,	isupper },
768
	{ "xdigit",	6,	isxdigit },
769
	{ NULL,		0,	NULL },
770
};
771
772
773
int relex(void)		/* lexical analyzer for reparse */
774
{
775
	int c, n;
776
	int cflag;
777
	static uschar *buf = 0;
778
	static int bufsz = 100;
779
1836
	uschar *bp;
780
	struct charclass *cc;
781
	int i;
782
783



918
	switch (c = *prestr++) {
784
2
	case '|': return OR;
785
7
	case '*': return STAR;
786
1
	case '+': return PLUS;
787
	case '?': return QUEST;
788
4
	case '.': return DOT;
789
120
	case '\0': prestr--; return '\0';
790
	case '^':
791
	case '$':
792
	case '(':
793
	case ')':
794
95
		return c;
795
	case '\\':
796
27
		rlxval = quoted(&prestr);
797
27
		return CHAR;
798
	default:
799
648
		rlxval = c;
800
648
		return CHAR;
801
	case '[':
802

23
		if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
803
			FATAL("out of space in reg expr %.10s..", lastre);
804
14
		bp = buf;
805
14
		if (*prestr == '^') {
806
			cflag = 1;
807
8
			prestr++;
808
8
		}
809
		else
810
			cflag = 0;
811
14
		n = 2 * strlen((const char *) prestr)+1;
812
14
		if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, "relex1"))
813
			FATAL("out of space for reg expr %.10s...", lastre);
814
		for (; ; ) {
815
147
			if ((c = *prestr++) == '\\') {
816
4
				*bp++ = '\\';
817
4
				if ((c = *prestr++) == '\0')
818
					FATAL("nonterminated character class %.20s...", lastre);
819
4
				*bp++ = c;
820
			/* } else if (c == '\n') { */
821
			/* 	FATAL("newline in character class %.20s...", lastre); */
822

147
			} else if (c == '[' && *prestr == ':') {
823
				/* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */
824
				for (cc = charclasses; cc->cc_name; cc++)
825
					if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0)
826
						break;
827
				if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
828
				    prestr[2 + cc->cc_namelen] == ']') {
829
					prestr += cc->cc_namelen + 3;
830
					for (i = 0; i < NCHARS; i++) {
831
						if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, "relex2"))
832
						    FATAL("out of space for reg expr %.10s...", lastre);
833
						if (cc->cc_func(i)) {
834
							*bp++ = i;
835
							n++;
836
						}
837
					}
838
				} else
839
					*bp++ = c;
840
143
			} else if (c == '\0') {
841
				FATAL("nonterminated character class %.20s", lastre);
842
143
			} else if (bp == buf) {	/* 1st char is special */
843
10
				*bp++ = c;
844
143
			} else if (c == ']') {
845
14
				*bp++ = 0;
846
14
				rlxstr = (uschar *) tostring((char *) buf);
847
14
				if (cflag == 0)
848
6
					return CCL;
849
				else
850
8
					return NCCL;
851
			} else
852
119
				*bp++ = c;
853
		}
854
	}
855
918
}
856
857
int cgoto(fa *f, int s, int c)
858
{
859
	int i, j, k;
860
	int *p, *q;
861
862
	assert(c == HAT || c < NCHARS);
863
27144
	while (f->accept >= maxsetvec) {	/* guessing here! */
864
		setvec = reallocarray(setvec, maxsetvec, 4 * sizeof(int));
865
		tmpset = reallocarray(tmpset, maxsetvec, 4 * sizeof(int));
866
		if (setvec == 0 || tmpset == 0)
867
			overflo("out of space in cgoto()");
868
		maxsetvec *= 4;
869
	}
870
169188
	for (i = 0; i <= f->accept; i++)
871
75546
		setvec[i] = 0;
872
9048
	setcnt = 0;
873
	/* compute positions of gototab[s,c] into setvec */
874
9048
	p = f->posns[s];
875
57330
	for (i = 1; i <= *p; i++) {
876
19617
		if ((k = f->re[p[i]].ltype) != FINAL) {
877
30196
			if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np))
878

29779
			 || (k == DOT && c != 0 && c != HAT)
879
19194
			 || (k == ALL && c != 0)
880
10724
			 || (k == EMPTYRE && c != 0)
881
10918
			 || (k == CCL && member(c, (char *) f->re[p[i]].lval.up))
882

10989
			 || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
883
9122
				q = f->re[p[i]].lfollow;
884
53776
				for (j = 1; j <= *q; j++) {
885
17766
					if (q[j] >= maxsetvec) {
886
						setvec = reallocarray(setvec,
887
						    maxsetvec, 4 * sizeof(int));
888
						tmpset = reallocarray(tmpset,
889
						    maxsetvec, 4 * sizeof(int));
890
						if (setvec == 0 || tmpset == 0)
891
							overflo("cgoto overflow");
892
						maxsetvec *= 4;
893
					}
894
17766
					if (setvec[q[j]] == 0) {
895
17766
						setcnt++;
896
17766
						setvec[q[j]] = 1;
897
17766
					}
898
				}
899
			}
900
		}
901
	}
902
	/* determine if setvec is a previous state */
903
9048
	tmpset[0] = setcnt;
904
	j = 1;
905
169188
	for (i = f->accept; i >= 0; i--)
906
75546
		if (setvec[i]) {
907
17766
			tmpset[j++] = i;
908
17766
		}
909
	/* tmpset == previous state? */
910
40676
	for (i = 1; i <= f->curstat; i++) {
911
19930
		p = f->posns[i];
912
19930
		if ((k = tmpset[0]) != p[0])
913
			goto different;
914
53412
		for (j = 1; j <= k; j++)
915
18066
			if (tmpset[j] != p[j])
916
				goto different;
917
		/* setvec is state i */
918
8640
		f->gototab[s][c] = i;
919
8640
		return i;
920
	  different:;
921
	}
922
923
	/* add tmpset to current set of states */
924
408
	if (f->curstat >= NSTATES-1) {
925
		f->curstat = 2;
926
		f->reset = 1;
927
		for (i = 2; i < NSTATES; i++)
928
			xfree(f->posns[i]);
929
	} else
930
408
		++(f->curstat);
931
212160
	for (i = 0; i < NCHARS; i++)
932
105672
		f->gototab[f->curstat][i] = 0;
933
408
	xfree(f->posns[f->curstat]);
934
408
	if ((p = (int *) calloc(setcnt+1, sizeof(int))) == NULL)
935
		overflo("out of space in cgoto");
936
937
408
	f->posns[f->curstat] = p;
938
408
	f->gototab[s][c] = f->curstat;
939
3998
	for (i = 0; i <= setcnt; i++)
940
1591
		p[i] = tmpset[i];
941
408
	if (setvec[f->accept])
942
		f->out[f->curstat] = 1;
943
	else
944
		f->out[f->curstat] = 0;
945
408
	return f->curstat;
946
9048
}
947
948
949
void freefa(fa *f)	/* free a finite automaton */
950
{
951
	int i;
952
953
	if (f == NULL)
954
		return;
955
	for (i = 0; i <= f->curstat; i++)
956
		xfree(f->posns[i]);
957
	for (i = 0; i <= f->accept; i++) {
958
		xfree(f->re[i].lfollow);
959
		if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
960
			xfree((f->re[i].lval.np));
961
	}
962
	xfree(f->restr);
963
	xfree(f);
964
}