GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: lib/libc/regex/engine.c Lines: 0 366 0.0 %
Date: 2017-11-13 Branches: 0 784 0.0 %

Line Branch Exec Source
1
/*	$OpenBSD: engine.c,v 1.24 2016/09/21 04:38:56 guenther Exp $	*/
2
3
/*-
4
 * Copyright (c) 1992, 1993, 1994 Henry Spencer.
5
 * Copyright (c) 1992, 1993, 1994
6
 *	The Regents of the University of California.  All rights reserved.
7
 *
8
 * This code is derived from software contributed to Berkeley by
9
 * Henry Spencer.
10
 *
11
 * Redistribution and use in source and binary forms, with or without
12
 * modification, are permitted provided that the following conditions
13
 * are met:
14
 * 1. Redistributions of source code must retain the above copyright
15
 *    notice, this list of conditions and the following disclaimer.
16
 * 2. Redistributions in binary form must reproduce the above copyright
17
 *    notice, this list of conditions and the following disclaimer in the
18
 *    documentation and/or other materials provided with the distribution.
19
 * 3. Neither the name of the University nor the names of its contributors
20
 *    may be used to endorse or promote products derived from this software
21
 *    without specific prior written permission.
22
 *
23
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33
 * SUCH DAMAGE.
34
 *
35
 *	@(#)engine.c	8.5 (Berkeley) 3/20/94
36
 */
37
38
/*
39
 * The matching engine and friends.  This file is #included by regexec.c
40
 * after suitable #defines of a variety of macros used herein, so that
41
 * different state representations can be used without duplicating masses
42
 * of code.
43
 */
44
45
#ifdef SNAMES
46
#define	matcher	smatcher
47
#define	fast	sfast
48
#define	slow	sslow
49
#define	dissect	sdissect
50
#define	backref	sbackref
51
#define	step	sstep
52
#define	print	sprint
53
#define	at	sat
54
#define	match	smat
55
#define	nope	snope
56
#endif
57
#ifdef LNAMES
58
#define	matcher	lmatcher
59
#define	fast	lfast
60
#define	slow	lslow
61
#define	dissect	ldissect
62
#define	backref	lbackref
63
#define	step	lstep
64
#define	print	lprint
65
#define	at	lat
66
#define	match	lmat
67
#define	nope	lnope
68
#endif
69
70
/* another structure passed up and down to avoid zillions of parameters */
71
struct match {
72
	struct re_guts *g;
73
	int eflags;
74
	regmatch_t *pmatch;	/* [nsub+1] (0 element unused) */
75
	char *offp;		/* offsets work from here */
76
	char *beginp;		/* start of string -- virtual NUL precedes */
77
	char *endp;		/* end of string -- virtual NUL here */
78
	char *coldp;		/* can be no match starting before here */
79
	char **lastpos;		/* [nplus+1] */
80
	STATEVARS;
81
	states st;		/* current states */
82
	states fresh;		/* states for a fresh start */
83
	states tmp;		/* temporary */
84
	states empty;		/* empty set of states */
85
};
86
87
static int matcher(struct re_guts *, char *, size_t, regmatch_t[], int);
88
static char *dissect(struct match *, char *, char *, sopno, sopno);
89
static char *backref(struct match *, char *, char *, sopno, sopno, sopno, int);
90
static char *fast(struct match *, char *, char *, sopno, sopno);
91
static char *slow(struct match *, char *, char *, sopno, sopno);
92
static states step(struct re_guts *, sopno, sopno, states, int, states);
93
#define MAX_RECURSION	100
94
#define	BOL	(OUT+1)
95
#define	EOL	(BOL+1)
96
#define	BOLEOL	(BOL+2)
97
#define	NOTHING	(BOL+3)
98
#define	BOW	(BOL+4)
99
#define	EOW	(BOL+5)
100
/* update nonchars[] array below when adding fake chars here */
101
#define	CODEMAX	(BOL+5)		/* highest code used */
102
#define	NONCHAR(c)	((c) > CHAR_MAX)
103
#define	NNONCHAR	(CODEMAX-CHAR_MAX)
104
#ifdef REDEBUG
105
static void print(struct match *, char *, states, int, FILE *);
106
#endif
107
#ifdef REDEBUG
108
static void at(struct match *, char *, char *, char *, sopno, sopno);
109
#endif
110
#ifdef REDEBUG
111
static const char *pchar(int);
112
#endif
113
114
#ifdef REDEBUG
115
#define	SP(t, s, c)	print(m, t, s, c, stdout)
116
#define	AT(t, p1, p2, s1, s2)	at(m, t, p1, p2, s1, s2)
117
#define	NOTE(str)	{ if (m->eflags&REG_TRACE) (void)printf("=%s\n", (str)); }
118
static int nope = 0;
119
#else
120
#define	SP(t, s, c)	/* nothing */
121
#define	AT(t, p1, p2, s1, s2)	/* nothing */
122
#define	NOTE(s)	/* nothing */
123
#endif
124
125
/*
126
 - matcher - the actual matching engine
127
 */
128
static int			/* 0 success, REG_NOMATCH failure */
129
matcher(struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[],
130
    int eflags)
131
{
132
	char *endp;
133
	int i;
134
	struct match mv;
135
	struct match *m = &mv;
136
	char *dp;
137
	const sopno gf = g->firststate+1;	/* +1 for OEND */
138
	const sopno gl = g->laststate;
139
	char *start;
140
	char *stop;
141
142
	/* simplify the situation where possible */
143
	if (g->cflags&REG_NOSUB)
144
		nmatch = 0;
145
	if (eflags&REG_STARTEND) {
146
		start = string + pmatch[0].rm_so;
147
		stop = string + pmatch[0].rm_eo;
148
	} else {
149
		start = string;
150
		stop = start + strlen(start);
151
	}
152
	if (stop < start)
153
		return(REG_INVARG);
154
155
	/* prescreening; this does wonders for this rather slow code */
156
	if (g->must != NULL) {
157
		for (dp = start; dp < stop; dp++)
158
			if (*dp == g->must[0] && stop - dp >= g->mlen &&
159
			    memcmp(dp, g->must, g->mlen) == 0)
160
				break;
161
		if (dp == stop)		/* we didn't find g->must */
162
			return(REG_NOMATCH);
163
	}
164
165
	/* match struct setup */
166
	m->g = g;
167
	m->eflags = eflags;
168
	m->pmatch = NULL;
169
	m->lastpos = NULL;
170
	m->offp = string;
171
	m->beginp = start;
172
	m->endp = stop;
173
	STATESETUP(m, 4);
174
	SETUP(m->st);
175
	SETUP(m->fresh);
176
	SETUP(m->tmp);
177
	SETUP(m->empty);
178
	CLEAR(m->empty);
179
180
	/* this loop does only one repetition except for backrefs */
181
	for (;;) {
182
		endp = fast(m, start, stop, gf, gl);
183
		if (endp == NULL) {		/* a miss */
184
			free(m->pmatch);
185
			free(m->lastpos);
186
			STATETEARDOWN(m);
187
			return(REG_NOMATCH);
188
		}
189
		if (nmatch == 0 && !g->backrefs)
190
			break;		/* no further info needed */
191
192
		/* where? */
193
		assert(m->coldp != NULL);
194
		for (;;) {
195
			NOTE("finding start");
196
			endp = slow(m, m->coldp, stop, gf, gl);
197
			if (endp != NULL)
198
				break;
199
			assert(m->coldp < m->endp);
200
			m->coldp++;
201
		}
202
		if (nmatch == 1 && !g->backrefs)
203
			break;		/* no further info needed */
204
205
		/* oh my, he wants the subexpressions... */
206
		if (m->pmatch == NULL)
207
			m->pmatch = reallocarray(NULL, m->g->nsub + 1,
208
			    sizeof(regmatch_t));
209
		if (m->pmatch == NULL) {
210
			STATETEARDOWN(m);
211
			return(REG_ESPACE);
212
		}
213
		for (i = 1; i <= m->g->nsub; i++)
214
			m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
215
		if (!g->backrefs && !(m->eflags&REG_BACKR)) {
216
			NOTE("dissecting");
217
			dp = dissect(m, m->coldp, endp, gf, gl);
218
		} else {
219
			if (g->nplus > 0 && m->lastpos == NULL)
220
				m->lastpos = reallocarray(NULL,
221
				    g->nplus+1, sizeof(char *));
222
			if (g->nplus > 0 && m->lastpos == NULL) {
223
				free(m->pmatch);
224
				STATETEARDOWN(m);
225
				return(REG_ESPACE);
226
			}
227
			NOTE("backref dissect");
228
			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
229
		}
230
		if (dp != NULL)
231
			break;
232
233
		/* uh-oh... we couldn't find a subexpression-level match */
234
		assert(g->backrefs);	/* must be back references doing it */
235
		assert(g->nplus == 0 || m->lastpos != NULL);
236
		for (;;) {
237
			if (dp != NULL || endp <= m->coldp)
238
				break;		/* defeat */
239
			NOTE("backoff");
240
			endp = slow(m, m->coldp, endp-1, gf, gl);
241
			if (endp == NULL)
242
				break;		/* defeat */
243
			/* try it on a shorter possibility */
244
#ifndef NDEBUG
245
			for (i = 1; i <= m->g->nsub; i++) {
246
				assert(m->pmatch[i].rm_so == -1);
247
				assert(m->pmatch[i].rm_eo == -1);
248
			}
249
#endif
250
			NOTE("backoff dissect");
251
			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
252
		}
253
		assert(dp == NULL || dp == endp);
254
		if (dp != NULL)		/* found a shorter one */
255
			break;
256
257
		/* despite initial appearances, there is no match here */
258
		NOTE("false alarm");
259
		if (m->coldp == stop)
260
			break;
261
		start = m->coldp + 1;	/* recycle starting later */
262
	}
263
264
	/* fill in the details if requested */
265
	if (nmatch > 0) {
266
		pmatch[0].rm_so = m->coldp - m->offp;
267
		pmatch[0].rm_eo = endp - m->offp;
268
	}
269
	if (nmatch > 1) {
270
		assert(m->pmatch != NULL);
271
		for (i = 1; i < nmatch; i++)
272
			if (i <= m->g->nsub)
273
				pmatch[i] = m->pmatch[i];
274
			else {
275
				pmatch[i].rm_so = -1;
276
				pmatch[i].rm_eo = -1;
277
			}
278
	}
279
280
	free(m->pmatch);
281
	free(m->lastpos);
282
	STATETEARDOWN(m);
283
	return(0);
284
}
285
286
/*
287
 - dissect - figure out what matched what, no back references
288
 */
289
static char *			/* == stop (success) always */
290
dissect(struct match *m, char *start, char *stop, sopno startst, sopno stopst)
291
{
292
	int i;
293
	sopno ss;	/* start sop of current subRE */
294
	sopno es;	/* end sop of current subRE */
295
	char *sp;	/* start of string matched by it */
296
	char *stp;	/* string matched by it cannot pass here */
297
	char *rest;	/* start of rest of string */
298
	char *tail;	/* string unmatched by rest of RE */
299
	sopno ssub;	/* start sop of subsubRE */
300
	sopno esub;	/* end sop of subsubRE */
301
	char *ssp;	/* start of string matched by subsubRE */
302
	char *sep;	/* end of string matched by subsubRE */
303
	char *oldssp;	/* previous ssp */
304
	char *dp;
305
306
	AT("diss", start, stop, startst, stopst);
307
	sp = start;
308
	for (ss = startst; ss < stopst; ss = es) {
309
		/* identify end of subRE */
310
		es = ss;
311
		switch (OP(m->g->strip[es])) {
312
		case OPLUS_:
313
		case OQUEST_:
314
			es += OPND(m->g->strip[es]);
315
			break;
316
		case OCH_:
317
			while (OP(m->g->strip[es]) != O_CH)
318
				es += OPND(m->g->strip[es]);
319
			break;
320
		}
321
		es++;
322
323
		/* figure out what it matched */
324
		switch (OP(m->g->strip[ss])) {
325
		case OEND:
326
			assert(nope);
327
			break;
328
		case OCHAR:
329
			sp++;
330
			break;
331
		case OBOL:
332
		case OEOL:
333
		case OBOW:
334
		case OEOW:
335
			break;
336
		case OANY:
337
		case OANYOF:
338
			sp++;
339
			break;
340
		case OBACK_:
341
		case O_BACK:
342
			assert(nope);
343
			break;
344
		/* cases where length of match is hard to find */
345
		case OQUEST_:
346
			stp = stop;
347
			for (;;) {
348
				/* how long could this one be? */
349
				rest = slow(m, sp, stp, ss, es);
350
				assert(rest != NULL);	/* it did match */
351
				/* could the rest match the rest? */
352
				tail = slow(m, rest, stop, es, stopst);
353
				if (tail == stop)
354
					break;		/* yes! */
355
				/* no -- try a shorter match for this one */
356
				stp = rest - 1;
357
				assert(stp >= sp);	/* it did work */
358
			}
359
			ssub = ss + 1;
360
			esub = es - 1;
361
			/* did innards match? */
362
			if (slow(m, sp, rest, ssub, esub) != NULL) {
363
				dp = dissect(m, sp, rest, ssub, esub);
364
				assert(dp == rest);
365
			} else		/* no */
366
				assert(sp == rest);
367
			sp = rest;
368
			break;
369
		case OPLUS_:
370
			stp = stop;
371
			for (;;) {
372
				/* how long could this one be? */
373
				rest = slow(m, sp, stp, ss, es);
374
				assert(rest != NULL);	/* it did match */
375
				/* could the rest match the rest? */
376
				tail = slow(m, rest, stop, es, stopst);
377
				if (tail == stop)
378
					break;		/* yes! */
379
				/* no -- try a shorter match for this one */
380
				stp = rest - 1;
381
				assert(stp >= sp);	/* it did work */
382
			}
383
			ssub = ss + 1;
384
			esub = es - 1;
385
			ssp = sp;
386
			oldssp = ssp;
387
			for (;;) {	/* find last match of innards */
388
				sep = slow(m, ssp, rest, ssub, esub);
389
				if (sep == NULL || sep == ssp)
390
					break;	/* failed or matched null */
391
				oldssp = ssp;	/* on to next try */
392
				ssp = sep;
393
			}
394
			if (sep == NULL) {
395
				/* last successful match */
396
				sep = ssp;
397
				ssp = oldssp;
398
			}
399
			assert(sep == rest);	/* must exhaust substring */
400
			assert(slow(m, ssp, sep, ssub, esub) == rest);
401
			dp = dissect(m, ssp, sep, ssub, esub);
402
			assert(dp == sep);
403
			sp = rest;
404
			break;
405
		case OCH_:
406
			stp = stop;
407
			for (;;) {
408
				/* how long could this one be? */
409
				rest = slow(m, sp, stp, ss, es);
410
				assert(rest != NULL);	/* it did match */
411
				/* could the rest match the rest? */
412
				tail = slow(m, rest, stop, es, stopst);
413
				if (tail == stop)
414
					break;		/* yes! */
415
				/* no -- try a shorter match for this one */
416
				stp = rest - 1;
417
				assert(stp >= sp);	/* it did work */
418
			}
419
			ssub = ss + 1;
420
			esub = ss + OPND(m->g->strip[ss]) - 1;
421
			assert(OP(m->g->strip[esub]) == OOR1);
422
			for (;;) {	/* find first matching branch */
423
				if (slow(m, sp, rest, ssub, esub) == rest)
424
					break;	/* it matched all of it */
425
				/* that one missed, try next one */
426
				assert(OP(m->g->strip[esub]) == OOR1);
427
				esub++;
428
				assert(OP(m->g->strip[esub]) == OOR2);
429
				ssub = esub + 1;
430
				esub += OPND(m->g->strip[esub]);
431
				if (OP(m->g->strip[esub]) == OOR2)
432
					esub--;
433
				else
434
					assert(OP(m->g->strip[esub]) == O_CH);
435
			}
436
			dp = dissect(m, sp, rest, ssub, esub);
437
			assert(dp == rest);
438
			sp = rest;
439
			break;
440
		case O_PLUS:
441
		case O_QUEST:
442
		case OOR1:
443
		case OOR2:
444
		case O_CH:
445
			assert(nope);
446
			break;
447
		case OLPAREN:
448
			i = OPND(m->g->strip[ss]);
449
			assert(0 < i && i <= m->g->nsub);
450
			m->pmatch[i].rm_so = sp - m->offp;
451
			break;
452
		case ORPAREN:
453
			i = OPND(m->g->strip[ss]);
454
			assert(0 < i && i <= m->g->nsub);
455
			m->pmatch[i].rm_eo = sp - m->offp;
456
			break;
457
		default:		/* uh oh */
458
			assert(nope);
459
			break;
460
		}
461
	}
462
463
	assert(sp == stop);
464
	return(sp);
465
}
466
467
/*
468
 - backref - figure out what matched what, figuring in back references
469
 */
470
static char *			/* == stop (success) or NULL (failure) */
471
backref(struct match *m, char *start, char *stop, sopno startst, sopno stopst,
472
    sopno lev, int rec)			/* PLUS nesting level */
473
{
474
	int i;
475
	sopno ss;	/* start sop of current subRE */
476
	char *sp;	/* start of string matched by it */
477
	sopno ssub;	/* start sop of subsubRE */
478
	sopno esub;	/* end sop of subsubRE */
479
	char *ssp;	/* start of string matched by subsubRE */
480
	char *dp;
481
	size_t len;
482
	int hard;
483
	sop s;
484
	regoff_t offsave;
485
	cset *cs;
486
487
	AT("back", start, stop, startst, stopst);
488
	sp = start;
489
490
	/* get as far as we can with easy stuff */
491
	hard = 0;
492
	for (ss = startst; !hard && ss < stopst; ss++)
493
		switch (OP(s = m->g->strip[ss])) {
494
		case OCHAR:
495
			if (sp == stop || *sp++ != (char)OPND(s))
496
				return(NULL);
497
			break;
498
		case OANY:
499
			if (sp == stop)
500
				return(NULL);
501
			sp++;
502
			break;
503
		case OANYOF:
504
			cs = &m->g->sets[OPND(s)];
505
			if (sp == stop || !CHIN(cs, *sp++))
506
				return(NULL);
507
			break;
508
		case OBOL:
509
			if ((sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
510
			    (sp > m->offp && sp < m->endp &&
511
			     *(sp-1) == '\n' && (m->g->cflags&REG_NEWLINE)))
512
				{ /* yes */ }
513
			else
514
				return(NULL);
515
			break;
516
		case OEOL:
517
			if ( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
518
					(sp < m->endp && *sp == '\n' &&
519
						(m->g->cflags&REG_NEWLINE)) )
520
				{ /* yes */ }
521
			else
522
				return(NULL);
523
			break;
524
		case OBOW:
525
			if (sp < m->endp && ISWORD(*sp) &&
526
			    ((sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
527
			     (sp > m->offp && !ISWORD(*(sp-1)))))
528
				{ /* yes */ }
529
			else
530
				return(NULL);
531
			break;
532
		case OEOW:
533
			if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
534
					(sp < m->endp && *sp == '\n' &&
535
						(m->g->cflags&REG_NEWLINE)) ||
536
					(sp < m->endp && !ISWORD(*sp)) ) &&
537
					(sp > m->beginp && ISWORD(*(sp-1))) )
538
				{ /* yes */ }
539
			else
540
				return(NULL);
541
			break;
542
		case O_QUEST:
543
			break;
544
		case OOR1:	/* matches null but needs to skip */
545
			ss++;
546
			s = m->g->strip[ss];
547
			do {
548
				assert(OP(s) == OOR2);
549
				ss += OPND(s);
550
			} while (OP(s = m->g->strip[ss]) != O_CH);
551
			/* note that the ss++ gets us past the O_CH */
552
			break;
553
		default:	/* have to make a choice */
554
			hard = 1;
555
			break;
556
		}
557
	if (!hard) {		/* that was it! */
558
		if (sp != stop)
559
			return(NULL);
560
		return(sp);
561
	}
562
	ss--;			/* adjust for the for's final increment */
563
564
	/* the hard stuff */
565
	AT("hard", sp, stop, ss, stopst);
566
	s = m->g->strip[ss];
567
	switch (OP(s)) {
568
	case OBACK_:		/* the vilest depths */
569
		i = OPND(s);
570
		assert(0 < i && i <= m->g->nsub);
571
		if (m->pmatch[i].rm_eo == -1)
572
			return(NULL);
573
		assert(m->pmatch[i].rm_so != -1);
574
		len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
575
		if (len == 0 && rec++ > MAX_RECURSION)
576
			return(NULL);
577
		assert(stop - m->beginp >= len);
578
		if (sp > stop - len)
579
			return(NULL);	/* not enough left to match */
580
		ssp = m->offp + m->pmatch[i].rm_so;
581
		if (memcmp(sp, ssp, len) != 0)
582
			return(NULL);
583
		while (m->g->strip[ss] != SOP(O_BACK, i))
584
			ss++;
585
		return(backref(m, sp+len, stop, ss+1, stopst, lev, rec));
586
		break;
587
	case OQUEST_:		/* to null or not */
588
		dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
589
		if (dp != NULL)
590
			return(dp);	/* not */
591
		return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev, rec));
592
		break;
593
	case OPLUS_:
594
		assert(m->lastpos != NULL);
595
		assert(lev+1 <= m->g->nplus);
596
		m->lastpos[lev+1] = sp;
597
		return(backref(m, sp, stop, ss+1, stopst, lev+1, rec));
598
		break;
599
	case O_PLUS:
600
		if (sp == m->lastpos[lev])	/* last pass matched null */
601
			return(backref(m, sp, stop, ss+1, stopst, lev-1, rec));
602
		/* try another pass */
603
		m->lastpos[lev] = sp;
604
		dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev, rec);
605
		if (dp == NULL)
606
			return(backref(m, sp, stop, ss+1, stopst, lev-1, rec));
607
		else
608
			return(dp);
609
		break;
610
	case OCH_:		/* find the right one, if any */
611
		ssub = ss + 1;
612
		esub = ss + OPND(s) - 1;
613
		assert(OP(m->g->strip[esub]) == OOR1);
614
		for (;;) {	/* find first matching branch */
615
			dp = backref(m, sp, stop, ssub, esub, lev, rec);
616
			if (dp != NULL)
617
				return(dp);
618
			/* that one missed, try next one */
619
			if (OP(m->g->strip[esub]) == O_CH)
620
				return(NULL);	/* there is none */
621
			esub++;
622
			assert(OP(m->g->strip[esub]) == OOR2);
623
			ssub = esub + 1;
624
			esub += OPND(m->g->strip[esub]);
625
			if (OP(m->g->strip[esub]) == OOR2)
626
				esub--;
627
			else
628
				assert(OP(m->g->strip[esub]) == O_CH);
629
		}
630
		break;
631
	case OLPAREN:		/* must undo assignment if rest fails */
632
		i = OPND(s);
633
		assert(0 < i && i <= m->g->nsub);
634
		offsave = m->pmatch[i].rm_so;
635
		m->pmatch[i].rm_so = sp - m->offp;
636
		dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
637
		if (dp != NULL)
638
			return(dp);
639
		m->pmatch[i].rm_so = offsave;
640
		return(NULL);
641
		break;
642
	case ORPAREN:		/* must undo assignment if rest fails */
643
		i = OPND(s);
644
		assert(0 < i && i <= m->g->nsub);
645
		offsave = m->pmatch[i].rm_eo;
646
		m->pmatch[i].rm_eo = sp - m->offp;
647
		dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
648
		if (dp != NULL)
649
			return(dp);
650
		m->pmatch[i].rm_eo = offsave;
651
		return(NULL);
652
		break;
653
	default:		/* uh oh */
654
		assert(nope);
655
		break;
656
	}
657
658
	/* "can't happen" */
659
	assert(nope);
660
	/* NOTREACHED */
661
	return NULL;
662
}
663
664
/*
665
 - fast - step through the string at top speed
666
 */
667
static char *			/* where tentative match ended, or NULL */
668
fast(struct match *m, char *start, char *stop, sopno startst, sopno stopst)
669
{
670
	states st = m->st;
671
	states fresh = m->fresh;
672
	states tmp = m->tmp;
673
	char *p = start;
674
	int c;
675
	int lastc;	/* previous c */
676
	int flagch;
677
	int i;
678
	char *coldp;	/* last p after which no match was underway */
679
680
	if (start == m->offp || (start == m->beginp && !(m->eflags&REG_NOTBOL)))
681
		c = OUT;
682
	else
683
		c = *(start-1);
684
685
	CLEAR(st);
686
	SET1(st, startst);
687
	st = step(m->g, startst, stopst, st, NOTHING, st);
688
	ASSIGN(fresh, st);
689
	SP("start", st, *p);
690
	coldp = NULL;
691
	for (;;) {
692
		/* next character */
693
		lastc = c;
694
		c = (p == m->endp) ? OUT : *p;
695
		if (EQ(st, fresh))
696
			coldp = p;
697
698
		/* is there an EOL and/or BOL between lastc and c? */
699
		flagch = '\0';
700
		i = 0;
701
		if ((lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
702
		    (lastc == OUT && !(m->eflags&REG_NOTBOL))) {
703
			flagch = BOL;
704
			i = m->g->nbol;
705
		}
706
		if ((c == '\n' && m->g->cflags&REG_NEWLINE) ||
707
		    (c == OUT && !(m->eflags&REG_NOTEOL)) ) {
708
			flagch = (flagch == BOL) ? BOLEOL : EOL;
709
			i += m->g->neol;
710
		}
711
		if (i != 0) {
712
			for (; i > 0; i--)
713
				st = step(m->g, startst, stopst,
714
				    st, flagch, st);
715
			SP("boleol", st, c);
716
		}
717
718
		/* how about a word boundary? */
719
		if ((flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
720
		    (c != OUT && ISWORD(c)))
721
			flagch = BOW;
722
		if ((lastc != OUT && ISWORD(lastc)) &&
723
		    (flagch == EOL || (c != OUT && !ISWORD(c))))
724
			flagch = EOW;
725
		if (flagch == BOW || flagch == EOW) {
726
			st = step(m->g, startst, stopst, st, flagch, st);
727
			SP("boweow", st, c);
728
		}
729
730
		/* are we done? */
731
		if (ISSET(st, stopst) || p == stop)
732
			break;		/* NOTE BREAK OUT */
733
734
		/* no, we must deal with this character */
735
		ASSIGN(tmp, st);
736
		ASSIGN(st, fresh);
737
		assert(c != OUT);
738
		st = step(m->g, startst, stopst, tmp, c, st);
739
		SP("aft", st, c);
740
		assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
741
		p++;
742
	}
743
744
	assert(coldp != NULL);
745
	m->coldp = coldp;
746
	if (ISSET(st, stopst))
747
		return(p+1);
748
	else
749
		return(NULL);
750
}
751
752
/*
753
 - slow - step through the string more deliberately
754
 */
755
static char *			/* where it ended */
756
slow(struct match *m, char *start, char *stop, sopno startst, sopno stopst)
757
{
758
	states st = m->st;
759
	states empty = m->empty;
760
	states tmp = m->tmp;
761
	char *p = start;
762
	int c;
763
	int lastc;	/* previous c */
764
	int flagch;
765
	int i;
766
	char *matchp;	/* last p at which a match ended */
767
768
	if (start == m->offp || (start == m->beginp && !(m->eflags&REG_NOTBOL)))
769
		c = OUT;
770
	else
771
		c = *(start-1);
772
773
	AT("slow", start, stop, startst, stopst);
774
	CLEAR(st);
775
	SET1(st, startst);
776
	SP("sstart", st, *p);
777
	st = step(m->g, startst, stopst, st, NOTHING, st);
778
	matchp = NULL;
779
	for (;;) {
780
		/* next character */
781
		lastc = c;
782
		c = (p == m->endp) ? OUT : *p;
783
784
		/* is there an EOL and/or BOL between lastc and c? */
785
		flagch = '\0';
786
		i = 0;
787
		if ((lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
788
		    (lastc == OUT && !(m->eflags&REG_NOTBOL))) {
789
			flagch = BOL;
790
			i = m->g->nbol;
791
		}
792
		if ((c == '\n' && m->g->cflags&REG_NEWLINE) ||
793
		    (c == OUT && !(m->eflags&REG_NOTEOL))) {
794
			flagch = (flagch == BOL) ? BOLEOL : EOL;
795
			i += m->g->neol;
796
		}
797
		if (i != 0) {
798
			for (; i > 0; i--)
799
				st = step(m->g, startst, stopst,
800
				    st, flagch, st);
801
			SP("sboleol", st, c);
802
		}
803
804
		/* how about a word boundary? */
805
		if ((flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
806
		    (c != OUT && ISWORD(c)))
807
			flagch = BOW;
808
		if ((lastc != OUT && ISWORD(lastc)) &&
809
		    (flagch == EOL || (c != OUT && !ISWORD(c))))
810
			flagch = EOW;
811
		if (flagch == BOW || flagch == EOW) {
812
			st = step(m->g, startst, stopst, st, flagch, st);
813
			SP("sboweow", st, c);
814
		}
815
816
		/* are we done? */
817
		if (ISSET(st, stopst))
818
			matchp = p;
819
		if (EQ(st, empty) || p == stop)
820
			break;		/* NOTE BREAK OUT */
821
822
		/* no, we must deal with this character */
823
		ASSIGN(tmp, st);
824
		ASSIGN(st, empty);
825
		assert(c != OUT);
826
		st = step(m->g, startst, stopst, tmp, c, st);
827
		SP("saft", st, c);
828
		assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
829
		p++;
830
	}
831
832
	return(matchp);
833
}
834
835
836
/*
837
 - step - map set of states reachable before char to set reachable after
838
 */
839
static states
840
step(struct re_guts *g,
841
    sopno start,		/* start state within strip */
842
    sopno stop,			/* state after stop state within strip */
843
    states bef,			/* states reachable before */
844
    int ch,			/* character or NONCHAR code */
845
    states aft)			/* states already known reachable after */
846
{
847
	cset *cs;
848
	sop s;
849
	sopno pc;
850
	onestate here;		/* note, macros know this name */
851
	sopno look;
852
	int i;
853
854
	for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
855
		s = g->strip[pc];
856
		switch (OP(s)) {
857
		case OEND:
858
			assert(pc == stop-1);
859
			break;
860
		case OCHAR:
861
			/* only characters can match */
862
			assert(!NONCHAR(ch) || ch != (char)OPND(s));
863
			if (ch == (char)OPND(s))
864
				FWD(aft, bef, 1);
865
			break;
866
		case OBOL:
867
			if (ch == BOL || ch == BOLEOL)
868
				FWD(aft, bef, 1);
869
			break;
870
		case OEOL:
871
			if (ch == EOL || ch == BOLEOL)
872
				FWD(aft, bef, 1);
873
			break;
874
		case OBOW:
875
			if (ch == BOW)
876
				FWD(aft, bef, 1);
877
			break;
878
		case OEOW:
879
			if (ch == EOW)
880
				FWD(aft, bef, 1);
881
			break;
882
		case OANY:
883
			if (!NONCHAR(ch))
884
				FWD(aft, bef, 1);
885
			break;
886
		case OANYOF:
887
			cs = &g->sets[OPND(s)];
888
			if (!NONCHAR(ch) && CHIN(cs, ch))
889
				FWD(aft, bef, 1);
890
			break;
891
		case OBACK_:		/* ignored here */
892
		case O_BACK:
893
			FWD(aft, aft, 1);
894
			break;
895
		case OPLUS_:		/* forward, this is just an empty */
896
			FWD(aft, aft, 1);
897
			break;
898
		case O_PLUS:		/* both forward and back */
899
			FWD(aft, aft, 1);
900
			i = ISSETBACK(aft, OPND(s));
901
			BACK(aft, aft, OPND(s));
902
			if (!i && ISSETBACK(aft, OPND(s))) {
903
				/* oho, must reconsider loop body */
904
				pc -= OPND(s) + 1;
905
				INIT(here, pc);
906
			}
907
			break;
908
		case OQUEST_:		/* two branches, both forward */
909
			FWD(aft, aft, 1);
910
			FWD(aft, aft, OPND(s));
911
			break;
912
		case O_QUEST:		/* just an empty */
913
			FWD(aft, aft, 1);
914
			break;
915
		case OLPAREN:		/* not significant here */
916
		case ORPAREN:
917
			FWD(aft, aft, 1);
918
			break;
919
		case OCH_:		/* mark the first two branches */
920
			FWD(aft, aft, 1);
921
			assert(OP(g->strip[pc+OPND(s)]) == OOR2);
922
			FWD(aft, aft, OPND(s));
923
			break;
924
		case OOR1:		/* done a branch, find the O_CH */
925
			if (ISSTATEIN(aft, here)) {
926
				for (look = 1;
927
				    OP(s = g->strip[pc+look]) != O_CH;
928
				    look += OPND(s))
929
					assert(OP(s) == OOR2);
930
				FWD(aft, aft, look);
931
			}
932
			break;
933
		case OOR2:		/* propagate OCH_'s marking */
934
			FWD(aft, aft, 1);
935
			if (OP(g->strip[pc+OPND(s)]) != O_CH) {
936
				assert(OP(g->strip[pc+OPND(s)]) == OOR2);
937
				FWD(aft, aft, OPND(s));
938
			}
939
			break;
940
		case O_CH:		/* just empty */
941
			FWD(aft, aft, 1);
942
			break;
943
		default:		/* ooooops... */
944
			assert(nope);
945
			break;
946
		}
947
	}
948
949
	return(aft);
950
}
951
952
#ifdef REDEBUG
953
/*
954
 - print - print a set of states
955
 */
956
static void
957
print(struct match *m, char *caption, states st, int ch, FILE *d)
958
{
959
	struct re_guts *g = m->g;
960
	int i;
961
	int first = 1;
962
963
	if (!(m->eflags&REG_TRACE))
964
		return;
965
966
	(void)fprintf(d, "%s", caption);
967
	(void)fprintf(d, " %s", pchar(ch));
968
	for (i = 0; i < g->nstates; i++) {
969
		if (ISSET(st, i)) {
970
			(void)fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
971
			first = 0;
972
		}
973
	}
974
	(void)fprintf(d, "\n");
975
}
976
977
/*
978
 - at - print current situation
979
 */
980
static void
981
at(struct match *m, char *title, char *start, char *stop, sopno startst,
982
    sopno stopst)
983
{
984
	if (!(m->eflags&REG_TRACE))
985
		return;
986
987
	(void)printf("%s %s-", title, pchar(*start));
988
	(void)printf("%s ", pchar(*stop));
989
	(void)printf("%ld-%ld\n", (long)startst, (long)stopst);
990
}
991
992
#ifndef PCHARDONE
993
#define	PCHARDONE	/* never again */
994
static const char *nonchars[] =
995
    { "OUT", "BOL", "EOL", "BOLEOL", "NOTHING", "BOW", "EOW" };
996
#define	PNONCHAR(c)						\
997
	((c) - OUT < (sizeof(nonchars)/sizeof(nonchars[0]))	\
998
	    ? nonchars[(c) - OUT] : "invalid")
999
1000
/*
1001
 - pchar - make a character printable
1002
 *
1003
 * Is this identical to regchar() over in debug.c?  Well, yes.  But a
1004
 * duplicate here avoids having a debugging-capable regexec.o tied to
1005
 * a matching debug.o, and this is convenient.  It all disappears in
1006
 * the non-debug compilation anyway, so it doesn't matter much.
1007
 */
1008
static const char *		/* -> representation */
1009
pchar(int ch)
1010
{
1011
	static char pbuf[10];
1012
1013
	if (NONCHAR(ch)) {
1014
		if (ch - OUT < (sizeof(nonchars)/sizeof(nonchars[0])))
1015
			return nonchars[ch - OUT];
1016
		return "invalid";
1017
	}
1018
	if (isprint((unsigned char)ch) || ch == ' ')
1019
		(void)snprintf(pbuf, sizeof pbuf, "%c", ch);
1020
	else
1021
		(void)snprintf(pbuf, sizeof pbuf, "\\%o", ch);
1022
	return(pbuf);
1023
}
1024
#endif
1025
#endif
1026
1027
#undef	matcher
1028
#undef	fast
1029
#undef	slow
1030
#undef	dissect
1031
#undef	backref
1032
#undef	step
1033
#undef	print
1034
#undef	at
1035
#undef	match
1036
#undef	nope