1 |
|
|
/* $OpenBSD: engine.c,v 1.24 2016/09/21 04:38:56 guenther Exp $ */ |
2 |
|
|
|
3 |
|
|
/*- |
4 |
|
|
* Copyright (c) 1992, 1993, 1994 Henry Spencer. |
5 |
|
|
* Copyright (c) 1992, 1993, 1994 |
6 |
|
|
* The Regents of the University of California. All rights reserved. |
7 |
|
|
* |
8 |
|
|
* This code is derived from software contributed to Berkeley by |
9 |
|
|
* Henry Spencer. |
10 |
|
|
* |
11 |
|
|
* Redistribution and use in source and binary forms, with or without |
12 |
|
|
* modification, are permitted provided that the following conditions |
13 |
|
|
* are met: |
14 |
|
|
* 1. Redistributions of source code must retain the above copyright |
15 |
|
|
* notice, this list of conditions and the following disclaimer. |
16 |
|
|
* 2. Redistributions in binary form must reproduce the above copyright |
17 |
|
|
* notice, this list of conditions and the following disclaimer in the |
18 |
|
|
* documentation and/or other materials provided with the distribution. |
19 |
|
|
* 3. Neither the name of the University nor the names of its contributors |
20 |
|
|
* may be used to endorse or promote products derived from this software |
21 |
|
|
* without specific prior written permission. |
22 |
|
|
* |
23 |
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
24 |
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
25 |
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
26 |
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
27 |
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
28 |
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
29 |
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
30 |
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
31 |
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
32 |
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
33 |
|
|
* SUCH DAMAGE. |
34 |
|
|
* |
35 |
|
|
* @(#)engine.c 8.5 (Berkeley) 3/20/94 |
36 |
|
|
*/ |
37 |
|
|
|
38 |
|
|
/* |
39 |
|
|
* The matching engine and friends. This file is #included by regexec.c |
40 |
|
|
* after suitable #defines of a variety of macros used herein, so that |
41 |
|
|
* different state representations can be used without duplicating masses |
42 |
|
|
* of code. |
43 |
|
|
*/ |
44 |
|
|
|
45 |
|
|
#ifdef SNAMES |
46 |
|
|
#define matcher smatcher |
47 |
|
|
#define fast sfast |
48 |
|
|
#define slow sslow |
49 |
|
|
#define dissect sdissect |
50 |
|
|
#define backref sbackref |
51 |
|
|
#define step sstep |
52 |
|
|
#define print sprint |
53 |
|
|
#define at sat |
54 |
|
|
#define match smat |
55 |
|
|
#define nope snope |
56 |
|
|
#endif |
57 |
|
|
#ifdef LNAMES |
58 |
|
|
#define matcher lmatcher |
59 |
|
|
#define fast lfast |
60 |
|
|
#define slow lslow |
61 |
|
|
#define dissect ldissect |
62 |
|
|
#define backref lbackref |
63 |
|
|
#define step lstep |
64 |
|
|
#define print lprint |
65 |
|
|
#define at lat |
66 |
|
|
#define match lmat |
67 |
|
|
#define nope lnope |
68 |
|
|
#endif |
69 |
|
|
|
70 |
|
|
/* another structure passed up and down to avoid zillions of parameters */ |
71 |
|
|
struct match { |
72 |
|
|
struct re_guts *g; |
73 |
|
|
int eflags; |
74 |
|
|
regmatch_t *pmatch; /* [nsub+1] (0 element unused) */ |
75 |
|
|
char *offp; /* offsets work from here */ |
76 |
|
|
char *beginp; /* start of string -- virtual NUL precedes */ |
77 |
|
|
char *endp; /* end of string -- virtual NUL here */ |
78 |
|
|
char *coldp; /* can be no match starting before here */ |
79 |
|
|
char **lastpos; /* [nplus+1] */ |
80 |
|
|
STATEVARS; |
81 |
|
|
states st; /* current states */ |
82 |
|
|
states fresh; /* states for a fresh start */ |
83 |
|
|
states tmp; /* temporary */ |
84 |
|
|
states empty; /* empty set of states */ |
85 |
|
|
}; |
86 |
|
|
|
87 |
|
|
static int matcher(struct re_guts *, char *, size_t, regmatch_t[], int); |
88 |
|
|
static char *dissect(struct match *, char *, char *, sopno, sopno); |
89 |
|
|
static char *backref(struct match *, char *, char *, sopno, sopno, sopno, int); |
90 |
|
|
static char *fast(struct match *, char *, char *, sopno, sopno); |
91 |
|
|
static char *slow(struct match *, char *, char *, sopno, sopno); |
92 |
|
|
static states step(struct re_guts *, sopno, sopno, states, int, states); |
93 |
|
|
#define MAX_RECURSION 100 |
94 |
|
|
#define BOL (OUT+1) |
95 |
|
|
#define EOL (BOL+1) |
96 |
|
|
#define BOLEOL (BOL+2) |
97 |
|
|
#define NOTHING (BOL+3) |
98 |
|
|
#define BOW (BOL+4) |
99 |
|
|
#define EOW (BOL+5) |
100 |
|
|
/* update nonchars[] array below when adding fake chars here */ |
101 |
|
|
#define CODEMAX (BOL+5) /* highest code used */ |
102 |
|
|
#define NONCHAR(c) ((c) > CHAR_MAX) |
103 |
|
|
#define NNONCHAR (CODEMAX-CHAR_MAX) |
104 |
|
|
#ifdef REDEBUG |
105 |
|
|
static void print(struct match *, char *, states, int, FILE *); |
106 |
|
|
#endif |
107 |
|
|
#ifdef REDEBUG |
108 |
|
|
static void at(struct match *, char *, char *, char *, sopno, sopno); |
109 |
|
|
#endif |
110 |
|
|
#ifdef REDEBUG |
111 |
|
|
static const char *pchar(int); |
112 |
|
|
#endif |
113 |
|
|
|
114 |
|
|
#ifdef REDEBUG |
115 |
|
|
#define SP(t, s, c) print(m, t, s, c, stdout) |
116 |
|
|
#define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2) |
117 |
|
|
#define NOTE(str) { if (m->eflags®_TRACE) (void)printf("=%s\n", (str)); } |
118 |
|
|
static int nope = 0; |
119 |
|
|
#else |
120 |
|
|
#define SP(t, s, c) /* nothing */ |
121 |
|
|
#define AT(t, p1, p2, s1, s2) /* nothing */ |
122 |
|
|
#define NOTE(s) /* nothing */ |
123 |
|
|
#endif |
124 |
|
|
|
125 |
|
|
/* |
126 |
|
|
- matcher - the actual matching engine |
127 |
|
|
*/ |
128 |
|
|
static int /* 0 success, REG_NOMATCH failure */ |
129 |
|
|
matcher(struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[], |
130 |
|
|
int eflags) |
131 |
|
|
{ |
132 |
|
|
char *endp; |
133 |
|
|
int i; |
134 |
|
|
struct match mv; |
135 |
|
|
struct match *m = &mv; |
136 |
|
|
char *dp; |
137 |
|
|
const sopno gf = g->firststate+1; /* +1 for OEND */ |
138 |
|
|
const sopno gl = g->laststate; |
139 |
|
|
char *start; |
140 |
|
|
char *stop; |
141 |
|
|
|
142 |
|
|
/* simplify the situation where possible */ |
143 |
|
|
if (g->cflags®_NOSUB) |
144 |
|
|
nmatch = 0; |
145 |
|
|
if (eflags®_STARTEND) { |
146 |
|
|
start = string + pmatch[0].rm_so; |
147 |
|
|
stop = string + pmatch[0].rm_eo; |
148 |
|
|
} else { |
149 |
|
|
start = string; |
150 |
|
|
stop = start + strlen(start); |
151 |
|
|
} |
152 |
|
|
if (stop < start) |
153 |
|
|
return(REG_INVARG); |
154 |
|
|
|
155 |
|
|
/* prescreening; this does wonders for this rather slow code */ |
156 |
|
|
if (g->must != NULL) { |
157 |
|
|
for (dp = start; dp < stop; dp++) |
158 |
|
|
if (*dp == g->must[0] && stop - dp >= g->mlen && |
159 |
|
|
memcmp(dp, g->must, g->mlen) == 0) |
160 |
|
|
break; |
161 |
|
|
if (dp == stop) /* we didn't find g->must */ |
162 |
|
|
return(REG_NOMATCH); |
163 |
|
|
} |
164 |
|
|
|
165 |
|
|
/* match struct setup */ |
166 |
|
|
m->g = g; |
167 |
|
|
m->eflags = eflags; |
168 |
|
|
m->pmatch = NULL; |
169 |
|
|
m->lastpos = NULL; |
170 |
|
|
m->offp = string; |
171 |
|
|
m->beginp = start; |
172 |
|
|
m->endp = stop; |
173 |
|
|
STATESETUP(m, 4); |
174 |
|
|
SETUP(m->st); |
175 |
|
|
SETUP(m->fresh); |
176 |
|
|
SETUP(m->tmp); |
177 |
|
|
SETUP(m->empty); |
178 |
|
|
CLEAR(m->empty); |
179 |
|
|
|
180 |
|
|
/* this loop does only one repetition except for backrefs */ |
181 |
|
|
for (;;) { |
182 |
|
|
endp = fast(m, start, stop, gf, gl); |
183 |
|
|
if (endp == NULL) { /* a miss */ |
184 |
|
|
free(m->pmatch); |
185 |
|
|
free(m->lastpos); |
186 |
|
|
STATETEARDOWN(m); |
187 |
|
|
return(REG_NOMATCH); |
188 |
|
|
} |
189 |
|
|
if (nmatch == 0 && !g->backrefs) |
190 |
|
|
break; /* no further info needed */ |
191 |
|
|
|
192 |
|
|
/* where? */ |
193 |
|
|
assert(m->coldp != NULL); |
194 |
|
|
for (;;) { |
195 |
|
|
NOTE("finding start"); |
196 |
|
|
endp = slow(m, m->coldp, stop, gf, gl); |
197 |
|
|
if (endp != NULL) |
198 |
|
|
break; |
199 |
|
|
assert(m->coldp < m->endp); |
200 |
|
|
m->coldp++; |
201 |
|
|
} |
202 |
|
|
if (nmatch == 1 && !g->backrefs) |
203 |
|
|
break; /* no further info needed */ |
204 |
|
|
|
205 |
|
|
/* oh my, he wants the subexpressions... */ |
206 |
|
|
if (m->pmatch == NULL) |
207 |
|
|
m->pmatch = reallocarray(NULL, m->g->nsub + 1, |
208 |
|
|
sizeof(regmatch_t)); |
209 |
|
|
if (m->pmatch == NULL) { |
210 |
|
|
STATETEARDOWN(m); |
211 |
|
|
return(REG_ESPACE); |
212 |
|
|
} |
213 |
|
|
for (i = 1; i <= m->g->nsub; i++) |
214 |
|
|
m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1; |
215 |
|
|
if (!g->backrefs && !(m->eflags®_BACKR)) { |
216 |
|
|
NOTE("dissecting"); |
217 |
|
|
dp = dissect(m, m->coldp, endp, gf, gl); |
218 |
|
|
} else { |
219 |
|
|
if (g->nplus > 0 && m->lastpos == NULL) |
220 |
|
|
m->lastpos = reallocarray(NULL, |
221 |
|
|
g->nplus+1, sizeof(char *)); |
222 |
|
|
if (g->nplus > 0 && m->lastpos == NULL) { |
223 |
|
|
free(m->pmatch); |
224 |
|
|
STATETEARDOWN(m); |
225 |
|
|
return(REG_ESPACE); |
226 |
|
|
} |
227 |
|
|
NOTE("backref dissect"); |
228 |
|
|
dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0); |
229 |
|
|
} |
230 |
|
|
if (dp != NULL) |
231 |
|
|
break; |
232 |
|
|
|
233 |
|
|
/* uh-oh... we couldn't find a subexpression-level match */ |
234 |
|
|
assert(g->backrefs); /* must be back references doing it */ |
235 |
|
|
assert(g->nplus == 0 || m->lastpos != NULL); |
236 |
|
|
for (;;) { |
237 |
|
|
if (dp != NULL || endp <= m->coldp) |
238 |
|
|
break; /* defeat */ |
239 |
|
|
NOTE("backoff"); |
240 |
|
|
endp = slow(m, m->coldp, endp-1, gf, gl); |
241 |
|
|
if (endp == NULL) |
242 |
|
|
break; /* defeat */ |
243 |
|
|
/* try it on a shorter possibility */ |
244 |
|
|
#ifndef NDEBUG |
245 |
|
|
for (i = 1; i <= m->g->nsub; i++) { |
246 |
|
|
assert(m->pmatch[i].rm_so == -1); |
247 |
|
|
assert(m->pmatch[i].rm_eo == -1); |
248 |
|
|
} |
249 |
|
|
#endif |
250 |
|
|
NOTE("backoff dissect"); |
251 |
|
|
dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0); |
252 |
|
|
} |
253 |
|
|
assert(dp == NULL || dp == endp); |
254 |
|
|
if (dp != NULL) /* found a shorter one */ |
255 |
|
|
break; |
256 |
|
|
|
257 |
|
|
/* despite initial appearances, there is no match here */ |
258 |
|
|
NOTE("false alarm"); |
259 |
|
|
if (m->coldp == stop) |
260 |
|
|
break; |
261 |
|
|
start = m->coldp + 1; /* recycle starting later */ |
262 |
|
|
} |
263 |
|
|
|
264 |
|
|
/* fill in the details if requested */ |
265 |
|
|
if (nmatch > 0) { |
266 |
|
|
pmatch[0].rm_so = m->coldp - m->offp; |
267 |
|
|
pmatch[0].rm_eo = endp - m->offp; |
268 |
|
|
} |
269 |
|
|
if (nmatch > 1) { |
270 |
|
|
assert(m->pmatch != NULL); |
271 |
|
|
for (i = 1; i < nmatch; i++) |
272 |
|
|
if (i <= m->g->nsub) |
273 |
|
|
pmatch[i] = m->pmatch[i]; |
274 |
|
|
else { |
275 |
|
|
pmatch[i].rm_so = -1; |
276 |
|
|
pmatch[i].rm_eo = -1; |
277 |
|
|
} |
278 |
|
|
} |
279 |
|
|
|
280 |
|
|
free(m->pmatch); |
281 |
|
|
free(m->lastpos); |
282 |
|
|
STATETEARDOWN(m); |
283 |
|
|
return(0); |
284 |
|
|
} |
285 |
|
|
|
286 |
|
|
/* |
287 |
|
|
- dissect - figure out what matched what, no back references |
288 |
|
|
*/ |
289 |
|
|
static char * /* == stop (success) always */ |
290 |
|
|
dissect(struct match *m, char *start, char *stop, sopno startst, sopno stopst) |
291 |
|
|
{ |
292 |
|
|
int i; |
293 |
|
|
sopno ss; /* start sop of current subRE */ |
294 |
|
|
sopno es; /* end sop of current subRE */ |
295 |
|
|
char *sp; /* start of string matched by it */ |
296 |
|
|
char *stp; /* string matched by it cannot pass here */ |
297 |
|
|
char *rest; /* start of rest of string */ |
298 |
|
|
char *tail; /* string unmatched by rest of RE */ |
299 |
|
|
sopno ssub; /* start sop of subsubRE */ |
300 |
|
|
sopno esub; /* end sop of subsubRE */ |
301 |
|
|
char *ssp; /* start of string matched by subsubRE */ |
302 |
|
|
char *sep; /* end of string matched by subsubRE */ |
303 |
|
|
char *oldssp; /* previous ssp */ |
304 |
|
|
char *dp; |
305 |
|
|
|
306 |
|
|
AT("diss", start, stop, startst, stopst); |
307 |
|
|
sp = start; |
308 |
|
|
for (ss = startst; ss < stopst; ss = es) { |
309 |
|
|
/* identify end of subRE */ |
310 |
|
|
es = ss; |
311 |
|
|
switch (OP(m->g->strip[es])) { |
312 |
|
|
case OPLUS_: |
313 |
|
|
case OQUEST_: |
314 |
|
|
es += OPND(m->g->strip[es]); |
315 |
|
|
break; |
316 |
|
|
case OCH_: |
317 |
|
|
while (OP(m->g->strip[es]) != O_CH) |
318 |
|
|
es += OPND(m->g->strip[es]); |
319 |
|
|
break; |
320 |
|
|
} |
321 |
|
|
es++; |
322 |
|
|
|
323 |
|
|
/* figure out what it matched */ |
324 |
|
|
switch (OP(m->g->strip[ss])) { |
325 |
|
|
case OEND: |
326 |
|
|
assert(nope); |
327 |
|
|
break; |
328 |
|
|
case OCHAR: |
329 |
|
|
sp++; |
330 |
|
|
break; |
331 |
|
|
case OBOL: |
332 |
|
|
case OEOL: |
333 |
|
|
case OBOW: |
334 |
|
|
case OEOW: |
335 |
|
|
break; |
336 |
|
|
case OANY: |
337 |
|
|
case OANYOF: |
338 |
|
|
sp++; |
339 |
|
|
break; |
340 |
|
|
case OBACK_: |
341 |
|
|
case O_BACK: |
342 |
|
|
assert(nope); |
343 |
|
|
break; |
344 |
|
|
/* cases where length of match is hard to find */ |
345 |
|
|
case OQUEST_: |
346 |
|
|
stp = stop; |
347 |
|
|
for (;;) { |
348 |
|
|
/* how long could this one be? */ |
349 |
|
|
rest = slow(m, sp, stp, ss, es); |
350 |
|
|
assert(rest != NULL); /* it did match */ |
351 |
|
|
/* could the rest match the rest? */ |
352 |
|
|
tail = slow(m, rest, stop, es, stopst); |
353 |
|
|
if (tail == stop) |
354 |
|
|
break; /* yes! */ |
355 |
|
|
/* no -- try a shorter match for this one */ |
356 |
|
|
stp = rest - 1; |
357 |
|
|
assert(stp >= sp); /* it did work */ |
358 |
|
|
} |
359 |
|
|
ssub = ss + 1; |
360 |
|
|
esub = es - 1; |
361 |
|
|
/* did innards match? */ |
362 |
|
|
if (slow(m, sp, rest, ssub, esub) != NULL) { |
363 |
|
|
dp = dissect(m, sp, rest, ssub, esub); |
364 |
|
|
assert(dp == rest); |
365 |
|
|
} else /* no */ |
366 |
|
|
assert(sp == rest); |
367 |
|
|
sp = rest; |
368 |
|
|
break; |
369 |
|
|
case OPLUS_: |
370 |
|
|
stp = stop; |
371 |
|
|
for (;;) { |
372 |
|
|
/* how long could this one be? */ |
373 |
|
|
rest = slow(m, sp, stp, ss, es); |
374 |
|
|
assert(rest != NULL); /* it did match */ |
375 |
|
|
/* could the rest match the rest? */ |
376 |
|
|
tail = slow(m, rest, stop, es, stopst); |
377 |
|
|
if (tail == stop) |
378 |
|
|
break; /* yes! */ |
379 |
|
|
/* no -- try a shorter match for this one */ |
380 |
|
|
stp = rest - 1; |
381 |
|
|
assert(stp >= sp); /* it did work */ |
382 |
|
|
} |
383 |
|
|
ssub = ss + 1; |
384 |
|
|
esub = es - 1; |
385 |
|
|
ssp = sp; |
386 |
|
|
oldssp = ssp; |
387 |
|
|
for (;;) { /* find last match of innards */ |
388 |
|
|
sep = slow(m, ssp, rest, ssub, esub); |
389 |
|
|
if (sep == NULL || sep == ssp) |
390 |
|
|
break; /* failed or matched null */ |
391 |
|
|
oldssp = ssp; /* on to next try */ |
392 |
|
|
ssp = sep; |
393 |
|
|
} |
394 |
|
|
if (sep == NULL) { |
395 |
|
|
/* last successful match */ |
396 |
|
|
sep = ssp; |
397 |
|
|
ssp = oldssp; |
398 |
|
|
} |
399 |
|
|
assert(sep == rest); /* must exhaust substring */ |
400 |
|
|
assert(slow(m, ssp, sep, ssub, esub) == rest); |
401 |
|
|
dp = dissect(m, ssp, sep, ssub, esub); |
402 |
|
|
assert(dp == sep); |
403 |
|
|
sp = rest; |
404 |
|
|
break; |
405 |
|
|
case OCH_: |
406 |
|
|
stp = stop; |
407 |
|
|
for (;;) { |
408 |
|
|
/* how long could this one be? */ |
409 |
|
|
rest = slow(m, sp, stp, ss, es); |
410 |
|
|
assert(rest != NULL); /* it did match */ |
411 |
|
|
/* could the rest match the rest? */ |
412 |
|
|
tail = slow(m, rest, stop, es, stopst); |
413 |
|
|
if (tail == stop) |
414 |
|
|
break; /* yes! */ |
415 |
|
|
/* no -- try a shorter match for this one */ |
416 |
|
|
stp = rest - 1; |
417 |
|
|
assert(stp >= sp); /* it did work */ |
418 |
|
|
} |
419 |
|
|
ssub = ss + 1; |
420 |
|
|
esub = ss + OPND(m->g->strip[ss]) - 1; |
421 |
|
|
assert(OP(m->g->strip[esub]) == OOR1); |
422 |
|
|
for (;;) { /* find first matching branch */ |
423 |
|
|
if (slow(m, sp, rest, ssub, esub) == rest) |
424 |
|
|
break; /* it matched all of it */ |
425 |
|
|
/* that one missed, try next one */ |
426 |
|
|
assert(OP(m->g->strip[esub]) == OOR1); |
427 |
|
|
esub++; |
428 |
|
|
assert(OP(m->g->strip[esub]) == OOR2); |
429 |
|
|
ssub = esub + 1; |
430 |
|
|
esub += OPND(m->g->strip[esub]); |
431 |
|
|
if (OP(m->g->strip[esub]) == OOR2) |
432 |
|
|
esub--; |
433 |
|
|
else |
434 |
|
|
assert(OP(m->g->strip[esub]) == O_CH); |
435 |
|
|
} |
436 |
|
|
dp = dissect(m, sp, rest, ssub, esub); |
437 |
|
|
assert(dp == rest); |
438 |
|
|
sp = rest; |
439 |
|
|
break; |
440 |
|
|
case O_PLUS: |
441 |
|
|
case O_QUEST: |
442 |
|
|
case OOR1: |
443 |
|
|
case OOR2: |
444 |
|
|
case O_CH: |
445 |
|
|
assert(nope); |
446 |
|
|
break; |
447 |
|
|
case OLPAREN: |
448 |
|
|
i = OPND(m->g->strip[ss]); |
449 |
|
|
assert(0 < i && i <= m->g->nsub); |
450 |
|
|
m->pmatch[i].rm_so = sp - m->offp; |
451 |
|
|
break; |
452 |
|
|
case ORPAREN: |
453 |
|
|
i = OPND(m->g->strip[ss]); |
454 |
|
|
assert(0 < i && i <= m->g->nsub); |
455 |
|
|
m->pmatch[i].rm_eo = sp - m->offp; |
456 |
|
|
break; |
457 |
|
|
default: /* uh oh */ |
458 |
|
|
assert(nope); |
459 |
|
|
break; |
460 |
|
|
} |
461 |
|
|
} |
462 |
|
|
|
463 |
|
|
assert(sp == stop); |
464 |
|
|
return(sp); |
465 |
|
|
} |
466 |
|
|
|
467 |
|
|
/* |
468 |
|
|
- backref - figure out what matched what, figuring in back references |
469 |
|
|
*/ |
470 |
|
|
static char * /* == stop (success) or NULL (failure) */ |
471 |
|
|
backref(struct match *m, char *start, char *stop, sopno startst, sopno stopst, |
472 |
|
|
sopno lev, int rec) /* PLUS nesting level */ |
473 |
|
|
{ |
474 |
|
|
int i; |
475 |
|
|
sopno ss; /* start sop of current subRE */ |
476 |
|
|
char *sp; /* start of string matched by it */ |
477 |
|
|
sopno ssub; /* start sop of subsubRE */ |
478 |
|
|
sopno esub; /* end sop of subsubRE */ |
479 |
|
|
char *ssp; /* start of string matched by subsubRE */ |
480 |
|
|
char *dp; |
481 |
|
|
size_t len; |
482 |
|
|
int hard; |
483 |
|
|
sop s; |
484 |
|
|
regoff_t offsave; |
485 |
|
|
cset *cs; |
486 |
|
|
|
487 |
|
|
AT("back", start, stop, startst, stopst); |
488 |
|
|
sp = start; |
489 |
|
|
|
490 |
|
|
/* get as far as we can with easy stuff */ |
491 |
|
|
hard = 0; |
492 |
|
|
for (ss = startst; !hard && ss < stopst; ss++) |
493 |
|
|
switch (OP(s = m->g->strip[ss])) { |
494 |
|
|
case OCHAR: |
495 |
|
|
if (sp == stop || *sp++ != (char)OPND(s)) |
496 |
|
|
return(NULL); |
497 |
|
|
break; |
498 |
|
|
case OANY: |
499 |
|
|
if (sp == stop) |
500 |
|
|
return(NULL); |
501 |
|
|
sp++; |
502 |
|
|
break; |
503 |
|
|
case OANYOF: |
504 |
|
|
cs = &m->g->sets[OPND(s)]; |
505 |
|
|
if (sp == stop || !CHIN(cs, *sp++)) |
506 |
|
|
return(NULL); |
507 |
|
|
break; |
508 |
|
|
case OBOL: |
509 |
|
|
if ((sp == m->beginp && !(m->eflags®_NOTBOL)) || |
510 |
|
|
(sp > m->offp && sp < m->endp && |
511 |
|
|
*(sp-1) == '\n' && (m->g->cflags®_NEWLINE))) |
512 |
|
|
{ /* yes */ } |
513 |
|
|
else |
514 |
|
|
return(NULL); |
515 |
|
|
break; |
516 |
|
|
case OEOL: |
517 |
|
|
if ( (sp == m->endp && !(m->eflags®_NOTEOL)) || |
518 |
|
|
(sp < m->endp && *sp == '\n' && |
519 |
|
|
(m->g->cflags®_NEWLINE)) ) |
520 |
|
|
{ /* yes */ } |
521 |
|
|
else |
522 |
|
|
return(NULL); |
523 |
|
|
break; |
524 |
|
|
case OBOW: |
525 |
|
|
if (sp < m->endp && ISWORD(*sp) && |
526 |
|
|
((sp == m->beginp && !(m->eflags®_NOTBOL)) || |
527 |
|
|
(sp > m->offp && !ISWORD(*(sp-1))))) |
528 |
|
|
{ /* yes */ } |
529 |
|
|
else |
530 |
|
|
return(NULL); |
531 |
|
|
break; |
532 |
|
|
case OEOW: |
533 |
|
|
if (( (sp == m->endp && !(m->eflags®_NOTEOL)) || |
534 |
|
|
(sp < m->endp && *sp == '\n' && |
535 |
|
|
(m->g->cflags®_NEWLINE)) || |
536 |
|
|
(sp < m->endp && !ISWORD(*sp)) ) && |
537 |
|
|
(sp > m->beginp && ISWORD(*(sp-1))) ) |
538 |
|
|
{ /* yes */ } |
539 |
|
|
else |
540 |
|
|
return(NULL); |
541 |
|
|
break; |
542 |
|
|
case O_QUEST: |
543 |
|
|
break; |
544 |
|
|
case OOR1: /* matches null but needs to skip */ |
545 |
|
|
ss++; |
546 |
|
|
s = m->g->strip[ss]; |
547 |
|
|
do { |
548 |
|
|
assert(OP(s) == OOR2); |
549 |
|
|
ss += OPND(s); |
550 |
|
|
} while (OP(s = m->g->strip[ss]) != O_CH); |
551 |
|
|
/* note that the ss++ gets us past the O_CH */ |
552 |
|
|
break; |
553 |
|
|
default: /* have to make a choice */ |
554 |
|
|
hard = 1; |
555 |
|
|
break; |
556 |
|
|
} |
557 |
|
|
if (!hard) { /* that was it! */ |
558 |
|
|
if (sp != stop) |
559 |
|
|
return(NULL); |
560 |
|
|
return(sp); |
561 |
|
|
} |
562 |
|
|
ss--; /* adjust for the for's final increment */ |
563 |
|
|
|
564 |
|
|
/* the hard stuff */ |
565 |
|
|
AT("hard", sp, stop, ss, stopst); |
566 |
|
|
s = m->g->strip[ss]; |
567 |
|
|
switch (OP(s)) { |
568 |
|
|
case OBACK_: /* the vilest depths */ |
569 |
|
|
i = OPND(s); |
570 |
|
|
assert(0 < i && i <= m->g->nsub); |
571 |
|
|
if (m->pmatch[i].rm_eo == -1) |
572 |
|
|
return(NULL); |
573 |
|
|
assert(m->pmatch[i].rm_so != -1); |
574 |
|
|
len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so; |
575 |
|
|
if (len == 0 && rec++ > MAX_RECURSION) |
576 |
|
|
return(NULL); |
577 |
|
|
assert(stop - m->beginp >= len); |
578 |
|
|
if (sp > stop - len) |
579 |
|
|
return(NULL); /* not enough left to match */ |
580 |
|
|
ssp = m->offp + m->pmatch[i].rm_so; |
581 |
|
|
if (memcmp(sp, ssp, len) != 0) |
582 |
|
|
return(NULL); |
583 |
|
|
while (m->g->strip[ss] != SOP(O_BACK, i)) |
584 |
|
|
ss++; |
585 |
|
|
return(backref(m, sp+len, stop, ss+1, stopst, lev, rec)); |
586 |
|
|
break; |
587 |
|
|
case OQUEST_: /* to null or not */ |
588 |
|
|
dp = backref(m, sp, stop, ss+1, stopst, lev, rec); |
589 |
|
|
if (dp != NULL) |
590 |
|
|
return(dp); /* not */ |
591 |
|
|
return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev, rec)); |
592 |
|
|
break; |
593 |
|
|
case OPLUS_: |
594 |
|
|
assert(m->lastpos != NULL); |
595 |
|
|
assert(lev+1 <= m->g->nplus); |
596 |
|
|
m->lastpos[lev+1] = sp; |
597 |
|
|
return(backref(m, sp, stop, ss+1, stopst, lev+1, rec)); |
598 |
|
|
break; |
599 |
|
|
case O_PLUS: |
600 |
|
|
if (sp == m->lastpos[lev]) /* last pass matched null */ |
601 |
|
|
return(backref(m, sp, stop, ss+1, stopst, lev-1, rec)); |
602 |
|
|
/* try another pass */ |
603 |
|
|
m->lastpos[lev] = sp; |
604 |
|
|
dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev, rec); |
605 |
|
|
if (dp == NULL) |
606 |
|
|
return(backref(m, sp, stop, ss+1, stopst, lev-1, rec)); |
607 |
|
|
else |
608 |
|
|
return(dp); |
609 |
|
|
break; |
610 |
|
|
case OCH_: /* find the right one, if any */ |
611 |
|
|
ssub = ss + 1; |
612 |
|
|
esub = ss + OPND(s) - 1; |
613 |
|
|
assert(OP(m->g->strip[esub]) == OOR1); |
614 |
|
|
for (;;) { /* find first matching branch */ |
615 |
|
|
dp = backref(m, sp, stop, ssub, esub, lev, rec); |
616 |
|
|
if (dp != NULL) |
617 |
|
|
return(dp); |
618 |
|
|
/* that one missed, try next one */ |
619 |
|
|
if (OP(m->g->strip[esub]) == O_CH) |
620 |
|
|
return(NULL); /* there is none */ |
621 |
|
|
esub++; |
622 |
|
|
assert(OP(m->g->strip[esub]) == OOR2); |
623 |
|
|
ssub = esub + 1; |
624 |
|
|
esub += OPND(m->g->strip[esub]); |
625 |
|
|
if (OP(m->g->strip[esub]) == OOR2) |
626 |
|
|
esub--; |
627 |
|
|
else |
628 |
|
|
assert(OP(m->g->strip[esub]) == O_CH); |
629 |
|
|
} |
630 |
|
|
break; |
631 |
|
|
case OLPAREN: /* must undo assignment if rest fails */ |
632 |
|
|
i = OPND(s); |
633 |
|
|
assert(0 < i && i <= m->g->nsub); |
634 |
|
|
offsave = m->pmatch[i].rm_so; |
635 |
|
|
m->pmatch[i].rm_so = sp - m->offp; |
636 |
|
|
dp = backref(m, sp, stop, ss+1, stopst, lev, rec); |
637 |
|
|
if (dp != NULL) |
638 |
|
|
return(dp); |
639 |
|
|
m->pmatch[i].rm_so = offsave; |
640 |
|
|
return(NULL); |
641 |
|
|
break; |
642 |
|
|
case ORPAREN: /* must undo assignment if rest fails */ |
643 |
|
|
i = OPND(s); |
644 |
|
|
assert(0 < i && i <= m->g->nsub); |
645 |
|
|
offsave = m->pmatch[i].rm_eo; |
646 |
|
|
m->pmatch[i].rm_eo = sp - m->offp; |
647 |
|
|
dp = backref(m, sp, stop, ss+1, stopst, lev, rec); |
648 |
|
|
if (dp != NULL) |
649 |
|
|
return(dp); |
650 |
|
|
m->pmatch[i].rm_eo = offsave; |
651 |
|
|
return(NULL); |
652 |
|
|
break; |
653 |
|
|
default: /* uh oh */ |
654 |
|
|
assert(nope); |
655 |
|
|
break; |
656 |
|
|
} |
657 |
|
|
|
658 |
|
|
/* "can't happen" */ |
659 |
|
|
assert(nope); |
660 |
|
|
/* NOTREACHED */ |
661 |
|
|
return NULL; |
662 |
|
|
} |
663 |
|
|
|
664 |
|
|
/* |
665 |
|
|
- fast - step through the string at top speed |
666 |
|
|
*/ |
667 |
|
|
static char * /* where tentative match ended, or NULL */ |
668 |
|
|
fast(struct match *m, char *start, char *stop, sopno startst, sopno stopst) |
669 |
|
|
{ |
670 |
|
|
states st = m->st; |
671 |
|
|
states fresh = m->fresh; |
672 |
|
|
states tmp = m->tmp; |
673 |
|
|
char *p = start; |
674 |
|
|
int c; |
675 |
|
|
int lastc; /* previous c */ |
676 |
|
|
int flagch; |
677 |
|
|
int i; |
678 |
|
|
char *coldp; /* last p after which no match was underway */ |
679 |
|
|
|
680 |
|
|
if (start == m->offp || (start == m->beginp && !(m->eflags®_NOTBOL))) |
681 |
|
|
c = OUT; |
682 |
|
|
else |
683 |
|
|
c = *(start-1); |
684 |
|
|
|
685 |
|
|
CLEAR(st); |
686 |
|
|
SET1(st, startst); |
687 |
|
|
st = step(m->g, startst, stopst, st, NOTHING, st); |
688 |
|
|
ASSIGN(fresh, st); |
689 |
|
|
SP("start", st, *p); |
690 |
|
|
coldp = NULL; |
691 |
|
|
for (;;) { |
692 |
|
|
/* next character */ |
693 |
|
|
lastc = c; |
694 |
|
|
c = (p == m->endp) ? OUT : *p; |
695 |
|
|
if (EQ(st, fresh)) |
696 |
|
|
coldp = p; |
697 |
|
|
|
698 |
|
|
/* is there an EOL and/or BOL between lastc and c? */ |
699 |
|
|
flagch = '\0'; |
700 |
|
|
i = 0; |
701 |
|
|
if ((lastc == '\n' && m->g->cflags®_NEWLINE) || |
702 |
|
|
(lastc == OUT && !(m->eflags®_NOTBOL))) { |
703 |
|
|
flagch = BOL; |
704 |
|
|
i = m->g->nbol; |
705 |
|
|
} |
706 |
|
|
if ((c == '\n' && m->g->cflags®_NEWLINE) || |
707 |
|
|
(c == OUT && !(m->eflags®_NOTEOL)) ) { |
708 |
|
|
flagch = (flagch == BOL) ? BOLEOL : EOL; |
709 |
|
|
i += m->g->neol; |
710 |
|
|
} |
711 |
|
|
if (i != 0) { |
712 |
|
|
for (; i > 0; i--) |
713 |
|
|
st = step(m->g, startst, stopst, |
714 |
|
|
st, flagch, st); |
715 |
|
|
SP("boleol", st, c); |
716 |
|
|
} |
717 |
|
|
|
718 |
|
|
/* how about a word boundary? */ |
719 |
|
|
if ((flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && |
720 |
|
|
(c != OUT && ISWORD(c))) |
721 |
|
|
flagch = BOW; |
722 |
|
|
if ((lastc != OUT && ISWORD(lastc)) && |
723 |
|
|
(flagch == EOL || (c != OUT && !ISWORD(c)))) |
724 |
|
|
flagch = EOW; |
725 |
|
|
if (flagch == BOW || flagch == EOW) { |
726 |
|
|
st = step(m->g, startst, stopst, st, flagch, st); |
727 |
|
|
SP("boweow", st, c); |
728 |
|
|
} |
729 |
|
|
|
730 |
|
|
/* are we done? */ |
731 |
|
|
if (ISSET(st, stopst) || p == stop) |
732 |
|
|
break; /* NOTE BREAK OUT */ |
733 |
|
|
|
734 |
|
|
/* no, we must deal with this character */ |
735 |
|
|
ASSIGN(tmp, st); |
736 |
|
|
ASSIGN(st, fresh); |
737 |
|
|
assert(c != OUT); |
738 |
|
|
st = step(m->g, startst, stopst, tmp, c, st); |
739 |
|
|
SP("aft", st, c); |
740 |
|
|
assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); |
741 |
|
|
p++; |
742 |
|
|
} |
743 |
|
|
|
744 |
|
|
assert(coldp != NULL); |
745 |
|
|
m->coldp = coldp; |
746 |
|
|
if (ISSET(st, stopst)) |
747 |
|
|
return(p+1); |
748 |
|
|
else |
749 |
|
|
return(NULL); |
750 |
|
|
} |
751 |
|
|
|
752 |
|
|
/* |
753 |
|
|
- slow - step through the string more deliberately |
754 |
|
|
*/ |
755 |
|
|
static char * /* where it ended */ |
756 |
|
|
slow(struct match *m, char *start, char *stop, sopno startst, sopno stopst) |
757 |
|
|
{ |
758 |
|
|
states st = m->st; |
759 |
|
|
states empty = m->empty; |
760 |
|
|
states tmp = m->tmp; |
761 |
|
|
char *p = start; |
762 |
|
|
int c; |
763 |
|
|
int lastc; /* previous c */ |
764 |
|
|
int flagch; |
765 |
|
|
int i; |
766 |
|
|
char *matchp; /* last p at which a match ended */ |
767 |
|
|
|
768 |
|
|
if (start == m->offp || (start == m->beginp && !(m->eflags®_NOTBOL))) |
769 |
|
|
c = OUT; |
770 |
|
|
else |
771 |
|
|
c = *(start-1); |
772 |
|
|
|
773 |
|
|
AT("slow", start, stop, startst, stopst); |
774 |
|
|
CLEAR(st); |
775 |
|
|
SET1(st, startst); |
776 |
|
|
SP("sstart", st, *p); |
777 |
|
|
st = step(m->g, startst, stopst, st, NOTHING, st); |
778 |
|
|
matchp = NULL; |
779 |
|
|
for (;;) { |
780 |
|
|
/* next character */ |
781 |
|
|
lastc = c; |
782 |
|
|
c = (p == m->endp) ? OUT : *p; |
783 |
|
|
|
784 |
|
|
/* is there an EOL and/or BOL between lastc and c? */ |
785 |
|
|
flagch = '\0'; |
786 |
|
|
i = 0; |
787 |
|
|
if ((lastc == '\n' && m->g->cflags®_NEWLINE) || |
788 |
|
|
(lastc == OUT && !(m->eflags®_NOTBOL))) { |
789 |
|
|
flagch = BOL; |
790 |
|
|
i = m->g->nbol; |
791 |
|
|
} |
792 |
|
|
if ((c == '\n' && m->g->cflags®_NEWLINE) || |
793 |
|
|
(c == OUT && !(m->eflags®_NOTEOL))) { |
794 |
|
|
flagch = (flagch == BOL) ? BOLEOL : EOL; |
795 |
|
|
i += m->g->neol; |
796 |
|
|
} |
797 |
|
|
if (i != 0) { |
798 |
|
|
for (; i > 0; i--) |
799 |
|
|
st = step(m->g, startst, stopst, |
800 |
|
|
st, flagch, st); |
801 |
|
|
SP("sboleol", st, c); |
802 |
|
|
} |
803 |
|
|
|
804 |
|
|
/* how about a word boundary? */ |
805 |
|
|
if ((flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && |
806 |
|
|
(c != OUT && ISWORD(c))) |
807 |
|
|
flagch = BOW; |
808 |
|
|
if ((lastc != OUT && ISWORD(lastc)) && |
809 |
|
|
(flagch == EOL || (c != OUT && !ISWORD(c)))) |
810 |
|
|
flagch = EOW; |
811 |
|
|
if (flagch == BOW || flagch == EOW) { |
812 |
|
|
st = step(m->g, startst, stopst, st, flagch, st); |
813 |
|
|
SP("sboweow", st, c); |
814 |
|
|
} |
815 |
|
|
|
816 |
|
|
/* are we done? */ |
817 |
|
|
if (ISSET(st, stopst)) |
818 |
|
|
matchp = p; |
819 |
|
|
if (EQ(st, empty) || p == stop) |
820 |
|
|
break; /* NOTE BREAK OUT */ |
821 |
|
|
|
822 |
|
|
/* no, we must deal with this character */ |
823 |
|
|
ASSIGN(tmp, st); |
824 |
|
|
ASSIGN(st, empty); |
825 |
|
|
assert(c != OUT); |
826 |
|
|
st = step(m->g, startst, stopst, tmp, c, st); |
827 |
|
|
SP("saft", st, c); |
828 |
|
|
assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); |
829 |
|
|
p++; |
830 |
|
|
} |
831 |
|
|
|
832 |
|
|
return(matchp); |
833 |
|
|
} |
834 |
|
|
|
835 |
|
|
|
836 |
|
|
/* |
837 |
|
|
- step - map set of states reachable before char to set reachable after |
838 |
|
|
*/ |
839 |
|
|
static states |
840 |
|
|
step(struct re_guts *g, |
841 |
|
|
sopno start, /* start state within strip */ |
842 |
|
|
sopno stop, /* state after stop state within strip */ |
843 |
|
|
states bef, /* states reachable before */ |
844 |
|
|
int ch, /* character or NONCHAR code */ |
845 |
|
|
states aft) /* states already known reachable after */ |
846 |
|
|
{ |
847 |
|
|
cset *cs; |
848 |
|
|
sop s; |
849 |
|
|
sopno pc; |
850 |
|
|
onestate here; /* note, macros know this name */ |
851 |
|
|
sopno look; |
852 |
|
|
int i; |
853 |
|
|
|
854 |
|
|
for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) { |
855 |
|
|
s = g->strip[pc]; |
856 |
|
|
switch (OP(s)) { |
857 |
|
|
case OEND: |
858 |
|
|
assert(pc == stop-1); |
859 |
|
|
break; |
860 |
|
|
case OCHAR: |
861 |
|
|
/* only characters can match */ |
862 |
|
|
assert(!NONCHAR(ch) || ch != (char)OPND(s)); |
863 |
|
|
if (ch == (char)OPND(s)) |
864 |
|
|
FWD(aft, bef, 1); |
865 |
|
|
break; |
866 |
|
|
case OBOL: |
867 |
|
|
if (ch == BOL || ch == BOLEOL) |
868 |
|
|
FWD(aft, bef, 1); |
869 |
|
|
break; |
870 |
|
|
case OEOL: |
871 |
|
|
if (ch == EOL || ch == BOLEOL) |
872 |
|
|
FWD(aft, bef, 1); |
873 |
|
|
break; |
874 |
|
|
case OBOW: |
875 |
|
|
if (ch == BOW) |
876 |
|
|
FWD(aft, bef, 1); |
877 |
|
|
break; |
878 |
|
|
case OEOW: |
879 |
|
|
if (ch == EOW) |
880 |
|
|
FWD(aft, bef, 1); |
881 |
|
|
break; |
882 |
|
|
case OANY: |
883 |
|
|
if (!NONCHAR(ch)) |
884 |
|
|
FWD(aft, bef, 1); |
885 |
|
|
break; |
886 |
|
|
case OANYOF: |
887 |
|
|
cs = &g->sets[OPND(s)]; |
888 |
|
|
if (!NONCHAR(ch) && CHIN(cs, ch)) |
889 |
|
|
FWD(aft, bef, 1); |
890 |
|
|
break; |
891 |
|
|
case OBACK_: /* ignored here */ |
892 |
|
|
case O_BACK: |
893 |
|
|
FWD(aft, aft, 1); |
894 |
|
|
break; |
895 |
|
|
case OPLUS_: /* forward, this is just an empty */ |
896 |
|
|
FWD(aft, aft, 1); |
897 |
|
|
break; |
898 |
|
|
case O_PLUS: /* both forward and back */ |
899 |
|
|
FWD(aft, aft, 1); |
900 |
|
|
i = ISSETBACK(aft, OPND(s)); |
901 |
|
|
BACK(aft, aft, OPND(s)); |
902 |
|
|
if (!i && ISSETBACK(aft, OPND(s))) { |
903 |
|
|
/* oho, must reconsider loop body */ |
904 |
|
|
pc -= OPND(s) + 1; |
905 |
|
|
INIT(here, pc); |
906 |
|
|
} |
907 |
|
|
break; |
908 |
|
|
case OQUEST_: /* two branches, both forward */ |
909 |
|
|
FWD(aft, aft, 1); |
910 |
|
|
FWD(aft, aft, OPND(s)); |
911 |
|
|
break; |
912 |
|
|
case O_QUEST: /* just an empty */ |
913 |
|
|
FWD(aft, aft, 1); |
914 |
|
|
break; |
915 |
|
|
case OLPAREN: /* not significant here */ |
916 |
|
|
case ORPAREN: |
917 |
|
|
FWD(aft, aft, 1); |
918 |
|
|
break; |
919 |
|
|
case OCH_: /* mark the first two branches */ |
920 |
|
|
FWD(aft, aft, 1); |
921 |
|
|
assert(OP(g->strip[pc+OPND(s)]) == OOR2); |
922 |
|
|
FWD(aft, aft, OPND(s)); |
923 |
|
|
break; |
924 |
|
|
case OOR1: /* done a branch, find the O_CH */ |
925 |
|
|
if (ISSTATEIN(aft, here)) { |
926 |
|
|
for (look = 1; |
927 |
|
|
OP(s = g->strip[pc+look]) != O_CH; |
928 |
|
|
look += OPND(s)) |
929 |
|
|
assert(OP(s) == OOR2); |
930 |
|
|
FWD(aft, aft, look); |
931 |
|
|
} |
932 |
|
|
break; |
933 |
|
|
case OOR2: /* propagate OCH_'s marking */ |
934 |
|
|
FWD(aft, aft, 1); |
935 |
|
|
if (OP(g->strip[pc+OPND(s)]) != O_CH) { |
936 |
|
|
assert(OP(g->strip[pc+OPND(s)]) == OOR2); |
937 |
|
|
FWD(aft, aft, OPND(s)); |
938 |
|
|
} |
939 |
|
|
break; |
940 |
|
|
case O_CH: /* just empty */ |
941 |
|
|
FWD(aft, aft, 1); |
942 |
|
|
break; |
943 |
|
|
default: /* ooooops... */ |
944 |
|
|
assert(nope); |
945 |
|
|
break; |
946 |
|
|
} |
947 |
|
|
} |
948 |
|
|
|
949 |
|
|
return(aft); |
950 |
|
|
} |
951 |
|
|
|
952 |
|
|
#ifdef REDEBUG |
953 |
|
|
/* |
954 |
|
|
- print - print a set of states |
955 |
|
|
*/ |
956 |
|
|
static void |
957 |
|
|
print(struct match *m, char *caption, states st, int ch, FILE *d) |
958 |
|
|
{ |
959 |
|
|
struct re_guts *g = m->g; |
960 |
|
|
int i; |
961 |
|
|
int first = 1; |
962 |
|
|
|
963 |
|
|
if (!(m->eflags®_TRACE)) |
964 |
|
|
return; |
965 |
|
|
|
966 |
|
|
(void)fprintf(d, "%s", caption); |
967 |
|
|
(void)fprintf(d, " %s", pchar(ch)); |
968 |
|
|
for (i = 0; i < g->nstates; i++) { |
969 |
|
|
if (ISSET(st, i)) { |
970 |
|
|
(void)fprintf(d, "%s%d", (first) ? "\t" : ", ", i); |
971 |
|
|
first = 0; |
972 |
|
|
} |
973 |
|
|
} |
974 |
|
|
(void)fprintf(d, "\n"); |
975 |
|
|
} |
976 |
|
|
|
977 |
|
|
/* |
978 |
|
|
- at - print current situation |
979 |
|
|
*/ |
980 |
|
|
static void |
981 |
|
|
at(struct match *m, char *title, char *start, char *stop, sopno startst, |
982 |
|
|
sopno stopst) |
983 |
|
|
{ |
984 |
|
|
if (!(m->eflags®_TRACE)) |
985 |
|
|
return; |
986 |
|
|
|
987 |
|
|
(void)printf("%s %s-", title, pchar(*start)); |
988 |
|
|
(void)printf("%s ", pchar(*stop)); |
989 |
|
|
(void)printf("%ld-%ld\n", (long)startst, (long)stopst); |
990 |
|
|
} |
991 |
|
|
|
992 |
|
|
#ifndef PCHARDONE |
993 |
|
|
#define PCHARDONE /* never again */ |
994 |
|
|
static const char *nonchars[] = |
995 |
|
|
{ "OUT", "BOL", "EOL", "BOLEOL", "NOTHING", "BOW", "EOW" }; |
996 |
|
|
#define PNONCHAR(c) \ |
997 |
|
|
((c) - OUT < (sizeof(nonchars)/sizeof(nonchars[0])) \ |
998 |
|
|
? nonchars[(c) - OUT] : "invalid") |
999 |
|
|
|
1000 |
|
|
/* |
1001 |
|
|
- pchar - make a character printable |
1002 |
|
|
* |
1003 |
|
|
* Is this identical to regchar() over in debug.c? Well, yes. But a |
1004 |
|
|
* duplicate here avoids having a debugging-capable regexec.o tied to |
1005 |
|
|
* a matching debug.o, and this is convenient. It all disappears in |
1006 |
|
|
* the non-debug compilation anyway, so it doesn't matter much. |
1007 |
|
|
*/ |
1008 |
|
|
static const char * /* -> representation */ |
1009 |
|
|
pchar(int ch) |
1010 |
|
|
{ |
1011 |
|
|
static char pbuf[10]; |
1012 |
|
|
|
1013 |
|
|
if (NONCHAR(ch)) { |
1014 |
|
|
if (ch - OUT < (sizeof(nonchars)/sizeof(nonchars[0]))) |
1015 |
|
|
return nonchars[ch - OUT]; |
1016 |
|
|
return "invalid"; |
1017 |
|
|
} |
1018 |
|
|
if (isprint((unsigned char)ch) || ch == ' ') |
1019 |
|
|
(void)snprintf(pbuf, sizeof pbuf, "%c", ch); |
1020 |
|
|
else |
1021 |
|
|
(void)snprintf(pbuf, sizeof pbuf, "\\%o", ch); |
1022 |
|
|
return(pbuf); |
1023 |
|
|
} |
1024 |
|
|
#endif |
1025 |
|
|
#endif |
1026 |
|
|
|
1027 |
|
|
#undef matcher |
1028 |
|
|
#undef fast |
1029 |
|
|
#undef slow |
1030 |
|
|
#undef dissect |
1031 |
|
|
#undef backref |
1032 |
|
|
#undef step |
1033 |
|
|
#undef print |
1034 |
|
|
#undef at |
1035 |
|
|
#undef match |
1036 |
|
|
#undef nope |