GCC Code Coverage Report | |||||||||||||||||||||
|
|||||||||||||||||||||
Line | Branch | Exec | Source |
1 |
/* $OpenBSD: b.c,v 1.18 2014/12/19 19:28:55 deraadt Exp $ */ |
||
2 |
/**************************************************************** |
||
3 |
Copyright (C) Lucent Technologies 1997 |
||
4 |
All Rights Reserved |
||
5 |
|||
6 |
Permission to use, copy, modify, and distribute this software and |
||
7 |
its documentation for any purpose and without fee is hereby |
||
8 |
granted, provided that the above copyright notice appear in all |
||
9 |
copies and that both that the copyright notice and this |
||
10 |
permission notice and warranty disclaimer appear in supporting |
||
11 |
documentation, and that the name Lucent Technologies or any of |
||
12 |
its entities not be used in advertising or publicity pertaining |
||
13 |
to distribution of the software without specific, written prior |
||
14 |
permission. |
||
15 |
|||
16 |
LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, |
||
17 |
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. |
||
18 |
IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY |
||
19 |
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
||
20 |
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER |
||
21 |
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, |
||
22 |
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF |
||
23 |
THIS SOFTWARE. |
||
24 |
****************************************************************/ |
||
25 |
|||
26 |
/* lasciate ogne speranza, voi ch'intrate. */ |
||
27 |
|||
28 |
#define DEBUG |
||
29 |
|||
30 |
#include <ctype.h> |
||
31 |
#include <stdio.h> |
||
32 |
#include <string.h> |
||
33 |
#include <stdlib.h> |
||
34 |
#include "awk.h" |
||
35 |
#include "ytab.h" |
||
36 |
|||
37 |
#define HAT (NCHARS+2) /* matches ^ in regular expr */ |
||
38 |
/* NCHARS is 2**n */ |
||
39 |
#define MAXLIN 22 |
||
40 |
|||
41 |
#define type(v) (v)->nobj /* badly overloaded here */ |
||
42 |
#define info(v) (v)->ntype /* badly overloaded here */ |
||
43 |
#define left(v) (v)->narg[0] |
||
44 |
#define right(v) (v)->narg[1] |
||
45 |
#define parent(v) (v)->nnext |
||
46 |
|||
47 |
#define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL: |
||
48 |
#define ELEAF case EMPTYRE: /* empty string in regexp */ |
||
49 |
#define UNARY case STAR: case PLUS: case QUEST: |
||
50 |
|||
51 |
/* encoding in tree Nodes: |
||
52 |
leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL, EMPTYRE): |
||
53 |
left is index, right contains value or pointer to value |
||
54 |
unary (STAR, PLUS, QUEST): left is child, right is null |
||
55 |
binary (CAT, OR): left and right are children |
||
56 |
parent contains pointer to parent |
||
57 |
*/ |
||
58 |
|||
59 |
|||
60 |
int *setvec; |
||
61 |
int *tmpset; |
||
62 |
int maxsetvec = 0; |
||
63 |
|||
64 |
int rtok; /* next token in current re */ |
||
65 |
int rlxval; |
||
66 |
static uschar *rlxstr; |
||
67 |
static uschar *prestr; /* current position in current re */ |
||
68 |
static uschar *lastre; /* origin of last re */ |
||
69 |
|||
70 |
static int setcnt; |
||
71 |
static int poscnt; |
||
72 |
|||
73 |
char *patbeg; |
||
74 |
int patlen; |
||
75 |
|||
76 |
#define NFA 20 /* cache this many dynamic fa's */ |
||
77 |
fa *fatab[NFA]; |
||
78 |
int nfatab = 0; /* entries in fatab */ |
||
79 |
|||
80 |
fa *makedfa(const char *s, int anchor) /* returns dfa for reg expr s */ |
||
81 |
{ |
||
82 |
int i, use, nuse; |
||
83 |
fa *pfa; |
||
84 |
static int now = 1; |
||
85 |
|||
86 |
✓✓ | 240 |
if (setvec == 0) { /* first time through any RE */ |
87 |
93 |
maxsetvec = MAXLIN; |
|
88 |
93 |
setvec = (int *) calloc(maxsetvec, sizeof(int)); |
|
89 |
93 |
tmpset = (int *) calloc(maxsetvec, sizeof(int)); |
|
90 |
✗✓ | 93 |
if (setvec == 0 || tmpset == 0) |
91 |
overflo("out of space initializing makedfa"); |
||
92 |
} |
||
93 |
|||
94 |
✓✗ | 120 |
if (compile_time) /* a constant for sure */ |
95 |
120 |
return mkdfa(s, anchor); |
|
96 |
for (i = 0; i < nfatab; i++) /* is it there already? */ |
||
97 |
if (fatab[i]->anchor == anchor |
||
98 |
&& strcmp((const char *) fatab[i]->restr, s) == 0) { |
||
99 |
fatab[i]->use = now++; |
||
100 |
return fatab[i]; |
||
101 |
} |
||
102 |
pfa = mkdfa(s, anchor); |
||
103 |
if (nfatab < NFA) { /* room for another */ |
||
104 |
fatab[nfatab] = pfa; |
||
105 |
fatab[nfatab]->use = now++; |
||
106 |
nfatab++; |
||
107 |
return pfa; |
||
108 |
} |
||
109 |
use = fatab[0]->use; /* replace least-recently used */ |
||
110 |
nuse = 0; |
||
111 |
for (i = 1; i < nfatab; i++) |
||
112 |
if (fatab[i]->use < use) { |
||
113 |
use = fatab[i]->use; |
||
114 |
nuse = i; |
||
115 |
} |
||
116 |
freefa(fatab[nuse]); |
||
117 |
fatab[nuse] = pfa; |
||
118 |
pfa->use = now++; |
||
119 |
return pfa; |
||
120 |
120 |
} |
|
121 |
|||
122 |
fa *mkdfa(const char *s, int anchor) /* does the real work of making a dfa */ |
||
123 |
/* anchor = 1 for anchored matches, else 0 */ |
||
124 |
{ |
||
125 |
Node *p, *p1; |
||
126 |
fa *f; |
||
127 |
|||
128 |
240 |
p = reparse(s); |
|
129 |
120 |
p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p); |
|
130 |
/* put ALL STAR in front of reg. exp. */ |
||
131 |
120 |
p1 = op2(CAT, p1, op2(FINAL, NIL, NIL)); |
|
132 |
/* put FINAL after reg. exp. */ |
||
133 |
|||
134 |
120 |
poscnt = 0; |
|
135 |
120 |
penter(p1); /* enter parent pointers and leaf indices */ |
|
136 |
✗✓ | 120 |
if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL) |
137 |
overflo("out of space for fa"); |
||
138 |
120 |
f->accept = poscnt-1; /* penter has computed number of positions in re */ |
|
139 |
120 |
cfoll(f, p1); /* set up follow sets */ |
|
140 |
120 |
freetr(p1); |
|
141 |
✗✓ | 120 |
if ((f->posns[0] = (int *) calloc(*(f->re[0].lfollow), sizeof(int))) == NULL) |
142 |
overflo("out of space in makedfa"); |
||
143 |
✗✓ | 120 |
if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL) |
144 |
overflo("out of space in makedfa"); |
||
145 |
120 |
*f->posns[1] = 0; |
|
146 |
120 |
f->initstat = makeinit(f, anchor); |
|
147 |
120 |
f->anchor = anchor; |
|
148 |
120 |
f->restr = (uschar *) tostring(s); |
|
149 |
120 |
return f; |
|
150 |
} |
||
151 |
|||
152 |
int makeinit(fa *f, int anchor) |
||
153 |
{ |
||
154 |
int i, k; |
||
155 |
|||
156 |
240 |
f->curstat = 2; |
|
157 |
120 |
f->out[2] = 0; |
|
158 |
120 |
f->reset = 0; |
|
159 |
120 |
k = *(f->re[0].lfollow); |
|
160 |
✗✓ | 120 |
xfree(f->posns[2]); |
161 |
✗✓ | 120 |
if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL) |
162 |
overflo("out of space in makeinit"); |
||
163 |
✓✓ | 964 |
for (i=0; i <= k; i++) { |
164 |
362 |
(f->posns[2])[i] = (f->re[0].lfollow)[i]; |
|
165 |
} |
||
166 |
✗✓ | 120 |
if ((f->posns[2])[1] == f->accept) |
167 |
f->out[2] = 1; |
||
168 |
✓✓ | 62400 |
for (i=0; i < NCHARS; i++) |
169 |
31080 |
f->gototab[2][i] = 0; |
|
170 |
120 |
f->curstat = cgoto(f, 2, HAT); |
|
171 |
✓✓ | 120 |
if (anchor) { |
172 |
6 |
*f->posns[2] = k-1; /* leave out position 0 */ |
|
173 |
✓✓ | 36 |
for (i=0; i < k; i++) { |
174 |
12 |
(f->posns[0])[i] = (f->posns[2])[i]; |
|
175 |
} |
||
176 |
|||
177 |
6 |
f->out[0] = f->out[2]; |
|
178 |
✓✗ | 6 |
if (f->curstat != 2) |
179 |
6 |
--(*f->posns[f->curstat]); |
|
180 |
} |
||
181 |
120 |
return f->curstat; |
|
182 |
} |
||
183 |
|||
184 |
void penter(Node *p) /* set up parent pointers and leaf indices */ |
||
185 |
{ |
||
186 |
✗✗✗✗ ✗✗✓✗ ✗✓✗✓ ✗ |
4112 |
switch (type(p)) { |
187 |
ELEAF |
||
188 |
LEAF |
||
189 |
1024 |
info(p) = poscnt; |
|
190 |
1024 |
poscnt++; |
|
191 |
1024 |
break; |
|
192 |
UNARY |
||
193 |
128 |
penter(left(p)); |
|
194 |
128 |
parent(left(p)) = p; |
|
195 |
128 |
break; |
|
196 |
case CAT: |
||
197 |
case OR: |
||
198 |
904 |
penter(left(p)); |
|
199 |
904 |
penter(right(p)); |
|
200 |
904 |
parent(left(p)) = p; |
|
201 |
904 |
parent(right(p)) = p; |
|
202 |
904 |
break; |
|
203 |
default: /* can't happen */ |
||
204 |
FATAL("can't happen: unknown type %d in penter", type(p)); |
||
205 |
break; |
||
206 |
} |
||
207 |
2056 |
} |
|
208 |
|||
209 |
void freetr(Node *p) /* free parse tree */ |
||
210 |
{ |
||
211 |
✗✗✗✗ ✗✗✓✗ ✗✓✗✓ ✗ |
4112 |
switch (type(p)) { |
212 |
ELEAF |
||
213 |
LEAF |
||
214 |
✓✗ | 2048 |
xfree(p); |
215 |
break; |
||
216 |
UNARY |
||
217 |
128 |
freetr(left(p)); |
|
218 |
✓✗ | 256 |
xfree(p); |
219 |
break; |
||
220 |
case CAT: |
||
221 |
case OR: |
||
222 |
904 |
freetr(left(p)); |
|
223 |
904 |
freetr(right(p)); |
|
224 |
✓✗ | 1808 |
xfree(p); |
225 |
break; |
||
226 |
default: /* can't happen */ |
||
227 |
FATAL("can't happen: unknown type %d in freetr", type(p)); |
||
228 |
break; |
||
229 |
} |
||
230 |
2056 |
} |
|
231 |
|||
232 |
/* in the parsing of regular expressions, metacharacters like . have */ |
||
233 |
/* to be seen literally; \056 is not a metacharacter. */ |
||
234 |
|||
235 |
int hexstr(uschar **pp) /* find and eval hex string at pp, return new p */ |
||
236 |
{ /* only pick up one 8-bit byte (2 chars) */ |
||
237 |
uschar *p; |
||
238 |
int n = 0; |
||
239 |
int i; |
||
240 |
|||
241 |
for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) { |
||
242 |
if (isdigit(*p)) |
||
243 |
n = 16 * n + *p - '0'; |
||
244 |
else if (*p >= 'a' && *p <= 'f') |
||
245 |
n = 16 * n + *p - 'a' + 10; |
||
246 |
else if (*p >= 'A' && *p <= 'F') |
||
247 |
n = 16 * n + *p - 'A' + 10; |
||
248 |
} |
||
249 |
*pp = (uschar *) p; |
||
250 |
return n; |
||
251 |
} |
||
252 |
|||
253 |
#define isoctdigit(c) ((c) >= '0' && (c) <= '7') /* multiple use of arg */ |
||
254 |
|||
255 |
int quoted(uschar **pp) /* pick up next thing after a \\ */ |
||
256 |
/* and increment *pp */ |
||
257 |
{ |
||
258 |
62 |
uschar *p = *pp; |
|
259 |
int c; |
||
260 |
|||
261 |
✓✓ | 31 |
if ((c = *p++) == 't') |
262 |
4 |
c = '\t'; |
|
263 |
✗✓ | 27 |
else if (c == 'n') |
264 |
c = '\n'; |
||
265 |
✗✓ | 27 |
else if (c == 'f') |
266 |
c = '\f'; |
||
267 |
✗✓ | 27 |
else if (c == 'r') |
268 |
c = '\r'; |
||
269 |
✗✓ | 27 |
else if (c == 'b') |
270 |
c = '\b'; |
||
271 |
✗✓ | 27 |
else if (c == '\\') |
272 |
c = '\\'; |
||
273 |
✗✓ | 27 |
else if (c == 'x') { /* hexadecimal goo follows */ |
274 |
c = hexstr(&p); /* this adds a null if number is invalid */ |
||
275 |
✗✓ | 27 |
} else if (isoctdigit(c)) { /* \d \dd \ddd */ |
276 |
int n = c - '0'; |
||
277 |
if (isoctdigit(*p)) { |
||
278 |
n = 8 * n + *p++ - '0'; |
||
279 |
if (isoctdigit(*p)) |
||
280 |
n = 8 * n + *p++ - '0'; |
||
281 |
} |
||
282 |
c = n; |
||
283 |
} /* else */ |
||
284 |
/* c = c; */ |
||
285 |
31 |
*pp = p; |
|
286 |
31 |
return c; |
|
287 |
31 |
} |
|
288 |
|||
289 |
char *cclenter(const char *argp) /* add a character class */ |
||
290 |
{ |
||
291 |
int i, c, c2; |
||
292 |
28 |
uschar *p = (uschar *) argp; |
|
293 |
14 |
uschar *op, *bp; |
|
294 |
static uschar *buf = 0; |
||
295 |
static int bufsz = 100; |
||
296 |
|||
297 |
14 |
op = p; |
|
298 |
✓✓✗✓ |
23 |
if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL) |
299 |
FATAL("out of space for character class [%.10s...] 1", p); |
||
300 |
14 |
bp = buf; |
|
301 |
✓✓ | 294 |
for (i = 0; (c = *p++) != 0; ) { |
302 |
✓✓ | 133 |
if (c == '\\') { |
303 |
4 |
c = quoted(&p); |
|
304 |
✗✓✗✗ |
133 |
} else if (c == '-' && i > 0 && bp[-1] != 0) { |
305 |
if (*p != 0) { |
||
306 |
c = bp[-1]; |
||
307 |
c2 = *p++; |
||
308 |
if (c2 == '\\') |
||
309 |
c2 = quoted(&p); |
||
310 |
if (c > c2) { /* empty; ignore */ |
||
311 |
bp--; |
||
312 |
i--; |
||
313 |
continue; |
||
314 |
} |
||
315 |
while (c < c2) { |
||
316 |
if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter1")) |
||
317 |
FATAL("out of space for character class [%.10s...] 2", p); |
||
318 |
*bp++ = ++c; |
||
319 |
i++; |
||
320 |
} |
||
321 |
continue; |
||
322 |
} |
||
323 |
} |
||
324 |
✗✓ | 133 |
if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter2")) |
325 |
FATAL("out of space for character class [%.10s...] 3", p); |
||
326 |
133 |
*bp++ = c; |
|
327 |
133 |
i++; |
|
328 |
} |
||
329 |
14 |
*bp = 0; |
|
330 |
✗✓ | 14 |
dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) ); |
331 |
✓✗ | 28 |
xfree(op); |
332 |
28 |
return (char *) tostring((char *) buf); |
|
333 |
14 |
} |
|
334 |
|||
335 |
void overflo(const char *s) |
||
336 |
{ |
||
337 |
FATAL("regular expression too big: %.30s...", s); |
||
338 |
} |
||
339 |
|||
340 |
void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfollow[leaf] */ |
||
341 |
{ |
||
342 |
int i; |
||
343 |
int *p; |
||
344 |
|||
345 |
✗✗✗✗ ✗✗✓✗ ✗✓✗✓ ✗ |
4112 |
switch (type(v)) { |
346 |
ELEAF |
||
347 |
LEAF |
||
348 |
1024 |
f->re[info(v)].ltype = type(v); |
|
349 |
1024 |
f->re[info(v)].lval.np = right(v); |
|
350 |
✗✓ | 2048 |
while (f->accept >= maxsetvec) { /* guessing here! */ |
351 |
setvec = reallocarray(setvec, maxsetvec, |
||
352 |
4 * sizeof(int)); |
||
353 |
tmpset = reallocarray(tmpset, maxsetvec, |
||
354 |
4 * sizeof(int)); |
||
355 |
if (setvec == 0 || tmpset == 0) |
||
356 |
overflo("out of space in cfoll()"); |
||
357 |
maxsetvec *= 4; |
||
358 |
} |
||
359 |
✓✓ | 22816 |
for (i = 0; i <= f->accept; i++) |
360 |
10384 |
setvec[i] = 0; |
|
361 |
1024 |
setcnt = 0; |
|
362 |
1024 |
follow(v); /* computes setvec and setcnt */ |
|
363 |
✗✓ | 1024 |
if ((p = (int *) calloc(setcnt+1, sizeof(int))) == NULL) |
364 |
overflo("out of space building follow set"); |
||
365 |
1024 |
f->re[info(v)].lfollow = p; |
|
366 |
1024 |
*p = setcnt; |
|
367 |
✓✓ | 22816 |
for (i = f->accept; i >= 0; i--) |
368 |
✓✓ | 10384 |
if (setvec[i] == 1) |
369 |
1043 |
*++p = i; |
|
370 |
break; |
||
371 |
UNARY |
||
372 |
128 |
cfoll(f,left(v)); |
|
373 |
128 |
break; |
|
374 |
case CAT: |
||
375 |
case OR: |
||
376 |
904 |
cfoll(f,left(v)); |
|
377 |
904 |
cfoll(f,right(v)); |
|
378 |
904 |
break; |
|
379 |
default: /* can't happen */ |
||
380 |
FATAL("can't happen: unknown type %d in cfoll", type(v)); |
||
381 |
} |
||
382 |
2056 |
} |
|
383 |
|||
384 |
int first(Node *p) /* collects initially active leaves of p into setvec */ |
||
385 |
/* returns 0 if p matches empty string */ |
||
386 |
{ |
||
387 |
int b, lp; |
||
388 |
|||
389 |
✗✗✗✗ ✗✗✓✓ ✗✓✓✓ ✗ |
3454 |
switch (type(p)) { |
390 |
ELEAF |
||
391 |
LEAF |
||
392 |
1043 |
lp = info(p); /* look for high-water mark of subscripts */ |
|
393 |
✓✗✗✓ |
3129 |
while (setcnt >= maxsetvec || lp >= maxsetvec) { /* guessing here! */ |
394 |
setvec = reallocarray(setvec, maxsetvec, |
||
395 |
4 * sizeof(int)); |
||
396 |
tmpset = reallocarray(tmpset, maxsetvec, |
||
397 |
4 * sizeof(int)); |
||
398 |
if (setvec == 0 || tmpset == 0) |
||
399 |
overflo("out of space in first()"); |
||
400 |
maxsetvec *= 4; |
||
401 |
} |
||
402 |
✗✓ | 1043 |
if (type(p) == EMPTYRE) { |
403 |
setvec[lp] = 0; |
||
404 |
return(0); |
||
405 |
} |
||
406 |
✓✗ | 1043 |
if (setvec[lp] != 1) { |
407 |
1043 |
setvec[lp] = 1; |
|
408 |
1043 |
setcnt++; |
|
409 |
1043 |
} |
|
410 |
✓✓✗✓ |
1055 |
if (type(p) == CCL && (*(char *) right(p)) == '\0') |
411 |
return(0); /* empty CCL */ |
||
412 |
1043 |
else return(1); |
|
413 |
case PLUS: |
||
414 |
✗✓ | 2 |
if (first(left(p)) == 0) return(0); |
415 |
2 |
return(1); |
|
416 |
case STAR: |
||
417 |
case QUEST: |
||
418 |
7 |
first(left(p)); |
|
419 |
7 |
return(0); |
|
420 |
case CAT: |
||
421 |
✓✓✗✓ |
673 |
if (first(left(p)) == 0 && first(right(p)) == 0) return(0); |
422 |
671 |
return(1); |
|
423 |
case OR: |
||
424 |
4 |
b = first(right(p)); |
|
425 |
✗✓ | 4 |
if (first(left(p)) == 0 || b == 0) return(0); |
426 |
4 |
return(1); |
|
427 |
} |
||
428 |
FATAL("can't happen: unknown type %d in first", type(p)); /* can't happen */ |
||
429 |
return(-1); |
||
430 |
1727 |
} |
|
431 |
|||
432 |
void follow(Node *v) /* collects leaves that can follow v into setvec */ |
||
433 |
{ |
||
434 |
Node *p; |
||
435 |
|||
436 |
✓✓ | 3896 |
if (type(v) == FINAL) |
437 |
120 |
return; |
|
438 |
1828 |
p = parent(v); |
|
439 |
✗✓✗✓ ✓✗ |
1828 |
switch (type(p)) { |
440 |
case STAR: |
||
441 |
case PLUS: |
||
442 |
128 |
first(v); |
|
443 |
128 |
follow(p); |
|
444 |
128 |
return; |
|
445 |
|||
446 |
case OR: |
||
447 |
case QUEST: |
||
448 |
4 |
follow(p); |
|
449 |
4 |
return; |
|
450 |
|||
451 |
case CAT: |
||
452 |
✓✓ | 1696 |
if (v == left(p)) { /* v is left child of p */ |
453 |
✓✓ | 909 |
if (first(right(p)) == 0) { |
454 |
5 |
follow(p); |
|
455 |
5 |
return; |
|
456 |
} |
||
457 |
} else /* v is right child */ |
||
458 |
787 |
follow(p); |
|
459 |
1691 |
return; |
|
460 |
} |
||
461 |
1948 |
} |
|
462 |
|||
463 |
int member(int c, const char *sarg) /* is c in s? */ |
||
464 |
{ |
||
465 |
uschar *s = (uschar *) sarg; |
||
466 |
|||
467 |
✓✓ | 4242 |
while (*s) |
468 |
✓✓ | 3277 |
if (c == *s++) |
469 |
97 |
return(1); |
|
470 |
257 |
return(0); |
|
471 |
354 |
} |
|
472 |
|||
473 |
int match(fa *f, const char *p0) /* shortest match ? */ |
||
474 |
{ |
||
475 |
int s, ns; |
||
476 |
uschar *p = (uschar *) p0; |
||
477 |
|||
478 |
✗✓ | 207668 |
s = f->reset ? makeinit(f,0) : f->initstat; |
479 |
✗✓ | 51917 |
if (f->out[s]) |
480 |
return(1); |
||
481 |
do { |
||
482 |
/* assert(*p < NCHARS); */ |
||
483 |
✓✓ | 1182152 |
if ((ns = f->gototab[s][*p]) != 0) |
484 |
1173602 |
s = ns; |
|
485 |
else |
||
486 |
8550 |
s = cgoto(f, s, *p); |
|
487 |
✓✓ | 1182152 |
if (f->out[s]) |
488 |
11119 |
return(1); |
|
489 |
✓✓ | 1171033 |
} while (*p++ != 0); |
490 |
40798 |
return(0); |
|
491 |
51917 |
} |
|
492 |
|||
493 |
int pmatch(fa *f, const char *p0) /* longest match, for sub */ |
||
494 |
{ |
||
495 |
int s, ns; |
||
496 |
uschar *p = (uschar *) p0; |
||
497 |
uschar *q; |
||
498 |
int i, k; |
||
499 |
|||
500 |
/* s = f->reset ? makeinit(f,1) : f->initstat; */ |
||
501 |
✗✓ | 2424 |
if (f->reset) { |
502 |
f->initstat = s = makeinit(f,1); |
||
503 |
} else { |
||
504 |
1212 |
s = f->initstat; |
|
505 |
} |
||
506 |
1212 |
patbeg = (char *) p; |
|
507 |
1212 |
patlen = -1; |
|
508 |
1212 |
do { |
|
509 |
q = p; |
||
510 |
61554 |
do { |
|
511 |
✓✓ | 61626 |
if (f->out[s]) /* final state */ |
512 |
18 |
patlen = q-p; |
|
513 |
/* assert(*q < NCHARS); */ |
||
514 |
✓✓ | 61626 |
if ((ns = f->gototab[s][*q]) != 0) |
515 |
61248 |
s = ns; |
|
516 |
else |
||
517 |
378 |
s = cgoto(f, s, *q); |
|
518 |
✓✓ | 61626 |
if (s == 1) { /* no transition */ |
519 |
✓✓ | 61554 |
if (patlen >= 0) { |
520 |
18 |
patbeg = (char *) p; |
|
521 |
18 |
return(1); |
|
522 |
} |
||
523 |
else |
||
524 |
goto nextin; /* no match */ |
||
525 |
} |
||
526 |
✓✗ | 72 |
} while (*q++ != 0); |
527 |
if (f->out[s]) |
||
528 |
patlen = q-p-1; /* don't count $ */ |
||
529 |
if (patlen >= 0) { |
||
530 |
patbeg = (char *) p; |
||
531 |
return(1); |
||
532 |
} |
||
533 |
nextin: |
||
534 |
s = 2; |
||
535 |
✗✓ | 61536 |
if (f->reset) { |
536 |
for (i = 2; i <= f->curstat; i++) |
||
537 |
xfree(f->posns[i]); |
||
538 |
k = *f->posns[0]; |
||
539 |
if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL) |
||
540 |
overflo("out of space in pmatch"); |
||
541 |
for (i = 0; i <= k; i++) |
||
542 |
(f->posns[2])[i] = (f->posns[0])[i]; |
||
543 |
f->initstat = f->curstat = 2; |
||
544 |
f->out[2] = f->out[0]; |
||
545 |
for (i = 0; i < NCHARS; i++) |
||
546 |
f->gototab[2][i] = 0; |
||
547 |
} |
||
548 |
✓✓ | 61536 |
} while (*p++ != 0); |
549 |
1194 |
return (0); |
|
550 |
1212 |
} |
|
551 |
|||
552 |
int nematch(fa *f, const char *p0) /* non-empty match, for sub */ |
||
553 |
{ |
||
554 |
int s, ns; |
||
555 |
uschar *p = (uschar *) p0; |
||
556 |
uschar *q; |
||
557 |
int i, k; |
||
558 |
|||
559 |
/* s = f->reset ? makeinit(f,1) : f->initstat; */ |
||
560 |
if (f->reset) { |
||
561 |
f->initstat = s = makeinit(f,1); |
||
562 |
} else { |
||
563 |
s = f->initstat; |
||
564 |
} |
||
565 |
patlen = -1; |
||
566 |
while (*p) { |
||
567 |
q = p; |
||
568 |
do { |
||
569 |
if (f->out[s]) /* final state */ |
||
570 |
patlen = q-p; |
||
571 |
/* assert(*q < NCHARS); */ |
||
572 |
if ((ns = f->gototab[s][*q]) != 0) |
||
573 |
s = ns; |
||
574 |
else |
||
575 |
s = cgoto(f, s, *q); |
||
576 |
if (s == 1) { /* no transition */ |
||
577 |
if (patlen > 0) { |
||
578 |
patbeg = (char *) p; |
||
579 |
return(1); |
||
580 |
} else |
||
581 |
goto nnextin; /* no nonempty match */ |
||
582 |
} |
||
583 |
} while (*q++ != 0); |
||
584 |
if (f->out[s]) |
||
585 |
patlen = q-p-1; /* don't count $ */ |
||
586 |
if (patlen > 0 ) { |
||
587 |
patbeg = (char *) p; |
||
588 |
return(1); |
||
589 |
} |
||
590 |
nnextin: |
||
591 |
s = 2; |
||
592 |
if (f->reset) { |
||
593 |
for (i = 2; i <= f->curstat; i++) |
||
594 |
xfree(f->posns[i]); |
||
595 |
k = *f->posns[0]; |
||
596 |
if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL) |
||
597 |
overflo("out of state space"); |
||
598 |
for (i = 0; i <= k; i++) |
||
599 |
(f->posns[2])[i] = (f->posns[0])[i]; |
||
600 |
f->initstat = f->curstat = 2; |
||
601 |
f->out[2] = f->out[0]; |
||
602 |
for (i = 0; i < NCHARS; i++) |
||
603 |
f->gototab[2][i] = 0; |
||
604 |
} |
||
605 |
p++; |
||
606 |
} |
||
607 |
return (0); |
||
608 |
} |
||
609 |
|||
610 |
Node *reparse(const char *p) /* parses regular expression pointed to by p */ |
||
611 |
{ /* uses relex() to scan regular expression */ |
||
612 |
Node *np; |
||
613 |
|||
614 |
✗✓ | 240 |
dprintf( ("reparse <%s>\n", p) ); |
615 |
120 |
lastre = prestr = (uschar *) p; /* prestr points to string to be parsed */ |
|
616 |
120 |
rtok = relex(); |
|
617 |
/* GNU compatibility: an empty regexp matches anything */ |
||
618 |
✗✓ | 120 |
if (rtok == '\0') { |
619 |
/* FATAL("empty regular expression"); previous */ |
||
620 |
return(op2(EMPTYRE, NIL, NIL)); |
||
621 |
} |
||
622 |
120 |
np = regexp(); |
|
623 |
✗✓ | 120 |
if (rtok != '\0') |
624 |
FATAL("syntax error in regular expression %s at %s", lastre, prestr); |
||
625 |
120 |
return(np); |
|
626 |
120 |
} |
|
627 |
|||
628 |
Node *regexp(void) /* top-level parse of reg expr */ |
||
629 |
{ |
||
630 |
244 |
return (alt(concat(primary()))); |
|
631 |
} |
||
632 |
|||
633 |
Node *primary(void) |
||
634 |
{ |
||
635 |
Node *np; |
||
636 |
|||
637 |
✓✗✗✓ ✓✓✓✓ ✓✗ |
1572 |
switch (rtok) { |
638 |
case CHAR: |
||
639 |
675 |
np = op2(CHAR, NIL, itonp(rlxval)); |
|
640 |
675 |
rtok = relex(); |
|
641 |
675 |
return (unary(np)); |
|
642 |
case ALL: |
||
643 |
rtok = relex(); |
||
644 |
return (unary(op2(ALL, NIL, NIL))); |
||
645 |
case EMPTYRE: |
||
646 |
rtok = relex(); |
||
647 |
return (unary(op2(ALL, NIL, NIL))); |
||
648 |
case DOT: |
||
649 |
4 |
rtok = relex(); |
|
650 |
4 |
return (unary(op2(DOT, NIL, NIL))); |
|
651 |
case CCL: |
||
652 |
6 |
np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr)); |
|
653 |
6 |
rtok = relex(); |
|
654 |
6 |
return (unary(np)); |
|
655 |
case NCCL: |
||
656 |
8 |
np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr)); |
|
657 |
8 |
rtok = relex(); |
|
658 |
8 |
return (unary(np)); |
|
659 |
case '^': |
||
660 |
90 |
rtok = relex(); |
|
661 |
90 |
return (unary(op2(CHAR, NIL, itonp(HAT)))); |
|
662 |
case '$': |
||
663 |
1 |
rtok = relex(); |
|
664 |
1 |
return (unary(op2(CHAR, NIL, NIL))); |
|
665 |
case '(': |
||
666 |
2 |
rtok = relex(); |
|
667 |
✗✓ | 2 |
if (rtok == ')') { /* special pleading for () */ |
668 |
rtok = relex(); |
||
669 |
return unary(op2(CCL, NIL, (Node *) tostring(""))); |
||
670 |
} |
||
671 |
2 |
np = regexp(); |
|
672 |
✓✗ | 2 |
if (rtok == ')') { |
673 |
2 |
rtok = relex(); |
|
674 |
2 |
return (unary(np)); |
|
675 |
} |
||
676 |
else |
||
677 |
FATAL("syntax error in regular expression %s at %s", lastre, prestr); |
||
678 |
default: |
||
679 |
FATAL("illegal primary in regular expression %s at %s", lastre, prestr); |
||
680 |
} |
||
681 |
return 0; /*NOTREACHED*/ |
||
682 |
786 |
} |
|
683 |
|||
684 |
Node *concat(Node *np) |
||
685 |
{ |
||
686 |
✗✗✗✗ ✗✗✗✓ ✓ |
1572 |
switch (rtok) { |
687 |
case CHAR: case DOT: case ALL: case EMPTYRE: case CCL: case NCCL: case '$': case '(': |
||
688 |
662 |
return (concat(op2(CAT, np, primary()))); |
|
689 |
} |
||
690 |
124 |
return (np); |
|
691 |
786 |
} |
|
692 |
|||
693 |
Node *alt(Node *np) |
||
694 |
{ |
||
695 |
✓✓ | 248 |
if (rtok == OR) { |
696 |
2 |
rtok = relex(); |
|
697 |
2 |
return (alt(op2(OR, np, concat(primary())))); |
|
698 |
} |
||
699 |
122 |
return (np); |
|
700 |
124 |
} |
|
701 |
|||
702 |
Node *unary(Node *np) |
||
703 |
{ |
||
704 |
✓✓✗✓ |
1588 |
switch (rtok) { |
705 |
case STAR: |
||
706 |
7 |
rtok = relex(); |
|
707 |
7 |
return (unary(op2(STAR, np, NIL))); |
|
708 |
case PLUS: |
||
709 |
1 |
rtok = relex(); |
|
710 |
1 |
return (unary(op2(PLUS, np, NIL))); |
|
711 |
case QUEST: |
||
712 |
rtok = relex(); |
||
713 |
return (unary(op2(QUEST, np, NIL))); |
||
714 |
default: |
||
715 |
786 |
return (np); |
|
716 |
} |
||
717 |
794 |
} |
|
718 |
|||
719 |
/* |
||
720 |
* Character class definitions conformant to the POSIX locale as |
||
721 |
* defined in IEEE P1003.1 draft 7 of June 2001, assuming the source |
||
722 |
* and operating character sets are both ASCII (ISO646) or supersets |
||
723 |
* thereof. |
||
724 |
* |
||
725 |
* Note that to avoid overflowing the temporary buffer used in |
||
726 |
* relex(), the expanded character class (prior to range expansion) |
||
727 |
* must be less than twice the size of their full name. |
||
728 |
*/ |
||
729 |
|||
730 |
/* Because isblank doesn't show up in any of the header files on any |
||
731 |
* system i use, it's defined here. if some other locale has a richer |
||
732 |
* definition of "blank", define HAS_ISBLANK and provide your own |
||
733 |
* version. |
||
734 |
* the parentheses here are an attempt to find a path through the maze |
||
735 |
* of macro definition and/or function and/or version provided. thanks |
||
736 |
* to nelson beebe for the suggestion; let's see if it works everywhere. |
||
737 |
*/ |
||
738 |
|||
739 |
#ifndef HAS_ISBLANK |
||
740 |
|||
741 |
int (xisblank)(int c) |
||
742 |
{ |
||
743 |
return c==' ' || c=='\t'; |
||
744 |
} |
||
745 |
|||
746 |
#endif |
||
747 |
|||
748 |
struct charclass { |
||
749 |
const char *cc_name; |
||
750 |
int cc_namelen; |
||
751 |
int (*cc_func)(int); |
||
752 |
} charclasses[] = { |
||
753 |
{ "alnum", 5, isalnum }, |
||
754 |
{ "alpha", 5, isalpha }, |
||
755 |
#ifndef HAS_ISBLANK |
||
756 |
{ "blank", 5, isspace }, /* was isblank */ |
||
757 |
#else |
||
758 |
{ "blank", 5, isblank }, |
||
759 |
#endif |
||
760 |
{ "cntrl", 5, iscntrl }, |
||
761 |
{ "digit", 5, isdigit }, |
||
762 |
{ "graph", 5, isgraph }, |
||
763 |
{ "lower", 5, islower }, |
||
764 |
{ "print", 5, isprint }, |
||
765 |
{ "punct", 5, ispunct }, |
||
766 |
{ "space", 5, isspace }, |
||
767 |
{ "upper", 5, isupper }, |
||
768 |
{ "xdigit", 6, isxdigit }, |
||
769 |
{ NULL, 0, NULL }, |
||
770 |
}; |
||
771 |
|||
772 |
|||
773 |
int relex(void) /* lexical analyzer for reparse */ |
||
774 |
{ |
||
775 |
int c, n; |
||
776 |
int cflag; |
||
777 |
static uschar *buf = 0; |
||
778 |
static int bufsz = 100; |
||
779 |
1836 |
uschar *bp; |
|
780 |
struct charclass *cc; |
||
781 |
int i; |
||
782 |
|||
783 |
✓✓✓✗ ✓✓✗✗ ✗✓✓✓ ✓ |
918 |
switch (c = *prestr++) { |
784 |
2 |
case '|': return OR; |
|
785 |
7 |
case '*': return STAR; |
|
786 |
1 |
case '+': return PLUS; |
|
787 |
case '?': return QUEST; |
||
788 |
4 |
case '.': return DOT; |
|
789 |
120 |
case '\0': prestr--; return '\0'; |
|
790 |
case '^': |
||
791 |
case '$': |
||
792 |
case '(': |
||
793 |
case ')': |
||
794 |
95 |
return c; |
|
795 |
case '\\': |
||
796 |
27 |
rlxval = quoted(&prestr); |
|
797 |
27 |
return CHAR; |
|
798 |
default: |
||
799 |
648 |
rlxval = c; |
|
800 |
648 |
return CHAR; |
|
801 |
case '[': |
||
802 |
✓✓✗✓ |
23 |
if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL) |
803 |
FATAL("out of space in reg expr %.10s..", lastre); |
||
804 |
14 |
bp = buf; |
|
805 |
✓✓ | 14 |
if (*prestr == '^') { |
806 |
cflag = 1; |
||
807 |
8 |
prestr++; |
|
808 |
8 |
} |
|
809 |
else |
||
810 |
cflag = 0; |
||
811 |
14 |
n = 2 * strlen((const char *) prestr)+1; |
|
812 |
✗✓ | 14 |
if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, "relex1")) |
813 |
FATAL("out of space for reg expr %.10s...", lastre); |
||
814 |
for (; ; ) { |
||
815 |
✓✓ | 147 |
if ((c = *prestr++) == '\\') { |
816 |
4 |
*bp++ = '\\'; |
|
817 |
✗✓ | 4 |
if ((c = *prestr++) == '\0') |
818 |
FATAL("nonterminated character class %.20s...", lastre); |
||
819 |
4 |
*bp++ = c; |
|
820 |
/* } else if (c == '\n') { */ |
||
821 |
/* FATAL("newline in character class %.20s...", lastre); */ |
||
822 |
✗✓✗✗ |
147 |
} else if (c == '[' && *prestr == ':') { |
823 |
/* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */ |
||
824 |
for (cc = charclasses; cc->cc_name; cc++) |
||
825 |
if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0) |
||
826 |
break; |
||
827 |
if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' && |
||
828 |
prestr[2 + cc->cc_namelen] == ']') { |
||
829 |
prestr += cc->cc_namelen + 3; |
||
830 |
for (i = 0; i < NCHARS; i++) { |
||
831 |
if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, "relex2")) |
||
832 |
FATAL("out of space for reg expr %.10s...", lastre); |
||
833 |
if (cc->cc_func(i)) { |
||
834 |
*bp++ = i; |
||
835 |
n++; |
||
836 |
} |
||
837 |
} |
||
838 |
} else |
||
839 |
*bp++ = c; |
||
840 |
✗✓ | 143 |
} else if (c == '\0') { |
841 |
FATAL("nonterminated character class %.20s", lastre); |
||
842 |
✓✓ | 143 |
} else if (bp == buf) { /* 1st char is special */ |
843 |
10 |
*bp++ = c; |
|
844 |
✓✓ | 143 |
} else if (c == ']') { |
845 |
14 |
*bp++ = 0; |
|
846 |
14 |
rlxstr = (uschar *) tostring((char *) buf); |
|
847 |
✓✓ | 14 |
if (cflag == 0) |
848 |
6 |
return CCL; |
|
849 |
else |
||
850 |
8 |
return NCCL; |
|
851 |
} else |
||
852 |
119 |
*bp++ = c; |
|
853 |
} |
||
854 |
} |
||
855 |
918 |
} |
|
856 |
|||
857 |
int cgoto(fa *f, int s, int c) |
||
858 |
{ |
||
859 |
int i, j, k; |
||
860 |
int *p, *q; |
||
861 |
|||
862 |
assert(c == HAT || c < NCHARS); |
||
863 |
✗✓ | 27144 |
while (f->accept >= maxsetvec) { /* guessing here! */ |
864 |
setvec = reallocarray(setvec, maxsetvec, 4 * sizeof(int)); |
||
865 |
tmpset = reallocarray(tmpset, maxsetvec, 4 * sizeof(int)); |
||
866 |
if (setvec == 0 || tmpset == 0) |
||
867 |
overflo("out of space in cgoto()"); |
||
868 |
maxsetvec *= 4; |
||
869 |
} |
||
870 |
✓✓ | 169188 |
for (i = 0; i <= f->accept; i++) |
871 |
75546 |
setvec[i] = 0; |
|
872 |
9048 |
setcnt = 0; |
|
873 |
/* compute positions of gototab[s,c] into setvec */ |
||
874 |
9048 |
p = f->posns[s]; |
|
875 |
✓✓ | 57330 |
for (i = 1; i <= *p; i++) { |
876 |
✓✓ | 19617 |
if ((k = f->re[p[i]].ltype) != FINAL) { |
877 |
✓✓ | 30196 |
if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np)) |
878 |
✓✓✓✗ |
29779 |
|| (k == DOT && c != 0 && c != HAT) |
879 |
✓✓ | 19194 |
|| (k == ALL && c != 0) |
880 |
✓✗ | 10724 |
|| (k == EMPTYRE && c != 0) |
881 |
✓✓ | 10918 |
|| (k == CCL && member(c, (char *) f->re[p[i]].lval.up)) |
882 |
✓✓✓✓ ✓✓ |
10989 |
|| (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) { |
883 |
9122 |
q = f->re[p[i]].lfollow; |
|
884 |
✓✓ | 53776 |
for (j = 1; j <= *q; j++) { |
885 |
✗✓ | 17766 |
if (q[j] >= maxsetvec) { |
886 |
setvec = reallocarray(setvec, |
||
887 |
maxsetvec, 4 * sizeof(int)); |
||
888 |
tmpset = reallocarray(tmpset, |
||
889 |
maxsetvec, 4 * sizeof(int)); |
||
890 |
if (setvec == 0 || tmpset == 0) |
||
891 |
overflo("cgoto overflow"); |
||
892 |
maxsetvec *= 4; |
||
893 |
} |
||
894 |
✓✗ | 17766 |
if (setvec[q[j]] == 0) { |
895 |
17766 |
setcnt++; |
|
896 |
17766 |
setvec[q[j]] = 1; |
|
897 |
17766 |
} |
|
898 |
} |
||
899 |
} |
||
900 |
} |
||
901 |
} |
||
902 |
/* determine if setvec is a previous state */ |
||
903 |
9048 |
tmpset[0] = setcnt; |
|
904 |
j = 1; |
||
905 |
✓✓ | 169188 |
for (i = f->accept; i >= 0; i--) |
906 |
✓✓ | 75546 |
if (setvec[i]) { |
907 |
17766 |
tmpset[j++] = i; |
|
908 |
17766 |
} |
|
909 |
/* tmpset == previous state? */ |
||
910 |
✓✓ | 40676 |
for (i = 1; i <= f->curstat; i++) { |
911 |
19930 |
p = f->posns[i]; |
|
912 |
✓✓ | 19930 |
if ((k = tmpset[0]) != p[0]) |
913 |
goto different; |
||
914 |
✓✓ | 53412 |
for (j = 1; j <= k; j++) |
915 |
✓✓ | 18066 |
if (tmpset[j] != p[j]) |
916 |
goto different; |
||
917 |
/* setvec is state i */ |
||
918 |
8640 |
f->gototab[s][c] = i; |
|
919 |
8640 |
return i; |
|
920 |
different:; |
||
921 |
} |
||
922 |
|||
923 |
/* add tmpset to current set of states */ |
||
924 |
✗✓ | 408 |
if (f->curstat >= NSTATES-1) { |
925 |
f->curstat = 2; |
||
926 |
f->reset = 1; |
||
927 |
for (i = 2; i < NSTATES; i++) |
||
928 |
xfree(f->posns[i]); |
||
929 |
} else |
||
930 |
408 |
++(f->curstat); |
|
931 |
✓✓ | 212160 |
for (i = 0; i < NCHARS; i++) |
932 |
105672 |
f->gototab[f->curstat][i] = 0; |
|
933 |
✗✓ | 408 |
xfree(f->posns[f->curstat]); |
934 |
✗✓ | 408 |
if ((p = (int *) calloc(setcnt+1, sizeof(int))) == NULL) |
935 |
overflo("out of space in cgoto"); |
||
936 |
|||
937 |
408 |
f->posns[f->curstat] = p; |
|
938 |
408 |
f->gototab[s][c] = f->curstat; |
|
939 |
✓✓ | 3998 |
for (i = 0; i <= setcnt; i++) |
940 |
1591 |
p[i] = tmpset[i]; |
|
941 |
408 |
if (setvec[f->accept]) |
|
942 |
f->out[f->curstat] = 1; |
||
943 |
else |
||
944 |
f->out[f->curstat] = 0; |
||
945 |
408 |
return f->curstat; |
|
946 |
9048 |
} |
|
947 |
|||
948 |
|||
949 |
void freefa(fa *f) /* free a finite automaton */ |
||
950 |
{ |
||
951 |
int i; |
||
952 |
|||
953 |
if (f == NULL) |
||
954 |
return; |
||
955 |
for (i = 0; i <= f->curstat; i++) |
||
956 |
xfree(f->posns[i]); |
||
957 |
for (i = 0; i <= f->accept; i++) { |
||
958 |
xfree(f->re[i].lfollow); |
||
959 |
if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL) |
||
960 |
xfree((f->re[i].lval.np)); |
||
961 |
} |
||
962 |
xfree(f->restr); |
||
963 |
xfree(f); |
||
964 |
} |
Generated by: GCOVR (Version 3.3) |