1 |
|
|
/* $OpenBSD: patterns.c,v 1.5 2016/02/14 18:20:59 semarie Exp $ */ |
2 |
|
|
|
3 |
|
|
/* |
4 |
|
|
* Copyright (c) 2015 Reyk Floeter <reyk@openbsd.org> |
5 |
|
|
* Copyright (C) 1994-2015 Lua.org, PUC-Rio. |
6 |
|
|
* |
7 |
|
|
* Permission is hereby granted, free of charge, to any person obtaining |
8 |
|
|
* a copy of this software and associated documentation files (the |
9 |
|
|
* "Software"), to deal in the Software without restriction, including |
10 |
|
|
* without limitation the rights to use, copy, modify, merge, publish, |
11 |
|
|
* distribute, sublicense, and/or sell copies of the Software, and to |
12 |
|
|
* permit persons to whom the Software is furnished to do so, subject to |
13 |
|
|
* the following conditions: |
14 |
|
|
* |
15 |
|
|
* The above copyright notice and this permission notice shall be |
16 |
|
|
* included in all copies or substantial portions of the Software. |
17 |
|
|
* |
18 |
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
19 |
|
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
20 |
|
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
21 |
|
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
22 |
|
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
23 |
|
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
24 |
|
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
25 |
|
|
*/ |
26 |
|
|
|
27 |
|
|
/* |
28 |
|
|
* Derived from Lua 5.3.1: |
29 |
|
|
* $Id: patterns.c,v 1.5 2016/02/14 18:20:59 semarie Exp $ |
30 |
|
|
* Standard library for string operations and pattern-matching |
31 |
|
|
*/ |
32 |
|
|
|
33 |
|
|
#include <sys/types.h> |
34 |
|
|
#include <ctype.h> |
35 |
|
|
#include <errno.h> |
36 |
|
|
#include <stddef.h> |
37 |
|
|
#include <stdlib.h> |
38 |
|
|
#include <string.h> |
39 |
|
|
|
40 |
|
|
#include "patterns.h" |
41 |
|
|
|
42 |
|
|
#define uchar(c) ((unsigned char)(c)) /* macro to 'unsign' a char */ |
43 |
|
|
#define CAP_UNFINISHED (-1) |
44 |
|
|
#define CAP_POSITION (-2) |
45 |
|
|
#define L_ESC '%' |
46 |
|
|
#define SPECIALS "^$*+?.([%-" |
47 |
|
|
|
48 |
|
|
struct match_state { |
49 |
|
|
int matchdepth; /* control for recursive depth (to avoid C |
50 |
|
|
* stack overflow) */ |
51 |
|
|
int repetitioncounter; /* control the repetition items */ |
52 |
|
|
int maxcaptures; /* configured capture limit */ |
53 |
|
|
const char *src_init; /* init of source string */ |
54 |
|
|
const char *src_end; /* end ('\0') of source string */ |
55 |
|
|
const char *p_end; /* end ('\0') of pattern */ |
56 |
|
|
const char *error; /* should be NULL */ |
57 |
|
|
int level; /* total number of captures (finished or |
58 |
|
|
* unfinished) */ |
59 |
|
|
struct { |
60 |
|
|
const char *init; |
61 |
|
|
ptrdiff_t len; |
62 |
|
|
} capture[MAXCAPTURES]; |
63 |
|
|
}; |
64 |
|
|
|
65 |
|
|
/* recursive function */ |
66 |
|
|
static const char *match(struct match_state *, const char *, const char *); |
67 |
|
|
|
68 |
|
|
static int |
69 |
|
|
match_error(struct match_state *ms, const char *error) |
70 |
|
|
{ |
71 |
|
|
ms->error = ms->error == NULL ? error : ms->error; |
72 |
|
|
return (-1); |
73 |
|
|
} |
74 |
|
|
|
75 |
|
|
static int |
76 |
|
|
check_capture(struct match_state *ms, int l) |
77 |
|
|
{ |
78 |
|
|
l -= '1'; |
79 |
|
|
if (l < 0 || l >= ms->level || ms->capture[l].len == CAP_UNFINISHED) |
80 |
|
|
return match_error(ms, "invalid capture index"); |
81 |
|
|
return (l); |
82 |
|
|
} |
83 |
|
|
|
84 |
|
|
static int |
85 |
|
|
capture_to_close(struct match_state *ms) |
86 |
|
|
{ |
87 |
|
|
int level = ms->level; |
88 |
|
|
for (level--; level >= 0; level--) |
89 |
|
|
if (ms->capture[level].len == CAP_UNFINISHED) |
90 |
|
|
return (level); |
91 |
|
|
return match_error(ms, "invalid pattern capture"); |
92 |
|
|
} |
93 |
|
|
|
94 |
|
|
static const char * |
95 |
|
|
classend(struct match_state *ms, const char *p) |
96 |
|
|
{ |
97 |
|
|
switch (*p++) { |
98 |
|
|
case L_ESC: |
99 |
|
|
if (p == ms->p_end) |
100 |
|
|
match_error(ms, |
101 |
|
|
"malformed pattern (ends with '%')"); |
102 |
|
|
return p + 1; |
103 |
|
|
case '[': |
104 |
|
|
if (*p == '^') |
105 |
|
|
p++; |
106 |
|
|
do { |
107 |
|
|
/* look for a ']' */ |
108 |
|
|
if (p == ms->p_end) { |
109 |
|
|
match_error(ms, |
110 |
|
|
"malformed pattern (missing ']')"); |
111 |
|
|
break; |
112 |
|
|
} |
113 |
|
|
if (*(p++) == L_ESC && p < ms->p_end) { |
114 |
|
|
/* skip escapes (e.g. '%]') */ |
115 |
|
|
p++; |
116 |
|
|
} |
117 |
|
|
} while (*p != ']'); |
118 |
|
|
return p + 1; |
119 |
|
|
default: |
120 |
|
|
return p; |
121 |
|
|
} |
122 |
|
|
} |
123 |
|
|
|
124 |
|
|
static int |
125 |
|
|
match_class(int c, int cl) |
126 |
|
|
{ |
127 |
|
|
int res; |
128 |
|
|
switch (tolower(cl)) { |
129 |
|
|
case 'a': |
130 |
|
|
res = isalpha(c); |
131 |
|
|
break; |
132 |
|
|
case 'c': |
133 |
|
|
res = iscntrl(c); |
134 |
|
|
break; |
135 |
|
|
case 'd': |
136 |
|
|
res = isdigit(c); |
137 |
|
|
break; |
138 |
|
|
case 'g': |
139 |
|
|
res = isgraph(c); |
140 |
|
|
break; |
141 |
|
|
case 'l': |
142 |
|
|
res = islower(c); |
143 |
|
|
break; |
144 |
|
|
case 'p': |
145 |
|
|
res = ispunct(c); |
146 |
|
|
break; |
147 |
|
|
case 's': |
148 |
|
|
res = isspace(c); |
149 |
|
|
break; |
150 |
|
|
case 'u': |
151 |
|
|
res = isupper(c); |
152 |
|
|
break; |
153 |
|
|
case 'w': |
154 |
|
|
res = isalnum(c); |
155 |
|
|
break; |
156 |
|
|
case 'x': |
157 |
|
|
res = isxdigit(c); |
158 |
|
|
break; |
159 |
|
|
default: |
160 |
|
|
return (cl == c); |
161 |
|
|
} |
162 |
|
|
return (islower(cl) ? res : !res); |
163 |
|
|
} |
164 |
|
|
|
165 |
|
|
static int |
166 |
|
|
matchbracketclass(int c, const char *p, const char *ec) |
167 |
|
|
{ |
168 |
|
|
int sig = 1; |
169 |
|
|
if (*(p + 1) == '^') { |
170 |
|
|
sig = 0; |
171 |
|
|
/* skip the '^' */ |
172 |
|
|
p++; |
173 |
|
|
} |
174 |
|
|
while (++p < ec) { |
175 |
|
|
if (*p == L_ESC) { |
176 |
|
|
p++; |
177 |
|
|
if (match_class(c, uchar(*p))) |
178 |
|
|
return sig; |
179 |
|
|
} else if ((*(p + 1) == '-') && (p + 2 < ec)) { |
180 |
|
|
p += 2; |
181 |
|
|
if (uchar(*(p - 2)) <= c && c <= uchar(*p)) |
182 |
|
|
return sig; |
183 |
|
|
} else if (uchar(*p) == c) |
184 |
|
|
return sig; |
185 |
|
|
} |
186 |
|
|
return !sig; |
187 |
|
|
} |
188 |
|
|
|
189 |
|
|
static int |
190 |
|
|
singlematch(struct match_state *ms, const char *s, const char *p, |
191 |
|
|
const char *ep) |
192 |
|
|
{ |
193 |
|
|
if (s >= ms->src_end) |
194 |
|
|
return 0; |
195 |
|
|
else { |
196 |
|
|
int c = uchar(*s); |
197 |
|
|
switch (*p) { |
198 |
|
|
case '.': |
199 |
|
|
/* matches any char */ |
200 |
|
|
return (1); |
201 |
|
|
case L_ESC: |
202 |
|
|
return match_class(c, uchar(*(p + 1))); |
203 |
|
|
case '[': |
204 |
|
|
return matchbracketclass(c, p, ep - 1); |
205 |
|
|
default: |
206 |
|
|
return (uchar(*p) == c); |
207 |
|
|
} |
208 |
|
|
} |
209 |
|
|
} |
210 |
|
|
|
211 |
|
|
static const char * |
212 |
|
|
matchbalance(struct match_state *ms, const char *s, const char *p) |
213 |
|
|
{ |
214 |
|
|
if (p >= ms->p_end - 1) { |
215 |
|
|
match_error(ms, |
216 |
|
|
"malformed pattern (missing arguments to '%b')"); |
217 |
|
|
return (NULL); |
218 |
|
|
} |
219 |
|
|
if (*s != *p) |
220 |
|
|
return (NULL); |
221 |
|
|
else { |
222 |
|
|
int b = *p; |
223 |
|
|
int e = *(p + 1); |
224 |
|
|
int cont = 1; |
225 |
|
|
while (++s < ms->src_end) { |
226 |
|
|
if (*s == e) { |
227 |
|
|
if (--cont == 0) |
228 |
|
|
return s + 1; |
229 |
|
|
} else if (*s == b) |
230 |
|
|
cont++; |
231 |
|
|
} |
232 |
|
|
} |
233 |
|
|
|
234 |
|
|
/* string ends out of balance */ |
235 |
|
|
return (NULL); |
236 |
|
|
} |
237 |
|
|
|
238 |
|
|
static const char * |
239 |
|
|
max_expand(struct match_state *ms, const char *s, const char *p, const char *ep) |
240 |
|
|
{ |
241 |
|
|
ptrdiff_t i = 0; |
242 |
|
|
/* counts maximum expand for item */ |
243 |
|
|
while (singlematch(ms, s + i, p, ep)) |
244 |
|
|
i++; |
245 |
|
|
/* keeps trying to match with the maximum repetitions */ |
246 |
|
|
while (i >= 0) { |
247 |
|
|
const char *res = match(ms, (s + i), ep + 1); |
248 |
|
|
if (res) |
249 |
|
|
return res; |
250 |
|
|
/* else didn't match; reduce 1 repetition to try again */ |
251 |
|
|
i--; |
252 |
|
|
} |
253 |
|
|
return NULL; |
254 |
|
|
} |
255 |
|
|
|
256 |
|
|
static const char * |
257 |
|
|
min_expand(struct match_state *ms, const char *s, const char *p, const char *ep) |
258 |
|
|
{ |
259 |
|
|
for (;;) { |
260 |
|
|
const char *res = match(ms, s, ep + 1); |
261 |
|
|
if (res != NULL) |
262 |
|
|
return res; |
263 |
|
|
else if (singlematch(ms, s, p, ep)) |
264 |
|
|
s++; /* try with one more repetition */ |
265 |
|
|
else |
266 |
|
|
return NULL; |
267 |
|
|
} |
268 |
|
|
} |
269 |
|
|
|
270 |
|
|
static const char * |
271 |
|
|
start_capture(struct match_state *ms, const char *s, const char *p, int what) |
272 |
|
|
{ |
273 |
|
|
const char *res; |
274 |
|
|
|
275 |
|
|
int level = ms->level; |
276 |
|
|
if (level >= ms->maxcaptures) { |
277 |
|
|
match_error(ms, "too many captures"); |
278 |
|
|
return (NULL); |
279 |
|
|
} |
280 |
|
|
ms->capture[level].init = s; |
281 |
|
|
ms->capture[level].len = what; |
282 |
|
|
ms->level = level + 1; |
283 |
|
|
/* undo capture if match failed */ |
284 |
|
|
if ((res = match(ms, s, p)) == NULL) |
285 |
|
|
ms->level--; |
286 |
|
|
return res; |
287 |
|
|
} |
288 |
|
|
|
289 |
|
|
static const char * |
290 |
|
|
end_capture(struct match_state *ms, const char *s, const char *p) |
291 |
|
|
{ |
292 |
|
|
int l = capture_to_close(ms); |
293 |
|
|
const char *res; |
294 |
|
|
if (l == -1) |
295 |
|
|
return NULL; |
296 |
|
|
/* close capture */ |
297 |
|
|
ms->capture[l].len = s - ms->capture[l].init; |
298 |
|
|
/* undo capture if match failed */ |
299 |
|
|
if ((res = match(ms, s, p)) == NULL) |
300 |
|
|
ms->capture[l].len = CAP_UNFINISHED; |
301 |
|
|
return res; |
302 |
|
|
} |
303 |
|
|
|
304 |
|
|
static const char * |
305 |
|
|
match_capture(struct match_state *ms, const char *s, int l) |
306 |
|
|
{ |
307 |
|
|
size_t len; |
308 |
|
|
l = check_capture(ms, l); |
309 |
|
|
if (l == -1) |
310 |
|
|
return NULL; |
311 |
|
|
len = ms->capture[l].len; |
312 |
|
|
if ((size_t) (ms->src_end - s) >= len && |
313 |
|
|
memcmp(ms->capture[l].init, s, len) == 0) |
314 |
|
|
return s + len; |
315 |
|
|
else |
316 |
|
|
return NULL; |
317 |
|
|
} |
318 |
|
|
|
319 |
|
|
static const char * |
320 |
|
|
match(struct match_state *ms, const char *s, const char *p) |
321 |
|
|
{ |
322 |
|
|
const char *ep, *res; |
323 |
|
|
char previous; |
324 |
|
|
|
325 |
|
|
if (ms->matchdepth-- == 0) { |
326 |
|
|
match_error(ms, "pattern too complex"); |
327 |
|
|
return (NULL); |
328 |
|
|
} |
329 |
|
|
|
330 |
|
|
/* using goto's to optimize tail recursion */ |
331 |
|
|
init: |
332 |
|
|
/* end of pattern? */ |
333 |
|
|
if (p != ms->p_end) { |
334 |
|
|
switch (*p) { |
335 |
|
|
case '(': |
336 |
|
|
/* start capture */ |
337 |
|
|
if (*(p + 1) == ')') |
338 |
|
|
/* position capture? */ |
339 |
|
|
s = start_capture(ms, s, p + 2, CAP_POSITION); |
340 |
|
|
else |
341 |
|
|
s = start_capture(ms, s, p + 1, CAP_UNFINISHED); |
342 |
|
|
break; |
343 |
|
|
case ')': |
344 |
|
|
/* end capture */ |
345 |
|
|
s = end_capture(ms, s, p + 1); |
346 |
|
|
break; |
347 |
|
|
case '$': |
348 |
|
|
/* is the '$' the last char in pattern? */ |
349 |
|
|
if ((p + 1) != ms->p_end) { |
350 |
|
|
/* no; go to default */ |
351 |
|
|
goto dflt; |
352 |
|
|
} |
353 |
|
|
/* check end of string */ |
354 |
|
|
s = (s == ms->src_end) ? s : NULL; |
355 |
|
|
break; |
356 |
|
|
case L_ESC: |
357 |
|
|
/* escaped sequences not in the format class[*+?-]? */ |
358 |
|
|
switch (*(p + 1)) { |
359 |
|
|
case 'b': |
360 |
|
|
/* balanced string? */ |
361 |
|
|
s = matchbalance(ms, s, p + 2); |
362 |
|
|
if (s != NULL) { |
363 |
|
|
p += 4; |
364 |
|
|
/* return match(ms, s, p + 4); */ |
365 |
|
|
goto init; |
366 |
|
|
} /* else fail (s == NULL) */ |
367 |
|
|
break; |
368 |
|
|
case 'f': |
369 |
|
|
/* frontier? */ |
370 |
|
|
p += 2; |
371 |
|
|
if (*p != '[') { |
372 |
|
|
match_error(ms, "missing '['" |
373 |
|
|
" after '%f' in pattern"); |
374 |
|
|
break; |
375 |
|
|
} |
376 |
|
|
/* points to what is next */ |
377 |
|
|
ep = classend(ms, p); |
378 |
|
|
if (ms->error != NULL) |
379 |
|
|
break; |
380 |
|
|
previous = |
381 |
|
|
(s == ms->src_init) ? '\0' : *(s - 1); |
382 |
|
|
if (!matchbracketclass(uchar(previous), |
383 |
|
|
p, ep - 1) && |
384 |
|
|
matchbracketclass(uchar(*s), |
385 |
|
|
p, ep - 1)) { |
386 |
|
|
p = ep; |
387 |
|
|
/* return match(ms, s, ep); */ |
388 |
|
|
goto init; |
389 |
|
|
} |
390 |
|
|
/* match failed */ |
391 |
|
|
s = NULL; |
392 |
|
|
break; |
393 |
|
|
case '0': |
394 |
|
|
case '1': |
395 |
|
|
case '2': |
396 |
|
|
case '3': |
397 |
|
|
case '4': |
398 |
|
|
case '5': |
399 |
|
|
case '6': |
400 |
|
|
case '7': |
401 |
|
|
case '8': |
402 |
|
|
case '9': |
403 |
|
|
/* capture results (%0-%9)? */ |
404 |
|
|
s = match_capture(ms, s, uchar(*(p + 1))); |
405 |
|
|
if (s != NULL) { |
406 |
|
|
p += 2; |
407 |
|
|
/* return match(ms, s, p + 2) */ |
408 |
|
|
goto init; |
409 |
|
|
} |
410 |
|
|
break; |
411 |
|
|
default: |
412 |
|
|
goto dflt; |
413 |
|
|
} |
414 |
|
|
break; |
415 |
|
|
default: |
416 |
|
|
|
417 |
|
|
/* pattern class plus optional suffix */ |
418 |
|
|
dflt: |
419 |
|
|
/* points to optional suffix */ |
420 |
|
|
ep = classend(ms, p); |
421 |
|
|
if (ms->error != NULL) |
422 |
|
|
break; |
423 |
|
|
|
424 |
|
|
/* does not match at least once? */ |
425 |
|
|
if (!singlematch(ms, s, p, ep)) { |
426 |
|
|
if (ms->repetitioncounter-- == 0) { |
427 |
|
|
match_error(ms, "max repetition items"); |
428 |
|
|
s = NULL; /* fail */ |
429 |
|
|
/* accept empty? */ |
430 |
|
|
} else if |
431 |
|
|
(*ep == '*' || *ep == '?' || *ep == '-') { |
432 |
|
|
p = ep + 1; |
433 |
|
|
/* return match(ms, s, ep + 1); */ |
434 |
|
|
goto init; |
435 |
|
|
} else { |
436 |
|
|
/* '+' or no suffix */ |
437 |
|
|
s = NULL; /* fail */ |
438 |
|
|
} |
439 |
|
|
} else { |
440 |
|
|
/* matched once */ |
441 |
|
|
/* handle optional suffix */ |
442 |
|
|
switch (*ep) { |
443 |
|
|
case '?': |
444 |
|
|
/* optional */ |
445 |
|
|
if ((res = |
446 |
|
|
match(ms, s + 1, ep + 1)) != NULL) |
447 |
|
|
s = res; |
448 |
|
|
else { |
449 |
|
|
/* |
450 |
|
|
* else return |
451 |
|
|
* match(ms, s, ep + 1); |
452 |
|
|
*/ |
453 |
|
|
p = ep + 1; |
454 |
|
|
goto init; |
455 |
|
|
} |
456 |
|
|
break; |
457 |
|
|
case '+': |
458 |
|
|
/* 1 or more repetitions */ |
459 |
|
|
s++; /* 1 match already done */ |
460 |
|
|
/* FALLTHROUGH */ |
461 |
|
|
case '*': |
462 |
|
|
/* 0 or more repetitions */ |
463 |
|
|
s = max_expand(ms, s, p, ep); |
464 |
|
|
break; |
465 |
|
|
case '-': |
466 |
|
|
/* 0 or more repetitions (minimum) */ |
467 |
|
|
s = min_expand(ms, s, p, ep); |
468 |
|
|
break; |
469 |
|
|
default: |
470 |
|
|
/* no suffix */ |
471 |
|
|
s++; |
472 |
|
|
p = ep; |
473 |
|
|
/* return match(ms, s + 1, ep); */ |
474 |
|
|
goto init; |
475 |
|
|
} |
476 |
|
|
} |
477 |
|
|
break; |
478 |
|
|
} |
479 |
|
|
} |
480 |
|
|
ms->matchdepth++; |
481 |
|
|
return s; |
482 |
|
|
} |
483 |
|
|
|
484 |
|
|
static const char * |
485 |
|
|
lmemfind(const char *s1, size_t l1, |
486 |
|
|
const char *s2, size_t l2) |
487 |
|
|
{ |
488 |
|
|
const char *init; |
489 |
|
|
|
490 |
|
|
if (l2 == 0) { |
491 |
|
|
/* empty strings are everywhere */ |
492 |
|
|
return (s1); |
493 |
|
|
} else if (l2 > l1) { |
494 |
|
|
/* avoids a negative 'l1' */ |
495 |
|
|
return (NULL); |
496 |
|
|
} else { |
497 |
|
|
/* |
498 |
|
|
* to search for a '*s2' inside 's1' |
499 |
|
|
* - 1st char will be checked by 'memchr' |
500 |
|
|
* - 's2' cannot be found after that |
501 |
|
|
*/ |
502 |
|
|
l2--; |
503 |
|
|
l1 = l1 - l2; |
504 |
|
|
while (l1 > 0 && |
505 |
|
|
(init = (const char *)memchr(s1, *s2, l1)) != NULL) { |
506 |
|
|
/* 1st char is already checked */ |
507 |
|
|
init++; |
508 |
|
|
if (memcmp(init, s2 + 1, l2) == 0) |
509 |
|
|
return init - 1; |
510 |
|
|
else { |
511 |
|
|
/* correct 'l1' and 's1' to try again */ |
512 |
|
|
l1 -= init - s1; |
513 |
|
|
s1 = init; |
514 |
|
|
} |
515 |
|
|
} |
516 |
|
|
/* not found */ |
517 |
|
|
return (NULL); |
518 |
|
|
} |
519 |
|
|
} |
520 |
|
|
|
521 |
|
|
static int |
522 |
|
|
push_onecapture(struct match_state *ms, int i, const char *s, |
523 |
|
|
const char *e, struct str_find *sm) |
524 |
|
|
{ |
525 |
|
|
if (i >= ms->level) { |
526 |
|
|
if (i == 0 || ms->level == 0) { |
527 |
|
|
/* add whole match */ |
528 |
|
|
sm->sm_so = (off_t)(s - ms->src_init); |
529 |
|
|
sm->sm_eo = (off_t)(e - s) + sm->sm_so; |
530 |
|
|
} else |
531 |
|
|
return match_error(ms, "invalid capture index"); |
532 |
|
|
} else { |
533 |
|
|
ptrdiff_t l = ms->capture[i].len; |
534 |
|
|
if (l == CAP_UNFINISHED) |
535 |
|
|
return match_error(ms, "unfinished capture"); |
536 |
|
|
sm->sm_so = ms->capture[i].init - ms->src_init; |
537 |
|
|
sm->sm_eo = sm->sm_so + l; |
538 |
|
|
} |
539 |
|
|
sm->sm_eo = sm->sm_eo < sm->sm_so ? sm->sm_so : sm->sm_eo; |
540 |
|
|
return (0); |
541 |
|
|
} |
542 |
|
|
|
543 |
|
|
static int |
544 |
|
|
push_captures(struct match_state *ms, const char *s, const char *e, |
545 |
|
|
struct str_find *sm, size_t nsm) |
546 |
|
|
{ |
547 |
|
|
unsigned int i; |
548 |
|
|
unsigned int nlevels = (ms->level <= 0 && s) ? 1 : ms->level; |
549 |
|
|
|
550 |
|
|
if (nlevels > nsm) |
551 |
|
|
nlevels = nsm; |
552 |
|
|
for (i = 0; i < nlevels; i++) |
553 |
|
|
if (push_onecapture(ms, i, s, e, sm + i) == -1) |
554 |
|
|
break; |
555 |
|
|
|
556 |
|
|
/* number of strings pushed */ |
557 |
|
|
return (nlevels); |
558 |
|
|
} |
559 |
|
|
|
560 |
|
|
/* check whether pattern has no special characters */ |
561 |
|
|
static int |
562 |
|
|
nospecials(const char *p, size_t l) |
563 |
|
|
{ |
564 |
|
|
size_t upto = 0; |
565 |
|
|
|
566 |
|
|
do { |
567 |
|
|
if (strpbrk(p + upto, SPECIALS)) { |
568 |
|
|
/* pattern has a special character */ |
569 |
|
|
return 0; |
570 |
|
|
} |
571 |
|
|
/* may have more after \0 */ |
572 |
|
|
upto += strlen(p + upto) + 1; |
573 |
|
|
} while (upto <= l); |
574 |
|
|
|
575 |
|
|
/* no special chars found */ |
576 |
|
|
return (1); |
577 |
|
|
} |
578 |
|
|
|
579 |
|
|
static int |
580 |
|
|
str_find_aux(struct match_state *ms, const char *pattern, const char *string, |
581 |
|
|
struct str_find *sm, size_t nsm, off_t init) |
582 |
|
|
{ |
583 |
|
|
size_t ls = strlen(string); |
584 |
|
|
size_t lp = strlen(pattern); |
585 |
|
|
const char *s = string; |
586 |
|
|
const char *p = pattern; |
587 |
|
|
const char *s1, *s2; |
588 |
|
|
int anchor, i; |
589 |
|
|
|
590 |
|
|
if (init < 0) |
591 |
|
|
init = 0; |
592 |
|
|
else if (init > (off_t)ls) |
593 |
|
|
return match_error(ms, "starting after string's end"); |
594 |
|
|
s1 = s + init; |
595 |
|
|
|
596 |
|
|
if (nospecials(p, lp)) { |
597 |
|
|
/* do a plain search */ |
598 |
|
|
s2 = lmemfind(s1, ls - (size_t)init, p, lp); |
599 |
|
|
if (s2 != NULL) { |
600 |
|
|
i = 0; |
601 |
|
|
sm[i].sm_so = 0; |
602 |
|
|
sm[i].sm_eo = ls; |
603 |
|
|
if (nsm > 1) { |
604 |
|
|
i++; |
605 |
|
|
sm[i].sm_so = s2 - s; |
606 |
|
|
sm[i].sm_eo = (s2 - s) + lp; |
607 |
|
|
} |
608 |
|
|
return (i + 1); |
609 |
|
|
} |
610 |
|
|
return (0); |
611 |
|
|
} |
612 |
|
|
|
613 |
|
|
anchor = (*p == '^'); |
614 |
|
|
if (anchor) { |
615 |
|
|
p++; |
616 |
|
|
lp--; /* skip anchor character */ |
617 |
|
|
} |
618 |
|
|
ms->maxcaptures = (nsm > MAXCAPTURES ? MAXCAPTURES : nsm) - 1; |
619 |
|
|
ms->matchdepth = MAXCCALLS; |
620 |
|
|
ms->repetitioncounter = MAXREPETITION; |
621 |
|
|
ms->src_init = s; |
622 |
|
|
ms->src_end = s + ls; |
623 |
|
|
ms->p_end = p + lp; |
624 |
|
|
do { |
625 |
|
|
const char *res; |
626 |
|
|
ms->level = 0; |
627 |
|
|
if ((res = match(ms, s1, p)) != NULL) { |
628 |
|
|
sm->sm_so = 0; |
629 |
|
|
sm->sm_eo = ls; |
630 |
|
|
return push_captures(ms, s1, res, sm + 1, nsm - 1) + 1; |
631 |
|
|
|
632 |
|
|
} else if (ms->error != NULL) { |
633 |
|
|
return 0; |
634 |
|
|
} |
635 |
|
|
} while (s1++ < ms->src_end && !anchor); |
636 |
|
|
|
637 |
|
|
return 0; |
638 |
|
|
} |
639 |
|
|
|
640 |
|
|
int |
641 |
|
|
str_find(const char *string, const char *pattern, struct str_find *sm, |
642 |
|
|
size_t nsm, const char **errstr) |
643 |
|
|
{ |
644 |
|
|
struct match_state ms; |
645 |
|
|
int ret; |
646 |
|
|
|
647 |
|
|
memset(&ms, 0, sizeof(ms)); |
648 |
|
|
memset(sm, 0, nsm * sizeof(*sm)); |
649 |
|
|
|
650 |
|
|
ret = str_find_aux(&ms, pattern, string, sm, nsm, 0); |
651 |
|
|
if (ms.error != NULL) { |
652 |
|
|
/* Return 0 on error and store the error string */ |
653 |
|
|
*errstr = ms.error; |
654 |
|
|
ret = 0; |
655 |
|
|
} else |
656 |
|
|
*errstr = NULL; |
657 |
|
|
|
658 |
|
|
return (ret); |
659 |
|
|
} |
660 |
|
|
|
661 |
|
|
int |
662 |
|
|
str_match(const char *string, const char *pattern, struct str_match *m, |
663 |
|
|
const char **errstr) |
664 |
|
|
{ |
665 |
|
|
struct str_find sm[MAXCAPTURES]; |
666 |
|
|
struct match_state ms; |
667 |
|
|
int ret, i; |
668 |
|
|
size_t len, nsm; |
669 |
|
|
|
670 |
|
|
nsm = MAXCAPTURES; |
671 |
|
|
memset(&ms, 0, sizeof(ms)); |
672 |
|
|
memset(sm, 0, sizeof(sm)); |
673 |
|
|
memset(m, 0, sizeof(*m)); |
674 |
|
|
|
675 |
|
|
ret = str_find_aux(&ms, pattern, string, sm, nsm, 0); |
676 |
|
|
if (ret <= 0 || ms.error != NULL) { |
677 |
|
|
/* Return -1 on error and store the error string */ |
678 |
|
|
*errstr = ms.error; |
679 |
|
|
return (-1); |
680 |
|
|
} |
681 |
|
|
|
682 |
|
|
if ((m->sm_match = calloc(ret, sizeof(char *))) == NULL) { |
683 |
|
|
*errstr = strerror(errno); |
684 |
|
|
return (-1); |
685 |
|
|
} |
686 |
|
|
m->sm_nmatch = ret; |
687 |
|
|
|
688 |
|
|
for (i = 0; i < ret; i++) { |
689 |
|
|
if (sm[i].sm_so > sm[i].sm_eo) |
690 |
|
|
continue; |
691 |
|
|
len = sm[i].sm_eo - sm[i].sm_so; |
692 |
|
|
if ((m->sm_match[i] = strndup(string + |
693 |
|
|
sm[i].sm_so, len)) == NULL) { |
694 |
|
|
*errstr = strerror(errno); |
695 |
|
|
str_match_free(m); |
696 |
|
|
return (-1); |
697 |
|
|
} |
698 |
|
|
} |
699 |
|
|
|
700 |
|
|
*errstr = NULL; |
701 |
|
|
return (0); |
702 |
|
|
} |
703 |
|
|
|
704 |
|
|
void |
705 |
|
|
str_match_free(struct str_match *m) |
706 |
|
|
{ |
707 |
|
|
unsigned int i = 0; |
708 |
|
|
for (i = 0; i < m->sm_nmatch; i++) |
709 |
|
|
free(m->sm_match[i]); |
710 |
|
|
free(m->sm_match); |
711 |
|
|
m->sm_match = NULL; |
712 |
|
|
m->sm_nmatch = 0; |
713 |
|
|
} |