GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: usr.sbin/httpd/patterns.c Lines: 0 292 0.0 %
Date: 2017-11-13 Branches: 0 235 0.0 %

Line Branch Exec Source
1
/*	$OpenBSD: patterns.c,v 1.5 2016/02/14 18:20:59 semarie Exp $	*/
2
3
/*
4
 * Copyright (c) 2015 Reyk Floeter <reyk@openbsd.org>
5
 * Copyright (C) 1994-2015 Lua.org, PUC-Rio.
6
 *
7
 * Permission is hereby granted, free of charge, to any person obtaining
8
 * a copy of this software and associated documentation files (the
9
 * "Software"), to deal in the Software without restriction, including
10
 * without limitation the rights to use, copy, modify, merge, publish,
11
 * distribute, sublicense, and/or sell copies of the Software, and to
12
 * permit persons to whom the Software is furnished to do so, subject to
13
 * the following conditions:
14
 *
15
 * The above copyright notice and this permission notice shall be
16
 * included in all copies or substantial portions of the Software.
17
 *
18
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
22
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
 */
26
27
/*
28
 * Derived from Lua 5.3.1:
29
 * $Id: patterns.c,v 1.5 2016/02/14 18:20:59 semarie Exp $
30
 * Standard library for string operations and pattern-matching
31
 */
32
33
#include <sys/types.h>
34
#include <ctype.h>
35
#include <errno.h>
36
#include <stddef.h>
37
#include <stdlib.h>
38
#include <string.h>
39
40
#include "patterns.h"
41
42
#define uchar(c)	((unsigned char)(c)) /* macro to 'unsign' a char */
43
#define CAP_UNFINISHED	(-1)
44
#define CAP_POSITION	(-2)
45
#define L_ESC		'%'
46
#define SPECIALS	"^$*+?.([%-"
47
48
struct match_state {
49
	int matchdepth;		/* control for recursive depth (to avoid C
50
				 * stack overflow) */
51
	int repetitioncounter;	/* control the repetition items */
52
	int maxcaptures;	/* configured capture limit */
53
	const char *src_init;	/* init of source string */
54
	const char *src_end;	/* end ('\0') of source string */
55
	const char *p_end;	/* end ('\0') of pattern */
56
	const char *error;	/* should be NULL */
57
	int level;		/* total number of captures (finished or
58
				 * unfinished) */
59
	struct {
60
		const char *init;
61
		ptrdiff_t len;
62
	} capture[MAXCAPTURES];
63
};
64
65
/* recursive function */
66
static const char *match(struct match_state *, const char *, const char *);
67
68
static int
69
match_error(struct match_state *ms, const char *error)
70
{
71
	ms->error = ms->error == NULL ? error : ms->error;
72
	return (-1);
73
}
74
75
static int
76
check_capture(struct match_state *ms, int l)
77
{
78
	l -= '1';
79
	if (l < 0 || l >= ms->level || ms->capture[l].len == CAP_UNFINISHED)
80
		return match_error(ms, "invalid capture index");
81
	return (l);
82
}
83
84
static int
85
capture_to_close(struct match_state *ms)
86
{
87
	int level = ms->level;
88
	for (level--; level >= 0; level--)
89
		if (ms->capture[level].len == CAP_UNFINISHED)
90
			return (level);
91
	return match_error(ms, "invalid pattern capture");
92
}
93
94
static const char *
95
classend(struct match_state *ms, const char *p)
96
{
97
	switch (*p++) {
98
	case L_ESC:
99
		if (p == ms->p_end)
100
			match_error(ms,
101
			    "malformed pattern (ends with '%')");
102
		return p + 1;
103
	case '[':
104
		if (*p == '^')
105
			p++;
106
		do {
107
			/* look for a ']' */
108
			if (p == ms->p_end) {
109
				match_error(ms,
110
				    "malformed pattern (missing ']')");
111
				break;
112
			}
113
			if (*(p++) == L_ESC && p < ms->p_end) {
114
				/* skip escapes (e.g. '%]') */
115
				p++;
116
			}
117
		} while (*p != ']');
118
		return p + 1;
119
	default:
120
		return p;
121
	}
122
}
123
124
static int
125
match_class(int c, int cl)
126
{
127
	int res;
128
	switch (tolower(cl)) {
129
	case 'a':
130
		res = isalpha(c);
131
		break;
132
	case 'c':
133
		res = iscntrl(c);
134
		break;
135
	case 'd':
136
		res = isdigit(c);
137
		break;
138
	case 'g':
139
		res = isgraph(c);
140
		break;
141
	case 'l':
142
		res = islower(c);
143
		break;
144
	case 'p':
145
		res = ispunct(c);
146
		break;
147
	case 's':
148
		res = isspace(c);
149
		break;
150
	case 'u':
151
		res = isupper(c);
152
		break;
153
	case 'w':
154
		res = isalnum(c);
155
		break;
156
	case 'x':
157
		res = isxdigit(c);
158
		break;
159
	default:
160
		return (cl == c);
161
	}
162
	return (islower(cl) ? res : !res);
163
}
164
165
static int
166
matchbracketclass(int c, const char *p, const char *ec)
167
{
168
	int sig = 1;
169
	if (*(p + 1) == '^') {
170
		sig = 0;
171
		/* skip the '^' */
172
		p++;
173
	}
174
	while (++p < ec) {
175
		if (*p == L_ESC) {
176
			p++;
177
			if (match_class(c, uchar(*p)))
178
				return sig;
179
		} else if ((*(p + 1) == '-') && (p + 2 < ec)) {
180
			p += 2;
181
			if (uchar(*(p - 2)) <= c && c <= uchar(*p))
182
				return sig;
183
		} else if (uchar(*p) == c)
184
			return sig;
185
	}
186
	return !sig;
187
}
188
189
static int
190
singlematch(struct match_state *ms, const char *s, const char *p,
191
    const char *ep)
192
{
193
	if (s >= ms->src_end)
194
		return 0;
195
	else {
196
		int c = uchar(*s);
197
		switch (*p) {
198
		case '.':
199
			/* matches any char */
200
			return (1);
201
		case L_ESC:
202
			return match_class(c, uchar(*(p + 1)));
203
		case '[':
204
			return matchbracketclass(c, p, ep - 1);
205
		default:
206
			return (uchar(*p) == c);
207
		}
208
	}
209
}
210
211
static const char *
212
matchbalance(struct match_state *ms, const char *s, const char *p)
213
{
214
	if (p >= ms->p_end - 1) {
215
		match_error(ms,
216
		    "malformed pattern (missing arguments to '%b')");
217
		return (NULL);
218
	}
219
	if (*s != *p)
220
		return (NULL);
221
	else {
222
		int b = *p;
223
		int e = *(p + 1);
224
		int cont = 1;
225
		while (++s < ms->src_end) {
226
			if (*s == e) {
227
				if (--cont == 0)
228
					return s + 1;
229
			} else if (*s == b)
230
				cont++;
231
		}
232
	}
233
234
	/* string ends out of balance */
235
	return (NULL);
236
}
237
238
static const char *
239
max_expand(struct match_state *ms, const char *s, const char *p, const char *ep)
240
{
241
	ptrdiff_t i = 0;
242
	/* counts maximum expand for item */
243
	while (singlematch(ms, s + i, p, ep))
244
		i++;
245
	/* keeps trying to match with the maximum repetitions */
246
	while (i >= 0) {
247
		const char *res = match(ms, (s + i), ep + 1);
248
		if (res)
249
			return res;
250
		/* else didn't match; reduce 1 repetition to try again */
251
		i--;
252
	}
253
	return NULL;
254
}
255
256
static const char *
257
min_expand(struct match_state *ms, const char *s, const char *p, const char *ep)
258
{
259
	for (;;) {
260
		const char *res = match(ms, s, ep + 1);
261
		if (res != NULL)
262
			return res;
263
		else if (singlematch(ms, s, p, ep))
264
			s++;	/* try with one more repetition */
265
		else
266
			return NULL;
267
	}
268
}
269
270
static const char *
271
start_capture(struct match_state *ms, const char *s, const char *p, int what)
272
{
273
	const char *res;
274
275
	int level = ms->level;
276
	if (level >= ms->maxcaptures) {
277
		match_error(ms, "too many captures");
278
		return (NULL);
279
	}
280
	ms->capture[level].init = s;
281
	ms->capture[level].len = what;
282
	ms->level = level + 1;
283
	/* undo capture if match failed */
284
	if ((res = match(ms, s, p)) == NULL)
285
		ms->level--;
286
	return res;
287
}
288
289
static const char *
290
end_capture(struct match_state *ms, const char *s, const char *p)
291
{
292
	int l = capture_to_close(ms);
293
	const char *res;
294
	if (l == -1)
295
		return NULL;
296
	/* close capture */
297
	ms->capture[l].len = s - ms->capture[l].init;
298
	/* undo capture if match failed */
299
	if ((res = match(ms, s, p)) == NULL)
300
		ms->capture[l].len = CAP_UNFINISHED;
301
	return res;
302
}
303
304
static const char *
305
match_capture(struct match_state *ms, const char *s, int l)
306
{
307
	size_t len;
308
	l = check_capture(ms, l);
309
	if (l == -1)
310
		return NULL;
311
	len = ms->capture[l].len;
312
	if ((size_t) (ms->src_end - s) >= len &&
313
	    memcmp(ms->capture[l].init, s, len) == 0)
314
		return s + len;
315
	else
316
		return NULL;
317
}
318
319
static const char *
320
match(struct match_state *ms, const char *s, const char *p)
321
{
322
	const char *ep, *res;
323
	char previous;
324
325
	if (ms->matchdepth-- == 0) {
326
		match_error(ms, "pattern too complex");
327
		return (NULL);
328
	}
329
330
	/* using goto's to optimize tail recursion */
331
 init:
332
	/* end of pattern? */
333
	if (p != ms->p_end) {
334
		switch (*p) {
335
		case '(':
336
			/* start capture */
337
			if (*(p + 1) == ')')
338
				/* position capture? */
339
				s = start_capture(ms, s, p + 2, CAP_POSITION);
340
			else
341
				s = start_capture(ms, s, p + 1, CAP_UNFINISHED);
342
			break;
343
		case ')':
344
			/* end capture */
345
			s = end_capture(ms, s, p + 1);
346
			break;
347
		case '$':
348
			/* is the '$' the last char in pattern? */
349
			if ((p + 1) != ms->p_end) {
350
				/* no; go to default */
351
				goto dflt;
352
			}
353
			 /* check end of string */
354
			s = (s == ms->src_end) ? s : NULL;
355
			break;
356
		case L_ESC:
357
			/* escaped sequences not in the format class[*+?-]? */
358
			switch (*(p + 1)) {
359
			case 'b':
360
				/* balanced string? */
361
				s = matchbalance(ms, s, p + 2);
362
				if (s != NULL) {
363
					p += 4;
364
					/* return match(ms, s, p + 4); */
365
					goto init;
366
				} /* else fail (s == NULL) */
367
				break;
368
			case 'f':
369
				/* frontier? */
370
				p += 2;
371
				if (*p != '[') {
372
					match_error(ms, "missing '['"
373
					    " after '%f' in pattern");
374
					break;
375
				}
376
				/* points to what is next */
377
				ep = classend(ms, p);
378
				if (ms->error != NULL)
379
					break;
380
				previous =
381
				    (s == ms->src_init) ? '\0' : *(s - 1);
382
				if (!matchbracketclass(uchar(previous),
383
				    p, ep - 1) &&
384
				    matchbracketclass(uchar(*s),
385
				    p, ep - 1)) {
386
					p = ep;
387
					/* return match(ms, s, ep); */
388
					goto init;
389
				}
390
				/* match failed */
391
				s = NULL;
392
				break;
393
			case '0':
394
			case '1':
395
			case '2':
396
			case '3':
397
			case '4':
398
			case '5':
399
			case '6':
400
			case '7':
401
			case '8':
402
			case '9':
403
				/* capture results (%0-%9)? */
404
				s = match_capture(ms, s, uchar(*(p + 1)));
405
				if (s != NULL) {
406
					p += 2;
407
					/* return match(ms, s, p + 2) */
408
					goto init;
409
				}
410
				break;
411
			default:
412
				goto dflt;
413
			}
414
			break;
415
		default:
416
417
			/* pattern class plus optional suffix */
418
	dflt:
419
			/* points to optional suffix */
420
			ep = classend(ms, p);
421
			if (ms->error != NULL)
422
				break;
423
424
			/* does not match at least once? */
425
			if (!singlematch(ms, s, p, ep)) {
426
				if (ms->repetitioncounter-- == 0) {
427
					match_error(ms, "max repetition items");
428
					s = NULL; /* fail */
429
				/* accept empty? */
430
				} else if
431
				    (*ep == '*' || *ep == '?' || *ep == '-') {
432
					 p = ep + 1;
433
					/* return match(ms, s, ep + 1); */
434
					 goto init;
435
				} else {
436
					/* '+' or no suffix */
437
					s = NULL; /* fail */
438
				}
439
			} else {
440
				/* matched once */
441
				/* handle optional suffix */
442
				switch (*ep) {
443
				case '?':
444
					/* optional */
445
					if ((res =
446
					    match(ms, s + 1, ep + 1)) != NULL)
447
						s = res;
448
					else {
449
						/*
450
						 * else return
451
						 *     match(ms, s, ep + 1);
452
						 */
453
						p = ep + 1;
454
						goto init;
455
					}
456
					break;
457
				case '+':
458
					/* 1 or more repetitions */
459
					s++; /* 1 match already done */
460
					/* FALLTHROUGH */
461
				case '*':
462
					/* 0 or more repetitions */
463
					s = max_expand(ms, s, p, ep);
464
					break;
465
				case '-':
466
					/* 0 or more repetitions (minimum) */
467
					s = min_expand(ms, s, p, ep);
468
					break;
469
				default:
470
					/* no suffix */
471
					s++;
472
					p = ep;
473
					/* return match(ms, s + 1, ep); */
474
					goto init;
475
				}
476
			}
477
			break;
478
		}
479
	}
480
	ms->matchdepth++;
481
	return s;
482
}
483
484
static const char *
485
lmemfind(const char *s1, size_t l1,
486
    const char *s2, size_t l2)
487
{
488
	const char *init;
489
490
	if (l2 == 0) {
491
		/* empty strings are everywhere */
492
		return (s1);
493
	} else if (l2 > l1) {
494
		/* avoids a negative 'l1' */
495
		return (NULL);
496
	} else {
497
		/*
498
		 * to search for a '*s2' inside 's1'
499
		 * - 1st char will be checked by 'memchr'
500
		 * - 's2' cannot be found after that
501
		 */
502
		l2--;
503
		l1 = l1 - l2;
504
		while (l1 > 0 &&
505
		    (init = (const char *)memchr(s1, *s2, l1)) != NULL) {
506
			/* 1st char is already checked */
507
			init++;
508
			if (memcmp(init, s2 + 1, l2) == 0)
509
				return init - 1;
510
			else {
511
				/* correct 'l1' and 's1' to try again */
512
				l1 -= init - s1;
513
				s1 = init;
514
			}
515
		}
516
		/* not found */
517
		return (NULL);
518
	}
519
}
520
521
static int
522
push_onecapture(struct match_state *ms, int i, const char *s,
523
    const char *e, struct str_find *sm)
524
{
525
	if (i >= ms->level) {
526
		if (i == 0 || ms->level == 0) {
527
			/* add whole match */
528
			sm->sm_so = (off_t)(s - ms->src_init);
529
			sm->sm_eo = (off_t)(e - s) + sm->sm_so;
530
		} else
531
			return match_error(ms, "invalid capture index");
532
	} else {
533
		ptrdiff_t l = ms->capture[i].len;
534
		if (l == CAP_UNFINISHED)
535
			return match_error(ms, "unfinished capture");
536
		sm->sm_so = ms->capture[i].init - ms->src_init;
537
		sm->sm_eo = sm->sm_so + l;
538
	}
539
	sm->sm_eo = sm->sm_eo < sm->sm_so ? sm->sm_so : sm->sm_eo;
540
	return (0);
541
}
542
543
static int
544
push_captures(struct match_state *ms, const char *s, const char *e,
545
    struct str_find *sm, size_t nsm)
546
{
547
	unsigned int i;
548
	unsigned int nlevels = (ms->level <= 0 && s) ? 1 : ms->level;
549
550
	if (nlevels > nsm)
551
		nlevels = nsm;
552
	for (i = 0; i < nlevels; i++)
553
		if (push_onecapture(ms, i, s, e, sm + i) == -1)
554
			break;
555
556
	/* number of strings pushed */
557
	return (nlevels);
558
}
559
560
/* check whether pattern has no special characters */
561
static int
562
nospecials(const char *p, size_t l)
563
{
564
	size_t upto = 0;
565
566
	do {
567
		if (strpbrk(p + upto, SPECIALS)) {
568
			/* pattern has a special character */
569
			return 0;
570
		}
571
		/* may have more after \0 */
572
		upto += strlen(p + upto) + 1;
573
	} while (upto <= l);
574
575
	/* no special chars found */
576
	return (1);
577
}
578
579
static int
580
str_find_aux(struct match_state *ms, const char *pattern, const char *string,
581
    struct str_find *sm, size_t nsm, off_t init)
582
{
583
	size_t		 ls = strlen(string);
584
	size_t		 lp = strlen(pattern);
585
	const char	*s = string;
586
	const char	*p = pattern;
587
	const char	*s1, *s2;
588
	int		 anchor, i;
589
590
	if (init < 0)
591
		init = 0;
592
	else if (init > (off_t)ls)
593
		return match_error(ms, "starting after string's end");
594
	s1 = s + init;
595
596
	if (nospecials(p, lp)) {
597
		/* do a plain search */
598
		s2 = lmemfind(s1, ls - (size_t)init, p, lp);
599
		if (s2 != NULL) {
600
			i = 0;
601
			sm[i].sm_so = 0;
602
			sm[i].sm_eo = ls;
603
			if (nsm > 1) {
604
				i++;
605
				sm[i].sm_so = s2 - s;
606
				sm[i].sm_eo = (s2 - s) + lp;
607
			}
608
			return (i + 1);
609
		}
610
		return (0);
611
	}
612
613
	anchor = (*p == '^');
614
	if (anchor) {
615
		p++;
616
		lp--;	/* skip anchor character */
617
	}
618
	ms->maxcaptures = (nsm > MAXCAPTURES ? MAXCAPTURES : nsm) - 1;
619
	ms->matchdepth = MAXCCALLS;
620
	ms->repetitioncounter = MAXREPETITION;
621
	ms->src_init = s;
622
	ms->src_end = s + ls;
623
	ms->p_end = p + lp;
624
	do {
625
		const char *res;
626
		ms->level = 0;
627
		if ((res = match(ms, s1, p)) != NULL) {
628
			sm->sm_so = 0;
629
			sm->sm_eo = ls;
630
			return push_captures(ms, s1, res, sm + 1, nsm - 1) + 1;
631
632
		} else if (ms->error != NULL) {
633
			return 0;
634
		}
635
	} while (s1++ < ms->src_end && !anchor);
636
637
	return 0;
638
}
639
640
int
641
str_find(const char *string, const char *pattern, struct str_find *sm,
642
    size_t nsm, const char **errstr)
643
{
644
	struct match_state	ms;
645
	int			ret;
646
647
	memset(&ms, 0, sizeof(ms));
648
	memset(sm, 0, nsm * sizeof(*sm));
649
650
	ret = str_find_aux(&ms, pattern, string, sm, nsm, 0);
651
	if (ms.error != NULL) {
652
		/* Return 0 on error and store the error string */
653
		*errstr = ms.error;
654
		ret = 0;
655
	} else
656
		*errstr = NULL;
657
658
	return (ret);
659
}
660
661
int
662
str_match(const char *string, const char *pattern, struct str_match *m,
663
    const char **errstr)
664
{
665
	struct str_find		 sm[MAXCAPTURES];
666
	struct match_state	 ms;
667
	int			 ret, i;
668
	size_t			 len, nsm;
669
670
	nsm = MAXCAPTURES;
671
	memset(&ms, 0, sizeof(ms));
672
	memset(sm, 0, sizeof(sm));
673
	memset(m, 0, sizeof(*m));
674
675
	ret = str_find_aux(&ms, pattern, string, sm, nsm, 0);
676
	if (ret <= 0 || ms.error != NULL) {
677
		/* Return -1 on error and store the error string */
678
		*errstr = ms.error;
679
		return (-1);
680
	}
681
682
	if ((m->sm_match = calloc(ret, sizeof(char *))) == NULL) {
683
		*errstr = strerror(errno);
684
		return (-1);
685
	}
686
	m->sm_nmatch = ret;
687
688
	for (i = 0; i < ret; i++) {
689
		if (sm[i].sm_so > sm[i].sm_eo)
690
			continue;
691
		len = sm[i].sm_eo - sm[i].sm_so;
692
		if ((m->sm_match[i] = strndup(string +
693
		    sm[i].sm_so, len)) == NULL) {
694
			*errstr = strerror(errno);
695
			str_match_free(m);
696
			return (-1);
697
		}
698
	}
699
700
	*errstr = NULL;
701
	return (0);
702
}
703
704
void
705
str_match_free(struct str_match *m)
706
{
707
	unsigned int	 i = 0;
708
	for (i = 0; i < m->sm_nmatch; i++)
709
		free(m->sm_match[i]);
710
	free(m->sm_match);
711
	m->sm_match = NULL;
712
	m->sm_nmatch = 0;
713
}