GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: lib/libc/gen/fnmatch.c Lines: 0 162 0.0 %
Date: 2017-11-07 Branches: 0 261 0.0 %

Line Branch Exec Source
1
/*	$OpenBSD: fnmatch.c,v 1.21 2016/03/01 20:29:03 millert Exp $	*/
2
3
/* Copyright (c) 2011, VMware, Inc.
4
 * All rights reserved.
5
 *
6
 * Redistribution and use in source and binary forms, with or without
7
 * modification, are permitted provided that the following conditions are met:
8
 *     * Redistributions of source code must retain the above copyright
9
 *       notice, this list of conditions and the following disclaimer.
10
 *     * Redistributions in binary form must reproduce the above copyright
11
 *       notice, this list of conditions and the following disclaimer in the
12
 *       documentation and/or other materials provided with the distribution.
13
 *     * Neither the name of the VMware, Inc. nor the names of its contributors
14
 *       may be used to endorse or promote products derived from this software
15
 *       without specific prior written permission.
16
 *
17
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
 * ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE FOR
21
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
 */
28
29
/*
30
 * Copyright (c) 2008, 2016 Todd C. Miller <millert@openbsd.org>
31
 *
32
 * Permission to use, copy, modify, and distribute this software for any
33
 * purpose with or without fee is hereby granted, provided that the above
34
 * copyright notice and this permission notice appear in all copies.
35
 *
36
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
37
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
38
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
39
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
40
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
41
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
42
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
43
 */
44
45
/* Authored by William A. Rowe Jr. <wrowe; apache.org, vmware.com>, April 2011
46
 *
47
 * Derived from The Open Group Base Specifications Issue 7, IEEE Std 1003.1-2008
48
 * as described in;
49
 *   http://pubs.opengroup.org/onlinepubs/9699919799/functions/fnmatch.html
50
 *
51
 * Filename pattern matches defined in section 2.13, "Pattern Matching Notation"
52
 * from chapter 2. "Shell Command Language"
53
 *   http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_13
54
 * where; 1. A bracket expression starting with an unquoted <circumflex> '^'
55
 * character CONTINUES to specify a non-matching list; 2. an explicit <period> '.'
56
 * in a bracket expression matching list, e.g. "[.abc]" does NOT match a leading
57
 * <period> in a filename; 3. a <left-square-bracket> '[' which does not introduce
58
 * a valid bracket expression is treated as an ordinary character; 4. a differing
59
 * number of consecutive slashes within pattern and string will NOT match;
60
 * 5. a trailing '\' in FNM_ESCAPE mode is treated as an ordinary '\' character.
61
 *
62
 * Bracket expansion defined in section 9.3.5, "RE Bracket Expression",
63
 * from chapter 9, "Regular Expressions"
64
 *   http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html#tag_09_03_05
65
 * with no support for collating symbols, equivalence class expressions or
66
 * character class expressions.  A partial range expression with a leading
67
 * hyphen following a valid range expression will match only the ordinary
68
 * <hyphen> and the ending character (e.g. "[a-m-z]" will match characters
69
 * 'a' through 'm', a <hyphen> '-', or a 'z').
70
 *
71
 * Supports BSD extensions FNM_LEADING_DIR to match pattern to the end of one
72
 * path segment of string, and FNM_CASEFOLD to ignore alpha case.
73
 *
74
 * NOTE: Only POSIX/C single byte locales are correctly supported at this time.
75
 * Notably, non-POSIX locales with FNM_CASEFOLD produce undefined results,
76
 * particularly in ranges of mixed case (e.g. "[A-z]") or spanning alpha and
77
 * nonalpha characters within a range.
78
 *
79
 * XXX comments below indicate porting required for multi-byte character sets
80
 * and non-POSIX locale collation orders; requires mbr* APIs to track shift
81
 * state of pattern and string (rewinding pattern and string repeatedly).
82
 *
83
 * Certain parts of the code assume 0x00-0x3F are unique with any MBCS (e.g.
84
 * UTF-8, SHIFT-JIS, etc).  Any implementation allowing '\' as an alternate
85
 * path delimiter must be aware that 0x5C is NOT unique within SHIFT-JIS.
86
 */
87
88
#include <fnmatch.h>
89
#include <string.h>
90
#include <ctype.h>
91
92
#include "charclass.h"
93
94
#define	RANGE_MATCH	1
95
#define	RANGE_NOMATCH	0
96
#define	RANGE_ERROR	(-1)
97
98
static int
99
classmatch(const char *pattern, char test, int foldcase, const char **ep)
100
{
101
	const char * const mismatch = pattern;
102
	const char *colon;
103
	struct cclass *cc;
104
	int rval = RANGE_NOMATCH;
105
	size_t len;
106
107
	if (pattern[0] != '[' || pattern[1] != ':') {
108
		*ep = mismatch;
109
		return RANGE_ERROR;
110
	}
111
	pattern += 2;
112
113
	if ((colon = strchr(pattern, ':')) == NULL || colon[1] != ']') {
114
		*ep = mismatch;
115
		return RANGE_ERROR;
116
	}
117
	*ep = colon + 2;
118
	len = (size_t)(colon - pattern);
119
120
	if (foldcase && strncmp(pattern, "upper:]", 7) == 0)
121
		pattern = "lower:]";
122
	for (cc = cclasses; cc->name != NULL; cc++) {
123
		if (!strncmp(pattern, cc->name, len) && cc->name[len] == '\0') {
124
			if (cc->isctype((unsigned char)test))
125
				rval = RANGE_MATCH;
126
			break;
127
		}
128
	}
129
	if (cc->name == NULL) {
130
		/* invalid character class, treat as normal text */
131
		*ep = mismatch;
132
		rval = RANGE_ERROR;
133
	}
134
	return rval;
135
}
136
137
/* Most MBCS/collation/case issues handled here.  Wildcard '*' is not handled.
138
 * EOS '\0' and the FNM_PATHNAME '/' delimiters are not advanced over,
139
 * however the "\/" sequence is advanced to '/'.
140
 *
141
 * Both pattern and string are **char to support pointer increment of arbitrary
142
 * multibyte characters for the given locale, in a later iteration of this code
143
 */
144
static int fnmatch_ch(const char **pattern, const char **string, int flags)
145
{
146
	const char * const mismatch = *pattern;
147
	const int nocase = !!(flags & FNM_CASEFOLD);
148
	const int escape = !(flags & FNM_NOESCAPE);
149
	const int slash = !!(flags & FNM_PATHNAME);
150
	int result = FNM_NOMATCH;
151
	const char *startch;
152
	int negate;
153
154
	if (**pattern == '[') {
155
		++*pattern;
156
157
		/* Handle negation, either leading ! or ^ operators */
158
		negate = (**pattern == '!') || (**pattern == '^');
159
		if (negate)
160
			++*pattern;
161
162
		/* ']' is an ordinary char at the start of the range pattern */
163
		if (**pattern == ']')
164
			goto leadingclosebrace;
165
166
		while (**pattern) {
167
			if (**pattern == ']') {
168
				++*pattern;
169
				/* XXX: Fix for MBCS character width */
170
				++*string;
171
				return (result ^ negate);
172
			}
173
174
			if (escape && (**pattern == '\\')) {
175
				++*pattern;
176
177
				/* Patterns must terminate with ']', not EOS */
178
				if (!**pattern)
179
					break;
180
			}
181
182
			/* Patterns must terminate with ']' not '/' */
183
			if (slash && (**pattern == '/'))
184
				break;
185
186
			/* Match character classes. */
187
			switch (classmatch(*pattern, **string, nocase, pattern)) {
188
			case RANGE_MATCH:
189
				result = 0;
190
				continue;
191
			case RANGE_NOMATCH:
192
				/* Valid character class but no match. */
193
				continue;
194
			default:
195
				/* Not a valid character class. */
196
				break;
197
			}
198
			if (!**pattern)
199
				break;
200
201
leadingclosebrace:
202
			/* Look at only well-formed range patterns;
203
			 * "x-]" is not allowed unless escaped ("x-\]")
204
			 * XXX: Fix for locale/MBCS character width
205
			 */
206
			if (((*pattern)[1] == '-') && ((*pattern)[2] != ']')) {
207
				startch = *pattern;
208
				*pattern += (escape && ((*pattern)[2] == '\\')) ? 3 : 2;
209
210
				/*
211
				 * NOT a properly balanced [expr] pattern, EOS
212
				 * terminated or ranges containing a slash in
213
				 * FNM_PATHNAME mode pattern fall out to to the
214
				 * rewind and test '[' literal code path.
215
				 */
216
				if (!**pattern || (slash && (**pattern == '/')))
217
					break;
218
219
				/* XXX: handle locale/MBCS comparison, advance by MBCS char width */
220
				if ((**string >= *startch) && (**string <= **pattern))
221
					result = 0;
222
				else if (nocase &&
223
				    (isupper((unsigned char)**string) ||
224
				     isupper((unsigned char)*startch) ||
225
				     isupper((unsigned char)**pattern)) &&
226
				    (tolower((unsigned char)**string) >=
227
				     tolower((unsigned char)*startch)) &&
228
				    (tolower((unsigned char)**string) <=
229
				     tolower((unsigned char)**pattern)))
230
					result = 0;
231
232
				++*pattern;
233
				continue;
234
			}
235
236
			/* XXX: handle locale/MBCS comparison, advance by MBCS char width */
237
			if ((**string == **pattern))
238
				result = 0;
239
			else if (nocase && (isupper((unsigned char)**string) ||
240
			    isupper((unsigned char)**pattern)) &&
241
			    (tolower((unsigned char)**string) ==
242
			    tolower((unsigned char)**pattern)))
243
				result = 0;
244
245
			++*pattern;
246
		}
247
		/*
248
		 * NOT a properly balanced [expr] pattern;
249
		 * Rewind and reset result to test '[' literal
250
		 */
251
		*pattern = mismatch;
252
		result = FNM_NOMATCH;
253
	} else if (**pattern == '?') {
254
		/* Optimize '?' match before unescaping **pattern */
255
		if (!**string || (slash && (**string == '/')))
256
			return FNM_NOMATCH;
257
		result = 0;
258
		goto fnmatch_ch_success;
259
	} else if (escape && (**pattern == '\\') && (*pattern)[1]) {
260
		++*pattern;
261
	}
262
263
	/* XXX: handle locale/MBCS comparison, advance by the MBCS char width */
264
	if (**string == **pattern)
265
		result = 0;
266
	else if (nocase && (isupper((unsigned char)**string) ||
267
	    isupper((unsigned char)**pattern)) &&
268
	    (tolower((unsigned char)**string) ==
269
	    tolower((unsigned char)**pattern)))
270
		result = 0;
271
272
	/* Refuse to advance over trailing slash or NULs */
273
	if (**string == '\0' || **pattern == '\0' ||
274
	    (slash && ((**string == '/') || (**pattern == '/'))))
275
		return result;
276
277
fnmatch_ch_success:
278
	++*pattern;
279
	++*string;
280
	return result;
281
}
282
283
284
int fnmatch(const char *pattern, const char *string, int flags)
285
{
286
	static const char dummystring[2] = {' ', 0};
287
	const int escape = !(flags & FNM_NOESCAPE);
288
	const int slash = !!(flags & FNM_PATHNAME);
289
	const int leading_dir = !!(flags & FNM_LEADING_DIR);
290
	const char *dummyptr, *matchptr, *strendseg;
291
	int wild;
292
	/* For '*' wild processing only; surpress 'used before initialization'
293
	 * warnings with dummy initialization values;
294
	 */
295
	const char *strstartseg = NULL;
296
	const char *mismatch = NULL;
297
	int matchlen = 0;
298
299
	if (*pattern == '*')
300
		goto firstsegment;
301
302
	while (*pattern && *string) {
303
		/*
304
		 * Pre-decode "\/" which has no special significance, and
305
		 * match balanced slashes, starting a new segment pattern.
306
		 */
307
		if (slash && escape && (*pattern == '\\') && (pattern[1] == '/'))
308
			++pattern;
309
		if (slash && (*pattern == '/') && (*string == '/')) {
310
			++pattern;
311
			++string;
312
		}
313
314
firstsegment:
315
		/*
316
		 * At the beginning of each segment, validate leading period
317
		 * behavior.
318
		 */
319
		if ((flags & FNM_PERIOD) && (*string == '.')) {
320
		    if (*pattern == '.')
321
			    ++pattern;
322
		    else if (escape && (*pattern == '\\') && (pattern[1] == '.'))
323
			    pattern += 2;
324
		    else
325
			    return FNM_NOMATCH;
326
		    ++string;
327
		}
328
329
		/*
330
		 * Determine the end of string segment.  Presumes '/'
331
		 * character is unique, not composite in any MBCS encoding
332
		 */
333
		if (slash) {
334
			strendseg = strchr(string, '/');
335
			if (!strendseg)
336
				strendseg = strchr(string, '\0');
337
		} else {
338
			strendseg = strchr(string, '\0');
339
		}
340
341
		/*
342
		 * Allow pattern '*' to be consumed even with no remaining
343
		 * string to match.
344
		 */
345
		while (*pattern) {
346
			if ((string > strendseg) ||
347
			    ((string == strendseg) && (*pattern != '*')))
348
				break;
349
350
			if (slash && ((*pattern == '/') ||
351
			    (escape && (*pattern == '\\') && (pattern[1] == '/'))))
352
				break;
353
354
			/*
355
			 * Reduce groups of '*' and '?' to n '?' matches
356
			 * followed by one '*' test for simplicity.
357
			 */
358
			for (wild = 0; (*pattern == '*') || (*pattern == '?'); ++pattern) {
359
				if (*pattern == '*') {
360
					wild = 1;
361
				} else if (string < strendseg) {  /* && (*pattern == '?') */
362
					/* XXX: Advance 1 char for MBCS locale */
363
					++string;
364
				}
365
				else {  /* (string >= strendseg) && (*pattern == '?') */
366
					return FNM_NOMATCH;
367
				}
368
			}
369
370
			if (wild) {
371
				strstartseg = string;
372
				mismatch = pattern;
373
374
				/*
375
				 * Count fixed (non '*') char matches remaining
376
				 * in pattern * excluding '/' (or "\/") and '*'.
377
				 */
378
				for (matchptr = pattern, matchlen = 0; 1; ++matchlen) {
379
					if ((*matchptr == '\0') ||
380
					    (slash && ((*matchptr == '/') ||
381
					    (escape && (*matchptr == '\\') &&
382
					    (matchptr[1] == '/'))))) {
383
						/* Compare precisely this many
384
						 * trailing string chars, the
385
						 * resulting match needs no
386
						 * wildcard loop.
387
						 */
388
						/* XXX: Adjust for MBCS */
389
						if (string + matchlen > strendseg)
390
							return FNM_NOMATCH;
391
392
						string = strendseg - matchlen;
393
						wild = 0;
394
						break;
395
					}
396
397
					if (*matchptr == '*') {
398
						/*
399
						 * Ensure at least this many
400
						 * trailing string chars remain
401
						 * for the first comparison.
402
						 */
403
						/* XXX: Adjust for MBCS */
404
						if (string + matchlen > strendseg)
405
							return FNM_NOMATCH;
406
407
						/*
408
						 * Begin first wild comparison
409
						 * at the current position.
410
						 */
411
						break;
412
					}
413
414
					/*
415
					 * Skip forward in pattern by a single
416
					 * character match Use a dummy
417
					 * fnmatch_ch() test to count one
418
					 * "[range]" escape.
419
					 */
420
					/* XXX: Adjust for MBCS */
421
					if (escape && (*matchptr == '\\') &&
422
					    matchptr[1]) {
423
						matchptr += 2;
424
					} else if (*matchptr == '[') {
425
						dummyptr = dummystring;
426
						fnmatch_ch(&matchptr, &dummyptr,
427
						    flags);
428
					} else {
429
						++matchptr;
430
					}
431
				}
432
			}
433
434
			/* Incrementally match string against the pattern. */
435
			while (*pattern && (string < strendseg)) {
436
				/* Success; begin a new wild pattern search. */
437
				if (*pattern == '*')
438
					break;
439
440
				if (slash && ((*string == '/') ||
441
				    (*pattern == '/') || (escape &&
442
				    (*pattern == '\\') && (pattern[1] == '/'))))
443
					break;
444
445
				/*
446
				 * Compare ch's (the pattern is advanced over
447
				 * "\/" to the '/', but slashes will mismatch,
448
				 * and are not consumed).
449
				 */
450
				if (!fnmatch_ch(&pattern, &string, flags))
451
					continue;
452
453
				/*
454
				 * Failed to match, loop against next char
455
				 * offset of string segment until not enough
456
				 * string chars remain to match the fixed
457
				 * pattern.
458
				 */
459
				if (wild) {
460
					/* XXX: Advance 1 char for MBCS locale */
461
					string = ++strstartseg;
462
					if (string + matchlen > strendseg)
463
						return FNM_NOMATCH;
464
465
					pattern = mismatch;
466
					continue;
467
				} else
468
					return FNM_NOMATCH;
469
			}
470
		}
471
472
		if (*string && !((slash || leading_dir) && (*string == '/')))
473
			return FNM_NOMATCH;
474
475
		if (*pattern && !(slash && ((*pattern == '/') ||
476
		    (escape && (*pattern == '\\') && (pattern[1] == '/')))))
477
			return FNM_NOMATCH;
478
479
		if (leading_dir && !*pattern && *string == '/')
480
			return 0;
481
	}
482
483
	/* Where both pattern and string are at EOS, declare success.  */
484
	if (!*string && !*pattern)
485
		return 0;
486
487
	/* Pattern didn't match to the end of string. */
488
	return FNM_NOMATCH;
489
}