GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: usr.bin/grep/util.c Lines: 240 275 87.3 %
Date: 2017-11-07 Branches: 224 307 73.0 %

Line Branch Exec Source
1
/*	$OpenBSD: util.c,v 1.57 2017/04/03 16:18:35 tedu Exp $	*/
2
3
/*-
4
 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
5
 * All rights reserved.
6
 *
7
 * Redistribution and use in source and binary forms, with or without
8
 * modification, are permitted provided that the following conditions
9
 * are met:
10
 * 1. Redistributions of source code must retain the above copyright
11
 *    notice, this list of conditions and the following disclaimer.
12
 * 2. Redistributions in binary form must reproduce the above copyright
13
 *    notice, this list of conditions and the following disclaimer in the
14
 *    documentation and/or other materials provided with the distribution.
15
 *
16
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26
 * SUCH DAMAGE.
27
 */
28
29
#include <sys/types.h>
30
#include <sys/stat.h>
31
32
#include <ctype.h>
33
#include <err.h>
34
#include <errno.h>
35
#include <fts.h>
36
#include <regex.h>
37
#include <stdbool.h>
38
#include <stdio.h>
39
#include <stdlib.h>
40
#include <string.h>
41
#include <unistd.h>
42
#include <zlib.h>
43
44
#include "grep.h"
45
46
/*
47
 * Process a file line by line...
48
 */
49
50
static int	linesqueued;
51
static int	procline(str_t *l, int);
52
static int	grep_search(fastgrep_t *, char *, size_t, regmatch_t *pmatch, int);
53
#ifndef SMALL
54
static bool	grep_cmp(const char *, const char *, size_t);
55
static void	grep_revstr(unsigned char *, int);
56
#endif
57
58
int
59
grep_tree(char **argv)
60
{
61
	FTS	*fts;
62
	FTSENT	*p;
63
	int	c, fts_flags;
64
65
	c = 0;
66
67
	fts_flags = FTS_PHYSICAL | FTS_NOSTAT | FTS_NOCHDIR;
68
69
2
	if (!(fts = fts_open(argv, fts_flags, NULL)))
70
		err(2, NULL);
71
18
	while ((p = fts_read(fts)) != NULL) {
72

33
		switch (p->fts_info) {
73
		case FTS_DNR:
74
			break;
75
		case FTS_ERR:
76
			file_err = 1;
77
			if(!sflag)
78
				warnc(p->fts_errno, "%s", p->fts_path);
79
			break;
80
		case FTS_DP:
81
			break;
82
		default:
83
16
			c += procfile(p->fts_path);
84
16
			break;
85
		}
86
	}
87
1
	if (errno)
88
		err(2, "fts_read");
89
1
	fts_close(fts);
90
1
	return c;
91
}
92
93
int
94
procfile(char *fn)
95
{
96
8696
	str_t ln;
97
	file_t *f;
98
	int c, t, z, nottext;
99
100
4348
	if (fn == NULL) {
101
		fn = "(standard input)";
102
3392
		f = grep_fdopen(STDIN_FILENO, "r");
103
3392
	} else {
104
956
		f = grep_open(fn, "r");
105
	}
106
4348
	if (f == NULL) {
107
9
		file_err = 1;
108
9
		if (!sflag)
109
			warn("%s", fn);
110
9
		return 0;
111
	}
112
113
4339
	nottext = grep_bin_file(f);
114
4339
	if (nottext && binbehave == BIN_FILE_SKIP) {
115
		grep_close(f);
116
		return 0;
117
	}
118
119
4339
	ln.file = fn;
120
4339
	ln.line_no = 0;
121
4339
	ln.len = 0;
122
4339
	linesqueued = 0;
123
4339
	tail = 0;
124
4339
	ln.off = -1;
125
126
4339
	if (Bflag > 0)
127
9
		initqueue();
128

3234940
	for (c = 0;  c == 0 || !(lflag || qflag); ) {
129
1095703
		ln.off += ln.len + 1;
130
1095703
		if ((ln.dat = grep_fgetln(f, &ln.len)) == NULL)
131
			break;
132

2184676
		if (ln.len > 0 && ln.dat[ln.len - 1] == '\n')
133
1045362
			--ln.len;
134
1093539
		ln.line_no++;
135
136
1093539
		z = tail;
137
138
1093539
		if ((t = procline(&ln, nottext)) == 0 && Bflag > 0 && z == 0) {
139
27
			enqueue(&ln);
140
27
			linesqueued++;
141
27
		}
142
1093539
		c += t;
143
	}
144
4339
	if (Bflag > 0)
145
9
		clearqueue();
146
4339
	grep_close(f);
147
148
4339
	if (cflag) {
149
31
		if (!hflag)
150
			printf("%s:", ln.file);
151
31
		printf("%u\n", c);
152
31
	}
153
4339
	if (lflag && c != 0)
154
18
		printf("%s\n", fn);
155
4339
	if (Lflag && c == 0)
156
		printf("%s\n", fn);
157
13017
	if (c && !cflag && !lflag && !Lflag &&
158
8678
	    binbehave == BIN_FILE_BIN && nottext && !qflag)
159
3
		printf("Binary file %s matches\n", fn);
160
161
4339
	return c;
162
4348
}
163
164
165
/*
166
 * Process an individual line in a file. Return non-zero if it matches.
167
 */
168
169
#define isword(x) (isalnum((unsigned char)x) || (x) == '_')
170
171
static int
172
procline(str_t *l, int nottext)
173
{
174
2187078
	regmatch_t	pmatch = { 0 };
175
	int		c, i, r;
176
	regoff_t	offset;
177
178
	/* size_t will be converted to regoff_t. ssize_t is guaranteed to fit
179
	 * into regoff_t */
180
1093539
	if (l->len > SSIZE_MAX) {
181
		errx(2, "Line is too big to process");
182
	}
183
184
	c = 0;
185
	i = 0;
186
1093539
	if (matchall) {
187
		c = 1;
188
45
		goto print;
189
	}
190
191
4348690
	for (i = 0; i < patterns; i++) {
192
1093504
		offset = 0;
193
redo:
194
1093585
		if (fg_pattern[i].pattern) {
195
			int flags = 0;
196
983316
			if (offset)
197
				flags |= REG_NOTBOL;
198
1966632
			r = grep_search(&fg_pattern[i], l->dat + offset,
199
983316
			    l->len - offset, &pmatch, flags);
200
983316
			pmatch.rm_so += offset;
201
983316
			pmatch.rm_eo += offset;
202
983316
		} else {
203
110269
			int flags = eflags;
204
110269
			if (offset)
205
81
				flags |= REG_NOTBOL;
206
110269
			pmatch.rm_so = offset;
207
110269
			pmatch.rm_eo = l->len;
208
110269
			r = regexec(&r_pattern[i], l->dat, 1, &pmatch, flags);
209
		}
210
1093585
		if (r == 0 && xflag) {
211

138
			if (pmatch.rm_so != 0 || pmatch.rm_eo != l->len)
212
63
				r = REG_NOMATCH;
213
		}
214
1093585
		if (r == 0) {
215
			c = 1;
216

12824
			if (oflag && pmatch.rm_so != pmatch.rm_eo)
217
				goto print;
218
			break;
219
		}
220
	}
221
1093485
	if (oflag)
222
18
		return c;
223
print:
224
1093602
	if (vflag)
225
910929
		c = !c;
226
227
1093602
	if (c && binbehave == BIN_FILE_BIN && nottext)
228
7
		return c; /* Binary file */
229
230
1093595
	if ((tail > 0 || c) && !cflag && !qflag) {
231
920146
		if (c) {
232

1839279
			if (first > 0 && tail == 0 && (Bflag < linesqueued) &&
233
			    (Aflag || Bflag))
234
				printf("--\n");
235
920131
			first = 1;
236
920131
			tail = Aflag;
237
920131
			if (Bflag > 0)
238
9
				printqueue();
239
920131
			linesqueued = 0;
240
920131
			printline(l, ':', oflag ? &pmatch : NULL);
241
920131
		} else {
242
15
			printline(l, '-', oflag ? &pmatch : NULL);
243
15
			tail--;
244
		}
245
	}
246
1093595
	if (oflag && !matchall) {
247
81
		offset = pmatch.rm_eo;
248
81
		goto redo;
249
	}
250
1093514
	return c;
251
1093539
}
252
253
#ifndef SMALL
254
void
255
fgrepcomp(fastgrep_t *fg, const unsigned char *pattern)
256
{
257
	int i;
258
259
	/* Initialize. */
260
668
	fg->patternLen = strlen(pattern);
261
334
	fg->bol = 0;
262
334
	fg->eol = 0;
263
334
	fg->wmatch = wflag;
264
334
	fg->reversedSearch = 0;
265
266
	/*
267
	 * Make a copy and upper case it for later if in -i mode,
268
	 * else just copy the pointer.
269
	 */
270
334
	if (iflag) {
271
		fg->pattern = grep_malloc(fg->patternLen + 1);
272
		for (i = 0; i < fg->patternLen; i++)
273
			fg->pattern[i] = toupper(pattern[i]);
274
		fg->pattern[fg->patternLen] = '\0';
275
	} else
276
334
		fg->pattern = (unsigned char *)pattern;	/* really const */
277
278
	/* Preprocess pattern. */
279
171676
	for (i = 0; i <= UCHAR_MAX; i++)
280
85504
		fg->qsBc[i] = fg->patternLen;
281
4354
	for (i = 1; i < fg->patternLen; i++) {
282
1843
		fg->qsBc[fg->pattern[i]] = fg->patternLen - i;
283
		/*
284
		 * If case is ignored, make the jump apply to both upper and
285
		 * lower cased characters.  As the pattern is stored in upper
286
		 * case, apply the same to the lower case equivalents.
287
		 */
288
1843
		if (iflag)
289
			fg->qsBc[tolower(fg->pattern[i])] = fg->patternLen - i;
290
	}
291
334
}
292
#endif
293
294
/*
295
 * Returns: -1 on failure, 0 on success
296
 */
297
int
298
fastcomp(fastgrep_t *fg, const char *pattern)
299
{
300
#ifdef SMALL
301
	return -1;
302
#else
303
	int i;
304
	int bol = 0;
305
	int eol = 0;
306
	int shiftPatternLen;
307
	int hasDot = 0;
308
	int firstHalfDot = -1;
309
	int firstLastHalfDot = -1;
310
	int lastHalfDot = 0;
311
312
	/* Initialize. */
313
7854
	fg->patternLen = strlen(pattern);
314
3927
	fg->bol = 0;
315
3927
	fg->eol = 0;
316
3927
	fg->wmatch = 0;
317
3927
	fg->reversedSearch = 0;
318
319
	/* Remove end-of-line character ('$'). */
320

7845
	if (fg->patternLen > 0 && pattern[fg->patternLen - 1] == '$') {
321
		eol++;
322
87
		fg->eol = 1;
323
87
		fg->patternLen--;
324
87
	}
325
326
	/* Remove beginning-of-line character ('^'). */
327
3927
	if (pattern[0] == '^') {
328
		bol++;
329
599
		fg->bol = 1;
330
599
		fg->patternLen--;
331
599
	}
332
333
	/* Remove enclosing [[:<:]] and [[:>:]] (word match). */
334
3927
	if (wflag) {
335
		/* basic re's use \( \), extended re's ( ) */
336
534
		int extra = Eflag ? 1 : 2;
337
534
		fg->patternLen -= 14 + 2 * extra;
338
534
		fg->wmatch = 7 + extra;
339

4062
	} else if (fg->patternLen >= 14 &&
340
298
	    strncmp(pattern + fg->bol, "[[:<:]]", 7) == 0 &&
341
135
	    strncmp(pattern + fg->bol + fg->patternLen - 7, "[[:>:]]", 7) == 0) {
342
135
		fg->patternLen -= 14;
343
135
		fg->wmatch = 7;
344
135
	}
345
346
	/*
347
	 * Copy pattern minus '^' and '$' characters as well as word
348
	 * match character classes at the beginning and ending of the
349
	 * string respectively.
350
	 */
351
3927
	fg->pattern = grep_malloc(fg->patternLen + 1);
352
3927
	memcpy(fg->pattern, pattern + bol + fg->wmatch, fg->patternLen);
353
3927
	fg->pattern[fg->patternLen] = '\0';
354
355
	/* Look for ways to cheat...er...avoid the full regex engine. */
356
53880
	for (i = 0; i < fg->patternLen; i++)
357
	{
358



23838
		switch (fg->pattern[i]) {
359
		case '.':
360
			hasDot = i;
361
110
			if (i < fg->patternLen / 2) {
362
60
				if (firstHalfDot < 0)
363
					/* Closest dot to the beginning */
364
60
					firstHalfDot = i;
365
			} else {
366
				/* Closest dot to the end of the pattern. */
367
				lastHalfDot = i;
368
50
				if (firstLastHalfDot < 0)
369
50
					firstLastHalfDot = i;
370
			}
371
			break;
372
		case '(': case ')':
373
		case '{': case '}':
374
			/* Special in BRE if preceded by '\\' */
375
		case '?':
376
		case '+':
377
		case '|':
378
			/* Not special in BRE. */
379
292
			if (!Eflag)
380
				goto nonspecial;
381
		case '\\':
382
		case '*':
383
		case '[': case ']':
384
			/* Free memory and let others know this is empty. */
385
533
			free(fg->pattern);
386
533
			fg->pattern = NULL;
387
533
			return (-1);
388
		default:
389
nonspecial:
390
22903
			if (iflag)
391
103
				fg->pattern[i] = toupper(fg->pattern[i]);
392
			break;
393
		}
394
	}
395
396
	/*
397
	 * Determine if a reverse search would be faster based on the placement
398
	 * of the dots.
399
	 */
400

6797
	if ((!(lflag || cflag || oflag)) && ((!(bol || eol)) &&
401
3421
	    ((lastHalfDot) && ((firstHalfDot < 0) ||
402
9
	    ((fg->patternLen - (lastHalfDot + 1)) < firstHalfDot))))) {
403
27
		fg->reversedSearch = 1;
404
27
		hasDot = fg->patternLen - (firstHalfDot < 0 ?
405
27
		    firstLastHalfDot : firstHalfDot) - 1;
406
27
		grep_revstr(fg->pattern, fg->patternLen);
407
27
	}
408
409
	/*
410
	 * Normal Quick Search would require a shift based on the position the
411
	 * next character after the comparison is within the pattern.  With
412
	 * wildcards, the position of the last dot effects the maximum shift
413
	 * distance.
414
	 * The closer to the end the wild card is the slower the search.  A
415
	 * reverse version of this algorithm would be useful for wildcards near
416
	 * the end of the string.
417
	 *
418
	 * Examples:
419
	 * Pattern	Max shift
420
	 * -------	---------
421
	 * this		5
422
	 * .his		4
423
	 * t.is		3
424
	 * th.s		2
425
	 * thi.		1
426
	 */
427
428
	/* Adjust the shift based on location of the last dot ('.'). */
429
3394
	shiftPatternLen = fg->patternLen - hasDot;
430
431
	/* Preprocess pattern. */
432
1744516
	for (i = 0; i <= UCHAR_MAX; i++)
433
868864
		fg->qsBc[i] = shiftPatternLen;
434
42936
	for (i = hasDot + 1; i < fg->patternLen; i++) {
435
18074
		fg->qsBc[fg->pattern[i]] = fg->patternLen - i;
436
		/*
437
		 * If case is ignored, make the jump apply to both upper and
438
		 * lower cased characters.  As the pattern is stored in upper
439
		 * case, apply the same to the lower case equivalents.
440
		 */
441
18074
		if (iflag)
442
86
			fg->qsBc[tolower(fg->pattern[i])] = fg->patternLen - i;
443
	}
444
445
	/*
446
	 * Put pattern back to normal after pre-processing to allow for easy
447
	 * comparisons later.
448
	 */
449
3394
	if (fg->reversedSearch)
450
27
		grep_revstr(fg->pattern, fg->patternLen);
451
452
3394
	return (0);
453
#endif
454
3927
}
455
456
/*
457
 * Word boundaries using regular expressions are defined as the point
458
 * of transition from a non-word char to a word char, or vice versa.
459
 * This means that grep -w +a and grep -w a+ never match anything,
460
 * because they lack a starting or ending transition, but grep -w a+b
461
 * does match a line containing a+b.
462
 */
463
#define wmatch(d, l, s, e)	\
464
	((s == 0 || !isword(d[s-1])) && (e == l || !isword(d[e])) && \
465
	  e > s && isword(d[s]) && isword(d[e-1]))
466
467
static int
468
grep_search(fastgrep_t *fg, char *data, size_t dataLen, regmatch_t *pmatch,
469
    int flags)
470
{
471
#ifdef SMALL
472
	return 0;
473
#else
474
	regoff_t j;
475
	int rtrnVal = REG_NOMATCH;
476
477
1966632
	pmatch->rm_so = -1;
478
983316
	pmatch->rm_eo = -1;
479
480
	/* No point in going farther if we do not have enough data. */
481
983316
	if (dataLen < fg->patternLen)
482
7717
		return (rtrnVal);
483
484
	/* Only try once at the beginning or ending of the line. */
485

1921460
	if (fg->bol || fg->eol) {
486

59554
		if (fg->bol && (flags & REG_NOTBOL))
487
			return 0;
488
		/* Simple text comparison. */
489
		/* Verify data is >= pattern length before searching on it. */
490
29816
		if (dataLen >= fg->patternLen) {
491
			/* Determine where in data to start search at. */
492
29816
			if (fg->eol)
493
87
				j = dataLen - fg->patternLen;
494
			else
495
				j = 0;
496

59563
			if (!((fg->bol && fg->eol) && (dataLen != fg->patternLen)))
497
59632
				if (grep_cmp(fg->pattern, data + j,
498
29816
				    fg->patternLen)) {
499
651
					pmatch->rm_so = j;
500
651
					pmatch->rm_eo = j + fg->patternLen;
501






1735
					if (!fg->wmatch || wmatch(data, dataLen,
502
					    pmatch->rm_so, pmatch->rm_eo))
503
651
						rtrnVal = 0;
504
				}
505
		}
506
945783
	} else if (fg->reversedSearch) {
507
		/* Quick Search algorithm. */
508
		j = dataLen;
509
297
		do {
510
1422
			if (grep_cmp(fg->pattern, data + j - fg->patternLen,
511
			    fg->patternLen)) {
512
252
				pmatch->rm_so = j - fg->patternLen;
513
252
				pmatch->rm_eo = j;
514






405
				if (!fg->wmatch || wmatch(data, dataLen,
515
				    pmatch->rm_so, pmatch->rm_eo)) {
516
					rtrnVal = 0;
517
243
					break;
518
				}
519
			}
520
			/* Shift if within bounds, otherwise, we are done. */
521
1179
			if (j == fg->patternLen)
522
				break;
523
1143
			j -= fg->qsBc[(unsigned char)data[j - fg->patternLen - 1]];
524
1143
		} while (j >= fg->patternLen);
525
	} else {
526
		/* Quick Search algorithm. */
527
		j = 0;
528
945486
		do {
529
9364968
			if (grep_cmp(fg->pattern, data + j, fg->patternLen)) {
530
11164
				pmatch->rm_so = j;
531
11164
				pmatch->rm_eo = j + fg->patternLen;
532

22328
				if (fg->patternLen == 0 || !fg->wmatch ||
533





2517
				    wmatch(data, dataLen, pmatch->rm_so,
534
				    pmatch->rm_eo)) {
535
					rtrnVal = 0;
536
10883
					break;
537
				}
538
			}
539
540
			/* Shift if within bounds, otherwise, we are done. */
541
9354085
			if (j + fg->patternLen == dataLen)
542
				break;
543
			else
544
8445600
				j += fg->qsBc[(unsigned char)data[j + fg->patternLen]];
545
8445600
		} while (j <= (dataLen - fg->patternLen));
546
	}
547
548
975599
	return (rtrnVal);
549
#endif
550
983316
}
551
552
553
void *
554
grep_malloc(size_t size)
555
{
556
	void	*ptr;
557
558
27074
	if ((ptr = malloc(size)) == NULL)
559
		err(2, "malloc");
560
13537
	return ptr;
561
}
562
563
void *
564
grep_calloc(size_t nmemb, size_t size)
565
{
566
	void	*ptr;
567
568
16924
	if ((ptr = calloc(nmemb, size)) == NULL)
569
		err(2, "calloc");
570
8462
	return ptr;
571
}
572
573
void *
574
grep_realloc(void *ptr, size_t size)
575
{
576
	if ((ptr = realloc(ptr, size)) == NULL)
577
		err(2, "realloc");
578
	return ptr;
579
}
580
581
void *
582
grep_reallocarray(void *ptr, size_t nmemb, size_t size)
583
{
584
8806
	if ((ptr = reallocarray(ptr, nmemb, size)) == NULL)
585
		err(2, "reallocarray");
586
4403
	return ptr;
587
}
588
589
#ifndef SMALL
590
/*
591
 * Returns:	true on success, false on failure
592
 */
593
static bool
594
grep_cmp(const char *pattern, const char *data, size_t len)
595
{
596
	size_t i;
597
598
28932578
	for (i = 0; i < len; i++) {
599

29282828
		if (((pattern[i] == data[i]) || (!Fflag && pattern[i] == '.'))
600

19227089
		    || (iflag && pattern[i] == toupper((unsigned char)data[i])))
601
			continue;
602
9384139
		return false;
603
	}
604
605
12067
	return true;
606
9396206
}
607
608
static void
609
grep_revstr(unsigned char *str, int len)
610
{
611
	int i;
612
	char c;
613
614
414
	for (i = 0; i < len / 2; i++) {
615
126
		c = str[i];
616
126
		str[i] = str[len - i - 1];
617
126
		str[len - i - 1] = c;
618
	}
619
54
}
620
#endif
621
622
void
623
printline(str_t *line, int sep, regmatch_t *pmatch)
624
{
625
	int n;
626
627
	n = 0;
628
1840310
	if (!hflag) {
629
143
		fputs(line->file, stdout);
630
		++n;
631
143
	}
632
920155
	if (nflag) {
633
		if (n)
634
			putchar(sep);
635
		printf("%lld", line->line_no);
636
		++n;
637
	}
638
920155
	if (bflag) {
639
		if (n)
640
			putchar(sep);
641
		printf("%lld", (long long)line->off);
642
		++n;
643
	}
644
920155
	if (n)
645
286
		putchar(sep);
646
920155
	if (pmatch)
647
162
		fwrite(line->dat + pmatch->rm_so,
648
81
		    pmatch->rm_eo - pmatch->rm_so, 1, stdout);
649
	else
650
920074
		fwrite(line->dat, line->len, 1, stdout);
651
1840310
	putchar('\n');
652
920155
}