GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: usr.bin/grep/util.c Lines: 172 275 62.5 %
Date: 2016-12-06 Branches: 154 335 46.0 %

Line Branch Exec Source
1
/*	$OpenBSD: util.c,v 1.55 2016/04/04 05:49:47 otto Exp $	*/
2
3
/*-
4
 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
5
 * All rights reserved.
6
 *
7
 * Redistribution and use in source and binary forms, with or without
8
 * modification, are permitted provided that the following conditions
9
 * are met:
10
 * 1. Redistributions of source code must retain the above copyright
11
 *    notice, this list of conditions and the following disclaimer.
12
 * 2. Redistributions in binary form must reproduce the above copyright
13
 *    notice, this list of conditions and the following disclaimer in the
14
 *    documentation and/or other materials provided with the distribution.
15
 *
16
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26
 * SUCH DAMAGE.
27
 */
28
29
#include <sys/types.h>
30
#include <sys/stat.h>
31
32
#include <ctype.h>
33
#include <err.h>
34
#include <errno.h>
35
#include <fts.h>
36
#include <regex.h>
37
#include <stdbool.h>
38
#include <stdio.h>
39
#include <stdlib.h>
40
#include <string.h>
41
#include <unistd.h>
42
#include <zlib.h>
43
44
#include "grep.h"
45
46
/*
47
 * Process a file line by line...
48
 */
49
50
static int	linesqueued;
51
static int	procline(str_t *l, int);
52
static int	grep_search(fastgrep_t *, char *, size_t, regmatch_t *pmatch);
53
#ifndef SMALL
54
static bool	grep_cmp(const char *, const char *, size_t);
55
static void	grep_revstr(unsigned char *, int);
56
#endif
57
58
int
59
grep_tree(char **argv)
60
{
61
	FTS	*fts;
62
	FTSENT	*p;
63
	int	c, fts_flags;
64
65
	c = 0;
66
67
	fts_flags = FTS_PHYSICAL | FTS_NOSTAT | FTS_NOCHDIR;
68
69
	if (!(fts = fts_open(argv, fts_flags, NULL)))
70
		err(2, NULL);
71
	while ((p = fts_read(fts)) != NULL) {
72
		switch (p->fts_info) {
73
		case FTS_DNR:
74
			break;
75
		case FTS_ERR:
76
			file_err = 1;
77
			if(!sflag)
78
				warnc(p->fts_errno, "%s", p->fts_path);
79
			break;
80
		case FTS_DP:
81
			break;
82
		default:
83
			c += procfile(p->fts_path);
84
			break;
85
		}
86
	}
87
	if (errno)
88
		err(2, "fts_read");
89
	fts_close(fts);
90
	return c;
91
}
92
93
int
94
procfile(char *fn)
95
349
{
96
	str_t ln;
97
	file_t *f;
98
	int c, t, z, nottext;
99
100
349
	if (fn == NULL) {
101
82
		fn = "(standard input)";
102
82
		f = grep_fdopen(STDIN_FILENO, "r");
103
	} else {
104
267
		f = grep_open(fn, "r");
105
	}
106
349
	if (f == NULL) {
107
		file_err = 1;
108
		if (!sflag)
109
			warn("%s", fn);
110
		return 0;
111
	}
112
113
349
	nottext = grep_bin_file(f);
114

349
	if (nottext && binbehave == BIN_FILE_SKIP) {
115
		grep_close(f);
116
		return 0;
117
	}
118
119
349
	ln.file = fn;
120
349
	ln.line_no = 0;
121
349
	ln.len = 0;
122
349
	linesqueued = 0;
123
349
	tail = 0;
124
349
	ln.off = -1;
125
126
349
	if (Bflag > 0)
127
		initqueue();
128

21677
	for (c = 0;  c == 0 || !(lflag || qflag); ) {
129
21322
		ln.off += ln.len + 1;
130
21322
		if ((ln.dat = grep_fgetln(f, &ln.len)) == NULL)
131
343
			break;
132

20979
		if (ln.len > 0 && ln.dat[ln.len - 1] == '\n')
133
18620
			--ln.len;
134
20979
		ln.line_no++;
135
136
20979
		z = tail;
137
138

20979
		if ((t = procline(&ln, nottext)) == 0 && Bflag > 0 && z == 0) {
139
			enqueue(&ln);
140
			linesqueued++;
141
		}
142
20979
		c += t;
143
	}
144
349
	if (Bflag > 0)
145
		clearqueue();
146
349
	grep_close(f);
147
148
349
	if (cflag) {
149
5
		if (!hflag)
150
			printf("%s:", ln.file);
151
5
		printf("%u\n", c);
152
	}
153
349
	if (lflag && c != 0)
154
		printf("%s\n", fn);
155
349
	if (Lflag && c == 0)
156
		printf("%s\n", fn);
157



349
	if (c && !cflag && !lflag && !Lflag &&
158
	    binbehave == BIN_FILE_BIN && nottext && !qflag)
159
		printf("Binary file %s matches\n", fn);
160
161
349
	return c;
162
}
163
164
165
/*
166
 * Process an individual line in a file. Return non-zero if it matches.
167
 */
168
169
#define isword(x) (isalnum((unsigned char)x) || (x) == '_')
170
171
static int
172
procline(str_t *l, int nottext)
173
20979
{
174
	regmatch_t	pmatch;
175
	int		c, i, r;
176
	regoff_t	offset;
177
178
	/* size_t will be converted to regoff_t. ssize_t is guaranteed to fit
179
	 * into regoff_t */
180
20979
	if (l->len > SSIZE_MAX) {
181
		errx(2, "Line is too big to process");
182
	}
183
184
20979
	c = 0;
185
20979
	i = 0;
186
20979
	if (matchall) {
187
		c = 1;
188
		goto print;
189
	}
190
191
41475
	for (i = 0; i < patterns; i++) {
192
20979
		offset = 0;
193
20979
redo:
194
20979
		if (fg_pattern[i].pattern) {
195
5814
			r = grep_search(&fg_pattern[i], l->dat + offset,
196
			    l->len - offset, &pmatch);
197
5814
			pmatch.rm_so += offset;
198
5814
			pmatch.rm_eo += offset;
199
		} else {
200
15165
			pmatch.rm_so = offset;
201
15165
			pmatch.rm_eo = l->len;
202
15165
			r = regexec(&r_pattern[i], l->dat, 1, &pmatch, eflags);
203
		}
204

20979
		if (r == 0 && xflag) {
205
			if (pmatch.rm_so != 0 || pmatch.rm_eo != l->len)
206
				r = REG_NOMATCH;
207
		}
208
20979
		if (r == 0) {
209
483
			c = 1;
210

483
			if (oflag && pmatch.rm_so != pmatch.rm_eo)
211
				goto print;
212
			break;
213
		}
214
	}
215
20979
	if (oflag)
216
		return c;
217
20979
print:
218
20979
	if (vflag)
219
366
		c = !c;
220
221

20979
	if (c && binbehave == BIN_FILE_BIN && nottext)
222
		return c; /* Binary file */
223
224

20979
	if ((tail > 0 || c) && !cflag && !qflag) {
225
471
		if (c) {
226


471
			if (first > 0 && tail == 0 && (Bflag < linesqueued) &&
227
			    (Aflag || Bflag))
228
				printf("--\n");
229
471
			first = 1;
230
471
			tail = Aflag;
231
471
			if (Bflag > 0)
232
				printqueue();
233
471
			linesqueued = 0;
234
471
			printline(l, ':', oflag ? &pmatch : NULL);
235
		} else {
236
			printline(l, '-', oflag ? &pmatch : NULL);
237
			tail--;
238
		}
239
	}
240

20979
	if (oflag && !matchall) {
241
		offset = pmatch.rm_eo;
242
		goto redo;
243
	}
244
20979
	return c;
245
}
246
247
#ifndef SMALL
248
void
249
fgrepcomp(fastgrep_t *fg, const unsigned char *pattern)
250
{
251
	int i;
252
253
	/* Initialize. */
254
	fg->patternLen = strlen(pattern);
255
	fg->bol = 0;
256
	fg->eol = 0;
257
	fg->wmatch = wflag;
258
	fg->reversedSearch = 0;
259
260
	/*
261
	 * Make a copy and upper case it for later if in -i mode,
262
	 * else just copy the pointer.
263
	 */
264
	if (iflag) {
265
		fg->pattern = grep_malloc(fg->patternLen + 1);
266
		for (i = 0; i < fg->patternLen; i++)
267
			fg->pattern[i] = toupper(pattern[i]);
268
		fg->pattern[fg->patternLen] = '\0';
269
	} else
270
		fg->pattern = (unsigned char *)pattern;	/* really const */
271
272
	/* Preprocess pattern. */
273
	for (i = 0; i <= UCHAR_MAX; i++)
274
		fg->qsBc[i] = fg->patternLen;
275
	for (i = 1; i < fg->patternLen; i++) {
276
		fg->qsBc[fg->pattern[i]] = fg->patternLen - i;
277
		/*
278
		 * If case is ignored, make the jump apply to both upper and
279
		 * lower cased characters.  As the pattern is stored in upper
280
		 * case, apply the same to the lower case equivalents.
281
		 */
282
		if (iflag)
283
			fg->qsBc[tolower(fg->pattern[i])] = fg->patternLen - i;
284
	}
285
}
286
#endif
287
288
/*
289
 * Returns: -1 on failure, 0 on success
290
 */
291
int
292
fastcomp(fastgrep_t *fg, const char *pattern)
293
371
{
294
#ifdef SMALL
295
	return -1;
296
#else
297
	int i;
298
371
	int bol = 0;
299
371
	int eol = 0;
300
	int shiftPatternLen;
301
371
	int hasDot = 0;
302
371
	int firstHalfDot = -1;
303
371
	int firstLastHalfDot = -1;
304
371
	int lastHalfDot = 0;
305
306
	/* Initialize. */
307
371
	fg->patternLen = strlen(pattern);
308
371
	fg->bol = 0;
309
371
	fg->eol = 0;
310
371
	fg->wmatch = 0;
311
371
	fg->reversedSearch = 0;
312
313
	/* Remove end-of-line character ('$'). */
314

371
	if (fg->patternLen > 0 && pattern[fg->patternLen - 1] == '$') {
315
25
		eol++;
316
25
		fg->eol = 1;
317
25
		fg->patternLen--;
318
	}
319
320
	/* Remove beginning-of-line character ('^'). */
321
371
	if (pattern[0] == '^') {
322
262
		bol++;
323
262
		fg->bol = 1;
324
262
		fg->patternLen--;
325
	}
326
327
	/* Remove enclosing [[:<:]] and [[:>:]] (word match). */
328
371
	if (wflag) {
329
		/* basic re's use \( \), extended re's ( ) */
330
		int extra = Eflag ? 1 : 2;
331
		fg->patternLen -= 14 + 2 * extra;
332
		fg->wmatch = 7 + extra;
333

371
	} else if (fg->patternLen >= 14 &&
334
	    strncmp(pattern + fg->bol, "[[:<:]]", 7) == 0 &&
335
	    strncmp(pattern + fg->bol + fg->patternLen - 7, "[[:>:]]", 7) == 0) {
336
15
		fg->patternLen -= 14;
337
15
		fg->wmatch = 7;
338
	}
339
340
	/*
341
	 * Copy pattern minus '^' and '$' characters as well as word
342
	 * match character classes at the beginning and ending of the
343
	 * string respectively.
344
	 */
345
371
	fg->pattern = grep_malloc(fg->patternLen + 1);
346
371
	memcpy(fg->pattern, pattern + bol + fg->wmatch, fg->patternLen);
347
371
	fg->pattern[fg->patternLen] = '\0';
348
349
	/* Look for ways to cheat...er...avoid the full regex engine. */
350
1262
	for (i = 0; i < fg->patternLen; i++)
351
	{
352

1168
		switch (fg->pattern[i]) {
353
		case '.':
354
1
			hasDot = i;
355
1
			if (i < fg->patternLen / 2) {
356
				if (firstHalfDot < 0)
357
					/* Closest dot to the beginning */
358
					firstHalfDot = i;
359
			} else {
360
				/* Closest dot to the end of the pattern. */
361
1
				lastHalfDot = i;
362
1
				if (firstLastHalfDot < 0)
363
1
					firstLastHalfDot = i;
364
			}
365
			break;
366
		case '(': case ')':
367
		case '{': case '}':
368
			/* Special in BRE if preceded by '\\' */
369
		case '?':
370
		case '+':
371
		case '|':
372
			/* Not special in BRE. */
373
58
			if (!Eflag)
374
44
				goto nonspecial;
375
		case '\\':
376
		case '*':
377
		case '[': case ']':
378
			/* Free memory and let others know this is empty. */
379
277
			free(fg->pattern);
380
277
			fg->pattern = NULL;
381
277
			return (-1);
382
		default:
383
890
nonspecial:
384
890
			if (iflag)
385
				fg->pattern[i] = toupper(fg->pattern[i]);
386
			break;
387
		}
388
	}
389
390
	/*
391
	 * Determine if a reverse search would be faster based on the placement
392
	 * of the dots.
393
	 */
394



94
	if ((!(lflag || cflag || oflag)) && ((!(bol || eol)) &&
395
	    ((lastHalfDot) && ((firstHalfDot < 0) ||
396
	    ((fg->patternLen - (lastHalfDot + 1)) < firstHalfDot))))) {
397
		fg->reversedSearch = 1;
398
		hasDot = fg->patternLen - (firstHalfDot < 0 ?
399
		    firstLastHalfDot : firstHalfDot) - 1;
400
		grep_revstr(fg->pattern, fg->patternLen);
401
	}
402
403
	/*
404
	 * Normal Quick Search would require a shift based on the position the
405
	 * next character after the comparison is within the pattern.  With
406
	 * wildcards, the position of the last dot effects the maximum shift
407
	 * distance.
408
	 * The closer to the end the wild card is the slower the search.  A
409
	 * reverse version of this algorithm would be useful for wildcards near
410
	 * the end of the string.
411
	 *
412
	 * Examples:
413
	 * Pattern	Max shift
414
	 * -------	---------
415
	 * this		5
416
	 * .his		4
417
	 * t.is		3
418
	 * th.s		2
419
	 * thi.		1
420
	 */
421
422
	/* Adjust the shift based on location of the last dot ('.'). */
423
94
	shiftPatternLen = fg->patternLen - hasDot;
424
425
	/* Preprocess pattern. */
426
24158
	for (i = 0; i <= UCHAR_MAX; i++)
427
24064
		fg->qsBc[i] = shiftPatternLen;
428
628
	for (i = hasDot + 1; i < fg->patternLen; i++) {
429
534
		fg->qsBc[fg->pattern[i]] = fg->patternLen - i;
430
		/*
431
		 * If case is ignored, make the jump apply to both upper and
432
		 * lower cased characters.  As the pattern is stored in upper
433
		 * case, apply the same to the lower case equivalents.
434
		 */
435
534
		if (iflag)
436
			fg->qsBc[tolower(fg->pattern[i])] = fg->patternLen - i;
437
	}
438
439
	/*
440
	 * Put pattern back to normal after pre-processing to allow for easy
441
	 * comparisons later.
442
	 */
443
94
	if (fg->reversedSearch)
444
		grep_revstr(fg->pattern, fg->patternLen);
445
446
94
	return (0);
447
#endif
448
}
449
450
/*
451
 * Word boundaries using regular expressions are defined as the point
452
 * of transition from a non-word char to a word char, or vice versa.
453
 * This means that grep -w +a and grep -w a+ never match anything,
454
 * because they lack a starting or ending transition, but grep -w a+b
455
 * does match a line containing a+b.
456
 */
457
#define wmatch(d, l, s, e)	\
458
	((s == 0 || !isword(d[s-1])) && (e == l || !isword(d[e])) && \
459
	  e > s && isword(d[s]) && isword(d[e-1]))
460
461
static int
462
grep_search(fastgrep_t *fg, char *data, size_t dataLen, regmatch_t *pmatch)
463
5814
{
464
#ifdef SMALL
465
	return 0;
466
#else
467
	regoff_t j;
468
5814
	int rtrnVal = REG_NOMATCH;
469
470
5814
	pmatch->rm_so = -1;
471
5814
	pmatch->rm_eo = -1;
472
473
	/* No point in going farther if we do not have enough data. */
474
5814
	if (dataLen < fg->patternLen)
475
2297
		return (rtrnVal);
476
477
	/* Only try once at the beginning or ending of the line. */
478

3517
	if (fg->bol || fg->eol) {
479
		/* Simple text comparison. */
480
		/* Verify data is >= pattern length before searching on it. */
481
881
		if (dataLen >= fg->patternLen) {
482
			/* Determine where in data to start search at. */
483
881
			if (fg->eol)
484
22
				j = dataLen - fg->patternLen;
485
			else
486
859
				j = 0;
487

881
			if (!((fg->bol && fg->eol) && (dataLen != fg->patternLen)))
488
881
				if (grep_cmp(fg->pattern, data + j,
489
				    fg->patternLen)) {
490
99
					pmatch->rm_so = j;
491
99
					pmatch->rm_eo = j + fg->patternLen;
492






99
					if (!fg->wmatch || wmatch(data, dataLen,
493
					    pmatch->rm_so, pmatch->rm_eo))
494
99
						rtrnVal = 0;
495
				}
496
		}
497
2636
	} else if (fg->reversedSearch) {
498
		/* Quick Search algorithm. */
499
		j = dataLen;
500
		do {
501
			if (grep_cmp(fg->pattern, data + j - fg->patternLen,
502
			    fg->patternLen)) {
503
				pmatch->rm_so = j - fg->patternLen;
504
				pmatch->rm_eo = j;
505
				if (!fg->wmatch || wmatch(data, dataLen,
506
				    pmatch->rm_so, pmatch->rm_eo)) {
507
					rtrnVal = 0;
508
					break;
509
				}
510
			}
511
			/* Shift if within bounds, otherwise, we are done. */
512
			if (j == fg->patternLen)
513
				break;
514
			j -= fg->qsBc[(unsigned char)data[j - fg->patternLen - 1]];
515
		} while (j >= fg->patternLen);
516
	} else {
517
		/* Quick Search algorithm. */
518
2636
		j = 0;
519
		do {
520
31880
			if (grep_cmp(fg->pattern, data + j, fg->patternLen)) {
521
322
				pmatch->rm_so = j;
522
322
				pmatch->rm_eo = j + fg->patternLen;
523






336
				if (fg->patternLen == 0 || !fg->wmatch ||
524
				    wmatch(data, dataLen, pmatch->rm_so,
525
				    pmatch->rm_eo)) {
526
320
					rtrnVal = 0;
527
320
					break;
528
				}
529
			}
530
531
			/* Shift if within bounds, otherwise, we are done. */
532
31560
			if (j + fg->patternLen == dataLen)
533
489
				break;
534
			else
535
31071
				j += fg->qsBc[(unsigned char)data[j + fg->patternLen]];
536
31071
		} while (j <= (dataLen - fg->patternLen));
537
	}
538
539
3517
	return (rtrnVal);
540
#endif
541
}
542
543
544
void *
545
grep_malloc(size_t size)
546
1358
{
547
	void	*ptr;
548
549
1358
	if ((ptr = malloc(size)) == NULL)
550
		err(2, "malloc");
551
1358
	return ptr;
552
}
553
554
void *
555
grep_calloc(size_t nmemb, size_t size)
556
698
{
557
	void	*ptr;
558
559
698
	if ((ptr = calloc(nmemb, size)) == NULL)
560
		err(2, "calloc");
561
698
	return ptr;
562
}
563
564
void *
565
grep_realloc(void *ptr, size_t size)
566
{
567
	if ((ptr = realloc(ptr, size)) == NULL)
568
		err(2, "realloc");
569
	return ptr;
570
}
571
572
void *
573
grep_reallocarray(void *ptr, size_t nmemb, size_t size)
574
415
{
575
415
	if ((ptr = reallocarray(ptr, nmemb, size)) == NULL)
576
		err(2, "reallocarray");
577
415
	return ptr;
578
}
579
580
#ifndef SMALL
581
/*
582
 * Returns:	true on success, false on failure
583
 */
584
static bool
585
grep_cmp(const char *pattern, const char *data, size_t len)
586
32761
{
587
	size_t i;
588
589
34040
	for (i = 0; i < len; i++) {
590


33619
		if (((pattern[i] == data[i]) || (!Fflag && pattern[i] == '.'))
591
		    || (iflag && pattern[i] == toupper((unsigned char)data[i])))
592
			continue;
593
32340
		return false;
594
	}
595
596
421
	return true;
597
}
598
599
static void
600
grep_revstr(unsigned char *str, int len)
601
{
602
	int i;
603
	char c;
604
605
	for (i = 0; i < len / 2; i++) {
606
		c = str[i];
607
		str[i] = str[len - i - 1];
608
		str[len - i - 1] = c;
609
	}
610
}
611
#endif
612
613
void
614
printline(str_t *line, int sep, regmatch_t *pmatch)
615
471
{
616
	int n;
617
618
471
	n = 0;
619
471
	if (!hflag) {
620
		fputs(line->file, stdout);
621
		++n;
622
	}
623
471
	if (nflag) {
624
		if (n)
625
			putchar(sep);
626
		printf("%lld", line->line_no);
627
		++n;
628
	}
629
471
	if (bflag) {
630
		if (n)
631
			putchar(sep);
632
		printf("%lld", (long long)line->off);
633
		++n;
634
	}
635
471
	if (n)
636
		putchar(sep);
637
471
	if (pmatch)
638
		fwrite(line->dat + pmatch->rm_so,
639
		    pmatch->rm_eo - pmatch->rm_so, 1, stdout);
640
	else
641
471
		fwrite(line->dat, line->len, 1, stdout);
642
471
	putchar('\n');
643
471
}