GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: usr.bin/sed/process.c Lines: 245 292 83.9 %
Date: 2017-11-13 Branches: 209 275 76.0 %

Line Branch Exec Source
1
/*	$OpenBSD: process.c,v 1.32 2017/02/22 14:09:09 tom Exp $	*/
2
3
/*-
4
 * Copyright (c) 1992 Diomidis Spinellis.
5
 * Copyright (c) 1992, 1993
6
 *	The Regents of the University of California.  All rights reserved.
7
 *
8
 * This code is derived from software contributed to Berkeley by
9
 * Diomidis Spinellis of Imperial College, University of London.
10
 *
11
 * Redistribution and use in source and binary forms, with or without
12
 * modification, are permitted provided that the following conditions
13
 * are met:
14
 * 1. Redistributions of source code must retain the above copyright
15
 *    notice, this list of conditions and the following disclaimer.
16
 * 2. Redistributions in binary form must reproduce the above copyright
17
 *    notice, this list of conditions and the following disclaimer in the
18
 *    documentation and/or other materials provided with the distribution.
19
 * 3. Neither the name of the University nor the names of its contributors
20
 *    may be used to endorse or promote products derived from this software
21
 *    without specific prior written permission.
22
 *
23
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33
 * SUCH DAMAGE.
34
 */
35
36
#include <sys/types.h>
37
#include <sys/stat.h>
38
#include <sys/uio.h>
39
40
#include <ctype.h>
41
#include <errno.h>
42
#include <fcntl.h>
43
#include <limits.h>
44
#include <regex.h>
45
#include <stdio.h>
46
#include <stdlib.h>
47
#include <string.h>
48
#include <unistd.h>
49
50
#include "defs.h"
51
#include "extern.h"
52
53
/*
 * HS and PS are reached through the hs/ps shorthands below; SS is the
 * scratch space in which substitute() assembles its result before
 * swapping it with PS.
 */
static SPACE HS, PS, SS;
#define	pd		PS.deleted
#define	ps		PS.space
#define	psl		PS.len
#define	psanl		PS.append_newline
#define	hs		HS.space
#define	hsl		HS.len

static inline int	 applies(struct s_command *);
static void		 flush_appends(void);
static void		 lputs(char *);
static inline int	 regexec_e(regex_t *, const char *, int, int, size_t,
			     size_t);
static void		 regsub(SPACE *, char *, char *);
static int		 substitute(struct s_command *);

struct s_appends *appends;	/* Array of pending append requests. */
static int appendx;		/* Index of the next free appends slot. */
size_t appendnum;		/* Size of appends array. */

static int lastaddr;		/* Set by applies if last address of a range. */
static int sdone;		/* If any substitutes since last line input. */

/*
 * Most recently used RE; regexec_e() falls back to it when a command
 * supplies a NULL RE.
 */
static regex_t *defpreg;
size_t maxnsub;			/* regexec_e() requests maxnsub + 1 matches. */
regmatch_t *match;		/* Match offsets filled in by regexec_e(). */

/*
 * Write the pattern space to outfile, followed by a newline when
 * psanl is set.
 */
#define OUT() do {\
	fwrite(ps, 1, psl, outfile);\
	if (psanl) fputc('\n', outfile);\
} while (0)
84
85
void
86
process(void)
87
{
88
	struct s_command *cp;
89
7808
	SPACE tspace;
90
	size_t len, oldpsl;
91
	char *p;
92
93
896560
	for (linenum = 0; mf_fgets(&PS, REPLACE);) {
94
444446
		pd = 0;
95
top:
96
444446
		cp = prog;
97
redirect:
98
3127436
		while (cp != NULL) {
99
1767696
			if (!applies(cp)) {
100
824250
				cp = cp->next;
101
824250
				continue;
102
			}
103






1466680
			switch (cp->code) {
104
			case '{':
105
173697
				cp = cp->u.c;
106
173697
				goto redirect;
107
			case 'a':
108
80
				if (appendx >= appendnum) {
109
					appends = xreallocarray(appends,
110
					    appendnum,
111
					    2 * sizeof(struct s_appends));
112
					appendnum *= 2;
113
				}
114
80
				appends[appendx].type = AP_STRING;
115
80
				appends[appendx].s = cp->t;
116
80
				appends[appendx].len = strlen(cp->t);
117
80
				appendx++;
118
80
				break;
119
			case 'b':
120
2912
				cp = cp->u.c;
121
2912
				goto redirect;
122
			case 'c':
123
164
				pd = 1;
124
164
				psl = 0;
125

208
				if (cp->a2 == NULL || lastaddr || lastline())
126
120
					(void)fprintf(outfile, "%s", cp->t);
127
				break;
128
			case 'd':
129
236229
				pd = 1;
130
236229
				goto new;
131
			case 'D':
132
				if (pd)
133
					goto new;
134
				if (psl == 0 ||
135
				    (p = memchr(ps, '\n', psl)) == NULL) {
136
					pd = 1;
137
					goto new;
138
				} else {
139
					psl -= (p + 1) - ps;
140
					memmove(ps, p + 1, psl);
141
					goto top;
142
				}
143
			case 'g':
144
916
				cspace(&PS, hs, hsl, REPLACE);
145
916
				break;
146
			case 'G':
147
196
				cspace(&PS, "\n", 1, 0);
148
196
				cspace(&PS, hs, hsl, 0);
149
196
				break;
150
			case 'h':
151
516
				cspace(&HS, ps, psl, REPLACE);
152
516
				break;
153
			case 'H':
154
328
				cspace(&HS, "\n", 1, 0);
155
328
				cspace(&HS, ps, psl, 0);
156
328
				break;
157
			case 'i':
158
8
				(void)fprintf(outfile, "%s", cp->t);
159
8
				break;
160
			case 'l':
161
12
				lputs(ps);
162
12
				break;
163
			case 'n':
164
8
				if (!nflag && !pd)
165
8
					OUT();
166
8
				flush_appends();
167
8
				if (!mf_fgets(&PS, REPLACE))
168
					exit(0);
169
8
				pd = 0;
170
8
				break;
171
			case 'N':
172
40
				flush_appends();
173
40
				cspace(&PS, "\n", 1, 0);
174
40
				if (!mf_fgets(&PS, 0))
175
					exit(0);
176
				break;
177
			case 'p':
178
1832
				if (pd)
179
					break;
180
3552
				OUT();
181
				break;
182
			case 'P':
183
				if (pd)
184
					break;
185
				if ((p = memchr(ps, '\n', psl)) != NULL) {
186
					oldpsl = psl;
187
					psl = p - ps;
188
					psanl = 1;
189
					OUT();
190
					psl = oldpsl;
191
				} else {
192
					OUT();
193
				}
194
				break;
195
			case 'q':
196
66
				if (!nflag && !pd)
197
132
					OUT();
198
				flush_appends();
199
				exit(0);
200
			case 'r':
201
12
				if (appendx >= appendnum) {
202
					appends = xreallocarray(appends,
203
					    appendnum,
204
					    2 * sizeof(struct s_appends));
205
					appendnum *= 2;
206
				}
207
12
				appends[appendx].type = AP_FILE;
208
12
				appends[appendx].s = cp->t;
209
12
				appends[appendx].len = strlen(cp->t);
210
12
				appendx++;
211
12
				break;
212
			case 's':
213
345437
				sdone |= substitute(cp);
214
345437
				break;
215
			case 't':
216
772
				if (sdone) {
217
452
					sdone = 0;
218
452
					cp = cp->u.c;
219
452
					goto redirect;
220
				}
221
				break;
222
			case 'w':
223
171653
				if (pd)
224
					break;
225

171653
				if (cp->u.fd == -1 && (cp->u.fd = open(cp->t,
226
				    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
227
				    DEFFILEMODE)) == -1)
228
					error(FATAL, "%s: %s",
229
					    cp->t, strerror(errno));
230

343306
				if (write(cp->u.fd, ps, psl) != psl ||
231
171653
				    write(cp->u.fd, "\n", 1) != 1)
232
					error(FATAL, "%s: %s",
233
					    cp->t, strerror(errno));
234
				break;
235
			case 'x':
236
1560
				if (hs == NULL)
237
4
					cspace(&HS, "", 0, REPLACE);
238
1560
				tspace = PS;
239
1560
				PS = HS;
240
1560
				psanl = tspace.append_newline;
241
1560
				HS = tspace;
242
1560
				break;
243
			case 'y':
244
120
				if (pd || psl == 0)
245
					break;
246
1328
				for (p = ps, len = psl; len--; ++p)
247
544
					*p = cp->u.y[(unsigned char)*p];
248
				break;
249
			case ':':
250
			case '}':
251
				break;
252
			case '=':
253
36
				(void)fprintf(outfile, "%lu\n", linenum);
254
36
			}
255
530086
			cp = cp->next;
256
		} /* for all cp */
257
258
444376
new:		if (!nflag && !pd)
259
380228
			OUT();
260
444376
		flush_appends();
261
	} /* for all lines */
262
3834
}
263
264
/*
 * TRUE if the address passed matches the current program state
 * (lastline, linenumber, ps).  The expansion is fully parenthesized so
 * the macro can be used safely inside a larger expression.
 */
#define	MATCH(a)							\
	((a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, 0, psl) :	\
	    (a)->type == AT_LINE ? linenum == (a)->u.l : lastline())
271
272
/*
273
 * Return TRUE if the command applies to the current line.  Sets the inrange
274
 * flag to process ranges.  Interprets the non-select (``!'') flag.
275
 */
276
static inline int
277
applies(struct s_command *cp)
278
{
279
	int r;
280
281
3535392
	lastaddr = 0;
282

2926759
	if (cp->a1 == NULL && cp->a2 == NULL)
283
1159063
		r = 1;
284
608633
	else if (cp->a2)
285
9281
		if (cp->inrange) {
286


15800
			if (MATCH(cp->a2)) {
287
98
				cp->inrange = 0;
288
98
				lastaddr = 1;
289
98
			}
290
			r = 1;
291


11482
		} else if (MATCH(cp->a1)) {
292
			/*
293
			 * If the second address is a number less than or
294
			 * equal to the line number first selected, only
295
			 * one line shall be selected.
296
			 *	-- POSIX 1003.2
297
			 */
298

182
			if (cp->a2->type == AT_LINE &&
299
64
			    linenum >= cp->a2->u.l)
300
8
				lastaddr = 1;
301
			else
302
110
				cp->inrange = 1;
303
			r = 1;
304
118
		} else
305
			r = 0;
306
	else
307

1925406
		r = MATCH(cp->a1);
308
3944584
	return (cp->nonsel ? !r : r);
309
}
310
311
/*
312
 * Reset all inrange markers.
313
 */
314
void
315
resetranges(void)
316
{
317
	struct s_command *cp;
318
319
148
	for (cp = prog; cp; cp = cp->code == '{' ? cp->u.c : cp->next)
320
56
		if (cp->a2)
321
			cp->inrange = 0;
322
12
}
323
324
/*
325
 * substitute --
326
 *	Do substitutions in the pattern space.  Currently, we build a
327
 *	copy of the new pattern space in the substitute space structure
328
 *	and then swap them.
329
 */
330
static int
331
substitute(struct s_command *cp)
332
{
333
690874
	SPACE tspace;
334
	regex_t *re;
335
	regoff_t slen;
336
	int n, lastempty;
337
	size_t le = 0;
338
	char *s;
339
340
345437
	s = ps;
341
345437
	re = cp->u.s->re;
342
690874
	if (re == NULL) {
343
346269
		if (defpreg != NULL && cp->u.s->maxbref > defpreg->re_nsub) {
344
			linenum = cp->u.s->linenum;
345
			error(COMPILE, "\\%d not defined in the RE",
346
			    cp->u.s->maxbref);
347
		}
348
	}
349
345437
	if (!regexec_e(re, ps, 0, 0, 0, psl))
350
102198
		return (0);
351
352
243239
	SS.len = 0;				/* Clean substitute space. */
353
	slen = psl;
354
243239
	n = cp->u.s->n;
355
	lastempty = 1;
356
357
243239
	do {
358
		/* Copy the leading retained string. */
359

544970
		if (n <= 1 && (match[0].rm_so > le))
360
212122
			cspace(&SS, s, match[0].rm_so - le, APPEND);
361
362
		/* Skip zero-length matches right after other matches. */
363

312603
		if (lastempty || (match[0].rm_so - le) ||
364
10340
		    match[0].rm_so != match[0].rm_eo) {
365
272715
			if (n <= 1) {
366
				/* Want this match: append replacement. */
367
271751
				regsub(&SS, ps, cp->u.s->new);
368
271751
				if (n == 1)
369
235183
					n = -1;
370
			} else {
371
				/* Want a later match: append original. */
372
964
				if (match[0].rm_eo - le)
373
600
					cspace(&SS, s, match[0].rm_eo - le,
374
					    APPEND);
375
964
				n--;
376
			}
377
		}
378
379
		/* Move past this match. */
380
272979
		s = ps + match[0].rm_eo;
381
272979
		slen = psl - match[0].rm_eo;
382
		le = match[0].rm_eo;
383
384
		/*
385
		 * After a zero-length match, advance one byte,
386
		 * and at the end of the line, terminate.
387
		 */
388
272979
		if (match[0].rm_so == match[0].rm_eo) {
389

61964
			if (*s == '\0' || *s == '\n')
390
12072
				slen = -1;
391
			else
392
24946
				slen--;
393
37018
			if (*s != '\0') {
394
24946
				cspace(&SS, s++, 1, APPEND);
395
24946
				le++;
396
24946
			}
397
			lastempty = 1;
398
37018
		} else
399
			lastempty = 0;
400
401

310327
	} while (n >= 0 && slen >= 0 &&
402
37348
	    regexec_e(re, ps, REG_NOTBOL, 0, le, psl));
403
404
	/* Did not find the requested number of matches. */
405
243239
	if (n > 0)
406
340
		return (0);
407
408
	/* Copy the trailing retained string. */
409
242899
	if (slen > 0)
410
222480
		cspace(&SS, s, slen, APPEND);
411
412
	/*
413
	 * Swap the substitute space and the pattern space, and make sure
414
	 * that any leftover pointers into stdio memory get lost.
415
	 */
416
242899
	tspace = PS;
417
242899
	PS = SS;
418
242899
	psanl = tspace.append_newline;
419
242899
	SS = tspace;
420
242899
	SS.space = SS.back;
421
422
	/* Handle the 'p' flag. */
423
242899
	if (cp->u.s->p)
424
5668
		OUT();
425
426
	/* Handle the 'w' flag. */
427
242899
	if (cp->u.s->wfile && !pd) {
428

11256
		if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile,
429
		    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1)
430
			error(FATAL, "%s: %s", cp->u.s->wfile, strerror(errno));
431

22512
		if (write(cp->u.s->wfd, ps, psl) != psl ||
432
11256
		    write(cp->u.s->wfd, "\n", 1) != 1)
433
			error(FATAL, "%s: %s", cp->u.s->wfile, strerror(errno));
434
	}
435
242899
	return (1);
436
345437
}
437
438
/*
439
 * Flush append requests.  Always called before reading a line,
440
 * therefore it also resets the substitution done (sdone) flag.
441
 */
442
static void
443
flush_appends(void)
444
{
445
	FILE *f;
446
	int count, i;
447
888980
	char buf[8 * 1024];
448
449
889164
	for (i = 0; i < appendx; i++)
450
184
		switch (appends[i].type) {
451
		case AP_STRING:
452
160
			fwrite(appends[i].s, sizeof(char), appends[i].len,
453
80
			    outfile);
454
80
			break;
455
		case AP_FILE:
456
			/*
457
			 * Read files probably shouldn't be cached.  Since
458
			 * it's not an error to read a non-existent file,
459
			 * it's possible that another program is interacting
460
			 * with the sed script through the file system.  It
461
			 * would be truly bizarre, but possible.  It's probably
462
			 * not that big a performance win, anyhow.
463
			 */
464
12
			if ((f = fopen(appends[i].s, "r")) == NULL)
465
				break;
466
24
			while ((count = fread(buf, sizeof(char), sizeof(buf), f)))
467
4
				(void)fwrite(buf, sizeof(char), count, outfile);
468
8
			(void)fclose(f);
469
8
			break;
470
		}
471

888980
	if (ferror(outfile))
472
		error(FATAL, "%s: %s", outfname, strerror(errno ? errno : EIO));
473
444490
	appendx = sdone = 0;
474
444490
}
475
476
static void
477
lputs(char *s)
478
{
479
	int count;
480
	extern int termwidth;
481
	const char *escapes;
482
	char *p;
483
484
2068
	for (count = 0; *s; ++s) {
485
1016
		if (count >= termwidth) {
486
32
			(void)fprintf(outfile, "\\\n");
487
			count = 0;
488
32
		}
489

1900
		if (isascii((unsigned char)*s) && isprint((unsigned char)*s)
490
884
		    && *s != '\\') {
491
376
			(void)fputc(*s, outfile);
492
376
			count++;
493
1016
		} else if (*s == '\n') {
494
			(void)fputc('$', outfile);
495
			(void)fputc('\n', outfile);
496
			count = 0;
497
		} else {
498
			escapes = "\\\a\b\f\r\t\v";
499
640
			(void)fputc('\\', outfile);
500
640
			if ((p = strchr(escapes, *s))) {
501
28
				(void)fputc("\\abfrtv"[p - escapes], outfile);
502
28
				count += 2;
503
28
			} else {
504
612
				(void)fprintf(outfile, "%03o", *(u_char *)s);
505
612
				count += 4;
506
			}
507
		}
508
	}
509
12
	(void)fputc('$', outfile);
510
12
	(void)fputc('\n', outfile);
511

24
	if (ferror(outfile))
512
		error(FATAL, "%s: %s", outfname, strerror(errno ? errno : EIO));
513
12
}
514
515
static inline int
516
regexec_e(regex_t *preg, const char *string, int eflags,
517
    int nomatch, size_t start, size_t stop)
518
{
519
	int eval;
520
521
1725908
	if (preg == NULL) {
522
832
		if (defpreg == NULL)
523
			error(FATAL, "first RE may not be empty");
524
	} else
525
862122
		defpreg = preg;
526
527
	/* Set anchors */
528
862954
	match[0].rm_so = start;
529
862954
	match[0].rm_eo = stop;
530
531
1725908
	eval = regexec(defpreg, string,
532
862954
	    nomatch ? 0 : maxnsub + 1, match, eflags | REG_STARTEND);
533
862954
	switch (eval) {
534
	case 0:
535
458302
		return (1);
536
	case REG_NOMATCH:
537
404652
		return (0);
538
	}
539
	error(FATAL, "RE error: %s", strregerror(eval, defpreg));
540
862954
}
541
542
/*
543
 * regsub - perform substitutions after a regexp match
544
 * Based on a routine by Henry Spencer
545
 */
546
static void
547
regsub(SPACE *sp, char *string, char *src)
548
{
549
	int len, no;
550
	char c, *dst;
551
552
#define	NEEDSP(reqlen)							\
553
	if (sp->len + (reqlen) + 1 >= sp->blen) {			\
554
		size_t newlen = sp->blen + (reqlen) + 1024;		\
555
		sp->space = sp->back = xrealloc(sp->back, newlen);	\
556
		sp->blen = newlen;					\
557
		dst = sp->space + sp->len;				\
558
	}
559
560
543502
	dst = sp->space + sp->len;
561
1563628
	while ((c = *src++) != '\0') {
562
510063
		if (c == '&')
563
1882
			no = 0;
564

518374
		else if (c == '\\' && isdigit((unsigned char)*src))
565
9949
			no = *src++ - '0';
566
		else
567
			no = -1;
568
510063
		if (no < 0) {		/* Ordinary character. */
569

498720
			if (c == '\\' && (*src == '\\' || *src == '&'))
570
244
				c = *src++;
571
499821
			NEEDSP(1);
572
498232
			*dst++ = c;
573
498232
			++sp->len;
574

521894
		} else if (match[no].rm_so != -1 && match[no].rm_eo != -1) {
575
11831
			len = match[no].rm_eo - match[no].rm_so;
576
11866
			NEEDSP(len);
577
11831
			memmove(dst, string + match[no].rm_so, len);
578
11831
			dst += len;
579
11831
			sp->len += len;
580
11831
		}
581
	}
582
271956
	NEEDSP(1);
583
271751
	*dst = '\0';
584
271751
}
585
586
/*
587
 * aspace --
588
 *	Append the source space to the destination space, allocating new
589
 *	space as necessary.
590
 */
591
void
592
cspace(SPACE *sp, const char *p, size_t len, enum e_spflag spflag)
593
{
594
	size_t tlen;
595
596
	/* Make sure SPACE has enough memory and ramp up quickly. */
597
1814324
	tlen = sp->len + len + 1;
598
907162
	if (tlen > sp->blen) {
599
4667
		size_t newlen = tlen + 1024;
600
4667
		sp->space = sp->back = xrealloc(sp->back, newlen);
601
4667
		sp->blen = newlen;
602
4667
	}
603
604
907162
	if (spflag == REPLACE)
605
445890
		sp->len = 0;
606
607
907162
	memmove(sp->space + sp->len, p, len);
608
609
907162
	sp->space[sp->len += len] = '\0';
610
907162
}
611
612
/*
613
 * Close all cached opened files and report any errors
614
 */
615
void
616
cfclose(struct s_command *cp, struct s_command *end)
617
{
618
619
29010
	for (; cp != end; cp = cp->next)
620

14190
		switch (cp->code) {
621
		case 's':
622

6252
			if (cp->u.s->wfd != -1 && close(cp->u.s->wfd))
623
				error(FATAL,
624
				    "%s: %s", cp->u.s->wfile, strerror(errno));
625
5448
			cp->u.s->wfd = -1;
626
5448
			break;
627
		case 'w':
628

496
			if (cp->u.fd != -1 && close(cp->u.fd))
629
				error(FATAL, "%s: %s", cp->t, strerror(errno));
630
248
			cp->u.fd = -1;
631
248
			break;
632
		case '{':
633
520
			cfclose(cp->u.c, cp->next);
634
520
			break;
635
		}
636
4354
}