GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: usr.bin/sed/process.c Lines: 173 284 60.9 %
Date: 2016-12-06 Branches: 136 277 49.1 %

Line Branch Exec Source
1
/*	$OpenBSD: process.c,v 1.28 2016/05/30 18:10:29 martijn Exp $	*/
2
3
/*-
4
 * Copyright (c) 1992 Diomidis Spinellis.
5
 * Copyright (c) 1992, 1993
6
 *	The Regents of the University of California.  All rights reserved.
7
 *
8
 * This code is derived from software contributed to Berkeley by
9
 * Diomidis Spinellis of Imperial College, University of London.
10
 *
11
 * Redistribution and use in source and binary forms, with or without
12
 * modification, are permitted provided that the following conditions
13
 * are met:
14
 * 1. Redistributions of source code must retain the above copyright
15
 *    notice, this list of conditions and the following disclaimer.
16
 * 2. Redistributions in binary form must reproduce the above copyright
17
 *    notice, this list of conditions and the following disclaimer in the
18
 *    documentation and/or other materials provided with the distribution.
19
 * 3. Neither the name of the University nor the names of its contributors
20
 *    may be used to endorse or promote products derived from this software
21
 *    without specific prior written permission.
22
 *
23
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33
 * SUCH DAMAGE.
34
 */
35
36
#include <sys/types.h>
37
#include <sys/stat.h>
38
#include <sys/uio.h>
39
40
#include <ctype.h>
41
#include <errno.h>
42
#include <fcntl.h>
43
#include <limits.h>
44
#include <regex.h>
45
#include <stdio.h>
46
#include <stdlib.h>
47
#include <string.h>
48
#include <unistd.h>
49
50
#include "defs.h"
51
#include "extern.h"
52
53
/*
 * The three working buffers used in this file: HS is the hold space, PS the
 * pattern space and SS the scratch "substitute space" that substitute()
 * builds its result in before swapping it with PS.
 */
static SPACE HS, PS, SS;
54
/* Shorthand accessors for the fields of PS and HS used throughout the file. */
#define	pd		PS.deleted
55
#define	ps		PS.space
56
#define	psl		PS.len
57
#define	psanl		PS.append_newline
58
#define	hs		HS.space
59
#define	hsl		HS.len
60
61
/* Forward declarations of the file-local helpers defined below. */
static inline int	 applies(struct s_command *);
62
static void		 flush_appends(void);
63
static void		 lputs(char *);
64
static inline int	 regexec_e(regex_t *, const char *, int, int, size_t,
65
			     size_t);
66
static void		 regsub(SPACE *, char *, char *);
67
static int		 substitute(struct s_command *);
68
69
struct s_appends *appends;	/* Array of pointers to strings to append. */
70
static int appendx;		/* Index into appends array. */
71
size_t appendnum;		/* Size of appends array. */
72
73
static int lastaddr;		/* Set by applies if last address of a range. */
74
static int sdone;		/* If any substitutes since last line input. */
75
				/*
				 * Most recently used compiled RE; regexec_e()
				 * falls back to it when handed a NULL RE.
				 */
76
static regex_t *defpreg;
77
/* regexec_e() asks regexec() for maxnsub + 1 match slots. */
size_t maxnsub;
78
/* Match-offset array filled in by regexec_e(); match[0] is the whole match. */
regmatch_t *match;
79
80
/*
 * Write the pattern space (psl bytes starting at ps) to outfile, followed
 * by a newline when the append_newline flag (psanl) is set.
 */
#define OUT() do {\
81
	fwrite(ps, 1, psl, outfile);\
82
	if (psanl) fputc('\n', outfile);\
83
} while (0)
84
85
void
86
process(void)
87
775
{
88
	struct s_command *cp;
89
	SPACE tspace;
90
	size_t len, oldpsl;
91
	char *p;
92
	int oldpsanl;
93
94
106203
	for (linenum = 0; mf_fgets(&PS, REPLACE);) {
95
104680
		pd = 0;
96
104680
top:
97
104680
		cp = prog;
98
redirect:
99
378069
		while (cp != NULL) {
100
273674
			if (!applies(cp)) {
101
96720
				cp = cp->next;
102
96720
				continue;
103
			}
104






176954
			switch (cp->code) {
105
			case '{':
106
164
				cp = cp->u.c;
107
164
				goto redirect;
108
			case 'a':
109
				if (appendx >= appendnum) {
110
					appends = xreallocarray(appends,
111
					    appendnum,
112
					    2 * sizeof(struct s_appends));
113
					appendnum *= 2;
114
				}
115
				appends[appendx].type = AP_STRING;
116
				appends[appendx].s = cp->t;
117
				appends[appendx].len = strlen(cp->t);
118
				appendx++;
119
				break;
120
			case 'b':
121
30852
				cp = cp->u.c;
122
30852
				goto redirect;
123
			case 'c':
124
				pd = 1;
125
				psl = 0;
126
				if (cp->a2 == NULL || lastaddr || lastline())
127
					(void)fprintf(outfile, "%s", cp->t);
128
				break;
129
			case 'd':
130
258
				pd = 1;
131
258
				goto new;
132
			case 'D':
133
				if (pd)
134
					goto new;
135
				if (psl == 0 ||
136
				    (p = memchr(ps, '\n', psl)) == NULL) {
137
					pd = 1;
138
					goto new;
139
				} else {
140
					psl -= (p + 1) - ps;
141
					memmove(ps, p + 1, psl);
142
					goto top;
143
				}
144
			case 'g':
145
77
				cspace(&PS, hs, hsl, REPLACE);
146
77
				break;
147
			case 'G':
148
				cspace(&PS, "\n", 1, 0);
149
				cspace(&PS, hs, hsl, 0);
150
				break;
151
			case 'h':
152
156
				cspace(&HS, ps, psl, REPLACE);
153
156
				break;
154
			case 'H':
155
				cspace(&HS, "\n", 1, 0);
156
				cspace(&HS, ps, psl, 0);
157
				break;
158
			case 'i':
159
				(void)fprintf(outfile, "%s", cp->t);
160
				break;
161
			case 'l':
162
				lputs(ps);
163
				break;
164
			case 'n':
165
				if (!nflag && !pd)
166
					OUT();
167
				flush_appends();
168
				if (!mf_fgets(&PS, REPLACE))
169
					exit(0);
170
				pd = 0;
171
				break;
172
			case 'N':
173
77
				flush_appends();
174
77
				cspace(&PS, "\n", 1, 0);
175
77
				if (!mf_fgets(&PS, 0))
176
					exit(0);
177
				break;
178
			case 'p':
179
719
				if (pd)
180
					break;
181
719
				OUT();
182
				break;
183
			case 'P':
184
				if (pd)
185
					break;
186
				if ((p = memchr(ps, '\n', psl)) != NULL) {
187
					oldpsl = psl;
188
					oldpsanl = psanl;
189
					psl = p - ps;
190
					psanl = 1;
191
					OUT();
192
					psl = oldpsl;
193
				} else {
194
					OUT();
195
				}
196
				break;
197
			case 'q':
198

27
				if (!nflag && !pd)
199
26
					OUT();
200
27
				flush_appends();
201
27
				exit(0);
202
			case 'r':
203
49
				if (appendx >= appendnum) {
204
					appends = xreallocarray(appends,
205
					    appendnum,
206
					    2 * sizeof(struct s_appends));
207
					appendnum *= 2;
208
				}
209
49
				appends[appendx].type = AP_FILE;
210
49
				appends[appendx].s = cp->t;
211
49
				appends[appendx].len = strlen(cp->t);
212
49
				appendx++;
213
49
				break;
214
			case 's':
215
119397
				sdone |= substitute(cp);
216
119397
				break;
217
			case 't':
218
6759
				if (sdone) {
219
871
					sdone = 0;
220
871
					cp = cp->u.c;
221
871
					goto redirect;
222
				}
223
				break;
224
			case 'w':
225
				if (pd)
226
					break;
227
				if (cp->u.fd == -1 && (cp->u.fd = open(cp->t,
228
				    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
229
				    DEFFILEMODE)) == -1)
230
					error(FATAL, "%s: %s",
231
					    cp->t, strerror(errno));
232
				if (write(cp->u.fd, ps, psl) != psl ||
233
				    write(cp->u.fd, "\n", 1) != 1)
234
					error(FATAL, "%s: %s",
235
					    cp->t, strerror(errno));
236
				break;
237
			case 'x':
238
4
				if (hs == NULL)
239
					cspace(&HS, "", 0, REPLACE);
240
4
				tspace = PS;
241
4
				PS = HS;
242
4
				psanl = tspace.append_newline;
243
4
				HS = tspace;
244
4
				break;
245
			case 'y':
246

190
				if (pd || psl == 0)
247
					break;
248
3510
				for (p = ps, len = psl; len--; ++p)
249
3320
					*p = cp->u.y[(unsigned char)*p];
250
				break;
251
			case ':':
252
			case '}':
253
				break;
254
			case '=':
255
				(void)fprintf(outfile, "%lu\n", linenum);
256
			}
257
144782
			cp = cp->next;
258
		} /* for all cp */
259
260

104653
new:		if (!nflag && !pd)
261
82151
			OUT();
262
104653
		flush_appends();
263
	} /* for all lines */
264
748
}
265
266
/*
267
 * TRUE if the address passed matches the current program state
268
 * (lastline, linenumber, ps).
269
 */
270
#define	MATCH(a)						\
271
	(a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, 0, psl) :	\
272
	    (a)->type == AT_LINE ? linenum == (a)->u.l : lastline()
273
274
/*
275
 * Return TRUE if the command applies to the current line.  Sets the inrange
276
 * flag to process ranges.  Interprets the non-select (``!'') flag.
277
 */
278
static inline int
279
applies(struct s_command *cp)
280
273674
{
281
	int r;
282
283
273674
	lastaddr = 0;
284

429756
	if (cp->a1 == NULL && cp->a2 == NULL)
285
156082
		r = 1;
286
117592
	else if (cp->a2)
287
48
		if (cp->inrange) {
288

47
			if (MATCH(cp->a2)) {
289
				cp->inrange = 0;
290
				lastaddr = 1;
291
			}
292
47
			r = 1;
293

1
		} else if (MATCH(cp->a1)) {
294
			/*
295
			 * If the second address is a number less than or
296
			 * equal to the line number first selected, only
297
			 * one line shall be selected.
298
			 *	-- POSIX 1003.2
299
			 */
300

1
			if (cp->a2->type == AT_LINE &&
301
			    linenum >= cp->a2->u.l)
302
				lastaddr = 1;
303
			else
304
1
				cp->inrange = 1;
305
1
			r = 1;
306
		} else
307
			r = 0;
308
	else
309

117544
		r = MATCH(cp->a1);
310
273674
	return (cp->nonsel ? !r : r);
311
}
312
313
/*
314
 * Reset all inrange markers.
315
 */
316
void
317
resetranges(void)
318
{
319
	struct s_command *cp;
320
321
	for (cp = prog; cp; cp = cp->code == '{' ? cp->u.c : cp->next)
322
		if (cp->a2)
323
			cp->inrange = 0;
324
}
325
326
/*
327
 * substitute --
328
 *	Do substitutions in the pattern space.  Currently, we build a
329
 *	copy of the new pattern space in the substitute space structure
330
 *	and then swap them.
331
 */
332
static int
333
substitute(struct s_command *cp)
334
119397
{
335
	SPACE tspace;
336
	regex_t *re;
337
	regoff_t slen;
338
	int n, lastempty;
339
119397
	size_t le = 0;
340
	char *s;
341
342
119397
	s = ps;
343
119397
	re = cp->u.s->re;
344
119397
	if (re == NULL) {
345
		if (defpreg != NULL && cp->u.s->maxbref > defpreg->re_nsub) {
346
			linenum = cp->u.s->linenum;
347
			error(COMPILE, "\\%d not defined in the RE",
348
			    cp->u.s->maxbref);
349
		}
350
	}
351
119397
	if (!regexec_e(re, ps, 0, 0, 0, psl))
352
108440
		return (0);
353
354
10957
	SS.len = 0;				/* Clean substitute space. */
355
10957
	slen = psl;
356
10957
	n = cp->u.s->n;
357
10957
	lastempty = 1;
358
359
	do {
360
		/* Copy the leading retained string. */
361

11494
		if (n <= 1 && (match[0].rm_so > le))
362
2259
			cspace(&SS, s, match[0].rm_so - le, APPEND);
363
364
		/* Skip zero-length matches right after other matches. */
365

11494
		if (lastempty || (match[0].rm_so - le) ||
366
		    match[0].rm_so != match[0].rm_eo) {
367
11494
			if (n <= 1) {
368
				/* Want this match: append replacement. */
369
11494
				regsub(&SS, ps, cp->u.s->new);
370
11494
				if (n == 1)
371
9995
					n = -1;
372
			} else {
373
				/* Want a later match: append original. */
374
				if (match[0].rm_eo - le)
375
					cspace(&SS, s, match[0].rm_eo - le,
376
					    APPEND);
377
				n--;
378
			}
379
		}
380
381
		/* Move past this match. */
382
11494
		s = ps + match[0].rm_eo;
383
11494
		slen = psl - match[0].rm_eo;
384
11494
		le = match[0].rm_eo;
385
386
		/*
387
		 * After a zero-length match, advance one byte,
388
		 * and at the end of the line, terminate.
389
		 */
390
11494
		if (match[0].rm_so == match[0].rm_eo) {
391
5509
			if (*s == '\0' || *s == '\n')
392
612
				slen = -1;
393
			else
394
4897
				slen--;
395
5509
			if (*s != '\0') {
396
4897
			 	cspace(&SS, s++, 1, APPEND);
397
4897
				le++;
398
			}
399
5509
			lastempty = 1;
400
		} else
401
5985
			lastempty = 0;
402
403
	} while (n >= 0 && slen >= 0 &&
404

11494
	    regexec_e(re, ps, REG_NOTBOL, 0, le, psl));
405
406
	/* Did not find the requested number of matches. */
407
10957
	if (n > 1)
408
		return (0);
409
410
	/* Copy the trailing retained string. */
411
10957
	if (slen > 0)
412
6109
		cspace(&SS, s, slen, APPEND);
413
414
	/*
415
	 * Swap the substitute space and the pattern space, and make sure
416
	 * that any leftover pointers into stdio memory get lost.
417
	 */
418
10957
	tspace = PS;
419
10957
	PS = SS;
420
10957
	psanl = tspace.append_newline;
421
10957
	SS = tspace;
422
10957
	SS.space = SS.back;
423
424
	/* Handle the 'p' flag. */
425
10957
	if (cp->u.s->p)
426
2627
		OUT();
427
428
	/* Handle the 'w' flag. */
429

10957
	if (cp->u.s->wfile && !pd) {
430
		if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile,
431
		    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1)
432
			error(FATAL, "%s: %s", cp->u.s->wfile, strerror(errno));
433
		if (write(cp->u.s->wfd, ps, psl) != psl ||
434
		    write(cp->u.s->wfd, "\n", 1) != 1)
435
			error(FATAL, "%s: %s", cp->u.s->wfile, strerror(errno));
436
	}
437
10957
	return (1);
438
}
439
440
/*
441
 * Flush append requests.  Always called before reading a line,
442
 * therefore it also resets the substitution done (sdone) flag.
443
 */
444
static void
445
flush_appends(void)
446
104757
{
447
	FILE *f;
448
	int count, i;
449
	char buf[8 * 1024];
450
451
104806
	for (i = 0; i < appendx; i++)
452
49
		switch (appends[i].type) {
453
		case AP_STRING:
454
			fwrite(appends[i].s, sizeof(char), appends[i].len,
455
			    outfile);
456
			break;
457
		case AP_FILE:
458
			/*
459
			 * Read files probably shouldn't be cached.  Since
460
			 * it's not an error to read a non-existent file,
461
			 * it's possible that another program is interacting
462
			 * with the sed script through the file system.  It
463
			 * would be truly bizarre, but possible.  It's probably
464
			 * not that big a performance win, anyhow.
465
			 */
466
49
			if ((f = fopen(appends[i].s, "r")) == NULL)
467
				break;
468
98
			while ((count = fread(buf, sizeof(char), sizeof(buf), f)))
469
49
				(void)fwrite(buf, sizeof(char), count, outfile);
470
49
			(void)fclose(f);
471
			break;
472
		}
473

104757
	if (ferror(outfile))
474
		error(FATAL, "%s: %s", outfname, strerror(errno ? errno : EIO));
475
104757
	appendx = sdone = 0;
476
104757
}
477
478
static void
479
lputs(char *s)
480
{
481
	int count;
482
	extern int termwidth;
483
	const char *escapes;
484
	char *p;
485
486
	for (count = 0; *s; ++s) {
487
		if (count >= termwidth) {
488
			(void)fprintf(outfile, "\\\n");
489
			count = 0;
490
		}
491
		if (isascii((unsigned char)*s) && isprint((unsigned char)*s)
492
		    && *s != '\\') {
493
			(void)fputc(*s, outfile);
494
			count++;
495
		} else if (*s == '\n') {
496
			(void)fputc('$', outfile);
497
			(void)fputc('\n', outfile);
498
			count = 0;
499
		} else {
500
			escapes = "\\\a\b\f\r\t\v";
501
			(void)fputc('\\', outfile);
502
			if ((p = strchr(escapes, *s))) {
503
				(void)fputc("\\abfrtv"[p - escapes], outfile);
504
				count += 2;
505
			} else {
506
				(void)fprintf(outfile, "%03o", *(u_char *)s);
507
				count += 4;
508
			}
509
		}
510
	}
511
	(void)fputc('$', outfile);
512
	(void)fputc('\n', outfile);
513
	if (ferror(outfile))
514
		error(FATAL, "%s: %s", outfname, strerror(errno ? errno : EIO));
515
}
516
517
static inline int
518
regexec_e(regex_t *preg, const char *string, int eflags,
519
    int nomatch, size_t start, size_t stop)
520
196588
{
521
	int eval;
522
523
196588
	if (preg == NULL) {
524
		if (defpreg == NULL)
525
			error(FATAL, "first RE may not be empty");
526
	} else
527
196588
		defpreg = preg;
528
529
	/* Set anchors */
530
196588
	match[0].rm_so = start;
531
196588
	match[0].rm_eo = stop;
532
533
196588
	eval = regexec(defpreg, string,
534
	    nomatch ? 0 : maxnsub + 1, match, eflags | REG_STARTEND);
535
196588
	switch (eval) {
536
	case 0:
537
13602
		return (1);
538
	case REG_NOMATCH:
539
182986
		return (0);
540
	}
541
	error(FATAL, "RE error: %s", strregerror(eval, defpreg));
542
	/* NOTREACHED */
543
}
544
545
/*
546
 * regsub - perform substitutions after a regexp match
547
 * Based on a routine by Henry Spencer
548
 */
549
static void
550
regsub(SPACE *sp, char *string, char *src)
551
11494
{
552
	int len, no;
553
	char c, *dst;
554
555
#define	NEEDSP(reqlen)							\
556
	if (sp->len + (reqlen) + 1 >= sp->blen) {			\
557
		size_t newlen = sp->blen + (reqlen) + 1024;		\
558
		sp->space = sp->back = xrealloc(sp->back, newlen);	\
559
		sp->blen = newlen;					\
560
		dst = sp->space + sp->len;				\
561
	}
562
563
11494
	dst = sp->space + sp->len;
564
57148
	while ((c = *src++) != '\0') {
565
34160
		if (c == '&')
566
353
			no = 0;
567

37161
		else if (c == '\\' && isdigit((unsigned char)*src))
568
3354
			no = *src++ - '0';
569
		else
570
30453
			no = -1;
571
34160
		if (no < 0) {		/* Ordinary character. */
572

30453
 			if (c == '\\' && (*src == '\\' || *src == '&'))
573
325
 				c = *src++;
574
30453
			NEEDSP(1);
575
30453
 			*dst++ = c;
576
30453
			++sp->len;
577

3707
 		} else if (match[no].rm_so != -1 && match[no].rm_eo != -1) {
578
3707
			len = match[no].rm_eo - match[no].rm_so;
579
3707
			NEEDSP(len);
580
3707
			memmove(dst, string + match[no].rm_so, len);
581
3707
			dst += len;
582
3707
			sp->len += len;
583
		}
584
	}
585
11494
	NEEDSP(1);
586
11494
	*dst = '\0';
587
11494
}
588
589
/*
590
 * aspace --
591
 *	Append the source space to the destination space, allocating new
592
 *	space as necessary.
593
 */
594
void
595
cspace(SPACE *sp, const char *p, size_t len, enum e_spflag spflag)
596
118332
{
597
	size_t tlen;
598
599
	/* Make sure SPACE has enough memory and ramp up quickly. */
600
118332
	tlen = sp->len + len + 1;
601
118332
	if (tlen > sp->blen) {
602
969
		size_t newlen = tlen + 1024;
603
969
		sp->space = sp->back = xrealloc(sp->back, newlen);
604
969
		sp->blen = newlen;
605
	}
606
607
118332
	if (spflag == REPLACE)
608
104913
		sp->len = 0;
609
610
118332
	memmove(sp->space + sp->len, p, len);
611
612
118332
	sp->space[sp->len += len] = '\0';
613
118332
}
614
615
/*
616
 * Close all cached opened files and report any errors
617
 */
618
void
619
cfclose(struct s_command *cp, struct s_command *end)
620
1226
{
621
622
15228
	for (; cp != end; cp = cp->next)
623

14002
		switch (cp->code) {
624
		case 's':
625

10891
			if (cp->u.s->wfd != -1 && close(cp->u.s->wfd))
626
				error(FATAL,
627
				    "%s: %s", cp->u.s->wfile, strerror(errno));
628
10891
			cp->u.s->wfd = -1;
629
10891
			break;
630
		case 'w':
631
			if (cp->u.fd != -1 && close(cp->u.fd))
632
				error(FATAL, "%s: %s", cp->t, strerror(errno));
633
			cp->u.fd = -1;
634
			break;
635
		case '{':
636
478
			cfclose(cp->u.c, cp->next);
637
			break;
638
		}
639
1226
}