GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: usr.bin/sed/process.c Lines: 245 292 83.9 %
Date: 2017-11-07 Branches: 211 275 76.7 %

Line Branch Exec Source
1
/*	$OpenBSD: process.c,v 1.32 2017/02/22 14:09:09 tom Exp $	*/
2
3
/*-
4
 * Copyright (c) 1992 Diomidis Spinellis.
5
 * Copyright (c) 1992, 1993
6
 *	The Regents of the University of California.  All rights reserved.
7
 *
8
 * This code is derived from software contributed to Berkeley by
9
 * Diomidis Spinellis of Imperial College, University of London.
10
 *
11
 * Redistribution and use in source and binary forms, with or without
12
 * modification, are permitted provided that the following conditions
13
 * are met:
14
 * 1. Redistributions of source code must retain the above copyright
15
 *    notice, this list of conditions and the following disclaimer.
16
 * 2. Redistributions in binary form must reproduce the above copyright
17
 *    notice, this list of conditions and the following disclaimer in the
18
 *    documentation and/or other materials provided with the distribution.
19
 * 3. Neither the name of the University nor the names of its contributors
20
 *    may be used to endorse or promote products derived from this software
21
 *    without specific prior written permission.
22
 *
23
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33
 * SUCH DAMAGE.
34
 */
35
36
#include <sys/types.h>
37
#include <sys/stat.h>
38
#include <sys/uio.h>
39
40
#include <ctype.h>
41
#include <errno.h>
42
#include <fcntl.h>
43
#include <limits.h>
44
#include <regex.h>
45
#include <stdio.h>
46
#include <stdlib.h>
47
#include <string.h>
48
#include <unistd.h>
49
50
#include "defs.h"
51
#include "extern.h"
52
53
/*
 * Working buffers.  HS backs the hs/hsl shorthands, PS the pattern-space
 * shorthands, and SS is the scratch space substitute() builds its result in
 * before swapping it with PS.
 */
static SPACE HS;
static SPACE PS;
static SPACE SS;

/* Shorthand accessors for the fields of PS and HS used throughout. */
#define	pd		PS.deleted
#define	ps		PS.space
#define	psl		PS.len
#define	psanl		PS.append_newline
#define	hs		HS.space
#define	hsl		HS.len

/* Helpers private to this file. */
static inline int	 applies(struct s_command *cp);
static void		 flush_appends(void);
static void		 lputs(char *s);
static inline int	 regexec_e(regex_t *preg, const char *string,
			     int eflags, int nomatch, size_t start,
			     size_t stop);
static void		 regsub(SPACE *sp, char *string, char *src);
static int		 substitute(struct s_command *cp);

/* Queue of pending 'a' text / 'r' file requests, see flush_appends(). */
struct s_appends *appends;	/* The queue itself. */
static int appendx;		/* Number of entries currently queued. */
size_t appendnum;		/* Allocated capacity of the queue. */

static int lastaddr;		/* Set by applies if last address of a range. */
static int sdone;		/* If any substitutes since last line input. */

/* Regular expression state shared by regexec_e(), substitute(), regsub(). */
static regex_t *defpreg;	/* RE used when regexec_e() is passed NULL. */
size_t maxnsub;			/* regexec_e() requests maxnsub + 1 matches. */
regmatch_t *match;		/* Match offsets filled in by regexec(). */

/*
 * Write the pattern space to outfile, followed by a newline when the
 * append_newline flag of the pattern space is set.
 */
#define OUT() do {							\
	fwrite(ps, 1, psl, outfile);					\
	if (psanl)							\
		fputc('\n', outfile);					\
} while (0)
84
85
void
86
process(void)
87
{
88
	struct s_command *cp;
89
18624
	SPACE tspace;
90
	size_t len, oldpsl;
91
	char *p;
92
93
1875750
	for (linenum = 0; mf_fgets(&PS, REPLACE);) {
94
928788
		pd = 0;
95
top:
96
928788
		cp = prog;
97
redirect:
98
5773465
		while (cp != NULL) {
99
3037681
			if (!applies(cp)) {
100
1469257
				cp = cp->next;
101
1469257
				continue;
102
			}
103






2467828
			switch (cp->code) {
104
			case '{':
105
257546
				cp = cp->u.c;
106
257546
				goto redirect;
107
			case 'a':
108
140
				if (appendx >= appendnum) {
109
					appends = xreallocarray(appends,
110
					    appendnum,
111
					    2 * sizeof(struct s_appends));
112
					appendnum *= 2;
113
				}
114
140
				appends[appendx].type = AP_STRING;
115
140
				appends[appendx].s = cp->t;
116
140
				appends[appendx].len = strlen(cp->t);
117
140
				appendx++;
118
140
				break;
119
			case 'b':
120
36988
				cp = cp->u.c;
121
36988
				goto redirect;
122
			case 'c':
123
287
				pd = 1;
124
287
				psl = 0;
125

364
				if (cp->a2 == NULL || lastaddr || lastline())
126
210
					(void)fprintf(outfile, "%s", cp->t);
127
				break;
128
			case 'd':
129
344883
				pd = 1;
130
344883
				goto new;
131
			case 'D':
132
				if (pd)
133
					goto new;
134
				if (psl == 0 ||
135
				    (p = memchr(ps, '\n', psl)) == NULL) {
136
					pd = 1;
137
					goto new;
138
				} else {
139
					psl -= (p + 1) - ps;
140
					memmove(ps, p + 1, psl);
141
					goto top;
142
				}
143
			case 'g':
144
1074
				cspace(&PS, hs, hsl, REPLACE);
145
1074
				break;
146
			case 'G':
147
343
				cspace(&PS, "\n", 1, 0);
148
343
				cspace(&PS, hs, hsl, 0);
149
343
				break;
150
			case 'h':
151
756
				cspace(&HS, ps, psl, REPLACE);
152
756
				break;
153
			case 'H':
154
574
				cspace(&HS, "\n", 1, 0);
155
574
				cspace(&HS, ps, psl, 0);
156
574
				break;
157
			case 'i':
158
14
				(void)fprintf(outfile, "%s", cp->t);
159
14
				break;
160
			case 'l':
161
21
				lputs(ps);
162
21
				break;
163
			case 'n':
164
14
				if (!nflag && !pd)
165
14
					OUT();
166
14
				flush_appends();
167
14
				if (!mf_fgets(&PS, REPLACE))
168
					exit(0);
169
14
				pd = 0;
170
14
				break;
171
			case 'N':
172
147
				flush_appends();
173
147
				cspace(&PS, "\n", 1, 0);
174
147
				if (!mf_fgets(&PS, 0))
175
					exit(0);
176
				break;
177
			case 'p':
178
5612
				if (pd)
179
					break;
180
11028
				OUT();
181
				break;
182
			case 'P':
183
				if (pd)
184
					break;
185
				if ((p = memchr(ps, '\n', psl)) != NULL) {
186
					oldpsl = psl;
187
					psl = p - ps;
188
					psanl = 1;
189
					OUT();
190
					psl = oldpsl;
191
				} else {
192
					OUT();
193
				}
194
				break;
195
			case 'q':
196
218
				if (!nflag && !pd)
197
434
					OUT();
198
				flush_appends();
199
				exit(0);
200
			case 'r':
201
71
				if (appendx >= appendnum) {
202
					appends = xreallocarray(appends,
203
					    appendnum,
204
					    2 * sizeof(struct s_appends));
205
					appendnum *= 2;
206
				}
207
71
				appends[appendx].type = AP_FILE;
208
71
				appends[appendx].s = cp->t;
209
71
				appends[appendx].len = strlen(cp->t);
210
71
				appendx++;
211
71
				break;
212
			case 's':
213
626281
				sdone |= substitute(cp);
214
626281
				break;
215
			case 't':
216
8152
				if (sdone) {
217
1666
					sdone = 0;
218
1666
					cp = cp->u.c;
219
1666
					goto redirect;
220
				}
221
				break;
222
			case 'w':
223
254265
				if (pd)
224
					break;
225

254265
				if (cp->u.fd == -1 && (cp->u.fd = open(cp->t,
226
				    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
227
				    DEFFILEMODE)) == -1)
228
					error(FATAL, "%s: %s",
229
					    cp->t, strerror(errno));
230

508530
				if (write(cp->u.fd, ps, psl) != psl ||
231
254265
				    write(cp->u.fd, "\n", 1) != 1)
232
					error(FATAL, "%s: %s",
233
					    cp->t, strerror(errno));
234
				break;
235
			case 'x':
236
2824
				if (hs == NULL)
237
52
					cspace(&HS, "", 0, REPLACE);
238
2824
				tspace = PS;
239
2824
				PS = HS;
240
2824
				psanl = tspace.append_newline;
241
2824
				HS = tspace;
242
2824
				break;
243
			case 'y':
244
439
				if (pd || psl == 0)
245
					break;
246
10954
				for (p = ps, len = psl; len--; ++p)
247
5038
					*p = cp->u.y[(unsigned char)*p];
248
				break;
249
			case ':':
250
			case '}':
251
				break;
252
			case '=':
253
63
				(void)fprintf(outfile, "%lu\n", linenum);
254
63
			}
255
927116
			cp = cp->next;
256
		} /* for all cp */
257
258
928563
new:		if (!nflag && !pd)
259
1061090
			OUT();
260
928563
		flush_appends();
261
	} /* for all lines */
262
9087
}
263
264
/*
 * TRUE if the address passed matches the current program state
 * (lastline, linenumber, ps).
 *
 * An AT_RE address is tested against the pattern space, an AT_LINE address
 * against the current line number, and any other address type via
 * lastline().  The whole expansion is parenthesized so the macro can be
 * used safely inside larger expressions.
 */
#define	MATCH(a)							\
	((a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, 0, psl) :	\
	    ((a)->type == AT_LINE ? (linenum == (a)->u.l) : lastline()))
271
272
/*
273
 * Return TRUE if the command applies to the current line.  Sets the inrange
274
 * flag to process ranges.  Interprets the non-select (``!'') flag.
275
 */
276
static inline int
277
applies(struct s_command *cp)
278
{
279
	int r;
280
281
6075362
	lastaddr = 0;
282

4910381
	if (cp->a1 == NULL && cp->a2 == NULL)
283
1872700
		r = 1;
284
1164981
	else if (cp->a2)
285
14387
		if (cp->inrange) {
286


21866
			if (MATCH(cp->a2)) {
287
200
				cp->inrange = 0;
288
200
				lastaddr = 1;
289
200
			}
290
			r = 1;
291


19269
		} else if (MATCH(cp->a1)) {
292
			/*
293
			 * If the second address is a number less than or
294
			 * equal to the line number first selected, only
295
			 * one line shall be selected.
296
			 *	-- POSIX 1003.2
297
			 */
298

349
			if (cp->a2->type == AT_LINE &&
299
113
			    linenum >= cp->a2->u.l)
300
14
				lastaddr = 1;
301
			else
302
222
				cp->inrange = 1;
303
			r = 1;
304
236
		} else
305
			r = 0;
306
	else
307

3831874
		r = MATCH(cp->a1);
308
6727204
	return (cp->nonsel ? !r : r);
309
}
310
311
/*
312
 * Reset all inrange markers.
313
 */
314
void
315
resetranges(void)
316
{
317
	struct s_command *cp;
318
319
259
	for (cp = prog; cp; cp = cp->code == '{' ? cp->u.c : cp->next)
320
98
		if (cp->a2)
321
			cp->inrange = 0;
322
21
}
323
324
/*
325
 * substitute --
326
 *	Do substitutions in the pattern space.  Currently, we build a
327
 *	copy of the new pattern space in the substitute space structure
328
 *	and then swap them.
329
 */
330
static int
331
substitute(struct s_command *cp)
332
{
333
1252562
	SPACE tspace;
334
	regex_t *re;
335
	regoff_t slen;
336
	int n, lastempty;
337
	size_t le = 0;
338
	char *s;
339
340
626281
	s = ps;
341
626281
	re = cp->u.s->re;
342
1252562
	if (re == NULL) {
343
627782
		if (defpreg != NULL && cp->u.s->maxbref > defpreg->re_nsub) {
344
			linenum = cp->u.s->linenum;
345
			error(COMPILE, "\\%d not defined in the RE",
346
			    cp->u.s->maxbref);
347
		}
348
	}
349
626281
	if (!regexec_e(re, ps, 0, 0, 0, psl))
350
239868
		return (0);
351
352
386413
	SS.len = 0;				/* Clean substitute space. */
353
	slen = psl;
354
386413
	n = cp->u.s->n;
355
	lastempty = 1;
356
357
386413
	do {
358
		/* Copy the leading retained string. */
359

849657
		if (n <= 1 && (match[0].rm_so > le))
360
299845
			cspace(&SS, s, match[0].rm_so - le, APPEND);
361
362
		/* Skip zero-length matches right after other matches. */
363

482403
		if (lastempty || (match[0].rm_so - le) ||
364
18228
		    match[0].rm_so != match[0].rm_eo) {
365
425231
			if (n <= 1) {
366
				/* Want this match: append replacement. */
367
423544
				regsub(&SS, ps, cp->u.s->new);
368
423544
				if (n == 1)
369
375532
					n = -1;
370
			} else {
371
				/* Want a later match: append original. */
372
1687
				if (match[0].rm_eo - le)
373
1050
					cspace(&SS, s, match[0].rm_eo - le,
374
					    APPEND);
375
1687
				n--;
376
			}
377
		}
378
379
		/* Move past this match. */
380
425693
		s = ps + match[0].rm_eo;
381
425693
		slen = psl - match[0].rm_eo;
382
		le = match[0].rm_eo;
383
384
		/*
385
		 * After a zero-length match, advance one byte,
386
		 * and at the end of the line, terminate.
387
		 */
388
425693
		if (match[0].rm_so == match[0].rm_eo) {
389

97067
			if (*s == '\0' || *s == '\n')
390
13491
				slen = -1;
391
			else
392
41788
				slen--;
393
55279
			if (*s != '\0') {
394
41788
				cspace(&SS, s++, 1, APPEND);
395
41788
				le++;
396
41788
			}
397
			lastempty = 1;
398
55279
		} else
399
			lastempty = 0;
400
401

475070
	} while (n >= 0 && slen >= 0 &&
402
49377
	    regexec_e(re, ps, REG_NOTBOL, 0, le, psl));
403
404
	/* Did not find the requested number of matches. */
405
386413
	if (n > 0)
406
595
		return (0);
407
408
	/* Copy the trailing retained string. */
409
385818
	if (slen > 0)
410
354346
		cspace(&SS, s, slen, APPEND);
411
412
	/*
413
	 * Swap the substitute space and the pattern space, and make sure
414
	 * that any leftover pointers into stdio memory get lost.
415
	 */
416
385818
	tspace = PS;
417
385818
	PS = SS;
418
385818
	psanl = tspace.append_newline;
419
385818
	SS = tspace;
420
385818
	SS.space = SS.back;
421
422
	/* Handle the 'p' flag. */
423
385818
	if (cp->u.s->p)
424
18470
		OUT();
425
426
	/* Handle the 'w' flag. */
427
385818
	if (cp->u.s->wfile && !pd) {
428

19698
		if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile,
429
		    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1)
430
			error(FATAL, "%s: %s", cp->u.s->wfile, strerror(errno));
431

39396
		if (write(cp->u.s->wfd, ps, psl) != psl ||
432
19698
		    write(cp->u.s->wfd, "\n", 1) != 1)
433
			error(FATAL, "%s: %s", cp->u.s->wfile, strerror(errno));
434
	}
435
385818
	return (1);
436
626281
}
437
438
/*
439
 * Flush append requests.  Always called before reading a line,
440
 * therefore it also resets the substitution done (sdone) flag.
441
 */
442
static void
443
flush_appends(void)
444
{
445
	FILE *f;
446
	int count, i;
447
1857884
	char buf[8 * 1024];
448
449
1858306
	for (i = 0; i < appendx; i++)
450
422
		switch (appends[i].type) {
451
		case AP_STRING:
452
280
			fwrite(appends[i].s, sizeof(char), appends[i].len,
453
140
			    outfile);
454
140
			break;
455
		case AP_FILE:
456
			/*
457
			 * Read files probably shouldn't be cached.  Since
458
			 * it's not an error to read a non-existent file,
459
			 * it's possible that another program is interacting
460
			 * with the sed script through the file system.  It
461
			 * would be truly bizarre, but possible.  It's probably
462
			 * not that big a performance win, anyhow.
463
			 */
464
71
			if ((f = fopen(appends[i].s, "r")) == NULL)
465
				break;
466
178
			while ((count = fread(buf, sizeof(char), sizeof(buf), f)))
467
57
				(void)fwrite(buf, sizeof(char), count, outfile);
468
64
			(void)fclose(f);
469
64
			break;
470
		}
471

1857884
	if (ferror(outfile))
472
		error(FATAL, "%s: %s", outfname, strerror(errno ? errno : EIO));
473
928942
	appendx = sdone = 0;
474
928942
}
475
476
static void
477
lputs(char *s)
478
{
479
	int count;
480
	extern int termwidth;
481
	const char *escapes;
482
	char *p;
483
484
3619
	for (count = 0; *s; ++s) {
485
1778
		if (count >= termwidth) {
486
56
			(void)fprintf(outfile, "\\\n");
487
			count = 0;
488
56
		}
489

3325
		if (isascii((unsigned char)*s) && isprint((unsigned char)*s)
490
1547
		    && *s != '\\') {
491
658
			(void)fputc(*s, outfile);
492
658
			count++;
493
1778
		} else if (*s == '\n') {
494
			(void)fputc('$', outfile);
495
			(void)fputc('\n', outfile);
496
			count = 0;
497
		} else {
498
			escapes = "\\\a\b\f\r\t\v";
499
1120
			(void)fputc('\\', outfile);
500
1120
			if ((p = strchr(escapes, *s))) {
501
49
				(void)fputc("\\abfrtv"[p - escapes], outfile);
502
49
				count += 2;
503
49
			} else {
504
1071
				(void)fprintf(outfile, "%03o", *(u_char *)s);
505
1071
				count += 4;
506
			}
507
		}
508
	}
509
21
	(void)fputc('$', outfile);
510
21
	(void)fputc('\n', outfile);
511

42
	if (ferror(outfile))
512
		error(FATAL, "%s: %s", outfname, strerror(errno ? errno : EIO));
513
21
}
514
515
static inline int
516
regexec_e(regex_t *preg, const char *string, int eflags,
517
    int nomatch, size_t start, size_t stop)
518
{
519
	int eval;
520
521
2917162
	if (preg == NULL) {
522
1501
		if (defpreg == NULL)
523
			error(FATAL, "first RE may not be empty");
524
	} else
525
1457080
		defpreg = preg;
526
527
	/* Set anchors */
528
1458581
	match[0].rm_so = start;
529
1458581
	match[0].rm_eo = stop;
530
531
2917162
	eval = regexec(defpreg, string,
532
1458581
	    nomatch ? 0 : maxnsub + 1, match, eflags | REG_STARTEND);
533
1458581
	switch (eval) {
534
	case 0:
535
697494
		return (1);
536
	case REG_NOMATCH:
537
761087
		return (0);
538
	}
539
	error(FATAL, "RE error: %s", strregerror(eval, defpreg));
540
1458581
}
541
542
/*
543
 * regsub - perform substitutions after a regexp match
544
 * Based on a routine by Henry Spencer
545
 */
546
static void
547
regsub(SPACE *sp, char *string, char *src)
548
{
549
	int len, no;
550
	char c, *dst;
551
552
#define	NEEDSP(reqlen)							\
553
	if (sp->len + (reqlen) + 1 >= sp->blen) {			\
554
		size_t newlen = sp->blen + (reqlen) + 1024;		\
555
		sp->space = sp->back = xrealloc(sp->back, newlen);	\
556
		sp->blen = newlen;					\
557
		dst = sp->space + sp->len;				\
558
	}
559
560
847088
	dst = sp->space + sp->len;
561
1789777
	while ((c = *src++) != '\0') {
562
942689
		if (c == '&')
563
3547
			no = 0;
564

959668
		else if (c == '\\' && isdigit((unsigned char)*src))
565
19786
			no = *src++ - '0';
566
		else
567
			no = -1;
568
942689
		if (no < 0) {		/* Ordinary character. */
569

920523
			if (c == '\\' && (*src == '\\' || *src == '&'))
570
740
				c = *src++;
571
922426
			NEEDSP(1);
572
919356
			*dst++ = c;
573
919356
			++sp->len;
574

966022
		} else if (match[no].rm_so != -1 && match[no].rm_eo != -1) {
575
23333
			len = match[no].rm_eo - match[no].rm_so;
576
23400
			NEEDSP(len);
577
23333
			memmove(dst, string + match[no].rm_so, len);
578
23333
			dst += len;
579
23333
			sp->len += len;
580
23333
		}
581
	}
582
424203
	NEEDSP(1);
583
423544
	*dst = '\0';
584
423544
}
585
586
/*
587
 * aspace --
588
 *	Append the source space to the destination space, allocating new
589
 *	space as necessary.
590
 */
591
void
592
cspace(SPACE *sp, const char *p, size_t len, enum e_spflag spflag)
593
{
594
	size_t tlen;
595
596
	/* Make sure SPACE has enough memory and ramp up quickly. */
597
3259668
	tlen = sp->len + len + 1;
598
1629834
	if (tlen > sp->blen) {
599
10854
		size_t newlen = tlen + 1024;
600
10854
		sp->space = sp->back = xrealloc(sp->back, newlen);
601
10854
		sp->blen = newlen;
602
10854
	}
603
604
1629834
	if (spflag == REPLACE)
605
930684
		sp->len = 0;
606
607
1629834
	memmove(sp->space + sp->len, p, len);
608
609
1629834
	sp->space[sp->len += len] = '\0';
610
1629834
}
611
612
/*
613
 * Close all cached opened files and report any errors
614
 */
615
void
616
cfclose(struct s_command *cp, struct s_command *end)
617
{
618
619
91479
	for (; cp != end; cp = cp->next)
620

54184
		switch (cp->code) {
621
		case 's':
622

23806
			if (cp->u.s->wfd != -1 && close(cp->u.s->wfd))
623
				error(FATAL,
624
				    "%s: %s", cp->u.s->wfile, strerror(errno));
625
22399
			cp->u.s->wfd = -1;
626
22399
			break;
627
		case 'w':
628

734
			if (cp->u.fd != -1 && close(cp->u.fd))
629
				error(FATAL, "%s: %s", cp->t, strerror(errno));
630
367
			cp->u.fd = -1;
631
367
			break;
632
		case '{':
633
1382
			cfclose(cp->u.c, cp->next);
634
1382
			break;
635
		}
636
10469
}