GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: usr.bin/vi/build/../ex/ex_subst.c Lines: 0 456 0.0 %
Date: 2017-11-07 Branches: 0 744 0.0 %

Line Branch Exec Source
1
/*	$OpenBSD: ex_subst.c,v 1.30 2017/04/18 01:45:35 deraadt Exp $	*/
2
3
/*-
4
 * Copyright (c) 1992, 1993, 1994
5
 *	The Regents of the University of California.  All rights reserved.
6
 * Copyright (c) 1992, 1993, 1994, 1995, 1996
7
 *	Keith Bostic.  All rights reserved.
8
 *
9
 * See the LICENSE file for redistribution information.
10
 */
11
12
#include "config.h"
13
14
#include <sys/queue.h>
15
#include <sys/time.h>
16
17
#include <bitstring.h>
18
#include <ctype.h>
19
#include <errno.h>
20
#include <limits.h>
21
#include <stdio.h>
22
#include <stdlib.h>
23
#include <string.h>
24
#include <unistd.h>
25
26
#include "../common/common.h"
27
#include "../vi/vi.h"
28
29
#define MAXIMUM(a, b)	(((a) > (b)) ? (a) : (b))
30
31
#define	SUB_FIRST	0x01		/* The 'r' flag isn't reasonable. */
32
#define	SUB_MUSTSETR	0x02		/* The 'r' flag is required. */
33
34
static int re_conv(SCR *, char **, size_t *, int *);
35
static int re_sub(SCR *, char *, char **, size_t *, size_t *, regmatch_t [10]);
36
static int re_tag_conv(SCR *, char **, size_t *, int *);
37
static int s(SCR *, EXCMD *, char *, regex_t *, u_int);
38
39
/*
40
 * ex_s --
41
 *	[line [,line]] s[ubstitute] [[/;]pat[/;]/repl[/;] [cgr] [count] [#lp]]
42
 *
43
 *	Substitute on lines matching a pattern.
44
 *
45
 * PUBLIC: int ex_s(SCR *, EXCMD *);
46
 */
47
int
48
ex_s(SCR *sp, EXCMD *cmdp)
49
{
50
	regex_t *re;
51
	size_t blen, len;
52
	u_int flags;
53
	int delim;
54
	char *bp, *ptrn, *rep, *p, *t;
55
56
	/*
57
	 * Skip leading white space.
58
	 *
59
	 * !!!
60
	 * Historic vi allowed any non-alphanumeric to serve as the
61
	 * substitution command delimiter.
62
	 *
63
	 * !!!
64
	 * If the arguments are empty, it's the same as &, i.e. we
65
	 * repeat the last substitution.
66
	 */
67
	if (cmdp->argc == 0)
68
		goto subagain;
69
	for (p = cmdp->argv[0]->bp,
70
	    len = cmdp->argv[0]->len; len > 0; --len, ++p) {
71
		if (!isblank(*p))
72
			break;
73
	}
74
	if (len == 0)
75
subagain:	return (ex_subagain(sp, cmdp));
76
77
	delim = *p++;
78
	if (isalnum(delim) || delim == '\\')
79
		return (s(sp, cmdp, p, &sp->subre_c, SUB_MUSTSETR));
80
81
	/*
82
	 * !!!
83
	 * The full-blown substitute command reset the remembered
84
	 * state of the 'c' and 'g' suffices.
85
	 */
86
	sp->c_suffix = sp->g_suffix = 0;
87
88
	/*
89
	 * Get the pattern string, toss escaping characters.
90
	 *
91
	 * !!!
92
	 * Historic vi accepted any of the following forms:
93
	 *
94
	 *	:s/abc/def/		change "abc" to "def"
95
	 *	:s/abc/def		change "abc" to "def"
96
	 *	:s/abc/			delete "abc"
97
	 *	:s/abc			delete "abc"
98
	 *
99
	 * QUOTING NOTE:
100
	 *
101
	 * Only toss an escaping character if it escapes a delimiter.
102
	 * This means that "s/A/\\\\f" replaces "A" with "\\f".  It
103
	 * would be nice to be more regular, i.e. for each layer of
104
	 * escaping a single escaping character is removed, but that's
105
	 * not how the historic vi worked.
106
	 */
107
	for (ptrn = t = p;;) {
108
		if (p[0] == '\0' || p[0] == delim) {
109
			if (p[0] == delim)
110
				++p;
111
			/*
112
			 * !!!
113
			 * Nul terminate the pattern string -- it's passed
114
			 * to regcomp which doesn't understand anything else.
115
			 */
116
			*t = '\0';
117
			break;
118
		}
119
		if (p[0] == '\\') {
120
			if (p[1] == delim)
121
				++p;
122
			else if (p[1] == '\\')
123
				*t++ = *p++;
124
		}
125
		*t++ = *p++;
126
	}
127
128
	/*
129
	 * If the pattern string is empty, use the last RE (not just the
130
	 * last substitution RE).
131
	 */
132
	if (*ptrn == '\0') {
133
		if (sp->re == NULL) {
134
			ex_emsg(sp, NULL, EXM_NOPREVRE);
135
			return (1);
136
		}
137
138
		/* Re-compile the RE if necessary. */
139
		if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp,
140
		    sp->re, sp->re_len, NULL, NULL, &sp->re_c, RE_C_SEARCH))
141
			return (1);
142
		flags = 0;
143
	} else {
144
		/*
145
		 * !!!
146
		 * Compile the RE.  Historic practice is that substitutes set
147
		 * the search direction as well as both substitute and search
148
		 * RE's.  We compile the RE twice, as we don't want to bother
149
		 * ref counting the pattern string and (opaque) structure.
150
		 */
151
		if (re_compile(sp, ptrn, t - ptrn,
152
		    &sp->re, &sp->re_len, &sp->re_c, RE_C_SEARCH))
153
			return (1);
154
		if (re_compile(sp, ptrn, t - ptrn,
155
		    &sp->subre, &sp->subre_len, &sp->subre_c, RE_C_SUBST))
156
			return (1);
157
158
		flags = SUB_FIRST;
159
		sp->searchdir = FORWARD;
160
	}
161
	re = &sp->re_c;
162
163
	/*
164
	 * Get the replacement string.
165
	 *
166
	 * The special character & (\& if O_MAGIC not set) matches the
167
	 * entire RE.  No handling of & is required here, it's done by
168
	 * re_sub().
169
	 *
170
	 * The special character ~ (\~ if O_MAGIC not set) inserts the
171
	 * previous replacement string into this replacement string.
172
	 * Count ~'s to figure out how much space we need.  We could
173
	 * special case nonexistent last patterns or whether or not
174
	 * O_MAGIC is set, but it's probably not worth the effort.
175
	 *
176
	 * QUOTING NOTE:
177
	 *
178
	 * Only toss an escaping character if it escapes a delimiter or
179
	 * if O_MAGIC is set and it escapes a tilde.
180
	 *
181
	 * !!!
182
	 * If the entire replacement pattern is "%", then use the last
183
	 * replacement pattern.  This semantic was added to vi in System
184
	 * V and then percolated elsewhere, presumably around the time
185
	 * that it was added to their version of ed(1).
186
	 */
187
	if (p[0] == '\0' || p[0] == delim) {
188
		if (p[0] == delim)
189
			++p;
190
		free(sp->repl);
191
		sp->repl = NULL;
192
		sp->repl_len = 0;
193
	} else if (p[0] == '%' && (p[1] == '\0' || p[1] == delim))
194
		p += p[1] == delim ? 2 : 1;
195
	else {
196
		for (rep = p, len = 0;
197
		    p[0] != '\0' && p[0] != delim; ++p, ++len)
198
			if (p[0] == '~')
199
				len += sp->repl_len;
200
		GET_SPACE_RET(sp, bp, blen, len);
201
		for (t = bp, len = 0, p = rep;;) {
202
			if (p[0] == '\0' || p[0] == delim) {
203
				if (p[0] == delim)
204
					++p;
205
				break;
206
			}
207
			if (p[0] == '\\') {
208
				if (p[1] == delim)
209
					++p;
210
				else if (p[1] == '\\') {
211
					*t++ = *p++;
212
					++len;
213
				} else if (p[1] == '~') {
214
					++p;
215
					if (!O_ISSET(sp, O_MAGIC))
216
						goto tilde;
217
				}
218
			} else if (p[0] == '~' && O_ISSET(sp, O_MAGIC)) {
219
tilde:				++p;
220
				memcpy(t, sp->repl, sp->repl_len);
221
				t += sp->repl_len;
222
				len += sp->repl_len;
223
				continue;
224
			}
225
			*t++ = *p++;
226
			++len;
227
		}
228
		if ((sp->repl_len = len) != 0) {
229
			free(sp->repl);
230
			if ((sp->repl = malloc(len)) == NULL) {
231
				msgq(sp, M_SYSERR, NULL);
232
				FREE_SPACE(sp, bp, blen);
233
				return (1);
234
			}
235
			memcpy(sp->repl, bp, len);
236
		}
237
		FREE_SPACE(sp, bp, blen);
238
	}
239
	return (s(sp, cmdp, p, re, flags));
240
}
241
242
/*
243
 * ex_subagain --
244
 *	[line [,line]] & [cgr] [count] [#lp]]
245
 *
246
 *	Substitute using the last substitute RE and replacement pattern.
247
 *
248
 * PUBLIC: int ex_subagain(SCR *, EXCMD *);
249
 */
250
int
251
ex_subagain(SCR *sp, EXCMD *cmdp)
252
{
253
	if (sp->subre == NULL) {
254
		ex_emsg(sp, NULL, EXM_NOPREVRE);
255
		return (1);
256
	}
257
	if (!F_ISSET(sp, SC_RE_SUBST) && re_compile(sp,
258
	    sp->subre, sp->subre_len, NULL, NULL, &sp->subre_c, RE_C_SUBST))
259
		return (1);
260
	return (s(sp,
261
	    cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->subre_c, 0));
262
}
263
264
/*
265
 * ex_subtilde --
266
 *	[line [,line]] ~ [cgr] [count] [#lp]]
267
 *
268
 *	Substitute using the last RE and last substitute replacement pattern.
269
 *
270
 * PUBLIC: int ex_subtilde(SCR *, EXCMD *);
271
 */
272
int
273
ex_subtilde(SCR *sp, EXCMD *cmdp)
274
{
275
	if (sp->re == NULL) {
276
		ex_emsg(sp, NULL, EXM_NOPREVRE);
277
		return (1);
278
	}
279
	if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp,
280
	    sp->re, sp->re_len, NULL, NULL, &sp->re_c, RE_C_SEARCH))
281
		return (1);
282
	return (s(sp,
283
	    cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->re_c, 0));
284
}
285
286
/*
287
 * s --
288
 * Do the substitution.  This stuff is *really* tricky.  There are lots of
289
 * special cases, and general nastiness.  Don't mess with it unless you're
290
 * pretty confident.
291
 *
292
 * The nasty part of the substitution is what happens when the replacement
293
 * string contains newlines.  It's a bit tricky -- consider the information
294
 * that has to be retained for "s/f\(o\)o/^M\1^M\1/".  The solution here is
295
 * to build a set of newline offsets which we use to break the line up later,
296
 * when the replacement is done.  Don't change it unless you're *damned*
297
 * confident.
298
 */
299
/*
 * NEEDNEWLINE --
 *	Make sure the screen's newline-offset array (sp->newl) has room
 *	for one more entry.  When the array is full (newl_len == newl_cnt)
 *	it is grown by 25 size_t slots with REALLOCARRAY.  If that leaves
 *	sp->newl NULL, newl_len is reset to 0 and the macro executes
 *	"return (1)" in the function that expanded it.
 */
#define	NEEDNEWLINE(sp) {						\
300
	if ((sp)->newl_len == (sp)->newl_cnt) {				\
301
		(sp)->newl_len += 25;					\
302
		REALLOCARRAY((sp), (sp)->newl,				\
303
		    (sp)->newl_len, sizeof(size_t));			\
304
		if ((sp)->newl == NULL) {				\
305
			(sp)->newl_len = 0;				\
306
			return (1);					\
307
		}							\
308
	}								\
309
}
310
311
/*
 * BUILD --
 *	Append len bytes starting at l to the build buffer.  The macro
 *	uses the expanding function's variables lb (buffer), lbclen
 *	(bytes used) and lblen (bytes allocated).  If the bytes don't
 *	fit, lblen is increased by max(lbclen + len, 256) and lb is
 *	REALLOC'd; if lb ends up NULL, lbclen is zeroed and the macro
 *	executes "return (1)" in the expanding function.
 *
 *	The expansion is a brace block, not a do/while, and is used in
 *	this file both with and without a trailing semicolon.
 */
#define	BUILD(sp, l, len) {						\
312
	if (lbclen + (len) > lblen) {					\
313
		lblen += MAXIMUM(lbclen + (len), 256);			\
314
		REALLOC((sp), lb, lblen);				\
315
		if (lb == NULL) {					\
316
			lbclen = 0;					\
317
			return (1);					\
318
		}							\
319
	}								\
320
	memcpy(lb + lbclen, (l), (len));				\
321
	lbclen += (len);						\
322
}
323
324
/*
 * NEEDSP --
 *	Make sure the build buffer can take len more bytes without
 *	copying anything.  Growth and failure handling are the same as
 *	in BUILD.  When the buffer is reallocated, pnt is reset to
 *	lb + lbclen, i.e. the first unused byte of the new buffer; when
 *	no growth is needed pnt is left untouched.
 */
#define	NEEDSP(sp, len, pnt) {						\
325
	if (lbclen + (len) > lblen) {					\
326
		lblen += MAXIMUM(lbclen + (len), 256);			\
327
		REALLOC((sp), lb, lblen);				\
328
		if (lb == NULL) {					\
329
			lbclen = 0;					\
330
			return (1);					\
331
		}							\
332
		(pnt) = lb + lbclen;					\
333
	}								\
334
}
335
336
static int
337
s(SCR *sp, EXCMD *cmdp, char *s, regex_t *re, u_int flags)
338
{
339
	EVENT ev;
340
	MARK from, to;
341
	TEXTH tiq;
342
	recno_t elno, lno, slno;
343
	regmatch_t match[10];
344
	size_t blen, cnt, last, lbclen, lblen, len, llen;
345
	size_t offset, saved_offset, scno;
346
	int lflag, nflag, pflag, rflag;
347
	int didsub, do_eol_match, eflags, nempty, eval;
348
	int linechanged, matched, quit, rval;
349
	unsigned long ul;
350
	char *bp, *lb;
351
352
	NEEDFILE(sp, cmdp);
353
354
	slno = sp->lno;
355
	scno = sp->cno;
356
357
	/*
358
	 * !!!
359
	 * Historically, the 'g' and 'c' suffices were always toggled as flags,
360
	 * so ":s/A/B/" was the same as ":s/A/B/ccgg".  If O_EDCOMPATIBLE was
361
	 * not set, they were initialized to 0 for all substitute commands.  If
362
	 * O_EDCOMPATIBLE was set, they were initialized to 0 only if the user
363
	 * specified substitute/replacement patterns (see ex_s()).
364
	 */
365
	if (!O_ISSET(sp, O_EDCOMPATIBLE))
366
		sp->c_suffix = sp->g_suffix = 0;
367
368
	/*
369
	 * Historic vi permitted the '#', 'l' and 'p' options in vi mode, but
370
	 * it only displayed the last change.  I'd disallow them, but they are
371
	 * useful in combination with the [v]global commands.  In the current
372
	 * model the problem is combining them with the 'c' flag -- the screen
373
	 * would have to flip back and forth between the confirm screen and the
374
	 * ex print screen, which would be pretty awful.  We do display all
375
	 * changes, though, for what that's worth.
376
	 *
377
	 * !!!
378
	 * Historic vi was fairly strict about the order of "options", the
379
	 * count, and "flags".  I'm somewhat fuzzy on the difference between
380
	 * options and flags, anyway, so this is a simpler approach, and we
381
	 * just take it them in whatever order the user gives them.  (The ex
382
	 * usage statement doesn't reflect this.)
383
	 */
384
	lflag = nflag = pflag = rflag = 0;
385
	if (s == NULL)
386
		goto noargs;
387
	for (lno = OOBLNO; *s != '\0'; ++s)
388
		switch (*s) {
389
		case ' ':
390
		case '\t':
391
			continue;
392
		case '+':
393
			++cmdp->flagoff;
394
			break;
395
		case '-':
396
			--cmdp->flagoff;
397
			break;
398
		case '0': case '1': case '2': case '3': case '4':
399
		case '5': case '6': case '7': case '8': case '9':
400
			if (lno != OOBLNO)
401
				goto usage;
402
			errno = 0;
403
			if ((ul = strtoul(s, &s, 10)) >= UINT_MAX)
404
				errno = ERANGE;
405
			if (*s == '\0')		/* Loop increment correction. */
406
				--s;
407
			if (errno == ERANGE) {
408
				if (ul >= UINT_MAX)
409
					msgq(sp, M_ERR, "Count overflow");
410
				else
411
					msgq(sp, M_SYSERR, NULL);
412
				return (1);
413
			}
414
			lno = (recno_t)ul;
415
			/*
416
			 * In historic vi, the count was inclusive from the
417
			 * second address.
418
			 */
419
			cmdp->addr1.lno = cmdp->addr2.lno;
420
			cmdp->addr2.lno += lno - 1;
421
			if (!db_exist(sp, cmdp->addr2.lno) &&
422
			    db_last(sp, &cmdp->addr2.lno))
423
				return (1);
424
			break;
425
		case '#':
426
			nflag = 1;
427
			break;
428
		case 'c':
429
			sp->c_suffix = !sp->c_suffix;
430
431
			/* Ex text structure initialization. */
432
			if (F_ISSET(sp, SC_EX)) {
433
				memset(&tiq, 0, sizeof(TEXTH));
434
				TAILQ_INIT(&tiq);
435
			}
436
			break;
437
		case 'g':
438
			sp->g_suffix = !sp->g_suffix;
439
			break;
440
		case 'l':
441
			lflag = 1;
442
			break;
443
		case 'p':
444
			pflag = 1;
445
			break;
446
		case 'r':
447
			if (LF_ISSET(SUB_FIRST)) {
448
				msgq(sp, M_ERR,
449
		    "Regular expression specified; r flag meaningless");
450
				return (1);
451
			}
452
			if (!F_ISSET(sp, SC_RE_SEARCH)) {
453
				ex_emsg(sp, NULL, EXM_NOPREVRE);
454
				return (1);
455
			}
456
			rflag = 1;
457
			re = &sp->re_c;
458
			break;
459
		default:
460
			goto usage;
461
		}
462
463
	if (*s != '\0' || (!rflag && LF_ISSET(SUB_MUSTSETR))) {
464
usage:		ex_emsg(sp, cmdp->cmd->usage, EXM_USAGE);
465
		return (1);
466
	}
467
468
noargs:	if (F_ISSET(sp, SC_VI) && sp->c_suffix && (lflag || nflag || pflag)) {
469
		msgq(sp, M_ERR,
470
"The #, l and p flags may not be combined with the c flag in vi mode");
471
		return (1);
472
	}
473
474
	/*
475
	 * bp:		if interactive, line cache
476
	 * blen:	if interactive, line cache length
477
	 * lb:		build buffer pointer.
478
	 * lbclen:	current length of built buffer.
479
	 * lblen;	length of build buffer.
480
	 */
481
	bp = lb = NULL;
482
	blen = lbclen = lblen = 0;
483
484
	/* For each line... */
485
	for (matched = quit = 0, lno = cmdp->addr1.lno,
486
	    elno = cmdp->addr2.lno; !quit && lno <= elno; ++lno) {
487
488
		/* Someone's unhappy, time to stop. */
489
		if (INTERRUPTED(sp))
490
			break;
491
492
		/* Get the line. */
493
		if (db_get(sp, lno, DBG_FATAL, &s, &llen))
494
			goto err;
495
496
		/*
497
		 * Make a local copy if doing confirmation -- when calling
498
		 * the confirm routine we're likely to lose the cached copy.
499
		 */
500
		if (sp->c_suffix) {
501
			if (bp == NULL) {
502
				GET_SPACE_RET(sp, bp, blen, llen);
503
			} else
504
				ADD_SPACE_RET(sp, bp, blen, llen);
505
			memcpy(bp, s, llen);
506
			s = bp;
507
		}
508
509
		/* Start searching from the beginning. */
510
		offset = 0;
511
		len = llen;
512
513
		/* Reset the build buffer offset. */
514
		lbclen = 0;
515
516
		/* Reset empty match test variable. */
517
		nempty = -1;
518
519
		/*
520
		 * We don't want to have to do a setline if the line didn't
521
		 * change -- keep track of whether or not this line changed.
522
		 * If doing confirmations, don't want to keep setting the
523
		 * line if change is refused -- keep track of substitutions.
524
		 */
525
		didsub = linechanged = 0;
526
527
		/* New line, do an EOL match. */
528
		do_eol_match = 1;
529
530
		/* It's not nul terminated, but we pretend it is. */
531
		eflags = REG_STARTEND;
532
533
		/* The search area is from s + offset to the EOL.  */
534
nextmatch:	match[0].rm_so = offset;
535
		match[0].rm_eo = llen;
536
537
		/* Get the next match. */
538
		eval = regexec(re, (char *)s, 10, match, eflags);
539
540
		/*
541
		 * There wasn't a match or if there was an error, deal with
542
		 * it.  If there was a previous match in this line, resolve
543
		 * the changes into the database.  Otherwise, just move on.
544
		 */
545
		if (eval == REG_NOMATCH)
546
			goto endmatch;
547
		if (eval != 0) {
548
			re_error(sp, eval, re);
549
			goto err;
550
		}
551
		matched = 1;
552
553
		/* Only the first search can match an anchored expression. */
554
		eflags |= REG_NOTBOL;
555
556
		/*
557
		 * !!!
558
		 * It's possible to match 0-length strings -- for example, the
559
		 * command s;a*;X;, when matched against the string "aabb" will
560
		 * result in "XbXbX", i.e. the matches are "aa", the space
561
		 * between the b's and the space between the b's and the end of
562
		 * the string.  There is a similar space between the beginning
563
		 * of the string and the a's.  The rule that we use (because vi
564
		 * historically used it) is that any 0-length match, occurring
565
		 * immediately after a match, is ignored.  Otherwise, the above
566
		 * example would have resulted in "XXbXbX".  Another example is
567
		 * incorrectly using " *" to replace groups of spaces with one
568
		 * space.
569
		 *
570
		 * If the match is empty and at the same place as the end of the
571
		 * previous match, ignore the match and move forward.  If
572
		 * there's no more characters in the string, we were
573
		 * attempting to match after the last character, so quit.
574
		 */
575
		if (match[0].rm_so == nempty && match[0].rm_eo == nempty) {
576
			nempty = -1;
577
			if (len == 0)
578
				goto endmatch;
579
			BUILD(sp, s + offset, 1)
580
			++offset;
581
			--len;
582
			goto nextmatch;
583
		}
584
585
		/* Confirm change. */
586
		if (sp->c_suffix) {
587
			/*
588
			 * Set the cursor position for confirmation.  Note,
589
			 * if we matched on a '$', the cursor may be past
590
			 * the end of line.
591
			 */
592
			from.lno = to.lno = lno;
593
			from.cno = match[0].rm_so;
594
			to.cno = match[0].rm_eo;
595
			/*
596
			 * Both ex and vi have to correct for a change before
597
			 * the first character in the line.
598
			 */
599
			if (llen == 0)
600
				from.cno = to.cno = 0;
601
			if (F_ISSET(sp, SC_VI)) {
602
				/*
603
				 * Only vi has to correct for a change after
604
				 * the last character in the line.
605
				 *
606
				 * XXX
607
				 * It would be nice to change the vi code so
608
				 * that we could display a cursor past EOL.
609
				 */
610
				if (to.cno >= llen)
611
					to.cno = llen - 1;
612
				if (from.cno >= llen)
613
					from.cno = llen - 1;
614
615
				sp->lno = from.lno;
616
				sp->cno = from.cno;
617
				if (vs_refresh(sp, 1))
618
					goto err;
619
620
				vs_update(sp, "Confirm change? [n]", NULL);
621
622
				if (v_event_get(sp, &ev, 0, 0))
623
					goto err;
624
				switch (ev.e_event) {
625
				case E_CHARACTER:
626
					break;
627
				case E_EOF:
628
				case E_ERR:
629
				case E_INTERRUPT:
630
					goto lquit;
631
				default:
632
					v_event_err(sp, &ev);
633
					goto lquit;
634
				}
635
			} else {
636
				if (ex_print(sp, cmdp, &from, &to, 0) ||
637
				    ex_scprint(sp, &from, &to))
638
					goto lquit;
639
				if (ex_txt(sp, &tiq, 0, TXT_CR))
640
					goto err;
641
				ev.e_c = TAILQ_FIRST(&tiq)->lb[0];
642
			}
643
644
			switch (ev.e_c) {
645
			case CH_YES:
646
				break;
647
			default:
648
			case CH_NO:
649
				didsub = 0;
650
				BUILD(sp, s + offset, match[0].rm_eo - offset);
651
				goto skip;
652
			case CH_QUIT:
653
				/* Set the quit/interrupted flags. */
654
lquit:				quit = 1;
655
				F_SET(sp->gp, G_INTERRUPTED);
656
657
				/*
658
				 * Resolve any changes, then return to (and
659
				 * exit from) the main loop.
660
				 */
661
				goto endmatch;
662
			}
663
		}
664
665
		/*
666
		 * Set the cursor to the last position changed, converting
667
		 * from 1-based to 0-based.
668
		 */
669
		sp->lno = lno;
670
		sp->cno = match[0].rm_so;
671
672
		/* Copy the bytes before the match into the build buffer. */
673
		BUILD(sp, s + offset, match[0].rm_so - offset);
674
675
		/* Substitute the matching bytes. */
676
		didsub = 1;
677
		if (re_sub(sp, s, &lb, &lbclen, &lblen, match))
678
			goto err;
679
680
		/* Set the change flag so we know this line was modified. */
681
		linechanged = 1;
682
683
		/* Move past the matched bytes. */
684
skip:		offset = match[0].rm_eo;
685
		len = llen - match[0].rm_eo;
686
687
		/* A match cannot be followed by an empty pattern. */
688
		nempty = match[0].rm_eo;
689
690
		/*
691
		 * If doing a global change with confirmation, we have to
692
		 * update the screen.  The basic idea is to store the line
693
		 * so the screen update routines can find it, and restart.
694
		 */
695
		if (didsub && sp->c_suffix && sp->g_suffix) {
696
			/*
697
			 * The new search offset will be the end of the
698
			 * modified line.
699
			 */
700
			saved_offset = lbclen;
701
702
			/* Copy the rest of the line. */
703
			if (len)
704
				BUILD(sp, s + offset, len)
705
706
			/* Set the new offset. */
707
			offset = saved_offset;
708
709
			/* Store inserted lines, adjusting the build buffer. */
710
			last = 0;
711
			if (sp->newl_cnt) {
712
				for (cnt = 0;
713
				    cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) {
714
					if (db_insert(sp, lno,
715
					    lb + last, sp->newl[cnt] - last))
716
						goto err;
717
					last = sp->newl[cnt] + 1;
718
					++sp->rptlines[L_ADDED];
719
				}
720
				lbclen -= last;
721
				offset -= last;
722
				sp->newl_cnt = 0;
723
			}
724
725
			/* Store and retrieve the line. */
726
			if (db_set(sp, lno, lb + last, lbclen))
727
				goto err;
728
			if (db_get(sp, lno, DBG_FATAL, &s, &llen))
729
				goto err;
730
			ADD_SPACE_RET(sp, bp, blen, llen)
731
			memcpy(bp, s, llen);
732
			s = bp;
733
			len = llen - offset;
734
735
			/* Restart the build. */
736
			lbclen = 0;
737
			BUILD(sp, s, offset);
738
739
			/*
740
			 * If we haven't already done the after-the-string
741
			 * match, do one.  Set REG_NOTEOL so the '$' pattern
742
			 * only matches once.
743
			 */
744
			if (!do_eol_match)
745
				goto endmatch;
746
			if (offset == len) {
747
				do_eol_match = 0;
748
				eflags |= REG_NOTEOL;
749
			}
750
			goto nextmatch;
751
		}
752
753
		/*
754
		 * If it's a global:
755
		 *
756
		 * If at the end of the string, do a test for the after
757
		 * the string match.  Set REG_NOTEOL so the '$' pattern
758
		 * only matches once.
759
		 */
760
		if (sp->g_suffix && do_eol_match) {
761
			if (len == 0) {
762
				do_eol_match = 0;
763
				eflags |= REG_NOTEOL;
764
			}
765
			goto nextmatch;
766
		}
767
768
endmatch:	if (!linechanged)
769
			continue;
770
771
		/* Copy any remaining bytes into the build buffer. */
772
		if (len)
773
			BUILD(sp, s + offset, len)
774
775
		/* Store inserted lines, adjusting the build buffer. */
776
		last = 0;
777
		if (sp->newl_cnt) {
778
			for (cnt = 0;
779
			    cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) {
780
				if (db_insert(sp,
781
				    lno, lb + last, sp->newl[cnt] - last))
782
					goto err;
783
				last = sp->newl[cnt] + 1;
784
				++sp->rptlines[L_ADDED];
785
			}
786
			lbclen -= last;
787
			sp->newl_cnt = 0;
788
		}
789
790
		/* Store the changed line. */
791
		if (db_set(sp, lno, lb + last, lbclen))
792
			goto err;
793
794
		/* Update changed line counter. */
795
		if (sp->rptlchange != lno) {
796
			sp->rptlchange = lno;
797
			++sp->rptlines[L_CHANGED];
798
		}
799
800
		/*
801
		 * !!!
802
		 * Display as necessary.  Historic practice is to only
803
		 * display the last line of a line split into multiple
804
		 * lines.
805
		 */
806
		if (lflag || nflag || pflag) {
807
			from.lno = to.lno = lno;
808
			from.cno = to.cno = 0;
809
			if (lflag)
810
				(void)ex_print(sp, cmdp, &from, &to, E_C_LIST);
811
			if (nflag)
812
				(void)ex_print(sp, cmdp, &from, &to, E_C_HASH);
813
			if (pflag)
814
				(void)ex_print(sp, cmdp, &from, &to, E_C_PRINT);
815
		}
816
	}
817
818
	/*
819
	 * !!!
820
	 * Historically, vi attempted to leave the cursor at the same place if
821
	 * the substitution was done at the current cursor position.  Otherwise
822
	 * it moved it to the first non-blank of the last line changed.  There
823
	 * were some problems: for example, :s/$/foo/ with the cursor on the
824
	 * last character of the line left the cursor on the last character, or
825
	 * the & command with multiple occurrences of the matching string in the
826
	 * line usually left the cursor in a fairly random position.
827
	 *
828
	 * We try to do the same thing, with the exception that if the user is
829
	 * doing substitution with confirmation, we move to the last line about
830
	 * which the user was consulted, as opposed to the last line that they
831
	 * actually changed.  This prevents a screen flash if the user doesn't
832
	 * change many of the possible lines.
833
	 */
834
	if (!sp->c_suffix && (sp->lno != slno || sp->cno != scno)) {
835
		sp->cno = 0;
836
		(void)nonblank(sp, sp->lno, &sp->cno);
837
	}
838
839
	/*
840
	 * If not in a global command, and nothing matched, say so.
841
	 * Else, if none of the lines displayed, put something up.
842
	 */
843
	rval = 0;
844
	if (!matched) {
845
		if (!F_ISSET(sp, SC_EX_GLOBAL)) {
846
			msgq(sp, M_ERR, "No match found");
847
			goto err;
848
		}
849
	} else if (!lflag && !nflag && !pflag)
850
		F_SET(cmdp, E_AUTOPRINT);
851
852
	if (0) {
853
err:		rval = 1;
854
	}
855
856
	if (bp != NULL)
857
		FREE_SPACE(sp, bp, blen);
858
	free(lb);
859
	return (rval);
860
}
861
862
/*
863
 * re_compile --
864
 *	Compile the RE.
865
 *
866
 * PUBLIC: int re_compile(SCR *,
867
 * PUBLIC:     char *, size_t, char **, size_t *, regex_t *, u_int);
868
 */
869
int
870
re_compile(SCR *sp, char *ptrn, size_t plen, char **ptrnp, size_t *lenp,
871
    regex_t *rep, u_int flags)
872
{
873
	size_t len;
874
	int reflags, replaced, rval;
875
	char *p;
876
877
	/* Set RE flags. */
878
	reflags = 0;
879
	if (!LF_ISSET(RE_C_TAG)) {
880
		if (O_ISSET(sp, O_EXTENDED))
881
			reflags |= REG_EXTENDED;
882
		if (O_ISSET(sp, O_IGNORECASE))
883
			reflags |= REG_ICASE;
884
		if (O_ISSET(sp, O_ICLOWER)) {
885
			for (p = ptrn, len = plen; len > 0; ++p, --len)
886
				if (isupper(*p))
887
					break;
888
			if (len == 0)
889
				reflags |= REG_ICASE;
890
		}
891
	}
892
893
	/* If we're replacing a saved value, clear the old one. */
894
	if (LF_ISSET(RE_C_SEARCH) && F_ISSET(sp, SC_RE_SEARCH)) {
895
		regfree(&sp->re_c);
896
		F_CLR(sp, SC_RE_SEARCH);
897
	}
898
	if (LF_ISSET(RE_C_SUBST) && F_ISSET(sp, SC_RE_SUBST)) {
899
		regfree(&sp->subre_c);
900
		F_CLR(sp, SC_RE_SUBST);
901
	}
902
903
	/*
904
	 * If we're saving the string, it's a pattern we haven't seen before,
905
	 * so convert the vi-style RE's to POSIX 1003.2 RE's.  Save a copy for
906
	 * later recompilation.   Free any previously saved value.
907
	 */
908
	if (ptrnp != NULL) {
909
		if (LF_ISSET(RE_C_TAG)) {
910
			if (re_tag_conv(sp, &ptrn, &plen, &replaced))
911
				return (1);
912
		} else
913
			if (re_conv(sp, &ptrn, &plen, &replaced))
914
				return (1);
915
916
		/* Discard previous pattern. */
917
		free(*ptrnp);
918
		*ptrnp = NULL;
919
		if (lenp != NULL)
920
			*lenp = plen;
921
922
		/*
923
		 * Copy the string into allocated memory.
924
		 *
925
		 * XXX
926
		 * Regcomp isn't 8-bit clean, so the pattern is nul-terminated
927
		 * for now.  There's just no other solution.
928
		 */
929
		MALLOC(sp, *ptrnp, plen + 1);
930
		if (*ptrnp != NULL) {
931
			memcpy(*ptrnp, ptrn, plen);
932
			(*ptrnp)[plen] = '\0';
933
		}
934
935
		/* Free up conversion-routine-allocated memory. */
936
		if (replaced)
937
			FREE_SPACE(sp, ptrn, 0);
938
939
		if (*ptrnp == NULL)
940
			return (1);
941
942
		ptrn = *ptrnp;
943
	}
944
945
	/*
946
	 * XXX
947
	 * Regcomp isn't 8-bit clean, so we just lost if the pattern
948
	 * contained a nul.  Bummer!
949
	 */
950
	if ((rval = regcomp(rep, ptrn, /* plen, */ reflags)) != 0) {
951
		if (!LF_ISSET(RE_C_SILENT))
952
			re_error(sp, rval, rep);
953
		return (1);
954
	}
955
956
	if (LF_ISSET(RE_C_SEARCH))
957
		F_SET(sp, SC_RE_SEARCH);
958
	if (LF_ISSET(RE_C_SUBST))
959
		F_SET(sp, SC_RE_SUBST);
960
961
	return (0);
962
}
963
964
/*
965
 * re_conv --
966
 *	Convert vi's regular expressions into something that the
967
 *	the POSIX 1003.2 RE functions can handle.
968
 *
969
 * There are two conversions we make to make vi's RE's (specifically
970
 * the global, search, and substitute patterns) work with POSIX RE's.
971
 * We assume that \<ptrn\> does "word" searches, which is non-standard
972
 * but supported by most regexp libraries..
973
 *
974
 * 1: If O_MAGIC is not set, strip backslashes from the magic character
975
 *    set (.[*~) that have them, and add them to the ones that don't.
976
 * 2: If O_MAGIC is not set, the string "\~" is replaced with the text
977
 *    from the last substitute command's replacement string.  If O_MAGIC
978
 *    is set, it's the string "~".
979
 *
980
 * !!!/XXX
981
 * This doesn't exactly match the historic behavior of vi because we do
982
 * the ~ substitution before calling the RE engine, so magic characters
983
 * in the replacement string will be expanded by the RE engine, and they
984
 * weren't historically.  It's a bug.
985
 */
986
static int
987
re_conv(SCR *sp, char **ptrnp, size_t *plenp, int *replacedp)
988
{
989
	size_t blen, len, needlen;
990
	int magic;
991
	char *bp, *p, *t;
992
993
	/*
994
	 * First pass through, we figure out how much space we'll need.
995
	 * We do it in two passes, on the grounds that most of the time
996
	 * the user is doing a search and won't have magic characters.
997
	 * That way we can skip most of the memory allocation and copies.
998
	 */
999
	magic = 0;
1000
	for (p = *ptrnp, len = *plenp, needlen = 0; len > 0; ++p, --len)
1001
		switch (*p) {
1002
		case '\\':
1003
			if (len > 1) {
1004
				--len;
1005
				switch (*++p) {
1006
				case '~':
1007
					if (!O_ISSET(sp, O_MAGIC)) {
1008
						magic = 1;
1009
						needlen += sp->repl_len;
1010
					}
1011
					break;
1012
				case '.':
1013
				case '[':
1014
				case '*':
1015
					if (!O_ISSET(sp, O_MAGIC)) {
1016
						magic = 1;
1017
						needlen += 1;
1018
					}
1019
					break;
1020
				default:
1021
					needlen += 2;
1022
				}
1023
			} else
1024
				needlen += 1;
1025
			break;
1026
		case '~':
1027
			if (O_ISSET(sp, O_MAGIC)) {
1028
				magic = 1;
1029
				needlen += sp->repl_len;
1030
			}
1031
			break;
1032
		case '.':
1033
		case '[':
1034
		case '*':
1035
			if (!O_ISSET(sp, O_MAGIC)) {
1036
				magic = 1;
1037
				needlen += 2;
1038
			}
1039
			break;
1040
		default:
1041
			needlen += 1;
1042
			break;
1043
		}
1044
1045
	if (!magic) {
1046
		*replacedp = 0;
1047
		return (0);
1048
	}
1049
1050
	/* Get enough memory to hold the final pattern. */
1051
	*replacedp = 1;
1052
	GET_SPACE_RET(sp, bp, blen, needlen);
1053
1054
	for (p = *ptrnp, len = *plenp, t = bp; len > 0; ++p, --len)
1055
		switch (*p) {
1056
		case '\\':
1057
			if (len > 1) {
1058
				--len;
1059
				switch (*++p) {
1060
				case '~':
1061
					if (O_ISSET(sp, O_MAGIC))
1062
						*t++ = '~';
1063
					else {
1064
						memcpy(t,
1065
						    sp->repl, sp->repl_len);
1066
						t += sp->repl_len;
1067
					}
1068
					break;
1069
				case '.':
1070
				case '[':
1071
				case '*':
1072
					if (O_ISSET(sp, O_MAGIC))
1073
						*t++ = '\\';
1074
					*t++ = *p;
1075
					break;
1076
				default:
1077
					*t++ = '\\';
1078
					*t++ = *p;
1079
				}
1080
			} else
1081
				*t++ = '\\';
1082
			break;
1083
		case '~':
1084
			if (O_ISSET(sp, O_MAGIC)) {
1085
				memcpy(t, sp->repl, sp->repl_len);
1086
				t += sp->repl_len;
1087
			} else
1088
				*t++ = '~';
1089
			break;
1090
		case '.':
1091
		case '[':
1092
		case '*':
1093
			if (!O_ISSET(sp, O_MAGIC))
1094
				*t++ = '\\';
1095
			*t++ = *p;
1096
			break;
1097
		default:
1098
			*t++ = *p;
1099
			break;
1100
		}
1101
1102
	*ptrnp = bp;
1103
	*plenp = t - bp;
1104
	return (0);
1105
}
1106
1107
/*
1108
 * re_tag_conv --
1109
 *	Convert a tags search path into something that the POSIX
1110
 *	1003.2 RE functions can handle.
1111
 */
1112
static int
1113
re_tag_conv(SCR *sp, char **ptrnp, size_t *plenp, int *replacedp)
1114
{
1115
	size_t blen, len;
1116
	int lastdollar;
1117
	char *bp, *p, *t;
1118
1119
	len = *plenp;
1120
1121
	/* Max memory usage is 2 times the length of the string. */
1122
	*replacedp = 1;
1123
	GET_SPACE_RET(sp, bp, blen, len * 2);
1124
1125
	p = *ptrnp;
1126
	t = bp;
1127
1128
	/* If the last character is a '/' or '?', we just strip it. */
1129
	if (len > 0 && (p[len - 1] == '/' || p[len - 1] == '?'))
1130
		--len;
1131
1132
	/* If the next-to-last or last character is a '$', it's magic. */
1133
	if (len > 0 && p[len - 1] == '$') {
1134
		--len;
1135
		lastdollar = 1;
1136
	} else
1137
		lastdollar = 0;
1138
1139
	/* If the first character is a '/' or '?', we just strip it. */
1140
	if (len > 0 && (p[0] == '/' || p[0] == '?')) {
1141
		++p;
1142
		--len;
1143
	}
1144
1145
	/* If the first or second character is a '^', it's magic. */
1146
	if (p[0] == '^') {
1147
		*t++ = *p++;
1148
		--len;
1149
	}
1150
1151
	/*
1152
	 * Escape every other magic character we can find, meanwhile stripping
1153
	 * the backslashes ctags inserts when escaping the search delimiter
1154
	 * characters.
1155
	 */
1156
	for (; len > 0; --len) {
1157
		if (p[0] == '\\' && (p[1] == '/' || p[1] == '?')) {
1158
			++p;
1159
			--len;
1160
		} else if (strchr("^.[]$*", p[0]))
1161
			*t++ = '\\';
1162
		*t++ = *p++;
1163
		if (len == 0)
1164
			break;
1165
	}
1166
	if (lastdollar)
1167
		*t++ = '$';
1168
1169
	*ptrnp = bp;
1170
	*plenp = t - bp;
1171
	return (0);
1172
}
1173
1174
/*
1175
 * re_error --
1176
 *	Report a regular expression error.
1177
 *
1178
 * PUBLIC: void re_error(SCR *, int, regex_t *);
1179
 */
1180
void
1181
re_error(SCR *sp, int errcode, regex_t *preg)
1182
{
1183
	size_t s;
1184
	char *oe;
1185
1186
	s = regerror(errcode, preg, "", 0);
1187
	if ((oe = malloc(s)) == NULL)
1188
		msgq(sp, M_SYSERR, NULL);
1189
	else {
1190
		(void)regerror(errcode, preg, oe, s);
1191
		msgq(sp, M_ERR, "RE error: %s", oe);
1192
		free(oe);
1193
	}
1194
}
1195
1196
/*
1197
 * re_sub --
1198
 * 	Do the substitution for a regular expression.
1199
 */
1200
static int
1201
re_sub(SCR *sp, char *ip, char **lbp, size_t *lbclenp, size_t *lblenp,
1202
    regmatch_t match[10])
1203
{
1204
	enum { C_NOTSET, C_LOWER, C_ONELOWER, C_ONEUPPER, C_UPPER } conv;
1205
	size_t lbclen, lblen;		/* Local copies. */
1206
	size_t mlen;			/* Match length. */
1207
	size_t rpl;			/* Remaining replacement length. */
1208
	char *rp;			/* Replacement pointer. */
1209
	int ch;
1210
	int no;				/* Match replacement offset. */
1211
	char *p, *t;			/* Buffer pointers. */
1212
	char *lb;			/* Local copies. */
1213
1214
	lb = *lbp;			/* Get local copies. */
1215
	lbclen = *lbclenp;
1216
	lblen = *lblenp;
1217
1218
	/*
1219
	 * QUOTING NOTE:
1220
	 *
1221
	 * There are some special sequences that vi provides in the
1222
	 * replacement patterns.
1223
	 *	 & string the RE matched (\& if nomagic set)
1224
	 *	\# n-th regular subexpression
1225
	 *	\E end \U, \L conversion
1226
	 *	\e end \U, \L conversion
1227
	 *	\l convert the next character to lower-case
1228
	 *	\L convert to lower-case, until \E, \e, or end of replacement
1229
	 *	\u convert the next character to upper-case
1230
	 *	\U convert to upper-case, until \E, \e, or end of replacement
1231
	 *
1232
	 * Otherwise, since this is the lowest level of replacement, discard
1233
	 * all escaping characters.  This (hopefully) matches historic practice.
1234
	 */
1235
#define	OUTCH(ch, nltrans) {						\
1236
	CHAR_T __ch = (ch);						\
1237
	u_int __value = KEY_VAL(sp, __ch);				\
1238
	if ((nltrans) && (__value == K_CR || __value == K_NL)) {	\
1239
		NEEDNEWLINE(sp);					\
1240
		sp->newl[sp->newl_cnt++] = lbclen;			\
1241
	} else if (conv != C_NOTSET) {					\
1242
		switch (conv) {						\
1243
		case C_ONELOWER:					\
1244
			conv = C_NOTSET;				\
1245
			/* FALLTHROUGH */				\
1246
		case C_LOWER:						\
1247
			if (isupper(__ch))				\
1248
				__ch = tolower(__ch);			\
1249
			break;						\
1250
		case C_ONEUPPER:					\
1251
			conv = C_NOTSET;				\
1252
			/* FALLTHROUGH */				\
1253
		case C_UPPER:						\
1254
			if (islower(__ch))				\
1255
				__ch = toupper(__ch);			\
1256
			break;						\
1257
		default:						\
1258
			abort();					\
1259
		}							\
1260
	}								\
1261
	NEEDSP(sp, 1, p);						\
1262
	*p++ = __ch;							\
1263
	++lbclen;							\
1264
}
1265
	conv = C_NOTSET;
1266
	for (rp = sp->repl, rpl = sp->repl_len, p = lb + lbclen; rpl--;) {
1267
		switch (ch = *rp++) {
1268
		case '&':
1269
			if (O_ISSET(sp, O_MAGIC)) {
1270
				no = 0;
1271
				goto subzero;
1272
			}
1273
			break;
1274
		case '\\':
1275
			if (rpl == 0)
1276
				break;
1277
			--rpl;
1278
			switch (ch = *rp) {
1279
			case '&':
1280
				++rp;
1281
				if (!O_ISSET(sp, O_MAGIC)) {
1282
					no = 0;
1283
					goto subzero;
1284
				}
1285
				break;
1286
			case '0': case '1': case '2': case '3': case '4':
1287
			case '5': case '6': case '7': case '8': case '9':
1288
				no = *rp++ - '0';
1289
subzero:			if (match[no].rm_so == -1 ||
1290
			    	    match[no].rm_eo == -1)
1291
					break;
1292
				mlen = match[no].rm_eo - match[no].rm_so;
1293
				for (t = ip + match[no].rm_so; mlen--; ++t)
1294
					OUTCH(*t, 0);
1295
				continue;
1296
			case 'e':
1297
			case 'E':
1298
				++rp;
1299
				conv = C_NOTSET;
1300
				continue;
1301
			case 'l':
1302
				++rp;
1303
				conv = C_ONELOWER;
1304
				continue;
1305
			case 'L':
1306
				++rp;
1307
				conv = C_LOWER;
1308
				continue;
1309
			case 'u':
1310
				++rp;
1311
				conv = C_ONEUPPER;
1312
				continue;
1313
			case 'U':
1314
				++rp;
1315
				conv = C_UPPER;
1316
				continue;
1317
			default:
1318
				++rp;
1319
				break;
1320
			}
1321
		}
1322
		OUTCH(ch, 1);
1323
	}
1324
1325
	*lbp = lb;			/* Update caller's information. */
1326
	*lbclenp = lbclen;
1327
	*lblenp = lblen;
1328
	return (0);
1329
}