/*	$OpenBSD: ex_subst.c,v 1.30 2017/04/18 01:45:35 deraadt Exp $	*/

/*-
 * Copyright (c) 1992, 1993, 1994
 *	The Regents of the University of California.  All rights reserved.
 * Copyright (c) 1992, 1993, 1994, 1995, 1996
 *	Keith Bostic.  All rights reserved.
 *
 * See the LICENSE file for redistribution information.
 */
11 |
|
|
|
12 |
|
|
#include "config.h"

#include <sys/queue.h>
#include <sys/time.h>

#include <bitstring.h>
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "../common/common.h"
#include "../vi/vi.h"

/* Larger of two values.  Each argument may be evaluated more than once. */
#define	MAXIMUM(a, b)	(((a) > (b)) ? (a) : (b))

/* Bits accepted in the flags argument of s(). */
#define	SUB_FIRST	0x01		/* The 'r' flag isn't reasonable. */
#define	SUB_MUSTSETR	0x02		/* The 'r' flag is required. */

/* Helpers private to this file. */
static int re_conv(SCR *, char **, size_t *, int *);
static int re_sub(SCR *, char *, char **, size_t *, size_t *, regmatch_t [10]);
static int re_tag_conv(SCR *, char **, size_t *, int *);
static int s(SCR *, EXCMD *, char *, regex_t *, u_int);
38 |
|
|
|
39 |
|
|
/* |
40 |
|
|
* ex_s -- |
41 |
|
|
* [line [,line]] s[ubstitute] [[/;]pat[/;]/repl[/;] [cgr] [count] [#lp]] |
42 |
|
|
* |
43 |
|
|
* Substitute on lines matching a pattern. |
44 |
|
|
* |
45 |
|
|
* PUBLIC: int ex_s(SCR *, EXCMD *); |
46 |
|
|
*/ |
47 |
|
|
int |
48 |
|
|
ex_s(SCR *sp, EXCMD *cmdp) |
49 |
|
|
{ |
50 |
|
|
regex_t *re; |
51 |
|
|
size_t blen, len; |
52 |
|
|
u_int flags; |
53 |
|
|
int delim; |
54 |
|
|
char *bp, *ptrn, *rep, *p, *t; |
55 |
|
|
|
56 |
|
|
/* |
57 |
|
|
* Skip leading white space. |
58 |
|
|
* |
59 |
|
|
* !!! |
60 |
|
|
* Historic vi allowed any non-alphanumeric to serve as the |
61 |
|
|
* substitution command delimiter. |
62 |
|
|
* |
63 |
|
|
* !!! |
64 |
|
|
* If the arguments are empty, it's the same as &, i.e. we |
65 |
|
|
* repeat the last substitution. |
66 |
|
|
*/ |
67 |
|
|
if (cmdp->argc == 0) |
68 |
|
|
goto subagain; |
69 |
|
|
for (p = cmdp->argv[0]->bp, |
70 |
|
|
len = cmdp->argv[0]->len; len > 0; --len, ++p) { |
71 |
|
|
if (!isblank(*p)) |
72 |
|
|
break; |
73 |
|
|
} |
74 |
|
|
if (len == 0) |
75 |
|
|
subagain: return (ex_subagain(sp, cmdp)); |
76 |
|
|
|
77 |
|
|
delim = *p++; |
78 |
|
|
if (isalnum(delim) || delim == '\\') |
79 |
|
|
return (s(sp, cmdp, p, &sp->subre_c, SUB_MUSTSETR)); |
80 |
|
|
|
81 |
|
|
/* |
82 |
|
|
* !!! |
83 |
|
|
* The full-blown substitute command reset the remembered |
84 |
|
|
* state of the 'c' and 'g' suffices. |
85 |
|
|
*/ |
86 |
|
|
sp->c_suffix = sp->g_suffix = 0; |
87 |
|
|
|
88 |
|
|
/* |
89 |
|
|
* Get the pattern string, toss escaping characters. |
90 |
|
|
* |
91 |
|
|
* !!! |
92 |
|
|
* Historic vi accepted any of the following forms: |
93 |
|
|
* |
94 |
|
|
* :s/abc/def/ change "abc" to "def" |
95 |
|
|
* :s/abc/def change "abc" to "def" |
96 |
|
|
* :s/abc/ delete "abc" |
97 |
|
|
* :s/abc delete "abc" |
98 |
|
|
* |
99 |
|
|
* QUOTING NOTE: |
100 |
|
|
* |
101 |
|
|
* Only toss an escaping character if it escapes a delimiter. |
102 |
|
|
* This means that "s/A/\\\\f" replaces "A" with "\\f". It |
103 |
|
|
* would be nice to be more regular, i.e. for each layer of |
104 |
|
|
* escaping a single escaping character is removed, but that's |
105 |
|
|
* not how the historic vi worked. |
106 |
|
|
*/ |
107 |
|
|
for (ptrn = t = p;;) { |
108 |
|
|
if (p[0] == '\0' || p[0] == delim) { |
109 |
|
|
if (p[0] == delim) |
110 |
|
|
++p; |
111 |
|
|
/* |
112 |
|
|
* !!! |
113 |
|
|
* Nul terminate the pattern string -- it's passed |
114 |
|
|
* to regcomp which doesn't understand anything else. |
115 |
|
|
*/ |
116 |
|
|
*t = '\0'; |
117 |
|
|
break; |
118 |
|
|
} |
119 |
|
|
if (p[0] == '\\') { |
120 |
|
|
if (p[1] == delim) |
121 |
|
|
++p; |
122 |
|
|
else if (p[1] == '\\') |
123 |
|
|
*t++ = *p++; |
124 |
|
|
} |
125 |
|
|
*t++ = *p++; |
126 |
|
|
} |
127 |
|
|
|
128 |
|
|
/* |
129 |
|
|
* If the pattern string is empty, use the last RE (not just the |
130 |
|
|
* last substitution RE). |
131 |
|
|
*/ |
132 |
|
|
if (*ptrn == '\0') { |
133 |
|
|
if (sp->re == NULL) { |
134 |
|
|
ex_emsg(sp, NULL, EXM_NOPREVRE); |
135 |
|
|
return (1); |
136 |
|
|
} |
137 |
|
|
|
138 |
|
|
/* Re-compile the RE if necessary. */ |
139 |
|
|
if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp, |
140 |
|
|
sp->re, sp->re_len, NULL, NULL, &sp->re_c, RE_C_SEARCH)) |
141 |
|
|
return (1); |
142 |
|
|
flags = 0; |
143 |
|
|
} else { |
144 |
|
|
/* |
145 |
|
|
* !!! |
146 |
|
|
* Compile the RE. Historic practice is that substitutes set |
147 |
|
|
* the search direction as well as both substitute and search |
148 |
|
|
* RE's. We compile the RE twice, as we don't want to bother |
149 |
|
|
* ref counting the pattern string and (opaque) structure. |
150 |
|
|
*/ |
151 |
|
|
if (re_compile(sp, ptrn, t - ptrn, |
152 |
|
|
&sp->re, &sp->re_len, &sp->re_c, RE_C_SEARCH)) |
153 |
|
|
return (1); |
154 |
|
|
if (re_compile(sp, ptrn, t - ptrn, |
155 |
|
|
&sp->subre, &sp->subre_len, &sp->subre_c, RE_C_SUBST)) |
156 |
|
|
return (1); |
157 |
|
|
|
158 |
|
|
flags = SUB_FIRST; |
159 |
|
|
sp->searchdir = FORWARD; |
160 |
|
|
} |
161 |
|
|
re = &sp->re_c; |
162 |
|
|
|
163 |
|
|
/* |
164 |
|
|
* Get the replacement string. |
165 |
|
|
* |
166 |
|
|
* The special character & (\& if O_MAGIC not set) matches the |
167 |
|
|
* entire RE. No handling of & is required here, it's done by |
168 |
|
|
* re_sub(). |
169 |
|
|
* |
170 |
|
|
* The special character ~ (\~ if O_MAGIC not set) inserts the |
171 |
|
|
* previous replacement string into this replacement string. |
172 |
|
|
* Count ~'s to figure out how much space we need. We could |
173 |
|
|
* special case nonexistent last patterns or whether or not |
174 |
|
|
* O_MAGIC is set, but it's probably not worth the effort. |
175 |
|
|
* |
176 |
|
|
* QUOTING NOTE: |
177 |
|
|
* |
178 |
|
|
* Only toss an escaping character if it escapes a delimiter or |
179 |
|
|
* if O_MAGIC is set and it escapes a tilde. |
180 |
|
|
* |
181 |
|
|
* !!! |
182 |
|
|
* If the entire replacement pattern is "%", then use the last |
183 |
|
|
* replacement pattern. This semantic was added to vi in System |
184 |
|
|
* V and then percolated elsewhere, presumably around the time |
185 |
|
|
* that it was added to their version of ed(1). |
186 |
|
|
*/ |
187 |
|
|
if (p[0] == '\0' || p[0] == delim) { |
188 |
|
|
if (p[0] == delim) |
189 |
|
|
++p; |
190 |
|
|
free(sp->repl); |
191 |
|
|
sp->repl = NULL; |
192 |
|
|
sp->repl_len = 0; |
193 |
|
|
} else if (p[0] == '%' && (p[1] == '\0' || p[1] == delim)) |
194 |
|
|
p += p[1] == delim ? 2 : 1; |
195 |
|
|
else { |
196 |
|
|
for (rep = p, len = 0; |
197 |
|
|
p[0] != '\0' && p[0] != delim; ++p, ++len) |
198 |
|
|
if (p[0] == '~') |
199 |
|
|
len += sp->repl_len; |
200 |
|
|
GET_SPACE_RET(sp, bp, blen, len); |
201 |
|
|
for (t = bp, len = 0, p = rep;;) { |
202 |
|
|
if (p[0] == '\0' || p[0] == delim) { |
203 |
|
|
if (p[0] == delim) |
204 |
|
|
++p; |
205 |
|
|
break; |
206 |
|
|
} |
207 |
|
|
if (p[0] == '\\') { |
208 |
|
|
if (p[1] == delim) |
209 |
|
|
++p; |
210 |
|
|
else if (p[1] == '\\') { |
211 |
|
|
*t++ = *p++; |
212 |
|
|
++len; |
213 |
|
|
} else if (p[1] == '~') { |
214 |
|
|
++p; |
215 |
|
|
if (!O_ISSET(sp, O_MAGIC)) |
216 |
|
|
goto tilde; |
217 |
|
|
} |
218 |
|
|
} else if (p[0] == '~' && O_ISSET(sp, O_MAGIC)) { |
219 |
|
|
tilde: ++p; |
220 |
|
|
memcpy(t, sp->repl, sp->repl_len); |
221 |
|
|
t += sp->repl_len; |
222 |
|
|
len += sp->repl_len; |
223 |
|
|
continue; |
224 |
|
|
} |
225 |
|
|
*t++ = *p++; |
226 |
|
|
++len; |
227 |
|
|
} |
228 |
|
|
if ((sp->repl_len = len) != 0) { |
229 |
|
|
free(sp->repl); |
230 |
|
|
if ((sp->repl = malloc(len)) == NULL) { |
231 |
|
|
msgq(sp, M_SYSERR, NULL); |
232 |
|
|
FREE_SPACE(sp, bp, blen); |
233 |
|
|
return (1); |
234 |
|
|
} |
235 |
|
|
memcpy(sp->repl, bp, len); |
236 |
|
|
} |
237 |
|
|
FREE_SPACE(sp, bp, blen); |
238 |
|
|
} |
239 |
|
|
return (s(sp, cmdp, p, re, flags)); |
240 |
|
|
} |
241 |
|
|
|
242 |
|
|
/* |
243 |
|
|
* ex_subagain -- |
244 |
|
|
* [line [,line]] & [cgr] [count] [#lp]] |
245 |
|
|
* |
246 |
|
|
* Substitute using the last substitute RE and replacement pattern. |
247 |
|
|
* |
248 |
|
|
* PUBLIC: int ex_subagain(SCR *, EXCMD *); |
249 |
|
|
*/ |
250 |
|
|
int |
251 |
|
|
ex_subagain(SCR *sp, EXCMD *cmdp) |
252 |
|
|
{ |
253 |
|
|
if (sp->subre == NULL) { |
254 |
|
|
ex_emsg(sp, NULL, EXM_NOPREVRE); |
255 |
|
|
return (1); |
256 |
|
|
} |
257 |
|
|
if (!F_ISSET(sp, SC_RE_SUBST) && re_compile(sp, |
258 |
|
|
sp->subre, sp->subre_len, NULL, NULL, &sp->subre_c, RE_C_SUBST)) |
259 |
|
|
return (1); |
260 |
|
|
return (s(sp, |
261 |
|
|
cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->subre_c, 0)); |
262 |
|
|
} |
263 |
|
|
|
264 |
|
|
/* |
265 |
|
|
* ex_subtilde -- |
266 |
|
|
* [line [,line]] ~ [cgr] [count] [#lp]] |
267 |
|
|
* |
268 |
|
|
* Substitute using the last RE and last substitute replacement pattern. |
269 |
|
|
* |
270 |
|
|
* PUBLIC: int ex_subtilde(SCR *, EXCMD *); |
271 |
|
|
*/ |
272 |
|
|
int |
273 |
|
|
ex_subtilde(SCR *sp, EXCMD *cmdp) |
274 |
|
|
{ |
275 |
|
|
if (sp->re == NULL) { |
276 |
|
|
ex_emsg(sp, NULL, EXM_NOPREVRE); |
277 |
|
|
return (1); |
278 |
|
|
} |
279 |
|
|
if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp, |
280 |
|
|
sp->re, sp->re_len, NULL, NULL, &sp->re_c, RE_C_SEARCH)) |
281 |
|
|
return (1); |
282 |
|
|
return (s(sp, |
283 |
|
|
cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->re_c, 0)); |
284 |
|
|
} |
285 |
|
|
|
286 |
|
|
/*
 * s --
 *	Do the substitution.  This stuff is *really* tricky.  There are lots of
 *	special cases, and general nastiness.  Don't mess with it unless you're
 *	pretty confident.
 *
 * The nasty part of the substitution is what happens when the replacement
 * string contains newlines.  It's a bit tricky -- consider the information
 * that has to be retained for "s/f\(o\)o/^M\1^M\1/".  The solution here is
 * to build a set of newline offsets which we use to break the line up later,
 * when the replacement is done.  Don't change it unless you're *damned*
 * confident.
 */
299 |
|
|
/*
 * NEEDNEWLINE --
 *	Make sure (sp)->newl has room for one more entry, growing it by 25
 *	size_t slots when newl_cnt has reached newl_len.  If the array is
 *	NULL after the reallocation, newl_len is reset to 0 and the macro
 *	executes "return (1)" in the function that uses it.
 *
 * The macro expands to a bare brace block, so it is used as a statement
 * without needing a trailing semicolon.
 */
#define	NEEDNEWLINE(sp) {						\
	if ((sp)->newl_len == (sp)->newl_cnt) {				\
		(sp)->newl_len += 25;					\
		REALLOCARRAY((sp), (sp)->newl,				\
		    (sp)->newl_len, sizeof(size_t));			\
		if ((sp)->newl == NULL) {				\
			(sp)->newl_len = 0;				\
			return (1);					\
		}							\
	}								\
}

/*
 * BUILD --
 *	Append len bytes starting at l to the build buffer.  The macro uses
 *	the variables lb (buffer), lbclen (bytes used) and lblen (bytes
 *	allocated) from the enclosing scope, growing lb first if the new
 *	bytes would not fit.  If lb is NULL after the reallocation, lbclen is
 *	reset to 0 and the macro executes "return (1)" in the function that
 *	uses it.
 *
 * The len argument is evaluated more than once.
 */
#define	BUILD(sp, l, len) {						\
	if (lbclen + (len) > lblen) {					\
		lblen += MAXIMUM(lbclen + (len), 256);			\
		REALLOC((sp), lb, lblen);				\
		if (lb == NULL) {					\
			lbclen = 0;					\
			return (1);					\
		}							\
	}								\
	memcpy(lb + lbclen, (l), (len));				\
	lbclen += (len);						\
}

/*
 * NEEDSP --
 *	Make sure the build buffer has room for len more bytes without
 *	copying anything.  Uses lb, lbclen and lblen from the enclosing scope
 *	like BUILD.  When the buffer had to be reallocated, pnt is reset to
 *	lb + lbclen, i.e. the current end of the built data in the new
 *	buffer.  If lb is NULL after the reallocation, lbclen is reset to 0
 *	and the macro executes "return (1)" in the function that uses it.
 */
#define	NEEDSP(sp, len, pnt) {						\
	if (lbclen + (len) > lblen) {					\
		lblen += MAXIMUM(lbclen + (len), 256);			\
		REALLOC((sp), lb, lblen);				\
		if (lb == NULL) {					\
			lbclen = 0;					\
			return (1);					\
		}							\
		(pnt) = lb + lbclen;					\
	}								\
}
335 |
|
|
|
336 |
|
|
static int |
337 |
|
|
s(SCR *sp, EXCMD *cmdp, char *s, regex_t *re, u_int flags) |
338 |
|
|
{ |
339 |
|
|
EVENT ev; |
340 |
|
|
MARK from, to; |
341 |
|
|
TEXTH tiq; |
342 |
|
|
recno_t elno, lno, slno; |
343 |
|
|
regmatch_t match[10]; |
344 |
|
|
size_t blen, cnt, last, lbclen, lblen, len, llen; |
345 |
|
|
size_t offset, saved_offset, scno; |
346 |
|
|
int lflag, nflag, pflag, rflag; |
347 |
|
|
int didsub, do_eol_match, eflags, nempty, eval; |
348 |
|
|
int linechanged, matched, quit, rval; |
349 |
|
|
unsigned long ul; |
350 |
|
|
char *bp, *lb; |
351 |
|
|
|
352 |
|
|
NEEDFILE(sp, cmdp); |
353 |
|
|
|
354 |
|
|
slno = sp->lno; |
355 |
|
|
scno = sp->cno; |
356 |
|
|
|
357 |
|
|
/* |
358 |
|
|
* !!! |
359 |
|
|
* Historically, the 'g' and 'c' suffices were always toggled as flags, |
360 |
|
|
* so ":s/A/B/" was the same as ":s/A/B/ccgg". If O_EDCOMPATIBLE was |
361 |
|
|
* not set, they were initialized to 0 for all substitute commands. If |
362 |
|
|
* O_EDCOMPATIBLE was set, they were initialized to 0 only if the user |
363 |
|
|
* specified substitute/replacement patterns (see ex_s()). |
364 |
|
|
*/ |
365 |
|
|
if (!O_ISSET(sp, O_EDCOMPATIBLE)) |
366 |
|
|
sp->c_suffix = sp->g_suffix = 0; |
367 |
|
|
|
368 |
|
|
/* |
369 |
|
|
* Historic vi permitted the '#', 'l' and 'p' options in vi mode, but |
370 |
|
|
* it only displayed the last change. I'd disallow them, but they are |
371 |
|
|
* useful in combination with the [v]global commands. In the current |
372 |
|
|
* model the problem is combining them with the 'c' flag -- the screen |
373 |
|
|
* would have to flip back and forth between the confirm screen and the |
374 |
|
|
* ex print screen, which would be pretty awful. We do display all |
375 |
|
|
* changes, though, for what that's worth. |
376 |
|
|
* |
377 |
|
|
* !!! |
378 |
|
|
* Historic vi was fairly strict about the order of "options", the |
379 |
|
|
* count, and "flags". I'm somewhat fuzzy on the difference between |
380 |
|
|
* options and flags, anyway, so this is a simpler approach, and we |
381 |
|
|
* just take it them in whatever order the user gives them. (The ex |
382 |
|
|
* usage statement doesn't reflect this.) |
383 |
|
|
*/ |
384 |
|
|
lflag = nflag = pflag = rflag = 0; |
385 |
|
|
if (s == NULL) |
386 |
|
|
goto noargs; |
387 |
|
|
for (lno = OOBLNO; *s != '\0'; ++s) |
388 |
|
|
switch (*s) { |
389 |
|
|
case ' ': |
390 |
|
|
case '\t': |
391 |
|
|
continue; |
392 |
|
|
case '+': |
393 |
|
|
++cmdp->flagoff; |
394 |
|
|
break; |
395 |
|
|
case '-': |
396 |
|
|
--cmdp->flagoff; |
397 |
|
|
break; |
398 |
|
|
case '0': case '1': case '2': case '3': case '4': |
399 |
|
|
case '5': case '6': case '7': case '8': case '9': |
400 |
|
|
if (lno != OOBLNO) |
401 |
|
|
goto usage; |
402 |
|
|
errno = 0; |
403 |
|
|
if ((ul = strtoul(s, &s, 10)) >= UINT_MAX) |
404 |
|
|
errno = ERANGE; |
405 |
|
|
if (*s == '\0') /* Loop increment correction. */ |
406 |
|
|
--s; |
407 |
|
|
if (errno == ERANGE) { |
408 |
|
|
if (ul >= UINT_MAX) |
409 |
|
|
msgq(sp, M_ERR, "Count overflow"); |
410 |
|
|
else |
411 |
|
|
msgq(sp, M_SYSERR, NULL); |
412 |
|
|
return (1); |
413 |
|
|
} |
414 |
|
|
lno = (recno_t)ul; |
415 |
|
|
/* |
416 |
|
|
* In historic vi, the count was inclusive from the |
417 |
|
|
* second address. |
418 |
|
|
*/ |
419 |
|
|
cmdp->addr1.lno = cmdp->addr2.lno; |
420 |
|
|
cmdp->addr2.lno += lno - 1; |
421 |
|
|
if (!db_exist(sp, cmdp->addr2.lno) && |
422 |
|
|
db_last(sp, &cmdp->addr2.lno)) |
423 |
|
|
return (1); |
424 |
|
|
break; |
425 |
|
|
case '#': |
426 |
|
|
nflag = 1; |
427 |
|
|
break; |
428 |
|
|
case 'c': |
429 |
|
|
sp->c_suffix = !sp->c_suffix; |
430 |
|
|
|
431 |
|
|
/* Ex text structure initialization. */ |
432 |
|
|
if (F_ISSET(sp, SC_EX)) { |
433 |
|
|
memset(&tiq, 0, sizeof(TEXTH)); |
434 |
|
|
TAILQ_INIT(&tiq); |
435 |
|
|
} |
436 |
|
|
break; |
437 |
|
|
case 'g': |
438 |
|
|
sp->g_suffix = !sp->g_suffix; |
439 |
|
|
break; |
440 |
|
|
case 'l': |
441 |
|
|
lflag = 1; |
442 |
|
|
break; |
443 |
|
|
case 'p': |
444 |
|
|
pflag = 1; |
445 |
|
|
break; |
446 |
|
|
case 'r': |
447 |
|
|
if (LF_ISSET(SUB_FIRST)) { |
448 |
|
|
msgq(sp, M_ERR, |
449 |
|
|
"Regular expression specified; r flag meaningless"); |
450 |
|
|
return (1); |
451 |
|
|
} |
452 |
|
|
if (!F_ISSET(sp, SC_RE_SEARCH)) { |
453 |
|
|
ex_emsg(sp, NULL, EXM_NOPREVRE); |
454 |
|
|
return (1); |
455 |
|
|
} |
456 |
|
|
rflag = 1; |
457 |
|
|
re = &sp->re_c; |
458 |
|
|
break; |
459 |
|
|
default: |
460 |
|
|
goto usage; |
461 |
|
|
} |
462 |
|
|
|
463 |
|
|
if (*s != '\0' || (!rflag && LF_ISSET(SUB_MUSTSETR))) { |
464 |
|
|
usage: ex_emsg(sp, cmdp->cmd->usage, EXM_USAGE); |
465 |
|
|
return (1); |
466 |
|
|
} |
467 |
|
|
|
468 |
|
|
noargs: if (F_ISSET(sp, SC_VI) && sp->c_suffix && (lflag || nflag || pflag)) { |
469 |
|
|
msgq(sp, M_ERR, |
470 |
|
|
"The #, l and p flags may not be combined with the c flag in vi mode"); |
471 |
|
|
return (1); |
472 |
|
|
} |
473 |
|
|
|
474 |
|
|
/* |
475 |
|
|
* bp: if interactive, line cache |
476 |
|
|
* blen: if interactive, line cache length |
477 |
|
|
* lb: build buffer pointer. |
478 |
|
|
* lbclen: current length of built buffer. |
479 |
|
|
* lblen; length of build buffer. |
480 |
|
|
*/ |
481 |
|
|
bp = lb = NULL; |
482 |
|
|
blen = lbclen = lblen = 0; |
483 |
|
|
|
484 |
|
|
/* For each line... */ |
485 |
|
|
for (matched = quit = 0, lno = cmdp->addr1.lno, |
486 |
|
|
elno = cmdp->addr2.lno; !quit && lno <= elno; ++lno) { |
487 |
|
|
|
488 |
|
|
/* Someone's unhappy, time to stop. */ |
489 |
|
|
if (INTERRUPTED(sp)) |
490 |
|
|
break; |
491 |
|
|
|
492 |
|
|
/* Get the line. */ |
493 |
|
|
if (db_get(sp, lno, DBG_FATAL, &s, &llen)) |
494 |
|
|
goto err; |
495 |
|
|
|
496 |
|
|
/* |
497 |
|
|
* Make a local copy if doing confirmation -- when calling |
498 |
|
|
* the confirm routine we're likely to lose the cached copy. |
499 |
|
|
*/ |
500 |
|
|
if (sp->c_suffix) { |
501 |
|
|
if (bp == NULL) { |
502 |
|
|
GET_SPACE_RET(sp, bp, blen, llen); |
503 |
|
|
} else |
504 |
|
|
ADD_SPACE_RET(sp, bp, blen, llen); |
505 |
|
|
memcpy(bp, s, llen); |
506 |
|
|
s = bp; |
507 |
|
|
} |
508 |
|
|
|
509 |
|
|
/* Start searching from the beginning. */ |
510 |
|
|
offset = 0; |
511 |
|
|
len = llen; |
512 |
|
|
|
513 |
|
|
/* Reset the build buffer offset. */ |
514 |
|
|
lbclen = 0; |
515 |
|
|
|
516 |
|
|
/* Reset empty match test variable. */ |
517 |
|
|
nempty = -1; |
518 |
|
|
|
519 |
|
|
/* |
520 |
|
|
* We don't want to have to do a setline if the line didn't |
521 |
|
|
* change -- keep track of whether or not this line changed. |
522 |
|
|
* If doing confirmations, don't want to keep setting the |
523 |
|
|
* line if change is refused -- keep track of substitutions. |
524 |
|
|
*/ |
525 |
|
|
didsub = linechanged = 0; |
526 |
|
|
|
527 |
|
|
/* New line, do an EOL match. */ |
528 |
|
|
do_eol_match = 1; |
529 |
|
|
|
530 |
|
|
/* It's not nul terminated, but we pretend it is. */ |
531 |
|
|
eflags = REG_STARTEND; |
532 |
|
|
|
533 |
|
|
/* The search area is from s + offset to the EOL. */ |
534 |
|
|
nextmatch: match[0].rm_so = offset; |
535 |
|
|
match[0].rm_eo = llen; |
536 |
|
|
|
537 |
|
|
/* Get the next match. */ |
538 |
|
|
eval = regexec(re, (char *)s, 10, match, eflags); |
539 |
|
|
|
540 |
|
|
/* |
541 |
|
|
* There wasn't a match or if there was an error, deal with |
542 |
|
|
* it. If there was a previous match in this line, resolve |
543 |
|
|
* the changes into the database. Otherwise, just move on. |
544 |
|
|
*/ |
545 |
|
|
if (eval == REG_NOMATCH) |
546 |
|
|
goto endmatch; |
547 |
|
|
if (eval != 0) { |
548 |
|
|
re_error(sp, eval, re); |
549 |
|
|
goto err; |
550 |
|
|
} |
551 |
|
|
matched = 1; |
552 |
|
|
|
553 |
|
|
/* Only the first search can match an anchored expression. */ |
554 |
|
|
eflags |= REG_NOTBOL; |
555 |
|
|
|
556 |
|
|
/* |
557 |
|
|
* !!! |
558 |
|
|
* It's possible to match 0-length strings -- for example, the |
559 |
|
|
* command s;a*;X;, when matched against the string "aabb" will |
560 |
|
|
* result in "XbXbX", i.e. the matches are "aa", the space |
561 |
|
|
* between the b's and the space between the b's and the end of |
562 |
|
|
* the string. There is a similar space between the beginning |
563 |
|
|
* of the string and the a's. The rule that we use (because vi |
564 |
|
|
* historically used it) is that any 0-length match, occurring |
565 |
|
|
* immediately after a match, is ignored. Otherwise, the above |
566 |
|
|
* example would have resulted in "XXbXbX". Another example is |
567 |
|
|
* incorrectly using " *" to replace groups of spaces with one |
568 |
|
|
* space. |
569 |
|
|
* |
570 |
|
|
* If the match is empty and at the same place as the end of the |
571 |
|
|
* previous match, ignore the match and move forward. If |
572 |
|
|
* there's no more characters in the string, we were |
573 |
|
|
* attempting to match after the last character, so quit. |
574 |
|
|
*/ |
575 |
|
|
if (match[0].rm_so == nempty && match[0].rm_eo == nempty) { |
576 |
|
|
nempty = -1; |
577 |
|
|
if (len == 0) |
578 |
|
|
goto endmatch; |
579 |
|
|
BUILD(sp, s + offset, 1) |
580 |
|
|
++offset; |
581 |
|
|
--len; |
582 |
|
|
goto nextmatch; |
583 |
|
|
} |
584 |
|
|
|
585 |
|
|
/* Confirm change. */ |
586 |
|
|
if (sp->c_suffix) { |
587 |
|
|
/* |
588 |
|
|
* Set the cursor position for confirmation. Note, |
589 |
|
|
* if we matched on a '$', the cursor may be past |
590 |
|
|
* the end of line. |
591 |
|
|
*/ |
592 |
|
|
from.lno = to.lno = lno; |
593 |
|
|
from.cno = match[0].rm_so; |
594 |
|
|
to.cno = match[0].rm_eo; |
595 |
|
|
/* |
596 |
|
|
* Both ex and vi have to correct for a change before |
597 |
|
|
* the first character in the line. |
598 |
|
|
*/ |
599 |
|
|
if (llen == 0) |
600 |
|
|
from.cno = to.cno = 0; |
601 |
|
|
if (F_ISSET(sp, SC_VI)) { |
602 |
|
|
/* |
603 |
|
|
* Only vi has to correct for a change after |
604 |
|
|
* the last character in the line. |
605 |
|
|
* |
606 |
|
|
* XXX |
607 |
|
|
* It would be nice to change the vi code so |
608 |
|
|
* that we could display a cursor past EOL. |
609 |
|
|
*/ |
610 |
|
|
if (to.cno >= llen) |
611 |
|
|
to.cno = llen - 1; |
612 |
|
|
if (from.cno >= llen) |
613 |
|
|
from.cno = llen - 1; |
614 |
|
|
|
615 |
|
|
sp->lno = from.lno; |
616 |
|
|
sp->cno = from.cno; |
617 |
|
|
if (vs_refresh(sp, 1)) |
618 |
|
|
goto err; |
619 |
|
|
|
620 |
|
|
vs_update(sp, "Confirm change? [n]", NULL); |
621 |
|
|
|
622 |
|
|
if (v_event_get(sp, &ev, 0, 0)) |
623 |
|
|
goto err; |
624 |
|
|
switch (ev.e_event) { |
625 |
|
|
case E_CHARACTER: |
626 |
|
|
break; |
627 |
|
|
case E_EOF: |
628 |
|
|
case E_ERR: |
629 |
|
|
case E_INTERRUPT: |
630 |
|
|
goto lquit; |
631 |
|
|
default: |
632 |
|
|
v_event_err(sp, &ev); |
633 |
|
|
goto lquit; |
634 |
|
|
} |
635 |
|
|
} else { |
636 |
|
|
if (ex_print(sp, cmdp, &from, &to, 0) || |
637 |
|
|
ex_scprint(sp, &from, &to)) |
638 |
|
|
goto lquit; |
639 |
|
|
if (ex_txt(sp, &tiq, 0, TXT_CR)) |
640 |
|
|
goto err; |
641 |
|
|
ev.e_c = TAILQ_FIRST(&tiq)->lb[0]; |
642 |
|
|
} |
643 |
|
|
|
644 |
|
|
switch (ev.e_c) { |
645 |
|
|
case CH_YES: |
646 |
|
|
break; |
647 |
|
|
default: |
648 |
|
|
case CH_NO: |
649 |
|
|
didsub = 0; |
650 |
|
|
BUILD(sp, s + offset, match[0].rm_eo - offset); |
651 |
|
|
goto skip; |
652 |
|
|
case CH_QUIT: |
653 |
|
|
/* Set the quit/interrupted flags. */ |
654 |
|
|
lquit: quit = 1; |
655 |
|
|
F_SET(sp->gp, G_INTERRUPTED); |
656 |
|
|
|
657 |
|
|
/* |
658 |
|
|
* Resolve any changes, then return to (and |
659 |
|
|
* exit from) the main loop. |
660 |
|
|
*/ |
661 |
|
|
goto endmatch; |
662 |
|
|
} |
663 |
|
|
} |
664 |
|
|
|
665 |
|
|
/* |
666 |
|
|
* Set the cursor to the last position changed, converting |
667 |
|
|
* from 1-based to 0-based. |
668 |
|
|
*/ |
669 |
|
|
sp->lno = lno; |
670 |
|
|
sp->cno = match[0].rm_so; |
671 |
|
|
|
672 |
|
|
/* Copy the bytes before the match into the build buffer. */ |
673 |
|
|
BUILD(sp, s + offset, match[0].rm_so - offset); |
674 |
|
|
|
675 |
|
|
/* Substitute the matching bytes. */ |
676 |
|
|
didsub = 1; |
677 |
|
|
if (re_sub(sp, s, &lb, &lbclen, &lblen, match)) |
678 |
|
|
goto err; |
679 |
|
|
|
680 |
|
|
/* Set the change flag so we know this line was modified. */ |
681 |
|
|
linechanged = 1; |
682 |
|
|
|
683 |
|
|
/* Move past the matched bytes. */ |
684 |
|
|
skip: offset = match[0].rm_eo; |
685 |
|
|
len = llen - match[0].rm_eo; |
686 |
|
|
|
687 |
|
|
/* A match cannot be followed by an empty pattern. */ |
688 |
|
|
nempty = match[0].rm_eo; |
689 |
|
|
|
690 |
|
|
/* |
691 |
|
|
* If doing a global change with confirmation, we have to |
692 |
|
|
* update the screen. The basic idea is to store the line |
693 |
|
|
* so the screen update routines can find it, and restart. |
694 |
|
|
*/ |
695 |
|
|
if (didsub && sp->c_suffix && sp->g_suffix) { |
696 |
|
|
/* |
697 |
|
|
* The new search offset will be the end of the |
698 |
|
|
* modified line. |
699 |
|
|
*/ |
700 |
|
|
saved_offset = lbclen; |
701 |
|
|
|
702 |
|
|
/* Copy the rest of the line. */ |
703 |
|
|
if (len) |
704 |
|
|
BUILD(sp, s + offset, len) |
705 |
|
|
|
706 |
|
|
/* Set the new offset. */ |
707 |
|
|
offset = saved_offset; |
708 |
|
|
|
709 |
|
|
/* Store inserted lines, adjusting the build buffer. */ |
710 |
|
|
last = 0; |
711 |
|
|
if (sp->newl_cnt) { |
712 |
|
|
for (cnt = 0; |
713 |
|
|
cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) { |
714 |
|
|
if (db_insert(sp, lno, |
715 |
|
|
lb + last, sp->newl[cnt] - last)) |
716 |
|
|
goto err; |
717 |
|
|
last = sp->newl[cnt] + 1; |
718 |
|
|
++sp->rptlines[L_ADDED]; |
719 |
|
|
} |
720 |
|
|
lbclen -= last; |
721 |
|
|
offset -= last; |
722 |
|
|
sp->newl_cnt = 0; |
723 |
|
|
} |
724 |
|
|
|
725 |
|
|
/* Store and retrieve the line. */ |
726 |
|
|
if (db_set(sp, lno, lb + last, lbclen)) |
727 |
|
|
goto err; |
728 |
|
|
if (db_get(sp, lno, DBG_FATAL, &s, &llen)) |
729 |
|
|
goto err; |
730 |
|
|
ADD_SPACE_RET(sp, bp, blen, llen) |
731 |
|
|
memcpy(bp, s, llen); |
732 |
|
|
s = bp; |
733 |
|
|
len = llen - offset; |
734 |
|
|
|
735 |
|
|
/* Restart the build. */ |
736 |
|
|
lbclen = 0; |
737 |
|
|
BUILD(sp, s, offset); |
738 |
|
|
|
739 |
|
|
/* |
740 |
|
|
* If we haven't already done the after-the-string |
741 |
|
|
* match, do one. Set REG_NOTEOL so the '$' pattern |
742 |
|
|
* only matches once. |
743 |
|
|
*/ |
744 |
|
|
if (!do_eol_match) |
745 |
|
|
goto endmatch; |
746 |
|
|
if (offset == len) { |
747 |
|
|
do_eol_match = 0; |
748 |
|
|
eflags |= REG_NOTEOL; |
749 |
|
|
} |
750 |
|
|
goto nextmatch; |
751 |
|
|
} |
752 |
|
|
|
753 |
|
|
/* |
754 |
|
|
* If it's a global: |
755 |
|
|
* |
756 |
|
|
* If at the end of the string, do a test for the after |
757 |
|
|
* the string match. Set REG_NOTEOL so the '$' pattern |
758 |
|
|
* only matches once. |
759 |
|
|
*/ |
760 |
|
|
if (sp->g_suffix && do_eol_match) { |
761 |
|
|
if (len == 0) { |
762 |
|
|
do_eol_match = 0; |
763 |
|
|
eflags |= REG_NOTEOL; |
764 |
|
|
} |
765 |
|
|
goto nextmatch; |
766 |
|
|
} |
767 |
|
|
|
768 |
|
|
endmatch: if (!linechanged) |
769 |
|
|
continue; |
770 |
|
|
|
771 |
|
|
/* Copy any remaining bytes into the build buffer. */ |
772 |
|
|
if (len) |
773 |
|
|
BUILD(sp, s + offset, len) |
774 |
|
|
|
775 |
|
|
/* Store inserted lines, adjusting the build buffer. */ |
776 |
|
|
last = 0; |
777 |
|
|
if (sp->newl_cnt) { |
778 |
|
|
for (cnt = 0; |
779 |
|
|
cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) { |
780 |
|
|
if (db_insert(sp, |
781 |
|
|
lno, lb + last, sp->newl[cnt] - last)) |
782 |
|
|
goto err; |
783 |
|
|
last = sp->newl[cnt] + 1; |
784 |
|
|
++sp->rptlines[L_ADDED]; |
785 |
|
|
} |
786 |
|
|
lbclen -= last; |
787 |
|
|
sp->newl_cnt = 0; |
788 |
|
|
} |
789 |
|
|
|
790 |
|
|
/* Store the changed line. */ |
791 |
|
|
if (db_set(sp, lno, lb + last, lbclen)) |
792 |
|
|
goto err; |
793 |
|
|
|
794 |
|
|
/* Update changed line counter. */ |
795 |
|
|
if (sp->rptlchange != lno) { |
796 |
|
|
sp->rptlchange = lno; |
797 |
|
|
++sp->rptlines[L_CHANGED]; |
798 |
|
|
} |
799 |
|
|
|
800 |
|
|
/* |
801 |
|
|
* !!! |
802 |
|
|
* Display as necessary. Historic practice is to only |
803 |
|
|
* display the last line of a line split into multiple |
804 |
|
|
* lines. |
805 |
|
|
*/ |
806 |
|
|
if (lflag || nflag || pflag) { |
807 |
|
|
from.lno = to.lno = lno; |
808 |
|
|
from.cno = to.cno = 0; |
809 |
|
|
if (lflag) |
810 |
|
|
(void)ex_print(sp, cmdp, &from, &to, E_C_LIST); |
811 |
|
|
if (nflag) |
812 |
|
|
(void)ex_print(sp, cmdp, &from, &to, E_C_HASH); |
813 |
|
|
if (pflag) |
814 |
|
|
(void)ex_print(sp, cmdp, &from, &to, E_C_PRINT); |
815 |
|
|
} |
816 |
|
|
} |
817 |
|
|
|
818 |
|
|
/* |
819 |
|
|
* !!! |
820 |
|
|
* Historically, vi attempted to leave the cursor at the same place if |
821 |
|
|
* the substitution was done at the current cursor position. Otherwise |
822 |
|
|
* it moved it to the first non-blank of the last line changed. There |
823 |
|
|
* were some problems: for example, :s/$/foo/ with the cursor on the |
824 |
|
|
* last character of the line left the cursor on the last character, or |
825 |
|
|
* the & command with multiple occurrences of the matching string in the |
826 |
|
|
* line usually left the cursor in a fairly random position. |
827 |
|
|
* |
828 |
|
|
* We try to do the same thing, with the exception that if the user is |
829 |
|
|
* doing substitution with confirmation, we move to the last line about |
830 |
|
|
* which the user was consulted, as opposed to the last line that they |
831 |
|
|
* actually changed. This prevents a screen flash if the user doesn't |
832 |
|
|
* change many of the possible lines. |
833 |
|
|
*/ |
834 |
|
|
if (!sp->c_suffix && (sp->lno != slno || sp->cno != scno)) { |
835 |
|
|
sp->cno = 0; |
836 |
|
|
(void)nonblank(sp, sp->lno, &sp->cno); |
837 |
|
|
} |
838 |
|
|
|
839 |
|
|
/* |
840 |
|
|
* If not in a global command, and nothing matched, say so. |
841 |
|
|
* Else, if none of the lines displayed, put something up. |
842 |
|
|
*/ |
843 |
|
|
rval = 0; |
844 |
|
|
if (!matched) { |
845 |
|
|
if (!F_ISSET(sp, SC_EX_GLOBAL)) { |
846 |
|
|
msgq(sp, M_ERR, "No match found"); |
847 |
|
|
goto err; |
848 |
|
|
} |
849 |
|
|
} else if (!lflag && !nflag && !pflag) |
850 |
|
|
F_SET(cmdp, E_AUTOPRINT); |
851 |
|
|
|
852 |
|
|
if (0) { |
853 |
|
|
err: rval = 1; |
854 |
|
|
} |
855 |
|
|
|
856 |
|
|
if (bp != NULL) |
857 |
|
|
FREE_SPACE(sp, bp, blen); |
858 |
|
|
free(lb); |
859 |
|
|
return (rval); |
860 |
|
|
} |
861 |
|
|
|
862 |
|
|
/* |
863 |
|
|
* re_compile -- |
864 |
|
|
* Compile the RE. |
865 |
|
|
* |
866 |
|
|
* PUBLIC: int re_compile(SCR *, |
867 |
|
|
* PUBLIC: char *, size_t, char **, size_t *, regex_t *, u_int); |
868 |
|
|
*/ |
869 |
|
|
int |
870 |
|
|
re_compile(SCR *sp, char *ptrn, size_t plen, char **ptrnp, size_t *lenp, |
871 |
|
|
regex_t *rep, u_int flags) |
872 |
|
|
{ |
873 |
|
|
size_t len; |
874 |
|
|
int reflags, replaced, rval; |
875 |
|
|
char *p; |
876 |
|
|
|
877 |
|
|
/* Set RE flags. */ |
878 |
|
|
reflags = 0; |
879 |
|
|
if (!LF_ISSET(RE_C_TAG)) { |
880 |
|
|
if (O_ISSET(sp, O_EXTENDED)) |
881 |
|
|
reflags |= REG_EXTENDED; |
882 |
|
|
if (O_ISSET(sp, O_IGNORECASE)) |
883 |
|
|
reflags |= REG_ICASE; |
884 |
|
|
if (O_ISSET(sp, O_ICLOWER)) { |
885 |
|
|
for (p = ptrn, len = plen; len > 0; ++p, --len) |
886 |
|
|
if (isupper(*p)) |
887 |
|
|
break; |
888 |
|
|
if (len == 0) |
889 |
|
|
reflags |= REG_ICASE; |
890 |
|
|
} |
891 |
|
|
} |
892 |
|
|
|
893 |
|
|
/* If we're replacing a saved value, clear the old one. */ |
894 |
|
|
if (LF_ISSET(RE_C_SEARCH) && F_ISSET(sp, SC_RE_SEARCH)) { |
895 |
|
|
regfree(&sp->re_c); |
896 |
|
|
F_CLR(sp, SC_RE_SEARCH); |
897 |
|
|
} |
898 |
|
|
if (LF_ISSET(RE_C_SUBST) && F_ISSET(sp, SC_RE_SUBST)) { |
899 |
|
|
regfree(&sp->subre_c); |
900 |
|
|
F_CLR(sp, SC_RE_SUBST); |
901 |
|
|
} |
902 |
|
|
|
903 |
|
|
/* |
904 |
|
|
* If we're saving the string, it's a pattern we haven't seen before, |
905 |
|
|
* so convert the vi-style RE's to POSIX 1003.2 RE's. Save a copy for |
906 |
|
|
* later recompilation. Free any previously saved value. |
907 |
|
|
*/ |
908 |
|
|
if (ptrnp != NULL) { |
909 |
|
|
if (LF_ISSET(RE_C_TAG)) { |
910 |
|
|
if (re_tag_conv(sp, &ptrn, &plen, &replaced)) |
911 |
|
|
return (1); |
912 |
|
|
} else |
913 |
|
|
if (re_conv(sp, &ptrn, &plen, &replaced)) |
914 |
|
|
return (1); |
915 |
|
|
|
916 |
|
|
/* Discard previous pattern. */ |
917 |
|
|
free(*ptrnp); |
918 |
|
|
*ptrnp = NULL; |
919 |
|
|
if (lenp != NULL) |
920 |
|
|
*lenp = plen; |
921 |
|
|
|
922 |
|
|
/* |
923 |
|
|
* Copy the string into allocated memory. |
924 |
|
|
* |
925 |
|
|
* XXX |
926 |
|
|
* Regcomp isn't 8-bit clean, so the pattern is nul-terminated |
927 |
|
|
* for now. There's just no other solution. |
928 |
|
|
*/ |
929 |
|
|
MALLOC(sp, *ptrnp, plen + 1); |
930 |
|
|
if (*ptrnp != NULL) { |
931 |
|
|
memcpy(*ptrnp, ptrn, plen); |
932 |
|
|
(*ptrnp)[plen] = '\0'; |
933 |
|
|
} |
934 |
|
|
|
935 |
|
|
/* Free up conversion-routine-allocated memory. */ |
936 |
|
|
if (replaced) |
937 |
|
|
FREE_SPACE(sp, ptrn, 0); |
938 |
|
|
|
939 |
|
|
if (*ptrnp == NULL) |
940 |
|
|
return (1); |
941 |
|
|
|
942 |
|
|
ptrn = *ptrnp; |
943 |
|
|
} |
944 |
|
|
|
945 |
|
|
/* |
946 |
|
|
* XXX |
947 |
|
|
* Regcomp isn't 8-bit clean, so we just lost if the pattern |
948 |
|
|
* contained a nul. Bummer! |
949 |
|
|
*/ |
950 |
|
|
if ((rval = regcomp(rep, ptrn, /* plen, */ reflags)) != 0) { |
951 |
|
|
if (!LF_ISSET(RE_C_SILENT)) |
952 |
|
|
re_error(sp, rval, rep); |
953 |
|
|
return (1); |
954 |
|
|
} |
955 |
|
|
|
956 |
|
|
if (LF_ISSET(RE_C_SEARCH)) |
957 |
|
|
F_SET(sp, SC_RE_SEARCH); |
958 |
|
|
if (LF_ISSET(RE_C_SUBST)) |
959 |
|
|
F_SET(sp, SC_RE_SUBST); |
960 |
|
|
|
961 |
|
|
return (0); |
962 |
|
|
} |
963 |
|
|
|
964 |
|
|
/* |
965 |
|
|
* re_conv -- |
966 |
|
|
* Convert vi's regular expressions into something that the |
967 |
|
|
* the POSIX 1003.2 RE functions can handle. |
968 |
|
|
* |
969 |
|
|
* There are two conversions we make to make vi's RE's (specifically |
970 |
|
|
* the global, search, and substitute patterns) work with POSIX RE's. |
971 |
|
|
* We assume that \<ptrn\> does "word" searches, which is non-standard |
972 |
|
|
* but supported by most regexp libraries.. |
973 |
|
|
* |
974 |
|
|
* 1: If O_MAGIC is not set, strip backslashes from the magic character |
975 |
|
|
* set (.[*~) that have them, and add them to the ones that don't. |
976 |
|
|
* 2: If O_MAGIC is not set, the string "\~" is replaced with the text |
977 |
|
|
* from the last substitute command's replacement string. If O_MAGIC |
978 |
|
|
* is set, it's the string "~". |
979 |
|
|
* |
980 |
|
|
* !!!/XXX |
981 |
|
|
* This doesn't exactly match the historic behavior of vi because we do |
982 |
|
|
* the ~ substitution before calling the RE engine, so magic characters |
983 |
|
|
* in the replacement string will be expanded by the RE engine, and they |
984 |
|
|
* weren't historically. It's a bug. |
985 |
|
|
*/ |
986 |
|
|
static int |
987 |
|
|
re_conv(SCR *sp, char **ptrnp, size_t *plenp, int *replacedp) |
988 |
|
|
{ |
989 |
|
|
size_t blen, len, needlen; |
990 |
|
|
int magic; |
991 |
|
|
char *bp, *p, *t; |
992 |
|
|
|
993 |
|
|
/* |
994 |
|
|
* First pass through, we figure out how much space we'll need. |
995 |
|
|
* We do it in two passes, on the grounds that most of the time |
996 |
|
|
* the user is doing a search and won't have magic characters. |
997 |
|
|
* That way we can skip most of the memory allocation and copies. |
998 |
|
|
*/ |
999 |
|
|
magic = 0; |
1000 |
|
|
for (p = *ptrnp, len = *plenp, needlen = 0; len > 0; ++p, --len) |
1001 |
|
|
switch (*p) { |
1002 |
|
|
case '\\': |
1003 |
|
|
if (len > 1) { |
1004 |
|
|
--len; |
1005 |
|
|
switch (*++p) { |
1006 |
|
|
case '~': |
1007 |
|
|
if (!O_ISSET(sp, O_MAGIC)) { |
1008 |
|
|
magic = 1; |
1009 |
|
|
needlen += sp->repl_len; |
1010 |
|
|
} |
1011 |
|
|
break; |
1012 |
|
|
case '.': |
1013 |
|
|
case '[': |
1014 |
|
|
case '*': |
1015 |
|
|
if (!O_ISSET(sp, O_MAGIC)) { |
1016 |
|
|
magic = 1; |
1017 |
|
|
needlen += 1; |
1018 |
|
|
} |
1019 |
|
|
break; |
1020 |
|
|
default: |
1021 |
|
|
needlen += 2; |
1022 |
|
|
} |
1023 |
|
|
} else |
1024 |
|
|
needlen += 1; |
1025 |
|
|
break; |
1026 |
|
|
case '~': |
1027 |
|
|
if (O_ISSET(sp, O_MAGIC)) { |
1028 |
|
|
magic = 1; |
1029 |
|
|
needlen += sp->repl_len; |
1030 |
|
|
} |
1031 |
|
|
break; |
1032 |
|
|
case '.': |
1033 |
|
|
case '[': |
1034 |
|
|
case '*': |
1035 |
|
|
if (!O_ISSET(sp, O_MAGIC)) { |
1036 |
|
|
magic = 1; |
1037 |
|
|
needlen += 2; |
1038 |
|
|
} |
1039 |
|
|
break; |
1040 |
|
|
default: |
1041 |
|
|
needlen += 1; |
1042 |
|
|
break; |
1043 |
|
|
} |
1044 |
|
|
|
1045 |
|
|
if (!magic) { |
1046 |
|
|
*replacedp = 0; |
1047 |
|
|
return (0); |
1048 |
|
|
} |
1049 |
|
|
|
1050 |
|
|
/* Get enough memory to hold the final pattern. */ |
1051 |
|
|
*replacedp = 1; |
1052 |
|
|
GET_SPACE_RET(sp, bp, blen, needlen); |
1053 |
|
|
|
1054 |
|
|
for (p = *ptrnp, len = *plenp, t = bp; len > 0; ++p, --len) |
1055 |
|
|
switch (*p) { |
1056 |
|
|
case '\\': |
1057 |
|
|
if (len > 1) { |
1058 |
|
|
--len; |
1059 |
|
|
switch (*++p) { |
1060 |
|
|
case '~': |
1061 |
|
|
if (O_ISSET(sp, O_MAGIC)) |
1062 |
|
|
*t++ = '~'; |
1063 |
|
|
else { |
1064 |
|
|
memcpy(t, |
1065 |
|
|
sp->repl, sp->repl_len); |
1066 |
|
|
t += sp->repl_len; |
1067 |
|
|
} |
1068 |
|
|
break; |
1069 |
|
|
case '.': |
1070 |
|
|
case '[': |
1071 |
|
|
case '*': |
1072 |
|
|
if (O_ISSET(sp, O_MAGIC)) |
1073 |
|
|
*t++ = '\\'; |
1074 |
|
|
*t++ = *p; |
1075 |
|
|
break; |
1076 |
|
|
default: |
1077 |
|
|
*t++ = '\\'; |
1078 |
|
|
*t++ = *p; |
1079 |
|
|
} |
1080 |
|
|
} else |
1081 |
|
|
*t++ = '\\'; |
1082 |
|
|
break; |
1083 |
|
|
case '~': |
1084 |
|
|
if (O_ISSET(sp, O_MAGIC)) { |
1085 |
|
|
memcpy(t, sp->repl, sp->repl_len); |
1086 |
|
|
t += sp->repl_len; |
1087 |
|
|
} else |
1088 |
|
|
*t++ = '~'; |
1089 |
|
|
break; |
1090 |
|
|
case '.': |
1091 |
|
|
case '[': |
1092 |
|
|
case '*': |
1093 |
|
|
if (!O_ISSET(sp, O_MAGIC)) |
1094 |
|
|
*t++ = '\\'; |
1095 |
|
|
*t++ = *p; |
1096 |
|
|
break; |
1097 |
|
|
default: |
1098 |
|
|
*t++ = *p; |
1099 |
|
|
break; |
1100 |
|
|
} |
1101 |
|
|
|
1102 |
|
|
*ptrnp = bp; |
1103 |
|
|
*plenp = t - bp; |
1104 |
|
|
return (0); |
1105 |
|
|
} |
1106 |
|
|
|
1107 |
|
|
/* |
1108 |
|
|
* re_tag_conv -- |
1109 |
|
|
* Convert a tags search path into something that the POSIX |
1110 |
|
|
* 1003.2 RE functions can handle. |
1111 |
|
|
*/ |
1112 |
|
|
static int |
1113 |
|
|
re_tag_conv(SCR *sp, char **ptrnp, size_t *plenp, int *replacedp) |
1114 |
|
|
{ |
1115 |
|
|
size_t blen, len; |
1116 |
|
|
int lastdollar; |
1117 |
|
|
char *bp, *p, *t; |
1118 |
|
|
|
1119 |
|
|
len = *plenp; |
1120 |
|
|
|
1121 |
|
|
/* Max memory usage is 2 times the length of the string. */ |
1122 |
|
|
*replacedp = 1; |
1123 |
|
|
GET_SPACE_RET(sp, bp, blen, len * 2); |
1124 |
|
|
|
1125 |
|
|
p = *ptrnp; |
1126 |
|
|
t = bp; |
1127 |
|
|
|
1128 |
|
|
/* If the last character is a '/' or '?', we just strip it. */ |
1129 |
|
|
if (len > 0 && (p[len - 1] == '/' || p[len - 1] == '?')) |
1130 |
|
|
--len; |
1131 |
|
|
|
1132 |
|
|
/* If the next-to-last or last character is a '$', it's magic. */ |
1133 |
|
|
if (len > 0 && p[len - 1] == '$') { |
1134 |
|
|
--len; |
1135 |
|
|
lastdollar = 1; |
1136 |
|
|
} else |
1137 |
|
|
lastdollar = 0; |
1138 |
|
|
|
1139 |
|
|
/* If the first character is a '/' or '?', we just strip it. */ |
1140 |
|
|
if (len > 0 && (p[0] == '/' || p[0] == '?')) { |
1141 |
|
|
++p; |
1142 |
|
|
--len; |
1143 |
|
|
} |
1144 |
|
|
|
1145 |
|
|
/* If the first or second character is a '^', it's magic. */ |
1146 |
|
|
if (p[0] == '^') { |
1147 |
|
|
*t++ = *p++; |
1148 |
|
|
--len; |
1149 |
|
|
} |
1150 |
|
|
|
1151 |
|
|
/* |
1152 |
|
|
* Escape every other magic character we can find, meanwhile stripping |
1153 |
|
|
* the backslashes ctags inserts when escaping the search delimiter |
1154 |
|
|
* characters. |
1155 |
|
|
*/ |
1156 |
|
|
for (; len > 0; --len) { |
1157 |
|
|
if (p[0] == '\\' && (p[1] == '/' || p[1] == '?')) { |
1158 |
|
|
++p; |
1159 |
|
|
--len; |
1160 |
|
|
} else if (strchr("^.[]$*", p[0])) |
1161 |
|
|
*t++ = '\\'; |
1162 |
|
|
*t++ = *p++; |
1163 |
|
|
if (len == 0) |
1164 |
|
|
break; |
1165 |
|
|
} |
1166 |
|
|
if (lastdollar) |
1167 |
|
|
*t++ = '$'; |
1168 |
|
|
|
1169 |
|
|
*ptrnp = bp; |
1170 |
|
|
*plenp = t - bp; |
1171 |
|
|
return (0); |
1172 |
|
|
} |
1173 |
|
|
|
1174 |
|
|
/* |
1175 |
|
|
* re_error -- |
1176 |
|
|
* Report a regular expression error. |
1177 |
|
|
* |
1178 |
|
|
* PUBLIC: void re_error(SCR *, int, regex_t *); |
1179 |
|
|
*/ |
1180 |
|
|
void |
1181 |
|
|
re_error(SCR *sp, int errcode, regex_t *preg) |
1182 |
|
|
{ |
1183 |
|
|
size_t s; |
1184 |
|
|
char *oe; |
1185 |
|
|
|
1186 |
|
|
s = regerror(errcode, preg, "", 0); |
1187 |
|
|
if ((oe = malloc(s)) == NULL) |
1188 |
|
|
msgq(sp, M_SYSERR, NULL); |
1189 |
|
|
else { |
1190 |
|
|
(void)regerror(errcode, preg, oe, s); |
1191 |
|
|
msgq(sp, M_ERR, "RE error: %s", oe); |
1192 |
|
|
free(oe); |
1193 |
|
|
} |
1194 |
|
|
} |
1195 |
|
|
|
1196 |
|
|
/* |
1197 |
|
|
* re_sub -- |
1198 |
|
|
* Do the substitution for a regular expression. |
1199 |
|
|
*/ |
1200 |
|
|
static int |
1201 |
|
|
re_sub(SCR *sp, char *ip, char **lbp, size_t *lbclenp, size_t *lblenp, |
1202 |
|
|
regmatch_t match[10]) |
1203 |
|
|
{ |
1204 |
|
|
enum { C_NOTSET, C_LOWER, C_ONELOWER, C_ONEUPPER, C_UPPER } conv; |
1205 |
|
|
size_t lbclen, lblen; /* Local copies. */ |
1206 |
|
|
size_t mlen; /* Match length. */ |
1207 |
|
|
size_t rpl; /* Remaining replacement length. */ |
1208 |
|
|
char *rp; /* Replacement pointer. */ |
1209 |
|
|
int ch; |
1210 |
|
|
int no; /* Match replacement offset. */ |
1211 |
|
|
char *p, *t; /* Buffer pointers. */ |
1212 |
|
|
char *lb; /* Local copies. */ |
1213 |
|
|
|
1214 |
|
|
lb = *lbp; /* Get local copies. */ |
1215 |
|
|
lbclen = *lbclenp; |
1216 |
|
|
lblen = *lblenp; |
1217 |
|
|
|
1218 |
|
|
/* |
1219 |
|
|
* QUOTING NOTE: |
1220 |
|
|
* |
1221 |
|
|
* There are some special sequences that vi provides in the |
1222 |
|
|
* replacement patterns. |
1223 |
|
|
* & string the RE matched (\& if nomagic set) |
1224 |
|
|
* \# n-th regular subexpression |
1225 |
|
|
* \E end \U, \L conversion |
1226 |
|
|
* \e end \U, \L conversion |
1227 |
|
|
* \l convert the next character to lower-case |
1228 |
|
|
* \L convert to lower-case, until \E, \e, or end of replacement |
1229 |
|
|
* \u convert the next character to upper-case |
1230 |
|
|
* \U convert to upper-case, until \E, \e, or end of replacement |
1231 |
|
|
* |
1232 |
|
|
* Otherwise, since this is the lowest level of replacement, discard |
1233 |
|
|
* all escaping characters. This (hopefully) matches historic practice. |
1234 |
|
|
*/ |
1235 |
|
|
#define OUTCH(ch, nltrans) { \ |
1236 |
|
|
CHAR_T __ch = (ch); \ |
1237 |
|
|
u_int __value = KEY_VAL(sp, __ch); \ |
1238 |
|
|
if ((nltrans) && (__value == K_CR || __value == K_NL)) { \ |
1239 |
|
|
NEEDNEWLINE(sp); \ |
1240 |
|
|
sp->newl[sp->newl_cnt++] = lbclen; \ |
1241 |
|
|
} else if (conv != C_NOTSET) { \ |
1242 |
|
|
switch (conv) { \ |
1243 |
|
|
case C_ONELOWER: \ |
1244 |
|
|
conv = C_NOTSET; \ |
1245 |
|
|
/* FALLTHROUGH */ \ |
1246 |
|
|
case C_LOWER: \ |
1247 |
|
|
if (isupper(__ch)) \ |
1248 |
|
|
__ch = tolower(__ch); \ |
1249 |
|
|
break; \ |
1250 |
|
|
case C_ONEUPPER: \ |
1251 |
|
|
conv = C_NOTSET; \ |
1252 |
|
|
/* FALLTHROUGH */ \ |
1253 |
|
|
case C_UPPER: \ |
1254 |
|
|
if (islower(__ch)) \ |
1255 |
|
|
__ch = toupper(__ch); \ |
1256 |
|
|
break; \ |
1257 |
|
|
default: \ |
1258 |
|
|
abort(); \ |
1259 |
|
|
} \ |
1260 |
|
|
} \ |
1261 |
|
|
NEEDSP(sp, 1, p); \ |
1262 |
|
|
*p++ = __ch; \ |
1263 |
|
|
++lbclen; \ |
1264 |
|
|
} |
1265 |
|
|
conv = C_NOTSET; |
1266 |
|
|
for (rp = sp->repl, rpl = sp->repl_len, p = lb + lbclen; rpl--;) { |
1267 |
|
|
switch (ch = *rp++) { |
1268 |
|
|
case '&': |
1269 |
|
|
if (O_ISSET(sp, O_MAGIC)) { |
1270 |
|
|
no = 0; |
1271 |
|
|
goto subzero; |
1272 |
|
|
} |
1273 |
|
|
break; |
1274 |
|
|
case '\\': |
1275 |
|
|
if (rpl == 0) |
1276 |
|
|
break; |
1277 |
|
|
--rpl; |
1278 |
|
|
switch (ch = *rp) { |
1279 |
|
|
case '&': |
1280 |
|
|
++rp; |
1281 |
|
|
if (!O_ISSET(sp, O_MAGIC)) { |
1282 |
|
|
no = 0; |
1283 |
|
|
goto subzero; |
1284 |
|
|
} |
1285 |
|
|
break; |
1286 |
|
|
case '0': case '1': case '2': case '3': case '4': |
1287 |
|
|
case '5': case '6': case '7': case '8': case '9': |
1288 |
|
|
no = *rp++ - '0'; |
1289 |
|
|
subzero: if (match[no].rm_so == -1 || |
1290 |
|
|
match[no].rm_eo == -1) |
1291 |
|
|
break; |
1292 |
|
|
mlen = match[no].rm_eo - match[no].rm_so; |
1293 |
|
|
for (t = ip + match[no].rm_so; mlen--; ++t) |
1294 |
|
|
OUTCH(*t, 0); |
1295 |
|
|
continue; |
1296 |
|
|
case 'e': |
1297 |
|
|
case 'E': |
1298 |
|
|
++rp; |
1299 |
|
|
conv = C_NOTSET; |
1300 |
|
|
continue; |
1301 |
|
|
case 'l': |
1302 |
|
|
++rp; |
1303 |
|
|
conv = C_ONELOWER; |
1304 |
|
|
continue; |
1305 |
|
|
case 'L': |
1306 |
|
|
++rp; |
1307 |
|
|
conv = C_LOWER; |
1308 |
|
|
continue; |
1309 |
|
|
case 'u': |
1310 |
|
|
++rp; |
1311 |
|
|
conv = C_ONEUPPER; |
1312 |
|
|
continue; |
1313 |
|
|
case 'U': |
1314 |
|
|
++rp; |
1315 |
|
|
conv = C_UPPER; |
1316 |
|
|
continue; |
1317 |
|
|
default: |
1318 |
|
|
++rp; |
1319 |
|
|
break; |
1320 |
|
|
} |
1321 |
|
|
} |
1322 |
|
|
OUTCH(ch, 1); |
1323 |
|
|
} |
1324 |
|
|
|
1325 |
|
|
*lbp = lb; /* Update caller's information. */ |
1326 |
|
|
*lbclenp = lbclen; |
1327 |
|
|
*lblenp = lblen; |
1328 |
|
|
return (0); |
1329 |
|
|
} |