1 |
|
|
/* $OpenBSD: sub.c,v 1.18 2016/10/11 06:54:05 martijn Exp $ */ |
2 |
|
|
/* $NetBSD: sub.c,v 1.4 1995/03/21 09:04:50 cgd Exp $ */ |
3 |
|
|
|
4 |
|
|
/* sub.c: This file contains the substitution routines for the ed |
5 |
|
|
line editor */ |
6 |
|
|
/*- |
7 |
|
|
* Copyright (c) 1993 Andrew Moore, Talke Studio. |
8 |
|
|
* All rights reserved. |
9 |
|
|
* |
10 |
|
|
* Redistribution and use in source and binary forms, with or without |
11 |
|
|
* modification, are permitted provided that the following conditions |
12 |
|
|
* are met: |
13 |
|
|
* 1. Redistributions of source code must retain the above copyright |
14 |
|
|
* notice, this list of conditions and the following disclaimer. |
15 |
|
|
* 2. Redistributions in binary form must reproduce the above copyright |
16 |
|
|
* notice, this list of conditions and the following disclaimer in the |
17 |
|
|
* documentation and/or other materials provided with the distribution. |
18 |
|
|
* |
19 |
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
20 |
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
21 |
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
22 |
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
23 |
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
24 |
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
25 |
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
26 |
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
27 |
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
28 |
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
29 |
|
|
* SUCH DAMAGE. |
30 |
|
|
*/ |
31 |
|
|
|
32 |
|
|
#include <limits.h> |
33 |
|
|
#include <regex.h> |
34 |
|
|
#include <signal.h> |
35 |
|
|
#include <stdio.h> |
36 |
|
|
#include <stdlib.h> |
37 |
|
|
#include <string.h> |
38 |
|
|
|
39 |
|
|
#include "ed.h" |
40 |
|
|
|
41 |
|
|
static char *extract_subst_template(void); |
42 |
|
|
static int substitute_matching_text(regex_t *, line_t *, int, int); |
43 |
|
|
static int apply_subst_template(char *, regmatch_t *, int, int); |
44 |
|
|
|
45 |
|
|
/*
 * Right-hand side (replacement template) of the substitute command.
 * Filled in by extract_subst_template() and read by apply_subst_template().
 */
static char *rhbuf;	/* template text */
static int rhbufsz;	/* size of rhbuf as tracked by REALLOC() */
static int rhbufi;	/* number of template bytes stored in rhbuf */
48 |
|
|
|
49 |
|
|
/* extract_subst_tail: extract substitution tail from the command buffer */ |
50 |
|
|
int |
51 |
|
|
extract_subst_tail(int *flagp, int *np) |
52 |
|
|
{ |
53 |
|
|
char delimiter; |
54 |
|
|
|
55 |
|
|
*flagp = *np = 0; |
56 |
|
|
if ((delimiter = *ibufp) == '\n') { |
57 |
|
|
rhbufi = 0; |
58 |
|
|
*flagp = GPR; |
59 |
|
|
return 0; |
60 |
|
|
} else if (extract_subst_template() == NULL) |
61 |
|
|
return ERR; |
62 |
|
|
else if (*ibufp == '\n') { |
63 |
|
|
*flagp = GPR; |
64 |
|
|
return 0; |
65 |
|
|
} else if (*ibufp == delimiter) |
66 |
|
|
ibufp++; |
67 |
|
|
if ('1' <= *ibufp && *ibufp <= '9') { |
68 |
|
|
STRTOI(*np, ibufp); |
69 |
|
|
return 0; |
70 |
|
|
} else if (*ibufp == 'g') { |
71 |
|
|
ibufp++; |
72 |
|
|
*flagp = GSG; |
73 |
|
|
return 0; |
74 |
|
|
} |
75 |
|
|
return 0; |
76 |
|
|
} |
77 |
|
|
|
78 |
|
|
|
79 |
|
|
/* extract_subst_template: return pointer to copy of substitution template |
80 |
|
|
in the command buffer */ |
81 |
|
|
static char * |
82 |
|
|
extract_subst_template(void) |
83 |
|
|
{ |
84 |
|
|
int n = 0; |
85 |
|
|
int i = 0; |
86 |
|
|
char c; |
87 |
|
|
char delimiter = *ibufp++; |
88 |
|
|
|
89 |
|
|
if (*ibufp == '%' && *(ibufp + 1) == delimiter) { |
90 |
|
|
ibufp++; |
91 |
|
|
if (!rhbuf) |
92 |
|
|
seterrmsg("no previous substitution"); |
93 |
|
|
return rhbuf; |
94 |
|
|
} |
95 |
|
|
while (*ibufp != delimiter) { |
96 |
|
|
REALLOC(rhbuf, rhbufsz, i + 2, NULL); |
97 |
|
|
if ((c = rhbuf[i++] = *ibufp++) == '\n' && *ibufp == '\0') { |
98 |
|
|
i--, ibufp--; |
99 |
|
|
break; |
100 |
|
|
} else if (c != '\\') |
101 |
|
|
; |
102 |
|
|
else if ((rhbuf[i++] = *ibufp++) != '\n') |
103 |
|
|
; |
104 |
|
|
else if (!isglobal) { |
105 |
|
|
while ((n = get_tty_line()) == 0 || |
106 |
|
|
(n > 0 && ibuf[n - 1] != '\n')) |
107 |
|
|
clearerr(stdin); |
108 |
|
|
if (n < 0) |
109 |
|
|
return NULL; |
110 |
|
|
} |
111 |
|
|
} |
112 |
|
|
REALLOC(rhbuf, rhbufsz, i + 1, NULL); |
113 |
|
|
rhbuf[rhbufi = i] = '\0'; |
114 |
|
|
return rhbuf; |
115 |
|
|
} |
116 |
|
|
|
117 |
|
|
|
118 |
|
|
/*
 * Output buffer shared by substitute_matching_text() and
 * apply_subst_template(); search_and_replace() reads the result from it.
 */
static char *rbuf;	/* text of the line after substitution */
static int rbufsz;	/* size of rbuf as tracked by REALLOC() */
120 |
|
|
|
121 |
|
|
/* search_and_replace: for each line in a range, change text matching a pattern |
122 |
|
|
according to a substitution template; return status */ |
123 |
|
|
int |
124 |
|
|
search_and_replace(regex_t *pat, int gflag, int kth) |
125 |
|
|
{ |
126 |
|
|
undo_t *up; |
127 |
|
|
char *txt; |
128 |
|
|
char *eot; |
129 |
|
|
int lc; |
130 |
|
|
int xa = current_addr; |
131 |
|
|
int nsubs = 0; |
132 |
|
|
line_t *lp; |
133 |
|
|
int len; |
134 |
|
|
|
135 |
|
|
current_addr = first_addr - 1; |
136 |
|
|
for (lc = 0; lc <= second_addr - first_addr; lc++) { |
137 |
|
|
lp = get_addressed_line_node(++current_addr); |
138 |
|
|
if ((len = substitute_matching_text(pat, lp, gflag, kth)) < 0) |
139 |
|
|
return ERR; |
140 |
|
|
else if (len) { |
141 |
|
|
up = NULL; |
142 |
|
|
if (delete_lines(current_addr, current_addr) < 0) |
143 |
|
|
return ERR; |
144 |
|
|
txt = rbuf; |
145 |
|
|
eot = rbuf + len; |
146 |
|
|
SPL1(); |
147 |
|
|
do { |
148 |
|
|
if ((txt = put_sbuf_line(txt)) == NULL) { |
149 |
|
|
SPL0(); |
150 |
|
|
return ERR; |
151 |
|
|
} else if (up) |
152 |
|
|
up->t = get_addressed_line_node(current_addr); |
153 |
|
|
else if ((up = push_undo_stack(UADD, |
154 |
|
|
current_addr, current_addr)) == NULL) { |
155 |
|
|
SPL0(); |
156 |
|
|
return ERR; |
157 |
|
|
} |
158 |
|
|
} while (txt != eot); |
159 |
|
|
SPL0(); |
160 |
|
|
nsubs++; |
161 |
|
|
xa = current_addr; |
162 |
|
|
} |
163 |
|
|
} |
164 |
|
|
current_addr = xa; |
165 |
|
|
if (nsubs == 0 && !(gflag & GLB)) { |
166 |
|
|
seterrmsg("no match"); |
167 |
|
|
return ERR; |
168 |
|
|
} else if ((gflag & (GPR | GLS | GNP)) && |
169 |
|
|
display_lines(current_addr, current_addr, gflag) < 0) |
170 |
|
|
return ERR; |
171 |
|
|
return 0; |
172 |
|
|
} |
173 |
|
|
|
174 |
|
|
|
175 |
|
|
/* substitute_matching_text: replace text matched by a pattern according to |
176 |
|
|
a substitution template; return length of rbuf if changed, 0 if unchanged, or |
177 |
|
|
ERR on error */ |
178 |
|
|
static int |
179 |
|
|
substitute_matching_text(regex_t *pat, line_t *lp, int gflag, int kth) |
180 |
|
|
{ |
181 |
|
|
int off = 0; |
182 |
|
|
int changed = 0; |
183 |
|
|
int matchno = 0; |
184 |
|
|
int i = 0; |
185 |
|
|
int nempty = -1; |
186 |
|
|
regmatch_t rm[SE_MAX]; |
187 |
|
|
char *txt; |
188 |
|
|
char *eot, *eom; |
189 |
|
|
|
190 |
|
|
if ((eom = txt = get_sbuf_line(lp)) == NULL) |
191 |
|
|
return ERR; |
192 |
|
|
if (isbinary) |
193 |
|
|
NUL_TO_NEWLINE(txt, lp->len); |
194 |
|
|
eot = txt + lp->len; |
195 |
|
|
if (!regexec(pat, txt, SE_MAX, rm, 0)) { |
196 |
|
|
do { |
197 |
|
|
/* Don't do a 0-length match directly after a non-0-length */ |
198 |
|
|
if (rm[0].rm_eo == nempty) { |
199 |
|
|
rm[0].rm_so++; |
200 |
|
|
rm[0].rm_eo = lp->len; |
201 |
|
|
continue; |
202 |
|
|
} |
203 |
|
|
if (!kth || kth == ++matchno) { |
204 |
|
|
changed = 1; |
205 |
|
|
i = rm[0].rm_so - (eom - txt); |
206 |
|
|
REALLOC(rbuf, rbufsz, off + i, ERR); |
207 |
|
|
if (isbinary) |
208 |
|
|
NEWLINE_TO_NUL(eom, |
209 |
|
|
rm[0].rm_eo - (eom - txt)); |
210 |
|
|
memcpy(rbuf + off, eom, i); |
211 |
|
|
off += i; |
212 |
|
|
if ((off = apply_subst_template(txt, rm, off, |
213 |
|
|
pat->re_nsub)) < 0) |
214 |
|
|
return ERR; |
215 |
|
|
eom = txt + rm[0].rm_eo; |
216 |
|
|
if (kth) |
217 |
|
|
break; |
218 |
|
|
} |
219 |
|
|
if (rm[0].rm_so == rm[0].rm_eo) |
220 |
|
|
rm[0].rm_so = rm[0].rm_eo + 1; |
221 |
|
|
else |
222 |
|
|
nempty = rm[0].rm_so = rm[0].rm_eo; |
223 |
|
|
rm[0].rm_eo = lp->len; |
224 |
|
|
} while (rm[0].rm_so < lp->len && (gflag & GSG || kth) && |
225 |
|
|
!regexec(pat, txt, SE_MAX, rm, REG_STARTEND | REG_NOTBOL)); |
226 |
|
|
i = eot - eom; |
227 |
|
|
REALLOC(rbuf, rbufsz, off + i + 2, ERR); |
228 |
|
|
if (isbinary) |
229 |
|
|
NEWLINE_TO_NUL(eom, i); |
230 |
|
|
memcpy(rbuf + off, eom, i); |
231 |
|
|
memcpy(rbuf + off + i, "\n", 2); |
232 |
|
|
} |
233 |
|
|
return changed ? off + i + 1 : 0; |
234 |
|
|
} |
235 |
|
|
|
236 |
|
|
|
237 |
|
|
/* apply_subst_template: modify text according to a substitution template; |
238 |
|
|
return offset to end of modified text */ |
239 |
|
|
static int |
240 |
|
|
apply_subst_template(char *boln, regmatch_t *rm, int off, int re_nsub) |
241 |
|
|
{ |
242 |
|
|
int j = 0; |
243 |
|
|
int k = 0; |
244 |
|
|
int n; |
245 |
|
|
char *sub = rhbuf; |
246 |
|
|
|
247 |
|
|
for (; sub - rhbuf < rhbufi; sub++) |
248 |
|
|
if (*sub == '&') { |
249 |
|
|
j = rm[0].rm_so; |
250 |
|
|
k = rm[0].rm_eo; |
251 |
|
|
REALLOC(rbuf, rbufsz, off + k - j, ERR); |
252 |
|
|
while (j < k) |
253 |
|
|
rbuf[off++] = boln[j++]; |
254 |
|
|
} else if (*sub == '\\' && '1' <= *++sub && *sub <= '9' && |
255 |
|
|
(n = *sub - '0') <= re_nsub) { |
256 |
|
|
j = rm[n].rm_so; |
257 |
|
|
k = rm[n].rm_eo; |
258 |
|
|
REALLOC(rbuf, rbufsz, off + k - j, ERR); |
259 |
|
|
while (j < k) |
260 |
|
|
rbuf[off++] = boln[j++]; |
261 |
|
|
} else { |
262 |
|
|
REALLOC(rbuf, rbufsz, off + 1, ERR); |
263 |
|
|
rbuf[off++] = *sub; |
264 |
|
|
} |
265 |
|
|
REALLOC(rbuf, rbufsz, off + 1, ERR); |
266 |
|
|
rbuf[off] = '\0'; |
267 |
|
|
return off; |
268 |
|
|
} |