1 |
|
|
/* $OpenBSD: str.c,v 1.12 2012/12/05 23:20:26 deraadt Exp $ */ |
2 |
|
|
/* $NetBSD: str.c,v 1.7 1995/08/31 22:13:47 jtc Exp $ */ |
3 |
|
|
|
4 |
|
|
/*- |
5 |
|
|
* Copyright (c) 1991, 1993 |
6 |
|
|
* The Regents of the University of California. All rights reserved. |
7 |
|
|
* |
8 |
|
|
* Redistribution and use in source and binary forms, with or without |
9 |
|
|
* modification, are permitted provided that the following conditions |
10 |
|
|
* are met: |
11 |
|
|
* 1. Redistributions of source code must retain the above copyright |
12 |
|
|
* notice, this list of conditions and the following disclaimer. |
13 |
|
|
* 2. Redistributions in binary form must reproduce the above copyright |
14 |
|
|
* notice, this list of conditions and the following disclaimer in the |
15 |
|
|
* documentation and/or other materials provided with the distribution. |
16 |
|
|
* 3. Neither the name of the University nor the names of its contributors |
17 |
|
|
* may be used to endorse or promote products derived from this software |
18 |
|
|
* without specific prior written permission. |
19 |
|
|
* |
20 |
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
21 |
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
22 |
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
23 |
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
24 |
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
25 |
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
26 |
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
27 |
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
28 |
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
29 |
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
30 |
|
|
* SUCH DAMAGE. |
31 |
|
|
*/ |
32 |
|
|
|
33 |
|
|
#include <sys/types.h> |
34 |
|
|
|
35 |
|
|
#include <errno.h> |
36 |
|
|
#include <stddef.h> |
37 |
|
|
#include <stdio.h> |
38 |
|
|
#include <stdlib.h> |
39 |
|
|
#include <string.h> |
40 |
|
|
#include <ctype.h> |
41 |
|
|
#include <err.h> |
42 |
|
|
|
43 |
|
|
#include "extern.h" |
44 |
|
|
|
45 |
|
|
static int backslash(STR *); |
46 |
|
|
static int bracket(STR *); |
47 |
|
|
static int c_class(const void *, const void *); |
48 |
|
|
static void genclass(STR *); |
49 |
|
|
static void genequiv(STR *); |
50 |
|
|
static int genrange(STR *); |
51 |
|
|
static void genseq(STR *); |
52 |
|
|
|
53 |
|
|
int |
54 |
|
|
next(s) |
55 |
|
|
STR *s; |
56 |
|
|
{ |
57 |
|
|
int ch; |
58 |
|
|
|
59 |
✗✗✓✓ ✗✓✗ |
1876 |
switch (s->state) { |
60 |
|
|
case EOS: |
61 |
|
|
return (0); |
62 |
|
|
case INFINITE: |
63 |
|
|
return (1); |
64 |
|
|
case NORMAL: |
65 |
✓✓✓✓
|
622 |
switch (ch = *s->str) { |
66 |
|
|
case '\0': |
67 |
|
284 |
s->state = EOS; |
68 |
|
284 |
return (0); |
69 |
|
|
case '\\': |
70 |
|
118 |
s->lastch = backslash(s); |
71 |
|
118 |
break; |
72 |
|
|
case '[': |
73 |
✓✓ |
12 |
if (bracket(s)) |
74 |
|
4 |
return (next(s)); |
75 |
|
|
/* FALLTHROUGH */ |
76 |
|
|
default: |
77 |
|
208 |
++s->str; |
78 |
|
|
s->lastch = ch; |
79 |
|
208 |
break; |
80 |
|
|
} |
81 |
|
|
|
82 |
|
|
/* We can start a range at any time. */ |
83 |
✓✓✓✗
|
334 |
if (s->str[0] == '-' && genrange(s)) |
84 |
|
8 |
return (next(s)); |
85 |
|
318 |
return (1); |
86 |
|
|
case RANGE: |
87 |
✓✓ |
216 |
if (s->cnt-- == 0) { |
88 |
|
8 |
s->state = NORMAL; |
89 |
|
8 |
return (next(s)); |
90 |
|
|
} |
91 |
|
208 |
++s->lastch; |
92 |
|
208 |
return (1); |
93 |
|
|
case SEQUENCE: |
94 |
|
|
if (s->cnt-- == 0) { |
95 |
|
|
s->state = NORMAL; |
96 |
|
|
return (next(s)); |
97 |
|
|
} |
98 |
|
|
return (1); |
99 |
|
|
case SET: |
100 |
✓✓ |
108 |
if ((s->lastch = s->set[s->cnt++]) == OOBCH) { |
101 |
|
4 |
s->state = NORMAL; |
102 |
|
4 |
return (next(s)); |
103 |
|
|
} |
104 |
|
104 |
return (1); |
105 |
|
|
default: |
106 |
|
|
return 0; |
107 |
|
|
} |
108 |
|
|
/* NOTREACHED */ |
109 |
|
938 |
} |
110 |
|
|
|
111 |
|
|
static int |
112 |
|
|
bracket(s) |
113 |
|
|
STR *s; |
114 |
|
|
{ |
115 |
|
|
char *p; |
116 |
|
|
|
117 |
✓✗✓ |
24 |
switch (s->str[1]) { |
118 |
|
|
case ':': /* "[:class:]" */ |
119 |
✗✓ |
4 |
if ((p = strstr((char *)s->str + 2, ":]")) == NULL) |
120 |
|
|
return (0); |
121 |
|
4 |
*p = '\0'; |
122 |
|
4 |
s->str += 2; |
123 |
|
4 |
genclass(s); |
124 |
|
4 |
s->str = (unsigned char *)p + 2; |
125 |
|
4 |
return (1); |
126 |
|
|
case '=': /* "[=equiv=]" */ |
127 |
|
|
if ((p = strstr((char *)s->str + 2, "=]")) == NULL) |
128 |
|
|
return (0); |
129 |
|
|
s->str += 2; |
130 |
|
|
genequiv(s); |
131 |
|
|
return (1); |
132 |
|
|
default: /* "[\###*n]" or "[#*n]" */ |
133 |
✗✓ |
8 |
if ((p = strpbrk((char *)s->str + 2, "*]")) == NULL) |
134 |
|
|
return (0); |
135 |
✗✓✗✗
|
8 |
if (p[0] != '*' || strchr(p, ']') == NULL) |
136 |
|
8 |
return (0); |
137 |
|
|
s->str += 1; |
138 |
|
|
genseq(s); |
139 |
|
|
return (1); |
140 |
|
|
} |
141 |
|
|
/* NOTREACHED */ |
142 |
|
12 |
} |
143 |
|
|
|
144 |
|
|
/*
 * One character class: its name as written inside "[:name:]", the
 * <ctype.h> predicate that decides membership, and the most recently
 * generated member list (NULL until genclass() fills it in).
 */
typedef struct {
	char *name;
	int (*func)(int);
	int *set;
} CLASS;

/* Looked up with bsearch(): keep the entries sorted by name. */
static CLASS classes[] = {
	{ "alnum",  isalnum,  NULL },
	{ "alpha",  isalpha,  NULL },
	{ "blank",  isblank,  NULL },
	{ "cntrl",  iscntrl,  NULL },
	{ "digit",  isdigit,  NULL },
	{ "graph",  isgraph,  NULL },
	{ "lower",  islower,  NULL },
	{ "print",  isprint,  NULL },
	{ "punct",  ispunct,  NULL },
	{ "space",  isspace,  NULL },
	{ "upper",  isupper,  NULL },
	{ "xdigit", isxdigit, NULL },
};
164 |
|
|
|
165 |
|
|
static void |
166 |
|
|
genclass(s) |
167 |
|
|
STR *s; |
168 |
|
|
{ |
169 |
|
|
int cnt, (*func)(int); |
170 |
|
8 |
CLASS *cp, tmp; |
171 |
|
|
int *p; |
172 |
|
|
|
173 |
|
4 |
tmp.name = (char *)s->str; |
174 |
✗✓ |
8 |
if ((cp = (CLASS *)bsearch(&tmp, classes, sizeof(classes) / |
175 |
|
4 |
sizeof(CLASS), sizeof(CLASS), c_class)) == NULL) |
176 |
|
|
errx(1, "unknown class %s", s->str); |
177 |
|
|
|
178 |
✗✓ |
4 |
if ((cp->set = p = calloc(NCHARS + 1, sizeof(int))) == NULL) |
179 |
|
|
errx(1, "no memory for a class"); |
180 |
✓✓ |
2056 |
for (cnt = 0, func = cp->func; cnt < NCHARS; ++cnt) |
181 |
✓✓ |
1024 |
if ((func)(cnt)) |
182 |
|
104 |
*p++ = cnt; |
183 |
|
4 |
*p = OOBCH; |
184 |
|
|
|
185 |
|
4 |
s->cnt = 0; |
186 |
|
4 |
s->state = SET; |
187 |
|
4 |
s->set = cp->set; |
188 |
|
4 |
} |
189 |
|
|
|
190 |
|
|
static int |
191 |
|
|
c_class(a, b) |
192 |
|
|
const void *a, *b; |
193 |
|
|
{ |
194 |
|
20 |
return (strcmp(((CLASS *)a)->name, ((CLASS *)b)->name)); |
195 |
|
|
} |
196 |
|
|
|
197 |
|
|
/* |
198 |
|
|
* English doesn't have any equivalence classes, so for now |
199 |
|
|
* we just syntax check and grab the character. |
200 |
|
|
*/ |
201 |
|
|
static void |
202 |
|
|
genequiv(s) |
203 |
|
|
STR *s; |
204 |
|
|
{ |
205 |
|
|
if (*s->str == '\\') { |
206 |
|
|
s->equiv[0] = backslash(s); |
207 |
|
|
if (*s->str != '=') |
208 |
|
|
errx(1, "misplaced equivalence equals sign"); |
209 |
|
|
} else { |
210 |
|
|
s->equiv[0] = s->str[0]; |
211 |
|
|
if (s->str[1] != '=') |
212 |
|
|
errx(1, "misplaced equivalence equals sign"); |
213 |
|
|
} |
214 |
|
|
s->str += 2; |
215 |
|
|
s->cnt = 0; |
216 |
|
|
s->state = SET; |
217 |
|
|
s->set = s->equiv; |
218 |
|
|
} |
219 |
|
|
|
220 |
|
|
static int |
221 |
|
|
genrange(s) |
222 |
|
|
STR *s; |
223 |
|
|
{ |
224 |
|
|
int stopval; |
225 |
|
|
unsigned char *savestart; |
226 |
|
|
|
227 |
|
16 |
savestart = s->str; |
228 |
✗✓ |
24 |
stopval = *++s->str == '\\' ? backslash(s) : *s->str++; |
229 |
✗✓ |
8 |
if (stopval < (u_char)s->lastch) { |
230 |
|
|
s->str = savestart; |
231 |
|
|
return (0); |
232 |
|
|
} |
233 |
|
8 |
s->cnt = stopval - s->lastch + 1; |
234 |
|
8 |
s->state = RANGE; |
235 |
|
8 |
--s->lastch; |
236 |
|
8 |
return (1); |
237 |
|
8 |
} |
238 |
|
|
|
239 |
|
|
static void |
240 |
|
|
genseq(s) |
241 |
|
|
STR *s; |
242 |
|
|
{ |
243 |
|
|
char *ep; |
244 |
|
|
|
245 |
|
|
if (s->which == STRING1) |
246 |
|
|
errx(1, "sequences only valid in string2"); |
247 |
|
|
|
248 |
|
|
if (*s->str == '\\') |
249 |
|
|
s->lastch = backslash(s); |
250 |
|
|
else |
251 |
|
|
s->lastch = *s->str++; |
252 |
|
|
if (*s->str != '*') |
253 |
|
|
errx(1, "misplaced sequence asterisk"); |
254 |
|
|
|
255 |
|
|
switch (*++s->str) { |
256 |
|
|
case '\\': |
257 |
|
|
s->cnt = backslash(s); |
258 |
|
|
break; |
259 |
|
|
case ']': |
260 |
|
|
s->cnt = 0; |
261 |
|
|
++s->str; |
262 |
|
|
break; |
263 |
|
|
default: |
264 |
|
|
if (isdigit(*s->str)) { |
265 |
|
|
s->cnt = strtol((char *)s->str, &ep, 0); |
266 |
|
|
if (*ep == ']') { |
267 |
|
|
s->str = (unsigned char *)ep + 1; |
268 |
|
|
break; |
269 |
|
|
} |
270 |
|
|
} |
271 |
|
|
errx(1, "illegal sequence count"); |
272 |
|
|
/* NOTREACHED */ |
273 |
|
|
} |
274 |
|
|
|
275 |
|
|
s->state = s->cnt ? SEQUENCE : INFINITE; |
276 |
|
|
} |
277 |
|
|
|
278 |
|
|
/* |
279 |
|
|
* Translate \??? into a character. Up to 3 octal digits, if no digits either |
280 |
|
|
* an escape code or a literal character. |
281 |
|
|
*/ |
282 |
|
|
static int |
283 |
|
|
backslash(s) |
284 |
|
|
STR *s; |
285 |
|
|
{ |
286 |
|
|
int ch, cnt, val; |
287 |
|
|
|
288 |
|
236 |
for (cnt = val = 0;;) { |
289 |
|
118 |
ch = *++s->str; |
290 |
✓✗✗✓
|
236 |
if (!isascii(ch) || !isdigit(ch)) |
291 |
|
|
break; |
292 |
|
|
val = val * 8 + ch - '0'; |
293 |
|
|
if (++cnt == 3) { |
294 |
|
|
++s->str; |
295 |
|
|
break; |
296 |
|
|
} |
297 |
|
|
} |
298 |
✗✓ |
118 |
if (cnt) |
299 |
|
|
return (val); |
300 |
✓✗ |
118 |
if (ch != '\0') |
301 |
|
118 |
++s->str; |
302 |
✗✗✗✓ ✗✗✗✗ ✗ |
118 |
switch (ch) { |
303 |
|
|
case 'a': /* escape characters */ |
304 |
|
|
return ('\7'); |
305 |
|
|
case 'b': |
306 |
|
|
return ('\b'); |
307 |
|
|
case 'f': |
308 |
|
|
return ('\f'); |
309 |
|
|
case 'n': |
310 |
|
118 |
return ('\n'); |
311 |
|
|
case 'r': |
312 |
|
|
return ('\r'); |
313 |
|
|
case 't': |
314 |
|
|
return ('\t'); |
315 |
|
|
case 'v': |
316 |
|
|
return ('\13'); |
317 |
|
|
case '\0': /* \" -> \ */ |
318 |
|
|
s->state = EOS; |
319 |
|
|
return ('\\'); |
320 |
|
|
default: /* \x" -> x */ |
321 |
|
|
return (ch); |
322 |
|
|
} |
323 |
|
118 |
} |