1 |
|
|
/* $OpenBSD: str.c,v 1.31 2014/05/18 08:08:50 espie Exp $ */ |
2 |
|
|
/* $NetBSD: str.c,v 1.13 1996/11/06 17:59:23 christos Exp $ */ |
3 |
|
|
|
4 |
|
|
/*- |
5 |
|
|
* Copyright (c) 1988, 1989, 1990, 1993 |
6 |
|
|
* The Regents of the University of California. All rights reserved. |
7 |
|
|
* Copyright (c) 1989 by Berkeley Softworks |
8 |
|
|
* All rights reserved. |
9 |
|
|
* |
10 |
|
|
* This code is derived from software contributed to Berkeley by |
11 |
|
|
* Adam de Boor. |
12 |
|
|
* |
13 |
|
|
* Redistribution and use in source and binary forms, with or without |
14 |
|
|
* modification, are permitted provided that the following conditions |
15 |
|
|
* are met: |
16 |
|
|
* 1. Redistributions of source code must retain the above copyright |
17 |
|
|
* notice, this list of conditions and the following disclaimer. |
18 |
|
|
* 2. Redistributions in binary form must reproduce the above copyright |
19 |
|
|
* notice, this list of conditions and the following disclaimer in the |
20 |
|
|
* documentation and/or other materials provided with the distribution. |
21 |
|
|
* 3. Neither the name of the University nor the names of its contributors |
22 |
|
|
* may be used to endorse or promote products derived from this software |
23 |
|
|
* without specific prior written permission. |
24 |
|
|
* |
25 |
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
26 |
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
27 |
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
28 |
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
29 |
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
30 |
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
31 |
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
32 |
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
33 |
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
34 |
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
35 |
|
|
* SUCH DAMAGE. |
36 |
|
|
*/ |
37 |
|
|
|
38 |
|
|
#include <ctype.h> |
39 |
|
|
#include <string.h> |
40 |
|
|
#include "config.h" |
41 |
|
|
#include "defines.h" |
42 |
|
|
#include "str.h" |
43 |
|
|
#include "memory.h" |
44 |
|
|
#include "buf.h" |
45 |
|
|
|
46 |
|
|
/* helpers for Str_Matchi */ |
47 |
|
|
static bool range_match(char, const char **, const char *); |
48 |
|
|
static bool star_match(const char *, const char *, const char *, const char *); |
49 |
|
|
|
50 |
|
|
/*
 * Str_concati --
 *	Build a newly allocated, NUL-terminated string made of the interval
 *	[s1, e1), an optional separator character, and the interval [s2, e2).
 *
 *	sep	separator character to place between both parts, or 0 for
 *		plain concatenation.
 *
 * returns --
 *	The string obtained from emalloc().
 */
char *
Str_concati(const char *s1, const char *e1, const char *s2, const char *e2,
    int sep)
{
	size_t len1 = e1 - s1;
	size_t len2 = e2 - s2;
	size_t seplen = sep ? 1 : 0;
	char *result;
	char *p;

	result = emalloc(len1 + seplen + len2 + 1);
	p = result;

	/*
	 * Copy exactly the bytes of the first interval: *e1 is not part
	 * of it and must not be read.
	 */
	memcpy(p, s1, len1);
	p += len1;

	/* add separator character */
	if (sep)
		*p++ = sep;

	/* copy second string and terminate */
	memcpy(p, s2, len2);
	p[len2] = '\0';
	return result;
}
78 |
|
|
|
79 |
|
|
/*-
 * brk_string --
 *	Fracture a string into an array of words (as delineated by tabs or
 *	spaces) taking quotation marks and backslashes into account.
 *	Leading tabs/spaces are ignored.  An unquoted newline ends the
 *	last word and stops the scan.
 *
 *	Inside or outside quotes, the sequences \b \f \n \r \t are replaced
 *	by the corresponding control character; a backslash in front of any
 *	other character is dropped and that character is taken literally.
 *	A backslash right before a newline or the end of the string is kept
 *	as a plain backslash.
 *
 *	str		the string to split; it is not modified
 *	store_argc	receives the number of words found
 *	buffer		receives a copy obtained from emalloc() which holds
 *			the text of every word
 *
 * returns --
 *	Pointer to the array of pointers to the words, obtained from
 *	ereallocarray().  argv[0] is the first word of str and the array
 *	is terminated by a NULL pointer.
 */
char **
brk_string(const char *str, int *store_argc, char **buffer)
{
	int argc;
	char ch;
	char inquote;
	const char *p;
	char *start, *t;
	int argmax = 50;
	char **argv = ereallocarray(NULL, argmax + 1, sizeof(char *));

	/* skip leading space chars. */
	for (; *str == ' ' || *str == '\t'; ++str)
		continue;

	/*
	 * allocate room for a copy of the string: the copy is never
	 * longer than the original.
	 */
	*buffer = emalloc(strlen(str) + 1);

	/*
	 * copy the string; at the same time, parse backslashes,
	 * quotes and build the argument list.
	 * start is the beginning of the word being built, or NULL
	 * between words; t is the write position in the copy.
	 */
	argc = 0;
	inquote = '\0';
	for (p = str, start = t = *buffer;; ++p) {
		switch (ch = *p) {
		case '"':
		case '\'':
			if (inquote) {
				if (inquote == ch)
					inquote = '\0';
				else
					/* other kind of quote: literal */
					break;
			} else {
				inquote = ch;
				/* Don't miss "" or '' */
				if (start == NULL && p[1] == inquote) {
					/*
					 * The quote itself gets stored below,
					 * and the word starts right after it,
					 * so that it ends up empty.
					 */
					start = t + 1;
					break;
				}
			}
			continue;
		case ' ':
		case '\t':
		case '\n':
			if (inquote)
				break;
			if (!start)
				continue;
			/* FALLTHROUGH */
		case '\0':
			/*
			 * end of a token -- make sure there's enough argv
			 * space and save off a pointer.
			 */
			if (!start)
				goto done;

			*t++ = '\0';
			if (argc == argmax) {
				argmax *= 2;		/* ramp up fast */
				argv = ereallocarray(argv,
				    (argmax + 1), sizeof(char *));
			}
			argv[argc++] = start;
			start = NULL;
			if (ch == '\n' || ch == '\0')
				goto done;
			continue;
		case '\\':
			switch (ch = *++p) {
			case '\0':
			case '\n':
				/* hmmm; fix it up as best we can */
				ch = '\\';
				--p;
				break;
			case 'b':
				ch = '\b';
				break;
			case 'f':
				ch = '\f';
				break;
			case 'n':
				ch = '\n';
				break;
			case 'r':
				ch = '\r';
				break;
			case 't':
				ch = '\t';
				break;
			}
			break;
		}
		if (!start)
			start = t;
		*t++ = ch;
	}
done:
	argv[argc] = NULL;
	*store_argc = argc;
	return argv;
}
196 |
|
|
|
197 |
|
|
|
198 |
|
|
/*
 * iterate_words --
 *	Find the next word starting from *end.  Leading ISSPACE()
 *	characters are skipped.  A word extends up to the first space or
 *	tab that is not inside single or double quotes, or up to the end
 *	of the string.  A backslash protects the character that follows it.
 *
 * returns --
 *	The start of the word, with *end set to the character right after
 *	it, or NULL (leaving *end untouched) if no word is left.
 */
const char *
iterate_words(const char **end)
{
	const char *word = *end;
	const char *cur;
	char quote = 0;

	while (ISSPACE(*word))
		word++;
	if (*word == '\0')
		return NULL;

	for (cur = word;; cur++) {
		char ch = *cur;

		if (ch == '\\') {
			/* skip the escaped character, unless at the end */
			if (cur[1] != '\0')
				cur++;
		} else if (ch == '\'' || ch == '"') {
			if (quote == ch)
				quote = 0;
			else if (quote == 0)
				quote = ch;
		} else if (ch == '\0' ||
		    ((ch == ' ' || ch == '\t') && quote == 0)) {
			*end = cur;
			return word;
		}
	}
}
235 |
|
|
|
236 |
|
|
/*
 * star_match --
 *	Helper for Str_Matchi: pattern points to a '*'.  Match what
 *	follows the '*' against every non-empty tail of [string, estring).
 */
static bool
star_match(const char *string, const char *estring,
    const char *pattern, const char *epattern)
{
	const char *tail;

	/* Step over the '*' itself. */
	pattern++;

	/*
	 * Swallow any run of '?' and '*' right away, so that the
	 * recursion below only starts on a `real' character: each '?'
	 * consumes one character of string, further '*' add nothing.
	 */
	for (; pattern != epattern; pattern++) {
		if (*pattern == '?') {
			if (string == estring)
				return false;
			string++;
		} else if (*pattern != '*')
			break;
	}

	/* A trailing '*' matches whatever is left. */
	if (pattern == epattern)
		return true;

	/* Try the rest of the pattern at each remaining position. */
	for (tail = string; tail != estring; tail++) {
		if (Str_Matchi(tail, estring, pattern, epattern))
			return true;
	}
	return false;
}
264 |
|
|
|
265 |
|
|
/*
 * range_match --
 *	Helper for Str_Matchi: match character c against a character
 *	class.  *ppat points right after the opening '[' and epattern
 *	marks the end of the pattern.
 *
 *	A leading '!' or '^' negates the class.  'x-y' denotes a range
 *	(in either order), '\' takes the next character literally, and a
 *	']' in first position is an ordinary member of the class.
 *
 * returns --
 *	true if c belongs to the class.  On a positive (non negated)
 *	match, *ppat is left on the closing ']', or on epattern if the
 *	class is not terminated.
 */
static bool
range_match(char c, const char **ppat, const char *epattern)
{
	/* A lone '[' at the end of the pattern only matches itself. */
	if (*ppat == epattern)
		return c == '[';

	if (**ppat == '!' || **ppat == '^') {
		(*ppat)++;
		return !range_match(c, ppat, epattern);
	}
	for (;;) {
		if (**ppat == '\\') {
			if (++(*ppat) == epattern)
				return false;
		}
		if (**ppat == c)
			break;
		/* Only look at the next character if it is part of the
		 * pattern. */
		if (*ppat + 1 != epattern && (*ppat)[1] == '-') {
			if (*ppat + 2 == epattern)
				return false;
			if (**ppat < c && c <= (*ppat)[2])
				break;
			if ((*ppat)[2] <= c && c < **ppat)
				break;
			*ppat += 3;
		} else
			(*ppat)++;
		/* The test for ']' is done at the end
		 * so that ']' can be used at the
		 * start of the range without '\' */
		if (*ppat == epattern || **ppat == ']')
			return false;
	}
	/* Found matching character.  It is a literal member of the class,
	 * even if it is a ']' or a '\', so step over it before looking
	 * for the end of the class. */
	(*ppat)++;
	/* Skip over rest of class.  A non-terminated character class
	 * is ok. */
	while (*ppat != epattern && **ppat != ']') {
		if (**ppat == '\\') {
			if (++(*ppat) == epattern)
				break;
		}
		(*ppat)++;
	}
	return true;
}
314 |
|
|
|
315 |
|
|
/*
 * Str_Matchi --
 *	Check whether the interval [string, estring) matches the shell-like
 *	pattern held in [pattern, epattern).
 *
 *	'*' matches any substring, '?' any single character, '[...]' a
 *	character class (see range_match), and '\' takes the following
 *	pattern character literally.
 *
 * returns --
 *	true if the whole string matches the whole pattern.
 */
bool
Str_Matchi(const char *string, const char *estring,
    const char *pattern, const char *epattern)
{
	while (pattern != epattern) {
		/* Check for a "*" as the next pattern character. */
		if (*pattern == '*')
			return star_match(string, estring, pattern, epattern);
		else if (string == estring)
			return false;
		/* Check for a "[" as the next pattern character.  It is
		 * followed by a list of characters that are acceptable, or
		 * by a range (two characters separated by "-"). */
		else if (*pattern == '[') {
			pattern++;
			if (!range_match(*string, &pattern, epattern))
				return false;
			/* An unterminated class (or a lone '[') leaves
			 * pattern at epattern: there is no ']' to step over,
			 * so stop here instead of running past the end of
			 * the pattern. */
			if (pattern == epattern) {
				string++;
				break;
			}
		}
		/* '?' matches any single character, so shunt test. */
		else if (*pattern != '?') {
			/* If the next pattern character is '\', just strip
			 * off the '\' so we do exact matching on the
			 * character that follows. */
			if (*pattern == '\\') {
				if (++pattern == epattern)
					return false;
			}
			/* There's no special character.  Just make sure that
			 * the next characters of each string match. */
			if (*pattern != *string)
				return false;
		}
		pattern++;
		string++;
	}
	/* The pattern is exhausted: the string must be as well. */
	return string == estring;
}
356 |
|
|
|
357 |
|
|
|
358 |
|
|
/*-
 *-----------------------------------------------------------------------
 * Str_SYSVMatch --
 *	Match word against a System V style pattern, where the first '%'
 *	stands for any (possibly empty) run of characters.
 *
 * Results:
 *	A pointer into word where the part standing for '%' begins, its
 *	length being stored in *len; NULL if there is no match, in which
 *	case *len is left alone.
 *	An empty pattern yields the whole word.  A pattern without '%'
 *	is compared with the tails of word, and the part of word that
 *	precedes the matching tail is returned.
 *-----------------------------------------------------------------------
 */
const char *
Str_SYSVMatch(const char *word, const char *pattern, size_t *len)
{
	const char *percent;
	const char *stem;
	const char *suffix = pattern;
	const char *cur = word;

	if (pattern[0] == '\0') {
		/* Null pattern is the whole string. */
		*len = strlen(word);
		return word;
	}

	percent = strchr(pattern, '%');
	if (percent != NULL) {
		/* Everything in front of the % must match literally. */
		while (suffix != percent && *cur != '\0' && *cur == *suffix) {
			cur++;
			suffix++;
		}
		if (suffix != percent)
			return NULL;

		suffix = percent + 1;
		if (*suffix == '\0') {
			/* Nothing after the %: the rest of word is it. */
			*len = strlen(cur);
			return cur;
		}
	}

	/* Look for the position where the remainder of word is the suffix. */
	stem = cur;
	for (;;) {
		if (strcmp(suffix, cur) == 0) {
			*len = cur - stem;
			return stem;
		}
		if (*cur == '\0')
			return NULL;
		cur++;
	}
}
408 |
|
|
|
409 |
|
|
|
410 |
|
|
/*- |
411 |
|
|
*----------------------------------------------------------------------- |
412 |
|
|
* Str_SYSVSubst -- |
413 |
|
|
* Substitute '%' in the pattern with len characters from src. |
414 |
|
|
* If the pattern does not contain a '%' prepend len characters |
415 |
|
|
* from src. |
416 |
|
|
* |
417 |
|
|
* Side Effects: |
418 |
|
|
* Adds result to buf |
419 |
|
|
*----------------------------------------------------------------------- |
420 |
|
|
*/ |
421 |
|
|
void |
422 |
|
|
Str_SYSVSubst(Buffer buf, const char *pat, const char *src, size_t len) |
423 |
|
447 |
{ |
424 |
|
|
const char *m; |
425 |
|
|
|
426 |
✗✓ |
447 |
if ((m = strchr(pat, '%')) != NULL) { |
427 |
|
|
/* Copy the prefix. */ |
428 |
|
|
Buf_Addi(buf, pat, m); |
429 |
|
|
/* Skip the %. */ |
430 |
|
|
pat = m + 1; |
431 |
|
|
} |
432 |
|
|
|
433 |
|
|
/* Copy the pattern. */ |
434 |
|
447 |
Buf_AddChars(buf, len, src); |
435 |
|
|
|
436 |
|
|
/* Append the rest. */ |
437 |
|
447 |
Buf_AddString(buf, pat); |
438 |
|
447 |
} |
439 |
|
|
|
440 |
|
|
/*
 * Str_dupi --
 *	Return a NUL-terminated copy of the interval [begin, end),
 *	obtained from emalloc().
 */
char *
Str_dupi(const char *begin, const char *end)
{
	size_t len = end - begin;
	char *copy = emalloc(len + 1);

	memcpy(copy, begin, len);
	copy[len] = '\0';
	return copy;
}
450 |
|
|
|
451 |
|
|
/*
 * escape_dupi --
 *	Return a NUL-terminated copy of the interval [begin, end),
 *	obtained from emalloc(), in which a backslash is removed when the
 *	character that follows it is found in set by strchr().  Other
 *	backslashes, including one at the very end of the interval, are
 *	copied as is.
 */
char *
escape_dupi(const char *begin, const char *end, const char *set)
{
	/* The copy never grows, so the size of the interval is enough. */
	char *copy = emalloc(end - begin + 1);
	char *out = copy;
	const char *in;

	for (in = begin; in != end; in++) {
		if (*in == '\\') {
			in++;
			if (in == end) {
				/* lone trailing backslash */
				*out++ = '\\';
				break;
			}
			if (strchr(set, *in) == NULL)
				*out++ = '\\';
		}
		*out++ = *in;
	}
	*out = '\0';
	return copy;
}
472 |
|
|
|
473 |
|
|
/*
 * Str_rchri --
 *	Look for the last occurrence of character c in the interval
 *	[begin, end).
 *
 * returns --
 *	A pointer to that occurrence, or NULL if c is not there.
 */
char *
Str_rchri(const char *begin, const char *end, int c)
{
	/* Walk backwards from the end of the interval. */
	while (end != begin) {
		end--;
		if (*end == c)
			return (char *)end;
	}
	return NULL;
}