GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: usr.bin/tr/str.c Lines: 72 132 54.5 %
Date: 2017-11-07 Branches: 40 86 46.5 %

Line Branch Exec Source
1
/*	$OpenBSD: str.c,v 1.12 2012/12/05 23:20:26 deraadt Exp $	*/
2
/*	$NetBSD: str.c,v 1.7 1995/08/31 22:13:47 jtc Exp $	*/
3
4
/*-
5
 * Copyright (c) 1991, 1993
6
 *	The Regents of the University of California.  All rights reserved.
7
 *
8
 * Redistribution and use in source and binary forms, with or without
9
 * modification, are permitted provided that the following conditions
10
 * are met:
11
 * 1. Redistributions of source code must retain the above copyright
12
 *    notice, this list of conditions and the following disclaimer.
13
 * 2. Redistributions in binary form must reproduce the above copyright
14
 *    notice, this list of conditions and the following disclaimer in the
15
 *    documentation and/or other materials provided with the distribution.
16
 * 3. Neither the name of the University nor the names of its contributors
17
 *    may be used to endorse or promote products derived from this software
18
 *    without specific prior written permission.
19
 *
20
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30
 * SUCH DAMAGE.
31
 */
32
33
#include <sys/types.h>
34
35
#include <errno.h>
36
#include <stddef.h>
37
#include <stdio.h>
38
#include <stdlib.h>
39
#include <string.h>
40
#include <ctype.h>
41
#include <err.h>
42
43
#include "extern.h"
44
45
static int	backslash(STR *);
46
static int	bracket(STR *);
47
static int	c_class(const void *, const void *);
48
static void	genclass(STR *);
49
static void	genequiv(STR *);
50
static int	genrange(STR *);
51
static void	genseq(STR *);
52
53
int
54
next(s)
55
	STR *s;
56
{
57
	int ch;
58
59

2580
	switch (s->state) {
60
	case EOS:
61
1
		return (0);
62
	case INFINITE:
63
		return (1);
64
	case NORMAL:
65

916
		switch (ch = *s->str) {
66
		case '\0':
67
416
			s->state = EOS;
68
416
			return (0);
69
		case '\\':
70
170
			s->lastch = backslash(s);
71
170
			break;
72
		case '[':
73
15
			if (bracket(s))
74
7
				return (next(s));
75
			/* FALLTHROUGH */
76
		default:
77
315
			++s->str;
78
			s->lastch = ch;
79
315
			break;
80
		}
81
82
		/* We can start a range at any time. */
83

493
		if (s->str[0] == '-' && genrange(s))
84
8
			return (next(s));
85
477
		return (1);
86
	case RANGE:
87
216
		if (s->cnt-- == 0) {
88
8
			s->state = NORMAL;
89
8
			return (next(s));
90
		}
91
208
		++s->lastch;
92
208
		return (1);
93
	case SEQUENCE:
94
		if (s->cnt-- == 0) {
95
			s->state = NORMAL;
96
			return (next(s));
97
		}
98
		return (1);
99
	case SET:
100
165
		if ((s->lastch = s->set[s->cnt++]) == OOBCH) {
101
7
			s->state = NORMAL;
102
7
			return (next(s));
103
		}
104
158
		return (1);
105
	default:
106
		return 0;
107
	}
108
	/* NOTREACHED */
109
1290
}
110
111
static int
112
bracket(s)
113
	STR *s;
114
{
115
	char *p;
116
117
30
	switch (s->str[1]) {
118
	case ':':				/* "[:class:]" */
119
7
		if ((p = strstr((char *)s->str + 2, ":]")) == NULL)
120
			return (0);
121
7
		*p = '\0';
122
7
		s->str += 2;
123
7
		genclass(s);
124
7
		s->str = (unsigned char *)p + 2;
125
7
		return (1);
126
	case '=':				/* "[=equiv=]" */
127
		if ((p = strstr((char *)s->str + 2, "=]")) == NULL)
128
			return (0);
129
		s->str += 2;
130
		genequiv(s);
131
		return (1);
132
	default:				/* "[\###*n]" or "[#*n]" */
133
8
		if ((p = strpbrk((char *)s->str + 2, "*]")) == NULL)
134
			return (0);
135

8
		if (p[0] != '*' || strchr(p, ']') == NULL)
136
8
			return (0);
137
		s->str += 1;
138
		genseq(s);
139
		return (1);
140
	}
141
	/* NOTREACHED */
142
15
}
143
144
typedef struct {
145
	char *name;
146
	int (*func)(int);
147
	int *set;
148
} CLASS;
149
150
static CLASS classes[] = {
151
	{ "alnum",  isalnum,  },
152
	{ "alpha",  isalpha,  },
153
	{ "blank",  isblank,  },
154
	{ "cntrl",  iscntrl,  },
155
	{ "digit",  isdigit,  },
156
	{ "graph",  isgraph,  },
157
	{ "lower",  islower,  },
158
	{ "print",  isprint,  },
159
	{ "punct",  ispunct,  },
160
	{ "space",  isspace,  },
161
	{ "upper",  isupper,  },
162
	{ "xdigit", isxdigit, },
163
};
164
165
static void
166
genclass(s)
167
	STR *s;
168
{
169
	int cnt, (*func)(int);
170
14
	CLASS *cp, tmp;
171
	int *p;
172
173
7
	tmp.name = (char *)s->str;
174
14
	if ((cp = (CLASS *)bsearch(&tmp, classes, sizeof(classes) /
175
7
	    sizeof(CLASS), sizeof(CLASS), c_class)) == NULL)
176
		errx(1, "unknown class %s", s->str);
177
178
7
	if ((cp->set = p = calloc(NCHARS + 1, sizeof(int))) == NULL)
179
		errx(1, "no memory for a class");
180
3598
	for (cnt = 0, func = cp->func; cnt < NCHARS; ++cnt)
181
1792
		if ((func)(cnt))
182
158
			*p++ = cnt;
183
7
	*p = OOBCH;
184
185
7
	s->cnt = 0;
186
7
	s->state = SET;
187
7
	s->set = cp->set;
188
7
}
189
190
static int
191
c_class(a, b)
192
	const void *a, *b;
193
{
194
38
	return (strcmp(((CLASS *)a)->name, ((CLASS *)b)->name));
195
}
196
197
/*
198
 * English doesn't have any equivalence classes, so for now
199
 * we just syntax check and grab the character.
200
 */
201
static void
202
genequiv(s)
203
	STR *s;
204
{
205
	if (*s->str == '\\') {
206
		s->equiv[0] = backslash(s);
207
		if (*s->str != '=')
208
			errx(1, "misplaced equivalence equals sign");
209
	} else {
210
		s->equiv[0] = s->str[0];
211
		if (s->str[1] != '=')
212
			errx(1, "misplaced equivalence equals sign");
213
	}
214
	s->str += 2;
215
	s->cnt = 0;
216
	s->state = SET;
217
	s->set = s->equiv;
218
}
219
220
static int
221
genrange(s)
222
	STR *s;
223
{
224
	int stopval;
225
	unsigned char *savestart;
226
227
16
	savestart = s->str;
228
24
	stopval = *++s->str == '\\' ? backslash(s) : *s->str++;
229
8
	if (stopval < (u_char)s->lastch) {
230
		s->str = savestart;
231
		return (0);
232
	}
233
8
	s->cnt = stopval - s->lastch + 1;
234
8
	s->state = RANGE;
235
8
	--s->lastch;
236
8
	return (1);
237
8
}
238
239
static void
240
genseq(s)
241
	STR *s;
242
{
243
	char *ep;
244
245
	if (s->which == STRING1)
246
		errx(1, "sequences only valid in string2");
247
248
	if (*s->str == '\\')
249
		s->lastch = backslash(s);
250
	else
251
		s->lastch = *s->str++;
252
	if (*s->str != '*')
253
		errx(1, "misplaced sequence asterisk");
254
255
	switch (*++s->str) {
256
	case '\\':
257
		s->cnt = backslash(s);
258
		break;
259
	case ']':
260
		s->cnt = 0;
261
		++s->str;
262
		break;
263
	default:
264
		if (isdigit(*s->str)) {
265
			s->cnt = strtol((char *)s->str, &ep, 0);
266
			if (*ep == ']') {
267
				s->str = (unsigned char *)ep + 1;
268
				break;
269
			}
270
		}
271
		errx(1, "illegal sequence count");
272
		/* NOTREACHED */
273
	}
274
275
	s->state = s->cnt ? SEQUENCE : INFINITE;
276
}
277
278
/*
279
 * Translate \??? into a character.  Up to 3 octal digits, if no digits either
280
 * an escape code or a literal character.
281
 */
282
static int
283
backslash(s)
284
	STR *s;
285
{
286
	int ch, cnt, val;
287
288
340
	for (cnt = val = 0;;) {
289
172
		ch = *++s->str;
290

344
		if (!isascii(ch) || !isdigit(ch))
291
			break;
292
3
		val = val * 8 + ch - '0';
293
3
		if (++cnt == 3) {
294
1
			++s->str;
295
1
			break;
296
		}
297
	}
298
170
	if (cnt)
299
1
		return (val);
300
169
	if (ch != '\0')
301
169
		++s->str;
302


169
	switch (ch) {
303
		case 'a':			/* escape characters */
304
			return ('\7');
305
		case 'b':
306
			return ('\b');
307
		case 'f':
308
			return ('\f');
309
		case 'n':
310
165
			return ('\n');
311
		case 'r':
312
4
			return ('\r');
313
		case 't':
314
			return ('\t');
315
		case 'v':
316
			return ('\13');
317
		case '\0':			/*  \" -> \ */
318
			s->state = EOS;
319
			return ('\\');
320
		default:			/* \x" -> x */
321
			return (ch);
322
	}
323
170
}