GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: usr.bin/lex/ccl.c Lines: 30 73 41.1 %
Date: 2016-12-06 Branches: 17 58 29.3 %

Line Branch Exec Source
1
/*	$OpenBSD: ccl.c,v 1.8 2015/11/19 22:55:13 tedu Exp $	*/
2
3
/* ccl - routines for character classes */
4
5
/*  Copyright (c) 1990 The Regents of the University of California. */
6
/*  All rights reserved. */
7
8
/*  This code is derived from software contributed to Berkeley by */
9
/*  Vern Paxson. */
10
11
/*  The United States Government has rights in this work pursuant */
12
/*  to contract no. DE-AC03-76SF00098 between the United States */
13
 /* Department of Energy and the University of California. */
14
15
/*  This file is part of flex. */
16
17
/*  Redistribution and use in source and binary forms, with or without */
18
/*  modification, are permitted provided that the following conditions */
19
/*  are met: */
20
21
/*  1. Redistributions of source code must retain the above copyright */
22
/*     notice, this list of conditions and the following disclaimer. */
23
/*  2. Redistributions in binary form must reproduce the above copyright */
24
/*     notice, this list of conditions and the following disclaimer in the */
25
/*     documentation and/or other materials provided with the distribution. */
26
27
/*  Neither the name of the University nor the names of its contributors */
28
/*  may be used to endorse or promote products derived from this software */
29
/*  without specific prior written permission. */
30
31
/*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
32
/*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
33
/*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
34
/*  PURPOSE. */
35
36
#include "flexdef.h"
37
38
/* return true if the chr is in the ccl. Takes negation into account. */
39
static bool
40
ccl_contains(const int cclp, const int ch)
41
{
42
	int ind, len, i;
43
44
	len = ccllen[cclp];
45
	ind = cclmap[cclp];
46
47
	for (i = 0; i < len; ++i)
48
		if (ccltbl[ind + i] == ch)
49
			return !cclng[cclp];
50
51
	return cclng[cclp];
52
}
53
54
55
/* ccladd - add a single character to a ccl */
56
57
void
58
ccladd(cclp, ch)
59
	int cclp;
60
	int ch;
61
477
{
62
	int ind, len, newpos, i;
63
64
477
	check_char(ch);
65
66
477
	len = ccllen[cclp];
67
477
	ind = cclmap[cclp];
68
69
	/* check to see if the character is already in the ccl */
70
71
11295
	for (i = 0; i < len; ++i)
72
10870
		if (ccltbl[ind + i] == ch)
73
52
			return;
74
75
	/* mark newlines */
76
425
	if (ch == nlch)
77
17
		ccl_has_nl[cclp] = true;
78
79
425
	newpos = ind + len;
80
81
425
	if (newpos >= current_max_ccl_tbl_size) {
82
		current_max_ccl_tbl_size += MAX_CCL_TBL_SIZE_INCREMENT;
83
84
		++num_reallocs;
85
86
		ccltbl = reallocate_Character_array(ccltbl,
87
		    current_max_ccl_tbl_size);
88
	}
89
425
	ccllen[cclp] = len + 1;
90
425
	ccltbl[newpos] = ch;
91
}
92
93
/* dump_cclp - same thing as list_character_set, but for cclps.  */
94
95
static void
96
dump_cclp(FILE * file, int cclp)
97
{
98
	int i;
99
100
	putc('[', file);
101
102
	for (i = 0; i < csize; ++i) {
103
		if (ccl_contains(cclp, i)) {
104
			int start_char = i;
105
106
			putc(' ', file);
107
108
			fputs(readable_form(i), file);
109
110
			while (++i < csize && ccl_contains(cclp, i));
111
112
			if (i - 1 > start_char)
113
				/* this was a run */
114
				fprintf(file, "-%s",
115
				    readable_form(i - 1));
116
117
			putc(' ', file);
118
		}
119
	}
120
121
	putc(']', file);
122
}
123
124
125
126
/* ccl_set_diff - create a new ccl as the set difference of the two given ccls. */
127
int
128
ccl_set_diff(int a, int b)
129
{
130
	int d, ch;
131
132
	/* create new class  */
133
	d = cclinit();
134
135
	/*
136
	 * In order to handle negation, we spin through all possible chars,
137
	 * addding each char in a that is not in b. (This could be O(n^2),
138
	 * but n is small and bounded.)
139
	 */
140
	for (ch = 0; ch < csize; ++ch)
141
		if (ccl_contains(a, ch) && !ccl_contains(b, ch))
142
			ccladd(d, ch);
143
144
	/* debug */
145
	if (0) {
146
		fprintf(stderr, "ccl_set_diff (");
147
		fprintf(stderr, "\n    ");
148
		dump_cclp(stderr, a);
149
		fprintf(stderr, "\n    ");
150
		dump_cclp(stderr, b);
151
		fprintf(stderr, "\n    ");
152
		dump_cclp(stderr, d);
153
		fprintf(stderr, "\n)\n");
154
	}
155
	return d;
156
}
157
158
/* ccl_set_union - create a new ccl as the set union of the two given ccls. */
159
int
160
ccl_set_union(int a, int b)
161
{
162
	int d, i;
163
164
	/* create new class  */
165
	d = cclinit();
166
167
	/* Add all of a */
168
	for (i = 0; i < ccllen[a]; ++i)
169
		ccladd(d, ccltbl[cclmap[a] + i]);
170
171
	/* Add all of b */
172
	for (i = 0; i < ccllen[b]; ++i)
173
		ccladd(d, ccltbl[cclmap[b] + i]);
174
175
	/* debug */
176
	if (0) {
177
		fprintf(stderr, "ccl_set_union (%d + %d = %d", a, b, d);
178
		fprintf(stderr, "\n    ");
179
		dump_cclp(stderr, a);
180
		fprintf(stderr, "\n    ");
181
		dump_cclp(stderr, b);
182
		fprintf(stderr, "\n    ");
183
		dump_cclp(stderr, d);
184
		fprintf(stderr, "\n)\n");
185
	}
186
	return d;
187
}
188
189
190
/* cclinit - return an empty ccl */
191
192
int
193
cclinit()
194
44
{
195
44
	if (++lastccl >= current_maxccls) {
196
		current_maxccls += MAX_CCLS_INCREMENT;
197
198
		++num_reallocs;
199
200
		cclmap =
201
		    reallocate_integer_array(cclmap, current_maxccls);
202
		ccllen =
203
		    reallocate_integer_array(ccllen, current_maxccls);
204
		cclng = reallocate_integer_array(cclng, current_maxccls);
205
		ccl_has_nl =
206
		    reallocate_bool_array(ccl_has_nl,
207
		    current_maxccls);
208
	}
209
44
	if (lastccl == 1)
210
		/* we're making the first ccl */
211
5
		cclmap[lastccl] = 0;
212
213
	else
214
		/*
215
		 * The new pointer is just past the end of the last ccl.
216
		 * Since the cclmap points to the \first/ character of a ccl,
217
		 * adding the length of the ccl to the cclmap pointer will
218
		 * produce a cursor to the first free space.
219
		 */
220
39
		cclmap[lastccl] =
221
		    cclmap[lastccl - 1] + ccllen[lastccl - 1];
222
223
44
	ccllen[lastccl] = 0;
224
44
	cclng[lastccl] = 0;	/* ccl's start out life un-negated */
225
44
	ccl_has_nl[lastccl] = false;
226
227
44
	return lastccl;
228
}
229
230
231
/* cclnegate - negate the given ccl */
232
233
void
234
cclnegate(cclp)
235
	int cclp;
236
22
{
237
22
	cclng[cclp] = 1;
238
22
	ccl_has_nl[cclp] = !ccl_has_nl[cclp];
239
22
}
240
241
242
/* list_character_set - list the members of a set of characters in CCL form
243
 *
244
 * Writes to the given file a character-class representation of those
245
 * characters present in the given CCL.  A character is present if it
246
 * has a non-zero value in the cset array.
247
 */
248
249
void
250
list_character_set(file, cset)
251
	FILE *file;
252
	int cset[];
253
{
254
	int i;
255
256
	putc('[', file);
257
258
	for (i = 0; i < csize; ++i) {
259
		if (cset[i]) {
260
			int start_char = i;
261
262
			putc(' ', file);
263
264
			fputs(readable_form(i), file);
265
266
			while (++i < csize && cset[i]);
267
268
			if (i - 1 > start_char)
269
				/* this was a run */
270
				fprintf(file, "-%s",
271
				    readable_form(i - 1));
272
273
			putc(' ', file);
274
		}
275
	}
276
277
	putc(']', file);
278
}
279
280
/** Determines if the range [c1-c2] is unambiguous in a case-insensitive
281
 * scanner.  Specifically, if a lowercase or uppercase character, x, is in the
282
 * range [c1-c2], then we require that UPPERCASE(x) and LOWERCASE(x) must also
283
 * be in the range. If not, then this range is ambiguous, and the function
284
 * returns false.  For example, [@-_] spans [a-z] but not [A-Z].  Beware that
285
 * [a-z] will be labeled ambiguous because it does not include [A-Z].
286
 *
287
 * @param c1 the lower end of the range
288
 * @param c2 the upper end of the range
289
 * @return true if [c1-c2] is not ambiguous for a caseless scanner.
290
 */
291
bool
292
range_covers_case(int c1, int c2)
293
{
294
	int i, o;
295
296
	for (i = c1; i <= c2; i++) {
297
		if (has_case(i)) {
298
			o = reverse_case(i);
299
			if (o < c1 || c2 < o)
300
				return false;
301
		}
302
	}
303
	return true;
304
}
305
306
/** Reverse the case of a character, if possible.
307
 * @return c if case-reversal does not apply.
308
 */
309
int
310
reverse_case(int c)
311
751
{
312

1477
	return isupper(c) ? tolower(c) : (islower(c) ? toupper(c) : c);
313
}
314
315
/** Return true if c is uppercase or lowercase. */
316
bool
317
has_case(int c)
318
1000
{
319

1970
	return (isupper(c) || islower(c)) ? true : false;
320
}