GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: lib/libedit/tokenizer.c Lines: 0 173 0.0 %
Date: 2017-11-13 Branches: 0 84 0.0 %

Line Branch Exec Source
1
/*	$OpenBSD: tokenizer.c,v 1.21 2016/04/11 21:17:29 schwarze Exp $	*/
2
/*	$NetBSD: tokenizer.c,v 1.28 2016/04/11 18:56:31 christos Exp $	*/
3
4
/*-
5
 * Copyright (c) 1992, 1993
6
 *	The Regents of the University of California.  All rights reserved.
7
 *
8
 * This code is derived from software contributed to Berkeley by
9
 * Christos Zoulas of Cornell University.
10
 *
11
 * Redistribution and use in source and binary forms, with or without
12
 * modification, are permitted provided that the following conditions
13
 * are met:
14
 * 1. Redistributions of source code must retain the above copyright
15
 *    notice, this list of conditions and the following disclaimer.
16
 * 2. Redistributions in binary form must reproduce the above copyright
17
 *    notice, this list of conditions and the following disclaimer in the
18
 *    documentation and/or other materials provided with the distribution.
19
 * 3. Neither the name of the University nor the names of its contributors
20
 *    may be used to endorse or promote products derived from this software
21
 *    without specific prior written permission.
22
 *
23
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33
 * SUCH DAMAGE.
34
 */
35
36
#include "config.h"
37
38
/* We build this file twice, once as NARROW, once as WIDE. */
39
/*
40
 * tokenizer.c: Bourne shell-like tokenizer
41
 */
42
#include <stdlib.h>
43
#include <string.h>
44
45
#include "histedit.h"
46
47
/* Quoting state carried by the tokenizer from one character to the next. */
typedef enum {
	Q_none,		/* not inside any quoting			*/
	Q_single,	/* between single quotes			*/
	Q_double,	/* between double quotes			*/
	Q_one,		/* just after a backslash, outside quotes	*/
	Q_doubleone	/* just after a backslash, in double quotes	*/
} quote_t;
50
51
#define	TOK_KEEP	1	/* flag: keep the current word even if empty */
52
#define	TOK_EAT		2	/* flag: an escaped newline was just consumed */
53
54
#define	WINCR		20	/* word buffer initial size and growth step */
55
#define	AINCR		10	/* argv initial size and growth step */
56
57
#define	IFS		STR("\t \n")	/* separators used when init gets NULL */
58
59
#ifdef NARROWCHAR	/* narrow build: plain char strings */
60
#define	Char			char
61
#define	FUN(prefix, rest)	prefix ## _ ## rest	/* prefix_rest */
62
#define	TYPE(type)		type
63
#define	STR(x)			x
64
#define	Strchr(s, c)		strchr(s, c)
65
#define	tok_strdup(s)		strdup(s)
66
#else	/* wide build: wchar_t strings, names get a w/W marker */
67
#define	Char			wchar_t
68
#define	FUN(prefix, rest)	prefix ## _w ## rest	/* prefix_wrest */
69
#define	TYPE(type)		type ## W	/* typeW */
70
#define	STR(x)			L ## x	/* wide string literal */
71
#define	Strchr(s, c)		wcschr(s, c)
72
#define	tok_strdup(s)		wcsdup(s)
73
#endif
74
75
/* Tokenizer state; one instance is created by FUN(tok,init)(). */
struct TYPE(tokenizer) {
	Char	 *ifs;		/* Characters that separate words	 */
	int	  argc;		/* Number of words stored in argv	 */
	int	  amax;		/* Number of slots allocated for argv	 */
	Char	**argv;		/* Word pointers into the word buffer	 */
	Char	 *wptr;		/* Next free position in word buffer	 */
	Char	 *wmax;		/* One past the end of the word buffer	 */
	Char	 *wstart;	/* Start of the word being assembled	 */
	Char	 *wspace;	/* Base of the word buffer		 */
	quote_t	  quote;	/* Current quoting state		 */
	int	  flags;	/* TOK_KEEP / TOK_EAT bits		 */
};
85
86
87
static void FUN(tok,finish)(TYPE(Tokenizer) *);


/* FUN(tok,finish)():
 *	Terminate the word under construction.  The word is appended to
 *	argv when it is non-empty or when TOK_KEEP is set; otherwise it is
 *	dropped.  TOK_KEEP is always cleared afterwards.
 *	The caller must have left room for the terminator and for one more
 *	argv slot plus the trailing NULL.
 */
static void
FUN(tok,finish)(TYPE(Tokenizer) *tok)
{
	const int keep_empty = (tok->flags & TOK_KEEP) != 0;
	const int has_text = tok->wptr != tok->wstart;

	*tok->wptr = '\0';
	if (keep_empty || has_text) {
		/* Publish the word and keep argv NULL-terminated. */
		tok->argv[tok->argc] = tok->wstart;
		tok->argc += 1;
		tok->argv[tok->argc] = NULL;
		/* The next word begins right after the terminator. */
		tok->wptr += 1;
		tok->wstart = tok->wptr;
	}
	tok->flags &= ~TOK_KEEP;
}
105
106
107
/* FUN(tok,init)():
108
 *	Initialize the tokenizer
109
 */
110
TYPE(Tokenizer) *
111
FUN(tok,init)(const Char *ifs)
112
{
113
	TYPE(Tokenizer) *tok = malloc(sizeof(TYPE(Tokenizer)));
114
115
	if (tok == NULL)
116
		return NULL;
117
	tok->ifs = tok_strdup(ifs ? ifs : IFS);
118
	if (tok->ifs == NULL) {
119
		free(tok);
120
		return NULL;
121
	}
122
	tok->argc = 0;
123
	tok->amax = AINCR;
124
	tok->argv = reallocarray(NULL, tok->amax, sizeof(*tok->argv));
125
	if (tok->argv == NULL) {
126
		free(tok->ifs);
127
		free(tok);
128
		return NULL;
129
	}
130
	tok->argv[0] = NULL;
131
	tok->wspace = reallocarray(NULL, WINCR, sizeof(*tok->wspace));
132
	if (tok->wspace == NULL) {
133
		free(tok->argv);
134
		free(tok->ifs);
135
		free(tok);
136
		return NULL;
137
	}
138
	tok->wmax = tok->wspace + WINCR;
139
	tok->wstart = tok->wspace;
140
	tok->wptr = tok->wspace;
141
	tok->flags = 0;
142
	tok->quote = Q_none;
143
144
	return tok;
145
}
146
147
148
/* FUN(tok,reset)():
149
 *	Reset the tokenizer
150
 */
151
void
152
FUN(tok,reset)(TYPE(Tokenizer) *tok)
153
{
154
155
	tok->argc = 0;
156
	tok->wstart = tok->wspace;
157
	tok->wptr = tok->wspace;
158
	tok->flags = 0;
159
	tok->quote = Q_none;
160
}
161
162
163
/* FUN(tok,end)():
164
 *	Clean up
165
 */
166
void
167
FUN(tok,end)(TYPE(Tokenizer) *tok)
168
{
169
170
	free(tok->ifs);
171
	free(tok->wspace);
172
	free(tok->argv);
173
	free(tok);
174
}
175
176
177
178
/* FUN(tok,line)():
179
 *	Bourne shell (sh(1)) like tokenizing
180
 *	Arguments:
181
 *		tok	current tokenizer state (setup with FUN(tok,init)())
182
 *		line	line to parse
183
 *	Returns:
184
 *		-1	Internal error
185
 *		 3	Quoted return
186
 *		 2	Unmatched double quote
187
 *		 1	Unmatched single quote
188
 *		 0	Ok
189
 *	Modifies (if return value is 0):
190
 *		argc	number of arguments
191
 *		argv	argument array
192
 *		cursorc	if !NULL, argv element containing cursor
193
 *		cursorv	if !NULL, offset in argv[cursorc] of cursor
194
 */
195
int
196
FUN(tok,line)(TYPE(Tokenizer) *tok, const TYPE(LineInfo) *line,
197
    int *argc, const Char ***argv, int *cursorc, int *cursoro)
198
{
199
	const Char *ptr;
200
	int cc, co;
201
202
	cc = co = -1;
203
	ptr = line->buffer;
204
	for (ptr = line->buffer; ;ptr++) {
205
		if (ptr >= line->lastchar)
206
			ptr = STR("");
207
		if (ptr == line->cursor) {
208
			cc = tok->argc;
209
			co = (int)(tok->wptr - tok->wstart);
210
		}
211
		switch (*ptr) {
212
		case '\'':
213
			tok->flags |= TOK_KEEP;
214
			tok->flags &= ~TOK_EAT;
215
			switch (tok->quote) {
216
			case Q_none:
217
				tok->quote = Q_single;	/* Enter single quote
218
							 * mode */
219
				break;
220
221
			case Q_single:	/* Exit single quote mode */
222
				tok->quote = Q_none;
223
				break;
224
225
			case Q_one:	/* Quote this ' */
226
				tok->quote = Q_none;
227
				*tok->wptr++ = *ptr;
228
				break;
229
230
			case Q_double:	/* Stay in double quote mode */
231
				*tok->wptr++ = *ptr;
232
				break;
233
234
			case Q_doubleone:	/* Quote this ' */
235
				tok->quote = Q_double;
236
				*tok->wptr++ = *ptr;
237
				break;
238
239
			default:
240
				return -1;
241
			}
242
			break;
243
244
		case '"':
245
			tok->flags &= ~TOK_EAT;
246
			tok->flags |= TOK_KEEP;
247
			switch (tok->quote) {
248
			case Q_none:	/* Enter double quote mode */
249
				tok->quote = Q_double;
250
				break;
251
252
			case Q_double:	/* Exit double quote mode */
253
				tok->quote = Q_none;
254
				break;
255
256
			case Q_one:	/* Quote this " */
257
				tok->quote = Q_none;
258
				*tok->wptr++ = *ptr;
259
				break;
260
261
			case Q_single:	/* Stay in single quote mode */
262
				*tok->wptr++ = *ptr;
263
				break;
264
265
			case Q_doubleone:	/* Quote this " */
266
				tok->quote = Q_double;
267
				*tok->wptr++ = *ptr;
268
				break;
269
270
			default:
271
				return -1;
272
			}
273
			break;
274
275
		case '\\':
276
			tok->flags |= TOK_KEEP;
277
			tok->flags &= ~TOK_EAT;
278
			switch (tok->quote) {
279
			case Q_none:	/* Quote next character */
280
				tok->quote = Q_one;
281
				break;
282
283
			case Q_double:	/* Quote next character */
284
				tok->quote = Q_doubleone;
285
				break;
286
287
			case Q_one:	/* Quote this, restore state */
288
				*tok->wptr++ = *ptr;
289
				tok->quote = Q_none;
290
				break;
291
292
			case Q_single:	/* Stay in single quote mode */
293
				*tok->wptr++ = *ptr;
294
				break;
295
296
			case Q_doubleone:	/* Quote this \ */
297
				tok->quote = Q_double;
298
				*tok->wptr++ = *ptr;
299
				break;
300
301
			default:
302
				return -1;
303
			}
304
			break;
305
306
		case '\n':
307
			tok->flags &= ~TOK_EAT;
308
			switch (tok->quote) {
309
			case Q_none:
310
				goto tok_line_outok;
311
312
			case Q_single:
313
			case Q_double:
314
				*tok->wptr++ = *ptr;	/* Add the return */
315
				break;
316
317
			case Q_doubleone:   /* Back to double, eat the '\n' */
318
				tok->flags |= TOK_EAT;
319
				tok->quote = Q_double;
320
				break;
321
322
			case Q_one:	/* No quote, more eat the '\n' */
323
				tok->flags |= TOK_EAT;
324
				tok->quote = Q_none;
325
				break;
326
327
			default:
328
				return 0;
329
			}
330
			break;
331
332
		case '\0':
333
			switch (tok->quote) {
334
			case Q_none:
335
				/* Finish word and return */
336
				if (tok->flags & TOK_EAT) {
337
					tok->flags &= ~TOK_EAT;
338
					return 3;
339
				}
340
				goto tok_line_outok;
341
342
			case Q_single:
343
				return 1;
344
345
			case Q_double:
346
				return 2;
347
348
			case Q_doubleone:
349
				tok->quote = Q_double;
350
				*tok->wptr++ = *ptr;
351
				break;
352
353
			case Q_one:
354
				tok->quote = Q_none;
355
				*tok->wptr++ = *ptr;
356
				break;
357
358
			default:
359
				return -1;
360
			}
361
			break;
362
363
		default:
364
			tok->flags &= ~TOK_EAT;
365
			switch (tok->quote) {
366
			case Q_none:
367
				if (Strchr(tok->ifs, *ptr) != NULL)
368
					FUN(tok,finish)(tok);
369
				else
370
					*tok->wptr++ = *ptr;
371
				break;
372
373
			case Q_single:
374
			case Q_double:
375
				*tok->wptr++ = *ptr;
376
				break;
377
378
379
			case Q_doubleone:
380
				*tok->wptr++ = '\\';
381
				tok->quote = Q_double;
382
				*tok->wptr++ = *ptr;
383
				break;
384
385
			case Q_one:
386
				tok->quote = Q_none;
387
				*tok->wptr++ = *ptr;
388
				break;
389
390
			default:
391
				return -1;
392
393
			}
394
			break;
395
		}
396
397
		if (tok->wptr >= tok->wmax - 4) {
398
			size_t size = tok->wmax - tok->wspace + WINCR;
399
			Char *s = reallocarray(tok->wspace, size, sizeof(*s));
400
			if (s == NULL)
401
				return -1;
402
403
			if (s != tok->wspace) {
404
				int i;
405
				for (i = 0; i < tok->argc; i++) {
406
				    tok->argv[i] =
407
					(tok->argv[i] - tok->wspace) + s;
408
				}
409
				tok->wptr = (tok->wptr - tok->wspace) + s;
410
				tok->wstart = (tok->wstart - tok->wspace) + s;
411
				tok->wspace = s;
412
			}
413
			tok->wmax = s + size;
414
		}
415
		if (tok->argc >= tok->amax - 4) {
416
			Char **p;
417
			tok->amax += AINCR;
418
			p = reallocarray(tok->argv, tok->amax, sizeof(*p));
419
			if (p == NULL) {
420
				tok->amax -= AINCR;
421
				return -1;
422
			}
423
			tok->argv = p;
424
		}
425
	}
426
 tok_line_outok:
427
	if (cc == -1 && co == -1) {
428
		cc = tok->argc;
429
		co = (int)(tok->wptr - tok->wstart);
430
	}
431
	if (cursorc != NULL)
432
		*cursorc = cc;
433
	if (cursoro != NULL)
434
		*cursoro = co;
435
	FUN(tok,finish)(tok);
436
	*argv = (const Char **)tok->argv;
437
	*argc = tok->argc;
438
	return 0;
439
}
440
441
/* FUN(tok,str)():
442
 *	Simpler version of tok_line, taking a NUL terminated line
443
 *	and splitting into words, ignoring cursor state.
444
 */
445
int
446
FUN(tok,str)(TYPE(Tokenizer) *tok, const Char *line, int *argc,
447
    const Char ***argv)
448
{
449
	TYPE(LineInfo) li;
450
451
	memset(&li, 0, sizeof(li));
452
	li.buffer = line;
453
	li.cursor = li.lastchar = Strchr(line, '\0');
454
	return FUN(tok,line)(tok, &li, argc, argv, NULL, NULL);
455
}