GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: usr.bin/awk/lex.c Lines: 227 302 75.2 %
Date: 2017-11-13 Branches: 199 348 57.2 %

Line Branch Exec Source
1
/*	$OpenBSD: lex.c,v 1.12 2011/09/28 19:27:18 millert Exp $	*/
2
/****************************************************************
3
Copyright (C) Lucent Technologies 1997
4
All Rights Reserved
5
6
Permission to use, copy, modify, and distribute this software and
7
its documentation for any purpose and without fee is hereby
8
granted, provided that the above copyright notice appear in all
9
copies and that both that the copyright notice and this
10
permission notice and warranty disclaimer appear in supporting
11
documentation, and that the name Lucent Technologies or any of
12
its entities not be used in advertising or publicity pertaining
13
to distribution of the software without specific, written prior
14
permission.
15
16
LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
18
IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
19
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
21
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
22
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23
THIS SOFTWARE.
24
****************************************************************/
25
26
#include <stdio.h>
27
#include <stdlib.h>
28
#include <string.h>
29
#include <ctype.h>
30
#include "awk.h"
31
#include "ytab.h"
32
33
extern YYSTYPE	yylval;
34
extern int	infunc;
35
36
/* Lexer-wide counters. */
int	lineno = 1;	/* bumped by input() on '\n', decremented by unput() */
int	bracecnt = 0;	/* '{' seen minus '}' seen in yylex() */
int	brackcnt = 0;	/* '[' seen minus ']' seen in yylex() */
int	parencnt = 0;	/* '(' seen minus ')' seen in yylex() */
40
41
/* One row of the reserved-word table that binsearch() looks names up in. */
typedef struct Keyword {
	const char *word;	/* spelling compared against the input name */
	int	sub;		/* value word() stores into yylval.i */
	int	type;		/* value word() switches on and returns as token */
} Keyword;
46
47
Keyword keywords[] ={	/* keep sorted: binary searched */
48
	{ "BEGIN",	XBEGIN,		XBEGIN },
49
	{ "END",	XEND,		XEND },
50
	{ "NF",		VARNF,		VARNF },
51
	{ "and",	FAND,		BLTIN },
52
	{ "atan2",	FATAN,		BLTIN },
53
	{ "break",	BREAK,		BREAK },
54
	{ "close",	CLOSE,		CLOSE },
55
	{ "compl",	FCOMPL,		BLTIN },
56
	{ "continue",	CONTINUE,	CONTINUE },
57
	{ "cos",	FCOS,		BLTIN },
58
	{ "delete",	DELETE,		DELETE },
59
	{ "do",		DO,		DO },
60
	{ "else",	ELSE,		ELSE },
61
	{ "exit",	EXIT,		EXIT },
62
	{ "exp",	FEXP,		BLTIN },
63
	{ "fflush",	FFLUSH,		BLTIN },
64
	{ "for",	FOR,		FOR },
65
	{ "func",	FUNC,		FUNC },
66
	{ "function",	FUNC,		FUNC },
67
	{ "getline",	GETLINE,	GETLINE },
68
	{ "gsub",	GSUB,		GSUB },
69
	{ "if",		IF,		IF },
70
	{ "in",		IN,		IN },
71
	{ "index",	INDEX,		INDEX },
72
	{ "int",	FINT,		BLTIN },
73
	{ "length",	FLENGTH,	BLTIN },
74
	{ "log",	FLOG,		BLTIN },
75
	{ "lshift",	FLSHIFT,	BLTIN },
76
	{ "match",	MATCHFCN,	MATCHFCN },
77
	{ "next",	NEXT,		NEXT },
78
	{ "nextfile",	NEXTFILE,	NEXTFILE },
79
	{ "or",		FFOR,		BLTIN },
80
	{ "print",	PRINT,		PRINT },
81
	{ "printf",	PRINTF,		PRINTF },
82
	{ "rand",	FRAND,		BLTIN },
83
	{ "return",	RETURN,		RETURN },
84
	{ "rshift",	FRSHIFT,	BLTIN },
85
	{ "sin",	FSIN,		BLTIN },
86
	{ "split",	SPLIT,		SPLIT },
87
	{ "sprintf",	SPRINTF,	SPRINTF },
88
	{ "sqrt",	FSQRT,		BLTIN },
89
	{ "srand",	FSRAND,		BLTIN },
90
	{ "sub",	SUB,		SUB },
91
	{ "substr",	SUBSTR,		SUBSTR },
92
	{ "system",	FSYSTEM,	BLTIN },
93
	{ "tolower",	FTOLOWER,	BLTIN },
94
	{ "toupper",	FTOUPPER,	BLTIN },
95
	{ "while",	WHILE,		WHILE },
96
	{ "xor",	FXOR,		BLTIN },
97
};
98
99
/*
 * RET(x): return token x from the current function; when dbg is set,
 * first print its name via tokname().  Wrapped in do/while(0) so that
 * "RET(x);" is a single statement and stays valid in front of an else.
 * Note that x is evaluated twice when dbg is non-zero.
 */
#define	RET(x)	do { if(dbg)printf("lex %s\n", tokname(x)); return(x); } while (0)
100
101
int peek(void);
102
int gettok(char **, int *);
103
int binsearch(char *, Keyword *, int);
104
105
/*
 * peek - look at the next input character without consuming it.
 * Reads one character with input() and immediately pushes it back
 * with unput(); returns that character (0 at end of input).
 */
int peek(void)
{
	int next;

	next = input();
	unput(next);
	return next;
}
111
112
/*
 * gettok - read the next raw token from the lexer input.
 *
 * pbuf, psz: address of the caller's token buffer and of its size.  The
 *	buffer may be reallocated through adjbuf(); the possibly updated
 *	pointer and size are written back before a name or number is
 *	returned.
 *
 * Returns 0 at end of input; 'a' when a name (letter or '_' followed by
 * letters, digits, '_') was collected; '0' when a number was collected;
 * otherwise the character itself.  The token text is left NUL-terminated
 * in the buffer.
 */
int gettok(char **pbuf, int *psz)	/* get next input token */
{
	int c, retc;
	char *buf = *pbuf;
	int sz = *psz;
	char *bp = buf;

	c = input();
	if (c == 0)
		return 0;
	buf[0] = c;
	buf[1] = 0;
	if (!isalnum(c) && c != '.' && c != '_')
		return c;

	*bp++ = c;
	if (isalpha(c) || c == '_') {	/* it's a varname */
		for ( ; (c = input()) != 0; ) {
			/*
			 * Ask for room for this character AND the terminating
			 * NUL, so the store after the loop stays in bounds even
			 * when input ends right after the buffer fills up.
			 */
			if (bp-buf+2 > sz) {
				if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
					FATAL( "out of space for name %.10s...", buf );
			}
			if (isalnum(c) || c == '_')
				*bp++ = c;
			else {
				unput(c);
				break;
			}
		}
		*bp = 0;
		retc = 'a';	/* alphanumeric */
	} else {	/* maybe it's a number, but could be . */
		char *rem;
		/* read input until can't be a number */
		for ( ; (c = input()) != 0; ) {
			/* same reservation as above: character plus NUL */
			if (bp-buf+2 > sz) {
				if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
					FATAL( "out of space for number %.10s...", buf );
			}
			if (isdigit(c) || c == 'e' || c == 'E'
			  || c == '.' || c == '+' || c == '-')
				*bp++ = c;
			else {
				unput(c);
				break;
			}
		}
		*bp = 0;
		strtod(buf, &rem);	/* parse the number */
		if (rem == buf) {	/* it wasn't a valid number at all */
			retc = buf[0];	/* character is its own type */
			/*
			 * Push the rest back BEFORE cutting the buffer: once
			 * buf[1] is 0, rem+1 is an empty string and the
			 * remaining characters would be dropped.
			 */
			unputstr(rem+1); /* put rest back for later */
			buf[1] = 0;	/* return one character as token */
		} else {	/* some prefix was a number */
			unputstr(rem);	/* put rest back for later */
			rem[0] = 0;	/* truncate buf after number part */
			retc = '0';	/* type is number */
		}
	}
	*pbuf = buf;
	*psz = sz;
	return retc;
}
174
175
int	word(char *);
176
int	string(void);
177
int	regexpr(void);
178
/* One-shot flags that yylex() tests and clears at the start of a call. */
int	sc = 0;		/* 1 => return a } right now */
int	reg = 0;	/* 1 => return a REGEXPR now */
180
181
int yylex(void)
182
{
183
	int c;
184
	static char *buf = 0;
185
	static int bufsize = 5; /* BUG: setting this small causes core dump! */
186
187

44569
	if (buf == 0 && (buf = (char *) malloc(bufsize)) == NULL)
188
		FATAL( "out of space in yylex" );
189
22041
	if (sc) {
190
938
		sc = 0;
191
1876
		RET('}');
192
	}
193
21103
	if (reg) {
194
80
		reg = 0;
195
80
		return regexpr();
196
	}
197
35820
	for (;;) {
198
35820
		c = gettok(&buf, &bufsize);
199
35820
		if (c == 0)
200
487
			return 0;
201
35333
		if (isalpha(c) || c == '_')
202
4927
			return word(buf);
203
30406
		if (isdigit(c)) {
204
1553
			yylval.cp = setsymtab(buf, tostring(buf), atof(buf), CON|NUM, symtab);
205
			/* should this also have STR set? */
206
3106
			RET(NUMBER);
207
		}
208
209
29241
		yylval.i = c;
210







29241
		switch (c) {
211
		case '\n':	/* {EOL} */
212
6916
			RET(NL);
213
		case '\r':	/* assume \n is coming */
214
		case ' ':	/* {WS}+ */
215
		case '\t':
216
			break;
217
		case '#':	/* #.* strip comments */
218
55276
			while ((c = input()) != '\n' && c != 0)
219
				;
220
388
			unput(c);
221
388
			break;
222
		case ';':
223
2558
			RET(';');
224
		case '\\':
225
			if (peek() == '\n') {
226
				input();
227
			} else if (peek() == '\r') {
228
				input(); input();	/* \n */
229
				lineno++;
230
			} else {
231
				RET(c);
232
			}
233
			break;
234
		case '&':
235
17
			if (peek() == '&') {
236
34
				input(); RET(AND);
237
			} else
238
				RET('&');
239
		case '|':
240
46
			if (peek() == '|') {
241
92
				input(); RET(BOR);
242
			} else
243
				RET('|');
244
		case '!':
245
110
			if (peek() == '=') {
246
212
				input(); yylval.i = NE; RET(NE);
247
4
			} else if (peek() == '~') {
248
4
				input(); yylval.i = NOTMATCH; RET(MATCHOP);
249
			} else
250
4
				RET(NOT);
251
		case '~':
252
12
			yylval.i = MATCH;
253
24
			RET(MATCHOP);
254
		case '<':
255
149
			if (peek() == '=') {
256
166
				input(); yylval.i = LE; RET(LE);
257
			} else {
258
132
				yylval.i = LT; RET(LT);
259
			}
260
		case '=':
261
987
			if (peek() == '=') {
262
446
				input(); yylval.i = EQ; RET(EQ);
263
			} else {
264
1528
				yylval.i = ASSIGN; RET(ASGNOP);
265
			}
266
		case '>':
267
79
			if (peek() == '=') {
268
30
				input(); yylval.i = GE; RET(GE);
269
64
			} else if (peek() == '>') {
270
				input(); yylval.i = APPEND; RET(APPEND);
271
			} else {
272
128
				yylval.i = GT; RET(GT);
273
			}
274
		case '+':
275
297
			if (peek() == '+') {
276
314
				input(); yylval.i = INCR; RET(INCR);
277
140
			} else if (peek() == '=') {
278
80
				input(); yylval.i = ADDEQ; RET(ASGNOP);
279
			} else
280
200
				RET('+');
281
		case '-':
282
55
			if (peek() == '-') {
283
26
				input(); yylval.i = DECR; RET(DECR);
284
42
			} else if (peek() == '=') {
285
				input(); yylval.i = SUBEQ; RET(ASGNOP);
286
			} else
287
84
				RET('-');
288
		case '*':
289
18
			if (peek() == '=') {	/* *= */
290
				input(); yylval.i = MULTEQ; RET(ASGNOP);
291
18
			} else if (peek() == '*') {	/* ** or **= */
292
				input();	/* eat 2nd * */
293
				if (peek() == '=') {
294
					input(); yylval.i = POWEQ; RET(ASGNOP);
295
				} else {
296
					RET(POWER);
297
				}
298
			} else
299
36
				RET('*');
300
		case '/':
301
320
			RET('/');
302
		case '%':
303
68
			if (peek() == '=') {
304
				input(); yylval.i = MODEQ; RET(ASGNOP);
305
			} else
306
136
				RET('%');
307
		case '^':
308
			if (peek() == '=') {
309
				input(); yylval.i = POWEQ; RET(ASGNOP);
310
			} else
311
				RET(POWER);
312
313
		case '$':
314
			/* BUG: awkward, if not wrong */
315
700
			c = gettok(&buf, &bufsize);
316
700
			if (isalpha(c)) {
317
54
				if (strcmp(buf, "NF") == 0) {	/* very special */
318
					unputstr("(NF)");
319
					RET(INDIRECT);
320
				}
321
54
				c = peek();
322

108
				if (c == '(' || c == '[' || (infunc && isarg(buf) >= 0)) {
323
					unputstr(buf);
324
					RET(INDIRECT);
325
				}
326
54
				yylval.cp = setsymtab(buf, "", 0.0, STR|NUM, symtab);
327
108
				RET(IVAR);
328
646
			} else if (c == 0) {	/*  */
329
				SYNTAX( "unexpected end of input after $" );
330
				RET(';');
331
			} else {
332
646
				unputstr(buf);
333
1292
				RET(INDIRECT);
334
			}
335
336
		case '}':
337
938
			if (--bracecnt < 0)
338
				SYNTAX( "extra }" );
339
938
			sc = 1;
340
1876
			RET(';');
341
		case ']':
342
24
			if (--brackcnt < 0)
343
				SYNTAX( "extra ]" );
344
48
			RET(']');
345
		case ')':
346
1105
			if (--parencnt < 0)
347
				SYNTAX( "extra )" );
348
2210
			RET(')');
349
		case '{':
350
938
			bracecnt++;
351
1876
			RET('{');
352
		case '[':
353
24
			brackcnt++;
354
48
			RET('[');
355
		case '(':
356
1105
			parencnt++;
357
2210
			RET('(');
358
359
		case '"':
360
1788
			return string();	/* BUG: should be like tran.c ? */
361
362
		default:
363
1398
			RET(c);
364
		}
365
	}
366
22041
}
367
368
int string(void)
369
{
370
3576
	int c, n;
371
1788
	char *s, *bp;
372
	static char *buf = 0;
373
	static int bufsz = 500;
374
375

2065
	if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
376
		FATAL("out of space for strings");
377
52532
	for (bp = buf; (c = input()) != '"'; ) {
378
24478
		if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, "string"))
379
			FATAL("out of space for string %.10s...", buf);
380

24478
		switch (c) {
381
		case '\n':
382
		case '\r':
383
		case 0:
384
			SYNTAX( "non-terminated string %.10s...", buf );
385
			lineno++;
386
			if (c == 0)	/* hopeless */
387
				FATAL( "giving up" );
388
			break;
389
		case '\\':
390
989
			c = input();
391




989
			switch (c) {
392
116
			case '"': *bp++ = '"'; break;
393
649
			case 'n': *bp++ = '\n'; break;
394
186
			case 't': *bp++ = '\t'; break;
395
			case 'f': *bp++ = '\f'; break;
396
			case 'r': *bp++ = '\r'; break;
397
			case 'b': *bp++ = '\b'; break;
398
			case 'v': *bp++ = '\v'; break;
399
			case 'a': *bp++ = '\007'; break;
400
38
			case '\\': *bp++ = '\\'; break;
401
402
			case '0': case '1': case '2': /* octal: \d \dd \ddd */
403
			case '3': case '4': case '5': case '6': case '7':
404
				n = c - '0';
405
				if ((c = peek()) >= '0' && c < '8') {
406
					n = 8 * n + input() - '0';
407
					if ((c = peek()) >= '0' && c < '8')
408
						n = 8 * n + input() - '0';
409
				}
410
				*bp++ = n;
411
				break;
412
413
			case 'x':	/* hex  \x0-9a-fA-F + */
414
			    {	char xbuf[100], *px;
415
				for (px = xbuf; (c = input()) != 0 && px-xbuf < 100-2; ) {
416
					if (isdigit(c)
417
					 || (c >= 'a' && c <= 'f')
418
					 || (c >= 'A' && c <= 'F'))
419
						*px++ = c;
420
					else
421
						break;
422
				}
423
				*px = 0;
424
				unput(c);
425
	  			sscanf(xbuf, "%x", (unsigned int *) &n);
426
				*bp++ = n;
427
				break;
428
			    }
429
430
			default:
431
				*bp++ = c;
432
				break;
433
			}
434
			break;
435
		default:
436
23489
			*bp++ = c;
437
23489
			break;
438
		}
439
	}
440
1788
	*bp = 0;
441
1788
	s = tostring(buf);
442
1788
	*bp++ = ' '; *bp++ = 0;
443
1788
	yylval.cp = setsymtab(buf, s, 0.0, CON|STR|DONTFREE, symtab);
444
3576
	RET(STRING);
445
1788
}
446
447
448
/*
 * binsearch - look up w in the first n entries of kp.
 *
 * The entries must be sorted by strcmp() order of their word field.
 * Returns the index of the matching entry, or -1 when w is not present
 * (including when n <= 0).
 */
int binsearch(char *w, Keyword *kp, int n)
{
	int cond, low, mid, high;

	low = 0;
	high = n - 1;
	while (low <= high) {
		/* low + (high - low) / 2 cannot overflow, unlike (low + high) / 2 */
		mid = low + (high - low) / 2;
		if ((cond = strcmp(w, kp[mid].word)) < 0)
			high = mid - 1;
		else if (cond > 0)
			low = mid + 1;
		else
			return mid;
	}
	return -1;
}
465
466
int word(char *w)
467
{
468
	Keyword *kp;
469
	int c, n;
470
471
9854
	n = binsearch(w, keywords, sizeof(keywords)/sizeof(keywords[0]));
472
/* BUG: this ought to be inside the if; in theory could fault (daniel barrett) */
473
4927
	kp = keywords + n;
474
4927
	if (n != -1) {	/* found in table */
475
2546
		yylval.i = kp->sub;
476

2546
		switch (kp->type) {	/* special handling */
477
		case BLTIN:
478
38
			if (kp->sub == FSYSTEM && safe)
479
				SYNTAX( "system is unsafe" );
480
76
			RET(kp->type);
481
		case FUNC:
482
24
			if (infunc)
483
				SYNTAX( "illegal nested function" );
484
48
			RET(kp->type);
485
		case RETURN:
486
12
			if (!infunc)
487
				SYNTAX( "return not in function" );
488
24
			RET(kp->type);
489
		case VARNF:
490
12
			yylval.cp = setsymtab("NF", "", 0.0, NUM, symtab);
491
24
			RET(VARNF);
492
		default:
493
4920
			RET(kp->type);
494
		}
495
	}
496
2381
	c = peek();	/* look for '(' */
497

2565
	if (c != '(' && infunc && (n=isarg(w)) >= 0) {
498
48
		yylval.i = n;
499
96
		RET(ARG);
500
	} else {
501
2333
		yylval.cp = setsymtab(w, "", 0.0, STR|NUM|DONTFREE, symtab);
502
2333
		if (c == '(') {
503
268
			RET(CALL);
504
		} else {
505
4398
			RET(VAR);
506
		}
507
	}
508
4927
}
509
510
void startreg(void)	/* next call to yylex will return a regular expression */
511
{
512
160
	reg = 1;
513
80
}
514
515
int regexpr(void)
516
{
517
	int c, openclass = 0;
518
	static char *buf = 0;
519
	static int bufsz = 500;
520
160
	char *bp;
521
522

142
	if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
523
		FATAL("out of space for rex expr");
524
80
	bp = buf;
525
1228
	for ( ; ((c = input()) != '/' || openclass == 1) && c != 0; ) {
526
534
		if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, "regexpr"))
527
			FATAL("out of space for reg expr %.10s...", buf);
528
534
		if (c == '\n') {
529
			SYNTAX( "newline in regular expression %.10s...", buf );
530
			unput('\n');
531
			break;
532
534
		} else if (c == '\\') {
533
18
			*bp++ = '\\';
534
18
			*bp++ = input();
535
18
		} else {
536
516
			if (c == '[')
537
6
				openclass = 1;
538
510
			else if (c == ']')
539
6
				openclass = 0;
540
516
			*bp++ = c;
541
		}
542
	}
543
80
	*bp = 0;
544
80
	if (c == 0)
545
		SYNTAX("non-terminated regular expression %.10s...", buf);
546
80
	yylval.s = tostring(buf);
547
80
	unput('/');
548
160
	RET(REGEXPR);
549
80
}
550
551
/* low-level lexical stuff, sort of inherited from lex */
552
553
char	ebuf[300];		/* ring of characters most recently returned by input() */
char	*ep = ebuf;		/* next slot of ebuf that input() will fill */
char	yysbuf[100];		/* pushback buffer */
char	*yysptr = yysbuf;	/* one past the last pushed-back character */
FILE	*yyin = 0;
558
559
int input(void)	/* get next lexical input character */
560
{
561
	int c;
562
	extern char *lexprog;
563
564
245032
	if (yysptr > yysbuf)
565
12863
		c = (uschar)*--yysptr;
566
109653
	else if (lexprog != NULL) {	/* awk '...' */
567
34873
		if ((c = (uschar)*lexprog) != 0)
568
34402
			lexprog++;
569
	} else				/* awk -f ... */
570
74780
		c = pgetc();
571
122516
	if (c == '\n')
572
4057
		lineno++;
573
118459
	else if (c == EOF)
574
16
		c = 0;
575
122516
	if (ep >= ebuf + sizeof ebuf)
576
280
		ep = ebuf;
577
122516
	return *ep++ = c;
578
}
579
580
void unput(int c)	/* put lexical character back on input */
581
{
582
25726
	if (c == '\n')
583
599
		lineno--;
584
12863
	if (yysptr >= yysbuf + sizeof(yysbuf))
585
		FATAL("pushed back too much: %.20s...", yysbuf);
586
12863
	*yysptr++ = c;
587
12863
	if (--ep < ebuf)
588
		ep = ebuf + sizeof(ebuf) - 1;
589
12863
}
590
591
/*
 * unputstr - put a string back on input.
 *
 * The characters of s are handed to unput() from last to first, so that
 * input() subsequently yields them in their original order.  An empty
 * string pushes nothing.
 */
void unputstr(const char *s)	/* put a string back on input */
{
	size_t i;

	/* count down in size_t: no narrowing of strlen() to int, no -1 index */
	for (i = strlen(s); i > 0; i--)
		unput(s[i-1]);
}