GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: usr.bin/awk/lex.c Lines: 228 302 75.5 %
Date: 2017-11-07 Branches: 200 348 57.5 %

Line Branch Exec Source
1
/*	$OpenBSD: lex.c,v 1.12 2011/09/28 19:27:18 millert Exp $	*/
2
/****************************************************************
3
Copyright (C) Lucent Technologies 1997
4
All Rights Reserved
5
6
Permission to use, copy, modify, and distribute this software and
7
its documentation for any purpose and without fee is hereby
8
granted, provided that the above copyright notice appear in all
9
copies and that both that the copyright notice and this
10
permission notice and warranty disclaimer appear in supporting
11
documentation, and that the name Lucent Technologies or any of
12
its entities not be used in advertising or publicity pertaining
13
to distribution of the software without specific, written prior
14
permission.
15
16
LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
18
IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
19
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
21
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
22
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23
THIS SOFTWARE.
24
****************************************************************/
25
26
#include <stdio.h>
27
#include <stdlib.h>
28
#include <string.h>
29
#include <ctype.h>
30
#include "awk.h"
31
#include "ytab.h"
32
33
/* Parser-side state referenced by the lexer; both are defined elsewhere. */
extern YYSTYPE	yylval;		/* value attached to the token being returned */
extern int	infunc;		/* tested below to allow/deny function-only constructs */

/* Lexer bookkeeping. */
int	lineno	= 1;		/* bumped by input() on '\n', undone by unput() */
int	bracecnt = 0;		/* '{' seen minus '}' seen */
int	brackcnt  = 0;		/* '[' seen minus ']' seen */
int	parencnt = 0;		/* '(' seen minus ')' seen */
40
41
typedef struct Keyword {
42
	const char *word;
43
	int	sub;
44
	int	type;
45
} Keyword;
46
47
Keyword keywords[] ={	/* keep sorted: binary searched */
48
	{ "BEGIN",	XBEGIN,		XBEGIN },
49
	{ "END",	XEND,		XEND },
50
	{ "NF",		VARNF,		VARNF },
51
	{ "and",	FAND,		BLTIN },
52
	{ "atan2",	FATAN,		BLTIN },
53
	{ "break",	BREAK,		BREAK },
54
	{ "close",	CLOSE,		CLOSE },
55
	{ "compl",	FCOMPL,		BLTIN },
56
	{ "continue",	CONTINUE,	CONTINUE },
57
	{ "cos",	FCOS,		BLTIN },
58
	{ "delete",	DELETE,		DELETE },
59
	{ "do",		DO,		DO },
60
	{ "else",	ELSE,		ELSE },
61
	{ "exit",	EXIT,		EXIT },
62
	{ "exp",	FEXP,		BLTIN },
63
	{ "fflush",	FFLUSH,		BLTIN },
64
	{ "for",	FOR,		FOR },
65
	{ "func",	FUNC,		FUNC },
66
	{ "function",	FUNC,		FUNC },
67
	{ "getline",	GETLINE,	GETLINE },
68
	{ "gsub",	GSUB,		GSUB },
69
	{ "if",		IF,		IF },
70
	{ "in",		IN,		IN },
71
	{ "index",	INDEX,		INDEX },
72
	{ "int",	FINT,		BLTIN },
73
	{ "length",	FLENGTH,	BLTIN },
74
	{ "log",	FLOG,		BLTIN },
75
	{ "lshift",	FLSHIFT,	BLTIN },
76
	{ "match",	MATCHFCN,	MATCHFCN },
77
	{ "next",	NEXT,		NEXT },
78
	{ "nextfile",	NEXTFILE,	NEXTFILE },
79
	{ "or",		FFOR,		BLTIN },
80
	{ "print",	PRINT,		PRINT },
81
	{ "printf",	PRINTF,		PRINTF },
82
	{ "rand",	FRAND,		BLTIN },
83
	{ "return",	RETURN,		RETURN },
84
	{ "rshift",	FRSHIFT,	BLTIN },
85
	{ "sin",	FSIN,		BLTIN },
86
	{ "split",	SPLIT,		SPLIT },
87
	{ "sprintf",	SPRINTF,	SPRINTF },
88
	{ "sqrt",	FSQRT,		BLTIN },
89
	{ "srand",	FSRAND,		BLTIN },
90
	{ "sub",	SUB,		SUB },
91
	{ "substr",	SUBSTR,		SUBSTR },
92
	{ "system",	FSYSTEM,	BLTIN },
93
	{ "tolower",	FTOLOWER,	BLTIN },
94
	{ "toupper",	FTOUPPER,	BLTIN },
95
	{ "while",	WHILE,		WHILE },
96
	{ "xor",	FXOR,		BLTIN },
97
};
98
99
/*
 * RET(x): return token x from the enclosing function.  When dbg is set the
 * token's name (via tokname()) is printed first.
 * The argument is evaluated exactly once, and the do/while(0) wrapper makes
 * "RET(x);" behave as one statement wherever a statement is allowed.
 */
#define	RET(x)	do { int ret_ = (x); if (dbg) printf("lex %s\n", tokname(ret_)); return ret_; } while (0)
100
101
int peek(void);
102
int gettok(char **, int *);
103
int binsearch(char *, Keyword *, int);
104
105
/* peek: return the next input character while leaving it unread */
int peek(void)
{
	int next;

	next = input();
	unput(next);	/* hand it straight back to the input stream */
	return next;
}
111
112
/*
 * gettok: read the next raw token into the caller's buffer.
 *
 * pbuf/psz point at the caller's buffer pointer and its size; both are
 * written back before returning from the name/number paths because
 * adjbuf() (defined elsewhere) is asked to enlarge the buffer on demand.
 * The buffer must hold at least two bytes on entry.
 *
 * Returns:
 *   0    end of input
 *   'a'  an identifier ([A-Za-z_][A-Za-z0-9_]*) is in *pbuf
 *   '0'  the text of a number (as accepted by strtod) is in *pbuf
 *   c    any other character; *pbuf holds that one character
 */
int gettok(char **pbuf, int *psz)	/* get next input token */
{
	int c, retc;
	char *buf = *pbuf;
	int sz = *psz;
	char *bp = buf;

	c = input();
	if (c == 0)
		return 0;
	buf[0] = c;
	buf[1] = 0;
	if (!isalnum(c) && c != '.' && c != '_')
		return c;

	*bp++ = c;
	if (isalpha(c) || c == '_') {	/* it's a varname */
		for ( ; (c = input()) != 0; ) {
			/*
			 * Keep one byte spare beyond the character about to be
			 * stored: the terminating NUL is written after the
			 * loop, including when the loop ends on end of input.
			 */
			if (bp-buf >= sz-1)
				if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
					FATAL( "out of space for name %.10s...", buf );
			if (isalnum(c) || c == '_')
				*bp++ = c;
			else {
				*bp = 0;
				unput(c);
				break;
			}
		}
		*bp = 0;
		retc = 'a';	/* alphanumeric */
	} else {	/* maybe it's a number, but could be . */
		char *rem;
		/* read input until can't be a number */
		for ( ; (c = input()) != 0; ) {
			/* same one-byte reserve as in the name loop above */
			if (bp-buf >= sz-1)
				if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
					FATAL( "out of space for number %.10s...", buf );
			if (isdigit(c) || c == 'e' || c == 'E'
			  || c == '.' || c == '+' || c == '-')
				*bp++ = c;
			else {
				unput(c);
				break;
			}
		}
		*bp = 0;
		strtod(buf, &rem);	/* parse the number */
		if (rem == buf) {	/* it wasn't a valid number at all */
			/*
			 * Push the tail back BEFORE truncating: rem+1 is
			 * buf+1, so truncating first would leave nothing to
			 * push back and the characters would be dropped.
			 */
			unputstr(rem+1); /* put rest back for later */
			buf[1] = 0;	/* return one character as token */
			retc = buf[0];	/* character is its own type */
		} else {	/* some prefix was a number */
			unputstr(rem);	/* put rest back for later */
			rem[0] = 0;	/* truncate buf after number part */
			retc = '0';	/* type is number */
		}
	}
	*pbuf = buf;
	*psz = sz;
	return retc;
}
174
175
/* Token-specific scanners used by yylex(). */
int	word(char *w);
int	string(void);
int	regexpr(void);

/* One-shot flags consumed at the top of yylex(). */
int	sc	= 0;	/* 1 => return a } right now */
int	reg	= 0;	/* 1 => return a REGEXPR now */
180
181
int yylex(void)
182
{
183
	int c;
184
	static char *buf = 0;
185
	static int bufsize = 5; /* BUG: setting this small causes core dump! */
186
187

85335
	if (buf == 0 && (buf = (char *) malloc(bufsize)) == NULL)
188
		FATAL( "out of space in yylex" );
189
42177
	if (sc) {
190
1746
		sc = 0;
191
3492
		RET('}');
192
	}
193
40431
	if (reg) {
194
114
		reg = 0;
195
114
		return regexpr();
196
	}
197
	for (;;) {
198
65683
		c = gettok(&buf, &bufsize);
199
65683
		if (c == 0)
200
981
			return 0;
201
64702
		if (isalpha(c) || c == '_')
202
9021
			return word(buf);
203
55681
		if (isdigit(c)) {
204
3310
			yylval.cp = setsymtab(buf, tostring(buf), atof(buf), CON|NUM, symtab);
205
			/* should this also have STR set? */
206
6620
			RET(NUMBER);
207
		}
208
209
124179
		yylval.i = c;
210







124179
		switch (c) {
211
		case '\n':	/* {EOL} */
212
11604
			RET(NL);
213
		case '\r':	/* assume \n is coming */
214
		case ' ':	/* {WS}+ */
215
		case '\t':
216
			break;
217
		case '#':	/* #.* strip comments */
218
32624
			while ((c = input()) != '\n' && c != 0)
219
				;
220
465
			unput(c);
221
465
			break;
222
		case ';':
223
5198
			RET(';');
224
		case '\\':
225
			if (peek() == '\n') {
226
				input();
227
			} else if (peek() == '\r') {
228
				input(); input();	/* \n */
229
				lineno++;
230
			} else {
231
				RET(c);
232
			}
233
			break;
234
		case '&':
235
40
			if (peek() == '&') {
236
80
				input(); RET(AND);
237
			} else
238
				RET('&');
239
		case '|':
240
64
			if (peek() == '|') {
241
128
				input(); RET(BOR);
242
			} else
243
				RET('|');
244
		case '!':
245
189
			if (peek() == '=') {
246
366
				input(); yylval.i = NE; RET(NE);
247
6
			} else if (peek() == '~') {
248
8
				input(); yylval.i = NOTMATCH; RET(MATCHOP);
249
			} else
250
4
				RET(NOT);
251
		case '~':
252
12
			yylval.i = MATCH;
253
24
			RET(MATCHOP);
254
		case '<':
255
350
			if (peek() == '=') {
256
412
				input(); yylval.i = LE; RET(LE);
257
			} else {
258
288
				yylval.i = LT; RET(LT);
259
			}
260
		case '=':
261
2001
			if (peek() == '=') {
262
810
				input(); yylval.i = EQ; RET(EQ);
263
			} else {
264
3192
				yylval.i = ASSIGN; RET(ASGNOP);
265
			}
266
		case '>':
267
161
			if (peek() == '=') {
268
54
				input(); yylval.i = GE; RET(GE);
269
134
			} else if (peek() == '>') {
270
				input(); yylval.i = APPEND; RET(APPEND);
271
			} else {
272
268
				yylval.i = GT; RET(GT);
273
			}
274
		case '+':
275
649
			if (peek() == '+') {
276
756
				input(); yylval.i = INCR; RET(INCR);
277
271
			} else if (peek() == '=') {
278
212
				input(); yylval.i = ADDEQ; RET(ASGNOP);
279
			} else
280
330
				RET('+');
281
		case '-':
282
112
			if (peek() == '-') {
283
72
				input(); yylval.i = DECR; RET(DECR);
284
76
			} else if (peek() == '=') {
285
				input(); yylval.i = SUBEQ; RET(ASGNOP);
286
			} else
287
152
				RET('-');
288
		case '*':
289
54
			if (peek() == '=') {	/* *= */
290
				input(); yylval.i = MULTEQ; RET(ASGNOP);
291
54
			} else if (peek() == '*') {	/* ** or **= */
292
				input();	/* eat 2nd * */
293
				if (peek() == '=') {
294
					input(); yylval.i = POWEQ; RET(ASGNOP);
295
				} else {
296
					RET(POWER);
297
				}
298
			} else
299
108
				RET('*');
300
		case '/':
301
456
			RET('/');
302
		case '%':
303
178
			if (peek() == '=') {
304
				input(); yylval.i = MODEQ; RET(ASGNOP);
305
			} else
306
356
				RET('%');
307
		case '^':
308
			if (peek() == '=') {
309
				input(); yylval.i = POWEQ; RET(ASGNOP);
310
			} else
311
				RET(POWER);
312
313
		case '$':
314
			/* BUG: awkward, if not wrong */
315
1342
			c = gettok(&buf, &bufsize);
316
1342
			if (isalpha(c)) {
317
56
				if (strcmp(buf, "NF") == 0) {	/* very special */
318
					unputstr("(NF)");
319
					RET(INDIRECT);
320
				}
321
56
				c = peek();
322

112
				if (c == '(' || c == '[' || (infunc && isarg(buf) >= 0)) {
323
					unputstr(buf);
324
					RET(INDIRECT);
325
				}
326
56
				yylval.cp = setsymtab(buf, "", 0.0, STR|NUM, symtab);
327
112
				RET(IVAR);
328
1286
			} else if (c == 0) {	/*  */
329
				SYNTAX( "unexpected end of input after $" );
330
				RET(';');
331
			} else {
332
1286
				unputstr(buf);
333
2572
				RET(INDIRECT);
334
			}
335
336
		case '}':
337
1746
			if (--bracecnt < 0)
338
				SYNTAX( "extra }" );
339
1746
			sc = 1;
340
3492
			RET(';');
341
		case ']':
342
291
			if (--brackcnt < 0)
343
				SYNTAX( "extra ]" );
344
582
			RET(']');
345
		case ')':
346
2219
			if (--parencnt < 0)
347
				SYNTAX( "extra )" );
348
4438
			RET(')');
349
		case '{':
350
1746
			bracecnt++;
351
3492
			RET('{');
352
		case '[':
353
291
			brackcnt++;
354
582
			RET('[');
355
		case '(':
356
2219
			parencnt++;
357
4438
			RET('(');
358
359
		case '"':
360
3196
			return string();	/* BUG: should be like tran.c ? */
361
362
		default:
363
3032
			RET(c);
364
		}
365
	}
366
42177
}
367
368
int string(void)
369
{
370
6392
	int c, n;
371
3196
	char *s, *bp;
372
	static char *buf = 0;
373
	static int bufsz = 500;
374
375

3825
	if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
376
		FATAL("out of space for strings");
377
45964
	for (bp = buf; (c = input()) != '"'; ) {
378
39572
		if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, "string"))
379
			FATAL("out of space for string %.10s...", buf);
380

39572
		switch (c) {
381
		case '\n':
382
		case '\r':
383
		case 0:
384
			SYNTAX( "non-terminated string %.10s...", buf );
385
			lineno++;
386
			if (c == 0)	/* hopeless */
387
				FATAL( "giving up" );
388
			break;
389
		case '\\':
390
2050
			c = input();
391




2050
			switch (c) {
392
176
			case '"': *bp++ = '"'; break;
393
1610
			case 'n': *bp++ = '\n'; break;
394
215
			case 't': *bp++ = '\t'; break;
395
			case 'f': *bp++ = '\f'; break;
396
5
			case 'r': *bp++ = '\r'; break;
397
			case 'b': *bp++ = '\b'; break;
398
			case 'v': *bp++ = '\v'; break;
399
			case 'a': *bp++ = '\007'; break;
400
44
			case '\\': *bp++ = '\\'; break;
401
402
			case '0': case '1': case '2': /* octal: \d \dd \ddd */
403
			case '3': case '4': case '5': case '6': case '7':
404
				n = c - '0';
405
				if ((c = peek()) >= '0' && c < '8') {
406
					n = 8 * n + input() - '0';
407
					if ((c = peek()) >= '0' && c < '8')
408
						n = 8 * n + input() - '0';
409
				}
410
				*bp++ = n;
411
				break;
412
413
			case 'x':	/* hex  \x0-9a-fA-F + */
414
			    {	char xbuf[100], *px;
415
				for (px = xbuf; (c = input()) != 0 && px-xbuf < 100-2; ) {
416
					if (isdigit(c)
417
					 || (c >= 'a' && c <= 'f')
418
					 || (c >= 'A' && c <= 'F'))
419
						*px++ = c;
420
					else
421
						break;
422
				}
423
				*px = 0;
424
				unput(c);
425
	  			sscanf(xbuf, "%x", (unsigned int *) &n);
426
				*bp++ = n;
427
				break;
428
			    }
429
430
			default:
431
				*bp++ = c;
432
				break;
433
			}
434
			break;
435
		default:
436
37522
			*bp++ = c;
437
37522
			break;
438
		}
439
	}
440
3196
	*bp = 0;
441
3196
	s = tostring(buf);
442
3196
	*bp++ = ' '; *bp++ = 0;
443
3196
	yylval.cp = setsymtab(buf, s, 0.0, CON|STR|DONTFREE, symtab);
444
6392
	RET(STRING);
445
3196
}
446
447
448
/*
 * binsearch: look up w among the n entries of kp, which must be sorted by
 * strcmp() order of their word fields.
 * Returns the index of the matching entry, or -1 when w is not present.
 */
int binsearch(char *w, Keyword *kp, int n)
{
	int lo = 0;
	int hi = n - 1;

	while (lo <= hi) {
		int mid = (lo + hi) / 2;
		int cmp = strcmp(w, kp[mid].word);

		if (cmp == 0)
			return mid;
		if (cmp < 0)
			hi = mid - 1;	/* w sorts before the probe */
		else
			lo = mid + 1;	/* w sorts after the probe */
	}
	return -1;
}
465
466
int word(char *w)
467
{
468
	Keyword *kp;
469
	int c, n;
470
471
18042
	n = binsearch(w, keywords, sizeof(keywords)/sizeof(keywords[0]));
472
/* BUG: this ought to be inside the if; in theory could fault (daniel barrett) */
473
9021
	kp = keywords + n;
474
9021
	if (n != -1) {	/* found in table */
475
4443
		yylval.i = kp->sub;
476

4443
		switch (kp->type) {	/* special handling */
477
		case BLTIN:
478
60
			if (kp->sub == FSYSTEM && safe)
479
				SYNTAX( "system is unsafe" );
480
120
			RET(kp->type);
481
		case FUNC:
482
31
			if (infunc)
483
				SYNTAX( "illegal nested function" );
484
62
			RET(kp->type);
485
		case RETURN:
486
14
			if (!infunc)
487
				SYNTAX( "return not in function" );
488
28
			RET(kp->type);
489
		case VARNF:
490
13
			yylval.cp = setsymtab("NF", "", 0.0, NUM, symtab);
491
26
			RET(VARNF);
492
		default:
493
8650
			RET(kp->type);
494
		}
495
	}
496
4578
	c = peek();	/* look for '(' */
497

4792
	if (c != '(' && infunc && (n=isarg(w)) >= 0) {
498
54
		yylval.i = n;
499
108
		RET(ARG);
500
	} else {
501
4524
		yylval.cp = setsymtab(w, "", 0.0, STR|NUM|DONTFREE, symtab);
502
4524
		if (c == '(') {
503
314
			RET(CALL);
504
		} else {
505
8734
			RET(VAR);
506
		}
507
	}
508
9021
}
509
510
void startreg(void)	/* next call to yylex will return a regular expression */
511
{
512
228
	reg = 1;
513
114
}
514
515
int regexpr(void)
516
{
517
	int c, openclass = 0;
518
	static char *buf = 0;
519
	static int bufsz = 500;
520
228
	char *bp;
521
522

207
	if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
523
		FATAL("out of space for rex expr");
524
114
	bp = buf;
525
1151
	for ( ; ((c = input()) != '/' || openclass == 1) && c != 0; ) {
526
923
		if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, "regexpr"))
527
			FATAL("out of space for reg expr %.10s...", buf);
528
923
		if (c == '\n') {
529
			SYNTAX( "newline in regular expression %.10s...", buf );
530
			unput('\n');
531
			break;
532
923
		} else if (c == '\\') {
533
31
			*bp++ = '\\';
534
31
			*bp++ = input();
535
31
		} else {
536
892
			if (c == '[')
537
14
				openclass = 1;
538
878
			else if (c == ']')
539
14
				openclass = 0;
540
892
			*bp++ = c;
541
		}
542
	}
543
114
	*bp = 0;
544
114
	if (c == 0)
545
		SYNTAX("non-terminated regular expression %.10s...", buf);
546
114
	yylval.s = tostring(buf);
547
114
	unput('/');
548
228
	RET(REGEXPR);
549
114
}
550
551
/* low-level lexical stuff, sort of inherited from lex */

/* Circular record of the characters handed out by input(). */
char	ebuf[300];
char	*ep = ebuf;		/* next slot in ebuf; input() wraps it, unput() steps it back */

/* Stack of characters returned to the input by unput(). */
char	yysbuf[100];		/* pushback buffer */
char	*yysptr = yysbuf;	/* one past the most recently pushed character */

FILE	*yyin = 0;
558
559
int input(void)	/* get next lexical input character */
560
{
561
	int c;
562
	extern char *lexprog;
563
564
394728
	if (yysptr > yysbuf)
565
24599
		c = (uschar)*--yysptr;
566
172765
	else if (lexprog != NULL) {	/* awk '...' */
567
82514
		if ((c = (uschar)*lexprog) != 0)
568
81553
			lexprog++;
569
	} else				/* awk -f ... */
570
90251
		c = pgetc();
571
197364
	if (c == '\n')
572
6654
		lineno++;
573
190710
	else if (c == EOF)
574
20
		c = 0;
575
197364
	if (ep >= ebuf + sizeof ebuf)
576
377
		ep = ebuf;
577
197364
	return *ep++ = c;
578
}
579
580
void unput(int c)	/* put lexical character back on input */
581
{
582
49198
	if (c == '\n')
583
852
		lineno--;
584
24599
	if (yysptr >= yysbuf + sizeof(yysbuf))
585
		FATAL("pushed back too much: %.20s...", yysbuf);
586
24599
	*yysptr++ = c;
587
24599
	if (--ep < ebuf)
588
24599
		ep = ebuf + sizeof(ebuf) - 1;
589
24599
}
590
591
/*
 * unputstr: push the whole string s back onto the input.
 * Characters are pushed last-to-first so that subsequent input() calls
 * return them in their original order.  An empty string pushes nothing.
 */
void unputstr(const char *s)	/* put a string back on input */
{
	size_t i;

	/* count down in size_t: no wrap-around for "" and no narrowing to int */
	for (i = strlen(s); i > 0; i--)
		unput(s[i-1]);
}