GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: usr.bin/awk/lex.c Lines: 202 298 67.8 %
Date: 2016-12-06 Branches: 173 357 48.5 %

Line Branch Exec Source
1
/*	$OpenBSD: lex.c,v 1.12 2011/09/28 19:27:18 millert Exp $	*/
2
/****************************************************************
3
Copyright (C) Lucent Technologies 1997
4
All Rights Reserved
5
6
Permission to use, copy, modify, and distribute this software and
7
its documentation for any purpose and without fee is hereby
8
granted, provided that the above copyright notice appear in all
9
copies and that both that the copyright notice and this
10
permission notice and warranty disclaimer appear in supporting
11
documentation, and that the name Lucent Technologies or any of
12
its entities not be used in advertising or publicity pertaining
13
to distribution of the software without specific, written prior
14
permission.
15
16
LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
18
IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
19
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
21
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
22
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23
THIS SOFTWARE.
24
****************************************************************/
25
26
#include <stdio.h>
27
#include <stdlib.h>
28
#include <string.h>
29
#include <ctype.h>
30
#include "awk.h"
31
#include "ytab.h"
32
33
/* Defined outside this file. */
extern YYSTYPE	yylval;		/* semantic value of the token being returned */
extern int	infunc;		/* tested by the lexer to tell whether a function body is being read */
35
36
/* Current input line; input() adds one per '\n' read, unput() takes it back. */
int lineno = 1;

/* Nesting depth of {}, [] and () as tracked by yylex(). */
int bracecnt = 0;
int brackcnt = 0;
int parencnt = 0;
40
41
/* One entry of the reserved-word table searched by word(). */
typedef struct Keyword {
	const char *word;	/* spelling of the keyword or built-in */
	int	sub;		/* value word() stores in yylval.i */
	int	type;		/* token code word() returns to the parser */
} Keyword;
46
47
Keyword keywords[] ={	/* keep sorted: binary searched */
48
	{ "BEGIN",	XBEGIN,		XBEGIN },
49
	{ "END",	XEND,		XEND },
50
	{ "NF",		VARNF,		VARNF },
51
	{ "and",	FAND,		BLTIN },
52
	{ "atan2",	FATAN,		BLTIN },
53
	{ "break",	BREAK,		BREAK },
54
	{ "close",	CLOSE,		CLOSE },
55
	{ "compl",	FCOMPL,		BLTIN },
56
	{ "continue",	CONTINUE,	CONTINUE },
57
	{ "cos",	FCOS,		BLTIN },
58
	{ "delete",	DELETE,		DELETE },
59
	{ "do",		DO,		DO },
60
	{ "else",	ELSE,		ELSE },
61
	{ "exit",	EXIT,		EXIT },
62
	{ "exp",	FEXP,		BLTIN },
63
	{ "fflush",	FFLUSH,		BLTIN },
64
	{ "for",	FOR,		FOR },
65
	{ "func",	FUNC,		FUNC },
66
	{ "function",	FUNC,		FUNC },
67
	{ "getline",	GETLINE,	GETLINE },
68
	{ "gsub",	GSUB,		GSUB },
69
	{ "if",		IF,		IF },
70
	{ "in",		IN,		IN },
71
	{ "index",	INDEX,		INDEX },
72
	{ "int",	FINT,		BLTIN },
73
	{ "length",	FLENGTH,	BLTIN },
74
	{ "log",	FLOG,		BLTIN },
75
	{ "lshift",	FLSHIFT,	BLTIN },
76
	{ "match",	MATCHFCN,	MATCHFCN },
77
	{ "next",	NEXT,		NEXT },
78
	{ "nextfile",	NEXTFILE,	NEXTFILE },
79
	{ "or",		FFOR,		BLTIN },
80
	{ "print",	PRINT,		PRINT },
81
	{ "printf",	PRINTF,		PRINTF },
82
	{ "rand",	FRAND,		BLTIN },
83
	{ "return",	RETURN,		RETURN },
84
	{ "rshift",	FRSHIFT,	BLTIN },
85
	{ "sin",	FSIN,		BLTIN },
86
	{ "split",	SPLIT,		SPLIT },
87
	{ "sprintf",	SPRINTF,	SPRINTF },
88
	{ "sqrt",	FSQRT,		BLTIN },
89
	{ "srand",	FSRAND,		BLTIN },
90
	{ "sub",	SUB,		SUB },
91
	{ "substr",	SUBSTR,		SUBSTR },
92
	{ "system",	FSYSTEM,	BLTIN },
93
	{ "tolower",	FTOLOWER,	BLTIN },
94
	{ "toupper",	FTOUPPER,	BLTIN },
95
	{ "while",	WHILE,		WHILE },
96
	{ "xor",	FXOR,		BLTIN },
97
};
98
99
/*
 * RET(x): return token x from the enclosing function, first printing its
 * name when dbg is set.  Wrapped in do/while(0) so that "RET(x);" is exactly
 * one statement and remains correct in an unbraced if/else.
 * Note that x is evaluated twice when dbg is set.
 */
#define	RET(x)	do { if (dbg) printf("lex %s\n", tokname(x)); return (x); } while (0)
100
101
int peek(void);
102
int gettok(char **, int *);
103
int binsearch(char *, Keyword *, int);
104
105
/* peek: return the next input character without consuming it. */
int peek(void)
{
	const int next = input();

	unput(next);	/* push it straight back for the next input() */
	return next;
}
111
112
/*
 * gettok: read the next raw token from the input into the buffer *pbuf.
 *
 * pbuf, psz: address of the caller's token buffer and of its size.  Both are
 *	written back when a name or number was read, because adjbuf() may
 *	have replaced the buffer while growing it.
 *
 * Returns 0 at end of input, 'a' for a name ([A-Za-z_][A-Za-z0-9_]*), '0'
 * for a number, and otherwise the character itself.  For every non-zero
 * return the token text is left NUL-terminated in the buffer.
 */
int gettok(char **pbuf, int *psz)	/* get next input token */
{
	int c, retc;
	char *buf = *pbuf;
	int sz = *psz;
	char *bp = buf;

	c = input();
	if (c == 0)
		return 0;
	buf[0] = c;
	buf[1] = 0;
	if (!isalnum(c) && c != '.' && c != '_')
		return c;	/* punctuation: a one-character token */

	*bp++ = c;
	if (isalpha(c) || c == '_') {	/* it's a varname */
		for ( ; (c = input()) != 0; ) {
			/*
			 * Grow as soon as there is no room for this character
			 * plus the terminating NUL.  Testing only bp-buf >= sz
			 * let the final "*bp = 0" land one byte past the
			 * buffer when input ended just as the buffer filled.
			 */
			if (bp-buf+1 >= sz)
				if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
					FATAL( "out of space for name %.10s...", buf );
			if (isalnum(c) || c == '_')
				*bp++ = c;
			else {
				*bp = 0;
				unput(c);	/* first char past the name */
				break;
			}
		}
		*bp = 0;
		retc = 'a';	/* alphanumeric */
	} else {	/* maybe it's a number, but could be . */
		char *rem;
		/* read input until can't be a number */
		for ( ; (c = input()) != 0; ) {
			/* same room-for-char-plus-NUL rule as above */
			if (bp-buf+1 >= sz)
				if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
					FATAL( "out of space for number %.10s...", buf );
			if (isdigit(c) || c == 'e' || c == 'E'
			  || c == '.' || c == '+' || c == '-')
				*bp++ = c;
			else {
				unput(c);
				break;
			}
		}
		*bp = 0;
		/*
		 * The loop over-collects (e.g. "1+2"); strtod() finds how much
		 * of it is really a number and rem marks the leftover.
		 */
		strtod(buf, &rem);	/* parse the number */
		if (rem == buf) {	/* it wasn't a valid number at all */
			buf[1] = 0;	/* return one character as token */
			retc = buf[0];	/* character is its own type */
			unputstr(rem+1); /* put rest back for later */
		} else {	/* some prefix was a number */
			unputstr(rem);	/* put rest back for later */
			rem[0] = 0;	/* truncate buf after number part */
			retc = '0';	/* type is number */
		}
	}
	*pbuf = buf;
	*psz = sz;
	return retc;
}
174
175
/* Forward declarations for the token helpers used by yylex(). */
int word(char *w);
int string(void);
int regexpr(void);

/* One-shot flags consulted at the top of yylex(). */
int sc = 0;	/* 1 => return a } right now */
int reg = 0;	/* 1 => return a REGEXPR now */
180
181
int yylex(void)
182
3814
{
183
	int c;
184
	static char *buf = 0;
185
	static int bufsize = 5; /* BUG: setting this small causes core dump! */
186
187

3814
	if (buf == 0 && (buf = (char *) malloc(bufsize)) == NULL)
188
		FATAL( "out of space in yylex" );
189
3814
	if (sc) {
190
73
		sc = 0;
191
73
		RET('}');
192
	}
193
3741
	if (reg) {
194
5
		reg = 0;
195
5
		return regexpr();
196
	}
197
	for (;;) {
198
5168
		c = gettok(&buf, &bufsize);
199
5168
		if (c == 0)
200
31
			return 0;
201
5137
		if (isalpha(c) || c == '_')
202
711
			return word(buf);
203
4426
		if (isdigit(c)) {
204
188
			yylval.cp = setsymtab(buf, tostring(buf), atof(buf), CON|NUM, symtab);
205
			/* should this also have STR set? */
206
188
			RET(NUMBER);
207
		}
208
209
4238
		yylval.i = c;
210






4238
		switch (c) {
211
		case '\n':	/* {EOL} */
212
531
			RET(NL);
213
		case '\r':	/* assume \n is coming */
214
		case ' ':	/* {WS}+ */
215
		case '\t':
216
			break;
217
		case '#':	/* #.* strip comments */
218

240
			while ((c = input()) != '\n' && c != 0)
219
				;
220
5
			unput(c);
221
5
			break;
222
		case ';':
223
169
			RET(';');
224
		case '\\':
225
			if (peek() == '\n') {
226
				input();
227
			} else if (peek() == '\r') {
228
				input(); input();	/* \n */
229
				lineno++;
230
			} else {
231
				RET(c);
232
			}
233
			break;
234
		case '&':
235
			if (peek() == '&') {
236
				input(); RET(AND);
237
			} else
238
				RET('&');
239
		case '|':
240
			if (peek() == '|') {
241
				input(); RET(BOR);
242
			} else
243
				RET('|');
244
		case '!':
245
2
			if (peek() == '=') {
246
				input(); yylval.i = NE; RET(NE);
247
2
			} else if (peek() == '~') {
248
2
				input(); yylval.i = NOTMATCH; RET(MATCHOP);
249
			} else
250
				RET(NOT);
251
		case '~':
252
			yylval.i = MATCH;
253
			RET(MATCHOP);
254
		case '<':
255
21
			if (peek() == '=') {
256
14
				input(); yylval.i = LE; RET(LE);
257
			} else {
258
7
				yylval.i = LT; RET(LT);
259
			}
260
		case '=':
261
323
			if (peek() == '=') {
262
14
				input(); yylval.i = EQ; RET(EQ);
263
			} else {
264
309
				yylval.i = ASSIGN; RET(ASGNOP);
265
			}
266
		case '>':
267
1
			if (peek() == '=') {
268
1
				input(); yylval.i = GE; RET(GE);
269
			} else if (peek() == '>') {
270
				input(); yylval.i = APPEND; RET(APPEND);
271
			} else {
272
				yylval.i = GT; RET(GT);
273
			}
274
		case '+':
275
46
			if (peek() == '+') {
276
26
				input(); yylval.i = INCR; RET(INCR);
277
20
			} else if (peek() == '=') {
278
10
				input(); yylval.i = ADDEQ; RET(ASGNOP);
279
			} else
280
10
				RET('+');
281
		case '-':
282
4
			if (peek() == '-') {
283
3
				input(); yylval.i = DECR; RET(DECR);
284
1
			} else if (peek() == '=') {
285
				input(); yylval.i = SUBEQ; RET(ASGNOP);
286
			} else
287
1
				RET('-');
288
		case '*':
289
6
			if (peek() == '=') {	/* *= */
290
				input(); yylval.i = MULTEQ; RET(ASGNOP);
291
6
			} else if (peek() == '*') {	/* ** or **= */
292
				input();	/* eat 2nd * */
293
				if (peek() == '=') {
294
					input(); yylval.i = POWEQ; RET(ASGNOP);
295
				} else {
296
					RET(POWER);
297
				}
298
			} else
299
6
				RET('*');
300
		case '/':
301
10
			RET('/');
302
		case '%':
303
18
			if (peek() == '=') {
304
				input(); yylval.i = MODEQ; RET(ASGNOP);
305
			} else
306
18
				RET('%');
307
		case '^':
308
			if (peek() == '=') {
309
				input(); yylval.i = POWEQ; RET(ASGNOP);
310
			} else
311
				RET(POWER);
312
313
		case '$':
314
			/* BUG: awkward, if not wrong */
315
44
			c = gettok(&buf, &bufsize);
316
44
			if (isalpha(c)) {
317
				if (strcmp(buf, "NF") == 0) {	/* very special */
318
					unputstr("(NF)");
319
					RET(INDIRECT);
320
				}
321
				c = peek();
322
				if (c == '(' || c == '[' || (infunc && isarg(buf) >= 0)) {
323
					unputstr(buf);
324
					RET(INDIRECT);
325
				}
326
				yylval.cp = setsymtab(buf, "", 0.0, STR|NUM, symtab);
327
				RET(IVAR);
328
44
			} else if (c == 0) {	/*  */
329
				SYNTAX( "unexpected end of input after $" );
330
				RET(';');
331
			} else {
332
44
				unputstr(buf);
333
44
				RET(INDIRECT);
334
			}
335
336
		case '}':
337
73
			if (--bracecnt < 0)
338
				SYNTAX( "extra }" );
339
73
			sc = 1;
340
73
			RET(';');
341
		case ']':
342
253
			if (--brackcnt < 0)
343
				SYNTAX( "extra ]" );
344
253
			RET(']');
345
		case ')':
346
152
			if (--parencnt < 0)
347
				SYNTAX( "extra )" );
348
152
			RET(')');
349
		case '{':
350
73
			bracecnt++;
351
73
			RET('{');
352
		case '[':
353
253
			brackcnt++;
354
253
			RET('[');
355
		case '(':
356
152
			parencnt++;
357
152
			RET('(');
358
359
		case '"':
360
560
			return string();	/* BUG: should be like tran.c ? */
361
362
		default:
363
115
			RET(c);
364
		}
365
	}
366
}
367
368
int string(void)
369
560
{
370
	int c, n;
371
	char *s, *bp;
372
	static char *buf = 0;
373
	static int bufsz = 500;
374
375

560
	if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
376
		FATAL("out of space for strings");
377
6397
	for (bp = buf; (c = input()) != '"'; ) {
378
5277
		if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, "string"))
379
			FATAL("out of space for string %.10s...", buf);
380
5277
		switch (c) {
381
		case '\n':
382
		case '\r':
383
		case 0:
384
			SYNTAX( "non-terminated string %.10s...", buf );
385
			lineno++;
386
			if (c == 0)	/* hopeless */
387
				FATAL( "giving up" );
388
			break;
389
		case '\\':
390
187
			c = input();
391



187
			switch (c) {
392
40
			case '"': *bp++ = '"'; break;
393
144
			case 'n': *bp++ = '\n'; break;
394
2
			case 't': *bp++ = '\t'; break;
395
			case 'f': *bp++ = '\f'; break;
396
1
			case 'r': *bp++ = '\r'; break;
397
			case 'b': *bp++ = '\b'; break;
398
			case 'v': *bp++ = '\v'; break;
399
			case 'a': *bp++ = '\007'; break;
400
			case '\\': *bp++ = '\\'; break;
401
402
			case '0': case '1': case '2': /* octal: \d \dd \ddd */
403
			case '3': case '4': case '5': case '6': case '7':
404
				n = c - '0';
405
				if ((c = peek()) >= '0' && c < '8') {
406
					n = 8 * n + input() - '0';
407
					if ((c = peek()) >= '0' && c < '8')
408
						n = 8 * n + input() - '0';
409
				}
410
				*bp++ = n;
411
				break;
412
413
			case 'x':	/* hex  \x0-9a-fA-F + */
414
			    {	char xbuf[100], *px;
415
				for (px = xbuf; (c = input()) != 0 && px-xbuf < 100-2; ) {
416
					if (isdigit(c)
417
					 || (c >= 'a' && c <= 'f')
418
					 || (c >= 'A' && c <= 'F'))
419
						*px++ = c;
420
					else
421
						break;
422
				}
423
				*px = 0;
424
				unput(c);
425
	  			sscanf(xbuf, "%x", (unsigned int *) &n);
426
				*bp++ = n;
427
				break;
428
			    }
429
430
			default:
431
				*bp++ = c;
432
				break;
433
			}
434
			break;
435
		default:
436
5090
			*bp++ = c;
437
			break;
438
		}
439
	}
440
560
	*bp = 0;
441
560
	s = tostring(buf);
442
560
	*bp++ = ' '; *bp++ = 0;
443
560
	yylval.cp = setsymtab(buf, s, 0.0, CON|STR|DONTFREE, symtab);
444
560
	RET(STRING);
445
}
446
447
448
/*
 * binsearch: look up w among the first n entries of kp[], which must be
 * sorted in ascending strcmp() order of their .word field.
 *
 * Returns the index of the matching entry, or -1 when w is not present.
 */
int binsearch(char *w, Keyword *kp, int n)
{
	int lo = 0;
	int hi = n - 1;

	while (lo <= hi) {
		int probe = (lo + hi) / 2;
		int order = strcmp(w, kp[probe].word);

		if (order == 0)
			return probe;
		if (order < 0)
			hi = probe - 1;		/* w sorts before the probe */
		else
			lo = probe + 1;		/* w sorts after the probe */
	}
	return -1;
}
465
466
int word(char *w)
467
711
{
468
	Keyword *kp;
469
	int c, n;
470
471
711
	n = binsearch(w, keywords, sizeof(keywords)/sizeof(keywords[0]));
472
/* BUG: this ought to be inside the if; in theory could fault (daniel barrett) */
473
711
	kp = keywords + n;
474
711
	if (n != -1) {	/* found in table */
475
220
		yylval.i = kp->sub;
476

220
		switch (kp->type) {	/* special handling */
477
		case BLTIN:
478

4
			if (kp->sub == FSYSTEM && safe)
479
				SYNTAX( "system is unsafe" );
480
4
			RET(kp->type);
481
		case FUNC:
482
			if (infunc)
483
				SYNTAX( "illegal nested function" );
484
			RET(kp->type);
485
		case RETURN:
486
			if (!infunc)
487
				SYNTAX( "return not in function" );
488
			RET(kp->type);
489
		case VARNF:
490
			yylval.cp = setsymtab("NF", "", 0.0, NUM, symtab);
491
			RET(VARNF);
492
		default:
493
216
			RET(kp->type);
494
		}
495
	}
496
491
	c = peek();	/* look for '(' */
497

491
	if (c != '(' && infunc && (n=isarg(w)) >= 0) {
498
		yylval.i = n;
499
		RET(ARG);
500
	} else {
501
491
		yylval.cp = setsymtab(w, "", 0.0, STR|NUM|DONTFREE, symtab);
502
491
		if (c == '(') {
503
			RET(CALL);
504
		} else {
505
491
			RET(VAR);
506
		}
507
	}
508
}
509
510
void startreg(void)	/* next call to yylex will return a regular expression */
511
5
{
512
5
	reg = 1;
513
5
}
514
515
int regexpr(void)
516
5
{
517
5
	int c, openclass = 0;
518
	static char *buf = 0;
519
	static int bufsz = 500;
520
	char *bp;
521
522

5
	if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
523
		FATAL("out of space for rex expr");
524
5
	bp = buf;
525

212
	for ( ; ((c = input()) != '/' || openclass == 1) && c != 0; ) {
526
202
		if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, "regexpr"))
527
			FATAL("out of space for reg expr %.10s...", buf);
528
202
		if (c == '\n') {
529
			SYNTAX( "newline in regular expression %.10s...", buf );
530
			unput('\n');
531
			break;
532
202
		} else if (c == '\\') {
533
4
			*bp++ = '\\';
534
4
			*bp++ = input();
535
		} else {
536
198
			if (c == '[')
537
6
				openclass = 1;
538
192
			else if (c == ']')
539
6
				openclass = 0;
540
198
			*bp++ = c;
541
		}
542
	}
543
5
	*bp = 0;
544
5
	if (c == 0)
545
		SYNTAX("non-terminated regular expression %.10s...", buf);
546
5
	yylval.s = tostring(buf);
547
5
	unput('/');
548
5
	RET(REGEXPR);
549
}
550
551
/* low-level lexical stuff, sort of inherited from lex */
552
553
/* Ring of the most recently read characters; input() stores at ep and wraps. */
char ebuf[300];
char *ep = ebuf;

/* Pushback stack: unput() pushes at yysptr, input() pops from it first. */
char yysbuf[100];
char *yysptr = yysbuf;

FILE *yyin = NULL;
558
559
int input(void)	/* get next lexical input character */
560
14834
{
561
	int c;
562
	extern char *lexprog;
563
564
14834
	if (yysptr > yysbuf)
565
1936
		c = (uschar)*--yysptr;
566
12898
	else if (lexprog != NULL) {	/* awk '...' */
567
5091
		if ((c = (uschar)*lexprog) != 0)
568
5062
			lexprog++;
569
	} else				/* awk -f ... */
570
7807
		c = pgetc();
571
14834
	if (c == '\n')
572
562
		lineno++;
573
14272
	else if (c == EOF)
574
2
		c = 0;
575
14834
	if (ep >= ebuf + sizeof ebuf)
576
31
		ep = ebuf;
577
14834
	return *ep++ = c;
578
}
579
580
void unput(int c)	/* put lexical character back on input */
581
1936
{
582
1936
	if (c == '\n')
583
31
		lineno--;
584
1936
	if (yysptr >= yysbuf + sizeof(yysbuf))
585
		FATAL("pushed back too much: %.20s...", yysbuf);
586
1936
	*yysptr++ = c;
587
1936
	if (--ep < ebuf)
588
		ep = ebuf + sizeof(ebuf) - 1;
589
1936
}
590
591
/*
 * unputstr: push the whole string s back onto the input.
 *
 * Characters are pushed last-to-first so that subsequent input() calls
 * return them in their original order.  An empty string pushes nothing.
 */
void unputstr(const char *s)	/* put a string back on input */
{
	size_t i;

	/* count down in size_t; avoids narrowing strlen(s)-1 into an int */
	for (i = strlen(s); i > 0; i--)
		unput(s[i - 1]);
}