GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: usr.bin/indent/lexi.c Lines: 0 241 0.0 %
Date: 2017-11-13 Branches: 0 266 0.0 %

Line Branch Exec Source
1
/*	$OpenBSD: lexi.c,v 1.20 2016/06/06 06:43:03 tobiasu Exp $	*/
2
3
/*
4
 * Copyright (c) 1980, 1993
5
 *	The Regents of the University of California.
6
 * Copyright (c) 1976 Board of Trustees of the University of Illinois.
7
 * Copyright (c) 1985 Sun Microsystems, Inc.
8
 * All rights reserved.
9
 *
10
 * Redistribution and use in source and binary forms, with or without
11
 * modification, are permitted provided that the following conditions
12
 * are met:
13
 * 1. Redistributions of source code must retain the above copyright
14
 *    notice, this list of conditions and the following disclaimer.
15
 * 2. Redistributions in binary form must reproduce the above copyright
16
 *    notice, this list of conditions and the following disclaimer in the
17
 *    documentation and/or other materials provided with the distribution.
18
 * 3. Neither the name of the University nor the names of its contributors
19
 *    may be used to endorse or promote products derived from this software
20
 *    without specific prior written permission.
21
 *
22
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32
 * SUCH DAMAGE.
33
 */
34
35
/*
36
 * Here we have the token scanner for indent.  It scans off one token and puts
37
 * it in the global variable "token".  It returns a code, indicating the type
38
 * of token scanned.
39
 */
40
41
#include <stdio.h>
42
#include <ctype.h>
43
#include <stdlib.h>
44
#include <string.h>
45
#include <err.h>
46
#include "indent_globs.h"
47
#include "indent_codes.h"
48
49
/* Character classes stored in chartype[]; class 0 means "neither". */
#define alphanum	1	/* may appear in an identifier or a number */
#define opchar		3	/* operator character */
51
52
/*
 * One entry of the keyword table: the spelling of a reserved word and
 * the class code that lexi() switches on once the word is recognised.
 */
struct templ {
    char *rwd;			/* keyword text, NUL terminated */
    int rwcode;			/* keyword class */
};
56
57
/*
 * Built-in keyword table.  The second member is the keyword class that
 * lexi() dispatches on:
 *	0  no special treatment (returned as an ordinary identifier)
 *	1  switch
 *	2  case / default
 *	3  struct, union, enum
 *	4  declaration keywords
 *	5  if, while, for
 *	6  else, do
 *	7  sizeof
 */
struct templ specialsinit[] = {
	{ "switch",	1 },
	{ "case",	2 },
	{ "break",	0 },
	{ "struct",	3 },
	{ "union",	3 },
	{ "enum",	3 },
	{ "default",	2 },
	{ "int",	4 },
	{ "char",	4 },
	{ "float",	4 },
	{ "double",	4 },
	{ "long",	4 },
	{ "short",	4 },
	{ "typedef",	4 },
	{ "unsigned",	4 },
	{ "register",	4 },
	{ "static",	4 },
	{ "global",	4 },
	{ "extern",	4 },
	{ "void",	4 },
	{ "goto",	0 },
	{ "return",	0 },
	{ "if",		5 },
	{ "while",	5 },
	{ "for",	5 },
	{ "else",	6 },
	{ "do",		6 },
	{ "sizeof",	7 },
};

/* Active keyword table; addkey() swaps in a heap copy when it must grow. */
struct templ *specials = specialsinit;
/* Number of entries currently in use in specials[]. */
int	nspecials = sizeof specialsinit / sizeof specialsinit[0];
/* Capacity of the heap copy of the table; set by addkey(). */
int	maxspecials;
91
92
char        chartype[128] =
93
{				/* this is used to facilitate the decision of
94
				 * what type (alphanumeric, operator) each
95
				 * character is */
96
    0, 0, 0, 0, 0, 0, 0, 0,
97
    0, 0, 0, 0, 0, 0, 0, 0,
98
    0, 0, 0, 0, 0, 0, 0, 0,
99
    0, 0, 0, 0, 0, 0, 0, 0,
100
    0, 3, 0, 0, 1, 3, 3, 0,
101
    0, 0, 3, 3, 0, 3, 0, 3,
102
    1, 1, 1, 1, 1, 1, 1, 1,
103
    1, 1, 0, 0, 3, 3, 3, 3,
104
    0, 1, 1, 1, 1, 1, 1, 1,
105
    1, 1, 1, 1, 1, 1, 1, 1,
106
    1, 1, 1, 1, 1, 1, 1, 1,
107
    1, 1, 1, 0, 0, 0, 3, 1,
108
    0, 1, 1, 1, 1, 1, 1, 1,
109
    1, 1, 1, 1, 1, 1, 1, 1,
110
    1, 1, 1, 1, 1, 1, 1, 1,
111
    1, 1, 1, 0, 3, 0, 3, 0
112
};
113
114
115
116
117
int
118
lexi(void)
119
{
120
    int         unary_delim;	/* this is set to 1 if the current token
121
				 * forces a following operator to be unary */
122
    static int  last_code;	/* the last token type returned */
123
    static int  l_struct;	/* set to 1 if the last token was 'struct' */
124
    int         code;		/* internal code to be returned */
125
    char        qchar;		/* the delimiter character for a string */
126
    int		i;
127
128
    e_token = s_token;		/* point to start of place to save token */
129
    unary_delim = false;
130
    ps.col_1 = ps.last_nl;	/* tell world that this token started in
131
				 * column 1 iff the last thing scanned was nl */
132
    ps.last_nl = false;
133
134
    while (*buf_ptr == ' ' || *buf_ptr == '\t') {	/* get rid of blanks */
135
	ps.col_1 = false;	/* leading blanks imply token is not in column
136
				 * 1 */
137
	if (++buf_ptr >= buf_end)
138
	    fill_buffer();
139
    }
140
141
    /* Scan an alphanumeric token */
142
    if (chartype[(int)*buf_ptr] == alphanum ||
143
	(buf_ptr[0] == '.' && isdigit((unsigned char)buf_ptr[1]))) {
144
	/*
145
	 * we have a character or number
146
	 */
147
	char *j;	/* used for searching thru list of
148
			 * reserved words */
149
	if (isdigit((unsigned char)*buf_ptr) ||
150
	    (buf_ptr[0] == '.' && isdigit((unsigned char)buf_ptr[1]))) {
151
	    int         seendot = 0,
152
	                seenexp = 0,
153
			seensfx = 0;
154
	    if (*buf_ptr == '0' &&
155
		    (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) {
156
		*e_token++ = *buf_ptr++;
157
		*e_token++ = *buf_ptr++;
158
		while (isxdigit(*buf_ptr)) {
159
		    CHECK_SIZE_TOKEN;
160
		    *e_token++ = *buf_ptr++;
161
		}
162
	    }
163
	    else
164
		while (1) {
165
		    if (*buf_ptr == '.') {
166
			if (seendot)
167
			    break;
168
			else
169
			    seendot++;
170
		    }
171
		    CHECK_SIZE_TOKEN;
172
		    *e_token++ = *buf_ptr++;
173
		    if (!isdigit((unsigned char)*buf_ptr) && *buf_ptr != '.') {
174
			if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
175
			    break;
176
			else {
177
			    seenexp++;
178
			    seendot++;
179
			    CHECK_SIZE_TOKEN;
180
			    *e_token++ = *buf_ptr++;
181
			    if (*buf_ptr == '+' || *buf_ptr == '-')
182
				*e_token++ = *buf_ptr++;
183
			}
184
		    }
185
		}
186
	    while (1) {
187
		if (!(seensfx & 1) &&
188
			(*buf_ptr == 'U' || *buf_ptr == 'u')) {
189
		    CHECK_SIZE_TOKEN;
190
		    *e_token++ = *buf_ptr++;
191
		    seensfx |= 1;
192
		    continue;
193
		}
194
        	if (!(seensfx & 2) &&
195
			(*buf_ptr == 'L' || *buf_ptr == 'l')) {
196
		    CHECK_SIZE_TOKEN;
197
		    if (buf_ptr[1] == buf_ptr[0])
198
		        *e_token++ = *buf_ptr++;
199
		    *e_token++ = *buf_ptr++;
200
		    seensfx |= 2;
201
		    continue;
202
		}
203
		break;
204
	    }
205
	    if (!(seensfx & 1) &&
206
	        (*buf_ptr == 'F' || *buf_ptr == 'f')) {
207
		CHECK_SIZE_TOKEN;
208
		*e_token++ = *buf_ptr++;
209
		seensfx |= 1;
210
	    }
211
	}
212
	else
213
	    while (chartype[(int)*buf_ptr] == alphanum) {	/* copy it over */
214
		CHECK_SIZE_TOKEN;
215
		*e_token++ = *buf_ptr++;
216
		if (buf_ptr >= buf_end)
217
		    fill_buffer();
218
	    }
219
	*e_token++ = '\0';
220
	while (*buf_ptr == ' ' || *buf_ptr == '\t') {	/* get rid of blanks */
221
	    if (++buf_ptr >= buf_end)
222
		fill_buffer();
223
	}
224
	ps.its_a_keyword = false;
225
	ps.sizeof_keyword = false;
226
	if (l_struct) {		/* if last token was 'struct', then this token
227
				 * should be treated as a declaration */
228
	    l_struct = false;
229
	    last_code = ident;
230
	    ps.last_u_d = true;
231
	    return (decl);
232
	}
233
	ps.last_u_d = false;	/* Operator after identifier is binary */
234
	last_code = ident;	/* Remember that this is the code we will
235
				 * return */
236
237
	/*
238
	 * This loop will check if the token is a keyword.
239
	 */
240
	for (i = 0; i < nspecials; i++) {
241
	    char *p = s_token;	/* point at scanned token */
242
	    j = specials[i].rwd;
243
	    if (*j++ != *p++ || *j++ != *p++)
244
		continue;	/* This test depends on the fact that
245
				 * identifiers are always at least 1 character
246
				 * long (ie. the first two bytes of the
247
				 * identifier are always meaningful) */
248
	    if (p[-1] == 0)
249
		break;		/* If its a one-character identifier */
250
	    while (*p++ == *j)
251
		if (*j++ == 0)
252
		    goto found_keyword;	/* I wish that C had a multi-level
253
					 * break... */
254
	}
255
	if (i < nspecials) {		/* we have a keyword */
256
    found_keyword:
257
	    ps.its_a_keyword = true;
258
	    ps.last_u_d = true;
259
	    switch (specials[i].rwcode) {
260
	    case 1:		/* it is a switch */
261
		return (swstmt);
262
	    case 2:		/* a case or default */
263
		return (casestmt);
264
265
	    case 3:		/* a "struct" */
266
		if (ps.p_l_follow)
267
		    break;	/* inside parens: cast */
268
		l_struct = true;
269
270
		/*
271
		 * Next time around, we will want to know that we have had a
272
		 * 'struct'
273
		 */
274
	    case 4:		/* one of the declaration keywords */
275
		if (ps.p_l_follow) {
276
		    ps.cast_mask |= 1 << ps.p_l_follow;
277
		    break;	/* inside parens: cast */
278
		}
279
		last_code = decl;
280
		return (decl);
281
282
	    case 5:		/* if, while, for */
283
		return (sp_paren);
284
285
	    case 6:		/* do, else */
286
		return (sp_nparen);
287
288
	    case 7:
289
		ps.sizeof_keyword = true;
290
	    default:		/* all others are treated like any other
291
				 * identifier */
292
		return (ident);
293
	    }			/* end of switch */
294
	}			/* end of if (found_it) */
295
	if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) {
296
	    char *tp = buf_ptr;
297
	    while (tp < buf_end)
298
		if (*tp++ == ')' && (*tp == ';' || *tp == ','))
299
		    goto not_proc;
300
	    strlcpy(ps.procname, token, sizeof ps.procname);
301
	    ps.in_parameter_declaration = 1;
302
	    rparen_count = 1;
303
    not_proc:;
304
	}
305
	/*
306
	 * The following hack attempts to guess whether or not the current
307
	 * token is in fact a declaration keyword -- one that has been
308
	 * typedefd
309
	 */
310
	if (((*buf_ptr == '*' && buf_ptr[1] != '=') ||
311
	    isalpha((unsigned char)*buf_ptr) || *buf_ptr == '_')
312
		&& !ps.p_l_follow
313
	        && !ps.block_init
314
		&& (ps.last_token == rparen || ps.last_token == semicolon ||
315
		    ps.last_token == decl ||
316
		    ps.last_token == lbrace || ps.last_token == rbrace)) {
317
	    ps.its_a_keyword = true;
318
	    ps.last_u_d = true;
319
	    last_code = decl;
320
	    return decl;
321
	}
322
	if (last_code == decl)	/* if this is a declared variable, then
323
				 * following sign is unary */
324
	    ps.last_u_d = true;	/* will make "int a -1" work */
325
	last_code = ident;
326
	return (ident);		/* the ident is not in the list */
327
    }				/* end of procesing for alpanum character */
328
329
    /* Scan a non-alphanumeric token */
330
331
    *e_token++ = *buf_ptr;		/* if it is only a one-character token, it is
332
				 * moved here */
333
    *e_token = '\0';
334
    if (++buf_ptr >= buf_end)
335
	fill_buffer();
336
337
    switch (*token) {
338
    case '\n':
339
	unary_delim = ps.last_u_d;
340
	ps.last_nl = true;	/* remember that we just had a newline */
341
	code = (had_eof ? 0 : newline);
342
343
	/*
344
	 * if data has been exausted, the newline is a dummy, and we should
345
	 * return code to stop
346
	 */
347
	break;
348
349
    case '\'':			/* start of quoted character */
350
    case '"':			/* start of string */
351
	qchar = *token;
352
	if (troff) {
353
	    e_token[-1] = '`';
354
	    if (qchar == '"')
355
		*e_token++ = '`';
356
	    e_token = chfont(&bodyf, &stringf, e_token);
357
	}
358
	do {			/* copy the string */
359
	    while (1) {		/* move one character or [/<char>]<char> */
360
		if (*buf_ptr == '\n') {
361
		    printf("%d: Unterminated literal\n", line_no);
362
		    goto stop_lit;
363
		}
364
		CHECK_SIZE_TOKEN;	/* Only have to do this once in this loop,
365
					 * since CHECK_SIZE guarantees that there
366
					 * are at least 5 entries left */
367
		*e_token = *buf_ptr++;
368
		if (buf_ptr >= buf_end)
369
		    fill_buffer();
370
		if (*e_token == BACKSLASH) {	/* if escape, copy extra char */
371
		    if (*buf_ptr == '\n')	/* check for escaped newline */
372
			++line_no;
373
		    if (troff) {
374
			*++e_token = BACKSLASH;
375
			if (*buf_ptr == BACKSLASH)
376
			    *++e_token = BACKSLASH;
377
		    }
378
		    *++e_token = *buf_ptr++;
379
		    ++e_token;	/* we must increment this again because we
380
				 * copied two chars */
381
		    if (buf_ptr >= buf_end)
382
			fill_buffer();
383
		}
384
		else
385
		    break;	/* we copied one character */
386
	    }			/* end of while (1) */
387
	} while (*e_token++ != qchar);
388
	if (troff) {
389
	    e_token = chfont(&stringf, &bodyf, e_token - 1);
390
	    if (qchar == '"')
391
		*e_token++ = '\'';
392
	}
393
stop_lit:
394
	code = ident;
395
	break;
396
397
    case ('('):
398
    case ('['):
399
	unary_delim = true;
400
	code = lparen;
401
	break;
402
403
    case (')'):
404
    case (']'):
405
	code = rparen;
406
	break;
407
408
    case '#':
409
	unary_delim = ps.last_u_d;
410
	code = preesc;
411
	break;
412
413
    case '?':
414
	unary_delim = true;
415
	code = question;
416
	break;
417
418
    case (':'):
419
	code = colon;
420
	unary_delim = true;
421
	break;
422
423
    case (';'):
424
	unary_delim = true;
425
	code = semicolon;
426
	break;
427
428
    case ('{'):
429
	unary_delim = true;
430
431
	/*
432
	 * if (ps.in_or_st) ps.block_init = 1;
433
	 */
434
	/* ?	code = ps.block_init ? lparen : lbrace; */
435
	code = lbrace;
436
	break;
437
438
    case ('}'):
439
	unary_delim = true;
440
	/* ?	code = ps.block_init ? rparen : rbrace; */
441
	code = rbrace;
442
	break;
443
444
    case 014:			/* a form feed */
445
	unary_delim = ps.last_u_d;
446
	ps.last_nl = true;	/* remember this so we can set 'ps.col_1'
447
				 * right */
448
	code = form_feed;
449
	break;
450
451
    case (','):
452
	unary_delim = true;
453
	code = comma;
454
	break;
455
456
    case '.':
457
	unary_delim = false;
458
	code = period;
459
	break;
460
461
    case '-':
462
    case '+':			/* check for -, +, --, ++ */
463
	code = (ps.last_u_d ? unary_op : binary_op);
464
	unary_delim = true;
465
466
	if (*buf_ptr == token[0]) {
467
	    /* check for doubled character */
468
	    *e_token++ = *buf_ptr++;
469
	    /* buffer overflow will be checked at end of loop */
470
	    if (last_code == ident || last_code == rparen) {
471
		code = (ps.last_u_d ? unary_op : postop);
472
		/* check for following ++ or -- */
473
		unary_delim = false;
474
	    }
475
	}
476
	else if (*buf_ptr == '=')
477
	    /* check for operator += */
478
	    *e_token++ = *buf_ptr++;
479
	else if (*buf_ptr == '>') {
480
	    /* check for operator -> */
481
	    *e_token++ = *buf_ptr++;
482
	    if (!pointer_as_binop) {
483
		unary_delim = false;
484
		code = unary_op;
485
		ps.want_blank = false;
486
	    }
487
	}
488
	break;			/* buffer overflow will be checked at end of
489
				 * switch */
490
491
    case '=':
492
	if (ps.in_or_st)
493
	    ps.block_init = 1;
494
#ifdef undef
495
	if (chartype[*buf_ptr] == opchar) {	/* we have two char assignment */
496
	    e_token[-1] = *buf_ptr++;
497
	    if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr)
498
		*e_token++ = *buf_ptr++;
499
	    *e_token++ = '=';	/* Flip =+ to += */
500
	    *e_token = 0;
501
	}
502
#else
503
	if (*buf_ptr == '=') {/* == */
504
	    *e_token++ = '=';	/* Flip =+ to += */
505
	    buf_ptr++;
506
	    *e_token = 0;
507
	}
508
#endif
509
	code = binary_op;
510
	unary_delim = true;
511
	break;
512
	/* can drop thru!!! */
513
514
    case '>':
515
    case '<':
516
    case '!':			/* ops like <, <<, <=, !=, etc */
517
	if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
518
	    *e_token++ = *buf_ptr;
519
	    if (++buf_ptr >= buf_end)
520
		fill_buffer();
521
	}
522
	if (*buf_ptr == '=')
523
	    *e_token++ = *buf_ptr++;
524
	code = (ps.last_u_d ? unary_op : binary_op);
525
	unary_delim = true;
526
	break;
527
528
    default:
529
	if (token[0] == '/' && *buf_ptr == '*') {
530
	    /* it is start of comment */
531
	    *e_token++ = '*';
532
533
	    if (++buf_ptr >= buf_end)
534
		fill_buffer();
535
536
	    code = comment;
537
	    unary_delim = ps.last_u_d;
538
	    break;
539
	}
540
	while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') {
541
	    /*
542
	     * handle ||, &&, etc, and also things as in int *****i
543
	     */
544
	    *e_token++ = *buf_ptr;
545
	    if (++buf_ptr >= buf_end)
546
		fill_buffer();
547
	}
548
	code = (ps.last_u_d ? unary_op : binary_op);
549
	unary_delim = true;
550
551
552
    }				/* end of switch */
553
    if (code != newline) {
554
	l_struct = false;
555
	last_code = code;
556
    }
557
    if (buf_ptr >= buf_end)	/* check for input buffer empty */
558
	fill_buffer();
559
    ps.last_u_d = unary_delim;
560
    *e_token = '\0';		/* null terminate the token */
561
    return (code);
562
}
563
564
/*
565
 * Add the given keyword to the keyword table, using val as the keyword type
566
 */
567
void
568
addkey(char *key, int val)
569
{
570
    struct templ *p;
571
    int i;
572
573
    for (i = 0; i < nspecials; i++) {
574
	p = &specials[i];
575
	if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
576
	    return;
577
    }
578
579
    if (specials == specialsinit) {
580
	/*
581
	 * Whoa. Must reallocate special table.
582
	 */
583
	nspecials = sizeof (specialsinit) / sizeof (specialsinit[0]);
584
	maxspecials = nspecials + (nspecials >> 2);
585
	specials = calloc(maxspecials, sizeof specials[0]);
586
	if (specials == NULL)
587
	    err(1, NULL);
588
	memcpy(specials, specialsinit, sizeof specialsinit);
589
    } else if (nspecials >= maxspecials) {
590
	int newspecials = maxspecials + (maxspecials >> 2);
591
	struct templ *specials2;
592
593
	specials2 = reallocarray(specials, newspecials, sizeof(specials[0]));
594
	if (specials2 == NULL)
595
	    err(1, NULL);
596
	specials = specials2;
597
	maxspecials = newspecials;
598
    }
599
600
    p = &specials[nspecials];
601
    p->rwd = key;
602
    p->rwcode = val;
603
    nspecials++;
604
    return;
605
}