GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: usr.bin/ctags/C.c Lines: 202 224 90.2 %
Date: 2017-11-13 Branches: 208 275 75.6 %

Line Branch Exec Source
1
/*	$OpenBSD: C.c,v 1.15 2014/12/08 03:58:56 jsg Exp $	*/
2
/*	$NetBSD: C.c,v 1.3 1995/03/26 20:14:02 glass Exp $	*/
3
4
/*
5
 * Copyright (c) 1987, 1993, 1994
6
 *	The Regents of the University of California.  All rights reserved.
7
 *
8
 * Redistribution and use in source and binary forms, with or without
9
 * modification, are permitted provided that the following conditions
10
 * are met:
11
 * 1. Redistributions of source code must retain the above copyright
12
 *    notice, this list of conditions and the following disclaimer.
13
 * 2. Redistributions in binary form must reproduce the above copyright
14
 *    notice, this list of conditions and the following disclaimer in the
15
 *    documentation and/or other materials provided with the distribution.
16
 * 3. Neither the name of the University nor the names of its contributors
17
 *    may be used to endorse or promote products derived from this software
18
 *    without specific prior written permission.
19
 *
20
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30
 * SUCH DAMAGE.
31
 */
32
33
#include <limits.h>
34
#include <stdio.h>
35
#include <string.h>
36
37
#include "ctags.h"
38
39
static int	func_entry(void);	/* scan an argument list; YES if a function body follows */
40
static void	hash_entry(void);	/* handle a line starting with '#' */
41
static void	skip_string(int);	/* skip to the closing quote character */
42
static int	str_entry(int);		/* handle a struct, union or enum entry */
43
44
/*
45
 * c_entries --
46
 *	read .c and .h files and call appropriate routines
47
 */
48
void
49
c_entries(void)
50
{
51
	int	c;			/* current character */
52
	int	level;			/* brace level */
53
	int	token;			/* if reading a token */
54
	int	t_def;			/* if reading a typedef */
55
	int	t_level;		/* typedef's brace level */
56
	char	*sp;			/* buffer pointer */
57
2856
	char	tok[MAXTOKEN];		/* token buffer */
58
59
1428
	lineftell = ftell(inf);
60
1428
	sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1;
61

9909384
	while (GETC(!=, EOF)) {
62


2358148
		switch (c) {
63
		/*
64
		 * Here's where it DOESN'T handle: {
65
		 *	foo(a)
66
		 *	{
67
		 *	#ifdef notdef
68
		 *		}
69
		 *	#endif
70
		 *		if (a)
71
		 *			puts("hello, world");
72
		 *	}
73
		 */
74
		case '{':
75
9872
			++level;
76
9872
			goto endtok;
77
		case '}':
78
			/*
79
			 * if level goes below zero, try and fix
80
			 * it, even though we've already messed up
81
			 */
82
14080
			if (--level < 0)
83
				level = 0;
84
14080
			goto endtok;
85
86
		case '\n':
87
142432
			SETLINE;
88
			/*
89
			 * the above 3 cases are similar in that they
90
			 * are special characters that also end tokens.
91
			 */
92
166384
endtok:			if (sp > tok) {
93
4152
				*sp = EOS;
94
				token = YES;
95
				sp = tok;
96
4152
			}
97
			else
98
				token = NO;
99
166384
			continue;
100
101
		/*
102
		 * We ignore quoted strings and character constants
103
		 * completely.
104
		 */
105
		case '"':
106
		case '\'':
107
8980
			(void)skip_string(c);
108
8980
			break;
109
110
		/*
111
		 * comments can be fun; note the state is unchanged after
112
		 * return, in case we found:
113
		 *	"foo() XX comment XX { int bar; }"
114
		 */
115
		case '/':
116

50664
			if (GETC(==, '*')) {
117
12158
				skip_comment(c);
118
12158
				continue;
119
508
			} else if (c == '/') {
120
6
				skip_comment(c);
121
6
				continue;
122
			}
123
502
			(void)ungetc(c, inf);
124
			c = '/';
125
502
			goto storec;
126
127
		/* hash marks flag #define's. */
128
		case '#':
129
11370
			if (sp == tok) {
130
11370
				hash_entry();
131
11370
				break;
132
			}
133
			goto storec;
134
135
		/*
136
		 * if we have a current token, parenthesis on
137
		 * level zero indicates a function.
138
		 */
139
		case '(':
140
66096
			do {
141

265456
				if (GETC(==, EOF))
142
					return;
143
66364
			} while (iswhite(c));
144
66096
			if (c == '*')
145
				break;
146
			else
147
64054
				ungetc(c, inf);
148
64054
			if (!level && token) {
149
				int	curline;
150
151
6710
				if (sp != tok)
152
6700
					*sp = EOS;
153
				/*
154
				 * grab the line immediately, we may
155
				 * already be wrong, for example,
156
				 *	foo\n
157
				 *	(arg1,
158
				 */
159
6710
				get_line();
160
6710
				curline = lineno;
161
6710
				if (func_entry()) {
162
4022
					++level;
163
4022
					pfnote(tok, curline);
164
4022
				}
165
				break;
166
			}
167
			goto storec;
168
169
		/*
170
		 * semi-colons indicate the end of a typedef; if we find a
171
		 * typedef we search for the next semi-colon of the same
172
		 * level as the typedef.  Ignoring "structs", they are
173
		 * tricky, since you can find:
174
		 *
175
		 *	"typedef int time_t;"
176
		 *	"typedef unsigned int u_int;"
177
		 *	"typedef unsigned int u_int [10];"
178
		 *
179
		 * If looking at a typedef, we save a copy of the last token
180
		 * found.  Then, when we find the ';' we take the current
181
		 * token if it starts with a valid token name, else we take
182
		 * the one we saved.  There's probably some reasonable
183
		 * alternative to this...
184
		 */
185
		case ';':
186

66500
			if (t_def && level == t_level) {
187
				t_def = NO;
188
50
				get_line();
189
50
				if (sp != tok)
190
46
					*sp = EOS;
191
50
				pfnote(tok, lineno);
192
50
				break;
193
			}
194
			goto storec;
195
196
		/*
197
		 * store characters until one that can't be part of a token
198
		 * comes along; check the current token against certain
199
		 * reserved words.
200
		 */
201
		default:
202
			/*
203
			 * to treat following function.
204
			 * func      (arg) {
205
			 * ....
206
			 * }
207
			 */
208
2026300
			if (c == ' ' || c == '\t') {
209
				int save = c;
210


2747136
				while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
211
					;
212
331422
				if (c == EOF)
213
					return;
214
331422
				(void)ungetc(c, inf);
215
				c = save;
216
331422
			}
217
2150448
	storec:		if (!intoken(c)) {
218
686000
				if (sp == tok)
219
					break;
220
247944
				*sp = EOS;
221
				/* no typedefs inside typedefs */
222

495568
				if (!t_def &&
223
247624
					   !memcmp(tok, "typedef",8)) {
224
					t_def = YES;
225
					t_level = level;
226
50
					break;
227
				}
228
				/* catch "typedef struct" */
229

493716
				if ((!t_def || t_level < level)
230
248134
				    && (!memcmp(tok, "struct", 7)
231
493356
				    || !memcmp(tok, "union", 6)
232
491044
				    || !memcmp(tok, "enum", 5))) {
233
					/*
234
					 * get line immediately;
235
					 * may change before '{'
236
					 */
237
2394
					get_line();
238
2394
					if (str_entry(c))
239
192
						++level;
240
					break;
241
					/* } */
242
				}
243
				sp = tok;
244
245500
			}
245

1758622
			else if (sp != tok || begtoken(c)) {
246
				/* hell... truncate it */
247
1429844
				if (sp == tok + sizeof tok - 1)
248
					*sp = EOS;
249
				else
250
1429844
					*sp++ = c;
251
				token = YES;
252
1429844
			}
253
1709948
			continue;
254
		}
255
256
		sp = tok;
257
		token = NO;
258
	}
259
2856
}
260
261
/*
262
 * func_entry --
263
 *	handle a function reference
264
 */
265
static int
266
func_entry(void)
267
{
268
	int	c;			/* current character */
269
	int	level = 0;		/* for matching '()' */
270
	static char attribute[] = "__attribute__";
271
13420
	char maybe_attribute[sizeof attribute + 1];
272
	char *anext;
273
274
	/*
275
	 * Find the end of the assumed function declaration.
276
	 * Note that ANSI C functions can have type definitions so keep
277
	 * track of the parentheses nesting level.
278
	 */
279

942080
	while (GETC(!=, EOF)) {
280

189868
		switch (c) {
281
		case '\'':
282
		case '"':
283
			/* skip strings and character constants */
284
30
			skip_string(c);
285
30
			break;
286
		case '/':
287
			/* skip comments */
288

288
			if (GETC(==, '*'))
289
72
				skip_comment(c);
290
			else if (c == '/')
291
				skip_comment(c);
292
			break;
293
		case '(':
294
358
			level++;
295
358
			break;
296
		case ')':
297
7068
			if (level == 0)
298
				goto fnd;
299
358
			level--;
300
358
			break;
301
		case '\n':
302
634
			SETLINE;
303
634
		}
304
	}
305
	return (NO);
306
fnd:
307
	/*
308
	 * we assume that the character after a function's right paren
309
	 * is a token character if it's a function and a non-token
310
	 * character if it's a declaration.  Comments don't count...
311
	 */
312
6748
	for (anext = maybe_attribute;;) {
313


83608
		while (GETC(!=, EOF) && iswhite(c))
314
4300
			if (c == '\n')
315
3976
				SETLINE;
316
7644
		if (c == EOF)
317
			return NO;
318
		/*
319
		 * Recognize the GNU __attribute__ extension, which would
320
		 * otherwise make the heuristic test DTWT
321
		 */
322
7644
		if (anext == maybe_attribute) {
323
6748
			if (intoken(c)) {
324
120
				*anext++ = c;
325
120
				continue;
326
			}
327
		} else {
328
896
			if (intoken(c)) {
329
778
				if (anext - maybe_attribute < (int)(sizeof attribute - 1))
330
776
					*anext++ = c;
331
				else
332
					break;
333
776
				continue;
334
			} else {
335
118
				*anext++ = '\0';
336
118
				if (strcmp(maybe_attribute, attribute) == 0) {
337
24
					(void)ungetc(c, inf);
338
24
					return NO;
339
				}
340
				break;
341
			}
342
		}
343
6628
		if (intoken(c) || c == '{')
344
			break;
345


2854
		if (c == '/' && GETC(==, '*'))
346
38
			skip_comment(c);
347
2664
		else if (c == '/')
348
			skip_comment(c);
349
		else {				/* don't ever "read" '/' */
350
2664
			(void)ungetc(c, inf);
351
2664
			return (NO);
352
		}
353
	}
354
4022
	if (c != '{')
355
96
		(void)skip_key('{');
356
4022
	return (YES);
357
6710
}
358
359
/*
360
 * hash_entry --
361
 *	handle a line starting with a '#'
362
 */
363
static void
364
hash_entry(void)
365
{
366
	int	c;			/* character read */
367
	int	curline;		/* line started on */
368
	char	*sp;			/* buffer pointer */
369
22740
	char	tok[MAXTOKEN];		/* storage buffer */
370
371
	/*
372
	 * to treat following macro.
373
	 * #     macro(arg)        ....
374
	 */
375


68496
	while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
376
		;
377
11370
	(void)ungetc(c, inf);
378
379
11370
	curline = lineno;
380
82194
	for (sp = tok;;) {		/* get next token */
381

328776
		if (GETC(==, EOF))
382
			return;
383
82194
		if (iswhite(c))
384
			break;
385
		/* hell... truncate it */
386
70824
		if (sp == tok + sizeof tok - 1)
387
			*sp = EOS;
388
		else
389
70824
			*sp++ = c;
390
	}
391
11370
	*sp = EOS;
392
11370
	if (memcmp(tok, "define", 6))	/* only interested in #define's */
393
		goto skip;
394
1716
	for (;;) {			/* this doesn't handle "#define \n" */
395

6864
		if (GETC(==, EOF))
396
			return;
397
1716
		if (!iswhite(c))
398
			break;
399
	}
400
13334
	for (sp = tok;;) {		/* get next token */
401
		/* hell... truncate it */
402
13334
		if (sp == tok + sizeof tok - 1)
403
			*sp = EOS;
404
		else
405
13334
			*sp++ = c;
406

53336
		if (GETC(==, EOF))
407
			return;
408
		/*
409
		 * this is where it DOESN'T handle
410
		 * "#define \n"
411
		 */
412
13334
		if (!intoken(c))
413
			break;
414
	}
415
1714
	*sp = EOS;
416
1714
	if (dflag || c == '(') {	/* only want macros */
417
608
		get_line();
418
608
		pfnote(tok, curline);
419
608
	}
420
11370
skip:	if (c == '\n') {		/* get rid of rest of define */
421
1386
		SETLINE
422
1386
		if (*(sp - 1) != '\\')
423
1386
			return;
424
	}
425
9984
	(void)skip_key('\n');
426
21354
}
427
428
/*
429
 * str_entry --
430
 *	handle a struct, union or enum entry
431
 */
432
static int
433
str_entry(int c)
434
{
435
	int	curline;		/* line started on */
436
	char	*sp;			/* buffer pointer */
437
4788
	char	tok[LINE_MAX];		/* storage buffer */
438
439
2394
	curline = lineno;
440
9576
	while (iswhite(c))
441

9576
		if (GETC(==, EOF))
442
			return (NO);
443
2394
	if (c == '{')		/* it was "struct {" */
444
50
		return (YES);
445
19062
	for (sp = tok;;) {		/* get next token */
446
		/* hell... truncate it */
447
19062
		if (sp == tok + sizeof tok - 1)
448
			*sp = EOS;
449
		else
450
19062
			*sp++ = c;
451

76248
		if (GETC(==, EOF))
452
			return (NO);
453
19062
		if (!intoken(c))
454
			break;
455
	}
456
2372
	switch (c) {
457
		case '{':		/* it was "struct foo{" */
458
			--sp;
459
			break;
460
		case '\n':		/* it was "struct foo\n" */
461
28
			SETLINE;
462
			/*FALLTHROUGH*/
463
		default:		/* probably "struct foo " */
464

12650
			while (GETC(!=, EOF))
465
2530
				if (!iswhite(c))
466
					break;
467
2344
			if (c != '{') {
468
2202
				(void)ungetc(c, inf);
469
2202
				return (NO);
470
			}
471
	}
472
142
	*sp = EOS;
473
142
	pfnote(tok, curline);
474
142
	return (YES);
475
2394
}
476
477
/*
478
 * skip_comment --
479
 *	skip over comment
480
 */
481
void
482
skip_comment(int commenttype)
483
{
484
	int	c;			/* character read */
485
	int	star;			/* '*' flag */
486
487

13574846
	for (star = 0; GETC(!=, EOF);)
488

2760644
		switch(c) {
489
		/* comments don't nest, nor can they be escaped. */
490
		case '*':
491
			star = YES;
492
62774
			break;
493
		case '/':
494
18098
			if (commenttype == '*' && star)
495
12940
				return;
496
			break;
497
		case '\n':
498
48270
			if (commenttype == '/') {
499
				/* We don't really parse C, so sometimes it
500
				 * is necessary to see the newline
501
				 */
502
6
				ungetc(c, inf);
503
6
				return;
504
			}
505
48264
			SETLINE;
506
			/*FALLTHROUGH*/
507
		default:
508
			star = NO;
509
2631502
			break;
510
		}
511
12946
}
512
513
/*
514
 * skip_string --
515
 *	skip to the end of a string or character constant.
516
 */
517
static void
518
skip_string(int key)
519
{
520
	int	c,
521
		skip;
522
523

360814
	for (skip = NO; GETC(!=, EOF); )
524
70176
		switch (c) {
525
		case '\\':		/* a backslash escapes anything */
526
2670
			skip = !skip;	/* we toggle in case it's "\\" */
527
2670
			break;
528
		case '\n':
529
			SETLINE;
530
			/*FALLTHROUGH*/
531
		default:
532
67506
			if (c == key && !skip)
533
9934
				return;
534
			skip = NO;
535
57572
		}
536
9934
}
537
538
/*
539
 * skip_key --
540
 *	skip to next char "key"
541
 */
542
int
543
skip_key(int key)
544
{
545
	int	c,
546
		skip,
547
		retval;
548
549

765290
	for (skip = retval = NO; GETC(!=, EOF);)
550


164100
		switch(c) {
551
		case '\\':		/* a backslash escapes anything */
552
1122
			skip = !skip;	/* we toggle in case it's "\\" */
553
1122
			break;
554
		case ';':		/* special case for yacc; if one */
555
		case '|':		/* of these chars occurs, we may */
556
			retval = YES;	/* have moved out of the rule */
557
1078
			break;		/* not used by C */
558
		case '\'':
559
		case '"':
560
			/* skip strings and character constants */
561
924
			skip_string(c);
562
924
			break;
563
		case '/':
564
			/* skip comments */
565

9320
			if (GETC(==, '*')) {
566
672
				skip_comment(c);
567
672
				break;
568
1658
			} else if (c == '/') {
569
				skip_comment(c);
570
				break;
571
			}
572
1658
			(void)ungetc(c, inf);
573
			c = '/';
574
1658
			goto norm;
575
		case '\n':
576
11400
			SETLINE;
577
			/*FALLTHROUGH*/
578
		default:
579
		norm:
580
147246
			if (c == key && !skip)
581
10080
				return (retval);
582
			skip = NO;
583
137166
		}
584
	return (retval);
585
10080
}