GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: usr.bin/ctags/C.c Lines: 198 220 90.0 %
Date: 2017-11-07 Branches: 208 275 75.6 %

Line Branch Exec Source
1
/*	$OpenBSD: C.c,v 1.15 2014/12/08 03:58:56 jsg Exp $	*/
2
/*	$NetBSD: C.c,v 1.3 1995/03/26 20:14:02 glass Exp $	*/
3
4
/*
5
 * Copyright (c) 1987, 1993, 1994
6
 *	The Regents of the University of California.  All rights reserved.
7
 *
8
 * Redistribution and use in source and binary forms, with or without
9
 * modification, are permitted provided that the following conditions
10
 * are met:
11
 * 1. Redistributions of source code must retain the above copyright
12
 *    notice, this list of conditions and the following disclaimer.
13
 * 2. Redistributions in binary form must reproduce the above copyright
14
 *    notice, this list of conditions and the following disclaimer in the
15
 *    documentation and/or other materials provided with the distribution.
16
 * 3. Neither the name of the University nor the names of its contributors
17
 *    may be used to endorse or promote products derived from this software
18
 *    without specific prior written permission.
19
 *
20
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30
 * SUCH DAMAGE.
31
 */
32
33
#include <limits.h>
34
#include <stdio.h>
35
#include <string.h>
36
37
#include "ctags.h"
38
39
static int	func_entry(void);
40
static void	hash_entry(void);
41
static void	skip_string(int);
42
static int	str_entry(int);
43
44
/*
45
 * c_entries --
46
 *	read .c and .h files and call appropriate routines
47
 */
48
void
49
c_entries(void)
50
{
51
	int	c;			/* current character */
52
	int	level;			/* brace level */
53
	int	token;			/* if reading a token */
54
	int	t_def;			/* if reading a typedef */
55
	int	t_level;		/* typedef's brace level */
56
	char	*sp;			/* buffer pointer */
57
4284
	char	tok[MAXTOKEN];		/* token buffer */
58
59
2142
	lineftell = ftell(inf);
60
2142
	sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1;
61

14873975
	while (GETC(!=, EOF)) {
62


3539923
		switch (c) {
63
		/*
64
		 * Here's where it DOESN'T handle: {
65
		 *	foo(a)
66
		 *	{
67
		 *	#ifdef notdef
68
		 *		}
69
		 *	#endif
70
		 *		if (a)
71
		 *			puts("hello, world");
72
		 *	}
73
		 */
74
		case '{':
75
14805
			++level;
76
14805
			goto endtok;
77
		case '}':
78
			/*
79
			 * if level goes below zero, try and fix
80
			 * it, even though we've already messed up
81
			 */
82
21120
			if (--level < 0)
83
				level = 0;
84
21120
			goto endtok;
85
86
		case '\n':
87
213681
			SETLINE;
88
			/*
89
			 * the above 3 cases are similar in that they
90
			 * are special characters that also end tokens.
91
			 */
92
249606
endtok:			if (sp > tok) {
93
6207
				*sp = EOS;
94
				token = YES;
95
				sp = tok;
96
6207
			}
97
			else
98
				token = NO;
99
			continue;
100
101
		/*
102
		 * We ignore quoted strings and character constants
103
		 * completely.
104
		 */
105
		case '"':
106
		case '\'':
107
13473
			(void)skip_string(c);
108
13473
			break;
109
110
		/*
111
		 * comments can be fun; note the state is unchanged after
112
		 * return, in case we found:
113
		 *	"foo() XX comment XX { int bar; }"
114
		 */
115
		case '/':
116

75996
			if (GETC(==, '*')) {
117
18237
				skip_comment(c);
118
18237
				continue;
119
762
			} else if (c == '/') {
120
9
				skip_comment(c);
121
9
				continue;
122
			}
123
753
			(void)ungetc(c, inf);
124
			c = '/';
125
753
			goto storec;
126
127
		/* hash marks flag #define's. */
128
		case '#':
129
17055
			if (sp == tok) {
130
17055
				hash_entry();
131
17055
				break;
132
			}
133
			goto storec;
134
135
		/*
136
		 * if we have a current token, parenthesis on
137
		 * level zero indicates a function.
138
		 */
139
		case '(':
140
			do {
141

398976
				if (GETC(==, EOF))
142
					return;
143
99744
			} while (iswhite(c));
144
99342
			if (c == '*')
145
				break;
146
			else
147
96279
				ungetc(c, inf);
148
96279
			if (!level && token) {
149
				int	curline;
150
151
10068
				if (sp != tok)
152
10053
					*sp = EOS;
153
				/*
154
				 * grab the line immediately, we may
155
				 * already be wrong, for example,
156
				 *	foo\n
157
				 *	(arg1,
158
				 */
159
10068
				get_line();
160
10068
				curline = lineno;
161
10068
				if (func_entry()) {
162
6036
					++level;
163
6036
					pfnote(tok, curline);
164
6036
				}
165
				break;
166
			}
167
			goto storec;
168
169
		/*
170
		 * semi-colons indicate the end of a typedef; if we find a
171
		 * typedef we search for the next semi-colon of the same
172
		 * level as the typedef.  Ignoring "structs", they are
173
		 * tricky, since you can find:
174
		 *
175
		 *	"typedef int time_t;"
176
		 *	"typedef unsigned int u_int;"
177
		 *	"typedef unsigned int u_int [10];"
178
		 *
179
		 * If looking at a typedef, we save a copy of the last token
180
		 * found.  Then, when we find the ';' we take the current
181
		 * token if it starts with a valid token name, else we take
182
		 * the one we saved.  There's probably some reasonable
183
		 * alternative to this...
184
		 */
185
		case ';':
186

99765
			if (t_def && level == t_level) {
187
				t_def = NO;
188
75
				get_line();
189
75
				if (sp != tok)
190
69
					*sp = EOS;
191
75
				pfnote(tok, lineno);
192
75
				break;
193
			}
194
			goto storec;
195
196
		/*
197
		 * store characters until one that can't be part of a token
198
		 * comes along; check the current token against certain
199
		 * reserved words.
200
		 */
201
		default:
202
			/*
203
			 * to treat following function.
204
			 * func      (arg) {
205
			 * ....
206
			 * }
207
			 */
208
3041503
			if (c == ' ' || c == '\t') {
209
				int save = c;
210


3933796
				while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
211
					;
212
497581
				if (c == EOF)
213
					return;
214
497581
				(void)ungetc(c, inf);
215
				c = save;
216
497581
			}
217
3227935
	storec:		if (!intoken(c)) {
218
1029931
				if (sp == tok)
219
					break;
220
372153
				*sp = EOS;
221
				/* no typedefs inside typedefs */
222

743826
				if (!t_def &&
223
371673
					   !memcmp(tok, "typedef",8)) {
224
					t_def = YES;
225
					t_level = level;
226
75
					break;
227
				}
228
				/* catch "typedef struct" */
229

741045
				if ((!t_def || t_level < level)
230
372438
				    && (!memcmp(tok, "struct", 7)
231
740505
				    || !memcmp(tok, "union", 6)
232
737034
				    || !memcmp(tok, "enum", 5))) {
233
					/*
234
					 * get line immediately;
235
					 * may change before '{'
236
					 */
237
3594
					get_line();
238
3594
					if (str_entry(c))
239
288
						++level;
240
					break;
241
					/* } */
242
				}
243
				sp = tok;
244
368484
			}
245

2639580
			else if (sp != tok || begtoken(c)) {
246
				/* hell... truncate it */
247
2146002
				if (sp == tok + sizeof tok - 1)
248
					*sp = EOS;
249
				else
250
2146002
					*sp++ = c;
251
				token = YES;
252
2146002
			}
253
			continue;
254
		}
255
256
		sp = tok;
257
		token = NO;
258
	}
259
4284
}
260
261
/*
262
 * func_entry --
263
 *	handle a function reference
264
 */
265
static int
266
func_entry(void)
267
{
268
	int	c;			/* current character */
269
	int	level = 0;		/* for matching '()' */
270
	static char attribute[] = "__attribute__";
271
20136
	char maybe_attribute[sizeof attribute + 1];
272
	char *anext;
273
274
	/*
275
	 * Find the end of the assumed function declaration.
276
	 * Note that ANSI C functions can have type definitions so keep
277
	 * track of the parentheses nesting level.
278
	 */
279

1140860
	while (GETC(!=, EOF)) {
280

294941
		switch (c) {
281
		case '\'':
282
		case '"':
283
			/* skip strings and character constants */
284
45
			skip_string(c);
285
45
			break;
286
		case '/':
287
			/* skip comments */
288

432
			if (GETC(==, '*'))
289
108
				skip_comment(c);
290
			else if (c == '/')
291
				skip_comment(c);
292
			break;
293
		case '(':
294
537
			level++;
295
537
			break;
296
		case ')':
297
10605
			if (level == 0)
298
				goto fnd;
299
537
			level--;
300
537
			break;
301
		case '\n':
302
951
			SETLINE;
303
951
		}
304
	}
305
	return (NO);
306
fnd:
307
	/*
308
	 * we assume that the character after a function's right paren
309
	 * is a token character if it's a function and a non-token
310
	 * character if it's a declaration.  Comments don't count...
311
	 */
312
10068
	for (anext = maybe_attribute;;) {
313


119001
		while (GETC(!=, EOF) && iswhite(c))
314
6453
			if (c == '\n')
315
5967
				SETLINE;
316
11469
		if (c == EOF)
317
			return NO;
318
		/*
319
		 * Recognize the GNU __attribute__ extension, which would
320
		 * otherwise make the heuristic test DTWT
321
		 */
322
11469
		if (anext == maybe_attribute) {
323
10125
			if (intoken(c)) {
324
180
				*anext++ = c;
325
180
				continue;
326
			}
327
		} else {
328
1344
			if (intoken(c)) {
329
1167
				if (anext - maybe_attribute < (int)(sizeof attribute - 1))
330
1164
					*anext++ = c;
331
				else
332
					break;
333
1164
				continue;
334
			} else {
335
177
				*anext++ = '\0';
336
177
				if (strcmp(maybe_attribute, attribute) == 0) {
337
36
					(void)ungetc(c, inf);
338
36
					return NO;
339
				}
340
				break;
341
			}
342
		}
343
9945
		if (intoken(c) || c == '{')
344
			break;
345


4281
		if (c == '/' && GETC(==, '*'))
346
57
			skip_comment(c);
347
3996
		else if (c == '/')
348
			skip_comment(c);
349
		else {				/* don't ever "read" '/' */
350
3996
			(void)ungetc(c, inf);
351
3996
			return (NO);
352
		}
353
	}
354
6036
	if (c != '{')
355
144
		(void)skip_key('{');
356
6036
	return (YES);
357
10068
}
358
359
/*
360
 * hash_entry --
361
 *	handle a line starting with a '#'
362
 */
363
static void
364
hash_entry(void)
365
{
366
	int	c;			/* character read */
367
	int	curline;		/* line started on */
368
	char	*sp;			/* buffer pointer */
369
34110
	char	tok[MAXTOKEN];		/* storage buffer */
370
371
	/*
372
	 * to treat following macro.
373
	 * #     macro(arg)        ....
374
	 */
375


102675
	while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
376
		;
377
17055
	(void)ungetc(c, inf);
378
379
17055
	curline = lineno;
380
17055
	for (sp = tok;;) {		/* get next token */
381

493164
		if (GETC(==, EOF))
382
			return;
383
123291
		if (iswhite(c))
384
			break;
385
		/* hell... truncate it */
386
106236
		if (sp == tok + sizeof tok - 1)
387
			*sp = EOS;
388
		else
389
106236
			*sp++ = c;
390
	}
391
17055
	*sp = EOS;
392
17055
	if (memcmp(tok, "define", 6))	/* only interested in #define's */
393
		goto skip;
394
	for (;;) {			/* this doesn't handle "#define \n" */
395

10296
		if (GETC(==, EOF))
396
			return;
397
2574
		if (!iswhite(c))
398
			break;
399
	}
400
2571
	for (sp = tok;;) {		/* get next token */
401
		/* hell... truncate it */
402
20001
		if (sp == tok + sizeof tok - 1)
403
			*sp = EOS;
404
		else
405
20001
			*sp++ = c;
406

80004
		if (GETC(==, EOF))
407
			return;
408
		/*
409
		 * this is where it DOESN'T handle
410
		 * "#define \n"
411
		 */
412
20001
		if (!intoken(c))
413
			break;
414
	}
415
2571
	*sp = EOS;
416
2571
	if (dflag || c == '(') {	/* only want macros */
417
912
		get_line();
418
912
		pfnote(tok, curline);
419
912
	}
420
17055
skip:	if (c == '\n') {		/* get rid of rest of define */
421
2079
		SETLINE
422
2079
		if (*(sp - 1) != '\\')
423
2079
			return;
424
	}
425
14976
	(void)skip_key('\n');
426
32031
}
427
428
/*
429
 * str_entry --
430
 *	handle a struct, union or enum entry
431
 */
432
static int
433
str_entry(int c)
434
{
435
	int	curline;		/* line started on */
436
	char	*sp;			/* buffer pointer */
437
7188
	char	tok[LINE_MAX];		/* storage buffer */
438
439
3594
	curline = lineno;
440
10782
	while (iswhite(c))
441

14376
		if (GETC(==, EOF))
442
			return (NO);
443
3594
	if (c == '{')		/* it was "struct {" */
444
75
		return (YES);
445
3519
	for (sp = tok;;) {		/* get next token */
446
		/* hell... truncate it */
447
28608
		if (sp == tok + sizeof tok - 1)
448
			*sp = EOS;
449
		else
450
28608
			*sp++ = c;
451

114432
		if (GETC(==, EOF))
452
			return (NO);
453
28608
		if (!intoken(c))
454
			break;
455
	}
456
3840
	switch (c) {
457
		case '{':		/* it was "struct foo{" */
458
			--sp;
459
			break;
460
		case '\n':		/* it was "struct foo\n" */
461
34
			SETLINE;
462
			/*FALLTHROUGH*/
463
		default:		/* probably "struct foo " */
464

15224
			while (GETC(!=, EOF))
465
3806
				if (!iswhite(c))
466
					break;
467
3519
			if (c != '{') {
468
3306
				(void)ungetc(c, inf);
469
3306
				return (NO);
470
			}
471
	}
472
213
	*sp = EOS;
473
213
	pfnote(tok, curline);
474
213
	return (YES);
475
3594
}
476
477
/*
478
 * skip_comment --
479
 *	skip over comment
480
 */
481
void
482
skip_comment(int commenttype)
483
{
484
	int	c;			/* character read */
485
	int	star;			/* '*' flag */
486
487

16314482
	for (star = 0; GETC(!=, EOF);)
488

4141307
		switch(c) {
489
		/* comments don't nest, nor can they be escaped. */
490
		case '*':
491
			star = YES;
492
94161
			break;
493
		case '/':
494
27147
			if (commenttype == '*' && star)
495
19410
				return;
496
			break;
497
		case '\n':
498
72405
			if (commenttype == '/') {
499
				/* We don't really parse C, so sometimes it
500
				 * is necessary to see the newline
501
				 */
502
9
				ungetc(c, inf);
503
9
				return;
504
			}
505
72396
			SETLINE;
506
			/*FALLTHROUGH*/
507
		default:
508
			star = NO;
509
3947594
			break;
510
		}
511
19419
}
512
513
/*
514
 * skip_string --
515
 *	skip to the end of a string or character constant.
516
 */
517
static void
518
skip_string(int key)
519
{
520
	int	c,
521
		skip;
522
523

450888
	for (skip = NO; GETC(!=, EOF); )
524
105270
		switch (c) {
525
		case '\\':		/* a backslash escapes anything */
526
4008
			skip = !skip;	/* we toggle in case it's "\\" */
527
4008
			break;
528
		case '\n':
529
			SETLINE;
530
			/*FALLTHROUGH*/
531
		default:
532
101262
			if (c == key && !skip)
533
14904
				return;
534
			skip = NO;
535
86358
		}
536
14904
}
537
538
/*
539
 * skip_key --
540
 *	skip to next char "key"
541
 */
542
int
543
skip_key(int key)
544
{
545
	int	c,
546
		skip,
547
		retval;
548
549

936548
	for (skip = retval = NO; GETC(!=, EOF);)
550


246164
		switch(c) {
551
		case '\\':		/* a backslash escapes anything */
552
1683
			skip = !skip;	/* we toggle in case it's "\\" */
553
1683
			break;
554
		case ';':		/* special case for yacc; if one */
555
		case '|':		/* of these chars occurs, we may */
556
			retval = YES;	/* have moved out of the rule */
557
1617
			break;		/* not used by C */
558
		case '\'':
559
		case '"':
560
			/* skip strings and character constants */
561
1386
			skip_string(c);
562
1386
			break;
563
		case '/':
564
			/* skip comments */
565

13980
			if (GETC(==, '*')) {
566
1008
				skip_comment(c);
567
1008
				break;
568
2487
			} else if (c == '/') {
569
				skip_comment(c);
570
				break;
571
			}
572
2487
			(void)ungetc(c, inf);
573
			c = '/';
574
2487
			goto norm;
575
		case '\n':
576
17100
			SETLINE;
577
			/*FALLTHROUGH*/
578
		default:
579
		norm:
580
220883
			if (c == key && !skip)
581
15120
				return (retval);
582
			skip = NO;
583
205763
		}
584
	return (retval);
585
15120
}