Head

GCC Code Coverage Report

Directory:	./		Exec	Total	Coverage
File:	usr.bin/indent/lexi.c	Lines:	0	238	0.0 %
Date:	2017-11-07	Branches:	0	266	0.0 %


/*	$OpenBSD: lexi.c,v 1.20 2016/06/06 06:43:03 tobiasu Exp $	*/

/*
 * Copyright (c) 1980, 1993
 *	The Regents of the University of California.
 * Copyright (c) 1976 Board of Trustees of the University of Illinois.
 * Copyright (c) 1985 Sun Microsystems, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Here we have the token scanner for indent.  It scans off one token and puts
 * it in the global variable "token".  It returns a code, indicating the type
 * of token scanned.
 */

#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include <err.h>
#include "indent_globs.h"
#include "indent_codes.h"

/*
 * Character classes stored in chartype[] below.  Any other value (0) means
 * the character is neither part of an identifier/number nor an operator
 * character.
 */
#define alphanum 1	/* may appear in an identifier or number */
#define opchar 3	/* operator character */

/*
 * One entry of the keyword table searched by lexi() and extended by
 * addkey().
 */
struct templ {
    char       *rwd;		/* keyword spelling (NUL-terminated) */
    int         rwcode;		/* keyword class; selects the case taken in
				 * the switch on rwcode inside lexi() */
};

/*
 * Built-in keyword table.  The second member is the keyword class that
 * lexi() switches on:
 *	0  no special handling; lexi() returns ident
 *	1  "switch"; lexi() returns swstmt
 *	2  "case"/"default"; lexi() returns casestmt
 *	3  struct/union/enum; outside parentheses, remembers that the next
 *	   identifier is a tag and then behaves like class 4
 *	4  declaration keyword; lexi() returns decl (or marks a cast when
 *	   inside parentheses)
 *	5  if/while/for; lexi() returns sp_paren
 *	6  else/do; lexi() returns sp_nparen
 *	7  sizeof; sets ps.sizeof_keyword and returns ident
 */
struct templ specialsinit[] = {
	{ "switch", 1 },
	{ "case", 2 },
	{ "break", 0 },
	{ "struct", 3 },
	{ "union", 3 },
	{ "enum", 3 },
	{ "default", 2 },
	{ "int", 4 },
	{ "char", 4 },
	{ "float", 4 },
	{ "double", 4 },
	{ "long", 4 },
	{ "short", 4 },
	{ "typedef", 4 },
	{ "unsigned", 4 },
	{ "register", 4 },
	{ "static", 4 },
	{ "global", 4 },
	{ "extern", 4 },
	{ "void", 4 },
	{ "goto", 0 },
	{ "return", 0 },
	{ "if", 5 },
	{ "while", 5 },
	{ "for", 5 },
	{ "else", 6 },
	{ "do", 6 },
	{ "sizeof", 7 },
};

/*
 * The keyword table currently in use.  It starts out as the static
 * specialsinit[] array; addkey() replaces it with a heap copy the first
 * time a new keyword is added.
 */
struct templ *specials = specialsinit;
/* number of entries in use in specials[] */
int	nspecials = sizeof(specialsinit) / sizeof(specialsinit[0]);
/* capacity of the heap-allocated table; only assigned by addkey() */
int	maxspecials;

/*
 * Character class table indexed by 7-bit ASCII code: alphanum (1) for
 * characters that may appear in identifiers and numbers (letters, digits,
 * '_' and '$'), opchar (3) for operator characters, 0 for everything else.
 * Each row below covers eight consecutive codes.
 */
char        chartype[128] =
{				/* this is used to facilitate the decision of
				 * what type (alphanumeric, operator) each
				 * character is */
    0, 0, 0, 0, 0, 0, 0, 0,	/* 0x00 - 0x07: control characters */
    0, 0, 0, 0, 0, 0, 0, 0,	/* 0x08 - 0x0f */
    0, 0, 0, 0, 0, 0, 0, 0,	/* 0x10 - 0x17 */
    0, 0, 0, 0, 0, 0, 0, 0,	/* 0x18 - 0x1f */
    0, 3, 0, 0, 1, 3, 3, 0,	/* space ! " # $ % & ' */
    0, 0, 3, 3, 0, 3, 0, 3,	/* ( ) * + , - . / */
    1, 1, 1, 1, 1, 1, 1, 1,	/* 0 - 7 */
    1, 1, 0, 0, 3, 3, 3, 3,	/* 8 9 : ; < = > ? */
    0, 1, 1, 1, 1, 1, 1, 1,	/* @ A - G */
    1, 1, 1, 1, 1, 1, 1, 1,	/* H - O */
    1, 1, 1, 1, 1, 1, 1, 1,	/* P - W */
    1, 1, 1, 0, 0, 0, 3, 1,	/* X Y Z [ \ ] ^ _ */
    0, 1, 1, 1, 1, 1, 1, 1,	/* ` a - g */
    1, 1, 1, 1, 1, 1, 1, 1,	/* h - o */
    1, 1, 1, 1, 1, 1, 1, 1,	/* p - w */
    1, 1, 1, 0, 3, 0, 3, 0	/* x y z { | } ~ DEL */
};




/*
 * Look up the class of a source byte in chartype[] (alphanum, opchar or 0).
 *
 * chartype[] only covers 7-bit ASCII.  Bytes outside that range (which show
 * up as negative values when plain char is signed) are reported as class 0
 * instead of being used as an out-of-bounds array index.
 */
static int
lexi_chartype(char c)
{
    unsigned char uc = (unsigned char)c;

    return (uc < sizeof(chartype) ? chartype[uc] : 0);
}

/*
 * lexi --
 *	Scan one token starting at buf_ptr, copy its text into the token
 *	buffer (s_token .. e_token, NUL-terminated) and return its code.
 *
 *	Identifiers, keywords and numbers are handled by the first half of
 *	the function, which returns ident, decl, swstmt, casestmt, sp_paren
 *	or sp_nparen.  Everything else (newlines, literals, punctuation and
 *	operators) is handled by the switch in the second half.  A newline
 *	seen after had_eof has been set returns 0 to tell the caller to stop.
 *
 *	Besides the token buffer, the function updates buf_ptr (refilling the
 *	input through fill_buffer() when it reaches buf_end) and several
 *	fields of ps: col_1, last_nl, last_u_d, its_a_keyword,
 *	sizeof_keyword, cast_mask, block_init, want_blank, procname and
 *	in_parameter_declaration.  The static variables last_code and
 *	l_struct carry state from one call to the next.
 */
int
lexi(void)
{
    int         unary_delim;	/* this is set to 1 if the current token
				 * forces a following operator to be unary */
    static int  last_code;	/* the last token type returned */
    static int  l_struct;	/* set to 1 if the last token was 'struct' */
    int         code;		/* internal code to be returned */
    char        qchar;		/* the delimiter character for a string */
    int		i;

    e_token = s_token;		/* point to start of place to save token */
    unary_delim = false;
    ps.col_1 = ps.last_nl;	/* tell world that this token started in
				 * column 1 iff the last thing scanned was nl */
    ps.last_nl = false;

    while (*buf_ptr == ' ' || *buf_ptr == '\t') {	/* get rid of blanks */
	ps.col_1 = false;	/* leading blanks imply token is not in column
				 * 1 */
	if (++buf_ptr >= buf_end)
	    fill_buffer();
    }

    /* Scan an alphanumeric token */
    if (lexi_chartype(*buf_ptr) == alphanum ||
	(buf_ptr[0] == '.' && isdigit((unsigned char)buf_ptr[1]))) {
	/*
	 * we have a character or number
	 */
	char *j;	/* used for searching thru list of
			 * reserved words */
	if (isdigit((unsigned char)*buf_ptr) ||
	    (buf_ptr[0] == '.' && isdigit((unsigned char)buf_ptr[1]))) {
	    /*
	     * A number.  seendot and seenexp record that a decimal point or
	     * an exponent has been consumed; seensfx is a bit mask of the
	     * suffixes already seen (1: U or F, 2: L or LL).
	     */
	    int         seendot = 0,
	                seenexp = 0,
			seensfx = 0;
	    if (*buf_ptr == '0' &&
		    (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) {
		/* hexadecimal constant: copy "0x" and then the digits */
		*e_token++ = *buf_ptr++;
		*e_token++ = *buf_ptr++;
		/* <ctype.h> functions require an unsigned char value */
		while (isxdigit((unsigned char)*buf_ptr)) {
		    CHECK_SIZE_TOKEN;
		    *e_token++ = *buf_ptr++;
		}
	    }
	    else
		while (1) {
		    if (*buf_ptr == '.') {
			if (seendot)
			    break;
			else
			    seendot++;
		    }
		    CHECK_SIZE_TOKEN;
		    *e_token++ = *buf_ptr++;
		    if (!isdigit((unsigned char)*buf_ptr) && *buf_ptr != '.') {
			if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
			    break;
			else {
			    /* exponent: no '.' may follow it */
			    seenexp++;
			    seendot++;
			    CHECK_SIZE_TOKEN;
			    *e_token++ = *buf_ptr++;
			    if (*buf_ptr == '+' || *buf_ptr == '-')
				*e_token++ = *buf_ptr++;
			}
		    }
		}
	    /* integer suffixes: U, L and LL in either order, each once */
	    while (1) {
		if (!(seensfx & 1) &&
			(*buf_ptr == 'U' || *buf_ptr == 'u')) {
		    CHECK_SIZE_TOKEN;
		    *e_token++ = *buf_ptr++;
		    seensfx |= 1;
		    continue;
		}
		if (!(seensfx & 2) &&
			(*buf_ptr == 'L' || *buf_ptr == 'l')) {
		    CHECK_SIZE_TOKEN;
		    /* "LL"/"ll": copy both letters */
		    if (buf_ptr[1] == buf_ptr[0])
			*e_token++ = *buf_ptr++;
		    *e_token++ = *buf_ptr++;
		    seensfx |= 2;
		    continue;
		}
		break;
	    }
	    /* floating suffix F, not combined with U */
	    if (!(seensfx & 1) &&
		(*buf_ptr == 'F' || *buf_ptr == 'f')) {
		CHECK_SIZE_TOKEN;
		*e_token++ = *buf_ptr++;
		seensfx |= 1;
	    }
	}
	else
	    while (lexi_chartype(*buf_ptr) == alphanum) {	/* copy it over */
		CHECK_SIZE_TOKEN;
		*e_token++ = *buf_ptr++;
		if (buf_ptr >= buf_end)
		    fill_buffer();
	    }
	*e_token++ = '\0';
	while (*buf_ptr == ' ' || *buf_ptr == '\t') {	/* get rid of blanks */
	    if (++buf_ptr >= buf_end)
		fill_buffer();
	}
	ps.its_a_keyword = false;
	ps.sizeof_keyword = false;
	if (l_struct) {		/* if last token was 'struct', then this token
				 * should be treated as a declaration */
	    l_struct = false;
	    last_code = ident;
	    ps.last_u_d = true;
	    return (decl);
	}
	ps.last_u_d = false;	/* Operator after identifier is binary */
	last_code = ident;	/* Remember that this is the code we will
				 * return */

	/*
	 * This loop will check if the token is a keyword.
	 */
	for (i = 0; i < nspecials; i++) {
	    char *p = s_token;	/* point at scanned token */
	    j = specials[i].rwd;
	    if (*j++ != *p++ || *j++ != *p++)
		continue;	/* This test depends on the fact that
				 * identifiers are always at least 1 character
				 * long (ie. the first two bytes of the
				 * identifier are always meaningful) */
	    if (p[-1] == 0)
		break;		/* If its a one-character identifier */
	    while (*p++ == *j)
		if (*j++ == 0)
		    goto found_keyword;	/* I wish that C had a multi-level
					 * break... */
	}
	if (i < nspecials) {		/* we have a keyword */
    found_keyword:
	    ps.its_a_keyword = true;
	    ps.last_u_d = true;
	    switch (specials[i].rwcode) {
	    case 1:		/* it is a switch */
		return (swstmt);
	    case 2:		/* a case or default */
		return (casestmt);

	    case 3:		/* a "struct" */
		if (ps.p_l_follow)
		    break;	/* inside parens: cast */
		l_struct = true;

		/*
		 * Next time around, we will want to know that we have had a
		 * 'struct'
		 */
		/* FALLTHROUGH */
	    case 4:		/* one of the declaration keywords */
		if (ps.p_l_follow) {
		    /*
		     * NOTE(review): the shift is undefined once p_l_follow
		     * reaches the width of int; presumably the nesting depth
		     * is bounded elsewhere -- confirm.
		     */
		    ps.cast_mask |= 1 << ps.p_l_follow;
		    break;	/* inside parens: cast */
		}
		last_code = decl;
		return (decl);

	    case 5:		/* if, while, for */
		return (sp_paren);

	    case 6:		/* do, else */
		return (sp_nparen);

	    case 7:		/* sizeof */
		ps.sizeof_keyword = true;
		/* FALLTHROUGH */
	    default:		/* all others are treated like any other
				 * identifier */
		return (ident);
	    }			/* end of switch */
	}			/* end of if (found_it) */
	/*
	 * An identifier followed by '(' at the outermost level is taken to
	 * be the start of a function definition unless a ")" directly
	 * followed by ';' or ',' is found in the rest of the buffer.
	 */
	if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) {
	    char *tp = buf_ptr;
	    while (tp < buf_end)
		if (*tp++ == ')' && (*tp == ';' || *tp == ','))
		    goto not_proc;
	    strlcpy(ps.procname, token, sizeof ps.procname);
	    ps.in_parameter_declaration = 1;
	    rparen_count = 1;
    not_proc:;
	}
	/*
	 * The following hack attempts to guess whether or not the current
	 * token is in fact a declaration keyword -- one that has been
	 * typedefd
	 */
	if (((*buf_ptr == '*' && buf_ptr[1] != '=') ||
	    isalpha((unsigned char)*buf_ptr) || *buf_ptr == '_')
		&& !ps.p_l_follow
	        && !ps.block_init
		&& (ps.last_token == rparen || ps.last_token == semicolon ||
		    ps.last_token == decl ||
		    ps.last_token == lbrace || ps.last_token == rbrace)) {
	    ps.its_a_keyword = true;
	    ps.last_u_d = true;
	    last_code = decl;
	    return decl;
	}
	if (last_code == decl)	/* if this is a declared variable, then
				 * following sign is unary */
	    ps.last_u_d = true;	/* will make "int a -1" work */
	last_code = ident;
	return (ident);		/* the ident is not in the list */
    }				/* end of processing for alphanum character */

    /* Scan a non-alphanumeric token */

    *e_token++ = *buf_ptr;		/* if it is only a one-character token, it is
				 * moved here */
    *e_token = '\0';
    if (++buf_ptr >= buf_end)
	fill_buffer();

    switch (*token) {
    case '\n':
	unary_delim = ps.last_u_d;
	ps.last_nl = true;	/* remember that we just had a newline */
	code = (had_eof ? 0 : newline);

	/*
	 * if data has been exhausted, the newline is a dummy, and we should
	 * return code to stop
	 */
	break;

    case '\'':			/* start of quoted character */
    case '"':			/* start of string */
	qchar = *token;
	if (troff) {
	    e_token[-1] = '`';
	    if (qchar == '"')
		*e_token++ = '`';
	    e_token = chfont(&bodyf, &stringf, e_token);
	}
	do {			/* copy the string */
	    while (1) {		/* move one character or <backslash><char> */
		if (*buf_ptr == '\n') {
		    printf("%d: Unterminated literal\n", line_no);
		    goto stop_lit;
		}
		CHECK_SIZE_TOKEN;	/* Only have to do this once in this loop,
					 * since CHECK_SIZE guarantees that there
					 * are at least 5 entries left */
		*e_token = *buf_ptr++;
		if (buf_ptr >= buf_end)
		    fill_buffer();
		if (*e_token == BACKSLASH) {	/* if escape, copy extra char */
		    if (*buf_ptr == '\n')	/* check for escaped newline */
			++line_no;
		    if (troff) {
			*++e_token = BACKSLASH;
			if (*buf_ptr == BACKSLASH)
			    *++e_token = BACKSLASH;
		    }
		    *++e_token = *buf_ptr++;
		    ++e_token;	/* we must increment this again because we
				 * copied two chars */
		    if (buf_ptr >= buf_end)
			fill_buffer();
		}
		else
		    break;	/* we copied one character */
	    }			/* end of while (1) */
	} while (*e_token++ != qchar);
	if (troff) {
	    e_token = chfont(&stringf, &bodyf, e_token - 1);
	    if (qchar == '"')
		*e_token++ = '\'';
	}
stop_lit:
	code = ident;
	break;

    case ('('):
    case ('['):
	unary_delim = true;
	code = lparen;
	break;

    case (')'):
    case (']'):
	code = rparen;
	break;

    case '#':
	unary_delim = ps.last_u_d;
	code = preesc;
	break;

    case '?':
	unary_delim = true;
	code = question;
	break;

    case (':'):
	code = colon;
	unary_delim = true;
	break;

    case (';'):
	unary_delim = true;
	code = semicolon;
	break;

    case ('{'):
	unary_delim = true;

	/*
	 * if (ps.in_or_st) ps.block_init = 1;
	 */
	/* ?	code = ps.block_init ? lparen : lbrace; */
	code = lbrace;
	break;

    case ('}'):
	unary_delim = true;
	/* ?	code = ps.block_init ? rparen : rbrace; */
	code = rbrace;
	break;

    case 014:			/* a form feed */
	unary_delim = ps.last_u_d;
	ps.last_nl = true;	/* remember this so we can set 'ps.col_1'
				 * right */
	code = form_feed;
	break;

    case (','):
	unary_delim = true;
	code = comma;
	break;

    case '.':
	unary_delim = false;
	code = period;
	break;

    case '-':
    case '+':			/* check for -, +, --, ++ */
	code = (ps.last_u_d ? unary_op : binary_op);
	unary_delim = true;

	if (*buf_ptr == token[0]) {
	    /* check for doubled character */
	    *e_token++ = *buf_ptr++;
	    /* buffer overflow will be checked at end of loop */
	    if (last_code == ident || last_code == rparen) {
		code = (ps.last_u_d ? unary_op : postop);
		/* check for following ++ or -- */
		unary_delim = false;
	    }
	}
	else if (*buf_ptr == '=')
	    /* check for operator += */
	    *e_token++ = *buf_ptr++;
	else if (*buf_ptr == '>') {
	    /* check for operator -> */
	    *e_token++ = *buf_ptr++;
	    if (!pointer_as_binop) {
		unary_delim = false;
		code = unary_op;
		ps.want_blank = false;
	    }
	}
	break;			/* buffer overflow will be checked at end of
				 * switch */

    case '=':
	if (ps.in_or_st)
	    ps.block_init = 1;
#ifdef undef
	if (chartype[*buf_ptr] == opchar) {	/* we have two char assignment */
	    e_token[-1] = *buf_ptr++;
	    if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr)
		*e_token++ = *buf_ptr++;
	    *e_token++ = '=';	/* Flip =+ to += */
	    *e_token = 0;
	}
#else
	if (*buf_ptr == '=') {/* == */
	    *e_token++ = '=';	/* append the second '=' of "==" */
	    buf_ptr++;
	    *e_token = 0;
	}
#endif
	code = binary_op;
	unary_delim = true;
	break;
	/* no fall-through: the break above always leaves the switch */

    case '>':
    case '<':
    case '!':			/* ops like <, <<, <=, !=, etc */
	if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
	    *e_token++ = *buf_ptr;
	    if (++buf_ptr >= buf_end)
		fill_buffer();
	}
	if (*buf_ptr == '=')
	    *e_token++ = *buf_ptr++;
	code = (ps.last_u_d ? unary_op : binary_op);
	unary_delim = true;
	break;

    default:
	if (token[0] == '/' && *buf_ptr == '*') {
	    /* it is start of comment */
	    *e_token++ = '*';

	    if (++buf_ptr >= buf_end)
		fill_buffer();

	    code = comment;
	    unary_delim = ps.last_u_d;
	    break;
	}
	while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') {
	    /*
	     * handle ||, &&, etc, and also things as in int *****i
	     */
	    /*
	     * The run can be arbitrarily long, so make sure there is room
	     * in the token buffer before every store.
	     */
	    CHECK_SIZE_TOKEN;
	    *e_token++ = *buf_ptr;
	    if (++buf_ptr >= buf_end)
		fill_buffer();
	}
	code = (ps.last_u_d ? unary_op : binary_op);
	unary_delim = true;


    }				/* end of switch */
    if (code != newline) {
	l_struct = false;
	last_code = code;
    }
    if (buf_ptr >= buf_end)	/* check for input buffer empty */
	fill_buffer();
    ps.last_u_d = unary_delim;
    *e_token = '\0';		/* null terminate the token */
    return (code);
}

/*
 * Add the given keyword to the keyword table, using val as the keyword type
 */
/*
 * addkey --
 *	Enter `key' in the keyword table with keyword class `val'.
 *
 *	A key that is already in the table is left alone, so its existing
 *	class wins.  The table starts out as the static specialsinit[] array;
 *	the first insertion copies it to heap storage with 25% headroom, and
 *	later insertions enlarge that storage by 25% whenever it is full.
 *	An allocation failure ends the program through err().
 *
 *	Only the pointer is stored; the string itself is not copied, so `key'
 *	has to remain valid for as long as the table is in use.
 */
void
addkey(char *key, int val)
{
    struct templ *slot;
    int idx;

    /* A keyword that is already registered keeps its current class. */
    idx = 0;
    while (idx < nspecials) {
	slot = &specials[idx++];
	if (slot->rwd[0] != key[0])
	    continue;		/* cheap reject before the full compare */
	if (strcmp(slot->rwd, key) == 0)
	    return;
    }

    if (specials != specialsinit) {
	/* Already on the heap: enlarge by a quarter once it is full. */
	if (nspecials >= maxspecials) {
	    int grown = maxspecials + (maxspecials >> 2);
	    struct templ *bigger;

	    bigger = reallocarray(specials, grown, sizeof(specials[0]));
	    if (bigger == NULL)
		err(1, NULL);
	    specials = bigger;
	    maxspecials = grown;
	}
    } else {
	/*
	 * Still using the static table, which has no spare slots: move the
	 * built-in entries into a freshly allocated, larger table.
	 */
	nspecials = sizeof(specialsinit) / sizeof(specialsinit[0]);
	maxspecials = nspecials + (nspecials >> 2);
	specials = calloc(maxspecials, sizeof(specials[0]));
	if (specials == NULL)
	    err(1, NULL);
	memcpy(specials, specialsinit, sizeof(specialsinit));
    }

    /* Append the new entry in the first free slot. */
    slot = &specials[nspecials++];
    slot->rwd = key;
    slot->rwcode = val;
}


Generated by: GCOVR (Version 3.3)

Line	Branch	Exec	Source
1			/* $OpenBSD: lexi.c,v 1.20 2016/06/06 06:43:03 tobiasu Exp $ */
2
3			/*
4			* Copyright (c) 1980, 1993
5			* The Regents of the University of California.
6			* Copyright (c) 1976 Board of Trustees of the University of Illinois.
7			* Copyright (c) 1985 Sun Microsystems, Inc.
8			* All rights reserved.
9			*
10			* Redistribution and use in source and binary forms, with or without
11			* modification, are permitted provided that the following conditions
12			* are met:
13			* 1. Redistributions of source code must retain the above copyright
14			* notice, this list of conditions and the following disclaimer.
15			* 2. Redistributions in binary form must reproduce the above copyright
16			* notice, this list of conditions and the following disclaimer in the
17			* documentation and/or other materials provided with the distribution.
18			* 3. Neither the name of the University nor the names of its contributors
19			* may be used to endorse or promote products derived from this software
20			* without specific prior written permission.
21			*
22			* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23			* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24			* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25			* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26			* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27			* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28			* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29			* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30			* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31			* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32			* SUCH DAMAGE.
33			*/
34
35			/*
36			* Here we have the token scanner for indent. It scans off one token and puts
37			* it in the global variable "token". It returns a code, indicating the type
38			* of token scanned.
39			*/
40
41			#include <stdio.h>
42			#include <ctype.h>
43			#include <stdlib.h>
44			#include <string.h>
45			#include <err.h>
46			#include "indent_globs.h"
47			#include "indent_codes.h"
48
49			#define alphanum 1
50			#define opchar 3
51
52			struct templ {
53			char *rwd;
54			int rwcode;
55			};
56
57			struct templ specialsinit[] = {
58			{ "switch", 1 },
59			{ "case", 2 },
60			{ "break", 0 },
61			{ "struct", 3 },
62			{ "union", 3 },
63			{ "enum", 3 },
64			{ "default", 2 },
65			{ "int", 4 },
66			{ "char", 4 },
67			{ "float", 4 },
68			{ "double", 4 },
69			{ "long", 4 },
70			{ "short", 4 },
71			{ "typedef", 4 },
72			{ "unsigned", 4 },
73			{ "register", 4 },
74			{ "static", 4 },
75			{ "global", 4 },
76			{ "extern", 4 },
77			{ "void", 4 },
78			{ "goto", 0 },
79			{ "return", 0 },
80			{ "if", 5 },
81			{ "while", 5 },
82			{ "for", 5 },
83			{ "else", 6 },
84			{ "do", 6 },
85			{ "sizeof", 7 },
86			};
87
88			struct templ *specials = specialsinit;
89			int nspecials = sizeof(specialsinit) / sizeof(specialsinit[0]);
90			int maxspecials;
91
92			char chartype[128] =
93			{ /* this is used to facilitate the decision of
94			* what type (alphanumeric, operator) each
95			* character is */
96			0, 0, 0, 0, 0, 0, 0, 0,
97			0, 0, 0, 0, 0, 0, 0, 0,
98			0, 0, 0, 0, 0, 0, 0, 0,
99			0, 0, 0, 0, 0, 0, 0, 0,
100			0, 3, 0, 0, 1, 3, 3, 0,
101			0, 0, 3, 3, 0, 3, 0, 3,
102			1, 1, 1, 1, 1, 1, 1, 1,
103			1, 1, 0, 0, 3, 3, 3, 3,
104			0, 1, 1, 1, 1, 1, 1, 1,
105			1, 1, 1, 1, 1, 1, 1, 1,
106			1, 1, 1, 1, 1, 1, 1, 1,
107			1, 1, 1, 0, 0, 0, 3, 1,
108			0, 1, 1, 1, 1, 1, 1, 1,
109			1, 1, 1, 1, 1, 1, 1, 1,
110			1, 1, 1, 1, 1, 1, 1, 1,
111			1, 1, 1, 0, 3, 0, 3, 0
112			};
113
114
115
116
117			int
118			lexi(void)
119			{
120			int unary_delim; /* this is set to 1 if the current token
121			* forces a following operator to be unary */
122			static int last_code; /* the last token type returned */
123			static int l_struct; /* set to 1 if the last token was 'struct' */
124			int code; /* internal code to be returned */
125			char qchar; /* the delimiter character for a string */
126			int i;
127
128			e_token = s_token; /* point to start of place to save token */
129			unary_delim = false;
130			ps.col_1 = ps.last_nl; /* tell world that this token started in
131			* column 1 iff the last thing scanned was nl */
132			ps.last_nl = false;
133
134			while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
135			ps.col_1 = false; /* leading blanks imply token is not in column
136			* 1 */
137			if (++buf_ptr >= buf_end)
138			fill_buffer();
139			}
140
141			/* Scan an alphanumeric token */
142			if (chartype[(int)*buf_ptr] == alphanum \|\|
143			(buf_ptr[0] == '.' && isdigit((unsigned char)buf_ptr[1]))) {
144			/*
145			* we have a character or number
146			*/
147			char *j; /* used for searching thru list of
148			* reserved words */
149			if (isdigit((unsigned char)*buf_ptr) \|\|
150			(buf_ptr[0] == '.' && isdigit((unsigned char)buf_ptr[1]))) {
151			int seendot = 0,
152			seenexp = 0,
153			seensfx = 0;
154			if (*buf_ptr == '0' &&
155			(buf_ptr[1] == 'x' \|\| buf_ptr[1] == 'X')) {
156			e_token++ = buf_ptr++;
157			e_token++ = buf_ptr++;
158			while (isxdigit(*buf_ptr)) {
159			CHECK_SIZE_TOKEN;
160			e_token++ = buf_ptr++;
161			}
162			}
163			else
164			while (1) {
165			if (*buf_ptr == '.') {
166			if (seendot)
167			break;
168			else
169			seendot++;
170			}
171			CHECK_SIZE_TOKEN;
172			e_token++ = buf_ptr++;
173			if (!isdigit((unsigned char)buf_ptr) && buf_ptr != '.') {
174			if ((buf_ptr != 'E' && buf_ptr != 'e') \|\| seenexp)
175			break;
176			else {
177			seenexp++;
178			seendot++;
179			CHECK_SIZE_TOKEN;
180			e_token++ = buf_ptr++;
181			if (buf_ptr == '+' \|\| buf_ptr == '-')
182			e_token++ = buf_ptr++;
183			}
184			}
185			}
186			while (1) {
187			if (!(seensfx & 1) &&
188			(buf_ptr == 'U' \|\| buf_ptr == 'u')) {
189			CHECK_SIZE_TOKEN;
190			e_token++ = buf_ptr++;
191			seensfx \|= 1;
192			continue;
193			}
194			if (!(seensfx & 2) &&
195			(buf_ptr == 'L' \|\| buf_ptr == 'l')) {
196			CHECK_SIZE_TOKEN;
197			if (buf_ptr[1] == buf_ptr[0])
198			e_token++ = buf_ptr++;
199			e_token++ = buf_ptr++;
200			seensfx \|= 2;
201			continue;
202			}
203			break;
204			}
205			if (!(seensfx & 1) &&
206			(buf_ptr == 'F' \|\| buf_ptr == 'f')) {
207			CHECK_SIZE_TOKEN;
208			e_token++ = buf_ptr++;
209			seensfx \|= 1;
210			}
211			}
212			else
213			while (chartype[(int)buf_ptr] == alphanum) { / copy it over */
214			CHECK_SIZE_TOKEN;
215			e_token++ = buf_ptr++;
216			if (buf_ptr >= buf_end)
217			fill_buffer();
218			}
219			*e_token++ = '\0';
220			while (buf_ptr == ' ' \|\| buf_ptr == '\t') { /* get rid of blanks */
221			if (++buf_ptr >= buf_end)
222			fill_buffer();
223			}
224			ps.its_a_keyword = false;
225			ps.sizeof_keyword = false;
226			if (l_struct) { /* if last token was 'struct', then this token
227			* should be treated as a declaration */
228			l_struct = false;
229			last_code = ident;
230			ps.last_u_d = true;
231			return (decl);
232			}
233			ps.last_u_d = false; /* Operator after identifier is binary */
234			last_code = ident; /* Remember that this is the code we will
235			* return */
236
237			/*
238			* This loop will check if the token is a keyword.
239			*/
240			for (i = 0; i < nspecials; i++) {
241			char p = s_token; / point at scanned token */
242			j = specials[i].rwd;
243			if (j++ != p++ \|\| j++ != p++)
244			continue; /* This test depends on the fact that
245			* identifiers are always at least 1 character
246			* long (ie. the first two bytes of the
247			* identifier are always meaningful) */
248			if (p[-1] == 0)
249			break; /* If its a one-character identifier */
250			while (p++ == j)
251			if (*j++ == 0)
252			goto found_keyword; /* I wish that C had a multi-level
253			* break... */
254			}
255			if (i < nspecials) { /* we have a keyword */
256			found_keyword:
257			ps.its_a_keyword = true;
258			ps.last_u_d = true;
259			switch (specials[i].rwcode) {
260			case 1: /* it is a switch */
261			return (swstmt);
262			case 2: /* a case or default */
263			return (casestmt);
264
265			case 3: /* a "struct" */
266			if (ps.p_l_follow)
267			break; /* inside parens: cast */
268			l_struct = true;
269
270			/*
271			* Next time around, we will want to know that we have had a
272			* 'struct'
273			*/
274			case 4: /* one of the declaration keywords */
275			if (ps.p_l_follow) {
276			ps.cast_mask \|= 1 << ps.p_l_follow;
277			break; /* inside parens: cast */
278			}
279			last_code = decl;
280			return (decl);
281
282			case 5: /* if, while, for */
283			return (sp_paren);
284
285			case 6: /* do, else */
286			return (sp_nparen);
287
288			case 7:
289			ps.sizeof_keyword = true;
290			default: /* all others are treated like any other
291			* identifier */
292			return (ident);
293			} /* end of switch */
294			} /* end of if (found_it) */
295			if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) {
296			char *tp = buf_ptr;
297			while (tp < buf_end)
298			if (tp++ == ')' && (tp == ';' \|\| *tp == ','))
299			goto not_proc;
300			strlcpy(ps.procname, token, sizeof ps.procname);
301			ps.in_parameter_declaration = 1;
302			rparen_count = 1;
303			not_proc:;
304			}
305			/*
306			* The following hack attempts to guess whether or not the current
307			* token is in fact a declaration keyword -- one that has been
308			* typedefd
309			*/
310			if (((*buf_ptr == '*' && buf_ptr[1] != '=') ||
311			isalpha((unsigned char)*buf_ptr) || *buf_ptr == '_')
312			&& !ps.p_l_follow
313			&& !ps.block_init
314			&& (ps.last_token == rparen \|\| ps.last_token == semicolon \|\|
315			ps.last_token == decl \|\|
316			ps.last_token == lbrace \|\| ps.last_token == rbrace)) {
317			ps.its_a_keyword = true;
318			ps.last_u_d = true;
319			last_code = decl;
320			return decl;
321			}
322			if (last_code == decl) /* if this is a declared variable, then
323			* following sign is unary */
324			ps.last_u_d = true; /* will make "int a -1" work */
325			last_code = ident;
326			return (ident); /* the ident is not in the list */
327			} /* end of procesing for alpanum character */
328
329			/* Scan a non-alphanumeric token */
330
331			e_token++ = buf_ptr; /* if it is only a one-character token, it is
332			* moved here */
333			*e_token = '\0';
334			if (++buf_ptr >= buf_end)
335			fill_buffer();
336
337			switch (*token) {
338			case '\n':
339			unary_delim = ps.last_u_d;
340			ps.last_nl = true; /* remember that we just had a newline */
341			code = (had_eof ? 0 : newline);
342
343			/*
344			* if data has been exausted, the newline is a dummy, and we should
345			* return code to stop
346			*/
347			break;
348
349			case '\'': /* start of quoted character */
350			case '"': /* start of string */
351			qchar = *token;
352			if (troff) {
353			e_token[-1] = '`';
354			if (qchar == '"')
355			*e_token++ = '`';
356			e_token = chfont(&bodyf, &stringf, e_token);
357			}
358			do { /* copy the string */
359			while (1) { /* move one character or [/<char>]<char> */
360			if (*buf_ptr == '\n') {
361			printf("%d: Unterminated literal\n", line_no);
362			goto stop_lit;
363			}
364			CHECK_SIZE_TOKEN; /* Only have to do this once in this loop,
365			* since CHECK_SIZE guarantees that there
366			* are at least 5 entries left */
367			e_token = buf_ptr++;
368			if (buf_ptr >= buf_end)
369			fill_buffer();
370			if (e_token == BACKSLASH) { / if escape, copy extra char */
371			if (buf_ptr == '\n') / check for escaped newline */
372			++line_no;
373			if (troff) {
374			*++e_token = BACKSLASH;
375			if (*buf_ptr == BACKSLASH)
376			*++e_token = BACKSLASH;
377			}
378			++e_token = buf_ptr++;
379			++e_token; /* we must increment this again because we
380			* copied two chars */
381			if (buf_ptr >= buf_end)
382			fill_buffer();
383			}
384			else
385			break; /* we copied one character */
386			} /* end of while (1) */
387			} while (*e_token++ != qchar);
388			if (troff) {
389			e_token = chfont(&stringf, &bodyf, e_token - 1);
390			if (qchar == '"')
391			*e_token++ = '\'';
392			}
393			stop_lit:
394			code = ident;
395			break;
396
397			case ('('):
398			case ('['):
399			unary_delim = true;
400			code = lparen;
401			break;
402
403			case (')'):
404			case (']'):
405			code = rparen;
406			break;
407
408			case '#':
409			unary_delim = ps.last_u_d;
410			code = preesc;
411			break;
412
413			case '?':
414			unary_delim = true;
415			code = question;
416			break;
417
418			case (':'):
419			code = colon;
420			unary_delim = true;
421			break;
422
423			case (';'):
424			unary_delim = true;
425			code = semicolon;
426			break;
427
428			case ('{'):
429			unary_delim = true;
430
431			/*
432			* if (ps.in_or_st) ps.block_init = 1;
433			*/
434			/* ? code = ps.block_init ? lparen : lbrace; */
435			code = lbrace;
436			break;
437
438			case ('}'):
439			unary_delim = true;
440			/* ? code = ps.block_init ? rparen : rbrace; */
441			code = rbrace;
442			break;
443
444			case 014: /* a form feed */
445			unary_delim = ps.last_u_d;
446			ps.last_nl = true; /* remember this so we can set 'ps.col_1'
447			* right */
448			code = form_feed;
449			break;
450
451			case (','):
452			unary_delim = true;
453			code = comma;
454			break;
455
456			case '.':
457			unary_delim = false;
458			code = period;
459			break;
460
461			case '-':
462			case '+': /* check for -, +, --, ++ */
463			code = (ps.last_u_d ? unary_op : binary_op);
464			unary_delim = true;
465
466			if (*buf_ptr == token[0]) {
467			/* check for doubled character */
468			e_token++ = buf_ptr++;
469			/* buffer overflow will be checked at end of loop */
470			if (last_code == ident \|\| last_code == rparen) {
471			code = (ps.last_u_d ? unary_op : postop);
472			/* check for following ++ or -- */
473			unary_delim = false;
474			}
475			}
476			else if (*buf_ptr == '=')
477			/* check for operator += */
478			e_token++ = buf_ptr++;
479			else if (*buf_ptr == '>') {
480			/* check for operator -> */
481			e_token++ = buf_ptr++;
482			if (!pointer_as_binop) {
483			unary_delim = false;
484			code = unary_op;
485			ps.want_blank = false;
486			}
487			}
488			break; /* buffer overflow will be checked at end of
489			* switch */
490
491			case '=':
492			if (ps.in_or_st)
493			ps.block_init = 1;
494			#ifdef undef
495			if (chartype[buf_ptr] == opchar) { / we have two char assignment */
496			e_token[-1] = *buf_ptr++;
497			if ((e_token[-1] == '<' \|\| e_token[-1] == '>') && e_token[-1] == *buf_ptr)
498			e_token++ = buf_ptr++;
499			e_token++ = '='; / Flip =+ to += */
500			*e_token = 0;
501			}
502			#else
503			if (buf_ptr == '=') {/ == */
504			e_token++ = '='; / Flip =+ to += */
505			buf_ptr++;
506			*e_token = 0;
507			}
508			#endif
509			code = binary_op;
510			unary_delim = true;
511			break;
512			/* can drop thru!!! */
513
514			case '>':
515			case '<':
516			case '!': /* ops like <, <<, <=, !=, etc */
517			if (buf_ptr == '>' \|\| buf_ptr == '<' \|\| *buf_ptr == '=') {
518			e_token++ = buf_ptr;
519			if (++buf_ptr >= buf_end)
520			fill_buffer();
521			}
522			if (*buf_ptr == '=')
523			e_token++ = buf_ptr++;
524			code = (ps.last_u_d ? unary_op : binary_op);
525			unary_delim = true;
526			break;
527
528			default:
529			if (token[0] == '/' && *buf_ptr == '*') {
530			/* it is start of comment */
531			*e_token++ = '*';
532
533			if (++buf_ptr >= buf_end)
534			fill_buffer();
535
536			code = comment;
537			unary_delim = ps.last_u_d;
538			break;
539			}
540			while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') {
541			/*
542			* handle \|\|, &&, etc, and also things as in int *****i
543			*/
544			e_token++ = buf_ptr;
545			if (++buf_ptr >= buf_end)
546			fill_buffer();
547			}
548			code = (ps.last_u_d ? unary_op : binary_op);
549			unary_delim = true;
550
551
552			} /* end of switch */
553			if (code != newline) {
554			l_struct = false;
555			last_code = code;
556			}
557			if (buf_ptr >= buf_end) /* check for input buffer empty */
558			fill_buffer();
559			ps.last_u_d = unary_delim;
560			*e_token = '\0'; /* null terminate the token */
561			return (code);
562			}
563
564			/*
565			* Add the given keyword to the keyword table, using val as the keyword type
566			*/
567			void
568			addkey(char *key, int val)
569			{
570			struct templ *p;
571			int i;
572
573			for (i = 0; i < nspecials; i++) {
574			p = &specials[i];
575			if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
576			return;
577			}
578
579			if (specials == specialsinit) {
580			/*
581			* Whoa. Must reallocate special table.
582			*/
583			nspecials = sizeof (specialsinit) / sizeof (specialsinit[0]);
584			maxspecials = nspecials + (nspecials >> 2);
585			specials = calloc(maxspecials, sizeof specials[0]);
586			if (specials == NULL)
587			err(1, NULL);
588			memcpy(specials, specialsinit, sizeof specialsinit);
589			} else if (nspecials >= maxspecials) {
590			int newspecials = maxspecials + (maxspecials >> 2);
591			struct templ *specials2;
592
593			specials2 = reallocarray(specials, newspecials, sizeof(specials[0]));
594			if (specials2 == NULL)
595			err(1, NULL);
596			specials = specials2;
597			maxspecials = newspecials;
598			}
599
600			p = &specials[nspecials];
601			p->rwd = key;
602			p->rwcode = val;
603			nspecials++;
604			return;
605			}