GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: usr.bin/lex/parse.y Lines: 13 33 39.4 %
Date: 2017-11-07 Branches: 5 8 62.5 %

Line Branch Exec Source
1
/*	$OpenBSD: parse.y,v 1.10 2017/04/12 14:53:27 millert Exp $	*/
2
3
/* parse.y - parser for flex input */
4
5
%token CHAR NUMBER SECTEND SCDECL XSCDECL NAME PREVCCL EOF_OP
6
%token OPTION_OP OPT_OUTFILE OPT_PREFIX OPT_YYCLASS OPT_HEADER OPT_EXTRA_TYPE
7
%token OPT_TABLES
8
9
%token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH
10
%token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT
11
12
%token CCE_NEG_ALNUM CCE_NEG_ALPHA CCE_NEG_BLANK CCE_NEG_CNTRL CCE_NEG_DIGIT CCE_NEG_GRAPH
13
%token CCE_NEG_LOWER CCE_NEG_PRINT CCE_NEG_PUNCT CCE_NEG_SPACE CCE_NEG_UPPER CCE_NEG_XDIGIT
14
15
%left CCL_OP_DIFF CCL_OP_UNION
16
17
/*
18
 * POSIX and AT&T lex place the
19
 * precedence of the repeat operator, {}, below that of concatenation.
20
 * Thus, ab{3} is ababab.  Most other POSIX utilities use an Extended
21
 * Regular Expression (ERE) precedence that has the repeat operator
22
 * higher than concatenation.  This causes ab{3} to yield abbb.
23
 *
24
 * In order to support the POSIX and AT&T precedence and the flex
25
 * precedence we define two token sets for the begin and end tokens of
26
 * the repeat operator, '{' and '}'.  The lexical scanner chooses
27
 * which tokens to return based on whether posix_compat or lex_compat
28
 * are specified. Specifying either posix_compat or lex_compat will
29
 * cause flex to parse scanner files as per the AT&T and
30
 * POSIX-mandated behavior.
31
 */
32
33
%token BEGIN_REPEAT_POSIX END_REPEAT_POSIX BEGIN_REPEAT_FLEX END_REPEAT_FLEX
34
35
36
%{
37
/*  Copyright (c) 1990 The Regents of the University of California. */
38
/*  All rights reserved. */
39
40
/*  This code is derived from software contributed to Berkeley by */
41
/*  Vern Paxson. */
42
43
/*  The United States Government has rights in this work pursuant */
44
/*  to contract no. DE-AC03-76SF00098 between the United States */
45
/*  Department of Energy and the University of California. */
46
47
/*  This file is part of flex. */
48
49
/*  Redistribution and use in source and binary forms, with or without */
50
/*  modification, are permitted provided that the following conditions */
51
/*  are met: */
52
53
/*  1. Redistributions of source code must retain the above copyright */
54
/*     notice, this list of conditions and the following disclaimer. */
55
/*  2. Redistributions in binary form must reproduce the above copyright */
56
/*     notice, this list of conditions and the following disclaimer in the */
57
/*     documentation and/or other materials provided with the distribution. */
58
59
/*  Neither the name of the University nor the names of its contributors */
60
/*  may be used to endorse or promote products derived from this software */
61
/*  without specific prior written permission. */
62
63
/*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
64
/*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
65
/*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
66
/*  PURPOSE. */
67
68
#include "flexdef.h"
69
#include "tables.h"
70
71
int pat, scnum, eps, headcnt, trailcnt, lastchar, i, rulelen;
72
int trlcontxt, xcluflg, currccl, cclsorted, varlength, variable_trail_rule;
73
74
int *scon_stk;
75
int scon_stk_ptr;
76
77
static int madeany = false;  /* whether we've made the '.' character class */
78
static int ccldot, cclany;
79
int previous_continued_action;	/* whether the previous rule's action was '|' */
80
81
#define format_warn3(fmt, a1, a2) \
82
	do{ \
83
        char fw3_msg[MAXLINE];\
84
        snprintf( fw3_msg, MAXLINE,(fmt), (a1), (a2) );\
85
        warn( fw3_msg );\
86
	}while(0)
87
88
/* Expand a POSIX character class expression. */
89
#define CCL_EXPR(func) \
90
	do{ \
91
	int c; \
92
	for ( c = 0; c < csize; ++c ) \
93
		if ( isascii(c) && func(c) ) \
94
			ccladd( currccl, c ); \
95
	}while(0)
96
97
/* negated class */
98
#define CCL_NEG_EXPR(func) \
99
	do{ \
100
	int c; \
101
	for ( c = 0; c < csize; ++c ) \
102
		if ( !func(c) ) \
103
			ccladd( currccl, c ); \
104
	}while(0)
105
106
/* On some over-ambitious machines, such as DEC Alpha's, the default
107
 * token type is "long" instead of "int"; this leads to problems with
108
 * declaring yylval in flexdef.h.  But so far, all the yacc's I've seen
109
 * wrap their definitions of YYSTYPE with "#ifndef YYSTYPE"'s, so the
110
 * following should ensure that the default token type is "int".
111
 */
112
#define YYSTYPE int
113
114
%}
115
116
%%
117
goal		:  initlex sect1 sect1end sect2 initforrule
118
			{ /* add default rule */
119
			int def_rule;
120
121
			pat = cclinit();
122
			cclnegate( pat );
123
124
			def_rule = mkstate( -pat );
125
126
			/* Remember the number of the default rule so we
127
			 * don't generate "can't match" warnings for it.
128
			 */
129
			default_rule = num_rules;
130
131
			finish_rule( def_rule, false, 0, 0, 0);
132
133
			for ( i = 1; i <= lastsc; ++i )
134
				scset[i] = mkbranch( scset[i], def_rule );
135
136
			if ( spprdflt )
137
				add_action(
138
				"YY_FATAL_ERROR( \"flex scanner jammed\" )" );
139
			else
140
				add_action( "ECHO" );
141
142
			add_action( ";\n\tYY_BREAK\n" );
143
			}
144
		;
145
146
initlex		:
147
			{ /* initialize for processing rules */
148
149
			/* Create default DFA start condition. */
150
			scinstal( "INITIAL", false );
151
			}
152
		;
153
154
sect1		:  sect1 startconddecl namelist1
155
		|  sect1 options
156
		|
157
		|  error
158
			{ synerr( _("unknown error processing section 1") ); }
159
		;
160
161
sect1end	:  SECTEND
162
			{
163
			check_options();
164
			scon_stk = allocate_integer_array( lastsc + 1 );
165
			scon_stk_ptr = 0;
166
			}
167
		;
168
169
startconddecl	:  SCDECL
170
			{ xcluflg = false; }
171
172
		|  XSCDECL
173
			{ xcluflg = true; }
174
		;
175
176
namelist1	:  namelist1 NAME
177
			{ scinstal( nmstr, xcluflg ); }
178
179
		|  NAME
180
			{ scinstal( nmstr, xcluflg ); }
181
182
		|  error
183
			{ synerr( _("bad start condition list") ); }
184
		;
185
186
options		:  OPTION_OP optionlist
187
		;
188
189
optionlist	:  optionlist option
190
		|
191
		;
192
193
option		:  OPT_OUTFILE '=' NAME
194
			{
195
			outfilename = copy_string( nmstr );
196
			did_outfilename = 1;
197
			}
198
		|  OPT_EXTRA_TYPE '=' NAME
199
			{ extra_type = copy_string( nmstr ); }
200
		|  OPT_PREFIX '=' NAME
201
			{ prefix = copy_string( nmstr ); }
202
		|  OPT_YYCLASS '=' NAME
203
			{ yyclass = copy_string( nmstr ); }
204
		|  OPT_HEADER '=' NAME
205
			{ headerfilename = copy_string( nmstr ); }
206
	    |  OPT_TABLES '=' NAME
207
            { tablesext = true; tablesfilename = copy_string( nmstr ); }
208
		;
209
210
sect2		:  sect2 scon initforrule flexrule '\n'
211
			{ scon_stk_ptr = $2; }
212
		|  sect2 scon '{' sect2 '}'
213
			{ scon_stk_ptr = $2; }
214
		|
215
		;
216
217
initforrule	:
218
			{
219
			/* Initialize for a parse of one rule. */
220
			trlcontxt = variable_trail_rule = varlength = false;
221
			trailcnt = headcnt = rulelen = 0;
222
			current_state_type = STATE_NORMAL;
223
			previous_continued_action = continued_action;
224
			in_rule = true;
225
226
			new_rule();
227
			}
228
		;
229
230
flexrule	:  '^' rule
231
			{
232
			pat = $2;
233
			finish_rule( pat, variable_trail_rule,
234
				headcnt, trailcnt , previous_continued_action);
235
236
			if ( scon_stk_ptr > 0 )
237
				{
238
				for ( i = 1; i <= scon_stk_ptr; ++i )
239
					scbol[scon_stk[i]] =
240
						mkbranch( scbol[scon_stk[i]],
241
								pat );
242
				}
243
244
			else
245
				{
246
				/* Add to all non-exclusive start conditions,
247
				 * including the default (0) start condition.
248
				 */
249
250
				for ( i = 1; i <= lastsc; ++i )
251
					if ( ! scxclu[i] )
252
						scbol[i] = mkbranch( scbol[i],
253
									pat );
254
				}
255
256
			if ( ! bol_needed )
257
				{
258
				bol_needed = true;
259
260
				if ( performance_report > 1 )
261
					pinpoint_message(
262
			"'^' operator results in sub-optimal performance" );
263
				}
264
			}
265
266
		|  rule
267
			{
268
			pat = $1;
269
			finish_rule( pat, variable_trail_rule,
270
				headcnt, trailcnt , previous_continued_action);
271
272
			if ( scon_stk_ptr > 0 )
273
				{
274
				for ( i = 1; i <= scon_stk_ptr; ++i )
275
					scset[scon_stk[i]] =
276
						mkbranch( scset[scon_stk[i]],
277
								pat );
278
				}
279
280
			else
281
				{
282
				for ( i = 1; i <= lastsc; ++i )
283
					if ( ! scxclu[i] )
284
						scset[i] =
285
							mkbranch( scset[i],
286
								pat );
287
				}
288
			}
289
290
		|  EOF_OP
291
			{
292
			if ( scon_stk_ptr > 0 )
293
				build_eof_action();
294
295
			else
296
				{
297
				/* This EOF applies to all start conditions
298
				 * which don't already have EOF actions.
299
				 */
300
				for ( i = 1; i <= lastsc; ++i )
301
					if ( ! sceof[i] )
302
						scon_stk[++scon_stk_ptr] = i;
303
304
				if ( scon_stk_ptr == 0 )
305
					warn(
306
			"all start conditions already have <<EOF>> rules" );
307
308
				else
309
					build_eof_action();
310
				}
311
			}
312
313
		|  error
314
			{ synerr( _("unrecognized rule") ); }
315
		;
316
317
scon_stk_ptr	:
318
			{ $$ = scon_stk_ptr; }
319
		;
320
321
scon		:  '<' scon_stk_ptr namelist2 '>'
322
			{ $$ = $2; }
323
324
		|  '<' '*' '>'
325
			{
326
			$$ = scon_stk_ptr;
327
328
			for ( i = 1; i <= lastsc; ++i )
329
				{
330
				int j;
331
332
				for ( j = 1; j <= scon_stk_ptr; ++j )
333
					if ( scon_stk[j] == i )
334
						break;
335
336
				if ( j > scon_stk_ptr )
337
					scon_stk[++scon_stk_ptr] = i;
338
				}
339
			}
340
341
		|
342
			{ $$ = scon_stk_ptr; }
343
		;
344
345
namelist2	:  namelist2 ',' sconname
346
347
		|  sconname
348
349
		|  error
350
			{ synerr( _("bad start condition list") ); }
351
		;
352
353
sconname	:  NAME
354
			{
355
			if ( (scnum = sclookup( nmstr )) == 0 )
356
				format_pinpoint_message(
357
					"undeclared start condition %s",
358
					nmstr );
359
			else
360
				{
361
				for ( i = 1; i <= scon_stk_ptr; ++i )
362
					if ( scon_stk[i] == scnum )
363
						{
364
						format_warn(
365
							"<%s> specified twice",
366
							scname[scnum] );
367
						break;
368
						}
369
370
				if ( i > scon_stk_ptr )
371
					scon_stk[++scon_stk_ptr] = scnum;
372
				}
373
			}
374
		;
375
376
rule		:  re2 re
377
			{
378
			if ( transchar[lastst[$2]] != SYM_EPSILON )
379
				/* Provide final transition \now/ so it
380
				 * will be marked as a trailing context
381
				 * state.
382
				 */
383
				$2 = link_machines( $2,
384
						mkstate( SYM_EPSILON ) );
385
386
			mark_beginning_as_normal( $2 );
387
			current_state_type = STATE_NORMAL;
388
389
			if ( previous_continued_action )
390
				{
391
				/* We need to treat this as variable trailing
392
				 * context so that the backup does not happen
393
				 * in the action but before the action switch
394
				 * statement.  If the backup happens in the
395
				 * action, then the rules "falling into" this
396
				 * one's action will *also* do the backup,
397
				 * erroneously.
398
				 */
399
				if ( ! varlength || headcnt != 0 )
400
					warn(
401
		"trailing context made variable due to preceding '|' action" );
402
403
				/* Mark as variable. */
404
				varlength = true;
405
				headcnt = 0;
406
407
				}
408
409
			if ( lex_compat || (varlength && headcnt == 0) )
410
				{ /* variable trailing context rule */
411
				/* Mark the first part of the rule as the
412
				 * accepting "head" part of a trailing
413
				 * context rule.
414
				 *
415
				 * By the way, we didn't do this at the
416
				 * beginning of this production because back
417
				 * then current_state_type was set up for a
418
				 * trail rule, and add_accept() can create
419
				 * a new state ...
420
				 */
421
				add_accept( $1,
422
					num_rules | YY_TRAILING_HEAD_MASK );
423
				variable_trail_rule = true;
424
				}
425
426
			else
427
				trailcnt = rulelen;
428
429
			$$ = link_machines( $1, $2 );
430
			}
431
432
		|  re2 re '$'
433
			{ synerr( _("trailing context used twice") ); }
434
435
		|  re '$'
436
			{
437
			headcnt = 0;
438
			trailcnt = 1;
439
			rulelen = 1;
440
			varlength = false;
441
442
			current_state_type = STATE_TRAILING_CONTEXT;
443
444
			if ( trlcontxt )
445
				{
446
				synerr( _("trailing context used twice") );
447
				$$ = mkstate( SYM_EPSILON );
448
				}
449
450
			else if ( previous_continued_action )
451
				{
452
				/* See the comment in the rule for "re2 re"
453
				 * above.
454
				 */
455
				warn(
456
		"trailing context made variable due to preceding '|' action" );
457
458
				varlength = true;
459
				}
460
461
			if ( lex_compat || varlength )
462
				{
463
				/* Again, see the comment in the rule for
464
				 * "re2 re" above.
465
				 */
466
				add_accept( $1,
467
					num_rules | YY_TRAILING_HEAD_MASK );
468
				variable_trail_rule = true;
469
				}
470
471
			trlcontxt = true;
472
473
			eps = mkstate( SYM_EPSILON );
474
			$$ = link_machines( $1,
475
				link_machines( eps, mkstate( '\n' ) ) );
476
			}
477
478
		|  re
479
			{
480
			$$ = $1;
481
482
			if ( trlcontxt )
483
				{
484
				if ( lex_compat || (varlength && headcnt == 0) )
485
					/* Both head and trail are
486
					 * variable-length.
487
					 */
488
					variable_trail_rule = true;
489
				else
490
					trailcnt = rulelen;
491
				}
492
			}
493
		;
494
495
496
re		:  re '|' series
497
			{
498
			varlength = true;
499
			$$ = mkor( $1, $3 );
500
			}
501
502
		|  series
503
			{ $$ = $1; }
504
		;
505
506
507
re2		:  re '/'
508
			{
509
			/* This rule is written separately so the
510
			 * reduction will occur before the trailing
511
			 * series is parsed.
512
			 */
513
514
			if ( trlcontxt )
515
				synerr( _("trailing context used twice") );
516
			else
517
				trlcontxt = true;
518
519
			if ( varlength )
520
				/* We hope the trailing context is
521
				 * fixed-length.
522
				 */
523
				varlength = false;
524
			else
525
				headcnt = rulelen;
526
527
			rulelen = 0;
528
529
			current_state_type = STATE_TRAILING_CONTEXT;
530
			$$ = $1;
531
			}
532
		;
533
534
series		:  series singleton
535
			{
536
			/* This is where concatenation of adjacent patterns
537
			 * gets done.
538
			 */
539
			$$ = link_machines( $1, $2 );
540
			}
541
542
		|  singleton
543
			{ $$ = $1; }
544
545
		|  series BEGIN_REPEAT_POSIX NUMBER ',' NUMBER END_REPEAT_POSIX
546
			{
547
			varlength = true;
548
549
			if ( $3 > $5 || $3 < 0 )
550
				{
551
				synerr( _("bad iteration values") );
552
				$$ = $1;
553
				}
554
			else
555
				{
556
				if ( $3 == 0 )
557
					{
558
					if ( $5 <= 0 )
559
						{
560
						synerr(
561
						_("bad iteration values") );
562
						$$ = $1;
563
						}
564
					else
565
						$$ = mkopt(
566
							mkrep( $1, 1, $5 ) );
567
					}
568
				else
569
					$$ = mkrep( $1, $3, $5 );
570
				}
571
			}
572
573
		|  series BEGIN_REPEAT_POSIX NUMBER ',' END_REPEAT_POSIX
574
			{
575
			varlength = true;
576
577
			if ( $3 <= 0 )
578
				{
579
				synerr( _("iteration value must be positive") );
580
				$$ = $1;
581
				}
582
583
			else
584
				$$ = mkrep( $1, $3, INFINITE_REPEAT );
585
			}
586
587
		|  series BEGIN_REPEAT_POSIX NUMBER END_REPEAT_POSIX
588
			{
589
			/* The series could be something like "(foo)",
590
			 * in which case we have no idea what its length
591
			 * is, so we punt here.
592
			 */
593
			varlength = true;
594
595
			if ( $3 <= 0 )
596
				{
597
				  synerr( _("iteration value must be positive")
598
					  );
599
				$$ = $1;
600
				}
601
602
			else
603
				$$ = link_machines( $1,
604
						copysingl( $1, $3 - 1 ) );
605
			}
606
607
		;
608
609
singleton	:  singleton '*'
610
			{
611
			varlength = true;
612
613
			$$ = mkclos( $1 );
614
			}
615
616
		|  singleton '+'
617
			{
618
			varlength = true;
619
			$$ = mkposcl( $1 );
620
			}
621
622
		|  singleton '?'
623
			{
624
			varlength = true;
625
			$$ = mkopt( $1 );
626
			}
627
628
		|  singleton BEGIN_REPEAT_FLEX NUMBER ',' NUMBER END_REPEAT_FLEX
629
			{
630
			varlength = true;
631
632
			if ( $3 > $5 || $3 < 0 )
633
				{
634
				synerr( _("bad iteration values") );
635
				$$ = $1;
636
				}
637
			else
638
				{
639
				if ( $3 == 0 )
640
					{
641
					if ( $5 <= 0 )
642
						{
643
						synerr(
644
						_("bad iteration values") );
645
						$$ = $1;
646
						}
647
					else
648
						$$ = mkopt(
649
							mkrep( $1, 1, $5 ) );
650
					}
651
				else
652
					$$ = mkrep( $1, $3, $5 );
653
				}
654
			}
655
656
		|  singleton BEGIN_REPEAT_FLEX NUMBER ',' END_REPEAT_FLEX
657
			{
658
			varlength = true;
659
660
			if ( $3 <= 0 )
661
				{
662
				synerr( _("iteration value must be positive") );
663
				$$ = $1;
664
				}
665
666
			else
667
				$$ = mkrep( $1, $3, INFINITE_REPEAT );
668
			}
669
670
		|  singleton BEGIN_REPEAT_FLEX NUMBER END_REPEAT_FLEX
671
			{
672
			/* The singleton could be something like "(foo)",
673
			 * in which case we have no idea what its length
674
			 * is, so we punt here.
675
			 */
676
			varlength = true;
677
678
			if ( $3 <= 0 )
679
				{
680
				synerr( _("iteration value must be positive") );
681
				$$ = $1;
682
				}
683
684
			else
685
				$$ = link_machines( $1,
686
						copysingl( $1, $3 - 1 ) );
687
			}
688
689
		|  '.'
690
			{
691
			if ( ! madeany )
692
				{
693
				/* Create the '.' character class. */
694
                    ccldot = cclinit();
695
                    ccladd( ccldot, '\n' );
696
                    cclnegate( ccldot );
697
698
                    if ( useecs )
699
                        mkeccl( ccltbl + cclmap[ccldot],
700
                            ccllen[ccldot], nextecm,
701
                            ecgroup, csize, csize );
702
703
				/* Create the (?s:'.') character class. */
704
                    cclany = cclinit();
705
                    cclnegate( cclany );
706
707
                    if ( useecs )
708
                        mkeccl( ccltbl + cclmap[cclany],
709
                            ccllen[cclany], nextecm,
710
                            ecgroup, csize, csize );
711
712
				madeany = true;
713
				}
714
715
			++rulelen;
716
717
            if (sf_dot_all())
718
                $$ = mkstate( -cclany );
719
            else
720
                $$ = mkstate( -ccldot );
721
			}
722
723
		|  fullccl
724
			{
725
				/* Sort characters for fast searching.
726
				 */
727
				qsort( ccltbl + cclmap[$1], ccllen[$1], sizeof (*ccltbl), cclcmp );
728
729
			if ( useecs )
730
				mkeccl( ccltbl + cclmap[$1], ccllen[$1],
731
					nextecm, ecgroup, csize, csize );
732
733
			++rulelen;
734
735
			if (ccl_has_nl[$1])
736
				rule_has_nl[num_rules] = true;
737
738
			$$ = mkstate( -$1 );
739
			}
740
741
		|  PREVCCL
742
			{
743
			++rulelen;
744
745
			if (ccl_has_nl[$1])
746
				rule_has_nl[num_rules] = true;
747
748
			$$ = mkstate( -$1 );
749
			}
750
751
		|  '"' string '"'
752
			{ $$ = $2; }
753
754
		|  '(' re ')'
755
			{ $$ = $2; }
756
757
		|  CHAR
758
			{
759
			++rulelen;
760
761
			if ($1 == nlch)
762
				rule_has_nl[num_rules] = true;
763
764
            if (sf_case_ins() && has_case($1))
765
                /* create an alternation, as in (a|A) */
766
                $$ = mkor (mkstate($1), mkstate(reverse_case($1)));
767
            else
768
                $$ = mkstate( $1 );
769
			}
770
		;
771
fullccl:
772
        fullccl CCL_OP_DIFF  braceccl  { $$ = ccl_set_diff  ($1, $3); }
773
    |   fullccl CCL_OP_UNION braceccl  { $$ = ccl_set_union ($1, $3); }
774
    |   braceccl
775
    ;
776
777
braceccl:
778
779
            '[' ccl ']' { $$ = $2; }
780
781
		|  '[' '^' ccl ']'
782
			{
783
			cclnegate( $3 );
784
			$$ = $3;
785
			}
786
		;
787
788
ccl		:  ccl CHAR '-' CHAR
789
			{
790
791
			if (sf_case_ins())
792
			  {
793
794
			    /* If one end of the range has case and the other
795
			     * does not, or the cases are different, then we're not
796
			     * sure what range the user is trying to express.
797
			     * Examples: [@-z] or [S-t]
798
			     */
799
			    if (has_case ($2) != has_case ($4)
800
				     || (has_case ($2) && (b_islower ($2) != b_islower ($4)))
801
				     || (has_case ($2) && (b_isupper ($2) != b_isupper ($4))))
802
			      format_warn3 (
803
			      _("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
804
					    $2, $4);
805
806
			    /* If the range spans uppercase characters but not
807
			     * lowercase (or vice-versa), then should we automatically
808
			     * include lowercase characters in the range?
809
			     * Example: [@-_] spans [a-z] but not [A-Z]
810
			     */
811
			    else if (!has_case ($2) && !has_case ($4) && !range_covers_case ($2, $4))
812
			      format_warn3 (
813
			      _("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
814
					    $2, $4);
815
			  }
816
817
			if ( $2 > $4 )
818
				synerr( _("negative range in character class") );
819
820
			else
821
				{
822
				for ( i = $2; i <= $4; ++i )
823
					ccladd( $1, i );
824
825
				/* Keep track if this ccl is staying in
826
				 * alphabetical order.
827
				 */
828
				cclsorted = cclsorted && ($2 > lastchar);
829
				lastchar = $4;
830
831
                /* Do it again for upper/lowercase */
832
                if (sf_case_ins() && has_case($2) && has_case($4)){
833
                    $2 = reverse_case ($2);
834
                    $4 = reverse_case ($4);
835
836
                    for ( i = $2; i <= $4; ++i )
837
                        ccladd( $1, i );
838
839
                    cclsorted = cclsorted && ($2 > lastchar);
840
                    lastchar = $4;
841
                }
842
843
				}
844
845
			$$ = $1;
846
			}
847
848
		|  ccl CHAR
849
			{
850
			ccladd( $1, $2 );
851
			cclsorted = cclsorted && ($2 > lastchar);
852
			lastchar = $2;
853
854
            /* Do it again for upper/lowercase */
855
            if (sf_case_ins() && has_case($2)){
856
                $2 = reverse_case ($2);
857
                ccladd ($1, $2);
858
859
                cclsorted = cclsorted && ($2 > lastchar);
860
                lastchar = $2;
861
            }
862
863
			$$ = $1;
864
			}
865
866
		|  ccl ccl_expr
867
			{
868
			/* Too hard to properly maintain cclsorted. */
869
			cclsorted = false;
870
			$$ = $1;
871
			}
872
873
		|
874
			{
875
			cclsorted = true;
876
			lastchar = 0;
877
			currccl = $$ = cclinit();
878
			}
879
		;
880
881
ccl_expr:
882
           CCE_ALNUM	{ CCL_EXPR(isalnum); }
883
		|  CCE_ALPHA	{ CCL_EXPR(isalpha); }
884
		|  CCE_BLANK	{ CCL_EXPR(isblank); }
885
		|  CCE_CNTRL	{ CCL_EXPR(iscntrl); }
886
		|  CCE_DIGIT	{ CCL_EXPR(isdigit); }
887
		|  CCE_GRAPH	{ CCL_EXPR(isgraph); }
888
		|  CCE_LOWER	{
889
                          CCL_EXPR(islower);
890
                          if (sf_case_ins())
891
                              CCL_EXPR(isupper);
892
                        }
893
		|  CCE_PRINT	{ CCL_EXPR(isprint); }
894
		|  CCE_PUNCT	{ CCL_EXPR(ispunct); }
895
		|  CCE_SPACE	{ CCL_EXPR(isspace); }
896
		|  CCE_XDIGIT	{ CCL_EXPR(isxdigit); }
897
		|  CCE_UPPER	{
898
                    CCL_EXPR(isupper);
899
                    if (sf_case_ins())
900
                        CCL_EXPR(islower);
901
				}
902
903
        |  CCE_NEG_ALNUM	{ CCL_NEG_EXPR(isalnum); }
904
		|  CCE_NEG_ALPHA	{ CCL_NEG_EXPR(isalpha); }
905
		|  CCE_NEG_BLANK	{ CCL_NEG_EXPR(isblank); }
906
		|  CCE_NEG_CNTRL	{ CCL_NEG_EXPR(iscntrl); }
907
		|  CCE_NEG_DIGIT	{ CCL_NEG_EXPR(isdigit); }
908
		|  CCE_NEG_GRAPH	{ CCL_NEG_EXPR(isgraph); }
909
		|  CCE_NEG_PRINT	{ CCL_NEG_EXPR(isprint); }
910
		|  CCE_NEG_PUNCT	{ CCL_NEG_EXPR(ispunct); }
911
		|  CCE_NEG_SPACE	{ CCL_NEG_EXPR(isspace); }
912
		|  CCE_NEG_XDIGIT	{ CCL_NEG_EXPR(isxdigit); }
913
		|  CCE_NEG_LOWER	{
914
				if ( sf_case_ins() )
915
					warn(_("[:^lower:] is ambiguous in case insensitive scanner"));
916
				else
917
					CCL_NEG_EXPR(islower);
918
				}
919
		|  CCE_NEG_UPPER	{
920
				if ( sf_case_ins() )
921
					warn(_("[:^upper:] ambiguous in case insensitive scanner"));
922
				else
923
					CCL_NEG_EXPR(isupper);
924
				}
925
		;
926
927
string		:  string CHAR
928
			{
929
			if ( $2 == nlch )
930
				rule_has_nl[num_rules] = true;
931
932
			++rulelen;
933
934
            if (sf_case_ins() && has_case($2))
935
                $$ = mkor (mkstate($2), mkstate(reverse_case($2)));
936
            else
937
                $$ = mkstate ($2);
938
939
			$$ = link_machines( $1, $$);
940
			}
941
942
		|
943
			{ $$ = mkstate( SYM_EPSILON ); }
944
		;
945
946
%%
947
948
949
/* build_eof_action - build the "<<EOF>>" action for the active start
950
 *                    conditions
951
 */
952
953
void build_eof_action()
954
	{
955
	int i;
956
28
	char action_text[MAXLINE];
957
958
60
	for ( i = 1; i <= scon_stk_ptr; ++i )
959
		{
960
16
		if ( sceof[scon_stk[i]] )
961
			format_pinpoint_message(
962
				"multiple <<EOF>> rules for start condition %s",
963
				scname[scon_stk[i]] );
964
965
		else
966
			{
967
16
			sceof[scon_stk[i]] = true;
968
969
16
			if (previous_continued_action /* && previous action was regular */)
970
1
				add_action("YY_RULE_SETUP\n");
971
972
32
			snprintf( action_text, sizeof(action_text), "case YY_STATE_EOF(%s):\n",
973
16
				scname[scon_stk[i]] );
974
16
			add_action( action_text );
975
			}
976
		}
977
978
14
	line_directive_out( (FILE *) 0, 1 );
979
980
	/* This isn't a normal rule after all - don't count it as
981
	 * such, so we don't have any holes in the rule numbering
982
	 * (which make generating "rule can never match" warnings
983
	 * more difficult.
984
	 */
985
14
	--num_rules;
986
14
	++num_eof_rules;
987
14
	}
988
989
990
/* format_synerr - write out formatted syntax error */
991
992
void format_synerr( msg, arg )
993
const char *msg, arg[];
994
	{
995
	char errmsg[MAXLINE];
996
997
	(void) snprintf( errmsg, sizeof(errmsg), msg, arg );
998
	synerr( errmsg );
999
	}
1000
1001
1002
/* synerr - report a syntax error */
1003
1004
void synerr( str )
1005
const char *str;
1006
	{
1007
	syntaxerror = true;
1008
	pinpoint_message( str );
1009
	}
1010
1011
1012
/* format_warn - write out formatted warning */
1013
1014
void format_warn( msg, arg )
1015
const char *msg, arg[];
1016
	{
1017
	char warn_msg[MAXLINE];
1018
1019
	snprintf( warn_msg, sizeof(warn_msg), msg, arg );
1020
	warn( warn_msg );
1021
	}
1022
1023
1024
/* warn - report a warning, unless -w was given */
1025
1026
void warn( str )
1027
const char *str;
1028
	{
1029
	line_warning( str, linenum );
1030
	}
1031
1032
/* format_pinpoint_message - write out a message formatted with one string,
1033
 *			     pinpointing its location
1034
 */
1035
1036
void format_pinpoint_message( msg, arg )
1037
const char *msg, arg[];
1038
	{
1039
	char errmsg[MAXLINE];
1040
1041
	snprintf( errmsg, sizeof(errmsg), msg, arg );
1042
	pinpoint_message( errmsg );
1043
	}
1044
1045
1046
/* pinpoint_message - write out a message, pinpointing its location */
1047
1048
void pinpoint_message( str )
1049
const char *str;
1050
	{
1051
	line_pinpoint( str, linenum );
1052
	}
1053
1054
1055
/* line_warning - report a warning at a given line, unless -w was given */
1056
1057
void line_warning( str, line )
1058
const char *str;
1059
int line;
1060
	{
1061
	char warning[MAXLINE];
1062
1063
	if ( ! nowarn )
1064
		{
1065
		snprintf( warning, sizeof(warning), "warning, %s", str );
1066
		line_pinpoint( warning, line );
1067
		}
1068
	}
1069
1070
1071
/* line_pinpoint - write out a message, pinpointing it at the given line */
1072
1073
void line_pinpoint( str, line )
1074
const char *str;
1075
int line;
1076
	{
1077
	fprintf( stderr, "%s:%d: %s\n", infilename, line, str );
1078
	}
1079
1080
1081
/* yyerror - eat up an error message from the parser;
 *	     currently, messages are ignored
 *
 * The grammar's own "error" productions report problems through
 * synerr(), so the generic parser message passed in msg is dropped.
 */

void yyerror( const char *msg )
	{
	(void) msg;	/* deliberately unused */
	}