GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: usr.bin/lex/dfa.c Lines: 263 387 68.0 %
Date: 2017-11-07 Branches: 184 288 63.9 %

Line Branch Exec Source
1
/*	$OpenBSD: dfa.c,v 1.8 2015/11/19 23:20:34 tedu Exp $	*/
2
3
/* dfa - DFA construction routines */
4
5
/*  Copyright (c) 1990 The Regents of the University of California. */
6
/*  All rights reserved. */
7
8
/*  This code is derived from software contributed to Berkeley by */
9
/*  Vern Paxson. */
10
11
/*  The United States Government has rights in this work pursuant */
12
/*  to contract no. DE-AC03-76SF00098 between the United States */
13
/*  Department of Energy and the University of California. */
14
15
/*  Redistribution and use in source and binary forms, with or without */
16
/*  modification, are permitted provided that the following conditions */
17
/*  are met: */
18
19
/*  1. Redistributions of source code must retain the above copyright */
20
/*     notice, this list of conditions and the following disclaimer. */
21
/*  2. Redistributions in binary form must reproduce the above copyright */
22
/*     notice, this list of conditions and the following disclaimer in the */
23
/*     documentation and/or other materials provided with the distribution. */
24
25
/*  Neither the name of the University nor the names of its contributors */
26
/*  may be used to endorse or promote products derived from this software */
27
/*  without specific prior written permission. */
28
29
/*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
30
/*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
31
/*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
32
/*  PURPOSE. */
33
34
#include "flexdef.h"
35
#include "tables.h"
36
37
/* declare functions that have forward references */
38
39
void dump_associated_rules PROTO ((FILE *, int));
40
void dump_transitions PROTO ((FILE *, int[]));
41
void sympartition PROTO ((int[], int, int[], int[]));
42
int symfollowset PROTO ((int[], int, int, int[]));
43
44
45
/* check_for_backing_up - check a DFA state for backing up
46
 *
47
 * synopsis
48
 *     void check_for_backing_up( int ds, int state[numecs] );
49
 *
50
 * ds is the number of the state to check and state[] is its out-transitions,
51
 * indexed by equivalence class.
52
 */
53
54
void check_for_backing_up (ds, state)
55
     int ds;
56
     int state[];
57
{
58


14896
	if ((reject && !dfaacc[ds].dfaacc_set) || (!reject && !dfaacc[ds].dfaacc_state)) {	/* state is non-accepting */
59
831
		++num_backing_up;
60
61
831
		if (backing_up_report) {
62
			fprintf (backing_up_file,
63
				 _("State #%d is non-accepting -\n"), ds);
64
65
			/* identify the state */
66
			dump_associated_rules (backing_up_file, ds);
67
68
			/* Now identify it further using the out- and
69
			 * jam-transitions.
70
			 */
71
			dump_transitions (backing_up_file, state);
72
73
			putc ('\n', backing_up_file);
74
		}
75
	}
76
3727
}
77
78
79
/* check_trailing_context - check to see if NFA state set constitutes
80
 *                          "dangerous" trailing context
81
 *
82
 * synopsis
83
 *    void check_trailing_context( int nfa_states[num_states+1], int num_states,
84
 *				int accset[nacc+1], int nacc );
85
 *
86
 * NOTES
87
 *  Trailing context is "dangerous" if both the head and the trailing
88
 *  part are of variable size \and/ there's a DFA state which contains
89
 *  both an accepting state for the head part of the rule and NFA states
90
 *  which occur after the beginning of the trailing context.
91
 *
92
 *  When such a rule is matched, it's impossible to tell if having been
93
 *  in the DFA state indicates the beginning of the trailing context or
94
 *  further-along scanning of the pattern.  In these cases, a warning
95
 *  message is issued.
96
 *
97
 *    nfa_states[1 .. num_states] is the list of NFA states in the DFA.
98
 *    accset[1 .. nacc] is the list of accepting numbers for the DFA state.
99
 */
100
101
void check_trailing_context (nfa_states, num_states, accset, nacc)
102
     int    *nfa_states, num_states;
103
     int    *accset;
104
     int nacc;
105
{
106
	int i, j;
107
108
	for (i = 1; i <= num_states; ++i) {
109
		int     ns = nfa_states[i];
110
		int type = state_type[ns];
111
		int ar = assoc_rule[ns];
112
113
		if (type == STATE_NORMAL || rule_type[ar] != RULE_VARIABLE) {	/* do nothing */
114
		}
115
116
		else if (type == STATE_TRAILING_CONTEXT) {
117
			/* Potential trouble.  Scan set of accepting numbers
118
			 * for the one marking the end of the "head".  We
119
			 * assume that this looping will be fairly cheap
120
			 * since it's rare that an accepting number set
121
			 * is large.
122
			 */
123
			for (j = 1; j <= nacc; ++j)
124
				if (accset[j] & YY_TRAILING_HEAD_MASK) {
125
					line_warning (_
126
						      ("dangerous trailing context"),
127
						      rule_linenum[ar]);
128
					return;
129
				}
130
		}
131
	}
132
}
133
134
135
/* dump_associated_rules - list the rules associated with a DFA state
136
 *
137
 * Goes through the set of NFA states associated with the DFA and
138
 * extracts the first MAX_ASSOC_RULES unique rules, sorts them,
139
 * and writes a report to the given file.
140
 */
141
142
void dump_associated_rules (file, ds)
143
     FILE   *file;
144
     int ds;
145
{
146
	int i, j;
147
	int num_associated_rules = 0;
148
	int     rule_set[MAX_ASSOC_RULES + 1];
149
	int    *dset = dss[ds];
150
	int     size = dfasiz[ds];
151
152
	for (i = 1; i <= size; ++i) {
153
		int rule_num = rule_linenum[assoc_rule[dset[i]]];
154
155
		for (j = 1; j <= num_associated_rules; ++j)
156
			if (rule_num == rule_set[j])
157
				break;
158
159
		if (j > num_associated_rules) {	/* new rule */
160
			if (num_associated_rules < MAX_ASSOC_RULES)
161
				rule_set[++num_associated_rules] =
162
					rule_num;
163
		}
164
	}
165
166
	qsort (&rule_set [1], num_associated_rules, sizeof (rule_set [1]), intcmp);
167
168
	fprintf (file, _(" associated rule line numbers:"));
169
170
	for (i = 1; i <= num_associated_rules; ++i) {
171
		if (i % 8 == 1)
172
			putc ('\n', file);
173
174
		fprintf (file, "\t%d", rule_set[i]);
175
	}
176
177
	putc ('\n', file);
178
}
179
180
181
/* dump_transitions - list the transitions associated with a DFA state
182
 *
183
 * synopsis
184
 *     dump_transitions( FILE *file, int state[numecs] );
185
 *
186
 * Goes through the set of out-transitions and lists them in human-readable
187
 * form (i.e., not as equivalence classes); also lists jam transitions
188
 * (i.e., all those which are not out-transitions, plus EOF).  The dump
189
 * is done to the given file.
190
 */
191
192
void dump_transitions (file, state)
193
     FILE   *file;
194
     int state[];
195
{
196
	int i, ec;
197
	int     out_char_set[CSIZE];
198
199
	for (i = 0; i < csize; ++i) {
200
		ec = ABS (ecgroup[i]);
201
		out_char_set[i] = state[ec];
202
	}
203
204
	fprintf (file, _(" out-transitions: "));
205
206
	list_character_set (file, out_char_set);
207
208
	/* now invert the members of the set to get the jam transitions */
209
	for (i = 0; i < csize; ++i)
210
		out_char_set[i] = !out_char_set[i];
211
212
	fprintf (file, _("\n jam-transitions: EOF "));
213
214
	list_character_set (file, out_char_set);
215
216
	putc ('\n', file);
217
}
218
219
220
/* epsclosure - construct the epsilon closure of a set of ndfa states
221
 *
222
 * synopsis
223
 *    int *epsclosure( int t[num_states], int *numstates_addr,
224
 *			int accset[num_rules+1], int *nacc_addr,
225
 *			int *hashval_addr );
226
 *
227
 * NOTES
228
 *  The epsilon closure is the set of all states reachable by an arbitrary
229
 *  number of epsilon transitions, which themselves do not have epsilon
230
 *  transitions going out, unioned with the set of states which have non-null
231
 *  accepting numbers.  t is an array of size numstates of nfa state numbers.
232
 *  Upon return, t holds the epsilon closure and *numstates_addr is updated.
233
 *  accset holds a list of the accepting numbers, and the size of accset is
234
 *  given by *nacc_addr.  t may be subjected to reallocation if it is not
235
 *  large enough to hold the epsilon closure.
236
 *
237
 *  hashval is the hash value for the dfa corresponding to the state set.
238
 */
239
240
int    *epsclosure (t, ns_addr, accset, nacc_addr, hv_addr)
241
     int    *t, *ns_addr, accset[], *nacc_addr, *hv_addr;
242
{
243
	int stkpos, ns, tsp;
244
21494
	int     numstates = *ns_addr, nacc, hashval, transsym, nfaccnum;
245
	int     stkend, nstate;
246
	static int did_stk_init = false, *stk;
247
248
#define MARK_STATE(state) \
249
do{ trans1[state] = trans1[state] - MARKER_DIFFERENCE;} while(0)
250
251
#define IS_MARKED(state) (trans1[state] < 0)
252
253
#define UNMARK_STATE(state) \
254
do{ trans1[state] = trans1[state] + MARKER_DIFFERENCE;} while(0)
255
256
#define CHECK_ACCEPT(state) \
257
do{ \
258
nfaccnum = accptnum[state]; \
259
if ( nfaccnum != NIL ) \
260
accset[++nacc] = nfaccnum; \
261
}while(0)
262
263
#define DO_REALLOCATION() \
264
do { \
265
current_max_dfa_size += MAX_DFA_SIZE_INCREMENT; \
266
++num_reallocs; \
267
t = reallocate_integer_array( t, current_max_dfa_size ); \
268
stk = reallocate_integer_array( stk, current_max_dfa_size ); \
269
}while(0) \
270
271
#define PUT_ON_STACK(state) \
272
do { \
273
if ( ++stkend >= current_max_dfa_size ) \
274
DO_REALLOCATION(); \
275
stk[stkend] = state; \
276
MARK_STATE(state); \
277
}while(0)
278
279
#define ADD_STATE(state) \
280
do { \
281
if ( ++numstates >= current_max_dfa_size ) \
282
DO_REALLOCATION(); \
283
t[numstates] = state; \
284
hashval += state; \
285
}while(0)
286
287
#define STACK_STATE(state) \
288
do { \
289
PUT_ON_STACK(state); \
290
CHECK_ACCEPT(state); \
291
if ( nfaccnum != NIL || transchar[state] != SYM_EPSILON ) \
292
ADD_STATE(state); \
293
}while(0)
294
295
296
10747
	if (!did_stk_init) {
297
10
		stk = allocate_integer_array (current_max_dfa_size);
298
10
		did_stk_init = true;
299
10
	}
300
301
	nacc = stkend = hashval = 0;
302
303
107092
	for (nstate = 1; nstate <= numstates; ++nstate) {
304
42799
		ns = t[nstate];
305
306
		/* The state could be marked if we've already pushed it onto
307
		 * the stack.
308
		 */
309
42799
		if (!IS_MARKED (ns)) {
310
85574
			PUT_ON_STACK (ns);
311
46965
			CHECK_ACCEPT (ns);
312
42787
			hashval += ns;
313
42787
		}
314
	}
315
316
289028
	for (stkpos = 1; stkpos <= stkend; ++stkpos) {
317
133767
		ns = stk[stkpos];
318
133767
		transsym = transchar[ns];
319
320
133767
		if (transsym == SYM_EPSILON) {
321
74955
			tsp = trans1[ns] + MARKER_DIFFERENCE;
322
323
74955
			if (tsp != NO_TRANSITION) {
324
66750
				if (!IS_MARKED (tsp))
325


358978
					STACK_STATE (tsp);
326
327
66750
				tsp = trans2[ns];
328
329
100354
				if (tsp != NO_TRANSITION
330
100354
				    && !IS_MARKED (tsp))
331


115572
					STACK_STATE (tsp);
332
			}
333
		}
334
	}
335
336
	/* Clear out "visit" markers. */
337
338
289028
	for (stkpos = 1; stkpos <= stkend; ++stkpos) {
339
133767
		if (IS_MARKED (stk[stkpos]))
340
133767
			UNMARK_STATE (stk[stkpos]);
341
		else
342
			flexfatal (_
343
				   ("consistency check failed in epsclosure()"));
344
	}
345
346
10747
	*ns_addr = numstates;
347
10747
	*hv_addr = hashval;
348
10747
	*nacc_addr = nacc;
349
350
10747
	return t;
351
}
352
353
354
/* increase_max_dfas - increase the maximum number of DFAs */
355
356
void increase_max_dfas ()
357
{
358
4
	current_max_dfas += MAX_DFAS_INCREMENT;
359
360
2
	++num_reallocs;
361
362
2
	base = reallocate_integer_array (base, current_max_dfas);
363
2
	def = reallocate_integer_array (def, current_max_dfas);
364
2
	dfasiz = reallocate_integer_array (dfasiz, current_max_dfas);
365
2
	accsiz = reallocate_integer_array (accsiz, current_max_dfas);
366
2
	dhash = reallocate_integer_array (dhash, current_max_dfas);
367
2
	dss = reallocate_int_ptr_array (dss, current_max_dfas);
368
2
	dfaacc = reallocate_dfaacc_union (dfaacc, current_max_dfas);
369
370
2
	if (nultrans)
371
		nultrans =
372
			reallocate_integer_array (nultrans,
373
						  current_max_dfas);
374
2
}
375
376
377
/* ntod - convert an ndfa to a dfa
378
 *
379
 * Creates the dfa corresponding to the ndfa we've constructed.  The
380
 * dfa starts out in state #1.
381
 */
382
383
void ntod ()
384
{
385
20
	int    *accset, ds, nacc, newds;
386
10
	int     sym, hashval, numstates, dsize;
387
	int     num_full_table_rows=0;	/* used only for -f */
388
	int    *nset, *dset;
389
	int     targptr, totaltrans, i, comstate, comfreq, targ;
390
10
	int     symlist[CSIZE + 1];
391
	int     num_start_states;
392
	int     todo_head, todo_next;
393
394
	struct yytbl_data *yynxt_tbl = 0;
395
	flex_int32_t *yynxt_data = 0, yynxt_curr = 0;
396
397
	/* Note that the following are indexed by *equivalence classes*
398
	 * and not by characters.  Since equivalence classes are indexed
399
	 * beginning with 1, even if the scanner accepts NUL's, this
400
	 * means that (since every character is potentially in its own
401
	 * equivalence class) these arrays must have room for indices
402
	 * from 1 to CSIZE, so their size must be CSIZE + 1.
403
	 */
404
10
	int     duplist[CSIZE + 1], state[CSIZE + 1];
405
10
	int     targfreq[CSIZE + 1], targstate[CSIZE + 1];
406
407
	/* accset needs to be large enough to hold all of the rules present
408
	 * in the input, *plus* their YY_TRAILING_HEAD_MASK variants.
409
	 */
410
10
	accset = allocate_integer_array ((num_rules + 1) * 2);
411
10
	nset = allocate_integer_array (current_max_dfa_size);
412
413
	/* The "todo" queue is represented by the head, which is the DFA
414
	 * state currently being processed, and the "next", which is the
415
	 * next DFA state number available (not in use).  We depend on the
416
	 * fact that snstods() returns DFA's \in increasing order/, and thus
417
	 * need only know the bounds of the dfas to be processed.
418
	 */
419
	todo_head = todo_next = 0;
420
421
5160
	for (i = 0; i <= csize; ++i) {
422
2570
		duplist[i] = NIL;
423
2570
		symlist[i] = false;
424
	}
425
426
1160
	for (i = 0; i <= num_rules; ++i)
427
570
		accset[i] = NIL;
428
429
10
	if (trace) {
430
		dumpnfa (scset[1]);
431
		fputs (_("\n\nDFA Dump:\n\n"), stderr);
432
	}
433
434
10
	inittbl ();
435
436
	/* Check to see whether we should build a separate table for
437
	 * transitions on NUL characters.  We don't do this for full-speed
438
	 * (-F) scanners, since for them we don't have a simple state
439
	 * number lying around with which to index the table.  We also
440
	 * don't bother doing it for scanners unless (1) NUL is in its own
441
	 * equivalence class (indicated by a positive value of
442
	 * ecgroup[NUL]), (2) NUL's equivalence class is the last
443
	 * equivalence class, and (3) the number of equivalence classes is
444
	 * the same as the number of characters.  This latter case comes
445
	 * about when useecs is false or when it's true but every character
446
	 * still manages to land in its own class (unlikely, but it's
447
	 * cheap to check for).  If all these things are true then the
448
	 * character code needed to represent NUL's equivalence class for
449
	 * indexing the tables is going to take one more bit than the
450
	 * number of characters, and therefore we won't be assured of
451
	 * being able to fit it into a YY_CHAR variable.  This rules out
452
	 * storing the transitions in a compressed table, since the code
453
	 * for interpreting them uses a YY_CHAR variable (perhaps it
454
	 * should just use an integer, though; this is worth pondering ...
455
	 * ###).
456
	 *
457
	 * Finally, for full tables, we want the number of entries in the
458
	 * table to be a power of two so the array references go fast (it
459
	 * will just take a shift to compute the major index).  If
460
	 * encoding NUL's transitions in the table will spoil this, we
461
	 * give it its own table (note that this will be the case if we're
462
	 * not using equivalence classes).
463
	 */
464
465
	/* Note that the test for ecgroup[0] == numecs below accomplishes
466
	 * both (1) and (2) above
467
	 */
468

20
	if (!fullspd && ecgroup[0] == numecs) {
469
		/* NUL is alone in its equivalence class, which is the
470
		 * last one.
471
		 */
472
2
		int     use_NUL_table = (numecs == csize);
473
474
2
		if (fulltbl && !use_NUL_table) {
475
			/* We still may want to use the table if numecs
476
			 * is a power of 2.
477
			 */
478
			int     power_of_two;
479
480
			for (power_of_two = 1; power_of_two <= csize;
481
			     power_of_two *= 2)
482
				if (numecs == power_of_two) {
483
					use_NUL_table = true;
484
					break;
485
				}
486
		}
487
488
2
		if (use_NUL_table)
489
2
			nultrans =
490
2
				allocate_integer_array (current_max_dfas);
491
492
		/* From now on, nultrans != nil indicates that we're
493
		 * saving null transitions for later, separate encoding.
494
		 */
495
2
	}
496
497
498
10
	if (fullspd) {
499
		for (i = 0; i <= numecs; ++i)
500
			state[i] = 0;
501
502
		place_state (state, 0, 0);
503
		dfaacc[0].dfaacc_state = 0;
504
	}
505
506
10
	else if (fulltbl) {
507
		if (nultrans)
508
			/* We won't be including NUL's transitions in the
509
			 * table, so build it for entries from 0 .. numecs - 1.
510
			 */
511
			num_full_table_rows = numecs;
512
513
		else
514
			/* Take into account the fact that we'll be including
515
			 * the NUL entries in the transition table.  Build it
516
			 * from 0 .. numecs.
517
			 */
518
			num_full_table_rows = numecs + 1;
519
520
		/* Begin generating yy_nxt[][]
521
		 * This spans the entire LONG function.
522
		 * This table is tricky because we don't know how big it will be.
523
		 * So we'll have to realloc() on the way...
524
		 * we'll wait until we can calculate yynxt_tbl->td_hilen.
525
		 */
526
		yynxt_tbl =
527
			(struct yytbl_data *) calloc (1,
528
						      sizeof (struct
529
							      yytbl_data));
530
		yytbl_data_init (yynxt_tbl, YYTD_ID_NXT);
531
		yynxt_tbl->td_hilen = 1;
532
		yynxt_tbl->td_lolen = num_full_table_rows;
533
		yynxt_tbl->td_data = yynxt_data =
534
			(flex_int32_t *) calloc (yynxt_tbl->td_lolen *
535
					    yynxt_tbl->td_hilen,
536
					    sizeof (flex_int32_t));
537
		yynxt_curr = 0;
538
539
		buf_prints (&yydmap_buf,
540
			    "\t{YYTD_ID_NXT, (void**)&yy_nxt, sizeof(%s)},\n",
541
			    long_align ? "flex_int32_t" : "flex_int16_t");
542
543
		/* Unless -Ca, declare it "short" because it's a real
544
		 * long-shot that that won't be large enough.
545
		 */
546
		if (gentables)
547
			out_str_dec
548
				("static yyconst %s yy_nxt[][%d] =\n    {\n",
549
				 long_align ? "flex_int32_t" : "flex_int16_t",
550
				 num_full_table_rows);
551
		else {
552
			out_dec ("#undef YY_NXT_LOLEN\n#define YY_NXT_LOLEN (%d)\n", num_full_table_rows);
553
			out_str ("static yyconst %s *yy_nxt =0;\n",
554
				 long_align ? "flex_int32_t" : "flex_int16_t");
555
		}
556
557
558
		if (gentables)
559
			outn ("    {");
560
561
		/* Generate 0 entries for state #0. */
562
		for (i = 0; i < num_full_table_rows; ++i) {
563
			mk2data (0);
564
			yynxt_data[yynxt_curr++] = 0;
565
		}
566
567
		dataflush ();
568
		if (gentables)
569
			outn ("    },\n");
570
	}
571
572
	/* Create the first states. */
573
574
10
	num_start_states = lastsc * 2;
575
576
148
	for (i = 1; i <= num_start_states; ++i) {
577
64
		numstates = 1;
578
579
		/* For each start condition, make one state for the case when
580
		 * we're at the beginning of the line (the '^' operator) and
581
		 * one for the case when we're not.
582
		 */
583
64
		if (i % 2 == 1)
584
32
			nset[numstates] = scset[(i / 2) + 1];
585
		else
586
32
			nset[numstates] =
587
32
				mkbranch (scbol[i / 2], scset[i / 2]);
588
589
64
		nset = epsclosure (nset, &numstates, accset, &nacc,
590
				   &hashval);
591
592
64
		if (snstods (nset, numstates, accset, nacc, hashval, &ds)) {
593
64
			numas += nacc;
594
64
			totnst += numstates;
595
64
			++todo_next;
596
597
64
			if (variable_trailing_context_rules && nacc > 0)
598
				check_trailing_context (nset, numstates,
599
							accset, nacc);
600
		}
601
	}
602
603
10
	if (!fullspd) {
604
10
		if (!snstods (nset, 0, accset, 0, 0, &end_of_buffer_state))
605
			flexfatal (_
606
				   ("could not create unique end-of-buffer state"));
607
608
10
		++numas;
609
10
		++num_start_states;
610
10
		++todo_next;
611
10
	}
612
613
614
3811
	while (todo_head < todo_next) {
615
		targptr = 0;
616
		totaltrans = 0;
617
618
469654
		for (i = 1; i <= numecs; ++i)
619
231026
			state[i] = 0;
620
621
3801
		ds = ++todo_head;
622
623
3801
		dset = dss[ds];
624
3801
		dsize = dfasiz[ds];
625
626
3801
		if (trace)
627
			fprintf (stderr, _("state # %d:\n"), ds);
628
629
3801
		sympartition (dset, dsize, symlist, duplist);
630
631
469654
		for (sym = 1; sym <= numecs; ++sym) {
632
231026
			if (symlist[sym]) {
633
110795
				symlist[sym] = 0;
634
635
110795
				if (duplist[sym] == NIL) {
636
					/* Symbol has unique out-transitions. */
637
10683
					numstates =
638
10683
						symfollowset (dset, dsize,
639
							      sym, nset);
640
10683
					nset = epsclosure (nset,
641
							   &numstates,
642
							   accset, &nacc,
643
							   &hashval);
644
645
10683
					if (snstods
646
10683
					    (nset, numstates, accset, nacc,
647
10683
					     hashval, &newds)) {
648
7454
						totnst = totnst +
649
3727
							numstates;
650
3727
						++todo_next;
651
3727
						numas += nacc;
652
653
3727
						if (variable_trailing_context_rules && nacc > 0)
654
							check_trailing_context
655
								(nset,
656
								 numstates,
657
								 accset,
658
								 nacc);
659
					}
660
661
10683
					state[sym] = newds;
662
663
10683
					if (trace)
664
						fprintf (stderr,
665
							 "\t%d\t%d\n", sym,
666
							 newds);
667
668
10683
					targfreq[++targptr] = 1;
669
10683
					targstate[targptr] = newds;
670
					++numuniq;
671
10683
				}
672
673
				else {
674
					/* sym's equivalence class has the same
675
					 * transitions as duplist(sym)'s
676
					 * equivalence class.
677
					 */
678
100112
					targ = state[duplist[sym]];
679
100112
					state[sym] = targ;
680
681
100112
					if (trace)
682
						fprintf (stderr,
683
							 "\t%d\t%d\n", sym,
684
							 targ);
685
686
					/* Update frequency count for
687
					 * destination state.
688
					 */
689
690
					i = 0;
691
252858
					while (targstate[++i] != targ) ;
692
693
100112
					++targfreq[i];
694
					++numdup;
695
				}
696
697
110795
				++totaltrans;
698
110795
				duplist[sym] = NIL;
699
110795
			}
700
		}
701
702
703
3801
		numsnpairs += totaltrans;
704
705
3801
		if (ds > num_start_states)
706
3727
			check_for_backing_up (ds, state);
707
708
3801
		if (nultrans) {
709
152
			nultrans[ds] = state[NUL_ec];
710
152
			state[NUL_ec] = 0;	/* remove transition */
711
152
		}
712
713
3801
		if (fulltbl) {
714
715
			/* Each time we hit here, it's another td_hilen, so we realloc. */
716
			yynxt_tbl->td_hilen++;
717
			yynxt_tbl->td_data = yynxt_data =
718
				(flex_int32_t *) realloc (yynxt_data,
719
						     yynxt_tbl->td_hilen *
720
						     yynxt_tbl->td_lolen *
721
						     sizeof (flex_int32_t));
722
723
724
			if (gentables)
725
				outn ("    {");
726
727
			/* Supply array's 0-element. */
728
			if (ds == end_of_buffer_state) {
729
				mk2data (-end_of_buffer_state);
730
				yynxt_data[yynxt_curr++] =
731
					-end_of_buffer_state;
732
			}
733
			else {
734
				mk2data (end_of_buffer_state);
735
				yynxt_data[yynxt_curr++] =
736
					end_of_buffer_state;
737
			}
738
739
			for (i = 1; i < num_full_table_rows; ++i) {
740
				/* Jams are marked by negative of state
741
				 * number.
742
				 */
743
				mk2data (state[i] ? state[i] : -ds);
744
				yynxt_data[yynxt_curr++] =
745
					state[i] ? state[i] : -ds;
746
			}
747
748
			dataflush ();
749
			if (gentables)
750
				outn ("    },\n");
751
		}
752
753
3801
		else if (fullspd)
754
			place_state (state, ds, totaltrans);
755
756
3801
		else if (ds == end_of_buffer_state)
757
			/* Special case this state to make sure it does what
758
			 * it's supposed to, i.e., jam on end-of-buffer.
759
			 */
760
10
			stack1 (ds, 0, 0, JAMSTATE);
761
762
		else {		/* normal, compressed state */
763
764
			/* Determine which destination state is the most
765
			 * common, and how many transitions to it there are.
766
			 */
767
768
			comfreq = 0;
769
			comstate = 0;
770
771
28948
			for (i = 1; i <= targptr; ++i)
772
10683
				if (targfreq[i] > comfreq) {
773
					comfreq = targfreq[i];
774
5373
					comstate = targstate[i];
775
5373
				}
776
777
3791
			bldtbl (state, ds, totaltrans, comstate, comfreq);
778
		}
779
	}
780
781
10
	if (fulltbl) {
782
		dataend ();
783
		if (tablesext) {
784
			yytbl_data_compress (yynxt_tbl);
785
			if (yytbl_data_fwrite (&tableswr, yynxt_tbl) < 0)
786
				flexerror (_
787
					   ("Could not write yynxt_tbl[][]"));
788
		}
789
		if (yynxt_tbl) {
790
			yytbl_data_destroy (yynxt_tbl);
791
			yynxt_tbl = 0;
792
		}
793
	}
794
795
10
	else if (!fullspd) {
796
10
		cmptmps ();	/* create compressed template entries */
797
798
		/* Create tables for all the states with only one
799
		 * out-transition.
800
		 */
801
2506
		while (onesp > 0) {
802
2486
			mk1tbl (onestate[onesp], onesym[onesp],
803
1243
				onenext[onesp], onedef[onesp]);
804
1243
			--onesp;
805
		}
806
807
10
		mkdeftbl ();
808
10
	}
809
810
10
	free ((void *) accset);
811
10
	free ((void *) nset);
812
10
}
813
814
815
/* snstods - converts a set of ndfa states into a dfa state
816
 *
817
 * synopsis
818
 *    is_new_state = snstods( int sns[numstates], int numstates,
819
 *				int accset[num_rules+1], int nacc,
820
 *				int hashval, int *newds_addr );
821
 *
822
 * On return, the dfa state number is in newds.
823
 */
824
825
int snstods (sns, numstates, accset, nacc, hashval, newds_addr)
826
     int sns[], numstates, accset[], nacc, hashval, *newds_addr;
827
{
828
	int     didsort = 0;
829
	int i, j;
830
	int     newds, *oldsns;
831
832
8024187
	for (i = 1; i <= lastdfa; ++i)
833
4002914
		if (hashval == dhash[i]) {
834
7073
			if (numstates == dfasiz[i]) {
835
6996
				oldsns = dss[i];
836
837
6996
				if (!didsort) {
838
					/* We sort the states in sns so we
839
					 * can compare it to oldsns quickly.
840
					 */
841
6970
					qsort (&sns [1], numstates, sizeof (sns [1]), intcmp);
842
					didsort = 1;
843
6970
				}
844
845
115712
				for (j = 1; j <= numstates; ++j)
846
50900
					if (sns[j] != oldsns[j])
847
						break;
848
849
6996
				if (j > numstates) {
850
6956
					++dfaeql;
851
6956
					*newds_addr = i;
852
6956
					return 0;
853
				}
854
855
				++hshcol;
856
			}
857
858
			else
859
				++hshsave;
860
117
		}
861
862
	/* Make a new dfa. */
863
864
3801
	if (++lastdfa >= current_max_dfas)
865
2
		increase_max_dfas ();
866
867
3801
	newds = lastdfa;
868
869
3801
	dss[newds] = allocate_integer_array (numstates + 1);
870
871
	/* If we haven't already sorted the states in sns, we do so now,
872
	 * so that future comparisons with it can be made quickly.
873
	 */
874
875
3801
	if (!didsort)
876
3787
		qsort (&sns [1], numstates, sizeof (sns [1]), intcmp);
877
878
102118
	for (i = 1; i <= numstates; ++i)
879
47258
		dss[newds][i] = sns[i];
880
881
3801
	dfasiz[newds] = numstates;
882
3801
	dhash[newds] = hashval;
883
884
3801
	if (nacc == 0) {
885
893
		if (reject)
886
41
			dfaacc[newds].dfaacc_set = (int *) 0;
887
		else
888
852
			dfaacc[newds].dfaacc_state = 0;
889
890
893
		accsiz[newds] = 0;
891
893
	}
892
893
2908
	else if (reject) {
894
		/* We sort the accepting set in increasing order so the
895
		 * disambiguating rule that the first rule listed is considered
896
		 * match in the event of ties will work.
897
		 */
898
899
122
		qsort (&accset [1], nacc, sizeof (accset [1]), intcmp);
900
901
122
		dfaacc[newds].dfaacc_set =
902
122
			allocate_integer_array (nacc + 1);
903
904
		/* Save the accepting set for later */
905
770
		for (i = 1; i <= nacc; ++i) {
906
263
			dfaacc[newds].dfaacc_set[i] = accset[i];
907
908
263
			if (accset[i] <= num_rules)
909
				/* Who knows, perhaps a REJECT can yield
910
				 * this rule.
911
				 */
912
263
				rule_useful[accset[i]] = true;
913
		}
914
915
122
		accsiz[newds] = nacc;
916
122
	}
917
918
	else {
919
		/* Find lowest numbered rule so the disambiguating rule
920
		 * will work.
921
		 */
922
2786
		j = num_rules + 1;
923
924
12622
		for (i = 1; i <= nacc; ++i)
925
3525
			if (accset[i] < j)
926
2885
				j = accset[i];
927
928
2786
		dfaacc[newds].dfaacc_state = j;
929
930
2786
		if (j <= num_rules)
931
2786
			rule_useful[j] = true;
932
	}
933
934
7602
	*newds_addr = newds;
935
936
3801
	return 1;
937
10757
}
938
939
940
/* symfollowset - follow the symbol transitions one step
941
 *
942
 * synopsis
943
 *    numstates = symfollowset( int ds[current_max_dfa_size], int dsize,
944
 *				int transsym, int nset[current_max_dfa_size] );
945
 */
946
947
int symfollowset (ds, dsize, transsym, nset)
948
     int ds[], dsize, transsym, nset[];
949
{
950
	int     ns, tsp, sym, i, j, lenccl, ch, numstates, ccllist;
951
952
	numstates = 0;
953
954
480389
	for (i = 1; i <= dsize; ++i) {	/* for each nfa state ns in the state set of ds */
955
224170
		ns = ds[i];
956
224170
		sym = transchar[ns];
957
224170
		tsp = trans1[ns];
958
959
224170
		if (sym < 0) {	/* it's a character class */
960
71667
			sym = -sym;
961
71667
			ccllist = cclmap[sym];
962
71667
			lenccl = ccllen[sym];
963
964
71667
			if (cclng[sym]) {
965
31951
				for (j = 0; j < lenccl; ++j) {
966
					/* Loop through negated character
967
					 * class.
968
					 */
969
16335
					ch = ccltbl[ccllist + j];
970
971
16335
					if (ch == 0)
972
						ch = NUL_ec;
973
974
16335
					if (ch > transsym)
975
						/* Transsym isn't in negated
976
						 * ccl.
977
						 */
978
						break;
979
980
14590
					else if (ch == transsym)
981
						/* next 2 */
982
						goto bottom;
983
				}
984
985
				/* Didn't find transsym in ccl. */
986
3829
				nset[++numstates] = tsp;
987
3829
			}
988
989
			else
990
731216
				for (j = 0; j < lenccl; ++j) {
991
388724
					ch = ccltbl[ccllist + j];
992
993
388724
					if (ch == 0)
994
						ch = NUL_ec;
995
996
388724
					if (ch > transsym)
997
						break;
998
358644
					else if (ch == transsym) {
999
26426
						nset[++numstates] = tsp;
1000
26426
						break;
1001
					}
1002
				}
1003
		}
1004
1005
152503
		else if (sym == SYM_EPSILON) {	/* do nothing */
1006
		}
1007
1008
90200
		else if (ABS (ecgroup[sym]) == transsym)
1009
12480
			nset[++numstates] = tsp;
1010
1011
	      bottom:;
1012
	}
1013
1014
10683
	return numstates;
1015
}
1016
1017
1018
/* sympartition - partition characters with same out-transitions
1019
 *
1020
 * synopsis
1021
 *    sympartition( int ds[current_max_dfa_size], int numstates,
1022
 *			int symlist[numecs], int duplist[numecs] );
1023
 */
1024
1025
void sympartition (ds, numstates, symlist, duplist)
1026
     int ds[], numstates;
1027
     int symlist[], duplist[];
1028
{
1029
7602
	int     tch, i, j, k, ns, dupfwd[CSIZE + 1], lenccl, cclp, ich;
1030
1031
	/* Partitioning is done by creating equivalence classes for those
1032
	 * characters which have out-transitions from the given state.  Thus
1033
	 * we are really creating equivalence classes of equivalence classes.
1034
	 */
1035
1036
469654
	for (i = 1; i <= numecs; ++i) {	/* initialize equivalence class list */
1037
231026
		duplist[i] = i - 1;
1038
231026
		dupfwd[i] = i + 1;
1039
	}
1040
1041
3801
	duplist[1] = NIL;
1042
3801
	dupfwd[numecs] = NIL;
1043
1044
102118
	for (i = 1; i <= numstates; ++i) {
1045
47258
		ns = ds[i];
1046
47258
		tch = transchar[ns];
1047
1048
47258
		if (tch != SYM_EPSILON) {
1049

57738
			if (tch < -lastccl || tch >= csize) {
1050
				flexfatal (_
1051
					   ("bad transition character detected in sympartition()"));
1052
			}
1053
1054
28869
			if (tch >= 0) {	/* character transition */
1055
12480
				int     ec = ecgroup[tch];
1056
1057
12480
				mkechar (ec, dupfwd, duplist);
1058
12480
				symlist[ec] = 1;
1059
12480
			}
1060
1061
			else {	/* character class */
1062
16389
				tch = -tch;
1063
1064
16389
				lenccl = ccllen[tch];
1065
16389
				cclp = cclmap[tch];
1066
32778
				mkeccl (ccltbl + cclp, lenccl, dupfwd,
1067
16389
					duplist, numecs, NUL_ec);
1068
1069
16389
				if (cclng[tch]) {
1070
					j = 0;
1071
1072
11880
					for (k = 0; k < lenccl; ++k) {
1073
4963
						ich = ccltbl[cclp + k];
1074
1075
4963
						if (ich == 0)
1076
							ich = NUL_ec;
1077
1078
60152
						for (++j; j < ich; ++j)
1079
25113
							symlist[j] = 1;
1080
					}
1081
1082
78952
					for (++j; j <= numecs; ++j)
1083
38499
						symlist[j] = 1;
1084
				}
1085
1086
				else
1087
522136
					for (k = 0; k < lenccl; ++k) {
1088
245656
						ich = ccltbl[cclp + k];
1089
1090
245656
						if (ich == 0)
1091
							ich = NUL_ec;
1092
1093
245656
						symlist[ich] = 1;
1094
					}
1095
			}
1096
		}
1097
	}
1098
3801
}