1 |
|
|
/* $OpenBSD: nfa.c,v 1.11 2015/11/19 22:52:40 tedu Exp $ */ |
2 |
|
|
|
3 |
|
|
/* nfa - NFA construction routines */ |
4 |
|
|
|
5 |
|
|
/* Copyright (c) 1990 The Regents of the University of California. */ |
6 |
|
|
/* All rights reserved. */ |
7 |
|
|
|
8 |
|
|
/* This code is derived from software contributed to Berkeley by */ |
9 |
|
|
/* Vern Paxson. */ |
10 |
|
|
|
11 |
|
|
/* The United States Government has rights in this work pursuant */ |
12 |
|
|
/* to contract no. DE-AC03-76SF00098 between the United States */ |
13 |
|
|
/* Department of Energy and the University of California. */ |
14 |
|
|
|
15 |
|
|
/* This file is part of flex. */ |
16 |
|
|
|
17 |
|
|
/* Redistribution and use in source and binary forms, with or without */ |
18 |
|
|
/* modification, are permitted provided that the following conditions */ |
19 |
|
|
/* are met: */ |
20 |
|
|
|
21 |
|
|
/* 1. Redistributions of source code must retain the above copyright */ |
22 |
|
|
/* notice, this list of conditions and the following disclaimer. */ |
23 |
|
|
/* 2. Redistributions in binary form must reproduce the above copyright */ |
24 |
|
|
/* notice, this list of conditions and the following disclaimer in the */ |
25 |
|
|
/* documentation and/or other materials provided with the distribution. */ |
26 |
|
|
|
27 |
|
|
/* Neither the name of the University nor the names of its contributors */ |
28 |
|
|
/* may be used to endorse or promote products derived from this software */ |
29 |
|
|
/* without specific prior written permission. */ |
30 |
|
|
|
31 |
|
|
/* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */ |
32 |
|
|
/* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */ |
33 |
|
|
/* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ |
34 |
|
|
/* PURPOSE. */ |
35 |
|
|
|
36 |
|
|
#include "flexdef.h" |
37 |
|
|
|
38 |
|
|
|
39 |
|
|
/* declare functions that have forward references */ |
40 |
|
|
|
41 |
|
|
int dupmachine PROTO((int)); |
42 |
|
|
void mkxtion PROTO((int, int)); |
43 |
|
|
|
44 |
|
|
|
45 |
|
|
/* add_accept - add an accepting state to a machine |
46 |
|
|
* |
47 |
|
|
* accepting_number becomes mach's accepting number. |
48 |
|
|
*/ |
49 |
|
|
|
50 |
|
|
void |
51 |
|
|
add_accept(mach, accepting_number) |
52 |
|
|
int mach, accepting_number; |
53 |
|
|
{ |
54 |
|
|
/* |
55 |
|
|
* Hang the accepting number off an epsilon state. if it is |
56 |
|
|
* associated with a state that has a non-epsilon out-transition, |
57 |
|
|
* then the state will accept BEFORE it makes that transition, i.e., |
58 |
|
|
* one character too soon. |
59 |
|
|
*/ |
60 |
|
|
|
61 |
✓✓ |
1120 |
if (transchar[finalst[mach]] == SYM_EPSILON) |
62 |
|
89 |
accptnum[finalst[mach]] = accepting_number; |
63 |
|
|
|
64 |
|
|
else { |
65 |
|
471 |
int astate = mkstate(SYM_EPSILON); |
66 |
|
|
|
67 |
|
471 |
accptnum[astate] = accepting_number; |
68 |
|
471 |
(void) link_machines(mach, astate); |
69 |
|
|
} |
70 |
|
560 |
} |
71 |
|
|
|
72 |
|
|
|
73 |
|
|
/* copysingl - make a given number of copies of a singleton machine |
74 |
|
|
* |
75 |
|
|
* synopsis |
76 |
|
|
* |
77 |
|
|
* newsng = copysingl( singl, num ); |
78 |
|
|
* |
79 |
|
|
* newsng - a new singleton composed of num copies of singl |
80 |
|
|
* singl - a singleton machine |
81 |
|
|
* num - the number of copies of singl to be present in newsng |
82 |
|
|
*/ |
83 |
|
|
|
84 |
|
|
int |
85 |
|
|
copysingl(singl, num) |
86 |
|
|
int singl, num; |
87 |
|
|
{ |
88 |
|
|
int copy, i; |
89 |
|
|
|
90 |
|
|
copy = mkstate(SYM_EPSILON); |
91 |
|
|
|
92 |
|
|
for (i = 1; i <= num; ++i) |
93 |
|
|
copy = link_machines(copy, dupmachine(singl)); |
94 |
|
|
|
95 |
|
|
return copy; |
96 |
|
|
} |
97 |
|
|
|
98 |
|
|
|
99 |
|
|
/* dumpnfa - debugging routine to write out an nfa */ |
100 |
|
|
|
101 |
|
|
void |
102 |
|
|
dumpnfa(state1) |
103 |
|
|
int state1; |
104 |
|
|
|
105 |
|
|
{ |
106 |
|
|
int sym, tsp1, tsp2, anum, ns; |
107 |
|
|
|
108 |
|
|
fprintf(stderr, |
109 |
|
|
_ |
110 |
|
|
("\n\n********** beginning dump of nfa with start state %d\n"), |
111 |
|
|
state1); |
112 |
|
|
|
113 |
|
|
/* |
114 |
|
|
* We probably should loop starting at firstst[state1] and going to |
115 |
|
|
* lastst[state1], but they're not maintained properly when we "or" |
116 |
|
|
* all of the rules together. So we use our knowledge that the |
117 |
|
|
* machine starts at state 1 and ends at lastnfa. |
118 |
|
|
*/ |
119 |
|
|
|
120 |
|
|
/* for ( ns = firstst[state1]; ns <= lastst[state1]; ++ns ) */ |
121 |
|
|
for (ns = 1; ns <= lastnfa; ++ns) { |
122 |
|
|
fprintf(stderr, _("state # %4d\t"), ns); |
123 |
|
|
|
124 |
|
|
sym = transchar[ns]; |
125 |
|
|
tsp1 = trans1[ns]; |
126 |
|
|
tsp2 = trans2[ns]; |
127 |
|
|
anum = accptnum[ns]; |
128 |
|
|
|
129 |
|
|
fprintf(stderr, "%3d: %4d, %4d", sym, tsp1, tsp2); |
130 |
|
|
|
131 |
|
|
if (anum != NIL) |
132 |
|
|
fprintf(stderr, " [%d]", anum); |
133 |
|
|
|
134 |
|
|
fprintf(stderr, "\n"); |
135 |
|
|
} |
136 |
|
|
|
137 |
|
|
fprintf(stderr, _("********** end of dump\n")); |
138 |
|
|
} |
139 |
|
|
|
140 |
|
|
|
141 |
|
|
/* dupmachine - make a duplicate of a given machine |
142 |
|
|
* |
143 |
|
|
* synopsis |
144 |
|
|
* |
145 |
|
|
* copy = dupmachine( mach ); |
146 |
|
|
* |
147 |
|
|
* copy - holds duplicate of mach |
148 |
|
|
* mach - machine to be duplicated |
149 |
|
|
* |
150 |
|
|
* note that the copy of mach is NOT an exact duplicate; rather, all the |
151 |
|
|
* transition states values are adjusted so that the copy is self-contained, |
152 |
|
|
* as the original should have been. |
153 |
|
|
* |
154 |
|
|
* also note that the original MUST be contiguous, with its low and high |
155 |
|
|
* states accessible by the arrays firstst and lastst |
156 |
|
|
*/ |
157 |
|
|
|
158 |
|
|
int |
159 |
|
|
dupmachine(mach) |
160 |
|
|
int mach; |
161 |
|
|
{ |
162 |
|
|
int i, init, state_offset; |
163 |
|
|
int state = 0; |
164 |
|
|
int last = lastst[mach]; |
165 |
|
|
|
166 |
|
|
for (i = firstst[mach]; i <= last; ++i) { |
167 |
|
|
state = mkstate(transchar[i]); |
168 |
|
|
|
169 |
|
|
if (trans1[i] != NO_TRANSITION) { |
170 |
|
|
mkxtion(finalst[state], trans1[i] + state - i); |
171 |
|
|
|
172 |
|
|
if (transchar[i] == SYM_EPSILON && |
173 |
|
|
trans2[i] != NO_TRANSITION) |
174 |
|
|
mkxtion(finalst[state], |
175 |
|
|
trans2[i] + state - i); |
176 |
|
|
} |
177 |
|
|
accptnum[state] = accptnum[i]; |
178 |
|
|
} |
179 |
|
|
|
180 |
|
|
if (state == 0) |
181 |
|
|
flexfatal(_("empty machine in dupmachine()")); |
182 |
|
|
|
183 |
|
|
state_offset = state - i + 1; |
184 |
|
|
|
185 |
|
|
init = mach + state_offset; |
186 |
|
|
firstst[init] = firstst[mach] + state_offset; |
187 |
|
|
finalst[init] = finalst[mach] + state_offset; |
188 |
|
|
lastst[init] = lastst[mach] + state_offset; |
189 |
|
|
|
190 |
|
|
return init; |
191 |
|
|
} |
192 |
|
|
|
193 |
|
|
|
194 |
|
|
/* finish_rule - finish up the processing for a rule |
195 |
|
|
* |
196 |
|
|
* An accepting number is added to the given machine. If variable_trail_rule |
197 |
|
|
* is true then the rule has trailing context and both the head and trail |
198 |
|
|
* are variable size. Otherwise if headcnt or trailcnt is non-zero then |
199 |
|
|
* the machine recognizes a pattern with trailing context and headcnt is |
200 |
|
|
* the number of characters in the matched part of the pattern, or zero |
201 |
|
|
* if the matched part has variable length. trailcnt is the number of |
202 |
|
|
* trailing context characters in the pattern, or zero if the trailing |
203 |
|
|
* context has variable length. |
204 |
|
|
*/ |
205 |
|
|
|
206 |
|
|
void |
207 |
|
|
finish_rule(mach, variable_trail_rule, headcnt, trailcnt, |
208 |
|
|
pcont_act) |
209 |
|
|
int mach, variable_trail_rule, headcnt, trailcnt, pcont_act; |
210 |
|
|
{ |
211 |
|
1120 |
char action_text[MAXLINE]; |
212 |
|
|
|
213 |
|
560 |
add_accept(mach, num_rules); |
214 |
|
|
|
215 |
|
|
/* |
216 |
|
|
* We did this in new_rule(), but it often gets the wrong number |
217 |
|
|
* because we do it before we start parsing the current rule. |
218 |
|
|
*/ |
219 |
|
560 |
rule_linenum[num_rules] = linenum; |
220 |
|
|
|
221 |
|
|
/* |
222 |
|
|
* If this is a continued action, then the line-number has already |
223 |
|
|
* been updated, giving us the wrong number. |
224 |
|
|
*/ |
225 |
✓✓ |
560 |
if (continued_action) |
226 |
|
1 |
--rule_linenum[num_rules]; |
227 |
|
|
|
228 |
|
|
|
229 |
|
|
/* |
230 |
|
|
* If the previous rule was continued action, then we inherit the |
231 |
|
|
* previous newline flag, possibly overriding the current one. |
232 |
|
|
*/ |
233 |
✗✓✗✗
|
560 |
if (pcont_act && rule_has_nl[num_rules - 1]) |
234 |
|
|
rule_has_nl[num_rules] = true; |
235 |
|
|
|
236 |
|
560 |
snprintf(action_text, sizeof(action_text), "case %d:\n", num_rules); |
237 |
|
560 |
add_action(action_text); |
238 |
✓✓ |
560 |
if (rule_has_nl[num_rules]) { |
239 |
|
27 |
snprintf(action_text, sizeof(action_text), "/* rule %d can match eol */\n", |
240 |
|
|
num_rules); |
241 |
|
27 |
add_action(action_text); |
242 |
|
27 |
} |
243 |
✗✓ |
560 |
if (variable_trail_rule) { |
244 |
|
|
rule_type[num_rules] = RULE_VARIABLE; |
245 |
|
|
|
246 |
|
|
if (performance_report > 0) |
247 |
|
|
fprintf(stderr, |
248 |
|
|
_ |
249 |
|
|
("Variable trailing context rule at line %d\n"), |
250 |
|
|
rule_linenum[num_rules]); |
251 |
|
|
|
252 |
|
|
variable_trailing_context_rules = true; |
253 |
|
|
} else { |
254 |
|
560 |
rule_type[num_rules] = RULE_NORMAL; |
255 |
|
|
|
256 |
✗✓ |
560 |
if (headcnt > 0 || trailcnt > 0) { |
257 |
|
|
/* |
258 |
|
|
* Do trailing context magic to not match the |
259 |
|
|
* trailing characters. |
260 |
|
|
*/ |
261 |
|
|
char *scanner_cp = "YY_G(yy_c_buf_p) = yy_cp"; |
262 |
|
|
char *scanner_bp = "yy_bp"; |
263 |
|
|
|
264 |
|
|
add_action |
265 |
|
|
("*yy_cp = YY_G(yy_hold_char); /* undo effects of setting up yytext */\n"); |
266 |
|
|
|
267 |
|
|
if (headcnt > 0) { |
268 |
|
|
if (rule_has_nl[num_rules]) { |
269 |
|
|
snprintf(action_text, sizeof(action_text), |
270 |
|
|
"YY_LINENO_REWIND_TO(%s + %d);\n", scanner_bp, headcnt); |
271 |
|
|
add_action(action_text); |
272 |
|
|
} |
273 |
|
|
snprintf(action_text, sizeof(action_text), "%s = %s + %d;\n", |
274 |
|
|
scanner_cp, scanner_bp, headcnt); |
275 |
|
|
add_action(action_text); |
276 |
|
|
} else { |
277 |
|
|
if (rule_has_nl[num_rules]) { |
278 |
|
|
snprintf(action_text, sizeof(action_text), |
279 |
|
|
"YY_LINENO_REWIND_TO(yy_cp - %d);\n", trailcnt); |
280 |
|
|
add_action(action_text); |
281 |
|
|
} |
282 |
|
|
snprintf(action_text, sizeof(action_text), "%s -= %d;\n", |
283 |
|
|
scanner_cp, trailcnt); |
284 |
|
|
add_action(action_text); |
285 |
|
|
} |
286 |
|
|
|
287 |
|
|
add_action |
288 |
|
|
("YY_DO_BEFORE_ACTION; /* set up yytext again */\n"); |
289 |
|
|
} |
290 |
|
|
} |
291 |
|
|
|
292 |
|
|
/* |
293 |
|
|
* Okay, in the action code at this point yytext and yyleng have |
294 |
|
|
* their proper final values for this rule, so here's the point to do |
295 |
|
|
* any user action. But don't do it for continued actions, as |
296 |
|
|
* that'll result in multiple YY_RULE_SETUP's. |
297 |
|
|
*/ |
298 |
✓✓ |
560 |
if (!continued_action) |
299 |
|
559 |
add_action("YY_RULE_SETUP\n"); |
300 |
|
|
|
301 |
|
560 |
line_directive_out((FILE *) 0, 1); |
302 |
|
560 |
} |
303 |
|
|
|
304 |
|
|
|
305 |
|
|
/* link_machines - connect two machines together |
306 |
|
|
* |
307 |
|
|
* synopsis |
308 |
|
|
* |
309 |
|
|
* new = link_machines( first, last ); |
310 |
|
|
* |
311 |
|
|
* new - a machine constructed by connecting first to last |
312 |
|
|
* first - the machine whose successor is to be last |
313 |
|
|
* last - the machine whose predecessor is to be first |
314 |
|
|
* |
315 |
|
|
* note: this routine concatenates the machine first with the machine |
316 |
|
|
* last to produce a machine new which will pattern-match first first |
317 |
|
|
* and then last, and will fail if either of the sub-patterns fails. |
318 |
|
|
* FIRST is set to new by the operation. last is unmolested. |
319 |
|
|
*/ |
320 |
|
|
|
321 |
|
|
int |
322 |
|
|
link_machines(first, last) |
323 |
|
|
int first, last; |
324 |
|
|
{ |
325 |
✗✓ |
25180 |
if (first == NIL) |
326 |
|
|
return last; |
327 |
|
|
|
328 |
✗✓ |
12590 |
else if (last == NIL) |
329 |
|
|
return first; |
330 |
|
|
|
331 |
|
|
else { |
332 |
|
12590 |
mkxtion(finalst[first], last); |
333 |
|
12590 |
finalst[first] = finalst[last]; |
334 |
|
12590 |
lastst[first] = MAX(lastst[first], lastst[last]); |
335 |
|
12590 |
firstst[first] = MIN(firstst[first], firstst[last]); |
336 |
|
|
|
337 |
|
12590 |
return first; |
338 |
|
|
} |
339 |
|
12590 |
} |
340 |
|
|
|
341 |
|
|
|
342 |
|
|
/* mark_beginning_as_normal - mark each "beginning" state in a machine |
343 |
|
|
* as being a "normal" (i.e., not trailing context- |
344 |
|
|
* associated) states |
345 |
|
|
* |
346 |
|
|
* The "beginning" states are the epsilon closure of the first state |
347 |
|
|
*/ |
348 |
|
|
|
349 |
|
|
void |
350 |
|
|
mark_beginning_as_normal(mach) |
351 |
|
|
int mach; |
352 |
|
|
{ |
353 |
|
|
switch (state_type[mach]) { |
354 |
|
|
case STATE_NORMAL: |
355 |
|
|
/* Oh, we've already visited here. */ |
356 |
|
|
return; |
357 |
|
|
|
358 |
|
|
case STATE_TRAILING_CONTEXT: |
359 |
|
|
state_type[mach] = STATE_NORMAL; |
360 |
|
|
|
361 |
|
|
if (transchar[mach] == SYM_EPSILON) { |
362 |
|
|
if (trans1[mach] != NO_TRANSITION) |
363 |
|
|
mark_beginning_as_normal(trans1[mach]); |
364 |
|
|
|
365 |
|
|
if (trans2[mach] != NO_TRANSITION) |
366 |
|
|
mark_beginning_as_normal(trans2[mach]); |
367 |
|
|
} |
368 |
|
|
break; |
369 |
|
|
|
370 |
|
|
default: |
371 |
|
|
flexerror(_ |
372 |
|
|
("bad state type in mark_beginning_as_normal()")); |
373 |
|
|
break; |
374 |
|
|
} |
375 |
|
|
} |
376 |
|
|
|
377 |
|
|
|
378 |
|
|
/* mkbranch - make a machine that branches to two machines |
379 |
|
|
* |
380 |
|
|
* synopsis |
381 |
|
|
* |
382 |
|
|
* branch = mkbranch( first, second ); |
383 |
|
|
* |
384 |
|
|
* branch - a machine which matches either first's pattern or second's |
385 |
|
|
* first, second - machines whose patterns are to be or'ed (the | operator) |
386 |
|
|
* |
387 |
|
|
* Note that first and second are NEITHER destroyed by the operation. Also, |
388 |
|
|
* the resulting machine CANNOT be used with any other "mk" operation except |
389 |
|
|
* more mkbranch's. Compare with mkor() |
390 |
|
|
*/ |
391 |
|
|
|
392 |
|
|
int |
393 |
|
|
mkbranch(first, second) |
394 |
|
|
int first, second; |
395 |
|
|
{ |
396 |
|
|
int eps; |
397 |
|
|
|
398 |
✗✓ |
1376 |
if (first == NO_TRANSITION) |
399 |
|
|
return second; |
400 |
|
|
|
401 |
✗✓ |
688 |
else if (second == NO_TRANSITION) |
402 |
|
|
return first; |
403 |
|
|
|
404 |
|
688 |
eps = mkstate(SYM_EPSILON); |
405 |
|
|
|
406 |
|
688 |
mkxtion(eps, first); |
407 |
|
688 |
mkxtion(eps, second); |
408 |
|
|
|
409 |
|
688 |
return eps; |
410 |
|
688 |
} |
411 |
|
|
|
412 |
|
|
|
413 |
|
|
/* mkclos - convert a machine into a closure |
414 |
|
|
* |
415 |
|
|
* synopsis |
416 |
|
|
* new = mkclos( state ); |
417 |
|
|
* |
418 |
|
|
* new - a new state which matches the closure of "state" |
419 |
|
|
*/ |
420 |
|
|
|
421 |
|
|
int |
422 |
|
|
mkclos(state) |
423 |
|
|
int state; |
424 |
|
|
{ |
425 |
|
58 |
return mkopt(mkposcl(state)); |
426 |
|
|
} |
427 |
|
|
|
428 |
|
|
|
429 |
|
|
/* mkopt - make a machine optional |
430 |
|
|
* |
431 |
|
|
* synopsis |
432 |
|
|
* |
433 |
|
|
* new = mkopt( mach ); |
434 |
|
|
* |
435 |
|
|
* new - a machine which optionally matches whatever mach matched |
436 |
|
|
* mach - the machine to make optional |
437 |
|
|
* |
438 |
|
|
* notes: |
439 |
|
|
* 1. mach must be the last machine created |
440 |
|
|
* 2. mach is destroyed by the call |
441 |
|
|
*/ |
442 |
|
|
|
443 |
|
|
int |
444 |
|
|
mkopt(mach) |
445 |
|
|
int mach; |
446 |
|
|
{ |
447 |
|
|
int eps; |
448 |
|
|
|
449 |
✓✓✓✗
|
3143 |
if (!SUPER_FREE_EPSILON(finalst[mach])) { |
450 |
|
1557 |
eps = mkstate(SYM_EPSILON); |
451 |
|
1557 |
mach = link_machines(mach, eps); |
452 |
|
1557 |
} |
453 |
|
|
/* |
454 |
|
|
* Can't skimp on the following if FREE_EPSILON(mach) is true because |
455 |
|
|
* some state interior to "mach" might point back to the beginning |
456 |
|
|
* for a closure. |
457 |
|
|
*/ |
458 |
|
1557 |
eps = mkstate(SYM_EPSILON); |
459 |
|
1557 |
mach = link_machines(eps, mach); |
460 |
|
|
|
461 |
|
1557 |
mkxtion(mach, finalst[mach]); |
462 |
|
|
|
463 |
|
1557 |
return mach; |
464 |
|
|
} |
465 |
|
|
|
466 |
|
|
|
467 |
|
|
/* mkor - make a machine that matches either one of two machines |
468 |
|
|
* |
469 |
|
|
* synopsis |
470 |
|
|
* |
471 |
|
|
* new = mkor( first, second ); |
472 |
|
|
* |
473 |
|
|
* new - a machine which matches either first's pattern or second's |
474 |
|
|
* first, second - machines whose patterns are to be or'ed (the | operator) |
475 |
|
|
* |
476 |
|
|
* note that first and second are both destroyed by the operation |
477 |
|
|
* the code is rather convoluted because an attempt is made to minimize |
478 |
|
|
* the number of epsilon states needed |
479 |
|
|
*/ |
480 |
|
|
|
481 |
|
|
int |
482 |
|
|
mkor(first, second) |
483 |
|
|
int first, second; |
484 |
|
|
{ |
485 |
|
|
int eps, orend; |
486 |
|
|
|
487 |
✗✓ |
2842 |
if (first == NIL) |
488 |
|
|
return second; |
489 |
|
|
|
490 |
✗✓ |
1421 |
else if (second == NIL) |
491 |
|
|
return first; |
492 |
|
|
|
493 |
|
|
else { |
494 |
|
|
/* |
495 |
|
|
* See comment in mkopt() about why we can't use the first |
496 |
|
|
* state of "first" or "second" if they satisfy |
497 |
|
|
* "FREE_EPSILON". |
498 |
|
|
*/ |
499 |
|
1421 |
eps = mkstate(SYM_EPSILON); |
500 |
|
|
|
501 |
|
1421 |
first = link_machines(eps, first); |
502 |
|
|
|
503 |
|
1421 |
mkxtion(first, second); |
504 |
|
|
|
505 |
✓✓✓✓ ✓✗ |
1873 |
if (SUPER_FREE_EPSILON(finalst[first]) && |
506 |
|
128 |
accptnum[finalst[first]] == NIL) { |
507 |
|
|
orend = finalst[first]; |
508 |
|
128 |
mkxtion(finalst[second], orend); |
509 |
✓✓✗✓ ✗✗ |
1617 |
} else if (SUPER_FREE_EPSILON(finalst[second]) && |
510 |
|
|
accptnum[finalst[second]] == NIL) { |
511 |
|
|
orend = finalst[second]; |
512 |
|
|
mkxtion(finalst[first], orend); |
513 |
|
|
} else { |
514 |
|
1293 |
eps = mkstate(SYM_EPSILON); |
515 |
|
|
|
516 |
|
1293 |
first = link_machines(first, eps); |
517 |
|
1293 |
orend = finalst[first]; |
518 |
|
|
|
519 |
|
1293 |
mkxtion(finalst[second], orend); |
520 |
|
|
} |
521 |
|
|
} |
522 |
|
|
|
523 |
|
1421 |
finalst[first] = orend; |
524 |
|
1421 |
return first; |
525 |
|
1421 |
} |
526 |
|
|
|
527 |
|
|
|
528 |
|
|
/* mkposcl - convert a machine into a positive closure |
529 |
|
|
* |
530 |
|
|
* synopsis |
531 |
|
|
* new = mkposcl( state ); |
532 |
|
|
* |
533 |
|
|
* new - a machine matching the positive closure of "state" |
534 |
|
|
*/ |
535 |
|
|
|
536 |
|
|
int |
537 |
|
|
mkposcl(state) |
538 |
|
|
int state; |
539 |
|
|
{ |
540 |
|
|
int eps; |
541 |
|
|
|
542 |
✓✓✓✓
|
911 |
if (SUPER_FREE_EPSILON(finalst[state])) { |
543 |
|
9 |
mkxtion(finalst[state], state); |
544 |
|
9 |
return state; |
545 |
|
|
} else { |
546 |
|
441 |
eps = mkstate(SYM_EPSILON); |
547 |
|
441 |
mkxtion(eps, state); |
548 |
|
441 |
return link_machines(state, eps); |
549 |
|
|
} |
550 |
|
450 |
} |
551 |
|
|
|
552 |
|
|
|
553 |
|
|
/* mkrep - make a replicated machine |
554 |
|
|
* |
555 |
|
|
* synopsis |
556 |
|
|
* new = mkrep( mach, lb, ub ); |
557 |
|
|
* |
558 |
|
|
* new - a machine that matches whatever "mach" matched from "lb" |
559 |
|
|
* number of times to "ub" number of times |
560 |
|
|
* |
561 |
|
|
* note |
562 |
|
|
* if "ub" is INFINITE_REPEAT then "new" matches "lb" or more occurrences of "mach" |
563 |
|
|
*/ |
564 |
|
|
|
565 |
|
|
int |
566 |
|
|
mkrep(mach, lb, ub) |
567 |
|
|
int mach, lb, ub; |
568 |
|
|
{ |
569 |
|
|
int base_mach, tail, copy, i; |
570 |
|
|
|
571 |
|
|
base_mach = copysingl(mach, lb - 1); |
572 |
|
|
|
573 |
|
|
if (ub == INFINITE_REPEAT) { |
574 |
|
|
copy = dupmachine(mach); |
575 |
|
|
mach = link_machines(mach, |
576 |
|
|
link_machines(base_mach, |
577 |
|
|
mkclos(copy))); |
578 |
|
|
} else { |
579 |
|
|
tail = mkstate(SYM_EPSILON); |
580 |
|
|
|
581 |
|
|
for (i = lb; i < ub; ++i) { |
582 |
|
|
copy = dupmachine(mach); |
583 |
|
|
tail = mkopt(link_machines(copy, tail)); |
584 |
|
|
} |
585 |
|
|
|
586 |
|
|
mach = |
587 |
|
|
link_machines(mach, |
588 |
|
|
link_machines(base_mach, tail)); |
589 |
|
|
} |
590 |
|
|
|
591 |
|
|
return mach; |
592 |
|
|
} |
593 |
|
|
|
594 |
|
|
|
595 |
|
|
/* mkstate - create a state with a transition on a given symbol |
596 |
|
|
* |
597 |
|
|
* synopsis |
598 |
|
|
* |
599 |
|
|
* state = mkstate( sym ); |
600 |
|
|
* |
601 |
|
|
* state - a new state matching sym |
602 |
|
|
* sym - the symbol the new state is to have an out-transition on |
603 |
|
|
* |
604 |
|
|
* note that this routine makes new states in ascending order through the |
605 |
|
|
* state array (and increments LASTNFA accordingly). The routine DUPMACHINE |
606 |
|
|
* relies on machines being made in ascending order and that they are |
607 |
|
|
* CONTIGUOUS. Change it and you will have to rewrite DUPMACHINE (kludge |
608 |
|
|
* that it admittedly is) |
609 |
|
|
*/ |
610 |
|
|
|
611 |
|
|
int |
612 |
|
|
mkstate(sym) |
613 |
|
|
int sym; |
614 |
|
|
{ |
615 |
✓✓ |
30902 |
if (++lastnfa >= current_mns) { |
616 |
✗✓ |
10 |
if ((current_mns += MNS_INCREMENT) >= maximum_mns) |
617 |
|
|
lerrif(_ |
618 |
|
|
("input rules are too complicated (>= %d NFA states)"), |
619 |
|
|
current_mns); |
620 |
|
|
|
621 |
|
10 |
++num_reallocs; |
622 |
|
|
|
623 |
|
10 |
firstst = reallocate_integer_array(firstst, current_mns); |
624 |
|
10 |
lastst = reallocate_integer_array(lastst, current_mns); |
625 |
|
10 |
finalst = reallocate_integer_array(finalst, current_mns); |
626 |
|
10 |
transchar = |
627 |
|
10 |
reallocate_integer_array(transchar, current_mns); |
628 |
|
10 |
trans1 = reallocate_integer_array(trans1, current_mns); |
629 |
|
10 |
trans2 = reallocate_integer_array(trans2, current_mns); |
630 |
|
10 |
accptnum = |
631 |
|
10 |
reallocate_integer_array(accptnum, current_mns); |
632 |
|
10 |
assoc_rule = |
633 |
|
10 |
reallocate_integer_array(assoc_rule, current_mns); |
634 |
|
10 |
state_type = |
635 |
|
10 |
reallocate_integer_array(state_type, current_mns); |
636 |
|
10 |
} |
637 |
|
15451 |
firstst[lastnfa] = lastnfa; |
638 |
|
15451 |
finalst[lastnfa] = lastnfa; |
639 |
|
15451 |
lastst[lastnfa] = lastnfa; |
640 |
|
15451 |
transchar[lastnfa] = sym; |
641 |
|
15451 |
trans1[lastnfa] = NO_TRANSITION; |
642 |
|
15451 |
trans2[lastnfa] = NO_TRANSITION; |
643 |
|
15451 |
accptnum[lastnfa] = NIL; |
644 |
|
15451 |
assoc_rule[lastnfa] = num_rules; |
645 |
|
15451 |
state_type[lastnfa] = current_state_type; |
646 |
|
|
|
647 |
|
|
/* |
648 |
|
|
* Fix up equivalence classes base on this transition. Note that any |
649 |
|
|
* character which has its own transition gets its own equivalence |
650 |
|
|
* class. Thus only characters which are only in character classes |
651 |
|
|
* have a chance at being in the same equivalence class. E.g. "a|b" |
652 |
|
|
* puts 'a' and 'b' into two different equivalence classes. "[ab]" |
653 |
|
|
* puts them in the same equivalence class (barring other differences |
654 |
|
|
* elsewhere in the input). |
655 |
|
|
*/ |
656 |
|
|
|
657 |
✓✓ |
15451 |
if (sym < 0) { |
658 |
|
|
/* |
659 |
|
|
* We don't have to update the equivalence classes since that |
660 |
|
|
* was already done when the ccl was created for the first |
661 |
|
|
* time. |
662 |
|
|
*/ |
663 |
✓✓ |
12904 |
} else if (sym == SYM_EPSILON) |
664 |
|
7806 |
++numeps; |
665 |
|
|
|
666 |
|
|
else { |
667 |
|
5098 |
check_char(sym); |
668 |
|
|
|
669 |
✓✓ |
5098 |
if (useecs) |
670 |
|
|
/* Map NUL's to csize. */ |
671 |
|
4966 |
mkechar(sym ? sym : csize, nextecm, ecgroup); |
672 |
|
|
} |
673 |
|
|
|
674 |
|
15451 |
return lastnfa; |
675 |
|
|
} |
676 |
|
|
|
677 |
|
|
|
678 |
|
|
/* mkxtion - make a transition from one state to another |
679 |
|
|
* |
680 |
|
|
* synopsis |
681 |
|
|
* |
682 |
|
|
* mkxtion( statefrom, stateto ); |
683 |
|
|
* |
684 |
|
|
* statefrom - the state from which the transition is to be made |
685 |
|
|
* stateto - the state to which the transition is to be made |
686 |
|
|
*/ |
687 |
|
|
|
688 |
|
|
void |
689 |
|
|
mkxtion(statefrom, stateto) |
690 |
|
|
int statefrom, stateto; |
691 |
|
|
{ |
692 |
✓✓ |
37630 |
if (trans1[statefrom] == NO_TRANSITION) |
693 |
|
14722 |
trans1[statefrom] = stateto; |
694 |
|
|
|
695 |
✓✗✗✓
|
8186 |
else if ((transchar[statefrom] != SYM_EPSILON) || |
696 |
|
4093 |
(trans2[statefrom] != NO_TRANSITION)) |
697 |
|
|
flexfatal(_("found too many transitions in mkxtion()")); |
698 |
|
|
|
699 |
|
|
else { /* second out-transition for an epsilon state */ |
700 |
|
4093 |
++eps2; |
701 |
|
4093 |
trans2[statefrom] = stateto; |
702 |
|
|
} |
703 |
|
18815 |
} |
704 |
|
|
|
705 |
|
|
/* new_rule - initialize for a new rule */ |
706 |
|
|
|
707 |
|
|
void |
708 |
|
|
new_rule() |
709 |
|
|
{ |
710 |
✓✓ |
1148 |
if (++num_rules >= current_max_rules) { |
711 |
|
2 |
++num_reallocs; |
712 |
|
2 |
current_max_rules += MAX_RULES_INCREMENT; |
713 |
|
2 |
rule_type = reallocate_integer_array(rule_type, |
714 |
|
|
current_max_rules); |
715 |
|
2 |
rule_linenum = reallocate_integer_array(rule_linenum, |
716 |
|
|
current_max_rules); |
717 |
|
2 |
rule_useful = reallocate_integer_array(rule_useful, |
718 |
|
|
current_max_rules); |
719 |
|
2 |
rule_has_nl = reallocate_bool_array(rule_has_nl, |
720 |
|
|
current_max_rules); |
721 |
|
2 |
} |
722 |
✗✓ |
574 |
if (num_rules > MAX_RULE) |
723 |
|
|
lerrif(_("too many rules (> %d)!"), MAX_RULE); |
724 |
|
|
|
725 |
|
574 |
rule_linenum[num_rules] = linenum; |
726 |
|
574 |
rule_useful[num_rules] = false; |
727 |
|
574 |
rule_has_nl[num_rules] = false; |
728 |
|
574 |
} |