1 |
|
|
/* $OpenBSD: tblcmp.c,v 1.10 2015/11/19 23:34:56 mmcc Exp $ */ |
2 |
|
|
|
3 |
|
|
/* tblcmp - table compression routines */ |
4 |
|
|
|
5 |
|
|
/* Copyright (c) 1990 The Regents of the University of California. */ |
6 |
|
|
/* All rights reserved. */ |
7 |
|
|
|
8 |
|
|
/* This code is derived from software contributed to Berkeley by */ |
9 |
|
|
/* Vern Paxson. */ |
10 |
|
|
|
11 |
|
|
/* The United States Government has rights in this work pursuant */ |
12 |
|
|
/* to contract no. DE-AC03-76SF00098 between the United States */ |
13 |
|
|
/* Department of Energy and the University of California. */ |
14 |
|
|
|
15 |
|
|
/* This file is part of flex. */ |
16 |
|
|
|
17 |
|
|
/* Redistribution and use in source and binary forms, with or without */ |
18 |
|
|
/* modification, are permitted provided that the following conditions */ |
19 |
|
|
/* are met: */ |
20 |
|
|
|
21 |
|
|
/* 1. Redistributions of source code must retain the above copyright */ |
22 |
|
|
/* notice, this list of conditions and the following disclaimer. */ |
23 |
|
|
/* 2. Redistributions in binary form must reproduce the above copyright */ |
24 |
|
|
/* notice, this list of conditions and the following disclaimer in the */ |
25 |
|
|
/* documentation and/or other materials provided with the distribution. */ |
26 |
|
|
|
27 |
|
|
/* Neither the name of the University nor the names of its contributors */ |
28 |
|
|
/* may be used to endorse or promote products derived from this software */ |
29 |
|
|
/* without specific prior written permission. */ |
30 |
|
|
|
31 |
|
|
/* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */ |
32 |
|
|
/* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */ |
33 |
|
|
/* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ |
34 |
|
|
/* PURPOSE. */ |
35 |
|
|
|
36 |
|
|
#include "flexdef.h" |
37 |
|
|
|
38 |
|
|
|
39 |
|
|
/* declarations for functions that have forward references */ |
40 |
|
|
|
41 |
|
|
void mkentry PROTO((int *, int, int, int, int)); |
42 |
|
|
void mkprot PROTO((int[], int, int)); |
43 |
|
|
void mktemplate PROTO((int[], int, int)); |
44 |
|
|
void mv2front PROTO((int)); |
45 |
|
|
int tbldiff PROTO((int[], int, int[])); |
46 |
|
|
|
47 |
|
|
|
48 |
|
|
/* bldtbl - build table entries for dfa state |
49 |
|
|
* |
50 |
|
|
* synopsis |
51 |
|
|
* int state[numecs], statenum, totaltrans, comstate, comfreq; |
52 |
|
|
* bldtbl( state, statenum, totaltrans, comstate, comfreq ); |
53 |
|
|
* |
54 |
|
|
* State is the statenum'th dfa state. It is indexed by equivalence class and |
55 |
|
|
* gives the number of the state to enter for a given equivalence class. |
56 |
|
|
* totaltrans is the total number of transitions out of the state. Comstate |
57 |
|
|
* is that state which is the destination of the most transitions out of State. |
58 |
|
|
* Comfreq is how many transitions there are out of State to Comstate. |
59 |
|
|
* |
60 |
|
|
* A note on terminology: |
61 |
|
|
* "protos" are transition tables which have a high probability of |
62 |
|
|
* either being redundant (a state processed later will have an identical |
63 |
|
|
* transition table) or nearly redundant (a state processed later will have |
64 |
|
|
* many of the same out-transitions). A "most recently used" queue of |
65 |
|
|
* protos is kept around with the hope that most states will find a proto |
66 |
|
|
* which is similar enough to be usable, and therefore compacting the |
67 |
|
|
* output tables. |
68 |
|
|
* "templates" are a special type of proto. If a transition table is |
69 |
|
|
* homogeneous or nearly homogeneous (all transitions go to the same |
70 |
|
|
* destination) then the odds are good that future states will also go |
71 |
|
|
* to the same destination state on basically the same character set. |
72 |
|
|
* These homogeneous states are so common when dealing with large rule |
73 |
|
|
* sets that they merit special attention. If the transition table were |
74 |
|
|
* simply made into a proto, then (typically) each subsequent, similar |
75 |
|
|
* state will differ from the proto for two out-transitions. One of these |
76 |
|
|
* out-transitions will be that character on which the proto does not go |
77 |
|
|
* to the common destination, and one will be that character on which the |
78 |
|
|
* state does not go to the common destination. Templates, on the other |
79 |
|
|
* hand, go to the common state on EVERY transition character, and therefore |
80 |
|
|
* cost only one difference. |
81 |
|
|
*/ |
82 |
|
|
|
83 |
|
|
void |
84 |
|
|
bldtbl(state, statenum, totaltrans, comstate, comfreq) |
85 |
|
|
int state[], statenum, totaltrans, comstate, comfreq; |
86 |
|
|
{ |
87 |
|
7582 |
int extptr, extrct[2][CSIZE + 1]; |
88 |
|
|
int mindiff, minprot, i, d; |
89 |
|
|
|
90 |
|
|
/* |
91 |
|
|
* If extptr is 0 then the first array of extrct holds the result of |
92 |
|
|
* the "best difference" to date, which is those transitions which |
93 |
|
|
* occur in "state" but not in the proto which, to date, has the |
94 |
|
|
* fewest differences between itself and "state". If extptr is 1 |
95 |
|
|
* then the second array of extrct hold the best difference. The two |
96 |
|
|
* arrays are toggled between so that the best difference to date can |
97 |
|
|
* be kept around and also a difference just created by checking |
98 |
|
|
* against a candidate "best" proto. |
99 |
|
|
*/ |
100 |
|
|
|
101 |
|
|
extptr = 0; |
102 |
|
|
|
103 |
|
|
/* |
104 |
|
|
* If the state has too few out-transitions, don't bother trying to |
105 |
|
|
* compact its tables. |
106 |
|
|
*/ |
107 |
|
|
|
108 |
✓✓ |
3791 |
if ((totaltrans * 100) < (numecs * PROTO_SIZE_PERCENTAGE)) |
109 |
|
540 |
mkentry(state, numecs, statenum, JAMSTATE, totaltrans); |
110 |
|
|
|
111 |
|
|
else { |
112 |
|
|
/* |
113 |
|
|
* "checkcom" is true if we should only check "state" against |
114 |
|
|
* protos which have the same "comstate" value. |
115 |
|
|
*/ |
116 |
|
|
int checkcom = |
117 |
|
|
|
118 |
|
3251 |
comfreq * 100 > totaltrans * CHECK_COM_PERCENTAGE; |
119 |
|
|
|
120 |
|
3251 |
minprot = firstprot; |
121 |
|
|
mindiff = totaltrans; |
122 |
|
|
|
123 |
✓✓ |
3251 |
if (checkcom) { |
124 |
|
|
/* Find first proto which has the same "comstate". */ |
125 |
✓✓ |
79534 |
for (i = firstprot; i != NIL; i = protnext[i]) |
126 |
✓✓ |
38818 |
if (protcomst[i] == comstate) { |
127 |
|
|
minprot = i; |
128 |
|
1932 |
mindiff = tbldiff(state, minprot, |
129 |
|
1932 |
extrct[extptr]); |
130 |
|
1932 |
break; |
131 |
|
|
} |
132 |
|
|
} else { |
133 |
|
|
/* |
134 |
|
|
* Since we've decided that the most common |
135 |
|
|
* destination out of "state" does not occur with a |
136 |
|
|
* high enough frequency, we set the "comstate" to |
137 |
|
|
* zero, assuring that if this state is entered into |
138 |
|
|
* the proto list, it will not be considered a |
139 |
|
|
* template. |
140 |
|
|
*/ |
141 |
|
|
comstate = 0; |
142 |
|
|
|
143 |
✓✓ |
370 |
if (firstprot != NIL) { |
144 |
|
|
minprot = firstprot; |
145 |
|
364 |
mindiff = tbldiff(state, minprot, |
146 |
|
364 |
extrct[extptr]); |
147 |
|
364 |
} |
148 |
|
|
} |
149 |
|
|
|
150 |
|
|
/* |
151 |
|
|
* We now have the first interesting proto in "minprot". If |
152 |
|
|
* it matches within the tolerances set for the first proto, |
153 |
|
|
* we don't want to bother scanning the rest of the proto |
154 |
|
|
* list to see if we have any other reasonable matches. |
155 |
|
|
*/ |
156 |
|
|
|
157 |
✓✓ |
6502 |
if (mindiff * 100 > |
158 |
|
3251 |
totaltrans * FIRST_MATCH_DIFF_PERCENTAGE) { |
159 |
|
|
/* |
160 |
|
|
* Not a good enough match. Scan the rest of the |
161 |
|
|
* protos. |
162 |
|
|
*/ |
163 |
✓✓ |
110328 |
for (i = minprot; i != NIL; i = protnext[i]) { |
164 |
|
53625 |
d = tbldiff(state, i, extrct[1 - extptr]); |
165 |
✓✓ |
53625 |
if (d < mindiff) { |
166 |
|
|
extptr = 1 - extptr; |
167 |
|
|
mindiff = d; |
168 |
|
|
minprot = i; |
169 |
|
479 |
} |
170 |
|
|
} |
171 |
|
|
} |
172 |
|
|
/* |
173 |
|
|
* Check if the proto we've decided on as our best bet is |
174 |
|
|
* close enough to the state we want to match to be usable. |
175 |
|
|
*/ |
176 |
|
|
|
177 |
✓✓ |
3251 |
if (mindiff * 100 > |
178 |
|
|
totaltrans * ACCEPTABLE_DIFF_PERCENTAGE) { |
179 |
|
|
/* |
180 |
|
|
* No good. If the state is homogeneous enough, we |
181 |
|
|
* make a template out of it. Otherwise, we make a |
182 |
|
|
* proto. |
183 |
|
|
*/ |
184 |
|
|
|
185 |
✓✓ |
1223 |
if (comfreq * 100 >= |
186 |
|
1223 |
totaltrans * TEMPLATE_SAME_PERCENTAGE) |
187 |
|
945 |
mktemplate(state, statenum, |
188 |
|
|
comstate); |
189 |
|
|
|
190 |
|
|
else { |
191 |
|
278 |
mkprot(state, statenum, comstate); |
192 |
|
278 |
mkentry(state, numecs, statenum, |
193 |
|
|
JAMSTATE, totaltrans); |
194 |
|
|
} |
195 |
|
|
} else { /* use the proto */ |
196 |
|
4056 |
mkentry(extrct[extptr], numecs, statenum, |
197 |
|
2028 |
prottbl[minprot], mindiff); |
198 |
|
|
|
199 |
|
|
/* |
200 |
|
|
* If this state was sufficiently different from the |
201 |
|
|
* proto we built it from, make it, too, a proto. |
202 |
|
|
*/ |
203 |
|
|
|
204 |
✓✓ |
2028 |
if (mindiff * 100 >= |
205 |
|
2028 |
totaltrans * NEW_PROTO_DIFF_PERCENTAGE) |
206 |
|
128 |
mkprot(state, statenum, comstate); |
207 |
|
|
|
208 |
|
|
/* |
209 |
|
|
* Since mkprot added a new proto to the proto queue, |
210 |
|
|
* it's possible that "minprot" is no longer on the |
211 |
|
|
* proto queue (if it happened to have been the last |
212 |
|
|
* entry, it would have been bumped off). If it's |
213 |
|
|
* not there, then the new proto took its physical |
214 |
|
|
* place (though logically the new proto is at the |
215 |
|
|
* beginning of the queue), so in that case the |
216 |
|
|
* following call will do nothing. |
217 |
|
|
*/ |
218 |
|
|
|
219 |
|
2028 |
mv2front(minprot); |
220 |
|
|
} |
221 |
|
|
} |
222 |
|
3791 |
} |
223 |
|
|
|
224 |
|
|
|
225 |
|
|
/* cmptmps - compress template table entries |
226 |
|
|
* |
227 |
|
|
* Template tables are compressed by using the 'template equivalence |
228 |
|
|
* classes', which are collections of transition character equivalence |
229 |
|
|
* classes which always appear together in templates - really meta-equivalence |
230 |
|
|
* classes. |
231 |
|
|
*/ |
232 |
|
|
|
233 |
|
|
void |
234 |
|
|
cmptmps() |
235 |
|
|
{ |
236 |
|
20 |
int tmpstorage[CSIZE + 1]; |
237 |
|
10 |
int *tmp = tmpstorage, i, j; |
238 |
|
|
int totaltrans, trans; |
239 |
|
|
|
240 |
|
10 |
peakpairs = numtemps * numecs + tblend; |
241 |
|
|
|
242 |
✓✓ |
10 |
if (usemecs) { |
243 |
|
|
/* |
244 |
|
|
* Create equivalence classes based on data gathered on |
245 |
|
|
* template transitions. |
246 |
|
|
*/ |
247 |
|
8 |
nummecs = cre8ecs(tecfwd, tecbck, numecs); |
248 |
|
8 |
} else |
249 |
|
|
nummecs = numecs; |
250 |
|
|
|
251 |
✗✓ |
20 |
while (lastdfa + numtemps + 1 >= current_max_dfas) |
252 |
|
|
increase_max_dfas(); |
253 |
|
|
|
254 |
|
|
/* Loop through each template. */ |
255 |
|
|
|
256 |
✓✓ |
1910 |
for (i = 1; i <= numtemps; ++i) { |
257 |
|
|
/* Number of non-jam transitions out of this template. */ |
258 |
|
|
totaltrans = 0; |
259 |
|
|
|
260 |
✓✓ |
101274 |
for (j = 1; j <= numecs; ++j) { |
261 |
|
49692 |
trans = tnxt[numecs * i + j]; |
262 |
|
|
|
263 |
✓✓ |
49692 |
if (usemecs) { |
264 |
|
|
/* |
265 |
|
|
* The absolute value of tecbck is the |
266 |
|
|
* meta-equivalence class of a given |
267 |
|
|
* equivalence class, as set up by cre8ecs(). |
268 |
|
|
*/ |
269 |
✓✓ |
47644 |
if (tecbck[j] > 0) { |
270 |
|
7385 |
tmp[tecbck[j]] = trans; |
271 |
|
|
|
272 |
✓✓ |
7385 |
if (trans > 0) |
273 |
|
2543 |
++totaltrans; |
274 |
|
|
} |
275 |
|
|
} else { |
276 |
|
2048 |
tmp[j] = trans; |
277 |
|
|
|
278 |
✓✓ |
2048 |
if (trans > 0) |
279 |
|
1652 |
++totaltrans; |
280 |
|
|
} |
281 |
|
|
} |
282 |
|
|
|
283 |
|
|
/* |
284 |
|
|
* It is assumed (in a rather subtle way) in the skeleton |
285 |
|
|
* that if we're using meta-equivalence classes, the def[] |
286 |
|
|
* entry for all templates is the jam template, i.e., |
287 |
|
|
* templates never default to other non-jam table entries |
288 |
|
|
* (e.g., another template) |
289 |
|
|
*/ |
290 |
|
|
|
291 |
|
|
/* Leave room for the jam-state after the last real state. */ |
292 |
|
945 |
mkentry(tmp, nummecs, lastdfa + i + 1, JAMSTATE, |
293 |
|
|
totaltrans); |
294 |
|
|
} |
295 |
|
10 |
} |
296 |
|
|
|
297 |
|
|
|
298 |
|
|
|
299 |
|
|
/* expand_nxt_chk - expand the next check arrays */ |
300 |
|
|
|
301 |
|
|
void |
302 |
|
|
expand_nxt_chk() |
303 |
|
|
{ |
304 |
|
14 |
int old_max = current_max_xpairs; |
305 |
|
|
|
306 |
|
7 |
current_max_xpairs += MAX_XPAIRS_INCREMENT; |
307 |
|
|
|
308 |
|
7 |
++num_reallocs; |
309 |
|
|
|
310 |
|
7 |
nxt = reallocate_integer_array(nxt, current_max_xpairs); |
311 |
|
7 |
chk = reallocate_integer_array(chk, current_max_xpairs); |
312 |
|
|
|
313 |
|
7 |
memset((chk + old_max), 0, MAX_XPAIRS_INCREMENT * sizeof(int)); |
314 |
|
7 |
} |
315 |
|
|
|
316 |
|
|
|
317 |
|
|
/* find_table_space - finds a space in the table for a state to be placed |
318 |
|
|
* |
319 |
|
|
* synopsis |
320 |
|
|
* int *state, numtrans, block_start; |
321 |
|
|
* int find_table_space(); |
322 |
|
|
* |
323 |
|
|
* block_start = find_table_space( state, numtrans ); |
324 |
|
|
* |
325 |
|
|
* State is the state to be added to the full speed transition table. |
326 |
|
|
* Numtrans is the number of out-transitions for the state. |
327 |
|
|
* |
328 |
|
|
* find_table_space() returns the position of the start of the first block (in |
329 |
|
|
* chk) able to accommodate the state |
330 |
|
|
* |
331 |
|
|
* In determining if a state will or will not fit, find_table_space() must take |
332 |
|
|
* into account the fact that an end-of-buffer state will be added at [0], |
333 |
|
|
* and an action number will be added in [-1]. |
334 |
|
|
*/ |
335 |
|
|
|
336 |
|
|
int |
337 |
|
|
find_table_space(state, numtrans) |
338 |
|
|
int *state, numtrans; |
339 |
|
|
{ |
340 |
|
|
/* |
341 |
|
|
* Firstfree is the position of the first possible occurrence of two |
342 |
|
|
* consecutive unused records in the chk and nxt arrays. |
343 |
|
|
*/ |
344 |
|
|
int i; |
345 |
|
|
int *state_ptr, *chk_ptr; |
346 |
|
|
int *ptr_to_last_entry_in_state; |
347 |
|
|
|
348 |
|
|
/* |
349 |
|
|
* If there are too many out-transitions, put the state at the end of |
350 |
|
|
* nxt and chk. |
351 |
|
|
*/ |
352 |
|
|
if (numtrans > MAX_XTIONS_FULL_INTERIOR_FIT) { |
353 |
|
|
/* |
354 |
|
|
* If table is empty, return the first available spot in |
355 |
|
|
* chk/nxt, which should be 1. |
356 |
|
|
*/ |
357 |
|
|
if (tblend < 2) |
358 |
|
|
return 1; |
359 |
|
|
|
360 |
|
|
/* |
361 |
|
|
* Start searching for table space near the end of chk/nxt |
362 |
|
|
* arrays. |
363 |
|
|
*/ |
364 |
|
|
i = tblend - numecs; |
365 |
|
|
} else |
366 |
|
|
/* |
367 |
|
|
* Start searching for table space from the beginning |
368 |
|
|
* (skipping only the elements which will definitely not hold |
369 |
|
|
* the new state). |
370 |
|
|
*/ |
371 |
|
|
i = firstfree; |
372 |
|
|
|
373 |
|
|
while (1) { /* loops until a space is found */ |
374 |
|
|
while (i + numecs >= current_max_xpairs) |
375 |
|
|
expand_nxt_chk(); |
376 |
|
|
|
377 |
|
|
/* |
378 |
|
|
* Loops until space for end-of-buffer and action number are |
379 |
|
|
* found. |
380 |
|
|
*/ |
381 |
|
|
while (1) { |
382 |
|
|
/* Check for action number space. */ |
383 |
|
|
if (chk[i - 1] == 0) { |
384 |
|
|
/* Check for end-of-buffer space. */ |
385 |
|
|
if (chk[i] == 0) |
386 |
|
|
break; |
387 |
|
|
|
388 |
|
|
else |
389 |
|
|
/* |
390 |
|
|
* Since i != 0, there is no use |
391 |
|
|
* checking to see if (++i) - 1 == 0, |
392 |
|
|
* because that's the same as i == 0, |
393 |
|
|
* so we skip a space. |
394 |
|
|
*/ |
395 |
|
|
i += 2; |
396 |
|
|
} else |
397 |
|
|
++i; |
398 |
|
|
|
399 |
|
|
while (i + numecs >= current_max_xpairs) |
400 |
|
|
expand_nxt_chk(); |
401 |
|
|
} |
402 |
|
|
|
403 |
|
|
/* |
404 |
|
|
* If we started search from the beginning, store the new |
405 |
|
|
* firstfree for the next call of find_table_space(). |
406 |
|
|
*/ |
407 |
|
|
if (numtrans <= MAX_XTIONS_FULL_INTERIOR_FIT) |
408 |
|
|
firstfree = i + 1; |
409 |
|
|
|
410 |
|
|
/* |
411 |
|
|
* Check to see if all elements in chk (and therefore nxt) |
412 |
|
|
* that are needed for the new state have not yet been taken. |
413 |
|
|
*/ |
414 |
|
|
|
415 |
|
|
state_ptr = &state[1]; |
416 |
|
|
ptr_to_last_entry_in_state = &chk[i + numecs + 1]; |
417 |
|
|
|
418 |
|
|
for (chk_ptr = &chk[i + 1]; |
419 |
|
|
chk_ptr != ptr_to_last_entry_in_state; ++chk_ptr) |
420 |
|
|
if (*(state_ptr++) != 0 && *chk_ptr != 0) |
421 |
|
|
break; |
422 |
|
|
|
423 |
|
|
if (chk_ptr == ptr_to_last_entry_in_state) |
424 |
|
|
return i; |
425 |
|
|
|
426 |
|
|
else |
427 |
|
|
++i; |
428 |
|
|
} |
429 |
|
|
} |
430 |
|
|
|
431 |
|
|
|
432 |
|
|
/* inittbl - initialize transition tables |
433 |
|
|
* |
434 |
|
|
* Initializes "firstfree" to be one beyond the end of the table. Initializes |
435 |
|
|
* all "chk" entries to be zero. |
436 |
|
|
*/ |
437 |
|
|
void |
438 |
|
|
inittbl() |
439 |
|
|
{ |
440 |
|
|
int i; |
441 |
|
|
|
442 |
|
20 |
memset(chk, 0, current_max_xpairs * sizeof(int)); |
443 |
|
|
|
444 |
|
10 |
tblend = 0; |
445 |
|
10 |
firstfree = tblend + 1; |
446 |
|
10 |
numtemps = 0; |
447 |
|
|
|
448 |
✓✓ |
10 |
if (usemecs) { |
449 |
|
|
/* |
450 |
|
|
* Set up doubly-linked meta-equivalence classes; these are |
451 |
|
|
* sets of equivalence classes which all have identical |
452 |
|
|
* transitions out of TEMPLATES. |
453 |
|
|
*/ |
454 |
|
|
|
455 |
|
8 |
tecbck[1] = NIL; |
456 |
|
|
|
457 |
✓✓ |
620 |
for (i = 2; i <= numecs; ++i) { |
458 |
|
302 |
tecbck[i] = i - 1; |
459 |
|
302 |
tecfwd[i - 1] = i; |
460 |
|
|
} |
461 |
|
|
|
462 |
|
8 |
tecfwd[numecs] = NIL; |
463 |
|
8 |
} |
464 |
|
10 |
} |
465 |
|
|
|
466 |
|
|
|
467 |
|
|
/* mkdeftbl - make the default, "jam" table entries */ |
468 |
|
|
|
469 |
|
|
void |
470 |
|
|
mkdeftbl() |
471 |
|
|
{ |
472 |
|
|
int i; |
473 |
|
|
|
474 |
|
20 |
jamstate = lastdfa + 1; |
475 |
|
|
|
476 |
|
10 |
++tblend; /* room for transition on end-of-buffer |
477 |
|
|
* character */ |
478 |
|
|
|
479 |
✗✓ |
20 |
while (tblend + numecs >= current_max_xpairs) |
480 |
|
|
expand_nxt_chk(); |
481 |
|
|
|
482 |
|
|
/* Add in default end-of-buffer transition. */ |
483 |
|
10 |
nxt[tblend] = end_of_buffer_state; |
484 |
|
10 |
chk[tblend] = jamstate; |
485 |
|
|
|
486 |
✓✓ |
1664 |
for (i = 1; i <= numecs; ++i) { |
487 |
|
822 |
nxt[tblend + i] = 0; |
488 |
|
822 |
chk[tblend + i] = jamstate; |
489 |
|
|
} |
490 |
|
|
|
491 |
|
10 |
jambase = tblend; |
492 |
|
|
|
493 |
|
10 |
base[jamstate] = jambase; |
494 |
|
10 |
def[jamstate] = 0; |
495 |
|
|
|
496 |
|
10 |
tblend += numecs; |
497 |
|
10 |
++numtemps; |
498 |
|
10 |
} |
499 |
|
|
|
500 |
|
|
|
501 |
|
|
/* mkentry - create base/def and nxt/chk entries for transition array |
502 |
|
|
* |
503 |
|
|
* synopsis |
504 |
|
|
* int state[numchars + 1], numchars, statenum, deflink, totaltrans; |
505 |
|
|
* mkentry( state, numchars, statenum, deflink, totaltrans ); |
506 |
|
|
* |
507 |
|
|
* "state" is a transition array "numchars" characters in size, "statenum" |
508 |
|
|
* is the offset to be used into the base/def tables, and "deflink" is the |
509 |
|
|
* entry to put in the "def" table entry. If "deflink" is equal to |
510 |
|
|
* "JAMSTATE", then no attempt will be made to fit zero entries of "state" |
511 |
|
|
* (i.e., jam entries) into the table. It is assumed that by linking to |
512 |
|
|
* "JAMSTATE" they will be taken care of. In any case, entries in "state" |
513 |
|
|
* marking transitions to "SAME_TRANS" are treated as though they will be |
514 |
|
|
* taken care of by whereever "deflink" points. "totaltrans" is the total |
515 |
|
|
* number of transitions out of the state. If it is below a certain threshold, |
516 |
|
|
* the tables are searched for an interior spot that will accommodate the |
517 |
|
|
* state array. |
518 |
|
|
*/ |
519 |
|
|
|
520 |
|
|
void |
521 |
|
|
mkentry(state, numchars, statenum, deflink, totaltrans) |
522 |
|
|
int *state; |
523 |
|
|
int numchars, statenum, deflink, totaltrans; |
524 |
|
|
{ |
525 |
|
|
int minec, maxec, i, baseaddr; |
526 |
|
|
int tblbase, tbllast; |
527 |
|
|
|
528 |
✓✓ |
9472 |
if (totaltrans == 0) { /* there are no out-transitions */ |
529 |
|
693 |
if (deflink == JAMSTATE) |
530 |
|
|
base[statenum] = JAMSTATE; |
531 |
|
|
else |
532 |
|
|
base[statenum] = 0; |
533 |
|
|
|
534 |
|
693 |
def[statenum] = deflink; |
535 |
|
693 |
return; |
536 |
|
|
} |
537 |
✓✗ |
131220 |
for (minec = 1; minec <= numchars; ++minec) { |
538 |
✓✓ |
65610 |
if (state[minec] != SAME_TRANS) |
539 |
✓✓ |
15163 |
if (state[minec] != 0 || deflink != JAMSTATE) |
540 |
|
|
break; |
541 |
|
|
} |
542 |
|
|
|
543 |
✓✓ |
4043 |
if (totaltrans == 1) { |
544 |
|
|
/* |
545 |
|
|
* There's only one out-transition. Save it for later to |
546 |
|
|
* fill in holes in the tables. |
547 |
|
|
*/ |
548 |
|
1233 |
stack1(statenum, minec, state[minec], deflink); |
549 |
|
1233 |
return; |
550 |
|
|
} |
551 |
✓✗ |
84534 |
for (maxec = numchars; maxec > 0; --maxec) { |
552 |
✓✓ |
42267 |
if (state[maxec] != SAME_TRANS) |
553 |
✓✓ |
18643 |
if (state[maxec] != 0 || deflink != JAMSTATE) |
554 |
|
|
break; |
555 |
|
|
} |
556 |
|
|
|
557 |
|
|
/* |
558 |
|
|
* Whether we try to fit the state table in the middle of the table |
559 |
|
|
* entries we have already generated, or if we just take the state |
560 |
|
|
* table at the end of the nxt/chk tables, we must make sure that we |
561 |
|
|
* have a valid base address (i.e., non-negative). Note that |
562 |
|
|
* negative base addresses dangerous at run-time (because indexing |
563 |
|
|
* the nxt array with one and a low-valued character will access |
564 |
|
|
* memory before the start of the array. |
565 |
|
|
*/ |
566 |
|
|
|
567 |
|
|
/* Find the first transition of state that we need to worry about. */ |
568 |
✓✓ |
2810 |
if (totaltrans * 100 <= numchars * INTERIOR_FIT_PERCENTAGE) { |
569 |
|
|
/* Attempt to squeeze it into the middle of the tables. */ |
570 |
|
1554 |
baseaddr = firstfree; |
571 |
|
|
|
572 |
✓✓ |
4815 |
while (baseaddr < minec) { |
573 |
|
|
/* |
574 |
|
|
* Using baseaddr would result in a negative base |
575 |
|
|
* address below; find the next free slot. |
576 |
|
|
*/ |
577 |
✓✓ |
6330 |
for (++baseaddr; chk[baseaddr] != 0; ++baseaddr); |
578 |
|
|
} |
579 |
|
|
|
580 |
✗✓ |
1554 |
while (baseaddr + maxec - minec + 1 >= current_max_xpairs) |
581 |
|
|
expand_nxt_chk(); |
582 |
|
|
|
583 |
✓✓ |
6005184 |
for (i = minec; i <= maxec; ++i) |
584 |
✓✓✓✓
|
3600325 |
if (state[i] != SAME_TRANS && |
585 |
✓✓ |
603624 |
(state[i] != 0 || deflink != JAMSTATE) && |
586 |
|
599287 |
chk[baseaddr + i - minec] != 0) { /* baseaddr unsuitable - |
587 |
|
|
* find another */ |
588 |
✓✓ |
4154012 |
for (++baseaddr; |
589 |
✓✗ |
2077006 |
baseaddr < current_max_xpairs && |
590 |
|
3913164 |
chk[baseaddr] != 0; ++baseaddr); |
591 |
|
|
|
592 |
✓✓ |
481699 |
while (baseaddr + maxec - minec + 1 >= |
593 |
|
240849 |
current_max_xpairs) |
594 |
|
1 |
expand_nxt_chk(); |
595 |
|
|
|
596 |
|
|
/* |
597 |
|
|
* Reset the loop counter so we'll start all |
598 |
|
|
* over again next time it's incremented. |
599 |
|
|
*/ |
600 |
|
|
|
601 |
|
240848 |
i = minec - 1; |
602 |
|
240848 |
} |
603 |
|
|
} else { |
604 |
|
|
/* |
605 |
|
|
* Ensure that the base address we eventually generate is |
606 |
|
|
* non-negative. |
607 |
|
|
*/ |
608 |
|
1256 |
baseaddr = MAX(tblend + 1, minec); |
609 |
|
|
} |
610 |
|
|
|
611 |
|
2810 |
tblbase = baseaddr - minec; |
612 |
|
2810 |
tbllast = tblbase + maxec; |
613 |
|
|
|
614 |
✓✓ |
5632 |
while (tbllast + 1 >= current_max_xpairs) |
615 |
|
6 |
expand_nxt_chk(); |
616 |
|
|
|
617 |
|
2810 |
base[statenum] = tblbase; |
618 |
|
2810 |
def[statenum] = deflink; |
619 |
|
|
|
620 |
✓✓ |
113226 |
for (i = minec; i <= maxec; ++i) |
621 |
✓✓ |
53803 |
if (state[i] != SAME_TRANS) |
622 |
✓✓ |
19609 |
if (state[i] != 0 || deflink != JAMSTATE) { |
623 |
|
15163 |
nxt[tblbase + i] = state[i]; |
624 |
|
15163 |
chk[tblbase + i] = statenum; |
625 |
|
15163 |
} |
626 |
✓✓ |
2810 |
if (baseaddr == firstfree) |
627 |
|
|
/* Find next free slot in tables. */ |
628 |
✓✓ |
336 |
for (++firstfree; chk[firstfree] != 0; ++firstfree); |
629 |
|
|
|
630 |
|
2810 |
tblend = MAX(tblend, tbllast); |
631 |
|
7546 |
} |
632 |
|
|
|
633 |
|
|
|
634 |
|
|
/* mk1tbl - create table entries for a state (or state fragment) which |
635 |
|
|
* has only one out-transition |
636 |
|
|
*/ |
637 |
|
|
|
638 |
|
|
void |
639 |
|
|
mk1tbl(state, sym, onenxt, onedef) |
640 |
|
|
int state, sym, onenxt, onedef; |
641 |
|
|
{ |
642 |
✓✓ |
2486 |
if (firstfree < sym) |
643 |
|
10 |
firstfree = sym; |
644 |
|
|
|
645 |
✓✓ |
10474 |
while (chk[firstfree] != 0) |
646 |
✓✗ |
9231 |
if (++firstfree >= current_max_xpairs) |
647 |
|
|
expand_nxt_chk(); |
648 |
|
|
|
649 |
|
1243 |
base[state] = firstfree - sym; |
650 |
|
1243 |
def[state] = onedef; |
651 |
|
1243 |
chk[firstfree] = state; |
652 |
|
1243 |
nxt[firstfree] = onenxt; |
653 |
|
|
|
654 |
✓✓ |
1243 |
if (firstfree > tblend) { |
655 |
|
133 |
tblend = firstfree++; |
656 |
|
|
|
657 |
✗✓ |
133 |
if (firstfree >= current_max_xpairs) |
658 |
|
|
expand_nxt_chk(); |
659 |
|
|
} |
660 |
|
1243 |
} |
661 |
|
|
|
662 |
|
|
|
663 |
|
|
/* mkprot - create new proto entry */ |
664 |
|
|
|
665 |
|
|
void |
666 |
|
|
mkprot(state, statenum, comstate) |
667 |
|
|
int state[], statenum, comstate; |
668 |
|
|
{ |
669 |
|
|
int i, slot, tblbase; |
670 |
|
|
|
671 |
✓✓✓✓
|
2853 |
if (++numprots >= MSP || numecs * numprots >= PROT_SAVE_SIZE) { |
672 |
|
|
/* |
673 |
|
|
* Gotta make room for the new proto by dropping last entry |
674 |
|
|
* in the queue. |
675 |
|
|
*/ |
676 |
|
1220 |
slot = lastprot; |
677 |
|
1220 |
lastprot = protprev[lastprot]; |
678 |
|
1220 |
protnext[lastprot] = NIL; |
679 |
|
1220 |
} else |
680 |
|
|
slot = numprots; |
681 |
|
|
|
682 |
|
1351 |
protnext[slot] = firstprot; |
683 |
|
|
|
684 |
✓✓ |
1351 |
if (firstprot != NIL) |
685 |
|
1341 |
protprev[firstprot] = slot; |
686 |
|
|
|
687 |
|
1351 |
firstprot = slot; |
688 |
|
1351 |
prottbl[slot] = statenum; |
689 |
|
1351 |
protcomst[slot] = comstate; |
690 |
|
|
|
691 |
|
|
/* Copy state into save area so it can be compared with rapidly. */ |
692 |
|
1351 |
tblbase = numecs * (slot - 1); |
693 |
|
|
|
694 |
✓✓ |
144196 |
for (i = 1; i <= numecs; ++i) |
695 |
|
70747 |
protsave[tblbase + i] = state[i]; |
696 |
|
1351 |
} |
697 |
|
|
|
698 |
|
|
|
699 |
|
|
/* mktemplate - create a template entry based on a state, and connect the state |
700 |
|
|
* to it |
701 |
|
|
*/ |
702 |
|
|
|
703 |
|
|
void |
704 |
|
|
mktemplate(state, statenum, comstate) |
705 |
|
|
int state[], statenum, comstate; |
706 |
|
|
{ |
707 |
|
1890 |
int i, numdiff, tmpbase, tmp[CSIZE + 1]; |
708 |
|
945 |
u_char transset[CSIZE + 1]; |
709 |
|
|
int tsptr; |
710 |
|
|
|
711 |
|
945 |
++numtemps; |
712 |
|
|
|
713 |
|
|
tsptr = 0; |
714 |
|
|
|
715 |
|
|
/* |
716 |
|
|
* Calculate where we will temporarily store the transition table of |
717 |
|
|
* the template in the tnxt[] array. The final transition table gets |
718 |
|
|
* created by cmptmps(). |
719 |
|
|
*/ |
720 |
|
|
|
721 |
|
945 |
tmpbase = numtemps * numecs; |
722 |
|
|
|
723 |
✓✓ |
945 |
if (tmpbase + numecs >= current_max_template_xpairs) { |
724 |
|
18 |
current_max_template_xpairs += |
725 |
|
|
MAX_TEMPLATE_XPAIRS_INCREMENT; |
726 |
|
|
|
727 |
|
18 |
++num_reallocs; |
728 |
|
|
|
729 |
|
18 |
tnxt = reallocate_integer_array(tnxt, |
730 |
|
|
current_max_template_xpairs); |
731 |
|
18 |
} |
732 |
✓✓ |
101274 |
for (i = 1; i <= numecs; ++i) |
733 |
✓✓ |
49692 |
if (state[i] == 0) |
734 |
|
33987 |
tnxt[tmpbase + i] = 0; |
735 |
|
|
else { |
736 |
|
15705 |
transset[tsptr++] = i; |
737 |
|
15705 |
tnxt[tmpbase + i] = comstate; |
738 |
|
|
} |
739 |
|
|
|
740 |
✓✓ |
945 |
if (usemecs) |
741 |
|
937 |
mkeccl(transset, tsptr, tecfwd, tecbck, numecs, 0); |
742 |
|
|
|
743 |
|
945 |
mkprot(tnxt + tmpbase, -numtemps, comstate); |
744 |
|
|
|
745 |
|
|
/* |
746 |
|
|
* We rely on the fact that mkprot adds things to the beginning of |
747 |
|
|
* the proto queue. |
748 |
|
|
*/ |
749 |
|
|
|
750 |
|
945 |
numdiff = tbldiff(state, firstprot, tmp); |
751 |
|
945 |
mkentry(tmp, numecs, statenum, -numtemps, numdiff); |
752 |
|
945 |
} |
753 |
|
|
|
754 |
|
|
|
755 |
|
|
/* mv2front - move proto queue element to front of queue */ |
756 |
|
|
|
757 |
|
|
void |
758 |
|
|
mv2front(qelm) |
759 |
|
|
int qelm; |
760 |
|
|
{ |
761 |
✓✓ |
4056 |
if (firstprot != qelm) { |
762 |
✓✓ |
302 |
if (qelm == lastprot) |
763 |
|
13 |
lastprot = protprev[lastprot]; |
764 |
|
|
|
765 |
|
302 |
protnext[protprev[qelm]] = protnext[qelm]; |
766 |
|
|
|
767 |
✓✓ |
302 |
if (protnext[qelm] != NIL) |
768 |
|
289 |
protprev[protnext[qelm]] = protprev[qelm]; |
769 |
|
|
|
770 |
|
302 |
protprev[qelm] = NIL; |
771 |
|
302 |
protnext[qelm] = firstprot; |
772 |
|
302 |
protprev[firstprot] = qelm; |
773 |
|
302 |
firstprot = qelm; |
774 |
|
302 |
} |
775 |
|
2028 |
} |
776 |
|
|
|
777 |
|
|
|
778 |
|
|
/* place_state - place a state into full speed transition table |
779 |
|
|
* |
780 |
|
|
* State is the statenum'th state. It is indexed by equivalence class and |
781 |
|
|
* gives the number of the state to enter for a given equivalence class. |
782 |
|
|
* Transnum is the number of out-transitions for the state. |
783 |
|
|
*/ |
784 |
|
|
|
785 |
|
|
void |
786 |
|
|
place_state(state, statenum, transnum) |
787 |
|
|
int *state, statenum, transnum; |
788 |
|
|
{ |
789 |
|
|
int i; |
790 |
|
|
int *state_ptr; |
791 |
|
|
int position = find_table_space(state, transnum); |
792 |
|
|
|
793 |
|
|
/* "base" is the table of start positions. */ |
794 |
|
|
base[statenum] = position; |
795 |
|
|
|
796 |
|
|
/* |
797 |
|
|
* Put in action number marker; this non-zero number makes sure that |
798 |
|
|
* find_table_space() knows that this position in chk/nxt is taken |
799 |
|
|
* and should not be used for another accepting number in another |
800 |
|
|
* state. |
801 |
|
|
*/ |
802 |
|
|
chk[position - 1] = 1; |
803 |
|
|
|
804 |
|
|
/* |
805 |
|
|
* Put in end-of-buffer marker; this is for the same purposes as |
806 |
|
|
* above. |
807 |
|
|
*/ |
808 |
|
|
chk[position] = 1; |
809 |
|
|
|
810 |
|
|
/* Place the state into chk and nxt. */ |
811 |
|
|
state_ptr = &state[1]; |
812 |
|
|
|
813 |
|
|
for (i = 1; i <= numecs; ++i, ++state_ptr) |
814 |
|
|
if (*state_ptr != 0) { |
815 |
|
|
chk[position + i] = i; |
816 |
|
|
nxt[position + i] = *state_ptr; |
817 |
|
|
} |
818 |
|
|
if (position + numecs > tblend) |
819 |
|
|
tblend = position + numecs; |
820 |
|
|
} |
821 |
|
|
|
822 |
|
|
|
823 |
|
|
/* stack1 - save states with only one out-transition to be processed later |
824 |
|
|
* |
825 |
|
|
* If there's room for another state on the "one-transition" stack, the |
826 |
|
|
* state is pushed onto it, to be processed later by mk1tbl. If there's |
827 |
|
|
* no room, we process the sucker right now. |
828 |
|
|
*/ |
829 |
|
|
|
830 |
|
|
void |
831 |
|
|
stack1(statenum, sym, nextstate, deflink) |
832 |
|
|
int statenum, sym, nextstate, deflink; |
833 |
|
|
{ |
834 |
✗✓ |
2486 |
if (onesp >= ONE_STACK_SIZE - 1) |
835 |
|
|
mk1tbl(statenum, sym, nextstate, deflink); |
836 |
|
|
|
837 |
|
|
else { |
838 |
|
1243 |
++onesp; |
839 |
|
1243 |
onestate[onesp] = statenum; |
840 |
|
1243 |
onesym[onesp] = sym; |
841 |
|
1243 |
onenext[onesp] = nextstate; |
842 |
|
1243 |
onedef[onesp] = deflink; |
843 |
|
|
} |
844 |
|
1243 |
} |
845 |
|
|
|
846 |
|
|
|
847 |
|
|
/* tbldiff - compute differences between two state tables |
848 |
|
|
* |
849 |
|
|
* "state" is the state array which is to be extracted from the pr'th |
850 |
|
|
* proto. "pr" is both the number of the proto we are extracting from |
851 |
|
|
* and an index into the save area where we can find the proto's complete |
852 |
|
|
* state table. Each entry in "state" which differs from the corresponding |
853 |
|
|
* entry of "pr" will appear in "ext". |
854 |
|
|
* |
855 |
|
|
* Entries which are the same in both "state" and "pr" will be marked |
856 |
|
|
* as transitions to "SAME_TRANS" in "ext". The total number of differences |
857 |
|
|
* between "state" and "pr" is returned as function value. Note that this |
858 |
|
|
* number is "numecs" minus the number of "SAME_TRANS" entries in "ext". |
859 |
|
|
*/ |
860 |
|
|
|
861 |
|
|
int |
862 |
|
|
tbldiff(state, pr, ext) |
863 |
|
|
int state[], pr, ext[]; |
864 |
|
|
{ |
865 |
|
|
int i, *sp = state, *ep = ext, *protp; |
866 |
|
|
int numdiff = 0; |
867 |
|
|
|
868 |
|
113732 |
protp = &protsave[numecs * (pr - 1)]; |
869 |
|
|
|
870 |
✓✓ |
5972084 |
for (i = numecs; i > 0; --i) { |
871 |
✓✓ |
2929176 |
if (*++protp == *++sp) |
872 |
|
1973661 |
*++ep = SAME_TRANS; |
873 |
|
|
else { |
874 |
|
955515 |
*++ep = *sp; |
875 |
|
955515 |
++numdiff; |
876 |
|
|
} |
877 |
|
|
} |
878 |
|
|
|
879 |
|
56866 |
return numdiff; |
880 |
|
|
} |