1 |
|
|
/* $OpenBSD: tsort.c,v 1.35 2016/01/05 16:10:57 espie Exp $ */ |
2 |
|
|
/* ex:ts=8 sw=4: |
3 |
|
|
* |
4 |
|
|
* Copyright (c) 1999-2004 Marc Espie <espie@openbsd.org> |
5 |
|
|
* |
6 |
|
|
* Permission to use, copy, modify, and distribute this software for any |
7 |
|
|
* purpose with or without fee is hereby granted, provided that the above |
8 |
|
|
* copyright notice and this permission notice appear in all copies. |
9 |
|
|
* |
10 |
|
|
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
11 |
|
|
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
12 |
|
|
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR |
13 |
|
|
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
14 |
|
|
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
15 |
|
|
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
16 |
|
|
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
17 |
|
|
*/ |
18 |
|
|
|
19 |
|
|
#include <assert.h> |
20 |
|
|
#include <ctype.h> |
21 |
|
|
#include <err.h> |
22 |
|
|
#include <limits.h> |
23 |
|
|
#include <stddef.h> |
24 |
|
|
#include <stdio.h> |
25 |
|
|
#include <stdint.h> |
26 |
|
|
#include <stdlib.h> |
27 |
|
|
#include <string.h> |
28 |
|
|
#include <unistd.h> |
29 |
|
|
#include <ohash.h> |
30 |
|
|
|
31 |
|
|
/* The complexity of topological sorting is O(e), where e is the |
32 |
|
|
* size of input. While reading input, vertices have to be identified, |
33 |
|
|
* thus add the complexity of e keys retrieval among v keys using |
34 |
|
|
* an appropriate data structure. This program uses open double hashing |
35 |
|
|
* for that purpose. See Knuth for the expected complexity of double |
36 |
|
|
* hashing (Brent variation should probably be used if v << e, as a user |
37 |
|
|
* option). |
38 |
|
|
* |
39 |
|
|
* The algorithm used for longest cycle reporting is accurate, but somewhat |
40 |
|
|
* expensive. It may need to build all free paths of the graph (a free |
41 |
|
|
* path is a path that never goes twice through the same node), whose |
42 |
|
|
* number can be as high as O(2^e). Usually, the number of free paths is |
43 |
|
|
* much smaller though. This program's author does not believe that a |
44 |
|
|
* significantly better worst-case complexity algorithm exists. |
45 |
|
|
* |
46 |
|
|
* In case of a hints file, the set of minimal nodes is maintained as a |
47 |
|
|
* heap. The resulting complexity is O(e+v log v) for the worst case. |
48 |
|
|
* The average should actually be near O(e). |
49 |
|
|
* |
50 |
|
|
* If the hints file is incomplete, there is some extra complexity incurred |
51 |
|
|
* by make_transparent, which does propagate order values to unmarked |
52 |
|
|
* nodes. In the worst case, make_transparent is O(e u), |
53 |
|
|
* where u is the number of originally unmarked nodes. |
54 |
|
|
* In practice, it is much faster. |
55 |
|
|
* |
56 |
|
|
* The simple topological sort algorithm detects cycles. This program |
57 |
|
|
* goes further, breaking cycles through the use of simple heuristics. |
58 |
|
|
* Each cycle break checks the whole set of nodes, hence if c cycles break |
59 |
|
|
* are needed, this is an extra cost of O(c v). |
60 |
|
|
* |
61 |
|
|
* Possible heuristics are as follows: |
62 |
|
|
* - break cycle at node with lowest number of predecessors (default case), |
63 |
|
|
* - break longest cycle at node with lowest number of predecessors, |
64 |
|
|
* - break cycle at next node from the hints file. |
65 |
|
|
* |
66 |
|
|
* Except for the hints file case, which sets an explicit constraint on |
67 |
|
|
* which cycle to break, those heuristics locally result in the smallest |
68 |
|
|
* number of broken edges. |
69 |
|
|
* |
70 |
|
|
* Those are admittedly greedy strategies, as is the selection of the next |
71 |
|
|
* node from the hints file amongst equivalent candidates that is used for |
72 |
|
|
* `stable' topological sorting. |
73 |
|
|
*/ |
74 |
|
|
|
75 |
|
|
#ifdef __GNUC__ |
76 |
|
|
#define UNUSED __attribute__((unused)) |
77 |
|
|
#else |
78 |
|
|
#define UNUSED |
79 |
|
|
#endif |
80 |
|
|
|
81 |
|
|
struct node; |
82 |
|
|
|
83 |
|
|
/* The set of arcs from a given node is stored as a linked list. */ |
84 |
|
|
struct link { |
85 |
|
|
struct link *next; |
86 |
|
|
struct node *node; |
87 |
|
|
}; |
88 |
|
|
|
89 |
|
|
#define NO_ORDER UINT_MAX |
90 |
|
|
|
91 |
|
|
struct node { |
92 |
|
|
unsigned int refs; /* Number of arcs left, coming into this node. |
93 |
|
|
* Note that nodes with a null count can't |
94 |
|
|
* be part of cycles. */ |
95 |
|
|
struct link *arcs; /* List of forward arcs. */ |
96 |
|
|
|
97 |
|
|
unsigned int order; /* Order of nodes according to a hint file. */ |
98 |
|
|
|
99 |
|
|
/* Cycle detection algorithms build a free path of nodes. */ |
100 |
|
|
struct node *from; /* Previous node in the current path. */ |
101 |
|
|
|
102 |
|
|
unsigned int mark; /* Mark processed nodes in cycle discovery. */ |
103 |
|
|
struct link *traverse; /* Next link to traverse when backtracking. */ |
104 |
|
|
char k[1]; /* Name of this node. */ |
105 |
|
|
}; |
106 |
|
|
|
107 |
|
|
#define HASH_START 9 |
108 |
|
|
|
109 |
|
|
struct array { |
110 |
|
|
unsigned int entries; |
111 |
|
|
struct node **t; |
112 |
|
|
}; |
113 |
|
|
|
114 |
|
|
static void nodes_init(struct ohash *); |
115 |
|
|
static struct node *node_lookup(struct ohash *, const char *, const char *); |
116 |
|
|
static void usage(void); |
117 |
|
|
static struct node *new_node(const char *, const char *); |
118 |
|
|
|
119 |
|
|
static unsigned int read_pairs(FILE *, struct ohash *, int, |
120 |
|
|
const char *, unsigned int, int); |
121 |
|
|
static void split_nodes(struct ohash *, struct array *, struct array *); |
122 |
|
|
static void make_transparent(struct ohash *); |
123 |
|
|
static void insert_arc(struct node *, struct node *); |
124 |
|
|
|
125 |
|
|
#ifdef DEBUG |
126 |
|
|
static void dump_node(struct node *); |
127 |
|
|
static void dump_array(struct array *); |
128 |
|
|
static void dump_hash(struct ohash *); |
129 |
|
|
#endif |
130 |
|
|
static unsigned int read_hints(FILE *, struct ohash *, int, |
131 |
|
|
const char *, unsigned int); |
132 |
|
|
static struct node *find_smallest_node(struct array *); |
133 |
|
|
static struct node *find_good_cycle_break(struct array *); |
134 |
|
|
static void print_cycle(struct array *); |
135 |
|
|
static struct node *find_cycle_from(struct node *, struct array *); |
136 |
|
|
static struct node *find_predecessor(struct array *, struct node *); |
137 |
|
|
static unsigned int traverse_node(struct node *, unsigned int, struct array *); |
138 |
|
|
static struct node *find_longest_cycle(struct array *, struct array *); |
139 |
|
|
static struct node *find_normal_cycle(struct array *, struct array *); |
140 |
|
|
|
141 |
|
|
static void heap_down(struct array *, unsigned int); |
142 |
|
|
static void heapify(struct array *, int); |
143 |
|
|
static struct node *dequeue(struct array *); |
144 |
|
|
static void enqueue(struct array *, struct node *); |
145 |
|
|
|
146 |
|
|
|
147 |
|
|
|
148 |
|
|
static void *hash_calloc(size_t, size_t, void *); |
149 |
|
|
static void hash_free(void *, void *); |
150 |
|
|
static void* entry_alloc(size_t, void *); |
151 |
|
|
static void *ereallocarray(void *, size_t, size_t); |
152 |
|
|
static void *emem(void *); |
153 |
|
|
#define DEBUG_TRAVERSE 0 |
154 |
|
|
static struct ohash_info node_info = { |
155 |
|
|
offsetof(struct node, k), NULL, hash_calloc, hash_free, entry_alloc }; |
156 |
|
|
static void parse_args(int, char *[], struct ohash *); |
157 |
|
|
static int tsort(struct ohash *); |
158 |
|
|
|
159 |
|
|
static int quiet_flag, long_flag, |
160 |
|
|
warn_flag, hints_flag, verbose_flag; |
161 |
|
|
|
162 |
|
|
|
163 |
|
|
int main(int, char *[]); |
164 |
|
|
|
165 |
|
|
/*** |
166 |
|
|
*** Memory handling. |
167 |
|
|
***/ |
168 |
|
|
|
169 |
|
|
static void * |
170 |
|
|
emem(void *p) |
171 |
|
20 |
{ |
172 |
✓✗ |
20 |
if (p) |
173 |
|
20 |
return p; |
174 |
|
|
else |
175 |
|
|
errx(1, "Memory exhausted"); |
176 |
|
|
} |
177 |
|
|
|
178 |
|
|
static void * |
179 |
|
|
hash_calloc(size_t n, size_t s, void *u UNUSED) |
180 |
|
4 |
{ |
181 |
|
4 |
return emem(calloc(n, s)); |
182 |
|
|
} |
183 |
|
|
|
184 |
|
|
static void |
185 |
|
|
hash_free(void *p, void *u UNUSED) |
186 |
|
4 |
{ |
187 |
|
4 |
free(p); |
188 |
|
4 |
} |
189 |
|
|
|
190 |
|
|
static void * |
191 |
|
|
entry_alloc(size_t s, void *u UNUSED) |
192 |
|
4 |
{ |
193 |
|
4 |
return ereallocarray(NULL, 1, s); |
194 |
|
|
} |
195 |
|
|
|
196 |
|
|
static void * |
197 |
|
|
ereallocarray(void *p, size_t n, size_t s) |
198 |
|
16 |
{ |
199 |
|
16 |
return emem(reallocarray(p, n, s)); |
200 |
|
|
} |
201 |
|
|
|
202 |
|
|
|
203 |
|
|
/*** |
204 |
|
|
*** Hash table. |
205 |
|
|
***/ |
206 |
|
|
|
207 |
|
|
/* Inserting and finding nodes in the hash structure. |
208 |
|
|
* We handle interval strings for efficiency wrt fgetln. */ |
209 |
|
|
static struct node * |
210 |
|
|
new_node(const char *start, const char *end) |
211 |
|
4 |
{ |
212 |
|
|
struct node *n; |
213 |
|
|
|
214 |
|
4 |
n = ohash_create_entry(&node_info, start, &end); |
215 |
|
4 |
n->from = NULL; |
216 |
|
4 |
n->arcs = NULL; |
217 |
|
4 |
n->refs = 0; |
218 |
|
4 |
n->mark = 0; |
219 |
|
4 |
n->order = NO_ORDER; |
220 |
|
4 |
n->traverse = NULL; |
221 |
|
4 |
return n; |
222 |
|
|
} |
223 |
|
|
|
224 |
|
|
|
225 |
|
|
static void |
226 |
|
|
nodes_init(struct ohash *h) |
227 |
|
4 |
{ |
228 |
|
4 |
ohash_init(h, HASH_START, &node_info); |
229 |
|
4 |
} |
230 |
|
|
|
231 |
|
|
static struct node * |
232 |
|
|
node_lookup(struct ohash *h, const char *start, const char *end) |
233 |
|
8 |
{ |
234 |
|
|
unsigned int i; |
235 |
|
|
struct node * n; |
236 |
|
|
|
237 |
|
8 |
i = ohash_qlookupi(h, start, &end); |
238 |
|
|
|
239 |
|
8 |
n = ohash_find(h, i); |
240 |
✓✓ |
8 |
if (n == NULL) |
241 |
|
4 |
n = ohash_insert(h, i, new_node(start, end)); |
242 |
|
8 |
return n; |
243 |
|
|
} |
244 |
|
|
|
245 |
|
|
#ifdef DEBUG |
246 |
|
|
static void |
247 |
|
|
dump_node(struct node *n) |
248 |
|
|
{ |
249 |
|
|
struct link *l; |
250 |
|
|
|
251 |
|
|
if (n->refs == 0) |
252 |
|
|
return; |
253 |
|
|
printf("%s (%u/%u): ", n->k, n->order, n->refs); |
254 |
|
|
for (l = n->arcs; l != NULL; l = l->next) |
255 |
|
|
if (n->refs != 0) |
256 |
|
|
printf("%s(%u/%u) ", l->node->k, l->node->order, l->node->refs); |
257 |
|
|
putchar('\n'); |
258 |
|
|
} |
259 |
|
|
|
260 |
|
|
static void |
261 |
|
|
dump_array(struct array *a) |
262 |
|
|
{ |
263 |
|
|
unsigned int i; |
264 |
|
|
|
265 |
|
|
for (i = 0; i < a->entries; i++) |
266 |
|
|
dump_node(a->t[i]); |
267 |
|
|
} |
268 |
|
|
|
269 |
|
|
static void |
270 |
|
|
dump_hash(struct ohash *h) |
271 |
|
|
{ |
272 |
|
|
unsigned int i; |
273 |
|
|
struct node *n; |
274 |
|
|
|
275 |
|
|
for (n = ohash_first(h, &i); n != NULL; n = ohash_next(h, &i)) |
276 |
|
|
dump_node(n); |
277 |
|
|
} |
278 |
|
|
#endif |
279 |
|
|
|
280 |
|
|
/*** |
281 |
|
|
*** Reading data. |
282 |
|
|
***/ |
283 |
|
|
|
284 |
|
|
static void |
285 |
|
|
insert_arc(struct node *a, struct node *b) |
286 |
|
|
{ |
287 |
|
|
struct link *l; |
288 |
|
|
|
289 |
|
|
/* Check that this arc is not already present. */ |
290 |
|
|
for (l = a->arcs; l != NULL; l = l->next) { |
291 |
|
|
if (l->node == b) |
292 |
|
|
return; |
293 |
|
|
} |
294 |
|
|
b->refs++; |
295 |
|
|
l = ereallocarray(NULL, 1, sizeof(struct link)); |
296 |
|
|
l->node = b; |
297 |
|
|
l->next = a->arcs; |
298 |
|
|
a->arcs = l; |
299 |
|
|
} |
300 |
|
|
|
301 |
|
|
static unsigned int |
302 |
|
|
read_pairs(FILE *f, struct ohash *h, int reverse, const char *name, |
303 |
|
|
unsigned int order, int hint) |
304 |
|
4 |
{ |
305 |
|
|
int toggle; |
306 |
|
|
struct node *a; |
307 |
|
|
size_t size; |
308 |
|
|
char *str; |
309 |
|
|
|
310 |
|
4 |
toggle = 1; |
311 |
|
4 |
a = NULL; |
312 |
|
|
|
313 |
✓✓ |
12 |
while ((str = fgetln(f, &size)) != NULL) { |
314 |
|
|
char *sentinel; |
315 |
|
|
|
316 |
|
4 |
sentinel = str + size; |
317 |
|
|
for (;;) { |
318 |
|
|
char *e; |
319 |
|
|
|
320 |
✓✓✓✓
|
36 |
while (str < sentinel && |
321 |
|
|
isspace((unsigned char)*str)) |
322 |
|
8 |
str++; |
323 |
✓✓ |
12 |
if (str == sentinel) |
324 |
|
4 |
break; |
325 |
|
8 |
for (e = str; |
326 |
✓✗✓✓
|
52 |
e < sentinel && !isspace((unsigned char)*e); e++) |
327 |
|
|
continue; |
328 |
✓✓ |
8 |
if (toggle) { |
329 |
|
4 |
a = node_lookup(h, str, e); |
330 |
✗✓ |
4 |
if (a->order == NO_ORDER && hint) |
331 |
|
|
a->order = order++; |
332 |
|
|
} else { |
333 |
|
|
struct node *b; |
334 |
|
|
|
335 |
|
4 |
b = node_lookup(h, str, e); |
336 |
✗✓ |
4 |
assert(a != NULL); |
337 |
✗✓ |
4 |
if (b != a) { |
338 |
|
|
if (reverse) |
339 |
|
|
insert_arc(b, a); |
340 |
|
|
else |
341 |
|
|
insert_arc(a, b); |
342 |
|
|
} |
343 |
|
|
} |
344 |
|
8 |
toggle = !toggle; |
345 |
|
8 |
str = e; |
346 |
|
8 |
} |
347 |
|
|
} |
348 |
✗✓ |
4 |
if (toggle == 0) |
349 |
|
|
errx(1, "odd number of node names in %s", name); |
350 |
✓✗✗✓
|
4 |
if (!feof(f)) |
351 |
|
|
err(1, "error reading %s", name); |
352 |
|
4 |
return order; |
353 |
|
|
} |
354 |
|
|
|
355 |
|
|
static unsigned int |
356 |
|
|
read_hints(FILE *f, struct ohash *h, int quiet, const char *name, |
357 |
|
|
unsigned int order) |
358 |
|
|
{ |
359 |
|
|
char *str; |
360 |
|
|
size_t size; |
361 |
|
|
|
362 |
|
|
while ((str = fgetln(f, &size)) != NULL) { |
363 |
|
|
char *sentinel; |
364 |
|
|
|
365 |
|
|
sentinel = str + size; |
366 |
|
|
for (;;) { |
367 |
|
|
char *e; |
368 |
|
|
struct node *a; |
369 |
|
|
|
370 |
|
|
while (str < sentinel && isspace((unsigned char)*str)) |
371 |
|
|
str++; |
372 |
|
|
if (str == sentinel) |
373 |
|
|
break; |
374 |
|
|
for (e = str; |
375 |
|
|
e < sentinel && !isspace((unsigned char)*e); e++) |
376 |
|
|
continue; |
377 |
|
|
a = node_lookup(h, str, e); |
378 |
|
|
if (a->order != NO_ORDER) { |
379 |
|
|
if (!quiet) |
380 |
|
|
warnx( |
381 |
|
|
"duplicate node %s in hints file %s", |
382 |
|
|
a->k, name); |
383 |
|
|
} else |
384 |
|
|
a->order = order++; |
385 |
|
|
str = e; |
386 |
|
|
} |
387 |
|
|
} |
388 |
|
|
if (!feof(f)) |
389 |
|
|
err(1, "error reading %s", name); |
390 |
|
|
return order; |
391 |
|
|
} |
392 |
|
|
|
393 |
|
|
|
394 |
|
|
/*** |
395 |
|
|
*** Standard heap handling routines. |
396 |
|
|
***/ |
397 |
|
|
|
398 |
|
|
static void |
399 |
|
|
heap_down(struct array *h, unsigned int i) |
400 |
|
|
{ |
401 |
|
|
unsigned int j; |
402 |
|
|
struct node *swap; |
403 |
|
|
|
404 |
|
|
for (; (j=2*i+1) < h->entries; i = j) { |
405 |
|
|
if (j+1 < h->entries && h->t[j+1]->order < h->t[j]->order) |
406 |
|
|
j++; |
407 |
|
|
if (h->t[i]->order <= h->t[j]->order) |
408 |
|
|
break; |
409 |
|
|
swap = h->t[i]; |
410 |
|
|
h->t[i] = h->t[j]; |
411 |
|
|
h->t[j] = swap; |
412 |
|
|
} |
413 |
|
|
} |
414 |
|
|
|
415 |
|
|
static void |
416 |
|
|
heapify(struct array *h, int verbose) |
417 |
|
|
{ |
418 |
|
|
unsigned int i; |
419 |
|
|
|
420 |
|
|
for (i = h->entries; i != 0;) { |
421 |
|
|
if (h->t[--i]->order == NO_ORDER && verbose) |
422 |
|
|
warnx("node %s absent from hints file", h->t[i]->k); |
423 |
|
|
heap_down(h, i); |
424 |
|
|
} |
425 |
|
|
} |
426 |
|
|
|
427 |
|
|
#define DEQUEUE(h) ( hints_flag ? dequeue(h) : (h)->t[--(h)->entries] ) |
428 |
|
|
|
429 |
|
|
static struct node * |
430 |
|
|
dequeue(struct array *h) |
431 |
|
|
{ |
432 |
|
|
struct node *n; |
433 |
|
|
|
434 |
|
|
if (h->entries == 0) |
435 |
|
|
n = NULL; |
436 |
|
|
else { |
437 |
|
|
n = h->t[0]; |
438 |
|
|
if (--h->entries != 0) { |
439 |
|
|
h->t[0] = h->t[h->entries]; |
440 |
|
|
heap_down(h, 0); |
441 |
|
|
} |
442 |
|
|
} |
443 |
|
|
return n; |
444 |
|
|
} |
445 |
|
|
|
446 |
|
|
#define ENQUEUE(h, n) do { \ |
447 |
|
|
if (hints_flag) \ |
448 |
|
|
enqueue((h), (n)); \ |
449 |
|
|
else \ |
450 |
|
|
(h)->t[(h)->entries++] = (n); \ |
451 |
|
|
} while(0); |
452 |
|
|
|
453 |
|
|
static void |
454 |
|
|
enqueue(struct array *h, struct node *n) |
455 |
|
|
{ |
456 |
|
|
unsigned int i, j; |
457 |
|
|
struct node *swap; |
458 |
|
|
|
459 |
|
|
h->t[h->entries++] = n; |
460 |
|
|
for (i = h->entries-1; i > 0; i = j) { |
461 |
|
|
j = (i-1)/2; |
462 |
|
|
if (h->t[j]->order < h->t[i]->order) |
463 |
|
|
break; |
464 |
|
|
swap = h->t[j]; |
465 |
|
|
h->t[j] = h->t[i]; |
466 |
|
|
h->t[i] = swap; |
467 |
|
|
} |
468 |
|
|
} |
469 |
|
|
|
470 |
|
|
/* Nodes without order should not hinder direct dependencies. |
471 |
|
|
* Iterate until no nodes are left. |
472 |
|
|
*/ |
473 |
|
|
static void |
474 |
|
|
make_transparent(struct ohash *hash) |
475 |
|
|
{ |
476 |
|
|
struct node *n; |
477 |
|
|
unsigned int i; |
478 |
|
|
struct link *l; |
479 |
|
|
int adjusted; |
480 |
|
|
int bad; |
481 |
|
|
unsigned int min; |
482 |
|
|
|
483 |
|
|
/* first try to solve complete nodes */ |
484 |
|
|
do { |
485 |
|
|
adjusted = 0; |
486 |
|
|
bad = 0; |
487 |
|
|
for (n = ohash_first(hash, &i); n != NULL; |
488 |
|
|
n = ohash_next(hash, &i)) { |
489 |
|
|
if (n->order == NO_ORDER) { |
490 |
|
|
min = NO_ORDER; |
491 |
|
|
|
492 |
|
|
for (l = n->arcs; l != NULL; l = l->next) { |
493 |
|
|
/* unsolved node -> delay resolution */ |
494 |
|
|
if (l->node->order == NO_ORDER) { |
495 |
|
|
bad = 1; |
496 |
|
|
break; |
497 |
|
|
} else if (l->node->order < min) |
498 |
|
|
min = l->node->order; |
499 |
|
|
} |
500 |
|
|
if (min < NO_ORDER && l == NULL) { |
501 |
|
|
n->order = min; |
502 |
|
|
adjusted = 1; |
503 |
|
|
} |
504 |
|
|
} |
505 |
|
|
} |
506 |
|
|
|
507 |
|
|
} while (adjusted); |
508 |
|
|
|
509 |
|
|
/* then, if incomplete nodes are left, do them */ |
510 |
|
|
if (bad) do { |
511 |
|
|
adjusted = 0; |
512 |
|
|
for (n = ohash_first(hash, &i); n != NULL; |
513 |
|
|
n = ohash_next(hash, &i)) |
514 |
|
|
if (n->order == NO_ORDER) |
515 |
|
|
for (l = n->arcs; l != NULL; l = l->next) |
516 |
|
|
if (l->node->order < n->order) { |
517 |
|
|
n->order = l->node->order; |
518 |
|
|
adjusted = 1; |
519 |
|
|
} |
520 |
|
|
} while (adjusted); |
521 |
|
|
} |
522 |
|
|
|
523 |
|
|
|
524 |
|
|
/*** |
525 |
|
|
*** Search through hash array for nodes. |
526 |
|
|
***/ |
527 |
|
|
|
528 |
|
|
/* Split nodes into unrefed nodes/live nodes. */ |
529 |
|
|
static void |
530 |
|
|
split_nodes(struct ohash *hash, struct array *heap, struct array *remaining) |
531 |
|
4 |
{ |
532 |
|
|
|
533 |
|
|
struct node *n; |
534 |
|
|
unsigned int i; |
535 |
|
|
|
536 |
|
4 |
heap->t = ereallocarray(NULL, ohash_entries(hash), |
537 |
|
|
sizeof(struct node *)); |
538 |
|
4 |
remaining->t = ereallocarray(NULL, ohash_entries(hash), |
539 |
|
|
sizeof(struct node *)); |
540 |
|
4 |
heap->entries = 0; |
541 |
|
4 |
remaining->entries = 0; |
542 |
|
|
|
543 |
✓✓ |
8 |
for (n = ohash_first(hash, &i); n != NULL; n = ohash_next(hash, &i)) { |
544 |
✓✗ |
4 |
if (n->refs == 0) |
545 |
|
4 |
heap->t[heap->entries++] = n; |
546 |
|
|
else |
547 |
|
|
remaining->t[remaining->entries++] = n; |
548 |
|
|
} |
549 |
|
4 |
} |
550 |
|
|
|
551 |
|
|
/* Good point to break a cycle: live node with as few refs as possible. */ |
552 |
|
|
static struct node * |
553 |
|
|
find_good_cycle_break(struct array *h) |
554 |
|
|
{ |
555 |
|
|
unsigned int i; |
556 |
|
|
unsigned int best; |
557 |
|
|
struct node *u; |
558 |
|
|
|
559 |
|
|
best = UINT_MAX; |
560 |
|
|
u = NULL; |
561 |
|
|
|
562 |
|
|
assert(h->entries != 0); |
563 |
|
|
for (i = 0; i < h->entries; i++) { |
564 |
|
|
struct node *n = h->t[i]; |
565 |
|
|
/* No need to look further. */ |
566 |
|
|
if (n->refs == 1) |
567 |
|
|
return n; |
568 |
|
|
if (n->refs != 0 && n->refs < best) { |
569 |
|
|
best = n->refs; |
570 |
|
|
u = n; |
571 |
|
|
} |
572 |
|
|
} |
573 |
|
|
assert(u != NULL); |
574 |
|
|
return u; |
575 |
|
|
} |
576 |
|
|
|
577 |
|
|
/* Retrieve the node with the smallest order. */ |
578 |
|
|
static struct node * |
579 |
|
|
find_smallest_node(struct array *h) |
580 |
|
|
{ |
581 |
|
|
unsigned int i; |
582 |
|
|
unsigned int best; |
583 |
|
|
struct node *u; |
584 |
|
|
|
585 |
|
|
best = UINT_MAX; |
586 |
|
|
u = NULL; |
587 |
|
|
|
588 |
|
|
assert(h->entries != 0); |
589 |
|
|
for (i = 0; i < h->entries; i++) { |
590 |
|
|
struct node *n = h->t[i]; |
591 |
|
|
if (n->refs != 0 && n->order < best) { |
592 |
|
|
best = n->order; |
593 |
|
|
u = n; |
594 |
|
|
} |
595 |
|
|
} |
596 |
|
|
assert(u != NULL); |
597 |
|
|
return u; |
598 |
|
|
} |
599 |
|
|
|
600 |
|
|
|
601 |
|
|
/*** |
602 |
|
|
*** Graph algorithms. |
603 |
|
|
***/ |
604 |
|
|
|
605 |
|
|
/* Explore the nodes reachable from i to find a cycle, store it in c. |
606 |
|
|
* This may fail. */ |
607 |
|
|
static struct node * |
608 |
|
|
find_cycle_from(struct node *i, struct array *c) |
609 |
|
|
{ |
610 |
|
|
struct node *n; |
611 |
|
|
|
612 |
|
|
n = i; |
613 |
|
|
/* XXX Previous cycle findings may have left this pointer non-null. */ |
614 |
|
|
i->from = NULL; |
615 |
|
|
|
616 |
|
|
for (;;) { |
617 |
|
|
/* Note that all marks are reversed before this code exits. */ |
618 |
|
|
n->mark = 1; |
619 |
|
|
if (n->traverse) |
620 |
|
|
n->traverse = n->traverse->next; |
621 |
|
|
else |
622 |
|
|
n->traverse = n->arcs; |
623 |
|
|
/* Skip over dead nodes. */ |
624 |
|
|
while (n->traverse && n->traverse->node->refs == 0) |
625 |
|
|
n->traverse = n->traverse->next; |
626 |
|
|
if (n->traverse) { |
627 |
|
|
struct node *go = n->traverse->node; |
628 |
|
|
|
629 |
|
|
if (go->mark) { |
630 |
|
|
c->entries = 0; |
631 |
|
|
for (; n != NULL && n != go; n = n->from) { |
632 |
|
|
c->t[c->entries++] = n; |
633 |
|
|
n->mark = 0; |
634 |
|
|
} |
635 |
|
|
for (; n != NULL; n = n->from) |
636 |
|
|
n->mark = 0; |
637 |
|
|
c->t[c->entries++] = go; |
638 |
|
|
return go; |
639 |
|
|
} else { |
640 |
|
|
go->from = n; |
641 |
|
|
n = go; |
642 |
|
|
} |
643 |
|
|
} else { |
644 |
|
|
n->mark = 0; |
645 |
|
|
n = n->from; |
646 |
|
|
if (n == NULL) |
647 |
|
|
return NULL; |
648 |
|
|
} |
649 |
|
|
} |
650 |
|
|
} |
651 |
|
|
|
652 |
|
|
/* Find a live predecessor of node n. This is a slow routine, as it needs |
653 |
|
|
* to go through the whole array, but it is not needed often. |
654 |
|
|
*/ |
655 |
|
|
static struct node * |
656 |
|
|
find_predecessor(struct array *a, struct node *n) |
657 |
|
|
{ |
658 |
|
|
unsigned int i; |
659 |
|
|
|
660 |
|
|
for (i = 0; i < a->entries; i++) { |
661 |
|
|
struct node *m; |
662 |
|
|
|
663 |
|
|
m = a->t[i]; |
664 |
|
|
if (m->refs != 0) { |
665 |
|
|
struct link *l; |
666 |
|
|
|
667 |
|
|
for (l = m->arcs; l != NULL; l = l->next) |
668 |
|
|
if (l->node == n) |
669 |
|
|
return m; |
670 |
|
|
} |
671 |
|
|
} |
672 |
|
|
assert(1 == 0); |
673 |
|
|
return NULL; |
674 |
|
|
} |
675 |
|
|
|
676 |
|
|
/* Traverse all strongly connected components reachable from node n. |
677 |
|
|
Start numbering them at o. Return the maximum order reached. |
678 |
|
|
Update the largest cycle found so far. |
679 |
|
|
*/ |
680 |
|
|
static unsigned int |
681 |
|
|
traverse_node(struct node *n, unsigned int o, struct array *c) |
682 |
|
|
{ |
683 |
|
|
unsigned int min, max; |
684 |
|
|
|
685 |
|
|
n->from = NULL; |
686 |
|
|
min = o; |
687 |
|
|
max = ++o; |
688 |
|
|
|
689 |
|
|
for (;;) { |
690 |
|
|
n->mark = o; |
691 |
|
|
if (DEBUG_TRAVERSE) |
692 |
|
|
printf("%s(%u) ", n->k, n->mark); |
693 |
|
|
/* Find next arc to explore. */ |
694 |
|
|
if (n->traverse) |
695 |
|
|
n->traverse = n->traverse->next; |
696 |
|
|
else |
697 |
|
|
n->traverse = n->arcs; |
698 |
|
|
/* Skip over dead nodes. */ |
699 |
|
|
while (n->traverse && n->traverse->node->refs == 0) |
700 |
|
|
n->traverse = n->traverse->next; |
701 |
|
|
/* If arc left. */ |
702 |
|
|
if (n->traverse) { |
703 |
|
|
struct node *go; |
704 |
|
|
|
705 |
|
|
go = n->traverse->node; |
706 |
|
|
/* Optimisation: if go->mark < min, we already |
707 |
|
|
* visited this strongly-connected component in |
708 |
|
|
* a previous pass. Hence, this can yield no new |
709 |
|
|
* cycle. */ |
710 |
|
|
|
711 |
|
|
/* Not part of the current path: go for it. */ |
712 |
|
|
if (go->mark == 0 || go->mark == min) { |
713 |
|
|
go->from = n; |
714 |
|
|
n = go; |
715 |
|
|
o++; |
716 |
|
|
if (o > max) |
717 |
|
|
max = o; |
718 |
|
|
/* Part of the current path: check cycle length. */ |
719 |
|
|
} else if (go->mark > min) { |
720 |
|
|
if (DEBUG_TRAVERSE) |
721 |
|
|
printf("%d\n", o - go->mark + 1); |
722 |
|
|
if (o - go->mark + 1 > c->entries) { |
723 |
|
|
struct node *t; |
724 |
|
|
unsigned int i; |
725 |
|
|
|
726 |
|
|
c->entries = o - go->mark + 1; |
727 |
|
|
i = 0; |
728 |
|
|
c->t[i++] = go; |
729 |
|
|
for (t = n; t != go; t = t->from) |
730 |
|
|
c->t[i++] = t; |
731 |
|
|
} |
732 |
|
|
} |
733 |
|
|
|
734 |
|
|
/* No arc left: backtrack. */ |
735 |
|
|
} else { |
736 |
|
|
n->mark = min; |
737 |
|
|
n = n->from; |
738 |
|
|
if (!n) |
739 |
|
|
return max; |
740 |
|
|
o--; |
741 |
|
|
} |
742 |
|
|
} |
743 |
|
|
} |
744 |
|
|
|
745 |
|
|
static void |
746 |
|
|
print_cycle(struct array *c) |
747 |
|
|
{ |
748 |
|
|
unsigned int i; |
749 |
|
|
|
750 |
|
|
/* Printing in reverse order, since cycle discoveries finds reverse |
751 |
|
|
* edges. */ |
752 |
|
|
for (i = c->entries; i != 0;) { |
753 |
|
|
i--; |
754 |
|
|
warnx("%s", c->t[i]->k); |
755 |
|
|
} |
756 |
|
|
} |
757 |
|
|
|
758 |
|
|
static struct node * |
759 |
|
|
find_longest_cycle(struct array *h, struct array *c) |
760 |
|
|
{ |
761 |
|
|
unsigned int i; |
762 |
|
|
unsigned int o; |
763 |
|
|
unsigned int best; |
764 |
|
|
struct node *n; |
765 |
|
|
static int notfirst = 0; |
766 |
|
|
|
767 |
|
|
assert(h->entries != 0); |
768 |
|
|
|
769 |
|
|
/* No cycle found yet. */ |
770 |
|
|
c->entries = 0; |
771 |
|
|
|
772 |
|
|
/* Reset the set of marks, except the first time around. */ |
773 |
|
|
if (notfirst) { |
774 |
|
|
for (i = 0; i < h->entries; i++) |
775 |
|
|
h->t[i]->mark = 0; |
776 |
|
|
} else |
777 |
|
|
notfirst = 1; |
778 |
|
|
|
779 |
|
|
o = 0; |
780 |
|
|
|
781 |
|
|
/* Traverse the array. Each unmarked, live node heralds a |
782 |
|
|
* new set of strongly connected components. */ |
783 |
|
|
for (i = 0; i < h->entries; i++) { |
784 |
|
|
n = h->t[i]; |
785 |
|
|
if (n->refs != 0 && n->mark == 0) { |
786 |
|
|
/* Each call to traverse_node uses a separate |
787 |
|
|
* interval of numbers to mark nodes. */ |
788 |
|
|
o++; |
789 |
|
|
o = traverse_node(n, o, c); |
790 |
|
|
} |
791 |
|
|
} |
792 |
|
|
|
793 |
|
|
assert(c->entries != 0); |
794 |
|
|
n = c->t[0]; |
795 |
|
|
best = n->refs; |
796 |
|
|
for (i = 0; i < c->entries; i++) { |
797 |
|
|
if (c->t[i]->refs < best) { |
798 |
|
|
n = c->t[i]; |
799 |
|
|
best = n->refs; |
800 |
|
|
} |
801 |
|
|
} |
802 |
|
|
return n; |
803 |
|
|
} |
804 |
|
|
|
805 |
|
|
static struct node * |
806 |
|
|
find_normal_cycle(struct array *h, struct array *c) |
807 |
|
|
{ |
808 |
|
|
struct node *b, *n; |
809 |
|
|
|
810 |
|
|
if (hints_flag) |
811 |
|
|
n = find_smallest_node(h); |
812 |
|
|
else |
813 |
|
|
n = find_good_cycle_break(h); |
814 |
|
|
while ((b = find_cycle_from(n, c)) == NULL) |
815 |
|
|
n = find_predecessor(h, n); |
816 |
|
|
return b; |
817 |
|
|
} |
818 |
|
|
|
819 |
|
|
|
820 |
|
|
#define plural(n) ((n) > 1 ? "s" : "") |
821 |
|
|
|
822 |
|
|
static void |
823 |
|
|
parse_args(int argc, char *argv[], struct ohash *pairs) |
824 |
|
4 |
{ |
825 |
|
|
int c; |
826 |
|
|
unsigned int order; |
827 |
|
|
int reverse_flag; |
828 |
|
|
const char **files; |
829 |
|
|
int i, j; |
830 |
|
|
|
831 |
|
4 |
i = 0; |
832 |
|
|
|
833 |
|
4 |
reverse_flag = quiet_flag = long_flag = |
834 |
|
|
warn_flag = hints_flag = verbose_flag = 0; |
835 |
|
|
/* argc is good enough, as we start at argv[1] */ |
836 |
|
4 |
files = ereallocarray(NULL, argc, sizeof (char *)); |
837 |
✓✓ |
12 |
while ((c = getopt(argc, argv, "h:flqrvw")) != -1) { |
838 |
✗✗✗✓ ✗✗✗✗
|
4 |
switch(c) { |
839 |
|
|
case 'h': |
840 |
|
|
files[i++] = optarg; |
841 |
|
|
hints_flag = 1; |
842 |
|
|
break; |
843 |
|
|
/*FALLTHRU*/ |
844 |
|
|
case 'f': |
845 |
|
|
hints_flag = 2; |
846 |
|
|
break; |
847 |
|
|
case 'l': |
848 |
|
|
long_flag = 1; |
849 |
|
|
break; |
850 |
|
|
case 'q': |
851 |
|
4 |
quiet_flag = 1; |
852 |
|
4 |
break; |
853 |
|
|
case 'r': |
854 |
|
|
reverse_flag = 1; |
855 |
|
|
break; |
856 |
|
|
case 'v': |
857 |
|
|
verbose_flag = 1; |
858 |
|
|
break; |
859 |
|
|
case 'w': |
860 |
|
|
warn_flag = 1; |
861 |
|
|
break; |
862 |
|
|
default: |
863 |
|
|
usage(); |
864 |
|
|
} |
865 |
|
|
} |
866 |
|
|
|
867 |
|
4 |
argc -= optind; |
868 |
|
4 |
argv += optind; |
869 |
|
|
|
870 |
✗✗✓ |
4 |
switch(argc) { |
871 |
|
|
case 1: |
872 |
|
|
files[i++] = argv[0]; |
873 |
|
|
break; |
874 |
|
|
case 0: |
875 |
|
|
break; |
876 |
|
|
default: |
877 |
|
|
usage(); |
878 |
|
|
} |
879 |
|
|
|
880 |
|
4 |
files[i] = NULL; |
881 |
|
|
|
882 |
|
|
/* if (pledge("stdio rpath wpath cpath", files) == -1) */ |
883 |
✗✓ |
4 |
if (pledge("stdio rpath wpath cpath", NULL) == -1) |
884 |
|
|
err(1, "pledge"); |
885 |
|
|
|
886 |
|
4 |
nodes_init(pairs); |
887 |
|
4 |
order = 0; |
888 |
|
|
|
889 |
✗✓ |
4 |
for (j = 0; j != i-argc; j++) { |
890 |
|
|
FILE *f; |
891 |
|
|
|
892 |
|
|
f = fopen(files[j], "r"); |
893 |
|
|
if (f == NULL) |
894 |
|
|
err(1, "Can't open hint file %s", files[i]); |
895 |
|
|
order = read_hints(f, pairs, quiet_flag, files[i], order); |
896 |
|
|
fclose(f); |
897 |
|
|
} |
898 |
|
4 |
free(files); |
899 |
|
|
|
900 |
✗✓ |
4 |
if (argc == 1) { |
901 |
|
|
FILE *f; |
902 |
|
|
|
903 |
|
|
f = fopen(argv[0], "r"); |
904 |
|
|
if (f == NULL) |
905 |
|
|
err(1, "Can't open file %s", argv[0]); |
906 |
|
|
order = read_pairs(f, pairs, reverse_flag, argv[0], order, |
907 |
|
|
hints_flag == 2); |
908 |
|
|
fclose(f); |
909 |
|
|
} else { |
910 |
|
4 |
order = read_pairs(stdin, pairs, reverse_flag, "stdin", |
911 |
|
|
order, hints_flag == 2); |
912 |
|
|
} |
913 |
|
|
|
914 |
✗✓ |
4 |
if (pledge("stdio wpath cpath rpath", NULL) == -1) |
915 |
|
|
err(1, "pledge"); |
916 |
|
4 |
} |
917 |
|
|
|
918 |
|
|
static int |
919 |
|
|
tsort(struct ohash *pairs) |
920 |
|
4 |
{ |
921 |
|
|
struct array aux; /* Unrefed nodes/cycle reporting. */ |
922 |
|
|
struct array remaining; |
923 |
|
|
unsigned int broken_arcs, broken_cycles; |
924 |
|
|
unsigned int left; |
925 |
|
|
|
926 |
|
4 |
broken_arcs = 0; |
927 |
|
4 |
broken_cycles = 0; |
928 |
|
|
|
929 |
✗✓ |
4 |
if (hints_flag) |
930 |
|
|
make_transparent(pairs); |
931 |
|
4 |
split_nodes(pairs, &aux, &remaining); |
932 |
|
4 |
ohash_delete(pairs); |
933 |
|
|
|
934 |
✗✓ |
4 |
if (hints_flag) |
935 |
|
|
heapify(&aux, verbose_flag); |
936 |
|
|
|
937 |
|
4 |
left = remaining.entries + aux.entries; |
938 |
✓✓ |
12 |
while (left != 0) { |
939 |
|
|
|
940 |
|
|
/* Standard topological sort. */ |
941 |
✓✓ |
8 |
while (aux.entries) { |
942 |
|
|
struct link *l; |
943 |
|
|
struct node *n; |
944 |
|
|
|
945 |
✗✓ |
4 |
n = DEQUEUE(&aux); |
946 |
|
4 |
printf("%s\n", n->k); |
947 |
|
4 |
left--; |
948 |
|
|
/* We can't free nodes, as we don't know which |
949 |
|
|
* entry we can remove in the hash table. We |
950 |
|
|
* rely on refs == 0 to recognize live nodes. |
951 |
|
|
* Decrease ref count of live nodes, enter new |
952 |
|
|
* candidates into the unrefed list. */ |
953 |
✗✓ |
4 |
for (l = n->arcs; l != NULL; l = l->next) |
954 |
|
|
if (l->node->refs != 0 && |
955 |
|
|
--l->node->refs == 0) { |
956 |
|
|
ENQUEUE(&aux, l->node); |
957 |
|
|
} |
958 |
|
|
} |
959 |
|
|
/* There are still cycles to break. */ |
960 |
✗✓ |
4 |
if (left != 0) { |
961 |
|
|
struct node *n; |
962 |
|
|
|
963 |
|
|
broken_cycles++; |
964 |
|
|
/* XXX Simple cycle detection and long cycle |
965 |
|
|
* detection are mutually exclusive. */ |
966 |
|
|
|
967 |
|
|
if (long_flag) |
968 |
|
|
n = find_longest_cycle(&remaining, &aux); |
969 |
|
|
else |
970 |
|
|
n = find_normal_cycle(&remaining, &aux); |
971 |
|
|
|
972 |
|
|
if (!quiet_flag) { |
973 |
|
|
warnx("cycle in data"); |
974 |
|
|
print_cycle(&aux); |
975 |
|
|
} |
976 |
|
|
|
977 |
|
|
if (verbose_flag) |
978 |
|
|
warnx("%u edge%s broken", n->refs, |
979 |
|
|
plural(n->refs)); |
980 |
|
|
broken_arcs += n->refs; |
981 |
|
|
n->refs = 0; |
982 |
|
|
/* Reinitialization, cycle reporting uses aux. */ |
983 |
|
|
aux.t[0] = n; |
984 |
|
|
aux.entries = 1; |
985 |
|
|
} |
986 |
|
|
} |
987 |
✗✓ |
4 |
if (verbose_flag && broken_cycles != 0) |
988 |
|
|
warnx("%u cycle%s broken, for a total of %u edge%s", |
989 |
|
|
broken_cycles, plural(broken_cycles), |
990 |
|
|
broken_arcs, plural(broken_arcs)); |
991 |
✗✓ |
4 |
if (warn_flag) |
992 |
|
|
return (broken_cycles < 256 ? broken_cycles : 255); |
993 |
|
|
else |
994 |
|
4 |
return (0); |
995 |
|
|
} |
996 |
|
|
|
997 |
|
|
int |
998 |
|
|
main(int argc, char *argv[]) |
999 |
|
4 |
{ |
1000 |
|
|
struct ohash pairs; |
1001 |
|
|
|
1002 |
✗✓ |
4 |
if (pledge("stdio rpath wpath cpath", NULL) == -1) |
1003 |
|
|
err(1, "pledge"); |
1004 |
|
|
|
1005 |
|
4 |
parse_args(argc, argv, &pairs); |
1006 |
|
4 |
return tsort(&pairs); |
1007 |
|
|
} |
1008 |
|
|
|
1009 |
|
|
|
1010 |
|
|
extern char *__progname; |
1011 |
|
|
|
1012 |
|
|
static void |
1013 |
|
|
usage(void) |
1014 |
|
|
{ |
1015 |
|
|
fprintf(stderr, "Usage: %s [-flqrvw] [-h file] [file]\n", __progname); |
1016 |
|
|
exit(1); |
1017 |
|
|
} |