1 |
|
|
/* $OpenBSD: radixsort.c,v 1.9 2007/09/02 15:19:17 deraadt Exp $ */ |
2 |
|
|
/*- |
3 |
|
|
* Copyright (c) 1990, 1993 |
4 |
|
|
* The Regents of the University of California. All rights reserved. |
5 |
|
|
* |
6 |
|
|
* This code is derived from software contributed to Berkeley by |
7 |
|
|
* Peter McIlroy and by Dan Bernstein at New York University, |
8 |
|
|
* |
9 |
|
|
* Redistribution and use in source and binary forms, with or without |
10 |
|
|
* modification, are permitted provided that the following conditions |
11 |
|
|
* are met: |
12 |
|
|
* 1. Redistributions of source code must retain the above copyright |
13 |
|
|
* notice, this list of conditions and the following disclaimer. |
14 |
|
|
* 2. Redistributions in binary form must reproduce the above copyright |
15 |
|
|
* notice, this list of conditions and the following disclaimer in the |
16 |
|
|
* documentation and/or other materials provided with the distribution. |
17 |
|
|
* 3. Neither the name of the University nor the names of its contributors |
18 |
|
|
* may be used to endorse or promote products derived from this software |
19 |
|
|
* without specific prior written permission. |
20 |
|
|
* |
21 |
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
22 |
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
23 |
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
24 |
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
25 |
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
26 |
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
27 |
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
28 |
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
29 |
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
30 |
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
31 |
|
|
* SUCH DAMAGE. |
32 |
|
|
*/ |
33 |
|
|
|
34 |
|
|
/* |
35 |
|
|
* Radixsort routines. |
36 |
|
|
* |
37 |
|
|
* Program r_sort_a() is unstable but uses O(logN) extra memory for a stack. |
38 |
|
|
* Use radixsort(a, n, trace, endchar) for this case. |
39 |
|
|
* |
40 |
|
|
* For stable sorting (using N extra pointers) use sradixsort(), which calls |
41 |
|
|
* r_sort_b(). |
42 |
|
|
* |
43 |
|
|
* For a description of this code, see D. McIlroy, P. McIlroy, K. Bostic, |
44 |
|
|
* "Engineering Radix Sort". |
45 |
|
|
*/ |
46 |
|
|
|
47 |
|
|
#include <sys/types.h> |
48 |
|
|
#include <stdlib.h> |
49 |
|
|
#include <errno.h> |
50 |
|
|
|
51 |
|
|
typedef struct { |
52 |
|
|
const u_char **sa; |
53 |
|
|
int sn, si; |
54 |
|
|
} stack; |
55 |
|
|
|
56 |
|
|
static __inline void simplesort |
57 |
|
|
(const u_char **, int, int, const u_char *, u_int); |
58 |
|
|
static void r_sort_a(const u_char **, int, int, const u_char *, u_int); |
59 |
|
|
static void r_sort_b(const u_char **, |
60 |
|
|
const u_char **, int, int, const u_char *, u_int); |
61 |
|
|
|
62 |
|
|
#define THRESHOLD 20 /* Divert to simplesort(). */ |
63 |
|
|
#define SIZE 512 /* Default stack size. */ |
64 |
|
|
|
65 |
|
|
#define SETUP { \ |
66 |
|
|
if (tab == NULL) { \ |
67 |
|
|
tr = tr0; \ |
68 |
|
|
for (c = 0; c < endch; c++) \ |
69 |
|
|
tr0[c] = c + 1; \ |
70 |
|
|
tr0[c] = 0; \ |
71 |
|
|
for (c++; c < 256; c++) \ |
72 |
|
|
tr0[c] = c; \ |
73 |
|
|
endch = 0; \ |
74 |
|
|
} else { \ |
75 |
|
|
endch = tab[endch]; \ |
76 |
|
|
tr = tab; \ |
77 |
|
|
if (endch != 0 && endch != 255) { \ |
78 |
|
|
errno = EINVAL; \ |
79 |
|
|
return (-1); \ |
80 |
|
|
} \ |
81 |
|
|
} \ |
82 |
|
|
} |
83 |
|
|
|
84 |
|
|
int |
85 |
|
|
radixsort(const u_char **a, int n, const u_char *tab, u_int endch) |
86 |
|
|
{ |
87 |
|
|
const u_char *tr; |
88 |
|
|
int c; |
89 |
|
|
u_char tr0[256]; |
90 |
|
|
|
91 |
|
|
SETUP; |
92 |
|
|
r_sort_a(a, n, 0, tr, endch); |
93 |
|
|
return (0); |
94 |
|
|
} |
95 |
|
|
|
96 |
|
|
int |
97 |
|
|
sradixsort(const u_char **a, int n, const u_char *tab, u_int endch) |
98 |
|
|
{ |
99 |
|
|
const u_char *tr, **ta; |
100 |
|
|
int c; |
101 |
|
|
u_char tr0[256]; |
102 |
|
|
|
103 |
|
|
SETUP; |
104 |
|
|
if (n < THRESHOLD) |
105 |
|
|
simplesort(a, n, 0, tr, endch); |
106 |
|
|
else { |
107 |
|
|
if ((ta = calloc(n, sizeof(a))) == NULL) |
108 |
|
|
return (-1); |
109 |
|
|
r_sort_b(a, ta, n, 0, tr, endch); |
110 |
|
|
free(ta); |
111 |
|
|
} |
112 |
|
|
return (0); |
113 |
|
|
} |
114 |
|
|
|
115 |
|
|
#define empty(s) (s >= sp) |
116 |
|
|
#define pop(a, n, i) a = (--sp)->sa, n = sp->sn, i = sp->si |
117 |
|
|
#define push(a, n, i) sp->sa = a, sp->sn = n, (sp++)->si = i |
118 |
|
|
#define swap(a, b, t) t = a, a = b, b = t |
119 |
|
|
|
120 |
|
|
/* Unstable, in-place sort. */ |
121 |
|
|
void |
122 |
|
|
r_sort_a(const u_char **a, int n, int i, const u_char *tr, u_int endch) |
123 |
|
|
{ |
124 |
|
|
static int count[256], nc, bmin; |
125 |
|
|
int c; |
126 |
|
|
const u_char **ak, *r; |
127 |
|
|
stack s[SIZE], *sp, *sp0, *sp1, temp; |
128 |
|
|
int *cp, bigc; |
129 |
|
|
const u_char **an, *t, **aj, **top[256]; |
130 |
|
|
|
131 |
|
|
/* Set up stack. */ |
132 |
|
|
sp = s; |
133 |
|
|
push(a, n, i); |
134 |
|
|
while (!empty(s)) { |
135 |
|
|
pop(a, n, i); |
136 |
|
|
if (n < THRESHOLD) { |
137 |
|
|
simplesort(a, n, i, tr, endch); |
138 |
|
|
continue; |
139 |
|
|
} |
140 |
|
|
an = a + n; |
141 |
|
|
|
142 |
|
|
/* Make character histogram. */ |
143 |
|
|
if (nc == 0) { |
144 |
|
|
bmin = 255; /* First occupied bin, excluding eos. */ |
145 |
|
|
for (ak = a; ak < an;) { |
146 |
|
|
c = tr[(*ak++)[i]]; |
147 |
|
|
if (++count[c] == 1 && c != endch) { |
148 |
|
|
if (c < bmin) |
149 |
|
|
bmin = c; |
150 |
|
|
nc++; |
151 |
|
|
} |
152 |
|
|
} |
153 |
|
|
if (sp + nc > s + SIZE) { /* Get more stack. */ |
154 |
|
|
r_sort_a(a, n, i, tr, endch); |
155 |
|
|
continue; |
156 |
|
|
} |
157 |
|
|
} |
158 |
|
|
|
159 |
|
|
/* |
160 |
|
|
* Set top[]; push incompletely sorted bins onto stack. |
161 |
|
|
* top[] = pointers to last out-of-place element in bins. |
162 |
|
|
* count[] = counts of elements in bins. |
163 |
|
|
* Before permuting: top[c-1] + count[c] = top[c]; |
164 |
|
|
* during deal: top[c] counts down to top[c-1]. |
165 |
|
|
*/ |
166 |
|
|
sp0 = sp1 = sp; /* Stack position of biggest bin. */ |
167 |
|
|
bigc = 2; /* Size of biggest bin. */ |
168 |
|
|
if (endch == 0) /* Special case: set top[eos]. */ |
169 |
|
|
top[0] = ak = a + count[0]; |
170 |
|
|
else { |
171 |
|
|
ak = a; |
172 |
|
|
top[255] = an; |
173 |
|
|
} |
174 |
|
|
for (cp = count + bmin; nc > 0; cp++) { |
175 |
|
|
while (*cp == 0) /* Find next non-empty pile. */ |
176 |
|
|
cp++; |
177 |
|
|
if (*cp > 1) { |
178 |
|
|
if (*cp > bigc) { |
179 |
|
|
bigc = *cp; |
180 |
|
|
sp1 = sp; |
181 |
|
|
} |
182 |
|
|
push(ak, *cp, i+1); |
183 |
|
|
} |
184 |
|
|
top[cp-count] = ak += *cp; |
185 |
|
|
nc--; |
186 |
|
|
} |
187 |
|
|
swap(*sp0, *sp1, temp); /* Play it safe -- biggest bin last. */ |
188 |
|
|
|
189 |
|
|
/* |
190 |
|
|
* Permute misplacements home. Already home: everything |
191 |
|
|
* before aj, and in bin[c], items from top[c] on. |
192 |
|
|
* Inner loop: |
193 |
|
|
* r = next element to put in place; |
194 |
|
|
* ak = top[r[i]] = location to put the next element. |
195 |
|
|
* aj = bottom of 1st disordered bin. |
196 |
|
|
* Outer loop: |
197 |
|
|
* Once the 1st disordered bin is done, ie. aj >= ak, |
198 |
|
|
* aj<-aj + count[c] connects the bins in a linked list; |
199 |
|
|
* reset count[c]. |
200 |
|
|
*/ |
201 |
|
|
for (aj = a; aj < an; *aj = r, aj += count[c], count[c] = 0) |
202 |
|
|
for (r = *aj; aj < (ak = --top[c = tr[r[i]]]);) |
203 |
|
|
swap(*ak, r, t); |
204 |
|
|
} |
205 |
|
|
} |
206 |
|
|
|
207 |
|
|
/* Stable sort, requiring additional memory. */ |
208 |
|
|
void |
209 |
|
|
r_sort_b(const u_char **a, const u_char **ta, int n, int i, const u_char *tr, |
210 |
|
|
u_int endch) |
211 |
|
|
{ |
212 |
|
|
static int count[256], nc, bmin; |
213 |
|
|
int c; |
214 |
|
|
const u_char **ak, **ai; |
215 |
|
|
stack s[512], *sp, *sp0, *sp1, temp; |
216 |
|
|
const u_char **top[256]; |
217 |
|
|
int *cp, bigc; |
218 |
|
|
|
219 |
|
|
sp = s; |
220 |
|
|
push(a, n, i); |
221 |
|
|
while (!empty(s)) { |
222 |
|
|
pop(a, n, i); |
223 |
|
|
if (n < THRESHOLD) { |
224 |
|
|
simplesort(a, n, i, tr, endch); |
225 |
|
|
continue; |
226 |
|
|
} |
227 |
|
|
|
228 |
|
|
if (nc == 0) { |
229 |
|
|
bmin = 255; |
230 |
|
|
for (ak = a + n; --ak >= a;) { |
231 |
|
|
c = tr[(*ak)[i]]; |
232 |
|
|
if (++count[c] == 1 && c != endch) { |
233 |
|
|
if (c < bmin) |
234 |
|
|
bmin = c; |
235 |
|
|
nc++; |
236 |
|
|
} |
237 |
|
|
} |
238 |
|
|
if (sp + nc > s + SIZE) { |
239 |
|
|
r_sort_b(a, ta, n, i, tr, endch); |
240 |
|
|
continue; |
241 |
|
|
} |
242 |
|
|
} |
243 |
|
|
|
244 |
|
|
sp0 = sp1 = sp; |
245 |
|
|
bigc = 2; |
246 |
|
|
if (endch == 0) { |
247 |
|
|
top[0] = ak = a + count[0]; |
248 |
|
|
count[0] = 0; |
249 |
|
|
} else { |
250 |
|
|
ak = a; |
251 |
|
|
top[255] = a + n; |
252 |
|
|
count[255] = 0; |
253 |
|
|
} |
254 |
|
|
for (cp = count + bmin; nc > 0; cp++) { |
255 |
|
|
while (*cp == 0) |
256 |
|
|
cp++; |
257 |
|
|
if ((c = *cp) > 1) { |
258 |
|
|
if (c > bigc) { |
259 |
|
|
bigc = c; |
260 |
|
|
sp1 = sp; |
261 |
|
|
} |
262 |
|
|
push(ak, c, i+1); |
263 |
|
|
} |
264 |
|
|
top[cp-count] = ak += c; |
265 |
|
|
*cp = 0; /* Reset count[]. */ |
266 |
|
|
nc--; |
267 |
|
|
} |
268 |
|
|
swap(*sp0, *sp1, temp); |
269 |
|
|
|
270 |
|
|
for (ak = ta + n, ai = a+n; ak > ta;) /* Copy to temp. */ |
271 |
|
|
*--ak = *--ai; |
272 |
|
|
for (ak = ta+n; --ak >= ta;) /* Deal to piles. */ |
273 |
|
|
*--top[tr[(*ak)[i]]] = *ak; |
274 |
|
|
} |
275 |
|
|
} |
276 |
|
|
|
277 |
|
|
static __inline void |
278 |
|
|
simplesort(const u_char **a, int n, int b, const u_char *tr, u_int endch) |
279 |
|
|
/* insertion sort */ |
280 |
|
|
{ |
281 |
|
|
u_char ch; |
282 |
|
|
const u_char **ak, **ai, *s, *t; |
283 |
|
|
|
284 |
|
|
for (ak = a+1; --n >= 1; ak++) |
285 |
|
|
for (ai = ak; ai > a; ai--) { |
286 |
|
|
for (s = ai[0] + b, t = ai[-1] + b; |
287 |
|
|
(ch = tr[*s]) != endch; s++, t++) |
288 |
|
|
if (ch != tr[*t]) |
289 |
|
|
break; |
290 |
|
|
if (ch >= tr[*t]) |
291 |
|
|
break; |
292 |
|
|
swap(ai[0], ai[-1], s); |
293 |
|
|
} |
294 |
|
|
} |