1 |
|
|
/* $OpenBSD: coll.c,v 1.11 2015/12/11 21:41:51 mmcc Exp $ */ |
2 |
|
|
|
3 |
|
|
/*- |
4 |
|
|
* Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org> |
5 |
|
|
* Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com> |
6 |
|
|
* All rights reserved. |
7 |
|
|
* |
8 |
|
|
* Redistribution and use in source and binary forms, with or without |
9 |
|
|
* modification, are permitted provided that the following conditions |
10 |
|
|
* are met: |
11 |
|
|
* 1. Redistributions of source code must retain the above copyright |
12 |
|
|
* notice, this list of conditions and the following disclaimer. |
13 |
|
|
* 2. Redistributions in binary form must reproduce the above copyright |
14 |
|
|
* notice, this list of conditions and the following disclaimer in the |
15 |
|
|
* documentation and/or other materials provided with the distribution. |
16 |
|
|
* |
17 |
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
18 |
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
19 |
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
20 |
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
21 |
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
22 |
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
23 |
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
24 |
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
25 |
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
26 |
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
27 |
|
|
* SUCH DAMAGE. |
28 |
|
|
*/ |
29 |
|
|
|
30 |
|
|
#include <sys/types.h> |
31 |
|
|
|
32 |
|
|
#include <errno.h> |
33 |
|
|
#include <err.h> |
34 |
|
|
#include <langinfo.h> |
35 |
|
|
#include <limits.h> |
36 |
|
|
#include <math.h> |
37 |
|
|
#include <md5.h> |
38 |
|
|
#include <stdlib.h> |
39 |
|
|
#include <string.h> |
40 |
|
|
#include <wchar.h> |
41 |
|
|
#include <wctype.h> |
42 |
|
|
|
43 |
|
|
#include "coll.h" |
44 |
|
|
#include "vsort.h" |
45 |
|
|
|
46 |
|
|
struct key_specs *keys; |
47 |
|
|
size_t keys_num = 0; |
48 |
|
|
|
49 |
|
|
wint_t symbol_decimal_point = L'.'; |
50 |
|
|
/* there is no default thousands separator in collate rules: */ |
51 |
|
|
wint_t symbol_thousands_sep = 0; |
52 |
|
|
wint_t symbol_negative_sign = L'-'; |
53 |
|
|
wint_t symbol_positive_sign = L'+'; |
54 |
|
|
|
55 |
|
|
static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset); |
56 |
|
|
static int gnumcoll(struct key_value*, struct key_value *, size_t offset); |
57 |
|
|
static int monthcoll(struct key_value*, struct key_value *, size_t offset); |
58 |
|
|
static int numcoll(struct key_value*, struct key_value *, size_t offset); |
59 |
|
|
static int hnumcoll(struct key_value*, struct key_value *, size_t offset); |
60 |
|
|
static int randomcoll(struct key_value*, struct key_value *, size_t offset); |
61 |
|
|
static int versioncoll(struct key_value*, struct key_value *, size_t offset); |
62 |
|
|
|
63 |
|
|
/* |
64 |
|
|
* Allocate keys array |
65 |
|
|
*/ |
66 |
|
|
struct keys_array * |
67 |
|
|
keys_array_alloc(void) |
68 |
|
|
{ |
69 |
|
|
struct keys_array *ka; |
70 |
|
|
size_t sz; |
71 |
|
|
|
72 |
|
|
sz = keys_array_size(); |
73 |
|
|
ka = sort_calloc(1, sz); |
74 |
|
|
|
75 |
|
|
return ka; |
76 |
|
|
} |
77 |
|
|
|
78 |
|
|
/* |
79 |
|
|
* Calculate whether we need key hint space |
80 |
|
|
*/ |
81 |
|
|
static size_t |
82 |
|
|
key_hint_size(void) |
83 |
|
11562 |
{ |
84 |
✗✓ |
11562 |
return need_hint ? sizeof(struct key_hint) : 0; |
85 |
|
|
} |
86 |
|
|
|
87 |
|
|
/* |
88 |
|
|
* Calculate keys array size |
89 |
|
|
*/ |
90 |
|
|
size_t |
91 |
|
|
keys_array_size(void) |
92 |
|
11562 |
{ |
93 |
|
11562 |
return keys_num * (sizeof(struct key_value) + key_hint_size()); |
94 |
|
|
} |
95 |
|
|
|
96 |
|
|
/* |
97 |
|
|
* Clean data of keys array |
98 |
|
|
*/ |
99 |
|
|
void |
100 |
|
|
clean_keys_array(const struct bwstring *s, struct keys_array *ka) |
101 |
|
3854 |
{ |
102 |
✓✗ |
3854 |
if (ka) { |
103 |
|
|
size_t i; |
104 |
|
|
|
105 |
✓✓ |
7708 |
for (i = 0; i < keys_num; ++i) |
106 |
✗✓✗✗
|
3854 |
if (ka->key[i].k && ka->key[i].k != s) |
107 |
|
|
bwsfree(ka->key[i].k); |
108 |
|
3854 |
memset(ka, 0, keys_array_size()); |
109 |
|
|
} |
110 |
|
3854 |
} |
111 |
|
|
|
112 |
|
|
/* |
113 |
|
|
* Set value of a key in the keys set |
114 |
|
|
*/ |
115 |
|
|
void |
116 |
|
|
set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind) |
117 |
|
3854 |
{ |
118 |
✓✗✓✗
|
3854 |
if (ka && keys_num > ind) { |
119 |
|
|
struct key_value *kv; |
120 |
|
|
|
121 |
|
3854 |
kv = &(ka->key[ind]); |
122 |
|
|
|
123 |
✓✗ |
3854 |
if (kv->k != s) |
124 |
|
3854 |
bwsfree(kv->k); |
125 |
|
3854 |
kv->k = s; |
126 |
|
|
} |
127 |
|
3854 |
} |
128 |
|
|
|
129 |
|
|
/* |
130 |
|
|
* Initialize a sort list item |
131 |
|
|
*/ |
132 |
|
|
struct sort_list_item * |
133 |
|
|
sort_list_item_alloc(void) |
134 |
|
3854 |
{ |
135 |
|
|
struct sort_list_item *si; |
136 |
|
|
size_t sz; |
137 |
|
|
|
138 |
|
3854 |
sz = sizeof(struct sort_list_item) + keys_array_size(); |
139 |
|
3854 |
si = sort_calloc(1, sz); |
140 |
|
|
|
141 |
|
3854 |
return si; |
142 |
|
|
} |
143 |
|
|
|
144 |
|
|
size_t |
145 |
|
|
sort_list_item_size(struct sort_list_item *si) |
146 |
|
3854 |
{ |
147 |
|
3854 |
size_t i, ret = 0; |
148 |
|
|
|
149 |
✓✗ |
3854 |
if (si) { |
150 |
|
3854 |
ret = sizeof(struct sort_list_item) + keys_array_size(); |
151 |
✓✗ |
3854 |
if (si->str) |
152 |
|
3854 |
ret += bws_memsize(si->str); |
153 |
✓✓ |
7708 |
for (i = 0; i < keys_num; ++i) { |
154 |
|
|
struct key_value *kv; |
155 |
|
|
|
156 |
|
3854 |
kv = &(si->ka.key[i]); |
157 |
|
|
|
158 |
✗✓ |
3854 |
if (kv->k != si->str) |
159 |
|
|
ret += bws_memsize(kv->k); |
160 |
|
|
} |
161 |
|
|
} |
162 |
|
3854 |
return ret; |
163 |
|
|
} |
164 |
|
|
|
165 |
|
|
/* |
166 |
|
|
* Calculate key for a sort list item |
167 |
|
|
*/ |
168 |
|
|
static void |
169 |
|
|
sort_list_item_make_key(struct sort_list_item *si) |
170 |
|
3854 |
{ |
171 |
|
3854 |
preproc(si->str, &(si->ka)); |
172 |
|
3854 |
} |
173 |
|
|
|
174 |
|
|
/* |
175 |
|
|
* Set value of a sort list item. |
176 |
|
|
* Return combined string and keys memory size. |
177 |
|
|
*/ |
178 |
|
|
void |
179 |
|
|
sort_list_item_set(struct sort_list_item *si, struct bwstring *str) |
180 |
|
3854 |
{ |
181 |
✓✗ |
3854 |
if (si) { |
182 |
|
3854 |
clean_keys_array(si->str, &(si->ka)); |
183 |
✗✓ |
3854 |
if (si->str) { |
184 |
|
|
if (si->str == str) { |
185 |
|
|
/* we are trying to reset the same string */ |
186 |
|
|
return; |
187 |
|
|
} else { |
188 |
|
|
bwsfree(si->str); |
189 |
|
|
si->str = NULL; |
190 |
|
|
} |
191 |
|
|
} |
192 |
|
3854 |
si->str = str; |
193 |
|
3854 |
sort_list_item_make_key(si); |
194 |
|
|
} |
195 |
|
|
} |
196 |
|
|
|
197 |
|
|
/* |
198 |
|
|
* De-allocate a sort list item object memory |
199 |
|
|
*/ |
200 |
|
|
void |
201 |
|
|
sort_list_item_clean(struct sort_list_item *si) |
202 |
|
|
{ |
203 |
|
|
if (si) { |
204 |
|
|
clean_keys_array(si->str, &(si->ka)); |
205 |
|
|
if (si->str) { |
206 |
|
|
bwsfree(si->str); |
207 |
|
|
si->str = NULL; |
208 |
|
|
} |
209 |
|
|
} |
210 |
|
|
} |
211 |
|
|
|
212 |
|
|
/* |
213 |
|
|
* Skip columns according to specs |
214 |
|
|
*/ |
215 |
|
|
static size_t |
216 |
|
|
skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start, |
217 |
|
|
bool skip_blanks, bool *empty_key) |
218 |
|
|
{ |
219 |
|
|
if (cols < 1) |
220 |
|
|
return BWSLEN(s) + 1; |
221 |
|
|
|
222 |
|
|
if (skip_blanks) |
223 |
|
|
while (start < BWSLEN(s) && iswblank(BWS_GET(s, start))) |
224 |
|
|
++start; |
225 |
|
|
|
226 |
|
|
while (start < BWSLEN(s) && cols > 1) { |
227 |
|
|
--cols; |
228 |
|
|
++start; |
229 |
|
|
} |
230 |
|
|
|
231 |
|
|
if (start >= BWSLEN(s)) |
232 |
|
|
*empty_key = true; |
233 |
|
|
|
234 |
|
|
return start; |
235 |
|
|
} |
236 |
|
|
|
237 |
|
|
/* |
238 |
|
|
* Skip fields according to specs |
239 |
|
|
*/ |
240 |
|
|
static size_t |
241 |
|
|
skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field) |
242 |
|
|
{ |
243 |
|
|
if (fields < 2) { |
244 |
|
|
if (BWSLEN(s) == 0) |
245 |
|
|
*empty_field = true; |
246 |
|
|
return 0; |
247 |
|
|
} else if (!(sort_opts_vals.tflag)) { |
248 |
|
|
size_t cpos = 0; |
249 |
|
|
bool pb = true; |
250 |
|
|
|
251 |
|
|
while (cpos < BWSLEN(s)) { |
252 |
|
|
bool isblank; |
253 |
|
|
|
254 |
|
|
isblank = iswblank(BWS_GET(s, cpos)); |
255 |
|
|
|
256 |
|
|
if (isblank && !pb) { |
257 |
|
|
--fields; |
258 |
|
|
if (fields <= 1) |
259 |
|
|
return cpos; |
260 |
|
|
} |
261 |
|
|
pb = isblank; |
262 |
|
|
++cpos; |
263 |
|
|
} |
264 |
|
|
if (fields > 1) |
265 |
|
|
*empty_field = true; |
266 |
|
|
return cpos; |
267 |
|
|
} else { |
268 |
|
|
size_t cpos = 0; |
269 |
|
|
|
270 |
|
|
while (cpos < BWSLEN(s)) { |
271 |
|
|
if (BWS_GET(s, cpos) == (wchar_t)sort_opts_vals.field_sep) { |
272 |
|
|
--fields; |
273 |
|
|
if (fields <= 1) |
274 |
|
|
return cpos + 1; |
275 |
|
|
} |
276 |
|
|
++cpos; |
277 |
|
|
} |
278 |
|
|
if (fields > 1) |
279 |
|
|
*empty_field = true; |
280 |
|
|
return cpos; |
281 |
|
|
} |
282 |
|
|
} |
283 |
|
|
|
284 |
|
|
/* |
285 |
|
|
* Find fields start |
286 |
|
|
*/ |
287 |
|
|
static void |
288 |
|
|
find_field_start(const struct bwstring *s, struct key_specs *ks, |
289 |
|
|
size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key) |
290 |
|
|
{ |
291 |
|
|
*field_start = skip_fields_to_start(s, ks->f1, empty_field); |
292 |
|
|
if (!*empty_field) |
293 |
|
|
*key_start = skip_cols_to_start(s, ks->c1, *field_start, |
294 |
|
|
ks->pos1b, empty_key); |
295 |
|
|
else |
296 |
|
|
*empty_key = true; |
297 |
|
|
} |
298 |
|
|
|
299 |
|
|
/* |
300 |
|
|
* Find end key position |
301 |
|
|
*/ |
302 |
|
|
static size_t |
303 |
|
|
find_field_end(const struct bwstring *s, struct key_specs *ks) |
304 |
|
|
{ |
305 |
|
|
size_t f2, next_field_start, pos_end; |
306 |
|
|
bool empty_field, empty_key; |
307 |
|
|
|
308 |
|
|
empty_field = false; |
309 |
|
|
empty_key = false; |
310 |
|
|
f2 = ks->f2; |
311 |
|
|
|
312 |
|
|
if (f2 == 0) |
313 |
|
|
return BWSLEN(s) + 1; |
314 |
|
|
else { |
315 |
|
|
if (ks->c2 == 0) { |
316 |
|
|
next_field_start = skip_fields_to_start(s, f2 + 1, |
317 |
|
|
&empty_field); |
318 |
|
|
if ((next_field_start > 0) && sort_opts_vals.tflag && |
319 |
|
|
((wchar_t)sort_opts_vals.field_sep == BWS_GET(s, |
320 |
|
|
next_field_start - 1))) |
321 |
|
|
--next_field_start; |
322 |
|
|
} else |
323 |
|
|
next_field_start = skip_fields_to_start(s, f2, |
324 |
|
|
&empty_field); |
325 |
|
|
} |
326 |
|
|
|
327 |
|
|
if (empty_field || (next_field_start >= BWSLEN(s))) |
328 |
|
|
return BWSLEN(s) + 1; |
329 |
|
|
|
330 |
|
|
if (ks->c2) { |
331 |
|
|
pos_end = skip_cols_to_start(s, ks->c2, next_field_start, |
332 |
|
|
ks->pos2b, &empty_key); |
333 |
|
|
if (pos_end < BWSLEN(s)) |
334 |
|
|
++pos_end; |
335 |
|
|
} else |
336 |
|
|
pos_end = next_field_start; |
337 |
|
|
|
338 |
|
|
return pos_end; |
339 |
|
|
} |
340 |
|
|
|
341 |
|
|
/* |
342 |
|
|
* Cut a field according to the key specs |
343 |
|
|
*/ |
344 |
|
|
static struct bwstring * |
345 |
|
|
cut_field(const struct bwstring *s, struct key_specs *ks) |
346 |
|
|
{ |
347 |
|
|
struct bwstring *ret = NULL; |
348 |
|
|
|
349 |
|
|
if (s && ks) { |
350 |
|
|
size_t field_start, key_end, key_start, sz; |
351 |
|
|
bool empty_field, empty_key; |
352 |
|
|
|
353 |
|
|
field_start = 0; |
354 |
|
|
key_start = 0; |
355 |
|
|
empty_field = false; |
356 |
|
|
empty_key = false; |
357 |
|
|
|
358 |
|
|
find_field_start(s, ks, &field_start, &key_start, |
359 |
|
|
&empty_field, &empty_key); |
360 |
|
|
|
361 |
|
|
if (empty_key) |
362 |
|
|
sz = 0; |
363 |
|
|
else { |
364 |
|
|
key_end = find_field_end(s, ks); |
365 |
|
|
sz = (key_end < key_start) ? 0 : (key_end - key_start); |
366 |
|
|
} |
367 |
|
|
|
368 |
|
|
ret = bwsalloc(sz); |
369 |
|
|
if (sz) |
370 |
|
|
bwsnocpy(ret, s, key_start, sz); |
371 |
|
|
} else |
372 |
|
|
ret = bwsalloc(0); |
373 |
|
|
|
374 |
|
|
return ret; |
375 |
|
|
} |
376 |
|
|
|
377 |
|
|
/* |
378 |
|
|
* Preprocesses a line applying the necessary transformations |
379 |
|
|
* specified by command line options and returns the preprocessed |
380 |
|
|
* string, which can be used to compare. |
381 |
|
|
*/ |
382 |
|
|
int |
383 |
|
|
preproc(struct bwstring *s, struct keys_array *ka) |
384 |
|
3854 |
{ |
385 |
✗✓ |
3854 |
if (sort_opts_vals.kflag) { |
386 |
|
|
size_t i; |
387 |
|
|
for (i = 0; i < keys_num; i++) { |
388 |
|
|
struct bwstring *key; |
389 |
|
|
struct key_specs *kspecs; |
390 |
|
|
struct sort_mods *sm; |
391 |
|
|
|
392 |
|
|
kspecs = &(keys[i]); |
393 |
|
|
key = cut_field(s, kspecs); |
394 |
|
|
|
395 |
|
|
sm = &(kspecs->sm); |
396 |
|
|
if (sm->dflag) |
397 |
|
|
key = dictionary_order(key); |
398 |
|
|
else if (sm->iflag) |
399 |
|
|
key = ignore_nonprinting(key); |
400 |
|
|
if (sm->fflag || sm->Mflag) |
401 |
|
|
key = ignore_case(key); |
402 |
|
|
|
403 |
|
|
set_key_on_keys_array(ka, key, i); |
404 |
|
|
} |
405 |
|
|
} else { |
406 |
|
3854 |
struct bwstring *ret = NULL; |
407 |
|
3854 |
struct sort_mods *sm = default_sort_mods; |
408 |
|
|
|
409 |
|
|
#ifdef GNUSORT_COMPATIBILITY |
410 |
|
|
if (sm->bflag) { |
411 |
|
|
if (ret == NULL) |
412 |
|
|
ret = bwsdup(s); |
413 |
|
|
ret = ignore_leading_blanks(ret); |
414 |
|
|
} |
415 |
|
|
#endif |
416 |
✗✓ |
3854 |
if (sm->dflag) { |
417 |
|
|
if (ret == NULL) |
418 |
|
|
ret = bwsdup(s); |
419 |
|
|
ret = dictionary_order(ret); |
420 |
✗✓ |
3854 |
} else if (sm->iflag) { |
421 |
|
|
if (ret == NULL) |
422 |
|
|
ret = bwsdup(s); |
423 |
|
|
ret = ignore_nonprinting(ret); |
424 |
|
|
} |
425 |
✓✗✗✓
|
3854 |
if (sm->fflag || sm->Mflag) { |
426 |
|
|
if (ret == NULL) |
427 |
|
|
ret = bwsdup(s); |
428 |
|
|
ret = ignore_case(ret); |
429 |
|
|
} |
430 |
✓✗ |
3854 |
if (ret == NULL) |
431 |
|
3854 |
set_key_on_keys_array(ka, s, 0); |
432 |
|
|
else |
433 |
|
|
set_key_on_keys_array(ka, ret, 0); |
434 |
|
|
} |
435 |
|
|
|
436 |
|
3854 |
return 0; |
437 |
|
|
} |
438 |
|
|
|
439 |
|
|
cmpcoll_t |
440 |
|
|
get_sort_func(struct sort_mods *sm) |
441 |
|
34 |
{ |
442 |
✗✓ |
34 |
if (sm->nflag) |
443 |
|
|
return numcoll; |
444 |
✗✓ |
34 |
else if (sm->hflag) |
445 |
|
|
return hnumcoll; |
446 |
✗✓ |
34 |
else if (sm->gflag) |
447 |
|
|
return gnumcoll; |
448 |
✗✓ |
34 |
else if (sm->Mflag) |
449 |
|
|
return monthcoll; |
450 |
✓✓ |
34 |
else if (sm->Rflag) |
451 |
|
8 |
return randomcoll; |
452 |
✗✓ |
26 |
else if (sm->Vflag) |
453 |
|
|
return versioncoll; |
454 |
|
|
else |
455 |
|
26 |
return wstrcoll; |
456 |
|
|
} |
457 |
|
|
|
458 |
|
|
/* |
459 |
|
|
* Compares the given strings. Returns a positive number if |
460 |
|
|
* the first precedes the second, a negative number if the second is |
461 |
|
|
* the preceding one, and zero if they are equal. This function calls |
462 |
|
|
* the underlying collate functions, which done the actual comparison. |
463 |
|
|
*/ |
464 |
|
|
int |
465 |
|
|
key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset) |
466 |
|
7451 |
{ |
467 |
|
|
struct sort_mods *sm; |
468 |
|
7451 |
int res = 0; |
469 |
|
|
size_t i; |
470 |
|
|
|
471 |
✓✓ |
7543 |
for (i = 0; i < keys_num; ++i) { |
472 |
|
7451 |
sm = &(keys[i].sm); |
473 |
|
|
|
474 |
✗✓ |
7451 |
if (sm->rflag) |
475 |
|
|
res = sm->func(&(ps2->key[i]), &(ps1->key[i]), offset); |
476 |
|
|
else |
477 |
|
7451 |
res = sm->func(&(ps1->key[i]), &(ps2->key[i]), offset); |
478 |
|
|
|
479 |
✓✓ |
7451 |
if (res) |
480 |
|
7359 |
break; |
481 |
|
|
|
482 |
|
|
/* offset applies to only the first key */ |
483 |
|
92 |
offset = 0; |
484 |
|
|
} |
485 |
|
7451 |
return res; |
486 |
|
|
} |
487 |
|
|
|
488 |
|
|
/* |
489 |
|
|
* Compare two strings. |
490 |
|
|
* Plain symbol-by-symbol comparison. |
491 |
|
|
*/ |
492 |
|
|
int |
493 |
|
|
top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2) |
494 |
|
|
{ |
495 |
|
|
if (default_sort_mods->rflag) { |
496 |
|
|
const struct bwstring *tmp; |
497 |
|
|
|
498 |
|
|
tmp = s1; |
499 |
|
|
s1 = s2; |
500 |
|
|
s2 = tmp; |
501 |
|
|
} |
502 |
|
|
|
503 |
|
|
return bwscoll(s1, s2, 0); |
504 |
|
|
} |
505 |
|
|
|
506 |
|
|
/* |
507 |
|
|
* Compare a string and a sort list item, according to the sort specs. |
508 |
|
|
*/ |
509 |
|
|
int |
510 |
|
|
str_list_coll(struct bwstring *str1, struct sort_list_item **ss2) |
511 |
|
|
{ |
512 |
|
|
struct keys_array *ka1; |
513 |
|
|
int ret = 0; |
514 |
|
|
|
515 |
|
|
ka1 = keys_array_alloc(); |
516 |
|
|
|
517 |
|
|
preproc(str1, ka1); |
518 |
|
|
|
519 |
|
|
sort_list_item_make_key(*ss2); |
520 |
|
|
|
521 |
|
|
if (debug_sort) { |
522 |
|
|
bwsprintf(stdout, str1, "; s1=<", ">"); |
523 |
|
|
bwsprintf(stdout, (*ss2)->str, ", s2=<", ">"); |
524 |
|
|
} |
525 |
|
|
|
526 |
|
|
ret = key_coll(ka1, &((*ss2)->ka), 0); |
527 |
|
|
|
528 |
|
|
if (debug_sort) |
529 |
|
|
printf("; cmp1=%d", ret); |
530 |
|
|
|
531 |
|
|
clean_keys_array(str1, ka1); |
532 |
|
|
sort_free(ka1); |
533 |
|
|
|
534 |
|
|
if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) { |
535 |
|
|
ret = top_level_str_coll(str1, ((*ss2)->str)); |
536 |
|
|
if (debug_sort) |
537 |
|
|
printf("; cmp2=%d", ret); |
538 |
|
|
} |
539 |
|
|
|
540 |
|
|
if (debug_sort) |
541 |
|
|
putchar('\n'); |
542 |
|
|
|
543 |
|
|
return ret; |
544 |
|
|
} |
545 |
|
|
|
546 |
|
|
/* |
547 |
|
|
* Compare two sort list items, according to the sort specs. |
548 |
|
|
*/ |
549 |
|
|
int |
550 |
|
|
list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2, |
551 |
|
|
size_t offset) |
552 |
|
7451 |
{ |
553 |
|
|
int ret; |
554 |
|
|
|
555 |
|
7451 |
ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset); |
556 |
|
|
|
557 |
✗✓ |
7451 |
if (debug_sort) { |
558 |
|
|
if (offset) |
559 |
|
|
printf("; offset=%zu", offset); |
560 |
|
|
bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">"); |
561 |
|
|
bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">"); |
562 |
|
|
printf("; cmp1=%d\n", ret); |
563 |
|
|
} |
564 |
|
|
|
565 |
✓✓ |
7451 |
if (ret) |
566 |
|
7359 |
return ret; |
567 |
|
|
|
568 |
✗✓✗✗
|
92 |
if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) { |
569 |
|
|
ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str)); |
570 |
|
|
if (debug_sort) |
571 |
|
|
printf("; cmp2=%d\n", ret); |
572 |
|
|
} |
573 |
|
|
|
574 |
|
92 |
return ret; |
575 |
|
|
} |
576 |
|
|
|
577 |
|
|
/* |
578 |
|
|
* Compare two sort list items, according to the sort specs. |
579 |
|
|
*/ |
580 |
|
|
int |
581 |
|
|
list_coll(const void *ss1, const void *ss2) |
582 |
|
46 |
{ |
583 |
|
46 |
return list_coll_offset((struct sort_list_item **)ss1, |
584 |
|
|
(struct sort_list_item **)ss2, 0); |
585 |
|
|
} |
586 |
|
|
|
587 |
|
|
#define LSCDEF(N) \ |
588 |
|
|
static int \ |
589 |
|
|
list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2) \ |
590 |
|
|
{ \ |
591 |
|
|
\ |
592 |
|
|
return list_coll_offset(ss1, ss2, N); \ |
593 |
|
|
} |
594 |
|
|
|
595 |
|
|
LSCDEF(0) |
596 |
|
425 |
LSCDEF(1) |
597 |
|
281 |
LSCDEF(2) |
598 |
|
124 |
LSCDEF(3) |
599 |
|
973 |
LSCDEF(4) |
600 |
|
|
LSCDEF(5) |
601 |
|
|
LSCDEF(6) |
602 |
|
618 |
LSCDEF(7) |
603 |
|
|
LSCDEF(8) |
604 |
|
|
LSCDEF(9) |
605 |
|
|
LSCDEF(10) |
606 |
|
|
LSCDEF(11) |
607 |
|
4048 |
LSCDEF(12) |
608 |
|
920 |
LSCDEF(13) |
609 |
|
|
LSCDEF(14) |
610 |
|
|
LSCDEF(15) |
611 |
|
|
LSCDEF(16) |
612 |
|
|
LSCDEF(17) |
613 |
|
|
LSCDEF(18) |
614 |
|
|
LSCDEF(19) |
615 |
|
|
LSCDEF(20) |
616 |
|
|
|
617 |
|
|
listcoll_t |
618 |
|
|
get_list_call_func(size_t offset) |
619 |
|
328 |
{ |
620 |
|
|
static const listcoll_t lsarray[] = { list_coll_0, list_coll_1, |
621 |
|
|
list_coll_2, list_coll_3, list_coll_4, list_coll_5, |
622 |
|
|
list_coll_6, list_coll_7, list_coll_8, list_coll_9, |
623 |
|
|
list_coll_10, list_coll_11, list_coll_12, list_coll_13, |
624 |
|
|
list_coll_14, list_coll_15, list_coll_16, list_coll_17, |
625 |
|
|
list_coll_18, list_coll_19, list_coll_20 }; |
626 |
|
|
|
627 |
✓✗ |
328 |
if (offset <= 20) |
628 |
|
328 |
return lsarray[offset]; |
629 |
|
|
|
630 |
|
|
return list_coll_0; |
631 |
|
|
} |
632 |
|
|
|
633 |
|
|
/* |
634 |
|
|
* Compare two sort list items, only by their original string. |
635 |
|
|
*/ |
636 |
|
|
int |
637 |
|
|
list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2) |
638 |
|
|
{ |
639 |
|
|
return top_level_str_coll(((*ss1)->str), ((*ss2)->str)); |
640 |
|
|
} |
641 |
|
|
|
642 |
|
|
/* |
643 |
|
|
* Maximum size of a number in the string (before or after decimal point) |
644 |
|
|
*/ |
645 |
|
|
#define MAX_NUM_SIZE (128) |
646 |
|
|
|
647 |
|
|
/* |
648 |
|
|
* Set suffix value |
649 |
|
|
*/ |
650 |
|
|
static void |
651 |
|
|
setsuffix(wchar_t c, unsigned char *si) |
652 |
|
|
{ |
653 |
|
|
switch (c){ |
654 |
|
|
case L'k': |
655 |
|
|
case L'K': |
656 |
|
|
*si = 1; |
657 |
|
|
break; |
658 |
|
|
case L'M': |
659 |
|
|
*si = 2; |
660 |
|
|
break; |
661 |
|
|
case L'G': |
662 |
|
|
*si = 3; |
663 |
|
|
break; |
664 |
|
|
case L'T': |
665 |
|
|
*si = 4; |
666 |
|
|
break; |
667 |
|
|
case L'P': |
668 |
|
|
*si = 5; |
669 |
|
|
break; |
670 |
|
|
case L'E': |
671 |
|
|
*si = 6; |
672 |
|
|
break; |
673 |
|
|
case L'Z': |
674 |
|
|
*si = 7; |
675 |
|
|
break; |
676 |
|
|
case L'Y': |
677 |
|
|
*si = 8; |
678 |
|
|
break; |
679 |
|
|
default: |
680 |
|
|
*si = 0; |
681 |
|
|
}; |
682 |
|
|
} |
683 |
|
|
|
684 |
|
|
/* |
685 |
|
|
* Read string s and parse the string into a fixed-decimal-point number. |
686 |
|
|
* sign equals -1 if the number is negative (explicit plus is not allowed, |
687 |
|
|
* according to GNU sort's "info sort". |
688 |
|
|
* The number part before decimal point is in the smain, after the decimal |
689 |
|
|
* point is in sfrac, tail is the pointer to the remainder of the string. |
690 |
|
|
*/ |
691 |
|
|
static int |
692 |
|
|
read_number(struct bwstring *s0, int *sign, wchar_t *smain, size_t *main_len, wchar_t *sfrac, size_t *frac_len, unsigned char *si) |
693 |
|
|
{ |
694 |
|
|
bwstring_iterator s; |
695 |
|
|
|
696 |
|
|
s = bws_begin(s0); |
697 |
|
|
|
698 |
|
|
/* always end the fraction with zero, even if we have no fraction */ |
699 |
|
|
sfrac[0] = 0; |
700 |
|
|
|
701 |
|
|
while (iswblank(bws_get_iter_value(s))) |
702 |
|
|
s = bws_iterator_inc(s, 1); |
703 |
|
|
|
704 |
|
|
if (bws_get_iter_value(s) == (wchar_t)symbol_negative_sign) { |
705 |
|
|
*sign = -1; |
706 |
|
|
s = bws_iterator_inc(s, 1); |
707 |
|
|
} |
708 |
|
|
|
709 |
|
|
// This is '0', not '\0', do not change this |
710 |
|
|
while (iswdigit(bws_get_iter_value(s)) && |
711 |
|
|
(bws_get_iter_value(s) == L'0')) |
712 |
|
|
s = bws_iterator_inc(s, 1); |
713 |
|
|
|
714 |
|
|
while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) { |
715 |
|
|
if (iswdigit(bws_get_iter_value(s))) { |
716 |
|
|
smain[*main_len] = bws_get_iter_value(s); |
717 |
|
|
s = bws_iterator_inc(s, 1); |
718 |
|
|
*main_len += 1; |
719 |
|
|
} else if (symbol_thousands_sep && |
720 |
|
|
(bws_get_iter_value(s) == (wchar_t)symbol_thousands_sep)) |
721 |
|
|
s = bws_iterator_inc(s, 1); |
722 |
|
|
else |
723 |
|
|
break; |
724 |
|
|
} |
725 |
|
|
|
726 |
|
|
smain[*main_len] = 0; |
727 |
|
|
|
728 |
|
|
if (bws_get_iter_value(s) == (wchar_t)symbol_decimal_point) { |
729 |
|
|
s = bws_iterator_inc(s, 1); |
730 |
|
|
while (iswdigit(bws_get_iter_value(s)) && |
731 |
|
|
*frac_len < MAX_NUM_SIZE) { |
732 |
|
|
sfrac[*frac_len] = bws_get_iter_value(s); |
733 |
|
|
s = bws_iterator_inc(s, 1); |
734 |
|
|
*frac_len += 1; |
735 |
|
|
} |
736 |
|
|
sfrac[*frac_len] = 0; |
737 |
|
|
|
738 |
|
|
while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') { |
739 |
|
|
--(*frac_len); |
740 |
|
|
sfrac[*frac_len] = L'\0'; |
741 |
|
|
} |
742 |
|
|
} |
743 |
|
|
|
744 |
|
|
setsuffix(bws_get_iter_value(s), si); |
745 |
|
|
|
746 |
|
|
if ((*main_len + *frac_len) == 0) |
747 |
|
|
*sign = 0; |
748 |
|
|
|
749 |
|
|
return 0; |
750 |
|
|
} |
751 |
|
|
|
752 |
|
|
/* |
753 |
|
|
* Implements string sort. |
754 |
|
|
*/ |
755 |
|
|
static int |
756 |
|
|
wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) |
757 |
|
7451 |
{ |
758 |
|
|
|
759 |
✗✓ |
7451 |
if (debug_sort) { |
760 |
|
|
if (offset) |
761 |
|
|
printf("; offset=%zu\n", offset); |
762 |
|
|
bwsprintf(stdout, kv1->k, "; k1=<", ">"); |
763 |
|
|
printf("(%zu)", BWSLEN(kv1->k)); |
764 |
|
|
bwsprintf(stdout, kv2->k, ", k2=<", ">"); |
765 |
|
|
printf("(%zu)", BWSLEN(kv2->k)); |
766 |
|
|
} |
767 |
|
|
|
768 |
|
7451 |
return bwscoll(kv1->k, kv2->k, offset); |
769 |
|
|
} |
770 |
|
|
|
771 |
|
|
/* |
772 |
|
|
* Compare two suffixes |
773 |
|
|
*/ |
774 |
|
|
static inline int |
775 |
|
|
cmpsuffix(unsigned char si1, unsigned char si2) |
776 |
|
|
{ |
777 |
|
|
return (char)si1 - (char)si2; |
778 |
|
|
} |
779 |
|
|
|
780 |
|
|
/* |
781 |
|
|
* Implements numeric sort for -n and -h. |
782 |
|
|
*/ |
783 |
|
|
static int |
784 |
|
|
numcoll_impl(struct key_value *kv1, struct key_value *kv2, |
785 |
|
|
size_t offset __unused, bool use_suffix) |
786 |
|
|
{ |
787 |
|
|
struct bwstring *s1, *s2; |
788 |
|
|
wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1]; |
789 |
|
|
wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1]; |
790 |
|
|
int cmp_res, sign1, sign2; |
791 |
|
|
size_t frac1, frac2, main1, main2; |
792 |
|
|
unsigned char SI1, SI2; |
793 |
|
|
bool e1, e2, key1_read, key2_read; |
794 |
|
|
|
795 |
|
|
s1 = kv1->k; |
796 |
|
|
s2 = kv2->k; |
797 |
|
|
sign1 = sign2 = 0; |
798 |
|
|
main1 = main2 = 0; |
799 |
|
|
frac1 = frac2 = 0; |
800 |
|
|
|
801 |
|
|
key1_read = key2_read = false; |
802 |
|
|
|
803 |
|
|
if (debug_sort) { |
804 |
|
|
bwsprintf(stdout, s1, "; k1=<", ">"); |
805 |
|
|
bwsprintf(stdout, s2, ", k2=<", ">"); |
806 |
|
|
} |
807 |
|
|
|
808 |
|
|
if (s1 == s2) |
809 |
|
|
return 0; |
810 |
|
|
|
811 |
|
|
if (kv1->hint->status == HS_UNINITIALIZED) { |
812 |
|
|
/* read the number from the string */ |
813 |
|
|
read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1); |
814 |
|
|
key1_read = true; |
815 |
|
|
kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10); |
816 |
|
|
if (main1 < 1 && frac1 < 1) |
817 |
|
|
kv1->hint->v.nh.empty=true; |
818 |
|
|
kv1->hint->v.nh.si = SI1; |
819 |
|
|
kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ? |
820 |
|
|
HS_INITIALIZED : HS_ERROR; |
821 |
|
|
kv1->hint->v.nh.neg = (sign1 < 0) ? true : false; |
822 |
|
|
} |
823 |
|
|
|
824 |
|
|
if (kv2->hint->status == HS_UNINITIALIZED) { |
825 |
|
|
/* read the number from the string */ |
826 |
|
|
read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2); |
827 |
|
|
key2_read = true; |
828 |
|
|
kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10); |
829 |
|
|
if (main2 < 1 && frac2 < 1) |
830 |
|
|
kv2->hint->v.nh.empty=true; |
831 |
|
|
kv2->hint->v.nh.si = SI2; |
832 |
|
|
kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ? |
833 |
|
|
HS_INITIALIZED : HS_ERROR; |
834 |
|
|
kv2->hint->v.nh.neg = (sign2 < 0) ? true : false; |
835 |
|
|
} |
836 |
|
|
|
837 |
|
|
if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status == |
838 |
|
|
HS_INITIALIZED) { |
839 |
|
|
unsigned long long n1, n2; |
840 |
|
|
bool neg1, neg2; |
841 |
|
|
|
842 |
|
|
e1 = kv1->hint->v.nh.empty; |
843 |
|
|
e2 = kv2->hint->v.nh.empty; |
844 |
|
|
|
845 |
|
|
if (e1 && e2) |
846 |
|
|
return 0; |
847 |
|
|
|
848 |
|
|
neg1 = kv1->hint->v.nh.neg; |
849 |
|
|
neg2 = kv2->hint->v.nh.neg; |
850 |
|
|
|
851 |
|
|
if (neg1 && !neg2) |
852 |
|
|
return -1; |
853 |
|
|
if (neg2 && !neg1) |
854 |
|
|
return 1; |
855 |
|
|
|
856 |
|
|
if (e1) |
857 |
|
|
return neg2 ? 1 : -1; |
858 |
|
|
else if (e2) |
859 |
|
|
return neg1 ? -1 : 1; |
860 |
|
|
|
861 |
|
|
|
862 |
|
|
if (use_suffix) { |
863 |
|
|
cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si); |
864 |
|
|
if (cmp_res) |
865 |
|
|
return neg1 ? -cmp_res : cmp_res; |
866 |
|
|
} |
867 |
|
|
|
868 |
|
|
n1 = kv1->hint->v.nh.n1; |
869 |
|
|
n2 = kv2->hint->v.nh.n1; |
870 |
|
|
if (n1 < n2) |
871 |
|
|
return neg1 ? 1 : -1; |
872 |
|
|
else if (n1 > n2) |
873 |
|
|
return neg1 ? -1 : 1; |
874 |
|
|
} |
875 |
|
|
|
876 |
|
|
/* read the numbers from the strings */ |
877 |
|
|
if (!key1_read) |
878 |
|
|
read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1); |
879 |
|
|
if (!key2_read) |
880 |
|
|
read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2); |
881 |
|
|
|
882 |
|
|
e1 = ((main1 + frac1) == 0); |
883 |
|
|
e2 = ((main2 + frac2) == 0); |
884 |
|
|
|
885 |
|
|
if (e1 && e2) |
886 |
|
|
return 0; |
887 |
|
|
|
888 |
|
|
/* we know the result if the signs are different */ |
889 |
|
|
if (sign1 < 0 && sign2 >= 0) |
890 |
|
|
return -1; |
891 |
|
|
if (sign1 >= 0 && sign2 < 0) |
892 |
|
|
return 1; |
893 |
|
|
|
894 |
|
|
if (e1) |
895 |
|
|
return (sign2 < 0) ? +1 : -1; |
896 |
|
|
else if (e2) |
897 |
|
|
return (sign1 < 0) ? -1 : 1; |
898 |
|
|
|
899 |
|
|
if (use_suffix) { |
900 |
|
|
cmp_res = cmpsuffix(SI1, SI2); |
901 |
|
|
if (cmp_res) |
902 |
|
|
return (sign1 < 0) ? -cmp_res : cmp_res; |
903 |
|
|
} |
904 |
|
|
|
905 |
|
|
/* if both numbers are empty assume that the strings are equal */ |
906 |
|
|
if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1) |
907 |
|
|
return 0; |
908 |
|
|
|
909 |
|
|
/* |
910 |
|
|
* if the main part is of different size, we know the result |
911 |
|
|
* (because the leading zeros are removed) |
912 |
|
|
*/ |
913 |
|
|
if (main1 < main2) |
914 |
|
|
cmp_res = -1; |
915 |
|
|
else if (main1 > main2) |
916 |
|
|
cmp_res = +1; |
917 |
|
|
/* if the sizes are equal then simple non-collate string compare gives the correct result */ |
918 |
|
|
else |
919 |
|
|
cmp_res = wcscmp(smain1, smain2); |
920 |
|
|
|
921 |
|
|
/* check fraction */ |
922 |
|
|
if (!cmp_res) |
923 |
|
|
cmp_res = wcscmp(sfrac1, sfrac2); |
924 |
|
|
|
925 |
|
|
if (!cmp_res) |
926 |
|
|
return 0; |
927 |
|
|
|
928 |
|
|
/* reverse result if the signs are negative */ |
929 |
|
|
if (sign1 < 0 && sign2 < 0) |
930 |
|
|
cmp_res = -cmp_res; |
931 |
|
|
|
932 |
|
|
return cmp_res; |
933 |
|
|
} |
934 |
|
|
|
935 |
|
|
/* |
936 |
|
|
* Implements numeric sort (-n). |
937 |
|
|
*/ |
938 |
|
|
static int |
939 |
|
|
numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) |
940 |
|
|
{ |
941 |
|
|
return numcoll_impl(kv1, kv2, offset, false); |
942 |
|
|
} |
943 |
|
|
|
944 |
|
|
/* |
945 |
|
|
* Implements 'human' numeric sort (-h). |
946 |
|
|
*/ |
947 |
|
|
static int |
948 |
|
|
hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) |
949 |
|
|
{ |
950 |
|
|
return numcoll_impl(kv1, kv2, offset, true); |
951 |
|
|
} |
952 |
|
|
|
953 |
|
|
/* |
954 |
|
|
* Implements random sort (-R). |
955 |
|
|
*/ |
956 |
|
|
static int |
957 |
|
|
randomcoll(struct key_value *kv1, struct key_value *kv2, |
958 |
|
|
size_t offset __unused) |
959 |
|
|
{ |
960 |
|
|
struct bwstring *s1, *s2; |
961 |
|
|
MD5_CTX ctx1, ctx2; |
962 |
|
|
char *b1, *b2; |
963 |
|
|
|
964 |
|
|
s1 = kv1->k; |
965 |
|
|
s2 = kv2->k; |
966 |
|
|
|
967 |
|
|
if (debug_sort) { |
968 |
|
|
bwsprintf(stdout, s1, "; k1=<", ">"); |
969 |
|
|
bwsprintf(stdout, s2, ", k2=<", ">"); |
970 |
|
|
} |
971 |
|
|
|
972 |
|
|
if (s1 == s2) |
973 |
|
|
return 0; |
974 |
|
|
|
975 |
|
|
memcpy(&ctx1, &md5_ctx, sizeof(MD5_CTX)); |
976 |
|
|
memcpy(&ctx2, &md5_ctx, sizeof(MD5_CTX)); |
977 |
|
|
|
978 |
|
|
MD5Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1)); |
979 |
|
|
MD5Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2)); |
980 |
|
|
b1 = MD5End(&ctx1, NULL); |
981 |
|
|
b2 = MD5End(&ctx2, NULL); |
982 |
|
|
if (b1 == NULL) { |
983 |
|
|
if (b2 == NULL) |
984 |
|
|
return 0; |
985 |
|
|
else { |
986 |
|
|
sort_free(b2); |
987 |
|
|
return -1; |
988 |
|
|
} |
989 |
|
|
} else if (b2 == NULL) { |
990 |
|
|
sort_free(b1); |
991 |
|
|
return 1; |
992 |
|
|
} else { |
993 |
|
|
int cmp_res; |
994 |
|
|
|
995 |
|
|
cmp_res = strcmp(b1, b2); |
996 |
|
|
sort_free(b1); |
997 |
|
|
sort_free(b2); |
998 |
|
|
|
999 |
|
|
if (!cmp_res) |
1000 |
|
|
cmp_res = bwscoll(s1, s2, 0); |
1001 |
|
|
|
1002 |
|
|
return cmp_res; |
1003 |
|
|
} |
1004 |
|
|
} |
1005 |
|
|
|
1006 |
|
|
/* |
1007 |
|
|
* Implements version sort (-V). |
1008 |
|
|
*/ |
1009 |
|
|
static int |
1010 |
|
|
versioncoll(struct key_value *kv1, struct key_value *kv2, |
1011 |
|
|
size_t offset __unused) |
1012 |
|
|
{ |
1013 |
|
|
struct bwstring *s1, *s2; |
1014 |
|
|
|
1015 |
|
|
s1 = kv1->k; |
1016 |
|
|
s2 = kv2->k; |
1017 |
|
|
|
1018 |
|
|
if (debug_sort) { |
1019 |
|
|
bwsprintf(stdout, s1, "; k1=<", ">"); |
1020 |
|
|
bwsprintf(stdout, s2, ", k2=<", ">"); |
1021 |
|
|
} |
1022 |
|
|
|
1023 |
|
|
if (s1 == s2) |
1024 |
|
|
return 0; |
1025 |
|
|
|
1026 |
|
|
return vcmp(s1, s2); |
1027 |
|
|
} |
1028 |
|
|
|
1029 |
|
|
/* |
1030 |
|
|
* Check for minus infinity |
1031 |
|
|
*/ |
1032 |
|
|
static inline bool |
1033 |
|
|
huge_minus(double d, int err1) |
1034 |
|
|
{ |
1035 |
|
|
if (err1 == ERANGE) |
1036 |
|
|
if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL) |
1037 |
|
|
return 1; |
1038 |
|
|
|
1039 |
|
|
return 0; |
1040 |
|
|
} |
1041 |
|
|
|
1042 |
|
|
/* |
1043 |
|
|
* Check for plus infinity |
1044 |
|
|
*/ |
1045 |
|
|
static inline bool |
1046 |
|
|
huge_plus(double d, int err1) |
1047 |
|
|
{ |
1048 |
|
|
if (err1 == ERANGE) |
1049 |
|
|
if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL) |
1050 |
|
|
return 1; |
1051 |
|
|
|
1052 |
|
|
return 0; |
1053 |
|
|
} |
1054 |
|
|
|
1055 |
|
|
/* |
1056 |
|
|
* Check whether a function is a NAN |
1057 |
|
|
*/ |
1058 |
|
|
static bool |
1059 |
|
|
is_nan(double d) |
1060 |
|
|
{ |
1061 |
|
|
#if defined(NAN) |
1062 |
|
|
return (d == NAN || isnan(d)); |
1063 |
|
|
#else |
1064 |
|
|
return (isnan(d)); |
1065 |
|
|
#endif |
1066 |
|
|
} |
1067 |
|
|
|
1068 |
|
|
/* |
1069 |
|
|
* Compare two NANs |
1070 |
|
|
*/ |
1071 |
|
|
static int |
1072 |
|
|
cmp_nans(double d1, double d2) |
1073 |
|
|
{ |
1074 |
|
|
if (d1 == d2) |
1075 |
|
|
return 0; |
1076 |
|
|
return d1 < d2 ? -1 : 1; |
1077 |
|
|
} |
1078 |
|
|
|
1079 |
|
|
/* |
1080 |
|
|
* Implements general numeric sort (-g). |
1081 |
|
|
*/ |
1082 |
|
|
static int |
1083 |
|
|
gnumcoll(struct key_value *kv1, struct key_value *kv2, |
1084 |
|
|
size_t offset __unused) |
1085 |
|
|
{ |
1086 |
|
|
double d1, d2; |
1087 |
|
|
int err1, err2; |
1088 |
|
|
bool empty1, empty2, key1_read, key2_read; |
1089 |
|
|
|
1090 |
|
|
d1 = d2 = 0; |
1091 |
|
|
err1 = err2 = 0; |
1092 |
|
|
key1_read = key2_read = false; |
1093 |
|
|
|
1094 |
|
|
if (debug_sort) { |
1095 |
|
|
bwsprintf(stdout, kv1->k, "; k1=<", ">"); |
1096 |
|
|
bwsprintf(stdout, kv2->k, "; k2=<", ">"); |
1097 |
|
|
} |
1098 |
|
|
|
1099 |
|
|
if (kv1->hint->status == HS_UNINITIALIZED) { |
1100 |
|
|
errno = 0; |
1101 |
|
|
d1 = bwstod(kv1->k, &empty1); |
1102 |
|
|
err1 = errno; |
1103 |
|
|
|
1104 |
|
|
if (empty1) { |
1105 |
|
|
kv1->hint->v.gh.notnum = true; |
1106 |
|
|
kv1->hint->status = HS_INITIALIZED; |
1107 |
|
|
} else if (err1 == 0) { |
1108 |
|
|
kv1->hint->v.gh.d = d1; |
1109 |
|
|
kv1->hint->v.gh.nan = is_nan(d1); |
1110 |
|
|
kv1->hint->status = HS_INITIALIZED; |
1111 |
|
|
} else |
1112 |
|
|
kv1->hint->status = HS_ERROR; |
1113 |
|
|
|
1114 |
|
|
key1_read = true; |
1115 |
|
|
} |
1116 |
|
|
|
1117 |
|
|
if (kv2->hint->status == HS_UNINITIALIZED) { |
1118 |
|
|
errno = 0; |
1119 |
|
|
d2 = bwstod(kv2->k, &empty2); |
1120 |
|
|
err2 = errno; |
1121 |
|
|
|
1122 |
|
|
if (empty2) { |
1123 |
|
|
kv2->hint->v.gh.notnum = true; |
1124 |
|
|
kv2->hint->status = HS_INITIALIZED; |
1125 |
|
|
} else if (err2 == 0) { |
1126 |
|
|
kv2->hint->v.gh.d = d2; |
1127 |
|
|
kv2->hint->v.gh.nan = is_nan(d2); |
1128 |
|
|
kv2->hint->status = HS_INITIALIZED; |
1129 |
|
|
} else |
1130 |
|
|
kv2->hint->status = HS_ERROR; |
1131 |
|
|
|
1132 |
|
|
key2_read = true; |
1133 |
|
|
} |
1134 |
|
|
|
1135 |
|
|
if (kv1->hint->status == HS_INITIALIZED && |
1136 |
|
|
kv2->hint->status == HS_INITIALIZED) { |
1137 |
|
|
#ifdef GNUSORT_COMPATIBILITY |
1138 |
|
|
if (kv1->hint->v.gh.notnum) |
1139 |
|
|
return kv2->hint->v.gh.notnum ? 0 : -1; |
1140 |
|
|
else if (kv2->hint->v.gh.notnum) |
1141 |
|
|
return 1; |
1142 |
|
|
#else |
1143 |
|
|
if (kv1->hint->v.gh.notnum && kv2->hint->v.gh.notnum) |
1144 |
|
|
return 0; |
1145 |
|
|
#endif |
1146 |
|
|
|
1147 |
|
|
if (kv1->hint->v.gh.nan) |
1148 |
|
|
return kv2->hint->v.gh.nan ? |
1149 |
|
|
cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) : -1; |
1150 |
|
|
else if (kv2->hint->v.gh.nan) |
1151 |
|
|
return 1; |
1152 |
|
|
|
1153 |
|
|
d1 = kv1->hint->v.gh.d; |
1154 |
|
|
d2 = kv2->hint->v.gh.d; |
1155 |
|
|
|
1156 |
|
|
if (d1 < d2) |
1157 |
|
|
return -1; |
1158 |
|
|
else if (d1 > d2) |
1159 |
|
|
return 1; |
1160 |
|
|
else |
1161 |
|
|
return 0; |
1162 |
|
|
} |
1163 |
|
|
|
1164 |
|
|
if (!key1_read) { |
1165 |
|
|
errno = 0; |
1166 |
|
|
d1 = bwstod(kv1->k, &empty1); |
1167 |
|
|
err1 = errno; |
1168 |
|
|
} |
1169 |
|
|
|
1170 |
|
|
if (!key2_read) { |
1171 |
|
|
errno = 0; |
1172 |
|
|
d2 = bwstod(kv2->k, &empty2); |
1173 |
|
|
err2 = errno; |
1174 |
|
|
} |
1175 |
|
|
|
1176 |
|
|
/* Non-value case */ |
1177 |
|
|
#ifdef GNUSORT_COMPATIBILITY |
1178 |
|
|
if (empty1) |
1179 |
|
|
return empty2 ? 0 : -1; |
1180 |
|
|
else if (empty2) |
1181 |
|
|
return 1; |
1182 |
|
|
#else |
1183 |
|
|
if (empty1 && empty2) |
1184 |
|
|
return 0; |
1185 |
|
|
#endif |
1186 |
|
|
|
1187 |
|
|
/* NAN case */ |
1188 |
|
|
if (is_nan(d1)) |
1189 |
|
|
return is_nan(d2) ? cmp_nans(d1, d2) : -1; |
1190 |
|
|
else if (is_nan(d2)) |
1191 |
|
|
return 1; |
1192 |
|
|
|
1193 |
|
|
/* Infinities */ |
1194 |
|
|
if (err1 == ERANGE || err2 == ERANGE) { |
1195 |
|
|
/* Minus infinity case */ |
1196 |
|
|
if (huge_minus(d1, err1)) { |
1197 |
|
|
if (huge_minus(d2, err2)) { |
1198 |
|
|
if (d1 == d2) |
1199 |
|
|
return 0; |
1200 |
|
|
return d1 < d2 ? -1 : 1; |
1201 |
|
|
} else |
1202 |
|
|
return -1; |
1203 |
|
|
|
1204 |
|
|
} else if (huge_minus(d2, err2)) { |
1205 |
|
|
if (huge_minus(d1, err1)) { |
1206 |
|
|
if (d1 == d2) |
1207 |
|
|
return 0; |
1208 |
|
|
return d1 < d2 ? -1 : 1; |
1209 |
|
|
} else |
1210 |
|
|
return 1; |
1211 |
|
|
} |
1212 |
|
|
|
1213 |
|
|
/* Plus infinity case */ |
1214 |
|
|
if (huge_plus(d1, err1)) { |
1215 |
|
|
if (huge_plus(d2, err2)) { |
1216 |
|
|
if (d1 == d2) |
1217 |
|
|
return 0; |
1218 |
|
|
return d1 < d2 ? -1 : 1; |
1219 |
|
|
} else |
1220 |
|
|
return 1; |
1221 |
|
|
} else if (huge_plus(d2, err2)) { |
1222 |
|
|
if (huge_plus(d1, err1)) { |
1223 |
|
|
if (d1 == d2) |
1224 |
|
|
return 0; |
1225 |
|
|
return d1 < d2 ? -1 : 1; |
1226 |
|
|
} else |
1227 |
|
|
return -1; |
1228 |
|
|
} |
1229 |
|
|
} |
1230 |
|
|
|
1231 |
|
|
if (d1 == d2) |
1232 |
|
|
return 0; |
1233 |
|
|
return d1 < d2 ? -1 : 1; |
1234 |
|
|
} |
1235 |
|
|
|
1236 |
|
|
/* |
1237 |
|
|
* Implements month sort (-M). |
1238 |
|
|
*/ |
1239 |
|
|
static int |
1240 |
|
|
monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused) |
1241 |
|
|
{ |
1242 |
|
|
int val1, val2; |
1243 |
|
|
bool key1_read, key2_read; |
1244 |
|
|
|
1245 |
|
|
val1 = val2 = 0; |
1246 |
|
|
key1_read = key2_read = false; |
1247 |
|
|
|
1248 |
|
|
if (debug_sort) { |
1249 |
|
|
bwsprintf(stdout, kv1->k, "; k1=<", ">"); |
1250 |
|
|
bwsprintf(stdout, kv2->k, "; k2=<", ">"); |
1251 |
|
|
} |
1252 |
|
|
|
1253 |
|
|
if (kv1->hint->status == HS_UNINITIALIZED) { |
1254 |
|
|
kv1->hint->v.Mh.m = bws_month_score(kv1->k); |
1255 |
|
|
key1_read = true; |
1256 |
|
|
kv1->hint->status = HS_INITIALIZED; |
1257 |
|
|
} |
1258 |
|
|
|
1259 |
|
|
if (kv2->hint->status == HS_UNINITIALIZED) { |
1260 |
|
|
kv2->hint->v.Mh.m = bws_month_score(kv2->k); |
1261 |
|
|
key2_read = true; |
1262 |
|
|
kv2->hint->status = HS_INITIALIZED; |
1263 |
|
|
} |
1264 |
|
|
|
1265 |
|
|
if (kv1->hint->status == HS_INITIALIZED) { |
1266 |
|
|
val1 = kv1->hint->v.Mh.m; |
1267 |
|
|
key1_read = true; |
1268 |
|
|
} |
1269 |
|
|
|
1270 |
|
|
if (kv2->hint->status == HS_INITIALIZED) { |
1271 |
|
|
val2 = kv2->hint->v.Mh.m; |
1272 |
|
|
key2_read = true; |
1273 |
|
|
} |
1274 |
|
|
|
1275 |
|
|
if (!key1_read) |
1276 |
|
|
val1 = bws_month_score(kv1->k); |
1277 |
|
|
if (!key2_read) |
1278 |
|
|
val2 = bws_month_score(kv2->k); |
1279 |
|
|
|
1280 |
|
|
if (val1 == val2) |
1281 |
|
|
return 0; |
1282 |
|
|
return val1 < val2 ? -1 : 1; |
1283 |
|
|
} |