GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: usr.bin/sort/coll.c Lines: 91 564 16.1 %
Date: 2016-12-06 Branches: 39 465 8.4 %

Line Branch Exec Source
1
/*	$OpenBSD: coll.c,v 1.11 2015/12/11 21:41:51 mmcc Exp $	*/
2
3
/*-
4
 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
5
 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
6
 * All rights reserved.
7
 *
8
 * Redistribution and use in source and binary forms, with or without
9
 * modification, are permitted provided that the following conditions
10
 * are met:
11
 * 1. Redistributions of source code must retain the above copyright
12
 *    notice, this list of conditions and the following disclaimer.
13
 * 2. Redistributions in binary form must reproduce the above copyright
14
 *    notice, this list of conditions and the following disclaimer in the
15
 *    documentation and/or other materials provided with the distribution.
16
 *
17
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27
 * SUCH DAMAGE.
28
 */
29
30
#include <sys/types.h>
31
32
#include <errno.h>
33
#include <err.h>
34
#include <langinfo.h>
35
#include <limits.h>
36
#include <math.h>
37
#include <md5.h>
38
#include <stdlib.h>
39
#include <string.h>
40
#include <wchar.h>
41
#include <wctype.h>
42
43
#include "coll.h"
44
#include "vsort.h"
45
46
struct key_specs *keys;
47
size_t keys_num = 0;
48
49
wint_t symbol_decimal_point = L'.';
50
/* there is no default thousands separator in collate rules: */
51
wint_t symbol_thousands_sep = 0;
52
wint_t symbol_negative_sign = L'-';
53
wint_t symbol_positive_sign = L'+';
54
55
static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset);
56
static int gnumcoll(struct key_value*, struct key_value *, size_t offset);
57
static int monthcoll(struct key_value*, struct key_value *, size_t offset);
58
static int numcoll(struct key_value*, struct key_value *, size_t offset);
59
static int hnumcoll(struct key_value*, struct key_value *, size_t offset);
60
static int randomcoll(struct key_value*, struct key_value *, size_t offset);
61
static int versioncoll(struct key_value*, struct key_value *, size_t offset);
62
63
/*
64
 * Allocate keys array
65
 */
66
struct keys_array *
67
keys_array_alloc(void)
68
{
69
	struct keys_array *ka;
70
	size_t sz;
71
72
	sz = keys_array_size();
73
	ka = sort_calloc(1, sz);
74
75
	return ka;
76
}
77
78
/*
79
 * Calculate whether we need key hint space
80
 */
81
static size_t
82
key_hint_size(void)
83
11562
{
84
11562
	return need_hint ? sizeof(struct key_hint) : 0;
85
}
86
87
/*
88
 * Calculate keys array size
89
 */
90
size_t
91
keys_array_size(void)
92
11562
{
93
11562
	return keys_num * (sizeof(struct key_value) + key_hint_size());
94
}
95
96
/*
97
 * Clean data of keys array
98
 */
99
void
100
clean_keys_array(const struct bwstring *s, struct keys_array *ka)
101
3854
{
102
3854
	if (ka) {
103
		size_t i;
104
105
7708
		for (i = 0; i < keys_num; ++i)
106

3854
			if (ka->key[i].k && ka->key[i].k != s)
107
				bwsfree(ka->key[i].k);
108
3854
		memset(ka, 0, keys_array_size());
109
	}
110
3854
}
111
112
/*
113
 * Set value of a key in the keys set
114
 */
115
void
116
set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind)
117
3854
{
118

3854
	if (ka && keys_num > ind) {
119
		struct key_value *kv;
120
121
3854
		kv = &(ka->key[ind]);
122
123
3854
		if (kv->k != s)
124
3854
			bwsfree(kv->k);
125
3854
		kv->k = s;
126
	}
127
3854
}
128
129
/*
130
 * Initialize a sort list item
131
 */
132
struct sort_list_item *
133
sort_list_item_alloc(void)
134
3854
{
135
	struct sort_list_item *si;
136
	size_t sz;
137
138
3854
	sz = sizeof(struct sort_list_item) + keys_array_size();
139
3854
	si = sort_calloc(1, sz);
140
141
3854
	return si;
142
}
143
144
size_t
145
sort_list_item_size(struct sort_list_item *si)
146
3854
{
147
3854
	size_t i, ret = 0;
148
149
3854
	if (si) {
150
3854
		ret = sizeof(struct sort_list_item) + keys_array_size();
151
3854
		if (si->str)
152
3854
			ret += bws_memsize(si->str);
153
7708
		for (i = 0; i < keys_num; ++i) {
154
			struct key_value *kv;
155
156
3854
			kv = &(si->ka.key[i]);
157
158
3854
			if (kv->k != si->str)
159
				ret += bws_memsize(kv->k);
160
		}
161
	}
162
3854
	return ret;
163
}
164
165
/*
166
 * Calculate key for a sort list item
167
 */
168
static void
169
sort_list_item_make_key(struct sort_list_item *si)
170
3854
{
171
3854
	preproc(si->str, &(si->ka));
172
3854
}
173
174
/*
175
 * Set value of a sort list item.
176
 * Return combined string and keys memory size.
177
 */
178
void
179
sort_list_item_set(struct sort_list_item *si, struct bwstring *str)
180
3854
{
181
3854
	if (si) {
182
3854
		clean_keys_array(si->str, &(si->ka));
183
3854
		if (si->str) {
184
			if (si->str == str) {
185
				/* we are trying to reset the same string */
186
				return;
187
			} else {
188
				bwsfree(si->str);
189
				si->str = NULL;
190
			}
191
		}
192
3854
		si->str = str;
193
3854
		sort_list_item_make_key(si);
194
	}
195
}
196
197
/*
198
 * De-allocate a sort list item object memory
199
 */
200
void
201
sort_list_item_clean(struct sort_list_item *si)
202
{
203
	if (si) {
204
		clean_keys_array(si->str, &(si->ka));
205
		if (si->str) {
206
			bwsfree(si->str);
207
			si->str = NULL;
208
		}
209
	}
210
}
211
212
/*
213
 * Skip columns according to specs
214
 */
215
static size_t
216
skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start,
217
    bool skip_blanks, bool *empty_key)
218
{
219
	if (cols < 1)
220
		return BWSLEN(s) + 1;
221
222
	if (skip_blanks)
223
		while (start < BWSLEN(s) && iswblank(BWS_GET(s, start)))
224
			++start;
225
226
	while (start < BWSLEN(s) && cols > 1) {
227
		--cols;
228
		++start;
229
	}
230
231
	if (start >= BWSLEN(s))
232
		*empty_key = true;
233
234
	return start;
235
}
236
237
/*
238
 * Skip fields according to specs
239
 */
240
static size_t
241
skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field)
242
{
243
	if (fields < 2) {
244
		if (BWSLEN(s) == 0)
245
			*empty_field = true;
246
		return 0;
247
	} else if (!(sort_opts_vals.tflag)) {
248
		size_t cpos = 0;
249
		bool pb = true;
250
251
		while (cpos < BWSLEN(s)) {
252
			bool isblank;
253
254
			isblank = iswblank(BWS_GET(s, cpos));
255
256
			if (isblank && !pb) {
257
				--fields;
258
				if (fields <= 1)
259
					return cpos;
260
			}
261
			pb = isblank;
262
			++cpos;
263
		}
264
		if (fields > 1)
265
			*empty_field = true;
266
		return cpos;
267
	} else {
268
		size_t cpos = 0;
269
270
		while (cpos < BWSLEN(s)) {
271
			if (BWS_GET(s, cpos) == (wchar_t)sort_opts_vals.field_sep) {
272
				--fields;
273
				if (fields <= 1)
274
					return cpos + 1;
275
			}
276
			++cpos;
277
		}
278
		if (fields > 1)
279
			*empty_field = true;
280
		return cpos;
281
	}
282
}
283
284
/*
285
 * Find fields start
286
 */
287
static void
288
find_field_start(const struct bwstring *s, struct key_specs *ks,
289
    size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key)
290
{
291
	*field_start = skip_fields_to_start(s, ks->f1, empty_field);
292
	if (!*empty_field)
293
		*key_start = skip_cols_to_start(s, ks->c1, *field_start,
294
		    ks->pos1b, empty_key);
295
	else
296
		*empty_key = true;
297
}
298
299
/*
300
 * Find end key position
301
 */
302
static size_t
303
find_field_end(const struct bwstring *s, struct key_specs *ks)
304
{
305
	size_t f2, next_field_start, pos_end;
306
	bool empty_field, empty_key;
307
308
	empty_field = false;
309
	empty_key = false;
310
	f2 = ks->f2;
311
312
	if (f2 == 0)
313
		return BWSLEN(s) + 1;
314
	else {
315
		if (ks->c2 == 0) {
316
			next_field_start = skip_fields_to_start(s, f2 + 1,
317
			    &empty_field);
318
			if ((next_field_start > 0) && sort_opts_vals.tflag &&
319
			    ((wchar_t)sort_opts_vals.field_sep == BWS_GET(s,
320
			    next_field_start - 1)))
321
				--next_field_start;
322
		} else
323
			next_field_start = skip_fields_to_start(s, f2,
324
			    &empty_field);
325
	}
326
327
	if (empty_field || (next_field_start >= BWSLEN(s)))
328
		return BWSLEN(s) + 1;
329
330
	if (ks->c2) {
331
		pos_end = skip_cols_to_start(s, ks->c2, next_field_start,
332
		    ks->pos2b, &empty_key);
333
		if (pos_end < BWSLEN(s))
334
			++pos_end;
335
	} else
336
		pos_end = next_field_start;
337
338
	return pos_end;
339
}
340
341
/*
342
 * Cut a field according to the key specs
343
 */
344
static struct bwstring *
345
cut_field(const struct bwstring *s, struct key_specs *ks)
346
{
347
	struct bwstring *ret = NULL;
348
349
	if (s && ks) {
350
		size_t field_start, key_end, key_start, sz;
351
		bool empty_field, empty_key;
352
353
		field_start = 0;
354
		key_start = 0;
355
		empty_field = false;
356
		empty_key = false;
357
358
		find_field_start(s, ks, &field_start, &key_start,
359
		    &empty_field, &empty_key);
360
361
		if (empty_key)
362
			sz = 0;
363
		else {
364
			key_end = find_field_end(s, ks);
365
			sz = (key_end < key_start) ? 0 : (key_end - key_start);
366
		}
367
368
		ret = bwsalloc(sz);
369
		if (sz)
370
			bwsnocpy(ret, s, key_start, sz);
371
	} else
372
		ret = bwsalloc(0);
373
374
	return ret;
375
}
376
377
/*
378
 * Preprocesses a line applying the necessary transformations
379
 * specified by command line options and returns the preprocessed
380
 * string, which can be used to compare.
381
 */
382
int
383
preproc(struct bwstring *s, struct keys_array *ka)
384
3854
{
385
3854
	if (sort_opts_vals.kflag) {
386
		size_t i;
387
		for (i = 0; i < keys_num; i++) {
388
			struct bwstring *key;
389
			struct key_specs *kspecs;
390
			struct sort_mods *sm;
391
392
			kspecs = &(keys[i]);
393
			key = cut_field(s, kspecs);
394
395
			sm = &(kspecs->sm);
396
			if (sm->dflag)
397
				key = dictionary_order(key);
398
			else if (sm->iflag)
399
				key = ignore_nonprinting(key);
400
			if (sm->fflag || sm->Mflag)
401
				key = ignore_case(key);
402
403
			set_key_on_keys_array(ka, key, i);
404
		}
405
	} else {
406
3854
		struct bwstring *ret = NULL;
407
3854
		struct sort_mods *sm = default_sort_mods;
408
409
#ifdef GNUSORT_COMPATIBILITY
410
		if (sm->bflag) {
411
			if (ret == NULL)
412
				ret = bwsdup(s);
413
			ret = ignore_leading_blanks(ret);
414
		}
415
#endif
416
3854
		if (sm->dflag) {
417
			if (ret == NULL)
418
				ret = bwsdup(s);
419
			ret = dictionary_order(ret);
420
3854
		} else if (sm->iflag) {
421
			if (ret == NULL)
422
				ret = bwsdup(s);
423
			ret = ignore_nonprinting(ret);
424
		}
425

3854
		if (sm->fflag || sm->Mflag) {
426
			if (ret == NULL)
427
				ret = bwsdup(s);
428
			ret = ignore_case(ret);
429
		}
430
3854
		if (ret == NULL)
431
3854
			set_key_on_keys_array(ka, s, 0);
432
		else
433
			set_key_on_keys_array(ka, ret, 0);
434
	}
435
436
3854
	return 0;
437
}
438
439
cmpcoll_t
440
get_sort_func(struct sort_mods *sm)
441
34
{
442
34
	if (sm->nflag)
443
		return numcoll;
444
34
	else if (sm->hflag)
445
		return hnumcoll;
446
34
	else if (sm->gflag)
447
		return gnumcoll;
448
34
	else if (sm->Mflag)
449
		return monthcoll;
450
34
	else if (sm->Rflag)
451
8
		return randomcoll;
452
26
	else if (sm->Vflag)
453
		return versioncoll;
454
	else
455
26
		return wstrcoll;
456
}
457
458
/*
459
 * Compares the given strings.  Returns a positive number if
460
 * the first precedes the second, a negative number if the second is
461
 * the preceding one, and zero if they are equal.  This function calls
462
 * the underlying collate functions, which done the actual comparison.
463
 */
464
int
465
key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset)
466
7451
{
467
	struct sort_mods *sm;
468
7451
	int res = 0;
469
	size_t i;
470
471
7543
	for (i = 0; i < keys_num; ++i) {
472
7451
		sm = &(keys[i].sm);
473
474
7451
		if (sm->rflag)
475
			res = sm->func(&(ps2->key[i]), &(ps1->key[i]), offset);
476
		else
477
7451
			res = sm->func(&(ps1->key[i]), &(ps2->key[i]), offset);
478
479
7451
		if (res)
480
7359
			break;
481
482
		/* offset applies to only the first key */
483
92
		offset = 0;
484
	}
485
7451
	return res;
486
}
487
488
/*
489
 * Compare two strings.
490
 * Plain symbol-by-symbol comparison.
491
 */
492
int
493
top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2)
494
{
495
	if (default_sort_mods->rflag) {
496
		const struct bwstring *tmp;
497
498
		tmp = s1;
499
		s1 = s2;
500
		s2 = tmp;
501
	}
502
503
	return bwscoll(s1, s2, 0);
504
}
505
506
/*
507
 * Compare a string and a sort list item, according to the sort specs.
508
 */
509
int
510
str_list_coll(struct bwstring *str1, struct sort_list_item **ss2)
511
{
512
	struct keys_array *ka1;
513
	int ret = 0;
514
515
	ka1 = keys_array_alloc();
516
517
	preproc(str1, ka1);
518
519
	sort_list_item_make_key(*ss2);
520
521
	if (debug_sort) {
522
		bwsprintf(stdout, str1, "; s1=<", ">");
523
		bwsprintf(stdout, (*ss2)->str, ", s2=<", ">");
524
	}
525
526
	ret = key_coll(ka1, &((*ss2)->ka), 0);
527
528
	if (debug_sort)
529
		printf("; cmp1=%d", ret);
530
531
	clean_keys_array(str1, ka1);
532
	sort_free(ka1);
533
534
	if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
535
		ret = top_level_str_coll(str1, ((*ss2)->str));
536
		if (debug_sort)
537
			printf("; cmp2=%d", ret);
538
	}
539
540
	if (debug_sort)
541
		putchar('\n');
542
543
	return ret;
544
}
545
546
/*
547
 * Compare two sort list items, according to the sort specs.
548
 */
549
int
550
list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2,
551
    size_t offset)
552
7451
{
553
	int ret;
554
555
7451
	ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset);
556
557
7451
	if (debug_sort) {
558
		if (offset)
559
			printf("; offset=%zu", offset);
560
		bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">");
561
		bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">");
562
		printf("; cmp1=%d\n", ret);
563
	}
564
565
7451
	if (ret)
566
7359
		return ret;
567
568

92
	if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
569
		ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str));
570
		if (debug_sort)
571
			printf("; cmp2=%d\n", ret);
572
	}
573
574
92
	return ret;
575
}
576
577
/*
578
 * Compare two sort list items, according to the sort specs.
579
 */
580
int
581
list_coll(const void *ss1, const void *ss2)
582
46
{
583
46
	return list_coll_offset((struct sort_list_item **)ss1,
584
	    (struct sort_list_item **)ss2, 0);
585
}
586
587
#define LSCDEF(N)											\
588
static int												\
589
list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2)					\
590
{													\
591
													\
592
	return list_coll_offset(ss1, ss2, N);								\
593
}
594
595
LSCDEF(0)
596
425
LSCDEF(1)
597
281
LSCDEF(2)
598
124
LSCDEF(3)
599
973
LSCDEF(4)
600
LSCDEF(5)
601
LSCDEF(6)
602
618
LSCDEF(7)
603
LSCDEF(8)
604
LSCDEF(9)
605
LSCDEF(10)
606
LSCDEF(11)
607
4048
LSCDEF(12)
608
920
LSCDEF(13)
609
LSCDEF(14)
610
LSCDEF(15)
611
LSCDEF(16)
612
LSCDEF(17)
613
LSCDEF(18)
614
LSCDEF(19)
615
LSCDEF(20)
616
617
listcoll_t
618
get_list_call_func(size_t offset)
619
328
{
620
	static const listcoll_t lsarray[] = { list_coll_0, list_coll_1,
621
	    list_coll_2, list_coll_3, list_coll_4, list_coll_5,
622
	    list_coll_6, list_coll_7, list_coll_8, list_coll_9,
623
	    list_coll_10, list_coll_11, list_coll_12, list_coll_13,
624
	    list_coll_14, list_coll_15, list_coll_16, list_coll_17,
625
	    list_coll_18, list_coll_19, list_coll_20 };
626
627
328
	if (offset <= 20)
628
328
		return lsarray[offset];
629
630
	return list_coll_0;
631
}
632
633
/*
634
 * Compare two sort list items, only by their original string.
635
 */
636
int
637
list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2)
638
{
639
	return top_level_str_coll(((*ss1)->str), ((*ss2)->str));
640
}
641
642
/*
643
 * Maximum size of a number in the string (before or after decimal point)
644
 */
645
#define MAX_NUM_SIZE (128)
646
647
/*
648
 * Set suffix value
649
 */
650
static void
651
setsuffix(wchar_t c, unsigned char *si)
652
{
653
	switch (c){
654
	case L'k':
655
	case L'K':
656
		*si = 1;
657
		break;
658
	case L'M':
659
		*si = 2;
660
		break;
661
	case L'G':
662
		*si = 3;
663
		break;
664
	case L'T':
665
		*si = 4;
666
		break;
667
	case L'P':
668
		*si = 5;
669
		break;
670
	case L'E':
671
		*si = 6;
672
		break;
673
	case L'Z':
674
		*si = 7;
675
		break;
676
	case L'Y':
677
		*si = 8;
678
		break;
679
	default:
680
		*si = 0;
681
	};
682
}
683
684
/*
685
 * Read string s and parse the string into a fixed-decimal-point number.
686
 * sign equals -1 if the number is negative (explicit plus is not allowed,
687
 * according to GNU sort's "info sort".
688
 * The number part before decimal point is in the smain, after the decimal
689
 * point is in sfrac, tail is the pointer to the remainder of the string.
690
 */
691
static int
692
read_number(struct bwstring *s0, int *sign, wchar_t *smain, size_t *main_len, wchar_t *sfrac, size_t *frac_len, unsigned char *si)
693
{
694
	bwstring_iterator s;
695
696
	s = bws_begin(s0);
697
698
	/* always end the fraction with zero, even if we have no fraction */
699
	sfrac[0] = 0;
700
701
	while (iswblank(bws_get_iter_value(s)))
702
		s = bws_iterator_inc(s, 1);
703
704
	if (bws_get_iter_value(s) == (wchar_t)symbol_negative_sign) {
705
		*sign = -1;
706
		s = bws_iterator_inc(s, 1);
707
	}
708
709
	// This is '0', not '\0', do not change this
710
	while (iswdigit(bws_get_iter_value(s)) &&
711
	    (bws_get_iter_value(s) == L'0'))
712
		s = bws_iterator_inc(s, 1);
713
714
	while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) {
715
		if (iswdigit(bws_get_iter_value(s))) {
716
			smain[*main_len] = bws_get_iter_value(s);
717
			s = bws_iterator_inc(s, 1);
718
			*main_len += 1;
719
		} else if (symbol_thousands_sep &&
720
		    (bws_get_iter_value(s) == (wchar_t)symbol_thousands_sep))
721
			s = bws_iterator_inc(s, 1);
722
		else
723
			break;
724
	}
725
726
	smain[*main_len] = 0;
727
728
	if (bws_get_iter_value(s) == (wchar_t)symbol_decimal_point) {
729
		s = bws_iterator_inc(s, 1);
730
		while (iswdigit(bws_get_iter_value(s)) &&
731
		    *frac_len < MAX_NUM_SIZE) {
732
			sfrac[*frac_len] = bws_get_iter_value(s);
733
			s = bws_iterator_inc(s, 1);
734
			*frac_len += 1;
735
		}
736
		sfrac[*frac_len] = 0;
737
738
		while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') {
739
			--(*frac_len);
740
			sfrac[*frac_len] = L'\0';
741
		}
742
	}
743
744
	setsuffix(bws_get_iter_value(s), si);
745
746
	if ((*main_len + *frac_len) == 0)
747
		*sign = 0;
748
749
	return 0;
750
}
751
752
/*
753
 * Implements string sort.
754
 */
755
static int
756
wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
757
7451
{
758
759
7451
	if (debug_sort) {
760
		if (offset)
761
			printf("; offset=%zu\n", offset);
762
		bwsprintf(stdout, kv1->k, "; k1=<", ">");
763
		printf("(%zu)", BWSLEN(kv1->k));
764
		bwsprintf(stdout, kv2->k, ", k2=<", ">");
765
		printf("(%zu)", BWSLEN(kv2->k));
766
	}
767
768
7451
	return bwscoll(kv1->k, kv2->k, offset);
769
}
770
771
/*
772
 * Compare two suffixes
773
 */
774
static inline int
775
cmpsuffix(unsigned char si1, unsigned char si2)
776
{
777
	return (char)si1 - (char)si2;
778
}
779
780
/*
781
 * Implements numeric sort for -n and -h.
782
 */
783
static int
784
numcoll_impl(struct key_value *kv1, struct key_value *kv2,
785
    size_t offset __unused, bool use_suffix)
786
{
787
	struct bwstring *s1, *s2;
788
	wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1];
789
	wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1];
790
	int cmp_res, sign1, sign2;
791
	size_t frac1, frac2, main1, main2;
792
	unsigned char SI1, SI2;
793
	bool e1, e2, key1_read, key2_read;
794
795
	s1 = kv1->k;
796
	s2 = kv2->k;
797
	sign1 = sign2 = 0;
798
	main1 = main2 = 0;
799
	frac1 = frac2 = 0;
800
801
	key1_read = key2_read = false;
802
803
	if (debug_sort) {
804
		bwsprintf(stdout, s1, "; k1=<", ">");
805
		bwsprintf(stdout, s2, ", k2=<", ">");
806
	}
807
808
	if (s1 == s2)
809
		return 0;
810
811
	if (kv1->hint->status == HS_UNINITIALIZED) {
812
		/* read the number from the string */
813
		read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
814
		key1_read = true;
815
		kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10);
816
		if (main1 < 1 && frac1 < 1)
817
			kv1->hint->v.nh.empty=true;
818
		kv1->hint->v.nh.si = SI1;
819
		kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ?
820
		    HS_INITIALIZED : HS_ERROR;
821
		kv1->hint->v.nh.neg = (sign1 < 0) ? true : false;
822
	}
823
824
	if (kv2->hint->status == HS_UNINITIALIZED) {
825
		/* read the number from the string */
826
		read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
827
		key2_read = true;
828
		kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10);
829
		if (main2 < 1 && frac2 < 1)
830
			kv2->hint->v.nh.empty=true;
831
		kv2->hint->v.nh.si = SI2;
832
		kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ?
833
		    HS_INITIALIZED : HS_ERROR;
834
		kv2->hint->v.nh.neg = (sign2 < 0) ? true : false;
835
	}
836
837
	if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status ==
838
	    HS_INITIALIZED) {
839
		unsigned long long n1, n2;
840
		bool neg1, neg2;
841
842
		e1 = kv1->hint->v.nh.empty;
843
		e2 = kv2->hint->v.nh.empty;
844
845
		if (e1 && e2)
846
			return 0;
847
848
		neg1 = kv1->hint->v.nh.neg;
849
		neg2 = kv2->hint->v.nh.neg;
850
851
		if (neg1 && !neg2)
852
			return -1;
853
		if (neg2 && !neg1)
854
			return 1;
855
856
		if (e1)
857
			return neg2 ? 1 : -1;
858
		else if (e2)
859
			return neg1 ? -1 : 1;
860
861
862
		if (use_suffix) {
863
			cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si);
864
			if (cmp_res)
865
				return neg1 ? -cmp_res : cmp_res;
866
		}
867
868
		n1 = kv1->hint->v.nh.n1;
869
		n2 = kv2->hint->v.nh.n1;
870
		if (n1 < n2)
871
			return neg1 ? 1 : -1;
872
		else if (n1 > n2)
873
			return neg1 ? -1 : 1;
874
	}
875
876
	/* read the numbers from the strings */
877
	if (!key1_read)
878
		read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
879
	if (!key2_read)
880
		read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
881
882
	e1 = ((main1 + frac1) == 0);
883
	e2 = ((main2 + frac2) == 0);
884
885
	if (e1 && e2)
886
		return 0;
887
888
	/* we know the result if the signs are different */
889
	if (sign1 < 0 && sign2 >= 0)
890
		return -1;
891
	if (sign1 >= 0 && sign2 < 0)
892
		return 1;
893
894
	if (e1)
895
		return (sign2 < 0) ? +1 : -1;
896
	else if (e2)
897
		return (sign1 < 0) ? -1 : 1;
898
899
	if (use_suffix) {
900
		cmp_res = cmpsuffix(SI1, SI2);
901
		if (cmp_res)
902
			return (sign1 < 0) ? -cmp_res : cmp_res;
903
	}
904
905
	/* if both numbers are empty assume that the strings are equal */
906
	if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1)
907
		return 0;
908
909
	/*
910
	 * if the main part is of different size, we know the result
911
	 * (because the leading zeros are removed)
912
	 */
913
	if (main1 < main2)
914
		cmp_res = -1;
915
	else if (main1 > main2)
916
		cmp_res = +1;
917
	/* if the sizes are equal then simple non-collate string compare gives the correct result */
918
	else
919
		cmp_res = wcscmp(smain1, smain2);
920
921
	/* check fraction */
922
	if (!cmp_res)
923
		cmp_res = wcscmp(sfrac1, sfrac2);
924
925
	if (!cmp_res)
926
		return 0;
927
928
	/* reverse result if the signs are negative */
929
	if (sign1 < 0 && sign2 < 0)
930
		cmp_res = -cmp_res;
931
932
	return cmp_res;
933
}
934
935
/*
936
 * Implements numeric sort (-n).
937
 */
938
static int
939
numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
940
{
941
	return numcoll_impl(kv1, kv2, offset, false);
942
}
943
944
/*
945
 * Implements 'human' numeric sort (-h).
946
 */
947
static int
948
hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
949
{
950
	return numcoll_impl(kv1, kv2, offset, true);
951
}
952
953
/*
954
 * Implements random sort (-R).
955
 */
956
static int
957
randomcoll(struct key_value *kv1, struct key_value *kv2,
958
    size_t offset __unused)
959
{
960
	struct bwstring *s1, *s2;
961
	MD5_CTX ctx1, ctx2;
962
	char *b1, *b2;
963
964
	s1 = kv1->k;
965
	s2 = kv2->k;
966
967
	if (debug_sort) {
968
		bwsprintf(stdout, s1, "; k1=<", ">");
969
		bwsprintf(stdout, s2, ", k2=<", ">");
970
	}
971
972
	if (s1 == s2)
973
		return 0;
974
975
	memcpy(&ctx1, &md5_ctx, sizeof(MD5_CTX));
976
	memcpy(&ctx2, &md5_ctx, sizeof(MD5_CTX));
977
978
	MD5Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1));
979
	MD5Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2));
980
	b1 = MD5End(&ctx1, NULL);
981
	b2 = MD5End(&ctx2, NULL);
982
	if (b1 == NULL) {
983
		if (b2 == NULL)
984
			return 0;
985
		else {
986
			sort_free(b2);
987
			return -1;
988
		}
989
	} else if (b2 == NULL) {
990
		sort_free(b1);
991
		return 1;
992
	} else {
993
		int cmp_res;
994
995
		cmp_res = strcmp(b1, b2);
996
		sort_free(b1);
997
		sort_free(b2);
998
999
		if (!cmp_res)
1000
			cmp_res = bwscoll(s1, s2, 0);
1001
1002
		return cmp_res;
1003
	}
1004
}
1005
1006
/*
1007
 * Implements version sort (-V).
1008
 */
1009
static int
1010
versioncoll(struct key_value *kv1, struct key_value *kv2,
1011
    size_t offset __unused)
1012
{
1013
	struct bwstring *s1, *s2;
1014
1015
	s1 = kv1->k;
1016
	s2 = kv2->k;
1017
1018
	if (debug_sort) {
1019
		bwsprintf(stdout, s1, "; k1=<", ">");
1020
		bwsprintf(stdout, s2, ", k2=<", ">");
1021
	}
1022
1023
	if (s1 == s2)
1024
		return 0;
1025
1026
	return vcmp(s1, s2);
1027
}
1028
1029
/*
1030
 * Check for minus infinity
1031
 */
1032
static inline bool
1033
huge_minus(double d, int err1)
1034
{
1035
	if (err1 == ERANGE)
1036
		if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL)
1037
			return 1;
1038
1039
	return 0;
1040
}
1041
1042
/*
1043
 * Check for plus infinity
1044
 */
1045
static inline bool
1046
huge_plus(double d, int err1)
1047
{
1048
	if (err1 == ERANGE)
1049
		if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL)
1050
			return 1;
1051
1052
	return 0;
1053
}
1054
1055
/*
1056
 * Check whether a function is a NAN
1057
 */
1058
static bool
1059
is_nan(double d)
1060
{
1061
#if defined(NAN)
1062
	return (d == NAN || isnan(d));
1063
#else
1064
	return (isnan(d));
1065
#endif
1066
}
1067
1068
/*
1069
 * Compare two NANs
1070
 */
1071
static int
1072
cmp_nans(double d1, double d2)
1073
{
1074
	if (d1 == d2)
1075
		return 0;
1076
	return d1 < d2 ? -1 : 1;
1077
}
1078
1079
/*
1080
 * Implements general numeric sort (-g).
1081
 */
1082
static int
1083
gnumcoll(struct key_value *kv1, struct key_value *kv2,
1084
    size_t offset __unused)
1085
{
1086
	double d1, d2;
1087
	int err1, err2;
1088
	bool empty1, empty2, key1_read, key2_read;
1089
1090
	d1 = d2 = 0;
1091
	err1 = err2 = 0;
1092
	key1_read = key2_read = false;
1093
1094
	if (debug_sort) {
1095
		bwsprintf(stdout, kv1->k, "; k1=<", ">");
1096
		bwsprintf(stdout, kv2->k, "; k2=<", ">");
1097
	}
1098
1099
	if (kv1->hint->status == HS_UNINITIALIZED) {
1100
		errno = 0;
1101
		d1 = bwstod(kv1->k, &empty1);
1102
		err1 = errno;
1103
1104
		if (empty1) {
1105
			kv1->hint->v.gh.notnum = true;
1106
			kv1->hint->status = HS_INITIALIZED;
1107
		} else if (err1 == 0) {
1108
			kv1->hint->v.gh.d = d1;
1109
			kv1->hint->v.gh.nan = is_nan(d1);
1110
			kv1->hint->status = HS_INITIALIZED;
1111
		} else
1112
			kv1->hint->status = HS_ERROR;
1113
1114
		key1_read = true;
1115
	}
1116
1117
	if (kv2->hint->status == HS_UNINITIALIZED) {
1118
		errno = 0;
1119
		d2 = bwstod(kv2->k, &empty2);
1120
		err2 = errno;
1121
1122
		if (empty2) {
1123
			kv2->hint->v.gh.notnum = true;
1124
			kv2->hint->status = HS_INITIALIZED;
1125
		} else if (err2 == 0) {
1126
			kv2->hint->v.gh.d = d2;
1127
			kv2->hint->v.gh.nan = is_nan(d2);
1128
			kv2->hint->status = HS_INITIALIZED;
1129
		} else
1130
			kv2->hint->status = HS_ERROR;
1131
1132
		key2_read = true;
1133
	}
1134
1135
	if (kv1->hint->status == HS_INITIALIZED &&
1136
	    kv2->hint->status == HS_INITIALIZED) {
1137
#ifdef GNUSORT_COMPATIBILITY
1138
		if (kv1->hint->v.gh.notnum)
1139
			return kv2->hint->v.gh.notnum ? 0 : -1;
1140
		else if (kv2->hint->v.gh.notnum)
1141
			return 1;
1142
#else
1143
		if (kv1->hint->v.gh.notnum && kv2->hint->v.gh.notnum)
1144
			return 0;
1145
#endif
1146
1147
		if (kv1->hint->v.gh.nan)
1148
			return kv2->hint->v.gh.nan ?
1149
			    cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) : -1;
1150
		else if (kv2->hint->v.gh.nan)
1151
			return 1;
1152
1153
		d1 = kv1->hint->v.gh.d;
1154
		d2 = kv2->hint->v.gh.d;
1155
1156
		if (d1 < d2)
1157
			return -1;
1158
		else if (d1 > d2)
1159
			return 1;
1160
		else
1161
			return 0;
1162
	}
1163
1164
	if (!key1_read) {
1165
		errno = 0;
1166
		d1 = bwstod(kv1->k, &empty1);
1167
		err1 = errno;
1168
	}
1169
1170
	if (!key2_read) {
1171
		errno = 0;
1172
		d2 = bwstod(kv2->k, &empty2);
1173
		err2 = errno;
1174
	}
1175
1176
	/* Non-value case */
1177
#ifdef GNUSORT_COMPATIBILITY
1178
	if (empty1)
1179
		return empty2 ? 0 : -1;
1180
	else if (empty2)
1181
		return 1;
1182
#else
1183
	if (empty1 && empty2)
1184
		return 0;
1185
#endif
1186
1187
	/* NAN case */
1188
	if (is_nan(d1))
1189
		return is_nan(d2) ? cmp_nans(d1, d2) : -1;
1190
	else if (is_nan(d2))
1191
		return 1;
1192
1193
	/* Infinities */
1194
	if (err1 == ERANGE || err2 == ERANGE) {
1195
		/* Minus infinity case */
1196
		if (huge_minus(d1, err1)) {
1197
			if (huge_minus(d2, err2)) {
1198
				if (d1 == d2)
1199
					return 0;
1200
				return d1 < d2 ? -1 : 1;
1201
			} else
1202
				return -1;
1203
1204
		} else if (huge_minus(d2, err2)) {
1205
			if (huge_minus(d1, err1)) {
1206
				if (d1 == d2)
1207
					return 0;
1208
				return d1 < d2 ? -1 : 1;
1209
			} else
1210
				return 1;
1211
		}
1212
1213
		/* Plus infinity case */
1214
		if (huge_plus(d1, err1)) {
1215
			if (huge_plus(d2, err2)) {
1216
				if (d1 == d2)
1217
					return 0;
1218
				return d1 < d2 ? -1 : 1;
1219
			} else
1220
				return 1;
1221
		} else if (huge_plus(d2, err2)) {
1222
			if (huge_plus(d1, err1)) {
1223
				if (d1 == d2)
1224
					return 0;
1225
				return d1 < d2 ? -1 : 1;
1226
			} else
1227
				return -1;
1228
		}
1229
	}
1230
1231
	if (d1 == d2)
1232
		return 0;
1233
	return d1 < d2 ? -1 : 1;
1234
}
1235
1236
/*
1237
 * Implements month sort (-M).
1238
 */
1239
static int
1240
monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused)
1241
{
1242
	int val1, val2;
1243
	bool key1_read, key2_read;
1244
1245
	val1 = val2 = 0;
1246
	key1_read = key2_read = false;
1247
1248
	if (debug_sort) {
1249
		bwsprintf(stdout, kv1->k, "; k1=<", ">");
1250
		bwsprintf(stdout, kv2->k, "; k2=<", ">");
1251
	}
1252
1253
	if (kv1->hint->status == HS_UNINITIALIZED) {
1254
		kv1->hint->v.Mh.m = bws_month_score(kv1->k);
1255
		key1_read = true;
1256
		kv1->hint->status = HS_INITIALIZED;
1257
	}
1258
1259
	if (kv2->hint->status == HS_UNINITIALIZED) {
1260
		kv2->hint->v.Mh.m = bws_month_score(kv2->k);
1261
		key2_read = true;
1262
		kv2->hint->status = HS_INITIALIZED;
1263
	}
1264
1265
	if (kv1->hint->status == HS_INITIALIZED) {
1266
		val1 = kv1->hint->v.Mh.m;
1267
		key1_read = true;
1268
	}
1269
1270
	if (kv2->hint->status == HS_INITIALIZED) {
1271
		val2 = kv2->hint->v.Mh.m;
1272
		key2_read = true;
1273
	}
1274
1275
	if (!key1_read)
1276
		val1 = bws_month_score(kv1->k);
1277
	if (!key2_read)
1278
		val2 = bws_month_score(kv2->k);
1279
1280
	if (val1 == val2)
1281
		return 0;
1282
	return val1 < val2 ? -1 : 1;
1283
}