GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: usr.bin/sort/coll.c Lines: 417 566 73.7 %
Date: 2017-11-07 Branches: 269 424 63.4 %

Line Branch Exec Source
1
/*	$OpenBSD: coll.c,v 1.11 2015/12/11 21:41:51 mmcc Exp $	*/
2
3
/*-
4
 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
5
 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
6
 * All rights reserved.
7
 *
8
 * Redistribution and use in source and binary forms, with or without
9
 * modification, are permitted provided that the following conditions
10
 * are met:
11
 * 1. Redistributions of source code must retain the above copyright
12
 *    notice, this list of conditions and the following disclaimer.
13
 * 2. Redistributions in binary form must reproduce the above copyright
14
 *    notice, this list of conditions and the following disclaimer in the
15
 *    documentation and/or other materials provided with the distribution.
16
 *
17
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27
 * SUCH DAMAGE.
28
 */
29
30
#include <sys/types.h>
31
32
#include <errno.h>
33
#include <err.h>
34
#include <langinfo.h>
35
#include <limits.h>
36
#include <math.h>
37
#include <md5.h>
38
#include <stdlib.h>
39
#include <string.h>
40
#include <wchar.h>
41
#include <wctype.h>
42
43
#include "coll.h"
44
#include "vsort.h"
45
46
struct key_specs *keys;
47
size_t keys_num = 0;
48
49
wint_t symbol_decimal_point = L'.';
50
/* there is no default thousands separator in collate rules: */
51
wint_t symbol_thousands_sep = 0;
52
wint_t symbol_negative_sign = L'-';
53
wint_t symbol_positive_sign = L'+';
54
55
static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset);
56
static int gnumcoll(struct key_value*, struct key_value *, size_t offset);
57
static int monthcoll(struct key_value*, struct key_value *, size_t offset);
58
static int numcoll(struct key_value*, struct key_value *, size_t offset);
59
static int hnumcoll(struct key_value*, struct key_value *, size_t offset);
60
static int randomcoll(struct key_value*, struct key_value *, size_t offset);
61
static int versioncoll(struct key_value*, struct key_value *, size_t offset);
62
63
/*
64
 * Allocate keys array
65
 */
66
struct keys_array *
67
keys_array_alloc(void)
68
{
69
	struct keys_array *ka;
70
	size_t sz;
71
72
2694798
	sz = keys_array_size();
73
1347399
	ka = sort_calloc(1, sz);
74
75
1347399
	return ka;
76
}
77
78
/*
79
 * Calculate whether we need key hint space
80
 */
81
static size_t
82
key_hint_size(void)
83
{
84
28570126
	return need_hint ? sizeof(struct key_hint) : 0;
85
}
86
87
/*
88
 * Calculate keys array size
89
 */
90
size_t
91
keys_array_size(void)
92
{
93
28570126
	return keys_num * (sizeof(struct key_value) + key_hint_size());
94
}
95
96
/*
97
 * Clean data of keys array
98
 */
99
void
100
clean_keys_array(const struct bwstring *s, struct keys_array *ka)
101
{
102
10437236
	if (ka) {
103
		size_t i;
104
105
20878696
		for (i = 0; i < keys_num; ++i)
106

6580397
			if (ka->key[i].k && ka->key[i].k != s)
107
15956
				bwsfree(ka->key[i].k);
108
5218618
		memset(ka, 0, keys_array_size());
109
5218618
	}
110
5218618
}
111
112
/*
113
 * Set value of a key in the keys set
114
 */
115
void
116
set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind)
117
{
118

15662742
	if (ka && keys_num > ind) {
119
		struct key_value *kv;
120
121
5220914
		kv = &(ka->key[ind]);
122
123
5220914
		if (kv->k != s)
124
5220622
			bwsfree(kv->k);
125
5220914
		kv->k = s;
126
5220914
	}
127
5220914
}
128
129
/*
130
 * Initialize a sort list item
131
 */
132
struct sort_list_item *
133
sort_list_item_alloc(void)
134
{
135
	struct sort_list_item *si;
136
	size_t sz;
137
138
7719246
	sz = sizeof(struct sort_list_item) + keys_array_size();
139
3859623
	si = sort_calloc(1, sz);
140
141
3859623
	return si;
142
}
143
144
size_t
145
sort_list_item_size(struct sort_list_item *si)
146
{
147
	size_t i, ret = 0;
148
149
7718846
	if (si) {
150
3859423
		ret = sizeof(struct sort_list_item) + keys_array_size();
151
3859423
		if (si->str)
152
3859423
			ret += bws_memsize(si->str);
153
15440572
		for (i = 0; i < keys_num; ++i) {
154
			struct key_value *kv;
155
156
3860863
			kv = &(si->ka.key[i]);
157
158
3860863
			if (kv->k != si->str)
159
1200654
				ret += bws_memsize(kv->k);
160
		}
161
	}
162
3859423
	return ret;
163
}
164
165
/*
166
 * Calculate key for a sort list item
167
 */
168
static void
169
sort_list_item_make_key(struct sort_list_item *si)
170
{
171
7742806
	preproc(si->str, &(si->ka));
172
3871403
}
173
174
/*
175
 * Set value of a sort list item.
176
 * Return combined string and keys memory size.
177
 */
178
void
179
sort_list_item_set(struct sort_list_item *si, struct bwstring *str)
180
{
181
7742038
	if (si) {
182
3871019
		clean_keys_array(si->str, &(si->ka));
183
3871019
		if (si->str) {
184
11396
			if (si->str == str) {
185
				/* we are trying to reset the same string */
186
				return;
187
			} else {
188
11396
				bwsfree(si->str);
189
11396
				si->str = NULL;
190
			}
191
11396
		}
192
3871019
		si->str = str;
193
3871019
		sort_list_item_make_key(si);
194
3871019
	}
195
3871019
}
196
197
/*
198
 * De-allocate a sort list item object memory
199
 */
200
void
201
sort_list_item_clean(struct sort_list_item *si)
202
{
203
400
	if (si) {
204
200
		clean_keys_array(si->str, &(si->ka));
205
200
		if (si->str) {
206
200
			bwsfree(si->str);
207
200
			si->str = NULL;
208
200
		}
209
	}
210
200
}
211
212
/*
213
 * Skip columns according to specs
214
 */
215
static size_t
216
skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start,
217
    bool skip_blanks, bool *empty_key)
218
{
219
275394
	if (cols < 1)
220
		return BWSLEN(s) + 1;
221
222
275394
	if (skip_blanks)
223

15104
		while (start < BWSLEN(s) && iswblank(BWS_GET(s, start)))
224
2240
			++start;
225
226
277346
	while (start < BWSLEN(s) && cols > 1) {
227
976
		--cols;
228
976
		++start;
229
	}
230
231
275394
	if (start >= BWSLEN(s))
232
120
		*empty_key = true;
233
234
275394
	return start;
235
275394
}
236
237
/*
238
 * Skip fields according to specs
239
 */
240
static size_t
241
skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field)
242
{
243
572204
	if (fields < 2) {
244
10940
		if (BWSLEN(s) == 0)
245
			*empty_field = true;
246
10940
		return 0;
247
275162
	} else if (!(sort_opts_vals.tflag)) {
248
		size_t cpos = 0;
249
		bool pb = true;
250
251
3632662
		while (cpos < BWSLEN(s)) {
252
			bool isblank;
253
254
10076436
			isblank = iswblank(BWS_GET(s, cpos));
255
256

3632910
			if (isblank && !pb) {
257
273714
				--fields;
258
273714
				if (fields <= 1)
259
273146
					return cpos;
260
			}
261
			pb = isblank;
262
3085666
			++cpos;
263
3085666
		}
264
352
		if (fields > 1)
265
352
			*empty_field = true;
266
352
		return cpos;
267
	} else {
268
		size_t cpos = 0;
269
270
32192
		while (cpos < BWSLEN(s)) {
271

48000
			if (BWS_GET(s, cpos) == (wchar_t)sort_opts_vals.field_sep) {
272
6256
				--fields;
273
6256
				if (fields <= 1)
274
1568
					return cpos + 1;
275
			}
276
14432
			++cpos;
277
		}
278
96
		if (fields > 1)
279
96
			*empty_field = true;
280
96
		return cpos;
281
	}
282
286102
}
283
284
/*
285
 * Find fields start
286
 */
287
static void
288
find_field_start(const struct bwstring *s, struct key_specs *ks,
289
    size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key)
290
{
291
549924
	*field_start = skip_fields_to_start(s, ks->f1, empty_field);
292
274962
	if (!*empty_field)
293
549844
		*key_start = skip_cols_to_start(s, ks->c1, *field_start,
294
274922
		    ks->pos1b, empty_key);
295
	else
296
40
		*empty_key = true;
297
274962
}
298
299
/*
300
 * Find end key position
301
 */
302
static size_t
303
find_field_end(const struct bwstring *s, struct key_specs *ks)
304
{
305
	size_t f2, next_field_start, pos_end;
306
549620
	bool empty_field, empty_key;
307
308
274810
	empty_field = false;
309
274810
	empty_key = false;
310
274810
	f2 = ks->f2;
311
312
274810
	if (f2 == 0)
313
263670
		return BWSLEN(s) + 1;
314
	else {
315
11140
		if (ks->c2 == 0) {
316
10668
			next_field_start = skip_fields_to_start(s, f2 + 1,
317
			    &empty_field);
318

21472
			if ((next_field_start > 0) && sort_opts_vals.tflag &&
319
408
			    ((wchar_t)sort_opts_vals.field_sep == BWS_GET(s,
320
			    next_field_start - 1)))
321
56
				--next_field_start;
322
		} else
323
472
			next_field_start = skip_fields_to_start(s, f2,
324
			    &empty_field);
325
	}
326
327

21872
	if (empty_field || (next_field_start >= BWSLEN(s)))
328
408
		return BWSLEN(s) + 1;
329
330
10732
	if (ks->c2) {
331
472
		pos_end = skip_cols_to_start(s, ks->c2, next_field_start,
332
472
		    ks->pos2b, &empty_key);
333
472
		if (pos_end < BWSLEN(s))
334
464
			++pos_end;
335
	} else
336
		pos_end = next_field_start;
337
338
10732
	return pos_end;
339
274810
}
340
341
/*
342
 * Cut a field according to the key specs
343
 */
344
static struct bwstring *
345
cut_field(const struct bwstring *s, struct key_specs *ks)
346
{
347
	struct bwstring *ret = NULL;
348
349
549924
	if (s && ks) {
350
274962
		size_t field_start, key_end, key_start, sz;
351
274962
		bool empty_field, empty_key;
352
353
274962
		field_start = 0;
354
274962
		key_start = 0;
355
274962
		empty_field = false;
356
274962
		empty_key = false;
357
358
274962
		find_field_start(s, ks, &field_start, &key_start,
359
		    &empty_field, &empty_key);
360
361
274962
		if (empty_key)
362
152
			sz = 0;
363
		else {
364
274810
			key_end = find_field_end(s, ks);
365
824390
			sz = (key_end < key_start) ? 0 : (key_end - key_start);
366
		}
367
368
274962
		ret = bwsalloc(sz);
369
274962
		if (sz)
370
274682
			bwsnocpy(ret, s, key_start, sz);
371
274962
	} else
372
		ret = bwsalloc(0);
373
374
274962
	return ret;
375
}
376
377
/*
378
 * Preprocesses a line applying the necessary transformations
379
 * specified by command line options and returns the preprocessed
380
 * string, which can be used to compare.
381
 */
382
int
383
preproc(struct bwstring *s, struct keys_array *ka)
384
{
385
10437604
	if (sort_opts_vals.kflag) {
386
		size_t i;
387
1095624
		for (i = 0; i < keys_num; i++) {
388
			struct bwstring *key;
389
			struct key_specs *kspecs;
390
			struct sort_mods *sm;
391
392
274962
			kspecs = &(keys[i]);
393
274962
			key = cut_field(s, kspecs);
394
395
274962
			sm = &(kspecs->sm);
396
274962
			if (sm->dflag)
397
2304
				key = dictionary_order(key);
398
272658
			else if (sm->iflag)
399
2040
				key = ignore_nonprinting(key);
400

547700
			if (sm->fflag || sm->Mflag)
401
2384
				key = ignore_case(key);
402
403
274962
			set_key_on_keys_array(ka, key, i);
404
		}
405
272850
	} else {
406
		struct bwstring *ret = NULL;
407
4945952
		struct sort_mods *sm = default_sort_mods;
408
409
#ifdef GNUSORT_COMPATIBILITY
410
		if (sm->bflag) {
411
			if (ret == NULL)
412
				ret = bwsdup(s);
413
			ret = ignore_leading_blanks(ret);
414
		}
415
#endif
416
4945952
		if (sm->dflag) {
417
152
			if (ret == NULL)
418
152
				ret = bwsdup(s);
419
152
			ret = dictionary_order(ret);
420
4945952
		} else if (sm->iflag) {
421
			if (ret == NULL)
422
				ret = bwsdup(s);
423
			ret = ignore_nonprinting(ret);
424
		}
425

8950220
		if (sm->fflag || sm->Mflag) {
426
941716
			if (ret == NULL)
427
941588
				ret = bwsdup(s);
428
941716
			ret = ignore_case(ret);
429
941716
		}
430
4945952
		if (ret == NULL)
431
4004212
			set_key_on_keys_array(ka, s, 0);
432
		else
433
941740
			set_key_on_keys_array(ka, ret, 0);
434
	}
435
436
5218802
	return 0;
437
}
438
439
cmpcoll_t
440
get_sort_func(struct sort_mods *sm)
441
{
442
6750
	if (sm->nflag)
443
243
		return numcoll;
444
3132
	else if (sm->hflag)
445
		return hnumcoll;
446
3132
	else if (sm->gflag)
447
72
		return gnumcoll;
448
3060
	else if (sm->Mflag)
449
48
		return monthcoll;
450
3012
	else if (sm->Rflag)
451
190
		return randomcoll;
452
2822
	else if (sm->Vflag)
453
		return versioncoll;
454
	else
455
2822
		return wstrcoll;
456
3375
}
457
458
/*
459
 * Compares the given strings.  Returns a positive number if
460
 * the first precedes the second, a negative number if the second is
461
 * the preceding one, and zero if they are equal.  This function calls
462
 * the underlying collate functions, which done the actual comparison.
463
 */
464
int
465
key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset)
466
{
467
	struct sort_mods *sm;
468
	int res = 0;
469
	size_t i;
470
471
63734178
	for (i = 0; i < keys_num; ++i) {
472
20004004
		sm = &(keys[i].sm);
473
474
20004004
		if (sm->rflag)
475
1048783
			res = sm->func(&(ps2->key[i]), &(ps1->key[i]), offset);
476
		else
477
18955221
			res = sm->func(&(ps1->key[i]), &(ps2->key[i]), offset);
478
479
20004004
		if (res)
480
			break;
481
482
		/* offset applies to only the first key */
483
		offset = 0;
484
	}
485
20002456
	return res;
486
}
487
488
/*
489
 * Compare two strings.
490
 * Plain symbol-by-symbol comparison.
491
 */
492
int
493
top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2)
494
{
495
1264728
	if (default_sort_mods->rflag) {
496
		const struct bwstring *tmp;
497
498
		tmp = s1;
499
		s1 = s2;
500
		s2 = tmp;
501
6409
	}
502
503
632364
	return bwscoll(s1, s2, 0);
504
}
505
506
/*
507
 * Compare a string and a sort list item, according to the sort specs.
508
 */
509
int
510
str_list_coll(struct bwstring *str1, struct sort_list_item **ss2)
511
{
512
	struct keys_array *ka1;
513
	int ret = 0;
514
515
768
	ka1 = keys_array_alloc();
516
517
384
	preproc(str1, ka1);
518
519
384
	sort_list_item_make_key(*ss2);
520
521
384
	if (debug_sort) {
522
		bwsprintf(stdout, str1, "; s1=<", ">");
523
		bwsprintf(stdout, (*ss2)->str, ", s2=<", ">");
524
	}
525
526
384
	ret = key_coll(ka1, &((*ss2)->ka), 0);
527
528
384
	if (debug_sort)
529
		printf("; cmp1=%d", ret);
530
531
384
	clean_keys_array(str1, ka1);
532
384
	sort_free(ka1);
533
534

700
	if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
535
		ret = top_level_str_coll(str1, ((*ss2)->str));
536
		if (debug_sort)
537
			printf("; cmp2=%d", ret);
538
	}
539
540
384
	if (debug_sort)
541
		putchar('\n');
542
543
384
	return ret;
544
}
545
546
/*
547
 * Compare two sort list items, according to the sort specs.
548
 */
549
int
550
list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2,
551
    size_t offset)
552
{
553
	int ret;
554
555
37310738
	ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset);
556
557
18655369
	if (debug_sort) {
558
		if (offset)
559
			printf("; offset=%zu", offset);
560
		bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">");
561
		bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">");
562
		printf("; cmp1=%d\n", ret);
563
	}
564
565
18655369
	if (ret)
566
16831543
		return ret;
567
568

2455591
	if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
569
612335
		ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str));
570
612335
		if (debug_sort)
571
			printf("; cmp2=%d\n", ret);
572
	}
573
574
1823826
	return ret;
575
18655369
}
576
577
/*
578
 * Compare two sort list items, according to the sort specs.
579
 */
580
int
581
list_coll(const void *ss1, const void *ss2)
582
{
583
44996718
	return list_coll_offset((struct sort_list_item **)ss1,
584
14998906
	    (struct sort_list_item **)ss2, 0);
585
}
586
587
#define LSCDEF(N)											\
588
static int												\
589
list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2)					\
590
{													\
591
													\
592
	return list_coll_offset(ss1, ss2, N);								\
593
}
594
595
LSCDEF(0)
596
33632
LSCDEF(1)
597
216144
LSCDEF(2)
598
468882
LSCDEF(3)
599
969308
LSCDEF(4)
600
2169054
LSCDEF(5)
601
2271952
LSCDEF(6)
602
136170
LSCDEF(7)
603
74298
LSCDEF(8)
604
82962
LSCDEF(9)
605
17156
LSCDEF(10)
606
24952
LSCDEF(11)
607
103724
LSCDEF(12)
608
38428
LSCDEF(13)
609
8214
LSCDEF(14)
610
3540
LSCDEF(15)
611
672148
LSCDEF(16)
612
LSCDEF(17)
613
LSCDEF(18)
614
LSCDEF(19)
615
LSCDEF(20)
616
617
listcoll_t
618
get_list_call_func(size_t offset)
619
{
620
	static const listcoll_t lsarray[] = { list_coll_0, list_coll_1,
621
	    list_coll_2, list_coll_3, list_coll_4, list_coll_5,
622
	    list_coll_6, list_coll_7, list_coll_8, list_coll_9,
623
	    list_coll_10, list_coll_11, list_coll_12, list_coll_13,
624
	    list_coll_14, list_coll_15, list_coll_16, list_coll_17,
625
	    list_coll_18, list_coll_19, list_coll_20 };
626
627
244086
	if (offset <= 20)
628
122043
		return lsarray[offset];
629
630
	return list_coll_0;
631
122043
}
632
633
/*
634
 * Compare two sort list items, only by their original string.
635
 */
636
int
637
list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2)
638
{
639
	return top_level_str_coll(((*ss1)->str), ((*ss2)->str));
640
}
641
642
/*
643
 * Maximum size of a number in the string (before or after decimal point)
644
 */
645
#define MAX_NUM_SIZE (128)
646
647
/*
648
 * Set suffix value
649
 */
650
static void
651
setsuffix(wchar_t c, unsigned char *si)
652
{
653


57160610
	switch (c){
654
	case L'k':
655
	case L'K':
656
		*si = 1;
657
		break;
658
	case L'M':
659
		*si = 2;
660
		break;
661
	case L'G':
662
		*si = 3;
663
		break;
664
	case L'T':
665
		*si = 4;
666
		break;
667
	case L'P':
668
		*si = 5;
669
		break;
670
	case L'E':
671
		*si = 6;
672
		break;
673
	case L'Z':
674
		*si = 7;
675
		break;
676
	case L'Y':
677
		*si = 8;
678
		break;
679
	default:
680
28580305
		*si = 0;
681
28580305
	};
682
28580305
}
683
684
/*
685
 * Read string s and parse the string into a fixed-decimal-point number.
686
 * sign equals -1 if the number is negative (explicit plus is not allowed,
687
 * according to GNU sort's "info sort".
688
 * The number part before decimal point is in the smain, after the decimal
689
 * point is in sfrac, tail is the pointer to the remainder of the string.
690
 */
691
static int
692
read_number(struct bwstring *s0, int *sign, wchar_t *smain, size_t *main_len, wchar_t *sfrac, size_t *frac_len, unsigned char *si)
693
{
694
	bwstring_iterator s;
695
696
57160610
	s = bws_begin(s0);
697
698
	/* always end the fraction with zero, even if we have no fraction */
699
28580305
	sfrac[0] = 0;
700
701
57217000
	while (iswblank(bws_get_iter_value(s)))
702
28195
		s = bws_iterator_inc(s, 1);
703
704
28580305
	if (bws_get_iter_value(s) == (wchar_t)symbol_negative_sign) {
705
840
		*sign = -1;
706
840
		s = bws_iterator_inc(s, 1);
707
840
	}
708
709
	// This is '0', not '\0', do not change this
710

114273586
	while (iswdigit(bws_get_iter_value(s)) &&
711
28587097
	    (bws_get_iter_value(s) == L'0'))
712
28553092
		s = bws_iterator_inc(s, 1);
713
714

57256030
	while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) {
715
28616410
		if (iswdigit(bws_get_iter_value(s))) {
716
59315
			smain[*main_len] = bws_get_iter_value(s);
717
59315
			s = bws_iterator_inc(s, 1);
718
59315
			*main_len += 1;
719

28616410
		} else if (symbol_thousands_sep &&
720
		    (bws_get_iter_value(s) == (wchar_t)symbol_thousands_sep))
721
			s = bws_iterator_inc(s, 1);
722
		else
723
			break;
724
	}
725
726
28580305
	smain[*main_len] = 0;
727
728
28580305
	if (bws_get_iter_value(s) == (wchar_t)symbol_decimal_point) {
729
28546308
		s = bws_iterator_inc(s, 1);
730

571536123
		while (iswdigit(bws_get_iter_value(s)) &&
731
171481169
		    *frac_len < MAX_NUM_SIZE) {
732
171481169
			sfrac[*frac_len] = bws_get_iter_value(s);
733
171481169
			s = bws_iterator_inc(s, 1);
734
171481169
			*frac_len += 1;
735
		}
736
		sfrac[*frac_len] = 0;
737
738

85638948
		while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') {
739
12
			--(*frac_len);
740
			sfrac[*frac_len] = L'\0';
741
		}
742
	}
743
744
28580305
	setsuffix(bws_get_iter_value(s), si);
745
746
28580305
	if ((*main_len + *frac_len) == 0)
747
24
		*sign = 0;
748
749
28580305
	return 0;
750
}
751
752
/*
753
 * Implements string sort.
754
 */
755
static int
756
wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
757
{
758
759
11258548
	if (debug_sort) {
760
		if (offset)
761
			printf("; offset=%zu\n", offset);
762
		bwsprintf(stdout, kv1->k, "; k1=<", ">");
763
		printf("(%zu)", BWSLEN(kv1->k));
764
		bwsprintf(stdout, kv2->k, ", k2=<", ">");
765
		printf("(%zu)", BWSLEN(kv2->k));
766
	}
767
768
5629274
	return bwscoll(kv1->k, kv2->k, offset);
769
}
770
771
/*
772
 * Compare two suffixes
773
 */
774
static inline int
775
cmpsuffix(unsigned char si1, unsigned char si2)
776
{
777
	return (char)si1 - (char)si2;
778
}
779
780
/*
781
 * Implements numeric sort for -n and -h.
782
 */
783
static int
784
numcoll_impl(struct key_value *kv1, struct key_value *kv2,
785
    size_t offset __unused, bool use_suffix)
786
{
787
	struct bwstring *s1, *s2;
788
14312401
	wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1];
789
14312401
	wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1];
790
14312401
	int cmp_res, sign1, sign2;
791
14312401
	size_t frac1, frac2, main1, main2;
792
14312401
	unsigned char SI1, SI2;
793
	bool e1, e2, key1_read, key2_read;
794
795
14312401
	s1 = kv1->k;
796
14312401
	s2 = kv2->k;
797
14312401
	sign1 = sign2 = 0;
798
14312401
	main1 = main2 = 0;
799
14312401
	frac1 = frac2 = 0;
800
801
	key1_read = key2_read = false;
802
803
14312401
	if (debug_sort) {
804
		bwsprintf(stdout, s1, "; k1=<", ">");
805
		bwsprintf(stdout, s2, ", k2=<", ">");
806
	}
807
808
14312401
	if (s1 == s2)
809
		return 0;
810
811
14312401
	if (kv1->hint->status == HS_UNINITIALIZED) {
812
		/* read the number from the string */
813
1095246
		read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
814
		key1_read = true;
815
1095246
		kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10);
816
1095246
		if (main1 < 1 && frac1 < 1)
817
16
			kv1->hint->v.nh.empty=true;
818
1095246
		kv1->hint->v.nh.si = SI1;
819
1095246
		kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ?
820
		    HS_INITIALIZED : HS_ERROR;
821
1095246
		kv1->hint->v.nh.neg = (sign1 < 0) ? true : false;
822
1095246
	}
823
824
14312401
	if (kv2->hint->status == HS_UNINITIALIZED) {
825
		/* read the number from the string */
826
501992
		read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
827
		key2_read = true;
828
501992
		kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10);
829
501992
		if (main2 < 1 && frac2 < 1)
830
8
			kv2->hint->v.nh.empty=true;
831
501992
		kv2->hint->v.nh.si = SI2;
832
501992
		kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ?
833
		    HS_INITIALIZED : HS_ERROR;
834
501992
		kv2->hint->v.nh.neg = (sign2 < 0) ? true : false;
835
501992
	}
836
837

28624330
	if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status ==
838
	    HS_INITIALIZED) {
839
		unsigned long long n1, n2;
840
		bool neg1, neg2;
841
842
14311917
		e1 = kv1->hint->v.nh.empty;
843
14311917
		e2 = kv2->hint->v.nh.empty;
844
845

14311985
		if (e1 && e2)
846
			return 0;
847
848
14311917
		neg1 = kv1->hint->v.nh.neg;
849
14311917
		neg2 = kv2->hint->v.nh.neg;
850
851

14314961
		if (neg1 && !neg2)
852
488
			return -1;
853

14314269
		if (neg2 && !neg1)
854
284
			return 1;
855
856
14311145
		if (e1)
857
36
			return neg2 ? 1 : -1;
858
14311109
		else if (e2)
859
28
			return neg1 ? -1 : 1;
860
861
862
14311081
		if (use_suffix) {
863
			cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si);
864
			if (cmp_res)
865
				return neg1 ? -cmp_res : cmp_res;
866
		}
867
868
14311081
		n1 = kv1->hint->v.nh.n1;
869
14311081
		n2 = kv2->hint->v.nh.n1;
870
14311081
		if (n1 < n2)
871
15928
			return neg1 ? 1 : -1;
872
14295153
		else if (n1 > n2)
873
10381
			return neg1 ? -1 : 1;
874
14284772
	}
875
876
	/* read the numbers from the strings */
877
14285256
	if (!key1_read)
878
13197981
		read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
879
14285256
	if (!key2_read)
880
13785086
		read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
881
882
14285256
	e1 = ((main1 + frac1) == 0);
883
14285256
	e2 = ((main2 + frac2) == 0);
884
885

14285256
	if (e1 && e2)
886
		return 0;
887
888
	/* we know the result if the signs are different */
889
14285256
	if (sign1 < 0 && sign2 >= 0)
890
		return -1;
891
14285256
	if (sign1 >= 0 && sign2 < 0)
892
		return 1;
893
894
14285256
	if (e1)
895
		return (sign2 < 0) ? +1 : -1;
896
14285256
	else if (e2)
897
		return (sign1 < 0) ? -1 : 1;
898
899
14285256
	if (use_suffix) {
900
		cmp_res = cmpsuffix(SI1, SI2);
901
		if (cmp_res)
902
			return (sign1 < 0) ? -cmp_res : cmp_res;
903
	}
904
905
	/* if both numbers are empty assume that the strings are equal */
906
14285256
	if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1)
907
		return 0;
908
909
	/*
910
	 * if the main part is of different size, we know the result
911
	 * (because the leading zeros are removed)
912
	 */
913
14285256
	if (main1 < main2)
914
12
		cmp_res = -1;
915
14285244
	else if (main1 > main2)
916
8
		cmp_res = +1;
917
	/* if the sizes are equal then simple non-collate string compare gives the correct result */
918
	else
919
14285236
		cmp_res = wcscmp(smain1, smain2);
920
921
	/* check fraction */
922
14285256
	if (!cmp_res)
923
14285100
		cmp_res = wcscmp(sfrac1, sfrac2);
924
925
14285256
	if (!cmp_res)
926
83300
		return 0;
927
928
	/* reverse result if the signs are negative */
929
14201956
	if (sign1 < 0 && sign2 < 0)
930
8
		cmp_res = -cmp_res;
931
932
14201956
	return cmp_res;
933
14312401
}
934
935
/*
936
 * Implements numeric sort (-n).
937
 */
938
static int
939
numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
940
{
941
28624802
	return numcoll_impl(kv1, kv2, offset, false);
942
}
943
944
/*
945
 * Implements 'human' numeric sort (-h).
946
 */
947
static int
948
hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
949
{
950
	return numcoll_impl(kv1, kv2, offset, true);
951
}
952
953
/*
954
 * Implements random sort (-R).
955
 */
956
static int
957
randomcoll(struct key_value *kv1, struct key_value *kv2,
958
    size_t offset __unused)
959
{
960
	struct bwstring *s1, *s2;
961
123322
	MD5_CTX ctx1, ctx2;
962
	char *b1, *b2;
963
964
61661
	s1 = kv1->k;
965
61661
	s2 = kv2->k;
966
967
61661
	if (debug_sort) {
968
		bwsprintf(stdout, s1, "; k1=<", ">");
969
		bwsprintf(stdout, s2, ", k2=<", ">");
970
	}
971
972
61661
	if (s1 == s2)
973
		return 0;
974
975
61661
	memcpy(&ctx1, &md5_ctx, sizeof(MD5_CTX));
976
61661
	memcpy(&ctx2, &md5_ctx, sizeof(MD5_CTX));
977
978
61661
	MD5Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1));
979
61661
	MD5Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2));
980
61661
	b1 = MD5End(&ctx1, NULL);
981
61661
	b2 = MD5End(&ctx2, NULL);
982
61661
	if (b1 == NULL) {
983
		if (b2 == NULL)
984
			return 0;
985
		else {
986
			sort_free(b2);
987
			return -1;
988
		}
989
61661
	} else if (b2 == NULL) {
990
		sort_free(b1);
991
		return 1;
992
	} else {
993
		int cmp_res;
994
995
61661
		cmp_res = strcmp(b1, b2);
996
61661
		sort_free(b1);
997
61661
		sort_free(b2);
998
999
61661
		if (!cmp_res)
1000
			cmp_res = bwscoll(s1, s2, 0);
1001
1002
		return cmp_res;
1003
	}
1004
61661
}
1005
1006
/*
1007
 * Implements version sort (-V).
1008
 */
1009
static int
1010
versioncoll(struct key_value *kv1, struct key_value *kv2,
1011
    size_t offset __unused)
1012
{
1013
	struct bwstring *s1, *s2;
1014
1015
	s1 = kv1->k;
1016
	s2 = kv2->k;
1017
1018
	if (debug_sort) {
1019
		bwsprintf(stdout, s1, "; k1=<", ">");
1020
		bwsprintf(stdout, s2, ", k2=<", ">");
1021
	}
1022
1023
	if (s1 == s2)
1024
		return 0;
1025
1026
	return vcmp(s1, s2);
1027
}
1028
1029
/*
1030
 * Check for minus infinity
1031
 */
1032
static inline bool
1033
huge_minus(double d, int err1)
1034
{
1035
	if (err1 == ERANGE)
1036
		if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL)
1037
			return 1;
1038
1039
	return 0;
1040
}
1041
1042
/*
1043
 * Check for plus infinity
1044
 */
1045
static inline bool
1046
huge_plus(double d, int err1)
1047
{
1048
	if (err1 == ERANGE)
1049
		if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL)
1050
			return 1;
1051
1052
	return 0;
1053
}
1054
1055
/*
1056
 * Check whether a function is a NAN
1057
 */
1058
static bool
1059
is_nan(double d)
1060
{
1061
#if defined(NAN)
1062
688
	return (d == NAN || isnan(d));
1063
#else
1064
	return (isnan(d));
1065
#endif
1066
}
1067
1068
/*
1069
 * Compare two NANs
1070
 */
1071
static int
1072
cmp_nans(double d1, double d2)
1073
{
1074
	if (d1 == d2)
1075
		return 0;
1076
	return d1 < d2 ? -1 : 1;
1077
}
1078
1079
/*
1080
 * Implements general numeric sort (-g).
1081
 */
1082
static int
1083
gnumcoll(struct key_value *kv1, struct key_value *kv2,
1084
    size_t offset __unused)
1085
{
1086
	double d1, d2;
1087
	int err1, err2;
1088
624
	bool empty1, empty2, key1_read, key2_read;
1089
1090
	d1 = d2 = 0;
1091
	err1 = err2 = 0;
1092
	key1_read = key2_read = false;
1093
1094
312
	if (debug_sort) {
1095
		bwsprintf(stdout, kv1->k, "; k1=<", ">");
1096
		bwsprintf(stdout, kv2->k, "; k2=<", ">");
1097
	}
1098
1099
312
	if (kv1->hint->status == HS_UNINITIALIZED) {
1100
96
		errno = 0;
1101
96
		d1 = bwstod(kv1->k, &empty1);
1102
96
		err1 = errno;
1103
1104
96
		if (empty1) {
1105
4
			kv1->hint->v.gh.notnum = true;
1106
			kv1->hint->status = HS_INITIALIZED;
1107
96
		} else if (err1 == 0) {
1108
92
			kv1->hint->v.gh.d = d1;
1109
92
			kv1->hint->v.gh.nan = is_nan(d1);
1110
			kv1->hint->status = HS_INITIALIZED;
1111
92
		} else
1112
			kv1->hint->status = HS_ERROR;
1113
1114
		key1_read = true;
1115
96
	}
1116
1117
312
	if (kv2->hint->status == HS_UNINITIALIZED) {
1118
88
		errno = 0;
1119
88
		d2 = bwstod(kv2->k, &empty2);
1120
88
		err2 = errno;
1121
1122
88
		if (empty2) {
1123
8
			kv2->hint->v.gh.notnum = true;
1124
			kv2->hint->status = HS_INITIALIZED;
1125
88
		} else if (err2 == 0) {
1126
80
			kv2->hint->v.gh.d = d2;
1127
80
			kv2->hint->v.gh.nan = is_nan(d2);
1128
			kv2->hint->status = HS_INITIALIZED;
1129
80
		} else
1130
			kv2->hint->status = HS_ERROR;
1131
1132
		key2_read = true;
1133
88
	}
1134
1135

624
	if (kv1->hint->status == HS_INITIALIZED &&
1136
312
	    kv2->hint->status == HS_INITIALIZED) {
1137
#ifdef GNUSORT_COMPATIBILITY
1138
		if (kv1->hint->v.gh.notnum)
1139
			return kv2->hint->v.gh.notnum ? 0 : -1;
1140
		else if (kv2->hint->v.gh.notnum)
1141
			return 1;
1142
#else
1143

324
		if (kv1->hint->v.gh.notnum && kv2->hint->v.gh.notnum)
1144
			return 0;
1145
#endif
1146
1147
312
		if (kv1->hint->v.gh.nan)
1148
			return kv2->hint->v.gh.nan ?
1149
			    cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) : -1;
1150
312
		else if (kv2->hint->v.gh.nan)
1151
			return 1;
1152
1153
312
		d1 = kv1->hint->v.gh.d;
1154
312
		d2 = kv2->hint->v.gh.d;
1155
1156
312
		if (d1 < d2)
1157
72
			return -1;
1158
240
		else if (d1 > d2)
1159
136
			return 1;
1160
		else
1161
104
			return 0;
1162
	}
1163
1164
	if (!key1_read) {
1165
		errno = 0;
1166
		d1 = bwstod(kv1->k, &empty1);
1167
		err1 = errno;
1168
	}
1169
1170
	if (!key2_read) {
1171
		errno = 0;
1172
		d2 = bwstod(kv2->k, &empty2);
1173
		err2 = errno;
1174
	}
1175
1176
	/* Non-value case */
1177
#ifdef GNUSORT_COMPATIBILITY
1178
	if (empty1)
1179
		return empty2 ? 0 : -1;
1180
	else if (empty2)
1181
		return 1;
1182
#else
1183
	if (empty1 && empty2)
1184
		return 0;
1185
#endif
1186
1187
	/* NAN case */
1188
	if (is_nan(d1))
1189
		return is_nan(d2) ? cmp_nans(d1, d2) : -1;
1190
	else if (is_nan(d2))
1191
		return 1;
1192
1193
	/* Infinities */
1194
	if (err1 == ERANGE || err2 == ERANGE) {
1195
		/* Minus infinity case */
1196
		if (huge_minus(d1, err1)) {
1197
			if (huge_minus(d2, err2)) {
1198
				if (d1 == d2)
1199
					return 0;
1200
				return d1 < d2 ? -1 : 1;
1201
			} else
1202
				return -1;
1203
1204
		} else if (huge_minus(d2, err2)) {
1205
			if (huge_minus(d1, err1)) {
1206
				if (d1 == d2)
1207
					return 0;
1208
				return d1 < d2 ? -1 : 1;
1209
			} else
1210
				return 1;
1211
		}
1212
1213
		/* Plus infinity case */
1214
		if (huge_plus(d1, err1)) {
1215
			if (huge_plus(d2, err2)) {
1216
				if (d1 == d2)
1217
					return 0;
1218
				return d1 < d2 ? -1 : 1;
1219
			} else
1220
				return 1;
1221
		} else if (huge_plus(d2, err2)) {
1222
			if (huge_plus(d1, err1)) {
1223
				if (d1 == d2)
1224
					return 0;
1225
				return d1 < d2 ? -1 : 1;
1226
			} else
1227
				return -1;
1228
		}
1229
	}
1230
1231
	if (d1 == d2)
1232
		return 0;
1233
	return d1 < d2 ? -1 : 1;
1234
312
}
1235
1236
/*
1237
 * Implements month sort (-M).
1238
 */
1239
static int
1240
monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused)
1241
{
1242
	int val1, val2;
1243
	bool key1_read, key2_read;
1244
1245
	val1 = val2 = 0;
1246
	key1_read = key2_read = false;
1247
1248
712
	if (debug_sort) {
1249
		bwsprintf(stdout, kv1->k, "; k1=<", ">");
1250
		bwsprintf(stdout, kv2->k, "; k2=<", ">");
1251
	}
1252
1253
356
	if (kv1->hint->status == HS_UNINITIALIZED) {
1254
16
		kv1->hint->v.Mh.m = bws_month_score(kv1->k);
1255
		key1_read = true;
1256
16
		kv1->hint->status = HS_INITIALIZED;
1257
16
	}
1258
1259
356
	if (kv2->hint->status == HS_UNINITIALIZED) {
1260
16
		kv2->hint->v.Mh.m = bws_month_score(kv2->k);
1261
		key2_read = true;
1262
16
		kv2->hint->status = HS_INITIALIZED;
1263
16
	}
1264
1265
356
	if (kv1->hint->status == HS_INITIALIZED) {
1266
36
		val1 = kv1->hint->v.Mh.m;
1267
		key1_read = true;
1268
36
	}
1269
1270
356
	if (kv2->hint->status == HS_INITIALIZED) {
1271
36
		val2 = kv2->hint->v.Mh.m;
1272
		key2_read = true;
1273
36
	}
1274
1275
356
	if (!key1_read)
1276
320
		val1 = bws_month_score(kv1->k);
1277
356
	if (!key2_read)
1278
320
		val2 = bws_month_score(kv2->k);
1279
1280
356
	if (val1 == val2)
1281
40
		return 0;
1282
316
	return val1 < val2 ? -1 : 1;
1283
356
}