GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: usr.bin/sort/coll.c Lines: 417 566 73.7 %
Date: 2017-11-13 Branches: 269 424 63.4 %

Line Branch Exec Source
1
/*	$OpenBSD: coll.c,v 1.11 2015/12/11 21:41:51 mmcc Exp $	*/
2
3
/*-
4
 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
5
 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
6
 * All rights reserved.
7
 *
8
 * Redistribution and use in source and binary forms, with or without
9
 * modification, are permitted provided that the following conditions
10
 * are met:
11
 * 1. Redistributions of source code must retain the above copyright
12
 *    notice, this list of conditions and the following disclaimer.
13
 * 2. Redistributions in binary form must reproduce the above copyright
14
 *    notice, this list of conditions and the following disclaimer in the
15
 *    documentation and/or other materials provided with the distribution.
16
 *
17
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27
 * SUCH DAMAGE.
28
 */
29
30
#include <sys/types.h>
31
32
#include <errno.h>
33
#include <err.h>
34
#include <langinfo.h>
35
#include <limits.h>
36
#include <math.h>
37
#include <md5.h>
38
#include <stdlib.h>
39
#include <string.h>
40
#include <wchar.h>
41
#include <wctype.h>
42
43
#include "coll.h"
44
#include "vsort.h"
45
46
struct key_specs *keys;
47
size_t keys_num = 0;
48
49
wint_t symbol_decimal_point = L'.';
50
/* there is no default thousands separator in collate rules: */
51
wint_t symbol_thousands_sep = 0;
52
wint_t symbol_negative_sign = L'-';
53
wint_t symbol_positive_sign = L'+';
54
55
static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset);
56
static int gnumcoll(struct key_value*, struct key_value *, size_t offset);
57
static int monthcoll(struct key_value*, struct key_value *, size_t offset);
58
static int numcoll(struct key_value*, struct key_value *, size_t offset);
59
static int hnumcoll(struct key_value*, struct key_value *, size_t offset);
60
static int randomcoll(struct key_value*, struct key_value *, size_t offset);
61
static int versioncoll(struct key_value*, struct key_value *, size_t offset);
62
63
/*
64
 * Allocate keys array
65
 */
66
struct keys_array *
67
keys_array_alloc(void)
68
{
69
	struct keys_array *ka;
70
	size_t sz;
71
72
2692130
	sz = keys_array_size();
73
1346065
	ka = sort_calloc(1, sz);
74
75
1346065
	return ka;
76
}
77
78
/*
79
 * Calculate whether we need key hint space
80
 */
81
static size_t
82
key_hint_size(void)
83
{
84
27945476
	return need_hint ? sizeof(struct key_hint) : 0;
85
}
86
87
/*
88
 * Calculate keys array size
89
 */
90
size_t
91
keys_array_size(void)
92
{
93
27945476
	return keys_num * (sizeof(struct key_value) + key_hint_size());
94
}
95
96
/*
97
 * Clean data of keys array
98
 */
99
void
100
clean_keys_array(const struct bwstring *s, struct keys_array *ka)
101
{
102
10228130
	if (ka) {
103
		size_t i;
104
105
20460484
		for (i = 0; i < keys_num; ++i)
106

6474510
			if (ka->key[i].k && ka->key[i].k != s)
107
14426
				bwsfree(ka->key[i].k);
108
5114065
		memset(ka, 0, keys_array_size());
109
5114065
	}
110
5114065
}
111
112
/*
113
 * Set value of a key in the keys set
114
 */
115
void
116
set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind)
117
{
118

15349083
	if (ka && keys_num > ind) {
119
		struct key_value *kv;
120
121
5116361
		kv = &(ka->key[ind]);
122
123
5116361
		if (kv->k != s)
124
5116069
			bwsfree(kv->k);
125
5116361
		kv->k = s;
126
5116361
	}
127
5116361
}
128
129
/*
130
 * Initialize a sort list item
131
 */
132
struct sort_list_item *
133
sort_list_item_alloc(void)
134
{
135
	struct sort_list_item *si;
136
	size_t sz;
137
138
7512808
	sz = sizeof(struct sort_list_item) + keys_array_size();
139
3756404
	si = sort_calloc(1, sz);
140
141
3756404
	return si;
142
}
143
144
size_t
145
sort_list_item_size(struct sort_list_item *si)
146
{
147
	size_t i, ret = 0;
148
149
7512408
	if (si) {
150
3756204
		ret = sizeof(struct sort_list_item) + keys_array_size();
151
3756204
		if (si->str)
152
3756204
			ret += bws_memsize(si->str);
153
15027696
		for (i = 0; i < keys_num; ++i) {
154
			struct key_value *kv;
155
156
3757644
			kv = &(si->ka.key[i]);
157
158
3757644
			if (kv->k != si->str)
159
1116703
				ret += bws_memsize(kv->k);
160
		}
161
	}
162
3756204
	return ret;
163
}
164
165
/*
166
 * Calculate key for a sort list item
167
 */
168
static void
169
sort_list_item_make_key(struct sort_list_item *si)
170
{
171
7536368
	preproc(si->str, &(si->ka));
172
3768184
}
173
174
/*
175
 * Set value of a sort list item.
176
 * Return combined string and keys memory size.
177
 */
178
void
179
sort_list_item_set(struct sort_list_item *si, struct bwstring *str)
180
{
181
7535600
	if (si) {
182
3767800
		clean_keys_array(si->str, &(si->ka));
183
3767800
		if (si->str) {
184
11396
			if (si->str == str) {
185
				/* we are trying to reset the same string */
186
				return;
187
			} else {
188
11396
				bwsfree(si->str);
189
11396
				si->str = NULL;
190
			}
191
11396
		}
192
3767800
		si->str = str;
193
3767800
		sort_list_item_make_key(si);
194
3767800
	}
195
3767800
}
196
197
/*
198
 * De-allocate a sort list item object memory
199
 */
200
void
201
sort_list_item_clean(struct sort_list_item *si)
202
{
203
400
	if (si) {
204
200
		clean_keys_array(si->str, &(si->ka));
205
200
		if (si->str) {
206
200
			bwsfree(si->str);
207
200
			si->str = NULL;
208
200
		}
209
	}
210
200
}
211
212
/*
213
 * Skip columns according to specs
214
 */
215
static size_t
216
skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start,
217
    bool skip_blanks, bool *empty_key)
218
{
219
189913
	if (cols < 1)
220
		return BWSLEN(s) + 1;
221
222
189913
	if (skip_blanks)
223

16080
		while (start < BWSLEN(s) && iswblank(BWS_GET(s, start)))
224
2240
			++start;
225
226
381778
	while (start < BWSLEN(s) && cols > 1) {
227
976
		--cols;
228
976
		++start;
229
	}
230
231
189913
	if (start >= BWSLEN(s))
232
120
		*empty_key = true;
233
234
189913
	return start;
235
189913
}
236
237
/*
238
 * Skip fields according to specs
239
 */
240
static size_t
241
skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field)
242
{
243
401242
	if (fields < 2) {
244
10940
		if (BWSLEN(s) == 0)
245
			*empty_field = true;
246
10940
		return 0;
247
189681
	} else if (!(sort_opts_vals.tflag)) {
248
		size_t cpos = 0;
249
		bool pb = true;
250
251
4813484
		while (cpos < BWSLEN(s)) {
252
			bool isblank;
253
254
7219170
			isblank = iswblank(BWS_GET(s, cpos));
255
256

2595007
			if (isblank && !pb) {
257
188233
				--fields;
258
188233
				if (fields <= 1)
259
187665
					return cpos;
260
			}
261
			pb = isblank;
262
2218725
			++cpos;
263
2218725
		}
264
352
		if (fields > 1)
265
352
			*empty_field = true;
266
352
		return cpos;
267
	} else {
268
		size_t cpos = 0;
269
270
32192
		while (cpos < BWSLEN(s)) {
271

48000
			if (BWS_GET(s, cpos) == (wchar_t)sort_opts_vals.field_sep) {
272
6256
				--fields;
273
6256
				if (fields <= 1)
274
1568
					return cpos + 1;
275
			}
276
14432
			++cpos;
277
		}
278
96
		if (fields > 1)
279
96
			*empty_field = true;
280
96
		return cpos;
281
	}
282
200621
}
283
284
/*
285
 * Find fields start
286
 */
287
static void
288
find_field_start(const struct bwstring *s, struct key_specs *ks,
289
    size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key)
290
{
291
378962
	*field_start = skip_fields_to_start(s, ks->f1, empty_field);
292
189481
	if (!*empty_field)
293
378882
		*key_start = skip_cols_to_start(s, ks->c1, *field_start,
294
189441
		    ks->pos1b, empty_key);
295
	else
296
40
		*empty_key = true;
297
189481
}
298
299
/*
300
 * Find end key position
301
 */
302
static size_t
303
find_field_end(const struct bwstring *s, struct key_specs *ks)
304
{
305
	size_t f2, next_field_start, pos_end;
306
378658
	bool empty_field, empty_key;
307
308
189329
	empty_field = false;
309
189329
	empty_key = false;
310
189329
	f2 = ks->f2;
311
312
189329
	if (f2 == 0)
313
178189
		return BWSLEN(s) + 1;
314
	else {
315
11140
		if (ks->c2 == 0) {
316
10668
			next_field_start = skip_fields_to_start(s, f2 + 1,
317
			    &empty_field);
318

21472
			if ((next_field_start > 0) && sort_opts_vals.tflag &&
319
408
			    ((wchar_t)sort_opts_vals.field_sep == BWS_GET(s,
320
			    next_field_start - 1)))
321
56
				--next_field_start;
322
		} else
323
472
			next_field_start = skip_fields_to_start(s, f2,
324
			    &empty_field);
325
	}
326
327

21872
	if (empty_field || (next_field_start >= BWSLEN(s)))
328
408
		return BWSLEN(s) + 1;
329
330
10732
	if (ks->c2) {
331
472
		pos_end = skip_cols_to_start(s, ks->c2, next_field_start,
332
472
		    ks->pos2b, &empty_key);
333
472
		if (pos_end < BWSLEN(s))
334
464
			++pos_end;
335
	} else
336
		pos_end = next_field_start;
337
338
10732
	return pos_end;
339
189329
}
340
341
/*
342
 * Cut a field according to the key specs
343
 */
344
static struct bwstring *
345
cut_field(const struct bwstring *s, struct key_specs *ks)
346
{
347
	struct bwstring *ret = NULL;
348
349
378962
	if (s && ks) {
350
189481
		size_t field_start, key_end, key_start, sz;
351
189481
		bool empty_field, empty_key;
352
353
189481
		field_start = 0;
354
189481
		key_start = 0;
355
189481
		empty_field = false;
356
189481
		empty_key = false;
357
358
189481
		find_field_start(s, ks, &field_start, &key_start,
359
		    &empty_field, &empty_key);
360
361
189481
		if (empty_key)
362
152
			sz = 0;
363
		else {
364
189329
			key_end = find_field_end(s, ks);
365
567947
			sz = (key_end < key_start) ? 0 : (key_end - key_start);
366
		}
367
368
189481
		ret = bwsalloc(sz);
369
189481
		if (sz)
370
189201
			bwsnocpy(ret, s, key_start, sz);
371
189481
	} else
372
		ret = bwsalloc(0);
373
374
189481
	return ret;
375
}
376
377
/*
378
 * Preprocesses a line applying the necessary transformations
379
 * specified by command line options and returns the preprocessed
380
 * string, which can be used to compare.
381
 */
382
int
383
preproc(struct bwstring *s, struct keys_array *ka)
384
{
385
10228498
	if (sort_opts_vals.kflag) {
386
		size_t i;
387
753700
		for (i = 0; i < keys_num; i++) {
388
			struct bwstring *key;
389
			struct key_specs *kspecs;
390
			struct sort_mods *sm;
391
392
189481
			kspecs = &(keys[i]);
393
189481
			key = cut_field(s, kspecs);
394
395
189481
			sm = &(kspecs->sm);
396
189481
			if (sm->dflag)
397
1284
				key = dictionary_order(key);
398
188197
			else if (sm->iflag)
399
1020
				key = ignore_nonprinting(key);
400

377758
			if (sm->fflag || sm->Mflag)
401
1364
				key = ignore_case(key);
402
403
189481
			set_key_on_keys_array(ka, key, i);
404
		}
405
187369
	} else {
406
		struct bwstring *ret = NULL;
407
4926880
		struct sort_mods *sm = default_sort_mods;
408
409
#ifdef GNUSORT_COMPATIBILITY
410
		if (sm->bflag) {
411
			if (ret == NULL)
412
				ret = bwsdup(s);
413
			ret = ignore_leading_blanks(ret);
414
		}
415
#endif
416
4926880
		if (sm->dflag) {
417
152
			if (ret == NULL)
418
152
				ret = bwsdup(s);
419
152
			ret = dictionary_order(ret);
420
4926880
		} else if (sm->iflag) {
421
			if (ret == NULL)
422
				ret = bwsdup(s);
423
			ret = ignore_nonprinting(ret);
424
		}
425

8912076
		if (sm->fflag || sm->Mflag) {
426
941716
			if (ret == NULL)
427
941588
				ret = bwsdup(s);
428
941716
			ret = ignore_case(ret);
429
941716
		}
430
4926880
		if (ret == NULL)
431
3985140
			set_key_on_keys_array(ka, s, 0);
432
		else
433
941740
			set_key_on_keys_array(ka, ret, 0);
434
	}
435
436
5114249
	return 0;
437
}
438
439
cmpcoll_t
440
get_sort_func(struct sort_mods *sm)
441
{
442
6486
	if (sm->nflag)
443
243
		return numcoll;
444
3000
	else if (sm->hflag)
445
		return hnumcoll;
446
3000
	else if (sm->gflag)
447
72
		return gnumcoll;
448
2928
	else if (sm->Mflag)
449
48
		return monthcoll;
450
2880
	else if (sm->Rflag)
451
230
		return randomcoll;
452
2650
	else if (sm->Vflag)
453
		return versioncoll;
454
	else
455
2650
		return wstrcoll;
456
3243
}
457
458
/*
459
 * Compares the given strings.  Returns a positive number if
460
 * the first precedes the second, a negative number if the second is
461
 * the preceding one, and zero if they are equal.  This function calls
462
 * the underlying collate functions, which done the actual comparison.
463
 */
464
int
465
key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset)
466
{
467
	struct sort_mods *sm;
468
	int res = 0;
469
	size_t i;
470
471
62261208
	for (i = 0; i < keys_num; ++i) {
472
19643164
		sm = &(keys[i].sm);
473
474
19643164
		if (sm->rflag)
475
1048874
			res = sm->func(&(ps2->key[i]), &(ps1->key[i]), offset);
476
		else
477
18594290
			res = sm->func(&(ps1->key[i]), &(ps2->key[i]), offset);
478
479
19643164
		if (res)
480
			break;
481
482
		/* offset applies to only the first key */
483
		offset = 0;
484
	}
485
19641616
	return res;
486
}
487
488
/*
489
 * Compare two strings.
490
 * Plain symbol-by-symbol comparison.
491
 */
492
int
493
top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2)
494
{
495
876406
	if (default_sort_mods->rflag) {
496
		const struct bwstring *tmp;
497
498
		tmp = s1;
499
		s1 = s2;
500
		s2 = tmp;
501
6291
	}
502
503
438203
	return bwscoll(s1, s2, 0);
504
}
505
506
/*
507
 * Compare a string and a sort list item, according to the sort specs.
508
 */
509
int
510
str_list_coll(struct bwstring *str1, struct sort_list_item **ss2)
511
{
512
	struct keys_array *ka1;
513
	int ret = 0;
514
515
768
	ka1 = keys_array_alloc();
516
517
384
	preproc(str1, ka1);
518
519
384
	sort_list_item_make_key(*ss2);
520
521
384
	if (debug_sort) {
522
		bwsprintf(stdout, str1, "; s1=<", ">");
523
		bwsprintf(stdout, (*ss2)->str, ", s2=<", ">");
524
	}
525
526
384
	ret = key_coll(ka1, &((*ss2)->ka), 0);
527
528
384
	if (debug_sort)
529
		printf("; cmp1=%d", ret);
530
531
384
	clean_keys_array(str1, ka1);
532
384
	sort_free(ka1);
533
534

700
	if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
535
		ret = top_level_str_coll(str1, ((*ss2)->str));
536
		if (debug_sort)
537
			printf("; cmp2=%d", ret);
538
	}
539
540
384
	if (debug_sort)
541
		putchar('\n');
542
543
384
	return ret;
544
}
545
546
/*
547
 * Compare two sort list items, according to the sort specs.
548
 */
549
int
550
list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2,
551
    size_t offset)
552
{
553
	int ret;
554
555
36591714
	ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset);
556
557
18295857
	if (debug_sort) {
558
		if (offset)
559
			printf("; offset=%zu", offset);
560
		bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">");
561
		bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">");
562
		printf("; cmp1=%d\n", ret);
563
	}
564
565
18295857
	if (ret)
566
16666147
		return ret;
567
568

2068103
	if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
569
419123
		ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str));
570
419123
		if (debug_sort)
571
			printf("; cmp2=%d\n", ret);
572
	}
573
574
1629710
	return ret;
575
18295857
}
576
577
/*
578
 * Compare two sort list items, according to the sort specs.
579
 */
580
int
581
list_coll(const void *ss1, const void *ss2)
582
{
583
44787963
	return list_coll_offset((struct sort_list_item **)ss1,
584
14929321
	    (struct sort_list_item **)ss2, 0);
585
}
586
587
#define LSCDEF(N)											\
588
static int												\
589
list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2)					\
590
{													\
591
													\
592
	return list_coll_offset(ss1, ss2, N);								\
593
}
594
595
LSCDEF(0)
596
20822
LSCDEF(1)
597
151292
LSCDEF(2)
598
423792
LSCDEF(3)
599
948402
LSCDEF(4)
600
2145948
LSCDEF(5)
601
2224940
LSCDEF(6)
602
111232
LSCDEF(7)
603
54614
LSCDEF(8)
604
55814
LSCDEF(9)
605
11288
LSCDEF(10)
606
17304
LSCDEF(11)
607
59040
LSCDEF(12)
608
23428
LSCDEF(13)
609
5522
LSCDEF(14)
610
2360
LSCDEF(15)
611
456224
LSCDEF(16)
612
LSCDEF(17)
613
LSCDEF(18)
614
LSCDEF(19)
615
LSCDEF(20)
616
617
listcoll_t
618
get_list_call_func(size_t offset)
619
{
620
	static const listcoll_t lsarray[] = { list_coll_0, list_coll_1,
621
	    list_coll_2, list_coll_3, list_coll_4, list_coll_5,
622
	    list_coll_6, list_coll_7, list_coll_8, list_coll_9,
623
	    list_coll_10, list_coll_11, list_coll_12, list_coll_13,
624
	    list_coll_14, list_coll_15, list_coll_16, list_coll_17,
625
	    list_coll_18, list_coll_19, list_coll_20 };
626
627
234648
	if (offset <= 20)
628
117324
		return lsarray[offset];
629
630
	return list_coll_0;
631
117324
}
632
633
/*
634
 * Compare two sort list items, only by their original string.
635
 */
636
int
637
list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2)
638
{
639
	return top_level_str_coll(((*ss1)->str), ((*ss2)->str));
640
}
641
642
/*
643
 * Maximum size of a number in the string (before or after decimal point)
644
 */
645
#define MAX_NUM_SIZE (128)
646
647
/*
648
 * Set suffix value
649
 */
650
static void
651
setsuffix(wchar_t c, unsigned char *si)
652
{
653


57156046
	switch (c){
654
	case L'k':
655
	case L'K':
656
		*si = 1;
657
		break;
658
	case L'M':
659
		*si = 2;
660
		break;
661
	case L'G':
662
		*si = 3;
663
		break;
664
	case L'T':
665
		*si = 4;
666
		break;
667
	case L'P':
668
		*si = 5;
669
		break;
670
	case L'E':
671
		*si = 6;
672
		break;
673
	case L'Z':
674
		*si = 7;
675
		break;
676
	case L'Y':
677
		*si = 8;
678
		break;
679
	default:
680
28578023
		*si = 0;
681
28578023
	};
682
28578023
}
683
684
/*
685
 * Read string s and parse the string into a fixed-decimal-point number.
686
 * sign equals -1 if the number is negative (explicit plus is not allowed,
687
 * according to GNU sort's "info sort".
688
 * The number part before decimal point is in the smain, after the decimal
689
 * point is in sfrac, tail is the pointer to the remainder of the string.
690
 */
691
static int
692
read_number(struct bwstring *s0, int *sign, wchar_t *smain, size_t *main_len, wchar_t *sfrac, size_t *frac_len, unsigned char *si)
693
{
694
	bwstring_iterator s;
695
696
57156046
	s = bws_begin(s0);
697
698
	/* always end the fraction with zero, even if we have no fraction */
699
28578023
	sfrac[0] = 0;
700
701
57212382
	while (iswblank(bws_get_iter_value(s)))
702
28168
		s = bws_iterator_inc(s, 1);
703
704
28578023
	if (bws_get_iter_value(s) == (wchar_t)symbol_negative_sign) {
705
840
		*sign = -1;
706
840
		s = bws_iterator_inc(s, 1);
707
840
	}
708
709
	// This is '0', not '\0', do not change this
710

142842393
	while (iswdigit(bws_get_iter_value(s)) &&
711
28584815
	    (bws_get_iter_value(s) == L'0'))
712
28550766
		s = bws_iterator_inc(s, 1);
713
714

85889161
	while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) {
715
28614219
		if (iswdigit(bws_get_iter_value(s))) {
716
59448
			smain[*main_len] = bws_get_iter_value(s);
717
59448
			s = bws_iterator_inc(s, 1);
718
59448
			*main_len += 1;
719

28614219
		} else if (symbol_thousands_sep &&
720
		    (bws_get_iter_value(s) == (wchar_t)symbol_thousands_sep))
721
			s = bws_iterator_inc(s, 1);
722
		else
723
			break;
724
	}
725
726
28578023
	smain[*main_len] = 0;
727
728
28578023
	if (bws_get_iter_value(s) == (wchar_t)symbol_decimal_point) {
729
28543982
		s = bws_iterator_inc(s, 1);
730

571351168
		while (iswdigit(bws_get_iter_value(s)) &&
731
171421068
		    *frac_len < MAX_NUM_SIZE) {
732
171421068
			sfrac[*frac_len] = bws_get_iter_value(s);
733
171421068
			s = bws_iterator_inc(s, 1);
734
171421068
			*frac_len += 1;
735
		}
736
		sfrac[*frac_len] = 0;
737
738

85631970
		while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') {
739
12
			--(*frac_len);
740
			sfrac[*frac_len] = L'\0';
741
		}
742
	}
743
744
28578023
	setsuffix(bws_get_iter_value(s), si);
745
746
28578023
	if ((*main_len + *frac_len) == 0)
747
24
		*sign = 0;
748
749
28578023
	return 0;
750
}
751
752
/*
753
 * Implements string sort.
754
 */
755
static int
756
wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
757
{
758
759
10577658
	if (debug_sort) {
760
		if (offset)
761
			printf("; offset=%zu\n", offset);
762
		bwsprintf(stdout, kv1->k, "; k1=<", ">");
763
		printf("(%zu)", BWSLEN(kv1->k));
764
		bwsprintf(stdout, kv2->k, ", k2=<", ">");
765
		printf("(%zu)", BWSLEN(kv2->k));
766
	}
767
768
5288829
	return bwscoll(kv1->k, kv2->k, offset);
769
}
770
771
/*
772
 * Compare two suffixes
773
 */
774
static inline int
775
cmpsuffix(unsigned char si1, unsigned char si2)
776
{
777
	return (char)si1 - (char)si2;
778
}
779
780
/*
781
 * Implements numeric sort for -n and -h.
782
 */
783
static int
784
numcoll_impl(struct key_value *kv1, struct key_value *kv2,
785
    size_t offset __unused, bool use_suffix)
786
{
787
	struct bwstring *s1, *s2;
788
14311315
	wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1];
789
14311315
	wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1];
790
14311315
	int cmp_res, sign1, sign2;
791
14311315
	size_t frac1, frac2, main1, main2;
792
14311315
	unsigned char SI1, SI2;
793
	bool e1, e2, key1_read, key2_read;
794
795
14311315
	s1 = kv1->k;
796
14311315
	s2 = kv2->k;
797
14311315
	sign1 = sign2 = 0;
798
14311315
	main1 = main2 = 0;
799
14311315
	frac1 = frac2 = 0;
800
801
	key1_read = key2_read = false;
802
803
14311315
	if (debug_sort) {
804
		bwsprintf(stdout, s1, "; k1=<", ">");
805
		bwsprintf(stdout, s2, ", k2=<", ">");
806
	}
807
808
14311315
	if (s1 == s2)
809
		return 0;
810
811
14311315
	if (kv1->hint->status == HS_UNINITIALIZED) {
812
		/* read the number from the string */
813
1095404
		read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
814
		key1_read = true;
815
1095404
		kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10);
816
1095404
		if (main1 < 1 && frac1 < 1)
817
16
			kv1->hint->v.nh.empty=true;
818
1095404
		kv1->hint->v.nh.si = SI1;
819
1095404
		kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ?
820
		    HS_INITIALIZED : HS_ERROR;
821
1095404
		kv1->hint->v.nh.neg = (sign1 < 0) ? true : false;
822
1095404
	}
823
824
14311315
	if (kv2->hint->status == HS_UNINITIALIZED) {
825
		/* read the number from the string */
826
501990
		read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
827
		key2_read = true;
828
501990
		kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10);
829
501990
		if (main2 < 1 && frac2 < 1)
830
8
			kv2->hint->v.nh.empty=true;
831
501990
		kv2->hint->v.nh.si = SI2;
832
501990
		kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ?
833
		    HS_INITIALIZED : HS_ERROR;
834
501990
		kv2->hint->v.nh.neg = (sign2 < 0) ? true : false;
835
501990
	}
836
837

28622158
	if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status ==
838
	    HS_INITIALIZED) {
839
		unsigned long long n1, n2;
840
		bool neg1, neg2;
841
842
14310831
		e1 = kv1->hint->v.nh.empty;
843
14310831
		e2 = kv2->hint->v.nh.empty;
844
845

14310899
		if (e1 && e2)
846
			return 0;
847
848
14310831
		neg1 = kv1->hint->v.nh.neg;
849
14310831
		neg2 = kv2->hint->v.nh.neg;
850
851

14313875
		if (neg1 && !neg2)
852
488
			return -1;
853

14313183
		if (neg2 && !neg1)
854
284
			return 1;
855
856
14310059
		if (e1)
857
36
			return neg2 ? 1 : -1;
858
14310023
		else if (e2)
859
28
			return neg1 ? -1 : 1;
860
861
862
14309995
		if (use_suffix) {
863
			cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si);
864
			if (cmp_res)
865
				return neg1 ? -cmp_res : cmp_res;
866
		}
867
868
14309995
		n1 = kv1->hint->v.nh.n1;
869
14309995
		n2 = kv2->hint->v.nh.n1;
870
14309995
		if (n1 < n2)
871
15959
			return neg1 ? 1 : -1;
872
14294036
		else if (n1 > n2)
873
10416
			return neg1 ? -1 : 1;
874
14283620
	}
875
876
	/* read the numbers from the strings */
877
14284104
	if (!key1_read)
878
13196683
		read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
879
14284104
	if (!key2_read)
880
13783946
		read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
881
882
14284104
	e1 = ((main1 + frac1) == 0);
883
14284104
	e2 = ((main2 + frac2) == 0);
884
885

14284104
	if (e1 && e2)
886
		return 0;
887
888
	/* we know the result if the signs are different */
889
14284104
	if (sign1 < 0 && sign2 >= 0)
890
		return -1;
891
14284104
	if (sign1 >= 0 && sign2 < 0)
892
		return 1;
893
894
14284104
	if (e1)
895
		return (sign2 < 0) ? +1 : -1;
896
14284104
	else if (e2)
897
		return (sign1 < 0) ? -1 : 1;
898
899
14284104
	if (use_suffix) {
900
		cmp_res = cmpsuffix(SI1, SI2);
901
		if (cmp_res)
902
			return (sign1 < 0) ? -cmp_res : cmp_res;
903
	}
904
905
	/* if both numbers are empty assume that the strings are equal */
906
14284104
	if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1)
907
		return 0;
908
909
	/*
910
	 * if the main part is of different size, we know the result
911
	 * (because the leading zeros are removed)
912
	 */
913
14284104
	if (main1 < main2)
914
12
		cmp_res = -1;
915
14284092
	else if (main1 > main2)
916
8
		cmp_res = +1;
917
	/* if the sizes are equal then simple non-collate string compare gives the correct result */
918
	else
919
14284084
		cmp_res = wcscmp(smain1, smain2);
920
921
	/* check fraction */
922
14284104
	if (!cmp_res)
923
14283948
		cmp_res = wcscmp(sfrac1, sfrac2);
924
925
14284104
	if (!cmp_res)
926
82702
		return 0;
927
928
	/* reverse result if the signs are negative */
929
14201402
	if (sign1 < 0 && sign2 < 0)
930
8
		cmp_res = -cmp_res;
931
932
14201402
	return cmp_res;
933
14311315
}
934
935
/*
936
 * Implements numeric sort (-n).
937
 */
938
static int
939
numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
940
{
941
28622630
	return numcoll_impl(kv1, kv2, offset, false);
942
}
943
944
/*
945
 * Implements 'human' numeric sort (-h).
946
 */
947
static int
948
hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
949
{
950
	return numcoll_impl(kv1, kv2, offset, true);
951
}
952
953
/*
954
 * Implements random sort (-R).
955
 */
956
static int
957
randomcoll(struct key_value *kv1, struct key_value *kv2,
958
    size_t offset __unused)
959
{
960
	struct bwstring *s1, *s2;
961
84704
	MD5_CTX ctx1, ctx2;
962
	char *b1, *b2;
963
964
42352
	s1 = kv1->k;
965
42352
	s2 = kv2->k;
966
967
42352
	if (debug_sort) {
968
		bwsprintf(stdout, s1, "; k1=<", ">");
969
		bwsprintf(stdout, s2, ", k2=<", ">");
970
	}
971
972
42352
	if (s1 == s2)
973
		return 0;
974
975
42352
	memcpy(&ctx1, &md5_ctx, sizeof(MD5_CTX));
976
42352
	memcpy(&ctx2, &md5_ctx, sizeof(MD5_CTX));
977
978
42352
	MD5Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1));
979
42352
	MD5Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2));
980
42352
	b1 = MD5End(&ctx1, NULL);
981
42352
	b2 = MD5End(&ctx2, NULL);
982
42352
	if (b1 == NULL) {
983
		if (b2 == NULL)
984
			return 0;
985
		else {
986
			sort_free(b2);
987
			return -1;
988
		}
989
42352
	} else if (b2 == NULL) {
990
		sort_free(b1);
991
		return 1;
992
	} else {
993
		int cmp_res;
994
995
42352
		cmp_res = strcmp(b1, b2);
996
42352
		sort_free(b1);
997
42352
		sort_free(b2);
998
999
42352
		if (!cmp_res)
1000
			cmp_res = bwscoll(s1, s2, 0);
1001
1002
		return cmp_res;
1003
	}
1004
42352
}
1005
1006
/*
1007
 * Implements version sort (-V).
1008
 */
1009
static int
1010
versioncoll(struct key_value *kv1, struct key_value *kv2,
1011
    size_t offset __unused)
1012
{
1013
	struct bwstring *s1, *s2;
1014
1015
	s1 = kv1->k;
1016
	s2 = kv2->k;
1017
1018
	if (debug_sort) {
1019
		bwsprintf(stdout, s1, "; k1=<", ">");
1020
		bwsprintf(stdout, s2, ", k2=<", ">");
1021
	}
1022
1023
	if (s1 == s2)
1024
		return 0;
1025
1026
	return vcmp(s1, s2);
1027
}
1028
1029
/*
1030
 * Check for minus infinity
1031
 */
1032
static inline bool
1033
huge_minus(double d, int err1)
1034
{
1035
	if (err1 == ERANGE)
1036
		if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL)
1037
			return 1;
1038
1039
	return 0;
1040
}
1041
1042
/*
1043
 * Check for plus infinity
1044
 */
1045
static inline bool
1046
huge_plus(double d, int err1)
1047
{
1048
	if (err1 == ERANGE)
1049
		if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL)
1050
			return 1;
1051
1052
	return 0;
1053
}
1054
1055
/*
1056
 * Check whether a function is a NAN
1057
 */
1058
static bool
1059
is_nan(double d)
1060
{
1061
#if defined(NAN)
1062
688
	return (d == NAN || isnan(d));
1063
#else
1064
	return (isnan(d));
1065
#endif
1066
}
1067
1068
/*
1069
 * Compare two NANs
1070
 */
1071
static int
1072
cmp_nans(double d1, double d2)
1073
{
1074
	if (d1 == d2)
1075
		return 0;
1076
	return d1 < d2 ? -1 : 1;
1077
}
1078
1079
/*
1080
 * Implements general numeric sort (-g).
1081
 */
1082
static int
1083
gnumcoll(struct key_value *kv1, struct key_value *kv2,
1084
    size_t offset __unused)
1085
{
1086
	double d1, d2;
1087
	int err1, err2;
1088
624
	bool empty1, empty2, key1_read, key2_read;
1089
1090
	d1 = d2 = 0;
1091
	err1 = err2 = 0;
1092
	key1_read = key2_read = false;
1093
1094
312
	if (debug_sort) {
1095
		bwsprintf(stdout, kv1->k, "; k1=<", ">");
1096
		bwsprintf(stdout, kv2->k, "; k2=<", ">");
1097
	}
1098
1099
312
	if (kv1->hint->status == HS_UNINITIALIZED) {
1100
96
		errno = 0;
1101
96
		d1 = bwstod(kv1->k, &empty1);
1102
96
		err1 = errno;
1103
1104
96
		if (empty1) {
1105
4
			kv1->hint->v.gh.notnum = true;
1106
			kv1->hint->status = HS_INITIALIZED;
1107
96
		} else if (err1 == 0) {
1108
92
			kv1->hint->v.gh.d = d1;
1109
92
			kv1->hint->v.gh.nan = is_nan(d1);
1110
			kv1->hint->status = HS_INITIALIZED;
1111
92
		} else
1112
			kv1->hint->status = HS_ERROR;
1113
1114
		key1_read = true;
1115
96
	}
1116
1117
312
	if (kv2->hint->status == HS_UNINITIALIZED) {
1118
88
		errno = 0;
1119
88
		d2 = bwstod(kv2->k, &empty2);
1120
88
		err2 = errno;
1121
1122
88
		if (empty2) {
1123
8
			kv2->hint->v.gh.notnum = true;
1124
			kv2->hint->status = HS_INITIALIZED;
1125
88
		} else if (err2 == 0) {
1126
80
			kv2->hint->v.gh.d = d2;
1127
80
			kv2->hint->v.gh.nan = is_nan(d2);
1128
			kv2->hint->status = HS_INITIALIZED;
1129
80
		} else
1130
			kv2->hint->status = HS_ERROR;
1131
1132
		key2_read = true;
1133
88
	}
1134
1135

624
	if (kv1->hint->status == HS_INITIALIZED &&
1136
312
	    kv2->hint->status == HS_INITIALIZED) {
1137
#ifdef GNUSORT_COMPATIBILITY
1138
		if (kv1->hint->v.gh.notnum)
1139
			return kv2->hint->v.gh.notnum ? 0 : -1;
1140
		else if (kv2->hint->v.gh.notnum)
1141
			return 1;
1142
#else
1143

324
		if (kv1->hint->v.gh.notnum && kv2->hint->v.gh.notnum)
1144
			return 0;
1145
#endif
1146
1147
312
		if (kv1->hint->v.gh.nan)
1148
			return kv2->hint->v.gh.nan ?
1149
			    cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) : -1;
1150
312
		else if (kv2->hint->v.gh.nan)
1151
			return 1;
1152
1153
312
		d1 = kv1->hint->v.gh.d;
1154
312
		d2 = kv2->hint->v.gh.d;
1155
1156
312
		if (d1 < d2)
1157
72
			return -1;
1158
240
		else if (d1 > d2)
1159
136
			return 1;
1160
		else
1161
104
			return 0;
1162
	}
1163
1164
	if (!key1_read) {
1165
		errno = 0;
1166
		d1 = bwstod(kv1->k, &empty1);
1167
		err1 = errno;
1168
	}
1169
1170
	if (!key2_read) {
1171
		errno = 0;
1172
		d2 = bwstod(kv2->k, &empty2);
1173
		err2 = errno;
1174
	}
1175
1176
	/* Non-value case */
1177
#ifdef GNUSORT_COMPATIBILITY
1178
	if (empty1)
1179
		return empty2 ? 0 : -1;
1180
	else if (empty2)
1181
		return 1;
1182
#else
1183
	if (empty1 && empty2)
1184
		return 0;
1185
#endif
1186
1187
	/* NAN case */
1188
	if (is_nan(d1))
1189
		return is_nan(d2) ? cmp_nans(d1, d2) : -1;
1190
	else if (is_nan(d2))
1191
		return 1;
1192
1193
	/* Infinities */
1194
	if (err1 == ERANGE || err2 == ERANGE) {
1195
		/* Minus infinity case */
1196
		if (huge_minus(d1, err1)) {
1197
			if (huge_minus(d2, err2)) {
1198
				if (d1 == d2)
1199
					return 0;
1200
				return d1 < d2 ? -1 : 1;
1201
			} else
1202
				return -1;
1203
1204
		} else if (huge_minus(d2, err2)) {
1205
			if (huge_minus(d1, err1)) {
1206
				if (d1 == d2)
1207
					return 0;
1208
				return d1 < d2 ? -1 : 1;
1209
			} else
1210
				return 1;
1211
		}
1212
1213
		/* Plus infinity case */
1214
		if (huge_plus(d1, err1)) {
1215
			if (huge_plus(d2, err2)) {
1216
				if (d1 == d2)
1217
					return 0;
1218
				return d1 < d2 ? -1 : 1;
1219
			} else
1220
				return 1;
1221
		} else if (huge_plus(d2, err2)) {
1222
			if (huge_plus(d1, err1)) {
1223
				if (d1 == d2)
1224
					return 0;
1225
				return d1 < d2 ? -1 : 1;
1226
			} else
1227
				return -1;
1228
		}
1229
	}
1230
1231
	if (d1 == d2)
1232
		return 0;
1233
	return d1 < d2 ? -1 : 1;
1234
312
}
1235
1236
/*
1237
 * Implements month sort (-M).
1238
 */
1239
static int
1240
monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused)
1241
{
1242
	int val1, val2;
1243
	bool key1_read, key2_read;
1244
1245
	val1 = val2 = 0;
1246
	key1_read = key2_read = false;
1247
1248
712
	if (debug_sort) {
1249
		bwsprintf(stdout, kv1->k, "; k1=<", ">");
1250
		bwsprintf(stdout, kv2->k, "; k2=<", ">");
1251
	}
1252
1253
356
	if (kv1->hint->status == HS_UNINITIALIZED) {
1254
16
		kv1->hint->v.Mh.m = bws_month_score(kv1->k);
1255
		key1_read = true;
1256
16
		kv1->hint->status = HS_INITIALIZED;
1257
16
	}
1258
1259
356
	if (kv2->hint->status == HS_UNINITIALIZED) {
1260
16
		kv2->hint->v.Mh.m = bws_month_score(kv2->k);
1261
		key2_read = true;
1262
16
		kv2->hint->status = HS_INITIALIZED;
1263
16
	}
1264
1265
356
	if (kv1->hint->status == HS_INITIALIZED) {
1266
36
		val1 = kv1->hint->v.Mh.m;
1267
		key1_read = true;
1268
36
	}
1269
1270
356
	if (kv2->hint->status == HS_INITIALIZED) {
1271
36
		val2 = kv2->hint->v.Mh.m;
1272
		key2_read = true;
1273
36
	}
1274
1275
356
	if (!key1_read)
1276
320
		val1 = bws_month_score(kv1->k);
1277
356
	if (!key2_read)
1278
320
		val2 = bws_month_score(kv2->k);
1279
1280
356
	if (val1 == val2)
1281
40
		return 0;
1282
316
	return val1 < val2 ? -1 : 1;
1283
356
}