GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: usr.bin/sort/sort.c Lines: 393 578 68.0 %
Date: 2017-11-07 Branches: 206 392 52.6 %

Line Branch Exec Source
1
/*	$OpenBSD: sort.c,v 1.87 2017/01/04 15:30:58 millert Exp $	*/
2
3
/*-
4
 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
5
 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
6
 * All rights reserved.
7
 *
8
 * Redistribution and use in source and binary forms, with or without
9
 * modification, are permitted provided that the following conditions
10
 * are met:
11
 * 1. Redistributions of source code must retain the above copyright
12
 *    notice, this list of conditions and the following disclaimer.
13
 * 2. Redistributions in binary form must reproduce the above copyright
14
 *    notice, this list of conditions and the following disclaimer in the
15
 *    documentation and/or other materials provided with the distribution.
16
 *
17
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27
 * SUCH DAMAGE.
28
 */
29
30
#include <sys/resource.h>
31
#include <sys/stat.h>
32
#include <sys/sysctl.h>
33
#include <sys/types.h>
34
35
#include <err.h>
36
#include <errno.h>
37
#include <getopt.h>
38
#include <limits.h>
39
#include <locale.h>
40
#include <md5.h>
41
#include <regex.h>
42
#include <signal.h>
43
#include <stdbool.h>
44
#include <stdint.h>
45
#include <stdio.h>
46
#include <stdlib.h>
47
#include <string.h>
48
#include <unistd.h>
49
#include <wchar.h>
50
#include <wctype.h>
51
52
#include "coll.h"
53
#include "file.h"
54
#include "sort.h"
55
56
/*
 * Short-option string passed to getopt_long() in main().  PERMUTE is the
 * prefix of that string: empty when GNUSORT_COMPATIBILITY is defined,
 * "+" otherwise.
 */
#ifdef GNUSORT_COMPATIBILITY
57
# define PERMUTE	""
58
#else
59
# define PERMUTE	"+"
60
#endif
61
#define	OPTIONS	PERMUTE"bCcdfgHhik:Mmno:RrS:st:T:uVz"
62
63
/* Set when the R modifier is seen; makes set_random_seed() do its work. */
static bool need_random;
64
/* Argument of --random-source, or NULL when the option was not given. */
static const char *random_source;
65
66
/* MD5 context initialised and seeded by set_random_seed(). */
MD5_CTX md5_ctx;
67
68
/* Global sort options; zeroed by set_sort_opts() and filled in by main(). */
struct sort_opts sort_opts_vals;
69
70
/* Set by --debug. */
bool debug_sort;
71
/* Set by the g, M, n and h modifiers in set_sort_modifier(). */
bool need_hint;
72
73
/* Set in set_locale() when GNUSORT_NUMERIC_COMPATIBILITY is in the environment. */
static bool gnusort_numeric_compatibility;
74
75
/* Modifiers given as plain command-line options (not attached to a -k key). */
static struct sort_mods default_sort_mods_object;
76
struct sort_mods * const default_sort_mods = &default_sort_mods_object;
77
78
/* Set by the n and h modifiers; makes --debug print the numeric symbols. */
static bool print_symbols_on_debug;
79
80
/*
81
 * Arguments read from a file (when the --files0-from option is used).
 * argc_from_file0 stays (size_t)-1 until read_fns_from_file0() stores
 * the first name.
82
 */
83
static size_t argc_from_file0 = (size_t)-1;
84
static char **argv_from_file0;
85
86
/*
87
 * Placeholder symbols for options which have no single-character equivalent.
 * Values start at CHAR_MAX + 1; they are the codes getopt_long() returns
 * for the matching entries of long_options[].
88
 */
89
enum {
90
	SORT_OPT = CHAR_MAX + 1,
91
	HELP_OPT,
92
	FF_OPT,
93
	BS_OPT,
94
	VERSION_OPT,
95
	DEBUG_OPT,
96
	RANDOMSOURCE_OPT,
97
	COMPRESSPROGRAM_OPT,
98
	QSORT_OPT,
99
	HEAPSORT_OPT,
100
	RADIXSORT_OPT,
101
	MMAP_OPT
102
};
103
104
/*
 * Ordering flags of which at most one may be used together.
 * check_mutually_exclusive_flags() walks this table and keeps a parallel
 * bool array (mef_flags) indexed by position in it.
 */
#define	NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6
105
static const char mutually_exclusive_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = { 'M', 'n', 'g', 'R', 'h', 'V' };
106
107
/*
 * Long option table passed to getopt_long() in main().  The last field of
 * each entry is the value returned for that option: either the equivalent
 * short option character or one of the *_OPT placeholders.  The table is
 * terminated by an all-NULL/zero entry.
 */
static const struct option long_options[] = {
108
    { "batch-size", required_argument, NULL, BS_OPT },
109
    { "buffer-size", required_argument, NULL, 'S' },
110
    { "check", optional_argument, NULL, 'c' },
111
    { "check=silent|quiet", optional_argument, NULL, 'C' },
112
    { "compress-program", required_argument, NULL, COMPRESSPROGRAM_OPT },
113
    { "debug", no_argument, NULL, DEBUG_OPT },
114
    { "dictionary-order", no_argument, NULL, 'd' },
115
    { "field-separator", required_argument, NULL, 't' },
116
    { "files0-from", required_argument, NULL, FF_OPT },
117
    { "general-numeric-sort", no_argument, NULL, 'g' },
118
    { "heapsort", no_argument, NULL, HEAPSORT_OPT },
119
    { "help", no_argument, NULL, HELP_OPT },
120
    { "human-numeric-sort", no_argument, NULL, 'h' },
121
    { "ignore-leading-blanks", no_argument, NULL, 'b' },
122
    { "ignore-case", no_argument, NULL, 'f' },
123
    { "ignore-nonprinting", no_argument, NULL, 'i' },
124
    { "key", required_argument, NULL, 'k' },
125
    { "merge", no_argument, NULL, 'm' },
126
    { "mergesort", no_argument, NULL, 'H' },
127
    { "mmap", no_argument, NULL, MMAP_OPT },
128
    { "month-sort", no_argument, NULL, 'M' },
129
    { "numeric-sort", no_argument, NULL, 'n' },
130
    { "output", required_argument, NULL, 'o' },
131
    { "qsort", no_argument, NULL, QSORT_OPT },
132
    { "radixsort", no_argument, NULL, RADIXSORT_OPT },
133
    { "random-sort", no_argument, NULL, 'R' },
134
    { "random-source", required_argument, NULL, RANDOMSOURCE_OPT },
135
    { "reverse", no_argument, NULL, 'r' },
136
    { "sort", required_argument, NULL, SORT_OPT },
137
    { "stable", no_argument, NULL, 's' },
138
    { "temporary-directory", required_argument, NULL, 'T' },
139
    { "unique", no_argument, NULL, 'u' },
140
    { "version", no_argument, NULL, VERSION_OPT },
141
    { "version-sort", no_argument, NULL, 'V' },
142
    { "zero-terminated", no_argument, NULL, 'z' },
143
    { NULL, no_argument, NULL, 0 }
144
};
145
146
/*
147
 * Check where sort modifier is present
148
 */
149
static bool
150
sort_modifier_empty(struct sort_mods *sm)
151
{
152


8701
	return !(sm->Mflag || sm->Vflag || sm->nflag || sm->gflag ||
153


5800
	    sm->rflag || sm->Rflag || sm->hflag || sm->dflag || sm->fflag);
154
}
155
156
/*
157
 * Print out usage text.
158
 */
159
static __dead void
160
usage(int exit_val)
161
{
162
	fprintf(exit_val ? stderr : stdout,
163
	    "usage: %s [-bCcdfgHhiMmnRrsuVz] [-k field1[,field2]] [-o output] "
164
	    "[-S size]\n\t[-T dir] [-t char] [file ...]\n", getprogname());
165
	exit(exit_val);
166
}
167
168
/*
169
 * Read input file names from a file (file0-from option).
170
 */
171
static void
172
read_fns_from_file0(const char *fn)
173
{
174
	FILE *f;
175
	char *line = NULL;
176
	size_t linesize = 0;
177
	ssize_t linelen;
178
179
	f = fopen(fn, "r");
180
	if (f == NULL)
181
		err(2, "%s", fn);
182
183
	while ((linelen = getdelim(&line, &linesize, '\0', f)) != -1) {
184
		if (*line != '\0') {
185
			if (argc_from_file0 == (size_t)-1)
186
				argc_from_file0 = 0;
187
			++argc_from_file0;
188
			argv_from_file0 = sort_reallocarray(argv_from_file0,
189
			    argc_from_file0, sizeof(char *));
190
			argv_from_file0[argc_from_file0 - 1] = line;
191
		} else {
192
			free(line);
193
		}
194
		line = NULL;
195
		linesize = 0;
196
	}
197
	if (ferror(f))
198
		err(2, "%s: getdelim", fn);
199
200
	closefile(f, fn);
201
}
202
203
/*
204
 * Check how much RAM is available for the sort.
205
 */
206
static void
207
set_hw_params(void)
208
{
209
	unsigned long long free_memory;
210
2544
	long long user_memory;
211
1272
	struct rlimit rl;
212
1272
	size_t len;
213
1272
	int mib[] = { CTL_HW, HW_USERMEM64 };
214
215
	/* Get total user (non-kernel) memory. */
216
1272
	len = sizeof(user_memory);
217
1272
	if (sysctl(mib, 2, &user_memory, &len, NULL, 0) == -1)
218
	    user_memory = -1;
219
220
	/* Increase our data size to the max */
221
1272
	if (getrlimit(RLIMIT_DATA, &rl) == 0) {
222
1272
		free_memory = (unsigned long long)rl.rlim_cur;
223
1272
		rl.rlim_cur = rl.rlim_max;
224
1272
		if (setrlimit(RLIMIT_DATA, &rl) == 0) {
225
1272
			free_memory = (unsigned long long)rl.rlim_max;
226
1272
		} else {
227
			warn("Can't set resource limit to max data size");
228
		}
229
	} else {
230
		free_memory = 1000000;
231
		warn("Can't get resource limit for data size");
232
	}
233
234
	/* We prefer to use temp files rather than swap space. */
235

2544
	if (user_memory != -1 && free_memory > user_memory)
236
1272
		free_memory = user_memory;
237
238
1272
	available_free_memory = free_memory / 2;
239
1272
}
240
241
/*
 * Convert the first multibyte character of c to a wide character.
 * On success the result is stored in *wc.  When mbtowc() reports an
 * invalid sequence or an empty string (result below 1), *wc is set to
 * def instead.
 */
static void
conv_mbtowc(wchar_t *wc, const char *c, const wchar_t def)
{
	if (mbtowc(wc, c, MB_CUR_MAX) >= 1)
		return;

	*wc = def;
}
253
254
/*
255
 * Set current locale symbols.
256
 */
257
static void
258
set_locale(void)
259
{
260
	struct lconv *lc;
261
	const char *locale;
262
263
2544
	setlocale(LC_ALL, "");
264
265
	/* Obtain LC_NUMERIC info */
266
1272
	lc = localeconv();
267
268
	/* Convert to wide char form */
269
2544
	conv_mbtowc(&symbol_decimal_point, lc->decimal_point,
270
1272
	    symbol_decimal_point);
271
2544
	conv_mbtowc(&symbol_thousands_sep, lc->thousands_sep,
272
1272
	    symbol_thousands_sep);
273
2544
	conv_mbtowc(&symbol_positive_sign, lc->positive_sign,
274
1272
	    symbol_positive_sign);
275
2544
	conv_mbtowc(&symbol_negative_sign, lc->negative_sign,
276
1272
	    symbol_negative_sign);
277
278
1272
	if (getenv("GNUSORT_NUMERIC_COMPATIBILITY"))
279
		gnusort_numeric_compatibility = true;
280
281
1272
	locale = setlocale(LC_COLLATE, NULL);
282
1272
	if (locale != NULL) {
283
		char *tmpl;
284
		const char *byteclocale;
285
286
1272
		tmpl = sort_strdup(locale);
287
1272
		byteclocale = setlocale(LC_COLLATE, "C");
288

2544
		if (byteclocale && strcmp(byteclocale, tmpl) == 0) {
289
1272
			byte_sort = true;
290
1272
		} else {
291
			byteclocale = setlocale(LC_COLLATE, "POSIX");
292
			if (byteclocale && strcmp(byteclocale, tmpl) == 0)
293
				byte_sort = true;
294
			else
295
				setlocale(LC_COLLATE, tmpl);
296
		}
297
1272
		sort_free(tmpl);
298
1272
	}
299
1272
	if (!byte_sort)
300
		sort_mb_cur_max = MB_CUR_MAX;
301
1272
}
302
303
/*
304
 * Set directory temporary files.
305
 */
306
static void
307
set_tmpdir(void)
308
{
309
2544
	if (!issetugid()) {
310
		char *td;
311
312
1272
		td = getenv("TMPDIR");
313
1272
		if (td != NULL)
314
2
			tmpdir = td;
315
1272
	}
316
1272
}
317
318
/*
319
 * Parse -S option.
320
 */
321
static unsigned long long
322
parse_memory_buffer_value(const char *value)
323
{
324
8
	char *endptr;
325
	unsigned long long membuf;
326
327
4
	membuf = strtoll(value, &endptr, 10);
328
4
	if (endptr == value || (long long)membuf < 0 ||
329
4
	    (errno == ERANGE && membuf == LLONG_MAX))
330
		goto invalid;
331
332



8
	switch (*endptr) {
333
	case 'Y':
334
		if (membuf > ULLONG_MAX / 1024)
335
			goto invalid;
336
		membuf *= 1024;
337
		/* FALLTHROUGH */
338
	case 'Z':
339
		if (membuf > ULLONG_MAX / 1024)
340
			goto invalid;
341
		membuf *= 1024;
342
		/* FALLTHROUGH */
343
	case 'E':
344
		if (membuf > ULLONG_MAX / 1024)
345
			goto invalid;
346
		membuf *= 1024;
347
		/* FALLTHROUGH */
348
	case 'P':
349
		if (membuf > ULLONG_MAX / 1024)
350
			goto invalid;
351
		membuf *= 1024;
352
		/* FALLTHROUGH */
353
	case 'T':
354
		if (membuf > ULLONG_MAX / 1024)
355
			goto invalid;
356
		membuf *= 1024;
357
		/* FALLTHROUGH */
358
	case 'G':
359
		if (membuf > ULLONG_MAX / 1024)
360
			goto invalid;
361
		membuf *= 1024;
362
		/* FALLTHROUGH */
363
	case 'M':
364
		if (membuf > ULLONG_MAX / 1024)
365
			goto invalid;
366
		membuf *= 1024;
367
		/* FALLTHROUGH */
368
	case '\0':
369
	case 'K':
370
4
		if (membuf > ULLONG_MAX / 1024)
371
			goto invalid;
372
4
		membuf *= 1024;
373
		/* FALLTHROUGH */
374
	case 'b':
375
		break;
376
	case '%':
377
		if (available_free_memory != 0 &&
378
		    membuf > ULLONG_MAX / available_free_memory)
379
			goto invalid;
380
		membuf = (available_free_memory * membuf) /
381
		    100;
382
		break;
383
	default:
384
		warnc(EINVAL, "%s", optarg);
385
		membuf = available_free_memory;
386
	}
387
4
	if (membuf > SIZE_MAX)
388
		goto invalid;
389
4
	return membuf;
390
invalid:
391
	errx(2, "invalid memory buffer size: %s", value);
392
4
}
393
394
/*
395
 * Signal handler that clears the temporary files.
396
 */
397
static void
398
sig_handler(int sig __unused)
399
{
400
	clear_tmp_files();
401
	_exit(2);
402
}
403
404
/*
405
 * Set signal handler on panic signals.
406
 */
407
static void
408
set_signal_handler(void)
409
{
410
2544
	struct sigaction sa;
411
1272
	int i, signals[] = {SIGTERM, SIGHUP, SIGINT, SIGUSR1, SIGUSR2,
412
	    SIGPIPE, SIGXCPU, SIGXFSZ, 0};
413
414
1272
	memset(&sa, 0, sizeof(sa));
415
1272
	sigfillset(&sa.sa_mask);
416
1272
	sa.sa_flags = SA_RESTART;
417
1272
	sa.sa_handler = sig_handler;
418
419
22896
	for (i = 0; signals[i] != 0; i++) {
420
10176
		if (sigaction(signals[i], &sa, NULL) < 0) {
421
			warn("sigaction(%s)", strsignal(signals[i]));
422
			continue;
423
		}
424
	}
425
1272
}
426
427
/*
 * Report an unrecognised feature name and exit with status 2.
 * what is the offending string as given by the user.
 */
static void
unknown(const char *what)
{

	errx(2, "Unknown feature: %s", what);
}
435
436
/*
437
 * Check whether contradictory input options are used.
438
 */
439
static void
440
check_mutually_exclusive_flags(char c, bool *mef_flags)
441
{
442
	int i, fo_index, mec;
443
	bool found_others, found_this;
444
445
	found_others = found_this = false;
446
	fo_index = 0;
447
448
40140
	for (i = 0; i < NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS; i++) {
449
16056
		mec = mutually_exclusive_flags[i];
450
451
16056
		if (mec != c) {
452
15786
			if (mef_flags[i]) {
453
29
				if (found_this) {
454
					errx(2,
455
					    "%c:%c: mutually exclusive flags",
456
					    c, mec);
457
				}
458
				found_others = true;
459
				fo_index = i;
460
29
			}
461
		} else {
462
270
			if (found_others) {
463
				errx(2,
464
				    "%c:%c: mutually exclusive flags",
465
				    c, mutually_exclusive_flags[fo_index]);
466
			}
467
270
			mef_flags[i] = true;
468
			found_this = true;
469
		}
470
	}
471
2676
}
472
473
/*
474
 * Initialise sort opts data.
475
 */
476
static void
477
set_sort_opts(void)
478
{
479
2544
	memset(&default_sort_mods_object, 0,
480
	    sizeof(default_sort_mods_object));
481
1272
	memset(&sort_opts_vals, 0, sizeof(sort_opts_vals));
482
1272
	default_sort_mods_object.func =
483
1272
	    get_sort_func(&default_sort_mods_object);
484
1272
}
485
486
/*
487
 * Set a sort modifier on a sort modifiers object.
488
 */
489
static bool
490
set_sort_modifier(struct sort_mods *sm, int c)
491
{
492



5288
	switch (c) {
493
	case 'b':
494
56
		sm->bflag = true;
495
56
		break;
496
	case 'd':
497
72
		sm->dflag = true;
498
72
		break;
499
	case 'f':
500
68
		sm->fflag = true;
501
68
		break;
502
	case 'g':
503
36
		sm->gflag = true;
504
36
		need_hint = true;
505
36
		break;
506
	case 'i':
507
8
		sm->iflag = true;
508
8
		break;
509
	case 'R':
510
95
		sm->Rflag = true;
511
95
		need_random = true;
512
95
		break;
513
	case 'M':
514
20
		initialise_months();
515
20
		sm->Mflag = true;
516
20
		need_hint = true;
517
20
		break;
518
	case 'n':
519
119
		sm->nflag = true;
520
119
		need_hint = true;
521
119
		print_symbols_on_debug = true;
522
119
		break;
523
	case 'r':
524
149
		sm->rflag = true;
525
149
		break;
526
	case 'V':
527
		sm->Vflag = true;
528
		break;
529
	case 'h':
530
		sm->hflag = true;
531
		need_hint = true;
532
		print_symbols_on_debug = true;
533
		break;
534
	default:
535
2021
		return false;
536
	}
537
623
	sort_opts_vals.complex_sort = true;
538
623
	sm->func = get_sort_func(sm);
539
540
623
	return true;
541
2644
}
542
543
/*
544
 * Parse POS in -k option.
545
 */
546
static int
547
parse_pos(const char *s, struct key_specs *ks, bool *mef_flags, bool second)
548
{
549
1212
	regmatch_t pmatch[4];
550
1212
	regex_t re;
551
	char *c, *f;
552
	const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([bdfirMngRhV]+)?$";
553
	size_t len, nmatch;
554
	int ret;
555
556
	ret = -1;
557
	nmatch = 4;
558
	c = f = NULL;
559
560
1212
	if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
561
		return -1;
562
563
1212
	if (regexec(&re, s, nmatch, pmatch, 0) != 0)
564
		goto end;
565
566
1212
	if (pmatch[0].rm_eo <= pmatch[0].rm_so)
567
		goto end;
568
569
1212
	if (pmatch[1].rm_eo <= pmatch[1].rm_so)
570
		goto end;
571
572
1212
	len = pmatch[1].rm_eo - pmatch[1].rm_so;
573
574
1212
	f = sort_malloc(len + 1);
575
1212
	memcpy(f, s + pmatch[1].rm_so, len);
576
1212
	f[len] = '\0';
577
578
1212
	if (second) {
579
1212
		errno = 0;
580
1456
		ks->f2 = (size_t)strtoul(f, NULL, 10);
581
244
		if (errno != 0)
582
			goto end;
583
244
		if (ks->f2 == 0) {
584
			warn("0 field in key specs");
585
			goto end;
586
		}
587
	} else {
588
		errno = 0;
589
968
		ks->f1 = (size_t)strtoul(f, NULL, 10);
590
968
		if (errno != 0)
591
			goto end;
592
968
		if (ks->f1 == 0) {
593
			warn("0 field in key specs");
594
			goto end;
595
		}
596
	}
597
598
1212
	if (pmatch[2].rm_eo > pmatch[2].rm_so) {
599
288
		len = pmatch[2].rm_eo - pmatch[2].rm_so - 1;
600
601
288
		c = sort_malloc(len + 1);
602
288
		memcpy(c, s + pmatch[2].rm_so + 1, len);
603
288
		c[len] = '\0';
604
605
288
		if (second) {
606
288
			errno = 0;
607
408
			ks->c2 = (size_t)strtoul(c, NULL, 10);
608
120
			if (errno != 0)
609
				goto end;
610
		} else {
611
			errno = 0;
612
168
			ks->c1 = (size_t)strtoul(c, NULL, 10);
613
168
			if (errno != 0)
614
				goto end;
615
168
			if (ks->c1 == 0) {
616
				warn("0 column in key specs");
617
				goto end;
618
			}
619
		}
620
	} else {
621
924
		if (second)
622
124
			ks->c2 = 0;
623
		else
624
800
			ks->c1 = 1;
625
	}
626
627
1212
	if (pmatch[3].rm_eo > pmatch[3].rm_so) {
628
		regoff_t i = 0;
629
630
704
		for (i = pmatch[3].rm_so; i < pmatch[3].rm_eo; i++) {
631
176
			check_mutually_exclusive_flags(s[i], mef_flags);
632
176
			if (s[i] == 'b') {
633
32
				if (second)
634
16
					ks->pos2b = true;
635
				else
636
16
					ks->pos1b = true;
637
144
			} else if (!set_sort_modifier(&(ks->sm), s[i]))
638
				goto end;
639
		}
640
176
	}
641
642
1212
	ret = 0;
643
644
end:
645
1212
	sort_free(c);
646
1212
	sort_free(f);
647
1212
	regfree(&re);
648
649
1212
	return ret;
650
1212
}
651
652
/*
653
 * Parse -k option value.
654
 */
655
static int
656
parse_k(const char *s, struct key_specs *ks)
657
{
658
	int ret = -1;
659
1936
	bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
660
	    { false, false, false, false, false, false };
661
662
968
	if (*s != '\0') {
663
		char *sptr;
664
665
968
		sptr = strchr(s, ',');
666
968
		if (sptr) {
667
			size_t size1;
668
			char *pos1, *pos2;
669
670
244
			size1 = sptr - s;
671
672
244
			if (size1 < 1)
673
				return -1;
674
675
244
			pos1 = sort_malloc(size1 + 1);
676
244
			memcpy(pos1, s, size1);
677
244
			pos1[size1] = '\0';
678
679
244
			ret = parse_pos(pos1, ks, mef_flags, false);
680
681
244
			sort_free(pos1);
682
244
			if (ret < 0)
683
				return ret;
684
685
244
			pos2 = sort_strdup(sptr + 1);
686
244
			ret = parse_pos(pos2, ks, mef_flags, true);
687
244
			sort_free(pos2);
688
244
		} else
689
724
			ret = parse_pos(s, ks, mef_flags, false);
690
968
	}
691
692
968
	return ret;
693
968
}
694
695
/*
 * Parse one POS of the obsolete +POS1 -POS2 syntax:
 * FIELD[.COLUMN][LETTERS].
 *
 * s           the position string, without the leading '+' or '-'
 * nf, nc      receive the field and column numbers (0 when absent)
 * sopts       receives the trailing letters as a NUL-terminated string;
 *             left untouched when there are none
 * sopts_size  size of the sopts buffer
 *
 * Returns 0 on success and -1 when s does not match the syntax.  Exits
 * with status 2 on a numeric conversion error or when the letters do not
 * fit in sopts.
 */
static int
parse_pos_obs(const char *s, size_t *nf, size_t *nc, char *sopts, size_t sopts_size)
{
	const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([A-Za-z]+)?$";
	regex_t re;
	regmatch_t m[4];
	char *colstr = NULL, *fieldstr = NULL;
	size_t n;
	int rv = -1;

	*nc = *nf = 0;

	if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
		return -1;

	if (regexec(&re, s, 4, m, 0) != 0 ||
	    m[0].rm_eo <= m[0].rm_so ||
	    m[1].rm_eo <= m[1].rm_so)
		goto end;

	/* Group 1: field number. */
	n = m[1].rm_eo - m[1].rm_so;
	fieldstr = sort_malloc(n + 1);
	memcpy(fieldstr, s + m[1].rm_so, n);
	fieldstr[n] = '\0';

	errno = 0;
	*nf = (size_t)strtoul(fieldstr, NULL, 10);
	if (errno != 0)
		errx(2, "Invalid key position");

	/* Group 2: ".column"; the leading dot is not copied. */
	if (m[2].rm_eo > m[2].rm_so) {
		n = m[2].rm_eo - m[2].rm_so - 1;
		colstr = sort_malloc(n + 1);
		memcpy(colstr, s + m[2].rm_so + 1, n);
		colstr[n] = '\0';

		errno = 0;
		*nc = (size_t)strtoul(colstr, NULL, 10);
		if (errno != 0)
			errx(2, "Invalid key position");
	}

	/* Group 3: option letters, copied verbatim. */
	if (m[3].rm_eo > m[3].rm_so) {
		n = m[3].rm_eo - m[3].rm_so;
		if (n >= sopts_size)
			errx(2, "Invalid key position");
		memcpy(sopts, s + m[3].rm_so, n);
		sopts[n] = '\0';
	}

	rv = 0;

end:
	sort_free(colstr);
	sort_free(fieldstr);
	regfree(&re);

	return rv;
}
768
769
/*
 * "Translate" obsolete +POS1 [-POS2] arguments into the -kPOS1[,POS2]
 * syntax, rewriting argv in place.
 *
 * Every argument starting with '+' that parses as an obsolete position
 * is replaced by a newly allocated "-k..." string.  If the next argument
 * starts with '-' and also parses as a position, it is folded into the
 * same -k option, the remaining arguments are shifted down by one and
 * *argc is decremented.  The obsolete positions are zero-based, so field
 * and column of POS1 are incremented by one; the field of POS2 is
 * incremented only when a non-zero column was given.
 */
static void
fix_obsolete_keys(int *argc, char **argv)
{
	char sopt[129];
	int i;

	for (i = 1; i < *argc; i++) {
		const char *first = argv[i];
		size_t c1 = 0, f1 = 0;
		char sopts1[128];

		if (first[0] != '+')
			continue;

		sopts1[0] = 0;
		if (parse_pos_obs(first + 1, &f1, &c1, sopts1,
		    sizeof(sopts1)) < 0)
			continue;

		f1 += 1;
		c1 += 1;

		if (i + 1 < *argc && argv[i + 1][0] == '-') {
			const char *second = argv[i + 1];
			size_t c2 = 0, f2 = 0;
			char sopts2[128];

			sopts2[0] = 0;
			if (parse_pos_obs(second + 1, &f2, &c2,
			    sopts2, sizeof(sopts2)) >= 0) {
				int j;

				if (c2 > 0)
					f2 += 1;
				snprintf(sopt, sizeof(sopt),
				    "-k%zu.%zu%s,%zu.%zu%s",
				    f1, c1, sopts1, f2,
				    c2, sopts2);
				argv[i] = sort_strdup(sopt);
				/* Drop the consumed -POS2 argument. */
				for (j = i + 1; j + 1 < *argc; j++)
					argv[j] = argv[j + 1];
				*argc -= 1;
				continue;
			}
		}

		/* No usable -POS2 follows: emit POS1 alone. */
		snprintf(sopt, sizeof(sopt), "-k%zu.%zu%s",
		    f1, c1, sopts1);
		argv[i] = sort_strdup(sopt);
	}
}
827
828
/*
829
 * Set random seed
830
 */
831
static void
832
set_random_seed(void)
833
{
834
1888
	if (!need_random)
835
		return;
836
837
95
	MD5Init(&md5_ctx);
838
95
	if (random_source != NULL) {
839
		unsigned char buf[BUFSIZ];
840
		size_t nr;
841
		FILE *fp;
842
843
		if ((fp = fopen(random_source, "r")) == NULL)
844
			err(2, "%s", random_source);
845
		while ((nr = fread(buf, 1, sizeof(buf), fp)) != 0)
846
			MD5Update(&md5_ctx, buf, nr);
847
		if (ferror(fp))
848
			err(2, "%s", random_source);
849
		fclose(fp);
850
	} else {
851
95
		unsigned char rsd[1024];
852
853
95
		arc4random_buf(rsd, sizeof(rsd));
854
95
		MD5Update(&md5_ctx, rsd, sizeof(rsd));
855
95
	}
856
944
}
857
858
/*
859
 * Main function.
860
 */
861
int
862
main(int argc, char *argv[])
863
{
864
1272
	char *outfile, *real_outfile, *sflag;
865
	int c;
866
	size_t i;
867
	struct sort_mods *sm = &default_sort_mods_object;
868
1272
	bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
869
	    { false, false, false, false, false, false };
870
871
1272
	set_hw_params();
872
873
1272
	if (pledge("stdio rpath wpath cpath fattr chown proc exec flock", NULL) == -1)
874
		err(2, "pledge");
875
876
1272
	outfile = "-";
877
	real_outfile = NULL;
878
	sflag = NULL;
879
880
1272
	init_tmp_files();
881
882
1272
	set_signal_handler();
883
884
1272
	atexit(clear_tmp_files);
885
886
1272
	set_locale();
887
1272
	set_tmpdir();
888
1272
	set_sort_opts();
889
890
1272
	fix_obsolete_keys(&argc, argv);
891
892
8816
	while (((c = getopt_long(argc, argv, OPTIONS, long_options, NULL))
893
3772
	    != -1)) {
894
895
2500
		check_mutually_exclusive_flags(c, mef_flags);
896
897
2500
		if (!set_sort_modifier(sm, c)) {
898






2021
			switch (c) {
899
			case 'c':
900
312
				sort_opts_vals.cflag = true;
901
312
				if (optarg) {
902
					if (!strcmp(optarg, "diagnose-first"))
903
						;
904
					else if (!strcmp(optarg, "silent") ||
905
					    !strcmp(optarg, "quiet"))
906
						sort_opts_vals.csilentflag = true;
907
					else if (*optarg)
908
						unknown(optarg);
909
				}
910
				break;
911
			case 'C':
912
16
				sort_opts_vals.cflag = true;
913
16
				sort_opts_vals.csilentflag = true;
914
16
				break;
915
			case 'k':
916
			{
917
968
				sort_opts_vals.complex_sort = true;
918
968
				sort_opts_vals.kflag = true;
919
920
968
				keys = sort_reallocarray(keys, keys_num + 1,
921
				    sizeof(struct key_specs));
922
968
				memset(&(keys[keys_num]), 0,
923
				    sizeof(struct key_specs));
924
#ifndef GNUSORT_COMPATIBILITY
925
968
				keys[keys_num].pos1b = default_sort_mods->bflag;
926
968
				keys[keys_num].pos2b = default_sort_mods->bflag;
927
#endif
928
929
968
				if (parse_k(optarg, &(keys[keys_num++])) < 0)
930
					errc(2, EINVAL, "-k %s", optarg);
931
932
				break;
933
			}
934
			case 'm':
935
40
				sort_opts_vals.mflag = true;
936
40
				break;
937
			case 'o':
938
428
				outfile = optarg;
939
428
				break;
940
			case 's':
941
20
				sort_opts_vals.sflag = true;
942
20
				break;
943
			case 'S':
944
4
				sflag = optarg;
945
4
				break;
946
			case 'T':
947
5
				tmpdir = optarg;
948
5
				break;
949
			case 't':
950
144
				while (strlen(optarg) > 1) {
951
					if (optarg[0] != '\\') {
952
						errc(2, EINVAL, "%s", optarg);
953
					}
954
					optarg += 1;
955
					if (*optarg == '0') {
956
						*optarg = 0;
957
						break;
958
					}
959
				}
960
144
				sort_opts_vals.tflag = true;
961
144
				sort_opts_vals.field_sep = btowc(optarg[0]);
962
144
				if (sort_opts_vals.field_sep == WEOF) {
963
					errno = EINVAL;
964
					err(2, NULL);
965
				}
966
144
				if (!gnusort_numeric_compatibility) {
967
144
					if (symbol_decimal_point == sort_opts_vals.field_sep)
968
						symbol_decimal_point = WEOF;
969
144
					if (symbol_thousands_sep == sort_opts_vals.field_sep)
970
						symbol_thousands_sep = WEOF;
971
144
					if (symbol_negative_sign == sort_opts_vals.field_sep)
972
						symbol_negative_sign = WEOF;
973
144
					if (symbol_positive_sign == sort_opts_vals.field_sep)
974
						symbol_positive_sign = WEOF;
975
				}
976
				break;
977
			case 'u':
978
82
				sort_opts_vals.uflag = true;
979
				/* stable sort for the correct unique val */
980
82
				sort_opts_vals.sflag = true;
981
82
				break;
982
			case 'z':
983
				sort_opts_vals.zflag = true;
984
				break;
985
			case SORT_OPT:
986
				if (!strcmp(optarg, "general-numeric"))
987
					set_sort_modifier(sm, 'g');
988
				else if (!strcmp(optarg, "human-numeric"))
989
					set_sort_modifier(sm, 'h');
990
				else if (!strcmp(optarg, "numeric"))
991
					set_sort_modifier(sm, 'n');
992
				else if (!strcmp(optarg, "month"))
993
					set_sort_modifier(sm, 'M');
994
				else if (!strcmp(optarg, "random"))
995
					set_sort_modifier(sm, 'R');
996
				else
997
					unknown(optarg);
998
				break;
999
			case QSORT_OPT:
1000
				sort_opts_vals.sort_method = SORT_QSORT;
1001
				break;
1002
			case 'H':
1003
2
				sort_opts_vals.sort_method = SORT_MERGESORT;
1004
2
				break;
1005
			case MMAP_OPT:
1006
				use_mmap = true;
1007
				break;
1008
			case HEAPSORT_OPT:
1009
				sort_opts_vals.sort_method = SORT_HEAPSORT;
1010
				break;
1011
			case RADIXSORT_OPT:
1012
				sort_opts_vals.sort_method = SORT_RADIXSORT;
1013
				break;
1014
			case RANDOMSOURCE_OPT:
1015
				random_source = optarg;
1016
				break;
1017
			case COMPRESSPROGRAM_OPT:
1018
				compress_program = optarg;
1019
				break;
1020
			case FF_OPT:
1021
				read_fns_from_file0(optarg);
1022
				break;
1023
			case BS_OPT:
1024
			{
1025
				const char *errstr;
1026
1027
				max_open_files = strtonum(optarg, 2,
1028
				    UINT_MAX - 1, &errstr) + 1;
1029
				if (errstr != NULL)
1030
					errx(2, "--batch-size argument is %s",
1031
					    errstr);
1032
				break;
1033
			}
1034
			case VERSION_OPT:
1035
				printf("%s\n", VERSION);
1036
				exit(EXIT_SUCCESS);
1037
				/* NOTREACHED */
1038
				break;
1039
			case DEBUG_OPT:
1040
				debug_sort = true;
1041
				break;
1042
			case HELP_OPT:
1043
				usage(0);
1044
				/* NOTREACHED */
1045
				break;
1046
			default:
1047
				usage(2);
1048
				/* NOTREACHED */
1049
			}
1050
		}
1051
	}
1052
1272
	argc -= optind;
1053
1272
	argv += optind;
1054
1055
1272
	if (compress_program == NULL) {
1056
1272
		if (pledge("stdio rpath wpath cpath fattr chown flock", NULL) == -1)
1057
			err(2, "pledge");
1058
	}
1059
1060
#ifndef GNUSORT_COMPATIBILITY
1061

1300
	if (argc > 2 && strcmp(argv[argc - 2], "-o") == 0) {
1062
12
		outfile = argv[argc - 1];
1063
12
		argc -= 2;
1064
12
	}
1065
#endif
1066
1067
1272
	if (argv_from_file0) {
1068
		argc = argc_from_file0;
1069
		argv = argv_from_file0;
1070
	}
1071
1072
1272
	if (sort_opts_vals.cflag) {
1073
328
		if (argc > 1)
1074
			errx(2, "only one input file is allowed with the -%c flag",
1075
			    sort_opts_vals.csilentflag ? 'C' : 'c');
1076
1077

628
		if (argc == 0 || strcmp(argv[0], "-") == 0) {
1078
12
			if (compress_program) {
1079
				if (pledge("stdio proc exec flock rpath cpath wpath", NULL) == -1)
1080
					err(2, "pledge");
1081
			} else {
1082
12
				if (pledge("stdio flock rpath cpath wpath", NULL) == -1)
1083
					err(2, "pledge");
1084
			}
1085
		} else {
1086
308
			if (compress_program) {
1087
				if (pledge("stdio rpath proc exec flock cpath wpath", NULL) == -1)
1088
					err(2, "pledge");
1089
			} else {
1090
308
				if (pledge("stdio rpath flock cpath wpath", NULL) == -1)
1091
					err(2, "pledge");
1092
			}
1093
		}
1094
	} else {
1095
		/* Case when the outfile equals one of the input files: */
1096
944
		if (strcmp(outfile, "-") != 0) {
1097
424
			struct stat sb;
1098
			int fd, i;
1099
1100
1024
			for (i = 0; i < argc; ++i) {
1101
488
				if (strcmp(argv[i], outfile) == 0) {
1102
400
					if (stat(outfile, &sb) == -1)
1103
						err(2, "%s", outfile);
1104
400
					if (access(outfile, W_OK) == -1)
1105
						err(2, "%s", outfile);
1106
					real_outfile = outfile;
1107
400
					sort_asprintf(&outfile, "%s.XXXXXXXXXX",
1108
					    real_outfile);
1109
400
					if ((fd = mkstemp(outfile)) == -1)
1110
						err(2, "%s", outfile);
1111
400
					(void)fchown(fd, sb.st_uid, sb.st_gid);
1112
400
					if (fchmod(fd, sb.st_mode & ACCESSPERMS) == -1)
1113
						err(2, "%s", outfile);
1114
400
					close(fd);
1115
400
					tmp_file_atexit(outfile);
1116
400
					break;
1117
				}
1118
			}
1119
424
		}
1120
1121
944
		if (compress_program) {
1122
			if (pledge("stdio rpath wpath cpath proc exec flock", NULL) == -1)
1123
				err(2, "pledge");
1124
		} else {
1125
944
			if (pledge("stdio rpath wpath cpath flock", NULL) == -1)
1126
				err(2, "pledge");
1127
		}
1128
	}
1129
1130
1264
	if (sflag != NULL)
1131
4
		available_free_memory = parse_memory_buffer_value(sflag);
1132
1133
1264
	if (keys_num == 0) {
1134
512
		keys_num = 1;
1135
512
		keys = sort_reallocarray(keys, 1, sizeof(struct key_specs));
1136
512
		memset(&(keys[0]), 0, sizeof(struct key_specs));
1137
512
		keys[0].c1 = 1;
1138
#ifdef GNUSORT_COMPATIBILITY
1139
		keys[0].pos1b = sm->bflag;
1140
		keys[0].pos2b = sm->bflag;
1141
#endif
1142
512
		memcpy(&(keys[0].sm), sm, sizeof(struct sort_mods));
1143
512
	}
1144
1145
5488
	for (i = 0; i < keys_num; i++) {
1146
		struct key_specs *ks;
1147
1148
1480
		ks = &(keys[i]);
1149
1150
1480
		if (sort_modifier_empty(&(ks->sm))) {
1151
#ifdef GNUSORT_COMPATIBILITY
1152
			if (!(ks->pos1b) && !(ks->pos2b)) {
1153
				ks->pos1b = sm->bflag;
1154
				ks->pos2b = sm->bflag;
1155
			}
1156
#endif
1157
1050
			memcpy(&(ks->sm), sm, sizeof(struct sort_mods));
1158
1050
		}
1159
1160
1480
		ks->sm.func = get_sort_func(&(ks->sm));
1161
	}
1162
1163
1264
	if (debug_sort) {
1164
		printf("Memory to be used for sorting: %llu\n",
1165
		    available_free_memory);
1166
		printf("Using collate rules of %s locale\n",
1167
		    setlocale(LC_COLLATE, NULL));
1168
		if (byte_sort)
1169
			printf("Byte sort is used\n");
1170
		if (print_symbols_on_debug) {
1171
			printf("Decimal Point: <%lc>\n", symbol_decimal_point);
1172
			if (symbol_thousands_sep)
1173
				printf("Thousands separator: <%lc>\n",
1174
				    symbol_thousands_sep);
1175
			printf("Positive sign: <%lc>\n", symbol_positive_sign);
1176
			printf("Negative sign: <%lc>\n", symbol_negative_sign);
1177
		}
1178
	}
1179
1180
1264
	if (sort_opts_vals.cflag)
1181
948
		return check(argc ? *argv : "-");
1182
1183
944
	set_random_seed();
1184
1185
944
	if (!sort_opts_vals.mflag) {
1186
912
		struct file_list fl;
1187
912
		struct sort_list list;
1188
1189
912
		sort_list_init(&list);
1190
912
		file_list_init(&fl, true);
1191
1192
912
		if (argc < 1)
1193
200
			procfile("-", &list, &fl);
1194
		else {
1195
2284
			while (argc > 0) {
1196
788
				procfile(*argv, &list, &fl);
1197
788
				--argc;
1198
788
				++argv;
1199
			}
1200
		}
1201
1202
908
		if (fl.count < 1)
1203
908
			sort_list_to_file(&list, outfile);
1204
		else {
1205
			if (list.count > 0) {
1206
				char *flast = new_tmp_file_name();
1207
1208
				sort_list_to_file(&list, flast);
1209
				file_list_add(&fl, flast, false);
1210
			}
1211
			merge_files(&fl, outfile);
1212
		}
1213
1214
908
		file_list_clean(&fl);
1215
1216
		/*
1217
		 * We are about to exit the program, so we can ignore
1218
		 * the clean-up for speed
1219
		 *
1220
		 * sort_list_clean(&list);
1221
		 */
1222
1223
908
	} else {
1224
32
		struct file_list fl;
1225
1226
32
		file_list_init(&fl, false);
1227
32
		if (argc < 1)
1228
			file_list_add(&fl, "-", true);
1229
		else
1230
32
			file_list_populate(&fl, argc, argv, true);
1231
32
		merge_files(&fl, outfile);
1232
32
		file_list_clean(&fl);
1233
32
	}
1234
1235
940
	if (real_outfile) {
1236
400
		if (rename(outfile, real_outfile) < 0)
1237
			err(2, "%s", real_outfile);
1238
400
		sort_free(outfile);
1239
400
	}
1240
1241
940
	return 0;
1242
1260
}