GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: usr.bin/sort/vsort.c Lines: 0 106 0.0 %
Date: 2017-11-13 Branches: 0 112 0.0 %

Line Branch Exec Source
1
/*	$OpenBSD: vsort.c,v 1.2 2015/04/01 21:46:38 millert Exp $	*/
2
3
/*-
4
 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
5
 * Copyright (C) 2012 Gabor Kovesdan <gabor@FreeBSD.org>
6
 * All rights reserved.
7
 *
8
 * Redistribution and use in source and binary forms, with or without
9
 * modification, are permitted provided that the following conditions
10
 * are met:
11
 * 1. Redistributions of source code must retain the above copyright
12
 *    notice, this list of conditions and the following disclaimer.
13
 * 2. Redistributions in binary form must reproduce the above copyright
14
 *    notice, this list of conditions and the following disclaimer in the
15
 *    documentation and/or other materials provided with the distribution.
16
 *
17
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27
 * SUCH DAMAGE.
28
 */
29
30
#include <sys/types.h>
31
32
#include <ctype.h>
33
#include <stdlib.h>
34
#include <string.h>
35
36
#include "sort.h"
37
#include "vsort.h"
38
39
static inline bool
40
isdigit_clocale(wchar_t c)
41
{
42
	return (c >= L'0' && c <= L'9');
43
}
44
45
static inline bool
46
isalpha_clocale(wchar_t c)
47
{
48
	return ((c >= L'a' && c <= L'z') || (c >= L'A' && c <= L'Z'));
49
}
50
51
static inline bool
52
isalnum_clocale(wchar_t c)
53
{
54
	return ((c >= L'a' && c <= L'z') || (c >= L'A' && c <= L'Z') ||
55
	    (c >= L'0' && c <= L'9'));
56
}
57
58
/*
59
 * Find string suffix of format: (\.[A-Za-z~][A-Za-z0-9~]*)*$
60
 * Set length of string before suffix.
61
 */
62
static void
63
find_suffix(bwstring_iterator si, bwstring_iterator se, size_t *len)
64
{
65
	wchar_t c;
66
	size_t clen;
67
	bool expect_alpha, sfx;
68
69
	sfx = false;
70
	expect_alpha = false;
71
	*len = 0;
72
	clen = 0;
73
74
	while ((si < se) && (c = bws_get_iter_value(si))) {
75
		if (expect_alpha) {
76
			expect_alpha = false;
77
			if (!isalpha_clocale(c) && (c != L'~'))
78
				sfx = false;
79
		} else if (c == L'.') {
80
			expect_alpha = true;
81
			if (!sfx) {
82
				sfx = true;
83
				*len = clen;
84
			}
85
		} else if (!isalnum_clocale(c) && (c != L'~'))
86
			sfx = false;
87
88
		si = bws_iterator_inc(si, 1);
89
		++clen;
90
	}
91
92
	/* This code must be here to make the implementation compatible
93
	 * with WORDING of GNU sort documentation.
94
	 * But the GNU sort implementation is not following its own
95
	 * documentation.  GNU sort allows empty file extensions
96
	 * (just dot with nothing after); but the regular expression in
97
	 * their documentation does not allow empty file extensions.
98
	 * We chose to make our implementation compatible with GNU sort
99
	 * implementation.  If they will ever fix their bug, this code
100
	 * must be uncommented. Or they may choose to fix the info page,
101
	 * then the code stays commented.
102
	 *
103
	 if (expect_alpha)
104
	 	sfx = false;
105
	 */
106
107
	if (!sfx)
108
		*len = clen;
109
}
110
111
static inline int
112
cmp_chars(wchar_t c1, wchar_t c2)
113
{
114
	if (c1 == c2)
115
		return 0;
116
117
	if (c1 == L'~')
118
		return -1;
119
	if (c2 == L'~')
120
		return 1;
121
122
	if (isdigit_clocale(c1) || !c1)
123
		return (isdigit_clocale(c2) || !c2) ? 0 : -1;
124
125
	if (isdigit_clocale(c2) || !c2)
126
		return 1;
127
128
	if (isalpha_clocale(c1))
129
		return isalpha_clocale(c2) ? (int)c1 - (int)c2 : -1;
130
131
	if (isalpha_clocale(c2))
132
		return 1;
133
134
	return (int)c1 - (int)c2;
135
}
136
137
static int
138
cmpversions(bwstring_iterator si1, bwstring_iterator se1,
139
    bwstring_iterator si2, bwstring_iterator se2)
140
{
141
	int cmp, diff;
142
143
	while ((si1 < se1) || (si2 < se2)) {
144
		diff = 0;
145
146
		while (((si1 < se1) &&
147
		    !isdigit_clocale(bws_get_iter_value(si1))) ||
148
		    ((si2 < se2) && !isdigit_clocale(bws_get_iter_value(si2)))) {
149
			wchar_t c1, c2;
150
151
			c1 = (si1 < se1) ? bws_get_iter_value(si1) : 0;
152
			c2 = (si2 < se2) ? bws_get_iter_value(si2) : 0;
153
154
			cmp = cmp_chars(c1, c2);
155
			if (cmp)
156
				return cmp;
157
158
			if (si1 < se1)
159
				si1 = bws_iterator_inc(si1, 1);
160
			if (si2 < se2)
161
				si2 = bws_iterator_inc(si2, 1);
162
		}
163
164
		while (bws_get_iter_value(si1) == L'0')
165
			si1 = bws_iterator_inc(si1, 1);
166
167
		while (bws_get_iter_value(si2) == L'0')
168
			si2 = bws_iterator_inc(si2, 1);
169
170
		while (isdigit_clocale(bws_get_iter_value(si1)) &&
171
		    isdigit_clocale(bws_get_iter_value(si2))) {
172
			if (!diff)
173
				diff = ((int)bws_get_iter_value(si1) -
174
				    (int)bws_get_iter_value(si2));
175
			si1 = bws_iterator_inc(si1, 1);
176
			si2 = bws_iterator_inc(si2, 1);
177
		}
178
179
		if (isdigit_clocale(bws_get_iter_value(si1)))
180
			return 1;
181
182
		if (isdigit_clocale(bws_get_iter_value(si2)))
183
			return -1;
184
185
		if (diff)
186
			return diff;
187
	}
188
189
	return 0;
190
}
191
192
/*
193
 * Compare two version strings
194
 */
195
int
196
vcmp(struct bwstring *s1, struct bwstring *s2)
197
{
198
	bwstring_iterator si1, si2;
199
	wchar_t c1, c2;
200
	size_t len1, len2, slen1, slen2;
201
	int cmp_bytes, cmp_res;
202
203
	if (s1 == s2)
204
		return 0;
205
206
	cmp_bytes = bwscmp(s1, s2, 0);
207
	if (cmp_bytes == 0)
208
		return 0;
209
210
	len1 = slen1 = BWSLEN(s1);
211
	len2 = slen2 = BWSLEN(s2);
212
213
	if (slen1 < 1)
214
		return -1;
215
	if (slen2 < 1)
216
		return 1;
217
218
	si1 = bws_begin(s1);
219
	si2 = bws_begin(s2);
220
221
	c1 = bws_get_iter_value(si1);
222
	c2 = bws_get_iter_value(si2);
223
224
	if (c1 == L'.' && (slen1 == 1))
225
		return -1;
226
227
	if (c2 == L'.' && (slen2 == 1))
228
		return 1;
229
230
	if (slen1 == 2 && c1 == L'.' &&
231
	    bws_get_iter_value(bws_iterator_inc(si1, 1)) == L'.')
232
		return -1;
233
	if (slen2 == 2 && c2 == L'.' &&
234
	    bws_get_iter_value(bws_iterator_inc(si2, 1)) == L'.')
235
		return 1;
236
237
	if (c1 == L'.' && c2 != L'.')
238
		return -1;
239
	if (c1 != L'.' && c2 == L'.')
240
		return 1;
241
242
	if (c1 == L'.' && c2 == L'.') {
243
		si1 = bws_iterator_inc(si1, 1);
244
		si2 = bws_iterator_inc(si2, 1);
245
	}
246
247
	find_suffix(si1, bws_end(s1), &len1);
248
	find_suffix(si2, bws_end(s2), &len2);
249
250
	if ((len1 == len2) && (bws_iterator_cmp(si1, si2, len1) == 0))
251
		return cmp_bytes;
252
253
	cmp_res = cmpversions(si1, bws_iterator_inc(si1, len1), si2,
254
	    bws_iterator_inc(si2, len2));
255
256
	if (cmp_res == 0)
257
	  cmp_res = cmp_bytes;
258
259
	return cmp_res;
260
}