1 |
|
|
/* $OpenBSD: vsort.c,v 1.2 2015/04/01 21:46:38 millert Exp $ */ |
2 |
|
|
|
3 |
|
|
/*- |
4 |
|
|
* Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com> |
5 |
|
|
* Copyright (C) 2012 Gabor Kovesdan <gabor@FreeBSD.org> |
6 |
|
|
* All rights reserved. |
7 |
|
|
* |
8 |
|
|
* Redistribution and use in source and binary forms, with or without |
9 |
|
|
* modification, are permitted provided that the following conditions |
10 |
|
|
* are met: |
11 |
|
|
* 1. Redistributions of source code must retain the above copyright |
12 |
|
|
* notice, this list of conditions and the following disclaimer. |
13 |
|
|
* 2. Redistributions in binary form must reproduce the above copyright |
14 |
|
|
* notice, this list of conditions and the following disclaimer in the |
15 |
|
|
* documentation and/or other materials provided with the distribution. |
16 |
|
|
* |
17 |
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
18 |
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
19 |
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
20 |
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
21 |
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
22 |
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
23 |
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
24 |
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
25 |
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
26 |
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
27 |
|
|
* SUCH DAMAGE. |
28 |
|
|
*/ |
29 |
|
|
|
30 |
|
|
#include <sys/types.h> |
31 |
|
|
|
32 |
|
|
#include <ctype.h> |
33 |
|
|
#include <stdlib.h> |
34 |
|
|
#include <string.h> |
35 |
|
|
|
36 |
|
|
#include "sort.h" |
37 |
|
|
#include "vsort.h" |
38 |
|
|
|
39 |
|
|
static inline bool |
40 |
|
|
isdigit_clocale(wchar_t c) |
41 |
|
|
{ |
42 |
|
|
return (c >= L'0' && c <= L'9'); |
43 |
|
|
} |
44 |
|
|
|
45 |
|
|
static inline bool |
46 |
|
|
isalpha_clocale(wchar_t c) |
47 |
|
|
{ |
48 |
|
|
return ((c >= L'a' && c <= L'z') || (c >= L'A' && c <= L'Z')); |
49 |
|
|
} |
50 |
|
|
|
51 |
|
|
static inline bool |
52 |
|
|
isalnum_clocale(wchar_t c) |
53 |
|
|
{ |
54 |
|
|
return ((c >= L'a' && c <= L'z') || (c >= L'A' && c <= L'Z') || |
55 |
|
|
(c >= L'0' && c <= L'9')); |
56 |
|
|
} |
57 |
|
|
|
58 |
|
|
/* |
59 |
|
|
* Find string suffix of format: (\.[A-Za-z~][A-Za-z0-9~]*)*$ |
60 |
|
|
* Set length of string before suffix. |
61 |
|
|
*/ |
62 |
|
|
static void |
63 |
|
|
find_suffix(bwstring_iterator si, bwstring_iterator se, size_t *len) |
64 |
|
|
{ |
65 |
|
|
wchar_t c; |
66 |
|
|
size_t clen; |
67 |
|
|
bool expect_alpha, sfx; |
68 |
|
|
|
69 |
|
|
sfx = false; |
70 |
|
|
expect_alpha = false; |
71 |
|
|
*len = 0; |
72 |
|
|
clen = 0; |
73 |
|
|
|
74 |
|
|
while ((si < se) && (c = bws_get_iter_value(si))) { |
75 |
|
|
if (expect_alpha) { |
76 |
|
|
expect_alpha = false; |
77 |
|
|
if (!isalpha_clocale(c) && (c != L'~')) |
78 |
|
|
sfx = false; |
79 |
|
|
} else if (c == L'.') { |
80 |
|
|
expect_alpha = true; |
81 |
|
|
if (!sfx) { |
82 |
|
|
sfx = true; |
83 |
|
|
*len = clen; |
84 |
|
|
} |
85 |
|
|
} else if (!isalnum_clocale(c) && (c != L'~')) |
86 |
|
|
sfx = false; |
87 |
|
|
|
88 |
|
|
si = bws_iterator_inc(si, 1); |
89 |
|
|
++clen; |
90 |
|
|
} |
91 |
|
|
|
92 |
|
|
/* This code must be here to make the implementation compatible |
93 |
|
|
* with WORDING of GNU sort documentation. |
94 |
|
|
* But the GNU sort implementation is not following its own |
95 |
|
|
* documentation. GNU sort allows empty file extensions |
96 |
|
|
* (just dot with nothing after); but the regular expression in |
97 |
|
|
* their documentation does not allow empty file extensions. |
98 |
|
|
* We chose to make our implementation compatible with GNU sort |
99 |
|
|
* implementation. If they will ever fix their bug, this code |
100 |
|
|
* must be uncommented. Or they may choose to fix the info page, |
101 |
|
|
* then the code stays commented. |
102 |
|
|
* |
103 |
|
|
if (expect_alpha) |
104 |
|
|
sfx = false; |
105 |
|
|
*/ |
106 |
|
|
|
107 |
|
|
if (!sfx) |
108 |
|
|
*len = clen; |
109 |
|
|
} |
110 |
|
|
|
111 |
|
|
static inline int |
112 |
|
|
cmp_chars(wchar_t c1, wchar_t c2) |
113 |
|
|
{ |
114 |
|
|
if (c1 == c2) |
115 |
|
|
return 0; |
116 |
|
|
|
117 |
|
|
if (c1 == L'~') |
118 |
|
|
return -1; |
119 |
|
|
if (c2 == L'~') |
120 |
|
|
return 1; |
121 |
|
|
|
122 |
|
|
if (isdigit_clocale(c1) || !c1) |
123 |
|
|
return (isdigit_clocale(c2) || !c2) ? 0 : -1; |
124 |
|
|
|
125 |
|
|
if (isdigit_clocale(c2) || !c2) |
126 |
|
|
return 1; |
127 |
|
|
|
128 |
|
|
if (isalpha_clocale(c1)) |
129 |
|
|
return isalpha_clocale(c2) ? (int)c1 - (int)c2 : -1; |
130 |
|
|
|
131 |
|
|
if (isalpha_clocale(c2)) |
132 |
|
|
return 1; |
133 |
|
|
|
134 |
|
|
return (int)c1 - (int)c2; |
135 |
|
|
} |
136 |
|
|
|
137 |
|
|
static int |
138 |
|
|
cmpversions(bwstring_iterator si1, bwstring_iterator se1, |
139 |
|
|
bwstring_iterator si2, bwstring_iterator se2) |
140 |
|
|
{ |
141 |
|
|
int cmp, diff; |
142 |
|
|
|
143 |
|
|
while ((si1 < se1) || (si2 < se2)) { |
144 |
|
|
diff = 0; |
145 |
|
|
|
146 |
|
|
while (((si1 < se1) && |
147 |
|
|
!isdigit_clocale(bws_get_iter_value(si1))) || |
148 |
|
|
((si2 < se2) && !isdigit_clocale(bws_get_iter_value(si2)))) { |
149 |
|
|
wchar_t c1, c2; |
150 |
|
|
|
151 |
|
|
c1 = (si1 < se1) ? bws_get_iter_value(si1) : 0; |
152 |
|
|
c2 = (si2 < se2) ? bws_get_iter_value(si2) : 0; |
153 |
|
|
|
154 |
|
|
cmp = cmp_chars(c1, c2); |
155 |
|
|
if (cmp) |
156 |
|
|
return cmp; |
157 |
|
|
|
158 |
|
|
if (si1 < se1) |
159 |
|
|
si1 = bws_iterator_inc(si1, 1); |
160 |
|
|
if (si2 < se2) |
161 |
|
|
si2 = bws_iterator_inc(si2, 1); |
162 |
|
|
} |
163 |
|
|
|
164 |
|
|
while (bws_get_iter_value(si1) == L'0') |
165 |
|
|
si1 = bws_iterator_inc(si1, 1); |
166 |
|
|
|
167 |
|
|
while (bws_get_iter_value(si2) == L'0') |
168 |
|
|
si2 = bws_iterator_inc(si2, 1); |
169 |
|
|
|
170 |
|
|
while (isdigit_clocale(bws_get_iter_value(si1)) && |
171 |
|
|
isdigit_clocale(bws_get_iter_value(si2))) { |
172 |
|
|
if (!diff) |
173 |
|
|
diff = ((int)bws_get_iter_value(si1) - |
174 |
|
|
(int)bws_get_iter_value(si2)); |
175 |
|
|
si1 = bws_iterator_inc(si1, 1); |
176 |
|
|
si2 = bws_iterator_inc(si2, 1); |
177 |
|
|
} |
178 |
|
|
|
179 |
|
|
if (isdigit_clocale(bws_get_iter_value(si1))) |
180 |
|
|
return 1; |
181 |
|
|
|
182 |
|
|
if (isdigit_clocale(bws_get_iter_value(si2))) |
183 |
|
|
return -1; |
184 |
|
|
|
185 |
|
|
if (diff) |
186 |
|
|
return diff; |
187 |
|
|
} |
188 |
|
|
|
189 |
|
|
return 0; |
190 |
|
|
} |
191 |
|
|
|
192 |
|
|
/* |
193 |
|
|
* Compare two version strings |
194 |
|
|
*/ |
195 |
|
|
int |
196 |
|
|
vcmp(struct bwstring *s1, struct bwstring *s2) |
197 |
|
|
{ |
198 |
|
|
bwstring_iterator si1, si2; |
199 |
|
|
wchar_t c1, c2; |
200 |
|
|
size_t len1, len2, slen1, slen2; |
201 |
|
|
int cmp_bytes, cmp_res; |
202 |
|
|
|
203 |
|
|
if (s1 == s2) |
204 |
|
|
return 0; |
205 |
|
|
|
206 |
|
|
cmp_bytes = bwscmp(s1, s2, 0); |
207 |
|
|
if (cmp_bytes == 0) |
208 |
|
|
return 0; |
209 |
|
|
|
210 |
|
|
len1 = slen1 = BWSLEN(s1); |
211 |
|
|
len2 = slen2 = BWSLEN(s2); |
212 |
|
|
|
213 |
|
|
if (slen1 < 1) |
214 |
|
|
return -1; |
215 |
|
|
if (slen2 < 1) |
216 |
|
|
return 1; |
217 |
|
|
|
218 |
|
|
si1 = bws_begin(s1); |
219 |
|
|
si2 = bws_begin(s2); |
220 |
|
|
|
221 |
|
|
c1 = bws_get_iter_value(si1); |
222 |
|
|
c2 = bws_get_iter_value(si2); |
223 |
|
|
|
224 |
|
|
if (c1 == L'.' && (slen1 == 1)) |
225 |
|
|
return -1; |
226 |
|
|
|
227 |
|
|
if (c2 == L'.' && (slen2 == 1)) |
228 |
|
|
return 1; |
229 |
|
|
|
230 |
|
|
if (slen1 == 2 && c1 == L'.' && |
231 |
|
|
bws_get_iter_value(bws_iterator_inc(si1, 1)) == L'.') |
232 |
|
|
return -1; |
233 |
|
|
if (slen2 == 2 && c2 == L'.' && |
234 |
|
|
bws_get_iter_value(bws_iterator_inc(si2, 1)) == L'.') |
235 |
|
|
return 1; |
236 |
|
|
|
237 |
|
|
if (c1 == L'.' && c2 != L'.') |
238 |
|
|
return -1; |
239 |
|
|
if (c1 != L'.' && c2 == L'.') |
240 |
|
|
return 1; |
241 |
|
|
|
242 |
|
|
if (c1 == L'.' && c2 == L'.') { |
243 |
|
|
si1 = bws_iterator_inc(si1, 1); |
244 |
|
|
si2 = bws_iterator_inc(si2, 1); |
245 |
|
|
} |
246 |
|
|
|
247 |
|
|
find_suffix(si1, bws_end(s1), &len1); |
248 |
|
|
find_suffix(si2, bws_end(s2), &len2); |
249 |
|
|
|
250 |
|
|
if ((len1 == len2) && (bws_iterator_cmp(si1, si2, len1) == 0)) |
251 |
|
|
return cmp_bytes; |
252 |
|
|
|
253 |
|
|
cmp_res = cmpversions(si1, bws_iterator_inc(si1, len1), si2, |
254 |
|
|
bws_iterator_inc(si2, len2)); |
255 |
|
|
|
256 |
|
|
if (cmp_res == 0) |
257 |
|
|
cmp_res = cmp_bytes; |
258 |
|
|
|
259 |
|
|
return cmp_res; |
260 |
|
|
} |