1 |
|
|
/* $OpenBSD: bwstring.c,v 1.7 2015/04/01 22:38:08 millert Exp $ */ |
2 |
|
|
|
3 |
|
|
/*- |
4 |
|
|
* Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org> |
5 |
|
|
* Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com> |
6 |
|
|
* All rights reserved. |
7 |
|
|
* |
8 |
|
|
* Redistribution and use in source and binary forms, with or without |
9 |
|
|
* modification, are permitted provided that the following conditions |
10 |
|
|
* are met: |
11 |
|
|
* 1. Redistributions of source code must retain the above copyright |
12 |
|
|
* notice, this list of conditions and the following disclaimer. |
13 |
|
|
* 2. Redistributions in binary form must reproduce the above copyright |
14 |
|
|
* notice, this list of conditions and the following disclaimer in the |
15 |
|
|
* documentation and/or other materials provided with the distribution. |
16 |
|
|
* |
17 |
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
18 |
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
19 |
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
20 |
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
21 |
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
22 |
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
23 |
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
24 |
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
25 |
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
26 |
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
27 |
|
|
* SUCH DAMAGE. |
28 |
|
|
*/ |
29 |
|
|
|
30 |
|
|
#include <ctype.h> |
31 |
|
|
#include <errno.h> |
32 |
|
|
#include <err.h> |
33 |
|
|
#include <langinfo.h> |
34 |
|
|
#include <math.h> |
35 |
|
|
#include <stdlib.h> |
36 |
|
|
#include <string.h> |
37 |
|
|
#include <wchar.h> |
38 |
|
|
#include <wctype.h> |
39 |
|
|
|
40 |
|
|
#include "bwstring.h" |
41 |
|
|
#include "sort.h" |
42 |
|
|
|
/* When set, bwscoll() orders single-byte strings with memcmp(). */
bool byte_sort;
/* 1 selects the byte (cstr) representation, anything else the wide one. */
size_t sort_mb_cur_max = 1;

/* Upper-cased abbreviated month names, built by initialise_months(). */
static wchar_t **wmonths = NULL;
static char **cmonths = NULL;
48 |
|
|
|
49 |
|
|
/* initialise months */ |
50 |
|
|
|
51 |
|
|
void |
52 |
|
|
initialise_months(void) |
53 |
|
|
{ |
54 |
|
|
const nl_item item[12] = { ABMON_1, ABMON_2, ABMON_3, ABMON_4, |
55 |
|
|
ABMON_5, ABMON_6, ABMON_7, ABMON_8, ABMON_9, ABMON_10, |
56 |
|
|
ABMON_11, ABMON_12 }; |
57 |
|
|
char *tmp; |
58 |
|
|
size_t len; |
59 |
|
|
|
60 |
✓✗ |
40 |
if (sort_mb_cur_max == 1) { |
61 |
✓✗ |
20 |
if (cmonths == NULL) { |
62 |
|
|
char *m; |
63 |
|
|
unsigned int j; |
64 |
|
|
int i; |
65 |
|
|
|
66 |
|
20 |
cmonths = sort_malloc(sizeof(char *) * 12); |
67 |
✓✓ |
520 |
for (i = 0; i < 12; i++) { |
68 |
|
240 |
cmonths[i] = NULL; |
69 |
|
240 |
tmp = nl_langinfo(item[i]); |
70 |
✗✓ |
240 |
if (debug_sort) |
71 |
|
|
printf("month[%d]=%s\n", i, tmp); |
72 |
✓✗ |
240 |
if (*tmp == '\0') |
73 |
|
|
continue; |
74 |
|
240 |
m = sort_strdup(tmp); |
75 |
|
240 |
len = strlen(tmp); |
76 |
✓✓ |
1920 |
for (j = 0; j < len; j++) |
77 |
|
720 |
m[j] = toupper(m[j]); |
78 |
|
240 |
cmonths[i] = m; |
79 |
|
240 |
} |
80 |
|
20 |
} |
81 |
|
|
} else { |
82 |
|
|
if (wmonths == NULL) { |
83 |
|
|
unsigned int j; |
84 |
|
|
wchar_t *m; |
85 |
|
|
int i; |
86 |
|
|
|
87 |
|
|
wmonths = sort_malloc(sizeof(wchar_t *) * 12); |
88 |
|
|
for (i = 0; i < 12; i++) { |
89 |
|
|
wmonths[i] = NULL; |
90 |
|
|
tmp = nl_langinfo(item[i]); |
91 |
|
|
if (debug_sort) |
92 |
|
|
printf("month[%d]=%s\n", i, tmp); |
93 |
|
|
if (*tmp == '\0') |
94 |
|
|
continue; |
95 |
|
|
len = strlen(tmp); |
96 |
|
|
m = sort_reallocarray(NULL, len + 1, |
97 |
|
|
sizeof(wchar_t)); |
98 |
|
|
if (mbstowcs(m, tmp, len) == (size_t)-1) { |
99 |
|
|
sort_free(m); |
100 |
|
|
continue; |
101 |
|
|
} |
102 |
|
|
m[len] = L'\0'; |
103 |
|
|
for (j = 0; j < len; j++) |
104 |
|
|
m[j] = towupper(m[j]); |
105 |
|
|
wmonths[i] = m; |
106 |
|
|
} |
107 |
|
|
} |
108 |
|
|
} |
109 |
|
20 |
} |
110 |
|
|
|
/*
 * Compare two NUL-terminated wide-character strings.
 *
 * The comparison uses the locale's collation order (wcscoll).  wcscoll()
 * has no error return value, so errno is cleared beforehand and inspected
 * afterwards; if it reports EILSEQ the strings are compared by code
 * value with wcscmp() instead.
 *
 * Returns a value less than, equal to, or greater than zero when s1
 * sorts before, the same as, or after s2.
 */
static int
wide_str_coll(const wchar_t *s1, const wchar_t *s2)
{
	int ret;

	errno = 0;
	ret = wcscoll(s1, s2);
	if (errno == EILSEQ) {
		/* wcscmp() cannot fail, so no further fallback is needed. */
		errno = 0;
		ret = wcscmp(s1, s2);
	}
	return ret;
}
141 |
|
|
|
142 |
|
|
/* counterparts of wcs functions */ |
143 |
|
|
|
144 |
|
|
void |
145 |
|
|
bwsprintf(FILE *f, struct bwstring *bws, const char *prefix, const char *suffix) |
146 |
|
|
{ |
147 |
|
|
if (sort_mb_cur_max == 1) |
148 |
|
|
fprintf(f, "%s%s%s", prefix, bws->data.cstr, suffix); |
149 |
|
|
else |
150 |
|
|
fprintf(f, "%s%S%s", prefix, bws->data.wstr, suffix); |
151 |
|
|
} |
152 |
|
|
|
153 |
|
|
const void * |
154 |
|
|
bwsrawdata(const struct bwstring *bws) |
155 |
|
|
{ |
156 |
|
169408 |
return &(bws->data); |
157 |
|
|
} |
158 |
|
|
|
159 |
|
|
size_t |
160 |
|
|
bwsrawlen(const struct bwstring *bws) |
161 |
|
|
{ |
162 |
|
169408 |
return (sort_mb_cur_max == 1) ? bws->len : SIZEOF_WCHAR_STRING(bws->len); |
163 |
|
|
} |
164 |
|
|
|
165 |
|
|
size_t |
166 |
|
|
bws_memsize(const struct bwstring *bws) |
167 |
|
|
{ |
168 |
|
14618721 |
return (sort_mb_cur_max == 1) ? (bws->len + 2 + sizeof(struct bwstring)) : |
169 |
|
4872907 |
(SIZEOF_WCHAR_STRING(bws->len + 1) + sizeof(struct bwstring)); |
170 |
|
|
} |
171 |
|
|
|
172 |
|
|
void |
173 |
|
|
bws_setlen(struct bwstring *bws, size_t newlen) |
174 |
|
|
{ |
175 |
✓✗✓✓ ✓✗ |
8956 |
if (bws && newlen != bws->len && newlen <= bws->len) { |
176 |
|
1588 |
bws->len = newlen; |
177 |
✓✗ |
1588 |
if (sort_mb_cur_max == 1) |
178 |
|
1588 |
bws->data.cstr[newlen] = '\0'; |
179 |
|
|
else |
180 |
|
|
bws->data.wstr[newlen] = L'\0'; |
181 |
|
|
} |
182 |
|
2456 |
} |
183 |
|
|
|
184 |
|
|
/* |
185 |
|
|
* Allocate a new binary string of specified size |
186 |
|
|
*/ |
187 |
|
|
struct bwstring * |
188 |
|
|
bwsalloc(size_t sz) |
189 |
|
|
{ |
190 |
|
|
struct bwstring *ret; |
191 |
|
|
|
192 |
✓✗ |
12489588 |
if (sort_mb_cur_max == 1) { |
193 |
|
6244794 |
ret = sort_malloc(sizeof(struct bwstring) + 1 + sz); |
194 |
|
6244794 |
ret->data.cstr[sz] = '\0'; |
195 |
|
6244794 |
} else { |
196 |
|
|
ret = sort_malloc(sizeof(struct bwstring) + |
197 |
|
|
SIZEOF_WCHAR_STRING(sz + 1)); |
198 |
|
|
ret->data.wstr[sz] = L'\0'; |
199 |
|
|
} |
200 |
|
6244794 |
ret->len = sz; |
201 |
|
|
|
202 |
|
6244794 |
return ret; |
203 |
|
|
} |
204 |
|
|
|
205 |
|
|
/* |
206 |
|
|
* Create a copy of binary string. |
207 |
|
|
* New string size equals the length of the old string. |
208 |
|
|
*/ |
209 |
|
|
struct bwstring * |
210 |
|
|
bwsdup(const struct bwstring *s) |
211 |
|
|
{ |
212 |
|
|
struct bwstring *ret; |
213 |
|
|
|
214 |
✗✓ |
1883664 |
if (s == NULL) |
215 |
|
|
return NULL; |
216 |
|
|
|
217 |
|
941832 |
ret = bwsalloc(s->len); |
218 |
|
|
|
219 |
✓✗ |
941832 |
if (sort_mb_cur_max == 1) |
220 |
|
941832 |
memcpy(ret->data.cstr, s->data.cstr, s->len); |
221 |
|
|
else |
222 |
|
|
memcpy(ret->data.wstr, s->data.wstr, |
223 |
|
|
SIZEOF_WCHAR_STRING(s->len)); |
224 |
|
|
|
225 |
|
941832 |
return ret; |
226 |
|
941832 |
} |
227 |
|
|
|
228 |
|
|
/* |
229 |
|
|
* Create a new binary string from a wide character buffer. |
230 |
|
|
*/ |
231 |
|
|
struct bwstring * |
232 |
|
|
bwssbdup(const wchar_t *str, size_t len) |
233 |
|
|
{ |
234 |
|
|
if (str == NULL) |
235 |
|
|
return (len == 0) ? bwsalloc(0) : NULL; |
236 |
|
|
else { |
237 |
|
|
struct bwstring *ret; |
238 |
|
|
size_t i; |
239 |
|
|
|
240 |
|
|
ret = bwsalloc(len); |
241 |
|
|
|
242 |
|
|
if (sort_mb_cur_max == 1) |
243 |
|
|
for (i = 0; i < len; ++i) |
244 |
|
|
ret->data.cstr[i] = (unsigned char) str[i]; |
245 |
|
|
else |
246 |
|
|
memcpy(ret->data.wstr, str, SIZEOF_WCHAR_STRING(len)); |
247 |
|
|
|
248 |
|
|
return ret; |
249 |
|
|
} |
250 |
|
|
} |
251 |
|
|
|
252 |
|
|
/* |
253 |
|
|
* Create a new binary string from a raw binary buffer. |
254 |
|
|
*/ |
255 |
|
|
struct bwstring * |
256 |
|
|
bwscsbdup(const unsigned char *str, size_t len) |
257 |
|
|
{ |
258 |
|
|
struct bwstring *ret; |
259 |
|
|
|
260 |
|
10226962 |
ret = bwsalloc(len); |
261 |
|
|
|
262 |
✓✗ |
5113481 |
if (str) { |
263 |
✓✗ |
5113481 |
if (sort_mb_cur_max == 1) |
264 |
|
5113481 |
memcpy(ret->data.cstr, str, len); |
265 |
|
|
else { |
266 |
|
|
mbstate_t mbs; |
267 |
|
|
const char *s; |
268 |
|
|
size_t charlen, chars, cptr; |
269 |
|
|
|
270 |
|
|
chars = 0; |
271 |
|
|
cptr = 0; |
272 |
|
|
s = (const char *) str; |
273 |
|
|
|
274 |
|
|
memset(&mbs, 0, sizeof(mbs)); |
275 |
|
|
|
276 |
|
|
while (cptr < len) { |
277 |
|
|
size_t n = sort_mb_cur_max; |
278 |
|
|
|
279 |
|
|
if (n > len - cptr) |
280 |
|
|
n = len - cptr; |
281 |
|
|
charlen = mbrlen(s + cptr, n, &mbs); |
282 |
|
|
switch (charlen) { |
283 |
|
|
case 0: |
284 |
|
|
/* FALLTHROUGH */ |
285 |
|
|
case (size_t) -1: |
286 |
|
|
/* FALLTHROUGH */ |
287 |
|
|
case (size_t) -2: |
288 |
|
|
ret->data.wstr[chars++] = |
289 |
|
|
(unsigned char) s[cptr]; |
290 |
|
|
++cptr; |
291 |
|
|
break; |
292 |
|
|
default: |
293 |
|
|
n = mbrtowc(ret->data.wstr + (chars++), |
294 |
|
|
s + cptr, charlen, &mbs); |
295 |
|
|
if ((n == (size_t)-1) || (n == (size_t)-2)) |
296 |
|
|
/* NOTREACHED */ |
297 |
|
|
err(2, "mbrtowc error"); |
298 |
|
|
cptr += charlen; |
299 |
|
|
}; |
300 |
|
|
} |
301 |
|
|
|
302 |
|
|
ret->len = chars; |
303 |
|
|
ret->data.wstr[ret->len] = L'\0'; |
304 |
|
|
} |
305 |
|
|
} |
306 |
|
5113481 |
return ret; |
307 |
|
|
} |
308 |
|
|
|
/*
 * Release a string object by handing it to sort_free().
 */
void
bwsfree(struct bwstring *s)
{
	sort_free(s);
}
317 |
|
|
|
318 |
|
|
/* |
319 |
|
|
* Copy content of src binary string to dst. |
320 |
|
|
* If the capacity of the dst string is not sufficient, |
321 |
|
|
* then the data is truncated. |
322 |
|
|
*/ |
323 |
|
|
size_t |
324 |
|
|
bwscpy(struct bwstring *dst, const struct bwstring *src) |
325 |
|
|
{ |
326 |
|
|
size_t nums = src->len; |
327 |
|
|
|
328 |
|
|
if (nums > dst->len) |
329 |
|
|
nums = dst->len; |
330 |
|
|
dst->len = nums; |
331 |
|
|
|
332 |
|
|
if (sort_mb_cur_max == 1) { |
333 |
|
|
memcpy(dst->data.cstr, src->data.cstr, nums); |
334 |
|
|
dst->data.cstr[dst->len] = '\0'; |
335 |
|
|
} else { |
336 |
|
|
memcpy(dst->data.wstr, src->data.wstr, |
337 |
|
|
SIZEOF_WCHAR_STRING(nums + 1)); |
338 |
|
|
dst->data.wstr[dst->len] = L'\0'; |
339 |
|
|
} |
340 |
|
|
|
341 |
|
|
return nums; |
342 |
|
|
} |
343 |
|
|
|
344 |
|
|
/* |
345 |
|
|
* Copy content of src binary string to dst, |
346 |
|
|
* with specified number of symbols to be copied. |
347 |
|
|
* If the capacity of the dst string is not sufficient, |
348 |
|
|
* then the data is truncated. |
349 |
|
|
*/ |
350 |
|
|
struct bwstring * |
351 |
|
|
bwsncpy(struct bwstring *dst, const struct bwstring *src, size_t size) |
352 |
|
|
{ |
353 |
|
|
size_t nums = src->len; |
354 |
|
|
|
355 |
|
|
if (nums > dst->len) |
356 |
|
|
nums = dst->len; |
357 |
|
|
if (nums > size) |
358 |
|
|
nums = size; |
359 |
|
|
dst->len = nums; |
360 |
|
|
|
361 |
|
|
if (sort_mb_cur_max == 1) { |
362 |
|
|
memcpy(dst->data.cstr, src->data.cstr, nums); |
363 |
|
|
dst->data.cstr[dst->len] = '\0'; |
364 |
|
|
} else { |
365 |
|
|
memcpy(dst->data.wstr, src->data.wstr, |
366 |
|
|
SIZEOF_WCHAR_STRING(nums + 1)); |
367 |
|
|
dst->data.wstr[dst->len] = L'\0'; |
368 |
|
|
} |
369 |
|
|
|
370 |
|
|
return dst; |
371 |
|
|
} |
372 |
|
|
|
373 |
|
|
/* |
374 |
|
|
* Copy content of src binary string to dst, |
375 |
|
|
* with specified number of symbols to be copied. |
376 |
|
|
* An offset value can be specified, from the start of src string. |
377 |
|
|
* If the capacity of the dst string is not sufficient, |
378 |
|
|
* then the data is truncated. |
379 |
|
|
*/ |
380 |
|
|
struct bwstring * |
381 |
|
|
bwsnocpy(struct bwstring *dst, const struct bwstring *src, size_t offset, |
382 |
|
|
size_t size) |
383 |
|
|
{ |
384 |
✗✓ |
378402 |
if (offset >= src->len) { |
385 |
|
|
dst->data.wstr[0] = 0; |
386 |
|
|
dst->len = 0; |
387 |
|
|
} else { |
388 |
|
189201 |
size_t nums = src->len - offset; |
389 |
|
|
|
390 |
✓✓ |
189201 |
if (nums > dst->len) |
391 |
|
10452 |
nums = dst->len; |
392 |
✗✓ |
189201 |
if (nums > size) |
393 |
|
|
nums = size; |
394 |
|
189201 |
dst->len = nums; |
395 |
✓✗ |
189201 |
if (sort_mb_cur_max == 1) { |
396 |
|
189201 |
memcpy(dst->data.cstr, src->data.cstr + offset, |
397 |
|
|
(nums)); |
398 |
|
189201 |
dst->data.cstr[dst->len] = '\0'; |
399 |
|
189201 |
} else { |
400 |
|
|
memcpy(dst->data.wstr, src->data.wstr + offset, |
401 |
|
|
SIZEOF_WCHAR_STRING(nums)); |
402 |
|
|
dst->data.wstr[dst->len] = L'\0'; |
403 |
|
|
} |
404 |
|
|
} |
405 |
|
189201 |
return dst; |
406 |
|
|
} |
407 |
|
|
|
408 |
|
|
/* |
409 |
|
|
* Write binary string to the file. |
410 |
|
|
* The output is ended either with '\n' (nl == true) |
411 |
|
|
* or '\0' (nl == false). |
412 |
|
|
*/ |
413 |
|
|
size_t |
414 |
|
|
bwsfwrite(struct bwstring *bws, FILE *f, bool zero_ended) |
415 |
|
|
{ |
416 |
✓✗ |
2600620 |
if (sort_mb_cur_max == 1) { |
417 |
|
2600620 |
size_t len = bws->len; |
418 |
|
|
|
419 |
✓✗ |
2600620 |
if (!zero_ended) { |
420 |
|
2600620 |
bws->data.cstr[len] = '\n'; |
421 |
|
|
|
422 |
✗✓ |
2600620 |
if (fwrite(bws->data.cstr, len + 1, 1, f) < 1) |
423 |
|
|
err(2, NULL); |
424 |
|
|
|
425 |
|
2600620 |
bws->data.cstr[len] = '\0'; |
426 |
✗✗ |
2600620 |
} else if (fwrite(bws->data.cstr, len + 1, 1, f) < 1) |
427 |
|
|
err(2, NULL); |
428 |
|
|
|
429 |
|
2600620 |
return len + 1; |
430 |
|
|
|
431 |
|
|
} else { |
432 |
|
|
wchar_t eols; |
433 |
|
|
size_t printed = 0; |
434 |
|
|
|
435 |
|
|
eols = zero_ended ? btowc('\0') : btowc('\n'); |
436 |
|
|
|
437 |
|
|
while (printed < BWSLEN(bws)) { |
438 |
|
|
const wchar_t *s = bws->data.wstr + printed; |
439 |
|
|
|
440 |
|
|
if (*s == L'\0') { |
441 |
|
|
int nums; |
442 |
|
|
|
443 |
|
|
nums = fwprintf(f, L"%lc", *s); |
444 |
|
|
|
445 |
|
|
if (nums != 1) |
446 |
|
|
err(2, NULL); |
447 |
|
|
++printed; |
448 |
|
|
} else { |
449 |
|
|
int nums; |
450 |
|
|
|
451 |
|
|
nums = fwprintf(f, L"%ls", s); |
452 |
|
|
|
453 |
|
|
if (nums < 1) |
454 |
|
|
err(2, NULL); |
455 |
|
|
printed += nums; |
456 |
|
|
} |
457 |
|
|
} |
458 |
|
|
fwprintf(f, L"%lc", eols); |
459 |
|
|
return printed + 1; |
460 |
|
|
} |
461 |
|
2600620 |
} |
462 |
|
|
|
463 |
|
|
/* |
464 |
|
|
* Allocate and read a binary string from file. |
465 |
|
|
* The strings are nl-ended or zero-ended, depending on the sort setting. |
466 |
|
|
*/ |
467 |
|
|
struct bwstring * |
468 |
|
|
bwsfgetln(FILE *f, size_t *len, bool zero_ended, struct reader_buffer *rb) |
469 |
|
|
{ |
470 |
|
|
wint_t eols; |
471 |
|
|
|
472 |
|
138752 |
eols = zero_ended ? btowc('\0') : btowc('\n'); |
473 |
|
|
|
474 |
✗✓ |
138752 |
if (!zero_ended && (sort_mb_cur_max > 1)) { |
475 |
|
|
wchar_t *ret; |
476 |
|
|
|
477 |
|
|
ret = fgetwln(f, len); |
478 |
|
|
|
479 |
|
|
if (ret == NULL) { |
480 |
|
|
if (!feof(f)) |
481 |
|
|
err(2, NULL); |
482 |
|
|
return NULL; |
483 |
|
|
} |
484 |
|
|
if (*len > 0) { |
485 |
|
|
if (ret[*len - 1] == (wchar_t)eols) |
486 |
|
|
--(*len); |
487 |
|
|
} |
488 |
|
|
return bwssbdup(ret, *len); |
489 |
|
|
|
490 |
✓✗ |
138752 |
} else if (!zero_ended && (sort_mb_cur_max == 1)) { |
491 |
|
|
char *ret; |
492 |
|
|
|
493 |
|
138752 |
ret = fgetln(f, len); |
494 |
|
|
|
495 |
✓✓ |
138752 |
if (ret == NULL) { |
496 |
✓✗✗✓ ✗✗ |
554 |
if (!feof(f)) |
497 |
|
|
err(2, NULL); |
498 |
|
277 |
return NULL; |
499 |
|
|
} |
500 |
✓✗ |
138475 |
if (*len > 0) { |
501 |
✓✓ |
138475 |
if (ret[*len - 1] == '\n') |
502 |
|
138471 |
--(*len); |
503 |
|
|
} |
504 |
|
138475 |
return bwscsbdup((unsigned char *)ret, *len); |
505 |
|
|
|
506 |
|
|
} else { |
507 |
|
|
*len = 0; |
508 |
|
|
|
509 |
|
|
if (feof(f)) |
510 |
|
|
return NULL; |
511 |
|
|
|
512 |
|
|
if (2 >= rb->fgetwln_z_buffer_size) { |
513 |
|
|
rb->fgetwln_z_buffer_size += 256; |
514 |
|
|
rb->fgetwln_z_buffer = |
515 |
|
|
sort_reallocarray(rb->fgetwln_z_buffer, |
516 |
|
|
rb->fgetwln_z_buffer_size, sizeof(wchar_t)); |
517 |
|
|
} |
518 |
|
|
rb->fgetwln_z_buffer[*len] = 0; |
519 |
|
|
|
520 |
|
|
if (sort_mb_cur_max == 1) { |
521 |
|
|
while (!feof(f)) { |
522 |
|
|
int c; |
523 |
|
|
|
524 |
|
|
c = fgetc(f); |
525 |
|
|
|
526 |
|
|
if (c == EOF) { |
527 |
|
|
if (*len == 0) |
528 |
|
|
return NULL; |
529 |
|
|
goto line_read_done; |
530 |
|
|
} |
531 |
|
|
if (c == eols) |
532 |
|
|
goto line_read_done; |
533 |
|
|
|
534 |
|
|
if (*len + 1 >= rb->fgetwln_z_buffer_size) { |
535 |
|
|
rb->fgetwln_z_buffer_size += 256; |
536 |
|
|
rb->fgetwln_z_buffer = |
537 |
|
|
sort_reallocarray(rb->fgetwln_z_buffer, |
538 |
|
|
rb->fgetwln_z_buffer_size, sizeof(wchar_t)); |
539 |
|
|
} |
540 |
|
|
|
541 |
|
|
rb->fgetwln_z_buffer[*len] = c; |
542 |
|
|
rb->fgetwln_z_buffer[++(*len)] = 0; |
543 |
|
|
} |
544 |
|
|
} else { |
545 |
|
|
while (!feof(f)) { |
546 |
|
|
wint_t c = 0; |
547 |
|
|
|
548 |
|
|
c = fgetwc(f); |
549 |
|
|
|
550 |
|
|
if (c == WEOF) { |
551 |
|
|
if (*len == 0) |
552 |
|
|
return NULL; |
553 |
|
|
goto line_read_done; |
554 |
|
|
} |
555 |
|
|
if (c == eols) |
556 |
|
|
goto line_read_done; |
557 |
|
|
|
558 |
|
|
if (*len + 1 >= rb->fgetwln_z_buffer_size) { |
559 |
|
|
rb->fgetwln_z_buffer_size += 256; |
560 |
|
|
rb->fgetwln_z_buffer = |
561 |
|
|
sort_reallocarray(rb->fgetwln_z_buffer, |
562 |
|
|
rb->fgetwln_z_buffer_size, sizeof(wchar_t)); |
563 |
|
|
} |
564 |
|
|
|
565 |
|
|
rb->fgetwln_z_buffer[*len] = c; |
566 |
|
|
rb->fgetwln_z_buffer[++(*len)] = 0; |
567 |
|
|
} |
568 |
|
|
} |
569 |
|
|
|
570 |
|
|
line_read_done: |
571 |
|
|
/* we do not count the last 0 */ |
572 |
|
|
return bwssbdup(rb->fgetwln_z_buffer, *len); |
573 |
|
|
} |
574 |
|
138752 |
} |
575 |
|
|
|
576 |
|
|
int |
577 |
|
|
bwsncmp(const struct bwstring *bws1, const struct bwstring *bws2, |
578 |
|
|
size_t offset, size_t len) |
579 |
|
|
{ |
580 |
|
|
size_t cmp_len, len1, len2; |
581 |
|
|
int res = 0; |
582 |
|
|
|
583 |
|
|
len1 = bws1->len; |
584 |
|
|
len2 = bws2->len; |
585 |
|
|
|
586 |
|
|
if (len1 <= offset) { |
587 |
|
|
return (len2 <= offset) ? 0 : -1; |
588 |
|
|
} else { |
589 |
|
|
if (len2 <= offset) |
590 |
|
|
return 1; |
591 |
|
|
else { |
592 |
|
|
len1 -= offset; |
593 |
|
|
len2 -= offset; |
594 |
|
|
|
595 |
|
|
cmp_len = len1; |
596 |
|
|
|
597 |
|
|
if (len2 < cmp_len) |
598 |
|
|
cmp_len = len2; |
599 |
|
|
|
600 |
|
|
if (len < cmp_len) |
601 |
|
|
cmp_len = len; |
602 |
|
|
|
603 |
|
|
if (sort_mb_cur_max == 1) { |
604 |
|
|
const unsigned char *s1, *s2; |
605 |
|
|
|
606 |
|
|
s1 = bws1->data.cstr + offset; |
607 |
|
|
s2 = bws2->data.cstr + offset; |
608 |
|
|
|
609 |
|
|
res = memcmp(s1, s2, cmp_len); |
610 |
|
|
|
611 |
|
|
} else { |
612 |
|
|
const wchar_t *s1, *s2; |
613 |
|
|
|
614 |
|
|
s1 = bws1->data.wstr + offset; |
615 |
|
|
s2 = bws2->data.wstr + offset; |
616 |
|
|
|
617 |
|
|
res = memcmp(s1, s2, SIZEOF_WCHAR_STRING(cmp_len)); |
618 |
|
|
} |
619 |
|
|
} |
620 |
|
|
} |
621 |
|
|
|
622 |
|
|
if (res == 0) { |
623 |
|
|
if (len1 < cmp_len && len1 < len2) |
624 |
|
|
res = -1; |
625 |
|
|
else if (len2 < cmp_len && len2 < len1) |
626 |
|
|
res = +1; |
627 |
|
|
} |
628 |
|
|
|
629 |
|
|
return res; |
630 |
|
|
} |
631 |
|
|
|
632 |
|
|
int |
633 |
|
|
bwscmp(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset) |
634 |
|
|
{ |
635 |
|
|
size_t len1, len2, cmp_len; |
636 |
|
|
int res; |
637 |
|
|
|
638 |
|
|
len1 = bws1->len; |
639 |
|
|
len2 = bws2->len; |
640 |
|
|
|
641 |
|
|
len1 -= offset; |
642 |
|
|
len2 -= offset; |
643 |
|
|
|
644 |
|
|
cmp_len = len1; |
645 |
|
|
|
646 |
|
|
if (len2 < cmp_len) |
647 |
|
|
cmp_len = len2; |
648 |
|
|
|
649 |
|
|
res = bwsncmp(bws1, bws2, offset, cmp_len); |
650 |
|
|
|
651 |
|
|
if (res == 0) { |
652 |
|
|
if (len1 < len2) |
653 |
|
|
res = -1; |
654 |
|
|
else if (len2 < len1) |
655 |
|
|
res = +1; |
656 |
|
|
} |
657 |
|
|
|
658 |
|
|
return res; |
659 |
|
|
} |
660 |
|
|
|
661 |
|
|
int |
662 |
|
|
bws_iterator_cmp(bwstring_iterator iter1, bwstring_iterator iter2, size_t len) |
663 |
|
|
{ |
664 |
|
|
wchar_t c1, c2; |
665 |
|
|
size_t i = 0; |
666 |
|
|
|
667 |
|
|
for (i = 0; i < len; ++i) { |
668 |
|
|
c1 = bws_get_iter_value(iter1); |
669 |
|
|
c2 = bws_get_iter_value(iter2); |
670 |
|
|
if (c1 != c2) |
671 |
|
|
return c1 - c2; |
672 |
|
|
iter1 = bws_iterator_inc(iter1, 1); |
673 |
|
|
iter2 = bws_iterator_inc(iter2, 1); |
674 |
|
|
} |
675 |
|
|
|
676 |
|
|
return 0; |
677 |
|
|
} |
678 |
|
|
|
679 |
|
|
int |
680 |
|
|
bwscoll(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset) |
681 |
|
|
{ |
682 |
|
|
size_t len1, len2; |
683 |
|
|
|
684 |
|
11454064 |
len1 = bws1->len; |
685 |
|
5727032 |
len2 = bws2->len; |
686 |
|
|
|
687 |
✓✓ |
5727032 |
if (len1 <= offset) |
688 |
|
35519 |
return (len2 <= offset) ? 0 : -1; |
689 |
|
|
else { |
690 |
✓✓ |
5691513 |
if (len2 <= offset) |
691 |
|
8407 |
return 1; |
692 |
|
|
else { |
693 |
|
5683106 |
len1 -= offset; |
694 |
|
5683106 |
len2 -= offset; |
695 |
|
|
|
696 |
✓✗ |
5683106 |
if (sort_mb_cur_max == 1) { |
697 |
|
|
const unsigned char *s1, *s2; |
698 |
|
|
|
699 |
|
5683106 |
s1 = bws1->data.cstr + offset; |
700 |
|
5683106 |
s2 = bws2->data.cstr + offset; |
701 |
|
|
|
702 |
✓✗ |
5683106 |
if (byte_sort) { |
703 |
|
|
int res = 0; |
704 |
|
|
|
705 |
✓✓ |
5683106 |
if (len1 > len2) { |
706 |
|
894294 |
res = memcmp(s1, s2, len2); |
707 |
|
894294 |
if (!res) |
708 |
|
|
res = +1; |
709 |
|
10471918 |
} else if (len1 < len2) { |
710 |
|
4788812 |
res = memcmp(s1, s2, len1); |
711 |
|
4788812 |
if (!res) |
712 |
|
|
res = -1; |
713 |
|
|
} else |
714 |
|
|
res = memcmp(s1, s2, len1); |
715 |
|
|
|
716 |
|
|
return res; |
717 |
|
|
|
718 |
|
|
} else { |
719 |
|
|
int res = 0; |
720 |
|
|
size_t i, maxlen; |
721 |
|
|
|
722 |
|
|
i = 0; |
723 |
|
|
maxlen = len1; |
724 |
|
|
|
725 |
|
|
if (maxlen > len2) |
726 |
|
|
maxlen = len2; |
727 |
|
|
|
728 |
|
|
while (i < maxlen) { |
729 |
|
|
/* goto next non-zero part: */ |
730 |
|
|
while ((i < maxlen) && |
731 |
|
|
!s1[i] && !s2[i]) |
732 |
|
|
++i; |
733 |
|
|
|
734 |
|
|
if (i >= maxlen) |
735 |
|
|
break; |
736 |
|
|
|
737 |
|
|
if (s1[i] == 0) { |
738 |
|
|
if (s2[i] == 0) |
739 |
|
|
/* NOTREACHED */ |
740 |
|
|
err(2, "bwscoll error 01"); |
741 |
|
|
else |
742 |
|
|
return -1; |
743 |
|
|
} else if (s2[i] == 0) |
744 |
|
|
return 1; |
745 |
|
|
|
746 |
|
|
res = strcoll((const char *)(s1 + i), (const char *)(s2 + i)); |
747 |
|
|
if (res) |
748 |
|
|
return res; |
749 |
|
|
|
750 |
|
|
while ((i < maxlen) && |
751 |
|
|
s1[i] && s2[i]) |
752 |
|
|
++i; |
753 |
|
|
|
754 |
|
|
if (i >= maxlen) |
755 |
|
|
break; |
756 |
|
|
|
757 |
|
|
if (s1[i] == 0) { |
758 |
|
|
if (s2[i] == 0) { |
759 |
|
|
++i; |
760 |
|
|
continue; |
761 |
|
|
} else |
762 |
|
|
return -1; |
763 |
|
|
} else if (s2[i] == 0) |
764 |
|
|
return 1; |
765 |
|
|
else |
766 |
|
|
/* NOTREACHED */ |
767 |
|
|
err(2, "bwscoll error 02"); |
768 |
|
|
} |
769 |
|
|
|
770 |
|
|
if (len1 < len2) |
771 |
|
|
return -1; |
772 |
|
|
else if (len1 > len2) |
773 |
|
|
return 1; |
774 |
|
|
|
775 |
|
|
return 0; |
776 |
|
|
} |
777 |
|
|
} else { |
778 |
|
|
const wchar_t *s1, *s2; |
779 |
|
|
size_t i, maxlen; |
780 |
|
|
int res = 0; |
781 |
|
|
|
782 |
|
|
s1 = bws1->data.wstr + offset; |
783 |
|
|
s2 = bws2->data.wstr + offset; |
784 |
|
|
|
785 |
|
|
i = 0; |
786 |
|
|
maxlen = len1; |
787 |
|
|
|
788 |
|
|
if (maxlen > len2) |
789 |
|
|
maxlen = len2; |
790 |
|
|
|
791 |
|
|
while (i < maxlen) { |
792 |
|
|
|
793 |
|
|
/* goto next non-zero part: */ |
794 |
|
|
while ((i < maxlen) && |
795 |
|
|
!s1[i] && !s2[i]) |
796 |
|
|
++i; |
797 |
|
|
|
798 |
|
|
if (i >= maxlen) |
799 |
|
|
break; |
800 |
|
|
|
801 |
|
|
if (s1[i] == 0) { |
802 |
|
|
if (s2[i] == 0) |
803 |
|
|
/* NOTREACHED */ |
804 |
|
|
err(2, "bwscoll error 1"); |
805 |
|
|
else |
806 |
|
|
return -1; |
807 |
|
|
} else if (s2[i] == 0) |
808 |
|
|
return 1; |
809 |
|
|
|
810 |
|
|
res = wide_str_coll(s1 + i, s2 + i); |
811 |
|
|
if (res) |
812 |
|
|
return res; |
813 |
|
|
|
814 |
|
|
while ((i < maxlen) && s1[i] && s2[i]) |
815 |
|
|
++i; |
816 |
|
|
|
817 |
|
|
if (i >= maxlen) |
818 |
|
|
break; |
819 |
|
|
|
820 |
|
|
if (s1[i] == 0) { |
821 |
|
|
if (s2[i] == 0) { |
822 |
|
|
++i; |
823 |
|
|
continue; |
824 |
|
|
} else |
825 |
|
|
return -1; |
826 |
|
|
} else if (s2[i] == 0) |
827 |
|
|
return 1; |
828 |
|
|
else |
829 |
|
|
/* NOTREACHED */ |
830 |
|
|
err(2, "bwscoll error 2"); |
831 |
|
|
} |
832 |
|
|
|
833 |
|
|
if (len1 == len2) |
834 |
|
|
return 0; |
835 |
|
|
return len1 < len2 ? -1 : 1; |
836 |
|
|
} |
837 |
|
|
} |
838 |
|
|
} |
839 |
|
5727032 |
} |
840 |
|
|
|
841 |
|
|
/* |
842 |
|
|
* Correction of the system API |
843 |
|
|
*/ |
844 |
|
|
double |
845 |
|
|
bwstod(struct bwstring *s0, bool *empty) |
846 |
|
|
{ |
847 |
|
|
double ret = 0; |
848 |
|
|
|
849 |
✓✗ |
368 |
if (sort_mb_cur_max == 1) { |
850 |
|
184 |
char *ep, *end, *s; |
851 |
|
|
|
852 |
|
184 |
s = (char *)s0->data.cstr; |
853 |
|
184 |
end = s + s0->len; |
854 |
|
184 |
ep = NULL; |
855 |
|
|
|
856 |
✗✓✗✓
|
552 |
while (isblank((unsigned char)*s) && s < end) |
857 |
|
|
++s; |
858 |
|
|
|
859 |
✗✓ |
184 |
if (!isprint((unsigned char)*s)) { |
860 |
|
|
*empty = true; |
861 |
|
|
return 0; |
862 |
|
|
} |
863 |
|
|
|
864 |
|
184 |
ret = strtod(s, &ep); |
865 |
✓✓ |
184 |
if (ep == s) { |
866 |
|
12 |
*empty = true; |
867 |
|
12 |
return 0; |
868 |
|
|
} |
869 |
✓✓ |
356 |
} else { |
870 |
|
|
wchar_t *end, *ep, *s; |
871 |
|
|
|
872 |
|
|
s = s0->data.wstr; |
873 |
|
|
end = s + s0->len; |
874 |
|
|
ep = NULL; |
875 |
|
|
|
876 |
|
|
while (iswblank(*s) && s < end) |
877 |
|
|
++s; |
878 |
|
|
|
879 |
|
|
if (!iswprint(*s)) { |
880 |
|
|
*empty = true; |
881 |
|
|
return 0; |
882 |
|
|
} |
883 |
|
|
|
884 |
|
|
ret = wcstod(s, &ep); |
885 |
|
|
if (ep == s) { |
886 |
|
|
*empty = true; |
887 |
|
|
return 0; |
888 |
|
|
} |
889 |
|
|
} |
890 |
|
|
|
891 |
|
172 |
*empty = false; |
892 |
|
172 |
return ret; |
893 |
|
184 |
} |
894 |
|
|
|
895 |
|
|
/* |
896 |
|
|
* A helper function for monthcoll. If a line matches |
897 |
|
|
* a month name, it returns (number of the month - 1), |
898 |
|
|
* while if there is no match, it just return -1. |
899 |
|
|
*/ |
900 |
|
|
int |
901 |
|
|
bws_month_score(const struct bwstring *s0) |
902 |
|
|
{ |
903 |
✓✗ |
1344 |
if (sort_mb_cur_max == 1) { |
904 |
|
|
const char *end, *s; |
905 |
|
|
int i; |
906 |
|
|
|
907 |
|
672 |
s = (char *)s0->data.cstr; |
908 |
|
672 |
end = s + s0->len; |
909 |
|
|
|
910 |
✓✓✓✗
|
1428 |
while (isblank((unsigned char)*s) && s < end) |
911 |
|
28 |
++s; |
912 |
|
|
|
913 |
✓✓ |
10312 |
for (i = 11; i >= 0; --i) { |
914 |
✓✗✓✓
|
10240 |
if (cmonths[i] && |
915 |
|
5120 |
(s == strstr(s, cmonths[i]))) |
916 |
|
636 |
return i; |
917 |
|
|
} |
918 |
✓✓ |
36 |
} else { |
919 |
|
|
const wchar_t *end, *s; |
920 |
|
|
int i; |
921 |
|
|
|
922 |
|
|
s = s0->data.wstr; |
923 |
|
|
end = s + s0->len; |
924 |
|
|
|
925 |
|
|
while (iswblank(*s) && s < end) |
926 |
|
|
++s; |
927 |
|
|
|
928 |
|
|
for (i = 11; i >= 0; --i) { |
929 |
|
|
if (wmonths[i] && (s == wcsstr(s, wmonths[i]))) |
930 |
|
|
return i; |
931 |
|
|
} |
932 |
|
|
} |
933 |
|
|
|
934 |
|
36 |
return -1; |
935 |
|
672 |
} |
936 |
|
|
|
937 |
|
|
/* |
938 |
|
|
* Rips out leading blanks (-b). |
939 |
|
|
*/ |
940 |
|
|
struct bwstring * |
941 |
|
|
ignore_leading_blanks(struct bwstring *str) |
942 |
|
|
{ |
943 |
|
|
if (sort_mb_cur_max == 1) { |
944 |
|
|
unsigned char *dst, *end, *src; |
945 |
|
|
|
946 |
|
|
src = str->data.cstr; |
947 |
|
|
dst = src; |
948 |
|
|
end = src + str->len; |
949 |
|
|
|
950 |
|
|
while (src < end && isblank(*src)) |
951 |
|
|
++src; |
952 |
|
|
|
953 |
|
|
if (src != dst) { |
954 |
|
|
size_t newlen; |
955 |
|
|
|
956 |
|
|
newlen = BWSLEN(str) - (src - dst); |
957 |
|
|
|
958 |
|
|
while (src < end) { |
959 |
|
|
*dst = *src; |
960 |
|
|
++dst; |
961 |
|
|
++src; |
962 |
|
|
} |
963 |
|
|
bws_setlen(str, newlen); |
964 |
|
|
} |
965 |
|
|
} else { |
966 |
|
|
wchar_t *dst, *end, *src; |
967 |
|
|
|
968 |
|
|
src = str->data.wstr; |
969 |
|
|
dst = src; |
970 |
|
|
end = src + str->len; |
971 |
|
|
|
972 |
|
|
while (src < end && iswblank(*src)) |
973 |
|
|
++src; |
974 |
|
|
|
975 |
|
|
if (src != dst) { |
976 |
|
|
|
977 |
|
|
size_t newlen = BWSLEN(str) - (src - dst); |
978 |
|
|
|
979 |
|
|
while (src < end) { |
980 |
|
|
*dst = *src; |
981 |
|
|
++dst; |
982 |
|
|
++src; |
983 |
|
|
} |
984 |
|
|
bws_setlen(str, newlen); |
985 |
|
|
|
986 |
|
|
} |
987 |
|
|
} |
988 |
|
|
return str; |
989 |
|
|
} |
990 |
|
|
|
991 |
|
|
/* |
992 |
|
|
* Rips out nonprinting characters (-i). |
993 |
|
|
*/ |
994 |
|
|
struct bwstring * |
995 |
|
|
ignore_nonprinting(struct bwstring *str) |
996 |
|
|
{ |
997 |
|
2040 |
size_t newlen = str->len; |
998 |
|
|
|
999 |
✓✗ |
1020 |
if (sort_mb_cur_max == 1) { |
1000 |
|
|
unsigned char *dst, *end, *src; |
1001 |
|
|
unsigned char c; |
1002 |
|
|
|
1003 |
|
1020 |
src = str->data.cstr; |
1004 |
|
|
dst = src; |
1005 |
|
1020 |
end = src + str->len; |
1006 |
|
|
|
1007 |
✓✓ |
6120 |
while (src < end) { |
1008 |
|
2040 |
c = *src; |
1009 |
✓✓ |
2040 |
if (isprint(c)) { |
1010 |
|
1400 |
*dst = c; |
1011 |
|
1400 |
++dst; |
1012 |
|
1400 |
++src; |
1013 |
|
1400 |
} else { |
1014 |
|
640 |
++src; |
1015 |
|
640 |
--newlen; |
1016 |
|
|
} |
1017 |
|
|
} |
1018 |
|
1020 |
} else { |
1019 |
|
|
wchar_t *dst, *end, *src; |
1020 |
|
|
wchar_t c; |
1021 |
|
|
|
1022 |
|
|
src = str->data.wstr; |
1023 |
|
|
dst = src; |
1024 |
|
|
end = src + str->len; |
1025 |
|
|
|
1026 |
|
|
while (src < end) { |
1027 |
|
|
c = *src; |
1028 |
|
|
if (iswprint(c)) { |
1029 |
|
|
*dst = c; |
1030 |
|
|
++dst; |
1031 |
|
|
++src; |
1032 |
|
|
} else { |
1033 |
|
|
++src; |
1034 |
|
|
--newlen; |
1035 |
|
|
} |
1036 |
|
|
} |
1037 |
|
|
} |
1038 |
|
1020 |
bws_setlen(str, newlen); |
1039 |
|
|
|
1040 |
|
1020 |
return str; |
1041 |
|
|
} |
1042 |
|
|
|
1043 |
|
|
/* |
1044 |
|
|
* Rips out any characters that are not alphanumeric characters |
1045 |
|
|
* nor blanks (-d). |
1046 |
|
|
*/ |
1047 |
|
|
struct bwstring * |
1048 |
|
|
dictionary_order(struct bwstring *str) |
1049 |
|
|
{ |
1050 |
|
2872 |
size_t newlen = str->len; |
1051 |
|
|
|
1052 |
✓✗ |
1436 |
if (sort_mb_cur_max == 1) { |
1053 |
|
|
unsigned char *dst, *end, *src; |
1054 |
|
|
unsigned char c; |
1055 |
|
|
|
1056 |
|
1436 |
src = str->data.cstr; |
1057 |
|
|
dst = src; |
1058 |
|
1436 |
end = src + str->len; |
1059 |
|
|
|
1060 |
✓✓ |
9064 |
while (src < end) { |
1061 |
|
3096 |
c = *src; |
1062 |
✓✓✓✓
|
5152 |
if (isalnum(c) || isblank(c)) { |
1063 |
|
2148 |
*dst = c; |
1064 |
|
2148 |
++dst; |
1065 |
|
2148 |
++src; |
1066 |
|
2148 |
} else { |
1067 |
|
948 |
++src; |
1068 |
|
948 |
--newlen; |
1069 |
|
|
} |
1070 |
|
|
} |
1071 |
|
1436 |
} else { |
1072 |
|
|
wchar_t *dst, *end, *src; |
1073 |
|
|
wchar_t c; |
1074 |
|
|
|
1075 |
|
|
src = str->data.wstr; |
1076 |
|
|
dst = src; |
1077 |
|
|
end = src + str->len; |
1078 |
|
|
|
1079 |
|
|
while (src < end) { |
1080 |
|
|
c = *src; |
1081 |
|
|
if (iswalnum(c) || iswblank(c)) { |
1082 |
|
|
*dst = c; |
1083 |
|
|
++dst; |
1084 |
|
|
++src; |
1085 |
|
|
} else { |
1086 |
|
|
++src; |
1087 |
|
|
--newlen; |
1088 |
|
|
} |
1089 |
|
|
} |
1090 |
|
|
} |
1091 |
|
1436 |
bws_setlen(str, newlen); |
1092 |
|
|
|
1093 |
|
1436 |
return str; |
1094 |
|
|
} |
1095 |
|
|
|
1096 |
|
|
/* |
1097 |
|
|
* Converts string to lower case(-f). |
1098 |
|
|
*/ |
1099 |
|
|
struct bwstring * |
1100 |
|
|
ignore_case(struct bwstring *str) |
1101 |
|
|
{ |
1102 |
✓✗ |
1886160 |
if (sort_mb_cur_max == 1) { |
1103 |
|
|
unsigned char *end, *s; |
1104 |
|
|
|
1105 |
|
943080 |
s = str->data.cstr; |
1106 |
|
943080 |
end = s + str->len; |
1107 |
|
|
|
1108 |
✓✓ |
251612872 |
while (s < end) { |
1109 |
|
124863356 |
*s = toupper(*s); |
1110 |
|
124863356 |
++s; |
1111 |
|
|
} |
1112 |
|
943080 |
} else { |
1113 |
|
|
wchar_t *end, *s; |
1114 |
|
|
|
1115 |
|
|
s = str->data.wstr; |
1116 |
|
|
end = s + str->len; |
1117 |
|
|
|
1118 |
|
|
while (s < end) { |
1119 |
|
|
*s = towupper(*s); |
1120 |
|
|
++s; |
1121 |
|
|
} |
1122 |
|
|
} |
1123 |
|
943080 |
return str; |
1124 |
|
|
} |
1125 |
|
|
|
1126 |
|
|
void |
1127 |
|
|
bws_disorder_warnx(struct bwstring *s, const char *fn, size_t pos) |
1128 |
|
|
{ |
1129 |
✓✗ |
40 |
if (sort_mb_cur_max == 1) |
1130 |
|
20 |
warnx("%s:%zu: disorder: %s", fn, pos + 1, s->data.cstr); |
1131 |
|
|
else |
1132 |
|
|
warnx("%s:%zu: disorder: %ls", fn, pos + 1, s->data.wstr); |
1133 |
|
20 |
} |