1 |
|
|
/* $OpenBSD: wc.c,v 1.21 2016/09/16 09:25:23 fcambus Exp $ */ |
2 |
|
|
|
3 |
|
|
/* |
4 |
|
|
* Copyright (c) 1980, 1987, 1991, 1993 |
5 |
|
|
* The Regents of the University of California. All rights reserved. |
6 |
|
|
* |
7 |
|
|
* Redistribution and use in source and binary forms, with or without |
8 |
|
|
* modification, are permitted provided that the following conditions |
9 |
|
|
* are met: |
10 |
|
|
* 1. Redistributions of source code must retain the above copyright |
11 |
|
|
* notice, this list of conditions and the following disclaimer. |
12 |
|
|
* 2. Redistributions in binary form must reproduce the above copyright |
13 |
|
|
* notice, this list of conditions and the following disclaimer in the |
14 |
|
|
* documentation and/or other materials provided with the distribution. |
15 |
|
|
* 3. Neither the name of the University nor the names of its contributors |
16 |
|
|
* may be used to endorse or promote products derived from this software |
17 |
|
|
* without specific prior written permission. |
18 |
|
|
* |
19 |
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
20 |
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
21 |
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
22 |
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
23 |
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
24 |
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
25 |
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
26 |
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
27 |
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
28 |
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
29 |
|
|
* SUCH DAMAGE. |
30 |
|
|
*/ |
31 |
|
|
|
32 |
|
|
#include <sys/param.h> /* MAXBSIZE */ |
33 |
|
|
#include <sys/stat.h> |
34 |
|
|
#include <sys/file.h> |
35 |
|
|
#include <stdio.h> |
36 |
|
|
#include <stdlib.h> |
37 |
|
|
#include <locale.h> |
38 |
|
|
#include <ctype.h> |
39 |
|
|
#include <err.h> |
40 |
|
|
#include <unistd.h> |
41 |
|
|
#include <util.h> |
42 |
|
|
#include <wchar.h> |
43 |
|
|
#include <wctype.h> |
44 |
|
|
|
45 |
|
|
int64_t tlinect, twordct, tcharct; |
46 |
|
|
int doline, doword, dochar, humanchar, multibyte; |
47 |
|
|
int rval; |
48 |
|
|
extern char *__progname; |
49 |
|
|
|
50 |
|
|
static void print_counts(int64_t, int64_t, int64_t, char *); |
51 |
|
|
static void format_and_print(int64_t); |
52 |
|
|
static void cnt(char *); |
53 |
|
|
|
54 |
|
|
int |
55 |
|
|
main(int argc, char *argv[]) |
56 |
|
|
{ |
57 |
|
|
int ch; |
58 |
|
|
|
59 |
|
308 |
setlocale(LC_CTYPE, ""); |
60 |
|
|
|
61 |
✗✓ |
154 |
if (pledge("stdio rpath flock cpath wpath", NULL) == -1) |
62 |
|
|
err(1, "pledge"); |
63 |
|
|
|
64 |
✓✓ |
808 |
while ((ch = getopt(argc, argv, "lwchm")) != -1) |
65 |
✓✓✓✗ ✗✓ |
500 |
switch(ch) { |
66 |
|
|
case 'l': |
67 |
|
|
doline = 1; |
68 |
|
82 |
break; |
69 |
|
|
case 'w': |
70 |
|
|
doword = 1; |
71 |
|
|
break; |
72 |
|
|
case 'm': |
73 |
✓✓ |
64 |
if (MB_CUR_MAX > 1) |
74 |
|
32 |
multibyte = 1; |
75 |
|
|
/* FALLTHROUGH */ |
76 |
|
|
case 'c': |
77 |
|
|
dochar = 1; |
78 |
|
104 |
break; |
79 |
|
|
case 'h': |
80 |
|
|
humanchar = 1; |
81 |
|
|
break; |
82 |
|
|
case '?': |
83 |
|
|
default: |
84 |
|
|
fprintf(stderr, |
85 |
|
|
"usage: %s [-c | -m] [-hlw] [file ...]\n", |
86 |
|
|
__progname); |
87 |
|
|
return 1; |
88 |
|
|
} |
89 |
|
154 |
argv += optind; |
90 |
|
154 |
argc -= optind; |
91 |
|
|
|
92 |
|
|
/* |
93 |
|
|
* wc is unusual in that its flags are on by default, so, |
94 |
|
|
* if you don't get any arguments, you have to turn them |
95 |
|
|
* all on. |
96 |
|
|
*/ |
97 |
✓✓ |
154 |
if (!doline && !doword && !dochar) |
98 |
|
32 |
doline = doword = dochar = 1; |
99 |
|
|
|
100 |
✓✗ |
154 |
if (!*argv) { |
101 |
|
154 |
cnt(NULL); |
102 |
|
154 |
} else { |
103 |
|
|
int dototal = (argc > 1); |
104 |
|
|
|
105 |
|
|
do { |
106 |
|
|
cnt(*argv); |
107 |
|
|
} while(*++argv); |
108 |
|
|
|
109 |
|
|
if (dototal) |
110 |
|
|
print_counts(tlinect, twordct, tcharct, "total"); |
111 |
|
|
} |
112 |
|
|
|
113 |
|
154 |
return rval; |
114 |
|
154 |
} |
115 |
|
|
|
116 |
|
|
static void |
117 |
|
|
cnt(char *file) |
118 |
|
|
{ |
119 |
|
|
static char *buf; |
120 |
|
|
static size_t bufsz; |
121 |
|
|
|
122 |
|
|
FILE *stream; |
123 |
|
|
char *C; |
124 |
|
308 |
wchar_t wc; |
125 |
|
|
short gotsp; |
126 |
|
|
ssize_t len; |
127 |
|
|
int64_t linect, wordct, charct; |
128 |
|
154 |
struct stat sbuf; |
129 |
|
|
int fd; |
130 |
|
|
|
131 |
|
|
linect = wordct = charct = 0; |
132 |
|
|
stream = NULL; |
133 |
✗✓ |
154 |
if (file) { |
134 |
|
|
if ((fd = open(file, O_RDONLY, 0)) < 0) { |
135 |
|
|
warn("%s", file); |
136 |
|
|
rval = 1; |
137 |
|
|
return; |
138 |
|
|
} |
139 |
|
|
} else { |
140 |
|
|
fd = STDIN_FILENO; |
141 |
|
|
} |
142 |
|
|
|
143 |
✓✓ |
154 |
if (!doword && !multibyte) { |
144 |
✓✗✗✓
|
116 |
if (bufsz < MAXBSIZE && |
145 |
|
58 |
(buf = realloc(buf, MAXBSIZE)) == NULL) |
146 |
|
|
err(1, NULL); |
147 |
|
|
/* |
148 |
|
|
* Line counting is split out because it's a lot |
149 |
|
|
* faster to get lines than to get words, since |
150 |
|
|
* the word count requires some logic. |
151 |
|
|
*/ |
152 |
✓✓ |
58 |
if (doline) { |
153 |
✓✓ |
48 |
while ((len = read(fd, buf, MAXBSIZE)) > 0) { |
154 |
|
6 |
charct += len; |
155 |
✓✓ |
1236 |
for (C = buf; len--; ++C) |
156 |
✓✓ |
612 |
if (*C == '\n') |
157 |
|
12 |
++linect; |
158 |
|
|
} |
159 |
✗✓ |
18 |
if (len == -1) { |
160 |
|
|
warn("%s", file); |
161 |
|
|
rval = 1; |
162 |
|
|
} |
163 |
|
|
} |
164 |
|
|
/* |
165 |
|
|
* If all we need is the number of characters and |
166 |
|
|
* it's a directory or a regular or linked file, just |
167 |
|
|
* stat the puppy. We avoid testing for it not being |
168 |
|
|
* a special device in case someone adds a new type |
169 |
|
|
* of inode. |
170 |
|
|
*/ |
171 |
✓✗ |
40 |
else if (dochar) { |
172 |
|
|
mode_t ifmt; |
173 |
|
|
|
174 |
✗✓ |
40 |
if (fstat(fd, &sbuf)) { |
175 |
|
|
warn("%s", file); |
176 |
|
|
rval = 1; |
177 |
|
|
} else { |
178 |
|
40 |
ifmt = sbuf.st_mode & S_IFMT; |
179 |
✓✓ |
80 |
if (ifmt == S_IFREG || ifmt == S_IFLNK |
180 |
|
40 |
|| ifmt == S_IFDIR) { |
181 |
|
12 |
charct = sbuf.st_size; |
182 |
|
12 |
} else { |
183 |
✓✓ |
92 |
while ((len = read(fd, buf, MAXBSIZE)) > 0) |
184 |
|
18 |
charct += len; |
185 |
✗✓ |
28 |
if (len == -1) { |
186 |
|
|
warn("%s", file); |
187 |
|
|
rval = 1; |
188 |
|
|
} |
189 |
|
|
} |
190 |
|
|
} |
191 |
|
40 |
} |
192 |
|
|
} else { |
193 |
✓✗ |
96 |
if (file == NULL) |
194 |
|
96 |
stream = stdin; |
195 |
|
|
else if ((stream = fdopen(fd, "r")) == NULL) { |
196 |
|
|
warn("%s", file); |
197 |
|
|
close(fd); |
198 |
|
|
rval = 1; |
199 |
|
|
return; |
200 |
|
|
} |
201 |
|
|
|
202 |
|
|
/* |
203 |
|
|
* Do it the hard way. |
204 |
|
|
* According to POSIX, a word is a "maximal string of |
205 |
|
|
* characters delimited by whitespace." Nothing is said |
206 |
|
|
* about a character being printing or non-printing. |
207 |
|
|
*/ |
208 |
|
|
gotsp = 1; |
209 |
✓✓ |
384 |
while ((len = getline(&buf, &bufsz, stream)) > 0) { |
210 |
✓✓ |
96 |
if (multibyte) { |
211 |
✓✓ |
400 |
for (C = buf; *C != '\0'; C += len) { |
212 |
|
168 |
++charct; |
213 |
|
168 |
len = mbtowc(&wc, C, MB_CUR_MAX); |
214 |
✓✓ |
168 |
if (len == -1) { |
215 |
|
8 |
mbtowc(NULL, NULL, |
216 |
|
8 |
MB_CUR_MAX); |
217 |
|
|
len = 1; |
218 |
|
8 |
wc = L' '; |
219 |
|
8 |
} |
220 |
✓✓ |
168 |
if (iswspace(wc)) { |
221 |
|
|
gotsp = 1; |
222 |
✓✓ |
44 |
if (wc == L'\n') |
223 |
|
20 |
++linect; |
224 |
✓✓ |
124 |
} else if (gotsp) { |
225 |
|
|
gotsp = 0; |
226 |
|
44 |
++wordct; |
227 |
|
44 |
} |
228 |
|
|
} |
229 |
|
|
} else { |
230 |
|
64 |
charct += len; |
231 |
✓✓ |
864 |
for (C = buf; *C != '\0'; ++C) { |
232 |
✓✓ |
368 |
if (isspace((unsigned char)*C)) { |
233 |
|
|
gotsp = 1; |
234 |
✓✓ |
72 |
if (*C == '\n') |
235 |
|
40 |
++linect; |
236 |
✓✓ |
296 |
} else if (gotsp) { |
237 |
|
|
gotsp = 0; |
238 |
|
80 |
++wordct; |
239 |
|
80 |
} |
240 |
|
|
} |
241 |
|
|
} |
242 |
|
|
} |
243 |
✓✗✗✓ ✗✗ |
192 |
if (ferror(stream)) { |
244 |
|
|
warn("%s", file); |
245 |
|
|
rval = 1; |
246 |
|
|
} |
247 |
|
|
} |
248 |
|
|
|
249 |
|
154 |
print_counts(linect, wordct, charct, file); |
250 |
|
|
|
251 |
|
|
/* |
252 |
|
|
* Don't bother checking doline, doword, or dochar -- speeds |
253 |
|
|
* up the common case |
254 |
|
|
*/ |
255 |
|
154 |
tlinect += linect; |
256 |
|
154 |
twordct += wordct; |
257 |
|
154 |
tcharct += charct; |
258 |
|
|
|
259 |
✓✓✗✓
|
462 |
if ((stream == NULL ? close(fd) : fclose(stream)) != 0) { |
260 |
|
|
warn("%s", file); |
261 |
|
|
rval = 1; |
262 |
|
|
} |
263 |
|
308 |
} |
264 |
|
|
|
265 |
|
|
static void |
266 |
|
|
format_and_print(int64_t v) |
267 |
|
|
{ |
268 |
✗✓ |
692 |
if (humanchar) { |
269 |
|
|
char result[FMT_SCALED_STRSIZE]; |
270 |
|
|
|
271 |
|
|
fmt_scaled((long long)v, result); |
272 |
|
|
printf("%7s", result); |
273 |
|
|
} else { |
274 |
|
346 |
printf(" %7lld", v); |
275 |
|
|
} |
276 |
|
346 |
} |
277 |
|
|
|
278 |
|
|
static void |
279 |
|
|
print_counts(int64_t lines, int64_t words, int64_t chars, char *name) |
280 |
|
|
{ |
281 |
✓✓ |
308 |
if (doline) |
282 |
|
114 |
format_and_print(lines); |
283 |
✓✓ |
154 |
if (doword) |
284 |
|
96 |
format_and_print(words); |
285 |
✓✓ |
154 |
if (dochar) |
286 |
|
136 |
format_and_print(chars); |
287 |
|
|
|
288 |
✗✓ |
154 |
if (name) |
289 |
|
|
printf(" %s\n", name); |
290 |
|
|
else |
291 |
|
154 |
printf("\n"); |
292 |
|
154 |
} |