1 |
|
|
/* $OpenBSD: wc.c,v 1.20 2015/12/08 01:00:45 schwarze Exp $ */ |
2 |
|
|
|
3 |
|
|
/* |
4 |
|
|
* Copyright (c) 1980, 1987, 1991, 1993 |
5 |
|
|
* The Regents of the University of California. All rights reserved. |
6 |
|
|
* |
7 |
|
|
* Redistribution and use in source and binary forms, with or without |
8 |
|
|
* modification, are permitted provided that the following conditions |
9 |
|
|
* are met: |
10 |
|
|
* 1. Redistributions of source code must retain the above copyright |
11 |
|
|
* notice, this list of conditions and the following disclaimer. |
12 |
|
|
* 2. Redistributions in binary form must reproduce the above copyright |
13 |
|
|
* notice, this list of conditions and the following disclaimer in the |
14 |
|
|
* documentation and/or other materials provided with the distribution. |
15 |
|
|
* 3. Neither the name of the University nor the names of its contributors |
16 |
|
|
* may be used to endorse or promote products derived from this software |
17 |
|
|
* without specific prior written permission. |
18 |
|
|
* |
19 |
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
20 |
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
21 |
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
22 |
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
23 |
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
24 |
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
25 |
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
26 |
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
27 |
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
28 |
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
29 |
|
|
* SUCH DAMAGE. |
30 |
|
|
*/ |
31 |
|
|
|
32 |
|
|
#include <sys/param.h> /* MAXBSIZE */ |
33 |
|
|
#include <sys/stat.h> |
34 |
|
|
#include <sys/file.h> |
35 |
|
|
#include <stdio.h> |
36 |
|
|
#include <stdlib.h> |
37 |
|
|
#include <string.h> |
38 |
|
|
#include <locale.h> |
39 |
|
|
#include <ctype.h> |
40 |
|
|
#include <err.h> |
41 |
|
|
#include <unistd.h> |
42 |
|
|
#include <util.h> |
43 |
|
|
#include <wchar.h> |
44 |
|
|
#include <wctype.h> |
45 |
|
|
|
46 |
|
|
/* Running totals over all inputs; printed as the "total" line by main(). */
int64_t tlinect;
int64_t twordct;
int64_t tcharct;

/* Output selectors, set by the -l, -w and -c/-m options respectively. */
int doline;
int doword;
int dochar;

/* Set by -h: print the counts through fmt_scaled(3). */
int humanchar;

/* Set by -m when the current locale has MB_CUR_MAX > 1. */
int multibyte;

/* Process exit status; set to 1 as soon as any input fails. */
int rval;

/* Program name, used in the usage message. */
extern char *__progname;

void print_counts(int64_t lines, int64_t words, int64_t chars, char *name);
void format_and_print(long long v);
void cnt(char *file);
54 |
|
|
|
55 |
|
|
int |
56 |
|
|
main(int argc, char *argv[]) |
57 |
|
6 |
{ |
58 |
|
|
int ch; |
59 |
|
|
|
60 |
|
6 |
setlocale(LC_CTYPE, ""); |
61 |
|
|
|
62 |
✗✓ |
6 |
if (pledge("stdio rpath wpath cpath", NULL) == -1) |
63 |
|
|
err(1, "pledge"); |
64 |
|
|
|
65 |
✓✓ |
12 |
while ((ch = getopt(argc, argv, "lwchm")) != -1) |
66 |
✓✗✗✓ ✗✗ |
6 |
switch(ch) { |
67 |
|
|
case 'l': |
68 |
|
3 |
doline = 1; |
69 |
|
3 |
break; |
70 |
|
|
case 'w': |
71 |
|
|
doword = 1; |
72 |
|
|
break; |
73 |
|
|
case 'm': |
74 |
|
|
if (MB_CUR_MAX > 1) |
75 |
|
|
multibyte = 1; |
76 |
|
|
/* FALLTHROUGH */ |
77 |
|
|
case 'c': |
78 |
|
3 |
dochar = 1; |
79 |
|
3 |
break; |
80 |
|
|
case 'h': |
81 |
|
|
humanchar = 1; |
82 |
|
|
break; |
83 |
|
|
case '?': |
84 |
|
|
default: |
85 |
|
|
(void)fprintf(stderr, |
86 |
|
|
"usage: %s [-c | -m] [-hlw] [file ...]\n", |
87 |
|
|
__progname); |
88 |
|
|
exit(1); |
89 |
|
|
} |
90 |
|
6 |
argv += optind; |
91 |
|
6 |
argc -= optind; |
92 |
|
|
|
93 |
|
|
/* |
94 |
|
|
* wc is unusual in that its flags are on by default, so, |
95 |
|
|
* if you don't get any arguments, you have to turn them |
96 |
|
|
* all on. |
97 |
|
|
*/ |
98 |
✓✓✓✗ ✗✓ |
6 |
if (!doline && !doword && !dochar) |
99 |
|
|
doline = doword = dochar = 1; |
100 |
|
|
|
101 |
✓✗ |
6 |
if (!*argv) { |
102 |
|
6 |
cnt((char *)NULL); |
103 |
|
|
} else { |
104 |
|
|
int dototal = (argc > 1); |
105 |
|
|
|
106 |
|
|
do { |
107 |
|
|
cnt(*argv); |
108 |
|
|
} while(*++argv); |
109 |
|
|
|
110 |
|
|
if (dototal) |
111 |
|
|
print_counts(tlinect, twordct, tcharct, "total"); |
112 |
|
|
} |
113 |
|
|
|
114 |
|
6 |
exit(rval); |
115 |
|
|
} |
116 |
|
|
|
117 |
|
|
void |
118 |
|
|
cnt(char *file) |
119 |
|
6 |
{ |
120 |
|
|
static char *buf; |
121 |
|
|
static ssize_t bufsz; |
122 |
|
|
|
123 |
|
|
FILE *stream; |
124 |
|
|
char *C; |
125 |
|
|
wchar_t wc; |
126 |
|
|
short gotsp; |
127 |
|
|
ssize_t len; |
128 |
|
|
int64_t linect, wordct, charct; |
129 |
|
|
struct stat sbuf; |
130 |
|
|
int fd; |
131 |
|
|
|
132 |
|
6 |
linect = wordct = charct = 0; |
133 |
|
6 |
stream = NULL; |
134 |
✗✓ |
6 |
if (file) { |
135 |
|
|
if ((fd = open(file, O_RDONLY, 0)) < 0) { |
136 |
|
|
warn("%s", file); |
137 |
|
|
rval = 1; |
138 |
|
|
return; |
139 |
|
|
} |
140 |
|
|
} else { |
141 |
|
6 |
fd = STDIN_FILENO; |
142 |
|
|
} |
143 |
|
|
|
144 |
✓✗✓✗
|
6 |
if (!doword && !multibyte) { |
145 |
✓✗✗✓
|
6 |
if (bufsz < MAXBSIZE && |
146 |
|
|
(buf = realloc(buf, MAXBSIZE)) == NULL) |
147 |
|
|
err(1, NULL); |
148 |
|
|
/* |
149 |
|
|
* Line counting is split out because it's a lot |
150 |
|
|
* faster to get lines than to get words, since |
151 |
|
|
* the word count requires some logic. |
152 |
|
|
*/ |
153 |
✓✓ |
6 |
if (doline) { |
154 |
✓✓ |
4 |
while ((len = read(fd, buf, MAXBSIZE)) > 0) { |
155 |
|
1 |
charct += len; |
156 |
✓✓ |
103 |
for (C = buf; len--; ++C) |
157 |
✓✓ |
102 |
if (*C == '\n') |
158 |
|
2 |
++linect; |
159 |
|
|
} |
160 |
✗✓ |
3 |
if (len == -1) { |
161 |
|
|
warn("%s", file); |
162 |
|
|
rval = 1; |
163 |
|
|
} |
164 |
|
|
} |
165 |
|
|
/* |
166 |
|
|
* If all we need is the number of characters and |
167 |
|
|
* it's a directory or a regular or linked file, just |
168 |
|
|
* stat the puppy. We avoid testing for it not being |
169 |
|
|
* a special device in case someone adds a new type |
170 |
|
|
* of inode. |
171 |
|
|
*/ |
172 |
✓✗ |
3 |
else if (dochar) { |
173 |
|
|
mode_t ifmt; |
174 |
|
|
|
175 |
✗✓ |
3 |
if (fstat(fd, &sbuf)) { |
176 |
|
|
warn("%s", file); |
177 |
|
|
rval = 1; |
178 |
|
|
} else { |
179 |
|
3 |
ifmt = sbuf.st_mode & S_IFMT; |
180 |
✓✗✗✓
|
3 |
if (ifmt == S_IFREG || ifmt == S_IFLNK |
181 |
|
|
|| ifmt == S_IFDIR) { |
182 |
|
|
charct = sbuf.st_size; |
183 |
|
|
} else { |
184 |
✓✓ |
6 |
while ((len = read(fd, buf, MAXBSIZE)) > 0) |
185 |
|
3 |
charct += len; |
186 |
✗✓ |
3 |
if (len == -1) { |
187 |
|
|
warn("%s", file); |
188 |
|
|
rval = 1; |
189 |
|
|
} |
190 |
|
|
} |
191 |
|
|
} |
192 |
|
|
} |
193 |
|
|
} else { |
194 |
|
|
if (file == NULL) |
195 |
|
|
stream = stdin; |
196 |
|
|
else if ((stream = fdopen(fd, "r")) == NULL) { |
197 |
|
|
warn("%s", file); |
198 |
|
|
close(fd); |
199 |
|
|
rval = 1; |
200 |
|
|
return; |
201 |
|
|
} |
202 |
|
|
|
203 |
|
|
/* |
204 |
|
|
* Do it the hard way. |
205 |
|
|
* According to POSIX, a word is a "maximal string of |
206 |
|
|
* characters delimited by whitespace." Nothing is said |
207 |
|
|
* about a character being printing or non-printing. |
208 |
|
|
*/ |
209 |
|
|
gotsp = 1; |
210 |
|
|
while ((len = getline(&buf, &bufsz, stream)) > 0) { |
211 |
|
|
if (multibyte) { |
212 |
|
|
for (C = buf; *C != '\0'; C += len) { |
213 |
|
|
++charct; |
214 |
|
|
len = mbtowc(&wc, C, MB_CUR_MAX); |
215 |
|
|
if (len == -1) { |
216 |
|
|
(void)mbtowc(NULL, NULL, |
217 |
|
|
MB_CUR_MAX); |
218 |
|
|
len = 1; |
219 |
|
|
wc = L' '; |
220 |
|
|
} |
221 |
|
|
if (iswspace(wc)) { |
222 |
|
|
gotsp = 1; |
223 |
|
|
if (wc == L'\n') |
224 |
|
|
++linect; |
225 |
|
|
} else if (gotsp) { |
226 |
|
|
gotsp = 0; |
227 |
|
|
++wordct; |
228 |
|
|
} |
229 |
|
|
} |
230 |
|
|
} else { |
231 |
|
|
charct += len; |
232 |
|
|
for (C = buf; *C != '\0'; ++C) { |
233 |
|
|
if (isspace((unsigned char)*C)) { |
234 |
|
|
gotsp = 1; |
235 |
|
|
if (*C == '\n') |
236 |
|
|
++linect; |
237 |
|
|
} else if (gotsp) { |
238 |
|
|
gotsp = 0; |
239 |
|
|
++wordct; |
240 |
|
|
} |
241 |
|
|
} |
242 |
|
|
} |
243 |
|
|
} |
244 |
|
|
if (ferror(stream)) { |
245 |
|
|
warn("%s", file); |
246 |
|
|
rval = 1; |
247 |
|
|
} |
248 |
|
|
} |
249 |
|
|
|
250 |
|
6 |
print_counts(linect, wordct, charct, file); |
251 |
|
|
|
252 |
|
|
/* |
253 |
|
|
* Don't bother checking doline, doword, or dochar -- speeds |
254 |
|
|
* up the common case |
255 |
|
|
*/ |
256 |
|
6 |
tlinect += linect; |
257 |
|
6 |
twordct += wordct; |
258 |
|
6 |
tcharct += charct; |
259 |
|
|
|
260 |
✓✗✗✓
|
6 |
if ((stream == NULL ? close(fd) : fclose(stream)) != 0) { |
261 |
|
|
warn("%s", file); |
262 |
|
|
rval = 1; |
263 |
|
|
} |
264 |
|
|
} |
265 |
|
|
|
266 |
|
|
void |
267 |
|
|
format_and_print(long long v) |
268 |
|
6 |
{ |
269 |
✗✓ |
6 |
if (humanchar) { |
270 |
|
|
char result[FMT_SCALED_STRSIZE]; |
271 |
|
|
|
272 |
|
|
(void)fmt_scaled(v, result); |
273 |
|
|
(void)printf("%7s", result); |
274 |
|
|
} else { |
275 |
|
6 |
(void)printf(" %7lld", v); |
276 |
|
|
} |
277 |
|
6 |
} |
278 |
|
|
|
279 |
|
|
void |
280 |
|
|
print_counts(int64_t lines, int64_t words, int64_t chars, char *name) |
281 |
|
6 |
{ |
282 |
✓✓ |
6 |
if (doline) |
283 |
|
3 |
format_and_print((long long)lines); |
284 |
✗✓ |
6 |
if (doword) |
285 |
|
|
format_and_print((long long)words); |
286 |
✓✓ |
6 |
if (dochar) |
287 |
|
3 |
format_and_print((long long)chars); |
288 |
|
|
|
289 |
✗✓ |
6 |
if (name) |
290 |
|
|
(void)printf(" %s\n", name); |
291 |
|
|
else |
292 |
|
6 |
(void)printf("\n"); |
293 |
|
6 |
} |