1 |
|
|
/* $OpenBSD: uniq.c,v 1.24 2015/12/19 10:21:01 schwarze Exp $ */ |
2 |
|
|
/* $NetBSD: uniq.c,v 1.7 1995/08/31 22:03:48 jtc Exp $ */ |
3 |
|
|
|
4 |
|
|
/* |
5 |
|
|
* Copyright (c) 1989, 1993 |
6 |
|
|
* The Regents of the University of California. All rights reserved. |
7 |
|
|
* |
8 |
|
|
* This code is derived from software contributed to Berkeley by |
9 |
|
|
* Case Larsen. |
10 |
|
|
* |
11 |
|
|
* Redistribution and use in source and binary forms, with or without |
12 |
|
|
* modification, are permitted provided that the following conditions |
13 |
|
|
* are met: |
14 |
|
|
* 1. Redistributions of source code must retain the above copyright |
15 |
|
|
* notice, this list of conditions and the following disclaimer. |
16 |
|
|
* 2. Redistributions in binary form must reproduce the above copyright |
17 |
|
|
* notice, this list of conditions and the following disclaimer in the |
18 |
|
|
* documentation and/or other materials provided with the distribution. |
19 |
|
|
* 3. Neither the name of the University nor the names of its contributors |
20 |
|
|
* may be used to endorse or promote products derived from this software |
21 |
|
|
* without specific prior written permission. |
22 |
|
|
* |
23 |
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
24 |
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
25 |
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
26 |
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
27 |
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
28 |
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
29 |
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
30 |
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
31 |
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
32 |
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
33 |
|
|
* SUCH DAMAGE. |
34 |
|
|
*/ |
35 |
|
|
|
36 |
|
|
#include <ctype.h> |
37 |
|
|
#include <err.h> |
38 |
|
|
#include <errno.h> |
39 |
|
|
#include <limits.h> |
40 |
|
|
#include <locale.h> |
41 |
|
|
#include <stdio.h> |
42 |
|
|
#include <stdlib.h> |
43 |
|
|
#include <string.h> |
44 |
|
|
#include <unistd.h> |
45 |
|
|
#include <wchar.h> |
46 |
|
|
#include <wctype.h> |
47 |
|
|
|
48 |
|
|
/* Size in bytes of each of the two line buffers allocated in main(). */
#define	MAXLINELEN	(8 * 1024)

/* Option flags; each is set to 1 when the matching option is given. */
int cflag;		/* -c */
int dflag;		/* -d */
int uflag;		/* -u */

int numchars;		/* argument of -s: characters skipped by skip() */
int numfields;		/* argument of -f: fields skipped by skip() */
int repeats;		/* extra occurrences seen of the pending line */
52 |
|
|
|
53 |
|
|
FILE *file(char *, char *); |
54 |
|
|
void show(FILE *, char *); |
55 |
|
|
char *skip(char *); |
56 |
|
|
void obsolete(char *[]); |
57 |
|
|
__dead void usage(void); |
58 |
|
|
|
59 |
|
|
int |
60 |
|
|
main(int argc, char *argv[]) |
61 |
|
|
{ |
62 |
|
|
char *t1, *t2; |
63 |
|
|
FILE *ifp = NULL, *ofp = NULL; |
64 |
|
|
int ch; |
65 |
|
|
char *prevline, *thisline; |
66 |
|
|
|
67 |
|
|
setlocale(LC_CTYPE, ""); |
68 |
|
|
|
69 |
|
|
if (pledge("stdio rpath wpath cpath flock", NULL) == -1) |
70 |
|
|
err(1, "pledge"); |
71 |
|
|
|
72 |
|
|
obsolete(argv); |
73 |
|
|
while ((ch = getopt(argc, argv, "cdf:s:u")) != -1) { |
74 |
|
|
const char *errstr; |
75 |
|
|
|
76 |
|
|
switch (ch) { |
77 |
|
|
case 'c': |
78 |
|
|
cflag = 1; |
79 |
|
|
break; |
80 |
|
|
case 'd': |
81 |
|
|
dflag = 1; |
82 |
|
|
break; |
83 |
|
|
case 'f': |
84 |
|
|
numfields = (int)strtonum(optarg, 0, INT_MAX, |
85 |
|
|
&errstr); |
86 |
|
|
if (errstr) |
87 |
|
|
errx(1, "field skip value is %s: %s", |
88 |
|
|
errstr, optarg); |
89 |
|
|
break; |
90 |
|
|
case 's': |
91 |
|
|
numchars = (int)strtonum(optarg, 0, INT_MAX, |
92 |
|
|
&errstr); |
93 |
|
|
if (errstr) |
94 |
|
|
errx(1, |
95 |
|
|
"character skip value is %s: %s", |
96 |
|
|
errstr, optarg); |
97 |
|
|
break; |
98 |
|
|
case 'u': |
99 |
|
|
uflag = 1; |
100 |
|
|
break; |
101 |
|
|
default: |
102 |
|
|
usage(); |
103 |
|
|
} |
104 |
|
|
} |
105 |
|
|
|
106 |
|
|
argc -= optind; |
107 |
|
|
argv += optind; |
108 |
|
|
|
109 |
|
|
/* If neither -d nor -u are set, default is -d -u. */ |
110 |
|
|
if (!dflag && !uflag) |
111 |
|
|
dflag = uflag = 1; |
112 |
|
|
|
113 |
|
|
switch(argc) { |
114 |
|
|
case 0: |
115 |
|
|
ifp = stdin; |
116 |
|
|
ofp = stdout; |
117 |
|
|
break; |
118 |
|
|
case 1: |
119 |
|
|
ifp = file(argv[0], "r"); |
120 |
|
|
ofp = stdout; |
121 |
|
|
break; |
122 |
|
|
case 2: |
123 |
|
|
ifp = file(argv[0], "r"); |
124 |
|
|
ofp = file(argv[1], "w"); |
125 |
|
|
break; |
126 |
|
|
default: |
127 |
|
|
usage(); |
128 |
|
|
} |
129 |
|
|
|
130 |
|
|
if (pledge("stdio flock rpath cpath wpath", NULL) == -1) |
131 |
|
|
err(1, "pledge"); |
132 |
|
|
|
133 |
|
|
prevline = malloc(MAXLINELEN); |
134 |
|
|
thisline = malloc(MAXLINELEN); |
135 |
|
|
if (prevline == NULL || thisline == NULL) |
136 |
|
|
err(1, "malloc"); |
137 |
|
|
|
138 |
|
|
if (fgets(prevline, MAXLINELEN, ifp) == NULL) |
139 |
|
|
exit(0); |
140 |
|
|
|
141 |
|
|
while (fgets(thisline, MAXLINELEN, ifp)) { |
142 |
|
|
/* If requested get the chosen fields + character offsets. */ |
143 |
|
|
if (numfields || numchars) { |
144 |
|
|
t1 = skip(thisline); |
145 |
|
|
t2 = skip(prevline); |
146 |
|
|
} else { |
147 |
|
|
t1 = thisline; |
148 |
|
|
t2 = prevline; |
149 |
|
|
} |
150 |
|
|
|
151 |
|
|
/* If different, print; set previous to new value. */ |
152 |
|
|
if (strcmp(t1, t2)) { |
153 |
|
|
show(ofp, prevline); |
154 |
|
|
t1 = prevline; |
155 |
|
|
prevline = thisline; |
156 |
|
|
thisline = t1; |
157 |
|
|
repeats = 0; |
158 |
|
|
} else |
159 |
|
|
++repeats; |
160 |
|
|
} |
161 |
|
|
show(ofp, prevline); |
162 |
|
|
exit(0); |
163 |
|
|
} |
164 |
|
|
|
165 |
|
|
/* |
166 |
|
|
* show -- |
167 |
|
|
* Output a line depending on the flags and number of repetitions |
168 |
|
|
* of the line. |
169 |
|
|
*/ |
170 |
|
|
void |
171 |
|
|
show(FILE *ofp, char *str) |
172 |
|
|
{ |
173 |
|
|
if ((dflag && repeats) || (uflag && !repeats)) { |
174 |
|
|
if (cflag) |
175 |
|
|
(void)fprintf(ofp, "%4d %s", repeats + 1, str); |
176 |
|
|
else |
177 |
|
|
(void)fprintf(ofp, "%s", str); |
178 |
|
|
} |
179 |
|
|
} |
180 |
|
|
|
181 |
|
|
char * |
182 |
|
|
skip(char *str) |
183 |
|
|
{ |
184 |
|
|
wchar_t wc; |
185 |
|
|
int nchars, nfields; |
186 |
|
|
int len; |
187 |
|
|
int field_started; |
188 |
|
|
|
189 |
|
|
for (nfields = numfields; nfields && *str; nfields--) { |
190 |
|
|
/* Skip one field, including preceding blanks. */ |
191 |
|
|
for (field_started = 0; *str != '\0'; str += len) { |
192 |
|
|
if ((len = mbtowc(&wc, str, MB_CUR_MAX)) == -1) { |
193 |
|
|
(void)mbtowc(NULL, NULL, MB_CUR_MAX); |
194 |
|
|
wc = L'?'; |
195 |
|
|
len = 1; |
196 |
|
|
} |
197 |
|
|
if (iswblank(wc)) { |
198 |
|
|
if (field_started) |
199 |
|
|
break; |
200 |
|
|
} else |
201 |
|
|
field_started = 1; |
202 |
|
|
} |
203 |
|
|
} |
204 |
|
|
|
205 |
|
|
/* Skip some additional characters. */ |
206 |
|
|
for (nchars = numchars; nchars-- && *str != '\0'; str += len) |
207 |
|
|
if ((len = mblen(str, MB_CUR_MAX)) == -1) |
208 |
|
|
len = 1; |
209 |
|
|
|
210 |
|
|
return (str); |
211 |
|
|
} |
212 |
|
|
|
213 |
|
|
/*
 * file --
 *	Return a stream for name.  The name "-" stands for a standard
 *	stream: stdin when mode begins with 'r', stdout otherwise.  Any
 *	other name is opened with fopen(3); on failure the program
 *	exits with status 1 after reporting the name.
 */
FILE *
file(char *name, char *mode)
{
	FILE *stream;

	if (strcmp(name, "-") != 0) {
		stream = fopen(name, mode);
		if (stream == NULL)
			err(1, "%s", name);
		return (stream);
	}

	if (*mode == 'r')
		return (stdin);
	return (stdout);
}
224 |
|
|
|
225 |
|
|
/*
 * obsolete --
 *	Rewrite historic option syntax in place: an argument "-N" becomes
 *	"-fN" and "+N" becomes "-sN", where N starts with a digit.  The
 *	scan starts at argv[1] and stops at the end of the vector, at an
 *	argument starting with "--", or at the first argument that starts
 *	with neither '-' nor '+'.  Replacement strings are allocated with
 *	malloc(3) and are not freed here.
 */
void
obsolete(char *argv[])
{
	size_t arglen;
	char *arg, *repl;

	for (argv++; *argv != NULL; argv++) {
		arg = *argv;

		/* Stop at the first non-option and at "--". */
		if (arg[0] != '-' && arg[0] != '+')
			return;
		if (arg[0] == '-' && arg[1] == '-')
			return;

		/* Only a leading digit marks an old-style option. */
		if (!isdigit((unsigned char)arg[1]))
			continue;

		/* Room for the dash, the option letter and the argument. */
		arglen = strlen(arg);
		if ((repl = malloc(arglen + 3)) == NULL)
			err(1, "malloc");
		repl[0] = '-';
		repl[1] = (arg[0] == '+') ? 's' : 'f';
		/* arg + 1 holds arglen - 1 characters plus the NUL. */
		memcpy(repl + 2, arg + 1, arglen);
		*argv = repl;
	}
}
253 |
|
|
|
254 |
|
|
/*
 * usage --
 *	Write the synopsis, prefixed with the program name, to stderr
 *	and exit with status 1.
 */
__dead void
usage(void)
{
	extern char *__progname;

	(void)fprintf(stderr,
	    "usage: %s [-c] [-d | -u] [-f fields] [-s chars] "
	    "[input_file [output_file]]\n",
	    __progname);
	exit(1);
}