1 |
|
|
/* $OpenBSD: join.c,v 1.27 2015/10/09 01:37:07 deraadt Exp $ */ |
2 |
|
|
|
3 |
|
|
/*- |
4 |
|
|
* Copyright (c) 1991, 1993, 1994 |
5 |
|
|
* The Regents of the University of California. All rights reserved. |
6 |
|
|
* |
7 |
|
|
* This code is derived from software contributed to Berkeley by |
8 |
|
|
* Steve Hayman of the Computer Science Department, Indiana University, |
9 |
|
|
* Michiro Hikida and David Goodenough. |
10 |
|
|
* |
11 |
|
|
* Redistribution and use in source and binary forms, with or without |
12 |
|
|
* modification, are permitted provided that the following conditions |
13 |
|
|
* are met: |
14 |
|
|
* 1. Redistributions of source code must retain the above copyright |
15 |
|
|
* notice, this list of conditions and the following disclaimer. |
16 |
|
|
* 2. Redistributions in binary form must reproduce the above copyright |
17 |
|
|
* notice, this list of conditions and the following disclaimer in the |
18 |
|
|
* documentation and/or other materials provided with the distribution. |
19 |
|
|
* 3. Neither the name of the University nor the names of its contributors |
20 |
|
|
* may be used to endorse or promote products derived from this software |
21 |
|
|
* without specific prior written permission. |
22 |
|
|
* |
23 |
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
24 |
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
25 |
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
26 |
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
27 |
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
28 |
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
29 |
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
30 |
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
31 |
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
32 |
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
33 |
|
|
* SUCH DAMAGE. |
34 |
|
|
*/ |
35 |
|
|
|
36 |
|
|
#include <err.h> |
37 |
|
|
#include <stdio.h> |
38 |
|
|
#include <stdlib.h> |
39 |
|
|
#include <string.h> |
40 |
|
|
#include <unistd.h> |
41 |
|
|
|
42 |
|
|
#define MAXIMUM(a, b) (((a) > (b)) ? (a) : (b)) |
43 |
|
|
|
44 |
|
|
/* |
45 |
|
|
* There's a structure per input file which encapsulates the state of the |
46 |
|
|
* file. We repeatedly read lines from each file until we've read in all |
47 |
|
|
* the consecutive lines from the file with a common join field. Then we |
48 |
|
|
* compare the set of lines with an equivalent set from the other file. |
49 |
|
|
*/ |
50 |
|
|
typedef struct { |
51 |
|
|
char *line; /* line */ |
52 |
|
|
u_long linealloc; /* line allocated count */ |
53 |
|
|
char **fields; /* line field(s) */ |
54 |
|
|
u_long fieldcnt; /* line field(s) count */ |
55 |
|
|
u_long fieldalloc; /* line field(s) allocated count */ |
56 |
|
|
u_long cfieldc; /* current field count */ |
57 |
|
|
long fpos; /* fpos of start of field */ |
58 |
|
|
} LINE; |
59 |
|
|
|
60 |
|
|
typedef struct { |
61 |
|
|
FILE *fp; /* file descriptor */ |
62 |
|
|
u_long joinf; /* join field (-1, -2, -j) */ |
63 |
|
|
int unpair; /* output unpairable lines (-a) */ |
64 |
|
|
u_long number; /* 1 for file 1, 2 for file 2 */ |
65 |
|
|
LINE *set; /* set of lines with same field */ |
66 |
|
|
int pushbool; /* if pushback is set */ |
67 |
|
|
u_long pushback; /* line on the stack */ |
68 |
|
|
u_long setcnt; /* set count */ |
69 |
|
|
u_long setalloc; /* set allocated count */ |
70 |
|
|
u_long setusedc; /* sets used */ |
71 |
|
|
} INPUT; |
72 |
|
|
INPUT input1 = { NULL, 0, 0, 1, NULL, 0, 0, 0, 0, 0 }, |
73 |
|
|
input2 = { NULL, 0, 0, 2, NULL, 0, 0, 0, 0, 0 }; |
74 |
|
|
|
75 |
|
|
typedef struct { |
76 |
|
|
u_long filenum; /* file number */ |
77 |
|
|
u_long fieldno; /* field number */ |
78 |
|
|
} OLIST; |
79 |
|
|
OLIST *olist; /* output field list */ |
80 |
|
|
u_long olistcnt; /* output field list count */ |
81 |
|
|
u_long olistalloc; /* output field allocated count */ |
82 |
|
|
|
83 |
|
|
int joinout = 1; /* show lines with matched join fields (-v) */ |
84 |
|
|
int needsep; /* need separator character */ |
85 |
|
|
int spans = 1; /* span multiple delimiters (-t) */ |
86 |
|
|
char *empty; /* empty field replacement string (-e) */ |
87 |
|
|
char *tabchar = " \t"; /* delimiter characters (-t) */ |
88 |
|
|
|
89 |
|
|
int cmp(LINE *, u_long, LINE *, u_long); |
90 |
|
|
void fieldarg(char *); |
91 |
|
|
void joinlines(INPUT *, INPUT *); |
92 |
|
|
void obsolete(char **); |
93 |
|
|
void outfield(LINE *, u_long, int); |
94 |
|
|
void outoneline(INPUT *, LINE *); |
95 |
|
|
void outtwoline(INPUT *, LINE *, INPUT *, LINE *); |
96 |
|
|
void slurp(INPUT *); |
97 |
|
|
void slurpit(INPUT *); |
98 |
|
|
void usage(void); |
99 |
|
|
|
100 |
|
|
int |
101 |
|
|
main(int argc, char *argv[]) |
102 |
|
|
{ |
103 |
|
|
INPUT *F1, *F2; |
104 |
|
|
int aflag, ch, cval, vflag; |
105 |
|
|
char *end; |
106 |
|
|
|
107 |
|
|
if (pledge("stdio rpath wpath cpath", NULL) == -1) |
108 |
|
|
err(1, "pledge"); |
109 |
|
|
|
110 |
|
|
F1 = &input1; |
111 |
|
|
F2 = &input2; |
112 |
|
|
|
113 |
|
|
aflag = vflag = 0; |
114 |
|
|
obsolete(argv); |
115 |
|
|
while ((ch = getopt(argc, argv, "\01a:e:j:1:2:o:t:v:")) != -1) { |
116 |
|
|
switch (ch) { |
117 |
|
|
case '\01': /* See comment in obsolete(). */ |
118 |
|
|
aflag = 1; |
119 |
|
|
F1->unpair = F2->unpair = 1; |
120 |
|
|
break; |
121 |
|
|
case '1': |
122 |
|
|
if ((F1->joinf = strtol(optarg, &end, 10)) < 1) |
123 |
|
|
errx(1, "-1 option field number less than 1"); |
124 |
|
|
if (*end) |
125 |
|
|
errx(1, "illegal field number -- %s", optarg); |
126 |
|
|
--F1->joinf; |
127 |
|
|
break; |
128 |
|
|
case '2': |
129 |
|
|
if ((F2->joinf = strtol(optarg, &end, 10)) < 1) |
130 |
|
|
errx(1, "-2 option field number less than 1"); |
131 |
|
|
if (*end) |
132 |
|
|
errx(1, "illegal field number -- %s", optarg); |
133 |
|
|
--F2->joinf; |
134 |
|
|
break; |
135 |
|
|
case 'a': |
136 |
|
|
aflag = 1; |
137 |
|
|
switch(strtol(optarg, &end, 10)) { |
138 |
|
|
case 1: |
139 |
|
|
F1->unpair = 1; |
140 |
|
|
break; |
141 |
|
|
case 2: |
142 |
|
|
F2->unpair = 1; |
143 |
|
|
break; |
144 |
|
|
default: |
145 |
|
|
errx(1, "-a option file number not 1 or 2"); |
146 |
|
|
break; |
147 |
|
|
} |
148 |
|
|
if (*end) |
149 |
|
|
errx(1, "illegal file number -- %s", optarg); |
150 |
|
|
break; |
151 |
|
|
case 'e': |
152 |
|
|
empty = optarg; |
153 |
|
|
break; |
154 |
|
|
case 'j': |
155 |
|
|
if ((F1->joinf = F2->joinf = strtol(optarg, &end, 10)) < 1) |
156 |
|
|
errx(1, "-j option field number less than 1"); |
157 |
|
|
if (*end) |
158 |
|
|
errx(1, "illegal field number -- %s", optarg); |
159 |
|
|
--F1->joinf; |
160 |
|
|
--F2->joinf; |
161 |
|
|
break; |
162 |
|
|
case 'o': |
163 |
|
|
fieldarg(optarg); |
164 |
|
|
break; |
165 |
|
|
case 't': |
166 |
|
|
spans = 0; |
167 |
|
|
if (strlen(tabchar = optarg) != 1) |
168 |
|
|
errx(1, "illegal tab character specification"); |
169 |
|
|
break; |
170 |
|
|
case 'v': |
171 |
|
|
vflag = 1; |
172 |
|
|
joinout = 0; |
173 |
|
|
switch (strtol(optarg, &end, 10)) { |
174 |
|
|
case 1: |
175 |
|
|
F1->unpair = 1; |
176 |
|
|
break; |
177 |
|
|
case 2: |
178 |
|
|
F2->unpair = 1; |
179 |
|
|
break; |
180 |
|
|
default: |
181 |
|
|
errx(1, "-v option file number not 1 or 2"); |
182 |
|
|
break; |
183 |
|
|
} |
184 |
|
|
if (*end) |
185 |
|
|
errx(1, "illegal file number -- %s", optarg); |
186 |
|
|
break; |
187 |
|
|
case '?': |
188 |
|
|
default: |
189 |
|
|
usage(); |
190 |
|
|
} |
191 |
|
|
} |
192 |
|
|
argc -= optind; |
193 |
|
|
argv += optind; |
194 |
|
|
|
195 |
|
|
if (aflag && vflag) |
196 |
|
|
errx(1, "the -a and -v options are mutually exclusive"); |
197 |
|
|
|
198 |
|
|
if (argc != 2) |
199 |
|
|
usage(); |
200 |
|
|
|
201 |
|
|
/* Open the files; "-" means stdin. */ |
202 |
|
|
if (!strcmp(*argv, "-")) |
203 |
|
|
F1->fp = stdin; |
204 |
|
|
else if ((F1->fp = fopen(*argv, "r")) == NULL) |
205 |
|
|
err(1, "%s", *argv); |
206 |
|
|
++argv; |
207 |
|
|
if (!strcmp(*argv, "-")) |
208 |
|
|
F2->fp = stdin; |
209 |
|
|
else if ((F2->fp = fopen(*argv, "r")) == NULL) |
210 |
|
|
err(1, "%s", *argv); |
211 |
|
|
if (F1->fp == stdin && F2->fp == stdin) |
212 |
|
|
errx(1, "only one input file may be stdin"); |
213 |
|
|
|
214 |
|
|
if (pledge("stdio wpath cpath rpath", NULL) == -1) |
215 |
|
|
err(1, "pledge"); |
216 |
|
|
|
217 |
|
|
F1->setusedc = 0; |
218 |
|
|
F2->setusedc = 0; |
219 |
|
|
slurp(F1); |
220 |
|
|
slurp(F2); |
221 |
|
|
F1->set->cfieldc = 0; |
222 |
|
|
F2->set->cfieldc = 0; |
223 |
|
|
|
224 |
|
|
/* |
225 |
|
|
* We try to let the files have the same field value, advancing |
226 |
|
|
* whoever falls behind and always advancing the file(s) we output |
227 |
|
|
* from. |
228 |
|
|
*/ |
229 |
|
|
while (F1->setcnt && F2->setcnt) { |
230 |
|
|
cval = cmp(F1->set, F1->joinf, F2->set, F2->joinf); |
231 |
|
|
if (cval == 0) { |
232 |
|
|
/* Oh joy, oh rapture, oh beauty divine! */ |
233 |
|
|
if (joinout) |
234 |
|
|
joinlines(F1, F2); |
235 |
|
|
slurp(F1); |
236 |
|
|
slurp(F2); |
237 |
|
|
} |
238 |
|
|
else { |
239 |
|
|
if (F1->unpair |
240 |
|
|
&& (cval < 0 || F2->set->cfieldc == F2->setusedc -1)) { |
241 |
|
|
joinlines(F1, NULL); |
242 |
|
|
slurp(F1); |
243 |
|
|
} |
244 |
|
|
else if (cval < 0) |
245 |
|
|
/* File 1 takes the lead... */ |
246 |
|
|
slurp(F1); |
247 |
|
|
if (F2->unpair |
248 |
|
|
&& (cval > 0 || F1->set->cfieldc == F1->setusedc -1)) { |
249 |
|
|
joinlines(F2, NULL); |
250 |
|
|
slurp(F2); |
251 |
|
|
} |
252 |
|
|
else if (cval > 0) |
253 |
|
|
/* File 2 takes the lead... */ |
254 |
|
|
slurp(F2); |
255 |
|
|
} |
256 |
|
|
} |
257 |
|
|
|
258 |
|
|
/* |
259 |
|
|
* Now that one of the files is used up, optionally output any |
260 |
|
|
* remaining lines from the other file. |
261 |
|
|
*/ |
262 |
|
|
if (F1->unpair) |
263 |
|
|
while (F1->setcnt) { |
264 |
|
|
joinlines(F1, NULL); |
265 |
|
|
slurp(F1); |
266 |
|
|
} |
267 |
|
|
if (F2->unpair) |
268 |
|
|
while (F2->setcnt) { |
269 |
|
|
joinlines(F2, NULL); |
270 |
|
|
slurp(F2); |
271 |
|
|
} |
272 |
|
|
|
273 |
|
|
return 0; |
274 |
|
|
} |
275 |
|
|
|
276 |
|
|
/* wrapper around slurpit() to keep track of what field we are on */ |
277 |
|
|
void slurp(INPUT *F) |
278 |
|
|
{ |
279 |
|
|
long fpos; |
280 |
|
|
u_long cfieldc; |
281 |
|
|
|
282 |
|
|
if (F->set == NULL) { |
283 |
|
|
fpos = 0; |
284 |
|
|
cfieldc = 0; |
285 |
|
|
} |
286 |
|
|
else { |
287 |
|
|
fpos = F->set->fpos; |
288 |
|
|
cfieldc = F->set->cfieldc; |
289 |
|
|
} |
290 |
|
|
slurpit(F); |
291 |
|
|
if (F->set == NULL) |
292 |
|
|
return; |
293 |
|
|
else if (fpos != F->set->fpos) |
294 |
|
|
F->set->cfieldc = cfieldc+1; |
295 |
|
|
} |
296 |
|
|
|
297 |
|
|
void |
298 |
|
|
slurpit(INPUT *F) |
299 |
|
|
{ |
300 |
|
|
LINE *lp, *lastlp, tmp; |
301 |
|
|
size_t len; |
302 |
|
|
u_long cnt; |
303 |
|
|
char *bp, *fieldp; |
304 |
|
|
long fpos; |
305 |
|
|
/* |
306 |
|
|
* Read all of the lines from an input file that have the same |
307 |
|
|
* join field. |
308 |
|
|
*/ |
309 |
|
|
|
310 |
|
|
F->setcnt = 0; |
311 |
|
|
for (lastlp = NULL; ; ++F->setcnt, lastlp = lp) { |
312 |
|
|
/* |
313 |
|
|
* If we're out of space to hold line structures, allocate |
314 |
|
|
* more. Initialize the structure so that we know that this |
315 |
|
|
* is new space. |
316 |
|
|
*/ |
317 |
|
|
if (F->setcnt == F->setalloc) { |
318 |
|
|
LINE *p; |
319 |
|
|
u_long newsize = F->setalloc + 50; |
320 |
|
|
cnt = F->setalloc; |
321 |
|
|
if ((p = reallocarray(F->set, newsize, sizeof(LINE))) |
322 |
|
|
== NULL) |
323 |
|
|
err(1, NULL); |
324 |
|
|
F->set = p; |
325 |
|
|
F->setalloc = newsize; |
326 |
|
|
memset(F->set + cnt, 0, 50 * sizeof(LINE)); |
327 |
|
|
/* re-set lastlp in case it moved */ |
328 |
|
|
if (lastlp != NULL) |
329 |
|
|
lastlp = &F->set[F->setcnt - 1]; |
330 |
|
|
} |
331 |
|
|
/* |
332 |
|
|
* Get any pushed back line, else get the next line. Allocate |
333 |
|
|
* space as necessary. If taking the line from the stack swap |
334 |
|
|
* the two structures so that we don't lose space allocated to |
335 |
|
|
* either structure. This could be avoided by doing another |
336 |
|
|
* level of indirection, but it's probably okay as is. |
337 |
|
|
*/ |
338 |
|
|
lp = &F->set[F->setcnt]; |
339 |
|
|
if (F->pushbool) { |
340 |
|
|
tmp = F->set[F->setcnt]; |
341 |
|
|
F->set[F->setcnt] = F->set[F->pushback]; |
342 |
|
|
F->set[F->pushback] = tmp; |
343 |
|
|
F->pushbool = 0; |
344 |
|
|
continue; |
345 |
|
|
} |
346 |
|
|
if ((bp = fgetln(F->fp, &len)) == NULL) |
347 |
|
|
return; |
348 |
|
|
/* |
349 |
|
|
* we depend on knowing on what field we are, one safe way is |
350 |
|
|
* the file position. |
351 |
|
|
*/ |
352 |
|
|
fpos = ftell(F->fp) - len; |
353 |
|
|
if (lp->linealloc <= len + 1) { |
354 |
|
|
char *p; |
355 |
|
|
u_long newsize = lp->linealloc + |
356 |
|
|
MAXIMUM(100, len + 1 - lp->linealloc); |
357 |
|
|
if ((p = realloc(lp->line, newsize)) == NULL) |
358 |
|
|
err(1, NULL); |
359 |
|
|
lp->line = p; |
360 |
|
|
lp->linealloc = newsize; |
361 |
|
|
} |
362 |
|
|
F->setusedc++; |
363 |
|
|
memmove(lp->line, bp, len); |
364 |
|
|
lp->fpos = fpos; |
365 |
|
|
/* Replace trailing newline, if it exists. */ |
366 |
|
|
if (bp[len - 1] == '\n') |
367 |
|
|
lp->line[len - 1] = '\0'; |
368 |
|
|
else |
369 |
|
|
lp->line[len] = '\0'; |
370 |
|
|
bp = lp->line; |
371 |
|
|
|
372 |
|
|
/* Split the line into fields, allocate space as necessary. */ |
373 |
|
|
lp->fieldcnt = 0; |
374 |
|
|
while ((fieldp = strsep(&bp, tabchar)) != NULL) { |
375 |
|
|
if (spans && *fieldp == '\0') |
376 |
|
|
continue; |
377 |
|
|
if (lp->fieldcnt == lp->fieldalloc) { |
378 |
|
|
char **p; |
379 |
|
|
u_long newsize = lp->fieldalloc + 50; |
380 |
|
|
if ((p = reallocarray(lp->fields, newsize, |
381 |
|
|
sizeof(char *))) == NULL) |
382 |
|
|
err(1, NULL); |
383 |
|
|
lp->fields = p; |
384 |
|
|
lp->fieldalloc = newsize; |
385 |
|
|
} |
386 |
|
|
lp->fields[lp->fieldcnt++] = fieldp; |
387 |
|
|
} |
388 |
|
|
|
389 |
|
|
/* See if the join field value has changed. */ |
390 |
|
|
if (lastlp != NULL && cmp(lp, F->joinf, lastlp, F->joinf)) { |
391 |
|
|
F->pushbool = 1; |
392 |
|
|
F->pushback = F->setcnt; |
393 |
|
|
break; |
394 |
|
|
} |
395 |
|
|
} |
396 |
|
|
} |
397 |
|
|
|
398 |
|
|
int |
399 |
|
|
cmp(LINE *lp1, u_long fieldno1, LINE *lp2, u_long fieldno2) |
400 |
|
|
{ |
401 |
|
|
if (lp1->fieldcnt <= fieldno1) |
402 |
|
|
return (-1); |
403 |
|
|
else if (lp2->fieldcnt <= fieldno2) |
404 |
|
|
return (1); |
405 |
|
|
return (strcmp(lp1->fields[fieldno1], lp2->fields[fieldno2])); |
406 |
|
|
} |
407 |
|
|
|
408 |
|
|
void |
409 |
|
|
joinlines(INPUT *F1, INPUT *F2) |
410 |
|
|
{ |
411 |
|
|
u_long cnt1, cnt2; |
412 |
|
|
|
413 |
|
|
/* |
414 |
|
|
* Output the results of a join comparison. The output may be from |
415 |
|
|
* either file 1 or file 2 (in which case the first argument is the |
416 |
|
|
* file from which to output) or from both. |
417 |
|
|
*/ |
418 |
|
|
if (F2 == NULL) { |
419 |
|
|
for (cnt1 = 0; cnt1 < F1->setcnt; ++cnt1) |
420 |
|
|
outoneline(F1, &F1->set[cnt1]); |
421 |
|
|
return; |
422 |
|
|
} |
423 |
|
|
for (cnt1 = 0; cnt1 < F1->setcnt; ++cnt1) |
424 |
|
|
for (cnt2 = 0; cnt2 < F2->setcnt; ++cnt2) |
425 |
|
|
outtwoline(F1, &F1->set[cnt1], F2, &F2->set[cnt2]); |
426 |
|
|
} |
427 |
|
|
|
428 |
|
|
void |
429 |
|
|
outoneline(INPUT *F, LINE *lp) |
430 |
|
|
{ |
431 |
|
|
u_long cnt; |
432 |
|
|
|
433 |
|
|
/* |
434 |
|
|
* Output a single line from one of the files, according to the |
435 |
|
|
* join rules. This happens when we are writing unmatched single |
436 |
|
|
* lines. Output empty fields in the right places. |
437 |
|
|
*/ |
438 |
|
|
if (olist) |
439 |
|
|
for (cnt = 0; cnt < olistcnt; ++cnt) { |
440 |
|
|
if (olist[cnt].filenum == F->number) |
441 |
|
|
outfield(lp, olist[cnt].fieldno, 0); |
442 |
|
|
else if (olist[cnt].filenum == 0) |
443 |
|
|
outfield(lp, F->joinf, 0); |
444 |
|
|
else |
445 |
|
|
outfield(lp, 0, 1); |
446 |
|
|
} |
447 |
|
|
else { |
448 |
|
|
/* |
449 |
|
|
* Output the join field, then the remaining fields from F |
450 |
|
|
*/ |
451 |
|
|
outfield(lp, F->joinf, 0); |
452 |
|
|
for (cnt = 0; cnt < lp->fieldcnt; ++cnt) |
453 |
|
|
if (F->joinf != cnt) |
454 |
|
|
outfield(lp, cnt, 0); |
455 |
|
|
} |
456 |
|
|
|
457 |
|
|
putchar('\n'); |
458 |
|
|
if (ferror(stdout)) |
459 |
|
|
err(1, "stdout"); |
460 |
|
|
needsep = 0; |
461 |
|
|
} |
462 |
|
|
|
463 |
|
|
void |
464 |
|
|
outtwoline(INPUT *F1, LINE *lp1, INPUT *F2, LINE *lp2) |
465 |
|
|
{ |
466 |
|
|
u_long cnt; |
467 |
|
|
|
468 |
|
|
/* Output a pair of lines according to the join list (if any). */ |
469 |
|
|
if (olist) { |
470 |
|
|
for (cnt = 0; cnt < olistcnt; ++cnt) |
471 |
|
|
if (olist[cnt].filenum == 0) { |
472 |
|
|
if (lp1->fieldcnt >= F1->joinf) |
473 |
|
|
outfield(lp1, F1->joinf, 0); |
474 |
|
|
else |
475 |
|
|
outfield(lp2, F2->joinf, 0); |
476 |
|
|
} else if (olist[cnt].filenum == 1) |
477 |
|
|
outfield(lp1, olist[cnt].fieldno, 0); |
478 |
|
|
else /* if (olist[cnt].filenum == 2) */ |
479 |
|
|
outfield(lp2, olist[cnt].fieldno, 0); |
480 |
|
|
} else { |
481 |
|
|
/* |
482 |
|
|
* Output the join field, then the remaining fields from F1 |
483 |
|
|
* and F2. |
484 |
|
|
*/ |
485 |
|
|
outfield(lp1, F1->joinf, 0); |
486 |
|
|
for (cnt = 0; cnt < lp1->fieldcnt; ++cnt) |
487 |
|
|
if (F1->joinf != cnt) |
488 |
|
|
outfield(lp1, cnt, 0); |
489 |
|
|
for (cnt = 0; cnt < lp2->fieldcnt; ++cnt) |
490 |
|
|
if (F2->joinf != cnt) |
491 |
|
|
outfield(lp2, cnt, 0); |
492 |
|
|
} |
493 |
|
|
putchar('\n'); |
494 |
|
|
if (ferror(stdout)) |
495 |
|
|
err(1, "stdout"); |
496 |
|
|
needsep = 0; |
497 |
|
|
} |
498 |
|
|
|
499 |
|
|
void |
500 |
|
|
outfield(LINE *lp, u_long fieldno, int out_empty) |
501 |
|
|
{ |
502 |
|
|
if (needsep++) |
503 |
|
|
putchar((int)*tabchar); |
504 |
|
|
if (!ferror(stdout)) { |
505 |
|
|
if (lp->fieldcnt <= fieldno || out_empty) { |
506 |
|
|
if (empty != NULL) |
507 |
|
|
fputs(empty, stdout); |
508 |
|
|
} else { |
509 |
|
|
if (*lp->fields[fieldno] == '\0') |
510 |
|
|
return; |
511 |
|
|
fputs(lp->fields[fieldno], stdout); |
512 |
|
|
} |
513 |
|
|
} |
514 |
|
|
if (ferror(stdout)) |
515 |
|
|
err(1, "stdout"); |
516 |
|
|
} |
517 |
|
|
|
518 |
|
|
/* |
519 |
|
|
* Convert an output list argument "2.1, 1.3, 2.4" into an array of output |
520 |
|
|
* fields. |
521 |
|
|
*/ |
522 |
|
|
void |
523 |
|
|
fieldarg(char *option) |
524 |
|
|
{ |
525 |
|
|
u_long fieldno, filenum; |
526 |
|
|
char *end, *token; |
527 |
|
|
|
528 |
|
|
while ((token = strsep(&option, ", \t")) != NULL) { |
529 |
|
|
if (*token == '\0') |
530 |
|
|
continue; |
531 |
|
|
if (token[0] == '0') |
532 |
|
|
filenum = fieldno = 0; |
533 |
|
|
else if ((token[0] == '1' || token[0] == '2') && |
534 |
|
|
token[1] == '.') { |
535 |
|
|
filenum = token[0] - '0'; |
536 |
|
|
fieldno = strtol(token + 2, &end, 10); |
537 |
|
|
if (*end) |
538 |
|
|
errx(1, "malformed -o option field"); |
539 |
|
|
if (fieldno == 0) |
540 |
|
|
errx(1, "field numbers are 1 based"); |
541 |
|
|
--fieldno; |
542 |
|
|
} else |
543 |
|
|
errx(1, "malformed -o option field"); |
544 |
|
|
if (olistcnt == olistalloc) { |
545 |
|
|
OLIST *p; |
546 |
|
|
u_long newsize = olistalloc + 50; |
547 |
|
|
if ((p = reallocarray(olist, newsize, sizeof(OLIST))) |
548 |
|
|
== NULL) |
549 |
|
|
err(1, NULL); |
550 |
|
|
olist = p; |
551 |
|
|
olistalloc = newsize; |
552 |
|
|
} |
553 |
|
|
olist[olistcnt].filenum = filenum; |
554 |
|
|
olist[olistcnt].fieldno = fieldno; |
555 |
|
|
++olistcnt; |
556 |
|
|
} |
557 |
|
|
} |
558 |
|
|
|
559 |
|
|
void |
560 |
|
|
obsolete(char **argv) |
561 |
|
|
{ |
562 |
|
|
size_t len; |
563 |
|
|
char **p, *ap, *t; |
564 |
|
|
|
565 |
|
|
while ((ap = *++argv) != NULL) { |
566 |
|
|
/* Return if "--". */ |
567 |
|
|
if (ap[0] == '-' && ap[1] == '-') |
568 |
|
|
return; |
569 |
|
|
/* skip if not an option */ |
570 |
|
|
if (ap[0] != '-') |
571 |
|
|
continue; |
572 |
|
|
switch (ap[1]) { |
573 |
|
|
case 'a': |
574 |
|
|
/* |
575 |
|
|
* The original join allowed "-a", which meant the |
576 |
|
|
* same as -a1 plus -a2. POSIX 1003.2, Draft 11.2 |
577 |
|
|
* only specifies this as "-a 1" and "a -2", so we |
578 |
|
|
* have to use another option flag, one that is |
579 |
|
|
* unlikely to ever be used or accidentally entered |
580 |
|
|
* on the command line. (Well, we could reallocate |
581 |
|
|
* the argv array, but that hardly seems worthwhile.) |
582 |
|
|
*/ |
583 |
|
|
if (ap[2] == '\0' && (argv[1] == NULL || |
584 |
|
|
(strcmp(argv[1], "1") != 0 && |
585 |
|
|
strcmp(argv[1], "2") != 0))) { |
586 |
|
|
ap[1] = '\01'; |
587 |
|
|
warnx("-a option used without an argument; " |
588 |
|
|
"reverting to historical behavior"); |
589 |
|
|
} |
590 |
|
|
break; |
591 |
|
|
case 'j': |
592 |
|
|
/* |
593 |
|
|
* The original join allowed "-j[12] arg" and "-j arg". |
594 |
|
|
* Convert the former to "-[12] arg". Don't convert |
595 |
|
|
* the latter since getopt(3) can handle it. |
596 |
|
|
*/ |
597 |
|
|
switch(ap[2]) { |
598 |
|
|
case '1': |
599 |
|
|
case '2': |
600 |
|
|
if (ap[3] != '\0') |
601 |
|
|
goto jbad; |
602 |
|
|
ap[1] = ap[2]; |
603 |
|
|
ap[2] = '\0'; |
604 |
|
|
break; |
605 |
|
|
case '\0': |
606 |
|
|
break; |
607 |
|
|
default: |
608 |
|
|
jbad: warnx("unknown option -- %s", ap + 1); |
609 |
|
|
usage(); |
610 |
|
|
} |
611 |
|
|
break; |
612 |
|
|
case 'o': |
613 |
|
|
/* |
614 |
|
|
* The original join allowed "-o arg arg". |
615 |
|
|
* Convert to "-o arg -o arg". |
616 |
|
|
*/ |
617 |
|
|
if (ap[2] != '\0' || argv[1] == NULL) |
618 |
|
|
break; |
619 |
|
|
for (p = argv + 2; *p != NULL; ++p) { |
620 |
|
|
if (p[0][0] == '0' || ((p[0][0] != '1' && |
621 |
|
|
p[0][0] != '2') || p[0][1] != '.')) |
622 |
|
|
break; |
623 |
|
|
len = strlen(*p); |
624 |
|
|
if (len - 2 != strspn(*p + 2, "0123456789")) |
625 |
|
|
break; |
626 |
|
|
if ((t = malloc(len + 3)) == NULL) |
627 |
|
|
err(1, NULL); |
628 |
|
|
t[0] = '-'; |
629 |
|
|
t[1] = 'o'; |
630 |
|
|
memmove(t + 2, *p, len + 1); |
631 |
|
|
*p = t; |
632 |
|
|
} |
633 |
|
|
argv = p - 1; |
634 |
|
|
break; |
635 |
|
|
} |
636 |
|
|
} |
637 |
|
|
} |
638 |
|
|
|
639 |
|
|
void |
640 |
|
|
usage(void) |
641 |
|
|
{ |
642 |
|
|
int len; |
643 |
|
|
extern char *__progname; |
644 |
|
|
|
645 |
|
|
len = strlen(__progname) + sizeof("usage: "); |
646 |
|
|
(void)fprintf(stderr, "usage: %s [-1 field] [-2 field] " |
647 |
|
|
"[-a file_number | -v file_number] [-e string]\n" |
648 |
|
|
"%*s[-o list] [-t char] file1 file2\n", |
649 |
|
|
__progname, len, ""); |
650 |
|
|
exit(1); |
651 |
|
|
} |