GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: usr.bin/join/join.c Lines: 0 281 0.0 %
Date: 2016-12-06 Branches: 0 255 0.0 %

Line Branch Exec Source
1
/* $OpenBSD: join.c,v 1.27 2015/10/09 01:37:07 deraadt Exp $	*/
2
3
/*-
4
 * Copyright (c) 1991, 1993, 1994
5
 *	The Regents of the University of California.  All rights reserved.
6
 *
7
 * This code is derived from software contributed to Berkeley by
8
 * Steve Hayman of the Computer Science Department, Indiana University,
9
 * Michiro Hikida and David Goodenough.
10
 *
11
 * Redistribution and use in source and binary forms, with or without
12
 * modification, are permitted provided that the following conditions
13
 * are met:
14
 * 1. Redistributions of source code must retain the above copyright
15
 *    notice, this list of conditions and the following disclaimer.
16
 * 2. Redistributions in binary form must reproduce the above copyright
17
 *    notice, this list of conditions and the following disclaimer in the
18
 *    documentation and/or other materials provided with the distribution.
19
 * 3. Neither the name of the University nor the names of its contributors
20
 *    may be used to endorse or promote products derived from this software
21
 *    without specific prior written permission.
22
 *
23
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33
 * SUCH DAMAGE.
34
 */
35
36
#include <err.h>
37
#include <stdio.h>
38
#include <stdlib.h>
39
#include <string.h>
40
#include <unistd.h>
41
42
/*
 * Yield the larger of a and b.  Function-like macro: each argument may be
 * evaluated twice, so do not pass expressions with side effects.
 */
#define MAXIMUM(a, b)	(((a) > (b)) ? (a) : (b))
43
44
/*
45
 * There's a structure per input file which encapsulates the state of the
46
 * file.  We repeatedly read lines from each file until we've read in all
47
 * the consecutive lines from the file with a common join field.  Then we
48
 * compare the set of lines with an equivalent set from the other file.
49
 */
50
/*
 * One input line together with its split-out fields.  slurpit() fills these
 * in and reuses the buffers from one line to the next.
 */
typedef struct {
51
	char *line;			/* NUL-terminated copy of the line text */
52
	u_long linealloc;		/* bytes allocated for line */
53
	char **fields;			/* pointers into line, one per field */
54
	u_long fieldcnt;		/* number of entries used in fields */
55
	u_long fieldalloc;		/* number of entries allocated in fields */
56
	u_long cfieldc;			/* counter bumped by slurp() when fpos changes */
57
	long	 fpos;			/* file offset computed as ftell() minus line length */
58
} LINE;
59
60
/*
 * Per-input-file state.  The two static instances below are initialized
 * positionally, so the member order here must not change.
 */
typedef struct {
61
	FILE *fp;			/* stdio stream the lines are read from */
62
	u_long joinf;			/* zero-based join field index (-1, -2, -j) */
63
	int unpair;			/* output unpairable lines (-a, -v) */
64
	u_long number;			/* 1 for file 1, 2 for file 2 */
65
	LINE *set;			/* array of lines sharing one join field value */
66
	int pushbool;			/* non-zero if pushback holds a valid index */
67
	u_long pushback;		/* index in set of the line read ahead */
68
	u_long setcnt;			/* number of lines in the current set */
69
	u_long setalloc;		/* number of LINE slots allocated in set */
70
	u_long setusedc;		/* incremented by slurpit() for each line read */
71
} INPUT;
72
/* Only the file number differs between the two inputs; the rest starts zero. */
INPUT input1 = { NULL, 0, 0, 1, NULL, 0, 0, 0, 0, 0 },
73
      input2 = { NULL, 0, 0, 2, NULL, 0, 0, 0, 0, 0 };
74
75
/*
 * One entry of the -o output list as built by fieldarg(): filenum is 0 for
 * the join field, otherwise 1 or 2; fieldno is stored zero-based.
 */
typedef struct {
76
	u_long	filenum;	/* file number (0 selects the join field) */
77
	u_long	fieldno;	/* zero-based field number */
78
} OLIST;
79
OLIST *olist;			/* output field list; NULL when -o was not given */
80
u_long olistcnt;		/* number of entries used in olist */
81
u_long olistalloc;		/* number of entries allocated in olist */
82
83
/* Option state shared by the input and output routines. */
int joinout = 1;		/* print matched pairs; cleared by -v */
84
int needsep;			/* non-zero once a field was written on this line */
85
int spans = 1;			/* skip empty fields when splitting; cleared by -t */
86
char *empty;			/* empty field replacement string (-e), or NULL */
87
char *tabchar = " \t";		/* input delimiter set; first char separates output (-t) */
88
89
/* Forward declarations for the functions defined later in this file. */
int  cmp(LINE *, u_long, LINE *, u_long);
90
void fieldarg(char *);
91
void joinlines(INPUT *, INPUT *);
92
void obsolete(char **);
93
void outfield(LINE *, u_long, int);
94
void outoneline(INPUT *, LINE *);
95
void outtwoline(INPUT *, LINE *, INPUT *, LINE *);
96
void slurp(INPUT *);
97
void slurpit(INPUT *);
98
void usage(void);
99
100
int
101
main(int argc, char *argv[])
102
{
103
	INPUT *F1, *F2;
104
	int aflag, ch, cval, vflag;
105
	char *end;
106
107
	if (pledge("stdio rpath wpath cpath", NULL) == -1)
108
		err(1, "pledge");
109
110
	F1 = &input1;
111
	F2 = &input2;
112
113
	aflag = vflag = 0;
114
	obsolete(argv);
115
	while ((ch = getopt(argc, argv, "\01a:e:j:1:2:o:t:v:")) != -1) {
116
		switch (ch) {
117
		case '\01':		/* See comment in obsolete(). */
118
			aflag = 1;
119
			F1->unpair = F2->unpair = 1;
120
			break;
121
		case '1':
122
			if ((F1->joinf = strtol(optarg, &end, 10)) < 1)
123
				errx(1, "-1 option field number less than 1");
124
			if (*end)
125
				errx(1, "illegal field number -- %s", optarg);
126
			--F1->joinf;
127
			break;
128
		case '2':
129
			if ((F2->joinf = strtol(optarg, &end, 10)) < 1)
130
				errx(1, "-2 option field number less than 1");
131
			if (*end)
132
				errx(1, "illegal field number -- %s", optarg);
133
			--F2->joinf;
134
			break;
135
		case 'a':
136
			aflag = 1;
137
			switch(strtol(optarg, &end, 10)) {
138
			case 1:
139
				F1->unpair = 1;
140
				break;
141
			case 2:
142
				F2->unpair = 1;
143
				break;
144
			default:
145
				errx(1, "-a option file number not 1 or 2");
146
				break;
147
			}
148
			if (*end)
149
				errx(1, "illegal file number -- %s", optarg);
150
			break;
151
		case 'e':
152
			empty = optarg;
153
			break;
154
		case 'j':
155
			if ((F1->joinf = F2->joinf = strtol(optarg, &end, 10)) < 1)
156
				errx(1, "-j option field number less than 1");
157
			if (*end)
158
				errx(1, "illegal field number -- %s", optarg);
159
			--F1->joinf;
160
			--F2->joinf;
161
			break;
162
		case 'o':
163
			fieldarg(optarg);
164
			break;
165
		case 't':
166
			spans = 0;
167
			if (strlen(tabchar = optarg) != 1)
168
				errx(1, "illegal tab character specification");
169
			break;
170
		case 'v':
171
			vflag = 1;
172
			joinout = 0;
173
			switch (strtol(optarg, &end, 10)) {
174
			case 1:
175
				F1->unpair = 1;
176
				break;
177
			case 2:
178
				F2->unpair = 1;
179
				break;
180
			default:
181
				errx(1, "-v option file number not 1 or 2");
182
				break;
183
			}
184
			if (*end)
185
				errx(1, "illegal file number -- %s", optarg);
186
			break;
187
		case '?':
188
		default:
189
			usage();
190
		}
191
	}
192
	argc -= optind;
193
	argv += optind;
194
195
	if (aflag && vflag)
196
		errx(1, "the -a and -v options are mutually exclusive");
197
198
	if (argc != 2)
199
		usage();
200
201
	/* Open the files; "-" means stdin. */
202
	if (!strcmp(*argv, "-"))
203
		F1->fp = stdin;
204
	else if ((F1->fp = fopen(*argv, "r")) == NULL)
205
		err(1, "%s", *argv);
206
	++argv;
207
	if (!strcmp(*argv, "-"))
208
		F2->fp = stdin;
209
	else if ((F2->fp = fopen(*argv, "r")) == NULL)
210
		err(1, "%s", *argv);
211
	if (F1->fp == stdin && F2->fp == stdin)
212
		errx(1, "only one input file may be stdin");
213
214
	if (pledge("stdio wpath cpath rpath", NULL) == -1)
215
		err(1, "pledge");
216
217
	F1->setusedc = 0;
218
	F2->setusedc = 0;
219
	slurp(F1);
220
	slurp(F2);
221
	F1->set->cfieldc = 0;
222
	F2->set->cfieldc = 0;
223
224
	/*
225
	 * We try to let the files have the same field value, advancing
226
	 * whoever falls behind and always advancing the file(s) we output
227
	 * from.
228
	*/
229
	while (F1->setcnt && F2->setcnt) {
230
		cval = cmp(F1->set, F1->joinf, F2->set, F2->joinf);
231
		if (cval == 0) {
232
			/* Oh joy, oh rapture, oh beauty divine! */
233
			if (joinout)
234
				joinlines(F1, F2);
235
			slurp(F1);
236
			slurp(F2);
237
		}
238
		else {
239
			if (F1->unpair
240
			&& (cval < 0 || F2->set->cfieldc == F2->setusedc -1)) {
241
				joinlines(F1, NULL);
242
				slurp(F1);
243
			}
244
			else if (cval < 0)
245
				/* File 1 takes the lead... */
246
				slurp(F1);
247
			if (F2->unpair
248
			&& (cval > 0 || F1->set->cfieldc == F1->setusedc -1)) {
249
				joinlines(F2, NULL);
250
				slurp(F2);
251
			}
252
			else if (cval > 0)
253
				/* File 2 takes the lead... */
254
				slurp(F2);
255
		}
256
	}
257
258
	/*
259
	 * Now that one of the files is used up, optionally output any
260
	 * remaining lines from the other file.
261
	 */
262
	if (F1->unpair)
263
		while (F1->setcnt) {
264
			joinlines(F1, NULL);
265
			slurp(F1);
266
		}
267
	if (F2->unpair)
268
		while (F2->setcnt) {
269
			joinlines(F2, NULL);
270
			slurp(F2);
271
		}
272
273
	return 0;
274
}
275
276
/* wrapper around slurpit() to keep track of what field we are on */
277
void slurp(INPUT *F)
278
{
279
	long fpos;
280
	u_long cfieldc;
281
282
	if (F->set == NULL) {
283
		fpos = 0;
284
		cfieldc = 0;
285
	}
286
	else {
287
		fpos = F->set->fpos;
288
		cfieldc = F->set->cfieldc;
289
	}
290
	slurpit(F);
291
	if (F->set == NULL)
292
		return;
293
	else if (fpos != F->set->fpos)
294
		F->set->cfieldc = cfieldc+1;
295
}
296
297
void
298
slurpit(INPUT *F)
299
{
300
	LINE *lp, *lastlp, tmp;
301
	size_t len;
302
	u_long cnt;
303
	char *bp, *fieldp;
304
	long fpos;
305
	/*
306
	 * Read all of the lines from an input file that have the same
307
	 * join field.
308
	 */
309
310
	F->setcnt = 0;
311
	for (lastlp = NULL; ; ++F->setcnt, lastlp = lp) {
312
		/*
313
		 * If we're out of space to hold line structures, allocate
314
		 * more.  Initialize the structure so that we know that this
315
		 * is new space.
316
		 */
317
		if (F->setcnt == F->setalloc) {
318
			LINE *p;
319
			u_long newsize = F->setalloc + 50;
320
			cnt = F->setalloc;
321
			if ((p = reallocarray(F->set, newsize, sizeof(LINE)))
322
			    == NULL)
323
				err(1, NULL);
324
			F->set = p;
325
			F->setalloc = newsize;
326
			memset(F->set + cnt, 0, 50 * sizeof(LINE));
327
			/* re-set lastlp in case it moved */
328
			if (lastlp != NULL)
329
				lastlp = &F->set[F->setcnt - 1];
330
		}
331
		/*
332
		 * Get any pushed back line, else get the next line.  Allocate
333
		 * space as necessary.  If taking the line from the stack swap
334
		 * the two structures so that we don't lose space allocated to
335
		 * either structure.  This could be avoided by doing another
336
		 * level of indirection, but it's probably okay as is.
337
		 */
338
		lp = &F->set[F->setcnt];
339
		if (F->pushbool) {
340
			tmp = F->set[F->setcnt];
341
			F->set[F->setcnt] = F->set[F->pushback];
342
			F->set[F->pushback] = tmp;
343
			F->pushbool = 0;
344
			continue;
345
		}
346
		if ((bp = fgetln(F->fp, &len)) == NULL)
347
			return;
348
		/*
349
		 * we depend on knowing on what field we are, one safe way is
350
		 * the file position.
351
		*/
352
		fpos = ftell(F->fp) - len;
353
		if (lp->linealloc <= len + 1) {
354
			char *p;
355
			u_long newsize = lp->linealloc +
356
			    MAXIMUM(100, len + 1 - lp->linealloc);
357
			if ((p = realloc(lp->line, newsize)) == NULL)
358
				err(1, NULL);
359
			lp->line = p;
360
			lp->linealloc = newsize;
361
		}
362
		F->setusedc++;
363
		memmove(lp->line, bp, len);
364
		lp->fpos = fpos;
365
		/* Replace trailing newline, if it exists. */
366
		if (bp[len - 1] == '\n')
367
			lp->line[len - 1] = '\0';
368
		else
369
			lp->line[len] = '\0';
370
		bp = lp->line;
371
372
		/* Split the line into fields, allocate space as necessary. */
373
		lp->fieldcnt = 0;
374
		while ((fieldp = strsep(&bp, tabchar)) != NULL) {
375
			if (spans && *fieldp == '\0')
376
				continue;
377
			if (lp->fieldcnt == lp->fieldalloc) {
378
				char **p;
379
				u_long newsize = lp->fieldalloc + 50;
380
				if ((p = reallocarray(lp->fields, newsize,
381
				    sizeof(char *))) == NULL)
382
					err(1, NULL);
383
				lp->fields = p;
384
				lp->fieldalloc = newsize;
385
			}
386
			lp->fields[lp->fieldcnt++] = fieldp;
387
		}
388
389
		/* See if the join field value has changed. */
390
		if (lastlp != NULL && cmp(lp, F->joinf, lastlp, F->joinf)) {
391
			F->pushbool = 1;
392
			F->pushback = F->setcnt;
393
			break;
394
		}
395
	}
396
}
397
398
int
399
cmp(LINE *lp1, u_long fieldno1, LINE *lp2, u_long fieldno2)
400
{
401
	if (lp1->fieldcnt <= fieldno1)
402
		return (-1);
403
	else if (lp2->fieldcnt <= fieldno2)
404
		return (1);
405
	return (strcmp(lp1->fields[fieldno1], lp2->fields[fieldno2]));
406
}
407
408
void
409
joinlines(INPUT *F1, INPUT *F2)
410
{
411
	u_long cnt1, cnt2;
412
413
	/*
414
	 * Output the results of a join comparison.  The output may be from
415
	 * either file 1 or file 2 (in which case the first argument is the
416
	 * file from which to output) or from both.
417
	 */
418
	if (F2 == NULL) {
419
		for (cnt1 = 0; cnt1 < F1->setcnt; ++cnt1)
420
			outoneline(F1, &F1->set[cnt1]);
421
		return;
422
	}
423
	for (cnt1 = 0; cnt1 < F1->setcnt; ++cnt1)
424
		for (cnt2 = 0; cnt2 < F2->setcnt; ++cnt2)
425
			outtwoline(F1, &F1->set[cnt1], F2, &F2->set[cnt2]);
426
}
427
428
void
429
outoneline(INPUT *F, LINE *lp)
430
{
431
	u_long cnt;
432
433
	/*
434
	 * Output a single line from one of the files, according to the
435
	 * join rules.  This happens when we are writing unmatched single
436
	 * lines.  Output empty fields in the right places.
437
	 */
438
	if (olist)
439
		for (cnt = 0; cnt < olistcnt; ++cnt) {
440
			if (olist[cnt].filenum == F->number)
441
				outfield(lp, olist[cnt].fieldno, 0);
442
			else if (olist[cnt].filenum == 0)
443
				outfield(lp, F->joinf, 0);
444
			else
445
				outfield(lp, 0, 1);
446
		}
447
	else {
448
		/*
449
		 * Output the join field, then the remaining fields from F
450
		 */
451
		outfield(lp, F->joinf, 0);
452
		for (cnt = 0; cnt < lp->fieldcnt; ++cnt)
453
			if (F->joinf != cnt)
454
				outfield(lp, cnt, 0);
455
	}
456
457
	putchar('\n');
458
	if (ferror(stdout))
459
		err(1, "stdout");
460
	needsep = 0;
461
}
462
463
void
464
outtwoline(INPUT *F1, LINE *lp1, INPUT *F2, LINE *lp2)
465
{
466
	u_long cnt;
467
468
	/* Output a pair of lines according to the join list (if any). */
469
	if (olist) {
470
		for (cnt = 0; cnt < olistcnt; ++cnt)
471
			if (olist[cnt].filenum == 0) {
472
				if (lp1->fieldcnt >= F1->joinf)
473
					outfield(lp1, F1->joinf, 0);
474
				else
475
					outfield(lp2, F2->joinf, 0);
476
			} else if (olist[cnt].filenum == 1)
477
				outfield(lp1, olist[cnt].fieldno, 0);
478
			else /* if (olist[cnt].filenum == 2) */
479
				outfield(lp2, olist[cnt].fieldno, 0);
480
	} else {
481
		/*
482
		 * Output the join field, then the remaining fields from F1
483
		 * and F2.
484
		 */
485
		outfield(lp1, F1->joinf, 0);
486
		for (cnt = 0; cnt < lp1->fieldcnt; ++cnt)
487
			if (F1->joinf != cnt)
488
				outfield(lp1, cnt, 0);
489
		for (cnt = 0; cnt < lp2->fieldcnt; ++cnt)
490
			if (F2->joinf != cnt)
491
				outfield(lp2, cnt, 0);
492
	}
493
	putchar('\n');
494
	if (ferror(stdout))
495
		err(1, "stdout");
496
	needsep = 0;
497
}
498
499
void
500
outfield(LINE *lp, u_long fieldno, int out_empty)
501
{
502
	if (needsep++)
503
		putchar((int)*tabchar);
504
	if (!ferror(stdout)) {
505
		if (lp->fieldcnt <= fieldno || out_empty) {
506
			if (empty != NULL)
507
				fputs(empty, stdout);
508
		} else {
509
			if (*lp->fields[fieldno] == '\0')
510
				return;
511
			fputs(lp->fields[fieldno], stdout);
512
		}
513
	}
514
	if (ferror(stdout))
515
		err(1, "stdout");
516
}
517
518
/*
519
 * Convert an output list argument "2.1, 1.3, 2.4" into an array of output
520
 * fields.
521
 */
522
void
523
fieldarg(char *option)
524
{
525
	u_long fieldno, filenum;
526
	char *end, *token;
527
528
	while ((token = strsep(&option, ", \t")) != NULL) {
529
		if (*token == '\0')
530
			continue;
531
		if (token[0] == '0')
532
			filenum = fieldno = 0;
533
		else if ((token[0] == '1' || token[0] == '2') &&
534
		    token[1] == '.') {
535
			filenum = token[0] - '0';
536
			fieldno = strtol(token + 2, &end, 10);
537
			if (*end)
538
				errx(1, "malformed -o option field");
539
			if (fieldno == 0)
540
				errx(1, "field numbers are 1 based");
541
			--fieldno;
542
		} else
543
			errx(1, "malformed -o option field");
544
		if (olistcnt == olistalloc) {
545
			OLIST *p;
546
			u_long newsize = olistalloc + 50;
547
			if ((p = reallocarray(olist, newsize, sizeof(OLIST)))
548
			    == NULL)
549
				err(1, NULL);
550
			olist = p;
551
			olistalloc = newsize;
552
		}
553
		olist[olistcnt].filenum = filenum;
554
		olist[olistcnt].fieldno = fieldno;
555
		++olistcnt;
556
	}
557
}
558
559
/*
 * Rewrite historical option syntax in argv, in place, so that getopt(3)
 * can parse it.  Scanning stops at the first argument that begins with
 * "--"; arguments not starting with '-' are skipped.
 */
void
obsolete(char **argv)
{
	size_t arglen;
	char **scan, *arg, *repl;

	for (;;) {
		arg = *++argv;
		if (arg == NULL)
			break;
		/* Not an option: leave it alone. */
		if (arg[0] != '-')
			continue;
		/* "--" ends the rewriting. */
		if (arg[1] == '-')
			return;

		switch (arg[1]) {
		case 'a':
			/*
			 * The original join allowed a bare "-a", meaning the
			 * same as -a1 plus -a2.  POSIX 1003.2, Draft 11.2
			 * only specifies "-a 1" and "-a 2", so the bare form
			 * is mapped to a separate option character that is
			 * unlikely to be used or typed by accident.  (The
			 * argv array could be reallocated instead, but that
			 * hardly seems worthwhile.)
			 */
			if (arg[2] != '\0')
				break;
			if (argv[1] != NULL && (strcmp(argv[1], "1") == 0 ||
			    strcmp(argv[1], "2") == 0))
				break;
			arg[1] = '\01';
			warnx("-a option used without an argument; "
			    "reverting to historical behavior");
			break;
		case 'j':
			/*
			 * The original join allowed "-j[12] arg" and "-j arg".
			 * The former becomes "-[12] arg"; the latter is left
			 * for getopt(3), which handles it as is.
			 */
			if (arg[2] == '\0')
				break;
			if ((arg[2] == '1' || arg[2] == '2') &&
			    arg[3] == '\0') {
				arg[1] = arg[2];
				arg[2] = '\0';
				break;
			}
			warnx("unknown option -- %s", arg + 1);
			usage();
			break;
		case 'o':
			/*
			 * The original join allowed "-o arg arg".
			 * Convert to "-o arg -o arg".
			 */
			if (arg[2] != '\0' || argv[1] == NULL)
				break;
			for (scan = argv + 2; *scan != NULL; ++scan) {
				const char *cand = *scan;

				/* Only "1.<digits>" / "2.<digits>" qualify. */
				if (cand[0] == '0' ||
				    (cand[0] != '1' && cand[0] != '2') ||
				    cand[1] != '.')
					break;
				arglen = strlen(cand);
				if (strspn(cand + 2, "0123456789") !=
				    arglen - 2)
					break;
				/* "-o" + original text + terminating NUL. */
				repl = malloc(arglen + 3);
				if (repl == NULL)
					err(1, NULL);
				repl[0] = '-';
				repl[1] = 'o';
				memmove(repl + 2, cand, arglen + 1);
				*scan = repl;
			}
			/* Resume the outer scan at the first unconverted one. */
			argv = scan - 1;
			break;
		}
	}
}
638
639
/*
 * Print the two-line usage message to stderr and exit with status 1.
 * The second line is padded so that it lines up under the first option
 * of the first line.
 */
void
usage(void)
{
	extern char *__progname;
	int indent;

	/* sizeof counts the NUL, which stands in for the space after the name. */
	indent = strlen(__progname) + sizeof("usage: ");
	(void)fprintf(stderr,
	    "usage: %s [-1 field] [-2 field] [-a file_number | -v file_number] "
	    "[-e string]\n"
	    "%*s[-o list] [-t char] file1 file2\n",
	    __progname, indent, "");
	exit(1);
}