GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: usr.bin/csplit/csplit.c Lines: 0 195 0.0 %
Date: 2017-11-07 Branches: 0 183 0.0 %

Line Branch Exec Source
1
/*	$OpenBSD: csplit.c,v 1.9 2016/10/28 07:22:59 schwarze Exp $	*/
2
/*	$FreeBSD: src/usr.bin/csplit/csplit.c,v 1.9 2004/03/22 11:15:03 tjr Exp $	*/
3
4
/*-
5
 * Copyright (c) 2002 Tim J. Robbins.
6
 * All rights reserved.
7
 *
8
 * Redistribution and use in source and binary forms, with or without
9
 * modification, are permitted provided that the following conditions
10
 * are met:
11
 * 1. Redistributions of source code must retain the above copyright
12
 *    notice, this list of conditions and the following disclaimer.
13
 * 2. Redistributions in binary form must reproduce the above copyright
14
 *    notice, this list of conditions and the following disclaimer in the
15
 *    documentation and/or other materials provided with the distribution.
16
 *
17
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27
 * SUCH DAMAGE.
28
 */
29
30
/*
31
 * csplit -- split files based on context
32
 *
33
 * This utility splits its input into numbered output files by line number
34
 * or by a regular expression. Regular expression matches have an optional
35
 * offset with them, allowing the split to occur a specified number of
36
 * lines before or after the match.
37
 *
38
 * To handle negative offsets, we stop reading when the match occurs and
39
 * store the offset that the file should have been split at, then use
40
 * this output file as input until all the "overflowed" lines have been read.
41
 * The file is then closed and truncated to the correct length.
42
 *
43
 * We assume that the output files can be seeked upon (i.e. they cannot be
44
 * symlinks to named pipes or character devices), but make no such
45
 * assumption about the input.
46
 */
47
48
#include <sys/types.h>
49
50
#include <ctype.h>
51
#include <err.h>
52
#include <errno.h>
53
#include <limits.h>
54
#include <regex.h>
55
#include <signal.h>
56
#include <stdint.h>
57
#include <stdio.h>
58
#include <stdlib.h>
59
#include <string.h>
60
#include <unistd.h>
61
62
void	 cleanup(void);			/* Remove partial output files */
63
void	 do_lineno(const char *);	/* Split at a line number operand */
64
void	 do_rexp(const char *);		/* Split at a /re/ or %re% operand */
65
char	*get_line(void);		/* Read next line into a static buffer */
66
void	 handlesig(int);		/* Signal handler: clean up, _exit(2) */
67
FILE	*newfile(void);			/* Open the next numbered output file */
68
void	 toomuch(FILE *, long);		/* Push back lines read too far */
69
static void __dead usage(void);		/* Print usage message, exit(1) */
70
71
/*
72
 * Command line options
73
 */
74
const char *prefix;		/* File name prefix (-f, default "xx") */
75
long	 sufflen;		/* Number of decimal digits for suffix (-n) */
76
int	 sflag;			/* Suppress output of file sizes (-s) */
77
int	 kflag;			/* Keep output if error occurs (-k) */
78
79
/*
80
 * Other miscellaneous globals (XXX too many)
81
 */
82
long	 lineno;		/* Current line number in input file */
83
long	 reps;			/* Repetitions left for the current pattern */
84
long	 nfiles;		/* Number of files output so far */
85
long	 maxfiles;		/* Maximum number of files (10^sufflen) */
86
char	 currfile[PATH_MAX];	/* Name of the current output file */
87
const char *infn;		/* Name of the input file */
88
FILE	*infile;		/* Input file handle */
89
FILE	*overfile;		/* Overflow file for toomuch() */
90
off_t	 truncofs;		/* Offset overfile should be truncated at */
91
int	 doclean;		/* Should cleanup() remove output? */
92
93
int
94
main(int argc, char *argv[])
95
{
96
	struct sigaction sa;
97
	long i;
98
	int ch;
99
	const char *expr;
100
	char *ep, *p;
101
	FILE *ofp;
102
103
	if (pledge("stdio rpath wpath cpath flock", NULL) == -1)
104
		err(1, "pledge");
105
106
	kflag = sflag = 0;
107
	prefix = "xx";
108
	sufflen = 2;
109
	while ((ch = getopt(argc, argv, "f:kn:s")) != -1) {
110
		switch (ch) {
111
		case 'f':
112
			prefix = optarg;
113
			break;
114
		case 'k':
115
			kflag = 1;
116
			break;
117
		case 'n':
118
			errno = 0;
119
			sufflen = strtol(optarg, &ep, 10);
120
			if (sufflen <= 0 || *ep != '\0' || errno != 0)
121
				errx(1, "%s: bad suffix length", optarg);
122
			break;
123
		case 's':
124
			sflag = 1;
125
			break;
126
		default:
127
			usage();
128
		}
129
	}
130
131
	if (sufflen + strlen(prefix) >= PATH_MAX)
132
		errx(1, "name too long");
133
134
	argc -= optind;
135
	argv += optind;
136
137
	if ((infn = *argv++) == NULL)
138
		usage();
139
	if (strcmp(infn, "-") == 0) {
140
		infile = stdin;
141
		infn = "stdin";
142
	} else if ((infile = fopen(infn, "r")) == NULL)
143
		err(1, "%s", infn);
144
145
	if (!kflag) {
146
		doclean = 1;
147
		atexit(cleanup);
148
		sa.sa_flags = 0;
149
		sa.sa_handler = handlesig;
150
		sigemptyset(&sa.sa_mask);
151
		sigaddset(&sa.sa_mask, SIGHUP);
152
		sigaddset(&sa.sa_mask, SIGINT);
153
		sigaddset(&sa.sa_mask, SIGTERM);
154
		sigaction(SIGHUP, &sa, NULL);
155
		sigaction(SIGINT, &sa, NULL);
156
		sigaction(SIGTERM, &sa, NULL);
157
	}
158
159
	lineno = 0;
160
	nfiles = 0;
161
	truncofs = 0;
162
	overfile = NULL;
163
164
	/* Ensure 10^sufflen < LONG_MAX. */
165
	for (maxfiles = 1, i = 0; i < sufflen; i++) {
166
		if (maxfiles > LONG_MAX / 10)
167
			errx(1, "%ld: suffix too long (limit %ld)",
168
			    sufflen, i);
169
		maxfiles *= 10;
170
	}
171
172
	/* Create files based on supplied patterns. */
173
	while (nfiles < maxfiles - 1 && (expr = *argv++) != NULL) {
174
		/* Look ahead & see if this pattern has any repetitions. */
175
		if (*argv != NULL && **argv == '{') {
176
			errno = 0;
177
			reps = strtol(*argv + 1, &ep, 10);
178
			if (reps < 0 || *ep != '}' || errno != 0)
179
				errx(1, "%s: bad repetition count", *argv + 1);
180
			argv++;
181
		} else
182
			reps = 0;
183
184
		if (*expr == '/' || *expr == '%') {
185
			do {
186
				do_rexp(expr);
187
			} while (reps-- != 0 && nfiles < maxfiles - 1);
188
		} else if (isdigit((unsigned char)*expr))
189
			do_lineno(expr);
190
		else
191
			errx(1, "%s: unrecognised pattern", expr);
192
	}
193
194
	/* Copy the rest into a new file. */
195
	if (!feof(infile)) {
196
		ofp = newfile();
197
		while ((p = get_line()) != NULL && fputs(p, ofp) == 0)
198
			;
199
		if (!sflag)
200
			printf("%jd\n", (intmax_t)ftello(ofp));
201
		if (fclose(ofp) != 0)
202
			err(1, "%s", currfile);
203
	}
204
205
	toomuch(NULL, 0);
206
	doclean = 0;
207
208
	return (0);
209
}
210
211
static void __dead
212
usage(void)
213
{
214
	extern char *__progname;
215
216
	fprintf(stderr,
217
	    "usage: %s [-ks] [-f prefix] [-n number] file args ...\n",
218
	    __progname);
219
	exit(1);
220
}
221
222
/*
 * Signal handler installed by main() for SIGHUP, SIGINT and SIGTERM.
 * Announces the signal on standard error with write(2), removes the
 * output produced so far through cleanup() and leaves with _exit(2).
 * The signal number itself is not used.
 */
/* ARGSUSED */
void
handlesig(int sig)
{
	static const char notice[] = "csplit: caught signal, cleaning up\n";

	/* sizeof - 1: the terminating NUL is not part of the message. */
	write(STDERR_FILENO, notice, sizeof(notice) - 1);
	cleanup();
	_exit(2);
}
232
233
/* Create a new output file. */
234
FILE *
235
newfile(void)
236
{
237
	FILE *fp;
238
239
	if ((size_t)snprintf(currfile, sizeof(currfile), "%s%0*ld", prefix,
240
	    (int)sufflen, nfiles) >= sizeof(currfile))
241
		errc(1, ENAMETOOLONG, "%s", currfile);
242
	if ((fp = fopen(currfile, "w+")) == NULL)
243
		err(1, "%s", currfile);
244
	nfiles++;
245
246
	return (fp);
247
}
248
249
/* Remove partial output, called before exiting. */
250
void
251
cleanup(void)
252
{
253
	char fnbuf[PATH_MAX];
254
	long i;
255
256
	if (!doclean)
257
		return;
258
259
	/*
260
	 * NOTE: One cannot portably assume to be able to call snprintf() from
261
	 * inside a signal handler.  It is, however, safe to do on OpenBSD.
262
	 */
263
	for (i = 0; i < nfiles; i++) {
264
		snprintf(fnbuf, sizeof(fnbuf), "%s%0*ld", prefix,
265
		    (int)sufflen, i);
266
		unlink(fnbuf);
267
	}
268
}
269
270
/* Read a line from the input into a static buffer. */
271
char *
272
get_line(void)
273
{
274
	static char lbuf[LINE_MAX];
275
	FILE *src;
276
277
	src = overfile != NULL ? overfile : infile;
278
279
again: if (fgets(lbuf, sizeof(lbuf), src) == NULL) {
280
		if (src == overfile) {
281
			src = infile;
282
			goto again;
283
		}
284
		return (NULL);
285
	}
286
	if (ferror(src))
287
		err(1, "%s", infn);
288
	lineno++;
289
290
	return (lbuf);
291
}
292
293
/* Conceptually rewind the input (as obtained by get_line()) back `n' lines. */
294
void
295
toomuch(FILE *ofp, long n)
296
{
297
	char buf[BUFSIZ];
298
	size_t i, nread;
299
300
	if (overfile != NULL) {
301
		/*
302
		 * Truncate the previous file we overflowed into back to
303
		 * the correct length, close it.
304
		 */
305
		if (fflush(overfile) != 0)
306
			err(1, "overflow");
307
		if (ftruncate(fileno(overfile), truncofs) != 0)
308
			err(1, "overflow");
309
		if (fclose(overfile) != 0)
310
			err(1, "overflow");
311
		overfile = NULL;
312
	}
313
314
	if (n == 0)
315
		/* Just tidying up */
316
		return;
317
318
	lineno -= n;
319
320
	/*
321
	 * Wind the overflow file backwards to `n' lines before the
322
	 * current one.
323
	 */
324
	do {
325
		if (ftello(ofp) < (off_t)sizeof(buf))
326
			rewind(ofp);
327
		else
328
			fseeko(ofp, -(off_t)sizeof(buf), SEEK_CUR);
329
		if (ferror(ofp))
330
			errx(1, "%s: can't seek", currfile);
331
		if ((nread = fread(buf, 1, sizeof(buf), ofp)) == 0)
332
			errx(1, "can't read overflowed output");
333
		if (fseeko(ofp, -(off_t)nread, SEEK_CUR) != 0)
334
			err(1, "%s", currfile);
335
		for (i = 1; i <= nread; i++)
336
			if (buf[nread - i] == '\n' && n-- == 0)
337
				break;
338
		if (ftello(ofp) == 0)
339
			break;
340
	} while (n > 0);
341
	if (fseeko(ofp, (off_t)(nread - i + 1), SEEK_CUR) != 0)
342
		err(1, "%s", currfile);
343
344
	/*
345
	 * get_line() will read from here. Next call will truncate to
346
	 * truncofs in this file.
347
	 */
348
	overfile = ofp;
349
	truncofs = ftello(overfile);
350
}
351
352
/* Handle splits for /regexp/ and %regexp% patterns. */
353
void
354
do_rexp(const char *expr)
355
{
356
	regex_t cre;
357
	intmax_t nwritten;
358
	long ofs;
359
	int first;
360
	char *ecopy, *ep, *p, *pofs, *re;
361
	FILE *ofp;
362
363
	if ((ecopy = strdup(expr)) == NULL)
364
		err(1, "strdup");
365
366
	re = ecopy + 1;
367
	if ((pofs = strrchr(ecopy, *expr)) == NULL || pofs[-1] == '\\')
368
		errx(1, "%s: missing trailing %c", expr, *expr);
369
	*pofs++ = '\0';
370
371
	if (*pofs != '\0') {
372
		errno = 0;
373
		ofs = strtol(pofs, &ep, 10);
374
		if (*ep != '\0' || errno != 0)
375
			errx(1, "%s: bad offset", pofs);
376
	} else
377
		ofs = 0;
378
379
	if (regcomp(&cre, re, REG_BASIC|REG_NOSUB) != 0)
380
		errx(1, "%s: bad regular expression", re);
381
382
	if (*expr == '/')
383
		/* /regexp/: Save results to a file. */
384
		ofp = newfile();
385
	else {
386
		/* %regexp%: Make a temporary file for overflow. */
387
		if ((ofp = tmpfile()) == NULL)
388
			err(1, "tmpfile");
389
	}
390
391
	/* Read and output lines until we get a match. */
392
	first = 1;
393
	while ((p = get_line()) != NULL) {
394
		if (fputs(p, ofp) != 0)
395
			break;
396
		if (!first && regexec(&cre, p, 0, NULL, 0) == 0)
397
			break;
398
		first = 0;
399
	}
400
401
	if (p == NULL)
402
		errx(1, "%s: no match", re);
403
404
	if (ofs <= 0) {
405
		/*
406
		 * Negative (or zero) offset: throw back any lines we should
407
		 * not have read yet.
408
		  */
409
		if (p != NULL) {
410
			toomuch(ofp, -ofs + 1);
411
			nwritten = (intmax_t)truncofs;
412
		} else
413
			nwritten = (intmax_t)ftello(ofp);
414
	} else {
415
		/*
416
		 * Positive offset: copy the requested number of lines
417
		 * after the match.
418
		 */
419
		while (--ofs > 0 && (p = get_line()) != NULL)
420
			fputs(p, ofp);
421
		toomuch(NULL, 0);
422
		nwritten = (intmax_t)ftello(ofp);
423
		if (fclose(ofp) != 0)
424
			err(1, "%s", currfile);
425
	}
426
427
	if (!sflag && *expr == '/')
428
		printf("%jd\n", nwritten);
429
430
	regfree(&cre);
431
	free(ecopy);
432
}
433
434
/* Handle splits based on line number. */
435
void
436
do_lineno(const char *expr)
437
{
438
	long lastline, tgtline;
439
	char *ep, *p;
440
	FILE *ofp;
441
442
	errno = 0;
443
	tgtline = strtol(expr, &ep, 10);
444
	if (tgtline <= 0 || errno != 0 || *ep != '\0')
445
		errx(1, "%s: bad line number", expr);
446
	lastline = tgtline;
447
	if (lastline <= lineno)
448
		errx(1, "%s: can't go backwards", expr);
449
450
	while (nfiles < maxfiles - 1) {
451
		ofp = newfile();
452
		while (lineno + 1 != lastline) {
453
			if ((p = get_line()) == NULL)
454
				errx(1, "%ld: out of range", lastline);
455
			if (fputs(p, ofp) != 0)
456
				break;
457
		}
458
		if (!sflag)
459
			printf("%jd\n", (intmax_t)ftello(ofp));
460
		if (fclose(ofp) != 0)
461
			err(1, "%s", currfile);
462
		if (reps-- == 0)
463
			break;
464
		lastline += tgtline;
465
	}
466
}