GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: usr.bin/csplit/csplit.c Lines: 0 194 0.0 %
Date: 2016-12-06 Branches: 0 179 0.0 %

Line Branch Exec Source
1
/*	$OpenBSD: csplit.c,v 1.8 2015/10/11 17:43:03 semarie Exp $	*/
2
/*	$FreeBSD: src/usr.bin/csplit/csplit.c,v 1.9 2004/03/22 11:15:03 tjr Exp $	*/
3
4
/*-
5
 * Copyright (c) 2002 Tim J. Robbins.
6
 * All rights reserved.
7
 *
8
 * Redistribution and use in source and binary forms, with or without
9
 * modification, are permitted provided that the following conditions
10
 * are met:
11
 * 1. Redistributions of source code must retain the above copyright
12
 *    notice, this list of conditions and the following disclaimer.
13
 * 2. Redistributions in binary form must reproduce the above copyright
14
 *    notice, this list of conditions and the following disclaimer in the
15
 *    documentation and/or other materials provided with the distribution.
16
 *
17
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27
 * SUCH DAMAGE.
28
 */
29
30
/*
31
 * csplit -- split files based on context
32
 *
33
 * This utility splits its input into numbered output files by line number
34
 * or by a regular expression. Regular expression matches have an optional
35
 * offset with them, allowing the split to occur a specified number of
36
 * lines before or after the match.
37
 *
38
 * To handle negative offsets, we stop reading when the match occurs and
39
 * store the offset that the file should have been split at, then use
40
 * this output file as input until all the "overflowed" lines have been read.
41
 * The file is then closed and truncated to the correct length.
42
 *
43
 * We assume that the output files are seekable (i.e. they cannot be
44
 * symlinks to named pipes or character devices), but make no such
45
 * assumption about the input.
46
 */
47
48
#include <sys/types.h>
49
50
#include <ctype.h>
51
#include <err.h>
52
#include <errno.h>
53
#include <limits.h>
54
#include <locale.h>
55
#include <regex.h>
56
#include <signal.h>
57
#include <stdint.h>
58
#include <stdio.h>
59
#include <stdlib.h>
60
#include <string.h>
61
#include <unistd.h>
62
63
/* Forward declarations of the functions defined later in this file. */
void	 cleanup(void);		/* unlink the output files created so far */
64
void	 do_lineno(const char *);	/* split at a line-number operand */
65
void	 do_rexp(const char *);	/* split at a /regexp/ or %regexp% operand */
66
char	*get_line(void);		/* read the next input line */
67
void	 handlesig(int);		/* signal handler: clean up and _exit */
68
FILE	*newfile(void);			/* create the next numbered output file */
69
void	 toomuch(FILE *, long);	/* push back lines already copied out */
70
void	 usage(void);			/* print the synopsis and exit */
71
72
/*
73
 * Command line options
74
 */
75
const char *prefix;		/* Output file name prefix (-f), default "xx" */
76
long	 sufflen;		/* Number of decimal digits for suffix (-n) */
77
int	 sflag;			/* Suppress printing of output file sizes (-s) */
78
int	 kflag;			/* Keep output if error occurs (-k) */
79
80
/*
81
 * Other miscellaneous globals (XXX too many)
82
 */
83
long	 lineno;		/* Current line number in input file */
84
long	 reps;			/* Repetitions left for the current pattern */
85
long	 nfiles;		/* Number of files output so far */
86
long	 maxfiles;		/* Maximum number of files we can create */
87
char	 currfile[PATH_MAX];	/* Name of the current output file */
88
const char *infn;		/* Name of the input file */
89
FILE	*infile;		/* Input file handle */
90
FILE	*overfile;		/* Overflow file for toomuch(), or NULL */
91
off_t	 truncofs;		/* Offset overfile should be truncated at */
92
int	 doclean;		/* Nonzero while cleanup() should remove output */
93
94
int
95
main(int argc, char *argv[])
96
{
97
	struct sigaction sa;
98
	long i;
99
	int ch;
100
	const char *expr;
101
	char *ep, *p;
102
	FILE *ofp;
103
104
	setlocale(LC_ALL, "");
105
106
	if (pledge("stdio rpath wpath cpath", NULL) == -1)
107
		err(1, "pledge");
108
109
	kflag = sflag = 0;
110
	prefix = "xx";
111
	sufflen = 2;
112
	while ((ch = getopt(argc, argv, "f:kn:s")) != -1) {
113
		switch (ch) {
114
		case 'f':
115
			prefix = optarg;
116
			break;
117
		case 'k':
118
			kflag = 1;
119
			break;
120
		case 'n':
121
			errno = 0;
122
			sufflen = strtol(optarg, &ep, 10);
123
			if (sufflen <= 0 || *ep != '\0' || errno != 0)
124
				errx(1, "%s: bad suffix length", optarg);
125
			break;
126
		case 's':
127
			sflag = 1;
128
			break;
129
		default:
130
			usage();
131
			/*NOTREACHED*/
132
		}
133
	}
134
135
	if (sufflen + strlen(prefix) >= PATH_MAX)
136
		errx(1, "name too long");
137
138
	argc -= optind;
139
	argv += optind;
140
141
	if ((infn = *argv++) == NULL)
142
		usage();
143
	if (strcmp(infn, "-") == 0) {
144
		infile = stdin;
145
		infn = "stdin";
146
	} else if ((infile = fopen(infn, "r")) == NULL)
147
		err(1, "%s", infn);
148
149
	if (!kflag) {
150
		doclean = 1;
151
		atexit(cleanup);
152
		sa.sa_flags = 0;
153
		sa.sa_handler = handlesig;
154
		sigemptyset(&sa.sa_mask);
155
		sigaddset(&sa.sa_mask, SIGHUP);
156
		sigaddset(&sa.sa_mask, SIGINT);
157
		sigaddset(&sa.sa_mask, SIGTERM);
158
		sigaction(SIGHUP, &sa, NULL);
159
		sigaction(SIGINT, &sa, NULL);
160
		sigaction(SIGTERM, &sa, NULL);
161
	}
162
163
	lineno = 0;
164
	nfiles = 0;
165
	truncofs = 0;
166
	overfile = NULL;
167
168
	/* Ensure 10^sufflen < LONG_MAX. */
169
	for (maxfiles = 1, i = 0; i < sufflen; i++) {
170
		if (maxfiles > LONG_MAX / 10)
171
			errx(1, "%ld: suffix too long (limit %ld)",
172
			    sufflen, i);
173
		maxfiles *= 10;
174
	}
175
176
	/* Create files based on supplied patterns. */
177
	while (nfiles < maxfiles - 1 && (expr = *argv++) != NULL) {
178
		/* Look ahead & see if this pattern has any repetitions. */
179
		if (*argv != NULL && **argv == '{') {
180
			errno = 0;
181
			reps = strtol(*argv + 1, &ep, 10);
182
			if (reps < 0 || *ep != '}' || errno != 0)
183
				errx(1, "%s: bad repetition count", *argv + 1);
184
			argv++;
185
		} else
186
			reps = 0;
187
188
		if (*expr == '/' || *expr == '%') {
189
			do {
190
				do_rexp(expr);
191
			} while (reps-- != 0 && nfiles < maxfiles - 1);
192
		} else if (isdigit((unsigned char)*expr))
193
			do_lineno(expr);
194
		else
195
			errx(1, "%s: unrecognised pattern", expr);
196
	}
197
198
	/* Copy the rest into a new file. */
199
	if (!feof(infile)) {
200
		ofp = newfile();
201
		while ((p = get_line()) != NULL && fputs(p, ofp) == 0)
202
			;
203
		if (!sflag)
204
			printf("%jd\n", (intmax_t)ftello(ofp));
205
		if (fclose(ofp) != 0)
206
			err(1, "%s", currfile);
207
	}
208
209
	toomuch(NULL, 0);
210
	doclean = 0;
211
212
	return (0);
213
}
214
215
/* Print the command synopsis on standard error and exit with status 1. */
void
usage(void)
{
	extern char *__progname;
	FILE *out = stderr;

	fprintf(out, "usage: %s [-ks] [-f prefix] [-n number] file args ...\n",
	    __progname);
	exit(1);
}
225
226
/*
 * Signal handler: report the signal on standard error, remove the output
 * via cleanup(), then terminate immediately with status 2.  The signal
 * number itself is not used.
 */
/* ARGSUSED */
void
handlesig(int sig)
{
	static const char notice[] = "csplit: caught signal, cleaning up\n";
	const size_t len = sizeof(notice) - 1;	/* exclude the NUL */

	write(STDERR_FILENO, notice, len);
	cleanup();
	_exit(2);
}
236
237
/* Create a new output file. */
238
FILE *
239
newfile(void)
240
{
241
	FILE *fp;
242
243
	if ((size_t)snprintf(currfile, sizeof(currfile), "%s%0*ld", prefix,
244
	    (int)sufflen, nfiles) >= sizeof(currfile))
245
		errc(1, ENAMETOOLONG, "%s", currfile);
246
	if ((fp = fopen(currfile, "w+")) == NULL)
247
		err(1, "%s", currfile);
248
	nfiles++;
249
250
	return (fp);
251
}
252
253
/* Remove partial output, called before exiting. */
254
void
255
cleanup(void)
256
{
257
	char fnbuf[PATH_MAX];
258
	long i;
259
260
	if (!doclean)
261
		return;
262
263
	/*
264
	 * NOTE: One cannot portably assume to be able to call snprintf() from
265
	 * inside a signal handler.  It is, however, safe to do on OpenBSD.
266
	 */
267
	for (i = 0; i < nfiles; i++) {
268
		snprintf(fnbuf, sizeof(fnbuf), "%s%0*ld", prefix,
269
		    (int)sufflen, i);
270
		unlink(fnbuf);
271
	}
272
}
273
274
/* Read a line from the input into a static buffer. */
275
char *
276
get_line(void)
277
{
278
	static char lbuf[LINE_MAX];
279
	FILE *src;
280
281
	src = overfile != NULL ? overfile : infile;
282
283
again: if (fgets(lbuf, sizeof(lbuf), src) == NULL) {
284
		if (src == overfile) {
285
			src = infile;
286
			goto again;
287
		}
288
		return (NULL);
289
	}
290
	if (ferror(src))
291
		err(1, "%s", infn);
292
	lineno++;
293
294
	return (lbuf);
295
}
296
297
/*
 * Conceptually rewind the input (as obtained by get_line()) back `n' lines.
 *
 * First, if an overflow file from an earlier call is still pending, it is
 * flushed, truncated to `truncofs' and closed.  With n == 0 nothing else
 * is done (`ofp' is not used in that case).  Otherwise `lineno' is reduced
 * by `n', the position of `ofp' is moved backwards over the most recently
 * written lines, and `ofp' becomes the new overflow file: get_line() reads
 * from it next, and the following call to toomuch() truncates it at the
 * position recorded in `truncofs'.
 */
298
void
299
toomuch(FILE *ofp, long n)
300
{
301
	char buf[BUFSIZ];
302
	size_t i, nread;
303
304
	if (overfile != NULL) {
305
		/*
306
		 * Truncate the previous file we overflowed into back to
307
		 * the correct length, close it.
308
		 */
309
		if (fflush(overfile) != 0)
310
			err(1, "overflow");
311
		if (ftruncate(fileno(overfile), truncofs) != 0)
312
			err(1, "overflow");
313
		if (fclose(overfile) != 0)
314
			err(1, "overflow");
315
		overfile = NULL;
316
	}
317
318
	if (n == 0)
319
		/* Just tidying up */
320
		return;
321
322
	lineno -= n;
323
324
	/*
325
	 * Wind the overflow file backwards to `n' lines before the
326
	 * current one.
327
	 */
328
	do {
329
		/* Step back one buffer, or to the start if less precedes us. */
		if (ftello(ofp) < (off_t)sizeof(buf))
330
			rewind(ofp);
331
		else
332
			fseeko(ofp, -(off_t)sizeof(buf), SEEK_CUR);
333
		if (ferror(ofp))
334
			errx(1, "%s: can't seek", currfile);
335
		if ((nread = fread(buf, 1, sizeof(buf), ofp)) == 0)
336
			errx(1, "can't read overflowed output");
337
		/* Return to the start of the chunk that was just read. */
		if (fseeko(ofp, -(off_t)nread, SEEK_CUR) != 0)
338
			err(1, "%s", currfile);
339
		/*
		 * Scan the chunk from its end towards its start; each
		 * newline uses up one of the `n' lines, and the scan stops
		 * on the first newline found once `n' has reached zero.
		 */
		for (i = 1; i <= nread; i++)
340
			if (buf[nread - i] == '\n' && n-- == 0)
341
				break;
342
		/* Reached the beginning of the file: nothing more to scan. */
		if (ftello(ofp) == 0)
343
			break;
344
	} while (n > 0);
345
	/*
	 * Move to just past the newline the scan stopped on (index
	 * nread - i in the chunk), or stay at the chunk start when the
	 * scan ran off the front of the chunk (i == nread + 1).
	 */
	if (fseeko(ofp, (off_t)(nread - i + 1), SEEK_CUR) != 0)
346
		err(1, "%s", currfile);
347
348
	/*
349
	 * get_line() will read from here. Next call will truncate to
350
	 * truncofs in this file.
351
	 */
352
	overfile = ofp;
353
	truncofs = ftello(overfile);
354
}
355
356
/* Handle splits for /regexp/ and %regexp% patterns. */
357
void
358
do_rexp(const char *expr)
359
{
360
	regex_t cre;
361
	intmax_t nwritten;
362
	long ofs;
363
	int first;
364
	char *ecopy, *ep, *p, *pofs, *re;
365
	FILE *ofp;
366
367
	if ((ecopy = strdup(expr)) == NULL)
368
		err(1, "strdup");
369
370
	re = ecopy + 1;
371
	if ((pofs = strrchr(ecopy, *expr)) == NULL || pofs[-1] == '\\')
372
		errx(1, "%s: missing trailing %c", expr, *expr);
373
	*pofs++ = '\0';
374
375
	if (*pofs != '\0') {
376
		errno = 0;
377
		ofs = strtol(pofs, &ep, 10);
378
		if (*ep != '\0' || errno != 0)
379
			errx(1, "%s: bad offset", pofs);
380
	} else
381
		ofs = 0;
382
383
	if (regcomp(&cre, re, REG_BASIC|REG_NOSUB) != 0)
384
		errx(1, "%s: bad regular expression", re);
385
386
	if (*expr == '/')
387
		/* /regexp/: Save results to a file. */
388
		ofp = newfile();
389
	else {
390
		/* %regexp%: Make a temporary file for overflow. */
391
		if ((ofp = tmpfile()) == NULL)
392
			err(1, "tmpfile");
393
	}
394
395
	/* Read and output lines until we get a match. */
396
	first = 1;
397
	while ((p = get_line()) != NULL) {
398
		if (fputs(p, ofp) != 0)
399
			break;
400
		if (!first && regexec(&cre, p, 0, NULL, 0) == 0)
401
			break;
402
		first = 0;
403
	}
404
405
	if (p == NULL)
406
		errx(1, "%s: no match", re);
407
408
	if (ofs <= 0) {
409
		/*
410
		 * Negative (or zero) offset: throw back any lines we should
411
		 * not have read yet.
412
		  */
413
		if (p != NULL) {
414
			toomuch(ofp, -ofs + 1);
415
			nwritten = (intmax_t)truncofs;
416
		} else
417
			nwritten = (intmax_t)ftello(ofp);
418
	} else {
419
		/*
420
		 * Positive offset: copy the requested number of lines
421
		 * after the match.
422
		 */
423
		while (--ofs > 0 && (p = get_line()) != NULL)
424
			fputs(p, ofp);
425
		toomuch(NULL, 0);
426
		nwritten = (intmax_t)ftello(ofp);
427
		if (fclose(ofp) != 0)
428
			err(1, "%s", currfile);
429
	}
430
431
	if (!sflag && *expr == '/')
432
		printf("%jd\n", nwritten);
433
434
	regfree(&cre);
435
	free(ecopy);
436
}
437
438
/* Handle splits based on line number. */
439
void
440
do_lineno(const char *expr)
441
{
442
	long lastline, tgtline;
443
	char *ep, *p;
444
	FILE *ofp;
445
446
	errno = 0;
447
	tgtline = strtol(expr, &ep, 10);
448
	if (tgtline <= 0 || errno != 0 || *ep != '\0')
449
		errx(1, "%s: bad line number", expr);
450
	lastline = tgtline;
451
	if (lastline <= lineno)
452
		errx(1, "%s: can't go backwards", expr);
453
454
	while (nfiles < maxfiles - 1) {
455
		ofp = newfile();
456
		while (lineno + 1 != lastline) {
457
			if ((p = get_line()) == NULL)
458
				errx(1, "%ld: out of range", lastline);
459
			if (fputs(p, ofp) != 0)
460
				break;
461
		}
462
		if (!sflag)
463
			printf("%jd\n", (intmax_t)ftello(ofp));
464
		if (fclose(ofp) != 0)
465
			err(1, "%s", currfile);
466
		if (reps-- == 0)
467
			break;
468
		lastline += tgtline;
469
	}
470
}