GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: usr.bin/m4/gnum4.c Lines: 199 309 64.4 %
Date: 2017-11-07 Branches: 112 210 53.3 %

Line Branch Exec Source
1
/* $OpenBSD: gnum4.c,v 1.52 2017/08/21 21:41:13 deraadt Exp $ */
2
3
/*
4
 * Copyright (c) 1999 Marc Espie
5
 *
6
 * Redistribution and use in source and binary forms, with or without
7
 * modification, are permitted provided that the following conditions
8
 * are met:
9
 * 1. Redistributions of source code must retain the above copyright
10
 *    notice, this list of conditions and the following disclaimer.
11
 * 2. Redistributions in binary form must reproduce the above copyright
12
 *    notice, this list of conditions and the following disclaimer in the
13
 *    documentation and/or other materials provided with the distribution.
14
 *
15
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
16
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
19
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25
 * SUCH DAMAGE.
26
 */
27
28
/*
29
 * functions needed to support gnu-m4 extensions, including a fake freezing
30
 */
31
32
#include <sys/types.h>
33
#include <sys/wait.h>
34
#include <ctype.h>
35
#include <err.h>
36
#include <paths.h>
37
#include <regex.h>
38
#include <stdarg.h>
39
#include <stddef.h>
40
#include <stdlib.h>
41
#include <stdint.h>
42
#include <stdio.h>
43
#include <string.h>
44
#include <errno.h>
45
#include <unistd.h>
46
#include <limits.h>
47
#include "mdef.h"
48
#include "stdd.h"
49
#include "extern.h"
50
51
52
/*
 * Non-zero selects the GNU m4 compatible behaviour of the regexp helpers
 * in this file (replacement escapes, pattern rewriting, empty patterns).
 */
int mimic_gnu = 0;
53
54
/*
55
 * Support for include path search
56
 * First search in the current directory.
57
 * If not found, and the path is not absolute, include path kicks in.
58
 * First, -I options, in the order found on the command line.
59
 * Then M4PATH env variable
60
 */
61
62
/*
 * One directory of the include search path.  Entries form a singly linked
 * list: lookups walk it from `first', new entries are appended at `last'.
 */
struct path_entry {
	char *name;			/* directory name */
	struct path_entry *next;	/* following entry, or NULL at the tail */
};

struct path_entry *first, *last;
66
67
/* Forward declarations of the include-path helpers defined below. */
static struct path_entry *new_path_entry(const char *dirname);
static void ensure_m4path(void);
static struct input_file *dopath(struct input_file *i, const char *filename);
70
71
static struct path_entry *
72
new_path_entry(const char *dirname)
73
{
74
	struct path_entry *n;
75
76
18
	n = malloc(sizeof(struct path_entry));
77
9
	if (!n)
78
		errx(1, "out of memory");
79
9
	n->name = xstrdup(dirname);
80
9
	n->next = 0;
81
9
	return n;
82
}
83
84
void
85
addtoincludepath(const char *dirname)
86
{
87
	struct path_entry *n;
88
89
18
	n = new_path_entry(dirname);
90
91
9
	if (last) {
92
		last->next = n;
93
		last = n;
94
	}
95
	else
96
9
		last = first = n;
97
9
}
98
99
static void
100
ensure_m4path()
101
{
102
	static int envpathdone = 0;
103
	char *envpath;
104
36
	char *sweep;
105
	char *path;
106
107
18
	if (envpathdone)
108
		return;
109
18
	envpathdone = TRUE;
110
18
	envpath = getenv("M4PATH");
111
18
	if (!envpath)
112
18
		return;
113
	/* for portability: getenv result is read-only */
114
	envpath = xstrdup(envpath);
115
	for (sweep = envpath;
116
	    (path = strsep(&sweep, ":")) != NULL;)
117
	    addtoincludepath(path);
118
	free(envpath);
119
18
}
120
121
static
122
struct input_file *
123
dopath(struct input_file *i, const char *filename)
124
{
125
36
	char path[PATH_MAX];
126
	struct path_entry *pe;
127
	FILE *f;
128
129
36
	for (pe = first; pe; pe = pe->next) {
130
		snprintf(path, sizeof(path), "%s/%s", pe->name, filename);
131
		if ((f = fopen(path, "r")) != 0) {
132
			set_input(i, f, path);
133
			return i;
134
		}
135
	}
136
18
	return NULL;
137
18
}
138
139
/*
 * Open `filename' for reading and install it on `i'.
 * The name is first tried as given.  If that fails and the name is not
 * absolute, the include search path (-I entries, then M4PATH) is consulted.
 * Returns `i' on success and NULL when the file cannot be found.
 */
struct input_file *
fopen_trypath(struct input_file *i, const char *filename)
{
	FILE *fp = fopen(filename, "r");

	if (fp == NULL) {
		/* absolute names are never searched for */
		if (filename[0] == '/')
			return NULL;
		ensure_m4path();
		return dopath(i, filename);
	}
	set_input(i, fp, filename);
	return i;
}
156
157
void
158
doindir(const char *argv[], int argc)
159
{
160
	ndptr n;
161
	struct macro_definition *p;
162
163
36
	n = lookup(argv[2]);
164

36
	if (n == NULL || (p = macro_getdef(n)) == NULL)
165
		m4errx(1, "indir: undefined macro %s.", argv[2]);
166
18
	argv[1] = p->defn;
167
168
54
	eval(argv+1, argc-1, p->type, is_traced(n));
169
18
}
170
171
void
172
dobuiltin(const char *argv[], int argc)
173
{
174
	ndptr p;
175
176
	argv[1] = NULL;
177
	p = macro_getbuiltin(argv[2]);
178
	if (p != NULL)
179
		eval(argv+1, argc-1, macro_builtin_type(p), is_traced(p));
180
	else
181
		m4errx(1, "unknown builtin %s.", argv[2]);
182
}
183
184
185
/* We need some temporary buffer space, as pb pushes BACK and substitution
186
 * proceeds forward... */
187
static char *buffer;		/* scratch storage, grown on demand */
static size_t bufsize = 0;	/* bytes currently allocated for buffer */
static size_t current = 0;	/* bytes of buffer currently in use */

static void addchars(const char *c, size_t n);
static void addchar(int c);
static char *twiddle(const char *p);
static char *getstring(void);
static void exit_regerror(int er, regex_t *re, const char *source);
static void do_subst(const char *string, regex_t *re, const char *source,
    const char *replace, regmatch_t *pm);
static void do_regexpindex(const char *string, regex_t *re,
    const char *source, regmatch_t *pm);
static void do_regexp(const char *string, regex_t *re, const char *source,
    const char *replace, regmatch_t *pm);
static void add_sub(int n, const char *string, regex_t *re, regmatch_t *pm);
static void add_replace(const char *string, regex_t *re, const char *replace,
    regmatch_t *pm);
/* Append a string literal (without its terminating NUL) to the buffer. */
#define addconstantstring(s) addchars((s), sizeof(s)-1)
204
205
static void
206
addchars(const char *c, size_t n)
207
{
208
936
	if (n == 0)
209
		return;
210
441
	while (current + n > bufsize) {
211
45
		if (bufsize == 0)
212
			bufsize = 1024;
213
27
		else if (bufsize <= SIZE_MAX/2) {
214
27
			bufsize *= 2;
215
		} else {
216
			errx(1, "size overflow");
217
		}
218
45
		buffer = xrealloc(buffer, bufsize, NULL);
219
	}
220
351
	memcpy(buffer+current, c, n);
221
351
	current += n;
222
819
}
223
224
static void
225
addchar(int c)
226
{
227
1854
	if (current +1 > bufsize) {
228
54
		if (bufsize == 0)
229
			bufsize = 1024;
230
		else
231
27
			bufsize *= 2;
232
27
		buffer = xrealloc(buffer, bufsize, NULL);
233
27
	}
234
927
	buffer[current++] = c;
235
927
}
236
237
static char *
238
getstring(void)
239
{
240
252
	addchar('\0');
241
126
	current = 0;
242
126
	return buffer;
243
}
244
245
246
/*
 * Report the regex error code `er' for the compiled expression `re' and
 * the pattern text `source' through m4errx().
 * regerror() is called twice: first to learn the message size, then to
 * fill a buffer of that size.
 */
static void
exit_regerror(int er, regex_t *re, const char *source)
{
	size_t needed = regerror(er, re, NULL, 0);
	char *msg = xalloc(needed,
	    "malloc in regerror: %lu", (unsigned long)needed);

	regerror(er, re, msg, needed);
	m4errx(1, "regular expression error in %s: %s.", source, msg);
}
258
259
/* warnx() plus check to see if we need to change exit code or exit.
260
 * -E flag functionality.
261
 */
262
void
263
m4_warnx(const char *fmt, ...)
264
{
265
72
	va_list ap;
266
267
36
	va_start(ap, fmt);
268
36
	vwarnx(fmt, ap);
269
36
	va_end(ap);
270
271
36
	if (fatal_warns)
272
		exit(1);
273
27
	if (error_warns)
274
27
		exit_code = 1;
275
27
}
276
277
/*
 * Append the text matched by subexpression `n' of the last match on
 * `string' to the scratch buffer.  A subexpression number beyond those in
 * `re' produces a warning; a subexpression that did not take part in the
 * match is not an error and adds nothing.
 */
static void
add_sub(int n, const char *string, regex_t *re, regmatch_t *pm)
{
	const regmatch_t *m;

	if (n > re->re_nsub) {
		m4_warnx("No subexpression %d", n);
		return;
	}
	m = &pm[n];
	if (m->rm_so == -1 || m->rm_eo == -1)
		return;
	addchars(string + m->rm_so, m->rm_eo - m->rm_so);
}
290
291
/* Add replacement string to the output buffer, recognizing special
292
 * constructs and replacing them with substrings of the original string.
293
 */
294
static void
295
add_replace(const char *string, regex_t *re, const char *replace, regmatch_t *pm)
296
{
297
	const char *p;
298
299
1359
	for (p = replace; *p != '\0'; p++) {
300
396
		if (*p == '&' && !mimic_gnu) {
301
			add_sub(0, string, re, pm);
302
			continue;
303
		}
304
396
		if (*p == '\\') {
305
117
			if (p[1] == '\\') {
306
18
				addchar(p[1]);
307
18
				p++;
308
18
				continue;
309
			}
310
99
			if (p[1] == '&') {
311
				if (mimic_gnu)
312
					add_sub(0, string, re, pm);
313
				else
314
					addchar(p[1]);
315
				p++;
316
				continue;
317
			}
318
99
			if (isdigit((unsigned char)p[1])) {
319
99
				add_sub(*(++p) - '0', string, re, pm);
320
99
				continue;
321
			}
322
		}
323
279
		addchar(*p);
324
279
	}
325
189
}
326
327
/*
 * Replace every match of `re' in `string' with the expansion of `replace',
 * accumulating the result in the scratch buffer.  The unmatched tail of
 * the string is handed to pbstr().  Regex failures other than "no match"
 * are reported through exit_regerror(), using `source' as the pattern text.
 */
static void
do_subst(const char *string, regex_t *re, const char *source,
    const char *replace, regmatch_t *pm)
{
	const char *prev_match = NULL;
	int eflags = 0;
	int rc;

	for (;;) {
		rc = regexec(re, string, re->re_nsub + 1, pm, eflags);
		if (rc != 0)
			break;

		/*
		 * The next search starts at the end of this match; it is at
		 * the beginning of a line only if the match ended in a newline.
		 */
		if (pm[0].rm_eo != 0)
			eflags = (string[pm[0].rm_eo - 1] == '\n') ?
			    0 : REG_NOTBOL;

		/* NULL length matches are special... We use the `vi-mode'
		 * rule: don't allow a NULL-match at the last match
		 * position.
		 */
		if (pm[0].rm_so == pm[0].rm_eo &&
		    string + pm[0].rm_so == prev_match) {
			if (*string == '\0')
				return;
			/* copy one character and retry after it */
			addchar(*string);
			eflags = (*string == '\n') ? 0 : REG_NOTBOL;
			string++;
			continue;
		}

		prev_match = string + pm[0].rm_so;
		addchars(string, pm[0].rm_so);
		add_replace(string, re, replace, pm);
		string += pm[0].rm_eo;
	}
	if (rc != REG_NOMATCH)
		exit_regerror(rc, re, source);
	pbstr(string);
}
367
368
/*
 * Match `re' once against `string'.  On a match, expand `replace' against
 * it and hand the result to pbstr(); when nothing matches, do nothing.
 * Any other regexec() result is reported through exit_regerror().
 */
static void
do_regexp(const char *string, regex_t *re, const char *source,
    const char *replace, regmatch_t *pm)
{
	int rc = regexec(re, string, re->re_nsub + 1, pm, 0);

	if (rc == REG_NOMATCH)
		return;
	if (rc != 0) {
		exit_regerror(rc, re, source);
		return;
	}
	add_replace(string, re, replace, pm);
	pbstr(getstring());
}
385
386
/*
 * Match `re' once against `string' and hand the start offset of the match
 * to pbunsigned(), or -1 to pbnum() when nothing matches.
 * Any other regexec() result is reported through exit_regerror().
 */
static void
do_regexpindex(const char *string, regex_t *re, const char *source,
    regmatch_t *pm)
{
	int rc = regexec(re, string, re->re_nsub + 1, pm, 0);

	if (rc == 0)
		pbunsigned(pm[0].rm_so);
	else if (rc == REG_NOMATCH)
		pbnum(-1);
	else
		exit_regerror(rc, re, source);
}
403
404
/*
 * In Gnu m4 mode, parentheses for backmatch don't work like POSIX 1003.2
 * says. So we twiddle with the regexp before passing it to regcomp:
 *   \( \) \|   lose their backslash, bare ( ) | gain one
 *   \w \W      become bracket expressions for word / non-word characters
 *   \< \>      become [[:<:]] and [[:>:]]
 *   other \X   sequences are copied unchanged
 * A '+' at the very start of the pattern (optionally after '^') is escaped.
 *
 * A pattern ending in a lone backslash is copied as is and the scan stops
 * at the terminator, so the scan never steps past the end of the string.
 *
 * Returns the rewritten pattern, which lives in the shared scratch buffer
 * and is therefore overwritten by later addchar()/addchars() calls.
 */
static char *
twiddle(const char *p)
{
	/* + at start of regexp is a normal character for Gnu m4 */
	if (*p == '^') {
		addchar(*p);
		p++;
	}
	if (*p == '+') {
		addchar('\\');
	}
	/* This could use strcspn for speed... */
	while (*p != '\0') {
		if (*p == '\\') {
			switch(p[1]) {
			case '\0':
				/*
				 * Trailing backslash: there is no second
				 * character to consume, so emit the backslash
				 * and finish here.
				 */
				addchar(*p);
				return getstring();
			case '(':
			case ')':
			case '|':
				addchar(p[1]);
				break;
			case 'w':
				addconstantstring("[_a-zA-Z0-9]");
				break;
			case 'W':
				addconstantstring("[^_a-zA-Z0-9]");
				break;
			case '<':
				addconstantstring("[[:<:]]");
				break;
			case '>':
				addconstantstring("[[:>:]]");
				break;
			default:
				addchars(p, 2);
				break;
			}
			p+=2;
			continue;
		}
		if (*p == '(' || *p == ')' || *p == '|')
			addchar('\\');

		addchar(*p);
		p++;
	}
	return getstring();
}
454
455
/* patsubst(string, regexp, opt replacement) */
456
/* argv[2]: string
457
 * argv[3]: regexp
458
 * argv[4]: opt rep
459
 */
460
void
461
dopatsubst(const char *argv[], int argc)
462
{
463
216
	if (argc <= 3) {
464
36
		m4_warnx("Too few arguments to patsubst");
465
36
		return;
466
	}
467
	/* special case: empty regexp */
468
72
	if (argv[3][0] == '\0') {
469
		const char *s;
470
		size_t len;
471

18
		if (argc > 4 && argv[4])
472
9
			len = strlen(argv[4]);
473
		else
474
			len = 0;
475
234
		for (s = argv[2]; *s != '\0'; s++) {
476
108
			addchars(argv[4], len);
477
108
			addchar(*s);
478
		}
479
9
	} else {
480
		int error;
481
63
		regex_t re;
482
		regmatch_t *pmatch;
483
		int mode = REG_EXTENDED;
484
		const char *source;
485
63
		size_t l = strlen(argv[3]);
486
487

90
		if (!mimic_gnu ||
488
36
		    (argv[3][0] == '^') ||
489
54
		    (l > 0 && argv[3][l-1] == '$'))
490
36
			mode |= REG_NEWLINE;
491
492
162
		source = mimic_gnu ? twiddle(argv[3]) : argv[3];
493
63
		error = regcomp(&re, source, mode);
494
63
		if (error != 0)
495
			exit_regerror(error, &re, source);
496
497
63
		pmatch = xreallocarray(NULL, re.re_nsub+1, sizeof(regmatch_t),
498
		    NULL);
499
126
		do_subst(argv[2], &re, source,
500

234
		    argc > 4 && argv[4] != NULL ? argv[4] : "", pmatch);
501
63
		free(pmatch);
502
63
		regfree(&re);
503
63
	}
504
72
	pbstr(getstring());
505
171
}
506
507
void
508
doregexp(const char *argv[], int argc)
509
{
510
	int error;
511
	regex_t re;
512
	regmatch_t *pmatch;
513
	const char *source;
514
515
	if (argc <= 3) {
516
		m4_warnx("Too few arguments to regexp");
517
		return;
518
	}
519
	/* special gnu case */
520
	if (argv[3][0] == '\0' && mimic_gnu) {
521
		if (argc == 4 || argv[4] == NULL)
522
			return;
523
		else
524
			pbstr(argv[4]);
525
	}
526
	source = mimic_gnu ? twiddle(argv[3]) : argv[3];
527
	error = regcomp(&re, source, REG_EXTENDED|REG_NEWLINE);
528
	if (error != 0)
529
		exit_regerror(error, &re, source);
530
531
	pmatch = xreallocarray(NULL, re.re_nsub+1, sizeof(regmatch_t), NULL);
532
	if (argc == 4 || argv[4] == NULL)
533
		do_regexpindex(argv[2], &re, source, pmatch);
534
	else
535
		do_regexp(argv[2], &re, source, argv[4], pmatch);
536
	free(pmatch);
537
	regfree(&re);
538
}
539
540
/*
 * format(fmt, args...): a restricted printf.
 * argv[2] is the format, the following arguments are consumed in order.
 * Supported directives are %%, %s and %c, each with an optional width and
 * an optional ".precision"; either may be given as '*' to take the value
 * from the next argument.  A negative width pads on the right instead of
 * on the left.  Running out of arguments or meeting any other conversion
 * is reported through m4errx().  The result is handed to pbstr().
 */
void
doformat(const char *argv[], int argc)
{
	const char *fmt = argv[2];
	const char *thisarg;
	char onechar[2];
	char *end;
	int pos = 3;		/* index of the next unused argument */
	int left_padded;
	long width;
	long extra;		/* precision: maximum bytes taken from arg */
	size_t l;

	while (*fmt != 0) {
		/* ordinary characters are copied through */
		if (*fmt != '%') {
			addchar(*fmt);
			fmt++;
			continue;
		}

		fmt++;
		if (*fmt == '%') {
			addchar(*fmt);
			fmt++;
			continue;
		}
		if (*fmt == 0) {
			/* lone '%' at the end of the format */
			addchar('%');
			break;
		}

		/* field width */
		if (*fmt == '*') {
			fmt++;
			if (pos >= argc)
				m4errx(1,
				    "Format with too many format specifiers.");
			width = strtol(argv[pos++], NULL, 10);
		} else {
			width = strtol(fmt, &end, 10);
			fmt = end;
		}
		left_padded = (width < 0);
		if (left_padded)
			width = -width;

		/* precision */
		if (*fmt != '.') {
			extra = LONG_MAX;
		} else {
			fmt++;
			if (*fmt == '*') {
				fmt++;
				if (pos >= argc)
					m4errx(1,
					    "Format with too many format specifiers.");
				extra = strtol(argv[pos++], NULL, 10);
			} else {
				extra = strtol(fmt, &end, 10);
				fmt = end;
			}
		}

		if (pos >= argc)
			m4errx(1, "Format with too many format specifiers.");

		/* conversion */
		switch (*fmt) {
		case 's':
			thisarg = argv[pos++];
			break;
		case 'c':
			onechar[0] = strtoul(argv[pos++], NULL, 10);
			onechar[1] = 0;
			thisarg = onechar;
			break;
		default:
			m4errx(1, "Unsupported format specification: %s.",
			    argv[2]);
		}
		fmt++;

		l = strlen(thisarg);
		if (l > extra)
			l = extra;

		if (!left_padded) {
			while (l < width--)
				addchar(' ');
		}
		addchars(thisarg, l);
		if (left_padded) {
			while (l < width--)
				addchar(' ');
		}
	}
	pbstr(getstring());
}
628
629
void
630
doesyscmd(const char *cmd)
631
{
632
18
	int p[2];
633
	pid_t cpid;
634
9
	char *argv[4];
635
	int cc;
636
9
	int status;
637
638
	/* Follow gnu m4 documentation: first flush buffers. */
639
9
	fflush(NULL);
640
641
9
	argv[0] = "sh";
642
9
	argv[1] = "-c";
643
9
	argv[2] = (char *)cmd;
644
9
	argv[3] = NULL;
645
646
	/* Just set up standard output, share stderr and stdin with m4 */
647
9
	if (pipe(p) == -1)
648
		err(1, "bad pipe");
649
9
	switch(cpid = fork()) {
650
	case -1:
651
		err(1, "bad fork");
652
		/* NOTREACHED */
653
	case 0:
654
		(void) close(p[0]);
655
		(void) dup2(p[1], 1);
656
		(void) close(p[1]);
657
		execv(_PATH_BSHELL, argv);
658
		exit(1);
659
	default:
660
		/* Read result in two stages, since m4's buffer is
661
		 * pushback-only. */
662
9
		(void) close(p[1]);
663
9
		do {
664
27
			char result[BUFSIZE];
665
27
			cc = read(p[0], result, sizeof result);
666
27
			if (cc > 0)
667
18
				addchars(result, cc);
668

36
		} while (cc > 0 || (cc == -1 && errno == EINTR));
669
670
9
		(void) close(p[0]);
671
18
		while (waitpid(cpid, &status, 0) == -1) {
672
			if (errno != EINTR)
673
				break;
674
		}
675
9
		pbstr(getstring());
676
	}
677
9
}
678
679
void
680
getdivfile(const char *name)
681
{
682
	FILE *f;
683
	int c;
684
685
	f = fopen(name, "r");
686
	if (!f)
687
		return;
688
689
	while ((c = getc(f))!= EOF)
690
		putc(c, active);
691
	(void) fclose(f);
692
}