GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: usr.bin/m4/gnum4.c Lines: 200 310 64.5 %
Date: 2017-11-13 Branches: 112 210 53.3 %

Line Branch Exec Source
1
/* $OpenBSD: gnum4.c,v 1.52 2017/08/21 21:41:13 deraadt Exp $ */
2
3
/*
4
 * Copyright (c) 1999 Marc Espie
5
 *
6
 * Redistribution and use in source and binary forms, with or without
7
 * modification, are permitted provided that the following conditions
8
 * are met:
9
 * 1. Redistributions of source code must retain the above copyright
10
 *    notice, this list of conditions and the following disclaimer.
11
 * 2. Redistributions in binary form must reproduce the above copyright
12
 *    notice, this list of conditions and the following disclaimer in the
13
 *    documentation and/or other materials provided with the distribution.
14
 *
15
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
16
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
19
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25
 * SUCH DAMAGE.
26
 */
27
28
/*
29
 * functions needed to support gnu-m4 extensions, including a fake freezing
30
 */
31
32
#include <sys/types.h>
33
#include <sys/wait.h>
34
#include <ctype.h>
35
#include <err.h>
36
#include <paths.h>
37
#include <regex.h>
38
#include <stdarg.h>
39
#include <stddef.h>
40
#include <stdlib.h>
41
#include <stdint.h>
42
#include <stdio.h>
43
#include <string.h>
44
#include <errno.h>
45
#include <unistd.h>
46
#include <limits.h>
47
#include "mdef.h"
48
#include "stdd.h"
49
#include "extern.h"
50
51
52
/*
 * Nonzero selects GNU m4 compatible behaviour in this file: regexps are
 * rewritten by twiddle() and add_replace() treats `\&' (not `&') as the
 * whole match.
 */
int mimic_gnu = 0;
53
54
/*
55
 * Support for include path search
56
 * First search in the current directory.
57
 * If not found, and the path is not absolute, include path kicks in.
58
 * First, -I options, in the order found on the command line.
59
 * Then M4PATH env variable
60
 */
61
62
/* One directory of the include search path; entries form a singly linked list. */
struct path_entry {
	char *name;			/* directory name */
	struct path_entry *next;	/* following entry, or null at the tail */
};

/* Head and tail of the search list; addtoincludepath() appends at the tail. */
struct path_entry *first, *last;
66
67
/* Include-path helpers private to this file. */
static struct path_entry *new_path_entry(const char *dirname);
static void ensure_m4path(void);
static struct input_file *dopath(struct input_file *i, const char *filename);
70
71
static struct path_entry *
72
new_path_entry(const char *dirname)
73
{
74
	struct path_entry *n;
75
76
10
	n = malloc(sizeof(struct path_entry));
77
5
	if (!n)
78
		errx(1, "out of memory");
79
5
	n->name = xstrdup(dirname);
80
5
	n->next = 0;
81
5
	return n;
82
}
83
84
void
85
addtoincludepath(const char *dirname)
86
{
87
	struct path_entry *n;
88
89
10
	n = new_path_entry(dirname);
90
91
5
	if (last) {
92
		last->next = n;
93
		last = n;
94
	}
95
	else
96
5
		last = first = n;
97
5
}
98
99
static void
100
ensure_m4path()
101
{
102
	static int envpathdone = 0;
103
	char *envpath;
104
20
	char *sweep;
105
	char *path;
106
107
10
	if (envpathdone)
108
		return;
109
10
	envpathdone = TRUE;
110
10
	envpath = getenv("M4PATH");
111
10
	if (!envpath)
112
10
		return;
113
	/* for portability: getenv result is read-only */
114
	envpath = xstrdup(envpath);
115
	for (sweep = envpath;
116
	    (path = strsep(&sweep, ":")) != NULL;)
117
	    addtoincludepath(path);
118
	free(envpath);
119
10
}
120
121
static
122
struct input_file *
123
dopath(struct input_file *i, const char *filename)
124
{
125
20
	char path[PATH_MAX];
126
	struct path_entry *pe;
127
	FILE *f;
128
129
20
	for (pe = first; pe; pe = pe->next) {
130
		snprintf(path, sizeof(path), "%s/%s", pe->name, filename);
131
		if ((f = fopen(path, "r")) != 0) {
132
			set_input(i, f, path);
133
			return i;
134
		}
135
	}
136
10
	return NULL;
137
10
}
138
139
/*
 * Open filename for reading and attach it to i through set_input().
 * The name is first tried as given; if that fails and the name does not
 * start with `/', the M4PATH directories are added to the search list
 * (once) and the list is searched with dopath().
 * Returns i on success, NULL when the file cannot be opened anywhere.
 */
struct input_file *
fopen_trypath(struct input_file *i, const char *filename)
{
	FILE *fp = fopen(filename, "r");

	if (fp == NULL) {
		/* absolute names are never looked up on the search path */
		if (filename[0] == '/')
			return NULL;
		ensure_m4path();
		return dopath(i, filename);
	}
	set_input(i, fp, filename);
	return i;
}
156
157
void
158
doindir(const char *argv[], int argc)
159
{
160
	ndptr n;
161
	struct macro_definition *p;
162
163
20
	n = lookup(argv[2]);
164

20
	if (n == NULL || (p = macro_getdef(n)) == NULL)
165
		m4errx(1, "indir: undefined macro %s.", argv[2]);
166
10
	argv[1] = p->defn;
167
168
30
	eval(argv+1, argc-1, p->type, is_traced(n));
169
10
}
170
171
void
172
dobuiltin(const char *argv[], int argc)
173
{
174
	ndptr p;
175
176
	argv[1] = NULL;
177
	p = macro_getbuiltin(argv[2]);
178
	if (p != NULL)
179
		eval(argv+1, argc-1, macro_builtin_type(p), is_traced(p));
180
	else
181
		m4errx(1, "unknown builtin %s.", argv[2]);
182
}
183
184
185
/* We need some temporary buffer space, as pb pushes BACK and substitution
186
 * proceeds forward... */
187
static char *buffer;		/* scratch storage, grown by addchar()/addchars() */
static size_t bufsize = 0;	/* bytes currently allocated for buffer */
static size_t current = 0;	/* bytes of buffer in use */

/* Scratch-buffer primitives. */
static void addchars(const char *c, size_t n);
static void addchar(int c);
static char *twiddle(const char *p);
static char *getstring(void);

/* Regular expression helpers. */
static void exit_regerror(int er, regex_t *re, const char *source);
static void do_subst(const char *string, regex_t *re, const char *source,
    const char *replace, regmatch_t *pm);
static void do_regexpindex(const char *string, regex_t *re,
    const char *source, regmatch_t *pm);
static void do_regexp(const char *string, regex_t *re, const char *source,
    const char *replace, regmatch_t *pm);
static void add_sub(int n, const char *string, regex_t *re, regmatch_t *pm);
static void add_replace(const char *string, regex_t *re, const char *replace,
    regmatch_t *pm);

/* Append a string literal (without its terminating NUL) to the buffer. */
#define addconstantstring(s) addchars((s), sizeof(s)-1)
204
205
static void
206
addchars(const char *c, size_t n)
207
{
208
520
	if (n == 0)
209
		return;
210
440
	while (current + n > bufsize) {
211
25
		if (bufsize == 0)
212
			bufsize = 1024;
213
15
		else if (bufsize <= SIZE_MAX/2) {
214
15
			bufsize *= 2;
215
		} else {
216
			errx(1, "size overflow");
217
		}
218
25
		buffer = xrealloc(buffer, bufsize, NULL);
219
	}
220
195
	memcpy(buffer+current, c, n);
221
195
	current += n;
222
455
}
223
224
static void
225
addchar(int c)
226
{
227
1030
	if (current +1 > bufsize) {
228
30
		if (bufsize == 0)
229
			bufsize = 1024;
230
		else
231
15
			bufsize *= 2;
232
15
		buffer = xrealloc(buffer, bufsize, NULL);
233
15
	}
234
515
	buffer[current++] = c;
235
515
}
236
237
static char *
238
getstring(void)
239
{
240
140
	addchar('\0');
241
70
	current = 0;
242
70
	return buffer;
243
}
244
245
246
/*
 * Turn the regcomp()/regexec() error code er into its message and report
 * it, together with the offending expression source, via m4errx(1, ...).
 */
static void
exit_regerror(int er, regex_t *re, const char *source)
{
	/* a zero-sized query makes regerror() return the space required */
	size_t needed = regerror(er, re, NULL, 0);
	char *msg = xalloc(needed, "malloc in regerror: %lu",
	    (unsigned long)needed);

	regerror(er, re, msg, needed);
	m4errx(1, "regular expression error in %s: %s.", source, msg);
}
258
259
/* warnx() plus check to see if we need to change exit code or exit.
260
 * -E flag functionality.
261
 */
262
void
263
m4_warnx(const char *fmt, ...)
264
{
265
40
	va_list ap;
266
267
20
	va_start(ap, fmt);
268
20
	vwarnx(fmt, ap);
269
20
	va_end(ap);
270
271
20
	if (fatal_warns)
272
		exit(1);
273
15
	if (error_warns)
274
15
		exit_code = 1;
275
15
}
276
277
/*
 * Append the text matched by subexpression n (0 is the whole match) of
 * the match described by pm against string.  A number larger than the
 * expression's subexpression count produces a warning; a subexpression
 * that took no part in the match appends nothing and is not an error.
 */
static void
add_sub(int n, const char *string, regex_t *re, regmatch_t *pm)
{
	if (n > re->re_nsub) {
		m4_warnx("No subexpression %d", n);
		return;
	}
	if (pm[n].rm_so == -1 || pm[n].rm_eo == -1)
		return;
	addchars(string + pm[n].rm_so, pm[n].rm_eo - pm[n].rm_so);
}
290
291
/* Add replacement string to the output buffer, recognizing special
292
 * constructs and replacing them with substrings of the original string.
293
 */
294
static void
295
add_replace(const char *string, regex_t *re, const char *replace, regmatch_t *pm)
296
{
297
	const char *p;
298
299
755
	for (p = replace; *p != '\0'; p++) {
300
220
		if (*p == '&' && !mimic_gnu) {
301
			add_sub(0, string, re, pm);
302
			continue;
303
		}
304
220
		if (*p == '\\') {
305
65
			if (p[1] == '\\') {
306
10
				addchar(p[1]);
307
10
				p++;
308
10
				continue;
309
			}
310
55
			if (p[1] == '&') {
311
				if (mimic_gnu)
312
					add_sub(0, string, re, pm);
313
				else
314
					addchar(p[1]);
315
				p++;
316
				continue;
317
			}
318
55
			if (isdigit((unsigned char)p[1])) {
319
55
				add_sub(*(++p) - '0', string, re, pm);
320
55
				continue;
321
			}
322
		}
323
155
		addchar(*p);
324
155
	}
325
105
}
326
327
/*
 * Replace every match of re in string with the expansion of replace,
 * accumulating the result in the scratch buffer.  The unmatched tail of
 * string is handed to pbstr() at the end; regexec() failures other than
 * REG_NOMATCH go to exit_regerror().
 */
static void
do_subst(const char *string, regex_t *re, const char *source,
    const char *replace, regmatch_t *pm)
{
	const char *prev_match = NULL;
	int eflags = 0;
	int rc;

	for (;;) {
		regoff_t so, eo;

		rc = regexec(re, string, re->re_nsub+1, pm, eflags);
		if (rc != 0)
			break;
		so = pm[0].rm_so;
		eo = pm[0].rm_eo;

		/* the next search starts at a line start only after a newline */
		if (eo != 0)
			eflags = (string[eo-1] == '\n') ? 0 : REG_NOTBOL;

		/* NULL length matches are special... We use the `vi-mode'
		 * rule: don't allow a NULL-match at the last match
		 * position.  Copy one character and retry past it.
		 */
		if (so == eo && string + so == prev_match) {
			char skipped = *string;

			if (skipped == '\0')
				return;
			addchar(skipped);
			string++;
			eflags = (skipped == '\n') ? 0 : REG_NOTBOL;
			continue;
		}

		prev_match = string + so;
		addchars(string, so);		/* text before the match */
		add_replace(string, re, replace, pm);
		string += eo;
	}
	if (rc != REG_NOMATCH)
		exit_regerror(rc, re, source);
	pbstr(string);
}
367
368
/*
 * Search string for re.  On a match, expand replace against the match
 * and hand the result to pbstr(); on REG_NOMATCH do nothing; any other
 * regexec() result goes to exit_regerror().
 */
static void
do_regexp(const char *string, regex_t *re, const char *source,
    const char *replace, regmatch_t *pm)
{
	int rc = regexec(re, string, re->re_nsub+1, pm, 0);

	if (rc == REG_NOMATCH)
		return;
	if (rc != 0) {
		exit_regerror(rc, re, source);
		return;
	}
	add_replace(string, re, replace, pm);
	pbstr(getstring());
}
385
386
/*
 * Search string for re and push back the result as a number: the start
 * offset of the match via pbunsigned(), or -1 via pbnum() when nothing
 * matches.  Any other regexec() result goes to exit_regerror().
 */
static void
do_regexpindex(const char *string, regex_t *re, const char *source,
    regmatch_t *pm)
{
	int rc = regexec(re, string, re->re_nsub+1, pm, 0);

	if (rc == 0)
		pbunsigned(pm[0].rm_so);
	else if (rc == REG_NOMATCH)
		pbnum(-1);
	else
		exit_regerror(rc, re, source);
}
403
404
/* In Gnu m4 mode, parentheses for backmatch don't work like POSIX 1003.2
 * says. So we twiddle with the regexp before passing it to regcomp.
 *
 * The rewritten expression is built in the scratch buffer and returned
 * through getstring(), so it is only valid until the buffer is reused:
 *   \( \) \|   become the bare operators ( ) |
 *   ( ) |      become the escaped literals \( \) \|
 *   \w \W      become the bracket expressions for (non-)word characters
 *   \< \>      become [[:<:]] and [[:>:]]
 *   leading +  (optionally after ^) is escaped
 * Any other backslash pair is copied unchanged.
 */
static char *
twiddle(const char *p)
{
	/* + at start of regexp is a normal character for Gnu m4 */
	if (*p == '^') {
		addchar(*p);
		p++;
	}
	if (*p == '+') {
		addchar('\\');
	}
	/* This could use strcspn for speed... */
	while (*p != '\0') {
		if (*p == '\\') {
			/*
			 * Trailing backslash: copy only the backslash and
			 * stop.  Treating it as a two-character escape would
			 * copy the terminator and then step past the end of
			 * the string.
			 */
			if (p[1] == '\0') {
				addchar(*p);
				break;
			}
			switch(p[1]) {
			case '(':
			case ')':
			case '|':
				addchar(p[1]);
				break;
			case 'w':
				addconstantstring("[_a-zA-Z0-9]");
				break;
			case 'W':
				addconstantstring("[^_a-zA-Z0-9]");
				break;
			case '<':
				addconstantstring("[[:<:]]");
				break;
			case '>':
				addconstantstring("[[:>:]]");
				break;
			default:
				addchars(p, 2);
				break;
			}
			p+=2;
			continue;
		}
		if (*p == '(' || *p == ')' || *p == '|')
			addchar('\\');

		addchar(*p);
		p++;
	}
	return getstring();
}
454
455
/* patsubst(string, regexp, opt replacement) */
456
/* argv[2]: string
457
 * argv[3]: regexp
458
 * argv[4]: opt rep
459
 */
460
void
461
dopatsubst(const char *argv[], int argc)
462
{
463
120
	if (argc <= 3) {
464
20
		m4_warnx("Too few arguments to patsubst");
465
20
		return;
466
	}
467
	/* special case: empty regexp */
468
40
	if (argv[3][0] == '\0') {
469
		const char *s;
470
		size_t len;
471

10
		if (argc > 4 && argv[4])
472
5
			len = strlen(argv[4]);
473
		else
474
			len = 0;
475
130
		for (s = argv[2]; *s != '\0'; s++) {
476
60
			addchars(argv[4], len);
477
60
			addchar(*s);
478
		}
479
5
	} else {
480
		int error;
481
35
		regex_t re;
482
		regmatch_t *pmatch;
483
		int mode = REG_EXTENDED;
484
		const char *source;
485
35
		size_t l = strlen(argv[3]);
486
487

50
		if (!mimic_gnu ||
488
20
		    (argv[3][0] == '^') ||
489
30
		    (l > 0 && argv[3][l-1] == '$'))
490
20
			mode |= REG_NEWLINE;
491
492
90
		source = mimic_gnu ? twiddle(argv[3]) : argv[3];
493
35
		error = regcomp(&re, source, mode);
494
35
		if (error != 0)
495
			exit_regerror(error, &re, source);
496
497
35
		pmatch = xreallocarray(NULL, re.re_nsub+1, sizeof(regmatch_t),
498
		    NULL);
499
70
		do_subst(argv[2], &re, source,
500

130
		    argc > 4 && argv[4] != NULL ? argv[4] : "", pmatch);
501
35
		free(pmatch);
502
35
		regfree(&re);
503
35
	}
504
40
	pbstr(getstring());
505
95
}
506
507
void
508
doregexp(const char *argv[], int argc)
509
{
510
	int error;
511
	regex_t re;
512
	regmatch_t *pmatch;
513
	const char *source;
514
515
	if (argc <= 3) {
516
		m4_warnx("Too few arguments to regexp");
517
		return;
518
	}
519
	/* special gnu case */
520
	if (argv[3][0] == '\0' && mimic_gnu) {
521
		if (argc == 4 || argv[4] == NULL)
522
			return;
523
		else
524
			pbstr(argv[4]);
525
	}
526
	source = mimic_gnu ? twiddle(argv[3]) : argv[3];
527
	error = regcomp(&re, source, REG_EXTENDED|REG_NEWLINE);
528
	if (error != 0)
529
		exit_regerror(error, &re, source);
530
531
	pmatch = xreallocarray(NULL, re.re_nsub+1, sizeof(regmatch_t), NULL);
532
	if (argc == 4 || argv[4] == NULL)
533
		do_regexpindex(argv[2], &re, source, pmatch);
534
	else
535
		do_regexp(argv[2], &re, source, argv[4], pmatch);
536
	free(pmatch);
537
	regfree(&re);
538
}
539
540
/*
 * format(fmt, args...): a small printf look-alike.
 * argv[2] is the format; the arguments start at argv[3].  Supported
 * directives are %% and %[width][.precision]s / %[width][.precision]c,
 * where width and precision are decimal numbers or `*' (taken from the
 * next argument).  A negative width pads on the right instead of the
 * left.  The formatted text is handed to pbstr().  Running out of
 * arguments or meeting any other conversion is reported via m4errx().
 */
void
doformat(const char *argv[], int argc)
{
	const char *fmt = argv[2];
	int next = 3;			/* next unconsumed argument */

	while (*fmt != 0) {
		const char *text;
		char onechar[2];
		long width, precision;
		int pad_right;
		size_t len;

		/* ordinary character */
		if (*fmt != '%') {
			addchar(*fmt++);
			continue;
		}

		fmt++;
		/* %% is a literal percent sign */
		if (*fmt == '%') {
			addchar(*fmt++);
			continue;
		}
		/* a lone % at the very end is copied as is */
		if (*fmt == 0) {
			addchar('%');
			break;
		}

		/* field width */
		if (*fmt != '*') {
			width = strtol(fmt, (char **)&fmt, 10);
		} else {
			fmt++;
			if (next >= argc)
				m4errx(1,
				    "Format with too many format specifiers.");
			width = strtol(argv[next++], NULL, 10);
		}
		pad_right = width < 0;
		if (pad_right)
			width = -width;

		/* precision: upper bound on the characters copied */
		if (*fmt != '.') {
			precision = LONG_MAX;
		} else {
			fmt++;
			if (*fmt != '*') {
				precision = strtol(fmt, (char **)&fmt, 10);
			} else {
				fmt++;
				if (next >= argc)
					m4errx(1,
					    "Format with too many format specifiers.");
				precision = strtol(argv[next++], NULL, 10);
			}
		}

		if (next >= argc)
			m4errx(1, "Format with too many format specifiers.");
		switch(*fmt) {
		case 's':
			text = argv[next++];
			break;
		case 'c':
			/* the argument is the numeric character code */
			onechar[0] = strtoul(argv[next++], NULL, 10);
			onechar[1] = 0;
			text = onechar;
			break;
		default:
			m4errx(1, "Unsupported format specification: %s.",
			    argv[2]);
		}
		fmt++;

		len = strlen(text);
		if (len > precision)
			len = precision;
		if (!pad_right) {
			while (len < width--)
				addchar(' ');
		}
		addchars(text, len);
		if (pad_right) {
			while (len < width--)
				addchar(' ');
		}
	}
	pbstr(getstring());
}
628
629
void
630
doesyscmd(const char *cmd)
631
{
632
10
	int p[2];
633
	pid_t cpid;
634
5
	char *argv[4];
635
	int cc;
636
5
	int status;
637
638
	/* Follow gnu m4 documentation: first flush buffers. */
639
5
	fflush(NULL);
640
641
5
	argv[0] = "sh";
642
5
	argv[1] = "-c";
643
5
	argv[2] = (char *)cmd;
644
5
	argv[3] = NULL;
645
646
	/* Just set up standard output, share stderr and stdin with m4 */
647
5
	if (pipe(p) == -1)
648
		err(1, "bad pipe");
649
5
	switch(cpid = fork()) {
650
	case -1:
651
		err(1, "bad fork");
652
		/* NOTREACHED */
653
	case 0:
654
		(void) close(p[0]);
655
		(void) dup2(p[1], 1);
656
		(void) close(p[1]);
657
		execv(_PATH_BSHELL, argv);
658
		exit(1);
659
	default:
660
		/* Read result in two stages, since m4's buffer is
661
		 * pushback-only. */
662
5
		(void) close(p[1]);
663
5
		do {
664
15
			char result[BUFSIZE];
665
15
			cc = read(p[0], result, sizeof result);
666
15
			if (cc > 0)
667
10
				addchars(result, cc);
668

20
		} while (cc > 0 || (cc == -1 && errno == EINTR));
669
670
5
		(void) close(p[0]);
671
10
		while (waitpid(cpid, &status, 0) == -1) {
672
			if (errno != EINTR)
673
				break;
674
		}
675
5
		pbstr(getstring());
676
	}
677
5
}
678
679
void
680
getdivfile(const char *name)
681
{
682
	FILE *f;
683
	int c;
684
685
	f = fopen(name, "r");
686
	if (!f)
687
		return;
688
689
	while ((c = getc(f))!= EOF)
690
		putc(c, active);
691
	(void) fclose(f);
692
}