GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: usr.bin/hexdump/parse.c Lines: 129 224 57.6 %
Date: 2017-11-13 Branches: 113 219 51.6 %

Line Branch Exec Source
1
/*	$OpenBSD: parse.c,v 1.22 2016/09/04 16:41:43 tb Exp $	*/
2
/*	$NetBSD: parse.c,v 1.12 2001/12/07 13:37:39 bjh21 Exp $	*/
3
4
/*
5
 * Copyright (c) 1989, 1993
6
 *	The Regents of the University of California.  All rights reserved.
7
 *
8
 * Redistribution and use in source and binary forms, with or without
9
 * modification, are permitted provided that the following conditions
10
 * are met:
11
 * 1. Redistributions of source code must retain the above copyright
12
 *    notice, this list of conditions and the following disclaimer.
13
 * 2. Redistributions in binary form must reproduce the above copyright
14
 *    notice, this list of conditions and the following disclaimer in the
15
 *    documentation and/or other materials provided with the distribution.
16
 * 3. Neither the name of the University nor the names of its contributors
17
 *    may be used to endorse or promote products derived from this software
18
 *    without specific prior written permission.
19
 *
20
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30
 * SUCH DAMAGE.
31
 */
32
33
#include <sys/types.h>
34
#include <sys/file.h>
35
36
#include <ctype.h>
37
#include <err.h>
38
#include <errno.h>
39
#include <fcntl.h>
40
#include <stdio.h>
41
#include <stdlib.h>
42
#include <string.h>
43
44
#include "hexdump.h"
45
46
/* Set by rewrite() to the format unit that contains a %_A conversion. */
FU *endfu;					/* format at end-of-data */
47
48
static __dead void	 badcnt(char *);
49
static __dead void	 badconv(char *);
50
static __dead void	 badfmt(const char *);
51
static __dead void	 badsfmt(void);
52
static void		 escape(char *);
53
54
/*
 * Read format strings from the file `name', one per line, and hand each
 * one to add().  Leading white space is skipped; lines that are empty
 * after that, or that start with '#', are ignored.  Exits through err()
 * if the file cannot be opened or memory runs out.
 */
void
addfile(char *name)
{
	FILE *stream;
	size_t linelen;
	char *line, *copy, *cur;

	stream = fopen(name, "r");
	if (stream == NULL)
		err(1, "fopen %s", name);

	copy = NULL;
	for (;;) {
		line = fgetln(stream, &linelen);
		if (line == NULL)
			break;

		if (line[linelen - 1] != '\n') {
			/*
			 * Last line has no newline: fgetln() gives no room
			 * for a terminator, so work on a NUL-terminated copy.
			 */
			copy = malloc(linelen + 1);
			if (copy == NULL)
				err(1, NULL);
			memcpy(copy, line, linelen);
			copy[linelen] = '\0';
			line = copy;
		} else
			line[linelen - 1] = '\0';

		cur = line;
		while (isspace((unsigned char)*cur))
			cur++;
		if (*cur != '\0' && *cur != '#')
			add(cur);
	}
	free(copy);
	(void)fclose(stream);
}
84
85
void
86
add(const char *fmt)
87
{
88
	const char *p;
89
	static FS **nextfs;
90
	FS *tfs;
91
	FU *tfu, **nextfu;
92
	const char *savep;
93
94
	/* start new linked list of format units */
95
16354
	if ((tfs = calloc(1, sizeof(FS))) == NULL)
96
		err(1, NULL);
97
16354
	if (!fshead)
98
		fshead = tfs;
99
	else
100
8177
		*nextfs = tfs;
101
8177
	nextfs = &tfs->nextfs;
102
8177
	nextfu = &tfs->nextfu;
103
104
	/* take the format string and break it up into format units */
105
29149
	for (p = fmt;;) {
106
		/* skip leading white space */
107
89006
		for (; isspace((unsigned char)*p); ++p);
108
29149
		if (!*p)
109
			break;
110
111
		/* allocate a new format unit and link it in */
112
20972
		if ((tfu = calloc(1, sizeof(FU))) == NULL)
113
			err(1, NULL);
114
20972
		*nextfu = tfu;
115
20972
		nextfu = &tfu->nextfu;
116
20972
		tfu->reps = 1;
117
118
		/* if leading digit, repetition count */
119
20972
		if (isdigit((unsigned char)*p)) {
120
35826
			for (savep = p; isdigit((unsigned char)*p); ++p);
121

15354
			if (!isspace((unsigned char)*p) && *p != '/')
122
				badfmt(fmt);
123
			/* may overwrite either white space or slash */
124
7677
			tfu->reps = atoi(savep);
125
7677
			tfu->flags = F_SETREP;
126
			/* skip trailing white space */
127
15354
			for (++p; isspace((unsigned char)*p); ++p);
128
		}
129
130
		/* skip slash and trailing white space */
131
20972
		if (*p == '/')
132
			while (isspace((unsigned char)*++p));
133
134
		/* byte count */
135
20972
		if (isdigit((unsigned char)*p)) {
136
30708
			for (savep = p; isdigit((unsigned char)*p); ++p);
137
7677
			if (!isspace((unsigned char)*p))
138
				badfmt(fmt);
139
7677
			tfu->bcnt = atoi(savep);
140
			/* skip trailing white space */
141
15354
			for (++p; isspace((unsigned char)*p); ++p);
142
		}
143
144
		/* format */
145
20972
		if (*p != '"')
146
			badfmt(fmt);
147
248664
		for (savep = ++p; *p != '"';)
148
103360
			if (*p++ == 0)
149
				badfmt(fmt);
150
20972
		if ((tfu->fmt = strndup(savep, p - savep)) == NULL)
151
			err(1, NULL);
152
20972
		escape(tfu->fmt);
153
20972
		p++;
154
	}
155
8177
}
156
157
/*
 * Characters that may appear between '%' and the conversion character:
 * the precision dot, the flag characters and digits.  size() and
 * rewrite() scan with spec + 1 (i.e. without the '.') when they need
 * to stop at the precision and parse it themselves.
 */
static const char *spec = ".#-+ 0123456789";
158
159
int
160
size(FS *fs)
161
{
162
	FU *fu;
163
	int bcnt, cursize;
164
	char *fmt;
165
	int prec;
166
167
	/* figure out the data block size needed for each format unit */
168
66475
	for (cursize = 0, fu = fs->nextfu; fu; fu = fu->nextfu) {
169
20972
		if (fu->bcnt) {
170
7677
			cursize += fu->bcnt * fu->reps;
171
7677
			continue;
172
		}
173
99242
		for (bcnt = prec = 0, fmt = fu->fmt; *fmt; ++fmt) {
174
36326
			if (*fmt != '%')
175
				continue;
176
			/*
177
			 * skip any special chars -- save precision in
178
			 * case it's a %s format.
179
			 */
180

47562
			while (*++fmt && strchr(spec + 1, *fmt));
181

10736
			if (*fmt == '.' && isdigit((unsigned char)*++fmt)) {
182
5118
				prec = atoi(fmt);
183
10236
				while (isdigit((unsigned char)*++fmt));
184
			}
185



6118
			switch(*fmt) {
186
			case 'c':
187
				bcnt += 1;
188
				break;
189
			case 'd': case 'i': case 'o': case 'u':
190
			case 'x': case 'X':
191
500
				bcnt += 4;
192
500
				break;
193
			case 'e': case 'E': case 'f': case 'g': case 'G':
194
				bcnt += 8;
195
				break;
196
			case 's':
197
				bcnt += prec;
198
				break;
199
			case '_':
200

35826
				switch(*++fmt) {
201
				case 'c': case 'p': case 'u':
202
					bcnt += 1;
203
					break;
204
				}
205
			}
206
		}
207
13295
		cursize += bcnt * fu->reps;
208
13295
	}
209
8177
	return (cursize);
210
}
211
212
void
213
rewrite(FS *fs)
214
{
215
	enum { NOTOKAY, USEBCNT, USEPREC } sokay;
216
	PR *pr, **nextpr;
217
	FU *fu;
218
	char *p1, *p2;
219
16354
	char savech, *fmtp, cs[4];
220
	int nconv, prec;
221
222
	nextpr = NULL;
223
	prec = 0;
224
58298
	for (fu = fs->nextfu; fu; fu = fu->nextfu) {
225
		/*
226
		 * Break each format unit into print units; each conversion
227
		 * character gets its own.
228
		 */
229
68534
		for (nconv = 0, fmtp = fu->fmt; *fmtp; nextpr = &pr->nextpr) {
230
31208
			if ((pr = calloc(1, sizeof(PR))) == NULL)
231
				err(1, NULL);
232
31208
			if (!fu->nextpr)
233
				fu->nextpr = pr;
234
			else
235
				*nextpr = pr;
236
237
			/* Skip preceding text and up to the next % sign. */
238

167835
			for (p1 = fmtp; *p1 && *p1 != '%'; ++p1);
239
240
			/* Only text in the string. */
241
31208
			if (!*p1) {
242
17913
				pr->fmt = fmtp;
243
17913
				pr->flags = F_TEXT;
244
17913
				break;
245
			}
246
247
			/*
248
			 * Get precision for %s -- if have a byte count, don't
249
			 * need it.
250
			 */
251
13295
			if (fu->bcnt) {
252
				sokay = USEBCNT;
253
				/* Skip to conversion character. */
254

53739
				for (++p1; *p1 && strchr(spec, *p1); ++p1);
255
			} else {
256
				/* Skip any special chars, field width. */
257

47562
				while (*++p1 && strchr(spec + 1, *p1));
258

10736
				if (*p1 == '.' &&
259
5118
				    isdigit((unsigned char)*++p1)) {
260
					sokay = USEPREC;
261
5118
					prec = atoi(p1);
262
10236
					while (isdigit((unsigned char)*++p1))
263
						continue;
264
				} else
265
					sokay = NOTOKAY;
266
			}
267
268
13295
			p2 = *p1 ? p1 + 1 : p1;	/* Set end pointer. */
269
13295
			cs[0] = *p1;		/* Set conversion string. */
270
13295
			cs[1] = '\0';
271
272
			/*
273
			 * Figure out the byte count for each conversion;
274
			 * rewrite the format as necessary, set up blank-
275
			 * padding for end of data.
276
			 */
277



13295
			switch(cs[0]) {
278
			case 'c':
279
				pr->flags = F_CHAR;
280
				switch(fu->bcnt) {
281
				case 0: case 1:
282
					pr->bcnt = 1;
283
					break;
284
				default:
285
					p1[1] = '\0';
286
					badcnt(p1);
287
				}
288
				break;
289
			case 'd': case 'i':
290
			case 'o': case 'u': case 'x': case 'X':
291

10736
				if (cs[0] == 'd' || cs[0] == 'i')
292
500
					pr->flags = F_INT;
293
				else
294
					pr->flags = F_UINT;
295
296
5618
				cs[3] = '\0';
297
5618
				cs[2] = cs[0];
298
5618
				cs[1] = 'l';
299
5618
				cs[0] = 'l';
300

5618
				switch(fu->bcnt) {
301
				case 0: case 4:
302
					pr->bcnt = 4;
303
500
					break;
304
				case 1:
305
					pr->bcnt = 1;
306
5118
					break;
307
				case 2:
308
					pr->bcnt = 2;
309
					break;
310
				case 8:
311
					pr->bcnt = 8;
312
					break;
313
				default:
314
					p1[1] = '\0';
315
					badcnt(p1);
316
				}
317
5618
				break;
318
			case 'e': case 'E': case 'f': case 'g': case 'G':
319
				pr->flags = F_DBL;
320
				switch(fu->bcnt) {
321
				case 0: case 8:
322
					pr->bcnt = 8;
323
					break;
324
				case 4:
325
					pr->bcnt = 4;
326
					break;
327
				default:
328
					p1[1] = '\0';
329
					badcnt(p1);
330
				}
331
				break;
332
			case 's':
333
				pr->flags = F_STR;
334
				switch(sokay) {
335
				case NOTOKAY:
336
					badsfmt();
337
				case USEBCNT:
338
					pr->bcnt = fu->bcnt;
339
					break;
340
				case USEPREC:
341
					pr->bcnt = prec;
342
					break;
343
				}
344
				break;
345
			case '_':
346
10236
				++p2;
347

10236
				switch(p1[1]) {
348
				case 'A':
349
2559
					endfu = fu;
350
2559
					fu->flags |= F_IGNORE;
351
					/* FALLTHROUGH */
352
				case 'a':
353
5118
					pr->flags = F_ADDRESS;
354
5118
					++p2;
355

5118
					switch(p1[2]) {
356
					case 'd': case 'o': case'x':
357
5118
						cs[0] = 'l';
358
5118
						cs[1] = 'l';
359
5118
						cs[2] = p1[2];
360
5118
						cs[3] = '\0';
361
						break;
362
					default:
363
						if (p1[2])
364
							p1[3] = '\0';
365
						badconv(p1);
366
					}
367
5118
					break;
368
				case 'c':
369
				case 'p':
370
				case 'u':
371
2559
					if (p1[1] == 'c') {
372
						pr->flags = F_C;
373
						/* cs[0] = 'c';	set in conv_c */
374
2559
					} else if (p1[1] == 'p') {
375
2559
						pr->flags = F_P;
376
2559
						cs[0] = 'c';
377
2559
					} else {
378
						pr->flags = F_U;
379
						/* cs[0] = 'c';	set in conv_u */
380
					}
381
382
2559
					switch(fu->bcnt) {
383
					case 0: case 1:
384
2559
						pr->bcnt = 1;
385
						break;
386
					default:
387
						p1[2] = '\0';
388
						badcnt(p1);
389
					}
390
2559
					break;
391
				default:
392
					if (p1[1])
393
						p1[2] = '\0';
394
					badconv(p1);
395
				}
396
				break;
397
			default:
398
				if (cs[0])
399
					p1[1] = '\0';
400
				badconv(p1);
401
			}
402
403
			/*
404
			 * Copy to PR format string, set conversion character
405
			 * pointer, update original.
406
			 */
407
13295
			savech = *p2;
408
13295
			p1[0] = '\0';
409
13295
			if (asprintf(&pr->fmt, "%s%s", fmtp, cs) == -1)
410
				err(1, NULL);
411
13295
			*p2 = savech;
412
13295
			pr->cchar = pr->fmt + (p1 - fmtp);
413
			fmtp = p2;
414
415
			/* Only one conversion character if byte count. */
416

29149
			if (!(pr->flags&F_ADDRESS) && fu->bcnt && nconv++)
417
				errx(1,
418
			    "byte count with multiple conversion characters");
419
		}
420
		/*
421
		 * If format unit byte count not specified, figure it out
422
		 * so can adjust rep count later.
423
		 */
424
20972
		if (!fu->bcnt)
425
63416
			for (pr = fu->nextpr; pr; pr = pr->nextpr)
426
18413
				fu->bcnt += pr->bcnt;
427
	}
428
	/*
429
	 * If the format string interprets any data at all, and it's
430
	 * not the same as the blocksize, and its last format unit
431
	 * interprets any data at all, and has no iteration count,
432
	 * repeat it as necessary.
433
	 *
434
	 * If, rep count is greater than 1, no trailing whitespace
435
	 * gets output from the last iteration of the format unit.
436
	 */
437
58298
	for (fu = fs->nextfu; fu; fu = fu->nextfu) {
438

31708
		if (!fu->nextfu && fs->bcnt < blocksize &&
439
5118
		    !(fu->flags&F_SETREP) && fu->bcnt)
440
			fu->reps += (blocksize - fs->bcnt) / fu->bcnt;
441
20972
		if (fu->reps > 1) {
442
7677
			if (!fu->nextpr)
443
				break;
444
12795
			for (pr = fu->nextpr;; pr = pr->nextpr)
445
12795
				if (!pr->nextpr)
446
					break;
447
35826
			for (p1 = pr->fmt, p2 = NULL; *p1; ++p1)
448
10236
				p2 = isspace((unsigned char)*p1) ? p1 : NULL;
449
7677
			if (p2)
450
5118
				pr->nospace = p2;
451
		}
452
	}
453
#ifdef DEBUG
454
	for (fu = fs->nextfu; fu; fu = fu->nextfu) {
455
		(void)printf("fmt:");
456
		for (pr = fu->nextpr; pr; pr = pr->nextpr)
457
			(void)printf(" {%s}", pr->fmt);
458
		(void)printf("\n");
459
	}
460
#endif
461
8177
}
462
463
/*
 * Decode backslash escape sequences in the string p1, in place.
 *
 * \a \b \f \n \r \t \v become the matching control character; any
 * other escaped character stands for itself.  A lone backslash at the
 * very end of the string is kept as is.  The result is never longer
 * than the input, so it is written over the original bytes.
 */
static void
escape(char *p1)
{
	char *src, *dst;
	char c;

	src = dst = p1;
	while (*src != '\0') {
		c = *src++;
		if (c == '\\') {
			switch (*src) {
			case '\0':
				/* incomplete escape sequence */
				*dst++ = '\\';
				*dst = '\0';
				return;
			case 'a':
				c = '\007';	/* i.e. '\a' */
				break;
			case 'b':
				c = '\b';
				break;
			case 'f':
				c = '\f';
				break;
			case 'n':
				c = '\n';
				break;
			case 'r':
				c = '\r';
				break;
			case 't':
				c = '\t';
				break;
			case 'v':
				c = '\v';
				break;
			default:
				c = *src;
				break;
			}
			src++;
		}
		*dst++ = c;
	}
	*dst = '\0';
}
510
511
/*
 * Report that the byte count is not valid for the conversion text s
 * and exit with status 1.
 */
static __dead void
badcnt(char *s)
{
	errx(1, "%s: bad byte count", s);
}
516
517
/*
 * Report a %s conversion that has neither a precision nor a byte count
 * and exit with status 1.
 */
static __dead void
badsfmt(void)
{
	errx(1, "%%s: requires a precision or a byte count");
}
522
523
/*
 * Report that the whole format string fmt is malformed and exit with
 * status 1.
 */
static __dead void
badfmt(const char *fmt)
{
	errx(1, "\"%s\": bad format", fmt);
}
528
529
/*
 * Report an unknown conversion and exit with status 1.  ch is the
 * conversion text without its leading '%', which the message adds back.
 */
static __dead void
badconv(char *ch)
{
	errx(1, "%%%s: bad conversion character", ch);
}