GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: usr.bin/hexdump/parse.c Lines: 129 222 58.1 %
Date: 2017-11-07 Branches: 113 219 51.6 %

Line Branch Exec Source
1
/*	$OpenBSD: parse.c,v 1.22 2016/09/04 16:41:43 tb Exp $	*/
2
/*	$NetBSD: parse.c,v 1.12 2001/12/07 13:37:39 bjh21 Exp $	*/
3
4
/*
5
 * Copyright (c) 1989, 1993
6
 *	The Regents of the University of California.  All rights reserved.
7
 *
8
 * Redistribution and use in source and binary forms, with or without
9
 * modification, are permitted provided that the following conditions
10
 * are met:
11
 * 1. Redistributions of source code must retain the above copyright
12
 *    notice, this list of conditions and the following disclaimer.
13
 * 2. Redistributions in binary form must reproduce the above copyright
14
 *    notice, this list of conditions and the following disclaimer in the
15
 *    documentation and/or other materials provided with the distribution.
16
 * 3. Neither the name of the University nor the names of its contributors
17
 *    may be used to endorse or promote products derived from this software
18
 *    without specific prior written permission.
19
 *
20
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30
 * SUCH DAMAGE.
31
 */
32
33
#include <sys/types.h>
#include <sys/file.h>

#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
43
44
#include "hexdump.h"
45
46
/*
 * Format unit selected for end-of-data output.  rewrite() points this at
 * the format unit that contains a %_A conversion.
 */
FU *endfu;					/* format at end-of-data */
47
48
static __dead void	 badcnt(char *);
49
static __dead void	 badconv(char *);
50
static __dead void	 badfmt(const char *);
51
static __dead void	 badsfmt(void);
52
static void		 escape(char *);
53
54
/*
 * Read format strings from the file `name`, one per line, and pass each
 * one to add().  Leading white space is skipped; empty lines and lines
 * whose first non-blank character is '#' are ignored.  Terminates the
 * program through err() if the file cannot be opened or an allocation
 * fails.
 */
void
addfile(char *name)
{
	FILE *stream;
	char *line, *copy, *start;
	size_t linelen;

	stream = fopen(name, "r");
	if (stream == NULL)
		err(1, "fopen %s", name);

	copy = NULL;
	for (;;) {
		line = fgetln(stream, &linelen);
		if (line == NULL)
			break;

		if (line[linelen - 1] != '\n') {
			/* EOF without EOL: NUL-terminate a private copy */
			copy = malloc(linelen + 1);
			if (copy == NULL)
				err(1, NULL);
			memcpy(copy, line, linelen);
			copy[linelen] = '\0';
			line = copy;
		} else {
			/* turn the newline into the terminator in place */
			line[linelen - 1] = '\0';
		}

		start = line;
		while (isspace((unsigned char)*start))
			++start;

		/* only non-empty, non-comment lines carry a format */
		if (*start != '\0' && *start != '#')
			add(start);
	}
	free(copy);
	(void)fclose(stream);
}
84
85
void
86
add(const char *fmt)
87
{
88
	const char *p;
89
	static FS **nextfs;
90
	FS *tfs;
91
	FU *tfu, **nextfu;
92
	const char *savep;
93
94
	/* start new linked list of format units */
95
29074
	if ((tfs = calloc(1, sizeof(FS))) == NULL)
96
		err(1, NULL);
97
29074
	if (!fshead)
98
		fshead = tfs;
99
	else
100
14537
		*nextfs = tfs;
101
14537
	nextfs = &tfs->nextfs;
102
14537
	nextfu = &tfs->nextfu;
103
104
	/* take the format string and break it up into format units */
105
51469
	for (p = fmt;;) {
106
		/* skip leading white space */
107
156686
		for (; isspace((unsigned char)*p); ++p);
108
51469
		if (!*p)
109
			break;
110
111
		/* allocate a new format unit and link it in */
112
36932
		if ((tfu = calloc(1, sizeof(FU))) == NULL)
113
			err(1, NULL);
114
36932
		*nextfu = tfu;
115
36932
		nextfu = &tfu->nextfu;
116
36932
		tfu->reps = 1;
117
118
		/* if leading digit, repetition count */
119
36932
		if (isdigit((unsigned char)*p)) {
120
62706
			for (savep = p; isdigit((unsigned char)*p); ++p);
121

26874
			if (!isspace((unsigned char)*p) && *p != '/')
122
				badfmt(fmt);
123
			/* may overwrite either white space or slash */
124
13437
			tfu->reps = atoi(savep);
125
13437
			tfu->flags = F_SETREP;
126
			/* skip trailing white space */
127
26874
			for (++p; isspace((unsigned char)*p); ++p);
128
		}
129
130
		/* skip slash and trailing white space */
131
36932
		if (*p == '/')
132
			while (isspace((unsigned char)*++p));
133
134
		/* byte count */
135
36932
		if (isdigit((unsigned char)*p)) {
136
53748
			for (savep = p; isdigit((unsigned char)*p); ++p);
137
13437
			if (!isspace((unsigned char)*p))
138
				badfmt(fmt);
139
13437
			tfu->bcnt = atoi(savep);
140
			/* skip trailing white space */
141
26874
			for (++p; isspace((unsigned char)*p); ++p);
142
		}
143
144
		/* format */
145
36932
		if (*p != '"')
146
			badfmt(fmt);
147
255224
		for (savep = ++p; *p != '"';)
148
181360
			if (*p++ == 0)
149
				badfmt(fmt);
150
36932
		if ((tfu->fmt = strndup(savep, p - savep)) == NULL)
151
			err(1, NULL);
152
36932
		escape(tfu->fmt);
153
36932
		p++;
154
	}
155
14537
}
156
157
/*
 * Characters that may appear between a '%' and its conversion character.
 * The '.' is deliberately first: scanning with spec + 1 accepts the same
 * set minus '.', so such a scan stops where a precision begins.
 */
static const char *spec = ".#-+ 0123456789";
158
159
int
160
size(FS *fs)
161
{
162
	FU *fu;
163
	int bcnt, cursize;
164
	char *fmt;
165
	int prec;
166
167
	/* figure out the data block size needed for each format unit */
168
117475
	for (cursize = 0, fu = fs->nextfu; fu; fu = fu->nextfu) {
169
36932
		if (fu->bcnt) {
170
13437
			cursize += fu->bcnt * fu->reps;
171
13437
			continue;
172
		}
173
174602
		for (bcnt = prec = 0, fmt = fu->fmt; *fmt; ++fmt) {
174
63806
			if (*fmt != '%')
175
				continue;
176
			/*
177
			 * skip any special chars -- save precision in
178
			 * case it's a %s format.
179
			 */
180

55948
			while (*++fmt && strchr(spec + 1, *fmt));
181

19016
			if (*fmt == '.' && isdigit((unsigned char)*++fmt)) {
182
8958
				prec = atoi(fmt);
183
17916
				while (isdigit((unsigned char)*++fmt));
184
			}
185



11158
			switch(*fmt) {
186
			case 'c':
187
				bcnt += 1;
188
				break;
189
			case 'd': case 'i': case 'o': case 'u':
190
			case 'x': case 'X':
191
1100
				bcnt += 4;
192
1100
				break;
193
			case 'e': case 'E': case 'f': case 'g': case 'G':
194
				bcnt += 8;
195
				break;
196
			case 's':
197
				bcnt += prec;
198
				break;
199
			case '_':
200

62706
				switch(*++fmt) {
201
				case 'c': case 'p': case 'u':
202
					bcnt += 1;
203
					break;
204
				}
205
			}
206
		}
207
23495
		cursize += bcnt * fu->reps;
208
23495
	}
209
14537
	return (cursize);
210
}
211
212
void
213
rewrite(FS *fs)
214
{
215
	enum { NOTOKAY, USEBCNT, USEPREC } sokay;
216
	PR *pr, **nextpr;
217
	FU *fu;
218
	char *p1, *p2;
219
29074
	char savech, *fmtp, cs[4];
220
	int nconv, prec;
221
222
	nextpr = NULL;
223
	prec = 0;
224
102938
	for (fu = fs->nextfu; fu; fu = fu->nextfu) {
225
		/*
226
		 * Break each format unit into print units; each conversion
227
		 * character gets its own.
228
		 */
229
120854
		for (nconv = 0, fmtp = fu->fmt; *fmtp; nextpr = &pr->nextpr) {
230
54848
			if ((pr = calloc(1, sizeof(PR))) == NULL)
231
				err(1, NULL);
232
54848
			if (!fu->nextpr)
233
				fu->nextpr = pr;
234
			else
235
				*nextpr = pr;
236
237
			/* Skip preceding text and up to the next % sign. */
238

294435
			for (p1 = fmtp; *p1 && *p1 != '%'; ++p1);
239
240
			/* Only text in the string. */
241
54848
			if (!*p1) {
242
31353
				pr->fmt = fmtp;
243
31353
				pr->flags = F_TEXT;
244
31353
				break;
245
			}
246
247
			/*
248
			 * Get precision for %s -- if have a byte count, don't
249
			 * need it.
250
			 */
251
23495
			if (fu->bcnt) {
252
				sokay = USEBCNT;
253
				/* Skip to conversion character. */
254

94059
				for (++p1; *p1 && strchr(spec, *p1); ++p1);
255
			} else {
256
				/* Skip any special chars, field width. */
257

55948
				while (*++p1 && strchr(spec + 1, *p1));
258

19016
				if (*p1 == '.' &&
259
8958
				    isdigit((unsigned char)*++p1)) {
260
					sokay = USEPREC;
261
8958
					prec = atoi(p1);
262
17916
					while (isdigit((unsigned char)*++p1))
263
						continue;
264
				} else
265
					sokay = NOTOKAY;
266
			}
267
268
23495
			p2 = *p1 ? p1 + 1 : p1;	/* Set end pointer. */
269
23495
			cs[0] = *p1;		/* Set conversion string. */
270
23495
			cs[1] = '\0';
271
272
			/*
273
			 * Figure out the byte count for each conversion;
274
			 * rewrite the format as necessary, set up blank-
275
			 * padding for end of data.
276
			 */
277



23495
			switch(cs[0]) {
278
			case 'c':
279
				pr->flags = F_CHAR;
280
				switch(fu->bcnt) {
281
				case 0: case 1:
282
					pr->bcnt = 1;
283
					break;
284
				default:
285
					p1[1] = '\0';
286
					badcnt(p1);
287
				}
288
				break;
289
			case 'd': case 'i':
290
			case 'o': case 'u': case 'x': case 'X':
291

19016
				if (cs[0] == 'd' || cs[0] == 'i')
292
1100
					pr->flags = F_INT;
293
				else
294
					pr->flags = F_UINT;
295
296
10058
				cs[3] = '\0';
297
10058
				cs[2] = cs[0];
298
10058
				cs[1] = 'l';
299
10058
				cs[0] = 'l';
300

10058
				switch(fu->bcnt) {
301
				case 0: case 4:
302
					pr->bcnt = 4;
303
1100
					break;
304
				case 1:
305
					pr->bcnt = 1;
306
8958
					break;
307
				case 2:
308
					pr->bcnt = 2;
309
					break;
310
				case 8:
311
					pr->bcnt = 8;
312
					break;
313
				default:
314
					p1[1] = '\0';
315
					badcnt(p1);
316
				}
317
10058
				break;
318
			case 'e': case 'E': case 'f': case 'g': case 'G':
319
				pr->flags = F_DBL;
320
				switch(fu->bcnt) {
321
				case 0: case 8:
322
					pr->bcnt = 8;
323
					break;
324
				case 4:
325
					pr->bcnt = 4;
326
					break;
327
				default:
328
					p1[1] = '\0';
329
					badcnt(p1);
330
				}
331
				break;
332
			case 's':
333
				pr->flags = F_STR;
334
				switch(sokay) {
335
				case NOTOKAY:
336
					badsfmt();
337
				case USEBCNT:
338
					pr->bcnt = fu->bcnt;
339
					break;
340
				case USEPREC:
341
					pr->bcnt = prec;
342
					break;
343
				}
344
				break;
345
			case '_':
346
17916
				++p2;
347

17916
				switch(p1[1]) {
348
				case 'A':
349
4479
					endfu = fu;
350
4479
					fu->flags |= F_IGNORE;
351
					/* FALLTHROUGH */
352
				case 'a':
353
8958
					pr->flags = F_ADDRESS;
354
8958
					++p2;
355

8958
					switch(p1[2]) {
356
					case 'd': case 'o': case'x':
357
8958
						cs[0] = 'l';
358
8958
						cs[1] = 'l';
359
8958
						cs[2] = p1[2];
360
8958
						cs[3] = '\0';
361
						break;
362
					default:
363
						if (p1[2])
364
							p1[3] = '\0';
365
						badconv(p1);
366
					}
367
8958
					break;
368
				case 'c':
369
				case 'p':
370
				case 'u':
371
4479
					if (p1[1] == 'c') {
372
						pr->flags = F_C;
373
						/* cs[0] = 'c';	set in conv_c */
374
4479
					} else if (p1[1] == 'p') {
375
4479
						pr->flags = F_P;
376
4479
						cs[0] = 'c';
377
4479
					} else {
378
						pr->flags = F_U;
379
						/* cs[0] = 'c';	set in conv_u */
380
					}
381
382
4479
					switch(fu->bcnt) {
383
					case 0: case 1:
384
4479
						pr->bcnt = 1;
385
						break;
386
					default:
387
						p1[2] = '\0';
388
						badcnt(p1);
389
					}
390
4479
					break;
391
				default:
392
					if (p1[1])
393
						p1[2] = '\0';
394
					badconv(p1);
395
				}
396
				break;
397
			default:
398
				if (cs[0])
399
					p1[1] = '\0';
400
				badconv(p1);
401
			}
402
403
			/*
404
			 * Copy to PR format string, set conversion character
405
			 * pointer, update original.
406
			 */
407
23495
			savech = *p2;
408
23495
			p1[0] = '\0';
409
23495
			if (asprintf(&pr->fmt, "%s%s", fmtp, cs) == -1)
410
				err(1, NULL);
411
23495
			*p2 = savech;
412
23495
			pr->cchar = pr->fmt + (p1 - fmtp);
413
			fmtp = p2;
414
415
			/* Only one conversion character if byte count. */
416

51469
			if (!(pr->flags&F_ADDRESS) && fu->bcnt && nconv++)
417
				errx(1,
418
			    "byte count with multiple conversion characters");
419
		}
420
		/*
421
		 * If format unit byte count not specified, figure it out
422
		 * so can adjust rep count later.
423
		 */
424
36932
		if (!fu->bcnt)
425
111896
			for (pr = fu->nextpr; pr; pr = pr->nextpr)
426
32453
				fu->bcnt += pr->bcnt;
427
	}
428
	/*
429
	 * If the format string interprets any data at all, and it's
430
	 * not the same as the blocksize, and its last format unit
431
	 * interprets any data at all, and has no iteration count,
432
	 * repeat it as necessary.
433
	 *
434
	 * If, rep count is greater than 1, no trailing whitespace
435
	 * gets output from the last iteration of the format unit.
436
	 */
437
102938
	for (fu = fs->nextfu; fu; fu = fu->nextfu) {
438

55948
		if (!fu->nextfu && fs->bcnt < blocksize &&
439
8958
		    !(fu->flags&F_SETREP) && fu->bcnt)
440
			fu->reps += (blocksize - fs->bcnt) / fu->bcnt;
441
36932
		if (fu->reps > 1) {
442
13437
			if (!fu->nextpr)
443
				break;
444
22395
			for (pr = fu->nextpr;; pr = pr->nextpr)
445
22395
				if (!pr->nextpr)
446
					break;
447
62706
			for (p1 = pr->fmt, p2 = NULL; *p1; ++p1)
448
17916
				p2 = isspace((unsigned char)*p1) ? p1 : NULL;
449
13437
			if (p2)
450
8958
				pr->nospace = p2;
451
		}
452
	}
453
#ifdef DEBUG
454
	for (fu = fs->nextfu; fu; fu = fu->nextfu) {
455
		(void)printf("fmt:");
456
		for (pr = fu->nextpr; pr; pr = pr->nextpr)
457
			(void)printf(" {%s}", pr->fmt);
458
		(void)printf("\n");
459
	}
460
#endif
461
14537
}
462
463
/*
 * Expand backslash escape sequences in the string `p1`, in place.
 * \a \b \f \n \r \t \v become the corresponding control characters; a
 * backslash before any other character yields that character.  A lone
 * backslash at the very end of the string is kept as it is.  The result
 * is never longer than the input.
 */
static void
escape(char *p1)
{
	char *src, *dst;
	char ch;

	src = dst = p1;
	while (*src != '\0') {
		ch = *src++;
		if (ch != '\\') {
			*dst++ = ch;
			continue;
		}

		/* src now points at the character after the backslash */
		ch = *src;
		if (ch == '\0') {
			/* incomplete escape sequence: keep the backslash */
			*dst++ = '\\';
			break;
		}
		++src;

		switch (ch) {
		case 'a':
			ch = '\007';	/* alert, spelled out numerically */
			break;
		case 'b':
			ch = '\b';
			break;
		case 'f':
			ch = '\f';
			break;
		case 'n':
			ch = '\n';
			break;
		case 'r':
			ch = '\r';
			break;
		case 't':
			ch = '\t';
			break;
		case 'v':
			ch = '\v';
			break;
		default:
			/* unknown escape: the character stands for itself */
			break;
		}
		*dst++ = ch;
	}
	*dst = '\0';
}
510
511
/*
 * Report that the byte count given for the conversion text `s` is not
 * usable, then exit with status 1.  Does not return.
 */
static __dead void
badcnt(char *s)
{
	errx(1, "%s: bad byte count", s);
}
516
517
/*
 * Report a %s conversion that has neither a precision nor a byte count,
 * then exit with status 1.  Does not return.
 */
static __dead void
badsfmt(void)
{
	errx(1, "%%s: requires a precision or a byte count");
}
522
523
/*
 * Report that the whole format string `fmt` is malformed, quoting it in
 * the message, then exit with status 1.  Does not return.
 */
static __dead void
badfmt(const char *fmt)
{
	errx(1, "\"%s\": bad format", fmt);
}
528
529
/*
 * Report an unsupported conversion.  `ch` is the conversion text without
 * its leading '%', which the message adds back.  Exits with status 1 and
 * does not return.
 */
static __dead void
badconv(char *ch)
{
	errx(1, "%%%s: bad conversion character", ch);
}