GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: usr.bin/file/magic-load.c Lines: 0 616 0.0 %
Date: 2017-11-13 Branches: 0 1056 0.0 %

Line Branch Exec Source
1
/* $OpenBSD: magic-load.c,v 1.26 2017/07/02 10:58:15 brynet Exp $ */
2
3
/*
4
 * Copyright (c) 2015 Nicholas Marriott <nicm@openbsd.org>
5
 *
6
 * Permission to use, copy, modify, and distribute this software for any
7
 * purpose with or without fee is hereby granted, provided that the above
8
 * copyright notice and this permission notice appear in all copies.
9
 *
10
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14
 * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER
15
 * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
16
 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17
 */
18
19
#include <sys/types.h>
20
21
#include <ctype.h>
22
#include <err.h>
23
#include <errno.h>
24
#include <limits.h>
25
#include <regex.h>
26
#include <stdarg.h>
27
#include <stdio.h>
28
#include <stdlib.h>
29
#include <string.h>
30
31
#include "magic.h"
32
#include "xmalloc.h"
33
34
/*
 * Convert an octal digit character to its value.
 * Returns 0-7 for '0'-'7' and -1 for any other character.
 */
static int
magic_odigit(u_char c)
{
	return ((c < '0' || c > '7') ? -1 : c - '0');
}
41
42
/*
 * Convert a hexadecimal digit character (either case) to its value.
 * Returns 0-15 for a valid digit and -1 for any other character.
 */
static int
magic_xdigit(u_char c)
{
	int	value = -1;

	if (c >= 'A' && c <= 'F')
		value = c - 'A' + 10;
	else if (c >= 'a' && c <= 'f')
		value = c - 'a' + 10;
	else if (c >= '0' && c <= '9')
		value = c - '0';
	return (value);
}
53
54
static void
55
magic_mark_text(struct magic_line *ml, int text)
56
{
57
	do {
58
		ml->text = text;
59
		ml = ml->parent;
60
	} while (ml != NULL);
61
}
62
63
/*
 * Compile the extended regular expression p into re, without
 * subexpression reporting. On failure a warning naming the pattern is
 * issued against ml and -1 is returned; on success 0 is returned.
 */
static int
magic_make_pattern(struct magic_line *ml, const char *name, regex_t *re,
    const char *p)
{
	char	msg[256];
	int	rc;

	rc = regcomp(re, p, REG_EXTENDED|REG_NOSUB);
	if (rc == 0)
		return (0);

	regerror(rc, re, msg, sizeof msg);
	magic_warn(ml, "bad %s pattern: %s", name, msg);
	return (-1);
}
78
79
static int
80
magic_set_result(struct magic_line *ml, const char *s)
81
{
82
	const char	*fmt, *endfmt, *cp;
83
	regex_t		*re = NULL;
84
	regmatch_t	 pmatch;
85
	size_t		 fmtlen;
86
87
	while (isspace((u_char)*s))
88
		s++;
89
	if (*s == '\0') {
90
		ml->result = NULL;
91
		return (0);
92
	}
93
	ml->result = xstrdup(s);
94
95
	fmt = NULL;
96
	for (cp = s; *cp != '\0'; cp++) {
97
		if (cp[0] == '%' && cp[1] != '%') {
98
			if (fmt != NULL) {
99
				magic_warn(ml, "multiple formats");
100
				return (-1);
101
			}
102
			fmt = cp;
103
		}
104
	}
105
	if (fmt == NULL)
106
		return (0);
107
	fmt++;
108
109
	for (endfmt = fmt; *endfmt != '\0'; endfmt++) {
110
		if (strchr("diouxXeEfFgGsc", *endfmt) != NULL)
111
			break;
112
	}
113
	if (*endfmt == '\0') {
114
		magic_warn(ml, "unterminated format");
115
		return (-1);
116
	}
117
	fmtlen = endfmt + 1 - fmt;
118
	if (fmtlen > 32) {
119
		magic_warn(ml, "format too long");
120
		return (-1);
121
	}
122
123
	if (*endfmt == 's') {
124
		switch (ml->type) {
125
		case MAGIC_TYPE_DATE:
126
		case MAGIC_TYPE_LDATE:
127
		case MAGIC_TYPE_UDATE:
128
		case MAGIC_TYPE_ULDATE:
129
		case MAGIC_TYPE_BEDATE:
130
		case MAGIC_TYPE_BELDATE:
131
		case MAGIC_TYPE_UBEDATE:
132
		case MAGIC_TYPE_UBELDATE:
133
		case MAGIC_TYPE_QDATE:
134
		case MAGIC_TYPE_QLDATE:
135
		case MAGIC_TYPE_UQDATE:
136
		case MAGIC_TYPE_UQLDATE:
137
		case MAGIC_TYPE_BEQDATE:
138
		case MAGIC_TYPE_BEQLDATE:
139
		case MAGIC_TYPE_UBEQDATE:
140
		case MAGIC_TYPE_UBEQLDATE:
141
		case MAGIC_TYPE_LEQDATE:
142
		case MAGIC_TYPE_LEQLDATE:
143
		case MAGIC_TYPE_ULEQDATE:
144
		case MAGIC_TYPE_ULEQLDATE:
145
		case MAGIC_TYPE_LEDATE:
146
		case MAGIC_TYPE_LELDATE:
147
		case MAGIC_TYPE_ULEDATE:
148
		case MAGIC_TYPE_ULELDATE:
149
		case MAGIC_TYPE_MEDATE:
150
		case MAGIC_TYPE_MELDATE:
151
		case MAGIC_TYPE_STRING:
152
		case MAGIC_TYPE_PSTRING:
153
		case MAGIC_TYPE_BESTRING16:
154
		case MAGIC_TYPE_LESTRING16:
155
		case MAGIC_TYPE_REGEX:
156
		case MAGIC_TYPE_SEARCH:
157
			break;
158
		default:
159
			ml->stringify = 1;
160
			break;
161
		}
162
	}
163
164
	if (!ml->root->compiled) {
165
		/*
166
		 * XXX %ld (and %lu and so on) is invalid on 64-bit platforms
167
		 * with byte, short, long. We get lucky because our first and
168
		 * only argument ends up in a register. Accept it for now.
169
		 */
170
		if (magic_make_pattern(ml, "short", &ml->root->format_short,
171
		    "^-?[0-9]*(\\.[0-9]*)?(c|(l|h|hh)?[iduxX])$") != 0)
172
			return (-1);
173
		if (magic_make_pattern(ml, "long", &ml->root->format_long,
174
		    "^-?[0-9]*(\\.[0-9]*)?(c|(l|h|hh)?[iduxX])$") != 0)
175
			return (-1);
176
		if (magic_make_pattern(ml, "quad", &ml->root->format_quad,
177
		    "^-?[0-9]*(\\.[0-9]*)?ll[iduxX]$") != 0)
178
			return (-1);
179
		if (magic_make_pattern(ml, "float", &ml->root->format_float,
180
		    "^-?[0-9]*(\\.[0-9]*)?[eEfFgG]$") != 0)
181
			return (-1);
182
		if (magic_make_pattern(ml, "string", &ml->root->format_string,
183
		    "^-?[0-9]*(\\.[0-9]*)?s$") != 0)
184
			return (-1);
185
		ml->root->compiled = 1;
186
	}
187
188
	if (ml->stringify)
189
		re = &ml->root->format_string;
190
	else {
191
		switch (ml->type) {
192
		case MAGIC_TYPE_NONE:
193
		case MAGIC_TYPE_BESTRING16:
194
		case MAGIC_TYPE_LESTRING16:
195
		case MAGIC_TYPE_NAME:
196
		case MAGIC_TYPE_USE:
197
			return (0); /* don't use result */
198
		case MAGIC_TYPE_BYTE:
199
		case MAGIC_TYPE_UBYTE:
200
		case MAGIC_TYPE_SHORT:
201
		case MAGIC_TYPE_USHORT:
202
		case MAGIC_TYPE_BESHORT:
203
		case MAGIC_TYPE_UBESHORT:
204
		case MAGIC_TYPE_LESHORT:
205
		case MAGIC_TYPE_ULESHORT:
206
			re = &ml->root->format_short;
207
			break;
208
		case MAGIC_TYPE_LONG:
209
		case MAGIC_TYPE_ULONG:
210
		case MAGIC_TYPE_BELONG:
211
		case MAGIC_TYPE_UBELONG:
212
		case MAGIC_TYPE_LELONG:
213
		case MAGIC_TYPE_ULELONG:
214
		case MAGIC_TYPE_MELONG:
215
			re = &ml->root->format_long;
216
			break;
217
		case MAGIC_TYPE_QUAD:
218
		case MAGIC_TYPE_UQUAD:
219
		case MAGIC_TYPE_BEQUAD:
220
		case MAGIC_TYPE_UBEQUAD:
221
		case MAGIC_TYPE_LEQUAD:
222
		case MAGIC_TYPE_ULEQUAD:
223
			re = &ml->root->format_quad;
224
			break;
225
		case MAGIC_TYPE_FLOAT:
226
		case MAGIC_TYPE_BEFLOAT:
227
		case MAGIC_TYPE_LEFLOAT:
228
		case MAGIC_TYPE_DOUBLE:
229
		case MAGIC_TYPE_BEDOUBLE:
230
		case MAGIC_TYPE_LEDOUBLE:
231
			re = &ml->root->format_float;
232
			break;
233
		case MAGIC_TYPE_DATE:
234
		case MAGIC_TYPE_LDATE:
235
		case MAGIC_TYPE_UDATE:
236
		case MAGIC_TYPE_ULDATE:
237
		case MAGIC_TYPE_BEDATE:
238
		case MAGIC_TYPE_BELDATE:
239
		case MAGIC_TYPE_UBEDATE:
240
		case MAGIC_TYPE_UBELDATE:
241
		case MAGIC_TYPE_QDATE:
242
		case MAGIC_TYPE_QLDATE:
243
		case MAGIC_TYPE_UQDATE:
244
		case MAGIC_TYPE_UQLDATE:
245
		case MAGIC_TYPE_BEQDATE:
246
		case MAGIC_TYPE_BEQLDATE:
247
		case MAGIC_TYPE_UBEQDATE:
248
		case MAGIC_TYPE_UBEQLDATE:
249
		case MAGIC_TYPE_LEQDATE:
250
		case MAGIC_TYPE_LEQLDATE:
251
		case MAGIC_TYPE_ULEQDATE:
252
		case MAGIC_TYPE_ULEQLDATE:
253
		case MAGIC_TYPE_LEDATE:
254
		case MAGIC_TYPE_LELDATE:
255
		case MAGIC_TYPE_ULEDATE:
256
		case MAGIC_TYPE_ULELDATE:
257
		case MAGIC_TYPE_MEDATE:
258
		case MAGIC_TYPE_MELDATE:
259
		case MAGIC_TYPE_STRING:
260
		case MAGIC_TYPE_PSTRING:
261
		case MAGIC_TYPE_REGEX:
262
		case MAGIC_TYPE_SEARCH:
263
		case MAGIC_TYPE_DEFAULT:
264
		case MAGIC_TYPE_CLEAR:
265
			re = &ml->root->format_string;
266
			break;
267
		}
268
	}
269
270
	pmatch.rm_so = 0;
271
	pmatch.rm_eo = fmtlen;
272
	if (regexec(re, fmt, 1, &pmatch, REG_STARTEND) != 0) {
273
		magic_warn(ml, "bad format for %s: %%%.*s", ml->type_string,
274
		    (int)fmtlen, fmt);
275
		return (-1);
276
	}
277
278
	return (0);
279
}
280
281
static u_int
282
magic_get_strength(struct magic_line *ml)
283
{
284
	int	n;
285
	size_t	size;
286
287
	if (ml->type == MAGIC_TYPE_NONE)
288
		return (0);
289
290
	if (ml->test_not || ml->test_operator == 'x') {
291
		n = 1;
292
		goto skip;
293
	}
294
295
	n = 2 * MAGIC_STRENGTH_MULTIPLIER;
296
	switch (ml->type) {
297
	case MAGIC_TYPE_NONE:
298
	case MAGIC_TYPE_DEFAULT:
299
		return (0);
300
	case MAGIC_TYPE_CLEAR:
301
	case MAGIC_TYPE_NAME:
302
	case MAGIC_TYPE_USE:
303
		break;
304
	case MAGIC_TYPE_BYTE:
305
	case MAGIC_TYPE_UBYTE:
306
		n += 1 * MAGIC_STRENGTH_MULTIPLIER;
307
		break;
308
	case MAGIC_TYPE_SHORT:
309
	case MAGIC_TYPE_USHORT:
310
	case MAGIC_TYPE_BESHORT:
311
	case MAGIC_TYPE_UBESHORT:
312
	case MAGIC_TYPE_LESHORT:
313
	case MAGIC_TYPE_ULESHORT:
314
		n += 2 * MAGIC_STRENGTH_MULTIPLIER;
315
		break;
316
	case MAGIC_TYPE_LONG:
317
	case MAGIC_TYPE_ULONG:
318
	case MAGIC_TYPE_FLOAT:
319
	case MAGIC_TYPE_DATE:
320
	case MAGIC_TYPE_LDATE:
321
	case MAGIC_TYPE_UDATE:
322
	case MAGIC_TYPE_ULDATE:
323
	case MAGIC_TYPE_BELONG:
324
	case MAGIC_TYPE_UBELONG:
325
	case MAGIC_TYPE_BEFLOAT:
326
	case MAGIC_TYPE_BEDATE:
327
	case MAGIC_TYPE_BELDATE:
328
	case MAGIC_TYPE_UBEDATE:
329
	case MAGIC_TYPE_UBELDATE:
330
		n += 4 * MAGIC_STRENGTH_MULTIPLIER;
331
		break;
332
	case MAGIC_TYPE_QUAD:
333
	case MAGIC_TYPE_UQUAD:
334
	case MAGIC_TYPE_DOUBLE:
335
	case MAGIC_TYPE_QDATE:
336
	case MAGIC_TYPE_QLDATE:
337
	case MAGIC_TYPE_UQDATE:
338
	case MAGIC_TYPE_UQLDATE:
339
	case MAGIC_TYPE_BEQUAD:
340
	case MAGIC_TYPE_UBEQUAD:
341
	case MAGIC_TYPE_BEDOUBLE:
342
	case MAGIC_TYPE_BEQDATE:
343
	case MAGIC_TYPE_BEQLDATE:
344
	case MAGIC_TYPE_UBEQDATE:
345
	case MAGIC_TYPE_UBEQLDATE:
346
	case MAGIC_TYPE_LEQUAD:
347
	case MAGIC_TYPE_ULEQUAD:
348
	case MAGIC_TYPE_LEDOUBLE:
349
	case MAGIC_TYPE_LEQDATE:
350
	case MAGIC_TYPE_LEQLDATE:
351
	case MAGIC_TYPE_ULEQDATE:
352
	case MAGIC_TYPE_ULEQLDATE:
353
	case MAGIC_TYPE_LELONG:
354
	case MAGIC_TYPE_ULELONG:
355
	case MAGIC_TYPE_LEFLOAT:
356
	case MAGIC_TYPE_LEDATE:
357
	case MAGIC_TYPE_LELDATE:
358
	case MAGIC_TYPE_ULEDATE:
359
	case MAGIC_TYPE_ULELDATE:
360
	case MAGIC_TYPE_MELONG:
361
	case MAGIC_TYPE_MEDATE:
362
	case MAGIC_TYPE_MELDATE:
363
		n += 8 * MAGIC_STRENGTH_MULTIPLIER;
364
		break;
365
	case MAGIC_TYPE_STRING:
366
	case MAGIC_TYPE_PSTRING:
367
		n += ml->test_string_size * MAGIC_STRENGTH_MULTIPLIER;
368
		break;
369
	case MAGIC_TYPE_BESTRING16:
370
	case MAGIC_TYPE_LESTRING16:
371
		n += ml->test_string_size * MAGIC_STRENGTH_MULTIPLIER / 2;
372
		break;
373
	case MAGIC_TYPE_REGEX:
374
	case MAGIC_TYPE_SEARCH:
375
		size = MAGIC_STRENGTH_MULTIPLIER / ml->test_string_size;
376
		if (size < 1)
377
			size = 1;
378
		n += ml->test_string_size * size;
379
		break;
380
	}
381
	switch (ml->test_operator) {
382
	case '=':
383
		n += MAGIC_STRENGTH_MULTIPLIER;
384
		break;
385
	case '<':
386
	case '>':
387
	case '[':
388
	case ']':
389
		n -= 2 * MAGIC_STRENGTH_MULTIPLIER;
390
		break;
391
	case '^':
392
	case '&':
393
		n -= MAGIC_STRENGTH_MULTIPLIER;
394
		break;
395
	}
396
397
skip:
398
	switch (ml->strength_operator) {
399
	case '+':
400
		n += ml->strength_value;
401
		break;
402
	case '-':
403
		n -= ml->strength_value;
404
		break;
405
	case '*':
406
		n *= ml->strength_value;
407
		break;
408
	case '/':
409
		n /= ml->strength_value;
410
		break;
411
	}
412
	return (n <= 0 ? 1 : n);
413
}
414
415
static int
416
magic_get_string(char **line, char *out, size_t *outlen)
417
{
418
	char	*start, *cp, c;
419
	int	 d0, d1, d2;
420
421
	start = out;
422
	for (cp = *line; *cp != '\0' && !isspace((u_char)*cp); cp++) {
423
		if (*cp != '\\') {
424
			*out++ = *cp;
425
			continue;
426
		}
427
428
		switch (c = *++cp) {
429
		case '\0': /* end of line */
430
			return (-1);
431
		case ' ':
432
			*out++ = ' ';
433
			break;
434
		case '0':
435
		case '1':
436
		case '2':
437
		case '3':
438
		case '4':
439
		case '5':
440
		case '6':
441
		case '7':
442
			d0 = magic_odigit(cp[0]);
443
			if (cp[0] != '\0')
444
				d1 = magic_odigit(cp[1]);
445
			else
446
				d1 = -1;
447
			if (cp[0] != '\0' && cp[1] != '\0')
448
				d2 = magic_odigit(cp[2]);
449
			else
450
				d2 = -1;
451
452
			if (d0 != -1 && d1 != -1 && d2 != -1) {
453
				*out = d2 | (d1 << 3) | (d0 << 6);
454
				cp += 2;
455
			} else if (d0 != -1 && d1 != -1) {
456
				*out = d1 | (d0 << 3);
457
				cp++;
458
			} else if (d0 != -1)
459
				*out = d0;
460
			else
461
				return (-1);
462
			out++;
463
			break;
464
		case 'x':
465
			d0 = magic_xdigit(cp[1]);
466
			if (cp[1] != '\0')
467
				d1 = magic_xdigit(cp[2]);
468
			else
469
				d1 = -1;
470
471
			if (d0 != -1 && d1 != -1) {
472
				*out = d1 | (d0 << 4);
473
				cp += 2;
474
			} else if (d0 != -1) {
475
				*out = d0;
476
				cp++;
477
			} else
478
				return (-1);
479
			out++;
480
481
			break;
482
		case 'a':
483
			*out++ = '\a';
484
			break;
485
		case 'b':
486
			*out++ = '\b';
487
			break;
488
		case 't':
489
			*out++ = '\t';
490
			break;
491
		case 'f':
492
			*out++ = '\f';
493
			break;
494
		case 'n':
495
			*out++ = '\n';
496
			break;
497
		case 'r':
498
			*out++ = '\r';
499
			break;
500
		case '\\':
501
			*out++ = '\\';
502
			break;
503
		case '\'':
504
			*out++ = '\'';
505
			break;
506
		case '\"':
507
			*out++ = '\"';
508
			break;
509
		default:
510
			*out++ = c;
511
			break;
512
		}
513
	}
514
	*out = '\0';
515
	*outlen = out - start;
516
517
	*line = cp;
518
	return (0);
519
}
520
521
static int
522
magic_parse_offset(struct magic_line *ml, char **line)
523
{
524
	char	*copy, *s, *cp, *endptr;
525
526
	while (isspace((u_char)**line))
527
		(*line)++;
528
	copy = s = cp = xmalloc(strlen(*line) + 1);
529
	while (**line != '\0' && !isspace((u_char)**line))
530
		*cp++ = *(*line)++;
531
	*cp = '\0';
532
533
	ml->offset = 0;
534
	ml->offset_relative = 0;
535
536
	ml->indirect_type = ' ';
537
	ml->indirect_relative = 0;
538
	ml->indirect_offset = 0;
539
	ml->indirect_operator = ' ';
540
	ml->indirect_operand = 0;
541
542
	if (*s == '&') {
543
		ml->offset_relative = 1;
544
		s++;
545
	}
546
547
	if (*s != '(') {
548
		endptr = magic_strtoll(s, &ml->offset);
549
		if (endptr == NULL || *endptr != '\0') {
550
			magic_warn(ml, "missing closing bracket");
551
			goto fail;
552
		}
553
		if (ml->offset < 0 && !ml->offset_relative) {
554
			magic_warn(ml, "negative absolute offset");
555
			goto fail;
556
		}
557
		goto done;
558
	}
559
	s++;
560
561
	if (*s == '&') {
562
		ml->indirect_relative = 1;
563
		s++;
564
	}
565
566
	endptr = magic_strtoll(s, &ml->indirect_offset);
567
	if (endptr == NULL) {
568
		magic_warn(ml, "can't parse offset: %s", s);
569
		goto fail;
570
	}
571
	s = endptr;
572
	if (*s == ')')
573
		goto done;
574
575
	if (*s == '.') {
576
		s++;
577
		if (*s == '\0' || strchr("bslBSL", *s) == NULL) {
578
			magic_warn(ml, "unknown offset type: %c", *s);
579
			goto fail;
580
		}
581
		ml->indirect_type = *s;
582
		s++;
583
		if (*s == ')')
584
			goto done;
585
	}
586
587
	if (*s == '\0' || strchr("+-*", *s) == NULL) {
588
		magic_warn(ml, "unknown offset operator: %c", *s);
589
		goto fail;
590
	}
591
	ml->indirect_operator = *s;
592
	s++;
593
	if (*s == ')')
594
		goto done;
595
596
	if (*s == '(') {
597
		s++;
598
		endptr = magic_strtoll(s, &ml->indirect_operand);
599
		if (endptr == NULL || *endptr != ')') {
600
			magic_warn(ml, "missing closing bracket");
601
			goto fail;
602
		}
603
		if (*++endptr != ')') {
604
			magic_warn(ml, "missing closing bracket");
605
			goto fail;
606
		}
607
	} else {
608
		endptr = magic_strtoll(s, &ml->indirect_operand);
609
		if (endptr == NULL || *endptr != ')') {
610
			magic_warn(ml, "missing closing bracket");
611
			goto fail;
612
		}
613
	}
614
615
done:
616
	free(copy);
617
	return (0);
618
619
fail:
620
	free(copy);
621
	return (-1);
622
}
623
624
static int
625
magic_parse_type(struct magic_line *ml, char **line)
626
{
627
	char	*copy, *s, *cp, *endptr;
628
629
	while (isspace((u_char)**line))
630
		(*line)++;
631
	copy = s = cp = xmalloc(strlen(*line) + 1);
632
	while (**line != '\0' && !isspace((u_char)**line))
633
		*cp++ = *(*line)++;
634
	*cp = '\0';
635
636
	ml->type = MAGIC_TYPE_NONE;
637
	ml->type_operator = ' ';
638
	ml->type_operand = 0;
639
640
	if (strcmp(s, "name") == 0) {
641
		ml->type = MAGIC_TYPE_NAME;
642
		ml->type_string = xstrdup(s);
643
		goto done;
644
	}
645
	if (strcmp(s, "use") == 0) {
646
		ml->type = MAGIC_TYPE_USE;
647
		ml->type_string = xstrdup(s);
648
		goto done;
649
	}
650
651
	if (strncmp(s, "string", (sizeof "string") - 1) == 0 ||
652
	    strncmp(s, "ustring", (sizeof "ustring") - 1) == 0) {
653
		if (*s == 'u')
654
			ml->type_string = xstrdup(s + 1);
655
		else
656
			ml->type_string = xstrdup(s);
657
		ml->type = MAGIC_TYPE_STRING;
658
		magic_mark_text(ml, 0);
659
		goto done;
660
	}
661
	if (strncmp(s, "pstring", (sizeof "pstring") - 1) == 0 ||
662
	    strncmp(s, "upstring", (sizeof "upstring") - 1) == 0) {
663
		if (*s == 'u')
664
			ml->type_string = xstrdup(s + 1);
665
		else
666
			ml->type_string = xstrdup(s);
667
		ml->type = MAGIC_TYPE_PSTRING;
668
		magic_mark_text(ml, 0);
669
		goto done;
670
	}
671
	if (strncmp(s, "search", (sizeof "search") - 1) == 0 ||
672
	    strncmp(s, "usearch", (sizeof "usearch") - 1) == 0) {
673
		if (*s == 'u')
674
			ml->type_string = xstrdup(s + 1);
675
		else
676
			ml->type_string = xstrdup(s);
677
		ml->type = MAGIC_TYPE_SEARCH;
678
		goto done;
679
	}
680
	if (strncmp(s, "regex", (sizeof "regex") - 1) == 0 ||
681
	    strncmp(s, "uregex", (sizeof "uregex") - 1) == 0) {
682
		if (*s == 'u')
683
			ml->type_string = xstrdup(s + 1);
684
		else
685
			ml->type_string = xstrdup(s);
686
		ml->type = MAGIC_TYPE_REGEX;
687
		goto done;
688
	}
689
	ml->type_string = xstrdup(s);
690
691
	cp = &s[strcspn(s, "+-&/%*")];
692
	if (*cp != '\0') {
693
		ml->type_operator = *cp;
694
		endptr = magic_strtoull(cp + 1, &ml->type_operand);
695
		if (endptr == NULL || *endptr != '\0') {
696
			magic_warn(ml, "can't parse operand: %s", cp + 1);
697
			goto fail;
698
		}
699
		*cp = '\0';
700
	}
701
702
	if (strcmp(s, "byte") == 0)
703
		ml->type = MAGIC_TYPE_BYTE;
704
	else if (strcmp(s, "short") == 0)
705
		ml->type = MAGIC_TYPE_SHORT;
706
	else if (strcmp(s, "long") == 0)
707
		ml->type = MAGIC_TYPE_LONG;
708
	else if (strcmp(s, "quad") == 0)
709
		ml->type = MAGIC_TYPE_QUAD;
710
	else if (strcmp(s, "ubyte") == 0)
711
		ml->type = MAGIC_TYPE_UBYTE;
712
	else if (strcmp(s, "ushort") == 0)
713
		ml->type = MAGIC_TYPE_USHORT;
714
	else if (strcmp(s, "ulong") == 0)
715
		ml->type = MAGIC_TYPE_ULONG;
716
	else if (strcmp(s, "uquad") == 0)
717
		ml->type = MAGIC_TYPE_UQUAD;
718
	else if (strcmp(s, "float") == 0 || strcmp(s, "ufloat") == 0)
719
		ml->type = MAGIC_TYPE_FLOAT;
720
	else if (strcmp(s, "double") == 0 || strcmp(s, "udouble") == 0)
721
		ml->type = MAGIC_TYPE_DOUBLE;
722
	else if (strcmp(s, "date") == 0)
723
		ml->type = MAGIC_TYPE_DATE;
724
	else if (strcmp(s, "qdate") == 0)
725
		ml->type = MAGIC_TYPE_QDATE;
726
	else if (strcmp(s, "ldate") == 0)
727
		ml->type = MAGIC_TYPE_LDATE;
728
	else if (strcmp(s, "qldate") == 0)
729
		ml->type = MAGIC_TYPE_QLDATE;
730
	else if (strcmp(s, "udate") == 0)
731
		ml->type = MAGIC_TYPE_UDATE;
732
	else if (strcmp(s, "uqdate") == 0)
733
		ml->type = MAGIC_TYPE_UQDATE;
734
	else if (strcmp(s, "uldate") == 0)
735
		ml->type = MAGIC_TYPE_ULDATE;
736
	else if (strcmp(s, "uqldate") == 0)
737
		ml->type = MAGIC_TYPE_UQLDATE;
738
	else if (strcmp(s, "beshort") == 0)
739
		ml->type = MAGIC_TYPE_BESHORT;
740
	else if (strcmp(s, "belong") == 0)
741
		ml->type = MAGIC_TYPE_BELONG;
742
	else if (strcmp(s, "bequad") == 0)
743
		ml->type = MAGIC_TYPE_BEQUAD;
744
	else if (strcmp(s, "ubeshort") == 0)
745
		ml->type = MAGIC_TYPE_UBESHORT;
746
	else if (strcmp(s, "ubelong") == 0)
747
		ml->type = MAGIC_TYPE_UBELONG;
748
	else if (strcmp(s, "ubequad") == 0)
749
		ml->type = MAGIC_TYPE_UBEQUAD;
750
	else if (strcmp(s, "befloat") == 0 || strcmp(s, "ubefloat") == 0)
751
		ml->type = MAGIC_TYPE_BEFLOAT;
752
	else if (strcmp(s, "bedouble") == 0 || strcmp(s, "ubedouble") == 0)
753
		ml->type = MAGIC_TYPE_BEDOUBLE;
754
	else if (strcmp(s, "bedate") == 0)
755
		ml->type = MAGIC_TYPE_BEDATE;
756
	else if (strcmp(s, "beqdate") == 0)
757
		ml->type = MAGIC_TYPE_BEQDATE;
758
	else if (strcmp(s, "beldate") == 0)
759
		ml->type = MAGIC_TYPE_BELDATE;
760
	else if (strcmp(s, "beqldate") == 0)
761
		ml->type = MAGIC_TYPE_BEQLDATE;
762
	else if (strcmp(s, "ubedate") == 0)
763
		ml->type = MAGIC_TYPE_UBEDATE;
764
	else if (strcmp(s, "ubeqdate") == 0)
765
		ml->type = MAGIC_TYPE_UBEQDATE;
766
	else if (strcmp(s, "ubeldate") == 0)
767
		ml->type = MAGIC_TYPE_UBELDATE;
768
	else if (strcmp(s, "ubeqldate") == 0)
769
		ml->type = MAGIC_TYPE_UBEQLDATE;
770
	else if (strcmp(s, "bestring16") == 0 || strcmp(s, "ubestring16") == 0)
771
		ml->type = MAGIC_TYPE_BESTRING16;
772
	else if (strcmp(s, "leshort") == 0)
773
		ml->type = MAGIC_TYPE_LESHORT;
774
	else if (strcmp(s, "lelong") == 0)
775
		ml->type = MAGIC_TYPE_LELONG;
776
	else if (strcmp(s, "lequad") == 0)
777
		ml->type = MAGIC_TYPE_LEQUAD;
778
	else if (strcmp(s, "uleshort") == 0)
779
		ml->type = MAGIC_TYPE_ULESHORT;
780
	else if (strcmp(s, "ulelong") == 0)
781
		ml->type = MAGIC_TYPE_ULELONG;
782
	else if (strcmp(s, "ulequad") == 0)
783
		ml->type = MAGIC_TYPE_ULEQUAD;
784
	else if (strcmp(s, "lefloat") == 0 || strcmp(s, "ulefloat") == 0)
785
		ml->type = MAGIC_TYPE_LEFLOAT;
786
	else if (strcmp(s, "ledouble") == 0 || strcmp(s, "uledouble") == 0)
787
		ml->type = MAGIC_TYPE_LEDOUBLE;
788
	else if (strcmp(s, "ledate") == 0)
789
		ml->type = MAGIC_TYPE_LEDATE;
790
	else if (strcmp(s, "leqdate") == 0)
791
		ml->type = MAGIC_TYPE_LEQDATE;
792
	else if (strcmp(s, "leldate") == 0)
793
		ml->type = MAGIC_TYPE_LELDATE;
794
	else if (strcmp(s, "leqldate") == 0)
795
		ml->type = MAGIC_TYPE_LEQLDATE;
796
	else if (strcmp(s, "uledate") == 0)
797
		ml->type = MAGIC_TYPE_ULEDATE;
798
	else if (strcmp(s, "uleqdate") == 0)
799
		ml->type = MAGIC_TYPE_ULEQDATE;
800
	else if (strcmp(s, "uleldate") == 0)
801
		ml->type = MAGIC_TYPE_ULELDATE;
802
	else if (strcmp(s, "uleqldate") == 0)
803
		ml->type = MAGIC_TYPE_ULEQLDATE;
804
	else if (strcmp(s, "lestring16") == 0 || strcmp(s, "ulestring16") == 0)
805
		ml->type = MAGIC_TYPE_LESTRING16;
806
	else if (strcmp(s, "melong") == 0 || strcmp(s, "umelong") == 0)
807
		ml->type = MAGIC_TYPE_MELONG;
808
	else if (strcmp(s, "medate") == 0 || strcmp(s, "umedate") == 0)
809
		ml->type = MAGIC_TYPE_MEDATE;
810
	else if (strcmp(s, "meldate") == 0 || strcmp(s, "umeldate") == 0)
811
		ml->type = MAGIC_TYPE_MELDATE;
812
	else if (strcmp(s, "default") == 0 || strcmp(s, "udefault") == 0)
813
		ml->type = MAGIC_TYPE_DEFAULT;
814
	else if (strcmp(s, "clear") == 0 || strcmp(s, "uclear") == 0)
815
		ml->type = MAGIC_TYPE_CLEAR;
816
	else {
817
		magic_warn(ml, "unknown type: %s", s);
818
		goto fail;
819
	}
820
	magic_mark_text(ml, 0);
821
822
done:
823
	free(copy);
824
	return (0);
825
826
fail:
827
	free(copy);
828
	return (-1);
829
}
830
831
static int
832
magic_parse_value(struct magic_line *ml, char **line)
833
{
834
	char	*copy, *s, *cp, *endptr;
835
	size_t	 slen;
836
	uint64_t u;
837
838
	while (isspace((u_char)**line))
839
		(*line)++;
840
841
	ml->test_operator = '=';
842
	ml->test_not = 0;
843
	ml->test_string = NULL;
844
	ml->test_string_size = 0;
845
	ml->test_unsigned = 0;
846
	ml->test_signed = 0;
847
848
	if (**line == '\0')
849
		return (0);
850
851
	s = *line;
852
	if (s[0] == 'x' && (s[1] == '\0' || isspace((u_char)s[1]))) {
853
		(*line)++;
854
		ml->test_operator = 'x';
855
		return (0);
856
	}
857
858
	if (ml->type == MAGIC_TYPE_DEFAULT || ml->type == MAGIC_TYPE_CLEAR) {
859
		magic_warn(ml, "test specified for default or clear");
860
		ml->test_operator = 'x';
861
		return (0);
862
	}
863
864
	if (**line == '!') {
865
		ml->test_not = 1;
866
		(*line)++;
867
	}
868
869
	switch (ml->type) {
870
	case MAGIC_TYPE_NAME:
871
	case MAGIC_TYPE_USE:
872
		copy = s = xmalloc(strlen(*line) + 1);
873
		if (magic_get_string(line, s, &slen) != 0 || slen == 0) {
874
			magic_warn(ml, "can't parse string");
875
			goto fail;
876
		}
877
		if (slen == 0 || *s == '\0' || strcmp(s, "^") == 0) {
878
			magic_warn(ml, "invalid name");
879
			goto fail;
880
		}
881
		ml->name = s;
882
		return (0); /* do not free */
883
	case MAGIC_TYPE_STRING:
884
	case MAGIC_TYPE_PSTRING:
885
	case MAGIC_TYPE_SEARCH:
886
		if (**line == '>' || **line == '<' || **line == '=') {
887
			ml->test_operator = **line;
888
			(*line)++;
889
		}
890
		/* FALLTHROUGH */
891
	case MAGIC_TYPE_REGEX:
892
		if (**line == '=')
893
			(*line)++;
894
		copy = s = xmalloc(strlen(*line) + 1);
895
		if (magic_get_string(line, s, &slen) != 0) {
896
			magic_warn(ml, "can't parse string");
897
			goto fail;
898
		}
899
		ml->test_string_size = slen;
900
		ml->test_string = s;
901
		return (0); /* do not free */
902
	default:
903
		break;
904
	}
905
906
	while (isspace((u_char)**line))
907
		(*line)++;
908
	if ((*line)[0] == '<' && (*line)[1] == '=') {
909
		ml->test_operator = '[';
910
		(*line) += 2;
911
	} else if ((*line)[0] == '>' && (*line)[1] == '=') {
912
		ml->test_operator = ']';
913
		(*line) += 2;
914
	} else if (**line != '\0' && strchr("=<>&^", **line) != NULL) {
915
		ml->test_operator = **line;
916
		(*line)++;
917
	}
918
919
	while (isspace((u_char)**line))
920
		(*line)++;
921
	copy = cp = xmalloc(strlen(*line) + 1);
922
	while (**line != '\0' && !isspace((u_char)**line))
923
		*cp++ = *(*line)++;
924
	*cp = '\0';
925
926
	switch (ml->type) {
927
	case MAGIC_TYPE_FLOAT:
928
	case MAGIC_TYPE_DOUBLE:
929
	case MAGIC_TYPE_BEFLOAT:
930
	case MAGIC_TYPE_BEDOUBLE:
931
	case MAGIC_TYPE_LEFLOAT:
932
	case MAGIC_TYPE_LEDOUBLE:
933
		errno = 0;
934
		ml->test_double = strtod(copy, &endptr);
935
		if (errno == ERANGE)
936
			endptr = NULL;
937
		break;
938
	default:
939
		if (*ml->type_string == 'u')
940
			endptr = magic_strtoull(copy, &ml->test_unsigned);
941
		else {
942
			endptr = magic_strtoll(copy, &ml->test_signed);
943
			if (endptr == NULL || *endptr != '\0') {
944
				/*
945
				 * If we can't parse this as a signed number,
946
				 * try as unsigned instead.
947
				 */
948
				endptr = magic_strtoull(copy, &u);
949
				if (endptr != NULL && *endptr == '\0')
950
					ml->test_signed = (int64_t)u;
951
			}
952
		}
953
		break;
954
	}
955
	if (endptr == NULL || *endptr != '\0') {
956
		magic_warn(ml, "can't parse number: %s", copy);
957
		goto fail;
958
	}
959
960
	free(copy);
961
	return (0);
962
963
fail:
964
	free(copy);
965
	return (-1);
966
}
967
968
int
969
magic_compare(struct magic_line *ml1, struct magic_line *ml2)
970
{
971
	if (ml1->strength < ml2->strength)
972
		return (1);
973
	if (ml1->strength > ml2->strength)
974
		return (-1);
975
976
	/*
977
	 * The original file depends on the (undefined!) qsort(3) behaviour
978
	 * when the strength is equal. This is impossible to reproduce with an
979
	 * RB tree so just use the line number and hope for the best.
980
	 */
981
	if (ml1->line < ml2->line)
982
		return (-1);
983
	if (ml1->line > ml2->line)
984
		return (1);
985
986
	return (0);
987
}
988
RB_GENERATE(magic_tree, magic_line, node, magic_compare);
989
990
int
991
magic_named_compare(struct magic_line *ml1, struct magic_line *ml2)
992
{
993
	return (strcmp(ml1->name, ml2->name));
994
}
995
RB_GENERATE(magic_named_tree, magic_line, node, magic_named_compare);
996
997
static void
998
magic_adjust_strength(struct magic *m, u_int at, struct magic_line *ml,
999
    char *line)
1000
{
1001
	char	*cp, *s;
1002
	int64_t	 value;
1003
1004
	cp = line + (sizeof "!:strength") - 1;
1005
	while (isspace((u_char)*cp))
1006
		cp++;
1007
	s = cp;
1008
1009
	cp = strchr(s, '#');
1010
	if (cp != NULL)
1011
		*cp = '\0';
1012
	cp = s;
1013
1014
	if (*s == '\0' || strchr("+-*/", *s) == NULL) {
1015
		magic_warnm(m, at, "invalid strength operator: %s", s);
1016
		return;
1017
	}
1018
	ml->strength_operator = *cp++;
1019
1020
	while (isspace((u_char)*cp))
1021
		cp++;
1022
	cp = magic_strtoll(cp, &value);
1023
	while (cp != NULL && isspace((u_char)*cp))
1024
		cp++;
1025
	if (cp == NULL || *cp != '\0' || value < 0 || value > 255) {
1026
		magic_warnm(m, at, "invalid strength value: %s", s);
1027
		return;
1028
	}
1029
	ml->strength_value = value;
1030
}
1031
1032
static void
1033
magic_set_mimetype(struct magic *m, u_int at, struct magic_line *ml, char *line)
1034
{
1035
	char	*mimetype, *cp;
1036
1037
	mimetype = line + (sizeof "!:mime") - 1;
1038
	while (isspace((u_char)*mimetype))
1039
		mimetype++;
1040
1041
	cp = strchr(mimetype, '#');
1042
	if (cp != NULL)
1043
		*cp = '\0';
1044
1045
	if (*mimetype != '\0') {
1046
		cp = mimetype + strlen(mimetype) - 1;
1047
		while (cp != mimetype && isspace((u_char)*cp))
1048
			*cp-- = '\0';
1049
	}
1050
1051
	cp = mimetype;
1052
	while (*cp != '\0') {
1053
		if (!isalnum((u_char)*cp) && strchr("/-.+", *cp) == NULL)
1054
			break;
1055
		cp++;
1056
	}
1057
	if (*mimetype == '\0' || *cp != '\0') {
1058
		magic_warnm(m, at, "invalid MIME type: %s", mimetype);
1059
		return;
1060
	}
1061
	if (ml == NULL) {
1062
		magic_warnm(m, at, "stray MIME type: %s", mimetype);
1063
		return;
1064
	}
1065
	ml->mimetype = xstrdup(mimetype);
1066
}
1067
1068
struct magic *
1069
magic_load(FILE *f, const char *path, int warnings)
1070
{
1071
	struct magic		*m;
1072
	struct magic_line	*ml = NULL, *parent, *parent0;
1073
	char			*line, *tmp;
1074
	size_t			 size;
1075
	ssize_t			 slen;
1076
	u_int			 at, level, n, i;
1077
1078
	m = xcalloc(1, sizeof *m);
1079
	m->path = xstrdup(path);
1080
	m->warnings = warnings;
1081
	RB_INIT(&m->tree);
1082
1083
	parent = NULL;
1084
	parent0 = NULL;
1085
	level = 0;
1086
1087
	at = 0;
1088
	tmp = NULL;
1089
	size = 0;
1090
	while ((slen = getline(&tmp, &size, f)) != -1) {
1091
		line = tmp;
1092
		if (line[slen - 1] == '\n')
1093
			line[slen - 1] = '\0';
1094
1095
		at++;
1096
1097
		while (isspace((u_char)*line))
1098
		    line++;
1099
		if (*line == '\0' || *line == '#')
1100
			continue;
1101
1102
		if (strncmp (line, "!:mime", 6) == 0) {
1103
			magic_set_mimetype(m, at, ml, line);
1104
			continue;
1105
		}
1106
		if (strncmp (line, "!:strength", 10) == 0) {
1107
			magic_adjust_strength(m, at, ml, line);
1108
			continue;
1109
		}
1110
		if (strncmp (line, "!:", 2) == 0) {
1111
			for (i = 0; i < 64 && line[i] != '\0'; i++) {
1112
				if (isspace((u_char)line[i]))
1113
					break;
1114
			}
1115
			magic_warnm(m, at, "%.*s not supported", i, line);
1116
			continue;
1117
		}
1118
1119
		n = 0;
1120
		for (; *line == '>'; line++)
1121
			n++;
1122
1123
		ml = xcalloc(1, sizeof *ml);
1124
		ml->root = m;
1125
		ml->line = at;
1126
		ml->type = MAGIC_TYPE_NONE;
1127
		TAILQ_INIT(&ml->children);
1128
		ml->text = 1;
1129
1130
		/*
1131
		 * At this point n is the level we want, level is the current
1132
		 * level. parent0 is the last line at the same level and parent
1133
		 * is the last line at the previous level.
1134
		 */
1135
		if (n == level + 1) {
1136
			parent = parent0;
1137
		} else if (n < level) {
1138
			for (i = n; i < level && parent != NULL; i++)
1139
				parent = parent->parent;
1140
		} else if (n != level) {
1141
			magic_warn(ml, "level skipped (%u->%u)", level, n);
1142
			free(ml);
1143
			continue;
1144
		}
1145
		ml->parent = parent;
1146
		level = n;
1147
1148
		if (magic_parse_offset(ml, &line) != 0 ||
1149
		    magic_parse_type(ml, &line) != 0 ||
1150
		    magic_parse_value(ml, &line) != 0 ||
1151
		    magic_set_result(ml, line) != 0) {
1152
			/*
1153
			 * An invalid line still needs to appear in the tree in
1154
			 * case it has any children.
1155
			 */
1156
			ml->type = MAGIC_TYPE_NONE;
1157
		}
1158
1159
		ml->strength = magic_get_strength(ml);
1160
		if (ml->parent == NULL) {
1161
			if (ml->name != NULL)
1162
				RB_INSERT(magic_named_tree, &m->named, ml);
1163
			else
1164
				RB_INSERT(magic_tree, &m->tree, ml);
1165
		} else
1166
			TAILQ_INSERT_TAIL(&ml->parent->children, ml, entry);
1167
		parent0 = ml;
1168
	}
1169
	free(tmp);
1170
	if (ferror(f))
1171
		err(1, "%s", path);
1172
1173
	return (m);
1174
}