GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: usr.bin/file/magic-load.c Lines: 0 630 0.0 %
Date: 2016-12-06 Branches: 0 937 0.0 %

Line Branch Exec Source
1
/* $OpenBSD: magic-load.c,v 1.23 2016/05/01 14:57:15 nicm Exp $ */
2
3
/*
4
 * Copyright (c) 2015 Nicholas Marriott <nicm@openbsd.org>
5
 *
6
 * Permission to use, copy, modify, and distribute this software for any
7
 * purpose with or without fee is hereby granted, provided that the above
8
 * copyright notice and this permission notice appear in all copies.
9
 *
10
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14
 * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER
15
 * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
16
 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17
 */
18
19
#include <sys/types.h>
20
21
#include <ctype.h>
22
#include <errno.h>
23
#include <limits.h>
24
#include <regex.h>
25
#include <stdarg.h>
26
#include <stdio.h>
27
#include <stdlib.h>
28
#include <string.h>
29
30
#include "magic.h"
31
#include "xmalloc.h"
32
33
/*
 * Convert an octal digit character to its numeric value.
 *
 * Returns 0-7 for the characters '0'-'7' and -1 for anything else.
 */
static int
magic_odigit(u_char c)
{
	return ((c < '0' || c > '7') ? -1 : c - '0');
}
40
41
/*
 * Convert a hexadecimal digit character to its numeric value.
 *
 * Accepts '0'-'9', 'a'-'f' and 'A'-'F'. Returns 0-15 for a valid digit
 * and -1 for anything else.
 */
static int
magic_xdigit(u_char c)
{
	int	value = -1;

	if (c >= 'A' && c <= 'F')
		value = c - 'A' + 10;
	else if (c >= 'a' && c <= 'f')
		value = c - 'a' + 10;
	else if (c >= '0' && c <= '9')
		value = c - '0';
	return (value);
}
52
53
static void
54
magic_mark_text(struct magic_line *ml, int text)
55
{
56
	do {
57
		ml->text = text;
58
		ml = ml->parent;
59
	} while (ml != NULL);
60
}
61
62
/*
 * Compile the extended regular expression p into re (without subexpression
 * reporting).
 *
 * On failure a warning mentioning name and the regerror(3) text is issued
 * against ml and -1 is returned; on success the result is 0.
 */
static int
magic_make_pattern(struct magic_line *ml, const char *name, regex_t *re,
    const char *p)
{
	char	msg[256];
	int	rc;

	rc = regcomp(re, p, REG_EXTENDED|REG_NOSUB);
	if (rc == 0)
		return (0);

	regerror(rc, re, msg, sizeof msg);
	magic_warn(ml, "bad %s pattern: %s", name, msg);
	return (-1);
}
77
78
static int
79
magic_set_result(struct magic_line *ml, const char *s)
80
{
81
	const char	*fmt;
82
	const char	*endfmt;
83
	const char	*cp;
84
	regex_t		*re = NULL;
85
	regmatch_t	 pmatch;
86
	size_t		 fmtlen;
87
88
	while (isspace((u_char)*s))
89
		s++;
90
	if (*s == '\0') {
91
		ml->result = NULL;
92
		return (0);
93
	}
94
	ml->result = xstrdup(s);
95
96
	fmt = NULL;
97
	for (cp = s; *cp != '\0'; cp++) {
98
		if (cp[0] == '%' && cp[1] != '%') {
99
			if (fmt != NULL) {
100
				magic_warn(ml, "multiple formats");
101
				return (-1);
102
			}
103
			fmt = cp;
104
		}
105
	}
106
	if (fmt == NULL)
107
		return (0);
108
	fmt++;
109
110
	for (endfmt = fmt; *endfmt != '\0'; endfmt++) {
111
		if (strchr("diouxXeEfFgGsc", *endfmt) != NULL)
112
			break;
113
	}
114
	if (*endfmt == '\0') {
115
		magic_warn(ml, "unterminated format");
116
		return (-1);
117
	}
118
	fmtlen = endfmt + 1 - fmt;
119
	if (fmtlen > 32) {
120
		magic_warn(ml, "format too long");
121
		return (-1);
122
	}
123
124
	if (*endfmt == 's') {
125
		switch (ml->type) {
126
		case MAGIC_TYPE_DATE:
127
		case MAGIC_TYPE_LDATE:
128
		case MAGIC_TYPE_UDATE:
129
		case MAGIC_TYPE_ULDATE:
130
		case MAGIC_TYPE_BEDATE:
131
		case MAGIC_TYPE_BELDATE:
132
		case MAGIC_TYPE_UBEDATE:
133
		case MAGIC_TYPE_UBELDATE:
134
		case MAGIC_TYPE_QDATE:
135
		case MAGIC_TYPE_QLDATE:
136
		case MAGIC_TYPE_UQDATE:
137
		case MAGIC_TYPE_UQLDATE:
138
		case MAGIC_TYPE_BEQDATE:
139
		case MAGIC_TYPE_BEQLDATE:
140
		case MAGIC_TYPE_UBEQDATE:
141
		case MAGIC_TYPE_UBEQLDATE:
142
		case MAGIC_TYPE_LEQDATE:
143
		case MAGIC_TYPE_LEQLDATE:
144
		case MAGIC_TYPE_ULEQDATE:
145
		case MAGIC_TYPE_ULEQLDATE:
146
		case MAGIC_TYPE_LEDATE:
147
		case MAGIC_TYPE_LELDATE:
148
		case MAGIC_TYPE_ULEDATE:
149
		case MAGIC_TYPE_ULELDATE:
150
		case MAGIC_TYPE_MEDATE:
151
		case MAGIC_TYPE_MELDATE:
152
		case MAGIC_TYPE_STRING:
153
		case MAGIC_TYPE_PSTRING:
154
		case MAGIC_TYPE_BESTRING16:
155
		case MAGIC_TYPE_LESTRING16:
156
		case MAGIC_TYPE_REGEX:
157
		case MAGIC_TYPE_SEARCH:
158
			break;
159
		default:
160
			ml->stringify = 1;
161
			break;
162
		}
163
	}
164
165
	if (!ml->root->compiled) {
166
		/*
167
		 * XXX %ld (and %lu and so on) is invalid on 64-bit platforms
168
		 * with byte, short, long. We get lucky because our first and
169
		 * only argument ends up in a register. Accept it for now.
170
		 */
171
		if (magic_make_pattern(ml, "short", &ml->root->format_short,
172
		    "^-?[0-9]*(\\.[0-9]*)?(c|(l|h|hh)?[iduxX])$") != 0)
173
			return (-1);
174
		if (magic_make_pattern(ml, "long", &ml->root->format_long,
175
		    "^-?[0-9]*(\\.[0-9]*)?(c|(l|h|hh)?[iduxX])$") != 0)
176
			return (-1);
177
		if (magic_make_pattern(ml, "quad", &ml->root->format_quad,
178
		    "^-?[0-9]*(\\.[0-9]*)?ll[iduxX]$") != 0)
179
			return (-1);
180
		if (magic_make_pattern(ml, "float", &ml->root->format_float,
181
		    "^-?[0-9]*(\\.[0-9]*)?[eEfFgG]$") != 0)
182
			return (-1);
183
		if (magic_make_pattern(ml, "string", &ml->root->format_string,
184
		    "^-?[0-9]*(\\.[0-9]*)?s$") != 0)
185
			return (-1);
186
		ml->root->compiled = 1;
187
	}
188
189
	if (ml->stringify)
190
		re = &ml->root->format_string;
191
	else {
192
		switch (ml->type) {
193
		case MAGIC_TYPE_NONE:
194
		case MAGIC_TYPE_BESTRING16:
195
		case MAGIC_TYPE_LESTRING16:
196
		case MAGIC_TYPE_NAME:
197
		case MAGIC_TYPE_USE:
198
			return (0); /* don't use result */
199
		case MAGIC_TYPE_BYTE:
200
		case MAGIC_TYPE_UBYTE:
201
		case MAGIC_TYPE_SHORT:
202
		case MAGIC_TYPE_USHORT:
203
		case MAGIC_TYPE_BESHORT:
204
		case MAGIC_TYPE_UBESHORT:
205
		case MAGIC_TYPE_LESHORT:
206
		case MAGIC_TYPE_ULESHORT:
207
			re = &ml->root->format_short;
208
			break;
209
		case MAGIC_TYPE_LONG:
210
		case MAGIC_TYPE_ULONG:
211
		case MAGIC_TYPE_BELONG:
212
		case MAGIC_TYPE_UBELONG:
213
		case MAGIC_TYPE_LELONG:
214
		case MAGIC_TYPE_ULELONG:
215
		case MAGIC_TYPE_MELONG:
216
			re = &ml->root->format_long;
217
			break;
218
		case MAGIC_TYPE_QUAD:
219
		case MAGIC_TYPE_UQUAD:
220
		case MAGIC_TYPE_BEQUAD:
221
		case MAGIC_TYPE_UBEQUAD:
222
		case MAGIC_TYPE_LEQUAD:
223
		case MAGIC_TYPE_ULEQUAD:
224
			re = &ml->root->format_quad;
225
			break;
226
		case MAGIC_TYPE_FLOAT:
227
		case MAGIC_TYPE_BEFLOAT:
228
		case MAGIC_TYPE_LEFLOAT:
229
		case MAGIC_TYPE_DOUBLE:
230
		case MAGIC_TYPE_BEDOUBLE:
231
		case MAGIC_TYPE_LEDOUBLE:
232
			re = &ml->root->format_float;
233
			break;
234
		case MAGIC_TYPE_DATE:
235
		case MAGIC_TYPE_LDATE:
236
		case MAGIC_TYPE_UDATE:
237
		case MAGIC_TYPE_ULDATE:
238
		case MAGIC_TYPE_BEDATE:
239
		case MAGIC_TYPE_BELDATE:
240
		case MAGIC_TYPE_UBEDATE:
241
		case MAGIC_TYPE_UBELDATE:
242
		case MAGIC_TYPE_QDATE:
243
		case MAGIC_TYPE_QLDATE:
244
		case MAGIC_TYPE_UQDATE:
245
		case MAGIC_TYPE_UQLDATE:
246
		case MAGIC_TYPE_BEQDATE:
247
		case MAGIC_TYPE_BEQLDATE:
248
		case MAGIC_TYPE_UBEQDATE:
249
		case MAGIC_TYPE_UBEQLDATE:
250
		case MAGIC_TYPE_LEQDATE:
251
		case MAGIC_TYPE_LEQLDATE:
252
		case MAGIC_TYPE_ULEQDATE:
253
		case MAGIC_TYPE_ULEQLDATE:
254
		case MAGIC_TYPE_LEDATE:
255
		case MAGIC_TYPE_LELDATE:
256
		case MAGIC_TYPE_ULEDATE:
257
		case MAGIC_TYPE_ULELDATE:
258
		case MAGIC_TYPE_MEDATE:
259
		case MAGIC_TYPE_MELDATE:
260
		case MAGIC_TYPE_STRING:
261
		case MAGIC_TYPE_PSTRING:
262
		case MAGIC_TYPE_REGEX:
263
		case MAGIC_TYPE_SEARCH:
264
		case MAGIC_TYPE_DEFAULT:
265
		case MAGIC_TYPE_CLEAR:
266
			re = &ml->root->format_string;
267
			break;
268
		}
269
	}
270
271
	pmatch.rm_so = 0;
272
	pmatch.rm_eo = fmtlen;
273
	if (regexec(re, fmt, 1, &pmatch, REG_STARTEND) != 0) {
274
		magic_warn(ml, "bad format for %s: %%%.*s", ml->type_string,
275
		    (int)fmtlen, fmt);
276
		return (-1);
277
	}
278
279
	return (0);
280
}
281
282
static u_int
283
magic_get_strength(struct magic_line *ml)
284
{
285
	int	n;
286
	size_t	size;
287
288
	if (ml->type == MAGIC_TYPE_NONE)
289
		return (0);
290
291
	if (ml->test_not || ml->test_operator == 'x') {
292
		n = 1;
293
		goto skip;
294
	}
295
296
	n = 2 * MAGIC_STRENGTH_MULTIPLIER;
297
	switch (ml->type) {
298
	case MAGIC_TYPE_NONE:
299
	case MAGIC_TYPE_DEFAULT:
300
		return (0);
301
	case MAGIC_TYPE_CLEAR:
302
	case MAGIC_TYPE_NAME:
303
	case MAGIC_TYPE_USE:
304
		break;
305
	case MAGIC_TYPE_BYTE:
306
	case MAGIC_TYPE_UBYTE:
307
		n += 1 * MAGIC_STRENGTH_MULTIPLIER;
308
		break;
309
	case MAGIC_TYPE_SHORT:
310
	case MAGIC_TYPE_USHORT:
311
	case MAGIC_TYPE_BESHORT:
312
	case MAGIC_TYPE_UBESHORT:
313
	case MAGIC_TYPE_LESHORT:
314
	case MAGIC_TYPE_ULESHORT:
315
		n += 2 * MAGIC_STRENGTH_MULTIPLIER;
316
		break;
317
	case MAGIC_TYPE_LONG:
318
	case MAGIC_TYPE_ULONG:
319
	case MAGIC_TYPE_FLOAT:
320
	case MAGIC_TYPE_DATE:
321
	case MAGIC_TYPE_LDATE:
322
	case MAGIC_TYPE_UDATE:
323
	case MAGIC_TYPE_ULDATE:
324
	case MAGIC_TYPE_BELONG:
325
	case MAGIC_TYPE_UBELONG:
326
	case MAGIC_TYPE_BEFLOAT:
327
	case MAGIC_TYPE_BEDATE:
328
	case MAGIC_TYPE_BELDATE:
329
	case MAGIC_TYPE_UBEDATE:
330
	case MAGIC_TYPE_UBELDATE:
331
		n += 4 * MAGIC_STRENGTH_MULTIPLIER;
332
		break;
333
	case MAGIC_TYPE_QUAD:
334
	case MAGIC_TYPE_UQUAD:
335
	case MAGIC_TYPE_DOUBLE:
336
	case MAGIC_TYPE_QDATE:
337
	case MAGIC_TYPE_QLDATE:
338
	case MAGIC_TYPE_UQDATE:
339
	case MAGIC_TYPE_UQLDATE:
340
	case MAGIC_TYPE_BEQUAD:
341
	case MAGIC_TYPE_UBEQUAD:
342
	case MAGIC_TYPE_BEDOUBLE:
343
	case MAGIC_TYPE_BEQDATE:
344
	case MAGIC_TYPE_BEQLDATE:
345
	case MAGIC_TYPE_UBEQDATE:
346
	case MAGIC_TYPE_UBEQLDATE:
347
	case MAGIC_TYPE_LEQUAD:
348
	case MAGIC_TYPE_ULEQUAD:
349
	case MAGIC_TYPE_LEDOUBLE:
350
	case MAGIC_TYPE_LEQDATE:
351
	case MAGIC_TYPE_LEQLDATE:
352
	case MAGIC_TYPE_ULEQDATE:
353
	case MAGIC_TYPE_ULEQLDATE:
354
	case MAGIC_TYPE_LELONG:
355
	case MAGIC_TYPE_ULELONG:
356
	case MAGIC_TYPE_LEFLOAT:
357
	case MAGIC_TYPE_LEDATE:
358
	case MAGIC_TYPE_LELDATE:
359
	case MAGIC_TYPE_ULEDATE:
360
	case MAGIC_TYPE_ULELDATE:
361
	case MAGIC_TYPE_MELONG:
362
	case MAGIC_TYPE_MEDATE:
363
	case MAGIC_TYPE_MELDATE:
364
		n += 8 * MAGIC_STRENGTH_MULTIPLIER;
365
		break;
366
	case MAGIC_TYPE_STRING:
367
	case MAGIC_TYPE_PSTRING:
368
		n += ml->test_string_size * MAGIC_STRENGTH_MULTIPLIER;
369
		break;
370
	case MAGIC_TYPE_BESTRING16:
371
	case MAGIC_TYPE_LESTRING16:
372
		n += ml->test_string_size * MAGIC_STRENGTH_MULTIPLIER / 2;
373
		break;
374
	case MAGIC_TYPE_REGEX:
375
	case MAGIC_TYPE_SEARCH:
376
		size = MAGIC_STRENGTH_MULTIPLIER / ml->test_string_size;
377
		if (size < 1)
378
			size = 1;
379
		n += ml->test_string_size * size;
380
		break;
381
	}
382
	switch (ml->test_operator) {
383
	case '=':
384
		n += MAGIC_STRENGTH_MULTIPLIER;
385
		break;
386
	case '<':
387
	case '>':
388
	case '[':
389
	case ']':
390
		n -= 2 * MAGIC_STRENGTH_MULTIPLIER;
391
		break;
392
	case '^':
393
	case '&':
394
		n -= MAGIC_STRENGTH_MULTIPLIER;
395
		break;
396
	}
397
398
skip:
399
	switch (ml->strength_operator) {
400
	case '+':
401
		n += ml->strength_value;
402
		break;
403
	case '-':
404
		n -= ml->strength_value;
405
		break;
406
	case '*':
407
		n *= ml->strength_value;
408
		break;
409
	case '/':
410
		n /= ml->strength_value;
411
		break;
412
	}
413
	return (n <= 0 ? 1 : n);
414
}
415
416
static int
417
magic_get_string(char **line, char *out, size_t *outlen)
418
{
419
	char	*start, *cp, c;
420
	int	 d0, d1, d2;
421
422
	start = out;
423
	for (cp = *line; *cp != '\0' && !isspace((u_char)*cp); cp++) {
424
		if (*cp != '\\') {
425
			*out++ = *cp;
426
			continue;
427
		}
428
429
		switch (c = *++cp) {
430
		case '\0': /* end of line */
431
			return (-1);
432
		case ' ':
433
			*out++ = ' ';
434
			break;
435
		case '0':
436
		case '1':
437
		case '2':
438
		case '3':
439
		case '4':
440
		case '5':
441
		case '6':
442
		case '7':
443
			d0 = magic_odigit(cp[0]);
444
			if (cp[0] != '\0')
445
				d1 = magic_odigit(cp[1]);
446
			else
447
				d1 = -1;
448
			if (cp[0] != '\0' && cp[1] != '\0')
449
				d2 = magic_odigit(cp[2]);
450
			else
451
				d2 = -1;
452
453
			if (d0 != -1 && d1 != -1 && d2 != -1) {
454
				*out = d2 | (d1 << 3) | (d0 << 6);
455
				cp += 2;
456
			} else if (d0 != -1 && d1 != -1) {
457
				*out = d1 | (d0 << 3);
458
				cp++;
459
			} else if (d0 != -1)
460
				*out = d0;
461
			else
462
				return (-1);
463
			out++;
464
			break;
465
		case 'x':
466
			d0 = magic_xdigit(cp[1]);
467
			if (cp[1] != '\0')
468
				d1 = magic_xdigit(cp[2]);
469
			else
470
				d1 = -1;
471
472
			if (d0 != -1 && d1 != -1) {
473
				*out = d1 | (d0 << 4);
474
				cp += 2;
475
			} else if (d0 != -1) {
476
				*out = d0;
477
				cp++;
478
			} else
479
				return (-1);
480
			out++;
481
482
			break;
483
		case 'a':
484
			*out++ = '\a';
485
			break;
486
		case 'b':
487
			*out++ = '\b';
488
			break;
489
		case 't':
490
			*out++ = '\t';
491
			break;
492
		case 'f':
493
			*out++ = '\f';
494
			break;
495
		case 'n':
496
			*out++ = '\n';
497
			break;
498
		case 'r':
499
			*out++ = '\r';
500
			break;
501
		case '\\':
502
			*out++ = '\\';
503
			break;
504
		case '\'':
505
			*out++ = '\'';
506
			break;
507
		case '\"':
508
			*out++ = '\"';
509
			break;
510
		default:
511
			*out++ = c;
512
			break;
513
		}
514
	}
515
	*out = '\0';
516
	*outlen = out - start;
517
518
	*line = cp;
519
	return (0);
520
}
521
522
static int
523
magic_parse_offset(struct magic_line *ml, char **line)
524
{
525
	char	*copy, *s, *cp, *endptr;
526
527
	while (isspace((u_char)**line))
528
		(*line)++;
529
	copy = s = cp = xmalloc(strlen(*line) + 1);
530
	while (**line != '\0' && !isspace((u_char)**line))
531
		*cp++ = *(*line)++;
532
	*cp = '\0';
533
534
	ml->offset = 0;
535
	ml->offset_relative = 0;
536
537
	ml->indirect_type = ' ';
538
	ml->indirect_relative = 0;
539
	ml->indirect_offset = 0;
540
	ml->indirect_operator = ' ';
541
	ml->indirect_operand = 0;
542
543
	if (*s == '&') {
544
		ml->offset_relative = 1;
545
		s++;
546
	}
547
548
	if (*s != '(') {
549
		endptr = magic_strtoll(s, &ml->offset);
550
		if (endptr == NULL || *endptr != '\0') {
551
			magic_warn(ml, "missing closing bracket");
552
			goto fail;
553
		}
554
		if (ml->offset < 0 && !ml->offset_relative) {
555
			magic_warn(ml, "negative absolute offset");
556
			goto fail;
557
		}
558
		goto done;
559
	}
560
	s++;
561
562
	if (*s == '&') {
563
		ml->indirect_relative = 1;
564
		s++;
565
	}
566
567
	endptr = magic_strtoll(s, &ml->indirect_offset);
568
	if (endptr == NULL) {
569
		magic_warn(ml, "can't parse offset: %s", s);
570
		goto fail;
571
	}
572
	s = endptr;
573
	if (*s == ')')
574
		goto done;
575
576
	if (*s == '.') {
577
		s++;
578
		if (*s == '\0' || strchr("bslBSL", *s) == NULL) {
579
			magic_warn(ml, "unknown offset type: %c", *s);
580
			goto fail;
581
		}
582
		ml->indirect_type = *s;
583
		s++;
584
		if (*s == ')')
585
			goto done;
586
	}
587
588
	if (*s == '\0' || strchr("+-*", *s) == NULL) {
589
		magic_warn(ml, "unknown offset operator: %c", *s);
590
		goto fail;
591
	}
592
	ml->indirect_operator = *s;
593
	s++;
594
	if (*s == ')')
595
		goto done;
596
597
	if (*s == '(') {
598
		s++;
599
		endptr = magic_strtoll(s, &ml->indirect_operand);
600
		if (endptr == NULL || *endptr != ')') {
601
			magic_warn(ml, "missing closing bracket");
602
			goto fail;
603
		}
604
		if (*++endptr != ')') {
605
			magic_warn(ml, "missing closing bracket");
606
			goto fail;
607
		}
608
	} else {
609
		endptr = magic_strtoll(s, &ml->indirect_operand);
610
		if (endptr == NULL || *endptr != ')') {
611
			magic_warn(ml, "missing closing bracket");
612
			goto fail;
613
		}
614
	}
615
616
done:
617
	free(copy);
618
	return (0);
619
620
fail:
621
	free(copy);
622
	return (-1);
623
}
624
625
static int
626
magic_parse_type(struct magic_line *ml, char **line)
627
{
628
	char	*copy, *s, *cp, *endptr;
629
630
	while (isspace((u_char)**line))
631
		(*line)++;
632
	copy = s = cp = xmalloc(strlen(*line) + 1);
633
	while (**line != '\0' && !isspace((u_char)**line))
634
		*cp++ = *(*line)++;
635
	*cp = '\0';
636
637
	ml->type = MAGIC_TYPE_NONE;
638
	ml->type_operator = ' ';
639
	ml->type_operand = 0;
640
641
	if (strcmp(s, "name") == 0) {
642
		ml->type = MAGIC_TYPE_NAME;
643
		ml->type_string = xstrdup(s);
644
		goto done;
645
	}
646
	if (strcmp(s, "use") == 0) {
647
		ml->type = MAGIC_TYPE_USE;
648
		ml->type_string = xstrdup(s);
649
		goto done;
650
	}
651
652
	if (strncmp(s, "string", (sizeof "string") - 1) == 0 ||
653
	    strncmp(s, "ustring", (sizeof "ustring") - 1) == 0) {
654
		if (*s == 'u')
655
			ml->type_string = xstrdup(s + 1);
656
		else
657
			ml->type_string = xstrdup(s);
658
		ml->type = MAGIC_TYPE_STRING;
659
		magic_mark_text(ml, 0);
660
		goto done;
661
	}
662
	if (strncmp(s, "pstring", (sizeof "pstring") - 1) == 0 ||
663
	    strncmp(s, "upstring", (sizeof "upstring") - 1) == 0) {
664
		if (*s == 'u')
665
			ml->type_string = xstrdup(s + 1);
666
		else
667
			ml->type_string = xstrdup(s);
668
		ml->type = MAGIC_TYPE_PSTRING;
669
		magic_mark_text(ml, 0);
670
		goto done;
671
	}
672
	if (strncmp(s, "search", (sizeof "search") - 1) == 0 ||
673
	    strncmp(s, "usearch", (sizeof "usearch") - 1) == 0) {
674
		if (*s == 'u')
675
			ml->type_string = xstrdup(s + 1);
676
		else
677
			ml->type_string = xstrdup(s);
678
		ml->type = MAGIC_TYPE_SEARCH;
679
		goto done;
680
	}
681
	if (strncmp(s, "regex", (sizeof "regex") - 1) == 0 ||
682
	    strncmp(s, "uregex", (sizeof "uregex") - 1) == 0) {
683
		if (*s == 'u')
684
			ml->type_string = xstrdup(s + 1);
685
		else
686
			ml->type_string = xstrdup(s);
687
		ml->type = MAGIC_TYPE_REGEX;
688
		goto done;
689
	}
690
	ml->type_string = xstrdup(s);
691
692
	cp = &s[strcspn(s, "+-&/%*")];
693
	if (*cp != '\0') {
694
		ml->type_operator = *cp;
695
		endptr = magic_strtoull(cp + 1, &ml->type_operand);
696
		if (endptr == NULL || *endptr != '\0') {
697
			magic_warn(ml, "can't parse operand: %s", cp + 1);
698
			goto fail;
699
		}
700
		*cp = '\0';
701
	}
702
703
	if (strcmp(s, "byte") == 0)
704
		ml->type = MAGIC_TYPE_BYTE;
705
	else if (strcmp(s, "short") == 0)
706
		ml->type = MAGIC_TYPE_SHORT;
707
	else if (strcmp(s, "long") == 0)
708
		ml->type = MAGIC_TYPE_LONG;
709
	else if (strcmp(s, "quad") == 0)
710
		ml->type = MAGIC_TYPE_QUAD;
711
	else if (strcmp(s, "ubyte") == 0)
712
		ml->type = MAGIC_TYPE_UBYTE;
713
	else if (strcmp(s, "ushort") == 0)
714
		ml->type = MAGIC_TYPE_USHORT;
715
	else if (strcmp(s, "ulong") == 0)
716
		ml->type = MAGIC_TYPE_ULONG;
717
	else if (strcmp(s, "uquad") == 0)
718
		ml->type = MAGIC_TYPE_UQUAD;
719
	else if (strcmp(s, "float") == 0 || strcmp(s, "ufloat") == 0)
720
		ml->type = MAGIC_TYPE_FLOAT;
721
	else if (strcmp(s, "double") == 0 || strcmp(s, "udouble") == 0)
722
		ml->type = MAGIC_TYPE_DOUBLE;
723
	else if (strcmp(s, "date") == 0)
724
		ml->type = MAGIC_TYPE_DATE;
725
	else if (strcmp(s, "qdate") == 0)
726
		ml->type = MAGIC_TYPE_QDATE;
727
	else if (strcmp(s, "ldate") == 0)
728
		ml->type = MAGIC_TYPE_LDATE;
729
	else if (strcmp(s, "qldate") == 0)
730
		ml->type = MAGIC_TYPE_QLDATE;
731
	else if (strcmp(s, "udate") == 0)
732
		ml->type = MAGIC_TYPE_UDATE;
733
	else if (strcmp(s, "uqdate") == 0)
734
		ml->type = MAGIC_TYPE_UQDATE;
735
	else if (strcmp(s, "uldate") == 0)
736
		ml->type = MAGIC_TYPE_ULDATE;
737
	else if (strcmp(s, "uqldate") == 0)
738
		ml->type = MAGIC_TYPE_UQLDATE;
739
	else if (strcmp(s, "beshort") == 0)
740
		ml->type = MAGIC_TYPE_BESHORT;
741
	else if (strcmp(s, "belong") == 0)
742
		ml->type = MAGIC_TYPE_BELONG;
743
	else if (strcmp(s, "bequad") == 0)
744
		ml->type = MAGIC_TYPE_BEQUAD;
745
	else if (strcmp(s, "ubeshort") == 0)
746
		ml->type = MAGIC_TYPE_UBESHORT;
747
	else if (strcmp(s, "ubelong") == 0)
748
		ml->type = MAGIC_TYPE_UBELONG;
749
	else if (strcmp(s, "ubequad") == 0)
750
		ml->type = MAGIC_TYPE_UBEQUAD;
751
	else if (strcmp(s, "befloat") == 0 || strcmp(s, "ubefloat") == 0)
752
		ml->type = MAGIC_TYPE_BEFLOAT;
753
	else if (strcmp(s, "bedouble") == 0 || strcmp(s, "ubedouble") == 0)
754
		ml->type = MAGIC_TYPE_BEDOUBLE;
755
	else if (strcmp(s, "bedate") == 0)
756
		ml->type = MAGIC_TYPE_BEDATE;
757
	else if (strcmp(s, "beqdate") == 0)
758
		ml->type = MAGIC_TYPE_BEQDATE;
759
	else if (strcmp(s, "beldate") == 0)
760
		ml->type = MAGIC_TYPE_BELDATE;
761
	else if (strcmp(s, "beqldate") == 0)
762
		ml->type = MAGIC_TYPE_BEQLDATE;
763
	else if (strcmp(s, "ubedate") == 0)
764
		ml->type = MAGIC_TYPE_UBEDATE;
765
	else if (strcmp(s, "ubeqdate") == 0)
766
		ml->type = MAGIC_TYPE_UBEQDATE;
767
	else if (strcmp(s, "ubeldate") == 0)
768
		ml->type = MAGIC_TYPE_UBELDATE;
769
	else if (strcmp(s, "ubeqldate") == 0)
770
		ml->type = MAGIC_TYPE_UBEQLDATE;
771
	else if (strcmp(s, "bestring16") == 0 || strcmp(s, "ubestring16") == 0)
772
		ml->type = MAGIC_TYPE_BESTRING16;
773
	else if (strcmp(s, "leshort") == 0)
774
		ml->type = MAGIC_TYPE_LESHORT;
775
	else if (strcmp(s, "lelong") == 0)
776
		ml->type = MAGIC_TYPE_LELONG;
777
	else if (strcmp(s, "lequad") == 0)
778
		ml->type = MAGIC_TYPE_LEQUAD;
779
	else if (strcmp(s, "uleshort") == 0)
780
		ml->type = MAGIC_TYPE_ULESHORT;
781
	else if (strcmp(s, "ulelong") == 0)
782
		ml->type = MAGIC_TYPE_ULELONG;
783
	else if (strcmp(s, "ulequad") == 0)
784
		ml->type = MAGIC_TYPE_ULEQUAD;
785
	else if (strcmp(s, "lefloat") == 0 || strcmp(s, "ulefloat") == 0)
786
		ml->type = MAGIC_TYPE_LEFLOAT;
787
	else if (strcmp(s, "ledouble") == 0 || strcmp(s, "uledouble") == 0)
788
		ml->type = MAGIC_TYPE_LEDOUBLE;
789
	else if (strcmp(s, "ledate") == 0)
790
		ml->type = MAGIC_TYPE_LEDATE;
791
	else if (strcmp(s, "leqdate") == 0)
792
		ml->type = MAGIC_TYPE_LEQDATE;
793
	else if (strcmp(s, "leldate") == 0)
794
		ml->type = MAGIC_TYPE_LELDATE;
795
	else if (strcmp(s, "leqldate") == 0)
796
		ml->type = MAGIC_TYPE_LEQLDATE;
797
	else if (strcmp(s, "uledate") == 0)
798
		ml->type = MAGIC_TYPE_ULEDATE;
799
	else if (strcmp(s, "uleqdate") == 0)
800
		ml->type = MAGIC_TYPE_ULEQDATE;
801
	else if (strcmp(s, "uleldate") == 0)
802
		ml->type = MAGIC_TYPE_ULELDATE;
803
	else if (strcmp(s, "uleqldate") == 0)
804
		ml->type = MAGIC_TYPE_ULEQLDATE;
805
	else if (strcmp(s, "lestring16") == 0 || strcmp(s, "ulestring16") == 0)
806
		ml->type = MAGIC_TYPE_LESTRING16;
807
	else if (strcmp(s, "melong") == 0 || strcmp(s, "umelong") == 0)
808
		ml->type = MAGIC_TYPE_MELONG;
809
	else if (strcmp(s, "medate") == 0 || strcmp(s, "umedate") == 0)
810
		ml->type = MAGIC_TYPE_MEDATE;
811
	else if (strcmp(s, "meldate") == 0 || strcmp(s, "umeldate") == 0)
812
		ml->type = MAGIC_TYPE_MELDATE;
813
	else if (strcmp(s, "default") == 0 || strcmp(s, "udefault") == 0)
814
		ml->type = MAGIC_TYPE_DEFAULT;
815
	else if (strcmp(s, "clear") == 0 || strcmp(s, "uclear") == 0)
816
		ml->type = MAGIC_TYPE_CLEAR;
817
	else {
818
		magic_warn(ml, "unknown type: %s", s);
819
		goto fail;
820
	}
821
	magic_mark_text(ml, 0);
822
823
done:
824
	free(copy);
825
	return (0);
826
827
fail:
828
	free(copy);
829
	return (-1);
830
}
831
832
static int
833
magic_parse_value(struct magic_line *ml, char **line)
834
{
835
	char	*copy, *s, *cp, *endptr;
836
	size_t	 slen;
837
	uint64_t u;
838
839
	while (isspace((u_char)**line))
840
		(*line)++;
841
842
	ml->test_operator = '=';
843
	ml->test_not = 0;
844
	ml->test_string = NULL;
845
	ml->test_string_size = 0;
846
	ml->test_unsigned = 0;
847
	ml->test_signed = 0;
848
849
	if (**line == '\0')
850
		return (0);
851
852
	s = *line;
853
	if (s[0] == 'x' && (s[1] == '\0' || isspace((u_char)s[1]))) {
854
		(*line)++;
855
		ml->test_operator = 'x';
856
		return (0);
857
	}
858
859
	if (ml->type == MAGIC_TYPE_DEFAULT || ml->type == MAGIC_TYPE_CLEAR) {
860
		magic_warn(ml, "test specified for default or clear");
861
		ml->test_operator = 'x';
862
		return (0);
863
	}
864
865
	if (**line == '!') {
866
		ml->test_not = 1;
867
		(*line)++;
868
	}
869
870
	switch (ml->type) {
871
	case MAGIC_TYPE_NAME:
872
	case MAGIC_TYPE_USE:
873
		copy = s = xmalloc(strlen(*line) + 1);
874
		if (magic_get_string(line, s, &slen) != 0 || slen == 0) {
875
			magic_warn(ml, "can't parse string");
876
			goto fail;
877
		}
878
		if (slen == 0 || *s == '\0' || strcmp(s, "^") == 0) {
879
			magic_warn(ml, "invalid name");
880
			goto fail;
881
		}
882
		ml->name = s;
883
		return (0); /* do not free */
884
	case MAGIC_TYPE_STRING:
885
	case MAGIC_TYPE_PSTRING:
886
	case MAGIC_TYPE_SEARCH:
887
		if (**line == '>' || **line == '<' || **line == '=') {
888
			ml->test_operator = **line;
889
			(*line)++;
890
		}
891
		/* FALLTHROUGH */
892
	case MAGIC_TYPE_REGEX:
893
		if (**line == '=')
894
			(*line)++;
895
		copy = s = xmalloc(strlen(*line) + 1);
896
		if (magic_get_string(line, s, &slen) != 0) {
897
			magic_warn(ml, "can't parse string");
898
			goto fail;
899
		}
900
		ml->test_string_size = slen;
901
		ml->test_string = s;
902
		return (0); /* do not free */
903
	default:
904
		break;
905
	}
906
907
	while (isspace((u_char)**line))
908
		(*line)++;
909
	if ((*line)[0] == '<' && (*line)[1] == '=') {
910
		ml->test_operator = '[';
911
		(*line) += 2;
912
	} else if ((*line)[0] == '>' && (*line)[1] == '=') {
913
		ml->test_operator = ']';
914
		(*line) += 2;
915
	} else if (**line != '\0' && strchr("=<>&^", **line) != NULL) {
916
		ml->test_operator = **line;
917
		(*line)++;
918
	}
919
920
	while (isspace((u_char)**line))
921
		(*line)++;
922
	copy = cp = xmalloc(strlen(*line) + 1);
923
	while (**line != '\0' && !isspace((u_char)**line))
924
		*cp++ = *(*line)++;
925
	*cp = '\0';
926
927
	switch (ml->type) {
928
	case MAGIC_TYPE_FLOAT:
929
	case MAGIC_TYPE_DOUBLE:
930
	case MAGIC_TYPE_BEFLOAT:
931
	case MAGIC_TYPE_BEDOUBLE:
932
	case MAGIC_TYPE_LEFLOAT:
933
	case MAGIC_TYPE_LEDOUBLE:
934
		errno = 0;
935
		ml->test_double = strtod(copy, &endptr);
936
		if (errno == ERANGE)
937
			endptr = NULL;
938
		break;
939
	default:
940
		if (*ml->type_string == 'u')
941
			endptr = magic_strtoull(copy, &ml->test_unsigned);
942
		else {
943
			endptr = magic_strtoll(copy, &ml->test_signed);
944
			if (endptr == NULL || *endptr != '\0') {
945
				/*
946
				 * If we can't parse this as a signed number,
947
				 * try as unsigned instead.
948
				 */
949
				endptr = magic_strtoull(copy, &u);
950
				if (endptr != NULL && *endptr == '\0')
951
					ml->test_signed = (int64_t)u;
952
			}
953
		}
954
		break;
955
	}
956
	if (endptr == NULL || *endptr != '\0') {
957
		magic_warn(ml, "can't parse number: %s", copy);
958
		goto fail;
959
	}
960
961
	free(copy);
962
	return (0);
963
964
fail:
965
	free(copy);
966
	return (-1);
967
}
968
969
int
970
magic_compare(struct magic_line *ml1, struct magic_line *ml2)
971
{
972
	if (ml1->strength < ml2->strength)
973
		return (1);
974
	if (ml1->strength > ml2->strength)
975
		return (-1);
976
977
	/*
978
	 * The original file depends on the (undefined!) qsort(3) behaviour
979
	 * when the strength is equal. This is impossible to reproduce with an
980
	 * RB tree so just use the line number and hope for the best.
981
	 */
982
	if (ml1->line < ml2->line)
983
		return (-1);
984
	if (ml1->line > ml2->line)
985
		return (1);
986
987
	return (0);
988
}
989
RB_GENERATE(magic_tree, magic_line, node, magic_compare);
990
991
int
992
magic_named_compare(struct magic_line *ml1, struct magic_line *ml2)
993
{
994
	return (strcmp(ml1->name, ml2->name));
995
}
996
RB_GENERATE(magic_named_tree, magic_line, node, magic_named_compare);
997
998
static void
999
magic_adjust_strength(struct magic *m, u_int at, struct magic_line *ml,
1000
    char *line)
1001
{
1002
	char	*cp, *s;
1003
	int64_t	 value;
1004
1005
	cp = line + (sizeof "!:strength") - 1;
1006
	while (isspace((u_char)*cp))
1007
		cp++;
1008
	s = cp;
1009
1010
	cp = strchr(s, '#');
1011
	if (cp != NULL)
1012
		*cp = '\0';
1013
	cp = s;
1014
1015
	if (*s == '\0' || strchr("+-*/", *s) == NULL) {
1016
		magic_warnm(m, at, "invalid strength operator: %s", s);
1017
		return;
1018
	}
1019
	ml->strength_operator = *cp++;
1020
1021
	while (isspace((u_char)*cp))
1022
		cp++;
1023
	cp = magic_strtoll(cp, &value);
1024
	while (cp != NULL && isspace((u_char)*cp))
1025
		cp++;
1026
	if (cp == NULL || *cp != '\0' || value < 0 || value > 255) {
1027
		magic_warnm(m, at, "invalid strength value: %s", s);
1028
		return;
1029
	}
1030
	ml->strength_value = value;
1031
}
1032
1033
static void
1034
magic_set_mimetype(struct magic *m, u_int at, struct magic_line *ml, char *line)
1035
{
1036
	char	*mimetype, *cp;
1037
1038
	mimetype = line + (sizeof "!:mime") - 1;
1039
	while (isspace((u_char)*mimetype))
1040
		mimetype++;
1041
1042
	cp = strchr(mimetype, '#');
1043
	if (cp != NULL)
1044
		*cp = '\0';
1045
1046
	if (*mimetype != '\0') {
1047
		cp = mimetype + strlen(mimetype) - 1;
1048
		while (cp != mimetype && isspace((u_char)*cp))
1049
			*cp-- = '\0';
1050
	}
1051
1052
	cp = mimetype;
1053
	while (*cp != '\0') {
1054
		if (!isalnum((u_char)*cp) && strchr("/-.+", *cp) == NULL)
1055
			break;
1056
		cp++;
1057
	}
1058
	if (*mimetype == '\0' || *cp != '\0') {
1059
		magic_warnm(m, at, "invalid MIME type: %s", mimetype);
1060
		return;
1061
	}
1062
	if (ml == NULL) {
1063
		magic_warnm(m, at, "stray MIME type: %s", mimetype);
1064
		return;
1065
	}
1066
	ml->mimetype = xstrdup(mimetype);
1067
}
1068
1069
struct magic *
1070
magic_load(FILE *f, const char *path, int warnings)
1071
{
1072
	struct magic		*m;
1073
	struct magic_line	*ml = NULL, *parent, *parent0;
1074
	char			*line, *tmp;
1075
	size_t			 size;
1076
	u_int			 at, level, n, i;
1077
1078
	m = xcalloc(1, sizeof *m);
1079
	m->path = xstrdup(path);
1080
	m->warnings = warnings;
1081
	RB_INIT(&m->tree);
1082
1083
	parent = NULL;
1084
	parent0 = NULL;
1085
	level = 0;
1086
1087
	at = 0;
1088
	tmp = NULL;
1089
	while ((line = fgetln(f, &size))) {
1090
		if (line[size - 1] == '\n')
1091
			line[size - 1] = '\0';
1092
		else {
1093
			tmp = xmalloc(size + 1);
1094
			memcpy(tmp, line, size);
1095
			tmp[size] = '\0';
1096
			line = tmp;
1097
		}
1098
		at++;
1099
1100
		while (isspace((u_char)*line))
1101
		    line++;
1102
		if (*line == '\0' || *line == '#')
1103
			continue;
1104
1105
		if (strncmp (line, "!:mime", 6) == 0) {
1106
			magic_set_mimetype(m, at, ml, line);
1107
			continue;
1108
		}
1109
		if (strncmp (line, "!:strength", 10) == 0) {
1110
			magic_adjust_strength(m, at, ml, line);
1111
			continue;
1112
		}
1113
		if (strncmp (line, "!:", 2) == 0) {
1114
			for (i = 0; i < 64 && line[i] != '\0'; i++) {
1115
				if (isspace((u_char)line[i]))
1116
					break;
1117
			}
1118
			magic_warnm(m, at, "%.*s not supported", i, line);
1119
			continue;
1120
		}
1121
1122
		n = 0;
1123
		for (; *line == '>'; line++)
1124
			n++;
1125
1126
		ml = xcalloc(1, sizeof *ml);
1127
		ml->root = m;
1128
		ml->line = at;
1129
		ml->type = MAGIC_TYPE_NONE;
1130
		TAILQ_INIT(&ml->children);
1131
		ml->text = 1;
1132
1133
		/*
1134
		 * At this point n is the level we want, level is the current
1135
		 * level. parent0 is the last line at the same level and parent
1136
		 * is the last line at the previous level.
1137
		 */
1138
		if (n == level + 1) {
1139
			parent = parent0;
1140
		} else if (n < level) {
1141
			for (i = n; i < level && parent != NULL; i++)
1142
				parent = parent->parent;
1143
		} else if (n != level) {
1144
			magic_warn(ml, "level skipped (%u->%u)", level, n);
1145
			free(ml);
1146
			continue;
1147
		}
1148
		ml->parent = parent;
1149
		level = n;
1150
1151
		if (magic_parse_offset(ml, &line) != 0 ||
1152
		    magic_parse_type(ml, &line) != 0 ||
1153
		    magic_parse_value(ml, &line) != 0 ||
1154
		    magic_set_result(ml, line) != 0) {
1155
			/*
1156
			 * An invalid line still needs to appear in the tree in
1157
			 * case it has any children.
1158
			 */
1159
			ml->type = MAGIC_TYPE_NONE;
1160
		}
1161
1162
		ml->strength = magic_get_strength(ml);
1163
		if (ml->parent == NULL) {
1164
			if (ml->name != NULL)
1165
				RB_INSERT(magic_named_tree, &m->named, ml);
1166
			else
1167
				RB_INSERT(magic_tree, &m->tree, ml);
1168
		} else
1169
			TAILQ_INSERT_TAIL(&ml->parent->children, ml, entry);
1170
		parent0 = ml;
1171
	}
1172
	free(tmp);
1173
1174
	fclose(f);
1175
	return (m);
1176
}