GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: usr.bin/file/text.c Lines: 0 38 0.0 %
Date: 2017-11-13 Branches: 0 36 0.0 %

Line Branch Exec Source
1
/* $OpenBSD: text.c,v 1.3 2017/04/18 14:16:48 nicm Exp $ */
2
3
/*
4
 * Copyright (c) 2015 Nicholas Marriott <nicm@openbsd.org>
5
 *
6
 * Permission to use, copy, modify, and distribute this software for any
7
 * purpose with or without fee is hereby granted, provided that the above
8
 * copyright notice and this permission notice appear in all copies.
9
 *
10
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14
 * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER
15
 * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
16
 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17
 */
18
19
#include <sys/types.h>
20
21
#include <ctype.h>
22
#include <string.h>
23
24
#include "file.h"
25
#include "magic.h"
26
#include "xmalloc.h"
27
28
/*
 * Keyword table consulted by text_try_words().
 *
 * Each row is { word, description, MIME type }.  A whitespace-delimited
 * token that is byte-for-byte equal to "word" selects the row; the caller
 * is then handed either the description (column 1) or the MIME type
 * (column 2).  The table ends with an all-NULL sentinel row.
 *
 * Both the strings and the pointers to them are const: nothing writes to
 * this table at run time.
 */
static const char *const text_words[][3] = {
	{ "msgid", "PO (gettext message catalogue)", "text/x-po" },
	{ "dnl", "M4 macro language pre-processor", "text/x-m4" },
	{ "import", "Java program", "text/x-java" },
	{ "\"libhdr\"", "BCPL program", "text/x-bcpl" },
	{ "\"LIBHDR\"", "BCPL program", "text/x-bcpl" },
	{ "//", "C++ program", "text/x-c++" },
	{ "virtual", "C++ program", "text/x-c++" },
	{ "class", "C++ program", "text/x-c++" },
	{ "public:", "C++ program", "text/x-c++" },
	{ "private:", "C++ program", "text/x-c++" },
	{ "/*", "C program", "text/x-c" },
	{ "#include", "C program", "text/x-c" },
	{ "char", "C program", "text/x-c" },
	{ "The", "English", "text/plain" },
	{ "the", "English", "text/plain" },
	{ "double", "C program", "text/x-c" },
	{ "extern", "C program", "text/x-c" },
	{ "float", "C program", "text/x-c" },
	{ "struct", "C program", "text/x-c" },
	{ "union", "C program", "text/x-c" },
	{ "CFLAGS", "make commands", "text/x-makefile" },
	{ "LDFLAGS", "make commands", "text/x-makefile" },
	{ "all:", "make commands", "text/x-makefile" },
	{ ".PRECIOUS", "make commands", "text/x-makefile" },
	{ ".ascii", "assembler program", "text/x-asm" },
	{ ".asciiz", "assembler program", "text/x-asm" },
	{ ".byte", "assembler program", "text/x-asm" },
	{ ".even", "assembler program", "text/x-asm" },
	{ ".globl", "assembler program", "text/x-asm" },
	{ ".text", "assembler program", "text/x-asm" },
	{ "clr", "assembler program", "text/x-asm" },
	{ "(input", "Pascal program", "text/x-pascal" },
	{ "program", "Pascal program", "text/x-pascal" },
	{ "record", "Pascal program", "text/x-pascal" },
	{ "dcl", "PL/1 program", "text/x-pl1" },
	{ "Received:", "mail", "text/x-mail" },
	{ ">From", "mail", "text/x-mail" },
	{ "Return-Path:", "mail", "text/x-mail" },
	{ "Cc:", "mail", "text/x-mail" },
	{ "Newsgroups:", "news", "text/x-news" },
	{ "Path:", "news", "text/x-news" },
	{ "Organization:", "news", "text/x-news" },
	{ "href=", "HTML document", "text/html" },
	{ "HREF=", "HTML document", "text/html" },
	{ "<body", "HTML document", "text/html" },
	{ "<BODY", "HTML document", "text/html" },
	{ "<html", "HTML document", "text/html" },
	{ "<HTML", "HTML document", "text/html" },
	{ "<!--", "HTML document", "text/html" },
	{ NULL, NULL, NULL }
};
80
81
/*
 * Return 1 if c is a byte acceptable in plain ASCII text, otherwise 0.
 *
 * Accepted are the printable range 32..126 plus seven control characters:
 * BEL, BS, TAB, LF, FF, CR and ESC.  NUL and every other control character
 * (including VT), DEL and all bytes >= 128 are rejected.
 */
static int
text_is_ascii(u_char c)
{
	switch (c) {
	case '\007':	/* BEL */
	case '\010':	/* BS */
	case '\011':	/* TAB */
	case '\012':	/* LF */
	case '\014':	/* FF */
	case '\015':	/* CR */
	case '\033':	/* ESC */
		return (1);
	default:
		/* NUL lands here too and fails the range check. */
		return (c > 31 && c < 127);
	}
}
92
93
static int
94
text_is_latin1(u_char c)
95
{
96
	if (c >= 160)
97
		return (1);
98
	return (text_is_ascii(c));
99
}
100
101
static int
102
text_is_extended(u_char c)
103
{
104
	if (c >= 128)
105
		return (1);
106
	return (text_is_ascii(c));
107
}
108
109
static int
110
text_try_test(const void *base, size_t size, int (*f)(u_char))
111
{
112
	const u_char	*data = base;
113
	size_t		 offset;
114
115
	for (offset = 0; offset < size; offset++) {
116
		if (!f(data[offset]))
117
			return (0);
118
	}
119
	return (1);
120
}
121
122
const char *
123
text_get_type(const void *base, size_t size)
124
{
125
	if (text_try_test(base, size, text_is_ascii))
126
		return ("ASCII");
127
	if (text_try_test(base, size, text_is_latin1))
128
		return ("ISO-8859");
129
	if (text_try_test(base, size, text_is_extended))
130
		return ("Non-ISO extended-ASCII");
131
	return (NULL);
132
}
133
134
const char *
135
text_try_words(const void *base, size_t size, int flags)
136
{
137
	const char	*cp, *end, *next, *word;
138
	size_t		 wordlen;
139
	u_int		 i;
140
141
	end = (const char *)base + size;
142
	for (cp = base; cp != end; /* nothing */) {
143
		while (cp != end && isspace((u_char)*cp))
144
			cp++;
145
146
		next = cp;
147
		while (next != end && !isspace((u_char)*next))
148
			next++;
149
150
		for (i = 0; /* nothing */; i++) {
151
			word = text_words[i][0];
152
			if (word == NULL)
153
				break;
154
			wordlen = strlen(word);
155
156
			if ((size_t)(next - cp) != wordlen)
157
				continue;
158
			if (memcmp(cp, word, wordlen) != 0)
159
				continue;
160
			if (flags & MAGIC_TEST_MIME)
161
				return (text_words[i][2]);
162
			return (text_words[i][1]);
163
		}
164
165
		cp = next;
166
	}
167
	return (NULL);
168
}