1 |
|
|
/* $OpenBSD: text.c,v 1.3 2017/04/18 14:16:48 nicm Exp $ */ |
2 |
|
|
|
3 |
|
|
/* |
4 |
|
|
* Copyright (c) 2015 Nicholas Marriott <nicm@openbsd.org> |
5 |
|
|
* |
6 |
|
|
* Permission to use, copy, modify, and distribute this software for any |
7 |
|
|
* purpose with or without fee is hereby granted, provided that the above |
8 |
|
|
* copyright notice and this permission notice appear in all copies. |
9 |
|
|
* |
10 |
|
|
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
11 |
|
|
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
12 |
|
|
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR |
13 |
|
|
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
14 |
|
|
* WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER |
15 |
|
|
* IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING |
16 |
|
|
* OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
17 |
|
|
*/ |
18 |
|
|
|
19 |
|
|
#include <sys/types.h> |
20 |
|
|
|
21 |
|
|
#include <ctype.h> |
22 |
|
|
#include <string.h> |
23 |
|
|
|
24 |
|
|
#include "file.h" |
25 |
|
|
#include "magic.h" |
26 |
|
|
#include "xmalloc.h" |
27 |
|
|
|
28 |
|
|
/*
 * Keyword table used by text_try_words(). Each row holds three strings:
 * [0] the whitespace-delimited word to look for, [1] the human-readable
 * description and [2] the MIME type. The table ends with an all-NULL row.
 */
static const char *text_words[][3] = {
	/* gettext, m4, Java, BCPL */
	{ "msgid",		"PO (gettext message catalogue)",	"text/x-po" },
	{ "dnl",		"M4 macro language pre-processor",	"text/x-m4" },
	{ "import",		"Java program",				"text/x-java" },
	{ "\"libhdr\"",		"BCPL program",				"text/x-bcpl" },
	{ "\"LIBHDR\"",		"BCPL program",				"text/x-bcpl" },

	/* C++ */
	{ "//",			"C++ program",				"text/x-c++" },
	{ "virtual",		"C++ program",				"text/x-c++" },
	{ "class",		"C++ program",				"text/x-c++" },
	{ "public:",		"C++ program",				"text/x-c++" },
	{ "private:",		"C++ program",				"text/x-c++" },

	/* C and plain English */
	{ "/*",			"C program",				"text/x-c" },
	{ "#include",		"C program",				"text/x-c" },
	{ "char",		"C program",				"text/x-c" },
	{ "The",		"English",				"text/plain" },
	{ "the",		"English",				"text/plain" },
	{ "double",		"C program",				"text/x-c" },
	{ "extern",		"C program",				"text/x-c" },
	{ "float",		"C program",				"text/x-c" },
	{ "struct",		"C program",				"text/x-c" },
	{ "union",		"C program",				"text/x-c" },

	/* make */
	{ "CFLAGS",		"make commands",			"text/x-makefile" },
	{ "LDFLAGS",		"make commands",			"text/x-makefile" },
	{ "all:",		"make commands",			"text/x-makefile" },
	{ ".PRECIOUS",		"make commands",			"text/x-makefile" },

	/* assembler */
	{ ".ascii",		"assembler program",			"text/x-asm" },
	{ ".asciiz",		"assembler program",			"text/x-asm" },
	{ ".byte",		"assembler program",			"text/x-asm" },
	{ ".even",		"assembler program",			"text/x-asm" },
	{ ".globl",		"assembler program",			"text/x-asm" },
	{ ".text",		"assembler program",			"text/x-asm" },
	{ "clr",		"assembler program",			"text/x-asm" },

	/* Pascal, PL/1 */
	{ "(input",		"Pascal program",			"text/x-pascal" },
	{ "program",		"Pascal program",			"text/x-pascal" },
	{ "record",		"Pascal program",			"text/x-pascal" },
	{ "dcl",		"PL/1 program",				"text/x-pl1" },

	/* mail and news headers */
	{ "Received:",		"mail",					"text/x-mail" },
	{ ">From",		"mail",					"text/x-mail" },
	{ "Return-Path:",	"mail",					"text/x-mail" },
	{ "Cc:",		"mail",					"text/x-mail" },
	{ "Newsgroups:",	"news",					"text/x-news" },
	{ "Path:",		"news",					"text/x-news" },
	{ "Organization:",	"news",					"text/x-news" },

	/* HTML */
	{ "href=",		"HTML document",			"text/html" },
	{ "HREF=",		"HTML document",			"text/html" },
	{ "<body",		"HTML document",			"text/html" },
	{ "<BODY",		"HTML document",			"text/html" },
	{ "<html",		"HTML document",			"text/html" },
	{ "<HTML",		"HTML document",			"text/html" },
	{ "<!--",		"HTML document",			"text/html" },

	/* terminator */
	{ NULL,			NULL,					NULL }
};
80 |
|
|
|
81 |
|
|
/*
 * Return 1 if the byte is accepted as ASCII text, 0 otherwise. Accepted
 * bytes are 32 to 126 inclusive plus the control bytes 7, 8, 9, 10, 12,
 * 13 and 27. NUL and every other byte are rejected.
 */
static int
text_is_ascii(u_char c)
{
	switch (c) {
	case '\007':
	case '\010':
	case '\011':
	case '\012':
	case '\014':
	case '\015':
	case '\033':
		return (1);
	default:
		break;
	}
	return (c >= 32 && c <= 126);
}
92 |
|
|
|
93 |
|
|
/*
 * Return 1 if the byte is 160 or above; otherwise return whatever
 * text_is_ascii() says about it.
 */
static int
text_is_latin1(u_char c)
{
	return (c >= 160 ? 1 : text_is_ascii(c));
}
100 |
|
|
|
101 |
|
|
/*
 * Return 1 for any byte with the top bit set (128 or above); bytes below
 * that are judged by text_is_ascii().
 */
static int
text_is_extended(u_char c)
{
	if (c < 128)
		return (text_is_ascii(c));
	return (1);
}
108 |
|
|
|
109 |
|
|
/*
 * Apply the predicate f to each of the size bytes starting at base, in
 * order. Return 0 as soon as f returns zero for a byte, or 1 if every byte
 * passes. An empty buffer (size 0) yields 1 without calling f.
 */
static int
text_try_test(const void *base, size_t size, int (*f)(u_char))
{
	const u_char	*p = base;
	size_t		 left;

	for (left = size; left > 0; left--) {
		if (f(*p++) == 0)
			return (0);
	}
	return (1);
}
121 |
|
|
|
122 |
|
|
const char * |
123 |
|
|
text_get_type(const void *base, size_t size) |
124 |
|
|
{ |
125 |
|
|
if (text_try_test(base, size, text_is_ascii)) |
126 |
|
|
return ("ASCII"); |
127 |
|
|
if (text_try_test(base, size, text_is_latin1)) |
128 |
|
|
return ("ISO-8859"); |
129 |
|
|
if (text_try_test(base, size, text_is_extended)) |
130 |
|
|
return ("Non-ISO extended-ASCII"); |
131 |
|
|
return (NULL); |
132 |
|
|
} |
133 |
|
|
|
134 |
|
|
const char * |
135 |
|
|
text_try_words(const void *base, size_t size, int flags) |
136 |
|
|
{ |
137 |
|
|
const char *cp, *end, *next, *word; |
138 |
|
|
size_t wordlen; |
139 |
|
|
u_int i; |
140 |
|
|
|
141 |
|
|
end = (const char *)base + size; |
142 |
|
|
for (cp = base; cp != end; /* nothing */) { |
143 |
|
|
while (cp != end && isspace((u_char)*cp)) |
144 |
|
|
cp++; |
145 |
|
|
|
146 |
|
|
next = cp; |
147 |
|
|
while (next != end && !isspace((u_char)*next)) |
148 |
|
|
next++; |
149 |
|
|
|
150 |
|
|
for (i = 0; /* nothing */; i++) { |
151 |
|
|
word = text_words[i][0]; |
152 |
|
|
if (word == NULL) |
153 |
|
|
break; |
154 |
|
|
wordlen = strlen(word); |
155 |
|
|
|
156 |
|
|
if ((size_t)(next - cp) != wordlen) |
157 |
|
|
continue; |
158 |
|
|
if (memcmp(cp, word, wordlen) != 0) |
159 |
|
|
continue; |
160 |
|
|
if (flags & MAGIC_TEST_MIME) |
161 |
|
|
return (text_words[i][2]); |
162 |
|
|
return (text_words[i][1]); |
163 |
|
|
} |
164 |
|
|
|
165 |
|
|
cp = next; |
166 |
|
|
} |
167 |
|
|
return (NULL); |
168 |
|
|
} |