| 1 |  |  | /* $OpenBSD: text.c,v 1.2 2015/04/24 16:45:32 nicm Exp $ */ | 
    
    | 2 |  |  |  | 
    
    | 3 |  |  | /* | 
    
    | 4 |  |  |  * Copyright (c) 2015 Nicholas Marriott <nicm@openbsd.org> | 
    
    | 5 |  |  |  * | 
    
    | 6 |  |  |  * Permission to use, copy, modify, and distribute this software for any | 
    
    | 7 |  |  |  * purpose with or without fee is hereby granted, provided that the above | 
    
    | 8 |  |  |  * copyright notice and this permission notice appear in all copies. | 
    
    | 9 |  |  |  * | 
    
    | 10 |  |  |  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | 
    
    | 11 |  |  |  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | 
    
    | 12 |  |  |  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | 
    
    | 13 |  |  |  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | 
    
    | 14 |  |  |  * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER | 
    
    | 15 |  |  |  * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING | 
    
    | 16 |  |  |  * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | 
    
    | 17 |  |  |  */ | 
    
    | 18 |  |  |  | 
    
    | 19 |  |  | #include <sys/types.h> | 
    
    | 20 |  |  |  | 
    
    | 21 |  |  | #include <ctype.h> | 
    
    | 22 |  |  | #include <string.h> | 
    
    | 23 |  |  |  | 
    
    | 24 |  |  | #include "file.h" | 
    
    | 25 |  |  | #include "magic.h" | 
    
    | 26 |  |  | #include "xmalloc.h" | 
    
    | 27 |  |  |  | 
    
    /*
     * Keywords that hint at the kind of text held in a buffer. Each row is
     * { word, description, MIME type } and the table is terminated by a row
     * of NULLs. text_try_words() compares the word byte-for-byte (so matching
     * is case-sensitive) against whole whitespace-delimited tokens.
     *
     * Both the strings and the pointers are const: the table is read-only.
     */
    static const char *const text_words[][3] = {
        { "msgid", "PO (gettext message catalogue)", "text/x-po" },
        { "dnl", "M4 macro language pre-processor", "text/x-m4" },
        { "import", "Java program", "text/x-java" },
        { "\"libhdr\"", "BCPL program", "text/x-bcpl" },
        { "\"LIBHDR\"", "BCPL program", "text/x-bcpl" },
        { "//", "C++ program", "text/x-c++" },
        { "virtual", "C++ program", "text/x-c++" },
        { "class", "C++ program", "text/x-c++" },
        { "public:", "C++ program", "text/x-c++" },
        { "private:", "C++ program", "text/x-c++" },
        { "/*", "C program", "text/x-c" },
        { "#include", "C program", "text/x-c" },
        { "char", "C program", "text/x-c" },
        { "The", "English", "text/plain" },
        { "the", "English", "text/plain" },
        { "double", "C program", "text/x-c" },
        { "extern", "C program", "text/x-c" },
        { "float", "C program", "text/x-c" },
        { "struct", "C program", "text/x-c" },
        { "union", "C program", "text/x-c" },
        { "CFLAGS", "make commands", "text/x-makefile" },
        { "LDFLAGS", "make commands", "text/x-makefile" },
        { "all:", "make commands", "text/x-makefile" },
        { ".PRECIOUS", "make commands", "text/x-makefile" },
        { ".ascii", "assembler program", "text/x-asm" },
        { ".asciiz", "assembler program", "text/x-asm" },
        { ".byte", "assembler program", "text/x-asm" },
        { ".even", "assembler program", "text/x-asm" },
        { ".globl", "assembler program", "text/x-asm" },
        { ".text", "assembler program", "text/x-asm" },
        { "clr", "assembler program", "text/x-asm" },
        { "(input", "Pascal program", "text/x-pascal" },
        { "program", "Pascal program", "text/x-pascal" },
        { "record", "Pascal program", "text/x-pascal" },
        { "dcl", "PL/1 program", "text/x-pl1" },
        { "Received:", "mail", "text/x-mail" },
        { ">From", "mail", "text/x-mail" },
        { "Return-Path:", "mail", "text/x-mail" },
        { "Cc:", "mail", "text/x-mail" },
        { "Newsgroups:", "news", "text/x-news" },
        { "Path:", "news", "text/x-news" },
        { "Organization:", "news", "text/x-news" },
        { "href=", "HTML document", "text/html" },
        { "HREF=", "HTML document", "text/html" },
        { "<body", "HTML document", "text/html" },
        { "<BODY", "HTML document", "text/html" },
        { "<html", "HTML document", "text/html" },
        { "<HTML", "HTML document", "text/html" },
        { "<!--", "HTML document", "text/html" },
        { NULL, NULL, NULL }
    };
    
    | 80 |  |  |  | 
    
    /*
     * Decide whether a byte may appear in plain ASCII text. Accepted bytes are
     * the printable range 32..126 plus the control characters BEL, BS, HT, LF,
     * FF, CR and ESC. NUL and every other byte are rejected.
     *
     * Returns 1 if the byte is accepted and 0 if it is not.
     */
    static int
    text_is_ascii(u_char c)
    {
        switch (c) {
        case 007:   /* BEL */
        case 010:   /* BS */
        case 011:   /* HT */
        case 012:   /* LF */
        case 014:   /* FF */
        case 015:   /* CR */
        case 033:   /* ESC */
            return (1);
        default:
            break;
        }

        /* Everything else must be a printable character. */
        if (c < 32 || c > 126)
            return (0);
        return (1);
    }
    
    | 92 |  |  |  | 
    
    /*
     * Decide whether a byte may appear in ISO-8859 text: any byte from 160
     * upwards, or any byte that text_is_ascii() accepts.
     *
     * Returns 1 if the byte is accepted and 0 if it is not.
     */
    static int
    text_is_latin1(u_char c)
    {
        return (c >= 160 || text_is_ascii(c));
    }
    
    | 100 |  |  |  | 
    
    /*
     * Decide whether a byte may appear in extended-ASCII text: any byte with
     * the high bit set (128 and above), or any byte that text_is_ascii()
     * accepts.
     *
     * Returns 1 if the byte is accepted and 0 if it is not.
     */
    static int
    text_is_extended(u_char c)
    {
        return (c < 128 ? text_is_ascii(c) : 1);
    }
    
    | 108 |  |  |  | 
    
    /*
     * Apply the predicate f to each of the size bytes starting at base, in
     * order, stopping at the first byte it rejects.
     *
     * Returns 1 if f accepted every byte (including when size is zero, in
     * which case f is never called) and 0 as soon as f returns zero.
     */
    static int
    text_try_test(const void *base, size_t size, int (*f)(u_char))
    {
        const u_char    *cursor = base;
        size_t           remaining;

        for (remaining = size; remaining != 0; remaining--, cursor++) {
            if (f(*cursor) == 0)
                return (0);
        }
        return (1);
    }
    
    | 121 |  |  |  | 
    
    | 122 |  |  | const char * | 
    
    | 123 |  |  | text_get_type(const void *base, size_t size) | 
    
    | 124 |  |  | { | 
    
    | 125 |  |  | 	if (text_try_test(base, size, text_is_ascii)) | 
    
    | 126 |  |  | 		return ("ASCII"); | 
    
    | 127 |  |  | 	if (text_try_test(base, size, text_is_latin1)) | 
    
    | 128 |  |  | 		return ("ISO-8859"); | 
    
    | 129 |  |  | 	if (text_try_test(base, size, text_is_extended)) | 
    
    | 130 |  |  | 		return ("Non-ISO extended-ASCII"); | 
    
    | 131 |  |  | 	return (NULL); | 
    
    | 132 |  |  | } | 
    
    | 133 |  |  |  | 
    
    | 134 |  |  | const char * | 
    
    | 135 |  |  | text_try_words(const void *base, size_t size, int flags) | 
    
    | 136 |  |  | { | 
    
    | 137 |  |  | 	const char	*cp, *end, *next, *word; | 
    
    | 138 |  |  | 	size_t		 wordlen; | 
    
    | 139 |  |  | 	u_int		 i; | 
    
    | 140 |  |  |  | 
    
    | 141 |  |  | 	end = (char *)base + size; | 
    
    | 142 |  |  | 	for (cp = base; cp != end; /* nothing */) { | 
    
    | 143 |  |  | 		while (cp != end && isspace((u_char)*cp)) | 
    
    | 144 |  |  | 			cp++; | 
    
    | 145 |  |  |  | 
    
    | 146 |  |  | 		next = cp; | 
    
    | 147 |  |  | 		while (next != end && !isspace((u_char)*next)) | 
    
    | 148 |  |  | 			next++; | 
    
    | 149 |  |  |  | 
    
    | 150 |  |  | 		for (i = 0; /* nothing */; i++) { | 
    
    | 151 |  |  | 			word = text_words[i][0]; | 
    
    | 152 |  |  | 			if (word == NULL) | 
    
    | 153 |  |  | 				break; | 
    
    | 154 |  |  | 			wordlen = strlen(word); | 
    
    | 155 |  |  |  | 
    
    | 156 |  |  | 			if ((size_t)(next - cp) != wordlen) | 
    
    | 157 |  |  | 				continue; | 
    
    | 158 |  |  | 			if (memcmp(cp, word, wordlen) != 0) | 
    
    | 159 |  |  | 				continue; | 
    
    | 160 |  |  | 			if (flags & MAGIC_TEST_MIME) | 
    
    | 161 |  |  | 				return (text_words[i][2]); | 
    
    | 162 |  |  | 			return (text_words[i][1]); | 
    
    | 163 |  |  | 		} | 
    
    | 164 |  |  |  | 
    
    | 165 |  |  | 		cp = next; | 
    
    | 166 |  |  | 	} | 
    
    | 167 |  |  | 	return (NULL); | 
    
    | 168 |  |  | } |