| 1 |  |  | /*	$OpenBSD: fastfind.c,v 1.13 2015/10/23 07:57:03 tedu Exp $	*/ | 
    
    | 2 |  |  |  | 
    
    | 3 |  |  | /* | 
    
    | 4 |  |  |  * Copyright (c) 1995 Wolfram Schneider <wosch@FreeBSD.org>. Berlin. | 
    
    | 5 |  |  |  * Copyright (c) 1989, 1993 | 
    
    | 6 |  |  |  *	The Regents of the University of California.  All rights reserved. | 
    
    | 7 |  |  |  * | 
    
    | 8 |  |  |  * This code is derived from software contributed to Berkeley by | 
    
    | 9 |  |  |  * James A. Woods. | 
    
    | 10 |  |  |  * | 
    
    | 11 |  |  |  * Redistribution and use in source and binary forms, with or without | 
    
    | 12 |  |  |  * modification, are permitted provided that the following conditions | 
    
    | 13 |  |  |  * are met: | 
    
    | 14 |  |  |  * 1. Redistributions of source code must retain the above copyright | 
    
    | 15 |  |  |  *    notice, this list of conditions and the following disclaimer. | 
    
    | 16 |  |  |  * 2. Redistributions in binary form must reproduce the above copyright | 
    
    | 17 |  |  |  *    notice, this list of conditions and the following disclaimer in the | 
    
    | 18 |  |  |  *    documentation and/or other materials provided with the distribution. | 
    
    | 19 |  |  |  * 3. Neither the name of the University nor the names of its contributors | 
    
    | 20 |  |  |  *    may be used to endorse or promote products derived from this software | 
    
    | 21 |  |  |  *    without specific prior written permission. | 
    
    | 22 |  |  |  * | 
    
    | 23 |  |  |  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | 
    
    | 24 |  |  |  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 
    
    | 25 |  |  |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | 
    
    | 26 |  |  |  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | 
    
    | 27 |  |  |  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | 
    
    | 28 |  |  |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | 
    
    | 29 |  |  |  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | 
    
    | 30 |  |  |  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | 
    
    | 31 |  |  |  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | 
    
    | 32 |  |  |  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | 
    
    | 33 |  |  |  * SUCH DAMAGE. | 
    
    | 34 |  |  |  * | 
    
    | 35 |  |  |  * $Id: fastfind.c,v 1.13 2015/10/23 07:57:03 tedu Exp $ | 
    
    | 36 |  |  |  */ | 
    
    | 37 |  |  |  | 
    
    | 38 |  |  | #ifndef _LOCATE_STATISTIC_ | 
    
    | 39 |  |  | #define _LOCATE_STATISTIC_ | 
    
    | 40 |  |  |  | 
    
    | 41 |  |  | void | 
    
    | 42 |  |  | statistic (fp, path_fcodes) | 
    
    | 43 |  |  | 	FILE *fp;               /* open database */ | 
    
    | 44 |  |  | 	char *path_fcodes;  	/* for error message */ | 
    
    | 45 |  |  | { | 
    
    | 46 |  |  | 	int lines, chars, size, big, zwerg; | 
    
    | 47 |  |  | 	u_char *p, *s; | 
    
    | 48 |  |  | 	int c; | 
    
    | 49 |  |  | 	int count, umlaut; | 
    
    | 50 |  |  | 	u_char bigram1[NBG], bigram2[NBG], path[PATH_MAX]; | 
    
    | 51 |  |  |  | 
    
    | 52 |  |  | 	for (c = 0, p = bigram1, s = bigram2; c < NBG; c++) { | 
    
    | 53 |  |  | 		p[c] = check_bigram_char(getc(fp)); | 
    
    | 54 |  |  | 		s[c] = check_bigram_char(getc(fp)); | 
    
    | 55 |  |  | 	} | 
    
    | 56 |  |  |  | 
    
    | 57 |  |  | 	lines = chars = big = zwerg = umlaut = 0; | 
    
    | 58 |  |  | 	size = NBG + NBG; | 
    
    | 59 |  |  |  | 
    
    | 60 |  |  | 	for (c = getc(fp), count = 0; c != EOF; size++) { | 
    
    | 61 |  |  | 		if (c == SWITCH) { | 
    
    | 62 |  |  | 			count += getwf(fp) - OFFSET; | 
    
    | 63 |  |  | 			size += sizeof(int); | 
    
    | 64 |  |  | 			zwerg++; | 
    
    | 65 |  |  | 		} else | 
    
    | 66 |  |  | 			count += c - OFFSET; | 
    
    | 67 |  |  |  | 
    
    | 68 |  |  | 		sane_count(count); | 
    
    | 69 |  |  | 		for (p = path + count; (c = getc(fp)) > SWITCH; size++) | 
    
    | 70 |  |  | 			if (c < PARITY) { | 
    
    | 71 |  |  | 				if (c == UMLAUT) { | 
    
    | 72 |  |  | 					c = getc(fp); | 
    
    | 73 |  |  | 					size++; | 
    
    | 74 |  |  | 					umlaut++; | 
    
    | 75 |  |  | 				} | 
    
    | 76 |  |  | 				p++; | 
    
    | 77 |  |  | 			} else { | 
    
    | 78 |  |  | 				/* bigram char */ | 
    
    | 79 |  |  | 				big++; | 
    
    | 80 |  |  | 				p += 2; | 
    
    | 81 |  |  | 			} | 
    
    | 82 |  |  |  | 
    
    | 83 |  |  | 		p++; | 
    
    | 84 |  |  | 		lines++; | 
    
    | 85 |  |  | 		chars += (p - path); | 
    
    | 86 |  |  | 	} | 
    
    | 87 |  |  |  | 
    
    | 88 |  |  | 	(void)printf("\nDatabase: %s\n", path_fcodes); | 
    
    | 89 |  |  | 	(void)printf("Compression: Front: %2.2f%%, ", | 
    
    | 90 |  |  | 	    (float)(100 * (size + big - (2 * NBG))) / chars); | 
    
    | 91 |  |  | 	(void)printf("Bigram: %2.2f%%, ", (float)(100 * (size - big)) / size); | 
    
    | 92 |  |  | 	(void)printf("Total: %2.2f%%\n", | 
    
    | 93 |  |  | 	    (float)(100 * (size - (2 * NBG))) / chars); | 
    
    | 94 |  |  | 	(void)printf("Filenames: %d, ", lines); | 
    
    | 95 |  |  | 	(void)printf("Characters: %d, ", chars); | 
    
    | 96 |  |  | 	(void)printf("Database size: %d\n", size); | 
    
    | 97 |  |  | 	(void)printf("Bigram characters: %d, ", big); | 
    
    | 98 |  |  | 	(void)printf("Integers: %d, ", zwerg); | 
    
    | 99 |  |  | 	(void)printf("8-Bit characters: %d\n", umlaut); | 
    
    | 100 |  |  |  | 
    
    | 101 |  |  | } | 
    
    | 102 |  |  | #endif /* _LOCATE_STATISTIC_ */ | 
    
    | 103 |  |  |  | 
    
    | 104 |  |  |  | 
    
    | 105 |  |  | void | 
    
    | 106 |  |  |  | 
    
    | 107 |  |  |  | 
    
    | 108 |  |  | #ifdef FF_ICASE | 
    
    | 109 |  |  | fastfind_mmap_icase | 
    
    | 110 |  |  | #else | 
    
    | 111 |  |  | fastfind_mmap | 
    
    | 112 |  |  | #endif /* FF_ICASE */ | 
    
    | 113 |  |  | (pathpart, paddr, len, database) | 
    
    | 114 |  |  | 	char *pathpart; 	/* search string */ | 
    
    | 115 |  |  | 	caddr_t paddr;  	/* mmap pointer */ | 
    
    | 116 |  |  | 	int len;        	/* length of database */ | 
    
    | 117 |  |  | 	char *database; 	/* for error message */ | 
    
    | 118 |  |  |  | 
    
    | 119 |  |  |  | 
    
    | 120 |  |  |  | 
    
    | 121 |  |  | { | 
    
    | 122 |  |  | 	u_char *p, *s, *patend, *q, *foundchar; | 
    
    | 123 |  |  | 	int c, cc; | 
    
    | 124 |  |  | 	int count, found, globflag; | 
    
    | 125 |  |  | 	u_char *cutoff; | 
    
    | 126 |  |  | 	u_char bigram1[NBG], bigram2[NBG], path[PATH_MAX]; | 
    
    | 127 |  |  |  | 
    
    | 128 |  |  | #ifdef FF_ICASE | 
    
    | 129 |  |  | 	/* use a lookup table for case insensitive search */ | 
    
    | 130 |  |  | 	u_char table[UCHAR_MAX + 1]; | 
    
    | 131 |  |  |  | 
    
    | 132 |  |  | 	tolower_word(pathpart); | 
    
    | 133 |  |  | #endif /* FF_ICASE*/ | 
    
    | 134 |  |  |  | 
    
    | 135 |  |  | 	/* init bigram table */ | 
    
    | 136 |  |  | 	if (len < (2*NBG)) { | 
    
    | 137 |  |  | 		(void)fprintf(stderr, "database too small: %s\n", database); | 
    
    | 138 |  |  | 		exit(1); | 
    
    | 139 |  |  | 	} | 
    
    | 140 |  |  |  | 
    
    | 141 |  |  | 	for (c = 0, p = bigram1, s = bigram2; c < NBG; c++, len-= 2) { | 
    
    | 142 |  |  | 		p[c] = check_bigram_char(*paddr++); | 
    
    | 143 |  |  | 		s[c] = check_bigram_char(*paddr++); | 
    
    | 144 |  |  | 	} | 
    
    | 145 |  |  |  | 
    
    | 146 |  |  | 	/* find optimal (last) char for searching */ | 
    
    | 147 |  |  | 	for (p = pathpart; *p != '\0'; p++) | 
    
    | 148 |  |  | 		if (strchr(LOCATE_REG, *p) != NULL) | 
    
    | 149 |  |  | 			break; | 
    
    | 150 |  |  |  | 
    
    | 151 |  |  | 	if (*p == '\0') | 
    
    | 152 |  |  | 		globflag = 0; | 
    
    | 153 |  |  | 	else | 
    
    | 154 |  |  | 		globflag = 1; | 
    
    | 155 |  |  |  | 
    
    | 156 |  |  | 	p = pathpart; | 
    
    | 157 |  |  | 	patend = patprep(p); | 
    
    | 158 |  |  | 	cc = *patend; | 
    
    | 159 |  |  |  | 
    
    | 160 |  |  | #ifdef FF_ICASE | 
    
    | 161 |  |  | 	/* set patend char to true */ | 
    
    | 162 |  |  | 	table[TOLOWER(*patend)] = 1; | 
    
    | 163 |  |  | 	table[toupper(*patend)] = 1; | 
    
    | 164 |  |  | #endif /* FF_ICASE */ | 
    
    | 165 |  |  |  | 
    
    | 166 |  |  |  | 
    
    | 167 |  |  | 	/* main loop */ | 
    
    | 168 |  |  | 	found = count = 0; | 
    
    | 169 |  |  | 	foundchar = 0; | 
    
    | 170 |  |  |  | 
    
    | 171 |  |  | 	c = (u_char)*paddr++; len--; | 
    
    | 172 |  |  | 	for (; len > 0; ) { | 
    
    | 173 |  |  |  | 
    
    | 174 |  |  | 		/* go forward or backward */ | 
    
    | 175 |  |  | 		if (c == SWITCH) { /* big step, an integer */ | 
    
    | 176 |  |  | 			count += getwm(paddr) - OFFSET; | 
    
    | 177 |  |  | 			len -= INTSIZE; paddr += INTSIZE; | 
    
    | 178 |  |  | 		} else {	   /* slow step, =< 14 chars */ | 
    
    | 179 |  |  | 			count += c - OFFSET; | 
    
    | 180 |  |  | 		} | 
    
    | 181 |  |  |  | 
    
    | 182 |  |  | 		sane_count(count); | 
    
    | 183 |  |  | 		/* overlay old path */ | 
    
    | 184 |  |  | 		p = path + count; | 
    
    | 185 |  |  | 		foundchar = p - 1; | 
    
    | 186 |  |  |  | 
    
    | 187 |  |  | 		for (;;) { | 
    
    | 188 |  |  | 			c = (u_char)*paddr++; | 
    
    | 189 |  |  | 			len--; | 
    
    | 190 |  |  | 			/* | 
    
    | 191 |  |  | 			 * == UMLAUT: 8 bit char followed | 
    
    | 192 |  |  | 			 * <= SWITCH: offset | 
    
    | 193 |  |  | 			 * >= PARITY: bigram | 
    
    | 194 |  |  | 			 * rest:      single ascii char | 
    
    | 195 |  |  | 			 * | 
    
    | 196 |  |  | 			 * offset < SWITCH < UMLAUT < ascii < PARITY < bigram | 
    
    | 197 |  |  | 			 */ | 
    
    | 198 |  |  | 			if (c < PARITY) { | 
    
    | 199 |  |  | 				if (c <= UMLAUT) { | 
    
    | 200 |  |  | 					if (c == UMLAUT) { | 
    
    | 201 |  |  | 						c = (u_char)*paddr++; | 
    
    | 202 |  |  | 						len--; | 
    
    | 203 |  |  |  | 
    
    | 204 |  |  | 					} else | 
    
    | 205 |  |  | 						break; /* SWITCH */ | 
    
    | 206 |  |  | 				} | 
    
    | 207 |  |  | #ifdef FF_ICASE | 
    
    | 208 |  |  | 				if (table[c]) | 
    
    | 209 |  |  | #else | 
    
    | 210 |  |  | 				if (c == cc) | 
    
    | 211 |  |  | #endif /* FF_ICASE */ | 
    
    | 212 |  |  | 					foundchar = p; | 
    
    | 213 |  |  | 				*p++ = c; | 
    
    | 214 |  |  | 			} else { | 
    
    | 215 |  |  | 				/* bigrams are parity-marked */ | 
    
    | 216 |  |  | 				TO7BIT(c); | 
    
    | 217 |  |  |  | 
    
    | 218 |  |  | #ifndef FF_ICASE | 
    
    | 219 |  |  | 				if (bigram1[c] == cc || | 
    
    | 220 |  |  | 				    bigram2[c] == cc) | 
    
    | 221 |  |  | #else | 
    
    | 222 |  |  |  | 
    
    | 223 |  |  | 					if (table[bigram1[c]] || | 
    
    | 224 |  |  | 					    table[bigram2[c]]) | 
    
    | 225 |  |  | #endif /* FF_ICASE */ | 
    
    | 226 |  |  | 						foundchar = p + 1; | 
    
    | 227 |  |  |  | 
    
    | 228 |  |  | 				*p++ = bigram1[c]; | 
    
    | 229 |  |  | 				*p++ = bigram2[c]; | 
    
    | 230 |  |  | 			} | 
    
    | 231 |  |  | 		} | 
    
    | 232 |  |  |  | 
    
    | 233 |  |  | 		if (found) {			/* previous line matched */ | 
    
    | 234 |  |  | 			cutoff = path; | 
    
    | 235 |  |  | 			*p-- = '\0'; | 
    
    | 236 |  |  | 			foundchar = p; | 
    
    | 237 |  |  | 		} else if (foundchar >= path + count) { /* a char matched */ | 
    
    | 238 |  |  | 			*p-- = '\0'; | 
    
    | 239 |  |  | 			cutoff = path + count; | 
    
    | 240 |  |  | 		} else				/* nothing to do */ | 
    
    | 241 |  |  | 			continue; | 
    
    | 242 |  |  |  | 
    
    | 243 |  |  | 		found = 0; | 
    
    | 244 |  |  | 		for (s = foundchar; s >= cutoff; s--) { | 
    
    | 245 |  |  | 			if (*s == cc | 
    
    | 246 |  |  | #ifdef FF_ICASE | 
    
    | 247 |  |  | 			    || TOLOWER(*s) == cc | 
    
    | 248 |  |  | #endif /* FF_ICASE */ | 
    
    | 249 |  |  | 			    ) {	/* fast first char check */ | 
    
    | 250 |  |  | 				for (p = patend - 1, q = s - 1; *p != '\0'; | 
    
    | 251 |  |  | 				    p--, q--) | 
    
    | 252 |  |  | 					if (*q != *p | 
    
    | 253 |  |  | #ifdef FF_ICASE | 
    
    | 254 |  |  | 					    && TOLOWER(*q) != *p | 
    
    | 255 |  |  | #endif /* FF_ICASE */ | 
    
    | 256 |  |  | 					    ) | 
    
    | 257 |  |  | 						break; | 
    
    | 258 |  |  | 				if (*p == '\0') {   /* fast match success */ | 
    
    | 259 |  |  | 					char	*shortpath; | 
    
    | 260 |  |  |  | 
    
    | 261 |  |  | 					found = 1; | 
    
    | 262 |  |  | 					shortpath = path; | 
    
    | 263 |  |  | 					if (f_basename) | 
    
    | 264 |  |  | 						shortpath = basename(path); | 
    
    | 265 |  |  |  | 
    
    | 266 |  |  | 					if ((!f_basename && (!globflag || | 
    
    | 267 |  |  | #ifdef FF_ICASE | 
    
    | 268 |  |  | 					    !fnmatch(pathpart, shortpath, | 
    
    | 269 |  |  | 						FNM_CASEFOLD))) | 
    
    | 270 |  |  | #else | 
    
    | 271 |  |  | 					    !fnmatch(pathpart, shortpath, 0))) | 
    
    | 272 |  |  | #endif /* FF_ICASE */ | 
    
    | 273 |  |  | 					    || (strstr(shortpath, pathpart) != | 
    
    | 274 |  |  | 					    NULL)) { | 
    
    | 275 |  |  | 						if (f_silent) | 
    
    | 276 |  |  | 							counter++; | 
    
    | 277 |  |  | 						else if (f_limit) { | 
    
    | 278 |  |  | 							counter++; | 
    
    | 279 |  |  | 							if (f_limit >= counter) | 
    
    | 280 |  |  | 								(void)puts(path); | 
    
    | 281 |  |  | 							else  { | 
    
    | 282 |  |  | 								(void)fprintf(stderr, "[show only %d lines]\n", counter - 1); | 
    
    | 283 |  |  | 								exit(0); | 
    
    | 284 |  |  | 							} | 
    
    | 285 |  |  | 						} else | 
    
    | 286 |  |  | 							(void)puts(path); | 
    
    | 287 |  |  | 					} | 
    
    | 288 |  |  | 					break; | 
    
    | 289 |  |  | 				} | 
    
    | 290 |  |  | 			} | 
    
    | 291 |  |  | 		} | 
    
    | 292 |  |  | 	} | 
    
    | 293 |  |  | } |