1 |
|
|
/* $OpenBSD: fastfind.c,v 1.13 2015/10/23 07:57:03 tedu Exp $ */ |
2 |
|
|
|
3 |
|
|
/* |
4 |
|
|
* Copyright (c) 1995 Wolfram Schneider <wosch@FreeBSD.org>. Berlin. |
5 |
|
|
* Copyright (c) 1989, 1993 |
6 |
|
|
* The Regents of the University of California. All rights reserved. |
7 |
|
|
* |
8 |
|
|
* This code is derived from software contributed to Berkeley by |
9 |
|
|
* James A. Woods. |
10 |
|
|
* |
11 |
|
|
* Redistribution and use in source and binary forms, with or without |
12 |
|
|
* modification, are permitted provided that the following conditions |
13 |
|
|
* are met: |
14 |
|
|
* 1. Redistributions of source code must retain the above copyright |
15 |
|
|
* notice, this list of conditions and the following disclaimer. |
16 |
|
|
* 2. Redistributions in binary form must reproduce the above copyright |
17 |
|
|
* notice, this list of conditions and the following disclaimer in the |
18 |
|
|
* documentation and/or other materials provided with the distribution. |
19 |
|
|
* 3. Neither the name of the University nor the names of its contributors |
20 |
|
|
* may be used to endorse or promote products derived from this software |
21 |
|
|
* without specific prior written permission. |
22 |
|
|
* |
23 |
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
24 |
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
25 |
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
26 |
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
27 |
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
28 |
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
29 |
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
30 |
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
31 |
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
32 |
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
33 |
|
|
* SUCH DAMAGE. |
34 |
|
|
* |
35 |
|
|
* $Id: fastfind.c,v 1.13 2015/10/23 07:57:03 tedu Exp $ |
36 |
|
|
*/ |
37 |
|
|
|
38 |
|
|
#ifndef _LOCATE_STATISTIC_ |
39 |
|
|
#define _LOCATE_STATISTIC_ |
40 |
|
|
|
41 |
|
|
void |
42 |
|
|
statistic (fp, path_fcodes) |
43 |
|
|
FILE *fp; /* open database */ |
44 |
|
|
char *path_fcodes; /* for error message */ |
45 |
|
|
{ |
46 |
|
|
int lines, chars, size, big, zwerg; |
47 |
|
|
u_char *p, *s; |
48 |
|
|
int c; |
49 |
|
|
int count, umlaut; |
50 |
|
|
u_char bigram1[NBG], bigram2[NBG], path[PATH_MAX]; |
51 |
|
|
|
52 |
|
|
for (c = 0, p = bigram1, s = bigram2; c < NBG; c++) { |
53 |
|
|
p[c] = check_bigram_char(getc(fp)); |
54 |
|
|
s[c] = check_bigram_char(getc(fp)); |
55 |
|
|
} |
56 |
|
|
|
57 |
|
|
lines = chars = big = zwerg = umlaut = 0; |
58 |
|
|
size = NBG + NBG; |
59 |
|
|
|
60 |
|
|
for (c = getc(fp), count = 0; c != EOF; size++) { |
61 |
|
|
if (c == SWITCH) { |
62 |
|
|
count += getwf(fp) - OFFSET; |
63 |
|
|
size += sizeof(int); |
64 |
|
|
zwerg++; |
65 |
|
|
} else |
66 |
|
|
count += c - OFFSET; |
67 |
|
|
|
68 |
|
|
sane_count(count); |
69 |
|
|
for (p = path + count; (c = getc(fp)) > SWITCH; size++) |
70 |
|
|
if (c < PARITY) { |
71 |
|
|
if (c == UMLAUT) { |
72 |
|
|
c = getc(fp); |
73 |
|
|
size++; |
74 |
|
|
umlaut++; |
75 |
|
|
} |
76 |
|
|
p++; |
77 |
|
|
} else { |
78 |
|
|
/* bigram char */ |
79 |
|
|
big++; |
80 |
|
|
p += 2; |
81 |
|
|
} |
82 |
|
|
|
83 |
|
|
p++; |
84 |
|
|
lines++; |
85 |
|
|
chars += (p - path); |
86 |
|
|
} |
87 |
|
|
|
88 |
|
|
(void)printf("\nDatabase: %s\n", path_fcodes); |
89 |
|
|
(void)printf("Compression: Front: %2.2f%%, ", |
90 |
|
|
(float)(100 * (size + big - (2 * NBG))) / chars); |
91 |
|
|
(void)printf("Bigram: %2.2f%%, ", (float)(100 * (size - big)) / size); |
92 |
|
|
(void)printf("Total: %2.2f%%\n", |
93 |
|
|
(float)(100 * (size - (2 * NBG))) / chars); |
94 |
|
|
(void)printf("Filenames: %d, ", lines); |
95 |
|
|
(void)printf("Characters: %d, ", chars); |
96 |
|
|
(void)printf("Database size: %d\n", size); |
97 |
|
|
(void)printf("Bigram characters: %d, ", big); |
98 |
|
|
(void)printf("Integers: %d, ", zwerg); |
99 |
|
|
(void)printf("8-Bit characters: %d\n", umlaut); |
100 |
|
|
|
101 |
|
|
} |
102 |
|
|
#endif /* _LOCATE_STATISTIC_ */ |
103 |
|
|
|
104 |
|
|
|
105 |
|
|
void |
106 |
|
|
|
107 |
|
|
|
108 |
|
|
#ifdef FF_ICASE |
109 |
|
|
fastfind_mmap_icase |
110 |
|
|
#else |
111 |
|
|
fastfind_mmap |
112 |
|
|
#endif /* FF_ICASE */ |
113 |
|
|
(pathpart, paddr, len, database) |
114 |
|
|
char *pathpart; /* search string */ |
115 |
|
|
caddr_t paddr; /* mmap pointer */ |
116 |
|
|
int len; /* length of database */ |
117 |
|
|
char *database; /* for error message */ |
118 |
|
|
|
119 |
|
|
|
120 |
|
|
|
121 |
|
|
{ |
122 |
|
|
u_char *p, *s, *patend, *q, *foundchar; |
123 |
|
|
int c, cc; |
124 |
|
|
int count, found, globflag; |
125 |
|
|
u_char *cutoff; |
126 |
|
|
u_char bigram1[NBG], bigram2[NBG], path[PATH_MAX]; |
127 |
|
|
|
128 |
|
|
#ifdef FF_ICASE |
129 |
|
|
/* use a lookup table for case insensitive search */ |
130 |
|
|
u_char table[UCHAR_MAX + 1]; |
131 |
|
|
|
132 |
|
|
tolower_word(pathpart); |
133 |
|
|
#endif /* FF_ICASE*/ |
134 |
|
|
|
135 |
|
|
/* init bigram table */ |
136 |
|
|
if (len < (2*NBG)) { |
137 |
|
|
(void)fprintf(stderr, "database too small: %s\n", database); |
138 |
|
|
exit(1); |
139 |
|
|
} |
140 |
|
|
|
141 |
|
|
for (c = 0, p = bigram1, s = bigram2; c < NBG; c++, len-= 2) { |
142 |
|
|
p[c] = check_bigram_char(*paddr++); |
143 |
|
|
s[c] = check_bigram_char(*paddr++); |
144 |
|
|
} |
145 |
|
|
|
146 |
|
|
/* find optimal (last) char for searching */ |
147 |
|
|
for (p = pathpart; *p != '\0'; p++) |
148 |
|
|
if (strchr(LOCATE_REG, *p) != NULL) |
149 |
|
|
break; |
150 |
|
|
|
151 |
|
|
if (*p == '\0') |
152 |
|
|
globflag = 0; |
153 |
|
|
else |
154 |
|
|
globflag = 1; |
155 |
|
|
|
156 |
|
|
p = pathpart; |
157 |
|
|
patend = patprep(p); |
158 |
|
|
cc = *patend; |
159 |
|
|
|
160 |
|
|
#ifdef FF_ICASE |
161 |
|
|
/* set patend char to true */ |
162 |
|
|
table[TOLOWER(*patend)] = 1; |
163 |
|
|
table[toupper(*patend)] = 1; |
164 |
|
|
#endif /* FF_ICASE */ |
165 |
|
|
|
166 |
|
|
|
167 |
|
|
/* main loop */ |
168 |
|
|
found = count = 0; |
169 |
|
|
foundchar = 0; |
170 |
|
|
|
171 |
|
|
c = (u_char)*paddr++; len--; |
172 |
|
|
for (; len > 0; ) { |
173 |
|
|
|
174 |
|
|
/* go forward or backward */ |
175 |
|
|
if (c == SWITCH) { /* big step, an integer */ |
176 |
|
|
count += getwm(paddr) - OFFSET; |
177 |
|
|
len -= INTSIZE; paddr += INTSIZE; |
178 |
|
|
} else { /* slow step, =< 14 chars */ |
179 |
|
|
count += c - OFFSET; |
180 |
|
|
} |
181 |
|
|
|
182 |
|
|
sane_count(count); |
183 |
|
|
/* overlay old path */ |
184 |
|
|
p = path + count; |
185 |
|
|
foundchar = p - 1; |
186 |
|
|
|
187 |
|
|
for (;;) { |
188 |
|
|
c = (u_char)*paddr++; |
189 |
|
|
len--; |
190 |
|
|
/* |
191 |
|
|
* == UMLAUT: 8 bit char followed |
192 |
|
|
* <= SWITCH: offset |
193 |
|
|
* >= PARITY: bigram |
194 |
|
|
* rest: single ascii char |
195 |
|
|
* |
196 |
|
|
* offset < SWITCH < UMLAUT < ascii < PARITY < bigram |
197 |
|
|
*/ |
198 |
|
|
if (c < PARITY) { |
199 |
|
|
if (c <= UMLAUT) { |
200 |
|
|
if (c == UMLAUT) { |
201 |
|
|
c = (u_char)*paddr++; |
202 |
|
|
len--; |
203 |
|
|
|
204 |
|
|
} else |
205 |
|
|
break; /* SWITCH */ |
206 |
|
|
} |
207 |
|
|
#ifdef FF_ICASE |
208 |
|
|
if (table[c]) |
209 |
|
|
#else |
210 |
|
|
if (c == cc) |
211 |
|
|
#endif /* FF_ICASE */ |
212 |
|
|
foundchar = p; |
213 |
|
|
*p++ = c; |
214 |
|
|
} else { |
215 |
|
|
/* bigrams are parity-marked */ |
216 |
|
|
TO7BIT(c); |
217 |
|
|
|
218 |
|
|
#ifndef FF_ICASE |
219 |
|
|
if (bigram1[c] == cc || |
220 |
|
|
bigram2[c] == cc) |
221 |
|
|
#else |
222 |
|
|
|
223 |
|
|
if (table[bigram1[c]] || |
224 |
|
|
table[bigram2[c]]) |
225 |
|
|
#endif /* FF_ICASE */ |
226 |
|
|
foundchar = p + 1; |
227 |
|
|
|
228 |
|
|
*p++ = bigram1[c]; |
229 |
|
|
*p++ = bigram2[c]; |
230 |
|
|
} |
231 |
|
|
} |
232 |
|
|
|
233 |
|
|
if (found) { /* previous line matched */ |
234 |
|
|
cutoff = path; |
235 |
|
|
*p-- = '\0'; |
236 |
|
|
foundchar = p; |
237 |
|
|
} else if (foundchar >= path + count) { /* a char matched */ |
238 |
|
|
*p-- = '\0'; |
239 |
|
|
cutoff = path + count; |
240 |
|
|
} else /* nothing to do */ |
241 |
|
|
continue; |
242 |
|
|
|
243 |
|
|
found = 0; |
244 |
|
|
for (s = foundchar; s >= cutoff; s--) { |
245 |
|
|
if (*s == cc |
246 |
|
|
#ifdef FF_ICASE |
247 |
|
|
|| TOLOWER(*s) == cc |
248 |
|
|
#endif /* FF_ICASE */ |
249 |
|
|
) { /* fast first char check */ |
250 |
|
|
for (p = patend - 1, q = s - 1; *p != '\0'; |
251 |
|
|
p--, q--) |
252 |
|
|
if (*q != *p |
253 |
|
|
#ifdef FF_ICASE |
254 |
|
|
&& TOLOWER(*q) != *p |
255 |
|
|
#endif /* FF_ICASE */ |
256 |
|
|
) |
257 |
|
|
break; |
258 |
|
|
if (*p == '\0') { /* fast match success */ |
259 |
|
|
char *shortpath; |
260 |
|
|
|
261 |
|
|
found = 1; |
262 |
|
|
shortpath = path; |
263 |
|
|
if (f_basename) |
264 |
|
|
shortpath = basename(path); |
265 |
|
|
|
266 |
|
|
if ((!f_basename && (!globflag || |
267 |
|
|
#ifdef FF_ICASE |
268 |
|
|
!fnmatch(pathpart, shortpath, |
269 |
|
|
FNM_CASEFOLD))) |
270 |
|
|
#else |
271 |
|
|
!fnmatch(pathpart, shortpath, 0))) |
272 |
|
|
#endif /* FF_ICASE */ |
273 |
|
|
|| (strstr(shortpath, pathpart) != |
274 |
|
|
NULL)) { |
275 |
|
|
if (f_silent) |
276 |
|
|
counter++; |
277 |
|
|
else if (f_limit) { |
278 |
|
|
counter++; |
279 |
|
|
if (f_limit >= counter) |
280 |
|
|
(void)puts(path); |
281 |
|
|
else { |
282 |
|
|
(void)fprintf(stderr, "[show only %d lines]\n", counter - 1); |
283 |
|
|
exit(0); |
284 |
|
|
} |
285 |
|
|
} else |
286 |
|
|
(void)puts(path); |
287 |
|
|
} |
288 |
|
|
break; |
289 |
|
|
} |
290 |
|
|
} |
291 |
|
|
} |
292 |
|
|
} |
293 |
|
|
} |