1 |
|
|
/* |
2 |
|
|
* $OpenBSD: locate.c,v 1.31 2015/11/19 21:46:05 mmcc Exp $ |
3 |
|
|
* |
4 |
|
|
* Copyright (c) 1995 Wolfram Schneider <wosch@FreeBSD.org>. Berlin. |
5 |
|
|
* Copyright (c) 1989, 1993 |
6 |
|
|
* The Regents of the University of California. All rights reserved. |
7 |
|
|
* |
8 |
|
|
* This code is derived from software contributed to Berkeley by |
9 |
|
|
* James A. Woods. |
10 |
|
|
* |
11 |
|
|
* Redistribution and use in source and binary forms, with or without |
12 |
|
|
* modification, are permitted provided that the following conditions |
13 |
|
|
* are met: |
14 |
|
|
* 1. Redistributions of source code must retain the above copyright |
15 |
|
|
* notice, this list of conditions and the following disclaimer. |
16 |
|
|
* 2. Redistributions in binary form must reproduce the above copyright |
17 |
|
|
* notice, this list of conditions and the following disclaimer in the |
18 |
|
|
* documentation and/or other materials provided with the distribution. |
19 |
|
|
* 3. Neither the name of the University nor the names of its contributors |
20 |
|
|
* may be used to endorse or promote products derived from this software |
21 |
|
|
* without specific prior written permission. |
22 |
|
|
* |
23 |
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
24 |
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
25 |
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
26 |
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
27 |
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
28 |
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
29 |
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
30 |
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
31 |
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
32 |
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
33 |
|
|
* SUCH DAMAGE. |
34 |
|
|
* |
35 |
|
|
* $Id: locate.c,v 1.31 2015/11/19 21:46:05 mmcc Exp $ |
36 |
|
|
*/ |
37 |
|
|
|
38 |
|
|
/*
 * Ref: Usenix ;login:, Vol 8, No 1, February/March, 1983, p. 8.
 *
 * Locate scans a file list for the full pathname of a file given only part
 * of the name.  The list has been processed with "front-compression"
 * and bigram coding.  Front compression reduces space by a factor of 4-5,
 * bigram coding by a further 20-25%.
 *
 * The codes are:
 *
 *	0-28	likeliest differential counts + offset to make nonnegative
 *	30	switch code for out-of-range count to follow in next word
 *	31	an 8-bit char follows
 *	128-255	bigram codes (128 most common, as determined by 'updatedb')
 *	32-127	single character (printable) ascii residue (i.e., literal)
 *
 * A novel two-tiered string search technique is employed:
 *
 * First, a metacharacter-free subpattern and partial pathname is matched
 * BACKWARDS to avoid full expansion of the pathname list.  The time saving
 * is 40-50% over forward matching, which cannot efficiently handle
 * overlapped search patterns and compressed path residue.
 *
 * Then, the actual shell glob-style regular expression (if in this form) is
 * matched against the candidate pathnames using the slower routines provided
 * in the standard 'find'.
 */
65 |
|
|
|
66 |
|
|
#include <sys/mman.h> |
67 |
|
|
#include <sys/stat.h> |
68 |
|
|
#include <sys/types.h> |
69 |
|
|
|
70 |
|
|
#include <ctype.h> |
71 |
|
|
#include <err.h> |
72 |
|
|
#include <fcntl.h> |
73 |
|
|
#include <fnmatch.h> |
74 |
|
|
#include <libgen.h> |
75 |
|
|
#include <limits.h> |
76 |
|
|
#include <locale.h> |
77 |
|
|
#include <stdio.h> |
78 |
|
|
#include <stdlib.h> |
79 |
|
|
#include <string.h> |
80 |
|
|
#include <unistd.h> |
81 |
|
|
|
82 |
|
|
#include "locate.h" |
83 |
|
|
#include "pathnames.h" |
84 |
|
|
|
85 |
|
|
#ifdef DEBUG |
86 |
|
|
# include <sys/time.h> |
87 |
|
|
# include <sys/types.h> |
88 |
|
|
# include <sys/resource.h> |
89 |
|
|
#endif |
90 |
|
|
|
91 |
|
|
/*
 * Program-wide state.  The f_* flags and the match counter all start
 * out as zero.
 */

/* locate database */
char	*path_fcodes;

/* use mmap */
int	 f_mmap;

/* ignore case */
int	 f_icase;

/* print statistic */
int	 f_statistic;

/* suppress output, show only count of matches */
int	 f_silent;

/* limit number of output lines, 0 == infinite */
int	 f_limit;

/* match only on the basename */
int	 f_basename;

/* counter for matches [-c] */
u_int	 counter;
99 |
|
|
|
100 |
|
|
|
101 |
|
|
void usage(void); |
102 |
|
|
void statistic(FILE *, char *); |
103 |
|
|
void fastfind(FILE *, char *, char *); |
104 |
|
|
void fastfind_icase(FILE *, char *, char *); |
105 |
|
|
void fastfind_mmap(char *, caddr_t, int, char *); |
106 |
|
|
void fastfind_mmap_icase(char *, caddr_t, int, char *); |
107 |
|
|
void search_mmap(char *, char **); |
108 |
|
|
void search_statistic(char *, char **); |
109 |
|
|
unsigned long cputime(void); |
110 |
|
|
|
111 |
|
|
extern char **colon(char **, char*, char*); |
112 |
|
|
extern void print_matches(u_int); |
113 |
|
|
extern int getwm(caddr_t); |
114 |
|
|
extern int getwf(FILE *); |
115 |
|
|
extern u_char *tolower_word(u_char *); |
116 |
|
|
extern int check_bigram_char(int); |
117 |
|
|
extern char *patprep(char *); |
118 |
|
|
|
119 |
|
|
|
120 |
|
|
int |
121 |
|
|
main(int argc, char *argv[]) |
122 |
|
|
{ |
123 |
|
|
int ch; |
124 |
|
|
char **dbv = NULL; |
125 |
|
|
(void) setlocale(LC_ALL, ""); |
126 |
|
|
|
127 |
|
|
if (pledge("stdio rpath flock cpath wpath", NULL) == -1) |
128 |
|
|
err(1, "pledge"); |
129 |
|
|
|
130 |
|
|
while ((ch = getopt(argc, argv, "bScd:il:")) != -1) |
131 |
|
|
switch (ch) { |
132 |
|
|
case 'b': |
133 |
|
|
f_basename = 1; |
134 |
|
|
break; |
135 |
|
|
case 'S': /* statistic lines */ |
136 |
|
|
f_statistic = 1; |
137 |
|
|
break; |
138 |
|
|
case 'l': /* limit number of output lines, 0 == infinite */ |
139 |
|
|
f_limit = atoi(optarg); |
140 |
|
|
break; |
141 |
|
|
case 'd': /* database */ |
142 |
|
|
dbv = colon(dbv, optarg, _PATH_FCODES); |
143 |
|
|
break; |
144 |
|
|
case 'i': /* ignore case */ |
145 |
|
|
f_icase = 1; |
146 |
|
|
break; |
147 |
|
|
case 'c': /* suppress output, show only count of matches */ |
148 |
|
|
f_silent = 1; |
149 |
|
|
break; |
150 |
|
|
default: |
151 |
|
|
usage(); |
152 |
|
|
} |
153 |
|
|
argv += optind; |
154 |
|
|
argc -= optind; |
155 |
|
|
|
156 |
|
|
/* to few arguments */ |
157 |
|
|
if (argc < 1 && !(f_statistic)) |
158 |
|
|
usage(); |
159 |
|
|
|
160 |
|
|
/* no (valid) database as argument */ |
161 |
|
|
if (dbv == NULL || *dbv == NULL) { |
162 |
|
|
/* try to read database from environment */ |
163 |
|
|
if ((path_fcodes = getenv("LOCATE_PATH")) == NULL || |
164 |
|
|
*path_fcodes == '\0') |
165 |
|
|
/* use default database */ |
166 |
|
|
dbv = colon(dbv, _PATH_FCODES, _PATH_FCODES); |
167 |
|
|
else /* $LOCATE_PATH */ |
168 |
|
|
dbv = colon(dbv, path_fcodes, _PATH_FCODES); |
169 |
|
|
} |
170 |
|
|
|
171 |
|
|
if (f_icase && UCHAR_MAX < 4096) /* init tolower lookup table */ |
172 |
|
|
for (ch = 0; ch < UCHAR_MAX + 1; ch++) |
173 |
|
|
myctype[ch] = tolower(ch); |
174 |
|
|
|
175 |
|
|
/* foreach database ... */ |
176 |
|
|
while ((path_fcodes = *dbv) != NULL) { |
177 |
|
|
dbv++; |
178 |
|
|
|
179 |
|
|
if (f_statistic) |
180 |
|
|
search_statistic(path_fcodes, argv); |
181 |
|
|
else |
182 |
|
|
search_mmap(path_fcodes, argv); |
183 |
|
|
} |
184 |
|
|
|
185 |
|
|
if (f_silent) |
186 |
|
|
print_matches(counter); |
187 |
|
|
exit(0); |
188 |
|
|
} |
189 |
|
|
|
190 |
|
|
|
191 |
|
|
/*
 * Run the statistics pass on one database.
 *
 * db:	path of the database file; it is opened read-only and handed to
 *	statistic() together with its name.
 * s:	pattern vector; not used here.
 *
 * Terminates the program through err() if the database cannot be opened.
 */
void
search_statistic(char *db, char **s)
{
	FILE *fp;

	(void)s;	/* patterns are irrelevant for statistics */

	/* honour the db argument instead of the path_fcodes global */
	if ((fp = fopen(db, "r")) == NULL)
		err(1, "`%s'", db);

	/* count only chars or lines */
	statistic(fp, db);
	(void)fclose(fp);
}
206 |
|
|
|
207 |
|
|
void |
208 |
|
|
search_mmap(char *db, char **s) |
209 |
|
|
{ |
210 |
|
|
struct stat sb; |
211 |
|
|
int fd; |
212 |
|
|
caddr_t p; |
213 |
|
|
off_t len; |
214 |
|
|
#ifdef DEBUG |
215 |
|
|
long t0; |
216 |
|
|
#endif |
217 |
|
|
if ((fd = open(path_fcodes, O_RDONLY)) == -1 || |
218 |
|
|
fstat(fd, &sb) == -1) |
219 |
|
|
err(1, "`%s'", path_fcodes); |
220 |
|
|
len = sb.st_size; |
221 |
|
|
if (len < (2*NBG)) |
222 |
|
|
errx(1, "database too small: %s", db); |
223 |
|
|
|
224 |
|
|
if ((p = mmap((caddr_t)0, (size_t)len, PROT_READ, MAP_SHARED, |
225 |
|
|
fd, (off_t)0)) == MAP_FAILED) |
226 |
|
|
err(1, "mmap ``%s''", path_fcodes); |
227 |
|
|
|
228 |
|
|
/* foreach search string ... */ |
229 |
|
|
while (*s != NULL) { |
230 |
|
|
#ifdef DEBUG |
231 |
|
|
t0 = cputime(); |
232 |
|
|
#endif |
233 |
|
|
if (f_icase) |
234 |
|
|
fastfind_mmap_icase(*s, p, (int)len, path_fcodes); |
235 |
|
|
else |
236 |
|
|
fastfind_mmap(*s, p, (int)len, path_fcodes); |
237 |
|
|
#ifdef DEBUG |
238 |
|
|
(void)fprintf(stderr, "fastfind %ld ms\n", cputime () - t0); |
239 |
|
|
#endif |
240 |
|
|
s++; |
241 |
|
|
} |
242 |
|
|
|
243 |
|
|
if (munmap(p, (size_t)len) == -1) |
244 |
|
|
warn("munmap %s", path_fcodes); |
245 |
|
|
|
246 |
|
|
(void)close(fd); |
247 |
|
|
} |
248 |
|
|
|
249 |
|
|
#ifdef DEBUG |
250 |
|
|
/*
 * Return the user CPU time of the calling process in milliseconds,
 * computed from the ru_utime field filled in by getrusage(RUSAGE_SELF).
 * The return value of getrusage() is not checked.
 */
unsigned long
cputime(void)
{
	struct rusage ru;
	const struct timeval *ut = &ru.ru_utime;

	getrusage(RUSAGE_SELF, &ru);

	/* whole seconds scaled up, microseconds scaled down */
	return (ut->tv_sec * 1000 + ut->tv_usec / 1000);
}
258 |
|
|
#endif /* DEBUG */ |
259 |
|
|
|
260 |
|
|
void |
261 |
|
|
usage(void) |
262 |
|
|
{ |
263 |
|
|
(void)fprintf(stderr, "usage: locate [-bciS] [-d database] "); |
264 |
|
|
(void)fprintf(stderr, "[-l limit] pattern ...\n"); |
265 |
|
|
(void)fprintf(stderr, "default database: `%s' or $LOCATE_PATH\n", |
266 |
|
|
_PATH_FCODES); |
267 |
|
|
exit(1); |
268 |
|
|
} |
269 |
|
|
|
270 |
|
|
/*
 * Accept count only if it lies in the range [0, PATH_MAX).
 * Any other value is reported on stderr as a corrupted database and
 * ends the program with exit status 1.
 */
void
sane_count(int count)
{
	if (count >= 0 && count < PATH_MAX)
		return;		/* plausible value, nothing to do */

	fputs("locate: corrupted database\n", stderr);
	exit(1);
}
278 |
|
|
|
279 |
|
|
/* load fastfind functions */ |
280 |
|
|
|
281 |
|
|
#undef FF_ICASE |
282 |
|
|
#include "fastfind.c" |
283 |
|
|
#define FF_ICASE |
284 |
|
|
#include "fastfind.c" |