/*	$OpenBSD: fastfind.c,v 1.13 2015/10/23 07:57:03 tedu Exp $	*/

/*
 * Copyright (c) 1995 Wolfram Schneider <wosch@FreeBSD.org>. Berlin.
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * James A. Woods.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $Id: fastfind.c,v 1.13 2015/10/23 07:57:03 tedu Exp $
 */
    
    
    37  | 
     | 
     | 
     | 
    
    
    38  | 
     | 
     | 
    #ifndef _LOCATE_STATISTIC_  | 
    
    
    39  | 
     | 
     | 
    #define _LOCATE_STATISTIC_  | 
    
    
    40  | 
     | 
     | 
     | 
    
    
    41  | 
     | 
     | 
    void  | 
    
    
    42  | 
     | 
     | 
    statistic (fp, path_fcodes)  | 
    
    
    43  | 
     | 
     | 
    	FILE *fp;               /* open database */  | 
    
    
    44  | 
     | 
     | 
    	char *path_fcodes;  	/* for error message */  | 
    
    
    45  | 
     | 
     | 
    { | 
    
    
    46  | 
     | 
     | 
    	int lines, chars, size, big, zwerg;  | 
    
    
    47  | 
     | 
     | 
    	u_char *p, *s;  | 
    
    
    48  | 
     | 
     | 
    	int c;  | 
    
    
    49  | 
     | 
     | 
    	int count, umlaut;  | 
    
    
    50  | 
     | 
     | 
    	u_char bigram1[NBG], bigram2[NBG], path[PATH_MAX];  | 
    
    
    51  | 
     | 
     | 
     | 
    
    
    52  | 
     | 
     | 
    	for (c = 0, p = bigram1, s = bigram2; c < NBG; c++) { | 
    
    
    53  | 
     | 
     | 
    		p[c] = check_bigram_char(getc(fp));  | 
    
    
    54  | 
     | 
     | 
    		s[c] = check_bigram_char(getc(fp));  | 
    
    
    55  | 
     | 
     | 
    	}  | 
    
    
    56  | 
     | 
     | 
     | 
    
    
    57  | 
     | 
     | 
    	lines = chars = big = zwerg = umlaut = 0;  | 
    
    
    58  | 
     | 
     | 
    	size = NBG + NBG;  | 
    
    
    59  | 
     | 
     | 
     | 
    
    
    60  | 
     | 
     | 
    	for (c = getc(fp), count = 0; c != EOF; size++) { | 
    
    
    61  | 
     | 
     | 
    		if (c == SWITCH) { | 
    
    
    62  | 
     | 
     | 
    			count += getwf(fp) - OFFSET;  | 
    
    
    63  | 
     | 
     | 
    			size += sizeof(int);  | 
    
    
    64  | 
     | 
     | 
    			zwerg++;  | 
    
    
    65  | 
     | 
     | 
    		} else  | 
    
    
    66  | 
     | 
     | 
    			count += c - OFFSET;  | 
    
    
    67  | 
     | 
     | 
     | 
    
    
    68  | 
     | 
     | 
    		sane_count(count);  | 
    
    
    69  | 
     | 
     | 
    		for (p = path + count; (c = getc(fp)) > SWITCH; size++)  | 
    
    
    70  | 
     | 
     | 
    			if (c < PARITY) { | 
    
    
    71  | 
     | 
     | 
    				if (c == UMLAUT) { | 
    
    
    72  | 
     | 
     | 
    					c = getc(fp);  | 
    
    
    73  | 
     | 
     | 
    					size++;  | 
    
    
    74  | 
     | 
     | 
    					umlaut++;  | 
    
    
    75  | 
     | 
     | 
    				}  | 
    
    
    76  | 
     | 
     | 
    				p++;  | 
    
    
    77  | 
     | 
     | 
    			} else { | 
    
    
    78  | 
     | 
     | 
    				/* bigram char */  | 
    
    
    79  | 
     | 
     | 
    				big++;  | 
    
    
    80  | 
     | 
     | 
    				p += 2;  | 
    
    
    81  | 
     | 
     | 
    			}  | 
    
    
    82  | 
     | 
     | 
     | 
    
    
    83  | 
     | 
     | 
    		p++;  | 
    
    
    84  | 
     | 
     | 
    		lines++;  | 
    
    
    85  | 
     | 
     | 
    		chars += (p - path);  | 
    
    
    86  | 
     | 
     | 
    	}  | 
    
    
    87  | 
     | 
     | 
     | 
    
    
    88  | 
     | 
     | 
    	(void)printf("\nDatabase: %s\n", path_fcodes); | 
    
    
    89  | 
     | 
     | 
    	(void)printf("Compression: Front: %2.2f%%, ", | 
    
    
    90  | 
     | 
     | 
    	    (float)(100 * (size + big - (2 * NBG))) / chars);  | 
    
    
    91  | 
     | 
     | 
    	(void)printf("Bigram: %2.2f%%, ", (float)(100 * (size - big)) / size); | 
    
    
    92  | 
     | 
     | 
    	(void)printf("Total: %2.2f%%\n", | 
    
    
    93  | 
     | 
     | 
    	    (float)(100 * (size - (2 * NBG))) / chars);  | 
    
    
    94  | 
     | 
     | 
    	(void)printf("Filenames: %d, ", lines); | 
    
    
    95  | 
     | 
     | 
    	(void)printf("Characters: %d, ", chars); | 
    
    
    96  | 
     | 
     | 
    	(void)printf("Database size: %d\n", size); | 
    
    
    97  | 
     | 
     | 
    	(void)printf("Bigram characters: %d, ", big); | 
    
    
    98  | 
     | 
     | 
    	(void)printf("Integers: %d, ", zwerg); | 
    
    
    99  | 
     | 
     | 
    	(void)printf("8-Bit characters: %d\n", umlaut); | 
    
    
    100  | 
     | 
     | 
     | 
    
    
    101  | 
     | 
     | 
    }  | 
    
    
    102  | 
     | 
     | 
    #endif /* _LOCATE_STATISTIC_ */  | 
    
    
    103  | 
     | 
     | 
     | 
    
    
    104  | 
     | 
     | 
     | 
    
    
    105  | 
     | 
     | 
    void  | 
    
    
    106  | 
     | 
     | 
     | 
    
    
    107  | 
     | 
     | 
     | 
    
    
    108  | 
     | 
     | 
    #ifdef FF_ICASE  | 
    
    
    109  | 
     | 
     | 
    fastfind_mmap_icase  | 
    
    
    110  | 
     | 
     | 
    #else  | 
    
    
    111  | 
     | 
     | 
    fastfind_mmap  | 
    
    
    112  | 
     | 
     | 
    #endif /* FF_ICASE */  | 
    
    
    113  | 
     | 
     | 
    (pathpart, paddr, len, database)  | 
    
    
    114  | 
     | 
     | 
    	char *pathpart; 	/* search string */  | 
    
    
    115  | 
     | 
     | 
    	caddr_t paddr;  	/* mmap pointer */  | 
    
    
    116  | 
     | 
     | 
    	int len;        	/* length of database */  | 
    
    
    117  | 
     | 
     | 
    	char *database; 	/* for error message */  | 
    
    
    118  | 
     | 
     | 
     | 
    
    
    119  | 
     | 
     | 
     | 
    
    
    120  | 
     | 
     | 
     | 
    
    
    121  | 
     | 
     | 
    { | 
    
    
    122  | 
     | 
     | 
    	u_char *p, *s, *patend, *q, *foundchar;  | 
    
    
    123  | 
     | 
     | 
    	int c, cc;  | 
    
    
    124  | 
     | 
     | 
    	int count, found, globflag;  | 
    
    
    125  | 
     | 
     | 
    	u_char *cutoff;  | 
    
    
    126  | 
     | 
     | 
    	u_char bigram1[NBG], bigram2[NBG], path[PATH_MAX];  | 
    
    
    127  | 
     | 
     | 
     | 
    
    
    128  | 
     | 
     | 
    #ifdef FF_ICASE  | 
    
    
    129  | 
     | 
     | 
    	/* use a lookup table for case insensitive search */  | 
    
    
    130  | 
     | 
     | 
    	u_char table[UCHAR_MAX + 1];  | 
    
    
    131  | 
     | 
     | 
     | 
    
    
    132  | 
     | 
     | 
    	tolower_word(pathpart);  | 
    
    
    133  | 
     | 
     | 
    #endif /* FF_ICASE*/  | 
    
    
    134  | 
     | 
     | 
     | 
    
    
    135  | 
     | 
     | 
    	/* init bigram table */  | 
    
    
    136  | 
     | 
     | 
    	if (len < (2*NBG)) { | 
    
    
    137  | 
     | 
     | 
    		(void)fprintf(stderr, "database too small: %s\n", database);  | 
    
    
    138  | 
     | 
     | 
    		exit(1);  | 
    
    
    139  | 
     | 
     | 
    	}  | 
    
    
    140  | 
     | 
     | 
     | 
    
    
    141  | 
     | 
     | 
    	for (c = 0, p = bigram1, s = bigram2; c < NBG; c++, len-= 2) { | 
    
    
    142  | 
     | 
     | 
    		p[c] = check_bigram_char(*paddr++);  | 
    
    
    143  | 
     | 
     | 
    		s[c] = check_bigram_char(*paddr++);  | 
    
    
    144  | 
     | 
     | 
    	}  | 
    
    
    145  | 
     | 
     | 
     | 
    
    
    146  | 
     | 
     | 
    	/* find optimal (last) char for searching */  | 
    
    
    147  | 
     | 
     | 
    	for (p = pathpart; *p != '\0'; p++)  | 
    
    
    148  | 
     | 
     | 
    		if (strchr(LOCATE_REG, *p) != NULL)  | 
    
    
    149  | 
     | 
     | 
    			break;  | 
    
    
    150  | 
     | 
     | 
     | 
    
    
    151  | 
     | 
     | 
    	if (*p == '\0')  | 
    
    
    152  | 
     | 
     | 
    		globflag = 0;  | 
    
    
    153  | 
     | 
     | 
    	else  | 
    
    
    154  | 
     | 
     | 
    		globflag = 1;  | 
    
    
    155  | 
     | 
     | 
     | 
    
    
    156  | 
     | 
     | 
    	p = pathpart;  | 
    
    
    157  | 
     | 
     | 
    	patend = patprep(p);  | 
    
    
    158  | 
     | 
     | 
    	cc = *patend;  | 
    
    
    159  | 
     | 
     | 
     | 
    
    
    160  | 
     | 
     | 
    #ifdef FF_ICASE  | 
    
    
    161  | 
     | 
     | 
    	/* set patend char to true */  | 
    
    
    162  | 
     | 
     | 
    	table[TOLOWER(*patend)] = 1;  | 
    
    
    163  | 
     | 
     | 
    	table[toupper(*patend)] = 1;  | 
    
    
    164  | 
     | 
     | 
    #endif /* FF_ICASE */  | 
    
    
    165  | 
     | 
     | 
     | 
    
    
    166  | 
     | 
     | 
     | 
    
    
    167  | 
     | 
     | 
    	/* main loop */  | 
    
    
    168  | 
     | 
     | 
    	found = count = 0;  | 
    
    
    169  | 
     | 
     | 
    	foundchar = 0;  | 
    
    
    170  | 
     | 
     | 
     | 
    
    
    171  | 
     | 
     | 
    	c = (u_char)*paddr++; len--;  | 
    
    
    172  | 
     | 
     | 
    	for (; len > 0; ) { | 
    
    
    173  | 
     | 
     | 
     | 
    
    
    174  | 
     | 
     | 
    		/* go forward or backward */  | 
    
    
    175  | 
     | 
     | 
    		if (c == SWITCH) { /* big step, an integer */ | 
    
    
    176  | 
     | 
     | 
    			count += getwm(paddr) - OFFSET;  | 
    
    
    177  | 
     | 
     | 
    			len -= INTSIZE; paddr += INTSIZE;  | 
    
    
    178  | 
     | 
     | 
    		} else {	   /* slow step, =< 14 chars */ | 
    
    
    179  | 
     | 
     | 
    			count += c - OFFSET;  | 
    
    
    180  | 
     | 
     | 
    		}  | 
    
    
    181  | 
     | 
     | 
     | 
    
    
    182  | 
     | 
     | 
    		sane_count(count);  | 
    
    
    183  | 
     | 
     | 
    		/* overlay old path */  | 
    
    
    184  | 
     | 
     | 
    		p = path + count;  | 
    
    
    185  | 
     | 
     | 
    		foundchar = p - 1;  | 
    
    
    186  | 
     | 
     | 
     | 
    
    
    187  | 
     | 
     | 
    		for (;;) { | 
    
    
    188  | 
     | 
     | 
    			c = (u_char)*paddr++;  | 
    
    
    189  | 
     | 
     | 
    			len--;  | 
    
    
    190  | 
     | 
     | 
    			/*  | 
    
    
    191  | 
     | 
     | 
    			 * == UMLAUT: 8 bit char followed  | 
    
    
    192  | 
     | 
     | 
    			 * <= SWITCH: offset  | 
    
    
    193  | 
     | 
     | 
    			 * >= PARITY: bigram  | 
    
    
    194  | 
     | 
     | 
    			 * rest:      single ascii char  | 
    
    
    195  | 
     | 
     | 
    			 *  | 
    
    
    196  | 
     | 
     | 
    			 * offset < SWITCH < UMLAUT < ascii < PARITY < bigram  | 
    
    
    197  | 
     | 
     | 
    			 */  | 
    
    
    198  | 
     | 
     | 
    			if (c < PARITY) { | 
    
    
    199  | 
     | 
     | 
    				if (c <= UMLAUT) { | 
    
    
    200  | 
     | 
     | 
    					if (c == UMLAUT) { | 
    
    
    201  | 
     | 
     | 
    						c = (u_char)*paddr++;  | 
    
    
    202  | 
     | 
     | 
    						len--;  | 
    
    
    203  | 
     | 
     | 
     | 
    
    
    204  | 
     | 
     | 
    					} else  | 
    
    
    205  | 
     | 
     | 
    						break; /* SWITCH */  | 
    
    
    206  | 
     | 
     | 
    				}  | 
    
    
    207  | 
     | 
     | 
    #ifdef FF_ICASE  | 
    
    
    208  | 
     | 
     | 
    				if (table[c])  | 
    
    
    209  | 
     | 
     | 
    #else  | 
    
    
    210  | 
     | 
     | 
    				if (c == cc)  | 
    
    
    211  | 
     | 
     | 
    #endif /* FF_ICASE */  | 
    
    
    212  | 
     | 
     | 
    					foundchar = p;  | 
    
    
    213  | 
     | 
     | 
    				*p++ = c;  | 
    
    
    214  | 
     | 
     | 
    			} else { | 
    
    
    215  | 
     | 
     | 
    				/* bigrams are parity-marked */  | 
    
    
    216  | 
     | 
     | 
    				TO7BIT(c);  | 
    
    
    217  | 
     | 
     | 
     | 
    
    
    218  | 
     | 
     | 
    #ifndef FF_ICASE  | 
    
    
    219  | 
     | 
     | 
    				if (bigram1[c] == cc ||  | 
    
    
    220  | 
     | 
     | 
    				    bigram2[c] == cc)  | 
    
    
    221  | 
     | 
     | 
    #else  | 
    
    
    222  | 
     | 
     | 
     | 
    
    
    223  | 
     | 
     | 
    					if (table[bigram1[c]] ||  | 
    
    
    224  | 
     | 
     | 
    					    table[bigram2[c]])  | 
    
    
    225  | 
     | 
     | 
    #endif /* FF_ICASE */  | 
    
    
    226  | 
     | 
     | 
    						foundchar = p + 1;  | 
    
    
    227  | 
     | 
     | 
     | 
    
    
    228  | 
     | 
     | 
    				*p++ = bigram1[c];  | 
    
    
    229  | 
     | 
     | 
    				*p++ = bigram2[c];  | 
    
    
    230  | 
     | 
     | 
    			}  | 
    
    
    231  | 
     | 
     | 
    		}  | 
    
    
    232  | 
     | 
     | 
     | 
    
    
    233  | 
     | 
     | 
    		if (found) {			/* previous line matched */ | 
    
    
    234  | 
     | 
     | 
    			cutoff = path;  | 
    
    
    235  | 
     | 
     | 
    			*p-- = '\0';  | 
    
    
    236  | 
     | 
     | 
    			foundchar = p;  | 
    
    
    237  | 
     | 
     | 
    		} else if (foundchar >= path + count) { /* a char matched */ | 
    
    
    238  | 
     | 
     | 
    			*p-- = '\0';  | 
    
    
    239  | 
     | 
     | 
    			cutoff = path + count;  | 
    
    
    240  | 
     | 
     | 
    		} else				/* nothing to do */  | 
    
    
    241  | 
     | 
     | 
    			continue;  | 
    
    
    242  | 
     | 
     | 
     | 
    
    
    243  | 
     | 
     | 
    		found = 0;  | 
    
    
    244  | 
     | 
     | 
    		for (s = foundchar; s >= cutoff; s--) { | 
    
    
    245  | 
     | 
     | 
    			if (*s == cc  | 
    
    
    246  | 
     | 
     | 
    #ifdef FF_ICASE  | 
    
    
    247  | 
     | 
     | 
    			    || TOLOWER(*s) == cc  | 
    
    
    248  | 
     | 
     | 
    #endif /* FF_ICASE */  | 
    
    
    249  | 
     | 
     | 
    			    ) {	/* fast first char check */ | 
    
    
    250  | 
     | 
     | 
    				for (p = patend - 1, q = s - 1; *p != '\0';  | 
    
    
    251  | 
     | 
     | 
    				    p--, q--)  | 
    
    
    252  | 
     | 
     | 
    					if (*q != *p  | 
    
    
    253  | 
     | 
     | 
    #ifdef FF_ICASE  | 
    
    
    254  | 
     | 
     | 
    					    && TOLOWER(*q) != *p  | 
    
    
    255  | 
     | 
     | 
    #endif /* FF_ICASE */  | 
    
    
    256  | 
     | 
     | 
    					    )  | 
    
    
    257  | 
     | 
     | 
    						break;  | 
    
    
    258  | 
     | 
     | 
    				if (*p == '\0') {   /* fast match success */ | 
    
    
    259  | 
     | 
     | 
    					char	*shortpath;  | 
    
    
    260  | 
     | 
     | 
     | 
    
    
    261  | 
     | 
     | 
    					found = 1;  | 
    
    
    262  | 
     | 
     | 
    					shortpath = path;  | 
    
    
    263  | 
     | 
     | 
    					if (f_basename)  | 
    
    
    264  | 
     | 
     | 
    						shortpath = basename(path);  | 
    
    
    265  | 
     | 
     | 
     | 
    
    
    266  | 
     | 
     | 
    					if ((!f_basename && (!globflag ||  | 
    
    
    267  | 
     | 
     | 
    #ifdef FF_ICASE  | 
    
    
    268  | 
     | 
     | 
    					    !fnmatch(pathpart, shortpath,  | 
    
    
    269  | 
     | 
     | 
    						FNM_CASEFOLD)))  | 
    
    
    270  | 
     | 
     | 
    #else  | 
    
    
    271  | 
     | 
     | 
    					    !fnmatch(pathpart, shortpath, 0)))  | 
    
    
    272  | 
     | 
     | 
    #endif /* FF_ICASE */  | 
    
    
    273  | 
     | 
     | 
    					    || (strstr(shortpath, pathpart) !=  | 
    
    
    274  | 
     | 
     | 
    					    NULL)) { | 
    
    
    275  | 
     | 
     | 
    						if (f_silent)  | 
    
    
    276  | 
     | 
     | 
    							counter++;  | 
    
    
    277  | 
     | 
     | 
    						else if (f_limit) { | 
    
    
    278  | 
     | 
     | 
    							counter++;  | 
    
    
    279  | 
     | 
     | 
    							if (f_limit >= counter)  | 
    
    
    280  | 
     | 
     | 
    								(void)puts(path);  | 
    
    
    281  | 
     | 
     | 
    							else  { | 
    
    
    282  | 
     | 
     | 
    								(void)fprintf(stderr, "[show only %d lines]\n", counter - 1);  | 
    
    
    283  | 
     | 
     | 
    								exit(0);  | 
    
    
    284  | 
     | 
     | 
    							}  | 
    
    
    285  | 
     | 
     | 
    						} else  | 
    
    
    286  | 
     | 
     | 
    							(void)puts(path);  | 
    
    
    287  | 
     | 
     | 
    					}  | 
    
    
    288  | 
     | 
     | 
    					break;  | 
    
    
    289  | 
     | 
     | 
    				}  | 
    
    
    290  | 
     | 
     | 
    			}  | 
    
    
    291  | 
     | 
     | 
    		}  | 
    
    
    292  | 
     | 
     | 
    	}  | 
    
    
    293  | 
     | 
     | 
    }  |