1  | 
     | 
     | 
    /*  | 
    
    
    2  | 
     | 
     | 
     *	$OpenBSD: locate.bigram.c,v 1.15 2015/12/09 01:58:34 jsg Exp $  | 
    
    
    3  | 
     | 
     | 
     *  | 
    
    
    4  | 
     | 
     | 
     * Copyright (c) 1995 Wolfram Schneider <wosch@FreeBSD.org>. Berlin.  | 
    
    
    5  | 
     | 
     | 
     * Copyright (c) 1989, 1993  | 
    
    
    6  | 
     | 
     | 
     *	The Regents of the University of California.  All rights reserved.  | 
    
    
    7  | 
     | 
     | 
     *  | 
    
    
    8  | 
     | 
     | 
     * This code is derived from software contributed to Berkeley by  | 
    
    
    9  | 
     | 
     | 
     * James A. Woods.  | 
    
    
    10  | 
     | 
     | 
     *  | 
    
    
    11  | 
     | 
     | 
     * Redistribution and use in source and binary forms, with or without  | 
    
    
    12  | 
     | 
     | 
     * modification, are permitted provided that the following conditions  | 
    
    
    13  | 
     | 
     | 
     * are met:  | 
    
    
    14  | 
     | 
     | 
     * 1. Redistributions of source code must retain the above copyright  | 
    
    
    15  | 
     | 
     | 
     *    notice, this list of conditions and the following disclaimer.  | 
    
    
    16  | 
     | 
     | 
     * 2. Redistributions in binary form must reproduce the above copyright  | 
    
    
    17  | 
     | 
     | 
     *    notice, this list of conditions and the following disclaimer in the  | 
    
    
    18  | 
     | 
     | 
     *    documentation and/or other materials provided with the distribution.  | 
    
    
    19  | 
     | 
     | 
     * 3. Neither the name of the University nor the names of its contributors  | 
    
    
    20  | 
     | 
     | 
     *    may be used to endorse or promote products derived from this software  | 
    
    
    21  | 
     | 
     | 
     *    without specific prior written permission.  | 
    
    
    22  | 
     | 
     | 
     *  | 
    
    
    23  | 
     | 
     | 
     * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND  | 
    
    
    24  | 
     | 
     | 
     * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE  | 
    
    
    25  | 
     | 
     | 
     * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE  | 
    
    
    26  | 
     | 
     | 
     * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE  | 
    
    
    27  | 
     | 
     | 
     * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL  | 
    
    
    28  | 
     | 
     | 
     * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS  | 
    
    
    29  | 
     | 
     | 
     * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)  | 
    
    
    30  | 
     | 
     | 
     * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT  | 
    
    
    31  | 
     | 
     | 
     * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY  | 
    
    
    32  | 
     | 
     | 
     * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF  | 
    
    
    33  | 
     | 
     | 
     * SUCH DAMAGE.  | 
    
    
    34  | 
     | 
     | 
     *  | 
    
    
    35  | 
     | 
     | 
     * 	$Id: locate.bigram.c,v 1.15 2015/12/09 01:58:34 jsg Exp $  | 
    
    
    36  | 
     | 
     | 
     */  | 
    
    
    37  | 
     | 
     | 
     | 
    
    
    38  | 
     | 
     | 
    /*  | 
    
    
    39  | 
     | 
     | 
     *  bigram < sorted_file_names | sort -nr |  | 
    
    
    40  | 
     | 
     | 
     *  	awk 'NR <= 128 { printf $2 }' > bigrams | 
    
    
    41  | 
     | 
     | 
     *  | 
    
    
    42  | 
     | 
     | 
     * List bigrams for 'updatedb' script.  | 
    
    
    43  | 
     | 
     | 
     * Use 'code' to encode a file using this output.  | 
    
    
    44  | 
     | 
     | 
     */  | 
    
    
    45  | 
     | 
     | 
     | 
    
    
    46  | 
     | 
     | 
    #include <stdio.h>  | 
    
    
    47  | 
     | 
     | 
    #include <stdlib.h>  | 
    
    
    48  | 
     | 
     | 
    #include <limits.h>  | 
    
    
    49  | 
     | 
     | 
    #include <unistd.h>  | 
    
    
    50  | 
     | 
     | 
    #include <err.h>  | 
    
    
    51  | 
     | 
     | 
    #include "locate.h"  | 
    
    
    52  | 
     | 
     | 
     | 
    
    
    53  | 
     | 
     | 
    u_char buf1[PATH_MAX] = " ";  | 
    
    
    54  | 
     | 
     | 
    u_char buf2[PATH_MAX];  | 
    
    
    55  | 
     | 
     | 
    u_int bigram[UCHAR_MAX + 1][UCHAR_MAX + 1];  | 
    
    
    56  | 
     | 
     | 
     | 
    
    
    57  | 
     | 
     | 
    int  | 
    
    
    58  | 
     | 
     | 
    main(void)  | 
    
    
    59  | 
     | 
     | 
    { | 
    
    
    60  | 
     | 
     | 
    	u_char *cp;  | 
    
    
    61  | 
     | 
     | 
    	u_char *oldpath = buf1, *path = buf2;  | 
    
    
    62  | 
     | 
     | 
    	u_int i, j;  | 
    
    
    63  | 
     | 
     | 
     | 
    
    
    64  | 
    ✗✓ | 
    6  | 
    	if (pledge("stdio flock rpath cpath wpath", NULL) == -1) | 
    
    
    65  | 
     | 
     | 
    		err(1, "pledge");  | 
    
    
    66  | 
     | 
     | 
     | 
    
    
    67  | 
    ✓✓ | 
    43410  | 
    	while (fgets(path, sizeof(buf2), stdin) != NULL) { | 
    
    
    68  | 
     | 
     | 
     | 
    
    
    69  | 
     | 
     | 
    		/*  | 
    
    
    70  | 
     | 
     | 
    		 * We don't need remove newline character '\n'.  | 
    
    
    71  | 
     | 
     | 
    		 * '\n' is less than ASCII_MIN and will be later  | 
    
    
    72  | 
     | 
     | 
    		 * ignored at output.  | 
    
    
    73  | 
     | 
     | 
    		 */  | 
    
    
    74  | 
     | 
     | 
     | 
    
    
    75  | 
     | 
     | 
     | 
    
    
    76  | 
     | 
     | 
    		/* skip longest common prefix */  | 
    
    
    77  | 
    ✓✓ | 
    2963328  | 
    		for (cp = path; *cp == *oldpath; cp++, oldpath++)  | 
    
    
    78  | 
    ✓✗ | 
    1438257  | 
    			if (*cp == '\0')  | 
    
    
    79  | 
     | 
     | 
    				break;  | 
    
    
    80  | 
     | 
     | 
     | 
    
    
    81  | 
    ✓✓✓✓
  | 
    507045  | 
    		while (*cp != '\0' && *(cp + 1) != '\0') { | 
    
    
    82  | 
     | 
    147792  | 
    			bigram[(u_char)*cp][(u_char)*(cp + 1)]++;  | 
    
    
    83  | 
     | 
    147792  | 
    			cp += 2;  | 
    
    
    84  | 
     | 
     | 
    		}  | 
    
    
    85  | 
     | 
     | 
     | 
    
    
    86  | 
     | 
     | 
    		/* swap pointers */  | 
    
    
    87  | 
    ✓✓ | 
    43407  | 
    		if (path == buf1) { | 
    
    
    88  | 
     | 
     | 
    			path = buf2;  | 
    
    
    89  | 
     | 
     | 
    			oldpath = buf1;  | 
    
    
    90  | 
     | 
    21702  | 
    		} else { | 
    
    
    91  | 
     | 
     | 
    			path = buf1;  | 
    
    
    92  | 
     | 
     | 
    			oldpath = buf2;  | 
    
    
    93  | 
     | 
     | 
    		}  | 
    
    
    94  | 
     | 
     | 
    	}  | 
    
    
    95  | 
     | 
     | 
     | 
    
    
    96  | 
     | 
     | 
    	/* output, boundary check */  | 
    
    
    97  | 
    ✓✓ | 
    582  | 
    	for (i = ASCII_MIN; i <= ASCII_MAX; i++)  | 
    
    
    98  | 
    ✓✓ | 
    55872  | 
    		for (j = ASCII_MIN; j <= ASCII_MAX; j++)  | 
    
    
    99  | 
    ✓✓ | 
    27648  | 
    			if (bigram[i][j] != 0)  | 
    
    
    100  | 
     | 
    5568  | 
    				(void)printf("%4u\t%c%c\n", bigram[i][j], i, j); | 
    
    
    101  | 
     | 
     | 
     | 
    
    
    102  | 
     | 
     | 
    	exit(0);  | 
    
    
    103  | 
     | 
     | 
    }  |