1  | 
     | 
     | 
    /*	$OpenBSD: bt_open.c,v 1.19 2015/12/28 22:08:18 mmcc Exp $	*/  | 
    
    
    2  | 
     | 
     | 
     | 
    
    
    3  | 
     | 
     | 
    /*-  | 
    
    
    4  | 
     | 
     | 
     * Copyright (c) 1990, 1993, 1994  | 
    
    
    5  | 
     | 
     | 
     *	The Regents of the University of California.  All rights reserved.  | 
    
    
    6  | 
     | 
     | 
     *  | 
    
    
    7  | 
     | 
     | 
     * This code is derived from software contributed to Berkeley by  | 
    
    
    8  | 
     | 
     | 
     * Mike Olson.  | 
    
    
    9  | 
     | 
     | 
     *  | 
    
    
    10  | 
     | 
     | 
     * Redistribution and use in source and binary forms, with or without  | 
    
    
    11  | 
     | 
     | 
     * modification, are permitted provided that the following conditions  | 
    
    
    12  | 
     | 
     | 
     * are met:  | 
    
    
    13  | 
     | 
     | 
     * 1. Redistributions of source code must retain the above copyright  | 
    
    
    14  | 
     | 
     | 
     *    notice, this list of conditions and the following disclaimer.  | 
    
    
    15  | 
     | 
     | 
     * 2. Redistributions in binary form must reproduce the above copyright  | 
    
    
    16  | 
     | 
     | 
     *    notice, this list of conditions and the following disclaimer in the  | 
    
    
    17  | 
     | 
     | 
     *    documentation and/or other materials provided with the distribution.  | 
    
    
    18  | 
     | 
     | 
     * 3. Neither the name of the University nor the names of its contributors  | 
    
    
    19  | 
     | 
     | 
     *    may be used to endorse or promote products derived from this software  | 
    
    
    20  | 
     | 
     | 
     *    without specific prior written permission.  | 
    
    
    21  | 
     | 
     | 
     *  | 
    
    
    22  | 
     | 
     | 
     * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND  | 
    
    
    23  | 
     | 
     | 
     * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE  | 
    
    
    24  | 
     | 
     | 
     * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE  | 
    
    
    25  | 
     | 
     | 
     * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE  | 
    
    
    26  | 
     | 
     | 
     * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL  | 
    
    
    27  | 
     | 
     | 
     * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS  | 
    
    
    28  | 
     | 
     | 
     * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)  | 
    
    
    29  | 
     | 
     | 
     * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT  | 
    
    
    30  | 
     | 
     | 
     * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY  | 
    
    
    31  | 
     | 
     | 
     * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF  | 
    
    
    32  | 
     | 
     | 
     * SUCH DAMAGE.  | 
    
    
    33  | 
     | 
     | 
     */  | 
    
    
    34  | 
     | 
     | 
     | 
    
    
    35  | 
     | 
     | 
    /*  | 
    
    
    36  | 
     | 
     | 
     * Implementation of btree access method for 4.4BSD.  | 
    
    
    37  | 
     | 
     | 
     *  | 
    
    
    38  | 
     | 
     | 
     * The design here was originally based on that of the btree access method  | 
    
    
    39  | 
     | 
     | 
     * used in the Postgres database system at UC Berkeley.  This implementation  | 
    
    
    40  | 
     | 
     | 
     * is wholly independent of the Postgres code.  | 
    
    
    41  | 
     | 
     | 
     */  | 
    
    
    42  | 
     | 
     | 
     | 
    
    
    43  | 
     | 
     | 
    #include <sys/stat.h>  | 
    
    
    44  | 
     | 
     | 
     | 
    
    
    45  | 
     | 
     | 
    #include <errno.h>  | 
    
    
    46  | 
     | 
     | 
    #include <fcntl.h>  | 
    
    
    47  | 
     | 
     | 
    #include <limits.h>  | 
    
    
    48  | 
     | 
     | 
    #include <signal.h>  | 
    
    
    49  | 
     | 
     | 
    #include <stdio.h>  | 
    
    
    50  | 
     | 
     | 
    #include <stdlib.h>  | 
    
    
    51  | 
     | 
     | 
    #include <string.h>  | 
    
    
    52  | 
     | 
     | 
    #include <unistd.h>  | 
    
    
    53  | 
     | 
     | 
     | 
    
    
    54  | 
     | 
     | 
    #include <db.h>  | 
    
    
    55  | 
     | 
     | 
    #include "btree.h"  | 
    
    
    56  | 
     | 
     | 
     | 
    
    
    57  | 
     | 
     | 
    #ifdef DEBUG  | 
    
    
    58  | 
     | 
     | 
    #undef	MINPSIZE  | 
    
    
    59  | 
     | 
     | 
    #define	MINPSIZE	128  | 
    
    
    60  | 
     | 
     | 
    #endif  | 
    
    
    61  | 
     | 
     | 
     | 
    
    
    62  | 
     | 
     | 
    static int byteorder(void);  | 
    
    
    63  | 
     | 
     | 
    static int nroot(BTREE *);  | 
    
    
    64  | 
     | 
     | 
    static int tmp(void);  | 
    
    
    65  | 
     | 
     | 
     | 
    
    
    66  | 
     | 
     | 
    /*  | 
    
    
    67  | 
     | 
     | 
     * __BT_OPEN -- Open a btree.  | 
    
    
    68  | 
     | 
     | 
     *  | 
    
    
    69  | 
     | 
     | 
     * Creates and fills a DB struct, and calls the routine that actually  | 
    
    
    70  | 
     | 
     | 
     * opens the btree.  | 
    
    
    71  | 
     | 
     | 
     *  | 
    
    
    72  | 
     | 
     | 
     * Parameters:  | 
    
    
    73  | 
     | 
     | 
     *	fname:	filename (NULL for in-memory trees)  | 
    
    
    74  | 
     | 
     | 
     *	flags:	open flag bits  | 
    
    
    75  | 
     | 
     | 
     *	mode:	open permission bits  | 
    
    
    76  | 
     | 
     | 
     *	b:	BTREEINFO pointer  | 
    
    
    77  | 
     | 
     | 
     *  | 
    
    
    78  | 
     | 
     | 
     * Returns:  | 
    
    
    79  | 
     | 
     | 
     *	NULL on failure, pointer to DB on success.  | 
    
    
    80  | 
     | 
     | 
     *  | 
    
    
    81  | 
     | 
     | 
     */  | 
    
    
    82  | 
     | 
     | 
    DB *  | 
    
    
    83  | 
     | 
     | 
    __bt_open(const char *fname, int flags, int mode, const BTREEINFO *openinfo,  | 
    
    
    84  | 
     | 
     | 
        int dflags)  | 
    
    
    85  | 
     | 
     | 
    { | 
    
    
    86  | 
     | 
     | 
    	struct stat sb;  | 
    
    
    87  | 
     | 
     | 
    	BTMETA m;  | 
    
    
    88  | 
     | 
     | 
    	BTREE *t;  | 
    
    
    89  | 
     | 
     | 
    	BTREEINFO b;  | 
    
    
    90  | 
     | 
     | 
    	DB *dbp;  | 
    
    
    91  | 
     | 
     | 
    	pgno_t ncache;  | 
    
    
    92  | 
     | 
     | 
    	ssize_t nr;  | 
    
    
    93  | 
     | 
     | 
    	int machine_lorder, saved_errno;  | 
    
    
    94  | 
     | 
     | 
     | 
    
    
    95  | 
     | 
     | 
    	t = NULL;  | 
    
    
    96  | 
     | 
     | 
     | 
    
    
    97  | 
     | 
     | 
    	/*  | 
    
    
    98  | 
     | 
     | 
    	 * Intention is to make sure all of the user's selections are okay  | 
    
    
    99  | 
     | 
     | 
    	 * here and then use them without checking.  Can't be complete, since  | 
    
    
    100  | 
     | 
     | 
    	 * we don't know the right page size, lorder or flags until the backing  | 
    
    
    101  | 
     | 
     | 
    	 * file is opened.  Also, the file's page size can cause the cachesize  | 
    
    
    102  | 
     | 
     | 
    	 * to change.  | 
    
    
    103  | 
     | 
     | 
    	 */  | 
    
    
    104  | 
     | 
     | 
    	machine_lorder = byteorder();  | 
    
    
    105  | 
     | 
     | 
    	if (openinfo) { | 
    
    
    106  | 
     | 
     | 
    		b = *openinfo;  | 
    
    
    107  | 
     | 
     | 
     | 
    
    
    108  | 
     | 
     | 
    		/* Flags: R_DUP. */  | 
    
    
    109  | 
     | 
     | 
    		if (b.flags & ~(R_DUP))  | 
    
    
    110  | 
     | 
     | 
    			goto einval;  | 
    
    
    111  | 
     | 
     | 
     | 
    
    
    112  | 
     | 
     | 
    		/*  | 
    
    
    113  | 
     | 
     | 
    		 * Page size must be indx_t aligned and >= MINPSIZE.  Default  | 
    
    
    114  | 
     | 
     | 
    		 * page size is set farther on, based on the underlying file  | 
    
    
    115  | 
     | 
     | 
    		 * transfer size.  | 
    
    
    116  | 
     | 
     | 
    		 */  | 
    
    
    117  | 
     | 
     | 
    		if (b.psize &&  | 
    
    
    118  | 
     | 
     | 
    		    (b.psize < MINPSIZE || b.psize > MAX_PAGE_OFFSET + 1 ||  | 
    
    
    119  | 
     | 
     | 
    		    b.psize & (sizeof(indx_t) - 1)))  | 
    
    
    120  | 
     | 
     | 
    			goto einval;  | 
    
    
    121  | 
     | 
     | 
     | 
    
    
    122  | 
     | 
     | 
    		/* Minimum number of keys per page; absolute minimum is 2. */  | 
    
    
    123  | 
     | 
     | 
    		if (b.minkeypage) { | 
    
    
    124  | 
     | 
     | 
    			if (b.minkeypage < 2)  | 
    
    
    125  | 
     | 
     | 
    				goto einval;  | 
    
    
    126  | 
     | 
     | 
    		} else  | 
    
    
    127  | 
     | 
     | 
    			b.minkeypage = DEFMINKEYPAGE;  | 
    
    
    128  | 
     | 
     | 
     | 
    
    
    129  | 
     | 
     | 
    		/* If no comparison, use default comparison and prefix. */  | 
    
    
    130  | 
     | 
     | 
    		if (b.compare == NULL) { | 
    
    
    131  | 
     | 
     | 
    			b.compare = __bt_defcmp;  | 
    
    
    132  | 
     | 
     | 
    			if (b.prefix == NULL)  | 
    
    
    133  | 
     | 
     | 
    				b.prefix = __bt_defpfx;  | 
    
    
    134  | 
     | 
     | 
    		}  | 
    
    
    135  | 
     | 
     | 
     | 
    
    
    136  | 
     | 
     | 
    		if (b.lorder == 0)  | 
    
    
    137  | 
     | 
     | 
    			b.lorder = machine_lorder;  | 
    
    
    138  | 
     | 
     | 
    	} else { | 
    
    
    139  | 
     | 
     | 
    		b.compare = __bt_defcmp;  | 
    
    
    140  | 
     | 
     | 
    		b.cachesize = 0;  | 
    
    
    141  | 
     | 
     | 
    		b.flags = 0;  | 
    
    
    142  | 
     | 
     | 
    		b.lorder = machine_lorder;  | 
    
    
    143  | 
     | 
     | 
    		b.minkeypage = DEFMINKEYPAGE;  | 
    
    
    144  | 
     | 
     | 
    		b.prefix = __bt_defpfx;  | 
    
    
    145  | 
     | 
     | 
    		b.psize = 0;  | 
    
    
    146  | 
     | 
     | 
    	}  | 
    
    
    147  | 
     | 
     | 
     | 
    
    
    148  | 
     | 
     | 
    	/* Check for the ubiquitous PDP-11. */  | 
    
    
    149  | 
     | 
     | 
    	if (b.lorder != BIG_ENDIAN && b.lorder != LITTLE_ENDIAN)  | 
    
    
    150  | 
     | 
     | 
    		goto einval;  | 
    
    
    151  | 
     | 
     | 
     | 
    
    
    152  | 
     | 
     | 
    	/* Allocate and initialize DB and BTREE structures. */  | 
    
    
    153  | 
     | 
     | 
    	if ((t = calloc(1, sizeof(BTREE))) == NULL)  | 
    
    
    154  | 
     | 
     | 
    		goto err;  | 
    
    
    155  | 
     | 
     | 
    	t->bt_fd = -1;			/* Don't close unopened fd on error. */  | 
    
    
    156  | 
     | 
     | 
    	t->bt_lorder = b.lorder;  | 
    
    
    157  | 
     | 
     | 
    	t->bt_order = NOT;  | 
    
    
    158  | 
     | 
     | 
    	t->bt_cmp = b.compare;  | 
    
    
    159  | 
     | 
     | 
    	t->bt_pfx = b.prefix;  | 
    
    
    160  | 
     | 
     | 
    	t->bt_rfd = -1;  | 
    
    
    161  | 
     | 
     | 
     | 
    
    
    162  | 
     | 
     | 
    	if ((t->bt_dbp = dbp = calloc(1, sizeof(DB))) == NULL)  | 
    
    
    163  | 
     | 
     | 
    		goto err;  | 
    
    
    164  | 
     | 
     | 
    	if (t->bt_lorder != machine_lorder)  | 
    
    
    165  | 
     | 
     | 
    		F_SET(t, B_NEEDSWAP);  | 
    
    
    166  | 
     | 
     | 
     | 
    
    
    167  | 
     | 
     | 
    	dbp->type = DB_BTREE;  | 
    
    
    168  | 
     | 
     | 
    	dbp->internal = t;  | 
    
    
    169  | 
     | 
     | 
    	dbp->close = __bt_close;  | 
    
    
    170  | 
     | 
     | 
    	dbp->del = __bt_delete;  | 
    
    
    171  | 
     | 
     | 
    	dbp->fd = __bt_fd;  | 
    
    
    172  | 
     | 
     | 
    	dbp->get = __bt_get;  | 
    
    
    173  | 
     | 
     | 
    	dbp->put = __bt_put;  | 
    
    
    174  | 
     | 
     | 
    	dbp->seq = __bt_seq;  | 
    
    
    175  | 
     | 
     | 
    	dbp->sync = __bt_sync;  | 
    
    
    176  | 
     | 
     | 
     | 
    
    
    177  | 
     | 
     | 
    	/*  | 
    
    
    178  | 
     | 
     | 
    	 * If no file name was supplied, this is an in-memory btree and we  | 
    
    
    179  | 
     | 
     | 
    	 * open a backing temporary file.  Otherwise, it's a disk-based tree.  | 
    
    
    180  | 
     | 
     | 
    	 */  | 
    
    
    181  | 
     | 
     | 
    	if (fname) { | 
    
    
    182  | 
     | 
     | 
    		switch (flags & O_ACCMODE) { | 
    
    
    183  | 
     | 
     | 
    		case O_RDONLY:  | 
    
    
    184  | 
     | 
     | 
    			F_SET(t, B_RDONLY);  | 
    
    
    185  | 
     | 
     | 
    			break;  | 
    
    
    186  | 
     | 
     | 
    		case O_RDWR:  | 
    
    
    187  | 
     | 
     | 
    			break;  | 
    
    
    188  | 
     | 
     | 
    		case O_WRONLY:  | 
    
    
    189  | 
     | 
     | 
    		default:  | 
    
    
    190  | 
     | 
     | 
    			goto einval;  | 
    
    
    191  | 
     | 
     | 
    		}  | 
    
    
    192  | 
     | 
     | 
     | 
    
    
    193  | 
     | 
     | 
    		if ((t->bt_fd = open(fname, flags | O_CLOEXEC, mode)) < 0)  | 
    
    
    194  | 
     | 
     | 
    			goto err;  | 
    
    
    195  | 
     | 
     | 
     | 
    
    
    196  | 
     | 
     | 
    	} else { | 
    
    
    197  | 
     | 
     | 
    		if ((flags & O_ACCMODE) != O_RDWR)  | 
    
    
    198  | 
     | 
     | 
    			goto einval;  | 
    
    
    199  | 
     | 
     | 
    		if ((t->bt_fd = tmp()) == -1)  | 
    
    
    200  | 
     | 
     | 
    			goto err;  | 
    
    
    201  | 
     | 
     | 
    		F_SET(t, B_INMEM);  | 
    
    
    202  | 
     | 
     | 
    	}  | 
    
    
    203  | 
     | 
     | 
     | 
    
    
    204  | 
     | 
     | 
    	if (fstat(t->bt_fd, &sb))  | 
    
    
    205  | 
     | 
     | 
    		goto err;  | 
    
    
    206  | 
     | 
     | 
    	if (sb.st_size) { | 
    
    
    207  | 
     | 
     | 
    		if ((nr = read(t->bt_fd, &m, sizeof(BTMETA))) < 0)  | 
    
    
    208  | 
     | 
     | 
    			goto err;  | 
    
    
    209  | 
     | 
     | 
    		if (nr != sizeof(BTMETA))  | 
    
    
    210  | 
     | 
     | 
    			goto eftype;  | 
    
    
    211  | 
     | 
     | 
     | 
    
    
    212  | 
     | 
     | 
    		/*  | 
    
    
    213  | 
     | 
     | 
    		 * Read in the meta-data.  This can change the notion of what  | 
    
    
    214  | 
     | 
     | 
    		 * the lorder, page size and flags are, and, when the page size  | 
    
    
    215  | 
     | 
     | 
    		 * changes, the cachesize value can change too.  If the user  | 
    
    
    216  | 
     | 
     | 
    		 * specified the wrong byte order for an existing database, we  | 
    
    
    217  | 
     | 
     | 
    		 * don't bother to return an error, we just clear the NEEDSWAP  | 
    
    
    218  | 
     | 
     | 
    		 * bit.  | 
    
    
    219  | 
     | 
     | 
    		 */  | 
    
    
    220  | 
     | 
     | 
    		if (m.magic == BTREEMAGIC)  | 
    
    
    221  | 
     | 
     | 
    			F_CLR(t, B_NEEDSWAP);  | 
    
    
    222  | 
     | 
     | 
    		else { | 
    
    
    223  | 
     | 
     | 
    			F_SET(t, B_NEEDSWAP);  | 
    
    
    224  | 
     | 
     | 
    			M_32_SWAP(m.magic);  | 
    
    
    225  | 
     | 
     | 
    			M_32_SWAP(m.version);  | 
    
    
    226  | 
     | 
     | 
    			M_32_SWAP(m.psize);  | 
    
    
    227  | 
     | 
     | 
    			M_32_SWAP(m.free);  | 
    
    
    228  | 
     | 
     | 
    			M_32_SWAP(m.nrecs);  | 
    
    
    229  | 
     | 
     | 
    			M_32_SWAP(m.flags);  | 
    
    
    230  | 
     | 
     | 
    		}  | 
    
    
    231  | 
     | 
     | 
    		if (m.magic != BTREEMAGIC || m.version != BTREEVERSION)  | 
    
    
    232  | 
     | 
     | 
    			goto eftype;  | 
    
    
    233  | 
     | 
     | 
    		if (m.psize < MINPSIZE || m.psize > MAX_PAGE_OFFSET + 1 ||  | 
    
    
    234  | 
     | 
     | 
    		    m.psize & (sizeof(indx_t) - 1))  | 
    
    
    235  | 
     | 
     | 
    			goto eftype;  | 
    
    
    236  | 
     | 
     | 
    		if (m.flags & ~SAVEMETA)  | 
    
    
    237  | 
     | 
     | 
    			goto eftype;  | 
    
    
    238  | 
     | 
     | 
    		b.psize = m.psize;  | 
    
    
    239  | 
     | 
     | 
    		F_SET(t, m.flags);  | 
    
    
    240  | 
     | 
     | 
    		t->bt_free = m.free;  | 
    
    
    241  | 
     | 
     | 
    		t->bt_nrecs = m.nrecs;  | 
    
    
    242  | 
     | 
     | 
    	} else { | 
    
    
    243  | 
     | 
     | 
    		/*  | 
    
    
    244  | 
     | 
     | 
    		 * Set the page size to the best value for I/O to this file.  | 
    
    
    245  | 
     | 
     | 
    		 * Don't overflow the page offset type.  | 
    
    
    246  | 
     | 
     | 
    		 */  | 
    
    
    247  | 
     | 
     | 
    		if (b.psize == 0) { | 
    
    
    248  | 
     | 
     | 
    			b.psize = sb.st_blksize;  | 
    
    
    249  | 
     | 
     | 
    			if (b.psize < MINPSIZE)  | 
    
    
    250  | 
     | 
     | 
    				b.psize = MINPSIZE;  | 
    
    
    251  | 
     | 
     | 
    			if (b.psize > MAX_PAGE_OFFSET + 1)  | 
    
    
    252  | 
     | 
     | 
    				b.psize = MAX_PAGE_OFFSET + 1;  | 
    
    
    253  | 
     | 
     | 
    		}  | 
    
    
    254  | 
     | 
     | 
     | 
    
    
    255  | 
     | 
     | 
    		/* Set flag if duplicates permitted. */  | 
    
    
    256  | 
     | 
     | 
    		if (!(b.flags & R_DUP))  | 
    
    
    257  | 
     | 
     | 
    			F_SET(t, B_NODUPS);  | 
    
    
    258  | 
     | 
     | 
     | 
    
    
    259  | 
     | 
     | 
    		t->bt_free = P_INVALID;  | 
    
    
    260  | 
     | 
     | 
    		t->bt_nrecs = 0;  | 
    
    
    261  | 
     | 
     | 
    		F_SET(t, B_METADIRTY);  | 
    
    
    262  | 
     | 
     | 
    	}  | 
    
    
    263  | 
     | 
     | 
     | 
    
    
    264  | 
     | 
     | 
    	t->bt_psize = b.psize;  | 
    
    
    265  | 
     | 
     | 
     | 
    
    
    266  | 
     | 
     | 
    	/* Set the cache size; must be a multiple of the page size. */  | 
    
    
    267  | 
     | 
     | 
    	if (b.cachesize && b.cachesize & (b.psize - 1))  | 
    
    
    268  | 
     | 
     | 
    		b.cachesize += (~b.cachesize & (b.psize - 1)) + 1;  | 
    
    
    269  | 
     | 
     | 
    	if (b.cachesize < b.psize * MINCACHE)  | 
    
    
    270  | 
     | 
     | 
    		b.cachesize = b.psize * MINCACHE;  | 
    
    
    271  | 
     | 
     | 
     | 
    
    
    272  | 
     | 
     | 
    	/* Calculate number of pages to cache. */  | 
    
    
    273  | 
     | 
     | 
    	ncache = (b.cachesize + t->bt_psize - 1) / t->bt_psize;  | 
    
    
    274  | 
     | 
     | 
     | 
    
    
    275  | 
     | 
     | 
    	/*  | 
    
    
    276  | 
     | 
     | 
    	 * The btree data structure requires that at least two keys can fit on  | 
    
    
    277  | 
     | 
     | 
    	 * a page, but other than that there's no fixed requirement.  The user  | 
    
    
    278  | 
     | 
     | 
    	 * specified a minimum number per page, and we translated that into the  | 
    
    
    279  | 
     | 
     | 
    	 * number of bytes a key/data pair can use before being placed on an  | 
    
    
    280  | 
     | 
     | 
    	 * overflow page.  This calculation includes the page header, the size  | 
    
    
    281  | 
     | 
     | 
    	 * of the index referencing the leaf item and the size of the leaf item  | 
    
    
    282  | 
     | 
     | 
    	 * structure.  Also, don't let the user specify a minkeypage such that  | 
    
    
    283  | 
     | 
     | 
    	 * a key/data pair won't fit even if both key and data are on overflow  | 
    
    
    284  | 
     | 
     | 
    	 * pages.  | 
    
    
    285  | 
     | 
     | 
    	 */  | 
    
    
    286  | 
     | 
     | 
    	t->bt_ovflsize = (t->bt_psize - BTDATAOFF) / b.minkeypage -  | 
    
    
    287  | 
     | 
     | 
    	    (sizeof(indx_t) + NBLEAFDBT(0, 0));  | 
    
    
    288  | 
     | 
     | 
    	if (t->bt_ovflsize < NBLEAFDBT(NOVFLSIZE, NOVFLSIZE) + sizeof(indx_t))  | 
    
    
    289  | 
     | 
     | 
    		t->bt_ovflsize =  | 
    
    
    290  | 
     | 
     | 
    		    NBLEAFDBT(NOVFLSIZE, NOVFLSIZE) + sizeof(indx_t);  | 
    
    
    291  | 
     | 
     | 
     | 
    
    
    292  | 
     | 
     | 
    	/* Initialize the buffer pool. */  | 
    
    
    293  | 
     | 
     | 
    	if ((t->bt_mp =  | 
    
    
    294  | 
     | 
     | 
    	    mpool_open(NULL, t->bt_fd, t->bt_psize, ncache)) == NULL)  | 
    
    
    295  | 
     | 
     | 
    		goto err;  | 
    
    
    296  | 
     | 
     | 
    	if (!F_ISSET(t, B_INMEM))  | 
    
    
    297  | 
     | 
     | 
    		mpool_filter(t->bt_mp, __bt_pgin, __bt_pgout, t);  | 
    
    
    298  | 
     | 
     | 
     | 
    
    
    299  | 
     | 
     | 
    	/* Create a root page if new tree. */  | 
    
    
    300  | 
     | 
     | 
    	if (nroot(t) == RET_ERROR)  | 
    
    
    301  | 
     | 
     | 
    		goto err;  | 
    
    
    302  | 
     | 
     | 
     | 
    
    
    303  | 
     | 
     | 
    	/* Global flags. */  | 
    
    
    304  | 
     | 
     | 
    	if (dflags & DB_LOCK)  | 
    
    
    305  | 
     | 
     | 
    		F_SET(t, B_DB_LOCK);  | 
    
    
    306  | 
     | 
     | 
    	if (dflags & DB_SHMEM)  | 
    
    
    307  | 
     | 
     | 
    		F_SET(t, B_DB_SHMEM);  | 
    
    
    308  | 
     | 
     | 
    	if (dflags & DB_TXN)  | 
    
    
    309  | 
     | 
     | 
    		F_SET(t, B_DB_TXN);  | 
    
    
    310  | 
     | 
     | 
     | 
    
    
    311  | 
     | 
     | 
    	return (dbp);  | 
    
    
    312  | 
     | 
     | 
     | 
    
    
    313  | 
     | 
     | 
    einval:	errno = EINVAL;  | 
    
    
    314  | 
     | 
     | 
    	goto err;  | 
    
    
    315  | 
     | 
     | 
     | 
    
    
    316  | 
     | 
     | 
    eftype:	errno = EFTYPE;  | 
    
    
    317  | 
     | 
     | 
    	goto err;  | 
    
    
    318  | 
     | 
     | 
     | 
    
    
    319  | 
     | 
     | 
    err:	saved_errno = errno;  | 
    
    
    320  | 
     | 
     | 
    	if (t) { | 
    
    
    321  | 
     | 
     | 
    		free(t->bt_dbp);  | 
    
    
    322  | 
     | 
     | 
    		if (t->bt_fd != -1)  | 
    
    
    323  | 
     | 
     | 
    			(void)close(t->bt_fd);  | 
    
    
    324  | 
     | 
     | 
    		free(t);  | 
    
    
    325  | 
     | 
     | 
    	}  | 
    
    
    326  | 
     | 
     | 
    	errno = saved_errno;  | 
    
    
    327  | 
     | 
     | 
    	return (NULL);  | 
    
    
    328  | 
     | 
     | 
    }  | 
    
    
    329  | 
     | 
     | 
     | 
    
    
    330  | 
     | 
     | 
    /*  | 
    
    
    331  | 
     | 
     | 
     * NROOT -- Create the root of a new tree.  | 
    
    
    332  | 
     | 
     | 
     *  | 
    
    
    333  | 
     | 
     | 
     * Parameters:  | 
    
    
    334  | 
     | 
     | 
     *	t:	tree  | 
    
    
    335  | 
     | 
     | 
     *  | 
    
    
    336  | 
     | 
     | 
     * Returns:  | 
    
    
    337  | 
     | 
     | 
     *	RET_ERROR, RET_SUCCESS  | 
    
    
    338  | 
     | 
     | 
     */  | 
    
    
    339  | 
     | 
     | 
    static int  | 
    
    
    340  | 
     | 
     | 
    nroot(BTREE *t)  | 
    
    
    341  | 
     | 
     | 
    { | 
    
    
    342  | 
     | 
     | 
    	PAGE *meta, *root;  | 
    
    
    343  | 
     | 
     | 
    	pgno_t npg;  | 
    
    
    344  | 
     | 
     | 
     | 
    
    
    345  | 
     | 
     | 
    	if ((root = mpool_get(t->bt_mp, 1, 0)) != NULL) { | 
    
    
    346  | 
     | 
     | 
    		if (root->lower == 0 &&  | 
    
    
    347  | 
     | 
     | 
    		    root->pgno == 0 &&  | 
    
    
    348  | 
     | 
     | 
    		    root->linp[0] == 0) { | 
    
    
    349  | 
     | 
     | 
    			mpool_delete(t->bt_mp, root);  | 
    
    
    350  | 
     | 
     | 
    			errno = EINVAL;  | 
    
    
    351  | 
     | 
     | 
    		} else { | 
    
    
    352  | 
     | 
     | 
    			mpool_put(t->bt_mp, root, 0);  | 
    
    
    353  | 
     | 
     | 
    			return (RET_SUCCESS);  | 
    
    
    354  | 
     | 
     | 
    		}  | 
    
    
    355  | 
     | 
     | 
    	}  | 
    
    
    356  | 
     | 
     | 
    	if (errno != EINVAL)		/* It's OK to not exist. */  | 
    
    
    357  | 
     | 
     | 
    		return (RET_ERROR);  | 
    
    
    358  | 
     | 
     | 
    	errno = 0;  | 
    
    
    359  | 
     | 
     | 
     | 
    
    
    360  | 
     | 
     | 
    	if ((meta = mpool_new(t->bt_mp, &npg, MPOOL_PAGE_NEXT)) == NULL)  | 
    
    
    361  | 
     | 
     | 
    		return (RET_ERROR);  | 
    
    
    362  | 
     | 
     | 
     | 
    
    
    363  | 
     | 
     | 
    	if ((root = mpool_new(t->bt_mp, &npg, MPOOL_PAGE_NEXT)) == NULL)  | 
    
    
    364  | 
     | 
     | 
    		return (RET_ERROR);  | 
    
    
    365  | 
     | 
     | 
     | 
    
    
    366  | 
     | 
     | 
    	if (npg != P_ROOT)  | 
    
    
    367  | 
     | 
     | 
    		return (RET_ERROR);  | 
    
    
    368  | 
     | 
     | 
    	root->pgno = npg;  | 
    
    
    369  | 
     | 
     | 
    	root->prevpg = root->nextpg = P_INVALID;  | 
    
    
    370  | 
     | 
     | 
    	root->lower = BTDATAOFF;  | 
    
    
    371  | 
     | 
     | 
    	root->upper = t->bt_psize;  | 
    
    
    372  | 
     | 
     | 
    	root->flags = P_BLEAF;  | 
    
    
    373  | 
     | 
     | 
    	memset(meta, 0, t->bt_psize);  | 
    
    
    374  | 
     | 
     | 
    	mpool_put(t->bt_mp, meta, MPOOL_DIRTY);  | 
    
    
    375  | 
     | 
     | 
    	mpool_put(t->bt_mp, root, MPOOL_DIRTY);  | 
    
    
    376  | 
     | 
     | 
    	return (RET_SUCCESS);  | 
    
    
    377  | 
     | 
     | 
    }  | 
    
    
    378  | 
     | 
     | 
     | 
    
    
    379  | 
     | 
     | 
    static int  | 
    
    
    380  | 
     | 
     | 
    tmp(void)  | 
    
    
    381  | 
     | 
     | 
    { | 
    
    
    382  | 
     | 
     | 
    	sigset_t set, oset;  | 
    
    
    383  | 
     | 
     | 
    	int fd, len;  | 
    
    
    384  | 
     | 
     | 
    	char *envtmp = NULL;  | 
    
    
    385  | 
     | 
     | 
    	char path[PATH_MAX];  | 
    
    
    386  | 
     | 
     | 
     | 
    
    
    387  | 
     | 
     | 
    	if (issetugid() == 0)  | 
    
    
    388  | 
     | 
     | 
    		envtmp = getenv("TMPDIR"); | 
    
    
    389  | 
     | 
     | 
    	len = snprintf(path,  | 
    
    
    390  | 
     | 
     | 
    	    sizeof(path), "%s/bt.XXXXXX", envtmp ? envtmp : "/tmp");  | 
    
    
    391  | 
     | 
     | 
    	if (len < 0 || len >= sizeof(path)) { | 
    
    
    392  | 
     | 
     | 
    		errno = ENAMETOOLONG;  | 
    
    
    393  | 
     | 
     | 
    		return(-1);  | 
    
    
    394  | 
     | 
     | 
    	}  | 
    
    
    395  | 
     | 
     | 
     | 
    
    
    396  | 
     | 
     | 
    	(void)sigfillset(&set);  | 
    
    
    397  | 
     | 
     | 
    	(void)sigprocmask(SIG_BLOCK, &set, &oset);  | 
    
    
    398  | 
     | 
     | 
    	if ((fd = mkostemp(path, O_CLOEXEC)) != -1)  | 
    
    
    399  | 
     | 
     | 
    		(void)unlink(path);  | 
    
    
    400  | 
     | 
     | 
    	(void)sigprocmask(SIG_SETMASK, &oset, NULL);  | 
    
    
    401  | 
     | 
     | 
    	return(fd);  | 
    
    
    402  | 
     | 
     | 
    }  | 
    
    
    403  | 
     | 
     | 
     | 
    
    
    404  | 
     | 
     | 
    static int  | 
    
    
    405  | 
     | 
     | 
    byteorder(void)  | 
    
    
    406  | 
     | 
     | 
    { | 
    
    
    407  | 
     | 
     | 
    	u_int32_t x;  | 
    
    
    408  | 
     | 
     | 
    	u_char *p;  | 
    
    
    409  | 
     | 
     | 
     | 
    
    
    410  | 
     | 
     | 
    	x = 0x01020304;  | 
    
    
    411  | 
     | 
     | 
    	p = (u_char *)&x;  | 
    
    
    412  | 
     | 
     | 
    	switch (*p) { | 
    
    
    413  | 
     | 
     | 
    	case 1:  | 
    
    
    414  | 
     | 
     | 
    		return (BIG_ENDIAN);  | 
    
    
    415  | 
     | 
     | 
    	case 4:  | 
    
    
    416  | 
     | 
     | 
    		return (LITTLE_ENDIAN);  | 
    
    
    417  | 
     | 
     | 
    	default:  | 
    
    
    418  | 
     | 
     | 
    		return (0);  | 
    
    
    419  | 
     | 
     | 
    	}  | 
    
    
    420  | 
     | 
     | 
    }  | 
    
    
    421  | 
     | 
     | 
     | 
    
    
    422  | 
     | 
     | 
    int  | 
    
    
    423  | 
     | 
     | 
    __bt_fd(const DB *dbp)  | 
    
    
    424  | 
     | 
     | 
    { | 
    
    
    425  | 
     | 
     | 
    	BTREE *t;  | 
    
    
    426  | 
     | 
     | 
     | 
    
    
    427  | 
     | 
     | 
    	t = dbp->internal;  | 
    
    
    428  | 
     | 
     | 
     | 
    
    
    429  | 
     | 
     | 
    	/* Toss any page pinned across calls. */  | 
    
    
    430  | 
     | 
     | 
    	if (t->bt_pinned != NULL) { | 
    
    
    431  | 
     | 
     | 
    		mpool_put(t->bt_mp, t->bt_pinned, 0);  | 
    
    
    432  | 
     | 
     | 
    		t->bt_pinned = NULL;  | 
    
    
    433  | 
     | 
     | 
    	}  | 
    
    
    434  | 
     | 
     | 
     | 
    
    
    435  | 
     | 
     | 
    	/* In-memory database can't have a file descriptor. */  | 
    
    
    436  | 
     | 
     | 
    	if (F_ISSET(t, B_INMEM)) { | 
    
    
    437  | 
     | 
     | 
    		errno = ENOENT;  | 
    
    
    438  | 
     | 
     | 
    		return (-1);  | 
    
    
    439  | 
     | 
     | 
    	}  | 
    
    
    440  | 
     | 
     | 
    	return (t->bt_fd);  | 
    
    
    441  | 
     | 
     | 
    }  |