aboutsummaryrefslogtreecommitdiff
path: root/src/plugins/kdb/db2/libdb2/hash/hash_page.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/plugins/kdb/db2/libdb2/hash/hash_page.c')
-rw-r--r--src/plugins/kdb/db2/libdb2/hash/hash_page.c1387
1 files changed, 1387 insertions, 0 deletions
diff --git a/src/plugins/kdb/db2/libdb2/hash/hash_page.c b/src/plugins/kdb/db2/libdb2/hash/hash_page.c
new file mode 100644
index 0000000..e25115d
--- /dev/null
+++ b/src/plugins/kdb/db2/libdb2/hash/hash_page.c
@@ -0,0 +1,1387 @@
+/*-
+ * Copyright (c) 1990, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Margo Seltzer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)hash_page.c 8.11 (Berkeley) 11/7/95";
+#endif /* LIBC_SCCS and not lint */
+
+/*
+ * PACKAGE: hashing
+ *
+ * DESCRIPTION:
+ * Page manipulation for hashing package.
+ *
+ * ROUTINES:
+ *
+ * External
+ * __get_page
+ * __add_ovflpage
+ * Internal
+ * overflow_page
+ * open_temp
+ */
+
+#include <sys/types.h>
+
+#ifdef DEBUG
+#include <assert.h>
+#endif
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "db-int.h"
+#include "hash.h"
+#include "page.h"
+#include "extern.h"
+
+static int32_t add_bigptr __P((HTAB *, ITEM_INFO *, indx_t));
+static u_int32_t *fetch_bitmap __P((HTAB *, int32_t));
+static u_int32_t first_free __P((u_int32_t));
+static indx_t next_realkey __P((PAGE16 *, indx_t));
+static u_int16_t overflow_page __P((HTAB *));
+static void page_init __P((HTAB *, PAGE16 *, db_pgno_t, u_int8_t));
+static indx_t prev_realkey __P((PAGE16 *, indx_t));
+static void putpair __P((PAGE8 *, const DBT *, const DBT *));
+static void swap_page_header_in __P((PAGE16 *));
+static void swap_page_header_out __P((PAGE16 *));
+
+#ifdef DEBUG_SLOW
+static void account_page(HTAB *, db_pgno_t, int);
+#endif
+
+u_int32_t
+__get_item(hashp, cursorp, key, val, item_info)
+ HTAB *hashp;
+ CURSOR *cursorp;
+ DBT *key, *val;
+ ITEM_INFO *item_info;
+{
+ db_pgno_t next_pgno;
+ int32_t i;
+
+ /* Check if we need to get a page. */
+ if (!cursorp->pagep) {
+ if (cursorp->pgno == INVALID_PGNO) {
+ cursorp->pagep =
+ __get_page(hashp, cursorp->bucket, A_BUCKET);
+ cursorp->pgno = ADDR(cursorp->pagep);
+ cursorp->ndx = 0;
+ cursorp->pgndx = 0;
+ } else
+ cursorp->pagep =
+ __get_page(hashp, cursorp->pgno, A_RAW);
+ if (!cursorp->pagep) {
+ item_info->status = ITEM_ERROR;
+ return (-1);
+ }
+ }
+ if (item_info->seek_size &&
+ FREESPACE(cursorp->pagep) > item_info->seek_size)
+ item_info->seek_found_page = cursorp->pgno;
+
+ if (cursorp->pgndx == NUM_ENT(cursorp->pagep)) {
+ /* Fetch next page. */
+ if (NEXT_PGNO(cursorp->pagep) == INVALID_PGNO) {
+ item_info->status = ITEM_NO_MORE;
+ return (-1);
+ }
+ next_pgno = NEXT_PGNO(cursorp->pagep);
+ cursorp->pgndx = 0;
+ __put_page(hashp, cursorp->pagep, A_RAW, 0);
+ cursorp->pagep = __get_page(hashp, next_pgno, A_RAW);
+ if (!cursorp->pagep) {
+ item_info->status = ITEM_ERROR;
+ return (-1);
+ }
+ cursorp->pgno = next_pgno;
+ }
+ if (KEY_OFF(cursorp->pagep, cursorp->pgndx) != BIGPAIR) {
+ if ((i = prev_realkey(cursorp->pagep, cursorp->pgndx)) ==
+ cursorp->pgndx)
+ key->size = hashp->hdr.bsize -
+ KEY_OFF(cursorp->pagep, cursorp->pgndx);
+ else
+ key->size = DATA_OFF(cursorp->pagep, i) -
+ KEY_OFF(cursorp->pagep, cursorp->pgndx);
+ }
+
+ /*
+ * All of this information will be set incorrectly for big keys, but
+ * it will be ignored anyway.
+ */
+ val->size = KEY_OFF(cursorp->pagep, cursorp->pgndx) -
+ DATA_OFF(cursorp->pagep, cursorp->pgndx);
+ key->data = KEY(cursorp->pagep, cursorp->pgndx);
+ val->data = DATA(cursorp->pagep, cursorp->pgndx);
+ item_info->pgno = cursorp->pgno;
+ item_info->bucket = cursorp->bucket;
+ item_info->ndx = cursorp->ndx;
+ item_info->pgndx = cursorp->pgndx;
+ item_info->key_off = KEY_OFF(cursorp->pagep, cursorp->pgndx);
+ item_info->data_off = DATA_OFF(cursorp->pagep, cursorp->pgndx);
+ item_info->status = ITEM_OK;
+
+ return (0);
+}
+
+u_int32_t
+__get_item_reset(hashp, cursorp)
+ HTAB *hashp;
+ CURSOR *cursorp;
+{
+ if (cursorp->pagep)
+ __put_page(hashp, cursorp->pagep, A_RAW, 0);
+ cursorp->pagep = NULL;
+ cursorp->bucket = -1;
+ cursorp->ndx = 0;
+ cursorp->pgndx = 0;
+ cursorp->pgno = INVALID_PGNO;
+ return (0);
+}
+
+u_int32_t
+__get_item_done(hashp, cursorp)
+ HTAB *hashp;
+ CURSOR *cursorp;
+{
+ if (cursorp->pagep)
+ __put_page(hashp, cursorp->pagep, A_RAW, 0);
+ cursorp->pagep = NULL;
+
+ /*
+ * We don't throw out the page number since we might want to
+ * continue getting on this page.
+ */
+ return (0);
+}
+
+u_int32_t
+__get_item_first(hashp, cursorp, key, val, item_info)
+ HTAB *hashp;
+ CURSOR *cursorp;
+ DBT *key, *val;
+ ITEM_INFO *item_info;
+{
+ __get_item_reset(hashp, cursorp);
+ cursorp->bucket = 0;
+ return (__get_item_next(hashp, cursorp, key, val, item_info));
+}
+
+/*
+ * Returns a pointer to key/data pair on a page. In the case of bigkeys,
+ * just returns the page number and index of the bigkey pointer pair.
+ */
+u_int32_t
+__get_item_next(hashp, cursorp, key, val, item_info)
+ HTAB *hashp;
+ CURSOR *cursorp;
+ DBT *key, *val;
+ ITEM_INFO *item_info;
+{
+ int status;
+
+ status = __get_item(hashp, cursorp, key, val, item_info);
+ cursorp->ndx++;
+ cursorp->pgndx++;
+ return (status);
+}
+
+/*
+ * Put a non-big pair on a page.
+ */
+static void
+putpair(p, key, val)
+ PAGE8 *p;
+ const DBT *key, *val;
+{
+ u_int16_t *pagep, n, off;
+
+ pagep = (PAGE16 *)p;
+
+ /* Items on the page are 0-indexed. */
+ n = NUM_ENT(pagep);
+ off = OFFSET(pagep) - key->size + 1;
+ memmove(p + off, key->data, key->size);
+ KEY_OFF(pagep, n) = off;
+
+ off -= val->size;
+ memmove(p + off, val->data, val->size);
+ DATA_OFF(pagep, n) = off;
+
+ /* Adjust page info. */
+ NUM_ENT(pagep) = n + 1;
+ OFFSET(pagep) = off - 1;
+}
+
+/*
+ * Returns the index of the next non-bigkey pair after n on the page.
+ * Returns -1 if there are no more non-big things on the page.
+ */
+static indx_t
+#ifdef __STDC__
+next_realkey(PAGE16 * pagep, indx_t n)
+#else
+next_realkey(pagep, n)
+ PAGE16 *pagep;
+ u_int32_t n;
+#endif
+{
+ indx_t i;
+
+ for (i = n + 1; i < NUM_ENT(pagep); i++)
+ if (KEY_OFF(pagep, i) != BIGPAIR)
+ return (i);
+ return (-1);
+}
+
+/*
+ * Returns the index of the previous non-bigkey pair after n on the page.
+ * Returns n if there are no previous non-big things on the page.
+ */
+static indx_t
+#ifdef __STDC__
+prev_realkey(PAGE16 * pagep, indx_t n)
+#else
+prev_realkey(pagep, n)
+ PAGE16 *pagep;
+ u_int32_t n;
+#endif
+{
+ int32_t i;
+
+ /* Need a signed value to do the compare properly. */
+ for (i = n - 1; i > -1; i--)
+ if (KEY_OFF(pagep, i) != BIGPAIR)
+ return (i);
+ return (n);
+}
+
+/*
+ * Returns:
+ * 0 OK
+ * -1 error
+ */
+extern int32_t
+__delpair(hashp, cursorp, item_info)
+ HTAB *hashp;
+ CURSOR *cursorp;
+ ITEM_INFO *item_info;
+{
+ PAGE16 *pagep;
+ indx_t ndx;
+ short check_ndx;
+ int16_t delta, len, next_key;
+ int32_t n;
+ u_int8_t *src, *dest;
+
+ ndx = cursorp->pgndx;
+ if (!cursorp->pagep) {
+ pagep = __get_page(hashp, cursorp->pgno, A_RAW);
+ if (!pagep)
+ return (-1);
+ /*
+ * KLUGE: pgndx has gone one too far, because cursor points
+ * to the _next_ item. Use pgndx - 1.
+ */
+ --ndx;
+ } else
+ pagep = cursorp->pagep;
+#ifdef DEBUG
+ assert(ADDR(pagep) == cursorp->pgno);
+#endif
+
+ if (KEY_OFF(pagep, ndx) == BIGPAIR) {
+ delta = 0;
+ __big_delete(hashp, pagep, ndx);
+ } else {
+ /*
+ * Compute "delta", the amount we have to shift all of the
+ * offsets. To find the delta, we need to make sure that
+ * we aren't looking at the DATA_OFF of a big/keydata pair.
+ */
+ for (check_ndx = (short)(ndx - 1);
+ check_ndx >= 0 && KEY_OFF(pagep, check_ndx) == BIGPAIR;
+ check_ndx--);
+ if (check_ndx < 0)
+ delta = hashp->hdr.bsize - DATA_OFF(pagep, ndx);
+ else
+ delta =
+ DATA_OFF(pagep, check_ndx) - DATA_OFF(pagep, ndx);
+
+ /*
+ * The hard case: we want to remove something other than
+ * the last item on the page. We need to shift data and
+ * offsets down.
+ */
+ if (ndx != NUM_ENT(pagep) - 1) {
+ /*
+ * Move the data: src is the address of the last data
+ * item on the page.
+ */
+ src = (u_int8_t *)pagep + OFFSET(pagep) + 1;
+ /*
+ * Length is the distance between where to start
+ * deleting and end of the data on the page.
+ */
+ len = DATA_OFF(pagep, ndx) - (OFFSET(pagep) + 1);
+ /*
+ * Dest is the location of the to-be-deleted item
+ * occupied - length.
+ */
+ if (check_ndx < 0)
+ dest =
+ (u_int8_t *)pagep + hashp->hdr.bsize - len;
+ else
+ dest = (u_int8_t *)pagep +
+ DATA_OFF(pagep, (check_ndx)) - len;
+ memmove(dest, src, len);
+ }
+ }
+
+ /* Adjust the offsets. */
+ for (n = ndx; n < NUM_ENT(pagep) - 1; n++)
+ if (KEY_OFF(pagep, (n + 1)) != BIGPAIR) {
+ next_key = next_realkey(pagep, n);
+#ifdef DEBUG
+ assert(next_key != -1);
+#endif
+ KEY_OFF(pagep, n) = KEY_OFF(pagep, (n + 1)) + delta;
+ DATA_OFF(pagep, n) = DATA_OFF(pagep, (n + 1)) + delta;
+ } else {
+ KEY_OFF(pagep, n) = KEY_OFF(pagep, (n + 1));
+ DATA_OFF(pagep, n) = DATA_OFF(pagep, (n + 1));
+ }
+
+ /* Adjust page metadata. */
+ OFFSET(pagep) = OFFSET(pagep) + delta;
+ NUM_ENT(pagep) = NUM_ENT(pagep) - 1;
+
+ --hashp->hdr.nkeys;
+
+ /* Is this page now an empty overflow page? If so, free it. */
+ if (TYPE(pagep) == HASH_OVFLPAGE && NUM_ENT(pagep) == 0) {
+ PAGE16 *empty_page;
+ db_pgno_t to_find, next_pgno, link_page;
+
+ /*
+ * We need to go back to the first page in the chain and
+ * look for this page so that we can update the previous
+ * page's NEXT_PGNO field.
+ */
+ to_find = ADDR(pagep);
+ empty_page = pagep;
+ link_page = NEXT_PGNO(empty_page);
+ pagep = __get_page(hashp, item_info->bucket, A_BUCKET);
+ if (!pagep)
+ return (-1);
+ while (NEXT_PGNO(pagep) != to_find) {
+ next_pgno = NEXT_PGNO(pagep);
+#ifdef DEBUG
+ assert(next_pgno != INVALID_PGNO);
+#endif
+ __put_page(hashp, pagep, A_RAW, 0);
+ pagep = __get_page(hashp, next_pgno, A_RAW);
+ if (!pagep)
+ return (-1);
+ }
+
+ /*
+ * At this point, pagep should point to the page before the
+ * page to be deleted.
+ */
+ NEXT_PGNO(pagep) = link_page;
+ if (item_info->pgno == to_find) {
+ item_info->pgno = ADDR(pagep);
+ item_info->pgndx = NUM_ENT(pagep);
+ item_info->seek_found_page = ADDR(pagep);
+ }
+ __delete_page(hashp, empty_page, A_OVFL);
+ }
+ __put_page(hashp, pagep, A_RAW, 1);
+
+ return (0);
+}
+
+extern int32_t
+__split_page(hashp, obucket, nbucket)
+ HTAB *hashp;
+ u_int32_t obucket, nbucket;
+{
+ DBT key, val;
+ ITEM_INFO old_ii, new_ii;
+ PAGE16 *old_pagep, *temp_pagep;
+ db_pgno_t next_pgno;
+ int32_t off;
+ u_int16_t n;
+ int8_t base_page;
+
+ off = hashp->hdr.bsize;
+ old_pagep = __get_page(hashp, obucket, A_BUCKET);
+
+ base_page = 1;
+
+ temp_pagep = hashp->split_buf;
+ memcpy(temp_pagep, old_pagep, hashp->hdr.bsize);
+
+ page_init(hashp, old_pagep, ADDR(old_pagep), HASH_PAGE);
+ __put_page(hashp, old_pagep, A_RAW, 1);
+
+ old_ii.pgno = BUCKET_TO_PAGE(obucket);
+ new_ii.pgno = BUCKET_TO_PAGE(nbucket);
+ old_ii.bucket = obucket;
+ new_ii.bucket = nbucket;
+ old_ii.seek_found_page = new_ii.seek_found_page = 0;
+
+ while (temp_pagep != 0) {
+ off = hashp->hdr.bsize;
+ for (n = 0; n < NUM_ENT(temp_pagep); n++) {
+ if (KEY_OFF(temp_pagep, n) == BIGPAIR) {
+ __get_bigkey(hashp, temp_pagep, n, &key);
+ if (__call_hash(hashp,
+ key.data, key.size) == obucket)
+ add_bigptr(hashp, &old_ii,
+ DATA_OFF(temp_pagep, n));
+ else
+ add_bigptr(hashp, &new_ii,
+ DATA_OFF(temp_pagep, n));
+ } else {
+ key.size = off - KEY_OFF(temp_pagep, n);
+ key.data = KEY(temp_pagep, n);
+ off = KEY_OFF(temp_pagep, n);
+ val.size = off - DATA_OFF(temp_pagep, n);
+ val.data = DATA(temp_pagep, n);
+ if (__call_hash(hashp,
+ key.data, key.size) == obucket)
+ __addel(hashp, &old_ii, &key, &val,
+ NO_EXPAND, 1);
+ else
+ __addel(hashp, &new_ii, &key, &val,
+ NO_EXPAND, 1);
+ off = DATA_OFF(temp_pagep, n);
+ }
+ }
+ next_pgno = NEXT_PGNO(temp_pagep);
+
+ /* Clear temp_page; if it's an overflow page, free it. */
+ if (!base_page)
+ __delete_page(hashp, temp_pagep, A_OVFL);
+ else
+ base_page = 0;
+ if (next_pgno != INVALID_PGNO)
+ temp_pagep = __get_page(hashp, next_pgno, A_RAW);
+ else
+ break;
+ }
+ return (0);
+}
+
+/*
+ * Add the given pair to the page.
+ *
+ *
+ * Returns:
+ * 0 ==> OK
+ * -1 ==> failure
+ */
+extern int32_t
+#ifdef __STDC__
+__addel(HTAB *hashp, ITEM_INFO *item_info, const DBT *key, const DBT *val,
+ u_int32_t num_items, const u_int8_t expanding)
+#else
+__addel(hashp, item_info, key, val, num_items, expanding)
+ HTAB *hashp;
+ ITEM_INFO *item_info;
+ const DBT *key, *val;
+ u_int32_t num_items;
+ const u_int32_t expanding;
+#endif
+{
+ PAGE16 *pagep;
+ int32_t do_expand;
+ db_pgno_t next_pgno;
+
+ do_expand = 0;
+
+ pagep = __get_page(hashp,
+ item_info->seek_found_page != 0 ?
+ item_info->seek_found_page : item_info->pgno, A_RAW);
+ if (!pagep)
+ return (-1);
+
+ /* Advance to first page in chain with room for item. */
+ while (NUM_ENT(pagep) && NEXT_PGNO(pagep) != INVALID_PGNO) {
+ /*
+ * This may not be the end of the chain, but the pair may fit
+ * anyway.
+ */
+ if (ISBIG(PAIRSIZE(key, val), hashp) && BIGPAIRFITS(pagep))
+ break;
+ if (PAIRFITS(pagep, key, val))
+ break;
+ next_pgno = NEXT_PGNO(pagep);
+ __put_page(hashp, pagep, A_RAW, 0);
+ pagep = (PAGE16 *)__get_page(hashp, next_pgno, A_RAW);
+ if (!pagep)
+ return (-1);
+ }
+
+ if ((ISBIG(PAIRSIZE(key, val), hashp) &&
+ !BIGPAIRFITS(pagep)) ||
+ (!ISBIG(PAIRSIZE(key, val), hashp) &&
+ !PAIRFITS(pagep, key, val))) {
+ do_expand = 1;
+ pagep = __add_ovflpage(hashp, pagep);
+ if (!pagep)
+ return (-1);
+
+ if ((ISBIG(PAIRSIZE(key, val), hashp) &&
+ !BIGPAIRFITS(pagep)) ||
+ (!ISBIG(PAIRSIZE(key, val), hashp) &&
+ !PAIRFITS(pagep, key, val))) {
+ __put_page(hashp, pagep, A_RAW, 0);
+ return (-1);
+ }
+ }
+
+ /* At this point, we know the page fits, so we just add it */
+
+ if (ISBIG(PAIRSIZE(key, val), hashp)) {
+ if (__big_insert(hashp, pagep, key, val))
+ return (-1);
+ } else {
+ putpair((PAGE8 *)pagep, key, val);
+ }
+
+ /*
+ * For splits, we are going to update item_info's page number
+ * field, so that we can easily return to the same page the
+ * next time we come in here. For other operations, this shouldn't
+ * matter, since adds are the last thing that happens before we
+ * return to the user program.
+ */
+ item_info->pgno = ADDR(pagep);
+
+ if (!expanding)
+ hashp->hdr.nkeys++;
+
+ /* Kludge: if this is a big page, then it's already been put. */
+ if (!ISBIG(PAIRSIZE(key, val), hashp))
+ __put_page(hashp, pagep, A_RAW, 1);
+
+ if (expanding)
+ item_info->caused_expand = 0;
+ else
+ switch (num_items) {
+ case NO_EXPAND:
+ item_info->caused_expand = 0;
+ break;
+ case UNKNOWN:
+ item_info->caused_expand |=
+ (hashp->hdr.nkeys / hashp->hdr.max_bucket) >
+ hashp->hdr.ffactor ||
+ item_info->pgndx > hashp->hdr.ffactor;
+ break;
+ default:
+ item_info->caused_expand =
+ num_items > hashp->hdr.ffactor ? 1 : do_expand;
+ break;
+ }
+ return (0);
+}
+
+/*
+ * Special __addel used in big splitting; this one just puts the pointer
+ * to an already-allocated big page in the appropriate bucket.
+ */
+static int32_t
+#ifdef __STDC__
+add_bigptr(HTAB * hashp, ITEM_INFO * item_info, indx_t big_pgno)
+#else
+add_bigptr(hashp, item_info, big_pgno)
+ HTAB *hashp;
+ ITEM_INFO *item_info;
+ u_int32_t big_pgno;
+#endif
+{
+ PAGE16 *pagep;
+ db_pgno_t next_pgno;
+
+ pagep = __get_page(hashp, item_info->bucket, A_BUCKET);
+ if (!pagep)
+ return (-1);
+
+ /*
+ * Note: in __addel(), we used item_info->pgno for the beginning of
+ * our search for space. Now, we use item_info->bucket, since we
+ * know that the space required by a big pair on the base page is
+ * quite small, and we may very well find that space early in the
+ * chain.
+ */
+
+ /* Find first page in chain that has space for a big pair. */
+ while (NUM_ENT(pagep) && (NEXT_PGNO(pagep) != INVALID_PGNO)) {
+ if (BIGPAIRFITS(pagep))
+ break;
+ next_pgno = NEXT_PGNO(pagep);
+ __put_page(hashp, pagep, A_RAW, 0);
+ pagep = __get_page(hashp, next_pgno, A_RAW);
+ if (!pagep)
+ return (-1);
+ }
+ if (!BIGPAIRFITS(pagep)) {
+ pagep = __add_ovflpage(hashp, pagep);
+ if (!pagep)
+ return (-1);
+#ifdef DEBUG
+ assert(BIGPAIRFITS(pagep));
+#endif
+ }
+ KEY_OFF(pagep, NUM_ENT(pagep)) = BIGPAIR;
+ DATA_OFF(pagep, NUM_ENT(pagep)) = big_pgno;
+ NUM_ENT(pagep) = NUM_ENT(pagep) + 1;
+
+ __put_page(hashp, pagep, A_RAW, 1);
+
+ return (0);
+}
+
+/*
+ *
+ * Returns:
+ * pointer on success
+ * NULL on error
+ */
+extern PAGE16 *
+__add_ovflpage(hashp, pagep)
+ HTAB *hashp;
+ PAGE16 *pagep;
+{
+ PAGE16 *new_pagep;
+ u_int16_t ovfl_num;
+
+ /* Check if we are dynamically determining the fill factor. */
+ if (hashp->hdr.ffactor == DEF_FFACTOR) {
+ hashp->hdr.ffactor = NUM_ENT(pagep) >> 1;
+ if (hashp->hdr.ffactor < MIN_FFACTOR)
+ hashp->hdr.ffactor = MIN_FFACTOR;
+ }
+ ovfl_num = overflow_page(hashp);
+ if (!ovfl_num)
+ return (NULL);
+
+ if (__new_page(hashp, (u_int32_t)ovfl_num, A_OVFL) != 0)
+ return (NULL);
+
+ if (!ovfl_num || !(new_pagep = __get_page(hashp, ovfl_num, A_OVFL)))
+ return (NULL);
+
+ NEXT_PGNO(pagep) = (db_pgno_t)OADDR_TO_PAGE(ovfl_num);
+ TYPE(new_pagep) = HASH_OVFLPAGE;
+
+ __put_page(hashp, pagep, A_RAW, 1);
+
+#ifdef HASH_STATISTICS
+ hash_overflows++;
+#endif
+ return (new_pagep);
+}
+
+/*
+ *
+ * Returns:
+ * pointer on success
+ * NULL on error
+ */
+extern PAGE16 *
+#ifdef __STDC__
+__add_bigpage(HTAB * hashp, PAGE16 * pagep, indx_t ndx, const u_int8_t
+ is_basepage)
+#else
+__add_bigpage(hashp, pagep, ndx, is_basepage)
+ HTAB *hashp;
+ PAGE16 *pagep;
+ u_int32_t ndx;
+ const u_int32_t is_basepage;
+#endif
+{
+ PAGE16 *new_pagep;
+ u_int16_t ovfl_num;
+
+ ovfl_num = overflow_page(hashp);
+ if (!ovfl_num)
+ return (NULL);
+
+ if (__new_page(hashp, (u_int32_t)ovfl_num, A_OVFL) != 0)
+ return (NULL);
+
+ if (!ovfl_num || !(new_pagep = __get_page(hashp, ovfl_num, A_OVFL)))
+ return (NULL);
+
+ if (is_basepage) {
+ KEY_OFF(pagep, ndx) = BIGPAIR;
+ DATA_OFF(pagep, ndx) = (indx_t)ovfl_num;
+ } else
+ NEXT_PGNO(pagep) = ADDR(new_pagep);
+
+ __put_page(hashp, pagep, A_RAW, 1);
+
+ TYPE(new_pagep) = HASH_BIGPAGE;
+
+#ifdef HASH_STATISTICS
+ hash_bigpages++;
+#endif
+ return (new_pagep);
+}
+
+static void
+#ifdef __STDC__
+page_init(HTAB * hashp, PAGE16 * pagep, db_pgno_t pgno, u_int8_t type)
+#else
+page_init(hashp, pagep, pgno, type)
+ HTAB *hashp;
+ PAGE16 *pagep;
+ db_pgno_t pgno;
+ u_int32_t type;
+#endif
+{
+ NUM_ENT(pagep) = 0;
+ PREV_PGNO(pagep) = NEXT_PGNO(pagep) = INVALID_PGNO;
+ TYPE(pagep) = type;
+ OFFSET(pagep) = hashp->hdr.bsize - 1;
+ /*
+ * Note: since in the current version ADDR(pagep) == PREV_PGNO(pagep),
+ * make sure that ADDR(pagep) is set after resetting PREV_PGNO(pagep).
+ * We reset PREV_PGNO(pagep) just in case the macros are changed.
+ */
+ ADDR(pagep) = pgno;
+
+ return;
+}
+
+int32_t
+__new_page(hashp, addr, addr_type)
+ HTAB *hashp;
+ u_int32_t addr;
+ int32_t addr_type;
+{
+ db_pgno_t paddr;
+ PAGE16 *pagep;
+
+ switch (addr_type) { /* Convert page number. */
+ case A_BUCKET:
+ paddr = BUCKET_TO_PAGE(addr);
+ break;
+ case A_OVFL:
+ case A_BITMAP:
+ paddr = OADDR_TO_PAGE(addr);
+ break;
+ default:
+ paddr = addr;
+ break;
+ }
+ pagep = mpool_new(hashp->mp, &paddr, MPOOL_PAGE_REQUEST);
+ if (!pagep)
+ return (-1);
+#if DEBUG_SLOW
+ account_page(hashp, paddr, 1);
+#endif
+
+ if (addr_type != A_BITMAP)
+ page_init(hashp, pagep, paddr, HASH_PAGE);
+
+ __put_page(hashp, pagep, addr_type, 1);
+
+ return (0);
+}
+
+int32_t
+__delete_page(hashp, pagep, page_type)
+ HTAB *hashp;
+ PAGE16 *pagep;
+ int32_t page_type;
+{
+ if (page_type == A_OVFL)
+ __free_ovflpage(hashp, pagep);
+ return (mpool_delete(hashp->mp, pagep));
+}
+
+static u_int8_t
+is_bitmap_pgno(hashp, pgno)
+ HTAB *hashp;
+ db_pgno_t pgno;
+{
+ int32_t i;
+
+ for (i = 0; i < hashp->nmaps; i++)
+ if (OADDR_TO_PAGE(hashp->hdr.bitmaps[i]) == pgno)
+ return (1);
+ return (0);
+}
+
+void
+__pgin_routine(pg_cookie, pgno, page)
+ void *pg_cookie;
+ db_pgno_t pgno;
+ void *page;
+{
+ HTAB *hashp;
+ PAGE16 *pagep;
+ int32_t max, i;
+
+ pagep = (PAGE16 *)page;
+ hashp = (HTAB *)pg_cookie;
+
+ /*
+ * There are the following cases for swapping:
+ * 0) New page that may be unitialized.
+ * 1) Bucket page or overflow page. Either swap
+ * the header or initialize the page.
+ * 2) Bitmap page. Swap the whole page!
+ * 3) Header pages. Not handled here; these are written directly
+ * to the file.
+ */
+
+ if (NUM_ENT(pagep) == 0 && NEXT_PGNO(pagep) == 0 &&
+ !is_bitmap_pgno(hashp, pgno)) {
+ /* XXX check for !0 LSN */
+ page_init(hashp, pagep, pgno, HASH_PAGE);
+ return;
+ }
+
+ if (hashp->hdr.lorder == DB_BYTE_ORDER)
+ return;
+ if (is_bitmap_pgno(hashp, pgno)) {
+ max = hashp->hdr.bsize >> 2; /* divide by 4 bytes */
+ for (i = 0; i < max; i++)
+ M_32_SWAP(((int32_t *)pagep)[i]);
+ } else
+ swap_page_header_in(pagep);
+}
+
+void
+__pgout_routine(pg_cookie, pgno, page)
+ void *pg_cookie;
+ db_pgno_t pgno;
+ void *page;
+{
+ HTAB *hashp;
+ PAGE16 *pagep;
+ int32_t i, max;
+
+ pagep = (PAGE16 *)page;
+ hashp = (HTAB *)pg_cookie;
+
+ /*
+ * There are the following cases for swapping:
+ * 1) Bucket page or overflow page. Just swap the header.
+ * 2) Bitmap page. Swap the whole page!
+ * 3) Header pages. Not handled here; these are written directly
+ * to the file.
+ */
+
+ if (hashp->hdr.lorder == DB_BYTE_ORDER)
+ return;
+ if (is_bitmap_pgno(hashp, pgno)) {
+ max = hashp->hdr.bsize >> 2; /* divide by 4 bytes */
+ for (i = 0; i < max; i++)
+ M_32_SWAP(((int32_t *)pagep)[i]);
+ } else
+ swap_page_header_out(pagep);
+}
+
+/*
+ *
+ * Returns:
+ * 0 ==> OK
+ * -1 ==>failure
+ */
+extern int32_t
+__put_page(hashp, pagep, addr_type, is_dirty)
+ HTAB *hashp;
+ PAGE16 *pagep;
+ int32_t addr_type, is_dirty;
+{
+#if DEBUG_SLOW
+ account_page(hashp,
+ ((BKT *)((char *)pagep - sizeof(BKT)))->pgno, -1);
+#endif
+
+ return (mpool_put(hashp->mp, pagep, (is_dirty ? MPOOL_DIRTY : 0)));
+}
+
+/*
+ * Returns:
+ * 0 indicates SUCCESS
+ * -1 indicates FAILURE
+ */
+extern PAGE16 *
+__get_page(hashp, addr, addr_type)
+ HTAB *hashp;
+ u_int32_t addr;
+ int32_t addr_type;
+{
+ PAGE16 *pagep;
+ db_pgno_t paddr;
+
+ switch (addr_type) { /* Convert page number. */
+ case A_BUCKET:
+ paddr = BUCKET_TO_PAGE(addr);
+ break;
+ case A_OVFL:
+ case A_BITMAP:
+ paddr = OADDR_TO_PAGE(addr);
+ break;
+ default:
+ paddr = addr;
+ break;
+ }
+ pagep = (PAGE16 *)mpool_get(hashp->mp, paddr, 0);
+
+#if DEBUG_SLOW
+ account_page(hashp, paddr, 1);
+#endif
+#ifdef DEBUG
+ assert(ADDR(pagep) == paddr || ADDR(pagep) == 0 ||
+ addr_type == A_BITMAP || addr_type == A_HEADER);
+#endif
+
+ return (pagep);
+}
+
+static void
+swap_page_header_in(pagep)
+ PAGE16 *pagep;
+{
+ u_int32_t i;
+
+ /* can leave type and filler alone, since they're 1-byte quantities */
+
+ M_32_SWAP(PREV_PGNO(pagep));
+ M_32_SWAP(NEXT_PGNO(pagep));
+ M_16_SWAP(NUM_ENT(pagep));
+ M_16_SWAP(OFFSET(pagep));
+
+ for (i = 0; i < NUM_ENT(pagep); i++) {
+ M_16_SWAP(KEY_OFF(pagep, i));
+ M_16_SWAP(DATA_OFF(pagep, i));
+ }
+}
+
+static void
+swap_page_header_out(pagep)
+ PAGE16 *pagep;
+{
+ u_int32_t i;
+
+ for (i = 0; i < NUM_ENT(pagep); i++) {
+ M_16_SWAP(KEY_OFF(pagep, i));
+ M_16_SWAP(DATA_OFF(pagep, i))
+ }
+
+ /* can leave type and filler alone, since they're 1-byte quantities */
+
+ M_32_SWAP(PREV_PGNO(pagep));
+ M_32_SWAP(NEXT_PGNO(pagep));
+ M_16_SWAP(NUM_ENT(pagep));
+ M_16_SWAP(OFFSET(pagep));
+}
+
+#define BYTE_MASK ((1 << INT32_T_BYTE_SHIFT) -1)
+/*
+ * Initialize a new bitmap page. Bitmap pages are left in memory
+ * once they are read in.
+ */
+extern int32_t
+__ibitmap(hashp, pnum, nbits, ndx)
+ HTAB *hashp;
+ int32_t pnum, nbits, ndx;
+{
+ u_int32_t *ip;
+ int32_t clearbytes, clearints;
+
+ /* make a new bitmap page */
+ if (__new_page(hashp, pnum, A_BITMAP) != 0)
+ return (1);
+ if (!(ip = (u_int32_t *)__get_page(hashp, pnum, A_BITMAP)))
+ return (1);
+ hashp->nmaps++;
+ clearints = ((nbits - 1) >> INT32_T_BYTE_SHIFT) + 1;
+ clearbytes = clearints << INT32_T_TO_BYTE;
+ (void)memset((int8_t *)ip, 0, clearbytes);
+ (void)memset((int8_t *)ip + clearbytes,
+ 0xFF, hashp->hdr.bsize - clearbytes);
+ ip[clearints - 1] = ALL_SET << (nbits & BYTE_MASK);
+ SETBIT(ip, 0);
+ hashp->hdr.bitmaps[ndx] = (u_int16_t)pnum;
+ hashp->mapp[ndx] = ip;
+ return (0);
+}
+
+static u_int32_t
+first_free(map)
+ u_int32_t map;
+{
+ u_int32_t i, mask;
+
+ for (mask = 0x1, i = 0; i < BITS_PER_MAP; i++) {
+ if (!(mask & map))
+ return (i);
+ mask = mask << 1;
+ }
+ return (i);
+}
+
+/*
+ * returns 0 on error
+ */
+static u_int16_t
+overflow_page(hashp)
+ HTAB *hashp;
+{
+ u_int32_t *freep;
+ int32_t bit, first_page, free_bit, free_page, i, in_use_bits, j;
+ int32_t max_free, offset, splitnum;
+ u_int16_t addr;
+#ifdef DEBUG2
+ int32_t tmp1, tmp2;
+#endif
+
+ splitnum = hashp->hdr.ovfl_point;
+ max_free = hashp->hdr.spares[splitnum];
+
+ free_page = (max_free - 1) >> (hashp->hdr.bshift + BYTE_SHIFT);
+ free_bit = (max_free - 1) & ((hashp->hdr.bsize << BYTE_SHIFT) - 1);
+
+ /*
+ * Look through all the free maps to find the first free block.
+ * The compiler under -Wall will complain that freep may be used
+ * before being set, however, this loop will ALWAYS get executed
+ * at least once, so freep is guaranteed to be set.
+ */
+ first_page = hashp->hdr.last_freed >> (hashp->hdr.bshift + BYTE_SHIFT);
+ for (i = first_page; i <= free_page; i++) {
+ if (!(freep = fetch_bitmap(hashp, i)))
+ return (0);
+ if (i == free_page)
+ in_use_bits = free_bit;
+ else
+ in_use_bits = (hashp->hdr.bsize << BYTE_SHIFT) - 1;
+
+ if (i == first_page) {
+ bit = hashp->hdr.last_freed &
+ ((hashp->hdr.bsize << BYTE_SHIFT) - 1);
+ j = bit / BITS_PER_MAP;
+ bit = bit & ~(BITS_PER_MAP - 1);
+ } else {
+ bit = 0;
+ j = 0;
+ }
+ for (; bit <= in_use_bits; j++, bit += BITS_PER_MAP)
+ if (freep[j] != ALL_SET)
+ goto found;
+ }
+
+ /* No Free Page Found */
+ hashp->hdr.last_freed = hashp->hdr.spares[splitnum];
+ hashp->hdr.spares[splitnum]++;
+ offset = hashp->hdr.spares[splitnum] -
+ (splitnum ? hashp->hdr.spares[splitnum - 1] : 0);
+
+#define OVMSG "HASH: Out of overflow pages. Increase page size\n"
+
+ if (offset > SPLITMASK) {
+ if (++splitnum >= NCACHED) {
+ (void)write(STDERR_FILENO, OVMSG, sizeof(OVMSG) - 1);
+ return (0);
+ }
+ hashp->hdr.ovfl_point = splitnum;
+ hashp->hdr.spares[splitnum] = hashp->hdr.spares[splitnum - 1];
+ hashp->hdr.spares[splitnum - 1]--;
+ offset = 1;
+ }
+ /* Check if we need to allocate a new bitmap page. */
+ if (free_bit == (hashp->hdr.bsize << BYTE_SHIFT) - 1) {
+ free_page++;
+ if (free_page >= NCACHED) {
+ (void)write(STDERR_FILENO, OVMSG, sizeof(OVMSG) - 1);
+ return (0);
+ }
+ /*
+ * This is tricky. The 1 indicates that you want the new page
+ * allocated with 1 clear bit. Actually, you are going to
+ * allocate 2 pages from this map. The first is going to be
+ * the map page, the second is the overflow page we were
+ * looking for. The __ibitmap routine automatically, sets
+ * the first bit of itself to indicate that the bitmap itself
+ * is in use. We would explicitly set the second bit, but
+ * don't have to if we tell __ibitmap not to leave it clear
+ * in the first place.
+ */
+ if (__ibitmap(hashp,
+ (int32_t)OADDR_OF(splitnum, offset), 1, free_page))
+ return (0);
+ hashp->hdr.spares[splitnum]++;
+#ifdef DEBUG2
+ free_bit = 2;
+#endif
+ offset++;
+ if (offset > SPLITMASK) {
+ if (++splitnum >= NCACHED) {
+ (void)write(STDERR_FILENO,
+ OVMSG, sizeof(OVMSG) - 1);
+ return (0);
+ }
+ hashp->hdr.ovfl_point = splitnum;
+ hashp->hdr.spares[splitnum] =
+ hashp->hdr.spares[splitnum - 1];
+ hashp->hdr.spares[splitnum - 1]--;
+ offset = 0;
+ }
+ } else {
+ /*
+ * Free_bit addresses the last used bit. Bump it to address
+ * the first available bit.
+ */
+ free_bit++;
+ SETBIT(freep, free_bit);
+ }
+
+ /* Calculate address of the new overflow page */
+ addr = OADDR_OF(splitnum, offset);
+#ifdef DEBUG2
+ (void)fprintf(stderr, "OVERFLOW_PAGE: ADDR: %d BIT: %d PAGE %d\n",
+ addr, free_bit, free_page);
+#endif
+
+ if (OADDR_TO_PAGE(addr) > MAX_PAGES(hashp)) {
+ (void)write(STDERR_FILENO, OVMSG, sizeof(OVMSG) - 1);
+ return (0);
+ }
+ return (addr);
+
+found:
+ bit = bit + first_free(freep[j]);
+ SETBIT(freep, bit);
+#ifdef DEBUG2
+ tmp1 = bit;
+ tmp2 = i;
+#endif
+ /*
+ * Bits are addressed starting with 0, but overflow pages are addressed
+ * beginning at 1. Bit is a bit address number, so we need to increment
+ * it to convert it to a page number.
+ */
+ bit = 1 + bit + (i * (hashp->hdr.bsize << BYTE_SHIFT));
+ if (bit >= hashp->hdr.last_freed)
+ hashp->hdr.last_freed = bit - 1;
+
+ /* Calculate the split number for this page */
+ for (i = 0; i < splitnum && (bit > hashp->hdr.spares[i]); i++);
+ offset = (i ? bit - hashp->hdr.spares[i - 1] : bit);
+ if (offset >= SPLITMASK)
+ return (0); /* Out of overflow pages */
+ addr = OADDR_OF(i, offset);
+#ifdef DEBUG2
+ (void)fprintf(stderr, "OVERFLOW_PAGE: ADDR: %d BIT: %d PAGE %d\n",
+ addr, tmp1, tmp2);
+#endif
+
+ if (OADDR_TO_PAGE(addr) > MAX_PAGES(hashp)) {
+ (void)write(STDERR_FILENO, OVMSG, sizeof(OVMSG) - 1);
+ return (0);
+ }
+ /* Allocate and return the overflow page */
+ return (addr);
+}
+
+#ifdef DEBUG
+int
+bucket_to_page(hashp, n)
+ HTAB *hashp;
+ int n;
+{
+ int ret_val;
+
+ ret_val = n + hashp->hdr.hdrpages;
+ if (n != 0)
+ ret_val += hashp->hdr.spares[__log2(n + 1) - 1];
+ return (ret_val);
+}
+
+int32_t
+oaddr_to_page(hashp, n)
+ HTAB *hashp;
+ int n;
+{
+ int ret_val, temp;
+
+ temp = (1 << SPLITNUM(n)) - 1;
+ ret_val = bucket_to_page(hashp, temp);
+ ret_val += (n & SPLITMASK);
+
+ return (ret_val);
+}
+#endif /* DEBUG */
+
+static indx_t
+page_to_oaddr(hashp, pgno)
+ HTAB *hashp;
+ db_pgno_t pgno;
+{
+ int32_t sp, ret_val;
+
+ /*
+ * To convert page number to overflow address:
+ *
+ * 1. Find a starting split point -- use 0 since there are only
+ * 32 split points.
+ * 2. Find the split point s.t. 2^sp + hdr.spares[sp] < pgno and
+ * 2^(sp+1) = hdr.spares[sp+1] > pgno. The overflow address will
+ * be located at sp.
+ * 3. return...
+ */
+ pgno -= hashp->hdr.hdrpages;
+ for (sp = 0; sp < NCACHED; sp++)
+ if (POW2(sp) + hashp->hdr.spares[sp] < pgno &&
+ (POW2(sp + 1) + hashp->hdr.spares[sp + 1]) > pgno)
+ break;
+
+ ret_val = OADDR_OF(sp + 1,
+ pgno - ((POW2(sp + 1) - 1) + hashp->hdr.spares[sp]));
+#ifdef DEBUG
+ assert(OADDR_TO_PAGE(ret_val) == (pgno + hashp->hdr.hdrpages));
+#endif
+ return (ret_val);
+}
+
+/*
+ * Mark this overflow page as free.
+ */
+extern void
+__free_ovflpage(hashp, pagep)
+ HTAB *hashp;
+ PAGE16 *pagep;
+{
+ u_int32_t *freep;
+ int32_t bit_address, free_page, free_bit;
+ u_int16_t addr, ndx;
+
+ addr = page_to_oaddr(hashp, ADDR(pagep));
+
+#ifdef DEBUG2
+ (void)fprintf(stderr, "Freeing %d\n", addr);
+#endif
+ ndx = ((u_int16_t)addr) >> SPLITSHIFT;
+ bit_address =
+ (ndx ? hashp->hdr.spares[ndx - 1] : 0) + (addr & SPLITMASK) - 1;
+ if (bit_address < hashp->hdr.last_freed)
+ hashp->hdr.last_freed = bit_address;
+ free_page = (bit_address >> (hashp->hdr.bshift + BYTE_SHIFT));
+ free_bit = bit_address & ((hashp->hdr.bsize << BYTE_SHIFT) - 1);
+
+ freep = fetch_bitmap(hashp, free_page);
+#ifdef DEBUG
+ /*
+ * This had better never happen. It means we tried to read a bitmap
+ * that has already had overflow pages allocated off it, and we
+ * failed to read it from the file.
+ */
+ if (!freep)
+ assert(0);
+#endif
+ CLRBIT(freep, free_bit);
+#ifdef DEBUG2
+ (void)fprintf(stderr, "FREE_OVFLPAGE: ADDR: %d BIT: %d PAGE %d\n",
+ obufp->addr, free_bit, free_page);
+#endif
+}
+
+static u_int32_t *
+fetch_bitmap(hashp, ndx)
+ HTAB *hashp;
+ int32_t ndx;
+{
+ if (ndx >= hashp->nmaps)
+ return (NULL);
+ if (!hashp->mapp[ndx])
+ hashp->mapp[ndx] = (u_int32_t *)__get_page(hashp,
+ hashp->hdr.bitmaps[ndx], A_BITMAP);
+
+ return (hashp->mapp[ndx]);
+}
+
+#ifdef DEBUG_SLOW
+static void
+account_page(hashp, pgno, inout)
+ HTAB *hashp;
+ db_pgno_t pgno;
+ int inout;
+{
+ static struct {
+ db_pgno_t pgno;
+ int times;
+ } list[100];
+ static int last;
+ int i, j;
+
+ if (inout == -1) /* XXX: Kluge */
+ inout = 0;
+
+ /* Find page in list. */
+ for (i = 0; i < last; i++)
+ if (list[i].pgno == pgno)
+ break;
+ /* Not found. */
+ if (i == last) {
+ list[last].times = inout;
+ list[last].pgno = pgno;
+ last++;
+ }
+ list[i].times = inout;
+ if (list[i].times == 0) {
+ for (j = i; j < last; j++)
+ list[j] = list[j + 1];
+ last--;
+ }
+ for (i = 0; i < last; i++, list[i].times++)
+ if (list[i].times > 20 && !is_bitmap_pgno(hashp, list[i].pgno))
+ (void)fprintf(stderr,
+ "Warning: pg %d has been out for %d times\n",
+ list[i].pgno, list[i].times);
+}
+#endif /* DEBUG_SLOW */