diff options
Diffstat (limited to 'db2/hash')
-rw-r--r-- | db2/hash/hash.c | 1440 | ||||
-rw-r--r-- | db2/hash/hash.src | 211 | ||||
-rw-r--r-- | db2/hash/hash_auto.c | 1343 | ||||
-rw-r--r-- | db2/hash/hash_conv.c | 101 | ||||
-rw-r--r-- | db2/hash/hash_debug.c | 96 | ||||
-rw-r--r-- | db2/hash/hash_dup.c | 544 | ||||
-rw-r--r-- | db2/hash/hash_func.c | 219 | ||||
-rw-r--r-- | db2/hash/hash_page.c | 1775 | ||||
-rw-r--r-- | db2/hash/hash_rec.c | 810 | ||||
-rw-r--r-- | db2/hash/hash_stat.c | 58 |
10 files changed, 6597 insertions, 0 deletions
diff --git a/db2/hash/hash.c b/db2/hash/hash.c new file mode 100644 index 0000000..6d8c400 --- /dev/null +++ b/db2/hash/hash.c @@ -0,0 +1,1440 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997 + * Sleepycat Software. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994 + * Margo Seltzer. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "config.h" + +#ifndef lint +static const char sccsid[] = "@(#)hash.c 10.25 (Sleepycat) 8/24/97"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> +#include <sys/stat.h> + +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#endif + +#include "shqueue.h" +#include "db_int.h" +#include "db_page.h" +#include "db_am.h" +#include "db_ext.h" +#include "hash.h" +#include "log.h" + +static int __ham_c_close __P((DBC *)); +static int __ham_c_del __P((DBC *, int)); +static int __ham_c_get __P((DBC *, DBT *, DBT *, int)); +static int __ham_c_put __P((DBC *, DBT *, DBT *, int)); +static int __ham_c_init __P((DB *, DB_TXN *, DBC **)); +static int __ham_cursor __P((DB *, DB_TXN *, DBC **)); +static int __ham_delete __P((DB *, DB_TXN *, DBT *, int)); +static int __ham_dup_return __P((HTAB *, HASH_CURSOR *, DBT *, int)); +static int __ham_get __P((DB *, DB_TXN *, DBT *, DBT *, int)); +static void __ham_init_htab __P((HTAB *)); +static int __ham_lookup __P((HTAB *, + HASH_CURSOR *, const DBT *, u_int32_t, db_lockmode_t)); +static int __ham_overwrite __P((HTAB *, HASH_CURSOR *, DBT *)); +static int __ham_put __P((DB *, DB_TXN *, DBT *, DBT *, int)); +static int __ham_sync __P((DB *, int)); + +/************************** INTERFACE ROUTINES ***************************/ +/* OPEN/CLOSE */ + +/* + * __ham_open -- + * + * PUBLIC: int __ham_open __P((DB *, DB_INFO *)); + */ +int +__ham_open(dbp, dbinfo) + DB *dbp; + DB_INFO *dbinfo; +{ + DB_ENV *dbenv; + DBC *curs; + HTAB *hashp; + int file_existed, ret; + + dbenv = dbp->dbenv; + + if ((hashp = (HTAB *)calloc(1, sizeof(HTAB))) == NULL) + return (ENOMEM); + hashp->dbp = dbp; + + /* Set the hash function if specified by the user. */ + if (dbinfo != NULL && dbinfo->h_hash != NULL) + hashp->hash = dbinfo->h_hash; + + /* + * Initialize the remaining fields of the dbp. The type, close and + * fd functions are all set in db_open. + */ + dbp->internal = hashp; + dbp->cursor = __ham_cursor; + dbp->del = __ham_delete; + dbp->get = __ham_get; + dbp->put = __ham_put; + dbp->sync = __ham_sync; + + /* If locking is turned on, lock the meta data page. */ + if (F_ISSET(dbp, DB_AM_LOCKING)) { + dbp->lock.pgno = BUCKET_INVALID; + if ((ret = lock_get(dbenv->lk_info, dbp->locker, + 0, &dbp->lock_dbt, DB_LOCK_READ, &hashp->hlock)) != 0) { + if (ret < 0) + ret = EAGAIN; + goto out; + } + } + + /* + * Now, we can try to read the meta-data page and figure out + * if we set up locking and get the meta-data page properly. + * If this is a new file, initialize it, and put it back dirty. + */ + if ((ret = __ham_get_page(hashp->dbp, 0, (PAGE **)&hashp->hdr)) != 0) + goto out; + + /* Initialize the hashp structure */ + if (hashp->hdr->magic == DB_HASHMAGIC) { + file_existed = 1; + /* File exists, verify the data in the header. */ + if (hashp->hash == NULL) + hashp->hash = + hashp->hdr->version < 5 ? __ham_func4 : __ham_func5; + if (hashp->hash(CHARKEY, sizeof(CHARKEY)) != + hashp->hdr->h_charkey) { + __db_err(hashp->dbp->dbenv, + "hash: incompatible hash function"); + ret = EINVAL; + goto out; + } + if (F_ISSET(hashp->hdr, DB_HASH_DUP)) + F_SET(dbp, DB_AM_DUP); + } else { + /* + * File does not exist, we must initialize the header. If + * locking is enabled that means getting a write lock first. + */ + file_existed = 0; + if (F_ISSET(dbp, DB_AM_LOCKING) && + ((ret = lock_put(dbenv->lk_info, hashp->hlock)) != 0 || + (ret = lock_get(dbenv->lk_info, dbp->locker, 0, + &dbp->lock_dbt, DB_LOCK_WRITE, &hashp->hlock)) != 0)) { + if (ret < 0) + ret = EAGAIN; + goto out; + } + + hashp->hdr->nelem = dbinfo != NULL ? dbinfo->h_nelem : 0; + hashp->hdr->ffactor = + dbinfo != NULL && dbinfo->h_ffactor ? dbinfo->h_ffactor : 0; + __ham_init_htab(hashp); + if (F_ISSET(dbp, DB_AM_DUP)) + F_SET(hashp->hdr, DB_HASH_DUP); + if ((ret = __ham_dirty_page(hashp, (PAGE *)hashp->hdr)) != 0) + goto out; + } + + /* Initialize the default cursor. */ + __ham_c_init(dbp, NULL, &curs); + TAILQ_INSERT_TAIL(&dbp->curs_queue, curs, links); + + /* Allocate memory for our split buffer. */ + if ((hashp->split_buf = (PAGE *)malloc(dbp->pgsize)) == NULL) { + ret = ENOMEM; + goto out; + } + +#ifdef NO_STATISTICS_FOR_DB_ERR + __db_err(dbp->dbenv, + "%s%lx\n%s%ld\n%s%ld\n%s%ld\n%s%ld\n%s0x%lx\n%s0x%lx\n%s%ld\n%s%ld\n%s0x%lx", + "TABLE POINTER ", (long)hashp, + "BUCKET SIZE ", (long)hashp->hdr->pagesize, + "FILL FACTOR ", (long)hashp->hdr->ffactor, + "MAX BUCKET ", (long)hashp->hdr->max_bucket, + "OVFL POINT ", (long)hashp->hdr->ovfl_point, + "LAST FREED ", (long)hashp->hdr->last_freed, + "HIGH MASK ", (long)hashp->hdr->high_mask, + "LOW MASK ", (long)hashp->hdr->low_mask, + "NELEM ", (long)hashp->hdr->nelem, + "FLAGS ", (long)hashp->hdr->flags); +#endif + + /* Release the meta data page */ + (void)__ham_put_page(hashp->dbp, (PAGE *)hashp->hdr, 0); + if (F_ISSET(dbp, DB_AM_LOCKING) && + (ret = lock_put(dbenv->lk_info, hashp->hlock)) != 0) { + if (ret < 0) + ret = EAGAIN; + goto out; + } + + hashp->hlock = 0; + hashp->hdr = NULL; + /* Sync the file so that we know that the meta data goes to disk. */ + if (!file_existed && (ret = dbp->sync(dbp, 0)) != 0) + goto out; + return (0); + +out: (void)__ham_close(dbp); + return (ret); +} + +/* + * PUBLIC: int __ham_close __P((DB *)); + */ +int +__ham_close(dbp) + DB *dbp; +{ + HTAB *hashp; + int ret, t_ret; + + DEBUG_LWRITE(dbp, NULL, "ham_close", NULL, NULL, 0); + hashp = (HTAB *)dbp->internal; + ret = 0; + + /* Free the split page. */ + if (hashp->split_buf) + FREE(hashp->split_buf, dbp->pgsize); + + if (hashp->hdr && (t_ret = __ham_put_page(hashp->dbp, + (PAGE *)hashp->hdr, 0)) != 0 && ret == 0) + ret = t_ret; + if (hashp->hlock && (t_ret = lock_put(hashp->dbp->dbenv->lk_info, + hashp->hlock)) != 0 && ret == 0) + ret = t_ret; + + FREE(hashp, sizeof(HTAB)); + dbp->internal = NULL; + return (ret); +} + +/************************** LOCAL CREATION ROUTINES **********************/ +/* + * Returns 0 on No Error + */ +static void +__ham_init_htab(hashp) + HTAB *hashp; +{ + u_int32_t nelem; + int32_t l2, nbuckets; + + nelem = hashp->hdr->nelem; + hashp->hdr->pagesize = hashp->dbp->pgsize; + ZERO_LSN(hashp->hdr->lsn); + hashp->hdr->magic = DB_HASHMAGIC; + hashp->hdr->version = DB_HASHVERSION; + if (hashp->hash == NULL) + hashp->hash = + hashp->hdr->version < 5 ? __ham_func4 : __ham_func5; + hashp->hdr->h_charkey = hashp->hash(CHARKEY, sizeof(CHARKEY)); + if (nelem != 0 && hashp->hdr->ffactor != 0) { + nelem = (nelem - 1) / hashp->hdr->ffactor + 1; + l2 = __db_log2(nelem > 2 ? nelem : 2); + } else + l2 = 2; + + nbuckets = 1 << l2; + + hashp->hdr->spares[l2] = 0; + hashp->hdr->spares[l2 + 1] = 0; + hashp->hdr->ovfl_point = l2; + hashp->hdr->last_freed = PGNO_INVALID; + + hashp->hdr->max_bucket = hashp->hdr->high_mask = nbuckets - 1; + hashp->hdr->low_mask = (nbuckets >> 1) - 1; + memcpy(hashp->hdr->uid, hashp->dbp->lock.fileid, DB_FILE_ID_LEN); +} + +/********************** DESTROY/CLOSE ROUTINES ************************/ + + +/* + * Write modified pages to disk + * + * Returns: + * 0 == OK + * -1 ERROR + */ +static int +__ham_sync(dbp, flags) + DB *dbp; + int flags; +{ + int ret; + + DEBUG_LWRITE(dbp, NULL, "ham_sync", NULL, NULL, flags); + if ((ret = __db_syncchk(dbp, flags)) != 0) + return (ret); + if (F_ISSET(dbp, DB_AM_RDONLY)) + return (0); + + if ((ret = memp_fsync(dbp->mpf)) == DB_INCOMPLETE) + ret = 0; + + return (ret); +} + +/*******************************SEARCH ROUTINES *****************************/ +/* + * All the access routines return + * + * Returns: + * 0 on SUCCESS + * 1 to indicate an external ERROR (i.e. key not found, etc) + * -1 to indicate an internal ERROR (i.e. out of memory, etc) + */ + +static int +__ham_get(dbp, txn, key, data, flags) + DB *dbp; + DB_TXN *txn; + DBT *key; + DBT *data; + int flags; +{ + DB *ldbp; + DBC *cp; + HTAB *hashp; + HASH_CURSOR *hcp; + int ret, t_ret; + + DEBUG_LREAD(dbp, txn, "ham_get", key, NULL, flags); + if ((ret = __db_getchk(dbp, key, data, flags)) != 0) + return (ret); + + ldbp = dbp; + if (F_ISSET(dbp, DB_AM_THREAD) && + (ret = __db_gethandle(dbp, __ham_hdup, &ldbp)) != 0) + return (ret); + + hashp = (HTAB *)ldbp->internal; + SET_LOCKER(ldbp, txn); + GET_META(ldbp, hashp); + cp = TAILQ_FIRST(&ldbp->curs_queue); + + hashp->hash_accesses++; + hcp = (HASH_CURSOR *)TAILQ_FIRST(&ldbp->curs_queue)->internal; + if ((ret = __ham_lookup(hashp, hcp, key, 0, DB_LOCK_READ)) == 0) + if (F_ISSET(hcp, H_OK)) + ret = __ham_dup_return(hashp, hcp, data, DB_FIRST); + else /* Key was not found */ + ret = DB_NOTFOUND; + + if ((t_ret = __ham_item_done(hashp, hcp, 0)) != 0 && ret == 0) + ret = t_ret; + RELEASE_META(ldbp, hashp); + if (F_ISSET(dbp, DB_AM_THREAD)) + __db_puthandle(ldbp); + return (ret); +} + +static int +__ham_put(dbp, txn, key, data, flags) + DB *dbp; + DB_TXN *txn; + DBT *key; + DBT *data; + int flags; +{ + DB *ldbp; + HTAB *hashp; + HASH_CURSOR *hcp; + DBT tmp_val, *myval; + int ret, t_ret; + u_int32_t nbytes; + + DEBUG_LWRITE(dbp, txn, "ham_put", key, data, flags); + if ((ret = __db_putchk(dbp, key, data, + flags, F_ISSET(dbp, DB_AM_RDONLY), F_ISSET(dbp, DB_AM_DUP))) != 0) + return (ret); + + ldbp = dbp; + if (F_ISSET(dbp, DB_AM_THREAD) && + (ret = __db_gethandle(dbp, __ham_hdup, &ldbp)) != 0) + return (ret); + + hashp = (HTAB *)ldbp->internal; + SET_LOCKER(ldbp, txn); + GET_META(ldbp, hashp); + hcp = TAILQ_FIRST(&ldbp->curs_queue)->internal; + + nbytes = (ISBIG(hashp, key->size) ? HOFFPAGE_PSIZE : + HKEYDATA_PSIZE(key->size)) + + (ISBIG(hashp, data->size) ? HOFFPAGE_PSIZE : + HKEYDATA_PSIZE(data->size)); + + hashp->hash_accesses++; + ret = __ham_lookup(hashp, hcp, key, nbytes, DB_LOCK_WRITE); + + if (ret == DB_NOTFOUND) { + ret = 0; + if (hcp->seek_found_page != PGNO_INVALID && + hcp->seek_found_page != hcp->pgno) { + if ((ret = __ham_item_done(hashp, hcp, 0)) != 0) + goto out; + hcp->pgno = hcp->seek_found_page; + hcp->bndx = NDX_INVALID; + } + + if (F_ISSET(data, DB_DBT_PARTIAL) && data->doff != 0) { + /* + * Doing a partial put, but the key does not exist + * and we are not beginning the write at 0. We + * must create a data item padded up to doff and + * then write the new bytes represented by val. + */ + ret = __ham_init_dbt(&tmp_val, data->size + data->doff, + &hcp->big_data, &hcp->big_datalen); + if (ret == 0) { + memset(tmp_val.data, 0, data->doff); + memcpy((u_int8_t *)tmp_val.data + data->doff, + data->data, data->size); + myval = &tmp_val; + } + } else + myval = (DBT *)data; + + if (ret == 0) + ret = __ham_add_el(hashp, hcp, key, myval, H_KEYDATA); + } else if (ret == 0 && F_ISSET(hcp, H_OK)) { + if (flags == DB_NOOVERWRITE) + ret = DB_KEYEXIST; + else if (F_ISSET(ldbp, DB_AM_DUP)) + ret = __ham_add_dup(hashp, hcp, data, DB_KEYLAST); + else + ret = __ham_overwrite(hashp, hcp, data); + } + + /* Free up all the cursor pages. */ + if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0) + ret = t_ret; + /* Now check if we have to grow. */ +out: if (ret == 0 && F_ISSET(hcp, H_EXPAND)) { + ret = __ham_expand_table(hashp); + F_CLR(hcp, H_EXPAND); + } + + if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0) + ret = t_ret; + RELEASE_META(ldbp, hashp); + if (F_ISSET(dbp, DB_AM_THREAD)) + __db_puthandle(ldbp); + return (ret); +} + +static int +__ham_cursor(dbp, txnid, dbcp) + DB *dbp; + DB_TXN *txnid; + DBC **dbcp; +{ + int ret; + + DEBUG_LWRITE(dbp, txnid, "ham_cursor", NULL, NULL, 0); + if ((ret = __ham_c_init(dbp, txnid, dbcp)) != 0) + return (ret); + + DB_THREAD_LOCK(dbp); + TAILQ_INSERT_TAIL(&dbp->curs_queue, *dbcp, links); + DB_THREAD_UNLOCK(dbp); + return (ret); +} + +static int +__ham_c_init(dbp, txnid, dbcp) + DB *dbp; + DB_TXN *txnid; + DBC **dbcp; +{ + DBC *db_curs; + HASH_CURSOR *new_curs; + + if ((db_curs = (DBC *)calloc(sizeof(DBC), 1)) == NULL) + return (ENOMEM); + + if ((new_curs = + (HASH_CURSOR *)calloc(sizeof(struct cursor_t), 1)) == NULL) { + FREE(db_curs, sizeof(DBC)); + return (ENOMEM); + } + + db_curs->internal = new_curs; + db_curs->c_close = __ham_c_close; + db_curs->c_del = __ham_c_del; + db_curs->c_get = __ham_c_get; + db_curs->c_put = __ham_c_put; + db_curs->txn = txnid; + db_curs->dbp = dbp; + + new_curs->db_cursor = db_curs; + __ham_item_init(new_curs); + + if (dbcp != NULL) + *dbcp = db_curs; + return (0); +} + +static int +__ham_delete(dbp, txn, key, flags) + DB *dbp; + DB_TXN *txn; + DBT *key; + int flags; +{ + DB *ldbp; + HTAB *hashp; + HASH_CURSOR *hcp; + int ret, t_ret; + + DEBUG_LWRITE(dbp, txn, "ham_delete", key, NULL, flags); + if ((ret = __db_delchk(dbp, flags, F_ISSET(dbp, DB_AM_RDONLY))) != 0) + return (ret); + + ldbp = dbp; + if (F_ISSET(dbp, DB_AM_THREAD) && + (ret = __db_gethandle(dbp, __ham_hdup, &ldbp)) != 0) + return (ret); + hashp = (HTAB *)ldbp->internal; + SET_LOCKER(ldbp, txn); + GET_META(ldbp, hashp); + hcp = TAILQ_FIRST(&ldbp->curs_queue)->internal; + + hashp->hash_accesses++; + if ((ret = __ham_lookup(hashp, hcp, key, 0, DB_LOCK_WRITE)) == 0) + if (F_ISSET(hcp, H_OK)) + ret = __ham_del_pair(hashp, hcp); + else + ret = DB_NOTFOUND; + + if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0) + ret = t_ret; + RELEASE_META(ldbp, hashp); + if (F_ISSET(dbp, DB_AM_THREAD)) + __db_puthandle(ldbp); + return (ret); +} + +/* ****************** CURSORS ********************************** */ +static int +__ham_c_close(cursor) + DBC *cursor; +{ + DB *ldbp; + HTAB *hashp; + HASH_CURSOR *hcp; + int ret; + + DEBUG_LWRITE(cursor->dbp, cursor->txn, "ham_c_close", NULL, NULL, 0); + /* + * If the pagep, dpagep, and lock fields of the cursor are all NULL, + * then there really isn't a need to get a handle here. However, + * the normal case is that at least one of those fields is non-NULL, + * and putting those checks in here would couple the ham_item_done + * functionality with cursor close which would be pretty disgusting. + * Instead, we pay the overhead here of always getting the handle. + */ + ldbp = cursor->dbp; + if (F_ISSET(cursor->dbp, DB_AM_THREAD) && + (ret = __db_gethandle(cursor->dbp, __ham_hdup, &ldbp)) != 0) + return (ret); + hashp = (HTAB *)ldbp->internal; + hcp = (HASH_CURSOR *)cursor->internal; + ret = __ham_item_done(hashp, hcp, 0); + + if (hcp->big_key) + FREE(hcp->big_key, hcp->big_keylen); + if (hcp->big_data) + FREE(hcp->big_data, hcp->big_datalen); + + /* + * All cursors (except the default ones) are linked off the master. + * Therefore, when we close the cursor, we have to remove it from + * the master, not the local one. When we are closing the file in + * its entirety, then we clear the THREAD bit and the master and + * local are identical, so we remove the correct one. + */ + DB_THREAD_LOCK(cursor->dbp); + TAILQ_REMOVE(&cursor->dbp->curs_queue, cursor, links); + DB_THREAD_UNLOCK(cursor->dbp); + + if (F_ISSET(cursor->dbp, DB_AM_THREAD)) + __db_puthandle(ldbp); + + FREE(hcp, sizeof(HASH_CURSOR)); + FREE(cursor, sizeof(DBC)); + return (ret); +} + +static int +__ham_c_del(cursor, flags) + DBC *cursor; + int flags; +{ + DB *ldbp; + HTAB *hashp; + HASH_CURSOR *hcp; + HASH_CURSOR save_curs; + db_pgno_t ppgno, chg_pgno; + int ret, t_ret; + + DEBUG_LWRITE(cursor->dbp, cursor->txn, "ham_c_del", NULL, NULL, flags); + ldbp = cursor->dbp; + if (F_ISSET(cursor->dbp, DB_AM_THREAD) && + (ret = __db_gethandle(cursor->dbp, __ham_hdup, &ldbp)) != 0) + return (ret); + hashp = (HTAB *)ldbp->internal; + hcp = (HASH_CURSOR *)cursor->internal; + save_curs = *hcp; + if ((ret = __db_cdelchk(ldbp, flags, + F_ISSET(ldbp, DB_AM_RDONLY), IS_VALID(hcp))) != 0) + return (ret); + if (F_ISSET(hcp, H_DELETED)) + return (DB_NOTFOUND); + + SET_LOCKER(hashp->dbp, cursor->txn); + GET_META(hashp->dbp, hashp); + hashp->hash_accesses++; + if ((ret = __ham_get_cpage(hashp, hcp, DB_LOCK_WRITE)) != 0) + goto out; + if (F_ISSET(hcp, H_ISDUP) && hcp->dpgno != PGNO_INVALID) { + ppgno = PREV_PGNO(hcp->dpagep); + + /* Remove item from duplicate page. */ + chg_pgno = hcp->dpgno; + if ((ret = __db_drem(hashp->dbp, + &hcp->dpagep, hcp->dndx, __ham_del_page)) != 0) + goto out; + + /* + * There are 4 cases. + * 1. We removed an item on a page, but nothing else changed. + * 2. We removed the last item on a page, but there is a + * following page of duplicates. + * 3. We removed the last item on a page, this page was the + * last page in a duplicate set, but there were dups before + * it. + * 4. We removed the last item on a page, removing the last + * duplicate. + * In case 1 hcp->dpagep is unchanged. + * In case 2 hcp->dpagep comes back pointing to the next dup + * page. + * In case 3 hcp->dpagep comes back NULL. + * In case 4 hcp->dpagep comes back NULL. + */ + if (hcp->dpagep == NULL) { + if (ppgno != PGNO_INVALID) { /* Case 3 */ + hcp->dpgno = ppgno; + if ((ret = __ham_get_cpage(hashp, hcp, + DB_LOCK_READ)) != 0) + goto out; + hcp->dndx = NUM_ENT(hcp->dpagep); + F_SET(hcp, H_DELETED); + } else { /* Case 4 */ + ret = __ham_del_pair(hashp, hcp); + hcp->dpgno = PGNO_INVALID; + /* + * Delpair updated the cursor queue, so we + * don't have to do that here. + */ + chg_pgno = PGNO_INVALID; + } + } else if (PGNO(hcp->dpagep) != hcp->dpgno) { + hcp->dndx = 0; /* Case 2 */ + hcp->dpgno = PGNO(hcp->dpagep); + if (ppgno == PGNO_INVALID) + memcpy(P_ENTRY(hcp->pagep, + H_DATAINDEX(hcp->bndx)) + + SSZ(HOFFDUP, pgno), &hcp->dpgno, + sizeof(db_pgno_t)); + F_SET(hcp, H_DELETED); + } else /* Case 1 */ + F_SET(hcp, H_DELETED); + if (chg_pgno != PGNO_INVALID) + __ham_c_update(hashp, hcp, chg_pgno, 0, 0, 1); + } else if (F_ISSET(hcp, H_ISDUP)) { /* on page */ + if (hcp->dup_off == 0 && DUP_SIZE(hcp->dup_len) == + LEN_HDATA(hcp->pagep, hashp->hdr->pagesize, hcp->bndx)) + ret = __ham_del_pair(hashp, hcp); + else { + DBT repldbt; + + repldbt.flags = 0; + F_SET(&repldbt, DB_DBT_PARTIAL); + repldbt.doff = hcp->dup_off; + repldbt.dlen = DUP_SIZE(hcp->dup_len); + repldbt.size = 0; + ret = __ham_replpair(hashp, hcp, &repldbt, 0); + hcp->dup_tlen -= DUP_SIZE(hcp->dup_len); + __ham_c_update(hashp, hcp, hcp->pgno, + DUP_SIZE(hcp->dup_len), 0, 1); + F_SET(hcp, H_DELETED); + } + + } else + /* Not a duplicate */ + ret = __ham_del_pair(hashp, hcp); + +out: if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0) + t_ret = ret; + if (ret != 0) + *hcp = save_curs; + RELEASE_META(hashp->dbp, hashp); + if (F_ISSET(cursor->dbp, DB_AM_THREAD)) + __db_puthandle(ldbp); + return (ret); +} + +static int +__ham_c_get(cursor, key, data, flags) + DBC *cursor; + DBT *key; + DBT *data; + int flags; +{ + DB *ldbp; + HTAB *hashp; + HASH_CURSOR *hcp, save_curs; + int get_key, ret, t_ret; + + DEBUG_LREAD(cursor->dbp, cursor->txn, "ham_c_get", + flags == DB_SET || flags == DB_SET_RANGE ? key : NULL, + NULL, flags); + ldbp = cursor->dbp; + if (F_ISSET(cursor->dbp, DB_AM_THREAD) && + (ret = __db_gethandle(cursor->dbp, __ham_hdup, &ldbp)) != 0) + return (ret); + hashp = (HTAB *)(ldbp->internal); + hcp = (HASH_CURSOR *)cursor->internal; + save_curs = *hcp; + if ((ret = + __db_cgetchk(hashp->dbp, key, data, flags, IS_VALID(hcp))) != 0) + return (ret); + + SET_LOCKER(hashp->dbp, cursor->txn); + GET_META(hashp->dbp, hashp); + hashp->hash_accesses++; + + hcp->seek_size = 0; + + ret = 0; + get_key = 1; + switch (flags) { + case DB_PREV: + if (hcp->bucket != BUCKET_INVALID) { + ret = __ham_item_prev(hashp, hcp, DB_LOCK_READ); + break; + } + /* FALL THROUGH */ + case DB_LAST: + ret = __ham_item_last(hashp, hcp, DB_LOCK_READ); + break; + case DB_FIRST: + ret = __ham_item_first(hashp, hcp, DB_LOCK_READ); + break; + case DB_NEXT: + if (hcp->bucket == BUCKET_INVALID) + hcp->bucket = 0; + ret = __ham_item_next(hashp, hcp, DB_LOCK_READ); + break; + case DB_SET: + case DB_SET_RANGE: + ret = __ham_lookup(hashp, hcp, key, 0, DB_LOCK_READ); + get_key = 0; + break; + case DB_CURRENT: + if (F_ISSET(hcp, H_DELETED)) { + ret = DB_KEYEMPTY; + goto out; + } + + ret = __ham_item(hashp, hcp, DB_LOCK_READ); + break; + } + + /* + * Must always enter this loop to do error handling and + * check for big key/data pair. + */ + while (1) { + if (ret != 0 && ret != DB_NOTFOUND) + goto out1; + else if (F_ISSET(hcp, H_OK)) { + /* Get the key. */ + if (get_key && (ret = __db_ret(hashp->dbp, hcp->pagep, + H_KEYINDEX(hcp->bndx), key, &hcp->big_key, + &hcp->big_keylen)) != 0) + goto out1; + + ret = __ham_dup_return(hashp, hcp, data, flags); + break; + } else if (!F_ISSET(hcp, H_NOMORE)) { + abort(); + break; + } + + /* + * Ran out of entries in a bucket; change buckets. + */ + switch (flags) { + case DB_LAST: + case DB_PREV: + ret = __ham_item_done(hashp, hcp, 0); + if (hcp->bucket == 0) { + ret = DB_NOTFOUND; + goto out1; + } + hcp->bucket--; + hcp->bndx = NDX_INVALID; + if (ret == 0) + ret = __ham_item_prev(hashp, + hcp, DB_LOCK_READ); + break; + case DB_FIRST: + case DB_NEXT: + ret = __ham_item_done(hashp, hcp, 0); + hcp->bndx = NDX_INVALID; + hcp->bucket++; + hcp->pgno = PGNO_INVALID; + hcp->pagep = NULL; + if (hcp->bucket > hashp->hdr->max_bucket) { + ret = DB_NOTFOUND; + goto out1; + } + if (ret == 0) + ret = __ham_item_next(hashp, + hcp, DB_LOCK_READ); + break; + case DB_SET: + case DB_SET_RANGE: + /* Key not found. */ + ret = DB_NOTFOUND; + goto out1; + } + } +out1: if ((t_ret = __ham_item_done(hashp, hcp, 0)) != 0 && ret == 0) + t_ret = ret; +out: if (ret) + *hcp = save_curs; + RELEASE_META(hashp->dbp, hashp); + if (F_ISSET(cursor->dbp, DB_AM_THREAD)) + __db_puthandle(ldbp); + return (ret); +} + +static int +__ham_c_put(cursor, key, data, flags) + DBC *cursor; + DBT *key; + DBT *data; + int flags; +{ + DB *ldbp; + HTAB *hashp; + HASH_CURSOR *hcp, save_curs; + int ret, t_ret; + u_int32_t nbytes; + + DEBUG_LWRITE(cursor->dbp, cursor->txn, "ham_c_put", + flags == DB_KEYFIRST || flags == DB_KEYLAST ? key : NULL, + NULL, flags); + ldbp = cursor->dbp; + if (F_ISSET(cursor->dbp, DB_AM_THREAD) && + (ret = __db_gethandle(cursor->dbp, __ham_hdup, &ldbp)) != 0) + return (ret); + hashp = (HTAB *)(ldbp->internal); + hcp = (HASH_CURSOR *)cursor->internal; + save_curs = *hcp; + + if ((ret = __db_cputchk(hashp->dbp, key, data, flags, + F_ISSET(ldbp, DB_AM_RDONLY), IS_VALID(hcp))) != 0) + return (ret); + if (F_ISSET(hcp, H_DELETED)) + return (DB_NOTFOUND); + + SET_LOCKER(hashp->dbp, cursor->txn); + GET_META(hashp->dbp, hashp); + ret = 0; + + switch (flags) { + case DB_KEYLAST: + case DB_KEYFIRST: + nbytes = (ISBIG(hashp, key->size) ? HOFFPAGE_PSIZE : + HKEYDATA_PSIZE(key->size)) + + (ISBIG(hashp, data->size) ? HOFFPAGE_PSIZE : + HKEYDATA_PSIZE(data->size)); + ret = __ham_lookup(hashp, hcp, key, nbytes, DB_LOCK_WRITE); + break; + case DB_BEFORE: + case DB_AFTER: + case DB_CURRENT: + ret = __ham_item(hashp, hcp, DB_LOCK_WRITE); + break; + } + + if (ret == 0) { + if (flags == DB_CURRENT && !F_ISSET(ldbp, DB_AM_DUP)) + ret = __ham_overwrite(hashp, hcp, data); + else + ret = __ham_add_dup(hashp, hcp, data, flags); + } + + if (ret == 0 && F_ISSET(hcp, H_EXPAND)) { + ret = __ham_expand_table(hashp); + F_CLR(hcp, H_EXPAND); + } + + if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0) + ret = t_ret; + if (ret != 0) + *hcp = save_curs; + RELEASE_META(hashp->dbp, hashp); + if (F_ISSET(cursor->dbp, DB_AM_THREAD)) + __db_puthandle(ldbp); + return (ret); +} + +/********************************* UTILITIES ************************/ + +/* + * __ham_expand_table -- + * + * PUBLIC: int __ham_expand_table __P((HTAB *)); + */ +int +__ham_expand_table(hashp) + HTAB *hashp; +{ + u_int32_t old_bucket, new_bucket; + u_int32_t spare_ndx; + int ret; + + ret = 0; + DIRTY_META(hashp, ret); + if (ret) + return (ret); + + if (DB_LOGGING(hashp->dbp)) { + DB_LSN new_lsn; + + if ((ret = __ham_splitmeta_log(hashp->dbp->dbenv->lg_info, + (DB_TXN *)hashp->dbp->txn, &new_lsn, 0, + hashp->dbp->log_fileid, + hashp->hdr->max_bucket, hashp->hdr->ovfl_point, + hashp->hdr->spares[hashp->hdr->ovfl_point], + &hashp->hdr->lsn)) != 0) + return (ret); + + hashp->hdr->lsn = new_lsn; + } + + hashp->hash_expansions++; + new_bucket = ++hashp->hdr->max_bucket; + old_bucket = (hashp->hdr->max_bucket & hashp->hdr->low_mask); + + /* + * If the split point is increasing (hdr.max_bucket's log base 2 + * increases), max sure that we have enough extra pages, then + * copy the current contents of the spare split bucket to the + * next bucket. + */ + spare_ndx = __db_log2(hashp->hdr->max_bucket + 1); + if (spare_ndx > hashp->hdr->ovfl_point) { + /* + * We are about to shift the split point. Make sure that + * if the next doubling is going to be big (more than 8 + * pages), we have some extra pages around. + */ + if (hashp->hdr->spares[hashp->hdr->ovfl_point] == 0 && + new_bucket >= 8) + __ham_init_ovflpages(hashp); + + hashp->hdr->spares[spare_ndx] = + hashp->hdr->spares[hashp->hdr->ovfl_point]; + hashp->hdr->ovfl_point = spare_ndx; + } + + if (new_bucket > hashp->hdr->high_mask) { + /* Starting a new doubling */ + hashp->hdr->low_mask = hashp->hdr->high_mask; + hashp->hdr->high_mask = new_bucket | hashp->hdr->low_mask; + } + + if (BUCKET_TO_PAGE(hashp, new_bucket) > MAX_PAGES(hashp)) { + __db_err(hashp->dbp->dbenv, + "hash: Cannot allocate new bucket. Pages exhausted."); + return (ENOSPC); + } + + /* Relocate records to the new bucket */ + return (__ham_split_page(hashp, old_bucket, new_bucket)); +} + +/* + * PUBLIC: u_int32_t __ham_call_hash __P((HTAB *, u_int8_t *, int32_t)); + */ +u_int32_t +__ham_call_hash(hashp, k, len) + HTAB *hashp; + u_int8_t *k; + int32_t len; +{ + u_int32_t n, bucket; + + n = (u_int32_t)hashp->hash(k, len); + bucket = n & hashp->hdr->high_mask; + if (bucket > hashp->hdr->max_bucket) + bucket = bucket & hashp->hdr->low_mask; + return (bucket); +} + +/* + * Check for duplicates, and call __db_ret appropriately. Release + * everything held by the cursor. + */ +static int +__ham_dup_return(hashp, hcp, val, flags) + HTAB *hashp; + HASH_CURSOR *hcp; + DBT *val; + int flags; +{ + HKEYDATA *hk; + PAGE *pp; + DBT *myval, tmp_val; + db_indx_t ndx; + db_pgno_t pgno; + u_int8_t type; + int indx, ret; + db_indx_t len; + + /* Check for duplicate and return the first one. */ + ndx = H_DATAINDEX(hcp->bndx); + type = GET_HKEYDATA(hcp->pagep, ndx)->type; + pp = hcp->pagep; + myval = val; + + /* + * There are 3 cases: + * 1. We are not in duplicate, simply call db_ret. + * 2. We are looking at keys and stumbled onto a duplicate. + * 3. We are in the middle of a duplicate set. (ISDUP set) + */ + + /* + * Here we check for the case where we just stumbled onto a + * duplicate. In this case, we do initialization and then + * let the normal duplicate code handle it. + */ + if (!F_ISSET(hcp, H_ISDUP)) + if (type == H_DUPLICATE) { + F_SET(hcp, H_ISDUP); + hcp->dup_tlen = LEN_HDATA(hcp->pagep, + hashp->hdr->pagesize, hcp->bndx); + hk = H_PAIRDATA(hcp->pagep, hcp->bndx); + if (flags == DB_LAST || flags == DB_PREV) { + hcp->dndx = 0; + hcp->dup_off = 0; + do { + memcpy(&len, hk->data + hcp->dup_off, + sizeof(db_indx_t)); + hcp->dup_off += DUP_SIZE(len); + hcp->dndx++; + } while (hcp->dup_off < hcp->dup_tlen); + hcp->dup_off -= DUP_SIZE(len); + hcp->dndx--; + } else { + memcpy(&len, hk->data, sizeof(db_indx_t)); + hcp->dup_off = 0; + hcp->dndx = 0; + } + hcp->dup_len = len; + } else if (type == H_OFFDUP) { + F_SET(hcp, H_ISDUP); + memcpy(&pgno, + P_ENTRY(hcp->pagep, ndx) + SSZ(HOFFDUP, pgno), + sizeof(db_pgno_t)); + if (flags == DB_LAST || flags == DB_PREV) { + indx = (int)hcp->dndx; + if ((ret = __db_dend(hashp->dbp, + pgno, &hcp->dpagep)) != 0) + return (ret); + hcp->dpgno = PGNO(hcp->dpagep); + hcp->dndx = NUM_ENT(hcp->dpagep) - 1; + } else if ((ret = __ham_next_cpage(hashp, + hcp, pgno, 0, H_ISDUP)) != 0) + return (ret); + } + + + /* + * Now, everything is initialized, grab a duplicate if + * necessary. + */ + if (F_ISSET(hcp, H_ISDUP)) + if (hcp->dpgno != PGNO_INVALID) { + pp = hcp->dpagep; + ndx = hcp->dndx; + } else { + /* + * Copy the DBT in case we are retrieving into + * user memory and we need the parameters for + * it. + */ + memcpy(&tmp_val, val, sizeof(*val)); + F_SET(&tmp_val, DB_DBT_PARTIAL); + tmp_val.dlen = hcp->dup_len; + tmp_val.doff = hcp->dup_off + sizeof(db_indx_t); + myval = &tmp_val; + } + + + /* + * Finally, if we had a duplicate, pp, ndx, and myval should be + * set appropriately. + */ + if ((ret = __db_ret(hashp->dbp, pp, ndx, myval, &hcp->big_data, + &hcp->big_datalen)) != 0) + return (ret); + + /* + * In case we sent a temporary off to db_ret, set the real + * return values. + */ + val->data = myval->data; + val->size = myval->size; + + return (0); +} + +static int +__ham_overwrite(hashp, hcp, nval) + HTAB *hashp; + HASH_CURSOR *hcp; + DBT *nval; +{ + DBT *myval, tmp_val; + HKEYDATA *hk; + + if (F_ISSET(hashp->dbp, DB_AM_DUP)) + return (__ham_add_dup(hashp, hcp, nval, DB_KEYLAST)); + else if (!F_ISSET(nval, DB_DBT_PARTIAL)) { + /* Put/overwrite */ + memcpy(&tmp_val, nval, sizeof(*nval)); + F_SET(&tmp_val, DB_DBT_PARTIAL); + tmp_val.doff = 0; + hk = H_PAIRDATA(hcp->pagep, hcp->bndx); + if (hk->type == H_OFFPAGE) + memcpy(&tmp_val.dlen, + (u_int8_t *)hk + SSZ(HOFFPAGE, tlen), + sizeof(u_int32_t)); + else + tmp_val.dlen = LEN_HDATA(hcp->pagep, + hashp->hdr->pagesize,hcp->bndx); + myval = &tmp_val; + } else /* Regular partial put */ + myval = nval; + + return (__ham_replpair(hashp, hcp, myval, 0)); +} + +/* + * Given a key and a cursor, sets the cursor to the page/ndx on which + * the key resides. If the key is found, the cursor H_OK flag is set + * and the pagep, bndx, pgno (dpagep, dndx, dpgno) fields are set. + * If the key is not found, the H_OK flag is not set. If the sought + * field is non-0, the pagep, bndx, pgno (dpagep, dndx, dpgno) fields + * are set indicating where an add might take place. If it is 0, + * non of the cursor pointer field are valid. + */ +static int +__ham_lookup(hashp, hcp, key, sought, mode) + HTAB *hashp; + HASH_CURSOR *hcp; + const DBT *key; + u_int32_t sought; + db_lockmode_t mode; +{ + HKEYDATA *hk; + db_pgno_t pgno; + u_int32_t tlen; + int match, ret, t_ret; + + /* + * Set up cursor so that we're looking for space to add an item + * as we cycle through the pages looking for the key. + */ + if ((ret = __ham_item_reset(hashp, hcp)) != 0) + return (ret); + hcp->seek_size = sought; + + hcp->bucket = __ham_call_hash(hashp, (u_int8_t *)key->data, key->size); + while (1) { + if ((ret = __ham_item_next(hashp, hcp, mode)) != 0) + return (ret); + + if (F_ISSET(hcp, H_NOMORE)) + break; + + hk = H_PAIRKEY(hcp->pagep, hcp->bndx); + switch (hk->type) { + case H_OFFPAGE: + memcpy(&tlen, (u_int8_t *)hk + SSZ(HOFFPAGE, tlen), + sizeof(u_int32_t)); + if (tlen == key->size) { + memcpy(&pgno, + (u_int8_t *)hk + SSZ(HOFFPAGE, pgno), + sizeof(db_pgno_t)); + match = __db_moff(hashp->dbp, key, pgno); + if (match == 0) { + F_SET(hcp, H_OK); + return (0); + } + } + break; + case H_KEYDATA: + if (key->size == LEN_HKEY(hcp->pagep, + hashp->hdr->pagesize, hcp->bndx) && + memcmp(key->data, hk->data, key->size) == 0) { + F_SET(hcp, H_OK); + return (0); + } + break; + case H_DUPLICATE: + case H_OFFDUP: + /* + * These are errors because keys are never + * duplicated, only data items are. + */ + return (__db_pgfmt(hashp->dbp, PGNO(hcp->pagep))); + } + hashp->hash_collisions++; + } + + /* + * Item was not found, adjust cursor properly. + */ + + if (sought != 0) + return (ret); + + if ((t_ret = __ham_item_done(hashp, hcp, 0)) != 0 && ret == 0) + ret = t_ret; + return (ret); +} + +/* + * Initialize a dbt using some possibly already allocated storage + * for items. + * PUBLIC: int __ham_init_dbt __P((DBT *, u_int32_t, void **, u_int32_t *)); + */ +int +__ham_init_dbt(dbt, size, bufp, sizep) + DBT *dbt; + u_int32_t size; + void **bufp; + u_int32_t *sizep; +{ + memset(dbt, 0, sizeof(*dbt)); + if (*sizep < size) { + if ((*bufp = (void *)(*bufp == NULL ? + malloc(size) : realloc(*bufp, size))) == NULL) { + *sizep = 0; + return (ENOMEM); + } + *sizep = size; + } + dbt->data = *bufp; + dbt->size = size; + return (0); +} + +/* + * Adjust the cursor after an insert or delete. The cursor passed is + * the one that was operated upon; we just need to check any of the + * others. + * + * len indicates the length of the item added/deleted + * add indicates if the item indicated by the cursor has just been + * added (add == 1) or deleted (add == 0). + * dup indicates if the addition occurred into a duplicate set. + * + * PUBLIC: void __ham_c_update __P((HTAB *, + * PUBLIC: HASH_CURSOR *, db_pgno_t, u_int32_t, int, int)); + */ +void +__ham_c_update(hashp, hcp, chg_pgno, len, add, dup) + HTAB *hashp; + HASH_CURSOR *hcp; + db_pgno_t chg_pgno; + u_int32_t len; + int add; + int dup; +{ + DBC *cp; + HTAB *hp; + HASH_CURSOR *lcp; + int page_deleted; + + /* + * Regular adds are always at the end of a given page, + * so we never have to adjust anyone's cursor after + * a regular add. + */ + if (!dup && add) + return; + + page_deleted = chg_pgno != PGNO_INVALID && + ((!dup && chg_pgno != hcp->pgno) || + (dup && chg_pgno != hcp->dpgno)); + + hp = hcp->db_cursor->dbp->master->internal; + DB_THREAD_LOCK(hp->dbp); + + for (cp = TAILQ_FIRST(&hp->dbp->curs_queue); cp != NULL; + cp = TAILQ_NEXT(cp, links)) { + if (cp->internal == hcp) + continue; + + lcp = (HASH_CURSOR *)cp->internal; + + if (!dup && lcp->pgno != chg_pgno) + continue; + + if (dup && F_ISSET(hcp, H_DELETED) && lcp->pgno != chg_pgno) + continue; + + if (dup && !F_ISSET(hcp, H_DELETED) && lcp->dpgno != chg_pgno) + continue; + + if (page_deleted) { + if (dup) { + lcp->dpgno = hcp->dpgno; + lcp->dndx = hcp->dndx; + } else { + lcp->pgno = hcp->pgno; + lcp->bndx = hcp->bndx; + lcp->bucket = hcp->bucket; + } + F_CLR(lcp, H_ISDUP); + continue; + } + + if (!dup && lcp->bndx > hcp->bndx) + lcp->bndx--; + else if (!dup && lcp->bndx == hcp->bndx) + F_SET(lcp, H_DELETED); + else if (dup && lcp->bndx == hcp->bndx) { + /* Assign dpgno in case there was page conversion. */ + lcp->dpgno = hcp->dpgno; + if (add && lcp->dndx >= hcp->dndx ) + lcp->dndx++; + else if (!add && lcp->dndx > hcp->dndx) + lcp->dndx--; + else if (!add && lcp->dndx == hcp->dndx) + F_SET(lcp, H_DELETED); + + /* Now adjust on-page information. */ + if (lcp->dpgno == PGNO_INVALID) + if (add) { + lcp->dup_tlen += len; + if (lcp->dndx > hcp->dndx) + lcp->dup_off += len; + } else { + lcp->dup_tlen -= len; + if (lcp->dndx > hcp->dndx) + lcp->dup_off -= len; + } + } + } + DB_THREAD_UNLOCK(hp->dbp); +} + +/* + * __ham_hdup -- + * This function gets called when we create a duplicate handle for a + * threaded DB. It should create the private part of the DB structure. + * PUBLIC: int __ham_hdup __P((DB *, DB *)); + */ +int +__ham_hdup(orig, new) + DB *orig, *new; +{ + HTAB *hashp; + DBC *curs; + int ret; + + if ((hashp = (HTAB *)malloc(sizeof(HTAB))) == NULL) + return (ENOMEM); + + new->internal = hashp; + + hashp->dbp = new; + hashp->hlock = 0; + hashp->hdr = NULL; + hashp->hash = ((HTAB *)orig->internal)->hash; + if ((hashp->split_buf = (PAGE *)malloc(orig->pgsize)) == NULL) + return (ENOMEM); + hashp->local_errno = 0; + hashp->hash_accesses = 0; + hashp->hash_collisions = 0; + hashp->hash_expansions = 0; + hashp->hash_overflows = 0; + hashp->hash_bigpages = 0; + /* Initialize the cursor queue. */ + ret = __ham_c_init(new, NULL, &curs); + TAILQ_INSERT_TAIL(&new->curs_queue, curs, links); + return (ret); +} diff --git a/db2/hash/hash.src b/db2/hash/hash.src new file mode 100644 index 0000000..04a98d3 --- /dev/null +++ b/db2/hash/hash.src @@ -0,0 +1,211 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997 + * Sleepycat Software. All rights reserved. + */ +/* + * Copyright (c) 1995, 1996 + * Margo Seltzer. All rights reserved. + */ +/* + * Copyright (c) 1995, 1996 + * The President and Fellows of Harvard University. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)hash.src 10.1 (Sleepycat) 4/12/97 + */ + +#include "config.h" + +/* + * This is the source file used to create the logging functions for the + * hash package. Each access method (or set of routines wishing to register + * record types with the transaction system) should have a file like this. + * Each type of log record and its parameters is defined. The basic + * format of a record definition is: + * + * BEGIN <RECORD_TYPE> + * ARG|STRING|POINTER <variable name> <variable type> <printf format> + * ... + * END + * ARG the argument is a simple parameter of the type * specified. + * DBT the argument is a DBT (db.h) containing a length and pointer. + * PTR the argument is a pointer to the data type specified; the entire + * type should be logged. + * + * There are a set of shell scripts of the form xxx.sh that generate c + * code and or h files to process these. (This is probably better done + * in a single PERL script, but for now, this works.) + * + * The DB recovery system requires the following three fields appear in + * every record, and will assign them to the per-record-type structures + * as well as making them the first parameters to the appropriate logging + * call. + * rectype: record-type, identifies the structure and log/read call + * txnid: transaction id, a DBT in this implementation + * prev: the last LSN for this transaction + */ + +/* + * Use the argument of PREFIX as the prefix for all record types, + * routines, id numbers, etc. + */ +PREFIX ham + +/* + * HASH-insdel: used for hash to insert/delete a pair of entries onto a master + * page. The pair might be regular key/data pairs or they might be the + * structures that refer to off page items, duplicates or offpage duplicates. + * opcode - PUTPAIR/DELPAIR + big masks + * fileid - identifies the file referenced + * pgno - page within file + * ndx - index on the page of the item being added (item index) + * pagelsn - lsn on the page before the update + * key - the key being inserted + * data - the data being inserted + */ +BEGIN insdel +ARG opcode u_int32_t lu +ARG fileid u_int32_t lu +ARG pgno db_pgno_t lu +ARG ndx u_int32_t lu +POINTER pagelsn DB_LSN * lu +DBT key DBT s +DBT data DBT s +END + +/* + * Used to add and remove overflow pages. + * prev_pgno is the previous page that is going to get modified to + * point to this one. If this is the first page in a chain + * then prev_pgno should be PGNO_INVALID. + * new_pgno is the page being allocated. + * next_pgno is the page that follows this one. On allocation, + * this should be PGNO_INVALID. For deletes, it may exist. + * pagelsn is the old lsn on the page. + */ +BEGIN newpage +ARG opcode u_int32_t lu +ARG fileid u_int32_t lu +ARG prev_pgno db_pgno_t lu +POINTER prevlsn DB_LSN * lu +ARG new_pgno db_pgno_t lu +POINTER pagelsn DB_LSN * lu +ARG next_pgno db_pgno_t lu +POINTER nextlsn DB_LSN * lu +END + +/* + * Splitting requires two types of log messages. The first + * logs the meta-data of the split. The second logs the + * data on the original page. To redo the split, we have + * to visit the new page (pages) and add the items back + * on the page if they are not yet there. + * For the meta-data split + * bucket: max_bucket in table before split + * ovflpoint: overflow point before split. + * spares: spares[ovflpoint] before split. + */ +BEGIN splitmeta +ARG fileid u_int32_t lu +ARG bucket u_int32_t lu +ARG ovflpoint u_int32_t lu +ARG spares u_int32_t lu +POINTER metalsn DB_LSN * lu +END + +BEGIN splitdata +ARG fileid u_int32_t lu +ARG opcode u_int32_t lu +ARG pgno db_pgno_t lu +DBT pageimage DBT s +POINTER pagelsn DB_LSN * lu +END + +/* + * HASH-replace: is used for hash to handle partial puts that only + * affect a single master page. + * fileid - identifies the file referenced + * pgno - page within file + * ndx - index on the page of the item being modified (item index) + * pagelsn - lsn on the page before the update + * off - offset in the old item where the new item is going. + * olditem - DBT that describes the part of the item being replaced. + * newitem - DBT of the new item. + * makedup - this was a replacement that made an item a duplicate. + */ +BEGIN replace +ARG fileid u_int32_t lu +ARG pgno db_pgno_t lu +ARG ndx u_int32_t lu +POINTER pagelsn DB_LSN * lu +ARG off int32_t ld +DBT olditem DBT s +DBT newitem DBT s +ARG makedup u_int32_t lu +END + +/* + * HASH-newpgno: is used to record getting/deleting a new page number. + * This doesn't require much data modification, just modifying the + * meta-data. + * pgno is the page being allocated/freed. + * free_pgno is the next_pgno on the free list. + * old_type was the type of a page being deallocated. + * old_pgno was the next page number before the deallocation. We use it + * to indicate whether we incremented the spares count or not + * during this allocation. + */ +BEGIN newpgno +ARG opcode u_int32_t lu +ARG fileid u_int32_t lu +ARG pgno db_pgno_t lu +ARG free_pgno db_pgno_t lu +ARG old_type u_int32_t lu +ARG old_pgno db_pgno_t lu +ARG new_type u_int32_t lu +POINTER pagelsn DB_LSN * lu +POINTER metalsn DB_LSN * lu +END + +/* + * ovfl: initialize a set of overflow pages. + */ +BEGIN ovfl +ARG fileid u_int32_t lu +ARG start_pgno db_pgno_t lu +ARG npages u_int32_t lu +ARG free_pgno db_pgno_t lu +POINTER metalsn DB_LSN * lu +END diff --git a/db2/hash/hash_auto.c b/db2/hash/hash_auto.c new file mode 100644 index 0000000..f8ab80c --- /dev/null +++ b/db2/hash/hash_auto.c @@ -0,0 +1,1343 @@ +/* Do not edit: automatically built by dist/db_gen.sh. */ +#include "config.h" + +#ifndef NO_SYSTEM_INCLUDES +#include <ctype.h> +#include <errno.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> +#endif + +#include "db_int.h" +#include "shqueue.h" +#include "db_page.h" +#include "db_dispatch.h" +#include "hash.h" +#include "db_am.h" +#include "common_ext.h" + +/* + * PUBLIC: int __ham_insdel_log + * PUBLIC: __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, + * PUBLIC: u_int32_t, u_int32_t, db_pgno_t, u_int32_t, + * PUBLIC: DB_LSN *, DBT *, DBT *)); + */ +int __ham_insdel_log(logp, txnid, ret_lsnp, flags, + opcode, fileid, pgno, ndx, pagelsn, key, + data) + DB_LOG *logp; + DB_TXN *txnid; + DB_LSN *ret_lsnp; + u_int32_t flags; + u_int32_t opcode; + u_int32_t fileid; + db_pgno_t pgno; + u_int32_t ndx; + DB_LSN * pagelsn; + DBT *key; + DBT *data; +{ + DBT logrec; + DB_LSN *lsnp, null_lsn; + u_int32_t zero; + u_int32_t rectype, txn_num; + int ret; + u_int8_t *bp; + + rectype = DB_ham_insdel; + txn_num = txnid == NULL ? 0 : txnid->txnid; + if (txnid == NULL) { + null_lsn.file = 0; + null_lsn.offset = 0; + lsnp = &null_lsn; + } else + lsnp = &txnid->last_lsn; + logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) + + sizeof(opcode) + + sizeof(fileid) + + sizeof(pgno) + + sizeof(ndx) + + sizeof(*pagelsn) + + sizeof(u_int32_t) + (key == NULL ? 0 : key->size) + + sizeof(u_int32_t) + (data == NULL ? 0 : data->size); + if ((logrec.data = (void *)malloc(logrec.size)) == NULL) + return (ENOMEM); + + bp = logrec.data; + memcpy(bp, &rectype, sizeof(rectype)); + bp += sizeof(rectype); + memcpy(bp, &txn_num, sizeof(txn_num)); + bp += sizeof(txn_num); + memcpy(bp, lsnp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + memcpy(bp, &opcode, sizeof(opcode)); + bp += sizeof(opcode); + memcpy(bp, &fileid, sizeof(fileid)); + bp += sizeof(fileid); + memcpy(bp, &pgno, sizeof(pgno)); + bp += sizeof(pgno); + memcpy(bp, &ndx, sizeof(ndx)); + bp += sizeof(ndx); + if (pagelsn != NULL) + memcpy(bp, pagelsn, sizeof(*pagelsn)); + else + memset(bp, 0, sizeof(*pagelsn)); + bp += sizeof(*pagelsn); + if (key == NULL) { + zero = 0; + memcpy(bp, &zero, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + } else { + memcpy(bp, &key->size, sizeof(key->size)); + bp += sizeof(key->size); + memcpy(bp, key->data, key->size); + bp += key->size; + } + if (data == NULL) { + zero = 0; + memcpy(bp, &zero, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + } else { + memcpy(bp, &data->size, sizeof(data->size)); + bp += sizeof(data->size); + memcpy(bp, data->data, data->size); + bp += data->size; + } +#ifdef DEBUG + if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size) + fprintf(stderr, "Error in log record length"); +#endif + ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags); + if (txnid != NULL) + txnid->last_lsn = *ret_lsnp; + free(logrec.data); + return (ret); +} + +/* + * PUBLIC: int __ham_insdel_print + * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); + */ + +int +__ham_insdel_print(notused1, dbtp, lsnp, notused3, notused4) + DB_LOG *notused1; + DBT *dbtp; + DB_LSN *lsnp; + int notused3; + void *notused4; +{ + __ham_insdel_args *argp; + u_int32_t i; + int c, ret; + + i = 0; + c = 0; + notused1 = NULL; + notused3 = 0; + notused4 = NULL; + + if((ret = __ham_insdel_read(dbtp->data, &argp)) != 0) + return (ret); + printf("[%lu][%lu]ham_insdel: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", + (u_long)lsnp->file, + (u_long)lsnp->offset, + (u_long)argp->type, + (u_long)argp->txnid->txnid, + (u_long)argp->prev_lsn.file, + (u_long)argp->prev_lsn.offset); + printf("\topcode: %lu\n", (u_long)argp->opcode); + printf("\tfileid: %lu\n", (u_long)argp->fileid); + printf("\tpgno: %lu\n", (u_long)argp->pgno); + printf("\tndx: %lu\n", (u_long)argp->ndx); + printf("\tpagelsn: [%lu][%lu]\n", + (u_long)argp->pagelsn.file, (u_long)argp->pagelsn.offset); + printf("\tkey: "); + for (i = 0; i < argp->key.size; i++) { + c = ((char *)argp->key.data)[i]; + if (isprint(c) || c == 0xa) + putchar(c); + else + printf("%#x ", c); + } + printf("\n"); + printf("\tdata: "); + for (i = 0; i < argp->data.size; i++) { + c = ((char *)argp->data.data)[i]; + if (isprint(c) || c == 0xa) + putchar(c); + else + printf("%#x ", c); + } + printf("\n"); + printf("\n"); + free(argp); + return (0); +} + +/* + * PUBLIC: int __ham_insdel_read __P((void *, __ham_insdel_args **)); + */ +int +__ham_insdel_read(recbuf, argpp) + void *recbuf; + __ham_insdel_args **argpp; +{ + __ham_insdel_args *argp; + u_int8_t *bp; + + argp = (__ham_insdel_args *)malloc(sizeof(__ham_insdel_args) + + sizeof(DB_TXN)); + if (argp == NULL) + return (ENOMEM); + argp->txnid = (DB_TXN *)&argp[1]; + bp = recbuf; + memcpy(&argp->type, bp, sizeof(argp->type)); + bp += sizeof(argp->type); + memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid)); + bp += sizeof(argp->txnid->txnid); + memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + memcpy(&argp->opcode, bp, sizeof(argp->opcode)); + bp += sizeof(argp->opcode); + memcpy(&argp->fileid, bp, sizeof(argp->fileid)); + bp += sizeof(argp->fileid); + memcpy(&argp->pgno, bp, sizeof(argp->pgno)); + bp += sizeof(argp->pgno); + memcpy(&argp->ndx, bp, sizeof(argp->ndx)); + bp += sizeof(argp->ndx); + memcpy(&argp->pagelsn, bp, sizeof(argp->pagelsn)); + bp += sizeof(argp->pagelsn); + memcpy(&argp->key.size, bp, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + argp->key.data = bp; + bp += argp->key.size; + memcpy(&argp->data.size, bp, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + argp->data.data = bp; + bp += argp->data.size; + *argpp = argp; + return (0); +} + +/* + * PUBLIC: int __ham_newpage_log + * PUBLIC: __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, + * PUBLIC: u_int32_t, u_int32_t, db_pgno_t, DB_LSN *, + * PUBLIC: db_pgno_t, DB_LSN *, db_pgno_t, DB_LSN *)); + */ +int __ham_newpage_log(logp, txnid, ret_lsnp, flags, + opcode, fileid, prev_pgno, prevlsn, new_pgno, pagelsn, + next_pgno, nextlsn) + DB_LOG *logp; + DB_TXN *txnid; + DB_LSN *ret_lsnp; + u_int32_t flags; + u_int32_t opcode; + u_int32_t fileid; + db_pgno_t prev_pgno; + DB_LSN * prevlsn; + db_pgno_t new_pgno; + DB_LSN * pagelsn; + db_pgno_t next_pgno; + DB_LSN * nextlsn; +{ + DBT logrec; + DB_LSN *lsnp, null_lsn; + u_int32_t rectype, txn_num; + int ret; + u_int8_t *bp; + + rectype = DB_ham_newpage; + txn_num = txnid == NULL ? 0 : txnid->txnid; + if (txnid == NULL) { + null_lsn.file = 0; + null_lsn.offset = 0; + lsnp = &null_lsn; + } else + lsnp = &txnid->last_lsn; + logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) + + sizeof(opcode) + + sizeof(fileid) + + sizeof(prev_pgno) + + sizeof(*prevlsn) + + sizeof(new_pgno) + + sizeof(*pagelsn) + + sizeof(next_pgno) + + sizeof(*nextlsn); + if ((logrec.data = (void *)malloc(logrec.size)) == NULL) + return (ENOMEM); + + bp = logrec.data; + memcpy(bp, &rectype, sizeof(rectype)); + bp += sizeof(rectype); + memcpy(bp, &txn_num, sizeof(txn_num)); + bp += sizeof(txn_num); + memcpy(bp, lsnp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + memcpy(bp, &opcode, sizeof(opcode)); + bp += sizeof(opcode); + memcpy(bp, &fileid, sizeof(fileid)); + bp += sizeof(fileid); + memcpy(bp, &prev_pgno, sizeof(prev_pgno)); + bp += sizeof(prev_pgno); + if (prevlsn != NULL) + memcpy(bp, prevlsn, sizeof(*prevlsn)); + else + memset(bp, 0, sizeof(*prevlsn)); + bp += sizeof(*prevlsn); + memcpy(bp, &new_pgno, sizeof(new_pgno)); + bp += sizeof(new_pgno); + if (pagelsn != NULL) + memcpy(bp, pagelsn, sizeof(*pagelsn)); + else + memset(bp, 0, sizeof(*pagelsn)); + bp += sizeof(*pagelsn); + memcpy(bp, &next_pgno, sizeof(next_pgno)); + bp += sizeof(next_pgno); + if (nextlsn != NULL) + memcpy(bp, nextlsn, sizeof(*nextlsn)); + else + memset(bp, 0, sizeof(*nextlsn)); + bp += sizeof(*nextlsn); +#ifdef DEBUG + if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size) + fprintf(stderr, "Error in log record length"); +#endif + ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags); + if (txnid != NULL) + txnid->last_lsn = *ret_lsnp; + free(logrec.data); + return (ret); +} + +/* + * PUBLIC: int __ham_newpage_print + * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); + */ + +int +__ham_newpage_print(notused1, dbtp, lsnp, notused3, notused4) + DB_LOG *notused1; + DBT *dbtp; + DB_LSN *lsnp; + int notused3; + void *notused4; +{ + __ham_newpage_args *argp; + u_int32_t i; + int c, ret; + + i = 0; + c = 0; + notused1 = NULL; + notused3 = 0; + notused4 = NULL; + + if((ret = __ham_newpage_read(dbtp->data, &argp)) != 0) + return (ret); + printf("[%lu][%lu]ham_newpage: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", + (u_long)lsnp->file, + (u_long)lsnp->offset, + (u_long)argp->type, + (u_long)argp->txnid->txnid, + (u_long)argp->prev_lsn.file, + (u_long)argp->prev_lsn.offset); + printf("\topcode: %lu\n", (u_long)argp->opcode); + printf("\tfileid: %lu\n", (u_long)argp->fileid); + printf("\tprev_pgno: %lu\n", (u_long)argp->prev_pgno); + printf("\tprevlsn: [%lu][%lu]\n", + (u_long)argp->prevlsn.file, (u_long)argp->prevlsn.offset); + printf("\tnew_pgno: %lu\n", (u_long)argp->new_pgno); + printf("\tpagelsn: [%lu][%lu]\n", + (u_long)argp->pagelsn.file, (u_long)argp->pagelsn.offset); + printf("\tnext_pgno: %lu\n", (u_long)argp->next_pgno); + printf("\tnextlsn: [%lu][%lu]\n", + (u_long)argp->nextlsn.file, (u_long)argp->nextlsn.offset); + printf("\n"); + free(argp); + return (0); +} + +/* + * PUBLIC: int __ham_newpage_read __P((void *, __ham_newpage_args **)); + */ +int +__ham_newpage_read(recbuf, argpp) + void *recbuf; + __ham_newpage_args **argpp; +{ + __ham_newpage_args *argp; + u_int8_t *bp; + + argp = (__ham_newpage_args *)malloc(sizeof(__ham_newpage_args) + + sizeof(DB_TXN)); + if (argp == NULL) + return (ENOMEM); + argp->txnid = (DB_TXN *)&argp[1]; + bp = recbuf; + memcpy(&argp->type, bp, sizeof(argp->type)); + bp += sizeof(argp->type); + memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid)); + bp += sizeof(argp->txnid->txnid); + memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + memcpy(&argp->opcode, bp, sizeof(argp->opcode)); + bp += sizeof(argp->opcode); + memcpy(&argp->fileid, bp, sizeof(argp->fileid)); + bp += sizeof(argp->fileid); + memcpy(&argp->prev_pgno, bp, sizeof(argp->prev_pgno)); + bp += sizeof(argp->prev_pgno); + memcpy(&argp->prevlsn, bp, sizeof(argp->prevlsn)); + bp += sizeof(argp->prevlsn); + memcpy(&argp->new_pgno, bp, sizeof(argp->new_pgno)); + bp += sizeof(argp->new_pgno); + memcpy(&argp->pagelsn, bp, sizeof(argp->pagelsn)); + bp += sizeof(argp->pagelsn); + memcpy(&argp->next_pgno, bp, sizeof(argp->next_pgno)); + bp += sizeof(argp->next_pgno); + memcpy(&argp->nextlsn, bp, sizeof(argp->nextlsn)); + bp += sizeof(argp->nextlsn); + *argpp = argp; + return (0); +} + +/* + * PUBLIC: int __ham_splitmeta_log + * PUBLIC: __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, + * PUBLIC: u_int32_t, u_int32_t, u_int32_t, u_int32_t, + * PUBLIC: DB_LSN *)); + */ +int __ham_splitmeta_log(logp, txnid, ret_lsnp, flags, + fileid, bucket, ovflpoint, spares, metalsn) + DB_LOG *logp; + DB_TXN *txnid; + DB_LSN *ret_lsnp; + u_int32_t flags; + u_int32_t fileid; + u_int32_t bucket; + u_int32_t ovflpoint; + u_int32_t spares; + DB_LSN * metalsn; +{ + DBT logrec; + DB_LSN *lsnp, null_lsn; + u_int32_t rectype, txn_num; + int ret; + u_int8_t *bp; + + rectype = DB_ham_splitmeta; + txn_num = txnid == NULL ? 0 : txnid->txnid; + if (txnid == NULL) { + null_lsn.file = 0; + null_lsn.offset = 0; + lsnp = &null_lsn; + } else + lsnp = &txnid->last_lsn; + logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) + + sizeof(fileid) + + sizeof(bucket) + + sizeof(ovflpoint) + + sizeof(spares) + + sizeof(*metalsn); + if ((logrec.data = (void *)malloc(logrec.size)) == NULL) + return (ENOMEM); + + bp = logrec.data; + memcpy(bp, &rectype, sizeof(rectype)); + bp += sizeof(rectype); + memcpy(bp, &txn_num, sizeof(txn_num)); + bp += sizeof(txn_num); + memcpy(bp, lsnp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + memcpy(bp, &fileid, sizeof(fileid)); + bp += sizeof(fileid); + memcpy(bp, &bucket, sizeof(bucket)); + bp += sizeof(bucket); + memcpy(bp, &ovflpoint, sizeof(ovflpoint)); + bp += sizeof(ovflpoint); + memcpy(bp, &spares, sizeof(spares)); + bp += sizeof(spares); + if (metalsn != NULL) + memcpy(bp, metalsn, sizeof(*metalsn)); + else + memset(bp, 0, sizeof(*metalsn)); + bp += sizeof(*metalsn); +#ifdef DEBUG + if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size) + fprintf(stderr, "Error in log record length"); +#endif + ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags); + if (txnid != NULL) + txnid->last_lsn = *ret_lsnp; + free(logrec.data); + return (ret); +} + +/* + * PUBLIC: int __ham_splitmeta_print + * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); + */ + +int +__ham_splitmeta_print(notused1, dbtp, lsnp, notused3, notused4) + DB_LOG *notused1; + DBT *dbtp; + DB_LSN *lsnp; + int notused3; + void *notused4; +{ + __ham_splitmeta_args *argp; + u_int32_t i; + int c, ret; + + i = 0; + c = 0; + notused1 = NULL; + notused3 = 0; + notused4 = NULL; + + if((ret = __ham_splitmeta_read(dbtp->data, &argp)) != 0) + return (ret); + printf("[%lu][%lu]ham_splitmeta: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", + (u_long)lsnp->file, + (u_long)lsnp->offset, + (u_long)argp->type, + (u_long)argp->txnid->txnid, + (u_long)argp->prev_lsn.file, + (u_long)argp->prev_lsn.offset); + printf("\tfileid: %lu\n", (u_long)argp->fileid); + printf("\tbucket: %lu\n", (u_long)argp->bucket); + printf("\tovflpoint: %lu\n", (u_long)argp->ovflpoint); + printf("\tspares: %lu\n", (u_long)argp->spares); + printf("\tmetalsn: [%lu][%lu]\n", + (u_long)argp->metalsn.file, (u_long)argp->metalsn.offset); + printf("\n"); + free(argp); + return (0); +} + +/* + * PUBLIC: int __ham_splitmeta_read __P((void *, __ham_splitmeta_args **)); + */ +int +__ham_splitmeta_read(recbuf, argpp) + void *recbuf; + __ham_splitmeta_args **argpp; +{ + __ham_splitmeta_args *argp; + u_int8_t *bp; + + argp = (__ham_splitmeta_args *)malloc(sizeof(__ham_splitmeta_args) + + sizeof(DB_TXN)); + if (argp == NULL) + return (ENOMEM); + argp->txnid = (DB_TXN *)&argp[1]; + bp = recbuf; + memcpy(&argp->type, bp, sizeof(argp->type)); + bp += sizeof(argp->type); + memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid)); + bp += sizeof(argp->txnid->txnid); + memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + memcpy(&argp->fileid, bp, sizeof(argp->fileid)); + bp += sizeof(argp->fileid); + memcpy(&argp->bucket, bp, sizeof(argp->bucket)); + bp += sizeof(argp->bucket); + memcpy(&argp->ovflpoint, bp, sizeof(argp->ovflpoint)); + bp += sizeof(argp->ovflpoint); + memcpy(&argp->spares, bp, sizeof(argp->spares)); + bp += sizeof(argp->spares); + memcpy(&argp->metalsn, bp, sizeof(argp->metalsn)); + bp += sizeof(argp->metalsn); + *argpp = argp; + return (0); +} + +/* + * PUBLIC: int __ham_splitdata_log + * PUBLIC: __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, + * PUBLIC: u_int32_t, u_int32_t, db_pgno_t, DBT *, + * PUBLIC: DB_LSN *)); + */ +int __ham_splitdata_log(logp, txnid, ret_lsnp, flags, + fileid, opcode, pgno, pageimage, pagelsn) + DB_LOG *logp; + DB_TXN *txnid; + DB_LSN *ret_lsnp; + u_int32_t flags; + u_int32_t fileid; + u_int32_t opcode; + db_pgno_t pgno; + DBT *pageimage; + DB_LSN * pagelsn; +{ + DBT logrec; + DB_LSN *lsnp, null_lsn; + u_int32_t zero; + u_int32_t rectype, txn_num; + int ret; + u_int8_t *bp; + + rectype = DB_ham_splitdata; + txn_num = txnid == NULL ? 0 : txnid->txnid; + if (txnid == NULL) { + null_lsn.file = 0; + null_lsn.offset = 0; + lsnp = &null_lsn; + } else + lsnp = &txnid->last_lsn; + logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) + + sizeof(fileid) + + sizeof(opcode) + + sizeof(pgno) + + sizeof(u_int32_t) + (pageimage == NULL ? 0 : pageimage->size) + + sizeof(*pagelsn); + if ((logrec.data = (void *)malloc(logrec.size)) == NULL) + return (ENOMEM); + + bp = logrec.data; + memcpy(bp, &rectype, sizeof(rectype)); + bp += sizeof(rectype); + memcpy(bp, &txn_num, sizeof(txn_num)); + bp += sizeof(txn_num); + memcpy(bp, lsnp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + memcpy(bp, &fileid, sizeof(fileid)); + bp += sizeof(fileid); + memcpy(bp, &opcode, sizeof(opcode)); + bp += sizeof(opcode); + memcpy(bp, &pgno, sizeof(pgno)); + bp += sizeof(pgno); + if (pageimage == NULL) { + zero = 0; + memcpy(bp, &zero, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + } else { + memcpy(bp, &pageimage->size, sizeof(pageimage->size)); + bp += sizeof(pageimage->size); + memcpy(bp, pageimage->data, pageimage->size); + bp += pageimage->size; + } + if (pagelsn != NULL) + memcpy(bp, pagelsn, sizeof(*pagelsn)); + else + memset(bp, 0, sizeof(*pagelsn)); + bp += sizeof(*pagelsn); +#ifdef DEBUG + if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size) + fprintf(stderr, "Error in log record length"); +#endif + ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags); + if (txnid != NULL) + txnid->last_lsn = *ret_lsnp; + free(logrec.data); + return (ret); +} + +/* + * PUBLIC: int __ham_splitdata_print + * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); + */ + +int +__ham_splitdata_print(notused1, dbtp, lsnp, notused3, notused4) + DB_LOG *notused1; + DBT *dbtp; + DB_LSN *lsnp; + int notused3; + void *notused4; +{ + __ham_splitdata_args *argp; + u_int32_t i; + int c, ret; + + i = 0; + c = 0; + notused1 = NULL; + notused3 = 0; + notused4 = NULL; + + if((ret = __ham_splitdata_read(dbtp->data, &argp)) != 0) + return (ret); + printf("[%lu][%lu]ham_splitdata: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", + (u_long)lsnp->file, + (u_long)lsnp->offset, + (u_long)argp->type, + (u_long)argp->txnid->txnid, + (u_long)argp->prev_lsn.file, + (u_long)argp->prev_lsn.offset); + printf("\tfileid: %lu\n", (u_long)argp->fileid); + printf("\topcode: %lu\n", (u_long)argp->opcode); + printf("\tpgno: %lu\n", (u_long)argp->pgno); + printf("\tpageimage: "); + for (i = 0; i < argp->pageimage.size; i++) { + c = ((char *)argp->pageimage.data)[i]; + if (isprint(c) || c == 0xa) + putchar(c); + else + printf("%#x ", c); + } + printf("\n"); + printf("\tpagelsn: [%lu][%lu]\n", + (u_long)argp->pagelsn.file, (u_long)argp->pagelsn.offset); + printf("\n"); + free(argp); + return (0); +} + +/* + * PUBLIC: int __ham_splitdata_read __P((void *, __ham_splitdata_args **)); + */ +int +__ham_splitdata_read(recbuf, argpp) + void *recbuf; + __ham_splitdata_args **argpp; +{ + __ham_splitdata_args *argp; + u_int8_t *bp; + + argp = (__ham_splitdata_args *)malloc(sizeof(__ham_splitdata_args) + + sizeof(DB_TXN)); + if (argp == NULL) + return (ENOMEM); + argp->txnid = (DB_TXN *)&argp[1]; + bp = recbuf; + memcpy(&argp->type, bp, sizeof(argp->type)); + bp += sizeof(argp->type); + memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid)); + bp += sizeof(argp->txnid->txnid); + memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + memcpy(&argp->fileid, bp, sizeof(argp->fileid)); + bp += sizeof(argp->fileid); + memcpy(&argp->opcode, bp, sizeof(argp->opcode)); + bp += sizeof(argp->opcode); + memcpy(&argp->pgno, bp, sizeof(argp->pgno)); + bp += sizeof(argp->pgno); + memcpy(&argp->pageimage.size, bp, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + argp->pageimage.data = bp; + bp += argp->pageimage.size; + memcpy(&argp->pagelsn, bp, sizeof(argp->pagelsn)); + bp += sizeof(argp->pagelsn); + *argpp = argp; + return (0); +} + +/* + * PUBLIC: int __ham_replace_log + * PUBLIC: __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, + * PUBLIC: u_int32_t, db_pgno_t, u_int32_t, DB_LSN *, + * PUBLIC: int32_t, DBT *, DBT *, u_int32_t)); + */ +int __ham_replace_log(logp, txnid, ret_lsnp, flags, + fileid, pgno, ndx, pagelsn, off, olditem, + newitem, makedup) + DB_LOG *logp; + DB_TXN *txnid; + DB_LSN *ret_lsnp; + u_int32_t flags; + u_int32_t fileid; + db_pgno_t pgno; + u_int32_t ndx; + DB_LSN * pagelsn; + int32_t off; + DBT *olditem; + DBT *newitem; + u_int32_t makedup; +{ + DBT logrec; + DB_LSN *lsnp, null_lsn; + u_int32_t zero; + u_int32_t rectype, txn_num; + int ret; + u_int8_t *bp; + + rectype = DB_ham_replace; + txn_num = txnid == NULL ? 0 : txnid->txnid; + if (txnid == NULL) { + null_lsn.file = 0; + null_lsn.offset = 0; + lsnp = &null_lsn; + } else + lsnp = &txnid->last_lsn; + logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) + + sizeof(fileid) + + sizeof(pgno) + + sizeof(ndx) + + sizeof(*pagelsn) + + sizeof(off) + + sizeof(u_int32_t) + (olditem == NULL ? 0 : olditem->size) + + sizeof(u_int32_t) + (newitem == NULL ? 0 : newitem->size) + + sizeof(makedup); + if ((logrec.data = (void *)malloc(logrec.size)) == NULL) + return (ENOMEM); + + bp = logrec.data; + memcpy(bp, &rectype, sizeof(rectype)); + bp += sizeof(rectype); + memcpy(bp, &txn_num, sizeof(txn_num)); + bp += sizeof(txn_num); + memcpy(bp, lsnp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + memcpy(bp, &fileid, sizeof(fileid)); + bp += sizeof(fileid); + memcpy(bp, &pgno, sizeof(pgno)); + bp += sizeof(pgno); + memcpy(bp, &ndx, sizeof(ndx)); + bp += sizeof(ndx); + if (pagelsn != NULL) + memcpy(bp, pagelsn, sizeof(*pagelsn)); + else + memset(bp, 0, sizeof(*pagelsn)); + bp += sizeof(*pagelsn); + memcpy(bp, &off, sizeof(off)); + bp += sizeof(off); + if (olditem == NULL) { + zero = 0; + memcpy(bp, &zero, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + } else { + memcpy(bp, &olditem->size, sizeof(olditem->size)); + bp += sizeof(olditem->size); + memcpy(bp, olditem->data, olditem->size); + bp += olditem->size; + } + if (newitem == NULL) { + zero = 0; + memcpy(bp, &zero, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + } else { + memcpy(bp, &newitem->size, sizeof(newitem->size)); + bp += sizeof(newitem->size); + memcpy(bp, newitem->data, newitem->size); + bp += newitem->size; + } + memcpy(bp, &makedup, sizeof(makedup)); + bp += sizeof(makedup); +#ifdef DEBUG + if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size) + fprintf(stderr, "Error in log record length"); +#endif + ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags); + if (txnid != NULL) + txnid->last_lsn = *ret_lsnp; + free(logrec.data); + return (ret); +} + +/* + * PUBLIC: int __ham_replace_print + * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); + */ + +int +__ham_replace_print(notused1, dbtp, lsnp, notused3, notused4) + DB_LOG *notused1; + DBT *dbtp; + DB_LSN *lsnp; + int notused3; + void *notused4; +{ + __ham_replace_args *argp; + u_int32_t i; + int c, ret; + + i = 0; + c = 0; + notused1 = NULL; + notused3 = 0; + notused4 = NULL; + + if((ret = __ham_replace_read(dbtp->data, &argp)) != 0) + return (ret); + printf("[%lu][%lu]ham_replace: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", + (u_long)lsnp->file, + (u_long)lsnp->offset, + (u_long)argp->type, + (u_long)argp->txnid->txnid, + (u_long)argp->prev_lsn.file, + (u_long)argp->prev_lsn.offset); + printf("\tfileid: %lu\n", (u_long)argp->fileid); + printf("\tpgno: %lu\n", (u_long)argp->pgno); + printf("\tndx: %lu\n", (u_long)argp->ndx); + printf("\tpagelsn: [%lu][%lu]\n", + (u_long)argp->pagelsn.file, (u_long)argp->pagelsn.offset); + printf("\toff: %ld\n", (long)argp->off); + printf("\tolditem: "); + for (i = 0; i < argp->olditem.size; i++) { + c = ((char *)argp->olditem.data)[i]; + if (isprint(c) || c == 0xa) + putchar(c); + else + printf("%#x ", c); + } + printf("\n"); + printf("\tnewitem: "); + for (i = 0; i < argp->newitem.size; i++) { + c = ((char *)argp->newitem.data)[i]; + if (isprint(c) || c == 0xa) + putchar(c); + else + printf("%#x ", c); + } + printf("\n"); + printf("\tmakedup: %lu\n", (u_long)argp->makedup); + printf("\n"); + free(argp); + return (0); +} + +/* + * PUBLIC: int __ham_replace_read __P((void *, __ham_replace_args **)); + */ +int +__ham_replace_read(recbuf, argpp) + void *recbuf; + __ham_replace_args **argpp; +{ + __ham_replace_args *argp; + u_int8_t *bp; + + argp = (__ham_replace_args *)malloc(sizeof(__ham_replace_args) + + sizeof(DB_TXN)); + if (argp == NULL) + return (ENOMEM); + argp->txnid = (DB_TXN *)&argp[1]; + bp = recbuf; + memcpy(&argp->type, bp, sizeof(argp->type)); + bp += sizeof(argp->type); + memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid)); + bp += sizeof(argp->txnid->txnid); + memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + memcpy(&argp->fileid, bp, sizeof(argp->fileid)); + bp += sizeof(argp->fileid); + memcpy(&argp->pgno, bp, sizeof(argp->pgno)); + bp += sizeof(argp->pgno); + memcpy(&argp->ndx, bp, sizeof(argp->ndx)); + bp += sizeof(argp->ndx); + memcpy(&argp->pagelsn, bp, sizeof(argp->pagelsn)); + bp += sizeof(argp->pagelsn); + memcpy(&argp->off, bp, sizeof(argp->off)); + bp += sizeof(argp->off); + memcpy(&argp->olditem.size, bp, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + argp->olditem.data = bp; + bp += argp->olditem.size; + memcpy(&argp->newitem.size, bp, sizeof(u_int32_t)); + bp += sizeof(u_int32_t); + argp->newitem.data = bp; + bp += argp->newitem.size; + memcpy(&argp->makedup, bp, sizeof(argp->makedup)); + bp += sizeof(argp->makedup); + *argpp = argp; + return (0); +} + +/* + * PUBLIC: int __ham_newpgno_log + * PUBLIC: __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, + * PUBLIC: u_int32_t, u_int32_t, db_pgno_t, db_pgno_t, + * PUBLIC: u_int32_t, db_pgno_t, u_int32_t, DB_LSN *, + * PUBLIC: DB_LSN *)); + */ +int __ham_newpgno_log(logp, txnid, ret_lsnp, flags, + opcode, fileid, pgno, free_pgno, old_type, old_pgno, + new_type, pagelsn, metalsn) + DB_LOG *logp; + DB_TXN *txnid; + DB_LSN *ret_lsnp; + u_int32_t flags; + u_int32_t opcode; + u_int32_t fileid; + db_pgno_t pgno; + db_pgno_t free_pgno; + u_int32_t old_type; + db_pgno_t old_pgno; + u_int32_t new_type; + DB_LSN * pagelsn; + DB_LSN * metalsn; +{ + DBT logrec; + DB_LSN *lsnp, null_lsn; + u_int32_t rectype, txn_num; + int ret; + u_int8_t *bp; + + rectype = DB_ham_newpgno; + txn_num = txnid == NULL ? 0 : txnid->txnid; + if (txnid == NULL) { + null_lsn.file = 0; + null_lsn.offset = 0; + lsnp = &null_lsn; + } else + lsnp = &txnid->last_lsn; + logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) + + sizeof(opcode) + + sizeof(fileid) + + sizeof(pgno) + + sizeof(free_pgno) + + sizeof(old_type) + + sizeof(old_pgno) + + sizeof(new_type) + + sizeof(*pagelsn) + + sizeof(*metalsn); + if ((logrec.data = (void *)malloc(logrec.size)) == NULL) + return (ENOMEM); + + bp = logrec.data; + memcpy(bp, &rectype, sizeof(rectype)); + bp += sizeof(rectype); + memcpy(bp, &txn_num, sizeof(txn_num)); + bp += sizeof(txn_num); + memcpy(bp, lsnp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + memcpy(bp, &opcode, sizeof(opcode)); + bp += sizeof(opcode); + memcpy(bp, &fileid, sizeof(fileid)); + bp += sizeof(fileid); + memcpy(bp, &pgno, sizeof(pgno)); + bp += sizeof(pgno); + memcpy(bp, &free_pgno, sizeof(free_pgno)); + bp += sizeof(free_pgno); + memcpy(bp, &old_type, sizeof(old_type)); + bp += sizeof(old_type); + memcpy(bp, &old_pgno, sizeof(old_pgno)); + bp += sizeof(old_pgno); + memcpy(bp, &new_type, sizeof(new_type)); + bp += sizeof(new_type); + if (pagelsn != NULL) + memcpy(bp, pagelsn, sizeof(*pagelsn)); + else + memset(bp, 0, sizeof(*pagelsn)); + bp += sizeof(*pagelsn); + if (metalsn != NULL) + memcpy(bp, metalsn, sizeof(*metalsn)); + else + memset(bp, 0, sizeof(*metalsn)); + bp += sizeof(*metalsn); +#ifdef DEBUG + if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size) + fprintf(stderr, "Error in log record length"); +#endif + ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags); + if (txnid != NULL) + txnid->last_lsn = *ret_lsnp; + free(logrec.data); + return (ret); +} + +/* + * PUBLIC: int __ham_newpgno_print + * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); + */ + +int +__ham_newpgno_print(notused1, dbtp, lsnp, notused3, notused4) + DB_LOG *notused1; + DBT *dbtp; + DB_LSN *lsnp; + int notused3; + void *notused4; +{ + __ham_newpgno_args *argp; + u_int32_t i; + int c, ret; + + i = 0; + c = 0; + notused1 = NULL; + notused3 = 0; + notused4 = NULL; + + if((ret = __ham_newpgno_read(dbtp->data, &argp)) != 0) + return (ret); + printf("[%lu][%lu]ham_newpgno: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", + (u_long)lsnp->file, + (u_long)lsnp->offset, + (u_long)argp->type, + (u_long)argp->txnid->txnid, + (u_long)argp->prev_lsn.file, + (u_long)argp->prev_lsn.offset); + printf("\topcode: %lu\n", (u_long)argp->opcode); + printf("\tfileid: %lu\n", (u_long)argp->fileid); + printf("\tpgno: %lu\n", (u_long)argp->pgno); + printf("\tfree_pgno: %lu\n", (u_long)argp->free_pgno); + printf("\told_type: %lu\n", (u_long)argp->old_type); + printf("\told_pgno: %lu\n", (u_long)argp->old_pgno); + printf("\tnew_type: %lu\n", (u_long)argp->new_type); + printf("\tpagelsn: [%lu][%lu]\n", + (u_long)argp->pagelsn.file, (u_long)argp->pagelsn.offset); + printf("\tmetalsn: [%lu][%lu]\n", + (u_long)argp->metalsn.file, (u_long)argp->metalsn.offset); + printf("\n"); + free(argp); + return (0); +} + +/* + * PUBLIC: int __ham_newpgno_read __P((void *, __ham_newpgno_args **)); + */ +int +__ham_newpgno_read(recbuf, argpp) + void *recbuf; + __ham_newpgno_args **argpp; +{ + __ham_newpgno_args *argp; + u_int8_t *bp; + + argp = (__ham_newpgno_args *)malloc(sizeof(__ham_newpgno_args) + + sizeof(DB_TXN)); + if (argp == NULL) + return (ENOMEM); + argp->txnid = (DB_TXN *)&argp[1]; + bp = recbuf; + memcpy(&argp->type, bp, sizeof(argp->type)); + bp += sizeof(argp->type); + memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid)); + bp += sizeof(argp->txnid->txnid); + memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + memcpy(&argp->opcode, bp, sizeof(argp->opcode)); + bp += sizeof(argp->opcode); + memcpy(&argp->fileid, bp, sizeof(argp->fileid)); + bp += sizeof(argp->fileid); + memcpy(&argp->pgno, bp, sizeof(argp->pgno)); + bp += sizeof(argp->pgno); + memcpy(&argp->free_pgno, bp, sizeof(argp->free_pgno)); + bp += sizeof(argp->free_pgno); + memcpy(&argp->old_type, bp, sizeof(argp->old_type)); + bp += sizeof(argp->old_type); + memcpy(&argp->old_pgno, bp, sizeof(argp->old_pgno)); + bp += sizeof(argp->old_pgno); + memcpy(&argp->new_type, bp, sizeof(argp->new_type)); + bp += sizeof(argp->new_type); + memcpy(&argp->pagelsn, bp, sizeof(argp->pagelsn)); + bp += sizeof(argp->pagelsn); + memcpy(&argp->metalsn, bp, sizeof(argp->metalsn)); + bp += sizeof(argp->metalsn); + *argpp = argp; + return (0); +} + +/* + * PUBLIC: int __ham_ovfl_log + * PUBLIC: __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t, + * PUBLIC: u_int32_t, db_pgno_t, u_int32_t, db_pgno_t, + * PUBLIC: DB_LSN *)); + */ +int __ham_ovfl_log(logp, txnid, ret_lsnp, flags, + fileid, start_pgno, npages, free_pgno, metalsn) + DB_LOG *logp; + DB_TXN *txnid; + DB_LSN *ret_lsnp; + u_int32_t flags; + u_int32_t fileid; + db_pgno_t start_pgno; + u_int32_t npages; + db_pgno_t free_pgno; + DB_LSN * metalsn; +{ + DBT logrec; + DB_LSN *lsnp, null_lsn; + u_int32_t rectype, txn_num; + int ret; + u_int8_t *bp; + + rectype = DB_ham_ovfl; + txn_num = txnid == NULL ? 0 : txnid->txnid; + if (txnid == NULL) { + null_lsn.file = 0; + null_lsn.offset = 0; + lsnp = &null_lsn; + } else + lsnp = &txnid->last_lsn; + logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) + + sizeof(fileid) + + sizeof(start_pgno) + + sizeof(npages) + + sizeof(free_pgno) + + sizeof(*metalsn); + if ((logrec.data = (void *)malloc(logrec.size)) == NULL) + return (ENOMEM); + + bp = logrec.data; + memcpy(bp, &rectype, sizeof(rectype)); + bp += sizeof(rectype); + memcpy(bp, &txn_num, sizeof(txn_num)); + bp += sizeof(txn_num); + memcpy(bp, lsnp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + memcpy(bp, &fileid, sizeof(fileid)); + bp += sizeof(fileid); + memcpy(bp, &start_pgno, sizeof(start_pgno)); + bp += sizeof(start_pgno); + memcpy(bp, &npages, sizeof(npages)); + bp += sizeof(npages); + memcpy(bp, &free_pgno, sizeof(free_pgno)); + bp += sizeof(free_pgno); + if (metalsn != NULL) + memcpy(bp, metalsn, sizeof(*metalsn)); + else + memset(bp, 0, sizeof(*metalsn)); + bp += sizeof(*metalsn); +#ifdef DEBUG + if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size) + fprintf(stderr, "Error in log record length"); +#endif + ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags); + if (txnid != NULL) + txnid->last_lsn = *ret_lsnp; + free(logrec.data); + return (ret); +} + +/* + * PUBLIC: int __ham_ovfl_print + * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); + */ + +int +__ham_ovfl_print(notused1, dbtp, lsnp, notused3, notused4) + DB_LOG *notused1; + DBT *dbtp; + DB_LSN *lsnp; + int notused3; + void *notused4; +{ + __ham_ovfl_args *argp; + u_int32_t i; + int c, ret; + + i = 0; + c = 0; + notused1 = NULL; + notused3 = 0; + notused4 = NULL; + + if((ret = __ham_ovfl_read(dbtp->data, &argp)) != 0) + return (ret); + printf("[%lu][%lu]ham_ovfl: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", + (u_long)lsnp->file, + (u_long)lsnp->offset, + (u_long)argp->type, + (u_long)argp->txnid->txnid, + (u_long)argp->prev_lsn.file, + (u_long)argp->prev_lsn.offset); + printf("\tfileid: %lu\n", (u_long)argp->fileid); + printf("\tstart_pgno: %lu\n", (u_long)argp->start_pgno); + printf("\tnpages: %lu\n", (u_long)argp->npages); + printf("\tfree_pgno: %lu\n", (u_long)argp->free_pgno); + printf("\tmetalsn: [%lu][%lu]\n", + (u_long)argp->metalsn.file, (u_long)argp->metalsn.offset); + printf("\n"); + free(argp); + return (0); +} + +/* + * PUBLIC: int __ham_ovfl_read __P((void *, __ham_ovfl_args **)); + */ +int +__ham_ovfl_read(recbuf, argpp) + void *recbuf; + __ham_ovfl_args **argpp; +{ + __ham_ovfl_args *argp; + u_int8_t *bp; + + argp = (__ham_ovfl_args *)malloc(sizeof(__ham_ovfl_args) + + sizeof(DB_TXN)); + if (argp == NULL) + return (ENOMEM); + argp->txnid = (DB_TXN *)&argp[1]; + bp = recbuf; + memcpy(&argp->type, bp, sizeof(argp->type)); + bp += sizeof(argp->type); + memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid)); + bp += sizeof(argp->txnid->txnid); + memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); + bp += sizeof(DB_LSN); + memcpy(&argp->fileid, bp, sizeof(argp->fileid)); + bp += sizeof(argp->fileid); + memcpy(&argp->start_pgno, bp, sizeof(argp->start_pgno)); + bp += sizeof(argp->start_pgno); + memcpy(&argp->npages, bp, sizeof(argp->npages)); + bp += sizeof(argp->npages); + memcpy(&argp->free_pgno, bp, sizeof(argp->free_pgno)); + bp += sizeof(argp->free_pgno); + memcpy(&argp->metalsn, bp, sizeof(argp->metalsn)); + bp += sizeof(argp->metalsn); + *argpp = argp; + return (0); +} + +/* + * PUBLIC: int __ham_init_print __P((DB_ENV *)); + */ +int +__ham_init_print(dbenv) + DB_ENV *dbenv; +{ + int ret; + + if ((ret = __db_add_recovery(dbenv, + __ham_insdel_print, DB_ham_insdel)) != 0) + return (ret); + if ((ret = __db_add_recovery(dbenv, + __ham_newpage_print, DB_ham_newpage)) != 0) + return (ret); + if ((ret = __db_add_recovery(dbenv, + __ham_splitmeta_print, DB_ham_splitmeta)) != 0) + return (ret); + if ((ret = __db_add_recovery(dbenv, + __ham_splitdata_print, DB_ham_splitdata)) != 0) + return (ret); + if ((ret = __db_add_recovery(dbenv, + __ham_replace_print, DB_ham_replace)) != 0) + return (ret); + if ((ret = __db_add_recovery(dbenv, + __ham_newpgno_print, DB_ham_newpgno)) != 0) + return (ret); + if ((ret = __db_add_recovery(dbenv, + __ham_ovfl_print, DB_ham_ovfl)) != 0) + return (ret); + return (0); +} + +/* + * PUBLIC: int __ham_init_recover __P((DB_ENV *)); + */ +int +__ham_init_recover(dbenv) + DB_ENV *dbenv; +{ + int ret; + + if ((ret = __db_add_recovery(dbenv, + __ham_insdel_recover, DB_ham_insdel)) != 0) + return (ret); + if ((ret = __db_add_recovery(dbenv, + __ham_newpage_recover, DB_ham_newpage)) != 0) + return (ret); + if ((ret = __db_add_recovery(dbenv, + __ham_splitmeta_recover, DB_ham_splitmeta)) != 0) + return (ret); + if ((ret = __db_add_recovery(dbenv, + __ham_splitdata_recover, DB_ham_splitdata)) != 0) + return (ret); + if ((ret = __db_add_recovery(dbenv, + __ham_replace_recover, DB_ham_replace)) != 0) + return (ret); + if ((ret = __db_add_recovery(dbenv, + __ham_newpgno_recover, DB_ham_newpgno)) != 0) + return (ret); + if ((ret = __db_add_recovery(dbenv, + __ham_ovfl_recover, DB_ham_ovfl)) != 0) + return (ret); + return (0); +} + diff --git a/db2/hash/hash_conv.c b/db2/hash/hash_conv.c new file mode 100644 index 0000000..22901af --- /dev/null +++ b/db2/hash/hash_conv.c @@ -0,0 +1,101 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997 + * Sleepycat Software. All rights reserved. + */ +#include "config.h" + +#ifndef lint +static const char sccsid[] = "@(#)hash_conv.c 10.3 (Sleepycat) 6/21/97"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> +#endif + +#include "db_int.h" +#include "db_page.h" +#include "db_swap.h" +#include "hash.h" + +/* + * __h_pgin, __ham_pgout -- + * Convert host-specific page layout to/from the host-independent + * format stored on disk. + * + * PUBLIC: int __ham_pgin __P((db_pgno_t, void *, DBT *)); + * PUBLIC: int __ham_pgout __P((db_pgno_t, void *, DBT *)); + */ +int +__ham_pgin(pg, pp, cookie) + db_pgno_t pg; + void *pp; + DBT *cookie; +{ + DB_PGINFO *pginfo; + u_int32_t tpgno; + + pginfo = (DB_PGINFO *)cookie->data; + tpgno = PGNO((PAGE *)pp); + if (pginfo->needswap) + M_32_SWAP(tpgno); + + if (pg != PGNO_METADATA && pg != tpgno) { + P_INIT(pp, pginfo->db_pagesize, + pg, PGNO_INVALID, PGNO_INVALID, 0, P_HASH); + return (0); + } + + if (!pginfo->needswap) + return (0); + return (pg == PGNO_METADATA ? __ham_mswap(pp) : __db_pgin(pg, pp)); +} + +int +__ham_pgout(pg, pp, cookie) + db_pgno_t pg; + void *pp; + DBT *cookie; +{ + DB_PGINFO *pginfo; + + pginfo = (DB_PGINFO *)cookie->data; + if (!pginfo->needswap) + return (0); + return (pg == PGNO_METADATA ? __ham_mswap(pp) : __db_pgout(pg, pp)); +} + +/* + * __ham_mswap -- + * Swap the bytes on the hash metadata page. + * + * PUBLIC: int __ham_mswap __P((void *)); + */ +int +__ham_mswap(pg) + void *pg; +{ + u_int8_t *p; + int i; + + p = (u_int8_t *)pg; + SWAP32(p); /* lsn part 1 */ + SWAP32(p); /* lsn part 2 */ + SWAP32(p); /* pgno */ + SWAP32(p); /* magic */ + SWAP32(p); /* version */ + SWAP32(p); /* pagesize */ + SWAP32(p); /* ovfl_point */ + SWAP32(p); /* last_freed */ + SWAP32(p); /* max_bucket */ + SWAP32(p); /* high_mask */ + SWAP32(p); /* low_mask */ + SWAP32(p); /* ffactor */ + SWAP32(p); /* nelem */ + SWAP32(p); /* h_charkey */ + SWAP32(p); /* flags */ + for (i = 0; i < NCACHED; ++i) + SWAP32(p); /* spares */ + return (0); +} diff --git a/db2/hash/hash_debug.c b/db2/hash/hash_debug.c new file mode 100644 index 0000000..979ddd7 --- /dev/null +++ b/db2/hash/hash_debug.c @@ -0,0 +1,96 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997 + * Sleepycat Software. All rights reserved. + */ +/* + * Copyright (c) 1995 + * The President and Fellows of Harvard University. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Jeremy Rassen. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "config.h" + +#ifndef lint +static const char sccsid[] = "@(#)hash_debug.c 10.2 (Sleepycat) 6/21/97"; +#endif /* not lint */ + +#ifdef DEBUG +/* + * PACKAGE: hashing + * + * DESCRIPTION: + * Debug routines. + * + * ROUTINES: + * + * External + * __dump_bucket + */ +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> + +#include <stdio.h> +#include <string.h> +#endif + +#include "db_int.h" +#include "db_page.h" +#include "hash.h" + +/* + * __ham_dump_bucket -- + * + * PUBLIC: #ifdef DEBUG + * PUBLIC: void __ham_dump_bucket __P((HTAB *, u_int32_t)); + * PUBLIC: #endif + */ +void +__ham_dump_bucket(hashp, bucket) + HTAB *hashp; + u_int32_t bucket; +{ + PAGE *p; + db_pgno_t pgno; + int ret; + + for (pgno = BUCKET_TO_PAGE(hashp, bucket); pgno != PGNO_INVALID;) { + if ((ret = memp_fget(hashp->dbp->mpf, &pgno, 0, &p)) != 0) + break; + (void)__db_prpage(p, 1); + pgno = p->next_pgno; + (void)memp_fput(hashp->dbp->mpf, p, 0); + } +} +#endif /* DEBUG */ diff --git a/db2/hash/hash_dup.c b/db2/hash/hash_dup.c new file mode 100644 index 0000000..059eec6 --- /dev/null +++ b/db2/hash/hash_dup.c @@ -0,0 +1,544 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997 + * Sleepycat Software. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#include "config.h" + +#ifndef lint +static const char sccsid[] = "@(#)hash_dup.c 10.5 (Sleepycat) 7/27/97"; +#endif /* not lint */ + +/* + * PACKAGE: hashing + * + * DESCRIPTION: + * Manipulation of duplicates for the hash package. + * + * ROUTINES: + * + * External + * __add_dup + * Internal + */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#endif + +#include "db_int.h" +#include "db_page.h" +#include "db_swap.h" +#include "hash.h" + +static int __ham_check_move __P((HTAB *, HASH_CURSOR *, int32_t)); +static int __ham_dup_convert __P((HTAB *, HASH_CURSOR *)); +static int __ham_make_dup __P((const DBT *, DBT *d, void **, u_int32_t *)); + +/* + * Called from hash_access to add a duplicate key. nval is the new + * value that we want to add. The flags correspond to the flag values + * to cursor_put indicating where to add the new element. + * There are 4 cases. + * Case 1: The existing duplicate set already resides on a separate page. + * We can use common code for this. + * Case 2: The element is small enough to just be added to the existing set. + * Case 3: The element is large enough to be a big item, so we're going to + * have to push the set onto a new page. + * Case 4: The element is large enough to push the duplicate set onto a + * separate page. + * + * PUBLIC: int __ham_add_dup __P((HTAB *, HASH_CURSOR *, DBT *, int)); + */ +int +__ham_add_dup(hashp, hcp, nval, flags) + HTAB *hashp; + HASH_CURSOR *hcp; + DBT *nval; + int flags; +{ + DBT pval, tmp_val; + HKEYDATA *hk; + u_int32_t del_len, new_size; + int ret; + + if (flags == DB_CURRENT && hcp->dpgno == PGNO_INVALID) + del_len = hcp->dup_len; + else + del_len = 0; + + if ((ret = __ham_check_move(hashp, hcp, + (int32_t)DUP_SIZE(nval->size) - (int32_t)del_len)) != 0) + return (ret); + + /* + * Check if resulting duplicate set is going to need to go + * onto a separate duplicate page. If so, convert the + * duplicate set and add the new one. After conversion, + * hcp->dndx is the first free ndx or the index of the + * current pointer into the duplicate set. + */ + hk = H_PAIRDATA(hcp->pagep, hcp->bndx); + new_size = DUP_SIZE(nval->size) - del_len + LEN_HKEYDATA(hcp->pagep, + hashp->hdr->pagesize, H_DATAINDEX(hcp->bndx)); + + /* + * We convert to off-page duplicates if the item is a big item, + * the addition of the new item will make the set large, or + * if there isn't enough room on this page to add the next item. + */ + if (hk->type != H_OFFDUP && + (hk->type == H_OFFPAGE || ISBIG(hashp, new_size) || + DUP_SIZE(nval->size) - del_len > P_FREESPACE(hcp->pagep))) { + + if ((ret = __ham_dup_convert(hashp, hcp)) != 0) + return (ret); + else + hk = H_PAIRDATA(hcp->pagep, hcp->bndx); + } + + /* There are two separate cases here: on page and off page. */ + if (hk->type != H_OFFDUP) { + if (hk->type != H_DUPLICATE) { + hk->type = H_DUPLICATE; + pval.flags = 0; + pval.data = hk->data; + pval.size = LEN_HDATA(hcp->pagep, hashp->hdr->pagesize, + hcp->bndx); + if ((ret = __ham_make_dup(&pval, &tmp_val, &hcp->big_data, + &hcp->big_datalen)) != 0 || + (ret = __ham_replpair(hashp, hcp, &tmp_val, 1)) != 0) + return (ret); + } + + /* Now make the new entry a duplicate. */ + if ((ret = __ham_make_dup(nval, + &tmp_val, &hcp->big_data, &hcp->big_datalen)) != 0) + return (ret); + + tmp_val.dlen = 0; + switch (flags) { /* On page. */ + case DB_KEYFIRST: + tmp_val.doff = 0; + break; + case DB_KEYLAST: + tmp_val.doff = LEN_HDATA(hcp->pagep, + hashp->hdr->pagesize, hcp->bndx); + break; + case DB_CURRENT: + tmp_val.doff = hcp->dup_off; + tmp_val.dlen = DUP_SIZE(hcp->dup_len); + break; + case DB_BEFORE: + tmp_val.doff = hcp->dup_off; + break; + case DB_AFTER: + tmp_val.doff = hcp->dup_off + DUP_SIZE(hcp->dup_len); + break; + } + /* Add the duplicate. */ + ret = __ham_replpair(hashp, hcp, &tmp_val, 0); + if (ret == 0) + ret = __ham_dirty_page(hashp, hcp->pagep); + __ham_c_update(hashp, hcp, hcp->pgno, tmp_val.size, 1, 1); + return (ret); + } + + /* If we get here, then we're on duplicate pages. */ + if (hcp->dpgno == PGNO_INVALID) { + memcpy(&hcp->dpgno, + (u_int8_t *)hk + SSZ(HOFFDUP, pgno), sizeof(db_pgno_t)); + hcp->dndx = 0; + } + + switch (flags) { + case DB_KEYFIRST: + /* + * The only way that we are already on a dup page is + * if we just converted the on-page representation. + * In that case, we've only got one page of duplicates. + */ + if (hcp->dpagep == NULL && (ret = + __db_dend(hashp->dbp, hcp->dpgno, &hcp->dpagep)) != 0) + return (ret); + hcp->dndx = 0; + break; + case DB_KEYLAST: + if (hcp->dpagep == NULL && (ret = + __db_dend(hashp->dbp, hcp->dpgno, &hcp->dpagep)) != 0) + return (ret); + hcp->dpgno = PGNO(hcp->dpagep); + hcp->dndx = NUM_ENT(hcp->dpagep); + break; + case DB_CURRENT: + if ((ret = __db_ditem(hashp->dbp, hcp->dpagep, hcp->dndx, + BKEYDATA_SIZE(GET_BKEYDATA(hcp->dpagep, hcp->dndx)->len))) + != 0) + return (ret); + break; + case DB_BEFORE: /* The default behavior is correct. */ + break; + case DB_AFTER: + hcp->dndx++; + break; + } + + ret = __db_dput(hashp->dbp, + nval, &hcp->dpagep, &hcp->dndx, __ham_overflow_page); + hcp->pgno = PGNO(hcp->pagep); + __ham_c_update(hashp, hcp, hcp->pgno, nval->size, 1, 1); + return (ret); +} + +/* + * Convert an on-page set of duplicates to an offpage set of duplicates. + */ +static int +__ham_dup_convert(hashp, hcp) + HTAB *hashp; + HASH_CURSOR *hcp; +{ + BOVERFLOW bo; + DBT dbt; + HOFFPAGE ho; + db_indx_t dndx, len; + int ret; + u_int8_t *p, *pend; + + /* + * Create a new page for the duplicates. + */ + if ((ret = + __ham_overflow_page(hashp->dbp, P_DUPLICATE, &hcp->dpagep)) != 0) + return (ret); + hcp->dpagep->type = P_DUPLICATE; + hcp->dpgno = PGNO(hcp->dpagep); + + /* + * Now put the duplicates onto the new page. + */ + dbt.flags = 0; + switch (((HKEYDATA *)H_PAIRDATA(hcp->pagep, hcp->bndx))->type) { + case H_KEYDATA: + /* Simple case, one key on page; move it to dup page. */ + dndx = 0; + dbt.size = + LEN_HDATA(hcp->pagep, hashp->hdr->pagesize, hcp->bndx); + dbt.data = + ((HKEYDATA *)H_PAIRDATA(hcp->pagep, hcp->bndx))->data; + ret = __db_pitem(hashp->dbp, hcp->dpagep, + (u_int32_t)dndx, BKEYDATA_SIZE(dbt.size), NULL, &dbt); + if (ret == 0) + __ham_dirty_page(hashp, hcp->dpagep); + break; + case H_OFFPAGE: + /* Simple case, one key on page; move it to dup page. */ + dndx = 0; + memcpy(&ho, + P_ENTRY(hcp->pagep, H_DATAINDEX(hcp->bndx)), HOFFPAGE_SIZE); + bo.deleted = 0; + bo.type = ho.type; + bo.pgno = ho.pgno; + bo.tlen = ho.tlen; + dbt.size = BOVERFLOW_SIZE; + dbt.data = &bo; + + ret = __db_pitem(hashp->dbp, hcp->dpagep, + (u_int32_t)dndx, dbt.size, &dbt, NULL); + if (ret == 0) + __ham_dirty_page(hashp, hcp->dpagep); + break; + case H_DUPLICATE: + p = ((HKEYDATA *)H_PAIRDATA(hcp->pagep, hcp->bndx))->data; + pend = p + + LEN_HDATA(hcp->pagep, hashp->hdr->pagesize, hcp->bndx); + + for (dndx = 0; p < pend; dndx++) { + memcpy(&len, p, sizeof(db_indx_t)); + dbt.size = len; + p += sizeof(db_indx_t); + dbt.data = p; + p += len + sizeof(db_indx_t); + ret = __db_dput(hashp->dbp, &dbt, + &hcp->dpagep, &dndx, __ham_overflow_page); + if (ret != 0) + break; + } + break; + default: + ret = __db_pgfmt(hashp->dbp, (u_long)hcp->pgno); + } + if (ret == 0) { + /* + * Now attach this to the source page in place of + * the old duplicate item. + */ + __ham_move_offpage(hashp, hcp->pagep, + (u_int32_t)H_DATAINDEX(hcp->bndx), hcp->dpgno); + + /* Can probably just do a "put" here. */ + ret = __ham_dirty_page(hashp, hcp->pagep); + } else { + (void)__ham_del_page(hashp->dbp, hcp->dpagep); + hcp->dpagep = NULL; + } + return (ret); +} + +static int +__ham_make_dup(notdup, dup, bufp, sizep) + const DBT *notdup; + DBT *dup; + void **bufp; + u_int32_t *sizep; +{ + db_indx_t tsize, item_size; + int ret; + u_int8_t *p; + + item_size = (db_indx_t)notdup->size; + tsize = DUP_SIZE(item_size); + if ((ret = __ham_init_dbt(dup, tsize, bufp, sizep)) != 0) + return (ret); + + dup->dlen = 0; + dup->flags = notdup->flags; + F_SET(dup, DB_DBT_PARTIAL); + + p = dup->data; + memcpy(p, &item_size, sizeof(db_indx_t)); + p += sizeof(db_indx_t); + memcpy(p, notdup->data, notdup->size); + p += notdup->size; + memcpy(p, &item_size, sizeof(db_indx_t)); + + dup->doff = 0; + dup->dlen = notdup->size; + + return (0); +} + +static int +__ham_check_move(hashp, hcp, add_len) + HTAB *hashp; + HASH_CURSOR *hcp; + int32_t add_len; +{ + DBT k, d; + DB_LSN new_lsn; + HKEYDATA *hk; + PAGE *next_pagep; + db_pgno_t next_pgno; + int rectype, ret; + u_int32_t new_datalen, old_len; + + /* + * Check if we can do whatever we need to on this page. If not, + * then we'll have to move the current element to a new page. + */ + + hk = H_PAIRDATA(hcp->pagep, hcp->bndx); + + /* + * If the item is already off page duplicates or an offpage item, + * then we know we can do whatever we need to do in-place + */ + if (hk->type == H_OFFDUP || hk->type == H_OFFPAGE) + return (0); + + old_len = + LEN_HITEM(hcp->pagep, hashp->hdr->pagesize, H_DATAINDEX(hcp->bndx)); + new_datalen = old_len - HKEYDATA_SIZE(0) + add_len; + + /* + * We need to add a new page under two conditions: + * 1. The addition makes the total data length cross the BIG + * threshold and the OFFDUP structure won't fit on this page. + * 2. The addition does not make the total data cross the + * threshold, but the new data won't fit on the page. + * If neither of these is true, then we can return. + */ + if (ISBIG(hashp, new_datalen) && (old_len > HOFFDUP_SIZE || + HOFFDUP_SIZE - old_len <= P_FREESPACE(hcp->pagep))) + return (0); + + if (!ISBIG(hashp, new_datalen) && + add_len <= (int32_t)P_FREESPACE(hcp->pagep)) + return (0); + + /* + * If we get here, then we need to move the item to a new page. + * Check if there are more pages in the chain. + */ + + new_datalen = ISBIG(hashp, new_datalen) ? + HOFFDUP_SIZE : HKEYDATA_SIZE(new_datalen); + + next_pagep = NULL; + for (next_pgno = NEXT_PGNO(hcp->pagep); next_pgno != PGNO_INVALID; + next_pgno = NEXT_PGNO(next_pagep)) { + if (next_pagep != NULL && + (ret = __ham_put_page(hashp->dbp, next_pagep, 0)) != 0) + return (ret); + + if ((ret = __ham_get_page(hashp->dbp, next_pgno, &next_pagep)) != 0) + return (ret); + + if (P_FREESPACE(next_pagep) >= new_datalen) + break; + } + + /* No more pages, add one. */ + if (next_pagep == NULL && + (ret = __ham_add_ovflpage(hashp, hcp->pagep, 0, &next_pagep)) != 0) + return (ret); + + /* Add new page at the end of the chain. */ + if (P_FREESPACE(next_pagep) < new_datalen && + (ret = __ham_add_ovflpage(hashp, next_pagep, 1, &next_pagep)) != 0) + return (ret); + + /* Copy the item to the new page. */ + if (DB_LOGGING(hashp->dbp)) { + rectype = PUTPAIR; + k.flags = 0; + d.flags = 0; + if (H_PAIRKEY(hcp->pagep, hcp->bndx)->type == H_OFFPAGE) { + rectype |= PAIR_KEYMASK; + k.data = H_PAIRKEY(hcp->pagep, hcp->bndx); + k.size = HOFFPAGE_SIZE; + } else { + k.data = H_PAIRKEY(hcp->pagep, hcp->bndx)->data; + k.size = LEN_HKEY(hcp->pagep, + hashp->hdr->pagesize, hcp->bndx); + } + + if (hk->type == H_OFFPAGE) { + rectype |= PAIR_DATAMASK; + d.data = H_PAIRDATA(hcp->pagep, hcp->bndx); + d.size = HOFFPAGE_SIZE; + } else { + d.data = H_PAIRDATA(hcp->pagep, hcp->bndx)->data; + d.size = LEN_HDATA(hcp->pagep, + hashp->hdr->pagesize, hcp->bndx); + } + + + if ((ret = __ham_insdel_log(hashp->dbp->dbenv->lg_info, + (DB_TXN *)hashp->dbp->txn, &new_lsn, 0, rectype, + hashp->dbp->log_fileid, PGNO(next_pagep), + (u_int32_t)H_NUMPAIRS(next_pagep), &LSN(next_pagep), + &k, &d)) != 0) + return (ret); + + /* Move lsn onto page. */ + LSN(next_pagep) = new_lsn; /* Structure assignment. */ + } + + __ham_copy_item(hashp, hcp->pagep, H_KEYINDEX(hcp->bndx), next_pagep); + __ham_copy_item(hashp, hcp->pagep, H_DATAINDEX(hcp->bndx), next_pagep); + + /* Now delete the pair from the current page. */ + ret = __ham_del_pair(hashp, hcp); + + (void)__ham_put_page(hashp->dbp, hcp->pagep, 1); + hcp->pagep = next_pagep; + hcp->pgno = PGNO(hcp->pagep); + hcp->bndx = H_NUMPAIRS(hcp->pagep) - 1; + F_SET(hcp, H_EXPAND); + return (ret); +} + +/* + * Replace an onpage set of duplicates with the OFFDUP structure that + * references the duplicate page. + * XXX This is really just a special case of __onpage_replace; we should + * probably combine them. + * PUBLIC: void __ham_move_offpage __P((HTAB *, PAGE *, u_int32_t, db_pgno_t)); + */ +void +__ham_move_offpage(hashp, pagep, ndx, pgno) + HTAB *hashp; + PAGE *pagep; + u_int32_t ndx; + db_pgno_t pgno; +{ + DBT new_dbt; + DBT old_dbt; + HOFFDUP od; + db_indx_t i; + int32_t shrink; + u_int8_t *src; + + od.type = H_OFFDUP; + od.pgno = pgno; + + if (DB_LOGGING(hashp->dbp)) { + new_dbt.data = &od; + new_dbt.size = HOFFDUP_SIZE; + old_dbt.data = P_ENTRY(pagep, ndx); + old_dbt.size = LEN_HITEM(pagep, hashp->hdr->pagesize, ndx); + (void)__ham_replace_log(hashp->dbp->dbenv->lg_info, + (DB_TXN *)hashp->dbp->txn, &LSN(pagep), 0, + hashp->dbp->log_fileid, PGNO(pagep), (u_int32_t)ndx, + &LSN(pagep), -1, &old_dbt, &new_dbt, 0); + } + + shrink = + LEN_HITEM(pagep, hashp->hdr->pagesize, ndx) - HOFFDUP_SIZE; + + if (shrink != 0) { + /* Copy data. */ + src = (u_int8_t *)(pagep) + HOFFSET(pagep); + memmove(src + shrink, src, pagep->inp[ndx] - HOFFSET(pagep)); + HOFFSET(pagep) += shrink; + + /* Update index table. */ + for (i = ndx; i < NUM_ENT(pagep); i++) + pagep->inp[i] += shrink; + } + + /* Now copy the offdup entry onto the page. */ + memcpy(P_ENTRY(pagep, ndx), &od, HOFFDUP_SIZE); +} diff --git a/db2/hash/hash_func.c b/db2/hash/hash_func.c new file mode 100644 index 0000000..2ef47af --- /dev/null +++ b/db2/hash/hash_func.c @@ -0,0 +1,219 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997 + * Sleepycat Software. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993 + * Margo Seltzer. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "config.h" + +#ifndef lint +static const char sccsid[] = "@(#)hash_func.c 10.6 (Sleepycat) 7/26/97"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> +#endif + +#include "db_int.h" +#include "db_page.h" +#include "hash.h" + +/* + * __ham_func2 -- + * Phong Vo's linear congruential hash. + * + * PUBLIC: u_int32_t __ham_func2 __P((const void *, u_int32_t)); + */ +#define dcharhash(h, c) ((h) = 0x63c63cd9*(h) + 0x9c39c33d + (c)) + +u_int32_t +__ham_func2(key, len) + const void *key; + u_int32_t len; +{ + const u_int8_t *e, *k; + u_int32_t h; + u_int8_t c; + + k = key; + e = k + len; + for (h = 0; k != e;) { + c = *k++; + if (!c && k > e) + break; + dcharhash(h, c); + } + return (h); +} + +/* + * __ham_func3 -- + * Ozan Yigit's original sdbm hash. + * + * Ugly, but fast. Break the string up into 8 byte units. On the first time + * through the loop get the "leftover bytes" (strlen % 8). On every other + * iteration, perform 8 HASHC's so we handle all 8 bytes. Essentially, this + * saves us 7 cmp & branch instructions. + * + * PUBLIC: u_int32_t __ham_func3 __P((const void *, u_int32_t)); + */ +u_int32_t +__ham_func3(key, len) + const void *key; + u_int32_t len; +{ + const u_int8_t *k; + u_int32_t n, loop; + + if (len == 0) + return (0); + +#define HASHC n = *k++ + 65599 * n + n = 0; + k = key; + + loop = (len + 8 - 1) >> 3; + switch (len & (8 - 1)) { + case 0: + do { + HASHC; + case 7: + HASHC; + case 6: + HASHC; + case 5: + HASHC; + case 4: + HASHC; + case 3: + HASHC; + case 2: + HASHC; + case 1: + HASHC; + } while (--loop); + } + return (n); +} + +/* + * __ham_func4 -- + * Chris Torek's hash function. Although this function performs only + * slightly worse than __ham_func5 on strings, it performs horribly on + * numbers. + * + * PUBLIC: u_int32_t __ham_func4 __P((const void *, u_int32_t)); + */ +u_int32_t +__ham_func4(key, len) + const void *key; + u_int32_t len; +{ + const u_int8_t *k; + u_int32_t h, loop; + + if (len == 0) + return (0); + +#define HASH4a h = (h << 5) - h + *k++; +#define HASH4b h = (h << 5) + h + *k++; +#define HASH4 HASH4b + h = 0; + k = key; + + loop = (len + 8 - 1) >> 3; + switch (len & (8 - 1)) { + case 0: + do { + HASH4; + case 7: + HASH4; + case 6: + HASH4; + case 5: + HASH4; + case 4: + HASH4; + case 3: + HASH4; + case 2: + HASH4; + case 1: + HASH4; + } while (--loop); + } + return (h); +} + +/* + * Fowler/Noll/Vo hash + * + * The basis of the hash algorithm was taken from an idea sent by email to the + * IEEE Posix P1003.2 mailing list from Phong Vo (kpv@research.att.com) and + * Glenn Fowler (gsf@research.att.com). Landon Curt Noll (chongo@toad.com) + * later improved on their algorithm. + * + * The magic is in the interesting relationship between the special prime + * 16777619 (2^24 + 403) and 2^32 and 2^8. + * + * This hash produces the fewest collisions of any function that we've seen so + * far, and works well on both numbers and strings. + * + * PUBLIC: u_int32_t __ham_func5 __P((const void *, u_int32_t)); + */ +u_int32_t +__ham_func5(key, len) + const void *key; + u_int32_t len; +{ + const u_int8_t *k, *e; + u_int32_t h; + + k = key; + e = k + len; + for (h = 0; k < e; ++k) { + h *= 16777619; + h ^= *k; + } + return (h); +} diff --git a/db2/hash/hash_page.c b/db2/hash/hash_page.c new file mode 100644 index 0000000..68c31b1 --- /dev/null +++ b/db2/hash/hash_page.c @@ -0,0 +1,1775 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997 + * Sleepycat Software. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994 + * Margo Seltzer. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "config.h" + +#ifndef lint +static const char sccsid[] = "@(#)hash_page.c 10.18 (Sleepycat) 8/21/97"; +#endif /* not lint */ + + +/* + * PACKAGE: hashing + * + * DESCRIPTION: + * Page manipulation for hashing package. + * + * ROUTINES: + * + * External + * __get_page + * __add_ovflpage + * __overflow_page + * Internal + * open_temp + */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> + +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#endif + +#include "db_int.h" +#include "db_page.h" +#include "db_swap.h" +#include "hash.h" + +static int __ham_lock_bucket __P((DB *, HASH_CURSOR *, db_lockmode_t)); + +#ifdef DEBUG_SLOW +static void account_page(HTAB *, db_pgno_t, int); +#endif + +/* + * PUBLIC: int __ham_item __P((HTAB *, HASH_CURSOR *, db_lockmode_t)); + */ +int +__ham_item(hashp, cursorp, mode) + HTAB *hashp; + HASH_CURSOR *cursorp; + db_lockmode_t mode; +{ + db_pgno_t next_pgno; + int ret; + + if (F_ISSET(cursorp, H_DELETED)) + return (EINVAL); + F_CLR(cursorp, H_OK | H_NOMORE); + + /* Check if we need to get a page for this cursor. */ + if ((ret = __ham_get_cpage(hashp, cursorp, mode)) != 0) + return (ret); + + /* Check if we are looking for space in which to insert an item. */ + if (cursorp->seek_size && cursorp->seek_found_page == PGNO_INVALID + && cursorp->seek_size < P_FREESPACE(cursorp->pagep)) + cursorp->seek_found_page = cursorp->pgno; + + /* Check if we need to go on to the next page. */ + if (F_ISSET(cursorp, H_ISDUP) && cursorp->dpgno == PGNO_INVALID) + /* + * ISDUP is set, and offset is at the beginning of the datum. + * We need to grab the length of the datum, then set the datum + * pointer to be the beginning of the datum. + */ + memcpy(&cursorp->dup_len, + H_PAIRDATA(cursorp->pagep, cursorp->bndx)->data + + cursorp->dup_off, sizeof(db_indx_t)); + else if (F_ISSET(cursorp, H_ISDUP)) { + /* Make sure we're not about to run off the page. */ + if (cursorp->dpagep == NULL && (ret = __ham_get_page(hashp->dbp, + cursorp->dpgno, &cursorp->dpagep)) != 0) + return (ret); + + if (cursorp->dndx >= NUM_ENT(cursorp->dpagep)) { + if (NEXT_PGNO(cursorp->dpagep) == PGNO_INVALID) { + if ((ret = __ham_put_page(hashp->dbp, + cursorp->dpagep, 0)) != 0) + return (ret); + F_CLR(cursorp, H_ISDUP); + cursorp->dpagep = NULL; + cursorp->dpgno = PGNO_INVALID; + cursorp->dndx = NDX_INVALID; + cursorp->bndx++; + } else if ((ret = __ham_next_cpage(hashp, cursorp, + NEXT_PGNO(cursorp->dpagep), 0, H_ISDUP)) != 0) + return (ret); + } + } + + if (cursorp->bndx >= (db_indx_t)H_NUMPAIRS(cursorp->pagep)) { + /* Fetch next page. */ + if (NEXT_PGNO(cursorp->pagep) == PGNO_INVALID) { + F_SET(cursorp, H_NOMORE); + if (cursorp->dpagep != NULL && + (ret = __ham_put_page(hashp->dbp, + cursorp->dpagep, 0)) != 0) + return (ret); + cursorp->dpgno = PGNO_INVALID; + return (DB_NOTFOUND); + } + next_pgno = NEXT_PGNO(cursorp->pagep); + cursorp->bndx = 0; + if ((ret = __ham_next_cpage(hashp, + cursorp, next_pgno, 0, 0)) != 0) + return (ret); + } + + F_SET(cursorp, H_OK); + return (0); +} + +/* + * PUBLIC: int __ham_item_reset __P((HTAB *, HASH_CURSOR *)); + */ +int +__ham_item_reset(hashp, cursorp) + HTAB *hashp; + HASH_CURSOR *cursorp; +{ + int ret; + + if (cursorp->pagep) + ret = __ham_put_page(hashp->dbp, cursorp->pagep, 0); + else + ret = 0; + + __ham_item_init(cursorp); + return (ret); +} + +/* + * PUBLIC: void __ham_item_init __P((HASH_CURSOR *)); + */ +void +__ham_item_init(cursorp) + HASH_CURSOR *cursorp; +{ + cursorp->pagep = NULL; + cursorp->bucket = BUCKET_INVALID; + cursorp->lock = 0; + cursorp->bndx = NDX_INVALID; + cursorp->pgno = PGNO_INVALID; + cursorp->dpgno = PGNO_INVALID; + cursorp->dndx = NDX_INVALID; + cursorp->dpagep = NULL; + cursorp->flags = 0; + cursorp->seek_size = 0; + cursorp->seek_found_page = PGNO_INVALID; +} + +/* + * PUBLIC: int __ham_item_done __P((HTAB *, HASH_CURSOR *, int)); + */ +int +__ham_item_done(hashp, cursorp, dirty) + HTAB *hashp; + HASH_CURSOR *cursorp; + int dirty; +{ + int ret, t_ret; + + t_ret = ret = 0; + + if (cursorp->pagep) + ret = __ham_put_page(hashp->dbp, cursorp->pagep, + dirty && cursorp->dpagep == NULL); + cursorp->pagep = NULL; + + if (cursorp->dpagep) + t_ret = __ham_put_page(hashp->dbp, cursorp->dpagep, dirty); + cursorp->dpagep = NULL; + + if (ret == 0 && t_ret != 0) + ret = t_ret; + + /* + * If we are running with transactions, then we must + * not relinquish locks explicitly. + */ + if (cursorp->lock && hashp->dbp->txn == NULL) + t_ret = lock_put(hashp->dbp->dbenv->lk_info, cursorp->lock); + cursorp->lock = 0; + + + /* + * We don't throw out the page number since we might want to + * continue getting on this page. + */ + return (ret != 0 ? ret : t_ret); +} + +/* + * Returns the last item in a bucket. + * + * PUBLIC: int __ham_item_last __P((HTAB *, HASH_CURSOR *, db_lockmode_t)); + */ +int +__ham_item_last(hashp, cursorp, mode) + HTAB *hashp; + HASH_CURSOR *cursorp; + db_lockmode_t mode; +{ + int ret; + + if ((ret = __ham_item_reset(hashp, cursorp)) != 0) + return (ret); + + cursorp->bucket = hashp->hdr->max_bucket; + F_SET(cursorp, H_OK); + return (__ham_item_prev(hashp, cursorp, mode)); +} +/* + * PUBLIC: int __ham_item_first __P((HTAB *, HASH_CURSOR *, db_lockmode_t)); + */ +int +__ham_item_first(hashp, cursorp, mode) + HTAB *hashp; + HASH_CURSOR *cursorp; + db_lockmode_t mode; +{ + int ret; + + if ((ret = __ham_item_reset(hashp, cursorp)) != 0) + return (ret); + F_SET(cursorp, H_OK); + cursorp->bucket = 0; + return (__ham_item_next(hashp, cursorp, mode)); +} + +/* + * Returns a pointer to key/data pair on a page. In the case of bigkeys, + * just returns the page number and index of the bigkey pointer pair. + * + * PUBLIC: int __ham_item_prev __P((HTAB *, HASH_CURSOR *, db_lockmode_t)); + */ +int +__ham_item_prev(hashp, cursorp, mode) + HTAB *hashp; + HASH_CURSOR *cursorp; + db_lockmode_t mode; +{ + db_pgno_t next_pgno; + int ret; + + /* + * There are N cases for backing up in a hash file. + * Case 1: In the middle of a page, no duplicates, just dec the index. + * Case 2: In the middle of a duplicate set, back up one. + * Case 3: At the beginning of a duplicate set, get out of set and + * back up to next key. + * Case 4: At the beginning of a page; go to previous page. + * Case 5: At the beginning of a bucket; go to prev bucket. + */ + F_CLR(cursorp, H_OK | H_NOMORE | H_DELETED); + + /* + * First handle the duplicates. Either you'll get the key here + * or you'll exit the duplicate set and drop into the code below + * to handle backing up through keys. + */ + if (F_ISSET(cursorp, H_ISDUP)) { + if (cursorp->dpgno == PGNO_INVALID) { + /* Duplicates are on-page. */ + if (cursorp->dup_off != 0) + if ((ret = __ham_get_cpage(hashp, + cursorp, mode)) != 0) + return (ret); + else { + HASH_CURSOR *h; + h = cursorp; + memcpy(&h->dup_len, + H_PAIRDATA(h->pagep, h->bndx)->data + + h->dup_off - sizeof(db_indx_t), + sizeof(db_indx_t)); + cursorp->dup_off -= + DUP_SIZE(cursorp->dup_len); + cursorp->dndx--; + return (__ham_item(hashp, + cursorp, mode)); + } + } else if (cursorp->dndx > 0) { /* Duplicates are off-page. */ + cursorp->dndx--; + return (__ham_item(hashp, cursorp, mode)); + } else if ((ret = __ham_get_cpage(hashp, cursorp, mode)) != 0) + return (ret); + else if (PREV_PGNO(cursorp->dpagep) == PGNO_INVALID) { + F_CLR(cursorp, H_ISDUP); /* End of dups */ + cursorp->dpgno = PGNO_INVALID; + if (cursorp->dpagep != NULL) + (void)__ham_put_page(hashp->dbp, + cursorp->dpagep, 0); + cursorp->dpagep = NULL; + } else if ((ret = __ham_next_cpage(hashp, cursorp, + PREV_PGNO(cursorp->dpagep), 0, H_ISDUP)) != 0) + return (ret); + else { + cursorp->dndx = NUM_ENT(cursorp->pagep) - 1; + return (__ham_item(hashp, cursorp, mode)); + } + } + + /* + * If we get here, we are not in a duplicate set, and just need + * to back up the cursor. There are still three cases: + * midpage, beginning of page, beginning of bucket. + */ + + if (cursorp->bndx == 0) { /* Beginning of page. */ + if ((ret = __ham_get_cpage(hashp, cursorp, mode)) != 0) + return (ret); + cursorp->pgno = PREV_PGNO(cursorp->pagep); + if (cursorp->pgno == PGNO_INVALID) { + /* Beginning of bucket. */ + F_SET(cursorp, H_NOMORE); + return (DB_NOTFOUND); + } else if ((ret = __ham_next_cpage(hashp, + cursorp, cursorp->pgno, 0, 0)) != 0) + return (ret); + else + cursorp->bndx = H_NUMPAIRS(cursorp->pagep); + } + + /* + * Either we've got the cursor set up to be decremented, or we + * have to find the end of a bucket. + */ + if (cursorp->bndx == NDX_INVALID) { + if (cursorp->pagep == NULL) + next_pgno = BUCKET_TO_PAGE(hashp, cursorp->bucket); + else + goto got_page; + + do { + if ((ret = __ham_next_cpage(hashp, + cursorp, next_pgno, 0, 0)) != 0) + return (ret); +got_page: next_pgno = NEXT_PGNO(cursorp->pagep); + cursorp->bndx = H_NUMPAIRS(cursorp->pagep); + } while (next_pgno != PGNO_INVALID); + + if (cursorp->bndx == 0) { + /* Bucket was empty. */ + F_SET(cursorp, H_NOMORE); + return (DB_NOTFOUND); + } + } + + cursorp->bndx--; + + return (__ham_item(hashp, cursorp, mode)); +} + +/* + * Sets the cursor to the next key/data pair on a page. + * + * PUBLIC: int __ham_item_next __P((HTAB *, HASH_CURSOR *, db_lockmode_t)); + */ +int +__ham_item_next(hashp, cursorp, mode) + HTAB *hashp; + HASH_CURSOR *cursorp; + db_lockmode_t mode; +{ + /* + * Deleted on-page duplicates are a weird case. If we delete the last + * one, then our cursor is at the very end of a duplicate set and + * we actually need to go on to the next key. + */ + if (F_ISSET(cursorp, H_DELETED)) { + if (cursorp->bndx != NDX_INVALID && + F_ISSET(cursorp, H_ISDUP) && + cursorp->dpgno == PGNO_INVALID && + cursorp->dup_tlen == cursorp->dup_off) { + F_CLR(cursorp, H_ISDUP); + cursorp->dpgno = PGNO_INVALID; + cursorp->bndx++; + } + F_CLR(cursorp, H_DELETED); + } else if (cursorp->bndx == NDX_INVALID) { + cursorp->bndx = 0; + cursorp->dpgno = PGNO_INVALID; + F_CLR(cursorp, H_ISDUP); + } else if (F_ISSET(cursorp, H_ISDUP) && cursorp->dpgno != PGNO_INVALID) + cursorp->dndx++; + else if (F_ISSET(cursorp, H_ISDUP)) { + cursorp->dndx++; + cursorp->dup_off += DUP_SIZE(cursorp->dup_len); + if (cursorp->dup_off >= cursorp->dup_tlen) { + F_CLR(cursorp, H_ISDUP); + cursorp->dpgno = PGNO_INVALID; + cursorp->bndx++; + } + } else + cursorp->bndx++; + + return (__ham_item(hashp, cursorp, mode)); +} + +/* + * PUBLIC: void __ham_putitem __P((PAGE *p, const DBT *, int)); + * + * This is a little bit sleazy in that we're overloading the meaning + * of the H_OFFPAGE type here. When we recover deletes, we have the + * entire entry instead of having only the DBT, so we'll pass type + * H_OFFPAGE to mean, "copy the whole entry" as opposed to constructing + * an H_KEYDATA around it. + */ +void +__ham_putitem(p, dbt, type) + PAGE *p; + const DBT *dbt; + int type; +{ + u_int16_t n, off; + + n = NUM_ENT(p); + + /* Put the item element on the page. */ + if (type == H_OFFPAGE) { + off = HOFFSET(p) - dbt->size; + HOFFSET(p) = p->inp[n] = off; + memcpy(P_ENTRY(p, n), dbt->data, dbt->size); + } else { + off = HOFFSET(p) - HKEYDATA_SIZE(dbt->size); + HOFFSET(p) = p->inp[n] = off; + PUT_HKEYDATA(GET_HKEYDATA(p, n), dbt->data, dbt->size, type); + } + + /* Adjust page info. */ + NUM_ENT(p) += 1; +} + + +/* + * PUBLIC: int __ham_del_pair __P((HTAB *, HASH_CURSOR *)); + * XXX TODO: if the item is an offdup, delete the other pages and + * then remove the pair. If the offpage page is 0, then you can + * just remove the pair. + */ +int +__ham_del_pair(hashp, cursorp) + HTAB *hashp; + HASH_CURSOR *cursorp; +{ + DBT data_dbt, key_dbt; + DB_ENV *dbenv; + DB_LSN new_lsn, *n_lsn; + PAGE *p; + db_indx_t ndx; + db_pgno_t chg_pgno, pgno; + int ret, tret; + + dbenv = hashp->dbp->dbenv; + ndx = cursorp->bndx; + if (cursorp->pagep == NULL && (ret = + __ham_get_page(hashp->dbp, cursorp->pgno, &cursorp->pagep)) != 0) + return (ret); + + p = cursorp->pagep; + + /* + * We optimize for the normal case which is when neither the key nor + * the data are large. In this case, we write a single log record + * and do the delete. If either is large, we'll call __big_delete + * to remove the big item and then update the page to remove the + * entry referring to the big item. + */ + ret = 0; + if (H_PAIRKEY(p, ndx)->type == H_OFFPAGE) { + memcpy(&pgno, (u_int8_t *)GET_HOFFPAGE(p, H_KEYINDEX(ndx)) + + SSZ(HOFFPAGE, pgno), sizeof(db_pgno_t)); + ret = __db_doff(hashp->dbp, pgno, __ham_del_page); + } + + if (ret == 0) + switch (H_PAIRDATA(p, ndx)->type) { + case H_OFFPAGE: + memcpy(&pgno, + (u_int8_t *)GET_HOFFPAGE(p, H_DATAINDEX(ndx)) + + SSZ(HOFFPAGE, pgno), sizeof(db_pgno_t)); + ret = __db_doff(hashp->dbp, pgno, __ham_del_page); + break; + case H_OFFDUP: + memcpy(&pgno, + (u_int8_t *)GET_HOFFDUP(p, H_DATAINDEX(ndx)) + + SSZ(HOFFDUP, pgno), sizeof(db_pgno_t)); + ret = __db_ddup(hashp->dbp, pgno, __ham_del_page); + break; + } + + if (ret) + return (ret); + + /* Now log the delete off this page. */ + if (DB_LOGGING(hashp->dbp)) { + key_dbt.data = P_ENTRY(p, H_KEYINDEX(ndx)); + key_dbt.size = + LEN_HITEM(p, hashp->hdr->pagesize, H_KEYINDEX(ndx)); + data_dbt.data = P_ENTRY(p, H_DATAINDEX(ndx)); + data_dbt.size = + LEN_HITEM(p, hashp->hdr->pagesize, H_DATAINDEX(ndx)); + + if ((ret = __ham_insdel_log(dbenv->lg_info, + (DB_TXN *)hashp->dbp->txn, &new_lsn, 0, DELPAIR, + hashp->dbp->log_fileid, PGNO(p), (u_int32_t)ndx, + &LSN(p), &key_dbt, &data_dbt)) != 0) + return (ret); + + /* Move lsn onto page. */ + LSN(p) = new_lsn; + } + + __ham_dpair(hashp->dbp, p, ndx); + + /* + * If we are locking, we will not maintain this. + * XXXX perhaps we can retain incremental numbers and apply them + * later. + */ + if (!F_ISSET(hashp->dbp, DB_AM_LOCKING)) + --hashp->hdr->nelem; + + /* + * Check if the page is empty. There are two cases. If it's + * empty and it's not the first chain in the bucket (i.e., the + * bucket page) then we can simply remove it. If it is the first + * chain in the bucket, then we need to copy the second page into + * it and remove the second page. + */ + if (NUM_ENT(p) == 0 && PREV_PGNO(p) == PGNO_INVALID && + NEXT_PGNO(p) != PGNO_INVALID) { + PAGE *n_pagep, *nn_pagep; + db_pgno_t tmp_pgno; + + /* + * First page in chain is empty and we know that there + * are more pages in the chain. + * XXX Need to log this. + */ + if ((ret = + __ham_get_page(hashp->dbp, NEXT_PGNO(p), &n_pagep)) != 0) + return (ret); + + if (NEXT_PGNO(n_pagep) != PGNO_INVALID) { + if ((ret = + __ham_get_page(hashp->dbp, NEXT_PGNO(n_pagep), + &nn_pagep)) != 0) { + (void) __ham_put_page(hashp->dbp, n_pagep, 0); + return (ret); + } + PREV_PGNO(nn_pagep) = PGNO(p); + (void)__ham_put_page(hashp->dbp, nn_pagep, 1); + } + + tmp_pgno = PGNO(p); + memcpy(p, n_pagep, hashp->hdr->pagesize); + PGNO(p) = tmp_pgno; + PREV_PGNO(p) = PGNO_INVALID; + + /* + * Cursor is advanced to the beginning of the next page. + */ + cursorp->bndx = NDX_INVALID; + cursorp->pgno = PGNO(p); + chg_pgno = PGNO(p); + if ((ret = __ham_dirty_page(hashp, p)) != 0 || + (ret = __ham_del_page(hashp->dbp, n_pagep)) != 0) + return (ret); + } else if (NUM_ENT(p) == 0 && PREV_PGNO(p) != PGNO_INVALID) { + PAGE *n_pagep, *p_pagep; + + if ((ret = + __ham_get_page(hashp->dbp, PREV_PGNO(p), &p_pagep)) != 0) + return (ret); + + if (NEXT_PGNO(p) != PGNO_INVALID) { + if ((ret = __ham_get_page(hashp->dbp, + NEXT_PGNO(p), &n_pagep)) != 0) { + (void)__ham_put_page(hashp->dbp, p_pagep, 0); + return (ret); + } + n_lsn = &LSN(n_pagep); + } else { + n_pagep = NULL; + n_lsn = NULL; + } + + NEXT_PGNO(p_pagep) = NEXT_PGNO(p); + if (n_pagep != NULL) + PREV_PGNO(n_pagep) = PGNO(p_pagep); + + if (DB_LOGGING(hashp->dbp)) { + if ((ret = __ham_newpage_log(dbenv->lg_info, + (DB_TXN *)hashp->dbp->txn, &new_lsn, 0, DELOVFL, + hashp->dbp->log_fileid, PREV_PGNO(p), &LSN(p_pagep), + PGNO(p), &LSN(p), NEXT_PGNO(p), n_lsn)) != 0) + return (ret); + + /* Move lsn onto page. */ + LSN(p_pagep) = new_lsn; /* Structure assignment. */ + if (n_pagep) + LSN(n_pagep) = new_lsn; + LSN(p) = new_lsn; + } + cursorp->pgno = NEXT_PGNO(p); + cursorp->bndx = 0; + /* + * Since we are about to delete the cursor page and we have + * just moved the cursor, we need to make sure that the + * old page pointer isn't left hanging around in the cursor. + */ + cursorp->pagep = NULL; + chg_pgno = PGNO(p); + ret = __ham_del_page(hashp->dbp, p); + if ((tret = __ham_put_page(hashp->dbp, p_pagep, 1)) != 0 && + ret == 0) + ret = tret; + if (n_pagep != NULL && + (tret = __ham_put_page(hashp->dbp, n_pagep, 1)) != 0 && + ret == 0) + ret = tret; + if (ret != 0) + return (ret); + } else { + /* + * Mark item deleted so that we don't try to return it, and + * so that we update the cursor correctly on the next call + * to next. + */ + F_SET(cursorp, H_DELETED); + chg_pgno = cursorp->pgno; + ret = __ham_dirty_page(hashp, p); + } + __ham_c_update(hashp, cursorp, chg_pgno, 0, 0, 0); + + F_CLR(cursorp, H_OK); + return (ret); +} +/* + * PUBLIC: int __ham_replpair __P((HTAB *, HASH_CURSOR *, DBT *, u_int32_t)); + * Given the key data indicated by the cursor, replace part/all of it + * according to the fields in the dbt. + */ +int +__ham_replpair(hashp, hcp, dbt, make_dup) + HTAB *hashp; + HASH_CURSOR *hcp; + DBT *dbt; + u_int32_t make_dup; +{ + DBT old_dbt, tmp; + DB_LSN new_lsn; + HKEYDATA *hk; + u_int32_t len; + int32_t change; + int is_big, ret, type; + u_int8_t *beg, *dest, *end, *src; + + /* + * Big item replacements are handled in generic code. + * Items that fit on the current page fall into 4 classes. + * 1. On-page element, same size + * 2. On-page element, new is bigger (fits) + * 3. On-page element, new is bigger (does not fit) + * 4. On-page element, old is bigger + * Numbers 1, 2, and 4 are essentially the same (and should + * be the common case). We handle case 3 as a delete and + * add. + */ + + /* + * We need to compute the number of bytes that we are adding or + * removing from the entry. Normally, we can simply substract + * the number of bytes we are replacing (dbt->dlen) from the + * number of bytes we are inserting (dbt->size). However, if + * we are doing a partial put off the end of a record, then this + * formula doesn't work, because we are essentially adding + * new bytes. + */ + change = dbt->size - dbt->dlen; + + hk = H_PAIRDATA(hcp->pagep, hcp->bndx); + is_big = hk->type == H_OFFPAGE; + + if (is_big) + memcpy(&len, (u_int8_t *)hk + SSZ(HOFFPAGE, tlen), + sizeof(u_int32_t)); + else + len = LEN_HKEYDATA(hcp->pagep, + hashp->dbp->pgsize, H_DATAINDEX(hcp->bndx)); + + if (dbt->doff + dbt->dlen > len) + change += dbt->doff + dbt->dlen - len; + + + if (change > (int)P_FREESPACE(hcp->pagep) || is_big) { + /* + * Case 3 -- two subcases. + * A. This is not really a partial operation, but an overwrite. + * Simple del and add works. + * B. This is a partial and we need to construct the data that + * we are really inserting (yuck). + * In both cases, we need to grab the key off the page (in + * some cases we could do this outside of this routine; for + * cleanliness we do it here. If you happen to be on a big + * key, this could be a performance hit). + */ + tmp.flags = 0; + F_SET(&tmp, DB_DBT_MALLOC | DB_DBT_INTERNAL); + if ((ret = + __db_ret(hashp->dbp, hcp->pagep, H_KEYINDEX(hcp->bndx), + &tmp, &hcp->big_key, &hcp->big_keylen)) != 0) + return (ret); + + type = hk->type; + if (dbt->doff == 0 && dbt->dlen == len) { + ret = __ham_del_pair(hashp, hcp); + if (ret == 0) + ret = __ham_add_el(hashp, hcp, &tmp, dbt, type); + } else { /* Case B */ + DBT tdata; + tdata.flags = 0; + F_SET(&tdata, DB_DBT_MALLOC | DB_DBT_INTERNAL); + + if ((ret = __db_ret(hashp->dbp, hcp->pagep, + H_DATAINDEX(hcp->bndx), &tdata, &hcp->big_data, + &hcp->big_datalen)) != 0) + goto err; + + /* Now we can delete the item. */ + if ((ret = __ham_del_pair(hashp, hcp)) != 0) { + free(tdata.data); + goto err; + } + + /* Now shift old data around to make room for new. */ + if (change > 0) { + tdata.data = (void *) + realloc(tdata.data, tdata.size + change); + memset((u_int8_t *)tdata.data + tdata.size, + 0, change); + } + if (tdata.data == NULL) + return (ENOMEM); + end = (u_int8_t *)tdata.data + tdata.size; + + src = (u_int8_t *)tdata.data + dbt->doff + dbt->dlen; + if (src < end && tdata.size > dbt->doff + dbt->dlen) { + len = tdata.size - dbt->doff - dbt->dlen; + dest = src + change; + memmove(dest, src, len); + } + memcpy((u_int8_t *)tdata.data + dbt->doff, + dbt->data, dbt->size); + tdata.size += change; + + /* Now add the pair. */ + ret = __ham_add_el(hashp, hcp, &tmp, &tdata, type); + free(tdata.data); + } +err: free(tmp.data); + return (ret); + } + + /* + * Set up pointer into existing data. Do it before the log + * message so we can use it inside of the log setup. + */ + beg = H_PAIRDATA(hcp->pagep, hcp->bndx)->data; + beg += dbt->doff; + + /* + * If we are going to have to move bytes at all, figure out + * all the parameters here. Then log the call before moving + * anything around. + */ + if (DB_LOGGING(hashp->dbp)) { + old_dbt.data = beg; + old_dbt.size = dbt->dlen; + if ((ret = __ham_replace_log(hashp->dbp->dbenv->lg_info, + (DB_TXN *)hashp->dbp->txn, &new_lsn, 0, + hashp->dbp->log_fileid, PGNO(hcp->pagep), + (u_int32_t)H_DATAINDEX(hcp->bndx), &LSN(hcp->pagep), + (u_int32_t)dbt->doff, &old_dbt, dbt, make_dup)) != 0) + return (ret); + + LSN(hcp->pagep) = new_lsn; /* Structure assignment. */ + } + + __ham_onpage_replace(hcp->pagep, hashp->dbp->pgsize, + (u_int32_t)H_DATAINDEX(hcp->bndx), (int32_t)dbt->doff, change, dbt); + + return (0); +} + +/* + * Replace data on a page with new data, possibly growing or shrinking what's + * there. This is called on two different occasions. On one (from replpair) + * we are interested in changing only the data. On the other (from recovery) + * we are replacing the entire data (header and all) with a new element. In + * the latter case, the off argument is negative. + * pagep: the page that we're changing + * ndx: page index of the element that is growing/shrinking. + * off: Offset at which we are beginning the replacement. + * change: the number of bytes (+ or -) that the element is growing/shrinking. + * dbt: the new data that gets written at beg. + * PUBLIC: void __ham_onpage_replace __P((PAGE *, size_t, u_int32_t, int32_t, + * PUBLIC: int32_t, DBT *)); + */ +void +__ham_onpage_replace(pagep, pgsize, ndx, off, change, dbt) + PAGE *pagep; + size_t pgsize; + u_int32_t ndx; + int32_t off; + int32_t change; + DBT *dbt; +{ + db_indx_t i; + int32_t len; + u_int8_t *src, *dest; + int zero_me; + + if (change != 0) { + zero_me = 0; + src = (u_int8_t *)(pagep) + HOFFSET(pagep); + if (off < 0) + len = pagep->inp[ndx] - HOFFSET(pagep); + else if ((u_int32_t)off >= LEN_HKEYDATA(pagep, pgsize, ndx)) { + len = GET_HKEYDATA(pagep, ndx)->data + + LEN_HKEYDATA(pagep, pgsize, ndx) - src; + zero_me = 1; + } else + len = (GET_HKEYDATA(pagep, ndx)->data + off) - src; + dest = src - change; + memmove(dest, src, len); + if (zero_me) + memset(dest + len, 0, change); + + /* Now update the indices. */ + for (i = ndx; i < NUM_ENT(pagep); i++) + pagep->inp[i] -= change; + HOFFSET(pagep) -= change; + } + if (off >= 0) + memcpy(GET_HKEYDATA(pagep, ndx)->data + off, + dbt->data, dbt->size); + else + memcpy(P_ENTRY(pagep, ndx), dbt->data, dbt->size); +} + +/* + * PUBLIC: int __ham_split_page __P((HTAB *, u_int32_t, u_int32_t)); + */ +int +__ham_split_page(hashp, obucket, nbucket) + HTAB *hashp; + u_int32_t obucket, nbucket; +{ + DBT key, val, page_dbt; + DB_ENV *dbenv; + DB_LSN new_lsn; + PAGE **pp, *old_pagep, *temp_pagep, *new_pagep; + db_indx_t n; + db_pgno_t bucket_pgno, next_pgno; + u_int32_t big_len, len; + int ret, tret; + void *big_buf; + + dbenv = hashp->dbp->dbenv; + temp_pagep = old_pagep = new_pagep = NULL; + + bucket_pgno = BUCKET_TO_PAGE(hashp, obucket); + if ((ret = __ham_get_page(hashp->dbp, bucket_pgno, &old_pagep)) != 0) + return (ret); + if ((ret = __ham_new_page(hashp, BUCKET_TO_PAGE(hashp, nbucket), P_HASH, + &new_pagep)) != 0) + goto err; + + temp_pagep = hashp->split_buf; + memcpy(temp_pagep, old_pagep, hashp->hdr->pagesize); + + if (DB_LOGGING(hashp->dbp)) { + page_dbt.size = hashp->hdr->pagesize; + page_dbt.data = old_pagep; + if ((ret = __ham_splitdata_log(dbenv->lg_info, + (DB_TXN *)hashp->dbp->txn, &new_lsn, 0, + hashp->dbp->log_fileid, SPLITOLD, PGNO(old_pagep), + &page_dbt, &LSN(old_pagep))) != 0) + goto err; + } + + P_INIT(old_pagep, hashp->hdr->pagesize, PGNO(old_pagep), PGNO_INVALID, + PGNO_INVALID, 0, P_HASH); + + if (DB_LOGGING(hashp->dbp)) + LSN(old_pagep) = new_lsn; /* Structure assignment. */ + + big_len = 0; + big_buf = NULL; + val.flags = key.flags = 0; + while (temp_pagep != NULL) { + for (n = 0; n < (db_indx_t)H_NUMPAIRS(temp_pagep); n++) { + if ((ret = + __db_ret(hashp->dbp, temp_pagep, H_KEYINDEX(n), + &key, &big_buf, &big_len)) != 0) + goto err; + + if (__ham_call_hash(hashp, key.data, key.size) + == obucket) + pp = &old_pagep; + else + pp = &new_pagep; + + /* + * Figure out how many bytes we need on the new + * page to store the key/data pair. + */ + + len = LEN_HITEM(temp_pagep, hashp->hdr->pagesize, + H_DATAINDEX(n)) + + LEN_HITEM(temp_pagep, hashp->hdr->pagesize, + H_KEYINDEX(n)) + + 2 * sizeof(db_indx_t); + + if (P_FREESPACE(*pp) < len) { + if (DB_LOGGING(hashp->dbp)) { + page_dbt.size = hashp->hdr->pagesize; + page_dbt.data = *pp; + if ((ret = __ham_splitdata_log( + dbenv->lg_info, + (DB_TXN *)hashp->dbp->txn, + &new_lsn, 0, + hashp->dbp->log_fileid, SPLITNEW, + PGNO(*pp), &page_dbt, + &LSN(*pp))) != 0) + goto err; + LSN(*pp) = new_lsn; + } + if ((ret = __ham_add_ovflpage(hashp, + *pp, 1, pp)) != 0) + goto err; + } + __ham_copy_item(hashp, temp_pagep, H_KEYINDEX(n), *pp); + __ham_copy_item(hashp, temp_pagep, H_DATAINDEX(n), *pp); + } + next_pgno = NEXT_PGNO(temp_pagep); + + /* Clear temp_page; if it's a link overflow page, free it. */ + if (PGNO(temp_pagep) != bucket_pgno && (ret = + __ham_del_page(hashp->dbp, temp_pagep)) != 0) + goto err; + + if (next_pgno == PGNO_INVALID) + temp_pagep = NULL; + else if ((ret = + __ham_get_page(hashp->dbp, next_pgno, &temp_pagep)) != 0) + goto err; + + if (temp_pagep != NULL && DB_LOGGING(hashp->dbp)) { + page_dbt.size = hashp->hdr->pagesize; + page_dbt.data = temp_pagep; + if ((ret = __ham_splitdata_log(dbenv->lg_info, + (DB_TXN *)hashp->dbp->txn, &new_lsn, 0, + hashp->dbp->log_fileid, SPLITOLD, PGNO(temp_pagep), + &page_dbt, &LSN(temp_pagep))) != 0) + goto err; + LSN(temp_pagep) = new_lsn; + } + } + if (big_buf != NULL) + free(big_buf); + + /* + * If the original bucket spanned multiple pages, then we've got + * a pointer to a page that used to be on the bucket chain. It + * should be deleted. + */ + if (temp_pagep != NULL && PGNO(temp_pagep) != bucket_pgno && + (ret = __ham_del_page(hashp->dbp, temp_pagep)) != 0) + goto err; + + /* + * Write new buckets out. + */ + if (DB_LOGGING(hashp->dbp)) { + page_dbt.size = hashp->hdr->pagesize; + page_dbt.data = old_pagep; + if ((ret = __ham_splitdata_log(dbenv->lg_info, + (DB_TXN *)hashp->dbp->txn, &new_lsn, 0, + hashp->dbp->log_fileid, SPLITNEW, PGNO(old_pagep), + &page_dbt, &LSN(old_pagep))) != 0) + goto err; + LSN(old_pagep) = new_lsn; + + page_dbt.data = new_pagep; + if ((ret = __ham_splitdata_log(dbenv->lg_info, + (DB_TXN *)hashp->dbp->txn, &new_lsn, 0, + hashp->dbp->log_fileid, SPLITNEW, PGNO(new_pagep), + &page_dbt, &LSN(new_pagep))) != 0) + goto err; + LSN(new_pagep) = new_lsn; + } + ret = __ham_put_page(hashp->dbp, old_pagep, 1); + if ((tret = __ham_put_page(hashp->dbp, new_pagep, 1)) != 0 && + ret == 0) + ret = tret; + +err: if (0) { + if (old_pagep != NULL) + (void)__ham_put_page(hashp->dbp, old_pagep, 1); + if (new_pagep != NULL) + (void)__ham_put_page(hashp->dbp, new_pagep, 1); + if (temp_pagep != NULL && PGNO(temp_pagep) != bucket_pgno) + (void)__ham_put_page(hashp->dbp, temp_pagep, 1); + } + return (ret); +} + +/* + * Add the given pair to the page. The page in question may already be + * held (i.e. it was already gotten). If it is, then the page is passed + * in via the pagep parameter. On return, pagep will contain the page + * to which we just added something. This allows us to link overflow + * pages and return the new page having correctly put the last page. + * + * PUBLIC: int __ham_add_el __P((HTAB *, HASH_CURSOR *, const DBT *, const DBT *, + * PUBLIC: int)); + */ +int +__ham_add_el(hashp, hcp, key, val, type) + HTAB *hashp; + HASH_CURSOR *hcp; + const DBT *key, *val; + int type; +{ + DBT *pkey, *pdata, key_dbt, data_dbt; + DB_LSN new_lsn; + HOFFPAGE doff, koff; + db_pgno_t next_pgno; + u_int32_t data_size, key_size, pairsize; + int do_expand, is_keybig, is_databig, rectype, ret; + int key_type, data_type; + + do_expand = 0; + + if (hcp->pagep == NULL && (ret = __ham_get_page(hashp->dbp, + hcp->seek_found_page != PGNO_INVALID ? hcp->seek_found_page : + hcp->pgno, &hcp->pagep)) != 0) + return (ret); + + key_size = HKEYDATA_PSIZE(key->size); + data_size = HKEYDATA_PSIZE(val->size); + is_keybig = ISBIG(hashp, key->size); + is_databig = ISBIG(hashp, val->size); + if (is_keybig) + key_size = HOFFPAGE_PSIZE; + if (is_databig) + data_size = HOFFPAGE_PSIZE; + + pairsize = key_size + data_size; + + /* Advance to first page in chain with room for item. */ + while (H_NUMPAIRS(hcp->pagep) && NEXT_PGNO(hcp->pagep) != + PGNO_INVALID) { + /* + * This may not be the end of the chain, but the pair may fit + * anyway. Check if it's a bigpair that fits or a regular + * pair that fits. + */ + if (P_FREESPACE(hcp->pagep) >= pairsize) + break; + next_pgno = NEXT_PGNO(hcp->pagep); + if ((ret = + __ham_next_cpage(hashp, hcp, next_pgno, 0, 0)) != 0) + return (ret); + } + + /* + * Check if we need to allocate a new page. + */ + if (P_FREESPACE(hcp->pagep) < pairsize) { + do_expand = 1; + if ((ret = __ham_add_ovflpage(hashp, + hcp->pagep, 1, &hcp->pagep)) != 0) + return (ret); + hcp->pgno = PGNO(hcp->pagep); + } + + /* + * Update cursor. + */ + hcp->bndx = H_NUMPAIRS(hcp->pagep); + F_CLR(hcp, H_DELETED); + if (is_keybig) { + if ((ret = __db_poff(hashp->dbp, + key, &koff.pgno, __ham_overflow_page)) != 0) + return (ret); + koff.type = H_OFFPAGE; + koff.tlen = key->size; + key_dbt.data = &koff; + key_dbt.size = sizeof(koff); + pkey = &key_dbt; + key_type = H_OFFPAGE; + } else { + pkey = (DBT *)key; + key_type = H_KEYDATA; + } + + if (is_databig) { + if ((ret = __db_poff(hashp->dbp, + val, &doff.pgno, __ham_overflow_page)) != 0) + return (ret); + doff.type = H_OFFPAGE; + doff.tlen = val->size; + data_dbt.data = &doff; + data_dbt.size = sizeof(doff); + pdata = &data_dbt; + data_type = H_OFFPAGE; + } else { + pdata = (DBT *)val; + data_type = type; + } + + if (DB_LOGGING(hashp->dbp)) { + rectype = PUTPAIR; + if (is_databig) + rectype |= PAIR_DATAMASK; + if (is_keybig) + rectype |= PAIR_KEYMASK; + + if ((ret = __ham_insdel_log(hashp->dbp->dbenv->lg_info, + (DB_TXN *)hashp->dbp->txn, &new_lsn, 0, rectype, + hashp->dbp->log_fileid, PGNO(hcp->pagep), + (u_int32_t)H_NUMPAIRS(hcp->pagep), + &LSN(hcp->pagep), pkey, pdata)) != 0) + return (ret); + + /* Move lsn onto page. */ + LSN(hcp->pagep) = new_lsn; /* Structure assignment. */ + } + + __ham_putitem(hcp->pagep, pkey, key_type); + __ham_putitem(hcp->pagep, pdata, data_type); + + /* + * For splits, we are going to update item_info's page number + * field, so that we can easily return to the same page the + * next time we come in here. For other operations, this shouldn't + * matter, since odds are this is the last thing that happens before + * we return to the user program. + */ + hcp->pgno = PGNO(hcp->pagep); + + /* + * XXX Maybe keep incremental numbers here + */ + if (!F_ISSET(hashp->dbp, DB_AM_LOCKING)) + hashp->hdr->nelem++; + + if (do_expand || (hashp->hdr->ffactor != 0 && + (u_int32_t)H_NUMPAIRS(hcp->pagep) > hashp->hdr->ffactor)) + F_SET(hcp, H_EXPAND); + return (0); +} + + +/* + * Special __putitem call used in splitting -- copies one entry to + * another. Works for all types of hash entries (H_OFFPAGE, H_KEYDATA, + * H_DUPLICATE, H_OFFDUP). Since we log splits at a high level, we + * do not need to do any logging here. + * PUBLIC: void __ham_copy_item __P((HTAB *, PAGE *, int, PAGE *)); + */ +void +__ham_copy_item(hashp, src_page, src_ndx, dest_page) + HTAB *hashp; + PAGE *src_page; + int src_ndx; + PAGE *dest_page; +{ + u_int32_t len; + void *src, *dest; + + /* + * Copy the key and data entries onto this new page. + */ + src = P_ENTRY(src_page, src_ndx); + + /* Set up space on dest. */ + len = LEN_HITEM(src_page, hashp->hdr->pagesize, src_ndx); + HOFFSET(dest_page) -= len; + dest_page->inp[NUM_ENT(dest_page)] = HOFFSET(dest_page); + dest = P_ENTRY(dest_page, NUM_ENT(dest_page)); + NUM_ENT(dest_page)++; + + memcpy(dest, src, len); +} + +/* + * + * Returns: + * pointer on success + * NULL on error + * + * PUBLIC: int __ham_add_ovflpage __P((HTAB *, PAGE *, int, PAGE **)); + */ +int +__ham_add_ovflpage(hashp, pagep, release, pp) + HTAB *hashp; + PAGE *pagep; + int release; + PAGE **pp; +{ + DB_ENV *dbenv; + DB_LSN new_lsn; + PAGE *new_pagep; + int ret; + + dbenv = hashp->dbp->dbenv; + + if ((ret = __ham_overflow_page(hashp->dbp, P_HASH, &new_pagep)) != 0) + return (ret); + + if (DB_LOGGING(hashp->dbp)) { + if ((ret = __ham_newpage_log(dbenv->lg_info, + (DB_TXN *)hashp->dbp->txn, &new_lsn, 0, PUTOVFL, + hashp->dbp->log_fileid, PGNO(pagep), &LSN(pagep), + PGNO(new_pagep), &LSN(new_pagep), PGNO_INVALID, NULL)) != 0) + return (ret); + + /* Move lsn onto page. */ + LSN(pagep) = LSN(new_pagep) = new_lsn; + } + NEXT_PGNO(pagep) = PGNO(new_pagep); + PREV_PGNO(new_pagep) = PGNO(pagep); + + if (release) + ret = __ham_put_page(hashp->dbp, pagep, 1); + + hashp->hash_overflows++; + *pp = new_pagep; + return (ret); +} + + +/* + * PUBLIC: int __ham_new_page __P((HTAB *, u_int32_t, u_int32_t, PAGE **)); + */ +int +__ham_new_page(hashp, addr, type, pp) + HTAB *hashp; + u_int32_t addr, type; + PAGE **pp; +{ + PAGE *pagep; + int ret; + + if ((ret = memp_fget(hashp->dbp->mpf, + &addr, DB_MPOOL_CREATE, &pagep)) != 0) + return (ret); + +#ifdef DEBUG_SLOW + account_page(hashp, addr, 1); +#endif + /* This should not be necessary because page-in should do it. */ + P_INIT(pagep, + hashp->hdr->pagesize, addr, PGNO_INVALID, PGNO_INVALID, 0, type); + + *pp = pagep; + return (0); +} + +/* + * PUBLIC: int __ham_del_page __P((DB *, PAGE *)); + */ +int +__ham_del_page(dbp, pagep) + DB *dbp; + PAGE *pagep; +{ + DB_LSN new_lsn; + HTAB *hashp; + int ret; + + hashp = (HTAB *)dbp->internal; + ret = 0; + DIRTY_META(hashp, ret); + if (ret != 0) { + if (ret != EAGAIN) + __db_err(hashp->dbp->dbenv, + "free_ovflpage: unable to lock meta data page %s\n", + strerror(ret)); + /* + * If we are going to return an error, then we should free + * the page, so it doesn't stay pinned forever. + */ + (void)__ham_put_page(hashp->dbp, pagep, 0); + return (ret); + } + + if (DB_LOGGING(hashp->dbp)) { + if ((ret = __ham_newpgno_log(hashp->dbp->dbenv->lg_info, + (DB_TXN *)hashp->dbp->txn, &new_lsn, 0, DELPGNO, + hashp->dbp->log_fileid, PGNO(pagep), hashp->hdr->last_freed, + (u_int32_t)TYPE(pagep), NEXT_PGNO(pagep), P_INVALID, + &LSN(pagep), &hashp->hdr->lsn)) != 0) + return (ret); + + hashp->hdr->lsn = new_lsn; + LSN(pagep) = new_lsn; + } + +#ifdef DEBUG + { + db_pgno_t __pgno; + DB_LSN __lsn; + __pgno = pagep->pgno; + __lsn = pagep->lsn; + memset(pagep, 0xff, dbp->pgsize); + pagep->pgno = __pgno; + pagep->lsn = __lsn; + } +#endif + TYPE(pagep) = P_INVALID; + NEXT_PGNO(pagep) = hashp->hdr->last_freed; + hashp->hdr->last_freed = PGNO(pagep); + + return (__ham_put_page(hashp->dbp, pagep, 1)); +} + + +/* + * PUBLIC: int __ham_put_page __P((DB *, PAGE *, int32_t)); + */ +int +__ham_put_page(dbp, pagep, is_dirty) + DB *dbp; + PAGE *pagep; + int32_t is_dirty; +{ +#ifdef DEBUG_SLOW + account_page((HTAB *)dbp->cookie, + ((BKT *)((char *)pagep - sizeof(BKT)))->pgno, -1); +#endif + return (memp_fput(dbp->mpf, pagep, (is_dirty ? DB_MPOOL_DIRTY : 0))); +} + +/* + * __ham_dirty_page -- + * Mark a page dirty. + * + * PUBLIC: int __ham_dirty_page __P((HTAB *, PAGE *)); + */ +int +__ham_dirty_page(hashp, pagep) + HTAB *hashp; + PAGE *pagep; +{ + return (memp_fset(hashp->dbp->mpf, pagep, DB_MPOOL_DIRTY)); +} + +/* + * PUBLIC: int __ham_get_page __P((DB *, db_pgno_t, PAGE **)); + */ +int +__ham_get_page(dbp, addr, pagep) + DB *dbp; + db_pgno_t addr; + PAGE **pagep; +{ + int ret; + + ret = memp_fget(dbp->mpf, &addr, DB_MPOOL_CREATE, pagep); +#ifdef DEBUG_SLOW + if (*pagep != NULL) + account_page((HTAB *)dbp->internal, addr, 1); +#endif + return (ret); +} + +/* + * PUBLIC: int __ham_overflow_page __P((DB *, u_int32_t, PAGE **)); + */ +int +__ham_overflow_page(dbp, type, pp) + DB *dbp; + u_int32_t type; + PAGE **pp; +{ + DB_LSN *lsnp, new_lsn; + HTAB *hashp; + PAGE *p; + db_pgno_t new_addr, next_free, newalloc_flag; + u_int32_t offset, splitnum; + int ret; + + hashp = (HTAB *)dbp->internal; + + ret = 0; + DIRTY_META(hashp, ret); + if (ret != 0) + return (ret); + + /* + * This routine is split up into two parts. First we have + * to figure out the address of the new page that we are + * allocating. Then we have to log the allocation. Only + * after the log do we get to complete allocation of the + * new page. + */ + new_addr = hashp->hdr->last_freed; + if (new_addr != PGNO_INVALID) { + if ((ret = __ham_get_page(hashp->dbp, new_addr, &p)) != 0) + return (ret); + next_free = NEXT_PGNO(p); + lsnp = &LSN(p); + newalloc_flag = 0; + } else { + splitnum = hashp->hdr->ovfl_point; + hashp->hdr->spares[splitnum]++; + offset = hashp->hdr->spares[splitnum] - + (splitnum ? hashp->hdr->spares[splitnum - 1] : 0); + new_addr = PGNO_OF(hashp, hashp->hdr->ovfl_point, offset); + if (new_addr > MAX_PAGES(hashp)) { + __db_err(hashp->dbp->dbenv, "hash: out of file pages"); + hashp->hdr->spares[splitnum]--; + return (ENOMEM); + } + next_free = PGNO_INVALID; + p = NULL; + lsnp = NULL; + newalloc_flag = 1; + } + + if (DB_LOGGING(hashp->dbp)) { + if ((ret = __ham_newpgno_log(hashp->dbp->dbenv->lg_info, + (DB_TXN *)hashp->dbp->txn, &new_lsn, 0, ALLOCPGNO, + hashp->dbp->log_fileid, new_addr, next_free, + 0, newalloc_flag, type, lsnp, &hashp->hdr->lsn)) != 0) + return (ret); + + hashp->hdr->lsn = new_lsn; + if (lsnp != NULL) + *lsnp = new_lsn; + } + + if (p != NULL) { + /* We just took something off the free list, initialize it. */ + hashp->hdr->last_freed = next_free; + P_INIT(p, hashp->hdr->pagesize, PGNO(p), PGNO_INVALID, + PGNO_INVALID, 0, (u_int8_t)type); + } else { + /* Get the new page. */ + if ((ret = __ham_new_page(hashp, new_addr, type, &p)) != 0) + return (ret); + } + if (DB_LOGGING(hashp->dbp)) + LSN(p) = new_lsn; + + *pp = p; + return (0); +} + +#ifdef DEBUG +/* + * PUBLIC: #ifdef DEBUG + * PUBLIC: int bucket_to_page __P((HTAB *, int)); + * PUBLIC: #endif + */ +int +bucket_to_page(hashp, n) + HTAB *hashp; + int n; +{ + int ret_val; + + ret_val = n + 1; + if (n != 0) + ret_val += hashp->hdr->spares[__db_log2(n + 1) - 1]; + return (ret_val); +} +#endif + + +/* + * Create a bunch of overflow pages at the current split point. + * PUBLIC: void __ham_init_ovflpages __P((HTAB *)); + */ +void +__ham_init_ovflpages(hp) + HTAB *hp; +{ + DB_LSN new_lsn; + PAGE *p; + db_pgno_t last_pgno; + u_int32_t i, numpages; + + numpages = hp->hdr->ovfl_point + 1; + + last_pgno = hp->hdr->last_freed; + if (DB_LOGGING(hp->dbp)) { + (void)__ham_ovfl_log(hp->dbp->dbenv->lg_info, + (DB_TXN *)hp->dbp->txn, &new_lsn, 0, + hp->dbp->log_fileid, PGNO_OF(hp, hp->hdr->ovfl_point, 1), + numpages, last_pgno, &hp->hdr->lsn); + hp->hdr->lsn = new_lsn; + } else + ZERO_LSN(new_lsn); + + hp->hdr->spares[hp->hdr->ovfl_point] += numpages; + for (i = numpages; i > 0; i--) { + if (__ham_new_page(hp, + PGNO_OF(hp, hp->hdr->ovfl_point, i), P_INVALID, &p) != 0) + break; + LSN(p) = new_lsn; + NEXT_PGNO(p) = last_pgno; + last_pgno = PGNO(p); + (void)__ham_put_page(hp->dbp, p, 1); + } + hp->hdr->last_freed = last_pgno; +} + +/* + * PUBLIC: int __ham_get_cpage __P((HTAB *, HASH_CURSOR *, db_lockmode_t)); + */ +int +__ham_get_cpage(hashp, hcp, mode) + HTAB *hashp; + HASH_CURSOR *hcp; + db_lockmode_t mode; +{ + int ret; + + if (hcp->lock == 0 && F_ISSET(hashp->dbp, DB_AM_LOCKING) && + (ret = __ham_lock_bucket(hashp->dbp, hcp, mode)) != 0) + return (ret); + + if (hcp->pagep == NULL) { + if (hcp->pgno == PGNO_INVALID) { + hcp->pgno = BUCKET_TO_PAGE(hashp, hcp->bucket); + hcp->bndx = 0; + } + + if ((ret = + __ham_get_page(hashp->dbp, hcp->pgno, &hcp->pagep)) != 0) + return (ret); + } + + if (hcp->dpgno != PGNO_INVALID && hcp->dpagep == NULL) + if ((ret = + __ham_get_page(hashp->dbp, hcp->dpgno, &hcp->dpagep)) != 0) + return (ret); + return (0); +} + +/* + * Get a new page at the cursor, putting the last page if necessary. + * If the flag is set to H_ISDUP, then we are talking about the + * duplicate page, not the main page. + * PUBLIC: int __ham_next_cpage __P((HTAB *, HASH_CURSOR *, db_pgno_t, + * PUBLIC: int, int)); + */ +int +__ham_next_cpage(hashp, hcp, pgno, dirty, flags) + HTAB *hashp; + HASH_CURSOR *hcp; + db_pgno_t pgno; + int dirty; + int flags; +{ + PAGE *p; + int ret; + + if (flags & H_ISDUP && hcp->dpagep != NULL && + (ret = __ham_put_page(hashp->dbp, hcp->dpagep, dirty)) != 0) + return (ret); + else if (!(flags & H_ISDUP) && hcp->pagep != NULL && + (ret = __ham_put_page(hashp->dbp, hcp->pagep, dirty)) != 0) + return (ret); + + if ((ret = __ham_get_page(hashp->dbp, pgno, &p)) != 0) + return (ret); + + if (flags & H_ISDUP) { + hcp->dpagep = p; + hcp->dpgno = pgno; + hcp->dndx = 0; + } else { + hcp->pagep = p; + hcp->pgno = pgno; + hcp->bndx = 0; + } + + return (0); +} + +/* + * __ham_lock_bucket -- + * Get the lock on a particular bucket. + */ +static int +__ham_lock_bucket(dbp, hcp, mode) + DB *dbp; + HASH_CURSOR *hcp; + db_lockmode_t mode; +{ + int ret; + + /* + * What a way to trounce on the memory system. It might be + * worth copying the lk_info into the hashp. + */ + ret = 0; + dbp->lock.pgno = (db_pgno_t)(hcp->bucket); + ret = lock_get(dbp->dbenv->lk_info, + dbp->txn == NULL ? dbp->locker : dbp->txn->txnid, 0, + &dbp->lock_dbt, mode, &hcp->lock); + + return (ret < 0 ? EAGAIN : ret); +} + +/* + * __ham_dpair -- + * Delete a pair on a page, paying no attention to what the pair + * represents. The caller is responsible for freeing up duplicates + * or offpage entries that might be referenced by this pair. + * + * PUBLIC: void __ham_dpair __P((DB *, PAGE *, u_int32_t)); + */ +void +__ham_dpair(dbp, p, pndx) + DB *dbp; + PAGE *p; + u_int32_t pndx; +{ + db_indx_t delta, n; + u_int8_t *dest, *src; + + /* + * Compute "delta", the amount we have to shift all of the + * offsets. To find the delta, we just need to calculate + * the size of the pair of elements we are removing. + */ + delta = H_PAIRSIZE(p, dbp->pgsize, pndx); + + /* + * The hard case: we want to remove something other than + * the last item on the page. We need to shift data and + * offsets down. + */ + if ((db_indx_t)pndx != H_NUMPAIRS(p) - 1) { + /* + * Move the data: src is the first occupied byte on + * the page. (Length is delta.) + */ + src = (u_int8_t *)p + HOFFSET(p); + + /* + * Destination is delta bytes beyond src. This might + * be an overlapping copy, so we have to use memmove. + */ + dest = src + delta; + memmove(dest, src, p->inp[H_DATAINDEX(pndx)] - HOFFSET(p)); + } + + /* Adjust the offsets. */ + for (n = (db_indx_t)pndx; n < (db_indx_t)(H_NUMPAIRS(p) - 1); n++) { + p->inp[H_KEYINDEX(n)] = p->inp[H_KEYINDEX(n+1)] + delta; + p->inp[H_DATAINDEX(n)] = p->inp[H_DATAINDEX(n+1)] + delta; + } + + /* Adjust page metadata. */ + HOFFSET(p) = HOFFSET(p) + delta; + NUM_ENT(p) = NUM_ENT(p) - 2; +} + +#ifdef DEBUG_SLOW +static void +account_page(hashp, pgno, inout) + HTAB *hashp; + db_pgno_t pgno; + int inout; +{ + static struct { + db_pgno_t pgno; + int times; + } list[100]; + static int last; + int i, j; + + if (inout == -1) /* XXX: Kluge */ + inout = 0; + + /* Find page in list. */ + for (i = 0; i < last; i++) + if (list[i].pgno == pgno) + break; + /* Not found. */ + if (i == last) { + list[last].times = inout; + list[last].pgno = pgno; + last++; + } + list[i].times = inout; + if (list[i].times == 0) { + for (j = i; j < last; j++) + list[j] = list[j + 1]; + last--; + } + for (i = 0; i < last; i++, list[i].times++) + if (list[i].times > 20 && !is_bitmap_pgno(hashp, list[i].pgno)) + (void)fprintf(stderr, + "Warning: pg %lu has been out for %d times\n", + (u_long)list[i].pgno, list[i].times); +} +#endif /* DEBUG_SLOW */ diff --git a/db2/hash/hash_rec.c b/db2/hash/hash_rec.c new file mode 100644 index 0000000..81d9bb5 --- /dev/null +++ b/db2/hash/hash_rec.c @@ -0,0 +1,810 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997 + * Sleepycat Software. All rights reserved. + */ +/* + * Copyright (c) 1995, 1996 + * Margo Seltzer. All rights reserved. + */ +/* + * Copyright (c) 1995, 1996 + * The President and Fellows of Harvard University. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "config.h" + +#ifndef lint +static const char sccsid[] = "@(#)hash_rec.c 10.12 (Sleepycat) 8/22/97"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> + +#include <errno.h> +#include <stdlib.h> +#include <string.h> +#endif + +#include "db_int.h" +#include "shqueue.h" +#include "db_page.h" +#include "hash.h" +#include "btree.h" +#include "log.h" +#include "db_dispatch.h" +#include "common_ext.h" + +/* + * __ham_insdel_recover -- + * + * PUBLIC: int __ham_insdel_recover + * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); + */ +int +__ham_insdel_recover(logp, dbtp, lsnp, redo, info) + DB_LOG *logp; + DBT *dbtp; + DB_LSN *lsnp; + int redo; + void *info; +{ + __ham_insdel_args *argp; + DB *mdbp, *file_dbp; + DB_MPOOLFILE *mpf; + HTAB *hashp; + PAGE *pagep; + u_int32_t op; + int cmp_n, cmp_p, getmeta, ret; + + getmeta = 0; + hashp = NULL; /* XXX: shut the compiler up. */ + REC_PRINT(__ham_insdel_print); + REC_INTRO(__ham_insdel_read); + + ret = memp_fget(mpf, &argp->pgno, 0, &pagep); + if (ret != 0) + if (!redo) { + /* + * We are undoing and the page doesn't exist. That + * is equivalent to having a pagelsn of 0, so we + * would not have to undo anything. In this case, + * don't bother creating a page. + */ + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } else if ((ret = memp_fget(mpf, &argp->pgno, + DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + + + hashp = (HTAB *)file_dbp->internal; + GET_META(file_dbp, hashp); + getmeta = 1; + + cmp_n = log_compare(lsnp, &LSN(pagep)); + cmp_p = log_compare(&LSN(pagep), &argp->pagelsn); + /* + * Two possible things going on: + * redo a delete/undo a put: delete the item from the page. + * redo a put/undo a delete: add the item to the page. + * If we are undoing a delete, then the information logged is the + * entire entry off the page, not just the data of a dbt. In + * this case, we want to copy it back onto the page verbatim. + * We do this by calling __putitem with the type H_OFFPAGE instead + * of H_KEYDATA. + */ + op = OPCODE_OF(argp->opcode); + + if ((op == DELPAIR && cmp_n == 0 && !redo) || + (op == PUTPAIR && cmp_p == 0 && redo)) { + /* Need to redo a PUT or undo a delete. */ + __ham_putitem(pagep, &argp->key, + !redo || PAIR_ISKEYBIG(argp->opcode) ? + H_OFFPAGE : H_KEYDATA); + __ham_putitem(pagep, &argp->data, + !redo || PAIR_ISDATABIG(argp->opcode) ? + H_OFFPAGE : H_KEYDATA); + + LSN(pagep) = redo ? *lsnp : argp->pagelsn; + if ((ret = __ham_put_page(file_dbp, pagep, 1)) != 0) + goto out; + + } else if ((op == DELPAIR && cmp_p == 0 && redo) + || (op == PUTPAIR && cmp_n == 0 && !redo)) { + /* Need to undo a put or redo a delete. */ + __ham_dpair(file_dbp, pagep, argp->ndx); + LSN(pagep) = redo ? *lsnp : argp->pagelsn; + if ((ret = __ham_put_page(file_dbp, (PAGE *)pagep, 1)) != 0) + goto out; + } else + if ((ret = __ham_put_page(file_dbp, (PAGE *)pagep, 0)) != 0) + goto out; + + /* Return the previous LSN. */ + *lsnp = argp->prev_lsn; + +out: if (getmeta) + RELEASE_META(file_dbp, hashp); + REC_CLOSE; +} + +/* + * __ham_newpage_recover -- + * This log message is used when we add/remove overflow pages. This + * message takes care of the pointer chains, not the data on the pages. + * + * PUBLIC: int __ham_newpage_recover + * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); + */ +int +__ham_newpage_recover(logp, dbtp, lsnp, redo, info) + DB_LOG *logp; + DBT *dbtp; + DB_LSN *lsnp; + int redo; + void *info; +{ + __ham_newpage_args *argp; + DB *mdbp, *file_dbp; + DB_MPOOLFILE *mpf; + HTAB *hashp; + PAGE *pagep; + int cmp_n, cmp_p, change, getmeta, ret; + + getmeta = 0; + hashp = NULL; /* XXX: shut the compiler up. */ + REC_PRINT(__ham_newpage_print); + REC_INTRO(__ham_newpage_read); + + ret = memp_fget(mpf, &argp->new_pgno, 0, &pagep); + if (ret != 0) + if (!redo) { + /* + * We are undoing and the page doesn't exist. That + * is equivalent to having a pagelsn of 0, so we + * would not have to undo anything. In this case, + * don't bother creating a page. + */ + ret = 0; + goto ppage; + } else if ((ret = memp_fget(mpf, &argp->new_pgno, + DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + + hashp = (HTAB *)file_dbp->internal; + GET_META(file_dbp, hashp); + getmeta = 1; + + /* + * There are potentially three pages we need to check: the one + * that we created/deleted, the one before it and the one after + * it. + */ + + cmp_n = log_compare(lsnp, &LSN(pagep)); + cmp_p = log_compare(&LSN(pagep), &argp->pagelsn); + change = 0; + + if ((cmp_p == 0 && redo && argp->opcode == PUTOVFL) || + (cmp_n == 0 && !redo && argp->opcode == DELOVFL)) { + /* Redo a create new page or undo a delete new page. */ + P_INIT(pagep, file_dbp->pgsize, argp->new_pgno, + argp->prev_pgno, argp->next_pgno, 0, P_HASH); + change = 1; + } else if ((cmp_p == 0 && redo && argp->opcode == DELOVFL) || + (cmp_n == 0 && !redo && argp->opcode == PUTOVFL)) { + /* + * Redo a delete or undo a create new page. All we + * really need to do is change the LSN. + */ + change = 1; + } + + if (!change) { + if ((ret = __ham_put_page(file_dbp, (PAGE *)pagep, 0)) != 0) + goto out; + } else { + LSN(pagep) = redo ? *lsnp : argp->pagelsn; + if ((ret = __ham_put_page(file_dbp, (PAGE *)pagep, 1)) != 0) + goto out; + } + + /* Now do the prev page. */ +ppage: if (argp->prev_pgno != PGNO_INVALID) { + ret = memp_fget(mpf, &argp->prev_pgno, 0, &pagep); + + if (ret != 0) + if (!redo) { + /* + * We are undoing and the page doesn't exist. + * That is equivalent to having a pagelsn of 0, + * so we would not have to undo anything. In + * this case, don't bother creating a page. + */ + ret = 0; + goto npage; + } else if ((ret = + memp_fget(mpf, &argp->prev_pgno, + DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + + cmp_n = log_compare(lsnp, &LSN(pagep)); + cmp_p = log_compare(&LSN(pagep), &argp->prevlsn); + change = 0; + + if ((cmp_p == 0 && redo && argp->opcode == PUTOVFL) || + (cmp_n == 0 && !redo && argp->opcode == DELOVFL)) { + /* Redo a create new page or undo a delete new page. */ + pagep->next_pgno = argp->new_pgno; + change = 1; + } else if ((cmp_p == 0 && redo && argp->opcode == DELOVFL) || + (cmp_n == 0 && !redo && argp->opcode == PUTOVFL)) { + /* Redo a delete or undo a create new page. */ + pagep->next_pgno = argp->next_pgno; + change = 1; + } + + if (!change) { + if ((ret = __ham_put_page(file_dbp, (PAGE *)pagep, 0)) != 0) + goto out; + } else { + LSN(pagep) = redo ? *lsnp : argp->prevlsn; + if ((ret = __ham_put_page(file_dbp, (PAGE *)pagep, 1)) != 0) + goto out; + } + } + + /* Now time to do the next page */ +npage: if (argp->next_pgno != PGNO_INVALID) { + ret = memp_fget(mpf, &argp->next_pgno, 0, &pagep); + + if (ret != 0) + if (!redo) { + /* + * We are undoing and the page doesn't exist. + * That is equivalent to having a pagelsn of 0, + * so we would not have to undo anything. In + * this case, don't bother creating a page. + */ + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } else if ((ret = + memp_fget(mpf, &argp->next_pgno, + DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + + cmp_n = log_compare(lsnp, &LSN(pagep)); + cmp_p = log_compare(&LSN(pagep), &argp->nextlsn); + change = 0; + + if ((cmp_p == 0 && redo && argp->opcode == PUTOVFL) || + (cmp_n == 0 && !redo && argp->opcode == DELOVFL)) { + /* Redo a create new page or undo a delete new page. */ + pagep->prev_pgno = argp->new_pgno; + change = 1; + } else if ((cmp_p == 0 && redo && argp->opcode == DELOVFL) || + (cmp_n == 0 && !redo && argp->opcode == PUTOVFL)) { + /* Redo a delete or undo a create new page. */ + pagep->prev_pgno = argp->prev_pgno; + change = 1; + } + + if (!change) { + if ((ret = + __ham_put_page(file_dbp, (PAGE *)pagep, 0)) != 0) + goto out; + } else { + LSN(pagep) = redo ? *lsnp : argp->nextlsn; + if ((ret = + __ham_put_page(file_dbp, (PAGE *)pagep, 1)) != 0) + goto out; + } + } + *lsnp = argp->prev_lsn; + +out: if (getmeta) + RELEASE_META(file_dbp, hashp); + REC_CLOSE; +} + + +/* + * __ham_replace_recover -- + * This log message refers to partial puts that are local to a single + * page. You can think of them as special cases of the more general + * insdel log message. + * + * PUBLIC: int __ham_replace_recover + * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); + */ +int +__ham_replace_recover(logp, dbtp, lsnp, redo, info) + DB_LOG *logp; + DBT *dbtp; + DB_LSN *lsnp; + int redo; + void *info; +{ + __ham_replace_args *argp; + DB *mdbp, *file_dbp; + DB_MPOOLFILE *mpf; + DBT dbt; + HKEYDATA *hk; + HTAB *hashp; + PAGE *pagep; + int32_t grow; + int change, cmp_n, cmp_p, getmeta, ret; + + getmeta = 0; + hashp = NULL; /* XXX: shut the compiler up. */ + REC_PRINT(__ham_replace_print); + REC_INTRO(__ham_replace_read); + + ret = memp_fget(mpf, &argp->pgno, 0, &pagep); + if (ret != 0) + if (!redo) { + /* + * We are undoing and the page doesn't exist. That + * is equivalent to having a pagelsn of 0, so we + * would not have to undo anything. In this case, + * don't bother creating a page. + */ + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } else if ((ret = memp_fget(mpf, &argp->pgno, + DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + + hashp = (HTAB *)file_dbp->internal; + GET_META(file_dbp, hashp); + getmeta = 1; + + cmp_n = log_compare(lsnp, &LSN(pagep)); + cmp_p = log_compare(&LSN(pagep), &argp->pagelsn); + + if (cmp_p == 0 && redo) { + change = 1; + /* Reapply the change as specified. */ + dbt.data = argp->newitem.data; + dbt.size = argp->newitem.size; + grow = argp->newitem.size - argp->olditem.size; + LSN(pagep) = *lsnp; + } else if (cmp_n == 0 && !redo) { + change = 1; + /* Undo the already applied change. */ + dbt.data = argp->olditem.data; + dbt.size = argp->olditem.size; + grow = argp->olditem.size - argp->newitem.size; + LSN(pagep) = argp->pagelsn; + } else { + change = 0; + grow = 0; + } + + if (change) { + __ham_onpage_replace(pagep, + file_dbp->pgsize, argp->ndx, argp->off, grow, &dbt); + if (argp->makedup) { + hk = GET_HKEYDATA(pagep, argp->ndx); + if (redo) + hk->type = H_DUPLICATE; + else + hk->type = H_KEYDATA; + } + } + + if ((ret = __ham_put_page(file_dbp, pagep, change)) != 0) + goto out; + + *lsnp = argp->prev_lsn; + +out: if (getmeta) + RELEASE_META(file_dbp, hashp); + REC_CLOSE; +} + +/* + * __ham_newpgno_recover -- + * This log message is used when allocating or deleting an overflow + * page. It takes care of modifying the meta data. + * + * PUBLIC: int __ham_newpgno_recover + * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); + */ +int +__ham_newpgno_recover(logp, dbtp, lsnp, redo, info) + DB_LOG *logp; + DBT *dbtp; + DB_LSN *lsnp; + int redo; + void *info; +{ + __ham_newpgno_args *argp; + DB *mdbp, *file_dbp; + DB_MPOOLFILE *mpf; + HTAB *hashp; + PAGE *pagep; + int change, cmp_n, cmp_p, getmeta, ret; + + getmeta = 0; + hashp = NULL; /* XXX: shut the compiler up. */ + REC_PRINT(__ham_newpgno_print); + REC_INTRO(__ham_newpgno_read); + + hashp = (HTAB *)file_dbp->internal; + GET_META(file_dbp, hashp); + getmeta = 1; + + /* + * There are two phases to the recovery here. First we need + * to update the meta data; then we need to update the page. + * We'll do the meta-data first. + */ + cmp_n = log_compare(lsnp, &hashp->hdr->lsn); + cmp_p = log_compare(&hashp->hdr->lsn, &argp->metalsn); + + change = 0; + if ((cmp_p == 0 && redo && argp->opcode == ALLOCPGNO) || + (cmp_n == 0 && !redo && argp->opcode == DELPGNO)) { + /* Need to redo an allocation or undo a deletion. */ + hashp->hdr->last_freed = argp->free_pgno; + if (redo && argp->old_pgno != 0) /* Must be ALLOCPGNO */ + hashp->hdr->spares[hashp->hdr->ovfl_point]++; + change = 1; + } else if (cmp_p == 0 && redo && argp->opcode == DELPGNO) { + /* Need to redo a deletion */ + hashp->hdr->last_freed = argp->pgno; + change = 1; + } else if (cmp_n == 0 && !redo && argp->opcode == ALLOCPGNO) { + /* undo an allocation. */ + if (argp->old_pgno == 0) + hashp->hdr->last_freed = argp->pgno; + else { + hashp->hdr->spares[hashp->hdr->ovfl_point]--; + hashp->hdr->last_freed = 0; + } + change = 1; + } + if (change) { + hashp->hdr->lsn = redo ? *lsnp : argp->metalsn; + F_SET(file_dbp, DB_HS_DIRTYMETA); + } + + + /* Now check the newly allocated/freed page. */ + ret = memp_fget(mpf, &argp->pgno, 0, &pagep); + + if (ret != 0) + if (!redo) { + /* + * We are undoing and the page doesn't exist. That + * is equivalent to having a pagelsn of 0, so we + * would not have to undo anything. In this case, + * don't bother creating a page. + */ + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } else if ((ret = memp_fget(mpf, &argp->pgno, + DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + + cmp_n = log_compare(lsnp, &LSN(pagep)); + cmp_p = log_compare(&LSN(pagep), &argp->pagelsn); + + change = 0; + if (cmp_p == 0 && redo && argp->opcode == ALLOCPGNO) { + /* Need to redo an allocation. */ + P_INIT(pagep, file_dbp->pgsize, argp->pgno, PGNO_INVALID, + PGNO_INVALID, 0, argp->new_type); + change = 1; + } else if (cmp_n == 0 && !redo && argp->opcode == DELPGNO) { + /* Undoing a delete. */ + P_INIT(pagep, file_dbp->pgsize, argp->pgno, PGNO_INVALID, + argp->old_pgno, 0, argp->old_type); + change = 1; + } else if ((cmp_p == 0 && redo && argp->opcode == DELPGNO) || + (cmp_n == 0 && !redo && argp->opcode == ALLOCPGNO)) { + /* Need to redo a deletion or undo an allocation. */ + NEXT_PGNO(pagep) = argp->free_pgno; + TYPE(pagep) = P_INVALID; + change = 1; + } + if (change) + LSN(pagep) = redo ? *lsnp : argp->pagelsn; + + if ((ret = __ham_put_page(file_dbp, pagep, change)) != 0) + goto out; + + *lsnp = argp->prev_lsn; + +out: if (getmeta) + RELEASE_META(file_dbp, hashp); + REC_CLOSE; + +} + +/* + * __ham_splitmeta_recover -- + * This is the meta-data part of the split. Records the new and old + * bucket numbers and the new/old mask information. + * + * PUBLIC: int __ham_splitmeta_recover + * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); + */ +int +__ham_splitmeta_recover(logp, dbtp, lsnp, redo, info) + DB_LOG *logp; + DBT *dbtp; + DB_LSN *lsnp; + int redo; + void *info; +{ + __ham_splitmeta_args *argp; + DB *mdbp, *file_dbp; + DB_MPOOLFILE *mpf; + HTAB *hashp; + int change, cmp_n, cmp_p, getmeta, ret; + u_int32_t pow; + + getmeta = 0; + hashp = NULL; /* XXX: shut the compiler up. */ + REC_PRINT(__ham_splitmeta_print); + REC_INTRO(__ham_splitmeta_read); + + hashp = (HTAB *)file_dbp->internal; + GET_META(file_dbp, hashp); + getmeta = 1; + + /* + * There are two phases to the recovery here. First we need + * to update the meta data; then we need to update the page. + * We'll do the meta-data first. + */ + cmp_n = log_compare(lsnp, &hashp->hdr->lsn); + cmp_p = log_compare(&hashp->hdr->lsn, &argp->metalsn); + + change = 0; + if (cmp_p == 0 && redo) { + /* Need to redo the split information. */ + hashp->hdr->max_bucket = argp->bucket + 1; + pow = __db_log2(hashp->hdr->max_bucket + 1); + if (pow > hashp->hdr->ovfl_point) { + hashp->hdr->spares[pow] = + hashp->hdr->spares[hashp->hdr->ovfl_point]; + hashp->hdr->ovfl_point = pow; + } + if (hashp->hdr->max_bucket > hashp->hdr->high_mask) { + hashp->hdr->low_mask = hashp->hdr->high_mask; + hashp->hdr->high_mask = + hashp->hdr->max_bucket | hashp->hdr->low_mask; + } + change = 1; + } else if (cmp_n == 0 && !redo) { + /* Need to undo the split information. */ + hashp->hdr->max_bucket = argp->bucket; + hashp->hdr->ovfl_point = argp->ovflpoint; + hashp->hdr->spares[hashp->hdr->ovfl_point] = argp->spares; + pow = 1 << __db_log2(hashp->hdr->max_bucket + 1); + hashp->hdr->high_mask = pow - 1; + hashp->hdr->low_mask = (pow >> 1) - 1; + change = 1; + } + if (change) { + hashp->hdr->lsn = redo ? *lsnp : argp->metalsn; + F_SET(file_dbp, DB_HS_DIRTYMETA); + } + *lsnp = argp->prev_lsn; + +out: if (getmeta) + RELEASE_META(file_dbp, hashp); + REC_CLOSE; +} + +/* + * __ham_splitdata_recover -- + * + * PUBLIC: int __ham_splitdata_recover + * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); + */ +int +__ham_splitdata_recover(logp, dbtp, lsnp, redo, info) + DB_LOG *logp; + DBT *dbtp; + DB_LSN *lsnp; + int redo; + void *info; +{ + __ham_splitdata_args *argp; + DB *mdbp, *file_dbp; + DB_MPOOLFILE *mpf; + HTAB *hashp; + PAGE *pagep; + int change, cmp_n, cmp_p, getmeta, ret; + + getmeta = 0; + hashp = NULL; /* XXX: shut the compiler up. */ + REC_PRINT(__ham_splitdata_print); + REC_INTRO(__ham_splitdata_read); + + ret = memp_fget(mpf, &argp->pgno, 0, &pagep); + if (ret != 0) + if (!redo) { + /* + * We are undoing and the page doesn't exist. That + * is equivalent to having a pagelsn of 0, so we + * would not have to undo anything. In this case, + * don't bother creating a page. + */ + *lsnp = argp->prev_lsn; + ret = 0; + goto out; + } else if ((ret = memp_fget(mpf, &argp->pgno, + DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + + hashp = (HTAB *)file_dbp->internal; + GET_META(file_dbp, hashp); + getmeta = 1; + + cmp_n = log_compare(lsnp, &LSN(pagep)); + cmp_p = log_compare(&LSN(pagep), &argp->pagelsn); + + /* + * There are two types of log messages here, one for the old page + * and one for the new pages created. The original image in the + * SPLITOLD record is used for undo. The image in the SPLITNEW + * is used for redo. We should never have a case where there is + * a redo operation and the SPLITOLD record is on disk, but not + * the SPLITNEW record. Therefore, we only have work to do when + * redo NEW messages and undo OLD messages, but we have to update + * LSNs in both cases. + */ + change = 0; + if (cmp_p == 0 && redo) { + if (argp->opcode == SPLITNEW) + /* Need to redo the split described. */ + memcpy(pagep, argp->pageimage.data, + argp->pageimage.size); + LSN(pagep) = *lsnp; + change = 1; + } else if (cmp_n == 0 && !redo) { + if (argp->opcode == SPLITOLD) { + /* Put back the old image. */ + memcpy(pagep, argp->pageimage.data, + argp->pageimage.size); + } else + P_INIT(pagep, file_dbp->pgsize, argp->pgno, + PGNO_INVALID, PGNO_INVALID, 0, P_HASH); + LSN(pagep) = argp->pagelsn; + change = 1; + } + if ((ret = __ham_put_page(file_dbp, pagep, change)) != 0) + goto out; + + *lsnp = argp->prev_lsn; + +out: if (getmeta) + RELEASE_META(file_dbp, hashp); + REC_CLOSE; +} + +/* + * __ham_ovfl_recover -- + * This message is generated when we initialize a set of overflow pages. + * + * PUBLIC: int __ham_ovfl_recover + * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); + */ +int +__ham_ovfl_recover(logp, dbtp, lsnp, redo, info) + DB_LOG *logp; + DBT *dbtp; + DB_LSN *lsnp; + int redo; + void *info; +{ + __ham_ovfl_args *argp; + DB *mdbp, *file_dbp; + DB_MPOOLFILE *mpf; + HTAB *hashp; + PAGE *pagep; + db_pgno_t max_pgno, pgno; + int cmp_n, cmp_p, getmeta, ret; + + getmeta = 0; + hashp = NULL; /* XXX: shut the compiler up. */ + REC_PRINT(__ham_ovfl_print); + REC_INTRO(__ham_ovfl_read); + + hashp = (HTAB *)file_dbp->internal; + GET_META(file_dbp, hashp); + getmeta = 1; + file_dbp = NULL; + + cmp_n = log_compare(lsnp, &hashp->hdr->lsn); + cmp_p = log_compare(&hashp->hdr->lsn, &argp->metalsn); + + if (cmp_p == 0 && redo) { + /* Redo the allocation. */ + hashp->hdr->last_freed = argp->start_pgno; + hashp->hdr->spares[argp->npages - 1] += argp->npages; + hashp->hdr->lsn = *lsnp; + F_SET(file_dbp, DB_HS_DIRTYMETA); + } else if (cmp_n == 0 && !redo) { + hashp->hdr->last_freed = argp->free_pgno; + hashp->hdr->spares[argp->npages - 1] -= argp->npages; + hashp->hdr->lsn = argp->metalsn; + F_SET(file_dbp, DB_HS_DIRTYMETA); + } + + max_pgno = argp->start_pgno + argp->npages - 1; + ret = 0; + for (pgno = argp->start_pgno; pgno <= max_pgno; pgno++) { + ret = memp_fget(mpf, &pgno, 0, &pagep); + if (ret != 0) { + if (redo && (ret = memp_fget(mpf, &pgno, + DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + else if (!redo) { + (void)__ham_put_page(file_dbp, pagep, 0); + continue; + } + } + if (redo && log_compare((const DB_LSN *)lsnp, + (const DB_LSN *)&LSN(pagep)) > 0) { + P_INIT(pagep, file_dbp->pgsize, pgno, PGNO_INVALID, + pgno == max_pgno ? argp->free_pgno : pgno + 1, + 0, P_HASH); + LSN(pagep) = *lsnp; + ret = __ham_put_page(file_dbp, pagep, 1); + } else if (!redo) { + ZERO_LSN(pagep->lsn); + ret = __ham_put_page(file_dbp, pagep, 1); + } else + ret = __ham_put_page(file_dbp, pagep, 0); + if (ret) + goto out; + } + + *lsnp = argp->prev_lsn; +out: if (getmeta) + RELEASE_META(file_dbp, hashp); + REC_CLOSE; +} diff --git a/db2/hash/hash_stat.c b/db2/hash/hash_stat.c new file mode 100644 index 0000000..99c6078 --- /dev/null +++ b/db2/hash/hash_stat.c @@ -0,0 +1,58 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997 + * Sleepycat Software. All rights reserved. + */ + +#include "config.h" + +#ifndef lint +static const char sccsid[] = "@(#)hash_stat.c 10.6 (Sleepycat) 7/2/97"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> + +#include <errno.h> +#endif + +#include "db_int.h" +#include "db_page.h" +#include "hash.h" +#include "common_ext.h" + +/* + * __ham_stat -- + * Gather/print the hash statistics. + * + * PUBLIC: int __ham_stat __P((DB *, FILE *)); + */ +int +__ham_stat(dbp, fp) + DB *dbp; + FILE *fp; +{ + HTAB *hashp; + int i; + + hashp = (HTAB *)dbp->internal; + + fprintf(fp, "hash: accesses %lu collisions %lu\n", + hashp->hash_accesses, hashp->hash_collisions); + fprintf(fp, "hash: expansions %lu\n", hashp->hash_expansions); + fprintf(fp, "hash: overflows %lu\n", hashp->hash_overflows); + fprintf(fp, "hash: big key/data pages %lu\n", hashp->hash_bigpages); + + SET_LOCKER(dbp, NULL); + GET_META(dbp, hashp); + fprintf(fp, "keys %lu maxp %lu\n", + (u_long)hashp->hdr->nelem, (u_long)hashp->hdr->max_bucket); + + for (i = 0; i < NCACHED; i++) + fprintf(fp, + "spares[%d] = %lu\n", i, (u_long)hashp->hdr->spares[i]); + + RELEASE_META(dbp, hashp); + return (0); +} |