about | summary | refs | log | tree | commit | diff
path: root/db2/mp
diff options
context:
space:
mode:
author: Ulrich Drepper <drepper@redhat.com> 1998-06-09 15:16:55 +0000
committer: Ulrich Drepper <drepper@redhat.com> 1998-06-09 15:16:55 +0000
commit: bf7997b65c7887d2acda95f5201d818a19d81711 (patch)
tree: da3583de3a0b5892f90a4b1eb773a87b554ae37e /db2/mp
parent: 7646e67e6cc4c738a7b402c60fed39d52db0433b (diff)
download: glibc-bf7997b65c7887d2acda95f5201d818a19d81711.zip
download: glibc-bf7997b65c7887d2acda95f5201d818a19d81711.tar.gz
download: glibc-bf7997b65c7887d2acda95f5201d818a19d81711.tar.bz2
Update.
1998-06-09 Ulrich Drepper <drepper@cygnus.com> * sysdeps/unix/sysv/linux/netinet/ip.h (struct ip_options): Define __data member only for gcc. Reported by ak@muc.de. * misc/mntent.h: Undo last patch. * sysdeps/unix/sysv/linux/fstatvfs.c (fstatvfs): Undo last patch. * misc/tst/mntent.c: Adjust code for this change. * io/fts.c: Updated from a slightly more recent BSD version. * io/fts.h: Likewise. * libc.map: Add __libc_stack_end. * db2/Makefile (routines): Add lock_region. * db2/config.h: Update from db-2.4.14. * db2/db.h: Likewise. * db2/db_185.h: Likewise. * db2/db_int.h: Likewise. * db2/bt_close.c: Likewise. * db2/bt_compare.c: Likewise. * db2/bt_conv.c: Likewise. * db2/bt_cursor.c: Likewise. * db2/bt_delete.c: Likewise. * db2/bt_open.c: Likewise. * db2/bt_page.c: Likewise. * db2/bt_put.c: Likewise. * db2/bt_rec.c: Likewise. * db2/bt_recno.c: Likewise. * db2/bt_rsearch.c: Likewise. * db2/bt_search.c: Likewise. * db2/bt_split.c: Likewise. * db2/bt_stat.c: Likewise. * db2/btree.src: Likewise. * db2/btree_auto.c: Likewise. * db2/getlong.c: Likewise. * db2/db_appinit.c: Likewise. * db2/db_apprec.c: Likewise. * db2/db_byteorder.c: Likewise. * db2/db_err.c: Likewise. * db2/db_log2.c: Likewise. * db2/db_region.c: Likewise. * db2/db_salloc.c: Likewise. * db2/db_shash.c: Likewise. * db2/db.c: Likewise. * db2/db.src: Likewise. * db2/db_auto.c: Likewise. * db2/db_conv.c: Likewise. * db2/db_dispatch.c: Likewise. * db2/db_dup.c: Likewise. * db2/db_overflow.c: Likewise. * db2/db_pr.c: Likewise. * db2/db_rec.c: Likewise. * db2/db_ret.c: Likewise. * db2/db_thread.c: Likewise. * db2/db185.c: Likewise. * db2/db185_int.h: Likewise. * db2/dbm.c: Likewise. * db2/hash.c: Likewise. * db2/hash.src: Likewise. * db2/hash_auto.c: Likewise. * db2/hash_conv.c: Likewise. * db2/hash_debug.c: Likewise. * db2/hash_dup.c: Likewise. * db2/hash_func.c: Likewise. * db2/hash_page.c: Likewise. * db2/hash_rec.c: Likewise. * db2/hash_stat.c: Likewise. * db2/btree.h: Likewise. * db2/btree_ext.h: Likewise. 
* db2/clib_ext.h: Likewise. * db2/common_ext.h: Likewise. * db2/cxx_int.h: Likewise. * db2/db.h.src: Likewise. * db2/db_185.h.src: Likewise. * db2/db_am.h: Likewise. * db2/db_auto.h: Likewise. * db2/db_cxx.h: Likewise. * db2/db_dispatch.h: Likewise. * db2/db_ext.h: Likewise. * db2/db_int.h.src: Likewise. * db2/db_page.h: Likewise. * db2/db_shash.h: Likewise. * db2/db_swap.h: Likewise. * db2/hash.h: Likewise. * db2/hash_ext.h: Likewise. * db2/lock.h: Likewise. * db2/lock_ext.h: Likewise. * db2/log.h: Likewise. * db2/log_ext.h: Likewise. * db2/mp.h: Likewise. * db2/mp_ext.h: Likewise. * db2/mutex_ext.h: Likewise. * db2/os_ext.h: Likewise. * db2/os_func.h: Likewise. * db2/queue.h: Likewise. * db2/shqueue.h: Likewise. * db2/txn.h: Likewise. * db2/lock.c: Likewise. * db2/lock_conflict.c: Likewise. * db2/lock_deadlock.c: Likewise. * db2/lock_region.c: Likewise. * db2/lock_util.c: Likewise. * db2/log.c: Likewise. * db2/log.src: Likewise. * db2/log_archive.c: Likewise. * db2/log_auto.c: Likewise. * db2/log_compare.c: Likewise. * db2/log_findckp.c: Likewise. * db2/log_get.c: Likewise. * db2/log_put.c: Likewise. * db2/log_rec.c: Likewise. * db2/log_register.c: Likewise. * db2/mp_bh.c: Likewise. * db2/mp_fget.c: Likewise. * db2/mp_fopen.c: Likewise. * db2/mp_fput.c: Likewise. * db2/mp_fset.c: Likewise. * db2/mp_open.c: Likewise. * db2/mp_pr.c: Likewise. * db2/mp_region.c: Likewise. * db2/mp_sync.c: Likewise. * db2/68020.gcc: Likewise. * db2/mutex.c: Likewise. * db2/parisc.gcc: Likewise. * db2/parisc.hp: Likewise. * db2/sco.cc: Likewise. * db2/os_abs.c: Likewise. * db2/os_alloc.c: Likewise. * db2/os_config.c: Likewise. * db2/os_dir.c: Likewise. * db2/os_fid.c: Likewise. * db2/os_fsync.c: Likewise. * db2/os_map.c: Likewise. * db2/os_oflags.c: Likewise. * db2/os_open.c: Likewise. * db2/os_rpath.c: Likewise. * db2/os_rw.c: Likewise. * db2/os_seek.c: Likewise. * db2/os_sleep.c: Likewise. * db2/os_spin.c: Likewise. * db2/os_stat.c: Likewise. * db2/os_unlink.c: Likewise. 
* db2/db_archive.c: Likewise. * db2/db_checkpoint.c: Likewise. * db2/db_deadlock.c: Likewise. * db2/db_dump.c: Likewise. * db2/db_dump185.c: Likewise. * db2/db_load.c: Likewise. * db2/db_printlog.c: Likewise. * db2/db_recover.c: Likewise. * db2/db_stat.c: Likewise. * db2/txn.c: Likewise. * db2/txn.src: Likewise. * db2/txn_auto.c: Likewise. * db2/txn_rec.c: Likewise. * elf/rtld.c: Move definition of __libc_stack_end to ... * sysdeps/generic/dl-sysdep.h: ...here. * sysdeps/unix/sysv/linux/fstatvfs.c: Handle nodiratime option. * sysdeps/unix/sysv/linux/bits/statvfs.h: Define ST_NODIRATIME. * sysdeps/unix/sysv/linux/sys/mount.h: Define MS_NODIRATIME. 1998-06-08 21:44 Ulrich Drepper <drepper@cygnus.com> * sysdeps/unix/sysv/linux/fstatvfs.c: Handle constant option string from mntent correctly. 1998-06-06 Andreas Jaeger <aj@arthur.rhein-neckar.de> * sunrpc/Makefile (generated): Correct typo. 1998-06-04 Philip Blundell <philb@gnu.org> * elf/elf.h (EM_ARM, et al.): New definitions. * sysdeps/arm/dl-machine.h: Update for new draft ARM ELF ABI.
Diffstat (limited to 'db2/mp')
-rw-r--r-- db2/mp/mp_bh.c 79
-rw-r--r-- db2/mp/mp_fget.c 359
-rw-r--r-- db2/mp/mp_fopen.c 128
-rw-r--r-- db2/mp/mp_fput.c 64
-rw-r--r-- db2/mp/mp_fset.c 8
-rw-r--r-- db2/mp/mp_open.c 41
-rw-r--r-- db2/mp/mp_pr.c 294
-rw-r--r-- db2/mp/mp_region.c 229
-rw-r--r-- db2/mp/mp_sync.c 74
9 files changed, 640 insertions, 636 deletions
diff --git a/db2/mp/mp_bh.c b/db2/mp/mp_bh.c
index c23abdd..d89f9c2 100644
--- a/db2/mp/mp_bh.c
+++ b/db2/mp/mp_bh.c
@@ -1,13 +1,13 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
* Sleepycat Software. All rights reserved.
*/
#include "config.h"
#ifndef lint
-static const char sccsid[] = "@(#)mp_bh.c 10.28 (Sleepycat) 1/8/98";
+static const char sccsid[] = "@(#)mp_bh.c 10.38 (Sleepycat) 5/20/98";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -59,8 +59,10 @@ __memp_bhwrite(dbmp, mfp, bhp, restartp, wrotep)
dbmfp != NULL; dbmfp = TAILQ_NEXT(dbmfp, q))
if (dbmfp->mfp == mfp) {
if (F_ISSET(dbmfp, MP_READONLY) &&
- __memp_upgrade(dbmp, dbmfp, mfp))
+ __memp_upgrade(dbmp, dbmfp, mfp)) {
+ UNLOCKHANDLE(dbmp, dbmp->mutexp);
return (0);
+ }
break;
}
UNLOCKHANDLE(dbmp, dbmp->mutexp);
@@ -111,8 +113,8 @@ __memp_bhwrite(dbmp, mfp, bhp, restartp, wrotep)
if (F_ISSET(mfp, MP_TEMP))
return (0);
- if (__memp_fopen(dbmp, mfp, R_ADDR(dbmp, mfp->path_off), mfp->ftype,
- 0, 0, mfp->stat.st_pagesize, 0, NULL, NULL, 0, &dbmfp) != 0)
+ if (__memp_fopen(dbmp, mfp, R_ADDR(dbmp, mfp->path_off),
+ 0, 0, mfp->stat.st_pagesize, 0, NULL, &dbmfp) != 0)
return (0);
found: return (__memp_pgwrite(dbmfp, bhp, restartp, wrotep));
@@ -152,7 +154,7 @@ __memp_pgread(dbmfp, bhp, can_create)
ret = 0;
LOCKHANDLE(dbmp, dbmfp->mutexp);
if (dbmfp->fd == -1 || (ret =
- __db_seek(dbmfp->fd, pagesize, bhp->pgno, 0, SEEK_SET)) != 0) {
+ __db_seek(dbmfp->fd, pagesize, bhp->pgno, 0, 0, SEEK_SET)) != 0) {
if (!can_create) {
if (dbmfp->fd == -1)
ret = EINVAL;
@@ -164,8 +166,17 @@ __memp_pgread(dbmfp, bhp, can_create)
}
UNLOCKHANDLE(dbmp, dbmfp->mutexp);
- /* Clear any uninitialized data. */
- memset(bhp->buf, 0, pagesize);
+ /* Clear the created page. */
+ if (mfp->clear_len == 0)
+ memset(bhp->buf, 0, pagesize);
+ else {
+ memset(bhp->buf, 0, mfp->clear_len);
+#ifdef DIAGNOSTIC
+ memset(bhp->buf + mfp->clear_len,
+ 0xff, pagesize - mfp->clear_len);
+#endif
+ }
+
goto pgin;
}
@@ -186,8 +197,16 @@ __memp_pgread(dbmfp, bhp, can_create)
goto err;
}
- /* Clear any uninitialized data. */
- memset(bhp->buf + nr, 0, pagesize - nr);
+ /*
+ * If we didn't fail until we tried the read, don't clear the
+ * whole page, it wouldn't be insane for a filesystem to just
+ * always behave that way. Else, clear any uninitialized data.
+ */
+ if (nr == 0)
+ memset(bhp->buf, 0,
+ mfp->clear_len == 0 ? pagesize : mfp->clear_len);
+ else
+ memset(bhp->buf + nr, 0, pagesize - nr);
}
/* Call any pgin function. */
@@ -308,31 +327,31 @@ __memp_pgwrite(dbmfp, bhp, restartp, wrotep)
/* Temporary files may not yet have been created. */
LOCKHANDLE(dbmp, dbmfp->mutexp);
- if (dbmfp->fd == -1)
- if ((ret = __db_appname(dbenv, DB_APP_TMP,
- NULL, NULL, &dbmfp->fd, NULL)) != 0 || dbmfp->fd == -1) {
- UNLOCKHANDLE(dbmp, dbmfp->mutexp);
- __db_err(dbenv,
- "unable to create temporary backing file");
- goto err;
- }
+ if (dbmfp->fd == -1 &&
+ ((ret = __db_appname(dbenv, DB_APP_TMP, NULL, NULL,
+ DB_CREATE | DB_EXCL | DB_TEMPORARY, &dbmfp->fd, NULL)) != 0 ||
+ dbmfp->fd == -1)) {
+ UNLOCKHANDLE(dbmp, dbmfp->mutexp);
+ __db_err(dbenv, "unable to create temporary backing file");
+ goto err;
+ }
- /* Write the page out. */
- if ((ret = __db_seek(dbmfp->fd, pagesize, bhp->pgno, 0, SEEK_SET)) != 0)
+ /*
+ * Write the page out.
+ *
+ * XXX
+ * Shut the compiler up; it doesn't understand the correlation between
+ * the failing clauses to __db_lseek and __db_write and this ret != 0.
+ */
+ COMPQUIET(fail, NULL);
+ if ((ret =
+ __db_seek(dbmfp->fd, pagesize, bhp->pgno, 0, 0, SEEK_SET)) != 0)
fail = "seek";
else if ((ret = __db_write(dbmfp->fd, bhp->buf, pagesize, &nw)) != 0)
fail = "write";
UNLOCKHANDLE(dbmp, dbmfp->mutexp);
- if (ret != 0) {
- /*
- * XXX
- * Shut the compiler up; it doesn't understand the correlation
- * between the failing clauses to __db_lseek and __db_write and
- * this ret != 0.
- */
- COMPQUIET(fail, NULL);
+ if (ret != 0)
goto syserr;
- }
if (nw != (ssize_t)pagesize) {
ret = EIO;
@@ -548,7 +567,7 @@ __memp_upgrade(dbmp, dbmfp, mfp)
* way we could have gotten a file descriptor of any kind.
*/
if ((ret = __db_appname(dbmp->dbenv, DB_APP_DATA,
- NULL, R_ADDR(dbmp, mfp->path_off), NULL, &rpath)) != 0)
+ NULL, R_ADDR(dbmp, mfp->path_off), 0, NULL, &rpath)) != 0)
return (ret);
if (__db_open(rpath, 0, 0, 0, &fd) != 0) {
F_SET(dbmfp, MP_UPGRADE_FAIL);
diff --git a/db2/mp/mp_fget.c b/db2/mp/mp_fget.c
index f5955c4..c8ae2e9 100644
--- a/db2/mp/mp_fget.c
+++ b/db2/mp/mp_fget.c
@@ -1,21 +1,19 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
* Sleepycat Software. All rights reserved.
*/
#include "config.h"
#ifndef lint
-static const char sccsid[] = "@(#)mp_fget.c 10.33 (Sleepycat) 12/2/97";
+static const char sccsid[] = "@(#)mp_fget.c 10.48 (Sleepycat) 6/2/98";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
-#include <sys/stat.h>
#include <errno.h>
-#include <stdlib.h>
#include <string.h>
#endif
@@ -25,8 +23,6 @@ static const char sccsid[] = "@(#)mp_fget.c 10.33 (Sleepycat) 12/2/97";
#include "mp.h"
#include "common_ext.h"
-int __sleep_on_every_page_get; /* XXX: thread debugging option. */
-
/*
* memp_fget --
* Get a page from the file.
@@ -35,7 +31,7 @@ int
memp_fget(dbmfp, pgnoaddr, flags, addrp)
DB_MPOOLFILE *dbmfp;
db_pgno_t *pgnoaddr;
- int flags;
+ u_int32_t flags;
void *addrp;
{
BH *bhp;
@@ -43,11 +39,12 @@ memp_fget(dbmfp, pgnoaddr, flags, addrp)
MPOOL *mp;
MPOOLFILE *mfp;
size_t bucket, mf_offset;
- u_long cnt;
- int b_incr, b_inserted, readonly_alloc, ret;
- void *addr;
+ u_int32_t st_hsearch;
+ int b_incr, first, ret;
dbmp = dbmfp->dbmp;
+ mp = dbmp->mp;
+ mfp = dbmfp->mfp;
/*
* Validate arguments.
@@ -79,32 +76,62 @@ memp_fget(dbmfp, pgnoaddr, flags, addrp)
}
}
-#ifdef DEBUG
+#ifdef DIAGNOSTIC
/*
* XXX
* We want to switch threads as often as possible. Sleep every time
* we get a new page to make it more likely.
*/
- if (__sleep_on_every_page_get &&
+ if (DB_GLOBAL(db_pageyield) &&
(__db_yield == NULL || __db_yield() != 0))
__db_sleep(0, 1);
#endif
- mp = dbmp->mp;
- mfp = dbmfp->mfp;
+ /* Initialize remaining local variables. */
mf_offset = R_OFFSET(dbmp, mfp);
- addr = NULL;
bhp = NULL;
- b_incr = b_inserted = ret = 0;
+ st_hsearch = 0;
+ b_incr = ret = 0;
+
+ /* Determine the hash bucket where this page will live. */
+ bucket = BUCKET(mp, mf_offset, *pgnoaddr);
LOCKREGION(dbmp);
/*
- * If mmap'ing the file, just return a pointer. However, if another
- * process has opened the file for writing since we mmap'd it, start
- * playing the game by their rules, i.e. everything goes through the
- * cache. All pages previously returned should be safe, as long as
- * a locking protocol was observed.
+ * Check for the last or last + 1 page requests.
+ *
+ * Examine and update the file's last_pgno value. We don't care if
+ * the last_pgno value immediately changes due to another thread --
+ * at this instant in time, the value is correct. We do increment the
+ * current last_pgno value if the thread is asking for a new page,
+ * however, to ensure that two threads creating pages don't get the
+ * same one.
+ */
+ if (LF_ISSET(DB_MPOOL_LAST | DB_MPOOL_NEW)) {
+ if (LF_ISSET(DB_MPOOL_NEW))
+ ++mfp->last_pgno;
+ *pgnoaddr = mfp->last_pgno;
+ bucket = BUCKET(mp, mf_offset, mfp->last_pgno);
+
+ if (LF_ISSET(DB_MPOOL_NEW))
+ goto alloc;
+ }
+
+ /*
+ * If mmap'ing the file and the page is not past the end of the file,
+ * just return a pointer.
+ *
+ * The page may be past the end of the file, so check the page number
+ * argument against the original length of the file. If we previously
+ * returned pages past the original end of the file, last_pgno will
+ * have been updated to match the "new" end of the file, and checking
+ * against it would return pointers past the end of the mmap'd region.
+ *
+ * If another process has opened the file for writing since we mmap'd
+ * it, we will start playing the game by their rules, i.e. everything
+ * goes through the cache. All pages previously returned will be safe,
+ * as long as the correct locking protocol was observed.
*
* XXX
* We don't discard the map because we don't know when all of the
@@ -112,203 +139,180 @@ memp_fget(dbmfp, pgnoaddr, flags, addrp)
* It would be possible to do so by reference counting the open
* pages from the mmap, but it's unclear to me that it's worth it.
*/
- if (dbmfp->addr != NULL && F_ISSET(dbmfp->mfp, MP_CAN_MMAP)) {
- readonly_alloc = 0;
- if (LF_ISSET(DB_MPOOL_LAST))
- *pgnoaddr = mfp->last_pgno;
- else {
+ if (dbmfp->addr != NULL && F_ISSET(mfp, MP_CAN_MMAP))
+ if (*pgnoaddr > mfp->orig_last_pgno) {
/*
* !!!
- * Allocate a page that can never really exist. See
- * the comment above about non-existent pages and the
- * hash access method.
+ * See the comment above about non-existent pages and
+ * the hash access method.
*/
- if (LF_ISSET(DB_MPOOL_CREATE | DB_MPOOL_NEW))
- readonly_alloc = 1;
- else if (*pgnoaddr > mfp->last_pgno) {
+ if (!LF_ISSET(DB_MPOOL_CREATE)) {
__db_err(dbmp->dbenv,
"%s: page %lu doesn't exist",
__memp_fn(dbmfp), (u_long)*pgnoaddr);
ret = EINVAL;
goto err;
}
- }
- if (!readonly_alloc) {
- addr = R_ADDR(dbmfp, *pgnoaddr * mfp->stat.st_pagesize);
-
+ } else {
+ *(void **)addrp =
+ R_ADDR(dbmfp, *pgnoaddr * mfp->stat.st_pagesize);
++mp->stat.st_map;
++mfp->stat.st_map;
+ goto done;
+ }
- goto mapret;
+ /* Search the hash chain for the page. */
+ for (bhp = SH_TAILQ_FIRST(&dbmp->htab[bucket], __bh);
+ bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh)) {
+ ++st_hsearch;
+ if (bhp->pgno != *pgnoaddr || bhp->mf_offset != mf_offset)
+ continue;
+
+ /* Increment the reference count. */
+ if (bhp->ref == UINT16_T_MAX) {
+ __db_err(dbmp->dbenv,
+ "%s: page %lu: reference count overflow",
+ __memp_fn(dbmfp), (u_long)bhp->pgno);
+ ret = EINVAL;
+ goto err;
}
- }
- /* Check if requesting the last page or a new page. */
- if (LF_ISSET(DB_MPOOL_LAST))
- *pgnoaddr = mfp->last_pgno;
+ /*
+ * Increment the reference count. We may discard the region
+ * lock as we evaluate and/or read the buffer, so we need to
+ * ensure that it doesn't move and that its contents remain
+ * unchanged.
+ */
+ ++bhp->ref;
+ b_incr = 1;
- if (LF_ISSET(DB_MPOOL_NEW)) {
- *pgnoaddr = mfp->last_pgno + 1;
- goto alloc;
- }
+ /*
+ * Any buffer we find might be trouble.
+ *
+ * BH_LOCKED --
+ * I/O is in progress. Because we've incremented the buffer
+ * reference count, we know the buffer can't move. Unlock
+ * the region lock, wait for the I/O to complete, and reacquire
+ * the region.
+ */
+ for (first = 1; F_ISSET(bhp, BH_LOCKED); first = 0) {
+ UNLOCKREGION(dbmp);
- /* Check the BH hash bucket queue. */
- bucket = BUCKET(mp, mf_offset, *pgnoaddr);
- for (cnt = 0,
- bhp = SH_TAILQ_FIRST(&dbmp->htab[bucket], __bh);
- bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh)) {
- ++cnt;
- if (bhp->pgno == *pgnoaddr && bhp->mf_offset == mf_offset) {
- addr = bhp->buf;
- ++mp->stat.st_hash_searches;
- if (cnt > mp->stat.st_hash_longest)
- mp->stat.st_hash_longest = cnt;
- mp->stat.st_hash_examined += cnt;
- goto found;
+ /*
+ * Explicitly yield the processor if it's not the first
+ * pass through this loop -- if we don't, we might end
+ * up running to the end of our CPU quantum as we will
+ * simply be swapping between the two locks.
+ */
+ if (!first && (__db_yield == NULL || __db_yield() != 0))
+ __db_sleep(0, 1);
+
+ LOCKBUFFER(dbmp, bhp);
+ /* Wait for I/O to finish... */
+ UNLOCKBUFFER(dbmp, bhp);
+ LOCKREGION(dbmp);
}
- }
- if (cnt != 0) {
- ++mp->stat.st_hash_searches;
- if (cnt > mp->stat.st_hash_longest)
- mp->stat.st_hash_longest = cnt;
- mp->stat.st_hash_examined += cnt;
+
+ /*
+ * BH_TRASH --
+ * The contents of the buffer are garbage. Shouldn't happen,
+ * and this read is likely to fail, but might as well try.
+ */
+ if (F_ISSET(bhp, BH_TRASH))
+ goto reread;
+
+ /*
+ * BH_CALLPGIN --
+ * The buffer was converted so it could be written, and the
+ * contents need to be converted again.
+ */
+ if (F_ISSET(bhp, BH_CALLPGIN)) {
+ if ((ret = __memp_pg(dbmfp, bhp, 1)) != 0)
+ goto err;
+ F_CLR(bhp, BH_CALLPGIN);
+ }
+
+ ++mp->stat.st_cache_hit;
+ ++mfp->stat.st_cache_hit;
+ *(void **)addrp = bhp->buf;
+ goto done;
}
-alloc: /*
- * Allocate a new buffer header and data space, and mark the contents
- * as useless.
- */
+alloc: /* Allocate new buffer header and data space. */
if ((ret = __memp_ralloc(dbmp, sizeof(BH) -
sizeof(u_int8_t) + mfp->stat.st_pagesize, NULL, &bhp)) != 0)
goto err;
- addr = bhp->buf;
-#ifdef DEBUG
- if ((ALIGNTYPE)addr & (sizeof(size_t) - 1)) {
+
+#ifdef DIAGNOSTIC
+ if ((ALIGNTYPE)bhp->buf & (sizeof(size_t) - 1)) {
__db_err(dbmp->dbenv,
"Internal error: BH data NOT size_t aligned.");
- abort();
+ ret = EINVAL;
+ goto err;
}
#endif
+ /* Initialize the BH fields. */
memset(bhp, 0, sizeof(BH));
LOCKINIT(dbmp, &bhp->mutex);
+ bhp->ref = 1;
+ bhp->pgno = *pgnoaddr;
+ bhp->mf_offset = mf_offset;
/*
* Prepend the bucket header to the head of the appropriate MPOOL
* bucket hash list. Append the bucket header to the tail of the
* MPOOL LRU chain.
- *
- * We have to do this before we read in the page so we can discard
- * our region lock without screwing up the world.
*/
- bucket = BUCKET(mp, mf_offset, *pgnoaddr);
SH_TAILQ_INSERT_HEAD(&dbmp->htab[bucket], bhp, hq, __bh);
SH_TAILQ_INSERT_TAIL(&mp->bhq, bhp, q);
- ++mp->stat.st_page_clean;
- b_inserted = 1;
-
- /* Set the page number, and associated MPOOLFILE. */
- bhp->mf_offset = mf_offset;
- bhp->pgno = *pgnoaddr;
/*
- * If we know we created the page, zero it out and continue.
+ * If we created the page, zero it out and continue.
*
* !!!
- * Note: DB_MPOOL_NEW deliberately doesn't call the pgin function.
+ * Note: DB_MPOOL_NEW specifically doesn't call the pgin function.
* If DB_MPOOL_CREATE is used, then the application's pgin function
* has to be able to handle pages of 0's -- if it uses DB_MPOOL_NEW,
* it can detect all of its page creates, and not bother.
*
* Otherwise, read the page into memory, optionally creating it if
* DB_MPOOL_CREATE is set.
- *
- * Increment the reference count for created buffers, but importantly,
- * increment the reference count for buffers we're about to read so
- * that the buffer can't move.
*/
- ++bhp->ref;
- b_incr = 1;
+ if (LF_ISSET(DB_MPOOL_NEW)) {
+ if (mfp->clear_len == 0)
+ memset(bhp->buf, 0, mfp->stat.st_pagesize);
+ else {
+ memset(bhp->buf, 0, mfp->clear_len);
+#ifdef DIAGNOSTIC
+ memset(bhp->buf + mfp->clear_len, 0xff,
+ mfp->stat.st_pagesize - mfp->clear_len);
+#endif
+ }
- if (LF_ISSET(DB_MPOOL_NEW))
- memset(addr, 0, mfp->stat.st_pagesize);
- else {
+ ++mp->stat.st_page_create;
+ ++mfp->stat.st_page_create;
+ } else {
/*
* It's possible for the read function to fail, which means
- * that we fail as well.
+ * that we fail as well. Note, the __memp_pgread() function
+ * discards the region lock, so the buffer must be pinned
+ * down so that it cannot move and its contents are unchanged.
*/
reread: if ((ret = __memp_pgread(dbmfp,
- bhp, LF_ISSET(DB_MPOOL_CREATE | DB_MPOOL_NEW))) != 0)
- goto err;
-
- /*
- * !!!
- * The __memp_pgread call discarded and reacquired the region
- * lock. Because the buffer reference count was incremented
- * before the region lock was discarded the buffer can't move
- * and its contents can't change.
- */
- ++mp->stat.st_cache_miss;
- ++mfp->stat.st_cache_miss;
- }
-
- if (0) {
-found: /* Increment the reference count. */
- if (bhp->ref == UINT16_T_MAX) {
- __db_err(dbmp->dbenv,
- "%s: too many references to page %lu",
- __memp_fn(dbmfp), bhp->pgno);
- ret = EINVAL;
- goto err;
- }
- ++bhp->ref;
- b_incr = 1;
-
- /*
- * Any found buffer might be trouble.
- *
- * BH_LOCKED --
- * I/O in progress, wait for it to finish. Because the buffer
- * reference count was incremented before the region lock was
- * discarded we know the buffer can't move and its contents
- * can't change.
- */
- for (cnt = 0; F_ISSET(bhp, BH_LOCKED); ++cnt) {
- UNLOCKREGION(dbmp);
-
+ bhp, LF_ISSET(DB_MPOOL_CREATE))) != 0) {
/*
- * Sleep so that we don't simply spin, switching locks.
- * (See the comment in include/mp.h.)
+ * !!!
+ * Discard the buffer unless another thread is waiting
+ * on our I/O to complete. Regardless, the header has
+ * the BH_TRASH flag set.
*/
- if (cnt != 0 &&
- (__db_yield == NULL || __db_yield() != 0))
- __db_sleep(0, 1);
-
- LOCKBUFFER(dbmp, bhp);
- /* Waiting for I/O to finish... */
- UNLOCKBUFFER(dbmp, bhp);
- LOCKREGION(dbmp);
- }
-
- /*
- * BH_TRASH --
- * The buffer is garbage.
- */
- if (F_ISSET(bhp, BH_TRASH))
- goto reread;
-
- /*
- * BH_CALLPGIN --
- * The buffer was written, and the contents need to be
- * converted again.
- */
- if (F_ISSET(bhp, BH_CALLPGIN)) {
- if ((ret = __memp_pg(dbmfp, bhp, 1)) != 0)
- goto err;
- F_CLR(bhp, BH_CALLPGIN);
+ if (bhp->ref == 1)
+ __memp_bhfree(dbmp, mfp, bhp, 1);
+ goto err;
}
- ++mp->stat.st_cache_hit;
- ++mfp->stat.st_cache_hit;
+ ++mp->stat.st_cache_miss;
+ ++mfp->stat.st_cache_miss;
}
/*
@@ -319,23 +323,30 @@ found: /* Increment the reference count. */
if (bhp->pgno > mfp->last_pgno)
mfp->last_pgno = bhp->pgno;
-mapret: LOCKHANDLE(dbmp, dbmfp->mutexp);
+ ++mp->stat.st_page_clean;
+ *(void **)addrp = bhp->buf;
+
+done: /* Update the chain search statistics. */
+ if (st_hsearch) {
+ ++mp->stat.st_hash_searches;
+ if (st_hsearch > mp->stat.st_hash_longest)
+ mp->stat.st_hash_longest = st_hsearch;
+ mp->stat.st_hash_examined += st_hsearch;
+ }
+
+ UNLOCKREGION(dbmp);
+
+ LOCKHANDLE(dbmp, dbmfp->mutexp);
++dbmfp->pinref;
UNLOCKHANDLE(dbmp, dbmfp->mutexp);
- if (0) {
-err: /*
- * If no other process is already waiting on a created buffer,
- * go ahead and discard it, it's not useful.
- */
- if (b_incr)
- --bhp->ref;
- if (b_inserted && bhp->ref == 0)
- __memp_bhfree(dbmp, mfp, bhp, 1);
- }
+ return (0);
+err: /* Discard our reference. */
+ if (b_incr)
+ --bhp->ref;
UNLOCKREGION(dbmp);
- *(void **)addrp = addr;
+ *(void **)addrp = NULL;
return (ret);
}
diff --git a/db2/mp/mp_fopen.c b/db2/mp/mp_fopen.c
index 0f41122..a4cbac8 100644
--- a/db2/mp/mp_fopen.c
+++ b/db2/mp/mp_fopen.c
@@ -1,24 +1,20 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
* Sleepycat Software. All rights reserved.
*/
#include "config.h"
#ifndef lint
-static const char sccsid[] = "@(#)mp_fopen.c 10.37 (Sleepycat) 1/18/98";
+static const char sccsid[] = "@(#)mp_fopen.c 10.47 (Sleepycat) 5/4/98";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
-#include <sys/mman.h>
-#include <sys/stat.h>
#include <errno.h>
-#include <stdlib.h>
#include <string.h>
-#include <unistd.h>
#endif
#include "db_int.h"
@@ -28,22 +24,21 @@ static const char sccsid[] = "@(#)mp_fopen.c 10.37 (Sleepycat) 1/18/98";
#include "common_ext.h"
static int __memp_mf_close __P((DB_MPOOL *, DB_MPOOLFILE *));
-static int __memp_mf_open __P((DB_MPOOL *, const char *,
- int, size_t, db_pgno_t, int, DBT *, u_int8_t *, MPOOLFILE **));
+static int __memp_mf_open __P((DB_MPOOL *,
+ const char *, size_t, db_pgno_t, DB_MPOOL_FINFO *, MPOOLFILE **));
/*
* memp_fopen --
* Open a backing file for the memory pool.
*/
int
-memp_fopen(dbmp, path, ftype,
- flags, mode, pagesize, lsn_offset, pgcookie, fileid, retp)
+memp_fopen(dbmp, path, flags, mode, pagesize, finfop, retp)
DB_MPOOL *dbmp;
const char *path;
- int ftype, flags, mode, lsn_offset;
+ u_int32_t flags;
+ int mode;
size_t pagesize;
- DBT *pgcookie;
- u_int8_t *fileid;
+ DB_MPOOL_FINFO *finfop;
DB_MPOOLFILE **retp;
{
int ret;
@@ -59,31 +54,31 @@ memp_fopen(dbmp, path, ftype,
return (EINVAL);
}
- return (__memp_fopen(dbmp, NULL, path, ftype,
- flags, mode, pagesize, lsn_offset, pgcookie, fileid, 1, retp));
+ return (__memp_fopen(dbmp,
+ NULL, path, flags, mode, pagesize, 1, finfop, retp));
}
/*
* __memp_fopen --
* Open a backing file for the memory pool; internal version.
*
- * PUBLIC: int __memp_fopen __P((DB_MPOOL *, MPOOLFILE *, const char *, int,
- * PUBLIC: int, int, size_t, int, DBT *, u_int8_t *, int, DB_MPOOLFILE **));
+ * PUBLIC: int __memp_fopen __P((DB_MPOOL *, MPOOLFILE *, const char *,
+ * PUBLIC: u_int32_t, int, size_t, int, DB_MPOOL_FINFO *, DB_MPOOLFILE **));
*/
int
-__memp_fopen(dbmp, mfp, path,
- ftype, flags, mode, pagesize, lsn_offset, pgcookie, fileid, needlock, retp)
+__memp_fopen(dbmp, mfp, path, flags, mode, pagesize, needlock, finfop, retp)
DB_MPOOL *dbmp;
MPOOLFILE *mfp;
const char *path;
- int ftype, flags, mode, lsn_offset, needlock;
+ u_int32_t flags;
+ int mode, needlock;
size_t pagesize;
- DBT *pgcookie;
- u_int8_t *fileid;
+ DB_MPOOL_FINFO *finfop;
DB_MPOOLFILE **retp;
{
DB_ENV *dbenv;
DB_MPOOLFILE *dbmfp;
+ DB_MPOOL_FINFO finfo;
db_pgno_t last_pgno;
size_t size;
u_int32_t mbytes, bytes;
@@ -91,18 +86,34 @@ __memp_fopen(dbmp, mfp, path,
u_int8_t idbuf[DB_FILE_ID_LEN];
char *rpath;
- /*
- * XXX
- * If mfp is provided, the following arguments do NOT need to be
- * specified:
- * lsn_offset
- * pgcookie
- * fileid
- */
dbenv = dbmp->dbenv;
ret = 0;
rpath = NULL;
+ /*
+ * If mfp is provided, we take the DB_MPOOL_FINFO information from
+ * the mfp. We don't bother initializing everything, because some
+ * of them are expensive to acquire. If no mfp is provided and the
+ * finfop argument is NULL, we default the values.
+ */
+ if (finfop == NULL) {
+ memset(&finfo, 0, sizeof(finfo));
+ if (mfp != NULL) {
+ finfo.ftype = mfp->ftype;
+ finfo.pgcookie = NULL;
+ finfo.fileid = NULL;
+ finfo.lsn_offset = mfp->lsn_off;
+ finfo.clear_len = mfp->clear_len;
+ } else {
+ finfo.ftype = 0;
+ finfo.pgcookie = NULL;
+ finfo.fileid = NULL;
+ finfo.lsn_offset = -1;
+ finfo.clear_len = 0;
+ }
+ finfop = &finfo;
+ }
+
/* Allocate and initialize the per-process structure. */
if ((dbmfp =
(DB_MPOOLFILE *)__db_calloc(1, sizeof(DB_MPOOLFILE))) == NULL) {
@@ -126,11 +137,11 @@ __memp_fopen(dbmp, mfp, path,
} else {
/* Get the real name for this file and open it. */
if ((ret = __db_appname(dbenv,
- DB_APP_DATA, NULL, path, NULL, &rpath)) != 0)
+ DB_APP_DATA, NULL, path, 0, NULL, &rpath)) != 0)
goto err;
if ((ret = __db_open(rpath,
- LF_ISSET(DB_CREATE | DB_RDONLY), DB_CREATE | DB_RDONLY,
- mode, &dbmfp->fd)) != 0) {
+ LF_ISSET(DB_CREATE | DB_RDONLY),
+ DB_CREATE | DB_RDONLY, mode, &dbmfp->fd)) != 0) {
__db_err(dbenv, "%s: %s", rpath, strerror(ret));
goto err;
}
@@ -156,12 +167,11 @@ __memp_fopen(dbmp, mfp, path,
* don't use timestamps, otherwise there'd be no chance of any
* other process joining the party.
*/
- if (mfp == NULL && fileid == NULL) {
+ if (finfop->fileid == NULL) {
if ((ret = __db_fileid(dbenv, rpath, 0, idbuf)) != 0)
goto err;
- fileid = idbuf;
+ finfop->fileid = idbuf;
}
- FREES(rpath);
}
/*
@@ -173,8 +183,8 @@ __memp_fopen(dbmp, mfp, path,
LOCKREGION(dbmp);
if (mfp == NULL)
- ret = __memp_mf_open(dbmp, path, ftype,
- pagesize, last_pgno, lsn_offset, pgcookie, fileid, &mfp);
+ ret = __memp_mf_open(dbmp,
+ path, pagesize, last_pgno, finfop, &mfp);
else {
++mfp->ref;
ret = 0;
@@ -218,7 +228,7 @@ __memp_fopen(dbmp, mfp, path,
F_CLR(mfp, MP_CAN_MMAP);
if (path == NULL)
F_CLR(mfp, MP_CAN_MMAP);
- if (ftype != 0)
+ if (finfop->ftype != 0)
F_CLR(mfp, MP_CAN_MMAP);
if (LF_ISSET(DB_NOMMAP))
F_CLR(mfp, MP_CAN_MMAP);
@@ -229,11 +239,14 @@ __memp_fopen(dbmp, mfp, path,
dbmfp->addr = NULL;
if (F_ISSET(mfp, MP_CAN_MMAP)) {
dbmfp->len = size;
- if (__db_map(dbmfp->fd, dbmfp->len, 1, 1, &dbmfp->addr) != 0) {
+ if (__db_mapfile(rpath,
+ dbmfp->fd, dbmfp->len, 1, &dbmfp->addr) != 0) {
dbmfp->addr = NULL;
F_CLR(mfp, MP_CAN_MMAP);
}
}
+ if (rpath != NULL)
+ FREES(rpath);
LOCKHANDLE(dbmp, dbmp->mutexp);
TAILQ_INSERT_TAIL(&dbmp->dbmfq, dbmfp, q);
@@ -260,15 +273,12 @@ err: /*
* Open an MPOOLFILE.
*/
static int
-__memp_mf_open(dbmp, path,
- ftype, pagesize, last_pgno, lsn_offset, pgcookie, fileid, retp)
+__memp_mf_open(dbmp, path, pagesize, last_pgno, finfop, retp)
DB_MPOOL *dbmp;
const char *path;
- int ftype, lsn_offset;
size_t pagesize;
db_pgno_t last_pgno;
- DBT *pgcookie;
- u_int8_t *fileid;
+ DB_MPOOL_FINFO *finfop;
MPOOLFILE **retp;
{
MPOOLFILE *mfp;
@@ -286,12 +296,13 @@ __memp_mf_open(dbmp, path,
mfp != NULL; mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)) {
if (F_ISSET(mfp, MP_TEMP))
continue;
- if (!memcmp(fileid,
+ if (!memcmp(finfop->fileid,
R_ADDR(dbmp, mfp->fileid_off), DB_FILE_ID_LEN)) {
- if (ftype != mfp->ftype ||
+ if (finfop->clear_len != mfp->clear_len ||
+ finfop->ftype != mfp->ftype ||
pagesize != mfp->stat.st_pagesize) {
__db_err(dbmp->dbenv,
- "%s: ftype or pagesize changed",
+ "%s: ftype, clear length or pagesize changed",
path);
return (EINVAL);
}
@@ -311,8 +322,9 @@ __memp_mf_open(dbmp, path,
/* Initialize the structure. */
memset(mfp, 0, sizeof(MPOOLFILE));
mfp->ref = 1;
- mfp->ftype = ftype;
- mfp->lsn_off = lsn_offset;
+ mfp->ftype = finfop->ftype;
+ mfp->lsn_off = finfop->lsn_offset;
+ mfp->clear_len = finfop->clear_len;
/*
* If the user specifies DB_MPOOL_LAST or DB_MPOOL_NEW on a memp_fget,
@@ -320,7 +332,7 @@ __memp_mf_open(dbmp, path,
* it away.
*/
mfp->stat.st_pagesize = pagesize;
- mfp->last_pgno = last_pgno;
+ mfp->orig_last_pgno = mfp->last_pgno = last_pgno;
F_SET(mfp, MP_CAN_MMAP);
if (ISTEMPORARY)
@@ -336,19 +348,19 @@ __memp_mf_open(dbmp, path,
if ((ret = __memp_ralloc(dbmp,
DB_FILE_ID_LEN, &mfp->fileid_off, &p)) != 0)
goto err;
- memcpy(p, fileid, DB_FILE_ID_LEN);
+ memcpy(p, finfop->fileid, DB_FILE_ID_LEN);
}
/* Copy the page cookie into shared memory. */
- if (pgcookie == NULL || pgcookie->size == 0) {
+ if (finfop->pgcookie == NULL || finfop->pgcookie->size == 0) {
mfp->pgcookie_len = 0;
mfp->pgcookie_off = 0;
} else {
if ((ret = __memp_ralloc(dbmp,
- pgcookie->size, &mfp->pgcookie_off, &p)) != 0)
+ finfop->pgcookie->size, &mfp->pgcookie_off, &p)) != 0)
goto err;
- memcpy(p, pgcookie->data, pgcookie->size);
- mfp->pgcookie_len = pgcookie->size;
+ memcpy(p, finfop->pgcookie->data, finfop->pgcookie->size);
+ mfp->pgcookie_len = finfop->pgcookie->size;
}
/* Prepend the MPOOLFILE to the list of MPOOLFILE's. */
@@ -397,7 +409,7 @@ memp_fclose(dbmfp)
/* Discard any mmap information. */
if (dbmfp->addr != NULL &&
- (ret = __db_unmap(dbmfp->addr, dbmfp->len)) != 0)
+ (ret = __db_unmapfile(dbmfp->addr, dbmfp->len)) != 0)
__db_err(dbmp->dbenv,
"%s: %s", __memp_fn(dbmfp), strerror(ret));
@@ -480,13 +492,13 @@ __memp_mf_close(dbmp, dbmfp)
SH_TAILQ_REMOVE(&mp->mpfq, mfp, q, __mpoolfile);
/* Free the space. */
- __db_shalloc_free(dbmp->addr, mfp);
if (mfp->path_off != 0)
__db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->path_off));
if (mfp->fileid_off != 0)
__db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->fileid_off));
if (mfp->pgcookie_off != 0)
__db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->pgcookie_off));
+ __db_shalloc_free(dbmp->addr, mfp);
ret1: UNLOCKREGION(dbmp);
return (0);
diff --git a/db2/mp/mp_fput.c b/db2/mp/mp_fput.c
index 335ee9f..5675493 100644
--- a/db2/mp/mp_fput.c
+++ b/db2/mp/mp_fput.c
@@ -1,20 +1,19 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
* Sleepycat Software. All rights reserved.
*/
#include "config.h"
#ifndef lint
-static const char sccsid[] = "@(#)mp_fput.c 10.17 (Sleepycat) 12/20/97";
+static const char sccsid[] = "@(#)mp_fput.c 10.22 (Sleepycat) 4/26/98";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
#include <errno.h>
-#include <stdlib.h>
#endif
#include "db_int.h"
@@ -31,12 +30,11 @@ int
memp_fput(dbmfp, pgaddr, flags)
DB_MPOOLFILE *dbmfp;
void *pgaddr;
- int flags;
+ u_int32_t flags;
{
BH *bhp;
DB_MPOOL *dbmp;
MPOOL *mp;
- MPOOLFILE *mfp;
int wrote, ret;
dbmp = dbmfp->dbmp;
@@ -71,8 +69,9 @@ memp_fput(dbmfp, pgaddr, flags)
/*
* If we're mapping the file, there's nothing to do. Because we can
- * quit mapping at any time, we have to check on each buffer to see
- * if it's in the map region.
+ * stop mapping the file at any time, we have to check on each buffer
+ * to see if the address we gave the application was part of the map
+ * region.
*/
if (dbmfp->addr != NULL && pgaddr >= dbmfp->addr &&
(u_int8_t *)pgaddr <= (u_int8_t *)dbmfp->addr + dbmfp->len)
@@ -98,36 +97,33 @@ memp_fput(dbmfp, pgaddr, flags)
F_SET(bhp, BH_DISCARD);
/*
- * If more than one reference to the page, we're done. Ignore discard
- * flags (for now) and leave it at its position in the LRU chain. The
- * rest gets done at last reference close.
+ * Check for a reference count going to zero. This can happen if the
+ * application returns a page twice.
*/
-#ifdef DEBUG
if (bhp->ref == 0) {
- __db_err(dbmp->dbenv,
- "Unpinned page returned: reference count on page %lu went negative.",
- (u_long)bhp->pgno);
- abort();
+ __db_err(dbmp->dbenv, "%s: page %lu: unpinned page returned",
+ __memp_fn(dbmfp), (u_long)bhp->pgno);
+ UNLOCKREGION(dbmp);
+ return (EINVAL);
}
-#endif
+
+ /*
+ * If more than one reference to the page, we're done. Ignore the
+ * discard flags (for now) and leave it at its position in the LRU
+ * chain. The rest gets done at last reference close.
+ */
if (--bhp->ref > 0) {
UNLOCKREGION(dbmp);
return (0);
}
- /* Move the buffer to the head/tail of the LRU chain. */
- SH_TAILQ_REMOVE(&mp->bhq, bhp, q, __bh);
- if (F_ISSET(bhp, BH_DISCARD))
- SH_TAILQ_INSERT_HEAD(&mp->bhq, bhp, q, __bh);
- else
- SH_TAILQ_INSERT_TAIL(&mp->bhq, bhp, q);
-
/*
- * If this buffer is scheduled for writing because of a checkpoint,
- * write it now. If we can't write it, set a flag so that the next
- * time the memp_sync function is called we try writing it there,
- * as the checkpoint application better be able to write all of the
- * files.
+ * If this buffer is scheduled for writing because of a checkpoint, we
+ * need to write it (if we marked it dirty), or update the checkpoint
+ * counters (if we didn't mark it dirty). If we try to write it and
+ * can't, that's not necessarily an error, but set a flag so that the
+ * next time the memp_sync function runs we try writing it there, as
+ * the checkpoint application better be able to write all of the files.
*/
if (F_ISSET(bhp, BH_WRITE))
if (F_ISSET(bhp, BH_DIRTY)) {
@@ -137,12 +133,18 @@ memp_fput(dbmfp, pgaddr, flags)
} else {
F_CLR(bhp, BH_WRITE);
- mfp = R_ADDR(dbmp, bhp->mf_offset);
- --mfp->lsn_cnt;
-
+ --dbmfp->mfp->lsn_cnt;
--mp->lsn_cnt;
}
+ /* Move the buffer to the head/tail of the LRU chain. */
+ SH_TAILQ_REMOVE(&mp->bhq, bhp, q, __bh);
+ if (F_ISSET(bhp, BH_DISCARD))
+ SH_TAILQ_INSERT_HEAD(&mp->bhq, bhp, q, __bh);
+ else
+ SH_TAILQ_INSERT_TAIL(&mp->bhq, bhp, q);
+
+
UNLOCKREGION(dbmp);
return (0);
}
diff --git a/db2/mp/mp_fset.c b/db2/mp/mp_fset.c
index 2eff7dd..3b352aa 100644
--- a/db2/mp/mp_fset.c
+++ b/db2/mp/mp_fset.c
@@ -1,13 +1,13 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
* Sleepycat Software. All rights reserved.
*/
#include "config.h"
#ifndef lint
-static const char sccsid[] = "@(#)mp_fset.c 10.12 (Sleepycat) 11/26/97";
+static const char sccsid[] = "@(#)mp_fset.c 10.15 (Sleepycat) 4/26/98";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -30,16 +30,14 @@ int
memp_fset(dbmfp, pgaddr, flags)
DB_MPOOLFILE *dbmfp;
void *pgaddr;
- int flags;
+ u_int32_t flags;
{
BH *bhp;
DB_MPOOL *dbmp;
MPOOL *mp;
- MPOOLFILE *mfp;
int ret;
dbmp = dbmfp->dbmp;
- mfp = dbmfp->mfp;
mp = dbmp->mp;
/* Validate arguments. */
diff --git a/db2/mp/mp_open.c b/db2/mp/mp_open.c
index ca81f8d..fc985bc 100644
--- a/db2/mp/mp_open.c
+++ b/db2/mp/mp_open.c
@@ -1,23 +1,20 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
* Sleepycat Software. All rights reserved.
*/
#include "config.h"
#ifndef lint
-static const char sccsid[] = "@(#)mp_open.c 10.16 (Sleepycat) 11/28/97";
+static const char sccsid[] = "@(#)mp_open.c 10.23 (Sleepycat) 5/3/98";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
#include <errno.h>
-#include <fcntl.h>
-#include <stdlib.h>
#include <string.h>
-#include <unistd.h>
#endif
#include "db_int.h"
@@ -33,13 +30,14 @@ static const char sccsid[] = "@(#)mp_open.c 10.16 (Sleepycat) 11/28/97";
int
memp_open(path, flags, mode, dbenv, retp)
const char *path;
- int flags, mode;
+ u_int32_t flags;
+ int mode;
DB_ENV *dbenv;
DB_MPOOL **retp;
{
DB_MPOOL *dbmp;
size_t cachesize;
- int ret;
+ int is_private, ret;
/* Validate arguments. */
#ifdef HAVE_SPINLOCKS
@@ -62,15 +60,16 @@ memp_open(path, flags, mode, dbenv, retp)
dbmp->dbenv = dbenv;
/* Decide if it's possible for anyone else to access the pool. */
- if ((dbenv == NULL && path == NULL) || LF_ISSET(DB_MPOOL_PRIVATE))
- F_SET(dbmp, MP_ISPRIVATE);
+ is_private =
+ (dbenv == NULL && path == NULL) || LF_ISSET(DB_MPOOL_PRIVATE);
/*
* Map in the region. We do locking regardless, as portions of it are
* implemented in common code (if we put the region in a file, that is).
*/
F_SET(dbmp, MP_LOCKREGION);
- if ((ret = __memp_ropen(dbmp, path, cachesize, mode, flags)) != 0)
+ if ((ret = __memp_ropen(dbmp,
+ path, cachesize, mode, is_private, LF_ISSET(DB_CREATE))) != 0)
goto err;
F_CLR(dbmp, MP_LOCKREGION);
@@ -79,7 +78,7 @@ memp_open(path, flags, mode, dbenv, retp)
* If it's threaded, then we have to lock both the handles and the
* region, and we need to allocate a mutex for that purpose.
*/
- if (!F_ISSET(dbmp, MP_ISPRIVATE))
+ if (!is_private)
F_SET(dbmp, MP_LOCKREGION);
if (LF_ISSET(DB_THREAD)) {
F_SET(dbmp, MP_LOCKHANDLE | MP_LOCKREGION);
@@ -135,10 +134,11 @@ memp_close(dbmp)
}
/* Close the region. */
- if ((t_ret = __memp_rclose(dbmp)) && ret == 0)
+ if ((t_ret = __db_rdetach(&dbmp->reginfo)) != 0 && ret == 0)
ret = t_ret;
- /* Discard the structure. */
+ if (dbmp->reginfo.path != NULL)
+ FREES(dbmp->reginfo.path);
FREE(dbmp, sizeof(DB_MPOOL));
return (ret);
@@ -154,8 +154,19 @@ memp_unlink(path, force, dbenv)
int force;
DB_ENV *dbenv;
{
- return (__db_runlink(dbenv,
- DB_APP_NONE, path, DB_DEFAULT_MPOOL_FILE, force));
+ REGINFO reginfo;
+ int ret;
+
+ memset(&reginfo, 0, sizeof(reginfo));
+ reginfo.dbenv = dbenv;
+ reginfo.appname = DB_APP_NONE;
+ if (path != NULL && (reginfo.path = __db_strdup(path)) == NULL)
+ return (ENOMEM);
+ reginfo.file = DB_DEFAULT_MPOOL_FILE;
+ ret = __db_runlink(&reginfo, force);
+ if (reginfo.path != NULL)
+ FREES(reginfo.path);
+ return (ret);
}
/*
diff --git a/db2/mp/mp_pr.c b/db2/mp/mp_pr.c
index 13a6c62..e83e0f4 100644
--- a/db2/mp/mp_pr.c
+++ b/db2/mp/mp_pr.c
@@ -1,13 +1,13 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
* Sleepycat Software. All rights reserved.
*/
#include "config.h"
#ifndef lint
-static const char sccsid[] = "@(#)mp_pr.c 10.21 (Sleepycat) 1/6/98";
+static const char sccsid[] = "@(#)mp_pr.c 10.26 (Sleepycat) 5/23/98";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -15,20 +15,20 @@ static const char sccsid[] = "@(#)mp_pr.c 10.21 (Sleepycat) 1/6/98";
#include <errno.h>
#include <stdio.h>
-#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#endif
#include "db_int.h"
+#include "db_page.h"
#include "shqueue.h"
#include "db_shash.h"
#include "mp.h"
+#include "db_auto.h"
+#include "db_ext.h"
+#include "common_ext.h"
-static void __memp_pbh __P((FILE *, DB_MPOOL *, BH *, int));
-static void __memp_pdbmf __P((FILE *, DB_MPOOLFILE *, int));
-static void __memp_pmf __P((FILE *, MPOOLFILE *, int));
-static void __memp_pmp __P((FILE *, DB_MPOOL *, MPOOL *, int));
+static void __memp_pbh __P((DB_MPOOL *, BH *, size_t *, FILE *));
/*
* memp_stat --
@@ -64,6 +64,8 @@ memp_stat(dbmp, gspp, fspp, db_malloc)
dbmp->mp->rlayout.lock.mutex_set_wait;
(*gspp)->st_region_nowait =
dbmp->mp->rlayout.lock.mutex_set_nowait;
+ (*gspp)->st_refcnt = dbmp->mp->rlayout.refcnt;
+ (*gspp)->st_regsize = dbmp->mp->rlayout.size;
UNLOCKREGION(dbmp);
}
@@ -77,7 +79,8 @@ memp_stat(dbmp, gspp, fspp, db_malloc)
for (len = 0,
mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile);
mfp != NULL;
- ++len, mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile));
+ ++len, mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile))
+ ;
UNLOCKREGION(dbmp);
@@ -148,174 +151,118 @@ __memp_fns(dbmp, mfp)
return ((char *)R_ADDR(dbmp, mfp->path_off));
}
+#define FMAP_ENTRIES 200 /* Files we map. */
+
+#define MPOOL_DUMP_HASH 0x01 /* Debug hash chains. */
+#define MPOOL_DUMP_LRU 0x02 /* Debug LRU chains. */
+#define MPOOL_DUMP_MEM 0x04 /* Debug region memory. */
+#define MPOOL_DUMP_ALL 0x07 /* Debug all. */
+
+
/*
- * __memp_debug --
+ * __memp_dump_region --
* Display MPOOL structures.
*
+ * Each character of area selects what is dumped: 'A' everything, 'h' the
+ * BH hash chains, 'l' the BH LRU list, 'm' the region memory (via
+ * __db_shalloc_dump).  Unrecognized characters are ignored.  The list of
+ * shared and per-process files is always printed.  If fp is NULL, output
+ * goes to stderr.  The region is locked for the duration of the dump.
+ *
- * PUBLIC: void __memp_debug __P((DB_MPOOL *, FILE *, int));
+ * PUBLIC: void __memp_dump_region __P((DB_MPOOL *, char *, FILE *));
*/
void
-__memp_debug(dbmp, fp, data)
+__memp_dump_region(dbmp, area, fp)
DB_MPOOL *dbmp;
+ char *area;
FILE *fp;
- int data;
{
+ BH *bhp;
+ DB_HASHTAB *htabp;
DB_MPOOLFILE *dbmfp;
- u_long cnt;
+ MPOOL *mp;
+ MPOOLFILE *mfp;
+ size_t bucket, fmap[FMAP_ENTRIES + 1];
+ u_int32_t flags;
+ int cnt;
/* Make it easy to call from the debugger. */
if (fp == NULL)
fp = stderr;
- /* Welcome message. */
- (void)fprintf(fp, "%s\nMpool per-process (%lu) statistics\n",
- DB_LINE, (u_long)getpid());
-
- if (data)
- (void)fprintf(fp, " fd: %d; addr %lx; maddr %lx\n",
- dbmp->fd, (u_long)dbmp->addr, (u_long)dbmp->maddr);
-
- /* Display the DB_MPOOLFILE structures. */
- for (cnt = 0, dbmfp = TAILQ_FIRST(&dbmp->dbmfq);
- dbmfp != NULL; ++cnt, dbmfp = TAILQ_NEXT(dbmfp, q));
- (void)fprintf(fp, "%lu process-local files\n", cnt);
- for (dbmfp = TAILQ_FIRST(&dbmp->dbmfq);
- dbmfp != NULL; dbmfp = TAILQ_NEXT(dbmfp, q)) {
- (void)fprintf(fp, "%s\n", __memp_fn(dbmfp));
- __memp_pdbmf(fp, dbmfp, data);
- }
+ /* Translate the area characters into MPOOL_DUMP_* flags. */
+ for (flags = 0; *area != '\0'; ++area)
+ switch (*area) {
+ case 'A':
+ LF_SET(MPOOL_DUMP_ALL);
+ break;
+ case 'h':
+ LF_SET(MPOOL_DUMP_HASH);
+ break;
+ case 'l':
+ LF_SET(MPOOL_DUMP_LRU);
+ break;
+ case 'm':
+ LF_SET(MPOOL_DUMP_MEM);
+ break;
+ }
- /* Switch to global statistics. */
- (void)fprintf(fp, "\n%s\nMpool statistics\n", DB_LINE);
+ LOCKREGION(dbmp);
- /* Display the MPOOL structure. */
- __memp_pmp(fp, dbmp, dbmp->mp, data);
+ mp = dbmp->mp;
- /* Flush in case we're debugging. */
- (void)fflush(fp);
-}
-
-/*
- * __memp_pdbmf --
- * Display a DB_MPOOLFILE structure.
- */
-static void
-__memp_pdbmf(fp, dbmfp, data)
- FILE *fp;
- DB_MPOOLFILE *dbmfp;
- int data;
-{
- if (!data)
- return;
-
- (void)fprintf(fp, " fd: %d; %s\n",
- dbmfp->fd, F_ISSET(dbmfp, MP_READONLY) ? "readonly" : "read/write");
-}
-
-/*
- * __memp_pmp --
- * Display the MPOOL structure.
- */
-static void
-__memp_pmp(fp, dbmp, mp, data)
- FILE *fp;
- DB_MPOOL *dbmp;
- MPOOL *mp;
- int data;
-{
- BH *bhp;
- MPOOLFILE *mfp;
- DB_HASHTAB *htabp;
- size_t bucket;
- int cnt;
- const char *sep;
-
- (void)fprintf(fp, "references: %lu; cachesize: %lu\n",
- (u_long)mp->rlayout.refcnt, (u_long)mp->stat.st_cachesize);
- (void)fprintf(fp,
- " %lu pages created\n", (u_long)mp->stat.st_page_create);
- (void)fprintf(fp,
- " %lu mmap pages returned\n", (u_long)mp->stat.st_map);
- (void)fprintf(fp, " %lu I/O's (%lu read, %lu written)\n",
- (u_long)mp->stat.st_page_in + mp->stat.st_page_out,
- (u_long)mp->stat.st_page_in, (u_long)mp->stat.st_page_out);
- if (mp->stat.st_cache_hit + mp->stat.st_cache_miss != 0)
- (void)fprintf(fp,
- " %.0f%% cache hit rate (%lu hit, %lu miss)\n",
- ((double)mp->stat.st_cache_hit /
- (mp->stat.st_cache_hit + mp->stat.st_cache_miss)) * 100,
- (u_long)mp->stat.st_cache_hit,
- (u_long)mp->stat.st_cache_miss);
+ /* Display MPOOL structures. */
+ (void)fprintf(fp, "%s\nPool (region addr 0x%lx, alloc addr 0x%lx)\n",
+ DB_LINE, (u_long)dbmp->reginfo.addr, (u_long)dbmp->addr);
/* Display the MPOOLFILE structures. */
- for (cnt = 0, mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile);
- mfp != NULL; ++cnt, mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile));
- (void)fprintf(fp, "%d total files\n", cnt);
- for (cnt = 1, mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile);
- mfp != NULL; ++cnt, mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)) {
- (void)fprintf(fp, "file %d\n", cnt);
- __memp_pmf(fp, mfp, data);
+ cnt = 0;
+ for (mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile);
+ mfp != NULL; mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile), ++cnt) {
+ (void)fprintf(fp, "file #%d: %s: %lu references: %s\n",
+ cnt + 1, __memp_fns(dbmp, mfp), (u_long)mfp->ref,
+ F_ISSET(mfp, MP_CAN_MMAP) ? "mmap" : "read/write");
+ /* Remember the file's region offset so buffers can be shown as "#N". */
+ if (cnt < FMAP_ENTRIES)
+ fmap[cnt] = R_OFFSET(dbmp, mfp);
}
- if (!data)
- return;
+ /* Display the per-process DB_MPOOLFILE handles, continuing the numbering. */
+ for (dbmfp = TAILQ_FIRST(&dbmp->dbmfq);
+ dbmfp != NULL; dbmfp = TAILQ_NEXT(dbmfp, q), ++cnt) {
+ (void)fprintf(fp, "file #%d: %s: fd: %d: per-process, %s\n",
+ cnt + 1, __memp_fn(dbmfp), dbmfp->fd,
+ F_ISSET(dbmfp, MP_READONLY) ? "readonly" : "read/write");
+ /*
+ * Map the handle's own MPOOLFILE.  The shared-list cursor mfp is
+ * always NULL here, because the loop above only ends when it is.
+ */
+ if (cnt < FMAP_ENTRIES)
+ fmap[cnt] = R_OFFSET(dbmp, dbmfp->mfp);
+ }
+ /* Terminate the map; __memp_pbh stops searching at INVALID. */
+ if (cnt < FMAP_ENTRIES)
+ fmap[cnt] = INVALID;
+ else
+ fmap[FMAP_ENTRIES] = INVALID;
/* Display the hash table list of BH's. */
- (void)fprintf(fp, "%s\nHASH table of BH's (%lu buckets):\n",
- DB_LINE, (u_long)mp->htab_buckets);
- (void)fprintf(fp,
- "longest chain searched %lu\n", (u_long)mp->stat.st_hash_longest);
- (void)fprintf(fp, "average chain searched %lu (total/calls: %lu/%lu)\n",
- (u_long)mp->stat.st_hash_examined /
- (mp->stat.st_hash_searches ? mp->stat.st_hash_searches : 1),
- (u_long)mp->stat.st_hash_examined,
- (u_long)mp->stat.st_hash_searches);
- for (htabp = dbmp->htab,
- bucket = 0; bucket < mp->htab_buckets; ++htabp, ++bucket) {
- if (SH_TAILQ_FIRST(&dbmp->htab[bucket], __bh) != NULL)
- (void)fprintf(fp, "%lu:\n", (u_long)bucket);
- for (bhp = SH_TAILQ_FIRST(&dbmp->htab[bucket], __bh);
- bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh))
- __memp_pbh(fp, dbmp, bhp, data);
+ if (LF_ISSET(MPOOL_DUMP_HASH)) {
+ (void)fprintf(fp,
+ "%s\nBH hash table (%lu hash slots)\npageno, file, ref, address\n",
+ DB_LINE, (u_long)mp->htab_buckets);
+ for (htabp = dbmp->htab,
+ bucket = 0; bucket < mp->htab_buckets; ++htabp, ++bucket) {
+ if (SH_TAILQ_FIRST(&dbmp->htab[bucket], __bh) != NULL)
+ (void)fprintf(fp, "%lu:\n", (u_long)bucket);
+ for (bhp = SH_TAILQ_FIRST(&dbmp->htab[bucket], __bh);
+ bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh))
+ __memp_pbh(dbmp, bhp, fmap, fp);
+ }
}
/* Display the LRU list of BH's. */
- (void)fprintf(fp, "LRU list of BH's (pgno/offset):");
- for (sep = "\n ", bhp = SH_TAILQ_FIRST(&dbmp->mp->bhq, __bh);
- bhp != NULL; sep = ", ", bhp = SH_TAILQ_NEXT(bhp, q, __bh))
- (void)fprintf(fp, "%s%lu/%lu", sep,
- (u_long)bhp->pgno, (u_long)R_OFFSET(dbmp, bhp));
- (void)fprintf(fp, "\n");
-}
+ if (LF_ISSET(MPOOL_DUMP_LRU)) {
+ (void)fprintf(fp, "%s\nBH LRU list\n", DB_LINE);
+ (void)fprintf(fp, "pageno, file, ref, address\n");
+ for (bhp = SH_TAILQ_FIRST(&dbmp->mp->bhq, __bh);
+ bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, q, __bh))
+ __memp_pbh(dbmp, bhp, fmap, fp);
+ }
-/*
- * __memp_pmf --
- * Display an MPOOLFILE structure.
- */
-static void
-__memp_pmf(fp, mfp, data)
- FILE *fp;
- MPOOLFILE *mfp;
- int data;
-{
- (void)fprintf(fp, " %lu pages created\n",
- (u_long)mfp->stat.st_page_create);
- (void)fprintf(fp, " %lu I/O's (%lu read, %lu written)\n",
- (u_long)mfp->stat.st_page_in + mfp->stat.st_page_out,
- (u_long)mfp->stat.st_page_in, (u_long)mfp->stat.st_page_out);
- if (mfp->stat.st_cache_hit + mfp->stat.st_cache_miss != 0)
- (void)fprintf(fp,
- " %.0f%% cache hit rate (%lu hit, %lu miss)\n",
- ((double)mfp->stat.st_cache_hit /
- (mfp->stat.st_cache_hit + mfp->stat.st_cache_miss)) * 100,
- (u_long)mfp->stat.st_cache_hit,
- (u_long)mfp->stat.st_cache_miss);
- if (!data)
- return;
-
- (void)fprintf(fp, " %d references; %s; pagesize: %lu\n", mfp->ref,
- F_ISSET(mfp, MP_CAN_MMAP) ? "mmap" : "read/write",
- (u_long)mfp->stat.st_pagesize);
+ if (LF_ISSET(MPOOL_DUMP_MEM))
+ __db_shalloc_dump(dbmp->addr, fp);
+
+ UNLOCKREGION(dbmp);
+
+ /* Flush in case we're debugging. */
+ (void)fflush(fp);
}
/*
@@ -323,28 +270,37 @@ __memp_pmf(fp, mfp, data)
* Display a BH structure.
*/
static void
-__memp_pbh(fp, dbmp, bhp, data)
- FILE *fp;
+__memp_pbh(dbmp, bhp, fmap, fp)
DB_MPOOL *dbmp;
BH *bhp;
- int data;
+ size_t *fmap;
+ FILE *fp;
{
- const char *sep;
-
- if (!data)
- return;
-
- (void)fprintf(fp, " BH @ %lu (mf: %lu): page %lu; ref %lu",
- (u_long)R_OFFSET(dbmp, bhp),
- (u_long)bhp->mf_offset, (u_long)bhp->pgno, (u_long)bhp->ref);
- sep = "; ";
- if (F_ISSET(bhp, BH_DIRTY)) {
- (void)fprintf(fp, "%sdirty", sep);
- sep = ", ";
- }
- if (F_ISSET(bhp, BH_WRITE)) {
- (void)fprintf(fp, "%schk_write", sep);
- sep = ", ";
- }
+ /*
+ * Print one line for bhp to fp: page number, owning file, reference
+ * count and the buffer's offset in the region, followed by its flags.
+ * fmap is an array of MPOOLFILE region offsets terminated by INVALID.
+ */
+ /* Flag bits and the names passed to __db_prflags; { 0 } ends the table. */
+ static const FN fn[] = {
+ { BH_CALLPGIN, "callpgin" },
+ { BH_DIRTY, "dirty" },
+ { BH_DISCARD, "discard" },
+ { BH_LOCKED, "locked" },
+ { BH_TRASH, "trash" },
+ { BH_WRITE, "write" },
+ { 0 },
+ };
+ int i;
+
+ /* Look up the buffer's file in fmap, stopping at the INVALID terminator. */
+ for (i = 0; i < FMAP_ENTRIES; ++i)
+ if (fmap[i] == INVALID || fmap[i] == bhp->mf_offset)
+ break;
+
+ /* Not in the map: show the raw MPOOLFILE offset as the file column. */
+ if (fmap[i] == INVALID)
+ (void)fprintf(fp, " %4lu, %lu, %2lu, %lu",
+ (u_long)bhp->pgno, (u_long)bhp->mf_offset,
+ (u_long)bhp->ref, (u_long)R_OFFSET(dbmp, bhp));
+ /* In the map: show the 1-based map position as "#N". */
+ else
+ (void)fprintf(fp, " %4lu, #%d, %2lu, %lu",
+ (u_long)bhp->pgno, i + 1,
+ (u_long)bhp->ref, (u_long)R_OFFSET(dbmp, bhp));
+
+ /* Presumably prints the fn names of the bits set in bhp->flags -- confirm. */
+ __db_prflags(bhp->flags, fn, fp);
+
(void)fprintf(fp, "\n");
}
diff --git a/db2/mp/mp_region.c b/db2/mp/mp_region.c
index c20e669..6b92fbd 100644
--- a/db2/mp/mp_region.c
+++ b/db2/mp/mp_region.c
@@ -1,24 +1,20 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
* Sleepycat Software. All rights reserved.
*/
#include "config.h"
#ifndef lint
-static const char sccsid[] = "@(#)mp_region.c 10.18 (Sleepycat) 11/29/97";
+static const char sccsid[] = "@(#)mp_region.c 10.30 (Sleepycat) 5/31/98";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
-#include <sys/stat.h>
#include <errno.h>
-#include <fcntl.h>
-#include <stdlib.h>
#include <string.h>
-#include <unistd.h>
#endif
#include "db_int.h"
@@ -86,7 +82,7 @@ alloc: if ((ret = __db_shalloc(dbmp->addr, len, MUTEX_ALIGNMENT, &p)) == 0) {
/*
* Retry as soon as we've freed up sufficient space. If we
- * have to coalesce of memory to satisfy the request, don't
+ * will have to coalesce memory to satisfy the request, don't
* try until it's likely (possible?) that we'll succeed.
*/
total += fsize = __db_shsizeof(bhp);
@@ -179,18 +175,19 @@ retry: /* Find a buffer we can flush; pure LRU. */
* Attach to, and optionally create, the mpool region.
*
* PUBLIC: int __memp_ropen
- * PUBLIC: __P((DB_MPOOL *, const char *, size_t, int, int));
+ * PUBLIC: __P((DB_MPOOL *, const char *, size_t, int, int, u_int32_t));
*/
int
-__memp_ropen(dbmp, path, cachesize, mode, flags)
+__memp_ropen(dbmp, path, cachesize, mode, is_private, flags)
DB_MPOOL *dbmp;
const char *path;
size_t cachesize;
- int mode, flags;
+ int mode, is_private;
+ u_int32_t flags;
{
MPOOL *mp;
size_t rlen;
- int fd, newregion, ret, retry_cnt;
+ int defcache, ret;
/*
* Unlike other DB subsystems, mpool can't simply grow the region
@@ -204,155 +201,107 @@ __memp_ropen(dbmp, path, cachesize, mode, flags)
*
* Up the user's cachesize by 25% to account for our overhead.
*/
+ defcache = 0;
if (cachesize < DB_CACHESIZE_MIN)
- if (cachesize == 0)
+ if (cachesize == 0) {
+ defcache = 1;
cachesize = DB_CACHESIZE_DEF;
- else
+ } else
cachesize = DB_CACHESIZE_MIN;
rlen = cachesize + cachesize / 4;
- /* Map in the region. */
- retry_cnt = newregion = 0;
-retry: if (LF_ISSET(DB_CREATE)) {
- /*
- * If it's a private mpool, use malloc, it's a lot faster than
- * instantiating a region.
- *
- * XXX
- * If we're doing locking and don't have spinlocks for this
- * architecture, we'd have to instantiate the file, we need
- * the file descriptor for locking. However, it should not
- * be possible for DB_THREAD to be set if HAVE_SPINLOCKS aren't
- * defined.
- *
- * XXX
- * HP-UX won't permit mutexes to live in anything but shared
- * memory. So, instantiate the shared mpool region file on
- * that architecture, regardless. If this turns out to be a
- * performance problem, we could use anonymous memory instead.
- */
-#if !defined(__hppa)
- if (F_ISSET(dbmp, MP_ISPRIVATE))
- if ((dbmp->maddr = __db_malloc(rlen)) == NULL)
- ret = ENOMEM;
- else {
- F_SET(dbmp, MP_MALLOC);
- ret = __db_rinit(dbmp->dbenv,
- dbmp->maddr, 0, rlen, 0);
- }
- else
-#endif
- ret = __db_rcreate(dbmp->dbenv, DB_APP_NONE, path,
- DB_DEFAULT_MPOOL_FILE, mode, rlen,
- F_ISSET(dbmp, MP_ISPRIVATE) ? DB_TEMPORARY : 0,
- &fd, &dbmp->maddr);
- if (ret == 0) {
- /* Put the MPOOL structure first in the region. */
- mp = dbmp->maddr;
-
- SH_TAILQ_INIT(&mp->bhq);
- SH_TAILQ_INIT(&mp->bhfq);
- SH_TAILQ_INIT(&mp->mpfq);
-
- /* Initialize the rest of the region as free space. */
- dbmp->addr = (u_int8_t *)dbmp->maddr + sizeof(MPOOL);
- __db_shalloc_init(dbmp->addr, rlen - sizeof(MPOOL));
-
- /*
- *
- * Pretend that the cache will be broken up into 4K
- * pages, and that we want to keep it under, say, 10
- * pages on each chain. This means a 256MB cache will
- * allocate ~6500 offset pairs.
- */
- mp->htab_buckets =
- __db_tablesize((cachesize / (4 * 1024)) / 10);
+ /*
+ * Map in the region.
+ *
+ * If it's a private mpool, use malloc, it's a lot faster than
+ * instantiating a region.
+ */
+ dbmp->reginfo.dbenv = dbmp->dbenv;
+ dbmp->reginfo.appname = DB_APP_NONE;
+ if (path == NULL)
+ dbmp->reginfo.path = NULL;
+ else
+ if ((dbmp->reginfo.path = __db_strdup(path)) == NULL)
+ return (ENOMEM);
+ dbmp->reginfo.file = DB_DEFAULT_MPOOL_FILE;
+ dbmp->reginfo.mode = mode;
+ dbmp->reginfo.size = rlen;
+ dbmp->reginfo.dbflags = flags;
+ dbmp->reginfo.flags = 0;
+ if (defcache)
+ F_SET(&dbmp->reginfo, REGION_SIZEDEF);
- /* Allocate hash table space and initialize it. */
- if ((ret = __db_shalloc(dbmp->addr,
- mp->htab_buckets * sizeof(DB_HASHTAB),
- 0, &dbmp->htab)) != 0)
- goto err;
- __db_hashinit(dbmp->htab, mp->htab_buckets);
- mp->htab = R_OFFSET(dbmp, dbmp->htab);
+ /*
+ * If we're creating a temporary region, don't use any standard
+ * naming.
+ */
+ if (is_private) {
+ dbmp->reginfo.appname = DB_APP_TMP;
+ dbmp->reginfo.file = NULL;
+ F_SET(&dbmp->reginfo, REGION_PRIVATE);
+ }
- ZERO_LSN(mp->lsn);
- mp->lsn_cnt = 0;
+ if ((ret = __db_rattach(&dbmp->reginfo)) != 0) {
+ if (dbmp->reginfo.path != NULL)
+ FREES(dbmp->reginfo.path);
+ return (ret);
+ }
- memset(&mp->stat, 0, sizeof(mp->stat));
- mp->stat.st_cachesize = cachesize;
+ /*
+ * The MPOOL structure is first in the region, the rest of the region
+ * is free space.
+ */
+ dbmp->mp = dbmp->reginfo.addr;
+ dbmp->addr = (u_int8_t *)dbmp->mp + sizeof(MPOOL);
- mp->flags = 0;
+ /* Initialize a created region. */
+ if (F_ISSET(&dbmp->reginfo, REGION_CREATED)) {
+ mp = dbmp->mp;
+ SH_TAILQ_INIT(&mp->bhq);
+ SH_TAILQ_INIT(&mp->bhfq);
+ SH_TAILQ_INIT(&mp->mpfq);
- newregion = 1;
- } else if (ret != EEXIST)
- return (ret);
- }
+ __db_shalloc_init(dbmp->addr, rlen - sizeof(MPOOL));
- /* If we didn't or couldn't create the region, try and join it. */
- if (!newregion &&
- (ret = __db_ropen(dbmp->dbenv, DB_APP_NONE,
- path, DB_DEFAULT_MPOOL_FILE, 0, &fd, &dbmp->maddr)) != 0) {
/*
- * If we failed because the file wasn't available, wait a
- * second and try again.
+ * Assume we want to keep the hash chains with under 10 pages
+ * on each chain. We don't know the pagesize in advance, and
+ * it may differ for different files. Use a pagesize of 1K for
+ * the calculation -- we walk these chains a lot, they should
+ * be short.
*/
- if (ret == EAGAIN && ++retry_cnt < 3) {
- (void)__db_sleep(1, 0);
- goto retry;
- }
- return (ret);
- }
+ mp->htab_buckets =
+ __db_tablesize((cachesize / (1 * 1024)) / 10);
- /* Set up the common pointers. */
- dbmp->mp = dbmp->maddr;
- dbmp->addr = (u_int8_t *)dbmp->maddr + sizeof(MPOOL);
+ /* Allocate hash table space and initialize it. */
+ if ((ret = __db_shalloc(dbmp->addr,
+ mp->htab_buckets * sizeof(DB_HASHTAB),
+ 0, &dbmp->htab)) != 0)
+ goto err;
+ __db_hashinit(dbmp->htab, mp->htab_buckets);
+ mp->htab = R_OFFSET(dbmp, dbmp->htab);
- /*
- * If not already locked, lock the region -- if it's a new region,
- * then either __db_rcreate() locked it for us or we malloc'd it
- * instead of creating a region, neither of which requires locking
- * here.
- */
- if (!newregion)
- LOCKREGION(dbmp);
+ ZERO_LSN(mp->lsn);
+ mp->lsn_cnt = 0;
- /*
- * Get the hash table address; it's on the shared page, so we have
- * to lock first.
- */
- dbmp->htab = R_ADDR(dbmp, dbmp->mp->htab);
+ memset(&mp->stat, 0, sizeof(mp->stat));
+ mp->stat.st_cachesize = cachesize;
- dbmp->fd = fd;
+ mp->flags = 0;
+ }
- /* If we locked the region, release it now. */
- if (!F_ISSET(dbmp, MP_MALLOC))
- UNLOCKREGION(dbmp);
- return (0);
+ /* Get the local hash table address. */
+ dbmp->htab = R_ADDR(dbmp, dbmp->mp->htab);
-err: if (fd != -1) {
- dbmp->fd = fd;
- (void)__memp_rclose(dbmp);
- }
+ UNLOCKREGION(dbmp);
+ return (0);
- if (newregion)
+err: UNLOCKREGION(dbmp);
+ (void)__db_rdetach(&dbmp->reginfo);
+ if (F_ISSET(&dbmp->reginfo, REGION_CREATED))
(void)memp_unlink(path, 1, dbmp->dbenv);
- return (ret);
-}
-/*
- * __memp_rclose --
- * Close the mpool region.
- *
- * PUBLIC: int __memp_rclose __P((DB_MPOOL *));
- */
-int
-__memp_rclose(dbmp)
- DB_MPOOL *dbmp;
-{
- if (F_ISSET(dbmp, MP_MALLOC)) {
- __db_free(dbmp->maddr);
- return (0);
- }
- return (__db_rclose(dbmp->dbenv, dbmp->fd, dbmp->maddr));
+ if (dbmp->reginfo.path != NULL)
+ FREES(dbmp->reginfo.path);
+ return (ret);
}
diff --git a/db2/mp/mp_sync.c b/db2/mp/mp_sync.c
index 6d16cf3..33218ee 100644
--- a/db2/mp/mp_sync.c
+++ b/db2/mp/mp_sync.c
@@ -1,13 +1,13 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 1997
+ * Copyright (c) 1996, 1997, 1998
* Sleepycat Software. All rights reserved.
*/
#include "config.h"
#ifndef lint
-static const char sccsid[] = "@(#)mp_sync.c 10.19 (Sleepycat) 12/3/97";
+static const char sccsid[] = "@(#)mp_sync.c 10.25 (Sleepycat) 4/26/98";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -15,7 +15,6 @@ static const char sccsid[] = "@(#)mp_sync.c 10.19 (Sleepycat) 12/3/97";
#include <errno.h>
#include <stdlib.h>
-#include <string.h>
#endif
#include "db_int.h"
@@ -25,6 +24,7 @@ static const char sccsid[] = "@(#)mp_sync.c 10.19 (Sleepycat) 12/3/97";
#include "common_ext.h"
static int __bhcmp __P((const void *, const void *));
+static int __memp_fsync __P((DB_MPOOLFILE *));
/*
* memp_sync --
@@ -145,7 +145,8 @@ memp_sync(dbmp, lsnp)
bharray[ar_cnt++] = bhp;
}
} else
- F_CLR(bhp, BH_WRITE);
+ if (F_ISSET(bhp, BH_WRITE))
+ F_CLR(bhp, BH_WRITE);
/* If there no buffers we can write immediately, we're done. */
if (ar_cnt == 0) {
@@ -235,10 +236,8 @@ int
memp_fsync(dbmfp)
DB_MPOOLFILE *dbmfp;
{
- BH *bhp, **bharray;
DB_MPOOL *dbmp;
- size_t mf_offset;
- int ar_cnt, cnt, nalloc, next, pincnt, ret, wrote;
+ int is_tmp;
dbmp = dbmfp->dbmp;
@@ -250,14 +249,62 @@ memp_fsync(dbmfp)
if (F_ISSET(dbmfp, MP_READONLY))
return (0);
- ret = 0;
LOCKREGION(dbmp);
- if (F_ISSET(dbmfp->mfp, MP_TEMP))
- ret = 1;
+ is_tmp = F_ISSET(dbmfp->mfp, MP_TEMP);
UNLOCKREGION(dbmp);
- if (ret)
+ if (is_tmp)
return (0);
+ return (__memp_fsync(dbmfp));
+}
+
+/*
+ * __mp_xxx_fd --
+ * Return a file descriptor for DB 1.85 compatibility locking.
+ *
+ * Stores dbmfp->fd in *fdp.  If the handle has no descriptor yet
+ * (fd == -1), __memp_fsync() is called first and the descriptor is read
+ * again afterwards.  Returns ENOENT if there is still no descriptor;
+ * otherwise returns the result of the sync, or 0 if no sync was needed.
+ *
+ * PUBLIC: int __mp_xxx_fd __P((DB_MPOOLFILE *, int *));
+ */
+int
+__mp_xxx_fd(dbmfp, fdp)
+ DB_MPOOLFILE *dbmfp;
+ int *fdp;
+{
+ int ret;
+
+ /*
+ * This is a truly spectacular layering violation, intended ONLY to
+ * support compatibility for the DB 1.85 DB->fd call.
+ *
+ * Sync the database file to disk, creating the file as necessary.
+ *
+ * We skip the MP_READONLY and MP_TEMP tests done by memp_fsync(3).
+ * The MP_READONLY test isn't interesting because we will either
+ * already have a file descriptor (we opened the database file for
+ * reading) or we aren't readonly (we created the database which
+ * requires write privileges). The MP_TEMP test isn't interesting
+ * because we want to write to the backing file regardless so that
+ * we get a file descriptor to return.
+ */
+ /* Sync only when no descriptor exists; an open descriptor is returned as is. */
+ ret = dbmfp->fd == -1 ? __memp_fsync(dbmfp) : 0;
+
+ /* A still-missing descriptor takes precedence over any sync error. */
+ return ((*fdp = dbmfp->fd) == -1 ? ENOENT : ret);
+}
+
+/*
+ * __memp_fsync --
+ * Mpool file internal sync function.
+ */
+static int
+__memp_fsync(dbmfp)
+ DB_MPOOLFILE *dbmfp;
+{
+ BH *bhp, **bharray;
+ DB_MPOOL *dbmp;
+ size_t mf_offset;
+ int ar_cnt, cnt, nalloc, next, pincnt, ret, wrote;
+
+ ret = 0;
+ dbmp = dbmfp->dbmp;
mf_offset = R_OFFSET(dbmp, dbmfp->mfp);
/*
@@ -359,7 +406,6 @@ err: UNLOCKREGION(dbmp);
if (ret == 0)
return (pincnt == 0 ? __db_fsync(dbmfp->fd) : DB_INCOMPLETE);
return (ret);
-
}
/*
@@ -453,8 +499,8 @@ __bhcmp(p1, p2)
{
BH *bhp1, *bhp2;
- bhp1 = *(BH **)p1;
- bhp2 = *(BH **)p2;
+ bhp1 = *(BH * const *)p1;
+ bhp2 = *(BH * const *)p2;
/* Sort by file (shared memory pool offset). */
if (bhp1->mf_offset < bhp2->mf_offset)