author | Peter Maydell <peter.maydell@linaro.org> | 2016-06-13 10:12:44 +0100 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2016-06-13 10:12:44 +0100 |
commit | da2fdd0bd1514a44309dd5be162ebfb6c166a716 (patch) | |
tree | 6c7a1856e3889fd91bcaedd2c89f6cebb702e805 | |
parent | a93c1bdf0bd4689287094ddb2aae3dc907da3535 (diff) | |
parent | 329844d4bc3d5a11f1e63938d66f74c9584c7abc (diff) | |
Merge remote-tracking branch 'remotes/rth/tags/pull-tcg-20160611' into staging
TB hashing improvements
# gpg: Signature made Sun 12 Jun 2016 01:12:50 BST
# gpg: using RSA key 0xAD1270CC4DD0279B
# gpg: Good signature from "Richard Henderson <rth7680@gmail.com>"
# gpg: aka "Richard Henderson <rth@redhat.com>"
# gpg: aka "Richard Henderson <rth@twiddle.net>"
# Primary key fingerprint: 9CB1 8DDA F8E8 49AD 2AFC 16A4 AD12 70CC 4DD0 279B
* remotes/rth/tags/pull-tcg-20160611:
translate-all: add tb hash bucket info to 'info jit' dump
tb hash: track translated blocks with qht
qht: add test-qht-par to invoke qht-bench from 'check' target
qht: add qht-bench, a performance benchmark
qht: add test program
qht: QEMU's fast, resizable and scalable Hash Table
qdist: add test program
qdist: add module to represent frequency distributions of data
tb hash: hash phys_pc, pc, and flags with xxhash
exec: add tb_hash_func5, derived from xxhash
qemu-thread: add simple test-and-set spinlock
include/processor.h: define cpu_relax()
seqlock: rename write_lock/unlock to write_begin/end
seqlock: remove optional mutex
compiler.h: add QEMU_ALIGNED() to enforce struct alignment
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
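The centerpiece of this pull is QHT, the new concurrent hash table that replaces the fixed-size physical-PC hash chains for translated blocks. The following is an illustrative sketch only (not part of the series): it shows how the qht API is meant to be used, following the include/qemu/qht.h documentation and tests/test-qht.c added below. The function and variable names (int_is_equal, qht_example, value, needle) are made up for the example; error handling and the RCU read-side critical section required around qht_lookup() are elided.

```c
#include "qemu/osdep.h"
#include <glib.h>
#include "qemu/qht.h"

/* compare callback: qht stores opaque pointers, callers define equality */
static bool int_is_equal(const void *obj, const void *userp)
{
    const int32_t *a = obj;
    const int32_t *b = userp;

    return *a == *b;
}

static int32_t value = 42;

static void qht_example(void)
{
    struct qht ht;
    uint32_t hash = 42;          /* callers supply their own hash of the key */
    int32_t needle = 42;

    qht_init(&ht, 0, QHT_MODE_AUTO_RESIZE);   /* auto-resize under load */
    qht_insert(&ht, &value, hash);            /* insert a pointer-hash pair */

    /* lookups take the compare callback plus the precomputed hash */
    g_assert(qht_lookup(&ht, int_is_equal, &needle, hash) == &value);

    qht_remove(&ht, &value, hash);
    qht_destroy(&ht);
}
```

Note that, per the qht.h comments below, objects removed with qht_remove() must remain valid until the current RCU grace period ends, so that concurrent lookups never dereference freed memory.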
-rw-r--r--  cpu-exec.c                 |  92
-rw-r--r--  cpus.c                     |  30
-rw-r--r--  include/exec/exec-all.h    |   2
-rw-r--r--  include/exec/tb-context.h  |   7
-rw-r--r--  include/exec/tb-hash-xx.h  |  94
-rw-r--r--  include/exec/tb-hash.h     |   7
-rw-r--r--  include/qemu/compiler.h    |   2
-rw-r--r--  include/qemu/processor.h   |  30
-rw-r--r--  include/qemu/qdist.h       |  63
-rw-r--r--  include/qemu/qht.h         | 183
-rw-r--r--  include/qemu/seqlock.h     |  14
-rw-r--r--  include/qemu/thread.h      |  35
-rw-r--r--  tests/.gitignore           |   4
-rw-r--r--  tests/Makefile.include     |  14
-rw-r--r--  tests/qht-bench.c          | 488
-rw-r--r--  tests/test-qdist.c         | 384
-rw-r--r--  tests/test-qht-par.c       |  56
-rw-r--r--  tests/test-qht.c           | 159
-rw-r--r--  translate-all.c            | 131
-rw-r--r--  util/Makefile.objs         |   2
-rw-r--r--  util/qdist.c               | 395
-rw-r--r--  util/qht.c                 | 833
22 files changed, 2893 insertions(+), 132 deletions(-)
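Two smaller API changes in this series appear throughout the cpus.c hunks below: the seqlock write-side helpers are renamed to seqlock_write_begin()/seqlock_write_end(), and the optional mutex is dropped, so writers now serialize themselves. A minimal sketch of the resulting pattern follows; the write side comes from the updated include/qemu/seqlock.h, while the read-side helper names (seqlock_read_begin/seqlock_read_retry) are assumed from the existing header, which this series does not change on the read side.

```c
#include "qemu/osdep.h"
#include "qemu/seqlock.h"
#include "qemu/thread.h"

/* seqlock_init(&lock) and qemu_mutex_init(&mutex) calls elided */
static QemuSeqLock lock;
static QemuMutex mutex;          /* writers provide their own mutual exclusion */
static int64_t shared_value;

static void writer(int64_t v)
{
    qemu_mutex_lock(&mutex);     /* serialize writers externally */
    seqlock_write_begin(&lock);
    shared_value = v;            /* update the fields the seqlock protects */
    seqlock_write_end(&lock);
    qemu_mutex_unlock(&mutex);
}

static int64_t reader(void)
{
    int64_t v;
    unsigned start;

    do {
        start = seqlock_read_begin(&lock);
        v = shared_value;
    } while (seqlock_read_retry(&lock, start));  /* retry if a write raced */

    return v;
}
```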
@@ -225,57 +225,57 @@ static void cpu_exec_nocache(CPUState *cpu, int max_cycles, } #endif -static TranslationBlock *tb_find_physical(CPUState *cpu, - target_ulong pc, - target_ulong cs_base, - uint32_t flags) +struct tb_desc { + target_ulong pc; + target_ulong cs_base; + CPUArchState *env; + tb_page_addr_t phys_page1; + uint32_t flags; +}; + +static bool tb_cmp(const void *p, const void *d) { - CPUArchState *env = (CPUArchState *)cpu->env_ptr; - TranslationBlock *tb, **tb_hash_head, **ptb1; - unsigned int h; - tb_page_addr_t phys_pc, phys_page1; - - /* find translated block using physical mappings */ - phys_pc = get_page_addr_code(env, pc); - phys_page1 = phys_pc & TARGET_PAGE_MASK; - h = tb_phys_hash_func(phys_pc); - - /* Start at head of the hash entry */ - ptb1 = tb_hash_head = &tcg_ctx.tb_ctx.tb_phys_hash[h]; - tb = *ptb1; - - while (tb) { - if (tb->pc == pc && - tb->page_addr[0] == phys_page1 && - tb->cs_base == cs_base && - tb->flags == flags) { - - if (tb->page_addr[1] == -1) { - /* done, we have a match */ - break; - } else { - /* check next page if needed */ - target_ulong virt_page2 = (pc & TARGET_PAGE_MASK) + - TARGET_PAGE_SIZE; - tb_page_addr_t phys_page2 = get_page_addr_code(env, virt_page2); - - if (tb->page_addr[1] == phys_page2) { - break; - } + const TranslationBlock *tb = p; + const struct tb_desc *desc = d; + + if (tb->pc == desc->pc && + tb->page_addr[0] == desc->phys_page1 && + tb->cs_base == desc->cs_base && + tb->flags == desc->flags) { + /* check next page if needed */ + if (tb->page_addr[1] == -1) { + return true; + } else { + tb_page_addr_t phys_page2; + target_ulong virt_page2; + + virt_page2 = (desc->pc & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE; + phys_page2 = get_page_addr_code(desc->env, virt_page2); + if (tb->page_addr[1] == phys_page2) { + return true; } } - - ptb1 = &tb->phys_hash_next; - tb = *ptb1; } + return false; +} - if (tb) { - /* Move the TB to the head of the list */ - *ptb1 = tb->phys_hash_next; - tb->phys_hash_next = *tb_hash_head; - *tb_hash_head = tb; - } - return tb; +static TranslationBlock *tb_find_physical(CPUState *cpu, + target_ulong pc, + target_ulong cs_base, + uint32_t flags) +{ + tb_page_addr_t phys_pc; + struct tb_desc desc; + uint32_t h; + + desc.env = (CPUArchState *)cpu->env_ptr; + desc.cs_base = cs_base; + desc.flags = flags; + desc.pc = pc; + phys_pc = get_page_addr_code(desc.env, pc); + desc.phys_page1 = phys_pc & TARGET_PAGE_MASK; + h = tb_hash_func(phys_pc, pc, flags); + return qht_lookup(&tcg_ctx.tb_ctx.htable, tb_cmp, &desc, h); } static TranslationBlock *tb_find_slow(CPUState *cpu, @@ -249,13 +249,13 @@ int64_t cpu_get_clock(void) void cpu_enable_ticks(void) { /* Here, the really thing protected by seqlock is cpu_clock_offset. */ - seqlock_write_lock(&timers_state.vm_clock_seqlock); + seqlock_write_begin(&timers_state.vm_clock_seqlock); if (!timers_state.cpu_ticks_enabled) { timers_state.cpu_ticks_offset -= cpu_get_host_ticks(); timers_state.cpu_clock_offset -= get_clock(); timers_state.cpu_ticks_enabled = 1; } - seqlock_write_unlock(&timers_state.vm_clock_seqlock); + seqlock_write_end(&timers_state.vm_clock_seqlock); } /* disable cpu_get_ticks() : the clock is stopped. You must not call @@ -265,13 +265,13 @@ void cpu_enable_ticks(void) void cpu_disable_ticks(void) { /* Here, the really thing protected by seqlock is cpu_clock_offset. 
*/ - seqlock_write_lock(&timers_state.vm_clock_seqlock); + seqlock_write_begin(&timers_state.vm_clock_seqlock); if (timers_state.cpu_ticks_enabled) { timers_state.cpu_ticks_offset += cpu_get_host_ticks(); timers_state.cpu_clock_offset = cpu_get_clock_locked(); timers_state.cpu_ticks_enabled = 0; } - seqlock_write_unlock(&timers_state.vm_clock_seqlock); + seqlock_write_end(&timers_state.vm_clock_seqlock); } /* Correlation between real and virtual time is always going to be @@ -294,7 +294,7 @@ static void icount_adjust(void) return; } - seqlock_write_lock(&timers_state.vm_clock_seqlock); + seqlock_write_begin(&timers_state.vm_clock_seqlock); cur_time = cpu_get_clock_locked(); cur_icount = cpu_get_icount_locked(); @@ -315,7 +315,7 @@ static void icount_adjust(void) last_delta = delta; timers_state.qemu_icount_bias = cur_icount - (timers_state.qemu_icount << icount_time_shift); - seqlock_write_unlock(&timers_state.vm_clock_seqlock); + seqlock_write_end(&timers_state.vm_clock_seqlock); } static void icount_adjust_rt(void *opaque) @@ -355,7 +355,7 @@ static void icount_warp_rt(void) return; } - seqlock_write_lock(&timers_state.vm_clock_seqlock); + seqlock_write_begin(&timers_state.vm_clock_seqlock); if (runstate_is_running()) { int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT, cpu_get_clock_locked()); @@ -374,7 +374,7 @@ static void icount_warp_rt(void) timers_state.qemu_icount_bias += warp_delta; } vm_clock_warp_start = -1; - seqlock_write_unlock(&timers_state.vm_clock_seqlock); + seqlock_write_end(&timers_state.vm_clock_seqlock); if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) { qemu_clock_notify(QEMU_CLOCK_VIRTUAL); @@ -399,9 +399,9 @@ void qtest_clock_warp(int64_t dest) int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL); int64_t warp = qemu_soonest_timeout(dest - clock, deadline); - seqlock_write_lock(&timers_state.vm_clock_seqlock); + seqlock_write_begin(&timers_state.vm_clock_seqlock); timers_state.qemu_icount_bias += warp; - seqlock_write_unlock(&timers_state.vm_clock_seqlock); + seqlock_write_end(&timers_state.vm_clock_seqlock); qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL); timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]); @@ -468,9 +468,9 @@ void qemu_start_warp_timer(void) * It is useful when we want a deterministic execution time, * isolated from host latencies. */ - seqlock_write_lock(&timers_state.vm_clock_seqlock); + seqlock_write_begin(&timers_state.vm_clock_seqlock); timers_state.qemu_icount_bias += deadline; - seqlock_write_unlock(&timers_state.vm_clock_seqlock); + seqlock_write_end(&timers_state.vm_clock_seqlock); qemu_clock_notify(QEMU_CLOCK_VIRTUAL); } else { /* @@ -481,11 +481,11 @@ void qemu_start_warp_timer(void) * you will not be sending network packets continuously instead of * every 100ms. 
*/ - seqlock_write_lock(&timers_state.vm_clock_seqlock); + seqlock_write_begin(&timers_state.vm_clock_seqlock); if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) { vm_clock_warp_start = clock; } - seqlock_write_unlock(&timers_state.vm_clock_seqlock); + seqlock_write_end(&timers_state.vm_clock_seqlock); timer_mod_anticipate(icount_warp_timer, clock + deadline); } } else if (deadline == 0) { @@ -621,7 +621,7 @@ int cpu_throttle_get_percentage(void) void cpu_ticks_init(void) { - seqlock_init(&timers_state.vm_clock_seqlock, NULL); + seqlock_init(&timers_state.vm_clock_seqlock); vmstate_register(NULL, 0, &vmstate_timers, &timers_state); throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT, cpu_throttle_timer_tick, NULL); diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index e076397..c1f59fa 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -215,8 +215,6 @@ struct TranslationBlock { void *tc_ptr; /* pointer to the translated code */ uint8_t *tc_search; /* pointer to search data */ - /* next matching tb for physical address. */ - struct TranslationBlock *phys_hash_next; /* original tb when cflags has CF_NOCACHE */ struct TranslationBlock *orig_tb; /* first and second physical page containing code. The lower bit diff --git a/include/exec/tb-context.h b/include/exec/tb-context.h index 5efe3d9..e209c1c 100644 --- a/include/exec/tb-context.h +++ b/include/exec/tb-context.h @@ -21,9 +21,10 @@ #define QEMU_TB_CONTEXT_H_ #include "qemu/thread.h" +#include "qemu/qht.h" -#define CODE_GEN_PHYS_HASH_BITS 15 -#define CODE_GEN_PHYS_HASH_SIZE (1 << CODE_GEN_PHYS_HASH_BITS) +#define CODE_GEN_HTABLE_BITS 15 +#define CODE_GEN_HTABLE_SIZE (1 << CODE_GEN_HTABLE_BITS) typedef struct TranslationBlock TranslationBlock; typedef struct TBContext TBContext; @@ -31,7 +32,7 @@ typedef struct TBContext TBContext; struct TBContext { TranslationBlock *tbs; - TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE]; + struct qht htable; int nb_tbs; /* any access to the tbs or the page table must use this lock */ QemuMutex tb_lock; diff --git a/include/exec/tb-hash-xx.h b/include/exec/tb-hash-xx.h new file mode 100644 index 0000000..9f3fc05 --- /dev/null +++ b/include/exec/tb-hash-xx.h @@ -0,0 +1,94 @@ +/* + * xxHash - Fast Hash algorithm + * Copyright (C) 2012-2016, Yann Collet + * + * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * + Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You can contact the author at : + * - xxHash source repository : https://github.com/Cyan4973/xxHash + */ +#ifndef EXEC_TB_HASH_XX +#define EXEC_TB_HASH_XX + +#include <qemu/bitops.h> + +#define PRIME32_1 2654435761U +#define PRIME32_2 2246822519U +#define PRIME32_3 3266489917U +#define PRIME32_4 668265263U +#define PRIME32_5 374761393U + +#define TB_HASH_XX_SEED 1 + +/* + * xxhash32, customized for input variables that are not guaranteed to be + * contiguous in memory. + */ +static inline +uint32_t tb_hash_func5(uint64_t a0, uint64_t b0, uint32_t e) +{ + uint32_t v1 = TB_HASH_XX_SEED + PRIME32_1 + PRIME32_2; + uint32_t v2 = TB_HASH_XX_SEED + PRIME32_2; + uint32_t v3 = TB_HASH_XX_SEED + 0; + uint32_t v4 = TB_HASH_XX_SEED - PRIME32_1; + uint32_t a = a0 >> 32; + uint32_t b = a0; + uint32_t c = b0 >> 32; + uint32_t d = b0; + uint32_t h32; + + v1 += a * PRIME32_2; + v1 = rol32(v1, 13); + v1 *= PRIME32_1; + + v2 += b * PRIME32_2; + v2 = rol32(v2, 13); + v2 *= PRIME32_1; + + v3 += c * PRIME32_2; + v3 = rol32(v3, 13); + v3 *= PRIME32_1; + + v4 += d * PRIME32_2; + v4 = rol32(v4, 13); + v4 *= PRIME32_1; + + h32 = rol32(v1, 1) + rol32(v2, 7) + rol32(v3, 12) + rol32(v4, 18); + h32 += 20; + + h32 += e * PRIME32_3; + h32 = rol32(h32, 17) * PRIME32_4; + + h32 ^= h32 >> 15; + h32 *= PRIME32_2; + h32 ^= h32 >> 13; + h32 *= PRIME32_3; + h32 ^= h32 >> 16; + + return h32; +} + +#endif /* EXEC_TB_HASH_XX */ diff --git a/include/exec/tb-hash.h b/include/exec/tb-hash.h index 0f4e8a0..1d0200b 100644 --- a/include/exec/tb-hash.h +++ b/include/exec/tb-hash.h @@ -20,6 +20,8 @@ #ifndef EXEC_TB_HASH #define EXEC_TB_HASH +#include "exec/tb-hash-xx.h" + /* Only the bottom TB_JMP_PAGE_BITS of the jump cache hash bits vary for addresses on the same page. The top bits are the same. This allows TLB invalidation to quickly clear a subset of the hash table. */ @@ -43,9 +45,10 @@ static inline unsigned int tb_jmp_cache_hash_func(target_ulong pc) | (tmp & TB_JMP_ADDR_MASK)); } -static inline unsigned int tb_phys_hash_func(tb_page_addr_t pc) +static inline +uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc, uint32_t flags) { - return (pc >> 2) & (CODE_GEN_PHYS_HASH_SIZE - 1); + return tb_hash_func5(phys_pc, pc, flags); } #endif diff --git a/include/qemu/compiler.h b/include/qemu/compiler.h index 8f1cc7b..b64f899 100644 --- a/include/qemu/compiler.h +++ b/include/qemu/compiler.h @@ -41,6 +41,8 @@ # define QEMU_PACKED __attribute__((packed)) #endif +#define QEMU_ALIGNED(X) __attribute__((aligned(X))) + #ifndef glue #define xglue(x, y) x ## y #define glue(x, y) xglue(x, y) diff --git a/include/qemu/processor.h b/include/qemu/processor.h new file mode 100644 index 0000000..8b25702 --- /dev/null +++ b/include/qemu/processor.h @@ -0,0 +1,30 @@ +/* + * Copyright (C) 2016, Emilio G. Cota <cota@braap.org> + * + * License: GNU GPL, version 2. + * See the COPYING file in the top-level directory. 
+ */ +#ifndef QEMU_PROCESSOR_H +#define QEMU_PROCESSOR_H + +#include "qemu/atomic.h" + +#if defined(__i386__) || defined(__x86_64__) +# define cpu_relax() asm volatile("rep; nop" ::: "memory") + +#elif defined(__ia64__) +# define cpu_relax() asm volatile("hint @pause" ::: "memory") + +#elif defined(__aarch64__) +# define cpu_relax() asm volatile("yield" ::: "memory") + +#elif defined(__powerpc64__) +/* set Hardware Multi-Threading (HMT) priority to low; then back to medium */ +# define cpu_relax() asm volatile("or 1, 1, 1;" \ + "or 2, 2, 2;" ::: "memory") + +#else +# define cpu_relax() barrier() +#endif + +#endif /* QEMU_PROCESSOR_H */ diff --git a/include/qemu/qdist.h b/include/qemu/qdist.h new file mode 100644 index 0000000..f30050c --- /dev/null +++ b/include/qemu/qdist.h @@ -0,0 +1,63 @@ +/* + * Copyright (C) 2016, Emilio G. Cota <cota@braap.org> + * + * License: GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#ifndef QEMU_QDIST_H +#define QEMU_QDIST_H + +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qemu/bitops.h" + +/* + * Samples with the same 'x value' end up in the same qdist_entry, + * e.g. inc(0.1) and inc(0.1) end up as {x=0.1, count=2}. + * + * Binning happens only at print time, so that we retain the flexibility to + * choose the binning. This might not be ideal for workloads that do not care + * much about precision and insert many samples all with different x values; + * in that case, pre-binning (e.g. entering both 0.115 and 0.097 as 0.1) + * should be considered. + */ +struct qdist_entry { + double x; + unsigned long count; +}; + +struct qdist { + struct qdist_entry *entries; + size_t n; + size_t size; +}; + +#define QDIST_PR_BORDER BIT(0) +#define QDIST_PR_LABELS BIT(1) +/* the remaining options only work if PR_LABELS is set */ +#define QDIST_PR_NODECIMAL BIT(2) +#define QDIST_PR_PERCENT BIT(3) +#define QDIST_PR_100X BIT(4) +#define QDIST_PR_NOBINRANGE BIT(5) + +void qdist_init(struct qdist *dist); +void qdist_destroy(struct qdist *dist); + +void qdist_add(struct qdist *dist, double x, long count); +void qdist_inc(struct qdist *dist, double x); +double qdist_xmin(const struct qdist *dist); +double qdist_xmax(const struct qdist *dist); +double qdist_avg(const struct qdist *dist); +unsigned long qdist_sample_count(const struct qdist *dist); +size_t qdist_unique_entries(const struct qdist *dist); + +/* callers must free the returned string with g_free() */ +char *qdist_pr_plain(const struct qdist *dist, size_t n_groups); + +/* callers must free the returned string with g_free() */ +char *qdist_pr(const struct qdist *dist, size_t n_groups, uint32_t opt); + +/* Only qdist code and test code should ever call this function */ +void qdist_bin__internal(struct qdist *to, const struct qdist *from, size_t n); + +#endif /* QEMU_QDIST_H */ diff --git a/include/qemu/qht.h b/include/qemu/qht.h new file mode 100644 index 0000000..aec60aa --- /dev/null +++ b/include/qemu/qht.h @@ -0,0 +1,183 @@ +/* + * Copyright (C) 2016, Emilio G. Cota <cota@braap.org> + * + * License: GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ +#ifndef QEMU_QHT_H +#define QEMU_QHT_H + +#include "qemu/osdep.h" +#include "qemu/seqlock.h" +#include "qemu/thread.h" +#include "qemu/qdist.h" + +struct qht { + struct qht_map *map; + QemuMutex lock; /* serializes setters of ht->map */ + unsigned int mode; +}; + +/** + * struct qht_stats - Statistics of a QHT + * @head_buckets: number of head buckets + * @used_head_buckets: number of non-empty head buckets + * @entries: total number of entries + * @chain: frequency distribution representing the number of buckets in each + * chain, excluding empty chains. + * @occupancy: frequency distribution representing chain occupancy rate. + * Valid range: from 0.0 (empty) to 1.0 (full occupancy). + * + * An entry is a pointer-hash pair. + * Each bucket can host several entries. + * Chains are chains of buckets, whose first link is always a head bucket. + */ +struct qht_stats { + size_t head_buckets; + size_t used_head_buckets; + size_t entries; + struct qdist chain; + struct qdist occupancy; +}; + +typedef bool (*qht_lookup_func_t)(const void *obj, const void *userp); +typedef void (*qht_iter_func_t)(struct qht *ht, void *p, uint32_t h, void *up); + +#define QHT_MODE_AUTO_RESIZE 0x1 /* auto-resize when heavily loaded */ + +/** + * qht_init - Initialize a QHT + * @ht: QHT to be initialized + * @n_elems: number of entries the hash table should be optimized for. + * @mode: bitmask with OR'ed QHT_MODE_* + */ +void qht_init(struct qht *ht, size_t n_elems, unsigned int mode); + +/** + * qht_destroy - destroy a previously initialized QHT + * @ht: QHT to be destroyed + * + * Call only when there are no readers/writers left. + */ +void qht_destroy(struct qht *ht); + +/** + * qht_insert - Insert a pointer into the hash table + * @ht: QHT to insert to + * @p: pointer to be inserted + * @hash: hash corresponding to @p + * + * Attempting to insert a NULL @p is a bug. + * Inserting the same pointer @p with different @hash values is a bug. + * + * Returns true on sucess. + * Returns false if the @p-@hash pair already exists in the hash table. + */ +bool qht_insert(struct qht *ht, void *p, uint32_t hash); + +/** + * qht_lookup - Look up a pointer in a QHT + * @ht: QHT to be looked up + * @func: function to compare existing pointers against @userp + * @userp: pointer to pass to @func + * @hash: hash of the pointer to be looked up + * + * Needs to be called under an RCU read-critical section. + * + * The user-provided @func compares pointers in QHT against @userp. + * If the function returns true, a match has been found. + * + * Returns the corresponding pointer when a match is found. + * Returns NULL otherwise. + */ +void *qht_lookup(struct qht *ht, qht_lookup_func_t func, const void *userp, + uint32_t hash); + +/** + * qht_remove - remove a pointer from the hash table + * @ht: QHT to remove from + * @p: pointer to be removed + * @hash: hash corresponding to @p + * + * Attempting to remove a NULL @p is a bug. + * + * Just-removed @p pointers cannot be immediately freed; they need to remain + * valid until the end of the RCU grace period in which qht_remove() is called. + * This guarantees that concurrent lookups will always compare against valid + * data. + * + * Returns true on success. + * Returns false if the @p-@hash pair was not found. + */ +bool qht_remove(struct qht *ht, const void *p, uint32_t hash); + +/** + * qht_reset - reset a QHT + * @ht: QHT to be reset + * + * All entries in the hash table are reset. No resizing is performed. 
+ * + * If concurrent readers may exist, the objects pointed to by the hash table + * must remain valid for the existing RCU grace period -- see qht_remove(). + * See also: qht_reset_size() + */ +void qht_reset(struct qht *ht); + +/** + * qht_reset_size - reset and resize a QHT + * @ht: QHT to be reset and resized + * @n_elems: number of entries the resized hash table should be optimized for. + * + * Returns true if the resize was necessary and therefore performed. + * Returns false otherwise. + * + * If concurrent readers may exist, the objects pointed to by the hash table + * must remain valid for the existing RCU grace period -- see qht_remove(). + * See also: qht_reset(), qht_resize(). + */ +bool qht_reset_size(struct qht *ht, size_t n_elems); + +/** + * qht_resize - resize a QHT + * @ht: QHT to be resized + * @n_elems: number of entries the resized hash table should be optimized for + * + * Returns true on success. + * Returns false if the resize was not necessary and therefore not performed. + * See also: qht_reset_size(). + */ +bool qht_resize(struct qht *ht, size_t n_elems); + +/** + * qht_iter - Iterate over a QHT + * @ht: QHT to be iterated over + * @func: function to be called for each entry in QHT + * @userp: additional pointer to be passed to @func + * + * Each time it is called, user-provided @func is passed a pointer-hash pair, + * plus @userp. + */ +void qht_iter(struct qht *ht, qht_iter_func_t func, void *userp); + +/** + * qht_statistics_init - Gather statistics from a QHT + * @ht: QHT to gather statistics from + * @stats: pointer to a struct qht_stats to be filled in + * + * Does NOT need to be called under an RCU read-critical section, + * since it does not dereference any pointers stored in the hash table. + * + * When done with @stats, pass the struct to qht_statistics_destroy(). + * Failing to do this will leak memory. + */ +void qht_statistics_init(struct qht *ht, struct qht_stats *stats); + +/** + * qht_statistics_destroy - Destroy a struct qht_stats + * @stats: stuct qht_stats to be destroyed + * + * See also: qht_statistics_init(). + */ +void qht_statistics_destroy(struct qht_stats *stats); + +#endif /* QEMU_QHT_H */ diff --git a/include/qemu/seqlock.h b/include/qemu/seqlock.h index 70b01fd..4dfc055 100644 --- a/include/qemu/seqlock.h +++ b/include/qemu/seqlock.h @@ -19,37 +19,29 @@ typedef struct QemuSeqLock QemuSeqLock; struct QemuSeqLock { - QemuMutex *mutex; unsigned sequence; }; -static inline void seqlock_init(QemuSeqLock *sl, QemuMutex *mutex) +static inline void seqlock_init(QemuSeqLock *sl) { - sl->mutex = mutex; sl->sequence = 0; } /* Lock out other writers and update the count. */ -static inline void seqlock_write_lock(QemuSeqLock *sl) +static inline void seqlock_write_begin(QemuSeqLock *sl) { - if (sl->mutex) { - qemu_mutex_lock(sl->mutex); - } ++sl->sequence; /* Write sequence before updating other fields. */ smp_wmb(); } -static inline void seqlock_write_unlock(QemuSeqLock *sl) +static inline void seqlock_write_end(QemuSeqLock *sl) { /* Write other fields before finalizing sequence. 
*/ smp_wmb(); ++sl->sequence; - if (sl->mutex) { - qemu_mutex_unlock(sl->mutex); - } } static inline unsigned seqlock_read_begin(QemuSeqLock *sl) diff --git a/include/qemu/thread.h b/include/qemu/thread.h index bdae6df..c5d71cf8 100644 --- a/include/qemu/thread.h +++ b/include/qemu/thread.h @@ -1,6 +1,8 @@ #ifndef __QEMU_THREAD_H #define __QEMU_THREAD_H 1 +#include "qemu/processor.h" +#include "qemu/atomic.h" typedef struct QemuMutex QemuMutex; typedef struct QemuCond QemuCond; @@ -60,4 +62,37 @@ struct Notifier; void qemu_thread_atexit_add(struct Notifier *notifier); void qemu_thread_atexit_remove(struct Notifier *notifier); +typedef struct QemuSpin { + int value; +} QemuSpin; + +static inline void qemu_spin_init(QemuSpin *spin) +{ + __sync_lock_release(&spin->value); +} + +static inline void qemu_spin_lock(QemuSpin *spin) +{ + while (unlikely(__sync_lock_test_and_set(&spin->value, true))) { + while (atomic_read(&spin->value)) { + cpu_relax(); + } + } +} + +static inline bool qemu_spin_trylock(QemuSpin *spin) +{ + return __sync_lock_test_and_set(&spin->value, true); +} + +static inline bool qemu_spin_locked(QemuSpin *spin) +{ + return atomic_read(&spin->value); +} + +static inline void qemu_spin_unlock(QemuSpin *spin) +{ + __sync_lock_release(&spin->value); +} + #endif diff --git a/tests/.gitignore b/tests/.gitignore index a06a8ba..840ea39 100644 --- a/tests/.gitignore +++ b/tests/.gitignore @@ -7,6 +7,7 @@ check-qnull check-qstring check-qom-interface check-qom-proplist +qht-bench rcutorture test-aio test-base64 @@ -48,7 +49,10 @@ test-qapi-types.[ch] test-qapi-visit.[ch] test-qdev-global-props test-qemu-opts +test-qdist test-qga +test-qht +test-qht-par test-qmp-commands test-qmp-commands.h test-qmp-event diff --git a/tests/Makefile.include b/tests/Makefile.include index a3e20e3..7d63d16 100644 --- a/tests/Makefile.include +++ b/tests/Makefile.include @@ -70,6 +70,12 @@ check-unit-y += tests/rcutorture$(EXESUF) gcov-files-rcutorture-y = util/rcu.c check-unit-y += tests/test-rcu-list$(EXESUF) gcov-files-test-rcu-list-y = util/rcu.c +check-unit-y += tests/test-qdist$(EXESUF) +gcov-files-test-qdist-y = util/qdist.c +check-unit-y += tests/test-qht$(EXESUF) +gcov-files-test-qht-y = util/qht.c +check-unit-y += tests/test-qht-par$(EXESUF) +gcov-files-test-qht-par-y = util/qht.c check-unit-y += tests/test-bitops$(EXESUF) check-unit-$(CONFIG_HAS_GLIB_SUBPROCESS_TESTS) += tests/test-qdev-global-props$(EXESUF) check-unit-y += tests/check-qom-interface$(EXESUF) @@ -394,7 +400,9 @@ test-obj-y = tests/check-qint.o tests/check-qstring.o tests/check-qdict.o \ tests/test-qmp-commands.o tests/test-visitor-serialization.o \ tests/test-x86-cpuid.o tests/test-mul64.o tests/test-int128.o \ tests/test-opts-visitor.o tests/test-qmp-event.o \ - tests/rcutorture.o tests/test-rcu-list.o + tests/rcutorture.o tests/test-rcu-list.o \ + tests/test-qdist.o \ + tests/test-qht.o tests/qht-bench.o tests/test-qht-par.o $(test-obj-y): QEMU_INCLUDES += -Itests QEMU_CFLAGS += -I$(SRC_PATH)/tests @@ -433,6 +441,10 @@ tests/test-cutils$(EXESUF): tests/test-cutils.o util/cutils.o tests/test-int128$(EXESUF): tests/test-int128.o tests/rcutorture$(EXESUF): tests/rcutorture.o $(test-util-obj-y) tests/test-rcu-list$(EXESUF): tests/test-rcu-list.o $(test-util-obj-y) +tests/test-qdist$(EXESUF): tests/test-qdist.o $(test-util-obj-y) +tests/test-qht$(EXESUF): tests/test-qht.o $(test-util-obj-y) +tests/test-qht-par$(EXESUF): tests/test-qht-par.o tests/qht-bench$(EXESUF) $(test-util-obj-y) +tests/qht-bench$(EXESUF): tests/qht-bench.o 
$(test-util-obj-y) tests/test-qdev-global-props$(EXESUF): tests/test-qdev-global-props.o \ hw/core/qdev.o hw/core/qdev-properties.o hw/core/hotplug.o\ diff --git a/tests/qht-bench.c b/tests/qht-bench.c new file mode 100644 index 0000000..ad8efbc --- /dev/null +++ b/tests/qht-bench.c @@ -0,0 +1,488 @@ +/* + * Copyright (C) 2016, Emilio G. Cota <cota@braap.org> + * + * License: GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#include "qemu/osdep.h" +#include <glib.h> +#include "qemu/processor.h" +#include "qemu/atomic.h" +#include "qemu/qht.h" +#include "qemu/rcu.h" +#include "exec/tb-hash-xx.h" + +struct thread_stats { + size_t rd; + size_t not_rd; + size_t in; + size_t not_in; + size_t rm; + size_t not_rm; + size_t rz; + size_t not_rz; +}; + +struct thread_info { + void (*func)(struct thread_info *); + struct thread_stats stats; + uint64_t r; + bool write_op; /* writes alternate between insertions and removals */ + bool resize_down; +} QEMU_ALIGNED(64); /* avoid false sharing among threads */ + +static struct qht ht; +static QemuThread *rw_threads; + +#define DEFAULT_RANGE (4096) +#define DEFAULT_QHT_N_ELEMS DEFAULT_RANGE + +static unsigned int duration = 1; +static unsigned int n_rw_threads = 1; +static unsigned long lookup_range = DEFAULT_RANGE; +static unsigned long update_range = DEFAULT_RANGE; +static size_t init_range = DEFAULT_RANGE; +static size_t init_size = DEFAULT_RANGE; +static size_t n_ready_threads; +static long populate_offset; +static long *keys; + +static size_t resize_min; +static size_t resize_max; +static struct thread_info *rz_info; +static unsigned long resize_delay = 1000; +static double resize_rate; /* 0.0 to 1.0 */ +static unsigned int n_rz_threads = 1; +static QemuThread *rz_threads; + +static double update_rate; /* 0.0 to 1.0 */ +static uint64_t update_threshold; +static uint64_t resize_threshold; + +static size_t qht_n_elems = DEFAULT_QHT_N_ELEMS; +static int qht_mode; + +static bool test_start; +static bool test_stop; + +static struct thread_info *rw_info; + +static const char commands_string[] = + " -d = duration, in seconds\n" + " -n = number of threads\n" + "\n" + " -o = offset at which keys start\n" + "\n" + " -g = set -s,-k,-K,-l,-r to the same value\n" + " -s = initial size hint\n" + " -k = initial number of keys\n" + " -K = initial range of keys (will be rounded up to pow2)\n" + " -l = lookup range of keys (will be rounded up to pow2)\n" + " -r = update range of keys (will be rounded up to pow2)\n" + "\n" + " -u = update rate (0.0 to 100.0), 50/50 split of insertions/removals\n" + "\n" + " -R = enable auto-resize\n" + " -S = resize rate (0.0 to 100.0)\n" + " -D = delay (in us) between potential resizes\n" + " -N = number of resize threads"; + +static void usage_complete(int argc, char *argv[]) +{ + fprintf(stderr, "Usage: %s [options]\n", argv[0]); + fprintf(stderr, "options:\n%s\n", commands_string); + exit(-1); +} + +static bool is_equal(const void *obj, const void *userp) +{ + const long *a = obj; + const long *b = userp; + + return *a == *b; +} + +static inline uint32_t h(unsigned long v) +{ + return tb_hash_func5(v, 0, 0); +} + +/* + * From: https://en.wikipedia.org/wiki/Xorshift + * This is faster than rand_r(), and gives us a wider range (RAND_MAX is only + * guaranteed to be >= INT_MAX). 
+ */ +static uint64_t xorshift64star(uint64_t x) +{ + x ^= x >> 12; /* a */ + x ^= x << 25; /* b */ + x ^= x >> 27; /* c */ + return x * UINT64_C(2685821657736338717); +} + +static void do_rz(struct thread_info *info) +{ + struct thread_stats *stats = &info->stats; + + if (info->r < resize_threshold) { + size_t size = info->resize_down ? resize_min : resize_max; + bool resized; + + resized = qht_resize(&ht, size); + info->resize_down = !info->resize_down; + + if (resized) { + stats->rz++; + } else { + stats->not_rz++; + } + } + g_usleep(resize_delay); +} + +static void do_rw(struct thread_info *info) +{ + struct thread_stats *stats = &info->stats; + uint32_t hash; + long *p; + + if (info->r >= update_threshold) { + bool read; + + p = &keys[info->r & (lookup_range - 1)]; + hash = h(*p); + read = qht_lookup(&ht, is_equal, p, hash); + if (read) { + stats->rd++; + } else { + stats->not_rd++; + } + } else { + p = &keys[info->r & (update_range - 1)]; + hash = h(*p); + if (info->write_op) { + bool written = false; + + if (qht_lookup(&ht, is_equal, p, hash) == NULL) { + written = qht_insert(&ht, p, hash); + } + if (written) { + stats->in++; + } else { + stats->not_in++; + } + } else { + bool removed = false; + + if (qht_lookup(&ht, is_equal, p, hash)) { + removed = qht_remove(&ht, p, hash); + } + if (removed) { + stats->rm++; + } else { + stats->not_rm++; + } + } + info->write_op = !info->write_op; + } +} + +static void *thread_func(void *p) +{ + struct thread_info *info = p; + + rcu_register_thread(); + + atomic_inc(&n_ready_threads); + while (!atomic_mb_read(&test_start)) { + cpu_relax(); + } + + rcu_read_lock(); + while (!atomic_read(&test_stop)) { + info->r = xorshift64star(info->r); + info->func(info); + } + rcu_read_unlock(); + + rcu_unregister_thread(); + return NULL; +} + +/* sets everything except info->func */ +static void prepare_thread_info(struct thread_info *info, int i) +{ + /* seed for the RNG; each thread should have a different one */ + info->r = (i + 1) ^ time(NULL); + /* the first update will be a write */ + info->write_op = true; + /* the first resize will be down */ + info->resize_down = true; + + memset(&info->stats, 0, sizeof(info->stats)); +} + +static void +th_create_n(QemuThread **threads, struct thread_info **infos, const char *name, + void (*func)(struct thread_info *), int offset, int n) +{ + struct thread_info *info; + QemuThread *th; + int i; + + th = g_malloc(sizeof(*th) * n); + *threads = th; + + info = qemu_memalign(64, sizeof(*info) * n); + *infos = info; + + for (i = 0; i < n; i++) { + prepare_thread_info(&info[i], offset + i); + info[i].func = func; + qemu_thread_create(&th[i], name, thread_func, &info[i], + QEMU_THREAD_JOINABLE); + } +} + +static void create_threads(void) +{ + th_create_n(&rw_threads, &rw_info, "rw", do_rw, 0, n_rw_threads); + th_create_n(&rz_threads, &rz_info, "rz", do_rz, n_rw_threads, n_rz_threads); +} + +static void pr_params(void) +{ + printf("Parameters:\n"); + printf(" duration: %d s\n", duration); + printf(" # of threads: %u\n", n_rw_threads); + printf(" initial # of keys: %zu\n", init_size); + printf(" initial size hint: %zu\n", qht_n_elems); + printf(" auto-resize: %s\n", + qht_mode & QHT_MODE_AUTO_RESIZE ? 
"on" : "off"); + if (resize_rate) { + printf(" resize_rate: %f%%\n", resize_rate * 100.0); + printf(" resize range: %zu-%zu\n", resize_min, resize_max); + printf(" # resize threads %u\n", n_rz_threads); + } + printf(" update rate: %f%%\n", update_rate * 100.0); + printf(" offset: %ld\n", populate_offset); + printf(" initial key range: %zu\n", init_range); + printf(" lookup range: %lu\n", lookup_range); + printf(" update range: %lu\n", update_range); +} + +static void do_threshold(double rate, uint64_t *threshold) +{ + if (rate == 1.0) { + *threshold = UINT64_MAX; + } else { + *threshold = rate * UINT64_MAX; + } +} + +static void htable_init(void) +{ + unsigned long n = MAX(init_range, update_range); + uint64_t r = time(NULL); + size_t retries = 0; + size_t i; + + /* avoid allocating memory later by allocating all the keys now */ + keys = g_malloc(sizeof(*keys) * n); + for (i = 0; i < n; i++) { + keys[i] = populate_offset + i; + } + + /* some sanity checks */ + g_assert_cmpuint(lookup_range, <=, n); + + /* compute thresholds */ + do_threshold(update_rate, &update_threshold); + do_threshold(resize_rate, &resize_threshold); + + if (resize_rate) { + resize_min = n / 2; + resize_max = n; + assert(resize_min < resize_max); + } else { + n_rz_threads = 0; + } + + /* initialize the hash table */ + qht_init(&ht, qht_n_elems, qht_mode); + assert(init_size <= init_range); + + pr_params(); + + fprintf(stderr, "Initialization: populating %zu items...", init_size); + for (i = 0; i < init_size; i++) { + for (;;) { + uint32_t hash; + long *p; + + r = xorshift64star(r); + p = &keys[r & (init_range - 1)]; + hash = h(*p); + if (qht_insert(&ht, p, hash)) { + break; + } + retries++; + } + } + fprintf(stderr, " populated after %zu retries\n", retries); +} + +static void add_stats(struct thread_stats *s, struct thread_info *info, int n) +{ + int i; + + for (i = 0; i < n; i++) { + struct thread_stats *stats = &info[i].stats; + + s->rd += stats->rd; + s->not_rd += stats->not_rd; + + s->in += stats->in; + s->not_in += stats->not_in; + + s->rm += stats->rm; + s->not_rm += stats->not_rm; + + s->rz += stats->rz; + s->not_rz += stats->not_rz; + } +} + +static void pr_stats(void) +{ + struct thread_stats s = {}; + double tx; + + add_stats(&s, rw_info, n_rw_threads); + add_stats(&s, rz_info, n_rz_threads); + + printf("Results:\n"); + + if (resize_rate) { + printf(" Resizes: %zu (%.2f%% of %zu)\n", + s.rz, (double)s.rz / (s.rz + s.not_rz) * 100, s.rz + s.not_rz); + } + + printf(" Read: %.2f M (%.2f%% of %.2fM)\n", + (double)s.rd / 1e6, + (double)s.rd / (s.rd + s.not_rd) * 100, + (double)(s.rd + s.not_rd) / 1e6); + printf(" Inserted: %.2f M (%.2f%% of %.2fM)\n", + (double)s.in / 1e6, + (double)s.in / (s.in + s.not_in) * 100, + (double)(s.in + s.not_in) / 1e6); + printf(" Removed: %.2f M (%.2f%% of %.2fM)\n", + (double)s.rm / 1e6, + (double)s.rm / (s.rm + s.not_rm) * 100, + (double)(s.rm + s.not_rm) / 1e6); + + tx = (s.rd + s.not_rd + s.in + s.not_in + s.rm + s.not_rm) / 1e6 / duration; + printf(" Throughput: %.2f MT/s\n", tx); + printf(" Throughput/thread: %.2f MT/s/thread\n", tx / n_rw_threads); +} + +static void run_test(void) +{ + unsigned int remaining; + int i; + + while (atomic_read(&n_ready_threads) != n_rw_threads + n_rz_threads) { + cpu_relax(); + } + atomic_mb_set(&test_start, true); + do { + remaining = sleep(duration); + } while (remaining); + atomic_mb_set(&test_stop, true); + + for (i = 0; i < n_rw_threads; i++) { + qemu_thread_join(&rw_threads[i]); + } + for (i = 0; i < n_rz_threads; i++) { + 
qemu_thread_join(&rz_threads[i]); + } +} + +static void parse_args(int argc, char *argv[]) +{ + int c; + + for (;;) { + c = getopt(argc, argv, "d:D:g:k:K:l:hn:N:o:r:Rs:S:u:"); + if (c < 0) { + break; + } + switch (c) { + case 'd': + duration = atoi(optarg); + break; + case 'D': + resize_delay = atol(optarg); + break; + case 'g': + init_range = pow2ceil(atol(optarg)); + lookup_range = pow2ceil(atol(optarg)); + update_range = pow2ceil(atol(optarg)); + qht_n_elems = atol(optarg); + init_size = atol(optarg); + break; + case 'h': + usage_complete(argc, argv); + exit(0); + case 'k': + init_size = atol(optarg); + break; + case 'K': + init_range = pow2ceil(atol(optarg)); + break; + case 'l': + lookup_range = pow2ceil(atol(optarg)); + break; + case 'n': + n_rw_threads = atoi(optarg); + break; + case 'N': + n_rz_threads = atoi(optarg); + break; + case 'o': + populate_offset = atol(optarg); + break; + case 'r': + update_range = pow2ceil(atol(optarg)); + break; + case 'R': + qht_mode |= QHT_MODE_AUTO_RESIZE; + break; + case 's': + qht_n_elems = atol(optarg); + break; + case 'S': + resize_rate = atof(optarg) / 100.0; + if (resize_rate > 1.0) { + resize_rate = 1.0; + } + break; + case 'u': + update_rate = atof(optarg) / 100.0; + if (update_rate > 1.0) { + update_rate = 1.0; + } + break; + } + } +} + +int main(int argc, char *argv[]) +{ + parse_args(argc, argv); + htable_init(); + create_threads(); + run_test(); + pr_stats(); + return 0; +} diff --git a/tests/test-qdist.c b/tests/test-qdist.c new file mode 100644 index 0000000..a67f260 --- /dev/null +++ b/tests/test-qdist.c @@ -0,0 +1,384 @@ +/* + * Copyright (C) 2016, Emilio G. Cota <cota@braap.org> + * + * License: GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#include "qemu/osdep.h" +#include <glib.h> +#include "qemu/qdist.h" + +#include <math.h> + +struct entry_desc { + double x; + unsigned long count; + + /* 0 prints a space, 1-8 prints from qdist_blocks[] */ + int fill_code; +}; + +/* See: https://en.wikipedia.org/wiki/Block_Elements */ +static const gunichar qdist_blocks[] = { + 0x2581, + 0x2582, + 0x2583, + 0x2584, + 0x2585, + 0x2586, + 0x2587, + 0x2588 +}; + +#define QDIST_NR_BLOCK_CODES ARRAY_SIZE(qdist_blocks) + +static char *pr_hist(const struct entry_desc *darr, size_t n) +{ + GString *s = g_string_new(""); + size_t i; + + for (i = 0; i < n; i++) { + int fill = darr[i].fill_code; + + if (fill) { + assert(fill <= QDIST_NR_BLOCK_CODES); + g_string_append_unichar(s, qdist_blocks[fill - 1]); + } else { + g_string_append_c(s, ' '); + } + } + return g_string_free(s, FALSE); +} + +static void +histogram_check(const struct qdist *dist, const struct entry_desc *darr, + size_t n, size_t n_bins) +{ + char *pr = qdist_pr_plain(dist, n_bins); + char *str = pr_hist(darr, n); + + g_assert_cmpstr(pr, ==, str); + g_free(pr); + g_free(str); +} + +static void histogram_check_single_full(const struct qdist *dist, size_t n_bins) +{ + struct entry_desc desc = { .fill_code = 8 }; + + histogram_check(dist, &desc, 1, n_bins); +} + +static void +entries_check(const struct qdist *dist, const struct entry_desc *darr, size_t n) +{ + size_t i; + + for (i = 0; i < n; i++) { + struct qdist_entry *e = &dist->entries[i]; + + g_assert_cmpuint(e->count, ==, darr[i].count); + } +} + +static void +entries_insert(struct qdist *dist, const struct entry_desc *darr, size_t n) +{ + size_t i; + + for (i = 0; i < n; i++) { + qdist_add(dist, darr[i].x, darr[i].count); + } +} + +static void do_test_bin(const struct entry_desc *a, size_t n_a, + 
const struct entry_desc *b, size_t n_b) +{ + struct qdist qda; + struct qdist qdb; + + qdist_init(&qda); + + entries_insert(&qda, a, n_a); + qdist_inc(&qda, a[0].x); + qdist_add(&qda, a[0].x, -1); + + g_assert_cmpuint(qdist_unique_entries(&qda), ==, n_a); + g_assert_cmpfloat(qdist_xmin(&qda), ==, a[0].x); + g_assert_cmpfloat(qdist_xmax(&qda), ==, a[n_a - 1].x); + histogram_check(&qda, a, n_a, 0); + histogram_check(&qda, a, n_a, n_a); + + qdist_bin__internal(&qdb, &qda, n_b); + g_assert_cmpuint(qdb.n, ==, n_b); + entries_check(&qdb, b, n_b); + g_assert_cmpuint(qdist_sample_count(&qda), ==, qdist_sample_count(&qdb)); + /* + * No histogram_check() for $qdb, since we'd rebin it and that is a bug. + * Instead, regenerate it from $qda. + */ + histogram_check(&qda, b, n_b, n_b); + + qdist_destroy(&qdb); + qdist_destroy(&qda); +} + +static void do_test_pr(uint32_t opt) +{ + static const struct entry_desc desc[] = { + [0] = { 1, 900, 8 }, + [1] = { 2, 1, 1 }, + [2] = { 3, 2, 1 } + }; + static const char border[] = "|"; + const char *llabel = NULL; + const char *rlabel = NULL; + struct qdist dist; + GString *s; + char *str; + char *pr; + size_t n; + + n = ARRAY_SIZE(desc); + qdist_init(&dist); + + entries_insert(&dist, desc, n); + histogram_check(&dist, desc, n, 0); + + s = g_string_new(""); + + if (opt & QDIST_PR_LABELS) { + unsigned int lopts = opt & (QDIST_PR_NODECIMAL | + QDIST_PR_PERCENT | + QDIST_PR_100X | + QDIST_PR_NOBINRANGE); + + if (lopts == 0) { + llabel = "[1.0,1.7)"; + rlabel = "[2.3,3.0]"; + } else if (lopts == QDIST_PR_NODECIMAL) { + llabel = "[1,2)"; + rlabel = "[2,3]"; + } else if (lopts == (QDIST_PR_PERCENT | QDIST_PR_NODECIMAL)) { + llabel = "[1,2)%"; + rlabel = "[2,3]%"; + } else if (lopts == QDIST_PR_100X) { + llabel = "[100.0,166.7)"; + rlabel = "[233.3,300.0]"; + } else if (lopts == (QDIST_PR_NOBINRANGE | QDIST_PR_NODECIMAL)) { + llabel = "1"; + rlabel = "3"; + } else { + g_assert_cmpstr("BUG", ==, "This is not meant to be exhaustive"); + } + } + + if (llabel) { + g_string_append(s, llabel); + } + if (opt & QDIST_PR_BORDER) { + g_string_append(s, border); + } + + str = pr_hist(desc, n); + g_string_append(s, str); + g_free(str); + + if (opt & QDIST_PR_BORDER) { + g_string_append(s, border); + } + if (rlabel) { + g_string_append(s, rlabel); + } + + str = g_string_free(s, FALSE); + pr = qdist_pr(&dist, n, opt); + g_assert_cmpstr(pr, ==, str); + g_free(pr); + g_free(str); + + qdist_destroy(&dist); +} + +static inline void do_test_pr_label(uint32_t opt) +{ + opt |= QDIST_PR_LABELS; + do_test_pr(opt); +} + +static void test_pr(void) +{ + do_test_pr(0); + + do_test_pr(QDIST_PR_BORDER); + + /* 100X should be ignored because we're not setting LABELS */ + do_test_pr(QDIST_PR_100X); + + do_test_pr_label(0); + do_test_pr_label(QDIST_PR_NODECIMAL); + do_test_pr_label(QDIST_PR_PERCENT | QDIST_PR_NODECIMAL); + do_test_pr_label(QDIST_PR_100X); + do_test_pr_label(QDIST_PR_NOBINRANGE | QDIST_PR_NODECIMAL); +} + +static void test_bin_shrink(void) +{ + static const struct entry_desc a[] = { + [0] = { 0.0, 42922, 7 }, + [1] = { 0.25, 47834, 8 }, + [2] = { 0.50, 26628, 0 }, + [3] = { 0.625, 597, 4 }, + [4] = { 0.75, 10298, 1 }, + [5] = { 0.875, 22, 2 }, + [6] = { 1.0, 2771, 1 } + }; + static const struct entry_desc b[] = { + [0] = { 0.0, 42922, 7 }, + [1] = { 0.25, 47834, 8 }, + [2] = { 0.50, 27225, 3 }, + [3] = { 0.75, 13091, 1 } + }; + + return do_test_bin(a, ARRAY_SIZE(a), b, ARRAY_SIZE(b)); +} + +static void test_bin_expand(void) +{ + static const struct entry_desc a[] = { + [0] = { 0.0, 
11713, 5 }, + [1] = { 0.25, 20294, 0 }, + [2] = { 0.50, 17266, 8 }, + [3] = { 0.625, 1506, 0 }, + [4] = { 0.75, 10355, 6 }, + [5] = { 0.833, 2, 1 }, + [6] = { 0.875, 99, 4 }, + [7] = { 1.0, 4301, 2 } + }; + static const struct entry_desc b[] = { + [0] = { 0.0, 11713, 5 }, + [1] = { 0.0, 0, 0 }, + [2] = { 0.0, 20294, 8 }, + [3] = { 0.0, 0, 0 }, + [4] = { 0.0, 0, 0 }, + [5] = { 0.0, 17266, 6 }, + [6] = { 0.0, 1506, 1 }, + [7] = { 0.0, 10355, 4 }, + [8] = { 0.0, 101, 1 }, + [9] = { 0.0, 4301, 2 } + }; + + return do_test_bin(a, ARRAY_SIZE(a), b, ARRAY_SIZE(b)); +} + +static void test_bin_precision(void) +{ + static const struct entry_desc a[] = { + [0] = { 0, 213549, 8 }, + [1] = { 1, 70, 1 }, + }; + static const struct entry_desc b[] = { + [0] = { 0, 213549, 8 }, + [1] = { 0, 70, 1 }, + }; + + return do_test_bin(a, ARRAY_SIZE(a), b, ARRAY_SIZE(b)); +} + +static void test_bin_simple(void) +{ + static const struct entry_desc a[] = { + [0] = { 10, 101, 8 }, + [1] = { 11, 0, 0 }, + [2] = { 12, 2, 1 } + }; + static const struct entry_desc b[] = { + [0] = { 0, 101, 8 }, + [1] = { 0, 0, 0 }, + [2] = { 0, 0, 0 }, + [3] = { 0, 0, 0 }, + [4] = { 0, 2, 1 } + }; + + return do_test_bin(a, ARRAY_SIZE(a), b, ARRAY_SIZE(b)); +} + +static void test_single_full(void) +{ + struct qdist dist; + + qdist_init(&dist); + + qdist_add(&dist, 3, 102); + g_assert_cmpfloat(qdist_avg(&dist), ==, 3); + g_assert_cmpfloat(qdist_xmin(&dist), ==, 3); + g_assert_cmpfloat(qdist_xmax(&dist), ==, 3); + + histogram_check_single_full(&dist, 0); + histogram_check_single_full(&dist, 1); + histogram_check_single_full(&dist, 10); + + qdist_destroy(&dist); +} + +static void test_single_empty(void) +{ + struct qdist dist; + char *pr; + + qdist_init(&dist); + + qdist_add(&dist, 3, 0); + g_assert_cmpuint(qdist_sample_count(&dist), ==, 0); + g_assert(isnan(qdist_avg(&dist))); + g_assert_cmpfloat(qdist_xmin(&dist), ==, 3); + g_assert_cmpfloat(qdist_xmax(&dist), ==, 3); + + pr = qdist_pr_plain(&dist, 0); + g_assert_cmpstr(pr, ==, " "); + g_free(pr); + + pr = qdist_pr_plain(&dist, 1); + g_assert_cmpstr(pr, ==, " "); + g_free(pr); + + pr = qdist_pr_plain(&dist, 2); + g_assert_cmpstr(pr, ==, " "); + g_free(pr); + + qdist_destroy(&dist); +} + +static void test_none(void) +{ + struct qdist dist; + char *pr; + + qdist_init(&dist); + + g_assert(isnan(qdist_avg(&dist))); + g_assert(isnan(qdist_xmin(&dist))); + g_assert(isnan(qdist_xmax(&dist))); + + pr = qdist_pr_plain(&dist, 0); + g_assert(pr == NULL); + + pr = qdist_pr_plain(&dist, 2); + g_assert(pr == NULL); + + qdist_destroy(&dist); +} + +int main(int argc, char *argv[]) +{ + g_test_init(&argc, &argv, NULL); + g_test_add_func("/qdist/none", test_none); + g_test_add_func("/qdist/single/empty", test_single_empty); + g_test_add_func("/qdist/single/full", test_single_full); + g_test_add_func("/qdist/binning/simple", test_bin_simple); + g_test_add_func("/qdist/binning/precision", test_bin_precision); + g_test_add_func("/qdist/binning/expand", test_bin_expand); + g_test_add_func("/qdist/binning/shrink", test_bin_shrink); + g_test_add_func("/qdist/pr", test_pr); + return g_test_run(); +} diff --git a/tests/test-qht-par.c b/tests/test-qht-par.c new file mode 100644 index 0000000..f09e004 --- /dev/null +++ b/tests/test-qht-par.c @@ -0,0 +1,56 @@ +/* + * Copyright (C) 2016, Emilio G. Cota <cota@braap.org> + * + * License: GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ +#include "qemu/osdep.h" +#include <glib.h> + +#define TEST_QHT_STRING "tests/qht-bench 1>/dev/null 2>&1 -R -S0.1 -D10000 -N1 " + +static void test_qht(int n_threads, int update_rate, int duration) +{ + char *str; + int rc; + + str = g_strdup_printf(TEST_QHT_STRING "-n %d -u %d -d %d", + n_threads, update_rate, duration); + rc = system(str); + g_free(str); + g_assert_cmpint(rc, ==, 0); +} + +static void test_2th0u1s(void) +{ + test_qht(2, 0, 1); +} + +static void test_2th20u1s(void) +{ + test_qht(2, 20, 1); +} + +static void test_2th0u5s(void) +{ + test_qht(2, 0, 5); +} + +static void test_2th20u5s(void) +{ + test_qht(2, 20, 5); +} + +int main(int argc, char *argv[]) +{ + g_test_init(&argc, &argv, NULL); + + if (g_test_quick()) { + g_test_add_func("/qht/parallel/2threads-0%updates-1s", test_2th0u1s); + g_test_add_func("/qht/parallel/2threads-20%updates-1s", test_2th20u1s); + } else { + g_test_add_func("/qht/parallel/2threads-0%updates-5s", test_2th0u5s); + g_test_add_func("/qht/parallel/2threads-20%updates-5s", test_2th20u5s); + } + return g_test_run(); +} diff --git a/tests/test-qht.c b/tests/test-qht.c new file mode 100644 index 0000000..c8eb930 --- /dev/null +++ b/tests/test-qht.c @@ -0,0 +1,159 @@ +/* + * Copyright (C) 2016, Emilio G. Cota <cota@braap.org> + * + * License: GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#include "qemu/osdep.h" +#include <glib.h> +#include "qemu/qht.h" + +#define N 5000 + +static struct qht ht; +static int32_t arr[N * 2]; + +static bool is_equal(const void *obj, const void *userp) +{ + const int32_t *a = obj; + const int32_t *b = userp; + + return *a == *b; +} + +static void insert(int a, int b) +{ + int i; + + for (i = a; i < b; i++) { + uint32_t hash; + + arr[i] = i; + hash = i; + + qht_insert(&ht, &arr[i], hash); + } +} + +static void rm(int init, int end) +{ + int i; + + for (i = init; i < end; i++) { + uint32_t hash; + + hash = arr[i]; + g_assert_true(qht_remove(&ht, &arr[i], hash)); + } +} + +static void check(int a, int b, bool expected) +{ + struct qht_stats stats; + int i; + + for (i = a; i < b; i++) { + void *p; + uint32_t hash; + int32_t val; + + val = i; + hash = i; + p = qht_lookup(&ht, is_equal, &val, hash); + g_assert_true(!!p == expected); + } + qht_statistics_init(&ht, &stats); + if (stats.used_head_buckets) { + g_assert_cmpfloat(qdist_avg(&stats.chain), >=, 1.0); + } + g_assert_cmpuint(stats.head_buckets, >, 0); + qht_statistics_destroy(&stats); +} + +static void count_func(struct qht *ht, void *p, uint32_t hash, void *userp) +{ + unsigned int *curr = userp; + + (*curr)++; +} + +static void check_n(size_t expected) +{ + struct qht_stats stats; + + qht_statistics_init(&ht, &stats); + g_assert_cmpuint(stats.entries, ==, expected); + qht_statistics_destroy(&stats); +} + +static void iter_check(unsigned int count) +{ + unsigned int curr = 0; + + qht_iter(&ht, count_func, &curr); + g_assert_cmpuint(curr, ==, count); +} + +static void qht_do_test(unsigned int mode, size_t init_entries) +{ + qht_init(&ht, 0, mode); + + insert(0, N); + check(0, N, true); + check_n(N); + check(-N, -1, false); + iter_check(N); + + rm(101, 102); + check_n(N - 1); + insert(N, N * 2); + check_n(N + N - 1); + rm(N, N * 2); + check_n(N - 1); + insert(101, 102); + check_n(N); + + rm(10, 200); + check_n(N - 190); + insert(150, 200); + check_n(N - 190 + 50); + insert(10, 150); + check_n(N); + + rm(1, 2); + check_n(N - 1); + qht_reset_size(&ht, 0); + check_n(0); + check(0, N, false); + + qht_destroy(&ht); +} + +static void 
qht_test(unsigned int mode) +{ + qht_do_test(mode, 0); + qht_do_test(mode, 1); + qht_do_test(mode, 2); + qht_do_test(mode, 8); + qht_do_test(mode, 16); + qht_do_test(mode, 8192); + qht_do_test(mode, 16384); +} + +static void test_default(void) +{ + qht_test(0); +} + +static void test_resize(void) +{ + qht_test(QHT_MODE_AUTO_RESIZE); +} + +int main(int argc, char *argv[]) +{ + g_test_init(&argc, &argv, NULL); + g_test_add_func("/qht/mode/default", test_default); + g_test_add_func("/qht/mode/resize", test_resize); + return g_test_run(); +} diff --git a/translate-all.c b/translate-all.c index 118e7d3..e8b88b4 100644 --- a/translate-all.c +++ b/translate-all.c @@ -735,6 +735,13 @@ static inline void code_gen_alloc(size_t tb_size) qemu_mutex_init(&tcg_ctx.tb_ctx.tb_lock); } +static void tb_htable_init(void) +{ + unsigned int mode = QHT_MODE_AUTO_RESIZE; + + qht_init(&tcg_ctx.tb_ctx.htable, CODE_GEN_HTABLE_SIZE, mode); +} + /* Must be called before using the QEMU cpus. 'tb_size' is the size (in bytes) allocated to the translation buffer. Zero means default size. */ @@ -742,6 +749,7 @@ void tcg_exec_init(unsigned long tb_size) { cpu_gen_init(); page_init(); + tb_htable_init(); code_gen_alloc(tb_size); #if defined(CONFIG_SOFTMMU) /* There's no guest base to take into account, so go ahead and @@ -846,7 +854,7 @@ void tb_flush(CPUState *cpu) cpu->tb_flushed = true; } - memset(tcg_ctx.tb_ctx.tb_phys_hash, 0, sizeof(tcg_ctx.tb_ctx.tb_phys_hash)); + qht_reset_size(&tcg_ctx.tb_ctx.htable, CODE_GEN_HTABLE_SIZE); page_flush_tb(); tcg_ctx.code_gen_ptr = tcg_ctx.code_gen_buffer; @@ -857,60 +865,46 @@ void tb_flush(CPUState *cpu) #ifdef DEBUG_TB_CHECK -static void tb_invalidate_check(target_ulong address) +static void +do_tb_invalidate_check(struct qht *ht, void *p, uint32_t hash, void *userp) { - TranslationBlock *tb; - int i; + TranslationBlock *tb = p; + target_ulong addr = *(target_ulong *)userp; - address &= TARGET_PAGE_MASK; - for (i = 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) { - for (tb = tcg_ctx.tb_ctx.tb_phys_hash[i]; tb != NULL; - tb = tb->phys_hash_next) { - if (!(address + TARGET_PAGE_SIZE <= tb->pc || - address >= tb->pc + tb->size)) { - printf("ERROR invalidate: address=" TARGET_FMT_lx - " PC=%08lx size=%04x\n", - address, (long)tb->pc, tb->size); - } - } + if (!(addr + TARGET_PAGE_SIZE <= tb->pc || addr >= tb->pc + tb->size)) { + printf("ERROR invalidate: address=" TARGET_FMT_lx + " PC=%08lx size=%04x\n", addr, (long)tb->pc, tb->size); } } -/* verify that all the pages have correct rights for code */ -static void tb_page_check(void) +static void tb_invalidate_check(target_ulong address) { - TranslationBlock *tb; - int i, flags1, flags2; - - for (i = 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) { - for (tb = tcg_ctx.tb_ctx.tb_phys_hash[i]; tb != NULL; - tb = tb->phys_hash_next) { - flags1 = page_get_flags(tb->pc); - flags2 = page_get_flags(tb->pc + tb->size - 1); - if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) { - printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n", - (long)tb->pc, tb->size, flags1, flags2); - } - } - } + address &= TARGET_PAGE_MASK; + qht_iter(&tcg_ctx.tb_ctx.htable, do_tb_invalidate_check, &address); } -#endif - -static inline void tb_hash_remove(TranslationBlock **ptb, TranslationBlock *tb) +static void +do_tb_page_check(struct qht *ht, void *p, uint32_t hash, void *userp) { - TranslationBlock *tb1; + TranslationBlock *tb = p; + int flags1, flags2; - for (;;) { - tb1 = *ptb; - if (tb1 == tb) { - *ptb = tb1->phys_hash_next; - break; - } - ptb = &tb1->phys_hash_next; + 
flags1 = page_get_flags(tb->pc); + flags2 = page_get_flags(tb->pc + tb->size - 1); + if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) { + printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n", + (long)tb->pc, tb->size, flags1, flags2); } } +/* verify that all the pages have correct rights for code */ +static void tb_page_check(void) +{ + qht_iter(&tcg_ctx.tb_ctx.htable, do_tb_page_check, NULL); +} + +#endif + static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb) { TranslationBlock *tb1; @@ -992,13 +986,13 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr) { CPUState *cpu; PageDesc *p; - unsigned int h; + uint32_t h; tb_page_addr_t phys_pc; /* remove the TB from the hash list */ phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK); - h = tb_phys_hash_func(phys_pc); - tb_hash_remove(&tcg_ctx.tb_ctx.tb_phys_hash[h], tb); + h = tb_hash_func(phys_pc, tb->pc, tb->flags); + qht_remove(&tcg_ctx.tb_ctx.htable, tb, h); /* remove the TB from the page list */ if (tb->page_addr[0] != page_addr) { @@ -1127,14 +1121,11 @@ static inline void tb_alloc_page(TranslationBlock *tb, static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, tb_page_addr_t phys_page2) { - unsigned int h; - TranslationBlock **ptb; + uint32_t h; - /* add in the physical hash table */ - h = tb_phys_hash_func(phys_pc); - ptb = &tcg_ctx.tb_ctx.tb_phys_hash[h]; - tb->phys_hash_next = *ptb; - *ptb = tb; + /* add in the hash table */ + h = tb_hash_func(phys_pc, tb->pc, tb->flags); + qht_insert(&tcg_ctx.tb_ctx.htable, tb, h); /* add in the page list */ tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK); @@ -1677,6 +1668,10 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf) int i, target_code_size, max_target_code_size; int direct_jmp_count, direct_jmp2_count, cross_page; TranslationBlock *tb; + struct qht_stats hst; + uint32_t hgram_opts; + size_t hgram_bins; + char *hgram; target_code_size = 0; max_target_code_size = 0; @@ -1727,6 +1722,38 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf) direct_jmp2_count, tcg_ctx.tb_ctx.nb_tbs ? (direct_jmp2_count * 100) / tcg_ctx.tb_ctx.nb_tbs : 0); + + qht_statistics_init(&tcg_ctx.tb_ctx.htable, &hst); + + cpu_fprintf(f, "TB hash buckets %zu/%zu (%0.2f%% head buckets used)\n", + hst.used_head_buckets, hst.head_buckets, + (double)hst.used_head_buckets / hst.head_buckets * 100); + + hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS; + hgram_opts |= QDIST_PR_100X | QDIST_PR_PERCENT; + if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) { + hgram_opts |= QDIST_PR_NODECIMAL; + } + hgram = qdist_pr(&hst.occupancy, 10, hgram_opts); + cpu_fprintf(f, "TB hash occupancy %0.2f%% avg chain occ. Histogram: %s\n", + qdist_avg(&hst.occupancy) * 100, hgram); + g_free(hgram); + + hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS; + hgram_bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain); + if (hgram_bins > 10) { + hgram_bins = 10; + } else { + hgram_bins = 0; + hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE; + } + hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts); + cpu_fprintf(f, "TB hash avg chain %0.3f buckets. 
Histogram: %s\n", + qdist_avg(&hst.chain), hgram); + g_free(hgram); + + qht_statistics_destroy(&hst); + cpu_fprintf(f, "\nStatistics:\n"); cpu_fprintf(f, "TB flush count %d\n", tcg_ctx.tb_ctx.tb_flush_count); cpu_fprintf(f, "TB invalidate count %d\n", diff --git a/util/Makefile.objs b/util/Makefile.objs index a8a777e..45f8794 100644 --- a/util/Makefile.objs +++ b/util/Makefile.objs @@ -32,3 +32,5 @@ util-obj-y += buffer.o util-obj-y += timed-average.o util-obj-y += base64.o util-obj-y += log.o +util-obj-y += qdist.o +util-obj-y += qht.o diff --git a/util/qdist.c b/util/qdist.c new file mode 100644 index 0000000..4ea2e34 --- /dev/null +++ b/util/qdist.c @@ -0,0 +1,395 @@ +/* + * qdist.c - QEMU helpers for handling frequency distributions of data. + * + * Copyright (C) 2016, Emilio G. Cota <cota@braap.org> + * + * License: GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#include "qemu/qdist.h" + +#include <math.h> +#ifndef NAN +#define NAN (0.0 / 0.0) +#endif + +void qdist_init(struct qdist *dist) +{ + dist->entries = g_malloc(sizeof(*dist->entries)); + dist->size = 1; + dist->n = 0; +} + +void qdist_destroy(struct qdist *dist) +{ + g_free(dist->entries); +} + +static inline int qdist_cmp_double(double a, double b) +{ + if (a > b) { + return 1; + } else if (a < b) { + return -1; + } + return 0; +} + +static int qdist_cmp(const void *ap, const void *bp) +{ + const struct qdist_entry *a = ap; + const struct qdist_entry *b = bp; + + return qdist_cmp_double(a->x, b->x); +} + +void qdist_add(struct qdist *dist, double x, long count) +{ + struct qdist_entry *entry = NULL; + + if (dist->n) { + struct qdist_entry e; + + e.x = x; + entry = bsearch(&e, dist->entries, dist->n, sizeof(e), qdist_cmp); + } + + if (entry) { + entry->count += count; + return; + } + + if (unlikely(dist->n == dist->size)) { + dist->size *= 2; + dist->entries = g_realloc(dist->entries, + sizeof(*dist->entries) * (dist->size)); + } + dist->n++; + entry = &dist->entries[dist->n - 1]; + entry->x = x; + entry->count = count; + qsort(dist->entries, dist->n, sizeof(*entry), qdist_cmp); +} + +void qdist_inc(struct qdist *dist, double x) +{ + qdist_add(dist, x, 1); +} + +/* + * Unicode for block elements. See: + * https://en.wikipedia.org/wiki/Block_Elements + */ +static const gunichar qdist_blocks[] = { + 0x2581, + 0x2582, + 0x2583, + 0x2584, + 0x2585, + 0x2586, + 0x2587, + 0x2588 +}; + +#define QDIST_NR_BLOCK_CODES ARRAY_SIZE(qdist_blocks) + +/* + * Print a distribution into a string. + * + * This function assumes that appropriate binning has been done on the input; + * see qdist_bin__internal() and qdist_pr_plain(). + * + * Callers must free the returned string with g_free(). 
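The qdist API introduced above is small enough to exercise in a few lines. The following sketch is illustrative only and not part of the patch; it assumes just the declarations added to include/qemu/qdist.h by this series, and the helper name qdist_example() is invented.

#include "qemu/qdist.h"
#include <stdio.h>

/* Record a handful of samples and report on them. */
static void qdist_example(void)
{
    struct qdist dist;
    char *hgram;

    qdist_init(&dist);
    qdist_inc(&dist, 1.0);      /* one sample at x == 1.0 */
    qdist_add(&dist, 3.0, 5);   /* five samples at x == 3.0 */

    printf("avg=%.2f min=%.1f max=%.1f samples=%lu unique=%zu\n",
           qdist_avg(&dist), qdist_xmin(&dist), qdist_xmax(&dist),
           qdist_sample_count(&dist), qdist_unique_entries(&dist));

    hgram = qdist_pr_plain(&dist, 0);   /* 0 bins: keep the entries as-is */
    printf("histogram: %s\n", hgram);
    g_free(hgram);

    qdist_destroy(&dist);
}

In-tree callers such as dump_exec_info() go through qdist_pr() instead, so that the histogram carries labels; qdist_pr_plain() returns just the row of block glyphs.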
+ */ +static char *qdist_pr_internal(const struct qdist *dist) +{ + double min, max; + GString *s = g_string_new(""); + size_t i; + + /* if only one entry, its printout will be either full or empty */ + if (dist->n == 1) { + if (dist->entries[0].count) { + g_string_append_unichar(s, qdist_blocks[QDIST_NR_BLOCK_CODES - 1]); + } else { + g_string_append_c(s, ' '); + } + goto out; + } + + /* get min and max counts */ + min = dist->entries[0].count; + max = min; + for (i = 0; i < dist->n; i++) { + struct qdist_entry *e = &dist->entries[i]; + + if (e->count < min) { + min = e->count; + } + if (e->count > max) { + max = e->count; + } + } + + for (i = 0; i < dist->n; i++) { + struct qdist_entry *e = &dist->entries[i]; + int index; + + /* make an exception with 0; instead of using block[0], print a space */ + if (e->count) { + /* divide first to avoid loss of precision when e->count == max */ + index = (e->count - min) / (max - min) * (QDIST_NR_BLOCK_CODES - 1); + g_string_append_unichar(s, qdist_blocks[index]); + } else { + g_string_append_c(s, ' '); + } + } + out: + return g_string_free(s, FALSE); +} + +/* + * Bin the distribution in @from into @n bins of consecutive, non-overlapping + * intervals, copying the result to @to. + * + * This function is internal to qdist: only this file and test code should + * ever call it. + * + * Note: calling this function on an already-binned qdist is a bug. + * + * If @n == 0 or @from->n == 1, use @from->n. + */ +void qdist_bin__internal(struct qdist *to, const struct qdist *from, size_t n) +{ + double xmin, xmax; + double step; + size_t i, j; + + qdist_init(to); + + if (from->n == 0) { + return; + } + if (n == 0 || from->n == 1) { + n = from->n; + } + + /* set equally-sized bins between @from's left and right */ + xmin = qdist_xmin(from); + xmax = qdist_xmax(from); + step = (xmax - xmin) / n; + + if (n == from->n) { + /* if @from's entries are equally spaced, no need to re-bin */ + for (i = 0; i < from->n; i++) { + if (from->entries[i].x != xmin + i * step) { + goto rebin; + } + } + /* they're equally spaced, so copy the dist and bail out */ + to->entries = g_new(struct qdist_entry, from->n); + to->n = from->n; + memcpy(to->entries, from->entries, sizeof(*to->entries) * to->n); + return; + } + + rebin: + j = 0; + for (i = 0; i < n; i++) { + double x; + double left, right; + + left = xmin + i * step; + right = xmin + (i + 1) * step; + + /* Add x, even if it might not get any counts later */ + x = left; + qdist_add(to, x, 0); + + /* + * To avoid double-counting we capture [left, right) ranges, except for + * the righmost bin, which captures a [left, right] range. + */ + while (j < from->n && (from->entries[j].x < right || i == n - 1)) { + struct qdist_entry *o = &from->entries[j]; + + qdist_add(to, x, o->count); + j++; + } + } +} + +/* + * Print @dist into a string, after re-binning it into @n bins of consecutive, + * non-overlapping intervals. + * + * If @n == 0, use @orig->n. + * + * Callers must free the returned string with g_free(). 
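To make the half-open interval convention above concrete, here is a small worked example with invented numbers. qdist_bin__internal() is internal to qdist, but as its comment notes the test program may call it, so this sketch is written in that test-program style and is not part of the patch.

#include "qemu/qdist.h"

static void binning_example(void)
{
    struct qdist from, to;

    qdist_init(&from);
    qdist_add(&from, 0.0, 1);
    qdist_add(&from, 2.0, 1);
    qdist_add(&from, 4.0, 1);
    qdist_add(&from, 6.0, 1);
    qdist_add(&from, 10.0, 1);

    /* step = (10 - 0) / 5 = 2, giving bins [0,2) [2,4) [4,6) [6,8) [8,10].
     * The sample at x == 10.0 lands in the last bin because only the
     * rightmost bin is closed on the right; every other bin is half-open,
     * which is what prevents double counting at the boundaries. */
    qdist_bin__internal(&to, &from, 5);

    qdist_destroy(&to);
    qdist_destroy(&from);
}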
+ */ +char *qdist_pr_plain(const struct qdist *dist, size_t n) +{ + struct qdist binned; + char *ret; + + if (dist->n == 0) { + return NULL; + } + qdist_bin__internal(&binned, dist, n); + ret = qdist_pr_internal(&binned); + qdist_destroy(&binned); + return ret; +} + +static char *qdist_pr_label(const struct qdist *dist, size_t n_bins, + uint32_t opt, bool is_left) +{ + const char *percent; + const char *lparen; + const char *rparen; + GString *s; + double x1, x2, step; + double x; + double n; + int dec; + + s = g_string_new(""); + if (!(opt & QDIST_PR_LABELS)) { + goto out; + } + + dec = opt & QDIST_PR_NODECIMAL ? 0 : 1; + percent = opt & QDIST_PR_PERCENT ? "%" : ""; + + n = n_bins ? n_bins : dist->n; + x = is_left ? qdist_xmin(dist) : qdist_xmax(dist); + step = (qdist_xmax(dist) - qdist_xmin(dist)) / n; + + if (opt & QDIST_PR_100X) { + x *= 100.0; + step *= 100.0; + } + if (opt & QDIST_PR_NOBINRANGE) { + lparen = rparen = ""; + x1 = x; + x2 = x; /* unnecessary, but a dumb compiler might not figure it out */ + } else { + lparen = "["; + rparen = is_left ? ")" : "]"; + if (is_left) { + x1 = x; + x2 = x + step; + } else { + x1 = x - step; + x2 = x; + } + } + g_string_append_printf(s, "%s%.*f", lparen, dec, x1); + if (!(opt & QDIST_PR_NOBINRANGE)) { + g_string_append_printf(s, ",%.*f%s", dec, x2, rparen); + } + g_string_append(s, percent); + out: + return g_string_free(s, FALSE); +} + +/* + * Print the distribution's histogram into a string. + * + * See also: qdist_pr_plain(). + * + * Callers must free the returned string with g_free(). + */ +char *qdist_pr(const struct qdist *dist, size_t n_bins, uint32_t opt) +{ + const char *border = opt & QDIST_PR_BORDER ? "|" : ""; + char *llabel, *rlabel; + char *hgram; + GString *s; + + if (dist->n == 0) { + return NULL; + } + + s = g_string_new(""); + + llabel = qdist_pr_label(dist, n_bins, opt, true); + rlabel = qdist_pr_label(dist, n_bins, opt, false); + hgram = qdist_pr_plain(dist, n_bins); + g_string_append_printf(s, "%s%s%s%s%s", + llabel, border, hgram, border, rlabel); + g_free(llabel); + g_free(rlabel); + g_free(hgram); + + return g_string_free(s, FALSE); +} + +static inline double qdist_x(const struct qdist *dist, int index) +{ + if (dist->n == 0) { + return NAN; + } + return dist->entries[index].x; +} + +double qdist_xmin(const struct qdist *dist) +{ + return qdist_x(dist, 0); +} + +double qdist_xmax(const struct qdist *dist) +{ + return qdist_x(dist, dist->n - 1); +} + +size_t qdist_unique_entries(const struct qdist *dist) +{ + return dist->n; +} + +unsigned long qdist_sample_count(const struct qdist *dist) +{ + unsigned long count = 0; + size_t i; + + for (i = 0; i < dist->n; i++) { + struct qdist_entry *e = &dist->entries[i]; + + count += e->count; + } + return count; +} + +static double qdist_pairwise_avg(const struct qdist *dist, size_t index, + size_t n, unsigned long count) +{ + /* amortize the recursion by using a base case > 2 */ + if (n <= 8) { + size_t i; + double ret = 0; + + for (i = 0; i < n; i++) { + struct qdist_entry *e = &dist->entries[index + i]; + + ret += e->x * e->count / count; + } + return ret; + } else { + size_t n2 = n / 2; + + return qdist_pairwise_avg(dist, index, n2, count) + + qdist_pairwise_avg(dist, index + n2, n - n2, count); + } +} + +double qdist_avg(const struct qdist *dist) +{ + unsigned long count; + + count = qdist_sample_count(dist); + if (!count) { + return NAN; + } + return qdist_pairwise_avg(dist, 0, dist->n, count); +} diff --git a/util/qht.c b/util/qht.c new file mode 100644 index 0000000..6f74909 
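qdist_pr() wraps the plain histogram with optional labels on either side. The fragment below is a rough illustration of the flags, reusing the hypothetical dist from the earlier sketch; the exact glyphs depend on the counts, so the output shown is approximate.

/* With 10 bins over samples spanning [0, 100], QDIST_PR_LABELS yields
 * bracketed bin ranges on both ends, roughly:
 *   [0.0,10.0)|<ten block glyphs>|[90.0,100.0]
 * QDIST_PR_NOBINRANGE collapses each label to a single number, while
 * QDIST_PR_100X and QDIST_PR_PERCENT rescale and suffix the labels, as the
 * occupancy report in dump_exec_info() above does. */
uint32_t opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
char *s = qdist_pr(&dist, 10, opts);

if (s) {            /* NULL when the distribution is empty */
    printf("%s\n", s);
    g_free(s);
}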
--- /dev/null +++ b/util/qht.c @@ -0,0 +1,833 @@ +/* + * qht.c - QEMU Hash Table, designed to scale for read-mostly workloads. + * + * Copyright (C) 2016, Emilio G. Cota <cota@braap.org> + * + * License: GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + * Assumptions: + * - NULL cannot be inserted/removed as a pointer value. + * - Trying to insert an already-existing hash-pointer pair is OK. However, + * it is not OK to insert into the same hash table different hash-pointer + * pairs that have the same pointer value, but not the hashes. + * - Lookups are performed under an RCU read-critical section; removals + * must wait for a grace period to elapse before freeing removed objects. + * + * Features: + * - Reads (i.e. lookups and iterators) can be concurrent with other reads. + * Lookups that are concurrent with writes to the same bucket will retry + * via a seqlock; iterators acquire all bucket locks and therefore can be + * concurrent with lookups and are serialized wrt writers. + * - Writes (i.e. insertions/removals) can be concurrent with writes to + * different buckets; writes to the same bucket are serialized through a lock. + * - Optional auto-resizing: the hash table resizes up if the load surpasses + * a certain threshold. Resizing is done concurrently with readers; writes + * are serialized with the resize operation. + * + * The key structure is the bucket, which is cacheline-sized. Buckets + * contain a few hash values and pointers; the u32 hash values are stored in + * full so that resizing is fast. Having this structure instead of directly + * chaining items has two advantages: + * - Failed lookups fail fast, and touch a minimum number of cache lines. + * - Resizing the hash table with concurrent lookups is easy. + * + * There are two types of buckets: + * 1. "head" buckets are the ones allocated in the array of buckets in qht_map. + * 2. all "non-head" buckets (i.e. all others) are members of a chain that + * starts from a head bucket. + * Note that the seqlock and spinlock of a head bucket applies to all buckets + * chained to it; these two fields are unused in non-head buckets. + * + * On removals, we move the last valid item in the chain to the position of the + * just-removed entry. This makes lookups slightly faster, since the moment an + * invalid entry is found, the (failed) lookup is over. + * + * Resizing is done by taking all bucket spinlocks (so that no other writers can + * race with us) and then copying all entries into a new hash map. Then, the + * ht->map pointer is set, and the old map is freed once no RCU readers can see + * it anymore. + * + * Writers check for concurrent resizes by comparing ht->map before and after + * acquiring their bucket lock. If they don't match, a resize has occured + * while the bucket spinlock was being acquired. + * + * Related Work: + * - Idea of cacheline-sized buckets with full hashes taken from: + * David, Guerraoui & Trigonakis, "Asynchronized Concurrency: + * The Secret to Scaling Concurrent Search Data Structures", ASPLOS'15. + * - Why not RCU-based hash tables? They would allow us to get rid of the + * seqlock, but resizing would take forever since RCU read critical + * sections in QEMU take quite a long time. + * More info on relativistic hash tables: + * + Triplett, McKenney & Walpole, "Resizable, Scalable, Concurrent Hash + * Tables via Relativistic Programming", USENIX ATC'11. + * + Corbet, "Relativistic hash tables, part 1: Algorithms", @ lwn.net, 2014. 
+ * https://lwn.net/Articles/612021/ + */ +#include "qemu/qht.h" +#include "qemu/atomic.h" +#include "qemu/rcu.h" + +//#define QHT_DEBUG + +/* + * We want to avoid false sharing of cache lines. Most systems have 64-byte + * cache lines so we go with it for simplicity. + * + * Note that systems with smaller cache lines will be fine (the struct is + * almost 64-bytes); systems with larger cache lines might suffer from + * some false sharing. + */ +#define QHT_BUCKET_ALIGN 64 + +/* define these to keep sizeof(qht_bucket) within QHT_BUCKET_ALIGN */ +#if HOST_LONG_BITS == 32 +#define QHT_BUCKET_ENTRIES 6 +#else /* 64-bit */ +#define QHT_BUCKET_ENTRIES 4 +#endif + +/* + * Note: reading partially-updated pointers in @pointers could lead to + * segfaults. We thus access them with atomic_read/set; this guarantees + * that the compiler makes all those accesses atomic. We also need the + * volatile-like behavior in atomic_read, since otherwise the compiler + * might refetch the pointer. + * atomic_read's are of course not necessary when the bucket lock is held. + * + * If both ht->lock and b->lock are grabbed, ht->lock should always + * be grabbed first. + */ +struct qht_bucket { + QemuSpin lock; + QemuSeqLock sequence; + uint32_t hashes[QHT_BUCKET_ENTRIES]; + void *pointers[QHT_BUCKET_ENTRIES]; + struct qht_bucket *next; +} QEMU_ALIGNED(QHT_BUCKET_ALIGN); + +QEMU_BUILD_BUG_ON(sizeof(struct qht_bucket) > QHT_BUCKET_ALIGN); + +/** + * struct qht_map - structure to track an array of buckets + * @rcu: used by RCU. Keep it as the top field in the struct to help valgrind + * find the whole struct. + * @buckets: array of head buckets. It is constant once the map is created. + * @n_buckets: number of head buckets. It is constant once the map is created. + * @n_added_buckets: number of added (i.e. "non-head") buckets + * @n_added_buckets_threshold: threshold to trigger an upward resize once the + * number of added buckets surpasses it. + * + * Buckets are tracked in what we call a "map", i.e. this structure. 
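The entry counts above are what make a bucket fit the assumed 64-byte cache line. A rough tally, assuming QemuSpin and QemuSeqLock are each a single 4-byte integer (which is what the spinlock and seqlock patches in this series suggest, but is not spelled out here):

/* Approximate layout, not authoritative sizes:
 *   64-bit host, QHT_BUCKET_ENTRIES == 4:
 *     4 (lock) + 4 (sequence) + 4*4 (hashes) + 4*8 (pointers) + 8 (next) = 64
 *   32-bit host, QHT_BUCKET_ENTRIES == 6:
 *     4 (lock) + 4 (sequence) + 6*4 (hashes) + 6*4 (pointers) + 4 (next) = 60
 * Either way sizeof(struct qht_bucket) stays within QHT_BUCKET_ALIGN, which
 * is exactly what the QEMU_BUILD_BUG_ON() above checks at compile time. */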
+ */ +struct qht_map { + struct rcu_head rcu; + struct qht_bucket *buckets; + size_t n_buckets; + size_t n_added_buckets; + size_t n_added_buckets_threshold; +}; + +/* trigger a resize when n_added_buckets > n_buckets / div */ +#define QHT_NR_ADDED_BUCKETS_THRESHOLD_DIV 8 + +static void qht_do_resize(struct qht *ht, struct qht_map *new); +static void qht_grow_maybe(struct qht *ht); + +#ifdef QHT_DEBUG + +#define qht_debug_assert(X) do { assert(X); } while (0) + +static void qht_bucket_debug__locked(struct qht_bucket *b) +{ + bool seen_empty = false; + bool corrupt = false; + int i; + + do { + for (i = 0; i < QHT_BUCKET_ENTRIES; i++) { + if (b->pointers[i] == NULL) { + seen_empty = true; + continue; + } + if (seen_empty) { + fprintf(stderr, "%s: b: %p, pos: %i, hash: 0x%x, p: %p\n", + __func__, b, i, b->hashes[i], b->pointers[i]); + corrupt = true; + } + } + b = b->next; + } while (b); + qht_debug_assert(!corrupt); +} + +static void qht_map_debug__all_locked(struct qht_map *map) +{ + int i; + + for (i = 0; i < map->n_buckets; i++) { + qht_bucket_debug__locked(&map->buckets[i]); + } +} +#else + +#define qht_debug_assert(X) do { (void)(X); } while (0) + +static inline void qht_bucket_debug__locked(struct qht_bucket *b) +{ } + +static inline void qht_map_debug__all_locked(struct qht_map *map) +{ } +#endif /* QHT_DEBUG */ + +static inline size_t qht_elems_to_buckets(size_t n_elems) +{ + return pow2ceil(n_elems / QHT_BUCKET_ENTRIES); +} + +static inline void qht_head_init(struct qht_bucket *b) +{ + memset(b, 0, sizeof(*b)); + qemu_spin_init(&b->lock); + seqlock_init(&b->sequence); +} + +static inline +struct qht_bucket *qht_map_to_bucket(struct qht_map *map, uint32_t hash) +{ + return &map->buckets[hash & (map->n_buckets - 1)]; +} + +/* acquire all bucket locks from a map */ +static void qht_map_lock_buckets(struct qht_map *map) +{ + size_t i; + + for (i = 0; i < map->n_buckets; i++) { + struct qht_bucket *b = &map->buckets[i]; + + qemu_spin_lock(&b->lock); + } +} + +static void qht_map_unlock_buckets(struct qht_map *map) +{ + size_t i; + + for (i = 0; i < map->n_buckets; i++) { + struct qht_bucket *b = &map->buckets[i]; + + qemu_spin_unlock(&b->lock); + } +} + +/* + * Call with at least a bucket lock held. + * @map should be the value read before acquiring the lock (or locks). + */ +static inline bool qht_map_is_stale__locked(struct qht *ht, struct qht_map *map) +{ + return map != ht->map; +} + +/* + * Grab all bucket locks, and set @pmap after making sure the map isn't stale. + * + * Pairs with qht_map_unlock_buckets(), hence the pass-by-reference. + * + * Note: callers cannot have ht->lock held. + */ +static inline +void qht_map_lock_buckets__no_stale(struct qht *ht, struct qht_map **pmap) +{ + struct qht_map *map; + + map = atomic_rcu_read(&ht->map); + qht_map_lock_buckets(map); + if (likely(!qht_map_is_stale__locked(ht, map))) { + *pmap = map; + return; + } + qht_map_unlock_buckets(map); + + /* we raced with a resize; acquire ht->lock to see the updated ht->map */ + qemu_mutex_lock(&ht->lock); + map = ht->map; + qht_map_lock_buckets(map); + qemu_mutex_unlock(&ht->lock); + *pmap = map; + return; +} + +/* + * Get a head bucket and lock it, making sure its parent map is not stale. + * @pmap is filled with a pointer to the bucket's parent map. + * + * Unlock with qemu_spin_unlock(&b->lock). + * + * Note: callers cannot have ht->lock held. 
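qht_elems_to_buckets() above derives the number of head buckets from the expected element count, and qht_init(), defined a little further down, passes the requested size through it. A quick worked example with invented numbers (the ht variable is hypothetical):

/* On a 64-bit host QHT_BUCKET_ENTRIES is 4, so sizing for 1 << 16 elements
 * gives pow2ceil((1 << 16) / 4) == 16384 head buckets, and with
 * QHT_NR_ADDED_BUCKETS_THRESHOLD_DIV == 8 the table schedules a grow once
 * more than 16384 / 8 == 2048 non-head buckets have been chained. */
struct qht ht;

qht_init(&ht, 1 << 16, QHT_MODE_AUTO_RESIZE);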
+ */ +static inline +struct qht_bucket *qht_bucket_lock__no_stale(struct qht *ht, uint32_t hash, + struct qht_map **pmap) +{ + struct qht_bucket *b; + struct qht_map *map; + + map = atomic_rcu_read(&ht->map); + b = qht_map_to_bucket(map, hash); + + qemu_spin_lock(&b->lock); + if (likely(!qht_map_is_stale__locked(ht, map))) { + *pmap = map; + return b; + } + qemu_spin_unlock(&b->lock); + + /* we raced with a resize; acquire ht->lock to see the updated ht->map */ + qemu_mutex_lock(&ht->lock); + map = ht->map; + b = qht_map_to_bucket(map, hash); + qemu_spin_lock(&b->lock); + qemu_mutex_unlock(&ht->lock); + *pmap = map; + return b; +} + +static inline bool qht_map_needs_resize(struct qht_map *map) +{ + return atomic_read(&map->n_added_buckets) > map->n_added_buckets_threshold; +} + +static inline void qht_chain_destroy(struct qht_bucket *head) +{ + struct qht_bucket *curr = head->next; + struct qht_bucket *prev; + + while (curr) { + prev = curr; + curr = curr->next; + qemu_vfree(prev); + } +} + +/* pass only an orphan map */ +static void qht_map_destroy(struct qht_map *map) +{ + size_t i; + + for (i = 0; i < map->n_buckets; i++) { + qht_chain_destroy(&map->buckets[i]); + } + qemu_vfree(map->buckets); + g_free(map); +} + +static struct qht_map *qht_map_create(size_t n_buckets) +{ + struct qht_map *map; + size_t i; + + map = g_malloc(sizeof(*map)); + map->n_buckets = n_buckets; + + map->n_added_buckets = 0; + map->n_added_buckets_threshold = n_buckets / + QHT_NR_ADDED_BUCKETS_THRESHOLD_DIV; + + /* let tiny hash tables to at least add one non-head bucket */ + if (unlikely(map->n_added_buckets_threshold == 0)) { + map->n_added_buckets_threshold = 1; + } + + map->buckets = qemu_memalign(QHT_BUCKET_ALIGN, + sizeof(*map->buckets) * n_buckets); + for (i = 0; i < n_buckets; i++) { + qht_head_init(&map->buckets[i]); + } + return map; +} + +void qht_init(struct qht *ht, size_t n_elems, unsigned int mode) +{ + struct qht_map *map; + size_t n_buckets = qht_elems_to_buckets(n_elems); + + ht->mode = mode; + qemu_mutex_init(&ht->lock); + map = qht_map_create(n_buckets); + atomic_rcu_set(&ht->map, map); +} + +/* call only when there are no readers/writers left */ +void qht_destroy(struct qht *ht) +{ + qht_map_destroy(ht->map); + memset(ht, 0, sizeof(*ht)); +} + +static void qht_bucket_reset__locked(struct qht_bucket *head) +{ + struct qht_bucket *b = head; + int i; + + seqlock_write_begin(&head->sequence); + do { + for (i = 0; i < QHT_BUCKET_ENTRIES; i++) { + if (b->pointers[i] == NULL) { + goto done; + } + b->hashes[i] = 0; + atomic_set(&b->pointers[i], NULL); + } + b = b->next; + } while (b); + done: + seqlock_write_end(&head->sequence); +} + +/* call with all bucket locks held */ +static void qht_map_reset__all_locked(struct qht_map *map) +{ + size_t i; + + for (i = 0; i < map->n_buckets; i++) { + qht_bucket_reset__locked(&map->buckets[i]); + } + qht_map_debug__all_locked(map); +} + +void qht_reset(struct qht *ht) +{ + struct qht_map *map; + + qht_map_lock_buckets__no_stale(ht, &map); + qht_map_reset__all_locked(map); + qht_map_unlock_buckets(map); +} + +bool qht_reset_size(struct qht *ht, size_t n_elems) +{ + struct qht_map *new; + struct qht_map *map; + size_t n_buckets; + bool resize = false; + + n_buckets = qht_elems_to_buckets(n_elems); + + qemu_mutex_lock(&ht->lock); + map = ht->map; + if (n_buckets != map->n_buckets) { + new = qht_map_create(n_buckets); + resize = true; + } + + qht_map_lock_buckets(map); + qht_map_reset__all_locked(map); + if (resize) { + qht_do_resize(ht, new); + } + 
qht_map_unlock_buckets(map); + qemu_mutex_unlock(&ht->lock); + + return resize; +} + +static inline +void *qht_do_lookup(struct qht_bucket *head, qht_lookup_func_t func, + const void *userp, uint32_t hash) +{ + struct qht_bucket *b = head; + int i; + + do { + for (i = 0; i < QHT_BUCKET_ENTRIES; i++) { + if (b->hashes[i] == hash) { + void *p = atomic_read(&b->pointers[i]); + + if (likely(p) && likely(func(p, userp))) { + return p; + } + } + } + b = atomic_rcu_read(&b->next); + } while (b); + + return NULL; +} + +static __attribute__((noinline)) +void *qht_lookup__slowpath(struct qht_bucket *b, qht_lookup_func_t func, + const void *userp, uint32_t hash) +{ + unsigned int version; + void *ret; + + do { + version = seqlock_read_begin(&b->sequence); + ret = qht_do_lookup(b, func, userp, hash); + } while (seqlock_read_retry(&b->sequence, version)); + return ret; +} + +void *qht_lookup(struct qht *ht, qht_lookup_func_t func, const void *userp, + uint32_t hash) +{ + struct qht_bucket *b; + struct qht_map *map; + unsigned int version; + void *ret; + + map = atomic_rcu_read(&ht->map); + b = qht_map_to_bucket(map, hash); + + version = seqlock_read_begin(&b->sequence); + ret = qht_do_lookup(b, func, userp, hash); + if (likely(!seqlock_read_retry(&b->sequence, version))) { + return ret; + } + /* + * Removing the do/while from the fastpath gives a 4% perf. increase when + * running a 100%-lookup microbenchmark. + */ + return qht_lookup__slowpath(b, func, userp, hash); +} + +/* call with head->lock held */ +static bool qht_insert__locked(struct qht *ht, struct qht_map *map, + struct qht_bucket *head, void *p, uint32_t hash, + bool *needs_resize) +{ + struct qht_bucket *b = head; + struct qht_bucket *prev = NULL; + struct qht_bucket *new = NULL; + int i; + + do { + for (i = 0; i < QHT_BUCKET_ENTRIES; i++) { + if (b->pointers[i]) { + if (unlikely(b->pointers[i] == p)) { + return false; + } + } else { + goto found; + } + } + prev = b; + b = b->next; + } while (b); + + b = qemu_memalign(QHT_BUCKET_ALIGN, sizeof(*b)); + memset(b, 0, sizeof(*b)); + new = b; + i = 0; + atomic_inc(&map->n_added_buckets); + if (unlikely(qht_map_needs_resize(map)) && needs_resize) { + *needs_resize = true; + } + + found: + /* found an empty key: acquire the seqlock and write */ + seqlock_write_begin(&head->sequence); + if (new) { + atomic_rcu_set(&prev->next, b); + } + b->hashes[i] = hash; + atomic_set(&b->pointers[i], p); + seqlock_write_end(&head->sequence); + return true; +} + +static __attribute__((noinline)) void qht_grow_maybe(struct qht *ht) +{ + struct qht_map *map; + + /* + * If the lock is taken it probably means there's an ongoing resize, + * so bail out. 
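Because a bucket stores only 32-bit hashes, qht_lookup() relies on the caller-supplied comparison callback to reject false positives from colliding hashes. A minimal sketch of that pattern follows; the entry type, hash and every name in it are invented for illustration and not part of the patch.

#include "qemu/qht.h"
#include <stdbool.h>
#include <stdint.h>

struct my_entry {
    uint64_t key;
    /* ... payload ... */
};

/* qht_lookup_func_t: return true only if @obj really matches @userp. */
static bool my_entry_cmp(const void *obj, const void *userp)
{
    const struct my_entry *e = obj;
    const uint64_t *key = userp;

    return e->key == *key;
}

static struct my_entry *my_table_lookup(struct qht *ht, uint64_t key)
{
    /* Any reasonable 32-bit hash of the key will do here. */
    uint32_t hash = (uint32_t)(key ^ (key >> 32));

    /* Called from an RCU read-side critical section; if it races with a
     * writer to the same bucket, the seqlock sends it down the retry
     * slowpath shown above. */
    return qht_lookup(ht, my_entry_cmp, &key, hash);
}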
+ */ + if (qemu_mutex_trylock(&ht->lock)) { + return; + } + map = ht->map; + /* another thread might have just performed the resize we were after */ + if (qht_map_needs_resize(map)) { + struct qht_map *new = qht_map_create(map->n_buckets * 2); + + qht_map_lock_buckets(map); + qht_do_resize(ht, new); + qht_map_unlock_buckets(map); + } + qemu_mutex_unlock(&ht->lock); +} + +bool qht_insert(struct qht *ht, void *p, uint32_t hash) +{ + struct qht_bucket *b; + struct qht_map *map; + bool needs_resize = false; + bool ret; + + /* NULL pointers are not supported */ + qht_debug_assert(p); + + b = qht_bucket_lock__no_stale(ht, hash, &map); + ret = qht_insert__locked(ht, map, b, p, hash, &needs_resize); + qht_bucket_debug__locked(b); + qemu_spin_unlock(&b->lock); + + if (unlikely(needs_resize) && ht->mode & QHT_MODE_AUTO_RESIZE) { + qht_grow_maybe(ht); + } + return ret; +} + +static inline bool qht_entry_is_last(struct qht_bucket *b, int pos) +{ + if (pos == QHT_BUCKET_ENTRIES - 1) { + if (b->next == NULL) { + return true; + } + return b->next->pointers[0] == NULL; + } + return b->pointers[pos + 1] == NULL; +} + +static void +qht_entry_move(struct qht_bucket *to, int i, struct qht_bucket *from, int j) +{ + qht_debug_assert(!(to == from && i == j)); + qht_debug_assert(to->pointers[i]); + qht_debug_assert(from->pointers[j]); + + to->hashes[i] = from->hashes[j]; + atomic_set(&to->pointers[i], from->pointers[j]); + + from->hashes[j] = 0; + atomic_set(&from->pointers[j], NULL); +} + +/* + * Find the last valid entry in @head, and swap it with @orig[pos], which has + * just been invalidated. + */ +static inline void qht_bucket_remove_entry(struct qht_bucket *orig, int pos) +{ + struct qht_bucket *b = orig; + struct qht_bucket *prev = NULL; + int i; + + if (qht_entry_is_last(orig, pos)) { + orig->hashes[pos] = 0; + atomic_set(&orig->pointers[pos], NULL); + return; + } + do { + for (i = 0; i < QHT_BUCKET_ENTRIES; i++) { + if (b->pointers[i]) { + continue; + } + if (i > 0) { + return qht_entry_move(orig, pos, b, i - 1); + } + qht_debug_assert(prev); + return qht_entry_move(orig, pos, prev, QHT_BUCKET_ENTRIES - 1); + } + prev = b; + b = b->next; + } while (b); + /* no free entries other than orig[pos], so swap it with the last one */ + qht_entry_move(orig, pos, prev, QHT_BUCKET_ENTRIES - 1); +} + +/* call with b->lock held */ +static inline +bool qht_remove__locked(struct qht_map *map, struct qht_bucket *head, + const void *p, uint32_t hash) +{ + struct qht_bucket *b = head; + int i; + + do { + for (i = 0; i < QHT_BUCKET_ENTRIES; i++) { + void *q = b->pointers[i]; + + if (unlikely(q == NULL)) { + return false; + } + if (q == p) { + qht_debug_assert(b->hashes[i] == hash); + seqlock_write_begin(&head->sequence); + qht_bucket_remove_entry(b, i); + seqlock_write_end(&head->sequence); + return true; + } + } + b = b->next; + } while (b); + return false; +} + +bool qht_remove(struct qht *ht, const void *p, uint32_t hash) +{ + struct qht_bucket *b; + struct qht_map *map; + bool ret; + + /* NULL pointers are not supported */ + qht_debug_assert(p); + + b = qht_bucket_lock__no_stale(ht, hash, &map); + ret = qht_remove__locked(map, b, p, hash); + qht_bucket_debug__locked(b); + qemu_spin_unlock(&b->lock); + return ret; +} + +static inline void qht_bucket_iter(struct qht *ht, struct qht_bucket *b, + qht_iter_func_t func, void *userp) +{ + int i; + + do { + for (i = 0; i < QHT_BUCKET_ENTRIES; i++) { + if (b->pointers[i] == NULL) { + return; + } + func(ht, b->pointers[i], b->hashes[i], userp); + } + b = b->next; + } while (b); 
+} + +/* call with all of the map's locks held */ +static inline void qht_map_iter__all_locked(struct qht *ht, struct qht_map *map, + qht_iter_func_t func, void *userp) +{ + size_t i; + + for (i = 0; i < map->n_buckets; i++) { + qht_bucket_iter(ht, &map->buckets[i], func, userp); + } +} + +void qht_iter(struct qht *ht, qht_iter_func_t func, void *userp) +{ + struct qht_map *map; + + map = atomic_rcu_read(&ht->map); + qht_map_lock_buckets(map); + /* Note: ht here is merely for carrying ht->mode; ht->map won't be read */ + qht_map_iter__all_locked(ht, map, func, userp); + qht_map_unlock_buckets(map); +} + +static void qht_map_copy(struct qht *ht, void *p, uint32_t hash, void *userp) +{ + struct qht_map *new = userp; + struct qht_bucket *b = qht_map_to_bucket(new, hash); + + /* no need to acquire b->lock because no thread has seen this map yet */ + qht_insert__locked(ht, new, b, p, hash, NULL); +} + +/* + * Call with ht->lock and all bucket locks held. + * + * Creating the @new map here would add unnecessary delay while all the locks + * are held--holding up the bucket locks is particularly bad, since no writes + * can occur while these are held. Thus, we let callers create the new map, + * hopefully without the bucket locks held. + */ +static void qht_do_resize(struct qht *ht, struct qht_map *new) +{ + struct qht_map *old; + + old = ht->map; + g_assert_cmpuint(new->n_buckets, !=, old->n_buckets); + + qht_map_iter__all_locked(ht, old, qht_map_copy, new); + qht_map_debug__all_locked(new); + + atomic_rcu_set(&ht->map, new); + call_rcu(old, qht_map_destroy, rcu); +} + +bool qht_resize(struct qht *ht, size_t n_elems) +{ + size_t n_buckets = qht_elems_to_buckets(n_elems); + size_t ret = false; + + qemu_mutex_lock(&ht->lock); + if (n_buckets != ht->map->n_buckets) { + struct qht_map *new; + struct qht_map *old = ht->map; + + new = qht_map_create(n_buckets); + qht_map_lock_buckets(old); + qht_do_resize(ht, new); + qht_map_unlock_buckets(old); + ret = true; + } + qemu_mutex_unlock(&ht->lock); + + return ret; +} + +/* pass @stats to qht_statistics_destroy() when done */ +void qht_statistics_init(struct qht *ht, struct qht_stats *stats) +{ + struct qht_map *map; + int i; + + map = atomic_rcu_read(&ht->map); + + stats->head_buckets = map->n_buckets; + stats->used_head_buckets = 0; + stats->entries = 0; + qdist_init(&stats->chain); + qdist_init(&stats->occupancy); + + for (i = 0; i < map->n_buckets; i++) { + struct qht_bucket *head = &map->buckets[i]; + struct qht_bucket *b; + unsigned int version; + size_t buckets; + size_t entries; + int j; + + do { + version = seqlock_read_begin(&head->sequence); + buckets = 0; + entries = 0; + b = head; + do { + for (j = 0; j < QHT_BUCKET_ENTRIES; j++) { + if (atomic_read(&b->pointers[j]) == NULL) { + break; + } + entries++; + } + buckets++; + b = atomic_rcu_read(&b->next); + } while (b); + } while (seqlock_read_retry(&head->sequence, version)); + + if (entries) { + qdist_inc(&stats->chain, buckets); + qdist_inc(&stats->occupancy, + (double)entries / QHT_BUCKET_ENTRIES / buckets); + stats->used_head_buckets++; + stats->entries += entries; + } else { + qdist_inc(&stats->occupancy, 0); + } + } +} + +void qht_statistics_destroy(struct qht_stats *stats) +{ + qdist_destroy(&stats->occupancy); + qdist_destroy(&stats->chain); +} |
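Putting the pieces together, a caller of the new hash table follows the pattern below. This closing sketch is illustrative only: the entry type, hash and all names are invented, and it is single-threaded, so the RCU grace-period rule for removals stated at the top of qht.c does not come into play.

#include "qemu/qht.h"
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

struct my_entry {
    uint64_t key;
    /* ... payload ... */
};

static bool my_entry_cmp(const void *obj, const void *userp)
{
    const struct my_entry *e = obj;
    const uint64_t *key = userp;

    return e->key == *key;
}

static uint32_t my_hash(uint64_t key)
{
    return (uint32_t)(key ^ (key >> 32));   /* placeholder hash */
}

static void my_table_example(void)
{
    struct qht ht;
    struct qht_stats stats;
    struct my_entry e = { .key = 42 };
    uint64_t key = 42;
    struct my_entry *found;

    /* Size for roughly 2^10 elements and let the table grow on demand. */
    qht_init(&ht, 1 << 10, QHT_MODE_AUTO_RESIZE);

    /* Insertions take the (pointer, hash) pair; NULL pointers are not
     * valid entries. */
    qht_insert(&ht, &e, my_hash(e.key));

    /* Lock-free on the reader side; the callback filters out entries whose
     * 32-bit hash collides but whose key does not match. */
    found = qht_lookup(&ht, my_entry_cmp, &key, my_hash(key));
    assert(found == &e);

    /* Occupancy/chain statistics, as consumed by 'info jit' above. */
    qht_statistics_init(&ht, &stats);
    printf("head buckets used: %zu/%zu\n",
           stats.used_head_buckets, stats.head_buckets);
    qht_statistics_destroy(&stats);

    qht_remove(&ht, &e, my_hash(e.key));
    qht_destroy(&ht);
}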