Diffstat (limited to 'util')
46 files changed, 1266 insertions, 771 deletions
diff --git a/util/aio-posix.c b/util/aio-posix.c index 266c9dd..2e0a5da 100644 --- a/util/aio-posix.c +++ b/util/aio-posix.c @@ -17,6 +17,7 @@ #include "block/block.h" #include "block/thread-pool.h" #include "qemu/main-loop.h" +#include "qemu/lockcnt.h" #include "qemu/rcu.h" #include "qemu/rcu_queue.h" #include "qemu/sockets.h" @@ -27,6 +28,9 @@ /* Stop userspace polling on a handler if it isn't active for some time */ #define POLL_IDLE_INTERVAL_NS (7 * NANOSECONDS_PER_SECOND) +static void adjust_polling_time(AioContext *ctx, AioPolledEvent *poll, + int64_t block_ns); + bool aio_poll_disabled(AioContext *ctx) { return qatomic_read(&ctx->poll_disable_cnt); @@ -391,7 +395,8 @@ static bool aio_dispatch_handler(AioContext *ctx, AioHandler *node) * scanning all handlers with aio_dispatch_handlers(). */ static bool aio_dispatch_ready_handlers(AioContext *ctx, - AioHandlerList *ready_list) + AioHandlerList *ready_list, + int64_t block_ns) { bool progress = false; AioHandler *node; @@ -399,6 +404,14 @@ static bool aio_dispatch_ready_handlers(AioContext *ctx, while ((node = QLIST_FIRST(ready_list))) { QLIST_REMOVE(node, node_ready); progress = aio_dispatch_handler(ctx, node) || progress; + + /* + * Adjust polling time only after aio_dispatch_handler(), which can + * add the handler to ctx->poll_aio_handlers. + */ + if (ctx->poll_max_ns && QLIST_IS_INSERTED(node, node_poll)) { + adjust_polling_time(ctx, &node->poll, block_ns); + } } return progress; @@ -578,13 +591,19 @@ static bool run_poll_handlers(AioContext *ctx, AioHandlerList *ready_list, static bool try_poll_mode(AioContext *ctx, AioHandlerList *ready_list, int64_t *timeout) { + AioHandler *node; int64_t max_ns; if (QLIST_EMPTY_RCU(&ctx->poll_aio_handlers)) { return false; } - max_ns = qemu_soonest_timeout(*timeout, ctx->poll_ns); + max_ns = 0; + QLIST_FOREACH(node, &ctx->poll_aio_handlers, node_poll) { + max_ns = MAX(max_ns, node->poll.ns); + } + max_ns = qemu_soonest_timeout(*timeout, max_ns); + if (max_ns && !ctx->fdmon_ops->need_wait(ctx)) { /* * Enable poll mode. 
It pairs with the poll_set_started() in @@ -599,6 +618,46 @@ static bool try_poll_mode(AioContext *ctx, AioHandlerList *ready_list, return false; } +static void adjust_polling_time(AioContext *ctx, AioPolledEvent *poll, + int64_t block_ns) +{ + if (block_ns <= poll->ns) { + /* This is the sweet spot, no adjustment needed */ + } else if (block_ns > ctx->poll_max_ns) { + /* We'd have to poll for too long, poll less */ + int64_t old = poll->ns; + + if (ctx->poll_shrink) { + poll->ns /= ctx->poll_shrink; + } else { + poll->ns = 0; + } + + trace_poll_shrink(ctx, old, poll->ns); + } else if (poll->ns < ctx->poll_max_ns && + block_ns < ctx->poll_max_ns) { + /* There is room to grow, poll longer */ + int64_t old = poll->ns; + int64_t grow = ctx->poll_grow; + + if (grow == 0) { + grow = 2; + } + + if (poll->ns) { + poll->ns *= grow; + } else { + poll->ns = 4000; /* start polling at 4 microseconds */ + } + + if (poll->ns > ctx->poll_max_ns) { + poll->ns = ctx->poll_max_ns; + } + + trace_poll_grow(ctx, old, poll->ns); + } +} + bool aio_poll(AioContext *ctx, bool blocking) { AioHandlerList ready_list = QLIST_HEAD_INITIALIZER(ready_list); @@ -606,6 +665,7 @@ bool aio_poll(AioContext *ctx, bool blocking) bool use_notify_me; int64_t timeout; int64_t start = 0; + int64_t block_ns = 0; /* * There cannot be two concurrent aio_poll calls for the same AioContext (or @@ -678,49 +738,13 @@ bool aio_poll(AioContext *ctx, bool blocking) aio_notify_accept(ctx); - /* Adjust polling time */ + /* Calculate blocked time for adaptive polling */ if (ctx->poll_max_ns) { - int64_t block_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start; - - if (block_ns <= ctx->poll_ns) { - /* This is the sweet spot, no adjustment needed */ - } else if (block_ns > ctx->poll_max_ns) { - /* We'd have to poll for too long, poll less */ - int64_t old = ctx->poll_ns; - - if (ctx->poll_shrink) { - ctx->poll_ns /= ctx->poll_shrink; - } else { - ctx->poll_ns = 0; - } - - trace_poll_shrink(ctx, old, ctx->poll_ns); - } else if (ctx->poll_ns < ctx->poll_max_ns && - block_ns < ctx->poll_max_ns) { - /* There is room to grow, poll longer */ - int64_t old = ctx->poll_ns; - int64_t grow = ctx->poll_grow; - - if (grow == 0) { - grow = 2; - } - - if (ctx->poll_ns) { - ctx->poll_ns *= grow; - } else { - ctx->poll_ns = 4000; /* start polling at 4 microseconds */ - } - - if (ctx->poll_ns > ctx->poll_max_ns) { - ctx->poll_ns = ctx->poll_max_ns; - } - - trace_poll_grow(ctx, old, ctx->poll_ns); - } + block_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start; } progress |= aio_bh_poll(ctx); - progress |= aio_dispatch_ready_handlers(ctx, &ready_list); + progress |= aio_dispatch_ready_handlers(ctx, &ready_list, block_ns); aio_free_deleted_handlers(ctx); @@ -766,11 +790,18 @@ void aio_context_use_g_source(AioContext *ctx) void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns, int64_t grow, int64_t shrink, Error **errp) { + AioHandler *node; + + qemu_lockcnt_inc(&ctx->list_lock); + QLIST_FOREACH(node, &ctx->aio_handlers, node) { + node->poll.ns = 0; + } + qemu_lockcnt_dec(&ctx->list_lock); + /* No thread synchronization here, it doesn't matter if an incorrect value * is used once. 
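The grow/shrink policy above is now tracked per AioHandler (AioPolledEvent) rather than per AioContext. A minimal standalone sketch of that policy, with the AioContext fields reduced to plain parameters; the driver values below are illustrative, not taken from the patch:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Mirror of the adjust_polling_time() policy, free of QEMU types. */
static int64_t adjust(int64_t poll_ns, int64_t block_ns,
                      int64_t max_ns, int64_t grow, int64_t shrink)
{
    if (block_ns <= poll_ns) {
        /* sweet spot, keep the current window */
    } else if (block_ns > max_ns) {
        poll_ns = shrink ? poll_ns / shrink : 0;        /* poll less */
    } else if (poll_ns < max_ns && block_ns < max_ns) {
        /* room to grow: double (or *grow) the window, starting at 4us */
        poll_ns = poll_ns ? poll_ns * (grow ? grow : 2) : 4000;
        if (poll_ns > max_ns) {
            poll_ns = max_ns;
        }
    }
    return poll_ns;
}

int main(void)
{
    int64_t ns = 0;
    /* a handler that keeps blocking for 20us under a 32us cap */
    for (int i = 0; i < 5; i++) {
        ns = adjust(ns, 20000, 32000, 0, 0);
        printf("%" PRId64 "\n", ns);    /* 4000 8000 16000 32000 32000 */
    }
    return 0;
}
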
*/ ctx->poll_max_ns = max_ns; - ctx->poll_ns = 0; ctx->poll_grow = grow; ctx->poll_shrink = shrink; diff --git a/util/aio-posix.h b/util/aio-posix.h index 4264c51..82a0201 100644 --- a/util/aio-posix.h +++ b/util/aio-posix.h @@ -38,6 +38,7 @@ struct AioHandler { #endif int64_t poll_idle_timeout; /* when to stop userspace polling */ bool poll_ready; /* has polling detected an event? */ + AioPolledEvent poll; }; /* Add a handler to a ready list */ diff --git a/util/aio-win32.c b/util/aio-win32.c index d144f93..6583d5c 100644 --- a/util/aio-win32.c +++ b/util/aio-win32.c @@ -18,6 +18,7 @@ #include "qemu/osdep.h" #include "block/block.h" #include "qemu/main-loop.h" +#include "qemu/lockcnt.h" #include "qemu/queue.h" #include "qemu/sockets.h" #include "qapi/error.h" diff --git a/util/async.c b/util/async.c index 3e3e4fc..2719c62 100644 --- a/util/async.c +++ b/util/async.c @@ -30,11 +30,12 @@ #include "block/graph-lock.h" #include "qemu/main-loop.h" #include "qemu/atomic.h" +#include "qemu/lockcnt.h" #include "qemu/rcu_queue.h" #include "block/raw-aio.h" #include "qemu/coroutine_int.h" #include "qemu/coroutine-tls.h" -#include "sysemu/cpu-timers.h" +#include "exec/icount.h" #include "trace.h" /***********************************************************/ @@ -368,7 +369,7 @@ aio_ctx_finalize(GSource *source) QEMUBH *bh; unsigned flags; - thread_pool_free(ctx->thread_pool); + thread_pool_free_aio(ctx->thread_pool); #ifdef CONFIG_LINUX_AIO if (ctx->linux_aio) { @@ -434,10 +435,10 @@ GSource *aio_get_g_source(AioContext *ctx) return &ctx->source; } -ThreadPool *aio_get_thread_pool(AioContext *ctx) +ThreadPoolAio *aio_get_thread_pool(AioContext *ctx) { if (!ctx->thread_pool) { - ctx->thread_pool = thread_pool_new(ctx); + ctx->thread_pool = thread_pool_new_aio(ctx); } return ctx->thread_pool; } @@ -608,7 +609,6 @@ AioContext *aio_context_new(Error **errp) qemu_rec_mutex_init(&ctx->lock); timerlistgroup_init(&ctx->tlg, aio_timerlist_notify, ctx); - ctx->poll_ns = 0; ctx->poll_max_ns = 0; ctx->poll_grow = 0; ctx->poll_shrink = 0; diff --git a/util/block-helpers.c b/util/block-helpers.c index c485143..052b4e1 100644 --- a/util/block-helpers.c +++ b/util/block-helpers.c @@ -10,12 +10,10 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "qapi/qmp/qerror.h" #include "block-helpers.h" /** * check_block_size: - * @id: The unique ID of the object * @name: The name of the property being validated * @value: The block size in bytes * @errp: A pointer to an area to store an error @@ -24,23 +22,23 @@ * 1. At least MIN_BLOCK_SIZE * 2. No larger than MAX_BLOCK_SIZE * 3. 
A power of 2 + * + * Returns: true on success, false on failure */ -void check_block_size(const char *id, const char *name, int64_t value, - Error **errp) +bool check_block_size(const char *name, int64_t value, Error **errp) { - /* value of 0 means "unset" */ - if (value && (value < MIN_BLOCK_SIZE || value > MAX_BLOCK_SIZE)) { - error_setg(errp, QERR_PROPERTY_VALUE_OUT_OF_RANGE, - id, name, value, MIN_BLOCK_SIZE, MAX_BLOCK_SIZE); - return; + if (!value) { + /* unset */ + return true; } - /* We rely on power-of-2 blocksizes for bitmasks */ - if ((value & (value - 1)) != 0) { + if (value < MIN_BLOCK_SIZE || value > MAX_BLOCK_SIZE + || (value & (value - 1))) { error_setg(errp, - "Property %s.%s doesn't take value '%" PRId64 - "', it's not a power of 2", - id, name, value); - return; + "parameter %s must be a power of 2 between %" PRId64 + " and %" PRId64, + name, MIN_BLOCK_SIZE, MAX_BLOCK_SIZE); + return false; } + return true; } diff --git a/util/block-helpers.h b/util/block-helpers.h index b53295a..838b082 100644 --- a/util/block-helpers.h +++ b/util/block-helpers.h @@ -13,7 +13,6 @@ #define MAX_BLOCK_SIZE (2 * MiB) #define MAX_BLOCK_SIZE_STR "2 MiB" -void check_block_size(const char *id, const char *name, int64_t value, - Error **errp); +bool check_block_size(const char *name, int64_t value, Error **errp); #endif /* BLOCK_HELPERS_H */ diff --git a/util/cacheflush.c b/util/cacheflush.c index a089061..17c5891 100644 --- a/util/cacheflush.c +++ b/util/cacheflush.c @@ -229,6 +229,10 @@ static void __attribute__((constructor)) init_cache_info(void) /* Caches are coherent and do not require flushing; symbol inline. */ +#elif defined(EMSCRIPTEN) + +/* Wasm doesn't have executable region of memory. */ + #elif defined(__aarch64__) && !defined(CONFIG_WIN32) /* * For Windows, we use generic implementation of flush_idcache_range, that @@ -279,9 +283,11 @@ void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len) for (p = rw & -dcache_lsize; p < rw + len; p += dcache_lsize) { asm volatile("dc\tcvau, %0" : : "r" (p) : "memory"); } - asm volatile("dsb\tish" : : : "memory"); } + /* DSB unconditionally to ensure any outstanding writes are committed. */ + asm volatile("dsb\tish" : : : "memory"); + /* * If CTR_EL0.DIC is enabled, Instruction cache cleaning to the Point * of Unification is not required for instruction to data coherence. diff --git a/util/coroutine-wasm.c b/util/coroutine-wasm.c new file mode 100644 index 0000000..cb1ec92 --- /dev/null +++ b/util/coroutine-wasm.c @@ -0,0 +1,127 @@ +/* + * emscripten fiber coroutine initialization code + * based on coroutine-ucontext.c + * + * Copyright (C) 2006 Anthony Liguori <anthony@codemonkey.ws> + * Copyright (C) 2011 Kevin Wolf <kwolf@redhat.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.0 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
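Stepping back to the check_block_size() rework a few hunks up: the single test value & (value - 1) is nonzero exactly when value has more than one bit set, which folds the old power-of-2 check into the range check. A hypothetical caller, showing the new bool contract (Error plumbing replaced by stderr to keep the sketch self-contained; MIN_BLOCK_SIZE of 512 is assumed, since only MAX appears in the hunk above):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MIN_BLOCK_SIZE 512                /* assumed */
#define MAX_BLOCK_SIZE (2 * 1024 * 1024)  /* 2 MiB, as in block-helpers.h */

static bool check_block_size_sketch(const char *name, int64_t value)
{
    if (!value) {
        return true;                  /* 0 means "unset" */
    }
    if (value < MIN_BLOCK_SIZE || value > MAX_BLOCK_SIZE
        || (value & (value - 1))) {   /* more than one bit set */
        fprintf(stderr, "parameter %s must be a power of 2 between %d and %d\n",
                name, MIN_BLOCK_SIZE, MAX_BLOCK_SIZE);
        return false;
    }
    return true;
}

int main(void)
{
    bool a = check_block_size_sketch("logical_block_size", 4096); /* true */
    bool b = check_block_size_sketch("logical_block_size", 3000); /* false */
    printf("%d %d\n", a, b);
    return 0;
}
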
+ */ + +#include "qemu/osdep.h" +#include "qemu/coroutine_int.h" +#include "qemu/coroutine-tls.h" + +#include <emscripten/fiber.h> + +typedef struct { + Coroutine base; + void *stack; + size_t stack_size; + + void *asyncify_stack; + size_t asyncify_stack_size; + + CoroutineAction action; + + emscripten_fiber_t fiber; +} CoroutineEmscripten; + +/** + * Per-thread coroutine bookkeeping + */ +QEMU_DEFINE_STATIC_CO_TLS(Coroutine *, current); +QEMU_DEFINE_STATIC_CO_TLS(CoroutineEmscripten *, leader); +size_t leader_asyncify_stack_size = COROUTINE_STACK_SIZE; + +static void coroutine_trampoline(void *co_) +{ + Coroutine *co = co_; + + while (true) { + co->entry(co->entry_arg); + qemu_coroutine_switch(co, co->caller, COROUTINE_TERMINATE); + } +} + +Coroutine *qemu_coroutine_new(void) +{ + CoroutineEmscripten *co; + + co = g_malloc0(sizeof(*co)); + + co->stack_size = COROUTINE_STACK_SIZE; + co->stack = qemu_alloc_stack(&co->stack_size); + + co->asyncify_stack_size = COROUTINE_STACK_SIZE; + co->asyncify_stack = g_malloc0(co->asyncify_stack_size); + emscripten_fiber_init(&co->fiber, coroutine_trampoline, &co->base, + co->stack, co->stack_size, co->asyncify_stack, + co->asyncify_stack_size); + + return &co->base; +} + +void qemu_coroutine_delete(Coroutine *co_) +{ + CoroutineEmscripten *co = DO_UPCAST(CoroutineEmscripten, base, co_); + + qemu_free_stack(co->stack, co->stack_size); + g_free(co->asyncify_stack); + g_free(co); +} + +CoroutineAction qemu_coroutine_switch(Coroutine *from_, Coroutine *to_, + CoroutineAction action) +{ + CoroutineEmscripten *from = DO_UPCAST(CoroutineEmscripten, base, from_); + CoroutineEmscripten *to = DO_UPCAST(CoroutineEmscripten, base, to_); + + set_current(to_); + to->action = action; + emscripten_fiber_swap(&from->fiber, &to->fiber); + return from->action; +} + +Coroutine *qemu_coroutine_self(void) +{ + Coroutine *self = get_current(); + + if (!self) { + CoroutineEmscripten *leaderp = get_leader(); + if (!leaderp) { + leaderp = g_malloc0(sizeof(*leaderp)); + leaderp->asyncify_stack = g_malloc0(leader_asyncify_stack_size); + leaderp->asyncify_stack_size = leader_asyncify_stack_size; + emscripten_fiber_init_from_current_context( + &leaderp->fiber, + leaderp->asyncify_stack, + leaderp->asyncify_stack_size); + leaderp->stack = leaderp->fiber.stack_limit; + leaderp->stack_size = + leaderp->fiber.stack_base - leaderp->fiber.stack_limit; + set_leader(leaderp); + } + self = &leaderp->base; + set_current(self); + } + return self; +} + +bool qemu_in_coroutine(void) +{ + Coroutine *self = get_current(); + + return self && self->caller; +} diff --git a/util/cpuinfo-i386.c b/util/cpuinfo-i386.c index 90f92a4..c8c8a1b 100644 --- a/util/cpuinfo-i386.c +++ b/util/cpuinfo-i386.c @@ -35,6 +35,7 @@ unsigned __attribute__((constructor)) cpuinfo_init(void) __cpuid(1, a, b, c, d); info |= (d & bit_SSE2 ? CPUINFO_SSE2 : 0); + info |= (c & bit_OSXSAVE ? CPUINFO_OSXSAVE : 0); info |= (c & bit_MOVBE ? CPUINFO_MOVBE : 0); info |= (c & bit_POPCNT ? CPUINFO_POPCNT : 0); info |= (c & bit_PCLMUL ? 
CPUINFO_PCLMUL : 0); diff --git a/util/cpuinfo-riscv.c b/util/cpuinfo-riscv.c index 497ce12..0291b72 100644 --- a/util/cpuinfo-riscv.c +++ b/util/cpuinfo-riscv.c @@ -4,14 +4,17 @@ */ #include "qemu/osdep.h" +#include "qemu/host-utils.h" #include "host/cpuinfo.h" #ifdef CONFIG_ASM_HWPROBE_H #include <asm/hwprobe.h> #include <sys/syscall.h> +#include <asm/unistd.h> #endif unsigned cpuinfo; +unsigned riscv_lg2_vlenb; static volatile sig_atomic_t got_sigill; static void sigill_handler(int signo, siginfo_t *si, void *data) @@ -33,7 +36,8 @@ static void sigill_handler(int signo, siginfo_t *si, void *data) /* Called both as constructor and (possibly) via other constructors. */ unsigned __attribute__((constructor)) cpuinfo_init(void) { - unsigned left = CPUINFO_ZBA | CPUINFO_ZBB | CPUINFO_ZICOND; + unsigned left = CPUINFO_ZBA | CPUINFO_ZBB | CPUINFO_ZBS + | CPUINFO_ZICOND | CPUINFO_ZVE64X; unsigned info = cpuinfo; if (info) { @@ -47,9 +51,16 @@ unsigned __attribute__((constructor)) cpuinfo_init(void) #if defined(__riscv_arch_test) && defined(__riscv_zbb) info |= CPUINFO_ZBB; #endif +#if defined(__riscv_arch_test) && defined(__riscv_zbs) + info |= CPUINFO_ZBS; +#endif #if defined(__riscv_arch_test) && defined(__riscv_zicond) info |= CPUINFO_ZICOND; #endif +#if defined(__riscv_arch_test) && \ + (defined(__riscv_vector) || defined(__riscv_zve64x)) + info |= CPUINFO_ZVE64X; +#endif left &= ~info; #ifdef CONFIG_ASM_HWPROBE_H @@ -64,15 +75,27 @@ unsigned __attribute__((constructor)) cpuinfo_init(void) && pair.key >= 0) { info |= pair.value & RISCV_HWPROBE_EXT_ZBA ? CPUINFO_ZBA : 0; info |= pair.value & RISCV_HWPROBE_EXT_ZBB ? CPUINFO_ZBB : 0; - left &= ~(CPUINFO_ZBA | CPUINFO_ZBB); + info |= pair.value & RISCV_HWPROBE_EXT_ZBS ? CPUINFO_ZBS : 0; + left &= ~(CPUINFO_ZBA | CPUINFO_ZBB | CPUINFO_ZBS); #ifdef RISCV_HWPROBE_EXT_ZICOND info |= pair.value & RISCV_HWPROBE_EXT_ZICOND ? CPUINFO_ZICOND : 0; left &= ~CPUINFO_ZICOND; #endif + /* For rv64, V is Zve64d, a superset of Zve64x. */ + info |= pair.value & RISCV_HWPROBE_IMA_V ? CPUINFO_ZVE64X : 0; +#ifdef RISCV_HWPROBE_EXT_ZVE64X + info |= pair.value & RISCV_HWPROBE_EXT_ZVE64X ? CPUINFO_ZVE64X : 0; +#endif } } #endif /* CONFIG_ASM_HWPROBE_H */ + /* + * We only detect support for vectors with hwprobe. All kernels with + * support for vectors in userspace also support the hwprobe syscall. + */ + left &= ~CPUINFO_ZVE64X; + if (left) { struct sigaction sa_old, sa_new; @@ -99,6 +122,15 @@ unsigned __attribute__((constructor)) cpuinfo_init(void) left &= ~CPUINFO_ZBB; } + if (left & CPUINFO_ZBS) { + /* Probe for Zbs: bext zero,zero,zero. */ + got_sigill = 0; + asm volatile(".insn r 0x33, 5, 0x24, zero, zero, zero" + : : : "memory"); + info |= got_sigill ? 0 : CPUINFO_ZBS; + left &= ~CPUINFO_ZBS; + } + if (left & CPUINFO_ZICOND) { /* Probe for Zicond: czero.eqz zero,zero,zero. */ got_sigill = 0; @@ -112,6 +144,21 @@ unsigned __attribute__((constructor)) cpuinfo_init(void) assert(left == 0); } + if (info & CPUINFO_ZVE64X) { + /* + * We are guaranteed by RVV-1.0 that VLEN is a power of 2. + * We are guaranteed by Zve64x that VLEN >= 64, and that + * EEW of {8,16,32,64} are supported. + */ + unsigned long vlenb; + /* csrr %0, vlenb */ + asm volatile(".insn i 0x73, 0x2, %0, zero, -990" : "=r"(vlenb)); + assert(vlenb >= 8); + assert(is_power_of_2(vlenb)); + /* Cache VLEN in a convenient form. 
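The csrr above reads VLENB, the vector register length in bytes. Because RVV-1.0 guarantees VLEN is a power of 2, a trailing-zero count recovers log2 exactly, which is what the cached lg2 form below relies on. A small host-side illustration (this ctz32 stands in for QEMU's helper of the same name):

#include <assert.h>
#include <stdio.h>

static unsigned ctz32(unsigned v)
{
    return __builtin_ctz(v);          /* assumes GCC/Clang builtins */
}

int main(void)
{
    for (unsigned vlenb = 8; vlenb <= 256; vlenb <<= 1) {
        assert((vlenb & (vlenb - 1)) == 0);   /* power of 2: one bit set */
        /* with one set bit, the trailing-zero count is exactly log2 */
        printf("vlenb=%3u lg2_vlenb=%u\n", vlenb, ctz32(vlenb));
    }
    return 0;
}
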
*/ + riscv_lg2_vlenb = ctz32(vlenb); + } + info |= CPUINFO_ALWAYS; cpuinfo = info; return info; diff --git a/util/cutils.c b/util/cutils.c index 4236403..9803f11 100644 --- a/util/cutils.c +++ b/util/cutils.c @@ -1144,11 +1144,6 @@ void qemu_init_exec_dir(const char *argv0) #endif } -const char *qemu_get_exec_dir(void) -{ - return exec_dir; -} - char *get_relocated_path(const char *dir) { size_t prefix_len = strlen(CONFIG_PREFIX); diff --git a/util/envlist.c b/util/envlist.c index db937c0..15fdbb1 100644 --- a/util/envlist.c +++ b/util/envlist.c @@ -12,9 +12,6 @@ struct envlist { size_t el_count; /* number of entries */ }; -static int envlist_parse(envlist_t *envlist, - const char *env, int (*)(envlist_t *, const char *)); - /* * Allocates new envlist and returns pointer to it. */ @@ -52,72 +49,6 @@ envlist_free(envlist_t *envlist) } /* - * Parses comma separated list of set/modify environment - * variable entries and updates given enlist accordingly. - * - * For example: - * envlist_parse(el, "HOME=foo,SHELL=/bin/sh"); - * - * inserts/sets environment variables HOME and SHELL. - * - * Returns 0 on success, errno otherwise. - */ -int -envlist_parse_set(envlist_t *envlist, const char *env) -{ - return (envlist_parse(envlist, env, &envlist_setenv)); -} - -/* - * Parses comma separated list of unset environment variable - * entries and removes given variables from given envlist. - * - * Returns 0 on success, errno otherwise. - */ -int -envlist_parse_unset(envlist_t *envlist, const char *env) -{ - return (envlist_parse(envlist, env, &envlist_unsetenv)); -} - -/* - * Parses comma separated list of set, modify or unset entries - * and calls given callback for each entry. - * - * Returns 0 in case of success, errno otherwise. - */ -static int -envlist_parse(envlist_t *envlist, const char *env, - int (*callback)(envlist_t *, const char *)) -{ - char *tmpenv, *envvar; - char *envsave = NULL; - int ret = 0; - assert(callback != NULL); - - if ((envlist == NULL) || (env == NULL)) - return (EINVAL); - - tmpenv = g_strdup(env); - envsave = tmpenv; - - do { - envvar = strchr(tmpenv, ','); - if (envvar != NULL) { - *envvar = '\0'; - } - if ((*callback)(envlist, tmpenv) != 0) { - ret = errno; - break; - } - tmpenv = envvar + 1; - } while (envvar != NULL); - - g_free(envsave); - return ret; -} - -/* * Sets environment value to envlist in similar manner * than putenv(3). 
* diff --git a/util/error.c b/util/error.c index e5e2472..daea214 100644 --- a/util/error.c +++ b/util/error.c @@ -15,15 +15,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "qemu/error-report.h" - -struct Error -{ - char *msg; - ErrorClass err_class; - const char *src, *func; - int line; - GString *hint; -}; +#include "qapi/error-internal.h" Error *error_abort; Error *error_fatal; @@ -32,8 +24,13 @@ Error *error_warn; static void error_handle(Error **errp, Error *err) { if (errp == &error_abort) { - fprintf(stderr, "Unexpected error in %s() at %s:%d:\n", - err->func, err->src, err->line); + if (err->func) { + fprintf(stderr, "Unexpected error in %s() at %.*s:%d:\n", + err->func, err->src_len, err->src, err->line); + } else { + fprintf(stderr, "Unexpected error at %.*s:%d:\n", + err->src_len, err->src, err->line); + } error_report("%s", error_get_pretty(err)); if (err->hint) { error_printf("%s", err->hint->str); @@ -75,6 +72,7 @@ static void error_setv(Error **errp, g_free(msg); } err->err_class = err_class; + err->src_len = -1; err->src = src; err->line = line; err->func = func; @@ -247,6 +245,17 @@ void warn_report_err(Error *err) error_free(err); } +bool warn_report_err_once_cond(bool *printed, Error *err) +{ + if (*printed) { + error_free(err); + return false; + } + *printed = true; + warn_report_err(err); + return true; +} + void error_reportf_err(Error *err, const char *fmt, ...) { va_list ap; diff --git a/util/event.c b/util/event.c new file mode 100644 index 0000000..5a8141c --- /dev/null +++ b/util/event.c @@ -0,0 +1,171 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "qemu/osdep.h" +#include "qemu/thread.h" + +/* + * Valid transitions: + * - FREE -> SET (qemu_event_set) + * - BUSY -> SET (qemu_event_set) + * - SET -> FREE (qemu_event_reset) + * - FREE -> BUSY (qemu_event_wait) + * + * With futex, the waking and blocking operations follow + * BUSY -> SET and FREE -> BUSY, respectively. + * + * Without futex, BUSY -> SET and FREE -> BUSY never happen. Instead, the waking + * operation follows FREE -> SET and the blocking operation will happen in + * qemu_event_wait() if the event is not SET. + * + * SET->BUSY does not happen (it can be observed from the outside but + * it really is SET->FREE->BUSY). + * + * busy->free provably cannot happen; to enforce it, the set->free transition + * is done with an OR, which becomes a no-op if the event has concurrently + * transitioned to free or busy. + */ + +#define EV_SET 0 +#define EV_FREE 1 +#define EV_BUSY -1 + +void qemu_event_init(QemuEvent *ev, bool init) +{ +#ifndef HAVE_FUTEX + pthread_mutex_init(&ev->lock, NULL); + pthread_cond_init(&ev->cond, NULL); +#endif + + ev->value = (init ? EV_SET : EV_FREE); + ev->initialized = true; +} + +void qemu_event_destroy(QemuEvent *ev) +{ + assert(ev->initialized); + ev->initialized = false; +#ifndef HAVE_FUTEX + pthread_mutex_destroy(&ev->lock); + pthread_cond_destroy(&ev->cond); +#endif +} + +void qemu_event_set(QemuEvent *ev) +{ + assert(ev->initialized); + +#ifdef HAVE_FUTEX + /* + * Pairs with both qemu_event_reset() and qemu_event_wait(). + * + * qemu_event_set has release semantics, but because it *loads* + * ev->value we need a full memory barrier here. + */ + smp_mb(); + if (qatomic_read(&ev->value) != EV_SET) { + int old = qatomic_xchg(&ev->value, EV_SET); + + /* Pairs with memory barrier in kernel futex_wait system call. */ + smp_mb__after_rmw(); + if (old == EV_BUSY) { + /* There were waiters, wake them up. 
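The transition table at the top of the new util/event.c is easiest to read from the caller's side. A sketch (compiling against QEMU's own headers, after qemu_event_init(&ev, false)) of the test-reset-test-wait sequence those comments describe; done is a stand-in for whatever condition the waiter polls:

#include "qemu/osdep.h"
#include "qemu/atomic.h"
#include "qemu/thread.h"

static QemuEvent ev;
static int done;

static void waiter(void)
{
    if (!qatomic_read(&done)) {        /* test */
        qemu_event_reset(&ev);         /* SET -> FREE */
        if (!qatomic_read(&done)) {    /* re-test: closes the race window */
            qemu_event_wait(&ev);      /* FREE -> BUSY, sleep until SET */
        }
    }
}

static void setter(void)
{
    qatomic_set(&done, 1);
    qemu_event_set(&ev);               /* FREE or BUSY -> SET, wakes waiter */
}

If the setter runs between the reset and the re-test, the waiter sees done and skips the wait; if it runs after, the FREE -> BUSY transition guarantees the wakeup is issued.
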
*/ + qemu_futex_wake_all(ev); + } + } +#else + pthread_mutex_lock(&ev->lock); + /* Pairs with qemu_event_reset()'s load acquire. */ + qatomic_store_release(&ev->value, EV_SET); + pthread_cond_broadcast(&ev->cond); + pthread_mutex_unlock(&ev->lock); +#endif +} + +void qemu_event_reset(QemuEvent *ev) +{ + assert(ev->initialized); + +#ifdef HAVE_FUTEX + /* + * If there was a concurrent reset (or even reset+wait), + * do nothing. Otherwise change EV_SET->EV_FREE. + */ + qatomic_or(&ev->value, EV_FREE); + + /* + * Order reset before checking the condition in the caller. + * Pairs with the first memory barrier in qemu_event_set(). + */ + smp_mb__after_rmw(); +#else + /* + * If futexes are not available, there are no EV_FREE->EV_BUSY + * transitions because wakeups are done entirely through the + * condition variable. Since qatomic_set() only writes EV_FREE, + * the load seems useless but in reality, the acquire synchronizes + * with qemu_event_set()'s store release: if qemu_event_reset() + * sees EV_SET here, then the caller will certainly see a + * successful condition and skip qemu_event_wait(): + * + * done = 1; if (done == 0) + * qemu_event_set() { qemu_event_reset() { + * lock(); + * ev->value = EV_SET -----> load ev->value + * ev->value = old value | EV_FREE + * cond_broadcast() + * unlock(); } + * } if (done == 0) + * // qemu_event_wait() not called + */ + qatomic_set(&ev->value, qatomic_load_acquire(&ev->value) | EV_FREE); +#endif +} + +void qemu_event_wait(QemuEvent *ev) +{ + assert(ev->initialized); + +#ifdef HAVE_FUTEX + while (true) { + /* + * qemu_event_wait must synchronize with qemu_event_set even if it does + * not go down the slow path, so this load-acquire is needed that + * synchronizes with the first memory barrier in qemu_event_set(). + */ + unsigned value = qatomic_load_acquire(&ev->value); + if (value == EV_SET) { + break; + } + + if (value == EV_FREE) { + /* + * Leave the event reset and tell qemu_event_set that there are + * waiters. No need to retry, because there cannot be a concurrent + * busy->free transition. After the CAS, the event will be either + * set or busy. + * + * This cmpxchg doesn't have particular ordering requirements if it + * succeeds (moving the store earlier can only cause + * qemu_event_set() to issue _more_ wakeups), the failing case needs + * acquire semantics like the load above. + */ + if (qatomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) { + break; + } + } + + /* + * This is the final check for a concurrent set, so it does need + * a smp_mb() pairing with the second barrier of qemu_event_set(). + * The barrier is inside the FUTEX_WAIT system call. 
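On Linux those helpers reduce to the futex(2) system call; a hedged sketch of what qemu_futex_wait()/qemu_futex_wake_all() boil down to (see qemu/futex.h for the real definitions):

#include <limits.h>
#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>

static void futex_wait(int *addr, int expected)
{
    /* sleeps only if *addr still equals expected -- the "final check" */
    syscall(SYS_futex, addr, FUTEX_WAIT, expected, NULL, NULL, 0);
}

static void futex_wake_all(int *addr)
{
    /* wake every waiter blocked on addr */
    syscall(SYS_futex, addr, FUTEX_WAKE, INT_MAX, NULL, NULL, 0);
}
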
+ */ + qemu_futex_wait(ev, EV_BUSY); + } +#else + pthread_mutex_lock(&ev->lock); + while (qatomic_read(&ev->value) != EV_SET) { + pthread_cond_wait(&ev->cond, &ev->lock); + } + pthread_mutex_unlock(&ev->lock); +#endif +} diff --git a/util/fdmon-epoll.c b/util/fdmon-epoll.c index c6413cb..9fb8800 100644 --- a/util/fdmon-epoll.c +++ b/util/fdmon-epoll.c @@ -5,6 +5,7 @@ #include "qemu/osdep.h" #include <sys/epoll.h> +#include "qemu/lockcnt.h" #include "qemu/rcu_queue.h" #include "aio-posix.h" diff --git a/util/fifo8.c b/util/fifo8.c index 1ffa19d..a26da66 100644 --- a/util/fifo8.c +++ b/util/fifo8.c @@ -71,18 +71,27 @@ uint8_t fifo8_pop(Fifo8 *fifo) return ret; } -static const uint8_t *fifo8_peekpop_buf(Fifo8 *fifo, uint32_t max, - uint32_t *numptr, bool do_pop) +uint8_t fifo8_peek(Fifo8 *fifo) +{ + assert(fifo->num > 0); + return fifo->data[fifo->head]; +} + +static const uint8_t *fifo8_peekpop_bufptr(Fifo8 *fifo, uint32_t max, + uint32_t skip, uint32_t *numptr, + bool do_pop) { uint8_t *ret; - uint32_t num; + uint32_t num, head; assert(max > 0 && max <= fifo->num); - num = MIN(fifo->capacity - fifo->head, max); - ret = &fifo->data[fifo->head]; + assert(skip <= fifo->num); + head = (fifo->head + skip) % fifo->capacity; + num = MIN(fifo->capacity - head, max); + ret = &fifo->data[head]; if (do_pop) { - fifo->head += num; + fifo->head = head + num; fifo->head %= fifo->capacity; fifo->num -= num; } @@ -94,15 +103,16 @@ static const uint8_t *fifo8_peekpop_buf(Fifo8 *fifo, uint32_t max, const uint8_t *fifo8_peek_bufptr(Fifo8 *fifo, uint32_t max, uint32_t *numptr) { - return fifo8_peekpop_buf(fifo, max, numptr, false); + return fifo8_peekpop_bufptr(fifo, max, 0, numptr, false); } const uint8_t *fifo8_pop_bufptr(Fifo8 *fifo, uint32_t max, uint32_t *numptr) { - return fifo8_peekpop_buf(fifo, max, numptr, true); + return fifo8_peekpop_bufptr(fifo, max, 0, numptr, true); } -uint32_t fifo8_pop_buf(Fifo8 *fifo, uint8_t *dest, uint32_t destlen) +static uint32_t fifo8_peekpop_buf(Fifo8 *fifo, uint8_t *dest, uint32_t destlen, + bool do_pop) { const uint8_t *buf; uint32_t n1, n2 = 0; @@ -113,7 +123,7 @@ uint32_t fifo8_pop_buf(Fifo8 *fifo, uint8_t *dest, uint32_t destlen) } len = destlen; - buf = fifo8_pop_bufptr(fifo, len, &n1); + buf = fifo8_peekpop_bufptr(fifo, len, 0, &n1, do_pop); if (dest) { memcpy(dest, buf, n1); } @@ -122,7 +132,7 @@ uint32_t fifo8_pop_buf(Fifo8 *fifo, uint8_t *dest, uint32_t destlen) len -= n1; len = MIN(len, fifo8_num_used(fifo)); if (len) { - buf = fifo8_pop_bufptr(fifo, len, &n2); + buf = fifo8_peekpop_bufptr(fifo, len, do_pop ? 
0 : n1, &n2, do_pop); if (dest) { memcpy(&dest[n1], buf, n2); } @@ -131,6 +141,16 @@ uint32_t fifo8_pop_buf(Fifo8 *fifo, uint8_t *dest, uint32_t destlen) return n1 + n2; } +uint32_t fifo8_pop_buf(Fifo8 *fifo, uint8_t *dest, uint32_t destlen) +{ + return fifo8_peekpop_buf(fifo, dest, destlen, true); +} + +uint32_t fifo8_peek_buf(Fifo8 *fifo, uint8_t *dest, uint32_t destlen) +{ + return fifo8_peekpop_buf(fifo, dest, destlen, false); +} + void fifo8_drop(Fifo8 *fifo, uint32_t len) { len -= fifo8_pop_buf(fifo, NULL, len); diff --git a/util/hbitmap.c b/util/hbitmap.c index 6d6e1b5..d9a1dab 100644 --- a/util/hbitmap.c +++ b/util/hbitmap.c @@ -949,7 +949,7 @@ char *hbitmap_sha256(const HBitmap *bitmap, Error **errp) size_t size = bitmap->sizes[HBITMAP_LEVELS - 1] * sizeof(unsigned long); char *data = (char *)bitmap->levels[HBITMAP_LEVELS - 1]; char *hash = NULL; - qcrypto_hash_digest(QCRYPTO_HASH_ALG_SHA256, data, size, &hash, errp); + qcrypto_hash_digest(QCRYPTO_HASH_ALGO_SHA256, data, size, &hash, errp); return hash; } diff --git a/util/hexdump.c b/util/hexdump.c index ae0d499..f29ffce 100644 --- a/util/hexdump.c +++ b/util/hexdump.c @@ -15,6 +15,7 @@ #include "qemu/osdep.h" #include "qemu/cutils.h" +#include "qemu/host-utils.h" static inline char hexdump_nibble(unsigned x) { @@ -97,3 +98,20 @@ void qemu_hexdump(FILE *fp, const char *prefix, } } + +void qemu_hexdump_to_buffer(char *restrict buffer, size_t buffer_size, + const uint8_t *restrict data, size_t data_size) +{ + size_t i; + uint64_t required_buffer_size; + bool overflow = umul64_overflow(data_size, 2, &required_buffer_size); + overflow |= uadd64_overflow(required_buffer_size, 1, &required_buffer_size); + assert(!overflow && buffer_size >= required_buffer_size); + + for (i = 0; i < data_size; i++) { + uint8_t val = data[i]; + *(buffer++) = hexdump_nibble(val >> 4); + *(buffer++) = hexdump_nibble(val & 0xf); + } + *buffer = '\0'; +} @@ -3,6 +3,7 @@ * * Copyright IBM, Corp. 2007, 2008 * Copyright (C) 2010 Red Hat, Inc. + * Copyright (c) 2024 Seagate Technology LLC and/or its Affiliates * * Author(s): * Anthony Liguori <aliguori@us.ibm.com> @@ -36,7 +37,6 @@ size_t iov_from_buf_full(const struct iovec *iov, unsigned int iov_cnt, offset -= iov[i].iov_len; } } - assert(offset == 0); return done; } @@ -55,7 +55,6 @@ size_t iov_to_buf_full(const struct iovec *iov, const unsigned int iov_cnt, offset -= iov[i].iov_len; } } - assert(offset == 0); return done; } @@ -74,7 +73,6 @@ size_t iov_memset(const struct iovec *iov, const unsigned int iov_cnt, offset -= iov[i].iov_len; } } - assert(offset == 0); return done; } @@ -92,7 +90,8 @@ size_t iov_size(const struct iovec *iov, const unsigned int iov_cnt) /* helper function for iov_send_recv() */ static ssize_t -do_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt, bool do_send) +do_send_recv(int sockfd, int flags, struct iovec *iov, unsigned iov_cnt, + bool do_send) { #ifdef CONFIG_POSIX ssize_t ret; @@ -102,8 +101,8 @@ do_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt, bool do_send) msg.msg_iovlen = iov_cnt; do { ret = do_send - ? sendmsg(sockfd, &msg, 0) - : recvmsg(sockfd, &msg, 0); + ? sendmsg(sockfd, &msg, flags) + : recvmsg(sockfd, &msg, flags); } while (ret < 0 && errno == EINTR); return ret; #else @@ -114,8 +113,8 @@ do_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt, bool do_send) ssize_t off = 0; while (i < iov_cnt) { ssize_t r = do_send - ? 
send(sockfd, iov[i].iov_base + off, iov[i].iov_len - off, 0) - : recv(sockfd, iov[i].iov_base + off, iov[i].iov_len - off, 0); + ? send(sockfd, iov[i].iov_base + off, iov[i].iov_len - off, flags) + : recv(sockfd, iov[i].iov_base + off, iov[i].iov_len - off, flags); if (r > 0) { ret += r; off += r; @@ -145,6 +144,15 @@ ssize_t iov_send_recv(int sockfd, const struct iovec *_iov, unsigned iov_cnt, size_t offset, size_t bytes, bool do_send) { + return iov_send_recv_with_flags(sockfd, 0, _iov, iov_cnt, offset, bytes, + do_send); +} + +ssize_t iov_send_recv_with_flags(int sockfd, int sockflags, + const struct iovec *_iov, + unsigned iov_cnt, size_t offset, + size_t bytes, bool do_send) +{ ssize_t total = 0; ssize_t ret; size_t orig_len, tail; @@ -192,11 +200,11 @@ ssize_t iov_send_recv(int sockfd, const struct iovec *_iov, unsigned iov_cnt, assert(iov[niov].iov_len > tail); orig_len = iov[niov].iov_len; iov[niov++].iov_len = tail; - ret = do_send_recv(sockfd, iov, niov, do_send); + ret = do_send_recv(sockfd, sockflags, iov, niov, do_send); /* Undo the changes above before checking for errors */ iov[niov-1].iov_len = orig_len; } else { - ret = do_send_recv(sockfd, iov, niov, do_send); + ret = do_send_recv(sockfd, sockflags, iov, niov, do_send); } if (offset) { iov[0].iov_base -= offset; @@ -266,7 +274,6 @@ unsigned iov_copy(struct iovec *dst_iov, unsigned int dst_iov_cnt, bytes -= len; offset = 0; } - assert(offset == 0); return j; } @@ -337,7 +344,6 @@ size_t qemu_iovec_concat_iov(QEMUIOVector *dst, soffset -= src_iov[i].iov_len; } } - assert(soffset == 0); /* offset beyond end of src */ return done; } diff --git a/util/iova-tree.c b/util/iova-tree.c index 5367897..5b0c95f 100644 --- a/util/iova-tree.c +++ b/util/iova-tree.c @@ -115,13 +115,6 @@ const DMAMap *iova_tree_find_iova(const IOVATree *tree, const DMAMap *map) return args.result; } -const DMAMap *iova_tree_find_address(const IOVATree *tree, hwaddr iova) -{ - const DMAMap map = { .iova = iova, .size = 0 }; - - return iova_tree_find(tree, &map); -} - static inline void iova_tree_insert_internal(GTree *gtree, DMAMap *range) { /* Key and value are sharing the same range data */ @@ -148,22 +141,6 @@ int iova_tree_insert(IOVATree *tree, const DMAMap *map) return IOVA_OK; } -static gboolean iova_tree_traverse(gpointer key, gpointer value, - gpointer data) -{ - iova_tree_iterator iterator = data; - DMAMap *map = key; - - g_assert(key == value); - - return iterator(map); -} - -void iova_tree_foreach(IOVATree *tree, iova_tree_iterator iterator) -{ - g_tree_foreach(tree->tree, iova_tree_traverse, iterator); -} - void iova_tree_remove(IOVATree *tree, DMAMap map) { const DMAMap *overlap; @@ -280,3 +257,49 @@ void iova_tree_destroy(IOVATree *tree) g_tree_destroy(tree->tree); g_free(tree); } + +static int gpa_tree_compare(gconstpointer a, gconstpointer b, gpointer data) +{ + const DMAMap *m1 = a, *m2 = b; + + if (m1->translated_addr > m2->translated_addr + m2->size) { + return 1; + } + + if (m1->translated_addr + m1->size < m2->translated_addr) { + return -1; + } + + /* Overlapped */ + return 0; +} + +IOVATree *gpa_tree_new(void) +{ + IOVATree *gpa_tree = g_new0(IOVATree, 1); + + gpa_tree->tree = g_tree_new_full(gpa_tree_compare, NULL, g_free, NULL); + + return gpa_tree; +} + +int gpa_tree_insert(IOVATree *tree, const DMAMap *map) +{ + DMAMap *new; + + if (map->translated_addr + map->size < map->translated_addr || + map->perm == IOMMU_NONE) { + return IOVA_ERR_INVALID; + } + + /* We don't allow inserting ranges that overlap with existing ones */ + if 
(iova_tree_find(tree, map)) { + return IOVA_ERR_OVERLAP; + } + + new = g_new0(DMAMap, 1); + memcpy(new, map, sizeof(*new)); + iova_tree_insert_internal(tree->tree, new); + + return IOVA_OK; +} diff --git a/util/keyval.c b/util/keyval.c index 66a5b47..a70629a 100644 --- a/util/keyval.c +++ b/util/keyval.c @@ -91,9 +91,9 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "qapi/qmp/qdict.h" -#include "qapi/qmp/qlist.h" -#include "qapi/qmp/qstring.h" +#include "qobject/qdict.h" +#include "qobject/qlist.h" +#include "qobject/qstring.h" #include "qemu/cutils.h" #include "qemu/keyval.h" #include "qemu/help_option.h" diff --git a/util/lockcnt.c b/util/lockcnt.c index 5da3694..92c9f8c 100644 --- a/util/lockcnt.c +++ b/util/lockcnt.c @@ -7,14 +7,16 @@ * Paolo Bonzini <pbonzini@redhat.com> */ #include "qemu/osdep.h" +#include "qemu/lockcnt.h" #include "qemu/thread.h" #include "qemu/atomic.h" #include "trace.h" -#ifdef CONFIG_LINUX -#include "qemu/futex.h" +#ifdef HAVE_FUTEX -/* On Linux, bits 0-1 are a futex-based lock, bits 2-31 are the counter. +/* + * When futex is available, bits 0-1 are a futex-based lock, bits 2-31 are the + * counter. * For the mutex algorithm see Ulrich Drepper's "Futexes Are Tricky" (ok, * this is not the most relaxing citation I could make...). It is similar * to mutex2 in the paper. @@ -105,7 +107,7 @@ static bool qemu_lockcnt_cmpxchg_or_wait(QemuLockCnt *lockcnt, int *val, static void lockcnt_wake(QemuLockCnt *lockcnt) { trace_lockcnt_futex_wake(lockcnt); - qemu_futex_wake(&lockcnt->count, 1); + qemu_futex_wake_single(&lockcnt->count); } void qemu_lockcnt_inc(QemuLockCnt *lockcnt) @@ -503,6 +503,8 @@ const QEMULogItem qemu_log_items[] = { "open a separate log file per thread; filename must contain '%d'" }, { CPU_LOG_TB_VPU, "vpu", "include VPU registers in the 'cpu' logging" }, + { LOG_INVALID_MEM, "invalid_mem", + "log invalid memory accesses" }, { 0, NULL, NULL }, }; diff --git a/util/main-loop.c b/util/main-loop.c index a0386cf..51aeb24 100644 --- a/util/main-loop.c +++ b/util/main-loop.c @@ -26,8 +26,9 @@ #include "qapi/error.h" #include "qemu/cutils.h" #include "qemu/timer.h" -#include "sysemu/cpu-timers.h" -#include "sysemu/replay.h" +#include "system/cpu-timers.h" +#include "exec/icount.h" +#include "system/replay.h" #include "qemu/main-loop.h" #include "block/aio.h" #include "block/thread-pool.h" @@ -212,7 +213,6 @@ static void main_loop_init(EventLoopBase *base, Error **errp) main_loop_update_params(base, errp); mloop = m; - return; } static bool main_loop_can_be_deleted(EventLoopBase *base) @@ -220,7 +220,7 @@ static bool main_loop_can_be_deleted(EventLoopBase *base) return false; } -static void main_loop_class_init(ObjectClass *oc, void *class_data) +static void main_loop_class_init(ObjectClass *oc, const void *class_data) { EventLoopBaseClass *bc = EVENT_LOOP_BASE_CLASS(oc); diff --git a/util/memfd.c b/util/memfd.c index 4a3c07e..07beab1 100644 --- a/util/memfd.c +++ b/util/memfd.c @@ -28,6 +28,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" +#include "qemu/error-report.h" #include "qemu/memfd.h" #include "qemu/host-utils.h" @@ -149,11 +150,15 @@ err: void qemu_memfd_free(void *ptr, size_t size, int fd) { if (ptr) { - munmap(ptr, size); + if (munmap(ptr, size) != 0) { + error_report("memfd munmap() failed: %s", strerror(errno)); + } } if (fd != -1) { - close(fd); + if (close(fd) != 0) { + error_report("memfd close() failed: %s", strerror(errno)); + } } } @@ -189,17 +194,27 @@ bool qemu_memfd_alloc_check(void) /** * qemu_memfd_check(): * - * 
Check if host supports memfd. + * Check if host supports memfd. Cache the answer for the common case flags=0. */ bool qemu_memfd_check(unsigned int flags) { #ifdef CONFIG_LINUX - int mfd = memfd_create("test", flags | MFD_CLOEXEC); + int mfd; + static int memfd_check = MEMFD_TODO; + if (!flags && memfd_check != MEMFD_TODO) { + return memfd_check; + } + + mfd = memfd_create("test", flags | MFD_CLOEXEC); if (mfd >= 0) { close(mfd); - return true; } + if (!flags) { + memfd_check = (mfd >= 0) ? MEMFD_OK : MEMFD_KO; + } + return (mfd >= 0); + #endif return false; diff --git a/util/meson.build b/util/meson.build index 5d8bef9..3502938 100644 --- a/util/meson.build +++ b/util/meson.build @@ -11,7 +11,9 @@ if host_os != 'windows' endif util_ss.add(files('compatfd.c')) util_ss.add(files('event_notifier-posix.c')) - util_ss.add(files('mmap-alloc.c')) + if host_os != 'emscripten' + util_ss.add(files('mmap-alloc.c')) + endif freebsd_dep = [] if host_os == 'freebsd' freebsd_dep = util @@ -25,7 +27,7 @@ else util_ss.add(files('event_notifier-win32.c')) util_ss.add(files('oslib-win32.c')) util_ss.add(files('qemu-thread-win32.c')) - util_ss.add(winmm, pathcch) + util_ss.add(winmm, pathcch, synchronization) endif util_ss.add(when: linux_io_uring, if_true: files('fdmon-io_uring.c')) if glib_has_gslice @@ -33,6 +35,7 @@ if glib_has_gslice endif util_ss.add(files('defer-call.c')) util_ss.add(files('envlist.c', 'path.c', 'module.c')) +util_ss.add(files('event.c')) util_ss.add(files('host-utils.c')) util_ss.add(files('bitmap.c', 'bitops.c')) util_ss.add(files('fifo8.c')) @@ -84,6 +87,8 @@ if have_block or have_ga util_ss.add(files('qemu-coroutine.c', 'qemu-coroutine-lock.c', 'qemu-coroutine-io.c')) util_ss.add(files(f'coroutine-@coroutine_backend@.c')) util_ss.add(files('thread-pool.c', 'qemu-timer.c')) +endif +if have_block or have_ga or have_user util_ss.add(files('qemu-sockets.c')) endif if have_block @@ -129,4 +134,6 @@ elif cpu in ['ppc', 'ppc64'] util_ss.add(files('cpuinfo-ppc.c')) elif cpu in ['riscv32', 'riscv64'] util_ss.add(files('cpuinfo-riscv.c')) +elif cpu == 's390x' + util_ss.add(files('s390x_pci_mmio.c')) endif diff --git a/util/module.c b/util/module.c index 3eb0f06..1aa2079 100644 --- a/util/module.c +++ b/util/module.c @@ -234,7 +234,7 @@ int module_load(const char *prefix, const char *name, Error **errp) search_dir = getenv("QEMU_MODULE_DIR"); if (search_dir != NULL) { - dirs[n_dirs++] = g_strdup_printf("%s", search_dir); + dirs[n_dirs++] = g_strdup(search_dir); } dirs[n_dirs++] = get_relocated_path(CONFIG_QEMU_MODDIR); diff --git a/util/oslib-posix.c b/util/oslib-posix.c index 11b35e4..4ff577e 100644 --- a/util/oslib-posix.c +++ b/util/oslib-posix.c @@ -31,7 +31,7 @@ #include <glib/gprintf.h> -#include "sysemu/sysemu.h" +#include "system/system.h" #include "trace.h" #include "qapi/error.h" #include "qemu/error-report.h" @@ -58,6 +58,7 @@ #include <lwp.h> #endif +#include "qemu/memalign.h" #include "qemu/mmap-alloc.h" #define MAX_MEM_PREALLOC_THREAD_COUNT 16 @@ -111,6 +112,21 @@ int qemu_get_thread_id(void) #endif } +int qemu_kill_thread(int tid, int sig) +{ +#if defined(__linux__) + return syscall(__NR_tgkill, getpid(), tid, sig); +#elif defined(__FreeBSD__) + return thr_kill2(getpid(), tid, sig); +#elif defined(__NetBSD__) + return _lwp_kill(tid, sig); +#elif defined(__OpenBSD__) + return thrkill(tid, sig, NULL); +#else + return kill(tid, sig); +#endif +} + int qemu_daemon(int nochdir, int noclose) { return daemon(nochdir, noclose); @@ -195,11 +211,21 @@ void *qemu_anon_ram_alloc(size_t 
size, uint64_t *alignment, bool shared, const uint32_t qemu_map_flags = (shared ? QEMU_MAP_SHARED : 0) | (noreserve ? QEMU_MAP_NORESERVE : 0); size_t align = QEMU_VMALLOC_ALIGN; +#ifndef EMSCRIPTEN void *ptr = qemu_ram_mmap(-1, size, align, qemu_map_flags, 0); if (ptr == MAP_FAILED) { return NULL; } +#else + /* + * qemu_ram_mmap is not implemented for Emscripten. Use qemu_memalign + * for the anonymous allocation. noreserve is ignored as there is no swap + * space on Emscripten, and shared is ignored as there is no other + * processes on Emscripten. + */ + void *ptr = qemu_memalign(align, size); +#endif if (alignment) { *alignment = align; @@ -212,7 +238,16 @@ void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment, bool shared, void qemu_anon_ram_free(void *ptr, size_t size) { trace_qemu_anon_ram_free(ptr, size); +#ifndef EMSCRIPTEN qemu_ram_munmap(-1, ptr, size); +#else + /* + * qemu_ram_munmap is not implemented for Emscripten and qemu_memalign + * was used for the allocation. Use the corresponding freeing function + * here. + */ + qemu_vfree(ptr); +#endif } void qemu_socket_set_block(int fd) @@ -573,7 +608,15 @@ bool qemu_prealloc_mem(int fd, char *area, size_t sz, int max_threads, { static gsize initialized; int ret; +#ifndef EMSCRIPTEN size_t hpagesize = qemu_fd_getpagesize(fd); +#else + /* + * mmap-alloc.c is excluded from Emscripten build, so qemu_fd_getpagesize + * is unavailable. Fallback to the lower level implementation. + */ + size_t hpagesize = qemu_real_host_page_size(); +#endif size_t numpages = DIV_ROUND_UP(sz, hpagesize); bool use_madv_populate_write; struct sigaction act; @@ -931,3 +974,55 @@ void qemu_close_all_open_fd(const int *skip, unsigned int nskip) qemu_close_all_open_fd_fallback(skip, nskip, open_max); } } + +int qemu_shm_alloc(size_t size, Error **errp) +{ + g_autoptr(GString) shm_name = g_string_new(NULL); + int fd, oflag, cur_sequence; + static int sequence; + mode_t mode; + + cur_sequence = qatomic_fetch_inc(&sequence); + + /* + * Let's use `mode = 0` because we don't want other processes to open our + * memory unless we share the file descriptor with them. + */ + mode = 0; + oflag = O_RDWR | O_CREAT | O_EXCL; + + /* + * Some operating systems allow creating anonymous POSIX shared memory + * objects (e.g. FreeBSD provides the SHM_ANON constant), but this is not + * defined by POSIX, so let's create a unique name. + * + * From Linux's shm_open(3) man-page: + * For portable use, a shared memory object should be identified + * by a name of the form /somename;" + */ + g_string_printf(shm_name, "/qemu-" FMT_pid "-shm-%d", getpid(), + cur_sequence); + + fd = shm_open(shm_name->str, oflag, mode); + if (fd < 0) { + error_setg_errno(errp, errno, + "failed to create POSIX shared memory"); + return -1; + } + + /* + * We have the file descriptor, so we no longer need to expose the + * POSIX shared memory object. However it will remain allocated as long as + * there are file descriptors pointing to it. 
+ */ + shm_unlink(shm_name->str); + + if (ftruncate(fd, size) == -1) { + error_setg_errno(errp, errno, + "failed to resize POSIX shared memory to %zu", size); + close(fd); + return -1; + } + + return fd; +} diff --git a/util/oslib-win32.c b/util/oslib-win32.c index b623830..b735163 100644 --- a/util/oslib-win32.c +++ b/util/oslib-win32.c @@ -877,3 +877,9 @@ void qemu_win32_map_free(void *ptr, HANDLE h, Error **errp) } CloseHandle(h); } + +int qemu_shm_alloc(size_t size, Error **errp) +{ + error_setg(errp, "Shared memory is not supported."); + return -1; +} diff --git a/util/qemu-co-shared-resource.c b/util/qemu-co-shared-resource.c index a66cc07..752eb5a 100644 --- a/util/qemu-co-shared-resource.c +++ b/util/qemu-co-shared-resource.c @@ -66,12 +66,6 @@ static bool co_try_get_from_shres_locked(SharedResource *s, uint64_t n) return false; } -bool co_try_get_from_shres(SharedResource *s, uint64_t n) -{ - QEMU_LOCK_GUARD(&s->lock); - return co_try_get_from_shres_locked(s, n); -} - void coroutine_fn co_get_from_shres(SharedResource *s, uint64_t n) { assert(n <= s->total); diff --git a/util/qemu-config.c b/util/qemu-config.c index a90c18d..d1fc49c 100644 --- a/util/qemu-config.c +++ b/util/qemu-config.c @@ -1,8 +1,8 @@ #include "qemu/osdep.h" #include "block/qdict.h" /* for qdict_extract_subqdict() */ #include "qapi/error.h" -#include "qapi/qmp/qdict.h" -#include "qapi/qmp/qlist.h" +#include "qobject/qdict.h" +#include "qobject/qlist.h" #include "qemu/error-report.h" #include "qemu/option.h" #include "qemu/config-file.h" diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c index eb4eebe..64d6264 100644 --- a/util/qemu-coroutine.c +++ b/util/qemu-coroutine.c @@ -136,7 +136,7 @@ static Coroutine *coroutine_pool_get_local(void) static void coroutine_pool_refill_local(void) { CoroutinePool *local_pool = get_ptr_local_pool(); - CoroutinePoolBatch *batch; + CoroutinePoolBatch *batch = NULL; WITH_QEMU_LOCK_GUARD(&global_pool_lock) { batch = QSLIST_FIRST(&global_pool); diff --git a/util/qemu-option.c b/util/qemu-option.c index 201f7a8..770300df 100644 --- a/util/qemu-option.c +++ b/util/qemu-option.c @@ -27,10 +27,10 @@ #include "qapi/error.h" #include "qemu/error-report.h" -#include "qapi/qmp/qbool.h" -#include "qapi/qmp/qdict.h" -#include "qapi/qmp/qnum.h" -#include "qapi/qmp/qstring.h" +#include "qobject/qbool.h" +#include "qobject/qdict.h" +#include "qobject/qnum.h" +#include "qobject/qstring.h" #include "qapi/qmp/qerror.h" #include "qemu/option_int.h" #include "qemu/cutils.h" diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c index 60c44b2..4773755 100644 --- a/util/qemu-sockets.c +++ b/util/qemu-sockets.c @@ -30,6 +30,7 @@ #include "qapi/qobject-input-visitor.h" #include "qapi/qobject-output-visitor.h" #include "qemu/cutils.h" +#include "qemu/option.h" #include "trace.h" #ifndef AI_ADDRCONFIG @@ -44,6 +45,14 @@ # define AI_NUMERICSERV 0 #endif +/* + * On macOS TCP_KEEPIDLE is available under a different name, TCP_KEEPALIVE. 
+ * https://github.com/apple/darwin-xnu/blob/xnu-4570.1.46/bsd/man/man4/tcp.4#L172 + */ +#if defined(TCP_KEEPALIVE) && !defined(TCP_KEEPIDLE) +# define TCP_KEEPIDLE TCP_KEEPALIVE +#endif + static int inet_getport(struct addrinfo *e) { @@ -205,6 +214,58 @@ static int try_bind(int socket, InetSocketAddress *saddr, struct addrinfo *e) #endif } +static int inet_set_sockopts(int sock, InetSocketAddress *saddr, Error **errp) +{ + if (saddr->keep_alive) { + int keep_alive = 1; + int ret = setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, + &keep_alive, sizeof(keep_alive)); + + if (ret < 0) { + error_setg_errno(errp, errno, + "Unable to set keep-alive option on socket"); + return -1; + } +#ifdef HAVE_TCP_KEEPCNT + if (saddr->has_keep_alive_count && saddr->keep_alive_count) { + int keep_count = saddr->keep_alive_count; + ret = setsockopt(sock, IPPROTO_TCP, TCP_KEEPCNT, &keep_count, + sizeof(keep_count)); + if (ret < 0) { + error_setg_errno(errp, errno, + "Unable to set TCP keep-alive count option on socket"); + return -1; + } + } +#endif +#ifdef HAVE_TCP_KEEPIDLE + if (saddr->has_keep_alive_idle && saddr->keep_alive_idle) { + int keep_idle = saddr->keep_alive_idle; + ret = setsockopt(sock, IPPROTO_TCP, TCP_KEEPIDLE, &keep_idle, + sizeof(keep_idle)); + if (ret < 0) { + error_setg_errno(errp, errno, + "Unable to set TCP keep-alive idle option on socket"); + return -1; + } + } +#endif +#ifdef HAVE_TCP_KEEPINTVL + if (saddr->has_keep_alive_interval && saddr->keep_alive_interval) { + int keep_interval = saddr->keep_alive_interval; + ret = setsockopt(sock, IPPROTO_TCP, TCP_KEEPINTVL, &keep_interval, + sizeof(keep_interval)); + if (ret < 0) { + error_setg_errno(errp, errno, + "Unable to set TCP keep-alive interval option on socket"); + return -1; + } + } +#endif + } + return 0; +} + static int inet_listen_saddr(InetSocketAddress *saddr, int port_offset, int num, @@ -220,12 +281,6 @@ static int inet_listen_saddr(InetSocketAddress *saddr, int saved_errno = 0; bool socket_created = false; - if (saddr->keep_alive) { - error_setg(errp, "keep-alive option is not supported for passive " - "sockets"); - return -1; - } - memset(&ai,0, sizeof(ai)); ai.ai_flags = AI_PASSIVE; if (saddr->has_numeric && saddr->numeric) { @@ -287,11 +342,20 @@ static int inet_listen_saddr(InetSocketAddress *saddr, port_min = inet_getport(e); port_max = saddr->has_to ? saddr->to + port_offset : port_min; for (p = port_min; p <= port_max; p++) { + if (slisten >= 0) { + /* + * We have a socket we tried with the previous port. It cannot + * be rebound, we need to close it and create a new one. + */ + close(slisten); + slisten = -1; + } inet_setport(e, p); slisten = create_fast_reuse_socket(e); if (slisten < 0) { - /* First time we expect we might fail to create the socket + /* + * First time we expect we might fail to create the socket * eg if 'e' has AF_INET6 but ipv6 kmod is not loaded. * Later iterations should always succeed if first iteration * worked though, so treat that as fatal. 
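The new inet_set_sockopts() applies one setsockopt() sequence to both connecting and listening sockets (passive sockets previously rejected keep-alive outright). Stripped of the InetSocketAddress plumbing, the tuning it performs looks roughly like this; the numeric values are examples only, and error handling for the optional knobs is elided:

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>

static int tune_keepalive(int sock)
{
    int on = 1, count = 5, idle = 60, interval = 10;

    /* enable keep-alive probing; everything below refines its timing */
    if (setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on)) < 0) {
        return -1;
    }
#ifdef TCP_KEEPCNT
    setsockopt(sock, IPPROTO_TCP, TCP_KEEPCNT, &count, sizeof(count));
#endif
#ifdef TCP_KEEPIDLE    /* spelled TCP_KEEPALIVE on macOS, see above */
    setsockopt(sock, IPPROTO_TCP, TCP_KEEPIDLE, &idle, sizeof(idle));
#endif
#ifdef TCP_KEEPINTVL
    setsockopt(sock, IPPROTO_TCP, TCP_KEEPINTVL, &interval, sizeof(interval));
#endif
    return 0;
}
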
@@ -301,40 +365,41 @@ static int inet_listen_saddr(InetSocketAddress *saddr, } else { error_setg_errno(errp, errno, "Failed to recreate failed listening socket"); - goto listen_failed; + goto fail; } } socket_created = true; rc = try_bind(slisten, saddr, e); if (rc < 0) { - if (errno != EADDRINUSE) { - error_setg_errno(errp, errno, "Failed to bind socket"); - goto listen_failed; - } - } else { - if (!listen(slisten, num)) { - goto listen_ok; + if (errno == EADDRINUSE) { + /* This port is already used, try the next one */ + continue; } - if (errno != EADDRINUSE) { - error_setg_errno(errp, errno, "Failed to listen on socket"); - goto listen_failed; + error_setg_errno(errp, errno, "Failed to bind socket"); + goto fail; + } + if (listen(slisten, num)) { + if (errno == EADDRINUSE) { + /* This port is already used, try the next one */ + continue; } + error_setg_errno(errp, errno, "Failed to listen on socket"); + goto fail; + } + /* We have a listening socket */ + if (inet_set_sockopts(slisten, saddr, errp) < 0) { + goto fail; } - /* Someone else managed to bind to the same port and beat us - * to listen on it! Socket semantics does not allow us to - * recover from this situation, so we need to recreate the - * socket to allow bind attempts for subsequent ports: - */ - close(slisten); - slisten = -1; + freeaddrinfo(res); + return slisten; } } error_setg_errno(errp, errno, socket_created ? "Failed to find an available port" : "Failed to create a socket"); -listen_failed: +fail: saved_errno = errno; if (slisten >= 0) { close(slisten); @@ -342,10 +407,6 @@ listen_failed: freeaddrinfo(res); errno = saved_errno; return -1; - -listen_ok: - freeaddrinfo(res); - return slisten; } #ifdef _WIN32 @@ -367,7 +428,6 @@ static int inet_connect_addr(const InetSocketAddress *saddr, addr->ai_family); return -1; } - socket_set_fast_reuse(sock); /* connect to peer */ do { @@ -476,16 +536,9 @@ int inet_connect_saddr(InetSocketAddress *saddr, Error **errp) return sock; } - if (saddr->keep_alive) { - int val = 1; - int ret = setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, - &val, sizeof(val)); - - if (ret < 0) { - error_setg_errno(errp, errno, "Unable to set KEEPALIVE"); - close(sock); - return -1; - } + if (inet_set_sockopts(sock, saddr, errp) < 0) { + close(sock); + return -1; } return sock; @@ -592,141 +645,146 @@ err: return -1; } -/* compatibility wrapper */ -static int inet_parse_flag(const char *flagname, const char *optstr, bool *val, - Error **errp) -{ - char *end; - size_t len; - - end = strstr(optstr, ","); - if (end) { - if (end[1] == ',') { /* Reject 'ipv6=on,,foo' */ - error_setg(errp, "error parsing '%s' flag '%s'", flagname, optstr); - return -1; - } - len = end - optstr; - } else { - len = strlen(optstr); - } - if (len == 0 || (len == 3 && strncmp(optstr, "=on", len) == 0)) { - *val = true; - } else if (len == 4 && strncmp(optstr, "=off", len) == 0) { - *val = false; - } else { - error_setg(errp, "error parsing '%s' flag '%s'", flagname, optstr); - return -1; - } - return 0; -} +static QemuOptsList inet_opts = { + .name = "InetSocketAddress", + .head = QTAILQ_HEAD_INITIALIZER(inet_opts.head), + .implied_opt_name = "addr", + .desc = { + { + .name = "addr", + .type = QEMU_OPT_STRING, + }, + { + .name = "numeric", + .type = QEMU_OPT_BOOL, + }, + { + .name = "to", + .type = QEMU_OPT_NUMBER, + }, + { + .name = "ipv4", + .type = QEMU_OPT_BOOL, + }, + { + .name = "ipv6", + .type = QEMU_OPT_BOOL, + }, + { + .name = "keep-alive", + .type = QEMU_OPT_BOOL, + }, +#ifdef HAVE_TCP_KEEPCNT + { + .name = 
"keep-alive-count", + .type = QEMU_OPT_NUMBER, + }, +#endif +#ifdef HAVE_TCP_KEEPIDLE + { + .name = "keep-alive-idle", + .type = QEMU_OPT_NUMBER, + }, +#endif +#ifdef HAVE_TCP_KEEPINTVL + { + .name = "keep-alive-interval", + .type = QEMU_OPT_NUMBER, + }, +#endif +#ifdef HAVE_IPPROTO_MPTCP + { + .name = "mptcp", + .type = QEMU_OPT_BOOL, + }, +#endif + { /* end of list */ } + }, +}; int inet_parse(InetSocketAddress *addr, const char *str, Error **errp) { - const char *optstr, *h; - char host[65]; - char port[33]; - int to; - int pos; - char *begin; - + QemuOpts *opts = qemu_opts_parse(&inet_opts, str, true, errp); + if (!opts) { + return -1; + } memset(addr, 0, sizeof(*addr)); /* parse address */ - if (str[0] == ':') { - /* no host given */ - host[0] = '\0'; - if (sscanf(str, ":%32[^,]%n", port, &pos) != 1) { - error_setg(errp, "error parsing port in address '%s'", str); - return -1; - } - } else if (str[0] == '[') { + const char *addr_str = qemu_opt_get(opts, "addr"); + if (!addr_str) { + error_setg(errp, "error parsing address ''"); + return -1; + } + if (str[0] == '[') { /* IPv6 addr */ - if (sscanf(str, "[%64[^]]]:%32[^,]%n", host, port, &pos) != 2) { - error_setg(errp, "error parsing IPv6 address '%s'", str); + const char *ip_end = strstr(addr_str, "]:"); + if (!ip_end || ip_end - addr_str < 2 || strlen(ip_end) < 3) { + error_setg(errp, "error parsing IPv6 address '%s'", addr_str); return -1; } + addr->host = g_strndup(addr_str + 1, ip_end - addr_str - 1); + addr->port = g_strdup(ip_end + 2); } else { - /* hostname or IPv4 addr */ - if (sscanf(str, "%64[^:]:%32[^,]%n", host, port, &pos) != 2) { - error_setg(errp, "error parsing address '%s'", str); + /* no host, hostname or IPv4 addr */ + const char *port = strchr(addr_str, ':'); + if (!port || strlen(port) < 2) { + error_setg(errp, "error parsing address '%s'", addr_str); return -1; } + addr->host = g_strndup(addr_str, port - addr_str); + addr->port = g_strdup(port + 1); } - addr->host = g_strdup(host); - addr->port = g_strdup(port); - /* parse options */ - optstr = str + pos; - h = strstr(optstr, ",to="); - if (h) { - h += 4; - if (sscanf(h, "%d%n", &to, &pos) != 1 || - (h[pos] != '\0' && h[pos] != ',')) { - error_setg(errp, "error parsing to= argument"); - return -1; - } + if (qemu_opt_find(opts, "numeric")) { + addr->has_numeric = true, + addr->numeric = qemu_opt_get_bool(opts, "numeric", false); + } + if (qemu_opt_find(opts, "to")) { addr->has_to = true; - addr->to = to; + addr->to = qemu_opt_get_number(opts, "to", 0); } - begin = strstr(optstr, ",ipv4"); - if (begin) { - if (inet_parse_flag("ipv4", begin + 5, &addr->ipv4, errp) < 0) { - return -1; - } + if (qemu_opt_find(opts, "ipv4")) { addr->has_ipv4 = true; + addr->ipv4 = qemu_opt_get_bool(opts, "ipv4", false); } - begin = strstr(optstr, ",ipv6"); - if (begin) { - if (inet_parse_flag("ipv6", begin + 5, &addr->ipv6, errp) < 0) { - return -1; - } + if (qemu_opt_find(opts, "ipv6")) { addr->has_ipv6 = true; + addr->ipv6 = qemu_opt_get_bool(opts, "ipv6", false); } - begin = strstr(optstr, ",keep-alive"); - if (begin) { - if (inet_parse_flag("keep-alive", begin + strlen(",keep-alive"), - &addr->keep_alive, errp) < 0) - { - return -1; - } + if (qemu_opt_find(opts, "keep-alive")) { addr->has_keep_alive = true; + addr->keep_alive = qemu_opt_get_bool(opts, "keep-alive", false); + } +#ifdef HAVE_TCP_KEEPCNT + if (qemu_opt_find(opts, "keep-alive-count")) { + addr->has_keep_alive_count = true; + addr->keep_alive_count = qemu_opt_get_number(opts, "keep-alive-count", 0); + } +#endif +#ifdef 
HAVE_TCP_KEEPIDLE + if (qemu_opt_find(opts, "keep-alive-idle")) { + addr->has_keep_alive_idle = true; + addr->keep_alive_idle = qemu_opt_get_number(opts, "keep-alive-idle", 0); } +#endif +#ifdef HAVE_TCP_KEEPINTVL + if (qemu_opt_find(opts, "keep-alive-interval")) { + addr->has_keep_alive_interval = true; + addr->keep_alive_interval = qemu_opt_get_number(opts, "keep-alive-interval", 0); + } +#endif #ifdef HAVE_IPPROTO_MPTCP - begin = strstr(optstr, ",mptcp"); - if (begin) { - if (inet_parse_flag("mptcp", begin + strlen(",mptcp"), - &addr->mptcp, errp) < 0) - { - return -1; - } + if (qemu_opt_find(opts, "mptcp")) { addr->has_mptcp = true; + addr->mptcp = qemu_opt_get_bool(opts, "mptcp", 0); } #endif return 0; } -/** - * Create a blocking socket and connect it to an address. - * - * @str: address string - * @errp: set in case of an error - * - * Returns -1 in case of error, file descriptor on success - **/ -int inet_connect(const char *str, Error **errp) -{ - int sock = -1; - InetSocketAddress *addr = g_new(InetSocketAddress, 1); - - if (!inet_parse(addr, str, errp)) { - sock = inet_connect_saddr(addr, errp); - } - qapi_free_InetSocketAddress(addr); - return sock; -} - #ifdef CONFIG_AF_VSOCK static bool vsock_parse_vaddr_to_sockaddr(const VsockSocketAddress *vaddr, struct sockaddr_vm *svm, @@ -1421,21 +1479,6 @@ SocketAddress *socket_local_address(int fd, Error **errp) } -SocketAddress *socket_remote_address(int fd, Error **errp) -{ - struct sockaddr_storage ss; - socklen_t sslen = sizeof(ss); - - if (getpeername(fd, (struct sockaddr *)&ss, &sslen) < 0) { - error_setg_errno(errp, errno, "%s", - "Unable to query remote socket address"); - return NULL; - } - - return socket_sockaddr_to_address(&ss, sslen, errp); -} - - SocketAddress *socket_address_flatten(SocketAddressLegacy *addr_legacy) { SocketAddress *addr; diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c index b2e26e2..ba72544 100644 --- a/util/qemu-thread-posix.c +++ b/util/qemu-thread-posix.c @@ -317,154 +317,6 @@ void qemu_sem_wait(QemuSemaphore *sem) qemu_mutex_unlock(&sem->mutex); } -#ifdef __linux__ -#include "qemu/futex.h" -#else -static inline void qemu_futex_wake(QemuEvent *ev, int n) -{ - assert(ev->initialized); - pthread_mutex_lock(&ev->lock); - if (n == 1) { - pthread_cond_signal(&ev->cond); - } else { - pthread_cond_broadcast(&ev->cond); - } - pthread_mutex_unlock(&ev->lock); -} - -static inline void qemu_futex_wait(QemuEvent *ev, unsigned val) -{ - assert(ev->initialized); - pthread_mutex_lock(&ev->lock); - if (ev->value == val) { - pthread_cond_wait(&ev->cond, &ev->lock); - } - pthread_mutex_unlock(&ev->lock); -} -#endif - -/* Valid transitions: - * - free->set, when setting the event - * - busy->set, when setting the event, followed by qemu_futex_wake - * - set->free, when resetting the event - * - free->busy, when waiting - * - * set->busy does not happen (it can be observed from the outside but - * it really is set->free->busy). - * - * busy->free provably cannot happen; to enforce it, the set->free transition - * is done with an OR, which becomes a no-op if the event has concurrently - * transitioned to free or busy. - */ - -#define EV_SET 0 -#define EV_FREE 1 -#define EV_BUSY -1 - -void qemu_event_init(QemuEvent *ev, bool init) -{ -#ifndef __linux__ - pthread_mutex_init(&ev->lock, NULL); - pthread_cond_init(&ev->cond, NULL); -#endif - - ev->value = (init ? 
EV_SET : EV_FREE); - ev->initialized = true; -} - -void qemu_event_destroy(QemuEvent *ev) -{ - assert(ev->initialized); - ev->initialized = false; -#ifndef __linux__ - pthread_mutex_destroy(&ev->lock); - pthread_cond_destroy(&ev->cond); -#endif -} - -void qemu_event_set(QemuEvent *ev) -{ - assert(ev->initialized); - - /* - * Pairs with both qemu_event_reset() and qemu_event_wait(). - * - * qemu_event_set has release semantics, but because it *loads* - * ev->value we need a full memory barrier here. - */ - smp_mb(); - if (qatomic_read(&ev->value) != EV_SET) { - int old = qatomic_xchg(&ev->value, EV_SET); - - /* Pairs with memory barrier in kernel futex_wait system call. */ - smp_mb__after_rmw(); - if (old == EV_BUSY) { - /* There were waiters, wake them up. */ - qemu_futex_wake(ev, INT_MAX); - } - } -} - -void qemu_event_reset(QemuEvent *ev) -{ - assert(ev->initialized); - - /* - * If there was a concurrent reset (or even reset+wait), - * do nothing. Otherwise change EV_SET->EV_FREE. - */ - qatomic_or(&ev->value, EV_FREE); - - /* - * Order reset before checking the condition in the caller. - * Pairs with the first memory barrier in qemu_event_set(). - */ - smp_mb__after_rmw(); -} - -void qemu_event_wait(QemuEvent *ev) -{ - unsigned value; - - assert(ev->initialized); - - /* - * qemu_event_wait must synchronize with qemu_event_set even if it does - * not go down the slow path, so this load-acquire is needed that - * synchronizes with the first memory barrier in qemu_event_set(). - * - * If we do go down the slow path, there is no requirement at all: we - * might miss a qemu_event_set() here but ultimately the memory barrier in - * qemu_futex_wait() will ensure the check is done correctly. - */ - value = qatomic_load_acquire(&ev->value); - if (value != EV_SET) { - if (value == EV_FREE) { - /* - * Leave the event reset and tell qemu_event_set that there are - * waiters. No need to retry, because there cannot be a concurrent - * busy->free transition. After the CAS, the event will be either - * set or busy. - * - * This cmpxchg doesn't have particular ordering requirements if it - * succeeds (moving the store earlier can only cause qemu_event_set() - * to issue _more_ wakeups), the failing case needs acquire semantics - * like the load above. - */ - if (qatomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) { - return; - } - } - - /* - * This is the final check for a concurrent set, so it does need - * a smp_mb() pairing with the second barrier of qemu_event_set(). - * The barrier is inside the FUTEX_WAIT system call. - */ - qemu_futex_wait(ev, EV_BUSY); - } -} - static __thread NotifierList thread_exit; /* diff --git a/util/qemu-thread-win32.c b/util/qemu-thread-win32.c index a7fe3cc..ca2e0b5 100644 --- a/util/qemu-thread-win32.c +++ b/util/qemu-thread-win32.c @@ -231,135 +231,6 @@ void qemu_sem_wait(QemuSemaphore *sem) } } -/* Wrap a Win32 manual-reset event with a fast userspace path. The idea - * is to reset the Win32 event lazily, as part of a test-reset-test-wait - * sequence. Such a sequence is, indeed, how QemuEvents are used by - * RCU and other subsystems! - * - * Valid transitions: - * - free->set, when setting the event - * - busy->set, when setting the event, followed by SetEvent - * - set->free, when resetting the event - * - free->busy, when waiting - * - * set->busy does not happen (it can be observed from the outside but - * it really is set->free->busy). 
- * - * busy->free provably cannot happen; to enforce it, the set->free transition - * is done with an OR, which becomes a no-op if the event has concurrently - * transitioned to free or busy (and is faster than cmpxchg). - */ - -#define EV_SET 0 -#define EV_FREE 1 -#define EV_BUSY -1 - -void qemu_event_init(QemuEvent *ev, bool init) -{ - /* Manual reset. */ - ev->event = CreateEvent(NULL, TRUE, TRUE, NULL); - ev->value = (init ? EV_SET : EV_FREE); - ev->initialized = true; -} - -void qemu_event_destroy(QemuEvent *ev) -{ - assert(ev->initialized); - ev->initialized = false; - CloseHandle(ev->event); -} - -void qemu_event_set(QemuEvent *ev) -{ - assert(ev->initialized); - - /* - * Pairs with both qemu_event_reset() and qemu_event_wait(). - * - * qemu_event_set has release semantics, but because it *loads* - * ev->value we need a full memory barrier here. - */ - smp_mb(); - if (qatomic_read(&ev->value) != EV_SET) { - int old = qatomic_xchg(&ev->value, EV_SET); - - /* Pairs with memory barrier after ResetEvent. */ - smp_mb__after_rmw(); - if (old == EV_BUSY) { - /* There were waiters, wake them up. */ - SetEvent(ev->event); - } - } -} - -void qemu_event_reset(QemuEvent *ev) -{ - assert(ev->initialized); - - /* - * If there was a concurrent reset (or even reset+wait), - * do nothing. Otherwise change EV_SET->EV_FREE. - */ - qatomic_or(&ev->value, EV_FREE); - - /* - * Order reset before checking the condition in the caller. - * Pairs with the first memory barrier in qemu_event_set(). - */ - smp_mb__after_rmw(); -} - -void qemu_event_wait(QemuEvent *ev) -{ - unsigned value; - - assert(ev->initialized); - - /* - * qemu_event_wait must synchronize with qemu_event_set even if it does - * not go down the slow path, so this load-acquire is needed that - * synchronizes with the first memory barrier in qemu_event_set(). - * - * If we do go down the slow path, there is no requirement at all: we - * might miss a qemu_event_set() here but ultimately the memory barrier in - * qemu_futex_wait() will ensure the check is done correctly. - */ - value = qatomic_load_acquire(&ev->value); - if (value != EV_SET) { - if (value == EV_FREE) { - /* - * Here the underlying kernel event is reset, but qemu_event_set is - * not yet going to call SetEvent. However, there will be another - * check for EV_SET below when setting EV_BUSY. At that point it - * is safe to call WaitForSingleObject. - */ - ResetEvent(ev->event); - - /* - * It is not clear whether ResetEvent provides this barrier; kernel - * APIs (KeResetEvent/KeClearEvent) do not. Better safe than sorry! - */ - smp_mb(); - - /* - * Leave the event reset and tell qemu_event_set that there are - * waiters. No need to retry, because there cannot be a concurrent - * busy->free transition. After the CAS, the event will be either - * set or busy. - */ - if (qatomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) { - return; - } - } - - /* - * ev->value is now EV_BUSY. Since we didn't observe EV_SET, - * qemu_event_set() must observe EV_BUSY and call SetEvent(). - */ - WaitForSingleObject(ev->event, INFINITE); - } -} - struct QemuThreadData { /* Passed to win32_start_routine. 
*/
 void *(*start_routine)(void *);
diff --git a/util/qemu-timer.c b/util/qemu-timer.c
index 213114b..1fb48be 100644
--- a/util/qemu-timer.c
+++ b/util/qemu-timer.c
@@ -26,9 +26,10 @@
 #include "qemu/main-loop.h"
 #include "qemu/timer.h"
 #include "qemu/lockable.h"
-#include "sysemu/cpu-timers.h"
-#include "sysemu/replay.h"
-#include "sysemu/cpus.h"
+#include "system/cpu-timers.h"
+#include "exec/icount.h"
+#include "system/replay.h"
+#include "system/cpus.h"
 #ifdef CONFIG_POSIX
 #include <pthread.h>
@@ -182,7 +183,7 @@ bool qemu_clock_has_timers(QEMUClockType type)
 bool timerlist_expired(QEMUTimerList *timer_list)
 {
- int64_t expire_time;
+ int64_t expire_time = 0;
 if (!qatomic_read(&timer_list->active_timers)) {
 return false;
@@ -212,7 +213,7 @@ bool qemu_clock_expired(QEMUClockType type)
 int64_t timerlist_deadline_ns(QEMUTimerList *timer_list)
 {
 int64_t delta;
- int64_t expire_time;
+ int64_t expire_time = 0;
 if (!qatomic_read(&timer_list->active_timers)) {
 return -1;
@@ -286,16 +287,6 @@ int64_t qemu_clock_deadline_ns_all(QEMUClockType type, int attr_mask)
 return deadline;
 }
-QEMUClockType timerlist_get_clock(QEMUTimerList *timer_list)
-{
- return timer_list->clock->type;
-}
-
-QEMUTimerList *qemu_clock_get_main_loop_timerlist(QEMUClockType type)
-{
- return main_loop_tlg.tl[type];
-}
-
 void timerlist_notify(QEMUTimerList *timer_list)
 {
 if (timer_list->notify_cb) {
@@ -419,10 +410,6 @@ static bool timer_mod_ns_locked(QEMUTimerList *timer_list,
 static void timerlist_rearm(QEMUTimerList *timer_list)
 {
- /* Interrupt execution to force deadline recalculation. */
- if (icount_enabled() && timer_list->clock->type == QEMU_CLOCK_VIRTUAL) {
- icount_start_warp_timer();
- }
 timerlist_notify(timer_list);
 }
@@ -461,7 +448,7 @@ void timer_mod_ns(QEMUTimer *ts, int64_t expire_time)
 void timer_mod_anticipate_ns(QEMUTimer *ts, int64_t expire_time)
 {
 QEMUTimerList *timer_list = ts->timer_list;
- bool rearm;
+ bool rearm = false;
 WITH_QEMU_LOCK_GUARD(&timer_list->active_timers_lock) {
 if (ts->expire_time == -1 || ts->expire_time > expire_time) {
diff --git a/util/qht.c b/util/qht.c
--- a/util/qht.c
+++ b/util/qht.c
@@ -367,7 +367,6 @@ void qht_map_lock_buckets__no_stale(struct qht *ht, struct qht_map **pmap)
 qht_map_lock_buckets(map);
 qht_unlock(ht);
 *pmap = map;
- return;
 }
 /*
diff --git a/util/rcu.c b/util/rcu.c
--- a/util/rcu.c
+++ b/util/rcu.c
@@ -20,8 +20,8 @@
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * License along with this library; if not, see
+ * <https://www.gnu.org/licenses/>.
 *
 * IBM's contributions to this file may be relicensed under LGPLv2 or later.
 */
diff --git a/util/s390x_pci_mmio.c b/util/s390x_pci_mmio.c
new file mode 100644
index 0000000..5ab24fa
--- /dev/null
+++ b/util/s390x_pci_mmio.c
@@ -0,0 +1,146 @@
+/*
+ * s390x PCI MMIO definitions
+ *
+ * Copyright 2025 IBM Corp. 
+ * Author(s): Farhan Ali <alifm@linux.ibm.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include <sys/syscall.h> +#include "qemu/s390x_pci_mmio.h" +#include "elf.h" + +union register_pair { + unsigned __int128 pair; + struct { + uint64_t even; + uint64_t odd; + }; +}; + +static bool is_mio_supported; + +static __attribute__((constructor)) void check_is_mio_supported(void) +{ + is_mio_supported = !!(qemu_getauxval(AT_HWCAP) & HWCAP_S390_PCI_MIO); +} + +static uint64_t s390x_pcilgi(const void *ioaddr, size_t len) +{ + union register_pair ioaddr_len = { .even = (uint64_t)ioaddr, + .odd = len }; + uint64_t val; + int cc; + + asm volatile( + /* pcilgi */ + ".insn rre,0xb9d60000,%[val],%[ioaddr_len]\n" + "ipm %[cc]\n" + "srl %[cc],28\n" + : [cc] "=d"(cc), [val] "=d"(val), + [ioaddr_len] "+d"(ioaddr_len.pair) :: "cc"); + + if (cc) { + val = -1ULL; + } + + return val; +} + +static void s390x_pcistgi(void *ioaddr, uint64_t val, size_t len) +{ + union register_pair ioaddr_len = {.even = (uint64_t)ioaddr, .odd = len}; + + asm volatile ( + /* pcistgi */ + ".insn rre,0xb9d40000,%[val],%[ioaddr_len]\n" + : [ioaddr_len] "+d" (ioaddr_len.pair) + : [val] "d" (val) + : "cc", "memory"); +} + +uint8_t s390x_pci_mmio_read_8(const void *ioaddr) +{ + uint8_t val = 0; + + if (is_mio_supported) { + val = s390x_pcilgi(ioaddr, sizeof(val)); + } else { + syscall(__NR_s390_pci_mmio_read, ioaddr, &val, sizeof(val)); + } + return val; +} + +uint16_t s390x_pci_mmio_read_16(const void *ioaddr) +{ + uint16_t val = 0; + + if (is_mio_supported) { + val = s390x_pcilgi(ioaddr, sizeof(val)); + } else { + syscall(__NR_s390_pci_mmio_read, ioaddr, &val, sizeof(val)); + } + return val; +} + +uint32_t s390x_pci_mmio_read_32(const void *ioaddr) +{ + uint32_t val = 0; + + if (is_mio_supported) { + val = s390x_pcilgi(ioaddr, sizeof(val)); + } else { + syscall(__NR_s390_pci_mmio_read, ioaddr, &val, sizeof(val)); + } + return val; +} + +uint64_t s390x_pci_mmio_read_64(const void *ioaddr) +{ + uint64_t val = 0; + + if (is_mio_supported) { + val = s390x_pcilgi(ioaddr, sizeof(val)); + } else { + syscall(__NR_s390_pci_mmio_read, ioaddr, &val, sizeof(val)); + } + return val; +} + +void s390x_pci_mmio_write_8(void *ioaddr, uint8_t val) +{ + if (is_mio_supported) { + s390x_pcistgi(ioaddr, val, sizeof(val)); + } else { + syscall(__NR_s390_pci_mmio_write, ioaddr, &val, sizeof(val)); + } +} + +void s390x_pci_mmio_write_16(void *ioaddr, uint16_t val) +{ + if (is_mio_supported) { + s390x_pcistgi(ioaddr, val, sizeof(val)); + } else { + syscall(__NR_s390_pci_mmio_write, ioaddr, &val, sizeof(val)); + } +} + +void s390x_pci_mmio_write_32(void *ioaddr, uint32_t val) +{ + if (is_mio_supported) { + s390x_pcistgi(ioaddr, val, sizeof(val)); + } else { + syscall(__NR_s390_pci_mmio_write, ioaddr, &val, sizeof(val)); + } +} + +void s390x_pci_mmio_write_64(void *ioaddr, uint64_t val) +{ + if (is_mio_supported) { + s390x_pcistgi(ioaddr, val, sizeof(val)); + } else { + syscall(__NR_s390_pci_mmio_write, ioaddr, &val, sizeof(val)); + } +} diff --git a/util/thread-context.c b/util/thread-context.c index 2bc7883..0146154 100644 --- a/util/thread-context.c +++ b/util/thread-context.c @@ -273,7 +273,7 @@ static void thread_context_instance_complete(UserCreatable *uc, Error **errp) } } -static void thread_context_class_init(ObjectClass *oc, void *data) +static void thread_context_class_init(ObjectClass *oc, const void *data) { UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc); @@ -319,7 +319,7 @@ static const TypeInfo 
thread_context_info = { .instance_size = sizeof(ThreadContext), .instance_init = thread_context_instance_init, .instance_finalize = thread_context_instance_finalize, - .interfaces = (InterfaceInfo[]) { + .interfaces = (const InterfaceInfo[]) { { TYPE_USER_CREATABLE }, { } } diff --git a/util/thread-pool.c b/util/thread-pool.c index 27eb777..d2ead6b 100644 --- a/util/thread-pool.c +++ b/util/thread-pool.c @@ -23,9 +23,9 @@ #include "block/thread-pool.h" #include "qemu/main-loop.h" -static void do_spawn_thread(ThreadPool *pool); +static void do_spawn_thread(ThreadPoolAio *pool); -typedef struct ThreadPoolElement ThreadPoolElement; +typedef struct ThreadPoolElementAio ThreadPoolElementAio; enum ThreadState { THREAD_QUEUED, @@ -33,9 +33,9 @@ enum ThreadState { THREAD_DONE, }; -struct ThreadPoolElement { +struct ThreadPoolElementAio { BlockAIOCB common; - ThreadPool *pool; + ThreadPoolAio *pool; ThreadPoolFunc *func; void *arg; @@ -47,13 +47,13 @@ struct ThreadPoolElement { int ret; /* Access to this list is protected by lock. */ - QTAILQ_ENTRY(ThreadPoolElement) reqs; + QTAILQ_ENTRY(ThreadPoolElementAio) reqs; /* This list is only written by the thread pool's mother thread. */ - QLIST_ENTRY(ThreadPoolElement) all; + QLIST_ENTRY(ThreadPoolElementAio) all; }; -struct ThreadPool { +struct ThreadPoolAio { AioContext *ctx; QEMUBH *completion_bh; QemuMutex lock; @@ -62,10 +62,10 @@ struct ThreadPool { QEMUBH *new_thread_bh; /* The following variables are only accessed from one AioContext. */ - QLIST_HEAD(, ThreadPoolElement) head; + QLIST_HEAD(, ThreadPoolElementAio) head; /* The following variables are protected by lock. */ - QTAILQ_HEAD(, ThreadPoolElement) request_list; + QTAILQ_HEAD(, ThreadPoolElementAio) request_list; int cur_threads; int idle_threads; int new_threads; /* backlog of threads we need to create */ @@ -76,14 +76,14 @@ struct ThreadPool { static void *worker_thread(void *opaque) { - ThreadPool *pool = opaque; + ThreadPoolAio *pool = opaque; qemu_mutex_lock(&pool->lock); pool->pending_threads--; do_spawn_thread(pool); while (pool->cur_threads <= pool->max_threads) { - ThreadPoolElement *req; + ThreadPoolElementAio *req; int ret; if (QTAILQ_EMPTY(&pool->request_list)) { @@ -131,7 +131,7 @@ static void *worker_thread(void *opaque) return NULL; } -static void do_spawn_thread(ThreadPool *pool) +static void do_spawn_thread(ThreadPoolAio *pool) { QemuThread t; @@ -148,14 +148,14 @@ static void do_spawn_thread(ThreadPool *pool) static void spawn_thread_bh_fn(void *opaque) { - ThreadPool *pool = opaque; + ThreadPoolAio *pool = opaque; qemu_mutex_lock(&pool->lock); do_spawn_thread(pool); qemu_mutex_unlock(&pool->lock); } -static void spawn_thread(ThreadPool *pool) +static void spawn_thread(ThreadPoolAio *pool) { pool->cur_threads++; pool->new_threads++; @@ -173,8 +173,8 @@ static void spawn_thread(ThreadPool *pool) static void thread_pool_completion_bh(void *opaque) { - ThreadPool *pool = opaque; - ThreadPoolElement *elem, *next; + ThreadPoolAio *pool = opaque; + ThreadPoolElementAio *elem, *next; defer_call_begin(); /* cb() may use defer_call() to coalesce work */ @@ -184,8 +184,8 @@ restart: continue; } - trace_thread_pool_complete(pool, elem, elem->common.opaque, - elem->ret); + trace_thread_pool_complete_aio(pool, elem, elem->common.opaque, + elem->ret); QLIST_REMOVE(elem, all); if (elem->common.cb) { @@ -217,10 +217,10 @@ restart: static void thread_pool_cancel(BlockAIOCB *acb) { - ThreadPoolElement *elem = (ThreadPoolElement *)acb; - ThreadPool *pool = elem->pool; + 
ThreadPoolElementAio *elem = (ThreadPoolElementAio *)acb; + ThreadPoolAio *pool = elem->pool; - trace_thread_pool_cancel(elem, elem->common.opaque); + trace_thread_pool_cancel_aio(elem, elem->common.opaque); QEMU_LOCK_GUARD(&pool->lock); if (elem->state == THREAD_QUEUED) { @@ -234,16 +234,16 @@ static void thread_pool_cancel(BlockAIOCB *acb) } static const AIOCBInfo thread_pool_aiocb_info = { - .aiocb_size = sizeof(ThreadPoolElement), + .aiocb_size = sizeof(ThreadPoolElementAio), .cancel_async = thread_pool_cancel, }; BlockAIOCB *thread_pool_submit_aio(ThreadPoolFunc *func, void *arg, BlockCompletionFunc *cb, void *opaque) { - ThreadPoolElement *req; + ThreadPoolElementAio *req; AioContext *ctx = qemu_get_current_aio_context(); - ThreadPool *pool = aio_get_thread_pool(ctx); + ThreadPoolAio *pool = aio_get_thread_pool(ctx); /* Assert that the thread submitting work is the same running the pool */ assert(pool->ctx == qemu_get_current_aio_context()); @@ -256,7 +256,7 @@ BlockAIOCB *thread_pool_submit_aio(ThreadPoolFunc *func, void *arg, QLIST_INSERT_HEAD(&pool->head, req, all); - trace_thread_pool_submit(pool, req, arg); + trace_thread_pool_submit_aio(pool, req, arg); qemu_mutex_lock(&pool->lock); if (pool->idle_threads == 0 && pool->cur_threads < pool->max_threads) { @@ -290,12 +290,7 @@ int coroutine_fn thread_pool_submit_co(ThreadPoolFunc *func, void *arg) return tpc.ret; } -void thread_pool_submit(ThreadPoolFunc *func, void *arg) -{ - thread_pool_submit_aio(func, arg, NULL, NULL); -} - -void thread_pool_update_params(ThreadPool *pool, AioContext *ctx) +void thread_pool_update_params(ThreadPoolAio *pool, AioContext *ctx) { qemu_mutex_lock(&pool->lock); @@ -322,7 +317,7 @@ void thread_pool_update_params(ThreadPool *pool, AioContext *ctx) qemu_mutex_unlock(&pool->lock); } -static void thread_pool_init_one(ThreadPool *pool, AioContext *ctx) +static void thread_pool_init_one(ThreadPoolAio *pool, AioContext *ctx) { if (!ctx) { ctx = qemu_get_aio_context(); @@ -342,14 +337,14 @@ static void thread_pool_init_one(ThreadPool *pool, AioContext *ctx) thread_pool_update_params(pool, ctx); } -ThreadPool *thread_pool_new(AioContext *ctx) +ThreadPoolAio *thread_pool_new_aio(AioContext *ctx) { - ThreadPool *pool = g_new(ThreadPool, 1); + ThreadPoolAio *pool = g_new(ThreadPoolAio, 1); thread_pool_init_one(pool, ctx); return pool; } -void thread_pool_free(ThreadPool *pool) +void thread_pool_free_aio(ThreadPoolAio *pool) { if (!pool) { return; @@ -379,3 +374,122 @@ void thread_pool_free(ThreadPool *pool) qemu_mutex_destroy(&pool->lock); g_free(pool); } + +struct ThreadPool { + GThreadPool *t; + size_t cur_work; + QemuMutex cur_work_lock; + QemuCond all_finished_cond; +}; + +typedef struct { + ThreadPoolFunc *func; + void *opaque; + GDestroyNotify opaque_destroy; +} ThreadPoolElement; + +static void thread_pool_func(gpointer data, gpointer user_data) +{ + ThreadPool *pool = user_data; + g_autofree ThreadPoolElement *el = data; + + el->func(el->opaque); + + if (el->opaque_destroy) { + el->opaque_destroy(el->opaque); + } + + QEMU_LOCK_GUARD(&pool->cur_work_lock); + + assert(pool->cur_work > 0); + pool->cur_work--; + + if (pool->cur_work == 0) { + qemu_cond_signal(&pool->all_finished_cond); + } +} + +ThreadPool *thread_pool_new(void) +{ + ThreadPool *pool = g_new(ThreadPool, 1); + + pool->cur_work = 0; + qemu_mutex_init(&pool->cur_work_lock); + qemu_cond_init(&pool->all_finished_cond); + + pool->t = g_thread_pool_new(thread_pool_func, pool, 0, TRUE, NULL); + /* + * g_thread_pool_new() can only return errors if 
initial thread(s) + * creation fails but we ask for 0 initial threads above. + */ + assert(pool->t); + + return pool; +} + +void thread_pool_free(ThreadPool *pool) +{ + /* + * With _wait = TRUE this effectively waits for all + * previously submitted work to complete first. + */ + g_thread_pool_free(pool->t, FALSE, TRUE); + + qemu_cond_destroy(&pool->all_finished_cond); + qemu_mutex_destroy(&pool->cur_work_lock); + + g_free(pool); +} + +void thread_pool_submit(ThreadPool *pool, ThreadPoolFunc *func, + void *opaque, GDestroyNotify opaque_destroy) +{ + ThreadPoolElement *el = g_new(ThreadPoolElement, 1); + + el->func = func; + el->opaque = opaque; + el->opaque_destroy = opaque_destroy; + + WITH_QEMU_LOCK_GUARD(&pool->cur_work_lock) { + pool->cur_work++; + } + + /* + * Ignore the return value since this function can only return errors + * if creation of an additional thread fails but even in this case the + * provided work is still getting queued (just for the existing threads). + */ + g_thread_pool_push(pool->t, el, NULL); +} + +void thread_pool_submit_immediate(ThreadPool *pool, ThreadPoolFunc *func, + void *opaque, GDestroyNotify opaque_destroy) +{ + thread_pool_submit(pool, func, opaque, opaque_destroy); + thread_pool_adjust_max_threads_to_work(pool); +} + +void thread_pool_wait(ThreadPool *pool) +{ + QEMU_LOCK_GUARD(&pool->cur_work_lock); + + while (pool->cur_work > 0) { + qemu_cond_wait(&pool->all_finished_cond, + &pool->cur_work_lock); + } +} + +bool thread_pool_set_max_threads(ThreadPool *pool, + int max_threads) +{ + assert(max_threads > 0); + + return g_thread_pool_set_max_threads(pool->t, max_threads, NULL); +} + +bool thread_pool_adjust_max_threads_to_work(ThreadPool *pool) +{ + QEMU_LOCK_GUARD(&pool->cur_work_lock); + + return thread_pool_set_max_threads(pool, pool->cur_work); +} diff --git a/util/timed-average.c b/util/timed-average.c index 2b49d53..5b5c22a 100644 --- a/util/timed-average.c +++ b/util/timed-average.c @@ -8,10 +8,12 @@ * BenoƮt Canet <benoit.canet@nodalink.com> * Alberto Garcia <berto@igalia.com> * + * SPDX-License-Identifier: GPL-2.0-or-later + * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 2 of the License, or - * (at your option) version 3 or any later version. + * (at your option) any later version. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of diff --git a/util/trace-events b/util/trace-events index 49a4962..bd8f25f 100644 --- a/util/trace-events +++ b/util/trace-events @@ -14,9 +14,9 @@ aio_co_schedule_bh_cb(void *ctx, void *co) "ctx %p co %p" reentrant_aio(void *ctx, const char *name) "ctx %p name %s" # thread-pool.c -thread_pool_submit(void *pool, void *req, void *opaque) "pool %p req %p opaque %p" -thread_pool_complete(void *pool, void *req, void *opaque, int ret) "pool %p req %p opaque %p ret %d" -thread_pool_cancel(void *req, void *opaque) "req %p opaque %p" +thread_pool_submit_aio(void *pool, void *req, void *opaque) "pool %p req %p opaque %p" +thread_pool_complete_aio(void *pool, void *req, void *opaque, int ret) "pool %p req %p opaque %p ret %d" +thread_pool_cancel_aio(void *req, void *opaque) "req %p opaque %p" # buffer.c buffer_resize(const char *buf, size_t olen, size_t len) "%s: old %zd, new %zd" diff --git a/util/userfaultfd.c b/util/userfaultfd.c index 1b2fa94..2396104 100644 --- a/util/userfaultfd.c +++ b/util/userfaultfd.c @@ -240,7 +240,7 @@ int uffd_change_protection(int uffd_fd, void *addr, uint64_t length, * Copy range of source pages to the destination to resolve * missing page fault somewhere in the destination range. * - * Returns 0 on success, negative value in case of an error + * Returns 0 on success, -errno in case of an error * * @uffd_fd: UFFD file descriptor * @dst_addr: destination base address @@ -259,10 +259,11 @@ int uffd_copy_page(int uffd_fd, void *dst_addr, void *src_addr, uffd_copy.mode = dont_wake ? UFFDIO_COPY_MODE_DONTWAKE : 0; if (ioctl(uffd_fd, UFFDIO_COPY, &uffd_copy)) { + int e = errno; error_report("uffd_copy_page() failed: dst_addr=%p src_addr=%p length=%" PRIu64 " mode=%" PRIx64 " errno=%i", dst_addr, src_addr, - length, (uint64_t) uffd_copy.mode, errno); - return -1; + length, (uint64_t) uffd_copy.mode, e); + return -e; } return 0; @@ -273,7 +274,7 @@ int uffd_copy_page(int uffd_fd, void *dst_addr, void *src_addr, * * Fill range pages with zeroes to resolve missing page fault within the range. * - * Returns 0 on success, negative value in case of an error + * Returns 0 on success, -errno in case of an error * * @uffd_fd: UFFD file descriptor * @addr: base address @@ -289,10 +290,11 @@ int uffd_zero_page(int uffd_fd, void *addr, uint64_t length, bool dont_wake) uffd_zeropage.mode = dont_wake ? UFFDIO_ZEROPAGE_MODE_DONTWAKE : 0; if (ioctl(uffd_fd, UFFDIO_ZEROPAGE, &uffd_zeropage)) { + int e = errno; error_report("uffd_zero_page() failed: addr=%p length=%" PRIu64 " mode=%" PRIx64 " errno=%i", addr, length, - (uint64_t) uffd_zeropage.mode, errno); - return -1; + (uint64_t) uffd_zeropage.mode, e); + return -e; } return 0; @@ -306,7 +308,7 @@ int uffd_zero_page(int uffd_fd, void *addr, uint64_t length, bool dont_wake) * via UFFD-IO IOCTLs with MODE_DONTWAKE flag set, then after that all waits * for the whole memory range are satisfied in a single call to uffd_wakeup(). 
*
- * Returns 0 on success, negative value in case of an error
+ * Returns 0 on success, -errno in case of an error
 *
 * @uffd_fd: UFFD file descriptor
 * @addr: base address
@@ -320,9 +322,10 @@ int uffd_wakeup(int uffd_fd, void *addr, uint64_t length)
 uffd_range.len = length;
 if (ioctl(uffd_fd, UFFDIO_WAKE, &uffd_range)) {
+ int e = errno;
 error_report("uffd_wakeup() failed: addr=%p length=%" PRIu64 " errno=%i",
- addr, length, errno);
- return -1;
+ addr, length, e);
+ return -e;
 }
 return 0;
@@ -355,31 +358,3 @@ int uffd_read_events(int uffd_fd, struct uffd_msg *msgs, int count)
 return (int) (res / sizeof(struct uffd_msg));
 }
-
-/**
- * uffd_poll_events: poll UFFD file descriptor for read
- *
- * Returns true if events are available for read, false otherwise
- *
- * @uffd_fd: UFFD file descriptor
- * @tmo: timeout value
- */
-bool uffd_poll_events(int uffd_fd, int tmo)
-{
- int res;
- struct pollfd poll_fd = { .fd = uffd_fd, .events = POLLIN, .revents = 0 };
-
- do {
- res = poll(&poll_fd, 1, tmo);
- } while (res < 0 && errno == EINTR);
-
- if (res == 0) {
- return false;
- }
- if (res < 0) {
- error_report("uffd_poll_events() failed: errno=%i", errno);
- return false;
- }
-
- return (poll_fd.revents & POLLIN) != 0;
-}
diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c
index f8bab46..fdff042 100644
--- a/util/vfio-helpers.c
+++ b/util/vfio-helpers.c
@@ -16,7 +16,7 @@
 #include "qapi/error.h"
 #include "exec/ramlist.h"
 #include "exec/cpu-common.h"
-#include "exec/memory.h"
+#include "system/memory.h"
 #include "trace.h"
 #include "qemu/error-report.h"
 #include "standard-headers/linux/pci_regs.h"
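
The restructured inet_listen_saddr() above replaces the listen_failed/listen_ok label pair with a single fail label and an early return as soon as a listening socket exists; EADDRINUSE from either bind() or listen() now simply moves on to the next candidate port. A minimal, self-contained sketch of that control flow (plain BSD sockets rather than the QEMU helpers; the explicit port-range loop is an assumption standing in for the getaddrinfo()/to= iteration):

#include <errno.h>
#include <netinet/in.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

/* Try ports [port_min, port_max]; return a listening fd, or -1 with errno set. */
static int listen_retry_sketch(int port_min, int port_max, int backlog)
{
    for (int port = port_min; port <= port_max; port++) {
        int fd = socket(AF_INET, SOCK_STREAM, 0);
        if (fd < 0) {
            return -1;
        }

        struct sockaddr_in sa;
        memset(&sa, 0, sizeof(sa));
        sa.sin_family = AF_INET;
        sa.sin_addr.s_addr = htonl(INADDR_ANY);
        sa.sin_port = htons(port);

        if (bind(fd, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
            int e = errno;          /* close() may clobber errno */
            close(fd);
            if (e == EADDRINUSE) {
                continue;           /* port taken, try the next one */
            }
            errno = e;
            return -1;              /* real error, give up */
        }
        if (listen(fd, backlog) < 0) {
            int e = errno;
            close(fd);
            if (e == EADDRINUSE) {
                continue;           /* lost a race for the port, try the next one */
            }
            errno = e;
            return -1;
        }
        return fd;                  /* we have a listening socket */
    }
    errno = EADDRINUSE;
    return -1;
}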
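
Both the listening and connecting paths above now funnel through inet_set_sockopts(), whose definition is not part of the hunks shown here. Presumably it applies SO_KEEPALIVE plus the new TCP_KEEPCNT/TCP_KEEPIDLE/TCP_KEEPINTVL tuning; the sketch below is only a guess at that shape, with the function name and parameters chosen for illustration:

#include <stdbool.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>

/* Hypothetical stand-in for inet_set_sockopts(); returns 0 or -1. */
static int set_keepalive_sketch(int fd, bool keep_alive,
                                int cnt, int idle, int intvl)
{
    int on = keep_alive ? 1 : 0;

    if (setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on)) < 0) {
        return -1;
    }
#ifdef TCP_KEEPCNT
    if (cnt && setsockopt(fd, IPPROTO_TCP, TCP_KEEPCNT, &cnt, sizeof(cnt)) < 0) {
        return -1;                  /* probe count before dropping the link */
    }
#endif
#ifdef TCP_KEEPIDLE
    if (idle && setsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &idle, sizeof(idle)) < 0) {
        return -1;                  /* idle seconds before the first probe */
    }
#endif
#ifdef TCP_KEEPINTVL
    if (intvl && setsockopt(fd, IPPROTO_TCP, TCP_KEEPINTVL, &intvl, sizeof(intvl)) < 0) {
        return -1;                  /* seconds between probes */
    }
#endif
    return 0;
}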
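
With inet_parse() backed by QemuOpts, option handling is delegated to qemu_opts_parse() and the implied "addr" option keeps the plain host:port prefix working. Assuming a build where the corresponding HAVE_TCP_KEEP* macro is defined, a caller could look roughly like this (hostname and values are hypothetical):

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qemu/sockets.h"

static void parse_example(void)
{
    InetSocketAddress addr;
    Error *err = NULL;

    /* "addr" is the implied option, so the host:port prefix needs no key */
    if (inet_parse(&addr, "example.org:4444,keep-alive=on,keep-alive-idle=60",
                   &err) < 0) {
        error_report_err(err);
    }
}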
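
The EV_SET/EV_FREE/EV_BUSY event machinery is deleted from both qemu-thread-posix.c and qemu-thread-win32.c with no replacement visible in these hunks, so presumably the two near-identical copies were consolidated into a common implementation elsewhere in this series. The caller-visible QemuEvent API is unchanged; the test-reset-test-wait sequence the removed comments describe looks roughly like this (ev and flag are hypothetical, with flag set by the producer before qemu_event_set()):

#include "qemu/osdep.h"
#include "qemu/atomic.h"
#include "qemu/thread.h"

static QemuEvent ev;
static int flag;

static void wait_for_flag(void)
{
    for (;;) {
        if (qatomic_read(&flag)) {
            break;                  /* fast path: no reset, no wait */
        }
        qemu_event_reset(&ev);      /* set -> free; no-op if already free/busy */
        if (qatomic_read(&flag)) {
            break;                  /* re-check so a concurrent set is not lost */
        }
        qemu_event_wait(&ev);       /* free -> busy, block until qemu_event_set() */
    }
}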
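
The new util/s390x_pci_mmio.c selects its backend once, at load time: a constructor checks HWCAP_S390_PCI_MIO via qemu_getauxval(AT_HWCAP), and each accessor then either issues the PCILGI/PCISTGI instruction directly or falls back to the s390_pci_mmio_read/write syscalls. From the caller's side the choice is invisible; a sketch, where bar and DOORBELL_OFF are hypothetical (e.g. a vfio-pci BAR mapped into the process with mmap()):

#include "qemu/osdep.h"
#include "qemu/s390x_pci_mmio.h"

#define DOORBELL_OFF 0x40   /* hypothetical register offset */

static void ring_doorbell(uint8_t *bar, uint32_t value)
{
    uint32_t old = s390x_pci_mmio_read_32(bar + DOORBELL_OFF);

    /* Same call whether the PCILGI/PCISTGI or the syscall path is active. */
    s390x_pci_mmio_write_32(bar + DOORBELL_OFF, old | value);
}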
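
thread-pool.c now hosts two pools: ThreadPoolAio, the renamed original that stays tied to an AioContext and completes through a BH, and a new standalone ThreadPool wrapping GThreadPool, which counts outstanding work under cur_work_lock so that thread_pool_wait() can block until the count reaches zero. A usage sketch against the new API (do_work and the items array are hypothetical; note that the generic pool ignores the ThreadPoolFunc return value):

#include "qemu/osdep.h"
#include "block/thread-pool.h"

static int do_work(void *opaque)
{
    /* CPU-bound work on opaque; the return value is ignored by this pool */
    return 0;
}

static void run_batch(void *items[], size_t n_items)
{
    ThreadPool *pool = thread_pool_new();

    for (size_t i = 0; i < n_items; i++) {
        /* NULL: no GDestroyNotify to run on the opaque afterwards */
        thread_pool_submit(pool, do_work, items[i], NULL);
    }

    /* Scale threads to the queued work, then wait for cur_work == 0 */
    thread_pool_adjust_max_threads_to_work(pool);
    thread_pool_wait(pool);

    thread_pool_free(pool);     /* also waits for any remaining work */
}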
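
Finally, uffd_copy_page(), uffd_zero_page() and uffd_wakeup() now return -errno instead of a flat -1, letting callers tell failure modes apart. One plausible pattern (not code from this diff): treating UFFDIO_COPY's EEXIST, i.e. the page raced with another copy and is already populated, as success:

#include <errno.h>
#include "qemu/osdep.h"
#include "qemu/userfaultfd.h"

static int copy_one_page(int uffd_fd, void *dst, void *src, uint64_t psize)
{
    int ret = uffd_copy_page(uffd_fd, dst, src, psize, false);

    if (ret == -EEXIST) {
        ret = 0;    /* raced with another copy: the page is already there */
    }
    return ret;     /* 0, or a genuine failure propagated as -errno */
}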