Diffstat (limited to 'util')
-rw-r--r--  util/aio-posix.c               | 115
-rw-r--r--  util/aio-posix.h               |   1
-rw-r--r--  util/aio-win32.c               |   1
-rw-r--r--  util/async.c                   |  10
-rw-r--r--  util/block-helpers.c           |  28
-rw-r--r--  util/block-helpers.h           |   3
-rw-r--r--  util/cacheflush.c              |   8
-rw-r--r--  util/coroutine-wasm.c          | 127
-rw-r--r--  util/cpuinfo-i386.c            |   1
-rw-r--r--  util/cpuinfo-riscv.c           |  51
-rw-r--r--  util/cutils.c                  |   5
-rw-r--r--  util/envlist.c                 |  69
-rw-r--r--  util/error.c                   |  31
-rw-r--r--  util/event.c                   | 171
-rw-r--r--  util/fdmon-epoll.c             |   1
-rw-r--r--  util/fifo8.c                   |  42
-rw-r--r--  util/hbitmap.c                 |   2
-rw-r--r--  util/hexdump.c                 |  18
-rw-r--r--  util/iov.c                     |  30
-rw-r--r--  util/iova-tree.c               |  69
-rw-r--r--  util/keyval.c                  |   6
-rw-r--r--  util/lockcnt.c                 |  10
-rw-r--r--  util/log.c                     |   2
-rw-r--r--  util/main-loop.c               |   8
-rw-r--r--  util/memfd.c                   |  25
-rw-r--r--  util/meson.build               |  11
-rw-r--r--  util/module.c                  |   2
-rw-r--r--  util/oslib-posix.c             |  97
-rw-r--r--  util/oslib-win32.c             |   6
-rw-r--r--  util/qemu-co-shared-resource.c |   6
-rw-r--r--  util/qemu-config.c             |   4
-rw-r--r--  util/qemu-coroutine.c          |   2
-rw-r--r--  util/qemu-option.c             |   8
-rw-r--r--  util/qemu-sockets.c            | 363
-rw-r--r--  util/qemu-thread-posix.c       | 148
-rw-r--r--  util/qemu-thread-win32.c       | 129
-rw-r--r--  util/qemu-timer.c              |  27
-rw-r--r--  util/qht.c                     |   1
-rw-r--r--  util/rcu.c                     |   4
-rw-r--r--  util/s390x_pci_mmio.c          | 146
-rw-r--r--  util/thread-context.c          |   4
-rw-r--r--  util/thread-pool.c             | 184
-rw-r--r--  util/timed-average.c           |   4
-rw-r--r--  util/trace-events              |   6
-rw-r--r--  util/userfaultfd.c             |  49
-rw-r--r--  util/vfio-helpers.c            |   2
46 files changed, 1266 insertions, 771 deletions
diff --git a/util/aio-posix.c b/util/aio-posix.c
index 266c9dd..2e0a5da 100644
--- a/util/aio-posix.c
+++ b/util/aio-posix.c
@@ -17,6 +17,7 @@
#include "block/block.h"
#include "block/thread-pool.h"
#include "qemu/main-loop.h"
+#include "qemu/lockcnt.h"
#include "qemu/rcu.h"
#include "qemu/rcu_queue.h"
#include "qemu/sockets.h"
@@ -27,6 +28,9 @@
/* Stop userspace polling on a handler if it isn't active for some time */
#define POLL_IDLE_INTERVAL_NS (7 * NANOSECONDS_PER_SECOND)
+static void adjust_polling_time(AioContext *ctx, AioPolledEvent *poll,
+ int64_t block_ns);
+
bool aio_poll_disabled(AioContext *ctx)
{
return qatomic_read(&ctx->poll_disable_cnt);
@@ -391,7 +395,8 @@ static bool aio_dispatch_handler(AioContext *ctx, AioHandler *node)
* scanning all handlers with aio_dispatch_handlers().
*/
static bool aio_dispatch_ready_handlers(AioContext *ctx,
- AioHandlerList *ready_list)
+ AioHandlerList *ready_list,
+ int64_t block_ns)
{
bool progress = false;
AioHandler *node;
@@ -399,6 +404,14 @@ static bool aio_dispatch_ready_handlers(AioContext *ctx,
while ((node = QLIST_FIRST(ready_list))) {
QLIST_REMOVE(node, node_ready);
progress = aio_dispatch_handler(ctx, node) || progress;
+
+ /*
+ * Adjust polling time only after aio_dispatch_handler(), which can
+ * add the handler to ctx->poll_aio_handlers.
+ */
+ if (ctx->poll_max_ns && QLIST_IS_INSERTED(node, node_poll)) {
+ adjust_polling_time(ctx, &node->poll, block_ns);
+ }
}
return progress;
@@ -578,13 +591,19 @@ static bool run_poll_handlers(AioContext *ctx, AioHandlerList *ready_list,
static bool try_poll_mode(AioContext *ctx, AioHandlerList *ready_list,
int64_t *timeout)
{
+ AioHandler *node;
int64_t max_ns;
if (QLIST_EMPTY_RCU(&ctx->poll_aio_handlers)) {
return false;
}
- max_ns = qemu_soonest_timeout(*timeout, ctx->poll_ns);
+ max_ns = 0;
+ QLIST_FOREACH(node, &ctx->poll_aio_handlers, node_poll) {
+ max_ns = MAX(max_ns, node->poll.ns);
+ }
+ max_ns = qemu_soonest_timeout(*timeout, max_ns);
+
if (max_ns && !ctx->fdmon_ops->need_wait(ctx)) {
/*
* Enable poll mode. It pairs with the poll_set_started() in
@@ -599,6 +618,46 @@ static bool try_poll_mode(AioContext *ctx, AioHandlerList *ready_list,
return false;
}
+static void adjust_polling_time(AioContext *ctx, AioPolledEvent *poll,
+ int64_t block_ns)
+{
+ if (block_ns <= poll->ns) {
+ /* This is the sweet spot, no adjustment needed */
+ } else if (block_ns > ctx->poll_max_ns) {
+ /* We'd have to poll for too long, poll less */
+ int64_t old = poll->ns;
+
+ if (ctx->poll_shrink) {
+ poll->ns /= ctx->poll_shrink;
+ } else {
+ poll->ns = 0;
+ }
+
+ trace_poll_shrink(ctx, old, poll->ns);
+ } else if (poll->ns < ctx->poll_max_ns &&
+ block_ns < ctx->poll_max_ns) {
+ /* There is room to grow, poll longer */
+ int64_t old = poll->ns;
+ int64_t grow = ctx->poll_grow;
+
+ if (grow == 0) {
+ grow = 2;
+ }
+
+ if (poll->ns) {
+ poll->ns *= grow;
+ } else {
+ poll->ns = 4000; /* start polling at 4 microseconds */
+ }
+
+ if (poll->ns > ctx->poll_max_ns) {
+ poll->ns = ctx->poll_max_ns;
+ }
+
+ trace_poll_grow(ctx, old, poll->ns);
+ }
+}
+
bool aio_poll(AioContext *ctx, bool blocking)
{
AioHandlerList ready_list = QLIST_HEAD_INITIALIZER(ready_list);
@@ -606,6 +665,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
bool use_notify_me;
int64_t timeout;
int64_t start = 0;
+ int64_t block_ns = 0;
/*
* There cannot be two concurrent aio_poll calls for the same AioContext (or
@@ -678,49 +738,13 @@ bool aio_poll(AioContext *ctx, bool blocking)
aio_notify_accept(ctx);
- /* Adjust polling time */
+ /* Calculate blocked time for adaptive polling */
if (ctx->poll_max_ns) {
- int64_t block_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start;
-
- if (block_ns <= ctx->poll_ns) {
- /* This is the sweet spot, no adjustment needed */
- } else if (block_ns > ctx->poll_max_ns) {
- /* We'd have to poll for too long, poll less */
- int64_t old = ctx->poll_ns;
-
- if (ctx->poll_shrink) {
- ctx->poll_ns /= ctx->poll_shrink;
- } else {
- ctx->poll_ns = 0;
- }
-
- trace_poll_shrink(ctx, old, ctx->poll_ns);
- } else if (ctx->poll_ns < ctx->poll_max_ns &&
- block_ns < ctx->poll_max_ns) {
- /* There is room to grow, poll longer */
- int64_t old = ctx->poll_ns;
- int64_t grow = ctx->poll_grow;
-
- if (grow == 0) {
- grow = 2;
- }
-
- if (ctx->poll_ns) {
- ctx->poll_ns *= grow;
- } else {
- ctx->poll_ns = 4000; /* start polling at 4 microseconds */
- }
-
- if (ctx->poll_ns > ctx->poll_max_ns) {
- ctx->poll_ns = ctx->poll_max_ns;
- }
-
- trace_poll_grow(ctx, old, ctx->poll_ns);
- }
+ block_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start;
}
progress |= aio_bh_poll(ctx);
- progress |= aio_dispatch_ready_handlers(ctx, &ready_list);
+ progress |= aio_dispatch_ready_handlers(ctx, &ready_list, block_ns);
aio_free_deleted_handlers(ctx);
@@ -766,11 +790,18 @@ void aio_context_use_g_source(AioContext *ctx)
void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns,
int64_t grow, int64_t shrink, Error **errp)
{
+ AioHandler *node;
+
+ qemu_lockcnt_inc(&ctx->list_lock);
+ QLIST_FOREACH(node, &ctx->aio_handlers, node) {
+ node->poll.ns = 0;
+ }
+ qemu_lockcnt_dec(&ctx->list_lock);
+
/* No thread synchronization here, it doesn't matter if an incorrect value
* is used once.
*/
ctx->poll_max_ns = max_ns;
- ctx->poll_ns = 0;
ctx->poll_grow = grow;
ctx->poll_shrink = shrink;
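
The new adjust_polling_time() carries over the old grow/shrink policy
unchanged, but applies it to each handler's AioPolledEvent instead of one
per-AioContext value. A standalone sketch of the policy, with the AioContext
fields reduced to plain parameters (the 4 us start value and the default
doubling factor are the ones in the hunk above):

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    static int64_t adjust(int64_t poll_ns, int64_t block_ns,
                          int64_t max_ns, int64_t grow, int64_t shrink)
    {
        if (block_ns <= poll_ns) {
            /* sweet spot, keep the current window */
        } else if (block_ns > max_ns) {
            poll_ns = shrink ? poll_ns / shrink : 0;  /* poll less */
        } else if (poll_ns < max_ns && block_ns < max_ns) {
            poll_ns = poll_ns ? poll_ns * (grow ? grow : 2) : 4000;
            if (poll_ns > max_ns) {
                poll_ns = max_ns;                     /* clamp */
            }
        }
        return poll_ns;
    }

    int main(void)
    {
        int64_t ns = 0;
        /* A handler that keeps blocking ~8 us, with poll-max-ns=32000. */
        for (int i = 0; i < 5; i++) {
            ns = adjust(ns, 8000, 32000, 0, 0);
            printf("poll.ns = %" PRId64 "\n", ns); /* 4000, 8000, 8000... */
        }
        return 0;
    }
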
diff --git a/util/aio-posix.h b/util/aio-posix.h
index 4264c51..82a0201 100644
--- a/util/aio-posix.h
+++ b/util/aio-posix.h
@@ -38,6 +38,7 @@ struct AioHandler {
#endif
int64_t poll_idle_timeout; /* when to stop userspace polling */
bool poll_ready; /* has polling detected an event? */
+ AioPolledEvent poll;
};
/* Add a handler to a ready list */
diff --git a/util/aio-win32.c b/util/aio-win32.c
index d144f93..6583d5c 100644
--- a/util/aio-win32.c
+++ b/util/aio-win32.c
@@ -18,6 +18,7 @@
#include "qemu/osdep.h"
#include "block/block.h"
#include "qemu/main-loop.h"
+#include "qemu/lockcnt.h"
#include "qemu/queue.h"
#include "qemu/sockets.h"
#include "qapi/error.h"
diff --git a/util/async.c b/util/async.c
index 3e3e4fc..2719c62 100644
--- a/util/async.c
+++ b/util/async.c
@@ -30,11 +30,12 @@
#include "block/graph-lock.h"
#include "qemu/main-loop.h"
#include "qemu/atomic.h"
+#include "qemu/lockcnt.h"
#include "qemu/rcu_queue.h"
#include "block/raw-aio.h"
#include "qemu/coroutine_int.h"
#include "qemu/coroutine-tls.h"
-#include "sysemu/cpu-timers.h"
+#include "exec/icount.h"
#include "trace.h"
/***********************************************************/
@@ -368,7 +369,7 @@ aio_ctx_finalize(GSource *source)
QEMUBH *bh;
unsigned flags;
- thread_pool_free(ctx->thread_pool);
+ thread_pool_free_aio(ctx->thread_pool);
#ifdef CONFIG_LINUX_AIO
if (ctx->linux_aio) {
@@ -434,10 +435,10 @@ GSource *aio_get_g_source(AioContext *ctx)
return &ctx->source;
}
-ThreadPool *aio_get_thread_pool(AioContext *ctx)
+ThreadPoolAio *aio_get_thread_pool(AioContext *ctx)
{
if (!ctx->thread_pool) {
- ctx->thread_pool = thread_pool_new(ctx);
+ ctx->thread_pool = thread_pool_new_aio(ctx);
}
return ctx->thread_pool;
}
@@ -608,7 +609,6 @@ AioContext *aio_context_new(Error **errp)
qemu_rec_mutex_init(&ctx->lock);
timerlistgroup_init(&ctx->tlg, aio_timerlist_notify, ctx);
- ctx->poll_ns = 0;
ctx->poll_max_ns = 0;
ctx->poll_grow = 0;
ctx->poll_shrink = 0;
diff --git a/util/block-helpers.c b/util/block-helpers.c
index c485143..052b4e1 100644
--- a/util/block-helpers.c
+++ b/util/block-helpers.c
@@ -10,12 +10,10 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
-#include "qapi/qmp/qerror.h"
#include "block-helpers.h"
/**
* check_block_size:
- * @id: The unique ID of the object
* @name: The name of the property being validated
* @value: The block size in bytes
* @errp: A pointer to an area to store an error
@@ -24,23 +22,23 @@
* 1. At least MIN_BLOCK_SIZE
* 2. No larger than MAX_BLOCK_SIZE
* 3. A power of 2
+ *
+ * Returns: true on success, false on failure
*/
-void check_block_size(const char *id, const char *name, int64_t value,
- Error **errp)
+bool check_block_size(const char *name, int64_t value, Error **errp)
{
- /* value of 0 means "unset" */
- if (value && (value < MIN_BLOCK_SIZE || value > MAX_BLOCK_SIZE)) {
- error_setg(errp, QERR_PROPERTY_VALUE_OUT_OF_RANGE,
- id, name, value, MIN_BLOCK_SIZE, MAX_BLOCK_SIZE);
- return;
+ if (!value) {
+ /* unset */
+ return true;
}
- /* We rely on power-of-2 blocksizes for bitmasks */
- if ((value & (value - 1)) != 0) {
+ if (value < MIN_BLOCK_SIZE || value > MAX_BLOCK_SIZE
+ || (value & (value - 1))) {
error_setg(errp,
- "Property %s.%s doesn't take value '%" PRId64
- "', it's not a power of 2",
- id, name, value);
- return;
+ "parameter %s must be a power of 2 between %" PRId64
+ " and %" PRId64,
+ name, MIN_BLOCK_SIZE, MAX_BLOCK_SIZE);
+ return false;
}
+ return true;
}
diff --git a/util/block-helpers.h b/util/block-helpers.h
index b53295a..838b082 100644
--- a/util/block-helpers.h
+++ b/util/block-helpers.h
@@ -13,7 +13,6 @@
#define MAX_BLOCK_SIZE (2 * MiB)
#define MAX_BLOCK_SIZE_STR "2 MiB"
-void check_block_size(const char *id, const char *name, int64_t value,
- Error **errp);
+bool check_block_size(const char *name, int64_t value, Error **errp);
#endif /* BLOCK_HELPERS_H */
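
With the bool return value, callers can use the conventional errp pattern
instead of probing a local Error. A hypothetical caller, sketched on the
assumption that it is compiled inside the QEMU tree:

    #include "qemu/osdep.h"
    #include "qapi/error.h"
    #include "block-helpers.h"

    /* Hypothetical setter validating a user-supplied block size. */
    static bool set_logical_block_size(int64_t value, Error **errp)
    {
        if (!check_block_size("logical_block_size", value, errp)) {
            return false;       /* errp was already set by the helper */
        }
        /* ... apply the value ... */
        return true;
    }
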
diff --git a/util/cacheflush.c b/util/cacheflush.c
index a089061..17c5891 100644
--- a/util/cacheflush.c
+++ b/util/cacheflush.c
@@ -229,6 +229,10 @@ static void __attribute__((constructor)) init_cache_info(void)
/* Caches are coherent and do not require flushing; symbol inline. */
+#elif defined(EMSCRIPTEN)
+
+/* Wasm doesn't have an executable region of memory. */
+
#elif defined(__aarch64__) && !defined(CONFIG_WIN32)
/*
* For Windows, we use generic implementation of flush_idcache_range, that
@@ -279,9 +283,11 @@ void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
for (p = rw & -dcache_lsize; p < rw + len; p += dcache_lsize) {
asm volatile("dc\tcvau, %0" : : "r" (p) : "memory");
}
- asm volatile("dsb\tish" : : : "memory");
}
+ /* DSB unconditionally to ensure any outstanding writes are committed. */
+ asm volatile("dsb\tish" : : : "memory");
+
/*
* If CTR_EL0.DIC is enabled, Instruction cache cleaning to the Point
* of Unification is not required for instruction to data coherence.
diff --git a/util/coroutine-wasm.c b/util/coroutine-wasm.c
new file mode 100644
index 0000000..cb1ec92
--- /dev/null
+++ b/util/coroutine-wasm.c
@@ -0,0 +1,127 @@
+/*
+ * emscripten fiber coroutine initialization code
+ * based on coroutine-ucontext.c
+ *
+ * Copyright (C) 2006 Anthony Liguori <anthony@codemonkey.ws>
+ * Copyright (C) 2011 Kevin Wolf <kwolf@redhat.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.0 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/coroutine_int.h"
+#include "qemu/coroutine-tls.h"
+
+#include <emscripten/fiber.h>
+
+typedef struct {
+ Coroutine base;
+ void *stack;
+ size_t stack_size;
+
+ void *asyncify_stack;
+ size_t asyncify_stack_size;
+
+ CoroutineAction action;
+
+ emscripten_fiber_t fiber;
+} CoroutineEmscripten;
+
+/**
+ * Per-thread coroutine bookkeeping
+ */
+QEMU_DEFINE_STATIC_CO_TLS(Coroutine *, current);
+QEMU_DEFINE_STATIC_CO_TLS(CoroutineEmscripten *, leader);
+size_t leader_asyncify_stack_size = COROUTINE_STACK_SIZE;
+
+static void coroutine_trampoline(void *co_)
+{
+ Coroutine *co = co_;
+
+ while (true) {
+ co->entry(co->entry_arg);
+ qemu_coroutine_switch(co, co->caller, COROUTINE_TERMINATE);
+ }
+}
+
+Coroutine *qemu_coroutine_new(void)
+{
+ CoroutineEmscripten *co;
+
+ co = g_malloc0(sizeof(*co));
+
+ co->stack_size = COROUTINE_STACK_SIZE;
+ co->stack = qemu_alloc_stack(&co->stack_size);
+
+ co->asyncify_stack_size = COROUTINE_STACK_SIZE;
+ co->asyncify_stack = g_malloc0(co->asyncify_stack_size);
+ emscripten_fiber_init(&co->fiber, coroutine_trampoline, &co->base,
+ co->stack, co->stack_size, co->asyncify_stack,
+ co->asyncify_stack_size);
+
+ return &co->base;
+}
+
+void qemu_coroutine_delete(Coroutine *co_)
+{
+ CoroutineEmscripten *co = DO_UPCAST(CoroutineEmscripten, base, co_);
+
+ qemu_free_stack(co->stack, co->stack_size);
+ g_free(co->asyncify_stack);
+ g_free(co);
+}
+
+CoroutineAction qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
+ CoroutineAction action)
+{
+ CoroutineEmscripten *from = DO_UPCAST(CoroutineEmscripten, base, from_);
+ CoroutineEmscripten *to = DO_UPCAST(CoroutineEmscripten, base, to_);
+
+ set_current(to_);
+ to->action = action;
+ emscripten_fiber_swap(&from->fiber, &to->fiber);
+ return from->action;
+}
+
+Coroutine *qemu_coroutine_self(void)
+{
+ Coroutine *self = get_current();
+
+ if (!self) {
+ CoroutineEmscripten *leaderp = get_leader();
+ if (!leaderp) {
+ leaderp = g_malloc0(sizeof(*leaderp));
+ leaderp->asyncify_stack = g_malloc0(leader_asyncify_stack_size);
+ leaderp->asyncify_stack_size = leader_asyncify_stack_size;
+ emscripten_fiber_init_from_current_context(
+ &leaderp->fiber,
+ leaderp->asyncify_stack,
+ leaderp->asyncify_stack_size);
+ leaderp->stack = leaderp->fiber.stack_limit;
+ leaderp->stack_size =
+ leaderp->fiber.stack_base - leaderp->fiber.stack_limit;
+ set_leader(leaderp);
+ }
+ self = &leaderp->base;
+ set_current(self);
+ }
+ return self;
+}
+
+bool qemu_in_coroutine(void)
+{
+ Coroutine *self = get_current();
+
+ return self && self->caller;
+}
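
The backend above only supplies the fiber plumbing; coroutines are still
created and driven through QEMU's generic coroutine API, which reaches
qemu_coroutine_switch() via the trampoline. A minimal sketch of that generic
usage (not specific to the wasm backend):

    #include "qemu/osdep.h"
    #include "qemu/coroutine.h"

    static void coroutine_fn my_co(void *opaque)
    {
        int *step = opaque;

        *step = 1;
        qemu_coroutine_yield();     /* switches back to the caller */
        *step = 2;
    }

    static void example(void)
    {
        int step = 0;
        Coroutine *co = qemu_coroutine_create(my_co, &step);

        qemu_coroutine_enter(co);   /* runs my_co until the yield */
        assert(step == 1);
        qemu_coroutine_enter(co);   /* resumes after the yield */
        assert(step == 2);
    }
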
diff --git a/util/cpuinfo-i386.c b/util/cpuinfo-i386.c
index 90f92a4..c8c8a1b 100644
--- a/util/cpuinfo-i386.c
+++ b/util/cpuinfo-i386.c
@@ -35,6 +35,7 @@ unsigned __attribute__((constructor)) cpuinfo_init(void)
__cpuid(1, a, b, c, d);
info |= (d & bit_SSE2 ? CPUINFO_SSE2 : 0);
+ info |= (c & bit_OSXSAVE ? CPUINFO_OSXSAVE : 0);
info |= (c & bit_MOVBE ? CPUINFO_MOVBE : 0);
info |= (c & bit_POPCNT ? CPUINFO_POPCNT : 0);
info |= (c & bit_PCLMUL ? CPUINFO_PCLMUL : 0);
diff --git a/util/cpuinfo-riscv.c b/util/cpuinfo-riscv.c
index 497ce12..0291b72 100644
--- a/util/cpuinfo-riscv.c
+++ b/util/cpuinfo-riscv.c
@@ -4,14 +4,17 @@
*/
#include "qemu/osdep.h"
+#include "qemu/host-utils.h"
#include "host/cpuinfo.h"
#ifdef CONFIG_ASM_HWPROBE_H
#include <asm/hwprobe.h>
#include <sys/syscall.h>
+#include <asm/unistd.h>
#endif
unsigned cpuinfo;
+unsigned riscv_lg2_vlenb;
static volatile sig_atomic_t got_sigill;
static void sigill_handler(int signo, siginfo_t *si, void *data)
@@ -33,7 +36,8 @@ static void sigill_handler(int signo, siginfo_t *si, void *data)
/* Called both as constructor and (possibly) via other constructors. */
unsigned __attribute__((constructor)) cpuinfo_init(void)
{
- unsigned left = CPUINFO_ZBA | CPUINFO_ZBB | CPUINFO_ZICOND;
+ unsigned left = CPUINFO_ZBA | CPUINFO_ZBB | CPUINFO_ZBS
+ | CPUINFO_ZICOND | CPUINFO_ZVE64X;
unsigned info = cpuinfo;
if (info) {
@@ -47,9 +51,16 @@ unsigned __attribute__((constructor)) cpuinfo_init(void)
#if defined(__riscv_arch_test) && defined(__riscv_zbb)
info |= CPUINFO_ZBB;
#endif
+#if defined(__riscv_arch_test) && defined(__riscv_zbs)
+ info |= CPUINFO_ZBS;
+#endif
#if defined(__riscv_arch_test) && defined(__riscv_zicond)
info |= CPUINFO_ZICOND;
#endif
+#if defined(__riscv_arch_test) && \
+ (defined(__riscv_vector) || defined(__riscv_zve64x))
+ info |= CPUINFO_ZVE64X;
+#endif
left &= ~info;
#ifdef CONFIG_ASM_HWPROBE_H
@@ -64,15 +75,27 @@ unsigned __attribute__((constructor)) cpuinfo_init(void)
&& pair.key >= 0) {
info |= pair.value & RISCV_HWPROBE_EXT_ZBA ? CPUINFO_ZBA : 0;
info |= pair.value & RISCV_HWPROBE_EXT_ZBB ? CPUINFO_ZBB : 0;
- left &= ~(CPUINFO_ZBA | CPUINFO_ZBB);
+ info |= pair.value & RISCV_HWPROBE_EXT_ZBS ? CPUINFO_ZBS : 0;
+ left &= ~(CPUINFO_ZBA | CPUINFO_ZBB | CPUINFO_ZBS);
#ifdef RISCV_HWPROBE_EXT_ZICOND
info |= pair.value & RISCV_HWPROBE_EXT_ZICOND ? CPUINFO_ZICOND : 0;
left &= ~CPUINFO_ZICOND;
#endif
+ /* For rv64, V is Zve64d, a superset of Zve64x. */
+ info |= pair.value & RISCV_HWPROBE_IMA_V ? CPUINFO_ZVE64X : 0;
+#ifdef RISCV_HWPROBE_EXT_ZVE64X
+ info |= pair.value & RISCV_HWPROBE_EXT_ZVE64X ? CPUINFO_ZVE64X : 0;
+#endif
}
}
#endif /* CONFIG_ASM_HWPROBE_H */
+ /*
+ * We only detect support for vectors with hwprobe. All kernels with
+ * support for vectors in userspace also support the hwprobe syscall.
+ */
+ left &= ~CPUINFO_ZVE64X;
+
if (left) {
struct sigaction sa_old, sa_new;
@@ -99,6 +122,15 @@ unsigned __attribute__((constructor)) cpuinfo_init(void)
left &= ~CPUINFO_ZBB;
}
+ if (left & CPUINFO_ZBS) {
+ /* Probe for Zbs: bext zero,zero,zero. */
+ got_sigill = 0;
+ asm volatile(".insn r 0x33, 5, 0x24, zero, zero, zero"
+ : : : "memory");
+ info |= got_sigill ? 0 : CPUINFO_ZBS;
+ left &= ~CPUINFO_ZBS;
+ }
+
if (left & CPUINFO_ZICOND) {
/* Probe for Zicond: czero.eqz zero,zero,zero. */
got_sigill = 0;
@@ -112,6 +144,21 @@ unsigned __attribute__((constructor)) cpuinfo_init(void)
assert(left == 0);
}
+ if (info & CPUINFO_ZVE64X) {
+ /*
+ * We are guaranteed by RVV-1.0 that VLEN is a power of 2.
+ * We are guaranteed by Zve64x that VLEN >= 64, and that
+ * EEW of {8,16,32,64} are supported.
+ */
+ unsigned long vlenb;
+ /* csrr %0, vlenb */
+ asm volatile(".insn i 0x73, 0x2, %0, zero, -990" : "=r"(vlenb));
+ assert(vlenb >= 8);
+ assert(is_power_of_2(vlenb));
+ /* Cache VLEN in a convenient form. */
+ riscv_lg2_vlenb = ctz32(vlenb);
+ }
+
info |= CPUINFO_ALWAYS;
cpuinfo = info;
return info;
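
riscv_lg2_vlenb caches log2 of the vlenb CSR so later users avoid both the
CSR read and a division. A sketch of how a consumer might recover the vector
sizes, assuming riscv_lg2_vlenb is exported through host/cpuinfo.h as in the
tree:

    #include "host/cpuinfo.h"

    /* Hypothetical helpers deriving VLEN from the cached log2(vlenb). */
    static inline unsigned riscv_vlen_bytes(void)
    {
        return 1u << riscv_lg2_vlenb;
    }

    static inline unsigned riscv_vlen_bits(void)
    {
        return 8u << riscv_lg2_vlenb;
    }
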
diff --git a/util/cutils.c b/util/cutils.c
index 4236403..9803f11 100644
--- a/util/cutils.c
+++ b/util/cutils.c
@@ -1144,11 +1144,6 @@ void qemu_init_exec_dir(const char *argv0)
#endif
}
-const char *qemu_get_exec_dir(void)
-{
- return exec_dir;
-}
-
char *get_relocated_path(const char *dir)
{
size_t prefix_len = strlen(CONFIG_PREFIX);
diff --git a/util/envlist.c b/util/envlist.c
index db937c0..15fdbb1 100644
--- a/util/envlist.c
+++ b/util/envlist.c
@@ -12,9 +12,6 @@ struct envlist {
size_t el_count; /* number of entries */
};
-static int envlist_parse(envlist_t *envlist,
- const char *env, int (*)(envlist_t *, const char *));
-
/*
* Allocates new envlist and returns pointer to it.
*/
@@ -52,72 +49,6 @@ envlist_free(envlist_t *envlist)
}
/*
- * Parses comma separated list of set/modify environment
- * variable entries and updates given enlist accordingly.
- *
- * For example:
- * envlist_parse(el, "HOME=foo,SHELL=/bin/sh");
- *
- * inserts/sets environment variables HOME and SHELL.
- *
- * Returns 0 on success, errno otherwise.
- */
-int
-envlist_parse_set(envlist_t *envlist, const char *env)
-{
- return (envlist_parse(envlist, env, &envlist_setenv));
-}
-
-/*
- * Parses comma separated list of unset environment variable
- * entries and removes given variables from given envlist.
- *
- * Returns 0 on success, errno otherwise.
- */
-int
-envlist_parse_unset(envlist_t *envlist, const char *env)
-{
- return (envlist_parse(envlist, env, &envlist_unsetenv));
-}
-
-/*
- * Parses comma separated list of set, modify or unset entries
- * and calls given callback for each entry.
- *
- * Returns 0 in case of success, errno otherwise.
- */
-static int
-envlist_parse(envlist_t *envlist, const char *env,
- int (*callback)(envlist_t *, const char *))
-{
- char *tmpenv, *envvar;
- char *envsave = NULL;
- int ret = 0;
- assert(callback != NULL);
-
- if ((envlist == NULL) || (env == NULL))
- return (EINVAL);
-
- tmpenv = g_strdup(env);
- envsave = tmpenv;
-
- do {
- envvar = strchr(tmpenv, ',');
- if (envvar != NULL) {
- *envvar = '\0';
- }
- if ((*callback)(envlist, tmpenv) != 0) {
- ret = errno;
- break;
- }
- tmpenv = envvar + 1;
- } while (envvar != NULL);
-
- g_free(envsave);
- return ret;
-}
-
-/*
* Sets environment value to envlist in similar manner
* than putenv(3).
*
diff --git a/util/error.c b/util/error.c
index e5e2472..daea214 100644
--- a/util/error.c
+++ b/util/error.c
@@ -15,15 +15,7 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
-
-struct Error
-{
- char *msg;
- ErrorClass err_class;
- const char *src, *func;
- int line;
- GString *hint;
-};
+#include "qapi/error-internal.h"
Error *error_abort;
Error *error_fatal;
@@ -32,8 +24,13 @@ Error *error_warn;
static void error_handle(Error **errp, Error *err)
{
if (errp == &error_abort) {
- fprintf(stderr, "Unexpected error in %s() at %s:%d:\n",
- err->func, err->src, err->line);
+ if (err->func) {
+ fprintf(stderr, "Unexpected error in %s() at %.*s:%d:\n",
+ err->func, err->src_len, err->src, err->line);
+ } else {
+ fprintf(stderr, "Unexpected error at %.*s:%d:\n",
+ err->src_len, err->src, err->line);
+ }
error_report("%s", error_get_pretty(err));
if (err->hint) {
error_printf("%s", err->hint->str);
@@ -75,6 +72,7 @@ static void error_setv(Error **errp,
g_free(msg);
}
err->err_class = err_class;
+ err->src_len = -1;
err->src = src;
err->line = line;
err->func = func;
@@ -247,6 +245,17 @@ void warn_report_err(Error *err)
error_free(err);
}
+bool warn_report_err_once_cond(bool *printed, Error *err)
+{
+ if (*printed) {
+ error_free(err);
+ return false;
+ }
+ *printed = true;
+ warn_report_err(err);
+ return true;
+}
+
void error_reportf_err(Error *err, const char *fmt, ...)
{
va_list ap;
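
warn_report_err_once_cond() always consumes the Error and reports it only
while *printed is still false, so callers on hot paths do not flood the log.
A caller sketch; the device-I/O helper and its failure mode are hypothetical:

    #include "qemu/osdep.h"
    #include "qapi/error.h"
    #include "qemu/error-report.h"

    static bool do_device_io(Error **errp);   /* hypothetical helper */

    static void poll_device(void)
    {
        static bool warned;
        Error *err = NULL;

        if (!do_device_io(&err)) {
            /* First failure warns; repeats just free the error. */
            warn_report_err_once_cond(&warned, err);
        }
    }
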
diff --git a/util/event.c b/util/event.c
new file mode 100644
index 0000000..5a8141c
--- /dev/null
+++ b/util/event.c
@@ -0,0 +1,171 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include "qemu/osdep.h"
+#include "qemu/thread.h"
+
+/*
+ * Valid transitions:
+ * - FREE -> SET (qemu_event_set)
+ * - BUSY -> SET (qemu_event_set)
+ * - SET -> FREE (qemu_event_reset)
+ * - FREE -> BUSY (qemu_event_wait)
+ *
+ * With futex, the waking and blocking operations follow
+ * BUSY -> SET and FREE -> BUSY, respectively.
+ *
+ * Without futex, BUSY -> SET and FREE -> BUSY never happen. Instead, the waking
+ * operation follows FREE -> SET and the blocking operation will happen in
+ * qemu_event_wait() if the event is not SET.
+ *
+ * SET->BUSY does not happen (it can be observed from the outside but
+ * it really is SET->FREE->BUSY).
+ *
+ * BUSY->FREE provably cannot happen; to enforce it, the SET->FREE transition
+ * is done with an OR, which becomes a no-op if the event has concurrently
+ * transitioned to FREE or BUSY.
+ */
+
+#define EV_SET 0
+#define EV_FREE 1
+#define EV_BUSY -1
+
+void qemu_event_init(QemuEvent *ev, bool init)
+{
+#ifndef HAVE_FUTEX
+ pthread_mutex_init(&ev->lock, NULL);
+ pthread_cond_init(&ev->cond, NULL);
+#endif
+
+ ev->value = (init ? EV_SET : EV_FREE);
+ ev->initialized = true;
+}
+
+void qemu_event_destroy(QemuEvent *ev)
+{
+ assert(ev->initialized);
+ ev->initialized = false;
+#ifndef HAVE_FUTEX
+ pthread_mutex_destroy(&ev->lock);
+ pthread_cond_destroy(&ev->cond);
+#endif
+}
+
+void qemu_event_set(QemuEvent *ev)
+{
+ assert(ev->initialized);
+
+#ifdef HAVE_FUTEX
+ /*
+ * Pairs with both qemu_event_reset() and qemu_event_wait().
+ *
+ * qemu_event_set has release semantics, but because it *loads*
+ * ev->value we need a full memory barrier here.
+ */
+ smp_mb();
+ if (qatomic_read(&ev->value) != EV_SET) {
+ int old = qatomic_xchg(&ev->value, EV_SET);
+
+ /* Pairs with memory barrier in kernel futex_wait system call. */
+ smp_mb__after_rmw();
+ if (old == EV_BUSY) {
+ /* There were waiters, wake them up. */
+ qemu_futex_wake_all(ev);
+ }
+ }
+#else
+ pthread_mutex_lock(&ev->lock);
+ /* Pairs with qemu_event_reset()'s load acquire. */
+ qatomic_store_release(&ev->value, EV_SET);
+ pthread_cond_broadcast(&ev->cond);
+ pthread_mutex_unlock(&ev->lock);
+#endif
+}
+
+void qemu_event_reset(QemuEvent *ev)
+{
+ assert(ev->initialized);
+
+#ifdef HAVE_FUTEX
+ /*
+ * If there was a concurrent reset (or even reset+wait),
+ * do nothing. Otherwise change EV_SET->EV_FREE.
+ */
+ qatomic_or(&ev->value, EV_FREE);
+
+ /*
+ * Order reset before checking the condition in the caller.
+ * Pairs with the first memory barrier in qemu_event_set().
+ */
+ smp_mb__after_rmw();
+#else
+ /*
+ * If futexes are not available, there are no EV_FREE->EV_BUSY
+ * transitions because wakeups are done entirely through the
+ * condition variable. Since qatomic_set() only writes EV_FREE,
+ * the load seems useless but in reality, the acquire synchronizes
+ * with qemu_event_set()'s store release: if qemu_event_reset()
+ * sees EV_SET here, then the caller will certainly see a
+ * successful condition and skip qemu_event_wait():
+ *
+ * done = 1; if (done == 0)
+ * qemu_event_set() { qemu_event_reset() {
+ * lock();
+ * ev->value = EV_SET -----> load ev->value
+ * ev->value = old value | EV_FREE
+ * cond_broadcast()
+ * unlock(); }
+ * } if (done == 0)
+ * // qemu_event_wait() not called
+ */
+ qatomic_set(&ev->value, qatomic_load_acquire(&ev->value) | EV_FREE);
+#endif
+}
+
+void qemu_event_wait(QemuEvent *ev)
+{
+ assert(ev->initialized);
+
+#ifdef HAVE_FUTEX
+ while (true) {
+ /*
+ * qemu_event_wait must synchronize with qemu_event_set even if it does
+ * not go down the slow path, so this load-acquire is needed; it
+ * synchronizes with the first memory barrier in qemu_event_set().
+ */
+ unsigned value = qatomic_load_acquire(&ev->value);
+ if (value == EV_SET) {
+ break;
+ }
+
+ if (value == EV_FREE) {
+ /*
+ * Leave the event reset and tell qemu_event_set that there are
+ * waiters. No need to retry, because there cannot be a concurrent
+ * busy->free transition. After the CAS, the event will be either
+ * set or busy.
+ *
+ * This cmpxchg doesn't have particular ordering requirements if it
+ * succeeds (moving the store earlier can only cause
+ * qemu_event_set() to issue _more_ wakeups), the failing case needs
+ * acquire semantics like the load above.
+ */
+ if (qatomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) {
+ break;
+ }
+ }
+
+ /*
+ * This is the final check for a concurrent set, so it does need
+ * a smp_mb() pairing with the second barrier of qemu_event_set().
+ * The barrier is inside the FUTEX_WAIT system call.
+ */
+ qemu_futex_wait(ev, EV_BUSY);
+ }
+#else
+ pthread_mutex_lock(&ev->lock);
+ while (qatomic_read(&ev->value) != EV_SET) {
+ pthread_cond_wait(&ev->cond, &ev->lock);
+ }
+ pthread_mutex_unlock(&ev->lock);
+#endif
+}
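
The transition table at the top of the file corresponds to the usual
reset/re-check/wait pattern on the consumer side, mirroring the diagram in
qemu_event_reset()'s comment. A sketch; the shared done flag is hypothetical
and ev is assumed initialized with qemu_event_init(&ev, false):

    #include "qemu/osdep.h"
    #include "qemu/atomic.h"
    #include "qemu/thread.h"

    static QemuEvent ev;
    static bool done;

    static void consumer(void)
    {
        while (!qatomic_read(&done)) {
            qemu_event_reset(&ev);      /* SET -> FREE */
            if (qatomic_read(&done)) {
                break;                  /* re-check after the reset */
            }
            qemu_event_wait(&ev);       /* FREE -> BUSY, then block */
        }
    }

    static void producer(void)
    {
        qatomic_set(&done, true);
        qemu_event_set(&ev);            /* FREE/BUSY -> SET, wake */
    }
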
diff --git a/util/fdmon-epoll.c b/util/fdmon-epoll.c
index c6413cb..9fb8800 100644
--- a/util/fdmon-epoll.c
+++ b/util/fdmon-epoll.c
@@ -5,6 +5,7 @@
#include "qemu/osdep.h"
#include <sys/epoll.h>
+#include "qemu/lockcnt.h"
#include "qemu/rcu_queue.h"
#include "aio-posix.h"
diff --git a/util/fifo8.c b/util/fifo8.c
index 1ffa19d..a26da66 100644
--- a/util/fifo8.c
+++ b/util/fifo8.c
@@ -71,18 +71,27 @@ uint8_t fifo8_pop(Fifo8 *fifo)
return ret;
}
-static const uint8_t *fifo8_peekpop_buf(Fifo8 *fifo, uint32_t max,
- uint32_t *numptr, bool do_pop)
+uint8_t fifo8_peek(Fifo8 *fifo)
+{
+ assert(fifo->num > 0);
+ return fifo->data[fifo->head];
+}
+
+static const uint8_t *fifo8_peekpop_bufptr(Fifo8 *fifo, uint32_t max,
+ uint32_t skip, uint32_t *numptr,
+ bool do_pop)
{
uint8_t *ret;
- uint32_t num;
+ uint32_t num, head;
assert(max > 0 && max <= fifo->num);
- num = MIN(fifo->capacity - fifo->head, max);
- ret = &fifo->data[fifo->head];
+ assert(skip <= fifo->num);
+ head = (fifo->head + skip) % fifo->capacity;
+ num = MIN(fifo->capacity - head, max);
+ ret = &fifo->data[head];
if (do_pop) {
- fifo->head += num;
+ fifo->head = head + num;
fifo->head %= fifo->capacity;
fifo->num -= num;
}
@@ -94,15 +103,16 @@ static const uint8_t *fifo8_peekpop_buf(Fifo8 *fifo, uint32_t max,
const uint8_t *fifo8_peek_bufptr(Fifo8 *fifo, uint32_t max, uint32_t *numptr)
{
- return fifo8_peekpop_buf(fifo, max, numptr, false);
+ return fifo8_peekpop_bufptr(fifo, max, 0, numptr, false);
}
const uint8_t *fifo8_pop_bufptr(Fifo8 *fifo, uint32_t max, uint32_t *numptr)
{
- return fifo8_peekpop_buf(fifo, max, numptr, true);
+ return fifo8_peekpop_bufptr(fifo, max, 0, numptr, true);
}
-uint32_t fifo8_pop_buf(Fifo8 *fifo, uint8_t *dest, uint32_t destlen)
+static uint32_t fifo8_peekpop_buf(Fifo8 *fifo, uint8_t *dest, uint32_t destlen,
+ bool do_pop)
{
const uint8_t *buf;
uint32_t n1, n2 = 0;
@@ -113,7 +123,7 @@ uint32_t fifo8_pop_buf(Fifo8 *fifo, uint8_t *dest, uint32_t destlen)
}
len = destlen;
- buf = fifo8_pop_bufptr(fifo, len, &n1);
+ buf = fifo8_peekpop_bufptr(fifo, len, 0, &n1, do_pop);
if (dest) {
memcpy(dest, buf, n1);
}
@@ -122,7 +132,7 @@ uint32_t fifo8_pop_buf(Fifo8 *fifo, uint8_t *dest, uint32_t destlen)
len -= n1;
len = MIN(len, fifo8_num_used(fifo));
if (len) {
- buf = fifo8_pop_bufptr(fifo, len, &n2);
+ buf = fifo8_peekpop_bufptr(fifo, len, do_pop ? 0 : n1, &n2, do_pop);
if (dest) {
memcpy(&dest[n1], buf, n2);
}
@@ -131,6 +141,16 @@ uint32_t fifo8_pop_buf(Fifo8 *fifo, uint8_t *dest, uint32_t destlen)
return n1 + n2;
}
+uint32_t fifo8_pop_buf(Fifo8 *fifo, uint8_t *dest, uint32_t destlen)
+{
+ return fifo8_peekpop_buf(fifo, dest, destlen, true);
+}
+
+uint32_t fifo8_peek_buf(Fifo8 *fifo, uint8_t *dest, uint32_t destlen)
+{
+ return fifo8_peekpop_buf(fifo, dest, destlen, false);
+}
+
void fifo8_drop(Fifo8 *fifo, uint32_t len)
{
len -= fifo8_pop_buf(fifo, NULL, len);
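
fifo8_peek_buf() is the non-destructive twin of fifo8_pop_buf(); the new skip
parameter lets it read the second chunk of a wrapped FIFO without moving the
head. A sketch:

    #include "qemu/osdep.h"
    #include "qemu/fifo8.h"

    static void example(Fifo8 *fifo)
    {
        uint8_t hdr[4];

        /* Inspect a header without consuming it, even across a wrap. */
        if (fifo8_peek_buf(fifo, hdr, sizeof(hdr)) == sizeof(hdr) &&
            hdr[0] == 0x7e) {           /* hypothetical frame marker */
            /* It looks valid, now actually dequeue it. */
            fifo8_pop_buf(fifo, hdr, sizeof(hdr));
        }
    }
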
diff --git a/util/hbitmap.c b/util/hbitmap.c
index 6d6e1b5..d9a1dab 100644
--- a/util/hbitmap.c
+++ b/util/hbitmap.c
@@ -949,7 +949,7 @@ char *hbitmap_sha256(const HBitmap *bitmap, Error **errp)
size_t size = bitmap->sizes[HBITMAP_LEVELS - 1] * sizeof(unsigned long);
char *data = (char *)bitmap->levels[HBITMAP_LEVELS - 1];
char *hash = NULL;
- qcrypto_hash_digest(QCRYPTO_HASH_ALG_SHA256, data, size, &hash, errp);
+ qcrypto_hash_digest(QCRYPTO_HASH_ALGO_SHA256, data, size, &hash, errp);
return hash;
}
diff --git a/util/hexdump.c b/util/hexdump.c
index ae0d499..f29ffce 100644
--- a/util/hexdump.c
+++ b/util/hexdump.c
@@ -15,6 +15,7 @@
#include "qemu/osdep.h"
#include "qemu/cutils.h"
+#include "qemu/host-utils.h"
static inline char hexdump_nibble(unsigned x)
{
@@ -97,3 +98,20 @@ void qemu_hexdump(FILE *fp, const char *prefix,
}
}
+
+void qemu_hexdump_to_buffer(char *restrict buffer, size_t buffer_size,
+ const uint8_t *restrict data, size_t data_size)
+{
+ size_t i;
+ uint64_t required_buffer_size;
+ bool overflow = umul64_overflow(data_size, 2, &required_buffer_size);
+ overflow |= uadd64_overflow(required_buffer_size, 1, &required_buffer_size);
+ assert(!overflow && buffer_size >= required_buffer_size);
+
+ for (i = 0; i < data_size; i++) {
+ uint8_t val = data[i];
+ *(buffer++) = hexdump_nibble(val >> 4);
+ *(buffer++) = hexdump_nibble(val & 0xf);
+ }
+ *buffer = '\0';
+}
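
The overflow-checked assertion encodes the buffer contract: two characters
per input byte plus a terminating NUL. A caller sketch, assuming the
declaration lives next to qemu_hexdump() in qemu/cutils.h:

    #include "qemu/osdep.h"
    #include "qemu/cutils.h"

    static void print_id(const uint8_t *id, size_t len)
    {
        g_autofree char *hex = g_malloc(len * 2 + 1); /* 2 chars/byte + NUL */

        qemu_hexdump_to_buffer(hex, len * 2 + 1, id, len);
        printf("id: %s\n", hex);
    }
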
diff --git a/util/iov.c b/util/iov.c
index 7e73948..f8536f0 100644
--- a/util/iov.c
+++ b/util/iov.c
@@ -3,6 +3,7 @@
*
* Copyright IBM, Corp. 2007, 2008
* Copyright (C) 2010 Red Hat, Inc.
+ * Copyright (c) 2024 Seagate Technology LLC and/or its Affiliates
*
* Author(s):
* Anthony Liguori <aliguori@us.ibm.com>
@@ -36,7 +37,6 @@ size_t iov_from_buf_full(const struct iovec *iov, unsigned int iov_cnt,
offset -= iov[i].iov_len;
}
}
- assert(offset == 0);
return done;
}
@@ -55,7 +55,6 @@ size_t iov_to_buf_full(const struct iovec *iov, const unsigned int iov_cnt,
offset -= iov[i].iov_len;
}
}
- assert(offset == 0);
return done;
}
@@ -74,7 +73,6 @@ size_t iov_memset(const struct iovec *iov, const unsigned int iov_cnt,
offset -= iov[i].iov_len;
}
}
- assert(offset == 0);
return done;
}
@@ -92,7 +90,8 @@ size_t iov_size(const struct iovec *iov, const unsigned int iov_cnt)
/* helper function for iov_send_recv() */
static ssize_t
-do_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt, bool do_send)
+do_send_recv(int sockfd, int flags, struct iovec *iov, unsigned iov_cnt,
+ bool do_send)
{
#ifdef CONFIG_POSIX
ssize_t ret;
@@ -102,8 +101,8 @@ do_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt, bool do_send)
msg.msg_iovlen = iov_cnt;
do {
ret = do_send
- ? sendmsg(sockfd, &msg, 0)
- : recvmsg(sockfd, &msg, 0);
+ ? sendmsg(sockfd, &msg, flags)
+ : recvmsg(sockfd, &msg, flags);
} while (ret < 0 && errno == EINTR);
return ret;
#else
@@ -114,8 +113,8 @@ do_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt, bool do_send)
ssize_t off = 0;
while (i < iov_cnt) {
ssize_t r = do_send
- ? send(sockfd, iov[i].iov_base + off, iov[i].iov_len - off, 0)
- : recv(sockfd, iov[i].iov_base + off, iov[i].iov_len - off, 0);
+ ? send(sockfd, iov[i].iov_base + off, iov[i].iov_len - off, flags)
+ : recv(sockfd, iov[i].iov_base + off, iov[i].iov_len - off, flags);
if (r > 0) {
ret += r;
off += r;
@@ -145,6 +144,15 @@ ssize_t iov_send_recv(int sockfd, const struct iovec *_iov, unsigned iov_cnt,
size_t offset, size_t bytes,
bool do_send)
{
+ return iov_send_recv_with_flags(sockfd, 0, _iov, iov_cnt, offset, bytes,
+ do_send);
+}
+
+ssize_t iov_send_recv_with_flags(int sockfd, int sockflags,
+ const struct iovec *_iov,
+ unsigned iov_cnt, size_t offset,
+ size_t bytes, bool do_send)
+{
ssize_t total = 0;
ssize_t ret;
size_t orig_len, tail;
@@ -192,11 +200,11 @@ ssize_t iov_send_recv(int sockfd, const struct iovec *_iov, unsigned iov_cnt,
assert(iov[niov].iov_len > tail);
orig_len = iov[niov].iov_len;
iov[niov++].iov_len = tail;
- ret = do_send_recv(sockfd, iov, niov, do_send);
+ ret = do_send_recv(sockfd, sockflags, iov, niov, do_send);
/* Undo the changes above before checking for errors */
iov[niov-1].iov_len = orig_len;
} else {
- ret = do_send_recv(sockfd, iov, niov, do_send);
+ ret = do_send_recv(sockfd, sockflags, iov, niov, do_send);
}
if (offset) {
iov[0].iov_base -= offset;
@@ -266,7 +274,6 @@ unsigned iov_copy(struct iovec *dst_iov, unsigned int dst_iov_cnt,
bytes -= len;
offset = 0;
}
- assert(offset == 0);
return j;
}
@@ -337,7 +344,6 @@ size_t qemu_iovec_concat_iov(QEMUIOVector *dst,
soffset -= src_iov[i].iov_len;
}
}
- assert(soffset == 0); /* offset beyond end of src */
return done;
}
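
iov_send_recv_with_flags() simply threads the new flags argument down to
sendmsg()/recvmsg() (or send()/recv() on the non-POSIX path). A sketch of a
receiver peeking at queued data; MSG_PEEK is the standard socket flag,
everything else is hypothetical:

    #include "qemu/osdep.h"
    #include "qemu/iov.h"

    static ssize_t peek_bytes(int fd, void *buf, size_t len)
    {
        struct iovec iov = { .iov_base = buf, .iov_len = len };

        /* Read without dequeuing: the bytes remain for the next recv. */
        return iov_send_recv_with_flags(fd, MSG_PEEK, &iov, 1, 0, len,
                                        false /* do_send */);
    }
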
diff --git a/util/iova-tree.c b/util/iova-tree.c
index 5367897..5b0c95f 100644
--- a/util/iova-tree.c
+++ b/util/iova-tree.c
@@ -115,13 +115,6 @@ const DMAMap *iova_tree_find_iova(const IOVATree *tree, const DMAMap *map)
return args.result;
}
-const DMAMap *iova_tree_find_address(const IOVATree *tree, hwaddr iova)
-{
- const DMAMap map = { .iova = iova, .size = 0 };
-
- return iova_tree_find(tree, &map);
-}
-
static inline void iova_tree_insert_internal(GTree *gtree, DMAMap *range)
{
/* Key and value are sharing the same range data */
@@ -148,22 +141,6 @@ int iova_tree_insert(IOVATree *tree, const DMAMap *map)
return IOVA_OK;
}
-static gboolean iova_tree_traverse(gpointer key, gpointer value,
- gpointer data)
-{
- iova_tree_iterator iterator = data;
- DMAMap *map = key;
-
- g_assert(key == value);
-
- return iterator(map);
-}
-
-void iova_tree_foreach(IOVATree *tree, iova_tree_iterator iterator)
-{
- g_tree_foreach(tree->tree, iova_tree_traverse, iterator);
-}
-
void iova_tree_remove(IOVATree *tree, DMAMap map)
{
const DMAMap *overlap;
@@ -280,3 +257,49 @@ void iova_tree_destroy(IOVATree *tree)
g_tree_destroy(tree->tree);
g_free(tree);
}
+
+static int gpa_tree_compare(gconstpointer a, gconstpointer b, gpointer data)
+{
+ const DMAMap *m1 = a, *m2 = b;
+
+ if (m1->translated_addr > m2->translated_addr + m2->size) {
+ return 1;
+ }
+
+ if (m1->translated_addr + m1->size < m2->translated_addr) {
+ return -1;
+ }
+
+ /* Overlapped */
+ return 0;
+}
+
+IOVATree *gpa_tree_new(void)
+{
+ IOVATree *gpa_tree = g_new0(IOVATree, 1);
+
+ gpa_tree->tree = g_tree_new_full(gpa_tree_compare, NULL, g_free, NULL);
+
+ return gpa_tree;
+}
+
+int gpa_tree_insert(IOVATree *tree, const DMAMap *map)
+{
+ DMAMap *new;
+
+ if (map->translated_addr + map->size < map->translated_addr ||
+ map->perm == IOMMU_NONE) {
+ return IOVA_ERR_INVALID;
+ }
+
+ /* We don't allow inserting ranges that overlap with existing ones */
+ if (iova_tree_find(tree, map)) {
+ return IOVA_ERR_OVERLAP;
+ }
+
+ new = g_new0(DMAMap, 1);
+ memcpy(new, map, sizeof(*new));
+ iova_tree_insert_internal(tree->tree, new);
+
+ return IOVA_OK;
+}
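
gpa_tree_new() reuses the IOVATree machinery but keys the GTree on
translated_addr, i.e. on guest physical addresses. A usage sketch; note that,
per the comparator above, size is inclusive (the last byte of a range is
translated_addr + size):

    #include "qemu/osdep.h"
    #include "qemu/iova-tree.h"

    static void example(void)
    {
        IOVATree *gpa = gpa_tree_new();
        DMAMap map = {
            .translated_addr = 0x100000,
            .size = 0x1000 - 1,     /* inclusive: covers 0x1000 bytes */
            .perm = IOMMU_RW,
        };

        if (gpa_tree_insert(gpa, &map) != IOVA_OK) {
            /* invalid range or overlap with an existing mapping */
        }
        iova_tree_destroy(gpa);
    }
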
diff --git a/util/keyval.c b/util/keyval.c
index 66a5b47..a70629a 100644
--- a/util/keyval.c
+++ b/util/keyval.c
@@ -91,9 +91,9 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
-#include "qapi/qmp/qdict.h"
-#include "qapi/qmp/qlist.h"
-#include "qapi/qmp/qstring.h"
+#include "qobject/qdict.h"
+#include "qobject/qlist.h"
+#include "qobject/qstring.h"
#include "qemu/cutils.h"
#include "qemu/keyval.h"
#include "qemu/help_option.h"
diff --git a/util/lockcnt.c b/util/lockcnt.c
index 5da3694..92c9f8c 100644
--- a/util/lockcnt.c
+++ b/util/lockcnt.c
@@ -7,14 +7,16 @@
* Paolo Bonzini <pbonzini@redhat.com>
*/
#include "qemu/osdep.h"
+#include "qemu/lockcnt.h"
#include "qemu/thread.h"
#include "qemu/atomic.h"
#include "trace.h"
-#ifdef CONFIG_LINUX
-#include "qemu/futex.h"
+#ifdef HAVE_FUTEX
-/* On Linux, bits 0-1 are a futex-based lock, bits 2-31 are the counter.
+/*
+ * When futex is available, bits 0-1 are a futex-based lock, bits 2-31 are the
+ * counter.
* For the mutex algorithm see Ulrich Drepper's "Futexes Are Tricky" (ok,
* this is not the most relaxing citation I could make...). It is similar
* to mutex2 in the paper.
@@ -105,7 +107,7 @@ static bool qemu_lockcnt_cmpxchg_or_wait(QemuLockCnt *lockcnt, int *val,
static void lockcnt_wake(QemuLockCnt *lockcnt)
{
trace_lockcnt_futex_wake(lockcnt);
- qemu_futex_wake(&lockcnt->count, 1);
+ qemu_futex_wake_single(&lockcnt->count);
}
void qemu_lockcnt_inc(QemuLockCnt *lockcnt)
diff --git a/util/log.c b/util/log.c
index 6219819..b87d399 100644
--- a/util/log.c
+++ b/util/log.c
@@ -503,6 +503,8 @@ const QEMULogItem qemu_log_items[] = {
"open a separate log file per thread; filename must contain '%d'" },
{ CPU_LOG_TB_VPU, "vpu",
"include VPU registers in the 'cpu' logging" },
+ { LOG_INVALID_MEM, "invalid_mem",
+ "log invalid memory accesses" },
{ 0, NULL, NULL },
};
diff --git a/util/main-loop.c b/util/main-loop.c
index a0386cf..51aeb24 100644
--- a/util/main-loop.c
+++ b/util/main-loop.c
@@ -26,8 +26,9 @@
#include "qapi/error.h"
#include "qemu/cutils.h"
#include "qemu/timer.h"
-#include "sysemu/cpu-timers.h"
-#include "sysemu/replay.h"
+#include "system/cpu-timers.h"
+#include "exec/icount.h"
+#include "system/replay.h"
#include "qemu/main-loop.h"
#include "block/aio.h"
#include "block/thread-pool.h"
@@ -212,7 +213,6 @@ static void main_loop_init(EventLoopBase *base, Error **errp)
main_loop_update_params(base, errp);
mloop = m;
- return;
}
static bool main_loop_can_be_deleted(EventLoopBase *base)
@@ -220,7 +220,7 @@ static bool main_loop_can_be_deleted(EventLoopBase *base)
return false;
}
-static void main_loop_class_init(ObjectClass *oc, void *class_data)
+static void main_loop_class_init(ObjectClass *oc, const void *class_data)
{
EventLoopBaseClass *bc = EVENT_LOOP_BASE_CLASS(oc);
diff --git a/util/memfd.c b/util/memfd.c
index 4a3c07e..07beab1 100644
--- a/util/memfd.c
+++ b/util/memfd.c
@@ -28,6 +28,7 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
+#include "qemu/error-report.h"
#include "qemu/memfd.h"
#include "qemu/host-utils.h"
@@ -149,11 +150,15 @@ err:
void qemu_memfd_free(void *ptr, size_t size, int fd)
{
if (ptr) {
- munmap(ptr, size);
+ if (munmap(ptr, size) != 0) {
+ error_report("memfd munmap() failed: %s", strerror(errno));
+ }
}
if (fd != -1) {
- close(fd);
+ if (close(fd) != 0) {
+ error_report("memfd close() failed: %s", strerror(errno));
+ }
}
}
@@ -189,17 +194,27 @@ bool qemu_memfd_alloc_check(void)
/**
* qemu_memfd_check():
*
- * Check if host supports memfd.
+ * Check if host supports memfd. Cache the answer for the common case flags=0.
*/
bool qemu_memfd_check(unsigned int flags)
{
#ifdef CONFIG_LINUX
- int mfd = memfd_create("test", flags | MFD_CLOEXEC);
+ int mfd;
+ static int memfd_check = MEMFD_TODO;
+ if (!flags && memfd_check != MEMFD_TODO) {
+ return memfd_check;
+ }
+
+ mfd = memfd_create("test", flags | MFD_CLOEXEC);
if (mfd >= 0) {
close(mfd);
- return true;
}
+ if (!flags) {
+ memfd_check = (mfd >= 0) ? MEMFD_OK : MEMFD_KO;
+ }
+ return (mfd >= 0);
+
#endif
return false;
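
Only flags == 0 hits the cache; any non-zero flags still probes the kernel
with a throwaway memfd. A caller sketch (MFD_ALLOW_SEALING is the usual Linux
flag, assumed to have a fallback definition in qemu/memfd.h):

    #include "qemu/osdep.h"
    #include "qemu/memfd.h"

    static bool can_use_sealed_memfd(void)
    {
        /* Non-zero flags bypass the cache and probe every time. */
        return qemu_memfd_check(MFD_ALLOW_SEALING);
    }
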
diff --git a/util/meson.build b/util/meson.build
index 5d8bef9..3502938 100644
--- a/util/meson.build
+++ b/util/meson.build
@@ -11,7 +11,9 @@ if host_os != 'windows'
endif
util_ss.add(files('compatfd.c'))
util_ss.add(files('event_notifier-posix.c'))
- util_ss.add(files('mmap-alloc.c'))
+ if host_os != 'emscripten'
+ util_ss.add(files('mmap-alloc.c'))
+ endif
freebsd_dep = []
if host_os == 'freebsd'
freebsd_dep = util
@@ -25,7 +27,7 @@ else
util_ss.add(files('event_notifier-win32.c'))
util_ss.add(files('oslib-win32.c'))
util_ss.add(files('qemu-thread-win32.c'))
- util_ss.add(winmm, pathcch)
+ util_ss.add(winmm, pathcch, synchronization)
endif
util_ss.add(when: linux_io_uring, if_true: files('fdmon-io_uring.c'))
if glib_has_gslice
@@ -33,6 +35,7 @@ if glib_has_gslice
endif
util_ss.add(files('defer-call.c'))
util_ss.add(files('envlist.c', 'path.c', 'module.c'))
+util_ss.add(files('event.c'))
util_ss.add(files('host-utils.c'))
util_ss.add(files('bitmap.c', 'bitops.c'))
util_ss.add(files('fifo8.c'))
@@ -84,6 +87,8 @@ if have_block or have_ga
util_ss.add(files('qemu-coroutine.c', 'qemu-coroutine-lock.c', 'qemu-coroutine-io.c'))
util_ss.add(files(f'coroutine-@coroutine_backend@.c'))
util_ss.add(files('thread-pool.c', 'qemu-timer.c'))
+endif
+if have_block or have_ga or have_user
util_ss.add(files('qemu-sockets.c'))
endif
if have_block
@@ -129,4 +134,6 @@ elif cpu in ['ppc', 'ppc64']
util_ss.add(files('cpuinfo-ppc.c'))
elif cpu in ['riscv32', 'riscv64']
util_ss.add(files('cpuinfo-riscv.c'))
+elif cpu == 's390x'
+ util_ss.add(files('s390x_pci_mmio.c'))
endif
diff --git a/util/module.c b/util/module.c
index 3eb0f06..1aa2079 100644
--- a/util/module.c
+++ b/util/module.c
@@ -234,7 +234,7 @@ int module_load(const char *prefix, const char *name, Error **errp)
search_dir = getenv("QEMU_MODULE_DIR");
if (search_dir != NULL) {
- dirs[n_dirs++] = g_strdup_printf("%s", search_dir);
+ dirs[n_dirs++] = g_strdup(search_dir);
}
dirs[n_dirs++] = get_relocated_path(CONFIG_QEMU_MODDIR);
diff --git a/util/oslib-posix.c b/util/oslib-posix.c
index 11b35e4..4ff577e 100644
--- a/util/oslib-posix.c
+++ b/util/oslib-posix.c
@@ -31,7 +31,7 @@
#include <glib/gprintf.h>
-#include "sysemu/sysemu.h"
+#include "system/system.h"
#include "trace.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
@@ -58,6 +58,7 @@
#include <lwp.h>
#endif
+#include "qemu/memalign.h"
#include "qemu/mmap-alloc.h"
#define MAX_MEM_PREALLOC_THREAD_COUNT 16
@@ -111,6 +112,21 @@ int qemu_get_thread_id(void)
#endif
}
+int qemu_kill_thread(int tid, int sig)
+{
+#if defined(__linux__)
+ return syscall(__NR_tgkill, getpid(), tid, sig);
+#elif defined(__FreeBSD__)
+ return thr_kill2(getpid(), tid, sig);
+#elif defined(__NetBSD__)
+ return _lwp_kill(tid, sig);
+#elif defined(__OpenBSD__)
+ return thrkill(tid, sig, NULL);
+#else
+ return kill(tid, sig);
+#endif
+}
+
int qemu_daemon(int nochdir, int noclose)
{
return daemon(nochdir, noclose);
@@ -195,11 +211,21 @@ void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment, bool shared,
const uint32_t qemu_map_flags = (shared ? QEMU_MAP_SHARED : 0) |
(noreserve ? QEMU_MAP_NORESERVE : 0);
size_t align = QEMU_VMALLOC_ALIGN;
+#ifndef EMSCRIPTEN
void *ptr = qemu_ram_mmap(-1, size, align, qemu_map_flags, 0);
if (ptr == MAP_FAILED) {
return NULL;
}
+#else
+ /*
+ * qemu_ram_mmap is not implemented for Emscripten. Use qemu_memalign
+ * for the anonymous allocation. noreserve is ignored as there is no swap
+ * space on Emscripten, and shared is ignored as there is no other
+ * processes on Emscripten.
+ */
+ void *ptr = qemu_memalign(align, size);
+#endif
if (alignment) {
*alignment = align;
@@ -212,7 +238,16 @@ void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment, bool shared,
void qemu_anon_ram_free(void *ptr, size_t size)
{
trace_qemu_anon_ram_free(ptr, size);
+#ifndef EMSCRIPTEN
qemu_ram_munmap(-1, ptr, size);
+#else
+ /*
+ * qemu_ram_munmap is not implemented for Emscripten and qemu_memalign
+ * was used for the allocation. Use the corresponding freeing function
+ * here.
+ */
+ qemu_vfree(ptr);
+#endif
}
void qemu_socket_set_block(int fd)
@@ -573,7 +608,15 @@ bool qemu_prealloc_mem(int fd, char *area, size_t sz, int max_threads,
{
static gsize initialized;
int ret;
+#ifndef EMSCRIPTEN
size_t hpagesize = qemu_fd_getpagesize(fd);
+#else
+ /*
+ * mmap-alloc.c is excluded from Emscripten build, so qemu_fd_getpagesize
+ * is unavailable. Fallback to the lower level implementation.
+ */
+ size_t hpagesize = qemu_real_host_page_size();
+#endif
size_t numpages = DIV_ROUND_UP(sz, hpagesize);
bool use_madv_populate_write;
struct sigaction act;
@@ -931,3 +974,55 @@ void qemu_close_all_open_fd(const int *skip, unsigned int nskip)
qemu_close_all_open_fd_fallback(skip, nskip, open_max);
}
}
+
+int qemu_shm_alloc(size_t size, Error **errp)
+{
+ g_autoptr(GString) shm_name = g_string_new(NULL);
+ int fd, oflag, cur_sequence;
+ static int sequence;
+ mode_t mode;
+
+ cur_sequence = qatomic_fetch_inc(&sequence);
+
+ /*
+ * Let's use `mode = 0` because we don't want other processes to open our
+ * memory unless we share the file descriptor with them.
+ */
+ mode = 0;
+ oflag = O_RDWR | O_CREAT | O_EXCL;
+
+ /*
+ * Some operating systems allow creating anonymous POSIX shared memory
+ * objects (e.g. FreeBSD provides the SHM_ANON constant), but this is not
+ * defined by POSIX, so let's create a unique name.
+ *
+ * From Linux's shm_open(3) man-page:
+ * For portable use, a shared memory object should be identified
+ * by a name of the form /somename;
+ */
+ g_string_printf(shm_name, "/qemu-" FMT_pid "-shm-%d", getpid(),
+ cur_sequence);
+
+ fd = shm_open(shm_name->str, oflag, mode);
+ if (fd < 0) {
+ error_setg_errno(errp, errno,
+ "failed to create POSIX shared memory");
+ return -1;
+ }
+
+ /*
+ * We have the file descriptor, so we no longer need to expose the
+ * POSIX shared memory object. However it will remain allocated as long as
+ * there are file descriptors pointing to it.
+ */
+ shm_unlink(shm_name->str);
+
+ if (ftruncate(fd, size) == -1) {
+ error_setg_errno(errp, errno,
+ "failed to resize POSIX shared memory to %zu", size);
+ close(fd);
+ return -1;
+ }
+
+ return fd;
+}
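
qemu_shm_alloc() hands back an already-unlinked POSIX shm fd sized with
ftruncate(); a caller typically mmaps it and keeps the fd around for sharing.
A sketch, assuming the declaration is visible via qemu/osdep.h:

    #include "qemu/osdep.h"
    #include "qapi/error.h"

    static void *alloc_shared(size_t size, Error **errp)
    {
        void *ptr;
        int fd = qemu_shm_alloc(size, errp);

        if (fd < 0) {
            return NULL;
        }
        ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        if (ptr == MAP_FAILED) {
            error_setg_errno(errp, errno, "mmap of shm fd failed");
            close(fd);
            return NULL;
        }
        /* Keep fd open: passing it to another process shares the memory. */
        return ptr;
    }
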
diff --git a/util/oslib-win32.c b/util/oslib-win32.c
index b623830..b735163 100644
--- a/util/oslib-win32.c
+++ b/util/oslib-win32.c
@@ -877,3 +877,9 @@ void qemu_win32_map_free(void *ptr, HANDLE h, Error **errp)
}
CloseHandle(h);
}
+
+int qemu_shm_alloc(size_t size, Error **errp)
+{
+ error_setg(errp, "Shared memory is not supported.");
+ return -1;
+}
diff --git a/util/qemu-co-shared-resource.c b/util/qemu-co-shared-resource.c
index a66cc07..752eb5a 100644
--- a/util/qemu-co-shared-resource.c
+++ b/util/qemu-co-shared-resource.c
@@ -66,12 +66,6 @@ static bool co_try_get_from_shres_locked(SharedResource *s, uint64_t n)
return false;
}
-bool co_try_get_from_shres(SharedResource *s, uint64_t n)
-{
- QEMU_LOCK_GUARD(&s->lock);
- return co_try_get_from_shres_locked(s, n);
-}
-
void coroutine_fn co_get_from_shres(SharedResource *s, uint64_t n)
{
assert(n <= s->total);
diff --git a/util/qemu-config.c b/util/qemu-config.c
index a90c18d..d1fc49c 100644
--- a/util/qemu-config.c
+++ b/util/qemu-config.c
@@ -1,8 +1,8 @@
#include "qemu/osdep.h"
#include "block/qdict.h" /* for qdict_extract_subqdict() */
#include "qapi/error.h"
-#include "qapi/qmp/qdict.h"
-#include "qapi/qmp/qlist.h"
+#include "qobject/qdict.h"
+#include "qobject/qlist.h"
#include "qemu/error-report.h"
#include "qemu/option.h"
#include "qemu/config-file.h"
diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
index eb4eebe..64d6264 100644
--- a/util/qemu-coroutine.c
+++ b/util/qemu-coroutine.c
@@ -136,7 +136,7 @@ static Coroutine *coroutine_pool_get_local(void)
static void coroutine_pool_refill_local(void)
{
CoroutinePool *local_pool = get_ptr_local_pool();
- CoroutinePoolBatch *batch;
+ CoroutinePoolBatch *batch = NULL;
WITH_QEMU_LOCK_GUARD(&global_pool_lock) {
batch = QSLIST_FIRST(&global_pool);
diff --git a/util/qemu-option.c b/util/qemu-option.c
index 201f7a8..770300df 100644
--- a/util/qemu-option.c
+++ b/util/qemu-option.c
@@ -27,10 +27,10 @@
#include "qapi/error.h"
#include "qemu/error-report.h"
-#include "qapi/qmp/qbool.h"
-#include "qapi/qmp/qdict.h"
-#include "qapi/qmp/qnum.h"
-#include "qapi/qmp/qstring.h"
+#include "qobject/qbool.h"
+#include "qobject/qdict.h"
+#include "qobject/qnum.h"
+#include "qobject/qstring.h"
#include "qapi/qmp/qerror.h"
#include "qemu/option_int.h"
#include "qemu/cutils.h"
diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c
index 60c44b2..4773755 100644
--- a/util/qemu-sockets.c
+++ b/util/qemu-sockets.c
@@ -30,6 +30,7 @@
#include "qapi/qobject-input-visitor.h"
#include "qapi/qobject-output-visitor.h"
#include "qemu/cutils.h"
+#include "qemu/option.h"
#include "trace.h"
#ifndef AI_ADDRCONFIG
@@ -44,6 +45,14 @@
# define AI_NUMERICSERV 0
#endif
+/*
+ * On macOS TCP_KEEPIDLE is available under a different name, TCP_KEEPALIVE.
+ * https://github.com/apple/darwin-xnu/blob/xnu-4570.1.46/bsd/man/man4/tcp.4#L172
+ */
+#if defined(TCP_KEEPALIVE) && !defined(TCP_KEEPIDLE)
+# define TCP_KEEPIDLE TCP_KEEPALIVE
+#endif
+
static int inet_getport(struct addrinfo *e)
{
@@ -205,6 +214,58 @@ static int try_bind(int socket, InetSocketAddress *saddr, struct addrinfo *e)
#endif
}
+static int inet_set_sockopts(int sock, InetSocketAddress *saddr, Error **errp)
+{
+ if (saddr->keep_alive) {
+ int keep_alive = 1;
+ int ret = setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
+ &keep_alive, sizeof(keep_alive));
+
+ if (ret < 0) {
+ error_setg_errno(errp, errno,
+ "Unable to set keep-alive option on socket");
+ return -1;
+ }
+#ifdef HAVE_TCP_KEEPCNT
+ if (saddr->has_keep_alive_count && saddr->keep_alive_count) {
+ int keep_count = saddr->keep_alive_count;
+ ret = setsockopt(sock, IPPROTO_TCP, TCP_KEEPCNT, &keep_count,
+ sizeof(keep_count));
+ if (ret < 0) {
+ error_setg_errno(errp, errno,
+ "Unable to set TCP keep-alive count option on socket");
+ return -1;
+ }
+ }
+#endif
+#ifdef HAVE_TCP_KEEPIDLE
+ if (saddr->has_keep_alive_idle && saddr->keep_alive_idle) {
+ int keep_idle = saddr->keep_alive_idle;
+ ret = setsockopt(sock, IPPROTO_TCP, TCP_KEEPIDLE, &keep_idle,
+ sizeof(keep_idle));
+ if (ret < 0) {
+ error_setg_errno(errp, errno,
+ "Unable to set TCP keep-alive idle option on socket");
+ return -1;
+ }
+ }
+#endif
+#ifdef HAVE_TCP_KEEPINTVL
+ if (saddr->has_keep_alive_interval && saddr->keep_alive_interval) {
+ int keep_interval = saddr->keep_alive_interval;
+ ret = setsockopt(sock, IPPROTO_TCP, TCP_KEEPINTVL, &keep_interval,
+ sizeof(keep_interval));
+ if (ret < 0) {
+ error_setg_errno(errp, errno,
+ "Unable to set TCP keep-alive interval option on socket");
+ return -1;
+ }
+ }
+#endif
+ }
+ return 0;
+}
+
static int inet_listen_saddr(InetSocketAddress *saddr,
int port_offset,
int num,
@@ -220,12 +281,6 @@ static int inet_listen_saddr(InetSocketAddress *saddr,
int saved_errno = 0;
bool socket_created = false;
- if (saddr->keep_alive) {
- error_setg(errp, "keep-alive option is not supported for passive "
- "sockets");
- return -1;
- }
-
memset(&ai,0, sizeof(ai));
ai.ai_flags = AI_PASSIVE;
if (saddr->has_numeric && saddr->numeric) {
@@ -287,11 +342,20 @@ static int inet_listen_saddr(InetSocketAddress *saddr,
port_min = inet_getport(e);
port_max = saddr->has_to ? saddr->to + port_offset : port_min;
for (p = port_min; p <= port_max; p++) {
+ if (slisten >= 0) {
+ /*
+ * We have a socket we tried with the previous port. It cannot
+ * be rebound, so we need to close it and create a new one.
+ */
+ close(slisten);
+ slisten = -1;
+ }
inet_setport(e, p);
slisten = create_fast_reuse_socket(e);
if (slisten < 0) {
- /* First time we expect we might fail to create the socket
+ /*
+ * The first time around, we expect that socket creation may fail,
+ * e.g. if 'e' has AF_INET6 but the ipv6 kmod is not loaded.
* Later iterations should always succeed if first iteration
* worked though, so treat that as fatal.
@@ -301,40 +365,41 @@ static int inet_listen_saddr(InetSocketAddress *saddr,
} else {
error_setg_errno(errp, errno,
"Failed to recreate failed listening socket");
- goto listen_failed;
+ goto fail;
}
}
socket_created = true;
rc = try_bind(slisten, saddr, e);
if (rc < 0) {
- if (errno != EADDRINUSE) {
- error_setg_errno(errp, errno, "Failed to bind socket");
- goto listen_failed;
- }
- } else {
- if (!listen(slisten, num)) {
- goto listen_ok;
+ if (errno == EADDRINUSE) {
+ /* This port is already used, try the next one */
+ continue;
}
- if (errno != EADDRINUSE) {
- error_setg_errno(errp, errno, "Failed to listen on socket");
- goto listen_failed;
+ error_setg_errno(errp, errno, "Failed to bind socket");
+ goto fail;
+ }
+ if (listen(slisten, num)) {
+ if (errno == EADDRINUSE) {
+ /* This port is already used, try the next one */
+ continue;
}
+ error_setg_errno(errp, errno, "Failed to listen on socket");
+ goto fail;
+ }
+ /* We have a listening socket */
+ if (inet_set_sockopts(slisten, saddr, errp) < 0) {
+ goto fail;
}
- /* Someone else managed to bind to the same port and beat us
- * to listen on it! Socket semantics does not allow us to
- * recover from this situation, so we need to recreate the
- * socket to allow bind attempts for subsequent ports:
- */
- close(slisten);
- slisten = -1;
+ freeaddrinfo(res);
+ return slisten;
}
}
error_setg_errno(errp, errno,
socket_created ?
"Failed to find an available port" :
"Failed to create a socket");
-listen_failed:
+fail:
saved_errno = errno;
if (slisten >= 0) {
close(slisten);
@@ -342,10 +407,6 @@ listen_failed:
freeaddrinfo(res);
errno = saved_errno;
return -1;
-
-listen_ok:
- freeaddrinfo(res);
- return slisten;
}
#ifdef _WIN32
@@ -367,7 +428,6 @@ static int inet_connect_addr(const InetSocketAddress *saddr,
addr->ai_family);
return -1;
}
- socket_set_fast_reuse(sock);
/* connect to peer */
do {
@@ -476,16 +536,9 @@ int inet_connect_saddr(InetSocketAddress *saddr, Error **errp)
return sock;
}
- if (saddr->keep_alive) {
- int val = 1;
- int ret = setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
- &val, sizeof(val));
-
- if (ret < 0) {
- error_setg_errno(errp, errno, "Unable to set KEEPALIVE");
- close(sock);
- return -1;
- }
+ if (inet_set_sockopts(sock, saddr, errp) < 0) {
+ close(sock);
+ return -1;
}
return sock;
@@ -592,141 +645,146 @@ err:
return -1;
}
-/* compatibility wrapper */
-static int inet_parse_flag(const char *flagname, const char *optstr, bool *val,
- Error **errp)
-{
- char *end;
- size_t len;
-
- end = strstr(optstr, ",");
- if (end) {
- if (end[1] == ',') { /* Reject 'ipv6=on,,foo' */
- error_setg(errp, "error parsing '%s' flag '%s'", flagname, optstr);
- return -1;
- }
- len = end - optstr;
- } else {
- len = strlen(optstr);
- }
- if (len == 0 || (len == 3 && strncmp(optstr, "=on", len) == 0)) {
- *val = true;
- } else if (len == 4 && strncmp(optstr, "=off", len) == 0) {
- *val = false;
- } else {
- error_setg(errp, "error parsing '%s' flag '%s'", flagname, optstr);
- return -1;
- }
- return 0;
-}
+static QemuOptsList inet_opts = {
+ .name = "InetSocketAddress",
+ .head = QTAILQ_HEAD_INITIALIZER(inet_opts.head),
+ .implied_opt_name = "addr",
+ .desc = {
+ {
+ .name = "addr",
+ .type = QEMU_OPT_STRING,
+ },
+ {
+ .name = "numeric",
+ .type = QEMU_OPT_BOOL,
+ },
+ {
+ .name = "to",
+ .type = QEMU_OPT_NUMBER,
+ },
+ {
+ .name = "ipv4",
+ .type = QEMU_OPT_BOOL,
+ },
+ {
+ .name = "ipv6",
+ .type = QEMU_OPT_BOOL,
+ },
+ {
+ .name = "keep-alive",
+ .type = QEMU_OPT_BOOL,
+ },
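+ /* Finer-grained keep-alive tuning, where the platform supports it */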
+#ifdef HAVE_TCP_KEEPCNT
+ {
+ .name = "keep-alive-count",
+ .type = QEMU_OPT_NUMBER,
+ },
+#endif
+#ifdef HAVE_TCP_KEEPIDLE
+ {
+ .name = "keep-alive-idle",
+ .type = QEMU_OPT_NUMBER,
+ },
+#endif
+#ifdef HAVE_TCP_KEEPINTVL
+ {
+ .name = "keep-alive-interval",
+ .type = QEMU_OPT_NUMBER,
+ },
+#endif
+#ifdef HAVE_IPPROTO_MPTCP
+ {
+ .name = "mptcp",
+ .type = QEMU_OPT_BOOL,
+ },
+#endif
+ { /* end of list */ }
+ },
+};
int inet_parse(InetSocketAddress *addr, const char *str, Error **errp)
{
- const char *optstr, *h;
- char host[65];
- char port[33];
- int to;
- int pos;
- char *begin;
-
+ QemuOpts *opts = qemu_opts_parse(&inet_opts, str, true, errp);
+ if (!opts) {
+ return -1;
+ }
memset(addr, 0, sizeof(*addr));
/* parse address */
- if (str[0] == ':') {
- /* no host given */
- host[0] = '\0';
- if (sscanf(str, ":%32[^,]%n", port, &pos) != 1) {
- error_setg(errp, "error parsing port in address '%s'", str);
- return -1;
- }
- } else if (str[0] == '[') {
+ const char *addr_str = qemu_opt_get(opts, "addr");
+ if (!addr_str) {
+ error_setg(errp, "error parsing address ''");
+ return -1;
+ }
+ if (str[0] == '[') {
/* IPv6 addr */
- if (sscanf(str, "[%64[^]]]:%32[^,]%n", host, port, &pos) != 2) {
- error_setg(errp, "error parsing IPv6 address '%s'", str);
+ const char *ip_end = strstr(addr_str, "]:");
+ if (!ip_end || ip_end - addr_str < 2 || strlen(ip_end) < 3) {
+ error_setg(errp, "error parsing IPv6 address '%s'", addr_str);
return -1;
}
+ addr->host = g_strndup(addr_str + 1, ip_end - addr_str - 1);
+ addr->port = g_strdup(ip_end + 2);
} else {
- /* hostname or IPv4 addr */
- if (sscanf(str, "%64[^:]:%32[^,]%n", host, port, &pos) != 2) {
- error_setg(errp, "error parsing address '%s'", str);
+ /* no host, hostname or IPv4 addr */
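+ /* Split "host:port" at the first ':'; the host part may be empty */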
+ const char *port = strchr(addr_str, ':');
+ if (!port || strlen(port) < 2) {
+ error_setg(errp, "error parsing address '%s'", addr_str);
return -1;
}
+ addr->host = g_strndup(addr_str, port - addr_str);
+ addr->port = g_strdup(port + 1);
}
- addr->host = g_strdup(host);
- addr->port = g_strdup(port);
-
/* parse options */
- optstr = str + pos;
- h = strstr(optstr, ",to=");
- if (h) {
- h += 4;
- if (sscanf(h, "%d%n", &to, &pos) != 1 ||
- (h[pos] != '\0' && h[pos] != ',')) {
- error_setg(errp, "error parsing to= argument");
- return -1;
- }
+ if (qemu_opt_find(opts, "numeric")) {
+ addr->has_numeric = true;
+ addr->numeric = qemu_opt_get_bool(opts, "numeric", false);
+ }
+ if (qemu_opt_find(opts, "to")) {
addr->has_to = true;
- addr->to = to;
+ addr->to = qemu_opt_get_number(opts, "to", 0);
}
- begin = strstr(optstr, ",ipv4");
- if (begin) {
- if (inet_parse_flag("ipv4", begin + 5, &addr->ipv4, errp) < 0) {
- return -1;
- }
+ if (qemu_opt_find(opts, "ipv4")) {
addr->has_ipv4 = true;
+ addr->ipv4 = qemu_opt_get_bool(opts, "ipv4", false);
}
- begin = strstr(optstr, ",ipv6");
- if (begin) {
- if (inet_parse_flag("ipv6", begin + 5, &addr->ipv6, errp) < 0) {
- return -1;
- }
+ if (qemu_opt_find(opts, "ipv6")) {
addr->has_ipv6 = true;
+ addr->ipv6 = qemu_opt_get_bool(opts, "ipv6", false);
}
- begin = strstr(optstr, ",keep-alive");
- if (begin) {
- if (inet_parse_flag("keep-alive", begin + strlen(",keep-alive"),
- &addr->keep_alive, errp) < 0)
- {
- return -1;
- }
+ if (qemu_opt_find(opts, "keep-alive")) {
addr->has_keep_alive = true;
+ addr->keep_alive = qemu_opt_get_bool(opts, "keep-alive", false);
+ }
+#ifdef HAVE_TCP_KEEPCNT
+ if (qemu_opt_find(opts, "keep-alive-count")) {
+ addr->has_keep_alive_count = true;
+ addr->keep_alive_count = qemu_opt_get_number(opts, "keep-alive-count", 0);
+ }
+#endif
+#ifdef HAVE_TCP_KEEPIDLE
+ if (qemu_opt_find(opts, "keep-alive-idle")) {
+ addr->has_keep_alive_idle = true;
+ addr->keep_alive_idle = qemu_opt_get_number(opts, "keep-alive-idle", 0);
}
+#endif
+#ifdef HAVE_TCP_KEEPINTVL
+ if (qemu_opt_find(opts, "keep-alive-interval")) {
+ addr->has_keep_alive_interval = true;
+ addr->keep_alive_interval = qemu_opt_get_number(opts, "keep-alive-interval", 0);
+ }
+#endif
#ifdef HAVE_IPPROTO_MPTCP
- begin = strstr(optstr, ",mptcp");
- if (begin) {
- if (inet_parse_flag("mptcp", begin + strlen(",mptcp"),
- &addr->mptcp, errp) < 0)
- {
- return -1;
- }
+ if (qemu_opt_find(opts, "mptcp")) {
addr->has_mptcp = true;
+ addr->mptcp = qemu_opt_get_bool(opts, "mptcp", false);
}
#endif
return 0;
}
-/**
- * Create a blocking socket and connect it to an address.
- *
- * @str: address string
- * @errp: set in case of an error
- *
- * Returns -1 in case of error, file descriptor on success
- **/
-int inet_connect(const char *str, Error **errp)
-{
- int sock = -1;
- InetSocketAddress *addr = g_new(InetSocketAddress, 1);
-
- if (!inet_parse(addr, str, errp)) {
- sock = inet_connect_saddr(addr, errp);
- }
- qapi_free_InetSocketAddress(addr);
- return sock;
-}
-
#ifdef CONFIG_AF_VSOCK
static bool vsock_parse_vaddr_to_sockaddr(const VsockSocketAddress *vaddr,
struct sockaddr_vm *svm,
@@ -1421,21 +1479,6 @@ SocketAddress *socket_local_address(int fd, Error **errp)
}
-SocketAddress *socket_remote_address(int fd, Error **errp)
-{
- struct sockaddr_storage ss;
- socklen_t sslen = sizeof(ss);
-
- if (getpeername(fd, (struct sockaddr *)&ss, &sslen) < 0) {
- error_setg_errno(errp, errno, "%s",
- "Unable to query remote socket address");
- return NULL;
- }
-
- return socket_sockaddr_to_address(&ss, sslen, errp);
-}
-
-
SocketAddress *socket_address_flatten(SocketAddressLegacy *addr_legacy)
{
SocketAddress *addr;
diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c
index b2e26e2..ba72544 100644
--- a/util/qemu-thread-posix.c
+++ b/util/qemu-thread-posix.c
@@ -317,154 +317,6 @@ void qemu_sem_wait(QemuSemaphore *sem)
qemu_mutex_unlock(&sem->mutex);
}
-#ifdef __linux__
-#include "qemu/futex.h"
-#else
-static inline void qemu_futex_wake(QemuEvent *ev, int n)
-{
- assert(ev->initialized);
- pthread_mutex_lock(&ev->lock);
- if (n == 1) {
- pthread_cond_signal(&ev->cond);
- } else {
- pthread_cond_broadcast(&ev->cond);
- }
- pthread_mutex_unlock(&ev->lock);
-}
-
-static inline void qemu_futex_wait(QemuEvent *ev, unsigned val)
-{
- assert(ev->initialized);
- pthread_mutex_lock(&ev->lock);
- if (ev->value == val) {
- pthread_cond_wait(&ev->cond, &ev->lock);
- }
- pthread_mutex_unlock(&ev->lock);
-}
-#endif
-
-/* Valid transitions:
- * - free->set, when setting the event
- * - busy->set, when setting the event, followed by qemu_futex_wake
- * - set->free, when resetting the event
- * - free->busy, when waiting
- *
- * set->busy does not happen (it can be observed from the outside but
- * it really is set->free->busy).
- *
- * busy->free provably cannot happen; to enforce it, the set->free transition
- * is done with an OR, which becomes a no-op if the event has concurrently
- * transitioned to free or busy.
- */
-
-#define EV_SET 0
-#define EV_FREE 1
-#define EV_BUSY -1
-
-void qemu_event_init(QemuEvent *ev, bool init)
-{
-#ifndef __linux__
- pthread_mutex_init(&ev->lock, NULL);
- pthread_cond_init(&ev->cond, NULL);
-#endif
-
- ev->value = (init ? EV_SET : EV_FREE);
- ev->initialized = true;
-}
-
-void qemu_event_destroy(QemuEvent *ev)
-{
- assert(ev->initialized);
- ev->initialized = false;
-#ifndef __linux__
- pthread_mutex_destroy(&ev->lock);
- pthread_cond_destroy(&ev->cond);
-#endif
-}
-
-void qemu_event_set(QemuEvent *ev)
-{
- assert(ev->initialized);
-
- /*
- * Pairs with both qemu_event_reset() and qemu_event_wait().
- *
- * qemu_event_set has release semantics, but because it *loads*
- * ev->value we need a full memory barrier here.
- */
- smp_mb();
- if (qatomic_read(&ev->value) != EV_SET) {
- int old = qatomic_xchg(&ev->value, EV_SET);
-
- /* Pairs with memory barrier in kernel futex_wait system call. */
- smp_mb__after_rmw();
- if (old == EV_BUSY) {
- /* There were waiters, wake them up. */
- qemu_futex_wake(ev, INT_MAX);
- }
- }
-}
-
-void qemu_event_reset(QemuEvent *ev)
-{
- assert(ev->initialized);
-
- /*
- * If there was a concurrent reset (or even reset+wait),
- * do nothing. Otherwise change EV_SET->EV_FREE.
- */
- qatomic_or(&ev->value, EV_FREE);
-
- /*
- * Order reset before checking the condition in the caller.
- * Pairs with the first memory barrier in qemu_event_set().
- */
- smp_mb__after_rmw();
-}
-
-void qemu_event_wait(QemuEvent *ev)
-{
- unsigned value;
-
- assert(ev->initialized);
-
- /*
- * qemu_event_wait must synchronize with qemu_event_set even if it does
- * not go down the slow path, so this load-acquire is needed that
- * synchronizes with the first memory barrier in qemu_event_set().
- *
- * If we do go down the slow path, there is no requirement at all: we
- * might miss a qemu_event_set() here but ultimately the memory barrier in
- * qemu_futex_wait() will ensure the check is done correctly.
- */
- value = qatomic_load_acquire(&ev->value);
- if (value != EV_SET) {
- if (value == EV_FREE) {
- /*
- * Leave the event reset and tell qemu_event_set that there are
- * waiters. No need to retry, because there cannot be a concurrent
- * busy->free transition. After the CAS, the event will be either
- * set or busy.
- *
- * This cmpxchg doesn't have particular ordering requirements if it
- * succeeds (moving the store earlier can only cause qemu_event_set()
- * to issue _more_ wakeups), the failing case needs acquire semantics
- * like the load above.
- */
- if (qatomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) {
- return;
- }
- }
-
- /*
- * This is the final check for a concurrent set, so it does need
- * a smp_mb() pairing with the second barrier of qemu_event_set().
- * The barrier is inside the FUTEX_WAIT system call.
- */
- qemu_futex_wait(ev, EV_BUSY);
- }
-}
-
static __thread NotifierList thread_exit;
/*
diff --git a/util/qemu-thread-win32.c b/util/qemu-thread-win32.c
index a7fe3cc..ca2e0b5 100644
--- a/util/qemu-thread-win32.c
+++ b/util/qemu-thread-win32.c
@@ -231,135 +231,6 @@ void qemu_sem_wait(QemuSemaphore *sem)
}
}
-/* Wrap a Win32 manual-reset event with a fast userspace path. The idea
- * is to reset the Win32 event lazily, as part of a test-reset-test-wait
- * sequence. Such a sequence is, indeed, how QemuEvents are used by
- * RCU and other subsystems!
- *
- * Valid transitions:
- * - free->set, when setting the event
- * - busy->set, when setting the event, followed by SetEvent
- * - set->free, when resetting the event
- * - free->busy, when waiting
- *
- * set->busy does not happen (it can be observed from the outside but
- * it really is set->free->busy).
- *
- * busy->free provably cannot happen; to enforce it, the set->free transition
- * is done with an OR, which becomes a no-op if the event has concurrently
- * transitioned to free or busy (and is faster than cmpxchg).
- */
-
-#define EV_SET 0
-#define EV_FREE 1
-#define EV_BUSY -1
-
-void qemu_event_init(QemuEvent *ev, bool init)
-{
- /* Manual reset. */
- ev->event = CreateEvent(NULL, TRUE, TRUE, NULL);
- ev->value = (init ? EV_SET : EV_FREE);
- ev->initialized = true;
-}
-
-void qemu_event_destroy(QemuEvent *ev)
-{
- assert(ev->initialized);
- ev->initialized = false;
- CloseHandle(ev->event);
-}
-
-void qemu_event_set(QemuEvent *ev)
-{
- assert(ev->initialized);
-
- /*
- * Pairs with both qemu_event_reset() and qemu_event_wait().
- *
- * qemu_event_set has release semantics, but because it *loads*
- * ev->value we need a full memory barrier here.
- */
- smp_mb();
- if (qatomic_read(&ev->value) != EV_SET) {
- int old = qatomic_xchg(&ev->value, EV_SET);
-
- /* Pairs with memory barrier after ResetEvent. */
- smp_mb__after_rmw();
- if (old == EV_BUSY) {
- /* There were waiters, wake them up. */
- SetEvent(ev->event);
- }
- }
-}
-
-void qemu_event_reset(QemuEvent *ev)
-{
- assert(ev->initialized);
-
- /*
- * If there was a concurrent reset (or even reset+wait),
- * do nothing. Otherwise change EV_SET->EV_FREE.
- */
- qatomic_or(&ev->value, EV_FREE);
-
- /*
- * Order reset before checking the condition in the caller.
- * Pairs with the first memory barrier in qemu_event_set().
- */
- smp_mb__after_rmw();
-}
-
-void qemu_event_wait(QemuEvent *ev)
-{
- unsigned value;
-
- assert(ev->initialized);
-
- /*
- * qemu_event_wait must synchronize with qemu_event_set even if it does
- * not go down the slow path, so this load-acquire is needed that
- * synchronizes with the first memory barrier in qemu_event_set().
- *
- * If we do go down the slow path, there is no requirement at all: we
- * might miss a qemu_event_set() here but ultimately the memory barrier in
- * qemu_futex_wait() will ensure the check is done correctly.
- */
- value = qatomic_load_acquire(&ev->value);
- if (value != EV_SET) {
- if (value == EV_FREE) {
- /*
- * Here the underlying kernel event is reset, but qemu_event_set is
- * not yet going to call SetEvent. However, there will be another
- * check for EV_SET below when setting EV_BUSY. At that point it
- * is safe to call WaitForSingleObject.
- */
- ResetEvent(ev->event);
-
- /*
- * It is not clear whether ResetEvent provides this barrier; kernel
- * APIs (KeResetEvent/KeClearEvent) do not. Better safe than sorry!
- */
- smp_mb();
-
- /*
- * Leave the event reset and tell qemu_event_set that there are
- * waiters. No need to retry, because there cannot be a concurrent
- * busy->free transition. After the CAS, the event will be either
- * set or busy.
- */
- if (qatomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) {
- return;
- }
- }
-
- /*
- * ev->value is now EV_BUSY. Since we didn't observe EV_SET,
- * qemu_event_set() must observe EV_BUSY and call SetEvent().
- */
- WaitForSingleObject(ev->event, INFINITE);
- }
-}
-
struct QemuThreadData {
/* Passed to win32_start_routine. */
void *(*start_routine)(void *);
diff --git a/util/qemu-timer.c b/util/qemu-timer.c
index 213114b..1fb48be 100644
--- a/util/qemu-timer.c
+++ b/util/qemu-timer.c
@@ -26,9 +26,10 @@
#include "qemu/main-loop.h"
#include "qemu/timer.h"
#include "qemu/lockable.h"
-#include "sysemu/cpu-timers.h"
-#include "sysemu/replay.h"
-#include "sysemu/cpus.h"
+#include "system/cpu-timers.h"
+#include "exec/icount.h"
+#include "system/replay.h"
+#include "system/cpus.h"
#ifdef CONFIG_POSIX
#include <pthread.h>
@@ -182,7 +183,7 @@ bool qemu_clock_has_timers(QEMUClockType type)
bool timerlist_expired(QEMUTimerList *timer_list)
{
- int64_t expire_time;
+ int64_t expire_time = 0;
if (!qatomic_read(&timer_list->active_timers)) {
return false;
@@ -212,7 +213,7 @@ bool qemu_clock_expired(QEMUClockType type)
int64_t timerlist_deadline_ns(QEMUTimerList *timer_list)
{
int64_t delta;
- int64_t expire_time;
+ int64_t expire_time = 0;
if (!qatomic_read(&timer_list->active_timers)) {
return -1;
@@ -286,16 +287,6 @@ int64_t qemu_clock_deadline_ns_all(QEMUClockType type, int attr_mask)
return deadline;
}
-QEMUClockType timerlist_get_clock(QEMUTimerList *timer_list)
-{
- return timer_list->clock->type;
-}
-
-QEMUTimerList *qemu_clock_get_main_loop_timerlist(QEMUClockType type)
-{
- return main_loop_tlg.tl[type];
-}
-
void timerlist_notify(QEMUTimerList *timer_list)
{
if (timer_list->notify_cb) {
@@ -419,10 +410,6 @@ static bool timer_mod_ns_locked(QEMUTimerList *timer_list,
static void timerlist_rearm(QEMUTimerList *timer_list)
{
- /* Interrupt execution to force deadline recalculation. */
- if (icount_enabled() && timer_list->clock->type == QEMU_CLOCK_VIRTUAL) {
- icount_start_warp_timer();
- }
timerlist_notify(timer_list);
}
@@ -461,7 +448,7 @@ void timer_mod_ns(QEMUTimer *ts, int64_t expire_time)
void timer_mod_anticipate_ns(QEMUTimer *ts, int64_t expire_time)
{
QEMUTimerList *timer_list = ts->timer_list;
- bool rearm;
+ bool rearm = false;
WITH_QEMU_LOCK_GUARD(&timer_list->active_timers_lock) {
if (ts->expire_time == -1 || ts->expire_time > expire_time) {
diff --git a/util/qht.c b/util/qht.c
index 92c6b78..208c2f4 100644
--- a/util/qht.c
+++ b/util/qht.c
@@ -367,7 +367,6 @@ void qht_map_lock_buckets__no_stale(struct qht *ht, struct qht_map **pmap)
qht_map_lock_buckets(map);
qht_unlock(ht);
*pmap = map;
- return;
}
/*
diff --git a/util/rcu.c b/util/rcu.c
index fa32c94..b703c86 100644
--- a/util/rcu.c
+++ b/util/rcu.c
@@ -20,8 +20,8 @@
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * License along with this library; if not, see
+ * <https://www.gnu.org/licenses/>.
*
* IBM's contributions to this file may be relicensed under LGPLv2 or later.
*/
diff --git a/util/s390x_pci_mmio.c b/util/s390x_pci_mmio.c
new file mode 100644
index 0000000..5ab24fa
--- /dev/null
+++ b/util/s390x_pci_mmio.c
@@ -0,0 +1,146 @@
+/*
+ * s390x PCI MMIO definitions
+ *
+ * Copyright 2025 IBM Corp.
+ * Author(s): Farhan Ali <alifm@linux.ibm.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include <sys/syscall.h>
+#include "qemu/s390x_pci_mmio.h"
+#include "elf.h"
+
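+/*
+ * The PCI MIO instructions take the ioaddr/length pair in an even/odd
+ * general register pair; model it as one 128-bit value so the compiler
+ * allocates such a pair.
+ */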
+union register_pair {
+ unsigned __int128 pair;
+ struct {
+ uint64_t even;
+ uint64_t odd;
+ };
+};
+
+static bool is_mio_supported;
+
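+/* Probe once at startup: the HWCAP bit advertises PCI MIO instruction support */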
+static __attribute__((constructor)) void check_is_mio_supported(void)
+{
+ is_mio_supported = !!(qemu_getauxval(AT_HWCAP) & HWCAP_S390_PCI_MIO);
+}
+
+static uint64_t s390x_pcilgi(const void *ioaddr, size_t len)
+{
+ union register_pair ioaddr_len = { .even = (uint64_t)ioaddr,
+ .odd = len };
+ uint64_t val;
+ int cc;
+
+ asm volatile(
+ /* pcilgi */
+ ".insn rre,0xb9d60000,%[val],%[ioaddr_len]\n"
+ "ipm %[cc]\n"
+ "srl %[cc],28\n"
+ : [cc] "=d"(cc), [val] "=d"(val),
+ [ioaddr_len] "+d"(ioaddr_len.pair) :: "cc");
+
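+ /* A nonzero condition code means the read failed; return all ones */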
+ if (cc) {
+ val = -1ULL;
+ }
+
+ return val;
+}
+
+static void s390x_pcistgi(void *ioaddr, uint64_t val, size_t len)
+{
+ union register_pair ioaddr_len = {.even = (uint64_t)ioaddr, .odd = len};
+
+ asm volatile (
+ /* pcistgi */
+ ".insn rre,0xb9d40000,%[val],%[ioaddr_len]\n"
+ : [ioaddr_len] "+d" (ioaddr_len.pair)
+ : [val] "d" (val)
+ : "cc", "memory");
+}
+
+uint8_t s390x_pci_mmio_read_8(const void *ioaddr)
+{
+ uint8_t val = 0;
+
+ if (is_mio_supported) {
+ val = s390x_pcilgi(ioaddr, sizeof(val));
+ } else {
+ syscall(__NR_s390_pci_mmio_read, ioaddr, &val, sizeof(val));
+ }
+ return val;
+}
+
+uint16_t s390x_pci_mmio_read_16(const void *ioaddr)
+{
+ uint16_t val = 0;
+
+ if (is_mio_supported) {
+ val = s390x_pcilgi(ioaddr, sizeof(val));
+ } else {
+ syscall(__NR_s390_pci_mmio_read, ioaddr, &val, sizeof(val));
+ }
+ return val;
+}
+
+uint32_t s390x_pci_mmio_read_32(const void *ioaddr)
+{
+ uint32_t val = 0;
+
+ if (is_mio_supported) {
+ val = s390x_pcilgi(ioaddr, sizeof(val));
+ } else {
+ syscall(__NR_s390_pci_mmio_read, ioaddr, &val, sizeof(val));
+ }
+ return val;
+}
+
+uint64_t s390x_pci_mmio_read_64(const void *ioaddr)
+{
+ uint64_t val = 0;
+
+ if (is_mio_supported) {
+ val = s390x_pcilgi(ioaddr, sizeof(val));
+ } else {
+ syscall(__NR_s390_pci_mmio_read, ioaddr, &val, sizeof(val));
+ }
+ return val;
+}
+
+void s390x_pci_mmio_write_8(void *ioaddr, uint8_t val)
+{
+ if (is_mio_supported) {
+ s390x_pcistgi(ioaddr, val, sizeof(val));
+ } else {
+ syscall(__NR_s390_pci_mmio_write, ioaddr, &val, sizeof(val));
+ }
+}
+
+void s390x_pci_mmio_write_16(void *ioaddr, uint16_t val)
+{
+ if (is_mio_supported) {
+ s390x_pcistgi(ioaddr, val, sizeof(val));
+ } else {
+ syscall(__NR_s390_pci_mmio_write, ioaddr, &val, sizeof(val));
+ }
+}
+
+void s390x_pci_mmio_write_32(void *ioaddr, uint32_t val)
+{
+ if (is_mio_supported) {
+ s390x_pcistgi(ioaddr, val, sizeof(val));
+ } else {
+ syscall(__NR_s390_pci_mmio_write, ioaddr, &val, sizeof(val));
+ }
+}
+
+void s390x_pci_mmio_write_64(void *ioaddr, uint64_t val)
+{
+ if (is_mio_supported) {
+ s390x_pcistgi(ioaddr, val, sizeof(val));
+ } else {
+ syscall(__NR_s390_pci_mmio_write, ioaddr, &val, sizeof(val));
+ }
+}
diff --git a/util/thread-context.c b/util/thread-context.c
index 2bc7883..0146154 100644
--- a/util/thread-context.c
+++ b/util/thread-context.c
@@ -273,7 +273,7 @@ static void thread_context_instance_complete(UserCreatable *uc, Error **errp)
}
}
-static void thread_context_class_init(ObjectClass *oc, void *data)
+static void thread_context_class_init(ObjectClass *oc, const void *data)
{
UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
@@ -319,7 +319,7 @@ static const TypeInfo thread_context_info = {
.instance_size = sizeof(ThreadContext),
.instance_init = thread_context_instance_init,
.instance_finalize = thread_context_instance_finalize,
- .interfaces = (InterfaceInfo[]) {
+ .interfaces = (const InterfaceInfo[]) {
{ TYPE_USER_CREATABLE },
{ }
}
diff --git a/util/thread-pool.c b/util/thread-pool.c
index 27eb777..d2ead6b 100644
--- a/util/thread-pool.c
+++ b/util/thread-pool.c
@@ -23,9 +23,9 @@
#include "block/thread-pool.h"
#include "qemu/main-loop.h"
-static void do_spawn_thread(ThreadPool *pool);
+static void do_spawn_thread(ThreadPoolAio *pool);
-typedef struct ThreadPoolElement ThreadPoolElement;
+typedef struct ThreadPoolElementAio ThreadPoolElementAio;
enum ThreadState {
THREAD_QUEUED,
@@ -33,9 +33,9 @@ enum ThreadState {
THREAD_DONE,
};
-struct ThreadPoolElement {
+struct ThreadPoolElementAio {
BlockAIOCB common;
- ThreadPool *pool;
+ ThreadPoolAio *pool;
ThreadPoolFunc *func;
void *arg;
@@ -47,13 +47,13 @@ struct ThreadPoolElement {
int ret;
/* Access to this list is protected by lock. */
- QTAILQ_ENTRY(ThreadPoolElement) reqs;
+ QTAILQ_ENTRY(ThreadPoolElementAio) reqs;
/* This list is only written by the thread pool's mother thread. */
- QLIST_ENTRY(ThreadPoolElement) all;
+ QLIST_ENTRY(ThreadPoolElementAio) all;
};
-struct ThreadPool {
+struct ThreadPoolAio {
AioContext *ctx;
QEMUBH *completion_bh;
QemuMutex lock;
@@ -62,10 +62,10 @@ struct ThreadPool {
QEMUBH *new_thread_bh;
/* The following variables are only accessed from one AioContext. */
- QLIST_HEAD(, ThreadPoolElement) head;
+ QLIST_HEAD(, ThreadPoolElementAio) head;
/* The following variables are protected by lock. */
- QTAILQ_HEAD(, ThreadPoolElement) request_list;
+ QTAILQ_HEAD(, ThreadPoolElementAio) request_list;
int cur_threads;
int idle_threads;
int new_threads; /* backlog of threads we need to create */
@@ -76,14 +76,14 @@ struct ThreadPool {
static void *worker_thread(void *opaque)
{
- ThreadPool *pool = opaque;
+ ThreadPoolAio *pool = opaque;
qemu_mutex_lock(&pool->lock);
pool->pending_threads--;
do_spawn_thread(pool);
while (pool->cur_threads <= pool->max_threads) {
- ThreadPoolElement *req;
+ ThreadPoolElementAio *req;
int ret;
if (QTAILQ_EMPTY(&pool->request_list)) {
@@ -131,7 +131,7 @@ static void *worker_thread(void *opaque)
return NULL;
}
-static void do_spawn_thread(ThreadPool *pool)
+static void do_spawn_thread(ThreadPoolAio *pool)
{
QemuThread t;
@@ -148,14 +148,14 @@ static void do_spawn_thread(ThreadPool *pool)
static void spawn_thread_bh_fn(void *opaque)
{
- ThreadPool *pool = opaque;
+ ThreadPoolAio *pool = opaque;
qemu_mutex_lock(&pool->lock);
do_spawn_thread(pool);
qemu_mutex_unlock(&pool->lock);
}
-static void spawn_thread(ThreadPool *pool)
+static void spawn_thread(ThreadPoolAio *pool)
{
pool->cur_threads++;
pool->new_threads++;
@@ -173,8 +173,8 @@ static void spawn_thread(ThreadPool *pool)
static void thread_pool_completion_bh(void *opaque)
{
- ThreadPool *pool = opaque;
- ThreadPoolElement *elem, *next;
+ ThreadPoolAio *pool = opaque;
+ ThreadPoolElementAio *elem, *next;
defer_call_begin(); /* cb() may use defer_call() to coalesce work */
@@ -184,8 +184,8 @@ restart:
continue;
}
- trace_thread_pool_complete(pool, elem, elem->common.opaque,
- elem->ret);
+ trace_thread_pool_complete_aio(pool, elem, elem->common.opaque,
+ elem->ret);
QLIST_REMOVE(elem, all);
if (elem->common.cb) {
@@ -217,10 +217,10 @@ restart:
static void thread_pool_cancel(BlockAIOCB *acb)
{
- ThreadPoolElement *elem = (ThreadPoolElement *)acb;
- ThreadPool *pool = elem->pool;
+ ThreadPoolElementAio *elem = (ThreadPoolElementAio *)acb;
+ ThreadPoolAio *pool = elem->pool;
- trace_thread_pool_cancel(elem, elem->common.opaque);
+ trace_thread_pool_cancel_aio(elem, elem->common.opaque);
QEMU_LOCK_GUARD(&pool->lock);
if (elem->state == THREAD_QUEUED) {
@@ -234,16 +234,16 @@ static void thread_pool_cancel(BlockAIOCB *acb)
}
static const AIOCBInfo thread_pool_aiocb_info = {
- .aiocb_size = sizeof(ThreadPoolElement),
+ .aiocb_size = sizeof(ThreadPoolElementAio),
.cancel_async = thread_pool_cancel,
};
BlockAIOCB *thread_pool_submit_aio(ThreadPoolFunc *func, void *arg,
BlockCompletionFunc *cb, void *opaque)
{
- ThreadPoolElement *req;
+ ThreadPoolElementAio *req;
AioContext *ctx = qemu_get_current_aio_context();
- ThreadPool *pool = aio_get_thread_pool(ctx);
+ ThreadPoolAio *pool = aio_get_thread_pool(ctx);
 /* Assert that the thread submitting work is the same one running the pool */
assert(pool->ctx == qemu_get_current_aio_context());
@@ -256,7 +256,7 @@ BlockAIOCB *thread_pool_submit_aio(ThreadPoolFunc *func, void *arg,
QLIST_INSERT_HEAD(&pool->head, req, all);
- trace_thread_pool_submit(pool, req, arg);
+ trace_thread_pool_submit_aio(pool, req, arg);
qemu_mutex_lock(&pool->lock);
if (pool->idle_threads == 0 && pool->cur_threads < pool->max_threads) {
@@ -290,12 +290,7 @@ int coroutine_fn thread_pool_submit_co(ThreadPoolFunc *func, void *arg)
return tpc.ret;
}
-void thread_pool_submit(ThreadPoolFunc *func, void *arg)
-{
- thread_pool_submit_aio(func, arg, NULL, NULL);
-}
-
-void thread_pool_update_params(ThreadPool *pool, AioContext *ctx)
+void thread_pool_update_params(ThreadPoolAio *pool, AioContext *ctx)
{
qemu_mutex_lock(&pool->lock);
@@ -322,7 +317,7 @@ void thread_pool_update_params(ThreadPool *pool, AioContext *ctx)
qemu_mutex_unlock(&pool->lock);
}
-static void thread_pool_init_one(ThreadPool *pool, AioContext *ctx)
+static void thread_pool_init_one(ThreadPoolAio *pool, AioContext *ctx)
{
if (!ctx) {
ctx = qemu_get_aio_context();
@@ -342,14 +337,14 @@ static void thread_pool_init_one(ThreadPool *pool, AioContext *ctx)
thread_pool_update_params(pool, ctx);
}
-ThreadPool *thread_pool_new(AioContext *ctx)
+ThreadPoolAio *thread_pool_new_aio(AioContext *ctx)
{
- ThreadPool *pool = g_new(ThreadPool, 1);
+ ThreadPoolAio *pool = g_new(ThreadPoolAio, 1);
thread_pool_init_one(pool, ctx);
return pool;
}
-void thread_pool_free(ThreadPool *pool)
+void thread_pool_free_aio(ThreadPoolAio *pool)
{
if (!pool) {
return;
@@ -379,3 +374,122 @@ void thread_pool_free(ThreadPool *pool)
qemu_mutex_destroy(&pool->lock);
g_free(pool);
}
+
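+/*
+ * A generic thread pool (as opposed to ThreadPoolAio above): work is
+ * pushed to a GThreadPool and completion is tracked with a counter so
+ * thread_pool_wait() can block until everything submitted has finished.
+ */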
+struct ThreadPool {
+ GThreadPool *t;
+ size_t cur_work;
+ QemuMutex cur_work_lock;
+ QemuCond all_finished_cond;
+};
+
+typedef struct {
+ ThreadPoolFunc *func;
+ void *opaque;
+ GDestroyNotify opaque_destroy;
+} ThreadPoolElement;
+
+static void thread_pool_func(gpointer data, gpointer user_data)
+{
+ ThreadPool *pool = user_data;
+ g_autofree ThreadPoolElement *el = data;
+
+ el->func(el->opaque);
+
+ if (el->opaque_destroy) {
+ el->opaque_destroy(el->opaque);
+ }
+
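+ /* Drop our work count and wake thread_pool_wait() when it reaches zero */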
+ QEMU_LOCK_GUARD(&pool->cur_work_lock);
+
+ assert(pool->cur_work > 0);
+ pool->cur_work--;
+
+ if (pool->cur_work == 0) {
+ qemu_cond_signal(&pool->all_finished_cond);
+ }
+}
+
+ThreadPool *thread_pool_new(void)
+{
+ ThreadPool *pool = g_new(ThreadPool, 1);
+
+ pool->cur_work = 0;
+ qemu_mutex_init(&pool->cur_work_lock);
+ qemu_cond_init(&pool->all_finished_cond);
+
+ pool->t = g_thread_pool_new(thread_pool_func, pool, 0, TRUE, NULL);
+ /*
+ * g_thread_pool_new() can only return errors if initial thread(s)
+ * creation fails but we ask for 0 initial threads above.
+ */
+ assert(pool->t);
+
+ return pool;
+}
+
+void thread_pool_free(ThreadPool *pool)
+{
+ /*
+ * With wait_ = TRUE this waits for all previously
+ * submitted work to complete before the pool is freed.
+ */
+ g_thread_pool_free(pool->t, FALSE, TRUE);
+
+ qemu_cond_destroy(&pool->all_finished_cond);
+ qemu_mutex_destroy(&pool->cur_work_lock);
+
+ g_free(pool);
+}
+
+void thread_pool_submit(ThreadPool *pool, ThreadPoolFunc *func,
+ void *opaque, GDestroyNotify opaque_destroy)
+{
+ ThreadPoolElement *el = g_new(ThreadPoolElement, 1);
+
+ el->func = func;
+ el->opaque = opaque;
+ el->opaque_destroy = opaque_destroy;
+
+ WITH_QEMU_LOCK_GUARD(&pool->cur_work_lock) {
+ pool->cur_work++;
+ }
+
+ /*
+ * Ignore the return value: g_thread_pool_push() can only fail when
+ * spawning an additional thread fails, and even then the provided
+ * work is still queued for the existing threads.
+ */
+ g_thread_pool_push(pool->t, el, NULL);
+}
+
+void thread_pool_submit_immediate(ThreadPool *pool, ThreadPoolFunc *func,
+ void *opaque, GDestroyNotify opaque_destroy)
+{
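+ /* Queue the work, then raise the thread limit so it can start at once */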
+ thread_pool_submit(pool, func, opaque, opaque_destroy);
+ thread_pool_adjust_max_threads_to_work(pool);
+}
+
+void thread_pool_wait(ThreadPool *pool)
+{
+ QEMU_LOCK_GUARD(&pool->cur_work_lock);
+
+ while (pool->cur_work > 0) {
+ qemu_cond_wait(&pool->all_finished_cond,
+ &pool->cur_work_lock);
+ }
+}
+
+bool thread_pool_set_max_threads(ThreadPool *pool,
+ int max_threads)
+{
+ assert(max_threads > 0);
+
+ return g_thread_pool_set_max_threads(pool->t, max_threads, NULL);
+}
+
+bool thread_pool_adjust_max_threads_to_work(ThreadPool *pool)
+{
+ QEMU_LOCK_GUARD(&pool->cur_work_lock);
+
+ return thread_pool_set_max_threads(pool, pool->cur_work);
+}
diff --git a/util/timed-average.c b/util/timed-average.c
index 2b49d53..5b5c22a 100644
--- a/util/timed-average.c
+++ b/util/timed-average.c
@@ -8,10 +8,12 @@
 * Benoît Canet <benoit.canet@nodalink.com>
* Alberto Garcia <berto@igalia.com>
*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 2 of the License, or
- * (at your option) version 3 or any later version.
+ * (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
diff --git a/util/trace-events b/util/trace-events
index 49a4962..bd8f25f 100644
--- a/util/trace-events
+++ b/util/trace-events
@@ -14,9 +14,9 @@ aio_co_schedule_bh_cb(void *ctx, void *co) "ctx %p co %p"
reentrant_aio(void *ctx, const char *name) "ctx %p name %s"
# thread-pool.c
-thread_pool_submit(void *pool, void *req, void *opaque) "pool %p req %p opaque %p"
-thread_pool_complete(void *pool, void *req, void *opaque, int ret) "pool %p req %p opaque %p ret %d"
-thread_pool_cancel(void *req, void *opaque) "req %p opaque %p"
+thread_pool_submit_aio(void *pool, void *req, void *opaque) "pool %p req %p opaque %p"
+thread_pool_complete_aio(void *pool, void *req, void *opaque, int ret) "pool %p req %p opaque %p ret %d"
+thread_pool_cancel_aio(void *req, void *opaque) "req %p opaque %p"
# buffer.c
buffer_resize(const char *buf, size_t olen, size_t len) "%s: old %zd, new %zd"
diff --git a/util/userfaultfd.c b/util/userfaultfd.c
index 1b2fa94..2396104 100644
--- a/util/userfaultfd.c
+++ b/util/userfaultfd.c
@@ -240,7 +240,7 @@ int uffd_change_protection(int uffd_fd, void *addr, uint64_t length,
* Copy range of source pages to the destination to resolve
* missing page fault somewhere in the destination range.
*
- * Returns 0 on success, negative value in case of an error
+ * Returns 0 on success, -errno in case of an error
*
* @uffd_fd: UFFD file descriptor
* @dst_addr: destination base address
@@ -259,10 +259,11 @@ int uffd_copy_page(int uffd_fd, void *dst_addr, void *src_addr,
uffd_copy.mode = dont_wake ? UFFDIO_COPY_MODE_DONTWAKE : 0;
if (ioctl(uffd_fd, UFFDIO_COPY, &uffd_copy)) {
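+ /* Save errno before error_report() below can clobber it */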
+ int e = errno;
error_report("uffd_copy_page() failed: dst_addr=%p src_addr=%p length=%" PRIu64
" mode=%" PRIx64 " errno=%i", dst_addr, src_addr,
- length, (uint64_t) uffd_copy.mode, errno);
- return -1;
+ length, (uint64_t) uffd_copy.mode, e);
+ return -e;
}
return 0;
@@ -273,7 +274,7 @@ int uffd_copy_page(int uffd_fd, void *dst_addr, void *src_addr,
*
* Fill range pages with zeroes to resolve missing page fault within the range.
*
- * Returns 0 on success, negative value in case of an error
+ * Returns 0 on success, -errno in case of an error
*
* @uffd_fd: UFFD file descriptor
* @addr: base address
@@ -289,10 +290,11 @@ int uffd_zero_page(int uffd_fd, void *addr, uint64_t length, bool dont_wake)
uffd_zeropage.mode = dont_wake ? UFFDIO_ZEROPAGE_MODE_DONTWAKE : 0;
if (ioctl(uffd_fd, UFFDIO_ZEROPAGE, &uffd_zeropage)) {
+ int e = errno;
error_report("uffd_zero_page() failed: addr=%p length=%" PRIu64
" mode=%" PRIx64 " errno=%i", addr, length,
- (uint64_t) uffd_zeropage.mode, errno);
- return -1;
+ (uint64_t) uffd_zeropage.mode, e);
+ return -e;
}
return 0;
@@ -306,7 +308,7 @@ int uffd_zero_page(int uffd_fd, void *addr, uint64_t length, bool dont_wake)
* via UFFD-IO IOCTLs with MODE_DONTWAKE flag set, then after that all waits
* for the whole memory range are satisfied in a single call to uffd_wakeup().
*
- * Returns 0 on success, negative value in case of an error
+ * Returns 0 on success, -errno in case of an error
*
* @uffd_fd: UFFD file descriptor
* @addr: base address
@@ -320,9 +322,10 @@ int uffd_wakeup(int uffd_fd, void *addr, uint64_t length)
uffd_range.len = length;
if (ioctl(uffd_fd, UFFDIO_WAKE, &uffd_range)) {
+ int e = errno;
error_report("uffd_wakeup() failed: addr=%p length=%" PRIu64 " errno=%i",
- addr, length, errno);
- return -1;
+ addr, length, e);
+ return -e;
}
return 0;
@@ -355,31 +358,3 @@ int uffd_read_events(int uffd_fd, struct uffd_msg *msgs, int count)
return (int) (res / sizeof(struct uffd_msg));
}
-
-/**
- * uffd_poll_events: poll UFFD file descriptor for read
- *
- * Returns true if events are available for read, false otherwise
- *
- * @uffd_fd: UFFD file descriptor
- * @tmo: timeout value
- */
-bool uffd_poll_events(int uffd_fd, int tmo)
-{
- int res;
- struct pollfd poll_fd = { .fd = uffd_fd, .events = POLLIN, .revents = 0 };
-
- do {
- res = poll(&poll_fd, 1, tmo);
- } while (res < 0 && errno == EINTR);
-
- if (res == 0) {
- return false;
- }
- if (res < 0) {
- error_report("uffd_poll_events() failed: errno=%i", errno);
- return false;
- }
-
- return (poll_fd.revents & POLLIN) != 0;
-}
diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c
index f8bab46..fdff042 100644
--- a/util/vfio-helpers.c
+++ b/util/vfio-helpers.c
@@ -16,7 +16,7 @@
#include "qapi/error.h"
#include "exec/ramlist.h"
#include "exec/cpu-common.h"
-#include "exec/memory.h"
+#include "system/memory.h"
#include "trace.h"
#include "qemu/error-report.h"
#include "standard-headers/linux/pci_regs.h"