Diffstat (limited to 'util')

-rw-r--r--  util/cacheflush.c        |   4
-rw-r--r--  util/coroutine-wasm.c    | 127
-rw-r--r--  util/error.c             |  20
-rw-r--r--  util/event.c             | 171
-rw-r--r--  util/lockcnt.c           |   9
-rw-r--r--  util/meson.build         |   9
-rw-r--r--  util/oslib-posix.c       |  28
-rw-r--r--  util/qemu-sockets.c      | 327
-rw-r--r--  util/qemu-thread-posix.c | 148
-rw-r--r--  util/qemu-thread-win32.c | 129
-rw-r--r--  util/rcu.c               |   4
-rw-r--r--  util/s390x_pci_mmio.c    | 146

12 files changed, 702 insertions, 420 deletions
diff --git a/util/cacheflush.c b/util/cacheflush.c index 1d12899..17c5891 100644 --- a/util/cacheflush.c +++ b/util/cacheflush.c @@ -229,6 +229,10 @@ static void __attribute__((constructor)) init_cache_info(void) /* Caches are coherent and do not require flushing; symbol inline. */ +#elif defined(EMSCRIPTEN) + +/* Wasm doesn't have executable region of memory. */ + #elif defined(__aarch64__) && !defined(CONFIG_WIN32) /* * For Windows, we use generic implementation of flush_idcache_range, that diff --git a/util/coroutine-wasm.c b/util/coroutine-wasm.c new file mode 100644 index 0000000..cb1ec92 --- /dev/null +++ b/util/coroutine-wasm.c @@ -0,0 +1,127 @@ +/* + * emscripten fiber coroutine initialization code + * based on coroutine-ucontext.c + * + * Copyright (C) 2006 Anthony Liguori <anthony@codemonkey.ws> + * Copyright (C) 2011 Kevin Wolf <kwolf@redhat.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.0 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "qemu/coroutine_int.h" +#include "qemu/coroutine-tls.h" + +#include <emscripten/fiber.h> + +typedef struct { + Coroutine base; + void *stack; + size_t stack_size; + + void *asyncify_stack; + size_t asyncify_stack_size; + + CoroutineAction action; + + emscripten_fiber_t fiber; +} CoroutineEmscripten; + +/** + * Per-thread coroutine bookkeeping + */ +QEMU_DEFINE_STATIC_CO_TLS(Coroutine *, current); +QEMU_DEFINE_STATIC_CO_TLS(CoroutineEmscripten *, leader); +size_t leader_asyncify_stack_size = COROUTINE_STACK_SIZE; + +static void coroutine_trampoline(void *co_) +{ + Coroutine *co = co_; + + while (true) { + co->entry(co->entry_arg); + qemu_coroutine_switch(co, co->caller, COROUTINE_TERMINATE); + } +} + +Coroutine *qemu_coroutine_new(void) +{ + CoroutineEmscripten *co; + + co = g_malloc0(sizeof(*co)); + + co->stack_size = COROUTINE_STACK_SIZE; + co->stack = qemu_alloc_stack(&co->stack_size); + + co->asyncify_stack_size = COROUTINE_STACK_SIZE; + co->asyncify_stack = g_malloc0(co->asyncify_stack_size); + emscripten_fiber_init(&co->fiber, coroutine_trampoline, &co->base, + co->stack, co->stack_size, co->asyncify_stack, + co->asyncify_stack_size); + + return &co->base; +} + +void qemu_coroutine_delete(Coroutine *co_) +{ + CoroutineEmscripten *co = DO_UPCAST(CoroutineEmscripten, base, co_); + + qemu_free_stack(co->stack, co->stack_size); + g_free(co->asyncify_stack); + g_free(co); +} + +CoroutineAction qemu_coroutine_switch(Coroutine *from_, Coroutine *to_, + CoroutineAction action) +{ + CoroutineEmscripten *from = DO_UPCAST(CoroutineEmscripten, base, from_); + CoroutineEmscripten *to = DO_UPCAST(CoroutineEmscripten, base, to_); + + set_current(to_); + to->action = action; + emscripten_fiber_swap(&from->fiber, &to->fiber); + return from->action; +} + +Coroutine *qemu_coroutine_self(void) +{ + Coroutine *self = get_current(); + + if (!self) { + CoroutineEmscripten *leaderp = get_leader(); + if (!leaderp) { + leaderp = 
g_malloc0(sizeof(*leaderp)); + leaderp->asyncify_stack = g_malloc0(leader_asyncify_stack_size); + leaderp->asyncify_stack_size = leader_asyncify_stack_size; + emscripten_fiber_init_from_current_context( + &leaderp->fiber, + leaderp->asyncify_stack, + leaderp->asyncify_stack_size); + leaderp->stack = leaderp->fiber.stack_limit; + leaderp->stack_size = + leaderp->fiber.stack_base - leaderp->fiber.stack_limit; + set_leader(leaderp); + } + self = &leaderp->base; + set_current(self); + } + return self; +} + +bool qemu_in_coroutine(void) +{ + Coroutine *self = get_current(); + + return self && self->caller; +} diff --git a/util/error.c b/util/error.c index 673011b..daea214 100644 --- a/util/error.c +++ b/util/error.c @@ -15,15 +15,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "qemu/error-report.h" - -struct Error -{ - char *msg; - ErrorClass err_class; - const char *src, *func; - int line; - GString *hint; -}; +#include "qapi/error-internal.h" Error *error_abort; Error *error_fatal; @@ -32,8 +24,13 @@ Error *error_warn; static void error_handle(Error **errp, Error *err) { if (errp == &error_abort) { - fprintf(stderr, "Unexpected error in %s() at %s:%d:\n", - err->func, err->src, err->line); + if (err->func) { + fprintf(stderr, "Unexpected error in %s() at %.*s:%d:\n", + err->func, err->src_len, err->src, err->line); + } else { + fprintf(stderr, "Unexpected error at %.*s:%d:\n", + err->src_len, err->src, err->line); + } error_report("%s", error_get_pretty(err)); if (err->hint) { error_printf("%s", err->hint->str); @@ -75,6 +72,7 @@ static void error_setv(Error **errp, g_free(msg); } err->err_class = err_class; + err->src_len = -1; err->src = src; err->line = line; err->func = func; diff --git a/util/event.c b/util/event.c new file mode 100644 index 0000000..5a8141c --- /dev/null +++ b/util/event.c @@ -0,0 +1,171 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "qemu/osdep.h" +#include "qemu/thread.h" + +/* + * Valid transitions: + * - FREE -> SET (qemu_event_set) + * - BUSY -> SET (qemu_event_set) + * - SET -> FREE (qemu_event_reset) + * - FREE -> BUSY (qemu_event_wait) + * + * With futex, the waking and blocking operations follow + * BUSY -> SET and FREE -> BUSY, respectively. + * + * Without futex, BUSY -> SET and FREE -> BUSY never happen. Instead, the waking + * operation follows FREE -> SET and the blocking operation will happen in + * qemu_event_wait() if the event is not SET. + * + * SET->BUSY does not happen (it can be observed from the outside but + * it really is SET->FREE->BUSY). + * + * busy->free provably cannot happen; to enforce it, the set->free transition + * is done with an OR, which becomes a no-op if the event has concurrently + * transitioned to free or busy. + */ + +#define EV_SET 0 +#define EV_FREE 1 +#define EV_BUSY -1 + +void qemu_event_init(QemuEvent *ev, bool init) +{ +#ifndef HAVE_FUTEX + pthread_mutex_init(&ev->lock, NULL); + pthread_cond_init(&ev->cond, NULL); +#endif + + ev->value = (init ? EV_SET : EV_FREE); + ev->initialized = true; +} + +void qemu_event_destroy(QemuEvent *ev) +{ + assert(ev->initialized); + ev->initialized = false; +#ifndef HAVE_FUTEX + pthread_mutex_destroy(&ev->lock); + pthread_cond_destroy(&ev->cond); +#endif +} + +void qemu_event_set(QemuEvent *ev) +{ + assert(ev->initialized); + +#ifdef HAVE_FUTEX + /* + * Pairs with both qemu_event_reset() and qemu_event_wait(). + * + * qemu_event_set has release semantics, but because it *loads* + * ev->value we need a full memory barrier here. 
+ */ + smp_mb(); + if (qatomic_read(&ev->value) != EV_SET) { + int old = qatomic_xchg(&ev->value, EV_SET); + + /* Pairs with memory barrier in kernel futex_wait system call. */ + smp_mb__after_rmw(); + if (old == EV_BUSY) { + /* There were waiters, wake them up. */ + qemu_futex_wake_all(ev); + } + } +#else + pthread_mutex_lock(&ev->lock); + /* Pairs with qemu_event_reset()'s load acquire. */ + qatomic_store_release(&ev->value, EV_SET); + pthread_cond_broadcast(&ev->cond); + pthread_mutex_unlock(&ev->lock); +#endif +} + +void qemu_event_reset(QemuEvent *ev) +{ + assert(ev->initialized); + +#ifdef HAVE_FUTEX + /* + * If there was a concurrent reset (or even reset+wait), + * do nothing. Otherwise change EV_SET->EV_FREE. + */ + qatomic_or(&ev->value, EV_FREE); + + /* + * Order reset before checking the condition in the caller. + * Pairs with the first memory barrier in qemu_event_set(). + */ + smp_mb__after_rmw(); +#else + /* + * If futexes are not available, there are no EV_FREE->EV_BUSY + * transitions because wakeups are done entirely through the + * condition variable. Since qatomic_set() only writes EV_FREE, + * the load seems useless but in reality, the acquire synchronizes + * with qemu_event_set()'s store release: if qemu_event_reset() + * sees EV_SET here, then the caller will certainly see a + * successful condition and skip qemu_event_wait(): + * + * done = 1; if (done == 0) + * qemu_event_set() { qemu_event_reset() { + * lock(); + * ev->value = EV_SET -----> load ev->value + * ev->value = old value | EV_FREE + * cond_broadcast() + * unlock(); } + * } if (done == 0) + * // qemu_event_wait() not called + */ + qatomic_set(&ev->value, qatomic_load_acquire(&ev->value) | EV_FREE); +#endif +} + +void qemu_event_wait(QemuEvent *ev) +{ + assert(ev->initialized); + +#ifdef HAVE_FUTEX + while (true) { + /* + * qemu_event_wait must synchronize with qemu_event_set even if it does + * not go down the slow path, so this load-acquire is needed that + * synchronizes with the first memory barrier in qemu_event_set(). + */ + unsigned value = qatomic_load_acquire(&ev->value); + if (value == EV_SET) { + break; + } + + if (value == EV_FREE) { + /* + * Leave the event reset and tell qemu_event_set that there are + * waiters. No need to retry, because there cannot be a concurrent + * busy->free transition. After the CAS, the event will be either + * set or busy. + * + * This cmpxchg doesn't have particular ordering requirements if it + * succeeds (moving the store earlier can only cause + * qemu_event_set() to issue _more_ wakeups), the failing case needs + * acquire semantics like the load above. + */ + if (qatomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) { + break; + } + } + + /* + * This is the final check for a concurrent set, so it does need + * a smp_mb() pairing with the second barrier of qemu_event_set(). + * The barrier is inside the FUTEX_WAIT system call. + */ + qemu_futex_wait(ev, EV_BUSY); + } +#else + pthread_mutex_lock(&ev->lock); + while (qatomic_read(&ev->value) != EV_SET) { + pthread_cond_wait(&ev->cond, &ev->lock); + } + pthread_mutex_unlock(&ev->lock); +#endif +} diff --git a/util/lockcnt.c b/util/lockcnt.c index d07c6cc..92c9f8c 100644 --- a/util/lockcnt.c +++ b/util/lockcnt.c @@ -12,10 +12,11 @@ #include "qemu/atomic.h" #include "trace.h" -#ifdef CONFIG_LINUX -#include "qemu/futex.h" +#ifdef HAVE_FUTEX -/* On Linux, bits 0-1 are a futex-based lock, bits 2-31 are the counter. +/* + * When futex is available, bits 0-1 are a futex-based lock, bits 2-31 are the + * counter. 
* For the mutex algorithm see Ulrich Drepper's "Futexes Are Tricky" (ok, * this is not the most relaxing citation I could make...). It is similar * to mutex2 in the paper. @@ -106,7 +107,7 @@ static bool qemu_lockcnt_cmpxchg_or_wait(QemuLockCnt *lockcnt, int *val, static void lockcnt_wake(QemuLockCnt *lockcnt) { trace_lockcnt_futex_wake(lockcnt); - qemu_futex_wake(&lockcnt->count, 1); + qemu_futex_wake_single(&lockcnt->count); } void qemu_lockcnt_inc(QemuLockCnt *lockcnt) diff --git a/util/meson.build b/util/meson.build index 780b597..3502938 100644 --- a/util/meson.build +++ b/util/meson.build @@ -11,7 +11,9 @@ if host_os != 'windows' endif util_ss.add(files('compatfd.c')) util_ss.add(files('event_notifier-posix.c')) - util_ss.add(files('mmap-alloc.c')) + if host_os != 'emscripten' + util_ss.add(files('mmap-alloc.c')) + endif freebsd_dep = [] if host_os == 'freebsd' freebsd_dep = util @@ -25,7 +27,7 @@ else util_ss.add(files('event_notifier-win32.c')) util_ss.add(files('oslib-win32.c')) util_ss.add(files('qemu-thread-win32.c')) - util_ss.add(winmm, pathcch) + util_ss.add(winmm, pathcch, synchronization) endif util_ss.add(when: linux_io_uring, if_true: files('fdmon-io_uring.c')) if glib_has_gslice @@ -33,6 +35,7 @@ if glib_has_gslice endif util_ss.add(files('defer-call.c')) util_ss.add(files('envlist.c', 'path.c', 'module.c')) +util_ss.add(files('event.c')) util_ss.add(files('host-utils.c')) util_ss.add(files('bitmap.c', 'bitops.c')) util_ss.add(files('fifo8.c')) @@ -131,4 +134,6 @@ elif cpu in ['ppc', 'ppc64'] util_ss.add(files('cpuinfo-ppc.c')) elif cpu in ['riscv32', 'riscv64'] util_ss.add(files('cpuinfo-riscv.c')) +elif cpu == 's390x' + util_ss.add(files('s390x_pci_mmio.c')) endif diff --git a/util/oslib-posix.c b/util/oslib-posix.c index a697c60..4ff577e 100644 --- a/util/oslib-posix.c +++ b/util/oslib-posix.c @@ -58,6 +58,7 @@ #include <lwp.h> #endif +#include "qemu/memalign.h" #include "qemu/mmap-alloc.h" #define MAX_MEM_PREALLOC_THREAD_COUNT 16 @@ -210,11 +211,21 @@ void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment, bool shared, const uint32_t qemu_map_flags = (shared ? QEMU_MAP_SHARED : 0) | (noreserve ? QEMU_MAP_NORESERVE : 0); size_t align = QEMU_VMALLOC_ALIGN; +#ifndef EMSCRIPTEN void *ptr = qemu_ram_mmap(-1, size, align, qemu_map_flags, 0); if (ptr == MAP_FAILED) { return NULL; } +#else + /* + * qemu_ram_mmap is not implemented for Emscripten. Use qemu_memalign + * for the anonymous allocation. noreserve is ignored as there is no swap + * space on Emscripten, and shared is ignored as there is no other + * processes on Emscripten. + */ + void *ptr = qemu_memalign(align, size); +#endif if (alignment) { *alignment = align; @@ -227,7 +238,16 @@ void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment, bool shared, void qemu_anon_ram_free(void *ptr, size_t size) { trace_qemu_anon_ram_free(ptr, size); +#ifndef EMSCRIPTEN qemu_ram_munmap(-1, ptr, size); +#else + /* + * qemu_ram_munmap is not implemented for Emscripten and qemu_memalign + * was used for the allocation. Use the corresponding freeing function + * here. + */ + qemu_vfree(ptr); +#endif } void qemu_socket_set_block(int fd) @@ -588,7 +608,15 @@ bool qemu_prealloc_mem(int fd, char *area, size_t sz, int max_threads, { static gsize initialized; int ret; +#ifndef EMSCRIPTEN size_t hpagesize = qemu_fd_getpagesize(fd); +#else + /* + * mmap-alloc.c is excluded from Emscripten build, so qemu_fd_getpagesize + * is unavailable. Fallback to the lower level implementation. 
+ */ + size_t hpagesize = qemu_real_host_page_size(); +#endif size_t numpages = DIV_ROUND_UP(sz, hpagesize); bool use_madv_populate_write; struct sigaction act; diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c index 77477c1..4773755 100644 --- a/util/qemu-sockets.c +++ b/util/qemu-sockets.c @@ -30,6 +30,7 @@ #include "qapi/qobject-input-visitor.h" #include "qapi/qobject-output-visitor.h" #include "qemu/cutils.h" +#include "qemu/option.h" #include "trace.h" #ifndef AI_ADDRCONFIG @@ -44,6 +45,14 @@ # define AI_NUMERICSERV 0 #endif +/* + * On macOS TCP_KEEPIDLE is available under a different name, TCP_KEEPALIVE. + * https://github.com/apple/darwin-xnu/blob/xnu-4570.1.46/bsd/man/man4/tcp.4#L172 + */ +#if defined(TCP_KEEPALIVE) && !defined(TCP_KEEPIDLE) +# define TCP_KEEPIDLE TCP_KEEPALIVE +#endif + static int inet_getport(struct addrinfo *e) { @@ -205,6 +214,58 @@ static int try_bind(int socket, InetSocketAddress *saddr, struct addrinfo *e) #endif } +static int inet_set_sockopts(int sock, InetSocketAddress *saddr, Error **errp) +{ + if (saddr->keep_alive) { + int keep_alive = 1; + int ret = setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, + &keep_alive, sizeof(keep_alive)); + + if (ret < 0) { + error_setg_errno(errp, errno, + "Unable to set keep-alive option on socket"); + return -1; + } +#ifdef HAVE_TCP_KEEPCNT + if (saddr->has_keep_alive_count && saddr->keep_alive_count) { + int keep_count = saddr->keep_alive_count; + ret = setsockopt(sock, IPPROTO_TCP, TCP_KEEPCNT, &keep_count, + sizeof(keep_count)); + if (ret < 0) { + error_setg_errno(errp, errno, + "Unable to set TCP keep-alive count option on socket"); + return -1; + } + } +#endif +#ifdef HAVE_TCP_KEEPIDLE + if (saddr->has_keep_alive_idle && saddr->keep_alive_idle) { + int keep_idle = saddr->keep_alive_idle; + ret = setsockopt(sock, IPPROTO_TCP, TCP_KEEPIDLE, &keep_idle, + sizeof(keep_idle)); + if (ret < 0) { + error_setg_errno(errp, errno, + "Unable to set TCP keep-alive idle option on socket"); + return -1; + } + } +#endif +#ifdef HAVE_TCP_KEEPINTVL + if (saddr->has_keep_alive_interval && saddr->keep_alive_interval) { + int keep_interval = saddr->keep_alive_interval; + ret = setsockopt(sock, IPPROTO_TCP, TCP_KEEPINTVL, &keep_interval, + sizeof(keep_interval)); + if (ret < 0) { + error_setg_errno(errp, errno, + "Unable to set TCP keep-alive interval option on socket"); + return -1; + } + } +#endif + } + return 0; +} + static int inet_listen_saddr(InetSocketAddress *saddr, int port_offset, int num, @@ -220,12 +281,6 @@ static int inet_listen_saddr(InetSocketAddress *saddr, int saved_errno = 0; bool socket_created = false; - if (saddr->keep_alive) { - error_setg(errp, "keep-alive option is not supported for passive " - "sockets"); - return -1; - } - memset(&ai,0, sizeof(ai)); ai.ai_flags = AI_PASSIVE; if (saddr->has_numeric && saddr->numeric) { @@ -287,11 +342,20 @@ static int inet_listen_saddr(InetSocketAddress *saddr, port_min = inet_getport(e); port_max = saddr->has_to ? saddr->to + port_offset : port_min; for (p = port_min; p <= port_max; p++) { + if (slisten >= 0) { + /* + * We have a socket we tried with the previous port. It cannot + * be rebound, we need to close it and create a new one. + */ + close(slisten); + slisten = -1; + } inet_setport(e, p); slisten = create_fast_reuse_socket(e); if (slisten < 0) { - /* First time we expect we might fail to create the socket + /* + * First time we expect we might fail to create the socket * eg if 'e' has AF_INET6 but ipv6 kmod is not loaded. 
* Later iterations should always succeed if first iteration * worked though, so treat that as fatal. @@ -301,40 +365,41 @@ static int inet_listen_saddr(InetSocketAddress *saddr, } else { error_setg_errno(errp, errno, "Failed to recreate failed listening socket"); - goto listen_failed; + goto fail; } } socket_created = true; rc = try_bind(slisten, saddr, e); if (rc < 0) { - if (errno != EADDRINUSE) { - error_setg_errno(errp, errno, "Failed to bind socket"); - goto listen_failed; - } - } else { - if (!listen(slisten, num)) { - goto listen_ok; + if (errno == EADDRINUSE) { + /* This port is already used, try the next one */ + continue; } - if (errno != EADDRINUSE) { - error_setg_errno(errp, errno, "Failed to listen on socket"); - goto listen_failed; + error_setg_errno(errp, errno, "Failed to bind socket"); + goto fail; + } + if (listen(slisten, num)) { + if (errno == EADDRINUSE) { + /* This port is already used, try the next one */ + continue; } + error_setg_errno(errp, errno, "Failed to listen on socket"); + goto fail; + } + /* We have a listening socket */ + if (inet_set_sockopts(slisten, saddr, errp) < 0) { + goto fail; } - /* Someone else managed to bind to the same port and beat us - * to listen on it! Socket semantics does not allow us to - * recover from this situation, so we need to recreate the - * socket to allow bind attempts for subsequent ports: - */ - close(slisten); - slisten = -1; + freeaddrinfo(res); + return slisten; } } error_setg_errno(errp, errno, socket_created ? "Failed to find an available port" : "Failed to create a socket"); -listen_failed: +fail: saved_errno = errno; if (slisten >= 0) { close(slisten); @@ -342,10 +407,6 @@ listen_failed: freeaddrinfo(res); errno = saved_errno; return -1; - -listen_ok: - freeaddrinfo(res); - return slisten; } #ifdef _WIN32 @@ -475,16 +536,9 @@ int inet_connect_saddr(InetSocketAddress *saddr, Error **errp) return sock; } - if (saddr->keep_alive) { - int val = 1; - int ret = setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, - &val, sizeof(val)); - - if (ret < 0) { - error_setg_errno(errp, errno, "Unable to set KEEPALIVE"); - close(sock); - return -1; - } + if (inet_set_sockopts(sock, saddr, errp) < 0) { + close(sock); + return -1; } return sock; @@ -591,115 +645,140 @@ err: return -1; } -/* compatibility wrapper */ -static int inet_parse_flag(const char *flagname, const char *optstr, bool *val, - Error **errp) -{ - char *end; - size_t len; - - end = strstr(optstr, ","); - if (end) { - if (end[1] == ',') { /* Reject 'ipv6=on,,foo' */ - error_setg(errp, "error parsing '%s' flag '%s'", flagname, optstr); - return -1; - } - len = end - optstr; - } else { - len = strlen(optstr); - } - if (len == 0 || (len == 3 && strncmp(optstr, "=on", len) == 0)) { - *val = true; - } else if (len == 4 && strncmp(optstr, "=off", len) == 0) { - *val = false; - } else { - error_setg(errp, "error parsing '%s' flag '%s'", flagname, optstr); - return -1; - } - return 0; -} +static QemuOptsList inet_opts = { + .name = "InetSocketAddress", + .head = QTAILQ_HEAD_INITIALIZER(inet_opts.head), + .implied_opt_name = "addr", + .desc = { + { + .name = "addr", + .type = QEMU_OPT_STRING, + }, + { + .name = "numeric", + .type = QEMU_OPT_BOOL, + }, + { + .name = "to", + .type = QEMU_OPT_NUMBER, + }, + { + .name = "ipv4", + .type = QEMU_OPT_BOOL, + }, + { + .name = "ipv6", + .type = QEMU_OPT_BOOL, + }, + { + .name = "keep-alive", + .type = QEMU_OPT_BOOL, + }, +#ifdef HAVE_TCP_KEEPCNT + { + .name = "keep-alive-count", + .type = QEMU_OPT_NUMBER, + }, +#endif +#ifdef HAVE_TCP_KEEPIDLE + 
{ + .name = "keep-alive-idle", + .type = QEMU_OPT_NUMBER, + }, +#endif +#ifdef HAVE_TCP_KEEPINTVL + { + .name = "keep-alive-interval", + .type = QEMU_OPT_NUMBER, + }, +#endif +#ifdef HAVE_IPPROTO_MPTCP + { + .name = "mptcp", + .type = QEMU_OPT_BOOL, + }, +#endif + { /* end of list */ } + }, +}; int inet_parse(InetSocketAddress *addr, const char *str, Error **errp) { - const char *optstr, *h; - char host[65]; - char port[33]; - int to; - int pos; - char *begin; - + QemuOpts *opts = qemu_opts_parse(&inet_opts, str, true, errp); + if (!opts) { + return -1; + } memset(addr, 0, sizeof(*addr)); /* parse address */ - if (str[0] == ':') { - /* no host given */ - host[0] = '\0'; - if (sscanf(str, ":%32[^,]%n", port, &pos) != 1) { - error_setg(errp, "error parsing port in address '%s'", str); - return -1; - } - } else if (str[0] == '[') { + const char *addr_str = qemu_opt_get(opts, "addr"); + if (!addr_str) { + error_setg(errp, "error parsing address ''"); + return -1; + } + if (str[0] == '[') { /* IPv6 addr */ - if (sscanf(str, "[%64[^]]]:%32[^,]%n", host, port, &pos) != 2) { - error_setg(errp, "error parsing IPv6 address '%s'", str); + const char *ip_end = strstr(addr_str, "]:"); + if (!ip_end || ip_end - addr_str < 2 || strlen(ip_end) < 3) { + error_setg(errp, "error parsing IPv6 address '%s'", addr_str); return -1; } + addr->host = g_strndup(addr_str + 1, ip_end - addr_str - 1); + addr->port = g_strdup(ip_end + 2); } else { - /* hostname or IPv4 addr */ - if (sscanf(str, "%64[^:]:%32[^,]%n", host, port, &pos) != 2) { - error_setg(errp, "error parsing address '%s'", str); + /* no host, hostname or IPv4 addr */ + const char *port = strchr(addr_str, ':'); + if (!port || strlen(port) < 2) { + error_setg(errp, "error parsing address '%s'", addr_str); return -1; } + addr->host = g_strndup(addr_str, port - addr_str); + addr->port = g_strdup(port + 1); } - addr->host = g_strdup(host); - addr->port = g_strdup(port); - /* parse options */ - optstr = str + pos; - h = strstr(optstr, ",to="); - if (h) { - h += 4; - if (sscanf(h, "%d%n", &to, &pos) != 1 || - (h[pos] != '\0' && h[pos] != ',')) { - error_setg(errp, "error parsing to= argument"); - return -1; - } + if (qemu_opt_find(opts, "numeric")) { + addr->has_numeric = true, + addr->numeric = qemu_opt_get_bool(opts, "numeric", false); + } + if (qemu_opt_find(opts, "to")) { addr->has_to = true; - addr->to = to; + addr->to = qemu_opt_get_number(opts, "to", 0); } - begin = strstr(optstr, ",ipv4"); - if (begin) { - if (inet_parse_flag("ipv4", begin + 5, &addr->ipv4, errp) < 0) { - return -1; - } + if (qemu_opt_find(opts, "ipv4")) { addr->has_ipv4 = true; + addr->ipv4 = qemu_opt_get_bool(opts, "ipv4", false); } - begin = strstr(optstr, ",ipv6"); - if (begin) { - if (inet_parse_flag("ipv6", begin + 5, &addr->ipv6, errp) < 0) { - return -1; - } + if (qemu_opt_find(opts, "ipv6")) { addr->has_ipv6 = true; + addr->ipv6 = qemu_opt_get_bool(opts, "ipv6", false); } - begin = strstr(optstr, ",keep-alive"); - if (begin) { - if (inet_parse_flag("keep-alive", begin + strlen(",keep-alive"), - &addr->keep_alive, errp) < 0) - { - return -1; - } + if (qemu_opt_find(opts, "keep-alive")) { addr->has_keep_alive = true; + addr->keep_alive = qemu_opt_get_bool(opts, "keep-alive", false); + } +#ifdef HAVE_TCP_KEEPCNT + if (qemu_opt_find(opts, "keep-alive-count")) { + addr->has_keep_alive_count = true; + addr->keep_alive_count = qemu_opt_get_number(opts, "keep-alive-count", 0); + } +#endif +#ifdef HAVE_TCP_KEEPIDLE + if (qemu_opt_find(opts, "keep-alive-idle")) { + 
addr->has_keep_alive_idle = true; + addr->keep_alive_idle = qemu_opt_get_number(opts, "keep-alive-idle", 0); } +#endif +#ifdef HAVE_TCP_KEEPINTVL + if (qemu_opt_find(opts, "keep-alive-interval")) { + addr->has_keep_alive_interval = true; + addr->keep_alive_interval = qemu_opt_get_number(opts, "keep-alive-interval", 0); + } +#endif #ifdef HAVE_IPPROTO_MPTCP - begin = strstr(optstr, ",mptcp"); - if (begin) { - if (inet_parse_flag("mptcp", begin + strlen(",mptcp"), - &addr->mptcp, errp) < 0) - { - return -1; - } + if (qemu_opt_find(opts, "mptcp")) { addr->has_mptcp = true; + addr->mptcp = qemu_opt_get_bool(opts, "mptcp", 0); } #endif return 0; diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c index b2e26e2..ba72544 100644 --- a/util/qemu-thread-posix.c +++ b/util/qemu-thread-posix.c @@ -317,154 +317,6 @@ void qemu_sem_wait(QemuSemaphore *sem) qemu_mutex_unlock(&sem->mutex); } -#ifdef __linux__ -#include "qemu/futex.h" -#else -static inline void qemu_futex_wake(QemuEvent *ev, int n) -{ - assert(ev->initialized); - pthread_mutex_lock(&ev->lock); - if (n == 1) { - pthread_cond_signal(&ev->cond); - } else { - pthread_cond_broadcast(&ev->cond); - } - pthread_mutex_unlock(&ev->lock); -} - -static inline void qemu_futex_wait(QemuEvent *ev, unsigned val) -{ - assert(ev->initialized); - pthread_mutex_lock(&ev->lock); - if (ev->value == val) { - pthread_cond_wait(&ev->cond, &ev->lock); - } - pthread_mutex_unlock(&ev->lock); -} -#endif - -/* Valid transitions: - * - free->set, when setting the event - * - busy->set, when setting the event, followed by qemu_futex_wake - * - set->free, when resetting the event - * - free->busy, when waiting - * - * set->busy does not happen (it can be observed from the outside but - * it really is set->free->busy). - * - * busy->free provably cannot happen; to enforce it, the set->free transition - * is done with an OR, which becomes a no-op if the event has concurrently - * transitioned to free or busy. - */ - -#define EV_SET 0 -#define EV_FREE 1 -#define EV_BUSY -1 - -void qemu_event_init(QemuEvent *ev, bool init) -{ -#ifndef __linux__ - pthread_mutex_init(&ev->lock, NULL); - pthread_cond_init(&ev->cond, NULL); -#endif - - ev->value = (init ? EV_SET : EV_FREE); - ev->initialized = true; -} - -void qemu_event_destroy(QemuEvent *ev) -{ - assert(ev->initialized); - ev->initialized = false; -#ifndef __linux__ - pthread_mutex_destroy(&ev->lock); - pthread_cond_destroy(&ev->cond); -#endif -} - -void qemu_event_set(QemuEvent *ev) -{ - assert(ev->initialized); - - /* - * Pairs with both qemu_event_reset() and qemu_event_wait(). - * - * qemu_event_set has release semantics, but because it *loads* - * ev->value we need a full memory barrier here. - */ - smp_mb(); - if (qatomic_read(&ev->value) != EV_SET) { - int old = qatomic_xchg(&ev->value, EV_SET); - - /* Pairs with memory barrier in kernel futex_wait system call. */ - smp_mb__after_rmw(); - if (old == EV_BUSY) { - /* There were waiters, wake them up. */ - qemu_futex_wake(ev, INT_MAX); - } - } -} - -void qemu_event_reset(QemuEvent *ev) -{ - assert(ev->initialized); - - /* - * If there was a concurrent reset (or even reset+wait), - * do nothing. Otherwise change EV_SET->EV_FREE. - */ - qatomic_or(&ev->value, EV_FREE); - - /* - * Order reset before checking the condition in the caller. - * Pairs with the first memory barrier in qemu_event_set(). 
- */ - smp_mb__after_rmw(); -} - -void qemu_event_wait(QemuEvent *ev) -{ - unsigned value; - - assert(ev->initialized); - - /* - * qemu_event_wait must synchronize with qemu_event_set even if it does - * not go down the slow path, so this load-acquire is needed that - * synchronizes with the first memory barrier in qemu_event_set(). - * - * If we do go down the slow path, there is no requirement at all: we - * might miss a qemu_event_set() here but ultimately the memory barrier in - * qemu_futex_wait() will ensure the check is done correctly. - */ - value = qatomic_load_acquire(&ev->value); - if (value != EV_SET) { - if (value == EV_FREE) { - /* - * Leave the event reset and tell qemu_event_set that there are - * waiters. No need to retry, because there cannot be a concurrent - * busy->free transition. After the CAS, the event will be either - * set or busy. - * - * This cmpxchg doesn't have particular ordering requirements if it - * succeeds (moving the store earlier can only cause qemu_event_set() - * to issue _more_ wakeups), the failing case needs acquire semantics - * like the load above. - */ - if (qatomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) { - return; - } - } - - /* - * This is the final check for a concurrent set, so it does need - * a smp_mb() pairing with the second barrier of qemu_event_set(). - * The barrier is inside the FUTEX_WAIT system call. - */ - qemu_futex_wait(ev, EV_BUSY); - } -} - static __thread NotifierList thread_exit; /* diff --git a/util/qemu-thread-win32.c b/util/qemu-thread-win32.c index a7fe3cc..ca2e0b5 100644 --- a/util/qemu-thread-win32.c +++ b/util/qemu-thread-win32.c @@ -231,135 +231,6 @@ void qemu_sem_wait(QemuSemaphore *sem) } } -/* Wrap a Win32 manual-reset event with a fast userspace path. The idea - * is to reset the Win32 event lazily, as part of a test-reset-test-wait - * sequence. Such a sequence is, indeed, how QemuEvents are used by - * RCU and other subsystems! - * - * Valid transitions: - * - free->set, when setting the event - * - busy->set, when setting the event, followed by SetEvent - * - set->free, when resetting the event - * - free->busy, when waiting - * - * set->busy does not happen (it can be observed from the outside but - * it really is set->free->busy). - * - * busy->free provably cannot happen; to enforce it, the set->free transition - * is done with an OR, which becomes a no-op if the event has concurrently - * transitioned to free or busy (and is faster than cmpxchg). - */ - -#define EV_SET 0 -#define EV_FREE 1 -#define EV_BUSY -1 - -void qemu_event_init(QemuEvent *ev, bool init) -{ - /* Manual reset. */ - ev->event = CreateEvent(NULL, TRUE, TRUE, NULL); - ev->value = (init ? EV_SET : EV_FREE); - ev->initialized = true; -} - -void qemu_event_destroy(QemuEvent *ev) -{ - assert(ev->initialized); - ev->initialized = false; - CloseHandle(ev->event); -} - -void qemu_event_set(QemuEvent *ev) -{ - assert(ev->initialized); - - /* - * Pairs with both qemu_event_reset() and qemu_event_wait(). - * - * qemu_event_set has release semantics, but because it *loads* - * ev->value we need a full memory barrier here. - */ - smp_mb(); - if (qatomic_read(&ev->value) != EV_SET) { - int old = qatomic_xchg(&ev->value, EV_SET); - - /* Pairs with memory barrier after ResetEvent. */ - smp_mb__after_rmw(); - if (old == EV_BUSY) { - /* There were waiters, wake them up. 
 */
-            SetEvent(ev->event);
-        }
-    }
-}
-
-void qemu_event_reset(QemuEvent *ev)
-{
-    assert(ev->initialized);
-
-    /*
-     * If there was a concurrent reset (or even reset+wait),
-     * do nothing. Otherwise change EV_SET->EV_FREE.
-     */
-    qatomic_or(&ev->value, EV_FREE);
-
-    /*
-     * Order reset before checking the condition in the caller.
-     * Pairs with the first memory barrier in qemu_event_set().
-     */
-    smp_mb__after_rmw();
-}
-
-void qemu_event_wait(QemuEvent *ev)
-{
-    unsigned value;
-
-    assert(ev->initialized);
-
-    /*
-     * qemu_event_wait must synchronize with qemu_event_set even if it does
-     * not go down the slow path, so this load-acquire is needed that
-     * synchronizes with the first memory barrier in qemu_event_set().
-     *
-     * If we do go down the slow path, there is no requirement at all: we
-     * might miss a qemu_event_set() here but ultimately the memory barrier in
-     * qemu_futex_wait() will ensure the check is done correctly.
-     */
-    value = qatomic_load_acquire(&ev->value);
-    if (value != EV_SET) {
-        if (value == EV_FREE) {
-            /*
-             * Here the underlying kernel event is reset, but qemu_event_set is
-             * not yet going to call SetEvent. However, there will be another
-             * check for EV_SET below when setting EV_BUSY. At that point it
-             * is safe to call WaitForSingleObject.
-             */
-            ResetEvent(ev->event);
-
-            /*
-             * It is not clear whether ResetEvent provides this barrier; kernel
-             * APIs (KeResetEvent/KeClearEvent) do not. Better safe than sorry!
-             */
-            smp_mb();
-
-            /*
-             * Leave the event reset and tell qemu_event_set that there are
-             * waiters. No need to retry, because there cannot be a concurrent
-             * busy->free transition. After the CAS, the event will be either
-             * set or busy.
-             */
-            if (qatomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) {
-                return;
-            }
-        }
-
-        /*
-         * ev->value is now EV_BUSY. Since we didn't observe EV_SET,
-         * qemu_event_set() must observe EV_BUSY and call SetEvent().
-         */
-        WaitForSingleObject(ev->event, INFINITE);
-    }
-}
-
 struct QemuThreadData {
     /* Passed to win32_start_routine. */
     void *(*start_routine)(void *);
diff --git a/util/rcu.c b/util/rcu.c
--- a/util/rcu.c
+++ b/util/rcu.c
@@ -20,8 +20,8 @@
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * License along with this library; if not, see
+ * <https://www.gnu.org/licenses/>.
  *
  * IBM's contributions to this file may be relicensed under LGPLv2 or later.
  */
diff --git a/util/s390x_pci_mmio.c b/util/s390x_pci_mmio.c
new file mode 100644
index 0000000..5ab24fa
--- /dev/null
+++ b/util/s390x_pci_mmio.c
@@ -0,0 +1,146 @@
+/*
+ * s390x PCI MMIO definitions
+ *
+ * Copyright 2025 IBM Corp.
+ * Author(s): Farhan Ali <alifm@linux.ibm.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include <sys/syscall.h>
+#include "qemu/s390x_pci_mmio.h"
+#include "elf.h"
+
+union register_pair {
+    unsigned __int128 pair;
+    struct {
+        uint64_t even;
+        uint64_t odd;
+    };
+};
+
+static bool is_mio_supported;
+
+static __attribute__((constructor)) void check_is_mio_supported(void)
+{
+    is_mio_supported = !!(qemu_getauxval(AT_HWCAP) & HWCAP_S390_PCI_MIO);
+}
+
+static uint64_t s390x_pcilgi(const void *ioaddr, size_t len)
+{
+    union register_pair ioaddr_len = { .even = (uint64_t)ioaddr,
+                                       .odd = len };
+    uint64_t val;
+    int cc;
+
+    asm volatile(
+        /* pcilgi */
+        ".insn rre,0xb9d60000,%[val],%[ioaddr_len]\n"
+        "ipm %[cc]\n"
+        "srl %[cc],28\n"
+        : [cc] "=d"(cc), [val] "=d"(val),
+          [ioaddr_len] "+d"(ioaddr_len.pair) :: "cc");
+
+    if (cc) {
+        val = -1ULL;
+    }
+
+    return val;
+}
+
+static void s390x_pcistgi(void *ioaddr, uint64_t val, size_t len)
+{
+    union register_pair ioaddr_len = {.even = (uint64_t)ioaddr, .odd = len};
+
+    asm volatile (
+        /* pcistgi */
+        ".insn rre,0xb9d40000,%[val],%[ioaddr_len]\n"
+        : [ioaddr_len] "+d" (ioaddr_len.pair)
+        : [val] "d" (val)
+        : "cc", "memory");
+}
+
+uint8_t s390x_pci_mmio_read_8(const void *ioaddr)
+{
+    uint8_t val = 0;
+
+    if (is_mio_supported) {
+        val = s390x_pcilgi(ioaddr, sizeof(val));
+    } else {
+        syscall(__NR_s390_pci_mmio_read, ioaddr, &val, sizeof(val));
+    }
+    return val;
+}
+
+uint16_t s390x_pci_mmio_read_16(const void *ioaddr)
+{
+    uint16_t val = 0;
+
+    if (is_mio_supported) {
+        val = s390x_pcilgi(ioaddr, sizeof(val));
+    } else {
+        syscall(__NR_s390_pci_mmio_read, ioaddr, &val, sizeof(val));
+    }
+    return val;
+}
+
+uint32_t s390x_pci_mmio_read_32(const void *ioaddr)
+{
+    uint32_t val = 0;
+
+    if (is_mio_supported) {
+        val = s390x_pcilgi(ioaddr, sizeof(val));
+    } else {
+        syscall(__NR_s390_pci_mmio_read, ioaddr, &val, sizeof(val));
+    }
+    return val;
+}
+
+uint64_t s390x_pci_mmio_read_64(const void *ioaddr)
+{
+    uint64_t val = 0;
+
+    if (is_mio_supported) {
+        val = s390x_pcilgi(ioaddr, sizeof(val));
+    } else {
+        syscall(__NR_s390_pci_mmio_read, ioaddr, &val, sizeof(val));
+    }
+    return val;
+}
+
+void s390x_pci_mmio_write_8(void *ioaddr, uint8_t val)
+{
+    if (is_mio_supported) {
+        s390x_pcistgi(ioaddr, val, sizeof(val));
+    } else {
+        syscall(__NR_s390_pci_mmio_write, ioaddr, &val, sizeof(val));
+    }
+}
+
+void s390x_pci_mmio_write_16(void *ioaddr, uint16_t val)
+{
+    if (is_mio_supported) {
+        s390x_pcistgi(ioaddr, val, sizeof(val));
+    } else {
+        syscall(__NR_s390_pci_mmio_write, ioaddr, &val, sizeof(val));
+    }
+}
+
+void s390x_pci_mmio_write_32(void *ioaddr, uint32_t val)
+{
+    if (is_mio_supported) {
+        s390x_pcistgi(ioaddr, val, sizeof(val));
+    } else {
+        syscall(__NR_s390_pci_mmio_write, ioaddr, &val, sizeof(val));
+    }
+}
+
+void s390x_pci_mmio_write_64(void *ioaddr, uint64_t val)
+{
+    if (is_mio_supported) {
+        s390x_pcistgi(ioaddr, val, sizeof(val));
+    } else {
+        syscall(__NR_s390_pci_mmio_write, ioaddr, &val, sizeof(val));
+    }
+}
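
On s390x, memory-mapped PCI resources cannot be driven with ordinary loads and
stores: the helpers above either execute the PCILGI/PCISTGI instructions when
the hardware advertises HWCAP_S390_PCI_MIO, or fall back to the
s390_pci_mmio_read/s390_pci_mmio_write syscalls. A minimal caller sketch, not
part of the patch; `bar` stands for a pointer into a PCI BAR mapped elsewhere
(for example by vfio), and the register offsets are made up for illustration:

    #include "qemu/osdep.h"
    #include "qemu/s390x_pci_mmio.h"

    /* Poll a hypothetical 32-bit status register and ring a doorbell. */
    static void ack_device(uint8_t *bar)
    {
        uint32_t status = s390x_pci_mmio_read_32(bar + 0x04);

        if (status & 1) {
            s390x_pci_mmio_write_32(bar + 0x10, 1);
        }
    }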
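
The state-machine comments in util/event.c above describe the
test-reset-test-wait idiom that QemuEvent users such as RCU follow. A minimal
sketch of that idiom, assuming the QEMU headers; the event, the `done` flag
and the function names are illustrative, not part of the patch:

    #include "qemu/osdep.h"
    #include "qemu/atomic.h"
    #include "qemu/thread.h"

    static QemuEvent ev;    /* set up once with qemu_event_init(&ev, false) */
    static int done;

    /* Setter: publish the condition first, then SET the event. */
    static void finish(void)
    {
        qatomic_set(&done, 1);
        qemu_event_set(&ev);      /* FREE/BUSY -> SET, wakes any waiter */
    }

    /* Waiter: test, reset (SET -> FREE), re-test, then wait (FREE -> BUSY). */
    static void wait_for_done(void)
    {
        while (!qatomic_read(&done)) {
            qemu_event_reset(&ev);
            if (qatomic_read(&done)) {
                break;            /* finish() raced with the reset; don't block */
            }
            qemu_event_wait(&ev);
        }
    }

The re-check between qemu_event_reset() and qemu_event_wait() is what the
"order reset before checking the condition" barrier pairs with: a waiter that
observes the condition after resetting skips the wait instead of sleeping
through a wakeup that has already happened.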
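
inet_parse() now goes through QemuOpts instead of hand-rolled
sscanf()/strstr() scanning, which is what lets the
keep-alive-count/idle/interval options above be compiled in per platform. A
usage sketch, assuming the QEMU tree headers; the address string and the
surrounding function are illustrative:

    #include "qemu/osdep.h"
    #include "qapi/error.h"
    #include "qemu/sockets.h"

    static int parse_example(Error **errp)
    {
        InetSocketAddress addr = {};

        /* host "::1", port 4444, range up to port 4448, keep-alive on */
        if (inet_parse(&addr, "[::1]:4444,to=4448,ipv6=on,keep-alive=on",
                       errp) < 0) {
            return -1;
        }

        /* addr.has_to && addr.to == 4448; addr.keep_alive is true */
        g_free(addr.host);
        g_free(addr.port);
        return 0;
    }

Note that the old rejection of keep-alive for passive sockets is gone:
inet_listen_saddr() now applies the keep-alive options through
inet_set_sockopts() once the listening socket exists.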