about summary refs log tree commit diff
path: root/util
diff options
context:
space:
mode:
author Richard Henderson <richard.henderson@linaro.org> 2022-01-07 17:24:24 -0800
committer Richard Henderson <richard.henderson@linaro.org> 2022-01-07 17:24:24 -0800
commit d70075373af51b6aa1d637962c962120e201fc98 (patch)
tree 75543f075b2e6a53db655e9e022a2feacc5d0f4a /util
parent c87507a8cfb5b11bf1773c0214ee76ba9382179c (diff)
parent ca745d2277496464b54fd832c15c45d0227325bb (diff)
download qemu-d70075373af51b6aa1d637962c962120e201fc98.zip
qemu-d70075373af51b6aa1d637962c962120e201fc98.tar.gz
qemu-d70075373af51b6aa1d637962c962120e201fc98.tar.bz2
Merge tag 'for_upstream' of git://git.kernel.org/pub/scm/virt/kvm/mst/qemu into staging
virtio,pci,pc: features,fixes,cleanups

New virtio mem options.
A vhost-user cleanup.
Control over smbios entry point type.
Config interrupt support for vdpa.
Fixes, cleanups all over the place.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Fri 07 Jan 2022 04:30:41 PM PST
# gpg: using RSA key 5D09FD0871C8F85B94CA8A0D281F0DB8D28D5469
# gpg: issuer "mst@redhat.com"
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>" [undefined]
# gpg: aka "Michael S. Tsirkin <mst@redhat.com>" [undefined]
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg: There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17 0970 C350 3912 AFBE 8E67
# Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA 8A0D 281F 0DB8 D28D 5469

* tag 'for_upstream' of git://git.kernel.org/pub/scm/virt/kvm/mst/qemu: (55 commits)
  tests: acpi: Add updated TPM related tables
  acpi: tpm: Add missing device identification objects
  tests: acpi: prepare for updated TPM related tables
  virtio/vhost-vsock: don't double close vhostfd, remove redundant cleanup
  hw/scsi/vhost-scsi: don't double close vhostfd on error
  hw/scsi/vhost-scsi: don't leak vqs on error
  docs: reSTify virtio-balloon-stats documentation and move to docs/interop
  hw/i386/pc: Add missing property descriptions
  acpihp: simplify acpi_pcihp_disable_root_bus
  tests: acpi: SLIC: update expected blobs
  tests: acpi: add SLIC table test
  tests: acpi: whitelist expected blobs before changing them
  acpi: fix QEMU crash when started with SLIC table
  intel-iommu: correctly check passthrough during translation
  virtio-mem: Set "unplugged-inaccessible=auto" for the 7.0 machine on x86
  virtio-mem: Support VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE
  linux-headers: sync VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE
  MAINTAINERS: Add a separate entry for acpi/VIOT tables
  virtio: signal after wrapping packed used_idx
  virtio-mem: Support "prealloc=on" option
  ...
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Diffstat (limited to 'util')
-rw-r--r-- util/oslib-posix.c 232
1 files changed, 177 insertions, 55 deletions
diff --git a/util/oslib-posix.c b/util/oslib-posix.c
index e8bdb02..9efdc74 100644
--- a/util/oslib-posix.c
+++ b/util/oslib-posix.c
@@ -35,11 +35,13 @@
#include "sysemu/sysemu.h"
#include "trace.h"
#include "qapi/error.h"
+#include "qemu/error-report.h"
#include "qemu/sockets.h"
#include "qemu/thread.h"
#include <libgen.h>
#include "qemu/cutils.h"
#include "qemu/compiler.h"
+#include "qemu/units.h"
#ifdef CONFIG_LINUX
#include <sys/syscall.h>
@@ -73,22 +75,32 @@
#define MAX_MEM_PREALLOC_THREAD_COUNT 16
+struct MemsetThread;
+
+typedef struct MemsetContext {
+ bool all_threads_created;
+ bool any_thread_failed;
+ struct MemsetThread *threads;
+ int num_threads;
+} MemsetContext;
+
struct MemsetThread {
char *addr;
size_t numpages;
size_t hpagesize;
QemuThread pgthread;
sigjmp_buf env;
+ MemsetContext *context;
};
typedef struct MemsetThread MemsetThread;
-static MemsetThread *memset_thread;
-static int memset_num_threads;
-static bool memset_thread_failed;
+/* used by sigbus_handler() */
+static MemsetContext *sigbus_memset_context;
+struct sigaction sigbus_oldact;
+static QemuMutex sigbus_mutex;
static QemuMutex page_mutex;
static QemuCond page_cond;
-static bool threads_created_flag;
int qemu_get_thread_id(void)
{
@@ -436,22 +448,50 @@ const char *qemu_get_exec_dir(void)
return exec_dir;
}
+#ifdef CONFIG_LINUX
+static void sigbus_handler(int signal, siginfo_t *siginfo, void *ctx)
+#else /* CONFIG_LINUX */
static void sigbus_handler(int signal)
+#endif /* CONFIG_LINUX */
{
int i;
- if (memset_thread) {
- for (i = 0; i < memset_num_threads; i++) {
- if (qemu_thread_is_self(&memset_thread[i].pgthread)) {
- siglongjmp(memset_thread[i].env, 1);
+
+ if (sigbus_memset_context) {
+ for (i = 0; i < sigbus_memset_context->num_threads; i++) {
+ MemsetThread *thread = &sigbus_memset_context->threads[i];
+
+ if (qemu_thread_is_self(&thread->pgthread)) {
+ siglongjmp(thread->env, 1);
}
}
}
+
+#ifdef CONFIG_LINUX
+ /*
+ * We assume that the MCE SIGBUS handler could have been registered. We
+ * should never receive BUS_MCEERR_AO on any of our threads, but only on
+ * the main thread registered for PR_MCE_KILL_EARLY. Further, we should not
+ * receive BUS_MCEERR_AR triggered by action of other threads on one of
+ * our threads. So, no need to check for unrelated SIGBUS when seeing one
+ * for our threads.
+ *
+ * We will forward to the MCE handler, which will either handle the SIGBUS
+ * or reinstall the default SIGBUS handler and reraise the SIGBUS. The
+ * default SIGBUS handler will crash the process, so we don't care.
+ */
+ if (sigbus_oldact.sa_flags & SA_SIGINFO) {
+ sigbus_oldact.sa_sigaction(signal, siginfo, ctx);
+ return;
+ }
+#endif /* CONFIG_LINUX */
+ warn_report("os_mem_prealloc: unrelated SIGBUS detected and ignored");
}
static void *do_touch_pages(void *arg)
{
MemsetThread *memset_args = (MemsetThread *)arg;
sigset_t set, oldset;
+ int ret = 0;
/*
* On Linux, the page faults from the loop below can cause mmap_sem
@@ -459,7 +499,7 @@ static void *do_touch_pages(void *arg)
* clearing until all threads have been created.
*/
qemu_mutex_lock(&page_mutex);
- while(!threads_created_flag){
+ while (!memset_args->context->all_threads_created) {
qemu_cond_wait(&page_cond, &page_mutex);
}
qemu_mutex_unlock(&page_mutex);
@@ -470,7 +510,7 @@ static void *do_touch_pages(void *arg)
pthread_sigmask(SIG_UNBLOCK, &set, &oldset);
if (sigsetjmp(memset_args->env, 1)) {
- memset_thread_failed = true;
+ ret = -EFAULT;
} else {
char *addr = memset_args->addr;
size_t numpages = memset_args->numpages;
@@ -484,20 +524,37 @@ static void *do_touch_pages(void *arg)
*
* 'volatile' to stop compiler optimizing this away
* to a no-op
- *
- * TODO: get a better solution from kernel so we
- * don't need to write at all so we don't cause
- * wear on the storage backing the region...
*/
*(volatile char *)addr = *addr;
addr += hpagesize;
}
}
pthread_sigmask(SIG_SETMASK, &oldset, NULL);
- return NULL;
+ return (void *)(uintptr_t)ret;
}
-static inline int get_memset_num_threads(int smp_cpus)
+static void *do_madv_populate_write_pages(void *arg)
+{
+ MemsetThread *memset_args = (MemsetThread *)arg;
+ const size_t size = memset_args->numpages * memset_args->hpagesize;
+ char * const addr = memset_args->addr;
+ int ret = 0;
+
+ /* See do_touch_pages(). */
+ qemu_mutex_lock(&page_mutex);
+ while (!memset_args->context->all_threads_created) {
+ qemu_cond_wait(&page_cond, &page_mutex);
+ }
+ qemu_mutex_unlock(&page_mutex);
+
+ if (size && qemu_madvise(addr, size, QEMU_MADV_POPULATE_WRITE)) {
+ ret = -errno;
+ }
+ return (void *)(uintptr_t)ret;
+}
+
+static inline int get_memset_num_threads(size_t hpagesize, size_t numpages,
+ int smp_cpus)
{
long host_procs = sysconf(_SC_NPROCESSORS_ONLN);
int ret = 1;
@@ -505,17 +562,27 @@ static inline int get_memset_num_threads(int smp_cpus)
if (host_procs > 0) {
ret = MIN(MIN(host_procs, MAX_MEM_PREALLOC_THREAD_COUNT), smp_cpus);
}
+
+ /* Especially with gigantic pages, don't create more threads than pages. */
+ ret = MIN(ret, numpages);
+ /* Don't start threads to prealloc comparatively little memory. */
+ ret = MIN(ret, MAX(1, hpagesize * numpages / (64 * MiB)));
+
/* In case sysconf() fails, we fall back to single threaded */
return ret;
}
-static bool touch_all_pages(char *area, size_t hpagesize, size_t numpages,
- int smp_cpus)
+static int touch_all_pages(char *area, size_t hpagesize, size_t numpages,
+ int smp_cpus, bool use_madv_populate_write)
{
static gsize initialized = 0;
+ MemsetContext context = {
+ .num_threads = get_memset_num_threads(hpagesize, numpages, smp_cpus),
+ };
size_t numpages_per_thread, leftover;
+ void *(*touch_fn)(void *);
+ int ret = 0, i = 0;
char *addr = area;
- int i = 0;
if (g_once_init_enter(&initialized)) {
qemu_mutex_init(&page_mutex);
@@ -523,66 +590,121 @@ static bool touch_all_pages(char *area, size_t hpagesize, size_t numpages,
g_once_init_leave(&initialized, 1);
}
- memset_thread_failed = false;
- threads_created_flag = false;
- memset_num_threads = get_memset_num_threads(smp_cpus);
- memset_thread = g_new0(MemsetThread, memset_num_threads);
- numpages_per_thread = numpages / memset_num_threads;
- leftover = numpages % memset_num_threads;
- for (i = 0; i < memset_num_threads; i++) {
- memset_thread[i].addr = addr;
- memset_thread[i].numpages = numpages_per_thread + (i < leftover);
- memset_thread[i].hpagesize = hpagesize;
- qemu_thread_create(&memset_thread[i].pgthread, "touch_pages",
- do_touch_pages, &memset_thread[i],
+ if (use_madv_populate_write) {
+ /* Avoid creating a single thread for MADV_POPULATE_WRITE */
+ if (context.num_threads == 1) {
+ if (qemu_madvise(area, hpagesize * numpages,
+ QEMU_MADV_POPULATE_WRITE)) {
+ return -errno;
+ }
+ return 0;
+ }
+ touch_fn = do_madv_populate_write_pages;
+ } else {
+ touch_fn = do_touch_pages;
+ }
+
+ context.threads = g_new0(MemsetThread, context.num_threads);
+ numpages_per_thread = numpages / context.num_threads;
+ leftover = numpages % context.num_threads;
+ for (i = 0; i < context.num_threads; i++) {
+ context.threads[i].addr = addr;
+ context.threads[i].numpages = numpages_per_thread + (i < leftover);
+ context.threads[i].hpagesize = hpagesize;
+ context.threads[i].context = &context;
+ qemu_thread_create(&context.threads[i].pgthread, "touch_pages",
+ touch_fn, &context.threads[i],
QEMU_THREAD_JOINABLE);
- addr += memset_thread[i].numpages * hpagesize;
+ addr += context.threads[i].numpages * hpagesize;
+ }
+
+ if (!use_madv_populate_write) {
+ sigbus_memset_context = &context;
}
qemu_mutex_lock(&page_mutex);
- threads_created_flag = true;
+ context.all_threads_created = true;
qemu_cond_broadcast(&page_cond);
qemu_mutex_unlock(&page_mutex);
- for (i = 0; i < memset_num_threads; i++) {
- qemu_thread_join(&memset_thread[i].pgthread);
+ for (i = 0; i < context.num_threads; i++) {
+ int tmp = (uintptr_t)qemu_thread_join(&context.threads[i].pgthread);
+
+ if (tmp) {
+ ret = tmp;
+ }
}
- g_free(memset_thread);
- memset_thread = NULL;
- return memset_thread_failed;
+ if (!use_madv_populate_write) {
+ sigbus_memset_context = NULL;
+ }
+ g_free(context.threads);
+
+ return ret;
+}
+
+static bool madv_populate_write_possible(char *area, size_t pagesize)
+{
+ return !qemu_madvise(area, pagesize, QEMU_MADV_POPULATE_WRITE) ||
+ errno != EINVAL;
}
void os_mem_prealloc(int fd, char *area, size_t memory, int smp_cpus,
Error **errp)
{
+ static gsize initialized;
int ret;
- struct sigaction act, oldact;
size_t hpagesize = qemu_fd_getpagesize(fd);
size_t numpages = DIV_ROUND_UP(memory, hpagesize);
+ bool use_madv_populate_write;
+ struct sigaction act;
- memset(&act, 0, sizeof(act));
- act.sa_handler = &sigbus_handler;
- act.sa_flags = 0;
+ /*
+ * Sense on every invocation, as MADV_POPULATE_WRITE cannot be used for
+ * some special mappings, such as mapping /dev/mem.
+ */
+ use_madv_populate_write = madv_populate_write_possible(area, hpagesize);
- ret = sigaction(SIGBUS, &act, &oldact);
- if (ret) {
- error_setg_errno(errp, errno,
- "os_mem_prealloc: failed to install signal handler");
- return;
+ if (!use_madv_populate_write) {
+ if (g_once_init_enter(&initialized)) {
+ qemu_mutex_init(&sigbus_mutex);
+ g_once_init_leave(&initialized, 1);
+ }
+
+ qemu_mutex_lock(&sigbus_mutex);
+ memset(&act, 0, sizeof(act));
+#ifdef CONFIG_LINUX
+ act.sa_sigaction = &sigbus_handler;
+ act.sa_flags = SA_SIGINFO;
+#else /* CONFIG_LINUX */
+ act.sa_handler = &sigbus_handler;
+ act.sa_flags = 0;
+#endif /* CONFIG_LINUX */
+
+ ret = sigaction(SIGBUS, &act, &sigbus_oldact);
+ if (ret) {
+ error_setg_errno(errp, errno,
+ "os_mem_prealloc: failed to install signal handler");
+ return;
+ }
}
/* touch pages simultaneously */
- if (touch_all_pages(area, hpagesize, numpages, smp_cpus)) {
- error_setg(errp, "os_mem_prealloc: Insufficient free host memory "
- "pages available to allocate guest RAM");
+ ret = touch_all_pages(area, hpagesize, numpages, smp_cpus,
+ use_madv_populate_write);
+ if (ret) {
+ error_setg_errno(errp, -ret,
+ "os_mem_prealloc: preallocating memory failed");
}
- ret = sigaction(SIGBUS, &oldact, NULL);
- if (ret) {
- /* Terminate QEMU since it can't recover from error */
- perror("os_mem_prealloc: failed to reinstall signal handler");
- exit(1);
+ if (!use_madv_populate_write) {
+ ret = sigaction(SIGBUS, &sigbus_oldact, NULL);
+ if (ret) {
+ /* Terminate QEMU since it can't recover from error */
+ perror("os_mem_prealloc: failed to reinstall signal handler");
+ exit(1);
+ }
+ qemu_mutex_unlock(&sigbus_mutex);
}
}