aboutsummaryrefslogtreecommitdiff
path: root/util
diff options
context:
space:
mode:
Diffstat (limited to 'util')
-rw-r--r--util/meson.build1
-rw-r--r--util/oslib-posix.c39
-rw-r--r--util/oslib-win32.c8
-rw-r--r--util/qemu-thread-posix.c70
-rw-r--r--util/qemu-thread-win32.c12
-rw-r--r--util/thread-context.c362
6 files changed, 473 insertions, 19 deletions
diff --git a/util/meson.build b/util/meson.build
index 5e28213..c0a7bc5 100644
--- a/util/meson.build
+++ b/util/meson.build
@@ -1,4 +1,5 @@
util_ss.add(files('osdep.c', 'cutils.c', 'unicode.c', 'qemu-timer-common.c'))
+util_ss.add(files('thread-context.c'), numa)
if not config_host_data.get('CONFIG_ATOMIC64')
util_ss.add(files('atomic64.c'))
endif
diff --git a/util/oslib-posix.c b/util/oslib-posix.c
index 827a7aa..59a891b 100644
--- a/util/oslib-posix.c
+++ b/util/oslib-posix.c
@@ -42,6 +42,7 @@
#include "qemu/cutils.h"
#include "qemu/compiler.h"
#include "qemu/units.h"
+#include "qemu/thread-context.h"
#ifdef CONFIG_LINUX
#include <sys/syscall.h>
@@ -329,7 +330,7 @@ static void sigbus_handler(int signal)
return;
}
#endif /* CONFIG_LINUX */
- warn_report("os_mem_prealloc: unrelated SIGBUS detected and ignored");
+ warn_report("qemu_prealloc_mem: unrelated SIGBUS detected and ignored");
}
static void *do_touch_pages(void *arg)
@@ -399,13 +400,13 @@ static void *do_madv_populate_write_pages(void *arg)
}
static inline int get_memset_num_threads(size_t hpagesize, size_t numpages,
- int smp_cpus)
+ int max_threads)
{
long host_procs = sysconf(_SC_NPROCESSORS_ONLN);
int ret = 1;
if (host_procs > 0) {
- ret = MIN(MIN(host_procs, MAX_MEM_PREALLOC_THREAD_COUNT), smp_cpus);
+ ret = MIN(MIN(host_procs, MAX_MEM_PREALLOC_THREAD_COUNT), max_threads);
}
/* Especially with gigantic pages, don't create more threads than pages. */
@@ -418,11 +419,12 @@ static inline int get_memset_num_threads(size_t hpagesize, size_t numpages,
}
static int touch_all_pages(char *area, size_t hpagesize, size_t numpages,
- int smp_cpus, bool use_madv_populate_write)
+ int max_threads, ThreadContext *tc,
+ bool use_madv_populate_write)
{
static gsize initialized = 0;
MemsetContext context = {
- .num_threads = get_memset_num_threads(hpagesize, numpages, smp_cpus),
+ .num_threads = get_memset_num_threads(hpagesize, numpages, max_threads),
};
size_t numpages_per_thread, leftover;
void *(*touch_fn)(void *);
@@ -457,9 +459,16 @@ static int touch_all_pages(char *area, size_t hpagesize, size_t numpages,
context.threads[i].numpages = numpages_per_thread + (i < leftover);
context.threads[i].hpagesize = hpagesize;
context.threads[i].context = &context;
- qemu_thread_create(&context.threads[i].pgthread, "touch_pages",
- touch_fn, &context.threads[i],
- QEMU_THREAD_JOINABLE);
+ if (tc) {
+ thread_context_create_thread(tc, &context.threads[i].pgthread,
+ "touch_pages",
+ touch_fn, &context.threads[i],
+ QEMU_THREAD_JOINABLE);
+ } else {
+ qemu_thread_create(&context.threads[i].pgthread, "touch_pages",
+ touch_fn, &context.threads[i],
+ QEMU_THREAD_JOINABLE);
+ }
addr += context.threads[i].numpages * hpagesize;
}
@@ -494,13 +503,13 @@ static bool madv_populate_write_possible(char *area, size_t pagesize)
errno != EINVAL;
}
-void os_mem_prealloc(int fd, char *area, size_t memory, int smp_cpus,
- Error **errp)
+void qemu_prealloc_mem(int fd, char *area, size_t sz, int max_threads,
+ ThreadContext *tc, Error **errp)
{
static gsize initialized;
int ret;
size_t hpagesize = qemu_fd_getpagesize(fd);
- size_t numpages = DIV_ROUND_UP(memory, hpagesize);
+ size_t numpages = DIV_ROUND_UP(sz, hpagesize);
bool use_madv_populate_write;
struct sigaction act;
@@ -530,24 +539,24 @@ void os_mem_prealloc(int fd, char *area, size_t memory, int smp_cpus,
if (ret) {
qemu_mutex_unlock(&sigbus_mutex);
error_setg_errno(errp, errno,
- "os_mem_prealloc: failed to install signal handler");
+ "qemu_prealloc_mem: failed to install signal handler");
return;
}
}
/* touch pages simultaneously */
- ret = touch_all_pages(area, hpagesize, numpages, smp_cpus,
+ ret = touch_all_pages(area, hpagesize, numpages, max_threads, tc,
use_madv_populate_write);
if (ret) {
error_setg_errno(errp, -ret,
- "os_mem_prealloc: preallocating memory failed");
+ "qemu_prealloc_mem: preallocating memory failed");
}
if (!use_madv_populate_write) {
ret = sigaction(SIGBUS, &sigbus_oldact, NULL);
if (ret) {
/* Terminate QEMU since it can't recover from error */
- perror("os_mem_prealloc: failed to reinstall signal handler");
+ perror("qemu_prealloc_mem: failed to reinstall signal handler");
exit(1);
}
qemu_mutex_unlock(&sigbus_mutex);
diff --git a/util/oslib-win32.c b/util/oslib-win32.c
index 5723d3e..a67cb38 100644
--- a/util/oslib-win32.c
+++ b/util/oslib-win32.c
@@ -268,14 +268,14 @@ int getpagesize(void)
return system_info.dwPageSize;
}
-void os_mem_prealloc(int fd, char *area, size_t memory, int smp_cpus,
- Error **errp)
+void qemu_prealloc_mem(int fd, char *area, size_t sz, int max_threads,
+ ThreadContext *tc, Error **errp)
{
int i;
size_t pagesize = qemu_real_host_page_size();
- memory = (memory + pagesize - 1) & -pagesize;
- for (i = 0; i < memory / pagesize; i++) {
+ sz = (sz + pagesize - 1) & -pagesize;
+ for (i = 0; i < sz / pagesize; i++) {
memset(area + pagesize * i, 0, 1);
}
}
diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c
index ac1d56e..bae938c 100644
--- a/util/qemu-thread-posix.c
+++ b/util/qemu-thread-posix.c
@@ -16,6 +16,7 @@
#include "qemu/notify.h"
#include "qemu-thread-common.h"
#include "qemu/tsan.h"
+#include "qemu/bitmap.h"
static bool name_threads;
@@ -552,6 +553,75 @@ void qemu_thread_create(QemuThread *thread, const char *name,
pthread_attr_destroy(&attr);
}
+int qemu_thread_set_affinity(QemuThread *thread, unsigned long *host_cpus,
+ unsigned long nbits)
+{
+#if defined(CONFIG_PTHREAD_AFFINITY_NP)
+ const size_t setsize = CPU_ALLOC_SIZE(nbits);
+ unsigned long value;
+ cpu_set_t *cpuset;
+ int err;
+
+ cpuset = CPU_ALLOC(nbits);
+ g_assert(cpuset);
+
+ CPU_ZERO_S(setsize, cpuset);
+ value = find_first_bit(host_cpus, nbits);
+ while (value < nbits) {
+ CPU_SET_S(value, setsize, cpuset);
+ value = find_next_bit(host_cpus, nbits, value + 1);
+ }
+
+ err = pthread_setaffinity_np(thread->thread, setsize, cpuset);
+ CPU_FREE(cpuset);
+ return err;
+#else
+ return -ENOSYS;
+#endif
+}
+
+int qemu_thread_get_affinity(QemuThread *thread, unsigned long **host_cpus,
+ unsigned long *nbits)
+{
+#if defined(CONFIG_PTHREAD_AFFINITY_NP)
+ unsigned long tmpbits;
+ cpu_set_t *cpuset;
+ size_t setsize;
+ int i, err;
+
+ tmpbits = CPU_SETSIZE;
+ while (true) {
+ setsize = CPU_ALLOC_SIZE(tmpbits);
+ cpuset = CPU_ALLOC(tmpbits);
+ g_assert(cpuset);
+
+ err = pthread_getaffinity_np(thread->thread, setsize, cpuset);
+ if (err) {
+ CPU_FREE(cpuset);
+ if (err != -EINVAL) {
+ return err;
+ }
+ tmpbits *= 2;
+ } else {
+ break;
+ }
+ }
+
+ /* Convert the result into a proper bitmap. */
+ *nbits = tmpbits;
+ *host_cpus = bitmap_new(tmpbits);
+ for (i = 0; i < tmpbits; i++) {
+ if (CPU_ISSET(i, cpuset)) {
+ set_bit(i, *host_cpus);
+ }
+ }
+ CPU_FREE(cpuset);
+ return 0;
+#else
+ return -ENOSYS;
+#endif
+}
+
void qemu_thread_get_self(QemuThread *thread)
{
thread->thread = pthread_self();
diff --git a/util/qemu-thread-win32.c b/util/qemu-thread-win32.c
index b9a467d..69db254 100644
--- a/util/qemu-thread-win32.c
+++ b/util/qemu-thread-win32.c
@@ -477,6 +477,18 @@ void qemu_thread_create(QemuThread *thread, const char *name,
thread->data = data;
}
+int qemu_thread_set_affinity(QemuThread *thread, unsigned long *host_cpus,
+ unsigned long nbits)
+{
+ return -ENOSYS;
+}
+
+int qemu_thread_get_affinity(QemuThread *thread, unsigned long **host_cpus,
+ unsigned long *nbits)
+{
+ return -ENOSYS;
+}
+
void qemu_thread_get_self(QemuThread *thread)
{
thread->data = qemu_thread_data;
diff --git a/util/thread-context.c b/util/thread-context.c
new file mode 100644
index 0000000..4138245
--- /dev/null
+++ b/util/thread-context.c
@@ -0,0 +1,362 @@
+/*
+ * QEMU Thread Context
+ *
+ * Copyright Red Hat Inc., 2022
+ *
+ * Authors:
+ * David Hildenbrand <david@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/thread-context.h"
+#include "qapi/error.h"
+#include "qapi/qapi-builtin-visit.h"
+#include "qapi/visitor.h"
+#include "qemu/config-file.h"
+#include "qapi/qapi-builtin-visit.h"
+#include "qom/object_interfaces.h"
+#include "qemu/module.h"
+#include "qemu/bitmap.h"
+
+#ifdef CONFIG_NUMA
+#include <numa.h>
+#endif
+
+enum {
+ TC_CMD_NONE = 0,
+ TC_CMD_STOP,
+ TC_CMD_NEW,
+};
+
+typedef struct ThreadContextCmdNew {
+ QemuThread *thread;
+ const char *name;
+ void *(*start_routine)(void *);
+ void *arg;
+ int mode;
+} ThreadContextCmdNew;
+
+static void *thread_context_run(void *opaque)
+{
+ ThreadContext *tc = opaque;
+
+ tc->thread_id = qemu_get_thread_id();
+ qemu_sem_post(&tc->sem);
+
+ while (true) {
+ /*
+ * Threads inherit the CPU affinity of the creating thread. For this
+ * reason, we create new (especially short-lived) threads from our
+ * persistent context thread.
+ *
+ * Especially when QEMU is not allowed to set the affinity itself,
+ * management tools can simply set the affinity of the context thread
+ * after creating the context, to have new threads created via
+ * the context inherit the CPU affinity automatically.
+ */
+ switch (tc->thread_cmd) {
+ case TC_CMD_NONE:
+ break;
+ case TC_CMD_STOP:
+ tc->thread_cmd = TC_CMD_NONE;
+ qemu_sem_post(&tc->sem);
+ return NULL;
+ case TC_CMD_NEW: {
+ ThreadContextCmdNew *cmd_new = tc->thread_cmd_data;
+
+ qemu_thread_create(cmd_new->thread, cmd_new->name,
+ cmd_new->start_routine, cmd_new->arg,
+ cmd_new->mode);
+ tc->thread_cmd = TC_CMD_NONE;
+ tc->thread_cmd_data = NULL;
+ qemu_sem_post(&tc->sem);
+ break;
+ }
+ default:
+ g_assert_not_reached();
+ }
+ qemu_sem_wait(&tc->sem_thread);
+ }
+}
+
+static void thread_context_set_cpu_affinity(Object *obj, Visitor *v,
+ const char *name, void *opaque,
+ Error **errp)
+{
+ ThreadContext *tc = THREAD_CONTEXT(obj);
+ uint16List *l, *host_cpus = NULL;
+ unsigned long *bitmap = NULL;
+ int nbits = 0, ret;
+ Error *err = NULL;
+
+ if (tc->init_cpu_bitmap) {
+ error_setg(errp, "Mixing CPU and node affinity not supported");
+ return;
+ }
+
+ visit_type_uint16List(v, name, &host_cpus, &err);
+ if (err) {
+ error_propagate(errp, err);
+ return;
+ }
+
+ if (!host_cpus) {
+ error_setg(errp, "CPU list is empty");
+ goto out;
+ }
+
+ for (l = host_cpus; l; l = l->next) {
+ nbits = MAX(nbits, l->value + 1);
+ }
+ bitmap = bitmap_new(nbits);
+ for (l = host_cpus; l; l = l->next) {
+ set_bit(l->value, bitmap);
+ }
+
+ if (tc->thread_id != -1) {
+ /*
+ * Note: we won't be adjusting the affinity of any thread that is still
+ * around, but only the affinity of the context thread.
+ */
+ ret = qemu_thread_set_affinity(&tc->thread, bitmap, nbits);
+ if (ret) {
+ error_setg(errp, "Setting CPU affinity failed: %s", strerror(ret));
+ }
+ } else {
+ tc->init_cpu_bitmap = bitmap;
+ bitmap = NULL;
+ tc->init_cpu_nbits = nbits;
+ }
+out:
+ g_free(bitmap);
+ qapi_free_uint16List(host_cpus);
+}
+
+static void thread_context_get_cpu_affinity(Object *obj, Visitor *v,
+ const char *name, void *opaque,
+ Error **errp)
+{
+ unsigned long *bitmap, nbits, value;
+ ThreadContext *tc = THREAD_CONTEXT(obj);
+ uint16List *host_cpus = NULL;
+ uint16List **tail = &host_cpus;
+ int ret;
+
+ if (tc->thread_id == -1) {
+ error_setg(errp, "Object not initialized yet");
+ return;
+ }
+
+ ret = qemu_thread_get_affinity(&tc->thread, &bitmap, &nbits);
+ if (ret) {
+ error_setg(errp, "Getting CPU affinity failed: %s", strerror(ret));
+ return;
+ }
+
+ value = find_first_bit(bitmap, nbits);
+ while (value < nbits) {
+ QAPI_LIST_APPEND(tail, value);
+
+ value = find_next_bit(bitmap, nbits, value + 1);
+ }
+ g_free(bitmap);
+
+ visit_type_uint16List(v, name, &host_cpus, errp);
+ qapi_free_uint16List(host_cpus);
+}
+
+static void thread_context_set_node_affinity(Object *obj, Visitor *v,
+ const char *name, void *opaque,
+ Error **errp)
+{
+#ifdef CONFIG_NUMA
+ const int nbits = numa_num_possible_cpus();
+ ThreadContext *tc = THREAD_CONTEXT(obj);
+ uint16List *l, *host_nodes = NULL;
+ unsigned long *bitmap = NULL;
+ struct bitmask *tmp_cpus;
+ Error *err = NULL;
+ int ret, i;
+
+ if (tc->init_cpu_bitmap) {
+ error_setg(errp, "Mixing CPU and node affinity not supported");
+ return;
+ }
+
+ visit_type_uint16List(v, name, &host_nodes, &err);
+ if (err) {
+ error_propagate(errp, err);
+ return;
+ }
+
+ if (!host_nodes) {
+ error_setg(errp, "Node list is empty");
+ goto out;
+ }
+
+ bitmap = bitmap_new(nbits);
+ tmp_cpus = numa_allocate_cpumask();
+ for (l = host_nodes; l; l = l->next) {
+ numa_bitmask_clearall(tmp_cpus);
+ ret = numa_node_to_cpus(l->value, tmp_cpus);
+ if (ret) {
+ /* We ignore any errors, such as impossible nodes. */
+ continue;
+ }
+ for (i = 0; i < nbits; i++) {
+ if (numa_bitmask_isbitset(tmp_cpus, i)) {
+ set_bit(i, bitmap);
+ }
+ }
+ }
+ numa_free_cpumask(tmp_cpus);
+
+ if (bitmap_empty(bitmap, nbits)) {
+ error_setg(errp, "The nodes select no CPUs");
+ goto out;
+ }
+
+ if (tc->thread_id != -1) {
+ /*
+ * Note: we won't be adjusting the affinity of any thread that is still
+ * around for now, but only the affinity of the context thread.
+ */
+ ret = qemu_thread_set_affinity(&tc->thread, bitmap, nbits);
+ if (ret) {
+ error_setg(errp, "Setting CPU affinity failed: %s", strerror(ret));
+ }
+ } else {
+ tc->init_cpu_bitmap = bitmap;
+ bitmap = NULL;
+ tc->init_cpu_nbits = nbits;
+ }
+out:
+ g_free(bitmap);
+ qapi_free_uint16List(host_nodes);
+#else
+ error_setg(errp, "NUMA node affinity is not supported by this QEMU");
+#endif
+}
+
+static void thread_context_get_thread_id(Object *obj, Visitor *v,
+ const char *name, void *opaque,
+ Error **errp)
+{
+ ThreadContext *tc = THREAD_CONTEXT(obj);
+ uint64_t value = tc->thread_id;
+
+ visit_type_uint64(v, name, &value, errp);
+}
+
+static void thread_context_instance_complete(UserCreatable *uc, Error **errp)
+{
+ ThreadContext *tc = THREAD_CONTEXT(uc);
+ char *thread_name;
+ int ret;
+
+ thread_name = g_strdup_printf("TC %s",
+ object_get_canonical_path_component(OBJECT(uc)));
+ qemu_thread_create(&tc->thread, thread_name, thread_context_run, tc,
+ QEMU_THREAD_JOINABLE);
+ g_free(thread_name);
+
+ /* Wait until initialization of the thread is done. */
+ while (tc->thread_id == -1) {
+ qemu_sem_wait(&tc->sem);
+ }
+
+ if (tc->init_cpu_bitmap) {
+ ret = qemu_thread_set_affinity(&tc->thread, tc->init_cpu_bitmap,
+ tc->init_cpu_nbits);
+ if (ret) {
+ error_setg(errp, "Setting CPU affinity failed: %s", strerror(ret));
+ }
+ g_free(tc->init_cpu_bitmap);
+ tc->init_cpu_bitmap = NULL;
+ }
+}
+
+static void thread_context_class_init(ObjectClass *oc, void *data)
+{
+ UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
+
+ ucc->complete = thread_context_instance_complete;
+ object_class_property_add(oc, "thread-id", "int",
+ thread_context_get_thread_id, NULL, NULL,
+ NULL);
+ object_class_property_add(oc, "cpu-affinity", "int",
+ thread_context_get_cpu_affinity,
+ thread_context_set_cpu_affinity, NULL, NULL);
+ object_class_property_add(oc, "node-affinity", "int", NULL,
+ thread_context_set_node_affinity, NULL, NULL);
+}
+
+static void thread_context_instance_init(Object *obj)
+{
+ ThreadContext *tc = THREAD_CONTEXT(obj);
+
+ tc->thread_id = -1;
+ qemu_sem_init(&tc->sem, 0);
+ qemu_sem_init(&tc->sem_thread, 0);
+ qemu_mutex_init(&tc->mutex);
+}
+
+static void thread_context_instance_finalize(Object *obj)
+{
+ ThreadContext *tc = THREAD_CONTEXT(obj);
+
+ if (tc->thread_id != -1) {
+ tc->thread_cmd = TC_CMD_STOP;
+ qemu_sem_post(&tc->sem_thread);
+ qemu_thread_join(&tc->thread);
+ }
+ qemu_sem_destroy(&tc->sem);
+ qemu_sem_destroy(&tc->sem_thread);
+ qemu_mutex_destroy(&tc->mutex);
+}
+
+static const TypeInfo thread_context_info = {
+ .name = TYPE_THREAD_CONTEXT,
+ .parent = TYPE_OBJECT,
+ .class_init = thread_context_class_init,
+ .instance_size = sizeof(ThreadContext),
+ .instance_init = thread_context_instance_init,
+ .instance_finalize = thread_context_instance_finalize,
+ .interfaces = (InterfaceInfo[]) {
+ { TYPE_USER_CREATABLE },
+ { }
+ }
+};
+
+static void thread_context_register_types(void)
+{
+ type_register_static(&thread_context_info);
+}
+type_init(thread_context_register_types)
+
+void thread_context_create_thread(ThreadContext *tc, QemuThread *thread,
+ const char *name,
+ void *(*start_routine)(void *), void *arg,
+ int mode)
+{
+ ThreadContextCmdNew data = {
+ .thread = thread,
+ .name = name,
+ .start_routine = start_routine,
+ .arg = arg,
+ .mode = mode,
+ };
+
+ qemu_mutex_lock(&tc->mutex);
+ tc->thread_cmd = TC_CMD_NEW;
+ tc->thread_cmd_data = &data;
+ qemu_sem_post(&tc->sem_thread);
+
+ while (tc->thread_cmd != TC_CMD_NONE) {
+ qemu_sem_wait(&tc->sem);
+ }
+ qemu_mutex_unlock(&tc->mutex);
+}