diff options
Diffstat (limited to 'util')
-rw-r--r-- | util/oslib-posix.c | 232 |
1 files changed, 177 insertions, 55 deletions
diff --git a/util/oslib-posix.c b/util/oslib-posix.c index e8bdb02..9efdc74 100644 --- a/util/oslib-posix.c +++ b/util/oslib-posix.c @@ -35,11 +35,13 @@ #include "sysemu/sysemu.h" #include "trace.h" #include "qapi/error.h" +#include "qemu/error-report.h" #include "qemu/sockets.h" #include "qemu/thread.h" #include <libgen.h> #include "qemu/cutils.h" #include "qemu/compiler.h" +#include "qemu/units.h" #ifdef CONFIG_LINUX #include <sys/syscall.h> @@ -73,22 +75,32 @@ #define MAX_MEM_PREALLOC_THREAD_COUNT 16 +struct MemsetThread; + +typedef struct MemsetContext { + bool all_threads_created; + bool any_thread_failed; + struct MemsetThread *threads; + int num_threads; +} MemsetContext; + struct MemsetThread { char *addr; size_t numpages; size_t hpagesize; QemuThread pgthread; sigjmp_buf env; + MemsetContext *context; }; typedef struct MemsetThread MemsetThread; -static MemsetThread *memset_thread; -static int memset_num_threads; -static bool memset_thread_failed; +/* used by sigbus_handler() */ +static MemsetContext *sigbus_memset_context; +struct sigaction sigbus_oldact; +static QemuMutex sigbus_mutex; static QemuMutex page_mutex; static QemuCond page_cond; -static bool threads_created_flag; int qemu_get_thread_id(void) { @@ -436,22 +448,50 @@ const char *qemu_get_exec_dir(void) return exec_dir; } +#ifdef CONFIG_LINUX +static void sigbus_handler(int signal, siginfo_t *siginfo, void *ctx) +#else /* CONFIG_LINUX */ static void sigbus_handler(int signal) +#endif /* CONFIG_LINUX */ { int i; - if (memset_thread) { - for (i = 0; i < memset_num_threads; i++) { - if (qemu_thread_is_self(&memset_thread[i].pgthread)) { - siglongjmp(memset_thread[i].env, 1); + + if (sigbus_memset_context) { + for (i = 0; i < sigbus_memset_context->num_threads; i++) { + MemsetThread *thread = &sigbus_memset_context->threads[i]; + + if (qemu_thread_is_self(&thread->pgthread)) { + siglongjmp(thread->env, 1); } } } + +#ifdef CONFIG_LINUX + /* + * We assume that the MCE SIGBUS handler could have been registered. We + * should never receive BUS_MCEERR_AO on any of our threads, but only on + * the main thread registered for PR_MCE_KILL_EARLY. Further, we should not + * receive BUS_MCEERR_AR triggered by action of other threads on one of + * our threads. So, no need to check for unrelated SIGBUS when seeing one + * for our threads. + * + * We will forward to the MCE handler, which will either handle the SIGBUS + * or reinstall the default SIGBUS handler and reraise the SIGBUS. The + * default SIGBUS handler will crash the process, so we don't care. + */ + if (sigbus_oldact.sa_flags & SA_SIGINFO) { + sigbus_oldact.sa_sigaction(signal, siginfo, ctx); + return; + } +#endif /* CONFIG_LINUX */ + warn_report("os_mem_prealloc: unrelated SIGBUS detected and ignored"); } static void *do_touch_pages(void *arg) { MemsetThread *memset_args = (MemsetThread *)arg; sigset_t set, oldset; + int ret = 0; /* * On Linux, the page faults from the loop below can cause mmap_sem @@ -459,7 +499,7 @@ static void *do_touch_pages(void *arg) * clearing until all threads have been created. */ qemu_mutex_lock(&page_mutex); - while(!threads_created_flag){ + while (!memset_args->context->all_threads_created) { qemu_cond_wait(&page_cond, &page_mutex); } qemu_mutex_unlock(&page_mutex); @@ -470,7 +510,7 @@ static void *do_touch_pages(void *arg) pthread_sigmask(SIG_UNBLOCK, &set, &oldset); if (sigsetjmp(memset_args->env, 1)) { - memset_thread_failed = true; + ret = -EFAULT; } else { char *addr = memset_args->addr; size_t numpages = memset_args->numpages; @@ -484,20 +524,37 @@ static void *do_touch_pages(void *arg) * * 'volatile' to stop compiler optimizing this away * to a no-op - * - * TODO: get a better solution from kernel so we - * don't need to write at all so we don't cause - * wear on the storage backing the region... */ *(volatile char *)addr = *addr; addr += hpagesize; } } pthread_sigmask(SIG_SETMASK, &oldset, NULL); - return NULL; + return (void *)(uintptr_t)ret; } -static inline int get_memset_num_threads(int smp_cpus) +static void *do_madv_populate_write_pages(void *arg) +{ + MemsetThread *memset_args = (MemsetThread *)arg; + const size_t size = memset_args->numpages * memset_args->hpagesize; + char * const addr = memset_args->addr; + int ret = 0; + + /* See do_touch_pages(). */ + qemu_mutex_lock(&page_mutex); + while (!memset_args->context->all_threads_created) { + qemu_cond_wait(&page_cond, &page_mutex); + } + qemu_mutex_unlock(&page_mutex); + + if (size && qemu_madvise(addr, size, QEMU_MADV_POPULATE_WRITE)) { + ret = -errno; + } + return (void *)(uintptr_t)ret; +} + +static inline int get_memset_num_threads(size_t hpagesize, size_t numpages, + int smp_cpus) { long host_procs = sysconf(_SC_NPROCESSORS_ONLN); int ret = 1; @@ -505,17 +562,27 @@ static inline int get_memset_num_threads(int smp_cpus) if (host_procs > 0) { ret = MIN(MIN(host_procs, MAX_MEM_PREALLOC_THREAD_COUNT), smp_cpus); } + + /* Especially with gigantic pages, don't create more threads than pages. */ + ret = MIN(ret, numpages); + /* Don't start threads to prealloc comparatively little memory. */ + ret = MIN(ret, MAX(1, hpagesize * numpages / (64 * MiB))); + /* In case sysconf() fails, we fall back to single threaded */ return ret; } -static bool touch_all_pages(char *area, size_t hpagesize, size_t numpages, - int smp_cpus) +static int touch_all_pages(char *area, size_t hpagesize, size_t numpages, + int smp_cpus, bool use_madv_populate_write) { static gsize initialized = 0; + MemsetContext context = { + .num_threads = get_memset_num_threads(hpagesize, numpages, smp_cpus), + }; size_t numpages_per_thread, leftover; + void *(*touch_fn)(void *); + int ret = 0, i = 0; char *addr = area; - int i = 0; if (g_once_init_enter(&initialized)) { qemu_mutex_init(&page_mutex); @@ -523,66 +590,121 @@ static bool touch_all_pages(char *area, size_t hpagesize, size_t numpages, g_once_init_leave(&initialized, 1); } - memset_thread_failed = false; - threads_created_flag = false; - memset_num_threads = get_memset_num_threads(smp_cpus); - memset_thread = g_new0(MemsetThread, memset_num_threads); - numpages_per_thread = numpages / memset_num_threads; - leftover = numpages % memset_num_threads; - for (i = 0; i < memset_num_threads; i++) { - memset_thread[i].addr = addr; - memset_thread[i].numpages = numpages_per_thread + (i < leftover); - memset_thread[i].hpagesize = hpagesize; - qemu_thread_create(&memset_thread[i].pgthread, "touch_pages", - do_touch_pages, &memset_thread[i], + if (use_madv_populate_write) { + /* Avoid creating a single thread for MADV_POPULATE_WRITE */ + if (context.num_threads == 1) { + if (qemu_madvise(area, hpagesize * numpages, + QEMU_MADV_POPULATE_WRITE)) { + return -errno; + } + return 0; + } + touch_fn = do_madv_populate_write_pages; + } else { + touch_fn = do_touch_pages; + } + + context.threads = g_new0(MemsetThread, context.num_threads); + numpages_per_thread = numpages / context.num_threads; + leftover = numpages % context.num_threads; + for (i = 0; i < context.num_threads; i++) { + context.threads[i].addr = addr; + context.threads[i].numpages = numpages_per_thread + (i < leftover); + context.threads[i].hpagesize = hpagesize; + context.threads[i].context = &context; + qemu_thread_create(&context.threads[i].pgthread, "touch_pages", + touch_fn, &context.threads[i], QEMU_THREAD_JOINABLE); - addr += memset_thread[i].numpages * hpagesize; + addr += context.threads[i].numpages * hpagesize; + } + + if (!use_madv_populate_write) { + sigbus_memset_context = &context; } qemu_mutex_lock(&page_mutex); - threads_created_flag = true; + context.all_threads_created = true; qemu_cond_broadcast(&page_cond); qemu_mutex_unlock(&page_mutex); - for (i = 0; i < memset_num_threads; i++) { - qemu_thread_join(&memset_thread[i].pgthread); + for (i = 0; i < context.num_threads; i++) { + int tmp = (uintptr_t)qemu_thread_join(&context.threads[i].pgthread); + + if (tmp) { + ret = tmp; + } } - g_free(memset_thread); - memset_thread = NULL; - return memset_thread_failed; + if (!use_madv_populate_write) { + sigbus_memset_context = NULL; + } + g_free(context.threads); + + return ret; +} + +static bool madv_populate_write_possible(char *area, size_t pagesize) +{ + return !qemu_madvise(area, pagesize, QEMU_MADV_POPULATE_WRITE) || + errno != EINVAL; } void os_mem_prealloc(int fd, char *area, size_t memory, int smp_cpus, Error **errp) { + static gsize initialized; int ret; - struct sigaction act, oldact; size_t hpagesize = qemu_fd_getpagesize(fd); size_t numpages = DIV_ROUND_UP(memory, hpagesize); + bool use_madv_populate_write; + struct sigaction act; - memset(&act, 0, sizeof(act)); - act.sa_handler = &sigbus_handler; - act.sa_flags = 0; + /* + * Sense on every invocation, as MADV_POPULATE_WRITE cannot be used for + * some special mappings, such as mapping /dev/mem. + */ + use_madv_populate_write = madv_populate_write_possible(area, hpagesize); - ret = sigaction(SIGBUS, &act, &oldact); - if (ret) { - error_setg_errno(errp, errno, - "os_mem_prealloc: failed to install signal handler"); - return; + if (!use_madv_populate_write) { + if (g_once_init_enter(&initialized)) { + qemu_mutex_init(&sigbus_mutex); + g_once_init_leave(&initialized, 1); + } + + qemu_mutex_lock(&sigbus_mutex); + memset(&act, 0, sizeof(act)); +#ifdef CONFIG_LINUX + act.sa_sigaction = &sigbus_handler; + act.sa_flags = SA_SIGINFO; +#else /* CONFIG_LINUX */ + act.sa_handler = &sigbus_handler; + act.sa_flags = 0; +#endif /* CONFIG_LINUX */ + + ret = sigaction(SIGBUS, &act, &sigbus_oldact); + if (ret) { + error_setg_errno(errp, errno, + "os_mem_prealloc: failed to install signal handler"); + return; + } } /* touch pages simultaneously */ - if (touch_all_pages(area, hpagesize, numpages, smp_cpus)) { - error_setg(errp, "os_mem_prealloc: Insufficient free host memory " - "pages available to allocate guest RAM"); + ret = touch_all_pages(area, hpagesize, numpages, smp_cpus, + use_madv_populate_write); + if (ret) { + error_setg_errno(errp, -ret, + "os_mem_prealloc: preallocating memory failed"); } - ret = sigaction(SIGBUS, &oldact, NULL); - if (ret) { - /* Terminate QEMU since it can't recover from error */ - perror("os_mem_prealloc: failed to reinstall signal handler"); - exit(1); + if (!use_madv_populate_write) { + ret = sigaction(SIGBUS, &sigbus_oldact, NULL); + if (ret) { + /* Terminate QEMU since it can't recover from error */ + perror("os_mem_prealloc: failed to reinstall signal handler"); + exit(1); + } + qemu_mutex_unlock(&sigbus_mutex); } } |