diff options
author | Richard Henderson <richard.henderson@linaro.org> | 2021-03-09 17:02:48 -0600 |
---|---|---|
committer | Richard Henderson <richard.henderson@linaro.org> | 2021-06-11 09:26:28 -0700 |
commit | c46184a90a5a0209960b7c0813aff4feb1e373e1 (patch) | |
tree | af5c0fba9d39fd31386b8e3cf47b18c4beab22a2 | |
parent | 324b9d462ea227f10a25c80421046e6187247116 (diff) | |
download | qemu-c46184a90a5a0209960b7c0813aff4feb1e373e1.zip qemu-c46184a90a5a0209960b7c0813aff4feb1e373e1.tar.gz qemu-c46184a90a5a0209960b7c0813aff4feb1e373e1.tar.bz2 |
accel/tcg: Move alloc_code_gen_buffer to tcg/region.c
Buffer management is integral to tcg. Do not leave the allocation
to code outside of tcg/. This is code movement, with further
cleanups to follow.
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
-rw-r--r-- | accel/tcg/translate-all.c | 414 | ||||
-rw-r--r-- | include/tcg/tcg.h | 2 | ||||
-rw-r--r-- | tcg/region.c | 431 |
3 files changed, 428 insertions, 419 deletions
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index 337fbb1..ad7a25d 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -18,7 +18,6 @@ */ #include "qemu/osdep.h" -#include "qemu/units.h" #include "qemu-common.h" #define NO_CPU_IO_DEFS @@ -49,7 +48,6 @@ #include "exec/cputlb.h" #include "exec/translate-all.h" #include "qemu/bitmap.h" -#include "qemu/error-report.h" #include "qemu/qemu-print.h" #include "qemu/timer.h" #include "qemu/main-loop.h" @@ -895,408 +893,6 @@ static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1, } } -/* Minimum size of the code gen buffer. This number is randomly chosen, - but not so small that we can't have a fair number of TB's live. */ -#define MIN_CODE_GEN_BUFFER_SIZE (1 * MiB) - -/* Maximum size of the code gen buffer we'd like to use. Unless otherwise - indicated, this is constrained by the range of direct branches on the - host cpu, as used by the TCG implementation of goto_tb. */ -#if defined(__x86_64__) -# define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB) -#elif defined(__sparc__) -# define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB) -#elif defined(__powerpc64__) -# define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB) -#elif defined(__powerpc__) -# define MAX_CODE_GEN_BUFFER_SIZE (32 * MiB) -#elif defined(__aarch64__) -# define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB) -#elif defined(__s390x__) - /* We have a +- 4GB range on the branches; leave some slop. */ -# define MAX_CODE_GEN_BUFFER_SIZE (3 * GiB) -#elif defined(__mips__) - /* We have a 256MB branch region, but leave room to make sure the - main executable is also within that region. */ -# define MAX_CODE_GEN_BUFFER_SIZE (128 * MiB) -#else -# define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1) -#endif - -#if TCG_TARGET_REG_BITS == 32 -#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32 * MiB) -#ifdef CONFIG_USER_ONLY -/* - * For user mode on smaller 32 bit systems we may run into trouble - * allocating big chunks of data in the right place. On these systems - * we utilise a static code generation buffer directly in the binary. - */ -#define USE_STATIC_CODE_GEN_BUFFER -#endif -#else /* TCG_TARGET_REG_BITS == 64 */ -#ifdef CONFIG_USER_ONLY -/* - * As user-mode emulation typically means running multiple instances - * of the translator don't go too nuts with our default code gen - * buffer lest we make things too hard for the OS. - */ -#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (128 * MiB) -#else -/* - * We expect most system emulation to run one or two guests per host. - * Users running large scale system emulation may want to tweak their - * runtime setup via the tb-size control on the command line. - */ -#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (1 * GiB) -#endif -#endif - -#define DEFAULT_CODE_GEN_BUFFER_SIZE \ - (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \ - ? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE) - -static size_t size_code_gen_buffer(size_t tb_size) -{ - /* Size the buffer. */ - if (tb_size == 0) { - size_t phys_mem = qemu_get_host_physmem(); - if (phys_mem == 0) { - tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE; - } else { - tb_size = MIN(DEFAULT_CODE_GEN_BUFFER_SIZE, phys_mem / 8); - } - } - if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) { - tb_size = MIN_CODE_GEN_BUFFER_SIZE; - } - if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) { - tb_size = MAX_CODE_GEN_BUFFER_SIZE; - } - return tb_size; -} - -#ifdef __mips__ -/* In order to use J and JAL within the code_gen_buffer, we require - that the buffer not cross a 256MB boundary. */ -static inline bool cross_256mb(void *addr, size_t size) -{ - return ((uintptr_t)addr ^ ((uintptr_t)addr + size)) & ~0x0ffffffful; -} - -/* We weren't able to allocate a buffer without crossing that boundary, - so make do with the larger portion of the buffer that doesn't cross. - Returns the new base of the buffer, and adjusts code_gen_buffer_size. */ -static inline void *split_cross_256mb(void *buf1, size_t size1) -{ - void *buf2 = (void *)(((uintptr_t)buf1 + size1) & ~0x0ffffffful); - size_t size2 = buf1 + size1 - buf2; - - size1 = buf2 - buf1; - if (size1 < size2) { - size1 = size2; - buf1 = buf2; - } - - tcg_ctx->code_gen_buffer_size = size1; - return buf1; -} -#endif - -#ifdef USE_STATIC_CODE_GEN_BUFFER -static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE] - __attribute__((aligned(CODE_GEN_ALIGN))); - -static bool alloc_code_gen_buffer(size_t tb_size, int splitwx, Error **errp) -{ - void *buf, *end; - size_t size; - - if (splitwx > 0) { - error_setg(errp, "jit split-wx not supported"); - return false; - } - - /* page-align the beginning and end of the buffer */ - buf = static_code_gen_buffer; - end = static_code_gen_buffer + sizeof(static_code_gen_buffer); - buf = QEMU_ALIGN_PTR_UP(buf, qemu_real_host_page_size); - end = QEMU_ALIGN_PTR_DOWN(end, qemu_real_host_page_size); - - size = end - buf; - - /* Honor a command-line option limiting the size of the buffer. */ - if (size > tb_size) { - size = QEMU_ALIGN_DOWN(tb_size, qemu_real_host_page_size); - } - tcg_ctx->code_gen_buffer_size = size; - -#ifdef __mips__ - if (cross_256mb(buf, size)) { - buf = split_cross_256mb(buf, size); - size = tcg_ctx->code_gen_buffer_size; - } -#endif - - if (qemu_mprotect_rwx(buf, size)) { - error_setg_errno(errp, errno, "mprotect of jit buffer"); - return false; - } - qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE); - - tcg_ctx->code_gen_buffer = buf; - return true; -} -#elif defined(_WIN32) -static bool alloc_code_gen_buffer(size_t size, int splitwx, Error **errp) -{ - void *buf; - - if (splitwx > 0) { - error_setg(errp, "jit split-wx not supported"); - return false; - } - - buf = VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT, - PAGE_EXECUTE_READWRITE); - if (buf == NULL) { - error_setg_win32(errp, GetLastError(), - "allocate %zu bytes for jit buffer", size); - return false; - } - - tcg_ctx->code_gen_buffer = buf; - tcg_ctx->code_gen_buffer_size = size; - return true; -} -#else -static bool alloc_code_gen_buffer_anon(size_t size, int prot, - int flags, Error **errp) -{ - void *buf; - - buf = mmap(NULL, size, prot, flags, -1, 0); - if (buf == MAP_FAILED) { - error_setg_errno(errp, errno, - "allocate %zu bytes for jit buffer", size); - return false; - } - tcg_ctx->code_gen_buffer_size = size; - -#ifdef __mips__ - if (cross_256mb(buf, size)) { - /* - * Try again, with the original still mapped, to avoid re-acquiring - * the same 256mb crossing. - */ - size_t size2; - void *buf2 = mmap(NULL, size, prot, flags, -1, 0); - switch ((int)(buf2 != MAP_FAILED)) { - case 1: - if (!cross_256mb(buf2, size)) { - /* Success! Use the new buffer. */ - munmap(buf, size); - break; - } - /* Failure. Work with what we had. */ - munmap(buf2, size); - /* fallthru */ - default: - /* Split the original buffer. Free the smaller half. */ - buf2 = split_cross_256mb(buf, size); - size2 = tcg_ctx->code_gen_buffer_size; - if (buf == buf2) { - munmap(buf + size2, size - size2); - } else { - munmap(buf, size - size2); - } - size = size2; - break; - } - buf = buf2; - } -#endif - - /* Request large pages for the buffer. */ - qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE); - - tcg_ctx->code_gen_buffer = buf; - return true; -} - -#ifndef CONFIG_TCG_INTERPRETER -#ifdef CONFIG_POSIX -#include "qemu/memfd.h" - -static bool alloc_code_gen_buffer_splitwx_memfd(size_t size, Error **errp) -{ - void *buf_rw = NULL, *buf_rx = MAP_FAILED; - int fd = -1; - -#ifdef __mips__ - /* Find space for the RX mapping, vs the 256MiB regions. */ - if (!alloc_code_gen_buffer_anon(size, PROT_NONE, - MAP_PRIVATE | MAP_ANONYMOUS | - MAP_NORESERVE, errp)) { - return false; - } - /* The size of the mapping may have been adjusted. */ - size = tcg_ctx->code_gen_buffer_size; - buf_rx = tcg_ctx->code_gen_buffer; -#endif - - buf_rw = qemu_memfd_alloc("tcg-jit", size, 0, &fd, errp); - if (buf_rw == NULL) { - goto fail; - } - -#ifdef __mips__ - void *tmp = mmap(buf_rx, size, PROT_READ | PROT_EXEC, - MAP_SHARED | MAP_FIXED, fd, 0); - if (tmp != buf_rx) { - goto fail_rx; - } -#else - buf_rx = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0); - if (buf_rx == MAP_FAILED) { - goto fail_rx; - } -#endif - - close(fd); - tcg_ctx->code_gen_buffer = buf_rw; - tcg_ctx->code_gen_buffer_size = size; - tcg_splitwx_diff = buf_rx - buf_rw; - - /* Request large pages for the buffer and the splitwx. */ - qemu_madvise(buf_rw, size, QEMU_MADV_HUGEPAGE); - qemu_madvise(buf_rx, size, QEMU_MADV_HUGEPAGE); - return true; - - fail_rx: - error_setg_errno(errp, errno, "failed to map shared memory for execute"); - fail: - if (buf_rx != MAP_FAILED) { - munmap(buf_rx, size); - } - if (buf_rw) { - munmap(buf_rw, size); - } - if (fd >= 0) { - close(fd); - } - return false; -} -#endif /* CONFIG_POSIX */ - -#ifdef CONFIG_DARWIN -#include <mach/mach.h> - -extern kern_return_t mach_vm_remap(vm_map_t target_task, - mach_vm_address_t *target_address, - mach_vm_size_t size, - mach_vm_offset_t mask, - int flags, - vm_map_t src_task, - mach_vm_address_t src_address, - boolean_t copy, - vm_prot_t *cur_protection, - vm_prot_t *max_protection, - vm_inherit_t inheritance); - -static bool alloc_code_gen_buffer_splitwx_vmremap(size_t size, Error **errp) -{ - kern_return_t ret; - mach_vm_address_t buf_rw, buf_rx; - vm_prot_t cur_prot, max_prot; - - /* Map the read-write portion via normal anon memory. */ - if (!alloc_code_gen_buffer_anon(size, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, errp)) { - return false; - } - - buf_rw = (mach_vm_address_t)tcg_ctx->code_gen_buffer; - buf_rx = 0; - ret = mach_vm_remap(mach_task_self(), - &buf_rx, - size, - 0, - VM_FLAGS_ANYWHERE, - mach_task_self(), - buf_rw, - false, - &cur_prot, - &max_prot, - VM_INHERIT_NONE); - if (ret != KERN_SUCCESS) { - /* TODO: Convert "ret" to a human readable error message. */ - error_setg(errp, "vm_remap for jit splitwx failed"); - munmap((void *)buf_rw, size); - return false; - } - - if (mprotect((void *)buf_rx, size, PROT_READ | PROT_EXEC) != 0) { - error_setg_errno(errp, errno, "mprotect for jit splitwx"); - munmap((void *)buf_rx, size); - munmap((void *)buf_rw, size); - return false; - } - - tcg_splitwx_diff = buf_rx - buf_rw; - return true; -} -#endif /* CONFIG_DARWIN */ -#endif /* CONFIG_TCG_INTERPRETER */ - -static bool alloc_code_gen_buffer_splitwx(size_t size, Error **errp) -{ -#ifndef CONFIG_TCG_INTERPRETER -# ifdef CONFIG_DARWIN - return alloc_code_gen_buffer_splitwx_vmremap(size, errp); -# endif -# ifdef CONFIG_POSIX - return alloc_code_gen_buffer_splitwx_memfd(size, errp); -# endif -#endif - error_setg(errp, "jit split-wx not supported"); - return false; -} - -static bool alloc_code_gen_buffer(size_t size, int splitwx, Error **errp) -{ - ERRP_GUARD(); - int prot, flags; - - if (splitwx) { - if (alloc_code_gen_buffer_splitwx(size, errp)) { - return true; - } - /* - * If splitwx force-on (1), fail; - * if splitwx default-on (-1), fall through to splitwx off. - */ - if (splitwx > 0) { - return false; - } - error_free_or_abort(errp); - } - - prot = PROT_READ | PROT_WRITE | PROT_EXEC; - flags = MAP_PRIVATE | MAP_ANONYMOUS; -#ifdef CONFIG_TCG_INTERPRETER - /* The tcg interpreter does not need execute permission. */ - prot = PROT_READ | PROT_WRITE; -#elif defined(CONFIG_DARWIN) - /* Applicable to both iOS and macOS (Apple Silicon). */ - if (!splitwx) { - flags |= MAP_JIT; - } -#endif - - return alloc_code_gen_buffer_anon(size, prot, flags, errp); -} -#endif /* USE_STATIC_CODE_GEN_BUFFER, WIN32, POSIX */ - static bool tb_cmp(const void *ap, const void *bp) { const TranslationBlock *a = ap; @@ -1323,19 +919,11 @@ static void tb_htable_init(void) size. */ void tcg_exec_init(unsigned long tb_size, int splitwx) { - bool ok; - tcg_allowed = true; tcg_context_init(&tcg_init_ctx); page_init(); tb_htable_init(); - - ok = alloc_code_gen_buffer(size_code_gen_buffer(tb_size), - splitwx, &error_fatal); - assert(ok); - - /* TODO: allocating regions is hand-in-glove with code_gen_buffer. */ - tcg_region_init(); + tcg_region_init(tb_size, splitwx); #if defined(CONFIG_SOFTMMU) /* There's no guest base to take into account, so go ahead and diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h index 74cb345..834785f 100644 --- a/include/tcg/tcg.h +++ b/include/tcg/tcg.h @@ -873,7 +873,7 @@ void *tcg_malloc_internal(TCGContext *s, int size); void tcg_pool_reset(TCGContext *s); TranslationBlock *tcg_tb_alloc(TCGContext *s); -void tcg_region_init(void); +void tcg_region_init(size_t tb_size, int splitwx); void tb_destroy(TranslationBlock *tb); void tcg_region_reset_all(void); diff --git a/tcg/region.c b/tcg/region.c index 6e34fcf..162b4d6 100644 --- a/tcg/region.c +++ b/tcg/region.c @@ -23,6 +23,8 @@ */ #include "qemu/osdep.h" +#include "qemu/units.h" +#include "qapi/error.h" #include "exec/exec-all.h" #include "tcg/tcg.h" #if !defined(CONFIG_USER_ONLY) @@ -407,6 +409,418 @@ static size_t tcg_n_regions(void) #endif /* + * Minimum size of the code gen buffer. This number is randomly chosen, + * but not so small that we can't have a fair number of TB's live. + */ +#define MIN_CODE_GEN_BUFFER_SIZE (1 * MiB) + +/* + * Maximum size of the code gen buffer we'd like to use. Unless otherwise + * indicated, this is constrained by the range of direct branches on the + * host cpu, as used by the TCG implementation of goto_tb. + */ +#if defined(__x86_64__) +# define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB) +#elif defined(__sparc__) +# define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB) +#elif defined(__powerpc64__) +# define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB) +#elif defined(__powerpc__) +# define MAX_CODE_GEN_BUFFER_SIZE (32 * MiB) +#elif defined(__aarch64__) +# define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB) +#elif defined(__s390x__) + /* We have a +- 4GB range on the branches; leave some slop. */ +# define MAX_CODE_GEN_BUFFER_SIZE (3 * GiB) +#elif defined(__mips__) + /* + * We have a 256MB branch region, but leave room to make sure the + * main executable is also within that region. + */ +# define MAX_CODE_GEN_BUFFER_SIZE (128 * MiB) +#else +# define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1) +#endif + +#if TCG_TARGET_REG_BITS == 32 +#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32 * MiB) +#ifdef CONFIG_USER_ONLY +/* + * For user mode on smaller 32 bit systems we may run into trouble + * allocating big chunks of data in the right place. On these systems + * we utilise a static code generation buffer directly in the binary. + */ +#define USE_STATIC_CODE_GEN_BUFFER +#endif +#else /* TCG_TARGET_REG_BITS == 64 */ +#ifdef CONFIG_USER_ONLY +/* + * As user-mode emulation typically means running multiple instances + * of the translator don't go too nuts with our default code gen + * buffer lest we make things too hard for the OS. + */ +#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (128 * MiB) +#else +/* + * We expect most system emulation to run one or two guests per host. + * Users running large scale system emulation may want to tweak their + * runtime setup via the tb-size control on the command line. + */ +#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (1 * GiB) +#endif +#endif + +#define DEFAULT_CODE_GEN_BUFFER_SIZE \ + (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \ + ? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE) + +static size_t size_code_gen_buffer(size_t tb_size) +{ + /* Size the buffer. */ + if (tb_size == 0) { + size_t phys_mem = qemu_get_host_physmem(); + if (phys_mem == 0) { + tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE; + } else { + tb_size = MIN(DEFAULT_CODE_GEN_BUFFER_SIZE, phys_mem / 8); + } + } + if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) { + tb_size = MIN_CODE_GEN_BUFFER_SIZE; + } + if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) { + tb_size = MAX_CODE_GEN_BUFFER_SIZE; + } + return tb_size; +} + +#ifdef __mips__ +/* + * In order to use J and JAL within the code_gen_buffer, we require + * that the buffer not cross a 256MB boundary. + */ +static inline bool cross_256mb(void *addr, size_t size) +{ + return ((uintptr_t)addr ^ ((uintptr_t)addr + size)) & ~0x0ffffffful; +} + +/* + * We weren't able to allocate a buffer without crossing that boundary, + * so make do with the larger portion of the buffer that doesn't cross. + * Returns the new base of the buffer, and adjusts code_gen_buffer_size. + */ +static inline void *split_cross_256mb(void *buf1, size_t size1) +{ + void *buf2 = (void *)(((uintptr_t)buf1 + size1) & ~0x0ffffffful); + size_t size2 = buf1 + size1 - buf2; + + size1 = buf2 - buf1; + if (size1 < size2) { + size1 = size2; + buf1 = buf2; + } + + tcg_ctx->code_gen_buffer_size = size1; + return buf1; +} +#endif + +#ifdef USE_STATIC_CODE_GEN_BUFFER +static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE] + __attribute__((aligned(CODE_GEN_ALIGN))); + +static bool alloc_code_gen_buffer(size_t tb_size, int splitwx, Error **errp) +{ + void *buf, *end; + size_t size; + + if (splitwx > 0) { + error_setg(errp, "jit split-wx not supported"); + return false; + } + + /* page-align the beginning and end of the buffer */ + buf = static_code_gen_buffer; + end = static_code_gen_buffer + sizeof(static_code_gen_buffer); + buf = QEMU_ALIGN_PTR_UP(buf, qemu_real_host_page_size); + end = QEMU_ALIGN_PTR_DOWN(end, qemu_real_host_page_size); + + size = end - buf; + + /* Honor a command-line option limiting the size of the buffer. */ + if (size > tb_size) { + size = QEMU_ALIGN_DOWN(tb_size, qemu_real_host_page_size); + } + tcg_ctx->code_gen_buffer_size = size; + +#ifdef __mips__ + if (cross_256mb(buf, size)) { + buf = split_cross_256mb(buf, size); + size = tcg_ctx->code_gen_buffer_size; + } +#endif + + if (qemu_mprotect_rwx(buf, size)) { + error_setg_errno(errp, errno, "mprotect of jit buffer"); + return false; + } + qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE); + + tcg_ctx->code_gen_buffer = buf; + return true; +} +#elif defined(_WIN32) +static bool alloc_code_gen_buffer(size_t size, int splitwx, Error **errp) +{ + void *buf; + + if (splitwx > 0) { + error_setg(errp, "jit split-wx not supported"); + return false; + } + + buf = VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT, + PAGE_EXECUTE_READWRITE); + if (buf == NULL) { + error_setg_win32(errp, GetLastError(), + "allocate %zu bytes for jit buffer", size); + return false; + } + + tcg_ctx->code_gen_buffer = buf; + tcg_ctx->code_gen_buffer_size = size; + return true; +} +#else +static bool alloc_code_gen_buffer_anon(size_t size, int prot, + int flags, Error **errp) +{ + void *buf; + + buf = mmap(NULL, size, prot, flags, -1, 0); + if (buf == MAP_FAILED) { + error_setg_errno(errp, errno, + "allocate %zu bytes for jit buffer", size); + return false; + } + tcg_ctx->code_gen_buffer_size = size; + +#ifdef __mips__ + if (cross_256mb(buf, size)) { + /* + * Try again, with the original still mapped, to avoid re-acquiring + * the same 256mb crossing. + */ + size_t size2; + void *buf2 = mmap(NULL, size, prot, flags, -1, 0); + switch ((int)(buf2 != MAP_FAILED)) { + case 1: + if (!cross_256mb(buf2, size)) { + /* Success! Use the new buffer. */ + munmap(buf, size); + break; + } + /* Failure. Work with what we had. */ + munmap(buf2, size); + /* fallthru */ + default: + /* Split the original buffer. Free the smaller half. */ + buf2 = split_cross_256mb(buf, size); + size2 = tcg_ctx->code_gen_buffer_size; + if (buf == buf2) { + munmap(buf + size2, size - size2); + } else { + munmap(buf, size - size2); + } + size = size2; + break; + } + buf = buf2; + } +#endif + + /* Request large pages for the buffer. */ + qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE); + + tcg_ctx->code_gen_buffer = buf; + return true; +} + +#ifndef CONFIG_TCG_INTERPRETER +#ifdef CONFIG_POSIX +#include "qemu/memfd.h" + +static bool alloc_code_gen_buffer_splitwx_memfd(size_t size, Error **errp) +{ + void *buf_rw = NULL, *buf_rx = MAP_FAILED; + int fd = -1; + +#ifdef __mips__ + /* Find space for the RX mapping, vs the 256MiB regions. */ + if (!alloc_code_gen_buffer_anon(size, PROT_NONE, + MAP_PRIVATE | MAP_ANONYMOUS | + MAP_NORESERVE, errp)) { + return false; + } + /* The size of the mapping may have been adjusted. */ + size = tcg_ctx->code_gen_buffer_size; + buf_rx = tcg_ctx->code_gen_buffer; +#endif + + buf_rw = qemu_memfd_alloc("tcg-jit", size, 0, &fd, errp); + if (buf_rw == NULL) { + goto fail; + } + +#ifdef __mips__ + void *tmp = mmap(buf_rx, size, PROT_READ | PROT_EXEC, + MAP_SHARED | MAP_FIXED, fd, 0); + if (tmp != buf_rx) { + goto fail_rx; + } +#else + buf_rx = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0); + if (buf_rx == MAP_FAILED) { + goto fail_rx; + } +#endif + + close(fd); + tcg_ctx->code_gen_buffer = buf_rw; + tcg_ctx->code_gen_buffer_size = size; + tcg_splitwx_diff = buf_rx - buf_rw; + + /* Request large pages for the buffer and the splitwx. */ + qemu_madvise(buf_rw, size, QEMU_MADV_HUGEPAGE); + qemu_madvise(buf_rx, size, QEMU_MADV_HUGEPAGE); + return true; + + fail_rx: + error_setg_errno(errp, errno, "failed to map shared memory for execute"); + fail: + if (buf_rx != MAP_FAILED) { + munmap(buf_rx, size); + } + if (buf_rw) { + munmap(buf_rw, size); + } + if (fd >= 0) { + close(fd); + } + return false; +} +#endif /* CONFIG_POSIX */ + +#ifdef CONFIG_DARWIN +#include <mach/mach.h> + +extern kern_return_t mach_vm_remap(vm_map_t target_task, + mach_vm_address_t *target_address, + mach_vm_size_t size, + mach_vm_offset_t mask, + int flags, + vm_map_t src_task, + mach_vm_address_t src_address, + boolean_t copy, + vm_prot_t *cur_protection, + vm_prot_t *max_protection, + vm_inherit_t inheritance); + +static bool alloc_code_gen_buffer_splitwx_vmremap(size_t size, Error **errp) +{ + kern_return_t ret; + mach_vm_address_t buf_rw, buf_rx; + vm_prot_t cur_prot, max_prot; + + /* Map the read-write portion via normal anon memory. */ + if (!alloc_code_gen_buffer_anon(size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, errp)) { + return false; + } + + buf_rw = (mach_vm_address_t)tcg_ctx->code_gen_buffer; + buf_rx = 0; + ret = mach_vm_remap(mach_task_self(), + &buf_rx, + size, + 0, + VM_FLAGS_ANYWHERE, + mach_task_self(), + buf_rw, + false, + &cur_prot, + &max_prot, + VM_INHERIT_NONE); + if (ret != KERN_SUCCESS) { + /* TODO: Convert "ret" to a human readable error message. */ + error_setg(errp, "vm_remap for jit splitwx failed"); + munmap((void *)buf_rw, size); + return false; + } + + if (mprotect((void *)buf_rx, size, PROT_READ | PROT_EXEC) != 0) { + error_setg_errno(errp, errno, "mprotect for jit splitwx"); + munmap((void *)buf_rx, size); + munmap((void *)buf_rw, size); + return false; + } + + tcg_splitwx_diff = buf_rx - buf_rw; + return true; +} +#endif /* CONFIG_DARWIN */ +#endif /* CONFIG_TCG_INTERPRETER */ + +static bool alloc_code_gen_buffer_splitwx(size_t size, Error **errp) +{ +#ifndef CONFIG_TCG_INTERPRETER +# ifdef CONFIG_DARWIN + return alloc_code_gen_buffer_splitwx_vmremap(size, errp); +# endif +# ifdef CONFIG_POSIX + return alloc_code_gen_buffer_splitwx_memfd(size, errp); +# endif +#endif + error_setg(errp, "jit split-wx not supported"); + return false; +} + +static bool alloc_code_gen_buffer(size_t size, int splitwx, Error **errp) +{ + ERRP_GUARD(); + int prot, flags; + + if (splitwx) { + if (alloc_code_gen_buffer_splitwx(size, errp)) { + return true; + } + /* + * If splitwx force-on (1), fail; + * if splitwx default-on (-1), fall through to splitwx off. + */ + if (splitwx > 0) { + return false; + } + error_free_or_abort(errp); + } + + prot = PROT_READ | PROT_WRITE | PROT_EXEC; + flags = MAP_PRIVATE | MAP_ANONYMOUS; +#ifdef CONFIG_TCG_INTERPRETER + /* The tcg interpreter does not need execute permission. */ + prot = PROT_READ | PROT_WRITE; +#elif defined(CONFIG_DARWIN) + /* Applicable to both iOS and macOS (Apple Silicon). */ + if (!splitwx) { + flags |= MAP_JIT; + } +#endif + + return alloc_code_gen_buffer_anon(size, prot, flags, errp); +} +#endif /* USE_STATIC_CODE_GEN_BUFFER, WIN32, POSIX */ + +/* * Initializes region partitioning. * * Called at init time from the parent thread (i.e. the one calling @@ -434,16 +848,23 @@ static size_t tcg_n_regions(void) * in practice. Multi-threaded guests share most if not all of their translated * code, which makes parallel code generation less appealing than in softmmu. */ -void tcg_region_init(void) +void tcg_region_init(size_t tb_size, int splitwx) { - void *buf = tcg_init_ctx.code_gen_buffer; - void *aligned; - size_t size = tcg_init_ctx.code_gen_buffer_size; - size_t page_size = qemu_real_host_page_size; + void *buf, *aligned; + size_t size; + size_t page_size; size_t region_size; size_t n_regions; size_t i; + bool ok; + + ok = alloc_code_gen_buffer(size_code_gen_buffer(tb_size), + splitwx, &error_fatal); + assert(ok); + buf = tcg_init_ctx.code_gen_buffer; + size = tcg_init_ctx.code_gen_buffer_size; + page_size = qemu_real_host_page_size; n_regions = tcg_n_regions(); /* The first region will be 'aligned - buf' bytes larger than the others */ |