Diffstat (limited to 'accel')
58 files changed, 1914 insertions, 1768 deletions
diff --git a/accel/Kconfig b/accel/Kconfig index 794e0d1..4263cab 100644 --- a/accel/Kconfig +++ b/accel/Kconfig @@ -16,4 +16,5 @@ config KVM config XEN bool select FSDEV_9P if VIRTFS + select PCI_EXPRESS_GENERIC_BRIDGE select XEN_BUS diff --git a/accel/accel-blocker.c b/accel/accel-blocker.c index e083f24..51132d1 100644 --- a/accel/accel-blocker.c +++ b/accel/accel-blocker.c @@ -25,10 +25,11 @@ */ #include "qemu/osdep.h" +#include "qemu/lockcnt.h" #include "qemu/thread.h" #include "qemu/main-loop.h" #include "hw/core/cpu.h" -#include "sysemu/accel-blocker.h" +#include "system/accel-blocker.h" static QemuLockCnt accel_in_ioctl_lock; static QemuEvent accel_in_ioctl_event; diff --git a/accel/accel-common.c b/accel/accel-common.c new file mode 100644 index 0000000..4894b98 --- /dev/null +++ b/accel/accel-common.c @@ -0,0 +1,142 @@ +/* + * QEMU accel class, components common to system emulation and user mode + * + * Copyright (c) 2003-2008 Fabrice Bellard + * Copyright (c) 2014 Red Hat Inc. + * + * SPDX-License-Identifier: MIT + */ + +#include "qemu/osdep.h" +#include "qemu/accel.h" +#include "qemu/target-info.h" +#include "accel/accel-cpu.h" +#include "accel-internal.h" + +/* Lookup AccelClass from opt_name. Returns NULL if not found */ +AccelClass *accel_find(const char *opt_name) +{ + char *class_name = g_strdup_printf(ACCEL_CLASS_NAME("%s"), opt_name); + AccelClass *ac = ACCEL_CLASS(module_object_class_by_name(class_name)); + g_free(class_name); + return ac; +} + +/* Return the name of the current accelerator */ +const char *current_accel_name(void) +{ + AccelClass *ac = ACCEL_GET_CLASS(current_accel()); + + return ac->name; +} + +static void accel_init_cpu_int_aux(ObjectClass *klass, void *opaque) +{ + CPUClass *cc = CPU_CLASS(klass); + AccelCPUClass *accel_cpu = opaque; + + /* + * The first callback allows accel-cpu to run initializations + * for the CPU, customizing CPU behavior according to the accelerator. + * + * The second one allows the CPU to customize the accel-cpu + * behavior according to the CPU. + * + * The second is currently only used by TCG, to specialize the + * TCGCPUOps depending on the CPU type. 
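A note on the accel_find() helper that the new accel-common.c introduces above: the QOM class name is derived from the user-visible accelerator name, and module_object_class_by_name() loads the matching accelerator module on demand before the cast. A minimal usage sketch, assuming ACCEL_CLASS_NAME() appends the "-accel" suffix as in include/qemu/accel.h:

/* Sketch: "-accel kvm" resolves to the QOM class "kvm-accel".
 * A NULL result means the accelerator is neither built in nor
 * provided by a loadable module. */
AccelClass *ac = accel_find("kvm");
if (!ac) {
    error_report("accelerator 'kvm' not available");
}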
+ */ + cc->accel_cpu = accel_cpu; + if (accel_cpu->cpu_class_init) { + accel_cpu->cpu_class_init(cc); + } + if (cc->init_accel_cpu) { + cc->init_accel_cpu(accel_cpu, cc); + } +} + +/* initialize the arch-specific accel CpuClass interfaces */ +static void accel_init_cpu_interfaces(AccelClass *ac) +{ + const char *ac_name; /* AccelClass name */ + char *acc_name; /* AccelCPUClass name */ + ObjectClass *acc; /* AccelCPUClass */ + const char *cpu_resolving_type = target_cpu_type(); + + ac_name = object_class_get_name(OBJECT_CLASS(ac)); + g_assert(ac_name != NULL); + + acc_name = g_strdup_printf("%s-%s", ac_name, cpu_resolving_type); + acc = object_class_by_name(acc_name); + g_free(acc_name); + + if (acc) { + object_class_foreach(accel_init_cpu_int_aux, + cpu_resolving_type, false, acc); + } +} + +void accel_init_interfaces(AccelClass *ac) +{ + accel_init_ops_interfaces(ac); + accel_init_cpu_interfaces(ac); +} + +void accel_cpu_instance_init(CPUState *cpu) +{ + if (cpu->cc->accel_cpu && cpu->cc->accel_cpu->cpu_instance_init) { + cpu->cc->accel_cpu->cpu_instance_init(cpu); + } +} + +bool accel_cpu_common_realize(CPUState *cpu, Error **errp) +{ + AccelState *accel = current_accel(); + AccelClass *acc = ACCEL_GET_CLASS(accel); + + /* target specific realization */ + if (cpu->cc->accel_cpu + && cpu->cc->accel_cpu->cpu_target_realize + && !cpu->cc->accel_cpu->cpu_target_realize(cpu, errp)) { + return false; + } + + /* generic realization */ + if (acc->cpu_common_realize && !acc->cpu_common_realize(cpu, errp)) { + return false; + } + + return true; +} + +void accel_cpu_common_unrealize(CPUState *cpu) +{ + AccelState *accel = current_accel(); + AccelClass *acc = ACCEL_GET_CLASS(accel); + + /* generic unrealization */ + if (acc->cpu_common_unrealize) { + acc->cpu_common_unrealize(cpu); + } +} + +int accel_supported_gdbstub_sstep_flags(void) +{ + AccelState *accel = current_accel(); + AccelClass *acc = ACCEL_GET_CLASS(accel); + if (acc->gdbstub_supported_sstep_flags) { + return acc->gdbstub_supported_sstep_flags(); + } + return 0; +} + +static const TypeInfo accel_types[] = { + { + .name = TYPE_ACCEL, + .parent = TYPE_OBJECT, + .class_size = sizeof(AccelClass), + .instance_size = sizeof(AccelState), + .abstract = true, + }, +}; + +DEFINE_TYPES(accel_types) diff --git a/accel/accel-system.h b/accel/accel-internal.h index 2d37c73..d3a4422 100644 --- a/accel/accel-system.h +++ b/accel/accel-internal.h @@ -1,5 +1,5 @@ /* - * QEMU System Emulation accel internal functions + * QEMU accel internal functions * * Copyright 2021 SUSE LLC * @@ -7,9 +7,11 @@ * See the COPYING file in the top-level directory. 
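On the registration style used above: the base TYPE_ACCEL used to be registered from accel-target.c with an explicit type_init()/type_register_static() pair (removed further down); accel-common.c switches to the array form. A sketch of the equivalence, assuming DEFINE_TYPES() expands to a type_init() constructor that calls type_register_static_array():

/* Array form, as used by accel-common.c above. */
static const TypeInfo my_types[] = {
    { .name = "my-type", .parent = TYPE_OBJECT },  /* hypothetical type */
};
DEFINE_TYPES(my_types)

/* Roughly equivalent open-coded form. */
static void register_my_types(void)
{
    type_register_static_array(my_types, ARRAY_SIZE(my_types));
}
type_init(register_my_types);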
*/ -#ifndef ACCEL_SYSTEM_H -#define ACCEL_SYSTEM_H +#ifndef ACCEL_INTERNAL_H +#define ACCEL_INTERNAL_H -void accel_system_init_ops_interfaces(AccelClass *ac); +#include "qemu/accel.h" + +void accel_init_ops_interfaces(AccelClass *ac); #endif /* ACCEL_SYSTEM_H */ diff --git a/accel/accel-system.c b/accel/accel-system.c index f6c947d..a0f562a 100644 --- a/accel/accel-system.c +++ b/accel/accel-system.c @@ -26,9 +26,10 @@ #include "qemu/osdep.h" #include "qemu/accel.h" #include "hw/boards.h" -#include "sysemu/cpus.h" +#include "system/accel-ops.h" +#include "system/cpus.h" #include "qemu/error-report.h" -#include "accel-system.h" +#include "accel-internal.h" int accel_init_machine(AccelState *accel, MachineState *ms) { @@ -62,7 +63,7 @@ void accel_setup_post(MachineState *ms) } /* initialize the arch-independent accel operation interfaces */ -void accel_system_init_ops_interfaces(AccelClass *ac) +void accel_init_ops_interfaces(AccelClass *ac) { const char *ac_name; char *ops_name; @@ -73,19 +74,17 @@ void accel_system_init_ops_interfaces(AccelClass *ac) g_assert(ac_name != NULL); ops_name = g_strdup_printf("%s" ACCEL_OPS_SUFFIX, ac_name); - ops = ACCEL_OPS_CLASS(module_object_class_by_name(ops_name)); oc = module_object_class_by_name(ops_name); if (!oc) { error_report("fatal: could not load module for type '%s'", ops_name); exit(1); } g_free(ops_name); - ops = ACCEL_OPS_CLASS(oc); /* * all accelerators need to define ops, providing at least a mandatory * non-NULL create_vcpu_thread operation. */ - g_assert(ops != NULL); + ops = ACCEL_OPS_CLASS(oc); if (ops->ops_init) { ops->ops_init(ops); } diff --git a/accel/accel-target.c b/accel/accel-target.c index 08626c0..7fd392f 100644 --- a/accel/accel-target.c +++ b/accel/accel-target.c @@ -24,141 +24,7 @@ */ #include "qemu/osdep.h" -#include "qemu/accel.h" - -#include "cpu.h" -#include "hw/core/accel-cpu.h" - -#ifndef CONFIG_USER_ONLY -#include "accel-system.h" -#endif /* !CONFIG_USER_ONLY */ - -static const TypeInfo accel_type = { - .name = TYPE_ACCEL, - .parent = TYPE_OBJECT, - .class_size = sizeof(AccelClass), - .instance_size = sizeof(AccelState), -}; - -/* Lookup AccelClass from opt_name. Returns NULL if not found */ -AccelClass *accel_find(const char *opt_name) -{ - char *class_name = g_strdup_printf(ACCEL_CLASS_NAME("%s"), opt_name); - AccelClass *ac = ACCEL_CLASS(module_object_class_by_name(class_name)); - g_free(class_name); - return ac; -} - -/* Return the name of the current accelerator */ -const char *current_accel_name(void) -{ - AccelClass *ac = ACCEL_GET_CLASS(current_accel()); - - return ac->name; -} - -static void accel_init_cpu_int_aux(ObjectClass *klass, void *opaque) -{ - CPUClass *cc = CPU_CLASS(klass); - AccelCPUClass *accel_cpu = opaque; - - /* - * The first callback allows accel-cpu to run initializations - * for the CPU, customizing CPU behavior according to the accelerator. - * - * The second one allows the CPU to customize the accel-cpu - * behavior according to the CPU. - * - * The second is currently only used by TCG, to specialize the - * TCGCPUOps depending on the CPU type. 
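The accel-system.c hunk above is more than a rename: the old code passed the raw module_object_class_by_name() result through ACCEL_OPS_CLASS() before the NULL check, so a missing "<accel>-ops" module could trip the QOM cast assertion instead of reaching the friendly error. The corrected flow, reduced:

/* Look up once, check, then cast (sketch of the order fixed above). */
ObjectClass *oc = module_object_class_by_name(ops_name);
if (!oc) {
    error_report("fatal: could not load module for type '%s'", ops_name);
    exit(1);
}
ops = ACCEL_OPS_CLASS(oc);   /* safe: oc is known to be non-NULL here */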
- */ - cc->accel_cpu = accel_cpu; - if (accel_cpu->cpu_class_init) { - accel_cpu->cpu_class_init(cc); - } - if (cc->init_accel_cpu) { - cc->init_accel_cpu(accel_cpu, cc); - } -} - -/* initialize the arch-specific accel CpuClass interfaces */ -static void accel_init_cpu_interfaces(AccelClass *ac) -{ - const char *ac_name; /* AccelClass name */ - char *acc_name; /* AccelCPUClass name */ - ObjectClass *acc; /* AccelCPUClass */ - - ac_name = object_class_get_name(OBJECT_CLASS(ac)); - g_assert(ac_name != NULL); - - acc_name = g_strdup_printf("%s-%s", ac_name, CPU_RESOLVING_TYPE); - acc = object_class_by_name(acc_name); - g_free(acc_name); - - if (acc) { - object_class_foreach(accel_init_cpu_int_aux, - CPU_RESOLVING_TYPE, false, acc); - } -} - -void accel_init_interfaces(AccelClass *ac) -{ -#ifndef CONFIG_USER_ONLY - accel_system_init_ops_interfaces(ac); -#endif /* !CONFIG_USER_ONLY */ - - accel_init_cpu_interfaces(ac); -} - -void accel_cpu_instance_init(CPUState *cpu) -{ - CPUClass *cc = CPU_GET_CLASS(cpu); - - if (cc->accel_cpu && cc->accel_cpu->cpu_instance_init) { - cc->accel_cpu->cpu_instance_init(cpu); - } -} - -bool accel_cpu_common_realize(CPUState *cpu, Error **errp) -{ - CPUClass *cc = CPU_GET_CLASS(cpu); - AccelState *accel = current_accel(); - AccelClass *acc = ACCEL_GET_CLASS(accel); - - /* target specific realization */ - if (cc->accel_cpu && cc->accel_cpu->cpu_target_realize - && !cc->accel_cpu->cpu_target_realize(cpu, errp)) { - return false; - } - - /* generic realization */ - if (acc->cpu_common_realize && !acc->cpu_common_realize(cpu, errp)) { - return false; - } - - return true; -} - -void accel_cpu_common_unrealize(CPUState *cpu) -{ - AccelState *accel = current_accel(); - AccelClass *acc = ACCEL_GET_CLASS(accel); - - /* generic unrealization */ - if (acc->cpu_common_unrealize) { - acc->cpu_common_unrealize(cpu); - } -} - -int accel_supported_gdbstub_sstep_flags(void) -{ - AccelState *accel = current_accel(); - AccelClass *acc = ACCEL_GET_CLASS(accel); - if (acc->gdbstub_supported_sstep_flags) { - return acc->gdbstub_supported_sstep_flags(); - } - return 0; -} +#include "accel/accel-cpu-target.h" static const TypeInfo accel_cpu_type = { .name = TYPE_ACCEL_CPU, @@ -169,7 +35,6 @@ static const TypeInfo accel_cpu_type = { static void register_accel_types(void) { - type_register_static(&accel_type); type_register_static(&accel_cpu_type); } diff --git a/accel/accel-user.c b/accel/accel-user.c index 22b6a1a..7d19230 100644 --- a/accel/accel-user.c +++ b/accel/accel-user.c @@ -9,6 +9,12 @@ #include "qemu/osdep.h" #include "qemu/accel.h" +#include "accel-internal.h" + +void accel_init_ops_interfaces(AccelClass *ac) +{ + /* nothing */ +} AccelState *current_accel(void) { diff --git a/accel/dummy-cpus.c b/accel/dummy-cpus.c index f32d8c8..8672761 100644 --- a/accel/dummy-cpus.c +++ b/accel/dummy-cpus.c @@ -13,7 +13,7 @@ #include "qemu/osdep.h" #include "qemu/rcu.h" -#include "sysemu/cpus.h" +#include "system/cpus.h" #include "qemu/guest-random.h" #include "qemu/main-loop.h" #include "hw/core/cpu.h" diff --git a/accel/hvf/hvf-accel-ops.c b/accel/hvf/hvf-accel-ops.c index ac08cfb..b389772 100644 --- a/accel/hvf/hvf-accel-ops.c +++ b/accel/hvf/hvf-accel-ops.c @@ -50,21 +50,19 @@ #include "qemu/osdep.h" #include "qemu/error-report.h" #include "qemu/main-loop.h" -#include "exec/address-spaces.h" -#include "exec/exec-all.h" +#include "system/address-spaces.h" #include "gdbstub/enums.h" -#include "sysemu/cpus.h" -#include "sysemu/hvf.h" -#include "sysemu/hvf_int.h" -#include 
"sysemu/runstate.h" +#include "hw/boards.h" +#include "system/accel-ops.h" +#include "system/cpus.h" +#include "system/hvf.h" +#include "system/hvf_int.h" +#include "system/runstate.h" #include "qemu/guest-random.h" +#include "trace.h" HVFState *hvf_state; -#ifdef __aarch64__ -#define HV_VM_DEFAULT NULL -#endif - /* Memory slots */ hvf_slot *hvf_find_overlap_slot(uint64_t start, uint64_t size) @@ -100,6 +98,7 @@ static int do_hvf_set_memory(hvf_slot *slot, hv_memory_flags_t flags) if (macslot->present) { if (macslot->size != slot->size) { macslot->present = 0; + trace_hvf_vm_unmap(macslot->gpa_start, macslot->size); ret = hv_vm_unmap(macslot->gpa_start, macslot->size); assert_hvf_ok(ret); } @@ -112,6 +111,10 @@ static int do_hvf_set_memory(hvf_slot *slot, hv_memory_flags_t flags) macslot->present = 1; macslot->gpa_start = slot->start; macslot->size = slot->size; + trace_hvf_vm_map(slot->start, slot->size, slot->mem, flags, + flags & HV_MEMORY_READ ? 'R' : '-', + flags & HV_MEMORY_WRITE ? 'W' : '-', + flags & HV_MEMORY_EXEC ? 'E' : '-'); ret = hv_vm_map(slot->mem, slot->start, slot->size, flags); assert_hvf_ok(ret); return 0; @@ -323,8 +326,17 @@ static int hvf_accel_init(MachineState *ms) int x; hv_return_t ret; HVFState *s; + int pa_range = 36; + MachineClass *mc = MACHINE_GET_CLASS(ms); + + if (mc->hvf_get_physical_address_range) { + pa_range = mc->hvf_get_physical_address_range(ms); + if (pa_range < 0) { + return -EINVAL; + } + } - ret = hv_vm_create(HV_VM_DEFAULT); + ret = hvf_arch_vm_create(ms, (uint32_t)pa_range); assert_hvf_ok(ret); s = g_new0(HVFState, 1); @@ -348,7 +360,7 @@ static inline int hvf_gdbstub_sstep_flags(void) return SSTEP_ENABLE | SSTEP_NOIRQ; } -static void hvf_accel_class_init(ObjectClass *oc, void *data) +static void hvf_accel_class_init(ObjectClass *oc, const void *data) { AccelClass *ac = ACCEL_CLASS(oc); ac->name = "HVF"; @@ -360,6 +372,7 @@ static void hvf_accel_class_init(ObjectClass *oc, void *data) static const TypeInfo hvf_accel_type = { .name = TYPE_HVF_ACCEL, .parent = TYPE_ACCEL, + .instance_size = sizeof(HVFState), .class_init = hvf_accel_class_init, }; @@ -571,7 +584,7 @@ static void hvf_remove_all_breakpoints(CPUState *cpu) } } -static void hvf_accel_ops_class_init(ObjectClass *oc, void *data) +static void hvf_accel_ops_class_init(ObjectClass *oc, const void *data) { AccelOpsClass *ops = ACCEL_OPS_CLASS(oc); diff --git a/accel/hvf/hvf-all.c b/accel/hvf/hvf-all.c index c008dc2..8c387fd 100644 --- a/accel/hvf/hvf-all.c +++ b/accel/hvf/hvf-all.c @@ -10,8 +10,9 @@ #include "qemu/osdep.h" #include "qemu/error-report.h" -#include "sysemu/hvf.h" -#include "sysemu/hvf_int.h" +#include "system/hvf.h" +#include "system/hvf_int.h" +#include "hw/core/cpu.h" const char *hvf_return_string(hv_return_t ret) { @@ -23,10 +24,7 @@ const char *hvf_return_string(hv_return_t ret) case HV_NO_RESOURCES: return "HV_NO_RESOURCES"; case HV_NO_DEVICE: return "HV_NO_DEVICE"; case HV_UNSUPPORTED: return "HV_UNSUPPORTED"; -#if defined(MAC_OS_VERSION_11_0) && \ - MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0 case HV_DENIED: return "HV_DENIED"; -#endif default: return "[unknown hv_return value]"; } } @@ -61,8 +59,13 @@ int hvf_sw_breakpoints_active(CPUState *cpu) return !QTAILQ_EMPTY(&hvf_state->hvf_sw_breakpoints); } -int hvf_update_guest_debug(CPUState *cpu) +static void do_hvf_update_guest_debug(CPUState *cpu, run_on_cpu_data arg) { hvf_arch_update_guest_debug(cpu); +} + +int hvf_update_guest_debug(CPUState *cpu) +{ + run_on_cpu(cpu, do_hvf_update_guest_debug, 
RUN_ON_CPU_NULL); return 0; } diff --git a/accel/hvf/trace-events b/accel/hvf/trace-events new file mode 100644 index 0000000..2fd3e12 --- /dev/null +++ b/accel/hvf/trace-events @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0-or-later +# +# See docs/devel/tracing.rst for syntax documentation. + +# hvf-accel-ops.c +hvf_vm_map(uint64_t paddr, uint64_t size, void *vaddr, uint8_t flags, const char r, const char w, const char e) "paddr:0x%016"PRIx64" size:0x%08"PRIx64" vaddr:%p flags:0x%02x/%c%c%c" +hvf_vm_unmap(uint64_t paddr, uint64_t size) "paddr:0x%016"PRIx64" size:0x%08"PRIx64 diff --git a/accel/hvf/trace.h b/accel/hvf/trace.h new file mode 100644 index 0000000..83a1883 --- /dev/null +++ b/accel/hvf/trace.h @@ -0,0 +1,2 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#include "trace/trace-accel_hvf.h" diff --git a/accel/kvm/kvm-accel-ops.c b/accel/kvm/kvm-accel-ops.c index c239dfc..e5c1544 100644 --- a/accel/kvm/kvm-accel-ops.c +++ b/accel/kvm/kvm-accel-ops.c @@ -16,10 +16,11 @@ #include "qemu/osdep.h" #include "qemu/error-report.h" #include "qemu/main-loop.h" -#include "sysemu/kvm.h" -#include "sysemu/kvm_int.h" -#include "sysemu/runstate.h" -#include "sysemu/cpus.h" +#include "system/accel-ops.h" +#include "system/kvm.h" +#include "system/kvm_int.h" +#include "system/runstate.h" +#include "system/cpus.h" #include "qemu/guest-random.h" #include "qapi/error.h" @@ -89,7 +90,7 @@ static int kvm_update_guest_debug_ops(CPUState *cpu) } #endif -static void kvm_accel_ops_class_init(ObjectClass *oc, void *data) +static void kvm_accel_ops_class_init(ObjectClass *oc, const void *data) { AccelOpsClass *ops = ACCEL_OPS_CLASS(oc); diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index 2b4ab89..d095d1b 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -28,13 +28,14 @@ #include "hw/pci/msix.h" #include "hw/s390x/adapter.h" #include "gdbstub/enums.h" -#include "sysemu/kvm_int.h" -#include "sysemu/runstate.h" -#include "sysemu/cpus.h" -#include "sysemu/accel-blocker.h" +#include "system/kvm_int.h" +#include "system/runstate.h" +#include "system/cpus.h" +#include "system/accel-blocker.h" #include "qemu/bswap.h" -#include "exec/memory.h" -#include "exec/ram_addr.h" +#include "exec/tswap.h" +#include "system/memory.h" +#include "system/ram_addr.h" #include "qemu/event_notifier.h" #include "qemu/main-loop.h" #include "trace.h" @@ -42,21 +43,26 @@ #include "qapi/visitor.h" #include "qapi/qapi-types-common.h" #include "qapi/qapi-visit-common.h" -#include "sysemu/reset.h" +#include "system/reset.h" #include "qemu/guest-random.h" -#include "sysemu/hw_accel.h" +#include "system/hw_accel.h" #include "kvm-cpus.h" -#include "sysemu/dirtylimit.h" +#include "system/dirtylimit.h" #include "qemu/range.h" #include "hw/boards.h" -#include "sysemu/stats.h" +#include "system/stats.h" /* This check must be after config-host.h is included */ #ifdef CONFIG_EVENTFD #include <sys/eventfd.h> #endif +#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__) +# define KVM_HAVE_MCE_INJECTION 1 +#endif + + /* KVM uses PAGE_SIZE in its definition of KVM_COALESCED_MMIO_MAX. We * need to use the real host PAGE_SIZE, as that's what KVM will use. 
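Stepping back to the hvf_update_guest_debug() change above: routing the call through run_on_cpu() makes hvf_arch_update_guest_debug() execute at a safe point on the target vCPU's own thread rather than on the caller's. The general pattern, sketched (run_on_cpu() blocks until the work item has run; RUN_ON_CPU_NULL passes an empty payload):

/* Cross-thread work pattern used above (sketch). */
static void do_work(CPUState *cpu, run_on_cpu_data arg)
{
    /* runs on cpu's own thread, at a safe point */
}

static void request_work(CPUState *cpu)
{
    run_on_cpu(cpu, do_work, RUN_ON_CPU_NULL);  /* returns after do_work */
}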
*/ @@ -69,6 +75,11 @@ #define KVM_GUESTDBG_BLOCKIRQ 0 #endif +/* Default num of memslots to be allocated when VM starts */ +#define KVM_MEMSLOTS_NR_ALLOC_DEFAULT 16 +/* Default max allowed memslots if kernel reported nothing */ +#define KVM_MEMSLOTS_NR_MAX_DEFAULT 32 + struct KVMParkedVcpu { unsigned long vcpu_id; int kvm_fd; @@ -88,6 +99,7 @@ bool kvm_allowed; bool kvm_readonly_mem_allowed; bool kvm_vm_attributes_allowed; bool kvm_msi_use_devid; +bool kvm_pre_fault_memory_supported; static bool kvm_has_guest_debug; static int kvm_sstep_flags; static bool kvm_immediate_exit; @@ -165,11 +177,62 @@ void kvm_resample_fd_notify(int gsi) } } +/** + * kvm_slots_grow(): Grow the slots[] array in the KVMMemoryListener + * + * @kml: The KVMMemoryListener* to grow the slots[] array + * @nr_slots_new: The new size of slots[] array + * + * Returns: True if the array grows larger, false otherwise. + */ +static bool kvm_slots_grow(KVMMemoryListener *kml, unsigned int nr_slots_new) +{ + unsigned int i, cur = kml->nr_slots_allocated; + KVMSlot *slots; + + if (nr_slots_new > kvm_state->nr_slots_max) { + nr_slots_new = kvm_state->nr_slots_max; + } + + if (cur >= nr_slots_new) { + /* Big enough, no need to grow, or we reached max */ + return false; + } + + if (cur == 0) { + slots = g_new0(KVMSlot, nr_slots_new); + } else { + assert(kml->slots); + slots = g_renew(KVMSlot, kml->slots, nr_slots_new); + /* + * g_renew() doesn't initialize extended buffers, however kvm + * memslots require fields to be zero-initialized. E.g. pointers, + * memory_size field, etc. + */ + memset(&slots[cur], 0x0, sizeof(slots[0]) * (nr_slots_new - cur)); + } + + for (i = cur; i < nr_slots_new; i++) { + slots[i].slot = i; + } + + kml->slots = slots; + kml->nr_slots_allocated = nr_slots_new; + trace_kvm_slots_grow(cur, nr_slots_new); + + return true; +} + +static bool kvm_slots_double(KVMMemoryListener *kml) +{ + return kvm_slots_grow(kml, kml->nr_slots_allocated * 2); +} + unsigned int kvm_get_max_memslots(void) { KVMState *s = KVM_STATE(current_accel()); - return s->nr_slots; + return s->nr_slots_max; } unsigned int kvm_get_free_memslots(void) @@ -183,25 +246,36 @@ unsigned int kvm_get_free_memslots(void) if (!s->as[i].ml) { continue; } - used_slots = MAX(used_slots, s->as[i].ml->nr_used_slots); + used_slots = MAX(used_slots, s->as[i].ml->nr_slots_used); } kvm_slots_unlock(); - return s->nr_slots - used_slots; + return s->nr_slots_max - used_slots; } /* Called with KVMMemoryListener.slots_lock held */ static KVMSlot *kvm_get_free_slot(KVMMemoryListener *kml) { - KVMState *s = kvm_state; + unsigned int n; int i; - for (i = 0; i < s->nr_slots; i++) { + for (i = 0; i < kml->nr_slots_allocated; i++) { if (kml->slots[i].memory_size == 0) { return &kml->slots[i]; } } + /* + * If no free slots, try to grow first by doubling. Cache the old size + * here to avoid another round of search: if the grow succeeded, it + * means slots[] now must have the existing "n" slots occupied, + * followed by one or more free slots starting from slots[n]. 
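The memset in kvm_slots_grow() above deserves the comment it carries: g_renew() has realloc semantics, so the tail beyond the old allocation is uninitialized, while the slot allocator relies on memory_size == 0 to mean "free". Reduced to the core idiom:

/* Grow-with-zeroing idiom from kvm_slots_grow() (sketch). */
unsigned int cur = kml->nr_slots_allocated;            /* old size */
KVMSlot *slots = g_renew(KVMSlot, kml->slots, nr_new);
/* [0, cur) is preserved by g_renew(); [cur, nr_new) is indeterminate */
memset(&slots[cur], 0, sizeof(slots[0]) * (nr_new - cur));
/* now memory_size == 0 reliably marks the grown entries as free */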
+ */ + n = kml->nr_slots_allocated; + if (kvm_slots_double(kml)) { + return &kml->slots[n]; + } + return NULL; } @@ -222,10 +296,9 @@ static KVMSlot *kvm_lookup_matching_slot(KVMMemoryListener *kml, hwaddr start_addr, hwaddr size) { - KVMState *s = kvm_state; int i; - for (i = 0; i < s->nr_slots; i++) { + for (i = 0; i < kml->nr_slots_allocated; i++) { KVMSlot *mem = &kml->slots[i]; if (start_addr == mem->start_addr && size == mem->memory_size) { @@ -267,7 +340,7 @@ int kvm_physical_memory_addr_from_host(KVMState *s, void *ram, int i, ret = 0; kvm_slots_lock(); - for (i = 0; i < s->nr_slots; i++) { + for (i = 0; i < kml->nr_slots_allocated; i++) { KVMSlot *mem = &kml->slots[i]; if (ram >= mem->ram && ram < mem->ram + mem->memory_size) { @@ -340,14 +413,95 @@ err: return ret; } +void kvm_park_vcpu(CPUState *cpu) +{ + struct KVMParkedVcpu *vcpu; + + trace_kvm_park_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); + + vcpu = g_malloc0(sizeof(*vcpu)); + vcpu->vcpu_id = kvm_arch_vcpu_id(cpu); + vcpu->kvm_fd = cpu->kvm_fd; + QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node); +} + +int kvm_unpark_vcpu(KVMState *s, unsigned long vcpu_id) +{ + struct KVMParkedVcpu *cpu; + int kvm_fd = -ENOENT; + + QLIST_FOREACH(cpu, &s->kvm_parked_vcpus, node) { + if (cpu->vcpu_id == vcpu_id) { + QLIST_REMOVE(cpu, node); + kvm_fd = cpu->kvm_fd; + g_free(cpu); + break; + } + } + + trace_kvm_unpark_vcpu(vcpu_id, kvm_fd > 0 ? "unparked" : "!found parked"); + + return kvm_fd; +} + +static void kvm_reset_parked_vcpus(KVMState *s) +{ + struct KVMParkedVcpu *cpu; + + QLIST_FOREACH(cpu, &s->kvm_parked_vcpus, node) { + kvm_arch_reset_parked_vcpu(cpu->vcpu_id, cpu->kvm_fd); + } +} + +int kvm_create_vcpu(CPUState *cpu) +{ + unsigned long vcpu_id = kvm_arch_vcpu_id(cpu); + KVMState *s = kvm_state; + int kvm_fd; + + /* check if the KVM vCPU already exist but is parked */ + kvm_fd = kvm_unpark_vcpu(s, vcpu_id); + if (kvm_fd < 0) { + /* vCPU not parked: create a new KVM vCPU */ + kvm_fd = kvm_vm_ioctl(s, KVM_CREATE_VCPU, vcpu_id); + if (kvm_fd < 0) { + error_report("KVM_CREATE_VCPU IOCTL failed for vCPU %lu", vcpu_id); + return kvm_fd; + } + } + + cpu->kvm_fd = kvm_fd; + cpu->kvm_state = s; + if (!s->guest_state_protected) { + cpu->vcpu_dirty = true; + } + cpu->dirty_pages = 0; + cpu->throttle_us_per_full = 0; + + trace_kvm_create_vcpu(cpu->cpu_index, vcpu_id, kvm_fd); + + return 0; +} + +int kvm_create_and_park_vcpu(CPUState *cpu) +{ + int ret = 0; + + ret = kvm_create_vcpu(cpu); + if (!ret) { + kvm_park_vcpu(cpu); + } + + return ret; +} + static int do_kvm_destroy_vcpu(CPUState *cpu) { KVMState *s = kvm_state; - long mmap_size; - struct KVMParkedVcpu *vcpu = NULL; + int mmap_size; int ret = 0; - trace_kvm_destroy_vcpu(); + trace_kvm_destroy_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); ret = kvm_arch_destroy_vcpu(cpu); if (ret < 0) { @@ -373,10 +527,7 @@ static int do_kvm_destroy_vcpu(CPUState *cpu) } } - vcpu = g_malloc0(sizeof(*vcpu)); - vcpu->vcpu_id = kvm_arch_vcpu_id(cpu); - vcpu->kvm_fd = cpu->kvm_fd; - QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node); + kvm_park_vcpu(cpu); err: return ret; } @@ -389,44 +540,26 @@ void kvm_destroy_vcpu(CPUState *cpu) } } -static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id) -{ - struct KVMParkedVcpu *cpu; - - QLIST_FOREACH(cpu, &s->kvm_parked_vcpus, node) { - if (cpu->vcpu_id == vcpu_id) { - int kvm_fd; - - QLIST_REMOVE(cpu, node); - kvm_fd = cpu->kvm_fd; - g_free(cpu); - return kvm_fd; - } - } - - return kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)vcpu_id); -} - int 
kvm_init_vcpu(CPUState *cpu, Error **errp) { KVMState *s = kvm_state; - long mmap_size; + int mmap_size; int ret; trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); - ret = kvm_get_vcpu(s, kvm_arch_vcpu_id(cpu)); + ret = kvm_arch_pre_create_vcpu(cpu, errp); if (ret < 0) { - error_setg_errno(errp, -ret, "kvm_init_vcpu: kvm_get_vcpu failed (%lu)", - kvm_arch_vcpu_id(cpu)); goto err; } - cpu->kvm_fd = ret; - cpu->kvm_state = s; - cpu->vcpu_dirty = true; - cpu->dirty_pages = 0; - cpu->throttle_us_per_full = 0; + ret = kvm_create_vcpu(cpu); + if (ret < 0) { + error_setg_errno(errp, -ret, + "kvm_init_vcpu: kvm_create_vcpu failed (%lu)", + kvm_arch_vcpu_id(cpu)); + goto err; + } mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0); if (mmap_size < 0) { @@ -1027,7 +1160,7 @@ static int kvm_physical_log_clear(KVMMemoryListener *kml, kvm_slots_lock(); - for (i = 0; i < s->nr_slots; i++) { + for (i = 0; i < kml->nr_slots_allocated; i++) { mem = &kml->slots[i]; /* Discard slots that are empty or do not overlap the section */ if (!mem->memory_size || @@ -1168,7 +1301,7 @@ static void kvm_unpoison_all(void *param) QLIST_FOREACH_SAFE(page, &hwpoison_page_list, list, next_page) { QLIST_REMOVE(page, list); - qemu_ram_remap(page->ram_addr, TARGET_PAGE_SIZE); + qemu_ram_remap(page->ram_addr); g_free(page); } } @@ -1194,21 +1327,22 @@ bool kvm_hwpoisoned_mem(void) static uint32_t adjust_ioeventfd_endianness(uint32_t val, uint32_t size) { -#if HOST_BIG_ENDIAN != TARGET_BIG_ENDIAN - /* The kernel expects ioeventfd values in HOST_BIG_ENDIAN - * endianness, but the memory core hands them in target endianness. - * For example, PPC is always treated as big-endian even if running - * on KVM and on PPC64LE. Correct here. - */ - switch (size) { - case 2: - val = bswap16(val); - break; - case 4: - val = bswap32(val); - break; + if (target_needs_bswap()) { + /* + * The kernel expects ioeventfd values in HOST_BIG_ENDIAN + * endianness, but the memory core hands them in target endianness. + * For example, PPC is always treated as big-endian even if running + * on KVM and on PPC64LE. Correct here, swapping back. + */ + switch (size) { + case 2: + val = bswap16(val); + break; + case 4: + val = bswap32(val); + break; + } } -#endif return val; } @@ -1406,7 +1540,7 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, } start_addr += slot_size; size -= slot_size; - kml->nr_used_slots--; + kml->nr_slots_used--; } while (size); return; } @@ -1445,7 +1579,7 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, ram_start_offset += slot_size; ram += slot_size; size -= slot_size; - kml->nr_used_slots++; + kml->nr_slots_used++; } while (size); } @@ -1481,11 +1615,7 @@ static void *kvm_dirty_ring_reaper_thread(void *data) r->reaper_iteration++; } - trace_kvm_dirty_ring_reaper("exit"); - - rcu_unregister_thread(); - - return NULL; + g_assert_not_reached(); } static void kvm_dirty_ring_reaper_init(KVMState *s) @@ -1675,12 +1805,8 @@ static void kvm_log_sync_global(MemoryListener *l, bool last_stage) /* Flush all kernel dirty addresses into KVMSlot dirty bitmap */ kvm_dirty_ring_flush(); - /* - * TODO: make this faster when nr_slots is big while there are - * only a few used slots (small VMs). 
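The kvm_park_vcpu()/kvm_unpark_vcpu() helpers introduced above let QEMU keep the kernel vCPU fd alive across vCPU unplug: KVM offers no way to destroy an individual vCPU inside a VM, so the fd is parked and handed back if a vCPU with the same id is created again. The resulting create path, reduced:

/* fd recycling in kvm_create_vcpu() (sketch of the flow above). */
int fd = kvm_unpark_vcpu(s, vcpu_id);              /* reuse a parked fd... */
if (fd < 0) {
    fd = kvm_vm_ioctl(s, KVM_CREATE_VCPU, vcpu_id); /* ...or create anew */
}
cpu->kvm_fd = fd;
/* on unplug, do_kvm_destroy_vcpu() ends with kvm_park_vcpu(cpu) */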
- */ kvm_slots_lock(); - for (i = 0; i < s->nr_slots; i++) { + for (i = 0; i < kml->nr_slots_allocated; i++) { mem = &kml->slots[i]; if (mem->memory_size && mem->flags & KVM_MEM_LOG_DIRTY_PAGES) { kvm_slot_sync_dirty_pages(mem); @@ -1795,12 +1921,9 @@ void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml, { int i; - kml->slots = g_new0(KVMSlot, s->nr_slots); kml->as_id = as_id; - for (i = 0; i < s->nr_slots; i++) { - kml->slots[i].slot = i; - } + kvm_slots_grow(kml, KVM_MEMSLOTS_NR_ALLOC_DEFAULT); QSIMPLEQ_INIT(&kml->transaction_add); QSIMPLEQ_INIT(&kml->transaction_del); @@ -2311,7 +2434,7 @@ static int kvm_recommended_vcpus(KVMState *s) static int kvm_max_vcpus(KVMState *s) { - int ret = kvm_check_extension(s, KVM_CAP_MAX_VCPUS); + int ret = kvm_vm_check_extension(s, KVM_CAP_MAX_VCPUS); return (ret) ? ret : kvm_recommended_vcpus(s); } @@ -2341,6 +2464,109 @@ uint32_t kvm_dirty_ring_size(void) return kvm_state->kvm_dirty_ring_size; } +static int do_kvm_create_vm(MachineState *ms, int type) +{ + KVMState *s; + int ret; + + s = KVM_STATE(ms->accelerator); + + do { + ret = kvm_ioctl(s, KVM_CREATE_VM, type); + } while (ret == -EINTR); + + if (ret < 0) { + error_report("ioctl(KVM_CREATE_VM) failed: %s", strerror(-ret)); + +#ifdef TARGET_S390X + if (ret == -EINVAL) { + error_printf("Host kernel setup problem detected." + " Please verify:\n"); + error_printf("- for kernels supporting the" + " switch_amode or user_mode parameters, whether"); + error_printf(" user space is running in primary address space\n"); + error_printf("- for kernels supporting the vm.allocate_pgste" + " sysctl, whether it is enabled\n"); + } +#elif defined(TARGET_PPC) + if (ret == -EINVAL) { + error_printf("PPC KVM module is not loaded. Try modprobe kvm_%s.\n", + (type == 2) ? "pr" : "hv"); + } +#endif + } + + return ret; +} + +static int find_kvm_machine_type(MachineState *ms) +{ + MachineClass *mc = MACHINE_GET_CLASS(ms); + int type; + + if (object_property_find(OBJECT(current_machine), "kvm-type")) { + g_autofree char *kvm_type; + kvm_type = object_property_get_str(OBJECT(current_machine), + "kvm-type", + &error_abort); + type = mc->kvm_type(ms, kvm_type); + } else if (mc->kvm_type) { + type = mc->kvm_type(ms, NULL); + } else { + type = kvm_arch_get_default_type(ms); + } + return type; +} + +static int kvm_setup_dirty_ring(KVMState *s) +{ + uint64_t dirty_log_manual_caps; + int ret; + + /* + * Enable KVM dirty ring if supported, otherwise fall back to + * dirty logging mode + */ + ret = kvm_dirty_ring_init(s); + if (ret < 0) { + return ret; + } + + /* + * KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 is not needed when dirty ring is + * enabled. More importantly, KVM_DIRTY_LOG_INITIALLY_SET will assume no + * page is wr-protected initially, which is against how kvm dirty ring is + * usage - kvm dirty ring requires all pages are wr-protected at the very + * beginning. Enabling this feature for dirty ring causes data corruption. + * + * TODO: Without KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 and kvm clear dirty log, + * we may expect a higher stall time when starting the migration. In the + * future we can enable KVM_CLEAR_DIRTY_LOG to work with dirty ring too: + * instead of clearing dirty bit, it can be a way to explicitly wr-protect + * guest pages. 
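kvm_setup_dirty_ring(), factored out above, keeps the two dirty-tracking mechanisms mutually exclusive for the reason its comment spells out: KVM_DIRTY_LOG_INITIALLY_SET leaves pages write-unprotected at first, while the dirty ring assumes everything starts write-protected. The decision order, reduced:

/* Sketch of the decision order in kvm_setup_dirty_ring(). */
int ret = kvm_dirty_ring_init(s);   /* prefer the dirty ring */
if (ret < 0) {
    return ret;                     /* hard failure */
}
if (!s->kvm_dirty_ring_size) {      /* no ring: legacy bitmap path */
    /* only now negotiate KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 */
}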
+ */ + if (!s->kvm_dirty_ring_size) { + dirty_log_manual_caps = + kvm_check_extension(s, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); + dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | + KVM_DIRTY_LOG_INITIALLY_SET); + s->manual_dirty_log_protect = dirty_log_manual_caps; + if (dirty_log_manual_caps) { + ret = kvm_vm_enable_cap(s, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2, 0, + dirty_log_manual_caps); + if (ret) { + warn_report("Trying to enable capability %"PRIu64" of " + "KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 but failed. " + "Falling back to the legacy mode. ", + dirty_log_manual_caps); + s->manual_dirty_log_protect = 0; + } + } + } + + return 0; +} + static int kvm_init(MachineState *ms) { MachineClass *mc = MACHINE_GET_CLASS(ms); @@ -2360,7 +2586,6 @@ static int kvm_init(MachineState *ms) const KVMCapabilityInfo *missing_cap; int ret; int type; - uint64_t dirty_log_manual_caps; qemu_mutex_init(&kml_slots_lock); @@ -2383,7 +2608,7 @@ static int kvm_init(MachineState *ms) QLIST_INIT(&s->kvm_parked_vcpus); s->fd = qemu_open_old(s->device ?: "/dev/kvm", O_RDWR); if (s->fd == -1) { - fprintf(stderr, "Could not access KVM kernel module: %m\n"); + error_report("Could not access KVM kernel module: %m"); ret = -errno; goto err; } @@ -2393,84 +2618,43 @@ static int kvm_init(MachineState *ms) if (ret >= 0) { ret = -EINVAL; } - fprintf(stderr, "kvm version too old\n"); + error_report("kvm version too old"); goto err; } if (ret > KVM_API_VERSION) { ret = -EINVAL; - fprintf(stderr, "kvm version not supported\n"); + error_report("kvm version not supported"); goto err; } - kvm_supported_memory_attributes = kvm_check_extension(s, KVM_CAP_MEMORY_ATTRIBUTES); - kvm_guest_memfd_supported = - kvm_check_extension(s, KVM_CAP_GUEST_MEMFD) && - kvm_check_extension(s, KVM_CAP_USER_MEMORY2) && - (kvm_supported_memory_attributes & KVM_MEMORY_ATTRIBUTE_PRIVATE); - kvm_immediate_exit = kvm_check_extension(s, KVM_CAP_IMMEDIATE_EXIT); - s->nr_slots = kvm_check_extension(s, KVM_CAP_NR_MEMSLOTS); + s->nr_slots_max = kvm_check_extension(s, KVM_CAP_NR_MEMSLOTS); /* If unspecified, use the default value */ - if (!s->nr_slots) { - s->nr_slots = 32; - } - - s->nr_as = kvm_check_extension(s, KVM_CAP_MULTI_ADDRESS_SPACE); - if (s->nr_as <= 1) { - s->nr_as = 1; - } - s->as = g_new0(struct KVMAs, s->nr_as); - - if (object_property_find(OBJECT(current_machine), "kvm-type")) { - g_autofree char *kvm_type = object_property_get_str(OBJECT(current_machine), - "kvm-type", - &error_abort); - type = mc->kvm_type(ms, kvm_type); - } else if (mc->kvm_type) { - type = mc->kvm_type(ms, NULL); - } else { - type = kvm_arch_get_default_type(ms); + if (!s->nr_slots_max) { + s->nr_slots_max = KVM_MEMSLOTS_NR_MAX_DEFAULT; } + type = find_kvm_machine_type(ms); if (type < 0) { ret = -EINVAL; goto err; } - do { - ret = kvm_ioctl(s, KVM_CREATE_VM, type); - } while (ret == -EINTR); - + ret = do_kvm_create_vm(ms, type); if (ret < 0) { - fprintf(stderr, "ioctl(KVM_CREATE_VM) failed: %d %s\n", -ret, - strerror(-ret)); - -#ifdef TARGET_S390X - if (ret == -EINVAL) { - fprintf(stderr, - "Host kernel setup problem detected. Please verify:\n"); - fprintf(stderr, "- for kernels supporting the switch_amode or" - " user_mode parameters, whether\n"); - fprintf(stderr, - " user space is running in primary address space\n"); - fprintf(stderr, - "- for kernels supporting the vm.allocate_pgste sysctl, " - "whether it is enabled\n"); - } -#elif defined(TARGET_PPC) - if (ret == -EINVAL) { - fprintf(stderr, - "PPC KVM module is not loaded. 
Try modprobe kvm_%s.\n", - (type == 2) ? "pr" : "hv"); - } -#endif goto err; } s->vmfd = ret; + s->nr_as = kvm_vm_check_extension(s, KVM_CAP_MULTI_ADDRESS_SPACE); + if (s->nr_as <= 1) { + s->nr_as = 1; + } + s->as = g_new0(struct KVMAs, s->nr_as); + /* check the vcpu limits */ soft_vcpus_limit = kvm_recommended_vcpus(s); hard_vcpus_limit = kvm_max_vcpus(s); @@ -2482,9 +2666,9 @@ static int kvm_init(MachineState *ms) nc->name, nc->num, soft_vcpus_limit); if (nc->num > hard_vcpus_limit) { - fprintf(stderr, "Number of %s cpus requested (%d) exceeds " - "the maximum cpus supported by KVM (%d)\n", - nc->name, nc->num, hard_vcpus_limit); + error_report("Number of %s cpus requested (%d) exceeds " + "the maximum cpus supported by KVM (%d)", + nc->name, nc->num, hard_vcpus_limit); exit(1); } } @@ -2498,8 +2682,8 @@ static int kvm_init(MachineState *ms) } if (missing_cap) { ret = -EINVAL; - fprintf(stderr, "kvm does not support %s\n%s", - missing_cap->name, upgrade_note); + error_report("kvm does not support %s", missing_cap->name); + error_printf("%s", upgrade_note); goto err; } @@ -2507,47 +2691,11 @@ static int kvm_init(MachineState *ms) s->coalesced_pio = s->coalesced_mmio && kvm_check_extension(s, KVM_CAP_COALESCED_PIO); - /* - * Enable KVM dirty ring if supported, otherwise fall back to - * dirty logging mode - */ - ret = kvm_dirty_ring_init(s); + ret = kvm_setup_dirty_ring(s); if (ret < 0) { goto err; } - /* - * KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 is not needed when dirty ring is - * enabled. More importantly, KVM_DIRTY_LOG_INITIALLY_SET will assume no - * page is wr-protected initially, which is against how kvm dirty ring is - * usage - kvm dirty ring requires all pages are wr-protected at the very - * beginning. Enabling this feature for dirty ring causes data corruption. - * - * TODO: Without KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 and kvm clear dirty log, - * we may expect a higher stall time when starting the migration. In the - * future we can enable KVM_CLEAR_DIRTY_LOG to work with dirty ring too: - * instead of clearing dirty bit, it can be a way to explicitly wr-protect - * guest pages. - */ - if (!s->kvm_dirty_ring_size) { - dirty_log_manual_caps = - kvm_check_extension(s, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); - dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | - KVM_DIRTY_LOG_INITIALLY_SET); - s->manual_dirty_log_protect = dirty_log_manual_caps; - if (dirty_log_manual_caps) { - ret = kvm_vm_enable_cap(s, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2, 0, - dirty_log_manual_caps); - if (ret) { - warn_report("Trying to enable capability %"PRIu64" of " - "KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 but failed. " - "Falling back to the legacy mode. 
", - dirty_log_manual_caps); - s->manual_dirty_log_protect = 0; - } - } - } - #ifdef KVM_CAP_VCPU_EVENTS s->vcpu_events = kvm_check_extension(s, KVM_CAP_VCPU_EVENTS); #endif @@ -2559,7 +2707,7 @@ static int kvm_init(MachineState *ms) } kvm_readonly_mem_allowed = - (kvm_check_extension(s, KVM_CAP_READONLY_MEM) > 0); + (kvm_vm_check_extension(s, KVM_CAP_READONLY_MEM) > 0); kvm_resamplefds_allowed = (kvm_check_extension(s, KVM_CAP_IRQFD_RESAMPLE) > 0); @@ -2593,6 +2741,13 @@ static int kvm_init(MachineState *ms) goto err; } + kvm_supported_memory_attributes = kvm_vm_check_extension(s, KVM_CAP_MEMORY_ATTRIBUTES); + kvm_guest_memfd_supported = + kvm_check_extension(s, KVM_CAP_GUEST_MEMFD) && + kvm_check_extension(s, KVM_CAP_USER_MEMORY2) && + (kvm_supported_memory_attributes & KVM_MEMORY_ATTRIBUTE_PRIVATE); + kvm_pre_fault_memory_supported = kvm_vm_check_extension(s, KVM_CAP_PRE_FAULT_MEMORY); + if (s->kernel_irqchip_split == ON_OFF_AUTO_AUTO) { s->kernel_irqchip_split = mc->default_kernel_irqchip_split ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF; } @@ -2722,9 +2877,15 @@ void kvm_flush_coalesced_mmio_buffer(void) static void do_kvm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg) { if (!cpu->vcpu_dirty && !kvm_state->guest_state_protected) { - int ret = kvm_arch_get_registers(cpu); + Error *err = NULL; + int ret = kvm_arch_get_registers(cpu, &err); if (ret) { - error_report("Failed to get registers: %s", strerror(-ret)); + if (err) { + error_reportf_err(err, "Failed to synchronize CPU state: "); + } else { + error_report("Failed to get registers: %s", strerror(-ret)); + } + cpu_dump_state(cpu, stderr, CPU_DUMP_CODE); vm_stop(RUN_STATE_INTERNAL_ERROR); } @@ -2742,9 +2903,15 @@ void kvm_cpu_synchronize_state(CPUState *cpu) static void do_kvm_cpu_synchronize_post_reset(CPUState *cpu, run_on_cpu_data arg) { - int ret = kvm_arch_put_registers(cpu, KVM_PUT_RESET_STATE); + Error *err = NULL; + int ret = kvm_arch_put_registers(cpu, KVM_PUT_RESET_STATE, &err); if (ret) { - error_report("Failed to put registers after reset: %s", strerror(-ret)); + if (err) { + error_reportf_err(err, "Restoring resisters after reset: "); + } else { + error_report("Failed to put registers after reset: %s", + strerror(-ret)); + } cpu_dump_state(cpu, stderr, CPU_DUMP_CODE); vm_stop(RUN_STATE_INTERNAL_ERROR); } @@ -2755,13 +2922,23 @@ static void do_kvm_cpu_synchronize_post_reset(CPUState *cpu, run_on_cpu_data arg void kvm_cpu_synchronize_post_reset(CPUState *cpu) { run_on_cpu(cpu, do_kvm_cpu_synchronize_post_reset, RUN_ON_CPU_NULL); + + if (cpu == first_cpu) { + kvm_reset_parked_vcpus(kvm_state); + } } static void do_kvm_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg) { - int ret = kvm_arch_put_registers(cpu, KVM_PUT_FULL_STATE); + Error *err = NULL; + int ret = kvm_arch_put_registers(cpu, KVM_PUT_FULL_STATE, &err); if (ret) { - error_report("Failed to put registers after init: %s", strerror(-ret)); + if (err) { + error_reportf_err(err, "Putting registers after init: "); + } else { + error_report("Failed to put registers after init: %s", + strerror(-ret)); + } exit(1); } @@ -2851,17 +3028,17 @@ int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private) MemoryRegion *mr; RAMBlock *rb; void *addr; - int ret = -1; + int ret = -EINVAL; trace_kvm_convert_memory(start, size, to_private ? 
"shared_to_private" : "private_to_shared"); if (!QEMU_PTR_IS_ALIGNED(start, qemu_real_host_page_size()) || !QEMU_PTR_IS_ALIGNED(size, qemu_real_host_page_size())) { - return -1; + return ret; } if (!size) { - return -1; + return ret; } section = memory_region_find(get_system_memory(), start, size); @@ -2879,7 +3056,7 @@ int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private) if (!to_private) { return 0; } - return -1; + return ret; } if (!memory_region_has_guest_memfd(mr)) { @@ -2914,6 +3091,15 @@ int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private) addr = memory_region_get_ram_ptr(mr) + section.offset_within_region; rb = qemu_ram_block_from_host(addr, false, &offset); + ret = ram_block_attributes_state_change(RAM_BLOCK_ATTRIBUTES(mr->rdm), + offset, size, to_private); + if (ret) { + error_report("Failed to notify the listener the state change of " + "(0x%"HWADDR_PRIx" + 0x%"HWADDR_PRIx") to %s", + start, size, to_private ? "private" : "shared"); + goto out_unref; + } + if (to_private) { if (rb->page_size != qemu_real_host_page_size()) { /* @@ -2951,10 +3137,15 @@ int kvm_cpu_exec(CPUState *cpu) MemTxAttrs attrs; if (cpu->vcpu_dirty) { - ret = kvm_arch_put_registers(cpu, KVM_PUT_RUNTIME_STATE); + Error *err = NULL; + ret = kvm_arch_put_registers(cpu, KVM_PUT_RUNTIME_STATE, &err); if (ret) { - error_report("Failed to put registers after init: %s", - strerror(-ret)); + if (err) { + error_reportf_err(err, "Putting registers after init: "); + } else { + error_report("Failed to put registers after init: %s", + strerror(-ret)); + } ret = -1; break; } @@ -3126,7 +3317,7 @@ int kvm_cpu_exec(CPUState *cpu) return ret; } -int kvm_ioctl(KVMState *s, int type, ...) +int kvm_ioctl(KVMState *s, unsigned long type, ...) { int ret; void *arg; @@ -3144,7 +3335,7 @@ int kvm_ioctl(KVMState *s, int type, ...) return ret; } -int kvm_vm_ioctl(KVMState *s, int type, ...) +int kvm_vm_ioctl(KVMState *s, unsigned long type, ...) { int ret; void *arg; @@ -3164,7 +3355,7 @@ int kvm_vm_ioctl(KVMState *s, int type, ...) return ret; } -int kvm_vcpu_ioctl(CPUState *cpu, int type, ...) +int kvm_vcpu_ioctl(CPUState *cpu, unsigned long type, ...) { int ret; void *arg; @@ -3184,7 +3375,7 @@ int kvm_vcpu_ioctl(CPUState *cpu, int type, ...) return ret; } -int kvm_device_ioctl(int fd, int type, ...) +int kvm_device_ioctl(int fd, unsigned long type, ...) 
{ int ret; void *arg; @@ -3745,6 +3936,21 @@ static void kvm_set_device(Object *obj, s->device = g_strdup(value); } +static void kvm_set_kvm_rapl(Object *obj, bool value, Error **errp) +{ + KVMState *s = KVM_STATE(obj); + s->msr_energy.enable = value; +} + +static void kvm_set_kvm_rapl_socket_path(Object *obj, + const char *str, + Error **errp) +{ + KVMState *s = KVM_STATE(obj); + g_free(s->msr_energy.socket_path); + s->msr_energy.socket_path = g_strdup(str); +} + static void kvm_accel_instance_init(Object *obj) { KVMState *s = KVM_STATE(obj); @@ -3764,6 +3970,7 @@ static void kvm_accel_instance_init(Object *obj) s->xen_gnttab_max_frames = 64; s->xen_evtchn_max_pirq = 256; s->device = NULL; + s->msr_energy.enable = false; } /** @@ -3777,7 +3984,7 @@ static int kvm_gdbstub_sstep_flags(void) return kvm_sstep_flags; } -static void kvm_accel_class_init(ObjectClass *oc, void *data) +static void kvm_accel_class_init(ObjectClass *oc, const void *data) { AccelClass *ac = ACCEL_CLASS(oc); ac->name = "KVM"; @@ -3808,6 +4015,17 @@ static void kvm_accel_class_init(ObjectClass *oc, void *data) object_class_property_set_description(oc, "device", "Path to the device node to use (default: /dev/kvm)"); + object_class_property_add_bool(oc, "rapl", + NULL, + kvm_set_kvm_rapl); + object_class_property_set_description(oc, "rapl", + "Allow energy related MSRs for RAPL interface in Guest"); + + object_class_property_add_str(oc, "rapl-helper-socket", NULL, + kvm_set_kvm_rapl_socket_path); + object_class_property_set_description(oc, "rapl-helper-socket", + "Socket Path for comminucating with the Virtual MSR helper daemon"); + kvm_arch_accel_class_init(oc); } @@ -3878,7 +4096,7 @@ static StatsList *add_kvmstat_entry(struct kvm_stats_desc *pdesc, /* Alloc and populate data list */ stats = g_new0(Stats, 1); stats->name = g_strdup(pdesc->name); - stats->value = g_new0(StatsValue, 1);; + stats->value = g_new0(StatsValue, 1); if ((pdesc->flags & KVM_STATS_UNIT_MASK) == KVM_STATS_UNIT_BOOLEAN) { stats->value->u.boolean = *stats_data; diff --git a/accel/kvm/kvm-cpus.h b/accel/kvm/kvm-cpus.h index ca40add..6885111 100644 --- a/accel/kvm/kvm-cpus.h +++ b/accel/kvm/kvm-cpus.h @@ -10,8 +10,6 @@ #ifndef KVM_CPUS_H #define KVM_CPUS_H -#include "sysemu/cpus.h" - int kvm_init_vcpu(CPUState *cpu, Error **errp); int kvm_cpu_exec(CPUState *cpu); void kvm_destroy_vcpu(CPUState *cpu); @@ -22,5 +20,4 @@ bool kvm_supports_guest_debug(void); int kvm_insert_breakpoint(CPUState *cpu, int type, vaddr addr, vaddr len); int kvm_remove_breakpoint(CPUState *cpu, int type, vaddr addr, vaddr len); void kvm_remove_all_breakpoints(CPUState *cpu); - #endif /* KVM_CPUS_H */ diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events index 681ccb6..e43d18a 100644 --- a/accel/kvm/trace-events +++ b/accel/kvm/trace-events @@ -1,14 +1,18 @@ # See docs/devel/tracing.rst for syntax documentation. 
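On the ioctl signature changes running through this file (and the matching trace-events format updates below): request codes produced by _IOR()/_IOW()/_IOWR() encode a direction field in the top bits, so such values set bit 31 and do not fit cleanly in a signed int, and the C library prototype is int ioctl(int fd, unsigned long request, ...) anyway. The wrapper shape, as a standalone sketch rather than the QEMU code:

#include <stdarg.h>
#include <sys/ioctl.h>

/* Varargs ioctl wrapper pattern used by kvm_ioctl() and friends,
 * with the widened request type. */
static int my_ioctl(int fd, unsigned long type, ...)
{
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);   /* single optional pointer argument */
    va_end(ap);

    return ioctl(fd, type, arg);
}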
# kvm-all.c -kvm_ioctl(int type, void *arg) "type 0x%x, arg %p" -kvm_vm_ioctl(int type, void *arg) "type 0x%x, arg %p" -kvm_vcpu_ioctl(int cpu_index, int type, void *arg) "cpu_index %d, type 0x%x, arg %p" +kvm_ioctl(unsigned long type, void *arg) "type 0x%lx, arg %p" +kvm_vm_ioctl(unsigned long type, void *arg) "type 0x%lx, arg %p" +kvm_vcpu_ioctl(int cpu_index, unsigned long type, void *arg) "cpu_index %d, type 0x%lx, arg %p" kvm_run_exit(int cpu_index, uint32_t reason) "cpu_index %d, reason %d" -kvm_device_ioctl(int fd, int type, void *arg) "dev fd %d, type 0x%x, arg %p" +kvm_device_ioctl(int fd, unsigned long type, void *arg) "dev fd %d, type 0x%lx, arg %p" kvm_failed_reg_get(uint64_t id, const char *msg) "Warning: Unable to retrieve ONEREG %" PRIu64 " from KVM: %s" kvm_failed_reg_set(uint64_t id, const char *msg) "Warning: Unable to set ONEREG %" PRIu64 " to KVM: %s" kvm_init_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu" +kvm_create_vcpu(int cpu_index, unsigned long arch_cpu_id, int kvm_fd) "index: %d, id: %lu, kvm fd: %d" +kvm_destroy_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu" +kvm_park_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu" +kvm_unpark_vcpu(unsigned long arch_cpu_id, const char *msg) "id: %lu %s" kvm_irqchip_commit_routes(void) "" kvm_irqchip_add_msi_route(char *name, int vector, int virq) "dev %s vector %d virq %d" kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d" @@ -25,7 +29,6 @@ kvm_dirty_ring_reaper(const char *s) "%s" kvm_dirty_ring_reap(uint64_t count, int64_t t) "reaped %"PRIu64" pages (took %"PRIi64" us)" kvm_dirty_ring_reaper_kick(const char *reason) "%s" kvm_dirty_ring_flush(int finished) "%d" -kvm_destroy_vcpu(void) "" kvm_failed_get_vcpu_mmap_size(void) "" kvm_cpu_exec(void) "" kvm_interrupt_exit_request(void) "" @@ -33,3 +36,4 @@ kvm_io_window_exit(void) "" kvm_run_exit_system_event(int cpu_index, uint32_t event_type) "cpu_index %d, system_even_type %"PRIu32 kvm_convert_memory(uint64_t start, uint64_t size, const char *msg) "start 0x%" PRIx64 " size 0x%" PRIx64 " %s" kvm_memory_fault(uint64_t start, uint64_t size, uint64_t flags) "start 0x%" PRIx64 " size 0x%" PRIx64 " flags 0x%" PRIx64 +kvm_slots_grow(unsigned int old, unsigned int new) "%u -> %u" diff --git a/accel/meson.build b/accel/meson.build index 5eaeb68..5290931 100644 --- a/accel/meson.build +++ b/accel/meson.build @@ -1,3 +1,4 @@ +common_ss.add(files('accel-common.c')) specific_ss.add(files('accel-target.c')) system_ss.add(files('accel-system.c', 'accel-blocker.c')) user_ss.add(files('accel-user.c')) diff --git a/accel/qtest/qtest.c b/accel/qtest/qtest.c index bf14032..92bed92 100644 --- a/accel/qtest/qtest.c +++ b/accel/qtest/qtest.c @@ -18,8 +18,9 @@ #include "qemu/option.h" #include "qemu/config-file.h" #include "qemu/accel.h" -#include "sysemu/qtest.h" -#include "sysemu/cpus.h" +#include "system/accel-ops.h" +#include "system/qtest.h" +#include "system/cpus.h" #include "qemu/guest-random.h" #include "qemu/main-loop.h" #include "hw/core/cpu.h" @@ -41,7 +42,7 @@ static int qtest_init_accel(MachineState *ms) return 0; } -static void qtest_accel_class_init(ObjectClass *oc, void *data) +static void qtest_accel_class_init(ObjectClass *oc, const void *data) { AccelClass *ac = ACCEL_CLASS(oc); ac->name = "QTest"; @@ -58,7 +59,7 @@ static const TypeInfo qtest_accel_type = { }; module_obj(TYPE_QTEST_ACCEL); -static void qtest_accel_ops_class_init(ObjectClass *oc, void *data) +static void qtest_accel_ops_class_init(ObjectClass 
*oc, const void *data) { AccelOpsClass *ops = ACCEL_OPS_CLASS(oc); diff --git a/accel/stubs/hvf-stub.c b/accel/stubs/hvf-stub.c new file mode 100644 index 0000000..42eadc5 --- /dev/null +++ b/accel/stubs/hvf-stub.c @@ -0,0 +1,12 @@ +/* + * HVF stubs for QEMU + * + * Copyright (c) Linaro + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "system/hvf.h" + +bool hvf_allowed; diff --git a/accel/stubs/kvm-stub.c b/accel/stubs/kvm-stub.c index 8e0eb22..ecfd763 100644 --- a/accel/stubs/kvm-stub.c +++ b/accel/stubs/kvm-stub.c @@ -11,7 +11,7 @@ */ #include "qemu/osdep.h" -#include "sysemu/kvm.h" +#include "system/kvm.h" #include "hw/pci/msi.h" KVMState *kvm_state; diff --git a/accel/stubs/meson.build b/accel/stubs/meson.build index 91a2d21..8ca1a45 100644 --- a/accel/stubs/meson.build +++ b/accel/stubs/meson.build @@ -2,5 +2,6 @@ system_stubs_ss = ss.source_set() system_stubs_ss.add(when: 'CONFIG_XEN', if_false: files('xen-stub.c')) system_stubs_ss.add(when: 'CONFIG_KVM', if_false: files('kvm-stub.c')) system_stubs_ss.add(when: 'CONFIG_TCG', if_false: files('tcg-stub.c')) +system_stubs_ss.add(when: 'CONFIG_HVF', if_false: files('hvf-stub.c')) specific_ss.add_all(when: ['CONFIG_SYSTEM_ONLY'], if_true: system_stubs_ss) diff --git a/accel/stubs/tcg-stub.c b/accel/stubs/tcg-stub.c index dd890d6..3b76b8b 100644 --- a/accel/stubs/tcg-stub.c +++ b/accel/stubs/tcg-stub.c @@ -11,26 +11,7 @@ */ #include "qemu/osdep.h" -#include "exec/tb-flush.h" -#include "exec/exec-all.h" - -void tb_flush(CPUState *cpu) -{ -} - -int probe_access_flags(CPUArchState *env, vaddr addr, int size, - MMUAccessType access_type, int mmu_idx, - bool nonfault, void **phost, uintptr_t retaddr) -{ - g_assert_not_reached(); -} - -void *probe_access(CPUArchState *env, vaddr addr, int size, - MMUAccessType access_type, int mmu_idx, uintptr_t retaddr) -{ - /* Handled by hardware accelerator. 
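The new hvf-stub.c above supplies the hvf_allowed definition for binaries built without Hypervisor.framework, so common code referencing the flag still links. One plausible shape of the header side, following QEMU's usual accel-guard pattern (an assumption, not taken verbatim from system/hvf.h):

/* Assumed guard pattern (hypothetical sketch). */
#ifdef CONFIG_HVF
extern bool hvf_allowed;
#define hvf_enabled() (hvf_allowed)
#else
#define hvf_enabled() 0   /* lets the compiler fold HVF-only paths away */
#endif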
*/ - g_assert_not_reached(); -} +#include "exec/cpu-common.h" G_NORETURN void cpu_loop_exit(CPUState *cpu) { diff --git a/accel/stubs/xen-stub.c b/accel/stubs/xen-stub.c index 7054965..cf929b6 100644 --- a/accel/stubs/xen-stub.c +++ b/accel/stubs/xen-stub.c @@ -6,7 +6,7 @@ */ #include "qemu/osdep.h" -#include "sysemu/xen.h" +#include "system/xen.h" #include "qapi/qapi-commands-migration.h" bool xen_allowed; diff --git a/accel/tcg/atomic_common.c.inc b/accel/tcg/atomic_common.c.inc index 95a5c5f..6056598 100644 --- a/accel/tcg/atomic_common.c.inc +++ b/accel/tcg/atomic_common.c.inc @@ -14,9 +14,20 @@ */ static void atomic_trace_rmw_post(CPUArchState *env, uint64_t addr, + uint64_t read_value_low, + uint64_t read_value_high, + uint64_t write_value_low, + uint64_t write_value_high, MemOpIdx oi) { - qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_RW); + if (cpu_plugin_mem_cbs_enabled(env_cpu(env))) { + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, + read_value_low, read_value_high, + oi, QEMU_PLUGIN_MEM_R); + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, + write_value_low, write_value_high, + oi, QEMU_PLUGIN_MEM_W); + } } /* diff --git a/accel/tcg/atomic_template.h b/accel/tcg/atomic_template.h index 1dc2151..08a475c 100644 --- a/accel/tcg/atomic_template.h +++ b/accel/tcg/atomic_template.h @@ -53,6 +53,14 @@ # error unsupported data size #endif +#if DATA_SIZE == 16 +# define VALUE_LOW(val) int128_getlo(val) +# define VALUE_HIGH(val) int128_gethi(val) +#else +# define VALUE_LOW(val) val +# define VALUE_HIGH(val) 0 +#endif + #if DATA_SIZE >= 4 # define ABI_TYPE DATA_TYPE #else @@ -69,7 +77,7 @@ # define END _le #endif -ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, abi_ptr addr, +ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, vaddr addr, ABI_TYPE cmpv, ABI_TYPE newv, MemOpIdx oi, uintptr_t retaddr) { @@ -83,12 +91,17 @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, abi_ptr addr, ret = qatomic_cmpxchg__nocheck(haddr, cmpv, newv); #endif ATOMIC_MMU_CLEANUP; - atomic_trace_rmw_post(env, addr, oi); + atomic_trace_rmw_post(env, addr, + VALUE_LOW(ret), + VALUE_HIGH(ret), + VALUE_LOW(newv), + VALUE_HIGH(newv), + oi); return ret; } #if DATA_SIZE < 16 -ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, abi_ptr addr, ABI_TYPE val, +ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, vaddr addr, ABI_TYPE val, MemOpIdx oi, uintptr_t retaddr) { DATA_TYPE *haddr = atomic_mmu_lookup(env_cpu(env), addr, oi, @@ -97,19 +110,29 @@ ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, abi_ptr addr, ABI_TYPE val, ret = qatomic_xchg__nocheck(haddr, val); ATOMIC_MMU_CLEANUP; - atomic_trace_rmw_post(env, addr, oi); + atomic_trace_rmw_post(env, addr, + VALUE_LOW(ret), + VALUE_HIGH(ret), + VALUE_LOW(val), + VALUE_HIGH(val), + oi); return ret; } #define GEN_ATOMIC_HELPER(X) \ -ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, abi_ptr addr, \ +ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, vaddr addr, \ ABI_TYPE val, MemOpIdx oi, uintptr_t retaddr) \ { \ DATA_TYPE *haddr, ret; \ haddr = atomic_mmu_lookup(env_cpu(env), addr, oi, DATA_SIZE, retaddr); \ ret = qatomic_##X(haddr, val); \ ATOMIC_MMU_CLEANUP; \ - atomic_trace_rmw_post(env, addr, oi); \ + atomic_trace_rmw_post(env, addr, \ + VALUE_LOW(ret), \ + VALUE_HIGH(ret), \ + VALUE_LOW(val), \ + VALUE_HIGH(val), \ + oi); \ return ret; \ } @@ -133,7 +156,7 @@ GEN_ATOMIC_HELPER(xor_fetch) * of CF_PARALLEL's value, we'll trace just a read and a write. 
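The atomic_template.h changes above start feeding the observed and written values to the plugin memory callbacks. Since qemu_plugin_vcpu_mem_cb() takes the value as two 64-bit halves, 16-byte operands are split with the Int128 accessors while narrower sizes pass (val, 0). A quick illustration of the split:

/* Int128 -> (low, high) halves as used by VALUE_LOW()/VALUE_HIGH(). */
Int128 v = int128_make128(0x1122334455667788ULL,   /* low 64 bits */
                          0x99aabbccddeeff00ULL);  /* high 64 bits */
uint64_t lo = int128_getlo(v);   /* 0x1122334455667788 */
uint64_t hi = int128_gethi(v);   /* 0x99aabbccddeeff00 */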
*/ #define GEN_ATOMIC_HELPER_FN(X, FN, XDATA_TYPE, RET) \ -ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, abi_ptr addr, \ +ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, vaddr addr, \ ABI_TYPE xval, MemOpIdx oi, uintptr_t retaddr) \ { \ XDATA_TYPE *haddr, cmp, old, new, val = xval; \ @@ -145,7 +168,12 @@ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, abi_ptr addr, \ cmp = qatomic_cmpxchg__nocheck(haddr, old, new); \ } while (cmp != old); \ ATOMIC_MMU_CLEANUP; \ - atomic_trace_rmw_post(env, addr, oi); \ + atomic_trace_rmw_post(env, addr, \ + VALUE_LOW(old), \ + VALUE_HIGH(old), \ + VALUE_LOW(xval), \ + VALUE_HIGH(xval), \ + oi); \ return RET; \ } @@ -174,7 +202,7 @@ GEN_ATOMIC_HELPER_FN(umax_fetch, MAX, DATA_TYPE, new) # define END _be #endif -ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, abi_ptr addr, +ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, vaddr addr, ABI_TYPE cmpv, ABI_TYPE newv, MemOpIdx oi, uintptr_t retaddr) { @@ -188,12 +216,17 @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, abi_ptr addr, ret = qatomic_cmpxchg__nocheck(haddr, BSWAP(cmpv), BSWAP(newv)); #endif ATOMIC_MMU_CLEANUP; - atomic_trace_rmw_post(env, addr, oi); + atomic_trace_rmw_post(env, addr, + VALUE_LOW(ret), + VALUE_HIGH(ret), + VALUE_LOW(newv), + VALUE_HIGH(newv), + oi); return BSWAP(ret); } #if DATA_SIZE < 16 -ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, abi_ptr addr, ABI_TYPE val, +ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, vaddr addr, ABI_TYPE val, MemOpIdx oi, uintptr_t retaddr) { DATA_TYPE *haddr = atomic_mmu_lookup(env_cpu(env), addr, oi, @@ -202,19 +235,29 @@ ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, abi_ptr addr, ABI_TYPE val, ret = qatomic_xchg__nocheck(haddr, BSWAP(val)); ATOMIC_MMU_CLEANUP; - atomic_trace_rmw_post(env, addr, oi); + atomic_trace_rmw_post(env, addr, + VALUE_LOW(ret), + VALUE_HIGH(ret), + VALUE_LOW(val), + VALUE_HIGH(val), + oi); return BSWAP(ret); } #define GEN_ATOMIC_HELPER(X) \ -ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, abi_ptr addr, \ +ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, vaddr addr, \ ABI_TYPE val, MemOpIdx oi, uintptr_t retaddr) \ { \ DATA_TYPE *haddr, ret; \ haddr = atomic_mmu_lookup(env_cpu(env), addr, oi, DATA_SIZE, retaddr); \ ret = qatomic_##X(haddr, BSWAP(val)); \ ATOMIC_MMU_CLEANUP; \ - atomic_trace_rmw_post(env, addr, oi); \ + atomic_trace_rmw_post(env, addr, \ + VALUE_LOW(ret), \ + VALUE_HIGH(ret), \ + VALUE_LOW(val), \ + VALUE_HIGH(val), \ + oi); \ return BSWAP(ret); \ } @@ -235,7 +278,7 @@ GEN_ATOMIC_HELPER(xor_fetch) * of CF_PARALLEL's value, we'll trace just a read and a write. 
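For the big-endian flavor above, operands are byte-swapped on the way in and results on the way out, so the host atomic primitive always operates on host-order data. A concrete trace of what the BSWAP pair accomplishes:

/* Example with DATA_SIZE == 4 on a little-endian host:
 * a BE-guest value 0x11223344 sits in memory as bytes 44 33 22 11,
 * i.e. it reads back as host value 0x44332211. BSWAP(cmpv) converts
 * the guest operand into that host representation before the
 * cmpxchg, and BSWAP(ret) converts the host-order result back. */
ret = qatomic_cmpxchg__nocheck(haddr, BSWAP(cmpv), BSWAP(newv));
return BSWAP(ret);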
*/ #define GEN_ATOMIC_HELPER_FN(X, FN, XDATA_TYPE, RET) \ -ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, abi_ptr addr, \ +ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, vaddr addr, \ ABI_TYPE xval, MemOpIdx oi, uintptr_t retaddr) \ { \ XDATA_TYPE *haddr, ldo, ldn, old, new, val = xval; \ @@ -247,7 +290,12 @@ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, abi_ptr addr, \ ldn = qatomic_cmpxchg__nocheck(haddr, ldo, BSWAP(new)); \ } while (ldo != ldn); \ ATOMIC_MMU_CLEANUP; \ - atomic_trace_rmw_post(env, addr, oi); \ + atomic_trace_rmw_post(env, addr, \ + VALUE_LOW(old), \ + VALUE_HIGH(old), \ + VALUE_LOW(xval), \ + VALUE_HIGH(xval), \ + oi); \ return RET; \ } @@ -281,3 +329,5 @@ GEN_ATOMIC_HELPER_FN(add_fetch, ADD, DATA_TYPE, new) #undef SUFFIX #undef DATA_SIZE #undef SHIFT +#undef VALUE_LOW +#undef VALUE_HIGH diff --git a/accel/tcg/backend-ldst.h b/accel/tcg/backend-ldst.h new file mode 100644 index 0000000..9c3a407 --- /dev/null +++ b/accel/tcg/backend-ldst.h @@ -0,0 +1,41 @@ +/* + * Internal memory barrier helpers for QEMU (target agnostic) + * + * Copyright (c) 2003 Fabrice Bellard + * + * SPDX-License-Identifier: LGPL-2.1-or-later + */ + +#ifndef ACCEL_TCG_BACKEND_LDST_H +#define ACCEL_TCG_BACKEND_LDST_H + +#include "tcg-target-mo.h" + +/** + * tcg_req_mo: + * @guest_mo: Guest default memory order + * @type: TCGBar + * + * Filter @type to the barrier that is required for the guest + * memory ordering vs the host memory ordering. A non-zero + * result indicates that some barrier is required. + */ +#define tcg_req_mo(guest_mo, type) \ + ((type) & guest_mo & ~TCG_TARGET_DEFAULT_MO) + +/** + * cpu_req_mo: + * @cpu: CPUState + * @type: TCGBar + * + * If tcg_req_mo indicates a barrier for @type is required + * for the guest memory model, issue a host memory barrier. + */ +#define cpu_req_mo(cpu, type) \ + do { \ + if (tcg_req_mo(cpu->cc->tcg_ops->guest_default_memory_order, type)) { \ + smp_mb(); \ + } \ + } while (0) + +#endif diff --git a/accel/tcg/cpu-exec-common.c b/accel/tcg/cpu-exec-common.c index bc9b1a2..c5c513f 100644 --- a/accel/tcg/cpu-exec-common.c +++ b/accel/tcg/cpu-exec-common.c @@ -18,13 +18,45 @@ */ #include "qemu/osdep.h" -#include "sysemu/cpus.h" -#include "sysemu/tcg.h" +#include "exec/log.h" +#include "system/tcg.h" #include "qemu/plugin.h" #include "internal-common.h" bool tcg_allowed; +bool tcg_cflags_has(CPUState *cpu, uint32_t flags) +{ + return cpu->tcg_cflags & flags; +} + +void tcg_cflags_set(CPUState *cpu, uint32_t flags) +{ + cpu->tcg_cflags |= flags; +} + +uint32_t curr_cflags(CPUState *cpu) +{ + uint32_t cflags = cpu->tcg_cflags; + + /* + * Record gdb single-step. We should be exiting the TB by raising + * EXCP_DEBUG, but to simplify other tests, disable chaining too. + * + * For singlestep and -d nochain, suppress goto_tb so that + * we can log -d cpu,exec after every TB. 
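The new backend-ldst.h reduces cpu_req_mo() to a guest-vs-host mask intersection. A self-contained sketch of the same filter, with made-up bit values standing in for the TCG_MO_* and TCG_TARGET_DEFAULT_MO constants:

    #include <stdio.h>

    /* Hypothetical barrier bits, mirroring TCG_MO_* (values illustrative). */
    #define MO_LD_LD (1 << 0)
    #define MO_ST_LD (1 << 1)
    #define MO_LD_ST (1 << 2)
    #define MO_ST_ST (1 << 3)
    #define MO_ALL   (MO_LD_LD | MO_ST_LD | MO_LD_ST | MO_ST_ST)

    /* Pretend the host already orders everything except loads before stores. */
    #define HOST_DEFAULT_MO (MO_LD_LD | MO_ST_LD | MO_ST_ST)

    /* Same shape as tcg_req_mo(): keep only the orderings the guest needs
     * and the host does not already provide. */
    static int req_mo(int guest_mo, int type)
    {
        return type & guest_mo & ~HOST_DEFAULT_MO;
    }

    int main(void)
    {
        int guest_mo = MO_ALL;              /* strongly-ordered guest */
        int type = MO_LD_ST | MO_ST_ST;     /* barrier before a store */
        if (req_mo(guest_mo, type)) {
            puts("smp_mb() needed");        /* only MO_LD_ST survives */
        }
        return 0;
    }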
+ */ + if (unlikely(cpu->singlestep_enabled)) { + cflags |= CF_NO_GOTO_TB | CF_NO_GOTO_PTR | CF_SINGLE_STEP | 1; + } else if (qatomic_read(&one_insn_per_tb)) { + cflags |= CF_NO_GOTO_TB | 1; + } else if (qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) { + cflags |= CF_NO_GOTO_TB; + } + + return cflags; +} + /* exit the current TB, but without causing any exception to be raised */ void cpu_loop_exit_noexc(CPUState *cpu) { diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c index 6711b58..713bdb2 100644 --- a/accel/tcg/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -21,29 +21,30 @@ #include "qemu/qemu-print.h" #include "qapi/error.h" #include "qapi/type-helpers.h" -#include "hw/core/tcg-cpu-ops.h" +#include "hw/core/cpu.h" +#include "accel/tcg/cpu-ops.h" +#include "accel/tcg/helper-retaddr.h" #include "trace.h" #include "disas/disas.h" -#include "exec/exec-all.h" +#include "exec/cpu-common.h" +#include "exec/cpu-interrupt.h" +#include "exec/page-protection.h" +#include "exec/mmap-lock.h" +#include "exec/translation-block.h" #include "tcg/tcg.h" #include "qemu/atomic.h" #include "qemu/rcu.h" #include "exec/log.h" #include "qemu/main-loop.h" -#include "sysemu/cpus.h" -#include "exec/cpu-all.h" -#include "sysemu/cpu-timers.h" +#include "exec/icount.h" #include "exec/replay-core.h" -#include "sysemu/tcg.h" +#include "system/tcg.h" #include "exec/helper-proto-common.h" #include "tb-jmp-cache.h" #include "tb-hash.h" #include "tb-context.h" +#include "tb-internal.h" #include "internal-common.h" -#include "internal-target.h" -#if defined(CONFIG_USER_ONLY) -#include "user-retaddr.h" -#endif /* -icount align implementation. */ @@ -147,45 +148,10 @@ static void init_delay_params(SyncClocks *sc, const CPUState *cpu) } #endif /* CONFIG USER ONLY */ -bool tcg_cflags_has(CPUState *cpu, uint32_t flags) -{ - return cpu->tcg_cflags & flags; -} - -void tcg_cflags_set(CPUState *cpu, uint32_t flags) -{ - cpu->tcg_cflags |= flags; -} - -uint32_t curr_cflags(CPUState *cpu) -{ - uint32_t cflags = cpu->tcg_cflags; - - /* - * Record gdb single-step. We should be exiting the TB by raising - * EXCP_DEBUG, but to simplify other tests, disable chaining too. - * - * For singlestep and -d nochain, suppress goto_tb so that - * we can log -d cpu,exec after every TB. - */ - if (unlikely(cpu->singlestep_enabled)) { - cflags |= CF_NO_GOTO_TB | CF_NO_GOTO_PTR | CF_SINGLE_STEP | 1; - } else if (qatomic_read(&one_insn_per_tb)) { - cflags |= CF_NO_GOTO_TB | 1; - } else if (qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) { - cflags |= CF_NO_GOTO_TB; - } - - return cflags; -} - struct tb_desc { - vaddr pc; - uint64_t cs_base; + TCGTBCPUState s; CPUArchState *env; tb_page_addr_t page_addr0; - uint32_t flags; - uint32_t cflags; }; static bool tb_lookup_cmp(const void *p, const void *d) @@ -193,11 +159,11 @@ static bool tb_lookup_cmp(const void *p, const void *d) const TranslationBlock *tb = p; const struct tb_desc *desc = d; - if ((tb_cflags(tb) & CF_PCREL || tb->pc == desc->pc) && + if ((tb_cflags(tb) & CF_PCREL || tb->pc == desc->s.pc) && tb_page_addr0(tb) == desc->page_addr0 && - tb->cs_base == desc->cs_base && - tb->flags == desc->flags && - tb_cflags(tb) == desc->cflags) { + tb->cs_base == desc->s.cs_base && + tb->flags == desc->s.flags && + tb_cflags(tb) == desc->s.cflags) { /* check next page if needed */ tb_page_addr_t tb_phys_page1 = tb_page_addr1(tb); if (tb_phys_page1 == -1) { @@ -215,7 +181,7 @@ static bool tb_lookup_cmp(const void *p, const void *d) * is different for the new TB. 
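struct tb_desc now carries one TCGTBCPUState instead of four loose fields; the comparison below is unchanged in substance. A toy version of the bundled key and its match (names hypothetical, the real struct also feeds the hash function):

    #include <stdint.h>
    #include <stdbool.h>
    #include <stdio.h>

    /* Hypothetical stand-in for TCGTBCPUState: the values keying a TB. */
    typedef struct {
        uint64_t pc;
        uint64_t cs_base;
        uint32_t flags;
        uint32_t cflags;
    } TBKey;

    /* Same shape as tb_lookup_cmp(): every field must match. */
    static bool tb_key_match(const TBKey *a, const TBKey *b)
    {
        return a->pc == b->pc && a->cs_base == b->cs_base &&
               a->flags == b->flags && a->cflags == b->cflags;
    }

    int main(void)
    {
        TBKey k = { .pc = 0x1000, .cflags = 1 };
        printf("%d\n", tb_key_match(&k, &k));
        return 0;
    }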
Therefore any exception raised * here by the faulting lookup is not premature. */ - virt_page1 = TARGET_PAGE_ALIGN(desc->pc); + virt_page1 = TARGET_PAGE_ALIGN(desc->s.pc); phys_page1 = get_page_addr_code(desc->env, virt_page1); if (tb_phys_page1 == phys_page1) { return true; @@ -225,59 +191,65 @@ static bool tb_lookup_cmp(const void *p, const void *d) return false; } -static TranslationBlock *tb_htable_lookup(CPUState *cpu, vaddr pc, - uint64_t cs_base, uint32_t flags, - uint32_t cflags) +static TranslationBlock *tb_htable_lookup(CPUState *cpu, TCGTBCPUState s) { tb_page_addr_t phys_pc; struct tb_desc desc; uint32_t h; + desc.s = s; desc.env = cpu_env(cpu); - desc.cs_base = cs_base; - desc.flags = flags; - desc.cflags = cflags; - desc.pc = pc; - phys_pc = get_page_addr_code(desc.env, pc); + phys_pc = get_page_addr_code(desc.env, s.pc); if (phys_pc == -1) { return NULL; } desc.page_addr0 = phys_pc; - h = tb_hash_func(phys_pc, (cflags & CF_PCREL ? 0 : pc), - flags, cs_base, cflags); + h = tb_hash_func(phys_pc, (s.cflags & CF_PCREL ? 0 : s.pc), + s.flags, s.cs_base, s.cflags); return qht_lookup_custom(&tb_ctx.htable, &desc, h, tb_lookup_cmp); } -/* Might cause an exception, so have a longjmp destination ready */ -static inline TranslationBlock *tb_lookup(CPUState *cpu, vaddr pc, - uint64_t cs_base, uint32_t flags, - uint32_t cflags) +/** + * tb_lookup: + * @cpu: CPU that will execute the returned translation block + * @pc: guest PC + * @cs_base: arch-specific value associated with translation block + * @flags: arch-specific translation block flags + * @cflags: CF_* flags + * + * Look up a translation block inside the QHT using @pc, @cs_base, @flags and + * @cflags. Uses @cpu's tb_jmp_cache. Might cause an exception, so have a + * longjmp destination ready. + * + * Returns: an existing translation block or NULL. + */ +static inline TranslationBlock *tb_lookup(CPUState *cpu, TCGTBCPUState s) { TranslationBlock *tb; CPUJumpCache *jc; uint32_t hash; /* we should never be trying to look up an INVALID tb */ - tcg_debug_assert(!(cflags & CF_INVALID)); + tcg_debug_assert(!(s.cflags & CF_INVALID)); - hash = tb_jmp_cache_hash_func(pc); + hash = tb_jmp_cache_hash_func(s.pc); jc = cpu->tb_jmp_cache; tb = qatomic_read(&jc->array[hash].tb); if (likely(tb && - jc->array[hash].pc == pc && - tb->cs_base == cs_base && - tb->flags == flags && - tb_cflags(tb) == cflags)) { + jc->array[hash].pc == s.pc && + tb->cs_base == s.cs_base && + tb->flags == s.flags && + tb_cflags(tb) == s.cflags)) { goto hit; } - tb = tb_htable_lookup(cpu, pc, cs_base, flags, cflags); + tb = tb_htable_lookup(cpu, s); if (tb == NULL) { return NULL; } - jc->array[hash].pc = pc; + jc->array[hash].pc = s.pc; qatomic_set(&jc->array[hash].tb, tb); hit: @@ -285,7 +257,7 @@ hit: * As long as tb is not NULL, the contents are consistent. Therefore, * the virtual PC has to match for non-CF_PCREL translations. 
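tb_lookup probes the per-CPU jump cache first and only falls back to the QHT on a miss. A reduced model of that direct-mapped fast path (table size and hash are illustrative, not QEMU's):

    #include <stdint.h>
    #include <stddef.h>
    #include <stdio.h>

    #define JC_BITS 4                 /* QEMU uses a much larger table */
    #define JC_SIZE (1u << JC_BITS)

    typedef struct { uint64_t pc; void *tb; } JCEntry;
    static JCEntry jc[JC_SIZE];

    static unsigned jc_hash(uint64_t pc) { return (pc >> 2) & (JC_SIZE - 1); }

    /* Fast path: one probe; on miss, fall back to the full hash table
     * (modelled here by returning NULL). */
    static void *jc_lookup(uint64_t pc)
    {
        JCEntry *e = &jc[jc_hash(pc)];
        return (e->pc == pc) ? e->tb : NULL;
    }

    int main(void)
    {
        int dummy;
        jc[jc_hash(0x1000)] = (JCEntry){ .pc = 0x1000, .tb = &dummy };
        printf("hit=%d\n", jc_lookup(0x1000) != NULL);
        return 0;
    }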
*/ - assert((tb_cflags(tb) & CF_PCREL) || tb->pc == pc); + assert((tb_cflags(tb) & CF_PCREL) || tb->pc == s.pc); return tb; } @@ -302,14 +274,11 @@ static void log_cpu_exec(vaddr pc, CPUState *cpu, if (qemu_loglevel_mask(CPU_LOG_TB_CPU)) { FILE *logfile = qemu_log_trylock(); if (logfile) { - int flags = 0; + int flags = CPU_DUMP_CCOP; if (qemu_loglevel_mask(CPU_LOG_TB_FPU)) { flags |= CPU_DUMP_FPU; } -#if defined(TARGET_I386) - flags |= CPU_DUMP_CCOP; -#endif if (qemu_loglevel_mask(CPU_LOG_TB_VPU)) { flags |= CPU_DUMP_VPU; } @@ -405,9 +374,6 @@ const void *HELPER(lookup_tb_ptr)(CPUArchState *env) { CPUState *cpu = env_cpu(env); TranslationBlock *tb; - vaddr pc; - uint64_t cs_base; - uint32_t flags, cflags; /* * By definition we've just finished a TB, so I/O is OK. @@ -417,25 +383,36 @@ const void *HELPER(lookup_tb_ptr)(CPUArchState *env) * The next TB, if we chain to it, will clear the flag again. */ cpu->neg.can_do_io = true; - cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); - cflags = curr_cflags(cpu); - if (check_for_breakpoints(cpu, pc, &cflags)) { + TCGTBCPUState s = cpu->cc->tcg_ops->get_tb_cpu_state(cpu); + s.cflags = curr_cflags(cpu); + + if (check_for_breakpoints(cpu, s.pc, &s.cflags)) { cpu_loop_exit(cpu); } - tb = tb_lookup(cpu, pc, cs_base, flags, cflags); + tb = tb_lookup(cpu, s); if (tb == NULL) { return tcg_code_gen_epilogue; } if (qemu_loglevel_mask(CPU_LOG_TB_CPU | CPU_LOG_EXEC)) { - log_cpu_exec(pc, cpu, tb); + log_cpu_exec(s.pc, cpu, tb); } return tb->tc.ptr; } +/* Return the current PC from CPU, which may be cached in TB. */ +static vaddr log_pc(CPUState *cpu, const TranslationBlock *tb) +{ + if (tb_cflags(tb) & CF_PCREL) { + return cpu->cc->get_pc(cpu); + } else { + return tb->pc; + } +} + /* Execute a TB, and fix up the CPU state afterwards if necessary */ /* * Disable CFI checks. @@ -570,11 +547,7 @@ static void cpu_exec_longjmp_cleanup(CPUState *cpu) void cpu_exec_step_atomic(CPUState *cpu) { - CPUArchState *env = cpu_env(cpu); TranslationBlock *tb; - vaddr pc; - uint64_t cs_base; - uint32_t flags, cflags; int tb_exit; if (sigsetjmp(cpu->jmp_env, 0) == 0) { @@ -583,13 +556,13 @@ void cpu_exec_step_atomic(CPUState *cpu) g_assert(!cpu->running); cpu->running = true; - cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); + TCGTBCPUState s = cpu->cc->tcg_ops->get_tb_cpu_state(cpu); + s.cflags = curr_cflags(cpu); - cflags = curr_cflags(cpu); /* Execute in a serial context. */ - cflags &= ~CF_PARALLEL; + s.cflags &= ~CF_PARALLEL; /* After 1 insn, return and release the exclusive lock. */ - cflags |= CF_NO_GOTO_TB | CF_NO_GOTO_PTR | 1; + s.cflags |= CF_NO_GOTO_TB | CF_NO_GOTO_PTR | 1; /* * No need to check_for_breakpoints here. * We only arrive in cpu_exec_step_atomic after beginning execution @@ -597,16 +570,16 @@ void cpu_exec_step_atomic(CPUState *cpu) * Any breakpoint for this insn will have been recognized earlier. 
*/ - tb = tb_lookup(cpu, pc, cs_base, flags, cflags); + tb = tb_lookup(cpu, s); if (tb == NULL) { mmap_lock(); - tb = tb_gen_code(cpu, pc, cs_base, flags, cflags); + tb = tb_gen_code(cpu, s); mmap_unlock(); } cpu_exec_enter(cpu); /* execute the generated code */ - trace_exec_tb(tb, pc); + trace_exec_tb(tb, s.pc); cpu_tb_exec(cpu, tb, &tb_exit); cpu_exec_exit(cpu); } else { @@ -674,7 +647,6 @@ static inline void tb_add_jump(TranslationBlock *tb, int n, out_unlock_next: qemu_spin_unlock(&tb_next->jmp_lock); - return; } static inline bool cpu_handle_halt(CPUState *cpu) @@ -682,13 +654,8 @@ static inline bool cpu_handle_halt(CPUState *cpu) #ifndef CONFIG_USER_ONLY if (cpu->halted) { const TCGCPUOps *tcg_ops = cpu->cc->tcg_ops; - bool leave_halt; + bool leave_halt = tcg_ops->cpu_exec_halt(cpu); - if (tcg_ops->cpu_exec_halt) { - leave_halt = tcg_ops->cpu_exec_halt(cpu); - } else { - leave_halt = cpu_has_work(cpu); - } if (!leave_halt) { return true; } @@ -745,10 +712,10 @@ static inline bool cpu_handle_exception(CPUState *cpu, int *ret) * If user mode only, we simulate a fake exception which will be * handled outside the cpu execution loop. */ -#if defined(TARGET_I386) const TCGCPUOps *tcg_ops = cpu->cc->tcg_ops; - tcg_ops->fake_user_interrupt(cpu); -#endif /* TARGET_I386 */ + if (tcg_ops->fake_user_interrupt) { + tcg_ops->fake_user_interrupt(cpu); + } *ret = cpu->exception_index; cpu->exception_index = -1; return true; @@ -835,35 +802,23 @@ static inline bool cpu_handle_interrupt(CPUState *cpu, cpu->exception_index = EXCP_HLT; bql_unlock(); return true; - } -#if defined(TARGET_I386) - else if (interrupt_request & CPU_INTERRUPT_INIT) { - X86CPU *x86_cpu = X86_CPU(cpu); - CPUArchState *env = &x86_cpu->env; - replay_interrupt(); - cpu_svm_check_intercept_param(env, SVM_EXIT_INIT, 0, 0); - do_cpu_init(x86_cpu); - cpu->exception_index = EXCP_HALTED; - bql_unlock(); - return true; - } -#else - else if (interrupt_request & CPU_INTERRUPT_RESET) { - replay_interrupt(); - cpu_reset(cpu); - bql_unlock(); - return true; - } -#endif /* !TARGET_I386 */ - /* The target hook has 3 exit conditions: - False when the interrupt isn't processed, - True when it is, and we should restart on a new TB, - and via longjmp via cpu_loop_exit. */ - else { + } else { const TCGCPUOps *tcg_ops = cpu->cc->tcg_ops; - if (tcg_ops->cpu_exec_interrupt && - tcg_ops->cpu_exec_interrupt(cpu, interrupt_request)) { + if (interrupt_request & CPU_INTERRUPT_RESET) { + replay_interrupt(); + tcg_ops->cpu_exec_reset(cpu); + bql_unlock(); + return true; + } + + /* + * The target hook has 3 exit conditions: + * False when the interrupt isn't processed, + * True when it is, and we should restart on a new TB, + * and via longjmp via cpu_loop_exit. + */ + if (tcg_ops->cpu_exec_interrupt(cpu, interrupt_request)) { if (!tcg_ops->need_replay_interrupt || tcg_ops->need_replay_interrupt(interrupt_request)) { replay_interrupt(); @@ -969,11 +924,8 @@ cpu_exec_loop(CPUState *cpu, SyncClocks *sc) while (!cpu_handle_interrupt(cpu, &last_tb)) { TranslationBlock *tb; - vaddr pc; - uint64_t cs_base; - uint32_t flags, cflags; - - cpu_get_tb_cpu_state(cpu_env(cpu), &pc, &cs_base, &flags); + TCGTBCPUState s = cpu->cc->tcg_ops->get_tb_cpu_state(cpu); + s.cflags = cpu->cflags_next_tb; /* * When requested, use an exact setting for cflags for the next @@ -982,33 +934,32 @@ cpu_exec_loop(CPUState *cpu, SyncClocks *sc) * have CF_INVALID set, -1 is a convenient invalid value that * does not require tcg headers for cpu_common_reset. 
*/ - cflags = cpu->cflags_next_tb; - if (cflags == -1) { - cflags = curr_cflags(cpu); + if (s.cflags == -1) { + s.cflags = curr_cflags(cpu); } else { cpu->cflags_next_tb = -1; } - if (check_for_breakpoints(cpu, pc, &cflags)) { + if (check_for_breakpoints(cpu, s.pc, &s.cflags)) { break; } - tb = tb_lookup(cpu, pc, cs_base, flags, cflags); + tb = tb_lookup(cpu, s); if (tb == NULL) { CPUJumpCache *jc; uint32_t h; mmap_lock(); - tb = tb_gen_code(cpu, pc, cs_base, flags, cflags); + tb = tb_gen_code(cpu, s); mmap_unlock(); /* * We add the TB in the virtual pc hash table * for the fast lookup */ - h = tb_jmp_cache_hash_func(pc); + h = tb_jmp_cache_hash_func(s.pc); jc = cpu->tb_jmp_cache; - jc->array[h].pc = pc; + jc->array[h].pc = s.pc; qatomic_set(&jc->array[h].tb, tb); } @@ -1028,7 +979,7 @@ cpu_exec_loop(CPUState *cpu, SyncClocks *sc) tb_add_jump(last_tb, tb_exit, tb); } - cpu_loop_exec_tb(cpu, tb, pc, &last_tb, &tb_exit); + cpu_loop_exec_tb(cpu, tb, s.pc, &last_tb, &tb_exit); /* Try to align the host and virtual clocks if the guest is in advance */ @@ -1082,7 +1033,18 @@ bool tcg_exec_realizefn(CPUState *cpu, Error **errp) static bool tcg_target_initialized; if (!tcg_target_initialized) { - cpu->cc->tcg_ops->initialize(); + /* Check mandatory TCGCPUOps handlers */ + const TCGCPUOps *tcg_ops = cpu->cc->tcg_ops; +#ifndef CONFIG_USER_ONLY + assert(tcg_ops->cpu_exec_halt); + assert(tcg_ops->cpu_exec_interrupt); + assert(tcg_ops->cpu_exec_reset); + assert(tcg_ops->pointer_wrap); +#endif /* !CONFIG_USER_ONLY */ + assert(tcg_ops->translate_code); + assert(tcg_ops->get_tb_cpu_state); + assert(tcg_ops->mmu_index); + tcg_ops->initialize(); tcg_target_initialized = true; } diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index 117b516..87e14bd 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -19,15 +19,17 @@ #include "qemu/osdep.h" #include "qemu/main-loop.h" -#include "hw/core/tcg-cpu-ops.h" -#include "exec/exec-all.h" +#include "qemu/target-info.h" +#include "accel/tcg/cpu-ops.h" +#include "accel/tcg/iommu.h" +#include "accel/tcg/probe.h" #include "exec/page-protection.h" -#include "exec/memory.h" -#include "exec/cpu_ldst.h" +#include "system/memory.h" +#include "accel/tcg/cpu-ldst-common.h" +#include "accel/tcg/cpu-mmu-index.h" #include "exec/cputlb.h" #include "exec/tb-flush.h" -#include "exec/memory-internal.h" -#include "exec/ram_addr.h" +#include "system/ram_addr.h" #include "exec/mmu-access-type.h" #include "exec/tlb-common.h" #include "exec/vaddr.h" @@ -35,18 +37,21 @@ #include "qemu/error-report.h" #include "exec/log.h" #include "exec/helper-proto-common.h" +#include "exec/tlb-flags.h" #include "qemu/atomic.h" #include "qemu/atomic128.h" -#include "exec/translate-all.h" +#include "tb-internal.h" #include "trace.h" #include "tb-hash.h" +#include "tb-internal.h" +#include "tlb-bounds.h" #include "internal-common.h" -#include "internal-target.h" #ifdef CONFIG_PLUGIN #include "qemu/plugin-memory.h" #endif #include "tcg/tcg-ldst.h" -#include "tcg/oversized-guest.h" +#include "backend-ldst.h" + /* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */ /* #define DEBUG_TLB */ @@ -104,26 +109,15 @@ static inline uint64_t tlb_read_idx(const CPUTLBEntry *entry, { /* Do not rearrange the CPUTLBEntry structure members. 
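The QEMU_BUILD_BUG_ON checks above exist so that an MMUAccessType can index the comparators directly; the patch retypes them to uintptr_t. A simplified model of that layout contract (the real CPUTLBEntry has more members, so this is only the indexing idea):

    #include <stdint.h>
    #include <stddef.h>

    enum { DEMO_LOAD = 0, DEMO_STORE = 1, DEMO_FETCH = 2 };

    typedef union {
        struct {
            uintptr_t addr_read;
            uintptr_t addr_write;
            uintptr_t addr_code;
        };
        uintptr_t addr_idx[3];
    } DemoTLBEntry;

    /* Mirrors the build-bug-on checks: member order must match the enum. */
    _Static_assert(offsetof(DemoTLBEntry, addr_write) ==
                   DEMO_STORE * sizeof(uintptr_t), "layout");

    static uintptr_t demo_read_idx(const DemoTLBEntry *e, int access_type)
    {
        return e->addr_idx[access_type];
    }

    int main(void)
    {
        DemoTLBEntry e = { .addr_write = 0x2000 };
        return demo_read_idx(&e, DEMO_STORE) != 0x2000;
    }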
*/ QEMU_BUILD_BUG_ON(offsetof(CPUTLBEntry, addr_read) != - MMU_DATA_LOAD * sizeof(uint64_t)); + MMU_DATA_LOAD * sizeof(uintptr_t)); QEMU_BUILD_BUG_ON(offsetof(CPUTLBEntry, addr_write) != - MMU_DATA_STORE * sizeof(uint64_t)); + MMU_DATA_STORE * sizeof(uintptr_t)); QEMU_BUILD_BUG_ON(offsetof(CPUTLBEntry, addr_code) != - MMU_INST_FETCH * sizeof(uint64_t)); + MMU_INST_FETCH * sizeof(uintptr_t)); -#if TARGET_LONG_BITS == 32 - /* Use qatomic_read, in case of addr_write; only care about low bits. */ - const uint32_t *ptr = (uint32_t *)&entry->addr_idx[access_type]; - ptr += HOST_BIG_ENDIAN; - return qatomic_read(ptr); -#else - const uint64_t *ptr = &entry->addr_idx[access_type]; -# if TCG_OVERSIZED_GUEST - return *ptr; -# else + const uintptr_t *ptr = &entry->addr_idx[access_type]; /* ofs might correspond to .addr_write, so use qatomic_read */ return qatomic_read(ptr); -# endif -#endif } static inline uint64_t tlb_addr_write(const CPUTLBEntry *entry) @@ -779,19 +773,19 @@ void tlb_flush_range_by_mmuidx(CPUState *cpu, vaddr addr, assert_cpu_is_self(cpu); + /* If no page bits are significant, this devolves to tlb_flush. */ + if (bits < TARGET_PAGE_BITS) { + tlb_flush_by_mmuidx(cpu, idxmap); + return; + } /* * If all bits are significant, and len is small, * this devolves to tlb_flush_page. */ - if (bits >= TARGET_LONG_BITS && len <= TARGET_PAGE_SIZE) { + if (len <= TARGET_PAGE_SIZE && bits >= target_long_bits()) { tlb_flush_page_by_mmuidx(cpu, addr, idxmap); return; } - /* If no page bits are significant, this devolves to tlb_flush. */ - if (bits < TARGET_PAGE_BITS) { - tlb_flush_by_mmuidx(cpu, idxmap); - return; - } /* This should already be page aligned */ d.addr = addr & TARGET_PAGE_MASK; @@ -817,19 +811,19 @@ void tlb_flush_range_by_mmuidx_all_cpus_synced(CPUState *src_cpu, TLBFlushRangeData d, *p; CPUState *dst_cpu; + /* If no page bits are significant, this devolves to tlb_flush. */ + if (bits < TARGET_PAGE_BITS) { + tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, idxmap); + return; + } /* * If all bits are significant, and len is small, * this devolves to tlb_flush_page. */ - if (bits >= TARGET_LONG_BITS && len <= TARGET_PAGE_SIZE) { + if (len <= TARGET_PAGE_SIZE && bits >= target_long_bits()) { tlb_flush_page_by_mmuidx_all_cpus_synced(src_cpu, addr, idxmap); return; } - /* If no page bits are significant, this devolves to tlb_flush. */ - if (bits < TARGET_PAGE_BITS) { - tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, idxmap); - return; - } /* This should already be page aligned */ d.addr = addr & TARGET_PAGE_MASK; @@ -893,26 +887,17 @@ void tlb_unprotect_code(ram_addr_t ram_addr) * * Called with tlb_c.lock held. 
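The rewritten tlb_reset_dirty_range_locked merges the fast comparator with the per-entry slow flags before deciding whether TLB_NOTDIRTY may be set. A sketch of that filter (flag values hypothetical; in QEMU the flags live in the low bits of addr_write):

    #include <stdint.h>
    #include <stdbool.h>

    #define F_INVALID  (1 << 0)
    #define F_MMIO     (1 << 1)
    #define F_DISCARD  (1 << 2)
    #define F_NOTDIRTY (1 << 3)   /* values hypothetical */

    /* An entry may be marked not-dirty only if no disqualifying flag is
     * set on either the comparator or the slow-flags word. */
    static bool demo_can_mark_notdirty(uintptr_t addr_write, int slow_flags)
    {
        int flags = (addr_write | slow_flags) &
                    (F_INVALID | F_MMIO | F_DISCARD | F_NOTDIRTY);
        return flags == 0;
    }

    int main(void)
    {
        return demo_can_mark_notdirty(0x1000, 0) ? 0 : 1;
    }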
*/ -static void tlb_reset_dirty_range_locked(CPUTLBEntry *tlb_entry, +static void tlb_reset_dirty_range_locked(CPUTLBEntryFull *full, CPUTLBEntry *ent, uintptr_t start, uintptr_t length) { - uintptr_t addr = tlb_entry->addr_write; - - if ((addr & (TLB_INVALID_MASK | TLB_MMIO | - TLB_DISCARD_WRITE | TLB_NOTDIRTY)) == 0) { - addr &= TARGET_PAGE_MASK; - addr += tlb_entry->addend; - if ((addr - start) < length) { -#if TARGET_LONG_BITS == 32 - uint32_t *ptr_write = (uint32_t *)&tlb_entry->addr_write; - ptr_write += HOST_BIG_ENDIAN; - qatomic_set(ptr_write, *ptr_write | TLB_NOTDIRTY); -#elif TCG_OVERSIZED_GUEST - tlb_entry->addr_write |= TLB_NOTDIRTY; -#else - qatomic_set(&tlb_entry->addr_write, - tlb_entry->addr_write | TLB_NOTDIRTY); -#endif + const uintptr_t addr = ent->addr_write; + int flags = addr | full->slow_flags[MMU_DATA_STORE]; + + flags &= TLB_INVALID_MASK | TLB_MMIO | TLB_DISCARD_WRITE | TLB_NOTDIRTY; + if (flags == 0) { + uintptr_t host = (addr & TARGET_PAGE_MASK) + ent->addend; + if ((host - start) < length) { + qatomic_set(&ent->addr_write, addr | TLB_NOTDIRTY); } } } @@ -931,23 +916,25 @@ static inline void copy_tlb_helper_locked(CPUTLBEntry *d, const CPUTLBEntry *s) * We must take tlb_c.lock to avoid racing with another vCPU update. The only * thing actually updated is the target TLB entry ->addr_write flags. */ -void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length) +void tlb_reset_dirty(CPUState *cpu, uintptr_t start, uintptr_t length) { int mmu_idx; qemu_spin_lock(&cpu->neg.tlb.c.lock); for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { + CPUTLBDesc *desc = &cpu->neg.tlb.d[mmu_idx]; + CPUTLBDescFast *fast = &cpu->neg.tlb.f[mmu_idx]; + unsigned int n = tlb_n_entries(fast); unsigned int i; - unsigned int n = tlb_n_entries(&cpu->neg.tlb.f[mmu_idx]); for (i = 0; i < n; i++) { - tlb_reset_dirty_range_locked(&cpu->neg.tlb.f[mmu_idx].table[i], - start1, length); + tlb_reset_dirty_range_locked(&desc->fulltlb[i], &fast->table[i], + start, length); } for (i = 0; i < CPU_VTLB_SIZE; i++) { - tlb_reset_dirty_range_locked(&cpu->neg.tlb.d[mmu_idx].vtable[i], - start1, length); + tlb_reset_dirty_range_locked(&desc->vfulltlb[i], &desc->vtable[i], + start, length); } } qemu_spin_unlock(&cpu->neg.tlb.c.lock); @@ -1199,7 +1186,7 @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx, void tlb_set_page_with_attrs(CPUState *cpu, vaddr addr, hwaddr paddr, MemTxAttrs attrs, int prot, - int mmu_idx, uint64_t size) + int mmu_idx, vaddr size) { CPUTLBEntryFull full = { .phys_addr = paddr, @@ -1214,29 +1201,65 @@ void tlb_set_page_with_attrs(CPUState *cpu, vaddr addr, void tlb_set_page(CPUState *cpu, vaddr addr, hwaddr paddr, int prot, - int mmu_idx, uint64_t size) + int mmu_idx, vaddr size) { tlb_set_page_with_attrs(cpu, addr, paddr, MEMTXATTRS_UNSPECIFIED, prot, mmu_idx, size); } +/** + * tlb_hit_page: return true if page aligned @addr is a hit against the + * TLB entry @tlb_addr + * + * @addr: virtual address to test (must be page aligned) + * @tlb_addr: TLB entry address (a CPUTLBEntry addr_read/write/code value) + */ +static inline bool tlb_hit_page(uint64_t tlb_addr, vaddr addr) +{ + return addr == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK)); +} + +/** + * tlb_hit: return true if @addr is a hit against the TLB entry @tlb_addr + * + * @addr: virtual address to test (need not be page aligned) + * @tlb_addr: TLB entry address (a CPUTLBEntry addr_read/write/code value) + */ +static inline bool tlb_hit(uint64_t tlb_addr, vaddr addr) +{ + return tlb_hit_page(tlb_addr, addr & 
TARGET_PAGE_MASK); +} + /* - * Note: tlb_fill() can trigger a resize of the TLB. This means that all of the - * caller's prior references to the TLB table (e.g. CPUTLBEntry pointers) must - * be discarded and looked up again (e.g. via tlb_entry()). + * Note: tlb_fill_align() can trigger a resize of the TLB. + * This means that all of the caller's prior references to the TLB table + * (e.g. CPUTLBEntry pointers) must be discarded and looked up again + * (e.g. via tlb_entry()). */ -static void tlb_fill(CPUState *cpu, vaddr addr, int size, - MMUAccessType access_type, int mmu_idx, uintptr_t retaddr) +static bool tlb_fill_align(CPUState *cpu, vaddr addr, MMUAccessType type, + int mmu_idx, MemOp memop, int size, + bool probe, uintptr_t ra) { - bool ok; + const TCGCPUOps *ops = cpu->cc->tcg_ops; + CPUTLBEntryFull full; - /* - * This is not a probe, so only valid return is success; failure - * should result in exception + longjmp to the cpu loop. - */ - ok = cpu->cc->tcg_ops->tlb_fill(cpu, addr, size, - access_type, mmu_idx, false, retaddr); - assert(ok); + if (ops->tlb_fill_align) { + if (ops->tlb_fill_align(cpu, &full, addr, type, mmu_idx, + memop, size, probe, ra)) { + tlb_set_page_full(cpu, mmu_idx, addr, &full); + return true; + } + } else { + /* Legacy behaviour is alignment before paging. */ + if (addr & ((1u << memop_alignment_bits(memop)) - 1)) { + ops->do_unaligned_access(cpu, addr, type, mmu_idx, ra); + } + if (ops->tlb_fill(cpu, addr, size, type, mmu_idx, probe, ra)) { + return true; + } + } + assert(probe); + return false; } static inline void cpu_unaligned_access(CPUState *cpu, vaddr addr, @@ -1319,7 +1342,7 @@ static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size, trace_memory_notdirty_write_access(mem_vaddr, ram_addr, size); if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) { - tb_invalidate_phys_range_fast(ram_addr, size, retaddr); + tb_invalidate_phys_range_fast(cpu, ram_addr, size, retaddr); } /* @@ -1351,22 +1374,22 @@ static int probe_access_internal(CPUState *cpu, vaddr addr, if (!tlb_hit_page(tlb_addr, page_addr)) { if (!victim_tlb_hit(cpu, mmu_idx, index, access_type, page_addr)) { - if (!cpu->cc->tcg_ops->tlb_fill(cpu, addr, fault_size, access_type, - mmu_idx, nonfault, retaddr)) { + if (!tlb_fill_align(cpu, addr, access_type, mmu_idx, + 0, fault_size, nonfault, retaddr)) { /* Non-faulting page table read failed. */ *phost = NULL; *pfull = NULL; return TLB_INVALID_MASK; } - /* TLB resize via tlb_fill may have moved the entry. */ + /* TLB resize via tlb_fill_align may have moved the entry. */ index = tlb_index(cpu, mmu_idx, addr); entry = tlb_entry(cpu, mmu_idx, addr); /* * With PAGE_WRITE_INV, we set TLB_INVALID_MASK immediately, - * to force the next access through tlb_fill. We've just - * called tlb_fill, so we know that this entry *is* valid. + * to force the next access through tlb_fill_align. We've just + * called tlb_fill_align, so we know that this entry *is* valid. 
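The legacy path in tlb_fill_align checks alignment before paging by masking the low address bits. A minimal model of that test (memop_alignment_bits() is modelled here as a plain bit count):

    #include <stdint.h>
    #include <stdio.h>

    /* An N-bit alignment requirement means the low N address bits must be
     * clear, checked before any page translation happens. */
    static int demo_misaligned(uint64_t addr, unsigned align_bits)
    {
        return (addr & (((uint64_t)1 << align_bits) - 1)) != 0;
    }

    int main(void)
    {
        printf("%d %d\n", demo_misaligned(0x1004, 2),   /* aligned: 0 */
                          demo_misaligned(0x1002, 2));  /* misaligned: 1 */
        return 0;
    }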
*/ flags &= ~TLB_INVALID_MASK; } @@ -1491,7 +1514,7 @@ void *probe_access(CPUArchState *env, vaddr addr, int size, return host; } -void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr, +void *tlb_vaddr_to_host(CPUArchState *env, vaddr addr, MMUAccessType access_type, int mmu_idx) { CPUTLBEntryFull *full; @@ -1607,16 +1630,17 @@ typedef struct MMULookupLocals { * mmu_lookup1: translate one page * @cpu: generic cpu state * @data: lookup parameters + * @memop: memory operation for the access, or 0 * @mmu_idx: virtual address context * @access_type: load/store/code * @ra: return address into tcg generated code, or 0 * * Resolve the translation for the one page at @data.addr, filling in * the rest of @data with the results. If the translation fails, - * tlb_fill will longjmp out. Return true if the softmmu tlb for + * tlb_fill_align will longjmp out. Return true if the softmmu tlb for * @mmu_idx may have resized. */ -static bool mmu_lookup1(CPUState *cpu, MMULookupPageData *data, +static bool mmu_lookup1(CPUState *cpu, MMULookupPageData *data, MemOp memop, int mmu_idx, MMUAccessType access_type, uintptr_t ra) { vaddr addr = data->addr; @@ -1631,7 +1655,8 @@ static bool mmu_lookup1(CPUState *cpu, MMULookupPageData *data, if (!tlb_hit(tlb_addr, addr)) { if (!victim_tlb_hit(cpu, mmu_idx, index, access_type, addr & TARGET_PAGE_MASK)) { - tlb_fill(cpu, addr, data->size, access_type, mmu_idx, ra); + tlb_fill_align(cpu, addr, access_type, mmu_idx, + memop, data->size, false, ra); maybe_resized = true; index = tlb_index(cpu, mmu_idx, addr); entry = tlb_entry(cpu, mmu_idx, addr); @@ -1643,6 +1668,25 @@ static bool mmu_lookup1(CPUState *cpu, MMULookupPageData *data, flags = tlb_addr & (TLB_FLAGS_MASK & ~TLB_FORCE_SLOW); flags |= full->slow_flags[access_type]; + if (likely(!maybe_resized)) { + /* Alignment has not been checked by tlb_fill_align. */ + int a_bits = memop_alignment_bits(memop); + + /* + * This alignment check differs from the one above, in that this is + * based on the atomicity of the operation. The intended use case is + * the ARM memory type field of each PTE, where access to pages with + * Device memory type require alignment. + */ + if (unlikely(flags & TLB_CHECK_ALIGNED)) { + int at_bits = memop_atomicity_bits(memop); + a_bits = MAX(a_bits, at_bits); + } + if (unlikely(addr & ((1 << a_bits) - 1))) { + cpu_unaligned_access(cpu, addr, access_type, mmu_idx, ra); + } + } + data->full = full; data->flags = flags; /* Compute haddr speculatively; depending on flags it might be invalid. 
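For TLB_CHECK_ALIGNED pages, mmu_lookup1 now takes the stricter of the natural alignment and the atomicity requirement. A sketch of that decision (bit counts are hypothetical; QEMU derives both from the MemOp):

    #include <stdio.h>

    #define MAX(a, b) ((a) > (b) ? (a) : (b))

    /* Device-memory style pages raise the requirement to the atomicity
     * width; ordinary pages keep the natural alignment only. */
    static unsigned demo_required_align_bits(unsigned a_bits, unsigned at_bits,
                                             int check_aligned)
    {
        return check_aligned ? MAX(a_bits, at_bits) : a_bits;
    }

    int main(void)
    {
        printf("%u\n", demo_required_align_bits(0, 3, 1)); /* -> 3 */
        return 0;
    }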
*/ @@ -1699,7 +1743,6 @@ static void mmu_watch_or_dirty(CPUState *cpu, MMULookupPageData *data, static bool mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi, uintptr_t ra, MMUAccessType type, MMULookupLocals *l) { - unsigned a_bits; bool crosspage; int flags; @@ -1708,12 +1751,6 @@ static bool mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi, tcg_debug_assert(l->mmu_idx < NB_MMU_MODES); - /* Handle CPU specific unaligned behaviour */ - a_bits = get_alignment_bits(l->memop); - if (addr & ((1 << a_bits) - 1)) { - cpu_unaligned_access(cpu, addr, type, l->mmu_idx, ra); - } - l->page[0].addr = addr; l->page[0].size = memop_size(l->memop); l->page[1].addr = (addr + l->page[0].size - 1) & TARGET_PAGE_MASK; @@ -1721,7 +1758,7 @@ static bool mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi, crosspage = (addr ^ l->page[1].addr) & TARGET_PAGE_MASK; if (likely(!crosspage)) { - mmu_lookup1(cpu, &l->page[0], l->mmu_idx, type, ra); + mmu_lookup1(cpu, &l->page[0], l->memop, l->mmu_idx, type, ra); flags = l->page[0].flags; if (unlikely(flags & (TLB_WATCHPOINT | TLB_NOTDIRTY))) { @@ -1736,12 +1773,15 @@ static bool mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi, l->page[1].size = l->page[0].size - size0; l->page[0].size = size0; + l->page[1].addr = cpu->cc->tcg_ops->pointer_wrap(cpu, l->mmu_idx, + l->page[1].addr, addr); + /* * Lookup both pages, recognizing exceptions from either. If the * second lookup potentially resized, refresh first CPUTLBEntryFull. */ - mmu_lookup1(cpu, &l->page[0], l->mmu_idx, type, ra); - if (mmu_lookup1(cpu, &l->page[1], l->mmu_idx, type, ra)) { + mmu_lookup1(cpu, &l->page[0], l->memop, l->mmu_idx, type, ra); + if (mmu_lookup1(cpu, &l->page[1], 0, l->mmu_idx, type, ra)) { uintptr_t index = tlb_index(cpu, l->mmu_idx, addr); l->page[0].full = &cpu->neg.tlb.d[l->mmu_idx].fulltlb[index]; } @@ -1760,31 +1800,6 @@ static bool mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi, tcg_debug_assert((flags & TLB_BSWAP) == 0); } - /* - * This alignment check differs from the one above, in that this is - * based on the atomicity of the operation. The intended use case is - * the ARM memory type field of each PTE, where access to pages with - * Device memory type require alignment. - */ - if (unlikely(flags & TLB_CHECK_ALIGNED)) { - MemOp size = l->memop & MO_SIZE; - - switch (l->memop & MO_ATOM_MASK) { - case MO_ATOM_NONE: - size = MO_8; - break; - case MO_ATOM_IFALIGN_PAIR: - case MO_ATOM_WITHIN16_PAIR: - size = size ? size - 1 : 0; - break; - default: - break; - } - if (addr & ((1 << size) - 1)) { - cpu_unaligned_access(cpu, addr, type, l->mmu_idx, ra); - } - } - return crosspage; } @@ -1797,34 +1812,18 @@ static void *atomic_mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi, { uintptr_t mmu_idx = get_mmuidx(oi); MemOp mop = get_memop(oi); - int a_bits = get_alignment_bits(mop); uintptr_t index; CPUTLBEntry *tlbe; vaddr tlb_addr; void *hostaddr; CPUTLBEntryFull *full; + bool did_tlb_fill = false; tcg_debug_assert(mmu_idx < NB_MMU_MODES); /* Adjust the given return address. */ retaddr -= GETPC_ADJ; - /* Enforce guest required alignment. */ - if (unlikely(a_bits > 0 && (addr & ((1 << a_bits) - 1)))) { - /* ??? Maybe indicate atomic op to cpu_unaligned_access */ - cpu_unaligned_access(cpu, addr, MMU_DATA_STORE, - mmu_idx, retaddr); - } - - /* Enforce qemu required alignment. */ - if (unlikely(addr & (size - 1))) { - /* We get here if guest alignment was not requested, - or was not enforced by cpu_unaligned_access above. 
- We might widen the access and emulate, but for now - mark an exception and exit the cpu loop. */ - goto stop_the_world; - } - index = tlb_index(cpu, mmu_idx, addr); tlbe = tlb_entry(cpu, mmu_idx, addr); @@ -1833,8 +1832,9 @@ static void *atomic_mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi, if (!tlb_hit(tlb_addr, addr)) { if (!victim_tlb_hit(cpu, mmu_idx, index, MMU_DATA_STORE, addr & TARGET_PAGE_MASK)) { - tlb_fill(cpu, addr, size, - MMU_DATA_STORE, mmu_idx, retaddr); + tlb_fill_align(cpu, addr, MMU_DATA_STORE, mmu_idx, + mop, size, false, retaddr); + did_tlb_fill = true; index = tlb_index(cpu, mmu_idx, addr); tlbe = tlb_entry(cpu, mmu_idx, addr); } @@ -1848,17 +1848,38 @@ static void *atomic_mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi, * but addr_read will only be -1 if PAGE_READ was unset. */ if (unlikely(tlbe->addr_read == -1)) { - tlb_fill(cpu, addr, size, MMU_DATA_LOAD, mmu_idx, retaddr); + tlb_fill_align(cpu, addr, MMU_DATA_LOAD, mmu_idx, + 0, size, false, retaddr); /* * Since we don't support reads and writes to different * addresses, and we do have the proper page loaded for - * write, this shouldn't ever return. But just in case, - * handle via stop-the-world. + * write, this shouldn't ever return. + */ + g_assert_not_reached(); + } + + /* Enforce guest required alignment, if not handled by tlb_fill_align. */ + if (!did_tlb_fill && (addr & ((1 << memop_alignment_bits(mop)) - 1))) { + cpu_unaligned_access(cpu, addr, MMU_DATA_STORE, mmu_idx, retaddr); + } + + /* Enforce qemu required alignment. */ + if (unlikely(addr & (size - 1))) { + /* + * We get here if guest alignment was not requested, or was not + * enforced by cpu_unaligned_access or tlb_fill_align above. + * We might widen the access and emulate, but for now + * mark an exception and exit the cpu loop. */ goto stop_the_world; } - /* Collect tlb flags for read. */ + + /* Finish collecting tlb flags for both read and write. 
*/ + full = &cpu->neg.tlb.d[mmu_idx].fulltlb[index]; tlb_addr |= tlbe->addr_read; + tlb_addr &= TLB_FLAGS_MASK & ~TLB_FORCE_SLOW; + tlb_addr |= full->slow_flags[MMU_DATA_STORE]; + tlb_addr |= full->slow_flags[MMU_DATA_LOAD]; /* Notice an IO access or a needs-MMU-lookup access */ if (unlikely(tlb_addr & (TLB_MMIO | TLB_DISCARD_WRITE))) { @@ -1868,13 +1889,12 @@ static void *atomic_mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi, } hostaddr = (void *)((uintptr_t)addr + tlbe->addend); - full = &cpu->neg.tlb.d[mmu_idx].fulltlb[index]; if (unlikely(tlb_addr & TLB_NOTDIRTY)) { notdirty_write(cpu, addr, size, full, retaddr); } - if (unlikely(tlb_addr & TLB_FORCE_SLOW)) { + if (unlikely(tlb_addr & TLB_WATCHPOINT)) { int wp_flags = 0; if (full->slow_flags[MMU_DATA_STORE] & TLB_WATCHPOINT) { @@ -1883,10 +1903,8 @@ static void *atomic_mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi, if (full->slow_flags[MMU_DATA_LOAD] & TLB_WATCHPOINT) { wp_flags |= BP_MEM_READ; } - if (wp_flags) { - cpu_check_watchpoint(cpu, addr, size, - full->attrs, wp_flags, retaddr); - } + cpu_check_watchpoint(cpu, addr, size, + full->attrs, wp_flags, retaddr); } return hostaddr; @@ -2313,7 +2331,7 @@ static uint8_t do_ld1_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi, MMULookupLocals l; bool crosspage; - cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD); + cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD); crosspage = mmu_lookup(cpu, addr, oi, ra, access_type, &l); tcg_debug_assert(!crosspage); @@ -2328,7 +2346,7 @@ static uint16_t do_ld2_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi, uint16_t ret; uint8_t a, b; - cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD); + cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD); crosspage = mmu_lookup(cpu, addr, oi, ra, access_type, &l); if (likely(!crosspage)) { return do_ld_2(cpu, &l.page[0], l.mmu_idx, access_type, l.memop, ra); @@ -2352,7 +2370,7 @@ static uint32_t do_ld4_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi, bool crosspage; uint32_t ret; - cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD); + cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD); crosspage = mmu_lookup(cpu, addr, oi, ra, access_type, &l); if (likely(!crosspage)) { return do_ld_4(cpu, &l.page[0], l.mmu_idx, access_type, l.memop, ra); @@ -2373,7 +2391,7 @@ static uint64_t do_ld8_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi, bool crosspage; uint64_t ret; - cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD); + cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD); crosspage = mmu_lookup(cpu, addr, oi, ra, access_type, &l); if (likely(!crosspage)) { return do_ld_8(cpu, &l.page[0], l.mmu_idx, access_type, l.memop, ra); @@ -2396,7 +2414,7 @@ static Int128 do_ld16_mmu(CPUState *cpu, vaddr addr, Int128 ret; int first; - cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD); + cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD); crosspage = mmu_lookup(cpu, addr, oi, ra, MMU_DATA_LOAD, &l); if (likely(!crosspage)) { if (unlikely(l.page[0].flags & TLB_MMIO)) { @@ -2724,7 +2742,7 @@ static void do_st1_mmu(CPUState *cpu, vaddr addr, uint8_t val, MMULookupLocals l; bool crosspage; - cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST); + cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST); crosspage = mmu_lookup(cpu, addr, oi, ra, MMU_DATA_STORE, &l); tcg_debug_assert(!crosspage); @@ -2738,7 +2756,7 @@ static void do_st2_mmu(CPUState *cpu, vaddr addr, uint16_t val, bool crosspage; uint8_t a, b; - cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST); + cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST); crosspage = mmu_lookup(cpu, addr, oi, ra, MMU_DATA_STORE, &l); if (likely(!crosspage)) { do_st_2(cpu, &l.page[0], val, l.mmu_idx, l.memop, ra); @@ -2760,7 
+2778,7 @@ static void do_st4_mmu(CPUState *cpu, vaddr addr, uint32_t val, MMULookupLocals l; bool crosspage; - cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST); + cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST); crosspage = mmu_lookup(cpu, addr, oi, ra, MMU_DATA_STORE, &l); if (likely(!crosspage)) { do_st_4(cpu, &l.page[0], val, l.mmu_idx, l.memop, ra); @@ -2781,7 +2799,7 @@ static void do_st8_mmu(CPUState *cpu, vaddr addr, uint64_t val, MMULookupLocals l; bool crosspage; - cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST); + cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST); crosspage = mmu_lookup(cpu, addr, oi, ra, MMU_DATA_STORE, &l); if (likely(!crosspage)) { do_st_8(cpu, &l.page[0], val, l.mmu_idx, l.memop, ra); @@ -2804,7 +2822,7 @@ static void do_st16_mmu(CPUState *cpu, vaddr addr, Int128 val, uint64_t a, b; int first; - cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST); + cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST); crosspage = mmu_lookup(cpu, addr, oi, ra, MMU_DATA_STORE, &l); if (likely(!crosspage)) { if (unlikely(l.page[0].flags & TLB_MMIO)) { @@ -2889,54 +2907,45 @@ static void do_st16_mmu(CPUState *cpu, vaddr addr, Int128 val, /* Code access functions. */ -uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr) -{ - CPUState *cs = env_cpu(env); - MemOpIdx oi = make_memop_idx(MO_UB, cpu_mmu_index(cs, true)); - return do_ld1_mmu(cs, addr, oi, 0, MMU_INST_FETCH); -} - -uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr) -{ - CPUState *cs = env_cpu(env); - MemOpIdx oi = make_memop_idx(MO_TEUW, cpu_mmu_index(cs, true)); - return do_ld2_mmu(cs, addr, oi, 0, MMU_INST_FETCH); -} - -uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr) -{ - CPUState *cs = env_cpu(env); - MemOpIdx oi = make_memop_idx(MO_TEUL, cpu_mmu_index(cs, true)); - return do_ld4_mmu(cs, addr, oi, 0, MMU_INST_FETCH); -} - -uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr addr) -{ - CPUState *cs = env_cpu(env); - MemOpIdx oi = make_memop_idx(MO_TEUQ, cpu_mmu_index(cs, true)); - return do_ld8_mmu(cs, addr, oi, 0, MMU_INST_FETCH); -} - -uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr, +uint8_t cpu_ldb_code_mmu(CPUArchState *env, vaddr addr, MemOpIdx oi, uintptr_t retaddr) { return do_ld1_mmu(env_cpu(env), addr, oi, retaddr, MMU_INST_FETCH); } -uint16_t cpu_ldw_code_mmu(CPUArchState *env, abi_ptr addr, +uint16_t cpu_ldw_code_mmu(CPUArchState *env, vaddr addr, MemOpIdx oi, uintptr_t retaddr) { return do_ld2_mmu(env_cpu(env), addr, oi, retaddr, MMU_INST_FETCH); } -uint32_t cpu_ldl_code_mmu(CPUArchState *env, abi_ptr addr, +uint32_t cpu_ldl_code_mmu(CPUArchState *env, vaddr addr, MemOpIdx oi, uintptr_t retaddr) { return do_ld4_mmu(env_cpu(env), addr, oi, retaddr, MMU_INST_FETCH); } -uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr, +uint64_t cpu_ldq_code_mmu(CPUArchState *env, vaddr addr, MemOpIdx oi, uintptr_t retaddr) { return do_ld8_mmu(env_cpu(env), addr, oi, retaddr, MMU_INST_FETCH); } + +/* + * Common pointer_wrap implementations. + */ + +/* + * To be used for strict alignment targets. + * Because no accesses are unaligned, no accesses wrap either. + */ +vaddr cpu_pointer_wrap_notreached(CPUState *cs, int idx, vaddr res, vaddr base) +{ + g_assert_not_reached(); +} + +/* To be used for strict 32-bit targets. 
*/ +vaddr cpu_pointer_wrap_uint32(CPUState *cs, int idx, vaddr res, vaddr base) +{ + return (uint32_t)res; +} diff --git a/accel/tcg/icount-common.c b/accel/tcg/icount-common.c index 8d3d3a7..d647117 100644 --- a/accel/tcg/icount-common.c +++ b/accel/tcg/icount-common.c @@ -27,17 +27,16 @@ #include "migration/vmstate.h" #include "qapi/error.h" #include "qemu/error-report.h" -#include "sysemu/cpus.h" -#include "sysemu/qtest.h" +#include "system/cpus.h" +#include "system/qtest.h" #include "qemu/main-loop.h" #include "qemu/option.h" #include "qemu/seqlock.h" -#include "sysemu/replay.h" -#include "sysemu/runstate.h" +#include "system/replay.h" +#include "system/runstate.h" #include "hw/core/cpu.h" -#include "sysemu/cpu-timers.h" -#include "sysemu/cpu-throttle.h" -#include "sysemu/cpu-timers-internal.h" +#include "exec/icount.h" +#include "system/cpu-timers-internal.h" /* * ICOUNT: Instruction Counter @@ -49,6 +48,8 @@ static bool icount_sleep = true; /* Arbitrarily pick 1MIPS as the minimum allowable speed. */ #define MAX_ICOUNT_SHIFT 10 +bool icount_align_option; + /* Do not count executed instructions */ ICountMode use_icount = ICOUNT_DISABLED; diff --git a/accel/tcg/internal-common.h b/accel/tcg/internal-common.h index a8fc3db..1dbc45d 100644 --- a/accel/tcg/internal-common.h +++ b/accel/tcg/internal-common.h @@ -11,12 +11,16 @@ #include "exec/cpu-common.h" #include "exec/translation-block.h" +#include "exec/mmap-lock.h" +#include "accel/tcg/tb-cpu-state.h" extern int64_t max_delay; extern int64_t max_advance; extern bool one_insn_per_tb; +extern bool icount_align_option; + /* * Return true if CS is not running in parallel with other cpus, either * because there are no other cpus or we are within an exclusive context. @@ -43,9 +47,7 @@ static inline bool cpu_plugin_mem_cbs_enabled(const CPUState *cpu) #endif } -TranslationBlock *tb_gen_code(CPUState *cpu, vaddr pc, - uint64_t cs_base, uint32_t flags, - int cflags); +TranslationBlock *tb_gen_code(CPUState *cpu, TCGTBCPUState s); void page_init(void); void tb_htable_init(void); void tb_reset_jump(TranslationBlock *tb, int n); @@ -53,7 +55,88 @@ TranslationBlock *tb_link_page(TranslationBlock *tb); void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, uintptr_t host_pc); +/** + * tlb_init - initialize a CPU's TLB + * @cpu: CPU whose TLB should be initialized + */ +void tlb_init(CPUState *cpu); +/** + * tlb_destroy - destroy a CPU's TLB + * @cpu: CPU whose TLB should be destroyed + */ +void tlb_destroy(CPUState *cpu); + bool tcg_exec_realizefn(CPUState *cpu, Error **errp); void tcg_exec_unrealizefn(CPUState *cpu); +/* current cflags for hashing/comparison */ +uint32_t curr_cflags(CPUState *cpu); + +void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr); + +/** + * get_page_addr_code_hostp() + * @env: CPUArchState + * @addr: guest virtual address of guest code + * + * See get_page_addr_code() (full-system version) for documentation on the + * return value. + * + * Sets *@hostp (when @hostp is non-NULL) as follows. + * If the return value is -1, sets *@hostp to NULL. Otherwise, sets *@hostp + * to the host address where @addr's content is kept. + * + * Note: this function can trigger an exception. + */ +tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, vaddr addr, + void **hostp); + +/** + * get_page_addr_code() + * @env: CPUArchState + * @addr: guest virtual address of guest code + * + * If we cannot translate and execute from the entire RAM page, or if + * the region is not backed by RAM, returns -1. 
Otherwise, returns the + * ram_addr_t corresponding to the guest code at @addr. + * + * Note: this function can trigger an exception. + */ +static inline tb_page_addr_t get_page_addr_code(CPUArchState *env, + vaddr addr) +{ + return get_page_addr_code_hostp(env, addr, NULL); +} + +/* + * Access to the various translations structures need to be serialised + * via locks for consistency. In user-mode emulation access to the + * memory related structures are protected with mmap_lock. + * In !user-mode we use per-page locks. + */ +#ifdef CONFIG_USER_ONLY +#define assert_memory_lock() tcg_debug_assert(have_mmap_lock()) +#else +#define assert_memory_lock() +#endif + +#if defined(CONFIG_SOFTMMU) && defined(CONFIG_DEBUG_TCG) +void assert_no_pages_locked(void); +#else +static inline void assert_no_pages_locked(void) { } +#endif + +#ifdef CONFIG_USER_ONLY +static inline void page_table_config_init(void) { } +#else +void page_table_config_init(void); +#endif + +#ifndef CONFIG_USER_ONLY +G_NORETURN void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr); +#endif /* CONFIG_USER_ONLY */ + +void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr); +void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr); + #endif diff --git a/accel/tcg/internal-target.h b/accel/tcg/internal-target.h deleted file mode 100644 index fe10972..0000000 --- a/accel/tcg/internal-target.h +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Internal execution defines for qemu (target specific) - * - * Copyright (c) 2003 Fabrice Bellard - * - * SPDX-License-Identifier: LGPL-2.1-or-later - */ - -#ifndef ACCEL_TCG_INTERNAL_TARGET_H -#define ACCEL_TCG_INTERNAL_TARGET_H - -#include "exec/exec-all.h" -#include "exec/translate-all.h" - -/* - * Access to the various translations structures need to be serialised - * via locks for consistency. In user-mode emulation access to the - * memory related structures are protected with mmap_lock. - * In !user-mode we use per-page locks. - */ -#ifdef CONFIG_USER_ONLY -#define assert_memory_lock() tcg_debug_assert(have_mmap_lock()) -#else -#define assert_memory_lock() -#endif - -#if defined(CONFIG_SOFTMMU) && defined(CONFIG_DEBUG_TCG) -void assert_no_pages_locked(void); -#else -static inline void assert_no_pages_locked(void) { } -#endif - -#ifdef CONFIG_USER_ONLY -static inline void page_table_config_init(void) { } -#else -void page_table_config_init(void); -#endif - -#ifdef CONFIG_USER_ONLY -/* - * For user-only, page_protect sets the page read-only. - * Since most execution is already on read-only pages, and we'd need to - * account for other TBs on the same page, defer undoing any page protection - * until we receive the write fault. 
- */ -static inline void tb_lock_page0(tb_page_addr_t p0) -{ - page_protect(p0); -} - -static inline void tb_lock_page1(tb_page_addr_t p0, tb_page_addr_t p1) -{ - page_protect(p1); -} - -static inline void tb_unlock_page1(tb_page_addr_t p0, tb_page_addr_t p1) { } -static inline void tb_unlock_pages(TranslationBlock *tb) { } -#else -void tb_lock_page0(tb_page_addr_t); -void tb_lock_page1(tb_page_addr_t, tb_page_addr_t); -void tb_unlock_page1(tb_page_addr_t, tb_page_addr_t); -void tb_unlock_pages(TranslationBlock *); -#endif - -#ifdef CONFIG_SOFTMMU -void tb_invalidate_phys_range_fast(ram_addr_t ram_addr, - unsigned size, - uintptr_t retaddr); -G_NORETURN void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr); -#endif /* CONFIG_SOFTMMU */ - -bool tb_invalidate_phys_page_unwind(tb_page_addr_t addr, uintptr_t pc); - -/* Return the current PC from CPU, which may be cached in TB. */ -static inline vaddr log_pc(CPUState *cpu, const TranslationBlock *tb) -{ - if (tb_cflags(tb) & CF_PCREL) { - return cpu->cc->get_pc(cpu); - } else { - return tb->pc; - } -} - -/** - * tcg_req_mo: - * @type: TCGBar - * - * Filter @type to the barrier that is required for the guest - * memory ordering vs the host memory ordering. A non-zero - * result indicates that some barrier is required. - * - * If TCG_GUEST_DEFAULT_MO is not defined, assume that the - * guest requires strict ordering. - * - * This is a macro so that it's constant even without optimization. - */ -#ifdef TCG_GUEST_DEFAULT_MO -# define tcg_req_mo(type) \ - ((type) & TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO) -#else -# define tcg_req_mo(type) ((type) & ~TCG_TARGET_DEFAULT_MO) -#endif - -/** - * cpu_req_mo: - * @type: TCGBar - * - * If tcg_req_mo indicates a barrier for @type is required - * for the guest memory model, issue a host memory barrier. - */ -#define cpu_req_mo(type) \ - do { \ - if (tcg_req_mo(type)) { \ - smp_mb(); \ - } \ - } while (0) - -#endif /* ACCEL_TCG_INTERNAL_H */ diff --git a/accel/tcg/ldst_atomicity.c.inc b/accel/tcg/ldst_atomicity.c.inc index 134da3c..c735add 100644 --- a/accel/tcg/ldst_atomicity.c.inc +++ b/accel/tcg/ldst_atomicity.c.inc @@ -168,6 +168,7 @@ static uint64_t load_atomic8_or_exit(CPUState *cpu, uintptr_t ra, void *pv) #endif /* Ultimate fallback: re-execute in serial context. */ + trace_load_atom8_or_exit_fallback(ra); cpu_loop_exit_atomic(cpu, ra); } @@ -212,6 +213,7 @@ static Int128 load_atomic16_or_exit(CPUState *cpu, uintptr_t ra, void *pv) } /* Ultimate fallback: re-execute in serial context. 
*/ + trace_load_atom16_or_exit_fallback(ra); cpu_loop_exit_atomic(cpu, ra); } @@ -519,6 +521,7 @@ static uint64_t load_atom_8(CPUState *cpu, uintptr_t ra, if (HAVE_al8) { return load_atom_extract_al8x2(pv); } + trace_load_atom8_fallback(memop, ra); cpu_loop_exit_atomic(cpu, ra); default: g_assert_not_reached(); @@ -563,6 +566,7 @@ static Int128 load_atom_16(CPUState *cpu, uintptr_t ra, break; case MO_64: if (!HAVE_al8) { + trace_load_atom16_fallback(memop, ra); cpu_loop_exit_atomic(cpu, ra); } a = load_atomic8(pv); @@ -570,6 +574,7 @@ static Int128 load_atom_16(CPUState *cpu, uintptr_t ra, break; case -MO_64: if (!HAVE_al8) { + trace_load_atom16_fallback(memop, ra); cpu_loop_exit_atomic(cpu, ra); } a = load_atom_extract_al8x2(pv); @@ -897,6 +902,7 @@ static void store_atom_2(CPUState *cpu, uintptr_t ra, g_assert_not_reached(); } + trace_store_atom2_fallback(memop, ra); cpu_loop_exit_atomic(cpu, ra); } @@ -961,6 +967,7 @@ static void store_atom_4(CPUState *cpu, uintptr_t ra, return; } } + trace_store_atom4_fallback(memop, ra); cpu_loop_exit_atomic(cpu, ra); default: g_assert_not_reached(); @@ -1029,6 +1036,7 @@ static void store_atom_8(CPUState *cpu, uintptr_t ra, default: g_assert_not_reached(); } + trace_store_atom8_fallback(memop, ra); cpu_loop_exit_atomic(cpu, ra); } @@ -1107,5 +1115,6 @@ static void store_atom_16(CPUState *cpu, uintptr_t ra, default: g_assert_not_reached(); } + trace_store_atom16_fallback(memop, ra); cpu_loop_exit_atomic(cpu, ra); } diff --git a/accel/tcg/ldst_common.c.inc b/accel/tcg/ldst_common.c.inc index 87ceb95..57f3e06 100644 --- a/accel/tcg/ldst_common.c.inc +++ b/accel/tcg/ldst_common.c.inc @@ -123,64 +123,69 @@ void helper_st_i128(CPUArchState *env, uint64_t addr, Int128 val, MemOpIdx oi) * Load helpers for cpu_ldst.h */ -static void plugin_load_cb(CPUArchState *env, abi_ptr addr, MemOpIdx oi) +static void plugin_load_cb(CPUArchState *env, vaddr addr, + uint64_t value_low, + uint64_t value_high, + MemOpIdx oi) { if (cpu_plugin_mem_cbs_enabled(env_cpu(env))) { - qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R); + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, + value_low, value_high, + oi, QEMU_PLUGIN_MEM_R); } } -uint8_t cpu_ldb_mmu(CPUArchState *env, abi_ptr addr, MemOpIdx oi, uintptr_t ra) +uint8_t cpu_ldb_mmu(CPUArchState *env, vaddr addr, MemOpIdx oi, uintptr_t ra) { uint8_t ret; tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_UB); ret = do_ld1_mmu(env_cpu(env), addr, oi, ra, MMU_DATA_LOAD); - plugin_load_cb(env, addr, oi); + plugin_load_cb(env, addr, ret, 0, oi); return ret; } -uint16_t cpu_ldw_mmu(CPUArchState *env, abi_ptr addr, +uint16_t cpu_ldw_mmu(CPUArchState *env, vaddr addr, MemOpIdx oi, uintptr_t ra) { uint16_t ret; tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_16); ret = do_ld2_mmu(env_cpu(env), addr, oi, ra, MMU_DATA_LOAD); - plugin_load_cb(env, addr, oi); + plugin_load_cb(env, addr, ret, 0, oi); return ret; } -uint32_t cpu_ldl_mmu(CPUArchState *env, abi_ptr addr, +uint32_t cpu_ldl_mmu(CPUArchState *env, vaddr addr, MemOpIdx oi, uintptr_t ra) { uint32_t ret; tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_32); ret = do_ld4_mmu(env_cpu(env), addr, oi, ra, MMU_DATA_LOAD); - plugin_load_cb(env, addr, oi); + plugin_load_cb(env, addr, ret, 0, oi); return ret; } -uint64_t cpu_ldq_mmu(CPUArchState *env, abi_ptr addr, +uint64_t cpu_ldq_mmu(CPUArchState *env, vaddr addr, MemOpIdx oi, uintptr_t ra) { uint64_t ret; tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_64); ret = do_ld8_mmu(env_cpu(env), addr, oi, ra, MMU_DATA_LOAD); - 
plugin_load_cb(env, addr, oi); + plugin_load_cb(env, addr, ret, 0, oi); return ret; } -Int128 cpu_ld16_mmu(CPUArchState *env, abi_ptr addr, +Int128 cpu_ld16_mmu(CPUArchState *env, vaddr addr, MemOpIdx oi, uintptr_t ra) { Int128 ret; tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_128); ret = do_ld16_mmu(env_cpu(env), addr, oi, ra); - plugin_load_cb(env, addr, oi); + plugin_load_cb(env, addr, int128_getlo(ret), int128_gethi(ret), oi); return ret; } @@ -188,363 +193,53 @@ Int128 cpu_ld16_mmu(CPUArchState *env, abi_ptr addr, * Store helpers for cpu_ldst.h */ -static void plugin_store_cb(CPUArchState *env, abi_ptr addr, MemOpIdx oi) +static void plugin_store_cb(CPUArchState *env, vaddr addr, + uint64_t value_low, + uint64_t value_high, + MemOpIdx oi) { if (cpu_plugin_mem_cbs_enabled(env_cpu(env))) { - qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W); + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, + value_low, value_high, + oi, QEMU_PLUGIN_MEM_W); } } -void cpu_stb_mmu(CPUArchState *env, abi_ptr addr, uint8_t val, +void cpu_stb_mmu(CPUArchState *env, vaddr addr, uint8_t val, MemOpIdx oi, uintptr_t retaddr) { helper_stb_mmu(env, addr, val, oi, retaddr); - plugin_store_cb(env, addr, oi); + plugin_store_cb(env, addr, val, 0, oi); } -void cpu_stw_mmu(CPUArchState *env, abi_ptr addr, uint16_t val, +void cpu_stw_mmu(CPUArchState *env, vaddr addr, uint16_t val, MemOpIdx oi, uintptr_t retaddr) { tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_16); do_st2_mmu(env_cpu(env), addr, val, oi, retaddr); - plugin_store_cb(env, addr, oi); + plugin_store_cb(env, addr, val, 0, oi); } -void cpu_stl_mmu(CPUArchState *env, abi_ptr addr, uint32_t val, +void cpu_stl_mmu(CPUArchState *env, vaddr addr, uint32_t val, MemOpIdx oi, uintptr_t retaddr) { tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_32); do_st4_mmu(env_cpu(env), addr, val, oi, retaddr); - plugin_store_cb(env, addr, oi); + plugin_store_cb(env, addr, val, 0, oi); } -void cpu_stq_mmu(CPUArchState *env, abi_ptr addr, uint64_t val, +void cpu_stq_mmu(CPUArchState *env, vaddr addr, uint64_t val, MemOpIdx oi, uintptr_t retaddr) { tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_64); do_st8_mmu(env_cpu(env), addr, val, oi, retaddr); - plugin_store_cb(env, addr, oi); + plugin_store_cb(env, addr, val, 0, oi); } -void cpu_st16_mmu(CPUArchState *env, abi_ptr addr, Int128 val, +void cpu_st16_mmu(CPUArchState *env, vaddr addr, Int128 val, MemOpIdx oi, uintptr_t retaddr) { tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_128); do_st16_mmu(env_cpu(env), addr, val, oi, retaddr); - plugin_store_cb(env, addr, oi); -} - -/* - * Wrappers of the above - */ - -uint32_t cpu_ldub_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) -{ - MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx); - return cpu_ldb_mmu(env, addr, oi, ra); -} - -int cpu_ldsb_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) -{ - return (int8_t)cpu_ldub_mmuidx_ra(env, addr, mmu_idx, ra); -} - -uint32_t cpu_lduw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) -{ - MemOpIdx oi = make_memop_idx(MO_BEUW | MO_UNALN, mmu_idx); - return cpu_ldw_mmu(env, addr, oi, ra); -} - -int cpu_ldsw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) -{ - return (int16_t)cpu_lduw_be_mmuidx_ra(env, addr, mmu_idx, ra); -} - -uint32_t cpu_ldl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) -{ - MemOpIdx oi = make_memop_idx(MO_BEUL | MO_UNALN, mmu_idx); - return cpu_ldl_mmu(env, addr, oi, ra); -} - 
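plugin_load_cb/plugin_store_cb now forward the accessed value as two 64-bit halves. A sketch of the widened callback shape (names hypothetical; the real hook is qemu_plugin_vcpu_mem_cb, and value_high is 0 for accesses of 8 bytes or less):

    #include <stdint.h>
    #include <inttypes.h>
    #include <stdio.h>

    static void demo_mem_cb(uint64_t addr, uint64_t value_low,
                            uint64_t value_high, int is_store)
    {
        /* A 16-byte access fills both halves, split via int128_getlo/gethi. */
        printf("%s @%#" PRIx64 " = %016" PRIx64 "%016" PRIx64 "\n",
               is_store ? "st" : "ld", addr, value_high, value_low);
    }

    int main(void)
    {
        demo_mem_cb(0x1000, 0xdeadbeef, 0, 1);  /* 4-byte store */
        return 0;
    }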
-uint64_t cpu_ldq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) -{ - MemOpIdx oi = make_memop_idx(MO_BEUQ | MO_UNALN, mmu_idx); - return cpu_ldq_mmu(env, addr, oi, ra); -} - -uint32_t cpu_lduw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) -{ - MemOpIdx oi = make_memop_idx(MO_LEUW | MO_UNALN, mmu_idx); - return cpu_ldw_mmu(env, addr, oi, ra); -} - -int cpu_ldsw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) -{ - return (int16_t)cpu_lduw_le_mmuidx_ra(env, addr, mmu_idx, ra); -} - -uint32_t cpu_ldl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) -{ - MemOpIdx oi = make_memop_idx(MO_LEUL | MO_UNALN, mmu_idx); - return cpu_ldl_mmu(env, addr, oi, ra); -} - -uint64_t cpu_ldq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) -{ - MemOpIdx oi = make_memop_idx(MO_LEUQ | MO_UNALN, mmu_idx); - return cpu_ldq_mmu(env, addr, oi, ra); -} - -void cpu_stb_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val, - int mmu_idx, uintptr_t ra) -{ - MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx); - cpu_stb_mmu(env, addr, val, oi, ra); -} - -void cpu_stw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val, - int mmu_idx, uintptr_t ra) -{ - MemOpIdx oi = make_memop_idx(MO_BEUW | MO_UNALN, mmu_idx); - cpu_stw_mmu(env, addr, val, oi, ra); -} - -void cpu_stl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val, - int mmu_idx, uintptr_t ra) -{ - MemOpIdx oi = make_memop_idx(MO_BEUL | MO_UNALN, mmu_idx); - cpu_stl_mmu(env, addr, val, oi, ra); -} - -void cpu_stq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint64_t val, - int mmu_idx, uintptr_t ra) -{ - MemOpIdx oi = make_memop_idx(MO_BEUQ | MO_UNALN, mmu_idx); - cpu_stq_mmu(env, addr, val, oi, ra); -} - -void cpu_stw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val, - int mmu_idx, uintptr_t ra) -{ - MemOpIdx oi = make_memop_idx(MO_LEUW | MO_UNALN, mmu_idx); - cpu_stw_mmu(env, addr, val, oi, ra); -} - -void cpu_stl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val, - int mmu_idx, uintptr_t ra) -{ - MemOpIdx oi = make_memop_idx(MO_LEUL | MO_UNALN, mmu_idx); - cpu_stl_mmu(env, addr, val, oi, ra); -} - -void cpu_stq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint64_t val, - int mmu_idx, uintptr_t ra) -{ - MemOpIdx oi = make_memop_idx(MO_LEUQ | MO_UNALN, mmu_idx); - cpu_stq_mmu(env, addr, val, oi, ra); -} - -/*--------------------------*/ - -uint32_t cpu_ldub_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra) -{ - int mmu_index = cpu_mmu_index(env_cpu(env), false); - return cpu_ldub_mmuidx_ra(env, addr, mmu_index, ra); -} - -int cpu_ldsb_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra) -{ - return (int8_t)cpu_ldub_data_ra(env, addr, ra); -} - -uint32_t cpu_lduw_be_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra) -{ - int mmu_index = cpu_mmu_index(env_cpu(env), false); - return cpu_lduw_be_mmuidx_ra(env, addr, mmu_index, ra); -} - -int cpu_ldsw_be_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra) -{ - return (int16_t)cpu_lduw_be_data_ra(env, addr, ra); -} - -uint32_t cpu_ldl_be_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra) -{ - int mmu_index = cpu_mmu_index(env_cpu(env), false); - return cpu_ldl_be_mmuidx_ra(env, addr, mmu_index, ra); -} - -uint64_t cpu_ldq_be_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra) -{ - int mmu_index = cpu_mmu_index(env_cpu(env), false); - return cpu_ldq_be_mmuidx_ra(env, addr, mmu_index, ra); -} - -uint32_t cpu_lduw_le_data_ra(CPUArchState 
*env, abi_ptr addr, uintptr_t ra) -{ - int mmu_index = cpu_mmu_index(env_cpu(env), false); - return cpu_lduw_le_mmuidx_ra(env, addr, mmu_index, ra); -} - -int cpu_ldsw_le_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra) -{ - return (int16_t)cpu_lduw_le_data_ra(env, addr, ra); -} - -uint32_t cpu_ldl_le_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra) -{ - int mmu_index = cpu_mmu_index(env_cpu(env), false); - return cpu_ldl_le_mmuidx_ra(env, addr, mmu_index, ra); -} - -uint64_t cpu_ldq_le_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra) -{ - int mmu_index = cpu_mmu_index(env_cpu(env), false); - return cpu_ldq_le_mmuidx_ra(env, addr, mmu_index, ra); -} - -void cpu_stb_data_ra(CPUArchState *env, abi_ptr addr, - uint32_t val, uintptr_t ra) -{ - int mmu_index = cpu_mmu_index(env_cpu(env), false); - cpu_stb_mmuidx_ra(env, addr, val, mmu_index, ra); -} - -void cpu_stw_be_data_ra(CPUArchState *env, abi_ptr addr, - uint32_t val, uintptr_t ra) -{ - int mmu_index = cpu_mmu_index(env_cpu(env), false); - cpu_stw_be_mmuidx_ra(env, addr, val, mmu_index, ra); -} - -void cpu_stl_be_data_ra(CPUArchState *env, abi_ptr addr, - uint32_t val, uintptr_t ra) -{ - int mmu_index = cpu_mmu_index(env_cpu(env), false); - cpu_stl_be_mmuidx_ra(env, addr, val, mmu_index, ra); -} - -void cpu_stq_be_data_ra(CPUArchState *env, abi_ptr addr, - uint64_t val, uintptr_t ra) -{ - int mmu_index = cpu_mmu_index(env_cpu(env), false); - cpu_stq_be_mmuidx_ra(env, addr, val, mmu_index, ra); -} - -void cpu_stw_le_data_ra(CPUArchState *env, abi_ptr addr, - uint32_t val, uintptr_t ra) -{ - int mmu_index = cpu_mmu_index(env_cpu(env), false); - cpu_stw_le_mmuidx_ra(env, addr, val, mmu_index, ra); -} - -void cpu_stl_le_data_ra(CPUArchState *env, abi_ptr addr, - uint32_t val, uintptr_t ra) -{ - int mmu_index = cpu_mmu_index(env_cpu(env), false); - cpu_stl_le_mmuidx_ra(env, addr, val, mmu_index, ra); -} - -void cpu_stq_le_data_ra(CPUArchState *env, abi_ptr addr, - uint64_t val, uintptr_t ra) -{ - int mmu_index = cpu_mmu_index(env_cpu(env), false); - cpu_stq_le_mmuidx_ra(env, addr, val, mmu_index, ra); -} - -/*--------------------------*/ - -uint32_t cpu_ldub_data(CPUArchState *env, abi_ptr addr) -{ - return cpu_ldub_data_ra(env, addr, 0); -} - -int cpu_ldsb_data(CPUArchState *env, abi_ptr addr) -{ - return (int8_t)cpu_ldub_data(env, addr); -} - -uint32_t cpu_lduw_be_data(CPUArchState *env, abi_ptr addr) -{ - return cpu_lduw_be_data_ra(env, addr, 0); -} - -int cpu_ldsw_be_data(CPUArchState *env, abi_ptr addr) -{ - return (int16_t)cpu_lduw_be_data(env, addr); -} - -uint32_t cpu_ldl_be_data(CPUArchState *env, abi_ptr addr) -{ - return cpu_ldl_be_data_ra(env, addr, 0); -} - -uint64_t cpu_ldq_be_data(CPUArchState *env, abi_ptr addr) -{ - return cpu_ldq_be_data_ra(env, addr, 0); -} - -uint32_t cpu_lduw_le_data(CPUArchState *env, abi_ptr addr) -{ - return cpu_lduw_le_data_ra(env, addr, 0); -} - -int cpu_ldsw_le_data(CPUArchState *env, abi_ptr addr) -{ - return (int16_t)cpu_lduw_le_data(env, addr); -} - -uint32_t cpu_ldl_le_data(CPUArchState *env, abi_ptr addr) -{ - return cpu_ldl_le_data_ra(env, addr, 0); -} - -uint64_t cpu_ldq_le_data(CPUArchState *env, abi_ptr addr) -{ - return cpu_ldq_le_data_ra(env, addr, 0); -} - -void cpu_stb_data(CPUArchState *env, abi_ptr addr, uint32_t val) -{ - cpu_stb_data_ra(env, addr, val, 0); -} - -void cpu_stw_be_data(CPUArchState *env, abi_ptr addr, uint32_t val) -{ - cpu_stw_be_data_ra(env, addr, val, 0); -} - -void cpu_stl_be_data(CPUArchState *env, abi_ptr addr, uint32_t val) -{ - 
cpu_stl_be_data_ra(env, addr, val, 0); -} - -void cpu_stq_be_data(CPUArchState *env, abi_ptr addr, uint64_t val) -{ - cpu_stq_be_data_ra(env, addr, val, 0); -} - -void cpu_stw_le_data(CPUArchState *env, abi_ptr addr, uint32_t val) -{ - cpu_stw_le_data_ra(env, addr, val, 0); -} - -void cpu_stl_le_data(CPUArchState *env, abi_ptr addr, uint32_t val) -{ - cpu_stl_le_data_ra(env, addr, val, 0); -} - -void cpu_stq_le_data(CPUArchState *env, abi_ptr addr, uint64_t val) -{ - cpu_stq_le_data_ra(env, addr, val, 0); + plugin_store_cb(env, addr, int128_getlo(val), int128_gethi(val), oi); } diff --git a/accel/tcg/meson.build b/accel/tcg/meson.build index aef80de..575e92b 100644 --- a/accel/tcg/meson.build +++ b/accel/tcg/meson.build @@ -1,36 +1,38 @@ -common_ss.add(when: 'CONFIG_TCG', if_true: files( - 'cpu-exec-common.c', -)) -tcg_specific_ss = ss.source_set() -tcg_specific_ss.add(files( - 'tcg-all.c', +if not have_tcg + subdir_done() +endif + +tcg_ss = ss.source_set() + +tcg_ss.add(files( 'cpu-exec.c', - 'tb-maint.c', - 'tcg-runtime-gvec.c', + 'cpu-exec-common.c', 'tcg-runtime.c', + 'tcg-runtime-gvec.c', + 'tb-maint.c', + 'tcg-all.c', 'translate-all.c', 'translator.c', )) -tcg_specific_ss.add(when: 'CONFIG_USER_ONLY', if_true: files('user-exec.c')) -tcg_specific_ss.add(when: 'CONFIG_SYSTEM_ONLY', if_false: files('user-exec-stub.c')) if get_option('plugins') - tcg_specific_ss.add(files('plugin-gen.c')) + tcg_ss.add(files('plugin-gen.c')) endif -specific_ss.add_all(when: 'CONFIG_TCG', if_true: tcg_specific_ss) -specific_ss.add(when: ['CONFIG_SYSTEM_ONLY', 'CONFIG_TCG'], if_true: files( - 'cputlb.c', - 'watchpoint.c', +user_ss.add_all(tcg_ss) +system_ss.add_all(tcg_ss) + +user_ss.add(files( + 'user-exec.c', + 'user-exec-stub.c', )) -system_ss.add(when: ['CONFIG_TCG'], if_true: files( +system_ss.add(files( + 'cputlb.c', 'icount-common.c', 'monitor.c', -)) - -tcg_module_ss.add(when: ['CONFIG_SYSTEM_ONLY', 'CONFIG_TCG'], if_true: files( 'tcg-accel-ops.c', - 'tcg-accel-ops-mttcg.c', 'tcg-accel-ops-icount.c', + 'tcg-accel-ops-mttcg.c', 'tcg-accel-ops-rr.c', + 'watchpoint.c', )) diff --git a/accel/tcg/monitor.c b/accel/tcg/monitor.c index 093efe9..1c182b6 100644 --- a/accel/tcg/monitor.c +++ b/accel/tcg/monitor.c @@ -13,9 +13,9 @@ #include "qapi/type-helpers.h" #include "qapi/qapi-commands-machine.h" #include "monitor/monitor.h" -#include "sysemu/cpus.h" -#include "sysemu/cpu-timers.h" -#include "sysemu/tcg.h" +#include "system/cpu-timers.h" +#include "exec/icount.h" +#include "system/tcg.h" #include "tcg/tcg.h" #include "internal-common.h" #include "tb-context.h" diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c index b6bae32..c1da753 100644 --- a/accel/tcg/plugin-gen.c +++ b/accel/tcg/plugin-gen.c @@ -22,13 +22,12 @@ #include "qemu/osdep.h" #include "qemu/plugin.h" #include "qemu/log.h" -#include "cpu.h" #include "tcg/tcg.h" #include "tcg/tcg-temp-internal.h" -#include "tcg/tcg-op.h" -#include "exec/exec-all.h" +#include "tcg/tcg-op-common.h" #include "exec/plugin-gen.h" #include "exec/translator.h" +#include "exec/translation-block.h" enum plugin_gen_from { PLUGIN_GEN_FROM_TB, @@ -85,27 +84,33 @@ static void gen_enable_mem_helper(struct qemu_plugin_tb *ptb, len = insn->mem_cbs->len; arr = g_array_sized_new(false, false, sizeof(struct qemu_plugin_dyn_cb), len); - memcpy(arr->data, insn->mem_cbs->data, - len * sizeof(struct qemu_plugin_dyn_cb)); + g_array_append_vals(arr, insn->mem_cbs->data, len); qemu_plugin_add_dyn_cb_arr(arr); tcg_gen_st_ptr(tcg_constant_ptr((intptr_t)arr), tcg_env, - 
offsetof(CPUState, neg.plugin_mem_cbs) - - offsetof(ArchCPU, env)); + offsetof(CPUState, neg.plugin_mem_cbs) - sizeof(CPUState)); } static void gen_disable_mem_helper(void) { tcg_gen_st_ptr(tcg_constant_ptr(0), tcg_env, - offsetof(CPUState, neg.plugin_mem_cbs) - - offsetof(ArchCPU, env)); + offsetof(CPUState, neg.plugin_mem_cbs) - sizeof(CPUState)); } static TCGv_i32 gen_cpu_index(void) { + /* + * Optimize when we run with a single vcpu. All values using cpu_index, + * including scoreboard index, will be optimized out. + * User-mode calls tb_flush when setting this flag. In system-mode, all + * vcpus are created before generating code. + */ + if (!tcg_cflags_has(current_cpu, CF_PARALLEL)) { + return tcg_constant_i32(current_cpu->cpu_index); + } TCGv_i32 cpu_index = tcg_temp_ebb_new_i32(); tcg_gen_ld_i32(cpu_index, tcg_env, - -offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index)); + offsetof(CPUState, cpu_index) - sizeof(CPUState)); return cpu_index; } @@ -252,7 +257,6 @@ static void inject_mem_cb(struct qemu_plugin_dyn_cb *cb, break; default: g_assert_not_reached(); - break; } } @@ -277,7 +281,7 @@ static void plugin_gen_inject(struct qemu_plugin_tb *plugin_tb) * that might be live within the existing opcode stream. * The simplest solution is to release them all and create new. */ - memset(tcg_ctx->free_temps, 0, sizeof(tcg_ctx->free_temps)); + tcg_temp_ebb_reset_freed(tcg_ctx); QTAILQ_FOREACH_SAFE(op, &tcg_ctx->ops, link, next) { switch (op->opc) { @@ -469,4 +473,8 @@ void plugin_gen_tb_end(CPUState *cpu, size_t num_insns) /* inject the instrumentation at the appropriate places */ plugin_gen_inject(ptb); + + /* reset plugin translation state (plugin_tb is reused between blocks) */ + tcg_ctx->plugin_db = NULL; + tcg_ctx->plugin_insn = NULL; } diff --git a/accel/tcg/tb-hash.h b/accel/tcg/tb-hash.h index a0c61f2..f7b159f 100644 --- a/accel/tcg/tb-hash.h +++ b/accel/tcg/tb-hash.h @@ -20,8 +20,9 @@ #ifndef EXEC_TB_HASH_H #define EXEC_TB_HASH_H -#include "exec/cpu-defs.h" -#include "exec/exec-all.h" +#include "exec/vaddr.h" +#include "exec/target_page.h" +#include "exec/translation-block.h" #include "qemu/xxhash.h" #include "tb-jmp-cache.h" diff --git a/accel/tcg/tb-internal.h b/accel/tcg/tb-internal.h new file mode 100644 index 0000000..40439f0 --- /dev/null +++ b/accel/tcg/tb-internal.h @@ -0,0 +1,55 @@ +/* + * TranslationBlock internal declarations (target specific) + * + * Copyright (c) 2003 Fabrice Bellard + * + * SPDX-License-Identifier: LGPL-2.1-or-later + */ + +#ifndef ACCEL_TCG_TB_INTERNAL_TARGET_H +#define ACCEL_TCG_TB_INTERNAL_TARGET_H + +#include "exec/translation-block.h" + +/* + * The true return address will often point to a host insn that is part of + * the next translated guest insn. Adjust the address backward to point to + * the middle of the call insn. Subtracting one would do the job except for + * several compressed mode architectures (arm, mips) which set the low bit + * to indicate the compressed mode; subtracting two works around that. It + * is also the case that there are no host isas that contain a call insn + * smaller than 4 bytes, so we don't worry about special-casing this. + */ +#define GETPC_ADJ 2 + +void tb_lock_page0(tb_page_addr_t); + +#ifdef CONFIG_USER_ONLY +/* + * For user-only, page_protect sets the page read-only. + * Since most execution is already on read-only pages, and we'd need to + * account for other TBs on the same page, defer undoing any page protection + * until we receive the write fault. 
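+ *
+ * [Editor's aside, not part of the patch — the resulting user-only flow,
+ *  pieced together from this series, is roughly:
+ *
+ *      tb_lock_page0(p)              -> page made read-only
+ *      guest store hits the page     -> host SIGSEGV
+ *      handle_sigsegv_accerr_write() -> page_unprotect(cpu, addr, pc)
+ *
+ *  where page_unprotect() invalidates the TBs on that page and restores
+ *  PAGE_WRITE, as seen in user-exec.c later in this diff.]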
+ */ +static inline void tb_lock_page1(tb_page_addr_t p0, tb_page_addr_t p1) +{ + tb_lock_page0(p1); +} + +static inline void tb_unlock_page1(tb_page_addr_t p0, tb_page_addr_t p1) { } +static inline void tb_unlock_pages(TranslationBlock *tb) { } +#else +void tb_lock_page1(tb_page_addr_t, tb_page_addr_t); +void tb_unlock_page1(tb_page_addr_t, tb_page_addr_t); +void tb_unlock_pages(TranslationBlock *); +#endif + +#ifdef CONFIG_SOFTMMU +void tb_invalidate_phys_range_fast(CPUState *cpu, ram_addr_t ram_addr, + unsigned size, uintptr_t retaddr); +#endif /* CONFIG_SOFTMMU */ + +bool tb_invalidate_phys_page_unwind(CPUState *cpu, tb_page_addr_t addr, + uintptr_t pc); + +#endif diff --git a/accel/tcg/tb-maint.c b/accel/tcg/tb-maint.c index cc0f5af..0048316 100644 --- a/accel/tcg/tb-maint.c +++ b/accel/tcg/tb-maint.c @@ -22,16 +22,21 @@ #include "qemu/qtree.h" #include "exec/cputlb.h" #include "exec/log.h" -#include "exec/exec-all.h" #include "exec/page-protection.h" +#include "exec/mmap-lock.h" #include "exec/tb-flush.h" -#include "exec/translate-all.h" -#include "sysemu/tcg.h" +#include "exec/target_page.h" +#include "accel/tcg/cpu-ops.h" +#include "tb-internal.h" +#include "system/tcg.h" #include "tcg/tcg.h" #include "tb-hash.h" #include "tb-context.h" +#include "tb-internal.h" #include "internal-common.h" -#include "internal-target.h" +#ifdef CONFIG_USER_ONLY +#include "user/page-protection.h" +#endif /* List iterators for lists of tagged pointers in TranslationBlock. */ @@ -152,11 +157,7 @@ static PageForEachNext foreach_tb_next(PageForEachNext tb, /* * In system mode we want L1_MAP to be based on ram offsets. */ -#if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS -# define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS -#else -# define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS -#endif +#define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS /* Size of the L2 (and L3, etc) page tables. */ #define V_L2_BITS 10 @@ -1005,7 +1006,8 @@ TranslationBlock *tb_link_page(TranslationBlock *tb) * Called with mmap_lock held for user-mode emulation. * NOTE: this function must not be called while a TB is running. */ -void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t last) +void tb_invalidate_phys_range(CPUState *cpu, tb_page_addr_t start, + tb_page_addr_t last) { TranslationBlock *tb; PageForEachNext n; @@ -1028,17 +1030,16 @@ static void tb_invalidate_phys_page(tb_page_addr_t addr) start = addr & TARGET_PAGE_MASK; last = addr | ~TARGET_PAGE_MASK; - tb_invalidate_phys_range(start, last); + tb_invalidate_phys_range(NULL, start, last); } /* * Called with mmap_lock held. If pc is not 0 then it indicates the * host PC of the faulting store instruction that caused this invalidate. - * Returns true if the caller needs to abort execution of the current - * TB (because it was modified by this store and the guest CPU has - * precise-SMC semantics). + * Returns true if the caller needs to abort execution of the current TB. */ -bool tb_invalidate_phys_page_unwind(tb_page_addr_t addr, uintptr_t pc) +bool tb_invalidate_phys_page_unwind(CPUState *cpu, tb_page_addr_t addr, + uintptr_t pc) { TranslationBlock *current_tb; bool current_tb_modified; @@ -1050,10 +1051,7 @@ bool tb_invalidate_phys_page_unwind(tb_page_addr_t addr, uintptr_t pc) * Without precise smc semantics, or when outside of a TB, * we can skip to invalidate. 
*/ -#ifndef TARGET_HAS_PRECISE_SMC - pc = 0; -#endif - if (!pc) { + if (!pc || !cpu || !cpu->cc->tcg_ops->precise_smc) { tb_invalidate_phys_page(addr); return false; } @@ -1076,15 +1074,14 @@ bool tb_invalidate_phys_page_unwind(tb_page_addr_t addr, uintptr_t pc) * the CPU state. */ current_tb_modified = true; - cpu_restore_state_from_tb(current_cpu, current_tb, pc); + cpu_restore_state_from_tb(cpu, current_tb, pc); } tb_phys_invalidate__locked(tb); } if (current_tb_modified) { /* Force execution of one insn next time. */ - CPUState *cpu = current_cpu; - cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(current_cpu); + cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(cpu); return true; } return false; @@ -1093,23 +1090,28 @@ bool tb_invalidate_phys_page_unwind(tb_page_addr_t addr, uintptr_t pc) /* * @p must be non-NULL. * Call with all @pages locked. + * (@cpu, @retaddr) may be (NULL, 0) outside of a cpu context, + * in which case precise_smc need not be detected. */ static void -tb_invalidate_phys_page_range__locked(struct page_collection *pages, +tb_invalidate_phys_page_range__locked(CPUState *cpu, + struct page_collection *pages, PageDesc *p, tb_page_addr_t start, tb_page_addr_t last, uintptr_t retaddr) { TranslationBlock *tb; PageForEachNext n; -#ifdef TARGET_HAS_PRECISE_SMC bool current_tb_modified = false; - TranslationBlock *current_tb = retaddr ? tcg_tb_lookup(retaddr) : NULL; -#endif /* TARGET_HAS_PRECISE_SMC */ + TranslationBlock *current_tb = NULL; /* Range may not cross a page. */ tcg_debug_assert(((start ^ last) & TARGET_PAGE_MASK) == 0); + if (retaddr && cpu && cpu->cc->tcg_ops->precise_smc) { + current_tb = tcg_tb_lookup(retaddr); + } + /* * We remove all the TBs in the range [start, last]. * XXX: see if in some cases it could be faster to invalidate all the code @@ -1127,8 +1129,7 @@ tb_invalidate_phys_page_range__locked(struct page_collection *pages, tb_last = tb_start + (tb_last & ~TARGET_PAGE_MASK); } if (!(tb_last < start || tb_start > last)) { -#ifdef TARGET_HAS_PRECISE_SMC - if (current_tb == tb && + if (unlikely(current_tb == tb) && (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) { /* * If we are modifying the current TB, we must stop @@ -1138,9 +1139,8 @@ tb_invalidate_phys_page_range__locked(struct page_collection *pages, * restore the CPU state. */ current_tb_modified = true; - cpu_restore_state_from_tb(current_cpu, current_tb, retaddr); + cpu_restore_state_from_tb(cpu, current_tb, retaddr); } -#endif /* TARGET_HAS_PRECISE_SMC */ tb_phys_invalidate__locked(tb); } } @@ -1150,15 +1150,13 @@ tb_invalidate_phys_page_range__locked(struct page_collection *pages, tlb_unprotect_code(start); } -#ifdef TARGET_HAS_PRECISE_SMC - if (current_tb_modified) { + if (unlikely(current_tb_modified)) { page_collection_unlock(pages); /* Force execution of one insn next time. */ - current_cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(current_cpu); + cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(cpu); mmap_unlock(); - cpu_loop_exit_noexc(current_cpu); + cpu_loop_exit_noexc(cpu); } -#endif } /* @@ -1168,7 +1166,8 @@ tb_invalidate_phys_page_range__locked(struct page_collection *pages, * access: the virtual CPU will exit the current TB if code is modified inside * this TB. 
*/ -void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t last) +void tb_invalidate_phys_range(CPUState *cpu, tb_page_addr_t start, + tb_page_addr_t last) { struct page_collection *pages; tb_page_addr_t index, index_last; @@ -1187,44 +1186,30 @@ void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t last) page_start = index << TARGET_PAGE_BITS; page_last = page_start | ~TARGET_PAGE_MASK; page_last = MIN(page_last, last); - tb_invalidate_phys_page_range__locked(pages, pd, + tb_invalidate_phys_page_range__locked(cpu, pages, pd, page_start, page_last, 0); } page_collection_unlock(pages); } /* - * Call with all @pages in the range [@start, @start + len[ locked. - */ -static void tb_invalidate_phys_page_fast__locked(struct page_collection *pages, - tb_page_addr_t start, - unsigned len, uintptr_t ra) -{ - PageDesc *p; - - p = page_find(start >> TARGET_PAGE_BITS); - if (!p) { - return; - } - - assert_page_locked(p); - tb_invalidate_phys_page_range__locked(pages, p, start, start + len - 1, ra); -} - -/* * len must be <= 8 and start must be a multiple of len. * Called via softmmu_template.h when code areas are written to with * iothread mutex not held. */ -void tb_invalidate_phys_range_fast(ram_addr_t ram_addr, - unsigned size, - uintptr_t retaddr) +void tb_invalidate_phys_range_fast(CPUState *cpu, ram_addr_t start, + unsigned len, uintptr_t ra) { - struct page_collection *pages; + PageDesc *p = page_find(start >> TARGET_PAGE_BITS); - pages = page_collection_lock(ram_addr, ram_addr + size - 1); - tb_invalidate_phys_page_fast__locked(pages, ram_addr, size, retaddr); - page_collection_unlock(pages); + if (p) { + ram_addr_t last = start + len - 1; + struct page_collection *pages = page_collection_lock(start, last); + + tb_invalidate_phys_page_range__locked(cpu, pages, p, + start, last, ra); + page_collection_unlock(pages); + } } #endif /* CONFIG_USER_ONLY */ diff --git a/accel/tcg/tcg-accel-ops-icount.c b/accel/tcg/tcg-accel-ops-icount.c index 9e1ae66..d0f7b41 100644 --- a/accel/tcg/tcg-accel-ops-icount.c +++ b/accel/tcg/tcg-accel-ops-icount.c @@ -24,11 +24,11 @@ */ #include "qemu/osdep.h" -#include "sysemu/replay.h" -#include "sysemu/cpu-timers.h" +#include "system/replay.h" +#include "exec/icount.h" #include "qemu/main-loop.h" #include "qemu/guest-random.h" -#include "exec/exec-all.h" +#include "hw/core/cpu.h" #include "tcg-accel-ops.h" #include "tcg-accel-ops-icount.h" diff --git a/accel/tcg/tcg-accel-ops-mttcg.c b/accel/tcg/tcg-accel-ops-mttcg.c index 49814ec..dfcee30 100644 --- a/accel/tcg/tcg-accel-ops-mttcg.c +++ b/accel/tcg/tcg-accel-ops-mttcg.c @@ -24,13 +24,12 @@ */ #include "qemu/osdep.h" -#include "sysemu/tcg.h" -#include "sysemu/replay.h" -#include "sysemu/cpu-timers.h" +#include "system/tcg.h" +#include "system/replay.h" +#include "exec/icount.h" #include "qemu/main-loop.h" #include "qemu/notify.h" #include "qemu/guest-random.h" -#include "exec/exec-all.h" #include "hw/boards.h" #include "tcg/startup.h" #include "tcg-accel-ops.h" diff --git a/accel/tcg/tcg-accel-ops-rr.c b/accel/tcg/tcg-accel-ops-rr.c index 84c36c1..6eec5c9 100644 --- a/accel/tcg/tcg-accel-ops-rr.c +++ b/accel/tcg/tcg-accel-ops-rr.c @@ -25,13 +25,13 @@ #include "qemu/osdep.h" #include "qemu/lockable.h" -#include "sysemu/tcg.h" -#include "sysemu/replay.h" -#include "sysemu/cpu-timers.h" +#include "system/tcg.h" +#include "system/replay.h" +#include "exec/icount.h" #include "qemu/main-loop.h" #include "qemu/notify.h" #include "qemu/guest-random.h" -#include "exec/exec-all.h" +#include 
"exec/cpu-common.h" #include "tcg/startup.h" #include "tcg-accel-ops.h" #include "tcg-accel-ops-rr.h" @@ -109,7 +109,7 @@ static void rr_wait_io_event(void) { CPUState *cpu; - while (all_cpu_threads_idle() && replay_can_wait()) { + while (all_cpu_threads_idle()) { rr_stop_kick_timer(); qemu_cond_wait_bql(first_cpu->halt_cond); } @@ -302,9 +302,7 @@ static void *rr_cpu_thread_fn(void *arg) rr_deal_with_unplugged_cpus(); } - rcu_remove_force_rcu_notifier(&force_rcu); - rcu_unregister_thread(); - return NULL; + g_assert_not_reached(); } void rr_start_vcpu_thread(CPUState *cpu) @@ -329,6 +327,7 @@ void rr_start_vcpu_thread(CPUState *cpu) /* we share the thread, dump spare data */ g_free(cpu->thread); qemu_cond_destroy(cpu->halt_cond); + g_free(cpu->halt_cond); cpu->thread = single_tcg_cpu_thread; cpu->halt_cond = single_tcg_halt_cond; diff --git a/accel/tcg/tcg-accel-ops.c b/accel/tcg/tcg-accel-ops.c index 3c19e68..b24d6a7 100644 --- a/accel/tcg/tcg-accel-ops.c +++ b/accel/tcg/tcg-accel-ops.c @@ -26,15 +26,18 @@ */ #include "qemu/osdep.h" -#include "sysemu/tcg.h" -#include "sysemu/replay.h" -#include "sysemu/cpu-timers.h" +#include "system/accel-ops.h" +#include "system/tcg.h" +#include "system/replay.h" +#include "exec/icount.h" #include "qemu/main-loop.h" #include "qemu/guest-random.h" #include "qemu/timer.h" -#include "exec/exec-all.h" +#include "exec/cputlb.h" #include "exec/hwaddr.h" #include "exec/tb-flush.h" +#include "exec/translation-block.h" +#include "exec/watchpoint.h" #include "gdbstub/enums.h" #include "hw/core/cpu.h" @@ -119,10 +122,9 @@ static inline int xlat_gdb_type(CPUState *cpu, int gdbtype) [GDB_WATCHPOINT_ACCESS] = BP_GDB | BP_MEM_ACCESS, }; - CPUClass *cc = CPU_GET_CLASS(cpu); int cputype = xlat[gdbtype]; - if (cc->gdb_stop_before_watchpoint) { + if (cpu->cc->gdb_stop_before_watchpoint) { cputype |= BP_STOP_BEFORE_ACCESS; } return cputype; @@ -222,7 +224,7 @@ static void tcg_accel_ops_init(AccelOpsClass *ops) ops->remove_all_breakpoints = tcg_remove_all_breakpoints; } -static void tcg_accel_ops_class_init(ObjectClass *oc, void *data) +static void tcg_accel_ops_class_init(ObjectClass *oc, const void *data) { AccelOpsClass *ops = ACCEL_OPS_CLASS(oc); diff --git a/accel/tcg/tcg-accel-ops.h b/accel/tcg/tcg-accel-ops.h index 44c4079..6feeb3f 100644 --- a/accel/tcg/tcg-accel-ops.h +++ b/accel/tcg/tcg-accel-ops.h @@ -12,7 +12,7 @@ #ifndef TCG_ACCEL_OPS_H #define TCG_ACCEL_OPS_H -#include "sysemu/cpus.h" +#include "system/cpus.h" void tcg_cpu_destroy(CPUState *cpu); int tcg_cpu_exec(CPUState *cpu); diff --git a/accel/tcg/tcg-all.c b/accel/tcg/tcg-all.c index 2090907..6e5dc33 100644 --- a/accel/tcg/tcg-all.c +++ b/accel/tcg/tcg-all.c @@ -24,26 +24,29 @@ */ #include "qemu/osdep.h" -#include "sysemu/tcg.h" +#include "system/tcg.h" #include "exec/replay-core.h" -#include "sysemu/cpu-timers.h" +#include "exec/icount.h" #include "tcg/startup.h" -#include "tcg/oversized-guest.h" #include "qapi/error.h" #include "qemu/error-report.h" #include "qemu/accel.h" #include "qemu/atomic.h" +#include "qapi/qapi-types-common.h" #include "qapi/qapi-builtin-visit.h" #include "qemu/units.h" -#if !defined(CONFIG_USER_ONLY) +#include "qemu/target-info.h" +#ifndef CONFIG_USER_ONLY #include "hw/boards.h" #endif +#include "accel/tcg/cpu-ops.h" #include "internal-common.h" + struct TCGState { AccelState parent_obj; - bool mttcg_enabled; + OnOffAuto mttcg_enabled; bool one_insn_per_tb; int splitwx_enabled; unsigned long tb_size; @@ -55,40 +58,18 @@ typedef struct TCGState TCGState; 
DECLARE_INSTANCE_CHECKER(TCGState, TCG_STATE, TYPE_TCG_ACCEL) -/* - * We default to false if we know other options have been enabled - * which are currently incompatible with MTTCG. Otherwise when each - * guest (target) has been updated to support: - * - atomic instructions - * - memory ordering primitives (barriers) - * they can set the appropriate CONFIG flags in ${target}-softmmu.mak - * - * Once a guest architecture has been converted to the new primitives - * there is one remaining limitation to check: - * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host) - */ - -static bool default_mttcg_enabled(void) +#ifndef CONFIG_USER_ONLY +bool qemu_tcg_mttcg_enabled(void) { - if (icount_enabled() || TCG_OVERSIZED_GUEST) { - return false; - } -#ifdef TARGET_SUPPORTS_MTTCG -# ifndef TCG_GUEST_DEFAULT_MO -# error "TARGET_SUPPORTS_MTTCG without TCG_GUEST_DEFAULT_MO" -# endif - return true; -#else - return false; -#endif + TCGState *s = TCG_STATE(current_accel()); + return s->mttcg_enabled == ON_OFF_AUTO_ON; } +#endif /* !CONFIG_USER_ONLY */ static void tcg_accel_instance_init(Object *obj) { TCGState *s = TCG_STATE(obj); - s->mttcg_enabled = default_mttcg_enabled(); - /* If debugging enabled, default "auto on", otherwise off. */ #if defined(CONFIG_DEBUG_TCG) && !defined(CONFIG_USER_ONLY) s->splitwx_enabled = -1; @@ -97,24 +78,57 @@ static void tcg_accel_instance_init(Object *obj) #endif } -bool mttcg_enabled; bool one_insn_per_tb; static int tcg_init_machine(MachineState *ms) { TCGState *s = TCG_STATE(current_accel()); -#ifdef CONFIG_USER_ONLY - unsigned max_cpus = 1; -#else - unsigned max_cpus = ms->smp.max_cpus; + unsigned max_threads = 1; + +#ifndef CONFIG_USER_ONLY + CPUClass *cc = CPU_CLASS(object_class_by_name(target_cpu_type())); + bool mttcg_supported = cc->tcg_ops->mttcg_supported; + + switch (s->mttcg_enabled) { + case ON_OFF_AUTO_AUTO: + /* + * We default to false if we know other options have been enabled + * which are currently incompatible with MTTCG. Otherwise when each + * guest (target) has been updated to support: + * - atomic instructions + * - memory ordering primitives (barriers) + * they can set the appropriate CONFIG flags in ${target}-softmmu.mak + * + * Once a guest architecture has been converted to the new primitives + * there is one remaining limitation to check: + * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host) + */ + if (mttcg_supported && !icount_enabled()) { + s->mttcg_enabled = ON_OFF_AUTO_ON; + max_threads = ms->smp.max_cpus; + } else { + s->mttcg_enabled = ON_OFF_AUTO_OFF; + } + break; + case ON_OFF_AUTO_ON: + if (!mttcg_supported) { + warn_report("Guest not yet converted to MTTCG - " + "you may get unexpected results"); + } + max_threads = ms->smp.max_cpus; + break; + case ON_OFF_AUTO_OFF: + break; + default: + g_assert_not_reached(); + } #endif tcg_allowed = true; - mttcg_enabled = s->mttcg_enabled; page_init(); tb_htable_init(); - tcg_init(s->tb_size * MiB, s->splitwx_enabled, max_cpus); + tcg_init(s->tb_size * MiB, s->splitwx_enabled, max_threads); #if defined(CONFIG_SOFTMMU) /* @@ -124,6 +138,10 @@ static int tcg_init_machine(MachineState *ms) tcg_prologue_init(); #endif +#ifdef CONFIG_USER_ONLY + qdev_create_fake_machine(); +#endif + return 0; } @@ -131,7 +149,7 @@ static char *tcg_get_thread(Object *obj, Error **errp) { TCGState *s = TCG_STATE(obj); - return g_strdup(s->mttcg_enabled ? "multi" : "single"); + return g_strdup(s->mttcg_enabled == ON_OFF_AUTO_ON ? 
"multi" : "single"); } static void tcg_set_thread(Object *obj, const char *value, Error **errp) @@ -139,19 +157,13 @@ static void tcg_set_thread(Object *obj, const char *value, Error **errp) TCGState *s = TCG_STATE(obj); if (strcmp(value, "multi") == 0) { - if (TCG_OVERSIZED_GUEST) { - error_setg(errp, "No MTTCG when guest word size > hosts"); - } else if (icount_enabled()) { + if (icount_enabled()) { error_setg(errp, "No MTTCG when icount is enabled"); } else { -#ifndef TARGET_SUPPORTS_MTTCG - warn_report("Guest not yet converted to MTTCG - " - "you may get unexpected results"); -#endif - s->mttcg_enabled = true; + s->mttcg_enabled = ON_OFF_AUTO_ON; } } else if (strcmp(value, "single") == 0) { - s->mttcg_enabled = false; + s->mttcg_enabled = ON_OFF_AUTO_OFF; } else { error_setg(errp, "Invalid 'thread' setting %s", value); } @@ -222,7 +234,7 @@ static int tcg_gdbstub_supported_sstep_flags(void) } } -static void tcg_accel_class_init(ObjectClass *oc, void *data) +static void tcg_accel_class_init(ObjectClass *oc, const void *data) { AccelClass *ac = ACCEL_CLASS(oc); ac->name = "tcg"; diff --git a/accel/tcg/tcg-runtime-gvec.c b/accel/tcg/tcg-runtime-gvec.c index afca89b..ff927c5 100644 --- a/accel/tcg/tcg-runtime-gvec.c +++ b/accel/tcg/tcg-runtime-gvec.c @@ -19,7 +19,6 @@ #include "qemu/osdep.h" #include "qemu/host-utils.h" -#include "cpu.h" #include "exec/helper-proto-common.h" #include "tcg/tcg-gvec-desc.h" diff --git a/accel/tcg/tcg-runtime.c b/accel/tcg/tcg-runtime.c index 9fa539a..fa7ed97 100644 --- a/accel/tcg/tcg-runtime.c +++ b/accel/tcg/tcg-runtime.c @@ -23,13 +23,9 @@ */ #include "qemu/osdep.h" #include "qemu/host-utils.h" -#include "cpu.h" +#include "exec/cpu-common.h" #include "exec/helper-proto-common.h" -#include "exec/cpu_ldst.h" -#include "exec/exec-all.h" -#include "disas/disas.h" -#include "exec/log.h" -#include "tcg/tcg.h" +#include "accel/tcg/getpc.h" #define HELPER_H "accel/tcg/tcg-runtime.h" #include "exec/helper-info.c.inc" diff --git a/accel/tcg/tlb-bounds.h b/accel/tcg/tlb-bounds.h new file mode 100644 index 0000000..f83d9ac --- /dev/null +++ b/accel/tcg/tlb-bounds.h @@ -0,0 +1,13 @@ +/* + * softmmu size bounds + * SPDX-License-Identifier: LGPL-2.1-or-later + */ + +#ifndef ACCEL_TCG_TLB_BOUNDS_H +#define ACCEL_TCG_TLB_BOUNDS_H + +#define CPU_TLB_DYN_MIN_BITS 6 +#define CPU_TLB_DYN_MAX_BITS (32 - TARGET_PAGE_BITS) +#define CPU_TLB_DYN_DEFAULT_BITS 8 + +#endif /* ACCEL_TCG_TLB_BOUNDS_H */ diff --git a/accel/tcg/trace-events b/accel/tcg/trace-events index 4e9b450..14f6388 100644 --- a/accel/tcg/trace-events +++ b/accel/tcg/trace-events @@ -12,3 +12,15 @@ memory_notdirty_set_dirty(uint64_t vaddr) "0x%" PRIx64 # translate-all.c translate_block(void *tb, uintptr_t pc, const void *tb_code) "tb:%p, pc:0x%"PRIxPTR", tb_code:%p" + +# ldst_atomicity +load_atom2_fallback(uint32_t memop, uintptr_t ra) "mop:0x%"PRIx32", ra:0x%"PRIxPTR"" +load_atom4_fallback(uint32_t memop, uintptr_t ra) "mop:0x%"PRIx32", ra:0x%"PRIxPTR"" +load_atom8_or_exit_fallback(uintptr_t ra) "ra:0x%"PRIxPTR"" +load_atom8_fallback(uint32_t memop, uintptr_t ra) "mop:0x%"PRIx32", ra:0x%"PRIxPTR"" +load_atom16_fallback(uint32_t memop, uintptr_t ra) "mop:0x%"PRIx32", ra:0x%"PRIxPTR"" +load_atom16_or_exit_fallback(uintptr_t ra) "ra:0x%"PRIxPTR"" +store_atom2_fallback(uint32_t memop, uintptr_t ra) "mop:0x%"PRIx32", ra:0x%"PRIxPTR"" +store_atom4_fallback(uint32_t memop, uintptr_t ra) "mop:0x%"PRIx32", ra:0x%"PRIxPTR"" +store_atom8_fallback(uint32_t memop, uintptr_t ra) "mop:0x%"PRIx32", ra:0x%"PRIxPTR"" 
+store_atom16_fallback(uint32_t memop, uintptr_t ra) "mop:0x%"PRIx32", ra:0x%"PRIxPTR"" diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index fdf6d8a..d468667 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -21,48 +21,20 @@ #include "trace.h" #include "disas/disas.h" -#include "exec/exec-all.h" #include "tcg/tcg.h" -#if defined(CONFIG_USER_ONLY) -#include "qemu.h" -#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) -#include <sys/param.h> -#if __FreeBSD_version >= 700104 -#define HAVE_KINFO_GETVMMAP -#define sigqueue sigqueue_freebsd /* avoid redefinition */ -#include <sys/proc.h> -#include <machine/profile.h> -#define _KERNEL -#include <sys/user.h> -#undef _KERNEL -#undef sigqueue -#include <libutil.h> -#endif -#endif -#else -#include "exec/ram_addr.h" -#endif - -#include "exec/cputlb.h" -#include "exec/translate-all.h" -#include "exec/translator.h" +#include "exec/mmap-lock.h" +#include "tb-internal.h" #include "exec/tb-flush.h" -#include "qemu/bitmap.h" -#include "qemu/qemu-print.h" -#include "qemu/main-loop.h" #include "qemu/cacheinfo.h" -#include "qemu/timer.h" +#include "qemu/target-info.h" #include "exec/log.h" -#include "sysemu/cpus.h" -#include "sysemu/cpu-timers.h" -#include "sysemu/tcg.h" -#include "qapi/error.h" -#include "hw/core/tcg-cpu-ops.h" +#include "exec/icount.h" +#include "accel/tcg/cpu-ops.h" #include "tb-jmp-cache.h" #include "tb-hash.h" #include "tb-context.h" +#include "tb-internal.h" #include "internal-common.h" -#include "internal-target.h" #include "tcg/perf.h" #include "tcg/insn-start-words.h" @@ -105,7 +77,7 @@ static int64_t decode_sleb128(const uint8_t **pp) val |= (int64_t)(byte & 0x7f) << shift; shift += 7; } while (byte & 0x80); - if (shift < TARGET_LONG_BITS && (byte & 0x40)) { + if (shift < 64 && (byte & 0x40)) { val |= -(int64_t)1 << shift; } @@ -116,7 +88,7 @@ static int64_t decode_sleb128(const uint8_t **pp) /* Encode the data collected about the instructions while compiling TB. Place the data at BLOCK, and return the number of bytes consumed. - The logical table consists of TARGET_INSN_START_WORDS target_ulong's, + The logical table consists of INSN_START_WORDS uint64_t's, which come from the target's insn_start data, followed by a uintptr_t which comes from the host pc of the end of the code implementing the insn. @@ -136,13 +108,13 @@ static int encode_search(TranslationBlock *tb, uint8_t *block) for (i = 0, n = tb->icount; i < n; ++i) { uint64_t prev, curr; - for (j = 0; j < TARGET_INSN_START_WORDS; ++j) { + for (j = 0; j < INSN_START_WORDS; ++j) { if (i == 0) { prev = (!(tb_cflags(tb) & CF_PCREL) && j == 0 ? tb->pc : 0); } else { - prev = insn_data[(i - 1) * TARGET_INSN_START_WORDS + j]; + prev = insn_data[(i - 1) * INSN_START_WORDS + j]; } - curr = insn_data[i * TARGET_INSN_START_WORDS + j]; + curr = insn_data[i * INSN_START_WORDS + j]; p = encode_sleb128(p, curr - prev); } prev = (i == 0 ? 0 : insn_end_off[i - 1]); @@ -174,7 +146,7 @@ static int cpu_unwind_data_from_tb(TranslationBlock *tb, uintptr_t host_pc, return -1; } - memset(data, 0, sizeof(uint64_t) * TARGET_INSN_START_WORDS); + memset(data, 0, sizeof(uint64_t) * INSN_START_WORDS); if (!(tb_cflags(tb) & CF_PCREL)) { data[0] = tb->pc; } @@ -184,7 +156,7 @@ static int cpu_unwind_data_from_tb(TranslationBlock *tb, uintptr_t host_pc, * at which the end of the insn exceeds host_pc. 
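 *
 * [Editor's aside — worked example, not part of the patch: sleb128 packs
 *  7 bits per byte with sign extension. decode_sleb128() maps the single
 *  byte 0x7f to -1: val = 0x7f, shift = 7, bit 0x80 is clear so the loop
 *  stops, and since shift < 64 and (byte & 0x40) is set, val |= -1 << 7,
 *  yielding -1. Because encode_search() stores per-word deltas, an
 *  insn_start word that does not change between instructions costs a
 *  single 0x00 byte.]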
*/ for (i = 0; i < num_insns; ++i) { - for (j = 0; j < TARGET_INSN_START_WORDS; ++j) { + for (j = 0; j < INSN_START_WORDS; ++j) { data[j] += decode_sleb128(&p); } iter_pc += decode_sleb128(&p); @@ -202,7 +174,7 @@ static int cpu_unwind_data_from_tb(TranslationBlock *tb, uintptr_t host_pc, void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, uintptr_t host_pc) { - uint64_t data[TARGET_INSN_START_WORDS]; + uint64_t data[INSN_START_WORDS]; int insns_left = cpu_unwind_data_from_tb(tb, host_pc, data); if (insns_left < 0) { @@ -274,8 +246,10 @@ static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb, tcg_func_start(tcg_ctx); - tcg_ctx->cpu = env_cpu(env); - gen_intermediate_code(env_cpu(env), tb, max_insns, pc, host_pc); + CPUState *cs = env_cpu(env); + tcg_ctx->cpu = cs; + cs->cc->tcg_ops->translate_code(cs, tb, max_insns, pc, host_pc); + assert(tb->size != 0); tcg_ctx->cpu = NULL; *max_insns = tb->icount; @@ -284,9 +258,7 @@ static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb, } /* Called with mmap_lock held for user mode emulation. */ -TranslationBlock *tb_gen_code(CPUState *cpu, - vaddr pc, uint64_t cs_base, - uint32_t flags, int cflags) +TranslationBlock *tb_gen_code(CPUState *cpu, TCGTBCPUState s) { CPUArchState *env = cpu_env(cpu); TranslationBlock *tb, *existing_tb; @@ -299,14 +271,14 @@ TranslationBlock *tb_gen_code(CPUState *cpu, assert_memory_lock(); qemu_thread_jit_write(); - phys_pc = get_page_addr_code_hostp(env, pc, &host_pc); + phys_pc = get_page_addr_code_hostp(env, s.pc, &host_pc); if (phys_pc == -1) { /* Generate a one-shot TB with 1 insn in it */ - cflags = (cflags & ~CF_COUNT_MASK) | 1; + s.cflags = (s.cflags & ~CF_COUNT_MASK) | 1; } - max_insns = cflags & CF_COUNT_MASK; + max_insns = s.cflags & CF_COUNT_MASK; if (max_insns == 0) { max_insns = TCG_MAX_INSNS; } @@ -326,12 +298,12 @@ TranslationBlock *tb_gen_code(CPUState *cpu, gen_code_buf = tcg_ctx->code_gen_ptr; tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf); - if (!(cflags & CF_PCREL)) { - tb->pc = pc; + if (!(s.cflags & CF_PCREL)) { + tb->pc = s.pc; } - tb->cs_base = cs_base; - tb->flags = flags; - tb->cflags = cflags; + tb->cs_base = s.cs_base; + tb->flags = s.flags; + tb->cflags = s.cflags; tb_set_page_addr0(tb, phys_pc); tb_set_page_addr1(tb, -1); if (phys_pc != -1) { @@ -339,30 +311,20 @@ TranslationBlock *tb_gen_code(CPUState *cpu, } tcg_ctx->gen_tb = tb; - tcg_ctx->addr_type = TARGET_LONG_BITS == 32 ? TCG_TYPE_I32 : TCG_TYPE_I64; -#ifdef CONFIG_SOFTMMU - tcg_ctx->page_bits = TARGET_PAGE_BITS; - tcg_ctx->page_mask = TARGET_PAGE_MASK; - tcg_ctx->tlb_dyn_max_bits = CPU_TLB_DYN_MAX_BITS; -#endif - tcg_ctx->insn_start_words = TARGET_INSN_START_WORDS; -#ifdef TCG_GUEST_DEFAULT_MO - tcg_ctx->guest_mo = TCG_GUEST_DEFAULT_MO; -#else - tcg_ctx->guest_mo = TCG_MO_ALL; -#endif + tcg_ctx->addr_type = target_long_bits() == 32 ? TCG_TYPE_I32 : TCG_TYPE_I64; + tcg_ctx->guest_mo = cpu->cc->tcg_ops->guest_default_memory_order; restart_translate: - trace_translate_block(tb, pc, tb->tc.ptr); + trace_translate_block(tb, s.pc, tb->tc.ptr); - gen_code_size = setjmp_gen_code(env, tb, pc, host_pc, &max_insns, &ti); + gen_code_size = setjmp_gen_code(env, tb, s.pc, host_pc, &max_insns, &ti); if (unlikely(gen_code_size < 0)) { switch (gen_code_size) { case -1: /* * Overflow of code_gen_buffer, or the current slice of it. 
* - * TODO: We don't need to re-do gen_intermediate_code, nor + * TODO: We don't need to re-do tcg_ops->translate_code, nor * should we re-do the tcg optimization currently hidden * inside tcg_gen_code. All that should be required is to * flush the TBs, allocate a new TB, re-initialize it per @@ -432,10 +394,10 @@ TranslationBlock *tb_gen_code(CPUState *cpu, * For CF_PCREL, attribute all executions of the generated code * to its first mapping. */ - perf_report_code(pc, tb, tcg_splitwx_to_rx(gen_code_buf)); + perf_report_code(s.pc, tb, tcg_splitwx_to_rx(gen_code_buf)); if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) && - qemu_log_in_addr_range(pc)) { + qemu_log_in_addr_range(s.pc)) { FILE *logfile = qemu_log_trylock(); if (logfile) { int code_size, data_size; @@ -457,7 +419,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu, fprintf(logfile, "OUT: [size=%d]\n", gen_code_size); fprintf(logfile, " -- guest addr 0x%016" PRIx64 " + tb prologue\n", - tcg_ctx->gen_insn_data[insn * TARGET_INSN_START_WORDS]); + tcg_ctx->gen_insn_data[insn * INSN_START_WORDS]); chunk_start = tcg_ctx->gen_insn_end_off[insn]; disas(logfile, tb->tc.ptr, chunk_start); @@ -470,7 +432,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu, size_t chunk_end = tcg_ctx->gen_insn_end_off[insn]; if (chunk_end > chunk_start) { fprintf(logfile, " -- guest addr 0x%016" PRIx64 "\n", - tcg_ctx->gen_insn_data[insn * TARGET_INSN_START_WORDS]); + tcg_ctx->gen_insn_data[insn * INSN_START_WORDS]); disas(logfile, tb->tc.ptr + chunk_start, chunk_end - chunk_start); chunk_start = chunk_end; @@ -528,9 +490,25 @@ TranslationBlock *tb_gen_code(CPUState *cpu, } /* + * Insert TB into the corresponding region tree before publishing it + * through QHT. Otherwise rewinding happened in the TB might fail to + * lookup itself using host PC. + */ + tcg_tb_insert(tb); + + /* * If the TB is not associated with a physical RAM page then it must be - * a temporary one-insn TB, and we have nothing left to do. Return early - * before attempting to link to other TBs or add to the lookup table. + * a temporary one-insn TB. + * + * Such TBs must be added to region trees in order to make sure that + * restore_state_to_opc() - which on some architectures is not limited to + * rewinding, but also affects exception handling! - is called when such a + * TB causes an exception. + * + * At the same time, temporary one-insn TBs must be executed at most once, + * because subsequent reads from, e.g., I/O memory may return different + * values. So return early before attempting to link to other TBs or add + * to the QHT. */ if (tb_page_addr0(tb) == -1) { assert_no_pages_locked(); @@ -538,13 +516,6 @@ TranslationBlock *tb_gen_code(CPUState *cpu, } /* - * Insert TB into the corresponding region tree before publishing it - * through QHT. Otherwise rewinding happened in the TB might fail to - * lookup itself using host PC. - */ - tcg_tb_insert(tb); - - /* * No explicit memory barrier is required -- tb_link_page() makes the * TB visible in a consistent state. */ @@ -579,15 +550,11 @@ void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr) /* The exception probably happened in a helper. The CPU state should have been saved before calling it. Fetch the PC from there. 
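 *
 * [Editor's aside, not part of the patch: the per-target hook
 *  tcg_ops->get_tb_cpu_state() returns a TCGTBCPUState by value; the
 *  fields used in this series are .pc, .cs_base, .flags and .cflags,
 *  replacing the three out-parameters of the removed
 *  cpu_get_tb_cpu_state() call below.]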
*/ CPUArchState *env = cpu_env(cpu); - vaddr pc; - uint64_t cs_base; - tb_page_addr_t addr; - uint32_t flags; + TCGTBCPUState s = cpu->cc->tcg_ops->get_tb_cpu_state(cpu); + tb_page_addr_t addr = get_page_addr_code(env, s.pc); - cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); - addr = get_page_addr_code(env, pc); if (addr != -1) { - tb_invalidate_phys_range(addr, addr); + tb_invalidate_phys_range(cpu, addr, addr); } } } @@ -618,7 +585,7 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr) * to account for the re-execution of the branch. */ n = 1; - cc = CPU_GET_CLASS(cpu); + cc = cpu->cc; if (cc->tcg_ops->io_recompile_replay_branch && cc->tcg_ops->io_recompile_replay_branch(cpu, tb)) { cpu->neg.icount_decr.u16.low++; @@ -629,9 +596,10 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr) * Exit the loop and potentially generate a new TB executing the * just the I/O insns. We also limit instrumentation to memory * operations only (which execute after completion) so we don't - * double instrument the instruction. + * double instrument the instruction. Also don't let an IRQ sneak + * in before we execute it. */ - cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | n; + cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_NOIRQ | n; if (qemu_loglevel_mask(CPU_LOG_EXEC)) { vaddr pc = cpu->cc->get_pc(cpu); diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c index 113edcf..034f2f3 100644 --- a/accel/tcg/translator.c +++ b/accel/tcg/translator.c @@ -8,23 +8,24 @@ */ #include "qemu/osdep.h" +#include "qemu/bswap.h" #include "qemu/log.h" #include "qemu/error-report.h" -#include "exec/exec-all.h" +#include "accel/tcg/cpu-ldst-common.h" +#include "accel/tcg/cpu-mmu-index.h" +#include "exec/target_page.h" #include "exec/translator.h" -#include "exec/cpu_ldst.h" #include "exec/plugin-gen.h" -#include "exec/cpu_ldst.h" #include "tcg/tcg-op-common.h" -#include "internal-target.h" +#include "internal-common.h" #include "disas/disas.h" +#include "tb-internal.h" static void set_can_do_io(DisasContextBase *db, bool val) { QEMU_BUILD_BUG_ON(sizeof_field(CPUState, neg.can_do_io) != 1); tcg_gen_st8_i32(tcg_constant_i32(val), tcg_env, - offsetof(ArchCPU, parent_obj.neg.can_do_io) - - offsetof(ArchCPU, env)); + offsetof(CPUState, neg.can_do_io) - sizeof(CPUState)); } bool translator_io_start(DisasContextBase *db) @@ -47,8 +48,8 @@ static TCGOp *gen_tb_start(DisasContextBase *db, uint32_t cflags) if ((cflags & CF_USE_ICOUNT) || !(cflags & CF_NOIRQ)) { count = tcg_temp_new_i32(); tcg_gen_ld_i32(count, tcg_env, - offsetof(ArchCPU, parent_obj.neg.icount_decr.u32) - - offsetof(ArchCPU, env)); + offsetof(CPUState, neg.icount_decr.u32) - + sizeof(CPUState)); } if (cflags & CF_USE_ICOUNT) { @@ -77,8 +78,8 @@ static TCGOp *gen_tb_start(DisasContextBase *db, uint32_t cflags) if (cflags & CF_USE_ICOUNT) { tcg_gen_st16_i32(count, tcg_env, - offsetof(ArchCPU, parent_obj.neg.icount_decr.u16.low) - - offsetof(ArchCPU, env)); + offsetof(CPUState, neg.icount_decr.u16.low) - + sizeof(CPUState)); } return icount_start_insn; @@ -102,6 +103,11 @@ static void gen_tb_end(const TranslationBlock *tb, uint32_t cflags, } } +bool translator_is_same_page(const DisasContextBase *db, vaddr addr) +{ + return ((addr ^ db->pc_first) & TARGET_PAGE_MASK) == 0; +} + bool translator_use_goto_tb(DisasContextBase *db, vaddr dest) { /* Suppress goto_tb if requested. */ @@ -110,7 +116,7 @@ bool translator_use_goto_tb(DisasContextBase *db, vaddr dest) } /* Check for the dest on the same page as the start of the TB. 
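 *
 * [Editor's aside — example, not part of the patch: with 4 KiB pages,
 *  pc_first = 0x10ffc and dest = 0x11000 differ above bit 11, so
 *  ((addr ^ db->pc_first) & TARGET_PAGE_MASK) != 0 and goto_tb is
 *  suppressed; translator_is_same_page() now centralizes this test.]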
*/ - return ((db->pc_first ^ dest) & TARGET_PAGE_MASK) == 0; + return translator_is_same_page(db, dest); } void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns, @@ -129,13 +135,13 @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns, db->is_jmp = DISAS_NEXT; db->num_insns = 0; db->max_insns = *max_insns; - db->singlestep_enabled = cflags & CF_SINGLE_STEP; db->insn_start = NULL; db->fake_insn = false; db->host_addr[0] = host_pc; db->host_addr[1] = NULL; db->record_start = 0; db->record_len = 0; + db->code_mmuidx = cpu_mmu_index(cpu, true); ops->init_disas_context(db, cpu); tcg_debug_assert(db->is_jmp == DISAS_NEXT); /* no early exit */ @@ -259,12 +265,14 @@ static bool translator_ld(CPUArchState *env, DisasContextBase *db, if (likely(((base ^ last) & TARGET_PAGE_MASK) == 0)) { /* Entire read is from the first page. */ - memcpy(dest, host + (pc - base), len); - return true; + goto do_read; } if (unlikely(((base ^ pc) & TARGET_PAGE_MASK) == 0)) { - /* Read begins on the first page and extends to the second. */ + /* + * Read begins on the first page and extends to the second. + * The unaligned read is never atomic. + */ size_t len0 = -(pc | TARGET_PAGE_MASK); memcpy(dest, host + (pc - base), len0); pc += len0; @@ -323,7 +331,39 @@ static bool translator_ld(CPUArchState *env, DisasContextBase *db, host = db->host_addr[1]; } - memcpy(dest, host + (pc - base), len); + do_read: + /* + * Assume aligned reads should be atomic, if possible. + * We're not in a position to jump out with EXCP_ATOMIC. + */ + host += pc - base; + switch (len) { + case 2: + if (QEMU_IS_ALIGNED(pc, 2)) { + uint16_t t = qatomic_read((uint16_t *)host); + stw_he_p(dest, t); + return true; + } + break; + case 4: + if (QEMU_IS_ALIGNED(pc, 4)) { + uint32_t t = qatomic_read((uint32_t *)host); + stl_he_p(dest, t); + return true; + } + break; +#ifdef CONFIG_ATOMIC64 + case 8: + if (QEMU_IS_ALIGNED(pc, 8)) { + uint64_t t = qatomic_read__nocheck((uint64_t *)host); + stq_he_p(dest, t); + return true; + } + break; +#endif + } + /* Unaligned or partial read from the second page is not atomic. 
*/ + memcpy(dest, host, len); return true; } @@ -417,55 +457,62 @@ bool translator_st(const DisasContextBase *db, void *dest, uint8_t translator_ldub(CPUArchState *env, DisasContextBase *db, vaddr pc) { - uint8_t raw; + uint8_t val; - if (!translator_ld(env, db, &raw, pc, sizeof(raw))) { - raw = cpu_ldub_code(env, pc); - record_save(db, pc, &raw, sizeof(raw)); + if (!translator_ld(env, db, &val, pc, sizeof(val))) { + MemOpIdx oi = make_memop_idx(MO_UB, db->code_mmuidx); + val = cpu_ldb_code_mmu(env, pc, oi, 0); + record_save(db, pc, &val, sizeof(val)); } - return raw; + return val; } -uint16_t translator_lduw(CPUArchState *env, DisasContextBase *db, vaddr pc) +uint16_t translator_lduw_end(CPUArchState *env, DisasContextBase *db, + vaddr pc, MemOp endian) { - uint16_t raw, tgt; + uint16_t val; - if (translator_ld(env, db, &raw, pc, sizeof(raw))) { - tgt = tswap16(raw); - } else { - tgt = cpu_lduw_code(env, pc); - raw = tswap16(tgt); - record_save(db, pc, &raw, sizeof(raw)); + if (!translator_ld(env, db, &val, pc, sizeof(val))) { + MemOpIdx oi = make_memop_idx(MO_UW, db->code_mmuidx); + val = cpu_ldw_code_mmu(env, pc, oi, 0); + record_save(db, pc, &val, sizeof(val)); } - return tgt; + if (endian & MO_BSWAP) { + val = bswap16(val); + } + return val; } -uint32_t translator_ldl(CPUArchState *env, DisasContextBase *db, vaddr pc) +uint32_t translator_ldl_end(CPUArchState *env, DisasContextBase *db, + vaddr pc, MemOp endian) { - uint32_t raw, tgt; + uint32_t val; - if (translator_ld(env, db, &raw, pc, sizeof(raw))) { - tgt = tswap32(raw); - } else { - tgt = cpu_ldl_code(env, pc); - raw = tswap32(tgt); - record_save(db, pc, &raw, sizeof(raw)); + if (!translator_ld(env, db, &val, pc, sizeof(val))) { + MemOpIdx oi = make_memop_idx(MO_UL, db->code_mmuidx); + val = cpu_ldl_code_mmu(env, pc, oi, 0); + record_save(db, pc, &val, sizeof(val)); + } + if (endian & MO_BSWAP) { + val = bswap32(val); } - return tgt; + return val; } -uint64_t translator_ldq(CPUArchState *env, DisasContextBase *db, vaddr pc) +uint64_t translator_ldq_end(CPUArchState *env, DisasContextBase *db, + vaddr pc, MemOp endian) { - uint64_t raw, tgt; + uint64_t val; - if (translator_ld(env, db, &raw, pc, sizeof(raw))) { - tgt = tswap64(raw); - } else { - tgt = cpu_ldq_code(env, pc); - raw = tswap64(tgt); - record_save(db, pc, &raw, sizeof(raw)); + if (!translator_ld(env, db, &val, pc, sizeof(val))) { + MemOpIdx oi = make_memop_idx(MO_UQ, db->code_mmuidx); + val = cpu_ldq_code_mmu(env, pc, oi, 0); + record_save(db, pc, &val, sizeof(val)); + } + if (endian & MO_BSWAP) { + val = bswap64(val); } - return tgt; + return val; } void translator_fake_ld(DisasContextBase *db, const void *data, size_t len) diff --git a/accel/tcg/user-exec-stub.c b/accel/tcg/user-exec-stub.c index 4fbe2db..1d52f48 100644 --- a/accel/tcg/user-exec-stub.c +++ b/accel/tcg/user-exec-stub.c @@ -1,6 +1,7 @@ #include "qemu/osdep.h" #include "hw/core/cpu.h" #include "exec/replay-core.h" +#include "internal-common.h" void cpu_resume(CPUState *cpu) { @@ -18,6 +19,16 @@ void cpu_exec_reset_hold(CPUState *cpu) { } +/* User mode emulation does not support softmmu yet. */ + +void tlb_init(CPUState *cpu) +{ +} + +void tlb_destroy(CPUState *cpu) +{ +} + /* User mode emulation does not support record/replay yet. */ bool replay_exception(void) diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c index 80d2454..f25d80e 100644 --- a/accel/tcg/user-exec.c +++ b/accel/tcg/user-exec.c @@ -17,23 +17,30 @@ * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
*/ #include "qemu/osdep.h" -#include "hw/core/tcg-cpu-ops.h" +#include "accel/tcg/cpu-ops.h" #include "disas/disas.h" -#include "exec/exec-all.h" +#include "exec/vaddr.h" +#include "exec/tlb-flags.h" #include "tcg/tcg.h" #include "qemu/bitops.h" #include "qemu/rcu.h" -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst-common.h" +#include "accel/tcg/helper-retaddr.h" +#include "accel/tcg/probe.h" +#include "user/cpu_loop.h" +#include "user/guest-host.h" #include "qemu/main-loop.h" -#include "exec/translate-all.h" +#include "user/page-protection.h" #include "exec/page-protection.h" -#include "exec/helper-proto.h" +#include "exec/helper-proto-common.h" #include "qemu/atomic128.h" -#include "trace/trace-root.h" +#include "qemu/bswap.h" +#include "qemu/int128.h" +#include "trace.h" #include "tcg/tcg-ldst.h" +#include "backend-ldst.h" #include "internal-common.h" -#include "internal-target.h" -#include "user-retaddr.h" +#include "tb-internal.h" __thread uintptr_t helper_retaddr; @@ -119,9 +126,9 @@ MMUAccessType adjust_signal_pc(uintptr_t *pc, bool is_write) * guest, we'd end up in an infinite loop of retrying the faulting access. */ bool handle_sigsegv_accerr_write(CPUState *cpu, sigset_t *old_set, - uintptr_t host_pc, abi_ptr guest_addr) + uintptr_t host_pc, vaddr guest_addr) { - switch (page_unprotect(guest_addr, host_pc)) { + switch (page_unprotect(cpu, guest_addr, host_pc)) { case 0: /* * Fault not caused by a page marked unwritable to protect @@ -155,7 +162,7 @@ typedef struct PageFlagsNode { static IntervalTreeRoot pageflags_root; -static PageFlagsNode *pageflags_find(target_ulong start, target_ulong last) +static PageFlagsNode *pageflags_find(vaddr start, vaddr last) { IntervalTreeNode *n; @@ -163,8 +170,7 @@ static PageFlagsNode *pageflags_find(target_ulong start, target_ulong last) return n ? container_of(n, PageFlagsNode, itree) : NULL; } -static PageFlagsNode *pageflags_next(PageFlagsNode *p, target_ulong start, - target_ulong last) +static PageFlagsNode *pageflags_next(PageFlagsNode *p, vaddr start, vaddr last) { IntervalTreeNode *n; @@ -193,13 +199,22 @@ int walk_memory_regions(void *priv, walk_memory_regions_fn fn) return rc; } -static int dump_region(void *priv, target_ulong start, - target_ulong end, unsigned long prot) +static int dump_region(void *opaque, vaddr start, vaddr end, int prot) { - FILE *f = (FILE *)priv; + FILE *f = opaque; + uint64_t mask; + int width; - fprintf(f, TARGET_FMT_lx"-"TARGET_FMT_lx" "TARGET_FMT_lx" %c%c%c\n", - start, end, end - start, + if (guest_addr_max <= UINT32_MAX) { + mask = UINT32_MAX, width = 8; + } else { + mask = UINT64_MAX, width = 16; + } + + fprintf(f, "%0*" PRIx64 "-%0*" PRIx64 " %0*" PRIx64 " %c%c%c\n", + width, start & mask, + width, end & mask, + width, (end - start) & mask, ((prot & PAGE_READ) ? 'r' : '-'), ((prot & PAGE_WRITE) ? 'w' : '-'), ((prot & PAGE_EXEC) ? 'x' : '-')); @@ -209,14 +224,14 @@ static int dump_region(void *priv, target_ulong start, /* dump memory mappings */ void page_dump(FILE *f) { - const int length = sizeof(target_ulong) * 2; + int width = guest_addr_max <= UINT32_MAX ? 
8 : 16; fprintf(f, "%-*s %-*s %-*s %s\n", - length, "start", length, "end", length, "size", "prot"); + width, "start", width, "end", width, "size", "prot"); walk_memory_regions(f, dump_region); } -int page_get_flags(target_ulong address) +int page_get_flags(vaddr address) { PageFlagsNode *p = pageflags_find(address, address); @@ -239,7 +254,7 @@ int page_get_flags(target_ulong address) } /* A subroutine of page_set_flags: insert a new node for [start,last]. */ -static void pageflags_create(target_ulong start, target_ulong last, int flags) +static void pageflags_create(vaddr start, vaddr last, int flags) { PageFlagsNode *p = g_new(PageFlagsNode, 1); @@ -250,13 +265,13 @@ static void pageflags_create(target_ulong start, target_ulong last, int flags) } /* A subroutine of page_set_flags: remove everything in [start,last]. */ -static bool pageflags_unset(target_ulong start, target_ulong last) +static bool pageflags_unset(vaddr start, vaddr last) { bool inval_tb = false; while (true) { PageFlagsNode *p = pageflags_find(start, last); - target_ulong p_last; + vaddr p_last; if (!p) { break; @@ -295,8 +310,7 @@ static bool pageflags_unset(target_ulong start, target_ulong last) * A subroutine of page_set_flags: nothing overlaps [start,last], * but check adjacent mappings and maybe merge into a single range. */ -static void pageflags_create_merge(target_ulong start, target_ulong last, - int flags) +static void pageflags_create_merge(vaddr start, vaddr last, int flags) { PageFlagsNode *next = NULL, *prev = NULL; @@ -347,11 +361,11 @@ static void pageflags_create_merge(target_ulong start, target_ulong last, #define PAGE_STICKY (PAGE_ANON | PAGE_PASSTHROUGH | PAGE_TARGET_STICKY) /* A subroutine of page_set_flags: add flags to [start,last]. */ -static bool pageflags_set_clear(target_ulong start, target_ulong last, +static bool pageflags_set_clear(vaddr start, vaddr last, int set_flags, int clear_flags) { PageFlagsNode *p; - target_ulong p_start, p_last; + vaddr p_start, p_last; int p_flags, merge_flags; bool inval_tb = false; @@ -486,12 +500,7 @@ static bool pageflags_set_clear(target_ulong start, target_ulong last, return inval_tb; } -/* - * Modify the flags of a page and invalidate the code if necessary. - * The flag PAGE_WRITE_ORG is positioned automatically depending - * on PAGE_WRITE. The mmap_lock should already be held. - */ -void page_set_flags(target_ulong start, target_ulong last, int flags) +void page_set_flags(vaddr start, vaddr last, int flags) { bool reset = false; bool inval_tb = false; @@ -500,7 +509,7 @@ void page_set_flags(target_ulong start, target_ulong last, int flags) guest address space. If this assert fires, it probably indicates a missing call to h2g_valid. */ assert(start <= last); - assert(last <= GUEST_ADDR_MAX); + assert(last <= guest_addr_max); /* Only set PAGE_ANON with new mappings. */ assert(!(flags & PAGE_ANON) || (flags & PAGE_RESET)); assert_memory_lock(); @@ -527,13 +536,13 @@ void page_set_flags(target_ulong start, target_ulong last, int flags) ~(reset ? 
0 : PAGE_STICKY)); } if (inval_tb) { - tb_invalidate_phys_range(start, last); + tb_invalidate_phys_range(NULL, start, last); } } -bool page_check_range(target_ulong start, target_ulong len, int flags) +bool page_check_range(vaddr start, vaddr len, int flags) { - target_ulong last; + vaddr last; int locked; /* tri-state: =0: unlocked, +1: global, -1: local */ bool ret; @@ -582,7 +591,7 @@ bool page_check_range(target_ulong start, target_ulong len, int flags) break; } /* Asking about writable, but has been protected: undo. */ - if (!page_unprotect(start, 0)) { + if (!page_unprotect(NULL, start, 0)) { ret = false; break; } @@ -609,20 +618,19 @@ bool page_check_range(target_ulong start, target_ulong len, int flags) return ret; } -bool page_check_range_empty(target_ulong start, target_ulong last) +bool page_check_range_empty(vaddr start, vaddr last) { assert(last >= start); assert_memory_lock(); return pageflags_find(start, last) == NULL; } -target_ulong page_find_range_empty(target_ulong min, target_ulong max, - target_ulong len, target_ulong align) +vaddr page_find_range_empty(vaddr min, vaddr max, vaddr len, vaddr align) { - target_ulong len_m1, align_m1; + vaddr len_m1, align_m1; assert(min <= max); - assert(max <= GUEST_ADDR_MAX); + assert(max <= guest_addr_max); assert(len != 0); assert(is_power_of_2(align)); assert_memory_lock(); @@ -657,10 +665,10 @@ target_ulong page_find_range_empty(target_ulong min, target_ulong max, } } -void page_protect(tb_page_addr_t address) +void tb_lock_page0(tb_page_addr_t address) { PageFlagsNode *p; - target_ulong start, last; + vaddr start, last; int host_page_size = qemu_real_host_page_size(); int prot; @@ -702,11 +710,13 @@ void page_protect(tb_page_addr_t address) * immediately exited. (We can only return 2 if the 'pc' argument is * non-zero.) */ -int page_unprotect(target_ulong address, uintptr_t pc) +int page_unprotect(CPUState *cpu, tb_page_addr_t address, uintptr_t pc) { PageFlagsNode *p; bool current_tb_invalidated; + assert((cpu == NULL) == (pc == 0)); + /* * Technically this isn't safe inside a signal handler. However we * know this only ever happens in a synchronous SEGV handler, so in @@ -729,15 +739,15 @@ int page_unprotect(target_ulong address, uintptr_t pc) * this thread raced with another one which got here first and * set the page to PAGE_WRITE and did the TB invalidate for us. 
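An aside on page_find_range_empty() a few hunks up: it now takes vaddr and bounds against the runtime guest_addr_max instead of the compile-time GUEST_ADDR_MAX, but the search itself is unchanged. A linear-scan sketch of its len_m1/align_m1 arithmetic (Range, overlaps and find_range_empty are illustrative stand-ins for the interval-tree code, and address wrap-around is assumed not to occur):

    #include <assert.h>
    #include <inttypes.h>
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef struct { uint64_t start, last; } Range;   /* inclusive bounds */

    static bool overlaps(Range r, uint64_t start, uint64_t last)
    {
        return r.start <= last && start <= r.last;
    }

    /*
     * Lowest aligned start in [min, max] whose [start, start + len - 1]
     * misses every occupied range.  Same len_m1/align_m1 arithmetic as
     * page_find_range_empty(); the linear scan stands in for the
     * interval tree.
     */
    static uint64_t find_range_empty(const Range *occ, int n,
                                     uint64_t min, uint64_t max,
                                     uint64_t len, uint64_t align)
    {
        uint64_t len_m1 = len - 1, align_m1 = align - 1;

        assert(min <= max && len != 0 && (align & align_m1) == 0);

        uint64_t start = (min + align_m1) & ~align_m1;
        while (start <= max && max - start >= len_m1) {
            int i;
            for (i = 0; i < n; i++) {
                if (overlaps(occ[i], start, start + len_m1)) {
                    /* Hop past the blocker, re-aligning upward. */
                    start = (occ[i].last + 1 + align_m1) & ~align_m1;
                    break;
                }
            }
            if (i == n) {
                return start;
            }
        }
        return (uint64_t)-1;                  /* no hole big enough */
    }

    int main(void)
    {
        Range occ[] = { { 0x1000, 0x2fff }, { 0x4000, 0x4fff } };
        printf("0x%" PRIx64 "\n",             /* prints 0x3000 */
               find_range_empty(occ, 2, 0x1000, 0xffff, 0x1000, 0x1000));
        return 0;
    }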
*/ -#ifdef TARGET_HAS_PRECISE_SMC - TranslationBlock *current_tb = tcg_tb_lookup(pc); - if (current_tb) { - current_tb_invalidated = tb_cflags(current_tb) & CF_INVALID; + if (pc && cpu->cc->tcg_ops->precise_smc) { + TranslationBlock *current_tb = tcg_tb_lookup(pc); + if (current_tb) { + current_tb_invalidated = tb_cflags(current_tb) & CF_INVALID; + } } -#endif } else { int host_page_size = qemu_real_host_page_size(); - target_ulong start, len, i; + vaddr start, len, i; int prot; if (host_page_size <= TARGET_PAGE_SIZE) { @@ -745,14 +755,15 @@ int page_unprotect(target_ulong address, uintptr_t pc) len = TARGET_PAGE_SIZE; prot = p->flags | PAGE_WRITE; pageflags_set_clear(start, start + len - 1, PAGE_WRITE, 0); - current_tb_invalidated = tb_invalidate_phys_page_unwind(start, pc); + current_tb_invalidated = + tb_invalidate_phys_page_unwind(cpu, start, pc); } else { start = address & -host_page_size; len = host_page_size; prot = 0; for (i = 0; i < len; i += TARGET_PAGE_SIZE) { - target_ulong addr = start + i; + vaddr addr = start + i; p = pageflags_find(addr, addr); if (p) { @@ -768,7 +779,7 @@ int page_unprotect(target_ulong address, uintptr_t pc) * the corresponding translated code. */ current_tb_invalidated |= - tb_invalidate_phys_page_unwind(addr, pc); + tb_invalidate_phys_page_unwind(cpu, addr, pc); } } if (prot & PAGE_EXEC) { @@ -806,7 +817,7 @@ static int probe_access_internal(CPUArchState *env, vaddr addr, if (guest_addr_valid_untagged(addr)) { int page_flags = page_get_flags(addr); if (page_flags & acc_flag) { - if ((acc_flag == PAGE_READ || acc_flag == PAGE_WRITE) + if (access_type != MMU_INST_FETCH && cpu_plugin_mem_cbs_enabled(env_cpu(env))) { return TLB_MMIO; } @@ -848,6 +859,12 @@ void *probe_access(CPUArchState *env, vaddr addr, int size, return size ? g2h(env_cpu(env), addr) : NULL; } +void *tlb_vaddr_to_host(CPUArchState *env, vaddr addr, + MMUAccessType access_type, int mmu_idx) +{ + return g2h(env_cpu(env), addr); +} + tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, vaddr addr, void **hostp) { @@ -862,7 +879,6 @@ tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, vaddr addr, return addr; } -#ifdef TARGET_PAGE_DATA_SIZE /* * Allocate chunks of target data together. For the only current user, * if we allocate one hunk per page, we have overhead of 40/128 or 40%. @@ -878,10 +894,16 @@ typedef struct TargetPageDataNode { } TargetPageDataNode; static IntervalTreeRoot targetdata_root; +static size_t target_page_data_size; -void page_reset_target_data(target_ulong start, target_ulong last) +void page_reset_target_data(vaddr start, vaddr last) { IntervalTreeNode *n, *next; + size_t size = target_page_data_size; + + if (likely(size == 0)) { + return; + } assert_memory_lock(); @@ -893,7 +915,7 @@ void page_reset_target_data(target_ulong start, target_ulong last) n != NULL; n = next, next = next ? 
interval_tree_iter_next(n, start, last) : NULL) { - target_ulong n_start, n_last, p_ofs, p_len; + vaddr n_start, n_last, p_ofs, p_len; TargetPageDataNode *t = container_of(n, TargetPageDataNode, itree); if (n->start >= start && n->last <= last) { @@ -912,16 +934,21 @@ void page_reset_target_data(target_ulong start, target_ulong last) n_last = MIN(last, n->last); p_len = (n_last + 1 - n_start) >> TARGET_PAGE_BITS; - memset(t->data + p_ofs * TARGET_PAGE_DATA_SIZE, 0, - p_len * TARGET_PAGE_DATA_SIZE); + memset(t->data + p_ofs * size, 0, p_len * size); } } -void *page_get_target_data(target_ulong address) +void *page_get_target_data(vaddr address, size_t size) { IntervalTreeNode *n; TargetPageDataNode *t; - target_ulong page, region, p_ofs; + vaddr page, region, p_ofs; + + /* Remember the size from the first call, and it should be constant. */ + if (unlikely(target_page_data_size != size)) { + assert(target_page_data_size == 0); + target_page_data_size = size; + } page = address & TARGET_PAGE_MASK; region = address & TBD_MASK; @@ -937,8 +964,7 @@ void *page_get_target_data(target_ulong address) mmap_lock(); n = interval_tree_iter_first(&targetdata_root, page, page); if (!n) { - t = g_malloc0(sizeof(TargetPageDataNode) - + TPD_PAGES * TARGET_PAGE_DATA_SIZE); + t = g_malloc0(sizeof(TargetPageDataNode) + TPD_PAGES * size); n = &t->itree; n->start = region; n->last = region | ~TBD_MASK; @@ -949,18 +975,15 @@ void *page_get_target_data(target_ulong address) t = container_of(n, TargetPageDataNode, itree); p_ofs = (page - region) >> TARGET_PAGE_BITS; - return t->data + p_ofs * TARGET_PAGE_DATA_SIZE; + return t->data + p_ofs * size; } -#else -void page_reset_target_data(target_ulong start, target_ulong last) { } -#endif /* TARGET_PAGE_DATA_SIZE */ /* The system-mode versions of these helpers are in cputlb.c. */ static void *cpu_mmu_lookup(CPUState *cpu, vaddr addr, MemOp mop, uintptr_t ra, MMUAccessType type) { - int a_bits = get_alignment_bits(mop); + int a_bits = memop_alignment_bits(mop); void *ret; /* Enforce guest required alignment. */ @@ -973,6 +996,85 @@ static void *cpu_mmu_lookup(CPUState *cpu, vaddr addr, return ret; } +/* physical memory access (slow version, mainly for debug) */ +int cpu_memory_rw_debug(CPUState *cpu, vaddr addr, + void *ptr, size_t len, bool is_write) +{ + int flags; + vaddr l, page; + uint8_t *buf = ptr; + ssize_t written; + int ret = -1; + int fd = -1; + + mmap_lock(); + + while (len > 0) { + page = addr & TARGET_PAGE_MASK; + l = (page + TARGET_PAGE_SIZE) - addr; + if (l > len) { + l = len; + } + flags = page_get_flags(page); + if (!(flags & PAGE_VALID)) { + goto out_close; + } + if (is_write) { + if (flags & PAGE_WRITE) { + memcpy(g2h(cpu, addr), buf, l); + } else { + /* Bypass the host page protection using ptrace. */ + if (fd == -1) { + fd = open("/proc/self/mem", O_WRONLY); + if (fd == -1) { + goto out; + } + } + /* + * If there is a TranslationBlock and we weren't bypassing the + * host page protection, the memcpy() above would SEGV, + * ultimately leading to page_unprotect(). So invalidate the + * translations manually. Both invalidation and pwrite() must + * be under mmap_lock() in order to prevent the creation of + * another TranslationBlock in between. + */ + tb_invalidate_phys_range(NULL, addr, addr + l - 1); + written = pwrite(fd, buf, l, + (off_t)(uintptr_t)g2h_untagged(addr)); + if (written != l) { + goto out_close; + } + } + } else if (flags & PAGE_READ) { + memcpy(buf, g2h(cpu, addr), l); + } else { + /* Bypass the host page protection using ptrace. 
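On the bypass just mentioned: cpu_memory_rw_debug() cannot simply memcpy() into a guest page that tb_lock_page0() made read-only on the host, so it goes through /proc/self/mem, which the kernel services as a forced, ptrace-style access that ignores the PTE protection. A minimal Linux-only demonstration of the same trick (hypothetical demo code, not part of the patch):

    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int main(void)
    {
        long pagesz = sysconf(_SC_PAGESIZE);

        /* A private page deliberately mapped read-only: a direct
         * store here would raise SIGSEGV. */
        char *p = mmap(NULL, pagesz, PROT_READ,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) {
            return 1;
        }

        /* Write through /proc/self/mem instead: the kernel performs
         * the access on our behalf, as it would for ptrace(). */
        int fd = open("/proc/self/mem", O_WRONLY);
        if (fd < 0) {
            return 1;
        }
        const char msg[] = "patched";
        if (pwrite(fd, msg, sizeof(msg), (off_t)(uintptr_t)p)
            != (ssize_t)sizeof(msg)) {
            return 1;
        }
        close(fd);

        printf("%s\n", p);                    /* prints "patched" */
        return 0;
    }

The (off_t)(uintptr_t) cast of the host address matches the one in the patch: the file offset into /proc/self/mem is the virtual address itself.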
*/ + if (fd == -1) { + fd = open("/proc/self/mem", O_RDONLY); + if (fd == -1) { + goto out; + } + } + if (pread(fd, buf, l, + (off_t)(uintptr_t)g2h_untagged(addr)) != l) { + goto out_close; + } + } + len -= l; + buf += l; + addr += l; + } + ret = 0; +out_close: + if (fd != -1) { + close(fd); + } +out: + mmap_unlock(); + + return ret; +} + #include "ldst_atomicity.c.inc" static uint8_t do_ld1_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi, @@ -981,7 +1083,7 @@ static uint8_t do_ld1_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi, void *haddr; uint8_t ret; - cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD); + cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD); haddr = cpu_mmu_lookup(cpu, addr, get_memop(oi), ra, access_type); ret = ldub_p(haddr); clear_helper_retaddr(); @@ -995,7 +1097,7 @@ static uint16_t do_ld2_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi, uint16_t ret; MemOp mop = get_memop(oi); - cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD); + cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD); haddr = cpu_mmu_lookup(cpu, addr, mop, ra, access_type); ret = load_atom_2(cpu, ra, haddr, mop); clear_helper_retaddr(); @@ -1013,7 +1115,7 @@ static uint32_t do_ld4_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi, uint32_t ret; MemOp mop = get_memop(oi); - cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD); + cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD); haddr = cpu_mmu_lookup(cpu, addr, mop, ra, access_type); ret = load_atom_4(cpu, ra, haddr, mop); clear_helper_retaddr(); @@ -1031,7 +1133,7 @@ static uint64_t do_ld8_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi, uint64_t ret; MemOp mop = get_memop(oi); - cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD); + cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD); haddr = cpu_mmu_lookup(cpu, addr, mop, ra, access_type); ret = load_atom_8(cpu, ra, haddr, mop); clear_helper_retaddr(); @@ -1042,7 +1144,7 @@ static uint64_t do_ld8_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi, return ret; } -static Int128 do_ld16_mmu(CPUState *cpu, abi_ptr addr, +static Int128 do_ld16_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi, uintptr_t ra) { void *haddr; @@ -1050,7 +1152,7 @@ static Int128 do_ld16_mmu(CPUState *cpu, abi_ptr addr, MemOp mop = get_memop(oi); tcg_debug_assert((mop & MO_SIZE) == MO_128); - cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD); + cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD); haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_LOAD); ret = load_atom_16(cpu, ra, haddr, mop); clear_helper_retaddr(); @@ -1066,7 +1168,7 @@ static void do_st1_mmu(CPUState *cpu, vaddr addr, uint8_t val, { void *haddr; - cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST); + cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST); haddr = cpu_mmu_lookup(cpu, addr, get_memop(oi), ra, MMU_DATA_STORE); stb_p(haddr, val); clear_helper_retaddr(); @@ -1078,7 +1180,7 @@ static void do_st2_mmu(CPUState *cpu, vaddr addr, uint16_t val, void *haddr; MemOp mop = get_memop(oi); - cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST); + cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST); haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE); if (mop & MO_BSWAP) { @@ -1094,7 +1196,7 @@ static void do_st4_mmu(CPUState *cpu, vaddr addr, uint32_t val, void *haddr; MemOp mop = get_memop(oi); - cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST); + cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST); haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE); if (mop & MO_BSWAP) { @@ -1110,7 +1212,7 @@ static void do_st8_mmu(CPUState *cpu, vaddr addr, uint64_t val, void *haddr; MemOp mop = get_memop(oi); - cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST); + cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST); haddr = cpu_mmu_lookup(cpu, addr, 
mop, ra, MMU_DATA_STORE); if (mop & MO_BSWAP) { @@ -1126,7 +1228,7 @@ static void do_st16_mmu(CPUState *cpu, vaddr addr, Int128 val, void *haddr; MemOpIdx mop = get_memop(oi); - cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST); + cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST); haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE); if (mop & MO_BSWAP) { @@ -1136,101 +1238,28 @@ static void do_st16_mmu(CPUState *cpu, vaddr addr, Int128 val, clear_helper_retaddr(); } -uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr ptr) -{ - uint32_t ret; - - set_helper_retaddr(1); - ret = ldub_p(g2h_untagged(ptr)); - clear_helper_retaddr(); - return ret; -} - -uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr ptr) -{ - uint32_t ret; - - set_helper_retaddr(1); - ret = lduw_p(g2h_untagged(ptr)); - clear_helper_retaddr(); - return ret; -} - -uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr ptr) -{ - uint32_t ret; - - set_helper_retaddr(1); - ret = ldl_p(g2h_untagged(ptr)); - clear_helper_retaddr(); - return ret; -} - -uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr ptr) -{ - uint64_t ret; - - set_helper_retaddr(1); - ret = ldq_p(g2h_untagged(ptr)); - clear_helper_retaddr(); - return ret; -} - -uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr, +uint8_t cpu_ldb_code_mmu(CPUArchState *env, vaddr addr, MemOpIdx oi, uintptr_t ra) { - void *haddr; - uint8_t ret; - - haddr = cpu_mmu_lookup(env_cpu(env), addr, oi, ra, MMU_INST_FETCH); - ret = ldub_p(haddr); - clear_helper_retaddr(); - return ret; + return do_ld1_mmu(env_cpu(env), addr, oi, ra ? ra : 1, MMU_INST_FETCH); } -uint16_t cpu_ldw_code_mmu(CPUArchState *env, abi_ptr addr, +uint16_t cpu_ldw_code_mmu(CPUArchState *env, vaddr addr, MemOpIdx oi, uintptr_t ra) { - void *haddr; - uint16_t ret; - - haddr = cpu_mmu_lookup(env_cpu(env), addr, oi, ra, MMU_INST_FETCH); - ret = lduw_p(haddr); - clear_helper_retaddr(); - if (get_memop(oi) & MO_BSWAP) { - ret = bswap16(ret); - } - return ret; + return do_ld2_mmu(env_cpu(env), addr, oi, ra ? ra : 1, MMU_INST_FETCH); } -uint32_t cpu_ldl_code_mmu(CPUArchState *env, abi_ptr addr, +uint32_t cpu_ldl_code_mmu(CPUArchState *env, vaddr addr, MemOpIdx oi, uintptr_t ra) { - void *haddr; - uint32_t ret; - - haddr = cpu_mmu_lookup(env_cpu(env), addr, oi, ra, MMU_INST_FETCH); - ret = ldl_p(haddr); - clear_helper_retaddr(); - if (get_memop(oi) & MO_BSWAP) { - ret = bswap32(ret); - } - return ret; + return do_ld4_mmu(env_cpu(env), addr, oi, ra ? ra : 1, MMU_INST_FETCH); } -uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr, +uint64_t cpu_ldq_code_mmu(CPUArchState *env, vaddr addr, MemOpIdx oi, uintptr_t ra) { - void *haddr; - uint64_t ret; - - haddr = cpu_mmu_lookup(env_cpu(env), addr, oi, ra, MMU_DATA_LOAD); - ret = ldq_p(haddr); - clear_helper_retaddr(); - if (get_memop(oi) & MO_BSWAP) { - ret = bswap64(ret); - } - return ret; + return do_ld8_mmu(env_cpu(env), addr, oi, ra ? ra : 1, MMU_INST_FETCH); } #include "ldst_common.c.inc" @@ -1242,7 +1271,7 @@ static void *atomic_mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi, int size, uintptr_t retaddr) { MemOp mop = get_memop(oi); - int a_bits = get_alignment_bits(mop); + int a_bits = memop_alignment_bits(mop); void *ret; /* Enforce guest required alignment. 
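On the guest-alignment enforcement in these lookups: get_alignment_bits() is renamed memop_alignment_bits(), and the test it feeds is a plain low-bits mask check. A sketch with an invented MemOp bit layout (DEMO_ALIGN_SHIFT, demo_alignment_bits and access_aligned are made up for the demo; only the mask test mirrors what cpu_mmu_lookup() really does):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Invented layout: three bits starting at bit 4 hold log2 of the
     * required alignment.  The real MemOp packing differs. */
    enum { DEMO_ALIGN_SHIFT = 4 };

    static int demo_alignment_bits(unsigned memop)
    {
        return (memop >> DEMO_ALIGN_SHIFT) & 7;
    }

    static bool access_aligned(uint64_t addr, unsigned memop)
    {
        int a_bits = demo_alignment_bits(memop);
        /* Misaligned iff any of the a_bits low address bits are set. */
        return (addr & (((uint64_t)1 << a_bits) - 1)) == 0;
    }

    int main(void)
    {
        unsigned memop = 3u << DEMO_ALIGN_SHIFT; /* need 8-byte alignment */
        printf("%d %d\n", access_aligned(0x1000, memop),
               access_aligned(0x1004, memop));   /* prints "1 0" */
        return 0;
    }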
*/ diff --git a/accel/tcg/user-retaddr.h b/accel/tcg/user-retaddr.h deleted file mode 100644 index e0f57e1..0000000 --- a/accel/tcg/user-retaddr.h +++ /dev/null @@ -1,28 +0,0 @@ -#ifndef ACCEL_TCG_USER_RETADDR_H -#define ACCEL_TCG_USER_RETADDR_H - -#include "qemu/atomic.h" - -extern __thread uintptr_t helper_retaddr; - -static inline void set_helper_retaddr(uintptr_t ra) -{ - helper_retaddr = ra; - /* - * Ensure that this write is visible to the SIGSEGV handler that - * may be invoked due to a subsequent invalid memory operation. - */ - signal_barrier(); -} - -static inline void clear_helper_retaddr(void) -{ - /* - * Ensure that previous memory operations have succeeded before - * removing the data visible to the signal handler. - */ - signal_barrier(); - helper_retaddr = 0; -} - -#endif diff --git a/accel/tcg/vcpu-state.h b/accel/tcg/vcpu-state.h index e407d91..2e3464b 100644 --- a/accel/tcg/vcpu-state.h +++ b/accel/tcg/vcpu-state.h @@ -1,6 +1,11 @@ /* - * SPDX-FileContributor: Philippe Mathieu-Daudé <philmd@linaro.org> - * SPDX-FileCopyrightText: 2023 Linaro Ltd. + * TaskState helpers for QEMU + * + * Copyright (c) 2023 Linaro Ltd. + * + * Authors: + * Philippe Mathieu-Daudé + * * SPDX-License-Identifier: GPL-2.0-or-later */ #ifndef ACCEL_TCG_VCPU_STATE_H diff --git a/accel/tcg/watchpoint.c b/accel/tcg/watchpoint.c index d3aab11..cfb37a4 100644 --- a/accel/tcg/watchpoint.c +++ b/accel/tcg/watchpoint.c @@ -19,13 +19,15 @@ #include "qemu/osdep.h" #include "qemu/main-loop.h" -#include "qemu/error-report.h" -#include "exec/exec-all.h" -#include "exec/translate-all.h" -#include "sysemu/tcg.h" -#include "sysemu/replay.h" -#include "hw/core/tcg-cpu-ops.h" +#include "exec/breakpoint.h" +#include "exec/cpu-interrupt.h" +#include "exec/page-protection.h" +#include "exec/translation-block.h" +#include "system/tcg.h" +#include "system/replay.h" +#include "accel/tcg/cpu-ops.h" #include "hw/core/cpu.h" +#include "internal-common.h" /* * Return true if this watchpoint address matches the specified @@ -66,7 +68,6 @@ int cpu_watchpoint_address_matches(CPUState *cpu, vaddr addr, vaddr len) void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len, MemTxAttrs attrs, int flags, uintptr_t ra) { - CPUClass *cc = CPU_GET_CLASS(cpu); CPUWatchpoint *wp; assert(tcg_enabled()); @@ -82,9 +83,9 @@ void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len, return; } - if (cc->tcg_ops->adjust_watchpoint_address) { + if (cpu->cc->tcg_ops->adjust_watchpoint_address) { /* this is currently used only by ARM BE32 */ - addr = cc->tcg_ops->adjust_watchpoint_address(cpu, addr, len); + addr = cpu->cc->tcg_ops->adjust_watchpoint_address(cpu, addr, len); } assert((flags & ~BP_MEM_ACCESS) == 0); @@ -116,24 +117,21 @@ void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len, wp->hitattrs = attrs; if (wp->flags & BP_CPU - && cc->tcg_ops->debug_check_watchpoint - && !cc->tcg_ops->debug_check_watchpoint(cpu, wp)) { + && cpu->cc->tcg_ops->debug_check_watchpoint + && !cpu->cc->tcg_ops->debug_check_watchpoint(cpu, wp)) { wp->flags &= ~BP_WATCHPOINT_HIT; continue; } cpu->watchpoint_hit = wp; - mmap_lock(); /* This call also restores vCPU state */ tb_check_watchpoint(cpu, ra); if (wp->flags & BP_STOP_BEFORE_ACCESS) { cpu->exception_index = EXCP_DEBUG; - mmap_unlock(); cpu_loop_exit(cpu); } else { /* Force execution of one insn next time. 
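user-retaddr.h is deleted just above; going by the new accel/tcg/helper-retaddr.h include earlier in this patch, its contents move there rather than disappear. The pattern it implements is worth restating: publish a return address before a guest access that may fault, and withdraw it only after the access completes, with compiler barriers on both sides so the SIGSEGV handler observes a consistent value. A compilable echo of the deleted header (using __atomic_signal_fence as a stand-in for QEMU's signal_barrier()):

    #include <stdint.h>

    /* Stand-in for QEMU's signal_barrier(): a compiler-only fence that
     * orders this thread's accesses against its own signal handlers. */
    #define signal_barrier() __atomic_signal_fence(__ATOMIC_SEQ_CST)

    __thread uintptr_t helper_retaddr;

    static void set_helper_retaddr(uintptr_t ra)
    {
        helper_retaddr = ra;
        /* Publish before the guest access that may SIGSEGV... */
        signal_barrier();
    }

    static void clear_helper_retaddr(void)
    {
        /* ...and withdraw only once that access has completed. */
        signal_barrier();
        helper_retaddr = 0;
    }

    int main(void)
    {
        set_helper_retaddr(1);    /* sentinel: "inside a helper" */
        /* ...a guest memory access would go here... */
        clear_helper_retaddr();
        return 0;
    }

The ra ? ra : 1 in the cpu_ld*_code_mmu() wrappers above feeds this same variable: any nonzero value, even the sentinel 1, tells the fault handler that a helper was active.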
*/ cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(cpu); - mmap_unlock(); cpu_loop_exit_noexc(cpu); } } else { diff --git a/accel/xen/xen-all.c b/accel/xen/xen-all.c index 0bdefce..de52a8f 100644 --- a/accel/xen/xen-all.c +++ b/accel/xen/xen-all.c @@ -18,9 +18,10 @@ #include "hw/xen/xen_igd.h" #include "chardev/char.h" #include "qemu/accel.h" -#include "sysemu/cpus.h" -#include "sysemu/xen.h" -#include "sysemu/runstate.h" +#include "system/accel-ops.h" +#include "system/cpus.h" +#include "system/xen.h" +#include "system/runstate.h" #include "migration/misc.h" #include "migration/global_state.h" #include "hw/boards.h" @@ -115,7 +116,7 @@ static int xen_init(MachineState *ms) return 0; } -static void xen_accel_class_init(ObjectClass *oc, void *data) +static void xen_accel_class_init(ObjectClass *oc, const void *data) { AccelClass *ac = ACCEL_CLASS(oc); static GlobalProperty compat[] = { @@ -146,7 +147,7 @@ static const TypeInfo xen_accel_type = { .class_init = xen_accel_class_init, }; -static void xen_accel_ops_class_init(ObjectClass *oc, void *data) +static void xen_accel_ops_class_init(ObjectClass *oc, const void *data) { AccelOpsClass *ops = ACCEL_OPS_CLASS(oc);
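Last, the two xen-all.c class_init hunks are part of a tree-wide constification of QOM class data. The payoff is that static const tables can be handed to the callback without casting away const; a sketch of the idea (demo_class_init and the table contents are illustrative, not the real QOM registration machinery):

    #include <stdio.h>

    typedef struct {
        const char *driver, *property, *value;
    } GlobalProperty;

    /* New-style callback: class data arrives as const void *, so a
     * const table can be registered without a cast. */
    static void demo_class_init(const void *data)
    {
        const GlobalProperty *compat = data;
        printf("%s.%s=%s\n", compat[0].driver,
               compat[0].property, compat[0].value);
    }

    int main(void)
    {
        /* Hypothetical entry; the real xen compat table differs. */
        static const GlobalProperty compat[] = {
            { "migration", "store-global-state", "off" },
        };
        demo_class_init(compat);
        return 0;
    }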