diff options
Diffstat (limited to 'accel/kvm/kvm-all.c')
-rw-r--r-- | accel/kvm/kvm-all.c | 130 |
1 files changed, 95 insertions, 35 deletions
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index f89568b..a106d1b 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -33,8 +33,9 @@ #include "system/cpus.h" #include "system/accel-blocker.h" #include "qemu/bswap.h" -#include "exec/memory.h" -#include "exec/ram_addr.h" +#include "exec/tswap.h" +#include "system/memory.h" +#include "system/ram_addr.h" #include "qemu/event_notifier.h" #include "qemu/main-loop.h" #include "trace.h" @@ -57,6 +58,11 @@ #include <sys/eventfd.h> #endif +#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__) +# define KVM_HAVE_MCE_INJECTION 1 +#endif + + /* KVM uses PAGE_SIZE in its definition of KVM_COALESCED_MMIO_MAX. We * need to use the real host PAGE_SIZE, as that's what KVM will use. */ @@ -93,6 +99,7 @@ bool kvm_allowed; bool kvm_readonly_mem_allowed; bool kvm_vm_attributes_allowed; bool kvm_msi_use_devid; +bool kvm_pre_fault_memory_supported; static bool kvm_has_guest_debug; static int kvm_sstep_flags; static bool kvm_immediate_exit; @@ -437,9 +444,8 @@ int kvm_unpark_vcpu(KVMState *s, unsigned long vcpu_id) return kvm_fd; } -static void kvm_reset_parked_vcpus(void *param) +static void kvm_reset_parked_vcpus(KVMState *s) { - KVMState *s = param; struct KVMParkedVcpu *cpu; QLIST_FOREACH(cpu, &s->kvm_parked_vcpus, node) { @@ -447,7 +453,13 @@ static void kvm_reset_parked_vcpus(void *param) } } -int kvm_create_vcpu(CPUState *cpu) +/** + * kvm_create_vcpu - Gets a parked KVM vCPU or creates a KVM vCPU + * @cpu: QOM CPUState object for which KVM vCPU has to be fetched/created. + * + * @returns: 0 when success, errno (<0) when failed. + */ +static int kvm_create_vcpu(CPUState *cpu) { unsigned long vcpu_id = kvm_arch_vcpu_id(cpu); KVMState *s = kvm_state; @@ -466,7 +478,9 @@ int kvm_create_vcpu(CPUState *cpu) cpu->kvm_fd = kvm_fd; cpu->kvm_state = s; - cpu->vcpu_dirty = true; + if (!s->guest_state_protected) { + cpu->vcpu_dirty = true; + } cpu->dirty_pages = 0; cpu->throttle_us_per_full = 0; @@ -507,16 +521,23 @@ static int do_kvm_destroy_vcpu(CPUState *cpu) goto err; } + /* If I am the CPU that created coalesced_mmio_ring, then discard it */ + if (s->coalesced_mmio_ring == (void *)cpu->kvm_run + PAGE_SIZE) { + s->coalesced_mmio_ring = NULL; + } + ret = munmap(cpu->kvm_run, mmap_size); if (ret < 0) { goto err; } + cpu->kvm_run = NULL; if (cpu->kvm_dirty_gfns) { ret = munmap(cpu->kvm_dirty_gfns, s->kvm_dirty_ring_bytes); if (ret < 0) { goto err; } + cpu->kvm_dirty_gfns = NULL; } kvm_park_vcpu(cpu); @@ -540,6 +561,11 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp) trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); + ret = kvm_arch_pre_create_vcpu(cpu, errp); + if (ret < 0) { + goto err; + } + ret = kvm_create_vcpu(cpu); if (ret < 0) { error_setg_errno(errp, -ret, @@ -595,6 +621,31 @@ err: return ret; } +void kvm_close(void) +{ + CPUState *cpu; + + if (!kvm_state || kvm_state->fd == -1) { + return; + } + + CPU_FOREACH(cpu) { + cpu_remove_sync(cpu); + close(cpu->kvm_fd); + cpu->kvm_fd = -1; + close(cpu->kvm_vcpu_stats_fd); + cpu->kvm_vcpu_stats_fd = -1; + } + + if (kvm_state && kvm_state->fd != -1) { + close(kvm_state->vmfd); + kvm_state->vmfd = -1; + close(kvm_state->fd); + kvm_state->fd = -1; + } + kvm_state = NULL; +} + /* * dirty pages logging control */ @@ -1314,21 +1365,22 @@ bool kvm_hwpoisoned_mem(void) static uint32_t adjust_ioeventfd_endianness(uint32_t val, uint32_t size) { -#if HOST_BIG_ENDIAN != TARGET_BIG_ENDIAN - /* The kernel expects ioeventfd values in HOST_BIG_ENDIAN - * endianness, but the memory core hands them in target endianness. - * For example, PPC is always treated as big-endian even if running - * on KVM and on PPC64LE. Correct here. - */ - switch (size) { - case 2: - val = bswap16(val); - break; - case 4: - val = bswap32(val); - break; + if (target_needs_bswap()) { + /* + * The kernel expects ioeventfd values in HOST_BIG_ENDIAN + * endianness, but the memory core hands them in target endianness. + * For example, PPC is always treated as big-endian even if running + * on KVM and on PPC64LE. Correct here, swapping back. + */ + switch (size) { + case 2: + val = bswap16(val); + break; + case 4: + val = bswap32(val); + break; + } } -#endif return val; } @@ -2420,7 +2472,7 @@ static int kvm_recommended_vcpus(KVMState *s) static int kvm_max_vcpus(KVMState *s) { - int ret = kvm_check_extension(s, KVM_CAP_MAX_VCPUS); + int ret = kvm_vm_check_extension(s, KVM_CAP_MAX_VCPUS); return (ret) ? ret : kvm_recommended_vcpus(s); } @@ -2450,13 +2502,10 @@ uint32_t kvm_dirty_ring_size(void) return kvm_state->kvm_dirty_ring_size; } -static int do_kvm_create_vm(MachineState *ms, int type) +static int do_kvm_create_vm(KVMState *s, int type) { - KVMState *s; int ret; - s = KVM_STATE(ms->accelerator); - do { ret = kvm_ioctl(s, KVM_CREATE_VM, type); } while (ret == -EINTR); @@ -2553,7 +2602,7 @@ static int kvm_setup_dirty_ring(KVMState *s) return 0; } -static int kvm_init(MachineState *ms) +static int kvm_init(AccelState *as, MachineState *ms) { MachineClass *mc = MACHINE_GET_CLASS(ms); static const char upgrade_note[] = @@ -2568,15 +2617,13 @@ static int kvm_init(MachineState *ms) { /* end of list */ } }, *nc = num_cpus; int soft_vcpus_limit, hard_vcpus_limit; - KVMState *s; + KVMState *s = KVM_STATE(as); const KVMCapabilityInfo *missing_cap; int ret; int type; qemu_mutex_init(&kml_slots_lock); - s = KVM_STATE(ms->accelerator); - /* * On systems where the kernel can support different base page * sizes, host page size may be different from TARGET_PAGE_SIZE, @@ -2628,7 +2675,7 @@ static int kvm_init(MachineState *ms) goto err; } - ret = do_kvm_create_vm(ms, type); + ret = do_kvm_create_vm(s, type); if (ret < 0) { goto err; } @@ -2732,13 +2779,13 @@ static int kvm_init(MachineState *ms) kvm_check_extension(s, KVM_CAP_GUEST_MEMFD) && kvm_check_extension(s, KVM_CAP_USER_MEMORY2) && (kvm_supported_memory_attributes & KVM_MEMORY_ATTRIBUTE_PRIVATE); + kvm_pre_fault_memory_supported = kvm_vm_check_extension(s, KVM_CAP_PRE_FAULT_MEMORY); if (s->kernel_irqchip_split == ON_OFF_AUTO_AUTO) { s->kernel_irqchip_split = mc->default_kernel_irqchip_split ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF; } qemu_register_reset(kvm_unpoison_all, NULL); - qemu_register_reset(kvm_reset_parked_vcpus, s); if (s->kernel_irqchip_allowed) { kvm_irqchip_create(s); @@ -2908,6 +2955,10 @@ static void do_kvm_cpu_synchronize_post_reset(CPUState *cpu, run_on_cpu_data arg void kvm_cpu_synchronize_post_reset(CPUState *cpu) { run_on_cpu(cpu, do_kvm_cpu_synchronize_post_reset, RUN_ON_CPU_NULL); + + if (cpu == first_cpu) { + kvm_reset_parked_vcpus(kvm_state); + } } static void do_kvm_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg) @@ -3073,6 +3124,15 @@ int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private) addr = memory_region_get_ram_ptr(mr) + section.offset_within_region; rb = qemu_ram_block_from_host(addr, false, &offset); + ret = ram_block_attributes_state_change(RAM_BLOCK_ATTRIBUTES(mr->rdm), + offset, size, to_private); + if (ret) { + error_report("Failed to notify the listener the state change of " + "(0x%"HWADDR_PRIx" + 0x%"HWADDR_PRIx") to %s", + start, size, to_private ? "private" : "shared"); + goto out_unref; + } + if (to_private) { if (rb->page_size != qemu_real_host_page_size()) { /* @@ -3758,10 +3818,10 @@ int kvm_get_one_reg(CPUState *cs, uint64_t id, void *target) return r; } -static bool kvm_accel_has_memory(MachineState *ms, AddressSpace *as, +static bool kvm_accel_has_memory(AccelState *accel, AddressSpace *as, hwaddr start_addr, hwaddr size) { - KVMState *kvm = KVM_STATE(ms->accelerator); + KVMState *kvm = KVM_STATE(accel); int i; for (i = 0; i < kvm->nr_as; ++i) { @@ -3952,12 +4012,12 @@ static void kvm_accel_instance_init(Object *obj) * Returns: SSTEP_* flags that KVM supports for guest debug. The * support is probed during kvm_init() */ -static int kvm_gdbstub_sstep_flags(void) +static int kvm_gdbstub_sstep_flags(AccelState *as) { return kvm_sstep_flags; } -static void kvm_accel_class_init(ObjectClass *oc, void *data) +static void kvm_accel_class_init(ObjectClass *oc, const void *data) { AccelClass *ac = ACCEL_CLASS(oc); ac->name = "KVM"; |