aboutsummaryrefslogtreecommitdiff
path: root/accel/kvm/kvm-all.c
diff options
context:
space:
mode:
Diffstat (limited to 'accel/kvm/kvm-all.c')
-rw-r--r--accel/kvm/kvm-all.c130
1 files changed, 95 insertions, 35 deletions
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index f89568b..a106d1b 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -33,8 +33,9 @@
#include "system/cpus.h"
#include "system/accel-blocker.h"
#include "qemu/bswap.h"
-#include "exec/memory.h"
-#include "exec/ram_addr.h"
+#include "exec/tswap.h"
+#include "system/memory.h"
+#include "system/ram_addr.h"
#include "qemu/event_notifier.h"
#include "qemu/main-loop.h"
#include "trace.h"
@@ -57,6 +58,11 @@
#include <sys/eventfd.h>
#endif
+#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__)
+# define KVM_HAVE_MCE_INJECTION 1
+#endif
+
+
/* KVM uses PAGE_SIZE in its definition of KVM_COALESCED_MMIO_MAX. We
* need to use the real host PAGE_SIZE, as that's what KVM will use.
*/
@@ -93,6 +99,7 @@ bool kvm_allowed;
bool kvm_readonly_mem_allowed;
bool kvm_vm_attributes_allowed;
bool kvm_msi_use_devid;
+bool kvm_pre_fault_memory_supported;
static bool kvm_has_guest_debug;
static int kvm_sstep_flags;
static bool kvm_immediate_exit;
@@ -437,9 +444,8 @@ int kvm_unpark_vcpu(KVMState *s, unsigned long vcpu_id)
return kvm_fd;
}
-static void kvm_reset_parked_vcpus(void *param)
+static void kvm_reset_parked_vcpus(KVMState *s)
{
- KVMState *s = param;
struct KVMParkedVcpu *cpu;
QLIST_FOREACH(cpu, &s->kvm_parked_vcpus, node) {
@@ -447,7 +453,13 @@ static void kvm_reset_parked_vcpus(void *param)
}
}
-int kvm_create_vcpu(CPUState *cpu)
+/**
+ * kvm_create_vcpu - Gets a parked KVM vCPU or creates a KVM vCPU
+ * @cpu: QOM CPUState object for which KVM vCPU has to be fetched/created.
+ *
+ * @returns: 0 when success, errno (<0) when failed.
+ */
+static int kvm_create_vcpu(CPUState *cpu)
{
unsigned long vcpu_id = kvm_arch_vcpu_id(cpu);
KVMState *s = kvm_state;
@@ -466,7 +478,9 @@ int kvm_create_vcpu(CPUState *cpu)
cpu->kvm_fd = kvm_fd;
cpu->kvm_state = s;
- cpu->vcpu_dirty = true;
+ if (!s->guest_state_protected) {
+ cpu->vcpu_dirty = true;
+ }
cpu->dirty_pages = 0;
cpu->throttle_us_per_full = 0;
@@ -507,16 +521,23 @@ static int do_kvm_destroy_vcpu(CPUState *cpu)
goto err;
}
+ /* If I am the CPU that created coalesced_mmio_ring, then discard it */
+ if (s->coalesced_mmio_ring == (void *)cpu->kvm_run + PAGE_SIZE) {
+ s->coalesced_mmio_ring = NULL;
+ }
+
ret = munmap(cpu->kvm_run, mmap_size);
if (ret < 0) {
goto err;
}
+ cpu->kvm_run = NULL;
if (cpu->kvm_dirty_gfns) {
ret = munmap(cpu->kvm_dirty_gfns, s->kvm_dirty_ring_bytes);
if (ret < 0) {
goto err;
}
+ cpu->kvm_dirty_gfns = NULL;
}
kvm_park_vcpu(cpu);
@@ -540,6 +561,11 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp)
trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
+ ret = kvm_arch_pre_create_vcpu(cpu, errp);
+ if (ret < 0) {
+ goto err;
+ }
+
ret = kvm_create_vcpu(cpu);
if (ret < 0) {
error_setg_errno(errp, -ret,
@@ -595,6 +621,31 @@ err:
return ret;
}
+void kvm_close(void)
+{
+ CPUState *cpu;
+
+ if (!kvm_state || kvm_state->fd == -1) {
+ return;
+ }
+
+ CPU_FOREACH(cpu) {
+ cpu_remove_sync(cpu);
+ close(cpu->kvm_fd);
+ cpu->kvm_fd = -1;
+ close(cpu->kvm_vcpu_stats_fd);
+ cpu->kvm_vcpu_stats_fd = -1;
+ }
+
+ if (kvm_state && kvm_state->fd != -1) {
+ close(kvm_state->vmfd);
+ kvm_state->vmfd = -1;
+ close(kvm_state->fd);
+ kvm_state->fd = -1;
+ }
+ kvm_state = NULL;
+}
+
/*
* dirty pages logging control
*/
@@ -1314,21 +1365,22 @@ bool kvm_hwpoisoned_mem(void)
static uint32_t adjust_ioeventfd_endianness(uint32_t val, uint32_t size)
{
-#if HOST_BIG_ENDIAN != TARGET_BIG_ENDIAN
- /* The kernel expects ioeventfd values in HOST_BIG_ENDIAN
- * endianness, but the memory core hands them in target endianness.
- * For example, PPC is always treated as big-endian even if running
- * on KVM and on PPC64LE. Correct here.
- */
- switch (size) {
- case 2:
- val = bswap16(val);
- break;
- case 4:
- val = bswap32(val);
- break;
+ if (target_needs_bswap()) {
+ /*
+ * The kernel expects ioeventfd values in HOST_BIG_ENDIAN
+ * endianness, but the memory core hands them in target endianness.
+ * For example, PPC is always treated as big-endian even if running
+ * on KVM and on PPC64LE. Correct here, swapping back.
+ */
+ switch (size) {
+ case 2:
+ val = bswap16(val);
+ break;
+ case 4:
+ val = bswap32(val);
+ break;
+ }
}
-#endif
return val;
}
@@ -2420,7 +2472,7 @@ static int kvm_recommended_vcpus(KVMState *s)
static int kvm_max_vcpus(KVMState *s)
{
- int ret = kvm_check_extension(s, KVM_CAP_MAX_VCPUS);
+ int ret = kvm_vm_check_extension(s, KVM_CAP_MAX_VCPUS);
return (ret) ? ret : kvm_recommended_vcpus(s);
}
@@ -2450,13 +2502,10 @@ uint32_t kvm_dirty_ring_size(void)
return kvm_state->kvm_dirty_ring_size;
}
-static int do_kvm_create_vm(MachineState *ms, int type)
+static int do_kvm_create_vm(KVMState *s, int type)
{
- KVMState *s;
int ret;
- s = KVM_STATE(ms->accelerator);
-
do {
ret = kvm_ioctl(s, KVM_CREATE_VM, type);
} while (ret == -EINTR);
@@ -2553,7 +2602,7 @@ static int kvm_setup_dirty_ring(KVMState *s)
return 0;
}
-static int kvm_init(MachineState *ms)
+static int kvm_init(AccelState *as, MachineState *ms)
{
MachineClass *mc = MACHINE_GET_CLASS(ms);
static const char upgrade_note[] =
@@ -2568,15 +2617,13 @@ static int kvm_init(MachineState *ms)
{ /* end of list */ }
}, *nc = num_cpus;
int soft_vcpus_limit, hard_vcpus_limit;
- KVMState *s;
+ KVMState *s = KVM_STATE(as);
const KVMCapabilityInfo *missing_cap;
int ret;
int type;
qemu_mutex_init(&kml_slots_lock);
- s = KVM_STATE(ms->accelerator);
-
/*
* On systems where the kernel can support different base page
* sizes, host page size may be different from TARGET_PAGE_SIZE,
@@ -2628,7 +2675,7 @@ static int kvm_init(MachineState *ms)
goto err;
}
- ret = do_kvm_create_vm(ms, type);
+ ret = do_kvm_create_vm(s, type);
if (ret < 0) {
goto err;
}
@@ -2732,13 +2779,13 @@ static int kvm_init(MachineState *ms)
kvm_check_extension(s, KVM_CAP_GUEST_MEMFD) &&
kvm_check_extension(s, KVM_CAP_USER_MEMORY2) &&
(kvm_supported_memory_attributes & KVM_MEMORY_ATTRIBUTE_PRIVATE);
+ kvm_pre_fault_memory_supported = kvm_vm_check_extension(s, KVM_CAP_PRE_FAULT_MEMORY);
if (s->kernel_irqchip_split == ON_OFF_AUTO_AUTO) {
s->kernel_irqchip_split = mc->default_kernel_irqchip_split ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
}
qemu_register_reset(kvm_unpoison_all, NULL);
- qemu_register_reset(kvm_reset_parked_vcpus, s);
if (s->kernel_irqchip_allowed) {
kvm_irqchip_create(s);
@@ -2908,6 +2955,10 @@ static void do_kvm_cpu_synchronize_post_reset(CPUState *cpu, run_on_cpu_data arg
void kvm_cpu_synchronize_post_reset(CPUState *cpu)
{
run_on_cpu(cpu, do_kvm_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
+
+ if (cpu == first_cpu) {
+ kvm_reset_parked_vcpus(kvm_state);
+ }
}
static void do_kvm_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg)
@@ -3073,6 +3124,15 @@ int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private)
addr = memory_region_get_ram_ptr(mr) + section.offset_within_region;
rb = qemu_ram_block_from_host(addr, false, &offset);
+ ret = ram_block_attributes_state_change(RAM_BLOCK_ATTRIBUTES(mr->rdm),
+ offset, size, to_private);
+ if (ret) {
+ error_report("Failed to notify the listener the state change of "
+ "(0x%"HWADDR_PRIx" + 0x%"HWADDR_PRIx") to %s",
+ start, size, to_private ? "private" : "shared");
+ goto out_unref;
+ }
+
if (to_private) {
if (rb->page_size != qemu_real_host_page_size()) {
/*
@@ -3758,10 +3818,10 @@ int kvm_get_one_reg(CPUState *cs, uint64_t id, void *target)
return r;
}
-static bool kvm_accel_has_memory(MachineState *ms, AddressSpace *as,
+static bool kvm_accel_has_memory(AccelState *accel, AddressSpace *as,
hwaddr start_addr, hwaddr size)
{
- KVMState *kvm = KVM_STATE(ms->accelerator);
+ KVMState *kvm = KVM_STATE(accel);
int i;
for (i = 0; i < kvm->nr_as; ++i) {
@@ -3952,12 +4012,12 @@ static void kvm_accel_instance_init(Object *obj)
* Returns: SSTEP_* flags that KVM supports for guest debug. The
* support is probed during kvm_init()
*/
-static int kvm_gdbstub_sstep_flags(void)
+static int kvm_gdbstub_sstep_flags(AccelState *as)
{
return kvm_sstep_flags;
}
-static void kvm_accel_class_init(ObjectClass *oc, void *data)
+static void kvm_accel_class_init(ObjectClass *oc, const void *data)
{
AccelClass *ac = ACCEL_CLASS(oc);
ac->name = "KVM";