diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2019-10-04 18:32:34 +0100 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2019-10-04 18:32:34 +0100 |
commit | 9e5319ca52a5b9e84d55ad9c36e2c0b317a122bb (patch) | |
tree | f8816dd08b76a504e16e3c1284269812695d85ee | |
parent | 4f59102571fce49af180cfc6d4cdd2b5df7bdb14 (diff) | |
parent | a1834d975f7d329b128965dd69bc3aaa7195f5a2 (diff) | |
download | qemu-9e5319ca52a5b9e84d55ad9c36e2c0b317a122bb.zip qemu-9e5319ca52a5b9e84d55ad9c36e2c0b317a122bb.tar.gz qemu-9e5319ca52a5b9e84d55ad9c36e2c0b317a122bb.tar.bz2 |
Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging
* Compilation fix for KVM (Alex)
* SMM fix (Dmitry)
* VFIO error reporting (Eric)
* win32 fixes and workarounds (Marc-André)
* qemu-pr-helper crash bugfix (Maxim)
* Memory leak fixes (myself)
* VMX features (myself)
* Record-replay deadlock (Pavel)
* i386 CPUID bits (Sebastian)
* kconfig tweak (Thomas)
* Valgrind fix (Thomas)
* Autoconverge test (Yury)
# gpg: Signature made Fri 04 Oct 2019 17:57:48 BST
# gpg: using RSA key BFFBD25F78C7AE83
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>" [full]
# gpg: aka "Paolo Bonzini <pbonzini@redhat.com>" [full]
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4 E2F7 7E15 100C CD36 69B1
# Subkey fingerprint: F133 3857 4B66 2389 866C 7682 BFFB D25F 78C7 AE83
* remotes/bonzini/tags/for-upstream: (29 commits)
target/i386/kvm: Silence warning from Valgrind about uninitialized bytes
target/i386: work around KVM_GET_MSRS bug for secondary execution controls
target/i386: add VMX features
vmxcap: correct the name of the variables
target/i386: add VMX definitions
target/i386: expand feature words to 64 bits
target/i386: introduce generic feature dependency mechanism
target/i386: handle filtered_features in a new function mark_unavailable_features
tests/docker: only enable ubsan for test-clang
win32: work around main-loop busy loop on socket/fd event
tests: skip serial test on windows
util: WSAEWOULDBLOCK on connect should map to EINPROGRESS
Fix wrong behavior of cpu_memory_rw_debug() function in SMM
memory: allow memory_region_register_iommu_notifier() to fail
vfio: Turn the container error into an Error handle
i386: Add CPUID bit for CLZERO and XSAVEERPTR
docker: test-debug: disable LeakSanitizer
lm32: do not leak memory on object_new/object_unref
cris: do not leak struct cris_disasm_data
mips: fix memory leaks in board initialization
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
38 files changed, 1038 insertions, 244 deletions
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index aabe097..d2d96d7 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -712,11 +712,11 @@ static int kvm_physical_log_clear(KVMMemoryListener *kml, KVMState *s = kvm_state; uint64_t start, size, offset, count; KVMSlot *mem; - int ret, i; + int ret = 0, i; if (!s->manual_dirty_log_protect) { /* No need to do explicit clear */ - return 0; + return ret; } start = section->offset_within_address_space; @@ -724,7 +724,7 @@ static int kvm_physical_log_clear(KVMMemoryListener *kml, if (!size) { /* Nothing more we can do... */ - return 0; + return ret; } kvm_slots_lock(kml); diff --git a/disas/cris.c b/disas/cris.c index 2f43c9b..0b0a3fb 100644 --- a/disas/cris.c +++ b/disas/cris.c @@ -1294,24 +1294,17 @@ static int cris_constraint /* Parse disassembler options and store state in info. FIXME: For the time being, we abuse static variables. */ -static bfd_boolean -cris_parse_disassembler_options (disassemble_info *info, +static void +cris_parse_disassembler_options (struct cris_disasm_data *disdata, + char *disassembler_options, enum cris_disass_family distype) { - struct cris_disasm_data *disdata; - - info->private_data = calloc (1, sizeof (struct cris_disasm_data)); - disdata = (struct cris_disasm_data *) info->private_data; - if (disdata == NULL) - return false; - /* Default true. */ disdata->trace_case - = (info->disassembler_options == NULL - || (strcmp (info->disassembler_options, "nocase") != 0)); + = (disassembler_options == NULL + || (strcmp (disassembler_options, "nocase") != 0)); disdata->distype = distype; - return true; } static const struct cris_spec_reg * @@ -2736,9 +2729,10 @@ static int print_insn_cris_with_register_prefix (bfd_vma vma, disassemble_info *info) { - if (info->private_data == NULL - && !cris_parse_disassembler_options (info, cris_dis_v0_v10)) - return -1; + struct cris_disasm_data disdata; + info->private_data = &disdata; + cris_parse_disassembler_options (&disdata, info->disassembler_options, + cris_dis_v0_v10); return print_insn_cris_generic (vma, info, true); } /* Disassemble, prefixing register names with `$'. CRIS v32. */ @@ -2747,9 +2741,10 @@ static int print_insn_crisv32_with_register_prefix (bfd_vma vma, disassemble_info *info) { - if (info->private_data == NULL - && !cris_parse_disassembler_options (info, cris_dis_v32)) - return -1; + struct cris_disasm_data disdata; + info->private_data = &disdata; + cris_parse_disassembler_options (&disdata, info->disassembler_options, + cris_dis_v32); return print_insn_cris_generic (vma, info, true); } @@ -2761,9 +2756,10 @@ static int print_insn_crisv10_v32_with_register_prefix (bfd_vma vma, disassemble_info *info) { - if (info->private_data == NULL - && !cris_parse_disassembler_options (info, cris_dis_common_v10_v32)) - return -1; + struct cris_disasm_data disdata; + info->private_data = &disdata; + cris_parse_disassembler_options (&disdata, info->disassembler_options, + cris_dis_common_v10_v32); return print_insn_cris_generic (vma, info, true); } @@ -2773,9 +2769,10 @@ static int print_insn_cris_without_register_prefix (bfd_vma vma, disassemble_info *info) { - if (info->private_data == NULL - && !cris_parse_disassembler_options (info, cris_dis_v0_v10)) - return -1; + struct cris_disasm_data disdata; + info->private_data = &disdata; + cris_parse_disassembler_options (&disdata, info->disassembler_options, + cris_dis_v0_v10); return print_insn_cris_generic (vma, info, false); } @@ -2785,9 +2782,10 @@ static int print_insn_crisv32_without_register_prefix (bfd_vma vma, disassemble_info *info) { - if (info->private_data == NULL - && !cris_parse_disassembler_options (info, cris_dis_v32)) - return -1; + struct cris_disasm_data disdata; + info->private_data = &disdata; + cris_parse_disassembler_options (&disdata, info->disassembler_options, + cris_dis_v32); return print_insn_cris_generic (vma, info, false); } @@ -2798,9 +2796,10 @@ static int print_insn_crisv10_v32_without_register_prefix (bfd_vma vma, disassemble_info *info) { - if (info->private_data == NULL - && !cris_parse_disassembler_options (info, cris_dis_common_v10_v32)) - return -1; + struct cris_disasm_data disdata; + info->private_data = &disdata; + cris_parse_disassembler_options (&disdata, info->disassembler_options, + cris_dis_common_v10_v32); return print_insn_cris_generic (vma, info, false); } #endif @@ -660,7 +660,8 @@ static void tcg_register_iommu_notifier(CPUState *cpu, */ MemoryRegion *mr = MEMORY_REGION(iommu_mr); TCGIOMMUNotifier *notifier; - int i; + Error *err = NULL; + int i, ret; for (i = 0; i < cpu->iommu_notifiers->len; i++) { notifier = g_array_index(cpu->iommu_notifiers, TCGIOMMUNotifier *, i); @@ -689,7 +690,12 @@ static void tcg_register_iommu_notifier(CPUState *cpu, 0, HWADDR_MAX, iommu_idx); - memory_region_register_iommu_notifier(notifier->mr, ¬ifier->n); + ret = memory_region_register_iommu_notifier(notifier->mr, ¬ifier->n, + &err); + if (ret) { + error_report_err(err); + exit(1); + } } if (!notifier->active) { @@ -2959,8 +2965,17 @@ static void tcg_log_global_after_sync(MemoryListener *listener) * by pushing the migration thread's memory read after the vCPU thread has * written the memory. */ - cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener); - run_on_cpu(cpuas->cpu, do_nothing, RUN_ON_CPU_NULL); + if (replay_mode == REPLAY_MODE_NONE) { + /* + * VGA can make calls to this function while updating the screen. + * In record/replay mode this causes a deadlock, because + * run_on_cpu waits for rr mutex. Therefore no races are possible + * in this case and no need for making run_on_cpu when + * record/replay is not enabled. + */ + cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener); + run_on_cpu(cpuas->cpu, do_nothing, RUN_ON_CPU_NULL); + } } static void tcg_commit(MemoryListener *listener) diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c index db051dc..e2fbb83 100644 --- a/hw/arm/smmuv3.c +++ b/hw/arm/smmuv3.c @@ -1469,20 +1469,21 @@ static void smmuv3_class_init(ObjectClass *klass, void *data) dc->realize = smmu_realize; } -static void smmuv3_notify_flag_changed(IOMMUMemoryRegion *iommu, - IOMMUNotifierFlag old, - IOMMUNotifierFlag new) +static int smmuv3_notify_flag_changed(IOMMUMemoryRegion *iommu, + IOMMUNotifierFlag old, + IOMMUNotifierFlag new, + Error **errp) { SMMUDevice *sdev = container_of(iommu, SMMUDevice, iommu); SMMUv3State *s3 = sdev->smmu; SMMUState *s = &(s3->smmu_state); if (new & IOMMU_NOTIFIER_MAP) { - int bus_num = pci_bus_num(sdev->bus); - PCIDevice *pcidev = pci_find_device(sdev->bus, bus_num, sdev->devfn); - - warn_report("SMMUv3 does not support notification on MAP: " - "device %s will not function properly", pcidev->name); + error_setg(errp, + "device %02x.%02x.%x requires iommu MAP notifier which is " + "not currently supported", pci_bus_num(sdev->bus), + PCI_SLOT(sdev->devfn), PCI_FUNC(sdev->devfn)); + return -EINVAL; } if (old == IOMMU_NOTIFIER_NONE) { @@ -1492,6 +1493,7 @@ static void smmuv3_notify_flag_changed(IOMMUMemoryRegion *iommu, trace_smmuv3_notify_flag_del(iommu->parent_obj.name); QLIST_REMOVE(sdev, next); } + return 0; } static void smmuv3_iommu_memory_region_class_init(ObjectClass *klass, diff --git a/hw/hppa/dino.c b/hw/hppa/dino.c index e0466ee..ab6969b 100644 --- a/hw/hppa/dino.c +++ b/hw/hppa/dino.c @@ -485,6 +485,7 @@ PCIBus *dino_init(MemoryRegion *addr_space, memory_region_init_alias(&s->pci_mem_alias[i], OBJECT(s), name, &s->pci_mem, addr, DINO_MEM_CHUNK_SIZE); + g_free(name); } /* Set up PCI view of memory: Bus master address space. */ diff --git a/hw/hppa/machine.c b/hw/hppa/machine.c index 2736ce8..7e23675 100644 --- a/hw/hppa/machine.c +++ b/hw/hppa/machine.c @@ -78,13 +78,15 @@ static void machine_hppa_init(MachineState *machine) /* Create CPUs. */ for (i = 0; i < smp_cpus; i++) { + char *name = g_strdup_printf("cpu%ld-io-eir", i); cpu[i] = HPPA_CPU(cpu_create(machine->cpu_type)); cpu_region = g_new(MemoryRegion, 1); memory_region_init_io(cpu_region, OBJECT(cpu[i]), &hppa_io_eir_ops, - cpu[i], g_strdup_printf("cpu%ld-io-eir", i), 4); + cpu[i], name, 4); memory_region_add_subregion(addr_space, CPU_HPA + i * 0x1000, cpu_region); + g_free(name); } /* Limit main memory. */ diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c index 0888452..d372636 100644 --- a/hw/i386/amd_iommu.c +++ b/hw/i386/amd_iommu.c @@ -1466,18 +1466,21 @@ static const MemoryRegionOps mmio_mem_ops = { } }; -static void amdvi_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu, - IOMMUNotifierFlag old, - IOMMUNotifierFlag new) +static int amdvi_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu, + IOMMUNotifierFlag old, + IOMMUNotifierFlag new, + Error **errp) { AMDVIAddressSpace *as = container_of(iommu, AMDVIAddressSpace, iommu); if (new & IOMMU_NOTIFIER_MAP) { - error_report("device %02x.%02x.%x requires iommu notifier which is not " - "currently supported", as->bus_num, PCI_SLOT(as->devfn), - PCI_FUNC(as->devfn)); - exit(1); + error_setg(errp, + "device %02x.%02x.%x requires iommu notifier which is not " + "currently supported", as->bus_num, PCI_SLOT(as->devfn), + PCI_FUNC(as->devfn)); + return -EINVAL; } + return 0; } static void amdvi_init(AMDVIState *s) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index f1de8fd..771bed2 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -2929,9 +2929,10 @@ static IOMMUTLBEntry vtd_iommu_translate(IOMMUMemoryRegion *iommu, hwaddr addr, return iotlb; } -static void vtd_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu, - IOMMUNotifierFlag old, - IOMMUNotifierFlag new) +static int vtd_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu, + IOMMUNotifierFlag old, + IOMMUNotifierFlag new, + Error **errp) { VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu); IntelIOMMUState *s = vtd_as->iommu_state; @@ -2944,6 +2945,7 @@ static void vtd_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu, } else if (new == IOMMU_NOTIFIER_NONE) { QLIST_REMOVE(vtd_as, next); } + return 0; } static int vtd_post_load(void *opaque, int version_id) diff --git a/hw/ide/cmd646.c b/hw/ide/cmd646.c index f3ccd11..19984d2 100644 --- a/hw/ide/cmd646.c +++ b/hw/ide/cmd646.c @@ -300,6 +300,7 @@ static void pci_cmd646_ide_realize(PCIDevice *dev, Error **errp) d->bmdma[i].bus = &d->bus[i]; ide_register_restart_cb(&d->bus[i]); } + g_free(irq); vmstate_register(DEVICE(dev), 0, &vmstate_ide_pci, d); qemu_register_reset(cmd646_reset, d); diff --git a/hw/isa/Kconfig b/hw/isa/Kconfig index 6db0d79..98a2899 100644 --- a/hw/isa/Kconfig +++ b/hw/isa/Kconfig @@ -13,9 +13,13 @@ config I82378 select MC146818RTC select PCSPK -config PC87312 +config ISA_SUPERIO bool select ISA_BUS + +config PC87312 + bool + select ISA_SUPERIO select I8259 select I8254 select I8257 @@ -34,14 +38,14 @@ config PIIX4 config VT82C686 bool - select ISA_BUS + select ISA_SUPERIO select ACPI_SMBUS select SERIAL_ISA select FDC config SMC37C669 bool - select ISA_BUS + select ISA_SUPERIO select SERIAL_ISA select PARALLEL select FDC diff --git a/hw/isa/Makefile.objs b/hw/isa/Makefile.objs index 9e106df..ff97485 100644 --- a/hw/isa/Makefile.objs +++ b/hw/isa/Makefile.objs @@ -1,5 +1,5 @@ common-obj-$(CONFIG_ISA_BUS) += isa-bus.o -common-obj-$(CONFIG_ISA_BUS) += isa-superio.o +common-obj-$(CONFIG_ISA_SUPERIO) += isa-superio.o common-obj-$(CONFIG_APM) += apm.o common-obj-$(CONFIG_I82378) += i82378.o common-obj-$(CONFIG_PC87312) += pc87312.o diff --git a/hw/m68k/mcf5208.c b/hw/m68k/mcf5208.c index 012710d..60c5802 100644 --- a/hw/m68k/mcf5208.c +++ b/hw/m68k/mcf5208.c @@ -273,6 +273,8 @@ static void mcf5208evb_init(MachineState *machine) 0xfc030000, pic + 36); } + g_free(pic); + /* 0xfc000000 SCM. */ /* 0xfc004000 XBS. */ /* 0xfc008000 FlexBus CS. */ diff --git a/hw/microblaze/boot.c b/hw/microblaze/boot.c index bade4d2..d1d7dfb 100644 --- a/hw/microblaze/boot.c +++ b/hw/microblaze/boot.c @@ -100,6 +100,7 @@ static int microblaze_load_dtb(hwaddr addr, } cpu_physical_memory_write(addr, fdt, fdt_size); + g_free(fdt); return fdt_size; } diff --git a/hw/mips/Kconfig b/hw/mips/Kconfig index 62aa01b..2c2adbc 100644 --- a/hw/mips/Kconfig +++ b/hw/mips/Kconfig @@ -13,6 +13,7 @@ config R4K config MALTA bool + select ISA_SUPERIO config MIPSSIM bool diff --git a/hw/mips/mips_int.c b/hw/mips/mips_int.c index 5ebc961..863ed45 100644 --- a/hw/mips/mips_int.c +++ b/hw/mips/mips_int.c @@ -81,6 +81,7 @@ void cpu_mips_irq_init_cpu(MIPSCPU *cpu) for (i = 0; i < 8; i++) { env->irq[i] = qi[i]; } + g_free(qi); } void cpu_mips_soft_irq(CPUMIPSState *env, int irq, int level) diff --git a/hw/mips/mips_jazz.c b/hw/mips/mips_jazz.c index c967b97..8d010a0 100644 --- a/hw/mips/mips_jazz.c +++ b/hw/mips/mips_jazz.c @@ -362,6 +362,8 @@ static void mips_jazz_init(MachineState *machine, /* LED indicator */ sysbus_create_simple("jazz-led", 0x8000f000, NULL); + + g_free(dmas); } static diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c index e87b3d5..3d3bcc8 100644 --- a/hw/ppc/spapr_iommu.c +++ b/hw/ppc/spapr_iommu.c @@ -205,9 +205,10 @@ static int spapr_tce_get_attr(IOMMUMemoryRegion *iommu, return -EINVAL; } -static void spapr_tce_notify_flag_changed(IOMMUMemoryRegion *iommu, - IOMMUNotifierFlag old, - IOMMUNotifierFlag new) +static int spapr_tce_notify_flag_changed(IOMMUMemoryRegion *iommu, + IOMMUNotifierFlag old, + IOMMUNotifierFlag new, + Error **errp) { struct SpaprTceTable *tbl = container_of(iommu, SpaprTceTable, iommu); @@ -216,6 +217,7 @@ static void spapr_tce_notify_flag_changed(IOMMUMemoryRegion *iommu, } else if (old != IOMMU_NOTIFIER_NONE && new == IOMMU_NOTIFIER_NONE) { spapr_tce_set_need_vfio(tbl, false); } + return 0; } static int spapr_tce_table_post_load(void *opaque, int version_id) diff --git a/hw/timer/lm32_timer.c b/hw/timer/lm32_timer.c index ac3edaf..cf316ed 100644 --- a/hw/timer/lm32_timer.c +++ b/hw/timer/lm32_timer.c @@ -186,9 +186,6 @@ static void lm32_timer_init(Object *obj) sysbus_init_irq(dev, &s->irq); - s->bh = qemu_bh_new(timer_hit, s); - s->ptimer = ptimer_init(s->bh, PTIMER_POLICY_DEFAULT); - memory_region_init_io(&s->iomem, obj, &timer_ops, s, "timer", R_MAX * 4); sysbus_init_mmio(dev, &s->iomem); @@ -198,6 +195,9 @@ static void lm32_timer_realize(DeviceState *dev, Error **errp) { LM32TimerState *s = LM32_TIMER(dev); + s->bh = qemu_bh_new(timer_hit, s); + s->ptimer = ptimer_init(s->bh, PTIMER_POLICY_DEFAULT); + ptimer_set_freq(s->ptimer, s->freq_hz); } diff --git a/hw/timer/milkymist-sysctl.c b/hw/timer/milkymist-sysctl.c index 9583507..6aedc11 100644 --- a/hw/timer/milkymist-sysctl.c +++ b/hw/timer/milkymist-sysctl.c @@ -283,11 +283,6 @@ static void milkymist_sysctl_init(Object *obj) sysbus_init_irq(dev, &s->timer0_irq); sysbus_init_irq(dev, &s->timer1_irq); - s->bh0 = qemu_bh_new(timer0_hit, s); - s->bh1 = qemu_bh_new(timer1_hit, s); - s->ptimer0 = ptimer_init(s->bh0, PTIMER_POLICY_DEFAULT); - s->ptimer1 = ptimer_init(s->bh1, PTIMER_POLICY_DEFAULT); - memory_region_init_io(&s->regs_region, obj, &sysctl_mmio_ops, s, "milkymist-sysctl", R_MAX * 4); sysbus_init_mmio(dev, &s->regs_region); @@ -297,6 +292,11 @@ static void milkymist_sysctl_realize(DeviceState *dev, Error **errp) { MilkymistSysctlState *s = MILKYMIST_SYSCTL(dev); + s->bh0 = qemu_bh_new(timer0_hit, s); + s->bh1 = qemu_bh_new(timer1_hit, s); + s->ptimer0 = ptimer_init(s->bh0, PTIMER_POLICY_DEFAULT); + s->ptimer1 = ptimer_init(s->bh1, PTIMER_POLICY_DEFAULT); + ptimer_set_freq(s->ptimer0, s->freq_hz); ptimer_set_freq(s->ptimer1, s->freq_hz); } diff --git a/hw/vfio/common.c b/hw/vfio/common.c index 3e03c49..5ca1148 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -509,6 +509,7 @@ static void vfio_listener_region_add(MemoryListener *listener, int ret; VFIOHostDMAWindow *hostwin; bool hostwin_found; + Error *err = NULL; if (vfio_listener_skipped_section(section)) { trace_vfio_listener_region_add_skip( @@ -543,13 +544,20 @@ static void vfio_listener_region_add(MemoryListener *listener, hostwin->max_iova - hostwin->min_iova + 1, section->offset_within_address_space, int128_get64(section->size))) { - ret = -1; + error_setg(&err, + "region [0x%"PRIx64",0x%"PRIx64"] overlaps with existing" + "host DMA window [0x%"PRIx64",0x%"PRIx64"]", + section->offset_within_address_space, + section->offset_within_address_space + + int128_get64(section->size) - 1, + hostwin->min_iova, hostwin->max_iova); goto fail; } } ret = vfio_spapr_create_window(container, section, &pgsize); if (ret) { + error_setg_errno(&err, -ret, "Failed to create SPAPR window"); goto fail; } @@ -594,10 +602,8 @@ static void vfio_listener_region_add(MemoryListener *listener, } if (!hostwin_found) { - error_report("vfio: IOMMU container %p can't map guest IOVA region" - " 0x%"HWADDR_PRIx"..0x%"HWADDR_PRIx, - container, iova, end); - ret = -EFAULT; + error_setg(&err, "Container %p can't map guest IOVA region" + " 0x%"HWADDR_PRIx"..0x%"HWADDR_PRIx, container, iova, end); goto fail; } @@ -630,9 +636,14 @@ static void vfio_listener_region_add(MemoryListener *listener, section->offset_within_region, int128_get64(llend), iommu_idx); - QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next); - memory_region_register_iommu_notifier(section->mr, &giommu->n); + ret = memory_region_register_iommu_notifier(section->mr, &giommu->n, + &err); + if (ret) { + g_free(giommu); + goto fail; + } + QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next); memory_region_iommu_replay(giommu->iommu, &giommu->n); return; @@ -664,11 +675,12 @@ static void vfio_listener_region_add(MemoryListener *listener, ret = vfio_dma_map(container, iova, int128_get64(llsize), vaddr, section->readonly); if (ret) { - error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", " - "0x%"HWADDR_PRIx", %p) = %d (%m)", - container, iova, int128_get64(llsize), vaddr, ret); + error_setg(&err, "vfio_dma_map(%p, 0x%"HWADDR_PRIx", " + "0x%"HWADDR_PRIx", %p) = %d (%m)", + container, iova, int128_get64(llsize), vaddr, ret); if (memory_region_is_ram_device(section->mr)) { /* Allow unexpected mappings not to be fatal for RAM devices */ + error_report_err(err); return; } goto fail; @@ -688,9 +700,14 @@ fail: */ if (!container->initialized) { if (!container->error) { - container->error = ret; + error_propagate_prepend(&container->error, err, + "Region %s: ", + memory_region_name(section->mr)); + } else { + error_free(err); } } else { + error_report_err(err); hw_error("vfio: DMA mapping failed, unable to continue"); } } @@ -1251,6 +1268,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, container = g_malloc0(sizeof(*container)); container->space = space; container->fd = fd; + container->error = NULL; QLIST_INIT(&container->giommu_list); QLIST_INIT(&container->hostwin_list); @@ -1308,9 +1326,9 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, &address_space_memory); if (container->error) { memory_listener_unregister(&container->prereg_listener); - ret = container->error; - error_setg(errp, - "RAM memory listener initialization failed for container"); + ret = -1; + error_propagate_prepend(errp, container->error, + "RAM memory listener initialization failed: "); goto free_container_exit; } } @@ -1365,9 +1383,9 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, memory_listener_register(&container->listener, container->space->as); if (container->error) { - ret = container->error; - error_setg_errno(errp, -ret, - "memory listener initialization failed for container"); + ret = -1; + error_propagate_prepend(errp, container->error, + "memory listener initialization failed: "); goto listener_release_exit; } diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c index 96c0ad9..e853eeb 100644 --- a/hw/vfio/spapr.c +++ b/hw/vfio/spapr.c @@ -17,6 +17,7 @@ #include "hw/hw.h" #include "exec/ram_addr.h" #include "qemu/error-report.h" +#include "qapi/error.h" #include "trace.h" static bool vfio_prereg_listener_skipped_section(MemoryRegionSection *section) @@ -85,7 +86,8 @@ static void vfio_prereg_listener_region_add(MemoryListener *listener, */ if (!container->initialized) { if (!container->error) { - container->error = ret; + error_setg_errno(&container->error, -ret, + "Memory registering failed"); } } else { hw_error("vfio: Memory registering failed, unable to continue"); diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 2386b51..99de5f1 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -677,8 +677,9 @@ static void vhost_iommu_region_add(MemoryListener *listener, iommu_listener); struct vhost_iommu *iommu; Int128 end; - int iommu_idx; + int iommu_idx, ret; IOMMUMemoryRegion *iommu_mr; + Error *err = NULL; if (!memory_region_is_iommu(section->mr)) { return; @@ -701,7 +702,11 @@ static void vhost_iommu_region_add(MemoryListener *listener, iommu->iommu_offset = section->offset_within_address_space - section->offset_within_region; iommu->hdev = dev; - memory_region_register_iommu_notifier(section->mr, &iommu->n); + ret = memory_region_register_iommu_notifier(section->mr, &iommu->n, &err); + if (ret) { + error_report_err(err); + exit(1); + } QLIST_INSERT_HEAD(&dev->iommu_list, iommu, iommu_next); /* TODO: can replay help performance here? */ } diff --git a/include/exec/memory.h b/include/exec/memory.h index 6e67043..e499dc2 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -288,10 +288,16 @@ typedef struct IOMMUMemoryRegionClass { * @iommu: the IOMMUMemoryRegion * @old_flags: events which previously needed to be notified * @new_flags: events which now need to be notified + * + * Returns 0 on success, or a negative errno; in particular + * returns -EINVAL if the new flag bitmap is not supported by the + * IOMMU memory region. In case of failure, the error object + * must be created */ - void (*notify_flag_changed)(IOMMUMemoryRegion *iommu, - IOMMUNotifierFlag old_flags, - IOMMUNotifierFlag new_flags); + int (*notify_flag_changed)(IOMMUMemoryRegion *iommu, + IOMMUNotifierFlag old_flags, + IOMMUNotifierFlag new_flags, + Error **errp); /* Called to handle memory_region_iommu_replay(). * * The default implementation of memory_region_iommu_replay() is to @@ -1079,13 +1085,18 @@ void memory_region_notify_one(IOMMUNotifier *notifier, * memory_region_register_iommu_notifier: register a notifier for changes to * IOMMU translation entries. * + * Returns 0 on success, or a negative errno otherwise. In particular, + * -EINVAL indicates that at least one of the attributes of the notifier + * is not supported (flag/range) by the IOMMU memory region. In case of error + * the error object must be created. + * * @mr: the memory region to observe * @n: the IOMMUNotifier to be added; the notify callback receives a * pointer to an #IOMMUTLBEntry as the opaque value; the pointer * ceases to be valid on exit from the notifier. */ -void memory_region_register_iommu_notifier(MemoryRegion *mr, - IOMMUNotifier *n); +int memory_region_register_iommu_notifier(MemoryRegion *mr, + IOMMUNotifier *n, Error **errp); /** * memory_region_iommu_replay: replay existing IOMMU translations to diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index 9107bd4..fd56420 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -71,7 +71,7 @@ typedef struct VFIOContainer { MemoryListener listener; MemoryListener prereg_listener; unsigned iommu_type; - int error; + Error *error; bool initialized; unsigned long pgsizes; QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index fd67477..9d14328 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -462,7 +462,7 @@ int kvm_vm_check_extension(KVMState *s, unsigned int extension); uint32_t kvm_arch_get_supported_cpuid(KVMState *env, uint32_t function, uint32_t index, int reg); -uint32_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index); +uint64_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index); void kvm_set_sigmask_len(KVMState *s, unsigned int sigmask_len); @@ -1817,33 +1817,38 @@ bool memory_region_is_logging(MemoryRegion *mr, uint8_t client) return memory_region_get_dirty_log_mask(mr) & (1 << client); } -static void memory_region_update_iommu_notify_flags(IOMMUMemoryRegion *iommu_mr) +static int memory_region_update_iommu_notify_flags(IOMMUMemoryRegion *iommu_mr, + Error **errp) { IOMMUNotifierFlag flags = IOMMU_NOTIFIER_NONE; IOMMUNotifier *iommu_notifier; IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_GET_CLASS(iommu_mr); + int ret = 0; IOMMU_NOTIFIER_FOREACH(iommu_notifier, iommu_mr) { flags |= iommu_notifier->notifier_flags; } if (flags != iommu_mr->iommu_notify_flags && imrc->notify_flag_changed) { - imrc->notify_flag_changed(iommu_mr, - iommu_mr->iommu_notify_flags, - flags); + ret = imrc->notify_flag_changed(iommu_mr, + iommu_mr->iommu_notify_flags, + flags, errp); } - iommu_mr->iommu_notify_flags = flags; + if (!ret) { + iommu_mr->iommu_notify_flags = flags; + } + return ret; } -void memory_region_register_iommu_notifier(MemoryRegion *mr, - IOMMUNotifier *n) +int memory_region_register_iommu_notifier(MemoryRegion *mr, + IOMMUNotifier *n, Error **errp) { IOMMUMemoryRegion *iommu_mr; + int ret; if (mr->alias) { - memory_region_register_iommu_notifier(mr->alias, n); - return; + return memory_region_register_iommu_notifier(mr->alias, n, errp); } /* We need to register for at least one bitfield */ @@ -1854,7 +1859,11 @@ void memory_region_register_iommu_notifier(MemoryRegion *mr, n->iommu_idx < memory_region_iommu_num_indexes(iommu_mr)); QLIST_INSERT_HEAD(&iommu_mr->iommu_notify, n, node); - memory_region_update_iommu_notify_flags(iommu_mr); + ret = memory_region_update_iommu_notify_flags(iommu_mr, errp); + if (ret) { + QLIST_REMOVE(n, node); + } + return ret; } uint64_t memory_region_iommu_get_min_page_size(IOMMUMemoryRegion *iommu_mr) @@ -1907,7 +1916,7 @@ void memory_region_unregister_iommu_notifier(MemoryRegion *mr, } QLIST_REMOVE(n, node); iommu_mr = IOMMU_MEMORY_REGION(mr); - memory_region_update_iommu_notify_flags(iommu_mr); + memory_region_update_iommu_notify_flags(iommu_mr, NULL); } void memory_region_notify_one(IOMMUNotifier *notifier, diff --git a/scripts/kvm/vmxcap b/scripts/kvm/vmxcap index d8c7d6d..5dfeb2e 100755 --- a/scripts/kvm/vmxcap +++ b/scripts/kvm/vmxcap @@ -51,15 +51,15 @@ class Control(object): return (val & 0xffffffff, val >> 32) def show(self): print(self.name) - mbz, mb1 = self.read2(self.cap_msr) - tmbz, tmb1 = 0, 0 + mb1, cb1 = self.read2(self.cap_msr) + tmb1, tcb1 = 0, 0 if self.true_cap_msr: - tmbz, tmb1 = self.read2(self.true_cap_msr) + tmb1, tcb1 = self.read2(self.true_cap_msr) for bit in sorted(self.bits.keys()): - zero = not (mbz & (1 << bit)) - one = mb1 & (1 << bit) - true_zero = not (tmbz & (1 << bit)) - true_one = tmb1 & (1 << bit) + zero = not (mb1 & (1 << bit)) + one = cb1 & (1 << bit) + true_zero = not (tmb1 & (1 << bit)) + true_one = tcb1 & (1 << bit) s= '?' if (self.true_cap_msr and true_zero and true_one and one and not zero): diff --git a/scsi/qemu-pr-helper.c b/scsi/qemu-pr-helper.c index a8a74d1..debb18f 100644 --- a/scsi/qemu-pr-helper.c +++ b/scsi/qemu-pr-helper.c @@ -323,10 +323,10 @@ static int mpath_reconstruct_sense(int fd, int r, uint8_t *sense) */ uint8_t cdb[6] = { TEST_UNIT_READY }; int sz = 0; - int r = do_sgio(fd, cdb, sense, NULL, &sz, SG_DXFER_NONE); + int ret = do_sgio(fd, cdb, sense, NULL, &sz, SG_DXFER_NONE); - if (r != GOOD) { - return r; + if (ret != GOOD) { + return ret; } scsi_build_sense(sense, mpath_generic_sense(r)); return CHECK_CONDITION; diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 9e0bac3..44f1bbd 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -789,7 +789,7 @@ typedef struct FeatureWordInfo { * In cases of disagreement between feature naming conventions, * aliases may be added. */ - const char *feat_names[32]; + const char *feat_names[64]; union { /* If type==CPUID_FEATURE_WORD */ struct { @@ -801,17 +801,13 @@ typedef struct FeatureWordInfo { /* If type==MSR_FEATURE_WORD */ struct { uint32_t index; - struct { /*CPUID that enumerate this MSR*/ - FeatureWord cpuid_class; - uint32_t cpuid_flag; - } cpuid_dep; } msr; }; - uint32_t tcg_features; /* Feature flags supported by TCG */ - uint32_t unmigratable_flags; /* Feature flags known to be unmigratable */ - uint32_t migratable_flags; /* Feature flags known to be migratable */ + uint64_t tcg_features; /* Feature flags supported by TCG */ + uint64_t unmigratable_flags; /* Feature flags known to be unmigratable */ + uint64_t migratable_flags; /* Feature flags known to be migratable */ /* Features that shouldn't be auto-enabled by "-cpu host" */ - uint32_t no_autoenable_flags; + uint64_t no_autoenable_flags; } FeatureWordInfo; static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { @@ -1134,7 +1130,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { [FEAT_8000_0008_EBX] = { .type = CPUID_FEATURE_WORD, .feat_names = { - NULL, NULL, NULL, NULL, + "clzero", NULL, "xsaveerptr", NULL, NULL, NULL, NULL, NULL, NULL, "wbnoinvd", NULL, NULL, "ibpb", NULL, NULL, NULL, @@ -1218,10 +1214,6 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { }, .msr = { .index = MSR_IA32_ARCH_CAPABILITIES, - .cpuid_dep = { - FEAT_7_0_EDX, - CPUID_7_0_EDX_ARCH_CAPABILITIES - } }, }, [FEAT_CORE_CAPABILITY] = { @@ -1238,12 +1230,253 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { }, .msr = { .index = MSR_IA32_CORE_CAPABILITY, - .cpuid_dep = { - FEAT_7_0_EDX, - CPUID_7_0_EDX_CORE_CAPABILITY, - }, }, }, + + [FEAT_VMX_PROCBASED_CTLS] = { + .type = MSR_FEATURE_WORD, + .feat_names = { + NULL, NULL, "vmx-vintr-pending", "vmx-tsc-offset", + NULL, NULL, NULL, "vmx-hlt-exit", + NULL, "vmx-invlpg-exit", "vmx-mwait-exit", "vmx-rdpmc-exit", + "vmx-rdtsc-exit", NULL, NULL, "vmx-cr3-load-noexit", + "vmx-cr3-store-noexit", NULL, NULL, "vmx-cr8-load-exit", + "vmx-cr8-store-exit", "vmx-flexpriority", "vmx-vnmi-pending", "vmx-movdr-exit", + "vmx-io-exit", "vmx-io-bitmap", NULL, "vmx-mtf", + "vmx-msr-bitmap", "vmx-monitor-exit", "vmx-pause-exit", "vmx-secondary-ctls", + }, + .msr = { + .index = MSR_IA32_VMX_TRUE_PROCBASED_CTLS, + } + }, + + [FEAT_VMX_SECONDARY_CTLS] = { + .type = MSR_FEATURE_WORD, + .feat_names = { + "vmx-apicv-xapic", "vmx-ept", "vmx-desc-exit", "vmx-rdtscp-exit", + "vmx-apicv-x2apic", "vmx-vpid", "vmx-wbinvd-exit", "vmx-unrestricted-guest", + "vmx-apicv-register", "vmx-apicv-vid", "vmx-ple", "vmx-rdrand-exit", + "vmx-invpcid-exit", "vmx-vmfunc", "vmx-shadow-vmcs", "vmx-encls-exit", + "vmx-rdseed-exit", "vmx-pml", NULL, NULL, + "vmx-xsaves", NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, + .msr = { + .index = MSR_IA32_VMX_PROCBASED_CTLS2, + } + }, + + [FEAT_VMX_PINBASED_CTLS] = { + .type = MSR_FEATURE_WORD, + .feat_names = { + "vmx-intr-exit", NULL, NULL, "vmx-nmi-exit", + NULL, "vmx-vnmi", "vmx-preemption-timer", "vmx-posted-intr", + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, + .msr = { + .index = MSR_IA32_VMX_TRUE_PINBASED_CTLS, + } + }, + + [FEAT_VMX_EXIT_CTLS] = { + .type = MSR_FEATURE_WORD, + /* + * VMX_VM_EXIT_HOST_ADDR_SPACE_SIZE is copied from + * the LM CPUID bit. + */ + .feat_names = { + NULL, NULL, "vmx-exit-nosave-debugctl", NULL, + NULL, NULL, NULL, NULL, + NULL, NULL /* vmx-exit-host-addr-space-size */, NULL, NULL, + "vmx-exit-load-perf-global-ctrl", NULL, NULL, "vmx-exit-ack-intr", + NULL, NULL, "vmx-exit-save-pat", "vmx-exit-load-pat", + "vmx-exit-save-efer", "vmx-exit-load-efer", + "vmx-exit-save-preemption-timer", "vmx-exit-clear-bndcfgs", + NULL, "vmx-exit-clear-rtit-ctl", NULL, NULL, + NULL, NULL, NULL, NULL, + }, + .msr = { + .index = MSR_IA32_VMX_TRUE_EXIT_CTLS, + } + }, + + [FEAT_VMX_ENTRY_CTLS] = { + .type = MSR_FEATURE_WORD, + .feat_names = { + NULL, NULL, "vmx-entry-noload-debugctl", NULL, + NULL, NULL, NULL, NULL, + NULL, "vmx-entry-ia32e-mode", NULL, NULL, + NULL, "vmx-entry-load-perf-global-ctrl", "vmx-entry-load-pat", "vmx-entry-load-efer", + "vmx-entry-load-bndcfgs", NULL, "vmx-entry-load-rtit-ctl", NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, + .msr = { + .index = MSR_IA32_VMX_TRUE_ENTRY_CTLS, + } + }, + + [FEAT_VMX_MISC] = { + .type = MSR_FEATURE_WORD, + .feat_names = { + NULL, NULL, NULL, NULL, + NULL, "vmx-store-lma", "vmx-activity-hlt", "vmx-activity-shutdown", + "vmx-activity-wait-sipi", NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, "vmx-vmwrite-vmexit-fields", "vmx-zero-len-inject", NULL, + }, + .msr = { + .index = MSR_IA32_VMX_MISC, + } + }, + + [FEAT_VMX_EPT_VPID_CAPS] = { + .type = MSR_FEATURE_WORD, + .feat_names = { + "vmx-ept-execonly", NULL, NULL, NULL, + NULL, NULL, "vmx-page-walk-4", "vmx-page-walk-5", + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + "vmx-ept-2mb", "vmx-ept-1gb", NULL, NULL, + "vmx-invept", "vmx-eptad", "vmx-ept-advanced-exitinfo", NULL, + NULL, "vmx-invept-single-context", "vmx-invept-all-context", NULL, + NULL, NULL, NULL, NULL, + "vmx-invvpid", NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + "vmx-invvpid-single-addr", "vmx-invept-single-context", + "vmx-invvpid-all-context", "vmx-invept-single-context-noglobals", + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, + .msr = { + .index = MSR_IA32_VMX_EPT_VPID_CAP, + } + }, + + [FEAT_VMX_BASIC] = { + .type = MSR_FEATURE_WORD, + .feat_names = { + [54] = "vmx-ins-outs", + [55] = "vmx-true-ctls", + }, + .msr = { + .index = MSR_IA32_VMX_BASIC, + }, + /* Just to be safe - we don't support setting the MSEG version field. */ + .no_autoenable_flags = MSR_VMX_BASIC_DUAL_MONITOR, + }, + + [FEAT_VMX_VMFUNC] = { + .type = MSR_FEATURE_WORD, + .feat_names = { + [0] = "vmx-eptp-switching", + }, + .msr = { + .index = MSR_IA32_VMX_VMFUNC, + } + }, + +}; + +typedef struct FeatureMask { + FeatureWord index; + uint64_t mask; +} FeatureMask; + +typedef struct FeatureDep { + FeatureMask from, to; +} FeatureDep; + +static FeatureDep feature_dependencies[] = { + { + .from = { FEAT_7_0_EDX, CPUID_7_0_EDX_ARCH_CAPABILITIES }, + .to = { FEAT_ARCH_CAPABILITIES, ~0ull }, + }, + { + .from = { FEAT_7_0_EDX, CPUID_7_0_EDX_CORE_CAPABILITY }, + .to = { FEAT_CORE_CAPABILITY, ~0ull }, + }, + { + .from = { FEAT_1_ECX, CPUID_EXT_VMX }, + .to = { FEAT_VMX_PROCBASED_CTLS, ~0ull }, + }, + { + .from = { FEAT_1_ECX, CPUID_EXT_VMX }, + .to = { FEAT_VMX_PINBASED_CTLS, ~0ull }, + }, + { + .from = { FEAT_1_ECX, CPUID_EXT_VMX }, + .to = { FEAT_VMX_EXIT_CTLS, ~0ull }, + }, + { + .from = { FEAT_1_ECX, CPUID_EXT_VMX }, + .to = { FEAT_VMX_ENTRY_CTLS, ~0ull }, + }, + { + .from = { FEAT_1_ECX, CPUID_EXT_VMX }, + .to = { FEAT_VMX_MISC, ~0ull }, + }, + { + .from = { FEAT_1_ECX, CPUID_EXT_VMX }, + .to = { FEAT_VMX_BASIC, ~0ull }, + }, + { + .from = { FEAT_8000_0001_EDX, CPUID_EXT2_LM }, + .to = { FEAT_VMX_ENTRY_CTLS, VMX_VM_ENTRY_IA32E_MODE }, + }, + { + .from = { FEAT_VMX_PROCBASED_CTLS, VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS }, + .to = { FEAT_VMX_SECONDARY_CTLS, ~0ull }, + }, + { + .from = { FEAT_XSAVE, CPUID_XSAVE_XSAVES }, + .to = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_XSAVES }, + }, + { + .from = { FEAT_1_ECX, CPUID_EXT_RDRAND }, + .to = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_RDRAND_EXITING }, + }, + { + .from = { FEAT_7_0_EBX, CPUID_7_0_EBX_INVPCID }, + .to = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_ENABLE_INVPCID }, + }, + { + .from = { FEAT_7_0_EBX, CPUID_7_0_EBX_RDSEED }, + .to = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_RDSEED_EXITING }, + }, + { + .from = { FEAT_8000_0001_EDX, CPUID_EXT2_RDTSCP }, + .to = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_RDTSCP }, + }, + { + .from = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_ENABLE_EPT }, + .to = { FEAT_VMX_EPT_VPID_CAPS, 0xffffffffull }, + }, + { + .from = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_ENABLE_EPT }, + .to = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST }, + }, + { + .from = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_ENABLE_VPID }, + .to = { FEAT_VMX_EPT_VPID_CAPS, 0xffffffffull << 32 }, + }, + { + .from = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_ENABLE_VMFUNC }, + .to = { FEAT_VMX_VMFUNC, ~0ull }, + }, }; typedef struct X86RegisterInfo32 { @@ -1354,14 +1587,14 @@ const char *get_register_name_32(unsigned int reg) * Returns the set of feature flags that are supported and migratable by * QEMU, for a given FeatureWord. */ -static uint32_t x86_cpu_get_migratable_flags(FeatureWord w) +static uint64_t x86_cpu_get_migratable_flags(FeatureWord w) { FeatureWordInfo *wi = &feature_word_info[w]; - uint32_t r = 0; + uint64_t r = 0; int i; - for (i = 0; i < 32; i++) { - uint32_t f = 1U << i; + for (i = 0; i < 64; i++) { + uint64_t f = 1ULL << i; /* If the feature name is known, it is implicitly considered migratable, * unless it is explicitly set in unmigratable_flags */ @@ -2923,7 +3156,7 @@ void x86_cpu_change_kvm_default(const char *prop, const char *value) assert(pv->prop); } -static uint32_t x86_cpu_get_supported_feature_word(FeatureWord w, +static uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, bool migratable_only); static bool lmce_supported(void) @@ -3096,17 +3329,41 @@ static char *feature_word_description(FeatureWordInfo *f, uint32_t bit) return NULL; } -static void report_unavailable_features(FeatureWord w, uint32_t mask) +static bool x86_cpu_have_filtered_features(X86CPU *cpu) { + FeatureWord w; + + for (w = 0; w < FEATURE_WORDS; w++) { + if (cpu->filtered_features[w]) { + return true; + } + } + + return false; +} + +static void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint64_t mask, + const char *verbose_prefix) +{ + CPUX86State *env = &cpu->env; FeatureWordInfo *f = &feature_word_info[w]; int i; char *feat_word_str; - for (i = 0; i < 32; ++i) { - if ((1UL << i) & mask) { + if (!cpu->force_features) { + env->features[w] &= ~mask; + } + cpu->filtered_features[w] |= mask; + + if (!verbose_prefix) { + return; + } + + for (i = 0; i < 64; ++i) { + if ((1ULL << i) & mask) { feat_word_str = feature_word_description(f, i); - warn_report("%s doesn't support requested feature: %s%s%s [bit %d]", - accel_uses_host_cpuid() ? "host" : "TCG", + warn_report("%s: %s%s%s [bit %d]", + verbose_prefix, feat_word_str, f->feat_names[i] ? "." : "", f->feat_names[i] ? f->feat_names[i] : "", i); @@ -3346,7 +3603,7 @@ static void x86_cpu_get_feature_words(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) { - uint32_t *array = (uint32_t *)opaque; + uint64_t *array = (uint64_t *)opaque; FeatureWord w; X86CPUFeatureWordInfo word_infos[FEATURE_WORDS] = { }; X86CPUFeatureWordInfoList list_entries[FEATURE_WORDS] = { }; @@ -3390,6 +3647,7 @@ static inline void feat2prop(char *s) /* Return the feature property name for a feature flag bit */ static const char *x86_cpu_feature_name(FeatureWord w, int bitnr) { + const char *name; /* XSAVE components are automatically enabled by other features, * so return the original feature name instead */ @@ -3403,9 +3661,11 @@ static const char *x86_cpu_feature_name(FeatureWord w, int bitnr) } } - assert(bitnr < 32); + assert(bitnr < 64); assert(w < FEATURE_WORDS); - return feature_word_info[w].feat_names[bitnr]; + name = feature_word_info[w].feat_names[bitnr]; + assert(bitnr < 32 || !(name && feature_word_info[w].type == CPUID_FEATURE_WORD)); + return name; } /* Compatibily hack to maintain legacy +-feat semantic, @@ -3511,7 +3771,7 @@ static void x86_cpu_parse_featurestr(const char *typename, char *features, } static void x86_cpu_expand_features(X86CPU *cpu, Error **errp); -static int x86_cpu_filter_features(X86CPU *cpu); +static void x86_cpu_filter_features(X86CPU *cpu, bool verbose); /* Build a list with the name of all features on a feature word array */ static void x86_cpu_list_feature_names(FeatureWordArray features, @@ -3521,10 +3781,10 @@ static void x86_cpu_list_feature_names(FeatureWordArray features, strList **next = feat_names; for (w = 0; w < FEATURE_WORDS; w++) { - uint32_t filtered = features[w]; + uint64_t filtered = features[w]; int i; - for (i = 0; i < 32; i++) { - if (filtered & (1UL << i)) { + for (i = 0; i < 64; i++) { + if (filtered & (1ULL << i)) { strList *new = g_new0(strList, 1); new->value = g_strdup(x86_cpu_feature_name(w, i)); *next = new; @@ -3576,7 +3836,7 @@ static void x86_cpu_class_check_missing_features(X86CPUClass *xcc, next = &new->next; } - x86_cpu_filter_features(xc); + x86_cpu_filter_features(xc, false); x86_cpu_list_feature_names(xc->filtered_features, next); @@ -3693,7 +3953,7 @@ void x86_cpu_list(void) names = NULL; for (i = 0; i < ARRAY_SIZE(feature_word_info); i++) { FeatureWordInfo *fw = &feature_word_info[i]; - for (j = 0; j < 32; j++) { + for (j = 0; j < 64; j++) { if (fw->feat_names[j]) { names = g_list_append(names, (gpointer)fw->feat_names[j]); } @@ -3748,11 +4008,11 @@ CpuDefinitionInfoList *qmp_query_cpu_definitions(Error **errp) return cpu_list; } -static uint32_t x86_cpu_get_supported_feature_word(FeatureWord w, +static uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, bool migratable_only) { FeatureWordInfo *wi = &feature_word_info[w]; - uint32_t r = 0; + uint64_t r = 0; if (kvm_enabled()) { switch (wi->type) { @@ -3784,15 +4044,6 @@ static uint32_t x86_cpu_get_supported_feature_word(FeatureWord w, return r; } -static void x86_cpu_report_filtered_features(X86CPU *cpu) -{ - FeatureWord w; - - for (w = 0; w < FEATURE_WORDS; w++) { - report_unavailable_features(w, cpu->filtered_features[w]); - } -} - static void x86_cpu_apply_props(X86CPU *cpu, PropValue *props) { PropValue *pv; @@ -3932,7 +4183,7 @@ static QDict *x86_cpu_static_props(void) for (w = 0; w < FEATURE_WORDS; w++) { FeatureWordInfo *fi = &feature_word_info[w]; int bit; - for (bit = 0; bit < 32; bit++) { + for (bit = 0; bit < 64; bit++) { if (!fi->feat_names[bit]) { continue; } @@ -5048,9 +5299,26 @@ static void x86_cpu_expand_features(X86CPU *cpu, Error **errp) { CPUX86State *env = &cpu->env; FeatureWord w; + int i; GList *l; Error *local_err = NULL; + for (l = plus_features; l; l = l->next) { + const char *prop = l->data; + object_property_set_bool(OBJECT(cpu), true, prop, &local_err); + if (local_err) { + goto out; + } + } + + for (l = minus_features; l; l = l->next) { + const char *prop = l->data; + object_property_set_bool(OBJECT(cpu), false, prop, &local_err); + if (local_err) { + goto out; + } + } + /*TODO: Now cpu->max_features doesn't overwrite features * set using QOM properties, and we can convert * plus_features & minus_features to global properties @@ -5068,19 +5336,18 @@ static void x86_cpu_expand_features(X86CPU *cpu, Error **errp) } } - for (l = plus_features; l; l = l->next) { - const char *prop = l->data; - object_property_set_bool(OBJECT(cpu), true, prop, &local_err); - if (local_err) { - goto out; - } - } + for (i = 0; i < ARRAY_SIZE(feature_dependencies); i++) { + FeatureDep *d = &feature_dependencies[i]; + if (!(env->features[d->from.index] & d->from.mask)) { + uint64_t unavailable_features = env->features[d->to.index] & d->to.mask; - for (l = minus_features; l; l = l->next) { - const char *prop = l->data; - object_property_set_bool(OBJECT(cpu), false, prop, &local_err); - if (local_err) { - goto out; + /* Not an error unless the dependent feature was added explicitly. */ + mark_unavailable_features(cpu, d->to.index, + unavailable_features & env->user_features[d->to.index], + "This feature depends on other features that were not requested"); + + env->user_features[d->to.index] |= unavailable_features; + env->features[d->to.index] &= ~unavailable_features; } } @@ -5154,24 +5421,24 @@ out: * * Returns: 0 if all flags are supported by the host, non-zero otherwise. */ -static int x86_cpu_filter_features(X86CPU *cpu) +static void x86_cpu_filter_features(X86CPU *cpu, bool verbose) { CPUX86State *env = &cpu->env; FeatureWord w; - int rv = 0; + const char *prefix = NULL; + + if (verbose) { + prefix = accel_uses_host_cpuid() + ? "host doesn't support requested feature" + : "TCG doesn't support requested feature"; + } for (w = 0; w < FEATURE_WORDS; w++) { - uint32_t host_feat = + uint64_t host_feat = x86_cpu_get_supported_feature_word(w, false); - uint32_t requested_features = env->features[w]; - uint32_t available_features = requested_features & host_feat; - if (!cpu->force_features) { - env->features[w] = available_features; - } - cpu->filtered_features[w] = requested_features & ~available_features; - if (cpu->filtered_features[w]) { - rv = 1; - } + uint64_t requested_features = env->features[w]; + uint64_t unavailable_features = requested_features & ~host_feat; + mark_unavailable_features(cpu, w, unavailable_features, prefix); } if ((env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_INTEL_PT) && @@ -5197,13 +5464,9 @@ static int x86_cpu_filter_features(X86CPU *cpu) * host can't emulate the capabilities we report on * cpu_x86_cpuid(), intel-pt can't be enabled on the current host. */ - env->features[FEAT_7_0_EBX] &= ~CPUID_7_0_EBX_INTEL_PT; - cpu->filtered_features[FEAT_7_0_EBX] |= CPUID_7_0_EBX_INTEL_PT; - rv = 1; + mark_unavailable_features(cpu, FEAT_7_0_EBX, CPUID_7_0_EBX_INTEL_PT, prefix); } } - - return rv; } static void x86_cpu_realizefn(DeviceState *dev, Error **errp) @@ -5244,16 +5507,14 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) goto out; } - if (x86_cpu_filter_features(cpu) && - (cpu->check_cpuid || cpu->enforce_cpuid)) { - x86_cpu_report_filtered_features(cpu); - if (cpu->enforce_cpuid) { - error_setg(&local_err, - accel_uses_host_cpuid() ? - "Host doesn't support requested features" : - "TCG doesn't support requested features"); - goto out; - } + x86_cpu_filter_features(cpu, cpu->check_cpuid || cpu->enforce_cpuid); + + if (cpu->enforce_cpuid && x86_cpu_have_filtered_features(cpu)) { + error_setg(&local_err, + accel_uses_host_cpuid() ? + "Host doesn't support requested features" : + "TCG doesn't support requested features"); + goto out; } /* On AMD CPUs, some CPUID[8000_0001].EDX bits must match the bits on @@ -5473,7 +5734,7 @@ static void x86_cpu_unrealizefn(DeviceState *dev, Error **errp) typedef struct BitProperty { FeatureWord w; - uint32_t mask; + uint64_t mask; } BitProperty; static void x86_cpu_get_bit_prop(Object *obj, Visitor *v, const char *name, @@ -5481,7 +5742,7 @@ static void x86_cpu_get_bit_prop(Object *obj, Visitor *v, const char *name, { X86CPU *cpu = X86_CPU(obj); BitProperty *fp = opaque; - uint32_t f = cpu->env.features[fp->w]; + uint64_t f = cpu->env.features[fp->w]; bool value = (f & fp->mask) == fp->mask; visit_type_bool(v, name, &value, errp); } @@ -5534,7 +5795,7 @@ static void x86_cpu_register_bit_prop(X86CPU *cpu, { BitProperty *fp; ObjectProperty *op; - uint32_t mask = (1UL << bitnr); + uint64_t mask = (1ULL << bitnr); op = object_property_find(OBJECT(cpu), prop_name, NULL); if (op) { @@ -5668,7 +5929,7 @@ static void x86_cpu_initfn(Object *obj) for (w = 0; w < FEATURE_WORDS; w++) { int bitnr; - for (bitnr = 0; bitnr < 32; bitnr++) { + for (bitnr = 0; bitnr < 64; bitnr++) { x86_cpu_register_feature_bit_props(cpu, w, bitnr); } } @@ -5984,7 +6245,7 @@ static void x86_cpu_common_class_init(ObjectClass *oc, void *data) #ifndef CONFIG_USER_ONLY cc->asidx_from_attrs = x86_asidx_from_attrs; cc->get_memory_mapping = x86_cpu_get_memory_mapping; - cc->get_phys_page_debug = x86_cpu_get_phys_page_debug; + cc->get_phys_page_attrs_debug = x86_cpu_get_phys_page_attrs_debug; cc->write_elf64_note = x86_cpu_write_elf64_note; cc->write_elf64_qemunote = x86_cpu_write_elf64_qemunote; cc->write_elf32_note = x86_cpu_write_elf32_note; diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 5f6e3a0..eaa5395 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -452,6 +452,25 @@ typedef enum X86Seg { #define MSR_IA32_BNDCFGS 0x00000d90 #define MSR_IA32_XSS 0x00000da0 +#define MSR_IA32_VMX_BASIC 0x00000480 +#define MSR_IA32_VMX_PINBASED_CTLS 0x00000481 +#define MSR_IA32_VMX_PROCBASED_CTLS 0x00000482 +#define MSR_IA32_VMX_EXIT_CTLS 0x00000483 +#define MSR_IA32_VMX_ENTRY_CTLS 0x00000484 +#define MSR_IA32_VMX_MISC 0x00000485 +#define MSR_IA32_VMX_CR0_FIXED0 0x00000486 +#define MSR_IA32_VMX_CR0_FIXED1 0x00000487 +#define MSR_IA32_VMX_CR4_FIXED0 0x00000488 +#define MSR_IA32_VMX_CR4_FIXED1 0x00000489 +#define MSR_IA32_VMX_VMCS_ENUM 0x0000048a +#define MSR_IA32_VMX_PROCBASED_CTLS2 0x0000048b +#define MSR_IA32_VMX_EPT_VPID_CAP 0x0000048c +#define MSR_IA32_VMX_TRUE_PINBASED_CTLS 0x0000048d +#define MSR_IA32_VMX_TRUE_PROCBASED_CTLS 0x0000048e +#define MSR_IA32_VMX_TRUE_EXIT_CTLS 0x0000048f +#define MSR_IA32_VMX_TRUE_ENTRY_CTLS 0x00000490 +#define MSR_IA32_VMX_VMFUNC 0x00000491 + #define XSTATE_FP_BIT 0 #define XSTATE_SSE_BIT 1 #define XSTATE_YMM_BIT 2 @@ -499,10 +518,19 @@ typedef enum FeatureWord { FEAT_XSAVE_COMP_HI, /* CPUID[EAX=0xd,ECX=0].EDX */ FEAT_ARCH_CAPABILITIES, FEAT_CORE_CAPABILITY, + FEAT_VMX_PROCBASED_CTLS, + FEAT_VMX_SECONDARY_CTLS, + FEAT_VMX_PINBASED_CTLS, + FEAT_VMX_EXIT_CTLS, + FEAT_VMX_ENTRY_CTLS, + FEAT_VMX_MISC, + FEAT_VMX_EPT_VPID_CAPS, + FEAT_VMX_BASIC, + FEAT_VMX_VMFUNC, FEATURE_WORDS, } FeatureWord; -typedef uint32_t FeatureWordArray[FEATURE_WORDS]; +typedef uint64_t FeatureWordArray[FEATURE_WORDS]; /* cpuid_features bits */ #define CPUID_FP87 (1U << 0) @@ -695,6 +723,8 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS]; #define CPUID_7_1_EAX_AVX512_BF16 (1U << 5) /* AVX512 BFloat16 Instruction */ +#define CPUID_8000_0008_EBX_CLZERO (1U << 0) /* CLZERO instruction */ +#define CPUID_8000_0008_EBX_XSAVEERPTR (1U << 2) /* Always save/restore FP error pointers */ #define CPUID_8000_0008_EBX_WBNOINVD (1U << 9) /* Write back and do not invalidate cache */ #define CPUID_8000_0008_EBX_IBPB (1U << 12) /* Indirect Branch Prediction Barrier */ @@ -750,6 +780,117 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS]; #define MSR_CORE_CAP_SPLIT_LOCK_DETECT (1U << 5) +/* VMX MSR features */ +#define MSR_VMX_BASIC_VMCS_REVISION_MASK 0x7FFFFFFFull +#define MSR_VMX_BASIC_VMXON_REGION_SIZE_MASK (0x00001FFFull << 32) +#define MSR_VMX_BASIC_VMCS_MEM_TYPE_MASK (0x003C0000ull << 32) +#define MSR_VMX_BASIC_DUAL_MONITOR (1ULL << 49) +#define MSR_VMX_BASIC_INS_OUTS (1ULL << 54) +#define MSR_VMX_BASIC_TRUE_CTLS (1ULL << 55) + +#define MSR_VMX_MISC_PREEMPTION_TIMER_SHIFT_MASK 0x1Full +#define MSR_VMX_MISC_STORE_LMA (1ULL << 5) +#define MSR_VMX_MISC_ACTIVITY_HLT (1ULL << 6) +#define MSR_VMX_MISC_ACTIVITY_SHUTDOWN (1ULL << 7) +#define MSR_VMX_MISC_ACTIVITY_WAIT_SIPI (1ULL << 8) +#define MSR_VMX_MISC_MAX_MSR_LIST_SIZE_MASK 0x0E000000ull +#define MSR_VMX_MISC_VMWRITE_VMEXIT (1ULL << 29) +#define MSR_VMX_MISC_ZERO_LEN_INJECT (1ULL << 30) + +#define MSR_VMX_EPT_EXECONLY (1ULL << 0) +#define MSR_VMX_EPT_PAGE_WALK_LENGTH_4 (1ULL << 6) +#define MSR_VMX_EPT_PAGE_WALK_LENGTH_5 (1ULL << 7) +#define MSR_VMX_EPT_UC (1ULL << 8) +#define MSR_VMX_EPT_WB (1ULL << 14) +#define MSR_VMX_EPT_2MB (1ULL << 16) +#define MSR_VMX_EPT_1GB (1ULL << 17) +#define MSR_VMX_EPT_INVEPT (1ULL << 20) +#define MSR_VMX_EPT_AD_BITS (1ULL << 21) +#define MSR_VMX_EPT_ADVANCED_VMEXIT_INFO (1ULL << 22) +#define MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT (1ULL << 25) +#define MSR_VMX_EPT_INVEPT_ALL_CONTEXT (1ULL << 26) +#define MSR_VMX_EPT_INVVPID (1ULL << 32) +#define MSR_VMX_EPT_INVVPID_SINGLE_ADDR (1ULL << 40) +#define MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT (1ULL << 41) +#define MSR_VMX_EPT_INVVPID_ALL_CONTEXT (1ULL << 42) +#define MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS (1ULL << 43) + +#define MSR_VMX_VMFUNC_EPT_SWITCHING (1ULL << 0) + + +/* VMX controls */ +#define VMX_CPU_BASED_VIRTUAL_INTR_PENDING 0x00000004 +#define VMX_CPU_BASED_USE_TSC_OFFSETING 0x00000008 +#define VMX_CPU_BASED_HLT_EXITING 0x00000080 +#define VMX_CPU_BASED_INVLPG_EXITING 0x00000200 +#define VMX_CPU_BASED_MWAIT_EXITING 0x00000400 +#define VMX_CPU_BASED_RDPMC_EXITING 0x00000800 +#define VMX_CPU_BASED_RDTSC_EXITING 0x00001000 +#define VMX_CPU_BASED_CR3_LOAD_EXITING 0x00008000 +#define VMX_CPU_BASED_CR3_STORE_EXITING 0x00010000 +#define VMX_CPU_BASED_CR8_LOAD_EXITING 0x00080000 +#define VMX_CPU_BASED_CR8_STORE_EXITING 0x00100000 +#define VMX_CPU_BASED_TPR_SHADOW 0x00200000 +#define VMX_CPU_BASED_VIRTUAL_NMI_PENDING 0x00400000 +#define VMX_CPU_BASED_MOV_DR_EXITING 0x00800000 +#define VMX_CPU_BASED_UNCOND_IO_EXITING 0x01000000 +#define VMX_CPU_BASED_USE_IO_BITMAPS 0x02000000 +#define VMX_CPU_BASED_MONITOR_TRAP_FLAG 0x08000000 +#define VMX_CPU_BASED_USE_MSR_BITMAPS 0x10000000 +#define VMX_CPU_BASED_MONITOR_EXITING 0x20000000 +#define VMX_CPU_BASED_PAUSE_EXITING 0x40000000 +#define VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS 0x80000000 + +#define VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 +#define VMX_SECONDARY_EXEC_ENABLE_EPT 0x00000002 +#define VMX_SECONDARY_EXEC_DESC 0x00000004 +#define VMX_SECONDARY_EXEC_RDTSCP 0x00000008 +#define VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE 0x00000010 +#define VMX_SECONDARY_EXEC_ENABLE_VPID 0x00000020 +#define VMX_SECONDARY_EXEC_WBINVD_EXITING 0x00000040 +#define VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 +#define VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100 +#define VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200 +#define VMX_SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 +#define VMX_SECONDARY_EXEC_RDRAND_EXITING 0x00000800 +#define VMX_SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 +#define VMX_SECONDARY_EXEC_ENABLE_VMFUNC 0x00002000 +#define VMX_SECONDARY_EXEC_SHADOW_VMCS 0x00004000 +#define VMX_SECONDARY_EXEC_ENCLS_EXITING 0x00008000 +#define VMX_SECONDARY_EXEC_RDSEED_EXITING 0x00010000 +#define VMX_SECONDARY_EXEC_ENABLE_PML 0x00020000 +#define VMX_SECONDARY_EXEC_XSAVES 0x00100000 + +#define VMX_PIN_BASED_EXT_INTR_MASK 0x00000001 +#define VMX_PIN_BASED_NMI_EXITING 0x00000008 +#define VMX_PIN_BASED_VIRTUAL_NMIS 0x00000020 +#define VMX_PIN_BASED_VMX_PREEMPTION_TIMER 0x00000040 +#define VMX_PIN_BASED_POSTED_INTR 0x00000080 + +#define VMX_VM_EXIT_SAVE_DEBUG_CONTROLS 0x00000004 +#define VMX_VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200 +#define VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL 0x00001000 +#define VMX_VM_EXIT_ACK_INTR_ON_EXIT 0x00008000 +#define VMX_VM_EXIT_SAVE_IA32_PAT 0x00040000 +#define VMX_VM_EXIT_LOAD_IA32_PAT 0x00080000 +#define VMX_VM_EXIT_SAVE_IA32_EFER 0x00100000 +#define VMX_VM_EXIT_LOAD_IA32_EFER 0x00200000 +#define VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER 0x00400000 +#define VMX_VM_EXIT_CLEAR_BNDCFGS 0x00800000 +#define VMX_VM_EXIT_PT_CONCEAL_PIP 0x01000000 +#define VMX_VM_EXIT_CLEAR_IA32_RTIT_CTL 0x02000000 + +#define VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS 0x00000004 +#define VMX_VM_ENTRY_IA32E_MODE 0x00000200 +#define VMX_VM_ENTRY_SMM 0x00000400 +#define VMX_VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800 +#define VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL 0x00002000 +#define VMX_VM_ENTRY_LOAD_IA32_PAT 0x00004000 +#define VMX_VM_ENTRY_LOAD_IA32_EFER 0x00008000 +#define VMX_VM_ENTRY_LOAD_BNDCFGS 0x00010000 +#define VMX_VM_ENTRY_PT_CONCEAL_PIP 0x00020000 +#define VMX_VM_ENTRY_LOAD_IA32_RTIT_CTL 0x00040000 + /* Supported Hyper-V Enlightenments */ #define HYPERV_FEAT_RELAXED 0 #define HYPERV_FEAT_VAPIC 1 @@ -1549,7 +1690,8 @@ void x86_cpu_get_memory_mapping(CPUState *cpu, MemoryMappingList *list, void x86_cpu_dump_state(CPUState *cs, FILE *f, int flags); -hwaddr x86_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr); +hwaddr x86_cpu_get_phys_page_attrs_debug(CPUState *cpu, vaddr addr, + MemTxAttrs *attrs); int x86_cpu_gdb_read_register(CPUState *cpu, uint8_t *buf, int reg); int x86_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg); diff --git a/target/i386/helper.c b/target/i386/helper.c index 0fa51be..c3a6e4f 100644 --- a/target/i386/helper.c +++ b/target/i386/helper.c @@ -715,7 +715,8 @@ void cpu_x86_update_cr4(CPUX86State *env, uint32_t new_cr4) } #if !defined(CONFIG_USER_ONLY) -hwaddr x86_cpu_get_phys_page_debug(CPUState *cs, vaddr addr) +hwaddr x86_cpu_get_phys_page_attrs_debug(CPUState *cs, vaddr addr, + MemTxAttrs *attrs) { X86CPU *cpu = X86_CPU(cs); CPUX86State *env = &cpu->env; @@ -725,6 +726,8 @@ hwaddr x86_cpu_get_phys_page_debug(CPUState *cs, vaddr addr) uint32_t page_offset; int page_size; + *attrs = cpu_get_mem_attrs(env); + a20_mask = x86_get_a20_mask(env); if (!(env->cr[0] & CR0_PG_MASK)) { pte = addr & a20_mask; diff --git a/target/i386/kvm.c b/target/i386/kvm.c index 9206909..11b9c85 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -100,6 +100,7 @@ static bool has_msr_virt_ssbd; static bool has_msr_smi_count; static bool has_msr_arch_capabs; static bool has_msr_core_capabs; +static bool has_msr_vmx_vmfunc; static uint32_t has_architectural_pmu_version; static uint32_t num_architectural_pmu_gp_counters; @@ -187,7 +188,7 @@ static int kvm_get_tsc(CPUState *cs) struct { struct kvm_msrs info; struct kvm_msr_entry entries[1]; - } msr_data; + } msr_data = {}; int ret; if (env->tsc_valid) { @@ -442,13 +443,14 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function, return ret; } -uint32_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index) +uint64_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index) { struct { struct kvm_msrs info; struct kvm_msr_entry entries[1]; - } msr_data; - uint32_t ret; + } msr_data = {}; + uint64_t value; + uint32_t ret, can_be_one, must_be_one; if (kvm_feature_msrs == NULL) { /* Host doesn't support feature MSRs */ return 0; @@ -474,7 +476,42 @@ uint32_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index) exit(1); } - return msr_data.entries[0].data; + value = msr_data.entries[0].data; + switch (index) { + case MSR_IA32_VMX_PROCBASED_CTLS2: + /* KVM forgot to add these bits for some time, do this ourselves. */ + if (kvm_arch_get_supported_cpuid(s, 0xD, 1, R_ECX) & CPUID_XSAVE_XSAVES) { + value |= (uint64_t)VMX_SECONDARY_EXEC_XSAVES << 32; + } + if (kvm_arch_get_supported_cpuid(s, 1, 0, R_ECX) & CPUID_EXT_RDRAND) { + value |= (uint64_t)VMX_SECONDARY_EXEC_RDRAND_EXITING << 32; + } + if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) & CPUID_7_0_EBX_INVPCID) { + value |= (uint64_t)VMX_SECONDARY_EXEC_ENABLE_INVPCID << 32; + } + if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) & CPUID_7_0_EBX_RDSEED) { + value |= (uint64_t)VMX_SECONDARY_EXEC_RDSEED_EXITING << 32; + } + if (kvm_arch_get_supported_cpuid(s, 0x80000001, 0, R_EDX) & CPUID_EXT2_RDTSCP) { + value |= (uint64_t)VMX_SECONDARY_EXEC_RDTSCP << 32; + } + /* fall through */ + case MSR_IA32_VMX_TRUE_PINBASED_CTLS: + case MSR_IA32_VMX_TRUE_PROCBASED_CTLS: + case MSR_IA32_VMX_TRUE_ENTRY_CTLS: + case MSR_IA32_VMX_TRUE_EXIT_CTLS: + /* + * Return true for bits that can be one, but do not have to be one. + * The SDM tells us which bits could have a "must be one" setting, + * so we can do the opposite transformation in make_vmx_msr_value. + */ + must_be_one = (uint32_t)value; + can_be_one = (uint32_t)(value >> 32); + return can_be_one & ~must_be_one; + + default: + return value; + } } @@ -1939,6 +1976,9 @@ static int kvm_get_supported_msrs(KVMState *s) case MSR_IA32_CORE_CAPABILITY: has_msr_core_capabs = true; break; + case MSR_IA32_VMX_VMFUNC: + has_msr_vmx_vmfunc = true; + break; } } } @@ -2413,6 +2453,132 @@ static int kvm_put_msr_feature_control(X86CPU *cpu) return 0; } +static uint64_t make_vmx_msr_value(uint32_t index, uint32_t features) +{ + uint32_t default1, can_be_one, can_be_zero; + uint32_t must_be_one; + + switch (index) { + case MSR_IA32_VMX_TRUE_PINBASED_CTLS: + default1 = 0x00000016; + break; + case MSR_IA32_VMX_TRUE_PROCBASED_CTLS: + default1 = 0x0401e172; + break; + case MSR_IA32_VMX_TRUE_ENTRY_CTLS: + default1 = 0x000011ff; + break; + case MSR_IA32_VMX_TRUE_EXIT_CTLS: + default1 = 0x00036dff; + break; + case MSR_IA32_VMX_PROCBASED_CTLS2: + default1 = 0; + break; + default: + abort(); + } + + /* If a feature bit is set, the control can be either set or clear. + * Otherwise the value is limited to either 0 or 1 by default1. + */ + can_be_one = features | default1; + can_be_zero = features | ~default1; + must_be_one = ~can_be_zero; + + /* + * Bit 0:31 -> 0 if the control bit can be zero (i.e. 1 if it must be one). + * Bit 32:63 -> 1 if the control bit can be one. + */ + return must_be_one | (((uint64_t)can_be_one) << 32); +} + +#define VMCS12_MAX_FIELD_INDEX (0x17) + +static void kvm_msr_entry_add_vmx(X86CPU *cpu, FeatureWordArray f) +{ + uint64_t kvm_vmx_basic = + kvm_arch_get_supported_msr_feature(kvm_state, + MSR_IA32_VMX_BASIC); + uint64_t kvm_vmx_misc = + kvm_arch_get_supported_msr_feature(kvm_state, + MSR_IA32_VMX_MISC); + uint64_t kvm_vmx_ept_vpid = + kvm_arch_get_supported_msr_feature(kvm_state, + MSR_IA32_VMX_EPT_VPID_CAP); + + /* + * If the guest is 64-bit, a value of 1 is allowed for the host address + * space size vmexit control. + */ + uint64_t fixed_vmx_exit = f[FEAT_8000_0001_EDX] & CPUID_EXT2_LM + ? (uint64_t)VMX_VM_EXIT_HOST_ADDR_SPACE_SIZE << 32 : 0; + + /* + * Bits 0-30, 32-44 and 50-53 come from the host. KVM should + * not change them for backwards compatibility. + */ + uint64_t fixed_vmx_basic = kvm_vmx_basic & + (MSR_VMX_BASIC_VMCS_REVISION_MASK | + MSR_VMX_BASIC_VMXON_REGION_SIZE_MASK | + MSR_VMX_BASIC_VMCS_MEM_TYPE_MASK); + + /* + * Same for bits 0-4 and 25-27. Bits 16-24 (CR3 target count) can + * change in the future but are always zero for now, clear them to be + * future proof. Bits 32-63 in theory could change, though KVM does + * not support dual-monitor treatment and probably never will; mask + * them out as well. + */ + uint64_t fixed_vmx_misc = kvm_vmx_misc & + (MSR_VMX_MISC_PREEMPTION_TIMER_SHIFT_MASK | + MSR_VMX_MISC_MAX_MSR_LIST_SIZE_MASK); + + /* + * EPT memory types should not change either, so we do not bother + * adding features for them. + */ + uint64_t fixed_vmx_ept_mask = + (f[FEAT_VMX_SECONDARY_CTLS] & VMX_SECONDARY_EXEC_ENABLE_EPT ? + MSR_VMX_EPT_UC | MSR_VMX_EPT_WB : 0); + uint64_t fixed_vmx_ept_vpid = kvm_vmx_ept_vpid & fixed_vmx_ept_mask; + + kvm_msr_entry_add(cpu, MSR_IA32_VMX_TRUE_PROCBASED_CTLS, + make_vmx_msr_value(MSR_IA32_VMX_TRUE_PROCBASED_CTLS, + f[FEAT_VMX_PROCBASED_CTLS])); + kvm_msr_entry_add(cpu, MSR_IA32_VMX_TRUE_PINBASED_CTLS, + make_vmx_msr_value(MSR_IA32_VMX_TRUE_PINBASED_CTLS, + f[FEAT_VMX_PINBASED_CTLS])); + kvm_msr_entry_add(cpu, MSR_IA32_VMX_TRUE_EXIT_CTLS, + make_vmx_msr_value(MSR_IA32_VMX_TRUE_EXIT_CTLS, + f[FEAT_VMX_EXIT_CTLS]) | fixed_vmx_exit); + kvm_msr_entry_add(cpu, MSR_IA32_VMX_TRUE_ENTRY_CTLS, + make_vmx_msr_value(MSR_IA32_VMX_TRUE_ENTRY_CTLS, + f[FEAT_VMX_ENTRY_CTLS])); + kvm_msr_entry_add(cpu, MSR_IA32_VMX_PROCBASED_CTLS2, + make_vmx_msr_value(MSR_IA32_VMX_PROCBASED_CTLS2, + f[FEAT_VMX_SECONDARY_CTLS])); + kvm_msr_entry_add(cpu, MSR_IA32_VMX_EPT_VPID_CAP, + f[FEAT_VMX_EPT_VPID_CAPS] | fixed_vmx_ept_vpid); + kvm_msr_entry_add(cpu, MSR_IA32_VMX_BASIC, + f[FEAT_VMX_BASIC] | fixed_vmx_basic); + kvm_msr_entry_add(cpu, MSR_IA32_VMX_MISC, + f[FEAT_VMX_MISC] | fixed_vmx_misc); + if (has_msr_vmx_vmfunc) { + kvm_msr_entry_add(cpu, MSR_IA32_VMX_VMFUNC, f[FEAT_VMX_VMFUNC]); + } + + /* + * Just to be safe, write these with constant values. The CRn_FIXED1 + * MSRs are generated by KVM based on the vCPU's CPUID. + */ + kvm_msr_entry_add(cpu, MSR_IA32_VMX_CR0_FIXED0, + CR0_PE_MASK | CR0_PG_MASK | CR0_NE_MASK); + kvm_msr_entry_add(cpu, MSR_IA32_VMX_CR4_FIXED0, + CR4_VMXE_MASK); + kvm_msr_entry_add(cpu, MSR_IA32_VMX_VMCS_ENUM, + VMCS12_MAX_FIELD_INDEX << 1); +} + static int kvm_put_msrs(X86CPU *cpu, int level) { CPUX86State *env = &cpu->env; @@ -2657,7 +2823,16 @@ static int kvm_put_msrs(X86CPU *cpu, int level) /* Note: MSR_IA32_FEATURE_CONTROL is written separately, see * kvm_put_msr_feature_control. */ + + /* + * Older kernels do not include VMX MSRs in KVM_GET_MSR_INDEX_LIST, but + * all kernels with MSR features should have them. + */ + if (kvm_feature_msrs && cpu_has_vmx(env)) { + kvm_msr_entry_add_vmx(cpu, env->features); + } } + if (env->mcg_cap) { int i; diff --git a/tests/docker/test-clang b/tests/docker/test-clang index db9e697..8c51ead 100755 --- a/tests/docker/test-clang +++ b/tests/docker/test-clang @@ -17,7 +17,9 @@ requires clang cd "$BUILD_DIR" -OPTS="--cxx=clang++ --cc=clang --host-cc=clang --enable-sanitizers" +OPTS="--cxx=clang++ --cc=clang --host-cc=clang" +OPTS="$OPTS --extra-cflags=-fsanitize=undefined \ + --extra-cflags=-fno-sanitize=float-divide-by-zero" build_qemu $OPTS check_qemu install_qemu diff --git a/tests/docker/test-debug b/tests/docker/test-debug index 137f4f2..c050fa0 100755 --- a/tests/docker/test-debug +++ b/tests/docker/test-debug @@ -21,6 +21,7 @@ cd "$BUILD_DIR" OPTS="--cxx=clang++ --cc=clang --host-cc=clang" OPTS="--enable-debug --enable-sanitizers $OPTS" +export ASAN_OPTIONS=detect_leaks=0 build_qemu $OPTS check_qemu check V=1 install_qemu diff --git a/tests/migration-test.c b/tests/migration-test.c index 221a33d..59f291c 100644 --- a/tests/migration-test.c +++ b/tests/migration-test.c @@ -240,6 +240,17 @@ static int64_t read_ram_property_int(QTestState *who, const char *property) return result; } +static int64_t read_migrate_property_int(QTestState *who, const char *property) +{ + QDict *rsp_return; + int64_t result; + + rsp_return = migrate_query(who); + result = qdict_get_try_int(rsp_return, property, 0); + qobject_unref(rsp_return); + return result; +} + static uint64_t get_migration_pass(QTestState *who) { return read_ram_property_int(who, "dirty-sync-count"); @@ -254,32 +265,46 @@ static void read_blocktime(QTestState *who) qobject_unref(rsp_return); } +static bool check_migration_status(QTestState *who, const char *goal, + const char **ungoals) +{ + bool ready; + char *current_status; + const char **ungoal; + + current_status = migrate_query_status(who); + ready = strcmp(current_status, goal) == 0; + if (!ungoals) { + g_assert_cmpstr(current_status, !=, "failed"); + /* + * If looking for a state other than completed, + * completion of migration would cause the test to + * hang. + */ + if (strcmp(goal, "completed") != 0) { + g_assert_cmpstr(current_status, !=, "completed"); + } + } else { + for (ungoal = ungoals; *ungoal; ungoal++) { + g_assert_cmpstr(current_status, !=, *ungoal); + } + } + g_free(current_status); + return ready; +} + static void wait_for_migration_status(QTestState *who, const char *goal, const char **ungoals) { - while (true) { - bool completed; - char *status; - const char **ungoal; - - status = migrate_query_status(who); - completed = strcmp(status, goal) == 0; - for (ungoal = ungoals; *ungoal; ungoal++) { - g_assert_cmpstr(status, !=, *ungoal); - } - g_free(status); - if (completed) { - return; - } + while (!check_migration_status(who, goal, ungoals)) { usleep(1000); } } static void wait_for_migration_complete(QTestState *who) { - wait_for_migration_status(who, "completed", - (const char * []) { "failed", NULL }); + wait_for_migration_status(who, "completed", NULL); } static void wait_for_migration_pass(QTestState *who) @@ -450,6 +475,17 @@ static void migrate_pause(QTestState *who) qobject_unref(rsp); } +static void migrate_continue(QTestState *who, const char *state) +{ + QDict *rsp; + + rsp = wait_command(who, + "{ 'execute': 'migrate-continue'," + " 'arguments': { 'state': %s } }", + state); + qobject_unref(rsp); +} + static void migrate_recover(QTestState *who, const char *uri) { QDict *rsp; @@ -814,9 +850,7 @@ static void test_postcopy_recovery(void) * Wait until postcopy is really started; we can only run the * migrate-pause command during a postcopy */ - wait_for_migration_status(from, "postcopy-active", - (const char * []) { "failed", - "completed", NULL }); + wait_for_migration_status(from, "postcopy-active", NULL); /* * Manually stop the postcopy migration. This emulates a network @@ -1210,6 +1244,89 @@ static void test_validate_uuid_dst_not_set(void) false, true); } +static void test_migrate_auto_converge(void) +{ + char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); + QTestState *from, *to; + int64_t remaining, percentage; + + /* + * We want the test to be stable and as fast as possible. + * E.g., with 1Gb/s bandwith migration may pass without throttling, + * so we need to decrease a bandwidth. + */ + const int64_t init_pct = 5, inc_pct = 50, max_pct = 95; + const int64_t max_bandwidth = 400000000; /* ~400Mb/s */ + const int64_t downtime_limit = 250; /* 250ms */ + /* + * We migrate through unix-socket (> 500Mb/s). + * Thus, expected migration speed ~= bandwidth limit (< 500Mb/s). + * So, we can predict expected_threshold + */ + const int64_t expected_threshold = max_bandwidth * downtime_limit / 1000; + + if (test_migrate_start(&from, &to, uri, false, false, NULL, NULL)) { + return; + } + + migrate_set_capability(from, "auto-converge", true); + migrate_set_parameter_int(from, "cpu-throttle-initial", init_pct); + migrate_set_parameter_int(from, "cpu-throttle-increment", inc_pct); + migrate_set_parameter_int(from, "max-cpu-throttle", max_pct); + + /* + * Set the initial parameters so that the migration could not converge + * without throttling. + */ + migrate_set_parameter_int(from, "downtime-limit", 1); + migrate_set_parameter_int(from, "max-bandwidth", 100000000); /* ~100Mb/s */ + + /* To check remaining size after precopy */ + migrate_set_capability(from, "pause-before-switchover", true); + + /* Wait for the first serial output from the source */ + wait_for_serial("src_serial"); + + migrate(from, uri, "{}"); + + /* Wait for throttling begins */ + percentage = 0; + while (percentage == 0) { + percentage = read_migrate_property_int(from, "cpu-throttle-percentage"); + usleep(100); + g_assert_false(got_stop); + } + /* The first percentage of throttling should be equal to init_pct */ + g_assert_cmpint(percentage, ==, init_pct); + /* Now, when we tested that throttling works, let it converge */ + migrate_set_parameter_int(from, "downtime-limit", downtime_limit); + migrate_set_parameter_int(from, "max-bandwidth", max_bandwidth); + + /* + * Wait for pre-switchover status to check last throttle percentage + * and remaining. These values will be zeroed later + */ + wait_for_migration_status(from, "pre-switchover", NULL); + + /* The final percentage of throttling shouldn't be greater than max_pct */ + percentage = read_migrate_property_int(from, "cpu-throttle-percentage"); + g_assert_cmpint(percentage, <=, max_pct); + + remaining = read_ram_property_int(from, "remaining"); + g_assert_cmpint(remaining, <, expected_threshold); + + migrate_continue(from, "pre-switchover"); + + qtest_qmp_eventwait(to, "RESUME"); + + wait_for_serial("dest_serial"); + wait_for_migration_complete(from); + + g_free(uri); + + test_migrate_end(from, to, true); +} + int main(int argc, char **argv) { char template[] = "/tmp/migration-test-XXXXXX"; @@ -1272,6 +1389,8 @@ int main(int argc, char **argv) qtest_add_func("/migration/validate_uuid_dst_not_set", test_validate_uuid_dst_not_set); + qtest_add_func("/migration/auto_converge", test_migrate_auto_converge); + ret = g_test_run(); g_assert_cmpint(ret, ==, 0); diff --git a/tests/test-char.c b/tests/test-char.c index d62de1b..45e42af 100644 --- a/tests/test-char.c +++ b/tests/test-char.c @@ -1103,7 +1103,7 @@ static void char_socket_server_two_clients_test(gconstpointer opaque) } -#ifdef HAVE_CHARDEV_SERIAL +#if defined(HAVE_CHARDEV_SERIAL) && !defined(WIN32) static void char_serial_test(void) { QemuOpts *opts; @@ -1460,7 +1460,7 @@ int main(int argc, char **argv) #endif g_test_add_func("/char/udp", char_udp_test); -#ifdef HAVE_CHARDEV_SERIAL +#if defined(HAVE_CHARDEV_SERIAL) && !defined(WIN32) g_test_add_func("/char/serial", char_serial_test); #endif g_test_add_func("/char/hotswap", char_hotswap_test); diff --git a/util/async.c b/util/async.c index 4e4c7af..ca83e32 100644 --- a/util/async.c +++ b/util/async.c @@ -354,7 +354,11 @@ void aio_notify(AioContext *ctx) void aio_notify_accept(AioContext *ctx) { - if (atomic_xchg(&ctx->notified, false)) { + if (atomic_xchg(&ctx->notified, false) +#ifdef WIN32 + || true +#endif + ) { event_notifier_test_and_clear(&ctx->notifier); } } diff --git a/util/oslib-win32.c b/util/oslib-win32.c index c62cd43..886e400 100644 --- a/util/oslib-win32.c +++ b/util/oslib-win32.c @@ -585,7 +585,11 @@ int qemu_connect_wrap(int sockfd, const struct sockaddr *addr, int ret; ret = connect(sockfd, addr, addrlen); if (ret < 0) { - errno = socket_error(); + if (WSAGetLastError() == WSAEWOULDBLOCK) { + errno = EINPROGRESS; + } else { + errno = socket_error(); + } } return ret; } |