From a23bc6539890d8b27458cf56bc4ed0e0d3c2de3e Mon Sep 17 00:00:00 2001 From: "Xin Li (Intel)" Date: Wed, 7 Aug 2024 01:18:10 -0700 Subject: target/i386: Delete duplicated macro definition CR4_FRED_MASK Macro CR4_FRED_MASK is defined twice, delete one. Signed-off-by: Xin Li (Intel) Link: https://lore.kernel.org/r/20240807081813.735158-2-xin@zytor.com Signed-off-by: Paolo Bonzini --- target/i386/cpu.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'target') diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 14edd57..2bf37dd 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -267,12 +267,6 @@ typedef enum X86Seg { #define CR4_FRED_MASK 0 #endif -#ifdef TARGET_X86_64 -#define CR4_FRED_MASK (1ULL << 32) -#else -#define CR4_FRED_MASK 0 -#endif - #define CR4_RESERVED_MASK \ (~(target_ulong)(CR4_VME_MASK | CR4_PVI_MASK | CR4_TSD_MASK \ | CR4_DE_MASK | CR4_PSE_MASK | CR4_PAE_MASK \ -- cgit v1.1 From 7c6ec5bc5fea92a4ddea3f0189e3a7e7588e1d19 Mon Sep 17 00:00:00 2001 From: "Xin Li (Intel)" Date: Wed, 7 Aug 2024 01:18:11 -0700 Subject: target/i386: Add VMX control bits for nested FRED support Add definitions of 1) VM-exit activate secondary controls bit 2) VM-entry load FRED bit which are required to enable nested FRED. Reviewed-by: Zhao Liu Signed-off-by: Xin Li (Intel) Link: https://lore.kernel.org/r/20240807081813.735158-3-xin@zytor.com Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'target') diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 85ef745..31f287c 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -1435,7 +1435,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { "vmx-exit-save-efer", "vmx-exit-load-efer", "vmx-exit-save-preemption-timer", "vmx-exit-clear-bndcfgs", NULL, "vmx-exit-clear-rtit-ctl", NULL, NULL, - NULL, "vmx-exit-load-pkrs", NULL, NULL, + NULL, "vmx-exit-load-pkrs", NULL, "vmx-exit-secondary-ctls", }, .msr = { .index = MSR_IA32_VMX_TRUE_EXIT_CTLS, @@ -1450,7 +1450,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { NULL, "vmx-entry-ia32e-mode", NULL, NULL, NULL, "vmx-entry-load-perf-global-ctrl", "vmx-entry-load-pat", "vmx-entry-load-efer", "vmx-entry-load-bndcfgs", NULL, "vmx-entry-load-rtit-ctl", NULL, - NULL, NULL, "vmx-entry-load-pkrs", NULL, + NULL, NULL, "vmx-entry-load-pkrs", "vmx-entry-load-fred", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, }, -- cgit v1.1 From ab891454ebe82f7e359be721007652556f9f8356 Mon Sep 17 00:00:00 2001 From: Lei Wang Date: Wed, 7 Aug 2024 01:18:12 -0700 Subject: target/i386: Raise the highest index value used for any VMCS encoding Because the index value of the VMCS field encoding of FRED injected-event data (one of the newly added VMCS fields for FRED transitions), 0x52, is larger than any existing index value, raise the highest index value used for any VMCS encoding to 0x52. Because the index value of the VMCS field encoding of Secondary VM-exit controls, 0x44, is larger than any existing index value, raise the highest index value used for any VMCS encoding to 0x44. 
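For reference, per the Intel SDM a VMCS field encoding keeps its index in bits 9:1, and IA32_VMX_VMCS_ENUM reports the highest index in use in those same bit positions; that is why the encodings 0x2052 and 0x2044 turn into the MSR values 0x52 and 0x44 used below. A minimal illustrative sketch (plain C, not part of this patch, names invented here):

#include <inttypes.h>
#include <stdio.h>

/* Extract the index field (bits 9:1) of a VMCS field encoding, kept in
 * place, which matches the format of IA32_VMX_VMCS_ENUM bits 9:1. */
static uint32_t vmcs_enum_value(uint32_t encoding)
{
    return encoding & 0x3feu;
}

int main(void)
{
    printf("0x%" PRIx32 "\n", vmcs_enum_value(0x2052)); /* FRED injected-event data   -> 0x52 */
    printf("0x%" PRIx32 "\n", vmcs_enum_value(0x2044)); /* secondary VM-exit controls -> 0x44 */
    printf("0x%" PRIx32 "\n", vmcs_enum_value(0x2032)); /* TSC multiplier             -> 0x32 */
    return 0;
}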
Co-developed-by: Xin Li Signed-off-by: Xin Li Signed-off-by: Lei Wang Signed-off-by: Xin Li (Intel) Link: https://lore.kernel.org/r/20240807081813.735158-4-xin@zytor.com Signed-off-by: Paolo Bonzini --- target/i386/cpu.h | 1 + target/i386/kvm/kvm.c | 9 ++++++++- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'target') diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 2bf37dd..9c39384 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1186,6 +1186,7 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); #define VMX_VM_EXIT_PT_CONCEAL_PIP 0x01000000 #define VMX_VM_EXIT_CLEAR_IA32_RTIT_CTL 0x02000000 #define VMX_VM_EXIT_LOAD_IA32_PKRS 0x20000000 +#define VMX_VM_EXIT_ACTIVATE_SECONDARY_CONTROLS 0x80000000 #define VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS 0x00000004 #define VMX_VM_ENTRY_IA32E_MODE 0x00000200 diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index c8056ef..27a3d76 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -3694,7 +3694,14 @@ static void kvm_msr_entry_add_vmx(X86CPU *cpu, FeatureWordArray f) kvm_msr_entry_add(cpu, MSR_IA32_VMX_CR4_FIXED0, CR4_VMXE_MASK); - if (f[FEAT_VMX_SECONDARY_CTLS] & VMX_SECONDARY_EXEC_TSC_SCALING) { + if (f[FEAT_7_1_EAX] & CPUID_7_1_EAX_FRED) { + /* FRED injected-event data (0x2052). */ + kvm_msr_entry_add(cpu, MSR_IA32_VMX_VMCS_ENUM, 0x52); + } else if (f[FEAT_VMX_EXIT_CTLS] & + VMX_VM_EXIT_ACTIVATE_SECONDARY_CONTROLS) { + /* Secondary VM-exit controls (0x2044). */ + kvm_msr_entry_add(cpu, MSR_IA32_VMX_VMCS_ENUM, 0x44); + } else if (f[FEAT_VMX_SECONDARY_CTLS] & VMX_SECONDARY_EXEC_TSC_SCALING) { /* TSC multiplier (0x2032). */ kvm_msr_entry_add(cpu, MSR_IA32_VMX_VMCS_ENUM, 0x32); } else { -- cgit v1.1 From ed2880f4e93bf83106ebdc8562a5ee4d93285a3b Mon Sep 17 00:00:00 2001 From: Ani Sinha Date: Tue, 3 Sep 2024 19:30:45 +0530 Subject: kvm/i386: make kvm_filter_msr() and related definitions private to kvm module MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kvm_filter_msr() is only used from the i386 kvm module. Make it static so that it's easy for developers to understand that it's not used anywhere else. Same for QEMURDMSRHandler, QEMUWRMSRHandler and KVMMSRHandlers definitions. CC: philmd@linaro.org Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Ani Sinha Link: https://lore.kernel.org/r/20240903140045.41167-1-anisinha@redhat.com [Make struct unnamed. 
- Paolo] Signed-off-by: Paolo Bonzini --- target/i386/kvm/kvm.c | 12 +++++++++++- target/i386/kvm/kvm_i386.h | 11 ----------- 2 files changed, 11 insertions(+), 12 deletions(-) (limited to 'target') diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index 27a3d76..94fdf3c 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -92,7 +92,17 @@ * 255 kvm_msr_entry structs */ #define MSR_BUF_SIZE 4096 +typedef bool QEMURDMSRHandler(X86CPU *cpu, uint32_t msr, uint64_t *val); +typedef bool QEMUWRMSRHandler(X86CPU *cpu, uint32_t msr, uint64_t val); +typedef struct { + uint32_t msr; + QEMURDMSRHandler *rdmsr; + QEMUWRMSRHandler *wrmsr; +} KVMMSRHandlers; + static void kvm_init_msrs(X86CPU *cpu); +static bool kvm_filter_msr(KVMState *s, uint32_t msr, QEMURDMSRHandler *rdmsr, + QEMUWRMSRHandler *wrmsr); const KVMCapabilityInfo kvm_arch_required_capabilities[] = { KVM_CAP_INFO(SET_TSS_ADDR), @@ -5736,7 +5746,7 @@ static bool kvm_install_msr_filters(KVMState *s) return true; } -bool kvm_filter_msr(KVMState *s, uint32_t msr, QEMURDMSRHandler *rdmsr, +static bool kvm_filter_msr(KVMState *s, uint32_t msr, QEMURDMSRHandler *rdmsr, QEMUWRMSRHandler *wrmsr) { int i; diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h index 34fc607..9de9c0d 100644 --- a/target/i386/kvm/kvm_i386.h +++ b/target/i386/kvm/kvm_i386.h @@ -66,17 +66,6 @@ uint64_t kvm_swizzle_msi_ext_dest_id(uint64_t address); void kvm_update_msi_routes_all(void *private, bool global, uint32_t index, uint32_t mask); -typedef bool QEMURDMSRHandler(X86CPU *cpu, uint32_t msr, uint64_t *val); -typedef bool QEMUWRMSRHandler(X86CPU *cpu, uint32_t msr, uint64_t val); -typedef struct kvm_msr_handlers { - uint32_t msr; - QEMURDMSRHandler *rdmsr; - QEMUWRMSRHandler *wrmsr; -} KVMMSRHandlers; - -bool kvm_filter_msr(KVMState *s, uint32_t msr, QEMURDMSRHandler *rdmsr, - QEMUWRMSRHandler *wrmsr); - #endif /* CONFIG_KVM */ void kvm_pc_setup_irq_routing(bool pci_enabled); -- cgit v1.1 From 87e82951c155175c8681509e8d25a6dac919c0c9 Mon Sep 17 00:00:00 2001 From: Ani Sinha Date: Tue, 3 Sep 2024 13:30:04 +0530 Subject: kvm/i386: fix return values of is_host_cpu_intel() is_host_cpu_intel() should return TRUE if the host cpu is Intel based, otherwise it should return FALSE. Currently, it returns zero (FALSE) when the host CPU is INTEL and non-zero otherwise. Fix the function so that it matches the intended semantics. Adjust the calling logic accordingly. RAPL needs Intel host cpus. If the host CPU is not Intel based, we should report an error. Signed-off-by: Ani Sinha Link: https://lore.kernel.org/r/20240903080004.33746-1-anisinha@redhat.com [While touching the code remove too many spaces from the second part of the error. - Paolo] Signed-off-by: Paolo Bonzini --- target/i386/kvm/kvm.c | 6 +++--- target/i386/kvm/vmsr_energy.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'target') diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index 94fdf3c..fa0be55 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -2906,9 +2906,9 @@ static int kvm_msr_energy_thread_init(KVMState *s, MachineState *ms) * 1. Host cpu must be Intel cpu * 2. 
RAPL must be enabled on the Host */ - if (is_host_cpu_intel()) { - error_report("The RAPL feature can only be enabled on hosts\ - with Intel CPU models"); + if (!is_host_cpu_intel()) { + error_report("The RAPL feature can only be enabled on hosts " + "with Intel CPU models"); ret = 1; goto out; } diff --git a/target/i386/kvm/vmsr_energy.c b/target/i386/kvm/vmsr_energy.c index 7e064c5..31508d4 100644 --- a/target/i386/kvm/vmsr_energy.c +++ b/target/i386/kvm/vmsr_energy.c @@ -34,7 +34,7 @@ bool is_host_cpu_intel(void) host_cpu_vendor_fms(vendor, &family, &model, &stepping); - return strcmp(vendor, CPUID_VENDOR_INTEL); + return g_str_equal(vendor, CPUID_VENDOR_INTEL); } int is_rapl_enabled(void) -- cgit v1.1 From 0cc42e63bb54fe2a5a1e76f2d1fa442f9c361c1c Mon Sep 17 00:00:00 2001 From: Ani Sinha Date: Tue, 3 Sep 2024 18:11:42 +0530 Subject: kvm/i386: refactor kvm_arch_init and split it into smaller functions kvm_arch_init() enables a lot of vm capabilities. Refactor them into separate smaller functions. Energy MSR related operations also moved to its own function. There should be no functional impact. Signed-off-by: Ani Sinha Link: https://lore.kernel.org/r/20240903124143.39345-2-anisinha@redhat.com Signed-off-by: Paolo Bonzini --- target/i386/kvm/kvm.c | 327 +++++++++++++++++++++++++++++++------------------- 1 file changed, 201 insertions(+), 126 deletions(-) (limited to 'target') diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index fa0be55..64ef2db 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -3005,10 +3005,185 @@ int kvm_arch_get_default_type(MachineState *ms) return 0; } +static int kvm_vm_enable_exception_payload(KVMState *s) +{ + int ret = 0; + has_exception_payload = kvm_check_extension(s, KVM_CAP_EXCEPTION_PAYLOAD); + if (has_exception_payload) { + ret = kvm_vm_enable_cap(s, KVM_CAP_EXCEPTION_PAYLOAD, 0, true); + if (ret < 0) { + error_report("kvm: Failed to enable exception payload cap: %s", + strerror(-ret)); + } + } + + return ret; +} + +static int kvm_vm_enable_triple_fault_event(KVMState *s) +{ + int ret = 0; + has_triple_fault_event = \ + kvm_check_extension(s, + KVM_CAP_X86_TRIPLE_FAULT_EVENT); + if (has_triple_fault_event) { + ret = kvm_vm_enable_cap(s, KVM_CAP_X86_TRIPLE_FAULT_EVENT, 0, true); + if (ret < 0) { + error_report("kvm: Failed to enable triple fault event cap: %s", + strerror(-ret)); + } + } + return ret; +} + +static int kvm_vm_set_identity_map_addr(KVMState *s, uint64_t *identity_base) +{ + /* + * On older Intel CPUs, KVM uses vm86 mode to emulate 16-bit code directly. + * In order to use vm86 mode, an EPT identity map and a TSS are needed. + * Since these must be part of guest physical memory, we need to allocate + * them, both by setting their start addresses in the kernel and by + * creating a corresponding e820 entry. We need 4 pages before the BIOS, + * so this value allows up to 16M BIOSes. + */ + *identity_base = 0xfeffc000; + return kvm_vm_ioctl(s, KVM_SET_IDENTITY_MAP_ADDR, identity_base); +} + +static int kvm_vm_set_nr_mmu_pages(KVMState *s) +{ + uint64_t shadow_mem; + int ret = 0; + shadow_mem = object_property_get_int(OBJECT(s), + "kvm-shadow-mem", + &error_abort); + if (shadow_mem != -1) { + shadow_mem /= 4096; + ret = kvm_vm_ioctl(s, KVM_SET_NR_MMU_PAGES, shadow_mem); + } + return ret; +} + +static int kvm_vm_set_tss_addr(KVMState *s, uint64_t identity_base) +{ + /* Set TSS base one page after EPT identity map. 
*/ + return kvm_vm_ioctl(s, KVM_SET_TSS_ADDR, identity_base); +} + +static int kvm_vm_enable_disable_exits(KVMState *s) +{ + int disable_exits = kvm_check_extension(s, KVM_CAP_X86_DISABLE_EXITS); +/* Work around for kernel header with a typo. TODO: fix header and drop. */ +#if defined(KVM_X86_DISABLE_EXITS_HTL) && !defined(KVM_X86_DISABLE_EXITS_HLT) +#define KVM_X86_DISABLE_EXITS_HLT KVM_X86_DISABLE_EXITS_HTL +#endif + if (disable_exits) { + disable_exits &= (KVM_X86_DISABLE_EXITS_MWAIT | + KVM_X86_DISABLE_EXITS_HLT | + KVM_X86_DISABLE_EXITS_PAUSE | + KVM_X86_DISABLE_EXITS_CSTATE); + } + + return kvm_vm_enable_cap(s, KVM_CAP_X86_DISABLE_EXITS, 0, + disable_exits); +} + +static int kvm_vm_enable_bus_lock_exit(KVMState *s) +{ + int ret = 0; + ret = kvm_check_extension(s, KVM_CAP_X86_BUS_LOCK_EXIT); + if (!(ret & KVM_BUS_LOCK_DETECTION_EXIT)) { + error_report("kvm: bus lock detection unsupported"); + return -ENOTSUP; + } + ret = kvm_vm_enable_cap(s, KVM_CAP_X86_BUS_LOCK_EXIT, 0, + KVM_BUS_LOCK_DETECTION_EXIT); + if (ret < 0) { + error_report("kvm: Failed to enable bus lock detection cap: %s", + strerror(-ret)); + } + + return ret; +} + +static int kvm_vm_enable_notify_vmexit(KVMState *s) +{ + int ret = 0; + if (s->notify_vmexit != NOTIFY_VMEXIT_OPTION_DISABLE) { + uint64_t notify_window_flags = + ((uint64_t)s->notify_window << 32) | + KVM_X86_NOTIFY_VMEXIT_ENABLED | + KVM_X86_NOTIFY_VMEXIT_USER; + ret = kvm_vm_enable_cap(s, KVM_CAP_X86_NOTIFY_VMEXIT, 0, + notify_window_flags); + if (ret < 0) { + error_report("kvm: Failed to enable notify vmexit cap: %s", + strerror(-ret)); + } + } + return ret; +} + +static int kvm_vm_enable_userspace_msr(KVMState *s) +{ + int ret = kvm_vm_enable_cap(s, KVM_CAP_X86_USER_SPACE_MSR, 0, + KVM_MSR_EXIT_REASON_FILTER); + if (ret < 0) { + error_report("Could not enable user space MSRs: %s", + strerror(-ret)); + exit(1); + } + + if (!kvm_filter_msr(s, MSR_CORE_THREAD_COUNT, + kvm_rdmsr_core_thread_count, NULL)) { + error_report("Could not install MSR_CORE_THREAD_COUNT handler!"); + exit(1); + } + + return 0; +} + +static void kvm_vm_enable_energy_msrs(KVMState *s) +{ + bool r; + if (s->msr_energy.enable == true) { + r = kvm_filter_msr(s, MSR_RAPL_POWER_UNIT, + kvm_rdmsr_rapl_power_unit, NULL); + if (!r) { + error_report("Could not install MSR_RAPL_POWER_UNIT \ + handler"); + exit(1); + } + + r = kvm_filter_msr(s, MSR_PKG_POWER_LIMIT, + kvm_rdmsr_pkg_power_limit, NULL); + if (!r) { + error_report("Could not install MSR_PKG_POWER_LIMIT \ + handler"); + exit(1); + } + + r = kvm_filter_msr(s, MSR_PKG_POWER_INFO, + kvm_rdmsr_pkg_power_info, NULL); + if (!r) { + error_report("Could not install MSR_PKG_POWER_INFO \ + handler"); + exit(1); + } + r = kvm_filter_msr(s, MSR_PKG_ENERGY_STATUS, + kvm_rdmsr_pkg_energy_status, NULL); + if (!r) { + error_report("Could not install MSR_PKG_ENERGY_STATUS \ + handler"); + exit(1); + } + } + return; +} + int kvm_arch_init(MachineState *ms, KVMState *s) { uint64_t identity_base = 0xfffbc000; - uint64_t shadow_mem; int ret; struct utsname utsname; Error *local_err = NULL; @@ -3038,24 +3213,14 @@ int kvm_arch_init(MachineState *ms, KVMState *s) hv_vpindex_settable = kvm_check_extension(s, KVM_CAP_HYPERV_VP_INDEX); - has_exception_payload = kvm_check_extension(s, KVM_CAP_EXCEPTION_PAYLOAD); - if (has_exception_payload) { - ret = kvm_vm_enable_cap(s, KVM_CAP_EXCEPTION_PAYLOAD, 0, true); - if (ret < 0) { - error_report("kvm: Failed to enable exception payload cap: %s", - strerror(-ret)); - return ret; - } + ret = 
kvm_vm_enable_exception_payload(s); + if (ret < 0) { + return ret; } - has_triple_fault_event = kvm_check_extension(s, KVM_CAP_X86_TRIPLE_FAULT_EVENT); - if (has_triple_fault_event) { - ret = kvm_vm_enable_cap(s, KVM_CAP_X86_TRIPLE_FAULT_EVENT, 0, true); - if (ret < 0) { - error_report("kvm: Failed to enable triple fault event cap: %s", - strerror(-ret)); - return ret; - } + ret = kvm_vm_enable_triple_fault_event(s); + if (ret < 0) { + return ret; } if (s->xen_version) { @@ -3086,22 +3251,12 @@ int kvm_arch_init(MachineState *ms, KVMState *s) uname(&utsname); lm_capable_kernel = strcmp(utsname.machine, "x86_64") == 0; - /* - * On older Intel CPUs, KVM uses vm86 mode to emulate 16-bit code directly. - * In order to use vm86 mode, an EPT identity map and a TSS are needed. - * Since these must be part of guest physical memory, we need to allocate - * them, both by setting their start addresses in the kernel and by - * creating a corresponding e820 entry. We need 4 pages before the BIOS, - * so this value allows up to 16M BIOSes. - */ - identity_base = 0xfeffc000; - ret = kvm_vm_ioctl(s, KVM_SET_IDENTITY_MAP_ADDR, &identity_base); + ret = kvm_vm_set_identity_map_addr(s, &identity_base); if (ret < 0) { return ret; } - /* Set TSS base one page after EPT identity map. */ - ret = kvm_vm_ioctl(s, KVM_SET_TSS_ADDR, identity_base + 0x1000); + ret = kvm_vm_set_tss_addr(s, identity_base + 0x1000); if (ret < 0) { return ret; } @@ -3109,13 +3264,9 @@ int kvm_arch_init(MachineState *ms, KVMState *s) /* Tell fw_cfg to notify the BIOS to reserve the range. */ e820_add_entry(identity_base, 0x4000, E820_RESERVED); - shadow_mem = object_property_get_int(OBJECT(s), "kvm-shadow-mem", &error_abort); - if (shadow_mem != -1) { - shadow_mem /= 4096; - ret = kvm_vm_ioctl(s, KVM_SET_NR_MMU_PAGES, shadow_mem); - if (ret < 0) { - return ret; - } + ret = kvm_vm_set_nr_mmu_pages(s); + if (ret < 0) { + return ret; } if (kvm_check_extension(s, KVM_CAP_X86_SMM) && @@ -3126,20 +3277,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) } if (enable_cpu_pm) { - int disable_exits = kvm_check_extension(s, KVM_CAP_X86_DISABLE_EXITS); -/* Work around for kernel header with a typo. TODO: fix header and drop. 
*/ -#if defined(KVM_X86_DISABLE_EXITS_HTL) && !defined(KVM_X86_DISABLE_EXITS_HLT) -#define KVM_X86_DISABLE_EXITS_HLT KVM_X86_DISABLE_EXITS_HTL -#endif - if (disable_exits) { - disable_exits &= (KVM_X86_DISABLE_EXITS_MWAIT | - KVM_X86_DISABLE_EXITS_HLT | - KVM_X86_DISABLE_EXITS_PAUSE | - KVM_X86_DISABLE_EXITS_CSTATE); - } - - ret = kvm_vm_enable_cap(s, KVM_CAP_X86_DISABLE_EXITS, 0, - disable_exits); + ret = kvm_vm_enable_disable_exits(s); if (ret < 0) { error_report("kvm: guest stopping CPU not supported: %s", strerror(-ret)); @@ -3150,16 +3288,8 @@ int kvm_arch_init(MachineState *ms, KVMState *s) X86MachineState *x86ms = X86_MACHINE(ms); if (x86ms->bus_lock_ratelimit > 0) { - ret = kvm_check_extension(s, KVM_CAP_X86_BUS_LOCK_EXIT); - if (!(ret & KVM_BUS_LOCK_DETECTION_EXIT)) { - error_report("kvm: bus lock detection unsupported"); - return -ENOTSUP; - } - ret = kvm_vm_enable_cap(s, KVM_CAP_X86_BUS_LOCK_EXIT, 0, - KVM_BUS_LOCK_DETECTION_EXIT); + ret = kvm_vm_enable_bus_lock_exit(s); if (ret < 0) { - error_report("kvm: Failed to enable bus lock detection cap: %s", - strerror(-ret)); return ret; } ratelimit_init(&bus_lock_ratelimit_ctrl); @@ -3168,80 +3298,25 @@ int kvm_arch_init(MachineState *ms, KVMState *s) } } - if (s->notify_vmexit != NOTIFY_VMEXIT_OPTION_DISABLE && - kvm_check_extension(s, KVM_CAP_X86_NOTIFY_VMEXIT)) { - uint64_t notify_window_flags = - ((uint64_t)s->notify_window << 32) | - KVM_X86_NOTIFY_VMEXIT_ENABLED | - KVM_X86_NOTIFY_VMEXIT_USER; - ret = kvm_vm_enable_cap(s, KVM_CAP_X86_NOTIFY_VMEXIT, 0, - notify_window_flags); - if (ret < 0) { - error_report("kvm: Failed to enable notify vmexit cap: %s", - strerror(-ret)); - return ret; - } - } - if (kvm_vm_check_extension(s, KVM_CAP_X86_USER_SPACE_MSR)) { - bool r; - - ret = kvm_vm_enable_cap(s, KVM_CAP_X86_USER_SPACE_MSR, 0, - KVM_MSR_EXIT_REASON_FILTER); - if (ret) { - error_report("Could not enable user space MSRs: %s", - strerror(-ret)); - exit(1); + if (kvm_check_extension(s, KVM_CAP_X86_NOTIFY_VMEXIT)) { + ret = kvm_vm_enable_notify_vmexit(s); + if (ret < 0) { + return ret; } + } - r = kvm_filter_msr(s, MSR_CORE_THREAD_COUNT, - kvm_rdmsr_core_thread_count, NULL); - if (!r) { - error_report("Could not install MSR_CORE_THREAD_COUNT handler: %s", - strerror(-ret)); - exit(1); + if (kvm_vm_check_extension(s, KVM_CAP_X86_USER_SPACE_MSR)) { + ret = kvm_vm_enable_userspace_msr(s); + if (ret < 0) { + return ret; } if (s->msr_energy.enable == true) { - r = kvm_filter_msr(s, MSR_RAPL_POWER_UNIT, - kvm_rdmsr_rapl_power_unit, NULL); - if (!r) { - error_report("Could not install MSR_RAPL_POWER_UNIT \ - handler: %s", - strerror(-ret)); + kvm_vm_enable_energy_msrs(s); + if (kvm_msr_energy_thread_init(s, ms)) { + error_report("kvm : error RAPL feature requirement not met"); exit(1); } - - r = kvm_filter_msr(s, MSR_PKG_POWER_LIMIT, - kvm_rdmsr_pkg_power_limit, NULL); - if (!r) { - error_report("Could not install MSR_PKG_POWER_LIMIT \ - handler: %s", - strerror(-ret)); - exit(1); - } - - r = kvm_filter_msr(s, MSR_PKG_POWER_INFO, - kvm_rdmsr_pkg_power_info, NULL); - if (!r) { - error_report("Could not install MSR_PKG_POWER_INFO \ - handler: %s", - strerror(-ret)); - exit(1); - } - r = kvm_filter_msr(s, MSR_PKG_ENERGY_STATUS, - kvm_rdmsr_pkg_energy_status, NULL); - if (!r) { - error_report("Could not install MSR_PKG_ENERGY_STATUS \ - handler: %s", - strerror(-ret)); - exit(1); - } - r = kvm_msr_energy_thread_init(s, ms); - if (r) { - error_report("kvm : error RAPL feature requirement not meet"); - exit(1); - } - } } -- cgit v1.1 From 
dc44854978f3d2fba7f57db07768e32192aafc32 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 4 Sep 2024 14:29:00 +0200 Subject: kvm/i386: replace identity_base variable with a constant The identity_base variable is first initialized to address 0xfffbc000 and then kvm_vm_set_identity_map_addr() overrides this value to address 0xfeffc000. The initial address to which the variable was initialized was never used. Clean everything up, placing 0xfeffc000 in a preprocessor constant. Reported-by: Ani Sinha Signed-off-by: Paolo Bonzini --- target/i386/kvm/kvm.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'target') diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index 64ef2db..7a1f2bd 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -81,6 +81,16 @@ do { } while (0) #endif +/* + * On older Intel CPUs, KVM uses vm86 mode to emulate 16-bit code directly. + * In order to use vm86 mode, an EPT identity map and a TSS are needed. + * Since these must be part of guest physical memory, we need to allocate + * them, both by setting their start addresses in the kernel and by + * creating a corresponding e820 entry. We need 4 pages before the BIOS, + * so this value allows up to 16M BIOSes. + */ +#define KVM_IDENTITY_BASE 0xfeffc000 + /* From arch/x86/kvm/lapic.h */ #define KVM_APIC_BUS_CYCLE_NS 1 #define KVM_APIC_BUS_FREQUENCY (1000000000ULL / KVM_APIC_BUS_CYCLE_NS) @@ -3036,18 +3046,9 @@ static int kvm_vm_enable_triple_fault_event(KVMState *s) return ret; } -static int kvm_vm_set_identity_map_addr(KVMState *s, uint64_t *identity_base) +static int kvm_vm_set_identity_map_addr(KVMState *s, uint64_t identity_base) { - /* - * On older Intel CPUs, KVM uses vm86 mode to emulate 16-bit code directly. - * In order to use vm86 mode, an EPT identity map and a TSS are needed. - * Since these must be part of guest physical memory, we need to allocate - * them, both by setting their start addresses in the kernel and by - * creating a corresponding e820 entry. We need 4 pages before the BIOS, - * so this value allows up to 16M BIOSes. - */ - *identity_base = 0xfeffc000; - return kvm_vm_ioctl(s, KVM_SET_IDENTITY_MAP_ADDR, identity_base); + return kvm_vm_ioctl(s, KVM_SET_IDENTITY_MAP_ADDR, &identity_base); } static int kvm_vm_set_nr_mmu_pages(KVMState *s) @@ -3064,10 +3065,9 @@ static int kvm_vm_set_nr_mmu_pages(KVMState *s) return ret; } -static int kvm_vm_set_tss_addr(KVMState *s, uint64_t identity_base) +static int kvm_vm_set_tss_addr(KVMState *s, uint64_t tss_base) { - /* Set TSS base one page after EPT identity map. */ - return kvm_vm_ioctl(s, KVM_SET_TSS_ADDR, identity_base); + return kvm_vm_ioctl(s, KVM_SET_TSS_ADDR, tss_base); } static int kvm_vm_enable_disable_exits(KVMState *s) @@ -3183,7 +3183,6 @@ static void kvm_vm_enable_energy_msrs(KVMState *s) int kvm_arch_init(MachineState *ms, KVMState *s) { - uint64_t identity_base = 0xfffbc000; int ret; struct utsname utsname; Error *local_err = NULL; @@ -3251,18 +3250,19 @@ int kvm_arch_init(MachineState *ms, KVMState *s) uname(&utsname); lm_capable_kernel = strcmp(utsname.machine, "x86_64") == 0; - ret = kvm_vm_set_identity_map_addr(s, &identity_base); + ret = kvm_vm_set_identity_map_addr(s, KVM_IDENTITY_BASE); if (ret < 0) { return ret; } - ret = kvm_vm_set_tss_addr(s, identity_base + 0x1000); + /* Set TSS base one page after EPT identity map. 
*/ + ret = kvm_vm_set_tss_addr(s, KVM_IDENTITY_BASE + 0x1000); if (ret < 0) { return ret; } /* Tell fw_cfg to notify the BIOS to reserve the range. */ - e820_add_entry(identity_base, 0x4000, E820_RESERVED); + e820_add_entry(KVM_IDENTITY_BASE, 0x4000, E820_RESERVED); ret = kvm_vm_set_nr_mmu_pages(s); if (ret < 0) { -- cgit v1.1 From 0701abbf9880b5ab1cf44e0caa6ad173aec840e7 Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Mon, 5 Aug 2024 17:20:41 -0300 Subject: target/i386: Expose IBPB-BRTYPE and SBPB CPUID bits to the guest According to AMD's Speculative Return Stack Overflow whitepaper (link below), the hypervisor should synthesize the value of IBPB_BRTYPE and SBPB CPUID bits to the guest. Support for this is already present in the kernel with commit e47d86083c66 ("KVM: x86: Add SBPB support") and commit 6f0f23ef76be ("KVM: x86: Add IBPB_BRTYPE support"). Add support in QEMU to expose the bits to the guest OS. host: # cat /sys/devices/system/cpu/vulnerabilities/spec_rstack_overflow Mitigation: Safe RET before (guest): $ cpuid -l 0x80000021 -1 -r 0x80000021 0x00: eax=0x00000045 ebx=0x00000000 ecx=0x00000000 edx=0x00000000 ^ $ cat /sys/devices/system/cpu/vulnerabilities/spec_rstack_overflow Vulnerable: Safe RET, no microcode after (guest): $ cpuid -l 0x80000021 -1 -r 0x80000021 0x00: eax=0x18000045 ebx=0x00000000 ecx=0x00000000 edx=0x00000000 ^ $ cat /sys/devices/system/cpu/vulnerabilities/spec_rstack_overflow Mitigation: Safe RET Reported-by: Fabian Vogt Link: https://www.amd.com/content/dam/amd/en/documents/corporate/cr/speculative-return-stack-overflow-whitepaper.pdf Signed-off-by: Fabiano Rosas Link: https://lore.kernel.org/r/20240805202041.5936-1-farosas@suse.de Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'target') diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 31f287c..ff227a8 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -1221,8 +1221,8 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, + NULL, NULL, NULL, "sbpb", + "ibpb-brtype", NULL, NULL, NULL, }, .cpuid = { .eax = 0x80000021, .reg = R_EAX, }, .tcg_features = 0, -- cgit v1.1 From a1676bb3047f28b292ecbce3a378ccc0b4721d47 Mon Sep 17 00:00:00 2001 From: Julia Suvorova Date: Fri, 27 Sep 2024 12:47:40 +0200 Subject: kvm: Allow kvm_arch_get/put_registers to accept Error** This is necessary to provide discernible error messages to the caller. 
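For instance, the caller that synchronizes CPU state can now report the precise reason for a failure instead of a bare return code. A hypothetical caller sketch follows (not code from this series; header paths are indicative, and error_report_err() consumes and frees the error):

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "sysemu/kvm.h"

static void example_sync_registers(CPUState *cs)
{
    Error *local_err = NULL;

    /* The arch backend fills local_err with a human-readable reason. */
    if (kvm_arch_get_registers(cs, &local_err) < 0) {
        error_report_err(local_err);
    }
}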
Signed-off-by: Julia Suvorova Reviewed-by: Peter Xu Link: https://lore.kernel.org/r/20240927104743.218468-2-jusual@redhat.com Signed-off-by: Paolo Bonzini --- target/arm/kvm.c | 4 ++-- target/i386/kvm/kvm.c | 4 ++-- target/loongarch/kvm/kvm.c | 4 ++-- target/mips/kvm.c | 4 ++-- target/ppc/kvm.c | 4 ++-- target/riscv/kvm/kvm-cpu.c | 4 ++-- target/s390x/kvm/kvm.c | 4 ++-- 7 files changed, 14 insertions(+), 14 deletions(-) (limited to 'target') diff --git a/target/arm/kvm.c b/target/arm/kvm.c index 849e2e2..f1f1b5b 100644 --- a/target/arm/kvm.c +++ b/target/arm/kvm.c @@ -2042,7 +2042,7 @@ static int kvm_arch_put_sve(CPUState *cs) return 0; } -int kvm_arch_put_registers(CPUState *cs, int level) +int kvm_arch_put_registers(CPUState *cs, int level, Error **errp) { uint64_t val; uint32_t fpr; @@ -2226,7 +2226,7 @@ static int kvm_arch_get_sve(CPUState *cs) return 0; } -int kvm_arch_get_registers(CPUState *cs) +int kvm_arch_get_registers(CPUState *cs, Error **errp) { uint64_t val; unsigned int el; diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index 7a1f2bd..0b03fb0 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -5210,7 +5210,7 @@ static int kvm_get_nested_state(X86CPU *cpu) return ret; } -int kvm_arch_put_registers(CPUState *cpu, int level) +int kvm_arch_put_registers(CPUState *cpu, int level, Error **errp) { X86CPU *x86_cpu = X86_CPU(cpu); int ret; @@ -5298,7 +5298,7 @@ int kvm_arch_put_registers(CPUState *cpu, int level) return 0; } -int kvm_arch_get_registers(CPUState *cs) +int kvm_arch_get_registers(CPUState *cs, Error **errp) { X86CPU *cpu = X86_CPU(cs); int ret; diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c index 4786cd5..30ec160 100644 --- a/target/loongarch/kvm/kvm.c +++ b/target/loongarch/kvm/kvm.c @@ -588,7 +588,7 @@ static int kvm_loongarch_put_cpucfg(CPUState *cs) return ret; } -int kvm_arch_get_registers(CPUState *cs) +int kvm_arch_get_registers(CPUState *cs, Error **errp) { int ret; @@ -616,7 +616,7 @@ int kvm_arch_get_registers(CPUState *cs) return ret; } -int kvm_arch_put_registers(CPUState *cs, int level) +int kvm_arch_put_registers(CPUState *cs, int level, Error **errp) { int ret; diff --git a/target/mips/kvm.c b/target/mips/kvm.c index a631ab5..a98798c 100644 --- a/target/mips/kvm.c +++ b/target/mips/kvm.c @@ -1172,7 +1172,7 @@ static int kvm_mips_get_cp0_registers(CPUState *cs) return ret; } -int kvm_arch_put_registers(CPUState *cs, int level) +int kvm_arch_put_registers(CPUState *cs, int level, Error **errp) { CPUMIPSState *env = cpu_env(cs); struct kvm_regs regs; @@ -1207,7 +1207,7 @@ int kvm_arch_put_registers(CPUState *cs, int level) return ret; } -int kvm_arch_get_registers(CPUState *cs) +int kvm_arch_get_registers(CPUState *cs, Error **errp) { CPUMIPSState *env = cpu_env(cs); int ret = 0; diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c index 907dba6..3efc28f 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c @@ -900,7 +900,7 @@ int kvmppc_put_books_sregs(PowerPCCPU *cpu) return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs); } -int kvm_arch_put_registers(CPUState *cs, int level) +int kvm_arch_put_registers(CPUState *cs, int level, Error **errp) { PowerPCCPU *cpu = POWERPC_CPU(cs); CPUPPCState *env = &cpu->env; @@ -1205,7 +1205,7 @@ static int kvmppc_get_books_sregs(PowerPCCPU *cpu) return 0; } -int kvm_arch_get_registers(CPUState *cs) +int kvm_arch_get_registers(CPUState *cs, Error **errp) { PowerPCCPU *cpu = POWERPC_CPU(cs); CPUPPCState *env = &cpu->env; diff --git a/target/riscv/kvm/kvm-cpu.c 
b/target/riscv/kvm/kvm-cpu.c index f6e3156..2bfb112 100644 --- a/target/riscv/kvm/kvm-cpu.c +++ b/target/riscv/kvm/kvm-cpu.c @@ -1192,7 +1192,7 @@ const KVMCapabilityInfo kvm_arch_required_capabilities[] = { KVM_CAP_LAST_INFO }; -int kvm_arch_get_registers(CPUState *cs) +int kvm_arch_get_registers(CPUState *cs, Error **errp) { int ret = 0; @@ -1237,7 +1237,7 @@ int kvm_riscv_sync_mpstate_to_kvm(RISCVCPU *cpu, int state) return 0; } -int kvm_arch_put_registers(CPUState *cs, int level) +int kvm_arch_put_registers(CPUState *cs, int level, Error **errp) { int ret = 0; diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c index 94181d9..8ffe015 100644 --- a/target/s390x/kvm/kvm.c +++ b/target/s390x/kvm/kvm.c @@ -472,7 +472,7 @@ static int can_sync_regs(CPUState *cs, int regs) #define KVM_SYNC_REQUIRED_REGS (KVM_SYNC_GPRS | KVM_SYNC_ACRS | \ KVM_SYNC_CRS | KVM_SYNC_PREFIX) -int kvm_arch_put_registers(CPUState *cs, int level) +int kvm_arch_put_registers(CPUState *cs, int level, Error **errp) { CPUS390XState *env = cpu_env(cs); struct kvm_fpu fpu = {}; @@ -598,7 +598,7 @@ int kvm_arch_put_registers(CPUState *cs, int level) return 0; } -int kvm_arch_get_registers(CPUState *cs) +int kvm_arch_get_registers(CPUState *cs, Error **errp) { CPUS390XState *env = cpu_env(cs); struct kvm_fpu fpu; -- cgit v1.1 From fc058618d1596d29e89016750a1aaf64c9fe8832 Mon Sep 17 00:00:00 2001 From: Julia Suvorova Date: Fri, 27 Sep 2024 12:47:41 +0200 Subject: target/i386/kvm: Report which action failed in kvm_arch_put/get_registers To help debug and triage future failure reports (akin to [1,2]) that may occur during kvm_arch_put/get_registers, the error path of each action is accompanied by unique error message. [1] https://issues.redhat.com/browse/RHEL-7558 [2] https://issues.redhat.com/browse/RHEL-21761 Signed-off-by: Julia Suvorova Reviewed-by: Peter Xu Link: https://lore.kernel.org/r/20240927104743.218468-3-jusual@redhat.com Signed-off-by: Paolo Bonzini --- target/i386/kvm/kvm.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'target') diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index 0b03fb0..e6f9490 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -5225,6 +5225,7 @@ int kvm_arch_put_registers(CPUState *cpu, int level, Error **errp) if (level >= KVM_PUT_RESET_STATE) { ret = kvm_put_msr_feature_control(x86_cpu); if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to set feature control MSR"); return ret; } } @@ -5232,12 +5233,14 @@ int kvm_arch_put_registers(CPUState *cpu, int level, Error **errp) /* must be before kvm_put_nested_state so that EFER.SVME is set */ ret = has_sregs2 ? 
kvm_put_sregs2(x86_cpu) : kvm_put_sregs(x86_cpu); if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to set special registers"); return ret; } if (level >= KVM_PUT_RESET_STATE) { ret = kvm_put_nested_state(x86_cpu); if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to set nested state"); return ret; } } @@ -5255,6 +5258,7 @@ int kvm_arch_put_registers(CPUState *cpu, int level, Error **errp) if (xen_mode == XEN_EMULATE && level == KVM_PUT_FULL_STATE) { ret = kvm_put_xen_state(cpu); if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to set Xen state"); return ret; } } @@ -5262,37 +5266,45 @@ int kvm_arch_put_registers(CPUState *cpu, int level, Error **errp) ret = kvm_getput_regs(x86_cpu, 1); if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to set general purpose registers"); return ret; } ret = kvm_put_xsave(x86_cpu); if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to set XSAVE"); return ret; } ret = kvm_put_xcrs(x86_cpu); if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to set XCRs"); return ret; } ret = kvm_put_msrs(x86_cpu, level); if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to set MSRs"); return ret; } ret = kvm_put_vcpu_events(x86_cpu, level); if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to set vCPU events"); return ret; } if (level >= KVM_PUT_RESET_STATE) { ret = kvm_put_mp_state(x86_cpu); if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to set MP state"); return ret; } } ret = kvm_put_tscdeadline_msr(x86_cpu); if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to set TSC deadline MSR"); return ret; } ret = kvm_put_debugregs(x86_cpu); if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to set debug registers"); return ret; } return 0; @@ -5307,6 +5319,7 @@ int kvm_arch_get_registers(CPUState *cs, Error **errp) ret = kvm_get_vcpu_events(cpu); if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to get vCPU events"); goto out; } /* @@ -5315,44 +5328,54 @@ int kvm_arch_get_registers(CPUState *cs, Error **errp) */ ret = kvm_get_mp_state(cpu); if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to get MP state"); goto out; } ret = kvm_getput_regs(cpu, 0); if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to get general purpose registers"); goto out; } ret = kvm_get_xsave(cpu); if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to get XSAVE"); goto out; } ret = kvm_get_xcrs(cpu); if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to get XCRs"); goto out; } ret = has_sregs2 ? kvm_get_sregs2(cpu) : kvm_get_sregs(cpu); if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to get special registers"); goto out; } ret = kvm_get_msrs(cpu); if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to get MSRs"); goto out; } ret = kvm_get_apic(cpu); if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to get APIC"); goto out; } ret = kvm_get_debugregs(cpu); if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to get debug registers"); goto out; } ret = kvm_get_nested_state(cpu); if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to get nested state"); goto out; } #ifdef CONFIG_XEN_EMU if (xen_mode == XEN_EMULATE) { ret = kvm_get_xen_state(cs); if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to get Xen state"); goto out; } } -- cgit v1.1
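A usage note on the convention this last patch establishes: each step that can fail wraps its negative errno return in error_setg_errno() with a message naming the action, so the caller's report pinpoints which KVM operation failed. A later step added to kvm_arch_get_registers() would follow the same shape; the fragment below is purely hypothetical and kvm_get_example_state() does not exist in the tree:

    /* Hypothetical additional step, mirroring the pattern above. */
    ret = kvm_get_example_state(cpu);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Failed to get example state");
        goto out;
    }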