From 79e9ebebbf2a00c46fcedb6dc7dd5e12bbd30216 Mon Sep 17 00:00:00 2001 From: Liu Jinsong Date: Thu, 5 Dec 2013 08:32:12 +0800 Subject: target-i386: Intel MPX Add some MPX related definitions, and hardcode sizes and offsets of xsave features 3 and 4. It also adds the corresponding parts to kvm_get/put_xsave, and vmstate. Signed-off-by: Liu Jinsong Signed-off-by: Paolo Bonzini --- target-i386/kvm.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'target-i386/kvm.c') diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 1188482..01ebca2 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -69,6 +69,7 @@ static bool has_msr_feature_control; static bool has_msr_async_pf_en; static bool has_msr_pv_eoi_en; static bool has_msr_misc_enable; +static bool has_msr_bndcfgs; static bool has_msr_kvm_steal_time; static int lm_capable_kernel; @@ -772,6 +773,10 @@ static int kvm_get_supported_msrs(KVMState *s) has_msr_misc_enable = true; continue; } + if (kvm_msr_list->indices[i] == MSR_IA32_BNDCFGS) { + has_msr_bndcfgs = true; + continue; + } } } @@ -975,6 +980,8 @@ static int kvm_put_fpu(X86CPU *cpu) #define XSAVE_XMM_SPACE 40 #define XSAVE_XSTATE_BV 128 #define XSAVE_YMMH_SPACE 144 +#define XSAVE_BNDREGS 240 +#define XSAVE_BNDCSR 256 static int kvm_put_xsave(X86CPU *cpu) { @@ -1007,6 +1014,10 @@ static int kvm_put_xsave(X86CPU *cpu) *(uint64_t *)&xsave->region[XSAVE_XSTATE_BV] = env->xstate_bv; memcpy(&xsave->region[XSAVE_YMMH_SPACE], env->ymmh_regs, sizeof env->ymmh_regs); + memcpy(&xsave->region[XSAVE_BNDREGS], env->bnd_regs, + sizeof env->bnd_regs); + memcpy(&xsave->region[XSAVE_BNDCSR], &env->bndcs_regs, + sizeof(env->bndcs_regs)); r = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_XSAVE, xsave); return r; } @@ -1208,6 +1219,9 @@ static int kvm_put_msrs(X86CPU *cpu, int level) kvm_msr_entry_set(&msrs[n++], MSR_IA32_FEATURE_CONTROL, env->msr_ia32_feature_control); } + if (has_msr_bndcfgs) { + kvm_msr_entry_set(&msrs[n++], MSR_IA32_BNDCFGS, env->msr_bndcfgs); + } 
} if (env->mcg_cap) { int i; @@ -1289,6 +1303,10 @@ static int kvm_get_xsave(X86CPU *cpu) env->xstate_bv = *(uint64_t *)&xsave->region[XSAVE_XSTATE_BV]; memcpy(env->ymmh_regs, &xsave->region[XSAVE_YMMH_SPACE], sizeof env->ymmh_regs); + memcpy(env->bnd_regs, &xsave->region[XSAVE_BNDREGS], + sizeof env->bnd_regs); + memcpy(&env->bndcs_regs, &xsave->region[XSAVE_BNDCSR], + sizeof(env->bndcs_regs)); return 0; } @@ -1435,6 +1453,9 @@ static int kvm_get_msrs(X86CPU *cpu) if (has_msr_feature_control) { msrs[n++].index = MSR_IA32_FEATURE_CONTROL; } + if (has_msr_bndcfgs) { + msrs[n++].index = MSR_IA32_BNDCFGS; + } if (!env->tsc_valid) { msrs[n++].index = MSR_IA32_TSC; @@ -1550,6 +1571,9 @@ static int kvm_get_msrs(X86CPU *cpu) case MSR_IA32_FEATURE_CONTROL: env->msr_ia32_feature_control = msrs[i].data; break; + case MSR_IA32_BNDCFGS: + env->msr_bndcfgs = msrs[i].data; + break; default: if (msrs[i].index >= MSR_MC0_CTL && msrs[i].index < MSR_MC0_CTL + (env->mcg_cap & 0xff) * 4) { -- cgit v1.1 From f86746c263753cf7a7e4bdb8829c70272dfcf36c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fernando=20Luis=20V=C3=A1zquez=20Cao?= Date: Fri, 6 Dec 2013 17:38:24 +0900 Subject: target-i386: do not special case TSC writeback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Newer kernels are capable of synchronizing TSC values of multiple VCPUs on writeback, but we were excluding the power up case, which is not needed anymore. 
Signed-off-by: Fernando Luis Vazquez Cao Signed-off-by: Paolo Bonzini Signed-off-by: Fernando Luis Vázquez Cao --- target-i386/kvm.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'target-i386/kvm.c') diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 01ebca2..312a46b 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -1151,15 +1151,7 @@ static int kvm_put_msrs(X86CPU *cpu, int level) } #endif if (level == KVM_PUT_FULL_STATE) { - /* - * KVM is yet unable to synchronize TSC values of multiple VCPUs on - * writeback. Until this is fixed, we only write the offset to SMP - * guests after migration, desynchronizing the VCPUs, but avoiding - * huge jump-backs that would occur without any writeback at all. - */ - if (smp_cpus == 1 || env->tsc != 0) { - kvm_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc); - } + kvm_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc); } /* * The following MSRs have side effects on the guest or are too heavy -- cgit v1.1 From 0522604b09b8cff54ba2450a7478da2a4d084817 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fernando=20Luis=20V=C3=A1zquez=20Cao?= Date: Fri, 6 Dec 2013 17:33:01 +0900 Subject: target-i386: clear guest TSC on reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VCPU TSC is not cleared by a warm reset (*), which leaves some types of Linux guests (non-pvops guests and those with the kernel parameter no-kvmclock set) vulnerable to the overflow in cyc2ns_offset fixed by upstream commit 9993bc635d01a6ee7f6b833b4ee65ce7c06350b1 ("sched/x86: Fix overflow in cyc2ns_offset"). To put it in a nutshell, if such a Linux guest without the patch above applied has been up more than 208 days and attempts a warm reset, chances are that the newly booted kernel will panic or hang. 
(*) Intel Xeon E5 processors show the same broken behavior due to the erratum "TSC is Not Affected by Warm Reset" (Intel® Xeon® Processor E5 Family Specification Update - August 2013): "The TSC (Time Stamp Counter MSR 10H) should be cleared on reset. Due to this erratum the TSC is not affected by warm reset." Cc: Will Auld Cc: Marcelo Tosatti Signed-off-by: Fernando Luis Vazquez Cao Signed-off-by: Paolo Bonzini Signed-off-by: Fernando Luis Vázquez Cao --- target-i386/kvm.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'target-i386/kvm.c') diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 312a46b..285e1a3 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -1150,14 +1150,12 @@ static int kvm_put_msrs(X86CPU *cpu, int level) kvm_msr_entry_set(&msrs[n++], MSR_LSTAR, env->lstar); } #endif - if (level == KVM_PUT_FULL_STATE) { - kvm_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc); - } /* * The following MSRs have side effects on the guest or are too heavy * for normal writeback. Limit them to reset or full state updates. */ if (level >= KVM_PUT_RESET_STATE) { + kvm_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc); kvm_msr_entry_set(&msrs[n++], MSR_KVM_SYSTEM_TIME, env->system_time_msr); kvm_msr_entry_set(&msrs[n++], MSR_KVM_WALL_CLOCK, env->wall_clock_msr); -- cgit v1.1 From 6bdf863d942a267f984e4bd82be80cb2ac5b9915 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Tue, 17 Dec 2013 20:05:13 +0100 Subject: kvm: x86: Separately write feature control MSR on reset If the guest is running in nested mode on system reset, clearing the feature MSR signals the kernel to leave this mode. Recent kernels process this properly, but leave the VCPU state undefined. It is the job of userspace to bring it to a proper shape. Therefore, write this specific MSR first so that no state transfer gets lost. This allows a guest with VMX in use to be reset cleanly. 
Signed-off-by: Jan Kiszka Signed-off-by: Paolo Bonzini --- target-i386/kvm.c | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) (limited to 'target-i386/kvm.c') diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 285e1a3..221c8a0 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -1115,6 +1115,25 @@ static int kvm_put_tscdeadline_msr(X86CPU *cpu) return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, &msr_data); } +/* + * Provide a separate write service for the feature control MSR in order to + * kick the VCPU out of VMXON or even guest mode on reset. This has to be done + * before writing any other state because forcibly leaving nested mode + * invalidates the VCPU state. + */ +static int kvm_put_msr_feature_control(X86CPU *cpu) +{ + struct { + struct kvm_msrs info; + struct kvm_msr_entry entry; + } msr_data; + + kvm_msr_entry_set(&msr_data.entry, MSR_IA32_FEATURE_CONTROL, + cpu->env.msr_ia32_feature_control); + msr_data.info.nmsrs = 1; + return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, &msr_data); +} + static int kvm_put_msrs(X86CPU *cpu, int level) { CPUX86State *env = &cpu->env; @@ -1205,13 +1224,12 @@ static int kvm_put_msrs(X86CPU *cpu, int level) if (cpu->hyperv_vapic) { kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_APIC_ASSIST_PAGE, 0); } - if (has_msr_feature_control) { - kvm_msr_entry_set(&msrs[n++], MSR_IA32_FEATURE_CONTROL, - env->msr_ia32_feature_control); - } if (has_msr_bndcfgs) { kvm_msr_entry_set(&msrs[n++], MSR_IA32_BNDCFGS, env->msr_bndcfgs); } + + /* Note: MSR_IA32_FEATURE_CONTROL is written separately, see + * kvm_put_msr_feature_control. 
*/ } if (env->mcg_cap) { int i; @@ -1815,6 +1833,13 @@ int kvm_arch_put_registers(CPUState *cpu, int level) assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu)); + if (level >= KVM_PUT_RESET_STATE && has_msr_feature_control) { + ret = kvm_put_msr_feature_control(x86_cpu); + if (ret < 0) { + return ret; + } + } + ret = kvm_getput_regs(x86_cpu, 1); if (ret < 0) { return ret; -- cgit v1.1 From 439d19f2922ac409ee224bc1e5522cee7009d829 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 20 Jan 2014 14:22:25 +0100 Subject: kvm: always update the MPX model specific register The original patch from Liu Jinsong restricted them to reset or full state updates, but that's unnecessary (and wrong) since the BNDCFGS MSR has no side effects. Cc: Liu Jinsong Signed-off-by: Paolo Bonzini --- target-i386/kvm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'target-i386/kvm.c') diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 221c8a0..d34981f 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -1161,6 +1161,9 @@ static int kvm_put_msrs(X86CPU *cpu, int level) kvm_msr_entry_set(&msrs[n++], MSR_IA32_MISC_ENABLE, env->msr_ia32_misc_enable); } + if (has_msr_bndcfgs) { + kvm_msr_entry_set(&msrs[n++], MSR_IA32_BNDCFGS, env->msr_bndcfgs); + } #ifdef TARGET_X86_64 if (lm_capable_kernel) { kvm_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar); @@ -1224,9 +1227,6 @@ static int kvm_put_msrs(X86CPU *cpu, int level) if (cpu->hyperv_vapic) { kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_APIC_ASSIST_PAGE, 0); } - if (has_msr_bndcfgs) { - kvm_msr_entry_set(&msrs[n++], MSR_IA32_BNDCFGS, env->msr_bndcfgs); - } /* Note: MSR_IA32_FEATURE_CONTROL is written separately, see * kvm_put_msr_feature_control. */ -- cgit v1.1