From a096b3a6732f846ec57dc28b47ee9435aa0609bf Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 16 May 2014 17:15:21 +0200 Subject: kvmclock: Ensure time in migration never goes backward When we migrate we ask the kernel about its current belief on what the guest time would be. However, I've seen cases where the kvmclock guest structure indicates a time more recent than the kvm returned time. To make sure we never go backwards, calculate what the guest would have seen as time at the point of migration and use that value instead of the kernel returned one when it's more recent. This bases the view of the kvmclock after migration on the same foundation in host as well as guest. Signed-off-by: Alexander Graf Cc: qemu-stable@nongnu.org Reviewed-by: Marcelo Tosatti Signed-off-by: Paolo Bonzini --- hw/i386/kvm/clock.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c index 892aa02..6f4ed28a 100644 --- a/hw/i386/kvm/clock.c +++ b/hw/i386/kvm/clock.c @@ -14,6 +14,7 @@ */ #include "qemu-common.h" +#include "qemu/host-utils.h" #include "sysemu/sysemu.h" #include "sysemu/kvm.h" #include "hw/sysbus.h" @@ -34,6 +35,47 @@ typedef struct KVMClockState { bool clock_valid; } KVMClockState; +struct pvclock_vcpu_time_info { + uint32_t version; + uint32_t pad0; + uint64_t tsc_timestamp; + uint64_t system_time; + uint32_t tsc_to_system_mul; + int8_t tsc_shift; + uint8_t flags; + uint8_t pad[2]; +} __attribute__((__packed__)); /* 32 bytes */ + +static uint64_t kvmclock_current_nsec(KVMClockState *s) +{ + CPUState *cpu = first_cpu; + CPUX86State *env = cpu->env_ptr; + hwaddr kvmclock_struct_pa = env->system_time_msr & ~1ULL; + uint64_t migration_tsc = env->tsc; + struct pvclock_vcpu_time_info time; + uint64_t delta; + uint64_t nsec_lo; + uint64_t nsec_hi; + uint64_t nsec; + + if (!(env->system_time_msr & 1ULL)) { + /* KVM clock not active */ + return 0; + } + + cpu_physical_memory_read(kvmclock_struct_pa, &time, sizeof(time)); + + delta = migration_tsc - time.tsc_timestamp; + if (time.tsc_shift < 0) { + delta >>= -time.tsc_shift; + } else { + delta <<= time.tsc_shift; + } + + mulu64(&nsec_lo, &nsec_hi, delta, time.tsc_to_system_mul); + nsec = (nsec_lo >> 32) | (nsec_hi << 32); + return nsec + time.system_time; +} static void kvmclock_vm_state_change(void *opaque, int running, RunState state) @@ -45,9 +87,15 @@ static void kvmclock_vm_state_change(void *opaque, int running, if (running) { struct kvm_clock_data data; + uint64_t time_at_migration = kvmclock_current_nsec(s); s->clock_valid = false; + /* We can't rely on the migrated clock value, just discard it */ + if (time_at_migration) { + s->clock = time_at_migration; + } + data.clock = s->clock; data.flags = 0; ret = kvm_vm_ioctl(kvm_state, KVM_SET_CLOCK, &data); -- cgit v1.1 From b763adf1a6b271f17f15ea31fae93d03e980d911 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 21 May 2014 12:42:26 +0200 Subject: kvm_stat: allow choosing between tracepoints and old stats The old stats contain information not available in the tracepoints. By default, keep the old behavior, but allow choosing which set of stats to present, or even both. Inspired by a patch from Marcelo Tosatti. Signed-off-by: Paolo Bonzini --- scripts/kvm/kvm_stat | 60 +++++++++++++++++++++++++++++++++++----------------- 1 file changed, 41 insertions(+), 19 deletions(-) diff --git a/scripts/kvm/kvm_stat b/scripts/kvm/kvm_stat index 762544b..d7e97e7 100755 --- a/scripts/kvm/kvm_stat +++ b/scripts/kvm/kvm_stat @@ -352,8 +352,8 @@ class TracepointProvider(object): return ret class Stats: - def __init__(self, provider, fields = None): - self.provider = provider + def __init__(self, providers, fields = None): + self.providers = providers self.fields_filter = fields self._update() def _update(self): @@ -362,22 +362,25 @@ class Stats: if not self.fields_filter: return True return re.match(self.fields_filter, key) is not None - self.values = dict([(key, None) - for key in provider.fields() - if wanted(key)]) - self.provider.select(self.values.keys()) + self.values = dict() + for d in providers: + provider_fields = [key for key in d.fields() if wanted(key)] + for key in provider_fields: + self.values[key] = None + d.select(provider_fields) def set_fields_filter(self, fields_filter): self.fields_filter = fields_filter self._update() def get(self): - new = self.provider.read() - for key in self.provider.fields(): - oldval = self.values.get(key, (0, 0)) - newval = new[key] - newdelta = None - if oldval is not None: - newdelta = newval - oldval[0] - self.values[key] = (newval, newdelta) + for d in providers: + new = d.read() + for key in d.fields(): + oldval = self.values.get(key, (0, 0)) + newval = new[key] + newdelta = None + if oldval is not None: + newdelta = newval - oldval[0] + self.values[key] = (newval, newdelta) return self.values if not os.access('/sys/kernel/debug', os.F_OK): @@ -487,6 +490,18 @@ options.add_option('-l', '--log', dest = 'log', help = 'run in logging mode (like vmstat)', ) +options.add_option('-t', '--tracepoints', + action = 'store_true', + default = False, + dest = 'tracepoints', + help = 'retrieve statistics from tracepoints', + ) +options.add_option('-d', '--debugfs', + action = 'store_true', + default = False, + dest = 'debugfs', + help = 'retrieve statistics from debugfs', + ) options.add_option('-f', '--fields', action = 'store', default = None, @@ -495,12 +510,19 @@ options.add_option('-f', '--fields', ) (options, args) = options.parse_args(sys.argv) -try: - provider = TracepointProvider() -except: - provider = DebugfsProvider() +providers = [] +if options.tracepoints: + providers.append(TracepointProvider()) +if options.debugfs: + providers.append(DebugfsProvider()) + +if len(providers) == 0: + try: + providers = [TracepointProvider()] + except: + providers = [DebugfsProvider()] -stats = Stats(provider, fields = options.fields) +stats = Stats(providers, fields = options.fields) if options.log: log(stats) -- cgit v1.1 From 87446327ccb2e944fe7abc848bab798a0864eb03 Mon Sep 17 00:00:00 2001 From: Kevin O'Connor Date: Tue, 20 May 2014 17:10:24 -0400 Subject: target-i386: Fix vm86 mode regression introduced in fd460606fd6f. Commit fd460606fd6f moved setting of eflags above calls to cpu_x86_load_seg_cache() in seg_helper.c. Unfortunately, in do_interrupt_protected() this moved the clearing of VM_MASK above a test for it. Fix this regression by storing the value of VM_MASK at the start of do_interrupt_protected(). Signed-off-by: Kevin O'Connor Signed-off-by: Paolo Bonzini --- target-i386/seg_helper.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/target-i386/seg_helper.c b/target-i386/seg_helper.c index 3cf862e..cc7eadf 100644 --- a/target-i386/seg_helper.c +++ b/target-i386/seg_helper.c @@ -558,6 +558,7 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int, int has_error_code, new_stack, shift; uint32_t e1, e2, offset, ss = 0, esp, ss_e1 = 0, ss_e2 = 0; uint32_t old_eip, sp_mask; + int vm86 = env->eflags & VM_MASK; has_error_code = 0; if (!is_int && !is_hw) { @@ -673,7 +674,7 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int, ssp = get_seg_base(ss_e1, ss_e2); } else if ((e2 & DESC_C_MASK) || dpl == cpl) { /* to same privilege */ - if (env->eflags & VM_MASK) { + if (vm86) { raise_exception_err(env, EXCP0D_GPF, selector & 0xfffc); } new_stack = 0; @@ -694,14 +695,14 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int, #if 0 /* XXX: check that enough room is available */ push_size = 6 + (new_stack << 2) + (has_error_code << 1); - if (env->eflags & VM_MASK) { + if (vm86) { push_size += 8; } push_size <<= shift; #endif if (shift == 1) { if (new_stack) { - if (env->eflags & VM_MASK) { + if (vm86) { PUSHL(ssp, esp, sp_mask, env->segs[R_GS].selector); PUSHL(ssp, esp, sp_mask, env->segs[R_FS].selector); PUSHL(ssp, esp, sp_mask, env->segs[R_DS].selector); @@ -718,7 +719,7 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int, } } else { if (new_stack) { - if (env->eflags & VM_MASK) { + if (vm86) { PUSHW(ssp, esp, sp_mask, env->segs[R_GS].selector); PUSHW(ssp, esp, sp_mask, env->segs[R_FS].selector); PUSHW(ssp, esp, sp_mask, env->segs[R_DS].selector); @@ -742,7 +743,7 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int, env->eflags &= ~(TF_MASK | VM_MASK | RF_MASK | NT_MASK); if (new_stack) { - if (env->eflags & VM_MASK) { + if (vm86) { cpu_x86_load_seg_cache(env, R_ES, 0, 0, 0, 0); cpu_x86_load_seg_cache(env, R_DS, 0, 0, 0, 0); cpu_x86_load_seg_cache(env, R_FS, 0, 0, 0, 0); -- cgit v1.1 From b98dbc90950cd4e43ab9b4f8300dbeae6cf8c8cb Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 15 May 2014 16:07:04 +0200 Subject: target-i386: fix segment flags for SMM and VM86 mode With the next patch, these need to be correct or VM86 tasks have the wrong CPL. The flags are basically what the Intel VMX documentation say is mandatory for entry into a VM86 guest. For consistency, SMM ought to have the same flags except with CPL=0. Tested-by: Kevin O'Connor Signed-off-by: Paolo Bonzini --- bsd-user/main.c | 2 +- linux-user/main.c | 2 +- target-i386/gdbstub.c | 4 +++- target-i386/seg_helper.c | 11 ++++++++--- target-i386/smm_helper.c | 24 ++++++++++++++++++------ 5 files changed, 31 insertions(+), 12 deletions(-) diff --git a/bsd-user/main.c b/bsd-user/main.c index 4ba61da..0e8c26c 100644 --- a/bsd-user/main.c +++ b/bsd-user/main.c @@ -1004,7 +1004,7 @@ int main(int argc, char **argv) #if defined(TARGET_I386) env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK; - env->hflags |= HF_PE_MASK; + env->hflags |= HF_PE_MASK | HF_CPL_MASK; if (env->features[FEAT_1_EDX] & CPUID_SSE) { env->cr[4] |= CR4_OSFXSR_MASK; env->hflags |= HF_OSFXSR_MASK; diff --git a/linux-user/main.c b/linux-user/main.c index 882186e..3e21024 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -4052,7 +4052,7 @@ int main(int argc, char **argv, char **envp) #if defined(TARGET_I386) env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK; - env->hflags |= HF_PE_MASK; + env->hflags |= HF_PE_MASK | HF_CPL_MASK; if (env->features[FEAT_1_EDX] & CPUID_SSE) { env->cr[4] |= CR4_OSFXSR_MASK; env->hflags |= HF_OSFXSR_MASK; diff --git a/target-i386/gdbstub.c b/target-i386/gdbstub.c index d34e535..19fe9ad 100644 --- a/target-i386/gdbstub.c +++ b/target-i386/gdbstub.c @@ -127,9 +127,11 @@ static int x86_cpu_gdb_load_seg(X86CPU *cpu, int sreg, uint8_t *mem_buf) target_ulong base; if (!(env->cr[0] & CR0_PE_MASK) || (env->eflags & VM_MASK)) { + int dpl = (env->eflags & VM_MASK) ? 3 : 0; base = selector << 4; limit = 0xffff; - flags = 0; + flags = DESC_P_MASK | DESC_S_MASK | DESC_W_MASK | + DESC_A_MASK | (dpl << DESC_DPL_SHIFT); } else { if (!cpu_x86_get_descr_debug(env, selector, &base, &limit, &flags)) { diff --git a/target-i386/seg_helper.c b/target-i386/seg_helper.c index cc7eadf..6f7efee 100644 --- a/target-i386/seg_helper.c +++ b/target-i386/seg_helper.c @@ -88,8 +88,10 @@ static inline void load_seg_cache_raw_dt(SegmentCache *sc, uint32_t e1, static inline void load_seg_vm(CPUX86State *env, int seg, int selector) { selector &= 0xffff; - cpu_x86_load_seg_cache(env, seg, selector, - (selector << 4), 0xffff, 0); + + cpu_x86_load_seg_cache(env, seg, selector, (selector << 4), 0xffff, + DESC_P_MASK | DESC_S_MASK | DESC_W_MASK | + DESC_A_MASK | (3 << DESC_DPL_SHIFT)); } static inline void get_ss_esp_from_tss(CPUX86State *env, uint32_t *ss_ptr, @@ -2465,9 +2467,12 @@ void helper_verw(CPUX86State *env, target_ulong selector1) void cpu_x86_load_seg(CPUX86State *env, int seg_reg, int selector) { if (!(env->cr[0] & CR0_PE_MASK) || (env->eflags & VM_MASK)) { + int dpl = (env->eflags & VM_MASK) ? 3 : 0; selector &= 0xffff; cpu_x86_load_seg_cache(env, seg_reg, selector, - (selector << 4), 0xffff, 0); + (selector << 4), 0xffff, + DESC_P_MASK | DESC_S_MASK | DESC_W_MASK | + DESC_A_MASK | (dpl << DESC_DPL_SHIFT)); } else { helper_load_seg(env, seg_reg, selector); } diff --git a/target-i386/smm_helper.c b/target-i386/smm_helper.c index 4841d53..517f3b0 100644 --- a/target-i386/smm_helper.c +++ b/target-i386/smm_helper.c @@ -171,12 +171,24 @@ void do_smm_enter(X86CPU *cpu) CC_OP = CC_OP_EFLAGS; cpu_x86_load_seg_cache(env, R_CS, (env->smbase >> 4) & 0xffff, env->smbase, - 0xffffffff, 0); - cpu_x86_load_seg_cache(env, R_DS, 0, 0, 0xffffffff, 0); - cpu_x86_load_seg_cache(env, R_ES, 0, 0, 0xffffffff, 0); - cpu_x86_load_seg_cache(env, R_SS, 0, 0, 0xffffffff, 0); - cpu_x86_load_seg_cache(env, R_FS, 0, 0, 0xffffffff, 0); - cpu_x86_load_seg_cache(env, R_GS, 0, 0, 0xffffffff, 0); + 0xffffffff, + DESC_P_MASK | DESC_S_MASK | DESC_W_MASK | + DESC_A_MASK); + cpu_x86_load_seg_cache(env, R_DS, 0, 0, 0xffffffff, + DESC_P_MASK | DESC_S_MASK | DESC_W_MASK | + DESC_A_MASK); + cpu_x86_load_seg_cache(env, R_ES, 0, 0, 0xffffffff, + DESC_P_MASK | DESC_S_MASK | DESC_W_MASK | + DESC_A_MASK); + cpu_x86_load_seg_cache(env, R_SS, 0, 0, 0xffffffff, + DESC_P_MASK | DESC_S_MASK | DESC_W_MASK | + DESC_A_MASK); + cpu_x86_load_seg_cache(env, R_FS, 0, 0, 0xffffffff, + DESC_P_MASK | DESC_S_MASK | DESC_W_MASK | + DESC_A_MASK); + cpu_x86_load_seg_cache(env, R_GS, 0, 0, 0xffffffff, + DESC_P_MASK | DESC_S_MASK | DESC_W_MASK | + DESC_A_MASK); } void helper_rsm(CPUX86State *env) -- cgit v1.1 From d3b5491897456739c6dc21c604ef8bc28e294bfc Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 15 May 2014 18:19:17 +0200 Subject: target-i386: rework CPL checks during task switch, preparing for next patch During task switch, all of CS.DPL, CS.RPL, SS.DPL must match (in addition to all the other requirements) and will be the new CPL. So far this worked by carefully setting the CS selector and flags before doing the task switch; but this will not work once we get the CPL from SS.DPL. Temporarily assume that the CPL comes from CS.RPL during task switch to a protected-mode task, until the descriptor of SS is loaded. Tested-by: Kevin O'Connor Signed-off-by: Paolo Bonzini --- target-i386/seg_helper.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/target-i386/seg_helper.c b/target-i386/seg_helper.c index 6f7efee..0f00aed 100644 --- a/target-i386/seg_helper.c +++ b/target-i386/seg_helper.c @@ -135,11 +135,10 @@ static inline void get_ss_esp_from_tss(CPUX86State *env, uint32_t *ss_ptr, } } -/* XXX: merge with load_seg() */ -static void tss_load_seg(CPUX86State *env, int seg_reg, int selector) +static void tss_load_seg(CPUX86State *env, int seg_reg, int selector, int cpl) { uint32_t e1, e2; - int rpl, dpl, cpl; + int rpl, dpl; if ((selector & 0xfffc) != 0) { if (load_segment(env, &e1, &e2, selector) != 0) { @@ -150,18 +149,13 @@ static void tss_load_seg(CPUX86State *env, int seg_reg, int selector) } rpl = selector & 3; dpl = (e2 >> DESC_DPL_SHIFT) & 3; - cpl = env->hflags & HF_CPL_MASK; if (seg_reg == R_CS) { if (!(e2 & DESC_CS_MASK)) { raise_exception_err(env, EXCP0A_TSS, selector & 0xfffc); } - /* XXX: is it correct? */ if (dpl != rpl) { raise_exception_err(env, EXCP0A_TSS, selector & 0xfffc); } - if ((e2 & DESC_C_MASK) && dpl > rpl) { - raise_exception_err(env, EXCP0A_TSS, selector & 0xfffc); - } } else if (seg_reg == R_SS) { /* SS must be writable data */ if ((e2 & DESC_CS_MASK) || !(e2 & DESC_W_MASK)) { @@ -448,12 +442,13 @@ static void switch_tss(CPUX86State *env, int tss_selector, /* load the segments */ if (!(new_eflags & VM_MASK)) { - tss_load_seg(env, R_CS, new_segs[R_CS]); - tss_load_seg(env, R_SS, new_segs[R_SS]); - tss_load_seg(env, R_ES, new_segs[R_ES]); - tss_load_seg(env, R_DS, new_segs[R_DS]); - tss_load_seg(env, R_FS, new_segs[R_FS]); - tss_load_seg(env, R_GS, new_segs[R_GS]); + int cpl = new_segs[R_CS] & 3; + tss_load_seg(env, R_CS, new_segs[R_CS], cpl); + tss_load_seg(env, R_SS, new_segs[R_SS], cpl); + tss_load_seg(env, R_ES, new_segs[R_ES], cpl); + tss_load_seg(env, R_DS, new_segs[R_DS], cpl); + tss_load_seg(env, R_FS, new_segs[R_FS], cpl); + tss_load_seg(env, R_GS, new_segs[R_GS], cpl); } /* check that env->eip is in the CS segment limits */ -- cgit v1.1 From 7125c937c97d9ec4a41b3cb6d1b3e805ec53e255 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 14 May 2014 10:38:18 +0200 Subject: target-i386: get CPL from SS.DPL CS.RPL is not equal to the CPL in the few instructions between setting CR0.PE and reloading CS. We get this right in the common case, because writes to CR0 do not modify the CPL, but it would not be enough if an SMI comes exactly during that brief period. Were this to happen, the RSM instruction would erroneously set CPL to the low two bits of the real-mode selector; and if they are not 00, the next instruction fetch cannot access the code segment and causes a triple fault. However, SS.DPL *is* always equal to the CPL. In real processors (AMD only) there is a weird case of SYSRET setting SS.DPL=SS.RPL from the STAR register while forcing CPL=3, but we do not emulate that. Tested-by: Kevin O'Connor Signed-off-by: Paolo Bonzini --- target-i386/cpu.h | 8 +++----- target-i386/kvm.c | 2 +- target-i386/machine.c | 8 ++++++++ 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/target-i386/cpu.h b/target-i386/cpu.h index e9cbdab..65a44d9 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -986,7 +986,6 @@ static inline void cpu_x86_load_seg_cache(CPUX86State *env, /* update the hidden flags */ { if (seg_reg == R_CS) { - int cpl = selector & 3; #ifdef TARGET_X86_64 if ((env->hflags & HF_LMA_MASK) && (flags & DESC_L_MASK)) { /* long mode */ @@ -996,15 +995,14 @@ static inline void cpu_x86_load_seg_cache(CPUX86State *env, #endif { /* legacy / compatibility case */ - if (!(env->cr[0] & CR0_PE_MASK)) - cpl = 0; - else if (env->eflags & VM_MASK) - cpl = 3; new_hflags = (env->segs[R_CS].flags & DESC_B_MASK) >> (DESC_B_SHIFT - HF_CS32_SHIFT); env->hflags = (env->hflags & ~(HF_CS32_MASK | HF_CS64_MASK)) | new_hflags; } + } + if (seg_reg == R_SS) { + int cpl = (flags >> DESC_DPL_SHIFT) & 3; #if HF_CPL_MASK != 3 #error HF_CPL_MASK is hardcoded #endif diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 0d894ef..3931d4c 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -1430,7 +1430,7 @@ static int kvm_get_sregs(X86CPU *cpu) HF_OSFXSR_MASK | HF_LMA_MASK | HF_CS32_MASK | \ HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK) - hflags = (env->segs[R_CS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK; + hflags = (env->segs[R_SS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK; hflags |= (env->cr[0] & CR0_PE_MASK) << (HF_PE_SHIFT - CR0_PE_SHIFT); hflags |= (env->cr[0] << (HF_MP_SHIFT - CR0_MP_SHIFT)) & (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK); diff --git a/target-i386/machine.c b/target-i386/machine.c index 168cab6..bdff447 100644 --- a/target-i386/machine.c +++ b/target-i386/machine.c @@ -312,6 +312,14 @@ static int cpu_post_load(void *opaque, int version_id) env->segs[R_SS].flags &= ~(env->segs[R_SS].flags & DESC_DPL_MASK); } + /* Older versions of QEMU incorrectly used CS.DPL as the CPL when + * running under KVM. This is wrong for conforming code segments. + * Luckily, in our implementation the CPL field of hflags is redundant + * and we can get the right value from the SS descriptor privilege level. + */ + env->hflags &= ~HF_CPL_MASK; + env->hflags |= (env->segs[R_SS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK; + /* XXX: restore FPU round state */ env->fpstt = (env->fpus_vmstate >> 11) & 7; env->fpus = env->fpus_vmstate & ~0x3800; -- cgit v1.1 From 28fb26f19ffa675ac8cc08a355e5b01cc194aa5e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 14 May 2014 16:47:48 +0200 Subject: target-i386: set CC_OP to CC_OP_EFLAGS in cpu_load_eflags There is no reason to keep that out of the function. The comment refers to the disassembler's cc_op state rather than the CPUState field. Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- target-i386/cpu.h | 5 ++++- target-i386/seg_helper.c | 2 -- target-i386/smm_helper.c | 2 -- target-i386/svm_helper.c | 2 -- 4 files changed, 4 insertions(+), 7 deletions(-) diff --git a/target-i386/cpu.h b/target-i386/cpu.h index 65a44d9..ee410af 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -1232,11 +1232,14 @@ static inline uint32_t cpu_compute_eflags(CPUX86State *env) return env->eflags | cpu_cc_compute_all(env, CC_OP) | (env->df & DF_MASK); } -/* NOTE: CC_OP must be modified manually to CC_OP_EFLAGS */ +/* NOTE: the translator must set DisasContext.cc_op to CC_OP_EFLAGS + * after generating a call to a helper that uses this. + */ static inline void cpu_load_eflags(CPUX86State *env, int eflags, int update_mask) { CC_SRC = eflags & (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C); + CC_OP = CC_OP_EFLAGS; env->df = 1 - (2 * ((eflags >> 10) & 1)); env->eflags = (env->eflags & ~update_mask) | (eflags & update_mask) | 0x2; diff --git a/target-i386/seg_helper.c b/target-i386/seg_helper.c index 0f00aed..0b19a8c 100644 --- a/target-i386/seg_helper.c +++ b/target-i386/seg_helper.c @@ -1598,7 +1598,6 @@ void helper_ljmp_protected(CPUX86State *env, int new_cs, target_ulong new_eip, } next_eip = env->eip + next_eip_addend; switch_tss(env, new_cs, e1, e2, SWITCH_TSS_JMP, next_eip); - CC_OP = CC_OP_EFLAGS; break; case 4: /* 286 call gate */ case 12: /* 386 call gate */ @@ -1767,7 +1766,6 @@ void helper_lcall_protected(CPUX86State *env, int new_cs, target_ulong new_eip, raise_exception_err(env, EXCP0D_GPF, new_cs & 0xfffc); } switch_tss(env, new_cs, e1, e2, SWITCH_TSS_CALL, next_eip); - CC_OP = CC_OP_EFLAGS; return; case 4: /* 286 call gate */ case 12: /* 386 call gate */ diff --git a/target-i386/smm_helper.c b/target-i386/smm_helper.c index 517f3b0..1e5f5ce 100644 --- a/target-i386/smm_helper.c +++ b/target-i386/smm_helper.c @@ -168,7 +168,6 @@ void do_smm_enter(X86CPU *cpu) CR0_PG_MASK)); cpu_x86_update_cr4(env, 0); env->dr[7] = 0x00000400; - CC_OP = CC_OP_EFLAGS; cpu_x86_load_seg_cache(env, R_CS, (env->smbase >> 4) & 0xffff, env->smbase, 0xffffffff, @@ -308,7 +307,6 @@ void helper_rsm(CPUX86State *env) env->smbase = ldl_phys(cs->as, sm_state + 0x7ef8) & ~0x7fff; } #endif - CC_OP = CC_OP_EFLAGS; env->hflags &= ~HF_SMM_MASK; cpu_smm_update(env); diff --git a/target-i386/svm_helper.c b/target-i386/svm_helper.c index 846eaa5..8dc6f8c 100644 --- a/target-i386/svm_helper.c +++ b/target-i386/svm_helper.c @@ -260,7 +260,6 @@ void helper_vmrun(CPUX86State *env, int aflag, int next_eip_addend) env->vm_vmcb + offsetof(struct vmcb, save.rflags)), ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK)); - CC_OP = CC_OP_EFLAGS; svm_load_seg_cache(env, env->vm_vmcb + offsetof(struct vmcb, save.es), R_ES); @@ -702,7 +701,6 @@ void helper_vmexit(CPUX86State *env, uint32_t exit_code, uint64_t exit_info_1) save.rflags)), ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK | VM_MASK)); - CC_OP = CC_OP_EFLAGS; svm_load_seg_cache(env, env->vm_hsave + offsetof(struct vmcb, save.es), R_ES); -- cgit v1.1 From 0e1dac6c41f337f997814344a847162968c20c64 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Fri, 30 May 2014 17:26:22 -0300 Subject: kvm: Ensure negative return value on kvm_init() error handling path We need to ensure ret < 0 when going through the error path, or QEMU may try to run the half-initialized VM and crash. Signed-off-by: Eduardo Habkost Signed-off-by: Paolo Bonzini --- kvm-all.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kvm-all.c b/kvm-all.c index a343ede..f7fe9c6 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -1374,7 +1374,7 @@ int kvm_init(MachineClass *mc) ret = kvm_ioctl(s, KVM_GET_API_VERSION, 0); if (ret < KVM_API_VERSION) { - if (ret > 0) { + if (ret >= 0) { ret = -EINVAL; } fprintf(stderr, "kvm version too old\n"); @@ -1425,6 +1425,7 @@ int kvm_init(MachineClass *mc) if (mc->kvm_type) { type = mc->kvm_type(kvm_type); } else if (kvm_type) { + ret = -EINVAL; fprintf(stderr, "Invalid argument kvm-type=%s\n", kvm_type); goto err; } @@ -1525,6 +1526,7 @@ int kvm_init(MachineClass *mc) return 0; err: + assert(ret < 0); if (s->vmfd >= 0) { close(s->vmfd); } -- cgit v1.1 From f522d2acc549dd11f495048330aa5f3f424a7dfa Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 2 Jun 2014 11:28:50 -0600 Subject: kvm: Enable -cpu option to hide KVM The latest Nvidia driver (337.88) specifically checks for KVM as the hypervisor and reports Code 43 for the driver in a Windows guest when found. Removing or changing the KVM signature is sufficient for the driver to load and work. This patch adds an option to easily allow the KVM hypervisor signature to be hidden using '-cpu kvm=off'. We continue to expose KVM via the cpuid value by default. The state of this option does not supercede or replace -enable-kvm or the accel=kvm machine option. This only changes the visibility of KVM to the guest and paravirtual features specifically tied to the KVM cpuid. Signed-off-by: Alex Williamson Signed-off-by: Paolo Bonzini --- target-i386/cpu-qom.h | 1 + target-i386/cpu.c | 1 + target-i386/kvm.c | 28 +++++++++++++++------------- 3 files changed, 17 insertions(+), 13 deletions(-) diff --git a/target-i386/cpu-qom.h b/target-i386/cpu-qom.h index e9b3d57..0808cfc 100644 --- a/target-i386/cpu-qom.h +++ b/target-i386/cpu-qom.h @@ -87,6 +87,7 @@ typedef struct X86CPU { bool hyperv_time; bool check_cpuid; bool enforce_cpuid; + bool expose_kvm; /* if true the CPUID code directly forward host cache leaves to the guest */ bool cache_info_passthrough; diff --git a/target-i386/cpu.c b/target-i386/cpu.c index 042a48d..cbf1d97 100644 --- a/target-i386/cpu.c +++ b/target-i386/cpu.c @@ -2792,6 +2792,7 @@ static Property x86_cpu_properties[] = { DEFINE_PROP_BOOL("hv-time", X86CPU, hyperv_time, false), DEFINE_PROP_BOOL("check", X86CPU, check_cpuid, false), DEFINE_PROP_BOOL("enforce", X86CPU, enforce_cpuid, false), + DEFINE_PROP_BOOL("kvm", X86CPU, expose_kvm, true), DEFINE_PROP_END_OF_LIST() }; diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 3931d4c..40cd4b8 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -528,23 +528,25 @@ int kvm_arch_init_vcpu(CPUState *cs) has_msr_hv_hypercall = true; } - memcpy(signature, "KVMKVMKVM\0\0\0", 12); - c = &cpuid_data.entries[cpuid_i++]; - c->function = KVM_CPUID_SIGNATURE | kvm_base; - c->eax = 0; - c->ebx = signature[0]; - c->ecx = signature[1]; - c->edx = signature[2]; + if (cpu->expose_kvm) { + memcpy(signature, "KVMKVMKVM\0\0\0", 12); + c = &cpuid_data.entries[cpuid_i++]; + c->function = KVM_CPUID_SIGNATURE | kvm_base; + c->eax = 0; + c->ebx = signature[0]; + c->ecx = signature[1]; + c->edx = signature[2]; - c = &cpuid_data.entries[cpuid_i++]; - c->function = KVM_CPUID_FEATURES | kvm_base; - c->eax = env->features[FEAT_KVM]; + c = &cpuid_data.entries[cpuid_i++]; + c->function = KVM_CPUID_FEATURES | kvm_base; + c->eax = env->features[FEAT_KVM]; - has_msr_async_pf_en = c->eax & (1 << KVM_FEATURE_ASYNC_PF); + has_msr_async_pf_en = c->eax & (1 << KVM_FEATURE_ASYNC_PF); - has_msr_pv_eoi_en = c->eax & (1 << KVM_FEATURE_PV_EOI); + has_msr_pv_eoi_en = c->eax & (1 << KVM_FEATURE_PV_EOI); - has_msr_kvm_steal_time = c->eax & (1 << KVM_FEATURE_STEAL_TIME); + has_msr_kvm_steal_time = c->eax & (1 << KVM_FEATURE_STEAL_TIME); + } cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused); -- cgit v1.1 From 9b1786829aefb83f37a8f3135e3ea91c56001b56 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Tue, 3 Jun 2014 13:34:48 -0300 Subject: kvmclock: Ensure proper env->tsc value for kvmclock_current_nsec calculation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ensure proper env->tsc value for kvmclock_current_nsec calculation. Reported-by: Marcin GibuĊ‚a Cc: qemu-stable@nongnu.org Signed-off-by: Marcelo Tosatti Signed-off-by: Paolo Bonzini --- hw/i386/kvm/clock.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c index 6f4ed28a..bef2504 100644 --- a/hw/i386/kvm/clock.c +++ b/hw/i386/kvm/clock.c @@ -17,6 +17,7 @@ #include "qemu/host-utils.h" #include "sysemu/sysemu.h" #include "sysemu/kvm.h" +#include "sysemu/cpus.h" #include "hw/sysbus.h" #include "hw/kvm/clock.h" @@ -65,6 +66,7 @@ static uint64_t kvmclock_current_nsec(KVMClockState *s) cpu_physical_memory_read(kvmclock_struct_pa, &time, sizeof(time)); + assert(time.tsc_timestamp <= migration_tsc); delta = migration_tsc - time.tsc_timestamp; if (time.tsc_shift < 0) { delta >>= -time.tsc_shift; @@ -123,6 +125,8 @@ static void kvmclock_vm_state_change(void *opaque, int running, if (s->clock_valid) { return; } + + cpu_synchronize_all_states(); ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data); if (ret < 0) { fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret)); -- cgit v1.1 From 79b6f2f651d64a122dd647c1456635d5a6a176ac Mon Sep 17 00:00:00 2001 From: Jidong Xiao Date: Tue, 3 Jun 2014 21:10:06 -0400 Subject: kvm: Fix eax for cpuid leaf 0x40000000 Since Linux kernel 3.5, KVM has documented eax for leaf 0x40000000 to be KVM_CPUID_FEATURES: https://github.com/torvalds/linux/commit/57c22e5f35aa4b9b2fe11f73f3e62bbf9ef36190 But qemu still tries to set it to 0. It would be better to make qemu and kvm consistent. This patch just fixes this issue. Signed-off-by: Jidong Xiao [Include kvm_base in the value. - Paolo] Signed-off-by: Paolo Bonzini --- target-i386/kvm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 40cd4b8..4bf0ac9 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -532,7 +532,7 @@ int kvm_arch_init_vcpu(CPUState *cs) memcpy(signature, "KVMKVMKVM\0\0\0", 12); c = &cpuid_data.entries[cpuid_i++]; c->function = KVM_CPUID_SIGNATURE | kvm_base; - c->eax = 0; + c->eax = KVM_CPUID_FEATURES | kvm_base; c->ebx = signature[0]; c->ecx = signature[1]; c->edx = signature[2]; -- cgit v1.1