From a096b3a6732f846ec57dc28b47ee9435aa0609bf Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Fri, 16 May 2014 17:15:21 +0200
Subject: kvmclock: Ensure time in migration never goes backward

When we migrate we ask the kernel about its current belief on what the guest
time would be. However, I've seen cases where the kvmclock guest structure
indicates a time more recent than the kvm returned time.

To make sure we never go backwards, calculate what the guest would have seen
as time at the point of migration and use that value instead of the kernel
returned one when it's more recent.  This bases the view of the kvmclock
after migration on the same foundation in host as well as guest.

Signed-off-by: Alexander Graf <agraf@suse.de>
Cc: qemu-stable@nongnu.org
Reviewed-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 hw/i386/kvm/clock.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c
index 892aa02..6f4ed28a 100644
--- a/hw/i386/kvm/clock.c
+++ b/hw/i386/kvm/clock.c
@@ -14,6 +14,7 @@
  */
 
 #include "qemu-common.h"
+#include "qemu/host-utils.h"
 #include "sysemu/sysemu.h"
 #include "sysemu/kvm.h"
 #include "hw/sysbus.h"
@@ -34,6 +35,47 @@ typedef struct KVMClockState {
     bool clock_valid;
 } KVMClockState;
 
+struct pvclock_vcpu_time_info {
+    uint32_t   version;
+    uint32_t   pad0;
+    uint64_t   tsc_timestamp;
+    uint64_t   system_time;
+    uint32_t   tsc_to_system_mul;
+    int8_t     tsc_shift;
+    uint8_t    flags;
+    uint8_t    pad[2];
+} __attribute__((__packed__)); /* 32 bytes */
+
+static uint64_t kvmclock_current_nsec(KVMClockState *s)
+{
+    CPUState *cpu = first_cpu;
+    CPUX86State *env = cpu->env_ptr;
+    hwaddr kvmclock_struct_pa = env->system_time_msr & ~1ULL;
+    uint64_t migration_tsc = env->tsc;
+    struct pvclock_vcpu_time_info time;
+    uint64_t delta;
+    uint64_t nsec_lo;
+    uint64_t nsec_hi;
+    uint64_t nsec;
+
+    if (!(env->system_time_msr & 1ULL)) {
+        /* KVM clock not active */
+        return 0;
+    }
+
+    cpu_physical_memory_read(kvmclock_struct_pa, &time, sizeof(time));
+
+    delta = migration_tsc - time.tsc_timestamp;
+    if (time.tsc_shift < 0) {
+        delta >>= -time.tsc_shift;
+    } else {
+        delta <<= time.tsc_shift;
+    }
+
+    mulu64(&nsec_lo, &nsec_hi, delta, time.tsc_to_system_mul);
+    nsec = (nsec_lo >> 32) | (nsec_hi << 32);
+    return nsec + time.system_time;
+}
 
 static void kvmclock_vm_state_change(void *opaque, int running,
                                      RunState state)
@@ -45,9 +87,15 @@ static void kvmclock_vm_state_change(void *opaque, int running,
 
     if (running) {
         struct kvm_clock_data data;
+        uint64_t time_at_migration = kvmclock_current_nsec(s);
 
         s->clock_valid = false;
 
+	/* We can't rely on the migrated clock value, just discard it */
+	if (time_at_migration) {
+	        s->clock = time_at_migration;
+	}
+
         data.clock = s->clock;
         data.flags = 0;
         ret = kvm_vm_ioctl(kvm_state, KVM_SET_CLOCK, &data);
-- 
cgit v1.1


From b763adf1a6b271f17f15ea31fae93d03e980d911 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 21 May 2014 12:42:26 +0200
Subject: kvm_stat: allow choosing between tracepoints and old stats

The old stats contain information not available in the tracepoints.
By default, keep the old behavior, but allow choosing which set of stats
to present, or even both.

Inspired by a patch from Marcelo Tosatti.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 scripts/kvm/kvm_stat | 60 +++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 41 insertions(+), 19 deletions(-)

diff --git a/scripts/kvm/kvm_stat b/scripts/kvm/kvm_stat
index 762544b..d7e97e7 100755
--- a/scripts/kvm/kvm_stat
+++ b/scripts/kvm/kvm_stat
@@ -352,8 +352,8 @@ class TracepointProvider(object):
         return ret
 
 class Stats:
-    def __init__(self, provider, fields = None):
-        self.provider = provider
+    def __init__(self, providers, fields = None):
+        self.providers = providers
         self.fields_filter = fields
         self._update()
     def _update(self):
@@ -362,22 +362,25 @@ class Stats:
             if not self.fields_filter:
                 return True
             return re.match(self.fields_filter, key) is not None
-        self.values = dict([(key, None)
-                            for key in provider.fields()
-                            if wanted(key)])
-        self.provider.select(self.values.keys())
+        self.values = dict()
+        for d in providers:
+            provider_fields = [key for key in d.fields() if wanted(key)]
+            for key in provider_fields:
+                self.values[key] = None
+            d.select(provider_fields)
     def set_fields_filter(self, fields_filter):
         self.fields_filter = fields_filter
         self._update()
     def get(self):
-        new = self.provider.read()
-        for key in self.provider.fields():
-            oldval = self.values.get(key, (0, 0))
-            newval = new[key]
-            newdelta = None
-            if oldval is not None:
-                newdelta = newval - oldval[0]
-            self.values[key] = (newval, newdelta)
+        for d in providers:
+            new = d.read()
+            for key in d.fields():
+                oldval = self.values.get(key, (0, 0))
+                newval = new[key]
+                newdelta = None
+                if oldval is not None:
+                    newdelta = newval - oldval[0]
+                self.values[key] = (newval, newdelta)
         return self.values
 
 if not os.access('/sys/kernel/debug', os.F_OK):
@@ -487,6 +490,18 @@ options.add_option('-l', '--log',
                    dest = 'log',
                    help = 'run in logging mode (like vmstat)',
                    )
+options.add_option('-t', '--tracepoints',
+                   action = 'store_true',
+                   default = False,
+                   dest = 'tracepoints',
+                   help = 'retrieve statistics from tracepoints',
+                   )
+options.add_option('-d', '--debugfs',
+                   action = 'store_true',
+                   default = False,
+                   dest = 'debugfs',
+                   help = 'retrieve statistics from debugfs',
+                   )
 options.add_option('-f', '--fields',
                    action = 'store',
                    default = None,
@@ -495,12 +510,19 @@ options.add_option('-f', '--fields',
                    )
 (options, args) = options.parse_args(sys.argv)
 
-try:
-    provider = TracepointProvider()
-except:
-    provider = DebugfsProvider()
+providers = []
+if options.tracepoints:
+    providers.append(TracepointProvider())
+if options.debugfs:
+    providers.append(DebugfsProvider())
+
+if len(providers) == 0:
+    try:
+        providers = [TracepointProvider()]
+    except:
+        providers = [DebugfsProvider()]
 
-stats = Stats(provider, fields = options.fields)
+stats = Stats(providers, fields = options.fields)
 
 if options.log:
     log(stats)
-- 
cgit v1.1


From 87446327ccb2e944fe7abc848bab798a0864eb03 Mon Sep 17 00:00:00 2001
From: Kevin O'Connor <kevin@koconnor.net>
Date: Tue, 20 May 2014 17:10:24 -0400
Subject: target-i386: Fix vm86 mode regression introduced in fd460606fd6f.

Commit fd460606fd6f moved setting of eflags above calls to
cpu_x86_load_seg_cache() in seg_helper.c.  Unfortunately, in
do_interrupt_protected() this moved the clearing of VM_MASK above a
test for it.

Fix this regression by storing the value of VM_MASK at the start of
do_interrupt_protected().

Signed-off-by: Kevin O'Connor <kevin@koconnor.net>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target-i386/seg_helper.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/target-i386/seg_helper.c b/target-i386/seg_helper.c
index 3cf862e..cc7eadf 100644
--- a/target-i386/seg_helper.c
+++ b/target-i386/seg_helper.c
@@ -558,6 +558,7 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int,
     int has_error_code, new_stack, shift;
     uint32_t e1, e2, offset, ss = 0, esp, ss_e1 = 0, ss_e2 = 0;
     uint32_t old_eip, sp_mask;
+    int vm86 = env->eflags & VM_MASK;
 
     has_error_code = 0;
     if (!is_int && !is_hw) {
@@ -673,7 +674,7 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int,
         ssp = get_seg_base(ss_e1, ss_e2);
     } else if ((e2 & DESC_C_MASK) || dpl == cpl) {
         /* to same privilege */
-        if (env->eflags & VM_MASK) {
+        if (vm86) {
             raise_exception_err(env, EXCP0D_GPF, selector & 0xfffc);
         }
         new_stack = 0;
@@ -694,14 +695,14 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int,
 #if 0
     /* XXX: check that enough room is available */
     push_size = 6 + (new_stack << 2) + (has_error_code << 1);
-    if (env->eflags & VM_MASK) {
+    if (vm86) {
         push_size += 8;
     }
     push_size <<= shift;
 #endif
     if (shift == 1) {
         if (new_stack) {
-            if (env->eflags & VM_MASK) {
+            if (vm86) {
                 PUSHL(ssp, esp, sp_mask, env->segs[R_GS].selector);
                 PUSHL(ssp, esp, sp_mask, env->segs[R_FS].selector);
                 PUSHL(ssp, esp, sp_mask, env->segs[R_DS].selector);
@@ -718,7 +719,7 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int,
         }
     } else {
         if (new_stack) {
-            if (env->eflags & VM_MASK) {
+            if (vm86) {
                 PUSHW(ssp, esp, sp_mask, env->segs[R_GS].selector);
                 PUSHW(ssp, esp, sp_mask, env->segs[R_FS].selector);
                 PUSHW(ssp, esp, sp_mask, env->segs[R_DS].selector);
@@ -742,7 +743,7 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int,
     env->eflags &= ~(TF_MASK | VM_MASK | RF_MASK | NT_MASK);
 
     if (new_stack) {
-        if (env->eflags & VM_MASK) {
+        if (vm86) {
             cpu_x86_load_seg_cache(env, R_ES, 0, 0, 0, 0);
             cpu_x86_load_seg_cache(env, R_DS, 0, 0, 0, 0);
             cpu_x86_load_seg_cache(env, R_FS, 0, 0, 0, 0);
-- 
cgit v1.1


From b98dbc90950cd4e43ab9b4f8300dbeae6cf8c8cb Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 15 May 2014 16:07:04 +0200
Subject: target-i386: fix segment flags for SMM and VM86 mode

With the next patch, these need to be correct or VM86 tasks
have the wrong CPL.  The flags are basically what the Intel VMX
documentation say is mandatory for entry into a VM86 guest.

For consistency, SMM ought to have the same flags except with
CPL=0.

Tested-by: Kevin O'Connor <kevin@koconnor.net>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 bsd-user/main.c          |  2 +-
 linux-user/main.c        |  2 +-
 target-i386/gdbstub.c    |  4 +++-
 target-i386/seg_helper.c | 11 ++++++++---
 target-i386/smm_helper.c | 24 ++++++++++++++++++------
 5 files changed, 31 insertions(+), 12 deletions(-)

diff --git a/bsd-user/main.c b/bsd-user/main.c
index 4ba61da..0e8c26c 100644
--- a/bsd-user/main.c
+++ b/bsd-user/main.c
@@ -1004,7 +1004,7 @@ int main(int argc, char **argv)
 
 #if defined(TARGET_I386)
     env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK;
-    env->hflags |= HF_PE_MASK;
+    env->hflags |= HF_PE_MASK | HF_CPL_MASK;
     if (env->features[FEAT_1_EDX] & CPUID_SSE) {
         env->cr[4] |= CR4_OSFXSR_MASK;
         env->hflags |= HF_OSFXSR_MASK;
diff --git a/linux-user/main.c b/linux-user/main.c
index 882186e..3e21024 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -4052,7 +4052,7 @@ int main(int argc, char **argv, char **envp)
 
 #if defined(TARGET_I386)
     env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK;
-    env->hflags |= HF_PE_MASK;
+    env->hflags |= HF_PE_MASK | HF_CPL_MASK;
     if (env->features[FEAT_1_EDX] & CPUID_SSE) {
         env->cr[4] |= CR4_OSFXSR_MASK;
         env->hflags |= HF_OSFXSR_MASK;
diff --git a/target-i386/gdbstub.c b/target-i386/gdbstub.c
index d34e535..19fe9ad 100644
--- a/target-i386/gdbstub.c
+++ b/target-i386/gdbstub.c
@@ -127,9 +127,11 @@ static int x86_cpu_gdb_load_seg(X86CPU *cpu, int sreg, uint8_t *mem_buf)
         target_ulong base;
 
         if (!(env->cr[0] & CR0_PE_MASK) || (env->eflags & VM_MASK)) {
+            int dpl = (env->eflags & VM_MASK) ? 3 : 0;
             base = selector << 4;
             limit = 0xffff;
-            flags = 0;
+            flags = DESC_P_MASK | DESC_S_MASK | DESC_W_MASK |
+                    DESC_A_MASK | (dpl << DESC_DPL_SHIFT);
         } else {
             if (!cpu_x86_get_descr_debug(env, selector, &base, &limit,
                                          &flags)) {
diff --git a/target-i386/seg_helper.c b/target-i386/seg_helper.c
index cc7eadf..6f7efee 100644
--- a/target-i386/seg_helper.c
+++ b/target-i386/seg_helper.c
@@ -88,8 +88,10 @@ static inline void load_seg_cache_raw_dt(SegmentCache *sc, uint32_t e1,
 static inline void load_seg_vm(CPUX86State *env, int seg, int selector)
 {
     selector &= 0xffff;
-    cpu_x86_load_seg_cache(env, seg, selector,
-                           (selector << 4), 0xffff, 0);
+
+    cpu_x86_load_seg_cache(env, seg, selector, (selector << 4), 0xffff,
+                           DESC_P_MASK | DESC_S_MASK | DESC_W_MASK |
+                           DESC_A_MASK | (3 << DESC_DPL_SHIFT));
 }
 
 static inline void get_ss_esp_from_tss(CPUX86State *env, uint32_t *ss_ptr,
@@ -2465,9 +2467,12 @@ void helper_verw(CPUX86State *env, target_ulong selector1)
 void cpu_x86_load_seg(CPUX86State *env, int seg_reg, int selector)
 {
     if (!(env->cr[0] & CR0_PE_MASK) || (env->eflags & VM_MASK)) {
+        int dpl = (env->eflags & VM_MASK) ? 3 : 0;
         selector &= 0xffff;
         cpu_x86_load_seg_cache(env, seg_reg, selector,
-                               (selector << 4), 0xffff, 0);
+                               (selector << 4), 0xffff,
+                               DESC_P_MASK | DESC_S_MASK | DESC_W_MASK |
+                               DESC_A_MASK | (dpl << DESC_DPL_SHIFT));
     } else {
         helper_load_seg(env, seg_reg, selector);
     }
diff --git a/target-i386/smm_helper.c b/target-i386/smm_helper.c
index 4841d53..517f3b0 100644
--- a/target-i386/smm_helper.c
+++ b/target-i386/smm_helper.c
@@ -171,12 +171,24 @@ void do_smm_enter(X86CPU *cpu)
     CC_OP = CC_OP_EFLAGS;
 
     cpu_x86_load_seg_cache(env, R_CS, (env->smbase >> 4) & 0xffff, env->smbase,
-                           0xffffffff, 0);
-    cpu_x86_load_seg_cache(env, R_DS, 0, 0, 0xffffffff, 0);
-    cpu_x86_load_seg_cache(env, R_ES, 0, 0, 0xffffffff, 0);
-    cpu_x86_load_seg_cache(env, R_SS, 0, 0, 0xffffffff, 0);
-    cpu_x86_load_seg_cache(env, R_FS, 0, 0, 0xffffffff, 0);
-    cpu_x86_load_seg_cache(env, R_GS, 0, 0, 0xffffffff, 0);
+                           0xffffffff,
+                           DESC_P_MASK | DESC_S_MASK | DESC_W_MASK |
+                           DESC_A_MASK);
+    cpu_x86_load_seg_cache(env, R_DS, 0, 0, 0xffffffff,
+                           DESC_P_MASK | DESC_S_MASK | DESC_W_MASK |
+                           DESC_A_MASK);
+    cpu_x86_load_seg_cache(env, R_ES, 0, 0, 0xffffffff,
+                           DESC_P_MASK | DESC_S_MASK | DESC_W_MASK |
+                           DESC_A_MASK);
+    cpu_x86_load_seg_cache(env, R_SS, 0, 0, 0xffffffff,
+                           DESC_P_MASK | DESC_S_MASK | DESC_W_MASK |
+                           DESC_A_MASK);
+    cpu_x86_load_seg_cache(env, R_FS, 0, 0, 0xffffffff,
+                           DESC_P_MASK | DESC_S_MASK | DESC_W_MASK |
+                           DESC_A_MASK);
+    cpu_x86_load_seg_cache(env, R_GS, 0, 0, 0xffffffff,
+                           DESC_P_MASK | DESC_S_MASK | DESC_W_MASK |
+                           DESC_A_MASK);
 }
 
 void helper_rsm(CPUX86State *env)
-- 
cgit v1.1


From d3b5491897456739c6dc21c604ef8bc28e294bfc Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 15 May 2014 18:19:17 +0200
Subject: target-i386: rework CPL checks during task switch, preparing for next
 patch

During task switch, all of CS.DPL, CS.RPL, SS.DPL must match (in addition
to all the other requirements) and will be the new CPL.  So far this worked
by carefully setting the CS selector and flags before doing the task
switch; but this will not work once we get the CPL from SS.DPL.

Temporarily assume that the CPL comes from CS.RPL during task switch
to a protected-mode task, until the descriptor of SS is loaded.

Tested-by: Kevin O'Connor <kevin@koconnor.net>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target-i386/seg_helper.c | 23 +++++++++--------------
 1 file changed, 9 insertions(+), 14 deletions(-)

diff --git a/target-i386/seg_helper.c b/target-i386/seg_helper.c
index 6f7efee..0f00aed 100644
--- a/target-i386/seg_helper.c
+++ b/target-i386/seg_helper.c
@@ -135,11 +135,10 @@ static inline void get_ss_esp_from_tss(CPUX86State *env, uint32_t *ss_ptr,
     }
 }
 
-/* XXX: merge with load_seg() */
-static void tss_load_seg(CPUX86State *env, int seg_reg, int selector)
+static void tss_load_seg(CPUX86State *env, int seg_reg, int selector, int cpl)
 {
     uint32_t e1, e2;
-    int rpl, dpl, cpl;
+    int rpl, dpl;
 
     if ((selector & 0xfffc) != 0) {
         if (load_segment(env, &e1, &e2, selector) != 0) {
@@ -150,18 +149,13 @@ static void tss_load_seg(CPUX86State *env, int seg_reg, int selector)
         }
         rpl = selector & 3;
         dpl = (e2 >> DESC_DPL_SHIFT) & 3;
-        cpl = env->hflags & HF_CPL_MASK;
         if (seg_reg == R_CS) {
             if (!(e2 & DESC_CS_MASK)) {
                 raise_exception_err(env, EXCP0A_TSS, selector & 0xfffc);
             }
-            /* XXX: is it correct? */
             if (dpl != rpl) {
                 raise_exception_err(env, EXCP0A_TSS, selector & 0xfffc);
             }
-            if ((e2 & DESC_C_MASK) && dpl > rpl) {
-                raise_exception_err(env, EXCP0A_TSS, selector & 0xfffc);
-            }
         } else if (seg_reg == R_SS) {
             /* SS must be writable data */
             if ((e2 & DESC_CS_MASK) || !(e2 & DESC_W_MASK)) {
@@ -448,12 +442,13 @@ static void switch_tss(CPUX86State *env, int tss_selector,
 
     /* load the segments */
     if (!(new_eflags & VM_MASK)) {
-        tss_load_seg(env, R_CS, new_segs[R_CS]);
-        tss_load_seg(env, R_SS, new_segs[R_SS]);
-        tss_load_seg(env, R_ES, new_segs[R_ES]);
-        tss_load_seg(env, R_DS, new_segs[R_DS]);
-        tss_load_seg(env, R_FS, new_segs[R_FS]);
-        tss_load_seg(env, R_GS, new_segs[R_GS]);
+        int cpl = new_segs[R_CS] & 3;
+        tss_load_seg(env, R_CS, new_segs[R_CS], cpl);
+        tss_load_seg(env, R_SS, new_segs[R_SS], cpl);
+        tss_load_seg(env, R_ES, new_segs[R_ES], cpl);
+        tss_load_seg(env, R_DS, new_segs[R_DS], cpl);
+        tss_load_seg(env, R_FS, new_segs[R_FS], cpl);
+        tss_load_seg(env, R_GS, new_segs[R_GS], cpl);
     }
 
     /* check that env->eip is in the CS segment limits */
-- 
cgit v1.1


From 7125c937c97d9ec4a41b3cb6d1b3e805ec53e255 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 14 May 2014 10:38:18 +0200
Subject: target-i386: get CPL from SS.DPL

CS.RPL is not equal to the CPL in the few instructions between
setting CR0.PE and reloading CS.  We get this right in the common
case, because writes to CR0 do not modify the CPL, but it would
not be enough if an SMI comes exactly during that brief period.
Were this to happen, the RSM instruction would erroneously set
CPL to the low two bits of the real-mode selector; and if they are
not 00, the next instruction fetch cannot access the code segment
and causes a triple fault.

However, SS.DPL *is* always equal to the CPL.  In real processors
(AMD only) there is a weird case of SYSRET setting SS.DPL=SS.RPL
from the STAR register while forcing CPL=3, but we do not emulate
that.

Tested-by: Kevin O'Connor <kevin@koconnor.net>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target-i386/cpu.h     | 8 +++-----
 target-i386/kvm.c     | 2 +-
 target-i386/machine.c | 8 ++++++++
 3 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index e9cbdab..65a44d9 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -986,7 +986,6 @@ static inline void cpu_x86_load_seg_cache(CPUX86State *env,
     /* update the hidden flags */
     {
         if (seg_reg == R_CS) {
-            int cpl = selector & 3;
 #ifdef TARGET_X86_64
             if ((env->hflags & HF_LMA_MASK) && (flags & DESC_L_MASK)) {
                 /* long mode */
@@ -996,15 +995,14 @@ static inline void cpu_x86_load_seg_cache(CPUX86State *env,
 #endif
             {
                 /* legacy / compatibility case */
-                if (!(env->cr[0] & CR0_PE_MASK))
-                    cpl = 0;
-                else if (env->eflags & VM_MASK)
-                    cpl = 3;
                 new_hflags = (env->segs[R_CS].flags & DESC_B_MASK)
                     >> (DESC_B_SHIFT - HF_CS32_SHIFT);
                 env->hflags = (env->hflags & ~(HF_CS32_MASK | HF_CS64_MASK)) |
                     new_hflags;
             }
+        }
+        if (seg_reg == R_SS) {
+            int cpl = (flags >> DESC_DPL_SHIFT) & 3;
 #if HF_CPL_MASK != 3
 #error HF_CPL_MASK is hardcoded
 #endif
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 0d894ef..3931d4c 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -1430,7 +1430,7 @@ static int kvm_get_sregs(X86CPU *cpu)
        HF_OSFXSR_MASK | HF_LMA_MASK | HF_CS32_MASK | \
        HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK)
 
-    hflags = (env->segs[R_CS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK;
+    hflags = (env->segs[R_SS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK;
     hflags |= (env->cr[0] & CR0_PE_MASK) << (HF_PE_SHIFT - CR0_PE_SHIFT);
     hflags |= (env->cr[0] << (HF_MP_SHIFT - CR0_MP_SHIFT)) &
                 (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK);
diff --git a/target-i386/machine.c b/target-i386/machine.c
index 168cab6..bdff447 100644
--- a/target-i386/machine.c
+++ b/target-i386/machine.c
@@ -312,6 +312,14 @@ static int cpu_post_load(void *opaque, int version_id)
         env->segs[R_SS].flags &= ~(env->segs[R_SS].flags & DESC_DPL_MASK);
     }
 
+    /* Older versions of QEMU incorrectly used CS.DPL as the CPL when
+     * running under KVM.  This is wrong for conforming code segments.
+     * Luckily, in our implementation the CPL field of hflags is redundant
+     * and we can get the right value from the SS descriptor privilege level.
+     */
+    env->hflags &= ~HF_CPL_MASK;
+    env->hflags |= (env->segs[R_SS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK;
+
     /* XXX: restore FPU round state */
     env->fpstt = (env->fpus_vmstate >> 11) & 7;
     env->fpus = env->fpus_vmstate & ~0x3800;
-- 
cgit v1.1


From 28fb26f19ffa675ac8cc08a355e5b01cc194aa5e Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 14 May 2014 16:47:48 +0200
Subject: target-i386: set CC_OP to CC_OP_EFLAGS in cpu_load_eflags

There is no reason to keep that out of the function.  The comment refers
to the disassembler's cc_op state rather than the CPUState field.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target-i386/cpu.h        | 5 ++++-
 target-i386/seg_helper.c | 2 --
 target-i386/smm_helper.c | 2 --
 target-i386/svm_helper.c | 2 --
 4 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 65a44d9..ee410af 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -1232,11 +1232,14 @@ static inline uint32_t cpu_compute_eflags(CPUX86State *env)
     return env->eflags | cpu_cc_compute_all(env, CC_OP) | (env->df & DF_MASK);
 }
 
-/* NOTE: CC_OP must be modified manually to CC_OP_EFLAGS */
+/* NOTE: the translator must set DisasContext.cc_op to CC_OP_EFLAGS
+ * after generating a call to a helper that uses this.
+ */
 static inline void cpu_load_eflags(CPUX86State *env, int eflags,
                                    int update_mask)
 {
     CC_SRC = eflags & (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
+    CC_OP = CC_OP_EFLAGS;
     env->df = 1 - (2 * ((eflags >> 10) & 1));
     env->eflags = (env->eflags & ~update_mask) |
         (eflags & update_mask) | 0x2;
diff --git a/target-i386/seg_helper.c b/target-i386/seg_helper.c
index 0f00aed..0b19a8c 100644
--- a/target-i386/seg_helper.c
+++ b/target-i386/seg_helper.c
@@ -1598,7 +1598,6 @@ void helper_ljmp_protected(CPUX86State *env, int new_cs, target_ulong new_eip,
             }
             next_eip = env->eip + next_eip_addend;
             switch_tss(env, new_cs, e1, e2, SWITCH_TSS_JMP, next_eip);
-            CC_OP = CC_OP_EFLAGS;
             break;
         case 4: /* 286 call gate */
         case 12: /* 386 call gate */
@@ -1767,7 +1766,6 @@ void helper_lcall_protected(CPUX86State *env, int new_cs, target_ulong new_eip,
                 raise_exception_err(env, EXCP0D_GPF, new_cs & 0xfffc);
             }
             switch_tss(env, new_cs, e1, e2, SWITCH_TSS_CALL, next_eip);
-            CC_OP = CC_OP_EFLAGS;
             return;
         case 4: /* 286 call gate */
         case 12: /* 386 call gate */
diff --git a/target-i386/smm_helper.c b/target-i386/smm_helper.c
index 517f3b0..1e5f5ce 100644
--- a/target-i386/smm_helper.c
+++ b/target-i386/smm_helper.c
@@ -168,7 +168,6 @@ void do_smm_enter(X86CPU *cpu)
                                       CR0_PG_MASK));
     cpu_x86_update_cr4(env, 0);
     env->dr[7] = 0x00000400;
-    CC_OP = CC_OP_EFLAGS;
 
     cpu_x86_load_seg_cache(env, R_CS, (env->smbase >> 4) & 0xffff, env->smbase,
                            0xffffffff,
@@ -308,7 +307,6 @@ void helper_rsm(CPUX86State *env)
         env->smbase = ldl_phys(cs->as, sm_state + 0x7ef8) & ~0x7fff;
     }
 #endif
-    CC_OP = CC_OP_EFLAGS;
     env->hflags &= ~HF_SMM_MASK;
     cpu_smm_update(env);
 
diff --git a/target-i386/svm_helper.c b/target-i386/svm_helper.c
index 846eaa5..8dc6f8c 100644
--- a/target-i386/svm_helper.c
+++ b/target-i386/svm_helper.c
@@ -260,7 +260,6 @@ void helper_vmrun(CPUX86State *env, int aflag, int next_eip_addend)
                                   env->vm_vmcb + offsetof(struct vmcb,
                                                           save.rflags)),
                     ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK));
-    CC_OP = CC_OP_EFLAGS;
 
     svm_load_seg_cache(env, env->vm_vmcb + offsetof(struct vmcb, save.es),
                        R_ES);
@@ -702,7 +701,6 @@ void helper_vmexit(CPUX86State *env, uint32_t exit_code, uint64_t exit_info_1)
                                                            save.rflags)),
                     ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK |
                       VM_MASK));
-    CC_OP = CC_OP_EFLAGS;
 
     svm_load_seg_cache(env, env->vm_hsave + offsetof(struct vmcb, save.es),
                        R_ES);
-- 
cgit v1.1


From 0e1dac6c41f337f997814344a847162968c20c64 Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Fri, 30 May 2014 17:26:22 -0300
Subject: kvm: Ensure negative return value on kvm_init() error handling path

We need to ensure ret < 0 when going through the error path, or QEMU may
try to run the half-initialized VM and crash.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 kvm-all.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/kvm-all.c b/kvm-all.c
index a343ede..f7fe9c6 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -1374,7 +1374,7 @@ int kvm_init(MachineClass *mc)
 
     ret = kvm_ioctl(s, KVM_GET_API_VERSION, 0);
     if (ret < KVM_API_VERSION) {
-        if (ret > 0) {
+        if (ret >= 0) {
             ret = -EINVAL;
         }
         fprintf(stderr, "kvm version too old\n");
@@ -1425,6 +1425,7 @@ int kvm_init(MachineClass *mc)
     if (mc->kvm_type) {
         type = mc->kvm_type(kvm_type);
     } else if (kvm_type) {
+        ret = -EINVAL;
         fprintf(stderr, "Invalid argument kvm-type=%s\n", kvm_type);
         goto err;
     }
@@ -1525,6 +1526,7 @@ int kvm_init(MachineClass *mc)
     return 0;
 
 err:
+    assert(ret < 0);
     if (s->vmfd >= 0) {
         close(s->vmfd);
     }
-- 
cgit v1.1


From f522d2acc549dd11f495048330aa5f3f424a7dfa Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Mon, 2 Jun 2014 11:28:50 -0600
Subject: kvm: Enable -cpu option to hide KVM

The latest Nvidia driver (337.88) specifically checks for KVM as the
hypervisor and reports Code 43 for the driver in a Windows guest when
found.  Removing or changing the KVM signature is sufficient for the
driver to load and work.  This patch adds an option to easily allow
the KVM hypervisor signature to be hidden using '-cpu kvm=off'.  We
continue to expose KVM via the cpuid value by default.  The state of
this option does not supercede or replace -enable-kvm or the accel=kvm
machine option.  This only changes the visibility of KVM to the guest
and paravirtual features specifically tied to the KVM cpuid.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target-i386/cpu-qom.h |  1 +
 target-i386/cpu.c     |  1 +
 target-i386/kvm.c     | 28 +++++++++++++++-------------
 3 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/target-i386/cpu-qom.h b/target-i386/cpu-qom.h
index e9b3d57..0808cfc 100644
--- a/target-i386/cpu-qom.h
+++ b/target-i386/cpu-qom.h
@@ -87,6 +87,7 @@ typedef struct X86CPU {
     bool hyperv_time;
     bool check_cpuid;
     bool enforce_cpuid;
+    bool expose_kvm;
 
     /* if true the CPUID code directly forward host cache leaves to the guest */
     bool cache_info_passthrough;
diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 042a48d..cbf1d97 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -2792,6 +2792,7 @@ static Property x86_cpu_properties[] = {
     DEFINE_PROP_BOOL("hv-time", X86CPU, hyperv_time, false),
     DEFINE_PROP_BOOL("check", X86CPU, check_cpuid, false),
     DEFINE_PROP_BOOL("enforce", X86CPU, enforce_cpuid, false),
+    DEFINE_PROP_BOOL("kvm", X86CPU, expose_kvm, true),
     DEFINE_PROP_END_OF_LIST()
 };
 
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 3931d4c..40cd4b8 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -528,23 +528,25 @@ int kvm_arch_init_vcpu(CPUState *cs)
         has_msr_hv_hypercall = true;
     }
 
-    memcpy(signature, "KVMKVMKVM\0\0\0", 12);
-    c = &cpuid_data.entries[cpuid_i++];
-    c->function = KVM_CPUID_SIGNATURE | kvm_base;
-    c->eax = 0;
-    c->ebx = signature[0];
-    c->ecx = signature[1];
-    c->edx = signature[2];
+    if (cpu->expose_kvm) {
+        memcpy(signature, "KVMKVMKVM\0\0\0", 12);
+        c = &cpuid_data.entries[cpuid_i++];
+        c->function = KVM_CPUID_SIGNATURE | kvm_base;
+        c->eax = 0;
+        c->ebx = signature[0];
+        c->ecx = signature[1];
+        c->edx = signature[2];
 
-    c = &cpuid_data.entries[cpuid_i++];
-    c->function = KVM_CPUID_FEATURES | kvm_base;
-    c->eax = env->features[FEAT_KVM];
+        c = &cpuid_data.entries[cpuid_i++];
+        c->function = KVM_CPUID_FEATURES | kvm_base;
+        c->eax = env->features[FEAT_KVM];
 
-    has_msr_async_pf_en = c->eax & (1 << KVM_FEATURE_ASYNC_PF);
+        has_msr_async_pf_en = c->eax & (1 << KVM_FEATURE_ASYNC_PF);
 
-    has_msr_pv_eoi_en = c->eax & (1 << KVM_FEATURE_PV_EOI);
+        has_msr_pv_eoi_en = c->eax & (1 << KVM_FEATURE_PV_EOI);
 
-    has_msr_kvm_steal_time = c->eax & (1 << KVM_FEATURE_STEAL_TIME);
+        has_msr_kvm_steal_time = c->eax & (1 << KVM_FEATURE_STEAL_TIME);
+    }
 
     cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused);
 
-- 
cgit v1.1


From 9b1786829aefb83f37a8f3135e3ea91c56001b56 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Tue, 3 Jun 2014 13:34:48 -0300
Subject: kvmclock: Ensure proper env->tsc value for kvmclock_current_nsec
 calculation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Ensure proper env->tsc value for kvmclock_current_nsec calculation.

Reported-by: Marcin Gibuła <m.gibula@beyond.pl>
Cc: qemu-stable@nongnu.org
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 hw/i386/kvm/clock.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c
index 6f4ed28a..bef2504 100644
--- a/hw/i386/kvm/clock.c
+++ b/hw/i386/kvm/clock.c
@@ -17,6 +17,7 @@
 #include "qemu/host-utils.h"
 #include "sysemu/sysemu.h"
 #include "sysemu/kvm.h"
+#include "sysemu/cpus.h"
 #include "hw/sysbus.h"
 #include "hw/kvm/clock.h"
 
@@ -65,6 +66,7 @@ static uint64_t kvmclock_current_nsec(KVMClockState *s)
 
     cpu_physical_memory_read(kvmclock_struct_pa, &time, sizeof(time));
 
+    assert(time.tsc_timestamp <= migration_tsc);
     delta = migration_tsc - time.tsc_timestamp;
     if (time.tsc_shift < 0) {
         delta >>= -time.tsc_shift;
@@ -123,6 +125,8 @@ static void kvmclock_vm_state_change(void *opaque, int running,
         if (s->clock_valid) {
             return;
         }
+
+        cpu_synchronize_all_states();
         ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data);
         if (ret < 0) {
             fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret));
-- 
cgit v1.1


From 79b6f2f651d64a122dd647c1456635d5a6a176ac Mon Sep 17 00:00:00 2001
From: Jidong Xiao <jidong.xiao@gmail.com>
Date: Tue, 3 Jun 2014 21:10:06 -0400
Subject: kvm: Fix eax for cpuid leaf 0x40000000

Since Linux kernel 3.5, KVM has documented eax for leaf 0x40000000
to be KVM_CPUID_FEATURES:

https://github.com/torvalds/linux/commit/57c22e5f35aa4b9b2fe11f73f3e62bbf9ef36190

But qemu still tries to set it to 0. It would be better to make qemu
and kvm consistent. This patch just fixes this issue.

Signed-off-by: Jidong Xiao <jidong.xiao@gmail.com>
[Include kvm_base in the value. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target-i386/kvm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 40cd4b8..4bf0ac9 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -532,7 +532,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
         memcpy(signature, "KVMKVMKVM\0\0\0", 12);
         c = &cpuid_data.entries[cpuid_i++];
         c->function = KVM_CPUID_SIGNATURE | kvm_base;
-        c->eax = 0;
+        c->eax = KVM_CPUID_FEATURES | kvm_base;
         c->ebx = signature[0];
         c->ecx = signature[1];
         c->edx = signature[2];
-- 
cgit v1.1