Diffstat (limited to 'target/i386')
-rw-r--r--  target/i386/arch_memory_mapping.c | 1
-rw-r--r--  target/i386/confidential-guest.c | 2
-rw-r--r--  target/i386/confidential-guest.h | 44
-rw-r--r--  target/i386/cpu-apic.c | 2
-rw-r--r--  target/i386/cpu-param.h | 3
-rw-r--r--  target/i386/cpu-system.c | 2
-rw-r--r--  target/i386/cpu.c | 1790
-rw-r--r--  target/i386/cpu.h | 161
-rw-r--r--  target/i386/emulate/x86_decode.c | 93
-rw-r--r--  target/i386/emulate/x86_decode.h | 9
-rw-r--r--  target/i386/emulate/x86_emu.c | 129
-rw-r--r--  target/i386/emulate/x86_emu.h | 8
-rw-r--r--  target/i386/emulate/x86_flags.c | 200
-rw-r--r--  target/i386/emulate/x86_flags.h | 16
-rw-r--r--  target/i386/helper.c | 4
-rw-r--r--  target/i386/host-cpu.c | 48
-rw-r--r--  target/i386/host-cpu.h | 2
-rw-r--r--  target/i386/hvf/hvf-cpu.c | 7
-rw-r--r--  target/i386/hvf/hvf.c | 5
-rw-r--r--  target/i386/hvf/vmx.h | 3
-rw-r--r--  target/i386/hvf/x86_cpuid.c | 2
-rw-r--r--  target/i386/hvf/x86hvf.c | 2
-rw-r--r--  target/i386/kvm/hyperv.c | 1
-rw-r--r--  target/i386/kvm/kvm-cpu.c | 11
-rw-r--r--  target/i386/kvm/kvm.c | 162
-rw-r--r--  target/i386/kvm/kvm_i386.h | 15
-rw-r--r--  target/i386/kvm/meson.build | 2
-rw-r--r--  target/i386/kvm/tdx-quote-generator.c | 302
-rw-r--r--  target/i386/kvm/tdx-quote-generator.h | 82
-rw-r--r--  target/i386/kvm/tdx-stub.c | 32
-rw-r--r--  target/i386/kvm/tdx.c | 1546
-rw-r--r--  target/i386/kvm/tdx.h | 89
-rw-r--r--  target/i386/kvm/vmsr_energy.c | 9
-rw-r--r--  target/i386/kvm/vmsr_energy.h | 1
-rw-r--r--  target/i386/kvm/xen-emu.c | 3
-rw-r--r--  target/i386/machine.c | 7
-rw-r--r--  target/i386/meson.build | 2
-rw-r--r--  target/i386/monitor.c | 1
-rw-r--r--  target/i386/nvmm/nvmm-accel-ops.c | 3
-rw-r--r--  target/i386/nvmm/nvmm-all.c | 62
-rw-r--r--  target/i386/ops_sse.h | 16
-rw-r--r--  target/i386/sev-system-stub.c | 32
-rw-r--r--  target/i386/sev.c | 875
-rw-r--r--  target/i386/sev.h | 124
-rw-r--r--  target/i386/tcg/access.c | 5
-rw-r--r--  target/i386/tcg/decode-new.c.inc | 40
-rw-r--r--  target/i386/tcg/emit.c.inc | 22
-rw-r--r--  target/i386/tcg/excp_helper.c | 1
-rw-r--r--  target/i386/tcg/fpu_helper.c | 103
-rw-r--r--  target/i386/tcg/helper-tcg.h | 5
-rw-r--r--  target/i386/tcg/int_helper.c | 1
-rw-r--r--  target/i386/tcg/mem_helper.c | 3
-rw-r--r--  target/i386/tcg/mpx_helper.c | 4
-rw-r--r--  target/i386/tcg/seg_helper.c | 102
-rw-r--r--  target/i386/tcg/seg_helper.h | 10
-rw-r--r--  target/i386/tcg/system/bpt_helper.c | 2
-rw-r--r--  target/i386/tcg/system/excp_helper.c | 6
-rw-r--r--  target/i386/tcg/system/misc_helper.c | 5
-rw-r--r--  target/i386/tcg/system/seg_helper.c | 2
-rw-r--r--  target/i386/tcg/system/svm_helper.c | 2
-rw-r--r--  target/i386/tcg/system/tcg-cpu.c | 3
-rw-r--r--  target/i386/tcg/tcg-cpu.c | 82
-rw-r--r--  target/i386/tcg/tcg-cpu.h | 6
-rw-r--r--  target/i386/tcg/translate.c | 39
-rw-r--r--  target/i386/tcg/user/excp_helper.c | 1
-rw-r--r--  target/i386/tcg/user/seg_helper.c | 3
-rw-r--r--  target/i386/whpx/whpx-accel-ops.c | 4
-rw-r--r--  target/i386/whpx/whpx-accel-ops.h | 1
-rw-r--r--  target/i386/whpx/whpx-all.c | 66
-rw-r--r--  target/i386/whpx/whpx-apic.c | 2
-rw-r--r--  target/i386/xsave_helper.c | 1
71 files changed, 5323 insertions, 1108 deletions
diff --git a/target/i386/arch_memory_mapping.c b/target/i386/arch_memory_mapping.c
index ced1998..a2398c2 100644
--- a/target/i386/arch_memory_mapping.c
+++ b/target/i386/arch_memory_mapping.c
@@ -14,6 +14,7 @@
#include "qemu/osdep.h"
#include "cpu.h"
#include "system/memory_mapping.h"
+#include "system/memory.h"
/* PAE Paging or IA-32e Paging */
static void walk_pte(MemoryMappingList *list, AddressSpace *as,
diff --git a/target/i386/confidential-guest.c b/target/i386/confidential-guest.c
index b372784..cfb71bf 100644
--- a/target/i386/confidential-guest.c
+++ b/target/i386/confidential-guest.c
@@ -20,7 +20,7 @@ OBJECT_DEFINE_ABSTRACT_TYPE(X86ConfidentialGuest,
X86_CONFIDENTIAL_GUEST,
CONFIDENTIAL_GUEST_SUPPORT)
-static void x86_confidential_guest_class_init(ObjectClass *oc, void *data)
+static void x86_confidential_guest_class_init(ObjectClass *oc, const void *data)
{
}
diff --git a/target/i386/confidential-guest.h b/target/i386/confidential-guest.h
index 164be76..48b88db 100644
--- a/target/i386/confidential-guest.h
+++ b/target/i386/confidential-guest.h
@@ -39,8 +39,10 @@ struct X86ConfidentialGuestClass {
/* <public> */
int (*kvm_type)(X86ConfidentialGuest *cg);
- uint32_t (*mask_cpuid_features)(X86ConfidentialGuest *cg, uint32_t feature, uint32_t index,
- int reg, uint32_t value);
+ void (*cpu_instance_init)(X86ConfidentialGuest *cg, CPUState *cpu);
+ uint32_t (*adjust_cpuid_features)(X86ConfidentialGuest *cg, uint32_t feature,
+ uint32_t index, int reg, uint32_t value);
+ int (*check_features)(X86ConfidentialGuest *cg, CPUState *cs);
};
/**
@@ -59,25 +61,47 @@ static inline int x86_confidential_guest_kvm_type(X86ConfidentialGuest *cg)
}
}
+static inline void x86_confidential_guest_cpu_instance_init(X86ConfidentialGuest *cg,
+ CPUState *cpu)
+{
+ X86ConfidentialGuestClass *klass = X86_CONFIDENTIAL_GUEST_GET_CLASS(cg);
+
+ if (klass->cpu_instance_init) {
+ klass->cpu_instance_init(cg, cpu);
+ }
+}
+
/**
- * x86_confidential_guest_mask_cpuid_features:
+ * x86_confidential_guest_adjust_cpuid_features:
*
- * Removes unsupported features from a confidential guest's CPUID values, returns
- * the value with the bits removed. The bits removed should be those that KVM
- * provides independent of host-supported CPUID features, but are not supported by
- * the confidential computing firmware.
+ * Adjust the supported features from a confidential guest's CPUID values,
+ * returns the adjusted value. There are bits being removed that are not
+ * supported by the confidential computing firmware or bits being added that
+ * are forcibly exposed to guest by the confidential computing firmware.
*/
-static inline int x86_confidential_guest_mask_cpuid_features(X86ConfidentialGuest *cg,
+static inline int x86_confidential_guest_adjust_cpuid_features(X86ConfidentialGuest *cg,
uint32_t feature, uint32_t index,
int reg, uint32_t value)
{
X86ConfidentialGuestClass *klass = X86_CONFIDENTIAL_GUEST_GET_CLASS(cg);
- if (klass->mask_cpuid_features) {
- return klass->mask_cpuid_features(cg, feature, index, reg, value);
+ if (klass->adjust_cpuid_features) {
+ return klass->adjust_cpuid_features(cg, feature, index, reg, value);
} else {
return value;
}
}
+static inline int x86_confidential_guest_check_features(X86ConfidentialGuest *cg,
+ CPUState *cs)
+{
+ X86ConfidentialGuestClass *klass = X86_CONFIDENTIAL_GUEST_GET_CLASS(cg);
+
+ if (klass->check_features) {
+ return klass->check_features(cg, cs);
+ }
+
+ return 0;
+}
+
#endif
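As a minimal sketch of how a backend is expected to wire up these hooks (editorial illustration, not part of the patch: the type name and the CPUID policy below are invented, it assumes QEMU's cpu.h plus the header above, and the real users are the SEV/TDX classes changed elsewhere in this series):

static uint32_t demo_adjust_cpuid_features(X86ConfidentialGuest *cg,
                                           uint32_t feature, uint32_t index,
                                           int reg, uint32_t value)
{
    /* Hypothetical policy: firmware hides MONITOR and always exposes x2APIC. */
    if (feature == 1 && reg == R_ECX) {
        value &= ~CPUID_EXT_MONITOR;
        value |= CPUID_EXT_X2APIC;
    }
    return value;
}

static int demo_check_features(X86ConfidentialGuest *cg, CPUState *cs)
{
    return 0;    /* nothing to reject in this sketch */
}

static void demo_guest_class_init(ObjectClass *oc, const void *data)
{
    X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc);

    x86_klass->adjust_cpuid_features = demo_adjust_cpuid_features;
    x86_klass->check_features = demo_check_features;
    /* cpu_instance_init is optional; the inline wrappers skip NULL hooks. */
}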
diff --git a/target/i386/cpu-apic.c b/target/i386/cpu-apic.c
index c1708b0..242a05f 100644
--- a/target/i386/cpu-apic.c
+++ b/target/i386/cpu-apic.c
@@ -14,7 +14,7 @@
#include "system/hw_accel.h"
#include "system/kvm.h"
#include "system/xen.h"
-#include "exec/address-spaces.h"
+#include "system/address-spaces.h"
#include "hw/qdev-properties.h"
#include "hw/i386/apic_internal.h"
#include "cpu-internal.h"
diff --git a/target/i386/cpu-param.h b/target/i386/cpu-param.h
index b0e884c..ebb844b 100644
--- a/target/i386/cpu-param.h
+++ b/target/i386/cpu-param.h
@@ -22,7 +22,6 @@
#endif
#define TARGET_PAGE_BITS 12
-/* The x86 has a strong memory model with some store-after-load re-ordering */
-#define TCG_GUEST_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD)
+#define TARGET_INSN_START_EXTRA_WORDS 1
#endif
diff --git a/target/i386/cpu-system.c b/target/i386/cpu-system.c
index 55f192e..b1494aa 100644
--- a/target/i386/cpu-system.c
+++ b/target/i386/cpu-system.c
@@ -24,7 +24,7 @@
#include "qobject/qdict.h"
#include "qapi/qobject-input-visitor.h"
#include "qom/qom-qobject.h"
-#include "qapi/qapi-commands-machine-target.h"
+#include "qapi/qapi-commands-machine.h"
#include "cpu-internal.h"
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 3fb1ec6..da7d8dc 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -28,6 +28,7 @@
#include "system/hvf.h"
#include "hvf/hvf-i386.h"
#include "kvm/kvm_i386.h"
+#include "kvm/tdx.h"
#include "sev.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
@@ -35,13 +36,17 @@
#include "standard-headers/asm-x86/kvm_para.h"
#include "hw/qdev-properties.h"
#include "hw/i386/topology.h"
+#include "exec/watchpoint.h"
#ifndef CONFIG_USER_ONLY
+#include "confidential-guest.h"
#include "system/reset.h"
-#include "qapi/qapi-commands-machine-target.h"
-#include "exec/address-spaces.h"
+#include "qapi/qapi-commands-machine.h"
+#include "system/address-spaces.h"
#include "hw/boards.h"
#include "hw/i386/sgx-epc.h"
#endif
+#include "system/qtest.h"
+#include "tcg/tcg-cpu.h"
#include "disas/capstone.h"
#include "cpu-internal.h"
@@ -63,6 +68,7 @@ struct CPUID2CacheDescriptorInfo {
/*
* Known CPUID 2 cache descriptors.
+ * TLB, prefetch and sectored cache related descriptors are not included.
* From Intel SDM Volume 2A, CPUID instruction
*/
struct CPUID2CacheDescriptorInfo cpuid2_cache_descriptors[] = {
@@ -84,18 +90,29 @@ struct CPUID2CacheDescriptorInfo cpuid2_cache_descriptors[] = {
.associativity = 2, .line_size = 64, },
[0x21] = { .level = 2, .type = UNIFIED_CACHE, .size = 256 * KiB,
.associativity = 8, .line_size = 64, },
- /* lines per sector is not supported cpuid2_cache_descriptor(),
- * so descriptors 0x22, 0x23 are not included
- */
+ /*
+ * lines per sector is not supported cpuid2_cache_descriptor(),
+ * so descriptors 0x22, 0x23 are not included
+ */
[0x24] = { .level = 2, .type = UNIFIED_CACHE, .size = 1 * MiB,
.associativity = 16, .line_size = 64, },
- /* lines per sector is not supported cpuid2_cache_descriptor(),
- * so descriptors 0x25, 0x20 are not included
- */
+ /*
+ * lines per sector is not supported cpuid2_cache_descriptor(),
+ * so descriptors 0x25, 0x29 are not included
+ */
[0x2C] = { .level = 1, .type = DATA_CACHE, .size = 32 * KiB,
.associativity = 8, .line_size = 64, },
[0x30] = { .level = 1, .type = INSTRUCTION_CACHE, .size = 32 * KiB,
.associativity = 8, .line_size = 64, },
+ /*
+ * Newer Intel CPUs (having the cores without L3, e.g., Intel MTL, ARL)
+ * use CPUID 0x4 leaf to describe cache topology, by encoding CPUID 0x2
+ * leaf with 0xFF. For older CPUs (without 0x4 leaf), it's also valid
+ * to just ignore L3's code if there's no L3.
+ *
+ * This already covers all the cases in QEMU, so code 0x40 is not
+ * included.
+ */
[0x41] = { .level = 2, .type = UNIFIED_CACHE, .size = 128 * KiB,
.associativity = 4, .line_size = 32, },
[0x42] = { .level = 2, .type = UNIFIED_CACHE, .size = 256 * KiB,
@@ -112,7 +129,18 @@ struct CPUID2CacheDescriptorInfo cpuid2_cache_descriptors[] = {
.associativity = 8, .line_size = 64, },
[0x48] = { .level = 2, .type = UNIFIED_CACHE, .size = 3 * MiB,
.associativity = 12, .line_size = 64, },
- /* Descriptor 0x49 depends on CPU family/model, so it is not included */
+ /*
+ * Descriptor 0x49 has 2 cases:
+ * - 2nd-level cache: 4 MByte, 16-way set associative, 64 byte line size.
+ * - 3rd-level cache: 4MB, 16-way set associative, 64-byte line size
+ * (Intel Xeon processor MP, Family 0FH, Model 06H).
+ *
+ * When it represents L3, then it depends on CPU family/model. Fortunately,
+ * the legacy cache/CPU models don't have such special L3. So, just add it
+ * to represent the general L2 case.
+ */
+ [0x49] = { .level = 2, .type = UNIFIED_CACHE, .size = 4 * MiB,
+ .associativity = 16, .line_size = 64, },
[0x4A] = { .level = 3, .type = UNIFIED_CACHE, .size = 6 * MiB,
.associativity = 12, .line_size = 64, },
[0x4B] = { .level = 3, .type = UNIFIED_CACHE, .size = 8 * MiB,
@@ -133,9 +161,10 @@ struct CPUID2CacheDescriptorInfo cpuid2_cache_descriptors[] = {
.associativity = 4, .line_size = 64, },
[0x78] = { .level = 2, .type = UNIFIED_CACHE, .size = 1 * MiB,
.associativity = 4, .line_size = 64, },
- /* lines per sector is not supported cpuid2_cache_descriptor(),
- * so descriptors 0x79, 0x7A, 0x7B, 0x7C are not included.
- */
+ /*
+ * lines per sector is not supported cpuid2_cache_descriptor(),
+ * so descriptors 0x79, 0x7A, 0x7B, 0x7C are not included.
+ */
[0x7D] = { .level = 2, .type = UNIFIED_CACHE, .size = 2 * MiB,
.associativity = 8, .line_size = 64, },
[0x7F] = { .level = 2, .type = UNIFIED_CACHE, .size = 512 * KiB,
@@ -196,7 +225,7 @@ struct CPUID2CacheDescriptorInfo cpuid2_cache_descriptors[] = {
* Return a CPUID 2 cache descriptor for a given cache.
* If no known descriptor is found, return CACHE_DESCRIPTOR_UNAVAILABLE
*/
-static uint8_t cpuid2_cache_descriptor(CPUCacheInfo *cache)
+static uint8_t cpuid2_cache_descriptor(CPUCacheInfo *cache, bool *unmacthed)
{
int i;
@@ -213,9 +242,46 @@ static uint8_t cpuid2_cache_descriptor(CPUCacheInfo *cache)
}
}
+ *unmacthed |= true;
return CACHE_DESCRIPTOR_UNAVAILABLE;
}
+static const CPUCaches legacy_intel_cpuid2_cache_info;
+
+/* Encode cache info for CPUID[2] */
+static void encode_cache_cpuid2(X86CPU *cpu,
+ const CPUCaches *caches,
+ uint32_t *eax, uint32_t *ebx,
+ uint32_t *ecx, uint32_t *edx)
+{
+ CPUX86State *env = &cpu->env;
+ int l1d, l1i, l2, l3;
+ bool unmatched = false;
+
+ *eax = 1; /* Number of CPUID[EAX=2] calls required */
+ *ebx = *ecx = *edx = 0;
+
+ l1d = cpuid2_cache_descriptor(caches->l1d_cache, &unmatched);
+ l1i = cpuid2_cache_descriptor(caches->l1i_cache, &unmatched);
+ l2 = cpuid2_cache_descriptor(caches->l2_cache, &unmatched);
+ l3 = cpuid2_cache_descriptor(caches->l3_cache, &unmatched);
+
+ if (!cpu->consistent_cache ||
+ (env->cpuid_min_level < 0x4 && !unmatched)) {
+ /*
+ * Though SDM defines code 0x40 for cases with no L2 or L3. It's
+ * also valid to just ignore l3's code if there's no l2.
+ */
+ if (cpu->enable_l3_cache) {
+ *ecx = l3;
+ }
+ *edx = (l1d << 16) | (l1i << 8) | l2;
+ } else {
+ *ecx = 0;
+ *edx = CACHE_DESCRIPTOR_UNAVAILABLE;
+ }
+}
+
/* CPUID Leaf 4 constants: */
/* EAX: */
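As a worked illustration of the CPUID[2] packing performed by encode_cache_cpuid2() above (standalone editorial sketch, not part of the patch; the descriptor values come from the table earlier in this file: 0x2C = 32 KiB 8-way L1D, 0x30 = 32 KiB 8-way L1I, 0x7D = 2 MiB 8-way L2):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint32_t l1d = 0x2C, l1i = 0x30, l2 = 0x7D;
    uint32_t eax = 1;                              /* one CPUID[2] iteration        */
    uint32_t edx = (l1d << 16) | (l1i << 8) | l2;  /* L1D | L1I | L2, one byte each */

    printf("EAX=%08X EDX=%08X\n", eax, edx);       /* EAX=00000001 EDX=002C307D     */
    return 0;
}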
@@ -283,11 +349,17 @@ static void encode_cache_cpuid4(CPUCacheInfo *cache,
assert(cache->size == cache->line_size * cache->associativity *
cache->partitions * cache->sets);
+ /*
+ * The following fields have bit-width limitations, so consider the
+ * maximum values to avoid overflow:
+ * Bits 25-14: maximum 4095.
+ * Bits 31-26: maximum 63.
+ */
*eax = CACHE_TYPE(cache->type) |
CACHE_LEVEL(cache->level) |
(cache->self_init ? CACHE_SELF_INIT_LEVEL : 0) |
- (max_core_ids_in_package(topo_info) << 26) |
- (max_thread_ids_for_cache(topo_info, cache->share_level) << 14);
+ (MIN(max_core_ids_in_package(topo_info), 63) << 26) |
+ (MIN(max_thread_ids_for_cache(topo_info, cache->share_level), 4095) << 14);
assert(cache->line_size > 0);
assert(cache->partitions > 0);
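The MIN() clamps above keep topology-derived values from wrapping out of their bit fields. A standalone sketch with a hypothetical core count (not from the patch) showing why the 6-bit field at EAX[31:26] needs saturation rather than a plain shift:

#include <stdint.h>
#include <stdio.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))

int main(void)
{
    uint32_t max_core_ids = 70;                        /* hypothetical, > 63     */

    uint32_t saturated = MIN(max_core_ids, 63) << 26;  /* field holds 63         */
    uint32_t wrapped   = max_core_ids << 26;           /* field silently holds 6 */

    printf("saturated field = %u\n", saturated >> 26);
    printf("wrapped field   = %u\n", wrapped >> 26);
    return 0;
}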
@@ -427,7 +499,6 @@ static void encode_topo_cpuid1f(CPUX86State *env, uint32_t count,
static uint32_t encode_cache_cpuid80000005(CPUCacheInfo *cache)
{
assert(cache->size % 1024 == 0);
- assert(cache->lines_per_tag > 0);
assert(cache->associativity > 0);
assert(cache->line_size > 0);
return ((cache->size / 1024) << 24) | (cache->associativity << 16) |
@@ -436,8 +507,8 @@ static uint32_t encode_cache_cpuid80000005(CPUCacheInfo *cache)
#define ASSOC_FULL 0xFF
-/* AMD associativity encoding used on CPUID Leaf 0x80000006: */
-#define AMD_ENC_ASSOC(a) (a <= 1 ? a : \
+/* x86 associativity encoding used on CPUID Leaf 0x80000006: */
+#define X86_ENC_ASSOC(a) (a <= 1 ? a : \
a == 2 ? 0x2 : \
a == 4 ? 0x4 : \
a == 8 ? 0x6 : \
@@ -460,19 +531,18 @@ static void encode_cache_cpuid80000006(CPUCacheInfo *l2,
{
assert(l2->size % 1024 == 0);
assert(l2->associativity > 0);
- assert(l2->lines_per_tag > 0);
assert(l2->line_size > 0);
*ecx = ((l2->size / 1024) << 16) |
- (AMD_ENC_ASSOC(l2->associativity) << 12) |
+ (X86_ENC_ASSOC(l2->associativity) << 12) |
(l2->lines_per_tag << 8) | (l2->line_size);
+ /* For Intel, EDX is reserved. */
if (l3) {
assert(l3->size % (512 * 1024) == 0);
assert(l3->associativity > 0);
- assert(l3->lines_per_tag > 0);
assert(l3->line_size > 0);
*edx = ((l3->size / (512 * 1024)) << 18) |
- (AMD_ENC_ASSOC(l3->associativity) << 12) |
+ (X86_ENC_ASSOC(l3->associativity) << 12) |
(l3->lines_per_tag << 8) | (l3->line_size);
} else {
*edx = 0;
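encode_cache_cpuid80000006() above packs the L2 size in KiB into ECX[31:16], the encoded associativity into ECX[15:12], lines per tag into ECX[11:8] and the line size into ECX[7:0]. A standalone sketch of that packing (not from the patch) for a 512 KiB, 8-way, 64-byte-line L2; per X86_ENC_ASSOC above, 8-way encodes as 0x6:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint32_t size_kb = 512, assoc_enc = 0x6, lines_per_tag = 1, line_size = 64;

    uint32_t ecx = (size_kb << 16) | (assoc_enc << 12) |
                   (lines_per_tag << 8) | line_size;

    printf("ECX = 0x%08X\n", ecx);    /* 0x02006140 */
    return 0;
}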
@@ -490,7 +560,8 @@ static void encode_cache_cpuid8000001d(CPUCacheInfo *cache,
*eax = CACHE_TYPE(cache->type) | CACHE_LEVEL(cache->level) |
(cache->self_init ? CACHE_SELF_INIT_LEVEL : 0);
- *eax |= max_thread_ids_for_cache(topo_info, cache->share_level) << 14;
+ /* Bits 25:14 - NumSharingCache: maximum 4095. */
+ *eax |= MIN(max_thread_ids_for_cache(topo_info, cache->share_level), 4095) << 14;
assert(cache->line_size > 0);
assert(cache->partitions > 0);
@@ -570,117 +641,172 @@ static void encode_topo_cpuid8000001e(X86CPU *cpu, X86CPUTopoInfo *topo_info,
* These are legacy cache values. If there is a need to change any
* of these values please use builtin_x86_defs
*/
-
-/* L1 data cache: */
-static CPUCacheInfo legacy_l1d_cache = {
- .type = DATA_CACHE,
- .level = 1,
- .size = 32 * KiB,
- .self_init = 1,
- .line_size = 64,
- .associativity = 8,
- .sets = 64,
- .partitions = 1,
- .no_invd_sharing = true,
- .share_level = CPU_TOPOLOGY_LEVEL_CORE,
-};
-
-/*FIXME: CPUID leaf 0x80000005 is inconsistent with leaves 2 & 4 */
-static CPUCacheInfo legacy_l1d_cache_amd = {
- .type = DATA_CACHE,
- .level = 1,
- .size = 64 * KiB,
- .self_init = 1,
- .line_size = 64,
- .associativity = 2,
- .sets = 512,
- .partitions = 1,
- .lines_per_tag = 1,
- .no_invd_sharing = true,
- .share_level = CPU_TOPOLOGY_LEVEL_CORE,
-};
-
-/* L1 instruction cache: */
-static CPUCacheInfo legacy_l1i_cache = {
- .type = INSTRUCTION_CACHE,
- .level = 1,
- .size = 32 * KiB,
- .self_init = 1,
- .line_size = 64,
- .associativity = 8,
- .sets = 64,
- .partitions = 1,
- .no_invd_sharing = true,
- .share_level = CPU_TOPOLOGY_LEVEL_CORE,
-};
-
-/*FIXME: CPUID leaf 0x80000005 is inconsistent with leaves 2 & 4 */
-static CPUCacheInfo legacy_l1i_cache_amd = {
- .type = INSTRUCTION_CACHE,
- .level = 1,
- .size = 64 * KiB,
- .self_init = 1,
- .line_size = 64,
- .associativity = 2,
- .sets = 512,
- .partitions = 1,
- .lines_per_tag = 1,
- .no_invd_sharing = true,
- .share_level = CPU_TOPOLOGY_LEVEL_CORE,
-};
-
-/* Level 2 unified cache: */
-static CPUCacheInfo legacy_l2_cache = {
- .type = UNIFIED_CACHE,
- .level = 2,
- .size = 4 * MiB,
- .self_init = 1,
- .line_size = 64,
- .associativity = 16,
- .sets = 4096,
- .partitions = 1,
- .no_invd_sharing = true,
- .share_level = CPU_TOPOLOGY_LEVEL_CORE,
-};
-
-/*FIXME: CPUID leaf 2 descriptor is inconsistent with CPUID leaf 4 */
-static CPUCacheInfo legacy_l2_cache_cpuid2 = {
- .type = UNIFIED_CACHE,
- .level = 2,
- .size = 2 * MiB,
- .line_size = 64,
- .associativity = 8,
- .share_level = CPU_TOPOLOGY_LEVEL_INVALID,
+static const CPUCaches legacy_amd_cache_info = {
+ .l1d_cache = &(CPUCacheInfo) {
+ .type = DATA_CACHE,
+ .level = 1,
+ .size = 64 * KiB,
+ .self_init = 1,
+ .line_size = 64,
+ .associativity = 2,
+ .sets = 512,
+ .partitions = 1,
+ .lines_per_tag = 1,
+ .no_invd_sharing = true,
+ .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+ },
+ .l1i_cache = &(CPUCacheInfo) {
+ .type = INSTRUCTION_CACHE,
+ .level = 1,
+ .size = 64 * KiB,
+ .self_init = 1,
+ .line_size = 64,
+ .associativity = 2,
+ .sets = 512,
+ .partitions = 1,
+ .lines_per_tag = 1,
+ .no_invd_sharing = true,
+ .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+ },
+ .l2_cache = &(CPUCacheInfo) {
+ .type = UNIFIED_CACHE,
+ .level = 2,
+ .size = 512 * KiB,
+ .line_size = 64,
+ .lines_per_tag = 1,
+ .associativity = 16,
+ .sets = 512,
+ .partitions = 1,
+ .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+ },
+ .l3_cache = &(CPUCacheInfo) {
+ .type = UNIFIED_CACHE,
+ .level = 3,
+ .size = 16 * MiB,
+ .line_size = 64,
+ .associativity = 16,
+ .sets = 16384,
+ .partitions = 1,
+ .lines_per_tag = 1,
+ .self_init = true,
+ .inclusive = true,
+ .complex_indexing = true,
+ .share_level = CPU_TOPOLOGY_LEVEL_DIE,
+ },
};
-
-/*FIXME: CPUID leaf 0x80000006 is inconsistent with leaves 2 & 4 */
-static CPUCacheInfo legacy_l2_cache_amd = {
- .type = UNIFIED_CACHE,
- .level = 2,
- .size = 512 * KiB,
- .line_size = 64,
- .lines_per_tag = 1,
- .associativity = 16,
- .sets = 512,
- .partitions = 1,
- .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+/*
+ * Only used for the CPU models with CPUID level < 4.
+ * These CPUs (CPUID level < 4) only use CPUID leaf 2 to present
+ * cache information.
+ *
+ * Note: This cache model is just a default one, and is not
+ * guaranteed to match real hardwares.
+ */
+static const CPUCaches legacy_intel_cpuid2_cache_info = {
+ .l1d_cache = &(CPUCacheInfo) {
+ .type = DATA_CACHE,
+ .level = 1,
+ .size = 32 * KiB,
+ .self_init = 1,
+ .line_size = 64,
+ .associativity = 8,
+ .sets = 64,
+ .partitions = 1,
+ .no_invd_sharing = true,
+ .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+ },
+ .l1i_cache = &(CPUCacheInfo) {
+ .type = INSTRUCTION_CACHE,
+ .level = 1,
+ .size = 32 * KiB,
+ .self_init = 1,
+ .line_size = 64,
+ .associativity = 8,
+ .sets = 64,
+ .partitions = 1,
+ .no_invd_sharing = true,
+ .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+ },
+ .l2_cache = &(CPUCacheInfo) {
+ .type = UNIFIED_CACHE,
+ .level = 2,
+ .size = 2 * MiB,
+ .self_init = 1,
+ .line_size = 64,
+ .associativity = 8,
+ .sets = 4096,
+ .partitions = 1,
+ .no_invd_sharing = true,
+ .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+ },
+ .l3_cache = &(CPUCacheInfo) {
+ .type = UNIFIED_CACHE,
+ .level = 3,
+ .size = 16 * MiB,
+ .line_size = 64,
+ .associativity = 16,
+ .sets = 16384,
+ .partitions = 1,
+ .lines_per_tag = 1,
+ .self_init = true,
+ .inclusive = true,
+ .complex_indexing = true,
+ .share_level = CPU_TOPOLOGY_LEVEL_DIE,
+ },
};
-/* Level 3 unified cache: */
-static CPUCacheInfo legacy_l3_cache = {
- .type = UNIFIED_CACHE,
- .level = 3,
- .size = 16 * MiB,
- .line_size = 64,
- .associativity = 16,
- .sets = 16384,
- .partitions = 1,
- .lines_per_tag = 1,
- .self_init = true,
- .inclusive = true,
- .complex_indexing = true,
- .share_level = CPU_TOPOLOGY_LEVEL_DIE,
+static const CPUCaches legacy_intel_cache_info = {
+ .l1d_cache = &(CPUCacheInfo) {
+ .type = DATA_CACHE,
+ .level = 1,
+ .size = 32 * KiB,
+ .self_init = 1,
+ .line_size = 64,
+ .associativity = 8,
+ .sets = 64,
+ .partitions = 1,
+ .no_invd_sharing = true,
+ .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+ },
+ .l1i_cache = &(CPUCacheInfo) {
+ .type = INSTRUCTION_CACHE,
+ .level = 1,
+ .size = 32 * KiB,
+ .self_init = 1,
+ .line_size = 64,
+ .associativity = 8,
+ .sets = 64,
+ .partitions = 1,
+ .no_invd_sharing = true,
+ .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+ },
+ .l2_cache = &(CPUCacheInfo) {
+ .type = UNIFIED_CACHE,
+ .level = 2,
+ .size = 4 * MiB,
+ .self_init = 1,
+ .line_size = 64,
+ .associativity = 16,
+ .sets = 4096,
+ .partitions = 1,
+ .no_invd_sharing = true,
+ .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+ },
+ .l3_cache = &(CPUCacheInfo) {
+ .type = UNIFIED_CACHE,
+ .level = 3,
+ .size = 16 * MiB,
+ .line_size = 64,
+ .associativity = 16,
+ .sets = 16384,
+ .partitions = 1,
+ .lines_per_tag = 1,
+ .self_init = true,
+ .inclusive = true,
+ .complex_indexing = true,
+ .share_level = CPU_TOPOLOGY_LEVEL_DIE,
+ },
};
/* TLB definitions: */
@@ -774,11 +900,12 @@ void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1,
CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC | CPUID_SEP | \
CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV | CPUID_PAT | \
CPUID_PSE36 | CPUID_CLFLUSH | CPUID_ACPI | CPUID_MMX | \
- CPUID_FXSR | CPUID_SSE | CPUID_SSE2 | CPUID_SS | CPUID_DE)
+ CPUID_FXSR | CPUID_SSE | CPUID_SSE2 | CPUID_SS | CPUID_DE | \
+ CPUID_HT)
/* partly implemented:
CPUID_MTRR, CPUID_MCA, CPUID_CLFLUSH (needed for Win64) */
/* missing:
- CPUID_VME, CPUID_DTS, CPUID_SS, CPUID_HT, CPUID_TM, CPUID_PBE */
+ CPUID_VME, CPUID_DTS, CPUID_SS, CPUID_TM, CPUID_PBE */
/*
* Kernel-only features that can be shown to usermode programs even if
@@ -846,7 +973,8 @@ void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1,
#define TCG_EXT3_FEATURES (CPUID_EXT3_LAHF_LM | CPUID_EXT3_SVM | \
CPUID_EXT3_CR8LEG | CPUID_EXT3_ABM | CPUID_EXT3_SSE4A | \
- CPUID_EXT3_3DNOWPREFETCH | CPUID_EXT3_KERNEL_FEATURES)
+ CPUID_EXT3_3DNOWPREFETCH | CPUID_EXT3_KERNEL_FEATURES | \
+ CPUID_EXT3_CMP_LEG)
#define TCG_EXT4_FEATURES 0
@@ -895,6 +1023,7 @@ void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1,
#define TCG_7_1_EAX_FEATURES (CPUID_7_1_EAX_FZRM | CPUID_7_1_EAX_FSRS | \
CPUID_7_1_EAX_FSRC | CPUID_7_1_EAX_CMPCCXADD)
+#define TCG_7_1_ECX_FEATURES 0
#define TCG_7_1_EDX_FEATURES 0
#define TCG_7_2_EDX_FEATURES 0
#define TCG_APM_FEATURES 0
@@ -920,6 +1049,17 @@ void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1,
#define TCG_8000_0008_EBX (CPUID_8000_0008_EBX_XSAVEERPTR | \
CPUID_8000_0008_EBX_WBNOINVD | CPUID_8000_0008_EBX_KERNEL_FEATURES)
+#if defined CONFIG_USER_ONLY
+#define CPUID_8000_0021_EAX_KERNEL_FEATURES CPUID_8000_0021_EAX_AUTO_IBRS
+#else
+#define CPUID_8000_0021_EAX_KERNEL_FEATURES 0
+#endif
+
+#define TCG_8000_0021_EAX_FEATURES ( \
+ CPUID_8000_0021_EAX_NO_NESTED_DATA_BP | \
+ CPUID_8000_0021_EAX_NULL_SEL_CLR_BASE | \
+ CPUID_8000_0021_EAX_KERNEL_FEATURES)
+
FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
[FEAT_1_EDX] = {
.type = CPUID_FEATURE_WORD,
@@ -1134,6 +1274,25 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
},
.tcg_features = TCG_7_1_EAX_FEATURES,
},
+ [FEAT_7_1_ECX] = {
+ .type = CPUID_FEATURE_WORD,
+ .feat_names = {
+ NULL, NULL, NULL, NULL,
+ NULL, "msr-imm", NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ },
+ .cpuid = {
+ .eax = 7,
+ .needs_ecx = true, .ecx = 1,
+ .reg = R_ECX,
+ },
+ .tcg_features = TCG_7_1_ECX_FEATURES,
+ },
[FEAT_7_1_EDX] = {
.type = CPUID_FEATURE_WORD,
.feat_names = {
@@ -1237,17 +1396,17 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
[FEAT_8000_0021_EAX] = {
.type = CPUID_FEATURE_WORD,
.feat_names = {
- "no-nested-data-bp", NULL, "lfence-always-serializing", NULL,
+ "no-nested-data-bp", "fs-gs-base-ns", "lfence-always-serializing", NULL,
NULL, NULL, "null-sel-clr-base", NULL,
"auto-ibrs", NULL, NULL, NULL,
NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL,
+ "prefetchi", NULL, NULL, NULL,
"eraps", NULL, NULL, "sbpb",
"ibpb-brtype", "srso-no", "srso-user-kernel-no", NULL,
},
.cpuid = { .eax = 0x80000021, .reg = R_EAX, },
- .tcg_features = 0,
+ .tcg_features = TCG_8000_0021_EAX_FEATURES,
.unmigratable_flags = 0,
},
[FEAT_8000_0021_EBX] = {
@@ -1370,6 +1529,14 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
"bhi-no", NULL, NULL, NULL,
"pbrsb-no", NULL, "gds-no", "rfds-no",
"rfds-clear", NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, "its-no", NULL,
},
.msr = {
.index = MSR_IA32_ARCH_CAPABILITIES,
@@ -1654,14 +1821,21 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
},
};
-typedef struct FeatureMask {
- FeatureWord index;
- uint64_t mask;
-} FeatureMask;
+bool is_feature_word_cpuid(uint32_t feature, uint32_t index, int reg)
+{
+ FeatureWordInfo *wi;
+ FeatureWord w;
-typedef struct FeatureDep {
- FeatureMask from, to;
-} FeatureDep;
+ for (w = 0; w < FEATURE_WORDS; w++) {
+ wi = &feature_word_info[w];
+ if (wi->type == CPUID_FEATURE_WORD && wi->cpuid.eax == feature &&
+ (!wi->cpuid.needs_ecx || wi->cpuid.ecx == index) &&
+ wi->cpuid.reg == reg) {
+ return true;
+ }
+ }
+ return false;
+}
static FeatureDep feature_dependencies[] = {
{
@@ -1773,10 +1947,6 @@ static FeatureDep feature_dependencies[] = {
.to = { FEAT_7_1_EAX, CPUID_7_1_EAX_FRED },
},
{
- .from = { FEAT_7_1_EAX, CPUID_7_1_EAX_WRMSRNS },
- .to = { FEAT_7_1_EAX, CPUID_7_1_EAX_FRED },
- },
- {
.from = { FEAT_7_0_EBX, CPUID_7_0_EBX_SGX },
.to = { FEAT_7_0_ECX, CPUID_7_0_ECX_SGX_LC },
},
@@ -1831,9 +2001,6 @@ static const X86RegisterInfo32 x86_reg_info_32[CPU_NB_REGS32] = {
};
#undef REGISTER
-/* CPUID feature bits available in XSS */
-#define CPUID_XSTATE_XSS_MASK (XSTATE_ARCH_LBR_MASK)
-
ExtSaveArea x86_ext_save_areas[XSAVE_STATE_AREA_COUNT] = {
[XSTATE_FP_BIT] = {
/* x87 FP state component is always enabled if XSAVE is supported */
@@ -1899,7 +2066,7 @@ uint32_t xsave_area_size(uint64_t mask, bool compacted)
static inline bool accel_uses_host_cpuid(void)
{
- return kvm_enabled() || hvf_enabled();
+ return !tcg_enabled() && !qtest_enabled();
}
static inline uint64_t x86_cpu_xsave_xcr0_components(X86CPU *cpu)
@@ -2183,6 +2350,60 @@ static CPUCaches epyc_v4_cache_info = {
},
};
+static CPUCaches epyc_v5_cache_info = {
+ .l1d_cache = &(CPUCacheInfo) {
+ .type = DATA_CACHE,
+ .level = 1,
+ .size = 32 * KiB,
+ .line_size = 64,
+ .associativity = 8,
+ .partitions = 1,
+ .sets = 64,
+ .lines_per_tag = 1,
+ .self_init = true,
+ .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+ },
+ .l1i_cache = &(CPUCacheInfo) {
+ .type = INSTRUCTION_CACHE,
+ .level = 1,
+ .size = 64 * KiB,
+ .line_size = 64,
+ .associativity = 4,
+ .partitions = 1,
+ .sets = 256,
+ .lines_per_tag = 1,
+ .self_init = true,
+ .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+ },
+ .l2_cache = &(CPUCacheInfo) {
+ .type = UNIFIED_CACHE,
+ .level = 2,
+ .size = 512 * KiB,
+ .line_size = 64,
+ .associativity = 8,
+ .partitions = 1,
+ .sets = 1024,
+ .lines_per_tag = 1,
+ .self_init = true,
+ .inclusive = true,
+ .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+ },
+ .l3_cache = &(CPUCacheInfo) {
+ .type = UNIFIED_CACHE,
+ .level = 3,
+ .size = 8 * MiB,
+ .line_size = 64,
+ .associativity = 16,
+ .partitions = 1,
+ .sets = 8192,
+ .lines_per_tag = 1,
+ .self_init = true,
+ .no_invd_sharing = true,
+ .complex_indexing = false,
+ .share_level = CPU_TOPOLOGY_LEVEL_DIE,
+ },
+};
+
static const CPUCaches epyc_rome_cache_info = {
.l1d_cache = &(CPUCacheInfo) {
.type = DATA_CACHE,
@@ -2291,6 +2512,60 @@ static const CPUCaches epyc_rome_v3_cache_info = {
},
};
+static const CPUCaches epyc_rome_v5_cache_info = {
+ .l1d_cache = &(CPUCacheInfo) {
+ .type = DATA_CACHE,
+ .level = 1,
+ .size = 32 * KiB,
+ .line_size = 64,
+ .associativity = 8,
+ .partitions = 1,
+ .sets = 64,
+ .lines_per_tag = 1,
+ .self_init = true,
+ .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+ },
+ .l1i_cache = &(CPUCacheInfo) {
+ .type = INSTRUCTION_CACHE,
+ .level = 1,
+ .size = 32 * KiB,
+ .line_size = 64,
+ .associativity = 8,
+ .partitions = 1,
+ .sets = 64,
+ .lines_per_tag = 1,
+ .self_init = true,
+ .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+ },
+ .l2_cache = &(CPUCacheInfo) {
+ .type = UNIFIED_CACHE,
+ .level = 2,
+ .size = 512 * KiB,
+ .line_size = 64,
+ .associativity = 8,
+ .partitions = 1,
+ .sets = 1024,
+ .lines_per_tag = 1,
+ .self_init = true,
+ .inclusive = true,
+ .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+ },
+ .l3_cache = &(CPUCacheInfo) {
+ .type = UNIFIED_CACHE,
+ .level = 3,
+ .size = 16 * MiB,
+ .line_size = 64,
+ .associativity = 16,
+ .partitions = 1,
+ .sets = 16384,
+ .lines_per_tag = 1,
+ .self_init = true,
+ .no_invd_sharing = true,
+ .complex_indexing = false,
+ .share_level = CPU_TOPOLOGY_LEVEL_DIE,
+ },
+};
+
static const CPUCaches epyc_milan_cache_info = {
.l1d_cache = &(CPUCacheInfo) {
.type = DATA_CACHE,
@@ -2399,6 +2674,60 @@ static const CPUCaches epyc_milan_v2_cache_info = {
},
};
+static const CPUCaches epyc_milan_v3_cache_info = {
+ .l1d_cache = &(CPUCacheInfo) {
+ .type = DATA_CACHE,
+ .level = 1,
+ .size = 32 * KiB,
+ .line_size = 64,
+ .associativity = 8,
+ .partitions = 1,
+ .sets = 64,
+ .lines_per_tag = 1,
+ .self_init = true,
+ .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+ },
+ .l1i_cache = &(CPUCacheInfo) {
+ .type = INSTRUCTION_CACHE,
+ .level = 1,
+ .size = 32 * KiB,
+ .line_size = 64,
+ .associativity = 8,
+ .partitions = 1,
+ .sets = 64,
+ .lines_per_tag = 1,
+ .self_init = true,
+ .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+ },
+ .l2_cache = &(CPUCacheInfo) {
+ .type = UNIFIED_CACHE,
+ .level = 2,
+ .size = 512 * KiB,
+ .line_size = 64,
+ .associativity = 8,
+ .partitions = 1,
+ .sets = 1024,
+ .lines_per_tag = 1,
+ .self_init = true,
+ .inclusive = true,
+ .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+ },
+ .l3_cache = &(CPUCacheInfo) {
+ .type = UNIFIED_CACHE,
+ .level = 3,
+ .size = 32 * MiB,
+ .line_size = 64,
+ .associativity = 16,
+ .partitions = 1,
+ .sets = 32768,
+ .lines_per_tag = 1,
+ .self_init = true,
+ .no_invd_sharing = true,
+ .complex_indexing = false,
+ .share_level = CPU_TOPOLOGY_LEVEL_DIE,
+ },
+};
+
static const CPUCaches epyc_genoa_cache_info = {
.l1d_cache = &(CPUCacheInfo) {
.type = DATA_CACHE,
@@ -2453,6 +2782,486 @@ static const CPUCaches epyc_genoa_cache_info = {
},
};
+static const CPUCaches epyc_genoa_v2_cache_info = {
+ .l1d_cache = &(CPUCacheInfo) {
+ .type = DATA_CACHE,
+ .level = 1,
+ .size = 32 * KiB,
+ .line_size = 64,
+ .associativity = 8,
+ .partitions = 1,
+ .sets = 64,
+ .lines_per_tag = 1,
+ .self_init = true,
+ .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+ },
+ .l1i_cache = &(CPUCacheInfo) {
+ .type = INSTRUCTION_CACHE,
+ .level = 1,
+ .size = 32 * KiB,
+ .line_size = 64,
+ .associativity = 8,
+ .partitions = 1,
+ .sets = 64,
+ .lines_per_tag = 1,
+ .self_init = true,
+ .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+ },
+ .l2_cache = &(CPUCacheInfo) {
+ .type = UNIFIED_CACHE,
+ .level = 2,
+ .size = 1 * MiB,
+ .line_size = 64,
+ .associativity = 8,
+ .partitions = 1,
+ .sets = 2048,
+ .lines_per_tag = 1,
+ .self_init = true,
+ .inclusive = true,
+ .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+ },
+ .l3_cache = &(CPUCacheInfo) {
+ .type = UNIFIED_CACHE,
+ .level = 3,
+ .size = 32 * MiB,
+ .line_size = 64,
+ .associativity = 16,
+ .partitions = 1,
+ .sets = 32768,
+ .lines_per_tag = 1,
+ .self_init = true,
+ .no_invd_sharing = true,
+ .complex_indexing = false,
+ .share_level = CPU_TOPOLOGY_LEVEL_DIE,
+ },
+};
+
+static const CPUCaches epyc_turin_cache_info = {
+ .l1d_cache = &(CPUCacheInfo) {
+ .type = DATA_CACHE,
+ .level = 1,
+ .size = 48 * KiB,
+ .line_size = 64,
+ .associativity = 12,
+ .partitions = 1,
+ .sets = 64,
+ .lines_per_tag = 1,
+ .self_init = true,
+ .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+ },
+ .l1i_cache = &(CPUCacheInfo) {
+ .type = INSTRUCTION_CACHE,
+ .level = 1,
+ .size = 32 * KiB,
+ .line_size = 64,
+ .associativity = 8,
+ .partitions = 1,
+ .sets = 64,
+ .lines_per_tag = 1,
+ .self_init = true,
+ .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+ },
+ .l2_cache = &(CPUCacheInfo) {
+ .type = UNIFIED_CACHE,
+ .level = 2,
+ .size = 1 * MiB,
+ .line_size = 64,
+ .associativity = 16,
+ .partitions = 1,
+ .sets = 1024,
+ .lines_per_tag = 1,
+ .self_init = true,
+ .inclusive = true,
+ .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+ },
+ .l3_cache = &(CPUCacheInfo) {
+ .type = UNIFIED_CACHE,
+ .level = 3,
+ .size = 32 * MiB,
+ .line_size = 64,
+ .associativity = 16,
+ .partitions = 1,
+ .sets = 32768,
+ .lines_per_tag = 1,
+ .self_init = true,
+ .no_invd_sharing = true,
+ .complex_indexing = false,
+ .share_level = CPU_TOPOLOGY_LEVEL_DIE,
+ }
+};
+
+static const CPUCaches xeon_spr_cache_info = {
+ .l1d_cache = &(CPUCacheInfo) {
+ /* CPUID 0x4.0x0.EAX */
+ .type = DATA_CACHE,
+ .level = 1,
+ .self_init = true,
+
+ /* CPUID 0x4.0x0.EBX */
+ .line_size = 64,
+ .partitions = 1,
+ .associativity = 12,
+
+ /* CPUID 0x4.0x0.ECX */
+ .sets = 64,
+
+ /* CPUID 0x4.0x0.EDX */
+ .no_invd_sharing = false,
+ .inclusive = false,
+ .complex_indexing = false,
+
+ .size = 48 * KiB,
+ .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+ },
+ .l1i_cache = &(CPUCacheInfo) {
+ /* CPUID 0x4.0x1.EAX */
+ .type = INSTRUCTION_CACHE,
+ .level = 1,
+ .self_init = true,
+
+ /* CPUID 0x4.0x1.EBX */
+ .line_size = 64,
+ .partitions = 1,
+ .associativity = 8,
+
+ /* CPUID 0x4.0x1.ECX */
+ .sets = 64,
+
+ /* CPUID 0x4.0x1.EDX */
+ .no_invd_sharing = false,
+ .inclusive = false,
+ .complex_indexing = false,
+
+ .size = 32 * KiB,
+ .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+ },
+ .l2_cache = &(CPUCacheInfo) {
+ /* CPUID 0x4.0x2.EAX */
+ .type = UNIFIED_CACHE,
+ .level = 2,
+ .self_init = true,
+
+ /* CPUID 0x4.0x2.EBX */
+ .line_size = 64,
+ .partitions = 1,
+ .associativity = 16,
+
+ /* CPUID 0x4.0x2.ECX */
+ .sets = 2048,
+
+ /* CPUID 0x4.0x2.EDX */
+ .no_invd_sharing = false,
+ .inclusive = false,
+ .complex_indexing = false,
+
+ .size = 2 * MiB,
+ .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+ },
+ .l3_cache = &(CPUCacheInfo) {
+ /* CPUID 0x4.0x3.EAX */
+ .type = UNIFIED_CACHE,
+ .level = 3,
+ .self_init = true,
+
+ /* CPUID 0x4.0x3.EBX */
+ .line_size = 64,
+ .partitions = 1,
+ .associativity = 15,
+
+ /* CPUID 0x4.0x3.ECX */
+ .sets = 65536,
+
+ /* CPUID 0x4.0x3.EDX */
+ .no_invd_sharing = false,
+ .inclusive = false,
+ .complex_indexing = true,
+
+ .size = 60 * MiB,
+ .share_level = CPU_TOPOLOGY_LEVEL_SOCKET,
+ },
+};
+
+static const CPUCaches xeon_gnr_cache_info = {
+ .l1d_cache = &(CPUCacheInfo) {
+ /* CPUID 0x4.0x0.EAX */
+ .type = DATA_CACHE,
+ .level = 1,
+ .self_init = true,
+
+ /* CPUID 0x4.0x0.EBX */
+ .line_size = 64,
+ .partitions = 1,
+ .associativity = 12,
+
+ /* CPUID 0x4.0x0.ECX */
+ .sets = 64,
+
+ /* CPUID 0x4.0x0.EDX */
+ .no_invd_sharing = false,
+ .inclusive = false,
+ .complex_indexing = false,
+
+ .size = 48 * KiB,
+ .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+ },
+ .l1i_cache = &(CPUCacheInfo) {
+ /* CPUID 0x4.0x1.EAX */
+ .type = INSTRUCTION_CACHE,
+ .level = 1,
+ .self_init = true,
+
+ /* CPUID 0x4.0x1.EBX */
+ .line_size = 64,
+ .partitions = 1,
+ .associativity = 16,
+
+ /* CPUID 0x4.0x1.ECX */
+ .sets = 64,
+
+ /* CPUID 0x4.0x1.EDX */
+ .no_invd_sharing = false,
+ .inclusive = false,
+ .complex_indexing = false,
+
+ .size = 64 * KiB,
+ .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+ },
+ .l2_cache = &(CPUCacheInfo) {
+ /* CPUID 0x4.0x2.EAX */
+ .type = UNIFIED_CACHE,
+ .level = 2,
+ .self_init = true,
+
+ /* CPUID 0x4.0x2.EBX */
+ .line_size = 64,
+ .partitions = 1,
+ .associativity = 16,
+
+ /* CPUID 0x4.0x2.ECX */
+ .sets = 2048,
+
+ /* CPUID 0x4.0x2.EDX */
+ .no_invd_sharing = false,
+ .inclusive = false,
+ .complex_indexing = false,
+
+ .size = 2 * MiB,
+ .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+ },
+ .l3_cache = &(CPUCacheInfo) {
+ /* CPUID 0x4.0x3.EAX */
+ .type = UNIFIED_CACHE,
+ .level = 3,
+ .self_init = true,
+
+ /* CPUID 0x4.0x3.EBX */
+ .line_size = 64,
+ .partitions = 1,
+ .associativity = 16,
+
+ /* CPUID 0x4.0x3.ECX */
+ .sets = 294912,
+
+ /* CPUID 0x4.0x3.EDX */
+ .no_invd_sharing = false,
+ .inclusive = false,
+ .complex_indexing = true,
+
+ .size = 288 * MiB,
+ .share_level = CPU_TOPOLOGY_LEVEL_SOCKET,
+ },
+};
+
+static const CPUCaches xeon_srf_cache_info = {
+ .l1d_cache = &(CPUCacheInfo) {
+ /* CPUID 0x4.0x0.EAX */
+ .type = DATA_CACHE,
+ .level = 1,
+ .self_init = true,
+
+ /* CPUID 0x4.0x0.EBX */
+ .line_size = 64,
+ .partitions = 1,
+ .associativity = 8,
+
+ /* CPUID 0x4.0x0.ECX */
+ .sets = 64,
+
+ /* CPUID 0x4.0x0.EDX */
+ .no_invd_sharing = false,
+ .inclusive = false,
+ .complex_indexing = false,
+
+ .size = 32 * KiB,
+ .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+ },
+ .l1i_cache = &(CPUCacheInfo) {
+ /* CPUID 0x4.0x1.EAX */
+ .type = INSTRUCTION_CACHE,
+ .level = 1,
+ .self_init = true,
+
+ /* CPUID 0x4.0x1.EBX */
+ .line_size = 64,
+ .partitions = 1,
+ .associativity = 8,
+
+ /* CPUID 0x4.0x1.ECX */
+ .sets = 128,
+
+ /* CPUID 0x4.0x1.EDX */
+ .no_invd_sharing = false,
+ .inclusive = false,
+ .complex_indexing = false,
+
+ .size = 64 * KiB,
+ .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+ },
+ .l2_cache = &(CPUCacheInfo) {
+ /* CPUID 0x4.0x2.EAX */
+ .type = UNIFIED_CACHE,
+ .level = 2,
+ .self_init = true,
+
+ /* CPUID 0x4.0x2.EBX */
+ .line_size = 64,
+ .partitions = 1,
+ .associativity = 16,
+
+ /* CPUID 0x4.0x2.ECX */
+ .sets = 4096,
+
+ /* CPUID 0x4.0x2.EDX */
+ .no_invd_sharing = false,
+ .inclusive = false,
+ .complex_indexing = false,
+
+ .size = 4 * MiB,
+ .share_level = CPU_TOPOLOGY_LEVEL_MODULE,
+ },
+ .l3_cache = &(CPUCacheInfo) {
+ /* CPUID 0x4.0x3.EAX */
+ .type = UNIFIED_CACHE,
+ .level = 3,
+ .self_init = true,
+
+ /* CPUID 0x4.0x3.EBX */
+ .line_size = 64,
+ .partitions = 1,
+ .associativity = 12,
+
+ /* CPUID 0x4.0x3.ECX */
+ .sets = 147456,
+
+ /* CPUID 0x4.0x3.EDX */
+ .no_invd_sharing = false,
+ .inclusive = false,
+ .complex_indexing = true,
+
+ .size = 108 * MiB,
+ .share_level = CPU_TOPOLOGY_LEVEL_SOCKET,
+ },
+};
+
+static const CPUCaches yongfeng_cache_info = {
+ .l1d_cache = &(CPUCacheInfo) {
+ /* CPUID 0x4.0x0.EAX */
+ .type = DATA_CACHE,
+ .level = 1,
+ .self_init = true,
+
+ /* CPUID 0x4.0x0.EBX */
+ .line_size = 64,
+ .partitions = 1,
+ .associativity = 8,
+
+ /* CPUID 0x4.0x0.ECX */
+ .sets = 64,
+
+ /* CPUID 0x4.0x0.EDX */
+ .no_invd_sharing = false,
+ .inclusive = false,
+ .complex_indexing = false,
+
+ /* CPUID 0x80000005.ECX */
+ .lines_per_tag = 1,
+ .size = 32 * KiB,
+
+ .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+ },
+ .l1i_cache = &(CPUCacheInfo) {
+ /* CPUID 0x4.0x1.EAX */
+ .type = INSTRUCTION_CACHE,
+ .level = 1,
+ .self_init = true,
+
+ /* CPUID 0x4.0x1.EBX */
+ .line_size = 64,
+ .partitions = 1,
+ .associativity = 16,
+
+ /* CPUID 0x4.0x1.ECX */
+ .sets = 64,
+
+ /* CPUID 0x4.0x1.EDX */
+ .no_invd_sharing = false,
+ .inclusive = false,
+ .complex_indexing = false,
+
+ /* CPUID 0x80000005.EDX */
+ .lines_per_tag = 1,
+ .size = 64 * KiB,
+
+ .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+ },
+ .l2_cache = &(CPUCacheInfo) {
+ /* CPUID 0x4.0x2.EAX */
+ .type = UNIFIED_CACHE,
+ .level = 2,
+ .self_init = true,
+
+ /* CPUID 0x4.0x2.EBX */
+ .line_size = 64,
+ .partitions = 1,
+ .associativity = 8,
+
+ /* CPUID 0x4.0x2.ECX */
+ .sets = 512,
+
+ /* CPUID 0x4.0x2.EDX */
+ .no_invd_sharing = false,
+ .inclusive = true,
+ .complex_indexing = false,
+
+ /* CPUID 0x80000006.ECX */
+ .size = 256 * KiB,
+
+ .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+ },
+ .l3_cache = &(CPUCacheInfo) {
+ /* CPUID 0x4.0x3.EAX */
+ .type = UNIFIED_CACHE,
+ .level = 3,
+ .self_init = true,
+
+ /* CPUID 0x4.0x3.EBX */
+ .line_size = 64,
+ .partitions = 1,
+ .associativity = 16,
+
+ /* CPUID 0x4.0x3.ECX */
+ .sets = 8192,
+
+ /* CPUID 0x4.0x3.EDX */
+ .no_invd_sharing = true,
+ .inclusive = true,
+ .complex_indexing = false,
+
+ .size = 8 * MiB,
+ .share_level = CPU_TOPOLOGY_LEVEL_DIE,
+ },
+};
+
/* The following VMX features are not supported by KVM and are left out in the
* CPU definitions:
*
@@ -2705,6 +3514,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
I486_FEATURES,
.xlevel = 0,
.model_id = "",
+ .cache_info = &legacy_intel_cpuid2_cache_info,
},
{
.name = "pentium",
@@ -2717,6 +3527,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
PENTIUM_FEATURES,
.xlevel = 0,
.model_id = "",
+ .cache_info = &legacy_intel_cpuid2_cache_info,
},
{
.name = "pentium2",
@@ -2729,6 +3540,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
PENTIUM2_FEATURES,
.xlevel = 0,
.model_id = "",
+ .cache_info = &legacy_intel_cpuid2_cache_info,
},
{
.name = "pentium3",
@@ -2741,6 +3553,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
PENTIUM3_FEATURES,
.xlevel = 0,
.model_id = "",
+ .cache_info = &legacy_intel_cpuid2_cache_info,
},
{
.name = "athlon",
@@ -4273,6 +5086,15 @@ static const X86CPUDefinition builtin_x86_defs[] = {
{ /* end of list */ }
}
},
+ {
+ .version = 4,
+ .note = "with spr-sp cache model and 0x1f leaf",
+ .cache_info = &xeon_spr_cache_info,
+ .props = (PropValue[]) {
+ { "x-force-cpuid-0x1f", "on" },
+ { /* end of list */ },
+ }
+ },
{ /* end of list */ }
}
},
@@ -4426,6 +5248,15 @@ static const X86CPUDefinition builtin_x86_defs[] = {
{ /* end of list */ }
}
},
+ {
+ .version = 3,
+ .note = "with gnr-sp cache model and 0x1f leaf",
+ .cache_info = &xeon_gnr_cache_info,
+ .props = (PropValue[]) {
+ { "x-force-cpuid-0x1f", "on" },
+ { /* end of list */ },
+ }
+ },
{ /* end of list */ },
},
},
@@ -4571,6 +5402,15 @@ static const X86CPUDefinition builtin_x86_defs[] = {
{ /* end of list */ }
}
},
+ {
+ .version = 3,
+ .note = "with srf-sp cache model and 0x1f leaf",
+ .cache_info = &xeon_srf_cache_info,
+ .props = (PropValue[]) {
+ { "x-force-cpuid-0x1f", "on" },
+ { /* end of list */ },
+ }
+ },
{ /* end of list */ },
},
},
@@ -5210,6 +6050,25 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
.cache_info = &epyc_v4_cache_info
},
+ {
+ .version = 5,
+ .props = (PropValue[]) {
+ { "overflow-recov", "on" },
+ { "succor", "on" },
+ { "lbrv", "on" },
+ { "tsc-scale", "on" },
+ { "vmcb-clean", "on" },
+ { "flushbyasid", "on" },
+ { "pause-filter", "on" },
+ { "pfthreshold", "on" },
+ { "v-vmsave-vmload", "on" },
+ { "vgif", "on" },
+ { "model-id",
+ "AMD EPYC-v5 Processor" },
+ { /* end of list */ }
+ },
+ .cache_info = &epyc_v5_cache_info
+ },
{ /* end of list */ }
}
},
@@ -5348,6 +6207,25 @@ static const X86CPUDefinition builtin_x86_defs[] = {
{ /* end of list */ }
},
},
+ {
+ .version = 5,
+ .props = (PropValue[]) {
+ { "overflow-recov", "on" },
+ { "succor", "on" },
+ { "lbrv", "on" },
+ { "tsc-scale", "on" },
+ { "vmcb-clean", "on" },
+ { "flushbyasid", "on" },
+ { "pause-filter", "on" },
+ { "pfthreshold", "on" },
+ { "v-vmsave-vmload", "on" },
+ { "vgif", "on" },
+ { "model-id",
+ "AMD EPYC-Rome-v5 Processor" },
+ { /* end of list */ }
+ },
+ .cache_info = &epyc_rome_v5_cache_info
+ },
{ /* end of list */ }
}
},
@@ -5423,6 +6301,25 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
.cache_info = &epyc_milan_v2_cache_info
},
+ {
+ .version = 3,
+ .props = (PropValue[]) {
+ { "overflow-recov", "on" },
+ { "succor", "on" },
+ { "lbrv", "on" },
+ { "tsc-scale", "on" },
+ { "vmcb-clean", "on" },
+ { "flushbyasid", "on" },
+ { "pause-filter", "on" },
+ { "pfthreshold", "on" },
+ { "v-vmsave-vmload", "on" },
+ { "vgif", "on" },
+ { "model-id",
+ "AMD EPYC-Milan-v3 Processor" },
+ { /* end of list */ }
+ },
+ .cache_info = &epyc_milan_v3_cache_info
+ },
{ /* end of list */ }
}
},
@@ -5497,6 +6394,31 @@ static const X86CPUDefinition builtin_x86_defs[] = {
.xlevel = 0x80000022,
.model_id = "AMD EPYC-Genoa Processor",
.cache_info = &epyc_genoa_cache_info,
+ .versions = (X86CPUVersionDefinition[]) {
+ { .version = 1 },
+ {
+ .version = 2,
+ .props = (PropValue[]) {
+ { "overflow-recov", "on" },
+ { "succor", "on" },
+ { "lbrv", "on" },
+ { "tsc-scale", "on" },
+ { "vmcb-clean", "on" },
+ { "flushbyasid", "on" },
+ { "pause-filter", "on" },
+ { "pfthreshold", "on" },
+ { "v-vmsave-vmload", "on" },
+ { "vgif", "on" },
+ { "fs-gs-base-ns", "on" },
+ { "perfmon-v2", "on" },
+ { "model-id",
+ "AMD EPYC-Genoa-v2 Processor" },
+ { /* end of list */ }
+ },
+ .cache_info = &epyc_genoa_v2_cache_info
+ },
+ { /* end of list */ }
+ }
},
{
.name = "YongFeng",
@@ -5631,9 +6553,101 @@ static const X86CPUDefinition builtin_x86_defs[] = {
{ /* end of list */ }
}
},
+ {
+ .version = 3,
+ .note = "with the cache model and 0x1f leaf",
+ .cache_info = &yongfeng_cache_info,
+ .props = (PropValue[]) {
+ { "x-force-cpuid-0x1f", "on" },
+ { /* end of list */ },
+ }
+ },
{ /* end of list */ }
}
},
+ {
+ .name = "EPYC-Turin",
+ .level = 0xd,
+ .vendor = CPUID_VENDOR_AMD,
+ .family = 26,
+ .model = 0,
+ .stepping = 0,
+ .features[FEAT_1_ECX] =
+ CPUID_EXT_RDRAND | CPUID_EXT_F16C | CPUID_EXT_AVX |
+ CPUID_EXT_XSAVE | CPUID_EXT_AES | CPUID_EXT_POPCNT |
+ CPUID_EXT_MOVBE | CPUID_EXT_SSE42 | CPUID_EXT_SSE41 |
+ CPUID_EXT_PCID | CPUID_EXT_CX16 | CPUID_EXT_FMA |
+ CPUID_EXT_SSSE3 | CPUID_EXT_MONITOR | CPUID_EXT_PCLMULQDQ |
+ CPUID_EXT_SSE3,
+ .features[FEAT_1_EDX] =
+ CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH |
+ CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | CPUID_PGE |
+ CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | CPUID_MCE |
+ CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | CPUID_DE |
+ CPUID_VME | CPUID_FP87,
+ .features[FEAT_6_EAX] =
+ CPUID_6_EAX_ARAT,
+ .features[FEAT_7_0_EBX] =
+ CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_AVX2 |
+ CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS |
+ CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_AVX512F |
+ CPUID_7_0_EBX_AVX512DQ | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX |
+ CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_AVX512IFMA |
+ CPUID_7_0_EBX_CLFLUSHOPT | CPUID_7_0_EBX_CLWB |
+ CPUID_7_0_EBX_AVX512CD | CPUID_7_0_EBX_SHA_NI |
+ CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512VL,
+ .features[FEAT_7_0_ECX] =
+ CPUID_7_0_ECX_AVX512_VBMI | CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_PKU |
+ CPUID_7_0_ECX_AVX512_VBMI2 | CPUID_7_0_ECX_GFNI |
+ CPUID_7_0_ECX_VAES | CPUID_7_0_ECX_VPCLMULQDQ |
+ CPUID_7_0_ECX_AVX512VNNI | CPUID_7_0_ECX_AVX512BITALG |
+ CPUID_7_0_ECX_AVX512_VPOPCNTDQ | CPUID_7_0_ECX_LA57 |
+ CPUID_7_0_ECX_RDPID | CPUID_7_0_ECX_MOVDIRI |
+ CPUID_7_0_ECX_MOVDIR64B,
+ .features[FEAT_7_0_EDX] =
+ CPUID_7_0_EDX_FSRM | CPUID_7_0_EDX_AVX512_VP2INTERSECT,
+ .features[FEAT_7_1_EAX] =
+ CPUID_7_1_EAX_AVX_VNNI | CPUID_7_1_EAX_AVX512_BF16,
+ .features[FEAT_8000_0001_ECX] =
+ CPUID_EXT3_OSVW | CPUID_EXT3_3DNOWPREFETCH |
+ CPUID_EXT3_MISALIGNSSE | CPUID_EXT3_SSE4A | CPUID_EXT3_ABM |
+ CPUID_EXT3_CR8LEG | CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM |
+ CPUID_EXT3_TOPOEXT | CPUID_EXT3_PERFCORE,
+ .features[FEAT_8000_0001_EDX] =
+ CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_PDPE1GB |
+ CPUID_EXT2_FFXSR | CPUID_EXT2_MMXEXT | CPUID_EXT2_NX |
+ CPUID_EXT2_SYSCALL,
+ .features[FEAT_8000_0007_EBX] =
+ CPUID_8000_0007_EBX_OVERFLOW_RECOV | CPUID_8000_0007_EBX_SUCCOR,
+ .features[FEAT_8000_0008_EBX] =
+ CPUID_8000_0008_EBX_CLZERO | CPUID_8000_0008_EBX_XSAVEERPTR |
+ CPUID_8000_0008_EBX_WBNOINVD | CPUID_8000_0008_EBX_IBPB |
+ CPUID_8000_0008_EBX_IBRS | CPUID_8000_0008_EBX_STIBP |
+ CPUID_8000_0008_EBX_STIBP_ALWAYS_ON |
+ CPUID_8000_0008_EBX_AMD_SSBD | CPUID_8000_0008_EBX_AMD_PSFD,
+ .features[FEAT_8000_0021_EAX] =
+ CPUID_8000_0021_EAX_NO_NESTED_DATA_BP |
+ CPUID_8000_0021_EAX_FS_GS_BASE_NS |
+ CPUID_8000_0021_EAX_LFENCE_ALWAYS_SERIALIZING |
+ CPUID_8000_0021_EAX_NULL_SEL_CLR_BASE |
+ CPUID_8000_0021_EAX_AUTO_IBRS | CPUID_8000_0021_EAX_PREFETCHI |
+ CPUID_8000_0021_EAX_SBPB | CPUID_8000_0021_EAX_IBPB_BRTYPE |
+ CPUID_8000_0021_EAX_SRSO_USER_KERNEL_NO,
+ .features[FEAT_8000_0022_EAX] =
+ CPUID_8000_0022_EAX_PERFMON_V2,
+ .features[FEAT_XSAVE] =
+ CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC |
+ CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES,
+ .features[FEAT_SVM] =
+ CPUID_SVM_NPT | CPUID_SVM_LBRV | CPUID_SVM_NRIPSAVE |
+ CPUID_SVM_TSCSCALE | CPUID_SVM_VMCBCLEAN | CPUID_SVM_FLUSHASID |
+ CPUID_SVM_PAUSEFILTER | CPUID_SVM_PFTHRESHOLD |
+ CPUID_SVM_V_VMSAVE_VMLOAD | CPUID_SVM_VGIF |
+ CPUID_SVM_VNMI | CPUID_SVM_SVME_ADDR_CHK,
+ .xlevel = 0x80000022,
+ .model_id = "AMD EPYC-Turin Processor",
+ .cache_info = &epyc_turin_cache_info,
+ },
};
/*
@@ -5701,13 +6715,14 @@ static void max_x86_cpu_realize(DeviceState *dev, Error **errp)
x86_cpu_realizefn(dev, errp);
}
-static void max_x86_cpu_class_init(ObjectClass *oc, void *data)
+static void max_x86_cpu_class_init(ObjectClass *oc, const void *data)
{
DeviceClass *dc = DEVICE_CLASS(oc);
X86CPUClass *xcc = X86_CPU_CLASS(oc);
xcc->ordering = 9;
+ xcc->max_features = true;
xcc->model_description =
"Enables all features supported by the accelerator in the current host";
@@ -5718,22 +6733,21 @@ static void max_x86_cpu_class_init(ObjectClass *oc, void *data)
static void max_x86_cpu_initfn(Object *obj)
{
X86CPU *cpu = X86_CPU(obj);
-
- /* We can't fill the features array here because we don't know yet if
- * "migratable" is true or false.
- */
- cpu->max_features = true;
- object_property_set_bool(OBJECT(cpu), "pmu", true, &error_abort);
+ CPUX86State *env = &cpu->env;
/*
- * these defaults are used for TCG and all other accelerators
- * besides KVM and HVF, which overwrite these values
+ * these defaults are used for TCG, other accelerators have overwritten
+ * these values
*/
- object_property_set_str(OBJECT(cpu), "vendor", CPUID_VENDOR_AMD,
- &error_abort);
- object_property_set_str(OBJECT(cpu), "model-id",
- "QEMU TCG CPU version " QEMU_HW_VERSION,
- &error_abort);
+ if (!env->cpuid_vendor1) {
+ object_property_set_str(OBJECT(cpu), "vendor", CPUID_VENDOR_AMD,
+ &error_abort);
+ }
+ if (!env->cpuid_model[0]) {
+ object_property_set_str(OBJECT(cpu), "model-id",
+ "QEMU TCG CPU version " QEMU_HW_VERSION,
+ &error_abort);
+ }
}
static const TypeInfo max_x86_cpu_type_info = {
@@ -5743,7 +6757,7 @@ static const TypeInfo max_x86_cpu_type_info = {
.class_init = max_x86_cpu_class_init,
};
-static char *feature_word_description(FeatureWordInfo *f, uint32_t bit)
+static char *feature_word_description(FeatureWordInfo *f)
{
assert(f->type == CPUID_FEATURE_WORD || f->type == MSR_FEATURE_WORD);
@@ -5752,11 +6766,15 @@ static char *feature_word_description(FeatureWordInfo *f, uint32_t bit)
{
const char *reg = get_register_name_32(f->cpuid.reg);
assert(reg);
- return g_strdup_printf("CPUID.%02XH:%s",
- f->cpuid.eax, reg);
+ if (!f->cpuid.needs_ecx) {
+ return g_strdup_printf("CPUID[eax=%02Xh].%s", f->cpuid.eax, reg);
+ } else {
+ return g_strdup_printf("CPUID[eax=%02Xh,ecx=%02Xh].%s",
+ f->cpuid.eax, f->cpuid.ecx, reg);
+ }
}
case MSR_FEATURE_WORD:
- return g_strdup_printf("MSR(%02XH)",
+ return g_strdup_printf("MSR(%02Xh)",
f->msr.index);
}
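For reference, the reworked format produces strings like the ones below (standalone sketch using plain printf; the helper itself builds them with g_strdup_printf):

#include <stdio.h>

int main(void)
{
    /* Leaf without a sub-leaf, e.g. FEAT_8000_0021_EAX */
    printf("CPUID[eax=%02Xh].%s\n", 0x80000021, "EAX");
    /* Leaf with a sub-leaf in ECX, e.g. the new FEAT_7_1_ECX */
    printf("CPUID[eax=%02Xh,ecx=%02Xh].%s\n", 7, 1, "ECX");
    /*
     * prints:
     *   CPUID[eax=80000021h].EAX
     *   CPUID[eax=07h,ecx=01h].ECX
     */
    return 0;
}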
@@ -5776,12 +6794,13 @@ static bool x86_cpu_have_filtered_features(X86CPU *cpu)
return false;
}
-static void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint64_t mask,
- const char *verbose_prefix)
+void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint64_t mask,
+ const char *verbose_prefix)
{
CPUX86State *env = &cpu->env;
FeatureWordInfo *f = &feature_word_info[w];
int i;
+ g_autofree char *feat_word_str = feature_word_description(f);
if (!cpu->force_features) {
env->features[w] &= ~mask;
@@ -5794,7 +6813,35 @@ static void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint64_t mask,
for (i = 0; i < 64; ++i) {
if ((1ULL << i) & mask) {
- g_autofree char *feat_word_str = feature_word_description(f, i);
+ warn_report("%s: %s%s%s [bit %d]",
+ verbose_prefix,
+ feat_word_str,
+ f->feat_names[i] ? "." : "",
+ f->feat_names[i] ? f->feat_names[i] : "", i);
+ }
+ }
+}
+
+void mark_forced_on_features(X86CPU *cpu, FeatureWord w, uint64_t mask,
+ const char *verbose_prefix)
+{
+ CPUX86State *env = &cpu->env;
+ FeatureWordInfo *f = &feature_word_info[w];
+ int i;
+
+ if (!cpu->force_features) {
+ env->features[w] |= mask;
+ }
+
+ cpu->forced_on_features[w] |= mask;
+
+ if (!verbose_prefix) {
+ return;
+ }
+
+ for (i = 0; i < 64; ++i) {
+ if ((1ULL << i) & mask) {
+ g_autofree char *feat_word_str = feature_word_description(f);
warn_report("%s: %s%s%s [bit %d]",
verbose_prefix,
feat_word_str,
@@ -5812,10 +6859,7 @@ static void x86_cpuid_version_get_family(Object *obj, Visitor *v,
CPUX86State *env = &cpu->env;
uint64_t value;
- value = (env->cpuid_version >> 8) & 0xf;
- if (value == 0xf) {
- value += (env->cpuid_version >> 20) & 0xff;
- }
+ value = x86_cpu_family(env->cpuid_version);
visit_type_uint64(v, name, &value, errp);
}
@@ -5853,8 +6897,7 @@ static void x86_cpuid_version_get_model(Object *obj, Visitor *v,
CPUX86State *env = &cpu->env;
uint64_t value;
- value = (env->cpuid_version >> 4) & 0xf;
- value |= ((env->cpuid_version >> 16) & 0xf) << 4;
+ value = x86_cpu_model(env->cpuid_version);
visit_type_uint64(v, name, &value, errp);
}
@@ -5888,7 +6931,7 @@ static void x86_cpuid_version_get_stepping(Object *obj, Visitor *v,
CPUX86State *env = &cpu->env;
uint64_t value;
- value = env->cpuid_version & 0xf;
+ value = x86_cpu_stepping(env->cpuid_version);
visit_type_uint64(v, name, &value, errp);
}
@@ -5956,11 +6999,11 @@ static char *x86_cpuid_get_model_id(Object *obj, Error **errp)
char *value;
int i;
- value = g_malloc(48 + 1);
- for (i = 0; i < 48; i++) {
+ value = g_malloc(CPUID_MODEL_ID_SZ + 1);
+ for (i = 0; i < CPUID_MODEL_ID_SZ; i++) {
value[i] = env->cpuid_model[i >> 2] >> (8 * (i & 3));
}
- value[48] = '\0';
+ value[CPUID_MODEL_ID_SZ] = '\0';
return value;
}
@@ -5975,7 +7018,7 @@ static void x86_cpuid_set_model_id(Object *obj, const char *model_id,
model_id = "";
}
len = strlen(model_id);
- memset(env->cpuid_model, 0, 48);
+ memset(env->cpuid_model, 0, CPUID_MODEL_ID_SZ);
for (i = 0; i < 48; i++) {
if (i >= len) {
c = '\0';
@@ -6238,7 +7281,7 @@ static void listflags(GList *features)
}
/* Sort alphabetically by type name, respecting X86CPUClass::ordering. */
-static gint x86_cpu_list_compare(gconstpointer a, gconstpointer b)
+static gint x86_cpu_list_compare(gconstpointer a, gconstpointer b, gpointer d)
{
ObjectClass *class_a = (ObjectClass *)a;
ObjectClass *class_b = (ObjectClass *)b;
@@ -6259,7 +7302,7 @@ static gint x86_cpu_list_compare(gconstpointer a, gconstpointer b)
static GSList *get_sorted_cpu_model_list(void)
{
GSList *list = object_class_get_list(TYPE_X86_CPU, false);
- list = g_slist_sort(list, x86_cpu_list_compare);
+ list = g_slist_sort_with_data(list, x86_cpu_list_compare, NULL);
return list;
}
@@ -6316,8 +7359,13 @@ static void x86_cpu_list_entry(gpointer data, gpointer user_data)
qemu_printf(" %-20s %s\n", name, desc);
}
+static gint strcmp_wrap(gconstpointer a, gconstpointer b, gpointer d)
+{
+ return strcmp(a, b);
+}
+
/* list available CPU models and flags */
-void x86_cpu_list(void)
+static void x86_cpu_list(void)
{
int i, j;
GSList *list;
@@ -6338,7 +7386,7 @@ void x86_cpu_list(void)
}
}
- names = g_list_sort(names, (GCompareFunc)strcmp);
+ names = g_list_sort_with_data(names, strcmp_wrap, NULL);
qemu_printf("\nRecognized CPUID flags:\n");
listflags(names);
@@ -6700,7 +7748,7 @@ static const gchar *x86_gdb_arch_name(CPUState *cs)
#endif
}
-static void x86_cpu_cpudef_class_init(ObjectClass *oc, void *data)
+static void x86_cpu_cpudef_class_init(ObjectClass *oc, const void *data)
{
const X86CPUModel *model = data;
X86CPUClass *xcc = X86_CPU_CLASS(oc);
@@ -6830,14 +7878,35 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
}
*edx = env->features[FEAT_1_EDX];
if (threads_per_pkg > 1) {
- *ebx |= threads_per_pkg << 16;
- }
- if (!cpu->enable_pmu) {
- *ecx &= ~CPUID_EXT_PDCM;
+ uint32_t num;
+
+ /*
+ * For CPUID.01H.EBX[Bits 23-16], AMD requires logical processor
+             * count, but Intel needs the maximum number of addressable IDs for
+ * logical processors per package.
+ */
+ if (cpu->vendor_cpuid_only_v2 &&
+ (IS_INTEL_CPU(env) || IS_ZHAOXIN_CPU(env))) {
+ num = 1 << apicid_pkg_offset(topo_info);
+ } else {
+ num = threads_per_pkg;
+ }
+
+ /* Fixup overflow: max value for bits 23-16 is 255. */
+ *ebx |= MIN(num, 255) << 16;
}
break;
- case 2:
- /* cache info: needed for Pentium Pro compatibility */
+ case 2: { /* cache info: needed for Pentium Pro compatibility */
+ const CPUCaches *caches;
+
+ if (env->enable_legacy_cpuid2_cache) {
+ caches = &legacy_intel_cpuid2_cache_info;
+ } else if (env->enable_legacy_vendor_cache) {
+ caches = &legacy_intel_cache_info;
+ } else {
+ caches = &env->cache_info;
+ }
+
if (cpu->cache_info_passthrough) {
x86_cpu_get_cache_cpuid(index, 0, eax, ebx, ecx, edx);
break;
@@ -6845,18 +7914,18 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
*eax = *ebx = *ecx = *edx = 0;
break;
}
- *eax = 1; /* Number of CPUID[EAX=2] calls required */
- *ebx = 0;
- if (!cpu->enable_l3_cache) {
- *ecx = 0;
+ encode_cache_cpuid2(cpu, caches, eax, ebx, ecx, edx);
+ break;
+ }
+ case 4: {
+ const CPUCaches *caches;
+
+ if (env->enable_legacy_vendor_cache) {
+ caches = &legacy_intel_cache_info;
} else {
- *ecx = cpuid2_cache_descriptor(env->cache_info_cpuid2.l3_cache);
+ caches = &env->cache_info;
}
- *edx = (cpuid2_cache_descriptor(env->cache_info_cpuid2.l1d_cache) << 16) |
- (cpuid2_cache_descriptor(env->cache_info_cpuid2.l1i_cache) << 8) |
- (cpuid2_cache_descriptor(env->cache_info_cpuid2.l2_cache));
- break;
- case 4:
+
/* cache info: needed for Core compatibility */
if (cpu->cache_info_passthrough) {
x86_cpu_get_cache_cpuid(index, count, eax, ebx, ecx, edx);
@@ -6868,13 +7937,13 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
int host_vcpus_per_cache = 1 + ((*eax & 0x3FFC000) >> 14);
*eax &= ~0xFC000000;
- *eax |= max_core_ids_in_package(topo_info) << 26;
+ *eax |= MIN(max_core_ids_in_package(topo_info), 63) << 26;
if (host_vcpus_per_cache > threads_per_pkg) {
*eax &= ~0x3FFC000;
/* Share the cache at package level. */
- *eax |= max_thread_ids_for_cache(topo_info,
- CPU_TOPOLOGY_LEVEL_SOCKET) << 14;
+ *eax |= MIN(max_thread_ids_for_cache(topo_info,
+ CPU_TOPOLOGY_LEVEL_SOCKET), 4095) << 14;
}
}
} else if (cpu->vendor_cpuid_only && IS_AMD_CPU(env)) {
@@ -6884,30 +7953,26 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
switch (count) {
case 0: /* L1 dcache info */
- encode_cache_cpuid4(env->cache_info_cpuid4.l1d_cache,
- topo_info,
+ encode_cache_cpuid4(caches->l1d_cache, topo_info,
eax, ebx, ecx, edx);
if (!cpu->l1_cache_per_core) {
*eax &= ~MAKE_64BIT_MASK(14, 12);
}
break;
case 1: /* L1 icache info */
- encode_cache_cpuid4(env->cache_info_cpuid4.l1i_cache,
- topo_info,
+ encode_cache_cpuid4(caches->l1i_cache, topo_info,
eax, ebx, ecx, edx);
if (!cpu->l1_cache_per_core) {
*eax &= ~MAKE_64BIT_MASK(14, 12);
}
break;
case 2: /* L2 cache info */
- encode_cache_cpuid4(env->cache_info_cpuid4.l2_cache,
- topo_info,
+ encode_cache_cpuid4(caches->l2_cache, topo_info,
eax, ebx, ecx, edx);
break;
case 3: /* L3 cache info */
if (cpu->enable_l3_cache) {
- encode_cache_cpuid4(env->cache_info_cpuid4.l3_cache,
- topo_info,
+ encode_cache_cpuid4(caches->l3_cache, topo_info,
eax, ebx, ecx, edx);
break;
}
@@ -6918,6 +7983,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
}
}
break;
+ }
case 5:
/* MONITOR/MWAIT Leaf */
*eax = cpu->mwait.eax; /* Smallest monitor-line size in bytes */
@@ -6945,9 +8011,9 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
*edx = env->features[FEAT_7_0_EDX]; /* Feature flags */
} else if (count == 1) {
*eax = env->features[FEAT_7_1_EAX];
+ *ecx = env->features[FEAT_7_1_ECX];
*edx = env->features[FEAT_7_1_EDX];
*ebx = 0;
- *ecx = 0;
} else if (count == 2) {
*edx = env->features[FEAT_7_2_EDX];
*eax = 0;
@@ -7008,21 +8074,6 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
assert(!(*eax & ~0x1f));
*ebx &= 0xffff; /* The count doesn't need to be reliable. */
break;
- case 0x1C:
- if (cpu->enable_pmu && (env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_ARCH_LBR)) {
- x86_cpu_get_supported_cpuid(0x1C, 0, eax, ebx, ecx, edx);
- *edx = 0;
- }
- break;
- case 0x1F:
- /* V2 Extended Topology Enumeration Leaf */
- if (!x86_has_extended_topo(env->avail_cpu_topo)) {
- *eax = *ebx = *ecx = *edx = 0;
- break;
- }
-
- encode_topo_cpuid1f(env, count, topo_info, eax, ebx, ecx, edx);
- break;
case 0xD: {
/* Processor Extended State */
*eax = 0;
@@ -7163,6 +8214,12 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
}
break;
}
+ case 0x1C:
+ if (cpu->enable_pmu && (env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_ARCH_LBR)) {
+ x86_cpu_get_supported_cpuid(0x1C, 0, eax, ebx, ecx, edx);
+ *edx = 0;
+ }
+ break;
case 0x1D: {
        /* AMX TILE, for now hardcoded for Sapphire Rapids */
*eax = 0;
@@ -7200,6 +8257,15 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
}
break;
}
+ case 0x1F:
+ /* V2 Extended Topology Enumeration Leaf */
+ if (!x86_has_cpuid_0x1f(cpu)) {
+ *eax = *ebx = *ecx = *edx = 0;
+ break;
+ }
+
+ encode_topo_cpuid1f(env, count, topo_info, eax, ebx, ecx, edx);
+ break;
case 0x24: {
*eax = 0;
*ebx = 0;
@@ -7236,9 +8302,15 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
break;
case 0x80000000:
*eax = env->cpuid_xlevel;
- *ebx = env->cpuid_vendor1;
- *edx = env->cpuid_vendor2;
- *ecx = env->cpuid_vendor3;
+
+ if (cpu->vendor_cpuid_only_v2 &&
+ (IS_INTEL_CPU(env) || IS_ZHAOXIN_CPU(env))) {
+ *ebx = *ecx = *edx = 0;
+ } else {
+ *ebx = env->cpuid_vendor1;
+ *edx = env->cpuid_vendor2;
+ *ecx = env->cpuid_vendor3;
+ }
break;
case 0x80000001:
*eax = env->cpuid_version;
@@ -7246,7 +8318,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
*ecx = env->features[FEAT_8000_0001_ECX];
*edx = env->features[FEAT_8000_0001_EDX];
- if (tcg_enabled() && env->cpuid_vendor1 == CPUID_VENDOR_INTEL_1 &&
+ if (tcg_enabled() && IS_INTEL_CPU(env) &&
!(env->hflags & HF_LMA_MASK)) {
*edx &= ~CPUID_EXT2_SYSCALL;
}
@@ -7259,41 +8331,78 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
*ecx = env->cpuid_model[(index - 0x80000002) * 4 + 2];
*edx = env->cpuid_model[(index - 0x80000002) * 4 + 3];
break;
- case 0x80000005:
- /* cache info (L1 cache) */
+ case 0x80000005: {
+ /* cache info (L1 cache/TLB Associativity Field) */
+ const CPUCaches *caches;
+
+ if (env->enable_legacy_vendor_cache) {
+ caches = &legacy_amd_cache_info;
+ } else {
+ caches = &env->cache_info;
+ }
+
if (cpu->cache_info_passthrough) {
x86_cpu_get_cache_cpuid(index, 0, eax, ebx, ecx, edx);
break;
}
+
+ if (cpu->vendor_cpuid_only_v2 && IS_INTEL_CPU(env)) {
+ *eax = *ebx = *ecx = *edx = 0;
+ break;
+ }
+
*eax = (L1_DTLB_2M_ASSOC << 24) | (L1_DTLB_2M_ENTRIES << 16) |
(L1_ITLB_2M_ASSOC << 8) | (L1_ITLB_2M_ENTRIES);
*ebx = (L1_DTLB_4K_ASSOC << 24) | (L1_DTLB_4K_ENTRIES << 16) |
(L1_ITLB_4K_ASSOC << 8) | (L1_ITLB_4K_ENTRIES);
- *ecx = encode_cache_cpuid80000005(env->cache_info_amd.l1d_cache);
- *edx = encode_cache_cpuid80000005(env->cache_info_amd.l1i_cache);
+ *ecx = encode_cache_cpuid80000005(caches->l1d_cache);
+ *edx = encode_cache_cpuid80000005(caches->l1i_cache);
break;
- case 0x80000006:
- /* cache info (L2 cache) */
+ }
+ case 0x80000006: { /* cache info (L2 cache/TLB/L3 cache) */
+ const CPUCaches *caches;
+
+ if (env->enable_legacy_vendor_cache) {
+ caches = &legacy_amd_cache_info;
+ } else {
+ caches = &env->cache_info;
+ }
+
if (cpu->cache_info_passthrough) {
x86_cpu_get_cache_cpuid(index, 0, eax, ebx, ecx, edx);
break;
}
- *eax = (AMD_ENC_ASSOC(L2_DTLB_2M_ASSOC) << 28) |
+
+ if (cpu->vendor_cpuid_only_v2 &&
+ (IS_INTEL_CPU(env) || IS_ZHAOXIN_CPU(env))) {
+ *eax = *ebx = 0;
+ encode_cache_cpuid80000006(caches->l2_cache,
+ NULL, ecx, edx);
+ break;
+ }
+
+ *eax = (X86_ENC_ASSOC(L2_DTLB_2M_ASSOC) << 28) |
(L2_DTLB_2M_ENTRIES << 16) |
- (AMD_ENC_ASSOC(L2_ITLB_2M_ASSOC) << 12) |
+ (X86_ENC_ASSOC(L2_ITLB_2M_ASSOC) << 12) |
(L2_ITLB_2M_ENTRIES);
- *ebx = (AMD_ENC_ASSOC(L2_DTLB_4K_ASSOC) << 28) |
+ *ebx = (X86_ENC_ASSOC(L2_DTLB_4K_ASSOC) << 28) |
(L2_DTLB_4K_ENTRIES << 16) |
- (AMD_ENC_ASSOC(L2_ITLB_4K_ASSOC) << 12) |
+ (X86_ENC_ASSOC(L2_ITLB_4K_ASSOC) << 12) |
(L2_ITLB_4K_ENTRIES);
- encode_cache_cpuid80000006(env->cache_info_amd.l2_cache,
+
+ encode_cache_cpuid80000006(caches->l2_cache,
cpu->enable_l3_cache ?
- env->cache_info_amd.l3_cache : NULL,
+ caches->l3_cache : NULL,
ecx, edx);
break;
+ }
case 0x80000007:
*eax = 0;
- *ebx = env->features[FEAT_8000_0007_EBX];
+ if (cpu->vendor_cpuid_only_v2 && IS_INTEL_CPU(env)) {
+ *ebx = 0;
+ } else {
+ *ebx = env->features[FEAT_8000_0007_EBX];
+ }
*ecx = 0;
*edx = env->features[FEAT_8000_0007_EDX];
break;
@@ -7306,6 +8415,17 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
*eax |= (cpu->guest_phys_bits << 16);
}
*ebx = env->features[FEAT_8000_0008_EBX];
+
+ /*
+ * Don't emulate Bits [7:0] & Bits [15:12] for Intel/Zhaoxin, since
+         * they use the 0x1F leaf instead.
+ */
+ if (cpu->vendor_cpuid_only_v2 &&
+ (IS_INTEL_CPU(env) || IS_ZHAOXIN_CPU(env))) {
+ *ecx = *edx = 0;
+ break;
+ }
+
if (threads_per_pkg > 1) {
/*
* Bits 15:12 is "The number of bits in the initial
@@ -7341,19 +8461,19 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
}
switch (count) {
case 0: /* L1 dcache info */
- encode_cache_cpuid8000001d(env->cache_info_amd.l1d_cache,
+ encode_cache_cpuid8000001d(env->cache_info.l1d_cache,
topo_info, eax, ebx, ecx, edx);
break;
case 1: /* L1 icache info */
- encode_cache_cpuid8000001d(env->cache_info_amd.l1i_cache,
+ encode_cache_cpuid8000001d(env->cache_info.l1i_cache,
topo_info, eax, ebx, ecx, edx);
break;
case 2: /* L2 cache info */
- encode_cache_cpuid8000001d(env->cache_info_amd.l2_cache,
+ encode_cache_cpuid8000001d(env->cache_info.l2_cache,
topo_info, eax, ebx, ecx, edx);
break;
case 3: /* L3 cache info */
- encode_cache_cpuid8000001d(env->cache_info_amd.l3_cache,
+ encode_cache_cpuid8000001d(env->cache_info.l3_cache,
topo_info, eax, ebx, ecx, edx);
break;
default: /* end of info */
@@ -7374,6 +8494,21 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
*edx = 0;
}
break;
+ case 0x8000001F:
+ *eax = *ebx = *ecx = *edx = 0;
+ if (sev_enabled()) {
+ *eax = 0x2;
+ *eax |= sev_es_enabled() ? 0x8 : 0;
+ *eax |= sev_snp_enabled() ? 0x10 : 0;
+ *ebx = sev_get_cbit_position() & 0x3f; /* EBX[5:0] */
+ *ebx |= (sev_get_reduced_phys_bits() & 0x3f) << 6; /* EBX[11:6] */
+ }
+ break;
+ case 0x80000021:
+ *eax = *ebx = *ecx = *edx = 0;
+ *eax = env->features[FEAT_8000_0021_EAX];
+ *ebx = env->features[FEAT_8000_0021_EBX];
+ break;
case 0x80000022:
*eax = *ebx = *ecx = *edx = 0;
/* AMD Extended Performance Monitoring and Debug */
@@ -7406,21 +8541,6 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
*ecx = 0;
*edx = 0;
break;
- case 0x8000001F:
- *eax = *ebx = *ecx = *edx = 0;
- if (sev_enabled()) {
- *eax = 0x2;
- *eax |= sev_es_enabled() ? 0x8 : 0;
- *eax |= sev_snp_enabled() ? 0x10 : 0;
- *ebx = sev_get_cbit_position() & 0x3f; /* EBX[5:0] */
- *ebx |= (sev_get_reduced_phys_bits() & 0x3f) << 6; /* EBX[11:6] */
- }
- break;
- case 0x80000021:
- *eax = *ebx = *ecx = *edx = 0;
- *eax = env->features[FEAT_8000_0021_EAX];
- *ebx = env->features[FEAT_8000_0021_EBX];
- break;
default:
/* reserved values: zero */
*eax = 0;
@@ -7640,7 +8760,7 @@ static void mce_init(X86CPU *cpu)
CPUX86State *cenv = &cpu->env;
unsigned int bank;
- if (((cenv->cpuid_version >> 8) & 0xf) >= 6
+ if (x86_cpu_family(cenv->cpuid_version) >= 6
&& (cenv->features[FEAT_1_EDX] & (CPUID_MCE | CPUID_MCA)) ==
(CPUID_MCE | CPUID_MCA)) {
cenv->mcg_cap = MCE_CAP_DEF | MCE_BANKS_DEF |
@@ -7768,6 +8888,7 @@ static void x86_cpu_enable_xsave_components(X86CPU *cpu)
*/
void x86_cpu_expand_features(X86CPU *cpu, Error **errp)
{
+ X86CPUClass *xcc = X86_CPU_GET_CLASS(cpu);
CPUX86State *env = &cpu->env;
FeatureWord w;
int i;
@@ -7787,12 +8908,12 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp)
}
}
- /*TODO: Now cpu->max_features doesn't overwrite features
+ /* TODO: Now xcc->max_features doesn't overwrite features
* set using QOM properties, and we can convert
* plus_features & minus_features to global properties
* inside x86_cpu_parse_featurestr() too.
*/
- if (cpu->max_features) {
+ if (xcc->max_features) {
for (w = 0; w < FEATURE_WORDS; w++) {
/* Override only features that weren't set explicitly
* by the user.
@@ -7824,6 +8945,14 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp)
}
}
+ /* PDCM is fixed1 bit for TDX */
+ if (!cpu->enable_pmu && !is_tdx_vm()) {
+ mark_unavailable_features(cpu, FEAT_1_ECX,
+ env->user_features[FEAT_1_ECX] & CPUID_EXT_PDCM,
+ "This feature is not available due to PMU being disabled");
+ env->features[FEAT_1_ECX] &= ~CPUID_EXT_PDCM;
+ }
+
for (i = 0; i < ARRAY_SIZE(feature_dependencies); i++) {
FeatureDep *d = &feature_dependencies[i];
if (!(env->features[d->from.index] & d->from.mask)) {
@@ -7852,6 +8981,7 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp)
x86_cpu_adjust_feat_level(cpu, FEAT_6_EAX);
x86_cpu_adjust_feat_level(cpu, FEAT_7_0_ECX);
x86_cpu_adjust_feat_level(cpu, FEAT_7_1_EAX);
+ x86_cpu_adjust_feat_level(cpu, FEAT_7_1_ECX);
x86_cpu_adjust_feat_level(cpu, FEAT_7_1_EDX);
x86_cpu_adjust_feat_level(cpu, FEAT_7_2_EDX);
x86_cpu_adjust_feat_level(cpu, FEAT_8000_0001_EDX);
@@ -7880,7 +9010,7 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp)
* cpu->vendor_cpuid_only has been unset for compatibility with older
* machine types.
*/
- if (x86_has_extended_topo(env->avail_cpu_topo) &&
+ if (x86_has_cpuid_0x1f(cpu) &&
(IS_INTEL_CPU(env) || !cpu->vendor_cpuid_only)) {
x86_cpu_adjust_level(cpu, &env->cpuid_min_level, 0x1F);
}
@@ -8052,46 +9182,34 @@ static bool x86_cpu_update_smp_cache_topo(MachineState *ms, X86CPU *cpu,
level = machine_get_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L1D);
if (level != CPU_TOPOLOGY_LEVEL_DEFAULT) {
- env->cache_info_cpuid4.l1d_cache->share_level = level;
- env->cache_info_amd.l1d_cache->share_level = level;
+ env->cache_info.l1d_cache->share_level = level;
} else {
machine_set_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L1D,
- env->cache_info_cpuid4.l1d_cache->share_level);
- machine_set_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L1D,
- env->cache_info_amd.l1d_cache->share_level);
+ env->cache_info.l1d_cache->share_level);
}
level = machine_get_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L1I);
if (level != CPU_TOPOLOGY_LEVEL_DEFAULT) {
- env->cache_info_cpuid4.l1i_cache->share_level = level;
- env->cache_info_amd.l1i_cache->share_level = level;
+ env->cache_info.l1i_cache->share_level = level;
} else {
machine_set_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L1I,
- env->cache_info_cpuid4.l1i_cache->share_level);
- machine_set_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L1I,
- env->cache_info_amd.l1i_cache->share_level);
+ env->cache_info.l1i_cache->share_level);
}
level = machine_get_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L2);
if (level != CPU_TOPOLOGY_LEVEL_DEFAULT) {
- env->cache_info_cpuid4.l2_cache->share_level = level;
- env->cache_info_amd.l2_cache->share_level = level;
+ env->cache_info.l2_cache->share_level = level;
} else {
machine_set_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L2,
- env->cache_info_cpuid4.l2_cache->share_level);
- machine_set_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L2,
- env->cache_info_amd.l2_cache->share_level);
+ env->cache_info.l2_cache->share_level);
}
level = machine_get_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L3);
if (level != CPU_TOPOLOGY_LEVEL_DEFAULT) {
- env->cache_info_cpuid4.l3_cache->share_level = level;
- env->cache_info_amd.l3_cache->share_level = level;
+ env->cache_info.l3_cache->share_level = level;
} else {
machine_set_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L3,
- env->cache_info_cpuid4.l3_cache->share_level);
- machine_set_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L3,
- env->cache_info_amd.l3_cache->share_level);
+ env->cache_info.l3_cache->share_level);
}
if (!machine_check_smp_cache(ms, errp)) {
@@ -8115,6 +9233,16 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp)
tcg_cflags_set(cs, CF_PCREL);
#endif
+ /*
+     * x-vendor-cpuid-only and its v2 variant should be internal only, but
+     * QEMU has no notion of an "internal" property.
+ */
+ if (!cpu->vendor_cpuid_only && cpu->vendor_cpuid_only_v2) {
+ error_setg(errp, "x-vendor-cpuid-only-v2 property "
+ "depends on x-vendor-cpuid-only");
+ return;
+ }
+
if (cpu->apic_id == UNASSIGNED_APIC_ID) {
error_setg(errp, "apic-id property was not initialized properly");
return;
@@ -8318,24 +9446,22 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp)
"CPU model '%s' doesn't support legacy-cache=off", name);
return;
}
- env->cache_info_cpuid2 = env->cache_info_cpuid4 = env->cache_info_amd =
- *cache_info;
+ env->cache_info = *cache_info;
} else {
/* Build legacy cache information */
- env->cache_info_cpuid2.l1d_cache = &legacy_l1d_cache;
- env->cache_info_cpuid2.l1i_cache = &legacy_l1i_cache;
- env->cache_info_cpuid2.l2_cache = &legacy_l2_cache_cpuid2;
- env->cache_info_cpuid2.l3_cache = &legacy_l3_cache;
+ if (!cpu->consistent_cache) {
+ env->enable_legacy_cpuid2_cache = true;
+ }
- env->cache_info_cpuid4.l1d_cache = &legacy_l1d_cache;
- env->cache_info_cpuid4.l1i_cache = &legacy_l1i_cache;
- env->cache_info_cpuid4.l2_cache = &legacy_l2_cache;
- env->cache_info_cpuid4.l3_cache = &legacy_l3_cache;
+ if (!cpu->vendor_cpuid_only_v2) {
+ env->enable_legacy_vendor_cache = true;
+ }
- env->cache_info_amd.l1d_cache = &legacy_l1d_cache_amd;
- env->cache_info_amd.l1i_cache = &legacy_l1i_cache_amd;
- env->cache_info_amd.l2_cache = &legacy_l2_cache_amd;
- env->cache_info_amd.l3_cache = &legacy_l3_cache;
+ if (IS_AMD_CPU(env)) {
+ env->cache_info = legacy_amd_cache_info;
+ } else {
+ env->cache_info = legacy_intel_cache_info;
+ }
}
#ifndef CONFIG_USER_ONLY
@@ -8514,7 +9640,12 @@ static void x86_cpu_post_initfn(Object *obj)
}
}
- accel_cpu_instance_init(CPU(obj));
+#ifndef CONFIG_USER_ONLY
+ if (current_machine && current_machine->cgs) {
+ x86_confidential_guest_cpu_instance_init(
+ X86_CONFIDENTIAL_GUEST(current_machine->cgs), (CPU(obj)));
+ }
+#endif
}
static void x86_cpu_init_default_topo(X86CPU *cpu)
@@ -8583,6 +9714,8 @@ static void x86_cpu_initfn(Object *obj)
if (xcc->model) {
x86_cpu_load_model(cpu, xcc->model);
}
+
+ accel_cpu_instance_init(CPU(obj));
}
static int64_t x86_cpu_get_arch_id(CPUState *cs)
@@ -8663,39 +9796,6 @@ static bool x86_cpu_has_work(CPUState *cs)
}
#endif /* !CONFIG_USER_ONLY */
-int x86_mmu_index_pl(CPUX86State *env, unsigned pl)
-{
- int mmu_index_32 = (env->hflags & HF_CS64_MASK) ? 0 : 1;
- int mmu_index_base =
- pl == 3 ? MMU_USER64_IDX :
- !(env->hflags & HF_SMAP_MASK) ? MMU_KNOSMAP64_IDX :
- (env->eflags & AC_MASK) ? MMU_KNOSMAP64_IDX : MMU_KSMAP64_IDX;
-
- return mmu_index_base + mmu_index_32;
-}
-
-static int x86_cpu_mmu_index(CPUState *cs, bool ifetch)
-{
- CPUX86State *env = cpu_env(cs);
- return x86_mmu_index_pl(env, env->hflags & HF_CPL_MASK);
-}
-
-static int x86_mmu_index_kernel_pl(CPUX86State *env, unsigned pl)
-{
- int mmu_index_32 = (env->hflags & HF_LMA_MASK) ? 0 : 1;
- int mmu_index_base =
- !(env->hflags & HF_SMAP_MASK) ? MMU_KNOSMAP64_IDX :
- (pl < 3 && (env->eflags & AC_MASK)
- ? MMU_KNOSMAP64_IDX : MMU_KSMAP64_IDX);
-
- return mmu_index_base + mmu_index_32;
-}
-
-int cpu_mmu_index_kernel(CPUX86State *env)
-{
- return x86_mmu_index_kernel_pl(env, env->hflags & HF_CPL_MASK);
-}
-
static void x86_disas_set_info(CPUState *cs, disassemble_info *info)
{
X86CPU *cpu = X86_CPU(cs);
@@ -8862,6 +9962,7 @@ static const Property x86_cpu_properties[] = {
DEFINE_PROP_STRING("hv-vendor-id", X86CPU, hyperv_vendor),
DEFINE_PROP_BOOL("cpuid-0xb", X86CPU, enable_cpuid_0xb, true),
DEFINE_PROP_BOOL("x-vendor-cpuid-only", X86CPU, vendor_cpuid_only, true),
+ DEFINE_PROP_BOOL("x-vendor-cpuid-only-v2", X86CPU, vendor_cpuid_only_v2, true),
DEFINE_PROP_BOOL("x-amd-topoext-features-only", X86CPU, amd_topoext_features_only, true),
DEFINE_PROP_BOOL("lmce", X86CPU, enable_lmce, false),
DEFINE_PROP_BOOL("l3-cache", X86CPU, enable_l3_cache, true),
@@ -8876,6 +9977,7 @@ static const Property x86_cpu_properties[] = {
* own cache information (see x86_cpu_load_def()).
*/
DEFINE_PROP_BOOL("legacy-cache", X86CPU, legacy_cache, true),
+ DEFINE_PROP_BOOL("x-consistent-cache", X86CPU, consistent_cache, true),
DEFINE_PROP_BOOL("legacy-multi-node", X86CPU, legacy_multi_node, false),
DEFINE_PROP_BOOL("xen-vapic", X86CPU, xen_vapic, false),
@@ -8897,6 +9999,7 @@ static const Property x86_cpu_properties[] = {
DEFINE_PROP_BOOL("x-intel-pt-auto-level", X86CPU, intel_pt_auto_level,
true),
DEFINE_PROP_BOOL("x-l1-cache-per-thread", X86CPU, l1_cache_per_core, true),
+ DEFINE_PROP_BOOL("x-force-cpuid-0x1f", X86CPU, force_cpuid_0x1f, false),
};
#ifndef CONFIG_USER_ONLY
@@ -8917,7 +10020,7 @@ static const struct SysemuCPUOps i386_sysemu_ops = {
};
#endif
-static void x86_cpu_common_class_init(ObjectClass *oc, void *data)
+static void x86_cpu_common_class_init(ObjectClass *oc, const void *data)
{
X86CPUClass *xcc = X86_CPU_CLASS(oc);
CPUClass *cc = CPU_CLASS(oc);
@@ -8936,8 +10039,8 @@ static void x86_cpu_common_class_init(ObjectClass *oc, void *data)
cc->reset_dump_flags = CPU_DUMP_FPU | CPU_DUMP_CCOP;
cc->class_by_name = x86_cpu_class_by_name;
+ cc->list_cpus = x86_cpu_list;
cc->parse_features = x86_cpu_parse_featurestr;
- cc->mmu_index = x86_cpu_mmu_index;
cc->dump_state = x86_cpu_dump_state;
cc->set_pc = x86_cpu_set_pc;
cc->get_pc = x86_cpu_get_pc;
@@ -8948,6 +10051,9 @@ static void x86_cpu_common_class_init(ObjectClass *oc, void *data)
#ifndef CONFIG_USER_ONLY
cc->sysemu_ops = &i386_sysemu_ops;
#endif /* !CONFIG_USER_ONLY */
+#ifdef CONFIG_TCG
+ cc->tcg_ops = &x86_tcg_ops;
+#endif /* CONFIG_TCG */
cc->gdb_arch_name = x86_gdb_arch_name;
#ifdef TARGET_X86_64
@@ -9014,7 +10120,7 @@ static const TypeInfo x86_cpu_type_info = {
};
/* "base" CPU model, used by query-cpu-model-expansion */
-static void x86_cpu_base_class_init(ObjectClass *oc, void *data)
+static void x86_cpu_base_class_init(ObjectClass *oc, const void *data)
{
X86CPUClass *xcc = X86_CPU_CLASS(oc);
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 119efc6..f977fc4 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -23,7 +23,9 @@
#include "system/tcg.h"
#include "cpu-qom.h"
#include "kvm/hyperv-proto.h"
+#include "exec/cpu-common.h"
#include "exec/cpu-defs.h"
+#include "exec/cpu-interrupt.h"
#include "exec/memop.h"
#include "hw/i386/topology.h"
#include "qapi/qapi-types-common.h"
@@ -33,12 +35,6 @@
#define XEN_NR_VIRQS 24
-#define KVM_HAVE_MCE_INJECTION 1
-
-/* support for self modifying code even if the modified instruction is
- close to the modifying instruction */
-#define TARGET_HAS_PRECISE_SMC
-
#ifdef TARGET_X86_64
#define I386_ELF_MACHINE EM_X86_64
#define ELF_MACHINE_UNAME "x86_64"
@@ -588,6 +584,7 @@ typedef enum X86Seg {
#define XSTATE_OPMASK_BIT 5
#define XSTATE_ZMM_Hi256_BIT 6
#define XSTATE_Hi16_ZMM_BIT 7
+#define XSTATE_PT_BIT 8
#define XSTATE_PKRU_BIT 9
#define XSTATE_ARCH_LBR_BIT 15
#define XSTATE_XTILE_CFG_BIT 17
@@ -601,6 +598,7 @@ typedef enum X86Seg {
#define XSTATE_OPMASK_MASK (1ULL << XSTATE_OPMASK_BIT)
#define XSTATE_ZMM_Hi256_MASK (1ULL << XSTATE_ZMM_Hi256_BIT)
#define XSTATE_Hi16_ZMM_MASK (1ULL << XSTATE_Hi16_ZMM_BIT)
+#define XSTATE_PT_MASK (1ULL << XSTATE_PT_BIT)
#define XSTATE_PKRU_MASK (1ULL << XSTATE_PKRU_BIT)
#define XSTATE_ARCH_LBR_MASK (1ULL << XSTATE_ARCH_LBR_BIT)
#define XSTATE_XTILE_CFG_MASK (1ULL << XSTATE_XTILE_CFG_BIT)
@@ -623,6 +621,11 @@ typedef enum X86Seg {
XSTATE_Hi16_ZMM_MASK | XSTATE_PKRU_MASK | \
XSTATE_XTILE_CFG_MASK | XSTATE_XTILE_DATA_MASK)
+/* CPUID feature bits available in XSS */
+#define CPUID_XSTATE_XSS_MASK (XSTATE_ARCH_LBR_MASK)
+
+#define CPUID_XSTATE_MASK (CPUID_XSTATE_XCR0_MASK | CPUID_XSTATE_XSS_MASK)
+
/* CPUID feature words */
typedef enum FeatureWord {
FEAT_1_EDX, /* CPUID[1].EDX */
@@ -665,12 +668,22 @@ typedef enum FeatureWord {
FEAT_SGX_12_1_EAX, /* CPUID[EAX=0x12,ECX=1].EAX (SGX ATTRIBUTES[31:0]) */
FEAT_XSAVE_XSS_LO, /* CPUID[EAX=0xd,ECX=1].ECX */
FEAT_XSAVE_XSS_HI, /* CPUID[EAX=0xd,ECX=1].EDX */
+ FEAT_7_1_ECX, /* CPUID[EAX=7,ECX=1].ECX */
FEAT_7_1_EDX, /* CPUID[EAX=7,ECX=1].EDX */
FEAT_7_2_EDX, /* CPUID[EAX=7,ECX=2].EDX */
FEAT_24_0_EBX, /* CPUID[EAX=0x24,ECX=0].EBX */
FEATURE_WORDS,
} FeatureWord;
+typedef struct FeatureMask {
+ FeatureWord index;
+ uint64_t mask;
+} FeatureMask;
+
+typedef struct FeatureDep {
+ FeatureMask from, to;
+} FeatureDep;
+
typedef uint64_t FeatureWordArray[FEATURE_WORDS];
uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w);
@@ -903,6 +916,8 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w);
#define CPUID_7_0_ECX_LA57 (1U << 16)
/* Read Processor ID */
#define CPUID_7_0_ECX_RDPID (1U << 22)
+/* KeyLocker */
+#define CPUID_7_0_ECX_KeyLocker (1U << 23)
/* Bus Lock Debug Exception */
#define CPUID_7_0_ECX_BUS_LOCK_DETECT (1U << 24)
/* Cache Line Demote Instruction */
@@ -924,6 +939,8 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w);
#define CPUID_7_0_EDX_FSRM (1U << 4)
/* AVX512 Vector Pair Intersection to a Pair of Mask Registers */
#define CPUID_7_0_EDX_AVX512_VP2INTERSECT (1U << 8)
+ /* "md_clear" VERW clears CPU buffers */
+#define CPUID_7_0_EDX_MD_CLEAR (1U << 10)
/* SERIALIZE instruction */
#define CPUID_7_0_EDX_SERIALIZE (1U << 14)
/* TSX Suspend Load Address Tracking instruction */
@@ -961,6 +978,8 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w);
#define CPUID_7_1_EAX_AVX_VNNI (1U << 4)
/* AVX512 BFloat16 Instruction */
#define CPUID_7_1_EAX_AVX512_BF16 (1U << 5)
+/* Linear address space separation */
+#define CPUID_7_1_EAX_LASS (1U << 6)
/* CMPCCXADD Instructions */
#define CPUID_7_1_EAX_CMPCCXADD (1U << 7)
/* Fast Zero REP MOVS */
@@ -982,6 +1001,9 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w);
/* Linear Address Masking */
#define CPUID_7_1_EAX_LAM (1U << 26)
+/* The immediate form of MSR access instructions */
+#define CPUID_7_1_ECX_MSR_IMM (1U << 5)
+
/* Support for VPDPB[SU,UU,SS]D[,S] */
#define CPUID_7_1_EDX_AVX_VNNI_INT8 (1U << 4)
/* AVX NE CONVERT Instructions */
@@ -1005,6 +1027,7 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w);
#define CPUID_7_2_EDX_DDPD_U (1U << 3)
/* Indicate bit 10 of the IA32_SPEC_CTRL MSR is supported */
#define CPUID_7_2_EDX_BHI_CTRL (1U << 4)
+
/* Do not exhibit MXCSR Configuration Dependent Timing (MCDT) behavior */
#define CPUID_7_2_EDX_MCDT_NO (1U << 5)
@@ -1074,12 +1097,16 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w);
/* Processor ignores nested data breakpoints */
#define CPUID_8000_0021_EAX_NO_NESTED_DATA_BP (1U << 0)
+/* WRMSR to FS_BASE, GS_BASE, or KERNEL_GS_BASE is non-serializing */
+#define CPUID_8000_0021_EAX_FS_GS_BASE_NS (1U << 1)
/* LFENCE is always serializing */
#define CPUID_8000_0021_EAX_LFENCE_ALWAYS_SERIALIZING (1U << 2)
/* Null Selector Clears Base */
#define CPUID_8000_0021_EAX_NULL_SEL_CLR_BASE (1U << 6)
/* Automatic IBRS */
#define CPUID_8000_0021_EAX_AUTO_IBRS (1U << 8)
+/* Indicates support for IC prefetch */
+#define CPUID_8000_0021_EAX_PREFETCHI (1U << 20)
/* Enhanced Return Address Predictor Security */
#define CPUID_8000_0021_EAX_ERAPS (1U << 24)
/* Selective Branch Predictor Barrier */
@@ -1104,6 +1131,7 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w);
#define CPUID_XSAVE_XSAVEC (1U << 1)
#define CPUID_XSAVE_XGETBV1 (1U << 2)
#define CPUID_XSAVE_XSAVES (1U << 3)
+#define CPUID_XSAVE_XFD (1U << 4)
#define CPUID_6_EAX_ARAT (1U << 2)
@@ -1131,7 +1159,8 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w);
/* PMM enabled */
#define CPUID_C000_0001_EDX_PMM_EN (1U << 13)
-#define CPUID_VENDOR_SZ 12
+#define CPUID_VENDOR_SZ 12
+#define CPUID_MODEL_ID_SZ 48
#define CPUID_VENDOR_INTEL_1 0x756e6547 /* "Genu" */
#define CPUID_VENDOR_INTEL_2 0x49656e69 /* "ineI" */
@@ -1610,8 +1639,6 @@ typedef struct {
#define MAX_FIXED_COUNTERS 3
#define MAX_GP_COUNTERS (MSR_IA32_PERF_STATUS - MSR_P6_EVNTSEL0)
-#define TARGET_INSN_START_EXTRA_WORDS 1
-
#define NB_OPMASK_REGS 8
/* CPU can't have 0xFFFFFFFF APIC ID, use that value to distinguish
@@ -1747,12 +1774,6 @@ typedef enum TPRAccess {
/* Cache information data structures: */
-enum CacheType {
- DATA_CACHE,
- INSTRUCTION_CACHE,
- UNIFIED_CACHE
-};
-
typedef struct CPUCacheInfo {
enum CacheType type;
uint8_t level;
@@ -1811,11 +1832,6 @@ typedef struct CPUCaches {
CPUCacheInfo *l3_cache;
} CPUCaches;
-typedef struct X86LazyFlags {
- target_ulong result;
- target_ulong auxbits;
-} X86LazyFlags;
-
typedef struct CPUArchState {
/* standard registers */
target_ulong regs[CPU_NB_REGS];
@@ -2057,11 +2073,14 @@ typedef struct CPUArchState {
/* Features that were explicitly enabled/disabled */
FeatureWordArray user_features;
uint32_t cpuid_model[12];
- /* Cache information for CPUID. When legacy-cache=on, the cache data
+ /*
+ * Cache information for CPUID. When legacy-cache=on, the cache data
* on each CPUID leaf will be different, because we keep compatibility
* with old QEMU versions.
*/
- CPUCaches cache_info_cpuid2, cache_info_cpuid4, cache_info_amd;
+ CPUCaches cache_info;
+ bool enable_legacy_cpuid2_cache;
+ bool enable_legacy_vendor_cache;
/* MTRRs */
uint64_t mtrr_fixed[11];
@@ -2108,7 +2127,6 @@ typedef struct CPUArchState {
QemuMutex xen_timers_lock;
#endif
#if defined(CONFIG_HVF)
- X86LazyFlags lflags;
void *emu_mmio_buf;
#endif
@@ -2182,7 +2200,6 @@ struct ArchCPU {
bool expose_tcg;
bool migratable;
bool migrate_smi_count;
- bool max_features; /* Enable all supported features automatically */
uint32_t apic_id;
/* Enables publishing of TSC increment and Local APIC bus frequencies to
@@ -2204,6 +2221,9 @@ struct ArchCPU {
/* Features that were filtered out because of missing host capabilities */
FeatureWordArray filtered_features;
+    /* Features force-enabled by the underlying hypervisor, e.g., TDX */
+ FeatureWordArray forced_on_features;
+
/* Enable PMU CPUID bits. This can't be enabled by default yet because
* it doesn't have ABI stability guarantees, as it passes all PMU CPUID
* bits returned by GET_SUPPORTED_CPUID (that depend on host CPU and kernel
@@ -2242,6 +2262,13 @@ struct ArchCPU {
*/
bool legacy_cache;
+ /*
+ * Compatibility bits for old machine types.
+     * If true, use the same cache model in CPUID leaves 0x2
+ * and 0x4.
+ */
+ bool consistent_cache;
+
/* Compatibility bits for old machine types.
* If true decode the CPUID Function 0x8000001E_ECX to support multiple
* nodes per processor
@@ -2251,12 +2278,24 @@ struct ArchCPU {
/* Compatibility bits for old machine types: */
bool enable_cpuid_0xb;
+    /* Force exposure of CPUID leaf 0x1F */
+ bool force_cpuid_0x1f;
+
/* Enable auto level-increase for all CPUID leaves */
bool full_cpuid_auto_level;
- /* Only advertise CPUID leaves defined by the vendor */
+ /*
+ * Compatibility bits for old machine types (PC machine v6.0 and older).
+ * Only advertise CPUID leaves defined by the vendor.
+ */
bool vendor_cpuid_only;
+ /*
+ * Compatibility bits for old machine types (PC machine v10.0 and older).
+ * Only advertise CPUID leaves defined by the vendor.
+ */
+ bool vendor_cpuid_only_v2;
+
/* Only advertise TOPOEXT features that AMD defines */
bool amd_topoext_features_only;
@@ -2329,6 +2368,7 @@ struct X86CPUClass {
*/
const X86CPUModel *model;
+ bool max_features; /* Enable all supported features automatically */
bool host_cpuid_required;
int ordering;
bool migration_safe;
@@ -2367,7 +2407,6 @@ int x86_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg);
int x86_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
void x86_cpu_gdb_init(CPUState *cs);
-void x86_cpu_list(void);
int cpu_x86_support_mca_broadcast(CPUX86State *env);
#ifndef CONFIG_USER_ONLY
@@ -2398,7 +2437,14 @@ static inline void cpu_x86_load_seg_cache(CPUX86State *env,
SegmentCache *sc;
unsigned int new_hflags;
- sc = &env->segs[seg_reg];
+ if (seg_reg == R_LDTR) {
+ sc = &env->ldt;
+ } else if (seg_reg == R_TR) {
+ sc = &env->tr;
+ } else {
+ sc = &env->segs[seg_reg];
+ }
+
sc->selector = selector;
sc->base = base;
sc->limit = limit;
@@ -2512,6 +2558,17 @@ void cpu_set_apic_feature(CPUX86State *env);
void host_cpuid(uint32_t function, uint32_t count,
uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx);
bool cpu_has_x2apic_feature(CPUX86State *env);
+bool is_feature_word_cpuid(uint32_t feature, uint32_t index, int reg);
+void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint64_t mask,
+ const char *verbose_prefix);
+void mark_forced_on_features(X86CPU *cpu, FeatureWord w, uint64_t mask,
+ const char *verbose_prefix);
+
+static inline bool x86_has_cpuid_0x1f(X86CPU *cpu)
+{
+ return cpu->force_cpuid_0x1f ||
+ x86_has_extended_topo(cpu->env.avail_cpu_topo);
+}
/* helper.c */
void x86_cpu_set_a20(X86CPU *cpu, int a20_state);
@@ -2561,8 +2618,6 @@ uint64_t cpu_get_tsc(CPUX86State *env);
#define TARGET_DEFAULT_CPU_TYPE X86_CPU_TYPE_NAME("qemu32")
#endif
-#define cpu_list x86_cpu_list
-
/* MMU modes definitions */
#define MMU_KSMAP64_IDX 0
#define MMU_KSMAP32_IDX 1
@@ -2597,35 +2652,17 @@ static inline bool is_mmu_index_32(int mmu_index)
return mmu_index & 1;
}
-int x86_mmu_index_pl(CPUX86State *env, unsigned pl);
-int cpu_mmu_index_kernel(CPUX86State *env);
-
#define CC_DST (env->cc_dst)
#define CC_SRC (env->cc_src)
#define CC_SRC2 (env->cc_src2)
#define CC_OP (env->cc_op)
-#include "exec/cpu-all.h"
#include "svm.h"
#if !defined(CONFIG_USER_ONLY)
#include "hw/i386/apic.h"
#endif
-static inline void cpu_get_tb_cpu_state(CPUX86State *env, vaddr *pc,
- uint64_t *cs_base, uint32_t *flags)
-{
- *flags = env->hflags |
- (env->eflags & (IOPL_MASK | TF_MASK | RF_MASK | VM_MASK | AC_MASK));
- if (env->hflags & HF_CS64_MASK) {
- *cs_base = 0;
- *pc = env->eip;
- } else {
- *cs_base = env->segs[R_CS].base;
- *pc = (uint32_t)(*cs_base + env->eip);
- }
-}
-
void do_cpu_init(X86CPU *cpu);
#define MCE_INJECT_BROADCAST 1
@@ -2660,6 +2697,36 @@ static inline int32_t x86_get_a20_mask(CPUX86State *env)
}
}
+static inline uint32_t x86_cpu_family(uint32_t eax)
+{
+ uint32_t family = (eax >> 8) & 0xf;
+
+ if (family == 0xf) {
+ family += (eax >> 20) & 0xff;
+ }
+
+ return family;
+}
+
+static inline uint32_t x86_cpu_model(uint32_t eax)
+{
+ uint32_t family, model;
+
+ family = x86_cpu_family(eax);
+ model = (eax >> 4) & 0xf;
+
+ if (family >= 0x6) {
+ model += ((eax >> 16) & 0xf) << 4;
+ }
+
+ return model;
+}
+
+static inline uint32_t x86_cpu_stepping(uint32_t eax)
+{
+ return eax & 0xf;
+}
+
static inline bool cpu_has_vmx(CPUX86State *env)
{
return env->features[FEAT_1_ECX] & CPUID_EXT_VMX;
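
For reference, the x86_cpu_family()/x86_cpu_model()/x86_cpu_stepping() helpers added above implement the usual extended family/model arithmetic from CPUID.01H.EAX. Below is a minimal standalone sketch of that arithmetic; the helper names and the 0x000806EC sample value are illustrative assumptions, not part of the patch.

#include <stdint.h>
#include <stdio.h>

/* Mirrors x86_cpu_family(): base family, plus extended family when base == 0xf. */
static uint32_t cpuid_family(uint32_t eax)
{
    uint32_t family = (eax >> 8) & 0xf;

    if (family == 0xf) {
        family += (eax >> 20) & 0xff;
    }
    return family;
}

/* Mirrors x86_cpu_model(): extended model bits only apply for family >= 6. */
static uint32_t cpuid_model(uint32_t eax)
{
    uint32_t model = (eax >> 4) & 0xf;

    if (cpuid_family(eax) >= 0x6) {
        model += ((eax >> 16) & 0xf) << 4;
    }
    return model;
}

int main(void)
{
    uint32_t eax = 0x000806EC;   /* hypothetical CPUID.01H.EAX value */

    /* Prints "family 6 model 142 stepping 12". */
    printf("family %u model %u stepping %u\n",
           cpuid_family(eax), cpuid_model(eax), eax & 0xf);
    return 0;
}
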
diff --git a/target/i386/emulate/x86_decode.c b/target/i386/emulate/x86_decode.c
index 7fee219..2eca398 100644
--- a/target/i386/emulate/x86_decode.c
+++ b/target/i386/emulate/x86_decode.c
@@ -26,7 +26,7 @@
static void decode_invalid(CPUX86State *env, struct x86_decode *decode)
{
- printf("%llx: failed to decode instruction ", env->eip);
+ printf(TARGET_FMT_lx ": failed to decode instruction ", env->eip);
for (int i = 0; i < decode->opcode_len; i++) {
printf("%x ", decode->opcode[i]);
}
@@ -109,8 +109,8 @@ static void decode_modrm_reg(CPUX86State *env, struct x86_decode *decode,
{
op->type = X86_VAR_REG;
op->reg = decode->modrm.reg;
- op->ptr = get_reg_ref(env, op->reg, decode->rex.rex, decode->rex.r,
- decode->operand_size);
+ op->regptr = get_reg_ref(env, op->reg, decode->rex.rex, decode->rex.r,
+ decode->operand_size);
}
static void decode_rax(CPUX86State *env, struct x86_decode *decode,
@@ -119,8 +119,8 @@ static void decode_rax(CPUX86State *env, struct x86_decode *decode,
op->type = X86_VAR_REG;
op->reg = R_EAX;
/* Since reg is always AX, REX prefix has no impact. */
- op->ptr = get_reg_ref(env, op->reg, false, 0,
- decode->operand_size);
+ op->regptr = get_reg_ref(env, op->reg, false, 0,
+ decode->operand_size);
}
static inline void decode_immediate(CPUX86State *env, struct x86_decode *decode,
@@ -262,16 +262,16 @@ static void decode_incgroup(CPUX86State *env, struct x86_decode *decode)
{
decode->op[0].type = X86_VAR_REG;
decode->op[0].reg = decode->opcode[0] - 0x40;
- decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex,
- decode->rex.b, decode->operand_size);
+ decode->op[0].regptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex,
+ decode->rex.b, decode->operand_size);
}
static void decode_decgroup(CPUX86State *env, struct x86_decode *decode)
{
decode->op[0].type = X86_VAR_REG;
decode->op[0].reg = decode->opcode[0] - 0x48;
- decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex,
- decode->rex.b, decode->operand_size);
+ decode->op[0].regptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex,
+ decode->rex.b, decode->operand_size);
}
static void decode_incgroup2(CPUX86State *env, struct x86_decode *decode)
@@ -287,16 +287,16 @@ static void decode_pushgroup(CPUX86State *env, struct x86_decode *decode)
{
decode->op[0].type = X86_VAR_REG;
decode->op[0].reg = decode->opcode[0] - 0x50;
- decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex,
- decode->rex.b, decode->operand_size);
+ decode->op[0].regptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex,
+ decode->rex.b, decode->operand_size);
}
static void decode_popgroup(CPUX86State *env, struct x86_decode *decode)
{
decode->op[0].type = X86_VAR_REG;
decode->op[0].reg = decode->opcode[0] - 0x58;
- decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex,
- decode->rex.b, decode->operand_size);
+ decode->op[0].regptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex,
+ decode->rex.b, decode->operand_size);
}
static void decode_jxx(CPUX86State *env, struct x86_decode *decode)
@@ -377,16 +377,16 @@ static void decode_xchgroup(CPUX86State *env, struct x86_decode *decode)
{
decode->op[0].type = X86_VAR_REG;
decode->op[0].reg = decode->opcode[0] - 0x90;
- decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex,
- decode->rex.b, decode->operand_size);
+ decode->op[0].regptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex,
+ decode->rex.b, decode->operand_size);
}
static void decode_movgroup(CPUX86State *env, struct x86_decode *decode)
{
decode->op[0].type = X86_VAR_REG;
decode->op[0].reg = decode->opcode[0] - 0xb8;
- decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex,
- decode->rex.b, decode->operand_size);
+ decode->op[0].regptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex,
+ decode->rex.b, decode->operand_size);
decode_immediate(env, decode, &decode->op[1], decode->operand_size);
}
@@ -394,15 +394,15 @@ static void fetch_moffs(CPUX86State *env, struct x86_decode *decode,
struct x86_decode_op *op)
{
op->type = X86_VAR_OFFSET;
- op->ptr = decode_bytes(env, decode, decode->addressing_size);
+ op->addr = decode_bytes(env, decode, decode->addressing_size);
}
static void decode_movgroup8(CPUX86State *env, struct x86_decode *decode)
{
decode->op[0].type = X86_VAR_REG;
decode->op[0].reg = decode->opcode[0] - 0xb0;
- decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex,
- decode->rex.b, decode->operand_size);
+ decode->op[0].regptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex,
+ decode->rex.b, decode->operand_size);
decode_immediate(env, decode, &decode->op[1], decode->operand_size);
}
@@ -411,8 +411,8 @@ static void decode_rcx(CPUX86State *env, struct x86_decode *decode,
{
op->type = X86_VAR_REG;
op->reg = R_ECX;
- op->ptr = get_reg_ref(env, op->reg, decode->rex.rex, decode->rex.b,
- decode->operand_size);
+ op->regptr = get_reg_ref(env, op->reg, decode->rex.rex, decode->rex.b,
+ decode->operand_size);
}
struct decode_tbl {
@@ -631,8 +631,8 @@ static void decode_bswap(CPUX86State *env, struct x86_decode *decode)
{
decode->op[0].type = X86_VAR_REG;
decode->op[0].reg = decode->opcode[1] - 0xc8;
- decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex,
- decode->rex.b, decode->operand_size);
+ decode->op[0].regptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex,
+ decode->rex.b, decode->operand_size);
}
static void decode_d9_4(CPUX86State *env, struct x86_decode *decode)
@@ -1408,7 +1408,7 @@ struct decode_tbl _2op_inst[] = {
};
struct decode_x87_tbl invl_inst_x87 = {0x0, 0, 0, 0, 0, false, false, NULL,
- NULL, decode_invalid, 0};
+ NULL, decode_invalid};
struct decode_x87_tbl _x87_inst[] = {
{0xd8, 0, 3, X86_DECODE_CMD_FADD, 10, false, false,
@@ -1456,8 +1456,7 @@ struct decode_x87_tbl _x87_inst[] = {
decode_x87_modrm_st0, NULL, decode_d9_4},
{0xd9, 4, 0, X86_DECODE_CMD_INVL, 4, false, false,
decode_x87_modrm_bytep, NULL, NULL},
- {0xd9, 5, 3, X86_DECODE_CMD_FLDxx, 10, false, false, NULL, NULL, NULL,
- RFLAGS_MASK_NONE},
+ {0xd9, 5, 3, X86_DECODE_CMD_FLDxx, 10, false, false, NULL, NULL, NULL},
{0xd9, 5, 0, X86_DECODE_CMD_FLDCW, 2, false, false,
decode_x87_modrm_bytep, NULL, NULL},
@@ -1478,20 +1477,17 @@ struct decode_x87_tbl _x87_inst[] = {
decode_x87_modrm_st0, NULL},
{0xda, 3, 3, X86_DECODE_CMD_FCMOV, 10, false, false, decode_x87_modrm_st0,
decode_x87_modrm_st0, NULL},
- {0xda, 4, 3, X86_DECODE_CMD_INVL, 10, false, false, NULL, NULL, NULL,
- RFLAGS_MASK_NONE},
+ {0xda, 4, 3, X86_DECODE_CMD_INVL, 10, false, false, NULL, NULL, NULL},
{0xda, 4, 0, X86_DECODE_CMD_FSUB, 4, false, false, decode_x87_modrm_st0,
decode_x87_modrm_intp, NULL},
{0xda, 5, 3, X86_DECODE_CMD_FUCOM, 10, false, true, decode_x87_modrm_st0,
decode_decode_x87_modrm_st0, NULL},
{0xda, 5, 0, X86_DECODE_CMD_FSUB, 4, true, false, decode_x87_modrm_st0,
decode_x87_modrm_intp, NULL},
- {0xda, 6, 3, X86_DECODE_CMD_INVL, 10, false, false, NULL, NULL, NULL,
- RFLAGS_MASK_NONE},
+ {0xda, 6, 3, X86_DECODE_CMD_INVL, 10, false, false, NULL, NULL, NULL},
{0xda, 6, 0, X86_DECODE_CMD_FDIV, 4, false, false, decode_x87_modrm_st0,
decode_x87_modrm_intp, NULL},
- {0xda, 7, 3, X86_DECODE_CMD_INVL, 10, false, false, NULL, NULL, NULL,
- RFLAGS_MASK_NONE},
+ {0xda, 7, 3, X86_DECODE_CMD_INVL, 10, false, false, NULL, NULL, NULL},
{0xda, 7, 0, X86_DECODE_CMD_FDIV, 4, true, false, decode_x87_modrm_st0,
decode_x87_modrm_intp, NULL},
@@ -1511,8 +1507,7 @@ struct decode_x87_tbl _x87_inst[] = {
decode_x87_modrm_intp, NULL, NULL},
{0xdb, 4, 3, X86_DECODE_CMD_INVL, 10, false, false, NULL, NULL,
decode_db_4},
- {0xdb, 4, 0, X86_DECODE_CMD_INVL, 10, false, false, NULL, NULL, NULL,
- RFLAGS_MASK_NONE},
+ {0xdb, 4, 0, X86_DECODE_CMD_INVL, 10, false, false, NULL, NULL, NULL},
{0xdb, 5, 3, X86_DECODE_CMD_FUCOMI, 10, false, false,
decode_x87_modrm_st0, decode_x87_modrm_st0, NULL},
{0xdb, 5, 0, X86_DECODE_CMD_FLD, 10, false, false,
@@ -1661,16 +1656,16 @@ void calc_modrm_operand16(CPUX86State *env, struct x86_decode *decode,
}
calc_addr:
if (X86_DECODE_CMD_LEA == decode->cmd) {
- op->ptr = (uint16_t)ptr;
+ op->addr = (uint16_t)ptr;
} else {
- op->ptr = decode_linear_addr(env, decode, (uint16_t)ptr, seg);
+ op->addr = decode_linear_addr(env, decode, (uint16_t)ptr, seg);
}
}
-target_ulong get_reg_ref(CPUX86State *env, int reg, int rex_present,
+void *get_reg_ref(CPUX86State *env, int reg, int rex_present,
int is_extended, int size)
{
- target_ulong ptr = 0;
+ void *ptr = NULL;
if (is_extended) {
reg |= R_R8;
@@ -1679,13 +1674,13 @@ target_ulong get_reg_ref(CPUX86State *env, int reg, int rex_present,
switch (size) {
case 1:
if (is_extended || reg < 4 || rex_present) {
- ptr = (target_ulong)&RL(env, reg);
+ ptr = &RL(env, reg);
} else {
- ptr = (target_ulong)&RH(env, reg - 4);
+ ptr = &RH(env, reg - 4);
}
break;
default:
- ptr = (target_ulong)&RRX(env, reg);
+ ptr = &RRX(env, reg);
break;
}
return ptr;
@@ -1696,7 +1691,7 @@ target_ulong get_reg_val(CPUX86State *env, int reg, int rex_present,
{
target_ulong val = 0;
memcpy(&val,
- (void *)get_reg_ref(env, reg, rex_present, is_extended, size),
+ get_reg_ref(env, reg, rex_present, is_extended, size),
size);
return val;
}
@@ -1763,9 +1758,9 @@ void calc_modrm_operand32(CPUX86State *env, struct x86_decode *decode,
}
if (X86_DECODE_CMD_LEA == decode->cmd) {
- op->ptr = (uint32_t)ptr;
+ op->addr = (uint32_t)ptr;
} else {
- op->ptr = decode_linear_addr(env, decode, (uint32_t)ptr, seg);
+ op->addr = decode_linear_addr(env, decode, (uint32_t)ptr, seg);
}
}
@@ -1793,9 +1788,9 @@ void calc_modrm_operand64(CPUX86State *env, struct x86_decode *decode,
}
if (X86_DECODE_CMD_LEA == decode->cmd) {
- op->ptr = ptr;
+ op->addr = ptr;
} else {
- op->ptr = decode_linear_addr(env, decode, ptr, seg);
+ op->addr = decode_linear_addr(env, decode, ptr, seg);
}
}
@@ -1806,8 +1801,8 @@ void calc_modrm_operand(CPUX86State *env, struct x86_decode *decode,
if (3 == decode->modrm.mod) {
op->reg = decode->modrm.reg;
op->type = X86_VAR_REG;
- op->ptr = get_reg_ref(env, decode->modrm.rm, decode->rex.rex,
- decode->rex.b, decode->operand_size);
+ op->regptr = get_reg_ref(env, decode->modrm.rm, decode->rex.rex,
+ decode->rex.b, decode->operand_size);
return;
}
diff --git a/target/i386/emulate/x86_decode.h b/target/i386/emulate/x86_decode.h
index 87cc728..927645a 100644
--- a/target/i386/emulate/x86_decode.h
+++ b/target/i386/emulate/x86_decode.h
@@ -266,7 +266,10 @@ typedef struct x86_decode_op {
int reg;
target_ulong val;
- target_ulong ptr;
+ union {
+ target_ulong addr;
+ void *regptr;
+ };
} x86_decode_op;
typedef struct x86_decode {
@@ -301,8 +304,8 @@ uint64_t sign(uint64_t val, int size);
uint32_t decode_instruction(CPUX86State *env, struct x86_decode *decode);
-target_ulong get_reg_ref(CPUX86State *env, int reg, int rex_present,
- int is_extended, int size);
+void *get_reg_ref(CPUX86State *env, int reg, int rex_present,
+ int is_extended, int size);
target_ulong get_reg_val(CPUX86State *env, int reg, int rex_present,
int is_extended, int size);
void calc_modrm_operand(CPUX86State *env, struct x86_decode *decode,
diff --git a/target/i386/emulate/x86_emu.c b/target/i386/emulate/x86_emu.c
index 26a4876..db7a7f7 100644
--- a/target/i386/emulate/x86_emu.c
+++ b/target/i386/emulate/x86_emu.c
@@ -31,8 +31,8 @@
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
-// License along with this library; if not, write to the Free Software
-// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA B 02110-1301 USA
+// License along with this library; if not, see
+// <https://www.gnu.org/licenses/>.
/////////////////////////////////////////////////////////////////////////
#include "qemu/osdep.h"
@@ -52,7 +52,7 @@
uint8_t v2 = (uint8_t)decode->op[1].val; \
uint8_t diff = v1 cmd v2; \
if (save_res) { \
- write_val_ext(env, decode->op[0].ptr, diff, 1); \
+ write_val_ext(env, &decode->op[0], diff, 1); \
} \
FLAGS_FUNC##8(env, v1, v2, diff); \
break; \
@@ -63,7 +63,7 @@
uint16_t v2 = (uint16_t)decode->op[1].val; \
uint16_t diff = v1 cmd v2; \
if (save_res) { \
- write_val_ext(env, decode->op[0].ptr, diff, 2); \
+ write_val_ext(env, &decode->op[0], diff, 2); \
} \
FLAGS_FUNC##16(env, v1, v2, diff); \
break; \
@@ -74,7 +74,7 @@
uint32_t v2 = (uint32_t)decode->op[1].val; \
uint32_t diff = v1 cmd v2; \
if (save_res) { \
- write_val_ext(env, decode->op[0].ptr, diff, 4); \
+ write_val_ext(env, &decode->op[0], diff, 4); \
} \
FLAGS_FUNC##32(env, v1, v2, diff); \
break; \
@@ -121,7 +121,7 @@ void write_reg(CPUX86State *env, int reg, target_ulong val, int size)
}
}
-target_ulong read_val_from_reg(target_ulong reg_ptr, int size)
+target_ulong read_val_from_reg(void *reg_ptr, int size)
{
target_ulong val;
@@ -144,7 +144,7 @@ target_ulong read_val_from_reg(target_ulong reg_ptr, int size)
return val;
}
-void write_val_to_reg(target_ulong reg_ptr, target_ulong val, int size)
+void write_val_to_reg(void *reg_ptr, target_ulong val, int size)
{
switch (size) {
case 1:
@@ -164,18 +164,18 @@ void write_val_to_reg(target_ulong reg_ptr, target_ulong val, int size)
}
}
-static bool is_host_reg(CPUX86State *env, target_ulong ptr)
+static void write_val_to_mem(CPUX86State *env, target_ulong ptr, target_ulong val, int size)
{
- return (ptr - (target_ulong)&env->regs[0]) < sizeof(env->regs);
+ emul_ops->write_mem(env_cpu(env), &val, ptr, size);
}
-void write_val_ext(CPUX86State *env, target_ulong ptr, target_ulong val, int size)
+void write_val_ext(CPUX86State *env, struct x86_decode_op *decode, target_ulong val, int size)
{
- if (is_host_reg(env, ptr)) {
- write_val_to_reg(ptr, val, size);
- return;
+ if (decode->type == X86_VAR_REG) {
+ write_val_to_reg(decode->regptr, val, size);
+ } else {
+ write_val_to_mem(env, decode->addr, val, size);
}
- emul_ops->write_mem(env_cpu(env), &val, ptr, size);
}
uint8_t *read_mmio(CPUX86State *env, target_ulong ptr, int bytes)
@@ -185,15 +185,11 @@ uint8_t *read_mmio(CPUX86State *env, target_ulong ptr, int bytes)
}
-target_ulong read_val_ext(CPUX86State *env, target_ulong ptr, int size)
+static target_ulong read_val_from_mem(CPUX86State *env, target_long ptr, int size)
{
target_ulong val;
uint8_t *mmio_ptr;
- if (is_host_reg(env, ptr)) {
- return read_val_from_reg(ptr, size);
- }
-
mmio_ptr = read_mmio(env, ptr, size);
switch (size) {
case 1:
@@ -215,6 +211,15 @@ target_ulong read_val_ext(CPUX86State *env, target_ulong ptr, int size)
return val;
}
+target_ulong read_val_ext(CPUX86State *env, struct x86_decode_op *decode, int size)
+{
+ if (decode->type == X86_VAR_REG) {
+ return read_val_from_reg(decode->regptr, size);
+ } else {
+ return read_val_from_mem(env, decode->addr, size);
+ }
+}
+
static void fetch_operands(CPUX86State *env, struct x86_decode *decode,
int n, bool val_op0, bool val_op1, bool val_op2)
{
@@ -226,25 +231,25 @@ static void fetch_operands(CPUX86State *env, struct x86_decode *decode,
case X86_VAR_IMMEDIATE:
break;
case X86_VAR_REG:
- VM_PANIC_ON(!decode->op[i].ptr);
+ VM_PANIC_ON(!decode->op[i].regptr);
if (calc_val[i]) {
- decode->op[i].val = read_val_from_reg(decode->op[i].ptr,
+ decode->op[i].val = read_val_from_reg(decode->op[i].regptr,
decode->operand_size);
}
break;
case X86_VAR_RM:
calc_modrm_operand(env, decode, &decode->op[i]);
if (calc_val[i]) {
- decode->op[i].val = read_val_ext(env, decode->op[i].ptr,
+ decode->op[i].val = read_val_ext(env, &decode->op[i],
decode->operand_size);
}
break;
case X86_VAR_OFFSET:
- decode->op[i].ptr = decode_linear_addr(env, decode,
- decode->op[i].ptr,
- R_DS);
+ decode->op[i].addr = decode_linear_addr(env, decode,
+ decode->op[i].addr,
+ R_DS);
if (calc_val[i]) {
- decode->op[i].val = read_val_ext(env, decode->op[i].ptr,
+ decode->op[i].val = read_val_ext(env, &decode->op[i],
decode->operand_size);
}
break;
@@ -257,7 +262,7 @@ static void fetch_operands(CPUX86State *env, struct x86_decode *decode,
static void exec_mov(CPUX86State *env, struct x86_decode *decode)
{
fetch_operands(env, decode, 2, false, true, false);
- write_val_ext(env, decode->op[0].ptr, decode->op[1].val,
+ write_val_ext(env, &decode->op[0], decode->op[1].val,
decode->operand_size);
env->eip += decode->len;
@@ -312,7 +317,7 @@ static void exec_neg(CPUX86State *env, struct x86_decode *decode)
fetch_operands(env, decode, 2, true, true, false);
val = 0 - sign(decode->op[1].val, decode->operand_size);
- write_val_ext(env, decode->op[1].ptr, val, decode->operand_size);
+ write_val_ext(env, &decode->op[1], val, decode->operand_size);
if (4 == decode->operand_size) {
SET_FLAGS_OSZAPC_SUB32(env, 0, 0 - val, val);
@@ -363,7 +368,7 @@ static void exec_not(CPUX86State *env, struct x86_decode *decode)
{
fetch_operands(env, decode, 1, true, false, false);
- write_val_ext(env, decode->op[0].ptr, ~decode->op[0].val,
+ write_val_ext(env, &decode->op[0], ~decode->op[0].val,
decode->operand_size);
env->eip += decode->len;
}
@@ -382,8 +387,8 @@ void exec_movzx(CPUX86State *env, struct x86_decode *decode)
}
decode->operand_size = src_op_size;
calc_modrm_operand(env, decode, &decode->op[1]);
- decode->op[1].val = read_val_ext(env, decode->op[1].ptr, src_op_size);
- write_val_ext(env, decode->op[0].ptr, decode->op[1].val, op_size);
+ decode->op[1].val = read_val_ext(env, &decode->op[1], src_op_size);
+ write_val_ext(env, &decode->op[0], decode->op[1].val, op_size);
env->eip += decode->len;
}
@@ -469,10 +474,10 @@ static inline void string_rep(CPUX86State *env, struct x86_decode *decode,
while (rcx--) {
func(env, decode);
write_reg(env, R_ECX, rcx, decode->addressing_size);
- if ((PREFIX_REP == rep) && !get_ZF(env)) {
+ if ((PREFIX_REP == rep) && !env->cc_dst) {
break;
}
- if ((PREFIX_REPN == rep) && get_ZF(env)) {
+ if ((PREFIX_REPN == rep) && env->cc_dst) {
break;
}
}
@@ -535,8 +540,8 @@ static void exec_movs_single(CPUX86State *env, struct x86_decode *decode)
dst_addr = linear_addr_size(env_cpu(env), RDI(env),
decode->addressing_size, R_ES);
- val = read_val_ext(env, src_addr, decode->operand_size);
- write_val_ext(env, dst_addr, val, decode->operand_size);
+ val = read_val_from_mem(env, src_addr, decode->operand_size);
+ write_val_to_mem(env, dst_addr, val, decode->operand_size);
string_increment_reg(env, R_ESI, decode);
string_increment_reg(env, R_EDI, decode);
@@ -563,9 +568,9 @@ static void exec_cmps_single(CPUX86State *env, struct x86_decode *decode)
decode->addressing_size, R_ES);
decode->op[0].type = X86_VAR_IMMEDIATE;
- decode->op[0].val = read_val_ext(env, src_addr, decode->operand_size);
+ decode->op[0].val = read_val_from_mem(env, src_addr, decode->operand_size);
decode->op[1].type = X86_VAR_IMMEDIATE;
- decode->op[1].val = read_val_ext(env, dst_addr, decode->operand_size);
+ decode->op[1].val = read_val_from_mem(env, dst_addr, decode->operand_size);
EXEC_2OP_FLAGS_CMD(env, decode, -, SET_FLAGS_OSZAPC_SUB, false);
@@ -697,15 +702,15 @@ static void do_bt(CPUX86State *env, struct x86_decode *decode, int flag)
if (decode->op[0].type != X86_VAR_REG) {
if (4 == decode->operand_size) {
displacement = ((int32_t) (decode->op[1].val & 0xffffffe0)) / 32;
- decode->op[0].ptr += 4 * displacement;
+ decode->op[0].addr += 4 * displacement;
} else if (2 == decode->operand_size) {
displacement = ((int16_t) (decode->op[1].val & 0xfff0)) / 16;
- decode->op[0].ptr += 2 * displacement;
+ decode->op[0].addr += 2 * displacement;
} else {
VM_PANIC("bt 64bit\n");
}
}
- decode->op[0].val = read_val_ext(env, decode->op[0].ptr,
+ decode->op[0].val = read_val_ext(env, &decode->op[0],
decode->operand_size);
cf = (decode->op[0].val >> index) & 0x01;
@@ -723,7 +728,7 @@ static void do_bt(CPUX86State *env, struct x86_decode *decode, int flag)
decode->op[0].val &= ~(1u << index);
break;
}
- write_val_ext(env, decode->op[0].ptr, decode->op[0].val,
+ write_val_ext(env, &decode->op[0], decode->op[0].val,
decode->operand_size);
set_CF(env, cf);
}
@@ -775,7 +780,7 @@ void exec_shl(CPUX86State *env, struct x86_decode *decode)
of = cf ^ (res >> 7);
}
- write_val_ext(env, decode->op[0].ptr, res, 1);
+ write_val_ext(env, &decode->op[0], res, 1);
SET_FLAGS_OSZAPC_LOGIC8(env, 0, 0, res);
SET_FLAGS_OxxxxC(env, of, cf);
break;
@@ -791,7 +796,7 @@ void exec_shl(CPUX86State *env, struct x86_decode *decode)
of = cf ^ (res >> 15); /* of = cf ^ result15 */
}
- write_val_ext(env, decode->op[0].ptr, res, 2);
+ write_val_ext(env, &decode->op[0], res, 2);
SET_FLAGS_OSZAPC_LOGIC16(env, 0, 0, res);
SET_FLAGS_OxxxxC(env, of, cf);
break;
@@ -800,7 +805,7 @@ void exec_shl(CPUX86State *env, struct x86_decode *decode)
{
uint32_t res = decode->op[0].val << count;
- write_val_ext(env, decode->op[0].ptr, res, 4);
+ write_val_ext(env, &decode->op[0], res, 4);
SET_FLAGS_OSZAPC_LOGIC32(env, 0, 0, res);
cf = (decode->op[0].val >> (32 - count)) & 0x1;
of = cf ^ (res >> 31); /* of = cf ^ result31 */
@@ -831,10 +836,10 @@ void exec_movsx(CPUX86State *env, struct x86_decode *decode)
decode->operand_size = src_op_size;
calc_modrm_operand(env, decode, &decode->op[1]);
- decode->op[1].val = sign(read_val_ext(env, decode->op[1].ptr, src_op_size),
+ decode->op[1].val = sign(read_val_ext(env, &decode->op[1], src_op_size),
src_op_size);
- write_val_ext(env, decode->op[0].ptr, decode->op[1].val, op_size);
+ write_val_ext(env, &decode->op[0], decode->op[1].val, op_size);
env->eip += decode->len;
}
@@ -862,7 +867,7 @@ void exec_ror(CPUX86State *env, struct x86_decode *decode)
count &= 0x7; /* use only bottom 3 bits */
res = ((uint8_t)decode->op[0].val >> count) |
((uint8_t)decode->op[0].val << (8 - count));
- write_val_ext(env, decode->op[0].ptr, res, 1);
+ write_val_ext(env, &decode->op[0], res, 1);
bit6 = (res >> 6) & 1;
bit7 = (res >> 7) & 1;
/* set eflags: ROR count affects the following flags: C, O */
@@ -886,7 +891,7 @@ void exec_ror(CPUX86State *env, struct x86_decode *decode)
count &= 0x0f; /* use only 4 LSB's */
res = ((uint16_t)decode->op[0].val >> count) |
((uint16_t)decode->op[0].val << (16 - count));
- write_val_ext(env, decode->op[0].ptr, res, 2);
+ write_val_ext(env, &decode->op[0], res, 2);
bit14 = (res >> 14) & 1;
bit15 = (res >> 15) & 1;
@@ -904,7 +909,7 @@ void exec_ror(CPUX86State *env, struct x86_decode *decode)
if (count) {
res = ((uint32_t)decode->op[0].val >> count) |
((uint32_t)decode->op[0].val << (32 - count));
- write_val_ext(env, decode->op[0].ptr, res, 4);
+ write_val_ext(env, &decode->op[0], res, 4);
bit31 = (res >> 31) & 1;
bit30 = (res >> 30) & 1;
@@ -941,7 +946,7 @@ void exec_rol(CPUX86State *env, struct x86_decode *decode)
res = ((uint8_t)decode->op[0].val << count) |
((uint8_t)decode->op[0].val >> (8 - count));
- write_val_ext(env, decode->op[0].ptr, res, 1);
+ write_val_ext(env, &decode->op[0], res, 1);
/* set eflags:
* ROL count affects the following flags: C, O
*/
@@ -968,7 +973,7 @@ void exec_rol(CPUX86State *env, struct x86_decode *decode)
res = ((uint16_t)decode->op[0].val << count) |
((uint16_t)decode->op[0].val >> (16 - count));
- write_val_ext(env, decode->op[0].ptr, res, 2);
+ write_val_ext(env, &decode->op[0], res, 2);
bit0 = (res & 0x1);
bit15 = (res >> 15);
/* of = cf ^ result15 */
@@ -986,7 +991,7 @@ void exec_rol(CPUX86State *env, struct x86_decode *decode)
res = ((uint32_t)decode->op[0].val << count) |
((uint32_t)decode->op[0].val >> (32 - count));
- write_val_ext(env, decode->op[0].ptr, res, 4);
+ write_val_ext(env, &decode->op[0], res, 4);
bit0 = (res & 0x1);
bit31 = (res >> 31);
/* of = cf ^ result31 */
@@ -1024,7 +1029,7 @@ void exec_rcl(CPUX86State *env, struct x86_decode *decode)
(op1_8 >> (9 - count));
}
- write_val_ext(env, decode->op[0].ptr, res, 1);
+ write_val_ext(env, &decode->op[0], res, 1);
cf = (op1_8 >> (8 - count)) & 0x01;
of = cf ^ (res >> 7); /* of = cf ^ result7 */
@@ -1050,7 +1055,7 @@ void exec_rcl(CPUX86State *env, struct x86_decode *decode)
(op1_16 >> (17 - count));
}
- write_val_ext(env, decode->op[0].ptr, res, 2);
+ write_val_ext(env, &decode->op[0], res, 2);
cf = (op1_16 >> (16 - count)) & 0x1;
of = cf ^ (res >> 15); /* of = cf ^ result15 */
@@ -1073,7 +1078,7 @@ void exec_rcl(CPUX86State *env, struct x86_decode *decode)
(op1_32 >> (33 - count));
}
- write_val_ext(env, decode->op[0].ptr, res, 4);
+ write_val_ext(env, &decode->op[0], res, 4);
cf = (op1_32 >> (32 - count)) & 0x1;
of = cf ^ (res >> 31); /* of = cf ^ result31 */
@@ -1105,7 +1110,7 @@ void exec_rcr(CPUX86State *env, struct x86_decode *decode)
res = (op1_8 >> count) | (get_CF(env) << (8 - count)) |
(op1_8 << (9 - count));
- write_val_ext(env, decode->op[0].ptr, res, 1);
+ write_val_ext(env, &decode->op[0], res, 1);
cf = (op1_8 >> (count - 1)) & 0x1;
of = (((res << 1) ^ res) >> 7) & 0x1; /* of = result6 ^ result7 */
@@ -1124,7 +1129,7 @@ void exec_rcr(CPUX86State *env, struct x86_decode *decode)
res = (op1_16 >> count) | (get_CF(env) << (16 - count)) |
(op1_16 << (17 - count));
- write_val_ext(env, decode->op[0].ptr, res, 2);
+ write_val_ext(env, &decode->op[0], res, 2);
cf = (op1_16 >> (count - 1)) & 0x1;
of = ((uint16_t)((res << 1) ^ res) >> 15) & 0x1; /* of = result15 ^
@@ -1148,7 +1153,7 @@ void exec_rcr(CPUX86State *env, struct x86_decode *decode)
(op1_32 << (33 - count));
}
- write_val_ext(env, decode->op[0].ptr, res, 4);
+ write_val_ext(env, &decode->op[0], res, 4);
cf = (op1_32 >> (count - 1)) & 0x1;
of = ((res << 1) ^ res) >> 31; /* of = result30 ^ result31 */
@@ -1163,9 +1168,9 @@ static void exec_xchg(CPUX86State *env, struct x86_decode *decode)
{
fetch_operands(env, decode, 2, true, true, false);
- write_val_ext(env, decode->op[0].ptr, decode->op[1].val,
+ write_val_ext(env, &decode->op[0], decode->op[1].val,
decode->operand_size);
- write_val_ext(env, decode->op[1].ptr, decode->op[0].val,
+ write_val_ext(env, &decode->op[1], decode->op[0].val,
decode->operand_size);
env->eip += decode->len;
@@ -1174,7 +1179,7 @@ static void exec_xchg(CPUX86State *env, struct x86_decode *decode)
static void exec_xadd(CPUX86State *env, struct x86_decode *decode)
{
EXEC_2OP_FLAGS_CMD(env, decode, +, SET_FLAGS_OSZAPC_ADD, true);
- write_val_ext(env, decode->op[1].ptr, decode->op[0].val,
+ write_val_ext(env, &decode->op[1], decode->op[0].val,
decode->operand_size);
env->eip += decode->len;
@@ -1241,7 +1246,7 @@ static void init_cmd_handler(void)
bool exec_instruction(CPUX86State *env, struct x86_decode *ins)
{
if (!_cmd_handler[ins->cmd].handler) {
- printf("Unimplemented handler (%llx) for %d (%x %x) \n", env->eip,
+ printf("Unimplemented handler (" TARGET_FMT_lx ") for %d (%x %x) \n", env->eip,
ins->cmd, ins->opcode[0],
ins->opcode_len > 1 ? ins->opcode[1] : 0);
env->eip += ins->len;
diff --git a/target/i386/emulate/x86_emu.h b/target/i386/emulate/x86_emu.h
index 555b567..a1a9612 100644
--- a/target/i386/emulate/x86_emu.h
+++ b/target/i386/emulate/x86_emu.h
@@ -42,11 +42,11 @@ void x86_emul_raise_exception(CPUX86State *env, int exception_index, int error_c
target_ulong read_reg(CPUX86State *env, int reg, int size);
void write_reg(CPUX86State *env, int reg, target_ulong val, int size);
-target_ulong read_val_from_reg(target_ulong reg_ptr, int size);
-void write_val_to_reg(target_ulong reg_ptr, target_ulong val, int size);
-void write_val_ext(CPUX86State *env, target_ulong ptr, target_ulong val, int size);
+target_ulong read_val_from_reg(void *reg_ptr, int size);
+void write_val_to_reg(void *reg_ptr, target_ulong val, int size);
+void write_val_ext(CPUX86State *env, struct x86_decode_op *decode, target_ulong val, int size);
uint8_t *read_mmio(CPUX86State *env, target_ulong ptr, int bytes);
-target_ulong read_val_ext(CPUX86State *env, target_ulong ptr, int size);
+target_ulong read_val_ext(CPUX86State *env, struct x86_decode_op *decode, int size);
void exec_movzx(CPUX86State *env, struct x86_decode *decode);
void exec_shl(CPUX86State *env, struct x86_decode *decode);
diff --git a/target/i386/emulate/x86_flags.c b/target/i386/emulate/x86_flags.c
index 84e2736..6592193 100644
--- a/target/i386/emulate/x86_flags.c
+++ b/target/i386/emulate/x86_flags.c
@@ -14,8 +14,8 @@
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
-// License along with this library; if not, write to the Free Software
-// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+// License along with this library; if not, see
+// <https://www.gnu.org/licenses/>.
/////////////////////////////////////////////////////////////////////////
/*
* flags functions
@@ -29,41 +29,50 @@
#include "x86.h"
-/* this is basically bocsh code */
+/*
+ * The algorithms here are similar to those in Bochs. After an ALU
+ * operation, CC_DST can be used to compute ZF, SF and PF, whereas
+ * CC_SRC is used to compute AF, CF and OF. In reality, SF and PF are the
+ * XOR of the value computed from CC_DST and the value found in bits 7 and 2
+ * of CC_SRC; this way the same logic can be used to compute the flags
+ * both before and after an ALU operation.
+ *
+ * Compared to the TCG CC_OP codes, this avoids conditionals when converting
+ * to and from the RFLAGS representation.
+ */
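+/*
+ * A small worked illustration of the scheme above (sketch, not part of
+ * the algorithm itself): after an 8-bit ALU op the macros below store
+ * the sign-extended 8-bit result in cc_dst and the carry-out bits in
+ * cc_src, with bits 7 and 2 of cc_src left clear.  ZF is then simply
+ * "cc_dst == 0", SF/PF are the result's sign/parity XORed with those
+ * (clear) delta bits, CF sits in the top bit of cc_src, and OF is
+ * recovered as CF ^ PO from the bit just below CF.
+ */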
-#define LF_SIGN_BIT 31
+#define LF_SIGN_BIT (TARGET_LONG_BITS - 1)
-#define LF_BIT_SD (0) /* lazy Sign Flag Delta */
-#define LF_BIT_AF (3) /* lazy Adjust flag */
-#define LF_BIT_PDB (8) /* lazy Parity Delta Byte (8 bits) */
-#define LF_BIT_CF (31) /* lazy Carry Flag */
-#define LF_BIT_PO (30) /* lazy Partial Overflow = CF ^ OF */
+#define LF_BIT_PD (2) /* lazy Parity Delta, same bit as PF */
+#define LF_BIT_AF (3) /* lazy Adjust flag */
+#define LF_BIT_SD (7) /* lazy Sign Flag Delta, same bit as SF */
+#define LF_BIT_CF (TARGET_LONG_BITS - 1) /* lazy Carry Flag */
+#define LF_BIT_PO (TARGET_LONG_BITS - 2) /* lazy Partial Overflow = CF ^ OF */
-#define LF_MASK_SD (0x01 << LF_BIT_SD)
-#define LF_MASK_AF (0x01 << LF_BIT_AF)
-#define LF_MASK_PDB (0xFF << LF_BIT_PDB)
-#define LF_MASK_CF (0x01 << LF_BIT_CF)
-#define LF_MASK_PO (0x01 << LF_BIT_PO)
+#define LF_MASK_PD ((target_ulong)0x01 << LF_BIT_PD)
+#define LF_MASK_AF ((target_ulong)0x01 << LF_BIT_AF)
+#define LF_MASK_SD ((target_ulong)0x01 << LF_BIT_SD)
+#define LF_MASK_CF ((target_ulong)0x01 << LF_BIT_CF)
+#define LF_MASK_PO ((target_ulong)0x01 << LF_BIT_PO)
/* ******************* */
/* OSZAPC */
/* ******************* */
-/* size, carries, result */
+/* use carries to fill in AF, PO and CF, while ensuring PD and SD are clear.
+ * for full-word operations just clear PD and SD; for smaller operand
+ * sizes only keep AF in the low byte and shift the carries left to
+ * place PO and CF in the top two bits.
+ */
#define SET_FLAGS_OSZAPC_SIZE(size, lf_carries, lf_result) { \
- target_ulong temp = ((lf_carries) & (LF_MASK_AF)) | \
- (((lf_carries) >> (size - 2)) << LF_BIT_PO); \
- env->lflags.result = (target_ulong)(int##size##_t)(lf_result); \
- if ((size) == 32) { \
- temp = ((lf_carries) & ~(LF_MASK_PDB | LF_MASK_SD)); \
- } else if ((size) == 16) { \
- temp = ((lf_carries) & (LF_MASK_AF)) | ((lf_carries) << 16); \
- } else if ((size) == 8) { \
- temp = ((lf_carries) & (LF_MASK_AF)) | ((lf_carries) << 24); \
+ env->cc_dst = (target_ulong)(int##size##_t)(lf_result); \
+ target_ulong temp = (lf_carries); \
+ if ((size) == TARGET_LONG_BITS) { \
+ temp = temp & ~(LF_MASK_PD | LF_MASK_SD); \
} else { \
- VM_PANIC("unimplemented"); \
+ temp = (temp & LF_MASK_AF) | (temp << (TARGET_LONG_BITS - (size))); \
} \
- env->lflags.auxbits = (target_ulong)(uint32_t)temp; \
+ env->cc_src = temp; \
}
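+/*
+ * e.g. for an 8-bit add (illustrative): bit 7 of the carry vector (the
+ * CF carry-out) lands in the top bit of cc_src after the shift by
+ * TARGET_LONG_BITS - 8, bit 6 (the carry into the sign bit, i.e.
+ * CF ^ OF) lands one position below it as PO, and bit 3 (AF) is kept
+ * in place by the LF_MASK_AF term.
+ */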
/* carries, result */
@@ -77,23 +86,18 @@
/* ******************* */
/* OSZAP */
/* ******************* */
-/* size, carries, result */
+/* same as setting OSZAPC, but preserve CF and flip PO if the old value of CF
+ * did not match the high bit of lf_carries. */
#define SET_FLAGS_OSZAP_SIZE(size, lf_carries, lf_result) { \
- target_ulong temp = ((lf_carries) & (LF_MASK_AF)) | \
- (((lf_carries) >> (size - 2)) << LF_BIT_PO); \
- if ((size) == 32) { \
- temp = ((lf_carries) & ~(LF_MASK_PDB | LF_MASK_SD)); \
- } else if ((size) == 16) { \
- temp = ((lf_carries) & (LF_MASK_AF)) | ((lf_carries) << 16); \
- } else if ((size) == 8) { \
- temp = ((lf_carries) & (LF_MASK_AF)) | ((lf_carries) << 24); \
+ env->cc_dst = (target_ulong)(int##size##_t)(lf_result); \
+ target_ulong temp = (lf_carries); \
+ if ((size) == TARGET_LONG_BITS) { \
+ temp = (temp & ~(LF_MASK_PD | LF_MASK_SD)); \
} else { \
- VM_PANIC("unimplemented"); \
+ temp = (temp & LF_MASK_AF) | (temp << (TARGET_LONG_BITS - (size))); \
} \
- env->lflags.result = (target_ulong)(int##size##_t)(lf_result); \
- target_ulong delta_c = (env->lflags.auxbits ^ temp) & LF_MASK_CF; \
- delta_c ^= (delta_c >> 1); \
- env->lflags.auxbits = (target_ulong)(uint32_t)(temp ^ delta_c); \
+ target_ulong cf_changed = ((target_long)(env->cc_src ^ temp)) < 0; \
+ env->cc_src = temp ^ (cf_changed * (LF_MASK_PO | LF_MASK_CF)); \
}
/* carries, result */
@@ -104,11 +108,11 @@
#define SET_FLAGS_OSZAP_32(carries, result) \
SET_FLAGS_OSZAP_SIZE(32, carries, result)
-void SET_FLAGS_OxxxxC(CPUX86State *env, uint32_t new_of, uint32_t new_cf)
+void SET_FLAGS_OxxxxC(CPUX86State *env, bool new_of, bool new_cf)
{
- uint32_t temp_po = new_of ^ new_cf;
- env->lflags.auxbits &= ~(LF_MASK_PO | LF_MASK_CF);
- env->lflags.auxbits |= (temp_po << LF_BIT_PO) | (new_cf << LF_BIT_CF);
+ env->cc_src &= ~(LF_MASK_PO | LF_MASK_CF);
+ env->cc_src |= (-(target_ulong)new_cf << LF_BIT_PO);
+ env->cc_src ^= ((target_ulong)new_of << LF_BIT_PO);
}
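+/*
+ * In the function above (sketch of the encoding): -(target_ulong)new_cf
+ * sets both the CF and PO bits when the new carry is 1, and XORing
+ * new_of into the PO bit leaves PO = CF ^ OF, exactly as the lazy-flags
+ * layout requires.
+ */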
void SET_FLAGS_OSZAPC_SUB32(CPUX86State *env, uint32_t v1, uint32_t v2,
@@ -202,104 +206,68 @@ void SET_FLAGS_OSZAPC_LOGIC8(CPUX86State *env, uint8_t v1, uint8_t v2,
SET_FLAGS_OSZAPC_8(0, diff);
}
-bool get_PF(CPUX86State *env)
-{
- uint32_t temp = (255 & env->lflags.result);
- temp = temp ^ (255 & (env->lflags.auxbits >> LF_BIT_PDB));
- temp = (temp ^ (temp >> 4)) & 0x0F;
- return (0x9669U >> temp) & 1;
-}
-
-void set_PF(CPUX86State *env, bool val)
+static inline uint32_t get_PF(CPUX86State *env)
{
- uint32_t temp = (255 & env->lflags.result) ^ (!val);
- env->lflags.auxbits &= ~(LF_MASK_PDB);
- env->lflags.auxbits |= (temp << LF_BIT_PDB);
+ return ((parity8(env->cc_dst) - 1) ^ env->cc_src) & CC_P;
}
-bool get_OF(CPUX86State *env)
+static inline uint32_t get_OF(CPUX86State *env)
{
- return ((env->lflags.auxbits + (1U << LF_BIT_PO)) >> LF_BIT_CF) & 1;
+ return ((env->cc_src >> (LF_BIT_CF - 11)) + CC_O / 2) & CC_O;
}
bool get_CF(CPUX86State *env)
{
- return (env->lflags.auxbits >> LF_BIT_CF) & 1;
-}
-
-void set_OF(CPUX86State *env, bool val)
-{
- bool old_cf = get_CF(env);
- SET_FLAGS_OxxxxC(env, val, old_cf);
+ return ((target_long)env->cc_src) < 0;
}
void set_CF(CPUX86State *env, bool val)
{
- bool old_of = get_OF(env);
- SET_FLAGS_OxxxxC(env, old_of, val);
+ /* If CF changes, flip PO and CF */
+ target_ulong temp = -(target_ulong)val;
+ target_ulong cf_changed = ((target_long)(env->cc_src ^ temp)) < 0;
+ env->cc_src ^= cf_changed * (LF_MASK_PO | LF_MASK_CF);
}
-bool get_AF(CPUX86State *env)
+static inline uint32_t get_ZF(CPUX86State *env)
{
- return (env->lflags.auxbits >> LF_BIT_AF) & 1;
+ return env->cc_dst ? 0 : CC_Z;
}
-void set_AF(CPUX86State *env, bool val)
+static inline uint32_t get_SF(CPUX86State *env)
{
- env->lflags.auxbits &= ~(LF_MASK_AF);
- env->lflags.auxbits |= val << LF_BIT_AF;
+ return ((env->cc_dst >> (LF_SIGN_BIT - LF_BIT_SD)) ^
+ env->cc_src) & CC_S;
}
-bool get_ZF(CPUX86State *env)
+void lflags_to_rflags(CPUX86State *env)
{
- return !env->lflags.result;
+ env->eflags &= ~(CC_C|CC_P|CC_A|CC_Z|CC_S|CC_O);
+ /* rotate left by one to move carry-out bits into CF and AF */
+ env->eflags |= (
+ (env->cc_src << 1) |
+ (env->cc_src >> (TARGET_LONG_BITS - 1))) & (CC_C | CC_A);
+ env->eflags |= get_SF(env);
+ env->eflags |= get_PF(env);
+ env->eflags |= get_ZF(env);
+ env->eflags |= get_OF(env);
}
-void set_ZF(CPUX86State *env, bool val)
+void rflags_to_lflags(CPUX86State *env)
{
- if (val) {
- env->lflags.auxbits ^=
- (((env->lflags.result >> LF_SIGN_BIT) & 1) << LF_BIT_SD);
- /* merge the parity bits into the Parity Delta Byte */
- uint32_t temp_pdb = (255 & env->lflags.result);
- env->lflags.auxbits ^= (temp_pdb << LF_BIT_PDB);
- /* now zero the .result value */
- env->lflags.result = 0;
- } else {
- env->lflags.result |= (1 << 8);
- }
-}
+ target_ulong cf_af, cf_xor_of;
-bool get_SF(CPUX86State *env)
-{
- return ((env->lflags.result >> LF_SIGN_BIT) ^
- (env->lflags.auxbits >> LF_BIT_SD)) & 1;
-}
+ /* Leave the low byte zero so that parity is always even... */
+ env->cc_dst = !(env->eflags & CC_Z) << 8;
-void set_SF(CPUX86State *env, bool val)
-{
- bool temp_sf = get_SF(env);
- env->lflags.auxbits ^= (temp_sf ^ val) << LF_BIT_SD;
-}
+ /* ... and therefore cc_src always uses opposite polarity. */
+ env->cc_src = CC_P;
+ env->cc_src ^= env->eflags & (CC_S | CC_P);
-void lflags_to_rflags(CPUX86State *env)
-{
- env->eflags &= ~(CC_C|CC_P|CC_A|CC_Z|CC_S|CC_O);
- env->eflags |= get_CF(env) ? CC_C : 0;
- env->eflags |= get_PF(env) ? CC_P : 0;
- env->eflags |= get_AF(env) ? CC_A : 0;
- env->eflags |= get_ZF(env) ? CC_Z : 0;
- env->eflags |= get_SF(env) ? CC_S : 0;
- env->eflags |= get_OF(env) ? CC_O : 0;
-}
+ /* rotate right by one to move CF and AF into the carry-out positions */
+ cf_af = env->eflags & (CC_C | CC_A);
+ env->cc_src |= ((cf_af >> 1) | (cf_af << (TARGET_LONG_BITS - 1)));
-void rflags_to_lflags(CPUX86State *env)
-{
- env->lflags.auxbits = env->lflags.result = 0;
- set_OF(env, env->eflags & CC_O);
- set_SF(env, env->eflags & CC_S);
- set_ZF(env, env->eflags & CC_Z);
- set_AF(env, env->eflags & CC_A);
- set_PF(env, env->eflags & CC_P);
- set_CF(env, env->eflags & CC_C);
+ cf_xor_of = ((env->eflags & (CC_C | CC_O)) + (CC_O - CC_C)) & CC_O;
+ env->cc_src |= -cf_xor_of & LF_MASK_PO;
}
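+/*
+ * Round-trip sketch: rflags_to_lflags() followed by lflags_to_rflags()
+ * reproduces the original C/P/A/Z/S/O bits: CF and AF are rotated into
+ * the carry-out slots and back, ZF is encoded as a zero/non-zero
+ * cc_dst, and the CC_P polarity trick keeps PF consistent with the
+ * all-zero low byte of cc_dst.
+ */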
diff --git a/target/i386/emulate/x86_flags.h b/target/i386/emulate/x86_flags.h
index 6c17500..a395c83 100644
--- a/target/i386/emulate/x86_flags.h
+++ b/target/i386/emulate/x86_flags.h
@@ -14,8 +14,8 @@
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
-// License along with this library; if not, write to the Free Software
-// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+// License along with this library; if not, see
+// <https://www.gnu.org/licenses/>.
/////////////////////////////////////////////////////////////////////////
/*
* x86 eflags functions
@@ -28,20 +28,10 @@
void lflags_to_rflags(CPUX86State *env);
void rflags_to_lflags(CPUX86State *env);
-bool get_PF(CPUX86State *env);
-void set_PF(CPUX86State *env, bool val);
bool get_CF(CPUX86State *env);
void set_CF(CPUX86State *env, bool val);
-bool get_AF(CPUX86State *env);
-void set_AF(CPUX86State *env, bool val);
-bool get_ZF(CPUX86State *env);
-void set_ZF(CPUX86State *env, bool val);
-bool get_SF(CPUX86State *env);
-void set_SF(CPUX86State *env, bool val);
-bool get_OF(CPUX86State *env);
-void set_OF(CPUX86State *env, bool val);
-void SET_FLAGS_OxxxxC(CPUX86State *env, uint32_t new_of, uint32_t new_cf);
+void SET_FLAGS_OxxxxC(CPUX86State *env, bool new_of, bool new_cf);
void SET_FLAGS_OSZAPC_SUB32(CPUX86State *env, uint32_t v1, uint32_t v2,
uint32_t diff);
diff --git a/target/i386/helper.c b/target/i386/helper.c
index c07b1b1..e0aaed3 100644
--- a/target/i386/helper.c
+++ b/target/i386/helper.c
@@ -22,9 +22,11 @@
#include "cpu.h"
#include "exec/cputlb.h"
#include "exec/translation-block.h"
+#include "exec/target_page.h"
#include "system/runstate.h"
#ifndef CONFIG_USER_ONLY
#include "system/hw_accel.h"
+#include "system/memory.h"
#include "monitor/monitor.h"
#include "kvm/kvm_i386.h"
#endif
@@ -524,7 +526,7 @@ void cpu_x86_inject_mce(Monitor *mon, X86CPU *cpu, int bank,
static inline target_ulong get_memio_eip(CPUX86State *env)
{
#ifdef CONFIG_TCG
- uint64_t data[TARGET_INSN_START_WORDS];
+ uint64_t data[INSN_START_WORDS];
CPUState *cs = env_cpu(env);
if (!cpu_unwind_state_data(cs, cs->mem_io_pc, data)) {
diff --git a/target/i386/host-cpu.c b/target/i386/host-cpu.c
index 072731a..d5e2bb5 100644
--- a/target/i386/host-cpu.c
+++ b/target/i386/host-cpu.c
@@ -15,7 +15,7 @@
#include "system/system.h"
/* Note: Only safe for use on x86(-64) hosts */
-static uint32_t host_cpu_phys_bits(void)
+uint32_t host_cpu_phys_bits(void)
{
uint32_t eax;
uint32_t host_phys_bits;
@@ -80,7 +80,6 @@ bool host_cpu_realizefn(CPUState *cs, Error **errp)
return true;
}
-#define CPUID_MODEL_ID_SZ 48
/**
* cpu_x86_fill_model_id:
* Get CPUID model ID string from host CPU.
@@ -118,13 +117,13 @@ void host_cpu_vendor_fms(char *vendor, int *family, int *model, int *stepping)
host_cpuid(0x1, 0, &eax, &ebx, &ecx, &edx);
if (family) {
- *family = ((eax >> 8) & 0x0F) + ((eax >> 20) & 0xFF);
+ *family = x86_cpu_family(eax);
}
if (model) {
- *model = ((eax >> 4) & 0x0F) | ((eax & 0xF0000) >> 12);
+ *model = x86_cpu_model(eax);
}
if (stepping) {
- *stepping = eax & 0x0F;
+ *stepping = x86_cpu_stepping(eax);
}
}
@@ -132,27 +131,27 @@ void host_cpu_instance_init(X86CPU *cpu)
{
X86CPUClass *xcc = X86_CPU_GET_CLASS(cpu);
- if (xcc->model) {
- char vendor[CPUID_VENDOR_SZ + 1];
-
- host_cpu_vendor_fms(vendor, NULL, NULL, NULL);
- object_property_set_str(OBJECT(cpu), "vendor", vendor, &error_abort);
- }
-}
-
-void host_cpu_max_instance_init(X86CPU *cpu)
-{
char vendor[CPUID_VENDOR_SZ + 1] = { 0 };
char model_id[CPUID_MODEL_ID_SZ + 1] = { 0 };
int family, model, stepping;
- /* Use max host physical address bits if -cpu max option is applied */
- object_property_set_bool(OBJECT(cpu), "host-phys-bits", true, &error_abort);
-
+ /*
+ * Setting the vendor applies to both the max/host and builtin_x86_defs CPUs.
+ * FIXME: this probably should warn or be skipped if the vendors do not
+ * match, because family numbers are incompatible between Intel and AMD.
+ */
host_cpu_vendor_fms(vendor, &family, &model, &stepping);
+ object_property_set_str(OBJECT(cpu), "vendor", vendor, &error_abort);
+
+ if (!xcc->max_features) {
+ return;
+ }
+
host_cpu_fill_model_id(model_id);
- object_property_set_str(OBJECT(cpu), "vendor", vendor, &error_abort);
+ /* Use max host physical address bits if -cpu max option is applied */
+ object_property_set_bool(OBJECT(cpu), "host-phys-bits", true, &error_abort);
+
object_property_set_int(OBJECT(cpu), "family", family, &error_abort);
object_property_set_int(OBJECT(cpu), "model", model, &error_abort);
object_property_set_int(OBJECT(cpu), "stepping", stepping,
@@ -161,7 +160,16 @@ void host_cpu_max_instance_init(X86CPU *cpu)
&error_abort);
}
-static void host_cpu_class_init(ObjectClass *oc, void *data)
+bool is_host_cpu_intel(void)
+{
+ char vendor[CPUID_VENDOR_SZ + 1];
+
+ host_cpu_vendor_fms(vendor, NULL, NULL, NULL);
+
+ return g_str_equal(vendor, CPUID_VENDOR_INTEL);
+}
+
+static void host_cpu_class_init(ObjectClass *oc, const void *data)
{
X86CPUClass *xcc = X86_CPU_CLASS(oc);
diff --git a/target/i386/host-cpu.h b/target/i386/host-cpu.h
index 6a9bc91..10df4b3 100644
--- a/target/i386/host-cpu.h
+++ b/target/i386/host-cpu.h
@@ -10,10 +10,12 @@
#ifndef HOST_CPU_H
#define HOST_CPU_H
+uint32_t host_cpu_phys_bits(void);
void host_cpu_instance_init(X86CPU *cpu);
void host_cpu_max_instance_init(X86CPU *cpu);
bool host_cpu_realizefn(CPUState *cs, Error **errp);
void host_cpu_vendor_fms(char *vendor, int *family, int *model, int *stepping);
+bool is_host_cpu_intel(void);
#endif /* HOST_CPU_H */
diff --git a/target/i386/hvf/hvf-cpu.c b/target/i386/hvf/hvf-cpu.c
index b5f4c80..94ee096 100644
--- a/target/i386/hvf/hvf-cpu.c
+++ b/target/i386/hvf/hvf-cpu.c
@@ -21,8 +21,6 @@ static void hvf_cpu_max_instance_init(X86CPU *cpu)
{
CPUX86State *env = &cpu->env;
- host_cpu_max_instance_init(cpu);
-
env->cpuid_min_level =
hvf_get_supported_cpuid(0x0, 0, R_EAX);
env->cpuid_min_xlevel =
@@ -61,20 +59,21 @@ static void hvf_cpu_xsave_init(void)
static void hvf_cpu_instance_init(CPUState *cs)
{
X86CPU *cpu = X86_CPU(cs);
+ X86CPUClass *xcc = X86_CPU_GET_CLASS(cpu);
host_cpu_instance_init(cpu);
/* Special cases not set in the X86CPUDefinition structs: */
/* TODO: in-kernel irqchip for hvf */
- if (cpu->max_features) {
+ if (xcc->max_features) {
hvf_cpu_max_instance_init(cpu);
}
hvf_cpu_xsave_init();
}
-static void hvf_cpu_accel_class_init(ObjectClass *oc, void *data)
+static void hvf_cpu_accel_class_init(ObjectClass *oc, const void *data)
{
AccelCPUClass *acc = ACCEL_CPU_CLASS(oc);
diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c
index 23ebf25..818b504 100644
--- a/target/i386/hvf/hvf.c
+++ b/target/i386/hvf/hvf.c
@@ -76,6 +76,7 @@
#include "qemu/main-loop.h"
#include "qemu/accel.h"
#include "target/i386/cpu.h"
+#include "exec/target_page.h"
static Error *invtsc_mig_blocker;
@@ -732,9 +733,9 @@ int hvf_vcpu_exec(CPUState *cpu)
}
do {
- if (cpu->accel->dirty) {
+ if (cpu->vcpu_dirty) {
hvf_put_registers(cpu);
- cpu->accel->dirty = false;
+ cpu->vcpu_dirty = false;
}
if (hvf_inject_interrupts(cpu)) {
diff --git a/target/i386/hvf/vmx.h b/target/i386/hvf/vmx.h
index 3c56afc..26d6029 100644
--- a/target/i386/hvf/vmx.h
+++ b/target/i386/hvf/vmx.h
@@ -33,7 +33,8 @@
#include "system/hvf.h"
#include "system/hvf_int.h"
-#include "exec/address-spaces.h"
+#include "system/address-spaces.h"
+#include "system/memory.h"
static inline uint64_t rreg(hv_vcpuid_t vcpu, hv_x86_reg_t reg)
{
diff --git a/target/i386/hvf/x86_cpuid.c b/target/i386/hvf/x86_cpuid.c
index fa131b1..0798a0c 100644
--- a/target/i386/hvf/x86_cpuid.c
+++ b/target/i386/hvf/x86_cpuid.c
@@ -73,7 +73,7 @@ uint32_t hvf_get_supported_cpuid(uint32_t func, uint32_t idx,
CPUID_MSR | CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC |
CPUID_SEP | CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV |
CPUID_PAT | CPUID_PSE36 | CPUID_CLFLUSH | CPUID_MMX |
- CPUID_FXSR | CPUID_SSE | CPUID_SSE2 | CPUID_SS;
+ CPUID_FXSR | CPUID_SSE | CPUID_SSE2 | CPUID_SS | CPUID_HT;
ecx &= CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSSE3 |
CPUID_EXT_FMA | CPUID_EXT_CX16 | CPUID_EXT_PCID |
CPUID_EXT_SSE41 | CPUID_EXT_SSE42 | CPUID_EXT_MOVBE |
diff --git a/target/i386/hvf/x86hvf.c b/target/i386/hvf/x86hvf.c
index 2057314..17fce1d 100644
--- a/target/i386/hvf/x86hvf.c
+++ b/target/i386/hvf/x86hvf.c
@@ -427,7 +427,7 @@ int hvf_process_events(CPUState *cs)
X86CPU *cpu = X86_CPU(cs);
CPUX86State *env = &cpu->env;
- if (!cs->accel->dirty) {
+ if (!cs->vcpu_dirty) {
/* light weight sync for CPU_INTERRUPT_HARD and IF_MASK */
env->eflags = rreg(cs->accel->fd, HV_X86_RFLAGS);
}
diff --git a/target/i386/kvm/hyperv.c b/target/i386/kvm/hyperv.c
index 70b89ca..9865120 100644
--- a/target/i386/kvm/hyperv.c
+++ b/target/i386/kvm/hyperv.c
@@ -13,6 +13,7 @@
#include "qemu/osdep.h"
#include "qemu/main-loop.h"
+#include "exec/target_page.h"
#include "hyperv.h"
#include "hw/hyperv/hyperv.h"
#include "hyperv-proto.h"
diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c
index 6269fa8..89a7953 100644
--- a/target/i386/kvm/kvm-cpu.c
+++ b/target/i386/kvm/kvm-cpu.c
@@ -41,6 +41,7 @@ static void kvm_set_guest_phys_bits(CPUState *cs)
static bool kvm_cpu_realizefn(CPUState *cs, Error **errp)
{
X86CPU *cpu = X86_CPU(cs);
+ X86CPUClass *xcc = X86_CPU_GET_CLASS(cpu);
CPUX86State *env = &cpu->env;
bool ret;
@@ -63,7 +64,7 @@ static bool kvm_cpu_realizefn(CPUState *cs, Error **errp)
* check/update ucode_rev, phys_bits, guest_phys_bits, mwait
* cpu_common_realizefn() (via xcc->parent_realize)
*/
- if (cpu->max_features) {
+ if (xcc->max_features) {
if (enable_cpu_pm) {
if (kvm_has_waitpkg()) {
env->features[FEAT_7_0_ECX] |= CPUID_7_0_ECX_WAITPKG;
@@ -72,7 +73,7 @@ static bool kvm_cpu_realizefn(CPUState *cs, Error **errp)
if (env->features[FEAT_1_ECX] & CPUID_EXT_MONITOR) {
host_cpuid(5, 0, &cpu->mwait.eax, &cpu->mwait.ebx,
&cpu->mwait.ecx, &cpu->mwait.edx);
- }
+ }
}
if (cpu->ucode_rev == 0) {
cpu->ucode_rev =
@@ -108,7 +109,7 @@ static void kvm_cpu_max_instance_init(X86CPU *cpu)
CPUX86State *env = &cpu->env;
KVMState *s = kvm_state;
- host_cpu_max_instance_init(cpu);
+ object_property_set_bool(OBJECT(cpu), "pmu", true, &error_abort);
if (lmce_supported()) {
object_property_set_bool(OBJECT(cpu), "lmce", true, &error_abort);
@@ -216,14 +217,14 @@ static void kvm_cpu_instance_init(CPUState *cs)
x86_cpu_apply_props(cpu, kvm_default_props);
}
- if (cpu->max_features) {
+ if (xcc->max_features) {
kvm_cpu_max_instance_init(cpu);
}
kvm_cpu_xsave_init();
}
-static void kvm_cpu_accel_class_init(ObjectClass *oc, void *data)
+static void kvm_cpu_accel_class_init(ObjectClass *oc, const void *data)
{
AccelCPUClass *acc = ACCEL_CPU_CLASS(oc);
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 6c749d4..e8c8be0 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -38,6 +38,7 @@
#include "kvm_i386.h"
#include "../confidential-guest.h"
#include "sev.h"
+#include "tdx.h"
#include "xen-emu.h"
#include "hyperv.h"
#include "hyperv-proto.h"
@@ -67,6 +68,7 @@
#include "hw/pci/msix.h"
#include "migration/blocker.h"
#include "exec/memattrs.h"
+#include "exec/target_page.h"
#include "trace.h"
#include CONFIG_DEVICES
@@ -191,6 +193,7 @@ static const char *vm_type_name[] = {
[KVM_X86_SEV_VM] = "SEV",
[KVM_X86_SEV_ES_VM] = "SEV-ES",
[KVM_X86_SNP_VM] = "SEV-SNP",
+ [KVM_X86_TDX_VM] = "TDX",
};
bool kvm_is_vm_type_supported(int type)
@@ -325,7 +328,7 @@ void kvm_synchronize_all_tsc(void)
{
CPUState *cpu;
- if (kvm_enabled()) {
+ if (kvm_enabled() && !is_tdx_vm()) {
CPU_FOREACH(cpu) {
run_on_cpu(cpu, do_kvm_synchronize_tsc, RUN_ON_CPU_NULL);
}
@@ -391,7 +394,7 @@ static bool host_tsx_broken(void)
/* Returns the value for a specific register on the cpuid entry
*/
-static uint32_t cpuid_entry_get_reg(struct kvm_cpuid_entry2 *entry, int reg)
+uint32_t cpuid_entry_get_reg(struct kvm_cpuid_entry2 *entry, int reg)
{
uint32_t ret = 0;
switch (reg) {
@@ -413,9 +416,9 @@ static uint32_t cpuid_entry_get_reg(struct kvm_cpuid_entry2 *entry, int reg)
/* Find matching entry for function/index on kvm_cpuid2 struct
*/
-static struct kvm_cpuid_entry2 *cpuid_find_entry(struct kvm_cpuid2 *cpuid,
- uint32_t function,
- uint32_t index)
+struct kvm_cpuid_entry2 *cpuid_find_entry(struct kvm_cpuid2 *cpuid,
+ uint32_t function,
+ uint32_t index)
{
int i;
for (i = 0; i < cpuid->nent; ++i) {
@@ -571,7 +574,7 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
}
if (current_machine->cgs) {
- ret = x86_confidential_guest_mask_cpuid_features(
+ ret = x86_confidential_guest_adjust_cpuid_features(
X86_CONFIDENTIAL_GUEST(current_machine->cgs),
function, index, reg, ret);
}
@@ -867,6 +870,15 @@ static int kvm_arch_set_tsc_khz(CPUState *cs)
int r, cur_freq;
bool set_ioctl = false;
+ /*
+ * The TSC of a TD vcpu is immutable: it cannot be set/changed via the vcpu
+ * scope VM_SET_TSC_KHZ; it can only be initialized via the VM scope
+ * VM_SET_TSC_KHZ before the KVM_TDX_INIT_VM ioctl in tdx_pre_create_vcpu().
+ */
+ if (is_tdx_vm()) {
+ return 0;
+ }
+
if (!env->tsc_khz) {
return 0;
}
@@ -1778,8 +1790,6 @@ static int hyperv_init_vcpu(X86CPU *cpu)
static Error *invtsc_mig_blocker;
-#define KVM_MAX_CPUID_ENTRIES 100
-
static void kvm_init_xsave(CPUX86State *env)
{
if (has_xsave2) {
@@ -1822,9 +1832,8 @@ static void kvm_init_nested_state(CPUX86State *env)
}
}
-static uint32_t kvm_x86_build_cpuid(CPUX86State *env,
- struct kvm_cpuid_entry2 *entries,
- uint32_t cpuid_i)
+uint32_t kvm_x86_build_cpuid(CPUX86State *env, struct kvm_cpuid_entry2 *entries,
+ uint32_t cpuid_i)
{
uint32_t limit, i, j;
uint32_t unused;
@@ -1863,7 +1872,7 @@ static uint32_t kvm_x86_build_cpuid(CPUX86State *env,
break;
}
case 0x1f:
- if (!x86_has_extended_topo(env->avail_cpu_topo)) {
+ if (!x86_has_cpuid_0x1f(env_archcpu(env))) {
cpuid_i--;
break;
}
@@ -2051,6 +2060,15 @@ full:
abort();
}
+int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp)
+{
+ if (is_tdx_vm()) {
+ return tdx_pre_create_vcpu(cpu, errp);
+ }
+
+ return 0;
+}
+
int kvm_arch_init_vcpu(CPUState *cs)
{
struct {
@@ -2075,6 +2093,14 @@ int kvm_arch_init_vcpu(CPUState *cs)
int r;
Error *local_err = NULL;
+ if (current_machine->cgs) {
+ r = x86_confidential_guest_check_features(
+ X86_CONFIDENTIAL_GUEST(current_machine->cgs), cs);
+ if (r < 0) {
+ return r;
+ }
+ }
+
memset(&cpuid_data, 0, sizeof(cpuid_data));
cpuid_i = 0;
@@ -2233,7 +2259,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
cpuid_i = kvm_x86_build_cpuid(env, cpuid_data.entries, cpuid_i);
cpuid_data.cpuid.nent = cpuid_i;
- if (((env->cpuid_version >> 8)&0xF) >= 6
+ if (x86_cpu_family(env->cpuid_version) >= 6
&& (env->features[FEAT_1_EDX] & (CPUID_MCE | CPUID_MCA)) ==
(CPUID_MCE | CPUID_MCA)) {
uint64_t mcg_cap, unsupported_caps;
@@ -3205,16 +3231,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
Error *local_err = NULL;
/*
- * Initialize SEV context, if required
- *
- * If no memory encryption is requested (ms->cgs == NULL) this is
- * a no-op.
- *
- * It's also a no-op if a non-SEV confidential guest support
- * mechanism is selected. SEV is the only mechanism available to
- * select on x86 at present, so this doesn't arise, but if new
- * mechanisms are supported in future (e.g. TDX), they'll need
- * their own initialization either here or elsewhere.
+ * Initialize confidential guest (SEV/TDX) context, if required
*/
if (ms->cgs) {
ret = confidential_guest_kvm_init(ms->cgs, &local_err);
@@ -3855,32 +3872,34 @@ static void kvm_init_msrs(X86CPU *cpu)
CPUX86State *env = &cpu->env;
kvm_msr_buf_reset(cpu);
- if (has_msr_arch_capabs) {
- kvm_msr_entry_add(cpu, MSR_IA32_ARCH_CAPABILITIES,
- env->features[FEAT_ARCH_CAPABILITIES]);
- }
- if (has_msr_core_capabs) {
- kvm_msr_entry_add(cpu, MSR_IA32_CORE_CAPABILITY,
- env->features[FEAT_CORE_CAPABILITY]);
- }
+ if (!is_tdx_vm()) {
+ if (has_msr_arch_capabs) {
+ kvm_msr_entry_add(cpu, MSR_IA32_ARCH_CAPABILITIES,
+ env->features[FEAT_ARCH_CAPABILITIES]);
+ }
- if (has_msr_perf_capabs && cpu->enable_pmu) {
- kvm_msr_entry_add_perf(cpu, env->features);
+ if (has_msr_core_capabs) {
+ kvm_msr_entry_add(cpu, MSR_IA32_CORE_CAPABILITY,
+ env->features[FEAT_CORE_CAPABILITY]);
+ }
+
+ if (has_msr_perf_capabs && cpu->enable_pmu) {
+ kvm_msr_entry_add_perf(cpu, env->features);
+ }
+
+ /*
+ * Older kernels do not include VMX MSRs in KVM_GET_MSR_INDEX_LIST, but
+ * all kernels with MSR features should have them.
+ */
+ if (kvm_feature_msrs && cpu_has_vmx(env)) {
+ kvm_msr_entry_add_vmx(cpu, env->features);
+ }
}
if (has_msr_ucode_rev) {
kvm_msr_entry_add(cpu, MSR_IA32_UCODE_REV, cpu->ucode_rev);
}
-
- /*
- * Older kernels do not include VMX MSRs in KVM_GET_MSR_INDEX_LIST, but
- * all kernels with MSR features should have them.
- */
- if (kvm_feature_msrs && cpu_has_vmx(env)) {
- kvm_msr_entry_add_vmx(cpu, env->features);
- }
-
assert(kvm_buf_set_msrs(cpu) == 0);
}
@@ -5999,9 +6018,11 @@ static bool host_supports_vmx(void)
* because private/shared page tracking is already provided through other
* means, these 2 use-cases should be treated as being mutually-exclusive.
*/
-static int kvm_handle_hc_map_gpa_range(struct kvm_run *run)
+static int kvm_handle_hc_map_gpa_range(X86CPU *cpu, struct kvm_run *run)
{
+ struct kvm_pre_fault_memory mem;
uint64_t gpa, size, attributes;
+ int ret;
if (!machine_require_guest_memfd(current_machine))
return -EINVAL;
@@ -6012,13 +6033,32 @@ static int kvm_handle_hc_map_gpa_range(struct kvm_run *run)
trace_kvm_hc_map_gpa_range(gpa, size, attributes, run->hypercall.flags);
- return kvm_convert_memory(gpa, size, attributes & KVM_MAP_GPA_RANGE_ENCRYPTED);
+ ret = kvm_convert_memory(gpa, size, attributes & KVM_MAP_GPA_RANGE_ENCRYPTED);
+ if (ret || !kvm_pre_fault_memory_supported) {
+ return ret;
+ }
+
+ /*
+ * Opportunistically pre-fault memory in. Failures are ignored so that any
+ * errors in faulting in the memory will get captured in KVM page fault
+ * path when the guest first accesses the page.
+ */
+ memset(&mem, 0, sizeof(mem));
+ mem.gpa = gpa;
+ mem.size = size;
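+ /*
+ * KVM_PRE_FAULT_MEMORY advances mem.gpa and shrinks mem.size as it
+ * makes progress, so the loop below ends once the whole range has
+ * been faulted in or an error stops it early (a sketch of the
+ * expected kernel contract, not something enforced here).
+ */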
+ while (mem.size) {
+ if (kvm_vcpu_ioctl(CPU(cpu), KVM_PRE_FAULT_MEMORY, &mem)) {
+ break;
+ }
+ }
+
+ return 0;
}
-static int kvm_handle_hypercall(struct kvm_run *run)
+static int kvm_handle_hypercall(X86CPU *cpu, struct kvm_run *run)
{
if (run->hypercall.nr == KVM_HC_MAP_GPA_RANGE)
- return kvm_handle_hc_map_gpa_range(run);
+ return kvm_handle_hc_map_gpa_range(cpu, run);
return -EINVAL;
}
@@ -6118,7 +6158,35 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
break;
#endif
case KVM_EXIT_HYPERCALL:
- ret = kvm_handle_hypercall(run);
+ ret = kvm_handle_hypercall(cpu, run);
+ break;
+ case KVM_EXIT_SYSTEM_EVENT:
+ switch (run->system_event.type) {
+ case KVM_SYSTEM_EVENT_TDX_FATAL:
+ ret = tdx_handle_report_fatal_error(cpu, run);
+ break;
+ default:
+ ret = -1;
+ break;
+ }
+ break;
+ case KVM_EXIT_TDX:
+ /*
+ * run->tdx is already set up for the case where userspace
+ * does not handle the TDVMCALL.
+ */
+ switch (run->tdx.nr) {
+ case TDVMCALL_GET_QUOTE:
+ tdx_handle_get_quote(cpu, run);
+ break;
+ case TDVMCALL_GET_TD_VM_CALL_INFO:
+ tdx_handle_get_tdvmcall_info(cpu, run);
+ break;
+ case TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT:
+ tdx_handle_setup_event_notify_interrupt(cpu, run);
+ break;
+ }
+ ret = 0;
break;
default:
fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h
index 88565e8..5f83e88 100644
--- a/target/i386/kvm/kvm_i386.h
+++ b/target/i386/kvm/kvm_i386.h
@@ -13,6 +13,8 @@
#include "system/kvm.h"
+#define KVM_MAX_CPUID_ENTRIES 100
+
/* always false if !CONFIG_KVM */
#define kvm_pit_in_kernel() \
(kvm_irqchip_in_kernel() && !kvm_irqchip_is_split())
@@ -42,6 +44,13 @@ void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask);
#ifdef CONFIG_KVM
+#include <linux/kvm.h>
+
+typedef struct KvmCpuidInfo {
+ struct kvm_cpuid2 cpuid;
+ struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES];
+} KvmCpuidInfo;
+
bool kvm_is_vm_type_supported(int type);
bool kvm_has_adjust_clock_stable(void);
bool kvm_has_exception_payload(void);
@@ -57,6 +66,12 @@ uint64_t kvm_swizzle_msi_ext_dest_id(uint64_t address);
void kvm_update_msi_routes_all(void *private, bool global,
uint32_t index, uint32_t mask);
+struct kvm_cpuid_entry2 *cpuid_find_entry(struct kvm_cpuid2 *cpuid,
+ uint32_t function,
+ uint32_t index);
+uint32_t cpuid_entry_get_reg(struct kvm_cpuid_entry2 *entry, int reg);
+uint32_t kvm_x86_build_cpuid(CPUX86State *env, struct kvm_cpuid_entry2 *entries,
+ uint32_t cpuid_i);
#endif /* CONFIG_KVM */
void kvm_pc_setup_irq_routing(bool pci_enabled);
diff --git a/target/i386/kvm/meson.build b/target/i386/kvm/meson.build
index 3996caf..2675bf8 100644
--- a/target/i386/kvm/meson.build
+++ b/target/i386/kvm/meson.build
@@ -8,6 +8,8 @@ i386_kvm_ss.add(files(
i386_kvm_ss.add(when: 'CONFIG_XEN_EMU', if_true: files('xen-emu.c'))
+i386_kvm_ss.add(when: 'CONFIG_TDX', if_true: files('tdx.c', 'tdx-quote-generator.c'), if_false: files('tdx-stub.c'))
+
i386_system_ss.add(when: 'CONFIG_HYPERV', if_true: files('hyperv.c'), if_false: files('hyperv-stub.c'))
i386_system_ss.add_all(when: 'CONFIG_KVM', if_true: i386_kvm_ss)
diff --git a/target/i386/kvm/tdx-quote-generator.c b/target/i386/kvm/tdx-quote-generator.c
new file mode 100644
index 0000000..dee8334
--- /dev/null
+++ b/target/i386/kvm/tdx-quote-generator.c
@@ -0,0 +1,302 @@
+/*
+ * QEMU TDX Quote Generation Support
+ *
+ * Copyright (c) 2025 Intel Corporation
+ *
+ * Author:
+ * Xiaoyao Li <xiaoyao.li@intel.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+#include "qapi/qapi-visit-sockets.h"
+
+#include "tdx-quote-generator.h"
+
+#define QGS_MSG_LIB_MAJOR_VER 1
+#define QGS_MSG_LIB_MINOR_VER 1
+
+typedef enum _qgs_msg_type_t {
+ GET_QUOTE_REQ = 0,
+ GET_QUOTE_RESP = 1,
+ GET_COLLATERAL_REQ = 2,
+ GET_COLLATERAL_RESP = 3,
+ GET_PLATFORM_INFO_REQ = 4,
+ GET_PLATFORM_INFO_RESP = 5,
+ QGS_MSG_TYPE_MAX
+} qgs_msg_type_t;
+
+typedef struct _qgs_msg_header_t {
+ uint16_t major_version;
+ uint16_t minor_version;
+ uint32_t type;
+ uint32_t size; // size of the whole message, including this header, in bytes
+ uint32_t error_code; // used in response only
+} qgs_msg_header_t;
+
+typedef struct _qgs_msg_get_quote_req_t {
+ qgs_msg_header_t header; // header.type = GET_QUOTE_REQ
+ uint32_t report_size; // cannot be 0
+ uint32_t id_list_size; // length of id_list, in bytes, can be 0
+} qgs_msg_get_quote_req_t;
+
+typedef struct _qgs_msg_get_quote_resp_s {
+ qgs_msg_header_t header; // header.type = GET_QUOTE_RESP
+ uint32_t selected_id_size; // can be 0 in case only one id is sent in request
+ uint32_t quote_size; // length of quote_data, in bytes
+ uint8_t id_quote[]; // selected id followed by quote
+} qgs_msg_get_quote_resp_t;
+
+#define HEADER_SIZE 4
+
+static uint32_t decode_header(const char *buf, size_t len) {
+ if (len < HEADER_SIZE) {
+ return 0;
+ }
+ uint32_t msg_size = 0;
+ for (uint32_t i = 0; i < HEADER_SIZE; ++i) {
+ msg_size = msg_size * 256 + (buf[i] & 0xFF);
+ }
+ return msg_size;
+}
+
+static void encode_header(char *buf, size_t len, uint32_t size) {
+ assert(len >= HEADER_SIZE);
+ buf[0] = ((size >> 24) & 0xFF);
+ buf[1] = ((size >> 16) & 0xFF);
+ buf[2] = ((size >> 8) & 0xFF);
+ buf[3] = (size & 0xFF);
+}
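+/*
+ * Framing example (illustrative): a 32-byte QGS message is sent as the
+ * four big-endian length bytes 00 00 00 20 followed by the message
+ * itself, and decode_header() applied to those four bytes returns 0x20.
+ */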
+
+static void tdx_generate_quote_cleanup(TdxGenerateQuoteTask *task)
+{
+ timer_del(&task->timer);
+
+ if (task->watch) {
+ g_source_remove(task->watch);
+ }
+ qio_channel_close(QIO_CHANNEL(task->sioc), NULL);
+ object_unref(OBJECT(task->sioc));
+
+ task->completion(task);
+}
+
+static gboolean tdx_get_quote_read(QIOChannel *ioc, GIOCondition condition,
+ gpointer opaque)
+{
+ TdxGenerateQuoteTask *task = opaque;
+ Error *err = NULL;
+ int ret;
+
+ ret = qio_channel_read(ioc, task->receive_buf + task->receive_buf_received,
+ task->payload_len - task->receive_buf_received, &err);
+ if (ret < 0) {
+ if (ret == QIO_CHANNEL_ERR_BLOCK) {
+ return G_SOURCE_CONTINUE;
+ } else {
+ error_report_err(err);
+ task->status_code = TDX_VP_GET_QUOTE_ERROR;
+ goto end;
+ }
+ }
+
+ if (ret == 0) {
+ error_report("End of file before reply received");
+ task->status_code = TDX_VP_GET_QUOTE_ERROR;
+ goto end;
+ }
+
+ task->receive_buf_received += ret;
+ if (task->receive_buf_received >= HEADER_SIZE) {
+ uint32_t len = decode_header(task->receive_buf,
+ task->receive_buf_received);
+ if (len == 0 ||
+ len > (task->payload_len - HEADER_SIZE)) {
+ error_report("Message len %u must be non-zero & less than %zu",
+ len, (task->payload_len - HEADER_SIZE));
+ task->status_code = TDX_VP_GET_QUOTE_ERROR;
+ goto end;
+ }
+
+ /* Now we know the size, shrink to fit */
+ task->payload_len = HEADER_SIZE + len;
+ task->receive_buf = g_renew(char,
+ task->receive_buf,
+ task->payload_len);
+ }
+
+ if (task->receive_buf_received >= (sizeof(qgs_msg_header_t) + HEADER_SIZE)) {
+ qgs_msg_header_t *hdr = (qgs_msg_header_t *)(task->receive_buf + HEADER_SIZE);
+ if (hdr->major_version != QGS_MSG_LIB_MAJOR_VER ||
+ hdr->minor_version != QGS_MSG_LIB_MINOR_VER) {
+ error_report("Invalid QGS message header version %d.%d",
+ hdr->major_version,
+ hdr->minor_version);
+ task->status_code = TDX_VP_GET_QUOTE_ERROR;
+ goto end;
+ }
+ if (hdr->type != GET_QUOTE_RESP) {
+ error_report("Invalid QGS message type %d",
+ hdr->type);
+ task->status_code = TDX_VP_GET_QUOTE_ERROR;
+ goto end;
+ }
+ if (hdr->size > (task->payload_len - HEADER_SIZE)) {
+ error_report("QGS message size %d exceeds payload capacity %zu",
+ hdr->size, task->payload_len);
+ task->status_code = TDX_VP_GET_QUOTE_ERROR;
+ goto end;
+ }
+ if (hdr->error_code != 0) {
+ error_report("QGS message error code %d",
+ hdr->error_code);
+ task->status_code = TDX_VP_GET_QUOTE_ERROR;
+ goto end;
+ }
+ }
+ if (task->receive_buf_received >= (sizeof(qgs_msg_get_quote_resp_t) + HEADER_SIZE)) {
+ qgs_msg_get_quote_resp_t *msg = (qgs_msg_get_quote_resp_t *)(task->receive_buf + HEADER_SIZE);
+ if (msg->selected_id_size != 0) {
+ error_report("QGS message selected ID was %d not 0",
+ msg->selected_id_size);
+ task->status_code = TDX_VP_GET_QUOTE_ERROR;
+ goto end;
+ }
+
+ if ((task->payload_len - HEADER_SIZE - sizeof(qgs_msg_get_quote_resp_t)) !=
+ msg->quote_size) {
+ error_report("QGS quote size %d should be %zu",
+ msg->quote_size,
+ (task->payload_len - sizeof(qgs_msg_get_quote_resp_t)));
+ task->status_code = TDX_VP_GET_QUOTE_ERROR;
+ goto end;
+ }
+ }
+
+ if (task->receive_buf_received == task->payload_len) {
+ size_t strip = HEADER_SIZE + sizeof(qgs_msg_get_quote_resp_t);
+ memmove(task->receive_buf,
+ task->receive_buf + strip,
+ task->receive_buf_received - strip);
+ task->receive_buf_received -= strip;
+ task->status_code = TDX_VP_GET_QUOTE_SUCCESS;
+ goto end;
+ }
+
+ return G_SOURCE_CONTINUE;
+
+end:
+ tdx_generate_quote_cleanup(task);
+ return G_SOURCE_REMOVE;
+}
+
+static gboolean tdx_send_report(QIOChannel *ioc, GIOCondition condition,
+ gpointer opaque)
+{
+ TdxGenerateQuoteTask *task = opaque;
+ Error *err = NULL;
+ int ret;
+
+ ret = qio_channel_write(ioc, task->send_data + task->send_data_sent,
+ task->send_data_size - task->send_data_sent, &err);
+ if (ret < 0) {
+ if (ret == QIO_CHANNEL_ERR_BLOCK) {
+ ret = 0;
+ } else {
+ error_report_err(err);
+ task->status_code = TDX_VP_GET_QUOTE_ERROR;
+ tdx_generate_quote_cleanup(task);
+ goto end;
+ }
+ }
+ task->send_data_sent += ret;
+
+ if (task->send_data_sent == task->send_data_size) {
+ task->watch = qio_channel_add_watch(QIO_CHANNEL(task->sioc), G_IO_IN,
+ tdx_get_quote_read, task, NULL);
+ goto end;
+ }
+
+ return G_SOURCE_CONTINUE;
+
+end:
+ return G_SOURCE_REMOVE;
+}
+
+static void tdx_quote_generator_connected(QIOTask *qio_task, gpointer opaque)
+{
+ TdxGenerateQuoteTask *task = opaque;
+ Error *err = NULL;
+ int ret;
+
+ ret = qio_task_propagate_error(qio_task, &err);
+ if (ret) {
+ error_report_err(err);
+ task->status_code = TDX_VP_GET_QUOTE_QGS_UNAVAILABLE;
+ tdx_generate_quote_cleanup(task);
+ return;
+ }
+
+ task->watch = qio_channel_add_watch(QIO_CHANNEL(task->sioc), G_IO_OUT,
+ tdx_send_report, task, NULL);
+}
+
+#define TRANSACTION_TIMEOUT 30000
+
+static void getquote_expired(void *opaque)
+{
+ TdxGenerateQuoteTask *task = opaque;
+
+ task->status_code = TDX_VP_GET_QUOTE_ERROR;
+ tdx_generate_quote_cleanup(task);
+}
+
+static void setup_get_quote_timer(TdxGenerateQuoteTask *task)
+{
+ int64_t time;
+
+ timer_init_ms(&task->timer, QEMU_CLOCK_VIRTUAL, getquote_expired, task);
+ time = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
+ timer_mod(&task->timer, time + TRANSACTION_TIMEOUT);
+}
+
+void tdx_generate_quote(TdxGenerateQuoteTask *task,
+ SocketAddress *qg_sock_addr)
+{
+ QIOChannelSocket *sioc;
+ qgs_msg_get_quote_req_t msg;
+
+ /* Prepare a QGS message prelude */
+ msg.header.major_version = QGS_MSG_LIB_MAJOR_VER;
+ msg.header.minor_version = QGS_MSG_LIB_MINOR_VER;
+ msg.header.type = GET_QUOTE_REQ;
+ msg.header.size = sizeof(msg) + task->send_data_size;
+ msg.header.error_code = 0;
+ msg.report_size = task->send_data_size;
+ msg.id_list_size = 0;
+
+ /* Make room to add the QGS message prelude */
+ task->send_data = g_renew(char,
+ task->send_data,
+ task->send_data_size + sizeof(msg) + HEADER_SIZE);
+ memmove(task->send_data + sizeof(msg) + HEADER_SIZE,
+ task->send_data,
+ task->send_data_size);
+ memcpy(task->send_data + HEADER_SIZE,
+ &msg,
+ sizeof(msg));
+ encode_header(task->send_data, HEADER_SIZE, task->send_data_size + sizeof(msg));
+ task->send_data_size += sizeof(msg) + HEADER_SIZE;
+
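+ /*
+ * Resulting wire layout (sketch): the 4-byte big-endian length covers
+ * the QGS request header plus the report, followed by the
+ * qgs_msg_get_quote_req_t header itself and then the original TDREPORT
+ * bytes shifted up by the memmove above.
+ */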
+ sioc = qio_channel_socket_new();
+ task->sioc = sioc;
+
+ setup_get_quote_timer(task);
+
+ qio_channel_socket_connect_async(sioc, qg_sock_addr,
+ tdx_quote_generator_connected, task,
+ NULL, NULL);
+}
diff --git a/target/i386/kvm/tdx-quote-generator.h b/target/i386/kvm/tdx-quote-generator.h
new file mode 100644
index 0000000..3bd9b8e
--- /dev/null
+++ b/target/i386/kvm/tdx-quote-generator.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef QEMU_I386_TDX_QUOTE_GENERATOR_H
+#define QEMU_I386_TDX_QUOTE_GENERATOR_H
+
+#include "qom/object_interfaces.h"
+#include "io/channel-socket.h"
+#include "exec/hwaddr.h"
+
+#define TDX_GET_QUOTE_STRUCTURE_VERSION 1ULL
+
+#define TDX_VP_GET_QUOTE_SUCCESS 0ULL
+#define TDX_VP_GET_QUOTE_IN_FLIGHT (-1ULL)
+#define TDX_VP_GET_QUOTE_ERROR 0x8000000000000000ULL
+#define TDX_VP_GET_QUOTE_QGS_UNAVAILABLE 0x8000000000000001ULL
+
+/* Limit to avoid resource starvation. */
+#define TDX_GET_QUOTE_MAX_BUF_LEN (128 * 1024)
+#define TDX_MAX_GET_QUOTE_REQUEST 16
+
+#define TDX_GET_QUOTE_HDR_SIZE 24
+
+/* Format of pages shared with guest. */
+struct tdx_get_quote_header {
+ /* Format version: must be 1 in little endian. */
+ uint64_t structure_version;
+
+ /*
+ * GetQuote status code in little endian:
+ * Guest must set error_code to 0 to avoid information leak.
+ * Qemu sets this before interrupting guest.
+ */
+ uint64_t error_code;
+
+ /*
+ * in-message size in little endian: The message will follow this header.
+ * The in-message will be sent to QGS.
+ */
+ uint32_t in_len;
+
+ /*
+ * out-message size in little endian:
+ * On request, out_len must be zero to avoid information leak.
+ * On return, message size from QGS. Qemu overwrites this field.
+ * The message will follow this header. The in-message is overwritten.
+ */
+ uint32_t out_len;
+
+ /*
+ * Message buffer follows.
+ * Guest sets the message that will be sent to QGS. If out_len > in_len, guest
+ * should zero remaining buffer to avoid information leak.
+ * Qemu overwrites this buffer with a message returned from QGS.
+ */
+};
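+/*
+ * Layout sketch (for illustration only): the 24-byte header above sits
+ * at the start of the shared buffer, the guest's in_len-byte request
+ * follows it, and on completion QEMU rewrites error_code/out_len and
+ * overwrites the same message area with the reply from QGS.
+ */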
+
+typedef struct TdxGenerateQuoteTask {
+ hwaddr buf_gpa;
+ hwaddr payload_gpa;
+ uint64_t payload_len;
+
+ char *send_data;
+ uint64_t send_data_size;
+ uint64_t send_data_sent;
+
+ char *receive_buf;
+ uint64_t receive_buf_received;
+
+ uint64_t status_code;
+ struct tdx_get_quote_header hdr;
+
+ QIOChannelSocket *sioc;
+ guint watch;
+ QEMUTimer timer;
+
+ void (*completion)(struct TdxGenerateQuoteTask *task);
+ void *opaque;
+} TdxGenerateQuoteTask;
+
+void tdx_generate_quote(TdxGenerateQuoteTask *task, SocketAddress *qg_sock_addr);
+
+#endif /* QEMU_I386_TDX_QUOTE_GENERATOR_H */
diff --git a/target/i386/kvm/tdx-stub.c b/target/i386/kvm/tdx-stub.c
new file mode 100644
index 0000000..1f0e108
--- /dev/null
+++ b/target/i386/kvm/tdx-stub.c
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include "qemu/osdep.h"
+
+#include "tdx.h"
+
+int tdx_pre_create_vcpu(CPUState *cpu, Error **errp)
+{
+ return -EINVAL;
+}
+
+int tdx_parse_tdvf(void *flash_ptr, int size)
+{
+ return -EINVAL;
+}
+
+int tdx_handle_report_fatal_error(X86CPU *cpu, struct kvm_run *run)
+{
+ return -EINVAL;
+}
+
+void tdx_handle_get_quote(X86CPU *cpu, struct kvm_run *run)
+{
+}
+
+void tdx_handle_get_tdvmcall_info(X86CPU *cpu, struct kvm_run *run)
+{
+}
+
+void tdx_handle_setup_event_notify_interrupt(X86CPU *cpu, struct kvm_run *run)
+{
+}
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
new file mode 100644
index 0000000..7d69d6d
--- /dev/null
+++ b/target/i386/kvm/tdx.c
@@ -0,0 +1,1546 @@
+/*
+ * QEMU TDX support
+ *
+ * Copyright (c) 2025 Intel Corporation
+ *
+ * Author:
+ * Xiaoyao Li <xiaoyao.li@intel.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "qemu/base64.h"
+#include "qemu/mmap-alloc.h"
+#include "qapi/error.h"
+#include "qapi/qapi-visit-sockets.h"
+#include "qom/object_interfaces.h"
+#include "crypto/hash.h"
+#include "system/kvm_int.h"
+#include "system/runstate.h"
+#include "system/system.h"
+#include "system/ramblock.h"
+#include "system/address-spaces.h"
+
+#include <linux/kvm_para.h>
+
+#include "cpu.h"
+#include "cpu-internal.h"
+#include "host-cpu.h"
+#include "hw/i386/apic_internal.h"
+#include "hw/i386/apic-msidef.h"
+#include "hw/i386/e820_memory_layout.h"
+#include "hw/i386/tdvf.h"
+#include "hw/i386/x86.h"
+#include "hw/i386/tdvf-hob.h"
+#include "hw/pci/msi.h"
+#include "kvm_i386.h"
+#include "tdx.h"
+#include "tdx-quote-generator.h"
+
+#include "standard-headers/asm-x86/kvm_para.h"
+
+#define TDX_MIN_TSC_FREQUENCY_KHZ (100 * 1000)
+#define TDX_MAX_TSC_FREQUENCY_KHZ (10 * 1000 * 1000)
+
+#define TDX_TD_ATTRIBUTES_DEBUG BIT_ULL(0)
+#define TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE BIT_ULL(28)
+#define TDX_TD_ATTRIBUTES_PKS BIT_ULL(30)
+#define TDX_TD_ATTRIBUTES_PERFMON BIT_ULL(63)
+
+#define TDX_SUPPORTED_TD_ATTRS (TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE |\
+ TDX_TD_ATTRIBUTES_PKS | \
+ TDX_TD_ATTRIBUTES_PERFMON)
+
+#define TDX_SUPPORTED_KVM_FEATURES ((1U << KVM_FEATURE_NOP_IO_DELAY) | \
+ (1U << KVM_FEATURE_PV_UNHALT) | \
+ (1U << KVM_FEATURE_PV_TLB_FLUSH) | \
+ (1U << KVM_FEATURE_PV_SEND_IPI) | \
+ (1U << KVM_FEATURE_POLL_CONTROL) | \
+ (1U << KVM_FEATURE_PV_SCHED_YIELD) | \
+ (1U << KVM_FEATURE_MSI_EXT_DEST_ID))
+
+static TdxGuest *tdx_guest;
+
+static struct kvm_tdx_capabilities *tdx_caps;
+static struct kvm_cpuid2 *tdx_supported_cpuid;
+
+/* Valid after kvm_arch_init()->confidential_guest_kvm_init()->tdx_kvm_init() */
+bool is_tdx_vm(void)
+{
+ return !!tdx_guest;
+}
+
+enum tdx_ioctl_level {
+ TDX_VM_IOCTL,
+ TDX_VCPU_IOCTL,
+};
+
+static int tdx_ioctl_internal(enum tdx_ioctl_level level, void *state,
+ int cmd_id, __u32 flags, void *data,
+ Error **errp)
+{
+ struct kvm_tdx_cmd tdx_cmd = {};
+ int r;
+
+ const char *tdx_ioctl_name[] = {
+ [KVM_TDX_CAPABILITIES] = "KVM_TDX_CAPABILITIES",
+ [KVM_TDX_INIT_VM] = "KVM_TDX_INIT_VM",
+ [KVM_TDX_INIT_VCPU] = "KVM_TDX_INIT_VCPU",
+ [KVM_TDX_INIT_MEM_REGION] = "KVM_TDX_INIT_MEM_REGION",
+ [KVM_TDX_FINALIZE_VM] = "KVM_TDX_FINALIZE_VM",
+ [KVM_TDX_GET_CPUID] = "KVM_TDX_GET_CPUID",
+ };
+
+ tdx_cmd.id = cmd_id;
+ tdx_cmd.flags = flags;
+ tdx_cmd.data = (__u64)(unsigned long)data;
+
+ switch (level) {
+ case TDX_VM_IOCTL:
+ r = kvm_vm_ioctl(kvm_state, KVM_MEMORY_ENCRYPT_OP, &tdx_cmd);
+ break;
+ case TDX_VCPU_IOCTL:
+ r = kvm_vcpu_ioctl(state, KVM_MEMORY_ENCRYPT_OP, &tdx_cmd);
+ break;
+ default:
+ error_setg(errp, "Invalid tdx_ioctl_level %d", level);
+ return -EINVAL;
+ }
+
+ if (r < 0) {
+ error_setg_errno(errp, -r, "TDX ioctl %s failed, hw_errors: 0x%llx",
+ tdx_ioctl_name[cmd_id], tdx_cmd.hw_error);
+ }
+ return r;
+}
+
+static inline int tdx_vm_ioctl(int cmd_id, __u32 flags, void *data,
+ Error **errp)
+{
+ return tdx_ioctl_internal(TDX_VM_IOCTL, NULL, cmd_id, flags, data, errp);
+}
+
+static inline int tdx_vcpu_ioctl(CPUState *cpu, int cmd_id, __u32 flags,
+ void *data, Error **errp)
+{
+ return tdx_ioctl_internal(TDX_VCPU_IOCTL, cpu, cmd_id, flags, data, errp);
+}
+
+static int get_tdx_capabilities(Error **errp)
+{
+ struct kvm_tdx_capabilities *caps;
+ /* 1st generation of TDX reports 6 cpuid configs */
+ int nr_cpuid_configs = 6;
+ size_t size;
+ int r;
+
+ do {
+ Error *local_err = NULL;
+ size = sizeof(struct kvm_tdx_capabilities) +
+ nr_cpuid_configs * sizeof(struct kvm_cpuid_entry2);
+ caps = g_malloc0(size);
+ caps->cpuid.nent = nr_cpuid_configs;
+
+ r = tdx_vm_ioctl(KVM_TDX_CAPABILITIES, 0, caps, &local_err);
+ if (r == -E2BIG) {
+ g_free(caps);
+ nr_cpuid_configs *= 2;
+ if (nr_cpuid_configs > KVM_MAX_CPUID_ENTRIES) {
+ error_report("KVM TDX seems broken that number of CPUID entries"
+ " in kvm_tdx_capabilities exceeds limit: %d",
+ KVM_MAX_CPUID_ENTRIES);
+ error_propagate(errp, local_err);
+ return r;
+ }
+ error_free(local_err);
+ } else if (r < 0) {
+ g_free(caps);
+ error_propagate(errp, local_err);
+ return r;
+ }
+ } while (r == -E2BIG);
+
+ tdx_caps = caps;
+
+ return 0;
+}
+
+void tdx_set_tdvf_region(MemoryRegion *tdvf_mr)
+{
+ assert(!tdx_guest->tdvf_mr);
+ tdx_guest->tdvf_mr = tdvf_mr;
+}
+
+static TdxFirmwareEntry *tdx_get_hob_entry(TdxGuest *tdx)
+{
+ TdxFirmwareEntry *entry;
+
+ for_each_tdx_fw_entry(&tdx->tdvf, entry) {
+ if (entry->type == TDVF_SECTION_TYPE_TD_HOB) {
+ return entry;
+ }
+ }
+ error_report("TDVF metadata doesn't specify TD_HOB location.");
+ exit(1);
+}
+
+static void tdx_add_ram_entry(uint64_t address, uint64_t length,
+ enum TdxRamType type)
+{
+ uint32_t nr_entries = tdx_guest->nr_ram_entries;
+ tdx_guest->ram_entries = g_renew(TdxRamEntry, tdx_guest->ram_entries,
+ nr_entries + 1);
+
+ tdx_guest->ram_entries[nr_entries].address = address;
+ tdx_guest->ram_entries[nr_entries].length = length;
+ tdx_guest->ram_entries[nr_entries].type = type;
+ tdx_guest->nr_ram_entries++;
+}
+
+static int tdx_accept_ram_range(uint64_t address, uint64_t length)
+{
+ uint64_t head_start, tail_start, head_length, tail_length;
+ uint64_t tmp_address, tmp_length;
+ TdxRamEntry *e;
+ int i = 0;
+
+ do {
+ if (i == tdx_guest->nr_ram_entries) {
+ return -1;
+ }
+
+ e = &tdx_guest->ram_entries[i++];
+ } while (address + length <= e->address || address >= e->address + e->length);
+
+ /*
+ * The to-be-accepted ram range must be fully contained by one
+ * RAM entry.
+ */
+ if (e->address > address ||
+ e->address + e->length < address + length) {
+ return -1;
+ }
+
+ if (e->type == TDX_RAM_ADDED) {
+ return 0;
+ }
+
+ tmp_address = e->address;
+ tmp_length = e->length;
+
+ e->address = address;
+ e->length = length;
+ e->type = TDX_RAM_ADDED;
+
+ head_length = address - tmp_address;
+ if (head_length > 0) {
+ head_start = tmp_address;
+ tdx_add_ram_entry(head_start, head_length, TDX_RAM_UNACCEPTED);
+ }
+
+ tail_start = address + length;
+ if (tail_start < tmp_address + tmp_length) {
+ tail_length = tmp_address + tmp_length - tail_start;
+ tdx_add_ram_entry(tail_start, tail_length, TDX_RAM_UNACCEPTED);
+ }
+
+ return 0;
+}
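+/*
+ * Example for tdx_accept_ram_range() (illustrative): accepting
+ * [0x1000, 0x3000) out of an unaccepted entry [0x0, 0x8000) turns that
+ * entry into an added [0x1000, 0x3000) range and appends unaccepted
+ * head [0x0, 0x1000) and tail [0x3000, 0x8000) entries, which are
+ * re-sorted by address before the HOB is created.
+ */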
+
+static int tdx_ram_entry_compare(const void *lhs_, const void* rhs_)
+{
+ const TdxRamEntry *lhs = lhs_;
+ const TdxRamEntry *rhs = rhs_;
+
+ if (lhs->address == rhs->address) {
+ return 0;
+ }
+ if (le64_to_cpu(lhs->address) > le64_to_cpu(rhs->address)) {
+ return 1;
+ }
+ return -1;
+}
+
+static void tdx_init_ram_entries(void)
+{
+ unsigned i, j, nr_e820_entries;
+
+ nr_e820_entries = e820_get_table(NULL);
+ tdx_guest->ram_entries = g_new(TdxRamEntry, nr_e820_entries);
+
+ for (i = 0, j = 0; i < nr_e820_entries; i++) {
+ uint64_t addr, len;
+
+ if (e820_get_entry(i, E820_RAM, &addr, &len)) {
+ tdx_guest->ram_entries[j].address = addr;
+ tdx_guest->ram_entries[j].length = len;
+ tdx_guest->ram_entries[j].type = TDX_RAM_UNACCEPTED;
+ j++;
+ }
+ }
+ tdx_guest->nr_ram_entries = j;
+}
+
+static void tdx_post_init_vcpus(void)
+{
+ TdxFirmwareEntry *hob;
+ CPUState *cpu;
+
+ hob = tdx_get_hob_entry(tdx_guest);
+ CPU_FOREACH(cpu) {
+ tdx_vcpu_ioctl(cpu, KVM_TDX_INIT_VCPU, 0, (void *)(uintptr_t)hob->address,
+ &error_fatal);
+ }
+}
+
+static void tdx_finalize_vm(Notifier *notifier, void *unused)
+{
+ TdxFirmware *tdvf = &tdx_guest->tdvf;
+ TdxFirmwareEntry *entry;
+ RAMBlock *ram_block;
+ Error *local_err = NULL;
+ int r;
+
+ tdx_init_ram_entries();
+
+ for_each_tdx_fw_entry(tdvf, entry) {
+ switch (entry->type) {
+ case TDVF_SECTION_TYPE_BFV:
+ case TDVF_SECTION_TYPE_CFV:
+ entry->mem_ptr = tdvf->mem_ptr + entry->data_offset;
+ break;
+ case TDVF_SECTION_TYPE_TD_HOB:
+ case TDVF_SECTION_TYPE_TEMP_MEM:
+ entry->mem_ptr = qemu_ram_mmap(-1, entry->size,
+ qemu_real_host_page_size(), 0, 0);
+ if (entry->mem_ptr == MAP_FAILED) {
+ error_report("Failed to mmap memory for TDVF section %d",
+ entry->type);
+ exit(1);
+ }
+ if (tdx_accept_ram_range(entry->address, entry->size)) {
+ error_report("Failed to accept memory for TDVF section %d",
+ entry->type);
+ qemu_ram_munmap(-1, entry->mem_ptr, entry->size);
+ exit(1);
+ }
+ break;
+ default:
+ error_report("Unsupported TDVF section %d", entry->type);
+ exit(1);
+ }
+ }
+
+ qsort(tdx_guest->ram_entries, tdx_guest->nr_ram_entries,
+ sizeof(TdxRamEntry), &tdx_ram_entry_compare);
+
+ tdvf_hob_create(tdx_guest, tdx_get_hob_entry(tdx_guest));
+
+ tdx_post_init_vcpus();
+
+ for_each_tdx_fw_entry(tdvf, entry) {
+ struct kvm_tdx_init_mem_region region;
+ uint32_t flags;
+
+ region = (struct kvm_tdx_init_mem_region) {
+ .source_addr = (uintptr_t)entry->mem_ptr,
+ .gpa = entry->address,
+ .nr_pages = entry->size >> 12,
+ };
+
+ flags = entry->attributes & TDVF_SECTION_ATTRIBUTES_MR_EXTEND ?
+ KVM_TDX_MEASURE_MEMORY_REGION : 0;
+
+ do {
+ error_free(local_err);
+ local_err = NULL;
+ r = tdx_vcpu_ioctl(first_cpu, KVM_TDX_INIT_MEM_REGION, flags,
+ &region, &local_err);
+ } while (r == -EAGAIN || r == -EINTR);
+ if (r < 0) {
+ error_report_err(local_err);
+ exit(1);
+ }
+
+ if (entry->type == TDVF_SECTION_TYPE_TD_HOB ||
+ entry->type == TDVF_SECTION_TYPE_TEMP_MEM) {
+ qemu_ram_munmap(-1, entry->mem_ptr, entry->size);
+ entry->mem_ptr = NULL;
+ }
+ }
+
+    /*
+     * The TDVF image has been copied into the private region above via
+     * KVM_TDX_INIT_MEM_REGION, so the original copy is no longer needed.
+     */
+ ram_block = tdx_guest->tdvf_mr->ram_block;
+ ram_block_discard_range(ram_block, 0, ram_block->max_length);
+
+ tdx_vm_ioctl(KVM_TDX_FINALIZE_VM, 0, NULL, &error_fatal);
+ CONFIDENTIAL_GUEST_SUPPORT(tdx_guest)->ready = true;
+}
+
+static Notifier tdx_machine_done_notify = {
+ .notify = tdx_finalize_vm,
+};
+
+/*
+ * Some CPUID bits change from fixed1 to configurable bits when the TDX module
+ * supports TDX_FEATURES0.VE_REDUCTION, e.g. MCA/MCE/MTRR/CORE_CAPABILITY.
+ *
+ * To make QEMU work with all versions of the TDX module, keep a bit in
+ * tdx_fixed1_bits if it is fixed1 in any version, even if it is not fixed1 in
+ * the latest one.  Otherwise, with an older TDX module, QEMU may treat a
+ * fixed1 bit as unsupported.
+ *
+ * For a newer TDX module it does no harm to keep such bits in tdx_fixed1_bits
+ * even though they have become configurable, because tdx_fixed1_bits is only
+ * used to set up the supported bits.
+ */
+KvmCpuidInfo tdx_fixed1_bits = {
+ .cpuid.nent = 8,
+ .entries[0] = {
+ .function = 0x1,
+ .index = 0,
+ .ecx = CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_DTES64 |
+ CPUID_EXT_DSCPL | CPUID_EXT_SSSE3 | CPUID_EXT_CX16 |
+ CPUID_EXT_PDCM | CPUID_EXT_PCID | CPUID_EXT_SSE41 |
+ CPUID_EXT_SSE42 | CPUID_EXT_X2APIC | CPUID_EXT_MOVBE |
+ CPUID_EXT_POPCNT | CPUID_EXT_AES | CPUID_EXT_XSAVE |
+ CPUID_EXT_RDRAND | CPUID_EXT_HYPERVISOR,
+ .edx = CPUID_FP87 | CPUID_VME | CPUID_DE | CPUID_PSE | CPUID_TSC |
+ CPUID_MSR | CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC |
+ CPUID_SEP | CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV |
+ CPUID_PAT | CPUID_CLFLUSH | CPUID_DTS | CPUID_MMX | CPUID_FXSR |
+ CPUID_SSE | CPUID_SSE2,
+ },
+ .entries[1] = {
+ .function = 0x6,
+ .index = 0,
+ .eax = CPUID_6_EAX_ARAT,
+ },
+ .entries[2] = {
+ .function = 0x7,
+ .index = 0,
+ .flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX,
+ .ebx = CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_FDP_EXCPTN_ONLY |
+ CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_INVPCID |
+ CPUID_7_0_EBX_ZERO_FCS_FDS | CPUID_7_0_EBX_RDSEED |
+ CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLFLUSHOPT |
+ CPUID_7_0_EBX_CLWB | CPUID_7_0_EBX_SHA_NI,
+ .ecx = CPUID_7_0_ECX_BUS_LOCK_DETECT | CPUID_7_0_ECX_MOVDIRI |
+ CPUID_7_0_ECX_MOVDIR64B,
+ .edx = CPUID_7_0_EDX_MD_CLEAR | CPUID_7_0_EDX_SPEC_CTRL |
+ CPUID_7_0_EDX_STIBP | CPUID_7_0_EDX_FLUSH_L1D |
+ CPUID_7_0_EDX_ARCH_CAPABILITIES | CPUID_7_0_EDX_CORE_CAPABILITY |
+ CPUID_7_0_EDX_SPEC_CTRL_SSBD,
+ },
+ .entries[3] = {
+ .function = 0x7,
+ .index = 2,
+ .flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX,
+ .edx = CPUID_7_2_EDX_PSFD | CPUID_7_2_EDX_IPRED_CTRL |
+ CPUID_7_2_EDX_RRSBA_CTRL | CPUID_7_2_EDX_BHI_CTRL,
+ },
+ .entries[4] = {
+ .function = 0xD,
+ .index = 0,
+ .flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX,
+ .eax = XSTATE_FP_MASK | XSTATE_SSE_MASK,
+ },
+ .entries[5] = {
+ .function = 0xD,
+ .index = 1,
+ .flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX,
+ .eax = CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC|
+ CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES,
+ },
+ .entries[6] = {
+ .function = 0x80000001,
+ .index = 0,
+ .ecx = CPUID_EXT3_LAHF_LM | CPUID_EXT3_ABM | CPUID_EXT3_3DNOWPREFETCH,
+        /*
+         * Strictly speaking, SYSCALL is not a fixed1 bit since it depends on
+         * the CPU being in 64-bit mode.  But here fixed1 serves as the set of
+         * supported bits for TDX, and in that sense SYSCALL is always
+         * supported.
+         */
+ .edx = CPUID_EXT2_SYSCALL | CPUID_EXT2_NX | CPUID_EXT2_PDPE1GB |
+ CPUID_EXT2_RDTSCP | CPUID_EXT2_LM,
+ },
+ .entries[7] = {
+ .function = 0x80000007,
+ .index = 0,
+ .edx = CPUID_APM_INVTSC,
+ },
+};
+
+typedef struct TdxAttrsMap {
+ uint32_t attr_index;
+ uint32_t cpuid_leaf;
+ uint32_t cpuid_subleaf;
+ int cpuid_reg;
+ uint32_t feat_mask;
+} TdxAttrsMap;
+
+static TdxAttrsMap tdx_attrs_maps[] = {
+ {.attr_index = 27,
+ .cpuid_leaf = 7,
+ .cpuid_subleaf = 1,
+ .cpuid_reg = R_EAX,
+ .feat_mask = CPUID_7_1_EAX_LASS,},
+
+ {.attr_index = 30,
+ .cpuid_leaf = 7,
+ .cpuid_subleaf = 0,
+ .cpuid_reg = R_ECX,
+ .feat_mask = CPUID_7_0_ECX_PKS,},
+
+ {.attr_index = 31,
+ .cpuid_leaf = 7,
+ .cpuid_subleaf = 0,
+ .cpuid_reg = R_ECX,
+ .feat_mask = CPUID_7_0_ECX_KeyLocker,},
+};
+
+typedef struct TdxXFAMDep {
+ int xfam_bit;
+ FeatureMask feat_mask;
+} TdxXFAMDep;
+
+/*
+ * Note, only the CPUID bits whose virtualization type is "XFAM & Native" are
+ * defined here.
+ *
+ * Bits whose virtualization type is "XFAM & Configured & Native" are reported
+ * as configurable bits, and they are not supported unless they appear in the
+ * configurable bits list from KVM, even if the corresponding XFAM bit is
+ * supported.
+ */
+TdxXFAMDep tdx_xfam_deps[] = {
+ { XSTATE_YMM_BIT, { FEAT_1_ECX, CPUID_EXT_FMA }},
+ { XSTATE_YMM_BIT, { FEAT_7_0_EBX, CPUID_7_0_EBX_AVX2 }},
+ { XSTATE_OPMASK_BIT, { FEAT_7_0_ECX, CPUID_7_0_ECX_AVX512_VBMI}},
+ { XSTATE_OPMASK_BIT, { FEAT_7_0_EDX, CPUID_7_0_EDX_AVX512_FP16}},
+ { XSTATE_PT_BIT, { FEAT_7_0_EBX, CPUID_7_0_EBX_INTEL_PT}},
+ { XSTATE_PKRU_BIT, { FEAT_7_0_ECX, CPUID_7_0_ECX_PKU}},
+ { XSTATE_XTILE_CFG_BIT, { FEAT_7_0_EDX, CPUID_7_0_EDX_AMX_BF16 }},
+ { XSTATE_XTILE_CFG_BIT, { FEAT_7_0_EDX, CPUID_7_0_EDX_AMX_TILE }},
+ { XSTATE_XTILE_CFG_BIT, { FEAT_7_0_EDX, CPUID_7_0_EDX_AMX_INT8 }},
+};
+
+static struct kvm_cpuid_entry2 *find_in_supported_entry(uint32_t function,
+ uint32_t index)
+{
+ struct kvm_cpuid_entry2 *e;
+
+ e = cpuid_find_entry(tdx_supported_cpuid, function, index);
+ if (!e) {
+ if (tdx_supported_cpuid->nent >= KVM_MAX_CPUID_ENTRIES) {
+            error_report("tdx_supported_cpuid requires more space than %d entries",
+ KVM_MAX_CPUID_ENTRIES);
+ exit(1);
+ }
+ e = &tdx_supported_cpuid->entries[tdx_supported_cpuid->nent++];
+ e->function = function;
+ e->index = index;
+ }
+
+ return e;
+}
+
+static void tdx_add_supported_cpuid_by_fixed1_bits(void)
+{
+ struct kvm_cpuid_entry2 *e, *e1;
+ int i;
+
+ for (i = 0; i < tdx_fixed1_bits.cpuid.nent; i++) {
+ e = &tdx_fixed1_bits.entries[i];
+
+ e1 = find_in_supported_entry(e->function, e->index);
+ e1->eax |= e->eax;
+ e1->ebx |= e->ebx;
+ e1->ecx |= e->ecx;
+ e1->edx |= e->edx;
+ }
+}
+
+static void tdx_add_supported_cpuid_by_attrs(void)
+{
+ struct kvm_cpuid_entry2 *e;
+ TdxAttrsMap *map;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(tdx_attrs_maps); i++) {
+ map = &tdx_attrs_maps[i];
+ if (!((1ULL << map->attr_index) & tdx_caps->supported_attrs)) {
+ continue;
+ }
+
+ e = find_in_supported_entry(map->cpuid_leaf, map->cpuid_subleaf);
+
+ switch(map->cpuid_reg) {
+ case R_EAX:
+ e->eax |= map->feat_mask;
+ break;
+ case R_EBX:
+ e->ebx |= map->feat_mask;
+ break;
+ case R_ECX:
+ e->ecx |= map->feat_mask;
+ break;
+ case R_EDX:
+ e->edx |= map->feat_mask;
+ break;
+ }
+ }
+}
+
+static void tdx_add_supported_cpuid_by_xfam(void)
+{
+ struct kvm_cpuid_entry2 *e;
+ int i;
+
+ const TdxXFAMDep *xfam_dep;
+ const FeatureWordInfo *f;
+ for (i = 0; i < ARRAY_SIZE(tdx_xfam_deps); i++) {
+ xfam_dep = &tdx_xfam_deps[i];
+ if (!((1ULL << xfam_dep->xfam_bit) & tdx_caps->supported_xfam)) {
+ continue;
+ }
+
+ f = &feature_word_info[xfam_dep->feat_mask.index];
+ if (f->type != CPUID_FEATURE_WORD) {
+ continue;
+ }
+
+ e = find_in_supported_entry(f->cpuid.eax, f->cpuid.ecx);
+ switch(f->cpuid.reg) {
+ case R_EAX:
+ e->eax |= xfam_dep->feat_mask.mask;
+ break;
+ case R_EBX:
+ e->ebx |= xfam_dep->feat_mask.mask;
+ break;
+ case R_ECX:
+ e->ecx |= xfam_dep->feat_mask.mask;
+ break;
+ case R_EDX:
+ e->edx |= xfam_dep->feat_mask.mask;
+ break;
+ }
+ }
+
+ e = find_in_supported_entry(0xd, 0);
+ e->eax |= (tdx_caps->supported_xfam & CPUID_XSTATE_XCR0_MASK);
+ e->edx |= (tdx_caps->supported_xfam & CPUID_XSTATE_XCR0_MASK) >> 32;
+
+ e = find_in_supported_entry(0xd, 1);
+    /*
+     * Mark XFD as always supported for TDX; it is cleared later in
+     * tdx_adjust_cpuid_features() if XFD is unavailable on the hardware,
+     * because in that case the original data has it as 0.
+     */
+ e->eax |= CPUID_XSAVE_XFD;
+ e->ecx |= (tdx_caps->supported_xfam & CPUID_XSTATE_XSS_MASK);
+ e->edx |= (tdx_caps->supported_xfam & CPUID_XSTATE_XSS_MASK) >> 32;
+}
+
+static void tdx_add_supported_kvm_features(void)
+{
+ struct kvm_cpuid_entry2 *e;
+
+ e = find_in_supported_entry(0x40000001, 0);
+ e->eax = TDX_SUPPORTED_KVM_FEATURES;
+}
+
+static void tdx_setup_supported_cpuid(void)
+{
+ if (tdx_supported_cpuid) {
+ return;
+ }
+
+ tdx_supported_cpuid = g_malloc0(sizeof(*tdx_supported_cpuid) +
+ KVM_MAX_CPUID_ENTRIES * sizeof(struct kvm_cpuid_entry2));
+
+ memcpy(tdx_supported_cpuid->entries, tdx_caps->cpuid.entries,
+ tdx_caps->cpuid.nent * sizeof(struct kvm_cpuid_entry2));
+ tdx_supported_cpuid->nent = tdx_caps->cpuid.nent;
+
+ tdx_add_supported_cpuid_by_fixed1_bits();
+ tdx_add_supported_cpuid_by_attrs();
+ tdx_add_supported_cpuid_by_xfam();
+
+ tdx_add_supported_kvm_features();
+}
+
+static int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
+{
+ MachineState *ms = MACHINE(qdev_get_machine());
+ X86MachineState *x86ms = X86_MACHINE(ms);
+ TdxGuest *tdx = TDX_GUEST(cgs);
+ int r = 0;
+
+ kvm_mark_guest_state_protected();
+
+ if (x86ms->smm == ON_OFF_AUTO_AUTO) {
+ x86ms->smm = ON_OFF_AUTO_OFF;
+ } else if (x86ms->smm == ON_OFF_AUTO_ON) {
+ error_setg(errp, "TDX VM doesn't support SMM");
+ return -EINVAL;
+ }
+
+ if (x86ms->pic == ON_OFF_AUTO_AUTO) {
+ x86ms->pic = ON_OFF_AUTO_OFF;
+ } else if (x86ms->pic == ON_OFF_AUTO_ON) {
+ error_setg(errp, "TDX VM doesn't support PIC");
+ return -EINVAL;
+ }
+
+ if (kvm_state->kernel_irqchip_split == ON_OFF_AUTO_AUTO) {
+ kvm_state->kernel_irqchip_split = ON_OFF_AUTO_ON;
+ } else if (kvm_state->kernel_irqchip_split != ON_OFF_AUTO_ON) {
+ error_setg(errp, "TDX VM requires kernel_irqchip to be split");
+ return -EINVAL;
+ }
+
+ if (!tdx_caps) {
+ r = get_tdx_capabilities(errp);
+ if (r) {
+ return r;
+ }
+ }
+
+ tdx_setup_supported_cpuid();
+
+ /* TDX relies on KVM_HC_MAP_GPA_RANGE to handle TDG.VP.VMCALL<MapGPA> */
+ if (!kvm_enable_hypercall(BIT_ULL(KVM_HC_MAP_GPA_RANGE))) {
+ return -EOPNOTSUPP;
+ }
+
+    /*
+     * Set kvm_readonly_mem_allowed to false, because TDX only supports
+     * read-only memory for shared memory, not for private memory.  Besides,
+     * whether a memslot is private or shared is not determined by QEMU.
+     *
+     * Thus, just mark read-only memory as unsupported for simplicity.
+     */
+ kvm_readonly_mem_allowed = false;
+
+ qemu_add_machine_init_done_notifier(&tdx_machine_done_notify);
+
+ tdx_guest = tdx;
+ return 0;
+}
+
+static int tdx_kvm_type(X86ConfidentialGuest *cg)
+{
+ /* Do the object check */
+ TDX_GUEST(cg);
+
+ return KVM_X86_TDX_VM;
+}
+
+static void tdx_cpu_instance_init(X86ConfidentialGuest *cg, CPUState *cpu)
+{
+ X86CPUClass *xcc = X86_CPU_GET_CLASS(cpu);
+ X86CPU *x86cpu = X86_CPU(cpu);
+
+ if (xcc->model) {
+ error_report("Named cpu model is not supported for TDX yet!");
+ exit(1);
+ }
+
+ object_property_set_bool(OBJECT(cpu), "pmu", false, &error_abort);
+
+ /* invtsc is fixed1 for TD guest */
+ object_property_set_bool(OBJECT(cpu), "invtsc", true, &error_abort);
+
+ x86cpu->force_cpuid_0x1f = true;
+}
+
+static uint32_t tdx_adjust_cpuid_features(X86ConfidentialGuest *cg,
+ uint32_t feature, uint32_t index,
+ int reg, uint32_t value)
+{
+ struct kvm_cpuid_entry2 *e;
+
+ e = cpuid_find_entry(&tdx_fixed1_bits.cpuid, feature, index);
+ if (e) {
+ value |= cpuid_entry_get_reg(e, reg);
+ }
+
+ if (is_feature_word_cpuid(feature, index, reg)) {
+ e = cpuid_find_entry(tdx_supported_cpuid, feature, index);
+ if (e) {
+ value &= cpuid_entry_get_reg(e, reg);
+ }
+ }
+
+ return value;
+}
+
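tdx_fetch_cpuid() below and tdx_adjust_cpuid_features() above both revolve around the same adjustment rule: the value the CPU model requested has the TDX fixed1 bits forced on, and (when the leaf is a known feature word) anything outside the supported set stripped. A tiny standalone sketch with made-up bit values, not real CPUID definitions:

/* Adjustment sketch: force fixed1 bits on, then mask with the supported set
 * (which in practice always contains the fixed1 bits). */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint32_t requested = 0x0000f00f;   /* what the CPU model asked for */
    uint32_t fixed1    = 0x00000300;   /* bits the TDX module always sets */
    uint32_t supported = 0x0000730f;   /* bits available to TD guests */

    uint32_t value = requested;
    value |= fixed1;        /* fixed1 bits are forced on */
    value &= supported;     /* unsupported bits are dropped */

    printf("requested=0x%08x adjusted=0x%08x\n", requested, value);
    return 0;
}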
+static struct kvm_cpuid2 *tdx_fetch_cpuid(CPUState *cpu, int *ret)
+{
+ struct kvm_cpuid2 *fetch_cpuid;
+ int size = KVM_MAX_CPUID_ENTRIES;
+ Error *local_err = NULL;
+ int r;
+
+ do {
+ error_free(local_err);
+ local_err = NULL;
+
+ fetch_cpuid = g_malloc0(sizeof(*fetch_cpuid) +
+ sizeof(struct kvm_cpuid_entry2) * size);
+ fetch_cpuid->nent = size;
+ r = tdx_vcpu_ioctl(cpu, KVM_TDX_GET_CPUID, 0, fetch_cpuid, &local_err);
+ if (r == -E2BIG) {
+            /* KVM updated nent to the required size; read it before freeing. */
+            size = fetch_cpuid->nent;
+            g_free(fetch_cpuid);
+ }
+ } while (r == -E2BIG);
+
+ if (r < 0) {
+ error_report_err(local_err);
+ *ret = r;
+ return NULL;
+ }
+
+ return fetch_cpuid;
+}
+
+static int tdx_check_features(X86ConfidentialGuest *cg, CPUState *cs)
+{
+ uint64_t actual, requested, unavailable, forced_on;
+ g_autofree struct kvm_cpuid2 *fetch_cpuid;
+ const char *forced_on_prefix = NULL;
+ const char *unav_prefix = NULL;
+ struct kvm_cpuid_entry2 *entry;
+ X86CPU *cpu = X86_CPU(cs);
+ CPUX86State *env = &cpu->env;
+ FeatureWordInfo *wi;
+ FeatureWord w;
+ bool mismatch = false;
+ int r;
+
+ fetch_cpuid = tdx_fetch_cpuid(cs, &r);
+ if (!fetch_cpuid) {
+ return r;
+ }
+
+ if (cpu->check_cpuid || cpu->enforce_cpuid) {
+ unav_prefix = "TDX doesn't support requested feature";
+ forced_on_prefix = "TDX forcibly sets the feature";
+ }
+
+ for (w = 0; w < FEATURE_WORDS; w++) {
+ wi = &feature_word_info[w];
+ actual = 0;
+
+ switch (wi->type) {
+ case CPUID_FEATURE_WORD:
+ entry = cpuid_find_entry(fetch_cpuid, wi->cpuid.eax, wi->cpuid.ecx);
+ if (!entry) {
+                /*
+                 * If KVM doesn't report the leaf, it is totally configurable
+                 * by QEMU.
+                 */
+ continue;
+ }
+
+ actual = cpuid_entry_get_reg(entry, wi->cpuid.reg);
+ break;
+ case MSR_FEATURE_WORD:
+            /*
+             * TODO: validate MSR features once KVM has an interface to
+             * report them.
+             */
+ continue;
+ }
+
+ /* Fixup for special cases */
+ switch (w) {
+ case FEAT_8000_0001_EDX:
+            /*
+             * Intel enumerates the SYSCALL bit as 1 only when the processor
+             * is in 64-bit mode, and before the vCPU runs it is not yet in
+             * 64-bit mode.
+             */
+ actual |= CPUID_EXT2_SYSCALL;
+ break;
+ default:
+ break;
+ }
+
+ requested = env->features[w];
+ unavailable = requested & ~actual;
+ mark_unavailable_features(cpu, w, unavailable, unav_prefix);
+ if (unavailable) {
+ mismatch = true;
+ }
+
+ forced_on = actual & ~requested;
+ mark_forced_on_features(cpu, w, forced_on, forced_on_prefix);
+ if (forced_on) {
+ mismatch = true;
+ }
+ }
+
+ if (cpu->enforce_cpuid && mismatch) {
+ return -EINVAL;
+ }
+
+ if (cpu->phys_bits != host_cpu_phys_bits()) {
+ error_report("TDX requires guest CPU physical bits (%u) "
+ "to match host CPU physical bits (%u)",
+ cpu->phys_bits, host_cpu_phys_bits());
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int tdx_validate_attributes(TdxGuest *tdx, Error **errp)
+{
+ if ((tdx->attributes & ~tdx_caps->supported_attrs)) {
+ error_setg(errp, "Invalid attributes 0x%"PRIx64" for TDX VM "
+ "(KVM supported: 0x%"PRIx64")", tdx->attributes,
+ (uint64_t)tdx_caps->supported_attrs);
+ return -1;
+ }
+
+ if (tdx->attributes & ~TDX_SUPPORTED_TD_ATTRS) {
+        error_setg(errp, "Requested TD attributes 0x%"PRIx64" contain bits "
+                   "not supported by QEMU (QEMU supported: 0x%"PRIx64")",
+                   tdx->attributes, (uint64_t)TDX_SUPPORTED_TD_ATTRS);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int setup_td_guest_attributes(X86CPU *x86cpu, Error **errp)
+{
+ CPUX86State *env = &x86cpu->env;
+
+ tdx_guest->attributes |= (env->features[FEAT_7_0_ECX] & CPUID_7_0_ECX_PKS) ?
+ TDX_TD_ATTRIBUTES_PKS : 0;
+ tdx_guest->attributes |= x86cpu->enable_pmu ? TDX_TD_ATTRIBUTES_PERFMON : 0;
+
+ return tdx_validate_attributes(tdx_guest, errp);
+}
+
+static int setup_td_xfam(X86CPU *x86cpu, Error **errp)
+{
+ CPUX86State *env = &x86cpu->env;
+ uint64_t xfam;
+
+ xfam = env->features[FEAT_XSAVE_XCR0_LO] |
+ env->features[FEAT_XSAVE_XCR0_HI] |
+ env->features[FEAT_XSAVE_XSS_LO] |
+ env->features[FEAT_XSAVE_XSS_HI];
+
+ if (xfam & ~tdx_caps->supported_xfam) {
+        error_setg(errp, "Invalid XFAM 0x%"PRIx64" for TDX VM (supported: 0x%"PRIx64")",
+ xfam, (uint64_t)tdx_caps->supported_xfam);
+ return -1;
+ }
+
+ tdx_guest->xfam = xfam;
+ return 0;
+}
+
+static void tdx_filter_cpuid(struct kvm_cpuid2 *cpuids)
+{
+ int i, dest_cnt = 0;
+ struct kvm_cpuid_entry2 *src, *dest, *conf;
+
+ for (i = 0; i < cpuids->nent; i++) {
+ src = cpuids->entries + i;
+ conf = cpuid_find_entry(&tdx_caps->cpuid, src->function, src->index);
+ if (!conf) {
+ continue;
+ }
+ dest = cpuids->entries + dest_cnt;
+
+ dest->function = src->function;
+ dest->index = src->index;
+ dest->flags = src->flags;
+ dest->eax = src->eax & conf->eax;
+ dest->ebx = src->ebx & conf->ebx;
+ dest->ecx = src->ecx & conf->ecx;
+ dest->edx = src->edx & conf->edx;
+
+ dest_cnt++;
+ }
+    cpuids->nent = dest_cnt;
+}
+
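tdx_filter_cpuid() compacts the CPUID array in place: leaves with no configurable counterpart in tdx_caps are dropped, and the remaining entries are masked and moved to the front of the array. A standalone sketch of the drop-and-compact part, using an illustrative Entry type and leaf list:

/* In-place filter sketch: keep only entries whose leaf appears in the
 * "configurable" list and compact them towards the front. */
#include <stdint.h>
#include <stdio.h>

typedef struct { uint32_t function; uint32_t mask; } Entry;

int main(void)
{
    Entry src[] = { {0x1, 0xff}, {0x6, 0x0f}, {0x7, 0xf0} };
    uint32_t conf_funcs[] = { 0x1, 0x7 };    /* leaves reported as configurable */
    int nent = 3, dest = 0;

    for (int i = 0; i < nent; i++) {
        int configurable = 0;
        for (unsigned j = 0; j < sizeof(conf_funcs) / sizeof(conf_funcs[0]); j++) {
            if (src[i].function == conf_funcs[j]) {
                configurable = 1;
                break;
            }
        }
        if (!configurable) {
            continue;               /* drop leaves that are not configurable */
        }
        src[dest++] = src[i];       /* compact towards the front */
    }
    nent = dest;

    printf("kept %d entries\n", nent);
    return 0;
}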
+int tdx_pre_create_vcpu(CPUState *cpu, Error **errp)
+{
+ X86CPU *x86cpu = X86_CPU(cpu);
+ CPUX86State *env = &x86cpu->env;
+ g_autofree struct kvm_tdx_init_vm *init_vm = NULL;
+ Error *local_err = NULL;
+ size_t data_len;
+ int retry = 10000;
+ int r = 0;
+
+ QEMU_LOCK_GUARD(&tdx_guest->lock);
+ if (tdx_guest->initialized) {
+ return r;
+ }
+
+ init_vm = g_malloc0(sizeof(struct kvm_tdx_init_vm) +
+ sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES);
+
+ if (!kvm_check_extension(kvm_state, KVM_CAP_X86_APIC_BUS_CYCLES_NS)) {
+ error_setg(errp, "KVM doesn't support KVM_CAP_X86_APIC_BUS_CYCLES_NS");
+ return -EOPNOTSUPP;
+ }
+
+ r = kvm_vm_enable_cap(kvm_state, KVM_CAP_X86_APIC_BUS_CYCLES_NS,
+ 0, TDX_APIC_BUS_CYCLES_NS);
+ if (r < 0) {
+ error_setg_errno(errp, -r,
+ "Unable to set core crystal clock frequency to 25MHz");
+ return r;
+ }
+
+ if (env->tsc_khz && (env->tsc_khz < TDX_MIN_TSC_FREQUENCY_KHZ ||
+ env->tsc_khz > TDX_MAX_TSC_FREQUENCY_KHZ)) {
+        error_setg(errp, "Invalid TSC %"PRId64" kHz, the TSC frequency must be "
+                   "between [%d, %d] kHz", env->tsc_khz,
+ TDX_MIN_TSC_FREQUENCY_KHZ, TDX_MAX_TSC_FREQUENCY_KHZ);
+ return -EINVAL;
+ }
+
+ if (env->tsc_khz % (25 * 1000)) {
+        error_setg(errp, "Invalid TSC %"PRId64" kHz, it must be a multiple of 25 MHz",
+ env->tsc_khz);
+ return -EINVAL;
+ }
+
+    /* It's safe even if env->tsc_khz is 0; KVM uses the host's tsc_khz in that case. */
+ r = kvm_vm_ioctl(kvm_state, KVM_SET_TSC_KHZ, env->tsc_khz);
+ if (r < 0) {
+ error_setg_errno(errp, -r, "Unable to set TSC frequency to %"PRId64" kHz",
+ env->tsc_khz);
+ return r;
+ }
+
+ if (tdx_guest->mrconfigid) {
+ g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrconfigid,
+ strlen(tdx_guest->mrconfigid), &data_len, errp);
+ if (!data) {
+ return -1;
+ }
+ if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) {
+ error_setg(errp, "TDX 'mrconfigid' sha384 digest was %ld bytes, "
+ "expected %d bytes", data_len,
+ QCRYPTO_HASH_DIGEST_LEN_SHA384);
+ return -1;
+ }
+ memcpy(init_vm->mrconfigid, data, data_len);
+ }
+
+ if (tdx_guest->mrowner) {
+ g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrowner,
+ strlen(tdx_guest->mrowner), &data_len, errp);
+ if (!data) {
+ return -1;
+ }
+ if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) {
+ error_setg(errp, "TDX 'mrowner' sha384 digest was %ld bytes, "
+ "expected %d bytes", data_len,
+ QCRYPTO_HASH_DIGEST_LEN_SHA384);
+ return -1;
+ }
+ memcpy(init_vm->mrowner, data, data_len);
+ }
+
+ if (tdx_guest->mrownerconfig) {
+ g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrownerconfig,
+ strlen(tdx_guest->mrownerconfig), &data_len, errp);
+ if (!data) {
+ return -1;
+ }
+ if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) {
+ error_setg(errp, "TDX 'mrownerconfig' sha384 digest was %ld bytes, "
+ "expected %d bytes", data_len,
+ QCRYPTO_HASH_DIGEST_LEN_SHA384);
+ return -1;
+ }
+ memcpy(init_vm->mrownerconfig, data, data_len);
+ }
+
+ r = setup_td_guest_attributes(x86cpu, errp);
+ if (r) {
+ return r;
+ }
+
+ r = setup_td_xfam(x86cpu, errp);
+ if (r) {
+ return r;
+ }
+
+ init_vm->cpuid.nent = kvm_x86_build_cpuid(env, init_vm->cpuid.entries, 0);
+ tdx_filter_cpuid(&init_vm->cpuid);
+
+ init_vm->attributes = tdx_guest->attributes;
+ init_vm->xfam = tdx_guest->xfam;
+
+    /*
+     * KVM_TDX_INIT_VM returns -EAGAIN when the KVM-side SEAMCALL
+     * (TDH_MNG_CREATE) gets TDX_RND_NO_ENTROPY because random number
+     * generation (e.g., RDRAND or RDSEED) is busy.
+     *
+     * Retry in that case.
+     */
+ do {
+ error_free(local_err);
+ local_err = NULL;
+ r = tdx_vm_ioctl(KVM_TDX_INIT_VM, 0, init_vm, &local_err);
+ } while (r == -EAGAIN && --retry);
+
+ if (r < 0) {
+ if (!retry) {
+            error_append_hint(&local_err, "The hardware RNG (Random Number "
+                "Generator) is kept busy (via RDRAND/RDSEED), possibly "
+                "maliciously, so KVM_TDX_INIT_VM keeps failing due to lack "
+                "of entropy.\n");
+ }
+ error_propagate(errp, local_err);
+ return r;
+ }
+
+ tdx_guest->initialized = true;
+
+ return 0;
+}
+
+int tdx_parse_tdvf(void *flash_ptr, int size)
+{
+ return tdvf_parse_metadata(&tdx_guest->tdvf, flash_ptr, size);
+}
+
+static void tdx_inject_interrupt(uint32_t apicid, uint32_t vector)
+{
+ int ret;
+
+ if (vector < 32 || vector > 255) {
+ return;
+ }
+
+ MSIMessage msg = {
+ .address = ((apicid & 0xff) << MSI_ADDR_DEST_ID_SHIFT) |
+ (((uint64_t)apicid & 0xffffff00) << 32),
+ .data = vector | (APIC_DM_FIXED << MSI_DATA_DELIVERY_MODE_SHIFT),
+ };
+
+ ret = kvm_irqchip_send_msi(kvm_state, msg);
+ if (ret < 0) {
+        /* There is no better way to tell the guest in this case; log it. */
+        error_report("TDX: injecting interrupt %d failed, interrupt lost (%s).",
+                     vector, strerror(-ret));
+ }
+}
+
+static void tdx_get_quote_completion(TdxGenerateQuoteTask *task)
+{
+ TdxGuest *tdx = task->opaque;
+ int ret;
+
+ /* Maintain the number of in-flight requests. */
+ qemu_mutex_lock(&tdx->lock);
+ tdx->num--;
+ qemu_mutex_unlock(&tdx->lock);
+
+ if (task->status_code == TDX_VP_GET_QUOTE_SUCCESS) {
+ ret = address_space_write(&address_space_memory, task->payload_gpa,
+ MEMTXATTRS_UNSPECIFIED, task->receive_buf,
+ task->receive_buf_received);
+ if (ret != MEMTX_OK) {
+ error_report("TDX: get-quote: failed to write quote data.");
+ } else {
+ task->hdr.out_len = cpu_to_le64(task->receive_buf_received);
+ }
+ }
+ task->hdr.error_code = cpu_to_le64(task->status_code);
+
+ /* Publish the response contents before marking this request completed. */
+ smp_wmb();
+ ret = address_space_write(&address_space_memory, task->buf_gpa,
+ MEMTXATTRS_UNSPECIFIED, &task->hdr,
+ TDX_GET_QUOTE_HDR_SIZE);
+ if (ret != MEMTX_OK) {
+ error_report("TDX: get-quote: failed to update GetQuote header.");
+ }
+
+ tdx_inject_interrupt(tdx_guest->event_notify_apicid,
+ tdx_guest->event_notify_vector);
+
+ g_free(task->send_data);
+ g_free(task->receive_buf);
+ g_free(task);
+ object_unref(tdx);
+}
+
+void tdx_handle_get_quote(X86CPU *cpu, struct kvm_run *run)
+{
+ TdxGenerateQuoteTask *task;
+ struct tdx_get_quote_header hdr;
+ hwaddr buf_gpa = run->tdx.get_quote.gpa;
+ uint64_t buf_len = run->tdx.get_quote.size;
+
+ QEMU_BUILD_BUG_ON(sizeof(struct tdx_get_quote_header) != TDX_GET_QUOTE_HDR_SIZE);
+
+ run->tdx.get_quote.ret = TDG_VP_VMCALL_INVALID_OPERAND;
+
+ if (buf_len == 0) {
+ return;
+ }
+
+ if (!QEMU_IS_ALIGNED(buf_gpa, 4096) || !QEMU_IS_ALIGNED(buf_len, 4096)) {
+ run->tdx.get_quote.ret = TDG_VP_VMCALL_ALIGN_ERROR;
+ return;
+ }
+
+ if (address_space_read(&address_space_memory, buf_gpa, MEMTXATTRS_UNSPECIFIED,
+ &hdr, TDX_GET_QUOTE_HDR_SIZE) != MEMTX_OK) {
+ error_report("TDX: get-quote: failed to read GetQuote header.");
+ return;
+ }
+
+ if (le64_to_cpu(hdr.structure_version) != TDX_GET_QUOTE_STRUCTURE_VERSION) {
+ return;
+ }
+
+    /* This is only a safeguard against an overly large buffer size. */
+ if (buf_len > TDX_GET_QUOTE_MAX_BUF_LEN ||
+ le32_to_cpu(hdr.in_len) > buf_len - TDX_GET_QUOTE_HDR_SIZE) {
+ return;
+ }
+
+ if (!tdx_guest->qg_sock_addr) {
+ hdr.error_code = cpu_to_le64(TDX_VP_GET_QUOTE_QGS_UNAVAILABLE);
+ if (address_space_write(&address_space_memory, buf_gpa,
+ MEMTXATTRS_UNSPECIFIED,
+ &hdr, TDX_GET_QUOTE_HDR_SIZE) != MEMTX_OK) {
+ error_report("TDX: failed to update GetQuote header.");
+ return;
+ }
+ run->tdx.get_quote.ret = TDG_VP_VMCALL_SUCCESS;
+ return;
+ }
+
+ qemu_mutex_lock(&tdx_guest->lock);
+ if (tdx_guest->num >= TDX_MAX_GET_QUOTE_REQUEST) {
+ qemu_mutex_unlock(&tdx_guest->lock);
+ run->tdx.get_quote.ret = TDG_VP_VMCALL_RETRY;
+ return;
+ }
+ tdx_guest->num++;
+ qemu_mutex_unlock(&tdx_guest->lock);
+
+ task = g_new(TdxGenerateQuoteTask, 1);
+ task->buf_gpa = buf_gpa;
+ task->payload_gpa = buf_gpa + TDX_GET_QUOTE_HDR_SIZE;
+ task->payload_len = buf_len - TDX_GET_QUOTE_HDR_SIZE;
+ task->hdr = hdr;
+ task->completion = tdx_get_quote_completion;
+
+ task->send_data_size = le32_to_cpu(hdr.in_len);
+ task->send_data = g_malloc(task->send_data_size);
+ task->send_data_sent = 0;
+
+ if (address_space_read(&address_space_memory, task->payload_gpa,
+ MEMTXATTRS_UNSPECIFIED, task->send_data,
+ task->send_data_size) != MEMTX_OK) {
+ goto out_free;
+ }
+
+ /* Mark the buffer in-flight. */
+ hdr.error_code = cpu_to_le64(TDX_VP_GET_QUOTE_IN_FLIGHT);
+ if (address_space_write(&address_space_memory, buf_gpa,
+ MEMTXATTRS_UNSPECIFIED,
+ &hdr, TDX_GET_QUOTE_HDR_SIZE) != MEMTX_OK) {
+ goto out_free;
+ }
+
+ task->receive_buf = g_malloc0(task->payload_len);
+ task->receive_buf_received = 0;
+ task->opaque = tdx_guest;
+
+ object_ref(tdx_guest);
+ tdx_generate_quote(task, tdx_guest->qg_sock_addr);
+ run->tdx.get_quote.ret = TDG_VP_VMCALL_SUCCESS;
+ return;
+
+out_free:
+ g_free(task->send_data);
+ g_free(task);
+}
+
+#define SUPPORTED_TDVMCALLINFO_1_R11 (TDG_VP_VMCALL_SUBFUNC_SET_EVENT_NOTIFY_INTERRUPT)
+#define SUPPORTED_TDVMCALLINFO_1_R12 (0)
+
+void tdx_handle_get_tdvmcall_info(X86CPU *cpu, struct kvm_run *run)
+{
+ if (run->tdx.get_tdvmcall_info.leaf != 1) {
+ return;
+ }
+
+ run->tdx.get_tdvmcall_info.r11 = (tdx_caps->user_tdvmcallinfo_1_r11 &
+ SUPPORTED_TDVMCALLINFO_1_R11) |
+ tdx_caps->kernel_tdvmcallinfo_1_r11;
+ run->tdx.get_tdvmcall_info.r12 = (tdx_caps->user_tdvmcallinfo_1_r12 &
+ SUPPORTED_TDVMCALLINFO_1_R12) |
+ tdx_caps->kernel_tdvmcallinfo_1_r12;
+ run->tdx.get_tdvmcall_info.r13 = 0;
+ run->tdx.get_tdvmcall_info.r14 = 0;
+
+ run->tdx.get_tdvmcall_info.ret = TDG_VP_VMCALL_SUCCESS;
+}
+
+void tdx_handle_setup_event_notify_interrupt(X86CPU *cpu, struct kvm_run *run)
+{
+ uint64_t vector = run->tdx.setup_event_notify.vector;
+
+ if (vector >= 32 && vector < 256) {
+ qemu_mutex_lock(&tdx_guest->lock);
+ tdx_guest->event_notify_vector = vector;
+ tdx_guest->event_notify_apicid = cpu->apic_id;
+ qemu_mutex_unlock(&tdx_guest->lock);
+ run->tdx.setup_event_notify.ret = TDG_VP_VMCALL_SUCCESS;
+ } else {
+ run->tdx.setup_event_notify.ret = TDG_VP_VMCALL_INVALID_OPERAND;
+ }
+}
+
+static void tdx_panicked_on_fatal_error(X86CPU *cpu, uint64_t error_code,
+ char *message, bool has_gpa,
+ uint64_t gpa)
+{
+ GuestPanicInformation *panic_info;
+
+ panic_info = g_new0(GuestPanicInformation, 1);
+ panic_info->type = GUEST_PANIC_INFORMATION_TYPE_TDX;
+ panic_info->u.tdx.error_code = (uint32_t) error_code;
+ panic_info->u.tdx.message = message;
+ panic_info->u.tdx.gpa = gpa;
+ panic_info->u.tdx.has_gpa = has_gpa;
+
+ qemu_system_guest_panicked(panic_info);
+}
+
+/*
+ * Only 8 registers can carry the ASCII byte stream that forms the fatal
+ * message; their order is: R14, R15, RBX, RDI, RSI, R8, R9, RDX.
+ */
+#define TDX_FATAL_MESSAGE_MAX 64
+
+#define TDX_REPORT_FATAL_ERROR_GPA_VALID BIT_ULL(63)
+
+int tdx_handle_report_fatal_error(X86CPU *cpu, struct kvm_run *run)
+{
+ uint64_t error_code = run->system_event.data[R_R12];
+ uint64_t reg_mask = run->system_event.data[R_ECX];
+ char *message = NULL;
+ uint64_t *tmp;
+ uint64_t gpa = -1ull;
+ bool has_gpa = false;
+
+ if (error_code & 0xffff) {
+ error_report("TDX: REPORT_FATAL_ERROR: invalid error code: 0x%"PRIx64,
+ error_code);
+ return -1;
+ }
+
+ if (reg_mask) {
+ message = g_malloc0(TDX_FATAL_MESSAGE_MAX + 1);
+ tmp = (uint64_t *)message;
+
+#define COPY_REG(REG) \
+ do { \
+ if (reg_mask & BIT_ULL(REG)) { \
+ *(tmp++) = run->system_event.data[REG]; \
+ } \
+ } while (0)
+
+ COPY_REG(R_R14);
+ COPY_REG(R_R15);
+ COPY_REG(R_EBX);
+ COPY_REG(R_EDI);
+ COPY_REG(R_ESI);
+ COPY_REG(R_R8);
+ COPY_REG(R_R9);
+ COPY_REG(R_EDX);
+ *((char *)tmp) = '\0';
+ }
+#undef COPY_REG
+
+ if (error_code & TDX_REPORT_FATAL_ERROR_GPA_VALID) {
+ gpa = run->system_event.data[R_R13];
+ has_gpa = true;
+ }
+
+ tdx_panicked_on_fatal_error(cpu, error_code, message, has_gpa, gpa);
+
+ return -1;
+}
+
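tdx_handle_report_fatal_error() above rebuilds the guest's panic message by concatenating the raw 8-byte values of the selected registers in the fixed R14, R15, RBX, RDI, RSI, R8, R9, RDX order. A standalone sketch of that byte-level assembly, assuming a little-endian host (as on x86) and illustrative register contents:

/* Message assembly sketch: each register carries 8 ASCII bytes; copying the
 * registers back-to-back and NUL-terminating yields the guest's message. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
    uint64_t regs[2];
    char message[2 * sizeof(uint64_t) + 1];

    memcpy(&regs[0], "TD panic", 8);          /* little-endian: byte 0 is 'T' */
    memcpy(&regs[1], "!\0\0\0\0\0\0\0", 8);

    memcpy(message, regs, sizeof(regs));
    message[sizeof(regs)] = '\0';

    printf("%s\n", message);                  /* prints: TD panic! */
    return 0;
}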
+static bool tdx_guest_get_sept_ve_disable(Object *obj, Error **errp)
+{
+ TdxGuest *tdx = TDX_GUEST(obj);
+
+ return !!(tdx->attributes & TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE);
+}
+
+static void tdx_guest_set_sept_ve_disable(Object *obj, bool value, Error **errp)
+{
+ TdxGuest *tdx = TDX_GUEST(obj);
+
+ if (value) {
+ tdx->attributes |= TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE;
+ } else {
+ tdx->attributes &= ~TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE;
+ }
+}
+
+static char *tdx_guest_get_mrconfigid(Object *obj, Error **errp)
+{
+ TdxGuest *tdx = TDX_GUEST(obj);
+
+ return g_strdup(tdx->mrconfigid);
+}
+
+static void tdx_guest_set_mrconfigid(Object *obj, const char *value, Error **errp)
+{
+ TdxGuest *tdx = TDX_GUEST(obj);
+
+ g_free(tdx->mrconfigid);
+ tdx->mrconfigid = g_strdup(value);
+}
+
+static char *tdx_guest_get_mrowner(Object *obj, Error **errp)
+{
+ TdxGuest *tdx = TDX_GUEST(obj);
+
+ return g_strdup(tdx->mrowner);
+}
+
+static void tdx_guest_set_mrowner(Object *obj, const char *value, Error **errp)
+{
+ TdxGuest *tdx = TDX_GUEST(obj);
+
+ g_free(tdx->mrowner);
+ tdx->mrowner = g_strdup(value);
+}
+
+static char *tdx_guest_get_mrownerconfig(Object *obj, Error **errp)
+{
+ TdxGuest *tdx = TDX_GUEST(obj);
+
+ return g_strdup(tdx->mrownerconfig);
+}
+
+static void tdx_guest_set_mrownerconfig(Object *obj, const char *value, Error **errp)
+{
+ TdxGuest *tdx = TDX_GUEST(obj);
+
+ g_free(tdx->mrownerconfig);
+ tdx->mrownerconfig = g_strdup(value);
+}
+
+static void tdx_guest_get_qgs(Object *obj, Visitor *v,
+ const char *name, void *opaque,
+ Error **errp)
+{
+ TdxGuest *tdx = TDX_GUEST(obj);
+
+ if (!tdx->qg_sock_addr) {
+ error_setg(errp, "quote-generation-socket is not set");
+ return;
+ }
+ visit_type_SocketAddress(v, name, &tdx->qg_sock_addr, errp);
+}
+
+static void tdx_guest_set_qgs(Object *obj, Visitor *v,
+ const char *name, void *opaque,
+ Error **errp)
+{
+ TdxGuest *tdx = TDX_GUEST(obj);
+ SocketAddress *sock = NULL;
+
+ if (!visit_type_SocketAddress(v, name, &sock, errp)) {
+ return;
+ }
+
+ if (tdx->qg_sock_addr) {
+ qapi_free_SocketAddress(tdx->qg_sock_addr);
+ }
+
+ tdx->qg_sock_addr = sock;
+}
+
+/* tdx guest */
+OBJECT_DEFINE_TYPE_WITH_INTERFACES(TdxGuest,
+ tdx_guest,
+ TDX_GUEST,
+ X86_CONFIDENTIAL_GUEST,
+ { TYPE_USER_CREATABLE },
+ { NULL })
+
+static void tdx_guest_init(Object *obj)
+{
+ ConfidentialGuestSupport *cgs = CONFIDENTIAL_GUEST_SUPPORT(obj);
+ TdxGuest *tdx = TDX_GUEST(obj);
+
+ qemu_mutex_init(&tdx->lock);
+
+ cgs->require_guest_memfd = true;
+ tdx->attributes = TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE;
+
+ object_property_add_uint64_ptr(obj, "attributes", &tdx->attributes,
+ OBJ_PROP_FLAG_READWRITE);
+ object_property_add_bool(obj, "sept-ve-disable",
+ tdx_guest_get_sept_ve_disable,
+ tdx_guest_set_sept_ve_disable);
+ object_property_add_str(obj, "mrconfigid",
+ tdx_guest_get_mrconfigid,
+ tdx_guest_set_mrconfigid);
+ object_property_add_str(obj, "mrowner",
+ tdx_guest_get_mrowner, tdx_guest_set_mrowner);
+ object_property_add_str(obj, "mrownerconfig",
+ tdx_guest_get_mrownerconfig,
+ tdx_guest_set_mrownerconfig);
+
+ object_property_add(obj, "quote-generation-socket", "SocketAddress",
+ tdx_guest_get_qgs,
+ tdx_guest_set_qgs,
+ NULL, NULL);
+
+ tdx->event_notify_vector = -1;
+ tdx->event_notify_apicid = -1;
+}
+
+static void tdx_guest_finalize(Object *obj)
+{
+}
+
+static void tdx_guest_class_init(ObjectClass *oc, const void *data)
+{
+ ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc);
+ X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc);
+
+ klass->kvm_init = tdx_kvm_init;
+ x86_klass->kvm_type = tdx_kvm_type;
+ x86_klass->cpu_instance_init = tdx_cpu_instance_init;
+ x86_klass->adjust_cpuid_features = tdx_adjust_cpuid_features;
+ x86_klass->check_features = tdx_check_features;
+}
diff --git a/target/i386/kvm/tdx.h b/target/i386/kvm/tdx.h
new file mode 100644
index 0000000..1c38faf
--- /dev/null
+++ b/target/i386/kvm/tdx.h
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef QEMU_I386_TDX_H
+#define QEMU_I386_TDX_H
+
+#ifndef CONFIG_USER_ONLY
+#include CONFIG_DEVICES /* CONFIG_TDX */
+#endif
+
+#include "confidential-guest.h"
+#include "cpu.h"
+#include "hw/i386/tdvf.h"
+
+#include "tdx-quote-generator.h"
+
+#define TYPE_TDX_GUEST "tdx-guest"
+#define TDX_GUEST(obj) OBJECT_CHECK(TdxGuest, (obj), TYPE_TDX_GUEST)
+
+typedef struct TdxGuestClass {
+ X86ConfidentialGuestClass parent_class;
+} TdxGuestClass;
+
+/* TDX requires an APIC bus frequency of 25 MHz, i.e. 40 ns per cycle */
+#define TDX_APIC_BUS_CYCLES_NS 40
+
+#define TDVMCALL_GET_TD_VM_CALL_INFO 0x10000
+#define TDVMCALL_GET_QUOTE 0x10002
+#define TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT 0x10004
+
+#define TDG_VP_VMCALL_SUCCESS 0x0000000000000000ULL
+#define TDG_VP_VMCALL_RETRY 0x0000000000000001ULL
+#define TDG_VP_VMCALL_INVALID_OPERAND 0x8000000000000000ULL
+#define TDG_VP_VMCALL_GPA_INUSE 0x8000000000000001ULL
+#define TDG_VP_VMCALL_ALIGN_ERROR 0x8000000000000002ULL
+
+#define TDG_VP_VMCALL_SUBFUNC_SET_EVENT_NOTIFY_INTERRUPT BIT_ULL(1)
+
+enum TdxRamType {
+ TDX_RAM_UNACCEPTED,
+ TDX_RAM_ADDED,
+};
+
+typedef struct TdxRamEntry {
+ uint64_t address;
+ uint64_t length;
+ enum TdxRamType type;
+} TdxRamEntry;
+
+typedef struct TdxGuest {
+ X86ConfidentialGuest parent_obj;
+
+ QemuMutex lock;
+
+ bool initialized;
+ uint64_t attributes; /* TD attributes */
+ uint64_t xfam;
+ char *mrconfigid; /* base64 encoded sha384 digest */
+ char *mrowner; /* base64 encoded sha384 digest */
+ char *mrownerconfig; /* base64 encoded sha384 digest */
+
+ MemoryRegion *tdvf_mr;
+ TdxFirmware tdvf;
+
+ uint32_t nr_ram_entries;
+ TdxRamEntry *ram_entries;
+
+ /* GetQuote */
+ SocketAddress *qg_sock_addr;
+ int num;
+
+ uint32_t event_notify_vector;
+ uint32_t event_notify_apicid;
+} TdxGuest;
+
+#ifdef CONFIG_TDX
+bool is_tdx_vm(void);
+#else
+#define is_tdx_vm() 0
+#endif /* CONFIG_TDX */
+
+int tdx_pre_create_vcpu(CPUState *cpu, Error **errp);
+void tdx_set_tdvf_region(MemoryRegion *tdvf_mr);
+int tdx_parse_tdvf(void *flash_ptr, int size);
+int tdx_handle_report_fatal_error(X86CPU *cpu, struct kvm_run *run);
+void tdx_handle_get_quote(X86CPU *cpu, struct kvm_run *run);
+void tdx_handle_get_tdvmcall_info(X86CPU *cpu, struct kvm_run *run);
+void tdx_handle_setup_event_notify_interrupt(X86CPU *cpu, struct kvm_run *run);
+
+#endif /* QEMU_I386_TDX_H */
diff --git a/target/i386/kvm/vmsr_energy.c b/target/i386/kvm/vmsr_energy.c
index d6aad52..58ce3df 100644
--- a/target/i386/kvm/vmsr_energy.c
+++ b/target/i386/kvm/vmsr_energy.c
@@ -27,15 +27,6 @@ char *vmsr_compute_default_paths(void)
return g_build_filename(state, "run", "qemu-vmsr-helper.sock", NULL);
}
-bool is_host_cpu_intel(void)
-{
- char vendor[CPUID_VENDOR_SZ + 1];
-
- host_cpu_vendor_fms(vendor, NULL, NULL, NULL);
-
- return g_str_equal(vendor, CPUID_VENDOR_INTEL);
-}
-
int is_rapl_enabled(void)
{
const char *path = "/sys/class/powercap/intel-rapl/enabled";
diff --git a/target/i386/kvm/vmsr_energy.h b/target/i386/kvm/vmsr_energy.h
index 16cc1f4..151bcbd 100644
--- a/target/i386/kvm/vmsr_energy.h
+++ b/target/i386/kvm/vmsr_energy.h
@@ -94,6 +94,5 @@ double vmsr_get_ratio(uint64_t e_delta,
unsigned long long delta_ticks,
unsigned int maxticks);
void vmsr_init_topo_info(X86CPUTopoInfo *topo_info, const MachineState *ms);
-bool is_host_cpu_intel(void);
int is_rapl_enabled(void);
#endif /* VMSR_ENERGY_H */
diff --git a/target/i386/kvm/xen-emu.c b/target/i386/kvm/xen-emu.c
index e81a245..284c5ef 100644
--- a/target/i386/kvm/xen-emu.c
+++ b/target/i386/kvm/xen-emu.c
@@ -13,11 +13,12 @@
#include "qemu/log.h"
#include "qemu/main-loop.h"
#include "qemu/error-report.h"
+#include "exec/target_page.h"
#include "hw/xen/xen.h"
#include "system/kvm_int.h"
#include "system/kvm_xen.h"
#include "kvm/kvm_i386.h"
-#include "exec/address-spaces.h"
+#include "system/address-spaces.h"
#include "xen-emu.h"
#include "trace.h"
#include "system/runstate.h"
diff --git a/target/i386/machine.c b/target/i386/machine.c
index 70f632a..dd2dac1 100644
--- a/target/i386/machine.c
+++ b/target/i386/machine.c
@@ -7,7 +7,7 @@
#include "hw/i386/x86.h"
#include "kvm/kvm_i386.h"
#include "hw/xen/xen.h"
-
+#include "exec/watchpoint.h"
#include "system/kvm.h"
#include "system/kvm_xen.h"
#include "system/tcg.h"
@@ -1060,9 +1060,8 @@ static bool tsc_khz_needed(void *opaque)
{
X86CPU *cpu = opaque;
CPUX86State *env = &cpu->env;
- MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine());
- X86MachineClass *x86mc = X86_MACHINE_CLASS(mc);
- return env->tsc_khz && x86mc->save_tsc_khz;
+
+ return env->tsc_khz;
}
static const VMStateDescription vmstate_tsc_khz = {
diff --git a/target/i386/meson.build b/target/i386/meson.build
index c1aacea..092af34 100644
--- a/target/i386/meson.build
+++ b/target/i386/meson.build
@@ -11,6 +11,8 @@ i386_ss.add(when: 'CONFIG_SEV', if_true: files('host-cpu.c', 'confidential-guest
# x86 cpu type
i386_ss.add(when: 'CONFIG_KVM', if_true: files('host-cpu.c'))
i386_ss.add(when: 'CONFIG_HVF', if_true: files('host-cpu.c'))
+i386_ss.add(when: 'CONFIG_WHPX', if_true: files('host-cpu.c'))
+i386_ss.add(when: 'CONFIG_NVMM', if_true: files('host-cpu.c'))
i386_system_ss = ss.source_set()
i386_system_ss.add(files(
diff --git a/target/i386/monitor.c b/target/i386/monitor.c
index 3ea92b0..3c9b6ca 100644
--- a/target/i386/monitor.c
+++ b/target/i386/monitor.c
@@ -29,7 +29,6 @@
#include "monitor/hmp.h"
#include "qobject/qdict.h"
#include "qapi/error.h"
-#include "qapi/qapi-commands-misc-target.h"
#include "qapi/qapi-commands-misc.h"
/* Perform linear address sign extension */
diff --git a/target/i386/nvmm/nvmm-accel-ops.c b/target/i386/nvmm/nvmm-accel-ops.c
index 4e4e63d..a5517b0 100644
--- a/target/i386/nvmm/nvmm-accel-ops.c
+++ b/target/i386/nvmm/nvmm-accel-ops.c
@@ -81,12 +81,13 @@ static void nvmm_kick_vcpu_thread(CPUState *cpu)
cpus_kick_thread(cpu);
}
-static void nvmm_accel_ops_class_init(ObjectClass *oc, void *data)
+static void nvmm_accel_ops_class_init(ObjectClass *oc, const void *data)
{
AccelOpsClass *ops = ACCEL_OPS_CLASS(oc);
ops->create_vcpu_thread = nvmm_start_vcpu_thread;
ops->kick_vcpu_thread = nvmm_kick_vcpu_thread;
+ ops->handle_interrupt = generic_handle_interrupt;
ops->synchronize_post_reset = nvmm_cpu_synchronize_post_reset;
ops->synchronize_post_init = nvmm_cpu_synchronize_post_init;
diff --git a/target/i386/nvmm/nvmm-all.c b/target/i386/nvmm/nvmm-all.c
index 04e5f7e..11c2630 100644
--- a/target/i386/nvmm/nvmm-all.c
+++ b/target/i386/nvmm/nvmm-all.c
@@ -9,8 +9,8 @@
#include "qemu/osdep.h"
#include "cpu.h"
-#include "exec/address-spaces.h"
-#include "exec/ioport.h"
+#include "system/address-spaces.h"
+#include "system/ioport.h"
#include "qemu/accel.h"
#include "system/nvmm.h"
#include "system/cpus.h"
@@ -19,6 +19,8 @@
#include "qemu/error-report.h"
#include "qapi/error.h"
#include "qemu/queue.h"
+#include "accel/accel-cpu-target.h"
+#include "host-cpu.h"
#include "migration/blocker.h"
#include "strings.h"
@@ -30,7 +32,6 @@ struct AccelCPUState {
struct nvmm_vcpu vcpu;
uint8_t tpr;
bool stop;
- bool dirty;
/* Window-exiting for INTs/NMIs. */
bool int_window_exit;
@@ -47,7 +48,7 @@ struct qemu_machine {
/* -------------------------------------------------------------------------- */
-static bool nvmm_allowed;
+bool nvmm_allowed;
static struct qemu_machine qemu_mach;
static struct nvmm_machine *
@@ -508,7 +509,7 @@ nvmm_io_callback(struct nvmm_io *io)
}
/* Needed, otherwise infinite loop. */
- current_cpu->accel->dirty = false;
+ current_cpu->vcpu_dirty = false;
}
static void
@@ -517,7 +518,7 @@ nvmm_mem_callback(struct nvmm_mem *mem)
cpu_physical_memory_rw(mem->gpa, mem->data, mem->size, mem->write);
/* Needed, otherwise infinite loop. */
- current_cpu->accel->dirty = false;
+ current_cpu->vcpu_dirty = false;
}
static struct nvmm_assist_callbacks nvmm_callbacks = {
@@ -727,9 +728,9 @@ nvmm_vcpu_loop(CPUState *cpu)
* Inner VCPU loop.
*/
do {
- if (cpu->accel->dirty) {
+ if (cpu->vcpu_dirty) {
nvmm_set_registers(cpu);
- cpu->accel->dirty = false;
+ cpu->vcpu_dirty = false;
}
if (qcpu->stop) {
@@ -827,32 +828,32 @@ static void
do_nvmm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
{
nvmm_get_registers(cpu);
- cpu->accel->dirty = true;
+ cpu->vcpu_dirty = true;
}
static void
do_nvmm_cpu_synchronize_post_reset(CPUState *cpu, run_on_cpu_data arg)
{
nvmm_set_registers(cpu);
- cpu->accel->dirty = false;
+ cpu->vcpu_dirty = false;
}
static void
do_nvmm_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg)
{
nvmm_set_registers(cpu);
- cpu->accel->dirty = false;
+ cpu->vcpu_dirty = false;
}
static void
do_nvmm_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg)
{
- cpu->accel->dirty = true;
+ cpu->vcpu_dirty = true;
}
void nvmm_cpu_synchronize_state(CPUState *cpu)
{
- if (!cpu->accel->dirty) {
+ if (!cpu->vcpu_dirty) {
run_on_cpu(cpu, do_nvmm_cpu_synchronize_state, RUN_ON_CPU_NULL);
}
}
@@ -982,7 +983,7 @@ nvmm_init_vcpu(CPUState *cpu)
}
}
- qcpu->dirty = true;
+ qcpu->vcpu_dirty = true;
cpu->accel = qcpu;
return 0;
@@ -1153,7 +1154,7 @@ static struct RAMBlockNotifier nvmm_ram_notifier = {
/* -------------------------------------------------------------------------- */
static int
-nvmm_accel_init(MachineState *ms)
+nvmm_accel_init(AccelState *as, MachineState *ms)
{
int ret, err;
@@ -1193,14 +1194,8 @@ nvmm_accel_init(MachineState *ms)
return 0;
}
-int
-nvmm_enabled(void)
-{
- return nvmm_allowed;
-}
-
static void
-nvmm_accel_class_init(ObjectClass *oc, void *data)
+nvmm_accel_class_init(ObjectClass *oc, const void *data)
{
AccelClass *ac = ACCEL_CLASS(oc);
ac->name = "NVMM";
@@ -1214,10 +1209,33 @@ static const TypeInfo nvmm_accel_type = {
.class_init = nvmm_accel_class_init,
};
+static void nvmm_cpu_instance_init(CPUState *cs)
+{
+ X86CPU *cpu = X86_CPU(cs);
+
+ host_cpu_instance_init(cpu);
+}
+
+static void nvmm_cpu_accel_class_init(ObjectClass *oc, const void *data)
+{
+ AccelCPUClass *acc = ACCEL_CPU_CLASS(oc);
+
+ acc->cpu_instance_init = nvmm_cpu_instance_init;
+}
+
+static const TypeInfo nvmm_cpu_accel_type = {
+ .name = ACCEL_CPU_NAME("nvmm"),
+
+ .parent = TYPE_ACCEL_CPU,
+ .class_init = nvmm_cpu_accel_class_init,
+ .abstract = true,
+};
+
static void
nvmm_type_init(void)
{
type_register_static(&nvmm_accel_type);
+ type_register_static(&nvmm_cpu_accel_type);
}
type_init(nvmm_type_init);
diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index f0aa189..a2e4d48 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -842,7 +842,7 @@ int64_t helper_cvttsd2sq(CPUX86State *env, ZMMReg *s)
void glue(helper_rsqrtps, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s)
{
- uint8_t old_flags = get_float_exception_flags(&env->sse_status);
+ int old_flags = get_float_exception_flags(&env->sse_status);
int i;
for (i = 0; i < 2 << SHIFT; i++) {
d->ZMM_S(i) = float32_div(float32_one,
@@ -855,7 +855,7 @@ void glue(helper_rsqrtps, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s)
#if SHIFT == 1
void helper_rsqrtss(CPUX86State *env, ZMMReg *d, ZMMReg *v, ZMMReg *s)
{
- uint8_t old_flags = get_float_exception_flags(&env->sse_status);
+ int old_flags = get_float_exception_flags(&env->sse_status);
int i;
d->ZMM_S(0) = float32_div(float32_one,
float32_sqrt(s->ZMM_S(0), &env->sse_status),
@@ -869,7 +869,7 @@ void helper_rsqrtss(CPUX86State *env, ZMMReg *d, ZMMReg *v, ZMMReg *s)
void glue(helper_rcpps, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s)
{
- uint8_t old_flags = get_float_exception_flags(&env->sse_status);
+ int old_flags = get_float_exception_flags(&env->sse_status);
int i;
for (i = 0; i < 2 << SHIFT; i++) {
d->ZMM_S(i) = float32_div(float32_one, s->ZMM_S(i), &env->sse_status);
@@ -880,7 +880,7 @@ void glue(helper_rcpps, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s)
#if SHIFT == 1
void helper_rcpss(CPUX86State *env, ZMMReg *d, ZMMReg *v, ZMMReg *s)
{
- uint8_t old_flags = get_float_exception_flags(&env->sse_status);
+ int old_flags = get_float_exception_flags(&env->sse_status);
int i;
d->ZMM_S(0) = float32_div(float32_one, s->ZMM_S(0), &env->sse_status);
for (i = 1; i < 2 << SHIFT; i++) {
@@ -1714,7 +1714,7 @@ void glue(helper_phminposuw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
void glue(helper_roundps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
uint32_t mode)
{
- uint8_t old_flags = get_float_exception_flags(&env->sse_status);
+ int old_flags = get_float_exception_flags(&env->sse_status);
signed char prev_rounding_mode;
int i;
@@ -1738,7 +1738,7 @@ void glue(helper_roundps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
void glue(helper_roundpd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
uint32_t mode)
{
- uint8_t old_flags = get_float_exception_flags(&env->sse_status);
+ int old_flags = get_float_exception_flags(&env->sse_status);
signed char prev_rounding_mode;
int i;
@@ -1763,7 +1763,7 @@ void glue(helper_roundpd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
void glue(helper_roundss, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s,
uint32_t mode)
{
- uint8_t old_flags = get_float_exception_flags(&env->sse_status);
+ int old_flags = get_float_exception_flags(&env->sse_status);
signed char prev_rounding_mode;
int i;
@@ -1788,7 +1788,7 @@ void glue(helper_roundss, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s,
void glue(helper_roundsd, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s,
uint32_t mode)
{
- uint8_t old_flags = get_float_exception_flags(&env->sse_status);
+ int old_flags = get_float_exception_flags(&env->sse_status);
signed char prev_rounding_mode;
int i;
diff --git a/target/i386/sev-system-stub.c b/target/i386/sev-system-stub.c
index d5bf886..7c5c02a 100644
--- a/target/i386/sev-system-stub.c
+++ b/target/i386/sev-system-stub.c
@@ -14,34 +14,9 @@
#include "qemu/osdep.h"
#include "monitor/monitor.h"
#include "monitor/hmp-target.h"
-#include "qapi/qapi-commands-misc-target.h"
#include "qapi/error.h"
#include "sev.h"
-SevInfo *qmp_query_sev(Error **errp)
-{
- error_setg(errp, "SEV is not available in this QEMU");
- return NULL;
-}
-
-SevLaunchMeasureInfo *qmp_query_sev_launch_measure(Error **errp)
-{
- error_setg(errp, "SEV is not available in this QEMU");
- return NULL;
-}
-
-SevCapability *qmp_query_sev_capabilities(Error **errp)
-{
- error_setg(errp, "SEV is not available in this QEMU");
- return NULL;
-}
-
-void qmp_sev_inject_launch_secret(const char *packet_header, const char *secret,
- bool has_gpa, uint64_t gpa, Error **errp)
-{
- error_setg(errp, "SEV is not available in this QEMU");
-}
-
int sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp)
{
g_assert_not_reached();
@@ -56,13 +31,6 @@ int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size)
g_assert_not_reached();
}
-SevAttestationReport *qmp_query_sev_attestation_report(const char *mnonce,
- Error **errp)
-{
- error_setg(errp, "SEV is not available in this QEMU");
- return NULL;
-}
-
void hmp_info_sev(Monitor *mon, const QDict *qdict)
{
monitor_printf(mon, "SEV is not available in this QEMU\n");
diff --git a/target/i386/sev.c b/target/i386/sev.c
index 0e1dbb6..1057b8a 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -26,6 +26,7 @@
#include "qemu/uuid.h"
#include "qemu/error-report.h"
#include "crypto/hash.h"
+#include "exec/target_page.h"
#include "system/kvm.h"
#include "kvm/kvm_i386.h"
#include "sev.h"
@@ -36,11 +37,13 @@
#include "qom/object.h"
#include "monitor/monitor.h"
#include "monitor/hmp-target.h"
-#include "qapi/qapi-commands-misc-target.h"
+#include "qapi/qapi-commands-misc-i386.h"
#include "confidential-guest.h"
#include "hw/i386/pc.h"
-#include "exec/address-spaces.h"
+#include "system/address-spaces.h"
+#include "hw/i386/e820_memory_layout.h"
#include "qemu/queue.h"
+#include "qemu/cutils.h"
OBJECT_DECLARE_TYPE(SevCommonState, SevCommonStateClass, SEV_COMMON)
OBJECT_DECLARE_TYPE(SevGuestState, SevCommonStateClass, SEV_GUEST)
@@ -49,6 +52,15 @@ OBJECT_DECLARE_TYPE(SevSnpGuestState, SevCommonStateClass, SEV_SNP_GUEST)
/* hard code sha256 digest size */
#define HASH_SIZE 32
+/* Hard coded GPA that KVM uses for the VMSA */
+#define KVM_VMSA_GPA 0xFFFFFFFFF000
+
+/* Convert between SEV-ES VMSA and SegmentCache flags/attributes */
+#define FLAGS_VMSA_TO_SEGCACHE(flags) \
+ ((((flags) & 0xff00) << 12) | (((flags) & 0xff) << 8))
+#define FLAGS_SEGCACHE_TO_VMSA(flags) \
+ ((((flags) & 0xff00) >> 8) | (((flags) & 0xf00000) >> 12))
+
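The two macros above repack segment attributes between the packed VMSA format (type/S/DPL/P in bits 0-7, AVL/L/DB/G in bits 8-11) and QEMU's SegmentCache flags layout (bits 8-15 and 20-23). A standalone round-trip check, using an illustrative attribute value (0x0a9b, a present 64-bit code segment):

/* Round-trip check of the VMSA <-> SegmentCache attribute repacking. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define FLAGS_VMSA_TO_SEGCACHE(flags) \
    ((((flags) & 0xff00) << 12) | (((flags) & 0xff) << 8))
#define FLAGS_SEGCACHE_TO_VMSA(flags) \
    ((((flags) & 0xff00) >> 8) | (((flags) & 0xf00000) >> 12))

int main(void)
{
    uint32_t attrib = 0x0a9b;                       /* P=1, S=1, type=0xb, L=1, G=1 */
    uint32_t flags = FLAGS_VMSA_TO_SEGCACHE(attrib);

    printf("vmsa attrib 0x%04x -> segcache flags 0x%06x\n", attrib, flags);
    assert(FLAGS_SEGCACHE_TO_VMSA(flags) == attrib);   /* the conversion round-trips */
    return 0;
}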
typedef struct QEMU_PACKED SevHashTableEntry {
QemuUUID guid;
uint16_t len;
@@ -88,6 +100,14 @@ typedef struct QEMU_PACKED SevHashTableDescriptor {
uint32_t size;
} SevHashTableDescriptor;
+typedef struct SevLaunchVmsa {
+ QTAILQ_ENTRY(SevLaunchVmsa) next;
+
+ uint16_t cpu_index;
+ uint64_t gpa;
+ struct sev_es_save_area vmsa;
+} SevLaunchVmsa;
+
struct SevCommonState {
X86ConfidentialGuest parent_obj;
@@ -98,6 +118,8 @@ struct SevCommonState {
uint32_t cbitpos;
uint32_t reduced_phys_bits;
bool kernel_hashes;
+ uint64_t sev_features;
+ uint64_t supported_sev_features;
/* runtime state */
uint8_t api_major;
@@ -106,9 +128,7 @@ struct SevCommonState {
int sev_fd;
SevState state;
- uint32_t reset_cs;
- uint32_t reset_ip;
- bool reset_data_valid;
+ QTAILQ_HEAD(, SevLaunchVmsa) launch_vmsa;
};
struct SevCommonStateClass {
@@ -121,7 +141,8 @@ struct SevCommonStateClass {
Error **errp);
int (*launch_start)(SevCommonState *sev_common);
void (*launch_finish)(SevCommonState *sev_common);
- int (*launch_update_data)(SevCommonState *sev_common, hwaddr gpa, uint8_t *ptr, size_t len);
+ int (*launch_update_data)(SevCommonState *sev_common, hwaddr gpa,
+ uint8_t *ptr, size_t len, Error **errp);
int (*kvm_init)(ConfidentialGuestSupport *cgs, Error **errp);
};
@@ -211,14 +232,6 @@ static const char *const sev_fw_errlist[] = {
#define SEV_FW_MAX_ERROR ARRAY_SIZE(sev_fw_errlist)
-/* <linux/kvm.h> doesn't expose this, so re-use the max from kvm.c */
-#define KVM_MAX_CPUID_ENTRIES 100
-
-typedef struct KvmCpuidInfo {
- struct kvm_cpuid2 cpuid;
- struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES];
-} KvmCpuidInfo;
-
#define SNP_CPUID_FUNCTION_MAXCOUNT 64
#define SNP_CPUID_FUNCTION_UNKNOWN 0xFFFFFFFF
@@ -370,6 +383,288 @@ static struct RAMBlockNotifier sev_ram_notifier = {
.ram_block_removed = sev_ram_block_removed,
};
+static void sev_apply_cpu_context(CPUState *cpu)
+{
+ SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs);
+ X86CPU *x86;
+ CPUX86State *env;
+ struct SevLaunchVmsa *launch_vmsa;
+
+ /* See if an initial VMSA has been provided for this CPU */
+ QTAILQ_FOREACH(launch_vmsa, &sev_common->launch_vmsa, next)
+ {
+ if (cpu->cpu_index == launch_vmsa->cpu_index) {
+ x86 = X86_CPU(cpu);
+ env = &x86->env;
+
+            /*
+             * Ideally we would provide the VMSA directly to KVM, so that the
+             * initial VMSA measurement calculated during
+             * KVM_SEV_LAUNCH_UPDATE_VMSA would be derived from exactly what
+             * we provide here.  Currently this is not possible, so we copy
+             * the parts of the VMSA structure that we currently support into
+             * the CPU state.
+             */
+ cpu_load_efer(env, launch_vmsa->vmsa.efer);
+ cpu_x86_update_cr4(env, launch_vmsa->vmsa.cr4);
+ cpu_x86_update_cr0(env, launch_vmsa->vmsa.cr0);
+ cpu_x86_update_cr3(env, launch_vmsa->vmsa.cr3);
+ env->xcr0 = launch_vmsa->vmsa.xcr0;
+ env->pat = launch_vmsa->vmsa.g_pat;
+
+ cpu_x86_load_seg_cache(
+ env, R_CS, launch_vmsa->vmsa.cs.selector,
+ launch_vmsa->vmsa.cs.base, launch_vmsa->vmsa.cs.limit,
+ FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.cs.attrib));
+ cpu_x86_load_seg_cache(
+ env, R_DS, launch_vmsa->vmsa.ds.selector,
+ launch_vmsa->vmsa.ds.base, launch_vmsa->vmsa.ds.limit,
+ FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.ds.attrib));
+ cpu_x86_load_seg_cache(
+ env, R_ES, launch_vmsa->vmsa.es.selector,
+ launch_vmsa->vmsa.es.base, launch_vmsa->vmsa.es.limit,
+ FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.es.attrib));
+ cpu_x86_load_seg_cache(
+ env, R_FS, launch_vmsa->vmsa.fs.selector,
+ launch_vmsa->vmsa.fs.base, launch_vmsa->vmsa.fs.limit,
+ FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.fs.attrib));
+ cpu_x86_load_seg_cache(
+ env, R_GS, launch_vmsa->vmsa.gs.selector,
+ launch_vmsa->vmsa.gs.base, launch_vmsa->vmsa.gs.limit,
+ FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.gs.attrib));
+ cpu_x86_load_seg_cache(
+ env, R_SS, launch_vmsa->vmsa.ss.selector,
+ launch_vmsa->vmsa.ss.base, launch_vmsa->vmsa.ss.limit,
+ FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.ss.attrib));
+
+ env->gdt.base = launch_vmsa->vmsa.gdtr.base;
+ env->gdt.limit = launch_vmsa->vmsa.gdtr.limit;
+ env->gdt.flags =
+ FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.gdtr.attrib);
+ env->idt.base = launch_vmsa->vmsa.idtr.base;
+ env->idt.limit = launch_vmsa->vmsa.idtr.limit;
+ env->idt.flags =
+ FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.idtr.attrib);
+
+ cpu_x86_load_seg_cache(
+ env, R_LDTR, launch_vmsa->vmsa.ldtr.selector,
+ launch_vmsa->vmsa.ldtr.base, launch_vmsa->vmsa.ldtr.limit,
+ FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.ldtr.attrib));
+ cpu_x86_load_seg_cache(
+ env, R_TR, launch_vmsa->vmsa.tr.selector,
+                launch_vmsa->vmsa.tr.base, launch_vmsa->vmsa.tr.limit,
+ FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.tr.attrib));
+
+ env->dr[6] = launch_vmsa->vmsa.dr6;
+ env->dr[7] = launch_vmsa->vmsa.dr7;
+
+ env->regs[R_EAX] = launch_vmsa->vmsa.rax;
+ env->regs[R_ECX] = launch_vmsa->vmsa.rcx;
+ env->regs[R_EDX] = launch_vmsa->vmsa.rdx;
+ env->regs[R_EBX] = launch_vmsa->vmsa.rbx;
+ env->regs[R_ESP] = launch_vmsa->vmsa.rsp;
+ env->regs[R_EBP] = launch_vmsa->vmsa.rbp;
+ env->regs[R_ESI] = launch_vmsa->vmsa.rsi;
+ env->regs[R_EDI] = launch_vmsa->vmsa.rdi;
+#ifdef TARGET_X86_64
+ env->regs[R_R8] = launch_vmsa->vmsa.r8;
+ env->regs[R_R9] = launch_vmsa->vmsa.r9;
+ env->regs[R_R10] = launch_vmsa->vmsa.r10;
+ env->regs[R_R11] = launch_vmsa->vmsa.r11;
+ env->regs[R_R12] = launch_vmsa->vmsa.r12;
+ env->regs[R_R13] = launch_vmsa->vmsa.r13;
+ env->regs[R_R14] = launch_vmsa->vmsa.r14;
+ env->regs[R_R15] = launch_vmsa->vmsa.r15;
+#endif
+ env->eip = launch_vmsa->vmsa.rip;
+ env->eflags = launch_vmsa->vmsa.rflags;
+
+ cpu_set_fpuc(env, launch_vmsa->vmsa.x87_fcw);
+ env->mxcsr = launch_vmsa->vmsa.mxcsr;
+
+ break;
+ }
+ }
+}
+
+static int check_sev_features(SevCommonState *sev_common, uint64_t sev_features,
+ Error **errp)
+{
+ /*
+ * Ensure SEV_FEATURES is configured for correct SEV hardware and that
+ * the requested features are supported. If SEV-SNP is enabled then
+ * that feature must be enabled, otherwise it must be cleared.
+ */
+ if (sev_snp_enabled() && !(sev_features & SVM_SEV_FEAT_SNP_ACTIVE)) {
+ error_setg(
+ errp,
+ "%s: SEV_SNP is enabled but is not enabled in VMSA sev_features",
+ __func__);
+ return -1;
+ } else if (!sev_snp_enabled() &&
+ (sev_features & SVM_SEV_FEAT_SNP_ACTIVE)) {
+ error_setg(
+ errp,
+ "%s: SEV_SNP is not enabled but is enabled in VMSA sev_features",
+ __func__);
+ return -1;
+ }
+ if (sev_features & ~sev_common->supported_sev_features) {
+ error_setg(errp,
+ "%s: VMSA contains unsupported sev_features: %lX, "
+ "supported features: %lX",
+ __func__, sev_features, sev_common->supported_sev_features);
+ return -1;
+ }
+ return 0;
+}
+
+static int check_vmsa_supported(SevCommonState *sev_common, hwaddr gpa,
+ const struct sev_es_save_area *vmsa,
+ Error **errp)
+{
+ struct sev_es_save_area vmsa_check;
+
+ /*
+ * KVM always populates the VMSA at a fixed GPA which cannot be modified
+ * from userspace. Specifying a different GPA will not prevent the guest
+ * from starting but will cause the launch measurement to be different
+ * from expected. Therefore check that the provided GPA matches the KVM
+ * hardcoded value.
+ */
+ if (gpa != KVM_VMSA_GPA) {
+ error_setg(errp,
+ "%s: The VMSA GPA must be %lX but is specified as %lX",
+ __func__, KVM_VMSA_GPA, gpa);
+ return -1;
+ }
+
+ /*
+ * Clear all supported fields so we can then check the entire structure
+ * is zero.
+ */
+ memcpy(&vmsa_check, vmsa, sizeof(struct sev_es_save_area));
+ memset(&vmsa_check.es, 0, sizeof(vmsa_check.es));
+ memset(&vmsa_check.cs, 0, sizeof(vmsa_check.cs));
+ memset(&vmsa_check.ss, 0, sizeof(vmsa_check.ss));
+ memset(&vmsa_check.ds, 0, sizeof(vmsa_check.ds));
+ memset(&vmsa_check.fs, 0, sizeof(vmsa_check.fs));
+ memset(&vmsa_check.gs, 0, sizeof(vmsa_check.gs));
+ memset(&vmsa_check.gdtr, 0, sizeof(vmsa_check.gdtr));
+ memset(&vmsa_check.idtr, 0, sizeof(vmsa_check.idtr));
+ memset(&vmsa_check.ldtr, 0, sizeof(vmsa_check.ldtr));
+ memset(&vmsa_check.tr, 0, sizeof(vmsa_check.tr));
+ vmsa_check.efer = 0;
+ vmsa_check.cr0 = 0;
+ vmsa_check.cr3 = 0;
+ vmsa_check.cr4 = 0;
+ vmsa_check.xcr0 = 0;
+ vmsa_check.dr6 = 0;
+ vmsa_check.dr7 = 0;
+ vmsa_check.rax = 0;
+ vmsa_check.rcx = 0;
+ vmsa_check.rdx = 0;
+ vmsa_check.rbx = 0;
+ vmsa_check.rsp = 0;
+ vmsa_check.rbp = 0;
+ vmsa_check.rsi = 0;
+ vmsa_check.rdi = 0;
+ vmsa_check.r8 = 0;
+ vmsa_check.r9 = 0;
+ vmsa_check.r10 = 0;
+ vmsa_check.r11 = 0;
+ vmsa_check.r12 = 0;
+ vmsa_check.r13 = 0;
+ vmsa_check.r14 = 0;
+ vmsa_check.r15 = 0;
+ vmsa_check.rip = 0;
+ vmsa_check.rflags = 0;
+
+ vmsa_check.g_pat = 0;
+ vmsa_check.xcr0 = 0;
+
+ vmsa_check.x87_fcw = 0;
+ vmsa_check.mxcsr = 0;
+
+ if (check_sev_features(sev_common, vmsa_check.sev_features, errp) < 0) {
+ return -1;
+ }
+ vmsa_check.sev_features = 0;
+
+ if (!buffer_is_zero(&vmsa_check, sizeof(vmsa_check))) {
+ error_setg(errp,
+ "%s: The VMSA contains fields that are not "
+ "synchronized with KVM. Continuing would result in "
+ "either unpredictable guest behavior, or a "
+ "mismatched launch measurement.",
+ __func__);
+ return -1;
+ }
+ return 0;
+}
+
+static int sev_set_cpu_context(uint16_t cpu_index, const void *ctx,
+ uint32_t ctx_len, hwaddr gpa, Error **errp)
+{
+ SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs);
+ SevLaunchVmsa *launch_vmsa;
+ CPUState *cpu;
+ bool exists = false;
+
+ /*
+ * Setting the CPU context is only supported for SEV-ES and SEV-SNP. The
+ * context buffer will contain a sev_es_save_area from the Linux kernel
+ * which is defined by "Table B-4. VMSA Layout, State Save Area for SEV-ES"
+ * in the AMD64 APM, Volume 2.
+ */
+
+ if (!sev_es_enabled()) {
+ error_setg(errp, "SEV: unable to set CPU context: Not supported");
+ return -1;
+ }
+
+ if (ctx_len < sizeof(struct sev_es_save_area)) {
+ error_setg(errp, "SEV: unable to set CPU context: "
+ "Invalid context provided");
+ return -1;
+ }
+
+ cpu = qemu_get_cpu(cpu_index);
+ if (!cpu) {
+ error_setg(errp, "SEV: unable to set CPU context for out of bounds "
+ "CPU index %d", cpu_index);
+ return -1;
+ }
+
+ /*
+ * If the context of this VP has already been set then replace it with the
+ * new context.
+ */
+ QTAILQ_FOREACH(launch_vmsa, &sev_common->launch_vmsa, next) {
+ if (cpu_index == launch_vmsa->cpu_index) {
+ launch_vmsa->gpa = gpa;
+ memcpy(&launch_vmsa->vmsa, ctx, sizeof(launch_vmsa->vmsa));
+ exists = true;
+ break;
+ }
+ }
+
+ if (!exists) {
+ /* New VP context */
+ launch_vmsa = g_new0(SevLaunchVmsa, 1);
+ memcpy(&launch_vmsa->vmsa, ctx, sizeof(launch_vmsa->vmsa));
+ launch_vmsa->cpu_index = cpu_index;
+ launch_vmsa->gpa = gpa;
+ QTAILQ_INSERT_TAIL(&sev_common->launch_vmsa, launch_vmsa, next);
+ }
+
+ /* Synchronise the VMSA with the current CPU state */
+ sev_apply_cpu_context(cpu);
+
+ return 0;
+}
+
bool
sev_enabled(void)
{
@@ -946,7 +1241,7 @@ out:
}
static uint32_t
-sev_snp_mask_cpuid_features(X86ConfidentialGuest *cg, uint32_t feature, uint32_t index,
+sev_snp_adjust_cpuid_features(X86ConfidentialGuest *cg, uint32_t feature, uint32_t index,
int reg, uint32_t value)
{
switch (feature) {
@@ -977,9 +1272,8 @@ sev_snp_mask_cpuid_features(X86ConfidentialGuest *cg, uint32_t feature, uint32_t
return value;
}
-static int
-sev_launch_update_data(SevCommonState *sev_common, hwaddr gpa,
- uint8_t *addr, size_t len)
+static int sev_launch_update_data(SevCommonState *sev_common, hwaddr gpa,
+ uint8_t *addr, size_t len, Error **errp)
{
int ret, fw_error;
struct kvm_sev_launch_update_data update;
@@ -994,8 +1288,8 @@ sev_launch_update_data(SevCommonState *sev_common, hwaddr gpa,
ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_LAUNCH_UPDATE_DATA,
&update, &fw_error);
if (ret) {
- error_report("%s: LAUNCH_UPDATE ret=%d fw_error=%d '%s'",
- __func__, ret, fw_error, fw_error_to_str(fw_error));
+ error_setg(errp, "%s: LAUNCH_UPDATE ret=%d fw_error=%d '%s'", __func__,
+ ret, fw_error, fw_error_to_str(fw_error));
}
return ret;
@@ -1005,6 +1299,16 @@ static int
sev_launch_update_vmsa(SevGuestState *sev_guest)
{
int ret, fw_error;
+ CPUState *cpu;
+
+ /*
+ * The initial CPU state is measured as part of KVM_SEV_LAUNCH_UPDATE_VMSA.
+ * Synchronise the CPU state to any provided launch VMSA structures.
+ */
+ CPU_FOREACH(cpu) {
+ sev_apply_cpu_context(cpu);
+ }
+
ret = sev_ioctl(SEV_COMMON(sev_guest)->sev_fd, KVM_SEV_LAUNCH_UPDATE_VMSA,
NULL, &fw_error);
@@ -1123,8 +1427,8 @@ sev_launch_finish(SevCommonState *sev_common)
migrate_add_blocker(&sev_mig_blocker, &error_fatal);
}
-static int
-snp_launch_update_data(uint64_t gpa, void *hva, size_t len, int type)
+static int snp_launch_update_data(uint64_t gpa, void *hva, size_t len,
+ int type, Error **errp)
{
SevLaunchUpdateData *data;
@@ -1139,23 +1443,21 @@ snp_launch_update_data(uint64_t gpa, void *hva, size_t len, int type)
return 0;
}
-static int
-sev_snp_launch_update_data(SevCommonState *sev_common, hwaddr gpa,
- uint8_t *ptr, size_t len)
+static int sev_snp_launch_update_data(SevCommonState *sev_common, hwaddr gpa,
+ uint8_t *ptr, size_t len, Error **errp)
{
- int ret = snp_launch_update_data(gpa, ptr, len,
- KVM_SEV_SNP_PAGE_TYPE_NORMAL);
- return ret;
+ return snp_launch_update_data(gpa, ptr, len,
+ KVM_SEV_SNP_PAGE_TYPE_NORMAL, errp);
}
static int
sev_snp_cpuid_info_fill(SnpCpuidInfo *snp_cpuid_info,
- const KvmCpuidInfo *kvm_cpuid_info)
+ const KvmCpuidInfo *kvm_cpuid_info, Error **errp)
{
size_t i;
if (kvm_cpuid_info->cpuid.nent > SNP_CPUID_FUNCTION_MAXCOUNT) {
- error_report("SEV-SNP: CPUID entry count (%d) exceeds max (%d)",
+ error_setg(errp, "SEV-SNP: CPUID entry count (%d) exceeds max (%d)",
kvm_cpuid_info->cpuid.nent, SNP_CPUID_FUNCTION_MAXCOUNT);
return -1;
}
@@ -1197,8 +1499,8 @@ sev_snp_cpuid_info_fill(SnpCpuidInfo *snp_cpuid_info,
return 0;
}
-static int
-snp_launch_update_cpuid(uint32_t cpuid_addr, void *hva, size_t cpuid_len)
+static int snp_launch_update_cpuid(uint32_t cpuid_addr, void *hva,
+ size_t cpuid_len, Error **errp)
{
KvmCpuidInfo kvm_cpuid_info = {0};
SnpCpuidInfo snp_cpuid_info;
@@ -1215,26 +1517,25 @@ snp_launch_update_cpuid(uint32_t cpuid_addr, void *hva, size_t cpuid_len)
} while (ret == -E2BIG);
if (ret) {
- error_report("SEV-SNP: unable to query CPUID values for CPU: '%s'",
- strerror(-ret));
- return 1;
+ error_setg(errp, "SEV-SNP: unable to query CPUID values for CPU: '%s'",
+ strerror(-ret));
+ return -1;
}
- ret = sev_snp_cpuid_info_fill(&snp_cpuid_info, &kvm_cpuid_info);
- if (ret) {
- error_report("SEV-SNP: failed to generate CPUID table information");
- return 1;
+ ret = sev_snp_cpuid_info_fill(&snp_cpuid_info, &kvm_cpuid_info, errp);
+ if (ret < 0) {
+ return -1;
}
memcpy(hva, &snp_cpuid_info, sizeof(snp_cpuid_info));
return snp_launch_update_data(cpuid_addr, hva, cpuid_len,
- KVM_SEV_SNP_PAGE_TYPE_CPUID);
+ KVM_SEV_SNP_PAGE_TYPE_CPUID, errp);
}
-static int
-snp_launch_update_kernel_hashes(SevSnpGuestState *sev_snp, uint32_t addr,
- void *hva, uint32_t len)
+static int snp_launch_update_kernel_hashes(SevSnpGuestState *sev_snp,
+ uint32_t addr, void *hva,
+ uint32_t len, Error **errp)
{
int type = KVM_SEV_SNP_PAGE_TYPE_ZERO;
if (sev_snp->parent_obj.kernel_hashes) {
@@ -1246,7 +1547,7 @@ snp_launch_update_kernel_hashes(SevSnpGuestState *sev_snp, uint32_t addr,
sizeof(*sev_snp->kernel_hashes_data));
type = KVM_SEV_SNP_PAGE_TYPE_NORMAL;
}
- return snp_launch_update_data(addr, hva, len, type);
+ return snp_launch_update_data(addr, hva, len, type, errp);
}
static int
@@ -1284,12 +1585,14 @@ snp_populate_metadata_pages(SevSnpGuestState *sev_snp,
}
if (type == KVM_SEV_SNP_PAGE_TYPE_CPUID) {
- ret = snp_launch_update_cpuid(desc->base, hva, desc->len);
+ ret = snp_launch_update_cpuid(desc->base, hva, desc->len,
+ &error_fatal);
} else if (desc->type == SEV_DESC_TYPE_SNP_KERNEL_HASHES) {
ret = snp_launch_update_kernel_hashes(sev_snp, desc->base, hva,
- desc->len);
+ desc->len, &error_fatal);
} else {
- ret = snp_launch_update_data(desc->base, hva, desc->len, type);
+ ret = snp_launch_update_data(desc->base, hva, desc->len, type,
+ &error_fatal);
}
if (ret) {
@@ -1311,18 +1614,26 @@ sev_snp_launch_finish(SevCommonState *sev_common)
struct kvm_sev_snp_launch_finish *finish = &sev_snp->kvm_finish_conf;
/*
- * To boot the SNP guest, the hypervisor is required to populate the CPUID
- * and Secrets page before finalizing the launch flow. The location of
- * the secrets and CPUID page is available through the OVMF metadata GUID.
+ * Populate all the metadata pages if not using an IGVM file. In the case
+ * where an IGVM file is provided it will be used to configure the metadata
+ * pages directly.
*/
- metadata = pc_system_get_ovmf_sev_metadata_ptr();
- if (metadata == NULL) {
- error_report("%s: Failed to locate SEV metadata header", __func__);
- exit(1);
- }
+ if (!X86_MACHINE(qdev_get_machine())->igvm) {
+ /*
+ * To boot the SNP guest, the hypervisor is required to populate the
+ * CPUID and Secrets page before finalizing the launch flow. The
+ * location of the secrets and CPUID page is available through the
+ * OVMF metadata GUID.
+ */
+ metadata = pc_system_get_ovmf_sev_metadata_ptr();
+ if (metadata == NULL) {
+ error_report("%s: Failed to locate SEV metadata header", __func__);
+ exit(1);
+ }
- /* Populate all the metadata pages */
- snp_populate_metadata_pages(sev_snp, metadata);
+ /* Populate all the metadata pages */
+ snp_populate_metadata_pages(sev_snp, metadata);
+ }
QTAILQ_FOREACH(data, &launch_update, next) {
ret = sev_snp_launch_update(sev_snp, data);
@@ -1432,6 +1743,39 @@ static int sev_snp_kvm_type(X86ConfidentialGuest *cg)
return KVM_X86_SNP_VM;
}
+static int sev_init_supported_features(ConfidentialGuestSupport *cgs,
+ SevCommonState *sev_common, Error **errp)
+{
+ X86ConfidentialGuestClass *x86_klass =
+ X86_CONFIDENTIAL_GUEST_GET_CLASS(cgs);
+ /*
+ * Older kernels do not support querying or setting sev_features. In this
+ * case the set of supported features must be zero to match the settings
+ * in the kernel.
+ */
+ if (x86_klass->kvm_type(X86_CONFIDENTIAL_GUEST(sev_common)) ==
+ KVM_X86_DEFAULT_VM) {
+ sev_common->supported_sev_features = 0;
+ return 0;
+ }
+
+ /* Query KVM for the supported set of sev_features */
+ struct kvm_device_attr attr = {
+ .group = KVM_X86_GRP_SEV,
+ .attr = KVM_X86_SEV_VMSA_FEATURES,
+ .addr = (unsigned long)&sev_common->supported_sev_features,
+ };
+ if (kvm_ioctl(kvm_state, KVM_GET_DEVICE_ATTR, &attr) < 0) {
+ error_setg(errp, "%s: failed to query supported sev_features",
+ __func__);
+ return -1;
+ }
+ if (sev_snp_enabled()) {
+ sev_common->supported_sev_features |= SVM_SEV_FEAT_SNP_ACTIVE;
+ }
+ return 0;
+}
+
static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
{
char *devname;
@@ -1512,6 +1856,10 @@ static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
}
}
+ if (sev_init_supported_features(cgs, sev_common, errp) < 0) {
+ return -1;
+ }
+
trace_kvm_sev_init();
switch (x86_klass->kvm_type(X86_CONFIDENTIAL_GUEST(sev_common))) {
case KVM_X86_DEFAULT_VM:
@@ -1523,6 +1871,40 @@ static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
case KVM_X86_SEV_ES_VM:
case KVM_X86_SNP_VM: {
struct kvm_sev_init args = { 0 };
+ MachineState *machine = MACHINE(qdev_get_machine());
+ X86MachineState *x86machine = X86_MACHINE(qdev_get_machine());
+
+ /*
+ * If configuration is provided via an IGVM file then the IGVM file
+ * might contain configuration of the initial vcpu context. For SEV
+ * the vcpu context includes the sev_features which should be applied
+ * to the vcpu.
+ *
+ * KVM does not synchronize sev_features from CPU state. Instead it
+ * requires sev_features to be provided as part of this initialization
+ * call which is subsequently automatically applied to the VMSA of
+ * each vcpu.
+ *
+ * The IGVM file is normally processed after initialization. Therefore
+ * we need to pre-process it here to extract sev_features in order to
+ * provide it to KVM_SEV_INIT2. Each cgs_* function that is called by
+ * the IGVM processor detects this pre-processing pass by observing
+ * that the state is still SEV_STATE_UNINIT.
+ */
+ if (x86machine->igvm) {
+ if (IGVM_CFG_GET_CLASS(x86machine->igvm)
+ ->process(x86machine->igvm, machine->cgs, true, errp) ==
+ -1) {
+ return -1;
+ }
+ /*
+ * KVM maintains a bitmask of allowed sev_features. This does not
+ * include SVM_SEV_FEAT_SNP_ACTIVE which is set accordingly by KVM
+ * itself. Therefore we need to clear this flag.
+ */
+ args.vmsa_features = sev_common->sev_features &
+ ~SVM_SEV_FEAT_SNP_ACTIVE;
+ }
ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_INIT2, &args, &fw_error);
break;
@@ -1622,9 +2004,8 @@ sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp)
if (sev_check_state(sev_common, SEV_STATE_LAUNCH_UPDATE)) {
int ret;
- ret = klass->launch_update_data(sev_common, gpa, ptr, len);
+ ret = klass->launch_update_data(sev_common, gpa, ptr, len, errp);
if (ret < 0) {
- error_setg(errp, "SEV: Failed to encrypt pflash rom");
return ret;
}
}
@@ -1789,40 +2170,109 @@ sev_es_find_reset_vector(void *flash_ptr, uint64_t flash_size,
return sev_es_parse_reset_block(info, addr);
}
-void sev_es_set_reset_vector(CPUState *cpu)
+
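+/*
+ * The FLAGS_SEGCACHE_TO_VMSA()/FLAGS_VMSA_TO_SEGCACHE() helpers used below
+ * (not visible in this hunk) translate between QEMU's SegmentCache flag
+ * layout and the packed VMCB segment attribute encoding.
+ */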
+static void seg_to_vmsa(const SegmentCache *cpu_seg, struct vmcb_seg *vmsa_seg)
{
- X86CPU *x86;
- CPUX86State *env;
- ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs;
- SevCommonState *sev_common = SEV_COMMON(
- object_dynamic_cast(OBJECT(cgs), TYPE_SEV_COMMON));
+ vmsa_seg->selector = cpu_seg->selector;
+ vmsa_seg->base = cpu_seg->base;
+ vmsa_seg->limit = cpu_seg->limit;
+ vmsa_seg->attrib = FLAGS_SEGCACHE_TO_VMSA(cpu_seg->flags);
+}
- /* Only update if we have valid reset information */
- if (!sev_common || !sev_common->reset_data_valid) {
- return;
- }
+static void initialize_vmsa(const CPUState *cpu, struct sev_es_save_area *vmsa)
+{
+ const X86CPU *x86 = X86_CPU(cpu);
+ const CPUX86State *env = &x86->env;
- /* Do not update the BSP reset state */
- if (cpu->cpu_index == 0) {
- return;
+ /*
+ * Initialize the SEV-ES save area from the current state of
+ * the CPU. The entire state does not need to be copied, only the state
+ * that is copied back to the CPUState in sev_apply_cpu_context.
+ */
+ memset(vmsa, 0, sizeof(struct sev_es_save_area));
+ vmsa->efer = env->efer;
+ vmsa->cr0 = env->cr[0];
+ vmsa->cr3 = env->cr[3];
+ vmsa->cr4 = env->cr[4];
+ vmsa->xcr0 = env->xcr0;
+ vmsa->g_pat = env->pat;
+
+ seg_to_vmsa(&env->segs[R_CS], &vmsa->cs);
+ seg_to_vmsa(&env->segs[R_DS], &vmsa->ds);
+ seg_to_vmsa(&env->segs[R_ES], &vmsa->es);
+ seg_to_vmsa(&env->segs[R_FS], &vmsa->fs);
+ seg_to_vmsa(&env->segs[R_GS], &vmsa->gs);
+ seg_to_vmsa(&env->segs[R_SS], &vmsa->ss);
+
+ seg_to_vmsa(&env->gdt, &vmsa->gdtr);
+ seg_to_vmsa(&env->idt, &vmsa->idtr);
+ seg_to_vmsa(&env->ldt, &vmsa->ldtr);
+ seg_to_vmsa(&env->tr, &vmsa->tr);
+
+ vmsa->dr6 = env->dr[6];
+ vmsa->dr7 = env->dr[7];
+
+ vmsa->rax = env->regs[R_EAX];
+ vmsa->rcx = env->regs[R_ECX];
+ vmsa->rdx = env->regs[R_EDX];
+ vmsa->rbx = env->regs[R_EBX];
+ vmsa->rsp = env->regs[R_ESP];
+ vmsa->rbp = env->regs[R_EBP];
+ vmsa->rsi = env->regs[R_ESI];
+ vmsa->rdi = env->regs[R_EDI];
+
+#ifdef TARGET_X86_64
+ vmsa->r8 = env->regs[R_R8];
+ vmsa->r9 = env->regs[R_R9];
+ vmsa->r10 = env->regs[R_R10];
+ vmsa->r11 = env->regs[R_R11];
+ vmsa->r12 = env->regs[R_R12];
+ vmsa->r13 = env->regs[R_R13];
+ vmsa->r14 = env->regs[R_R14];
+ vmsa->r15 = env->regs[R_R15];
+#endif
+
+ vmsa->rip = env->eip;
+ vmsa->rflags = env->eflags;
+}
+
+static void sev_es_set_ap_context(uint32_t reset_addr)
+{
+ CPUState *cpu;
+ struct sev_es_save_area vmsa;
+ SegmentCache cs;
+
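+ /*
+ * Build the AP's initial CS:IP from the 32-bit reset address: the upper
+ * bits become the real-mode style CS base and the low 16 bits the IP.
+ */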
+ cs.selector = 0xf000;
+ cs.base = reset_addr & 0xffff0000;
+ cs.limit = 0xffff;
+ cs.flags = DESC_P_MASK | DESC_S_MASK | DESC_CS_MASK | DESC_R_MASK |
+ DESC_A_MASK;
+
+ CPU_FOREACH(cpu) {
+ if (cpu->cpu_index == 0) {
+ /* Do not update the BSP reset state */
+ continue;
+ }
+ initialize_vmsa(cpu, &vmsa);
+ seg_to_vmsa(&cs, &vmsa.cs);
+ vmsa.rip = reset_addr & 0x0000ffff;
+ sev_set_cpu_context(cpu->cpu_index, &vmsa,
+ sizeof(struct sev_es_save_area),
+ 0, &error_fatal);
}
+}
- x86 = X86_CPU(cpu);
- env = &x86->env;
-
- cpu_x86_load_seg_cache(env, R_CS, 0xf000, sev_common->reset_cs, 0xffff,
- DESC_P_MASK | DESC_S_MASK | DESC_CS_MASK |
- DESC_R_MASK | DESC_A_MASK);
-
- env->eip = sev_common->reset_ip;
+void sev_es_set_reset_vector(CPUState *cpu)
+{
+ if (sev_enabled()) {
+ sev_apply_cpu_context(cpu);
+ }
}
int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size)
{
- CPUState *cpu;
uint32_t addr;
int ret;
- SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs);
if (!sev_es_enabled()) {
return 0;
@@ -1835,14 +2285,12 @@ int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size)
return ret;
}
+ /*
+ * The reset vector is saved into a CPU context for each AP but not for
+ * the BSP. This is applied during guest startup or when the CPU is reset.
+ */
if (addr) {
- sev_common->reset_cs = addr & 0xffff0000;
- sev_common->reset_ip = addr & 0x0000ffff;
- sev_common->reset_data_valid = true;
-
- CPU_FOREACH(cpu) {
- sev_es_set_reset_vector(cpu);
- }
+ sev_es_set_ap_context(addr);
}
return 0;
@@ -2044,8 +2492,239 @@ static void sev_common_set_kernel_hashes(Object *obj, bool value, Error **errp)
SEV_COMMON(obj)->kernel_hashes = value;
}
+static bool cgs_check_support(ConfidentialGuestPlatformType platform,
+ uint16_t platform_version, uint8_t highest_vtl,
+ uint64_t shared_gpa_boundary)
+{
+ return (((platform == CGS_PLATFORM_SEV_SNP) && sev_snp_enabled()) ||
+ ((platform == CGS_PLATFORM_SEV_ES) && sev_es_enabled()) ||
+ ((platform == CGS_PLATFORM_SEV) && sev_enabled()));
+}
+
+static int cgs_set_guest_state(hwaddr gpa, uint8_t *ptr, uint64_t len,
+ ConfidentialGuestPageType memory_type,
+ uint16_t cpu_index, Error **errp)
+{
+ SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs);
+ SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(sev_common);
+
+ if (sev_common->state == SEV_STATE_UNINIT) {
+ /* Pre-processing of IGVM file called from sev_common_kvm_init() */
+ if ((cpu_index == 0) && (memory_type == CGS_PAGE_TYPE_VMSA)) {
+ const struct sev_es_save_area *sa =
+ (const struct sev_es_save_area *)ptr;
+ if (len < sizeof(*sa)) {
+ error_setg(errp, "%s: invalid VMSA length encountered",
+ __func__);
+ return -1;
+ }
+ if (check_sev_features(sev_common, sa->sev_features, errp) < 0) {
+ return -1;
+ }
+ sev_common->sev_features = sa->sev_features;
+ }
+ return 0;
+ }
+
+ if (!sev_enabled()) {
+ error_setg(errp, "%s: attempt to configure guest memory, but SEV "
+ "is not enabled", __func__);
+ return -1;
+ }
+
+ switch (memory_type) {
+ case CGS_PAGE_TYPE_NORMAL:
+ case CGS_PAGE_TYPE_ZERO:
+ return klass->launch_update_data(sev_common, gpa, ptr, len, errp);
+
+ case CGS_PAGE_TYPE_VMSA:
+ if (!sev_es_enabled()) {
+ error_setg(errp,
+ "%s: attempt to configure initial VMSA, but SEV-ES "
+ "is not supported",
+ __func__);
+ return -1;
+ }
+ if (check_vmsa_supported(sev_common, gpa,
+ (const struct sev_es_save_area *)ptr,
+ errp) < 0) {
+ return -1;
+ }
+ return sev_set_cpu_context(cpu_index, ptr, len, gpa, errp);
+
+ case CGS_PAGE_TYPE_UNMEASURED:
+ if (sev_snp_enabled()) {
+ return snp_launch_update_data(
+ gpa, ptr, len, KVM_SEV_SNP_PAGE_TYPE_UNMEASURED, errp);
+ }
+ /* No action required if not SEV-SNP */
+ return 0;
+
+ case CGS_PAGE_TYPE_SECRETS:
+ if (!sev_snp_enabled()) {
+ error_setg(errp,
+ "%s: attempt to configure secrets page, but SEV-SNP "
+ "is not supported",
+ __func__);
+ return -1;
+ }
+ return snp_launch_update_data(gpa, ptr, len,
+ KVM_SEV_SNP_PAGE_TYPE_SECRETS, errp);
+
+ case CGS_PAGE_TYPE_REQUIRED_MEMORY:
+ if (kvm_convert_memory(gpa, len, true) < 0) {
+ error_setg(
+ errp,
+ "%s: failed to configure required memory. gpa: %lX, type: %d",
+ __func__, gpa, memory_type);
+ return -1;
+ }
+ return 0;
+
+ case CGS_PAGE_TYPE_CPUID:
+ if (!sev_snp_enabled()) {
+ error_setg(errp,
+ "%s: attempt to configure CPUID page, but SEV-SNP "
+ "is not supported",
+ __func__);
+ return -1;
+ }
+ return snp_launch_update_cpuid(gpa, ptr, len, errp);
+ }
+ error_setg(errp, "%s: failed to update guest. gpa: %lX, type: %d", __func__,
+ gpa, memory_type);
+ return -1;
+}
+
+static int cgs_get_mem_map_entry(int index,
+ ConfidentialGuestMemoryMapEntry *entry,
+ Error **errp)
+{
+ struct e820_entry *table;
+ int num_entries;
+
+ SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs);
+ if (sev_common->state == SEV_STATE_UNINIT) {
+ /* Pre-processing of IGVM file called from sev_common_kvm_init() */
+ return 1;
+ }
+
+ num_entries = e820_get_table(&table);
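+ /* A non-zero return tells the caller that there are no more entries. */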
+ if ((index < 0) || (index >= num_entries)) {
+ return 1;
+ }
+ entry->gpa = table[index].address;
+ entry->size = table[index].length;
+ switch (table[index].type) {
+ case E820_RAM:
+ entry->type = CGS_MEM_RAM;
+ break;
+ case E820_RESERVED:
+ entry->type = CGS_MEM_RESERVED;
+ break;
+ case E820_ACPI:
+ entry->type = CGS_MEM_ACPI;
+ break;
+ case E820_NVS:
+ entry->type = CGS_MEM_NVS;
+ break;
+ case E820_UNUSABLE:
+ entry->type = CGS_MEM_UNUSABLE;
+ break;
+ }
+ return 0;
+}
+
+static int cgs_set_guest_policy(ConfidentialGuestPolicyType policy_type,
+ uint64_t policy, void *policy_data1,
+ uint32_t policy_data1_size, void *policy_data2,
+ uint32_t policy_data2_size, Error **errp)
+{
+ SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs);
+ if (sev_common->state == SEV_STATE_UNINIT) {
+ /* Pre-processing of IGVM file called from sev_common_kvm_init() */
+ return 0;
+ }
+
+ if (policy_type != GUEST_POLICY_SEV) {
+ error_setg(errp, "%s: Invalid guest policy type provided for SEV: %d",
+ __func__, policy_type);
+ return -1;
+ }
+ /*
+ * SEV-SNP handles policy differently. The policy flags are defined in
+ * kvm_start_conf.policy and an ID block and ID auth can be provided.
+ */
+ if (sev_snp_enabled()) {
+ SevSnpGuestState *sev_snp_guest =
+ SEV_SNP_GUEST(MACHINE(qdev_get_machine())->cgs);
+ struct kvm_sev_snp_launch_finish *finish =
+ &sev_snp_guest->kvm_finish_conf;
+
+ /*
+ * The policy consists of flags in 'policy' and optionally an ID block
+ * and ID auth in policy_data1 and policy_data2 respectively. The ID
+ * block and auth are optional so clear any previous ID block and auth
+ * and set them if provided, but always set the policy flags.
+ */
+ g_free(sev_snp_guest->id_block);
+ g_free((guchar *)finish->id_block_uaddr);
+ g_free(sev_snp_guest->id_auth);
+ g_free((guchar *)finish->id_auth_uaddr);
+ sev_snp_guest->id_block = NULL;
+ finish->id_block_uaddr = 0;
+ sev_snp_guest->id_auth = NULL;
+ finish->id_auth_uaddr = 0;
+
+ if (policy_data1_size > 0) {
+ struct sev_snp_id_authentication *id_auth =
+ (struct sev_snp_id_authentication *)policy_data2;
+
+ if (policy_data1_size != KVM_SEV_SNP_ID_BLOCK_SIZE) {
+ error_setg(errp, "%s: Invalid SEV-SNP ID block: incorrect size",
+ __func__);
+ return -1;
+ }
+ if (policy_data2_size != KVM_SEV_SNP_ID_AUTH_SIZE) {
+ error_setg(errp,
+ "%s: Invalid SEV-SNP ID auth block: incorrect size",
+ __func__);
+ return -1;
+ }
+ assert(policy_data1 != NULL);
+ assert(policy_data2 != NULL);
+
+ finish->id_block_uaddr =
+ (__u64)g_memdup2(policy_data1, KVM_SEV_SNP_ID_BLOCK_SIZE);
+ finish->id_auth_uaddr =
+ (__u64)g_memdup2(policy_data2, KVM_SEV_SNP_ID_AUTH_SIZE);
+
+ /*
+ * Check if an author key has been provided and use that to flag
+ * whether the author key is enabled. The first of the author key
+ * must be non-zero to indicate the key type, which will currently
+ * always be 2.
+ */
+ sev_snp_guest->kvm_finish_conf.auth_key_en =
+ id_auth->author_key[0] ? 1 : 0;
+ finish->id_block_en = 1;
+ }
+ sev_snp_guest->kvm_start_conf.policy = policy;
+ } else {
+ SevGuestState *sev_guest = SEV_GUEST(MACHINE(qdev_get_machine())->cgs);
+ /* Only the policy flags are supported for SEV and SEV-ES */
+ if ((policy_data1_size > 0) || (policy_data2_size > 0) || !sev_guest) {
+ error_setg(errp, "%s: An ID block/ID auth block has been provided "
+ "but SEV-SNP is not enabled", __func__);
+ return -1;
+ }
+ sev_guest->policy = policy;
+ }
+ return 0;
+}
+
static void
-sev_common_class_init(ObjectClass *oc, void *data)
+sev_common_class_init(ObjectClass *oc, const void *data)
{
ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc);
@@ -2067,6 +2746,8 @@ static void
sev_common_instance_init(Object *obj)
{
SevCommonState *sev_common = SEV_COMMON(obj);
+ ConfidentialGuestSupportClass *cgs =
+ CONFIDENTIAL_GUEST_SUPPORT_GET_CLASS(obj);
sev_common->kvm_type = -1;
@@ -2077,6 +2758,12 @@ sev_common_instance_init(Object *obj)
object_property_add_uint32_ptr(obj, "reduced-phys-bits",
&sev_common->reduced_phys_bits,
OBJ_PROP_FLAG_READWRITE);
+ cgs->check_support = cgs_check_support;
+ cgs->set_guest_state = cgs_set_guest_state;
+ cgs->get_mem_map_entry = cgs_get_mem_map_entry;
+ cgs->set_guest_policy = cgs_set_guest_policy;
+
+ QTAILQ_INIT(&sev_common->launch_vmsa);
}
/* sev guest info common to sev/sev-es/sev-snp */
@@ -2088,7 +2775,7 @@ static const TypeInfo sev_common_info = {
.class_size = sizeof(SevCommonStateClass),
.class_init = sev_common_class_init,
.abstract = true,
- .interfaces = (InterfaceInfo[]) {
+ .interfaces = (const InterfaceInfo[]) {
{ TYPE_USER_CREATABLE },
{ }
}
@@ -2140,7 +2827,7 @@ static void sev_guest_set_legacy_vm_type(Object *obj, Visitor *v,
}
static void
-sev_guest_class_init(ObjectClass *oc, void *data)
+sev_guest_class_init(ObjectClass *oc, const void *data)
{
SevCommonStateClass *klass = SEV_COMMON_CLASS(oc);
X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc);
@@ -2394,7 +3081,7 @@ sev_snp_guest_set_host_data(Object *obj, const char *value, Error **errp)
}
static void
-sev_snp_guest_class_init(ObjectClass *oc, void *data)
+sev_snp_guest_class_init(ObjectClass *oc, const void *data)
{
SevCommonStateClass *klass = SEV_COMMON_CLASS(oc);
X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc);
@@ -2404,7 +3091,7 @@ sev_snp_guest_class_init(ObjectClass *oc, void *data)
klass->launch_finish = sev_snp_launch_finish;
klass->launch_update_data = sev_snp_launch_update_data;
klass->kvm_init = sev_snp_kvm_init;
- x86_klass->mask_cpuid_features = sev_snp_mask_cpuid_features;
+ x86_klass->adjust_cpuid_features = sev_snp_adjust_cpuid_features;
x86_klass->kvm_type = sev_snp_kvm_type;
object_class_property_add(oc, "policy", "uint64",
diff --git a/target/i386/sev.h b/target/i386/sev.h
index 373669e..9db1a80 100644
--- a/target/i386/sev.h
+++ b/target/i386/sev.h
@@ -44,6 +44,8 @@ bool sev_snp_enabled(void);
#define SEV_SNP_POLICY_SMT 0x10000
#define SEV_SNP_POLICY_DBG 0x80000
+#define SVM_SEV_FEAT_SNP_ACTIVE 1
+
typedef struct SevKernelLoaderContext {
char *setup_data;
size_t setup_size;
@@ -55,6 +57,128 @@ typedef struct SevKernelLoaderContext {
size_t cmdline_size;
} SevKernelLoaderContext;
+/* Save area definition for SEV-ES and SEV-SNP guests */
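+/*
+ * The layout follows "Table B-4. VMSA Layout, State Save Area for SEV-ES"
+ * in the AMD64 APM, Volume 2.
+ */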
+struct QEMU_PACKED sev_es_save_area {
+ struct vmcb_seg es;
+ struct vmcb_seg cs;
+ struct vmcb_seg ss;
+ struct vmcb_seg ds;
+ struct vmcb_seg fs;
+ struct vmcb_seg gs;
+ struct vmcb_seg gdtr;
+ struct vmcb_seg ldtr;
+ struct vmcb_seg idtr;
+ struct vmcb_seg tr;
+ uint64_t vmpl0_ssp;
+ uint64_t vmpl1_ssp;
+ uint64_t vmpl2_ssp;
+ uint64_t vmpl3_ssp;
+ uint64_t u_cet;
+ uint8_t reserved_0xc8[2];
+ uint8_t vmpl;
+ uint8_t cpl;
+ uint8_t reserved_0xcc[4];
+ uint64_t efer;
+ uint8_t reserved_0xd8[104];
+ uint64_t xss;
+ uint64_t cr4;
+ uint64_t cr3;
+ uint64_t cr0;
+ uint64_t dr7;
+ uint64_t dr6;
+ uint64_t rflags;
+ uint64_t rip;
+ uint64_t dr0;
+ uint64_t dr1;
+ uint64_t dr2;
+ uint64_t dr3;
+ uint64_t dr0_addr_mask;
+ uint64_t dr1_addr_mask;
+ uint64_t dr2_addr_mask;
+ uint64_t dr3_addr_mask;
+ uint8_t reserved_0x1c0[24];
+ uint64_t rsp;
+ uint64_t s_cet;
+ uint64_t ssp;
+ uint64_t isst_addr;
+ uint64_t rax;
+ uint64_t star;
+ uint64_t lstar;
+ uint64_t cstar;
+ uint64_t sfmask;
+ uint64_t kernel_gs_base;
+ uint64_t sysenter_cs;
+ uint64_t sysenter_esp;
+ uint64_t sysenter_eip;
+ uint64_t cr2;
+ uint8_t reserved_0x248[32];
+ uint64_t g_pat;
+ uint64_t dbgctl;
+ uint64_t br_from;
+ uint64_t br_to;
+ uint64_t last_excp_from;
+ uint64_t last_excp_to;
+ uint8_t reserved_0x298[80];
+ uint32_t pkru;
+ uint32_t tsc_aux;
+ uint8_t reserved_0x2f0[24];
+ uint64_t rcx;
+ uint64_t rdx;
+ uint64_t rbx;
+ uint64_t reserved_0x320; /* rsp already available at 0x01d8 */
+ uint64_t rbp;
+ uint64_t rsi;
+ uint64_t rdi;
+ uint64_t r8;
+ uint64_t r9;
+ uint64_t r10;
+ uint64_t r11;
+ uint64_t r12;
+ uint64_t r13;
+ uint64_t r14;
+ uint64_t r15;
+ uint8_t reserved_0x380[16];
+ uint64_t guest_exit_info_1;
+ uint64_t guest_exit_info_2;
+ uint64_t guest_exit_int_info;
+ uint64_t guest_nrip;
+ uint64_t sev_features;
+ uint64_t vintr_ctrl;
+ uint64_t guest_exit_code;
+ uint64_t virtual_tom;
+ uint64_t tlb_id;
+ uint64_t pcpu_id;
+ uint64_t event_inj;
+ uint64_t xcr0;
+ uint8_t reserved_0x3f0[16];
+
+ /* Floating point area */
+ uint64_t x87_dp;
+ uint32_t mxcsr;
+ uint16_t x87_ftw;
+ uint16_t x87_fsw;
+ uint16_t x87_fcw;
+ uint16_t x87_fop;
+ uint16_t x87_ds;
+ uint16_t x87_cs;
+ uint64_t x87_rip;
+ uint8_t fpreg_x87[80];
+ uint8_t fpreg_xmm[256];
+ uint8_t fpreg_ymm[256];
+};
+
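+/*
+ * ID authentication information structure used with SNP_LAUNCH_FINISH,
+ * as defined by the SEV-SNP firmware ABI specification.
+ */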
+struct QEMU_PACKED sev_snp_id_authentication {
+ uint32_t id_key_alg;
+ uint32_t auth_key_algo;
+ uint8_t reserved[56];
+ uint8_t id_block_sig[512];
+ uint8_t id_key[1028];
+ uint8_t reserved2[60];
+ uint8_t id_key_sig[512];
+ uint8_t author_key[1028];
+ uint8_t reserved3[892];
+};
+
bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp);
int sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp);
diff --git a/target/i386/tcg/access.c b/target/i386/tcg/access.c
index e68b73a..97e3f0e 100644
--- a/target/i386/tcg/access.c
+++ b/target/i386/tcg/access.c
@@ -3,8 +3,9 @@
#include "qemu/osdep.h"
#include "cpu.h"
-#include "exec/cpu_ldst.h"
-#include "exec/exec-all.h"
+#include "accel/tcg/cpu-ldst.h"
+#include "accel/tcg/probe.h"
+#include "exec/target_page.h"
#include "access.h"
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index cda32ee..853b1c8 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -2542,7 +2542,13 @@ static void disas_insn(DisasContext *s, CPUState *cpu)
s->has_modrm = false;
s->prefix = 0;
- next_byte:
+ next_byte:;
+#ifdef TARGET_X86_64
+ /* Clear any stashed REX prefix that was followed by other prefixes. */
+ int rex;
+ rex = -1;
+ next_byte_rex:
+#endif
b = x86_ldub_code(env, s);
/* Collect prefixes. */
@@ -2585,13 +2591,12 @@ static void disas_insn(DisasContext *s, CPUState *cpu)
#ifdef TARGET_X86_64
case 0x40 ... 0x4f:
if (CODE64(s)) {
- /* REX prefix */
- s->prefix |= PREFIX_REX;
- s->vex_w = (b >> 3) & 1;
- s->rex_r = (b & 0x4) << 1;
- s->rex_x = (b & 0x2) << 2;
- s->rex_b = (b & 0x1) << 3;
- goto next_byte;
+ /*
+ * REX prefix; ignored unless it is the last prefix, so
+ * for now just stash it
+ */
+ rex = b;
+ goto next_byte_rex;
}
break;
#endif
@@ -2618,10 +2623,13 @@ static void disas_insn(DisasContext *s, CPUState *cpu)
/* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes. */
if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ
- | PREFIX_LOCK | PREFIX_DATA | PREFIX_REX)) {
+ | PREFIX_LOCK | PREFIX_DATA)) {
goto illegal_op;
}
#ifdef TARGET_X86_64
+ if (rex != -1) {
+ goto illegal_op;
+ }
s->rex_r = (~vex2 >> 4) & 8;
#endif
if (b == 0xc5) {
@@ -2661,6 +2669,16 @@ static void disas_insn(DisasContext *s, CPUState *cpu)
/* Post-process prefixes. */
if (CODE64(s)) {
+#ifdef TARGET_X86_64
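+ /* A stashed REX prefix was the last prefix seen, so apply it now. */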
+ if (rex != -1) {
+ s->prefix |= PREFIX_REX;
+ s->vex_w = (rex >> 3) & 1;
+ s->rex_r = (rex & 0x4) << 1;
+ s->rex_x = (rex & 0x2) << 2;
+ s->rex_b = (rex & 0x1) << 3;
+ }
+#endif
+
/*
* In 64-bit mode, the default data size is 32-bit. Select 64-bit
* data with rex_w, and 16-bit data with 0x66; rex_w takes precedence
@@ -2704,14 +2722,14 @@ static void disas_insn(DisasContext *s, CPUState *cpu)
if (decode.e.check & X86_CHECK_i64) {
goto illegal_op;
}
- if ((decode.e.check & X86_CHECK_i64_amd) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1) {
+ if ((decode.e.check & X86_CHECK_i64_amd) && !IS_INTEL_CPU(env)) {
goto illegal_op;
}
} else {
if (decode.e.check & X86_CHECK_o64) {
goto illegal_op;
}
- if ((decode.e.check & X86_CHECK_o64_intel) && env->cpuid_vendor1 == CPUID_VENDOR_INTEL_1) {
+ if ((decode.e.check & X86_CHECK_o64_intel) && IS_INTEL_CPU(env)) {
goto illegal_op;
}
}
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index 4e09e96..1a7fab93 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -19,16 +19,6 @@
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
-/*
- * Sometimes, knowing what the backend has can produce better code.
- * The exact opcode to check depends on 32- vs. 64-bit.
- */
-#ifdef TARGET_X86_64
-#define INDEX_op_extract2_tl INDEX_op_extract2_i64
-#else
-#define INDEX_op_extract2_tl INDEX_op_extract2_i32
-#endif
-
#define MMX_OFFSET(reg) \
({ assert((reg) >= 0 && (reg) <= 7); \
offsetof(CPUX86State, fpregs[reg].mmx); })
@@ -352,7 +342,7 @@ static void gen_writeback(DisasContext *s, X86DecodedInsn *decode, int opn, TCGv
break;
case X86_OP_SEG:
/* Note that gen_movl_seg takes care of interrupt shadow and TF. */
- gen_movl_seg(s, op->n, s->T0);
+ gen_movl_seg(s, op->n, v, op->n == R_SS);
break;
case X86_OP_INT:
if (op->has_ea) {
@@ -1813,7 +1803,7 @@ static void gen_CMPXCHG(DisasContext *s, X86DecodedInsn *decode)
static void gen_CMPXCHG16B(DisasContext *s, X86DecodedInsn *decode)
{
#ifdef TARGET_X86_64
- MemOp mop = MO_TE | MO_128 | MO_ALIGN;
+ MemOp mop = MO_LE | MO_128 | MO_ALIGN;
TCGv_i64 t0, t1;
TCGv_i128 cmp, val;
@@ -1870,10 +1860,10 @@ static void gen_CMPXCHG8B(DisasContext *s, X86DecodedInsn *decode)
/* Only require atomic with LOCK; non-parallel handled in generator. */
if (s->prefix & PREFIX_LOCK) {
- tcg_gen_atomic_cmpxchg_i64(old, s->A0, cmp, val, s->mem_index, MO_TEUQ);
+ tcg_gen_atomic_cmpxchg_i64(old, s->A0, cmp, val, s->mem_index, MO_LEUQ);
} else {
tcg_gen_nonatomic_cmpxchg_i64(old, s->A0, cmp, val,
- s->mem_index, MO_TEUQ);
+ s->mem_index, MO_LEUQ);
}
/* Compute the required value of Z. */
@@ -2392,7 +2382,7 @@ static void gen_lxx_seg(DisasContext *s, X86DecodedInsn *decode, int seg)
gen_op_ld_v(s, MO_16, s->T1, s->A0);
/* load the segment here to handle exceptions properly */
- gen_movl_seg(s, seg, s->T1);
+ gen_movl_seg(s, seg, s->T1, false);
}
static void gen_LDS(DisasContext *s, X86DecodedInsn *decode)
@@ -3023,7 +3013,7 @@ static void gen_PMOVMSKB(DisasContext *s, X86DecodedInsn *decode)
tcg_gen_ld8u_tl(s->T0, tcg_env, offsetof(CPUX86State, xmm_t0.ZMM_B(vec_len - 1)));
while (vec_len > 8) {
vec_len -= 8;
- if (tcg_op_supported(INDEX_op_extract2_tl, TCG_TYPE_TL, 0)) {
+ if (tcg_op_supported(INDEX_op_extract2, TCG_TYPE_TL, 0)) {
/*
* Load the next byte of the result into the high byte of T.
* TCG does a similar expansion of deposit to shl+extract2; by
diff --git a/target/i386/tcg/excp_helper.c b/target/i386/tcg/excp_helper.c
index de71e68..6fb8036 100644
--- a/target/i386/tcg/excp_helper.c
+++ b/target/i386/tcg/excp_helper.c
@@ -19,7 +19,6 @@
#include "qemu/osdep.h"
#include "cpu.h"
-#include "exec/exec-all.h"
#include "qemu/log.h"
#include "system/runstate.h"
#include "exec/helper-proto.h"
diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c
index c1184ca..b3b2382 100644
--- a/target/i386/tcg/fpu_helper.c
+++ b/target/i386/tcg/fpu_helper.c
@@ -22,7 +22,7 @@
#include "cpu.h"
#include "tcg-cpu.h"
#include "exec/cputlb.h"
-#include "exec/cpu_ldst.h"
+#include "accel/tcg/cpu-ldst.h"
#include "exec/helper-proto.h"
#include "fpu/softfloat.h"
#include "fpu/softfloat-macros.h"
@@ -189,25 +189,25 @@ void cpu_init_fp_statuses(CPUX86State *env)
set_float_default_nan_pattern(0b11000000, &env->mmx_status);
set_float_default_nan_pattern(0b11000000, &env->sse_status);
/*
- * TODO: x86 does flush-to-zero detection after rounding (the SDM
+ * x86 does flush-to-zero detection after rounding (the SDM
* section 10.2.3.3 on the FTZ bit of MXCSR says that we flush
* when we detect underflow, which x86 does after rounding).
*/
- set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status);
- set_float_ftz_detection(float_ftz_before_rounding, &env->mmx_status);
- set_float_ftz_detection(float_ftz_before_rounding, &env->sse_status);
+ set_float_ftz_detection(float_ftz_after_rounding, &env->fp_status);
+ set_float_ftz_detection(float_ftz_after_rounding, &env->mmx_status);
+ set_float_ftz_detection(float_ftz_after_rounding, &env->sse_status);
}
-static inline uint8_t save_exception_flags(CPUX86State *env)
+static inline int save_exception_flags(CPUX86State *env)
{
- uint8_t old_flags = get_float_exception_flags(&env->fp_status);
+ int old_flags = get_float_exception_flags(&env->fp_status);
set_float_exception_flags(0, &env->fp_status);
return old_flags;
}
-static void merge_exception_flags(CPUX86State *env, uint8_t old_flags)
+static void merge_exception_flags(CPUX86State *env, int old_flags)
{
- uint8_t new_flags = get_float_exception_flags(&env->fp_status);
+ int new_flags = get_float_exception_flags(&env->fp_status);
float_raise(old_flags, &env->fp_status);
fpu_set_exception(env,
((new_flags & float_flag_invalid ? FPUS_IE : 0) |
@@ -215,12 +215,12 @@ static void merge_exception_flags(CPUX86State *env, uint8_t old_flags)
(new_flags & float_flag_overflow ? FPUS_OE : 0) |
(new_flags & float_flag_underflow ? FPUS_UE : 0) |
(new_flags & float_flag_inexact ? FPUS_PE : 0) |
- (new_flags & float_flag_input_denormal_flushed ? FPUS_DE : 0)));
+ (new_flags & float_flag_input_denormal_used ? FPUS_DE : 0)));
}
static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
floatx80 ret = floatx80_div(a, b, &env->fp_status);
merge_exception_flags(env, old_flags);
return ret;
@@ -240,7 +240,7 @@ static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
void helper_flds_FT0(CPUX86State *env, uint32_t val)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
union {
float32 f;
uint32_t i;
@@ -253,7 +253,7 @@ void helper_flds_FT0(CPUX86State *env, uint32_t val)
void helper_fldl_FT0(CPUX86State *env, uint64_t val)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
union {
float64 f;
uint64_t i;
@@ -271,7 +271,7 @@ void helper_fildl_FT0(CPUX86State *env, int32_t val)
void helper_flds_ST0(CPUX86State *env, uint32_t val)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
int new_fpstt;
union {
float32 f;
@@ -288,7 +288,7 @@ void helper_flds_ST0(CPUX86State *env, uint32_t val)
void helper_fldl_ST0(CPUX86State *env, uint64_t val)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
int new_fpstt;
union {
float64 f;
@@ -338,7 +338,7 @@ void helper_fildll_ST0(CPUX86State *env, int64_t val)
uint32_t helper_fsts_ST0(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
union {
float32 f;
uint32_t i;
@@ -351,7 +351,7 @@ uint32_t helper_fsts_ST0(CPUX86State *env)
uint64_t helper_fstl_ST0(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
union {
float64 f;
uint64_t i;
@@ -364,7 +364,7 @@ uint64_t helper_fstl_ST0(CPUX86State *env)
int32_t helper_fist_ST0(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
int32_t val;
val = floatx80_to_int32(ST0, &env->fp_status);
@@ -378,7 +378,7 @@ int32_t helper_fist_ST0(CPUX86State *env)
int32_t helper_fistl_ST0(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
int32_t val;
val = floatx80_to_int32(ST0, &env->fp_status);
@@ -391,7 +391,7 @@ int32_t helper_fistl_ST0(CPUX86State *env)
int64_t helper_fistll_ST0(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
int64_t val;
val = floatx80_to_int64(ST0, &env->fp_status);
@@ -404,7 +404,7 @@ int64_t helper_fistll_ST0(CPUX86State *env)
int32_t helper_fistt_ST0(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
int32_t val;
val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
@@ -418,7 +418,7 @@ int32_t helper_fistt_ST0(CPUX86State *env)
int32_t helper_fisttl_ST0(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
int32_t val;
val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
@@ -431,7 +431,7 @@ int32_t helper_fisttl_ST0(CPUX86State *env)
int64_t helper_fisttll_ST0(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
int64_t val;
val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
@@ -527,7 +527,7 @@ static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};
void helper_fcom_ST0_FT0(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
FloatRelation ret;
ret = floatx80_compare(ST0, FT0, &env->fp_status);
@@ -537,7 +537,7 @@ void helper_fcom_ST0_FT0(CPUX86State *env)
void helper_fucom_ST0_FT0(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
FloatRelation ret;
ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
@@ -549,7 +549,7 @@ static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
void helper_fcomi_ST0_FT0(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
int eflags;
FloatRelation ret;
@@ -562,7 +562,7 @@ void helper_fcomi_ST0_FT0(CPUX86State *env)
void helper_fucomi_ST0_FT0(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
int eflags;
FloatRelation ret;
@@ -575,28 +575,28 @@ void helper_fucomi_ST0_FT0(CPUX86State *env)
void helper_fadd_ST0_FT0(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
ST0 = floatx80_add(ST0, FT0, &env->fp_status);
merge_exception_flags(env, old_flags);
}
void helper_fmul_ST0_FT0(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
merge_exception_flags(env, old_flags);
}
void helper_fsub_ST0_FT0(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
merge_exception_flags(env, old_flags);
}
void helper_fsubr_ST0_FT0(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
merge_exception_flags(env, old_flags);
}
@@ -615,28 +615,28 @@ void helper_fdivr_ST0_FT0(CPUX86State *env)
void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
merge_exception_flags(env, old_flags);
}
void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
merge_exception_flags(env, old_flags);
}
void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
merge_exception_flags(env, old_flags);
}
void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status);
merge_exception_flags(env, old_flags);
}
@@ -861,7 +861,7 @@ void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
int v;
target_ulong mem_ref, mem_end;
int64_t val;
@@ -1136,7 +1136,7 @@ static const struct f2xm1_data f2xm1_table[65] = {
void helper_f2xm1(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
uint64_t sig = extractFloatx80Frac(ST0);
int32_t exp = extractFloatx80Exp(ST0);
bool sign = extractFloatx80Sign(ST0);
@@ -1369,7 +1369,7 @@ static const struct fpatan_data fpatan_table[9] = {
void helper_fpatan(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
uint64_t arg0_sig = extractFloatx80Frac(ST0);
int32_t arg0_exp = extractFloatx80Exp(ST0);
bool arg0_sign = extractFloatx80Sign(ST0);
@@ -1808,7 +1808,7 @@ void helper_fpatan(CPUX86State *env)
void helper_fxtract(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
CPU_LDoubleU temp;
temp.d = ST0;
@@ -1857,7 +1857,7 @@ void helper_fxtract(CPUX86State *env)
static void helper_fprem_common(CPUX86State *env, bool mod)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
uint64_t quotient;
CPU_LDoubleU temp0, temp1;
int exp0, exp1, expdiff;
@@ -2053,7 +2053,7 @@ static void helper_fyl2x_common(CPUX86State *env, floatx80 arg, int32_t *exp,
void helper_fyl2xp1(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
uint64_t arg0_sig = extractFloatx80Frac(ST0);
int32_t arg0_exp = extractFloatx80Exp(ST0);
bool arg0_sign = extractFloatx80Sign(ST0);
@@ -2151,7 +2151,7 @@ void helper_fyl2xp1(CPUX86State *env)
void helper_fyl2x(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
uint64_t arg0_sig = extractFloatx80Frac(ST0);
int32_t arg0_exp = extractFloatx80Exp(ST0);
bool arg0_sign = extractFloatx80Sign(ST0);
@@ -2298,7 +2298,7 @@ void helper_fyl2x(CPUX86State *env)
void helper_fsqrt(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
if (floatx80_is_neg(ST0)) {
env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
env->fpus |= 0x400;
@@ -2324,14 +2324,14 @@ void helper_fsincos(CPUX86State *env)
void helper_frndint(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
ST0 = floatx80_round_to_int(ST0, &env->fp_status);
merge_exception_flags(env, old_flags);
}
void helper_fscale(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
if (floatx80_invalid_encoding(ST1, &env->fp_status) ||
floatx80_invalid_encoding(ST0, &env->fp_status)) {
float_raise(float_flag_invalid, &env->fp_status);
@@ -2369,7 +2369,7 @@ void helper_fscale(CPUX86State *env)
} else {
int n;
FloatX80RoundPrec save = env->fp_status.floatx80_rounding_precision;
- uint8_t save_flags = get_float_exception_flags(&env->fp_status);
+ int save_flags = get_float_exception_flags(&env->fp_status);
set_float_exception_flags(0, &env->fp_status);
n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
set_float_exception_flags(save_flags, &env->fp_status);
@@ -3254,6 +3254,7 @@ void update_mxcsr_status(CPUX86State *env)
/* Set exception flags. */
set_float_exception_flags((mxcsr & FPUS_IE ? float_flag_invalid : 0) |
+ (mxcsr & FPUS_DE ? float_flag_input_denormal_used : 0) |
(mxcsr & FPUS_ZE ? float_flag_divbyzero : 0) |
(mxcsr & FPUS_OE ? float_flag_overflow : 0) |
(mxcsr & FPUS_UE ? float_flag_underflow : 0) |
@@ -3269,15 +3270,9 @@ void update_mxcsr_status(CPUX86State *env)
void update_mxcsr_from_sse_status(CPUX86State *env)
{
- uint8_t flags = get_float_exception_flags(&env->sse_status);
- /*
- * The MXCSR denormal flag has opposite semantics to
- * float_flag_input_denormal_flushed (the softfloat code sets that flag
- * only when flushing input denormals to zero, but SSE sets it
- * only when not flushing them to zero), so is not converted
- * here.
- */
+ int flags = get_float_exception_flags(&env->sse_status);
env->mxcsr |= ((flags & float_flag_invalid ? FPUS_IE : 0) |
+ (flags & float_flag_input_denormal_used ? FPUS_DE : 0) |
(flags & float_flag_divbyzero ? FPUS_ZE : 0) |
(flags & float_flag_overflow ? FPUS_OE : 0) |
(flags & float_flag_underflow ? FPUS_UE : 0) |
diff --git a/target/i386/tcg/helper-tcg.h b/target/i386/tcg/helper-tcg.h
index 54d8453..be011b0 100644
--- a/target/i386/tcg/helper-tcg.h
+++ b/target/i386/tcg/helper-tcg.h
@@ -20,7 +20,6 @@
#ifndef I386_HELPER_TCG_H
#define I386_HELPER_TCG_H
-#include "exec/exec-all.h"
#include "qemu/host-utils.h"
/* Maximum instruction code size */
@@ -98,7 +97,7 @@ static inline unsigned int compute_pf(uint8_t x)
/* misc_helper.c */
void cpu_load_eflags(CPUX86State *env, int eflags, int update_mask);
-/* sysemu/svm_helper.c */
+/* system/svm_helper.c */
#ifndef CONFIG_USER_ONLY
G_NORETURN void cpu_vmexit(CPUX86State *nenv, uint32_t exit_code,
uint64_t exit_info_1, uintptr_t retaddr);
@@ -116,7 +115,7 @@ int exception_has_error_code(int intno);
/* smm_helper.c */
void do_smm_enter(X86CPU *cpu);
-/* sysemu/bpt_helper.c */
+/* system/bpt_helper.c */
bool check_hw_breakpoints(CPUX86State *env, bool force_dr6_update);
/*
diff --git a/target/i386/tcg/int_helper.c b/target/i386/tcg/int_helper.c
index 1a02e9d..46741d9 100644
--- a/target/i386/tcg/int_helper.c
+++ b/target/i386/tcg/int_helper.c
@@ -20,7 +20,6 @@
#include "qemu/osdep.h"
#include "qemu/log.h"
#include "cpu.h"
-#include "exec/exec-all.h"
#include "qemu/host-utils.h"
#include "exec/helper-proto.h"
#include "qapi/error.h"
diff --git a/target/i386/tcg/mem_helper.c b/target/i386/tcg/mem_helper.c
index 3ef84e9..9e7c2d8 100644
--- a/target/i386/tcg/mem_helper.c
+++ b/target/i386/tcg/mem_helper.c
@@ -20,8 +20,7 @@
#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/helper-proto.h"
-#include "exec/exec-all.h"
-#include "exec/cpu_ldst.h"
+#include "accel/tcg/cpu-ldst.h"
#include "qemu/int128.h"
#include "qemu/atomic128.h"
#include "tcg/tcg.h"
diff --git a/target/i386/tcg/mpx_helper.c b/target/i386/tcg/mpx_helper.c
index 22423eed..fa8abcc 100644
--- a/target/i386/tcg/mpx_helper.c
+++ b/target/i386/tcg/mpx_helper.c
@@ -20,8 +20,8 @@
#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/helper-proto.h"
-#include "exec/cpu_ldst.h"
-#include "exec/exec-all.h"
+#include "accel/tcg/cpu-ldst.h"
+#include "exec/target_page.h"
#include "helper-tcg.h"
diff --git a/target/i386/tcg/seg_helper.c b/target/i386/tcg/seg_helper.c
index 7196211..071f3fb 100644
--- a/target/i386/tcg/seg_helper.c
+++ b/target/i386/tcg/seg_helper.c
@@ -22,12 +22,13 @@
#include "cpu.h"
#include "qemu/log.h"
#include "exec/helper-proto.h"
-#include "exec/exec-all.h"
-#include "exec/cpu_ldst.h"
+#include "accel/tcg/cpu-ldst.h"
+#include "accel/tcg/probe.h"
#include "exec/log.h"
#include "helper-tcg.h"
#include "seg_helper.h"
#include "access.h"
+#include "tcg-cpu.h"
#ifdef TARGET_X86_64
#define SET_ESP(val, sp_mask) \
@@ -128,6 +129,22 @@ int get_pg_mode(CPUX86State *env)
return pg_mode;
}
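+/*
+ * MMU index used by the cpu_*_kernel_*() accessors for privileged data
+ * accesses (descriptor tables, TSS), accounting for long mode and for
+ * SMAP together with EFLAGS.AC.
+ */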
+static int x86_mmu_index_kernel_pl(CPUX86State *env, unsigned pl)
+{
+ int mmu_index_32 = (env->hflags & HF_LMA_MASK) ? 0 : 1;
+ int mmu_index_base =
+ !(env->hflags & HF_SMAP_MASK) ? MMU_KNOSMAP64_IDX :
+ (pl < 3 && (env->eflags & AC_MASK)
+ ? MMU_KNOSMAP64_IDX : MMU_KSMAP64_IDX);
+
+ return mmu_index_base + mmu_index_32;
+}
+
+int cpu_mmu_index_kernel(CPUX86State *env)
+{
+ return x86_mmu_index_kernel_pl(env, env->hflags & HF_CPL_MASK);
+}
+
/* return non zero if error */
static inline int load_segment_ra(CPUX86State *env, uint32_t *e1_ptr,
uint32_t *e2_ptr, int selector,
@@ -309,10 +326,10 @@ static void tss_set_busy(CPUX86State *env, int tss_selector, bool value,
#define SWITCH_TSS_IRET 1
#define SWITCH_TSS_CALL 2
-/* return 0 if switching to a 16-bit selector */
-static int switch_tss_ra(CPUX86State *env, int tss_selector,
- uint32_t e1, uint32_t e2, int source,
- uint32_t next_eip, uintptr_t retaddr)
+static void switch_tss_ra(CPUX86State *env, int tss_selector,
+ uint32_t e1, uint32_t e2, int source,
+ uint32_t next_eip, bool has_error_code,
+ uint32_t error_code, uintptr_t retaddr)
{
int tss_limit, tss_limit_max, type, old_tss_limit_max, old_type, i;
target_ulong tss_base;
@@ -456,10 +473,6 @@ static int switch_tss_ra(CPUX86State *env, int tss_selector,
new_segs[R_GS] = 0;
new_trap = 0;
}
- /* XXX: avoid a compiler warning, see
- http://support.amd.com/us/Processor_TechDocs/24593.pdf
- chapters 12.2.5 and 13.2.4 on how to implement TSS Trap bit */
- (void)new_trap;
/* clear busy bit (it is restartable) */
if (source == SWITCH_TSS_JMP || source == SWITCH_TSS_IRET) {
@@ -582,14 +595,43 @@ static int switch_tss_ra(CPUX86State *env, int tss_selector,
cpu_x86_update_dr7(env, env->dr[7] & ~DR7_LOCAL_BP_MASK);
}
#endif
- return type >> 3;
+
+ if (has_error_code) {
+ int cpl = env->hflags & HF_CPL_MASK;
+ StackAccess sa;
+
+ /* push the error code */
+ sa.env = env;
+ sa.ra = retaddr;
+ sa.mmu_index = x86_mmu_index_pl(env, cpl);
+ sa.sp = env->regs[R_ESP];
+ if (env->segs[R_SS].flags & DESC_B_MASK) {
+ sa.sp_mask = 0xffffffff;
+ } else {
+ sa.sp_mask = 0xffff;
+ }
+ sa.ss_base = env->segs[R_SS].base;
+ if (type & 8) {
+ pushl(&sa, error_code);
+ } else {
+ pushw(&sa, error_code);
+ }
+ SET_ESP(sa.sp, sa.sp_mask);
+ }
+
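+ /*
+ * The T flag in the incoming TSS requests a debug trap on the task
+ * switch; report it as a task-switch trap via DR6.BT.
+ */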
+ if (new_trap) {
+ env->dr[6] |= DR6_BT;
+ raise_exception_ra(env, EXCP01_DB, retaddr);
+ }
}
-static int switch_tss(CPUX86State *env, int tss_selector,
- uint32_t e1, uint32_t e2, int source,
- uint32_t next_eip)
+static void switch_tss(CPUX86State *env, int tss_selector,
+ uint32_t e1, uint32_t e2, int source,
+ uint32_t next_eip, bool has_error_code,
+ int error_code)
{
- return switch_tss_ra(env, tss_selector, e1, e2, source, next_eip, 0);
+ switch_tss_ra(env, tss_selector, e1, e2, source, next_eip,
+ has_error_code, error_code, 0);
}
static inline unsigned int get_sp_mask(unsigned int e2)
@@ -702,25 +744,8 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int,
if (!(e2 & DESC_P_MASK)) {
raise_exception_err(env, EXCP0B_NOSEG, intno * 8 + 2);
}
- shift = switch_tss(env, intno * 8, e1, e2, SWITCH_TSS_CALL, old_eip);
- if (has_error_code) {
- /* push the error code on the destination stack */
- cpl = env->hflags & HF_CPL_MASK;
- sa.mmu_index = x86_mmu_index_pl(env, cpl);
- if (env->segs[R_SS].flags & DESC_B_MASK) {
- sa.sp_mask = 0xffffffff;
- } else {
- sa.sp_mask = 0xffff;
- }
- sa.sp = env->regs[R_ESP];
- sa.ss_base = env->segs[R_SS].base;
- if (shift) {
- pushl(&sa, error_code);
- } else {
- pushw(&sa, error_code);
- }
- SET_ESP(sa.sp, sa.sp_mask);
- }
+ switch_tss(env, intno * 8, e1, e2, SWITCH_TSS_CALL, old_eip,
+ has_error_code, error_code);
return;
}
@@ -1516,7 +1541,8 @@ void helper_ljmp_protected(CPUX86State *env, int new_cs, target_ulong new_eip,
if (dpl < cpl || dpl < rpl) {
raise_exception_err_ra(env, EXCP0D_GPF, new_cs & 0xfffc, GETPC());
}
- switch_tss_ra(env, new_cs, e1, e2, SWITCH_TSS_JMP, next_eip, GETPC());
+ switch_tss_ra(env, new_cs, e1, e2, SWITCH_TSS_JMP, next_eip,
+ false, 0, GETPC());
break;
case 4: /* 286 call gate */
case 12: /* 386 call gate */
@@ -1728,7 +1754,8 @@ void helper_lcall_protected(CPUX86State *env, int new_cs, target_ulong new_eip,
if (dpl < cpl || dpl < rpl) {
raise_exception_err_ra(env, EXCP0D_GPF, new_cs & 0xfffc, GETPC());
}
- switch_tss_ra(env, new_cs, e1, e2, SWITCH_TSS_CALL, next_eip, GETPC());
+ switch_tss_ra(env, new_cs, e1, e2, SWITCH_TSS_CALL, next_eip,
+ false, 0, GETPC());
return;
case 4: /* 286 call gate */
case 12: /* 386 call gate */
@@ -2239,7 +2266,8 @@ void helper_iret_protected(CPUX86State *env, int shift, int next_eip)
if (type != 3) {
raise_exception_err_ra(env, EXCP0A_TSS, tss_selector & 0xfffc, GETPC());
}
- switch_tss_ra(env, tss_selector, e1, e2, SWITCH_TSS_IRET, next_eip, GETPC());
+ switch_tss_ra(env, tss_selector, e1, e2, SWITCH_TSS_IRET, next_eip,
+ false, 0, GETPC());
} else {
helper_ret_protected(env, shift, 1, 0, GETPC());
}
diff --git a/target/i386/tcg/seg_helper.h b/target/i386/tcg/seg_helper.h
index ebf1035..ea98e1a 100644
--- a/target/i386/tcg/seg_helper.h
+++ b/target/i386/tcg/seg_helper.h
@@ -20,6 +20,8 @@
#ifndef SEG_HELPER_H
#define SEG_HELPER_H
+#include "cpu.h"
+
//#define DEBUG_PCALL
#ifdef DEBUG_PCALL
@@ -31,12 +33,12 @@
# define LOG_PCALL_STATE(cpu) do { } while (0)
#endif
+int cpu_mmu_index_kernel(CPUX86State *env);
+
/*
* TODO: Convert callers to compute cpu_mmu_index_kernel once
* and use *_mmuidx_ra directly.
*/
-#define cpu_ldub_kernel_ra(e, p, r) \
- cpu_ldub_mmuidx_ra(e, p, cpu_mmu_index_kernel(e), r)
#define cpu_lduw_kernel_ra(e, p, r) \
cpu_lduw_mmuidx_ra(e, p, cpu_mmu_index_kernel(e), r)
#define cpu_ldl_kernel_ra(e, p, r) \
@@ -44,8 +46,6 @@
#define cpu_ldq_kernel_ra(e, p, r) \
cpu_ldq_mmuidx_ra(e, p, cpu_mmu_index_kernel(e), r)
-#define cpu_stb_kernel_ra(e, p, v, r) \
- cpu_stb_mmuidx_ra(e, p, v, cpu_mmu_index_kernel(e), r)
#define cpu_stw_kernel_ra(e, p, v, r) \
cpu_stw_mmuidx_ra(e, p, v, cpu_mmu_index_kernel(e), r)
#define cpu_stl_kernel_ra(e, p, v, r) \
@@ -53,12 +53,10 @@
#define cpu_stq_kernel_ra(e, p, v, r) \
cpu_stq_mmuidx_ra(e, p, v, cpu_mmu_index_kernel(e), r)
-#define cpu_ldub_kernel(e, p) cpu_ldub_kernel_ra(e, p, 0)
#define cpu_lduw_kernel(e, p) cpu_lduw_kernel_ra(e, p, 0)
#define cpu_ldl_kernel(e, p) cpu_ldl_kernel_ra(e, p, 0)
#define cpu_ldq_kernel(e, p) cpu_ldq_kernel_ra(e, p, 0)
-#define cpu_stb_kernel(e, p, v) cpu_stb_kernel_ra(e, p, v, 0)
#define cpu_stw_kernel(e, p, v) cpu_stw_kernel_ra(e, p, v, 0)
#define cpu_stl_kernel(e, p, v) cpu_stl_kernel_ra(e, p, v, 0)
#define cpu_stq_kernel(e, p, v) cpu_stq_kernel_ra(e, p, v, 0)
diff --git a/target/i386/tcg/system/bpt_helper.c b/target/i386/tcg/system/bpt_helper.c
index be232c1..aebb5ca 100644
--- a/target/i386/tcg/system/bpt_helper.c
+++ b/target/i386/tcg/system/bpt_helper.c
@@ -19,8 +19,8 @@
#include "qemu/osdep.h"
#include "cpu.h"
-#include "exec/exec-all.h"
#include "exec/helper-proto.h"
+#include "exec/watchpoint.h"
#include "tcg/helper-tcg.h"
diff --git a/target/i386/tcg/system/excp_helper.c b/target/i386/tcg/system/excp_helper.c
index 6876329..c162621 100644
--- a/target/i386/tcg/system/excp_helper.c
+++ b/target/i386/tcg/system/excp_helper.c
@@ -19,9 +19,13 @@
#include "qemu/osdep.h"
#include "cpu.h"
-#include "exec/cpu_ldst.h"
+#include "accel/tcg/cpu-ldst.h"
+#include "accel/tcg/probe.h"
#include "exec/cputlb.h"
#include "exec/page-protection.h"
+#include "exec/target_page.h"
+#include "exec/tlb-flags.h"
+#include "exec/tswap.h"
#include "tcg/helper-tcg.h"
typedef struct TranslateParams {
diff --git a/target/i386/tcg/system/misc_helper.c b/target/i386/tcg/system/misc_helper.c
index ce18c75..9c3f5cc 100644
--- a/target/i386/tcg/system/misc_helper.c
+++ b/target/i386/tcg/system/misc_helper.c
@@ -21,8 +21,9 @@
#include "qemu/main-loop.h"
#include "cpu.h"
#include "exec/helper-proto.h"
-#include "exec/cpu_ldst.h"
-#include "exec/address-spaces.h"
+#include "accel/tcg/cpu-ldst.h"
+#include "system/address-spaces.h"
+#include "system/memory.h"
#include "exec/cputlb.h"
#include "tcg/helper-tcg.h"
#include "hw/i386/apic.h"
diff --git a/target/i386/tcg/system/seg_helper.c b/target/i386/tcg/system/seg_helper.c
index b07cc9f..d4ea890 100644
--- a/target/i386/tcg/system/seg_helper.c
+++ b/target/i386/tcg/system/seg_helper.c
@@ -23,7 +23,7 @@
#include "qemu/main-loop.h"
#include "cpu.h"
#include "exec/helper-proto.h"
-#include "exec/cpu_ldst.h"
+#include "accel/tcg/cpu-ldst.h"
#include "tcg/helper-tcg.h"
#include "../seg_helper.h"
diff --git a/target/i386/tcg/system/svm_helper.c b/target/i386/tcg/system/svm_helper.c
index f9982b7..b27049b 100644
--- a/target/i386/tcg/system/svm_helper.c
+++ b/target/i386/tcg/system/svm_helper.c
@@ -22,7 +22,7 @@
#include "cpu.h"
#include "exec/helper-proto.h"
#include "exec/cputlb.h"
-#include "exec/cpu_ldst.h"
+#include "accel/tcg/cpu-ldst.h"
#include "tcg/helper-tcg.h"
/* Secure Virtual Machine helpers */
diff --git a/target/i386/tcg/system/tcg-cpu.c b/target/i386/tcg/system/tcg-cpu.c
index 13a3507..0538a4f 100644
--- a/target/i386/tcg/system/tcg-cpu.c
+++ b/target/i386/tcg/system/tcg-cpu.c
@@ -23,7 +23,8 @@
#include "system/system.h"
#include "qemu/units.h"
-#include "exec/address-spaces.h"
+#include "system/address-spaces.h"
+#include "system/memory.h"
#include "tcg/tcg-cpu.h"
diff --git a/target/i386/tcg/tcg-cpu.c b/target/i386/tcg/tcg-cpu.c
index b8aff82..6f5dc06 100644
--- a/target/i386/tcg/tcg-cpu.c
+++ b/target/i386/tcg/tcg-cpu.c
@@ -23,7 +23,8 @@
#include "qemu/accel.h"
#include "accel/accel-cpu-target.h"
#include "exec/translation-block.h"
-
+#include "exec/target_page.h"
+#include "accel/tcg/cpu-ops.h"
#include "tcg-cpu.h"
/* Frob eflags into and out of the CPU temporary format. */
@@ -47,6 +48,25 @@ static void x86_cpu_exec_exit(CPUState *cs)
env->eflags = cpu_compute_eflags(env);
}
+static TCGTBCPUState x86_get_tb_cpu_state(CPUState *cs)
+{
+ CPUX86State *env = cpu_env(cs);
+ uint32_t flags, cs_base;
+ vaddr pc;
+
+ flags = env->hflags |
+ (env->eflags & (IOPL_MASK | TF_MASK | RF_MASK | VM_MASK | AC_MASK));
+ if (env->hflags & HF_CS64_MASK) {
+ cs_base = 0;
+ pc = env->eip;
+ } else {
+ cs_base = env->segs[R_CS].base;
+ pc = (uint32_t)(cs_base + env->eip);
+ }
+
+ return (TCGTBCPUState){ .pc = pc, .flags = flags, .cs_base = cs_base };
+}
+
static void x86_cpu_synchronize_from_tb(CPUState *cs,
const TranslationBlock *tb)
{
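The new x86_get_tb_cpu_state() hook computes the key used to look up translation blocks. The sketch below mirrors its derivation under assumed flag values; HF_CS64_MASK and friends are placeholders for the real bit positions in cpu.h.

#include <stdint.h>
#include <stdio.h>

/* Assumed stand-ins for the flag bits used by x86_get_tb_cpu_state();
 * the real definitions are in cpu.h and may use different positions. */
#define HF_CS64_MASK  (1u << 15)
#define IOPL_MASK     0x00003000u
#define TF_MASK       0x00000100u
#define RF_MASK       0x00010000u
#define VM_MASK       0x00020000u
#define AC_MASK       0x00040000u

typedef struct {
    uint64_t pc;
    uint32_t flags;
    uint32_t cs_base;
} TBKey;

/* Mirrors the new hook: the TB key is hflags plus a few live EFLAGS bits;
 * outside 64-bit code the PC is the 32-bit linear address CS.base + EIP
 * and CS.base itself becomes part of the key. */
static TBKey tb_key(uint32_t hflags, uint32_t eflags,
                    uint64_t cs_base, uint64_t eip)
{
    TBKey k;

    k.flags = hflags |
        (eflags & (IOPL_MASK | TF_MASK | RF_MASK | VM_MASK | AC_MASK));
    if (hflags & HF_CS64_MASK) {
        k.cs_base = 0;
        k.pc = eip;
    } else {
        k.cs_base = (uint32_t)cs_base;
        k.pc = (uint32_t)(cs_base + eip);
    }
    return k;
}

int main(void)
{
    /* Real-mode reset vector: CS.base 0xffff0000, EIP 0xfff0. */
    TBKey k = tb_key(0, 0, 0xffff0000u, 0xfff0);
    printf("pc = 0x%llx\n", (unsigned long long)k.pc);  /* 0xfffffff0 */
    return 0;
}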
@@ -94,6 +114,23 @@ static void x86_restore_state_to_opc(CPUState *cs,
}
}
+int x86_mmu_index_pl(CPUX86State *env, unsigned pl)
+{
+ int mmu_index_32 = (env->hflags & HF_CS64_MASK) ? 0 : 1;
+ int mmu_index_base =
+ pl == 3 ? MMU_USER64_IDX :
+ !(env->hflags & HF_SMAP_MASK) ? MMU_KNOSMAP64_IDX :
+ (env->eflags & AC_MASK) ? MMU_KNOSMAP64_IDX : MMU_KSMAP64_IDX;
+
+ return mmu_index_base + mmu_index_32;
+}
+
+static int x86_cpu_mmu_index(CPUState *cs, bool ifetch)
+{
+ CPUX86State *env = cpu_env(cs);
+ return x86_mmu_index_pl(env, env->hflags & HF_CPL_MASK);
+}
+
#ifndef CONFIG_USER_ONLY
static bool x86_debug_check_breakpoint(CPUState *cs)
{
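x86_mmu_index_pl(), added above, selects the software MMU index from the privilege level, SMAP state and code size. A standalone sketch of the same selection, with illustrative index values and flag bits (the real MMU_*_IDX enumeration and masks are defined in cpu.h):

#include <stdint.h>
#include <stdio.h>

/* Illustrative flag bits and MMU-index layout, not the real cpu.h values. */
#define HF_CS64_MASK  (1u << 15)
#define HF_SMAP_MASK  (1u << 23)
#define AC_MASK       (1u << 18)

enum {
    MMU_KSMAP64_IDX,     /* kernel access, SMAP enforced, 64-bit code */
    MMU_KSMAP32_IDX,     /*   "       "      "     "      32-bit code */
    MMU_USER64_IDX,
    MMU_USER32_IDX,
    MMU_KNOSMAP64_IDX,   /* kernel access, SMAP not enforced */
    MMU_KNOSMAP32_IDX,
};

/* Same selection as x86_mmu_index_pl(): pick the 64-bit variant of the
 * index and bump it by one for 32-bit code; SMAP only constrains kernel
 * accesses and EFLAGS.AC temporarily lifts it. */
static int mmu_index_pl(uint32_t hflags, uint32_t eflags, unsigned pl)
{
    int mmu_index_32 = (hflags & HF_CS64_MASK) ? 0 : 1;
    int mmu_index_base =
        pl == 3                  ? MMU_USER64_IDX :
        !(hflags & HF_SMAP_MASK) ? MMU_KNOSMAP64_IDX :
        (eflags & AC_MASK)       ? MMU_KNOSMAP64_IDX : MMU_KSMAP64_IDX;

    return mmu_index_base + mmu_index_32;
}

int main(void)
{
    printf("ring 3, 32-bit: %d\n", mmu_index_pl(0, 0, 3));             /* MMU_USER32_IDX */
    printf("ring 0, SMAP:   %d\n", mmu_index_pl(HF_SMAP_MASK, 0, 0));  /* MMU_KSMAP32_IDX */
    return 0;
}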
@@ -103,15 +140,36 @@ static bool x86_debug_check_breakpoint(CPUState *cs)
/* RF disables all architectural breakpoints. */
return !(env->eflags & RF_MASK);
}
-#endif
-#include "accel/tcg/cpu-ops.h"
+static void x86_cpu_exec_reset(CPUState *cs)
+{
+ CPUArchState *env = cpu_env(cs);
+
+ cpu_svm_check_intercept_param(env, SVM_EXIT_INIT, 0, 0);
+ do_cpu_init(env_archcpu(env));
+ cs->exception_index = EXCP_HALTED;
+}
-static const TCGCPUOps x86_tcg_ops = {
+static vaddr x86_pointer_wrap(CPUState *cs, int mmu_idx,
+ vaddr result, vaddr base)
+{
+ return cpu_env(cs)->hflags & HF_CS64_MASK ? result : (uint32_t)result;
+}
+#endif
+
+const TCGCPUOps x86_tcg_ops = {
+ .mttcg_supported = true,
+ .precise_smc = true,
+ /*
+ * The x86 has a strong memory model with some store-after-load re-ordering
+ */
+ .guest_default_memory_order = TCG_MO_ALL & ~TCG_MO_ST_LD,
.initialize = tcg_x86_init,
.translate_code = x86_translate_code,
+ .get_tb_cpu_state = x86_get_tb_cpu_state,
.synchronize_from_tb = x86_cpu_synchronize_from_tb,
.restore_state_to_opc = x86_restore_state_to_opc,
+ .mmu_index = x86_cpu_mmu_index,
.cpu_exec_enter = x86_cpu_exec_enter,
.cpu_exec_exit = x86_cpu_exec_exit,
#ifdef CONFIG_USER_ONLY
@@ -120,9 +178,11 @@ static const TCGCPUOps x86_tcg_ops = {
.record_sigbus = x86_cpu_record_sigbus,
#else
.tlb_fill = x86_cpu_tlb_fill,
+ .pointer_wrap = x86_pointer_wrap,
.do_interrupt = x86_cpu_do_interrupt,
.cpu_exec_halt = x86_cpu_exec_halt,
.cpu_exec_interrupt = x86_cpu_exec_interrupt,
+ .cpu_exec_reset = x86_cpu_exec_reset,
.do_unaligned_access = x86_cpu_do_unaligned_access,
.debug_excp_handler = breakpoint_handler,
.debug_check_breakpoint = x86_debug_check_breakpoint,
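The pointer_wrap hook wired up just above keeps guest address arithmetic inside the 4 GiB space whenever the CPU is not executing 64-bit code. A minimal stand-alone version of the same rule:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Same rule as x86_pointer_wrap(): outside 64-bit code, the result of
 * guest address arithmetic wraps to 32 bits; in 64-bit code it is kept. */
static uint64_t pointer_wrap(bool cs64, uint64_t result)
{
    return cs64 ? result : (uint32_t)result;
}

int main(void)
{
    uint64_t wrapped = pointer_wrap(false, 0xffffffffull + 0x10);
    printf("0xffffffff + 0x10 wraps to 0x%llx\n",
           (unsigned long long)wrapped);   /* 0xf */
    return 0;
}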
@@ -130,17 +190,6 @@ static const TCGCPUOps x86_tcg_ops = {
#endif /* !CONFIG_USER_ONLY */
};
-static void x86_tcg_cpu_init_ops(AccelCPUClass *accel_cpu, CPUClass *cc)
-{
- /* for x86, all cpus use the same set of operations */
- cc->tcg_ops = &x86_tcg_ops;
-}
-
-static void x86_tcg_cpu_class_init(CPUClass *cc)
-{
- cc->init_accel_cpu = x86_tcg_cpu_init_ops;
-}
-
static void x86_tcg_cpu_xsave_init(void)
{
#define XO(bit, field) \
@@ -181,7 +230,7 @@ static void x86_tcg_cpu_instance_init(CPUState *cs)
x86_tcg_cpu_xsave_init();
}
-static void x86_tcg_cpu_accel_class_init(ObjectClass *oc, void *data)
+static void x86_tcg_cpu_accel_class_init(ObjectClass *oc, const void *data)
{
AccelCPUClass *acc = ACCEL_CPU_CLASS(oc);
@@ -189,7 +238,6 @@ static void x86_tcg_cpu_accel_class_init(ObjectClass *oc, void *data)
acc->cpu_target_realize = tcg_cpu_realizefn;
#endif /* CONFIG_USER_ONLY */
- acc->cpu_class_init = x86_tcg_cpu_class_init;
acc->cpu_instance_init = x86_tcg_cpu_instance_init;
}
static const TypeInfo x86_tcg_cpu_accel_type_info = {
diff --git a/target/i386/tcg/tcg-cpu.h b/target/i386/tcg/tcg-cpu.h
index 53a8494..85bcd61 100644
--- a/target/i386/tcg/tcg-cpu.h
+++ b/target/i386/tcg/tcg-cpu.h
@@ -19,6 +19,8 @@
#ifndef TCG_CPU_H
#define TCG_CPU_H
+#include "cpu.h"
+
#define XSAVE_FCW_FSW_OFFSET 0x000
#define XSAVE_FTW_FOP_OFFSET 0x004
#define XSAVE_CWD_RIP_OFFSET 0x008
@@ -76,6 +78,10 @@ QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, zmm_hi256_state) != XSAVE_ZMM_HI256_OFF
QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, hi16_zmm_state) != XSAVE_HI16_ZMM_OFFSET);
QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, pkru_state) != XSAVE_PKRU_OFFSET);
+extern const TCGCPUOps x86_tcg_ops;
+
bool tcg_cpu_realizefn(CPUState *cs, Error **errp);
+int x86_mmu_index_pl(CPUX86State *env, unsigned pl);
+
#endif /* TCG_CPU_H */
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 1bbf09a..0cb87d0 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -20,11 +20,12 @@
#include "qemu/host-utils.h"
#include "cpu.h"
-#include "exec/exec-all.h"
+#include "accel/tcg/cpu-mmu-index.h"
#include "exec/translation-block.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "exec/translator.h"
+#include "exec/target_page.h"
#include "fpu/softfloat.h"
#include "exec/helper-proto.h"
@@ -2024,27 +2025,39 @@ static void gen_op_movl_seg_real(DisasContext *s, X86Seg seg_reg, TCGv seg)
/* move SRC to seg_reg and compute if the CPU state may change. Never
call this function with seg_reg == R_CS */
-static void gen_movl_seg(DisasContext *s, X86Seg seg_reg, TCGv src)
+static void gen_movl_seg(DisasContext *s, X86Seg seg_reg, TCGv src, bool inhibit_irq)
{
if (PE(s) && !VM86(s)) {
TCGv_i32 sel = tcg_temp_new_i32();
tcg_gen_trunc_tl_i32(sel, src);
gen_helper_load_seg(tcg_env, tcg_constant_i32(seg_reg), sel);
- /* abort translation because the addseg value may change or
- because ss32 may change. For R_SS, translation must always
- stop as a special handling must be done to disable hardware
- interrupts for the next instruction */
- if (seg_reg == R_SS) {
- s->base.is_jmp = DISAS_EOB_INHIBIT_IRQ;
- } else if (CODE32(s) && seg_reg < R_FS) {
+
+ /*
+ * For moves to SS, the SS32 flag may change. For CODE32 only, changes
+ * to SS, DS and ES may change the ADDSEG flags.
+ */
+ if (seg_reg == R_SS || (CODE32(s) && seg_reg < R_FS)) {
s->base.is_jmp = DISAS_EOB_NEXT;
}
} else {
gen_op_movl_seg_real(s, seg_reg, src);
- if (seg_reg == R_SS) {
- s->base.is_jmp = DISAS_EOB_INHIBIT_IRQ;
- }
+ }
+
+ /*
+     * For MOV or POP to SS (but not LSS), translation must always
+     * stop because special handling is needed to disable hardware
+     * interrupts for the next instruction.
+ *
+ * This is the last instruction, so it's okay to overwrite
+ * HF_TF_MASK; the next TB will start with the flag set.
+ *
+ * DISAS_EOB_INHIBIT_IRQ is a superset of DISAS_EOB_NEXT which
+ * might have been set above.
+ */
+ if (inhibit_irq) {
+ s->base.is_jmp = DISAS_EOB_INHIBIT_IRQ;
+ s->flags &= ~HF_TF_MASK;
}
}
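With inhibit_irq now passed explicitly, gen_movl_seg() separates "the CPU state may change, end the TB" from "inhibit interrupts for one instruction". A compact sketch of that decision, using stand-in names for the DISAS_* modes and segment indexes seen in the hunk above:

#include <stdbool.h>
#include <stdio.h>

typedef enum {
    DISAS_NEXT,             /* keep translating                         */
    DISAS_EOB_NEXT,         /* end the TB: SS32/ADDSEG flags may change */
    DISAS_EOB_INHIBIT_IRQ,  /* end the TB and inhibit IRQs for one insn */
} EobMode;

enum { R_ES, R_CS, R_SS, R_DS, R_FS, R_GS };

/* LSS loads SS without inhibiting interrupts, while MOV/POP to SS passes
 * inhibit_irq = true; inhibition is a superset of the plain EOB case. */
static EobMode seg_load_eob(int seg_reg, bool pe, bool vm86,
                            bool code32, bool inhibit_irq)
{
    EobMode mode = DISAS_NEXT;

    if (pe && !vm86 && (seg_reg == R_SS || (code32 && seg_reg < R_FS))) {
        mode = DISAS_EOB_NEXT;
    }
    if (inhibit_irq) {
        mode = DISAS_EOB_INHIBIT_IRQ;
    }
    return mode;
}

int main(void)
{
    printf("LSS in 32-bit protected mode: %d\n",
           seg_load_eob(R_SS, true, false, true, false));  /* DISAS_EOB_NEXT */
    printf("POP SS:                       %d\n",
           seg_load_eob(R_SS, true, false, true, true));   /* INHIBIT_IRQ */
    return 0;
}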
@@ -2295,7 +2308,7 @@ gen_eob(DisasContext *s, int mode)
if (mode == DISAS_EOB_RECHECK_TF) {
gen_helper_rechecking_single_step(tcg_env);
tcg_gen_exit_tb(NULL, 0);
- } else if ((s->flags & HF_TF_MASK) && mode != DISAS_EOB_INHIBIT_IRQ) {
+ } else if (s->flags & HF_TF_MASK) {
gen_helper_single_step(tcg_env);
} else if (mode == DISAS_JUMP &&
/* give irqs a chance to happen */
diff --git a/target/i386/tcg/user/excp_helper.c b/target/i386/tcg/user/excp_helper.c
index b3bdb78..98fab4cb 100644
--- a/target/i386/tcg/user/excp_helper.c
+++ b/target/i386/tcg/user/excp_helper.c
@@ -19,7 +19,6 @@
#include "qemu/osdep.h"
#include "cpu.h"
-#include "exec/exec-all.h"
#include "tcg/helper-tcg.h"
void x86_cpu_record_sigsegv(CPUState *cs, vaddr addr,
diff --git a/target/i386/tcg/user/seg_helper.c b/target/i386/tcg/user/seg_helper.c
index c45f2ac..263f599 100644
--- a/target/i386/tcg/user/seg_helper.c
+++ b/target/i386/tcg/user/seg_helper.c
@@ -21,8 +21,7 @@
#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/helper-proto.h"
-#include "exec/exec-all.h"
-#include "exec/cpu_ldst.h"
+#include "accel/tcg/cpu-ldst.h"
#include "tcg/helper-tcg.h"
#include "tcg/seg_helper.h"
diff --git a/target/i386/whpx/whpx-accel-ops.c b/target/i386/whpx/whpx-accel-ops.c
index 81fdd06..5f4841c 100644
--- a/target/i386/whpx/whpx-accel-ops.c
+++ b/target/i386/whpx/whpx-accel-ops.c
@@ -83,19 +83,19 @@ static bool whpx_vcpu_thread_is_idle(CPUState *cpu)
return !whpx_apic_in_platform();
}
-static void whpx_accel_ops_class_init(ObjectClass *oc, void *data)
+static void whpx_accel_ops_class_init(ObjectClass *oc, const void *data)
{
AccelOpsClass *ops = ACCEL_OPS_CLASS(oc);
ops->create_vcpu_thread = whpx_start_vcpu_thread;
ops->kick_vcpu_thread = whpx_kick_vcpu_thread;
ops->cpu_thread_is_idle = whpx_vcpu_thread_is_idle;
+ ops->handle_interrupt = generic_handle_interrupt;
ops->synchronize_post_reset = whpx_cpu_synchronize_post_reset;
ops->synchronize_post_init = whpx_cpu_synchronize_post_init;
ops->synchronize_state = whpx_cpu_synchronize_state;
ops->synchronize_pre_loadvm = whpx_cpu_synchronize_pre_loadvm;
- ops->synchronize_pre_resume = whpx_cpu_synchronize_pre_resume;
}
static const TypeInfo whpx_accel_ops_type = {
diff --git a/target/i386/whpx/whpx-accel-ops.h b/target/i386/whpx/whpx-accel-ops.h
index e6cf155..54cfc25 100644
--- a/target/i386/whpx/whpx-accel-ops.h
+++ b/target/i386/whpx/whpx-accel-ops.h
@@ -21,7 +21,6 @@ void whpx_cpu_synchronize_state(CPUState *cpu);
void whpx_cpu_synchronize_post_reset(CPUState *cpu);
void whpx_cpu_synchronize_post_init(CPUState *cpu);
void whpx_cpu_synchronize_pre_loadvm(CPUState *cpu);
-void whpx_cpu_synchronize_pre_resume(bool step_pending);
/* state subset only touched by the VCPU itself during runtime */
#define WHPX_SET_RUNTIME_STATE 1
diff --git a/target/i386/whpx/whpx-all.c b/target/i386/whpx/whpx-all.c
index e44d044..22ac609 100644
--- a/target/i386/whpx/whpx-all.c
+++ b/target/i386/whpx/whpx-all.c
@@ -10,8 +10,8 @@
#include "qemu/osdep.h"
#include "cpu.h"
-#include "exec/address-spaces.h"
-#include "exec/ioport.h"
+#include "system/address-spaces.h"
+#include "system/ioport.h"
#include "gdbstub/helpers.h"
#include "qemu/accel.h"
#include "system/whpx.h"
@@ -26,6 +26,8 @@
#include "qapi/qapi-types-common.h"
#include "qapi/qapi-visit-common.h"
#include "migration/blocker.h"
+#include "host-cpu.h"
+#include "accel/accel-cpu-target.h"
#include <winerror.h>
#include "whpx-internal.h"
@@ -237,13 +239,12 @@ struct AccelCPUState {
uint64_t tpr;
uint64_t apic_base;
bool interruption_pending;
- bool dirty;
/* Must be the last field as it may have a tail */
WHV_RUN_VP_EXIT_CONTEXT exit_ctx;
};
-static bool whpx_allowed;
+bool whpx_allowed;
static bool whp_dispatch_initialized;
static HMODULE hWinHvPlatform, hWinHvEmulation;
static uint32_t max_vcpu_index;
@@ -836,7 +837,7 @@ static HRESULT CALLBACK whpx_emu_setreg_callback(
* The emulator just successfully wrote the register state. We clear the
* dirty state so we avoid the double write on resume of the VP.
*/
- cpu->accel->dirty = false;
+ cpu->vcpu_dirty = false;
return hr;
}
@@ -1391,7 +1392,7 @@ static int whpx_last_vcpu_stopping(CPUState *cpu)
/* Returns the address of the next instruction that is about to be executed. */
static vaddr whpx_vcpu_get_pc(CPUState *cpu, bool exit_context_valid)
{
- if (cpu->accel->dirty) {
+ if (cpu->vcpu_dirty) {
/* The CPU registers have been modified by other parts of QEMU. */
return cpu_env(cpu)->eip;
} else if (exit_context_valid) {
@@ -1704,9 +1705,9 @@ static int whpx_vcpu_run(CPUState *cpu)
}
do {
- if (cpu->accel->dirty) {
+ if (cpu->vcpu_dirty) {
whpx_set_registers(cpu, WHPX_SET_RUNTIME_STATE);
- cpu->accel->dirty = false;
+ cpu->vcpu_dirty = false;
}
if (exclusive_step_mode == WHPX_STEP_NONE) {
@@ -2054,9 +2055,9 @@ static int whpx_vcpu_run(CPUState *cpu)
static void do_whpx_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
{
- if (!cpu->accel->dirty) {
+ if (!cpu->vcpu_dirty) {
whpx_get_registers(cpu);
- cpu->accel->dirty = true;
+ cpu->vcpu_dirty = true;
}
}
@@ -2064,20 +2065,20 @@ static void do_whpx_cpu_synchronize_post_reset(CPUState *cpu,
run_on_cpu_data arg)
{
whpx_set_registers(cpu, WHPX_SET_RESET_STATE);
- cpu->accel->dirty = false;
+ cpu->vcpu_dirty = false;
}
static void do_whpx_cpu_synchronize_post_init(CPUState *cpu,
run_on_cpu_data arg)
{
whpx_set_registers(cpu, WHPX_SET_FULL_STATE);
- cpu->accel->dirty = false;
+ cpu->vcpu_dirty = false;
}
static void do_whpx_cpu_synchronize_pre_loadvm(CPUState *cpu,
run_on_cpu_data arg)
{
- cpu->accel->dirty = true;
+ cpu->vcpu_dirty = true;
}
/*
@@ -2086,7 +2087,7 @@ static void do_whpx_cpu_synchronize_pre_loadvm(CPUState *cpu,
void whpx_cpu_synchronize_state(CPUState *cpu)
{
- if (!cpu->accel->dirty) {
+ if (!cpu->vcpu_dirty) {
run_on_cpu(cpu, do_whpx_cpu_synchronize_state, RUN_ON_CPU_NULL);
}
}
@@ -2106,7 +2107,7 @@ void whpx_cpu_synchronize_pre_loadvm(CPUState *cpu)
run_on_cpu(cpu, do_whpx_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL);
}
-void whpx_cpu_synchronize_pre_resume(bool step_pending)
+static void whpx_pre_resume_vm(AccelState *as, bool step_pending)
{
whpx_global.step_pending = step_pending;
}
@@ -2226,7 +2227,7 @@ int whpx_init_vcpu(CPUState *cpu)
}
vcpu->interruptable = true;
- vcpu->dirty = true;
+ cpu->vcpu_dirty = true;
cpu->accel = vcpu;
max_vcpu_index = max(max_vcpu_index, cpu->cpu_index);
qemu_add_vm_change_state_handler(whpx_cpu_update_state, env);
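The WHPX hunks in this file replace the accelerator-private dirty flag with the generic cpu->vcpu_dirty field. The toy model below shows the protocol those call sites follow; get_registers/set_registers stand in for whpx_get_registers()/whpx_set_registers(), and the printouts are purely illustrative.

#include <stdbool.h>
#include <stdio.h>

typedef struct {
    bool vcpu_dirty;   /* true: QEMU's register copy is newer than the VP's */
} ToyCpu;

static void get_registers(ToyCpu *c) { (void)c; printf("hypervisor -> QEMU\n"); }
static void set_registers(ToyCpu *c) { (void)c; printf("QEMU -> hypervisor\n"); }

/* whpx_cpu_synchronize_state() pattern: pull the registers once, then
 * treat QEMU's copy as authoritative until the next run. */
static void synchronize_state(ToyCpu *c)
{
    if (!c->vcpu_dirty) {
        get_registers(c);
        c->vcpu_dirty = true;
    }
}

/* whpx_vcpu_run() pattern: flush pending changes before entering the
 * virtual processor, then clear the flag to avoid a double write. */
static void vcpu_run(ToyCpu *c)
{
    if (c->vcpu_dirty) {
        set_registers(c);
        c->vcpu_dirty = false;
    }
    /* the real code would run the virtual processor here */
}

int main(void)
{
    ToyCpu c = { .vcpu_dirty = true };   /* fresh vCPU: full state upload */
    vcpu_run(&c);
    synchronize_state(&c);
    vcpu_run(&c);
    return 0;
}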
@@ -2501,11 +2502,33 @@ static void whpx_set_kernel_irqchip(Object *obj, Visitor *v,
}
}
+static void whpx_cpu_instance_init(CPUState *cs)
+{
+ X86CPU *cpu = X86_CPU(cs);
+
+ host_cpu_instance_init(cpu);
+}
+
+static void whpx_cpu_accel_class_init(ObjectClass *oc, const void *data)
+{
+ AccelCPUClass *acc = ACCEL_CPU_CLASS(oc);
+
+ acc->cpu_instance_init = whpx_cpu_instance_init;
+}
+
+static const TypeInfo whpx_cpu_accel_type = {
+ .name = ACCEL_CPU_NAME("whpx"),
+
+ .parent = TYPE_ACCEL_CPU,
+ .class_init = whpx_cpu_accel_class_init,
+ .abstract = true,
+};
+
/*
* Partition support
*/
-static int whpx_accel_init(MachineState *ms)
+static int whpx_accel_init(AccelState *as, MachineState *ms)
{
struct whpx_state *whpx;
int ret;
@@ -2689,20 +2712,16 @@ error:
return ret;
}
-int whpx_enabled(void)
-{
- return whpx_allowed;
-}
-
bool whpx_apic_in_platform(void) {
return whpx_global.apic_in_platform;
}
-static void whpx_accel_class_init(ObjectClass *oc, void *data)
+static void whpx_accel_class_init(ObjectClass *oc, const void *data)
{
AccelClass *ac = ACCEL_CLASS(oc);
ac->name = "WHPX";
ac->init_machine = whpx_accel_init;
+ ac->pre_resume_vm = whpx_pre_resume_vm;
ac->allowed = &whpx_allowed;
object_class_property_add(oc, "kernel-irqchip", "on|off|split",
@@ -2731,6 +2750,7 @@ static const TypeInfo whpx_accel_type = {
static void whpx_type_init(void)
{
type_register_static(&whpx_accel_type);
+ type_register_static(&whpx_cpu_accel_type);
}
bool init_whp_dispatch(void)
diff --git a/target/i386/whpx/whpx-apic.c b/target/i386/whpx/whpx-apic.c
index 630a961..e1ef6d4 100644
--- a/target/i386/whpx/whpx-apic.c
+++ b/target/i386/whpx/whpx-apic.c
@@ -252,7 +252,7 @@ static void whpx_apic_realize(DeviceState *dev, Error **errp)
msi_nonbroken = true;
}
-static void whpx_apic_class_init(ObjectClass *klass, void *data)
+static void whpx_apic_class_init(ObjectClass *klass, const void *data)
{
APICCommonClass *k = APIC_COMMON_CLASS(klass);
diff --git a/target/i386/xsave_helper.c b/target/i386/xsave_helper.c
index 996e9f3..24ab7be 100644
--- a/target/i386/xsave_helper.c
+++ b/target/i386/xsave_helper.c
@@ -5,6 +5,7 @@
#include "qemu/osdep.h"
#include "cpu.h"
+#include "exec/tswap.h"
void x86_cpu_xsave_all_areas(X86CPU *cpu, void *buf, uint32_t buflen)
{