Diffstat (limited to 'target/i386')
71 files changed, 5323 insertions, 1108 deletions
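A recurring pattern in the target/i386/cpu.c hunks below is clamping topology counts with MIN() before packing them into the CPUID cache leaves; the comments added there note the field widths ("Bits 25-14: maximum 4095", "Bits 31-26: maximum 63", "max value for bits 23-16 is 255"). As a quick orientation before the per-file diffs, here is a hedged standalone sketch of why the clamp matters; encode_eax() and the sample counts are illustrative only, not QEMU's actual helpers.

/*
 * Standalone sketch (not QEMU code): CPUID.04H:EAX packs "max core IDs in
 * package" into bits 31:26 (6 bits, max 63) and "max thread IDs sharing the
 * cache" into bits 25:14 (12 bits, max 4095). Without clamping, an oversized
 * count spills into the neighbouring field.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))

static uint32_t encode_eax(uint32_t max_core_ids, uint32_t sharing_threads,
                           int clamp)
{
    if (clamp) {
        max_core_ids = MIN(max_core_ids, 63);         /* bits 31:26 */
        sharing_threads = MIN(sharing_threads, 4095); /* bits 25:14 */
    }
    return (max_core_ids << 26) | (sharing_threads << 14);
}

int main(void)
{
    /* 5000 threads sharing a cache does not fit in 12 bits. */
    printf("unclamped: 0x%08" PRIx32 "\n", encode_eax(70, 5000, 0));
    printf("clamped:   0x%08" PRIx32 "\n", encode_eax(70, 5000, 1));
    return 0;
}

Running the sketch shows the unclamped value spilling the sharing count into the core-ID bits, which is what the MIN() calls added in encode_cache_cpuid4() and encode_cache_cpuid8000001d() below prevent.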
diff --git a/target/i386/arch_memory_mapping.c b/target/i386/arch_memory_mapping.c index ced1998..a2398c2 100644 --- a/target/i386/arch_memory_mapping.c +++ b/target/i386/arch_memory_mapping.c @@ -14,6 +14,7 @@ #include "qemu/osdep.h" #include "cpu.h" #include "system/memory_mapping.h" +#include "system/memory.h" /* PAE Paging or IA-32e Paging */ static void walk_pte(MemoryMappingList *list, AddressSpace *as, diff --git a/target/i386/confidential-guest.c b/target/i386/confidential-guest.c index b372784..cfb71bf 100644 --- a/target/i386/confidential-guest.c +++ b/target/i386/confidential-guest.c @@ -20,7 +20,7 @@ OBJECT_DEFINE_ABSTRACT_TYPE(X86ConfidentialGuest, X86_CONFIDENTIAL_GUEST, CONFIDENTIAL_GUEST_SUPPORT) -static void x86_confidential_guest_class_init(ObjectClass *oc, void *data) +static void x86_confidential_guest_class_init(ObjectClass *oc, const void *data) { } diff --git a/target/i386/confidential-guest.h b/target/i386/confidential-guest.h index 164be76..48b88db 100644 --- a/target/i386/confidential-guest.h +++ b/target/i386/confidential-guest.h @@ -39,8 +39,10 @@ struct X86ConfidentialGuestClass { /* <public> */ int (*kvm_type)(X86ConfidentialGuest *cg); - uint32_t (*mask_cpuid_features)(X86ConfidentialGuest *cg, uint32_t feature, uint32_t index, - int reg, uint32_t value); + void (*cpu_instance_init)(X86ConfidentialGuest *cg, CPUState *cpu); + uint32_t (*adjust_cpuid_features)(X86ConfidentialGuest *cg, uint32_t feature, + uint32_t index, int reg, uint32_t value); + int (*check_features)(X86ConfidentialGuest *cg, CPUState *cs); }; /** @@ -59,25 +61,47 @@ static inline int x86_confidential_guest_kvm_type(X86ConfidentialGuest *cg) } } +static inline void x86_confidential_guest_cpu_instance_init(X86ConfidentialGuest *cg, + CPUState *cpu) +{ + X86ConfidentialGuestClass *klass = X86_CONFIDENTIAL_GUEST_GET_CLASS(cg); + + if (klass->cpu_instance_init) { + klass->cpu_instance_init(cg, cpu); + } +} + /** - * x86_confidential_guest_mask_cpuid_features: + * x86_confidential_guest_adjust_cpuid_features: * - * Removes unsupported features from a confidential guest's CPUID values, returns - * the value with the bits removed. The bits removed should be those that KVM - * provides independent of host-supported CPUID features, but are not supported by - * the confidential computing firmware. + * Adjust the supported features from a confidential guest's CPUID values, + * returns the adjusted value. There are bits being removed that are not + * supported by the confidential computing firmware or bits being added that + * are forcibly exposed to guest by the confidential computing firmware. 
*/ -static inline int x86_confidential_guest_mask_cpuid_features(X86ConfidentialGuest *cg, +static inline int x86_confidential_guest_adjust_cpuid_features(X86ConfidentialGuest *cg, uint32_t feature, uint32_t index, int reg, uint32_t value) { X86ConfidentialGuestClass *klass = X86_CONFIDENTIAL_GUEST_GET_CLASS(cg); - if (klass->mask_cpuid_features) { - return klass->mask_cpuid_features(cg, feature, index, reg, value); + if (klass->adjust_cpuid_features) { + return klass->adjust_cpuid_features(cg, feature, index, reg, value); } else { return value; } } +static inline int x86_confidential_guest_check_features(X86ConfidentialGuest *cg, + CPUState *cs) +{ + X86ConfidentialGuestClass *klass = X86_CONFIDENTIAL_GUEST_GET_CLASS(cg); + + if (klass->check_features) { + return klass->check_features(cg, cs); + } + + return 0; +} + #endif diff --git a/target/i386/cpu-apic.c b/target/i386/cpu-apic.c index c1708b0..242a05f 100644 --- a/target/i386/cpu-apic.c +++ b/target/i386/cpu-apic.c @@ -14,7 +14,7 @@ #include "system/hw_accel.h" #include "system/kvm.h" #include "system/xen.h" -#include "exec/address-spaces.h" +#include "system/address-spaces.h" #include "hw/qdev-properties.h" #include "hw/i386/apic_internal.h" #include "cpu-internal.h" diff --git a/target/i386/cpu-param.h b/target/i386/cpu-param.h index b0e884c..ebb844b 100644 --- a/target/i386/cpu-param.h +++ b/target/i386/cpu-param.h @@ -22,7 +22,6 @@ #endif #define TARGET_PAGE_BITS 12 -/* The x86 has a strong memory model with some store-after-load re-ordering */ -#define TCG_GUEST_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD) +#define TARGET_INSN_START_EXTRA_WORDS 1 #endif diff --git a/target/i386/cpu-system.c b/target/i386/cpu-system.c index 55f192e..b1494aa 100644 --- a/target/i386/cpu-system.c +++ b/target/i386/cpu-system.c @@ -24,7 +24,7 @@ #include "qobject/qdict.h" #include "qapi/qobject-input-visitor.h" #include "qom/qom-qobject.h" -#include "qapi/qapi-commands-machine-target.h" +#include "qapi/qapi-commands-machine.h" #include "cpu-internal.h" diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 3fb1ec6..da7d8dc 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -28,6 +28,7 @@ #include "system/hvf.h" #include "hvf/hvf-i386.h" #include "kvm/kvm_i386.h" +#include "kvm/tdx.h" #include "sev.h" #include "qapi/error.h" #include "qemu/error-report.h" @@ -35,13 +36,17 @@ #include "standard-headers/asm-x86/kvm_para.h" #include "hw/qdev-properties.h" #include "hw/i386/topology.h" +#include "exec/watchpoint.h" #ifndef CONFIG_USER_ONLY +#include "confidential-guest.h" #include "system/reset.h" -#include "qapi/qapi-commands-machine-target.h" -#include "exec/address-spaces.h" +#include "qapi/qapi-commands-machine.h" +#include "system/address-spaces.h" #include "hw/boards.h" #include "hw/i386/sgx-epc.h" #endif +#include "system/qtest.h" +#include "tcg/tcg-cpu.h" #include "disas/capstone.h" #include "cpu-internal.h" @@ -63,6 +68,7 @@ struct CPUID2CacheDescriptorInfo { /* * Known CPUID 2 cache descriptors. + * TLB, prefetch and sectored cache related descriptors are not included. 
* From Intel SDM Volume 2A, CPUID instruction */ struct CPUID2CacheDescriptorInfo cpuid2_cache_descriptors[] = { @@ -84,18 +90,29 @@ struct CPUID2CacheDescriptorInfo cpuid2_cache_descriptors[] = { .associativity = 2, .line_size = 64, }, [0x21] = { .level = 2, .type = UNIFIED_CACHE, .size = 256 * KiB, .associativity = 8, .line_size = 64, }, - /* lines per sector is not supported cpuid2_cache_descriptor(), - * so descriptors 0x22, 0x23 are not included - */ + /* + * lines per sector is not supported cpuid2_cache_descriptor(), + * so descriptors 0x22, 0x23 are not included + */ [0x24] = { .level = 2, .type = UNIFIED_CACHE, .size = 1 * MiB, .associativity = 16, .line_size = 64, }, - /* lines per sector is not supported cpuid2_cache_descriptor(), - * so descriptors 0x25, 0x20 are not included - */ + /* + * lines per sector is not supported cpuid2_cache_descriptor(), + * so descriptors 0x25, 0x29 are not included + */ [0x2C] = { .level = 1, .type = DATA_CACHE, .size = 32 * KiB, .associativity = 8, .line_size = 64, }, [0x30] = { .level = 1, .type = INSTRUCTION_CACHE, .size = 32 * KiB, .associativity = 8, .line_size = 64, }, + /* + * Newer Intel CPUs (having the cores without L3, e.g., Intel MTL, ARL) + * use CPUID 0x4 leaf to describe cache topology, by encoding CPUID 0x2 + * leaf with 0xFF. For older CPUs (without 0x4 leaf), it's also valid + * to just ignore L3's code if there's no L3. + * + * This already covers all the cases in QEMU, so code 0x40 is not + * included. + */ [0x41] = { .level = 2, .type = UNIFIED_CACHE, .size = 128 * KiB, .associativity = 4, .line_size = 32, }, [0x42] = { .level = 2, .type = UNIFIED_CACHE, .size = 256 * KiB, @@ -112,7 +129,18 @@ struct CPUID2CacheDescriptorInfo cpuid2_cache_descriptors[] = { .associativity = 8, .line_size = 64, }, [0x48] = { .level = 2, .type = UNIFIED_CACHE, .size = 3 * MiB, .associativity = 12, .line_size = 64, }, - /* Descriptor 0x49 depends on CPU family/model, so it is not included */ + /* + * Descriptor 0x49 has 2 cases: + * - 2nd-level cache: 4 MByte, 16-way set associative, 64 byte line size. + * - 3rd-level cache: 4MB, 16-way set associative, 64-byte line size + * (Intel Xeon processor MP, Family 0FH, Model 06H). + * + * When it represents L3, then it depends on CPU family/model. Fortunately, + * the legacy cache/CPU models don't have such special L3. So, just add it + * to represent the general L2 case. + */ + [0x49] = { .level = 2, .type = UNIFIED_CACHE, .size = 4 * MiB, + .associativity = 16, .line_size = 64, }, [0x4A] = { .level = 3, .type = UNIFIED_CACHE, .size = 6 * MiB, .associativity = 12, .line_size = 64, }, [0x4B] = { .level = 3, .type = UNIFIED_CACHE, .size = 8 * MiB, @@ -133,9 +161,10 @@ struct CPUID2CacheDescriptorInfo cpuid2_cache_descriptors[] = { .associativity = 4, .line_size = 64, }, [0x78] = { .level = 2, .type = UNIFIED_CACHE, .size = 1 * MiB, .associativity = 4, .line_size = 64, }, - /* lines per sector is not supported cpuid2_cache_descriptor(), - * so descriptors 0x79, 0x7A, 0x7B, 0x7C are not included. - */ + /* + * lines per sector is not supported cpuid2_cache_descriptor(), + * so descriptors 0x79, 0x7A, 0x7B, 0x7C are not included. + */ [0x7D] = { .level = 2, .type = UNIFIED_CACHE, .size = 2 * MiB, .associativity = 8, .line_size = 64, }, [0x7F] = { .level = 2, .type = UNIFIED_CACHE, .size = 512 * KiB, @@ -196,7 +225,7 @@ struct CPUID2CacheDescriptorInfo cpuid2_cache_descriptors[] = { * Return a CPUID 2 cache descriptor for a given cache. 
* If no known descriptor is found, return CACHE_DESCRIPTOR_UNAVAILABLE */ -static uint8_t cpuid2_cache_descriptor(CPUCacheInfo *cache) +static uint8_t cpuid2_cache_descriptor(CPUCacheInfo *cache, bool *unmacthed) { int i; @@ -213,9 +242,46 @@ static uint8_t cpuid2_cache_descriptor(CPUCacheInfo *cache) } } + *unmacthed |= true; return CACHE_DESCRIPTOR_UNAVAILABLE; } +static const CPUCaches legacy_intel_cpuid2_cache_info; + +/* Encode cache info for CPUID[2] */ +static void encode_cache_cpuid2(X86CPU *cpu, + const CPUCaches *caches, + uint32_t *eax, uint32_t *ebx, + uint32_t *ecx, uint32_t *edx) +{ + CPUX86State *env = &cpu->env; + int l1d, l1i, l2, l3; + bool unmatched = false; + + *eax = 1; /* Number of CPUID[EAX=2] calls required */ + *ebx = *ecx = *edx = 0; + + l1d = cpuid2_cache_descriptor(caches->l1d_cache, &unmatched); + l1i = cpuid2_cache_descriptor(caches->l1i_cache, &unmatched); + l2 = cpuid2_cache_descriptor(caches->l2_cache, &unmatched); + l3 = cpuid2_cache_descriptor(caches->l3_cache, &unmatched); + + if (!cpu->consistent_cache || + (env->cpuid_min_level < 0x4 && !unmatched)) { + /* + * Though SDM defines code 0x40 for cases with no L2 or L3. It's + * also valid to just ignore l3's code if there's no l2. + */ + if (cpu->enable_l3_cache) { + *ecx = l3; + } + *edx = (l1d << 16) | (l1i << 8) | l2; + } else { + *ecx = 0; + *edx = CACHE_DESCRIPTOR_UNAVAILABLE; + } +} + /* CPUID Leaf 4 constants: */ /* EAX: */ @@ -283,11 +349,17 @@ static void encode_cache_cpuid4(CPUCacheInfo *cache, assert(cache->size == cache->line_size * cache->associativity * cache->partitions * cache->sets); + /* + * The following fields have bit-width limitations, so consider the + * maximum values to avoid overflow: + * Bits 25-14: maximum 4095. + * Bits 31-26: maximum 63. + */ *eax = CACHE_TYPE(cache->type) | CACHE_LEVEL(cache->level) | (cache->self_init ? CACHE_SELF_INIT_LEVEL : 0) | - (max_core_ids_in_package(topo_info) << 26) | - (max_thread_ids_for_cache(topo_info, cache->share_level) << 14); + (MIN(max_core_ids_in_package(topo_info), 63) << 26) | + (MIN(max_thread_ids_for_cache(topo_info, cache->share_level), 4095) << 14); assert(cache->line_size > 0); assert(cache->partitions > 0); @@ -427,7 +499,6 @@ static void encode_topo_cpuid1f(CPUX86State *env, uint32_t count, static uint32_t encode_cache_cpuid80000005(CPUCacheInfo *cache) { assert(cache->size % 1024 == 0); - assert(cache->lines_per_tag > 0); assert(cache->associativity > 0); assert(cache->line_size > 0); return ((cache->size / 1024) << 24) | (cache->associativity << 16) | @@ -436,8 +507,8 @@ static uint32_t encode_cache_cpuid80000005(CPUCacheInfo *cache) #define ASSOC_FULL 0xFF -/* AMD associativity encoding used on CPUID Leaf 0x80000006: */ -#define AMD_ENC_ASSOC(a) (a <= 1 ? a : \ +/* x86 associativity encoding used on CPUID Leaf 0x80000006: */ +#define X86_ENC_ASSOC(a) (a <= 1 ? a : \ a == 2 ? 0x2 : \ a == 4 ? 0x4 : \ a == 8 ? 0x6 : \ @@ -460,19 +531,18 @@ static void encode_cache_cpuid80000006(CPUCacheInfo *l2, { assert(l2->size % 1024 == 0); assert(l2->associativity > 0); - assert(l2->lines_per_tag > 0); assert(l2->line_size > 0); *ecx = ((l2->size / 1024) << 16) | - (AMD_ENC_ASSOC(l2->associativity) << 12) | + (X86_ENC_ASSOC(l2->associativity) << 12) | (l2->lines_per_tag << 8) | (l2->line_size); + /* For Intel, EDX is reserved. 
*/ if (l3) { assert(l3->size % (512 * 1024) == 0); assert(l3->associativity > 0); - assert(l3->lines_per_tag > 0); assert(l3->line_size > 0); *edx = ((l3->size / (512 * 1024)) << 18) | - (AMD_ENC_ASSOC(l3->associativity) << 12) | + (X86_ENC_ASSOC(l3->associativity) << 12) | (l3->lines_per_tag << 8) | (l3->line_size); } else { *edx = 0; @@ -490,7 +560,8 @@ static void encode_cache_cpuid8000001d(CPUCacheInfo *cache, *eax = CACHE_TYPE(cache->type) | CACHE_LEVEL(cache->level) | (cache->self_init ? CACHE_SELF_INIT_LEVEL : 0); - *eax |= max_thread_ids_for_cache(topo_info, cache->share_level) << 14; + /* Bits 25:14 - NumSharingCache: maximum 4095. */ + *eax |= MIN(max_thread_ids_for_cache(topo_info, cache->share_level), 4095) << 14; assert(cache->line_size > 0); assert(cache->partitions > 0); @@ -570,117 +641,172 @@ static void encode_topo_cpuid8000001e(X86CPU *cpu, X86CPUTopoInfo *topo_info, * These are legacy cache values. If there is a need to change any * of these values please use builtin_x86_defs */ - -/* L1 data cache: */ -static CPUCacheInfo legacy_l1d_cache = { - .type = DATA_CACHE, - .level = 1, - .size = 32 * KiB, - .self_init = 1, - .line_size = 64, - .associativity = 8, - .sets = 64, - .partitions = 1, - .no_invd_sharing = true, - .share_level = CPU_TOPOLOGY_LEVEL_CORE, -}; - -/*FIXME: CPUID leaf 0x80000005 is inconsistent with leaves 2 & 4 */ -static CPUCacheInfo legacy_l1d_cache_amd = { - .type = DATA_CACHE, - .level = 1, - .size = 64 * KiB, - .self_init = 1, - .line_size = 64, - .associativity = 2, - .sets = 512, - .partitions = 1, - .lines_per_tag = 1, - .no_invd_sharing = true, - .share_level = CPU_TOPOLOGY_LEVEL_CORE, -}; - -/* L1 instruction cache: */ -static CPUCacheInfo legacy_l1i_cache = { - .type = INSTRUCTION_CACHE, - .level = 1, - .size = 32 * KiB, - .self_init = 1, - .line_size = 64, - .associativity = 8, - .sets = 64, - .partitions = 1, - .no_invd_sharing = true, - .share_level = CPU_TOPOLOGY_LEVEL_CORE, -}; - -/*FIXME: CPUID leaf 0x80000005 is inconsistent with leaves 2 & 4 */ -static CPUCacheInfo legacy_l1i_cache_amd = { - .type = INSTRUCTION_CACHE, - .level = 1, - .size = 64 * KiB, - .self_init = 1, - .line_size = 64, - .associativity = 2, - .sets = 512, - .partitions = 1, - .lines_per_tag = 1, - .no_invd_sharing = true, - .share_level = CPU_TOPOLOGY_LEVEL_CORE, -}; - -/* Level 2 unified cache: */ -static CPUCacheInfo legacy_l2_cache = { - .type = UNIFIED_CACHE, - .level = 2, - .size = 4 * MiB, - .self_init = 1, - .line_size = 64, - .associativity = 16, - .sets = 4096, - .partitions = 1, - .no_invd_sharing = true, - .share_level = CPU_TOPOLOGY_LEVEL_CORE, -}; - -/*FIXME: CPUID leaf 2 descriptor is inconsistent with CPUID leaf 4 */ -static CPUCacheInfo legacy_l2_cache_cpuid2 = { - .type = UNIFIED_CACHE, - .level = 2, - .size = 2 * MiB, - .line_size = 64, - .associativity = 8, - .share_level = CPU_TOPOLOGY_LEVEL_INVALID, +static const CPUCaches legacy_amd_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 64 * KiB, + .self_init = 1, + .line_size = 64, + .associativity = 2, + .sets = 512, + .partitions = 1, + .lines_per_tag = 1, + .no_invd_sharing = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 64 * KiB, + .self_init = 1, + .line_size = 64, + .associativity = 2, + .sets = 512, + .partitions = 1, + .lines_per_tag = 1, + .no_invd_sharing = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l2_cache = &(CPUCacheInfo) { + .type = 
UNIFIED_CACHE, + .level = 2, + .size = 512 * KiB, + .line_size = 64, + .lines_per_tag = 1, + .associativity = 16, + .sets = 512, + .partitions = 1, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 3, + .size = 16 * MiB, + .line_size = 64, + .associativity = 16, + .sets = 16384, + .partitions = 1, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .complex_indexing = true, + .share_level = CPU_TOPOLOGY_LEVEL_DIE, + }, }; - -/*FIXME: CPUID leaf 0x80000006 is inconsistent with leaves 2 & 4 */ -static CPUCacheInfo legacy_l2_cache_amd = { - .type = UNIFIED_CACHE, - .level = 2, - .size = 512 * KiB, - .line_size = 64, - .lines_per_tag = 1, - .associativity = 16, - .sets = 512, - .partitions = 1, - .share_level = CPU_TOPOLOGY_LEVEL_CORE, +/* + * Only used for the CPU models with CPUID level < 4. + * These CPUs (CPUID level < 4) only use CPUID leaf 2 to present + * cache information. + * + * Note: This cache model is just a default one, and is not + * guaranteed to match real hardwares. + */ +static const CPUCaches legacy_intel_cpuid2_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 32 * KiB, + .self_init = 1, + .line_size = 64, + .associativity = 8, + .sets = 64, + .partitions = 1, + .no_invd_sharing = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 32 * KiB, + .self_init = 1, + .line_size = 64, + .associativity = 8, + .sets = 64, + .partitions = 1, + .no_invd_sharing = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l2_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 2, + .size = 2 * MiB, + .self_init = 1, + .line_size = 64, + .associativity = 8, + .sets = 4096, + .partitions = 1, + .no_invd_sharing = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 3, + .size = 16 * MiB, + .line_size = 64, + .associativity = 16, + .sets = 16384, + .partitions = 1, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .complex_indexing = true, + .share_level = CPU_TOPOLOGY_LEVEL_DIE, + }, }; -/* Level 3 unified cache: */ -static CPUCacheInfo legacy_l3_cache = { - .type = UNIFIED_CACHE, - .level = 3, - .size = 16 * MiB, - .line_size = 64, - .associativity = 16, - .sets = 16384, - .partitions = 1, - .lines_per_tag = 1, - .self_init = true, - .inclusive = true, - .complex_indexing = true, - .share_level = CPU_TOPOLOGY_LEVEL_DIE, +static const CPUCaches legacy_intel_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 32 * KiB, + .self_init = 1, + .line_size = 64, + .associativity = 8, + .sets = 64, + .partitions = 1, + .no_invd_sharing = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 32 * KiB, + .self_init = 1, + .line_size = 64, + .associativity = 8, + .sets = 64, + .partitions = 1, + .no_invd_sharing = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l2_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 2, + .size = 4 * MiB, + .self_init = 1, + .line_size = 64, + .associativity = 16, + .sets = 4096, + .partitions = 1, + .no_invd_sharing = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 3, + .size = 16 * MiB, + .line_size = 64, + .associativity = 16, + .sets = 16384, + 
.partitions = 1, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .complex_indexing = true, + .share_level = CPU_TOPOLOGY_LEVEL_DIE, + }, }; /* TLB definitions: */ @@ -774,11 +900,12 @@ void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC | CPUID_SEP | \ CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV | CPUID_PAT | \ CPUID_PSE36 | CPUID_CLFLUSH | CPUID_ACPI | CPUID_MMX | \ - CPUID_FXSR | CPUID_SSE | CPUID_SSE2 | CPUID_SS | CPUID_DE) + CPUID_FXSR | CPUID_SSE | CPUID_SSE2 | CPUID_SS | CPUID_DE | \ + CPUID_HT) /* partly implemented: CPUID_MTRR, CPUID_MCA, CPUID_CLFLUSH (needed for Win64) */ /* missing: - CPUID_VME, CPUID_DTS, CPUID_SS, CPUID_HT, CPUID_TM, CPUID_PBE */ + CPUID_VME, CPUID_DTS, CPUID_SS, CPUID_TM, CPUID_PBE */ /* * Kernel-only features that can be shown to usermode programs even if @@ -846,7 +973,8 @@ void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, #define TCG_EXT3_FEATURES (CPUID_EXT3_LAHF_LM | CPUID_EXT3_SVM | \ CPUID_EXT3_CR8LEG | CPUID_EXT3_ABM | CPUID_EXT3_SSE4A | \ - CPUID_EXT3_3DNOWPREFETCH | CPUID_EXT3_KERNEL_FEATURES) + CPUID_EXT3_3DNOWPREFETCH | CPUID_EXT3_KERNEL_FEATURES | \ + CPUID_EXT3_CMP_LEG) #define TCG_EXT4_FEATURES 0 @@ -895,6 +1023,7 @@ void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, #define TCG_7_1_EAX_FEATURES (CPUID_7_1_EAX_FZRM | CPUID_7_1_EAX_FSRS | \ CPUID_7_1_EAX_FSRC | CPUID_7_1_EAX_CMPCCXADD) +#define TCG_7_1_ECX_FEATURES 0 #define TCG_7_1_EDX_FEATURES 0 #define TCG_7_2_EDX_FEATURES 0 #define TCG_APM_FEATURES 0 @@ -920,6 +1049,17 @@ void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, #define TCG_8000_0008_EBX (CPUID_8000_0008_EBX_XSAVEERPTR | \ CPUID_8000_0008_EBX_WBNOINVD | CPUID_8000_0008_EBX_KERNEL_FEATURES) +#if defined CONFIG_USER_ONLY +#define CPUID_8000_0021_EAX_KERNEL_FEATURES CPUID_8000_0021_EAX_AUTO_IBRS +#else +#define CPUID_8000_0021_EAX_KERNEL_FEATURES 0 +#endif + +#define TCG_8000_0021_EAX_FEATURES ( \ + CPUID_8000_0021_EAX_NO_NESTED_DATA_BP | \ + CPUID_8000_0021_EAX_NULL_SEL_CLR_BASE | \ + CPUID_8000_0021_EAX_KERNEL_FEATURES) + FeatureWordInfo feature_word_info[FEATURE_WORDS] = { [FEAT_1_EDX] = { .type = CPUID_FEATURE_WORD, @@ -1134,6 +1274,25 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { }, .tcg_features = TCG_7_1_EAX_FEATURES, }, + [FEAT_7_1_ECX] = { + .type = CPUID_FEATURE_WORD, + .feat_names = { + NULL, NULL, NULL, NULL, + NULL, "msr-imm", NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, + .cpuid = { + .eax = 7, + .needs_ecx = true, .ecx = 1, + .reg = R_ECX, + }, + .tcg_features = TCG_7_1_ECX_FEATURES, + }, [FEAT_7_1_EDX] = { .type = CPUID_FEATURE_WORD, .feat_names = { @@ -1237,17 +1396,17 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { [FEAT_8000_0021_EAX] = { .type = CPUID_FEATURE_WORD, .feat_names = { - "no-nested-data-bp", NULL, "lfence-always-serializing", NULL, + "no-nested-data-bp", "fs-gs-base-ns", "lfence-always-serializing", NULL, NULL, NULL, "null-sel-clr-base", NULL, "auto-ibrs", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, + "prefetchi", NULL, NULL, NULL, "eraps", NULL, NULL, "sbpb", "ibpb-brtype", "srso-no", "srso-user-kernel-no", NULL, }, .cpuid = { .eax = 0x80000021, .reg = R_EAX, }, - .tcg_features = 0, + .tcg_features = TCG_8000_0021_EAX_FEATURES, .unmigratable_flags = 0, }, [FEAT_8000_0021_EBX] = { @@ -1370,6 +1529,14 @@ 
FeatureWordInfo feature_word_info[FEATURE_WORDS] = { "bhi-no", NULL, NULL, NULL, "pbrsb-no", NULL, "gds-no", "rfds-no", "rfds-clear", NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, "its-no", NULL, }, .msr = { .index = MSR_IA32_ARCH_CAPABILITIES, @@ -1654,14 +1821,21 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { }, }; -typedef struct FeatureMask { - FeatureWord index; - uint64_t mask; -} FeatureMask; +bool is_feature_word_cpuid(uint32_t feature, uint32_t index, int reg) +{ + FeatureWordInfo *wi; + FeatureWord w; -typedef struct FeatureDep { - FeatureMask from, to; -} FeatureDep; + for (w = 0; w < FEATURE_WORDS; w++) { + wi = &feature_word_info[w]; + if (wi->type == CPUID_FEATURE_WORD && wi->cpuid.eax == feature && + (!wi->cpuid.needs_ecx || wi->cpuid.ecx == index) && + wi->cpuid.reg == reg) { + return true; + } + } + return false; +} static FeatureDep feature_dependencies[] = { { @@ -1773,10 +1947,6 @@ static FeatureDep feature_dependencies[] = { .to = { FEAT_7_1_EAX, CPUID_7_1_EAX_FRED }, }, { - .from = { FEAT_7_1_EAX, CPUID_7_1_EAX_WRMSRNS }, - .to = { FEAT_7_1_EAX, CPUID_7_1_EAX_FRED }, - }, - { .from = { FEAT_7_0_EBX, CPUID_7_0_EBX_SGX }, .to = { FEAT_7_0_ECX, CPUID_7_0_ECX_SGX_LC }, }, @@ -1831,9 +2001,6 @@ static const X86RegisterInfo32 x86_reg_info_32[CPU_NB_REGS32] = { }; #undef REGISTER -/* CPUID feature bits available in XSS */ -#define CPUID_XSTATE_XSS_MASK (XSTATE_ARCH_LBR_MASK) - ExtSaveArea x86_ext_save_areas[XSAVE_STATE_AREA_COUNT] = { [XSTATE_FP_BIT] = { /* x87 FP state component is always enabled if XSAVE is supported */ @@ -1899,7 +2066,7 @@ uint32_t xsave_area_size(uint64_t mask, bool compacted) static inline bool accel_uses_host_cpuid(void) { - return kvm_enabled() || hvf_enabled(); + return !tcg_enabled() && !qtest_enabled(); } static inline uint64_t x86_cpu_xsave_xcr0_components(X86CPU *cpu) @@ -2183,6 +2350,60 @@ static CPUCaches epyc_v4_cache_info = { }, }; +static CPUCaches epyc_v5_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 64 * KiB, + .line_size = 64, + .associativity = 4, + .partitions = 1, + .sets = 256, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l2_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 2, + .size = 512 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 1024, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 3, + .size = 8 * MiB, + .line_size = 64, + .associativity = 16, + .partitions = 1, + .sets = 8192, + .lines_per_tag = 1, + .self_init = true, + .no_invd_sharing = true, + .complex_indexing = false, + .share_level = CPU_TOPOLOGY_LEVEL_DIE, + }, +}; + static const CPUCaches epyc_rome_cache_info = { .l1d_cache = &(CPUCacheInfo) { .type = DATA_CACHE, @@ -2291,6 +2512,60 @@ static const CPUCaches epyc_rome_v3_cache_info = { }, }; +static const CPUCaches epyc_rome_v5_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 32 
* KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l2_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 2, + .size = 512 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 1024, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 3, + .size = 16 * MiB, + .line_size = 64, + .associativity = 16, + .partitions = 1, + .sets = 16384, + .lines_per_tag = 1, + .self_init = true, + .no_invd_sharing = true, + .complex_indexing = false, + .share_level = CPU_TOPOLOGY_LEVEL_DIE, + }, +}; + static const CPUCaches epyc_milan_cache_info = { .l1d_cache = &(CPUCacheInfo) { .type = DATA_CACHE, @@ -2399,6 +2674,60 @@ static const CPUCaches epyc_milan_v2_cache_info = { }, }; +static const CPUCaches epyc_milan_v3_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l2_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 2, + .size = 512 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 1024, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 3, + .size = 32 * MiB, + .line_size = 64, + .associativity = 16, + .partitions = 1, + .sets = 32768, + .lines_per_tag = 1, + .self_init = true, + .no_invd_sharing = true, + .complex_indexing = false, + .share_level = CPU_TOPOLOGY_LEVEL_DIE, + }, +}; + static const CPUCaches epyc_genoa_cache_info = { .l1d_cache = &(CPUCacheInfo) { .type = DATA_CACHE, @@ -2453,6 +2782,486 @@ static const CPUCaches epyc_genoa_cache_info = { }, }; +static const CPUCaches epyc_genoa_v2_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l2_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 2, + .size = 1 * MiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 2048, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 3, + .size = 32 * MiB, + .line_size = 64, + .associativity = 16, + .partitions = 1, + 
.sets = 32768, + .lines_per_tag = 1, + .self_init = true, + .no_invd_sharing = true, + .complex_indexing = false, + .share_level = CPU_TOPOLOGY_LEVEL_DIE, + }, +}; + +static const CPUCaches epyc_turin_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 48 * KiB, + .line_size = 64, + .associativity = 12, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l2_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 2, + .size = 1 * MiB, + .line_size = 64, + .associativity = 16, + .partitions = 1, + .sets = 1024, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 3, + .size = 32 * MiB, + .line_size = 64, + .associativity = 16, + .partitions = 1, + .sets = 32768, + .lines_per_tag = 1, + .self_init = true, + .no_invd_sharing = true, + .complex_indexing = false, + .share_level = CPU_TOPOLOGY_LEVEL_DIE, + } +}; + +static const CPUCaches xeon_spr_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x0.EAX */ + .type = DATA_CACHE, + .level = 1, + .self_init = true, + + /* CPUID 0x4.0x0.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 12, + + /* CPUID 0x4.0x0.ECX */ + .sets = 64, + + /* CPUID 0x4.0x0.EDX */ + .no_invd_sharing = false, + .inclusive = false, + .complex_indexing = false, + + .size = 48 * KiB, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l1i_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x1.EAX */ + .type = INSTRUCTION_CACHE, + .level = 1, + .self_init = true, + + /* CPUID 0x4.0x1.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 8, + + /* CPUID 0x4.0x1.ECX */ + .sets = 64, + + /* CPUID 0x4.0x1.EDX */ + .no_invd_sharing = false, + .inclusive = false, + .complex_indexing = false, + + .size = 32 * KiB, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l2_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x2.EAX */ + .type = UNIFIED_CACHE, + .level = 2, + .self_init = true, + + /* CPUID 0x4.0x2.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 16, + + /* CPUID 0x4.0x2.ECX */ + .sets = 2048, + + /* CPUID 0x4.0x2.EDX */ + .no_invd_sharing = false, + .inclusive = false, + .complex_indexing = false, + + .size = 2 * MiB, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l3_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x3.EAX */ + .type = UNIFIED_CACHE, + .level = 3, + .self_init = true, + + /* CPUID 0x4.0x3.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 15, + + /* CPUID 0x4.0x3.ECX */ + .sets = 65536, + + /* CPUID 0x4.0x3.EDX */ + .no_invd_sharing = false, + .inclusive = false, + .complex_indexing = true, + + .size = 60 * MiB, + .share_level = CPU_TOPOLOGY_LEVEL_SOCKET, + }, +}; + +static const CPUCaches xeon_gnr_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x0.EAX */ + .type = DATA_CACHE, + .level = 1, + .self_init = true, + + /* CPUID 0x4.0x0.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 12, + + /* CPUID 0x4.0x0.ECX */ + .sets = 64, + + /* CPUID 0x4.0x0.EDX */ + .no_invd_sharing = false, + .inclusive = false, + .complex_indexing = false, + + .size = 48 * KiB, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, 
+ }, + .l1i_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x1.EAX */ + .type = INSTRUCTION_CACHE, + .level = 1, + .self_init = true, + + /* CPUID 0x4.0x1.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 16, + + /* CPUID 0x4.0x1.ECX */ + .sets = 64, + + /* CPUID 0x4.0x1.EDX */ + .no_invd_sharing = false, + .inclusive = false, + .complex_indexing = false, + + .size = 64 * KiB, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l2_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x2.EAX */ + .type = UNIFIED_CACHE, + .level = 2, + .self_init = true, + + /* CPUID 0x4.0x2.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 16, + + /* CPUID 0x4.0x2.ECX */ + .sets = 2048, + + /* CPUID 0x4.0x2.EDX */ + .no_invd_sharing = false, + .inclusive = false, + .complex_indexing = false, + + .size = 2 * MiB, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l3_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x3.EAX */ + .type = UNIFIED_CACHE, + .level = 3, + .self_init = true, + + /* CPUID 0x4.0x3.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 16, + + /* CPUID 0x4.0x3.ECX */ + .sets = 294912, + + /* CPUID 0x4.0x3.EDX */ + .no_invd_sharing = false, + .inclusive = false, + .complex_indexing = true, + + .size = 288 * MiB, + .share_level = CPU_TOPOLOGY_LEVEL_SOCKET, + }, +}; + +static const CPUCaches xeon_srf_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x0.EAX */ + .type = DATA_CACHE, + .level = 1, + .self_init = true, + + /* CPUID 0x4.0x0.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 8, + + /* CPUID 0x4.0x0.ECX */ + .sets = 64, + + /* CPUID 0x4.0x0.EDX */ + .no_invd_sharing = false, + .inclusive = false, + .complex_indexing = false, + + .size = 32 * KiB, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l1i_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x1.EAX */ + .type = INSTRUCTION_CACHE, + .level = 1, + .self_init = true, + + /* CPUID 0x4.0x1.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 8, + + /* CPUID 0x4.0x1.ECX */ + .sets = 128, + + /* CPUID 0x4.0x1.EDX */ + .no_invd_sharing = false, + .inclusive = false, + .complex_indexing = false, + + .size = 64 * KiB, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l2_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x2.EAX */ + .type = UNIFIED_CACHE, + .level = 2, + .self_init = true, + + /* CPUID 0x4.0x2.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 16, + + /* CPUID 0x4.0x2.ECX */ + .sets = 4096, + + /* CPUID 0x4.0x2.EDX */ + .no_invd_sharing = false, + .inclusive = false, + .complex_indexing = false, + + .size = 4 * MiB, + .share_level = CPU_TOPOLOGY_LEVEL_MODULE, + }, + .l3_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x3.EAX */ + .type = UNIFIED_CACHE, + .level = 3, + .self_init = true, + + /* CPUID 0x4.0x3.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 12, + + /* CPUID 0x4.0x3.ECX */ + .sets = 147456, + + /* CPUID 0x4.0x3.EDX */ + .no_invd_sharing = false, + .inclusive = false, + .complex_indexing = true, + + .size = 108 * MiB, + .share_level = CPU_TOPOLOGY_LEVEL_SOCKET, + }, +}; + +static const CPUCaches yongfeng_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x0.EAX */ + .type = DATA_CACHE, + .level = 1, + .self_init = true, + + /* CPUID 0x4.0x0.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 8, + + /* CPUID 0x4.0x0.ECX */ + .sets = 64, + + /* CPUID 0x4.0x0.EDX */ + .no_invd_sharing = false, + .inclusive = false, + .complex_indexing = false, + + /* CPUID 0x80000005.ECX */ + .lines_per_tag = 1, + .size = 32 * KiB, + + .share_level 
= CPU_TOPOLOGY_LEVEL_CORE, + }, + .l1i_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x1.EAX */ + .type = INSTRUCTION_CACHE, + .level = 1, + .self_init = true, + + /* CPUID 0x4.0x1.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 16, + + /* CPUID 0x4.0x1.ECX */ + .sets = 64, + + /* CPUID 0x4.0x1.EDX */ + .no_invd_sharing = false, + .inclusive = false, + .complex_indexing = false, + + /* CPUID 0x80000005.EDX */ + .lines_per_tag = 1, + .size = 64 * KiB, + + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l2_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x2.EAX */ + .type = UNIFIED_CACHE, + .level = 2, + .self_init = true, + + /* CPUID 0x4.0x2.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 8, + + /* CPUID 0x4.0x2.ECX */ + .sets = 512, + + /* CPUID 0x4.0x2.EDX */ + .no_invd_sharing = false, + .inclusive = true, + .complex_indexing = false, + + /* CPUID 0x80000006.ECX */ + .size = 256 * KiB, + + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l3_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x3.EAX */ + .type = UNIFIED_CACHE, + .level = 3, + .self_init = true, + + /* CPUID 0x4.0x3.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 16, + + /* CPUID 0x4.0x3.ECX */ + .sets = 8192, + + /* CPUID 0x4.0x3.EDX */ + .no_invd_sharing = true, + .inclusive = true, + .complex_indexing = false, + + .size = 8 * MiB, + .share_level = CPU_TOPOLOGY_LEVEL_DIE, + }, +}; + /* The following VMX features are not supported by KVM and are left out in the * CPU definitions: * @@ -2705,6 +3514,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { I486_FEATURES, .xlevel = 0, .model_id = "", + .cache_info = &legacy_intel_cpuid2_cache_info, }, { .name = "pentium", @@ -2717,6 +3527,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { PENTIUM_FEATURES, .xlevel = 0, .model_id = "", + .cache_info = &legacy_intel_cpuid2_cache_info, }, { .name = "pentium2", @@ -2729,6 +3540,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { PENTIUM2_FEATURES, .xlevel = 0, .model_id = "", + .cache_info = &legacy_intel_cpuid2_cache_info, }, { .name = "pentium3", @@ -2741,6 +3553,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { PENTIUM3_FEATURES, .xlevel = 0, .model_id = "", + .cache_info = &legacy_intel_cpuid2_cache_info, }, { .name = "athlon", @@ -4273,6 +5086,15 @@ static const X86CPUDefinition builtin_x86_defs[] = { { /* end of list */ } } }, + { + .version = 4, + .note = "with spr-sp cache model and 0x1f leaf", + .cache_info = &xeon_spr_cache_info, + .props = (PropValue[]) { + { "x-force-cpuid-0x1f", "on" }, + { /* end of list */ }, + } + }, { /* end of list */ } } }, @@ -4426,6 +5248,15 @@ static const X86CPUDefinition builtin_x86_defs[] = { { /* end of list */ } } }, + { + .version = 3, + .note = "with gnr-sp cache model and 0x1f leaf", + .cache_info = &xeon_gnr_cache_info, + .props = (PropValue[]) { + { "x-force-cpuid-0x1f", "on" }, + { /* end of list */ }, + } + }, { /* end of list */ }, }, }, @@ -4571,6 +5402,15 @@ static const X86CPUDefinition builtin_x86_defs[] = { { /* end of list */ } } }, + { + .version = 3, + .note = "with srf-sp cache model and 0x1f leaf", + .cache_info = &xeon_srf_cache_info, + .props = (PropValue[]) { + { "x-force-cpuid-0x1f", "on" }, + { /* end of list */ }, + } + }, { /* end of list */ }, }, }, @@ -5210,6 +6050,25 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, .cache_info = &epyc_v4_cache_info }, + { + .version = 5, + .props = (PropValue[]) { + { "overflow-recov", "on" }, + { "succor", "on" }, + { "lbrv", "on" }, + { "tsc-scale", "on" }, + { 
"vmcb-clean", "on" }, + { "flushbyasid", "on" }, + { "pause-filter", "on" }, + { "pfthreshold", "on" }, + { "v-vmsave-vmload", "on" }, + { "vgif", "on" }, + { "model-id", + "AMD EPYC-v5 Processor" }, + { /* end of list */ } + }, + .cache_info = &epyc_v5_cache_info + }, { /* end of list */ } } }, @@ -5348,6 +6207,25 @@ static const X86CPUDefinition builtin_x86_defs[] = { { /* end of list */ } }, }, + { + .version = 5, + .props = (PropValue[]) { + { "overflow-recov", "on" }, + { "succor", "on" }, + { "lbrv", "on" }, + { "tsc-scale", "on" }, + { "vmcb-clean", "on" }, + { "flushbyasid", "on" }, + { "pause-filter", "on" }, + { "pfthreshold", "on" }, + { "v-vmsave-vmload", "on" }, + { "vgif", "on" }, + { "model-id", + "AMD EPYC-Rome-v5 Processor" }, + { /* end of list */ } + }, + .cache_info = &epyc_rome_v5_cache_info + }, { /* end of list */ } } }, @@ -5423,6 +6301,25 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, .cache_info = &epyc_milan_v2_cache_info }, + { + .version = 3, + .props = (PropValue[]) { + { "overflow-recov", "on" }, + { "succor", "on" }, + { "lbrv", "on" }, + { "tsc-scale", "on" }, + { "vmcb-clean", "on" }, + { "flushbyasid", "on" }, + { "pause-filter", "on" }, + { "pfthreshold", "on" }, + { "v-vmsave-vmload", "on" }, + { "vgif", "on" }, + { "model-id", + "AMD EPYC-Milan-v3 Processor" }, + { /* end of list */ } + }, + .cache_info = &epyc_milan_v3_cache_info + }, { /* end of list */ } } }, @@ -5497,6 +6394,31 @@ static const X86CPUDefinition builtin_x86_defs[] = { .xlevel = 0x80000022, .model_id = "AMD EPYC-Genoa Processor", .cache_info = &epyc_genoa_cache_info, + .versions = (X86CPUVersionDefinition[]) { + { .version = 1 }, + { + .version = 2, + .props = (PropValue[]) { + { "overflow-recov", "on" }, + { "succor", "on" }, + { "lbrv", "on" }, + { "tsc-scale", "on" }, + { "vmcb-clean", "on" }, + { "flushbyasid", "on" }, + { "pause-filter", "on" }, + { "pfthreshold", "on" }, + { "v-vmsave-vmload", "on" }, + { "vgif", "on" }, + { "fs-gs-base-ns", "on" }, + { "perfmon-v2", "on" }, + { "model-id", + "AMD EPYC-Genoa-v2 Processor" }, + { /* end of list */ } + }, + .cache_info = &epyc_genoa_v2_cache_info + }, + { /* end of list */ } + } }, { .name = "YongFeng", @@ -5631,9 +6553,101 @@ static const X86CPUDefinition builtin_x86_defs[] = { { /* end of list */ } } }, + { + .version = 3, + .note = "with the cache model and 0x1f leaf", + .cache_info = &yongfeng_cache_info, + .props = (PropValue[]) { + { "x-force-cpuid-0x1f", "on" }, + { /* end of list */ }, + } + }, { /* end of list */ } } }, + { + .name = "EPYC-Turin", + .level = 0xd, + .vendor = CPUID_VENDOR_AMD, + .family = 26, + .model = 0, + .stepping = 0, + .features[FEAT_1_ECX] = + CPUID_EXT_RDRAND | CPUID_EXT_F16C | CPUID_EXT_AVX | + CPUID_EXT_XSAVE | CPUID_EXT_AES | CPUID_EXT_POPCNT | + CPUID_EXT_MOVBE | CPUID_EXT_SSE42 | CPUID_EXT_SSE41 | + CPUID_EXT_PCID | CPUID_EXT_CX16 | CPUID_EXT_FMA | + CPUID_EXT_SSSE3 | CPUID_EXT_MONITOR | CPUID_EXT_PCLMULQDQ | + CPUID_EXT_SSE3, + .features[FEAT_1_EDX] = + CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH | + CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | CPUID_PGE | + CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | CPUID_MCE | + CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | CPUID_DE | + CPUID_VME | CPUID_FP87, + .features[FEAT_6_EAX] = + CPUID_6_EAX_ARAT, + .features[FEAT_7_0_EBX] = + CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_AVX2 | + CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | + CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_AVX512F 
| + CPUID_7_0_EBX_AVX512DQ | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | + CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_AVX512IFMA | + CPUID_7_0_EBX_CLFLUSHOPT | CPUID_7_0_EBX_CLWB | + CPUID_7_0_EBX_AVX512CD | CPUID_7_0_EBX_SHA_NI | + CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512VL, + .features[FEAT_7_0_ECX] = + CPUID_7_0_ECX_AVX512_VBMI | CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_PKU | + CPUID_7_0_ECX_AVX512_VBMI2 | CPUID_7_0_ECX_GFNI | + CPUID_7_0_ECX_VAES | CPUID_7_0_ECX_VPCLMULQDQ | + CPUID_7_0_ECX_AVX512VNNI | CPUID_7_0_ECX_AVX512BITALG | + CPUID_7_0_ECX_AVX512_VPOPCNTDQ | CPUID_7_0_ECX_LA57 | + CPUID_7_0_ECX_RDPID | CPUID_7_0_ECX_MOVDIRI | + CPUID_7_0_ECX_MOVDIR64B, + .features[FEAT_7_0_EDX] = + CPUID_7_0_EDX_FSRM | CPUID_7_0_EDX_AVX512_VP2INTERSECT, + .features[FEAT_7_1_EAX] = + CPUID_7_1_EAX_AVX_VNNI | CPUID_7_1_EAX_AVX512_BF16, + .features[FEAT_8000_0001_ECX] = + CPUID_EXT3_OSVW | CPUID_EXT3_3DNOWPREFETCH | + CPUID_EXT3_MISALIGNSSE | CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | + CPUID_EXT3_CR8LEG | CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM | + CPUID_EXT3_TOPOEXT | CPUID_EXT3_PERFCORE, + .features[FEAT_8000_0001_EDX] = + CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_PDPE1GB | + CPUID_EXT2_FFXSR | CPUID_EXT2_MMXEXT | CPUID_EXT2_NX | + CPUID_EXT2_SYSCALL, + .features[FEAT_8000_0007_EBX] = + CPUID_8000_0007_EBX_OVERFLOW_RECOV | CPUID_8000_0007_EBX_SUCCOR, + .features[FEAT_8000_0008_EBX] = + CPUID_8000_0008_EBX_CLZERO | CPUID_8000_0008_EBX_XSAVEERPTR | + CPUID_8000_0008_EBX_WBNOINVD | CPUID_8000_0008_EBX_IBPB | + CPUID_8000_0008_EBX_IBRS | CPUID_8000_0008_EBX_STIBP | + CPUID_8000_0008_EBX_STIBP_ALWAYS_ON | + CPUID_8000_0008_EBX_AMD_SSBD | CPUID_8000_0008_EBX_AMD_PSFD, + .features[FEAT_8000_0021_EAX] = + CPUID_8000_0021_EAX_NO_NESTED_DATA_BP | + CPUID_8000_0021_EAX_FS_GS_BASE_NS | + CPUID_8000_0021_EAX_LFENCE_ALWAYS_SERIALIZING | + CPUID_8000_0021_EAX_NULL_SEL_CLR_BASE | + CPUID_8000_0021_EAX_AUTO_IBRS | CPUID_8000_0021_EAX_PREFETCHI | + CPUID_8000_0021_EAX_SBPB | CPUID_8000_0021_EAX_IBPB_BRTYPE | + CPUID_8000_0021_EAX_SRSO_USER_KERNEL_NO, + .features[FEAT_8000_0022_EAX] = + CPUID_8000_0022_EAX_PERFMON_V2, + .features[FEAT_XSAVE] = + CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | + CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES, + .features[FEAT_SVM] = + CPUID_SVM_NPT | CPUID_SVM_LBRV | CPUID_SVM_NRIPSAVE | + CPUID_SVM_TSCSCALE | CPUID_SVM_VMCBCLEAN | CPUID_SVM_FLUSHASID | + CPUID_SVM_PAUSEFILTER | CPUID_SVM_PFTHRESHOLD | + CPUID_SVM_V_VMSAVE_VMLOAD | CPUID_SVM_VGIF | + CPUID_SVM_VNMI | CPUID_SVM_SVME_ADDR_CHK, + .xlevel = 0x80000022, + .model_id = "AMD EPYC-Turin Processor", + .cache_info = &epyc_turin_cache_info, + }, }; /* @@ -5701,13 +6715,14 @@ static void max_x86_cpu_realize(DeviceState *dev, Error **errp) x86_cpu_realizefn(dev, errp); } -static void max_x86_cpu_class_init(ObjectClass *oc, void *data) +static void max_x86_cpu_class_init(ObjectClass *oc, const void *data) { DeviceClass *dc = DEVICE_CLASS(oc); X86CPUClass *xcc = X86_CPU_CLASS(oc); xcc->ordering = 9; + xcc->max_features = true; xcc->model_description = "Enables all features supported by the accelerator in the current host"; @@ -5718,22 +6733,21 @@ static void max_x86_cpu_class_init(ObjectClass *oc, void *data) static void max_x86_cpu_initfn(Object *obj) { X86CPU *cpu = X86_CPU(obj); - - /* We can't fill the features array here because we don't know yet if - * "migratable" is true or false. 
- */ - cpu->max_features = true; - object_property_set_bool(OBJECT(cpu), "pmu", true, &error_abort); + CPUX86State *env = &cpu->env; /* - * these defaults are used for TCG and all other accelerators - * besides KVM and HVF, which overwrite these values + * these defaults are used for TCG, other accelerators have overwritten + * these values */ - object_property_set_str(OBJECT(cpu), "vendor", CPUID_VENDOR_AMD, - &error_abort); - object_property_set_str(OBJECT(cpu), "model-id", - "QEMU TCG CPU version " QEMU_HW_VERSION, - &error_abort); + if (!env->cpuid_vendor1) { + object_property_set_str(OBJECT(cpu), "vendor", CPUID_VENDOR_AMD, + &error_abort); + } + if (!env->cpuid_model[0]) { + object_property_set_str(OBJECT(cpu), "model-id", + "QEMU TCG CPU version " QEMU_HW_VERSION, + &error_abort); + } } static const TypeInfo max_x86_cpu_type_info = { @@ -5743,7 +6757,7 @@ static const TypeInfo max_x86_cpu_type_info = { .class_init = max_x86_cpu_class_init, }; -static char *feature_word_description(FeatureWordInfo *f, uint32_t bit) +static char *feature_word_description(FeatureWordInfo *f) { assert(f->type == CPUID_FEATURE_WORD || f->type == MSR_FEATURE_WORD); @@ -5752,11 +6766,15 @@ static char *feature_word_description(FeatureWordInfo *f, uint32_t bit) { const char *reg = get_register_name_32(f->cpuid.reg); assert(reg); - return g_strdup_printf("CPUID.%02XH:%s", - f->cpuid.eax, reg); + if (!f->cpuid.needs_ecx) { + return g_strdup_printf("CPUID[eax=%02Xh].%s", f->cpuid.eax, reg); + } else { + return g_strdup_printf("CPUID[eax=%02Xh,ecx=%02Xh].%s", + f->cpuid.eax, f->cpuid.ecx, reg); + } } case MSR_FEATURE_WORD: - return g_strdup_printf("MSR(%02XH)", + return g_strdup_printf("MSR(%02Xh)", f->msr.index); } @@ -5776,12 +6794,13 @@ static bool x86_cpu_have_filtered_features(X86CPU *cpu) return false; } -static void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint64_t mask, - const char *verbose_prefix) +void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint64_t mask, + const char *verbose_prefix) { CPUX86State *env = &cpu->env; FeatureWordInfo *f = &feature_word_info[w]; int i; + g_autofree char *feat_word_str = feature_word_description(f); if (!cpu->force_features) { env->features[w] &= ~mask; @@ -5794,7 +6813,35 @@ static void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint64_t mask, for (i = 0; i < 64; ++i) { if ((1ULL << i) & mask) { - g_autofree char *feat_word_str = feature_word_description(f, i); + warn_report("%s: %s%s%s [bit %d]", + verbose_prefix, + feat_word_str, + f->feat_names[i] ? "." : "", + f->feat_names[i] ? 
f->feat_names[i] : "", i); + } + } +} + +void mark_forced_on_features(X86CPU *cpu, FeatureWord w, uint64_t mask, + const char *verbose_prefix) +{ + CPUX86State *env = &cpu->env; + FeatureWordInfo *f = &feature_word_info[w]; + int i; + + if (!cpu->force_features) { + env->features[w] |= mask; + } + + cpu->forced_on_features[w] |= mask; + + if (!verbose_prefix) { + return; + } + + for (i = 0; i < 64; ++i) { + if ((1ULL << i) & mask) { + g_autofree char *feat_word_str = feature_word_description(f); warn_report("%s: %s%s%s [bit %d]", verbose_prefix, feat_word_str, @@ -5812,10 +6859,7 @@ static void x86_cpuid_version_get_family(Object *obj, Visitor *v, CPUX86State *env = &cpu->env; uint64_t value; - value = (env->cpuid_version >> 8) & 0xf; - if (value == 0xf) { - value += (env->cpuid_version >> 20) & 0xff; - } + value = x86_cpu_family(env->cpuid_version); visit_type_uint64(v, name, &value, errp); } @@ -5853,8 +6897,7 @@ static void x86_cpuid_version_get_model(Object *obj, Visitor *v, CPUX86State *env = &cpu->env; uint64_t value; - value = (env->cpuid_version >> 4) & 0xf; - value |= ((env->cpuid_version >> 16) & 0xf) << 4; + value = x86_cpu_model(env->cpuid_version); visit_type_uint64(v, name, &value, errp); } @@ -5888,7 +6931,7 @@ static void x86_cpuid_version_get_stepping(Object *obj, Visitor *v, CPUX86State *env = &cpu->env; uint64_t value; - value = env->cpuid_version & 0xf; + value = x86_cpu_stepping(env->cpuid_version); visit_type_uint64(v, name, &value, errp); } @@ -5956,11 +6999,11 @@ static char *x86_cpuid_get_model_id(Object *obj, Error **errp) char *value; int i; - value = g_malloc(48 + 1); - for (i = 0; i < 48; i++) { + value = g_malloc(CPUID_MODEL_ID_SZ + 1); + for (i = 0; i < CPUID_MODEL_ID_SZ; i++) { value[i] = env->cpuid_model[i >> 2] >> (8 * (i & 3)); } - value[48] = '\0'; + value[CPUID_MODEL_ID_SZ] = '\0'; return value; } @@ -5975,7 +7018,7 @@ static void x86_cpuid_set_model_id(Object *obj, const char *model_id, model_id = ""; } len = strlen(model_id); - memset(env->cpuid_model, 0, 48); + memset(env->cpuid_model, 0, CPUID_MODEL_ID_SZ); for (i = 0; i < 48; i++) { if (i >= len) { c = '\0'; @@ -6238,7 +7281,7 @@ static void listflags(GList *features) } /* Sort alphabetically by type name, respecting X86CPUClass::ordering. 
*/ -static gint x86_cpu_list_compare(gconstpointer a, gconstpointer b) +static gint x86_cpu_list_compare(gconstpointer a, gconstpointer b, gpointer d) { ObjectClass *class_a = (ObjectClass *)a; ObjectClass *class_b = (ObjectClass *)b; @@ -6259,7 +7302,7 @@ static gint x86_cpu_list_compare(gconstpointer a, gconstpointer b) static GSList *get_sorted_cpu_model_list(void) { GSList *list = object_class_get_list(TYPE_X86_CPU, false); - list = g_slist_sort(list, x86_cpu_list_compare); + list = g_slist_sort_with_data(list, x86_cpu_list_compare, NULL); return list; } @@ -6316,8 +7359,13 @@ static void x86_cpu_list_entry(gpointer data, gpointer user_data) qemu_printf(" %-20s %s\n", name, desc); } +static gint strcmp_wrap(gconstpointer a, gconstpointer b, gpointer d) +{ + return strcmp(a, b); +} + /* list available CPU models and flags */ -void x86_cpu_list(void) +static void x86_cpu_list(void) { int i, j; GSList *list; @@ -6338,7 +7386,7 @@ void x86_cpu_list(void) } } - names = g_list_sort(names, (GCompareFunc)strcmp); + names = g_list_sort_with_data(names, strcmp_wrap, NULL); qemu_printf("\nRecognized CPUID flags:\n"); listflags(names); @@ -6700,7 +7748,7 @@ static const gchar *x86_gdb_arch_name(CPUState *cs) #endif } -static void x86_cpu_cpudef_class_init(ObjectClass *oc, void *data) +static void x86_cpu_cpudef_class_init(ObjectClass *oc, const void *data) { const X86CPUModel *model = data; X86CPUClass *xcc = X86_CPU_CLASS(oc); @@ -6830,14 +7878,35 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, } *edx = env->features[FEAT_1_EDX]; if (threads_per_pkg > 1) { - *ebx |= threads_per_pkg << 16; - } - if (!cpu->enable_pmu) { - *ecx &= ~CPUID_EXT_PDCM; + uint32_t num; + + /* + * For CPUID.01H.EBX[Bits 23-16], AMD requires logical processor + * count, but Intel needs maximum number of addressable IDs for + * logical processors per package. + */ + if (cpu->vendor_cpuid_only_v2 && + (IS_INTEL_CPU(env) || IS_ZHAOXIN_CPU(env))) { + num = 1 << apicid_pkg_offset(topo_info); + } else { + num = threads_per_pkg; + } + + /* Fixup overflow: max value for bits 23-16 is 255. 
*/ + *ebx |= MIN(num, 255) << 16; } break; - case 2: - /* cache info: needed for Pentium Pro compatibility */ + case 2: { /* cache info: needed for Pentium Pro compatibility */ + const CPUCaches *caches; + + if (env->enable_legacy_cpuid2_cache) { + caches = &legacy_intel_cpuid2_cache_info; + } else if (env->enable_legacy_vendor_cache) { + caches = &legacy_intel_cache_info; + } else { + caches = &env->cache_info; + } + if (cpu->cache_info_passthrough) { x86_cpu_get_cache_cpuid(index, 0, eax, ebx, ecx, edx); break; @@ -6845,18 +7914,18 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *eax = *ebx = *ecx = *edx = 0; break; } - *eax = 1; /* Number of CPUID[EAX=2] calls required */ - *ebx = 0; - if (!cpu->enable_l3_cache) { - *ecx = 0; + encode_cache_cpuid2(cpu, caches, eax, ebx, ecx, edx); + break; + } + case 4: { + const CPUCaches *caches; + + if (env->enable_legacy_vendor_cache) { + caches = &legacy_intel_cache_info; } else { - *ecx = cpuid2_cache_descriptor(env->cache_info_cpuid2.l3_cache); + caches = &env->cache_info; } - *edx = (cpuid2_cache_descriptor(env->cache_info_cpuid2.l1d_cache) << 16) | - (cpuid2_cache_descriptor(env->cache_info_cpuid2.l1i_cache) << 8) | - (cpuid2_cache_descriptor(env->cache_info_cpuid2.l2_cache)); - break; - case 4: + /* cache info: needed for Core compatibility */ if (cpu->cache_info_passthrough) { x86_cpu_get_cache_cpuid(index, count, eax, ebx, ecx, edx); @@ -6868,13 +7937,13 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, int host_vcpus_per_cache = 1 + ((*eax & 0x3FFC000) >> 14); *eax &= ~0xFC000000; - *eax |= max_core_ids_in_package(topo_info) << 26; + *eax |= MIN(max_core_ids_in_package(topo_info), 63) << 26; if (host_vcpus_per_cache > threads_per_pkg) { *eax &= ~0x3FFC000; /* Share the cache at package level. 
*/ - *eax |= max_thread_ids_for_cache(topo_info, - CPU_TOPOLOGY_LEVEL_SOCKET) << 14; + *eax |= MIN(max_thread_ids_for_cache(topo_info, + CPU_TOPOLOGY_LEVEL_SOCKET), 4095) << 14; } } } else if (cpu->vendor_cpuid_only && IS_AMD_CPU(env)) { @@ -6884,30 +7953,26 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, switch (count) { case 0: /* L1 dcache info */ - encode_cache_cpuid4(env->cache_info_cpuid4.l1d_cache, - topo_info, + encode_cache_cpuid4(caches->l1d_cache, topo_info, eax, ebx, ecx, edx); if (!cpu->l1_cache_per_core) { *eax &= ~MAKE_64BIT_MASK(14, 12); } break; case 1: /* L1 icache info */ - encode_cache_cpuid4(env->cache_info_cpuid4.l1i_cache, - topo_info, + encode_cache_cpuid4(caches->l1i_cache, topo_info, eax, ebx, ecx, edx); if (!cpu->l1_cache_per_core) { *eax &= ~MAKE_64BIT_MASK(14, 12); } break; case 2: /* L2 cache info */ - encode_cache_cpuid4(env->cache_info_cpuid4.l2_cache, - topo_info, + encode_cache_cpuid4(caches->l2_cache, topo_info, eax, ebx, ecx, edx); break; case 3: /* L3 cache info */ if (cpu->enable_l3_cache) { - encode_cache_cpuid4(env->cache_info_cpuid4.l3_cache, - topo_info, + encode_cache_cpuid4(caches->l3_cache, topo_info, eax, ebx, ecx, edx); break; } @@ -6918,6 +7983,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, } } break; + } case 5: /* MONITOR/MWAIT Leaf */ *eax = cpu->mwait.eax; /* Smallest monitor-line size in bytes */ @@ -6945,9 +8011,9 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *edx = env->features[FEAT_7_0_EDX]; /* Feature flags */ } else if (count == 1) { *eax = env->features[FEAT_7_1_EAX]; + *ecx = env->features[FEAT_7_1_ECX]; *edx = env->features[FEAT_7_1_EDX]; *ebx = 0; - *ecx = 0; } else if (count == 2) { *edx = env->features[FEAT_7_2_EDX]; *eax = 0; @@ -7008,21 +8074,6 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, assert(!(*eax & ~0x1f)); *ebx &= 0xffff; /* The count doesn't need to be reliable. 
*/ break; - case 0x1C: - if (cpu->enable_pmu && (env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_ARCH_LBR)) { - x86_cpu_get_supported_cpuid(0x1C, 0, eax, ebx, ecx, edx); - *edx = 0; - } - break; - case 0x1F: - /* V2 Extended Topology Enumeration Leaf */ - if (!x86_has_extended_topo(env->avail_cpu_topo)) { - *eax = *ebx = *ecx = *edx = 0; - break; - } - - encode_topo_cpuid1f(env, count, topo_info, eax, ebx, ecx, edx); - break; case 0xD: { /* Processor Extended State */ *eax = 0; @@ -7163,6 +8214,12 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, } break; } + case 0x1C: + if (cpu->enable_pmu && (env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_ARCH_LBR)) { + x86_cpu_get_supported_cpuid(0x1C, 0, eax, ebx, ecx, edx); + *edx = 0; + } + break; case 0x1D: { /* AMX TILE, for now hardcoded for Sapphire Rapids*/ *eax = 0; @@ -7200,6 +8257,15 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, } break; } + case 0x1F: + /* V2 Extended Topology Enumeration Leaf */ + if (!x86_has_cpuid_0x1f(cpu)) { + *eax = *ebx = *ecx = *edx = 0; + break; + } + + encode_topo_cpuid1f(env, count, topo_info, eax, ebx, ecx, edx); + break; case 0x24: { *eax = 0; *ebx = 0; @@ -7236,9 +8302,15 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, break; case 0x80000000: *eax = env->cpuid_xlevel; - *ebx = env->cpuid_vendor1; - *edx = env->cpuid_vendor2; - *ecx = env->cpuid_vendor3; + + if (cpu->vendor_cpuid_only_v2 && + (IS_INTEL_CPU(env) || IS_ZHAOXIN_CPU(env))) { + *ebx = *ecx = *edx = 0; + } else { + *ebx = env->cpuid_vendor1; + *edx = env->cpuid_vendor2; + *ecx = env->cpuid_vendor3; + } break; case 0x80000001: *eax = env->cpuid_version; @@ -7246,7 +8318,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *ecx = env->features[FEAT_8000_0001_ECX]; *edx = env->features[FEAT_8000_0001_EDX]; - if (tcg_enabled() && env->cpuid_vendor1 == CPUID_VENDOR_INTEL_1 && + if (tcg_enabled() && IS_INTEL_CPU(env) && !(env->hflags & HF_LMA_MASK)) { *edx &= ~CPUID_EXT2_SYSCALL; } @@ -7259,41 +8331,78 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *ecx = env->cpuid_model[(index - 0x80000002) * 4 + 2]; *edx = env->cpuid_model[(index - 0x80000002) * 4 + 3]; break; - case 0x80000005: - /* cache info (L1 cache) */ + case 0x80000005: { + /* cache info (L1 cache/TLB Associativity Field) */ + const CPUCaches *caches; + + if (env->enable_legacy_vendor_cache) { + caches = &legacy_amd_cache_info; + } else { + caches = &env->cache_info; + } + if (cpu->cache_info_passthrough) { x86_cpu_get_cache_cpuid(index, 0, eax, ebx, ecx, edx); break; } + + if (cpu->vendor_cpuid_only_v2 && IS_INTEL_CPU(env)) { + *eax = *ebx = *ecx = *edx = 0; + break; + } + *eax = (L1_DTLB_2M_ASSOC << 24) | (L1_DTLB_2M_ENTRIES << 16) | (L1_ITLB_2M_ASSOC << 8) | (L1_ITLB_2M_ENTRIES); *ebx = (L1_DTLB_4K_ASSOC << 24) | (L1_DTLB_4K_ENTRIES << 16) | (L1_ITLB_4K_ASSOC << 8) | (L1_ITLB_4K_ENTRIES); - *ecx = encode_cache_cpuid80000005(env->cache_info_amd.l1d_cache); - *edx = encode_cache_cpuid80000005(env->cache_info_amd.l1i_cache); + *ecx = encode_cache_cpuid80000005(caches->l1d_cache); + *edx = encode_cache_cpuid80000005(caches->l1i_cache); break; - case 0x80000006: - /* cache info (L2 cache) */ + } + case 0x80000006: { /* cache info (L2 cache/TLB/L3 cache) */ + const CPUCaches *caches; + + if (env->enable_legacy_vendor_cache) { + caches = &legacy_amd_cache_info; + } else { + caches = &env->cache_info; + } + if (cpu->cache_info_passthrough) { x86_cpu_get_cache_cpuid(index, 0, eax, ebx, ecx, 
edx); break; } - *eax = (AMD_ENC_ASSOC(L2_DTLB_2M_ASSOC) << 28) | + + if (cpu->vendor_cpuid_only_v2 && + (IS_INTEL_CPU(env) || IS_ZHAOXIN_CPU(env))) { + *eax = *ebx = 0; + encode_cache_cpuid80000006(caches->l2_cache, + NULL, ecx, edx); + break; + } + + *eax = (X86_ENC_ASSOC(L2_DTLB_2M_ASSOC) << 28) | (L2_DTLB_2M_ENTRIES << 16) | - (AMD_ENC_ASSOC(L2_ITLB_2M_ASSOC) << 12) | + (X86_ENC_ASSOC(L2_ITLB_2M_ASSOC) << 12) | (L2_ITLB_2M_ENTRIES); - *ebx = (AMD_ENC_ASSOC(L2_DTLB_4K_ASSOC) << 28) | + *ebx = (X86_ENC_ASSOC(L2_DTLB_4K_ASSOC) << 28) | (L2_DTLB_4K_ENTRIES << 16) | - (AMD_ENC_ASSOC(L2_ITLB_4K_ASSOC) << 12) | + (X86_ENC_ASSOC(L2_ITLB_4K_ASSOC) << 12) | (L2_ITLB_4K_ENTRIES); - encode_cache_cpuid80000006(env->cache_info_amd.l2_cache, + + encode_cache_cpuid80000006(caches->l2_cache, cpu->enable_l3_cache ? - env->cache_info_amd.l3_cache : NULL, + caches->l3_cache : NULL, ecx, edx); break; + } case 0x80000007: *eax = 0; - *ebx = env->features[FEAT_8000_0007_EBX]; + if (cpu->vendor_cpuid_only_v2 && IS_INTEL_CPU(env)) { + *ebx = 0; + } else { + *ebx = env->features[FEAT_8000_0007_EBX]; + } *ecx = 0; *edx = env->features[FEAT_8000_0007_EDX]; break; @@ -7306,6 +8415,17 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *eax |= (cpu->guest_phys_bits << 16); } *ebx = env->features[FEAT_8000_0008_EBX]; + + /* + * Don't emulate Bits [7:0] & Bits [15:12] for Intel/Zhaoxin, since + * they're using 0x1f leaf. + */ + if (cpu->vendor_cpuid_only_v2 && + (IS_INTEL_CPU(env) || IS_ZHAOXIN_CPU(env))) { + *ecx = *edx = 0; + break; + } + if (threads_per_pkg > 1) { /* * Bits 15:12 is "The number of bits in the initial @@ -7341,19 +8461,19 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, } switch (count) { case 0: /* L1 dcache info */ - encode_cache_cpuid8000001d(env->cache_info_amd.l1d_cache, + encode_cache_cpuid8000001d(env->cache_info.l1d_cache, topo_info, eax, ebx, ecx, edx); break; case 1: /* L1 icache info */ - encode_cache_cpuid8000001d(env->cache_info_amd.l1i_cache, + encode_cache_cpuid8000001d(env->cache_info.l1i_cache, topo_info, eax, ebx, ecx, edx); break; case 2: /* L2 cache info */ - encode_cache_cpuid8000001d(env->cache_info_amd.l2_cache, + encode_cache_cpuid8000001d(env->cache_info.l2_cache, topo_info, eax, ebx, ecx, edx); break; case 3: /* L3 cache info */ - encode_cache_cpuid8000001d(env->cache_info_amd.l3_cache, + encode_cache_cpuid8000001d(env->cache_info.l3_cache, topo_info, eax, ebx, ecx, edx); break; default: /* end of info */ @@ -7374,6 +8494,21 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *edx = 0; } break; + case 0x8000001F: + *eax = *ebx = *ecx = *edx = 0; + if (sev_enabled()) { + *eax = 0x2; + *eax |= sev_es_enabled() ? 0x8 : 0; + *eax |= sev_snp_enabled() ? 0x10 : 0; + *ebx = sev_get_cbit_position() & 0x3f; /* EBX[5:0] */ + *ebx |= (sev_get_reduced_phys_bits() & 0x3f) << 6; /* EBX[11:6] */ + } + break; + case 0x80000021: + *eax = *ebx = *ecx = *edx = 0; + *eax = env->features[FEAT_8000_0021_EAX]; + *ebx = env->features[FEAT_8000_0021_EBX]; + break; case 0x80000022: *eax = *ebx = *ecx = *edx = 0; /* AMD Extended Performance Monitoring and Debug */ @@ -7406,21 +8541,6 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *ecx = 0; *edx = 0; break; - case 0x8000001F: - *eax = *ebx = *ecx = *edx = 0; - if (sev_enabled()) { - *eax = 0x2; - *eax |= sev_es_enabled() ? 0x8 : 0; - *eax |= sev_snp_enabled() ? 
0x10 : 0; - *ebx = sev_get_cbit_position() & 0x3f; /* EBX[5:0] */ - *ebx |= (sev_get_reduced_phys_bits() & 0x3f) << 6; /* EBX[11:6] */ - } - break; - case 0x80000021: - *eax = *ebx = *ecx = *edx = 0; - *eax = env->features[FEAT_8000_0021_EAX]; - *ebx = env->features[FEAT_8000_0021_EBX]; - break; default: /* reserved values: zero */ *eax = 0; @@ -7640,7 +8760,7 @@ static void mce_init(X86CPU *cpu) CPUX86State *cenv = &cpu->env; unsigned int bank; - if (((cenv->cpuid_version >> 8) & 0xf) >= 6 + if (x86_cpu_family(cenv->cpuid_version) >= 6 && (cenv->features[FEAT_1_EDX] & (CPUID_MCE | CPUID_MCA)) == (CPUID_MCE | CPUID_MCA)) { cenv->mcg_cap = MCE_CAP_DEF | MCE_BANKS_DEF | @@ -7768,6 +8888,7 @@ static void x86_cpu_enable_xsave_components(X86CPU *cpu) */ void x86_cpu_expand_features(X86CPU *cpu, Error **errp) { + X86CPUClass *xcc = X86_CPU_GET_CLASS(cpu); CPUX86State *env = &cpu->env; FeatureWord w; int i; @@ -7787,12 +8908,12 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp) } } - /*TODO: Now cpu->max_features doesn't overwrite features + /* TODO: Now xcc->max_features doesn't overwrite features * set using QOM properties, and we can convert * plus_features & minus_features to global properties * inside x86_cpu_parse_featurestr() too. */ - if (cpu->max_features) { + if (xcc->max_features) { for (w = 0; w < FEATURE_WORDS; w++) { /* Override only features that weren't set explicitly * by the user. @@ -7824,6 +8945,14 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp) } } + /* PDCM is fixed1 bit for TDX */ + if (!cpu->enable_pmu && !is_tdx_vm()) { + mark_unavailable_features(cpu, FEAT_1_ECX, + env->user_features[FEAT_1_ECX] & CPUID_EXT_PDCM, + "This feature is not available due to PMU being disabled"); + env->features[FEAT_1_ECX] &= ~CPUID_EXT_PDCM; + } + for (i = 0; i < ARRAY_SIZE(feature_dependencies); i++) { FeatureDep *d = &feature_dependencies[i]; if (!(env->features[d->from.index] & d->from.mask)) { @@ -7852,6 +8981,7 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp) x86_cpu_adjust_feat_level(cpu, FEAT_6_EAX); x86_cpu_adjust_feat_level(cpu, FEAT_7_0_ECX); x86_cpu_adjust_feat_level(cpu, FEAT_7_1_EAX); + x86_cpu_adjust_feat_level(cpu, FEAT_7_1_ECX); x86_cpu_adjust_feat_level(cpu, FEAT_7_1_EDX); x86_cpu_adjust_feat_level(cpu, FEAT_7_2_EDX); x86_cpu_adjust_feat_level(cpu, FEAT_8000_0001_EDX); @@ -7880,7 +9010,7 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp) * cpu->vendor_cpuid_only has been unset for compatibility with older * machine types. 
*/ - if (x86_has_extended_topo(env->avail_cpu_topo) && + if (x86_has_cpuid_0x1f(cpu) && (IS_INTEL_CPU(env) || !cpu->vendor_cpuid_only)) { x86_cpu_adjust_level(cpu, &env->cpuid_min_level, 0x1F); } @@ -8052,46 +9182,34 @@ static bool x86_cpu_update_smp_cache_topo(MachineState *ms, X86CPU *cpu, level = machine_get_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L1D); if (level != CPU_TOPOLOGY_LEVEL_DEFAULT) { - env->cache_info_cpuid4.l1d_cache->share_level = level; - env->cache_info_amd.l1d_cache->share_level = level; + env->cache_info.l1d_cache->share_level = level; } else { machine_set_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L1D, - env->cache_info_cpuid4.l1d_cache->share_level); - machine_set_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L1D, - env->cache_info_amd.l1d_cache->share_level); + env->cache_info.l1d_cache->share_level); } level = machine_get_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L1I); if (level != CPU_TOPOLOGY_LEVEL_DEFAULT) { - env->cache_info_cpuid4.l1i_cache->share_level = level; - env->cache_info_amd.l1i_cache->share_level = level; + env->cache_info.l1i_cache->share_level = level; } else { machine_set_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L1I, - env->cache_info_cpuid4.l1i_cache->share_level); - machine_set_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L1I, - env->cache_info_amd.l1i_cache->share_level); + env->cache_info.l1i_cache->share_level); } level = machine_get_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L2); if (level != CPU_TOPOLOGY_LEVEL_DEFAULT) { - env->cache_info_cpuid4.l2_cache->share_level = level; - env->cache_info_amd.l2_cache->share_level = level; + env->cache_info.l2_cache->share_level = level; } else { machine_set_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L2, - env->cache_info_cpuid4.l2_cache->share_level); - machine_set_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L2, - env->cache_info_amd.l2_cache->share_level); + env->cache_info.l2_cache->share_level); } level = machine_get_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L3); if (level != CPU_TOPOLOGY_LEVEL_DEFAULT) { - env->cache_info_cpuid4.l3_cache->share_level = level; - env->cache_info_amd.l3_cache->share_level = level; + env->cache_info.l3_cache->share_level = level; } else { machine_set_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L3, - env->cache_info_cpuid4.l3_cache->share_level); - machine_set_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L3, - env->cache_info_amd.l3_cache->share_level); + env->cache_info.l3_cache->share_level); } if (!machine_check_smp_cache(ms, errp)) { @@ -8115,6 +9233,16 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) tcg_cflags_set(cs, CF_PCREL); #endif + /* + * x-vendor-cpuid-only and v2 should be initernal only. But + * QEMU doesn't support "internal" property. 
+ */ + if (!cpu->vendor_cpuid_only && cpu->vendor_cpuid_only_v2) { + error_setg(errp, "x-vendor-cpuid-only-v2 property " + "depends on x-vendor-cpuid-only"); + return; + } + if (cpu->apic_id == UNASSIGNED_APIC_ID) { error_setg(errp, "apic-id property was not initialized properly"); return; @@ -8318,24 +9446,22 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) "CPU model '%s' doesn't support legacy-cache=off", name); return; } - env->cache_info_cpuid2 = env->cache_info_cpuid4 = env->cache_info_amd = - *cache_info; + env->cache_info = *cache_info; } else { /* Build legacy cache information */ - env->cache_info_cpuid2.l1d_cache = &legacy_l1d_cache; - env->cache_info_cpuid2.l1i_cache = &legacy_l1i_cache; - env->cache_info_cpuid2.l2_cache = &legacy_l2_cache_cpuid2; - env->cache_info_cpuid2.l3_cache = &legacy_l3_cache; + if (!cpu->consistent_cache) { + env->enable_legacy_cpuid2_cache = true; + } - env->cache_info_cpuid4.l1d_cache = &legacy_l1d_cache; - env->cache_info_cpuid4.l1i_cache = &legacy_l1i_cache; - env->cache_info_cpuid4.l2_cache = &legacy_l2_cache; - env->cache_info_cpuid4.l3_cache = &legacy_l3_cache; + if (!cpu->vendor_cpuid_only_v2) { + env->enable_legacy_vendor_cache = true; + } - env->cache_info_amd.l1d_cache = &legacy_l1d_cache_amd; - env->cache_info_amd.l1i_cache = &legacy_l1i_cache_amd; - env->cache_info_amd.l2_cache = &legacy_l2_cache_amd; - env->cache_info_amd.l3_cache = &legacy_l3_cache; + if (IS_AMD_CPU(env)) { + env->cache_info = legacy_amd_cache_info; + } else { + env->cache_info = legacy_intel_cache_info; + } } #ifndef CONFIG_USER_ONLY @@ -8514,7 +9640,12 @@ static void x86_cpu_post_initfn(Object *obj) } } - accel_cpu_instance_init(CPU(obj)); +#ifndef CONFIG_USER_ONLY + if (current_machine && current_machine->cgs) { + x86_confidential_guest_cpu_instance_init( + X86_CONFIDENTIAL_GUEST(current_machine->cgs), (CPU(obj))); + } +#endif } static void x86_cpu_init_default_topo(X86CPU *cpu) @@ -8583,6 +9714,8 @@ static void x86_cpu_initfn(Object *obj) if (xcc->model) { x86_cpu_load_model(cpu, xcc->model); } + + accel_cpu_instance_init(CPU(obj)); } static int64_t x86_cpu_get_arch_id(CPUState *cs) @@ -8663,39 +9796,6 @@ static bool x86_cpu_has_work(CPUState *cs) } #endif /* !CONFIG_USER_ONLY */ -int x86_mmu_index_pl(CPUX86State *env, unsigned pl) -{ - int mmu_index_32 = (env->hflags & HF_CS64_MASK) ? 0 : 1; - int mmu_index_base = - pl == 3 ? MMU_USER64_IDX : - !(env->hflags & HF_SMAP_MASK) ? MMU_KNOSMAP64_IDX : - (env->eflags & AC_MASK) ? MMU_KNOSMAP64_IDX : MMU_KSMAP64_IDX; - - return mmu_index_base + mmu_index_32; -} - -static int x86_cpu_mmu_index(CPUState *cs, bool ifetch) -{ - CPUX86State *env = cpu_env(cs); - return x86_mmu_index_pl(env, env->hflags & HF_CPL_MASK); -} - -static int x86_mmu_index_kernel_pl(CPUX86State *env, unsigned pl) -{ - int mmu_index_32 = (env->hflags & HF_LMA_MASK) ? 0 : 1; - int mmu_index_base = - !(env->hflags & HF_SMAP_MASK) ? MMU_KNOSMAP64_IDX : - (pl < 3 && (env->eflags & AC_MASK) - ? 
MMU_KNOSMAP64_IDX : MMU_KSMAP64_IDX); - - return mmu_index_base + mmu_index_32; -} - -int cpu_mmu_index_kernel(CPUX86State *env) -{ - return x86_mmu_index_kernel_pl(env, env->hflags & HF_CPL_MASK); -} - static void x86_disas_set_info(CPUState *cs, disassemble_info *info) { X86CPU *cpu = X86_CPU(cs); @@ -8862,6 +9962,7 @@ static const Property x86_cpu_properties[] = { DEFINE_PROP_STRING("hv-vendor-id", X86CPU, hyperv_vendor), DEFINE_PROP_BOOL("cpuid-0xb", X86CPU, enable_cpuid_0xb, true), DEFINE_PROP_BOOL("x-vendor-cpuid-only", X86CPU, vendor_cpuid_only, true), + DEFINE_PROP_BOOL("x-vendor-cpuid-only-v2", X86CPU, vendor_cpuid_only_v2, true), DEFINE_PROP_BOOL("x-amd-topoext-features-only", X86CPU, amd_topoext_features_only, true), DEFINE_PROP_BOOL("lmce", X86CPU, enable_lmce, false), DEFINE_PROP_BOOL("l3-cache", X86CPU, enable_l3_cache, true), @@ -8876,6 +9977,7 @@ static const Property x86_cpu_properties[] = { * own cache information (see x86_cpu_load_def()). */ DEFINE_PROP_BOOL("legacy-cache", X86CPU, legacy_cache, true), + DEFINE_PROP_BOOL("x-consistent-cache", X86CPU, consistent_cache, true), DEFINE_PROP_BOOL("legacy-multi-node", X86CPU, legacy_multi_node, false), DEFINE_PROP_BOOL("xen-vapic", X86CPU, xen_vapic, false), @@ -8897,6 +9999,7 @@ static const Property x86_cpu_properties[] = { DEFINE_PROP_BOOL("x-intel-pt-auto-level", X86CPU, intel_pt_auto_level, true), DEFINE_PROP_BOOL("x-l1-cache-per-thread", X86CPU, l1_cache_per_core, true), + DEFINE_PROP_BOOL("x-force-cpuid-0x1f", X86CPU, force_cpuid_0x1f, false), }; #ifndef CONFIG_USER_ONLY @@ -8917,7 +10020,7 @@ static const struct SysemuCPUOps i386_sysemu_ops = { }; #endif -static void x86_cpu_common_class_init(ObjectClass *oc, void *data) +static void x86_cpu_common_class_init(ObjectClass *oc, const void *data) { X86CPUClass *xcc = X86_CPU_CLASS(oc); CPUClass *cc = CPU_CLASS(oc); @@ -8936,8 +10039,8 @@ static void x86_cpu_common_class_init(ObjectClass *oc, void *data) cc->reset_dump_flags = CPU_DUMP_FPU | CPU_DUMP_CCOP; cc->class_by_name = x86_cpu_class_by_name; + cc->list_cpus = x86_cpu_list; cc->parse_features = x86_cpu_parse_featurestr; - cc->mmu_index = x86_cpu_mmu_index; cc->dump_state = x86_cpu_dump_state; cc->set_pc = x86_cpu_set_pc; cc->get_pc = x86_cpu_get_pc; @@ -8948,6 +10051,9 @@ static void x86_cpu_common_class_init(ObjectClass *oc, void *data) #ifndef CONFIG_USER_ONLY cc->sysemu_ops = &i386_sysemu_ops; #endif /* !CONFIG_USER_ONLY */ +#ifdef CONFIG_TCG + cc->tcg_ops = &x86_tcg_ops; +#endif /* CONFIG_TCG */ cc->gdb_arch_name = x86_gdb_arch_name; #ifdef TARGET_X86_64 @@ -9014,7 +10120,7 @@ static const TypeInfo x86_cpu_type_info = { }; /* "base" CPU model, used by query-cpu-model-expansion */ -static void x86_cpu_base_class_init(ObjectClass *oc, void *data) +static void x86_cpu_base_class_init(ObjectClass *oc, const void *data) { X86CPUClass *xcc = X86_CPU_CLASS(oc); diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 119efc6..f977fc4 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -23,7 +23,9 @@ #include "system/tcg.h" #include "cpu-qom.h" #include "kvm/hyperv-proto.h" +#include "exec/cpu-common.h" #include "exec/cpu-defs.h" +#include "exec/cpu-interrupt.h" #include "exec/memop.h" #include "hw/i386/topology.h" #include "qapi/qapi-types-common.h" @@ -33,12 +35,6 @@ #define XEN_NR_VIRQS 24 -#define KVM_HAVE_MCE_INJECTION 1 - -/* support for self modifying code even if the modified instruction is - close to the modifying instruction */ -#define TARGET_HAS_PRECISE_SMC - #ifdef TARGET_X86_64 #define 
I386_ELF_MACHINE EM_X86_64 #define ELF_MACHINE_UNAME "x86_64" @@ -588,6 +584,7 @@ typedef enum X86Seg { #define XSTATE_OPMASK_BIT 5 #define XSTATE_ZMM_Hi256_BIT 6 #define XSTATE_Hi16_ZMM_BIT 7 +#define XSTATE_PT_BIT 8 #define XSTATE_PKRU_BIT 9 #define XSTATE_ARCH_LBR_BIT 15 #define XSTATE_XTILE_CFG_BIT 17 @@ -601,6 +598,7 @@ typedef enum X86Seg { #define XSTATE_OPMASK_MASK (1ULL << XSTATE_OPMASK_BIT) #define XSTATE_ZMM_Hi256_MASK (1ULL << XSTATE_ZMM_Hi256_BIT) #define XSTATE_Hi16_ZMM_MASK (1ULL << XSTATE_Hi16_ZMM_BIT) +#define XSTATE_PT_MASK (1ULL << XSTATE_PT_BIT) #define XSTATE_PKRU_MASK (1ULL << XSTATE_PKRU_BIT) #define XSTATE_ARCH_LBR_MASK (1ULL << XSTATE_ARCH_LBR_BIT) #define XSTATE_XTILE_CFG_MASK (1ULL << XSTATE_XTILE_CFG_BIT) @@ -623,6 +621,11 @@ typedef enum X86Seg { XSTATE_Hi16_ZMM_MASK | XSTATE_PKRU_MASK | \ XSTATE_XTILE_CFG_MASK | XSTATE_XTILE_DATA_MASK) +/* CPUID feature bits available in XSS */ +#define CPUID_XSTATE_XSS_MASK (XSTATE_ARCH_LBR_MASK) + +#define CPUID_XSTATE_MASK (CPUID_XSTATE_XCR0_MASK | CPUID_XSTATE_XSS_MASK) + /* CPUID feature words */ typedef enum FeatureWord { FEAT_1_EDX, /* CPUID[1].EDX */ @@ -665,12 +668,22 @@ typedef enum FeatureWord { FEAT_SGX_12_1_EAX, /* CPUID[EAX=0x12,ECX=1].EAX (SGX ATTRIBUTES[31:0]) */ FEAT_XSAVE_XSS_LO, /* CPUID[EAX=0xd,ECX=1].ECX */ FEAT_XSAVE_XSS_HI, /* CPUID[EAX=0xd,ECX=1].EDX */ + FEAT_7_1_ECX, /* CPUID[EAX=7,ECX=1].ECX */ FEAT_7_1_EDX, /* CPUID[EAX=7,ECX=1].EDX */ FEAT_7_2_EDX, /* CPUID[EAX=7,ECX=2].EDX */ FEAT_24_0_EBX, /* CPUID[EAX=0x24,ECX=0].EBX */ FEATURE_WORDS, } FeatureWord; +typedef struct FeatureMask { + FeatureWord index; + uint64_t mask; +} FeatureMask; + +typedef struct FeatureDep { + FeatureMask from, to; +} FeatureDep; + typedef uint64_t FeatureWordArray[FEATURE_WORDS]; uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); @@ -903,6 +916,8 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); #define CPUID_7_0_ECX_LA57 (1U << 16) /* Read Processor ID */ #define CPUID_7_0_ECX_RDPID (1U << 22) +/* KeyLocker */ +#define CPUID_7_0_ECX_KeyLocker (1U << 23) /* Bus Lock Debug Exception */ #define CPUID_7_0_ECX_BUS_LOCK_DETECT (1U << 24) /* Cache Line Demote Instruction */ @@ -924,6 +939,8 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); #define CPUID_7_0_EDX_FSRM (1U << 4) /* AVX512 Vector Pair Intersection to a Pair of Mask Registers */ #define CPUID_7_0_EDX_AVX512_VP2INTERSECT (1U << 8) + /* "md_clear" VERW clears CPU buffers */ +#define CPUID_7_0_EDX_MD_CLEAR (1U << 10) /* SERIALIZE instruction */ #define CPUID_7_0_EDX_SERIALIZE (1U << 14) /* TSX Suspend Load Address Tracking instruction */ @@ -961,6 +978,8 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); #define CPUID_7_1_EAX_AVX_VNNI (1U << 4) /* AVX512 BFloat16 Instruction */ #define CPUID_7_1_EAX_AVX512_BF16 (1U << 5) +/* Linear address space separation */ +#define CPUID_7_1_EAX_LASS (1U << 6) /* CMPCCXADD Instructions */ #define CPUID_7_1_EAX_CMPCCXADD (1U << 7) /* Fast Zero REP MOVS */ @@ -982,6 +1001,9 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); /* Linear Address Masking */ #define CPUID_7_1_EAX_LAM (1U << 26) +/* The immediate form of MSR access instructions */ +#define CPUID_7_1_ECX_MSR_IMM (1U << 5) + /* Support for VPDPB[SU,UU,SS]D[,S] */ #define CPUID_7_1_EDX_AVX_VNNI_INT8 (1U << 4) /* AVX NE CONVERT Instructions */ @@ -1005,6 +1027,7 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); #define 
CPUID_7_2_EDX_DDPD_U (1U << 3) /* Indicate bit 10 of the IA32_SPEC_CTRL MSR is supported */ #define CPUID_7_2_EDX_BHI_CTRL (1U << 4) + /* Do not exhibit MXCSR Configuration Dependent Timing (MCDT) behavior */ #define CPUID_7_2_EDX_MCDT_NO (1U << 5) @@ -1074,12 +1097,16 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); /* Processor ignores nested data breakpoints */ #define CPUID_8000_0021_EAX_NO_NESTED_DATA_BP (1U << 0) +/* WRMSR to FS_BASE, GS_BASE, or KERNEL_GS_BASE is non-serializing */ +#define CPUID_8000_0021_EAX_FS_GS_BASE_NS (1U << 1) /* LFENCE is always serializing */ #define CPUID_8000_0021_EAX_LFENCE_ALWAYS_SERIALIZING (1U << 2) /* Null Selector Clears Base */ #define CPUID_8000_0021_EAX_NULL_SEL_CLR_BASE (1U << 6) /* Automatic IBRS */ #define CPUID_8000_0021_EAX_AUTO_IBRS (1U << 8) +/* Indicates support for IC prefetch */ +#define CPUID_8000_0021_EAX_PREFETCHI (1U << 20) /* Enhanced Return Address Predictor Scurity */ #define CPUID_8000_0021_EAX_ERAPS (1U << 24) /* Selective Branch Predictor Barrier */ @@ -1104,6 +1131,7 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); #define CPUID_XSAVE_XSAVEC (1U << 1) #define CPUID_XSAVE_XGETBV1 (1U << 2) #define CPUID_XSAVE_XSAVES (1U << 3) +#define CPUID_XSAVE_XFD (1U << 4) #define CPUID_6_EAX_ARAT (1U << 2) @@ -1131,7 +1159,8 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); /* PMM enabled */ #define CPUID_C000_0001_EDX_PMM_EN (1U << 13) -#define CPUID_VENDOR_SZ 12 +#define CPUID_VENDOR_SZ 12 +#define CPUID_MODEL_ID_SZ 48 #define CPUID_VENDOR_INTEL_1 0x756e6547 /* "Genu" */ #define CPUID_VENDOR_INTEL_2 0x49656e69 /* "ineI" */ @@ -1610,8 +1639,6 @@ typedef struct { #define MAX_FIXED_COUNTERS 3 #define MAX_GP_COUNTERS (MSR_IA32_PERF_STATUS - MSR_P6_EVNTSEL0) -#define TARGET_INSN_START_EXTRA_WORDS 1 - #define NB_OPMASK_REGS 8 /* CPU can't have 0xFFFFFFFF APIC ID, use that value to distinguish @@ -1747,12 +1774,6 @@ typedef enum TPRAccess { /* Cache information data structures: */ -enum CacheType { - DATA_CACHE, - INSTRUCTION_CACHE, - UNIFIED_CACHE -}; - typedef struct CPUCacheInfo { enum CacheType type; uint8_t level; @@ -1811,11 +1832,6 @@ typedef struct CPUCaches { CPUCacheInfo *l3_cache; } CPUCaches; -typedef struct X86LazyFlags { - target_ulong result; - target_ulong auxbits; -} X86LazyFlags; - typedef struct CPUArchState { /* standard registers */ target_ulong regs[CPU_NB_REGS]; @@ -2057,11 +2073,14 @@ typedef struct CPUArchState { /* Features that were explicitly enabled/disabled */ FeatureWordArray user_features; uint32_t cpuid_model[12]; - /* Cache information for CPUID. When legacy-cache=on, the cache data + /* + * Cache information for CPUID. When legacy-cache=on, the cache data * on each CPUID leaf will be different, because we keep compatibility * with old QEMU versions. 
*/ - CPUCaches cache_info_cpuid2, cache_info_cpuid4, cache_info_amd; + CPUCaches cache_info; + bool enable_legacy_cpuid2_cache; + bool enable_legacy_vendor_cache; /* MTRRs */ uint64_t mtrr_fixed[11]; @@ -2108,7 +2127,6 @@ typedef struct CPUArchState { QemuMutex xen_timers_lock; #endif #if defined(CONFIG_HVF) - X86LazyFlags lflags; void *emu_mmio_buf; #endif @@ -2182,7 +2200,6 @@ struct ArchCPU { bool expose_tcg; bool migratable; bool migrate_smi_count; - bool max_features; /* Enable all supported features automatically */ uint32_t apic_id; /* Enables publishing of TSC increment and Local APIC bus frequencies to @@ -2204,6 +2221,9 @@ struct ArchCPU { /* Features that were filtered out because of missing host capabilities */ FeatureWordArray filtered_features; + /* Features that are forced enabled by underlying hypervisor, e.g., TDX */ + FeatureWordArray forced_on_features; + /* Enable PMU CPUID bits. This can't be enabled by default yet because * it doesn't have ABI stability guarantees, as it passes all PMU CPUID * bits returned by GET_SUPPORTED_CPUID (that depend on host CPU and kernel @@ -2242,6 +2262,13 @@ struct ArchCPU { */ bool legacy_cache; + /* + * Compatibility bits for old machine types. + * If true, use the same cache model in CPUID leaf 0x2 + * and 0x4. + */ + bool consistent_cache; + /* Compatibility bits for old machine types. * If true decode the CPUID Function 0x8000001E_ECX to support multiple * nodes per processor @@ -2251,12 +2278,24 @@ struct ArchCPU { /* Compatibility bits for old machine types: */ bool enable_cpuid_0xb; + /* Force to enable cpuid 0x1f */ + bool force_cpuid_0x1f; + /* Enable auto level-increase for all CPUID leaves */ bool full_cpuid_auto_level; - /* Only advertise CPUID leaves defined by the vendor */ + /* + * Compatibility bits for old machine types (PC machine v6.0 and older). + * Only advertise CPUID leaves defined by the vendor. + */ bool vendor_cpuid_only; + /* + * Compatibility bits for old machine types (PC machine v10.0 and older). + * Only advertise CPUID leaves defined by the vendor. 
+ */ + bool vendor_cpuid_only_v2; + /* Only advertise TOPOEXT features that AMD defines */ bool amd_topoext_features_only; @@ -2329,6 +2368,7 @@ struct X86CPUClass { */ const X86CPUModel *model; + bool max_features; /* Enable all supported features automatically */ bool host_cpuid_required; int ordering; bool migration_safe; @@ -2367,7 +2407,6 @@ int x86_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg); int x86_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg); void x86_cpu_gdb_init(CPUState *cs); -void x86_cpu_list(void); int cpu_x86_support_mca_broadcast(CPUX86State *env); #ifndef CONFIG_USER_ONLY @@ -2398,7 +2437,14 @@ static inline void cpu_x86_load_seg_cache(CPUX86State *env, SegmentCache *sc; unsigned int new_hflags; - sc = &env->segs[seg_reg]; + if (seg_reg == R_LDTR) { + sc = &env->ldt; + } else if (seg_reg == R_TR) { + sc = &env->tr; + } else { + sc = &env->segs[seg_reg]; + } + sc->selector = selector; sc->base = base; sc->limit = limit; @@ -2512,6 +2558,17 @@ void cpu_set_apic_feature(CPUX86State *env); void host_cpuid(uint32_t function, uint32_t count, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx); bool cpu_has_x2apic_feature(CPUX86State *env); +bool is_feature_word_cpuid(uint32_t feature, uint32_t index, int reg); +void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint64_t mask, + const char *verbose_prefix); +void mark_forced_on_features(X86CPU *cpu, FeatureWord w, uint64_t mask, + const char *verbose_prefix); + +static inline bool x86_has_cpuid_0x1f(X86CPU *cpu) +{ + return cpu->force_cpuid_0x1f || + x86_has_extended_topo(cpu->env.avail_cpu_topo); +} /* helper.c */ void x86_cpu_set_a20(X86CPU *cpu, int a20_state); @@ -2561,8 +2618,6 @@ uint64_t cpu_get_tsc(CPUX86State *env); #define TARGET_DEFAULT_CPU_TYPE X86_CPU_TYPE_NAME("qemu32") #endif -#define cpu_list x86_cpu_list - /* MMU modes definitions */ #define MMU_KSMAP64_IDX 0 #define MMU_KSMAP32_IDX 1 @@ -2597,35 +2652,17 @@ static inline bool is_mmu_index_32(int mmu_index) return mmu_index & 1; } -int x86_mmu_index_pl(CPUX86State *env, unsigned pl); -int cpu_mmu_index_kernel(CPUX86State *env); - #define CC_DST (env->cc_dst) #define CC_SRC (env->cc_src) #define CC_SRC2 (env->cc_src2) #define CC_OP (env->cc_op) -#include "exec/cpu-all.h" #include "svm.h" #if !defined(CONFIG_USER_ONLY) #include "hw/i386/apic.h" #endif -static inline void cpu_get_tb_cpu_state(CPUX86State *env, vaddr *pc, - uint64_t *cs_base, uint32_t *flags) -{ - *flags = env->hflags | - (env->eflags & (IOPL_MASK | TF_MASK | RF_MASK | VM_MASK | AC_MASK)); - if (env->hflags & HF_CS64_MASK) { - *cs_base = 0; - *pc = env->eip; - } else { - *cs_base = env->segs[R_CS].base; - *pc = (uint32_t)(*cs_base + env->eip); - } -} - void do_cpu_init(X86CPU *cpu); #define MCE_INJECT_BROADCAST 1 @@ -2660,6 +2697,36 @@ static inline int32_t x86_get_a20_mask(CPUX86State *env) } } +static inline uint32_t x86_cpu_family(uint32_t eax) +{ + uint32_t family = (eax >> 8) & 0xf; + + if (family == 0xf) { + family += (eax >> 20) & 0xff; + } + + return family; +} + +static inline uint32_t x86_cpu_model(uint32_t eax) +{ + uint32_t family, model; + + family = x86_cpu_family(eax); + model = (eax >> 4) & 0xf; + + if (family >= 0x6) { + model += ((eax >> 16) & 0xf) << 4; + } + + return model; +} + +static inline uint32_t x86_cpu_stepping(uint32_t eax) +{ + return eax & 0xf; +} + static inline bool cpu_has_vmx(CPUX86State *env) { return env->features[FEAT_1_ECX] & CPUID_EXT_VMX; diff --git a/target/i386/emulate/x86_decode.c 
b/target/i386/emulate/x86_decode.c index 7fee219..2eca398 100644 --- a/target/i386/emulate/x86_decode.c +++ b/target/i386/emulate/x86_decode.c @@ -26,7 +26,7 @@ static void decode_invalid(CPUX86State *env, struct x86_decode *decode) { - printf("%llx: failed to decode instruction ", env->eip); + printf(TARGET_FMT_lx ": failed to decode instruction ", env->eip); for (int i = 0; i < decode->opcode_len; i++) { printf("%x ", decode->opcode[i]); } @@ -109,8 +109,8 @@ static void decode_modrm_reg(CPUX86State *env, struct x86_decode *decode, { op->type = X86_VAR_REG; op->reg = decode->modrm.reg; - op->ptr = get_reg_ref(env, op->reg, decode->rex.rex, decode->rex.r, - decode->operand_size); + op->regptr = get_reg_ref(env, op->reg, decode->rex.rex, decode->rex.r, + decode->operand_size); } static void decode_rax(CPUX86State *env, struct x86_decode *decode, @@ -119,8 +119,8 @@ static void decode_rax(CPUX86State *env, struct x86_decode *decode, op->type = X86_VAR_REG; op->reg = R_EAX; /* Since reg is always AX, REX prefix has no impact. */ - op->ptr = get_reg_ref(env, op->reg, false, 0, - decode->operand_size); + op->regptr = get_reg_ref(env, op->reg, false, 0, + decode->operand_size); } static inline void decode_immediate(CPUX86State *env, struct x86_decode *decode, @@ -262,16 +262,16 @@ static void decode_incgroup(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; decode->op[0].reg = decode->opcode[0] - 0x40; - decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, - decode->rex.b, decode->operand_size); + decode->op[0].regptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, + decode->rex.b, decode->operand_size); } static void decode_decgroup(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; decode->op[0].reg = decode->opcode[0] - 0x48; - decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, - decode->rex.b, decode->operand_size); + decode->op[0].regptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, + decode->rex.b, decode->operand_size); } static void decode_incgroup2(CPUX86State *env, struct x86_decode *decode) @@ -287,16 +287,16 @@ static void decode_pushgroup(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; decode->op[0].reg = decode->opcode[0] - 0x50; - decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, - decode->rex.b, decode->operand_size); + decode->op[0].regptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, + decode->rex.b, decode->operand_size); } static void decode_popgroup(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; decode->op[0].reg = decode->opcode[0] - 0x58; - decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, - decode->rex.b, decode->operand_size); + decode->op[0].regptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, + decode->rex.b, decode->operand_size); } static void decode_jxx(CPUX86State *env, struct x86_decode *decode) @@ -377,16 +377,16 @@ static void decode_xchgroup(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; decode->op[0].reg = decode->opcode[0] - 0x90; - decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, - decode->rex.b, decode->operand_size); + decode->op[0].regptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, + decode->rex.b, decode->operand_size); } static void decode_movgroup(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; 
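/*
 * The x86_decode/x86_emu changes in this series stop overloading the old
 * target_ulong `ptr` operand field: get_reg_ref() now returns a real
 * void *, x86_decode_op gains an addr/regptr union, and read_val_ext()/
 * write_val_ext() dispatch on the operand type instead of guessing whether
 * the stored value is a host pointer or a guest linear address. A minimal
 * self-contained sketch of that dispatch pattern, using simplified stand-in
 * types rather than the actual QEMU definitions:
 */

#include <stdint.h>

enum op_kind { OP_REG, OP_MEM };

struct operand {
    enum op_kind kind;
    union {
        uint64_t addr;      /* guest linear address */
        void *regptr;       /* host pointer into the emulated register file */
    };
};

/* stand-in for the emulator's guest-memory accessor */
static uint64_t guest_mem_read(uint64_t addr, int size)
{
    (void)addr;
    (void)size;
    return 0;
}

static uint64_t operand_read(const struct operand *op, int size)
{
    if (op->kind == OP_REG) {
        switch (size) {                        /* direct host access */
        case 1: return *(const uint8_t  *)op->regptr;
        case 2: return *(const uint16_t *)op->regptr;
        case 4: return *(const uint32_t *)op->regptr;
        default: return *(const uint64_t *)op->regptr;
        }
    }
    return guest_mem_read(op->addr, size);     /* goes through emulation */
}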
decode->op[0].reg = decode->opcode[0] - 0xb8; - decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, - decode->rex.b, decode->operand_size); + decode->op[0].regptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, + decode->rex.b, decode->operand_size); decode_immediate(env, decode, &decode->op[1], decode->operand_size); } @@ -394,15 +394,15 @@ static void fetch_moffs(CPUX86State *env, struct x86_decode *decode, struct x86_decode_op *op) { op->type = X86_VAR_OFFSET; - op->ptr = decode_bytes(env, decode, decode->addressing_size); + op->addr = decode_bytes(env, decode, decode->addressing_size); } static void decode_movgroup8(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; decode->op[0].reg = decode->opcode[0] - 0xb0; - decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, - decode->rex.b, decode->operand_size); + decode->op[0].regptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, + decode->rex.b, decode->operand_size); decode_immediate(env, decode, &decode->op[1], decode->operand_size); } @@ -411,8 +411,8 @@ static void decode_rcx(CPUX86State *env, struct x86_decode *decode, { op->type = X86_VAR_REG; op->reg = R_ECX; - op->ptr = get_reg_ref(env, op->reg, decode->rex.rex, decode->rex.b, - decode->operand_size); + op->regptr = get_reg_ref(env, op->reg, decode->rex.rex, decode->rex.b, + decode->operand_size); } struct decode_tbl { @@ -631,8 +631,8 @@ static void decode_bswap(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; decode->op[0].reg = decode->opcode[1] - 0xc8; - decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, - decode->rex.b, decode->operand_size); + decode->op[0].regptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, + decode->rex.b, decode->operand_size); } static void decode_d9_4(CPUX86State *env, struct x86_decode *decode) @@ -1408,7 +1408,7 @@ struct decode_tbl _2op_inst[] = { }; struct decode_x87_tbl invl_inst_x87 = {0x0, 0, 0, 0, 0, false, false, NULL, - NULL, decode_invalid, 0}; + NULL, decode_invalid}; struct decode_x87_tbl _x87_inst[] = { {0xd8, 0, 3, X86_DECODE_CMD_FADD, 10, false, false, @@ -1456,8 +1456,7 @@ struct decode_x87_tbl _x87_inst[] = { decode_x87_modrm_st0, NULL, decode_d9_4}, {0xd9, 4, 0, X86_DECODE_CMD_INVL, 4, false, false, decode_x87_modrm_bytep, NULL, NULL}, - {0xd9, 5, 3, X86_DECODE_CMD_FLDxx, 10, false, false, NULL, NULL, NULL, - RFLAGS_MASK_NONE}, + {0xd9, 5, 3, X86_DECODE_CMD_FLDxx, 10, false, false, NULL, NULL, NULL}, {0xd9, 5, 0, X86_DECODE_CMD_FLDCW, 2, false, false, decode_x87_modrm_bytep, NULL, NULL}, @@ -1478,20 +1477,17 @@ struct decode_x87_tbl _x87_inst[] = { decode_x87_modrm_st0, NULL}, {0xda, 3, 3, X86_DECODE_CMD_FCMOV, 10, false, false, decode_x87_modrm_st0, decode_x87_modrm_st0, NULL}, - {0xda, 4, 3, X86_DECODE_CMD_INVL, 10, false, false, NULL, NULL, NULL, - RFLAGS_MASK_NONE}, + {0xda, 4, 3, X86_DECODE_CMD_INVL, 10, false, false, NULL, NULL, NULL}, {0xda, 4, 0, X86_DECODE_CMD_FSUB, 4, false, false, decode_x87_modrm_st0, decode_x87_modrm_intp, NULL}, {0xda, 5, 3, X86_DECODE_CMD_FUCOM, 10, false, true, decode_x87_modrm_st0, decode_decode_x87_modrm_st0, NULL}, {0xda, 5, 0, X86_DECODE_CMD_FSUB, 4, true, false, decode_x87_modrm_st0, decode_x87_modrm_intp, NULL}, - {0xda, 6, 3, X86_DECODE_CMD_INVL, 10, false, false, NULL, NULL, NULL, - RFLAGS_MASK_NONE}, + {0xda, 6, 3, X86_DECODE_CMD_INVL, 10, false, false, NULL, NULL, NULL}, {0xda, 6, 0, X86_DECODE_CMD_FDIV, 4, false, false, decode_x87_modrm_st0, 
decode_x87_modrm_intp, NULL}, - {0xda, 7, 3, X86_DECODE_CMD_INVL, 10, false, false, NULL, NULL, NULL, - RFLAGS_MASK_NONE}, + {0xda, 7, 3, X86_DECODE_CMD_INVL, 10, false, false, NULL, NULL, NULL}, {0xda, 7, 0, X86_DECODE_CMD_FDIV, 4, true, false, decode_x87_modrm_st0, decode_x87_modrm_intp, NULL}, @@ -1511,8 +1507,7 @@ struct decode_x87_tbl _x87_inst[] = { decode_x87_modrm_intp, NULL, NULL}, {0xdb, 4, 3, X86_DECODE_CMD_INVL, 10, false, false, NULL, NULL, decode_db_4}, - {0xdb, 4, 0, X86_DECODE_CMD_INVL, 10, false, false, NULL, NULL, NULL, - RFLAGS_MASK_NONE}, + {0xdb, 4, 0, X86_DECODE_CMD_INVL, 10, false, false, NULL, NULL, NULL}, {0xdb, 5, 3, X86_DECODE_CMD_FUCOMI, 10, false, false, decode_x87_modrm_st0, decode_x87_modrm_st0, NULL}, {0xdb, 5, 0, X86_DECODE_CMD_FLD, 10, false, false, @@ -1661,16 +1656,16 @@ void calc_modrm_operand16(CPUX86State *env, struct x86_decode *decode, } calc_addr: if (X86_DECODE_CMD_LEA == decode->cmd) { - op->ptr = (uint16_t)ptr; + op->addr = (uint16_t)ptr; } else { - op->ptr = decode_linear_addr(env, decode, (uint16_t)ptr, seg); + op->addr = decode_linear_addr(env, decode, (uint16_t)ptr, seg); } } -target_ulong get_reg_ref(CPUX86State *env, int reg, int rex_present, +void *get_reg_ref(CPUX86State *env, int reg, int rex_present, int is_extended, int size) { - target_ulong ptr = 0; + void *ptr = NULL; if (is_extended) { reg |= R_R8; @@ -1679,13 +1674,13 @@ target_ulong get_reg_ref(CPUX86State *env, int reg, int rex_present, switch (size) { case 1: if (is_extended || reg < 4 || rex_present) { - ptr = (target_ulong)&RL(env, reg); + ptr = &RL(env, reg); } else { - ptr = (target_ulong)&RH(env, reg - 4); + ptr = &RH(env, reg - 4); } break; default: - ptr = (target_ulong)&RRX(env, reg); + ptr = &RRX(env, reg); break; } return ptr; @@ -1696,7 +1691,7 @@ target_ulong get_reg_val(CPUX86State *env, int reg, int rex_present, { target_ulong val = 0; memcpy(&val, - (void *)get_reg_ref(env, reg, rex_present, is_extended, size), + get_reg_ref(env, reg, rex_present, is_extended, size), size); return val; } @@ -1763,9 +1758,9 @@ void calc_modrm_operand32(CPUX86State *env, struct x86_decode *decode, } if (X86_DECODE_CMD_LEA == decode->cmd) { - op->ptr = (uint32_t)ptr; + op->addr = (uint32_t)ptr; } else { - op->ptr = decode_linear_addr(env, decode, (uint32_t)ptr, seg); + op->addr = decode_linear_addr(env, decode, (uint32_t)ptr, seg); } } @@ -1793,9 +1788,9 @@ void calc_modrm_operand64(CPUX86State *env, struct x86_decode *decode, } if (X86_DECODE_CMD_LEA == decode->cmd) { - op->ptr = ptr; + op->addr = ptr; } else { - op->ptr = decode_linear_addr(env, decode, ptr, seg); + op->addr = decode_linear_addr(env, decode, ptr, seg); } } @@ -1806,8 +1801,8 @@ void calc_modrm_operand(CPUX86State *env, struct x86_decode *decode, if (3 == decode->modrm.mod) { op->reg = decode->modrm.reg; op->type = X86_VAR_REG; - op->ptr = get_reg_ref(env, decode->modrm.rm, decode->rex.rex, - decode->rex.b, decode->operand_size); + op->regptr = get_reg_ref(env, decode->modrm.rm, decode->rex.rex, + decode->rex.b, decode->operand_size); return; } diff --git a/target/i386/emulate/x86_decode.h b/target/i386/emulate/x86_decode.h index 87cc728..927645a 100644 --- a/target/i386/emulate/x86_decode.h +++ b/target/i386/emulate/x86_decode.h @@ -266,7 +266,10 @@ typedef struct x86_decode_op { int reg; target_ulong val; - target_ulong ptr; + union { + target_ulong addr; + void *regptr; + }; } x86_decode_op; typedef struct x86_decode { @@ -301,8 +304,8 @@ uint64_t sign(uint64_t val, int size); uint32_t 
decode_instruction(CPUX86State *env, struct x86_decode *decode); -target_ulong get_reg_ref(CPUX86State *env, int reg, int rex_present, - int is_extended, int size); +void *get_reg_ref(CPUX86State *env, int reg, int rex_present, + int is_extended, int size); target_ulong get_reg_val(CPUX86State *env, int reg, int rex_present, int is_extended, int size); void calc_modrm_operand(CPUX86State *env, struct x86_decode *decode, diff --git a/target/i386/emulate/x86_emu.c b/target/i386/emulate/x86_emu.c index 26a4876..db7a7f7 100644 --- a/target/i386/emulate/x86_emu.c +++ b/target/i386/emulate/x86_emu.c @@ -31,8 +31,8 @@ // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA B 02110-1301 USA +// License along with this library; if not, see +// <https://www.gnu.org/licenses/>. ///////////////////////////////////////////////////////////////////////// #include "qemu/osdep.h" @@ -52,7 +52,7 @@ uint8_t v2 = (uint8_t)decode->op[1].val; \ uint8_t diff = v1 cmd v2; \ if (save_res) { \ - write_val_ext(env, decode->op[0].ptr, diff, 1); \ + write_val_ext(env, &decode->op[0], diff, 1); \ } \ FLAGS_FUNC##8(env, v1, v2, diff); \ break; \ @@ -63,7 +63,7 @@ uint16_t v2 = (uint16_t)decode->op[1].val; \ uint16_t diff = v1 cmd v2; \ if (save_res) { \ - write_val_ext(env, decode->op[0].ptr, diff, 2); \ + write_val_ext(env, &decode->op[0], diff, 2); \ } \ FLAGS_FUNC##16(env, v1, v2, diff); \ break; \ @@ -74,7 +74,7 @@ uint32_t v2 = (uint32_t)decode->op[1].val; \ uint32_t diff = v1 cmd v2; \ if (save_res) { \ - write_val_ext(env, decode->op[0].ptr, diff, 4); \ + write_val_ext(env, &decode->op[0], diff, 4); \ } \ FLAGS_FUNC##32(env, v1, v2, diff); \ break; \ @@ -121,7 +121,7 @@ void write_reg(CPUX86State *env, int reg, target_ulong val, int size) } } -target_ulong read_val_from_reg(target_ulong reg_ptr, int size) +target_ulong read_val_from_reg(void *reg_ptr, int size) { target_ulong val; @@ -144,7 +144,7 @@ target_ulong read_val_from_reg(target_ulong reg_ptr, int size) return val; } -void write_val_to_reg(target_ulong reg_ptr, target_ulong val, int size) +void write_val_to_reg(void *reg_ptr, target_ulong val, int size) { switch (size) { case 1: @@ -164,18 +164,18 @@ void write_val_to_reg(target_ulong reg_ptr, target_ulong val, int size) } } -static bool is_host_reg(CPUX86State *env, target_ulong ptr) +static void write_val_to_mem(CPUX86State *env, target_ulong ptr, target_ulong val, int size) { - return (ptr - (target_ulong)&env->regs[0]) < sizeof(env->regs); + emul_ops->write_mem(env_cpu(env), &val, ptr, size); } -void write_val_ext(CPUX86State *env, target_ulong ptr, target_ulong val, int size) +void write_val_ext(CPUX86State *env, struct x86_decode_op *decode, target_ulong val, int size) { - if (is_host_reg(env, ptr)) { - write_val_to_reg(ptr, val, size); - return; + if (decode->type == X86_VAR_REG) { + write_val_to_reg(decode->regptr, val, size); + } else { + write_val_to_mem(env, decode->addr, val, size); } - emul_ops->write_mem(env_cpu(env), &val, ptr, size); } uint8_t *read_mmio(CPUX86State *env, target_ulong ptr, int bytes) @@ -185,15 +185,11 @@ uint8_t *read_mmio(CPUX86State *env, target_ulong ptr, int bytes) } -target_ulong read_val_ext(CPUX86State *env, target_ulong ptr, int size) +static target_ulong read_val_from_mem(CPUX86State *env, target_long ptr, int size) { target_ulong val; uint8_t *mmio_ptr; - if 
(is_host_reg(env, ptr)) { - return read_val_from_reg(ptr, size); - } - mmio_ptr = read_mmio(env, ptr, size); switch (size) { case 1: @@ -215,6 +211,15 @@ target_ulong read_val_ext(CPUX86State *env, target_ulong ptr, int size) return val; } +target_ulong read_val_ext(CPUX86State *env, struct x86_decode_op *decode, int size) +{ + if (decode->type == X86_VAR_REG) { + return read_val_from_reg(decode->regptr, size); + } else { + return read_val_from_mem(env, decode->addr, size); + } +} + static void fetch_operands(CPUX86State *env, struct x86_decode *decode, int n, bool val_op0, bool val_op1, bool val_op2) { @@ -226,25 +231,25 @@ static void fetch_operands(CPUX86State *env, struct x86_decode *decode, case X86_VAR_IMMEDIATE: break; case X86_VAR_REG: - VM_PANIC_ON(!decode->op[i].ptr); + VM_PANIC_ON(!decode->op[i].regptr); if (calc_val[i]) { - decode->op[i].val = read_val_from_reg(decode->op[i].ptr, + decode->op[i].val = read_val_from_reg(decode->op[i].regptr, decode->operand_size); } break; case X86_VAR_RM: calc_modrm_operand(env, decode, &decode->op[i]); if (calc_val[i]) { - decode->op[i].val = read_val_ext(env, decode->op[i].ptr, + decode->op[i].val = read_val_ext(env, &decode->op[i], decode->operand_size); } break; case X86_VAR_OFFSET: - decode->op[i].ptr = decode_linear_addr(env, decode, - decode->op[i].ptr, - R_DS); + decode->op[i].addr = decode_linear_addr(env, decode, + decode->op[i].addr, + R_DS); if (calc_val[i]) { - decode->op[i].val = read_val_ext(env, decode->op[i].ptr, + decode->op[i].val = read_val_ext(env, &decode->op[i], decode->operand_size); } break; @@ -257,7 +262,7 @@ static void fetch_operands(CPUX86State *env, struct x86_decode *decode, static void exec_mov(CPUX86State *env, struct x86_decode *decode) { fetch_operands(env, decode, 2, false, true, false); - write_val_ext(env, decode->op[0].ptr, decode->op[1].val, + write_val_ext(env, &decode->op[0], decode->op[1].val, decode->operand_size); env->eip += decode->len; @@ -312,7 +317,7 @@ static void exec_neg(CPUX86State *env, struct x86_decode *decode) fetch_operands(env, decode, 2, true, true, false); val = 0 - sign(decode->op[1].val, decode->operand_size); - write_val_ext(env, decode->op[1].ptr, val, decode->operand_size); + write_val_ext(env, &decode->op[1], val, decode->operand_size); if (4 == decode->operand_size) { SET_FLAGS_OSZAPC_SUB32(env, 0, 0 - val, val); @@ -363,7 +368,7 @@ static void exec_not(CPUX86State *env, struct x86_decode *decode) { fetch_operands(env, decode, 1, true, false, false); - write_val_ext(env, decode->op[0].ptr, ~decode->op[0].val, + write_val_ext(env, &decode->op[0], ~decode->op[0].val, decode->operand_size); env->eip += decode->len; } @@ -382,8 +387,8 @@ void exec_movzx(CPUX86State *env, struct x86_decode *decode) } decode->operand_size = src_op_size; calc_modrm_operand(env, decode, &decode->op[1]); - decode->op[1].val = read_val_ext(env, decode->op[1].ptr, src_op_size); - write_val_ext(env, decode->op[0].ptr, decode->op[1].val, op_size); + decode->op[1].val = read_val_ext(env, &decode->op[1], src_op_size); + write_val_ext(env, &decode->op[0], decode->op[1].val, op_size); env->eip += decode->len; } @@ -469,10 +474,10 @@ static inline void string_rep(CPUX86State *env, struct x86_decode *decode, while (rcx--) { func(env, decode); write_reg(env, R_ECX, rcx, decode->addressing_size); - if ((PREFIX_REP == rep) && !get_ZF(env)) { + if ((PREFIX_REP == rep) && !env->cc_dst) { break; } - if ((PREFIX_REPN == rep) && get_ZF(env)) { + if ((PREFIX_REPN == rep) && env->cc_dst) { break; } } @@ -535,8 +540,8 @@ 
static void exec_movs_single(CPUX86State *env, struct x86_decode *decode) dst_addr = linear_addr_size(env_cpu(env), RDI(env), decode->addressing_size, R_ES); - val = read_val_ext(env, src_addr, decode->operand_size); - write_val_ext(env, dst_addr, val, decode->operand_size); + val = read_val_from_mem(env, src_addr, decode->operand_size); + write_val_to_mem(env, dst_addr, val, decode->operand_size); string_increment_reg(env, R_ESI, decode); string_increment_reg(env, R_EDI, decode); @@ -563,9 +568,9 @@ static void exec_cmps_single(CPUX86State *env, struct x86_decode *decode) decode->addressing_size, R_ES); decode->op[0].type = X86_VAR_IMMEDIATE; - decode->op[0].val = read_val_ext(env, src_addr, decode->operand_size); + decode->op[0].val = read_val_from_mem(env, src_addr, decode->operand_size); decode->op[1].type = X86_VAR_IMMEDIATE; - decode->op[1].val = read_val_ext(env, dst_addr, decode->operand_size); + decode->op[1].val = read_val_from_mem(env, dst_addr, decode->operand_size); EXEC_2OP_FLAGS_CMD(env, decode, -, SET_FLAGS_OSZAPC_SUB, false); @@ -697,15 +702,15 @@ static void do_bt(CPUX86State *env, struct x86_decode *decode, int flag) if (decode->op[0].type != X86_VAR_REG) { if (4 == decode->operand_size) { displacement = ((int32_t) (decode->op[1].val & 0xffffffe0)) / 32; - decode->op[0].ptr += 4 * displacement; + decode->op[0].addr += 4 * displacement; } else if (2 == decode->operand_size) { displacement = ((int16_t) (decode->op[1].val & 0xfff0)) / 16; - decode->op[0].ptr += 2 * displacement; + decode->op[0].addr += 2 * displacement; } else { VM_PANIC("bt 64bit\n"); } } - decode->op[0].val = read_val_ext(env, decode->op[0].ptr, + decode->op[0].val = read_val_ext(env, &decode->op[0], decode->operand_size); cf = (decode->op[0].val >> index) & 0x01; @@ -723,7 +728,7 @@ static void do_bt(CPUX86State *env, struct x86_decode *decode, int flag) decode->op[0].val &= ~(1u << index); break; } - write_val_ext(env, decode->op[0].ptr, decode->op[0].val, + write_val_ext(env, &decode->op[0], decode->op[0].val, decode->operand_size); set_CF(env, cf); } @@ -775,7 +780,7 @@ void exec_shl(CPUX86State *env, struct x86_decode *decode) of = cf ^ (res >> 7); } - write_val_ext(env, decode->op[0].ptr, res, 1); + write_val_ext(env, &decode->op[0], res, 1); SET_FLAGS_OSZAPC_LOGIC8(env, 0, 0, res); SET_FLAGS_OxxxxC(env, of, cf); break; @@ -791,7 +796,7 @@ void exec_shl(CPUX86State *env, struct x86_decode *decode) of = cf ^ (res >> 15); /* of = cf ^ result15 */ } - write_val_ext(env, decode->op[0].ptr, res, 2); + write_val_ext(env, &decode->op[0], res, 2); SET_FLAGS_OSZAPC_LOGIC16(env, 0, 0, res); SET_FLAGS_OxxxxC(env, of, cf); break; @@ -800,7 +805,7 @@ void exec_shl(CPUX86State *env, struct x86_decode *decode) { uint32_t res = decode->op[0].val << count; - write_val_ext(env, decode->op[0].ptr, res, 4); + write_val_ext(env, &decode->op[0], res, 4); SET_FLAGS_OSZAPC_LOGIC32(env, 0, 0, res); cf = (decode->op[0].val >> (32 - count)) & 0x1; of = cf ^ (res >> 31); /* of = cf ^ result31 */ @@ -831,10 +836,10 @@ void exec_movsx(CPUX86State *env, struct x86_decode *decode) decode->operand_size = src_op_size; calc_modrm_operand(env, decode, &decode->op[1]); - decode->op[1].val = sign(read_val_ext(env, decode->op[1].ptr, src_op_size), + decode->op[1].val = sign(read_val_ext(env, &decode->op[1], src_op_size), src_op_size); - write_val_ext(env, decode->op[0].ptr, decode->op[1].val, op_size); + write_val_ext(env, &decode->op[0], decode->op[1].val, op_size); env->eip += decode->len; } @@ -862,7 +867,7 @@ void 
exec_ror(CPUX86State *env, struct x86_decode *decode) count &= 0x7; /* use only bottom 3 bits */ res = ((uint8_t)decode->op[0].val >> count) | ((uint8_t)decode->op[0].val << (8 - count)); - write_val_ext(env, decode->op[0].ptr, res, 1); + write_val_ext(env, &decode->op[0], res, 1); bit6 = (res >> 6) & 1; bit7 = (res >> 7) & 1; /* set eflags: ROR count affects the following flags: C, O */ @@ -886,7 +891,7 @@ void exec_ror(CPUX86State *env, struct x86_decode *decode) count &= 0x0f; /* use only 4 LSB's */ res = ((uint16_t)decode->op[0].val >> count) | ((uint16_t)decode->op[0].val << (16 - count)); - write_val_ext(env, decode->op[0].ptr, res, 2); + write_val_ext(env, &decode->op[0], res, 2); bit14 = (res >> 14) & 1; bit15 = (res >> 15) & 1; @@ -904,7 +909,7 @@ void exec_ror(CPUX86State *env, struct x86_decode *decode) if (count) { res = ((uint32_t)decode->op[0].val >> count) | ((uint32_t)decode->op[0].val << (32 - count)); - write_val_ext(env, decode->op[0].ptr, res, 4); + write_val_ext(env, &decode->op[0], res, 4); bit31 = (res >> 31) & 1; bit30 = (res >> 30) & 1; @@ -941,7 +946,7 @@ void exec_rol(CPUX86State *env, struct x86_decode *decode) res = ((uint8_t)decode->op[0].val << count) | ((uint8_t)decode->op[0].val >> (8 - count)); - write_val_ext(env, decode->op[0].ptr, res, 1); + write_val_ext(env, &decode->op[0], res, 1); /* set eflags: * ROL count affects the following flags: C, O */ @@ -968,7 +973,7 @@ void exec_rol(CPUX86State *env, struct x86_decode *decode) res = ((uint16_t)decode->op[0].val << count) | ((uint16_t)decode->op[0].val >> (16 - count)); - write_val_ext(env, decode->op[0].ptr, res, 2); + write_val_ext(env, &decode->op[0], res, 2); bit0 = (res & 0x1); bit15 = (res >> 15); /* of = cf ^ result15 */ @@ -986,7 +991,7 @@ void exec_rol(CPUX86State *env, struct x86_decode *decode) res = ((uint32_t)decode->op[0].val << count) | ((uint32_t)decode->op[0].val >> (32 - count)); - write_val_ext(env, decode->op[0].ptr, res, 4); + write_val_ext(env, &decode->op[0], res, 4); bit0 = (res & 0x1); bit31 = (res >> 31); /* of = cf ^ result31 */ @@ -1024,7 +1029,7 @@ void exec_rcl(CPUX86State *env, struct x86_decode *decode) (op1_8 >> (9 - count)); } - write_val_ext(env, decode->op[0].ptr, res, 1); + write_val_ext(env, &decode->op[0], res, 1); cf = (op1_8 >> (8 - count)) & 0x01; of = cf ^ (res >> 7); /* of = cf ^ result7 */ @@ -1050,7 +1055,7 @@ void exec_rcl(CPUX86State *env, struct x86_decode *decode) (op1_16 >> (17 - count)); } - write_val_ext(env, decode->op[0].ptr, res, 2); + write_val_ext(env, &decode->op[0], res, 2); cf = (op1_16 >> (16 - count)) & 0x1; of = cf ^ (res >> 15); /* of = cf ^ result15 */ @@ -1073,7 +1078,7 @@ void exec_rcl(CPUX86State *env, struct x86_decode *decode) (op1_32 >> (33 - count)); } - write_val_ext(env, decode->op[0].ptr, res, 4); + write_val_ext(env, &decode->op[0], res, 4); cf = (op1_32 >> (32 - count)) & 0x1; of = cf ^ (res >> 31); /* of = cf ^ result31 */ @@ -1105,7 +1110,7 @@ void exec_rcr(CPUX86State *env, struct x86_decode *decode) res = (op1_8 >> count) | (get_CF(env) << (8 - count)) | (op1_8 << (9 - count)); - write_val_ext(env, decode->op[0].ptr, res, 1); + write_val_ext(env, &decode->op[0], res, 1); cf = (op1_8 >> (count - 1)) & 0x1; of = (((res << 1) ^ res) >> 7) & 0x1; /* of = result6 ^ result7 */ @@ -1124,7 +1129,7 @@ void exec_rcr(CPUX86State *env, struct x86_decode *decode) res = (op1_16 >> count) | (get_CF(env) << (16 - count)) | (op1_16 << (17 - count)); - write_val_ext(env, decode->op[0].ptr, res, 2); + write_val_ext(env, &decode->op[0], res, 2); 
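/*
 * The flag updates in these rotate helpers follow the architectural rule
 * that, after ROR, CF is the bit rotated into the most-significant position
 * (the MSB of the result) and, for a rotate count of 1, OF is the XOR of
 * the two most-significant result bits. A small standalone sketch of that
 * rule for the 8-bit case (illustrative names, not QEMU code):
 */

#include <stdbool.h>
#include <stdint.h>

static uint8_t ror8(uint8_t v, unsigned count, bool *cf, bool *of)
{
    count &= 0x7;                    /* use only the bottom 3 bits */
    if (count == 0) {
        return v;                    /* flags are left untouched */
    }

    uint8_t res = (uint8_t)((v >> count) | (v << (8 - count)));

    *cf = (res >> 7) & 1;                  /* bit rotated into the MSB */
    *of = ((res >> 7) ^ (res >> 6)) & 1;   /* defined only for count == 1 */
    return res;
}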
cf = (op1_16 >> (count - 1)) & 0x1; of = ((uint16_t)((res << 1) ^ res) >> 15) & 0x1; /* of = result15 ^ @@ -1148,7 +1153,7 @@ void exec_rcr(CPUX86State *env, struct x86_decode *decode) (op1_32 << (33 - count)); } - write_val_ext(env, decode->op[0].ptr, res, 4); + write_val_ext(env, &decode->op[0], res, 4); cf = (op1_32 >> (count - 1)) & 0x1; of = ((res << 1) ^ res) >> 31; /* of = result30 ^ result31 */ @@ -1163,9 +1168,9 @@ static void exec_xchg(CPUX86State *env, struct x86_decode *decode) { fetch_operands(env, decode, 2, true, true, false); - write_val_ext(env, decode->op[0].ptr, decode->op[1].val, + write_val_ext(env, &decode->op[0], decode->op[1].val, decode->operand_size); - write_val_ext(env, decode->op[1].ptr, decode->op[0].val, + write_val_ext(env, &decode->op[1], decode->op[0].val, decode->operand_size); env->eip += decode->len; @@ -1174,7 +1179,7 @@ static void exec_xchg(CPUX86State *env, struct x86_decode *decode) static void exec_xadd(CPUX86State *env, struct x86_decode *decode) { EXEC_2OP_FLAGS_CMD(env, decode, +, SET_FLAGS_OSZAPC_ADD, true); - write_val_ext(env, decode->op[1].ptr, decode->op[0].val, + write_val_ext(env, &decode->op[1], decode->op[0].val, decode->operand_size); env->eip += decode->len; @@ -1241,7 +1246,7 @@ static void init_cmd_handler(void) bool exec_instruction(CPUX86State *env, struct x86_decode *ins) { if (!_cmd_handler[ins->cmd].handler) { - printf("Unimplemented handler (%llx) for %d (%x %x) \n", env->eip, + printf("Unimplemented handler (" TARGET_FMT_lx ") for %d (%x %x) \n", env->eip, ins->cmd, ins->opcode[0], ins->opcode_len > 1 ? ins->opcode[1] : 0); env->eip += ins->len; diff --git a/target/i386/emulate/x86_emu.h b/target/i386/emulate/x86_emu.h index 555b567..a1a9612 100644 --- a/target/i386/emulate/x86_emu.h +++ b/target/i386/emulate/x86_emu.h @@ -42,11 +42,11 @@ void x86_emul_raise_exception(CPUX86State *env, int exception_index, int error_c target_ulong read_reg(CPUX86State *env, int reg, int size); void write_reg(CPUX86State *env, int reg, target_ulong val, int size); -target_ulong read_val_from_reg(target_ulong reg_ptr, int size); -void write_val_to_reg(target_ulong reg_ptr, target_ulong val, int size); -void write_val_ext(CPUX86State *env, target_ulong ptr, target_ulong val, int size); +target_ulong read_val_from_reg(void *reg_ptr, int size); +void write_val_to_reg(void *reg_ptr, target_ulong val, int size); +void write_val_ext(CPUX86State *env, struct x86_decode_op *decode, target_ulong val, int size); uint8_t *read_mmio(CPUX86State *env, target_ulong ptr, int bytes); -target_ulong read_val_ext(CPUX86State *env, target_ulong ptr, int size); +target_ulong read_val_ext(CPUX86State *env, struct x86_decode_op *decode, int size); void exec_movzx(CPUX86State *env, struct x86_decode *decode); void exec_shl(CPUX86State *env, struct x86_decode *decode); diff --git a/target/i386/emulate/x86_flags.c b/target/i386/emulate/x86_flags.c index 84e2736..6592193 100644 --- a/target/i386/emulate/x86_flags.c +++ b/target/i386/emulate/x86_flags.c @@ -14,8 +14,8 @@ // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA B 02110-1301 USA +// License along with this library; if not, see +// <https://www.gnu.org/licenses/>. 
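/*
 * The rework below drops the emulator-private X86LazyFlags state and keeps
 * the lazy condition codes in the cc_dst/cc_src fields instead: cc_dst
 * holds the sign-extended ALU result (used for ZF, SF and PF), while cc_src
 * holds the carry bits, with CF stored in the top bit and "partial overflow"
 * PO = CF ^ OF stored in the bit below it, so OF can be recovered as the
 * XOR of those two bits. A standalone illustration of that CF/PO encoding
 * (simplified names, not the QEMU helpers):
 */

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define BIT_CF 63                /* top bit of a 64-bit cc_src */
#define BIT_PO 62                /* "partial overflow": CF ^ OF */

static uint64_t store_of_cf(uint64_t cc_src, bool of, bool cf)
{
    cc_src &= ~((1ULL << BIT_CF) | (1ULL << BIT_PO));
    cc_src |= (uint64_t)cf << BIT_CF;
    cc_src |= (uint64_t)(cf ^ of) << BIT_PO;
    return cc_src;
}

static bool load_cf(uint64_t cc_src)
{
    return (cc_src >> BIT_CF) & 1;
}

static bool load_of(uint64_t cc_src)
{
    return ((cc_src >> BIT_CF) ^ (cc_src >> BIT_PO)) & 1;
}

int main(void)
{
    for (int of = 0; of <= 1; of++) {
        for (int cf = 0; cf <= 1; cf++) {
            uint64_t s = store_of_cf(0, of, cf);
            assert(load_cf(s) == cf && load_of(s) == of);
        }
    }
    return 0;
}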
///////////////////////////////////////////////////////////////////////// /* * flags functions @@ -29,41 +29,50 @@ #include "x86.h" -/* this is basically bocsh code */ +/* + * The algorithms here are similar to those in Bochs. After an ALU + * operation, CC_DST can be used to compute ZF, SF and PF, whereas + * CC_SRC is used to compute AF, CF and OF. In reality, SF and PF are the + * XOR of the value computed from CC_DST and the value found in bits 7 and 2 + * of CC_SRC; this way the same logic can be used to compute the flags + * both before and after an ALU operation. + * + * Compared to the TCG CC_OP codes, this avoids conditionals when converting + * to and from the RFLAGS representation. + */ -#define LF_SIGN_BIT 31 +#define LF_SIGN_BIT (TARGET_LONG_BITS - 1) -#define LF_BIT_SD (0) /* lazy Sign Flag Delta */ -#define LF_BIT_AF (3) /* lazy Adjust flag */ -#define LF_BIT_PDB (8) /* lazy Parity Delta Byte (8 bits) */ -#define LF_BIT_CF (31) /* lazy Carry Flag */ -#define LF_BIT_PO (30) /* lazy Partial Overflow = CF ^ OF */ +#define LF_BIT_PD (2) /* lazy Parity Delta, same bit as PF */ +#define LF_BIT_AF (3) /* lazy Adjust flag */ +#define LF_BIT_SD (7) /* lazy Sign Flag Delta, same bit as SF */ +#define LF_BIT_CF (TARGET_LONG_BITS - 1) /* lazy Carry Flag */ +#define LF_BIT_PO (TARGET_LONG_BITS - 2) /* lazy Partial Overflow = CF ^ OF */ -#define LF_MASK_SD (0x01 << LF_BIT_SD) -#define LF_MASK_AF (0x01 << LF_BIT_AF) -#define LF_MASK_PDB (0xFF << LF_BIT_PDB) -#define LF_MASK_CF (0x01 << LF_BIT_CF) -#define LF_MASK_PO (0x01 << LF_BIT_PO) +#define LF_MASK_PD ((target_ulong)0x01 << LF_BIT_PD) +#define LF_MASK_AF ((target_ulong)0x01 << LF_BIT_AF) +#define LF_MASK_SD ((target_ulong)0x01 << LF_BIT_SD) +#define LF_MASK_CF ((target_ulong)0x01 << LF_BIT_CF) +#define LF_MASK_PO ((target_ulong)0x01 << LF_BIT_PO) /* ******************* */ /* OSZAPC */ /* ******************* */ -/* size, carries, result */ +/* use carries to fill in AF, PO and CF, while ensuring PD and SD are clear. + * for full-word operations just clear PD and SD; for smaller operand + * sizes only keep AF in the low byte and shift the carries left to + * place PO and CF in the top two bits. + */ #define SET_FLAGS_OSZAPC_SIZE(size, lf_carries, lf_result) { \ - target_ulong temp = ((lf_carries) & (LF_MASK_AF)) | \ - (((lf_carries) >> (size - 2)) << LF_BIT_PO); \ - env->lflags.result = (target_ulong)(int##size##_t)(lf_result); \ - if ((size) == 32) { \ - temp = ((lf_carries) & ~(LF_MASK_PDB | LF_MASK_SD)); \ - } else if ((size) == 16) { \ - temp = ((lf_carries) & (LF_MASK_AF)) | ((lf_carries) << 16); \ - } else if ((size) == 8) { \ - temp = ((lf_carries) & (LF_MASK_AF)) | ((lf_carries) << 24); \ + env->cc_dst = (target_ulong)(int##size##_t)(lf_result); \ + target_ulong temp = (lf_carries); \ + if ((size) == TARGET_LONG_BITS) { \ + temp = temp & ~(LF_MASK_PD | LF_MASK_SD); \ } else { \ - VM_PANIC("unimplemented"); \ + temp = (temp & LF_MASK_AF) | (temp << (TARGET_LONG_BITS - (size))); \ } \ - env->lflags.auxbits = (target_ulong)(uint32_t)temp; \ + env->cc_src = temp; \ } /* carries, result */ @@ -77,23 +86,18 @@ /* ******************* */ /* OSZAP */ /* ******************* */ -/* size, carries, result */ +/* same as setting OSZAPC, but preserve CF and flip PO if the old value of CF + * did not match the high bit of lf_carries. 
*/ #define SET_FLAGS_OSZAP_SIZE(size, lf_carries, lf_result) { \ - target_ulong temp = ((lf_carries) & (LF_MASK_AF)) | \ - (((lf_carries) >> (size - 2)) << LF_BIT_PO); \ - if ((size) == 32) { \ - temp = ((lf_carries) & ~(LF_MASK_PDB | LF_MASK_SD)); \ - } else if ((size) == 16) { \ - temp = ((lf_carries) & (LF_MASK_AF)) | ((lf_carries) << 16); \ - } else if ((size) == 8) { \ - temp = ((lf_carries) & (LF_MASK_AF)) | ((lf_carries) << 24); \ + env->cc_dst = (target_ulong)(int##size##_t)(lf_result); \ + target_ulong temp = (lf_carries); \ + if ((size) == TARGET_LONG_BITS) { \ + temp = (temp & ~(LF_MASK_PD | LF_MASK_SD)); \ } else { \ - VM_PANIC("unimplemented"); \ + temp = (temp & LF_MASK_AF) | (temp << (TARGET_LONG_BITS - (size))); \ } \ - env->lflags.result = (target_ulong)(int##size##_t)(lf_result); \ - target_ulong delta_c = (env->lflags.auxbits ^ temp) & LF_MASK_CF; \ - delta_c ^= (delta_c >> 1); \ - env->lflags.auxbits = (target_ulong)(uint32_t)(temp ^ delta_c); \ + target_ulong cf_changed = ((target_long)(env->cc_src ^ temp)) < 0; \ + env->cc_src = temp ^ (cf_changed * (LF_MASK_PO | LF_MASK_CF)); \ } /* carries, result */ @@ -104,11 +108,11 @@ #define SET_FLAGS_OSZAP_32(carries, result) \ SET_FLAGS_OSZAP_SIZE(32, carries, result) -void SET_FLAGS_OxxxxC(CPUX86State *env, uint32_t new_of, uint32_t new_cf) +void SET_FLAGS_OxxxxC(CPUX86State *env, bool new_of, bool new_cf) { - uint32_t temp_po = new_of ^ new_cf; - env->lflags.auxbits &= ~(LF_MASK_PO | LF_MASK_CF); - env->lflags.auxbits |= (temp_po << LF_BIT_PO) | (new_cf << LF_BIT_CF); + env->cc_src &= ~(LF_MASK_PO | LF_MASK_CF); + env->cc_src |= (-(target_ulong)new_cf << LF_BIT_PO); + env->cc_src ^= ((target_ulong)new_of << LF_BIT_PO); } void SET_FLAGS_OSZAPC_SUB32(CPUX86State *env, uint32_t v1, uint32_t v2, @@ -202,104 +206,68 @@ void SET_FLAGS_OSZAPC_LOGIC8(CPUX86State *env, uint8_t v1, uint8_t v2, SET_FLAGS_OSZAPC_8(0, diff); } -bool get_PF(CPUX86State *env) -{ - uint32_t temp = (255 & env->lflags.result); - temp = temp ^ (255 & (env->lflags.auxbits >> LF_BIT_PDB)); - temp = (temp ^ (temp >> 4)) & 0x0F; - return (0x9669U >> temp) & 1; -} - -void set_PF(CPUX86State *env, bool val) +static inline uint32_t get_PF(CPUX86State *env) { - uint32_t temp = (255 & env->lflags.result) ^ (!val); - env->lflags.auxbits &= ~(LF_MASK_PDB); - env->lflags.auxbits |= (temp << LF_BIT_PDB); + return ((parity8(env->cc_dst) - 1) ^ env->cc_src) & CC_P; } -bool get_OF(CPUX86State *env) +static inline uint32_t get_OF(CPUX86State *env) { - return ((env->lflags.auxbits + (1U << LF_BIT_PO)) >> LF_BIT_CF) & 1; + return ((env->cc_src >> (LF_BIT_CF - 11)) + CC_O / 2) & CC_O; } bool get_CF(CPUX86State *env) { - return (env->lflags.auxbits >> LF_BIT_CF) & 1; -} - -void set_OF(CPUX86State *env, bool val) -{ - bool old_cf = get_CF(env); - SET_FLAGS_OxxxxC(env, val, old_cf); + return ((target_long)env->cc_src) < 0; } void set_CF(CPUX86State *env, bool val) { - bool old_of = get_OF(env); - SET_FLAGS_OxxxxC(env, old_of, val); + /* If CF changes, flip PO and CF */ + target_ulong temp = -(target_ulong)val; + target_ulong cf_changed = ((target_long)(env->cc_src ^ temp)) < 0; + env->cc_src ^= cf_changed * (LF_MASK_PO | LF_MASK_CF); } -bool get_AF(CPUX86State *env) +static inline uint32_t get_ZF(CPUX86State *env) { - return (env->lflags.auxbits >> LF_BIT_AF) & 1; + return env->cc_dst ? 
0 : CC_Z; } -void set_AF(CPUX86State *env, bool val) +static inline uint32_t get_SF(CPUX86State *env) { - env->lflags.auxbits &= ~(LF_MASK_AF); - env->lflags.auxbits |= val << LF_BIT_AF; + return ((env->cc_dst >> (LF_SIGN_BIT - LF_BIT_SD)) ^ + env->cc_src) & CC_S; } -bool get_ZF(CPUX86State *env) +void lflags_to_rflags(CPUX86State *env) { - return !env->lflags.result; + env->eflags &= ~(CC_C|CC_P|CC_A|CC_Z|CC_S|CC_O); + /* rotate left by one to move carry-out bits into CF and AF */ + env->eflags |= ( + (env->cc_src << 1) | + (env->cc_src >> (TARGET_LONG_BITS - 1))) & (CC_C | CC_A); + env->eflags |= get_SF(env); + env->eflags |= get_PF(env); + env->eflags |= get_ZF(env); + env->eflags |= get_OF(env); } -void set_ZF(CPUX86State *env, bool val) +void rflags_to_lflags(CPUX86State *env) { - if (val) { - env->lflags.auxbits ^= - (((env->lflags.result >> LF_SIGN_BIT) & 1) << LF_BIT_SD); - /* merge the parity bits into the Parity Delta Byte */ - uint32_t temp_pdb = (255 & env->lflags.result); - env->lflags.auxbits ^= (temp_pdb << LF_BIT_PDB); - /* now zero the .result value */ - env->lflags.result = 0; - } else { - env->lflags.result |= (1 << 8); - } -} + target_ulong cf_af, cf_xor_of; -bool get_SF(CPUX86State *env) -{ - return ((env->lflags.result >> LF_SIGN_BIT) ^ - (env->lflags.auxbits >> LF_BIT_SD)) & 1; -} + /* Leave the low byte zero so that parity is always even... */ + env->cc_dst = !(env->eflags & CC_Z) << 8; -void set_SF(CPUX86State *env, bool val) -{ - bool temp_sf = get_SF(env); - env->lflags.auxbits ^= (temp_sf ^ val) << LF_BIT_SD; -} + /* ... and therefore cc_src always uses opposite polarity. */ + env->cc_src = CC_P; + env->cc_src ^= env->eflags & (CC_S | CC_P); -void lflags_to_rflags(CPUX86State *env) -{ - env->eflags &= ~(CC_C|CC_P|CC_A|CC_Z|CC_S|CC_O); - env->eflags |= get_CF(env) ? CC_C : 0; - env->eflags |= get_PF(env) ? CC_P : 0; - env->eflags |= get_AF(env) ? CC_A : 0; - env->eflags |= get_ZF(env) ? CC_Z : 0; - env->eflags |= get_SF(env) ? CC_S : 0; - env->eflags |= get_OF(env) ? CC_O : 0; -} + /* rotate right by one to move CF and AF into the carry-out positions */ + cf_af = env->eflags & (CC_C | CC_A); + env->cc_src |= ((cf_af >> 1) | (cf_af << (TARGET_LONG_BITS - 1))); -void rflags_to_lflags(CPUX86State *env) -{ - env->lflags.auxbits = env->lflags.result = 0; - set_OF(env, env->eflags & CC_O); - set_SF(env, env->eflags & CC_S); - set_ZF(env, env->eflags & CC_Z); - set_AF(env, env->eflags & CC_A); - set_PF(env, env->eflags & CC_P); - set_CF(env, env->eflags & CC_C); + cf_xor_of = ((env->eflags & (CC_C | CC_O)) + (CC_O - CC_C)) & CC_O; + env->cc_src |= -cf_xor_of & LF_MASK_PO; } diff --git a/target/i386/emulate/x86_flags.h b/target/i386/emulate/x86_flags.h index 6c17500..a395c83 100644 --- a/target/i386/emulate/x86_flags.h +++ b/target/i386/emulate/x86_flags.h @@ -14,8 +14,8 @@ // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA B 02110-1301 USA +// License along with this library; if not, see +// <https://www.gnu.org/licenses/>. 
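To make the lazy-flags encoding introduced in x86_flags.c above concrete: after an ALU operation, cc_dst holds the sign-extended result while cc_src holds the carry-out bits, with the carry out of bit 3 kept at bit 3 (AF), the carry out of the top bit at bit TARGET_LONG_BITS-1 (CF), PO = CF ^ OF one bit below that, and the SD/PD bits at positions 7 and 2 flipping SF and PF respectively. The standalone sketch below is an annotation rather than patch code; it assumes a 64-bit target_ulong and GCC/Clang's __builtin_parityll, and shows how the individual EFLAGS bits are recovered from that pair, mirroring get_PF(), get_OF(), get_SF() and lflags_to_rflags() above.

#include <stdbool.h>
#include <stdint.h>

#define CC_C 0x0001
#define CC_P 0x0004
#define CC_A 0x0010
#define CC_Z 0x0040
#define CC_S 0x0080
#define CC_O 0x0800

/* Illustration only: recover EFLAGS from the lazy cc_dst/cc_src pair. */
static uint32_t lazy_flags_to_eflags(uint64_t cc_dst, uint64_t cc_src)
{
    uint32_t eflags = 0;
    bool cf = cc_src >> 63;         /* LF_BIT_CF: carry out of the top bit */
    bool po = (cc_src >> 62) & 1;   /* LF_BIT_PO: CF ^ OF */
    bool af = (cc_src >> 3) & 1;    /* LF_BIT_AF: carry out of bit 3 */
    bool sd = (cc_src >> 7) & 1;    /* LF_BIT_SD: flips SF when set */
    bool pd = (cc_src >> 2) & 1;    /* LF_BIT_PD: flips PF when set */

    eflags |= cf ? CC_C : 0;
    eflags |= af ? CC_A : 0;
    eflags |= (cf ^ po) ? CC_O : 0;                 /* OF = CF ^ PO */
    eflags |= (cc_dst == 0) ? CC_Z : 0;
    eflags |= (((cc_dst >> 63) & 1) ^ sd) ? CC_S : 0;
    /* PF is set for even parity of the low result byte, XORed with PD */
    eflags |= ((__builtin_parityll(cc_dst & 0xff) ^ 1) ^ pd) ? CC_P : 0;
    return eflags;
}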
///////////////////////////////////////////////////////////////////////// /* * x86 eflags functions @@ -28,20 +28,10 @@ void lflags_to_rflags(CPUX86State *env); void rflags_to_lflags(CPUX86State *env); -bool get_PF(CPUX86State *env); -void set_PF(CPUX86State *env, bool val); bool get_CF(CPUX86State *env); void set_CF(CPUX86State *env, bool val); -bool get_AF(CPUX86State *env); -void set_AF(CPUX86State *env, bool val); -bool get_ZF(CPUX86State *env); -void set_ZF(CPUX86State *env, bool val); -bool get_SF(CPUX86State *env); -void set_SF(CPUX86State *env, bool val); -bool get_OF(CPUX86State *env); -void set_OF(CPUX86State *env, bool val); -void SET_FLAGS_OxxxxC(CPUX86State *env, uint32_t new_of, uint32_t new_cf); +void SET_FLAGS_OxxxxC(CPUX86State *env, bool new_of, bool new_cf); void SET_FLAGS_OSZAPC_SUB32(CPUX86State *env, uint32_t v1, uint32_t v2, uint32_t diff); diff --git a/target/i386/helper.c b/target/i386/helper.c index c07b1b1..e0aaed3 100644 --- a/target/i386/helper.c +++ b/target/i386/helper.c @@ -22,9 +22,11 @@ #include "cpu.h" #include "exec/cputlb.h" #include "exec/translation-block.h" +#include "exec/target_page.h" #include "system/runstate.h" #ifndef CONFIG_USER_ONLY #include "system/hw_accel.h" +#include "system/memory.h" #include "monitor/monitor.h" #include "kvm/kvm_i386.h" #endif @@ -524,7 +526,7 @@ void cpu_x86_inject_mce(Monitor *mon, X86CPU *cpu, int bank, static inline target_ulong get_memio_eip(CPUX86State *env) { #ifdef CONFIG_TCG - uint64_t data[TARGET_INSN_START_WORDS]; + uint64_t data[INSN_START_WORDS]; CPUState *cs = env_cpu(env); if (!cpu_unwind_state_data(cs, cs->mem_io_pc, data)) { diff --git a/target/i386/host-cpu.c b/target/i386/host-cpu.c index 072731a..d5e2bb5 100644 --- a/target/i386/host-cpu.c +++ b/target/i386/host-cpu.c @@ -15,7 +15,7 @@ #include "system/system.h" /* Note: Only safe for use on x86(-64) hosts */ -static uint32_t host_cpu_phys_bits(void) +uint32_t host_cpu_phys_bits(void) { uint32_t eax; uint32_t host_phys_bits; @@ -80,7 +80,6 @@ bool host_cpu_realizefn(CPUState *cs, Error **errp) return true; } -#define CPUID_MODEL_ID_SZ 48 /** * cpu_x86_fill_model_id: * Get CPUID model ID string from host CPU. @@ -118,13 +117,13 @@ void host_cpu_vendor_fms(char *vendor, int *family, int *model, int *stepping) host_cpuid(0x1, 0, &eax, &ebx, &ecx, &edx); if (family) { - *family = ((eax >> 8) & 0x0F) + ((eax >> 20) & 0xFF); + *family = x86_cpu_family(eax); } if (model) { - *model = ((eax >> 4) & 0x0F) | ((eax & 0xF0000) >> 12); + *model = x86_cpu_model(eax); } if (stepping) { - *stepping = eax & 0x0F; + *stepping = x86_cpu_stepping(eax); } } @@ -132,27 +131,27 @@ void host_cpu_instance_init(X86CPU *cpu) { X86CPUClass *xcc = X86_CPU_GET_CLASS(cpu); - if (xcc->model) { - char vendor[CPUID_VENDOR_SZ + 1]; - - host_cpu_vendor_fms(vendor, NULL, NULL, NULL); - object_property_set_str(OBJECT(cpu), "vendor", vendor, &error_abort); - } -} - -void host_cpu_max_instance_init(X86CPU *cpu) -{ char vendor[CPUID_VENDOR_SZ + 1] = { 0 }; char model_id[CPUID_MODEL_ID_SZ + 1] = { 0 }; int family, model, stepping; - /* Use max host physical address bits if -cpu max option is applied */ - object_property_set_bool(OBJECT(cpu), "host-phys-bits", true, &error_abort); - + /* + * setting vendor applies to both max/host and builtin_x86_defs CPU. + * FIXME: this probably should warn or should be skipped if vendors do + * not match, because family numbers are incompatible between Intel and AMD. 
+ */ host_cpu_vendor_fms(vendor, &family, &model, &stepping); + object_property_set_str(OBJECT(cpu), "vendor", vendor, &error_abort); + + if (!xcc->max_features) { + return; + } + host_cpu_fill_model_id(model_id); - object_property_set_str(OBJECT(cpu), "vendor", vendor, &error_abort); + /* Use max host physical address bits if -cpu max option is applied */ + object_property_set_bool(OBJECT(cpu), "host-phys-bits", true, &error_abort); + object_property_set_int(OBJECT(cpu), "family", family, &error_abort); object_property_set_int(OBJECT(cpu), "model", model, &error_abort); object_property_set_int(OBJECT(cpu), "stepping", stepping, @@ -161,7 +160,16 @@ void host_cpu_max_instance_init(X86CPU *cpu) &error_abort); } -static void host_cpu_class_init(ObjectClass *oc, void *data) +bool is_host_cpu_intel(void) +{ + char vendor[CPUID_VENDOR_SZ + 1]; + + host_cpu_vendor_fms(vendor, NULL, NULL, NULL); + + return g_str_equal(vendor, CPUID_VENDOR_INTEL); +} + +static void host_cpu_class_init(ObjectClass *oc, const void *data) { X86CPUClass *xcc = X86_CPU_CLASS(oc); diff --git a/target/i386/host-cpu.h b/target/i386/host-cpu.h index 6a9bc91..10df4b3 100644 --- a/target/i386/host-cpu.h +++ b/target/i386/host-cpu.h @@ -10,10 +10,12 @@ #ifndef HOST_CPU_H #define HOST_CPU_H +uint32_t host_cpu_phys_bits(void); void host_cpu_instance_init(X86CPU *cpu); void host_cpu_max_instance_init(X86CPU *cpu); bool host_cpu_realizefn(CPUState *cs, Error **errp); void host_cpu_vendor_fms(char *vendor, int *family, int *model, int *stepping); +bool is_host_cpu_intel(void); #endif /* HOST_CPU_H */ diff --git a/target/i386/hvf/hvf-cpu.c b/target/i386/hvf/hvf-cpu.c index b5f4c80..94ee096 100644 --- a/target/i386/hvf/hvf-cpu.c +++ b/target/i386/hvf/hvf-cpu.c @@ -21,8 +21,6 @@ static void hvf_cpu_max_instance_init(X86CPU *cpu) { CPUX86State *env = &cpu->env; - host_cpu_max_instance_init(cpu); - env->cpuid_min_level = hvf_get_supported_cpuid(0x0, 0, R_EAX); env->cpuid_min_xlevel = @@ -61,20 +59,21 @@ static void hvf_cpu_xsave_init(void) static void hvf_cpu_instance_init(CPUState *cs) { X86CPU *cpu = X86_CPU(cs); + X86CPUClass *xcc = X86_CPU_GET_CLASS(cpu); host_cpu_instance_init(cpu); /* Special cases not set in the X86CPUDefinition structs: */ /* TODO: in-kernel irqchip for hvf */ - if (cpu->max_features) { + if (xcc->max_features) { hvf_cpu_max_instance_init(cpu); } hvf_cpu_xsave_init(); } -static void hvf_cpu_accel_class_init(ObjectClass *oc, void *data) +static void hvf_cpu_accel_class_init(ObjectClass *oc, const void *data) { AccelCPUClass *acc = ACCEL_CPU_CLASS(oc); diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c index 23ebf25..818b504 100644 --- a/target/i386/hvf/hvf.c +++ b/target/i386/hvf/hvf.c @@ -76,6 +76,7 @@ #include "qemu/main-loop.h" #include "qemu/accel.h" #include "target/i386/cpu.h" +#include "exec/target_page.h" static Error *invtsc_mig_blocker; @@ -732,9 +733,9 @@ int hvf_vcpu_exec(CPUState *cpu) } do { - if (cpu->accel->dirty) { + if (cpu->vcpu_dirty) { hvf_put_registers(cpu); - cpu->accel->dirty = false; + cpu->vcpu_dirty = false; } if (hvf_inject_interrupts(cpu)) { diff --git a/target/i386/hvf/vmx.h b/target/i386/hvf/vmx.h index 3c56afc..26d6029 100644 --- a/target/i386/hvf/vmx.h +++ b/target/i386/hvf/vmx.h @@ -33,7 +33,8 @@ #include "system/hvf.h" #include "system/hvf_int.h" -#include "exec/address-spaces.h" +#include "system/address-spaces.h" +#include "system/memory.h" static inline uint64_t rreg(hv_vcpuid_t vcpu, hv_x86_reg_t reg) { diff --git a/target/i386/hvf/x86_cpuid.c 
b/target/i386/hvf/x86_cpuid.c index fa131b1..0798a0c 100644 --- a/target/i386/hvf/x86_cpuid.c +++ b/target/i386/hvf/x86_cpuid.c @@ -73,7 +73,7 @@ uint32_t hvf_get_supported_cpuid(uint32_t func, uint32_t idx, CPUID_MSR | CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC | CPUID_SEP | CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV | CPUID_PAT | CPUID_PSE36 | CPUID_CLFLUSH | CPUID_MMX | - CPUID_FXSR | CPUID_SSE | CPUID_SSE2 | CPUID_SS; + CPUID_FXSR | CPUID_SSE | CPUID_SSE2 | CPUID_SS | CPUID_HT; ecx &= CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSSE3 | CPUID_EXT_FMA | CPUID_EXT_CX16 | CPUID_EXT_PCID | CPUID_EXT_SSE41 | CPUID_EXT_SSE42 | CPUID_EXT_MOVBE | diff --git a/target/i386/hvf/x86hvf.c b/target/i386/hvf/x86hvf.c index 2057314..17fce1d 100644 --- a/target/i386/hvf/x86hvf.c +++ b/target/i386/hvf/x86hvf.c @@ -427,7 +427,7 @@ int hvf_process_events(CPUState *cs) X86CPU *cpu = X86_CPU(cs); CPUX86State *env = &cpu->env; - if (!cs->accel->dirty) { + if (!cs->vcpu_dirty) { /* light weight sync for CPU_INTERRUPT_HARD and IF_MASK */ env->eflags = rreg(cs->accel->fd, HV_X86_RFLAGS); } diff --git a/target/i386/kvm/hyperv.c b/target/i386/kvm/hyperv.c index 70b89ca..9865120 100644 --- a/target/i386/kvm/hyperv.c +++ b/target/i386/kvm/hyperv.c @@ -13,6 +13,7 @@ #include "qemu/osdep.h" #include "qemu/main-loop.h" +#include "exec/target_page.h" #include "hyperv.h" #include "hw/hyperv/hyperv.h" #include "hyperv-proto.h" diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c index 6269fa8..89a7953 100644 --- a/target/i386/kvm/kvm-cpu.c +++ b/target/i386/kvm/kvm-cpu.c @@ -41,6 +41,7 @@ static void kvm_set_guest_phys_bits(CPUState *cs) static bool kvm_cpu_realizefn(CPUState *cs, Error **errp) { X86CPU *cpu = X86_CPU(cs); + X86CPUClass *xcc = X86_CPU_GET_CLASS(cpu); CPUX86State *env = &cpu->env; bool ret; @@ -63,7 +64,7 @@ static bool kvm_cpu_realizefn(CPUState *cs, Error **errp) * check/update ucode_rev, phys_bits, guest_phys_bits, mwait * cpu_common_realizefn() (via xcc->parent_realize) */ - if (cpu->max_features) { + if (xcc->max_features) { if (enable_cpu_pm) { if (kvm_has_waitpkg()) { env->features[FEAT_7_0_ECX] |= CPUID_7_0_ECX_WAITPKG; @@ -72,7 +73,7 @@ static bool kvm_cpu_realizefn(CPUState *cs, Error **errp) if (env->features[FEAT_1_ECX] & CPUID_EXT_MONITOR) { host_cpuid(5, 0, &cpu->mwait.eax, &cpu->mwait.ebx, &cpu->mwait.ecx, &cpu->mwait.edx); - } + } } if (cpu->ucode_rev == 0) { cpu->ucode_rev = @@ -108,7 +109,7 @@ static void kvm_cpu_max_instance_init(X86CPU *cpu) CPUX86State *env = &cpu->env; KVMState *s = kvm_state; - host_cpu_max_instance_init(cpu); + object_property_set_bool(OBJECT(cpu), "pmu", true, &error_abort); if (lmce_supported()) { object_property_set_bool(OBJECT(cpu), "lmce", true, &error_abort); @@ -216,14 +217,14 @@ static void kvm_cpu_instance_init(CPUState *cs) x86_cpu_apply_props(cpu, kvm_default_props); } - if (cpu->max_features) { + if (xcc->max_features) { kvm_cpu_max_instance_init(cpu); } kvm_cpu_xsave_init(); } -static void kvm_cpu_accel_class_init(ObjectClass *oc, void *data) +static void kvm_cpu_accel_class_init(ObjectClass *oc, const void *data) { AccelCPUClass *acc = ACCEL_CPU_CLASS(oc); diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index 6c749d4..e8c8be0 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -38,6 +38,7 @@ #include "kvm_i386.h" #include "../confidential-guest.h" #include "sev.h" +#include "tdx.h" #include "xen-emu.h" #include "hyperv.h" #include "hyperv-proto.h" @@ -67,6 +68,7 @@ #include "hw/pci/msix.h" #include 
"migration/blocker.h" #include "exec/memattrs.h" +#include "exec/target_page.h" #include "trace.h" #include CONFIG_DEVICES @@ -191,6 +193,7 @@ static const char *vm_type_name[] = { [KVM_X86_SEV_VM] = "SEV", [KVM_X86_SEV_ES_VM] = "SEV-ES", [KVM_X86_SNP_VM] = "SEV-SNP", + [KVM_X86_TDX_VM] = "TDX", }; bool kvm_is_vm_type_supported(int type) @@ -325,7 +328,7 @@ void kvm_synchronize_all_tsc(void) { CPUState *cpu; - if (kvm_enabled()) { + if (kvm_enabled() && !is_tdx_vm()) { CPU_FOREACH(cpu) { run_on_cpu(cpu, do_kvm_synchronize_tsc, RUN_ON_CPU_NULL); } @@ -391,7 +394,7 @@ static bool host_tsx_broken(void) /* Returns the value for a specific register on the cpuid entry */ -static uint32_t cpuid_entry_get_reg(struct kvm_cpuid_entry2 *entry, int reg) +uint32_t cpuid_entry_get_reg(struct kvm_cpuid_entry2 *entry, int reg) { uint32_t ret = 0; switch (reg) { @@ -413,9 +416,9 @@ static uint32_t cpuid_entry_get_reg(struct kvm_cpuid_entry2 *entry, int reg) /* Find matching entry for function/index on kvm_cpuid2 struct */ -static struct kvm_cpuid_entry2 *cpuid_find_entry(struct kvm_cpuid2 *cpuid, - uint32_t function, - uint32_t index) +struct kvm_cpuid_entry2 *cpuid_find_entry(struct kvm_cpuid2 *cpuid, + uint32_t function, + uint32_t index) { int i; for (i = 0; i < cpuid->nent; ++i) { @@ -571,7 +574,7 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function, } if (current_machine->cgs) { - ret = x86_confidential_guest_mask_cpuid_features( + ret = x86_confidential_guest_adjust_cpuid_features( X86_CONFIDENTIAL_GUEST(current_machine->cgs), function, index, reg, ret); } @@ -867,6 +870,15 @@ static int kvm_arch_set_tsc_khz(CPUState *cs) int r, cur_freq; bool set_ioctl = false; + /* + * TSC of TD vcpu is immutable, it cannot be set/changed via vcpu scope + * VM_SET_TSC_KHZ, but only be initialized via VM scope VM_SET_TSC_KHZ + * before ioctl KVM_TDX_INIT_VM in tdx_pre_create_vcpu() + */ + if (is_tdx_vm()) { + return 0; + } + if (!env->tsc_khz) { return 0; } @@ -1778,8 +1790,6 @@ static int hyperv_init_vcpu(X86CPU *cpu) static Error *invtsc_mig_blocker; -#define KVM_MAX_CPUID_ENTRIES 100 - static void kvm_init_xsave(CPUX86State *env) { if (has_xsave2) { @@ -1822,9 +1832,8 @@ static void kvm_init_nested_state(CPUX86State *env) } } -static uint32_t kvm_x86_build_cpuid(CPUX86State *env, - struct kvm_cpuid_entry2 *entries, - uint32_t cpuid_i) +uint32_t kvm_x86_build_cpuid(CPUX86State *env, struct kvm_cpuid_entry2 *entries, + uint32_t cpuid_i) { uint32_t limit, i, j; uint32_t unused; @@ -1863,7 +1872,7 @@ static uint32_t kvm_x86_build_cpuid(CPUX86State *env, break; } case 0x1f: - if (!x86_has_extended_topo(env->avail_cpu_topo)) { + if (!x86_has_cpuid_0x1f(env_archcpu(env))) { cpuid_i--; break; } @@ -2051,6 +2060,15 @@ full: abort(); } +int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp) +{ + if (is_tdx_vm()) { + return tdx_pre_create_vcpu(cpu, errp); + } + + return 0; +} + int kvm_arch_init_vcpu(CPUState *cs) { struct { @@ -2075,6 +2093,14 @@ int kvm_arch_init_vcpu(CPUState *cs) int r; Error *local_err = NULL; + if (current_machine->cgs) { + r = x86_confidential_guest_check_features( + X86_CONFIDENTIAL_GUEST(current_machine->cgs), cs); + if (r < 0) { + return r; + } + } + memset(&cpuid_data, 0, sizeof(cpuid_data)); cpuid_i = 0; @@ -2233,7 +2259,7 @@ int kvm_arch_init_vcpu(CPUState *cs) cpuid_i = kvm_x86_build_cpuid(env, cpuid_data.entries, cpuid_i); cpuid_data.cpuid.nent = cpuid_i; - if (((env->cpuid_version >> 8)&0xF) >= 6 + if (x86_cpu_family(env->cpuid_version) >= 6 && 
(env->features[FEAT_1_EDX] & (CPUID_MCE | CPUID_MCA)) == (CPUID_MCE | CPUID_MCA)) { uint64_t mcg_cap, unsupported_caps; @@ -3205,16 +3231,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) Error *local_err = NULL; /* - * Initialize SEV context, if required - * - * If no memory encryption is requested (ms->cgs == NULL) this is - * a no-op. - * - * It's also a no-op if a non-SEV confidential guest support - * mechanism is selected. SEV is the only mechanism available to - * select on x86 at present, so this doesn't arise, but if new - * mechanisms are supported in future (e.g. TDX), they'll need - * their own initialization either here or elsewhere. + * Initialize confidential guest (SEV/TDX) context, if required */ if (ms->cgs) { ret = confidential_guest_kvm_init(ms->cgs, &local_err); @@ -3855,32 +3872,34 @@ static void kvm_init_msrs(X86CPU *cpu) CPUX86State *env = &cpu->env; kvm_msr_buf_reset(cpu); - if (has_msr_arch_capabs) { - kvm_msr_entry_add(cpu, MSR_IA32_ARCH_CAPABILITIES, - env->features[FEAT_ARCH_CAPABILITIES]); - } - if (has_msr_core_capabs) { - kvm_msr_entry_add(cpu, MSR_IA32_CORE_CAPABILITY, - env->features[FEAT_CORE_CAPABILITY]); - } + if (!is_tdx_vm()) { + if (has_msr_arch_capabs) { + kvm_msr_entry_add(cpu, MSR_IA32_ARCH_CAPABILITIES, + env->features[FEAT_ARCH_CAPABILITIES]); + } - if (has_msr_perf_capabs && cpu->enable_pmu) { - kvm_msr_entry_add_perf(cpu, env->features); + if (has_msr_core_capabs) { + kvm_msr_entry_add(cpu, MSR_IA32_CORE_CAPABILITY, + env->features[FEAT_CORE_CAPABILITY]); + } + + if (has_msr_perf_capabs && cpu->enable_pmu) { + kvm_msr_entry_add_perf(cpu, env->features); + } + + /* + * Older kernels do not include VMX MSRs in KVM_GET_MSR_INDEX_LIST, but + * all kernels with MSR features should have them. + */ + if (kvm_feature_msrs && cpu_has_vmx(env)) { + kvm_msr_entry_add_vmx(cpu, env->features); + } } if (has_msr_ucode_rev) { kvm_msr_entry_add(cpu, MSR_IA32_UCODE_REV, cpu->ucode_rev); } - - /* - * Older kernels do not include VMX MSRs in KVM_GET_MSR_INDEX_LIST, but - * all kernels with MSR features should have them. - */ - if (kvm_feature_msrs && cpu_has_vmx(env)) { - kvm_msr_entry_add_vmx(cpu, env->features); - } - assert(kvm_buf_set_msrs(cpu) == 0); } @@ -5999,9 +6018,11 @@ static bool host_supports_vmx(void) * because private/shared page tracking is already provided through other * means, these 2 use-cases should be treated as being mutually-exclusive. */ -static int kvm_handle_hc_map_gpa_range(struct kvm_run *run) +static int kvm_handle_hc_map_gpa_range(X86CPU *cpu, struct kvm_run *run) { + struct kvm_pre_fault_memory mem; uint64_t gpa, size, attributes; + int ret; if (!machine_require_guest_memfd(current_machine)) return -EINVAL; @@ -6012,13 +6033,32 @@ static int kvm_handle_hc_map_gpa_range(struct kvm_run *run) trace_kvm_hc_map_gpa_range(gpa, size, attributes, run->hypercall.flags); - return kvm_convert_memory(gpa, size, attributes & KVM_MAP_GPA_RANGE_ENCRYPTED); + ret = kvm_convert_memory(gpa, size, attributes & KVM_MAP_GPA_RANGE_ENCRYPTED); + if (ret || !kvm_pre_fault_memory_supported) { + return ret; + } + + /* + * Opportunistically pre-fault memory in. Failures are ignored so that any + * errors in faulting in the memory will get captured in KVM page fault + * path when the guest first accesses the page. 
+ */ + memset(&mem, 0, sizeof(mem)); + mem.gpa = gpa; + mem.size = size; + while (mem.size) { + if (kvm_vcpu_ioctl(CPU(cpu), KVM_PRE_FAULT_MEMORY, &mem)) { + break; + } + } + + return 0; } -static int kvm_handle_hypercall(struct kvm_run *run) +static int kvm_handle_hypercall(X86CPU *cpu, struct kvm_run *run) { if (run->hypercall.nr == KVM_HC_MAP_GPA_RANGE) - return kvm_handle_hc_map_gpa_range(run); + return kvm_handle_hc_map_gpa_range(cpu, run); return -EINVAL; } @@ -6118,7 +6158,35 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) break; #endif case KVM_EXIT_HYPERCALL: - ret = kvm_handle_hypercall(run); + ret = kvm_handle_hypercall(cpu, run); + break; + case KVM_EXIT_SYSTEM_EVENT: + switch (run->system_event.type) { + case KVM_SYSTEM_EVENT_TDX_FATAL: + ret = tdx_handle_report_fatal_error(cpu, run); + break; + default: + ret = -1; + break; + } + break; + case KVM_EXIT_TDX: + /* + * run->tdx is already set up for the case where userspace + * does not handle the TDVMCALL. + */ + switch (run->tdx.nr) { + case TDVMCALL_GET_QUOTE: + tdx_handle_get_quote(cpu, run); + break; + case TDVMCALL_GET_TD_VM_CALL_INFO: + tdx_handle_get_tdvmcall_info(cpu, run); + break; + case TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT: + tdx_handle_setup_event_notify_interrupt(cpu, run); + break; + } + ret = 0; break; default: fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason); diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h index 88565e8..5f83e88 100644 --- a/target/i386/kvm/kvm_i386.h +++ b/target/i386/kvm/kvm_i386.h @@ -13,6 +13,8 @@ #include "system/kvm.h" +#define KVM_MAX_CPUID_ENTRIES 100 + /* always false if !CONFIG_KVM */ #define kvm_pit_in_kernel() \ (kvm_irqchip_in_kernel() && !kvm_irqchip_is_split()) @@ -42,6 +44,13 @@ void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask); #ifdef CONFIG_KVM +#include <linux/kvm.h> + +typedef struct KvmCpuidInfo { + struct kvm_cpuid2 cpuid; + struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES]; +} KvmCpuidInfo; + bool kvm_is_vm_type_supported(int type); bool kvm_has_adjust_clock_stable(void); bool kvm_has_exception_payload(void); @@ -57,6 +66,12 @@ uint64_t kvm_swizzle_msi_ext_dest_id(uint64_t address); void kvm_update_msi_routes_all(void *private, bool global, uint32_t index, uint32_t mask); +struct kvm_cpuid_entry2 *cpuid_find_entry(struct kvm_cpuid2 *cpuid, + uint32_t function, + uint32_t index); +uint32_t cpuid_entry_get_reg(struct kvm_cpuid_entry2 *entry, int reg); +uint32_t kvm_x86_build_cpuid(CPUX86State *env, struct kvm_cpuid_entry2 *entries, + uint32_t cpuid_i); #endif /* CONFIG_KVM */ void kvm_pc_setup_irq_routing(bool pci_enabled); diff --git a/target/i386/kvm/meson.build b/target/i386/kvm/meson.build index 3996caf..2675bf8 100644 --- a/target/i386/kvm/meson.build +++ b/target/i386/kvm/meson.build @@ -8,6 +8,8 @@ i386_kvm_ss.add(files( i386_kvm_ss.add(when: 'CONFIG_XEN_EMU', if_true: files('xen-emu.c')) +i386_kvm_ss.add(when: 'CONFIG_TDX', if_true: files('tdx.c', 'tdx-quote-generator.c'), if_false: files('tdx-stub.c')) + i386_system_ss.add(when: 'CONFIG_HYPERV', if_true: files('hyperv.c'), if_false: files('hyperv-stub.c')) i386_system_ss.add_all(when: 'CONFIG_KVM', if_true: i386_kvm_ss) diff --git a/target/i386/kvm/tdx-quote-generator.c b/target/i386/kvm/tdx-quote-generator.c new file mode 100644 index 0000000..dee8334 --- /dev/null +++ b/target/i386/kvm/tdx-quote-generator.c @@ -0,0 +1,302 @@ +/* + * QEMU TDX Quote Generation Support + * + * Copyright (c) 2025 Intel Corporation + * + * Author: + * Xiaoyao Li 
<xiaoyao.li@intel.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "qapi/error.h" +#include "qapi/qapi-visit-sockets.h" + +#include "tdx-quote-generator.h" + +#define QGS_MSG_LIB_MAJOR_VER 1 +#define QGS_MSG_LIB_MINOR_VER 1 + +typedef enum _qgs_msg_type_t { + GET_QUOTE_REQ = 0, + GET_QUOTE_RESP = 1, + GET_COLLATERAL_REQ = 2, + GET_COLLATERAL_RESP = 3, + GET_PLATFORM_INFO_REQ = 4, + GET_PLATFORM_INFO_RESP = 5, + QGS_MSG_TYPE_MAX +} qgs_msg_type_t; + +typedef struct _qgs_msg_header_t { + uint16_t major_version; + uint16_t minor_version; + uint32_t type; + uint32_t size; // size of the whole message, include this header, in byte + uint32_t error_code; // used in response only +} qgs_msg_header_t; + +typedef struct _qgs_msg_get_quote_req_t { + qgs_msg_header_t header; // header.type = GET_QUOTE_REQ + uint32_t report_size; // cannot be 0 + uint32_t id_list_size; // length of id_list, in byte, can be 0 +} qgs_msg_get_quote_req_t; + +typedef struct _qgs_msg_get_quote_resp_s { + qgs_msg_header_t header; // header.type = GET_QUOTE_RESP + uint32_t selected_id_size; // can be 0 in case only one id is sent in request + uint32_t quote_size; // length of quote_data, in byte + uint8_t id_quote[]; // selected id followed by quote +} qgs_msg_get_quote_resp_t; + +#define HEADER_SIZE 4 + +static uint32_t decode_header(const char *buf, size_t len) { + if (len < HEADER_SIZE) { + return 0; + } + uint32_t msg_size = 0; + for (uint32_t i = 0; i < HEADER_SIZE; ++i) { + msg_size = msg_size * 256 + (buf[i] & 0xFF); + } + return msg_size; +} + +static void encode_header(char *buf, size_t len, uint32_t size) { + assert(len >= HEADER_SIZE); + buf[0] = ((size >> 24) & 0xFF); + buf[1] = ((size >> 16) & 0xFF); + buf[2] = ((size >> 8) & 0xFF); + buf[3] = (size & 0xFF); +} + +static void tdx_generate_quote_cleanup(TdxGenerateQuoteTask *task) +{ + timer_del(&task->timer); + + if (task->watch) { + g_source_remove(task->watch); + } + qio_channel_close(QIO_CHANNEL(task->sioc), NULL); + object_unref(OBJECT(task->sioc)); + + task->completion(task); +} + +static gboolean tdx_get_quote_read(QIOChannel *ioc, GIOCondition condition, + gpointer opaque) +{ + TdxGenerateQuoteTask *task = opaque; + Error *err = NULL; + int ret; + + ret = qio_channel_read(ioc, task->receive_buf + task->receive_buf_received, + task->payload_len - task->receive_buf_received, &err); + if (ret < 0) { + if (ret == QIO_CHANNEL_ERR_BLOCK) { + return G_SOURCE_CONTINUE; + } else { + error_report_err(err); + task->status_code = TDX_VP_GET_QUOTE_ERROR; + goto end; + } + } + + if (ret == 0) { + error_report("End of file before reply received"); + task->status_code = TDX_VP_GET_QUOTE_ERROR; + goto end; + } + + task->receive_buf_received += ret; + if (task->receive_buf_received >= HEADER_SIZE) { + uint32_t len = decode_header(task->receive_buf, + task->receive_buf_received); + if (len == 0 || + len > (task->payload_len - HEADER_SIZE)) { + error_report("Message len %u must be non-zero & less than %zu", + len, (task->payload_len - HEADER_SIZE)); + task->status_code = TDX_VP_GET_QUOTE_ERROR; + goto end; + } + + /* Now we know the size, shrink to fit */ + task->payload_len = HEADER_SIZE + len; + task->receive_buf = g_renew(char, + task->receive_buf, + task->payload_len); + } + + if (task->receive_buf_received >= (sizeof(qgs_msg_header_t) + HEADER_SIZE)) { + qgs_msg_header_t *hdr = (qgs_msg_header_t *)(task->receive_buf + HEADER_SIZE); + if (hdr->major_version != QGS_MSG_LIB_MAJOR_VER || + 
hdr->minor_version != QGS_MSG_LIB_MINOR_VER) { + error_report("Invalid QGS message header version %d.%d", + hdr->major_version, + hdr->minor_version); + task->status_code = TDX_VP_GET_QUOTE_ERROR; + goto end; + } + if (hdr->type != GET_QUOTE_RESP) { + error_report("Invalid QGS message type %d", + hdr->type); + task->status_code = TDX_VP_GET_QUOTE_ERROR; + goto end; + } + if (hdr->size > (task->payload_len - HEADER_SIZE)) { + error_report("QGS message size %d exceeds payload capacity %zu", + hdr->size, task->payload_len); + task->status_code = TDX_VP_GET_QUOTE_ERROR; + goto end; + } + if (hdr->error_code != 0) { + error_report("QGS message error code %d", + hdr->error_code); + task->status_code = TDX_VP_GET_QUOTE_ERROR; + goto end; + } + } + if (task->receive_buf_received >= (sizeof(qgs_msg_get_quote_resp_t) + HEADER_SIZE)) { + qgs_msg_get_quote_resp_t *msg = (qgs_msg_get_quote_resp_t *)(task->receive_buf + HEADER_SIZE); + if (msg->selected_id_size != 0) { + error_report("QGS message selected ID was %d not 0", + msg->selected_id_size); + task->status_code = TDX_VP_GET_QUOTE_ERROR; + goto end; + } + + if ((task->payload_len - HEADER_SIZE - sizeof(qgs_msg_get_quote_resp_t)) != + msg->quote_size) { + error_report("QGS quote size %d should be %zu", + msg->quote_size, + (task->payload_len - sizeof(qgs_msg_get_quote_resp_t))); + task->status_code = TDX_VP_GET_QUOTE_ERROR; + goto end; + } + } + + if (task->receive_buf_received == task->payload_len) { + size_t strip = HEADER_SIZE + sizeof(qgs_msg_get_quote_resp_t); + memmove(task->receive_buf, + task->receive_buf + strip, + task->receive_buf_received - strip); + task->receive_buf_received -= strip; + task->status_code = TDX_VP_GET_QUOTE_SUCCESS; + goto end; + } + + return G_SOURCE_CONTINUE; + +end: + tdx_generate_quote_cleanup(task); + return G_SOURCE_REMOVE; +} + +static gboolean tdx_send_report(QIOChannel *ioc, GIOCondition condition, + gpointer opaque) +{ + TdxGenerateQuoteTask *task = opaque; + Error *err = NULL; + int ret; + + ret = qio_channel_write(ioc, task->send_data + task->send_data_sent, + task->send_data_size - task->send_data_sent, &err); + if (ret < 0) { + if (ret == QIO_CHANNEL_ERR_BLOCK) { + ret = 0; + } else { + error_report_err(err); + task->status_code = TDX_VP_GET_QUOTE_ERROR; + tdx_generate_quote_cleanup(task); + goto end; + } + } + task->send_data_sent += ret; + + if (task->send_data_sent == task->send_data_size) { + task->watch = qio_channel_add_watch(QIO_CHANNEL(task->sioc), G_IO_IN, + tdx_get_quote_read, task, NULL); + goto end; + } + + return G_SOURCE_CONTINUE; + +end: + return G_SOURCE_REMOVE; +} + +static void tdx_quote_generator_connected(QIOTask *qio_task, gpointer opaque) +{ + TdxGenerateQuoteTask *task = opaque; + Error *err = NULL; + int ret; + + ret = qio_task_propagate_error(qio_task, &err); + if (ret) { + error_report_err(err); + task->status_code = TDX_VP_GET_QUOTE_QGS_UNAVAILABLE; + tdx_generate_quote_cleanup(task); + return; + } + + task->watch = qio_channel_add_watch(QIO_CHANNEL(task->sioc), G_IO_OUT, + tdx_send_report, task, NULL); +} + +#define TRANSACTION_TIMEOUT 30000 + +static void getquote_expired(void *opaque) +{ + TdxGenerateQuoteTask *task = opaque; + + task->status_code = TDX_VP_GET_QUOTE_ERROR; + tdx_generate_quote_cleanup(task); +} + +static void setup_get_quote_timer(TdxGenerateQuoteTask *task) +{ + int64_t time; + + timer_init_ms(&task->timer, QEMU_CLOCK_VIRTUAL, getquote_expired, task); + time = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); + timer_mod(&task->timer, time + TRANSACTION_TIMEOUT); +} + 
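For context on the wire format that tdx_generate_quote() below speaks to the Quote Generation Service: every message is prefixed with a 4-byte big-endian length (see encode_header()/decode_header() above), followed by a qgs_msg header and its payload, and the length counts the header plus payload but not the 4-byte prefix itself. The sketch below shows how a GET_QUOTE_REQ carrying a raw TDREPORT would be framed; it is an annotation rather than patch code, it reuses the qgs_msg_get_quote_req_t, QGS_MSG_LIB_*, GET_QUOTE_REQ, HEADER_SIZE and encode_header() definitions from earlier in this file, and it assumes the caller provides a buffer large enough for all three parts.

/* Sketch: frame [4-byte BE length][qgs_msg_get_quote_req_t][TDREPORT]. */
static size_t frame_get_quote_req(char *out, const void *report,
                                  uint32_t report_size)
{
    qgs_msg_get_quote_req_t req = {
        .header = {
            .major_version = QGS_MSG_LIB_MAJOR_VER,
            .minor_version = QGS_MSG_LIB_MINOR_VER,
            .type = GET_QUOTE_REQ,
            .size = sizeof(req) + report_size,  /* excludes the 4-byte prefix */
            .error_code = 0,                    /* only used in responses */
        },
        .report_size = report_size,
        .id_list_size = 0,
    };

    encode_header(out, HEADER_SIZE, sizeof(req) + report_size);
    memcpy(out + HEADER_SIZE, &req, sizeof(req));
    memcpy(out + HEADER_SIZE + sizeof(req), report, report_size);
    return HEADER_SIZE + sizeof(req) + report_size;
}

tdx_generate_quote() below does the equivalent in place: it grows task->send_data with g_renew(), shifts the report up with memmove(), writes the prefix and qgs_msg header in front of it, and then connects the socket; the actual transmission happens in tdx_send_report() once the channel becomes writable.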
+void tdx_generate_quote(TdxGenerateQuoteTask *task, + SocketAddress *qg_sock_addr) +{ + QIOChannelSocket *sioc; + qgs_msg_get_quote_req_t msg; + + /* Prepare a QGS message prelude */ + msg.header.major_version = QGS_MSG_LIB_MAJOR_VER; + msg.header.minor_version = QGS_MSG_LIB_MINOR_VER; + msg.header.type = GET_QUOTE_REQ; + msg.header.size = sizeof(msg) + task->send_data_size; + msg.header.error_code = 0; + msg.report_size = task->send_data_size; + msg.id_list_size = 0; + + /* Make room to add the QGS message prelude */ + task->send_data = g_renew(char, + task->send_data, + task->send_data_size + sizeof(msg) + HEADER_SIZE); + memmove(task->send_data + sizeof(msg) + HEADER_SIZE, + task->send_data, + task->send_data_size); + memcpy(task->send_data + HEADER_SIZE, + &msg, + sizeof(msg)); + encode_header(task->send_data, HEADER_SIZE, task->send_data_size + sizeof(msg)); + task->send_data_size += sizeof(msg) + HEADER_SIZE; + + sioc = qio_channel_socket_new(); + task->sioc = sioc; + + setup_get_quote_timer(task); + + qio_channel_socket_connect_async(sioc, qg_sock_addr, + tdx_quote_generator_connected, task, + NULL, NULL); +} diff --git a/target/i386/kvm/tdx-quote-generator.h b/target/i386/kvm/tdx-quote-generator.h new file mode 100644 index 0000000..3bd9b8e --- /dev/null +++ b/target/i386/kvm/tdx-quote-generator.h @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef QEMU_I386_TDX_QUOTE_GENERATOR_H +#define QEMU_I386_TDX_QUOTE_GENERATOR_H + +#include "qom/object_interfaces.h" +#include "io/channel-socket.h" +#include "exec/hwaddr.h" + +#define TDX_GET_QUOTE_STRUCTURE_VERSION 1ULL + +#define TDX_VP_GET_QUOTE_SUCCESS 0ULL +#define TDX_VP_GET_QUOTE_IN_FLIGHT (-1ULL) +#define TDX_VP_GET_QUOTE_ERROR 0x8000000000000000ULL +#define TDX_VP_GET_QUOTE_QGS_UNAVAILABLE 0x8000000000000001ULL + +/* Limit to avoid resource starvation. */ +#define TDX_GET_QUOTE_MAX_BUF_LEN (128 * 1024) +#define TDX_MAX_GET_QUOTE_REQUEST 16 + +#define TDX_GET_QUOTE_HDR_SIZE 24 + +/* Format of pages shared with guest. */ +struct tdx_get_quote_header { + /* Format version: must be 1 in little endian. */ + uint64_t structure_version; + + /* + * GetQuote status code in little endian: + * Guest must set error_code to 0 to avoid information leak. + * Qemu sets this before interrupting guest. + */ + uint64_t error_code; + + /* + * in-message size in little endian: The message will follow this header. + * The in-message will be send to QGS. + */ + uint32_t in_len; + + /* + * out-message size in little endian: + * On request, out_len must be zero to avoid information leak. + * On return, message size from QGS. Qemu overwrites this field. + * The message will follows this header. The in-message is overwritten. + */ + uint32_t out_len; + + /* + * Message buffer follows. + * Guest sets message that will be send to QGS. If out_len > in_len, guest + * should zero remaining buffer to avoid information leak. + * Qemu overwrites this buffer with a message returned from QGS. 
+ */ +}; + +typedef struct TdxGenerateQuoteTask { + hwaddr buf_gpa; + hwaddr payload_gpa; + uint64_t payload_len; + + char *send_data; + uint64_t send_data_size; + uint64_t send_data_sent; + + char *receive_buf; + uint64_t receive_buf_received; + + uint64_t status_code; + struct tdx_get_quote_header hdr; + + QIOChannelSocket *sioc; + guint watch; + QEMUTimer timer; + + void (*completion)(struct TdxGenerateQuoteTask *task); + void *opaque; +} TdxGenerateQuoteTask; + +void tdx_generate_quote(TdxGenerateQuoteTask *task, SocketAddress *qg_sock_addr); + +#endif /* QEMU_I386_TDX_QUOTE_GENERATOR_H */ diff --git a/target/i386/kvm/tdx-stub.c b/target/i386/kvm/tdx-stub.c new file mode 100644 index 0000000..1f0e108 --- /dev/null +++ b/target/i386/kvm/tdx-stub.c @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "qemu/osdep.h" + +#include "tdx.h" + +int tdx_pre_create_vcpu(CPUState *cpu, Error **errp) +{ + return -EINVAL; +} + +int tdx_parse_tdvf(void *flash_ptr, int size) +{ + return -EINVAL; +} + +int tdx_handle_report_fatal_error(X86CPU *cpu, struct kvm_run *run) +{ + return -EINVAL; +} + +void tdx_handle_get_quote(X86CPU *cpu, struct kvm_run *run) +{ +} + +void tdx_handle_get_tdvmcall_info(X86CPU *cpu, struct kvm_run *run) +{ +} + +void tdx_handle_setup_event_notify_interrupt(X86CPU *cpu, struct kvm_run *run) +{ +} diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c new file mode 100644 index 0000000..7d69d6d --- /dev/null +++ b/target/i386/kvm/tdx.c @@ -0,0 +1,1546 @@ +/* + * QEMU TDX support + * + * Copyright (c) 2025 Intel Corporation + * + * Author: + * Xiaoyao Li <xiaoyao.li@intel.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "qemu/base64.h" +#include "qemu/mmap-alloc.h" +#include "qapi/error.h" +#include "qapi/qapi-visit-sockets.h" +#include "qom/object_interfaces.h" +#include "crypto/hash.h" +#include "system/kvm_int.h" +#include "system/runstate.h" +#include "system/system.h" +#include "system/ramblock.h" +#include "system/address-spaces.h" + +#include <linux/kvm_para.h> + +#include "cpu.h" +#include "cpu-internal.h" +#include "host-cpu.h" +#include "hw/i386/apic_internal.h" +#include "hw/i386/apic-msidef.h" +#include "hw/i386/e820_memory_layout.h" +#include "hw/i386/tdvf.h" +#include "hw/i386/x86.h" +#include "hw/i386/tdvf-hob.h" +#include "hw/pci/msi.h" +#include "kvm_i386.h" +#include "tdx.h" +#include "tdx-quote-generator.h" + +#include "standard-headers/asm-x86/kvm_para.h" + +#define TDX_MIN_TSC_FREQUENCY_KHZ (100 * 1000) +#define TDX_MAX_TSC_FREQUENCY_KHZ (10 * 1000 * 1000) + +#define TDX_TD_ATTRIBUTES_DEBUG BIT_ULL(0) +#define TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE BIT_ULL(28) +#define TDX_TD_ATTRIBUTES_PKS BIT_ULL(30) +#define TDX_TD_ATTRIBUTES_PERFMON BIT_ULL(63) + +#define TDX_SUPPORTED_TD_ATTRS (TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE |\ + TDX_TD_ATTRIBUTES_PKS | \ + TDX_TD_ATTRIBUTES_PERFMON) + +#define TDX_SUPPORTED_KVM_FEATURES ((1U << KVM_FEATURE_NOP_IO_DELAY) | \ + (1U << KVM_FEATURE_PV_UNHALT) | \ + (1U << KVM_FEATURE_PV_TLB_FLUSH) | \ + (1U << KVM_FEATURE_PV_SEND_IPI) | \ + (1U << KVM_FEATURE_POLL_CONTROL) | \ + (1U << KVM_FEATURE_PV_SCHED_YIELD) | \ + (1U << KVM_FEATURE_MSI_EXT_DEST_ID)) + +static TdxGuest *tdx_guest; + +static struct kvm_tdx_capabilities *tdx_caps; +static struct kvm_cpuid2 *tdx_supported_cpuid; + +/* Valid after kvm_arch_init()->confidential_guest_kvm_init()->tdx_kvm_init() */ +bool is_tdx_vm(void) +{ + return !!tdx_guest; +} + +enum 
tdx_ioctl_level { + TDX_VM_IOCTL, + TDX_VCPU_IOCTL, +}; + +static int tdx_ioctl_internal(enum tdx_ioctl_level level, void *state, + int cmd_id, __u32 flags, void *data, + Error **errp) +{ + struct kvm_tdx_cmd tdx_cmd = {}; + int r; + + const char *tdx_ioctl_name[] = { + [KVM_TDX_CAPABILITIES] = "KVM_TDX_CAPABILITIES", + [KVM_TDX_INIT_VM] = "KVM_TDX_INIT_VM", + [KVM_TDX_INIT_VCPU] = "KVM_TDX_INIT_VCPU", + [KVM_TDX_INIT_MEM_REGION] = "KVM_TDX_INIT_MEM_REGION", + [KVM_TDX_FINALIZE_VM] = "KVM_TDX_FINALIZE_VM", + [KVM_TDX_GET_CPUID] = "KVM_TDX_GET_CPUID", + }; + + tdx_cmd.id = cmd_id; + tdx_cmd.flags = flags; + tdx_cmd.data = (__u64)(unsigned long)data; + + switch (level) { + case TDX_VM_IOCTL: + r = kvm_vm_ioctl(kvm_state, KVM_MEMORY_ENCRYPT_OP, &tdx_cmd); + break; + case TDX_VCPU_IOCTL: + r = kvm_vcpu_ioctl(state, KVM_MEMORY_ENCRYPT_OP, &tdx_cmd); + break; + default: + error_setg(errp, "Invalid tdx_ioctl_level %d", level); + return -EINVAL; + } + + if (r < 0) { + error_setg_errno(errp, -r, "TDX ioctl %s failed, hw_errors: 0x%llx", + tdx_ioctl_name[cmd_id], tdx_cmd.hw_error); + } + return r; +} + +static inline int tdx_vm_ioctl(int cmd_id, __u32 flags, void *data, + Error **errp) +{ + return tdx_ioctl_internal(TDX_VM_IOCTL, NULL, cmd_id, flags, data, errp); +} + +static inline int tdx_vcpu_ioctl(CPUState *cpu, int cmd_id, __u32 flags, + void *data, Error **errp) +{ + return tdx_ioctl_internal(TDX_VCPU_IOCTL, cpu, cmd_id, flags, data, errp); +} + +static int get_tdx_capabilities(Error **errp) +{ + struct kvm_tdx_capabilities *caps; + /* 1st generation of TDX reports 6 cpuid configs */ + int nr_cpuid_configs = 6; + size_t size; + int r; + + do { + Error *local_err = NULL; + size = sizeof(struct kvm_tdx_capabilities) + + nr_cpuid_configs * sizeof(struct kvm_cpuid_entry2); + caps = g_malloc0(size); + caps->cpuid.nent = nr_cpuid_configs; + + r = tdx_vm_ioctl(KVM_TDX_CAPABILITIES, 0, caps, &local_err); + if (r == -E2BIG) { + g_free(caps); + nr_cpuid_configs *= 2; + if (nr_cpuid_configs > KVM_MAX_CPUID_ENTRIES) { + error_report("KVM TDX seems broken that number of CPUID entries" + " in kvm_tdx_capabilities exceeds limit: %d", + KVM_MAX_CPUID_ENTRIES); + error_propagate(errp, local_err); + return r; + } + error_free(local_err); + } else if (r < 0) { + g_free(caps); + error_propagate(errp, local_err); + return r; + } + } while (r == -E2BIG); + + tdx_caps = caps; + + return 0; +} + +void tdx_set_tdvf_region(MemoryRegion *tdvf_mr) +{ + assert(!tdx_guest->tdvf_mr); + tdx_guest->tdvf_mr = tdvf_mr; +} + +static TdxFirmwareEntry *tdx_get_hob_entry(TdxGuest *tdx) +{ + TdxFirmwareEntry *entry; + + for_each_tdx_fw_entry(&tdx->tdvf, entry) { + if (entry->type == TDVF_SECTION_TYPE_TD_HOB) { + return entry; + } + } + error_report("TDVF metadata doesn't specify TD_HOB location."); + exit(1); +} + +static void tdx_add_ram_entry(uint64_t address, uint64_t length, + enum TdxRamType type) +{ + uint32_t nr_entries = tdx_guest->nr_ram_entries; + tdx_guest->ram_entries = g_renew(TdxRamEntry, tdx_guest->ram_entries, + nr_entries + 1); + + tdx_guest->ram_entries[nr_entries].address = address; + tdx_guest->ram_entries[nr_entries].length = length; + tdx_guest->ram_entries[nr_entries].type = type; + tdx_guest->nr_ram_entries++; +} + +static int tdx_accept_ram_range(uint64_t address, uint64_t length) +{ + uint64_t head_start, tail_start, head_length, tail_length; + uint64_t tmp_address, tmp_length; + TdxRamEntry *e; + int i = 0; + + do { + if (i == tdx_guest->nr_ram_entries) { + return -1; + } + + e = 
&tdx_guest->ram_entries[i++]; + } while (address + length <= e->address || address >= e->address + e->length); + + /* + * The to-be-accepted ram range must be fully contained by one + * RAM entry. + */ + if (e->address > address || + e->address + e->length < address + length) { + return -1; + } + + if (e->type == TDX_RAM_ADDED) { + return 0; + } + + tmp_address = e->address; + tmp_length = e->length; + + e->address = address; + e->length = length; + e->type = TDX_RAM_ADDED; + + head_length = address - tmp_address; + if (head_length > 0) { + head_start = tmp_address; + tdx_add_ram_entry(head_start, head_length, TDX_RAM_UNACCEPTED); + } + + tail_start = address + length; + if (tail_start < tmp_address + tmp_length) { + tail_length = tmp_address + tmp_length - tail_start; + tdx_add_ram_entry(tail_start, tail_length, TDX_RAM_UNACCEPTED); + } + + return 0; +} + +static int tdx_ram_entry_compare(const void *lhs_, const void* rhs_) +{ + const TdxRamEntry *lhs = lhs_; + const TdxRamEntry *rhs = rhs_; + + if (lhs->address == rhs->address) { + return 0; + } + if (le64_to_cpu(lhs->address) > le64_to_cpu(rhs->address)) { + return 1; + } + return -1; +} + +static void tdx_init_ram_entries(void) +{ + unsigned i, j, nr_e820_entries; + + nr_e820_entries = e820_get_table(NULL); + tdx_guest->ram_entries = g_new(TdxRamEntry, nr_e820_entries); + + for (i = 0, j = 0; i < nr_e820_entries; i++) { + uint64_t addr, len; + + if (e820_get_entry(i, E820_RAM, &addr, &len)) { + tdx_guest->ram_entries[j].address = addr; + tdx_guest->ram_entries[j].length = len; + tdx_guest->ram_entries[j].type = TDX_RAM_UNACCEPTED; + j++; + } + } + tdx_guest->nr_ram_entries = j; +} + +static void tdx_post_init_vcpus(void) +{ + TdxFirmwareEntry *hob; + CPUState *cpu; + + hob = tdx_get_hob_entry(tdx_guest); + CPU_FOREACH(cpu) { + tdx_vcpu_ioctl(cpu, KVM_TDX_INIT_VCPU, 0, (void *)(uintptr_t)hob->address, + &error_fatal); + } +} + +static void tdx_finalize_vm(Notifier *notifier, void *unused) +{ + TdxFirmware *tdvf = &tdx_guest->tdvf; + TdxFirmwareEntry *entry; + RAMBlock *ram_block; + Error *local_err = NULL; + int r; + + tdx_init_ram_entries(); + + for_each_tdx_fw_entry(tdvf, entry) { + switch (entry->type) { + case TDVF_SECTION_TYPE_BFV: + case TDVF_SECTION_TYPE_CFV: + entry->mem_ptr = tdvf->mem_ptr + entry->data_offset; + break; + case TDVF_SECTION_TYPE_TD_HOB: + case TDVF_SECTION_TYPE_TEMP_MEM: + entry->mem_ptr = qemu_ram_mmap(-1, entry->size, + qemu_real_host_page_size(), 0, 0); + if (entry->mem_ptr == MAP_FAILED) { + error_report("Failed to mmap memory for TDVF section %d", + entry->type); + exit(1); + } + if (tdx_accept_ram_range(entry->address, entry->size)) { + error_report("Failed to accept memory for TDVF section %d", + entry->type); + qemu_ram_munmap(-1, entry->mem_ptr, entry->size); + exit(1); + } + break; + default: + error_report("Unsupported TDVF section %d", entry->type); + exit(1); + } + } + + qsort(tdx_guest->ram_entries, tdx_guest->nr_ram_entries, + sizeof(TdxRamEntry), &tdx_ram_entry_compare); + + tdvf_hob_create(tdx_guest, tdx_get_hob_entry(tdx_guest)); + + tdx_post_init_vcpus(); + + for_each_tdx_fw_entry(tdvf, entry) { + struct kvm_tdx_init_mem_region region; + uint32_t flags; + + region = (struct kvm_tdx_init_mem_region) { + .source_addr = (uintptr_t)entry->mem_ptr, + .gpa = entry->address, + .nr_pages = entry->size >> 12, + }; + + flags = entry->attributes & TDVF_SECTION_ATTRIBUTES_MR_EXTEND ? 
+ KVM_TDX_MEASURE_MEMORY_REGION : 0; + + do { + error_free(local_err); + local_err = NULL; + r = tdx_vcpu_ioctl(first_cpu, KVM_TDX_INIT_MEM_REGION, flags, + ®ion, &local_err); + } while (r == -EAGAIN || r == -EINTR); + if (r < 0) { + error_report_err(local_err); + exit(1); + } + + if (entry->type == TDVF_SECTION_TYPE_TD_HOB || + entry->type == TDVF_SECTION_TYPE_TEMP_MEM) { + qemu_ram_munmap(-1, entry->mem_ptr, entry->size); + entry->mem_ptr = NULL; + } + } + + /* + * TDVF image has been copied into private region above via + * KVM_MEMORY_MAPPING. It becomes useless. + */ + ram_block = tdx_guest->tdvf_mr->ram_block; + ram_block_discard_range(ram_block, 0, ram_block->max_length); + + tdx_vm_ioctl(KVM_TDX_FINALIZE_VM, 0, NULL, &error_fatal); + CONFIDENTIAL_GUEST_SUPPORT(tdx_guest)->ready = true; +} + +static Notifier tdx_machine_done_notify = { + .notify = tdx_finalize_vm, +}; + +/* + * Some CPUID bits change from fixed1 to configurable bits when TDX module + * supports TDX_FEATURES0.VE_REDUCTION. e.g., MCA/MCE/MTRR/CORE_CAPABILITY. + * + * To make QEMU work with all the versions of TDX module, keep the fixed1 bits + * here if they are ever fixed1 bits in any of the version though not fixed1 in + * the latest version. Otherwise, with the older version of TDX module, QEMU may + * treat the fixed1 bit as unsupported. + * + * For newer TDX module, it does no harm to keep them in tdx_fixed1_bits even + * though they changed to configurable bits. Because tdx_fixed1_bits is used to + * setup the supported bits. + */ +KvmCpuidInfo tdx_fixed1_bits = { + .cpuid.nent = 8, + .entries[0] = { + .function = 0x1, + .index = 0, + .ecx = CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_DTES64 | + CPUID_EXT_DSCPL | CPUID_EXT_SSSE3 | CPUID_EXT_CX16 | + CPUID_EXT_PDCM | CPUID_EXT_PCID | CPUID_EXT_SSE41 | + CPUID_EXT_SSE42 | CPUID_EXT_X2APIC | CPUID_EXT_MOVBE | + CPUID_EXT_POPCNT | CPUID_EXT_AES | CPUID_EXT_XSAVE | + CPUID_EXT_RDRAND | CPUID_EXT_HYPERVISOR, + .edx = CPUID_FP87 | CPUID_VME | CPUID_DE | CPUID_PSE | CPUID_TSC | + CPUID_MSR | CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC | + CPUID_SEP | CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV | + CPUID_PAT | CPUID_CLFLUSH | CPUID_DTS | CPUID_MMX | CPUID_FXSR | + CPUID_SSE | CPUID_SSE2, + }, + .entries[1] = { + .function = 0x6, + .index = 0, + .eax = CPUID_6_EAX_ARAT, + }, + .entries[2] = { + .function = 0x7, + .index = 0, + .flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX, + .ebx = CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_FDP_EXCPTN_ONLY | + CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_INVPCID | + CPUID_7_0_EBX_ZERO_FCS_FDS | CPUID_7_0_EBX_RDSEED | + CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLFLUSHOPT | + CPUID_7_0_EBX_CLWB | CPUID_7_0_EBX_SHA_NI, + .ecx = CPUID_7_0_ECX_BUS_LOCK_DETECT | CPUID_7_0_ECX_MOVDIRI | + CPUID_7_0_ECX_MOVDIR64B, + .edx = CPUID_7_0_EDX_MD_CLEAR | CPUID_7_0_EDX_SPEC_CTRL | + CPUID_7_0_EDX_STIBP | CPUID_7_0_EDX_FLUSH_L1D | + CPUID_7_0_EDX_ARCH_CAPABILITIES | CPUID_7_0_EDX_CORE_CAPABILITY | + CPUID_7_0_EDX_SPEC_CTRL_SSBD, + }, + .entries[3] = { + .function = 0x7, + .index = 2, + .flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX, + .edx = CPUID_7_2_EDX_PSFD | CPUID_7_2_EDX_IPRED_CTRL | + CPUID_7_2_EDX_RRSBA_CTRL | CPUID_7_2_EDX_BHI_CTRL, + }, + .entries[4] = { + .function = 0xD, + .index = 0, + .flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX, + .eax = XSTATE_FP_MASK | XSTATE_SSE_MASK, + }, + .entries[5] = { + .function = 0xD, + .index = 1, + .flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX, + .eax = CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC| + CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES, + 
}, + .entries[6] = { + .function = 0x80000001, + .index = 0, + .ecx = CPUID_EXT3_LAHF_LM | CPUID_EXT3_ABM | CPUID_EXT3_3DNOWPREFETCH, + /* + * Strictly speaking, SYSCALL is not fixed1 bit since it depends on + * the CPU to be in 64-bit mode. But here fixed1 is used to serve the + * purpose of supported bits for TDX. In this sense, SYACALL is always + * supported. + */ + .edx = CPUID_EXT2_SYSCALL | CPUID_EXT2_NX | CPUID_EXT2_PDPE1GB | + CPUID_EXT2_RDTSCP | CPUID_EXT2_LM, + }, + .entries[7] = { + .function = 0x80000007, + .index = 0, + .edx = CPUID_APM_INVTSC, + }, +}; + +typedef struct TdxAttrsMap { + uint32_t attr_index; + uint32_t cpuid_leaf; + uint32_t cpuid_subleaf; + int cpuid_reg; + uint32_t feat_mask; +} TdxAttrsMap; + +static TdxAttrsMap tdx_attrs_maps[] = { + {.attr_index = 27, + .cpuid_leaf = 7, + .cpuid_subleaf = 1, + .cpuid_reg = R_EAX, + .feat_mask = CPUID_7_1_EAX_LASS,}, + + {.attr_index = 30, + .cpuid_leaf = 7, + .cpuid_subleaf = 0, + .cpuid_reg = R_ECX, + .feat_mask = CPUID_7_0_ECX_PKS,}, + + {.attr_index = 31, + .cpuid_leaf = 7, + .cpuid_subleaf = 0, + .cpuid_reg = R_ECX, + .feat_mask = CPUID_7_0_ECX_KeyLocker,}, +}; + +typedef struct TdxXFAMDep { + int xfam_bit; + FeatureMask feat_mask; +} TdxXFAMDep; + +/* + * Note, only the CPUID bits whose virtualization type are "XFAM & Native" are + * defiend here. + * + * For those whose virtualization type are "XFAM & Configured & Native", they + * are reported as configurable bits. And they are not supported if not in the + * configureable bits list from KVM even if the corresponding XFAM bit is + * supported. + */ +TdxXFAMDep tdx_xfam_deps[] = { + { XSTATE_YMM_BIT, { FEAT_1_ECX, CPUID_EXT_FMA }}, + { XSTATE_YMM_BIT, { FEAT_7_0_EBX, CPUID_7_0_EBX_AVX2 }}, + { XSTATE_OPMASK_BIT, { FEAT_7_0_ECX, CPUID_7_0_ECX_AVX512_VBMI}}, + { XSTATE_OPMASK_BIT, { FEAT_7_0_EDX, CPUID_7_0_EDX_AVX512_FP16}}, + { XSTATE_PT_BIT, { FEAT_7_0_EBX, CPUID_7_0_EBX_INTEL_PT}}, + { XSTATE_PKRU_BIT, { FEAT_7_0_ECX, CPUID_7_0_ECX_PKU}}, + { XSTATE_XTILE_CFG_BIT, { FEAT_7_0_EDX, CPUID_7_0_EDX_AMX_BF16 }}, + { XSTATE_XTILE_CFG_BIT, { FEAT_7_0_EDX, CPUID_7_0_EDX_AMX_TILE }}, + { XSTATE_XTILE_CFG_BIT, { FEAT_7_0_EDX, CPUID_7_0_EDX_AMX_INT8 }}, +}; + +static struct kvm_cpuid_entry2 *find_in_supported_entry(uint32_t function, + uint32_t index) +{ + struct kvm_cpuid_entry2 *e; + + e = cpuid_find_entry(tdx_supported_cpuid, function, index); + if (!e) { + if (tdx_supported_cpuid->nent >= KVM_MAX_CPUID_ENTRIES) { + error_report("tdx_supported_cpuid requries more space than %d entries", + KVM_MAX_CPUID_ENTRIES); + exit(1); + } + e = &tdx_supported_cpuid->entries[tdx_supported_cpuid->nent++]; + e->function = function; + e->index = index; + } + + return e; +} + +static void tdx_add_supported_cpuid_by_fixed1_bits(void) +{ + struct kvm_cpuid_entry2 *e, *e1; + int i; + + for (i = 0; i < tdx_fixed1_bits.cpuid.nent; i++) { + e = &tdx_fixed1_bits.entries[i]; + + e1 = find_in_supported_entry(e->function, e->index); + e1->eax |= e->eax; + e1->ebx |= e->ebx; + e1->ecx |= e->ecx; + e1->edx |= e->edx; + } +} + +static void tdx_add_supported_cpuid_by_attrs(void) +{ + struct kvm_cpuid_entry2 *e; + TdxAttrsMap *map; + int i; + + for (i = 0; i < ARRAY_SIZE(tdx_attrs_maps); i++) { + map = &tdx_attrs_maps[i]; + if (!((1ULL << map->attr_index) & tdx_caps->supported_attrs)) { + continue; + } + + e = find_in_supported_entry(map->cpuid_leaf, map->cpuid_subleaf); + + switch(map->cpuid_reg) { + case R_EAX: + e->eax |= map->feat_mask; + break; + case R_EBX: + e->ebx |= map->feat_mask; + break; + 
case R_ECX: + e->ecx |= map->feat_mask; + break; + case R_EDX: + e->edx |= map->feat_mask; + break; + } + } +} + +static void tdx_add_supported_cpuid_by_xfam(void) +{ + struct kvm_cpuid_entry2 *e; + int i; + + const TdxXFAMDep *xfam_dep; + const FeatureWordInfo *f; + for (i = 0; i < ARRAY_SIZE(tdx_xfam_deps); i++) { + xfam_dep = &tdx_xfam_deps[i]; + if (!((1ULL << xfam_dep->xfam_bit) & tdx_caps->supported_xfam)) { + continue; + } + + f = &feature_word_info[xfam_dep->feat_mask.index]; + if (f->type != CPUID_FEATURE_WORD) { + continue; + } + + e = find_in_supported_entry(f->cpuid.eax, f->cpuid.ecx); + switch(f->cpuid.reg) { + case R_EAX: + e->eax |= xfam_dep->feat_mask.mask; + break; + case R_EBX: + e->ebx |= xfam_dep->feat_mask.mask; + break; + case R_ECX: + e->ecx |= xfam_dep->feat_mask.mask; + break; + case R_EDX: + e->edx |= xfam_dep->feat_mask.mask; + break; + } + } + + e = find_in_supported_entry(0xd, 0); + e->eax |= (tdx_caps->supported_xfam & CPUID_XSTATE_XCR0_MASK); + e->edx |= (tdx_caps->supported_xfam & CPUID_XSTATE_XCR0_MASK) >> 32; + + e = find_in_supported_entry(0xd, 1); + /* + * Mark XFD always support for TDX, it will be cleared finally in + * tdx_adjust_cpuid_features() if XFD is unavailable on the hardware + * because in this case the original data has it as 0. + */ + e->eax |= CPUID_XSAVE_XFD; + e->ecx |= (tdx_caps->supported_xfam & CPUID_XSTATE_XSS_MASK); + e->edx |= (tdx_caps->supported_xfam & CPUID_XSTATE_XSS_MASK) >> 32; +} + +static void tdx_add_supported_kvm_features(void) +{ + struct kvm_cpuid_entry2 *e; + + e = find_in_supported_entry(0x40000001, 0); + e->eax = TDX_SUPPORTED_KVM_FEATURES; +} + +static void tdx_setup_supported_cpuid(void) +{ + if (tdx_supported_cpuid) { + return; + } + + tdx_supported_cpuid = g_malloc0(sizeof(*tdx_supported_cpuid) + + KVM_MAX_CPUID_ENTRIES * sizeof(struct kvm_cpuid_entry2)); + + memcpy(tdx_supported_cpuid->entries, tdx_caps->cpuid.entries, + tdx_caps->cpuid.nent * sizeof(struct kvm_cpuid_entry2)); + tdx_supported_cpuid->nent = tdx_caps->cpuid.nent; + + tdx_add_supported_cpuid_by_fixed1_bits(); + tdx_add_supported_cpuid_by_attrs(); + tdx_add_supported_cpuid_by_xfam(); + + tdx_add_supported_kvm_features(); +} + +static int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) +{ + MachineState *ms = MACHINE(qdev_get_machine()); + X86MachineState *x86ms = X86_MACHINE(ms); + TdxGuest *tdx = TDX_GUEST(cgs); + int r = 0; + + kvm_mark_guest_state_protected(); + + if (x86ms->smm == ON_OFF_AUTO_AUTO) { + x86ms->smm = ON_OFF_AUTO_OFF; + } else if (x86ms->smm == ON_OFF_AUTO_ON) { + error_setg(errp, "TDX VM doesn't support SMM"); + return -EINVAL; + } + + if (x86ms->pic == ON_OFF_AUTO_AUTO) { + x86ms->pic = ON_OFF_AUTO_OFF; + } else if (x86ms->pic == ON_OFF_AUTO_ON) { + error_setg(errp, "TDX VM doesn't support PIC"); + return -EINVAL; + } + + if (kvm_state->kernel_irqchip_split == ON_OFF_AUTO_AUTO) { + kvm_state->kernel_irqchip_split = ON_OFF_AUTO_ON; + } else if (kvm_state->kernel_irqchip_split != ON_OFF_AUTO_ON) { + error_setg(errp, "TDX VM requires kernel_irqchip to be split"); + return -EINVAL; + } + + if (!tdx_caps) { + r = get_tdx_capabilities(errp); + if (r) { + return r; + } + } + + tdx_setup_supported_cpuid(); + + /* TDX relies on KVM_HC_MAP_GPA_RANGE to handle TDG.VP.VMCALL<MapGPA> */ + if (!kvm_enable_hypercall(BIT_ULL(KVM_HC_MAP_GPA_RANGE))) { + return -EOPNOTSUPP; + } + + /* + * Set kvm_readonly_mem_allowed to false, because TDX only supports readonly + * memory for shared memory but not for private memory. 
Besides, whether a
+     * memslot is private or shared is not determined by QEMU.
+     *
+     * Thus, just mark readonly memory as not supported for simplicity.
+     */
+    kvm_readonly_mem_allowed = false;
+
+    qemu_add_machine_init_done_notifier(&tdx_machine_done_notify);
+
+    tdx_guest = tdx;
+    return 0;
+}
+
+static int tdx_kvm_type(X86ConfidentialGuest *cg)
+{
+    /* Do the object check */
+    TDX_GUEST(cg);
+
+    return KVM_X86_TDX_VM;
+}
+
+static void tdx_cpu_instance_init(X86ConfidentialGuest *cg, CPUState *cpu)
+{
+    X86CPUClass *xcc = X86_CPU_GET_CLASS(cpu);
+    X86CPU *x86cpu = X86_CPU(cpu);
+
+    if (xcc->model) {
+        error_report("Named cpu model is not supported for TDX yet!");
+        exit(1);
+    }
+
+    object_property_set_bool(OBJECT(cpu), "pmu", false, &error_abort);
+
+    /* invtsc is fixed1 for TD guest */
+    object_property_set_bool(OBJECT(cpu), "invtsc", true, &error_abort);
+
+    x86cpu->force_cpuid_0x1f = true;
+}
+
+static uint32_t tdx_adjust_cpuid_features(X86ConfidentialGuest *cg,
+                                          uint32_t feature, uint32_t index,
+                                          int reg, uint32_t value)
+{
+    struct kvm_cpuid_entry2 *e;
+
+    e = cpuid_find_entry(&tdx_fixed1_bits.cpuid, feature, index);
+    if (e) {
+        value |= cpuid_entry_get_reg(e, reg);
+    }
+
+    if (is_feature_word_cpuid(feature, index, reg)) {
+        e = cpuid_find_entry(tdx_supported_cpuid, feature, index);
+        if (e) {
+            value &= cpuid_entry_get_reg(e, reg);
+        }
+    }
+
+    return value;
+}
+
+static struct kvm_cpuid2 *tdx_fetch_cpuid(CPUState *cpu, int *ret)
+{
+    struct kvm_cpuid2 *fetch_cpuid;
+    int size = KVM_MAX_CPUID_ENTRIES;
+    Error *local_err = NULL;
+    int r;
+
+    do {
+        error_free(local_err);
+        local_err = NULL;
+
+        fetch_cpuid = g_malloc0(sizeof(*fetch_cpuid) +
+                                sizeof(struct kvm_cpuid_entry2) * size);
+        fetch_cpuid->nent = size;
+        r = tdx_vcpu_ioctl(cpu, KVM_TDX_GET_CPUID, 0, fetch_cpuid, &local_err);
+        if (r == -E2BIG) {
+            size = fetch_cpuid->nent;
+            g_free(fetch_cpuid);
+        }
+    } while (r == -E2BIG);
+
+    if (r < 0) {
+        error_report_err(local_err);
+        *ret = r;
+        return NULL;
+    }
+
+    return fetch_cpuid;
+}
+
+static int tdx_check_features(X86ConfidentialGuest *cg, CPUState *cs)
+{
+    uint64_t actual, requested, unavailable, forced_on;
+    g_autofree struct kvm_cpuid2 *fetch_cpuid;
+    const char *forced_on_prefix = NULL;
+    const char *unav_prefix = NULL;
+    struct kvm_cpuid_entry2 *entry;
+    X86CPU *cpu = X86_CPU(cs);
+    CPUX86State *env = &cpu->env;
+    FeatureWordInfo *wi;
+    FeatureWord w;
+    bool mismatch = false;
+    int r;
+
+    fetch_cpuid = tdx_fetch_cpuid(cs, &r);
+    if (!fetch_cpuid) {
+        return r;
+    }
+
+    if (cpu->check_cpuid || cpu->enforce_cpuid) {
+        unav_prefix = "TDX doesn't support requested feature";
+        forced_on_prefix = "TDX forcibly sets the feature";
+    }
+
+    for (w = 0; w < FEATURE_WORDS; w++) {
+        wi = &feature_word_info[w];
+        actual = 0;
+
+        switch (wi->type) {
+        case CPUID_FEATURE_WORD:
+            entry = cpuid_find_entry(fetch_cpuid, wi->cpuid.eax, wi->cpuid.ecx);
+            if (!entry) {
+                /*
+                 * If KVM doesn't report it, it means it's fully configurable
+                 * by QEMU.
+                 */
+                continue;
+            }
+
+            actual = cpuid_entry_get_reg(entry, wi->cpuid.reg);
+            break;
+        case MSR_FEATURE_WORD:
+            /*
+             * TODO:
+             * validate MSR features when KVM has an interface to report them.
+             */
+            continue;
+        }
+
+        /* Fixup for special cases */
+        switch (w) {
+        case FEAT_8000_0001_EDX:
+            /*
+             * Intel enumerates the SYSCALL bit as 1 only when the processor
+             * is in 64-bit mode, and before the vCPU starts running it is
+             * not in 64-bit mode.
+ */ + actual |= CPUID_EXT2_SYSCALL; + break; + default: + break; + } + + requested = env->features[w]; + unavailable = requested & ~actual; + mark_unavailable_features(cpu, w, unavailable, unav_prefix); + if (unavailable) { + mismatch = true; + } + + forced_on = actual & ~requested; + mark_forced_on_features(cpu, w, forced_on, forced_on_prefix); + if (forced_on) { + mismatch = true; + } + } + + if (cpu->enforce_cpuid && mismatch) { + return -EINVAL; + } + + if (cpu->phys_bits != host_cpu_phys_bits()) { + error_report("TDX requires guest CPU physical bits (%u) " + "to match host CPU physical bits (%u)", + cpu->phys_bits, host_cpu_phys_bits()); + return -EINVAL; + } + + return 0; +} + +static int tdx_validate_attributes(TdxGuest *tdx, Error **errp) +{ + if ((tdx->attributes & ~tdx_caps->supported_attrs)) { + error_setg(errp, "Invalid attributes 0x%"PRIx64" for TDX VM " + "(KVM supported: 0x%"PRIx64")", tdx->attributes, + (uint64_t)tdx_caps->supported_attrs); + return -1; + } + + if (tdx->attributes & ~TDX_SUPPORTED_TD_ATTRS) { + error_setg(errp, "Some QEMU unsupported TD attribute bits being " + "requested: 0x%"PRIx64" (QEMU supported: 0x%"PRIx64")", + tdx->attributes, (uint64_t)TDX_SUPPORTED_TD_ATTRS); + return -1; + } + + return 0; +} + +static int setup_td_guest_attributes(X86CPU *x86cpu, Error **errp) +{ + CPUX86State *env = &x86cpu->env; + + tdx_guest->attributes |= (env->features[FEAT_7_0_ECX] & CPUID_7_0_ECX_PKS) ? + TDX_TD_ATTRIBUTES_PKS : 0; + tdx_guest->attributes |= x86cpu->enable_pmu ? TDX_TD_ATTRIBUTES_PERFMON : 0; + + return tdx_validate_attributes(tdx_guest, errp); +} + +static int setup_td_xfam(X86CPU *x86cpu, Error **errp) +{ + CPUX86State *env = &x86cpu->env; + uint64_t xfam; + + xfam = env->features[FEAT_XSAVE_XCR0_LO] | + env->features[FEAT_XSAVE_XCR0_HI] | + env->features[FEAT_XSAVE_XSS_LO] | + env->features[FEAT_XSAVE_XSS_HI]; + + if (xfam & ~tdx_caps->supported_xfam) { + error_setg(errp, "Invalid XFAM 0x%"PRIx64" for TDX VM (supported: 0x%"PRIx64"))", + xfam, (uint64_t)tdx_caps->supported_xfam); + return -1; + } + + tdx_guest->xfam = xfam; + return 0; +} + +static void tdx_filter_cpuid(struct kvm_cpuid2 *cpuids) +{ + int i, dest_cnt = 0; + struct kvm_cpuid_entry2 *src, *dest, *conf; + + for (i = 0; i < cpuids->nent; i++) { + src = cpuids->entries + i; + conf = cpuid_find_entry(&tdx_caps->cpuid, src->function, src->index); + if (!conf) { + continue; + } + dest = cpuids->entries + dest_cnt; + + dest->function = src->function; + dest->index = src->index; + dest->flags = src->flags; + dest->eax = src->eax & conf->eax; + dest->ebx = src->ebx & conf->ebx; + dest->ecx = src->ecx & conf->ecx; + dest->edx = src->edx & conf->edx; + + dest_cnt++; + } + cpuids->nent = dest_cnt++; +} + +int tdx_pre_create_vcpu(CPUState *cpu, Error **errp) +{ + X86CPU *x86cpu = X86_CPU(cpu); + CPUX86State *env = &x86cpu->env; + g_autofree struct kvm_tdx_init_vm *init_vm = NULL; + Error *local_err = NULL; + size_t data_len; + int retry = 10000; + int r = 0; + + QEMU_LOCK_GUARD(&tdx_guest->lock); + if (tdx_guest->initialized) { + return r; + } + + init_vm = g_malloc0(sizeof(struct kvm_tdx_init_vm) + + sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES); + + if (!kvm_check_extension(kvm_state, KVM_CAP_X86_APIC_BUS_CYCLES_NS)) { + error_setg(errp, "KVM doesn't support KVM_CAP_X86_APIC_BUS_CYCLES_NS"); + return -EOPNOTSUPP; + } + + r = kvm_vm_enable_cap(kvm_state, KVM_CAP_X86_APIC_BUS_CYCLES_NS, + 0, TDX_APIC_BUS_CYCLES_NS); + if (r < 0) { + error_setg_errno(errp, -r, + "Unable to set core crystal 
clock frequency to 25MHz"); + return r; + } + + if (env->tsc_khz && (env->tsc_khz < TDX_MIN_TSC_FREQUENCY_KHZ || + env->tsc_khz > TDX_MAX_TSC_FREQUENCY_KHZ)) { + error_setg(errp, "Invalid TSC %"PRId64" KHz, must specify cpu_frequency " + "between [%d, %d] kHz", env->tsc_khz, + TDX_MIN_TSC_FREQUENCY_KHZ, TDX_MAX_TSC_FREQUENCY_KHZ); + return -EINVAL; + } + + if (env->tsc_khz % (25 * 1000)) { + error_setg(errp, "Invalid TSC %"PRId64" KHz, it must be multiple of 25MHz", + env->tsc_khz); + return -EINVAL; + } + + /* it's safe even env->tsc_khz is 0. KVM uses host's tsc_khz in this case */ + r = kvm_vm_ioctl(kvm_state, KVM_SET_TSC_KHZ, env->tsc_khz); + if (r < 0) { + error_setg_errno(errp, -r, "Unable to set TSC frequency to %"PRId64" kHz", + env->tsc_khz); + return r; + } + + if (tdx_guest->mrconfigid) { + g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrconfigid, + strlen(tdx_guest->mrconfigid), &data_len, errp); + if (!data) { + return -1; + } + if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) { + error_setg(errp, "TDX 'mrconfigid' sha384 digest was %ld bytes, " + "expected %d bytes", data_len, + QCRYPTO_HASH_DIGEST_LEN_SHA384); + return -1; + } + memcpy(init_vm->mrconfigid, data, data_len); + } + + if (tdx_guest->mrowner) { + g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrowner, + strlen(tdx_guest->mrowner), &data_len, errp); + if (!data) { + return -1; + } + if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) { + error_setg(errp, "TDX 'mrowner' sha384 digest was %ld bytes, " + "expected %d bytes", data_len, + QCRYPTO_HASH_DIGEST_LEN_SHA384); + return -1; + } + memcpy(init_vm->mrowner, data, data_len); + } + + if (tdx_guest->mrownerconfig) { + g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrownerconfig, + strlen(tdx_guest->mrownerconfig), &data_len, errp); + if (!data) { + return -1; + } + if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) { + error_setg(errp, "TDX 'mrownerconfig' sha384 digest was %ld bytes, " + "expected %d bytes", data_len, + QCRYPTO_HASH_DIGEST_LEN_SHA384); + return -1; + } + memcpy(init_vm->mrownerconfig, data, data_len); + } + + r = setup_td_guest_attributes(x86cpu, errp); + if (r) { + return r; + } + + r = setup_td_xfam(x86cpu, errp); + if (r) { + return r; + } + + init_vm->cpuid.nent = kvm_x86_build_cpuid(env, init_vm->cpuid.entries, 0); + tdx_filter_cpuid(&init_vm->cpuid); + + init_vm->attributes = tdx_guest->attributes; + init_vm->xfam = tdx_guest->xfam; + + /* + * KVM_TDX_INIT_VM gets -EAGAIN when KVM side SEAMCALL(TDH_MNG_CREATE) + * gets TDX_RND_NO_ENTROPY due to Random number generation (e.g., RDRAND or + * RDSEED) is busy. + * + * Retry for the case. 
+ */ + do { + error_free(local_err); + local_err = NULL; + r = tdx_vm_ioctl(KVM_TDX_INIT_VM, 0, init_vm, &local_err); + } while (r == -EAGAIN && --retry); + + if (r < 0) { + if (!retry) { + error_append_hint(&local_err, "Hardware RNG (Random Number " + "Generator) is busy occupied by someone (via RDRAND/RDSEED) " + "maliciously, which leads to KVM_TDX_INIT_VM keeping failure " + "due to lack of entropy.\n"); + } + error_propagate(errp, local_err); + return r; + } + + tdx_guest->initialized = true; + + return 0; +} + +int tdx_parse_tdvf(void *flash_ptr, int size) +{ + return tdvf_parse_metadata(&tdx_guest->tdvf, flash_ptr, size); +} + +static void tdx_inject_interrupt(uint32_t apicid, uint32_t vector) +{ + int ret; + + if (vector < 32 || vector > 255) { + return; + } + + MSIMessage msg = { + .address = ((apicid & 0xff) << MSI_ADDR_DEST_ID_SHIFT) | + (((uint64_t)apicid & 0xffffff00) << 32), + .data = vector | (APIC_DM_FIXED << MSI_DATA_DELIVERY_MODE_SHIFT), + }; + + ret = kvm_irqchip_send_msi(kvm_state, msg); + if (ret < 0) { + /* In this case, no better way to tell it to guest. Log it. */ + error_report("TDX: injection interrupt %d failed, interrupt lost (%s).", + vector, strerror(-ret)); + } +} + +static void tdx_get_quote_completion(TdxGenerateQuoteTask *task) +{ + TdxGuest *tdx = task->opaque; + int ret; + + /* Maintain the number of in-flight requests. */ + qemu_mutex_lock(&tdx->lock); + tdx->num--; + qemu_mutex_unlock(&tdx->lock); + + if (task->status_code == TDX_VP_GET_QUOTE_SUCCESS) { + ret = address_space_write(&address_space_memory, task->payload_gpa, + MEMTXATTRS_UNSPECIFIED, task->receive_buf, + task->receive_buf_received); + if (ret != MEMTX_OK) { + error_report("TDX: get-quote: failed to write quote data."); + } else { + task->hdr.out_len = cpu_to_le64(task->receive_buf_received); + } + } + task->hdr.error_code = cpu_to_le64(task->status_code); + + /* Publish the response contents before marking this request completed. */ + smp_wmb(); + ret = address_space_write(&address_space_memory, task->buf_gpa, + MEMTXATTRS_UNSPECIFIED, &task->hdr, + TDX_GET_QUOTE_HDR_SIZE); + if (ret != MEMTX_OK) { + error_report("TDX: get-quote: failed to update GetQuote header."); + } + + tdx_inject_interrupt(tdx_guest->event_notify_apicid, + tdx_guest->event_notify_vector); + + g_free(task->send_data); + g_free(task->receive_buf); + g_free(task); + object_unref(tdx); +} + +void tdx_handle_get_quote(X86CPU *cpu, struct kvm_run *run) +{ + TdxGenerateQuoteTask *task; + struct tdx_get_quote_header hdr; + hwaddr buf_gpa = run->tdx.get_quote.gpa; + uint64_t buf_len = run->tdx.get_quote.size; + + QEMU_BUILD_BUG_ON(sizeof(struct tdx_get_quote_header) != TDX_GET_QUOTE_HDR_SIZE); + + run->tdx.get_quote.ret = TDG_VP_VMCALL_INVALID_OPERAND; + + if (buf_len == 0) { + return; + } + + if (!QEMU_IS_ALIGNED(buf_gpa, 4096) || !QEMU_IS_ALIGNED(buf_len, 4096)) { + run->tdx.get_quote.ret = TDG_VP_VMCALL_ALIGN_ERROR; + return; + } + + if (address_space_read(&address_space_memory, buf_gpa, MEMTXATTRS_UNSPECIFIED, + &hdr, TDX_GET_QUOTE_HDR_SIZE) != MEMTX_OK) { + error_report("TDX: get-quote: failed to read GetQuote header."); + return; + } + + if (le64_to_cpu(hdr.structure_version) != TDX_GET_QUOTE_STRUCTURE_VERSION) { + return; + } + + /* Only safe-guard check to avoid too large buffer size. 
*/ + if (buf_len > TDX_GET_QUOTE_MAX_BUF_LEN || + le32_to_cpu(hdr.in_len) > buf_len - TDX_GET_QUOTE_HDR_SIZE) { + return; + } + + if (!tdx_guest->qg_sock_addr) { + hdr.error_code = cpu_to_le64(TDX_VP_GET_QUOTE_QGS_UNAVAILABLE); + if (address_space_write(&address_space_memory, buf_gpa, + MEMTXATTRS_UNSPECIFIED, + &hdr, TDX_GET_QUOTE_HDR_SIZE) != MEMTX_OK) { + error_report("TDX: failed to update GetQuote header."); + return; + } + run->tdx.get_quote.ret = TDG_VP_VMCALL_SUCCESS; + return; + } + + qemu_mutex_lock(&tdx_guest->lock); + if (tdx_guest->num >= TDX_MAX_GET_QUOTE_REQUEST) { + qemu_mutex_unlock(&tdx_guest->lock); + run->tdx.get_quote.ret = TDG_VP_VMCALL_RETRY; + return; + } + tdx_guest->num++; + qemu_mutex_unlock(&tdx_guest->lock); + + task = g_new(TdxGenerateQuoteTask, 1); + task->buf_gpa = buf_gpa; + task->payload_gpa = buf_gpa + TDX_GET_QUOTE_HDR_SIZE; + task->payload_len = buf_len - TDX_GET_QUOTE_HDR_SIZE; + task->hdr = hdr; + task->completion = tdx_get_quote_completion; + + task->send_data_size = le32_to_cpu(hdr.in_len); + task->send_data = g_malloc(task->send_data_size); + task->send_data_sent = 0; + + if (address_space_read(&address_space_memory, task->payload_gpa, + MEMTXATTRS_UNSPECIFIED, task->send_data, + task->send_data_size) != MEMTX_OK) { + goto out_free; + } + + /* Mark the buffer in-flight. */ + hdr.error_code = cpu_to_le64(TDX_VP_GET_QUOTE_IN_FLIGHT); + if (address_space_write(&address_space_memory, buf_gpa, + MEMTXATTRS_UNSPECIFIED, + &hdr, TDX_GET_QUOTE_HDR_SIZE) != MEMTX_OK) { + goto out_free; + } + + task->receive_buf = g_malloc0(task->payload_len); + task->receive_buf_received = 0; + task->opaque = tdx_guest; + + object_ref(tdx_guest); + tdx_generate_quote(task, tdx_guest->qg_sock_addr); + run->tdx.get_quote.ret = TDG_VP_VMCALL_SUCCESS; + return; + +out_free: + g_free(task->send_data); + g_free(task); +} + +#define SUPPORTED_TDVMCALLINFO_1_R11 (TDG_VP_VMCALL_SUBFUNC_SET_EVENT_NOTIFY_INTERRUPT) +#define SUPPORTED_TDVMCALLINFO_1_R12 (0) + +void tdx_handle_get_tdvmcall_info(X86CPU *cpu, struct kvm_run *run) +{ + if (run->tdx.get_tdvmcall_info.leaf != 1) { + return; + } + + run->tdx.get_tdvmcall_info.r11 = (tdx_caps->user_tdvmcallinfo_1_r11 & + SUPPORTED_TDVMCALLINFO_1_R11) | + tdx_caps->kernel_tdvmcallinfo_1_r11; + run->tdx.get_tdvmcall_info.r12 = (tdx_caps->user_tdvmcallinfo_1_r12 & + SUPPORTED_TDVMCALLINFO_1_R12) | + tdx_caps->kernel_tdvmcallinfo_1_r12; + run->tdx.get_tdvmcall_info.r13 = 0; + run->tdx.get_tdvmcall_info.r14 = 0; + + run->tdx.get_tdvmcall_info.ret = TDG_VP_VMCALL_SUCCESS; +} + +void tdx_handle_setup_event_notify_interrupt(X86CPU *cpu, struct kvm_run *run) +{ + uint64_t vector = run->tdx.setup_event_notify.vector; + + if (vector >= 32 && vector < 256) { + qemu_mutex_lock(&tdx_guest->lock); + tdx_guest->event_notify_vector = vector; + tdx_guest->event_notify_apicid = cpu->apic_id; + qemu_mutex_unlock(&tdx_guest->lock); + run->tdx.setup_event_notify.ret = TDG_VP_VMCALL_SUCCESS; + } else { + run->tdx.setup_event_notify.ret = TDG_VP_VMCALL_INVALID_OPERAND; + } +} + +static void tdx_panicked_on_fatal_error(X86CPU *cpu, uint64_t error_code, + char *message, bool has_gpa, + uint64_t gpa) +{ + GuestPanicInformation *panic_info; + + panic_info = g_new0(GuestPanicInformation, 1); + panic_info->type = GUEST_PANIC_INFORMATION_TYPE_TDX; + panic_info->u.tdx.error_code = (uint32_t) error_code; + panic_info->u.tdx.message = message; + panic_info->u.tdx.gpa = gpa; + panic_info->u.tdx.has_gpa = has_gpa; + + qemu_system_guest_panicked(panic_info); +} + +/* + * Only 8 
registers can contain valid ASCII byte stream to form the fatal + * message, and their sequence is: R14, R15, RBX, RDI, RSI, R8, R9, RDX + */ +#define TDX_FATAL_MESSAGE_MAX 64 + +#define TDX_REPORT_FATAL_ERROR_GPA_VALID BIT_ULL(63) + +int tdx_handle_report_fatal_error(X86CPU *cpu, struct kvm_run *run) +{ + uint64_t error_code = run->system_event.data[R_R12]; + uint64_t reg_mask = run->system_event.data[R_ECX]; + char *message = NULL; + uint64_t *tmp; + uint64_t gpa = -1ull; + bool has_gpa = false; + + if (error_code & 0xffff) { + error_report("TDX: REPORT_FATAL_ERROR: invalid error code: 0x%"PRIx64, + error_code); + return -1; + } + + if (reg_mask) { + message = g_malloc0(TDX_FATAL_MESSAGE_MAX + 1); + tmp = (uint64_t *)message; + +#define COPY_REG(REG) \ + do { \ + if (reg_mask & BIT_ULL(REG)) { \ + *(tmp++) = run->system_event.data[REG]; \ + } \ + } while (0) + + COPY_REG(R_R14); + COPY_REG(R_R15); + COPY_REG(R_EBX); + COPY_REG(R_EDI); + COPY_REG(R_ESI); + COPY_REG(R_R8); + COPY_REG(R_R9); + COPY_REG(R_EDX); + *((char *)tmp) = '\0'; + } +#undef COPY_REG + + if (error_code & TDX_REPORT_FATAL_ERROR_GPA_VALID) { + gpa = run->system_event.data[R_R13]; + has_gpa = true; + } + + tdx_panicked_on_fatal_error(cpu, error_code, message, has_gpa, gpa); + + return -1; +} + +static bool tdx_guest_get_sept_ve_disable(Object *obj, Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + + return !!(tdx->attributes & TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE); +} + +static void tdx_guest_set_sept_ve_disable(Object *obj, bool value, Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + + if (value) { + tdx->attributes |= TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE; + } else { + tdx->attributes &= ~TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE; + } +} + +static char *tdx_guest_get_mrconfigid(Object *obj, Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + + return g_strdup(tdx->mrconfigid); +} + +static void tdx_guest_set_mrconfigid(Object *obj, const char *value, Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + + g_free(tdx->mrconfigid); + tdx->mrconfigid = g_strdup(value); +} + +static char *tdx_guest_get_mrowner(Object *obj, Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + + return g_strdup(tdx->mrowner); +} + +static void tdx_guest_set_mrowner(Object *obj, const char *value, Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + + g_free(tdx->mrowner); + tdx->mrowner = g_strdup(value); +} + +static char *tdx_guest_get_mrownerconfig(Object *obj, Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + + return g_strdup(tdx->mrownerconfig); +} + +static void tdx_guest_set_mrownerconfig(Object *obj, const char *value, Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + + g_free(tdx->mrownerconfig); + tdx->mrownerconfig = g_strdup(value); +} + +static void tdx_guest_get_qgs(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + + if (!tdx->qg_sock_addr) { + error_setg(errp, "quote-generation-socket is not set"); + return; + } + visit_type_SocketAddress(v, name, &tdx->qg_sock_addr, errp); +} + +static void tdx_guest_set_qgs(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + SocketAddress *sock = NULL; + + if (!visit_type_SocketAddress(v, name, &sock, errp)) { + return; + } + + if (tdx->qg_sock_addr) { + qapi_free_SocketAddress(tdx->qg_sock_addr); + } + + tdx->qg_sock_addr = sock; +} + +/* tdx guest */ +OBJECT_DEFINE_TYPE_WITH_INTERFACES(TdxGuest, + tdx_guest, + TDX_GUEST, + X86_CONFIDENTIAL_GUEST, + { 
TYPE_USER_CREATABLE }, + { NULL }) + +static void tdx_guest_init(Object *obj) +{ + ConfidentialGuestSupport *cgs = CONFIDENTIAL_GUEST_SUPPORT(obj); + TdxGuest *tdx = TDX_GUEST(obj); + + qemu_mutex_init(&tdx->lock); + + cgs->require_guest_memfd = true; + tdx->attributes = TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE; + + object_property_add_uint64_ptr(obj, "attributes", &tdx->attributes, + OBJ_PROP_FLAG_READWRITE); + object_property_add_bool(obj, "sept-ve-disable", + tdx_guest_get_sept_ve_disable, + tdx_guest_set_sept_ve_disable); + object_property_add_str(obj, "mrconfigid", + tdx_guest_get_mrconfigid, + tdx_guest_set_mrconfigid); + object_property_add_str(obj, "mrowner", + tdx_guest_get_mrowner, tdx_guest_set_mrowner); + object_property_add_str(obj, "mrownerconfig", + tdx_guest_get_mrownerconfig, + tdx_guest_set_mrownerconfig); + + object_property_add(obj, "quote-generation-socket", "SocketAddress", + tdx_guest_get_qgs, + tdx_guest_set_qgs, + NULL, NULL); + + qemu_mutex_init(&tdx->lock); + + tdx->event_notify_vector = -1; + tdx->event_notify_apicid = -1; +} + +static void tdx_guest_finalize(Object *obj) +{ +} + +static void tdx_guest_class_init(ObjectClass *oc, const void *data) +{ + ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); + X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); + + klass->kvm_init = tdx_kvm_init; + x86_klass->kvm_type = tdx_kvm_type; + x86_klass->cpu_instance_init = tdx_cpu_instance_init; + x86_klass->adjust_cpuid_features = tdx_adjust_cpuid_features; + x86_klass->check_features = tdx_check_features; +} diff --git a/target/i386/kvm/tdx.h b/target/i386/kvm/tdx.h new file mode 100644 index 0000000..1c38faf --- /dev/null +++ b/target/i386/kvm/tdx.h @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef QEMU_I386_TDX_H +#define QEMU_I386_TDX_H + +#ifndef CONFIG_USER_ONLY +#include CONFIG_DEVICES /* CONFIG_TDX */ +#endif + +#include "confidential-guest.h" +#include "cpu.h" +#include "hw/i386/tdvf.h" + +#include "tdx-quote-generator.h" + +#define TYPE_TDX_GUEST "tdx-guest" +#define TDX_GUEST(obj) OBJECT_CHECK(TdxGuest, (obj), TYPE_TDX_GUEST) + +typedef struct TdxGuestClass { + X86ConfidentialGuestClass parent_class; +} TdxGuestClass; + +/* TDX requires bus frequency 25MHz */ +#define TDX_APIC_BUS_CYCLES_NS 40 + +#define TDVMCALL_GET_TD_VM_CALL_INFO 0x10000 +#define TDVMCALL_GET_QUOTE 0x10002 +#define TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT 0x10004 + +#define TDG_VP_VMCALL_SUCCESS 0x0000000000000000ULL +#define TDG_VP_VMCALL_RETRY 0x0000000000000001ULL +#define TDG_VP_VMCALL_INVALID_OPERAND 0x8000000000000000ULL +#define TDG_VP_VMCALL_GPA_INUSE 0x8000000000000001ULL +#define TDG_VP_VMCALL_ALIGN_ERROR 0x8000000000000002ULL + +#define TDG_VP_VMCALL_SUBFUNC_SET_EVENT_NOTIFY_INTERRUPT BIT_ULL(1) + +enum TdxRamType { + TDX_RAM_UNACCEPTED, + TDX_RAM_ADDED, +}; + +typedef struct TdxRamEntry { + uint64_t address; + uint64_t length; + enum TdxRamType type; +} TdxRamEntry; + +typedef struct TdxGuest { + X86ConfidentialGuest parent_obj; + + QemuMutex lock; + + bool initialized; + uint64_t attributes; /* TD attributes */ + uint64_t xfam; + char *mrconfigid; /* base64 encoded sha384 digest */ + char *mrowner; /* base64 encoded sha384 digest */ + char *mrownerconfig; /* base64 encoded sha384 digest */ + + MemoryRegion *tdvf_mr; + TdxFirmware tdvf; + + uint32_t nr_ram_entries; + TdxRamEntry *ram_entries; + + /* GetQuote */ + SocketAddress *qg_sock_addr; + int num; + + uint32_t event_notify_vector; + uint32_t event_notify_apicid; 
+} TdxGuest; + +#ifdef CONFIG_TDX +bool is_tdx_vm(void); +#else +#define is_tdx_vm() 0 +#endif /* CONFIG_TDX */ + +int tdx_pre_create_vcpu(CPUState *cpu, Error **errp); +void tdx_set_tdvf_region(MemoryRegion *tdvf_mr); +int tdx_parse_tdvf(void *flash_ptr, int size); +int tdx_handle_report_fatal_error(X86CPU *cpu, struct kvm_run *run); +void tdx_handle_get_quote(X86CPU *cpu, struct kvm_run *run); +void tdx_handle_get_tdvmcall_info(X86CPU *cpu, struct kvm_run *run); +void tdx_handle_setup_event_notify_interrupt(X86CPU *cpu, struct kvm_run *run); + +#endif /* QEMU_I386_TDX_H */ diff --git a/target/i386/kvm/vmsr_energy.c b/target/i386/kvm/vmsr_energy.c index d6aad52..58ce3df 100644 --- a/target/i386/kvm/vmsr_energy.c +++ b/target/i386/kvm/vmsr_energy.c @@ -27,15 +27,6 @@ char *vmsr_compute_default_paths(void) return g_build_filename(state, "run", "qemu-vmsr-helper.sock", NULL); } -bool is_host_cpu_intel(void) -{ - char vendor[CPUID_VENDOR_SZ + 1]; - - host_cpu_vendor_fms(vendor, NULL, NULL, NULL); - - return g_str_equal(vendor, CPUID_VENDOR_INTEL); -} - int is_rapl_enabled(void) { const char *path = "/sys/class/powercap/intel-rapl/enabled"; diff --git a/target/i386/kvm/vmsr_energy.h b/target/i386/kvm/vmsr_energy.h index 16cc1f4..151bcbd 100644 --- a/target/i386/kvm/vmsr_energy.h +++ b/target/i386/kvm/vmsr_energy.h @@ -94,6 +94,5 @@ double vmsr_get_ratio(uint64_t e_delta, unsigned long long delta_ticks, unsigned int maxticks); void vmsr_init_topo_info(X86CPUTopoInfo *topo_info, const MachineState *ms); -bool is_host_cpu_intel(void); int is_rapl_enabled(void); #endif /* VMSR_ENERGY_H */ diff --git a/target/i386/kvm/xen-emu.c b/target/i386/kvm/xen-emu.c index e81a245..284c5ef 100644 --- a/target/i386/kvm/xen-emu.c +++ b/target/i386/kvm/xen-emu.c @@ -13,11 +13,12 @@ #include "qemu/log.h" #include "qemu/main-loop.h" #include "qemu/error-report.h" +#include "exec/target_page.h" #include "hw/xen/xen.h" #include "system/kvm_int.h" #include "system/kvm_xen.h" #include "kvm/kvm_i386.h" -#include "exec/address-spaces.h" +#include "system/address-spaces.h" #include "xen-emu.h" #include "trace.h" #include "system/runstate.h" diff --git a/target/i386/machine.c b/target/i386/machine.c index 70f632a..dd2dac1 100644 --- a/target/i386/machine.c +++ b/target/i386/machine.c @@ -7,7 +7,7 @@ #include "hw/i386/x86.h" #include "kvm/kvm_i386.h" #include "hw/xen/xen.h" - +#include "exec/watchpoint.h" #include "system/kvm.h" #include "system/kvm_xen.h" #include "system/tcg.h" @@ -1060,9 +1060,8 @@ static bool tsc_khz_needed(void *opaque) { X86CPU *cpu = opaque; CPUX86State *env = &cpu->env; - MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); - X86MachineClass *x86mc = X86_MACHINE_CLASS(mc); - return env->tsc_khz && x86mc->save_tsc_khz; + + return env->tsc_khz; } static const VMStateDescription vmstate_tsc_khz = { diff --git a/target/i386/meson.build b/target/i386/meson.build index c1aacea..092af34 100644 --- a/target/i386/meson.build +++ b/target/i386/meson.build @@ -11,6 +11,8 @@ i386_ss.add(when: 'CONFIG_SEV', if_true: files('host-cpu.c', 'confidential-guest # x86 cpu type i386_ss.add(when: 'CONFIG_KVM', if_true: files('host-cpu.c')) i386_ss.add(when: 'CONFIG_HVF', if_true: files('host-cpu.c')) +i386_ss.add(when: 'CONFIG_WHPX', if_true: files('host-cpu.c')) +i386_ss.add(when: 'CONFIG_NVMM', if_true: files('host-cpu.c')) i386_system_ss = ss.source_set() i386_system_ss.add(files( diff --git a/target/i386/monitor.c b/target/i386/monitor.c index 3ea92b0..3c9b6ca 100644 --- a/target/i386/monitor.c +++ 
b/target/i386/monitor.c @@ -29,7 +29,6 @@ #include "monitor/hmp.h" #include "qobject/qdict.h" #include "qapi/error.h" -#include "qapi/qapi-commands-misc-target.h" #include "qapi/qapi-commands-misc.h" /* Perform linear address sign extension */ diff --git a/target/i386/nvmm/nvmm-accel-ops.c b/target/i386/nvmm/nvmm-accel-ops.c index 4e4e63d..a5517b0 100644 --- a/target/i386/nvmm/nvmm-accel-ops.c +++ b/target/i386/nvmm/nvmm-accel-ops.c @@ -81,12 +81,13 @@ static void nvmm_kick_vcpu_thread(CPUState *cpu) cpus_kick_thread(cpu); } -static void nvmm_accel_ops_class_init(ObjectClass *oc, void *data) +static void nvmm_accel_ops_class_init(ObjectClass *oc, const void *data) { AccelOpsClass *ops = ACCEL_OPS_CLASS(oc); ops->create_vcpu_thread = nvmm_start_vcpu_thread; ops->kick_vcpu_thread = nvmm_kick_vcpu_thread; + ops->handle_interrupt = generic_handle_interrupt; ops->synchronize_post_reset = nvmm_cpu_synchronize_post_reset; ops->synchronize_post_init = nvmm_cpu_synchronize_post_init; diff --git a/target/i386/nvmm/nvmm-all.c b/target/i386/nvmm/nvmm-all.c index 04e5f7e..11c2630 100644 --- a/target/i386/nvmm/nvmm-all.c +++ b/target/i386/nvmm/nvmm-all.c @@ -9,8 +9,8 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "exec/address-spaces.h" -#include "exec/ioport.h" +#include "system/address-spaces.h" +#include "system/ioport.h" #include "qemu/accel.h" #include "system/nvmm.h" #include "system/cpus.h" @@ -19,6 +19,8 @@ #include "qemu/error-report.h" #include "qapi/error.h" #include "qemu/queue.h" +#include "accel/accel-cpu-target.h" +#include "host-cpu.h" #include "migration/blocker.h" #include "strings.h" @@ -30,7 +32,6 @@ struct AccelCPUState { struct nvmm_vcpu vcpu; uint8_t tpr; bool stop; - bool dirty; /* Window-exiting for INTs/NMIs. */ bool int_window_exit; @@ -47,7 +48,7 @@ struct qemu_machine { /* -------------------------------------------------------------------------- */ -static bool nvmm_allowed; +bool nvmm_allowed; static struct qemu_machine qemu_mach; static struct nvmm_machine * @@ -508,7 +509,7 @@ nvmm_io_callback(struct nvmm_io *io) } /* Needed, otherwise infinite loop. */ - current_cpu->accel->dirty = false; + current_cpu->vcpu_dirty = false; } static void @@ -517,7 +518,7 @@ nvmm_mem_callback(struct nvmm_mem *mem) cpu_physical_memory_rw(mem->gpa, mem->data, mem->size, mem->write); /* Needed, otherwise infinite loop. */ - current_cpu->accel->dirty = false; + current_cpu->vcpu_dirty = false; } static struct nvmm_assist_callbacks nvmm_callbacks = { @@ -727,9 +728,9 @@ nvmm_vcpu_loop(CPUState *cpu) * Inner VCPU loop. 
*/ do { - if (cpu->accel->dirty) { + if (cpu->vcpu_dirty) { nvmm_set_registers(cpu); - cpu->accel->dirty = false; + cpu->vcpu_dirty = false; } if (qcpu->stop) { @@ -827,32 +828,32 @@ static void do_nvmm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg) { nvmm_get_registers(cpu); - cpu->accel->dirty = true; + cpu->vcpu_dirty = true; } static void do_nvmm_cpu_synchronize_post_reset(CPUState *cpu, run_on_cpu_data arg) { nvmm_set_registers(cpu); - cpu->accel->dirty = false; + cpu->vcpu_dirty = false; } static void do_nvmm_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg) { nvmm_set_registers(cpu); - cpu->accel->dirty = false; + cpu->vcpu_dirty = false; } static void do_nvmm_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg) { - cpu->accel->dirty = true; + cpu->vcpu_dirty = true; } void nvmm_cpu_synchronize_state(CPUState *cpu) { - if (!cpu->accel->dirty) { + if (!cpu->vcpu_dirty) { run_on_cpu(cpu, do_nvmm_cpu_synchronize_state, RUN_ON_CPU_NULL); } } @@ -982,7 +983,7 @@ nvmm_init_vcpu(CPUState *cpu) } } - qcpu->dirty = true; + qcpu->vcpu_dirty = true; cpu->accel = qcpu; return 0; @@ -1153,7 +1154,7 @@ static struct RAMBlockNotifier nvmm_ram_notifier = { /* -------------------------------------------------------------------------- */ static int -nvmm_accel_init(MachineState *ms) +nvmm_accel_init(AccelState *as, MachineState *ms) { int ret, err; @@ -1193,14 +1194,8 @@ nvmm_accel_init(MachineState *ms) return 0; } -int -nvmm_enabled(void) -{ - return nvmm_allowed; -} - static void -nvmm_accel_class_init(ObjectClass *oc, void *data) +nvmm_accel_class_init(ObjectClass *oc, const void *data) { AccelClass *ac = ACCEL_CLASS(oc); ac->name = "NVMM"; @@ -1214,10 +1209,33 @@ static const TypeInfo nvmm_accel_type = { .class_init = nvmm_accel_class_init, }; +static void nvmm_cpu_instance_init(CPUState *cs) +{ + X86CPU *cpu = X86_CPU(cs); + + host_cpu_instance_init(cpu); +} + +static void nvmm_cpu_accel_class_init(ObjectClass *oc, const void *data) +{ + AccelCPUClass *acc = ACCEL_CPU_CLASS(oc); + + acc->cpu_instance_init = nvmm_cpu_instance_init; +} + +static const TypeInfo nvmm_cpu_accel_type = { + .name = ACCEL_CPU_NAME("nvmm"), + + .parent = TYPE_ACCEL_CPU, + .class_init = nvmm_cpu_accel_class_init, + .abstract = true, +}; + static void nvmm_type_init(void) { type_register_static(&nvmm_accel_type); + type_register_static(&nvmm_cpu_accel_type); } type_init(nvmm_type_init); diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h index f0aa189..a2e4d48 100644 --- a/target/i386/ops_sse.h +++ b/target/i386/ops_sse.h @@ -842,7 +842,7 @@ int64_t helper_cvttsd2sq(CPUX86State *env, ZMMReg *s) void glue(helper_rsqrtps, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s) { - uint8_t old_flags = get_float_exception_flags(&env->sse_status); + int old_flags = get_float_exception_flags(&env->sse_status); int i; for (i = 0; i < 2 << SHIFT; i++) { d->ZMM_S(i) = float32_div(float32_one, @@ -855,7 +855,7 @@ void glue(helper_rsqrtps, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s) #if SHIFT == 1 void helper_rsqrtss(CPUX86State *env, ZMMReg *d, ZMMReg *v, ZMMReg *s) { - uint8_t old_flags = get_float_exception_flags(&env->sse_status); + int old_flags = get_float_exception_flags(&env->sse_status); int i; d->ZMM_S(0) = float32_div(float32_one, float32_sqrt(s->ZMM_S(0), &env->sse_status), @@ -869,7 +869,7 @@ void helper_rsqrtss(CPUX86State *env, ZMMReg *d, ZMMReg *v, ZMMReg *s) void glue(helper_rcpps, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s) { - uint8_t old_flags = 
get_float_exception_flags(&env->sse_status); + int old_flags = get_float_exception_flags(&env->sse_status); int i; for (i = 0; i < 2 << SHIFT; i++) { d->ZMM_S(i) = float32_div(float32_one, s->ZMM_S(i), &env->sse_status); @@ -880,7 +880,7 @@ void glue(helper_rcpps, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s) #if SHIFT == 1 void helper_rcpss(CPUX86State *env, ZMMReg *d, ZMMReg *v, ZMMReg *s) { - uint8_t old_flags = get_float_exception_flags(&env->sse_status); + int old_flags = get_float_exception_flags(&env->sse_status); int i; d->ZMM_S(0) = float32_div(float32_one, s->ZMM_S(0), &env->sse_status); for (i = 1; i < 2 << SHIFT; i++) { @@ -1714,7 +1714,7 @@ void glue(helper_phminposuw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) void glue(helper_roundps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, uint32_t mode) { - uint8_t old_flags = get_float_exception_flags(&env->sse_status); + int old_flags = get_float_exception_flags(&env->sse_status); signed char prev_rounding_mode; int i; @@ -1738,7 +1738,7 @@ void glue(helper_roundps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, void glue(helper_roundpd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, uint32_t mode) { - uint8_t old_flags = get_float_exception_flags(&env->sse_status); + int old_flags = get_float_exception_flags(&env->sse_status); signed char prev_rounding_mode; int i; @@ -1763,7 +1763,7 @@ void glue(helper_roundpd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, void glue(helper_roundss, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s, uint32_t mode) { - uint8_t old_flags = get_float_exception_flags(&env->sse_status); + int old_flags = get_float_exception_flags(&env->sse_status); signed char prev_rounding_mode; int i; @@ -1788,7 +1788,7 @@ void glue(helper_roundss, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s, void glue(helper_roundsd, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s, uint32_t mode) { - uint8_t old_flags = get_float_exception_flags(&env->sse_status); + int old_flags = get_float_exception_flags(&env->sse_status); signed char prev_rounding_mode; int i; diff --git a/target/i386/sev-system-stub.c b/target/i386/sev-system-stub.c index d5bf886..7c5c02a 100644 --- a/target/i386/sev-system-stub.c +++ b/target/i386/sev-system-stub.c @@ -14,34 +14,9 @@ #include "qemu/osdep.h" #include "monitor/monitor.h" #include "monitor/hmp-target.h" -#include "qapi/qapi-commands-misc-target.h" #include "qapi/error.h" #include "sev.h" -SevInfo *qmp_query_sev(Error **errp) -{ - error_setg(errp, "SEV is not available in this QEMU"); - return NULL; -} - -SevLaunchMeasureInfo *qmp_query_sev_launch_measure(Error **errp) -{ - error_setg(errp, "SEV is not available in this QEMU"); - return NULL; -} - -SevCapability *qmp_query_sev_capabilities(Error **errp) -{ - error_setg(errp, "SEV is not available in this QEMU"); - return NULL; -} - -void qmp_sev_inject_launch_secret(const char *packet_header, const char *secret, - bool has_gpa, uint64_t gpa, Error **errp) -{ - error_setg(errp, "SEV is not available in this QEMU"); -} - int sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp) { g_assert_not_reached(); @@ -56,13 +31,6 @@ int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size) g_assert_not_reached(); } -SevAttestationReport *qmp_query_sev_attestation_report(const char *mnonce, - Error **errp) -{ - error_setg(errp, "SEV is not available in this QEMU"); - return NULL; -} - void hmp_info_sev(Monitor *mon, const QDict *qdict) { monitor_printf(mon, "SEV is not available in this QEMU\n"); diff --git a/target/i386/sev.c b/target/i386/sev.c 
index 0e1dbb6..1057b8a 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -26,6 +26,7 @@ #include "qemu/uuid.h" #include "qemu/error-report.h" #include "crypto/hash.h" +#include "exec/target_page.h" #include "system/kvm.h" #include "kvm/kvm_i386.h" #include "sev.h" @@ -36,11 +37,13 @@ #include "qom/object.h" #include "monitor/monitor.h" #include "monitor/hmp-target.h" -#include "qapi/qapi-commands-misc-target.h" +#include "qapi/qapi-commands-misc-i386.h" #include "confidential-guest.h" #include "hw/i386/pc.h" -#include "exec/address-spaces.h" +#include "system/address-spaces.h" +#include "hw/i386/e820_memory_layout.h" #include "qemu/queue.h" +#include "qemu/cutils.h" OBJECT_DECLARE_TYPE(SevCommonState, SevCommonStateClass, SEV_COMMON) OBJECT_DECLARE_TYPE(SevGuestState, SevCommonStateClass, SEV_GUEST) @@ -49,6 +52,15 @@ OBJECT_DECLARE_TYPE(SevSnpGuestState, SevCommonStateClass, SEV_SNP_GUEST) /* hard code sha256 digest size */ #define HASH_SIZE 32 +/* Hard coded GPA that KVM uses for the VMSA */ +#define KVM_VMSA_GPA 0xFFFFFFFFF000 + +/* Convert between SEV-ES VMSA and SegmentCache flags/attributes */ +#define FLAGS_VMSA_TO_SEGCACHE(flags) \ + ((((flags) & 0xff00) << 12) | (((flags) & 0xff) << 8)) +#define FLAGS_SEGCACHE_TO_VMSA(flags) \ + ((((flags) & 0xff00) >> 8) | (((flags) & 0xf00000) >> 12)) + typedef struct QEMU_PACKED SevHashTableEntry { QemuUUID guid; uint16_t len; @@ -88,6 +100,14 @@ typedef struct QEMU_PACKED SevHashTableDescriptor { uint32_t size; } SevHashTableDescriptor; +typedef struct SevLaunchVmsa { + QTAILQ_ENTRY(SevLaunchVmsa) next; + + uint16_t cpu_index; + uint64_t gpa; + struct sev_es_save_area vmsa; +} SevLaunchVmsa; + struct SevCommonState { X86ConfidentialGuest parent_obj; @@ -98,6 +118,8 @@ struct SevCommonState { uint32_t cbitpos; uint32_t reduced_phys_bits; bool kernel_hashes; + uint64_t sev_features; + uint64_t supported_sev_features; /* runtime state */ uint8_t api_major; @@ -106,9 +128,7 @@ struct SevCommonState { int sev_fd; SevState state; - uint32_t reset_cs; - uint32_t reset_ip; - bool reset_data_valid; + QTAILQ_HEAD(, SevLaunchVmsa) launch_vmsa; }; struct SevCommonStateClass { @@ -121,7 +141,8 @@ struct SevCommonStateClass { Error **errp); int (*launch_start)(SevCommonState *sev_common); void (*launch_finish)(SevCommonState *sev_common); - int (*launch_update_data)(SevCommonState *sev_common, hwaddr gpa, uint8_t *ptr, size_t len); + int (*launch_update_data)(SevCommonState *sev_common, hwaddr gpa, + uint8_t *ptr, size_t len, Error **errp); int (*kvm_init)(ConfidentialGuestSupport *cgs, Error **errp); }; @@ -211,14 +232,6 @@ static const char *const sev_fw_errlist[] = { #define SEV_FW_MAX_ERROR ARRAY_SIZE(sev_fw_errlist) -/* <linux/kvm.h> doesn't expose this, so re-use the max from kvm.c */ -#define KVM_MAX_CPUID_ENTRIES 100 - -typedef struct KvmCpuidInfo { - struct kvm_cpuid2 cpuid; - struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES]; -} KvmCpuidInfo; - #define SNP_CPUID_FUNCTION_MAXCOUNT 64 #define SNP_CPUID_FUNCTION_UNKNOWN 0xFFFFFFFF @@ -370,6 +383,288 @@ static struct RAMBlockNotifier sev_ram_notifier = { .ram_block_removed = sev_ram_block_removed, }; +static void sev_apply_cpu_context(CPUState *cpu) +{ + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + X86CPU *x86; + CPUX86State *env; + struct SevLaunchVmsa *launch_vmsa; + + /* See if an initial VMSA has been provided for this CPU */ + QTAILQ_FOREACH(launch_vmsa, &sev_common->launch_vmsa, next) + { + if (cpu->cpu_index == launch_vmsa->cpu_index) { + x86 = 
X86_CPU(cpu); + env = &x86->env; + + /* + * Ideally we would provide the VMSA directly to kvm which would + * ensure that the resulting initial VMSA measurement which is + * calculated during KVM_SEV_LAUNCH_UPDATE_VMSA is calculated from + * exactly what we provide here. Currently this is not possible so + * we need to copy the parts of the VMSA structure that we currently + * support into the CPU state. + */ + cpu_load_efer(env, launch_vmsa->vmsa.efer); + cpu_x86_update_cr4(env, launch_vmsa->vmsa.cr4); + cpu_x86_update_cr0(env, launch_vmsa->vmsa.cr0); + cpu_x86_update_cr3(env, launch_vmsa->vmsa.cr3); + env->xcr0 = launch_vmsa->vmsa.xcr0; + env->pat = launch_vmsa->vmsa.g_pat; + + cpu_x86_load_seg_cache( + env, R_CS, launch_vmsa->vmsa.cs.selector, + launch_vmsa->vmsa.cs.base, launch_vmsa->vmsa.cs.limit, + FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.cs.attrib)); + cpu_x86_load_seg_cache( + env, R_DS, launch_vmsa->vmsa.ds.selector, + launch_vmsa->vmsa.ds.base, launch_vmsa->vmsa.ds.limit, + FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.ds.attrib)); + cpu_x86_load_seg_cache( + env, R_ES, launch_vmsa->vmsa.es.selector, + launch_vmsa->vmsa.es.base, launch_vmsa->vmsa.es.limit, + FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.es.attrib)); + cpu_x86_load_seg_cache( + env, R_FS, launch_vmsa->vmsa.fs.selector, + launch_vmsa->vmsa.fs.base, launch_vmsa->vmsa.fs.limit, + FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.fs.attrib)); + cpu_x86_load_seg_cache( + env, R_GS, launch_vmsa->vmsa.gs.selector, + launch_vmsa->vmsa.gs.base, launch_vmsa->vmsa.gs.limit, + FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.gs.attrib)); + cpu_x86_load_seg_cache( + env, R_SS, launch_vmsa->vmsa.ss.selector, + launch_vmsa->vmsa.ss.base, launch_vmsa->vmsa.ss.limit, + FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.ss.attrib)); + + env->gdt.base = launch_vmsa->vmsa.gdtr.base; + env->gdt.limit = launch_vmsa->vmsa.gdtr.limit; + env->gdt.flags = + FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.gdtr.attrib); + env->idt.base = launch_vmsa->vmsa.idtr.base; + env->idt.limit = launch_vmsa->vmsa.idtr.limit; + env->idt.flags = + FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.idtr.attrib); + + cpu_x86_load_seg_cache( + env, R_LDTR, launch_vmsa->vmsa.ldtr.selector, + launch_vmsa->vmsa.ldtr.base, launch_vmsa->vmsa.ldtr.limit, + FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.ldtr.attrib)); + cpu_x86_load_seg_cache( + env, R_TR, launch_vmsa->vmsa.tr.selector, + launch_vmsa->vmsa.ldtr.base, launch_vmsa->vmsa.tr.limit, + FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.tr.attrib)); + + env->dr[6] = launch_vmsa->vmsa.dr6; + env->dr[7] = launch_vmsa->vmsa.dr7; + + env->regs[R_EAX] = launch_vmsa->vmsa.rax; + env->regs[R_ECX] = launch_vmsa->vmsa.rcx; + env->regs[R_EDX] = launch_vmsa->vmsa.rdx; + env->regs[R_EBX] = launch_vmsa->vmsa.rbx; + env->regs[R_ESP] = launch_vmsa->vmsa.rsp; + env->regs[R_EBP] = launch_vmsa->vmsa.rbp; + env->regs[R_ESI] = launch_vmsa->vmsa.rsi; + env->regs[R_EDI] = launch_vmsa->vmsa.rdi; +#ifdef TARGET_X86_64 + env->regs[R_R8] = launch_vmsa->vmsa.r8; + env->regs[R_R9] = launch_vmsa->vmsa.r9; + env->regs[R_R10] = launch_vmsa->vmsa.r10; + env->regs[R_R11] = launch_vmsa->vmsa.r11; + env->regs[R_R12] = launch_vmsa->vmsa.r12; + env->regs[R_R13] = launch_vmsa->vmsa.r13; + env->regs[R_R14] = launch_vmsa->vmsa.r14; + env->regs[R_R15] = launch_vmsa->vmsa.r15; +#endif + env->eip = launch_vmsa->vmsa.rip; + env->eflags = launch_vmsa->vmsa.rflags; + + cpu_set_fpuc(env, launch_vmsa->vmsa.x87_fcw); + env->mxcsr = launch_vmsa->vmsa.mxcsr; + + break; + } + } +} + +static int 
check_sev_features(SevCommonState *sev_common, uint64_t sev_features, + Error **errp) +{ + /* + * Ensure SEV_FEATURES is configured for correct SEV hardware and that + * the requested features are supported. If SEV-SNP is enabled then + * that feature must be enabled, otherwise it must be cleared. + */ + if (sev_snp_enabled() && !(sev_features & SVM_SEV_FEAT_SNP_ACTIVE)) { + error_setg( + errp, + "%s: SEV_SNP is enabled but is not enabled in VMSA sev_features", + __func__); + return -1; + } else if (!sev_snp_enabled() && + (sev_features & SVM_SEV_FEAT_SNP_ACTIVE)) { + error_setg( + errp, + "%s: SEV_SNP is not enabled but is enabled in VMSA sev_features", + __func__); + return -1; + } + if (sev_features & ~sev_common->supported_sev_features) { + error_setg(errp, + "%s: VMSA contains unsupported sev_features: %lX, " + "supported features: %lX", + __func__, sev_features, sev_common->supported_sev_features); + return -1; + } + return 0; +} + +static int check_vmsa_supported(SevCommonState *sev_common, hwaddr gpa, + const struct sev_es_save_area *vmsa, + Error **errp) +{ + struct sev_es_save_area vmsa_check; + + /* + * KVM always populates the VMSA at a fixed GPA which cannot be modified + * from userspace. Specifying a different GPA will not prevent the guest + * from starting but will cause the launch measurement to be different + * from expected. Therefore check that the provided GPA matches the KVM + * hardcoded value. + */ + if (gpa != KVM_VMSA_GPA) { + error_setg(errp, + "%s: The VMSA GPA must be %lX but is specified as %lX", + __func__, KVM_VMSA_GPA, gpa); + return -1; + } + + /* + * Clear all supported fields so we can then check the entire structure + * is zero. + */ + memcpy(&vmsa_check, vmsa, sizeof(struct sev_es_save_area)); + memset(&vmsa_check.es, 0, sizeof(vmsa_check.es)); + memset(&vmsa_check.cs, 0, sizeof(vmsa_check.cs)); + memset(&vmsa_check.ss, 0, sizeof(vmsa_check.ss)); + memset(&vmsa_check.ds, 0, sizeof(vmsa_check.ds)); + memset(&vmsa_check.fs, 0, sizeof(vmsa_check.fs)); + memset(&vmsa_check.gs, 0, sizeof(vmsa_check.gs)); + memset(&vmsa_check.gdtr, 0, sizeof(vmsa_check.gdtr)); + memset(&vmsa_check.idtr, 0, sizeof(vmsa_check.idtr)); + memset(&vmsa_check.ldtr, 0, sizeof(vmsa_check.ldtr)); + memset(&vmsa_check.tr, 0, sizeof(vmsa_check.tr)); + vmsa_check.efer = 0; + vmsa_check.cr0 = 0; + vmsa_check.cr3 = 0; + vmsa_check.cr4 = 0; + vmsa_check.xcr0 = 0; + vmsa_check.dr6 = 0; + vmsa_check.dr7 = 0; + vmsa_check.rax = 0; + vmsa_check.rcx = 0; + vmsa_check.rdx = 0; + vmsa_check.rbx = 0; + vmsa_check.rsp = 0; + vmsa_check.rbp = 0; + vmsa_check.rsi = 0; + vmsa_check.rdi = 0; + vmsa_check.r8 = 0; + vmsa_check.r9 = 0; + vmsa_check.r10 = 0; + vmsa_check.r11 = 0; + vmsa_check.r12 = 0; + vmsa_check.r13 = 0; + vmsa_check.r14 = 0; + vmsa_check.r15 = 0; + vmsa_check.rip = 0; + vmsa_check.rflags = 0; + + vmsa_check.g_pat = 0; + vmsa_check.xcr0 = 0; + + vmsa_check.x87_fcw = 0; + vmsa_check.mxcsr = 0; + + if (check_sev_features(sev_common, vmsa_check.sev_features, errp) < 0) { + return -1; + } + vmsa_check.sev_features = 0; + + if (!buffer_is_zero(&vmsa_check, sizeof(vmsa_check))) { + error_setg(errp, + "%s: The VMSA contains fields that are not " + "synchronized with KVM. 
Continuing would result in " + "either unpredictable guest behavior, or a " + "mismatched launch measurement.", + __func__); + return -1; + } + return 0; +} + +static int sev_set_cpu_context(uint16_t cpu_index, const void *ctx, + uint32_t ctx_len, hwaddr gpa, Error **errp) +{ + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + SevLaunchVmsa *launch_vmsa; + CPUState *cpu; + bool exists = false; + + /* + * Setting the CPU context is only supported for SEV-ES and SEV-SNP. The + * context buffer will contain a sev_es_save_area from the Linux kernel + * which is defined by "Table B-4. VMSA Layout, State Save Area for SEV-ES" + * in the AMD64 APM, Volume 2. + */ + + if (!sev_es_enabled()) { + error_setg(errp, "SEV: unable to set CPU context: Not supported"); + return -1; + } + + if (ctx_len < sizeof(struct sev_es_save_area)) { + error_setg(errp, "SEV: unable to set CPU context: " + "Invalid context provided"); + return -1; + } + + cpu = qemu_get_cpu(cpu_index); + if (!cpu) { + error_setg(errp, "SEV: unable to set CPU context for out of bounds " + "CPU index %d", cpu_index); + return -1; + } + + /* + * If the context of this VP has already been set then replace it with the + * new context. + */ + QTAILQ_FOREACH(launch_vmsa, &sev_common->launch_vmsa, next) + { + if (cpu_index == launch_vmsa->cpu_index) { + launch_vmsa->gpa = gpa; + memcpy(&launch_vmsa->vmsa, ctx, sizeof(launch_vmsa->vmsa)); + exists = true; + break; + } + } + + if (!exists) { + /* New VP context */ + launch_vmsa = g_new0(SevLaunchVmsa, 1); + memcpy(&launch_vmsa->vmsa, ctx, sizeof(launch_vmsa->vmsa)); + launch_vmsa->cpu_index = cpu_index; + launch_vmsa->gpa = gpa; + QTAILQ_INSERT_TAIL(&sev_common->launch_vmsa, launch_vmsa, next); + } + + /* Synchronise the VMSA with the current CPU state */ + sev_apply_cpu_context(cpu); + + return 0; +} + bool sev_enabled(void) { @@ -946,7 +1241,7 @@ out: } static uint32_t -sev_snp_mask_cpuid_features(X86ConfidentialGuest *cg, uint32_t feature, uint32_t index, +sev_snp_adjust_cpuid_features(X86ConfidentialGuest *cg, uint32_t feature, uint32_t index, int reg, uint32_t value) { switch (feature) { @@ -977,9 +1272,8 @@ sev_snp_mask_cpuid_features(X86ConfidentialGuest *cg, uint32_t feature, uint32_t return value; } -static int -sev_launch_update_data(SevCommonState *sev_common, hwaddr gpa, - uint8_t *addr, size_t len) +static int sev_launch_update_data(SevCommonState *sev_common, hwaddr gpa, + uint8_t *addr, size_t len, Error **errp) { int ret, fw_error; struct kvm_sev_launch_update_data update; @@ -994,8 +1288,8 @@ sev_launch_update_data(SevCommonState *sev_common, hwaddr gpa, ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_LAUNCH_UPDATE_DATA, &update, &fw_error); if (ret) { - error_report("%s: LAUNCH_UPDATE ret=%d fw_error=%d '%s'", - __func__, ret, fw_error, fw_error_to_str(fw_error)); + error_setg(errp, "%s: LAUNCH_UPDATE ret=%d fw_error=%d '%s'", __func__, + ret, fw_error, fw_error_to_str(fw_error)); } return ret; @@ -1005,6 +1299,16 @@ static int sev_launch_update_vmsa(SevGuestState *sev_guest) { int ret, fw_error; + CPUState *cpu; + + /* + * The initial CPU state is measured as part of KVM_SEV_LAUNCH_UPDATE_VMSA. + * Synchronise the CPU state to any provided launch VMSA structures. 
+ */ + CPU_FOREACH(cpu) { + sev_apply_cpu_context(cpu); + } + ret = sev_ioctl(SEV_COMMON(sev_guest)->sev_fd, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL, &fw_error); @@ -1123,8 +1427,8 @@ sev_launch_finish(SevCommonState *sev_common) migrate_add_blocker(&sev_mig_blocker, &error_fatal); } -static int -snp_launch_update_data(uint64_t gpa, void *hva, size_t len, int type) +static int snp_launch_update_data(uint64_t gpa, void *hva, size_t len, + int type, Error **errp) { SevLaunchUpdateData *data; @@ -1139,23 +1443,21 @@ snp_launch_update_data(uint64_t gpa, void *hva, size_t len, int type) return 0; } -static int -sev_snp_launch_update_data(SevCommonState *sev_common, hwaddr gpa, - uint8_t *ptr, size_t len) +static int sev_snp_launch_update_data(SevCommonState *sev_common, hwaddr gpa, + uint8_t *ptr, size_t len, Error **errp) { - int ret = snp_launch_update_data(gpa, ptr, len, - KVM_SEV_SNP_PAGE_TYPE_NORMAL); - return ret; + return snp_launch_update_data(gpa, ptr, len, + KVM_SEV_SNP_PAGE_TYPE_NORMAL, errp); } static int sev_snp_cpuid_info_fill(SnpCpuidInfo *snp_cpuid_info, - const KvmCpuidInfo *kvm_cpuid_info) + const KvmCpuidInfo *kvm_cpuid_info, Error **errp) { size_t i; if (kvm_cpuid_info->cpuid.nent > SNP_CPUID_FUNCTION_MAXCOUNT) { - error_report("SEV-SNP: CPUID entry count (%d) exceeds max (%d)", + error_setg(errp, "SEV-SNP: CPUID entry count (%d) exceeds max (%d)", kvm_cpuid_info->cpuid.nent, SNP_CPUID_FUNCTION_MAXCOUNT); return -1; } @@ -1197,8 +1499,8 @@ sev_snp_cpuid_info_fill(SnpCpuidInfo *snp_cpuid_info, return 0; } -static int -snp_launch_update_cpuid(uint32_t cpuid_addr, void *hva, size_t cpuid_len) +static int snp_launch_update_cpuid(uint32_t cpuid_addr, void *hva, + size_t cpuid_len, Error **errp) { KvmCpuidInfo kvm_cpuid_info = {0}; SnpCpuidInfo snp_cpuid_info; @@ -1215,26 +1517,25 @@ snp_launch_update_cpuid(uint32_t cpuid_addr, void *hva, size_t cpuid_len) } while (ret == -E2BIG); if (ret) { - error_report("SEV-SNP: unable to query CPUID values for CPU: '%s'", - strerror(-ret)); - return 1; + error_setg(errp, "SEV-SNP: unable to query CPUID values for CPU: '%s'", + strerror(-ret)); + return -1; } - ret = sev_snp_cpuid_info_fill(&snp_cpuid_info, &kvm_cpuid_info); - if (ret) { - error_report("SEV-SNP: failed to generate CPUID table information"); - return 1; + ret = sev_snp_cpuid_info_fill(&snp_cpuid_info, &kvm_cpuid_info, errp); + if (ret < 0) { + return -1; } memcpy(hva, &snp_cpuid_info, sizeof(snp_cpuid_info)); return snp_launch_update_data(cpuid_addr, hva, cpuid_len, - KVM_SEV_SNP_PAGE_TYPE_CPUID); + KVM_SEV_SNP_PAGE_TYPE_CPUID, errp); } -static int -snp_launch_update_kernel_hashes(SevSnpGuestState *sev_snp, uint32_t addr, - void *hva, uint32_t len) +static int snp_launch_update_kernel_hashes(SevSnpGuestState *sev_snp, + uint32_t addr, void *hva, + uint32_t len, Error **errp) { int type = KVM_SEV_SNP_PAGE_TYPE_ZERO; if (sev_snp->parent_obj.kernel_hashes) { @@ -1246,7 +1547,7 @@ snp_launch_update_kernel_hashes(SevSnpGuestState *sev_snp, uint32_t addr, sizeof(*sev_snp->kernel_hashes_data)); type = KVM_SEV_SNP_PAGE_TYPE_NORMAL; } - return snp_launch_update_data(addr, hva, len, type); + return snp_launch_update_data(addr, hva, len, type, errp); } static int @@ -1284,12 +1585,14 @@ snp_populate_metadata_pages(SevSnpGuestState *sev_snp, } if (type == KVM_SEV_SNP_PAGE_TYPE_CPUID) { - ret = snp_launch_update_cpuid(desc->base, hva, desc->len); + ret = snp_launch_update_cpuid(desc->base, hva, desc->len, + &error_fatal); } else if (desc->type == SEV_DESC_TYPE_SNP_KERNEL_HASHES) { ret = 
snp_launch_update_kernel_hashes(sev_snp, desc->base, hva, - desc->len); + desc->len, &error_fatal); } else { - ret = snp_launch_update_data(desc->base, hva, desc->len, type); + ret = snp_launch_update_data(desc->base, hva, desc->len, type, + &error_fatal); } if (ret) { @@ -1311,18 +1614,26 @@ sev_snp_launch_finish(SevCommonState *sev_common) struct kvm_sev_snp_launch_finish *finish = &sev_snp->kvm_finish_conf; /* - * To boot the SNP guest, the hypervisor is required to populate the CPUID - * and Secrets page before finalizing the launch flow. The location of - * the secrets and CPUID page is available through the OVMF metadata GUID. + * Populate all the metadata pages if not using an IGVM file. In the case + * where an IGVM file is provided it will be used to configure the metadata + * pages directly. */ - metadata = pc_system_get_ovmf_sev_metadata_ptr(); - if (metadata == NULL) { - error_report("%s: Failed to locate SEV metadata header", __func__); - exit(1); - } + if (!X86_MACHINE(qdev_get_machine())->igvm) { + /* + * To boot the SNP guest, the hypervisor is required to populate the + * CPUID and Secrets page before finalizing the launch flow. The + * location of the secrets and CPUID page is available through the + * OVMF metadata GUID. + */ + metadata = pc_system_get_ovmf_sev_metadata_ptr(); + if (metadata == NULL) { + error_report("%s: Failed to locate SEV metadata header", __func__); + exit(1); + } - /* Populate all the metadata pages */ - snp_populate_metadata_pages(sev_snp, metadata); + /* Populate all the metadata pages */ + snp_populate_metadata_pages(sev_snp, metadata); + } QTAILQ_FOREACH(data, &launch_update, next) { ret = sev_snp_launch_update(sev_snp, data); @@ -1432,6 +1743,39 @@ static int sev_snp_kvm_type(X86ConfidentialGuest *cg) return KVM_X86_SNP_VM; } +static int sev_init_supported_features(ConfidentialGuestSupport *cgs, + SevCommonState *sev_common, Error **errp) +{ + X86ConfidentialGuestClass *x86_klass = + X86_CONFIDENTIAL_GUEST_GET_CLASS(cgs); + /* + * Older kernels do not support query or setting of sev_features. In this + * case the set of supported features must be zero to match the settings + * in the kernel. 
+ */ + if (x86_klass->kvm_type(X86_CONFIDENTIAL_GUEST(sev_common)) == + KVM_X86_DEFAULT_VM) { + sev_common->supported_sev_features = 0; + return 0; + } + + /* Query KVM for the supported set of sev_features */ + struct kvm_device_attr attr = { + .group = KVM_X86_GRP_SEV, + .attr = KVM_X86_SEV_VMSA_FEATURES, + .addr = (unsigned long)&sev_common->supported_sev_features, + }; + if (kvm_ioctl(kvm_state, KVM_GET_DEVICE_ATTR, &attr) < 0) { + error_setg(errp, "%s: failed to query supported sev_features", + __func__); + return -1; + } + if (sev_snp_enabled()) { + sev_common->supported_sev_features |= SVM_SEV_FEAT_SNP_ACTIVE; + } + return 0; +} + static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) { char *devname; @@ -1512,6 +1856,10 @@ static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) } } + if (sev_init_supported_features(cgs, sev_common, errp) < 0) { + return -1; + } + trace_kvm_sev_init(); switch (x86_klass->kvm_type(X86_CONFIDENTIAL_GUEST(sev_common))) { case KVM_X86_DEFAULT_VM: @@ -1523,6 +1871,40 @@ static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) case KVM_X86_SEV_ES_VM: case KVM_X86_SNP_VM: { struct kvm_sev_init args = { 0 }; + MachineState *machine = MACHINE(qdev_get_machine()); + X86MachineState *x86machine = X86_MACHINE(qdev_get_machine()); + + /* + * If configuration is provided via an IGVM file then the IGVM file + * might contain configuration of the initial vcpu context. For SEV + * the vcpu context includes the sev_features which should be applied + * to the vcpu. + * + * KVM does not synchronize sev_features from CPU state. Instead it + * requires sev_features to be provided as part of this initialization + * call which is subsequently automatically applied to the VMSA of + * each vcpu. + * + * The IGVM file is normally processed after initialization. Therefore + * we need to pre-process it here to extract sev_features in order to + * provide it to KVM_SEV_INIT2. Each cgs_* function that is called by + * the IGVM processor detects this pre-process by observing the state + * as SEV_STATE_UNINIT. + */ + if (x86machine->igvm) { + if (IGVM_CFG_GET_CLASS(x86machine->igvm) + ->process(x86machine->igvm, machine->cgs, true, errp) == + -1) { + return -1; + } + /* + * KVM maintains a bitmask of allowed sev_features. This does not + * include SVM_SEV_FEAT_SNP_ACTIVE which is set accordingly by KVM + * itself. Therefore we need to clear this flag. 
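sev_init_supported_features() above learns which sev_features bits the host allows by reading a KVM device attribute on the /dev/kvm fd. A cut-down sketch of the same query outside QEMU; it assumes kernel headers recent enough to define KVM_X86_GRP_SEV and KVM_X86_SEV_VMSA_FEATURES, and as a simplification treats a failing ioctl as "no sev_features supported" rather than reporting an error:

    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include <linux/kvm.h>

    int main(void)
    {
        uint64_t vmsa_features = 0;
        int kvm = open("/dev/kvm", O_RDWR | O_CLOEXEC);

        if (kvm < 0) {
            perror("open /dev/kvm");
            return 1;
        }

        struct kvm_device_attr attr = {
            .group = KVM_X86_GRP_SEV,
            .attr  = KVM_X86_SEV_VMSA_FEATURES,
            .addr  = (uint64_t)(unsigned long)&vmsa_features,
        };

        /* Older kernels reject the attribute: fall back to an empty mask. */
        if (ioctl(kvm, KVM_GET_DEVICE_ATTR, &attr) < 0) {
            perror("KVM_GET_DEVICE_ATTR");
            vmsa_features = 0;
        }

        printf("supported sev_features mask: 0x%016llx\n",
               (unsigned long long)vmsa_features);
        close(kvm);
        return 0;
    }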
+ */ + args.vmsa_features = sev_common->sev_features & + ~SVM_SEV_FEAT_SNP_ACTIVE; + } ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_INIT2, &args, &fw_error); break; @@ -1622,9 +2004,8 @@ sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp) if (sev_check_state(sev_common, SEV_STATE_LAUNCH_UPDATE)) { int ret; - ret = klass->launch_update_data(sev_common, gpa, ptr, len); + ret = klass->launch_update_data(sev_common, gpa, ptr, len, errp); if (ret < 0) { - error_setg(errp, "SEV: Failed to encrypt pflash rom"); return ret; } } @@ -1789,40 +2170,109 @@ sev_es_find_reset_vector(void *flash_ptr, uint64_t flash_size, return sev_es_parse_reset_block(info, addr); } -void sev_es_set_reset_vector(CPUState *cpu) + +static void seg_to_vmsa(const SegmentCache *cpu_seg, struct vmcb_seg *vmsa_seg) { - X86CPU *x86; - CPUX86State *env; - ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs; - SevCommonState *sev_common = SEV_COMMON( - object_dynamic_cast(OBJECT(cgs), TYPE_SEV_COMMON)); + vmsa_seg->selector = cpu_seg->selector; + vmsa_seg->base = cpu_seg->base; + vmsa_seg->limit = cpu_seg->limit; + vmsa_seg->attrib = FLAGS_SEGCACHE_TO_VMSA(cpu_seg->flags); +} - /* Only update if we have valid reset information */ - if (!sev_common || !sev_common->reset_data_valid) { - return; - } +static void initialize_vmsa(const CPUState *cpu, struct sev_es_save_area *vmsa) +{ + const X86CPU *x86 = X86_CPU(cpu); + const CPUX86State *env = &x86->env; - /* Do not update the BSP reset state */ - if (cpu->cpu_index == 0) { - return; + /* + * Initialize the SEV-ES save area from the current state of + * the CPU. The entire state does not need to be copied, only the state + * that is copied back to the CPUState in sev_apply_cpu_context. + */ + memset(vmsa, 0, sizeof(struct sev_es_save_area)); + vmsa->efer = env->efer; + vmsa->cr0 = env->cr[0]; + vmsa->cr3 = env->cr[3]; + vmsa->cr4 = env->cr[4]; + vmsa->xcr0 = env->xcr0; + vmsa->g_pat = env->pat; + + seg_to_vmsa(&env->segs[R_CS], &vmsa->cs); + seg_to_vmsa(&env->segs[R_DS], &vmsa->ds); + seg_to_vmsa(&env->segs[R_ES], &vmsa->es); + seg_to_vmsa(&env->segs[R_FS], &vmsa->fs); + seg_to_vmsa(&env->segs[R_GS], &vmsa->gs); + seg_to_vmsa(&env->segs[R_SS], &vmsa->ss); + + seg_to_vmsa(&env->gdt, &vmsa->gdtr); + seg_to_vmsa(&env->idt, &vmsa->idtr); + seg_to_vmsa(&env->ldt, &vmsa->ldtr); + seg_to_vmsa(&env->tr, &vmsa->tr); + + vmsa->dr6 = env->dr[6]; + vmsa->dr7 = env->dr[7]; + + vmsa->rax = env->regs[R_EAX]; + vmsa->rcx = env->regs[R_ECX]; + vmsa->rdx = env->regs[R_EDX]; + vmsa->rbx = env->regs[R_EBX]; + vmsa->rsp = env->regs[R_ESP]; + vmsa->rbp = env->regs[R_EBP]; + vmsa->rsi = env->regs[R_ESI]; + vmsa->rdi = env->regs[R_EDI]; + +#ifdef TARGET_X86_64 + vmsa->r8 = env->regs[R_R8]; + vmsa->r9 = env->regs[R_R9]; + vmsa->r10 = env->regs[R_R10]; + vmsa->r11 = env->regs[R_R11]; + vmsa->r12 = env->regs[R_R12]; + vmsa->r13 = env->regs[R_R13]; + vmsa->r14 = env->regs[R_R14]; + vmsa->r15 = env->regs[R_R15]; +#endif + + vmsa->rip = env->eip; + vmsa->rflags = env->eflags; +} + +static void sev_es_set_ap_context(uint32_t reset_addr) +{ + CPUState *cpu; + struct sev_es_save_area vmsa; + SegmentCache cs; + + cs.selector = 0xf000; + cs.base = reset_addr & 0xffff0000; + cs.limit = 0xffff; + cs.flags = DESC_P_MASK | DESC_S_MASK | DESC_CS_MASK | DESC_R_MASK | + DESC_A_MASK; + + CPU_FOREACH(cpu) { + if (cpu->cpu_index == 0) { + /* Do not update the BSP reset state */ + continue; + } + initialize_vmsa(cpu, &vmsa); + seg_to_vmsa(&cs, &vmsa.cs); + vmsa.rip = reset_addr & 0x0000ffff; + 
sev_set_cpu_context(cpu->cpu_index, &vmsa, + sizeof(struct sev_es_save_area), + 0, &error_fatal); } +} - x86 = X86_CPU(cpu); - env = &x86->env; - - cpu_x86_load_seg_cache(env, R_CS, 0xf000, sev_common->reset_cs, 0xffff, - DESC_P_MASK | DESC_S_MASK | DESC_CS_MASK | - DESC_R_MASK | DESC_A_MASK); - - env->eip = sev_common->reset_ip; +void sev_es_set_reset_vector(CPUState *cpu) +{ + if (sev_enabled()) { + sev_apply_cpu_context(cpu); + } } int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size) { - CPUState *cpu; uint32_t addr; int ret; - SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); if (!sev_es_enabled()) { return 0; @@ -1835,14 +2285,12 @@ int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size) return ret; } + /* + * The reset vector is saved into a CPU context for each AP but not for + * the BSP. This is applied during guest startup or when the CPU is reset. + */ if (addr) { - sev_common->reset_cs = addr & 0xffff0000; - sev_common->reset_ip = addr & 0x0000ffff; - sev_common->reset_data_valid = true; - - CPU_FOREACH(cpu) { - sev_es_set_reset_vector(cpu); - } + sev_es_set_ap_context(addr); } return 0; @@ -2044,8 +2492,239 @@ static void sev_common_set_kernel_hashes(Object *obj, bool value, Error **errp) SEV_COMMON(obj)->kernel_hashes = value; } +static bool cgs_check_support(ConfidentialGuestPlatformType platform, + uint16_t platform_version, uint8_t highest_vtl, + uint64_t shared_gpa_boundary) +{ + return (((platform == CGS_PLATFORM_SEV_SNP) && sev_snp_enabled()) || + ((platform == CGS_PLATFORM_SEV_ES) && sev_es_enabled()) || + ((platform == CGS_PLATFORM_SEV) && sev_enabled())); +} + +static int cgs_set_guest_state(hwaddr gpa, uint8_t *ptr, uint64_t len, + ConfidentialGuestPageType memory_type, + uint16_t cpu_index, Error **errp) +{ + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(sev_common); + + if (sev_common->state == SEV_STATE_UNINIT) { + /* Pre-processing of IGVM file called from sev_common_kvm_init() */ + if ((cpu_index == 0) && (memory_type == CGS_PAGE_TYPE_VMSA)) { + const struct sev_es_save_area *sa = + (const struct sev_es_save_area *)ptr; + if (len < sizeof(*sa)) { + error_setg(errp, "%s: invalid VMSA length encountered", + __func__); + return -1; + } + if (check_sev_features(sev_common, sa->sev_features, errp) < 0) { + return -1; + } + sev_common->sev_features = sa->sev_features; + } + return 0; + } + + if (!sev_enabled()) { + error_setg(errp, "%s: attempt to configure guest memory, but SEV " + "is not enabled", __func__); + return -1; + } + + switch (memory_type) { + case CGS_PAGE_TYPE_NORMAL: + case CGS_PAGE_TYPE_ZERO: + return klass->launch_update_data(sev_common, gpa, ptr, len, errp); + + case CGS_PAGE_TYPE_VMSA: + if (!sev_es_enabled()) { + error_setg(errp, + "%s: attempt to configure initial VMSA, but SEV-ES " + "is not supported", + __func__); + return -1; + } + if (check_vmsa_supported(sev_common, gpa, + (const struct sev_es_save_area *)ptr, + errp) < 0) { + return -1; + } + return sev_set_cpu_context(cpu_index, ptr, len, gpa, errp); + + case CGS_PAGE_TYPE_UNMEASURED: + if (sev_snp_enabled()) { + return snp_launch_update_data( + gpa, ptr, len, KVM_SEV_SNP_PAGE_TYPE_UNMEASURED, errp); + } + /* No action required if not SEV-SNP */ + return 0; + + case CGS_PAGE_TYPE_SECRETS: + if (!sev_snp_enabled()) { + error_setg(errp, + "%s: attempt to configure secrets page, but SEV-SNP " + "is not supported", + __func__); + return -1; + } + return 
snp_launch_update_data(gpa, ptr, len, + KVM_SEV_SNP_PAGE_TYPE_SECRETS, errp); + + case CGS_PAGE_TYPE_REQUIRED_MEMORY: + if (kvm_convert_memory(gpa, len, true) < 0) { + error_setg( + errp, + "%s: failed to configure required memory. gpa: %lX, type: %d", + __func__, gpa, memory_type); + return -1; + } + return 0; + + case CGS_PAGE_TYPE_CPUID: + if (!sev_snp_enabled()) { + error_setg(errp, + "%s: attempt to configure CPUID page, but SEV-SNP " + "is not supported", + __func__); + return -1; + } + return snp_launch_update_cpuid(gpa, ptr, len, errp); + } + error_setg(errp, "%s: failed to update guest. gpa: %lX, type: %d", __func__, + gpa, memory_type); + return -1; +} + +static int cgs_get_mem_map_entry(int index, + ConfidentialGuestMemoryMapEntry *entry, + Error **errp) +{ + struct e820_entry *table; + int num_entries; + + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + if (sev_common->state == SEV_STATE_UNINIT) { + /* Pre-processing of IGVM file called from sev_common_kvm_init() */ + return 1; + } + + num_entries = e820_get_table(&table); + if ((index < 0) || (index >= num_entries)) { + return 1; + } + entry->gpa = table[index].address; + entry->size = table[index].length; + switch (table[index].type) { + case E820_RAM: + entry->type = CGS_MEM_RAM; + break; + case E820_RESERVED: + entry->type = CGS_MEM_RESERVED; + break; + case E820_ACPI: + entry->type = CGS_MEM_ACPI; + break; + case E820_NVS: + entry->type = CGS_MEM_NVS; + break; + case E820_UNUSABLE: + entry->type = CGS_MEM_UNUSABLE; + break; + } + return 0; +} + +static int cgs_set_guest_policy(ConfidentialGuestPolicyType policy_type, + uint64_t policy, void *policy_data1, + uint32_t policy_data1_size, void *policy_data2, + uint32_t policy_data2_size, Error **errp) +{ + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + if (sev_common->state == SEV_STATE_UNINIT) { + /* Pre-processing of IGVM file called from sev_common_kvm_init() */ + return 0; + } + + if (policy_type != GUEST_POLICY_SEV) { + error_setg(errp, "%s: Invalid guest policy type provided for SEV: %d", + __func__, policy_type); + return -1; + } + /* + * SEV-SNP handles policy differently. The policy flags are defined in + * kvm_start_conf.policy and an ID block and ID auth can be provided. + */ + if (sev_snp_enabled()) { + SevSnpGuestState *sev_snp_guest = + SEV_SNP_GUEST(MACHINE(qdev_get_machine())->cgs); + struct kvm_sev_snp_launch_finish *finish = + &sev_snp_guest->kvm_finish_conf; + + /* + * The policy consists of flags in 'policy' and optionally an ID block + * and ID auth in policy_data1 and policy_data2 respectively. The ID + * block and auth are optional so clear any previous ID block and auth + * and set them if provided, but always set the policy flags. 
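The cgs_get_mem_map_entry() callback above hands out the e820 table one entry per call and returns 1 once the index runs past the end, so the consumer can walk it with a simple indexed loop. A small mock of that iteration protocol; the table contents, type names and struct layout below are invented for illustration and are not the QEMU definitions:

    #include <stdint.h>
    #include <stdio.h>

    enum mem_type { MEM_RAM, MEM_RESERVED, MEM_ACPI };

    struct map_entry { uint64_t gpa, size; enum mem_type type; };

    static const struct map_entry table[] = {
        { 0x00000000, 0x0009f000, MEM_RAM },
        { 0x000f0000, 0x00010000, MEM_RESERVED },
        { 0x00100000, 0x3ff00000, MEM_RAM },
    };

    /* Fill *entry and return 0 for a valid index, 1 once the table ends. */
    static int get_mem_map_entry(int index, struct map_entry *entry)
    {
        if (index < 0 || index >= (int)(sizeof(table) / sizeof(table[0]))) {
            return 1;
        }
        *entry = table[index];
        return 0;
    }

    int main(void)
    {
        struct map_entry e;

        for (int i = 0; get_mem_map_entry(i, &e) == 0; i++) {
            printf("entry %d: gpa 0x%08llx size 0x%08llx type %d\n", i,
                   (unsigned long long)e.gpa, (unsigned long long)e.size,
                   e.type);
        }
        return 0;
    }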
+ */ + g_free(sev_snp_guest->id_block); + g_free((guchar *)finish->id_block_uaddr); + g_free(sev_snp_guest->id_auth); + g_free((guchar *)finish->id_auth_uaddr); + sev_snp_guest->id_block = NULL; + finish->id_block_uaddr = 0; + sev_snp_guest->id_auth = NULL; + finish->id_auth_uaddr = 0; + + if (policy_data1_size > 0) { + struct sev_snp_id_authentication *id_auth = + (struct sev_snp_id_authentication *)policy_data2; + + if (policy_data1_size != KVM_SEV_SNP_ID_BLOCK_SIZE) { + error_setg(errp, "%s: Invalid SEV-SNP ID block: incorrect size", + __func__); + return -1; + } + if (policy_data2_size != KVM_SEV_SNP_ID_AUTH_SIZE) { + error_setg(errp, + "%s: Invalid SEV-SNP ID auth block: incorrect size", + __func__); + return -1; + } + assert(policy_data1 != NULL); + assert(policy_data2 != NULL); + + finish->id_block_uaddr = + (__u64)g_memdup2(policy_data1, KVM_SEV_SNP_ID_BLOCK_SIZE); + finish->id_auth_uaddr = + (__u64)g_memdup2(policy_data2, KVM_SEV_SNP_ID_AUTH_SIZE); + + /* + * Check if an author key has been provided and use that to flag + * whether the author key is enabled. The first of the author key + * must be non-zero to indicate the key type, which will currently + * always be 2. + */ + sev_snp_guest->kvm_finish_conf.auth_key_en = + id_auth->author_key[0] ? 1 : 0; + finish->id_block_en = 1; + } + sev_snp_guest->kvm_start_conf.policy = policy; + } else { + SevGuestState *sev_guest = SEV_GUEST(MACHINE(qdev_get_machine())->cgs); + /* Only the policy flags are supported for SEV and SEV-ES */ + if ((policy_data1_size > 0) || (policy_data2_size > 0) || !sev_guest) { + error_setg(errp, "%s: An ID block/ID auth block has been provided " + "but SEV-SNP is not enabled", __func__); + return -1; + } + sev_guest->policy = policy; + } + return 0; +} + static void -sev_common_class_init(ObjectClass *oc, void *data) +sev_common_class_init(ObjectClass *oc, const void *data) { ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); @@ -2067,6 +2746,8 @@ static void sev_common_instance_init(Object *obj) { SevCommonState *sev_common = SEV_COMMON(obj); + ConfidentialGuestSupportClass *cgs = + CONFIDENTIAL_GUEST_SUPPORT_GET_CLASS(obj); sev_common->kvm_type = -1; @@ -2077,6 +2758,12 @@ sev_common_instance_init(Object *obj) object_property_add_uint32_ptr(obj, "reduced-phys-bits", &sev_common->reduced_phys_bits, OBJ_PROP_FLAG_READWRITE); + cgs->check_support = cgs_check_support; + cgs->set_guest_state = cgs_set_guest_state; + cgs->get_mem_map_entry = cgs_get_mem_map_entry; + cgs->set_guest_policy = cgs_set_guest_policy; + + QTAILQ_INIT(&sev_common->launch_vmsa); } /* sev guest info common to sev/sev-es/sev-snp */ @@ -2088,7 +2775,7 @@ static const TypeInfo sev_common_info = { .class_size = sizeof(SevCommonStateClass), .class_init = sev_common_class_init, .abstract = true, - .interfaces = (InterfaceInfo[]) { + .interfaces = (const InterfaceInfo[]) { { TYPE_USER_CREATABLE }, { } } @@ -2140,7 +2827,7 @@ static void sev_guest_set_legacy_vm_type(Object *obj, Visitor *v, } static void -sev_guest_class_init(ObjectClass *oc, void *data) +sev_guest_class_init(ObjectClass *oc, const void *data) { SevCommonStateClass *klass = SEV_COMMON_CLASS(oc); X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); @@ -2394,7 +3081,7 @@ sev_snp_guest_set_host_data(Object *obj, const char *value, Error **errp) } static void -sev_snp_guest_class_init(ObjectClass *oc, void *data) +sev_snp_guest_class_init(ObjectClass *oc, const void *data) { SevCommonStateClass *klass = SEV_COMMON_CLASS(oc); 
X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); @@ -2404,7 +3091,7 @@ sev_snp_guest_class_init(ObjectClass *oc, void *data) klass->launch_finish = sev_snp_launch_finish; klass->launch_update_data = sev_snp_launch_update_data; klass->kvm_init = sev_snp_kvm_init; - x86_klass->mask_cpuid_features = sev_snp_mask_cpuid_features; + x86_klass->adjust_cpuid_features = sev_snp_adjust_cpuid_features; x86_klass->kvm_type = sev_snp_kvm_type; object_class_property_add(oc, "policy", "uint64", diff --git a/target/i386/sev.h b/target/i386/sev.h index 373669e..9db1a80 100644 --- a/target/i386/sev.h +++ b/target/i386/sev.h @@ -44,6 +44,8 @@ bool sev_snp_enabled(void); #define SEV_SNP_POLICY_SMT 0x10000 #define SEV_SNP_POLICY_DBG 0x80000 +#define SVM_SEV_FEAT_SNP_ACTIVE 1 + typedef struct SevKernelLoaderContext { char *setup_data; size_t setup_size; @@ -55,6 +57,128 @@ typedef struct SevKernelLoaderContext { size_t cmdline_size; } SevKernelLoaderContext; +/* Save area definition for SEV-ES and SEV-SNP guests */ +struct QEMU_PACKED sev_es_save_area { + struct vmcb_seg es; + struct vmcb_seg cs; + struct vmcb_seg ss; + struct vmcb_seg ds; + struct vmcb_seg fs; + struct vmcb_seg gs; + struct vmcb_seg gdtr; + struct vmcb_seg ldtr; + struct vmcb_seg idtr; + struct vmcb_seg tr; + uint64_t vmpl0_ssp; + uint64_t vmpl1_ssp; + uint64_t vmpl2_ssp; + uint64_t vmpl3_ssp; + uint64_t u_cet; + uint8_t reserved_0xc8[2]; + uint8_t vmpl; + uint8_t cpl; + uint8_t reserved_0xcc[4]; + uint64_t efer; + uint8_t reserved_0xd8[104]; + uint64_t xss; + uint64_t cr4; + uint64_t cr3; + uint64_t cr0; + uint64_t dr7; + uint64_t dr6; + uint64_t rflags; + uint64_t rip; + uint64_t dr0; + uint64_t dr1; + uint64_t dr2; + uint64_t dr3; + uint64_t dr0_addr_mask; + uint64_t dr1_addr_mask; + uint64_t dr2_addr_mask; + uint64_t dr3_addr_mask; + uint8_t reserved_0x1c0[24]; + uint64_t rsp; + uint64_t s_cet; + uint64_t ssp; + uint64_t isst_addr; + uint64_t rax; + uint64_t star; + uint64_t lstar; + uint64_t cstar; + uint64_t sfmask; + uint64_t kernel_gs_base; + uint64_t sysenter_cs; + uint64_t sysenter_esp; + uint64_t sysenter_eip; + uint64_t cr2; + uint8_t reserved_0x248[32]; + uint64_t g_pat; + uint64_t dbgctl; + uint64_t br_from; + uint64_t br_to; + uint64_t last_excp_from; + uint64_t last_excp_to; + uint8_t reserved_0x298[80]; + uint32_t pkru; + uint32_t tsc_aux; + uint8_t reserved_0x2f0[24]; + uint64_t rcx; + uint64_t rdx; + uint64_t rbx; + uint64_t reserved_0x320; /* rsp already available at 0x01d8 */ + uint64_t rbp; + uint64_t rsi; + uint64_t rdi; + uint64_t r8; + uint64_t r9; + uint64_t r10; + uint64_t r11; + uint64_t r12; + uint64_t r13; + uint64_t r14; + uint64_t r15; + uint8_t reserved_0x380[16]; + uint64_t guest_exit_info_1; + uint64_t guest_exit_info_2; + uint64_t guest_exit_int_info; + uint64_t guest_nrip; + uint64_t sev_features; + uint64_t vintr_ctrl; + uint64_t guest_exit_code; + uint64_t virtual_tom; + uint64_t tlb_id; + uint64_t pcpu_id; + uint64_t event_inj; + uint64_t xcr0; + uint8_t reserved_0x3f0[16]; + + /* Floating point area */ + uint64_t x87_dp; + uint32_t mxcsr; + uint16_t x87_ftw; + uint16_t x87_fsw; + uint16_t x87_fcw; + uint16_t x87_fop; + uint16_t x87_ds; + uint16_t x87_cs; + uint64_t x87_rip; + uint8_t fpreg_x87[80]; + uint8_t fpreg_xmm[256]; + uint8_t fpreg_ymm[256]; +}; + +struct QEMU_PACKED sev_snp_id_authentication { + uint32_t id_key_alg; + uint32_t auth_key_algo; + uint8_t reserved[56]; + uint8_t id_block_sig[512]; + uint8_t id_key[1028]; + uint8_t reserved2[60]; + uint8_t id_key_sig[512]; + 
uint8_t author_key[1028]; + uint8_t reserved3[892]; +}; + bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp); int sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp); diff --git a/target/i386/tcg/access.c b/target/i386/tcg/access.c index e68b73a..97e3f0e 100644 --- a/target/i386/tcg/access.c +++ b/target/i386/tcg/access.c @@ -3,8 +3,9 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "exec/cpu_ldst.h" -#include "exec/exec-all.h" +#include "accel/tcg/cpu-ldst.h" +#include "accel/tcg/probe.h" +#include "exec/target_page.h" #include "access.h" diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc index cda32ee..853b1c8 100644 --- a/target/i386/tcg/decode-new.c.inc +++ b/target/i386/tcg/decode-new.c.inc @@ -2542,7 +2542,13 @@ static void disas_insn(DisasContext *s, CPUState *cpu) s->has_modrm = false; s->prefix = 0; - next_byte: + next_byte:; +#ifdef TARGET_X86_64 + /* clear any REX prefix followed by other prefixes. */ + int rex; + rex = -1; + next_byte_rex: +#endif b = x86_ldub_code(env, s); /* Collect prefixes. */ @@ -2585,13 +2591,12 @@ static void disas_insn(DisasContext *s, CPUState *cpu) #ifdef TARGET_X86_64 case 0x40 ... 0x4f: if (CODE64(s)) { - /* REX prefix */ - s->prefix |= PREFIX_REX; - s->vex_w = (b >> 3) & 1; - s->rex_r = (b & 0x4) << 1; - s->rex_x = (b & 0x2) << 2; - s->rex_b = (b & 0x1) << 3; - goto next_byte; + /* + * REX prefix; ignored unless it is the last prefix, so + * for now just stash it + */ + rex = b; + goto next_byte_rex; } break; #endif @@ -2618,10 +2623,13 @@ static void disas_insn(DisasContext *s, CPUState *cpu) /* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes. */ if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ - | PREFIX_LOCK | PREFIX_DATA | PREFIX_REX)) { + | PREFIX_LOCK | PREFIX_DATA)) { goto illegal_op; } #ifdef TARGET_X86_64 + if (rex != -1) { + goto illegal_op; + } s->rex_r = (~vex2 >> 4) & 8; #endif if (b == 0xc5) { @@ -2661,6 +2669,16 @@ static void disas_insn(DisasContext *s, CPUState *cpu) /* Post-process prefixes. */ if (CODE64(s)) { +#ifdef TARGET_X86_64 + if (rex != -1) { + s->prefix |= PREFIX_REX; + s->vex_w = (rex >> 3) & 1; + s->rex_r = (rex & 0x4) << 1; + s->rex_x = (rex & 0x2) << 2; + s->rex_b = (rex & 0x1) << 3; + } +#endif + /* * In 64-bit mode, the default data size is 32-bit. Select 64-bit * data with rex_w, and 16-bit data with 0x66; rex_w takes precedence @@ -2704,14 +2722,14 @@ static void disas_insn(DisasContext *s, CPUState *cpu) if (decode.e.check & X86_CHECK_i64) { goto illegal_op; } - if ((decode.e.check & X86_CHECK_i64_amd) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1) { + if ((decode.e.check & X86_CHECK_i64_amd) && !IS_INTEL_CPU(env)) { goto illegal_op; } } else { if (decode.e.check & X86_CHECK_o64) { goto illegal_op; } - if ((decode.e.check & X86_CHECK_o64_intel) && env->cpuid_vendor1 == CPUID_VENDOR_INTEL_1) { + if ((decode.e.check & X86_CHECK_o64_intel) && IS_INTEL_CPU(env)) { goto illegal_op; } } diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index 4e09e96..1a7fab93 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -19,16 +19,6 @@ * License along with this library; if not, see <http://www.gnu.org/licenses/>. */ -/* - * Sometimes, knowing what the backend has can produce better code. - * The exact opcode to check depends on 32- vs. 64-bit. 
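The decode-new.c.inc change above stops committing a REX prefix the moment it is seen: the byte is stashed in 'rex' and only applied after prefix scanning finishes, so a REX that is followed by another prefix (or that precedes a VEX escape) is dropped, matching the architectural rule that only a REX immediately before the opcode takes effect. A stand-alone sketch of that scan over a raw byte stream; it models only REX and a few legacy prefixes and does no real opcode decoding:

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    /*
     * Return the REX byte that actually applies to the instruction in buf,
     * or -1 if none does.  Only REX (0x40-0x4F) and the 0x66/0xF0/0xF2/0xF3
     * legacy prefixes are modelled; the first other byte is the opcode.
     */
    static int effective_rex(const uint8_t *buf, size_t len, size_t *opcode_pos)
    {
        int rex = -1;
        size_t i = 0;

        while (i < len) {
            uint8_t b = buf[i];

            if (b >= 0x40 && b <= 0x4f) {
                rex = b;        /* stash it; survives only if nothing follows */
                i++;
            } else if (b == 0x66 || b == 0xf0 || b == 0xf2 || b == 0xf3) {
                rex = -1;       /* a later legacy prefix makes the REX dead */
                i++;
            } else {
                break;          /* opcode byte reached */
            }
        }
        *opcode_pos = i;
        return rex;
    }

    int main(void)
    {
        /* 48 89 c8: REX.W mov rax,rcx.  48 66 89 c8: dead REX, 16-bit mov ax,cx. */
        const uint8_t live[] = { 0x48, 0x89, 0xc8 };
        const uint8_t dead[] = { 0x48, 0x66, 0x89, 0xc8 };
        size_t pos;

        printf("live rex: %d\n", effective_rex(live, sizeof(live), &pos));
        printf("dead rex: %d\n", effective_rex(dead, sizeof(dead), &pos));
        return 0;
    }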
- */ -#ifdef TARGET_X86_64 -#define INDEX_op_extract2_tl INDEX_op_extract2_i64 -#else -#define INDEX_op_extract2_tl INDEX_op_extract2_i32 -#endif - #define MMX_OFFSET(reg) \ ({ assert((reg) >= 0 && (reg) <= 7); \ offsetof(CPUX86State, fpregs[reg].mmx); }) @@ -352,7 +342,7 @@ static void gen_writeback(DisasContext *s, X86DecodedInsn *decode, int opn, TCGv break; case X86_OP_SEG: /* Note that gen_movl_seg takes care of interrupt shadow and TF. */ - gen_movl_seg(s, op->n, s->T0); + gen_movl_seg(s, op->n, v, op->n == R_SS); break; case X86_OP_INT: if (op->has_ea) { @@ -1813,7 +1803,7 @@ static void gen_CMPXCHG(DisasContext *s, X86DecodedInsn *decode) static void gen_CMPXCHG16B(DisasContext *s, X86DecodedInsn *decode) { #ifdef TARGET_X86_64 - MemOp mop = MO_TE | MO_128 | MO_ALIGN; + MemOp mop = MO_LE | MO_128 | MO_ALIGN; TCGv_i64 t0, t1; TCGv_i128 cmp, val; @@ -1870,10 +1860,10 @@ static void gen_CMPXCHG8B(DisasContext *s, X86DecodedInsn *decode) /* Only require atomic with LOCK; non-parallel handled in generator. */ if (s->prefix & PREFIX_LOCK) { - tcg_gen_atomic_cmpxchg_i64(old, s->A0, cmp, val, s->mem_index, MO_TEUQ); + tcg_gen_atomic_cmpxchg_i64(old, s->A0, cmp, val, s->mem_index, MO_LEUQ); } else { tcg_gen_nonatomic_cmpxchg_i64(old, s->A0, cmp, val, - s->mem_index, MO_TEUQ); + s->mem_index, MO_LEUQ); } /* Compute the required value of Z. */ @@ -2392,7 +2382,7 @@ static void gen_lxx_seg(DisasContext *s, X86DecodedInsn *decode, int seg) gen_op_ld_v(s, MO_16, s->T1, s->A0); /* load the segment here to handle exceptions properly */ - gen_movl_seg(s, seg, s->T1); + gen_movl_seg(s, seg, s->T1, false); } static void gen_LDS(DisasContext *s, X86DecodedInsn *decode) @@ -3023,7 +3013,7 @@ static void gen_PMOVMSKB(DisasContext *s, X86DecodedInsn *decode) tcg_gen_ld8u_tl(s->T0, tcg_env, offsetof(CPUX86State, xmm_t0.ZMM_B(vec_len - 1))); while (vec_len > 8) { vec_len -= 8; - if (tcg_op_supported(INDEX_op_extract2_tl, TCG_TYPE_TL, 0)) { + if (tcg_op_supported(INDEX_op_extract2, TCG_TYPE_TL, 0)) { /* * Load the next byte of the result into the high byte of T. * TCG does a similar expansion of deposit to shl+extract2; by diff --git a/target/i386/tcg/excp_helper.c b/target/i386/tcg/excp_helper.c index de71e68..6fb8036 100644 --- a/target/i386/tcg/excp_helper.c +++ b/target/i386/tcg/excp_helper.c @@ -19,7 +19,6 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "exec/exec-all.h" #include "qemu/log.h" #include "system/runstate.h" #include "exec/helper-proto.h" diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c index c1184ca..b3b2382 100644 --- a/target/i386/tcg/fpu_helper.c +++ b/target/i386/tcg/fpu_helper.c @@ -22,7 +22,7 @@ #include "cpu.h" #include "tcg-cpu.h" #include "exec/cputlb.h" -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" #include "exec/helper-proto.h" #include "fpu/softfloat.h" #include "fpu/softfloat-macros.h" @@ -189,25 +189,25 @@ void cpu_init_fp_statuses(CPUX86State *env) set_float_default_nan_pattern(0b11000000, &env->mmx_status); set_float_default_nan_pattern(0b11000000, &env->sse_status); /* - * TODO: x86 does flush-to-zero detection after rounding (the SDM + * x86 does flush-to-zero detection after rounding (the SDM * section 10.2.3.3 on the FTZ bit of MXCSR says that we flush * when we detect underflow, which x86 does after rounding). 
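The fpu_helper.c change above switches the softfloat flush-to-zero detection from before-rounding to after-rounding, which is what MXCSR.FTZ actually does per SDM section 10.2.3.3. A small host-side demonstration of the difference, assuming an x86-64 build where double arithmetic goes through SSE (the _MM_SET_FLUSH_ZERO_MODE intrinsic from xmmintrin.h is the standard way to set MXCSR.FTZ); it illustrates the behaviour being modelled and is not QEMU code:

    #include <float.h>
    #include <stdio.h>
    #include <xmmintrin.h>   /* _MM_SET_FLUSH_ZERO_MODE, x86 only */

    int main(void)
    {
        volatile double tiny = DBL_MIN;                      /* 2^-1022 */
        volatile double just_below_one = 1.0 - DBL_EPSILON / 2;  /* 1 - 2^-53 */
        volatile double half = 0.5;

        _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);          /* set MXCSR.FTZ */

        /*
         * Exact product is 2^-1023: still tiny after rounding, so FTZ
         * flushes it to +0.
         */
        printf("DBL_MIN * 0.5         = %g\n", tiny * half);

        /*
         * Exact product is DBL_MIN - 2^-1075, which rounds (nearest-even)
         * up to DBL_MIN.  Because the FTZ check happens after rounding, the
         * result is a normal number and is not flushed; a before-rounding
         * model would have flushed this one to zero as well.
         */
        printf("DBL_MIN * (1 - 2^-53) = %g\n", tiny * just_below_one);
        return 0;
    }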
*/ - set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status); - set_float_ftz_detection(float_ftz_before_rounding, &env->mmx_status); - set_float_ftz_detection(float_ftz_before_rounding, &env->sse_status); + set_float_ftz_detection(float_ftz_after_rounding, &env->fp_status); + set_float_ftz_detection(float_ftz_after_rounding, &env->mmx_status); + set_float_ftz_detection(float_ftz_after_rounding, &env->sse_status); } -static inline uint8_t save_exception_flags(CPUX86State *env) +static inline int save_exception_flags(CPUX86State *env) { - uint8_t old_flags = get_float_exception_flags(&env->fp_status); + int old_flags = get_float_exception_flags(&env->fp_status); set_float_exception_flags(0, &env->fp_status); return old_flags; } -static void merge_exception_flags(CPUX86State *env, uint8_t old_flags) +static void merge_exception_flags(CPUX86State *env, int old_flags) { - uint8_t new_flags = get_float_exception_flags(&env->fp_status); + int new_flags = get_float_exception_flags(&env->fp_status); float_raise(old_flags, &env->fp_status); fpu_set_exception(env, ((new_flags & float_flag_invalid ? FPUS_IE : 0) | @@ -215,12 +215,12 @@ static void merge_exception_flags(CPUX86State *env, uint8_t old_flags) (new_flags & float_flag_overflow ? FPUS_OE : 0) | (new_flags & float_flag_underflow ? FPUS_UE : 0) | (new_flags & float_flag_inexact ? FPUS_PE : 0) | - (new_flags & float_flag_input_denormal_flushed ? FPUS_DE : 0))); + (new_flags & float_flag_input_denormal_used ? FPUS_DE : 0))); } static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); floatx80 ret = floatx80_div(a, b, &env->fp_status); merge_exception_flags(env, old_flags); return ret; @@ -240,7 +240,7 @@ static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr) void helper_flds_FT0(CPUX86State *env, uint32_t val) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); union { float32 f; uint32_t i; @@ -253,7 +253,7 @@ void helper_flds_FT0(CPUX86State *env, uint32_t val) void helper_fldl_FT0(CPUX86State *env, uint64_t val) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); union { float64 f; uint64_t i; @@ -271,7 +271,7 @@ void helper_fildl_FT0(CPUX86State *env, int32_t val) void helper_flds_ST0(CPUX86State *env, uint32_t val) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); int new_fpstt; union { float32 f; @@ -288,7 +288,7 @@ void helper_flds_ST0(CPUX86State *env, uint32_t val) void helper_fldl_ST0(CPUX86State *env, uint64_t val) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); int new_fpstt; union { float64 f; @@ -338,7 +338,7 @@ void helper_fildll_ST0(CPUX86State *env, int64_t val) uint32_t helper_fsts_ST0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); union { float32 f; uint32_t i; @@ -351,7 +351,7 @@ uint32_t helper_fsts_ST0(CPUX86State *env) uint64_t helper_fstl_ST0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); union { float64 f; uint64_t i; @@ -364,7 +364,7 @@ uint64_t helper_fstl_ST0(CPUX86State *env) int32_t helper_fist_ST0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); int32_t val; val = floatx80_to_int32(ST0, 
&env->fp_status); @@ -378,7 +378,7 @@ int32_t helper_fist_ST0(CPUX86State *env) int32_t helper_fistl_ST0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); int32_t val; val = floatx80_to_int32(ST0, &env->fp_status); @@ -391,7 +391,7 @@ int32_t helper_fistl_ST0(CPUX86State *env) int64_t helper_fistll_ST0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); int64_t val; val = floatx80_to_int64(ST0, &env->fp_status); @@ -404,7 +404,7 @@ int64_t helper_fistll_ST0(CPUX86State *env) int32_t helper_fistt_ST0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); int32_t val; val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status); @@ -418,7 +418,7 @@ int32_t helper_fistt_ST0(CPUX86State *env) int32_t helper_fisttl_ST0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); int32_t val; val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status); @@ -431,7 +431,7 @@ int32_t helper_fisttl_ST0(CPUX86State *env) int64_t helper_fisttll_ST0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); int64_t val; val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status); @@ -527,7 +527,7 @@ static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500}; void helper_fcom_ST0_FT0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); FloatRelation ret; ret = floatx80_compare(ST0, FT0, &env->fp_status); @@ -537,7 +537,7 @@ void helper_fcom_ST0_FT0(CPUX86State *env) void helper_fucom_ST0_FT0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); FloatRelation ret; ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status); @@ -549,7 +549,7 @@ static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C}; void helper_fcomi_ST0_FT0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); int eflags; FloatRelation ret; @@ -562,7 +562,7 @@ void helper_fcomi_ST0_FT0(CPUX86State *env) void helper_fucomi_ST0_FT0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); int eflags; FloatRelation ret; @@ -575,28 +575,28 @@ void helper_fucomi_ST0_FT0(CPUX86State *env) void helper_fadd_ST0_FT0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); ST0 = floatx80_add(ST0, FT0, &env->fp_status); merge_exception_flags(env, old_flags); } void helper_fmul_ST0_FT0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); ST0 = floatx80_mul(ST0, FT0, &env->fp_status); merge_exception_flags(env, old_flags); } void helper_fsub_ST0_FT0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); ST0 = floatx80_sub(ST0, FT0, &env->fp_status); merge_exception_flags(env, old_flags); } void helper_fsubr_ST0_FT0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); ST0 = floatx80_sub(FT0, ST0, &env->fp_status); merge_exception_flags(env, old_flags); } @@ -615,28 +615,28 @@ void helper_fdivr_ST0_FT0(CPUX86State *env) void helper_fadd_STN_ST0(CPUX86State *env, int st_index) { - 
uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status); merge_exception_flags(env, old_flags); } void helper_fmul_STN_ST0(CPUX86State *env, int st_index) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status); merge_exception_flags(env, old_flags); } void helper_fsub_STN_ST0(CPUX86State *env, int st_index) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status); merge_exception_flags(env, old_flags); } void helper_fsubr_STN_ST0(CPUX86State *env, int st_index) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status); merge_exception_flags(env, old_flags); } @@ -861,7 +861,7 @@ void helper_fbld_ST0(CPUX86State *env, target_ulong ptr) void helper_fbst_ST0(CPUX86State *env, target_ulong ptr) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); int v; target_ulong mem_ref, mem_end; int64_t val; @@ -1136,7 +1136,7 @@ static const struct f2xm1_data f2xm1_table[65] = { void helper_f2xm1(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); uint64_t sig = extractFloatx80Frac(ST0); int32_t exp = extractFloatx80Exp(ST0); bool sign = extractFloatx80Sign(ST0); @@ -1369,7 +1369,7 @@ static const struct fpatan_data fpatan_table[9] = { void helper_fpatan(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); uint64_t arg0_sig = extractFloatx80Frac(ST0); int32_t arg0_exp = extractFloatx80Exp(ST0); bool arg0_sign = extractFloatx80Sign(ST0); @@ -1808,7 +1808,7 @@ void helper_fpatan(CPUX86State *env) void helper_fxtract(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); CPU_LDoubleU temp; temp.d = ST0; @@ -1857,7 +1857,7 @@ void helper_fxtract(CPUX86State *env) static void helper_fprem_common(CPUX86State *env, bool mod) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); uint64_t quotient; CPU_LDoubleU temp0, temp1; int exp0, exp1, expdiff; @@ -2053,7 +2053,7 @@ static void helper_fyl2x_common(CPUX86State *env, floatx80 arg, int32_t *exp, void helper_fyl2xp1(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); uint64_t arg0_sig = extractFloatx80Frac(ST0); int32_t arg0_exp = extractFloatx80Exp(ST0); bool arg0_sign = extractFloatx80Sign(ST0); @@ -2151,7 +2151,7 @@ void helper_fyl2xp1(CPUX86State *env) void helper_fyl2x(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); uint64_t arg0_sig = extractFloatx80Frac(ST0); int32_t arg0_exp = extractFloatx80Exp(ST0); bool arg0_sign = extractFloatx80Sign(ST0); @@ -2298,7 +2298,7 @@ void helper_fyl2x(CPUX86State *env) void helper_fsqrt(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); if (floatx80_is_neg(ST0)) { env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ env->fpus |= 0x400; @@ -2324,14 +2324,14 @@ void helper_fsincos(CPUX86State *env) void helper_frndint(CPUX86State *env) { - uint8_t old_flags = 
save_exception_flags(env); + int old_flags = save_exception_flags(env); ST0 = floatx80_round_to_int(ST0, &env->fp_status); merge_exception_flags(env, old_flags); } void helper_fscale(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); if (floatx80_invalid_encoding(ST1, &env->fp_status) || floatx80_invalid_encoding(ST0, &env->fp_status)) { float_raise(float_flag_invalid, &env->fp_status); @@ -2369,7 +2369,7 @@ void helper_fscale(CPUX86State *env) } else { int n; FloatX80RoundPrec save = env->fp_status.floatx80_rounding_precision; - uint8_t save_flags = get_float_exception_flags(&env->fp_status); + int save_flags = get_float_exception_flags(&env->fp_status); set_float_exception_flags(0, &env->fp_status); n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status); set_float_exception_flags(save_flags, &env->fp_status); @@ -3254,6 +3254,7 @@ void update_mxcsr_status(CPUX86State *env) /* Set exception flags. */ set_float_exception_flags((mxcsr & FPUS_IE ? float_flag_invalid : 0) | + (mxcsr & FPUS_DE ? float_flag_input_denormal_used : 0) | (mxcsr & FPUS_ZE ? float_flag_divbyzero : 0) | (mxcsr & FPUS_OE ? float_flag_overflow : 0) | (mxcsr & FPUS_UE ? float_flag_underflow : 0) | @@ -3269,15 +3270,9 @@ void update_mxcsr_status(CPUX86State *env) void update_mxcsr_from_sse_status(CPUX86State *env) { - uint8_t flags = get_float_exception_flags(&env->sse_status); - /* - * The MXCSR denormal flag has opposite semantics to - * float_flag_input_denormal_flushed (the softfloat code sets that flag - * only when flushing input denormals to zero, but SSE sets it - * only when not flushing them to zero), so is not converted - * here. - */ + int flags = get_float_exception_flags(&env->sse_status); env->mxcsr |= ((flags & float_flag_invalid ? FPUS_IE : 0) | + (flags & float_flag_input_denormal_used ? FPUS_DE : 0) | (flags & float_flag_divbyzero ? FPUS_ZE : 0) | (flags & float_flag_overflow ? FPUS_OE : 0) | (flags & float_flag_underflow ? 
FPUS_UE : 0) | diff --git a/target/i386/tcg/helper-tcg.h b/target/i386/tcg/helper-tcg.h index 54d8453..be011b0 100644 --- a/target/i386/tcg/helper-tcg.h +++ b/target/i386/tcg/helper-tcg.h @@ -20,7 +20,6 @@ #ifndef I386_HELPER_TCG_H #define I386_HELPER_TCG_H -#include "exec/exec-all.h" #include "qemu/host-utils.h" /* Maximum instruction code size */ @@ -98,7 +97,7 @@ static inline unsigned int compute_pf(uint8_t x) /* misc_helper.c */ void cpu_load_eflags(CPUX86State *env, int eflags, int update_mask); -/* sysemu/svm_helper.c */ +/* system/svm_helper.c */ #ifndef CONFIG_USER_ONLY G_NORETURN void cpu_vmexit(CPUX86State *nenv, uint32_t exit_code, uint64_t exit_info_1, uintptr_t retaddr); @@ -116,7 +115,7 @@ int exception_has_error_code(int intno); /* smm_helper.c */ void do_smm_enter(X86CPU *cpu); -/* sysemu/bpt_helper.c */ +/* system/bpt_helper.c */ bool check_hw_breakpoints(CPUX86State *env, bool force_dr6_update); /* diff --git a/target/i386/tcg/int_helper.c b/target/i386/tcg/int_helper.c index 1a02e9d..46741d9 100644 --- a/target/i386/tcg/int_helper.c +++ b/target/i386/tcg/int_helper.c @@ -20,7 +20,6 @@ #include "qemu/osdep.h" #include "qemu/log.h" #include "cpu.h" -#include "exec/exec-all.h" #include "qemu/host-utils.h" #include "exec/helper-proto.h" #include "qapi/error.h" diff --git a/target/i386/tcg/mem_helper.c b/target/i386/tcg/mem_helper.c index 3ef84e9..9e7c2d8 100644 --- a/target/i386/tcg/mem_helper.c +++ b/target/i386/tcg/mem_helper.c @@ -20,8 +20,7 @@ #include "qemu/osdep.h" #include "cpu.h" #include "exec/helper-proto.h" -#include "exec/exec-all.h" -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" #include "qemu/int128.h" #include "qemu/atomic128.h" #include "tcg/tcg.h" diff --git a/target/i386/tcg/mpx_helper.c b/target/i386/tcg/mpx_helper.c index 22423eed..fa8abcc 100644 --- a/target/i386/tcg/mpx_helper.c +++ b/target/i386/tcg/mpx_helper.c @@ -20,8 +20,8 @@ #include "qemu/osdep.h" #include "cpu.h" #include "exec/helper-proto.h" -#include "exec/cpu_ldst.h" -#include "exec/exec-all.h" +#include "accel/tcg/cpu-ldst.h" +#include "exec/target_page.h" #include "helper-tcg.h" diff --git a/target/i386/tcg/seg_helper.c b/target/i386/tcg/seg_helper.c index 7196211..071f3fb 100644 --- a/target/i386/tcg/seg_helper.c +++ b/target/i386/tcg/seg_helper.c @@ -22,12 +22,13 @@ #include "cpu.h" #include "qemu/log.h" #include "exec/helper-proto.h" -#include "exec/exec-all.h" -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" +#include "accel/tcg/probe.h" #include "exec/log.h" #include "helper-tcg.h" #include "seg_helper.h" #include "access.h" +#include "tcg-cpu.h" #ifdef TARGET_X86_64 #define SET_ESP(val, sp_mask) \ @@ -128,6 +129,22 @@ int get_pg_mode(CPUX86State *env) return pg_mode; } +static int x86_mmu_index_kernel_pl(CPUX86State *env, unsigned pl) +{ + int mmu_index_32 = (env->hflags & HF_LMA_MASK) ? 0 : 1; + int mmu_index_base = + !(env->hflags & HF_SMAP_MASK) ? MMU_KNOSMAP64_IDX : + (pl < 3 && (env->eflags & AC_MASK) + ? 
MMU_KNOSMAP64_IDX : MMU_KSMAP64_IDX); + + return mmu_index_base + mmu_index_32; +} + +int cpu_mmu_index_kernel(CPUX86State *env) +{ + return x86_mmu_index_kernel_pl(env, env->hflags & HF_CPL_MASK); +} + /* return non zero if error */ static inline int load_segment_ra(CPUX86State *env, uint32_t *e1_ptr, uint32_t *e2_ptr, int selector, @@ -309,10 +326,10 @@ static void tss_set_busy(CPUX86State *env, int tss_selector, bool value, #define SWITCH_TSS_IRET 1 #define SWITCH_TSS_CALL 2 -/* return 0 if switching to a 16-bit selector */ -static int switch_tss_ra(CPUX86State *env, int tss_selector, - uint32_t e1, uint32_t e2, int source, - uint32_t next_eip, uintptr_t retaddr) +static void switch_tss_ra(CPUX86State *env, int tss_selector, + uint32_t e1, uint32_t e2, int source, + uint32_t next_eip, bool has_error_code, + uint32_t error_code, uintptr_t retaddr) { int tss_limit, tss_limit_max, type, old_tss_limit_max, old_type, i; target_ulong tss_base; @@ -456,10 +473,6 @@ static int switch_tss_ra(CPUX86State *env, int tss_selector, new_segs[R_GS] = 0; new_trap = 0; } - /* XXX: avoid a compiler warning, see - http://support.amd.com/us/Processor_TechDocs/24593.pdf - chapters 12.2.5 and 13.2.4 on how to implement TSS Trap bit */ - (void)new_trap; /* clear busy bit (it is restartable) */ if (source == SWITCH_TSS_JMP || source == SWITCH_TSS_IRET) { @@ -582,14 +595,43 @@ static int switch_tss_ra(CPUX86State *env, int tss_selector, cpu_x86_update_dr7(env, env->dr[7] & ~DR7_LOCAL_BP_MASK); } #endif - return type >> 3; + + if (has_error_code) { + int cpl = env->hflags & HF_CPL_MASK; + StackAccess sa; + + /* push the error code */ + sa.env = env; + sa.ra = retaddr; + sa.mmu_index = x86_mmu_index_pl(env, cpl); + sa.sp = env->regs[R_ESP]; + if (env->segs[R_SS].flags & DESC_B_MASK) { + sa.sp_mask = 0xffffffff; + } else { + sa.sp_mask = 0xffff; + } + sa.ss_base = env->segs[R_SS].base; + if (type & 8) { + pushl(&sa, error_code); + } else { + pushw(&sa, error_code); + } + SET_ESP(sa.sp, sa.sp_mask); + } + + if (new_trap) { + env->dr[6] |= DR6_BT; + raise_exception_ra(env, EXCP01_DB, retaddr); + } } -static int switch_tss(CPUX86State *env, int tss_selector, - uint32_t e1, uint32_t e2, int source, - uint32_t next_eip) +static void switch_tss(CPUX86State *env, int tss_selector, + uint32_t e1, uint32_t e2, int source, + uint32_t next_eip, bool has_error_code, + int error_code) { - return switch_tss_ra(env, tss_selector, e1, e2, source, next_eip, 0); + switch_tss_ra(env, tss_selector, e1, e2, source, next_eip, + has_error_code, error_code, 0); } static inline unsigned int get_sp_mask(unsigned int e2) @@ -702,25 +744,8 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int, if (!(e2 & DESC_P_MASK)) { raise_exception_err(env, EXCP0B_NOSEG, intno * 8 + 2); } - shift = switch_tss(env, intno * 8, e1, e2, SWITCH_TSS_CALL, old_eip); - if (has_error_code) { - /* push the error code on the destination stack */ - cpl = env->hflags & HF_CPL_MASK; - sa.mmu_index = x86_mmu_index_pl(env, cpl); - if (env->segs[R_SS].flags & DESC_B_MASK) { - sa.sp_mask = 0xffffffff; - } else { - sa.sp_mask = 0xffff; - } - sa.sp = env->regs[R_ESP]; - sa.ss_base = env->segs[R_SS].base; - if (shift) { - pushl(&sa, error_code); - } else { - pushw(&sa, error_code); - } - SET_ESP(sa.sp, sa.sp_mask); - } + switch_tss(env, intno * 8, e1, e2, SWITCH_TSS_CALL, old_eip, + has_error_code, error_code); return; } @@ -1516,7 +1541,8 @@ void helper_ljmp_protected(CPUX86State *env, int new_cs, target_ulong new_eip, if (dpl < cpl || dpl < rpl) { 
raise_exception_err_ra(env, EXCP0D_GPF, new_cs & 0xfffc, GETPC()); } - switch_tss_ra(env, new_cs, e1, e2, SWITCH_TSS_JMP, next_eip, GETPC()); + switch_tss_ra(env, new_cs, e1, e2, SWITCH_TSS_JMP, next_eip, + false, 0, GETPC()); break; case 4: /* 286 call gate */ case 12: /* 386 call gate */ @@ -1728,7 +1754,8 @@ void helper_lcall_protected(CPUX86State *env, int new_cs, target_ulong new_eip, if (dpl < cpl || dpl < rpl) { raise_exception_err_ra(env, EXCP0D_GPF, new_cs & 0xfffc, GETPC()); } - switch_tss_ra(env, new_cs, e1, e2, SWITCH_TSS_CALL, next_eip, GETPC()); + switch_tss_ra(env, new_cs, e1, e2, SWITCH_TSS_CALL, next_eip, + false, 0, GETPC()); return; case 4: /* 286 call gate */ case 12: /* 386 call gate */ @@ -2239,7 +2266,8 @@ void helper_iret_protected(CPUX86State *env, int shift, int next_eip) if (type != 3) { raise_exception_err_ra(env, EXCP0A_TSS, tss_selector & 0xfffc, GETPC()); } - switch_tss_ra(env, tss_selector, e1, e2, SWITCH_TSS_IRET, next_eip, GETPC()); + switch_tss_ra(env, tss_selector, e1, e2, SWITCH_TSS_IRET, next_eip, + false, 0, GETPC()); } else { helper_ret_protected(env, shift, 1, 0, GETPC()); } diff --git a/target/i386/tcg/seg_helper.h b/target/i386/tcg/seg_helper.h index ebf1035..ea98e1a 100644 --- a/target/i386/tcg/seg_helper.h +++ b/target/i386/tcg/seg_helper.h @@ -20,6 +20,8 @@ #ifndef SEG_HELPER_H #define SEG_HELPER_H +#include "cpu.h" + //#define DEBUG_PCALL #ifdef DEBUG_PCALL @@ -31,12 +33,12 @@ # define LOG_PCALL_STATE(cpu) do { } while (0) #endif +int cpu_mmu_index_kernel(CPUX86State *env); + /* * TODO: Convert callers to compute cpu_mmu_index_kernel once * and use *_mmuidx_ra directly. */ -#define cpu_ldub_kernel_ra(e, p, r) \ - cpu_ldub_mmuidx_ra(e, p, cpu_mmu_index_kernel(e), r) #define cpu_lduw_kernel_ra(e, p, r) \ cpu_lduw_mmuidx_ra(e, p, cpu_mmu_index_kernel(e), r) #define cpu_ldl_kernel_ra(e, p, r) \ @@ -44,8 +46,6 @@ #define cpu_ldq_kernel_ra(e, p, r) \ cpu_ldq_mmuidx_ra(e, p, cpu_mmu_index_kernel(e), r) -#define cpu_stb_kernel_ra(e, p, v, r) \ - cpu_stb_mmuidx_ra(e, p, v, cpu_mmu_index_kernel(e), r) #define cpu_stw_kernel_ra(e, p, v, r) \ cpu_stw_mmuidx_ra(e, p, v, cpu_mmu_index_kernel(e), r) #define cpu_stl_kernel_ra(e, p, v, r) \ @@ -53,12 +53,10 @@ #define cpu_stq_kernel_ra(e, p, v, r) \ cpu_stq_mmuidx_ra(e, p, v, cpu_mmu_index_kernel(e), r) -#define cpu_ldub_kernel(e, p) cpu_ldub_kernel_ra(e, p, 0) #define cpu_lduw_kernel(e, p) cpu_lduw_kernel_ra(e, p, 0) #define cpu_ldl_kernel(e, p) cpu_ldl_kernel_ra(e, p, 0) #define cpu_ldq_kernel(e, p) cpu_ldq_kernel_ra(e, p, 0) -#define cpu_stb_kernel(e, p, v) cpu_stb_kernel_ra(e, p, v, 0) #define cpu_stw_kernel(e, p, v) cpu_stw_kernel_ra(e, p, v, 0) #define cpu_stl_kernel(e, p, v) cpu_stl_kernel_ra(e, p, v, 0) #define cpu_stq_kernel(e, p, v) cpu_stq_kernel_ra(e, p, v, 0) diff --git a/target/i386/tcg/system/bpt_helper.c b/target/i386/tcg/system/bpt_helper.c index be232c1..aebb5ca 100644 --- a/target/i386/tcg/system/bpt_helper.c +++ b/target/i386/tcg/system/bpt_helper.c @@ -19,8 +19,8 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "exec/exec-all.h" #include "exec/helper-proto.h" +#include "exec/watchpoint.h" #include "tcg/helper-tcg.h" diff --git a/target/i386/tcg/system/excp_helper.c b/target/i386/tcg/system/excp_helper.c index 6876329..c162621 100644 --- a/target/i386/tcg/system/excp_helper.c +++ b/target/i386/tcg/system/excp_helper.c @@ -19,9 +19,13 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" +#include "accel/tcg/probe.h" #include 
"exec/cputlb.h" #include "exec/page-protection.h" +#include "exec/target_page.h" +#include "exec/tlb-flags.h" +#include "exec/tswap.h" #include "tcg/helper-tcg.h" typedef struct TranslateParams { diff --git a/target/i386/tcg/system/misc_helper.c b/target/i386/tcg/system/misc_helper.c index ce18c75..9c3f5cc 100644 --- a/target/i386/tcg/system/misc_helper.c +++ b/target/i386/tcg/system/misc_helper.c @@ -21,8 +21,9 @@ #include "qemu/main-loop.h" #include "cpu.h" #include "exec/helper-proto.h" -#include "exec/cpu_ldst.h" -#include "exec/address-spaces.h" +#include "accel/tcg/cpu-ldst.h" +#include "system/address-spaces.h" +#include "system/memory.h" #include "exec/cputlb.h" #include "tcg/helper-tcg.h" #include "hw/i386/apic.h" diff --git a/target/i386/tcg/system/seg_helper.c b/target/i386/tcg/system/seg_helper.c index b07cc9f..d4ea890 100644 --- a/target/i386/tcg/system/seg_helper.c +++ b/target/i386/tcg/system/seg_helper.c @@ -23,7 +23,7 @@ #include "qemu/main-loop.h" #include "cpu.h" #include "exec/helper-proto.h" -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" #include "tcg/helper-tcg.h" #include "../seg_helper.h" diff --git a/target/i386/tcg/system/svm_helper.c b/target/i386/tcg/system/svm_helper.c index f9982b7..b27049b 100644 --- a/target/i386/tcg/system/svm_helper.c +++ b/target/i386/tcg/system/svm_helper.c @@ -22,7 +22,7 @@ #include "cpu.h" #include "exec/helper-proto.h" #include "exec/cputlb.h" -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" #include "tcg/helper-tcg.h" /* Secure Virtual Machine helpers */ diff --git a/target/i386/tcg/system/tcg-cpu.c b/target/i386/tcg/system/tcg-cpu.c index 13a3507..0538a4f 100644 --- a/target/i386/tcg/system/tcg-cpu.c +++ b/target/i386/tcg/system/tcg-cpu.c @@ -23,7 +23,8 @@ #include "system/system.h" #include "qemu/units.h" -#include "exec/address-spaces.h" +#include "system/address-spaces.h" +#include "system/memory.h" #include "tcg/tcg-cpu.h" diff --git a/target/i386/tcg/tcg-cpu.c b/target/i386/tcg/tcg-cpu.c index b8aff82..6f5dc06 100644 --- a/target/i386/tcg/tcg-cpu.c +++ b/target/i386/tcg/tcg-cpu.c @@ -23,7 +23,8 @@ #include "qemu/accel.h" #include "accel/accel-cpu-target.h" #include "exec/translation-block.h" - +#include "exec/target_page.h" +#include "accel/tcg/cpu-ops.h" #include "tcg-cpu.h" /* Frob eflags into and out of the CPU temporary format. */ @@ -47,6 +48,25 @@ static void x86_cpu_exec_exit(CPUState *cs) env->eflags = cpu_compute_eflags(env); } +static TCGTBCPUState x86_get_tb_cpu_state(CPUState *cs) +{ + CPUX86State *env = cpu_env(cs); + uint32_t flags, cs_base; + vaddr pc; + + flags = env->hflags | + (env->eflags & (IOPL_MASK | TF_MASK | RF_MASK | VM_MASK | AC_MASK)); + if (env->hflags & HF_CS64_MASK) { + cs_base = 0; + pc = env->eip; + } else { + cs_base = env->segs[R_CS].base; + pc = (uint32_t)(cs_base + env->eip); + } + + return (TCGTBCPUState){ .pc = pc, .flags = flags, .cs_base = cs_base }; +} + static void x86_cpu_synchronize_from_tb(CPUState *cs, const TranslationBlock *tb) { @@ -94,6 +114,23 @@ static void x86_restore_state_to_opc(CPUState *cs, } } +int x86_mmu_index_pl(CPUX86State *env, unsigned pl) +{ + int mmu_index_32 = (env->hflags & HF_CS64_MASK) ? 0 : 1; + int mmu_index_base = + pl == 3 ? MMU_USER64_IDX : + !(env->hflags & HF_SMAP_MASK) ? MMU_KNOSMAP64_IDX : + (env->eflags & AC_MASK) ? 
MMU_KNOSMAP64_IDX : MMU_KSMAP64_IDX; + + return mmu_index_base + mmu_index_32; +} + +static int x86_cpu_mmu_index(CPUState *cs, bool ifetch) +{ + CPUX86State *env = cpu_env(cs); + return x86_mmu_index_pl(env, env->hflags & HF_CPL_MASK); +} + #ifndef CONFIG_USER_ONLY static bool x86_debug_check_breakpoint(CPUState *cs) { @@ -103,15 +140,36 @@ static bool x86_debug_check_breakpoint(CPUState *cs) /* RF disables all architectural breakpoints. */ return !(env->eflags & RF_MASK); } -#endif -#include "accel/tcg/cpu-ops.h" +static void x86_cpu_exec_reset(CPUState *cs) +{ + CPUArchState *env = cpu_env(cs); + + cpu_svm_check_intercept_param(env, SVM_EXIT_INIT, 0, 0); + do_cpu_init(env_archcpu(env)); + cs->exception_index = EXCP_HALTED; +} -static const TCGCPUOps x86_tcg_ops = { +static vaddr x86_pointer_wrap(CPUState *cs, int mmu_idx, + vaddr result, vaddr base) +{ + return cpu_env(cs)->hflags & HF_CS64_MASK ? result : (uint32_t)result; +} +#endif + +const TCGCPUOps x86_tcg_ops = { + .mttcg_supported = true, + .precise_smc = true, + /* + * The x86 has a strong memory model with some store-after-load re-ordering + */ + .guest_default_memory_order = TCG_MO_ALL & ~TCG_MO_ST_LD, .initialize = tcg_x86_init, .translate_code = x86_translate_code, + .get_tb_cpu_state = x86_get_tb_cpu_state, .synchronize_from_tb = x86_cpu_synchronize_from_tb, .restore_state_to_opc = x86_restore_state_to_opc, + .mmu_index = x86_cpu_mmu_index, .cpu_exec_enter = x86_cpu_exec_enter, .cpu_exec_exit = x86_cpu_exec_exit, #ifdef CONFIG_USER_ONLY @@ -120,9 +178,11 @@ static const TCGCPUOps x86_tcg_ops = { .record_sigbus = x86_cpu_record_sigbus, #else .tlb_fill = x86_cpu_tlb_fill, + .pointer_wrap = x86_pointer_wrap, .do_interrupt = x86_cpu_do_interrupt, .cpu_exec_halt = x86_cpu_exec_halt, .cpu_exec_interrupt = x86_cpu_exec_interrupt, + .cpu_exec_reset = x86_cpu_exec_reset, .do_unaligned_access = x86_cpu_do_unaligned_access, .debug_excp_handler = breakpoint_handler, .debug_check_breakpoint = x86_debug_check_breakpoint, @@ -130,17 +190,6 @@ static const TCGCPUOps x86_tcg_ops = { #endif /* !CONFIG_USER_ONLY */ }; -static void x86_tcg_cpu_init_ops(AccelCPUClass *accel_cpu, CPUClass *cc) -{ - /* for x86, all cpus use the same set of operations */ - cc->tcg_ops = &x86_tcg_ops; -} - -static void x86_tcg_cpu_class_init(CPUClass *cc) -{ - cc->init_accel_cpu = x86_tcg_cpu_init_ops; -} - static void x86_tcg_cpu_xsave_init(void) { #define XO(bit, field) \ @@ -181,7 +230,7 @@ static void x86_tcg_cpu_instance_init(CPUState *cs) x86_tcg_cpu_xsave_init(); } -static void x86_tcg_cpu_accel_class_init(ObjectClass *oc, void *data) +static void x86_tcg_cpu_accel_class_init(ObjectClass *oc, const void *data) { AccelCPUClass *acc = ACCEL_CPU_CLASS(oc); @@ -189,7 +238,6 @@ static void x86_tcg_cpu_accel_class_init(ObjectClass *oc, void *data) acc->cpu_target_realize = tcg_cpu_realizefn; #endif /* CONFIG_USER_ONLY */ - acc->cpu_class_init = x86_tcg_cpu_class_init; acc->cpu_instance_init = x86_tcg_cpu_instance_init; } static const TypeInfo x86_tcg_cpu_accel_type_info = { diff --git a/target/i386/tcg/tcg-cpu.h b/target/i386/tcg/tcg-cpu.h index 53a8494..85bcd61 100644 --- a/target/i386/tcg/tcg-cpu.h +++ b/target/i386/tcg/tcg-cpu.h @@ -19,6 +19,8 @@ #ifndef TCG_CPU_H #define TCG_CPU_H +#include "cpu.h" + #define XSAVE_FCW_FSW_OFFSET 0x000 #define XSAVE_FTW_FOP_OFFSET 0x004 #define XSAVE_CWD_RIP_OFFSET 0x008 @@ -76,6 +78,10 @@ QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, zmm_hi256_state) != XSAVE_ZMM_HI256_OFF QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, hi16_zmm_state) != 
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 1bbf09a..0cb87d0 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -20,11 +20,12 @@
 #include "qemu/host-utils.h"
 #include "cpu.h"
-#include "exec/exec-all.h"
+#include "accel/tcg/cpu-mmu-index.h"
 #include "exec/translation-block.h"
 #include "tcg/tcg-op.h"
 #include "tcg/tcg-op-gvec.h"
 #include "exec/translator.h"
+#include "exec/target_page.h"
 #include "fpu/softfloat.h"
 #include "exec/helper-proto.h"
@@ -2024,27 +2025,39 @@ static void gen_op_movl_seg_real(DisasContext *s, X86Seg seg_reg, TCGv seg)
 /* move SRC to seg_reg and compute if the CPU state may change. Never
    call this function with seg_reg == R_CS */
-static void gen_movl_seg(DisasContext *s, X86Seg seg_reg, TCGv src)
+static void gen_movl_seg(DisasContext *s, X86Seg seg_reg, TCGv src, bool inhibit_irq)
 {
     if (PE(s) && !VM86(s)) {
         TCGv_i32 sel = tcg_temp_new_i32();
         tcg_gen_trunc_tl_i32(sel, src);
         gen_helper_load_seg(tcg_env, tcg_constant_i32(seg_reg), sel);
-        /* abort translation because the addseg value may change or
-           because ss32 may change. For R_SS, translation must always
-           stop as a special handling must be done to disable hardware
-           interrupts for the next instruction */
-        if (seg_reg == R_SS) {
-            s->base.is_jmp = DISAS_EOB_INHIBIT_IRQ;
-        } else if (CODE32(s) && seg_reg < R_FS) {
+
+        /*
+         * For moves to SS, the SS32 flag may change. For CODE32 only, changes
+         * to SS, DS and ES may change the ADDSEG flags.
+         */
+        if (seg_reg == R_SS || (CODE32(s) && seg_reg < R_FS)) {
             s->base.is_jmp = DISAS_EOB_NEXT;
         }
     } else {
         gen_op_movl_seg_real(s, seg_reg, src);
-        if (seg_reg == R_SS) {
-            s->base.is_jmp = DISAS_EOB_INHIBIT_IRQ;
-        }
+    }
+
+    /*
+     * For MOV or POP to SS (but not LSS) translation must always
+     * stop as a special handling must be done to disable hardware
+     * interrupts for the next instruction.
+     *
+     * This is the last instruction, so it's okay to overwrite
+     * HF_TF_MASK; the next TB will start with the flag set.
+     *
+     * DISAS_EOB_INHIBIT_IRQ is a superset of DISAS_EOB_NEXT which
+     * might have been set above.
+     */
+    if (inhibit_irq) {
+        s->base.is_jmp = DISAS_EOB_INHIBIT_IRQ;
+        s->flags &= ~HF_TF_MASK;
     }
 }
@@ -2295,7 +2308,7 @@ gen_eob(DisasContext *s, int mode)
     if (mode == DISAS_EOB_RECHECK_TF) {
         gen_helper_rechecking_single_step(tcg_env);
         tcg_gen_exit_tb(NULL, 0);
-    } else if ((s->flags & HF_TF_MASK) && mode != DISAS_EOB_INHIBIT_IRQ) {
+    } else if (s->flags & HF_TF_MASK) {
         gen_helper_single_step(tcg_env);
     } else if (mode == DISAS_JUMP &&
                /* give irqs a chance to happen */
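
The translate.c change above folds the old R_SS special case into a separate inhibit_irq parameter, so MOV/POP to SS (but not LSS) ends the translation block with the one-instruction interrupt shadow while plain segment loads only force a flag re-check. A hedged standalone sketch of the resulting decision logic — simplified boolean parameters standing in for the DisasContext checks, not the QEMU interfaces:

#include <stdbool.h>
#include <stdio.h>

/* Simplified end-of-block modes; the real enum lives in QEMU's translator. */
enum eob_mode { EOB_NONE, EOB_NEXT, EOB_INHIBIT_IRQ };

/*
 * Decide how a segment-register load ends the current translation block.
 * protected_mode/code32 stand in for PE(s) && !VM86(s) and CODE32(s);
 * is_ss/is_data_seg stand in for seg_reg == R_SS and seg_reg < R_FS.
 */
static enum eob_mode seg_load_eob(bool protected_mode, bool code32,
                                  bool is_ss, bool is_data_seg,
                                  bool inhibit_irq)
{
    enum eob_mode mode = EOB_NONE;

    if (protected_mode && (is_ss || (code32 && is_data_seg))) {
        mode = EOB_NEXT;            /* SS32/ADDSEG flags may have changed */
    }
    if (inhibit_irq) {
        mode = EOB_INHIBIT_IRQ;     /* superset of EOB_NEXT: also masks IRQs
                                       for the instruction after MOV/POP SS */
    }
    return mode;
}

int main(void)
{
    /* MOV SS in 32-bit protected mode: expect EOB_INHIBIT_IRQ (2). */
    printf("%d\n", seg_load_eob(true, true, true, true, true));
    return 0;
}
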
diff --git a/target/i386/tcg/user/excp_helper.c b/target/i386/tcg/user/excp_helper.c
index b3bdb78..98fab4cb 100644
--- a/target/i386/tcg/user/excp_helper.c
+++ b/target/i386/tcg/user/excp_helper.c
@@ -19,7 +19,6 @@
 #include "qemu/osdep.h"
 #include "cpu.h"
-#include "exec/exec-all.h"
 #include "tcg/helper-tcg.h"
 void x86_cpu_record_sigsegv(CPUState *cs, vaddr addr,
diff --git a/target/i386/tcg/user/seg_helper.c b/target/i386/tcg/user/seg_helper.c
index c45f2ac..263f599 100644
--- a/target/i386/tcg/user/seg_helper.c
+++ b/target/i386/tcg/user/seg_helper.c
@@ -21,8 +21,7 @@
 #include "qemu/osdep.h"
 #include "cpu.h"
 #include "exec/helper-proto.h"
-#include "exec/exec-all.h"
-#include "exec/cpu_ldst.h"
+#include "accel/tcg/cpu-ldst.h"
 #include "tcg/helper-tcg.h"
 #include "tcg/seg_helper.h"
diff --git a/target/i386/whpx/whpx-accel-ops.c b/target/i386/whpx/whpx-accel-ops.c
index 81fdd06..5f4841c 100644
--- a/target/i386/whpx/whpx-accel-ops.c
+++ b/target/i386/whpx/whpx-accel-ops.c
@@ -83,19 +83,19 @@ static bool whpx_vcpu_thread_is_idle(CPUState *cpu)
     return !whpx_apic_in_platform();
 }
-static void whpx_accel_ops_class_init(ObjectClass *oc, void *data)
+static void whpx_accel_ops_class_init(ObjectClass *oc, const void *data)
 {
     AccelOpsClass *ops = ACCEL_OPS_CLASS(oc);
     ops->create_vcpu_thread = whpx_start_vcpu_thread;
     ops->kick_vcpu_thread = whpx_kick_vcpu_thread;
     ops->cpu_thread_is_idle = whpx_vcpu_thread_is_idle;
+    ops->handle_interrupt = generic_handle_interrupt;
     ops->synchronize_post_reset = whpx_cpu_synchronize_post_reset;
     ops->synchronize_post_init = whpx_cpu_synchronize_post_init;
     ops->synchronize_state = whpx_cpu_synchronize_state;
     ops->synchronize_pre_loadvm = whpx_cpu_synchronize_pre_loadvm;
-    ops->synchronize_pre_resume = whpx_cpu_synchronize_pre_resume;
 }
 static const TypeInfo whpx_accel_ops_type = {
diff --git a/target/i386/whpx/whpx-accel-ops.h b/target/i386/whpx/whpx-accel-ops.h
index e6cf155..54cfc25 100644
--- a/target/i386/whpx/whpx-accel-ops.h
+++ b/target/i386/whpx/whpx-accel-ops.h
@@ -21,7 +21,6 @@
 void whpx_cpu_synchronize_state(CPUState *cpu);
 void whpx_cpu_synchronize_post_reset(CPUState *cpu);
 void whpx_cpu_synchronize_post_init(CPUState *cpu);
 void whpx_cpu_synchronize_pre_loadvm(CPUState *cpu);
-void whpx_cpu_synchronize_pre_resume(bool step_pending);
 /* state subset only touched by the VCPU itself during runtime */
 #define WHPX_SET_RUNTIME_STATE 1
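
The whpx-all.c hunks that follow replace the WHPX-private dirty flag with the generic cpu->vcpu_dirty field shared with the other accelerators. Below is a minimal standalone sketch of that lazy register-synchronization pattern — placeholder types and functions, not the real QEMU/WHPX interfaces:

#include <stdbool.h>
#include <stdio.h>

/* Placeholder vCPU with only the pieces needed to show the pattern. */
typedef struct {
    bool vcpu_dirty;    /* true: QEMU's copy is newer than the hypervisor's */
    int  regs;          /* stands in for the full register state */
} SketchCPU;

static void hv_set_registers(SketchCPU *cpu) { printf("push regs=%d\n", cpu->regs); }
static void hv_get_registers(SketchCPU *cpu) { cpu->regs = 42; printf("pull regs\n"); }

/* Before entering the vCPU: push state only if QEMU modified it. */
static void sync_before_run(SketchCPU *cpu)
{
    if (cpu->vcpu_dirty) {
        hv_set_registers(cpu);
        cpu->vcpu_dirty = false;
    }
}

/* When QEMU needs to inspect state: pull it once and mark it dirty. */
static void synchronize_state(SketchCPU *cpu)
{
    if (!cpu->vcpu_dirty) {
        hv_get_registers(cpu);
        cpu->vcpu_dirty = true;
    }
}

int main(void)
{
    SketchCPU cpu = { .vcpu_dirty = true, .regs = 7 };

    sync_before_run(&cpu);    /* pushes regs, clears dirty */
    synchronize_state(&cpu);  /* pulls regs, sets dirty again */
    sync_before_run(&cpu);    /* pushes the refreshed copy */
    return 0;
}
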
diff --git a/target/i386/whpx/whpx-all.c b/target/i386/whpx/whpx-all.c
index e44d044..22ac609 100644
--- a/target/i386/whpx/whpx-all.c
+++ b/target/i386/whpx/whpx-all.c
@@ -10,8 +10,8 @@
 #include "qemu/osdep.h"
 #include "cpu.h"
-#include "exec/address-spaces.h"
-#include "exec/ioport.h"
+#include "system/address-spaces.h"
+#include "system/ioport.h"
 #include "gdbstub/helpers.h"
 #include "qemu/accel.h"
 #include "system/whpx.h"
@@ -26,6 +26,8 @@
 #include "qapi/qapi-types-common.h"
 #include "qapi/qapi-visit-common.h"
 #include "migration/blocker.h"
+#include "host-cpu.h"
+#include "accel/accel-cpu-target.h"
 #include <winerror.h>
 #include "whpx-internal.h"
@@ -237,13 +239,12 @@ struct AccelCPUState {
     uint64_t tpr;
     uint64_t apic_base;
     bool interruption_pending;
-    bool dirty;
     /* Must be the last field as it may have a tail */
     WHV_RUN_VP_EXIT_CONTEXT exit_ctx;
 };
-static bool whpx_allowed;
+bool whpx_allowed;
 static bool whp_dispatch_initialized;
 static HMODULE hWinHvPlatform, hWinHvEmulation;
 static uint32_t max_vcpu_index;
@@ -836,7 +837,7 @@ static HRESULT CALLBACK whpx_emu_setreg_callback(
      * The emulator just successfully wrote the register state. We clear the
      * dirty state so we avoid the double write on resume of the VP.
      */
-    cpu->accel->dirty = false;
+    cpu->vcpu_dirty = false;
     return hr;
 }
@@ -1391,7 +1392,7 @@ static int whpx_last_vcpu_stopping(CPUState *cpu)
 /* Returns the address of the next instruction that is about to be executed. */
 static vaddr whpx_vcpu_get_pc(CPUState *cpu, bool exit_context_valid)
 {
-    if (cpu->accel->dirty) {
+    if (cpu->vcpu_dirty) {
         /* The CPU registers have been modified by other parts of QEMU. */
         return cpu_env(cpu)->eip;
     } else if (exit_context_valid) {
@@ -1704,9 +1705,9 @@ static int whpx_vcpu_run(CPUState *cpu)
     }
     do {
-        if (cpu->accel->dirty) {
+        if (cpu->vcpu_dirty) {
             whpx_set_registers(cpu, WHPX_SET_RUNTIME_STATE);
-            cpu->accel->dirty = false;
+            cpu->vcpu_dirty = false;
         }
         if (exclusive_step_mode == WHPX_STEP_NONE) {
@@ -2054,9 +2055,9 @@ static int whpx_vcpu_run(CPUState *cpu)
 static void do_whpx_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
 {
-    if (!cpu->accel->dirty) {
+    if (!cpu->vcpu_dirty) {
         whpx_get_registers(cpu);
-        cpu->accel->dirty = true;
+        cpu->vcpu_dirty = true;
     }
 }
@@ -2064,20 +2065,20 @@ static void do_whpx_cpu_synchronize_post_reset(CPUState *cpu,
                                                run_on_cpu_data arg)
 {
     whpx_set_registers(cpu, WHPX_SET_RESET_STATE);
-    cpu->accel->dirty = false;
+    cpu->vcpu_dirty = false;
 }
 static void do_whpx_cpu_synchronize_post_init(CPUState *cpu,
                                               run_on_cpu_data arg)
 {
     whpx_set_registers(cpu, WHPX_SET_FULL_STATE);
-    cpu->accel->dirty = false;
+    cpu->vcpu_dirty = false;
 }
 static void do_whpx_cpu_synchronize_pre_loadvm(CPUState *cpu,
                                                run_on_cpu_data arg)
 {
-    cpu->accel->dirty = true;
+    cpu->vcpu_dirty = true;
 }
 /*
@@ -2086,7 +2087,7 @@ static void do_whpx_cpu_synchronize_pre_loadvm(CPUState *cpu,
 void whpx_cpu_synchronize_state(CPUState *cpu)
 {
-    if (!cpu->accel->dirty) {
+    if (!cpu->vcpu_dirty) {
         run_on_cpu(cpu, do_whpx_cpu_synchronize_state, RUN_ON_CPU_NULL);
     }
 }
@@ -2106,7 +2107,7 @@ void whpx_cpu_synchronize_pre_loadvm(CPUState *cpu)
     run_on_cpu(cpu, do_whpx_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL);
 }
-void whpx_cpu_synchronize_pre_resume(bool step_pending)
+static void whpx_pre_resume_vm(AccelState *as, bool step_pending)
 {
     whpx_global.step_pending = step_pending;
 }
@@ -2226,7 +2227,7 @@ int whpx_init_vcpu(CPUState *cpu)
     }
     vcpu->interruptable = true;
-    vcpu->dirty = true;
+    cpu->vcpu_dirty = true;
     cpu->accel = vcpu;
     max_vcpu_index = max(max_vcpu_index, cpu->cpu_index);
     qemu_add_vm_change_state_handler(whpx_cpu_update_state, env);
@@ -2501,11 +2502,33 @@ static void whpx_set_kernel_irqchip(Object *obj, Visitor *v,
     }
 }
+static void whpx_cpu_instance_init(CPUState *cs)
+{
+    X86CPU *cpu = X86_CPU(cs);
+
+    host_cpu_instance_init(cpu);
+}
+
+static void whpx_cpu_accel_class_init(ObjectClass *oc, const void *data)
+{
+    AccelCPUClass *acc = ACCEL_CPU_CLASS(oc);
+
+    acc->cpu_instance_init = whpx_cpu_instance_init;
+}
+
+static const TypeInfo whpx_cpu_accel_type = {
+    .name = ACCEL_CPU_NAME("whpx"),
+
+    .parent = TYPE_ACCEL_CPU,
+    .class_init = whpx_cpu_accel_class_init,
+    .abstract = true,
+};
+
 /*
  * Partition support
  */
-static int whpx_accel_init(MachineState *ms)
+static int whpx_accel_init(AccelState *as, MachineState *ms)
 {
     struct whpx_state *whpx;
     int ret;
@@ -2689,20 +2712,16 @@ error:
     return ret;
 }
-int whpx_enabled(void)
-{
-    return whpx_allowed;
-}
-
 bool whpx_apic_in_platform(void)
 {
     return whpx_global.apic_in_platform;
 }
-static void whpx_accel_class_init(ObjectClass *oc, void *data)
+static void whpx_accel_class_init(ObjectClass *oc, const void *data)
 {
     AccelClass *ac = ACCEL_CLASS(oc);
     ac->name = "WHPX";
     ac->init_machine = whpx_accel_init;
+    ac->pre_resume_vm = whpx_pre_resume_vm;
     ac->allowed = &whpx_allowed;
     object_class_property_add(oc, "kernel-irqchip", "on|off|split",
@@ -2731,6 +2750,7 @@ static const TypeInfo whpx_accel_type = {
 static void whpx_type_init(void)
 {
     type_register_static(&whpx_accel_type);
+    type_register_static(&whpx_cpu_accel_type);
 }
 bool init_whp_dispatch(void)
diff --git a/target/i386/whpx/whpx-apic.c b/target/i386/whpx/whpx-apic.c
index 630a961..e1ef6d4 100644
--- a/target/i386/whpx/whpx-apic.c
+++ b/target/i386/whpx/whpx-apic.c
@@ -252,7 +252,7 @@ static void whpx_apic_realize(DeviceState *dev, Error **errp)
     msi_nonbroken = true;
 }
-static void whpx_apic_class_init(ObjectClass *klass, void *data)
+static void whpx_apic_class_init(ObjectClass *klass, const void *data)
 {
     APICCommonClass *k = APIC_COMMON_CLASS(klass);
diff --git a/target/i386/xsave_helper.c b/target/i386/xsave_helper.c
index 996e9f3..24ab7be 100644
--- a/target/i386/xsave_helper.c
+++ b/target/i386/xsave_helper.c
@@ -5,6 +5,7 @@
 #include "qemu/osdep.h"
 #include "cpu.h"
+#include "exec/tswap.h"
 void x86_cpu_xsave_all_areas(X86CPU *cpu, void *buf, uint32_t buflen)
 {
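
The whpx-all.c additions above register a separate per-accelerator CPU class whose instance-init hook seeds each vCPU from the host CPU model, and whpx_type_init now registers it next to the accelerator type. A rough standalone sketch of that registration pattern with placeholder structures (the real code goes through QOM's TypeInfo/type_register_static machinery, which is not reproduced here):

#include <stdio.h>

/* Placeholder stand-ins for QOM classes; only the shape of the pattern matters. */
typedef struct { const char *model; } VCpu;
typedef struct { void (*cpu_instance_init)(VCpu *cpu); } AccelCpuClass;

static void host_cpu_instance_init_sketch(VCpu *cpu)
{
    cpu->model = "host";   /* WHPX vCPUs start from the host CPU model */
}

static void whpx_cpu_accel_class_init_sketch(AccelCpuClass *acc)
{
    acc->cpu_instance_init = host_cpu_instance_init_sketch;
}

int main(void)
{
    AccelCpuClass acc = { 0 };
    VCpu cpu = { 0 };

    /* "type registration": fill in the class, then apply it to a new vCPU */
    whpx_cpu_accel_class_init_sketch(&acc);
    acc.cpu_instance_init(&cpu);
    printf("vCPU model: %s\n", cpu.model);
    return 0;
}
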