author     Stefan Hajnoczi <stefanha@redhat.com>   2025-07-14 09:36:57 -0400
committer  Stefan Hajnoczi <stefanha@redhat.com>   2025-07-14 09:36:57 -0400
commit     b92b39af4219df4250f121f64d215506909c7404 (patch)
tree       d112c017e3c77df7341e5fe28072571ad9ecceb8
parent     6fae7ce1488e3f5bdcc1747564ea68e7f6f0e931 (diff)
parent     5d21ee453ad8e3f95f75e542cb3b35c5bb7cf23a (diff)
Merge tag 'for-upstream' of https://gitlab.com/bonzini/qemu into staging
* rust: miscellaneous fixes
* rust: qemu-api-macros: cleanup and add unit tests for TryInto
* rust: log: implement io::Write, avoid memory allocations
when logging constant strings
* target/i386: fix usage of properties whenever accelerators
change the default (e.g. vendor)
* target/i386: add support for TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT
* target/i386: add support for booting an SEV VM from an IGVM file
* target/i386: unify cache model descriptions between CPUID 2,
CPUID 4 and AMD specific CPUID 0x80000006
* target/i386: introduce cache models for recent Intel CPU models
* target/i386: mark some 0x80000000-0x80000008 bits as reserved on Intel
* target/i386: cleanups
# -----BEGIN PGP SIGNATURE-----
#
# iQFIBAABCgAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmh0v+sUHHBib256aW5p
# QHJlZGhhdC5jb20ACgkQv/vSX3jHroOQUQf8CTsCnl2xYrnrkVfSVj6kuAE+JYD6
# oLSXsOEG4yrVknuhwIfVsqNScmleJCdz85ej7CZxy3vzzgjLfmy7nwifKEIKku7E
# XO/Q3HbB898MnzqceQRmwe1AzELoj1Lave215CPhUBo60LCRPwaIZsiHprnNZgXi
# TyHlmywDVRjyFLtKkx3El0dnLAhFqPWeGh81CD5lPLZZJ+Wt2FuAw2zqSOGB2ztM
# FkJmunFJiaTItjyCN/uNvBSbDKecAHgCXvSCVNG3+I4U2R0gK1lcwm3TRo7yKia+
# HUHGa3UEXoIqlRfXdX6zuc8tW1/u6SPv+8WX53t204PAeSWDUrtIe9jZ4A==
# =y4/a
# -----END PGP SIGNATURE-----
# gpg: Signature made Mon 14 Jul 2025 04:29:31 EDT
# gpg: using RSA key F13338574B662389866C7682BFFBD25F78C7AE83
# gpg: issuer "pbonzini@redhat.com"
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>" [full]
# gpg: aka "Paolo Bonzini <pbonzini@redhat.com>" [full]
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4 E2F7 7E15 100C CD36 69B1
# Subkey fingerprint: F133 3857 4B66 2389 866C 7682 BFFB D25F 78C7 AE83
* tag 'for-upstream' of https://gitlab.com/bonzini/qemu: (77 commits)
i386/cpu: Honor maximum value for CPUID.8000001DH.EAX[25:14]
i386/cpu: Fix overflow of cache topology fields in CPUID.04H
i386/cpu: Fix cpu number overflow in CPUID.01H.EBX[23:16]
i386/cpu: Fix number of addressable IDs field for CPUID.01H.EBX[23:16]
i386/cpu: Reorder CPUID leaves in cpu_x86_cpuid()
tests/vm: bump FreeBSD image to 14.3
tests/functional: test_x86_cpu_model_versions: remove dead tests
i386/cpu: Mark CPUID 0x80000008 ECX bits[0:7] & [12:15] as reserved for Intel/Zhaoxin
i386/cpu: Mark CPUID 0x80000007[EBX] as reserved for Intel
i386/cpu: Mark EBX/ECX/EDX in CPUID 0x80000000 leaf as reserved for Intel
i386/cpu: Enable 0x1f leaf for YongFeng by default
i386/cpu: Enable 0x1f leaf for SapphireRapids by default
i386/cpu: Enable 0x1f leaf for GraniteRapids by default
i386/cpu: Enable 0x1f leaf for SierraForest by default
i386/cpu: Add a "x-force-cpuid-0x1f" property
i386/cpu: Introduce cache model for YongFeng
i386/cpu: Introduce cache model for SapphireRapids
i386/cpu: Introduce cache model for GraniteRapids
i386/cpu: Introduce cache model for SierraForest
...
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
55 files changed, 3924 insertions, 630 deletions
diff --git a/backends/confidential-guest-support.c b/backends/confidential-guest-support.c
index 8ff7bfa..156dd15 100644
--- a/backends/confidential-guest-support.c
+++ b/backends/confidential-guest-support.c
@@ -14,15 +14,58 @@
 #include "qemu/osdep.h"
 #include "system/confidential-guest-support.h"
+#include "qapi/error.h"
 
 OBJECT_DEFINE_ABSTRACT_TYPE(ConfidentialGuestSupport,
                             confidential_guest_support,
                             CONFIDENTIAL_GUEST_SUPPORT,
                             OBJECT)
 
+static bool check_support(ConfidentialGuestPlatformType platform,
+                          uint16_t platform_version, uint8_t highest_vtl,
+                          uint64_t shared_gpa_boundary)
+{
+    /* Default: no support. */
+    return false;
+}
+
+static int set_guest_state(hwaddr gpa, uint8_t *ptr, uint64_t len,
+                           ConfidentialGuestPageType memory_type,
+                           uint16_t cpu_index, Error **errp)
+{
+    error_setg(errp,
+               "Setting confidential guest state is not supported for this platform");
+    return -1;
+}
+
+static int set_guest_policy(ConfidentialGuestPolicyType policy_type,
+                            uint64_t policy,
+                            void *policy_data1, uint32_t policy_data1_size,
+                            void *policy_data2, uint32_t policy_data2_size,
+                            Error **errp)
+{
+    error_setg(errp,
+               "Setting confidential guest policy is not supported for this platform");
+    return -1;
+}
+
+static int get_mem_map_entry(int index, ConfidentialGuestMemoryMapEntry *entry,
+                             Error **errp)
+{
+    error_setg(
+        errp,
+        "Obtaining the confidential guest memory map is not supported for this platform");
+    return -1;
+}
+
 static void confidential_guest_support_class_init(ObjectClass *oc,
                                                   const void *data)
 {
+    ConfidentialGuestSupportClass *cgsc = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc);
+
+    cgsc->check_support = check_support;
+    cgsc->set_guest_state = set_guest_state;
+    cgsc->set_guest_policy = set_guest_policy;
+    cgsc->get_mem_map_entry = get_mem_map_entry;
 }
 
 static void confidential_guest_support_init(Object *obj)
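The abstract class installs "not supported" defaults for every new hook, so a platform backend only has to override the callbacks it actually implements in its own class_init. A minimal sketch of that pattern for a hypothetical platform (the my_* names are illustrative and are not part of this patch):

    /* Hypothetical platform backend overriding the new hooks (sketch only) */
    #include "qemu/osdep.h"
    #include "qapi/error.h"
    #include "system/confidential-guest-support.h"

    static bool my_check_support(ConfidentialGuestPlatformType platform,
                                 uint16_t platform_version, uint8_t highest_vtl,
                                 uint64_t shared_gpa_boundary)
    {
        /* Accept only the platform type this backend implements */
        return platform == CGS_PLATFORM_SEV;
    }

    static int my_set_guest_state(hwaddr gpa, uint8_t *ptr, uint64_t len,
                                  ConfidentialGuestPageType memory_type,
                                  uint16_t cpu_index, Error **errp)
    {
        /* Measure/encrypt the range [gpa, gpa + len) here */
        return 0;
    }

    static void my_platform_class_init(ObjectClass *oc, const void *data)
    {
        ConfidentialGuestSupportClass *cgsc =
            CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc);

        /* Hooks left unset keep the abstract class's "not supported" stubs */
        cgsc->check_support = my_check_support;
        cgsc->set_guest_state = my_set_guest_state;
    }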
+#include "system/memory.h" +#include "system/address-spaces.h" +#include "hw/core/cpu.h" + +#include <igvm/igvm.h> +#include <igvm/igvm_defs.h> + +typedef struct QIgvmParameterData { + QTAILQ_ENTRY(QIgvmParameterData) next; + uint8_t *data; + uint32_t size; + uint32_t index; +} QIgvmParameterData; + +/* + * Some directives are specific to particular confidential computing platforms. + * Define required types for each of those platforms here. + */ + +/* SEV/SEV-ES/SEV-SNP */ + +/* + * These structures are defined in "SEV Secure Nested Paging Firmware ABI + * Specification" Rev 1.58, section 8.18. + */ +struct QEMU_PACKED sev_id_block { + uint8_t ld[48]; + uint8_t family_id[16]; + uint8_t image_id[16]; + uint32_t version; + uint32_t guest_svn; + uint64_t policy; +}; + +struct QEMU_PACKED sev_id_authentication { + uint32_t id_key_alg; + uint32_t auth_key_algo; + uint8_t reserved[56]; + uint8_t id_block_sig[512]; + uint8_t id_key[1028]; + uint8_t reserved2[60]; + uint8_t id_key_sig[512]; + uint8_t author_key[1028]; + uint8_t reserved3[892]; +}; + +#define IGVM_SEV_ID_BLOCK_VERSION 1 + +/* + * QIgvm contains the information required during processing + * of a single IGVM file. + */ +typedef struct QIgvm { + IgvmHandle file; + ConfidentialGuestSupport *cgs; + ConfidentialGuestSupportClass *cgsc; + uint32_t compatibility_mask; + unsigned current_header_index; + QTAILQ_HEAD(, QIgvmParameterData) parameter_data; + IgvmPlatformType platform_type; + + /* + * SEV-SNP platforms can contain an ID block and authentication + * that should be verified by the guest. + */ + struct sev_id_block *id_block; + struct sev_id_authentication *id_auth; + + /* Define the guest policy for SEV guests */ + uint64_t sev_policy; + + /* These variables keep track of contiguous page regions */ + IGVM_VHS_PAGE_DATA region_prev_page_data; + uint64_t region_start; + unsigned region_start_index; + unsigned region_last_index; + unsigned region_page_count; +} QIgvm; + +static int qigvm_directive_page_data(QIgvm *ctx, const uint8_t *header_data, + Error **errp); +static int qigvm_directive_vp_context(QIgvm *ctx, const uint8_t *header_data, + Error **errp); +static int qigvm_directive_parameter_area(QIgvm *ctx, + const uint8_t *header_data, + Error **errp); +static int qigvm_directive_parameter_insert(QIgvm *ctx, + const uint8_t *header_data, + Error **errp); +static int qigvm_directive_memory_map(QIgvm *ctx, const uint8_t *header_data, + Error **errp); +static int qigvm_directive_vp_count(QIgvm *ctx, const uint8_t *header_data, + Error **errp); +static int qigvm_directive_environment_info(QIgvm *ctx, + const uint8_t *header_data, + Error **errp); +static int qigvm_directive_required_memory(QIgvm *ctx, + const uint8_t *header_data, + Error **errp); +static int qigvm_directive_snp_id_block(QIgvm *ctx, const uint8_t *header_data, + Error **errp); +static int qigvm_initialization_guest_policy(QIgvm *ctx, + const uint8_t *header_data, + Error **errp); + +struct QIGVMHandler { + uint32_t type; + uint32_t section; + int (*handler)(QIgvm *ctx, const uint8_t *header_data, Error **errp); +}; + +static struct QIGVMHandler handlers[] = { + { IGVM_VHT_PAGE_DATA, IGVM_HEADER_SECTION_DIRECTIVE, + qigvm_directive_page_data }, + { IGVM_VHT_VP_CONTEXT, IGVM_HEADER_SECTION_DIRECTIVE, + qigvm_directive_vp_context }, + { IGVM_VHT_PARAMETER_AREA, IGVM_HEADER_SECTION_DIRECTIVE, + qigvm_directive_parameter_area }, + { IGVM_VHT_PARAMETER_INSERT, IGVM_HEADER_SECTION_DIRECTIVE, + qigvm_directive_parameter_insert }, + { IGVM_VHT_MEMORY_MAP, 
IGVM_HEADER_SECTION_DIRECTIVE, + qigvm_directive_memory_map }, + { IGVM_VHT_VP_COUNT_PARAMETER, IGVM_HEADER_SECTION_DIRECTIVE, + qigvm_directive_vp_count }, + { IGVM_VHT_ENVIRONMENT_INFO_PARAMETER, IGVM_HEADER_SECTION_DIRECTIVE, + qigvm_directive_environment_info }, + { IGVM_VHT_REQUIRED_MEMORY, IGVM_HEADER_SECTION_DIRECTIVE, + qigvm_directive_required_memory }, + { IGVM_VHT_SNP_ID_BLOCK, IGVM_HEADER_SECTION_DIRECTIVE, + qigvm_directive_snp_id_block }, + { IGVM_VHT_GUEST_POLICY, IGVM_HEADER_SECTION_INITIALIZATION, + qigvm_initialization_guest_policy }, +}; + +static int qigvm_handler(QIgvm *ctx, uint32_t type, Error **errp) +{ + size_t handler; + IgvmHandle header_handle; + const uint8_t *header_data; + int result; + + for (handler = 0; handler < G_N_ELEMENTS(handlers); handler++) { + if (handlers[handler].type != type) { + continue; + } + header_handle = igvm_get_header(ctx->file, handlers[handler].section, + ctx->current_header_index); + if (header_handle < 0) { + error_setg( + errp, + "IGVM file is invalid: Failed to read directive header (code: %d)", + (int)header_handle); + return -1; + } + header_data = igvm_get_buffer(ctx->file, header_handle) + + sizeof(IGVM_VHS_VARIABLE_HEADER); + result = handlers[handler].handler(ctx, header_data, errp); + igvm_free_buffer(ctx->file, header_handle); + return result; + } + error_setg(errp, + "IGVM: Unknown header type encountered when processing file: " + "(type 0x%X)", + type); + return -1; +} + +static void *qigvm_prepare_memory(QIgvm *ctx, uint64_t addr, uint64_t size, + int region_identifier, Error **errp) +{ + ERRP_GUARD(); + MemoryRegion *igvm_pages = NULL; + Int128 gpa_region_size; + MemoryRegionSection mrs = + memory_region_find(get_system_memory(), addr, size); + if (mrs.mr) { + if (!memory_region_is_ram(mrs.mr)) { + memory_region_unref(mrs.mr); + error_setg( + errp, + "Processing of IGVM file failed: Could not prepare memory " + "at address 0x%lX due to existing non-RAM region", + addr); + return NULL; + } + + gpa_region_size = int128_make64(size); + if (int128_lt(mrs.size, gpa_region_size)) { + memory_region_unref(mrs.mr); + error_setg( + errp, + "Processing of IGVM file failed: Could not prepare memory " + "at address 0x%lX: region size exceeded", + addr); + return NULL; + } + return qemu_map_ram_ptr(mrs.mr->ram_block, mrs.offset_within_region); + } else { + /* + * The region_identifier is the is the index of the IGVM directive that + * contains the page with the lowest GPA in the region. This will + * generate a unique region name. + */ + g_autofree char *region_name = + g_strdup_printf("igvm.%X", region_identifier); + igvm_pages = g_new0(MemoryRegion, 1); + if (ctx->cgs && ctx->cgs->require_guest_memfd) { + if (!memory_region_init_ram_guest_memfd(igvm_pages, NULL, + region_name, size, errp)) { + return NULL; + } + } else { + if (!memory_region_init_ram(igvm_pages, NULL, region_name, size, + errp)) { + return NULL; + } + } + memory_region_add_subregion(get_system_memory(), addr, igvm_pages); + return memory_region_get_ram_ptr(igvm_pages); + } +} + +static int qigvm_type_to_cgs_type(IgvmPageDataType memory_type, bool unmeasured, + bool zero) +{ + switch (memory_type) { + case IGVM_PAGE_DATA_TYPE_NORMAL: { + if (unmeasured) { + return CGS_PAGE_TYPE_UNMEASURED; + } else { + return zero ? 
CGS_PAGE_TYPE_ZERO : CGS_PAGE_TYPE_NORMAL; + } + } + case IGVM_PAGE_DATA_TYPE_SECRETS: + return CGS_PAGE_TYPE_SECRETS; + case IGVM_PAGE_DATA_TYPE_CPUID_DATA: + return CGS_PAGE_TYPE_CPUID; + case IGVM_PAGE_DATA_TYPE_CPUID_XF: + return CGS_PAGE_TYPE_CPUID; + default: + return -1; + } +} + +static bool qigvm_page_attrs_equal(IgvmHandle igvm, unsigned header_index, + const IGVM_VHS_PAGE_DATA *page_1, + const IGVM_VHS_PAGE_DATA *page_2) +{ + IgvmHandle data_handle1, data_handle2; + + /* + * If one page has data and the other doesn't then this results in different + * page types: NORMAL vs ZERO. + */ + data_handle1 = igvm_get_header_data(igvm, IGVM_HEADER_SECTION_DIRECTIVE, + header_index - 1); + data_handle2 = + igvm_get_header_data(igvm, IGVM_HEADER_SECTION_DIRECTIVE, header_index); + if ((data_handle1 == IGVMAPI_NO_DATA || + data_handle2 == IGVMAPI_NO_DATA) && + data_handle1 != data_handle2) { + return false; + } + return ((*(const uint32_t *)&page_1->flags == + *(const uint32_t *)&page_2->flags) && + (page_1->data_type == page_2->data_type) && + (page_1->compatibility_mask == page_2->compatibility_mask)); +} + +static int qigvm_process_mem_region(QIgvm *ctx, unsigned start_index, + uint64_t gpa_start, unsigned page_count, + const IgvmPageDataFlags *flags, + const IgvmPageDataType page_type, + Error **errp) +{ + uint8_t *region; + IgvmHandle data_handle; + const void *data; + uint32_t data_size; + unsigned page_index; + bool zero = true; + const uint64_t page_size = flags->is_2mb_page ? 0x200000 : 0x1000; + int result; + int cgs_page_type; + + region = qigvm_prepare_memory(ctx, gpa_start, page_count * page_size, + start_index, errp); + if (!region) { + return -1; + } + + for (page_index = 0; page_index < page_count; page_index++) { + data_handle = igvm_get_header_data( + ctx->file, IGVM_HEADER_SECTION_DIRECTIVE, page_index + start_index); + if (data_handle == IGVMAPI_NO_DATA) { + /* No data indicates a zero page */ + memset(®ion[page_index * page_size], 0, page_size); + } else if (data_handle < 0) { + error_setg( + errp, + "IGVM file contains invalid page data for directive with " + "index %d", + page_index + start_index); + return -1; + } else { + zero = false; + data_size = igvm_get_buffer_size(ctx->file, data_handle); + if (data_size < page_size) { + memset(®ion[page_index * page_size], 0, page_size); + } else if (data_size > page_size) { + error_setg(errp, + "IGVM file contains page data with invalid size for " + "directive with index %d", + page_index + start_index); + return -1; + } + data = igvm_get_buffer(ctx->file, data_handle); + memcpy(®ion[page_index * page_size], data, data_size); + igvm_free_buffer(ctx->file, data_handle); + } + } + + /* + * If a confidential guest support object is provided then use it to set the + * guest state. + */ + if (ctx->cgs) { + cgs_page_type = + qigvm_type_to_cgs_type(page_type, flags->unmeasured, zero); + if (cgs_page_type < 0) { + error_setg(errp, + "Invalid page type in IGVM file. 
Directives: %d to %d, " + "page type: %d", + start_index, start_index + page_count, page_type); + return -1; + } + + result = ctx->cgsc->set_guest_state( + gpa_start, region, page_size * page_count, cgs_page_type, 0, errp); + if (result < 0) { + return result; + } + } + return 0; +} + +static int qigvm_process_mem_page(QIgvm *ctx, + const IGVM_VHS_PAGE_DATA *page_data, + Error **errp) +{ + if (page_data) { + if (ctx->region_page_count == 0) { + ctx->region_start = page_data->gpa; + ctx->region_start_index = ctx->current_header_index; + } else { + if (!qigvm_page_attrs_equal(ctx->file, ctx->current_header_index, + page_data, + &ctx->region_prev_page_data) || + ((ctx->region_prev_page_data.gpa + + (ctx->region_prev_page_data.flags.is_2mb_page ? 0x200000 : + 0x1000)) != + page_data->gpa) || + (ctx->region_last_index != (ctx->current_header_index - 1))) { + /* End of current region */ + if (qigvm_process_mem_region( + ctx, ctx->region_start_index, ctx->region_start, + ctx->region_page_count, + &ctx->region_prev_page_data.flags, + ctx->region_prev_page_data.data_type, errp) < 0) { + return -1; + } + ctx->region_page_count = 0; + ctx->region_start = page_data->gpa; + ctx->region_start_index = ctx->current_header_index; + } + } + memcpy(&ctx->region_prev_page_data, page_data, + sizeof(ctx->region_prev_page_data)); + ctx->region_last_index = ctx->current_header_index; + ctx->region_page_count++; + } else { + if (ctx->region_page_count > 0) { + if (qigvm_process_mem_region( + ctx, ctx->region_start_index, ctx->region_start, + ctx->region_page_count, &ctx->region_prev_page_data.flags, + ctx->region_prev_page_data.data_type, errp) < 0) { + return -1; + } + ctx->region_page_count = 0; + } + } + return 0; +} + +static int qigvm_directive_page_data(QIgvm *ctx, const uint8_t *header_data, + Error **errp) +{ + const IGVM_VHS_PAGE_DATA *page_data = + (const IGVM_VHS_PAGE_DATA *)header_data; + if (page_data->compatibility_mask & ctx->compatibility_mask) { + return qigvm_process_mem_page(ctx, page_data, errp); + } + return 0; +} + +static int qigvm_directive_vp_context(QIgvm *ctx, const uint8_t *header_data, + Error **errp) +{ + const IGVM_VHS_VP_CONTEXT *vp_context = + (const IGVM_VHS_VP_CONTEXT *)header_data; + IgvmHandle data_handle; + uint8_t *data; + int result; + + if (!(vp_context->compatibility_mask & ctx->compatibility_mask)) { + return 0; + } + + /* + * A confidential guest support object must be provided for setting + * a VP context. + */ + if (!ctx->cgs) { + error_setg( + errp, + "A VP context is present in the IGVM file but is not supported " + "by the current system."); + return -1; + } + + data_handle = igvm_get_header_data(ctx->file, IGVM_HEADER_SECTION_DIRECTIVE, + ctx->current_header_index); + if (data_handle < 0) { + error_setg(errp, "Invalid VP context in IGVM file. 
Error code: %X", + data_handle); + return -1; + } + + data = (uint8_t *)igvm_get_buffer(ctx->file, data_handle); + result = ctx->cgsc->set_guest_state( + vp_context->gpa, data, igvm_get_buffer_size(ctx->file, data_handle), + CGS_PAGE_TYPE_VMSA, vp_context->vp_index, errp); + igvm_free_buffer(ctx->file, data_handle); + if (result < 0) { + return result; + } + return 0; +} + +static int qigvm_directive_parameter_area(QIgvm *ctx, + const uint8_t *header_data, + Error **errp) +{ + const IGVM_VHS_PARAMETER_AREA *param_area = + (const IGVM_VHS_PARAMETER_AREA *)header_data; + QIgvmParameterData *param_entry; + + param_entry = g_new0(QIgvmParameterData, 1); + param_entry->size = param_area->number_of_bytes; + param_entry->index = param_area->parameter_area_index; + param_entry->data = g_malloc0(param_entry->size); + + QTAILQ_INSERT_TAIL(&ctx->parameter_data, param_entry, next); + return 0; +} + +static int qigvm_directive_parameter_insert(QIgvm *ctx, + const uint8_t *header_data, + Error **errp) +{ + const IGVM_VHS_PARAMETER_INSERT *param = + (const IGVM_VHS_PARAMETER_INSERT *)header_data; + QIgvmParameterData *param_entry; + int result; + void *region; + + if (!(param->compatibility_mask & ctx->compatibility_mask)) { + return 0; + } + + QTAILQ_FOREACH(param_entry, &ctx->parameter_data, next) + { + if (param_entry->index == param->parameter_area_index) { + region = qigvm_prepare_memory(ctx, param->gpa, param_entry->size, + ctx->current_header_index, errp); + if (!region) { + return -1; + } + memcpy(region, param_entry->data, param_entry->size); + g_free(param_entry->data); + param_entry->data = NULL; + + /* + * If a confidential guest support object is provided then use it to + * set the guest state. + */ + if (ctx->cgs) { + result = ctx->cgsc->set_guest_state(param->gpa, region, + param_entry->size, + CGS_PAGE_TYPE_UNMEASURED, 0, + errp); + if (result < 0) { + return -1; + } + } + } + } + return 0; +} + +static int qigvm_cmp_mm_entry(const void *a, const void *b) +{ + const IGVM_VHS_MEMORY_MAP_ENTRY *entry_a = + (const IGVM_VHS_MEMORY_MAP_ENTRY *)a; + const IGVM_VHS_MEMORY_MAP_ENTRY *entry_b = + (const IGVM_VHS_MEMORY_MAP_ENTRY *)b; + if (entry_a->starting_gpa_page_number < entry_b->starting_gpa_page_number) { + return -1; + } else if (entry_a->starting_gpa_page_number > + entry_b->starting_gpa_page_number) { + return 1; + } else { + return 0; + } +} + +static int qigvm_directive_memory_map(QIgvm *ctx, const uint8_t *header_data, + Error **errp) +{ + const IGVM_VHS_PARAMETER *param = (const IGVM_VHS_PARAMETER *)header_data; + QIgvmParameterData *param_entry; + int max_entry_count; + int entry = 0; + IGVM_VHS_MEMORY_MAP_ENTRY *mm_entry; + ConfidentialGuestMemoryMapEntry cgmm_entry; + int retval = 0; + + if (!ctx->cgs) { + error_setg(errp, + "IGVM file contains a memory map but this is not supported " + "by the current system."); + return -1; + } + + /* Find the parameter area that should hold the memory map */ + QTAILQ_FOREACH(param_entry, &ctx->parameter_data, next) + { + if (param_entry->index == param->parameter_area_index) { + max_entry_count = + param_entry->size / sizeof(IGVM_VHS_MEMORY_MAP_ENTRY); + mm_entry = (IGVM_VHS_MEMORY_MAP_ENTRY *)param_entry->data; + + retval = ctx->cgsc->get_mem_map_entry(entry, &cgmm_entry, errp); + while (retval == 0) { + if (entry > max_entry_count) { + error_setg( + errp, + "IGVM: guest memory map size exceeds parameter area defined in IGVM file"); + return -1; + } + mm_entry[entry].starting_gpa_page_number = cgmm_entry.gpa >> 12; + 
mm_entry[entry].number_of_pages = cgmm_entry.size >> 12; + + switch (cgmm_entry.type) { + case CGS_MEM_RAM: + mm_entry[entry].entry_type = + IGVM_MEMORY_MAP_ENTRY_TYPE_MEMORY; + break; + case CGS_MEM_RESERVED: + mm_entry[entry].entry_type = + IGVM_MEMORY_MAP_ENTRY_TYPE_PLATFORM_RESERVED; + break; + case CGS_MEM_ACPI: + mm_entry[entry].entry_type = + IGVM_MEMORY_MAP_ENTRY_TYPE_PLATFORM_RESERVED; + break; + case CGS_MEM_NVS: + mm_entry[entry].entry_type = + IGVM_MEMORY_MAP_ENTRY_TYPE_PERSISTENT; + break; + case CGS_MEM_UNUSABLE: + mm_entry[entry].entry_type = + IGVM_MEMORY_MAP_ENTRY_TYPE_PLATFORM_RESERVED; + break; + } + retval = + ctx->cgsc->get_mem_map_entry(++entry, &cgmm_entry, errp); + } + if (retval < 0) { + return retval; + } + /* The entries need to be sorted */ + qsort(mm_entry, entry, sizeof(IGVM_VHS_MEMORY_MAP_ENTRY), + qigvm_cmp_mm_entry); + + break; + } + } + return 0; +} + +static int qigvm_directive_vp_count(QIgvm *ctx, const uint8_t *header_data, + Error **errp) +{ + const IGVM_VHS_PARAMETER *param = (const IGVM_VHS_PARAMETER *)header_data; + QIgvmParameterData *param_entry; + uint32_t *vp_count; + CPUState *cpu; + + QTAILQ_FOREACH(param_entry, &ctx->parameter_data, next) + { + if (param_entry->index == param->parameter_area_index) { + vp_count = (uint32_t *)(param_entry->data + param->byte_offset); + *vp_count = 0; + CPU_FOREACH(cpu) + { + (*vp_count)++; + } + break; + } + } + return 0; +} + +static int qigvm_directive_environment_info(QIgvm *ctx, + const uint8_t *header_data, + Error **errp) +{ + const IGVM_VHS_PARAMETER *param = (const IGVM_VHS_PARAMETER *)header_data; + QIgvmParameterData *param_entry; + IgvmEnvironmentInfo *environmental_state; + + QTAILQ_FOREACH(param_entry, &ctx->parameter_data, next) + { + if (param_entry->index == param->parameter_area_index) { + environmental_state = + (IgvmEnvironmentInfo *)(param_entry->data + param->byte_offset); + environmental_state->memory_is_shared = 1; + break; + } + } + return 0; +} + +static int qigvm_directive_required_memory(QIgvm *ctx, + const uint8_t *header_data, + Error **errp) +{ + const IGVM_VHS_REQUIRED_MEMORY *mem = + (const IGVM_VHS_REQUIRED_MEMORY *)header_data; + uint8_t *region; + int result; + + if (!(mem->compatibility_mask & ctx->compatibility_mask)) { + return 0; + } + + region = qigvm_prepare_memory(ctx, mem->gpa, mem->number_of_bytes, + ctx->current_header_index, errp); + if (!region) { + return -1; + } + if (ctx->cgs) { + result = + ctx->cgsc->set_guest_state(mem->gpa, region, mem->number_of_bytes, + CGS_PAGE_TYPE_REQUIRED_MEMORY, 0, errp); + if (result < 0) { + return result; + } + } + return 0; +} + +static int qigvm_directive_snp_id_block(QIgvm *ctx, const uint8_t *header_data, + Error **errp) +{ + const IGVM_VHS_SNP_ID_BLOCK *igvm_id = + (const IGVM_VHS_SNP_ID_BLOCK *)header_data; + + if (!(igvm_id->compatibility_mask & ctx->compatibility_mask)) { + return 0; + } + + if (ctx->id_block) { + error_setg(errp, "IGVM: Multiple ID blocks encountered " + "in IGVM file."); + return -1; + } + ctx->id_block = g_new0(struct sev_id_block, 1); + ctx->id_auth = g_new0(struct sev_id_authentication, 1); + + memcpy(ctx->id_block->family_id, igvm_id->family_id, + sizeof(ctx->id_block->family_id)); + memcpy(ctx->id_block->image_id, igvm_id->image_id, + sizeof(ctx->id_block->image_id)); + ctx->id_block->guest_svn = igvm_id->guest_svn; + ctx->id_block->version = IGVM_SEV_ID_BLOCK_VERSION; + memcpy(ctx->id_block->ld, igvm_id->ld, sizeof(ctx->id_block->ld)); + + ctx->id_auth->id_key_alg = igvm_id->id_key_algorithm; + 
assert(sizeof(igvm_id->id_key_signature) <= + sizeof(ctx->id_auth->id_block_sig)); + memcpy(ctx->id_auth->id_block_sig, &igvm_id->id_key_signature, + sizeof(igvm_id->id_key_signature)); + + ctx->id_auth->auth_key_algo = igvm_id->author_key_algorithm; + assert(sizeof(igvm_id->author_key_signature) <= + sizeof(ctx->id_auth->id_key_sig)); + memcpy(ctx->id_auth->id_key_sig, &igvm_id->author_key_signature, + sizeof(igvm_id->author_key_signature)); + + /* + * SEV and IGVM public key structure population are slightly different. + * See SEV Secure Nested Paging Firmware ABI Specification, Chapter 10. + */ + *((uint32_t *)ctx->id_auth->id_key) = igvm_id->id_public_key.curve; + memcpy(&ctx->id_auth->id_key[4], &igvm_id->id_public_key.qx, 72); + memcpy(&ctx->id_auth->id_key[76], &igvm_id->id_public_key.qy, 72); + + *((uint32_t *)ctx->id_auth->author_key) = + igvm_id->author_public_key.curve; + memcpy(&ctx->id_auth->author_key[4], &igvm_id->author_public_key.qx, + 72); + memcpy(&ctx->id_auth->author_key[76], &igvm_id->author_public_key.qy, + 72); + + return 0; +} + +static int qigvm_initialization_guest_policy(QIgvm *ctx, + const uint8_t *header_data, Error **errp) +{ + const IGVM_VHS_GUEST_POLICY *guest = + (const IGVM_VHS_GUEST_POLICY *)header_data; + + if (guest->compatibility_mask & ctx->compatibility_mask) { + ctx->sev_policy = guest->policy; + } + return 0; +} + +static int qigvm_supported_platform_compat_mask(QIgvm *ctx, Error **errp) +{ + int32_t header_count; + unsigned header_index; + IgvmHandle header_handle; + IGVM_VHS_SUPPORTED_PLATFORM *platform; + uint32_t compatibility_mask_sev = 0; + uint32_t compatibility_mask_sev_es = 0; + uint32_t compatibility_mask_sev_snp = 0; + uint32_t compatibility_mask = 0; + + header_count = igvm_header_count(ctx->file, IGVM_HEADER_SECTION_PLATFORM); + if (header_count < 0) { + error_setg(errp, + "Invalid platform header count in IGVM file. Error code: %X", + header_count); + return -1; + } + + for (header_index = 0; header_index < (unsigned)header_count; + header_index++) { + IgvmVariableHeaderType typ = igvm_get_header_type( + ctx->file, IGVM_HEADER_SECTION_PLATFORM, header_index); + if (typ == IGVM_VHT_SUPPORTED_PLATFORM) { + header_handle = igvm_get_header( + ctx->file, IGVM_HEADER_SECTION_PLATFORM, header_index); + if (header_handle < 0) { + error_setg(errp, + "Invalid platform header in IGVM file. 
" + "Index: %d, Error code: %X", + header_index, header_handle); + return -1; + } + platform = + (IGVM_VHS_SUPPORTED_PLATFORM *)(igvm_get_buffer(ctx->file, + header_handle) + + sizeof( + IGVM_VHS_VARIABLE_HEADER)); + if ((platform->platform_type == IGVM_PLATFORM_TYPE_SEV_ES) && + ctx->cgs) { + if (ctx->cgsc->check_support( + CGS_PLATFORM_SEV_ES, platform->platform_version, + platform->highest_vtl, platform->shared_gpa_boundary)) { + compatibility_mask_sev_es = platform->compatibility_mask; + } + } else if ((platform->platform_type == IGVM_PLATFORM_TYPE_SEV) && + ctx->cgs) { + if (ctx->cgsc->check_support( + CGS_PLATFORM_SEV, platform->platform_version, + platform->highest_vtl, platform->shared_gpa_boundary)) { + compatibility_mask_sev = platform->compatibility_mask; + } + } else if ((platform->platform_type == + IGVM_PLATFORM_TYPE_SEV_SNP) && + ctx->cgs) { + if (ctx->cgsc->check_support( + CGS_PLATFORM_SEV_SNP, platform->platform_version, + platform->highest_vtl, platform->shared_gpa_boundary)) { + compatibility_mask_sev_snp = platform->compatibility_mask; + } + } else if (platform->platform_type == IGVM_PLATFORM_TYPE_NATIVE) { + compatibility_mask = platform->compatibility_mask; + } + igvm_free_buffer(ctx->file, header_handle); + } + } + /* Choose the strongest supported isolation technology */ + if (compatibility_mask_sev_snp != 0) { + ctx->compatibility_mask = compatibility_mask_sev_snp; + ctx->platform_type = IGVM_PLATFORM_TYPE_SEV_SNP; + } else if (compatibility_mask_sev_es != 0) { + ctx->compatibility_mask = compatibility_mask_sev_es; + ctx->platform_type = IGVM_PLATFORM_TYPE_SEV_ES; + } else if (compatibility_mask_sev != 0) { + ctx->compatibility_mask = compatibility_mask_sev; + ctx->platform_type = IGVM_PLATFORM_TYPE_SEV; + } else if (compatibility_mask != 0) { + ctx->compatibility_mask = compatibility_mask; + ctx->platform_type = IGVM_PLATFORM_TYPE_NATIVE; + } else { + error_setg( + errp, + "IGVM file does not describe a compatible supported platform"); + return -1; + } + return 0; +} + +static int qigvm_handle_policy(QIgvm *ctx, Error **errp) +{ + if (ctx->platform_type == IGVM_PLATFORM_TYPE_SEV_SNP) { + int id_block_len = 0; + int id_auth_len = 0; + if (ctx->id_block) { + ctx->id_block->policy = ctx->sev_policy; + id_block_len = sizeof(struct sev_id_block); + id_auth_len = sizeof(struct sev_id_authentication); + } + return ctx->cgsc->set_guest_policy(GUEST_POLICY_SEV, ctx->sev_policy, + ctx->id_block, id_block_len, + ctx->id_auth, id_auth_len, errp); + } + return 0; +} + +static IgvmHandle qigvm_file_init(char *filename, Error **errp) +{ + IgvmHandle igvm; + g_autofree uint8_t *buf = NULL; + unsigned long len; + g_autoptr(GError) gerr = NULL; + + if (!g_file_get_contents(filename, (gchar **)&buf, &len, &gerr)) { + error_setg(errp, "Unable to load %s: %s", filename, gerr->message); + return -1; + } + + igvm = igvm_new_from_binary(buf, len); + if (igvm < 0) { + error_setg(errp, "Unable to parse IGVM file %s: %d", filename, igvm); + return -1; + } + return igvm; +} + +int qigvm_process_file(IgvmCfg *cfg, ConfidentialGuestSupport *cgs, + bool onlyVpContext, Error **errp) +{ + int32_t header_count; + QIgvmParameterData *parameter; + int retval = -1; + QIgvm ctx; + + memset(&ctx, 0, sizeof(ctx)); + ctx.file = qigvm_file_init(cfg->filename, errp); + if (ctx.file < 0) { + return -1; + } + + /* + * The ConfidentialGuestSupport object is optional and allows a confidential + * guest platform to perform extra processing, such as page measurement, on + * IGVM directives. 
+ */ + ctx.cgs = cgs; + ctx.cgsc = cgs ? CONFIDENTIAL_GUEST_SUPPORT_GET_CLASS(cgs) : NULL; + + /* + * Check that the IGVM file provides configuration for the current + * platform + */ + if (qigvm_supported_platform_compat_mask(&ctx, errp) < 0) { + goto cleanup; + } + + header_count = igvm_header_count(ctx.file, IGVM_HEADER_SECTION_DIRECTIVE); + if (header_count <= 0) { + error_setg( + errp, "Invalid directive header count in IGVM file. Error code: %X", + header_count); + goto cleanup; + } + + QTAILQ_INIT(&ctx.parameter_data); + + for (ctx.current_header_index = 0; + ctx.current_header_index < (unsigned)header_count; + ctx.current_header_index++) { + IgvmVariableHeaderType type = igvm_get_header_type( + ctx.file, IGVM_HEADER_SECTION_DIRECTIVE, ctx.current_header_index); + if (!onlyVpContext || (type == IGVM_VHT_VP_CONTEXT)) { + if (qigvm_handler(&ctx, type, errp) < 0) { + goto cleanup_parameters; + } + } + } + + /* + * If only processing the VP context then we don't need to process + * any more of the file. + */ + if (onlyVpContext) { + retval = 0; + goto cleanup_parameters; + } + + header_count = + igvm_header_count(ctx.file, IGVM_HEADER_SECTION_INITIALIZATION); + if (header_count < 0) { + error_setg( + errp, + "Invalid initialization header count in IGVM file. Error code: %X", + header_count); + goto cleanup_parameters; + } + + for (ctx.current_header_index = 0; + ctx.current_header_index < (unsigned)header_count; + ctx.current_header_index++) { + IgvmVariableHeaderType type = + igvm_get_header_type(ctx.file, IGVM_HEADER_SECTION_INITIALIZATION, + ctx.current_header_index); + if (qigvm_handler(&ctx, type, errp) < 0) { + goto cleanup_parameters; + } + } + + /* + * Contiguous pages of data with compatible flags are grouped together in + * order to reduce the number of memory regions we create. Make sure the + * last group is processed with this call. 
+ */ + retval = qigvm_process_mem_page(&ctx, NULL, errp); + + if (retval == 0) { + retval = qigvm_handle_policy(&ctx, errp); + } + +cleanup_parameters: + QTAILQ_FOREACH(parameter, &ctx.parameter_data, next) + { + g_free(parameter->data); + parameter->data = NULL; + } + g_free(ctx.id_block); + g_free(ctx.id_auth); + +cleanup: + igvm_free(ctx.file); + + return retval; +} diff --git a/backends/igvm.h b/backends/igvm.h new file mode 100644 index 0000000..a4abab0 --- /dev/null +++ b/backends/igvm.h @@ -0,0 +1,22 @@ +/* + * QEMU IGVM configuration backend for Confidential Guests + * + * Copyright (C) 2023-2024 SUSE + * + * Authors: + * Roy Hopkins <roy.hopkins@randomman.co.uk> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef BACKENDS_IGVM_H +#define BACKENDS_IGVM_H + +#include "system/confidential-guest-support.h" +#include "system/igvm-cfg.h" +#include "qapi/error.h" + +int qigvm_process_file(IgvmCfg *igvm, ConfidentialGuestSupport *cgs, + bool onlyVpContext, Error **errp); + +#endif diff --git a/backends/meson.build b/backends/meson.build index 9b88d22..60021f4 100644 --- a/backends/meson.build +++ b/backends/meson.build @@ -34,6 +34,11 @@ if have_vhost_user_crypto endif system_ss.add(when: gio, if_true: files('dbus-vmstate.c')) system_ss.add(when: 'CONFIG_SGX', if_true: files('hostmem-epc.c')) +if igvm.found() + system_ss.add(igvm) + system_ss.add(files('igvm-cfg.c'), igvm) + system_ss.add(files('igvm.c'), igvm) +endif system_ss.add(when: 'CONFIG_SPDM_SOCKET', if_true: files('spdm-socket.c')) diff --git a/docs/devel/rust.rst b/docs/devel/rust.rst index dc8c441..b673753 100644 --- a/docs/devel/rust.rst +++ b/docs/devel/rust.rst @@ -351,7 +351,7 @@ Writing procedural macros ''''''''''''''''''''''''' By conventions, procedural macros are split in two functions, one -returning ``Result<proc_macro2::TokenStream, MacroError>`` with the body of +returning ``Result<proc_macro2::TokenStream, syn::Error>`` with the body of the procedural macro, and the second returning ``proc_macro::TokenStream`` which is the actual procedural macro. The former's name is the same as the latter with the ``_or_error`` suffix. The code for the latter is more @@ -361,18 +361,19 @@ from the type after ``as`` in the invocation of ``parse_macro_input!``:: #[proc_macro_derive(Object)] pub fn derive_object(input: TokenStream) -> TokenStream { let input = parse_macro_input!(input as DeriveInput); - let expanded = derive_object_or_error(input).unwrap_or_else(Into::into); - TokenStream::from(expanded) + derive_object_or_error(input) + .unwrap_or_else(syn::Error::into_compile_error) + .into() } The ``qemu_api_macros`` crate has utility functions to examine a ``DeriveInput`` and perform common checks (e.g. looking for a struct -with named fields). These functions return ``Result<..., MacroError>`` +with named fields). These functions return ``Result<..., syn::Error>`` and can be used easily in the procedural macro function:: fn derive_object_or_error(input: DeriveInput) -> - Result<proc_macro2::TokenStream, MacroError> + Result<proc_macro2::TokenStream, Error> { is_c_repr(&input, "#[derive(Object)]")?; diff --git a/docs/interop/firmware.json b/docs/interop/firmware.json index 745d21d..0711b6f 100644 --- a/docs/interop/firmware.json +++ b/docs/interop/firmware.json @@ -57,10 +57,17 @@ # # @memory: The firmware is to be mapped into memory. # +# @igvm: The firmware is defined by a file conforming to the IGVM +# specification and mapped into memory according to directives +# defined in the file. 
This is similar to @memory but may +# include additional processing defined by the IGVM file +# including initial CPU state or population of metadata into +# the guest address space. Since: 10.1 +# # Since: 3.0 ## { 'enum' : 'FirmwareDevice', - 'data' : [ 'flash', 'kernel', 'memory' ] } + 'data' : [ 'flash', 'kernel', 'memory', 'igvm' ] } ## # @FirmwareArchitecture: @@ -378,6 +385,24 @@ 'data' : { 'filename' : 'str' } } ## +# @FirmwareMappingIgvm: +# +# Describes loading and mapping properties for the firmware executable, +# when @FirmwareDevice is @igvm. +# +# @filename: Identifies the IGVM file containing the firmware executable +# along with other information used to configure the initial +# state of the guest. The IGVM file may be shared by multiple +# virtual machine definitions. This corresponds to creating +# an object on the command line with "-object igvm-cfg, +# file=@filename". +# +# Since: 10.1 +## +{ 'struct' : 'FirmwareMappingIgvm', + 'data' : { 'filename' : 'str' } } + +## # @FirmwareMapping: # # Provides a discriminated structure for firmware to describe its @@ -393,7 +418,8 @@ 'discriminator' : 'device', 'data' : { 'flash' : 'FirmwareMappingFlash', 'kernel' : 'FirmwareMappingKernel', - 'memory' : 'FirmwareMappingMemory' } } + 'memory' : 'FirmwareMappingMemory', + 'igvm' : 'FirmwareMappingIgvm' } } ## # @Firmware: diff --git a/docs/system/i386/amd-memory-encryption.rst b/docs/system/i386/amd-memory-encryption.rst index 748f509..6c23f35 100644 --- a/docs/system/i386/amd-memory-encryption.rst +++ b/docs/system/i386/amd-memory-encryption.rst @@ -1,3 +1,5 @@ +.. _amd-sev: + AMD Secure Encrypted Virtualization (SEV) ========================================= diff --git a/docs/system/igvm.rst b/docs/system/igvm.rst new file mode 100644 index 0000000..79508d9 --- /dev/null +++ b/docs/system/igvm.rst @@ -0,0 +1,173 @@ +Independent Guest Virtual Machine (IGVM) support +================================================ + +IGVM files are designed to encapsulate all the information required to launch a +virtual machine on any given virtualization stack in a deterministic way. This +allows the cryptographic measurement of initial guest state for Confidential +Guests to be calculated when the IGVM file is built, allowing a relying party to +verify the initial state of a guest via a remote attestation. + +Although IGVM files are designed with Confidential Computing in mind, they can +also be used to configure non-confidential guests. Multiple platforms can be +defined by a single IGVM file, allowing a single IGVM file to configure a +virtual machine that can run on, for example, TDX, SEV and non-confidential +hosts. + +QEMU supports IGVM files through the user-creatable ``igvm-cfg`` object. This +object is used to define the filename of the IGVM file to process. A reference +to the object is added to the ``-machine`` to configure the virtual machine +to use the IGVM file for configuration. + +Confidential platform support is provided through the use of +the ``ConfidentialGuestSupport`` object. If the virtual machine provides an +instance of this object then this is used by the IGVM loader to configure the +isolation properties of the directives within the file. 
+ +Further Information on IGVM +--------------------------- + +Information about the IGVM format, including links to the format specification +and documentation for the Rust and C libraries can be found at the project +repository: + +https://github.com/microsoft/igvm + + +Supported Platforms +------------------- + +Currently, IGVM files can be provided for Confidential Guests on host systems +that support AMD SEV, SEV-ES and SEV-SNP with KVM. IGVM files can also be +provided for non-confidential guests. + + +Limitations when using IGVM with AMD SEV, SEV-ES and SEV-SNP +------------------------------------------------------------ + +IGVM files configure the initial state of the guest using a set of directives. +Not every directive is supported by every Confidential Guest type. For example, +AMD SEV does not support encrypted save state regions, therefore setting the +initial CPU state using IGVM for SEV is not possible. When an IGVM file contains +directives that are not supported for the active platform, an error is generated +and the guest launch is aborted. + +The table below describes the list of directives that are supported for SEV, +SEV-ES, SEV-SNP and non-confidential platforms. + +.. list-table:: SEV, SEV-ES, SEV-SNP & non-confidential Supported Directives + :widths: 35 65 + :header-rows: 1 + + * - IGVM directive + - Notes + * - IGVM_VHT_PAGE_DATA + - ``NORMAL`` zero, measured and unmeasured page types are supported. Other + page types result in an error. + * - IGVM_VHT_PARAMETER_AREA + - + * - IGVM_VHT_PARAMETER_INSERT + - + * - IGVM_VHT_VP_COUNT_PARAMETER + - The guest parameter page is populated with the CPU count. + * - IGVM_VHT_ENVIRONMENT_INFO_PARAMETER + - The ``memory_is_shared`` parameter is set to 1 in the guest parameter + page. + +.. list-table:: Additional SEV, SEV-ES & SEV_SNP Supported Directives + :widths: 25 75 + :header-rows: 1 + + * - IGVM directive + - Notes + * - IGVM_VHT_MEMORY_MAP + - The memory map page is populated using entries from the E820 table. + * - IGVM_VHT_REQUIRED_MEMORY + - Ensures memory is available in the guest at the specified range. + +.. list-table:: Additional SEV-ES & SEV-SNP Supported Directives + :widths: 25 75 + :header-rows: 1 + + * - IGVM directive + - Notes + * - IGVM_VHT_VP_CONTEXT + - Setting of the initial CPU state for the boot CPU and additional CPUs is + supported with limitations on the fields that can be provided in the + VMSA. See below for details on which fields are supported. + +Initial CPU state with VMSA +--------------------------- + +The initial state of guest CPUs can be defined in the IGVM file for AMD SEV-ES +and SEV-SNP. The state data is provided as a VMSA structure as defined in Table +B-4 in the AMD64 Architecture Programmer's Manual, Volume 2 [1]. + +The IGVM VMSA is translated to CPU state in QEMU which is then synchronized +by KVM to the guest VMSA during the launch process where it contributes to the +launch measurement. See :ref:`amd-sev` for details on the launch process and +guest launch measurement. + +It is important that no information is lost or changed when translating the +VMSA provided by the IGVM file into the VSMA that is used to launch the guest. +Therefore, QEMU restricts the VMSA fields that can be provided in the IGVM +VMSA structure to the following registers: + +RAX, RCX, RDX, RBX, RBP, RSI, RDI, R8-R15, RSP, RIP, CS, DS, ES, FS, GS, SS, +CR0, CR3, CR4, XCR0, EFER, PAT, GDT, IDT, LDTR, TR, DR6, DR7, RFLAGS, X87_FCW, +MXCSR. 
+ +When processing the IGVM file, QEMU will check if any fields other than the +above are non-zero and generate an error if this is the case. + +KVM uses a hardcoded GPA of 0xFFFFFFFFF000 for the VMSA. When an IGVM file +defines initial CPU state, the GPA for each VMSA must match this hardcoded +value. + +Firmware Images with IGVM +------------------------- + +When an IGVM filename is specified for a Confidential Guest Support object it +overrides the default handling of system firmware: the firmware image, such as +an OVMF binary should be contained as a payload of the IGVM file and not +provided as a flash drive or via the ``-bios`` parameter. The default QEMU +firmware is not automatically populated into the guest memory space. + +If an IGVM file is provided along with either the ``-bios`` parameter or pflash +devices then an error is displayed and the guest startup is aborted. + +Running a guest configured using IGVM +------------------------------------- + +To run a guest configured with IGVM you firstly need to generate an IGVM file +that contains a guest configuration compatible with the platform you are +targeting. + +The ``buildigvm`` tool [2] is an example of a tool that can be used to generate +IGVM files for non-confidential X86 platforms as well as for SEV, SEV-ES and +SEV-SNP confidential platforms. + +Example using this tool to generate an IGVM file for AMD SEV-SNP:: + + buildigvm --firmware /path/to/OVMF.fd --output sev-snp.igvm \ + --cpucount 4 sev-snp + +To run a guest configured with the generated IGVM you need to add an +``igvm-cfg`` object and refer to it from the ``-machine`` parameter: + +Example (for AMD SEV):: + + qemu-system-x86_64 \ + <other parameters> \ + -machine ...,confidential-guest-support=sev0,igvm-cfg=igvm0 \ + -object sev-guest,id=sev0,cbitpos=47,reduced-phys-bits=1 \ + -object igvm-cfg,id=igvm0,file=/path/to/sev-snp.igvm + +References +---------- + +[1] AMD64 Architecture Programmer's Manual, Volume 2: System Programming + Rev 3.41 + https://www.amd.com/content/dam/amd/en/documents/processor-tech-docs/programmer-references/24593.pdf + +[2] ``buildigvm`` - A tool to build example IGVM files containing OVMF firmware + https://github.com/roy-hopkins/buildigvm
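The SEV example in the documentation above extends naturally to SEV-SNP: pair the igvm-cfg object with an sev-snp-guest confidential-guest-support object instead of sev-guest. A sketch of such an invocation (the property values here are illustrative and must match the host CPU and the IGVM file; they are not taken from the patch):

    qemu-system-x86_64 \
        <other parameters> \
        -machine q35,confidential-guest-support=snp0,igvm-cfg=igvm0 \
        -object sev-snp-guest,id=snp0,cbitpos=51,reduced-phys-bits=1 \
        -object igvm-cfg,id=igvm0,file=/path/to/sev-snp.igvm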
\ No newline at end of file
diff --git a/docs/system/index.rst b/docs/system/index.rst
index 718e9d3..427b020 100644
--- a/docs/system/index.rst
+++ b/docs/system/index.rst
@@ -38,5 +38,6 @@ or Hypervisor.Framework.
    security
    multi-process
    confidential-guest-support
+   igvm
    vm-templating
    sriov
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 860346d..2f58e73 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -81,7 +81,10 @@
     { "qemu64-" TYPE_X86_CPU, "model-id", "QEMU Virtual CPU version " v, },\
     { "athlon-" TYPE_X86_CPU, "model-id", "QEMU Virtual CPU version " v, },
 
-GlobalProperty pc_compat_10_0[] = {};
+GlobalProperty pc_compat_10_0[] = {
+    { TYPE_X86_CPU, "x-consistent-cache", "false" },
+    { TYPE_X86_CPU, "x-vendor-cpuid-only-v2", "false" },
+};
 const size_t pc_compat_10_0_len = G_N_ELEMENTS(pc_compat_10_0);
 
 GlobalProperty pc_compat_9_2[] = {};
@@ -1827,6 +1830,18 @@ static void pc_machine_class_init(ObjectClass *oc, const void *data)
 
     object_class_property_add_bool(oc, "fd-bootchk",
         pc_machine_get_fd_bootchk, pc_machine_set_fd_bootchk);
+
+#if defined(CONFIG_IGVM)
+    object_class_property_add_link(oc, "igvm-cfg",
+                                   TYPE_IGVM_CFG,
+                                   offsetof(X86MachineState, igvm),
+                                   object_property_allow_set_link,
+                                   OBJ_PROP_LINK_STRONG);
+    object_class_property_set_description(oc, "igvm-cfg",
+                                          "Set IGVM configuration");
+#endif
+
 }
 
 static const TypeInfo pc_machine_info = {
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index ea7572e..a3285fb 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -366,6 +366,16 @@ static void pc_init1(MachineState *machine, const char *pci_type)
                                x86_nvdimm_acpi_dsmio,
                                x86ms->fw_cfg, OBJECT(pcms));
     }
+
+#if defined(CONFIG_IGVM)
+    /* Apply guest state from IGVM if supplied */
+    if (x86ms->igvm) {
+        if (IGVM_CFG_GET_CLASS(x86ms->igvm)
+                ->process(x86ms->igvm, machine->cgs, false, &error_fatal) < 0) {
+            g_assert_not_reached();
+        }
+    }
+#endif
 }
 
 typedef enum PCSouthBridgeOption {
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 33211b1..cf871cf 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -325,6 +325,16 @@ static void pc_q35_init(MachineState *machine)
                                x86_nvdimm_acpi_dsmio,
                                x86ms->fw_cfg, OBJECT(pcms));
     }
+
+#if defined(CONFIG_IGVM)
+    /* Apply guest state from IGVM if supplied */
+    if (x86ms->igvm) {
+        if (IGVM_CFG_GET_CLASS(x86ms->igvm)
+                ->process(x86ms->igvm, machine->cgs, false, &error_fatal) < 0) {
+            g_assert_not_reached();
+        }
+    }
+#endif
 }
 
 #define DEFINE_Q35_MACHINE(major, minor) \
diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c
index 821396c..1a12b63 100644
--- a/hw/i386/pc_sysfw.c
+++ b/hw/i386/pc_sysfw.c
@@ -220,7 +220,13 @@ void pc_system_firmware_init(PCMachineState *pcms,
     BlockBackend *pflash_blk[ARRAY_SIZE(pcms->flash)];
 
     if (!pcmc->pci_enabled) {
-        x86_bios_rom_init(X86_MACHINE(pcms), "bios.bin", rom_memory, true);
+        /*
+         * If an IGVM file is specified then the firmware must be provided
+         * in the IGVM file.
+         */
+        if (!X86_MACHINE(pcms)->igvm) {
+            x86_bios_rom_init(X86_MACHINE(pcms), "bios.bin", rom_memory, true);
+        }
         return;
     }
 
@@ -240,8 +246,13 @@ void pc_system_firmware_init(PCMachineState *pcms,
     }
 
     if (!pflash_blk[0]) {
-        /* Machine property pflash0 not set, use ROM mode */
-        x86_bios_rom_init(X86_MACHINE(pcms), "bios.bin", rom_memory, false);
+        /*
+         * Machine property pflash0 not set, use ROM mode unless using IGVM,
+         * in which case the firmware must be provided by the IGVM file.
+         */
+        if (!X86_MACHINE(pcms)->igvm) {
+            x86_bios_rom_init(X86_MACHINE(pcms), "bios.bin", rom_memory, false);
+        }
     } else {
         if (kvm_enabled() && !kvm_readonly_mem_enabled()) {
             /*
@@ -257,6 +268,20 @@ void pc_system_firmware_init(PCMachineState *pcms,
     }
 
     pc_system_flash_cleanup_unused(pcms);
+
+    /*
+     * The user should not have specified any pflash devices when using IGVM
+     * to configure the guest.
+     */
+    if (X86_MACHINE(pcms)->igvm) {
+        for (i = 0; i < ARRAY_SIZE(pcms->flash); i++) {
+            if (pcms->flash[i]) {
+                error_report("pflash devices cannot be configured when "
+                             "using IGVM");
+                exit(1);
+            }
+        }
+    }
 }
 
 void x86_firmware_configure(hwaddr gpa, void *ptr, int size)
diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h
index fc460b8..8755cad 100644
--- a/include/hw/i386/x86.h
+++ b/include/hw/i386/x86.h
@@ -25,6 +25,7 @@
 #include "hw/intc/ioapic.h"
 #include "hw/isa/isa.h"
 #include "qom/object.h"
+#include "system/igvm-cfg.h"
 
 struct X86MachineClass {
     MachineClass parent;
@@ -92,6 +93,8 @@ struct X86MachineState {
      * which means no limitation on the guest's bus locks.
      */
     uint64_t bus_lock_ratelimit;
+
+    IgvmCfg *igvm;
 };
 
 #define X86_MACHINE_SMM "smm"
diff --git a/include/qemu/log.h b/include/qemu/log.h
index 60da703..aae7298 100644
--- a/include/qemu/log.h
+++ b/include/qemu/log.h
@@ -84,6 +84,8 @@ typedef struct QEMULogItem {
 
 extern const QEMULogItem qemu_log_items[];
 
+ssize_t rust_fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream);
+
 bool qemu_set_log(int log_flags, Error **errp);
 bool qemu_set_log_filename(const char *filename, Error **errp);
 bool qemu_set_log_filename_flags(const char *name, int flags, Error **errp);
diff --git a/include/system/confidential-guest-support.h b/include/system/confidential-guest-support.h
index ea46b50..0cc8b26 100644
--- a/include/system/confidential-guest-support.h
+++ b/include/system/confidential-guest-support.h
@@ -19,6 +19,7 @@
 #define QEMU_CONFIDENTIAL_GUEST_SUPPORT_H
 
 #include "qom/object.h"
+#include "exec/hwaddr.h"
 
 #define TYPE_CONFIDENTIAL_GUEST_SUPPORT "confidential-guest-support"
 OBJECT_DECLARE_TYPE(ConfidentialGuestSupport,
                     CONFIDENTIAL_GUEST_SUPPORT)
 
+typedef enum ConfidentialGuestPlatformType {
+    CGS_PLATFORM_SEV,
+    CGS_PLATFORM_SEV_ES,
+    CGS_PLATFORM_SEV_SNP,
+} ConfidentialGuestPlatformType;
+
+typedef enum ConfidentialGuestMemoryType {
+    CGS_MEM_RAM,
+    CGS_MEM_RESERVED,
+    CGS_MEM_ACPI,
+    CGS_MEM_NVS,
+    CGS_MEM_UNUSABLE,
+} ConfidentialGuestMemoryType;
+
+typedef struct ConfidentialGuestMemoryMapEntry {
+    uint64_t gpa;
+    uint64_t size;
+    ConfidentialGuestMemoryType type;
+} ConfidentialGuestMemoryMapEntry;
+
+typedef enum ConfidentialGuestPageType {
+    CGS_PAGE_TYPE_NORMAL,
+    CGS_PAGE_TYPE_VMSA,
+    CGS_PAGE_TYPE_ZERO,
+    CGS_PAGE_TYPE_UNMEASURED,
+    CGS_PAGE_TYPE_SECRETS,
+    CGS_PAGE_TYPE_CPUID,
+    CGS_PAGE_TYPE_REQUIRED_MEMORY,
+} ConfidentialGuestPageType;
+
+typedef enum ConfidentialGuestPolicyType {
+    GUEST_POLICY_SEV,
+} ConfidentialGuestPolicyType;
+
 struct ConfidentialGuestSupport {
     Object parent;
 
@@ -64,6 +99,59 @@ typedef struct ConfidentialGuestSupportClass {
 
     int (*kvm_init)(ConfidentialGuestSupport *cgs, Error **errp);
     int (*kvm_reset)(ConfidentialGuestSupport *cgs, Error **errp);
+
+    /*
+     * Check to see if this confidential guest supports a particular
+     * platform or configuration.
+     *
+     * Return true if supported or false if not supported.
+     */
+    bool (*check_support)(ConfidentialGuestPlatformType platform,
+                          uint16_t platform_version, uint8_t highest_vtl,
+                          uint64_t shared_gpa_boundary);
+
+    /*
+     * Configure part of the state of a guest for a particular set of data,
+     * page type and gpa. This can be used for example to pre-populate and
+     * measure guest memory contents, define private ranges or set the initial
+     * CPU state for one or more CPUs.
+     *
+     * If memory_type is CGS_PAGE_TYPE_VMSA then ptr points to the initial CPU
+     * context for a virtual CPU. The format of the data depends on the type of
+     * confidential virtual machine. For example, for SEV-ES ptr will point to a
+     * vmcb_save_area structure that should be copied into guest memory at the
+     * address specified in gpa. The cpu_index parameter contains the index of
+     * the CPU the VMSA applies to.
+     */
+    int (*set_guest_state)(hwaddr gpa, uint8_t *ptr, uint64_t len,
+                           ConfidentialGuestPageType memory_type,
+                           uint16_t cpu_index, Error **errp);
+
+    /*
+     * Set the guest policy. The policy can be used to configure the
+     * confidential platform, such as if debug is enabled or not and can
+     * contain information about expected launch measurements, signed
+     * verification of guest configuration and other platform data.
+     *
+     * The format of the policy data is specific to each platform. For example,
+     * SEV-SNP uses a policy bitfield in the 'policy' argument and provides an
+     * ID block and ID authentication in the 'policy_data' parameters. The type
+     * of policy data is identified by the 'policy_type' argument.
+     */
+    int (*set_guest_policy)(ConfidentialGuestPolicyType policy_type,
+                            uint64_t policy,
+                            void *policy_data1, uint32_t policy_data1_size,
+                            void *policy_data2, uint32_t policy_data2_size,
+                            Error **errp);
+
+    /*
+     * Iterate the system memory map, getting the entry with the given index
+     * that can be populated into guest memory.
+     *
+     * Returns 0 for ok, 1 if the index is out of range and -1 on error.
+     */
+    int (*get_mem_map_entry)(int index, ConfidentialGuestMemoryMapEntry *entry,
+                             Error **errp);
 } ConfidentialGuestSupportClass;
 
 static inline int confidential_guest_kvm_init(ConfidentialGuestSupport *cgs,
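The doc comment above fixes a three-way return convention for get_mem_map_entry: 0 means the entry was filled in, 1 means the index is out of range, and negative means error. A hedged sketch of a consumer loop under that convention; dump_guest_memory_map is an illustrative helper, not an API from this patch:

    /* Illustrative walk of the memory map exposed by a CGS implementation */
    static int dump_guest_memory_map(ConfidentialGuestSupportClass *cgsc,
                                     Error **errp)
    {
        ConfidentialGuestMemoryMapEntry entry;
        int index = 0;
        int ret;

        while ((ret = cgsc->get_mem_map_entry(index, &entry, errp)) == 0) {
            /* 0: entry is valid and describes one guest memory region */
            printf("entry %d: gpa=0x%" PRIx64 " size=0x%" PRIx64 " type=%d\n",
                   index, entry.gpa, entry.size, (int)entry.type);
            index++;
        }
        /* 1 only means the index walked past the last entry; not an error */
        return ret < 0 ? ret : 0;
    }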
+ */ + int (*process)(IgvmCfg *cfg, ConfidentialGuestSupport *cgs, + bool onlyVpContext, Error **errp); + +} IgvmCfgClass; + +#define TYPE_IGVM_CFG "igvm-cfg" + +OBJECT_DECLARE_TYPE(IgvmCfg, IgvmCfgClass, IGVM_CFG) + +#endif diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h index cd275ae..f0c1a73 100644 --- a/linux-headers/asm-x86/kvm.h +++ b/linux-headers/asm-x86/kvm.h @@ -963,7 +963,13 @@ struct kvm_tdx_cmd { struct kvm_tdx_capabilities { __u64 supported_attrs; __u64 supported_xfam; - __u64 reserved[254]; + + __u64 kernel_tdvmcallinfo_1_r11; + __u64 user_tdvmcallinfo_1_r11; + __u64 kernel_tdvmcallinfo_1_r12; + __u64 user_tdvmcallinfo_1_r12; + + __u64 reserved[250]; /* Configurable CPUID bits for userspace */ struct kvm_cpuid2 cpuid; diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index 0690743..32c5885 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -459,6 +459,10 @@ struct kvm_run { __u64 leaf; __u64 r11, r12, r13, r14; } get_tdvmcall_info; + struct { + __u64 ret; + __u64 vector; + } setup_event_notify; }; } tdx; /* Fix the size of the union. */ diff --git a/meson.build b/meson.build index 2adb22f..c2bc3ee 100644 --- a/meson.build +++ b/meson.build @@ -1428,6 +1428,12 @@ if host_os == 'linux' and (have_system or have_tools) method: 'pkg-config', required: get_option('libudev')) endif +igvm = not_found +if not get_option('igvm').auto() or have_system + igvm = dependency('igvm', version: '>= 0.3.0', + method: 'pkg-config', + required: get_option('igvm')) +endif mpathlibs = [libudev] mpathpersist = not_found @@ -2606,6 +2612,7 @@ config_host_data.set('CONFIG_CFI', get_option('cfi')) config_host_data.set('CONFIG_SELINUX', selinux.found()) config_host_data.set('CONFIG_XEN_BACKEND', xen.found()) config_host_data.set('CONFIG_LIBDW', libdw.found()) +config_host_data.set('CONFIG_IGVM', igvm.found()) if xen.found() # protect from xen.version() having less than three components xen_version = xen.version().split('.') + ['0', '0'] @@ -4971,6 +4978,7 @@ summary_info += {'seccomp support': seccomp} summary_info += {'GlusterFS support': glusterfs} summary_info += {'hv-balloon support': hv_balloon} summary_info += {'TPM support': have_tpm} +summary_info += {'IGVM support': igvm} summary_info += {'libssh support': libssh} summary_info += {'lzo support': lzo} summary_info += {'snappy support': snappy} diff --git a/meson_options.txt b/meson_options.txt index 3146eec..fff1521 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -117,6 +117,8 @@ option('tpm', type : 'feature', value : 'auto', description: 'TPM support') option('valgrind', type : 'feature', value: 'auto', description: 'valgrind debug support for coroutine stacks') +option('igvm', type: 'feature', value: 'auto', + description: 'Independent Guest Virtual Machine (IGVM) file support') # Do not enable it by default even for Mingw32, because it doesn't # work on Wine. diff --git a/qapi/qom.json b/qapi/qom.json index b133b06..bbdb56d 100644 --- a/qapi/qom.json +++ b/qapi/qom.json @@ -933,6 +933,19 @@ 'if': 'CONFIG_POSIX' } ## +# @IgvmCfgProperties: +# +# Properties common to objects that handle IGVM files. +# +# @file: IGVM file to use to configure guest +# +# Since: 10.1 +## +{ 'struct': 'IgvmCfgProperties', + 'if': 'CONFIG_IGVM', + 'data': { 'file': 'str' } } + +## # @SevCommonProperties: # # Properties common to objects that are derivatives of sev-common. 
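The get_mem_map_entry() hook declared above in include/system/confidential-guest-support.h follows a small iteration protocol: 0 means the entry for the given index was filled in, 1 means the index is past the last entry, and -1 (with *errp set) means failure. A minimal sketch of a consumer is shown below; cgs_for_each_mem_entry() and its callback are hypothetical names for illustration, not part of this series:

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "system/confidential-guest-support.h"

/*
 * Hypothetical helper, not part of this series: walk the memory map
 * that a confidential-guest platform exposes through the
 * get_mem_map_entry() hook. The hook returns 0 when 'entry' has been
 * filled in, 1 once 'index' is past the last entry, and -1 (with
 * *errp set) on error.
 */
static int cgs_for_each_mem_entry(ConfidentialGuestSupport *cgs,
                                  void (*fn)(const ConfidentialGuestMemoryMapEntry *,
                                             void *),
                                  void *opaque, Error **errp)
{
    ConfidentialGuestSupportClass *klass =
        CONFIDENTIAL_GUEST_SUPPORT_GET_CLASS(cgs);
    ConfidentialGuestMemoryMapEntry entry;
    int index;

    if (!klass->get_mem_map_entry) {
        return 0; /* platform does not publish a memory map */
    }
    for (index = 0; ; index++) {
        int ret = klass->get_mem_map_entry(index, &entry, errp);

        if (ret < 0) {
            return -1; /* error: *errp has been set by the hook */
        }
        if (ret == 1) {
            return 0;  /* index out of range: iteration complete */
        }
        fn(&entry, opaque);
    }
}

Returning 1 rather than an error for end-of-map lets callers distinguish normal termination from a platform failure without inspecting *errp.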
@@ -1142,6 +1155,8 @@
     'filter-redirector',
     'filter-replay',
     'filter-rewriter',
+    { 'name': 'igvm-cfg',
+      'if': 'CONFIG_IGVM' },
     'input-barrier',
     { 'name': 'input-linux',
       'if': 'CONFIG_LINUX' },
@@ -1218,6 +1233,8 @@
     'filter-redirector': 'FilterRedirectorProperties',
     'filter-replay': 'NetfilterProperties',
     'filter-rewriter': 'FilterRewriterProperties',
+    'igvm-cfg': { 'type': 'IgvmCfgProperties',
+                  'if': 'CONFIG_IGVM' },
     'input-barrier': 'InputBarrierProperties',
     'input-linux': { 'type': 'InputLinuxProperties',
                      'if': 'CONFIG_LINUX' },
diff --git a/qemu-options.hx b/qemu-options.hx
index a3c066c..1a425b2 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -6141,6 +6141,34 @@ SRST
             -machine ...,memory-encryption=sev0 \\
             .....
 
+    ``-object igvm-cfg,file=file``
+        Create an IGVM configuration object that defines the initial state
+        of the guest using a file that conforms to the Independent Guest
+        Virtual Machine (IGVM) file format.
+
+        This is currently only supported by ``-machine q35`` and
+        ``-machine pc``.
+
+        The ``file`` parameter is used to specify the IGVM file to load.
+        When provided, the IGVM file is used to populate the initial
+        memory of the virtual machine and, depending on the platform, can
+        define the initial processor state, memory map and parameters.
+
+        The IGVM file is expected to contain the firmware for the virtual
+        machine; therefore an ``igvm-cfg`` object cannot be provided along
+        with other ways of specifying firmware, such as the ``-bios``
+        parameter on x86 machines.
+
+        e.g. to launch a machine providing the firmware in an IGVM file:
+
+        .. parsed-literal::
+
+             # |qemu_system_x86| \\
+                 ...... \\
+                 -object igvm-cfg,id=igvm0,file=bios.igvm \\
+                 -machine ...,igvm-cfg=igvm0 \\
+                 .....
+
 ``-object authz-simple,id=id,identity=string``
     Create an authorization object that will control access to network
     services.
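To tie the ``igvm-cfg`` option above to the IgvmCfgClass interface from include/system/igvm-cfg.h: once the machine resolves its igvm-cfg link property, board code can invoke the process() hook. The call site below is a hedged sketch; the helper name is invented, and the real wiring lands in the machine code elsewhere in this series:

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "hw/i386/x86.h"
#include "system/confidential-guest-support.h"

/*
 * Hedged sketch, not part of this series: process the IGVM file that
 * the user attached via -machine ...,igvm-cfg=<id>, aborting startup
 * on a malformed file (&error_fatal).
 */
static void x86_process_igvm_sketch(X86MachineState *x86ms,
                                    ConfidentialGuestSupport *cgs)
{
    IgvmCfgClass *icc;

    if (!x86ms->igvm) {
        return; /* no IGVM object configured; firmware comes from -bios */
    }
    icc = IGVM_CFG_GET_CLASS(x86ms->igvm);
    icc->process(x86ms->igvm, cgs, false, &error_fatal);
}

Passing true for the onlyVpContext argument would instead process only the IGVM_VHT_VP_CONTEXT entries, letting the initial CPU state be determined before the rest of the file is applied.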
diff --git a/rust/qemu-api-macros/meson.build b/rust/qemu-api-macros/meson.build index 8610ce1..2152bcb 100644 --- a/rust/qemu-api-macros/meson.build +++ b/rust/qemu-api-macros/meson.build @@ -17,3 +17,6 @@ _qemu_api_macros_rs = rust.proc_macro( qemu_api_macros = declare_dependency( link_with: _qemu_api_macros_rs, ) + +rust.test('rust-qemu-api-macros-tests', _qemu_api_macros_rs, + suite: ['unit', 'rust']) diff --git a/rust/qemu-api-macros/src/bits.rs b/rust/qemu-api-macros/src/bits.rs index 5ba8475..a80a3b9 100644 --- a/rust/qemu-api-macros/src/bits.rs +++ b/rust/qemu-api-macros/src/bits.rs @@ -6,8 +6,7 @@ use proc_macro2::{ Delimiter, Group, Ident, Punct, Spacing, Span, TokenStream, TokenTree, TokenTree as TT, }; - -use crate::utils::MacroError; +use syn::Error; pub struct BitsConstInternal { typ: TokenTree, @@ -36,27 +35,21 @@ impl BitsConstInternal { tok: TokenTree, it: &mut dyn Iterator<Item = TokenTree>, out: &mut TokenStream, - ) -> Result<Option<TokenTree>, MacroError> { + ) -> Result<Option<TokenTree>, Error> { let next = match tok { TT::Group(ref g) => { if g.delimiter() != Delimiter::Parenthesis && g.delimiter() != Delimiter::None { - return Err(MacroError::Message("expected parenthesis".into(), g.span())); + return Err(Error::new(g.span(), "expected parenthesis")); } let mut stream = g.stream().into_iter(); let Some(first_tok) = stream.next() else { - return Err(MacroError::Message( - "expected operand, found ')'".into(), - g.span(), - )); + return Err(Error::new(g.span(), "expected operand, found ')'")); }; let mut output = TokenStream::new(); // start from the lowest precedence let next = self.parse_or(first_tok, &mut stream, &mut output)?; if let Some(tok) = next { - return Err(MacroError::Message( - format!("unexpected token {tok}"), - tok.span(), - )); + return Err(Error::new(tok.span(), format!("unexpected token {tok}"))); } out.extend(Some(paren(output))); it.next() @@ -74,20 +67,17 @@ impl BitsConstInternal { } TT::Punct(ref p) => { if p.as_char() != '!' 
{ - return Err(MacroError::Message("expected operand".into(), p.span())); + return Err(Error::new(p.span(), "expected operand")); } let Some(rhs_tok) = it.next() else { - return Err(MacroError::Message( - "expected operand at end of input".into(), - p.span(), - )); + return Err(Error::new(p.span(), "expected operand at end of input")); }; let next = self.parse_primary(rhs_tok, it, out)?; out.extend([punct('.'), ident("invert"), paren(TokenStream::new())]); next } _ => { - return Err(MacroError::Message("unexpected literal".into(), tok.span())); + return Err(Error::new(tok.span(), "unexpected literal")); } }; Ok(next) @@ -99,7 +89,7 @@ impl BitsConstInternal { TokenTree, &mut dyn Iterator<Item = TokenTree>, &mut TokenStream, - ) -> Result<Option<TokenTree>, MacroError>, + ) -> Result<Option<TokenTree>, Error>, >( &self, tok: TokenTree, @@ -108,7 +98,7 @@ impl BitsConstInternal { ch: char, f: F, method: &'static str, - ) -> Result<Option<TokenTree>, MacroError> { + ) -> Result<Option<TokenTree>, Error> { let mut next = f(self, tok, it, out)?; while next.is_some() { let op = next.as_ref().unwrap(); @@ -118,10 +108,7 @@ impl BitsConstInternal { } let Some(rhs_tok) = it.next() else { - return Err(MacroError::Message( - "expected operand at end of input".into(), - p.span(), - )); + return Err(Error::new(p.span(), "expected operand at end of input")); }; let mut rhs = TokenStream::new(); next = f(self, rhs_tok, it, &mut rhs)?; @@ -136,7 +123,7 @@ impl BitsConstInternal { tok: TokenTree, it: &mut dyn Iterator<Item = TokenTree>, out: &mut TokenStream, - ) -> Result<Option<TokenTree>, MacroError> { + ) -> Result<Option<TokenTree>, Error> { self.parse_binop(tok, it, out, '-', Self::parse_primary, "difference") } @@ -146,7 +133,7 @@ impl BitsConstInternal { tok: TokenTree, it: &mut dyn Iterator<Item = TokenTree>, out: &mut TokenStream, - ) -> Result<Option<TokenTree>, MacroError> { + ) -> Result<Option<TokenTree>, Error> { self.parse_binop(tok, it, out, '&', Self::parse_sub, "intersection") } @@ -156,7 +143,7 @@ impl BitsConstInternal { tok: TokenTree, it: &mut dyn Iterator<Item = TokenTree>, out: &mut TokenStream, - ) -> Result<Option<TokenTree>, MacroError> { + ) -> Result<Option<TokenTree>, Error> { self.parse_binop(tok, it, out, '^', Self::parse_and, "symmetric_difference") } @@ -166,13 +153,13 @@ impl BitsConstInternal { tok: TokenTree, it: &mut dyn Iterator<Item = TokenTree>, out: &mut TokenStream, - ) -> Result<Option<TokenTree>, MacroError> { + ) -> Result<Option<TokenTree>, Error> { self.parse_binop(tok, it, out, '|', Self::parse_xor, "union") } pub fn parse( it: &mut dyn Iterator<Item = TokenTree>, - ) -> Result<proc_macro2::TokenStream, MacroError> { + ) -> Result<proc_macro2::TokenStream, Error> { let mut pos = Span::call_site(); let mut typ = proc_macro2::TokenStream::new(); @@ -198,15 +185,15 @@ impl BitsConstInternal { }; let Some(tok) = next else { - return Err(MacroError::Message( - "expected expression, do not call this macro directly".into(), + return Err(Error::new( pos, + "expected expression, do not call this macro directly", )); }; let TT::Group(ref _group) = tok else { - return Err(MacroError::Message( - "expected parenthesis, do not call this macro directly".into(), + return Err(Error::new( tok.span(), + "expected parenthesis, do not call this macro directly", )); }; let mut out = TokenStream::new(); @@ -219,10 +206,7 @@ impl BitsConstInternal { // A parenthesized expression is a single production of the grammar, // so the input must have reached the last token. 
if let Some(tok) = next { - return Err(MacroError::Message( - format!("unexpected token {tok}"), - tok.span(), - )); + return Err(Error::new(tok.span(), format!("unexpected token {tok}"))); } Ok(out) } diff --git a/rust/qemu-api-macros/src/lib.rs b/rust/qemu-api-macros/src/lib.rs index c18bb4e..b525d89 100644 --- a/rust/qemu-api-macros/src/lib.rs +++ b/rust/qemu-api-macros/src/lib.rs @@ -6,83 +6,82 @@ use proc_macro::TokenStream; use quote::quote; use syn::{ parse_macro_input, parse_quote, punctuated::Punctuated, spanned::Spanned, token::Comma, Data, - DeriveInput, Field, Fields, FieldsUnnamed, Ident, Meta, Path, Token, Variant, + DeriveInput, Error, Field, Fields, FieldsUnnamed, Ident, Meta, Path, Token, Variant, }; - -mod utils; -use utils::MacroError; - mod bits; use bits::BitsConstInternal; +#[cfg(test)] +mod tests; + fn get_fields<'a>( input: &'a DeriveInput, msg: &str, -) -> Result<&'a Punctuated<Field, Comma>, MacroError> { +) -> Result<&'a Punctuated<Field, Comma>, Error> { let Data::Struct(ref s) = &input.data else { - return Err(MacroError::Message( - format!("Struct required for {msg}"), + return Err(Error::new( input.ident.span(), + format!("Struct required for {msg}"), )); }; let Fields::Named(ref fs) = &s.fields else { - return Err(MacroError::Message( - format!("Named fields required for {msg}"), + return Err(Error::new( input.ident.span(), + format!("Named fields required for {msg}"), )); }; Ok(&fs.named) } -fn get_unnamed_field<'a>(input: &'a DeriveInput, msg: &str) -> Result<&'a Field, MacroError> { +fn get_unnamed_field<'a>(input: &'a DeriveInput, msg: &str) -> Result<&'a Field, Error> { let Data::Struct(ref s) = &input.data else { - return Err(MacroError::Message( - format!("Struct required for {msg}"), + return Err(Error::new( input.ident.span(), + format!("Struct required for {msg}"), )); }; let Fields::Unnamed(FieldsUnnamed { ref unnamed, .. }) = &s.fields else { - return Err(MacroError::Message( - format!("Tuple struct required for {msg}"), + return Err(Error::new( s.fields.span(), + format!("Tuple struct required for {msg}"), )); }; if unnamed.len() != 1 { - return Err(MacroError::Message( - format!("A single field is required for {msg}"), + return Err(Error::new( s.fields.span(), + format!("A single field is required for {msg}"), )); } Ok(&unnamed[0]) } -fn is_c_repr(input: &DeriveInput, msg: &str) -> Result<(), MacroError> { +fn is_c_repr(input: &DeriveInput, msg: &str) -> Result<(), Error> { let expected = parse_quote! { #[repr(C)] }; if input.attrs.iter().any(|attr| attr == &expected) { Ok(()) } else { - Err(MacroError::Message( - format!("#[repr(C)] required for {msg}"), + Err(Error::new( input.ident.span(), + format!("#[repr(C)] required for {msg}"), )) } } -fn is_transparent_repr(input: &DeriveInput, msg: &str) -> Result<(), MacroError> { +fn is_transparent_repr(input: &DeriveInput, msg: &str) -> Result<(), Error> { let expected = parse_quote! 
{ #[repr(transparent)] }; if input.attrs.iter().any(|attr| attr == &expected) { Ok(()) } else { - Err(MacroError::Message( - format!("#[repr(transparent)] required for {msg}"), + Err(Error::new( input.ident.span(), + format!("#[repr(transparent)] required for {msg}"), )) } } -fn derive_object_or_error(input: DeriveInput) -> Result<proc_macro2::TokenStream, MacroError> { +fn derive_object_or_error(input: DeriveInput) -> Result<proc_macro2::TokenStream, Error> { is_c_repr(&input, "#[derive(Object)]")?; let name = &input.ident; @@ -103,12 +102,13 @@ fn derive_object_or_error(input: DeriveInput) -> Result<proc_macro2::TokenStream #[proc_macro_derive(Object)] pub fn derive_object(input: TokenStream) -> TokenStream { let input = parse_macro_input!(input as DeriveInput); - let expanded = derive_object_or_error(input).unwrap_or_else(Into::into); - TokenStream::from(expanded) + derive_object_or_error(input) + .unwrap_or_else(syn::Error::into_compile_error) + .into() } -fn derive_opaque_or_error(input: DeriveInput) -> Result<proc_macro2::TokenStream, MacroError> { +fn derive_opaque_or_error(input: DeriveInput) -> Result<proc_macro2::TokenStream, Error> { is_transparent_repr(&input, "#[derive(Wrapper)]")?; let name = &input.ident; @@ -149,13 +149,14 @@ fn derive_opaque_or_error(input: DeriveInput) -> Result<proc_macro2::TokenStream #[proc_macro_derive(Wrapper)] pub fn derive_opaque(input: TokenStream) -> TokenStream { let input = parse_macro_input!(input as DeriveInput); - let expanded = derive_opaque_or_error(input).unwrap_or_else(Into::into); - TokenStream::from(expanded) + derive_opaque_or_error(input) + .unwrap_or_else(syn::Error::into_compile_error) + .into() } #[allow(non_snake_case)] -fn get_repr_uN(input: &DeriveInput, msg: &str) -> Result<Path, MacroError> { +fn get_repr_uN(input: &DeriveInput, msg: &str) -> Result<Path, Error> { let repr = input.attrs.iter().find(|attr| attr.path().is_ident("repr")); if let Some(repr) = repr { let nested = repr.parse_args_with(Punctuated::<Meta, Token![,]>::parse_terminated)?; @@ -170,23 +171,23 @@ fn get_repr_uN(input: &DeriveInput, msg: &str) -> Result<Path, MacroError> { } } - Err(MacroError::Message( - format!("#[repr(u8/u16/u32/u64) required for {msg}"), + Err(Error::new( input.ident.span(), + format!("#[repr(u8/u16/u32/u64) required for {msg}"), )) } -fn get_variants(input: &DeriveInput) -> Result<&Punctuated<Variant, Comma>, MacroError> { +fn get_variants(input: &DeriveInput) -> Result<&Punctuated<Variant, Comma>, Error> { let Data::Enum(ref e) = &input.data else { - return Err(MacroError::Message( - "Cannot derive TryInto for union or struct.".to_string(), + return Err(Error::new( input.ident.span(), + "Cannot derive TryInto for union or struct.", )); }; if let Some(v) = e.variants.iter().find(|v| v.fields != Fields::Unit) { - return Err(MacroError::Message( - "Cannot derive TryInto for enum with non-unit variants.".to_string(), + return Err(Error::new( v.fields.span(), + "Cannot derive TryInto for enum with non-unit variants.", )); } Ok(&e.variants) @@ -197,11 +198,11 @@ fn derive_tryinto_body( name: &Ident, variants: &Punctuated<Variant, Comma>, repr: &Path, -) -> Result<proc_macro2::TokenStream, MacroError> { +) -> Result<proc_macro2::TokenStream, Error> { let discriminants: Vec<&Ident> = variants.iter().map(|f| &f.ident).collect(); Ok(quote! 
{ - #(const #discriminants: #repr = #name::#discriminants as #repr;)*; + #(const #discriminants: #repr = #name::#discriminants as #repr;)* match value { #(#discriminants => core::result::Result::Ok(#name::#discriminants),)* _ => core::result::Result::Err(value), @@ -210,7 +211,7 @@ fn derive_tryinto_body( } #[rustfmt::skip::macros(quote)] -fn derive_tryinto_or_error(input: DeriveInput) -> Result<proc_macro2::TokenStream, MacroError> { +fn derive_tryinto_or_error(input: DeriveInput) -> Result<proc_macro2::TokenStream, Error> { let repr = get_repr_uN(&input, "#[derive(TryInto)]")?; let name = &input.ident; let body = derive_tryinto_body(name, get_variants(&input)?, &repr)?; @@ -229,7 +230,7 @@ fn derive_tryinto_or_error(input: DeriveInput) -> Result<proc_macro2::TokenStrea #body }) { Ok(x) => x, - Err(_) => panic!(#errmsg) + Err(_) => panic!(#errmsg), } } } @@ -247,9 +248,10 @@ fn derive_tryinto_or_error(input: DeriveInput) -> Result<proc_macro2::TokenStrea #[proc_macro_derive(TryInto)] pub fn derive_tryinto(input: TokenStream) -> TokenStream { let input = parse_macro_input!(input as DeriveInput); - let expanded = derive_tryinto_or_error(input).unwrap_or_else(Into::into); - TokenStream::from(expanded) + derive_tryinto_or_error(input) + .unwrap_or_else(syn::Error::into_compile_error) + .into() } #[proc_macro] @@ -257,6 +259,7 @@ pub fn bits_const_internal(ts: TokenStream) -> TokenStream { let ts = proc_macro2::TokenStream::from(ts); let mut it = ts.into_iter(); - let expanded = BitsConstInternal::parse(&mut it).unwrap_or_else(Into::into); - TokenStream::from(expanded) + BitsConstInternal::parse(&mut it) + .unwrap_or_else(syn::Error::into_compile_error) + .into() } diff --git a/rust/qemu-api-macros/src/tests.rs b/rust/qemu-api-macros/src/tests.rs new file mode 100644 index 0000000..d6dcd62 --- /dev/null +++ b/rust/qemu-api-macros/src/tests.rs @@ -0,0 +1,137 @@ +// Copyright 2025, Linaro Limited +// Author(s): Manos Pitsidianakis <manos.pitsidianakis@linaro.org> +// SPDX-License-Identifier: GPL-2.0-or-later + +use quote::quote; + +use super::*; + +macro_rules! derive_compile_fail { + ($derive_fn:ident, $input:expr, $error_msg:expr) => {{ + let input: proc_macro2::TokenStream = $input; + let error_msg: &str = $error_msg; + let derive_fn: fn(input: syn::DeriveInput) -> Result<proc_macro2::TokenStream, syn::Error> = + $derive_fn; + + let input: syn::DeriveInput = syn::parse2(input).unwrap(); + let result = derive_fn(input); + let err = result.unwrap_err().into_compile_error(); + assert_eq!( + err.to_string(), + quote! { ::core::compile_error! { #error_msg } }.to_string() + ); + }}; +} + +macro_rules! derive_compile { + ($derive_fn:ident, $input:expr, $($expected:tt)*) => {{ + let input: proc_macro2::TokenStream = $input; + let expected: proc_macro2::TokenStream = $($expected)*; + let derive_fn: fn(input: syn::DeriveInput) -> Result<proc_macro2::TokenStream, syn::Error> = + $derive_fn; + + let input: syn::DeriveInput = syn::parse2(input).unwrap(); + let result = derive_fn(input).unwrap(); + assert_eq!(result.to_string(), expected.to_string()); + }}; +} + +#[test] +fn test_derive_object() { + derive_compile_fail!( + derive_object_or_error, + quote! { + #[derive(Object)] + struct Foo { + _unused: [u8; 0], + } + }, + "#[repr(C)] required for #[derive(Object)]" + ); + derive_compile!( + derive_object_or_error, + quote! { + #[derive(Object)] + #[repr(C)] + struct Foo { + _unused: [u8; 0], + } + }, + quote! 
{ + ::qemu_api::assert_field_type!( + Foo, + _unused, + ::qemu_api::qom::ParentField<<Foo as ::qemu_api::qom::ObjectImpl>::ParentType> + ); + ::qemu_api::module_init! { + MODULE_INIT_QOM => unsafe { + ::qemu_api::bindings::type_register_static(&<Foo as ::qemu_api::qom::ObjectImpl>::TYPE_INFO); + } + } + } + ); +} + +#[test] +fn test_derive_tryinto() { + derive_compile_fail!( + derive_tryinto_or_error, + quote! { + #[derive(TryInto)] + struct Foo { + _unused: [u8; 0], + } + }, + "#[repr(u8/u16/u32/u64) required for #[derive(TryInto)]" + ); + derive_compile!( + derive_tryinto_or_error, + quote! { + #[derive(TryInto)] + #[repr(u8)] + enum Foo { + First = 0, + Second, + } + }, + quote! { + impl Foo { + #[allow(dead_code)] + pub const fn into_bits(self) -> u8 { + self as u8 + } + + #[allow(dead_code)] + pub const fn from_bits(value: u8) -> Self { + match ({ + const First: u8 = Foo::First as u8; + const Second: u8 = Foo::Second as u8; + match value { + First => core::result::Result::Ok(Foo::First), + Second => core::result::Result::Ok(Foo::Second), + _ => core::result::Result::Err(value), + } + }) { + Ok(x) => x, + Err(_) => panic!("invalid value for Foo"), + } + } + } + + impl core::convert::TryFrom<u8> for Foo { + type Error = u8; + + #[allow(ambiguous_associated_items)] + fn try_from(value: u8) -> Result<Self, u8> { + const First: u8 = Foo::First as u8; + const Second: u8 = Foo::Second as u8; + match value { + First => core::result::Result::Ok(Foo::First), + Second => core::result::Result::Ok(Foo::Second), + _ => core::result::Result::Err(value), + } + } + } + } + ); +} diff --git a/rust/qemu-api-macros/src/utils.rs b/rust/qemu-api-macros/src/utils.rs deleted file mode 100644 index 02c91ae..0000000 --- a/rust/qemu-api-macros/src/utils.rs +++ /dev/null @@ -1,26 +0,0 @@ -// Procedural macro utilities. -// Author(s): Paolo Bonzini <pbonzini@redhat.com> -// SPDX-License-Identifier: GPL-2.0-or-later - -use proc_macro2::Span; -use quote::quote_spanned; - -pub enum MacroError { - Message(String, Span), - ParseError(syn::Error), -} - -impl From<syn::Error> for MacroError { - fn from(err: syn::Error) -> Self { - MacroError::ParseError(err) - } -} - -impl From<MacroError> for proc_macro2::TokenStream { - fn from(err: MacroError) -> Self { - match err { - MacroError::Message(msg, span) => quote_spanned! { span => compile_error!(#msg); }, - MacroError::ParseError(err) => err.into_compile_error(), - } - } -} diff --git a/rust/qemu-api/build.rs b/rust/qemu-api/build.rs index 7849486..29d0945 100644 --- a/rust/qemu-api/build.rs +++ b/rust/qemu-api/build.rs @@ -9,12 +9,14 @@ use std::os::windows::fs::symlink_file; use std::{env, fs::remove_file, io::Result, path::Path}; fn main() -> Result<()> { - // Placing bindings.inc.rs in the source directory is supported - // but not documented or encouraged. - let path = env::var("MESON_BUILD_ROOT") - .unwrap_or_else(|_| format!("{}/src", env!("CARGO_MANIFEST_DIR"))); + let file = if let Ok(root) = env::var("MESON_BUILD_ROOT") { + format!("{root}/rust/qemu-api/bindings.inc.rs") + } else { + // Placing bindings.inc.rs in the source directory is supported + // but not documented or encouraged. 
+ format!("{}/src/bindings.inc.rs", env!("CARGO_MANIFEST_DIR")) + }; - let file = format!("{path}/rust/qemu-api/bindings.inc.rs"); let file = Path::new(&file); if !Path::new(&file).exists() { panic!(concat!( diff --git a/rust/qemu-api/src/bindings.rs b/rust/qemu-api/src/bindings.rs index 057de4b..3cdad0f 100644 --- a/rust/qemu-api/src/bindings.rs +++ b/rust/qemu-api/src/bindings.rs @@ -6,6 +6,7 @@ non_camel_case_types, non_snake_case, non_upper_case_globals, + unnecessary_transmutes, unsafe_op_in_unsafe_fn, clippy::pedantic, clippy::restriction, diff --git a/rust/qemu-api/src/log.rs b/rust/qemu-api/src/log.rs index d6c3d6c..a441b8c 100644 --- a/rust/qemu-api/src/log.rs +++ b/rust/qemu-api/src/log.rs @@ -3,6 +3,13 @@ //! Bindings for QEMU's logging infrastructure +use std::{ + io::{self, Write}, + ptr::NonNull, +}; + +use crate::{bindings, errno}; + #[repr(u32)] /// Represents specific error categories within QEMU's logging system. /// @@ -11,11 +18,82 @@ pub enum Log { /// Log invalid access caused by the guest. /// Corresponds to `LOG_GUEST_ERROR` in the C implementation. - GuestError = crate::bindings::LOG_GUEST_ERROR, + GuestError = bindings::LOG_GUEST_ERROR, /// Log guest access of unimplemented functionality. /// Corresponds to `LOG_UNIMP` in the C implementation. - Unimp = crate::bindings::LOG_UNIMP, + Unimp = bindings::LOG_UNIMP, +} + +/// A RAII guard for QEMU's logging infrastructure. Creating the guard +/// locks the log file, and dropping it (letting it go out of scope) unlocks +/// the file. +/// +/// As long as the guard lives, it can be written to using [`std::io::Write`]. +/// +/// The locking is recursive, therefore owning a guard does not prevent +/// using [`log_mask_ln!()`](crate::log_mask_ln). +pub struct LogGuard(NonNull<bindings::FILE>); + +impl LogGuard { + /// Return a RAII guard that writes to QEMU's logging infrastructure. + /// The log file is locked while the guard exists, ensuring that there + /// is no tearing of the messages. + /// + /// Return `None` if the log file is closed and could not be opened. + /// Do *not* use `unwrap()` on the result; failure can be handled simply + /// by not logging anything. + /// + /// # Examples + /// + /// ``` + /// # use qemu_api::log::LogGuard; + /// # use std::io::Write; + /// if let Some(mut log) = LogGuard::new() { + /// writeln!(log, "test"); + /// } + /// ``` + pub fn new() -> Option<Self> { + let f = unsafe { bindings::qemu_log_trylock() }.cast(); + NonNull::new(f).map(Self) + } + + /// Writes a formatted string into the log, returning any error encountered. + /// + /// This method is primarily used by the + /// [`log_mask_ln!()`](crate::log_mask_ln) macro, and it is rare for it + /// to be called explicitly. It is public because it is the only way to + /// examine the error, which `log_mask_ln!()` ignores + /// + /// Unlike `log_mask_ln!()`, it does *not* append a newline at the end. 
+ pub fn log_fmt(args: std::fmt::Arguments) -> io::Result<()> { + if let Some(mut log) = Self::new() { + log.write_fmt(args)?; + } + Ok(()) + } +} + +impl Write for LogGuard { + fn write(&mut self, bytes: &[u8]) -> io::Result<usize> { + let ret = unsafe { + bindings::rust_fwrite(bytes.as_ptr().cast(), 1, bytes.len(), self.0.as_ptr()) + }; + errno::into_io_result(ret) + } + + fn flush(&mut self) -> io::Result<()> { + // Do nothing, dropping the guard takes care of flushing + Ok(()) + } +} + +impl Drop for LogGuard { + fn drop(&mut self) { + unsafe { + bindings::qemu_log_unlock(self.0.as_ptr()); + } + } } /// A macro to log messages conditionally based on a provided mask. @@ -24,6 +102,8 @@ pub enum Log { /// log level and, if so, formats and logs the message. It is the Rust /// counterpart of the `qemu_log_mask()` macro in the C implementation. /// +/// Errors from writing to the log are ignored. +/// /// # Parameters /// /// - `$mask`: A log level mask. This should be a variant of the `Log` enum. @@ -62,12 +142,8 @@ macro_rules! log_mask_ln { if unsafe { (::qemu_api::bindings::qemu_loglevel & ($mask as std::os::raw::c_int)) != 0 } { - let formatted_string = format!("{}\n", format_args!($fmt $($args)*)); - let c_string = std::ffi::CString::new(formatted_string).unwrap(); - - unsafe { - ::qemu_api::bindings::qemu_log(c_string.as_ptr()); - } + _ = ::qemu_api::log::LogGuard::log_fmt( + format_args!("{}\n", format_args!($fmt $($args)*))); } }}; } diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh index bb3e34d..e850468 100644 --- a/scripts/meson-buildoptions.sh +++ b/scripts/meson-buildoptions.sh @@ -130,6 +130,7 @@ meson_options_help() { printf "%s\n" ' hv-balloon hv-balloon driver (requires Glib 2.68+ GTree API)' printf "%s\n" ' hvf HVF acceleration support' printf "%s\n" ' iconv Font glyph conversion support' + printf "%s\n" ' igvm IGVM file support' printf "%s\n" ' jack JACK sound support' printf "%s\n" ' keyring Linux keyring support' printf "%s\n" ' kvm KVM acceleration support' @@ -347,6 +348,8 @@ _meson_option_parse() { --iasl=*) quote_sh "-Diasl=$2" ;; --enable-iconv) printf "%s" -Diconv=enabled ;; --disable-iconv) printf "%s" -Diconv=disabled ;; + --enable-igvm) printf "%s" -Digvm=enabled ;; + --disable-igvm) printf "%s" -Digvm=disabled ;; --includedir=*) quote_sh "-Dincludedir=$2" ;; --enable-install-blobs) printf "%s" -Dinstall_blobs=true ;; --disable-install-blobs) printf "%s" -Dinstall_blobs=false ;; diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 0d35e95..da7d8dc 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -28,6 +28,7 @@ #include "system/hvf.h" #include "hvf/hvf-i386.h" #include "kvm/kvm_i386.h" +#include "kvm/tdx.h" #include "sev.h" #include "qapi/error.h" #include "qemu/error-report.h" @@ -44,6 +45,7 @@ #include "hw/boards.h" #include "hw/i386/sgx-epc.h" #endif +#include "system/qtest.h" #include "tcg/tcg-cpu.h" #include "disas/capstone.h" @@ -66,6 +68,7 @@ struct CPUID2CacheDescriptorInfo { /* * Known CPUID 2 cache descriptors. + * TLB, prefetch and sectored cache related descriptors are not included. 
 * From Intel SDM Volume 2A, CPUID instruction
 */
struct CPUID2CacheDescriptorInfo cpuid2_cache_descriptors[] = {
@@ -87,18 +90,29 @@ struct CPUID2CacheDescriptorInfo cpuid2_cache_descriptors[] = {
               .associativity = 2,  .line_size = 64, },
    [0x21] = { .level = 2, .type = UNIFIED_CACHE,        .size = 256 * KiB,
               .associativity = 8,  .line_size = 64, },
-    /* lines per sector is not supported cpuid2_cache_descriptor(),
-     * so descriptors 0x22, 0x23 are not included
-     */
+    /*
+     * lines per sector is not supported by cpuid2_cache_descriptor(),
+     * so descriptors 0x22, 0x23 are not included
+     */
    [0x24] = { .level = 2, .type = UNIFIED_CACHE,        .size = 1 * MiB,
               .associativity = 16, .line_size = 64, },
-    /* lines per sector is not supported cpuid2_cache_descriptor(),
-     * so descriptors 0x25, 0x20 are not included
-     */
+    /*
+     * lines per sector is not supported by cpuid2_cache_descriptor(),
+     * so descriptors 0x25, 0x29 are not included
+     */
    [0x2C] = { .level = 1, .type = DATA_CACHE,           .size = 32 * KiB,
               .associativity = 8,  .line_size = 64, },
    [0x30] = { .level = 1, .type = INSTRUCTION_CACHE,    .size = 32 * KiB,
               .associativity = 8,  .line_size = 64, },
+    /*
+     * Newer Intel CPUs (with cores that have no L3, e.g., Intel MTL, ARL)
+     * use the CPUID 0x4 leaf to describe cache topology, encoding the
+     * CPUID 0x2 leaf with 0xFF. For older CPUs (without the 0x4 leaf), it
+     * is also valid to just ignore the L3 code if there is no L3.
+     *
+     * This already covers all the cases in QEMU, so code 0x40 is not
+     * included.
+     */
    [0x41] = { .level = 2, .type = UNIFIED_CACHE,        .size = 128 * KiB,
               .associativity = 4,  .line_size = 32, },
    [0x42] = { .level = 2, .type = UNIFIED_CACHE,        .size = 256 * KiB,
@@ -115,7 +129,18 @@ struct CPUID2CacheDescriptorInfo cpuid2_cache_descriptors[] = {
               .associativity = 8,  .line_size = 64, },
    [0x48] = { .level = 2, .type = UNIFIED_CACHE,        .size = 3 * MiB,
               .associativity = 12, .line_size = 64, },
-    /* Descriptor 0x49 depends on CPU family/model, so it is not included */
+    /*
+     * Descriptor 0x49 has 2 cases:
+     * - 2nd-level cache: 4 MiB, 16-way set associative, 64-byte line size.
+     * - 3rd-level cache: 4 MiB, 16-way set associative, 64-byte line size
+     *   (Intel Xeon processor MP, Family 0FH, Model 06H).
+     *
+     * When it represents L3, the meaning depends on CPU family/model.
+     * Fortunately, the legacy cache/CPU models don't have such a special
+     * L3, so just add it to represent the general L2 case.
+     */
+    [0x49] = { .level = 2, .type = UNIFIED_CACHE,        .size = 4 * MiB,
+               .associativity = 16, .line_size = 64, },
    [0x4A] = { .level = 3, .type = UNIFIED_CACHE,        .size = 6 * MiB,
               .associativity = 12, .line_size = 64, },
    [0x4B] = { .level = 3, .type = UNIFIED_CACHE,        .size = 8 * MiB,
@@ -136,9 +161,10 @@ struct CPUID2CacheDescriptorInfo cpuid2_cache_descriptors[] = {
               .associativity = 4,  .line_size = 64, },
    [0x78] = { .level = 2, .type = UNIFIED_CACHE,        .size = 1 * MiB,
               .associativity = 4,  .line_size = 64, },
-    /* lines per sector is not supported cpuid2_cache_descriptor(),
-     * so descriptors 0x79, 0x7A, 0x7B, 0x7C are not included.
-     */
+    /*
+     * lines per sector is not supported by cpuid2_cache_descriptor(),
+     * so descriptors 0x79, 0x7A, 0x7B, 0x7C are not included.
+     */
    [0x7D] = { .level = 2, .type = UNIFIED_CACHE,        .size = 2 * MiB,
               .associativity = 8,  .line_size = 64, },
    [0x7F] = { .level = 2, .type = UNIFIED_CACHE,        .size = 512 * KiB,
@@ -199,7 +225,7 @@ struct CPUID2CacheDescriptorInfo cpuid2_cache_descriptors[] = {
 * Return a CPUID 2 cache descriptor for a given cache.
 * If no known descriptor is found, return CACHE_DESCRIPTOR_UNAVAILABLE
 */
-static uint8_t cpuid2_cache_descriptor(CPUCacheInfo *cache)
+static uint8_t cpuid2_cache_descriptor(CPUCacheInfo *cache, bool *unmatched)
{
    int i;
@@ -216,9 +242,46 @@ static uint8_t cpuid2_cache_descriptor(CPUCacheInfo *cache)
        }
    }
+    *unmatched |= true;
    return CACHE_DESCRIPTOR_UNAVAILABLE;
}
+static const CPUCaches legacy_intel_cpuid2_cache_info;
+
+/* Encode cache info for CPUID[2] */
+static void encode_cache_cpuid2(X86CPU *cpu,
+                                const CPUCaches *caches,
+                                uint32_t *eax, uint32_t *ebx,
+                                uint32_t *ecx, uint32_t *edx)
+{
+    CPUX86State *env = &cpu->env;
+    int l1d, l1i, l2, l3;
+    bool unmatched = false;
+
+    *eax = 1; /* Number of CPUID[EAX=2] calls required */
+    *ebx = *ecx = *edx = 0;
+
+    l1d = cpuid2_cache_descriptor(caches->l1d_cache, &unmatched);
+    l1i = cpuid2_cache_descriptor(caches->l1i_cache, &unmatched);
+    l2 = cpuid2_cache_descriptor(caches->l2_cache, &unmatched);
+    l3 = cpuid2_cache_descriptor(caches->l3_cache, &unmatched);
+
+    if (!cpu->consistent_cache ||
+        (env->cpuid_min_level < 0x4 && !unmatched)) {
+        /*
+         * Though the SDM defines code 0x40 for cases with no L2 or L3,
+         * it's also valid to just ignore L3's code if there's no L2.
+         */
+        if (cpu->enable_l3_cache) {
+            *ecx = l3;
+        }
+        *edx = (l1d << 16) | (l1i << 8) | l2;
+    } else {
+        *ecx = 0;
+        *edx = CACHE_DESCRIPTOR_UNAVAILABLE;
+    }
+}
+
 /* CPUID Leaf 4 constants: */
 
 /* EAX: */
@@ -286,11 +349,17 @@ static void encode_cache_cpuid4(CPUCacheInfo *cache,
    assert(cache->size == cache->line_size * cache->associativity *
                          cache->partitions * cache->sets);
 
+    /*
+     * The following fields have bit-width limitations, so consider the
+     * maximum values to avoid overflow:
+     * Bits 25-14: maximum 4095.
+     * Bits 31-26: maximum 63.
+     */
    *eax = CACHE_TYPE(cache->type) |
           CACHE_LEVEL(cache->level) |
           (cache->self_init ? CACHE_SELF_INIT_LEVEL : 0) |
-          (max_core_ids_in_package(topo_info) << 26) |
-          (max_thread_ids_for_cache(topo_info, cache->share_level) << 14);
+          (MIN(max_core_ids_in_package(topo_info), 63) << 26) |
+          (MIN(max_thread_ids_for_cache(topo_info, cache->share_level), 4095) << 14);
 
    assert(cache->line_size > 0);
    assert(cache->partitions > 0);
@@ -430,7 +499,6 @@ static void encode_topo_cpuid1f(CPUX86State *env, uint32_t count,
static uint32_t encode_cache_cpuid80000005(CPUCacheInfo *cache)
{
    assert(cache->size % 1024 == 0);
-    assert(cache->lines_per_tag > 0);
    assert(cache->associativity > 0);
    assert(cache->line_size > 0);
    return ((cache->size / 1024) << 24) | (cache->associativity << 16) |
@@ -439,8 +507,8 @@ static uint32_t encode_cache_cpuid80000005(CPUCacheInfo *cache)
 
 #define ASSOC_FULL 0xFF
 
-/* AMD associativity encoding used on CPUID Leaf 0x80000006: */
-#define AMD_ENC_ASSOC(a) (a <= 1 ? a : \
+/* x86 associativity encoding used on CPUID Leaf 0x80000006: */
+#define X86_ENC_ASSOC(a) (a <= 1 ? a : \
                  a == 2 ? 0x2 : \
                  a == 4 ? 0x4 : \
                  a == 8 ? 0x6 : \
@@ -463,19 +531,18 @@ static void encode_cache_cpuid80000006(CPUCacheInfo *l2,
{
    assert(l2->size % 1024 == 0);
    assert(l2->associativity > 0);
-    assert(l2->lines_per_tag > 0);
    assert(l2->line_size > 0);
    *ecx = ((l2->size / 1024) << 16) |
-           (AMD_ENC_ASSOC(l2->associativity) << 12) |
+           (X86_ENC_ASSOC(l2->associativity) << 12) |
           (l2->lines_per_tag << 8) | (l2->line_size);
 
+    /* For Intel, EDX is reserved.
*/ if (l3) { assert(l3->size % (512 * 1024) == 0); assert(l3->associativity > 0); - assert(l3->lines_per_tag > 0); assert(l3->line_size > 0); *edx = ((l3->size / (512 * 1024)) << 18) | - (AMD_ENC_ASSOC(l3->associativity) << 12) | + (X86_ENC_ASSOC(l3->associativity) << 12) | (l3->lines_per_tag << 8) | (l3->line_size); } else { *edx = 0; @@ -493,7 +560,8 @@ static void encode_cache_cpuid8000001d(CPUCacheInfo *cache, *eax = CACHE_TYPE(cache->type) | CACHE_LEVEL(cache->level) | (cache->self_init ? CACHE_SELF_INIT_LEVEL : 0); - *eax |= max_thread_ids_for_cache(topo_info, cache->share_level) << 14; + /* Bits 25:14 - NumSharingCache: maximum 4095. */ + *eax |= MIN(max_thread_ids_for_cache(topo_info, cache->share_level), 4095) << 14; assert(cache->line_size > 0); assert(cache->partitions > 0); @@ -573,117 +641,172 @@ static void encode_topo_cpuid8000001e(X86CPU *cpu, X86CPUTopoInfo *topo_info, * These are legacy cache values. If there is a need to change any * of these values please use builtin_x86_defs */ - -/* L1 data cache: */ -static CPUCacheInfo legacy_l1d_cache = { - .type = DATA_CACHE, - .level = 1, - .size = 32 * KiB, - .self_init = 1, - .line_size = 64, - .associativity = 8, - .sets = 64, - .partitions = 1, - .no_invd_sharing = true, - .share_level = CPU_TOPOLOGY_LEVEL_CORE, -}; - -/*FIXME: CPUID leaf 0x80000005 is inconsistent with leaves 2 & 4 */ -static CPUCacheInfo legacy_l1d_cache_amd = { - .type = DATA_CACHE, - .level = 1, - .size = 64 * KiB, - .self_init = 1, - .line_size = 64, - .associativity = 2, - .sets = 512, - .partitions = 1, - .lines_per_tag = 1, - .no_invd_sharing = true, - .share_level = CPU_TOPOLOGY_LEVEL_CORE, -}; - -/* L1 instruction cache: */ -static CPUCacheInfo legacy_l1i_cache = { - .type = INSTRUCTION_CACHE, - .level = 1, - .size = 32 * KiB, - .self_init = 1, - .line_size = 64, - .associativity = 8, - .sets = 64, - .partitions = 1, - .no_invd_sharing = true, - .share_level = CPU_TOPOLOGY_LEVEL_CORE, -}; - -/*FIXME: CPUID leaf 0x80000005 is inconsistent with leaves 2 & 4 */ -static CPUCacheInfo legacy_l1i_cache_amd = { - .type = INSTRUCTION_CACHE, - .level = 1, - .size = 64 * KiB, - .self_init = 1, - .line_size = 64, - .associativity = 2, - .sets = 512, - .partitions = 1, - .lines_per_tag = 1, - .no_invd_sharing = true, - .share_level = CPU_TOPOLOGY_LEVEL_CORE, -}; - -/* Level 2 unified cache: */ -static CPUCacheInfo legacy_l2_cache = { - .type = UNIFIED_CACHE, - .level = 2, - .size = 4 * MiB, - .self_init = 1, - .line_size = 64, - .associativity = 16, - .sets = 4096, - .partitions = 1, - .no_invd_sharing = true, - .share_level = CPU_TOPOLOGY_LEVEL_CORE, -}; - -/*FIXME: CPUID leaf 2 descriptor is inconsistent with CPUID leaf 4 */ -static CPUCacheInfo legacy_l2_cache_cpuid2 = { - .type = UNIFIED_CACHE, - .level = 2, - .size = 2 * MiB, - .line_size = 64, - .associativity = 8, - .share_level = CPU_TOPOLOGY_LEVEL_INVALID, +static const CPUCaches legacy_amd_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 64 * KiB, + .self_init = 1, + .line_size = 64, + .associativity = 2, + .sets = 512, + .partitions = 1, + .lines_per_tag = 1, + .no_invd_sharing = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 64 * KiB, + .self_init = 1, + .line_size = 64, + .associativity = 2, + .sets = 512, + .partitions = 1, + .lines_per_tag = 1, + .no_invd_sharing = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l2_cache = &(CPUCacheInfo) { + .type = 
UNIFIED_CACHE,
+        .level = 2,
+        .size = 512 * KiB,
+        .line_size = 64,
+        .lines_per_tag = 1,
+        .associativity = 16,
+        .sets = 512,
+        .partitions = 1,
+        .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+    },
+    .l3_cache = &(CPUCacheInfo) {
+        .type = UNIFIED_CACHE,
+        .level = 3,
+        .size = 16 * MiB,
+        .line_size = 64,
+        .associativity = 16,
+        .sets = 16384,
+        .partitions = 1,
+        .lines_per_tag = 1,
+        .self_init = true,
+        .inclusive = true,
+        .complex_indexing = true,
+        .share_level = CPU_TOPOLOGY_LEVEL_DIE,
+    },
 };
-
-/*FIXME: CPUID leaf 0x80000006 is inconsistent with leaves 2 & 4 */
-static CPUCacheInfo legacy_l2_cache_amd = {
-    .type = UNIFIED_CACHE,
-    .level = 2,
-    .size = 512 * KiB,
-    .line_size = 64,
-    .lines_per_tag = 1,
-    .associativity = 16,
-    .sets = 512,
-    .partitions = 1,
-    .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+/*
+ * Only used for the CPU models with CPUID level < 4.
+ * These CPUs only use CPUID leaf 2 to present cache
+ * information.
+ *
+ * Note: This cache model is just a default one, and is not
+ * guaranteed to match real hardware.
+ */
+static const CPUCaches legacy_intel_cpuid2_cache_info = {
+    .l1d_cache = &(CPUCacheInfo) {
+        .type = DATA_CACHE,
+        .level = 1,
+        .size = 32 * KiB,
+        .self_init = 1,
+        .line_size = 64,
+        .associativity = 8,
+        .sets = 64,
+        .partitions = 1,
+        .no_invd_sharing = true,
+        .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+    },
+    .l1i_cache = &(CPUCacheInfo) {
+        .type = INSTRUCTION_CACHE,
+        .level = 1,
+        .size = 32 * KiB,
+        .self_init = 1,
+        .line_size = 64,
+        .associativity = 8,
+        .sets = 64,
+        .partitions = 1,
+        .no_invd_sharing = true,
+        .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+    },
+    .l2_cache = &(CPUCacheInfo) {
+        .type = UNIFIED_CACHE,
+        .level = 2,
+        .size = 2 * MiB,
+        .self_init = 1,
+        .line_size = 64,
+        .associativity = 8,
+        .sets = 4096,
+        .partitions = 1,
+        .no_invd_sharing = true,
+        .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+    },
+    .l3_cache = &(CPUCacheInfo) {
+        .type = UNIFIED_CACHE,
+        .level = 3,
+        .size = 16 * MiB,
+        .line_size = 64,
+        .associativity = 16,
+        .sets = 16384,
+        .partitions = 1,
+        .lines_per_tag = 1,
+        .self_init = true,
+        .inclusive = true,
+        .complex_indexing = true,
+        .share_level = CPU_TOPOLOGY_LEVEL_DIE,
+    },
 };
-/* Level 3 unified cache: */
-static CPUCacheInfo legacy_l3_cache = {
-    .type = UNIFIED_CACHE,
-    .level = 3,
-    .size = 16 * MiB,
-    .line_size = 64,
-    .associativity = 16,
-    .sets = 16384,
-    .partitions = 1,
-    .lines_per_tag = 1,
-    .self_init = true,
-    .inclusive = true,
-    .complex_indexing = true,
-    .share_level = CPU_TOPOLOGY_LEVEL_DIE,
+static const CPUCaches legacy_intel_cache_info = {
+    .l1d_cache = &(CPUCacheInfo) {
+        .type = DATA_CACHE,
+        .level = 1,
+        .size = 32 * KiB,
+        .self_init = 1,
+        .line_size = 64,
+        .associativity = 8,
+        .sets = 64,
+        .partitions = 1,
+        .no_invd_sharing = true,
+        .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+    },
+    .l1i_cache = &(CPUCacheInfo) {
+        .type = INSTRUCTION_CACHE,
+        .level = 1,
+        .size = 32 * KiB,
+        .self_init = 1,
+        .line_size = 64,
+        .associativity = 8,
+        .sets = 64,
+        .partitions = 1,
+        .no_invd_sharing = true,
+        .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+    },
+    .l2_cache = &(CPUCacheInfo) {
+        .type = UNIFIED_CACHE,
+        .level = 2,
+        .size = 4 * MiB,
+        .self_init = 1,
+        .line_size = 64,
+        .associativity = 16,
+        .sets = 4096,
+        .partitions = 1,
+        .no_invd_sharing = true,
+        .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+    },
+    .l3_cache = &(CPUCacheInfo) {
+        .type = UNIFIED_CACHE,
+        .level = 3,
+        .size = 16 * MiB,
+        .line_size = 64,
+        .associativity = 16,
+        .sets = 16384,
.partitions = 1, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .complex_indexing = true, + .share_level = CPU_TOPOLOGY_LEVEL_DIE, + }, }; /* TLB definitions: */ @@ -1943,7 +2066,7 @@ uint32_t xsave_area_size(uint64_t mask, bool compacted) static inline bool accel_uses_host_cpuid(void) { - return kvm_enabled() || hvf_enabled(); + return !tcg_enabled() && !qtest_enabled(); } static inline uint64_t x86_cpu_xsave_xcr0_components(X86CPU *cpu) @@ -2764,6 +2887,378 @@ static const CPUCaches epyc_turin_cache_info = { .no_invd_sharing = true, .complex_indexing = false, .share_level = CPU_TOPOLOGY_LEVEL_DIE, + } +}; + +static const CPUCaches xeon_spr_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x0.EAX */ + .type = DATA_CACHE, + .level = 1, + .self_init = true, + + /* CPUID 0x4.0x0.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 12, + + /* CPUID 0x4.0x0.ECX */ + .sets = 64, + + /* CPUID 0x4.0x0.EDX */ + .no_invd_sharing = false, + .inclusive = false, + .complex_indexing = false, + + .size = 48 * KiB, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l1i_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x1.EAX */ + .type = INSTRUCTION_CACHE, + .level = 1, + .self_init = true, + + /* CPUID 0x4.0x1.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 8, + + /* CPUID 0x4.0x1.ECX */ + .sets = 64, + + /* CPUID 0x4.0x1.EDX */ + .no_invd_sharing = false, + .inclusive = false, + .complex_indexing = false, + + .size = 32 * KiB, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l2_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x2.EAX */ + .type = UNIFIED_CACHE, + .level = 2, + .self_init = true, + + /* CPUID 0x4.0x2.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 16, + + /* CPUID 0x4.0x2.ECX */ + .sets = 2048, + + /* CPUID 0x4.0x2.EDX */ + .no_invd_sharing = false, + .inclusive = false, + .complex_indexing = false, + + .size = 2 * MiB, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l3_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x3.EAX */ + .type = UNIFIED_CACHE, + .level = 3, + .self_init = true, + + /* CPUID 0x4.0x3.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 15, + + /* CPUID 0x4.0x3.ECX */ + .sets = 65536, + + /* CPUID 0x4.0x3.EDX */ + .no_invd_sharing = false, + .inclusive = false, + .complex_indexing = true, + + .size = 60 * MiB, + .share_level = CPU_TOPOLOGY_LEVEL_SOCKET, + }, +}; + +static const CPUCaches xeon_gnr_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x0.EAX */ + .type = DATA_CACHE, + .level = 1, + .self_init = true, + + /* CPUID 0x4.0x0.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 12, + + /* CPUID 0x4.0x0.ECX */ + .sets = 64, + + /* CPUID 0x4.0x0.EDX */ + .no_invd_sharing = false, + .inclusive = false, + .complex_indexing = false, + + .size = 48 * KiB, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l1i_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x1.EAX */ + .type = INSTRUCTION_CACHE, + .level = 1, + .self_init = true, + + /* CPUID 0x4.0x1.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 16, + + /* CPUID 0x4.0x1.ECX */ + .sets = 64, + + /* CPUID 0x4.0x1.EDX */ + .no_invd_sharing = false, + .inclusive = false, + .complex_indexing = false, + + .size = 64 * KiB, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l2_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x2.EAX */ + .type = UNIFIED_CACHE, + .level = 2, + .self_init = true, + + /* CPUID 0x4.0x2.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 16, + + /* CPUID 0x4.0x2.ECX */ + .sets = 2048, + + 
/* CPUID 0x4.0x2.EDX */ + .no_invd_sharing = false, + .inclusive = false, + .complex_indexing = false, + + .size = 2 * MiB, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l3_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x3.EAX */ + .type = UNIFIED_CACHE, + .level = 3, + .self_init = true, + + /* CPUID 0x4.0x3.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 16, + + /* CPUID 0x4.0x3.ECX */ + .sets = 294912, + + /* CPUID 0x4.0x3.EDX */ + .no_invd_sharing = false, + .inclusive = false, + .complex_indexing = true, + + .size = 288 * MiB, + .share_level = CPU_TOPOLOGY_LEVEL_SOCKET, + }, +}; + +static const CPUCaches xeon_srf_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x0.EAX */ + .type = DATA_CACHE, + .level = 1, + .self_init = true, + + /* CPUID 0x4.0x0.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 8, + + /* CPUID 0x4.0x0.ECX */ + .sets = 64, + + /* CPUID 0x4.0x0.EDX */ + .no_invd_sharing = false, + .inclusive = false, + .complex_indexing = false, + + .size = 32 * KiB, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l1i_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x1.EAX */ + .type = INSTRUCTION_CACHE, + .level = 1, + .self_init = true, + + /* CPUID 0x4.0x1.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 8, + + /* CPUID 0x4.0x1.ECX */ + .sets = 128, + + /* CPUID 0x4.0x1.EDX */ + .no_invd_sharing = false, + .inclusive = false, + .complex_indexing = false, + + .size = 64 * KiB, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l2_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x2.EAX */ + .type = UNIFIED_CACHE, + .level = 2, + .self_init = true, + + /* CPUID 0x4.0x2.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 16, + + /* CPUID 0x4.0x2.ECX */ + .sets = 4096, + + /* CPUID 0x4.0x2.EDX */ + .no_invd_sharing = false, + .inclusive = false, + .complex_indexing = false, + + .size = 4 * MiB, + .share_level = CPU_TOPOLOGY_LEVEL_MODULE, + }, + .l3_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x3.EAX */ + .type = UNIFIED_CACHE, + .level = 3, + .self_init = true, + + /* CPUID 0x4.0x3.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 12, + + /* CPUID 0x4.0x3.ECX */ + .sets = 147456, + + /* CPUID 0x4.0x3.EDX */ + .no_invd_sharing = false, + .inclusive = false, + .complex_indexing = true, + + .size = 108 * MiB, + .share_level = CPU_TOPOLOGY_LEVEL_SOCKET, + }, +}; + +static const CPUCaches yongfeng_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x0.EAX */ + .type = DATA_CACHE, + .level = 1, + .self_init = true, + + /* CPUID 0x4.0x0.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 8, + + /* CPUID 0x4.0x0.ECX */ + .sets = 64, + + /* CPUID 0x4.0x0.EDX */ + .no_invd_sharing = false, + .inclusive = false, + .complex_indexing = false, + + /* CPUID 0x80000005.ECX */ + .lines_per_tag = 1, + .size = 32 * KiB, + + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l1i_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x1.EAX */ + .type = INSTRUCTION_CACHE, + .level = 1, + .self_init = true, + + /* CPUID 0x4.0x1.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 16, + + /* CPUID 0x4.0x1.ECX */ + .sets = 64, + + /* CPUID 0x4.0x1.EDX */ + .no_invd_sharing = false, + .inclusive = false, + .complex_indexing = false, + + /* CPUID 0x80000005.EDX */ + .lines_per_tag = 1, + .size = 64 * KiB, + + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l2_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x2.EAX */ + .type = UNIFIED_CACHE, + .level = 2, + .self_init = true, + + /* CPUID 0x4.0x2.EBX */ + .line_size = 64, + 
.partitions = 1, + .associativity = 8, + + /* CPUID 0x4.0x2.ECX */ + .sets = 512, + + /* CPUID 0x4.0x2.EDX */ + .no_invd_sharing = false, + .inclusive = true, + .complex_indexing = false, + + /* CPUID 0x80000006.ECX */ + .size = 256 * KiB, + + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l3_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x3.EAX */ + .type = UNIFIED_CACHE, + .level = 3, + .self_init = true, + + /* CPUID 0x4.0x3.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 16, + + /* CPUID 0x4.0x3.ECX */ + .sets = 8192, + + /* CPUID 0x4.0x3.EDX */ + .no_invd_sharing = true, + .inclusive = true, + .complex_indexing = false, + + .size = 8 * MiB, + .share_level = CPU_TOPOLOGY_LEVEL_DIE, }, }; @@ -3019,6 +3514,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { I486_FEATURES, .xlevel = 0, .model_id = "", + .cache_info = &legacy_intel_cpuid2_cache_info, }, { .name = "pentium", @@ -3031,6 +3527,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { PENTIUM_FEATURES, .xlevel = 0, .model_id = "", + .cache_info = &legacy_intel_cpuid2_cache_info, }, { .name = "pentium2", @@ -3043,6 +3540,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { PENTIUM2_FEATURES, .xlevel = 0, .model_id = "", + .cache_info = &legacy_intel_cpuid2_cache_info, }, { .name = "pentium3", @@ -3055,6 +3553,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { PENTIUM3_FEATURES, .xlevel = 0, .model_id = "", + .cache_info = &legacy_intel_cpuid2_cache_info, }, { .name = "athlon", @@ -4587,6 +5086,15 @@ static const X86CPUDefinition builtin_x86_defs[] = { { /* end of list */ } } }, + { + .version = 4, + .note = "with spr-sp cache model and 0x1f leaf", + .cache_info = &xeon_spr_cache_info, + .props = (PropValue[]) { + { "x-force-cpuid-0x1f", "on" }, + { /* end of list */ }, + } + }, { /* end of list */ } } }, @@ -4740,6 +5248,15 @@ static const X86CPUDefinition builtin_x86_defs[] = { { /* end of list */ } } }, + { + .version = 3, + .note = "with gnr-sp cache model and 0x1f leaf", + .cache_info = &xeon_gnr_cache_info, + .props = (PropValue[]) { + { "x-force-cpuid-0x1f", "on" }, + { /* end of list */ }, + } + }, { /* end of list */ }, }, }, @@ -4885,6 +5402,15 @@ static const X86CPUDefinition builtin_x86_defs[] = { { /* end of list */ } } }, + { + .version = 3, + .note = "with srf-sp cache model and 0x1f leaf", + .cache_info = &xeon_srf_cache_info, + .props = (PropValue[]) { + { "x-force-cpuid-0x1f", "on" }, + { /* end of list */ }, + } + }, { /* end of list */ }, }, }, @@ -6027,6 +6553,15 @@ static const X86CPUDefinition builtin_x86_defs[] = { { /* end of list */ } } }, + { + .version = 3, + .note = "with the cache model and 0x1f leaf", + .cache_info = &yongfeng_cache_info, + .props = (PropValue[]) { + { "x-force-cpuid-0x1f", "on" }, + { /* end of list */ }, + } + }, { /* end of list */ } } }, @@ -6187,6 +6722,7 @@ static void max_x86_cpu_class_init(ObjectClass *oc, const void *data) xcc->ordering = 9; + xcc->max_features = true; xcc->model_description = "Enables all features supported by the accelerator in the current host"; @@ -6197,22 +6733,21 @@ static void max_x86_cpu_class_init(ObjectClass *oc, const void *data) static void max_x86_cpu_initfn(Object *obj) { X86CPU *cpu = X86_CPU(obj); - - /* We can't fill the features array here because we don't know yet if - * "migratable" is true or false. 
-     */
-    cpu->max_features = true;
-
     object_property_set_bool(OBJECT(cpu), "pmu", true, &error_abort);
+    CPUX86State *env = &cpu->env;
 
     /*
-     * these defaults are used for TCG and all other accelerators
-     * besides KVM and HVF, which overwrite these values
+     * these defaults are used for TCG; other accelerators have already
+     * overwritten these values
      */
-    object_property_set_str(OBJECT(cpu), "vendor", CPUID_VENDOR_AMD,
-                            &error_abort);
-    object_property_set_str(OBJECT(cpu), "model-id",
-                            "QEMU TCG CPU version " QEMU_HW_VERSION,
-                            &error_abort);
+    if (!env->cpuid_vendor1) {
+        object_property_set_str(OBJECT(cpu), "vendor", CPUID_VENDOR_AMD,
+                                &error_abort);
+    }
+    if (!env->cpuid_model[0]) {
+        object_property_set_str(OBJECT(cpu), "model-id",
+                                "QEMU TCG CPU version " QEMU_HW_VERSION,
+                                &error_abort);
+    }
 }
 
 static const TypeInfo max_x86_cpu_type_info = {
@@ -6324,10 +6859,7 @@ static void x86_cpuid_version_get_family(Object *obj, Visitor *v,
     CPUX86State *env = &cpu->env;
     uint64_t value;
 
-    value = (env->cpuid_version >> 8) & 0xf;
-    if (value == 0xf) {
-        value += (env->cpuid_version >> 20) & 0xff;
-    }
+    value = x86_cpu_family(env->cpuid_version);
     visit_type_uint64(v, name, &value, errp);
 }
 
@@ -6365,8 +6897,7 @@ static void x86_cpuid_version_get_model(Object *obj, Visitor *v,
     CPUX86State *env = &cpu->env;
     uint64_t value;
 
-    value = (env->cpuid_version >> 4) & 0xf;
-    value |= ((env->cpuid_version >> 16) & 0xf) << 4;
+    value = x86_cpu_model(env->cpuid_version);
     visit_type_uint64(v, name, &value, errp);
 }
 
@@ -6400,7 +6931,7 @@ static void x86_cpuid_version_get_stepping(Object *obj, Visitor *v,
     CPUX86State *env = &cpu->env;
     uint64_t value;
 
-    value = env->cpuid_version & 0xf;
+    value = x86_cpu_stepping(env->cpuid_version);
     visit_type_uint64(v, name, &value, errp);
 }
 
@@ -6468,11 +6999,11 @@ static char *x86_cpuid_get_model_id(Object *obj, Error **errp)
     char *value;
     int i;
 
-    value = g_malloc(48 + 1);
-    for (i = 0; i < 48; i++) {
+    value = g_malloc(CPUID_MODEL_ID_SZ + 1);
+    for (i = 0; i < CPUID_MODEL_ID_SZ; i++) {
         value[i] = env->cpuid_model[i >> 2] >> (8 * (i & 3));
     }
-    value[48] = '\0';
+    value[CPUID_MODEL_ID_SZ] = '\0';
     return value;
 }
 
@@ -6487,7 +7018,7 @@ static void x86_cpuid_set_model_id(Object *obj, const char *model_id,
         model_id = "";
     }
     len = strlen(model_id);
-    memset(env->cpuid_model, 0, 48);
+    memset(env->cpuid_model, 0, CPUID_MODEL_ID_SZ);
     for (i = 0; i < 48; i++) {
         if (i >= len) {
             c = '\0';
@@ -7347,11 +7878,35 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
         }
         *edx = env->features[FEAT_1_EDX];
         if (threads_per_pkg > 1) {
-            *ebx |= threads_per_pkg << 16;
+            uint32_t num;
+
+            /*
+             * For CPUID.01H.EBX[bits 23-16], AMD requires the logical
+             * processor count, but Intel needs the maximum number of
+             * addressable IDs for logical processors per package.
+             */
+            if (cpu->vendor_cpuid_only_v2 &&
+                (IS_INTEL_CPU(env) || IS_ZHAOXIN_CPU(env))) {
+                num = 1 << apicid_pkg_offset(topo_info);
+            } else {
+                num = threads_per_pkg;
+            }
+
+            /* Fixup overflow: max value for bits 23-16 is 255.
*/ + *ebx |= MIN(num, 255) << 16; } break; - case 2: - /* cache info: needed for Pentium Pro compatibility */ + case 2: { /* cache info: needed for Pentium Pro compatibility */ + const CPUCaches *caches; + + if (env->enable_legacy_cpuid2_cache) { + caches = &legacy_intel_cpuid2_cache_info; + } else if (env->enable_legacy_vendor_cache) { + caches = &legacy_intel_cache_info; + } else { + caches = &env->cache_info; + } + if (cpu->cache_info_passthrough) { x86_cpu_get_cache_cpuid(index, 0, eax, ebx, ecx, edx); break; @@ -7359,18 +7914,18 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *eax = *ebx = *ecx = *edx = 0; break; } - *eax = 1; /* Number of CPUID[EAX=2] calls required */ - *ebx = 0; - if (!cpu->enable_l3_cache) { - *ecx = 0; + encode_cache_cpuid2(cpu, caches, eax, ebx, ecx, edx); + break; + } + case 4: { + const CPUCaches *caches; + + if (env->enable_legacy_vendor_cache) { + caches = &legacy_intel_cache_info; } else { - *ecx = cpuid2_cache_descriptor(env->cache_info_cpuid2.l3_cache); + caches = &env->cache_info; } - *edx = (cpuid2_cache_descriptor(env->cache_info_cpuid2.l1d_cache) << 16) | - (cpuid2_cache_descriptor(env->cache_info_cpuid2.l1i_cache) << 8) | - (cpuid2_cache_descriptor(env->cache_info_cpuid2.l2_cache)); - break; - case 4: + /* cache info: needed for Core compatibility */ if (cpu->cache_info_passthrough) { x86_cpu_get_cache_cpuid(index, count, eax, ebx, ecx, edx); @@ -7382,13 +7937,13 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, int host_vcpus_per_cache = 1 + ((*eax & 0x3FFC000) >> 14); *eax &= ~0xFC000000; - *eax |= max_core_ids_in_package(topo_info) << 26; + *eax |= MIN(max_core_ids_in_package(topo_info), 63) << 26; if (host_vcpus_per_cache > threads_per_pkg) { *eax &= ~0x3FFC000; /* Share the cache at package level. */ - *eax |= max_thread_ids_for_cache(topo_info, - CPU_TOPOLOGY_LEVEL_SOCKET) << 14; + *eax |= MIN(max_thread_ids_for_cache(topo_info, + CPU_TOPOLOGY_LEVEL_SOCKET), 4095) << 14; } } } else if (cpu->vendor_cpuid_only && IS_AMD_CPU(env)) { @@ -7398,30 +7953,26 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, switch (count) { case 0: /* L1 dcache info */ - encode_cache_cpuid4(env->cache_info_cpuid4.l1d_cache, - topo_info, + encode_cache_cpuid4(caches->l1d_cache, topo_info, eax, ebx, ecx, edx); if (!cpu->l1_cache_per_core) { *eax &= ~MAKE_64BIT_MASK(14, 12); } break; case 1: /* L1 icache info */ - encode_cache_cpuid4(env->cache_info_cpuid4.l1i_cache, - topo_info, + encode_cache_cpuid4(caches->l1i_cache, topo_info, eax, ebx, ecx, edx); if (!cpu->l1_cache_per_core) { *eax &= ~MAKE_64BIT_MASK(14, 12); } break; case 2: /* L2 cache info */ - encode_cache_cpuid4(env->cache_info_cpuid4.l2_cache, - topo_info, + encode_cache_cpuid4(caches->l2_cache, topo_info, eax, ebx, ecx, edx); break; case 3: /* L3 cache info */ if (cpu->enable_l3_cache) { - encode_cache_cpuid4(env->cache_info_cpuid4.l3_cache, - topo_info, + encode_cache_cpuid4(caches->l3_cache, topo_info, eax, ebx, ecx, edx); break; } @@ -7432,6 +7983,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, } } break; + } case 5: /* MONITOR/MWAIT Leaf */ *eax = cpu->mwait.eax; /* Smallest monitor-line size in bytes */ @@ -7522,21 +8074,6 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, assert(!(*eax & ~0x1f)); *ebx &= 0xffff; /* The count doesn't need to be reliable. 
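Software is expected to enumerate topology from the EAX[4:0] APIC-ID shift values rather than from this count.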
*/ break; - case 0x1C: - if (cpu->enable_pmu && (env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_ARCH_LBR)) { - x86_cpu_get_supported_cpuid(0x1C, 0, eax, ebx, ecx, edx); - *edx = 0; - } - break; - case 0x1F: - /* V2 Extended Topology Enumeration Leaf */ - if (!x86_has_cpuid_0x1f(cpu)) { - *eax = *ebx = *ecx = *edx = 0; - break; - } - - encode_topo_cpuid1f(env, count, topo_info, eax, ebx, ecx, edx); - break; case 0xD: { /* Processor Extended State */ *eax = 0; @@ -7677,6 +8214,12 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, } break; } + case 0x1C: + if (cpu->enable_pmu && (env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_ARCH_LBR)) { + x86_cpu_get_supported_cpuid(0x1C, 0, eax, ebx, ecx, edx); + *edx = 0; + } + break; case 0x1D: { /* AMX TILE, for now hardcoded for Sapphire Rapids*/ *eax = 0; @@ -7714,6 +8257,15 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, } break; } + case 0x1F: + /* V2 Extended Topology Enumeration Leaf */ + if (!x86_has_cpuid_0x1f(cpu)) { + *eax = *ebx = *ecx = *edx = 0; + break; + } + + encode_topo_cpuid1f(env, count, topo_info, eax, ebx, ecx, edx); + break; case 0x24: { *eax = 0; *ebx = 0; @@ -7750,9 +8302,15 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, break; case 0x80000000: *eax = env->cpuid_xlevel; - *ebx = env->cpuid_vendor1; - *edx = env->cpuid_vendor2; - *ecx = env->cpuid_vendor3; + + if (cpu->vendor_cpuid_only_v2 && + (IS_INTEL_CPU(env) || IS_ZHAOXIN_CPU(env))) { + *ebx = *ecx = *edx = 0; + } else { + *ebx = env->cpuid_vendor1; + *edx = env->cpuid_vendor2; + *ecx = env->cpuid_vendor3; + } break; case 0x80000001: *eax = env->cpuid_version; @@ -7760,7 +8318,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *ecx = env->features[FEAT_8000_0001_ECX]; *edx = env->features[FEAT_8000_0001_EDX]; - if (tcg_enabled() && env->cpuid_vendor1 == CPUID_VENDOR_INTEL_1 && + if (tcg_enabled() && IS_INTEL_CPU(env) && !(env->hflags & HF_LMA_MASK)) { *edx &= ~CPUID_EXT2_SYSCALL; } @@ -7773,41 +8331,78 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *ecx = env->cpuid_model[(index - 0x80000002) * 4 + 2]; *edx = env->cpuid_model[(index - 0x80000002) * 4 + 3]; break; - case 0x80000005: - /* cache info (L1 cache) */ + case 0x80000005: { + /* cache info (L1 cache/TLB Associativity Field) */ + const CPUCaches *caches; + + if (env->enable_legacy_vendor_cache) { + caches = &legacy_amd_cache_info; + } else { + caches = &env->cache_info; + } + if (cpu->cache_info_passthrough) { x86_cpu_get_cache_cpuid(index, 0, eax, ebx, ecx, edx); break; } + + if (cpu->vendor_cpuid_only_v2 && IS_INTEL_CPU(env)) { + *eax = *ebx = *ecx = *edx = 0; + break; + } + *eax = (L1_DTLB_2M_ASSOC << 24) | (L1_DTLB_2M_ENTRIES << 16) | (L1_ITLB_2M_ASSOC << 8) | (L1_ITLB_2M_ENTRIES); *ebx = (L1_DTLB_4K_ASSOC << 24) | (L1_DTLB_4K_ENTRIES << 16) | (L1_ITLB_4K_ASSOC << 8) | (L1_ITLB_4K_ENTRIES); - *ecx = encode_cache_cpuid80000005(env->cache_info_amd.l1d_cache); - *edx = encode_cache_cpuid80000005(env->cache_info_amd.l1i_cache); + *ecx = encode_cache_cpuid80000005(caches->l1d_cache); + *edx = encode_cache_cpuid80000005(caches->l1i_cache); break; - case 0x80000006: - /* cache info (L2 cache) */ + } + case 0x80000006: { /* cache info (L2 cache/TLB/L3 cache) */ + const CPUCaches *caches; + + if (env->enable_legacy_vendor_cache) { + caches = &legacy_amd_cache_info; + } else { + caches = &env->cache_info; + } + if (cpu->cache_info_passthrough) { x86_cpu_get_cache_cpuid(index, 0, eax, ebx, ecx, edx); break; } - *eax 
= (AMD_ENC_ASSOC(L2_DTLB_2M_ASSOC) << 28) | + + if (cpu->vendor_cpuid_only_v2 && + (IS_INTEL_CPU(env) || IS_ZHAOXIN_CPU(env))) { + *eax = *ebx = 0; + encode_cache_cpuid80000006(caches->l2_cache, + NULL, ecx, edx); + break; + } + + *eax = (X86_ENC_ASSOC(L2_DTLB_2M_ASSOC) << 28) | (L2_DTLB_2M_ENTRIES << 16) | - (AMD_ENC_ASSOC(L2_ITLB_2M_ASSOC) << 12) | + (X86_ENC_ASSOC(L2_ITLB_2M_ASSOC) << 12) | (L2_ITLB_2M_ENTRIES); - *ebx = (AMD_ENC_ASSOC(L2_DTLB_4K_ASSOC) << 28) | + *ebx = (X86_ENC_ASSOC(L2_DTLB_4K_ASSOC) << 28) | (L2_DTLB_4K_ENTRIES << 16) | - (AMD_ENC_ASSOC(L2_ITLB_4K_ASSOC) << 12) | + (X86_ENC_ASSOC(L2_ITLB_4K_ASSOC) << 12) | (L2_ITLB_4K_ENTRIES); - encode_cache_cpuid80000006(env->cache_info_amd.l2_cache, + + encode_cache_cpuid80000006(caches->l2_cache, cpu->enable_l3_cache ? - env->cache_info_amd.l3_cache : NULL, + caches->l3_cache : NULL, ecx, edx); break; + } case 0x80000007: *eax = 0; - *ebx = env->features[FEAT_8000_0007_EBX]; + if (cpu->vendor_cpuid_only_v2 && IS_INTEL_CPU(env)) { + *ebx = 0; + } else { + *ebx = env->features[FEAT_8000_0007_EBX]; + } *ecx = 0; *edx = env->features[FEAT_8000_0007_EDX]; break; @@ -7820,6 +8415,17 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *eax |= (cpu->guest_phys_bits << 16); } *ebx = env->features[FEAT_8000_0008_EBX]; + + /* + * Don't emulate Bits [7:0] & Bits [15:12] for Intel/Zhaoxin, since + * they're using 0x1f leaf. + */ + if (cpu->vendor_cpuid_only_v2 && + (IS_INTEL_CPU(env) || IS_ZHAOXIN_CPU(env))) { + *ecx = *edx = 0; + break; + } + if (threads_per_pkg > 1) { /* * Bits 15:12 is "The number of bits in the initial @@ -7855,19 +8461,19 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, } switch (count) { case 0: /* L1 dcache info */ - encode_cache_cpuid8000001d(env->cache_info_amd.l1d_cache, + encode_cache_cpuid8000001d(env->cache_info.l1d_cache, topo_info, eax, ebx, ecx, edx); break; case 1: /* L1 icache info */ - encode_cache_cpuid8000001d(env->cache_info_amd.l1i_cache, + encode_cache_cpuid8000001d(env->cache_info.l1i_cache, topo_info, eax, ebx, ecx, edx); break; case 2: /* L2 cache info */ - encode_cache_cpuid8000001d(env->cache_info_amd.l2_cache, + encode_cache_cpuid8000001d(env->cache_info.l2_cache, topo_info, eax, ebx, ecx, edx); break; case 3: /* L3 cache info */ - encode_cache_cpuid8000001d(env->cache_info_amd.l3_cache, + encode_cache_cpuid8000001d(env->cache_info.l3_cache, topo_info, eax, ebx, ecx, edx); break; default: /* end of info */ @@ -7888,6 +8494,21 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *edx = 0; } break; + case 0x8000001F: + *eax = *ebx = *ecx = *edx = 0; + if (sev_enabled()) { + *eax = 0x2; + *eax |= sev_es_enabled() ? 0x8 : 0; + *eax |= sev_snp_enabled() ? 0x10 : 0; + *ebx = sev_get_cbit_position() & 0x3f; /* EBX[5:0] */ + *ebx |= (sev_get_reduced_phys_bits() & 0x3f) << 6; /* EBX[11:6] */ + } + break; + case 0x80000021: + *eax = *ebx = *ecx = *edx = 0; + *eax = env->features[FEAT_8000_0021_EAX]; + *ebx = env->features[FEAT_8000_0021_EBX]; + break; case 0x80000022: *eax = *ebx = *ecx = *edx = 0; /* AMD Extended Performance Monitoring and Debug */ @@ -7920,21 +8541,6 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *ecx = 0; *edx = 0; break; - case 0x8000001F: - *eax = *ebx = *ecx = *edx = 0; - if (sev_enabled()) { - *eax = 0x2; - *eax |= sev_es_enabled() ? 0x8 : 0; - *eax |= sev_snp_enabled() ? 
0x10 : 0; - *ebx = sev_get_cbit_position() & 0x3f; /* EBX[5:0] */ - *ebx |= (sev_get_reduced_phys_bits() & 0x3f) << 6; /* EBX[11:6] */ - } - break; - case 0x80000021: - *eax = *ebx = *ecx = *edx = 0; - *eax = env->features[FEAT_8000_0021_EAX]; - *ebx = env->features[FEAT_8000_0021_EBX]; - break; default: /* reserved values: zero */ *eax = 0; @@ -8154,7 +8760,7 @@ static void mce_init(X86CPU *cpu) CPUX86State *cenv = &cpu->env; unsigned int bank; - if (((cenv->cpuid_version >> 8) & 0xf) >= 6 + if (x86_cpu_family(cenv->cpuid_version) >= 6 && (cenv->features[FEAT_1_EDX] & (CPUID_MCE | CPUID_MCA)) == (CPUID_MCE | CPUID_MCA)) { cenv->mcg_cap = MCE_CAP_DEF | MCE_BANKS_DEF | @@ -8282,6 +8888,7 @@ static void x86_cpu_enable_xsave_components(X86CPU *cpu) */ void x86_cpu_expand_features(X86CPU *cpu, Error **errp) { + X86CPUClass *xcc = X86_CPU_GET_CLASS(cpu); CPUX86State *env = &cpu->env; FeatureWord w; int i; @@ -8301,12 +8908,12 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp) } } - /*TODO: Now cpu->max_features doesn't overwrite features + /* TODO: Now xcc->max_features doesn't overwrite features * set using QOM properties, and we can convert * plus_features & minus_features to global properties * inside x86_cpu_parse_featurestr() too. */ - if (cpu->max_features) { + if (xcc->max_features) { for (w = 0; w < FEATURE_WORDS; w++) { /* Override only features that weren't set explicitly * by the user. @@ -8338,7 +8945,8 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp) } } - if (!cpu->enable_pmu) { + /* PDCM is fixed1 bit for TDX */ + if (!cpu->enable_pmu && !is_tdx_vm()) { mark_unavailable_features(cpu, FEAT_1_ECX, env->user_features[FEAT_1_ECX] & CPUID_EXT_PDCM, "This feature is not available due to PMU being disabled"); @@ -8574,46 +9182,34 @@ static bool x86_cpu_update_smp_cache_topo(MachineState *ms, X86CPU *cpu, level = machine_get_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L1D); if (level != CPU_TOPOLOGY_LEVEL_DEFAULT) { - env->cache_info_cpuid4.l1d_cache->share_level = level; - env->cache_info_amd.l1d_cache->share_level = level; + env->cache_info.l1d_cache->share_level = level; } else { machine_set_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L1D, - env->cache_info_cpuid4.l1d_cache->share_level); - machine_set_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L1D, - env->cache_info_amd.l1d_cache->share_level); + env->cache_info.l1d_cache->share_level); } level = machine_get_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L1I); if (level != CPU_TOPOLOGY_LEVEL_DEFAULT) { - env->cache_info_cpuid4.l1i_cache->share_level = level; - env->cache_info_amd.l1i_cache->share_level = level; + env->cache_info.l1i_cache->share_level = level; } else { machine_set_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L1I, - env->cache_info_cpuid4.l1i_cache->share_level); - machine_set_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L1I, - env->cache_info_amd.l1i_cache->share_level); + env->cache_info.l1i_cache->share_level); } level = machine_get_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L2); if (level != CPU_TOPOLOGY_LEVEL_DEFAULT) { - env->cache_info_cpuid4.l2_cache->share_level = level; - env->cache_info_amd.l2_cache->share_level = level; + env->cache_info.l2_cache->share_level = level; } else { machine_set_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L2, - env->cache_info_cpuid4.l2_cache->share_level); - machine_set_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L2, - env->cache_info_amd.l2_cache->share_level); + env->cache_info.l2_cache->share_level); } level = machine_get_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L3); if (level 
!= CPU_TOPOLOGY_LEVEL_DEFAULT) { - env->cache_info_cpuid4.l3_cache->share_level = level; - env->cache_info_amd.l3_cache->share_level = level; + env->cache_info.l3_cache->share_level = level; } else { machine_set_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L3, - env->cache_info_cpuid4.l3_cache->share_level); - machine_set_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L3, - env->cache_info_amd.l3_cache->share_level); + env->cache_info.l3_cache->share_level); } if (!machine_check_smp_cache(ms, errp)) { @@ -8637,6 +9233,16 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) tcg_cflags_set(cs, CF_PCREL); #endif + /* + * x-vendor-cpuid-only and its v2 variant should be internal only. But + * QEMU doesn't support an "internal" property. + */ + if (!cpu->vendor_cpuid_only && cpu->vendor_cpuid_only_v2) { + error_setg(errp, "x-vendor-cpuid-only-v2 property " + "depends on x-vendor-cpuid-only"); + return; + } + if (cpu->apic_id == UNASSIGNED_APIC_ID) { error_setg(errp, "apic-id property was not initialized properly"); return; } @@ -8840,24 +9446,22 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) "CPU model '%s' doesn't support legacy-cache=off", name); return; } - env->cache_info_cpuid2 = env->cache_info_cpuid4 = env->cache_info_amd = - *cache_info; + env->cache_info = *cache_info; } else { /* Build legacy cache information */ - env->cache_info_cpuid2.l1d_cache = &legacy_l1d_cache; - env->cache_info_cpuid2.l1i_cache = &legacy_l1i_cache; - env->cache_info_cpuid2.l2_cache = &legacy_l2_cache_cpuid2; - env->cache_info_cpuid2.l3_cache = &legacy_l3_cache; + if (!cpu->consistent_cache) { + env->enable_legacy_cpuid2_cache = true; + } - env->cache_info_cpuid4.l1d_cache = &legacy_l1d_cache; - env->cache_info_cpuid4.l1i_cache = &legacy_l1i_cache; - env->cache_info_cpuid4.l2_cache = &legacy_l2_cache; - env->cache_info_cpuid4.l3_cache = &legacy_l3_cache; + if (!cpu->vendor_cpuid_only_v2) { + env->enable_legacy_vendor_cache = true; + } - env->cache_info_amd.l1d_cache = &legacy_l1d_cache_amd; - env->cache_info_amd.l1i_cache = &legacy_l1i_cache_amd; - env->cache_info_amd.l2_cache = &legacy_l2_cache_amd; - env->cache_info_amd.l3_cache = &legacy_l3_cache; + if (IS_AMD_CPU(env)) { + env->cache_info = legacy_amd_cache_info; + } else { + env->cache_info = legacy_intel_cache_info; + } } #ifndef CONFIG_USER_ONLY @@ -9036,8 +9640,6 @@ static void x86_cpu_post_initfn(Object *obj) } } - accel_cpu_instance_init(CPU(obj)); - #ifndef CONFIG_USER_ONLY if (current_machine && current_machine->cgs) { x86_confidential_guest_cpu_instance_init( @@ -9112,6 +9714,8 @@ static void x86_cpu_initfn(Object *obj) if (xcc->model) { x86_cpu_load_model(cpu, xcc->model); } + + accel_cpu_instance_init(CPU(obj)); } static int64_t x86_cpu_get_arch_id(CPUState *cs) @@ -9358,6 +9962,7 @@ static const Property x86_cpu_properties[] = { DEFINE_PROP_STRING("hv-vendor-id", X86CPU, hyperv_vendor), DEFINE_PROP_BOOL("cpuid-0xb", X86CPU, enable_cpuid_0xb, true), DEFINE_PROP_BOOL("x-vendor-cpuid-only", X86CPU, vendor_cpuid_only, true), + DEFINE_PROP_BOOL("x-vendor-cpuid-only-v2", X86CPU, vendor_cpuid_only_v2, true), DEFINE_PROP_BOOL("x-amd-topoext-features-only", X86CPU, amd_topoext_features_only, true), DEFINE_PROP_BOOL("lmce", X86CPU, enable_lmce, false), DEFINE_PROP_BOOL("l3-cache", X86CPU, enable_l3_cache, true), @@ -9372,6 +9977,7 @@ static const Property x86_cpu_properties[] = { * own cache information (see x86_cpu_load_def()). 
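* For example, an invocation such as "-cpu EPYC,legacy-cache=off" (illustrative) makes the model expose its own cache data.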
*/ DEFINE_PROP_BOOL("legacy-cache", X86CPU, legacy_cache, true), + DEFINE_PROP_BOOL("x-consistent-cache", X86CPU, consistent_cache, true), DEFINE_PROP_BOOL("legacy-multi-node", X86CPU, legacy_multi_node, false), DEFINE_PROP_BOOL("xen-vapic", X86CPU, xen_vapic, false), @@ -9393,6 +9999,7 @@ static const Property x86_cpu_properties[] = { DEFINE_PROP_BOOL("x-intel-pt-auto-level", X86CPU, intel_pt_auto_level, true), DEFINE_PROP_BOOL("x-l1-cache-per-thread", X86CPU, l1_cache_per_core, true), + DEFINE_PROP_BOOL("x-force-cpuid-0x1f", X86CPU, force_cpuid_0x1f, false), }; #ifndef CONFIG_USER_ONLY diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 51e1013..f977fc4 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1159,7 +1159,8 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); /* PMM enabled */ #define CPUID_C000_0001_EDX_PMM_EN (1U << 13) -#define CPUID_VENDOR_SZ 12 +#define CPUID_VENDOR_SZ 12 +#define CPUID_MODEL_ID_SZ 48 #define CPUID_VENDOR_INTEL_1 0x756e6547 /* "Genu" */ #define CPUID_VENDOR_INTEL_2 0x49656e69 /* "ineI" */ @@ -2072,11 +2073,14 @@ typedef struct CPUArchState { /* Features that were explicitly enabled/disabled */ FeatureWordArray user_features; uint32_t cpuid_model[12]; - /* Cache information for CPUID. When legacy-cache=on, the cache data + /* + * Cache information for CPUID. When legacy-cache=on, the cache data * on each CPUID leaf will be different, because we keep compatibility * with old QEMU versions. */ - CPUCaches cache_info_cpuid2, cache_info_cpuid4, cache_info_amd; + CPUCaches cache_info; + bool enable_legacy_cpuid2_cache; + bool enable_legacy_vendor_cache; /* MTRRs */ uint64_t mtrr_fixed[11]; @@ -2196,7 +2200,6 @@ struct ArchCPU { bool expose_tcg; bool migratable; bool migrate_smi_count; - bool max_features; /* Enable all supported features automatically */ uint32_t apic_id; /* Enables publishing of TSC increment and Local APIC bus frequencies to @@ -2259,6 +2262,13 @@ struct ArchCPU { */ bool legacy_cache; + /* + * Compatibility bits for old machine types. + * If true, use the same cache model in CPUID leaf 0x2 + * and 0x4. + */ + bool consistent_cache; + /* Compatibility bits for old machine types. * If true decode the CPUID Function 0x8000001E_ECX to support multiple * nodes per processor @@ -2274,9 +2284,18 @@ struct ArchCPU { /* Enable auto level-increase for all CPUID leaves */ bool full_cpuid_auto_level; - /* Only advertise CPUID leaves defined by the vendor */ + /* + * Compatibility bits for old machine types (PC machine v6.0 and older). + * Only advertise CPUID leaves defined by the vendor. + */ bool vendor_cpuid_only; + /* + * Compatibility bits for old machine types (PC machine v10.0 and older). + * Only advertise CPUID leaves defined by the vendor. 
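+ * The v2 variant additionally zeroes the Intel/Zhaoxin-reserved fields in + * leaves 0x80000000-0x80000008 (see cpu_x86_cpuid()).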
+ */ + bool vendor_cpuid_only_v2; + /* Only advertise TOPOEXT features that AMD defines */ bool amd_topoext_features_only; @@ -2349,6 +2368,7 @@ struct X86CPUClass { */ const X86CPUModel *model; + bool max_features; /* Enable all supported features automatically */ bool host_cpuid_required; int ordering; bool migration_safe; @@ -2417,7 +2437,14 @@ static inline void cpu_x86_load_seg_cache(CPUX86State *env, SegmentCache *sc; unsigned int new_hflags; - sc = &env->segs[seg_reg]; + if (seg_reg == R_LDTR) { + sc = &env->ldt; + } else if (seg_reg == R_TR) { + sc = &env->tr; + } else { + sc = &env->segs[seg_reg]; + } + sc->selector = selector; sc->base = base; sc->limit = limit; @@ -2670,6 +2697,36 @@ static inline int32_t x86_get_a20_mask(CPUX86State *env) } } +static inline uint32_t x86_cpu_family(uint32_t eax) +{ + uint32_t family = (eax >> 8) & 0xf; + + if (family == 0xf) { + family += (eax >> 20) & 0xff; + } + + return family; +} + +static inline uint32_t x86_cpu_model(uint32_t eax) +{ + uint32_t family, model; + + family = x86_cpu_family(eax); + model = (eax >> 4) & 0xf; + + if (family >= 0x6) { + model += ((eax >> 16) & 0xf) << 4; + } + + return model; +} + +static inline uint32_t x86_cpu_stepping(uint32_t eax) +{ + return eax & 0xf; +} + static inline bool cpu_has_vmx(CPUX86State *env) { return env->features[FEAT_1_ECX] & CPUID_EXT_VMX; diff --git a/target/i386/host-cpu.c b/target/i386/host-cpu.c index 7512567..d5e2bb5 100644 --- a/target/i386/host-cpu.c +++ b/target/i386/host-cpu.c @@ -80,7 +80,6 @@ bool host_cpu_realizefn(CPUState *cs, Error **errp) return true; } -#define CPUID_MODEL_ID_SZ 48 /** * cpu_x86_fill_model_id: * Get CPUID model ID string from host CPU. @@ -118,13 +117,13 @@ void host_cpu_vendor_fms(char *vendor, int *family, int *model, int *stepping) host_cpuid(0x1, 0, &eax, &ebx, &ecx, &edx); if (family) { - *family = ((eax >> 8) & 0x0F) + ((eax >> 20) & 0xFF); + *family = x86_cpu_family(eax); } if (model) { - *model = ((eax >> 4) & 0x0F) | ((eax & 0xF0000) >> 12); + *model = x86_cpu_model(eax); } if (stepping) { - *stepping = eax & 0x0F; + *stepping = x86_cpu_stepping(eax); } } @@ -132,27 +131,27 @@ void host_cpu_instance_init(X86CPU *cpu) { X86CPUClass *xcc = X86_CPU_GET_CLASS(cpu); - if (xcc->model) { - char vendor[CPUID_VENDOR_SZ + 1]; - - host_cpu_vendor_fms(vendor, NULL, NULL, NULL); - object_property_set_str(OBJECT(cpu), "vendor", vendor, &error_abort); - } -} - -void host_cpu_max_instance_init(X86CPU *cpu) -{ char vendor[CPUID_VENDOR_SZ + 1] = { 0 }; char model_id[CPUID_MODEL_ID_SZ + 1] = { 0 }; int family, model, stepping; - /* Use max host physical address bits if -cpu max option is applied */ - object_property_set_bool(OBJECT(cpu), "host-phys-bits", true, &error_abort); - + /* + * setting vendor applies to both max/host and builtin_x86_defs CPU. + * FIXME: this probably should warn or should be skipped if vendors do + * not match, because family numbers are incompatible between Intel and AMD. 
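+ * (For instance, family 6 denotes the long-lived P6/Core line on Intel but + * the K7 generation on AMD.)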
+ */ host_cpu_vendor_fms(vendor, &family, &model, &stepping); + object_property_set_str(OBJECT(cpu), "vendor", vendor, &error_abort); + + if (!xcc->max_features) { + return; + } + host_cpu_fill_model_id(model_id); - object_property_set_str(OBJECT(cpu), "vendor", vendor, &error_abort); + /* Use max host physical address bits if -cpu max option is applied */ + object_property_set_bool(OBJECT(cpu), "host-phys-bits", true, &error_abort); + object_property_set_int(OBJECT(cpu), "family", family, &error_abort); object_property_set_int(OBJECT(cpu), "model", model, &error_abort); object_property_set_int(OBJECT(cpu), "stepping", stepping, @@ -161,6 +160,15 @@ void host_cpu_max_instance_init(X86CPU *cpu) &error_abort); } +bool is_host_cpu_intel(void) +{ + char vendor[CPUID_VENDOR_SZ + 1]; + + host_cpu_vendor_fms(vendor, NULL, NULL, NULL); + + return g_str_equal(vendor, CPUID_VENDOR_INTEL); +} + static void host_cpu_class_init(ObjectClass *oc, const void *data) { X86CPUClass *xcc = X86_CPU_CLASS(oc); diff --git a/target/i386/host-cpu.h b/target/i386/host-cpu.h index b97ec01..10df4b3 100644 --- a/target/i386/host-cpu.h +++ b/target/i386/host-cpu.h @@ -17,4 +17,5 @@ bool host_cpu_realizefn(CPUState *cs, Error **errp); void host_cpu_vendor_fms(char *vendor, int *family, int *model, int *stepping); +bool is_host_cpu_intel(void); #endif /* HOST_CPU_H */ diff --git a/target/i386/hvf/hvf-cpu.c b/target/i386/hvf/hvf-cpu.c index dfdda70..94ee096 100644 --- a/target/i386/hvf/hvf-cpu.c +++ b/target/i386/hvf/hvf-cpu.c @@ -21,8 +21,6 @@ static void hvf_cpu_max_instance_init(X86CPU *cpu) { CPUX86State *env = &cpu->env; - host_cpu_max_instance_init(cpu); - env->cpuid_min_level = hvf_get_supported_cpuid(0x0, 0, R_EAX); env->cpuid_min_xlevel = @@ -61,13 +59,14 @@ static void hvf_cpu_xsave_init(void) static void hvf_cpu_instance_init(CPUState *cs) { X86CPU *cpu = X86_CPU(cs); + X86CPUClass *xcc = X86_CPU_GET_CLASS(cpu); host_cpu_instance_init(cpu); /* Special cases not set in the X86CPUDefinition structs: */ /* TODO: in-kernel irqchip for hvf */ - if (cpu->max_features) { + if (xcc->max_features) { hvf_cpu_max_instance_init(cpu); } diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c index 16bde4d..89a7953 100644 --- a/target/i386/kvm/kvm-cpu.c +++ b/target/i386/kvm/kvm-cpu.c @@ -41,6 +41,7 @@ static void kvm_set_guest_phys_bits(CPUState *cs) static bool kvm_cpu_realizefn(CPUState *cs, Error **errp) { X86CPU *cpu = X86_CPU(cs); + X86CPUClass *xcc = X86_CPU_GET_CLASS(cpu); CPUX86State *env = &cpu->env; bool ret; @@ -63,7 +64,7 @@ static bool kvm_cpu_realizefn(CPUState *cs, Error **errp) * check/update ucode_rev, phys_bits, guest_phys_bits, mwait * cpu_common_realizefn() (via xcc->parent_realize) */ - if (cpu->max_features) { + if (xcc->max_features) { if (enable_cpu_pm) { if (kvm_has_waitpkg()) { env->features[FEAT_7_0_ECX] |= CPUID_7_0_ECX_WAITPKG; @@ -72,7 +73,7 @@ static bool kvm_cpu_realizefn(CPUState *cs, Error **errp) if (env->features[FEAT_1_ECX] & CPUID_EXT_MONITOR) { host_cpuid(5, 0, &cpu->mwait.eax, &cpu->mwait.ebx, &cpu->mwait.ecx, &cpu->mwait.edx); - } + } } if (cpu->ucode_rev == 0) { cpu->ucode_rev = @@ -108,7 +109,7 @@ static void kvm_cpu_max_instance_init(X86CPU *cpu) CPUX86State *env = &cpu->env; KVMState *s = kvm_state; - host_cpu_max_instance_init(cpu); + object_property_set_bool(OBJECT(cpu), "pmu", true, &error_abort); if (lmce_supported()) { object_property_set_bool(OBJECT(cpu), "lmce", true, &error_abort); @@ -216,7 +217,7 @@ static void kvm_cpu_instance_init(CPUState *cs) 
x86_cpu_apply_props(cpu, kvm_default_props); } - if (cpu->max_features) { + if (xcc->max_features) { kvm_cpu_max_instance_init(cpu); } diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index 234878c..e8c8be0 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -2259,7 +2259,7 @@ int kvm_arch_init_vcpu(CPUState *cs) cpuid_i = kvm_x86_build_cpuid(env, cpuid_data.entries, cpuid_i); cpuid_data.cpuid.nent = cpuid_i; - if (((env->cpuid_version >> 8)&0xF) >= 6 + if (x86_cpu_family(env->cpuid_version) >= 6 && (env->features[FEAT_1_EDX] & (CPUID_MCE | CPUID_MCA)) == (CPUID_MCE | CPUID_MCA)) { uint64_t mcg_cap, unsupported_caps; @@ -6182,6 +6182,9 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) case TDVMCALL_GET_TD_VM_CALL_INFO: tdx_handle_get_tdvmcall_info(cpu, run); break; + case TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT: + tdx_handle_setup_event_notify_interrupt(cpu, run); + break; } ret = 0; break; diff --git a/target/i386/kvm/tdx-quote-generator.c b/target/i386/kvm/tdx-quote-generator.c index f59715f..dee8334 100644 --- a/target/i386/kvm/tdx-quote-generator.c +++ b/target/i386/kvm/tdx-quote-generator.c @@ -75,7 +75,9 @@ static void tdx_generate_quote_cleanup(TdxGenerateQuoteTask *task) { timer_del(&task->timer); - g_source_remove(task->watch); + if (task->watch) { + g_source_remove(task->watch); + } qio_channel_close(QIO_CHANNEL(task->sioc), NULL); object_unref(OBJECT(task->sioc)); diff --git a/target/i386/kvm/tdx-stub.c b/target/i386/kvm/tdx-stub.c index 76fee49..1f0e108 100644 --- a/target/i386/kvm/tdx-stub.c +++ b/target/i386/kvm/tdx-stub.c @@ -26,3 +26,7 @@ void tdx_handle_get_quote(X86CPU *cpu, struct kvm_run *run) void tdx_handle_get_tdvmcall_info(X86CPU *cpu, struct kvm_run *run) { } + +void tdx_handle_setup_event_notify_interrupt(X86CPU *cpu, struct kvm_run *run) +{ +} diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c index e809e4b..7d69d6d 100644 --- a/target/i386/kvm/tdx.c +++ b/target/i386/kvm/tdx.c @@ -28,10 +28,13 @@ #include "cpu.h" #include "cpu-internal.h" #include "host-cpu.h" +#include "hw/i386/apic_internal.h" +#include "hw/i386/apic-msidef.h" #include "hw/i386/e820_memory_layout.h" #include "hw/i386/tdvf.h" #include "hw/i386/x86.h" #include "hw/i386/tdvf-hob.h" +#include "hw/pci/msi.h" #include "kvm_i386.h" #include "tdx.h" #include "tdx-quote-generator.h" @@ -1123,6 +1126,28 @@ int tdx_parse_tdvf(void *flash_ptr, int size) return tdvf_parse_metadata(&tdx_guest->tdvf, flash_ptr, size); } +static void tdx_inject_interrupt(uint32_t apicid, uint32_t vector) +{ + int ret; + + if (vector < 32 || vector > 255) { + return; + } + + MSIMessage msg = { + .address = ((apicid & 0xff) << MSI_ADDR_DEST_ID_SHIFT) | + (((uint64_t)apicid & 0xffffff00) << 32), + .data = vector | (APIC_DM_FIXED << MSI_DATA_DELIVERY_MODE_SHIFT), + }; + + ret = kvm_irqchip_send_msi(kvm_state, msg); + if (ret < 0) { + /* In this case, no better way to tell it to guest. Log it. 
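(the vector and target APIC ID were registered by the guest through TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT)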
*/ + error_report("TDX: injecting interrupt %d failed, interrupt lost (%s).", + vector, strerror(-ret)); + } +} + static void tdx_get_quote_completion(TdxGenerateQuoteTask *task) { TdxGuest *tdx = task->opaque; @@ -1154,6 +1179,9 @@ static void tdx_get_quote_completion(TdxGenerateQuoteTask *task) error_report("TDX: get-quote: failed to update GetQuote header."); } + tdx_inject_interrupt(tdx_guest->event_notify_apicid, + tdx_guest->event_notify_vector); + g_free(task->send_data); g_free(task->receive_buf); g_free(task); @@ -1256,20 +1284,45 @@ out_free: g_free(task); } +#define SUPPORTED_TDVMCALLINFO_1_R11 (TDG_VP_VMCALL_SUBFUNC_SET_EVENT_NOTIFY_INTERRUPT) +#define SUPPORTED_TDVMCALLINFO_1_R12 (0) + void tdx_handle_get_tdvmcall_info(X86CPU *cpu, struct kvm_run *run) { if (run->tdx.get_tdvmcall_info.leaf != 1) { - return; + return; } - run->tdx.get_tdvmcall_info.r11 = TDG_VP_VMCALL_SUBFUNC_GET_QUOTE; - run->tdx.get_tdvmcall_info.r12 = 0; + run->tdx.get_tdvmcall_info.r11 = (tdx_caps->user_tdvmcallinfo_1_r11 & + SUPPORTED_TDVMCALLINFO_1_R11) | + tdx_caps->kernel_tdvmcallinfo_1_r11; + run->tdx.get_tdvmcall_info.r12 = (tdx_caps->user_tdvmcallinfo_1_r12 & + SUPPORTED_TDVMCALLINFO_1_R12) | + tdx_caps->kernel_tdvmcallinfo_1_r12; run->tdx.get_tdvmcall_info.r13 = 0; run->tdx.get_tdvmcall_info.r14 = 0; + + run->tdx.get_tdvmcall_info.ret = TDG_VP_VMCALL_SUCCESS; +} + +void tdx_handle_setup_event_notify_interrupt(X86CPU *cpu, struct kvm_run *run) +{ + uint64_t vector = run->tdx.setup_event_notify.vector; + + if (vector >= 32 && vector < 256) { + qemu_mutex_lock(&tdx_guest->lock); + tdx_guest->event_notify_vector = vector; + tdx_guest->event_notify_apicid = cpu->apic_id; + qemu_mutex_unlock(&tdx_guest->lock); + run->tdx.setup_event_notify.ret = TDG_VP_VMCALL_SUCCESS; + } else { + run->tdx.setup_event_notify.ret = TDG_VP_VMCALL_INVALID_OPERAND; + } } static void tdx_panicked_on_fatal_error(X86CPU *cpu, uint64_t error_code, - char *message, uint64_t gpa) + char *message, bool has_gpa, + uint64_t gpa) { GuestPanicInformation *panic_info; @@ -1278,6 +1331,7 @@ static void tdx_panicked_on_fatal_error(X86CPU *cpu, uint64_t error_code, panic_info->u.tdx.error_code = (uint32_t) error_code; panic_info->u.tdx.message = message; panic_info->u.tdx.gpa = gpa; + panic_info->u.tdx.has_gpa = has_gpa; qemu_system_guest_panicked(panic_info); } @@ -1297,6 +1351,7 @@ int tdx_handle_report_fatal_error(X86CPU *cpu, struct kvm_run *run) char *message = NULL; uint64_t *tmp; uint64_t gpa = -1ull; + bool has_gpa = false; if (error_code & 0xffff) { error_report("TDX: REPORT_FATAL_ERROR: invalid error code: 0x%"PRIx64, @@ -1329,9 +1384,10 @@ int tdx_handle_report_fatal_error(X86CPU *cpu, struct kvm_run *run) if (error_code & TDX_REPORT_FATAL_ERROR_GPA_VALID) { gpa = run->system_event.data[R_R13]; + has_gpa = true; } - tdx_panicked_on_fatal_error(cpu, error_code, message, gpa); + tdx_panicked_on_fatal_error(cpu, error_code, message, has_gpa, gpa); return -1; } @@ -1468,6 +1524,9 @@ static void tdx_guest_init(Object *obj) NULL, NULL); qemu_mutex_init(&tdx->lock); + + tdx->event_notify_vector = -1; + tdx->event_notify_apicid = -1; } static void tdx_guest_finalize(Object *obj) diff --git a/target/i386/kvm/tdx.h b/target/i386/kvm/tdx.h index 35a09c1..1c38faf 100644 --- a/target/i386/kvm/tdx.h +++ b/target/i386/kvm/tdx.h @@ -25,6 +25,7 @@ typedef struct TdxGuestClass { #define TDVMCALL_GET_TD_VM_CALL_INFO 0x10000 #define TDVMCALL_GET_QUOTE 0x10002 +#define TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT 0x10004 #define TDG_VP_VMCALL_SUCCESS 
0x0000000000000000ULL #define TDG_VP_VMCALL_RETRY 0x0000000000000001ULL @@ -32,7 +33,7 @@ typedef struct TdxGuestClass { #define TDG_VP_VMCALL_GPA_INUSE 0x8000000000000001ULL #define TDG_VP_VMCALL_ALIGN_ERROR 0x8000000000000002ULL -#define TDG_VP_VMCALL_SUBFUNC_GET_QUOTE 0x0000000000000001ULL +#define TDG_VP_VMCALL_SUBFUNC_SET_EVENT_NOTIFY_INTERRUPT BIT_ULL(1) enum TdxRamType { TDX_RAM_UNACCEPTED, @@ -66,6 +67,9 @@ typedef struct TdxGuest { /* GetQuote */ SocketAddress *qg_sock_addr; int num; + + uint32_t event_notify_vector; + uint32_t event_notify_apicid; } TdxGuest; #ifdef CONFIG_TDX @@ -80,5 +84,6 @@ int tdx_parse_tdvf(void *flash_ptr, int size); int tdx_handle_report_fatal_error(X86CPU *cpu, struct kvm_run *run); void tdx_handle_get_quote(X86CPU *cpu, struct kvm_run *run); void tdx_handle_get_tdvmcall_info(X86CPU *cpu, struct kvm_run *run); +void tdx_handle_setup_event_notify_interrupt(X86CPU *cpu, struct kvm_run *run); #endif /* QEMU_I386_TDX_H */ diff --git a/target/i386/kvm/vmsr_energy.c b/target/i386/kvm/vmsr_energy.c index d6aad52..58ce3df 100644 --- a/target/i386/kvm/vmsr_energy.c +++ b/target/i386/kvm/vmsr_energy.c @@ -27,15 +27,6 @@ char *vmsr_compute_default_paths(void) return g_build_filename(state, "run", "qemu-vmsr-helper.sock", NULL); } -bool is_host_cpu_intel(void) -{ - char vendor[CPUID_VENDOR_SZ + 1]; - - host_cpu_vendor_fms(vendor, NULL, NULL, NULL); - - return g_str_equal(vendor, CPUID_VENDOR_INTEL); -} - int is_rapl_enabled(void) { const char *path = "/sys/class/powercap/intel-rapl/enabled"; diff --git a/target/i386/kvm/vmsr_energy.h b/target/i386/kvm/vmsr_energy.h index 16cc1f4..151bcbd 100644 --- a/target/i386/kvm/vmsr_energy.h +++ b/target/i386/kvm/vmsr_energy.h @@ -94,6 +94,5 @@ double vmsr_get_ratio(uint64_t e_delta, unsigned long long delta_ticks, unsigned int maxticks); void vmsr_init_topo_info(X86CPUTopoInfo *topo_info, const MachineState *ms); -bool is_host_cpu_intel(void); int is_rapl_enabled(void); #endif /* VMSR_ENERGY_H */ diff --git a/target/i386/meson.build b/target/i386/meson.build index c1aacea..092af34 100644 --- a/target/i386/meson.build +++ b/target/i386/meson.build @@ -11,6 +11,8 @@ i386_ss.add(when: 'CONFIG_SEV', if_true: files('host-cpu.c', 'confidential-guest # x86 cpu type i386_ss.add(when: 'CONFIG_KVM', if_true: files('host-cpu.c')) i386_ss.add(when: 'CONFIG_HVF', if_true: files('host-cpu.c')) +i386_ss.add(when: 'CONFIG_WHPX', if_true: files('host-cpu.c')) +i386_ss.add(when: 'CONFIG_NVMM', if_true: files('host-cpu.c')) i386_system_ss = ss.source_set() i386_system_ss.add(files( diff --git a/target/i386/nvmm/nvmm-all.c b/target/i386/nvmm/nvmm-all.c index b4a4d50..11c2630 100644 --- a/target/i386/nvmm/nvmm-all.c +++ b/target/i386/nvmm/nvmm-all.c @@ -19,6 +19,8 @@ #include "qemu/error-report.h" #include "qapi/error.h" #include "qemu/queue.h" +#include "accel/accel-cpu-target.h" +#include "host-cpu.h" #include "migration/blocker.h" #include "strings.h" @@ -1207,10 +1209,33 @@ static const TypeInfo nvmm_accel_type = { .class_init = nvmm_accel_class_init, }; +static void nvmm_cpu_instance_init(CPUState *cs) +{ + X86CPU *cpu = X86_CPU(cs); + + host_cpu_instance_init(cpu); +} + +static void nvmm_cpu_accel_class_init(ObjectClass *oc, const void *data) +{ + AccelCPUClass *acc = ACCEL_CPU_CLASS(oc); + + acc->cpu_instance_init = nvmm_cpu_instance_init; +} + +static const TypeInfo nvmm_cpu_accel_type = { + .name = ACCEL_CPU_NAME("nvmm"), + + .parent = TYPE_ACCEL_CPU, + .class_init = nvmm_cpu_accel_class_init, + .abstract = true, +}; + static void 
nvmm_type_init(void) { type_register_static(&nvmm_accel_type); + type_register_static(&nvmm_cpu_accel_type); } type_init(nvmm_type_init); diff --git a/target/i386/sev.c b/target/i386/sev.c index 1a12f06..1057b8a 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -41,7 +41,9 @@ #include "confidential-guest.h" #include "hw/i386/pc.h" #include "system/address-spaces.h" +#include "hw/i386/e820_memory_layout.h" #include "qemu/queue.h" +#include "qemu/cutils.h" OBJECT_DECLARE_TYPE(SevCommonState, SevCommonStateClass, SEV_COMMON) OBJECT_DECLARE_TYPE(SevGuestState, SevCommonStateClass, SEV_GUEST) @@ -50,6 +52,15 @@ OBJECT_DECLARE_TYPE(SevSnpGuestState, SevCommonStateClass, SEV_SNP_GUEST) /* hard code sha256 digest size */ #define HASH_SIZE 32 +/* Hard coded GPA that KVM uses for the VMSA */ +#define KVM_VMSA_GPA 0xFFFFFFFFF000 + +/* Convert between SEV-ES VMSA and SegmentCache flags/attributes */ +#define FLAGS_VMSA_TO_SEGCACHE(flags) \ + ((((flags) & 0xff00) << 12) | (((flags) & 0xff) << 8)) +#define FLAGS_SEGCACHE_TO_VMSA(flags) \ + ((((flags) & 0xff00) >> 8) | (((flags) & 0xf00000) >> 12)) + typedef struct QEMU_PACKED SevHashTableEntry { QemuUUID guid; uint16_t len; @@ -89,6 +100,14 @@ typedef struct QEMU_PACKED SevHashTableDescriptor { uint32_t size; } SevHashTableDescriptor; +typedef struct SevLaunchVmsa { + QTAILQ_ENTRY(SevLaunchVmsa) next; + + uint16_t cpu_index; + uint64_t gpa; + struct sev_es_save_area vmsa; +} SevLaunchVmsa; + struct SevCommonState { X86ConfidentialGuest parent_obj; @@ -99,6 +118,8 @@ struct SevCommonState { uint32_t cbitpos; uint32_t reduced_phys_bits; bool kernel_hashes; + uint64_t sev_features; + uint64_t supported_sev_features; /* runtime state */ uint8_t api_major; @@ -107,9 +128,7 @@ struct SevCommonState { int sev_fd; SevState state; - uint32_t reset_cs; - uint32_t reset_ip; - bool reset_data_valid; + QTAILQ_HEAD(, SevLaunchVmsa) launch_vmsa; }; struct SevCommonStateClass { @@ -122,7 +141,8 @@ struct SevCommonStateClass { Error **errp); int (*launch_start)(SevCommonState *sev_common); void (*launch_finish)(SevCommonState *sev_common); - int (*launch_update_data)(SevCommonState *sev_common, hwaddr gpa, uint8_t *ptr, size_t len); + int (*launch_update_data)(SevCommonState *sev_common, hwaddr gpa, + uint8_t *ptr, size_t len, Error **errp); int (*kvm_init)(ConfidentialGuestSupport *cgs, Error **errp); }; @@ -363,6 +383,288 @@ static struct RAMBlockNotifier sev_ram_notifier = { .ram_block_removed = sev_ram_block_removed, }; +static void sev_apply_cpu_context(CPUState *cpu) +{ + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + X86CPU *x86; + CPUX86State *env; + struct SevLaunchVmsa *launch_vmsa; + + /* See if an initial VMSA has been provided for this CPU */ + QTAILQ_FOREACH(launch_vmsa, &sev_common->launch_vmsa, next) + { + if (cpu->cpu_index == launch_vmsa->cpu_index) { + x86 = X86_CPU(cpu); + env = &x86->env; + + /* + * Ideally we would provide the VMSA directly to kvm which would + * ensure that the resulting initial VMSA measurement which is + * calculated during KVM_SEV_LAUNCH_UPDATE_VMSA is calculated from + * exactly what we provide here. Currently this is not possible so + * we need to copy the parts of the VMSA structure that we currently + * support into the CPU state. 
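+ * Any field that is not copied here must be rejected by + * check_vmsa_supported(), otherwise the launch measurement would silently + * diverge from the provided VMSA.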
+ */ + cpu_load_efer(env, launch_vmsa->vmsa.efer); + cpu_x86_update_cr4(env, launch_vmsa->vmsa.cr4); + cpu_x86_update_cr0(env, launch_vmsa->vmsa.cr0); + cpu_x86_update_cr3(env, launch_vmsa->vmsa.cr3); + env->xcr0 = launch_vmsa->vmsa.xcr0; + env->pat = launch_vmsa->vmsa.g_pat; + + cpu_x86_load_seg_cache( + env, R_CS, launch_vmsa->vmsa.cs.selector, + launch_vmsa->vmsa.cs.base, launch_vmsa->vmsa.cs.limit, + FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.cs.attrib)); + cpu_x86_load_seg_cache( + env, R_DS, launch_vmsa->vmsa.ds.selector, + launch_vmsa->vmsa.ds.base, launch_vmsa->vmsa.ds.limit, + FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.ds.attrib)); + cpu_x86_load_seg_cache( + env, R_ES, launch_vmsa->vmsa.es.selector, + launch_vmsa->vmsa.es.base, launch_vmsa->vmsa.es.limit, + FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.es.attrib)); + cpu_x86_load_seg_cache( + env, R_FS, launch_vmsa->vmsa.fs.selector, + launch_vmsa->vmsa.fs.base, launch_vmsa->vmsa.fs.limit, + FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.fs.attrib)); + cpu_x86_load_seg_cache( + env, R_GS, launch_vmsa->vmsa.gs.selector, + launch_vmsa->vmsa.gs.base, launch_vmsa->vmsa.gs.limit, + FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.gs.attrib)); + cpu_x86_load_seg_cache( + env, R_SS, launch_vmsa->vmsa.ss.selector, + launch_vmsa->vmsa.ss.base, launch_vmsa->vmsa.ss.limit, + FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.ss.attrib)); + + env->gdt.base = launch_vmsa->vmsa.gdtr.base; + env->gdt.limit = launch_vmsa->vmsa.gdtr.limit; + env->gdt.flags = + FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.gdtr.attrib); + env->idt.base = launch_vmsa->vmsa.idtr.base; + env->idt.limit = launch_vmsa->vmsa.idtr.limit; + env->idt.flags = + FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.idtr.attrib); + + cpu_x86_load_seg_cache( + env, R_LDTR, launch_vmsa->vmsa.ldtr.selector, + launch_vmsa->vmsa.ldtr.base, launch_vmsa->vmsa.ldtr.limit, + FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.ldtr.attrib)); + cpu_x86_load_seg_cache( + env, R_TR, launch_vmsa->vmsa.tr.selector, + launch_vmsa->vmsa.tr.base, launch_vmsa->vmsa.tr.limit, + FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.tr.attrib)); + + env->dr[6] = launch_vmsa->vmsa.dr6; + env->dr[7] = launch_vmsa->vmsa.dr7; + + env->regs[R_EAX] = launch_vmsa->vmsa.rax; + env->regs[R_ECX] = launch_vmsa->vmsa.rcx; + env->regs[R_EDX] = launch_vmsa->vmsa.rdx; + env->regs[R_EBX] = launch_vmsa->vmsa.rbx; + env->regs[R_ESP] = launch_vmsa->vmsa.rsp; + env->regs[R_EBP] = launch_vmsa->vmsa.rbp; + env->regs[R_ESI] = launch_vmsa->vmsa.rsi; + env->regs[R_EDI] = launch_vmsa->vmsa.rdi; +#ifdef TARGET_X86_64 + env->regs[R_R8] = launch_vmsa->vmsa.r8; + env->regs[R_R9] = launch_vmsa->vmsa.r9; + env->regs[R_R10] = launch_vmsa->vmsa.r10; + env->regs[R_R11] = launch_vmsa->vmsa.r11; + env->regs[R_R12] = launch_vmsa->vmsa.r12; + env->regs[R_R13] = launch_vmsa->vmsa.r13; + env->regs[R_R14] = launch_vmsa->vmsa.r14; + env->regs[R_R15] = launch_vmsa->vmsa.r15; +#endif + env->eip = launch_vmsa->vmsa.rip; + env->eflags = launch_vmsa->vmsa.rflags; + + cpu_set_fpuc(env, launch_vmsa->vmsa.x87_fcw); + env->mxcsr = launch_vmsa->vmsa.mxcsr; + + break; + } + } +} + +static int check_sev_features(SevCommonState *sev_common, uint64_t sev_features, + Error **errp) +{ + /* + * Ensure SEV_FEATURES is configured for the correct SEV hardware and that + * the requested features are supported. If SEV-SNP is enabled then + * that feature must be enabled, otherwise it must be cleared. 
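+ * (SVM_SEV_FEAT_SNP_ACTIVE is bit 0 of the VMSA SEV_FEATURES field.)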
+ */ + if (sev_snp_enabled() && !(sev_features & SVM_SEV_FEAT_SNP_ACTIVE)) { + error_setg( + errp, + "%s: SEV_SNP is enabled but is not enabled in VMSA sev_features", + __func__); + return -1; + } else if (!sev_snp_enabled() && + (sev_features & SVM_SEV_FEAT_SNP_ACTIVE)) { + error_setg( + errp, + "%s: SEV_SNP is not enabled but is enabled in VMSA sev_features", + __func__); + return -1; + } + if (sev_features & ~sev_common->supported_sev_features) { + error_setg(errp, + "%s: VMSA contains unsupported sev_features: %lX, " + "supported features: %lX", + __func__, sev_features, sev_common->supported_sev_features); + return -1; + } + return 0; +} + +static int check_vmsa_supported(SevCommonState *sev_common, hwaddr gpa, + const struct sev_es_save_area *vmsa, + Error **errp) +{ + struct sev_es_save_area vmsa_check; + + /* + * KVM always populates the VMSA at a fixed GPA which cannot be modified + * from userspace. Specifying a different GPA will not prevent the guest + * from starting but will cause the launch measurement to be different + * from expected. Therefore check that the provided GPA matches the KVM + * hardcoded value. + */ + if (gpa != KVM_VMSA_GPA) { + error_setg(errp, + "%s: The VMSA GPA must be %lX but is specified as %lX", + __func__, KVM_VMSA_GPA, gpa); + return -1; + } + + /* + * Clear all supported fields so we can then check the entire structure + * is zero. + */ + memcpy(&vmsa_check, vmsa, sizeof(struct sev_es_save_area)); + memset(&vmsa_check.es, 0, sizeof(vmsa_check.es)); + memset(&vmsa_check.cs, 0, sizeof(vmsa_check.cs)); + memset(&vmsa_check.ss, 0, sizeof(vmsa_check.ss)); + memset(&vmsa_check.ds, 0, sizeof(vmsa_check.ds)); + memset(&vmsa_check.fs, 0, sizeof(vmsa_check.fs)); + memset(&vmsa_check.gs, 0, sizeof(vmsa_check.gs)); + memset(&vmsa_check.gdtr, 0, sizeof(vmsa_check.gdtr)); + memset(&vmsa_check.idtr, 0, sizeof(vmsa_check.idtr)); + memset(&vmsa_check.ldtr, 0, sizeof(vmsa_check.ldtr)); + memset(&vmsa_check.tr, 0, sizeof(vmsa_check.tr)); + vmsa_check.efer = 0; + vmsa_check.cr0 = 0; + vmsa_check.cr3 = 0; + vmsa_check.cr4 = 0; + vmsa_check.xcr0 = 0; + vmsa_check.dr6 = 0; + vmsa_check.dr7 = 0; + vmsa_check.rax = 0; + vmsa_check.rcx = 0; + vmsa_check.rdx = 0; + vmsa_check.rbx = 0; + vmsa_check.rsp = 0; + vmsa_check.rbp = 0; + vmsa_check.rsi = 0; + vmsa_check.rdi = 0; + vmsa_check.r8 = 0; + vmsa_check.r9 = 0; + vmsa_check.r10 = 0; + vmsa_check.r11 = 0; + vmsa_check.r12 = 0; + vmsa_check.r13 = 0; + vmsa_check.r14 = 0; + vmsa_check.r15 = 0; + vmsa_check.rip = 0; + vmsa_check.rflags = 0; + + vmsa_check.g_pat = 0; + vmsa_check.xcr0 = 0; + + vmsa_check.x87_fcw = 0; + vmsa_check.mxcsr = 0; + + if (check_sev_features(sev_common, vmsa_check.sev_features, errp) < 0) { + return -1; + } + vmsa_check.sev_features = 0; + + if (!buffer_is_zero(&vmsa_check, sizeof(vmsa_check))) { + error_setg(errp, + "%s: The VMSA contains fields that are not " + "synchronized with KVM. Continuing would result in " + "either unpredictable guest behavior, or a " + "mismatched launch measurement.", + __func__); + return -1; + } + return 0; +} + +static int sev_set_cpu_context(uint16_t cpu_index, const void *ctx, + uint32_t ctx_len, hwaddr gpa, Error **errp) +{ + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + SevLaunchVmsa *launch_vmsa; + CPUState *cpu; + bool exists = false; + + /* + * Setting the CPU context is only supported for SEV-ES and SEV-SNP. The + * context buffer will contain a sev_es_save_area from the Linux kernel + * which is defined by "Table B-4. 
VMSA Layout, State Save Area for SEV-ES" + * in the AMD64 APM, Volume 2. + */ + + if (!sev_es_enabled()) { + error_setg(errp, "SEV: unable to set CPU context: Not supported"); + return -1; + } + + if (ctx_len < sizeof(struct sev_es_save_area)) { + error_setg(errp, "SEV: unable to set CPU context: " + "Invalid context provided"); + return -1; + } + + cpu = qemu_get_cpu(cpu_index); + if (!cpu) { + error_setg(errp, "SEV: unable to set CPU context for out of bounds " + "CPU index %d", cpu_index); + return -1; + } + + /* + * If the context of this VP has already been set then replace it with the + * new context. + */ + QTAILQ_FOREACH(launch_vmsa, &sev_common->launch_vmsa, next) + { + if (cpu_index == launch_vmsa->cpu_index) { + launch_vmsa->gpa = gpa; + memcpy(&launch_vmsa->vmsa, ctx, sizeof(launch_vmsa->vmsa)); + exists = true; + break; + } + } + + if (!exists) { + /* New VP context */ + launch_vmsa = g_new0(SevLaunchVmsa, 1); + memcpy(&launch_vmsa->vmsa, ctx, sizeof(launch_vmsa->vmsa)); + launch_vmsa->cpu_index = cpu_index; + launch_vmsa->gpa = gpa; + QTAILQ_INSERT_TAIL(&sev_common->launch_vmsa, launch_vmsa, next); + } + + /* Synchronise the VMSA with the current CPU state */ + sev_apply_cpu_context(cpu); + + return 0; +} + bool sev_enabled(void) { @@ -970,9 +1272,8 @@ sev_snp_adjust_cpuid_features(X86ConfidentialGuest *cg, uint32_t feature, uint32 return value; } -static int -sev_launch_update_data(SevCommonState *sev_common, hwaddr gpa, - uint8_t *addr, size_t len) +static int sev_launch_update_data(SevCommonState *sev_common, hwaddr gpa, + uint8_t *addr, size_t len, Error **errp) { int ret, fw_error; struct kvm_sev_launch_update_data update; @@ -987,8 +1288,8 @@ sev_launch_update_data(SevCommonState *sev_common, hwaddr gpa, ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_LAUNCH_UPDATE_DATA, &update, &fw_error); if (ret) { - error_report("%s: LAUNCH_UPDATE ret=%d fw_error=%d '%s'", - __func__, ret, fw_error, fw_error_to_str(fw_error)); + error_setg(errp, "%s: LAUNCH_UPDATE ret=%d fw_error=%d '%s'", __func__, + ret, fw_error, fw_error_to_str(fw_error)); } return ret; @@ -998,6 +1299,16 @@ static int sev_launch_update_vmsa(SevGuestState *sev_guest) { int ret, fw_error; + CPUState *cpu; + + /* + * The initial CPU state is measured as part of KVM_SEV_LAUNCH_UPDATE_VMSA. + * Synchronise the CPU state to any provided launch VMSA structures. 
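+ * (sev_apply_cpu_context() is a no-op for any vcpu that has no stored + * launch VMSA.)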
+ */ + CPU_FOREACH(cpu) { + sev_apply_cpu_context(cpu); + } + ret = sev_ioctl(SEV_COMMON(sev_guest)->sev_fd, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL, &fw_error); @@ -1116,8 +1427,8 @@ sev_launch_finish(SevCommonState *sev_common) migrate_add_blocker(&sev_mig_blocker, &error_fatal); } -static int -snp_launch_update_data(uint64_t gpa, void *hva, size_t len, int type) +static int snp_launch_update_data(uint64_t gpa, void *hva, size_t len, + int type, Error **errp) { SevLaunchUpdateData *data; @@ -1132,23 +1443,21 @@ snp_launch_update_data(uint64_t gpa, void *hva, size_t len, int type) return 0; } -static int -sev_snp_launch_update_data(SevCommonState *sev_common, hwaddr gpa, - uint8_t *ptr, size_t len) +static int sev_snp_launch_update_data(SevCommonState *sev_common, hwaddr gpa, + uint8_t *ptr, size_t len, Error **errp) { - int ret = snp_launch_update_data(gpa, ptr, len, - KVM_SEV_SNP_PAGE_TYPE_NORMAL); - return ret; + return snp_launch_update_data(gpa, ptr, len, + KVM_SEV_SNP_PAGE_TYPE_NORMAL, errp); } static int sev_snp_cpuid_info_fill(SnpCpuidInfo *snp_cpuid_info, - const KvmCpuidInfo *kvm_cpuid_info) + const KvmCpuidInfo *kvm_cpuid_info, Error **errp) { size_t i; if (kvm_cpuid_info->cpuid.nent > SNP_CPUID_FUNCTION_MAXCOUNT) { - error_report("SEV-SNP: CPUID entry count (%d) exceeds max (%d)", + error_setg(errp, "SEV-SNP: CPUID entry count (%d) exceeds max (%d)", kvm_cpuid_info->cpuid.nent, SNP_CPUID_FUNCTION_MAXCOUNT); return -1; } @@ -1190,8 +1499,8 @@ sev_snp_cpuid_info_fill(SnpCpuidInfo *snp_cpuid_info, return 0; } -static int -snp_launch_update_cpuid(uint32_t cpuid_addr, void *hva, size_t cpuid_len) +static int snp_launch_update_cpuid(uint32_t cpuid_addr, void *hva, + size_t cpuid_len, Error **errp) { KvmCpuidInfo kvm_cpuid_info = {0}; SnpCpuidInfo snp_cpuid_info; @@ -1208,26 +1517,25 @@ snp_launch_update_cpuid(uint32_t cpuid_addr, void *hva, size_t cpuid_len) } while (ret == -E2BIG); if (ret) { - error_report("SEV-SNP: unable to query CPUID values for CPU: '%s'", - strerror(-ret)); - return 1; + error_setg(errp, "SEV-SNP: unable to query CPUID values for CPU: '%s'", + strerror(-ret)); + return -1; } - ret = sev_snp_cpuid_info_fill(&snp_cpuid_info, &kvm_cpuid_info); - if (ret) { - error_report("SEV-SNP: failed to generate CPUID table information"); - return 1; + ret = sev_snp_cpuid_info_fill(&snp_cpuid_info, &kvm_cpuid_info, errp); + if (ret < 0) { + return -1; } memcpy(hva, &snp_cpuid_info, sizeof(snp_cpuid_info)); return snp_launch_update_data(cpuid_addr, hva, cpuid_len, - KVM_SEV_SNP_PAGE_TYPE_CPUID); + KVM_SEV_SNP_PAGE_TYPE_CPUID, errp); } -static int -snp_launch_update_kernel_hashes(SevSnpGuestState *sev_snp, uint32_t addr, - void *hva, uint32_t len) +static int snp_launch_update_kernel_hashes(SevSnpGuestState *sev_snp, + uint32_t addr, void *hva, + uint32_t len, Error **errp) { int type = KVM_SEV_SNP_PAGE_TYPE_ZERO; if (sev_snp->parent_obj.kernel_hashes) { @@ -1239,7 +1547,7 @@ snp_launch_update_kernel_hashes(SevSnpGuestState *sev_snp, uint32_t addr, sizeof(*sev_snp->kernel_hashes_data)); type = KVM_SEV_SNP_PAGE_TYPE_NORMAL; } - return snp_launch_update_data(addr, hva, len, type); + return snp_launch_update_data(addr, hva, len, type, errp); } static int @@ -1277,12 +1585,14 @@ snp_populate_metadata_pages(SevSnpGuestState *sev_snp, } if (type == KVM_SEV_SNP_PAGE_TYPE_CPUID) { - ret = snp_launch_update_cpuid(desc->base, hva, desc->len); + ret = snp_launch_update_cpuid(desc->base, hva, desc->len, + &error_fatal); } else if (desc->type == SEV_DESC_TYPE_SNP_KERNEL_HASHES) { ret = 
snp_launch_update_kernel_hashes(sev_snp, desc->base, hva, - desc->len); + desc->len, &error_fatal); } else { - ret = snp_launch_update_data(desc->base, hva, desc->len, type); + ret = snp_launch_update_data(desc->base, hva, desc->len, type, + &error_fatal); } if (ret) { @@ -1304,18 +1614,26 @@ sev_snp_launch_finish(SevCommonState *sev_common) struct kvm_sev_snp_launch_finish *finish = &sev_snp->kvm_finish_conf; /* - * To boot the SNP guest, the hypervisor is required to populate the CPUID - * and Secrets page before finalizing the launch flow. The location of - * the secrets and CPUID page is available through the OVMF metadata GUID. + * Populate all the metadata pages if not using an IGVM file. In the case + * where an IGVM file is provided it will be used to configure the metadata + * pages directly. */ - metadata = pc_system_get_ovmf_sev_metadata_ptr(); - if (metadata == NULL) { - error_report("%s: Failed to locate SEV metadata header", __func__); - exit(1); - } + if (!X86_MACHINE(qdev_get_machine())->igvm) { + /* + * To boot the SNP guest, the hypervisor is required to populate the + * CPUID and Secrets page before finalizing the launch flow. The + * location of the secrets and CPUID page is available through the + * OVMF metadata GUID. + */ + metadata = pc_system_get_ovmf_sev_metadata_ptr(); + if (metadata == NULL) { + error_report("%s: Failed to locate SEV metadata header", __func__); + exit(1); + } - /* Populate all the metadata pages */ - snp_populate_metadata_pages(sev_snp, metadata); + /* Populate all the metadata pages */ + snp_populate_metadata_pages(sev_snp, metadata); + } QTAILQ_FOREACH(data, &launch_update, next) { ret = sev_snp_launch_update(sev_snp, data); @@ -1425,6 +1743,39 @@ static int sev_snp_kvm_type(X86ConfidentialGuest *cg) return KVM_X86_SNP_VM; } +static int sev_init_supported_features(ConfidentialGuestSupport *cgs, + SevCommonState *sev_common, Error **errp) +{ + X86ConfidentialGuestClass *x86_klass = + X86_CONFIDENTIAL_GUEST_GET_CLASS(cgs); + /* + * Older kernels do not support query or setting of sev_features. In this + * case the set of supported features must be zero to match the settings + * in the kernel. 
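+ * Newer kernels report the supported mask through the + * KVM_X86_SEV_VMSA_FEATURES device attribute, which is queried below.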
+ */ + if (x86_klass->kvm_type(X86_CONFIDENTIAL_GUEST(sev_common)) == + KVM_X86_DEFAULT_VM) { + sev_common->supported_sev_features = 0; + return 0; + } + + /* Query KVM for the supported set of sev_features */ + struct kvm_device_attr attr = { + .group = KVM_X86_GRP_SEV, + .attr = KVM_X86_SEV_VMSA_FEATURES, + .addr = (unsigned long)&sev_common->supported_sev_features, + }; + if (kvm_ioctl(kvm_state, KVM_GET_DEVICE_ATTR, &attr) < 0) { + error_setg(errp, "%s: failed to query supported sev_features", + __func__); + return -1; + } + if (sev_snp_enabled()) { + sev_common->supported_sev_features |= SVM_SEV_FEAT_SNP_ACTIVE; + } + return 0; +} + static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) { char *devname; @@ -1505,6 +1856,10 @@ static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) } } + if (sev_init_supported_features(cgs, sev_common, errp) < 0) { + return -1; + } + trace_kvm_sev_init(); switch (x86_klass->kvm_type(X86_CONFIDENTIAL_GUEST(sev_common))) { case KVM_X86_DEFAULT_VM: @@ -1516,6 +1871,40 @@ static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) case KVM_X86_SEV_ES_VM: case KVM_X86_SNP_VM: { struct kvm_sev_init args = { 0 }; + MachineState *machine = MACHINE(qdev_get_machine()); + X86MachineState *x86machine = X86_MACHINE(qdev_get_machine()); + + /* + * If configuration is provided via an IGVM file then the IGVM file + * might contain configuration of the initial vcpu context. For SEV + * the vcpu context includes the sev_features which should be applied + * to the vcpu. + * + * KVM does not synchronize sev_features from CPU state. Instead it + * requires sev_features to be provided as part of this initialization + * call which is subsequently automatically applied to the VMSA of + * each vcpu. + * + * The IGVM file is normally processed after initialization. Therefore + * we need to pre-process it here to extract sev_features in order to + * provide it to KVM_SEV_INIT2. Each cgs_* function that is called by + * the IGVM processor detects this pre-process by observing the state + * as SEV_STATE_UNINIT. + */ + if (x86machine->igvm) { + if (IGVM_CFG_GET_CLASS(x86machine->igvm) + ->process(x86machine->igvm, machine->cgs, true, errp) == + -1) { + return -1; + } + /* + * KVM maintains a bitmask of allowed sev_features. This does not + * include SVM_SEV_FEAT_SNP_ACTIVE which is set accordingly by KVM + * itself. Therefore we need to clear this flag. 
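+ * (leaving it set would presumably make KVM_SEV_INIT2 fail its + * supported-features check)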
@@ -1615,9 +2004,8 @@ sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp)
    if (sev_check_state(sev_common, SEV_STATE_LAUNCH_UPDATE)) {
        int ret;

-        ret = klass->launch_update_data(sev_common, gpa, ptr, len);
+        ret = klass->launch_update_data(sev_common, gpa, ptr, len, errp);
        if (ret < 0) {
-            error_setg(errp, "SEV: Failed to encrypt pflash rom");
            return ret;
        }
    }
@@ -1782,40 +2170,109 @@ sev_es_find_reset_vector(void *flash_ptr, uint64_t flash_size,
    return sev_es_parse_reset_block(info, addr);
}

-void sev_es_set_reset_vector(CPUState *cpu)
+
+static void seg_to_vmsa(const SegmentCache *cpu_seg, struct vmcb_seg *vmsa_seg)
{
-    X86CPU *x86;
-    CPUX86State *env;
-    ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs;
-    SevCommonState *sev_common = SEV_COMMON(
-        object_dynamic_cast(OBJECT(cgs), TYPE_SEV_COMMON));
+    vmsa_seg->selector = cpu_seg->selector;
+    vmsa_seg->base = cpu_seg->base;
+    vmsa_seg->limit = cpu_seg->limit;
+    vmsa_seg->attrib = FLAGS_SEGCACHE_TO_VMSA(cpu_seg->flags);
+}

-    /* Only update if we have valid reset information */
-    if (!sev_common || !sev_common->reset_data_valid) {
-        return;
-    }
+static void initialize_vmsa(const CPUState *cpu, struct sev_es_save_area *vmsa)
+{
+    const X86CPU *x86 = X86_CPU(cpu);
+    const CPUX86State *env = &x86->env;

-    /* Do not update the BSP reset state */
-    if (cpu->cpu_index == 0) {
-        return;
+    /*
+     * Initialize the SEV-ES save area from the current state of
+     * the CPU. The entire state does not need to be copied, only the state
+     * that is copied back to the CPUState in sev_apply_cpu_context.
+     */
+    memset(vmsa, 0, sizeof(struct sev_es_save_area));
+    vmsa->efer = env->efer;
+    vmsa->cr0 = env->cr[0];
+    vmsa->cr3 = env->cr[3];
+    vmsa->cr4 = env->cr[4];
+    vmsa->xcr0 = env->xcr0;
+    vmsa->g_pat = env->pat;
+
+    seg_to_vmsa(&env->segs[R_CS], &vmsa->cs);
+    seg_to_vmsa(&env->segs[R_DS], &vmsa->ds);
+    seg_to_vmsa(&env->segs[R_ES], &vmsa->es);
+    seg_to_vmsa(&env->segs[R_FS], &vmsa->fs);
+    seg_to_vmsa(&env->segs[R_GS], &vmsa->gs);
+    seg_to_vmsa(&env->segs[R_SS], &vmsa->ss);
+
+    seg_to_vmsa(&env->gdt, &vmsa->gdtr);
+    seg_to_vmsa(&env->idt, &vmsa->idtr);
+    seg_to_vmsa(&env->ldt, &vmsa->ldtr);
+    seg_to_vmsa(&env->tr, &vmsa->tr);
+
+    vmsa->dr6 = env->dr[6];
+    vmsa->dr7 = env->dr[7];
+
+    vmsa->rax = env->regs[R_EAX];
+    vmsa->rcx = env->regs[R_ECX];
+    vmsa->rdx = env->regs[R_EDX];
+    vmsa->rbx = env->regs[R_EBX];
+    vmsa->rsp = env->regs[R_ESP];
+    vmsa->rbp = env->regs[R_EBP];
+    vmsa->rsi = env->regs[R_ESI];
+    vmsa->rdi = env->regs[R_EDI];
+
+#ifdef TARGET_X86_64
+    vmsa->r8 = env->regs[R_R8];
+    vmsa->r9 = env->regs[R_R9];
+    vmsa->r10 = env->regs[R_R10];
+    vmsa->r11 = env->regs[R_R11];
+    vmsa->r12 = env->regs[R_R12];
+    vmsa->r13 = env->regs[R_R13];
+    vmsa->r14 = env->regs[R_R14];
+    vmsa->r15 = env->regs[R_R15];
+#endif
+
+    vmsa->rip = env->eip;
+    vmsa->rflags = env->eflags;
+}
+
+static void sev_es_set_ap_context(uint32_t reset_addr)
+{
+    CPUState *cpu;
+    struct sev_es_save_area vmsa;
+    SegmentCache cs;
+
+    cs.selector = 0xf000;
+    cs.base = reset_addr & 0xffff0000;
+    cs.limit = 0xffff;
+    cs.flags = DESC_P_MASK | DESC_S_MASK | DESC_CS_MASK | DESC_R_MASK |
+               DESC_A_MASK;
+
+    CPU_FOREACH(cpu) {
+        if (cpu->cpu_index == 0) {
+            /* Do not update the BSP reset state */
+            continue;
+        }
+        initialize_vmsa(cpu, &vmsa);
+        seg_to_vmsa(&cs, &vmsa.cs);
+        vmsa.rip = reset_addr & 0x0000ffff;
+        sev_set_cpu_context(cpu->cpu_index, &vmsa,
+                            sizeof(struct sev_es_save_area),
+                            0, &error_fatal);
    }
+}

-    x86 = X86_CPU(cpu);
-    env = &x86->env;
-
-    cpu_x86_load_seg_cache(env, R_CS, 0xf000, sev_common->reset_cs, 0xffff,
-                           DESC_P_MASK | DESC_S_MASK | DESC_CS_MASK |
-                           DESC_R_MASK | DESC_A_MASK);
-
-    env->eip = sev_common->reset_ip;
+void sev_es_set_reset_vector(CPUState *cpu)
+{
+    if (sev_enabled()) {
+        sev_apply_cpu_context(cpu);
+    }
}

int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size)
{
-    CPUState *cpu;
    uint32_t addr;
    int ret;
-    SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs);

    if (!sev_es_enabled()) {
        return 0;
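
The CS:IP arithmetic in sev_es_set_ap_context() mirrors the architectural reset state of an x86 CPU: the selector stays 0xf000 while the cached segment base carries the upper bits of the entry point, so the first fetch lands exactly on the 32-bit reset address. A small worked example (illustrative only, not part of the patch):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t reset_addr = 0xfffffff0;            /* example AP entry point */
        uint32_t cs_base = reset_addr & 0xffff0000;  /* -> 0xffff0000 */
        uint32_t ip      = reset_addr & 0x0000ffff;  /* -> 0xfff0     */

        /* execution starts at cs_base + ip, i.e. the original address */
        printf("CS base %#x, IP %#x, first fetch at %#x\n",
               cs_base, ip, cs_base + ip);
        return 0;
    }

Note that cs_base is deliberately not selector << 4; exactly as in the hardware reset state (CS selector 0xf000 with base 0xffff0000), it is the cached base that places the AP on the vector parsed out of the firmware image.
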
@@ -1828,14 +2285,12 @@ int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size)
        return ret;
    }

+    /*
+     * The reset vector is saved into a CPU context for each AP but not for
+     * the BSP. This is applied during guest startup or when the CPU is reset.
+     */
    if (addr) {
-        sev_common->reset_cs = addr & 0xffff0000;
-        sev_common->reset_ip = addr & 0x0000ffff;
-        sev_common->reset_data_valid = true;
-
-        CPU_FOREACH(cpu) {
-            sev_es_set_reset_vector(cpu);
-        }
+        sev_es_set_ap_context(addr);
    }

    return 0;
@@ -2037,6 +2492,237 @@ static void sev_common_set_kernel_hashes(Object *obj, bool value, Error **errp)
    SEV_COMMON(obj)->kernel_hashes = value;
}

+static bool cgs_check_support(ConfidentialGuestPlatformType platform,
+                              uint16_t platform_version, uint8_t highest_vtl,
+                              uint64_t shared_gpa_boundary)
+{
+    return (((platform == CGS_PLATFORM_SEV_SNP) && sev_snp_enabled()) ||
+            ((platform == CGS_PLATFORM_SEV_ES) && sev_es_enabled()) ||
+            ((platform == CGS_PLATFORM_SEV) && sev_enabled()));
+}
+
+static int cgs_set_guest_state(hwaddr gpa, uint8_t *ptr, uint64_t len,
+                               ConfidentialGuestPageType memory_type,
+                               uint16_t cpu_index, Error **errp)
+{
+    SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs);
+    SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(sev_common);
+
+    if (sev_common->state == SEV_STATE_UNINIT) {
+        /* Pre-processing of IGVM file called from sev_common_kvm_init() */
+        if ((cpu_index == 0) && (memory_type == CGS_PAGE_TYPE_VMSA)) {
+            const struct sev_es_save_area *sa =
+                (const struct sev_es_save_area *)ptr;
+            if (len < sizeof(*sa)) {
+                error_setg(errp, "%s: invalid VMSA length encountered",
+                           __func__);
+                return -1;
+            }
+            if (check_sev_features(sev_common, sa->sev_features, errp) < 0) {
+                return -1;
+            }
+            sev_common->sev_features = sa->sev_features;
+        }
+        return 0;
+    }
+
+    if (!sev_enabled()) {
+        error_setg(errp, "%s: attempt to configure guest memory, but SEV "
+                   "is not enabled", __func__);
+        return -1;
+    }
+
+    switch (memory_type) {
+    case CGS_PAGE_TYPE_NORMAL:
+    case CGS_PAGE_TYPE_ZERO:
+        return klass->launch_update_data(sev_common, gpa, ptr, len, errp);
+
+    case CGS_PAGE_TYPE_VMSA:
+        if (!sev_es_enabled()) {
+            error_setg(errp,
+                       "%s: attempt to configure initial VMSA, but SEV-ES "
+                       "is not supported",
+                       __func__);
+            return -1;
+        }
+        if (check_vmsa_supported(sev_common, gpa,
+                                 (const struct sev_es_save_area *)ptr,
+                                 errp) < 0) {
+            return -1;
+        }
+        return sev_set_cpu_context(cpu_index, ptr, len, gpa, errp);
+
+    case CGS_PAGE_TYPE_UNMEASURED:
+        if (sev_snp_enabled()) {
+            return snp_launch_update_data(
+                gpa, ptr, len, KVM_SEV_SNP_PAGE_TYPE_UNMEASURED, errp);
+        }
+        /* No action required if not SEV-SNP */
+        return 0;
+
+    case CGS_PAGE_TYPE_SECRETS:
+        if (!sev_snp_enabled()) {
+            error_setg(errp,
+                       "%s: attempt to configure secrets page, but SEV-SNP "
+                       "is not supported",
+                       __func__);
+            return -1;
+        }
+        return snp_launch_update_data(gpa, ptr, len,
+                                      KVM_SEV_SNP_PAGE_TYPE_SECRETS, errp);
+
+    case CGS_PAGE_TYPE_REQUIRED_MEMORY:
+        if (kvm_convert_memory(gpa, len, true) < 0) {
+            error_setg(
+                errp,
+                "%s: failed to configure required memory. gpa: %lX, type: %d",
+                __func__, gpa, memory_type);
+            return -1;
+        }
+        return 0;
+
+    case CGS_PAGE_TYPE_CPUID:
+        if (!sev_snp_enabled()) {
+            error_setg(errp,
+                       "%s: attempt to configure CPUID page, but SEV-SNP "
+                       "is not supported",
+                       __func__);
+            return -1;
+        }
+        return snp_launch_update_cpuid(gpa, ptr, len, errp);
+    }
+    error_setg(errp, "%s: failed to update guest. gpa: %lX, type: %d", __func__,
+               gpa, memory_type);
+    return -1;
+}
+
+static int cgs_get_mem_map_entry(int index,
+                                 ConfidentialGuestMemoryMapEntry *entry,
+                                 Error **errp)
+{
+    struct e820_entry *table;
+    int num_entries;
+
+    SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs);
+    if (sev_common->state == SEV_STATE_UNINIT) {
+        /* Pre-processing of IGVM file called from sev_common_kvm_init() */
+        return 1;
+    }
+
+    num_entries = e820_get_table(&table);
+    if ((index < 0) || (index >= num_entries)) {
+        return 1;
+    }
+    entry->gpa = table[index].address;
+    entry->size = table[index].length;
+    switch (table[index].type) {
+    case E820_RAM:
+        entry->type = CGS_MEM_RAM;
+        break;
+    case E820_RESERVED:
+        entry->type = CGS_MEM_RESERVED;
+        break;
+    case E820_ACPI:
+        entry->type = CGS_MEM_ACPI;
+        break;
+    case E820_NVS:
+        entry->type = CGS_MEM_NVS;
+        break;
+    case E820_UNUSABLE:
+        entry->type = CGS_MEM_UNUSABLE;
+        break;
+    }
+    return 0;
+}
+
+static int cgs_set_guest_policy(ConfidentialGuestPolicyType policy_type,
+                                uint64_t policy, void *policy_data1,
+                                uint32_t policy_data1_size, void *policy_data2,
+                                uint32_t policy_data2_size, Error **errp)
+{
+    SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs);
+    if (sev_common->state == SEV_STATE_UNINIT) {
+        /* Pre-processing of IGVM file called from sev_common_kvm_init() */
+        return 0;
+    }
+
+    if (policy_type != GUEST_POLICY_SEV) {
+        error_setg(errp, "%s: Invalid guest policy type provided for SEV: %d",
+                   __func__, policy_type);
+        return -1;
+    }
+    /*
+     * SEV-SNP handles policy differently. The policy flags are defined in
+     * kvm_start_conf.policy and an ID block and ID auth can be provided.
+     */
+    if (sev_snp_enabled()) {
+        SevSnpGuestState *sev_snp_guest =
+            SEV_SNP_GUEST(MACHINE(qdev_get_machine())->cgs);
+        struct kvm_sev_snp_launch_finish *finish =
+            &sev_snp_guest->kvm_finish_conf;
+
+        /*
+         * The policy consists of flags in 'policy' and optionally an ID block
+         * and ID auth in policy_data1 and policy_data2 respectively. The ID
+         * block and auth are optional so clear any previous ID block and auth
+         * and set them if provided, but always set the policy flags.
+         */
+        g_free(sev_snp_guest->id_block);
+        g_free((guchar *)finish->id_block_uaddr);
+        g_free(sev_snp_guest->id_auth);
+        g_free((guchar *)finish->id_auth_uaddr);
+        sev_snp_guest->id_block = NULL;
+        finish->id_block_uaddr = 0;
+        sev_snp_guest->id_auth = NULL;
+        finish->id_auth_uaddr = 0;
+
+        if (policy_data1_size > 0) {
+            struct sev_snp_id_authentication *id_auth =
+                (struct sev_snp_id_authentication *)policy_data2;
+
+            if (policy_data1_size != KVM_SEV_SNP_ID_BLOCK_SIZE) {
+                error_setg(errp, "%s: Invalid SEV-SNP ID block: incorrect size",
+                           __func__);
+                return -1;
+            }
+            if (policy_data2_size != KVM_SEV_SNP_ID_AUTH_SIZE) {
+                error_setg(errp,
+                           "%s: Invalid SEV-SNP ID auth block: incorrect size",
+                           __func__);
+                return -1;
+            }
+            assert(policy_data1 != NULL);
+            assert(policy_data2 != NULL);
+
+            finish->id_block_uaddr =
+                (__u64)g_memdup2(policy_data1, KVM_SEV_SNP_ID_BLOCK_SIZE);
+            finish->id_auth_uaddr =
+                (__u64)g_memdup2(policy_data2, KVM_SEV_SNP_ID_AUTH_SIZE);
+
+            /*
+             * Check if an author key has been provided and use that to flag
+             * whether the author key is enabled. The first byte of the author
+             * key must be non-zero to indicate the key type, which will
+             * currently always be 2.
+             */
+            sev_snp_guest->kvm_finish_conf.auth_key_en =
+                id_auth->author_key[0] ? 1 : 0;
+            finish->id_block_en = 1;
+        }
+        sev_snp_guest->kvm_start_conf.policy = policy;
+    } else {
+        SevGuestState *sev_guest = SEV_GUEST(MACHINE(qdev_get_machine())->cgs);
+        /* Only the policy flags are supported for SEV and SEV-ES */
+        if ((policy_data1_size > 0) || (policy_data2_size > 0) || !sev_guest) {
+            error_setg(errp, "%s: An ID block/ID auth block has been provided "
+                       "but SEV-SNP is not enabled", __func__);
+            return -1;
+        }
+        sev_guest->policy = policy;
+    }
+    return 0;
+}
+
static void sev_common_class_init(ObjectClass *oc, const void *data)
{
@@ -2060,6 +2746,8 @@ static void
sev_common_instance_init(Object *obj)
{
    SevCommonState *sev_common = SEV_COMMON(obj);
+    ConfidentialGuestSupportClass *cgs =
+        CONFIDENTIAL_GUEST_SUPPORT_GET_CLASS(obj);

    sev_common->kvm_type = -1;

@@ -2070,6 +2758,12 @@ sev_common_instance_init(Object *obj)
    object_property_add_uint32_ptr(obj, "reduced-phys-bits",
                                   &sev_common->reduced_phys_bits,
                                   OBJ_PROP_FLAG_READWRITE);
+    cgs->check_support = cgs_check_support;
+    cgs->set_guest_state = cgs_set_guest_state;
+    cgs->get_mem_map_entry = cgs_get_mem_map_entry;
+    cgs->set_guest_policy = cgs_set_guest_policy;
+
+    QTAILQ_INIT(&sev_common->launch_vmsa);
}

/* sev guest info common to sev/sev-es/sev-snp */
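
One detail in cgs_set_guest_policy() worth noting: kvm_sev_snp_launch_finish carries the ID block and ID auth as plain __u64 user addresses, so the code stores g_memdup2() copies in those fields and later releases them by casting back to a pointer. A minimal sketch of that ownership pattern (invented struct and size, GLib 2.68+ only; not part of the patch):

    #include <glib.h>

    struct finish_conf {          /* stand-in for kvm_sev_snp_launch_finish */
        guint64 id_block_uaddr;
    };

    static void set_id_block(struct finish_conf *finish,
                             gconstpointer blk, gsize size)
    {
        /* free any copy installed by an earlier call */
        g_free((gpointer)(guintptr)finish->id_block_uaddr);
        /* g_memdup2() is the overflow-safe successor of g_memdup() */
        finish->id_block_uaddr = (guint64)(guintptr)g_memdup2(blk, size);
    }

    int main(void)
    {
        struct finish_conf finish = { 0 };
        guint8 blk[96] = { 0 };      /* example size only */

        set_id_block(&finish, blk, sizeof(blk));
        set_id_block(&finish, blk, sizeof(blk));   /* old copy freed */
        g_free((gpointer)(guintptr)finish.id_block_uaddr);
        return 0;
    }
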
diff --git a/target/i386/sev.h b/target/i386/sev.h
index 373669e..9db1a80 100644
--- a/target/i386/sev.h
+++ b/target/i386/sev.h
@@ -44,6 +44,8 @@ bool sev_snp_enabled(void);
#define SEV_SNP_POLICY_SMT 0x10000
#define SEV_SNP_POLICY_DBG 0x80000

+#define SVM_SEV_FEAT_SNP_ACTIVE 1
+
typedef struct SevKernelLoaderContext {
    char *setup_data;
    size_t setup_size;
@@ -55,6 +57,128 @@ typedef struct SevKernelLoaderContext {
    size_t cmdline_size;
} SevKernelLoaderContext;

+/* Save area definition for SEV-ES and SEV-SNP guests */
+struct QEMU_PACKED sev_es_save_area {
+    struct vmcb_seg es;
+    struct vmcb_seg cs;
+    struct vmcb_seg ss;
+    struct vmcb_seg ds;
+    struct vmcb_seg fs;
+    struct vmcb_seg gs;
+    struct vmcb_seg gdtr;
+    struct vmcb_seg ldtr;
+    struct vmcb_seg idtr;
+    struct vmcb_seg tr;
+    uint64_t vmpl0_ssp;
+    uint64_t vmpl1_ssp;
+    uint64_t vmpl2_ssp;
+    uint64_t vmpl3_ssp;
+    uint64_t u_cet;
+    uint8_t reserved_0xc8[2];
+    uint8_t vmpl;
+    uint8_t cpl;
+    uint8_t reserved_0xcc[4];
+    uint64_t efer;
+    uint8_t reserved_0xd8[104];
+    uint64_t xss;
+    uint64_t cr4;
+    uint64_t cr3;
+    uint64_t cr0;
+    uint64_t dr7;
+    uint64_t dr6;
+    uint64_t rflags;
+    uint64_t rip;
+    uint64_t dr0;
+    uint64_t dr1;
+    uint64_t dr2;
+    uint64_t dr3;
+    uint64_t dr0_addr_mask;
+    uint64_t dr1_addr_mask;
+    uint64_t dr2_addr_mask;
+    uint64_t dr3_addr_mask;
+    uint8_t reserved_0x1c0[24];
+    uint64_t rsp;
+    uint64_t s_cet;
+    uint64_t ssp;
+    uint64_t isst_addr;
+    uint64_t rax;
+    uint64_t star;
+    uint64_t lstar;
+    uint64_t cstar;
+    uint64_t sfmask;
+    uint64_t kernel_gs_base;
+    uint64_t sysenter_cs;
+    uint64_t sysenter_esp;
+    uint64_t sysenter_eip;
+    uint64_t cr2;
+    uint8_t reserved_0x248[32];
+    uint64_t g_pat;
+    uint64_t dbgctl;
+    uint64_t br_from;
+    uint64_t br_to;
+    uint64_t last_excp_from;
+    uint64_t last_excp_to;
+    uint8_t reserved_0x298[80];
+    uint32_t pkru;
+    uint32_t tsc_aux;
+    uint8_t reserved_0x2f0[24];
+    uint64_t rcx;
+    uint64_t rdx;
+    uint64_t rbx;
+    uint64_t reserved_0x320; /* rsp already available at 0x01d8 */
+    uint64_t rbp;
+    uint64_t rsi;
+    uint64_t rdi;
+    uint64_t r8;
+    uint64_t r9;
+    uint64_t r10;
+    uint64_t r11;
+    uint64_t r12;
+    uint64_t r13;
+    uint64_t r14;
+    uint64_t r15;
+    uint8_t reserved_0x380[16];
+    uint64_t guest_exit_info_1;
+    uint64_t guest_exit_info_2;
+    uint64_t guest_exit_int_info;
+    uint64_t guest_nrip;
+    uint64_t sev_features;
+    uint64_t vintr_ctrl;
+    uint64_t guest_exit_code;
+    uint64_t virtual_tom;
+    uint64_t tlb_id;
+    uint64_t pcpu_id;
+    uint64_t event_inj;
+    uint64_t xcr0;
+    uint8_t reserved_0x3f0[16];
+
+    /* Floating point area */
+    uint64_t x87_dp;
+    uint32_t mxcsr;
+    uint16_t x87_ftw;
+    uint16_t x87_fsw;
+    uint16_t x87_fcw;
+    uint16_t x87_fop;
+    uint16_t x87_ds;
+    uint16_t x87_cs;
+    uint64_t x87_rip;
+    uint8_t fpreg_x87[80];
+    uint8_t fpreg_xmm[256];
+    uint8_t fpreg_ymm[256];
+};
+
+struct QEMU_PACKED sev_snp_id_authentication {
+    uint32_t id_key_alg;
+    uint32_t auth_key_algo;
+    uint8_t reserved[56];
+    uint8_t id_block_sig[512];
+    uint8_t id_key[1028];
+    uint8_t reserved2[60];
+    uint8_t id_key_sig[512];
+    uint8_t author_key[1028];
+    uint8_t reserved3[892];
+};
+
bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp);

int sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp);
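
A nice property of the sev_es_save_area declaration above is that the reserved_0x* field names encode their own byte offsets, which makes the layout self-checking. Assuming this header is includable and the usual 16-byte SVM vmcb_seg layout (selector/attrib/limit/base), a few illustrative offsetof() spot checks (not part of the patch) should all hold:

    #include <stddef.h>
    #include "sev.h"              /* wherever sev_es_save_area is visible */

    /* every reserved_0xNN field must start at offset 0xNN */
    _Static_assert(offsetof(struct sev_es_save_area, reserved_0xc8) == 0xc8, "");
    _Static_assert(offsetof(struct sev_es_save_area, efer) == 0xd0, "");
    /* agrees with the comment on reserved_0x320: rsp lives at 0x01d8 */
    _Static_assert(offsetof(struct sev_es_save_area, rsp) == 0x1d8, "");
    _Static_assert(offsetof(struct sev_es_save_area, sev_features) == 0x3b0, "");
    _Static_assert(offsetof(struct sev_es_save_area, reserved_0x3f0) == 0x3f0, "");
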
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index 55216e0..853b1c8 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -2722,14 +2722,14 @@ static void disas_insn(DisasContext *s, CPUState *cpu)
        if (decode.e.check & X86_CHECK_i64) {
            goto illegal_op;
        }
-        if ((decode.e.check & X86_CHECK_i64_amd) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1) {
+        if ((decode.e.check & X86_CHECK_i64_amd) && !IS_INTEL_CPU(env)) {
            goto illegal_op;
        }
    } else {
        if (decode.e.check & X86_CHECK_o64) {
            goto illegal_op;
        }
-        if ((decode.e.check & X86_CHECK_o64_intel) && env->cpuid_vendor1 == CPUID_VENDOR_INTEL_1) {
+        if ((decode.e.check & X86_CHECK_o64_intel) && IS_INTEL_CPU(env)) {
            goto illegal_op;
        }
    }
diff --git a/target/i386/whpx/whpx-all.c b/target/i386/whpx/whpx-all.c
index faf56e1..22ac609 100644
--- a/target/i386/whpx/whpx-all.c
+++ b/target/i386/whpx/whpx-all.c
@@ -26,6 +26,8 @@
#include "qapi/qapi-types-common.h"
#include "qapi/qapi-visit-common.h"
#include "migration/blocker.h"
+#include "host-cpu.h"
+#include "accel/accel-cpu-target.h"
#include <winerror.h>

#include "whpx-internal.h"
@@ -2500,6 +2502,28 @@ static void whpx_set_kernel_irqchip(Object *obj, Visitor *v,
    }
}

+static void whpx_cpu_instance_init(CPUState *cs)
+{
+    X86CPU *cpu = X86_CPU(cs);
+    host_cpu_instance_init(cpu);
+}
+
+static void whpx_cpu_accel_class_init(ObjectClass *oc, const void *data)
+{
+    AccelCPUClass *acc = ACCEL_CPU_CLASS(oc);
+
+    acc->cpu_instance_init = whpx_cpu_instance_init;
+}
+
+static const TypeInfo whpx_cpu_accel_type = {
+    .name = ACCEL_CPU_NAME("whpx"),
+
+    .parent = TYPE_ACCEL_CPU,
+    .class_init = whpx_cpu_accel_class_init,
+    .abstract = true,
+};
+
/*
 * Partition support
 */
@@ -2726,6 +2750,7 @@ static const TypeInfo whpx_accel_type = {
static void whpx_type_init(void)
{
    type_register_static(&whpx_accel_type);
+    type_register_static(&whpx_cpu_accel_type);
}

bool init_whp_dispatch(void)
diff --git a/tests/functional/test_x86_cpu_model_versions.py b/tests/functional/test_x86_cpu_model_versions.py
index bd18acd..36c968f 100755
--- a/tests/functional/test_x86_cpu_model_versions.py
+++ b/tests/functional/test_x86_cpu_model_versions.py
@@ -72,44 +72,11 @@ class X86CPUModelAliases(QemuSystemTest):
        self.assertNotIn("EPYC-IBPB-v1", cpus,
                         "EPYC-IBPB shouldn't be versioned")

-    def test_4_0_alias_compatibility(self):
-        """
-        Check if pc-*-4.0 unversioned CPU model won't be reported as aliases
-        """
-        self.set_machine('pc-i440fx-4.0')
-        # pc-*-4.0 won't expose non-versioned CPU models as aliases
-        # We do this to help management software to keep compatibility
-        # with older QEMU versions that didn't have the versioned CPU model
-        self.vm.add_args('-S')
-        self.vm.launch()
-        cpus = dict((m['name'], m) for m in
-                    self.vm.cmd('query-cpu-definitions'))
-
-        self.assertFalse(cpus['Cascadelake-Server']['static'],
-                         'unversioned Cascadelake-Server CPU model must not be static')
-        self.assertNotIn('alias-of', cpus['Cascadelake-Server'],
-                         'Cascadelake-Server must not be an alias')
-        self.assertNotIn('alias-of', cpus['Cascadelake-Server-v1'],
-                         'Cascadelake-Server-v1 must not be an alias')
-
-        self.assertFalse(cpus['qemu64']['static'],
-                         'unversioned qemu64 CPU model must not be static')
-        self.assertNotIn('alias-of', cpus['qemu64'],
-                         'qemu64 must not be an alias')
-        self.assertNotIn('alias-of', cpus['qemu64-v1'],
-                         'qemu64-v1 must not be an alias')
-
-        self.validate_variant_aliases(cpus)
-
-        # On pc-*-4.0, no CPU model should be reported as an alias:
-        for name,c in cpus.items():
-            self.assertNotIn('alias-of', c, "%s shouldn't be an alias" % (name))
-
-    def test_4_1_alias(self):
+    def test_unversioned_alias(self):
        """
        Check if unversioned CPU model is an alias pointing to right version
        """
-        self.set_machine('pc-i440fx-4.1')
+        self.set_machine('pc')
        self.vm.add_args('-S')
        self.vm.launch()

@@ -133,7 +100,7 @@ class X86CPUModelAliases(QemuSystemTest):

        self.validate_variant_aliases(cpus)

-        # On pc-*-4.1, -noTSX and -IBRS models should be aliases:
+        # On recent PC machines, -noTSX and -IBRS models should be aliases:
        self.assertEqual(cpus["Haswell"].get('alias-of'),
                         "Haswell-v1",
                         "Haswell must be an alias")
@@ -247,8 +214,8 @@ class CascadelakeArchCapabilities(QemuSystemTest):
        cpu_path = self.vm.cmd('query-cpus-fast')[0].get('qom-path')
        return self.vm.cmd('qom-get', path=cpu_path, property=prop)

-    def test_4_1(self):
-        self.set_machine('pc-i440fx-4.1')
+    def test(self):
+        self.set_machine('pc')
        # machine-type only:
        self.vm.add_args('-S')
        self.set_vm_arg('-cpu',
@@ -256,80 +223,27 @@ class CascadelakeArchCapabilities(QemuSystemTest):
                        'enforce=off')
        self.vm.launch()
        self.assertFalse(self.get_cpu_prop('arch-capabilities'),
-                         'pc-i440fx-4.1 + Cascadelake-Server should not have arch-capabilities')
-
-    def test_4_0(self):
-        self.set_machine('pc-i440fx-4.0')
-        self.vm.add_args('-S')
-        self.set_vm_arg('-cpu',
-                        'Cascadelake-Server,x-force-features=on,check=off,'
-                        'enforce=off')
-        self.vm.launch()
-        self.assertFalse(self.get_cpu_prop('arch-capabilities'),
-                         'pc-i440fx-4.0 + Cascadelake-Server should not have arch-capabilities')
-
-    def test_set_4_0(self):
-        self.set_machine('pc-i440fx-4.0')
-        # command line must override machine-type if CPU model is not versioned:
-        self.vm.add_args('-S')
-        self.set_vm_arg('-cpu',
-                        'Cascadelake-Server,x-force-features=on,check=off,'
-                        'enforce=off,+arch-capabilities')
-        self.vm.launch()
-        self.assertTrue(self.get_cpu_prop('arch-capabilities'),
-                        'pc-i440fx-4.0 + Cascadelake-Server,+arch-capabilities should have arch-capabilities')
+                         'pc + Cascadelake-Server should not have arch-capabilities')

-    def test_unset_4_1(self):
-        self.set_machine('pc-i440fx-4.1')
+    def test_unset(self):
+        self.set_machine('pc')
        self.vm.add_args('-S')
        self.set_vm_arg('-cpu',
                        'Cascadelake-Server,x-force-features=on,check=off,'
                        'enforce=off,-arch-capabilities')
        self.vm.launch()
        self.assertFalse(self.get_cpu_prop('arch-capabilities'),
-                         'pc-i440fx-4.1 + Cascadelake-Server,-arch-capabilities should not have arch-capabilities')
-
-    def test_v1_4_0(self):
-        self.set_machine('pc-i440fx-4.0')
-        # versioned CPU model overrides machine-type:
-        self.vm.add_args('-S')
-        self.set_vm_arg('-cpu',
-                        'Cascadelake-Server-v1,x-force-features=on,check=off,'
-                        'enforce=off')
-        self.vm.launch()
-        self.assertFalse(self.get_cpu_prop('arch-capabilities'),
-                         'pc-i440fx-4.0 + Cascadelake-Server-v1 should not have arch-capabilities')
-
-    def test_v2_4_0(self):
-        self.set_machine('pc-i440fx-4.0')
-        self.vm.add_args('-S')
-        self.set_vm_arg('-cpu',
-                        'Cascadelake-Server-v2,x-force-features=on,check=off,'
-                        'enforce=off')
-        self.vm.launch()
-        self.assertTrue(self.get_cpu_prop('arch-capabilities'),
-                        'pc-i440fx-4.0 + Cascadelake-Server-v2 should have arch-capabilities')
-
-    def test_v1_set_4_0(self):
-        self.set_machine('pc-i440fx-4.0')
-        # command line must override machine-type and versioned CPU model:
-        self.vm.add_args('-S')
-        self.set_vm_arg('-cpu',
-                        'Cascadelake-Server-v1,x-force-features=on,check=off,'
-                        'enforce=off,+arch-capabilities')
-        self.vm.launch()
-        self.assertTrue(self.get_cpu_prop('arch-capabilities'),
-                        'pc-i440fx-4.0 + Cascadelake-Server-v1,+arch-capabilities should have arch-capabilities')
+                         'pc + Cascadelake-Server,-arch-capabilities should not have arch-capabilities')

-    def test_v2_unset_4_1(self):
-        self.set_machine('pc-i440fx-4.1')
+    def test_v2_unset(self):
+        self.set_machine('pc')
        self.vm.add_args('-S')
        self.set_vm_arg('-cpu',
                        'Cascadelake-Server-v2,x-force-features=on,check=off,'
                        'enforce=off,-arch-capabilities')
        self.vm.launch()
        self.assertFalse(self.get_cpu_prop('arch-capabilities'),
-                         'pc-i440fx-4.1 + Cascadelake-Server-v2,-arch-capabilities should not have arch-capabilities')
+                         'pc + Cascadelake-Server-v2,-arch-capabilities should not have arch-capabilities')

if __name__ == '__main__':
    QemuSystemTest.main()
diff --git a/tests/vm/freebsd b/tests/vm/freebsd
index 74b3b1e..2e96c9e 100755
--- a/tests/vm/freebsd
+++ b/tests/vm/freebsd
@@ -28,8 +28,8 @@ class FreeBSDVM(basevm.BaseVM):
    name = "freebsd"
    arch = "x86_64"

-    link = "https://download.freebsd.org/releases/CI-IMAGES/14.1-RELEASE/amd64/Latest/FreeBSD-14.1-RELEASE-amd64-BASIC-CI.raw.xz"
-    csum = "202fe27a05427f0a86d3ebb97712745186f2776ccc4f70d95466dd99a0238ba5"
+    link = "https://download.freebsd.org/releases/CI-IMAGES/14.3-RELEASE/amd64/Latest/FreeBSD-14.3-RELEASE-amd64-BASIC-CI.raw.xz"
csum = "ec0f5a4bbe63aa50a725d9fee0f1931f850e9a21cbebdadb991df00f168d6805" size = "20G" BUILD_SCRIPT = """ @@ -558,3 +558,15 @@ void qemu_print_log_usage(FILE *f) fprintf(f, "\nUse \"-d trace:help\" to get a list of trace events.\n\n"); #endif } + +#ifdef CONFIG_HAVE_RUST +ssize_t rust_fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream) +{ + /* + * Same as fwrite, but return -errno because Rust libc does not provide + * portable access to errno. :( + */ + int ret = fwrite(ptr, size, nmemb, stream); + return ret < 0 ? -errno : 0; +} +#endif |