author    | Stefan Hajnoczi <stefanha@redhat.com> | 2025-01-16 09:02:17 -0500
committer | Stefan Hajnoczi <stefanha@redhat.com> | 2025-01-16 09:02:18 -0500
commit    | 0e3327b690b76b7c3966b028110ee053cc16a385
tree      | 1bf1c254a5589f5e9f595a55eaa2a6e93bb4c2be
parent    | 7433709a147706ad7d1956b15669279933d0f82b
parent    | 3634039b93cc51816263e0cb5ba32e1b61142d5d
Merge tag 'for_upstream' of https://git.kernel.org/pub/scm/virt/kvm/mst/qemu into staging
virtio,pc,pci: features, fixes, cleanups
The big things here are:
stage-1 translation in VT-d
internal migration in vhost-user
GHES driver preparation for error injection
a new resource UUID feature in virtio-gpu
a new vmclock device
And as usual, fixes and cleanups.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
# -----BEGIN PGP SIGNATURE-----
#
# iQFDBAABCAAtFiEEXQn9CHHI+FuUyooNKB8NuNKNVGkFAmeIOiIPHG1zdEByZWRo
# YXQuY29tAAoJECgfDbjSjVRpORgIAL0clwZxQL7PIPJ91FwXc1bo6Do/HYquAzvH
# eA+ryCG5S5ewh/e2R8SdIUG7nYesEMWJGVL1gb3BFu7wgGh1aLaaTxQ1LIo5HpRF
# P0Ak3QO7TKIsSEcZIz9h3eMEpg6X9d8i2h7llp7H3qqXBbduO+cGfeNH/fZD5IEl
# 7DFvXuJUgUtZb38I+qtcO+9EQFKGHjgdQAN5P/I4vawWJdxN9sBfT4YVEgpVhiq/
# ALxdSeaEiXA4EXexdHVZhXiQzEBsCQ78RZIIDiRE8I34cVY7rolTodKRfr4bip3P
# 6Llu11yvzNi1gppOzkny3QFsRza3hV0RisWYjAMTwLhNCdi/mHQ=
# =GjDq
# -----END PGP SIGNATURE-----
# gpg: Signature made Wed 15 Jan 2025 17:43:46 EST
# gpg: using RSA key 5D09FD0871C8F85B94CA8A0D281F0DB8D28D5469
# gpg: issuer "mst@redhat.com"
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>" [full]
# gpg: aka "Michael S. Tsirkin <mst@redhat.com>" [full]
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17 0970 C350 3912 AFBE 8E67
# Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA 8A0D 281F 0DB8 D28D 5469
* tag 'for_upstream' of https://git.kernel.org/pub/scm/virt/kvm/mst/qemu: (49 commits)
hw/acpi: Add vmclock device
virtio-net: vhost-user: Implement internal migration
vhost: Add stubs for the migration state transfer interface
hw/cxl: Fix msix_notify: Assertion `vector < dev->msix_entries_nr`
tests: acpi: update expected blobs
pci: acpi: Windows 'PCI Label Id' bug workaround
tests: acpi: whitelist expected blobs
docs: acpi_hest_ghes: fix documentation for CPER size
acpi/ghes: Change ghes fill logic to work with only one source
acpi/ghes: move offset calculus to a separate function
acpi/ghes: better name the offset of the hardware error firmware
acpi/ghes: rename etc/hardware_error file macros
acpi/ghes: don't crash QEMU if ghes GED is not found
acpi/ghes: better name GHES memory error function
acpi/ghes: make the GHES record generation more generic
acpi/ghes: don't check if physical_address is not zero
acpi/ghes: Change the type for source_id
acpi/ghes: Remove a duplicated out of bounds check
acpi/ghes: Fix acpi_ghes_record_errors() argument
acpi/ghes: better handle source_id and notification
...
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
74 files changed, 1536 insertions, 326 deletions
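
The docs/specs/acpi_hest_ghes.rst hunk below corrects the documented size of each Error Status Data Block: it is ACPI_GHES_MAX_RAW_DATA_LENGTH (currently 1024 bytes), not the 4096 the old text claimed. A minimal standalone sketch of the resulting "etc/hardware_errors" size arithmetic; only the two constants come from the diff, the main() wrapper is illustrative:

    #include <stdint.h>
    #include <stdio.h>

    /* Values taken from the hw/acpi/ghes.c hunk below. */
    #define ACPI_GHES_MAX_RAW_DATA_LENGTH   1024u  /* one Error Status Data Block */
    #define ACPI_GHES_ERROR_SOURCE_COUNT    1u     /* N: hardware error sources */

    int main(void)
    {
        uint32_t n = ACPI_GHES_ERROR_SOURCE_COUNT;
        /* N Error Block Address entries + N Read Ack Register entries,
         * 8 bytes each, followed by N Error Status Data Blocks. */
        uint64_t blob = n * 8 * 2 + (uint64_t)n * ACPI_GHES_MAX_RAW_DATA_LENGTH;

        printf("etc/hardware_errors: %llu bytes for N=%u\n",
               (unsigned long long)blob, n);
        /* For N=1: 8 * 2 + 1024 = 1040 bytes. */
        return 0;
    }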
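Several of the acpi/ghes patches rework error recording around the GHESv2 Read Ack Register (see ghes_record_cper_errors() in the hw/acpi/ghes.c hunk). A toy model of that handshake, assuming guest memory is a plain buffer; record_cper() and the globals are hypothetical stand-ins, not QEMU API:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    static uint64_t read_ack_register = 1;  /* 1 = OSPM acked the last error */
    static uint8_t  error_status_block[1024];

    static int record_cper(const void *cper, size_t len)
    {
        if (len > sizeof(error_status_block)) {
            fprintf(stderr, "CPER record too big: %zu\n", len);
            return -1;
        }
        if (!read_ack_register) {
            /* Zero means the OSPM has not yet acknowledged the previous
             * error, so a new record would overwrite live data. */
            fprintf(stderr, "previous error not acknowledged\n");
            return -1;
        }
        read_ack_register = 0;          /* OSPM writes 1 back when done */
        memcpy(error_status_block, cper, len);
        return 0;
    }

    int main(void)
    {
        const char fake_cper[] = "fake CPER payload";

        printf("first record: %d\n", record_cper(fake_cper, sizeof(fake_cper)));
        printf("second record (no ack yet): %d\n",
               record_cper(fake_cper, sizeof(fake_cper)));
        read_ack_register = 1;          /* guest acknowledges */
        printf("third record: %d\n", record_cper(fake_cper, sizeof(fake_cper)));
        return 0;
    }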
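The new vmclock device (hw/acpi/vmclock.c below) publishes a shared page that the host updates under a seqcount plus a disruption marker. A single-threaded sketch of that protocol, with the smp_wmb() barriers elided and all names (host_update, guest_read_marker, struct vmclock_page) illustrative; the real layout lives in standard-headers/linux/vmclock-abi.h:

    #include <stdint.h>
    #include <stdio.h>

    struct vmclock_page {
        uint32_t seq_count;
        uint64_t disruption_marker;
    };

    static void host_update(struct vmclock_page *clk)
    {
        uint32_t seq = clk->seq_count | 1;  /* odd: update in progress */
        clk->seq_count = seq;
        /* smp_wmb() here in QEMU, pairing with guest read barriers */
        clk->disruption_marker++;
        clk->seq_count = seq + 1;           /* even: consistent again */
    }

    static uint64_t guest_read_marker(const struct vmclock_page *clk)
    {
        uint32_t before, after;
        uint64_t marker;

        do {
            before = clk->seq_count;
            marker = clk->disruption_marker;
            after = clk->seq_count;
        } while ((before & 1) || before != after);   /* retry on torn read */
        return marker;
    }

    int main(void)
    {
        struct vmclock_page clk = {0};

        host_update(&clk);    /* e.g. after restoring a saved VM image */
        printf("disruption marker: %llu, seq: %u\n",
               (unsigned long long)guest_read_marker(&clk), clk.seq_count);
        return 0;
    }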
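For VT-d stage-1 translation, the intel_iommu.c hunks add a canonical-address check on the IOVA (vtd_iova_fl_check_canonical): the bits above the address width must be a sign-extension of the top in-range bit. A minimal sketch of the same test, assuming a fixed address width rather than the per-PASID limit QEMU computes:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    static bool iova_is_canonical(uint64_t iova, unsigned aw_bits)
    {
        uint64_t iova_limit = 1ULL << aw_bits;        /* e.g. aw_bits = 48 */
        uint64_t upper_bits_mask = ~(iova_limit - 1);
        uint64_t upper_bits = iova & upper_bits_mask;
        bool msb = (iova & (iova_limit >> 1)) != 0;   /* top in-range bit */

        return msb ? upper_bits == upper_bits_mask : upper_bits == 0;
    }

    int main(void)
    {
        printf("%d\n", iova_is_canonical(0x00007fffffffffffULL, 48)); /* 1 */
        printf("%d\n", iova_is_canonical(0xffff800000000000ULL, 48)); /* 1 */
        printf("%d\n", iova_is_canonical(0x0000800000000000ULL, 48)); /* 0 */
        return 0;
    }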
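The slpte-to-pte renames in intel_iommu.c keep the level arithmetic unchanged: 4 KiB base pages and 9 IOVA bits per paging level. A small sketch of that arithmetic, with helper names shortened from the vtd_pt_level_* originals:

    #include <stdint.h>
    #include <stdio.h>

    #define VTD_PAGE_SHIFT_4K 12
    #define VTD_LEVEL_BITS    9

    static uint32_t pt_level_shift(uint32_t level)
    {
        return VTD_PAGE_SHIFT_4K + (level - 1) * VTD_LEVEL_BITS;
    }

    static uint64_t pt_level_page_mask(uint32_t level)
    {
        return ~((1ULL << pt_level_shift(level)) - 1);
    }

    int main(void)
    {
        /* level 1: 4 KiB pages, level 2: 2 MiB, level 3: 1 GiB */
        for (uint32_t level = 1; level <= 3; level++) {
            printf("level %u: shift %u, page size 0x%llx\n", level,
                   pt_level_shift(level),
                   (unsigned long long)(1ULL << pt_level_shift(level)));
        }
        printf("level 2 mask: 0x%016llx\n",
               (unsigned long long)pt_level_page_mask(2));
        return 0;
    }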
diff --git a/MAINTAINERS b/MAINTAINERS index 8b9d9a7..a928ce3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3711,6 +3711,7 @@ F: hw/i386/intel_iommu.c F: hw/i386/intel_iommu_internal.h F: include/hw/i386/intel_iommu.h F: tests/functional/test_intel_iommu.py +F: tests/qtest/intel-iommu-test.c AMD-Vi Emulation S: Orphan diff --git a/docs/specs/acpi_hest_ghes.rst b/docs/specs/acpi_hest_ghes.rst index 68f1fbe..c3e9f8d 100644 --- a/docs/specs/acpi_hest_ghes.rst +++ b/docs/specs/acpi_hest_ghes.rst @@ -67,8 +67,10 @@ Design Details (3) The address registers table contains N Error Block Address entries and N Read Ack Register entries. The size for each entry is 8-byte. The Error Status Data Block table contains N Error Status Data Block - entries. The size for each entry is 4096(0x1000) bytes. The total size - for the "etc/hardware_errors" fw_cfg blob is (N * 8 * 2 + N * 4096) bytes. + entries. The size for each entry is defined at the source code as + ACPI_GHES_MAX_RAW_DATA_LENGTH (currently 1024 bytes). The total size + for the "etc/hardware_errors" fw_cfg blob is + (N * 8 * 2 + N * ACPI_GHES_MAX_RAW_DATA_LENGTH) bytes. N is the number of the kinds of hardware error sources. (4) QEMU generates the ACPI linker/loader script for the firmware. The diff --git a/hw/acpi/Kconfig b/hw/acpi/Kconfig index e07d320..1d4e9f0 100644 --- a/hw/acpi/Kconfig +++ b/hw/acpi/Kconfig @@ -60,6 +60,11 @@ config ACPI_VMGENID default y depends on PC +config ACPI_VMCLOCK + bool + default y + depends on PC + config ACPI_VIOT bool depends on ACPI diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c index 9d530a2..f70a2c0 100644 --- a/hw/acpi/cpu.c +++ b/hw/acpi/cpu.c @@ -327,6 +327,7 @@ const VMStateDescription vmstate_cpu_hotplug = { #define CPU_EJECT_METHOD "CEJ0" #define CPU_OST_METHOD "COST" #define CPU_ADDED_LIST "CNEW" +#define CPU_EJ_LIST "CEJL" #define CPU_ENABLED "CPEN" #define CPU_SELECTOR "CSEL" @@ -488,7 +489,6 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, method = aml_method(CPU_SCAN_METHOD, 0, AML_SERIALIZED); { const uint8_t max_cpus_per_pass = 255; - Aml *else_ctx; Aml *while_ctx, *while_ctx2; Aml *has_event = aml_local(0); Aml *dev_chk = aml_int(1); @@ -499,6 +499,8 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, Aml *uid = aml_local(3); Aml *has_job = aml_local(4); Aml *new_cpus = aml_name(CPU_ADDED_LIST); + Aml *ej_cpus = aml_name(CPU_EJ_LIST); + Aml *num_ej_cpus = aml_local(5); aml_append(method, aml_acquire(ctrl_lock, 0xFFFF)); @@ -513,6 +515,8 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, */ aml_append(method, aml_name_decl(CPU_ADDED_LIST, aml_package(max_cpus_per_pass))); + aml_append(method, aml_name_decl(CPU_EJ_LIST, + aml_package(max_cpus_per_pass))); aml_append(method, aml_store(zero, uid)); aml_append(method, aml_store(one, has_job)); @@ -527,6 +531,7 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, aml_append(while_ctx2, aml_store(one, has_event)); aml_append(while_ctx2, aml_store(zero, num_added_cpus)); + aml_append(while_ctx2, aml_store(zero, num_ej_cpus)); /* * Scan CPUs, till there are CPUs with events or @@ -559,8 +564,10 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, * if CPU_ADDED_LIST is full, exit inner loop and process * collected CPUs */ - ifctx = aml_if( - aml_equal(num_added_cpus, aml_int(max_cpus_per_pass))); + ifctx = aml_if(aml_lor( + aml_equal(num_added_cpus, aml_int(max_cpus_per_pass)), + 
aml_equal(num_ej_cpus, aml_int(max_cpus_per_pass)) + )); { aml_append(ifctx, aml_store(one, has_job)); aml_append(ifctx, aml_break()); @@ -577,16 +584,16 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, aml_append(ifctx, aml_store(one, has_event)); } aml_append(while_ctx, ifctx); - else_ctx = aml_else(); + ifctx = aml_if(aml_equal(rm_evt, one)); { - aml_append(ifctx, - aml_call2(CPU_NOTIFY_METHOD, uid, eject_req)); - aml_append(ifctx, aml_store(one, rm_evt)); + /* cache to be removed CPUs to Notify later */ + aml_append(ifctx, aml_store(uid, + aml_index(ej_cpus, num_ej_cpus))); + aml_append(ifctx, aml_increment(num_ej_cpus)); aml_append(ifctx, aml_store(one, has_event)); } - aml_append(else_ctx, ifctx); - aml_append(while_ctx, else_ctx); + aml_append(while_ctx, ifctx); aml_append(while_ctx, aml_increment(uid)); } aml_append(while_ctx2, while_ctx); @@ -620,6 +627,24 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, aml_append(while_ctx, aml_increment(cpu_idx)); } aml_append(while_ctx2, while_ctx); + + /* + * Notify OSPM about to be removed CPUs and clear remove flag + */ + aml_append(while_ctx2, aml_store(zero, cpu_idx)); + while_ctx = aml_while(aml_lless(cpu_idx, num_ej_cpus)); + { + aml_append(while_ctx, + aml_store(aml_derefof(aml_index(ej_cpus, cpu_idx)), + uid)); + aml_append(while_ctx, + aml_call2(CPU_NOTIFY_METHOD, uid, eject_req)); + aml_append(while_ctx, aml_store(uid, cpu_selector)); + aml_append(while_ctx, aml_store(one, rm_evt)); + aml_append(while_ctx, aml_increment(cpu_idx)); + } + aml_append(while_ctx2, while_ctx); + /* * If another batch is needed, then it will resume scanning * exactly at -- and not after -- the last CPU that's currently diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c index 58540c0..c85d97c 100644 --- a/hw/acpi/generic_event_device.c +++ b/hw/acpi/generic_event_device.c @@ -363,7 +363,7 @@ static const VMStateDescription vmstate_ghes = { .version_id = 1, .minimum_version_id = 1, .fields = (const VMStateField[]) { - VMSTATE_UINT64(ghes_addr_le, AcpiGhesState), + VMSTATE_UINT64(hw_error_le, AcpiGhesState), VMSTATE_END_OF_LIST() }, }; @@ -371,7 +371,7 @@ static const VMStateDescription vmstate_ghes = { static bool ghes_needed(void *opaque) { AcpiGedState *s = opaque; - return s->ghes_state.ghes_addr_le; + return s->ghes_state.hw_error_le; } static const VMStateDescription vmstate_ghes_state = { diff --git a/hw/acpi/ghes-stub.c b/hw/acpi/ghes-stub.c index c315de1..7cec181 100644 --- a/hw/acpi/ghes-stub.c +++ b/hw/acpi/ghes-stub.c @@ -11,7 +11,7 @@ #include "qemu/osdep.h" #include "hw/acpi/ghes.h" -int acpi_ghes_record_errors(uint8_t source_id, uint64_t physical_address) +int acpi_ghes_memory_errors(uint16_t source_id, uint64_t physical_address) { return -1; } diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c index e9511d9..b709c17 100644 --- a/hw/acpi/ghes.c +++ b/hw/acpi/ghes.c @@ -28,15 +28,12 @@ #include "hw/nvram/fw_cfg.h" #include "qemu/uuid.h" -#define ACPI_GHES_ERRORS_FW_CFG_FILE "etc/hardware_errors" -#define ACPI_GHES_DATA_ADDR_FW_CFG_FILE "etc/hardware_errors_addr" +#define ACPI_HW_ERROR_FW_CFG_FILE "etc/hardware_errors" +#define ACPI_HW_ERROR_ADDR_FW_CFG_FILE "etc/hardware_errors_addr" /* The max size in bytes for one error block */ #define ACPI_GHES_MAX_RAW_DATA_LENGTH (1 * KiB) -/* Now only support ARMv8 SEA notification type error source */ -#define ACPI_GHES_ERROR_SOURCE_COUNT 1 - /* Generic Hardware Error Source version 2 */ #define 
ACPI_GHES_SOURCE_GENERIC_ERROR_V2 10 @@ -184,51 +181,24 @@ static void acpi_ghes_build_append_mem_cper(GArray *table, build_append_int_noprefix(table, 0, 7); } -static int acpi_ghes_record_mem_error(uint64_t error_block_address, - uint64_t error_physical_addr) +static void +ghes_gen_err_data_uncorrectable_recoverable(GArray *block, + const uint8_t *section_type, + int data_length) { - GArray *block; - - /* Memory Error Section Type */ - const uint8_t uefi_cper_mem_sec[] = - UUID_LE(0xA5BC1114, 0x6F64, 0x4EDE, 0xB8, 0x63, 0x3E, 0x83, \ - 0xED, 0x7C, 0x83, 0xB1); - /* invalid fru id: ACPI 4.0: 17.3.2.6.1 Generic Error Data, * Table 17-13 Generic Error Data Entry */ QemuUUID fru_id = {}; - uint32_t data_length; - - block = g_array_new(false, true /* clear */, 1); - - /* This is the length if adding a new generic error data entry*/ - data_length = ACPI_GHES_DATA_LENGTH + ACPI_GHES_MEM_CPER_LENGTH; - /* - * It should not run out of the preallocated memory if adding a new generic - * error data entry - */ - assert((data_length + ACPI_GHES_GESB_SIZE) <= - ACPI_GHES_MAX_RAW_DATA_LENGTH); /* Build the new generic error status block header */ acpi_ghes_generic_error_status(block, ACPI_GEBS_UNCORRECTABLE, 0, 0, data_length, ACPI_CPER_SEV_RECOVERABLE); /* Build this new generic error data entry header */ - acpi_ghes_generic_error_data(block, uefi_cper_mem_sec, + acpi_ghes_generic_error_data(block, section_type, ACPI_CPER_SEV_RECOVERABLE, 0, 0, ACPI_GHES_MEM_CPER_LENGTH, fru_id, 0); - - /* Build the memory section CPER for above new generic error data entry */ - acpi_ghes_build_append_mem_cper(block, error_physical_addr); - - /* Write the generic error data entry into guest memory */ - cpu_physical_memory_write(error_block_address, block->data, block->len); - - g_array_free(block, true); - - return 0; } /* @@ -236,7 +206,7 @@ static int acpi_ghes_record_mem_error(uint64_t error_block_address, * Initialize "etc/hardware_errors" and "etc/hardware_errors_addr" fw_cfg blobs. * See docs/specs/acpi_hest_ghes.rst for blobs format. */ -void build_ghes_error_table(GArray *hardware_errors, BIOSLinker *linker) +static void build_ghes_error_table(GArray *hardware_errors, BIOSLinker *linker) { int i, error_status_block_offset; @@ -264,7 +234,7 @@ void build_ghes_error_table(GArray *hardware_errors, BIOSLinker *linker) ACPI_GHES_MAX_RAW_DATA_LENGTH * ACPI_GHES_ERROR_SOURCE_COUNT); /* Tell guest firmware to place hardware_errors blob into RAM */ - bios_linker_loader_alloc(linker, ACPI_GHES_ERRORS_FW_CFG_FILE, + bios_linker_loader_alloc(linker, ACPI_HW_ERROR_FW_CFG_FILE, hardware_errors, sizeof(uint64_t), false); for (i = 0; i < ACPI_GHES_ERROR_SOURCE_COUNT; i++) { @@ -273,23 +243,31 @@ void build_ghes_error_table(GArray *hardware_errors, BIOSLinker *linker) * corresponding "Generic Error Status Block" */ bios_linker_loader_add_pointer(linker, - ACPI_GHES_ERRORS_FW_CFG_FILE, sizeof(uint64_t) * i, - sizeof(uint64_t), ACPI_GHES_ERRORS_FW_CFG_FILE, - error_status_block_offset + i * ACPI_GHES_MAX_RAW_DATA_LENGTH); + ACPI_HW_ERROR_FW_CFG_FILE, + sizeof(uint64_t) * i, + sizeof(uint64_t), + ACPI_HW_ERROR_FW_CFG_FILE, + error_status_block_offset + + i * ACPI_GHES_MAX_RAW_DATA_LENGTH); } /* * tell firmware to write hardware_errors GPA into * hardware_errors_addr fw_cfg, once the former has been initialized. 
*/ - bios_linker_loader_write_pointer(linker, ACPI_GHES_DATA_ADDR_FW_CFG_FILE, - 0, sizeof(uint64_t), ACPI_GHES_ERRORS_FW_CFG_FILE, 0); + bios_linker_loader_write_pointer(linker, ACPI_HW_ERROR_ADDR_FW_CFG_FILE, 0, + sizeof(uint64_t), + ACPI_HW_ERROR_FW_CFG_FILE, 0); } /* Build Generic Hardware Error Source version 2 (GHESv2) */ -static void build_ghes_v2(GArray *table_data, int source_id, BIOSLinker *linker) +static void build_ghes_v2(GArray *table_data, + BIOSLinker *linker, + enum AcpiGhesNotifyType notify, + uint16_t source_id) { uint64_t address_offset; + /* * Type: * Generic Hardware Error Source version 2(GHESv2 - Type 10) @@ -316,21 +294,13 @@ static void build_ghes_v2(GArray *table_data, int source_id, BIOSLinker *linker) build_append_gas(table_data, AML_AS_SYSTEM_MEMORY, 0x40, 0, 4 /* QWord access */, 0); bios_linker_loader_add_pointer(linker, ACPI_BUILD_TABLE_FILE, - address_offset + GAS_ADDR_OFFSET, sizeof(uint64_t), - ACPI_GHES_ERRORS_FW_CFG_FILE, source_id * sizeof(uint64_t)); + address_offset + GAS_ADDR_OFFSET, + sizeof(uint64_t), + ACPI_HW_ERROR_FW_CFG_FILE, + source_id * sizeof(uint64_t)); - switch (source_id) { - case ACPI_HEST_SRC_ID_SEA: - /* - * Notification Structure - * Now only enable ARMv8 SEA notification type - */ - build_ghes_hw_error_notification(table_data, ACPI_GHES_NOTIFY_SEA); - break; - default: - error_report("Not support this error source"); - abort(); - } + /* Notification Structure */ + build_ghes_hw_error_notification(table_data, notify); /* Error Status Block Length */ build_append_int_noprefix(table_data, ACPI_GHES_MAX_RAW_DATA_LENGTH, 4); @@ -344,9 +314,11 @@ static void build_ghes_v2(GArray *table_data, int source_id, BIOSLinker *linker) build_append_gas(table_data, AML_AS_SYSTEM_MEMORY, 0x40, 0, 4 /* QWord access */, 0); bios_linker_loader_add_pointer(linker, ACPI_BUILD_TABLE_FILE, - address_offset + GAS_ADDR_OFFSET, - sizeof(uint64_t), ACPI_GHES_ERRORS_FW_CFG_FILE, - (ACPI_GHES_ERROR_SOURCE_COUNT + source_id) * sizeof(uint64_t)); + address_offset + GAS_ADDR_OFFSET, + sizeof(uint64_t), + ACPI_HW_ERROR_FW_CFG_FILE, + (ACPI_GHES_ERROR_SOURCE_COUNT + source_id) + * sizeof(uint64_t)); /* * Read Ack Preserve field @@ -359,17 +331,21 @@ static void build_ghes_v2(GArray *table_data, int source_id, BIOSLinker *linker) } /* Build Hardware Error Source Table */ -void acpi_build_hest(GArray *table_data, BIOSLinker *linker, +void acpi_build_hest(GArray *table_data, GArray *hardware_errors, + BIOSLinker *linker, const char *oem_id, const char *oem_table_id) { AcpiTable table = { .sig = "HEST", .rev = 1, .oem_id = oem_id, .oem_table_id = oem_table_id }; + build_ghes_error_table(hardware_errors, linker); + acpi_table_begin(&table, table_data); /* Error Source Count */ build_append_int_noprefix(table_data, ACPI_GHES_ERROR_SOURCE_COUNT, 4); - build_ghes_v2(table_data, ACPI_HEST_SRC_ID_SEA, linker); + build_ghes_v2(table_data, linker, + ACPI_GHES_NOTIFY_SEA, ACPI_HEST_SRC_ID_SEA); acpi_table_end(linker, &table); } @@ -378,70 +354,132 @@ void acpi_ghes_add_fw_cfg(AcpiGhesState *ags, FWCfgState *s, GArray *hardware_error) { /* Create a read-only fw_cfg file for GHES */ - fw_cfg_add_file(s, ACPI_GHES_ERRORS_FW_CFG_FILE, hardware_error->data, + fw_cfg_add_file(s, ACPI_HW_ERROR_FW_CFG_FILE, hardware_error->data, hardware_error->len); /* Create a read-write fw_cfg file for Address */ - fw_cfg_add_file_callback(s, ACPI_GHES_DATA_ADDR_FW_CFG_FILE, NULL, NULL, - NULL, &(ags->ghes_addr_le), sizeof(ags->ghes_addr_le), false); + fw_cfg_add_file_callback(s, 
ACPI_HW_ERROR_ADDR_FW_CFG_FILE, NULL, NULL, + NULL, &(ags->hw_error_le), sizeof(ags->hw_error_le), false); ags->present = true; } -int acpi_ghes_record_errors(uint8_t source_id, uint64_t physical_address) +static void get_hw_error_offsets(uint64_t ghes_addr, + uint64_t *cper_addr, + uint64_t *read_ack_register_addr) { - uint64_t error_block_addr, read_ack_register_addr, read_ack_register = 0; - uint64_t start_addr; - bool ret = -1; + if (!ghes_addr) { + return; + } + + /* + * non-HEST version supports only one source, so no need to change + * the start offset based on the source ID. Also, we can't validate + * the source ID, as it is stored inside the HEST table. + */ + + cpu_physical_memory_read(ghes_addr, cper_addr, + sizeof(*cper_addr)); + + *cper_addr = le64_to_cpu(*cper_addr); + + /* + * As the current version supports only one source, the ack offset is + * just sizeof(uint64_t). + */ + *read_ack_register_addr = ghes_addr + sizeof(uint64_t); +} + +void ghes_record_cper_errors(const void *cper, size_t len, + uint16_t source_id, Error **errp) +{ + uint64_t cper_addr = 0, read_ack_register_addr = 0, read_ack_register; AcpiGedState *acpi_ged_state; AcpiGhesState *ags; - assert(source_id < ACPI_HEST_SRC_ID_RESERVED); + if (len > ACPI_GHES_MAX_RAW_DATA_LENGTH) { + error_setg(errp, "GHES CPER record is too big: %zd", len); + return; + } acpi_ged_state = ACPI_GED(object_resolve_path_type("", TYPE_ACPI_GED, NULL)); - g_assert(acpi_ged_state); + if (!acpi_ged_state) { + error_setg(errp, "Can't find ACPI_GED object"); + return; + } ags = &acpi_ged_state->ghes_state; - start_addr = le64_to_cpu(ags->ghes_addr_le); + assert(ACPI_GHES_ERROR_SOURCE_COUNT == 1); + get_hw_error_offsets(le64_to_cpu(ags->hw_error_le), + &cper_addr, &read_ack_register_addr); + + if (!cper_addr) { + error_setg(errp, "can not find Generic Error Status Block"); + return; + } + + cpu_physical_memory_read(read_ack_register_addr, + &read_ack_register, sizeof(read_ack_register)); + + /* zero means OSPM does not acknowledge the error */ + if (!read_ack_register) { + error_setg(errp, + "OSPM does not acknowledge previous error," + " so can not record CPER for current error anymore"); + return; + } + + read_ack_register = cpu_to_le64(0); + /* + * Clear the Read Ack Register, OSPM will write 1 to this register when + * it acknowledges the error. 
+ */ + cpu_physical_memory_write(read_ack_register_addr, + &read_ack_register, sizeof(uint64_t)); + + /* Write the generic error data entry into guest memory */ + cpu_physical_memory_write(cper_addr, cper, len); + + return; +} - if (physical_address) { +int acpi_ghes_memory_errors(uint16_t source_id, uint64_t physical_address) +{ + /* Memory Error Section Type */ + const uint8_t guid[] = + UUID_LE(0xA5BC1114, 0x6F64, 0x4EDE, 0xB8, 0x63, 0x3E, 0x83, \ + 0xED, 0x7C, 0x83, 0xB1); + Error *errp = NULL; + int data_length; + GArray *block; - if (source_id < ACPI_HEST_SRC_ID_RESERVED) { - start_addr += source_id * sizeof(uint64_t); - } + block = g_array_new(false, true /* clear */, 1); - cpu_physical_memory_read(start_addr, &error_block_addr, - sizeof(error_block_addr)); + data_length = ACPI_GHES_DATA_LENGTH + ACPI_GHES_MEM_CPER_LENGTH; + /* + * It should not run out of the preallocated memory if adding a new generic + * error data entry + */ + assert((data_length + ACPI_GHES_GESB_SIZE) <= + ACPI_GHES_MAX_RAW_DATA_LENGTH); - error_block_addr = le64_to_cpu(error_block_addr); + ghes_gen_err_data_uncorrectable_recoverable(block, guid, data_length); - read_ack_register_addr = start_addr + - ACPI_GHES_ERROR_SOURCE_COUNT * sizeof(uint64_t); + /* Build the memory section CPER for above new generic error data entry */ + acpi_ghes_build_append_mem_cper(block, physical_address); - cpu_physical_memory_read(read_ack_register_addr, - &read_ack_register, sizeof(read_ack_register)); + /* Report the error */ + ghes_record_cper_errors(block->data, block->len, source_id, &errp); - /* zero means OSPM does not acknowledge the error */ - if (!read_ack_register) { - error_report("OSPM does not acknowledge previous error," - " so can not record CPER for current error anymore"); - } else if (error_block_addr) { - read_ack_register = cpu_to_le64(0); - /* - * Clear the Read Ack Register, OSPM will write it to 1 when - * it acknowledges this error. - */ - cpu_physical_memory_write(read_ack_register_addr, - &read_ack_register, sizeof(uint64_t)); + g_array_free(block, true); - ret = acpi_ghes_record_mem_error(error_block_addr, - physical_address); - } else - error_report("can not find Generic Error Status Block"); + if (errp) { + error_report_err(errp); + return -1; } - return ret; + return 0; } bool acpi_ghes_present(void) diff --git a/hw/acpi/meson.build b/hw/acpi/meson.build index c8854f4..73f02b9 100644 --- a/hw/acpi/meson.build +++ b/hw/acpi/meson.build @@ -15,6 +15,7 @@ acpi_ss.add(when: 'CONFIG_ACPI_NVDIMM', if_false: files('acpi-nvdimm-stub.c')) acpi_ss.add(when: 'CONFIG_ACPI_PCI', if_true: files('pci.c')) acpi_ss.add(when: 'CONFIG_ACPI_CXL', if_true: files('cxl.c'), if_false: files('cxl-stub.c')) acpi_ss.add(when: 'CONFIG_ACPI_VMGENID', if_true: files('vmgenid.c')) +acpi_ss.add(when: 'CONFIG_ACPI_VMCLOCK', if_true: files('vmclock.c')) acpi_ss.add(when: 'CONFIG_ACPI_HW_REDUCED', if_true: files('generic_event_device.c')) acpi_ss.add(when: 'CONFIG_ACPI_HMAT', if_true: files('hmat.c')) acpi_ss.add(when: 'CONFIG_ACPI_APEI', if_true: files('ghes.c'), if_false: files('ghes-stub.c')) diff --git a/hw/acpi/vmclock.c b/hw/acpi/vmclock.c new file mode 100644 index 0000000..7387e5c --- /dev/null +++ b/hw/acpi/vmclock.c @@ -0,0 +1,179 @@ +/* + * Virtual Machine Clock Device + * + * Copyright © 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Authors: David Woodhouse <dwmw2@infradead.org> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. 
+ * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/module.h" +#include "hw/i386/e820_memory_layout.h" +#include "hw/acpi/acpi.h" +#include "hw/acpi/aml-build.h" +#include "hw/acpi/vmclock.h" +#include "hw/nvram/fw_cfg.h" +#include "hw/qdev-properties.h" +#include "hw/qdev-properties-system.h" +#include "migration/vmstate.h" +#include "system/reset.h" + +#include "standard-headers/linux/vmclock-abi.h" + +void vmclock_build_acpi(VmclockState *vms, GArray *table_data, + BIOSLinker *linker, const char *oem_id) +{ + Aml *ssdt, *dev, *scope, *crs; + AcpiTable table = { .sig = "SSDT", .rev = 1, + .oem_id = oem_id, .oem_table_id = "VMCLOCK" }; + + /* Put VMCLOCK into a separate SSDT table */ + acpi_table_begin(&table, table_data); + ssdt = init_aml_allocator(); + + scope = aml_scope("\\_SB"); + dev = aml_device("VCLK"); + aml_append(dev, aml_name_decl("_HID", aml_string("AMZNC10C"))); + aml_append(dev, aml_name_decl("_CID", aml_string("VMCLOCK"))); + aml_append(dev, aml_name_decl("_DDN", aml_string("VMCLOCK"))); + + /* Simple status method */ + aml_append(dev, aml_name_decl("_STA", aml_int(0xf))); + + crs = aml_resource_template(); + aml_append(crs, aml_qword_memory(AML_POS_DECODE, + AML_MIN_FIXED, AML_MAX_FIXED, + AML_CACHEABLE, AML_READ_ONLY, + 0xffffffffffffffffULL, + vms->physaddr, + vms->physaddr + VMCLOCK_SIZE - 1, + 0, VMCLOCK_SIZE)); + aml_append(dev, aml_name_decl("_CRS", crs)); + aml_append(scope, dev); + aml_append(ssdt, scope); + + g_array_append_vals(table_data, ssdt->buf->data, ssdt->buf->len); + acpi_table_end(linker, &table); + free_aml_allocator(); +} + +static void vmclock_update_guest(VmclockState *vms) +{ + uint64_t disruption_marker; + uint32_t seq_count; + + if (!vms->clk) { + return; + } + + seq_count = le32_to_cpu(vms->clk->seq_count) | 1; + vms->clk->seq_count = cpu_to_le32(seq_count); + /* These barriers pair with read barriers in the guest */ + smp_wmb(); + + disruption_marker = le64_to_cpu(vms->clk->disruption_marker); + disruption_marker++; + vms->clk->disruption_marker = cpu_to_le64(disruption_marker); + + /* These barriers pair with read barriers in the guest */ + smp_wmb(); + vms->clk->seq_count = cpu_to_le32(seq_count + 1); +} + +/* + * After restoring an image, we need to update the guest memory to notify + * it of clock disruption. + */ +static int vmclock_post_load(void *opaque, int version_id) +{ + VmclockState *vms = opaque; + + vmclock_update_guest(vms); + return 0; +} + +static const VMStateDescription vmstate_vmclock = { + .name = "vmclock", + .version_id = 1, + .minimum_version_id = 1, + .post_load = vmclock_post_load, + .fields = (const VMStateField[]) { + VMSTATE_UINT64(physaddr, VmclockState), + VMSTATE_END_OF_LIST() + }, +}; + +static void vmclock_handle_reset(void *opaque) +{ + VmclockState *vms = VMCLOCK(opaque); + + if (!memory_region_is_mapped(&vms->clk_page)) { + memory_region_add_subregion_overlap(get_system_memory(), + vms->physaddr, + &vms->clk_page, 0); + } +} + +static void vmclock_realize(DeviceState *dev, Error **errp) +{ + VmclockState *vms = VMCLOCK(dev); + + /* + * Given that this function is executing, there is at least one VMCLOCK + * device. Check if there are several. 
+ */ + if (!find_vmclock_dev()) { + error_setg(errp, "at most one %s device is permitted", TYPE_VMCLOCK); + return; + } + + vms->physaddr = VMCLOCK_ADDR; + + e820_add_entry(vms->physaddr, VMCLOCK_SIZE, E820_RESERVED); + + memory_region_init_ram(&vms->clk_page, OBJECT(dev), "vmclock_page", + VMCLOCK_SIZE, &error_abort); + memory_region_set_enabled(&vms->clk_page, true); + vms->clk = memory_region_get_ram_ptr(&vms->clk_page); + memset(vms->clk, 0, VMCLOCK_SIZE); + + vms->clk->magic = cpu_to_le32(VMCLOCK_MAGIC); + vms->clk->size = cpu_to_le16(VMCLOCK_SIZE); + vms->clk->version = cpu_to_le16(1); + + /* These are all zero and thus default, but be explicit */ + vms->clk->clock_status = VMCLOCK_STATUS_UNKNOWN; + vms->clk->counter_id = VMCLOCK_COUNTER_INVALID; + + qemu_register_reset(vmclock_handle_reset, vms); + + vmclock_update_guest(vms); +} + +static void vmclock_device_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->vmsd = &vmstate_vmclock; + dc->realize = vmclock_realize; + dc->hotpluggable = false; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); +} + +static const TypeInfo vmclock_device_info = { + .name = TYPE_VMCLOCK, + .parent = TYPE_DEVICE, + .instance_size = sizeof(VmclockState), + .class_init = vmclock_device_class_init, +}; + +static void vmclock_register_types(void) +{ + type_register_static(&vmclock_device_info); +} + +type_init(vmclock_register_types) diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c index c9b1305..3ac8f8e 100644 --- a/hw/arm/virt-acpi-build.c +++ b/hw/arm/virt-acpi-build.c @@ -946,10 +946,9 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables) build_dbg2(tables_blob, tables->linker, vms); if (vms->ras) { - build_ghes_error_table(tables->hardware_errors, tables->linker); acpi_add_table(table_offsets, tables_blob); - acpi_build_hest(tables_blob, tables->linker, vms->oem_id, - vms->oem_table_id); + acpi_build_hest(tables_blob, tables->hardware_errors, tables->linker, + vms->oem_id, vms->oem_table_id); } if (ms->numa_state->num_nodes > 0) { diff --git a/hw/display/vhost-user-gpu.c b/hw/display/vhost-user-gpu.c index 12d5c37..2aed624 100644 --- a/hw/display/vhost-user-gpu.c +++ b/hw/display/vhost-user-gpu.c @@ -631,6 +631,14 @@ vhost_user_gpu_device_realize(DeviceState *qdev, Error **errp) error_report("EDID requested but the backend doesn't support it."); g->parent_obj.conf.flags &= ~(1 << VIRTIO_GPU_FLAG_EDID_ENABLED); } + if (virtio_has_feature(g->vhost->dev.features, + VIRTIO_GPU_F_RESOURCE_UUID)) { + g->parent_obj.conf.flags |= 1 << VIRTIO_GPU_FLAG_RESOURCE_UUID_ENABLED; + } + if (virtio_has_feature(g->vhost->dev.features, + VIRTIO_GPU_F_RESOURCE_UUID)) { + g->parent_obj.conf.flags |= 1 << VIRTIO_GPU_FLAG_RESOURCE_UUID_ENABLED; + } if (!virtio_gpu_base_device_realize(qdev, NULL, NULL, errp)) { return; diff --git a/hw/display/virtio-gpu-base.c b/hw/display/virtio-gpu-base.c index 4fc7ef8..7827536 100644 --- a/hw/display/virtio-gpu-base.c +++ b/hw/display/virtio-gpu-base.c @@ -235,6 +235,9 @@ virtio_gpu_base_get_features(VirtIODevice *vdev, uint64_t features, if (virtio_gpu_context_init_enabled(g->conf)) { features |= (1 << VIRTIO_GPU_F_CONTEXT_INIT); } + if (virtio_gpu_resource_uuid_enabled(g->conf)) { + features |= (1 << VIRTIO_GPU_F_RESOURCE_UUID); + } return features; } diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig index 3281848..d34ce07 100644 --- a/hw/i386/Kconfig +++ b/hw/i386/Kconfig @@ -43,6 +43,7 @@ config PC select SERIAL_ISA select ACPI_PCI select ACPI_VMGENID + select 
ACPI_VMCLOCK select VIRTIO_PMEM_SUPPORTED select VIRTIO_MEM_SUPPORTED select HV_BALLOON_SUPPORTED diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 733b8f0..53b7306 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -43,6 +43,7 @@ #include "system/tpm.h" #include "hw/acpi/tpm.h" #include "hw/acpi/vmgenid.h" +#include "hw/acpi/vmclock.h" #include "hw/acpi/erst.h" #include "hw/acpi/piix4.h" #include "system/tpm_backend.h" @@ -654,6 +655,7 @@ static Aml *aml_pci_pdsm(void) Aml *acpi_index = aml_local(2); Aml *zero = aml_int(0); Aml *one = aml_int(1); + Aml *not_supp = aml_int(0xFFFFFFFF); Aml *func = aml_arg(2); Aml *params = aml_arg(4); Aml *bnum = aml_derefof(aml_index(params, aml_int(0))); @@ -678,7 +680,7 @@ static Aml *aml_pci_pdsm(void) */ ifctx1 = aml_if(aml_lnot( aml_or(aml_equal(acpi_index, zero), - aml_equal(acpi_index, aml_int(0xFFFFFFFF)), NULL) + aml_equal(acpi_index, not_supp), NULL) )); { /* have supported functions */ @@ -704,18 +706,30 @@ static Aml *aml_pci_pdsm(void) { Aml *pkg = aml_package(2); - aml_append(pkg, zero); - /* - * optional, if not impl. should return null string - */ - aml_append(pkg, aml_string("%s", "")); - aml_append(ifctx, aml_store(pkg, ret)); - aml_append(ifctx, aml_store(aml_call2("AIDX", bnum, sunum), acpi_index)); + aml_append(ifctx, aml_store(pkg, ret)); /* - * update acpi-index to actual value + * Windows calls func=7 without checking if it's available, + * as workaround Microsoft has suggested to return invalid for func7 + * Package, so return 2 elements package but only initialize elements + * when acpi_index is supported and leave them uninitialized, which + * leads elements to being Uninitialized ObjectType and should trip + * Windows into discarding result as an unexpected and prevent setting + * bogus 'PCI Label' on the device. */ - aml_append(ifctx, aml_store(acpi_index, aml_index(ret, zero))); + ifctx1 = aml_if(aml_lnot(aml_lor( + aml_equal(acpi_index, zero), aml_equal(acpi_index, not_supp) + ))); + { + aml_append(ifctx1, aml_store(acpi_index, aml_index(ret, zero))); + /* + * optional, if not impl. 
should return null string + */ + aml_append(ifctx1, aml_store(aml_string("%s", ""), + aml_index(ret, one))); + } + aml_append(ifctx, ifctx1); + aml_append(ifctx, aml_return(ret)); } @@ -2432,7 +2446,7 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) uint8_t *u; GArray *tables_blob = tables->table_data; AcpiSlicOem slic_oem = { .id = NULL, .table_id = NULL }; - Object *vmgenid_dev; + Object *vmgenid_dev, *vmclock_dev; char *oem_id; char *oem_table_id; @@ -2505,6 +2519,13 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) tables->vmgenid, tables->linker, x86ms->oem_id); } + vmclock_dev = find_vmclock_dev(); + if (vmclock_dev) { + acpi_add_table(table_offsets, tables_blob); + vmclock_build_acpi(VMCLOCK(vmclock_dev), tables_blob, tables->linker, + x86ms->oem_id); + } + if (misc.has_hpet) { acpi_add_table(table_offsets, tables_blob); build_hpet(tables_blob, tables->linker, x86ms->oem_id, diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index a8c275f..f366c22 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -48,7 +48,10 @@ /* pe operations */ #define VTD_PE_GET_TYPE(pe) ((pe)->val[0] & VTD_SM_PASID_ENTRY_PGTT) -#define VTD_PE_GET_LEVEL(pe) (2 + (((pe)->val[0] >> 2) & VTD_SM_PASID_ENTRY_AW)) +#define VTD_PE_GET_FL_LEVEL(pe) \ + (4 + (((pe)->val[2] >> 2) & VTD_SM_PASID_ENTRY_FLPM)) +#define VTD_PE_GET_SL_LEVEL(pe) \ + (2 + (((pe)->val[0] >> 2) & VTD_SM_PASID_ENTRY_AW)) /* * PCI bus number (or SID) is not reliable since the device is usaully @@ -67,6 +70,11 @@ struct vtd_hiod_key { uint8_t devfn; }; +struct vtd_as_raw_key { + uint16_t sid; + uint32_t pasid; +}; + struct vtd_iotlb_key { uint64_t gfn; uint32_t pasid; @@ -284,15 +292,15 @@ static gboolean vtd_hash_remove_by_domain(gpointer key, gpointer value, } /* The shift of an addr for a certain level of paging structure */ -static inline uint32_t vtd_slpt_level_shift(uint32_t level) +static inline uint32_t vtd_pt_level_shift(uint32_t level) { assert(level != 0); - return VTD_PAGE_SHIFT_4K + (level - 1) * VTD_SL_LEVEL_BITS; + return VTD_PAGE_SHIFT_4K + (level - 1) * VTD_LEVEL_BITS; } -static inline uint64_t vtd_slpt_level_page_mask(uint32_t level) +static inline uint64_t vtd_pt_level_page_mask(uint32_t level) { - return ~((1ULL << vtd_slpt_level_shift(level)) - 1); + return ~((1ULL << vtd_pt_level_shift(level)) - 1); } static gboolean vtd_hash_remove_by_page(gpointer key, gpointer value, @@ -302,9 +310,43 @@ static gboolean vtd_hash_remove_by_page(gpointer key, gpointer value, VTDIOTLBPageInvInfo *info = (VTDIOTLBPageInvInfo *)user_data; uint64_t gfn = (info->addr >> VTD_PAGE_SHIFT_4K) & info->mask; uint64_t gfn_tlb = (info->addr & entry->mask) >> VTD_PAGE_SHIFT_4K; - return (entry->domain_id == info->domain_id) && - (((entry->gfn & info->mask) == gfn) || - (entry->gfn == gfn_tlb)); + + if (entry->domain_id != info->domain_id) { + return false; + } + + /* + * According to spec, IOTLB entries caching first-stage (PGTT=001b) or + * nested (PGTT=011b) mapping associated with specified domain-id are + * invalidated. Nested isn't supported yet, so only need to check 001b. 
+ */ + if (entry->pgtt == VTD_SM_PASID_ENTRY_FLT) { + return true; + } + + return (entry->gfn & info->mask) == gfn || entry->gfn == gfn_tlb; +} + +static gboolean vtd_hash_remove_by_page_piotlb(gpointer key, gpointer value, + gpointer user_data) +{ + VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value; + VTDIOTLBPageInvInfo *info = (VTDIOTLBPageInvInfo *)user_data; + uint64_t gfn = (info->addr >> VTD_PAGE_SHIFT_4K) & info->mask; + uint64_t gfn_tlb = (info->addr & entry->mask) >> VTD_PAGE_SHIFT_4K; + + /* + * According to spec, PASID-based-IOTLB Invalidation in page granularity + * doesn't invalidate IOTLB entries caching second-stage (PGTT=010b) + * or pass-through (PGTT=100b) mappings. Nested isn't supported yet, + * so only need to check first-stage (PGTT=001b) mappings. + */ + if (entry->pgtt != VTD_SM_PASID_ENTRY_FLT) { + return false; + } + + return entry->domain_id == info->domain_id && entry->pasid == info->pasid && + ((entry->gfn & info->mask) == gfn || entry->gfn == gfn_tlb); } /* Reset all the gen of VTDAddressSpace to zero and set the gen of @@ -349,7 +391,7 @@ static void vtd_reset_caches(IntelIOMMUState *s) static uint64_t vtd_get_iotlb_gfn(hwaddr addr, uint32_t level) { - return (addr & vtd_slpt_level_page_mask(level)) >> VTD_PAGE_SHIFT_4K; + return (addr & vtd_pt_level_page_mask(level)) >> VTD_PAGE_SHIFT_4K; } /* Must be called with IOMMU lock held */ @@ -360,7 +402,7 @@ static VTDIOTLBEntry *vtd_lookup_iotlb(IntelIOMMUState *s, uint16_t source_id, VTDIOTLBEntry *entry; unsigned level; - for (level = VTD_SL_PT_LEVEL; level < VTD_SL_PML4_LEVEL; level++) { + for (level = VTD_PT_LEVEL; level < VTD_PML4_LEVEL; level++) { key.gfn = vtd_get_iotlb_gfn(addr, level); key.level = level; key.sid = source_id; @@ -377,15 +419,15 @@ out: /* Must be with IOMMU lock held */ static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id, - uint16_t domain_id, hwaddr addr, uint64_t slpte, + uint16_t domain_id, hwaddr addr, uint64_t pte, uint8_t access_flags, uint32_t level, - uint32_t pasid) + uint32_t pasid, uint8_t pgtt) { VTDIOTLBEntry *entry = g_malloc(sizeof(*entry)); struct vtd_iotlb_key *key = g_malloc(sizeof(*key)); uint64_t gfn = vtd_get_iotlb_gfn(addr, level); - trace_vtd_iotlb_page_update(source_id, addr, slpte, domain_id); + trace_vtd_iotlb_page_update(source_id, addr, pte, domain_id); if (g_hash_table_size(s->iotlb) >= VTD_IOTLB_MAX_SIZE) { trace_vtd_iotlb_reset("iotlb exceeds size limit"); vtd_reset_iotlb_locked(s); @@ -393,10 +435,11 @@ static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id, entry->gfn = gfn; entry->domain_id = domain_id; - entry->slpte = slpte; + entry->pte = pte; entry->access_flags = access_flags; - entry->mask = vtd_slpt_level_page_mask(level); + entry->mask = vtd_pt_level_page_mask(level); entry->pasid = pasid; + entry->pgtt = pgtt; key->gfn = gfn; key->sid = source_id; @@ -710,32 +753,32 @@ static inline dma_addr_t vtd_ce_get_slpt_base(VTDContextEntry *ce) return ce->lo & VTD_CONTEXT_ENTRY_SLPTPTR; } -static inline uint64_t vtd_get_slpte_addr(uint64_t slpte, uint8_t aw) +static inline uint64_t vtd_get_pte_addr(uint64_t pte, uint8_t aw) { - return slpte & VTD_SL_PT_BASE_ADDR_MASK(aw); + return pte & VTD_PT_BASE_ADDR_MASK(aw); } /* Whether the pte indicates the address of the page frame */ -static inline bool vtd_is_last_slpte(uint64_t slpte, uint32_t level) +static inline bool vtd_is_last_pte(uint64_t pte, uint32_t level) { - return level == VTD_SL_PT_LEVEL || (slpte & VTD_SL_PT_PAGE_SIZE_MASK); + return level == VTD_PT_LEVEL || (pte & 
VTD_PT_PAGE_SIZE_MASK); } -/* Get the content of a spte located in @base_addr[@index] */ -static uint64_t vtd_get_slpte(dma_addr_t base_addr, uint32_t index) +/* Get the content of a pte located in @base_addr[@index] */ +static uint64_t vtd_get_pte(dma_addr_t base_addr, uint32_t index) { - uint64_t slpte; + uint64_t pte; - assert(index < VTD_SL_PT_ENTRY_NR); + assert(index < VTD_PT_ENTRY_NR); if (dma_memory_read(&address_space_memory, - base_addr + index * sizeof(slpte), - &slpte, sizeof(slpte), MEMTXATTRS_UNSPECIFIED)) { - slpte = (uint64_t)-1; - return slpte; + base_addr + index * sizeof(pte), + &pte, sizeof(pte), MEMTXATTRS_UNSPECIFIED)) { + pte = (uint64_t)-1; + return pte; } - slpte = le64_to_cpu(slpte); - return slpte; + pte = le64_to_cpu(pte); + return pte; } /* Given an iova and the level of paging structure, return the offset @@ -743,36 +786,39 @@ static uint64_t vtd_get_slpte(dma_addr_t base_addr, uint32_t index) */ static inline uint32_t vtd_iova_level_offset(uint64_t iova, uint32_t level) { - return (iova >> vtd_slpt_level_shift(level)) & - ((1ULL << VTD_SL_LEVEL_BITS) - 1); + return (iova >> vtd_pt_level_shift(level)) & + ((1ULL << VTD_LEVEL_BITS) - 1); } /* Check Capability Register to see if the @level of page-table is supported */ -static inline bool vtd_is_level_supported(IntelIOMMUState *s, uint32_t level) +static inline bool vtd_is_sl_level_supported(IntelIOMMUState *s, uint32_t level) { return VTD_CAP_SAGAW_MASK & s->cap & (1ULL << (level - 2 + VTD_CAP_SAGAW_SHIFT)); } +static inline bool vtd_is_fl_level_supported(IntelIOMMUState *s, uint32_t level) +{ + return level == VTD_PML4_LEVEL; +} + /* Return true if check passed, otherwise false */ -static inline bool vtd_pe_type_check(X86IOMMUState *x86_iommu, - VTDPASIDEntry *pe) +static inline bool vtd_pe_type_check(IntelIOMMUState *s, VTDPASIDEntry *pe) { switch (VTD_PE_GET_TYPE(pe)) { case VTD_SM_PASID_ENTRY_FLT: + return !!(s->ecap & VTD_ECAP_FLTS); case VTD_SM_PASID_ENTRY_SLT: + return !!(s->ecap & VTD_ECAP_SLTS); case VTD_SM_PASID_ENTRY_NESTED: - break; + /* Not support NESTED page table type yet */ + return false; case VTD_SM_PASID_ENTRY_PT: - if (!x86_iommu->pt_supported) { - return false; - } - break; + return !!(s->ecap & VTD_ECAP_PT); default: /* Unknown type */ return false; } - return true; } static inline bool vtd_pdire_present(VTDPASIDDirEntry *pdire) @@ -796,7 +842,7 @@ static int vtd_get_pdire_from_pdir_table(dma_addr_t pasid_dir_base, addr = pasid_dir_base + index * entry_size; if (dma_memory_read(&address_space_memory, addr, pdire, entry_size, MEMTXATTRS_UNSPECIFIED)) { - return -VTD_FR_PASID_TABLE_INV; + return -VTD_FR_PASID_DIR_ACCESS_ERR; } pdire->val = le64_to_cpu(pdire->val); @@ -814,28 +860,35 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s, dma_addr_t addr, VTDPASIDEntry *pe) { + uint8_t pgtt; uint32_t index; dma_addr_t entry_size; - X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s); index = VTD_PASID_TABLE_INDEX(pasid); entry_size = VTD_PASID_ENTRY_SIZE; addr = addr + index * entry_size; if (dma_memory_read(&address_space_memory, addr, pe, entry_size, MEMTXATTRS_UNSPECIFIED)) { - return -VTD_FR_PASID_TABLE_INV; + return -VTD_FR_PASID_TABLE_ACCESS_ERR; } for (size_t i = 0; i < ARRAY_SIZE(pe->val); i++) { pe->val[i] = le64_to_cpu(pe->val[i]); } /* Do translation type check */ - if (!vtd_pe_type_check(x86_iommu, pe)) { - return -VTD_FR_PASID_TABLE_INV; + if (!vtd_pe_type_check(s, pe)) { + return -VTD_FR_PASID_TABLE_ENTRY_INV; } - if (!vtd_is_level_supported(s, VTD_PE_GET_LEVEL(pe))) { - 
return -VTD_FR_PASID_TABLE_INV; + pgtt = VTD_PE_GET_TYPE(pe); + if (pgtt == VTD_SM_PASID_ENTRY_SLT && + !vtd_is_sl_level_supported(s, VTD_PE_GET_SL_LEVEL(pe))) { + return -VTD_FR_PASID_TABLE_ENTRY_INV; + } + + if (pgtt == VTD_SM_PASID_ENTRY_FLT && + !vtd_is_fl_level_supported(s, VTD_PE_GET_FL_LEVEL(pe))) { + return -VTD_FR_PASID_TABLE_ENTRY_INV; } return 0; @@ -876,7 +929,7 @@ static int vtd_get_pe_from_pasid_table(IntelIOMMUState *s, } if (!vtd_pdire_present(&pdire)) { - return -VTD_FR_PASID_TABLE_INV; + return -VTD_FR_PASID_DIR_ENTRY_P; } ret = vtd_get_pe_from_pdire(s, pasid, &pdire, pe); @@ -885,7 +938,7 @@ static int vtd_get_pe_from_pasid_table(IntelIOMMUState *s, } if (!vtd_pe_present(pe)) { - return -VTD_FR_PASID_TABLE_INV; + return -VTD_FR_PASID_ENTRY_P; } return 0; @@ -938,7 +991,7 @@ static int vtd_ce_get_pasid_fpd(IntelIOMMUState *s, } if (!vtd_pdire_present(&pdire)) { - return -VTD_FR_PASID_TABLE_INV; + return -VTD_FR_PASID_DIR_ENTRY_P; } /* @@ -973,7 +1026,11 @@ static uint32_t vtd_get_iova_level(IntelIOMMUState *s, if (s->root_scalable) { vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid); - return VTD_PE_GET_LEVEL(&pe); + if (s->flts) { + return VTD_PE_GET_FL_LEVEL(&pe); + } else { + return VTD_PE_GET_SL_LEVEL(&pe); + } } return vtd_ce_get_level(ce); @@ -1041,9 +1098,9 @@ static inline uint64_t vtd_iova_limit(IntelIOMMUState *s, } /* Return true if IOVA passes range check, otherwise false. */ -static inline bool vtd_iova_range_check(IntelIOMMUState *s, - uint64_t iova, VTDContextEntry *ce, - uint8_t aw, uint32_t pasid) +static inline bool vtd_iova_sl_range_check(IntelIOMMUState *s, + uint64_t iova, VTDContextEntry *ce, + uint8_t aw, uint32_t pasid) { /* * Check if @iova is above 2^X-1, where X is the minimum of MGAW @@ -1060,7 +1117,11 @@ static dma_addr_t vtd_get_iova_pgtbl_base(IntelIOMMUState *s, if (s->root_scalable) { vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid); - return pe.val[0] & VTD_SM_PASID_ENTRY_SLPTPTR; + if (s->flts) { + return pe.val[2] & VTD_SM_PASID_ENTRY_FLPTPTR; + } else { + return pe.val[0] & VTD_SM_PASID_ENTRY_SLPTPTR; + } } return vtd_ce_get_slpt_base(ce); @@ -1084,17 +1145,17 @@ static bool vtd_slpte_nonzero_rsvd(uint64_t slpte, uint32_t level) /* * We should have caught a guest-mis-programmed level earlier, - * via vtd_is_level_supported. + * via vtd_is_sl_level_supported. */ assert(level < VTD_SPTE_RSVD_LEN); /* - * Zero level doesn't exist. The smallest level is VTD_SL_PT_LEVEL=1 and - * checked by vtd_is_last_slpte(). + * Zero level doesn't exist. The smallest level is VTD_PT_LEVEL=1 and + * checked by vtd_is_last_pte(). 
*/ assert(level); - if ((level == VTD_SL_PD_LEVEL || level == VTD_SL_PDP_LEVEL) && - (slpte & VTD_SL_PT_PAGE_SIZE_MASK)) { + if ((level == VTD_PD_LEVEL || level == VTD_PDP_LEVEL) && + (slpte & VTD_PT_PAGE_SIZE_MASK)) { /* large page */ rsvd_mask = vtd_spte_rsvd_large[level]; } else { @@ -1118,9 +1179,8 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce, uint32_t offset; uint64_t slpte; uint64_t access_right_check; - uint64_t xlat, size; - if (!vtd_iova_range_check(s, iova, ce, aw_bits, pasid)) { + if (!vtd_iova_sl_range_check(s, iova, ce, aw_bits, pasid)) { error_report_once("%s: detected IOVA overflow (iova=0x%" PRIx64 "," "pasid=0x%" PRIx32 ")", __func__, iova, pasid); return -VTD_FR_ADDR_BEYOND_MGAW; @@ -1131,7 +1191,7 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce, while (true) { offset = vtd_iova_level_offset(iova, level); - slpte = vtd_get_slpte(addr, offset); + slpte = vtd_get_pte(addr, offset); if (slpte == (uint64_t)-1) { error_report_once("%s: detected read error on DMAR slpte " @@ -1162,37 +1222,16 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce, return -VTD_FR_PAGING_ENTRY_RSVD; } - if (vtd_is_last_slpte(slpte, level)) { + if (vtd_is_last_pte(slpte, level)) { *slptep = slpte; *slpte_level = level; break; } - addr = vtd_get_slpte_addr(slpte, aw_bits); + addr = vtd_get_pte_addr(slpte, aw_bits); level--; } - xlat = vtd_get_slpte_addr(*slptep, aw_bits); - size = ~vtd_slpt_level_page_mask(level) + 1; - - /* - * From VT-d spec 3.14: Untranslated requests and translation - * requests that result in an address in the interrupt range will be - * blocked with condition code LGN.4 or SGN.8. - */ - if ((xlat > VTD_INTERRUPT_ADDR_LAST || - xlat + size - 1 < VTD_INTERRUPT_ADDR_FIRST)) { - return 0; - } else { - error_report_once("%s: xlat address is in interrupt range " - "(iova=0x%" PRIx64 ", level=0x%" PRIx32 ", " - "slpte=0x%" PRIx64 ", write=%d, " - "xlat=0x%" PRIx64 ", size=0x%" PRIx64 ", " - "pasid=0x%" PRIx32 ")", - __func__, iova, level, slpte, is_write, - xlat, size, pasid); - return s->scalable_mode ? -VTD_FR_SM_INTERRUPT_ADDR : - -VTD_FR_INTERRUPT_ADDR; - } + return 0; } typedef int (*vtd_page_walk_hook)(const IOMMUTLBEvent *event, void *private); @@ -1323,14 +1362,14 @@ static int vtd_page_walk_level(dma_addr_t addr, uint64_t start, trace_vtd_page_walk_level(addr, level, start, end); - subpage_size = 1ULL << vtd_slpt_level_shift(level); - subpage_mask = vtd_slpt_level_page_mask(level); + subpage_size = 1ULL << vtd_pt_level_shift(level); + subpage_mask = vtd_pt_level_page_mask(level); while (iova < end) { iova_next = (iova & subpage_mask) + subpage_size; offset = vtd_iova_level_offset(iova, level); - slpte = vtd_get_slpte(addr, offset); + slpte = vtd_get_pte(addr, offset); if (slpte == (uint64_t)-1) { trace_vtd_page_walk_skip_read(iova, iova_next); @@ -1353,12 +1392,12 @@ static int vtd_page_walk_level(dma_addr_t addr, uint64_t start, */ entry_valid = read_cur | write_cur; - if (!vtd_is_last_slpte(slpte, level) && entry_valid) { + if (!vtd_is_last_pte(slpte, level) && entry_valid) { /* * This is a valid PDE (or even bigger than PDE). We need * to walk one further level. 
*/ - ret = vtd_page_walk_level(vtd_get_slpte_addr(slpte, info->aw), + ret = vtd_page_walk_level(vtd_get_pte_addr(slpte, info->aw), iova, MIN(iova_next, end), level - 1, read_cur, write_cur, info); } else { @@ -1375,7 +1414,7 @@ static int vtd_page_walk_level(dma_addr_t addr, uint64_t start, event.entry.perm = IOMMU_ACCESS_FLAG(read_cur, write_cur); event.entry.addr_mask = ~subpage_mask; /* NOTE: this is only meaningful if entry_valid == true */ - event.entry.translated_addr = vtd_get_slpte_addr(slpte, info->aw); + event.entry.translated_addr = vtd_get_pte_addr(slpte, info->aw); event.type = event.entry.perm ? IOMMU_NOTIFIER_MAP : IOMMU_NOTIFIER_UNMAP; ret = vtd_page_walk_one(&event, info); @@ -1409,11 +1448,11 @@ static int vtd_page_walk(IntelIOMMUState *s, VTDContextEntry *ce, dma_addr_t addr = vtd_get_iova_pgtbl_base(s, ce, pasid); uint32_t level = vtd_get_iova_level(s, ce, pasid); - if (!vtd_iova_range_check(s, start, ce, info->aw, pasid)) { + if (!vtd_iova_sl_range_check(s, start, ce, info->aw, pasid)) { return -VTD_FR_ADDR_BEYOND_MGAW; } - if (!vtd_iova_range_check(s, end, ce, info->aw, pasid)) { + if (!vtd_iova_sl_range_check(s, end, ce, info->aw, pasid)) { /* Fix end so that it reaches the maximum */ end = vtd_iova_limit(s, ce, info->aw, pasid); } @@ -1528,7 +1567,7 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num, /* Check if the programming of context-entry is valid */ if (!s->root_scalable && - !vtd_is_level_supported(s, vtd_ce_get_level(ce))) { + !vtd_is_sl_level_supported(s, vtd_ce_get_level(ce))) { error_report_once("%s: invalid context entry: hi=%"PRIx64 ", lo=%"PRIx64" (level %d not supported)", __func__, ce->hi, ce->lo, @@ -1795,8 +1834,20 @@ static const bool vtd_qualified_faults[] = { [VTD_FR_ROOT_ENTRY_RSVD] = false, [VTD_FR_PAGING_ENTRY_RSVD] = true, [VTD_FR_CONTEXT_ENTRY_TT] = true, - [VTD_FR_PASID_TABLE_INV] = false, + [VTD_FR_PASID_DIR_ACCESS_ERR] = false, + [VTD_FR_PASID_DIR_ENTRY_P] = true, + [VTD_FR_PASID_TABLE_ACCESS_ERR] = false, + [VTD_FR_PASID_ENTRY_P] = true, + [VTD_FR_PASID_TABLE_ENTRY_INV] = true, + [VTD_FR_FS_PAGING_ENTRY_INV] = true, + [VTD_FR_FS_PAGING_ENTRY_P] = true, + [VTD_FR_FS_PAGING_ENTRY_RSVD] = true, + [VTD_FR_PASID_ENTRY_FSPTPTR_INV] = true, + [VTD_FR_FS_NON_CANONICAL] = true, + [VTD_FR_FS_PAGING_ENTRY_US] = true, + [VTD_FR_SM_WRITE] = true, [VTD_FR_SM_INTERRUPT_ADDR] = true, + [VTD_FR_FS_BIT_UPDATE_FAILED] = true, [VTD_FR_MAX] = false, }; @@ -1814,29 +1865,32 @@ static inline bool vtd_is_interrupt_addr(hwaddr addr) return VTD_INTERRUPT_ADDR_FIRST <= addr && addr <= VTD_INTERRUPT_ADDR_LAST; } -static gboolean vtd_find_as_by_sid(gpointer key, gpointer value, - gpointer user_data) +static gboolean vtd_find_as_by_sid_and_pasid(gpointer key, gpointer value, + gpointer user_data) { struct vtd_as_key *as_key = (struct vtd_as_key *)key; - uint16_t target_sid = *(uint16_t *)user_data; + struct vtd_as_raw_key *target = (struct vtd_as_raw_key *)user_data; uint16_t sid = PCI_BUILD_BDF(pci_bus_num(as_key->bus), as_key->devfn); - return sid == target_sid; + + return (as_key->pasid == target->pasid) && (sid == target->sid); } -static VTDAddressSpace *vtd_get_as_by_sid(IntelIOMMUState *s, uint16_t sid) +static VTDAddressSpace *vtd_get_as_by_sid_and_pasid(IntelIOMMUState *s, + uint16_t sid, + uint32_t pasid) { - uint8_t bus_num = PCI_BUS_NUM(sid); - VTDAddressSpace *vtd_as = s->vtd_as_cache[bus_num]; - - if (vtd_as && - (sid == PCI_BUILD_BDF(pci_bus_num(vtd_as->bus), vtd_as->devfn))) { - return vtd_as; - } + struct vtd_as_raw_key key = 
{ + .sid = sid, + .pasid = pasid + }; - vtd_as = g_hash_table_find(s->vtd_address_spaces, vtd_find_as_by_sid, &sid); - s->vtd_as_cache[bus_num] = vtd_as; + return g_hash_table_find(s->vtd_address_spaces, + vtd_find_as_by_sid_and_pasid, &key); +} - return vtd_as; +static VTDAddressSpace *vtd_get_as_by_sid(IntelIOMMUState *s, uint16_t sid) +{ + return vtd_get_as_by_sid_and_pasid(s, sid, PCI_NO_PASID); } static void vtd_pt_enable_fast_path(IntelIOMMUState *s, uint16_t source_id) @@ -1858,6 +1912,157 @@ out: trace_vtd_pt_enable_fast_path(source_id, success); } +/* + * Rsvd field masks for fpte: + * vtd_fpte_rsvd 4k pages + * vtd_fpte_rsvd_large large pages + * + * We support only 4-level page tables. + */ +#define VTD_FPTE_RSVD_LEN 5 +static uint64_t vtd_fpte_rsvd[VTD_FPTE_RSVD_LEN]; +static uint64_t vtd_fpte_rsvd_large[VTD_FPTE_RSVD_LEN]; + +static bool vtd_flpte_nonzero_rsvd(uint64_t flpte, uint32_t level) +{ + uint64_t rsvd_mask; + + /* + * We should have caught a guest-mis-programmed level earlier, + * via vtd_is_fl_level_supported. + */ + assert(level < VTD_FPTE_RSVD_LEN); + /* + * Zero level doesn't exist. The smallest level is VTD_PT_LEVEL=1 and + * checked by vtd_is_last_pte(). + */ + assert(level); + + if ((level == VTD_PD_LEVEL || level == VTD_PDP_LEVEL) && + (flpte & VTD_PT_PAGE_SIZE_MASK)) { + /* large page */ + rsvd_mask = vtd_fpte_rsvd_large[level]; + } else { + rsvd_mask = vtd_fpte_rsvd[level]; + } + + return flpte & rsvd_mask; +} + +static inline bool vtd_flpte_present(uint64_t flpte) +{ + return !!(flpte & VTD_FL_P); +} + +/* Return true if IOVA is canonical, otherwise false. */ +static bool vtd_iova_fl_check_canonical(IntelIOMMUState *s, uint64_t iova, + VTDContextEntry *ce, uint32_t pasid) +{ + uint64_t iova_limit = vtd_iova_limit(s, ce, s->aw_bits, pasid); + uint64_t upper_bits_mask = ~(iova_limit - 1); + uint64_t upper_bits = iova & upper_bits_mask; + bool msb = ((iova & (iova_limit >> 1)) != 0); + + if (msb) { + return upper_bits == upper_bits_mask; + } else { + return !upper_bits; + } +} + +static MemTxResult vtd_set_flag_in_pte(dma_addr_t base_addr, uint32_t index, + uint64_t pte, uint64_t flag) +{ + if (pte & flag) { + return MEMTX_OK; + } + pte |= flag; + pte = cpu_to_le64(pte); + return dma_memory_write(&address_space_memory, + base_addr + index * sizeof(pte), + &pte, sizeof(pte), + MEMTXATTRS_UNSPECIFIED); +} + +/* + * Given the @iova, get relevant @flptep. @flpte_level will be the last level + * of the translation, can be used for deciding the size of large page. 
+ */ +static int vtd_iova_to_flpte(IntelIOMMUState *s, VTDContextEntry *ce, + uint64_t iova, bool is_write, + uint64_t *flptep, uint32_t *flpte_level, + bool *reads, bool *writes, uint8_t aw_bits, + uint32_t pasid) +{ + dma_addr_t addr = vtd_get_iova_pgtbl_base(s, ce, pasid); + uint32_t level = vtd_get_iova_level(s, ce, pasid); + uint32_t offset; + uint64_t flpte, flag_ad = VTD_FL_A; + + if (!vtd_iova_fl_check_canonical(s, iova, ce, pasid)) { + error_report_once("%s: detected non canonical IOVA (iova=0x%" PRIx64 "," + "pasid=0x%" PRIx32 ")", __func__, iova, pasid); + return -VTD_FR_FS_NON_CANONICAL; + } + + while (true) { + offset = vtd_iova_level_offset(iova, level); + flpte = vtd_get_pte(addr, offset); + + if (flpte == (uint64_t)-1) { + if (level == vtd_get_iova_level(s, ce, pasid)) { + /* Invalid programming of pasid-entry */ + return -VTD_FR_PASID_ENTRY_FSPTPTR_INV; + } else { + return -VTD_FR_FS_PAGING_ENTRY_INV; + } + } + + if (!vtd_flpte_present(flpte)) { + *reads = false; + *writes = false; + return -VTD_FR_FS_PAGING_ENTRY_P; + } + + /* No emulated device supports supervisor privilege request yet */ + if (!(flpte & VTD_FL_US)) { + *reads = false; + *writes = false; + return -VTD_FR_FS_PAGING_ENTRY_US; + } + + *reads = true; + *writes = (*writes) && (flpte & VTD_FL_RW); + if (is_write && !(flpte & VTD_FL_RW)) { + return -VTD_FR_SM_WRITE; + } + if (vtd_flpte_nonzero_rsvd(flpte, level)) { + error_report_once("%s: detected flpte reserved non-zero " + "iova=0x%" PRIx64 ", level=0x%" PRIx32 + "flpte=0x%" PRIx64 ", pasid=0x%" PRIX32 ")", + __func__, iova, level, flpte, pasid); + return -VTD_FR_FS_PAGING_ENTRY_RSVD; + } + + if (vtd_is_last_pte(flpte, level) && is_write) { + flag_ad |= VTD_FL_D; + } + + if (vtd_set_flag_in_pte(addr, offset, flpte, flag_ad) != MEMTX_OK) { + return -VTD_FR_FS_BIT_UPDATE_FAILED; + } + + if (vtd_is_last_pte(flpte, level)) { + *flptep = flpte; + *flpte_level = level; + return 0; + } + + addr = vtd_get_pte_addr(flpte, aw_bits); + level--; + } +} + static void vtd_report_fault(IntelIOMMUState *s, int err, bool is_fpd_set, uint16_t source_id, @@ -1894,16 +2099,17 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, VTDContextEntry ce; uint8_t bus_num = pci_bus_num(bus); VTDContextCacheEntry *cc_entry; - uint64_t slpte, page_mask; + uint64_t pte, page_mask; uint32_t level, pasid = vtd_as->pasid; uint16_t source_id = PCI_BUILD_BDF(bus_num, devfn); int ret_fr; bool is_fpd_set = false; bool reads = true; bool writes = true; - uint8_t access_flags; + uint8_t access_flags, pgtt; bool rid2pasid = (pasid == PCI_NO_PASID) && s->root_scalable; VTDIOTLBEntry *iotlb_entry; + uint64_t xlat, size; /* * We have standalone memory region for interrupt addresses, we @@ -1915,13 +2121,13 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, cc_entry = &vtd_as->context_cache_entry; - /* Try to fetch slpte form IOTLB, we don't need RID2PASID logic */ + /* Try to fetch pte from IOTLB, we don't need RID2PASID logic */ if (!rid2pasid) { iotlb_entry = vtd_lookup_iotlb(s, source_id, pasid, addr); if (iotlb_entry) { - trace_vtd_iotlb_page_hit(source_id, addr, iotlb_entry->slpte, + trace_vtd_iotlb_page_hit(source_id, addr, iotlb_entry->pte, iotlb_entry->domain_id); - slpte = iotlb_entry->slpte; + pte = iotlb_entry->pte; access_flags = iotlb_entry->access_flags; page_mask = iotlb_entry->mask; goto out; @@ -1993,35 +2199,65 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, return true; } - /* Try to fetch slpte form IOTLB 
for RID2PASID slow path */ + /* Try to fetch pte from IOTLB for RID2PASID slow path */ if (rid2pasid) { iotlb_entry = vtd_lookup_iotlb(s, source_id, pasid, addr); if (iotlb_entry) { - trace_vtd_iotlb_page_hit(source_id, addr, iotlb_entry->slpte, + trace_vtd_iotlb_page_hit(source_id, addr, iotlb_entry->pte, iotlb_entry->domain_id); - slpte = iotlb_entry->slpte; + pte = iotlb_entry->pte; access_flags = iotlb_entry->access_flags; page_mask = iotlb_entry->mask; goto out; } } - ret_fr = vtd_iova_to_slpte(s, &ce, addr, is_write, &slpte, &level, - &reads, &writes, s->aw_bits, pasid); + if (s->flts && s->root_scalable) { + ret_fr = vtd_iova_to_flpte(s, &ce, addr, is_write, &pte, &level, + &reads, &writes, s->aw_bits, pasid); + pgtt = VTD_SM_PASID_ENTRY_FLT; + } else { + ret_fr = vtd_iova_to_slpte(s, &ce, addr, is_write, &pte, &level, + &reads, &writes, s->aw_bits, pasid); + pgtt = VTD_SM_PASID_ENTRY_SLT; + } + if (!ret_fr) { + xlat = vtd_get_pte_addr(pte, s->aw_bits); + size = ~vtd_pt_level_page_mask(level) + 1; + + /* + * Per VT-d spec 4.1 section 3.15: Untranslated requests and translation + * requests that result in an address in the interrupt range will be + * blocked with condition code LGN.4 or SGN.8. + */ + if ((xlat <= VTD_INTERRUPT_ADDR_LAST && + xlat + size - 1 >= VTD_INTERRUPT_ADDR_FIRST)) { + error_report_once("%s: xlat address is in interrupt range " + "(iova=0x%" PRIx64 ", level=0x%" PRIx32 ", " + "pte=0x%" PRIx64 ", write=%d, " + "xlat=0x%" PRIx64 ", size=0x%" PRIx64 ", " + "pasid=0x%" PRIx32 ")", + __func__, addr, level, pte, is_write, + xlat, size, pasid); + ret_fr = s->scalable_mode ? -VTD_FR_SM_INTERRUPT_ADDR : + -VTD_FR_INTERRUPT_ADDR; + } + } + if (ret_fr) { vtd_report_fault(s, -ret_fr, is_fpd_set, source_id, addr, is_write, pasid != PCI_NO_PASID, pasid); goto error; } - page_mask = vtd_slpt_level_page_mask(level); + page_mask = vtd_pt_level_page_mask(level); access_flags = IOMMU_ACCESS_FLAG(reads, writes); vtd_update_iotlb(s, source_id, vtd_get_domain_id(s, &ce, pasid), - addr, slpte, access_flags, level, pasid); + addr, pte, access_flags, level, pasid, pgtt); out: vtd_iommu_unlock(s); entry->iova = addr & page_mask; - entry->translated_addr = vtd_get_slpte_addr(slpte, s->aw_bits) & page_mask; + entry->translated_addr = vtd_get_pte_addr(pte, s->aw_bits) & page_mask; entry->addr_mask = ~page_mask; entry->perm = access_flags; return true; @@ -2215,8 +2451,13 @@ static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id) } } +/* + * There is no pasid field in the iotlb invalidation descriptor, so + * PCI_NO_PASID is passed as the parameter. Piotlb invalidation does support + * pasid: the pasid taken from its descriptor is passed, and it should never + * be PCI_NO_PASID. + */ static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s, - uint16_t domain_id, hwaddr addr, + uint16_t domain_id, hwaddr addr, uint8_t am, uint32_t pasid) { VTDAddressSpace *vtd_as; @@ -2225,19 +2466,37 @@ static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s, hwaddr size = (1 << am) * VTD_PAGE_SIZE; QLIST_FOREACH(vtd_as, &(s->vtd_as_with_notifiers), next) { - if (pasid != PCI_NO_PASID && pasid != vtd_as->pasid) { - continue; - } ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus), vtd_as->devfn, &ce); if (!ret && domain_id == vtd_get_domain_id(s, &ce, vtd_as->pasid)) { + uint32_t rid2pasid = PCI_NO_PASID; + + if (s->root_scalable) { + rid2pasid = VTD_CE_GET_RID2PASID(&ce); + } + + /* + * In legacy mode, vtd_as->pasid == pasid is always true. 
+ * In scalable mode, for a vtd address space backing a PCI + * device without pasid, we need to compare pasid with the + * rid2pasid of this device. + */ + if (!(vtd_as->pasid == pasid || + (vtd_as->pasid == PCI_NO_PASID && pasid == rid2pasid))) { + continue; + } + if (vtd_as_has_map_notifier(vtd_as)) { /* - * As long as we have MAP notifications registered in - * any of our IOMMU notifiers, we need to sync the - * shadow page table. + * When stage-1 translation is off, as long as we have MAP + * notifications registered in any of our IOMMU notifiers, + * we need to sync the shadow page table. Otherwise the VFIO + * device attaches to the nested page table instead of the + * shadow page table, so there is no need to sync. */ - vtd_sync_shadow_page_table_range(vtd_as, &ce, addr, size); + if (!s->flts || !s->root_scalable) { + vtd_sync_shadow_page_table_range(vtd_as, &ce, addr, size); + } } else { /* * For UNMAP-only notifiers, we don't need to walk the @@ -2689,6 +2948,106 @@ static bool vtd_process_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc) return true; } +static gboolean vtd_hash_remove_by_pasid(gpointer key, gpointer value, + gpointer user_data) +{ + VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value; + VTDIOTLBPageInvInfo *info = (VTDIOTLBPageInvInfo *)user_data; + + return ((entry->domain_id == info->domain_id) && + (entry->pasid == info->pasid)); +} + +static void vtd_piotlb_pasid_invalidate(IntelIOMMUState *s, + uint16_t domain_id, uint32_t pasid) +{ + VTDIOTLBPageInvInfo info; + VTDAddressSpace *vtd_as; + VTDContextEntry ce; + + info.domain_id = domain_id; + info.pasid = pasid; + + vtd_iommu_lock(s); + g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_pasid, + &info); + vtd_iommu_unlock(s); + + QLIST_FOREACH(vtd_as, &s->vtd_as_with_notifiers, next) { + if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus), + vtd_as->devfn, &ce) && + domain_id == vtd_get_domain_id(s, &ce, vtd_as->pasid)) { + uint32_t rid2pasid = VTD_CE_GET_RID2PASID(&ce); + + if ((vtd_as->pasid != PCI_NO_PASID || pasid != rid2pasid) && + vtd_as->pasid != pasid) { + continue; + } + + if (!s->flts || !vtd_as_has_map_notifier(vtd_as)) { + vtd_address_space_sync(vtd_as); + } + } + } +} + +static void vtd_piotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id, + uint32_t pasid, hwaddr addr, uint8_t am) +{ + VTDIOTLBPageInvInfo info; + + info.domain_id = domain_id; + info.pasid = pasid; + info.addr = addr; + info.mask = ~((1 << am) - 1); + + vtd_iommu_lock(s); + g_hash_table_foreach_remove(s->iotlb, + vtd_hash_remove_by_page_piotlb, &info); + vtd_iommu_unlock(s); + + vtd_iotlb_page_invalidate_notify(s, domain_id, addr, am, pasid); +} + +static bool vtd_process_piotlb_desc(IntelIOMMUState *s, + VTDInvDesc *inv_desc) +{ + uint16_t domain_id; + uint32_t pasid; + hwaddr addr; + uint8_t am; + uint64_t mask[4] = {VTD_INV_DESC_PIOTLB_RSVD_VAL0, + VTD_INV_DESC_PIOTLB_RSVD_VAL1, + VTD_INV_DESC_ALL_ONE, VTD_INV_DESC_ALL_ONE}; + + if (!vtd_inv_desc_reserved_check(s, inv_desc, mask, true, + __func__, "piotlb inv")) { + return false; + } + + domain_id = VTD_INV_DESC_PIOTLB_DID(inv_desc->val[0]); + pasid = VTD_INV_DESC_PIOTLB_PASID(inv_desc->val[0]); + switch (inv_desc->val[0] & VTD_INV_DESC_PIOTLB_G) { + case VTD_INV_DESC_PIOTLB_ALL_IN_PASID: + vtd_piotlb_pasid_invalidate(s, domain_id, pasid); + break; + + case VTD_INV_DESC_PIOTLB_PSI_IN_PASID: + am = VTD_INV_DESC_PIOTLB_AM(inv_desc->val[1]); + addr = (hwaddr) VTD_INV_DESC_PIOTLB_ADDR(inv_desc->val[1]); + vtd_piotlb_page_invalidate(s, domain_id, pasid, addr, am); + break; + + default: 
error_report_once("%s: invalid piotlb inv desc: hi=0x%"PRIx64 + ", lo=0x%"PRIx64" (type mismatch: 0x%llx)", + __func__, inv_desc->val[1], inv_desc->val[0], + inv_desc->val[0] & VTD_INV_DESC_IOTLB_G); + return false; + } + return true; +} + static bool vtd_process_inv_iec_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc) { @@ -2742,6 +3101,49 @@ static void do_invalidate_device_tlb(VTDAddressSpace *vtd_dev_as, memory_region_notify_iommu(&vtd_dev_as->iommu, 0, event); } +static bool vtd_process_device_piotlb_desc(IntelIOMMUState *s, + VTDInvDesc *inv_desc) +{ + uint16_t sid; + VTDAddressSpace *vtd_dev_as; + bool size; + bool global; + hwaddr addr; + uint32_t pasid; + uint64_t mask[4] = {VTD_INV_DESC_PASID_DEVICE_IOTLB_RSVD_VAL0, + VTD_INV_DESC_PASID_DEVICE_IOTLB_RSVD_VAL1, + VTD_INV_DESC_ALL_ONE, VTD_INV_DESC_ALL_ONE}; + + if (!vtd_inv_desc_reserved_check(s, inv_desc, mask, true, + __func__, "device piotlb inv")) { + return false; + } + + global = VTD_INV_DESC_PASID_DEVICE_IOTLB_GLOBAL(inv_desc->hi); + size = VTD_INV_DESC_PASID_DEVICE_IOTLB_SIZE(inv_desc->hi); + addr = VTD_INV_DESC_PASID_DEVICE_IOTLB_ADDR(inv_desc->hi); + sid = VTD_INV_DESC_PASID_DEVICE_IOTLB_SID(inv_desc->lo); + if (global) { + QLIST_FOREACH(vtd_dev_as, &s->vtd_as_with_notifiers, next) { + if ((vtd_dev_as->pasid != PCI_NO_PASID) && + (PCI_BUILD_BDF(pci_bus_num(vtd_dev_as->bus), + vtd_dev_as->devfn) == sid)) { + do_invalidate_device_tlb(vtd_dev_as, size, addr); + } + } + } else { + pasid = VTD_INV_DESC_PASID_DEVICE_IOTLB_PASID(inv_desc->lo); + vtd_dev_as = vtd_get_as_by_sid_and_pasid(s, sid, pasid); + if (!vtd_dev_as) { + return true; + } + + do_invalidate_device_tlb(vtd_dev_as, size, addr); + } + + return true; +} + static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc) { @@ -2807,6 +3209,13 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s) } break; + case VTD_INV_DESC_PIOTLB: + trace_vtd_inv_desc("p-iotlb", inv_desc.val[1], inv_desc.val[0]); + if (!vtd_process_piotlb_desc(s, &inv_desc)) { + return false; + } + break; + case VTD_INV_DESC_WAIT: trace_vtd_inv_desc("wait", inv_desc.hi, inv_desc.lo); if (!vtd_process_wait_desc(s, &inv_desc)) { @@ -2821,6 +3230,13 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s) } break; + case VTD_INV_DESC_DEV_PIOTLB: + trace_vtd_inv_desc("device-piotlb", inv_desc.hi, inv_desc.lo); + if (!vtd_process_device_piotlb_desc(s, &inv_desc)) { + return false; + } + break; + case VTD_INV_DESC_DEVICE: trace_vtd_inv_desc("device", inv_desc.hi, inv_desc.lo); if (!vtd_process_device_iotlb_desc(s, &inv_desc)) { @@ -2834,7 +3250,6 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s) * iommu driver) work, just return true is enough so far. 
*/ case VTD_INV_DESC_PC: - case VTD_INV_DESC_PIOTLB: if (s->scalable_mode) { break; } @@ -3413,11 +3828,13 @@ static const Property vtd_properties[] = { VTD_HOST_ADDRESS_WIDTH), DEFINE_PROP_BOOL("caching-mode", IntelIOMMUState, caching_mode, FALSE), DEFINE_PROP_BOOL("x-scalable-mode", IntelIOMMUState, scalable_mode, FALSE), + DEFINE_PROP_BOOL("x-flts", IntelIOMMUState, flts, FALSE), DEFINE_PROP_BOOL("snoop-control", IntelIOMMUState, snoop_control, false), DEFINE_PROP_BOOL("x-pasid-mode", IntelIOMMUState, pasid, false), DEFINE_PROP_BOOL("dma-drain", IntelIOMMUState, dma_drain, true), DEFINE_PROP_BOOL("dma-translation", IntelIOMMUState, dma_translation, true), DEFINE_PROP_BOOL("stale-tm", IntelIOMMUState, stale_tm, false), + DEFINE_PROP_BOOL("fs1gp", IntelIOMMUState, fs1gp, true), }; /* Read IRTE entry with specific index */ @@ -3914,7 +4331,13 @@ static bool vtd_check_hiod(IntelIOMMUState *s, HostIOMMUDevice *hiod, return false; } - return true; + if (!s->flts) { + /* All checks requested by VTD stage-2 translation pass */ + return true; + } + + error_setg(errp, "host device is incompatible with stage-1 translation"); + return false; } static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn, @@ -4137,7 +4560,12 @@ static void vtd_cap_init(IntelIOMMUState *s) } /* TODO: read cap/ecap from host to decide which cap to be exposed. */ - if (s->scalable_mode) { + if (s->flts) { + s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_FLTS; + if (s->fs1gp) { + s->cap |= VTD_CAP_FS1GP; + } + } else if (s->scalable_mode) { s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_SRS | VTD_ECAP_SLTS; } @@ -4193,6 +4621,18 @@ static void vtd_init(IntelIOMMUState *s) vtd_spte_rsvd_large[3] = VTD_SPTE_LPAGE_L3_RSVD_MASK(s->aw_bits, x86_iommu->dt_supported && s->stale_tm); + /* + * Rsvd field masks for fpte + */ + vtd_fpte_rsvd[0] = ~0ULL; + vtd_fpte_rsvd[1] = VTD_FPTE_PAGE_L1_RSVD_MASK(s->aw_bits); + vtd_fpte_rsvd[2] = VTD_FPTE_PAGE_L2_RSVD_MASK(s->aw_bits); + vtd_fpte_rsvd[3] = VTD_FPTE_PAGE_L3_RSVD_MASK(s->aw_bits); + vtd_fpte_rsvd[4] = VTD_FPTE_PAGE_L4_RSVD_MASK(s->aw_bits); + + vtd_fpte_rsvd_large[2] = VTD_FPTE_LPAGE_L2_RSVD_MASK(s->aw_bits); + vtd_fpte_rsvd_large[3] = VTD_FPTE_LPAGE_L3_RSVD_MASK(s->aw_bits); + if (s->scalable_mode || s->snoop_control) { vtd_spte_rsvd[1] &= ~VTD_SPTE_SNP; vtd_spte_rsvd_large[2] &= ~VTD_SPTE_SNP; @@ -4304,14 +4744,26 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp) } } - /* Currently only address widths supported are 39 and 48 bits */ - if ((s->aw_bits != VTD_HOST_AW_39BIT) && - (s->aw_bits != VTD_HOST_AW_48BIT)) { - error_setg(errp, "Supported values for aw-bits are: %d, %d", + if (!s->scalable_mode && s->flts) { + error_setg(errp, "x-flts is only available in scalable mode"); + return false; + } + + if (!s->flts && s->aw_bits != VTD_HOST_AW_39BIT && + s->aw_bits != VTD_HOST_AW_48BIT) { + error_setg(errp, "%s: supported values for aw-bits are: %d, %d", + s->scalable_mode ? 
"Scalable mode(flts=off)" : "Legacy mode", VTD_HOST_AW_39BIT, VTD_HOST_AW_48BIT); return false; } + if (s->flts && s->aw_bits != VTD_HOST_AW_48BIT) { + error_setg(errp, + "Scalable mode(flts=on): supported value for aw-bits is: %d", + VTD_HOST_AW_48BIT); + return false; + } + if (s->scalable_mode && !s->dma_drain) { error_setg(errp, "Need to set dma_drain for scalable mode"); return false; diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h index 4323fc5..e8b211e 100644 --- a/hw/i386/intel_iommu_internal.h +++ b/hw/i386/intel_iommu_internal.h @@ -195,6 +195,7 @@ #define VTD_ECAP_PASID (1ULL << 40) #define VTD_ECAP_SMTS (1ULL << 43) #define VTD_ECAP_SLTS (1ULL << 46) +#define VTD_ECAP_FLTS (1ULL << 47) /* CAP_REG */ /* (offset >> 4) << 24 */ @@ -211,6 +212,7 @@ #define VTD_CAP_SLLPS ((1ULL << 34) | (1ULL << 35)) #define VTD_CAP_DRAIN_WRITE (1ULL << 54) #define VTD_CAP_DRAIN_READ (1ULL << 55) +#define VTD_CAP_FS1GP (1ULL << 56) #define VTD_CAP_DRAIN (VTD_CAP_DRAIN_READ | VTD_CAP_DRAIN_WRITE) #define VTD_CAP_CM (1ULL << 7) #define VTD_PASID_ID_SHIFT 20 @@ -311,10 +313,28 @@ typedef enum VTDFaultReason { * request while disabled */ VTD_FR_IR_SID_ERR = 0x26, /* Invalid Source-ID */ - VTD_FR_PASID_TABLE_INV = 0x58, /*Invalid PASID table entry */ + /* PASID directory entry access failure */ + VTD_FR_PASID_DIR_ACCESS_ERR = 0x50, + /* The Present(P) field of pasid directory entry is 0 */ + VTD_FR_PASID_DIR_ENTRY_P = 0x51, + VTD_FR_PASID_TABLE_ACCESS_ERR = 0x58, /* PASID table entry access failure */ + /* The Present(P) field of pasid table entry is 0 */ + VTD_FR_PASID_ENTRY_P = 0x59, + VTD_FR_PASID_TABLE_ENTRY_INV = 0x5b, /*Invalid PASID table entry */ + + /* Fail to access a first-level paging entry (not FS_PML4E) */ + VTD_FR_FS_PAGING_ENTRY_INV = 0x70, + VTD_FR_FS_PAGING_ENTRY_P = 0x71, + /* Non-zero reserved field in present first-stage paging entry */ + VTD_FR_FS_PAGING_ENTRY_RSVD = 0x72, + VTD_FR_PASID_ENTRY_FSPTPTR_INV = 0x73, /* Invalid FSPTPTR in PASID entry */ + VTD_FR_FS_NON_CANONICAL = 0x80, /* SNG.1 : Address for FS not canonical.*/ + VTD_FR_FS_PAGING_ENTRY_US = 0x81, /* Privilege violation */ + VTD_FR_SM_WRITE = 0x85, /* No write permission */ /* Output address in the interrupt address range for scalable mode */ VTD_FR_SM_INTERRUPT_ADDR = 0x87, + VTD_FR_FS_BIT_UPDATE_FAILED = 0x91, /* SFS.10 */ VTD_FR_MAX, /* Guard */ } VTDFaultReason; @@ -367,6 +387,7 @@ typedef union VTDInvDesc VTDInvDesc; #define VTD_INV_DESC_WAIT 0x5 /* Invalidation Wait Descriptor */ #define VTD_INV_DESC_PIOTLB 0x6 /* PASID-IOTLB Invalidate Desc */ #define VTD_INV_DESC_PC 0x7 /* PASID-cache Invalidate Desc */ +#define VTD_INV_DESC_DEV_PIOTLB 0x8 /* PASID-based-DIOTLB inv_desc*/ #define VTD_INV_DESC_NONE 0 /* Not an Invalidate Descriptor */ /* Masks for Invalidation Wait Descriptor*/ @@ -397,11 +418,6 @@ typedef union VTDInvDesc VTDInvDesc; #define VTD_INV_DESC_IOTLB_AM(val) ((val) & 0x3fULL) #define VTD_INV_DESC_IOTLB_RSVD_LO 0xffffffff0000f100ULL #define VTD_INV_DESC_IOTLB_RSVD_HI 0xf80ULL -#define VTD_INV_DESC_IOTLB_PASID_PASID (2ULL << 4) -#define VTD_INV_DESC_IOTLB_PASID_PAGE (3ULL << 4) -#define VTD_INV_DESC_IOTLB_PASID(val) (((val) >> 32) & VTD_PASID_ID_MASK) -#define VTD_INV_DESC_IOTLB_PASID_RSVD_LO 0xfff00000000001c0ULL -#define VTD_INV_DESC_IOTLB_PASID_RSVD_HI 0xf80ULL /* Mask for Device IOTLB Invalidate Descriptor */ #define VTD_INV_DESC_DEVICE_IOTLB_ADDR(val) ((val) & 0xfffffffffffff000ULL) @@ -413,6 +429,16 @@ typedef union VTDInvDesc VTDInvDesc; /* Masks for Interrupt Entry 
Invalidate Descriptor */ #define VTD_INV_DESC_IEC_RSVD 0xffff000007fff1e0ULL +/* Masks for PASID based Device IOTLB Invalidate Descriptor */ +#define VTD_INV_DESC_PASID_DEVICE_IOTLB_ADDR(val) ((val) & \ + 0xfffffffffffff000ULL) +#define VTD_INV_DESC_PASID_DEVICE_IOTLB_SIZE(val) ((val >> 11) & 0x1) +#define VTD_INV_DESC_PASID_DEVICE_IOTLB_GLOBAL(val) ((val) & 0x1) +#define VTD_INV_DESC_PASID_DEVICE_IOTLB_SID(val) (((val) >> 16) & 0xffffULL) +#define VTD_INV_DESC_PASID_DEVICE_IOTLB_PASID(val) ((val >> 32) & 0xfffffULL) +#define VTD_INV_DESC_PASID_DEVICE_IOTLB_RSVD_VAL0 0xfff000000000f000ULL +#define VTD_INV_DESC_PASID_DEVICE_IOTLB_RSVD_VAL1 0x7feULL + /* Rsvd field masks for spte */ #define VTD_SPTE_SNP 0x800ULL @@ -436,6 +462,34 @@ typedef union VTDInvDesc VTDInvDesc; (0x3ffff800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM | VTD_SL_TM)) : \ (0x3ffff800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM)) +/* Rsvd field masks for fpte */ +#define VTD_FS_UPPER_IGNORED 0xfff0000000000000ULL +#define VTD_FPTE_PAGE_L1_RSVD_MASK(aw) \ + (~(VTD_HAW_MASK(aw) | VTD_FS_UPPER_IGNORED)) +#define VTD_FPTE_PAGE_L2_RSVD_MASK(aw) \ + (~(VTD_HAW_MASK(aw) | VTD_FS_UPPER_IGNORED)) +#define VTD_FPTE_PAGE_L3_RSVD_MASK(aw) \ + (~(VTD_HAW_MASK(aw) | VTD_FS_UPPER_IGNORED)) +#define VTD_FPTE_PAGE_L4_RSVD_MASK(aw) \ + (0x80ULL | ~(VTD_HAW_MASK(aw) | VTD_FS_UPPER_IGNORED)) + +#define VTD_FPTE_LPAGE_L2_RSVD_MASK(aw) \ + (0x1fe000ULL | ~(VTD_HAW_MASK(aw) | VTD_FS_UPPER_IGNORED)) +#define VTD_FPTE_LPAGE_L3_RSVD_MASK(aw) \ + (0x3fffe000ULL | ~(VTD_HAW_MASK(aw) | VTD_FS_UPPER_IGNORED)) + +/* Masks for PIOTLB Invalidate Descriptor */ +#define VTD_INV_DESC_PIOTLB_G (3ULL << 4) +#define VTD_INV_DESC_PIOTLB_ALL_IN_PASID (2ULL << 4) +#define VTD_INV_DESC_PIOTLB_PSI_IN_PASID (3ULL << 4) +#define VTD_INV_DESC_PIOTLB_DID(val) (((val) >> 16) & VTD_DOMAIN_ID_MASK) +#define VTD_INV_DESC_PIOTLB_PASID(val) (((val) >> 32) & 0xfffffULL) +#define VTD_INV_DESC_PIOTLB_AM(val) ((val) & 0x3fULL) +#define VTD_INV_DESC_PIOTLB_IH(val) (((val) >> 6) & 0x1) +#define VTD_INV_DESC_PIOTLB_ADDR(val) ((val) & ~0xfffULL) +#define VTD_INV_DESC_PIOTLB_RSVD_VAL0 0xfff000000000f1c0ULL +#define VTD_INV_DESC_PIOTLB_RSVD_VAL1 0xf80ULL + /* Information about page-selective IOTLB invalidate */ struct VTDIOTLBPageInvInfo { uint16_t domain_id; @@ -519,27 +573,38 @@ typedef struct VTDRootEntry VTDRootEntry; #define VTD_SM_PASID_ENTRY_AW 7ULL /* Adjusted guest-address-width */ #define VTD_SM_PASID_ENTRY_DID(val) ((val) & VTD_DOMAIN_ID_MASK) +#define VTD_SM_PASID_ENTRY_FLPM 3ULL +#define VTD_SM_PASID_ENTRY_FLPTPTR (~0xfffULL) + +/* First Level Paging Structure */ +/* Masks for First Level Paging Entry */ +#define VTD_FL_P 1ULL +#define VTD_FL_RW (1ULL << 1) +#define VTD_FL_US (1ULL << 2) +#define VTD_FL_A (1ULL << 5) +#define VTD_FL_D (1ULL << 6) + /* Second Level Page Translation Pointer*/ #define VTD_SM_PASID_ENTRY_SLPTPTR (~0xfffULL) -/* Paging Structure common */ -#define VTD_SL_PT_PAGE_SIZE_MASK (1ULL << 7) -/* Bits to decide the offset for each level */ -#define VTD_SL_LEVEL_BITS 9 - /* Second Level Paging Structure */ -#define VTD_SL_PML4_LEVEL 4 -#define VTD_SL_PDP_LEVEL 3 -#define VTD_SL_PD_LEVEL 2 -#define VTD_SL_PT_LEVEL 1 -#define VTD_SL_PT_ENTRY_NR 512 - /* Masks for Second Level Paging Entry */ #define VTD_SL_RW_MASK 3ULL #define VTD_SL_R 1ULL #define VTD_SL_W (1ULL << 1) -#define VTD_SL_PT_BASE_ADDR_MASK(aw) (~(VTD_PAGE_SIZE - 1) & VTD_HAW_MASK(aw)) #define VTD_SL_IGN_COM 0xbff0000000000000ULL #define VTD_SL_TM (1ULL << 62) +/* Common for both First Level and 
Second Level */ +#define VTD_PML4_LEVEL 4 +#define VTD_PDP_LEVEL 3 +#define VTD_PD_LEVEL 2 +#define VTD_PT_LEVEL 1 +#define VTD_PT_ENTRY_NR 512 +#define VTD_PT_PAGE_SIZE_MASK (1ULL << 7) +#define VTD_PT_BASE_ADDR_MASK(aw) (~(VTD_PAGE_SIZE - 1) & VTD_HAW_MASK(aw)) + +/* Bits to decide the offset for each level */ +#define VTD_LEVEL_BITS 9 + #endif diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 9334b03..b46975c 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -86,6 +86,7 @@ GlobalProperty pc_compat_9_1[] = { { "ICH9-LPC", "x-smi-swsmi-timer", "off" }, { "ICH9-LPC", "x-smi-periodic-timer", "off" }, { TYPE_INTEL_IOMMU_DEVICE, "stale-tm", "on" }, + { TYPE_INTEL_IOMMU_DEVICE, "aw-bits", "39" }, }; const size_t pc_compat_9_1_len = G_N_ELEMENTS(pc_compat_9_1); diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c index bd76527..0ae1704 100644 --- a/hw/mem/cxl_type3.c +++ b/hw/mem/cxl_type3.c @@ -843,7 +843,7 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp) ComponentRegisters *regs = &cxl_cstate->crb; MemoryRegion *mr = ®s->component_registers; uint8_t *pci_conf = pci_dev->config; - unsigned short msix_num = 6; + unsigned short msix_num = 10; int i, rc; uint16_t count; diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 06f096a..85e14b7 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -3337,6 +3337,117 @@ static const VMStateDescription vmstate_virtio_net_rss = { }, }; +static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev) +{ + VirtIONet *n = VIRTIO_NET(vdev); + NetClientState *nc; + struct vhost_net *net; + + if (!n->nic) { + return NULL; + } + + nc = qemu_get_queue(n->nic); + if (!nc) { + return NULL; + } + + net = get_vhost_net(nc->peer); + if (!net) { + return NULL; + } + + return &net->dev; +} + +static int vhost_user_net_save_state(QEMUFile *f, void *pv, size_t size, + const VMStateField *field, + JSONWriter *vmdesc) +{ + VirtIONet *n = pv; + VirtIODevice *vdev = VIRTIO_DEVICE(n); + struct vhost_dev *vhdev; + Error *local_error = NULL; + int ret; + + vhdev = virtio_net_get_vhost(vdev); + if (vhdev == NULL) { + error_reportf_err(local_error, + "Error getting vhost back-end of %s device %s: ", + vdev->name, vdev->parent_obj.canonical_path); + return -1; + } + + ret = vhost_save_backend_state(vhdev, f, &local_error); + if (ret < 0) { + error_reportf_err(local_error, + "Error saving back-end state of %s device %s: ", + vdev->name, vdev->parent_obj.canonical_path); + return ret; + } + + return 0; +} + +static int vhost_user_net_load_state(QEMUFile *f, void *pv, size_t size, + const VMStateField *field) +{ + VirtIONet *n = pv; + VirtIODevice *vdev = VIRTIO_DEVICE(n); + struct vhost_dev *vhdev; + Error *local_error = NULL; + int ret; + + vhdev = virtio_net_get_vhost(vdev); + if (vhdev == NULL) { + error_reportf_err(local_error, + "Error getting vhost back-end of %s device %s: ", + vdev->name, vdev->parent_obj.canonical_path); + return -1; + } + + ret = vhost_load_backend_state(vhdev, f, &local_error); + if (ret < 0) { + error_reportf_err(local_error, + "Error loading back-end state of %s device %s: ", + vdev->name, vdev->parent_obj.canonical_path); + return ret; + } + + return 0; +} + +static bool vhost_user_net_is_internal_migration(void *opaque) +{ + VirtIONet *n = opaque; + VirtIODevice *vdev = VIRTIO_DEVICE(n); + struct vhost_dev *vhdev; + + vhdev = virtio_net_get_vhost(vdev); + if (vhdev == NULL) { + return false; + } + + return vhost_supports_device_state(vhdev); +} + +static const VMStateDescription vhost_user_net_backend_state = { + .name = 
"virtio-net-device/backend", + .version_id = 0, + .needed = vhost_user_net_is_internal_migration, + .fields = (const VMStateField[]) { + { + .name = "backend", + .info = &(const VMStateInfo) { + .name = "virtio-net vhost-user backend state", + .get = vhost_user_net_load_state, + .put = vhost_user_net_save_state, + }, + }, + VMSTATE_END_OF_LIST() + } +}; + static const VMStateDescription vmstate_virtio_net_device = { .name = "virtio-net-device", .version_id = VIRTIO_NET_VM_VERSION, @@ -3389,6 +3500,7 @@ static const VMStateDescription vmstate_virtio_net_device = { }, .subsections = (const VMStateDescription * const []) { &vmstate_virtio_net_rss, + &vhost_user_net_backend_state, NULL } }; @@ -3950,29 +4062,6 @@ static bool dev_unplug_pending(void *opaque) return vdc->primary_unplug_pending(dev); } -static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev) -{ - VirtIONet *n = VIRTIO_NET(vdev); - NetClientState *nc; - struct vhost_net *net; - - if (!n->nic) { - return NULL; - } - - nc = qemu_get_queue(n->nic); - if (!nc) { - return NULL; - } - - net = get_vhost_net(nc->peer); - if (!net) { - return NULL; - } - - return &net->dev; -} - static const VMStateDescription vmstate_virtio_net = { .name = "virtio-net", .minimum_version_id = VIRTIO_NET_VM_VERSION, diff --git a/hw/pci/msix.c b/hw/pci/msix.c index d8a55a6..57ec708 100644 --- a/hw/pci/msix.c +++ b/hw/pci/msix.c @@ -250,7 +250,7 @@ static uint64_t msix_pba_mmio_read(void *opaque, hwaddr addr, PCIDevice *dev = opaque; if (dev->msix_vector_poll_notifier) { unsigned vector_start = addr * 8; - unsigned vector_end = MIN(addr + size * 8, dev->msix_entries_nr); + unsigned vector_end = MIN((addr + size) * 8, dev->msix_entries_nr); dev->msix_vector_poll_notifier(dev, vector_start, vector_end); } diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index 0b455c8..1b12db6 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -1113,18 +1113,22 @@ void pcie_sync_bridge_lnk(PCIDevice *bridge_dev) if ((lnksta & PCI_EXP_LNKSTA_NLW) > (lnkcap & PCI_EXP_LNKCAP_MLW)) { lnksta &= ~PCI_EXP_LNKSTA_NLW; lnksta |= lnkcap & PCI_EXP_LNKCAP_MLW; - } else if (!(lnksta & PCI_EXP_LNKSTA_NLW)) { - lnksta |= QEMU_PCI_EXP_LNKSTA_NLW(QEMU_PCI_EXP_LNK_X1); } if ((lnksta & PCI_EXP_LNKSTA_CLS) > (lnkcap & PCI_EXP_LNKCAP_SLS)) { lnksta &= ~PCI_EXP_LNKSTA_CLS; lnksta |= lnkcap & PCI_EXP_LNKCAP_SLS; - } else if (!(lnksta & PCI_EXP_LNKSTA_CLS)) { - lnksta |= QEMU_PCI_EXP_LNKSTA_CLS(QEMU_PCI_EXP_LNK_2_5GT); } } + if (!(lnksta & PCI_EXP_LNKSTA_NLW)) { + lnksta |= QEMU_PCI_EXP_LNKSTA_NLW(QEMU_PCI_EXP_LNK_X1); + } + + if (!(lnksta & PCI_EXP_LNKSTA_CLS)) { + lnksta |= QEMU_PCI_EXP_LNKSTA_CLS(QEMU_PCI_EXP_LNK_2_5GT); + } + pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKSTA, PCI_EXP_LNKSTA_CLS | PCI_EXP_LNKSTA_NLW); pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA, lnksta & diff --git a/include/hw/acpi/ghes.h b/include/hw/acpi/ghes.h index 674f695..39619a2 100644 --- a/include/hw/acpi/ghes.h +++ b/include/hw/acpi/ghes.h @@ -23,6 +23,7 @@ #define ACPI_GHES_H #include "hw/acpi/bios-linker-loader.h" +#include "qapi/error.h" /* * Values for Hardware Error Notification Type field @@ -59,26 +60,29 @@ enum AcpiGhesNotifyType { enum { ACPI_HEST_SRC_ID_SEA = 0, /* future ids go here */ - ACPI_HEST_SRC_ID_RESERVED, + + ACPI_GHES_ERROR_SOURCE_COUNT }; typedef struct AcpiGhesState { - uint64_t ghes_addr_le; + uint64_t hw_error_le; bool present; /* True if GHES is present at all on this board */ } AcpiGhesState; -void build_ghes_error_table(GArray *hardware_errors, BIOSLinker *linker); -void 
acpi_build_hest(GArray *table_data, BIOSLinker *linker, +void acpi_build_hest(GArray *table_data, GArray *hardware_errors, + BIOSLinker *linker, const char *oem_id, const char *oem_table_id); void acpi_ghes_add_fw_cfg(AcpiGhesState *vms, FWCfgState *s, GArray *hardware_errors); -int acpi_ghes_record_errors(uint8_t notify, uint64_t error_physical_addr); +int acpi_ghes_memory_errors(uint16_t source_id, uint64_t error_physical_addr); +void ghes_record_cper_errors(const void *cper, size_t len, + uint16_t source_id, Error **errp); /** * acpi_ghes_present: Report whether ACPI GHES table is present * * Returns: true if the system has an ACPI GHES table and it is - * safe to call acpi_ghes_record_errors() to record a memory error. + * safe to call acpi_ghes_memory_errors() to record a memory error. */ bool acpi_ghes_present(void); #endif diff --git a/include/hw/acpi/vmclock.h b/include/hw/acpi/vmclock.h new file mode 100644 index 0000000..5605605 --- /dev/null +++ b/include/hw/acpi/vmclock.h @@ -0,0 +1,34 @@ +#ifndef ACPI_VMCLOCK_H +#define ACPI_VMCLOCK_H + +#include "hw/acpi/bios-linker-loader.h" +#include "hw/qdev-core.h" +#include "qemu/uuid.h" +#include "qom/object.h" + +#define TYPE_VMCLOCK "vmclock" + +#define VMCLOCK_ADDR 0xfeffb000 +#define VMCLOCK_SIZE 0x1000 + +OBJECT_DECLARE_SIMPLE_TYPE(VmclockState, VMCLOCK) + +struct vmclock_abi; + +struct VmclockState { + DeviceState parent_obj; + MemoryRegion clk_page; + uint64_t physaddr; + struct vmclock_abi *clk; +}; + +/* returns NULL unless there is exactly one device */ +static inline Object *find_vmclock_dev(void) +{ + return object_resolve_path_type("", TYPE_VMCLOCK, NULL); +} + +void vmclock_build_acpi(VmclockState *vms, GArray *table_data, + BIOSLinker *linker, const char *oem_id); + +#endif diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h index d372cd3..e95477e 100644 --- a/include/hw/i386/intel_iommu.h +++ b/include/hw/i386/intel_iommu.h @@ -45,8 +45,9 @@ OBJECT_DECLARE_SIMPLE_TYPE(IntelIOMMUState, INTEL_IOMMU_DEVICE) #define DMAR_REG_SIZE 0x230 #define VTD_HOST_AW_39BIT 39 #define VTD_HOST_AW_48BIT 48 -#define VTD_HOST_ADDRESS_WIDTH VTD_HOST_AW_39BIT +#define VTD_HOST_ADDRESS_WIDTH VTD_HOST_AW_48BIT #define VTD_HAW_MASK(aw) ((1ULL << (aw)) - 1) +#define VTD_MGAW_FROM_CAP(cap) ((cap >> 16) & 0x3fULL) #define DMAR_REPORT_F_INTR (1) @@ -152,9 +153,10 @@ struct VTDIOTLBEntry { uint64_t gfn; uint16_t domain_id; uint32_t pasid; - uint64_t slpte; + uint64_t pte; uint64_t mask; uint8_t access_flags; + uint8_t pgtt; }; /* VT-d Source-ID Qualifier types */ @@ -262,6 +264,7 @@ struct IntelIOMMUState { bool caching_mode; /* RO - is cap CM enabled? */ bool scalable_mode; /* RO - is Scalable Mode supported? */ + bool flts; /* RO - is stage-1 translation supported? */ bool snoop_control; /* RO - is SNP filed supported? */ dma_addr_t root; /* Current root table pointer */ @@ -305,6 +308,7 @@ struct IntelIOMMUState { bool dma_drain; /* Whether DMA r/w draining enabled */ bool dma_translation; /* Whether DMA translation supported */ bool pasid; /* Whether to support PASID */ + bool fs1gp; /* First Stage 1-GByte Page Support */ /* Transient Mapping, Reserved(0) since VTD spec revision 3.2 */ bool stale_tm; diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h index 461c168..a9469d5 100644 --- a/include/hw/virtio/vhost.h +++ b/include/hw/virtio/vhost.h @@ -365,7 +365,14 @@ static inline int vhost_reset_device(struct vhost_dev *hdev) * Returns true if the device supports these commands, and false if it * does not. 
*/ +#ifdef CONFIG_VHOST bool vhost_supports_device_state(struct vhost_dev *dev); +#else +static inline bool vhost_supports_device_state(struct vhost_dev *dev) +{ + return false; +} +#endif /** * vhost_set_device_state_fd(): Begin transfer of internal state from/to @@ -448,7 +455,15 @@ int vhost_check_device_state(struct vhost_dev *dev, Error **errp); * * Returns 0 on success, and -errno otherwise. */ +#ifdef CONFIG_VHOST int vhost_save_backend_state(struct vhost_dev *dev, QEMUFile *f, Error **errp); +#else +static inline int vhost_save_backend_state(struct vhost_dev *dev, QEMUFile *f, + Error **errp) +{ + return -ENOSYS; +} +#endif /** * vhost_load_backend_state(): High-level function to load a vhost @@ -465,6 +480,14 @@ int vhost_save_backend_state(struct vhost_dev *dev, QEMUFile *f, Error **errp); * * Returns 0 on success, and -errno otherwise. */ +#ifdef CONFIG_VHOST int vhost_load_backend_state(struct vhost_dev *dev, QEMUFile *f, Error **errp); +#else +static inline int vhost_load_backend_state(struct vhost_dev *dev, QEMUFile *f, + Error **errp) +{ + return -ENOSYS; +} +#endif #endif diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h index bd93672..a42957c 100644 --- a/include/hw/virtio/virtio-gpu.h +++ b/include/hw/virtio/virtio-gpu.h @@ -98,6 +98,7 @@ enum virtio_gpu_base_conf_flags { VIRTIO_GPU_FLAG_CONTEXT_INIT_ENABLED, VIRTIO_GPU_FLAG_RUTABAGA_ENABLED, VIRTIO_GPU_FLAG_VENUS_ENABLED, + VIRTIO_GPU_FLAG_RESOURCE_UUID_ENABLED, }; #define virtio_gpu_virgl_enabled(_cfg) \ @@ -114,6 +115,8 @@ enum virtio_gpu_base_conf_flags { (_cfg.flags & (1 << VIRTIO_GPU_FLAG_CONTEXT_INIT_ENABLED)) #define virtio_gpu_rutabaga_enabled(_cfg) \ (_cfg.flags & (1 << VIRTIO_GPU_FLAG_RUTABAGA_ENABLED)) +#define virtio_gpu_resource_uuid_enabled(_cfg) \ + (_cfg.flags & (1 << VIRTIO_GPU_FLAG_RESOURCE_UUID_ENABLED)) #define virtio_gpu_hostmem_enabled(_cfg) \ (_cfg.hostmem > 0) #define virtio_gpu_venus_enabled(_cfg) \ diff --git a/include/standard-headers/linux/vmclock-abi.h b/include/standard-headers/linux/vmclock-abi.h new file mode 100644 index 0000000..15b0316 --- /dev/null +++ b/include/standard-headers/linux/vmclock-abi.h @@ -0,0 +1,182 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */ + +/* + * This structure provides a vDSO-style clock to VM guests, exposing the + * relationship (or lack thereof) between the CPU clock (TSC, timebase, arch + * counter, etc.) and real time. It is designed to address the problem of + * live migration, which other clock enlightenments do not. + * + * When a guest is live migrated, this affects the clock in two ways. + * + * First, even between identical hosts the actual frequency of the underlying + * counter will change within the tolerances of its specification (typically + * ±50PPM, or 4 seconds a day). This frequency also varies over time on the + * same host, but can be tracked by NTP as it generally varies slowly. With + * live migration there is a step change in the frequency, with no warning. + * + * Second, there may be a step change in the value of the counter itself, as + * its accuracy is limited by the precision of the NTP synchronization on the + * source and destination hosts. + * + * So any calibration (NTP, PTP, etc.) which the guest has done on the source + * host before migration is invalid, and needs to be redone on the new host. + * + * In its most basic mode, this structure provides only an indication to the + * guest that live migration has occurred. 
This allows the guest to know that + * its clock is invalid and take remedial action. For applications that need + * reliable, accurate timestamps (e.g. distributed databases), the structure + * can be mapped all the way to userspace. This allows the application to see + * directly for itself that the clock is disrupted and take appropriate + * action, even when using a vDSO-style method to get the time instead of a + * system call. + * + * In its more advanced mode, this structure can also be used to expose the + * precise relationship of the CPU counter to real time, as calibrated by the + * host. This means that userspace applications can have accurate time + * immediately after live migration, rather than having to pause operations + * and wait for NTP to recover. This mode does, of course, rely on the + * counter being reliable and consistent across CPUs. + * + * Note that this must be true UTC, never with smeared leap seconds. If a + * guest wishes to construct a smeared clock, it can do so. Presenting a + * smeared clock through this interface would be problematic because it + * actually messes with the apparent counter *period*. A linear smearing + * of 1 ms per second would effectively tweak the counter period by 1000PPM + * at the start/end of the smearing period, while a sinusoidal smear would + * basically be impossible to represent. + * + * This structure is offered with the intent that it be adopted into the + * nascent virtio-rtc standard, as a virtio-rtc that does not address the live + * migration problem seems a little less than fit for purpose. For that + * reason, certain fields use precisely the same numeric definitions as in + * the virtio-rtc proposal. The structure can also be exposed through an ACPI + * device with the CID "VMCLOCK", modelled on the "VMGENID" device except for + * the fact that it uses a real _CRS to convey the address of the structure + * (which should be a full page, to allow for mapping directly to userspace). + */ + +#ifndef __VMCLOCK_ABI_H__ +#define __VMCLOCK_ABI_H__ + +#include "standard-headers/linux/types.h" + +struct vmclock_abi { + /* CONSTANT FIELDS */ + uint32_t magic; +#define VMCLOCK_MAGIC 0x4b4c4356 /* "VCLK" */ + uint32_t size; /* Size of region containing this structure */ + uint16_t version; /* 1 */ + uint8_t counter_id; /* Matches VIRTIO_RTC_COUNTER_xxx except INVALID */ +#define VMCLOCK_COUNTER_ARM_VCNT 0 +#define VMCLOCK_COUNTER_X86_TSC 1 +#define VMCLOCK_COUNTER_INVALID 0xff + uint8_t time_type; /* Matches VIRTIO_RTC_TYPE_xxx */ +#define VMCLOCK_TIME_UTC 0 /* Since 1970-01-01 00:00:00z */ +#define VMCLOCK_TIME_TAI 1 /* Since 1970-01-01 00:00:00z */ +#define VMCLOCK_TIME_MONOTONIC 2 /* Since undefined epoch */ +#define VMCLOCK_TIME_INVALID_SMEARED 3 /* Not supported */ +#define VMCLOCK_TIME_INVALID_MAYBE_SMEARED 4 /* Not supported */ + + /* NON-CONSTANT FIELDS PROTECTED BY SEQCOUNT LOCK */ + uint32_t seq_count; /* Low bit means an update is in progress */ + /* + * This field changes to another non-repeating value when the CPU + * counter is disrupted, for example on live migration. This lets + * the guest know that it should discard any calibration it has + * performed of the counter against external sources (NTP/PTP/etc.). + */ + uint64_t disruption_marker; + uint64_t flags; + /* Indicates that the tai_offset_sec field is valid */ +#define VMCLOCK_FLAG_TAI_OFFSET_VALID (1 << 0) + /* + * Optionally used to notify guests of pending maintenance events. 
+ * A guest which provides latency-sensitive services may wish to + * remove itself from service if an event is coming up. Two flags + * indicate the approximate imminence of the event. + */ +#define VMCLOCK_FLAG_DISRUPTION_SOON (1 << 1) /* About a day */ +#define VMCLOCK_FLAG_DISRUPTION_IMMINENT (1 << 2) /* About an hour */ +#define VMCLOCK_FLAG_PERIOD_ESTERROR_VALID (1 << 3) +#define VMCLOCK_FLAG_PERIOD_MAXERROR_VALID (1 << 4) +#define VMCLOCK_FLAG_TIME_ESTERROR_VALID (1 << 5) +#define VMCLOCK_FLAG_TIME_MAXERROR_VALID (1 << 6) + /* + * If the MONOTONIC flag is set then (other than leap seconds) it is + * guaranteed that the time calculated according to this structure at + * any given moment shall never appear to be later than the time + * calculated via the structure at any *later* moment. + * + * In particular, a timestamp based on a counter reading taken + * immediately after setting the low bit of seq_count (and the + * associated memory barrier), using the previously-valid time and + * period fields, shall never be later than a timestamp based on + * a counter reading taken immediately before *clearing* the low + * bit again after the update, using the about-to-be-valid fields. + */ +#define VMCLOCK_FLAG_TIME_MONOTONIC (1 << 7) + + uint8_t pad[2]; + uint8_t clock_status; +#define VMCLOCK_STATUS_UNKNOWN 0 +#define VMCLOCK_STATUS_INITIALIZING 1 +#define VMCLOCK_STATUS_SYNCHRONIZED 2 +#define VMCLOCK_STATUS_FREERUNNING 3 +#define VMCLOCK_STATUS_UNRELIABLE 4 + + /* + * The time exposed through this device is never smeared. This field + * corresponds to the 'subtype' field in virtio-rtc, which indicates + * the smearing method. However in this case it provides a *hint* to + * the guest operating system, such that *if* the guest OS wants to + * provide its users with an alternative clock which does not follow + * UTC, it may do so in a fashion consistent with the other systems + * in the nearby environment. + */ + uint8_t leap_second_smearing_hint; /* Matches VIRTIO_RTC_SUBTYPE_xxx */ +#define VMCLOCK_SMEARING_STRICT 0 +#define VMCLOCK_SMEARING_NOON_LINEAR 1 +#define VMCLOCK_SMEARING_UTC_SLS 2 + uint16_t tai_offset_sec; /* Actually two's complement signed */ + uint8_t leap_indicator; + /* + * This field is based on the VIRTIO_RTC_LEAP_xxx values as defined + * in the current draft of virtio-rtc, but since smearing cannot be + * used with the shared memory device, some values are not used. + * + * The _POST_POS and _POST_NEG values allow the guest to perform + * its own smearing during the day or so after a leap second when + * such smearing may need to continue being applied for a leap + * second which is now theoretically "historical". + */ +#define VMCLOCK_LEAP_NONE 0x00 /* No known nearby leap second */ +#define VMCLOCK_LEAP_PRE_POS 0x01 /* Positive leap second at EOM */ +#define VMCLOCK_LEAP_PRE_NEG 0x02 /* Negative leap second at EOM */ +#define VMCLOCK_LEAP_POS 0x03 /* Set during 23:59:60 second */ +#define VMCLOCK_LEAP_POST_POS 0x04 +#define VMCLOCK_LEAP_POST_NEG 0x05 + + /* Bit shift for counter_period_frac_sec and its error rate */ + uint8_t counter_period_shift; + /* + * Paired values of counter and UTC at a given point in time. + */ + uint64_t counter_value; + /* + * Counter period, and error margin of same. The unit of these + * fields is 1/2^(64 + counter_period_shift) of a second. + */ + uint64_t counter_period_frac_sec; + uint64_t counter_period_esterror_rate_frac_sec; + uint64_t counter_period_maxerror_rate_frac_sec; + + /* + * Time according to time_type field above. 
+ */ + uint64_t time_sec; /* Seconds since time_type epoch */ + uint64_t time_frac_sec; /* Units of 1/2^64 of a second */ + uint64_t time_esterror_nanosec; + uint64_t time_maxerror_nanosec; +}; + +#endif /* __VMCLOCK_ABI_H__ */ diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh index 99a8d9f..8913e4f 100755 --- a/scripts/update-linux-headers.sh +++ b/scripts/update-linux-headers.sh @@ -258,6 +258,7 @@ for i in "$hdrdir"/include/linux/*virtio*.h \ "$hdrdir/include/linux/kernel.h" \ "$hdrdir/include/linux/kvm_para.h" \ "$hdrdir/include/linux/vhost_types.h" \ + "$hdrdir/include/linux/vmclock-abi.h" \ "$hdrdir/include/linux/sysinfo.h"; do cp_portable "$i" "$output/include/standard-headers/linux" done diff --git a/target/arm/kvm.c b/target/arm/kvm.c index a9444a2..da30bdb 100644 --- a/target/arm/kvm.c +++ b/target/arm/kvm.c @@ -2387,7 +2387,7 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr) */ if (code == BUS_MCEERR_AR) { kvm_cpu_synchronize_state(c); - if (!acpi_ghes_record_errors(ACPI_HEST_SRC_ID_SEA, paddr)) { + if (!acpi_ghes_memory_errors(ACPI_HEST_SRC_ID_SEA, paddr)) { kvm_inject_arm_sea(c); } else { error_report("failed to record the error"); diff --git a/tests/data/acpi/x86/pc/DSDT b/tests/data/acpi/x86/pc/DSDT Binary files differindex 8b8235f..4beb519 100644 --- a/tests/data/acpi/x86/pc/DSDT +++ b/tests/data/acpi/x86/pc/DSDT diff --git a/tests/data/acpi/x86/pc/DSDT.acpierst b/tests/data/acpi/x86/pc/DSDT.acpierst Binary files differindex 06829b9..abda686 100644 --- a/tests/data/acpi/x86/pc/DSDT.acpierst +++ b/tests/data/acpi/x86/pc/DSDT.acpierst diff --git a/tests/data/acpi/x86/pc/DSDT.acpihmat b/tests/data/acpi/x86/pc/DSDT.acpihmat Binary files differindex 2fe355e..d081db2 100644 --- a/tests/data/acpi/x86/pc/DSDT.acpihmat +++ b/tests/data/acpi/x86/pc/DSDT.acpihmat diff --git a/tests/data/acpi/x86/pc/DSDT.bridge b/tests/data/acpi/x86/pc/DSDT.bridge Binary files differindex 4d4067c..e16897d 100644 --- a/tests/data/acpi/x86/pc/DSDT.bridge +++ b/tests/data/acpi/x86/pc/DSDT.bridge diff --git a/tests/data/acpi/x86/pc/DSDT.cphp b/tests/data/acpi/x86/pc/DSDT.cphp Binary files differindex 045a52e..e95711c 100644 --- a/tests/data/acpi/x86/pc/DSDT.cphp +++ b/tests/data/acpi/x86/pc/DSDT.cphp diff --git a/tests/data/acpi/x86/pc/DSDT.dimmpxm b/tests/data/acpi/x86/pc/DSDT.dimmpxm Binary files differindex 205219b..90ba66b 100644 --- a/tests/data/acpi/x86/pc/DSDT.dimmpxm +++ b/tests/data/acpi/x86/pc/DSDT.dimmpxm diff --git a/tests/data/acpi/x86/pc/DSDT.hpbridge b/tests/data/acpi/x86/pc/DSDT.hpbridge Binary files differindex 8fa8b51..0eafe5f 100644 --- a/tests/data/acpi/x86/pc/DSDT.hpbridge +++ b/tests/data/acpi/x86/pc/DSDT.hpbridge diff --git a/tests/data/acpi/x86/pc/DSDT.hpbrroot b/tests/data/acpi/x86/pc/DSDT.hpbrroot Binary files differindex 0171946..077a4cc 100644 --- a/tests/data/acpi/x86/pc/DSDT.hpbrroot +++ b/tests/data/acpi/x86/pc/DSDT.hpbrroot diff --git a/tests/data/acpi/x86/pc/DSDT.ipmikcs b/tests/data/acpi/x86/pc/DSDT.ipmikcs Binary files differindex 0ca6646..8d465f0 100644 --- a/tests/data/acpi/x86/pc/DSDT.ipmikcs +++ b/tests/data/acpi/x86/pc/DSDT.ipmikcs diff --git a/tests/data/acpi/x86/pc/DSDT.memhp b/tests/data/acpi/x86/pc/DSDT.memhp Binary files differindex 03ff464..e3b4975 100644 --- a/tests/data/acpi/x86/pc/DSDT.memhp +++ b/tests/data/acpi/x86/pc/DSDT.memhp diff --git a/tests/data/acpi/x86/pc/DSDT.nohpet b/tests/data/acpi/x86/pc/DSDT.nohpet Binary files differindex b081030..9e772c1 100644 --- a/tests/data/acpi/x86/pc/DSDT.nohpet +++ 
b/tests/data/acpi/x86/pc/DSDT.nohpet diff --git a/tests/data/acpi/x86/pc/DSDT.numamem b/tests/data/acpi/x86/pc/DSDT.numamem Binary files differindex 2c98caf..9bfbfc2 100644 --- a/tests/data/acpi/x86/pc/DSDT.numamem +++ b/tests/data/acpi/x86/pc/DSDT.numamem diff --git a/tests/data/acpi/x86/pc/DSDT.roothp b/tests/data/acpi/x86/pc/DSDT.roothp Binary files differindex da018dc..efbee6d 100644 --- a/tests/data/acpi/x86/pc/DSDT.roothp +++ b/tests/data/acpi/x86/pc/DSDT.roothp diff --git a/tests/data/acpi/x86/q35/DMAR.dmar b/tests/data/acpi/x86/q35/DMAR.dmar Binary files differindex 0dca6e6..0c05976 100644 --- a/tests/data/acpi/x86/q35/DMAR.dmar +++ b/tests/data/acpi/x86/q35/DMAR.dmar diff --git a/tests/data/acpi/x86/q35/DSDT b/tests/data/acpi/x86/q35/DSDT Binary files differindex fb89ae0..e5e8d1e 100644 --- a/tests/data/acpi/x86/q35/DSDT +++ b/tests/data/acpi/x86/q35/DSDT diff --git a/tests/data/acpi/x86/q35/DSDT.acpierst b/tests/data/acpi/x86/q35/DSDT.acpierst Binary files differindex 46fd254..072a3fe 100644 --- a/tests/data/acpi/x86/q35/DSDT.acpierst +++ b/tests/data/acpi/x86/q35/DSDT.acpierst diff --git a/tests/data/acpi/x86/q35/DSDT.acpihmat b/tests/data/acpi/x86/q35/DSDT.acpihmat Binary files differindex 61c5bd5..2a4f2fc 100644 --- a/tests/data/acpi/x86/q35/DSDT.acpihmat +++ b/tests/data/acpi/x86/q35/DSDT.acpihmat diff --git a/tests/data/acpi/x86/q35/DSDT.acpihmat-generic-x b/tests/data/acpi/x86/q35/DSDT.acpihmat-generic-x Binary files differindex 497706c..7911c05 100644 --- a/tests/data/acpi/x86/q35/DSDT.acpihmat-generic-x +++ b/tests/data/acpi/x86/q35/DSDT.acpihmat-generic-x diff --git a/tests/data/acpi/x86/q35/DSDT.acpihmat-noinitiator b/tests/data/acpi/x86/q35/DSDT.acpihmat-noinitiator Binary files differindex 3aaa2bb..580b4a4 100644 --- a/tests/data/acpi/x86/q35/DSDT.acpihmat-noinitiator +++ b/tests/data/acpi/x86/q35/DSDT.acpihmat-noinitiator diff --git a/tests/data/acpi/x86/q35/DSDT.applesmc b/tests/data/acpi/x86/q35/DSDT.applesmc Binary files differindex 944209a..5e8220e 100644 --- a/tests/data/acpi/x86/q35/DSDT.applesmc +++ b/tests/data/acpi/x86/q35/DSDT.applesmc diff --git a/tests/data/acpi/x86/q35/DSDT.bridge b/tests/data/acpi/x86/q35/DSDT.bridge Binary files differindex d9938db..ee03945 100644 --- a/tests/data/acpi/x86/q35/DSDT.bridge +++ b/tests/data/acpi/x86/q35/DSDT.bridge diff --git a/tests/data/acpi/x86/q35/DSDT.core-count b/tests/data/acpi/x86/q35/DSDT.core-count Binary files differindex a24b04c..7ebfcee 100644 --- a/tests/data/acpi/x86/q35/DSDT.core-count +++ b/tests/data/acpi/x86/q35/DSDT.core-count diff --git a/tests/data/acpi/x86/q35/DSDT.core-count2 b/tests/data/acpi/x86/q35/DSDT.core-count2 Binary files differindex 3a0cb8c..d039455 100644 --- a/tests/data/acpi/x86/q35/DSDT.core-count2 +++ b/tests/data/acpi/x86/q35/DSDT.core-count2 diff --git a/tests/data/acpi/x86/q35/DSDT.cphp b/tests/data/acpi/x86/q35/DSDT.cphp Binary files differindex 20955d0..a055c2e 100644 --- a/tests/data/acpi/x86/q35/DSDT.cphp +++ b/tests/data/acpi/x86/q35/DSDT.cphp diff --git a/tests/data/acpi/x86/q35/DSDT.cxl b/tests/data/acpi/x86/q35/DSDT.cxl Binary files differindex 3c34d4d..2084354 100644 --- a/tests/data/acpi/x86/q35/DSDT.cxl +++ b/tests/data/acpi/x86/q35/DSDT.cxl diff --git a/tests/data/acpi/x86/q35/DSDT.dimmpxm b/tests/data/acpi/x86/q35/DSDT.dimmpxm Binary files differindex 228374b..664e926 100644 --- a/tests/data/acpi/x86/q35/DSDT.dimmpxm +++ b/tests/data/acpi/x86/q35/DSDT.dimmpxm diff --git a/tests/data/acpi/x86/q35/DSDT.ipmibt b/tests/data/acpi/x86/q35/DSDT.ipmibt Binary files 
differindex 45f911a..4066a76 100644 --- a/tests/data/acpi/x86/q35/DSDT.ipmibt +++ b/tests/data/acpi/x86/q35/DSDT.ipmibt diff --git a/tests/data/acpi/x86/q35/DSDT.ipmismbus b/tests/data/acpi/x86/q35/DSDT.ipmismbus Binary files differindex e5d6811..6d0b6b9 100644 --- a/tests/data/acpi/x86/q35/DSDT.ipmismbus +++ b/tests/data/acpi/x86/q35/DSDT.ipmismbus diff --git a/tests/data/acpi/x86/q35/DSDT.ivrs b/tests/data/acpi/x86/q35/DSDT.ivrs Binary files differindex 46fd254..072a3fe 100644 --- a/tests/data/acpi/x86/q35/DSDT.ivrs +++ b/tests/data/acpi/x86/q35/DSDT.ivrs diff --git a/tests/data/acpi/x86/q35/DSDT.memhp b/tests/data/acpi/x86/q35/DSDT.memhp Binary files differindex 5ce0811..4f2f9bc 100644 --- a/tests/data/acpi/x86/q35/DSDT.memhp +++ b/tests/data/acpi/x86/q35/DSDT.memhp diff --git a/tests/data/acpi/x86/q35/DSDT.mmio64 b/tests/data/acpi/x86/q35/DSDT.mmio64 Binary files differindex bdf36c4..0fb6aab 100644 --- a/tests/data/acpi/x86/q35/DSDT.mmio64 +++ b/tests/data/acpi/x86/q35/DSDT.mmio64 diff --git a/tests/data/acpi/x86/q35/DSDT.multi-bridge b/tests/data/acpi/x86/q35/DSDT.multi-bridge Binary files differindex 1db43a6..f6afa6d 100644 --- a/tests/data/acpi/x86/q35/DSDT.multi-bridge +++ b/tests/data/acpi/x86/q35/DSDT.multi-bridge diff --git a/tests/data/acpi/x86/q35/DSDT.noacpihp b/tests/data/acpi/x86/q35/DSDT.noacpihp Binary files differindex 8bc1688..9f7261d 100644 --- a/tests/data/acpi/x86/q35/DSDT.noacpihp +++ b/tests/data/acpi/x86/q35/DSDT.noacpihp diff --git a/tests/data/acpi/x86/q35/DSDT.nohpet b/tests/data/acpi/x86/q35/DSDT.nohpet Binary files differindex c13e45e..99ad629 100644 --- a/tests/data/acpi/x86/q35/DSDT.nohpet +++ b/tests/data/acpi/x86/q35/DSDT.nohpet diff --git a/tests/data/acpi/x86/q35/DSDT.numamem b/tests/data/acpi/x86/q35/DSDT.numamem Binary files differindex ba66694..fd1d8a7 100644 --- a/tests/data/acpi/x86/q35/DSDT.numamem +++ b/tests/data/acpi/x86/q35/DSDT.numamem diff --git a/tests/data/acpi/x86/q35/DSDT.pvpanic-isa b/tests/data/acpi/x86/q35/DSDT.pvpanic-isa Binary files differindex 6ad4287..89032fa 100644 --- a/tests/data/acpi/x86/q35/DSDT.pvpanic-isa +++ b/tests/data/acpi/x86/q35/DSDT.pvpanic-isa diff --git a/tests/data/acpi/x86/q35/DSDT.thread-count b/tests/data/acpi/x86/q35/DSDT.thread-count Binary files differindex a24b04c..7ebfcee 100644 --- a/tests/data/acpi/x86/q35/DSDT.thread-count +++ b/tests/data/acpi/x86/q35/DSDT.thread-count diff --git a/tests/data/acpi/x86/q35/DSDT.thread-count2 b/tests/data/acpi/x86/q35/DSDT.thread-count2 Binary files differindex 3a0cb8c..d039455 100644 --- a/tests/data/acpi/x86/q35/DSDT.thread-count2 +++ b/tests/data/acpi/x86/q35/DSDT.thread-count2 diff --git a/tests/data/acpi/x86/q35/DSDT.tis.tpm12 b/tests/data/acpi/x86/q35/DSDT.tis.tpm12 Binary files differindex e381ce4..f2ed40c 100644 --- a/tests/data/acpi/x86/q35/DSDT.tis.tpm12 +++ b/tests/data/acpi/x86/q35/DSDT.tis.tpm12 diff --git a/tests/data/acpi/x86/q35/DSDT.tis.tpm2 b/tests/data/acpi/x86/q35/DSDT.tis.tpm2 Binary files differindex a092530..5c975d2 100644 --- a/tests/data/acpi/x86/q35/DSDT.tis.tpm2 +++ b/tests/data/acpi/x86/q35/DSDT.tis.tpm2 diff --git a/tests/data/acpi/x86/q35/DSDT.type4-count b/tests/data/acpi/x86/q35/DSDT.type4-count Binary files differindex edc2319..3194a82 100644 --- a/tests/data/acpi/x86/q35/DSDT.type4-count +++ b/tests/data/acpi/x86/q35/DSDT.type4-count diff --git a/tests/data/acpi/x86/q35/DSDT.viot b/tests/data/acpi/x86/q35/DSDT.viot Binary files differindex 4c93dfd..129d43e 100644 --- a/tests/data/acpi/x86/q35/DSDT.viot +++ 
b/tests/data/acpi/x86/q35/DSDT.viot diff --git a/tests/data/acpi/x86/q35/DSDT.xapic b/tests/data/acpi/x86/q35/DSDT.xapic Binary files differindex d4acd85..b37ab59 100644 --- a/tests/data/acpi/x86/q35/DSDT.xapic +++ b/tests/data/acpi/x86/q35/DSDT.xapic diff --git a/tests/qtest/intel-iommu-test.c b/tests/qtest/intel-iommu-test.c new file mode 100644 index 0000000..c521b37 --- /dev/null +++ b/tests/qtest/intel-iommu-test.c @@ -0,0 +1,64 @@ +/* + * QTest testcase for intel-iommu + * + * Copyright (c) 2024 Intel, Inc. + * + * Author: Zhenzhong Duan <zhenzhong.duan@intel.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "libqtest.h" +#include "hw/i386/intel_iommu_internal.h" + +#define CAP_STAGE_1_FIXED1 (VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND | \ + VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS) +#define ECAP_STAGE_1_FIXED1 (VTD_ECAP_QI | VTD_ECAP_IR | VTD_ECAP_IRO | \ + VTD_ECAP_MHMV | VTD_ECAP_SMTS | VTD_ECAP_FLTS) + +static inline uint64_t vtd_reg_readq(QTestState *s, uint64_t offset) +{ + return qtest_readq(s, Q35_HOST_BRIDGE_IOMMU_ADDR + offset); +} + +static void test_intel_iommu_stage_1(void) +{ + uint8_t init_csr[DMAR_REG_SIZE]; /* register values */ + uint8_t post_reset_csr[DMAR_REG_SIZE]; /* register values */ + uint64_t cap, ecap, tmp; + QTestState *s; + + s = qtest_init("-M q35 -device intel-iommu,x-scalable-mode=on,x-flts=on"); + + cap = vtd_reg_readq(s, DMAR_CAP_REG); + g_assert((cap & CAP_STAGE_1_FIXED1) == CAP_STAGE_1_FIXED1); + + tmp = cap & VTD_CAP_SAGAW_MASK; + g_assert(tmp == (VTD_CAP_SAGAW_39bit | VTD_CAP_SAGAW_48bit)); + + tmp = VTD_MGAW_FROM_CAP(cap); + g_assert(tmp == VTD_HOST_AW_48BIT - 1); + + ecap = vtd_reg_readq(s, DMAR_ECAP_REG); + g_assert((ecap & ECAP_STAGE_1_FIXED1) == ECAP_STAGE_1_FIXED1); + + qtest_memread(s, Q35_HOST_BRIDGE_IOMMU_ADDR, init_csr, DMAR_REG_SIZE); + + qobject_unref(qtest_qmp(s, "{ 'execute': 'system_reset' }")); + qtest_qmp_eventwait(s, "RESET"); + + qtest_memread(s, Q35_HOST_BRIDGE_IOMMU_ADDR, post_reset_csr, DMAR_REG_SIZE); + /* Ensure registers are consistent after hard reset */ + g_assert(!memcmp(init_csr, post_reset_csr, DMAR_REG_SIZE)); + + qtest_quit(s); +} + +int main(int argc, char **argv) +{ + g_test_init(&argc, &argv, NULL); + qtest_add_func("/q35/intel-iommu/stage-1", test_intel_iommu_stage_1); + + return g_test_run(); +} diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build index bf4d5c2..edd53ec 100644 --- a/tests/qtest/meson.build +++ b/tests/qtest/meson.build @@ -93,6 +93,7 @@ qtests_i386 = \ (config_all_devices.has_key('CONFIG_SB16') ? ['fuzz-sb16-test'] : []) + \ (config_all_devices.has_key('CONFIG_SDHCI_PCI') ? ['fuzz-sdcard-test'] : []) + \ (config_all_devices.has_key('CONFIG_ESP_PCI') ? ['am53c974-test'] : []) + \ + (config_all_devices.has_key('CONFIG_VTD') ? ['intel-iommu-test'] : []) + \ (host_os != 'windows' and \ config_all_devices.has_key('CONFIG_ACPI_ERST') ? ['erst-test'] : []) + \ (config_all_devices.has_key('CONFIG_PCIE_PORT') and \ |
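Editor's note: the vtd_iova_fl_check_canonical() hunk in hw/i386/intel_iommu.c enforces the usual canonical-address rule on first-stage IOVAs: every bit above the address width must be a copy of the most-significant in-range bit (sign extension). A minimal standalone restatement of that rule, assuming the 48-bit width the x-flts configuration requires; the helper name and the main() checks are illustrative, not code from the patch:

#include <stdbool.h>
#include <stdint.h>
#include <assert.h>

static bool iova_is_canonical(uint64_t iova, unsigned aw_bits)
{
    uint64_t limit = 1ULL << aw_bits;     /* vtd_iova_limit(), e.g. 1 << 48 */
    uint64_t upper_mask = ~(limit - 1);   /* bits [63:aw_bits] */
    uint64_t upper = iova & upper_mask;
    bool msb = iova & (limit >> 1);       /* bit aw_bits - 1 */

    /* Upper bits must be all ones or all zeroes, matching the MSB. */
    return msb ? (upper == upper_mask) : (upper == 0);
}

int main(void)
{
    assert(iova_is_canonical(0x00007fffffffffffULL, 48));   /* low half */
    assert(iova_is_canonical(0xffff800000000000ULL, 48));   /* high half */
    assert(!iova_is_canonical(0x0000800000000000ULL, 48));  /* in the hole */
    return 0;
}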
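The interrupt-range rejection added to vtd_do_iommu_translate() is a closed-interval overlap test between the translated region [xlat, xlat + size - 1] and the interrupt address window; note that it checks the whole translated page against the window, not just the single requested address. A sketch, assuming the 0xfee00000-0xfeefffff window that VTD_INTERRUPT_ADDR_FIRST/VTD_INTERRUPT_ADDR_LAST denote on x86:

#include <stdbool.h>
#include <stdint.h>
#include <assert.h>

#define INTR_FIRST 0xfee00000ULL   /* VTD_INTERRUPT_ADDR_FIRST (assumed) */
#define INTR_LAST  0xfeefffffULL   /* VTD_INTERRUPT_ADDR_LAST (assumed) */

/* True if [xlat, xlat + size - 1] intersects the interrupt window:
 * each range must start at or before the other one ends. */
static bool xlat_hits_interrupt_range(uint64_t xlat, uint64_t size)
{
    return xlat <= INTR_LAST && xlat + size - 1 >= INTR_FIRST;
}

int main(void)
{
    assert(xlat_hits_interrupt_range(0xfee00000ULL, 0x1000));  /* inside  */
    assert(xlat_hits_interrupt_range(0xfedff000ULL, 0x2000));  /* straddle */
    assert(!xlat_hits_interrupt_range(0xfef00000ULL, 0x1000)); /* above   */
    return 0;
}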
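The VTD_INV_DESC_PIOTLB_* masks added to intel_iommu_internal.h lay out the PASID-IOTLB invalidation descriptor that vtd_process_piotlb_desc() consumes: granularity in bits 5:4, domain-id at bits 31:16 and pasid at bits 51:32 of val[0], with the address-mask (low 6 bits) and page-aligned address in val[1]. A small decode sketch with the masks restated locally so it stands alone; VTD_DOMAIN_ID_MASK is assumed to be the usual 16 bits:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Local restatements of VTD_INV_DESC_PIOTLB_DID/PASID/AM/ADDR above. */
#define PIOTLB_DID(val)   (((val) >> 16) & 0xffffULL)
#define PIOTLB_PASID(val) (((val) >> 32) & 0xfffffULL)
#define PIOTLB_AM(val)    ((val) & 0x3fULL)
#define PIOTLB_ADDR(val)  ((val) & ~0xfffULL)

int main(void)
{
    /* Page-selective-within-PASID invalidation: granularity 11b,
     * domain 5, pasid 0x42, 2^4 pages starting at 0x7f0000000000. */
    uint64_t val0 = (3ULL << 4) | (5ULL << 16) | (0x42ULL << 32);
    uint64_t val1 = 0x7f0000000000ULL | 4;

    printf("did=%" PRIu64 " pasid=0x%" PRIx64 " addr=0x%" PRIx64
           " pages=%" PRIu64 "\n",
           PIOTLB_DID(val0), PIOTLB_PASID(val0),
           PIOTLB_ADDR(val1), (uint64_t)1 << PIOTLB_AM(val1));
    return 0;
}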
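The seq_count comment in the new vmclock-abi.h describes a classic seqlock protocol: the low bit of seq_count is set while the host updates the page, so a guest must retry until it observes the same even value before and after reading the fields. A minimal guest-side read loop under that protocol; the struct is reduced to the two fields the loop touches (so the offsets do not match the real ABI), and the acquire-fence placement is our reading of the "associated memory barrier" wording rather than code from this patch:

#include <stdint.h>

/* Reduced layout for illustration only; see vmclock-abi.h for the real one. */
struct vmclock_abi_view {
    uint32_t seq_count;          /* low bit set while the host updates */
    uint64_t disruption_marker;  /* changes when the counter is disrupted */
};

/* Retry until a consistent snapshot is observed (seqlock read side). */
static uint64_t vmclock_read_marker(const volatile struct vmclock_abi_view *vc)
{
    uint32_t seq;
    uint64_t marker;

    do {
        seq = vc->seq_count;
        __atomic_thread_fence(__ATOMIC_ACQUIRE);
        marker = vc->disruption_marker;
        __atomic_thread_fence(__ATOMIC_ACQUIRE);
        /* Odd seq, or seq changed underneath us: update in progress. */
    } while ((seq & 1) || seq != vc->seq_count);

    return marker;
}

A guest would cache the returned marker and treat any later change as the signal the header describes: discard calibration of the counter against external sources (NTP/PTP/etc.) and resynchronize.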