diff options
author | Stefan Hajnoczi <stefanha@redhat.com> | 2022-11-07 18:43:56 -0500 |
---|---|---|
committer | Stefan Hajnoczi <stefanha@redhat.com> | 2022-11-07 18:43:56 -0500 |
commit | f21f1cfeb94b94ce044726856c291bed9391e3a4 (patch) | |
tree | 051f38512751eceb808446cfb01fe22ce2f3b259 /hw/i386 | |
parent | 524fc737431d240f9d9f10aaf381003092868bac (diff) | |
parent | 1ef47f40dce3d5b176ddf76d57b5bfa2efb0b3c6 (diff) | |
download | qemu-f21f1cfeb94b94ce044726856c291bed9391e3a4.zip qemu-f21f1cfeb94b94ce044726856c291bed9391e3a4.tar.gz qemu-f21f1cfeb94b94ce044726856c291bed9391e3a4.tar.bz2 |
Merge tag 'for_upstream' of https://git.kernel.org/pub/scm/virt/kvm/mst/qemu into staging
pci,pc,virtio: features, tests, fixes, cleanups
lots of acpi rework
first version of biosbits infrastructure
ASID support in vhost-vdpa
core_count2 support in smbios
PCIe DOE emulation
virtio vq reset
HMAT support
part of infrastructure for viommu support in vhost-vdpa
VTD PASID support
fixes, tests all over the place
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
# -----BEGIN PGP SIGNATURE-----
#
# iQFDBAABCAAtFiEEXQn9CHHI+FuUyooNKB8NuNKNVGkFAmNpXDkPHG1zdEByZWRo
# YXQuY29tAAoJECgfDbjSjVRpD0AH/2G8ZPrgrxJC9y3uD5/5J6QRzO+TsDYbg5ut
# uBf4rKSHHzcu6zdyAfsrhbAKKzyD4HrEGNXZrBjnKM1xCiB/SGBcDIWntwrca2+s
# 5Dpbi4xvd4tg6tVD4b47XNDCcn2uUbeI0e2M5QIbtCmzdi/xKbFAfl5G8DQp431X
# Kmz79G4CdKWyjVlM0HoYmdCw/4FxkdjD02tE/Uc5YMrePNaEg5Bw4hjCHbx1b6ur
# 6gjeXAtncm9s4sO0l+sIdyiqlxiTry9FSr35WaQ0qPU+Og5zaf1EiWfdl8TRo4qU
# EAATw5A4hyw11GfOGp7oOVkTGvcNB/H7aIxD7emdWZV8+BMRPKo=
# =zTCn
# -----END PGP SIGNATURE-----
# gpg: Signature made Mon 07 Nov 2022 14:27:53 EST
# gpg: using RSA key 5D09FD0871C8F85B94CA8A0D281F0DB8D28D5469
# gpg: issuer "mst@redhat.com"
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>" [full]
# gpg: aka "Michael S. Tsirkin <mst@redhat.com>" [full]
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17 0970 C350 3912 AFBE 8E67
# Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA 8A0D 281F 0DB8 D28D 5469
* tag 'for_upstream' of https://git.kernel.org/pub/scm/virt/kvm/mst/qemu: (83 commits)
checkpatch: better pattern for inline comments
hw/virtio: introduce virtio_device_should_start
tests/acpi: update tables for new core count test
bios-tables-test: add test for number of cores > 255
tests/acpi: allow changes for core_count2 test
bios-tables-test: teach test to use smbios 3.0 tables
hw/smbios: add core_count2 to smbios table type 4
vhost-user: Support vhost_dev_start
vhost: Change the sequence of device start
intel-iommu: PASID support
intel-iommu: convert VTD_PE_GET_FPD_ERR() to be a function
intel-iommu: drop VTDBus
intel-iommu: don't warn guest errors when getting rid2pasid entry
vfio: move implement of vfio_get_xlat_addr() to memory.c
tests: virt: Update expected *.acpihmatvirt tables
tests: acpi: aarch64/virt: add a test for hmat nodes with no initiators
hw/arm/virt: Enable HMAT on arm virt machine
tests: Add HMAT AArch64/virt empty table files
tests: acpi: q35: update expected blobs *.hmat-noinitiators expected HMAT:
tests: acpi: q35: add test for hmat nodes without initiators
...
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Diffstat (limited to 'hw/i386')
-rw-r--r-- | hw/i386/acpi-build.c | 203 | ||||
-rw-r--r-- | hw/i386/e820_memory_layout.c | 20 | ||||
-rw-r--r-- | hw/i386/e820_memory_layout.h | 8 | ||||
-rw-r--r-- | hw/i386/fw_cfg.c | 3 | ||||
-rw-r--r-- | hw/i386/fw_cfg.h | 1 | ||||
-rw-r--r-- | hw/i386/intel_iommu.c | 694 | ||||
-rw-r--r-- | hw/i386/intel_iommu_internal.h | 16 | ||||
-rw-r--r-- | hw/i386/microvm.c | 2 | ||||
-rw-r--r-- | hw/i386/pc.c | 2 | ||||
-rw-r--r-- | hw/i386/trace-events | 2 |
10 files changed, 514 insertions, 437 deletions
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 4f54b61..d9eaa5f 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -60,6 +60,7 @@ #include "hw/i386/fw_cfg.h" #include "hw/i386/ich9.h" #include "hw/pci/pci_bus.h" +#include "hw/pci-host/i440fx.h" #include "hw/pci-host/q35.h" #include "hw/i386/x86-iommu.h" @@ -112,7 +113,6 @@ typedef struct AcpiPmInfo { } AcpiPmInfo; typedef struct AcpiMiscInfo { - bool is_piix4; bool has_hpet; #ifdef CONFIG_TPM TPMVersion tpm_version; @@ -121,13 +121,6 @@ typedef struct AcpiMiscInfo { unsigned dsdt_size; } AcpiMiscInfo; -typedef struct AcpiBuildPciBusHotplugState { - GArray *device_table; - GArray *notify_table; - struct AcpiBuildPciBusHotplugState *parent; - bool pcihp_bridge_en; -} AcpiBuildPciBusHotplugState; - typedef struct FwCfgTPMConfig { uint32_t tpmppi_address; uint8_t tpm_version; @@ -288,17 +281,6 @@ static void acpi_get_pm_info(MachineState *machine, AcpiPmInfo *pm) static void acpi_get_misc_info(AcpiMiscInfo *info) { - Object *piix = object_resolve_type_unambiguous(TYPE_PIIX4_PM); - Object *lpc = object_resolve_type_unambiguous(TYPE_ICH9_LPC_DEVICE); - assert(!!piix != !!lpc); - - if (piix) { - info->is_piix4 = true; - } - if (lpc) { - info->is_piix4 = false; - } - info->has_hpet = hpet_find(); #ifdef CONFIG_TPM info->tpm_version = tpm_get_version(tpm_find()); @@ -430,18 +412,11 @@ static void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus, bool hotpluggbale_slot = false; bool bridge_in_acpi = false; bool cold_plugged_bridge = false; - bool is_vga = false; if (pdev) { pc = PCI_DEVICE_GET_CLASS(pdev); dc = DEVICE_GET_CLASS(pdev); - if (pc->class_id == PCI_CLASS_BRIDGE_ISA) { - continue; - } - - is_vga = pc->class_id == PCI_CLASS_DISPLAY_VGA; - /* * Cold plugged bridges aren't themselves hot-pluggable. * Hotplugged bridges *are* hot-pluggable. @@ -455,9 +430,10 @@ static void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus, /* * allow describing coldplugged bridges in ACPI even if they are not * on function 0, as they are not unpluggable, for all other devices - * generate description only for function 0 per slot + * generate description only for function 0 per slot, and for other + * functions if device on function provides its own AML */ - if (func && !bridge_in_acpi) { + if (func && !bridge_in_acpi && !get_dev_aml_func(DEVICE(pdev))) { continue; } } else { @@ -489,28 +465,7 @@ static void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus, aml_append(dev, aml_pci_device_dsm()); } - if (is_vga) { - /* add VGA specific AML methods */ - int s3d; - - if (object_dynamic_cast(OBJECT(pdev), "qxl-vga")) { - s3d = 3; - } else { - s3d = 0; - } - - method = aml_method("_S1D", 0, AML_NOTSERIALIZED); - aml_append(method, aml_return(aml_int(0))); - aml_append(dev, method); - - method = aml_method("_S2D", 0, AML_NOTSERIALIZED); - aml_append(method, aml_return(aml_int(0))); - aml_append(dev, method); - - method = aml_method("_S3D", 0, AML_NOTSERIALIZED); - aml_append(method, aml_return(aml_int(s3d))); - aml_append(dev, method); - } + call_dev_aml_func(DEVICE(pdev), dev); bridge_in_acpi = cold_plugged_bridge && pcihp_bridge_en; if (bridge_in_acpi) { @@ -1030,7 +985,6 @@ static void build_piix4_pci0_int(Aml *table) { Aml *dev; Aml *crs; - Aml *field; Aml *method; uint32_t irqs; Aml *sb_scope = aml_scope("_SB"); @@ -1039,13 +993,6 @@ static void build_piix4_pci0_int(Aml *table) aml_append(pci0_scope, build_prt(true)); aml_append(sb_scope, pci0_scope); - field = aml_field("PCI0.ISA.P40C", AML_BYTE_ACC, AML_NOLOCK, AML_PRESERVE); - aml_append(field, aml_named_field("PRQ0", 8)); - aml_append(field, aml_named_field("PRQ1", 8)); - aml_append(field, aml_named_field("PRQ2", 8)); - aml_append(field, aml_named_field("PRQ3", 8)); - aml_append(sb_scope, field); - aml_append(sb_scope, build_irq_status_method()); aml_append(sb_scope, build_iqcr_method(true)); @@ -1149,7 +1096,6 @@ static Aml *build_q35_routing_table(const char *str) static void build_q35_pci0_int(Aml *table) { - Aml *field; Aml *method; Aml *sb_scope = aml_scope("_SB"); Aml *pci0_scope = aml_scope("PCI0"); @@ -1186,18 +1132,6 @@ static void build_q35_pci0_int(Aml *table) aml_append(pci0_scope, method); aml_append(sb_scope, pci0_scope); - field = aml_field("PCI0.ISA.PIRQ", AML_BYTE_ACC, AML_NOLOCK, AML_PRESERVE); - aml_append(field, aml_named_field("PRQA", 8)); - aml_append(field, aml_named_field("PRQB", 8)); - aml_append(field, aml_named_field("PRQC", 8)); - aml_append(field, aml_named_field("PRQD", 8)); - aml_append(field, aml_reserved_field(0x20)); - aml_append(field, aml_named_field("PRQE", 8)); - aml_append(field, aml_named_field("PRQF", 8)); - aml_append(field, aml_named_field("PRQG", 8)); - aml_append(field, aml_named_field("PRQH", 8)); - aml_append(sb_scope, field); - aml_append(sb_scope, build_irq_status_method()); aml_append(sb_scope, build_iqcr_method(false)); @@ -1262,54 +1196,6 @@ static Aml *build_q35_dram_controller(const AcpiMcfgInfo *mcfg) return dev; } -static void build_q35_isa_bridge(Aml *table) -{ - Aml *dev; - Aml *scope; - Object *obj; - bool ambiguous; - - /* - * temporarily fish out isa bridge, build_q35_isa_bridge() will be dropped - * once PCI is converted to AcpiDevAmlIf and would be ble to generate - * AML for bridge itself - */ - obj = object_resolve_path_type("", TYPE_ICH9_LPC_DEVICE, &ambiguous); - assert(obj && !ambiguous); - - scope = aml_scope("_SB.PCI0"); - dev = aml_device("ISA"); - aml_append(dev, aml_name_decl("_ADR", aml_int(0x001F0000))); - - call_dev_aml_func(DEVICE(obj), dev); - aml_append(scope, dev); - aml_append(table, scope); -} - -static void build_piix4_isa_bridge(Aml *table) -{ - Aml *dev; - Aml *scope; - Object *obj; - bool ambiguous; - - /* - * temporarily fish out isa bridge, build_piix4_isa_bridge() will be dropped - * once PCI is converted to AcpiDevAmlIf and would be ble to generate - * AML for bridge itself - */ - obj = object_resolve_path_type("", TYPE_PIIX3_PCI_DEVICE, &ambiguous); - assert(obj && !ambiguous); - - scope = aml_scope("_SB.PCI0"); - dev = aml_device("ISA"); - aml_append(dev, aml_name_decl("_ADR", aml_int(0x00010000))); - - call_dev_aml_func(DEVICE(obj), dev); - aml_append(scope, dev); - aml_append(table, scope); -} - static void build_x86_acpi_pci_hotplug(Aml *table, uint64_t pcihp_addr) { Aml *scope; @@ -1416,25 +1302,6 @@ static Aml *build_q35_osc_method(bool enable_native_pcie_hotplug) return method; } -static void build_smb0(Aml *table, int devnr, int func) -{ - Aml *scope = aml_scope("_SB.PCI0"); - Aml *dev = aml_device("SMB0"); - bool ambiguous; - Object *obj; - /* - * temporarily fish out device hosting SMBUS, build_smb0 will be gone once - * PCI enumeration will be switched to call_dev_aml_func() - */ - obj = object_resolve_path_type("", TYPE_ICH9_SMB_DEVICE, &ambiguous); - assert(obj && !ambiguous); - - aml_append(dev, aml_name_decl("_ADR", aml_int(devnr << 16 | func))); - call_dev_aml_func(DEVICE(obj), dev); - aml_append(scope, dev); - aml_append(table, scope); -} - static void build_acpi0017(Aml *table) { Aml *dev, *scope, *method; @@ -1456,6 +1323,8 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, AcpiPmInfo *pm, AcpiMiscInfo *misc, Range *pci_hole, Range *pci_hole64, MachineState *machine) { + Object *i440fx = object_resolve_type_unambiguous(TYPE_I440FX_PCI_HOST_BRIDGE); + Object *q35 = object_resolve_type_unambiguous(TYPE_Q35_HOST_DEVICE); CrsRangeEntry *entry; Aml *dsdt, *sb_scope, *scope, *dev, *method, *field, *pkg, *crs; CrsRangeSet crs_range_set; @@ -1476,11 +1345,13 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, AcpiTable table = { .sig = "DSDT", .rev = 1, .oem_id = x86ms->oem_id, .oem_table_id = x86ms->oem_table_id }; + assert(!!i440fx != !!q35); + acpi_table_begin(&table, table_data); dsdt = init_aml_allocator(); build_dbg_aml(dsdt); - if (misc->is_piix4) { + if (i440fx) { sb_scope = aml_scope("_SB"); dev = aml_device("PCI0"); aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A03"))); @@ -1489,12 +1360,11 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, aml_append(sb_scope, dev); aml_append(dsdt, sb_scope); - build_piix4_isa_bridge(dsdt); if (pm->pcihp_bridge_en || pm->pcihp_root_en) { build_x86_acpi_pci_hotplug(dsdt, pm->pcihp_io_base); } build_piix4_pci0_int(dsdt); - } else { + } else if (q35) { sb_scope = aml_scope("_SB"); dev = aml_device("PCI0"); aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A08"))); @@ -1534,14 +1404,10 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, aml_append(dsdt, sb_scope); - build_q35_isa_bridge(dsdt); if (pm->pcihp_bridge_en) { build_x86_acpi_pci_hotplug(dsdt, pm->pcihp_io_base); } build_q35_pci0_int(dsdt); - if (pcms->smbus) { - build_smb0(dsdt, ICH9_SMB_DEV, ICH9_SMB_FUNC); - } } if (misc->has_hpet) { @@ -1554,6 +1420,18 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, aml_append(dsdt, sb_scope); } + scope = aml_scope("_GPE"); + { + aml_append(scope, aml_name_decl("_HID", aml_string("ACPI0006"))); + if (machine->nvdimms_state->is_enabled) { + method = aml_method("_E04", 0, AML_NOTSERIALIZED); + aml_append(method, aml_notify(aml_name("\\_SB.NVDR"), + aml_int(0x80))); + aml_append(scope, method); + } + } + aml_append(dsdt, scope); + if (pcmc->legacy_cpu_hotplug) { build_legacy_cpu_hotplug_aml(dsdt, machine, pm->cpu_hp_io_base); } else { @@ -1572,28 +1450,6 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, pcms->memhp_io_base); } - scope = aml_scope("_GPE"); - { - aml_append(scope, aml_name_decl("_HID", aml_string("ACPI0006"))); - - if (pm->pcihp_bridge_en || pm->pcihp_root_en) { - method = aml_method("_E01", 0, AML_NOTSERIALIZED); - aml_append(method, - aml_acquire(aml_name("\\_SB.PCI0.BLCK"), 0xFFFF)); - aml_append(method, aml_call0("\\_SB.PCI0.PCNT")); - aml_append(method, aml_release(aml_name("\\_SB.PCI0.BLCK"))); - aml_append(scope, method); - } - - if (machine->nvdimms_state->is_enabled) { - method = aml_method("_E04", 0, AML_NOTSERIALIZED); - aml_append(method, aml_notify(aml_name("\\_SB.NVDR"), - aml_int(0x80))); - aml_append(scope, method); - } - } - aml_append(dsdt, scope); - crs_range_set_init(&crs_range_set); bus = PC_MACHINE(machine)->bus; if (bus) { @@ -1872,6 +1728,19 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, } aml_append(dsdt, sb_scope); + if (pm->pcihp_bridge_en || pm->pcihp_root_en) { + scope = aml_scope("_GPE"); + { + method = aml_method("_E01", 0, AML_NOTSERIALIZED); + aml_append(method, + aml_acquire(aml_name("\\_SB.PCI0.BLCK"), 0xFFFF)); + aml_append(method, aml_call0("\\_SB.PCI0.PCNT")); + aml_append(method, aml_release(aml_name("\\_SB.PCI0.BLCK"))); + aml_append(scope, method); + } + aml_append(dsdt, scope); + } + /* copy AML table into ACPI tables blob and patch header there */ g_array_append_vals(table_data, dsdt->buf->data, dsdt->buf->len); acpi_table_end(linker, &table); diff --git a/hw/i386/e820_memory_layout.c b/hw/i386/e820_memory_layout.c index bcf9eaf..06970ac 100644 --- a/hw/i386/e820_memory_layout.c +++ b/hw/i386/e820_memory_layout.c @@ -11,29 +11,11 @@ #include "e820_memory_layout.h" static size_t e820_entries; -struct e820_table e820_reserve; struct e820_entry *e820_table; int e820_add_entry(uint64_t address, uint64_t length, uint32_t type) { - int index = le32_to_cpu(e820_reserve.count); - struct e820_entry *entry; - - if (type != E820_RAM) { - /* old FW_CFG_E820_TABLE entry -- reservations only */ - if (index >= E820_NR_ENTRIES) { - return -EBUSY; - } - entry = &e820_reserve.entry[index++]; - - entry->address = cpu_to_le64(address); - entry->length = cpu_to_le64(length); - entry->type = cpu_to_le32(type); - - e820_reserve.count = cpu_to_le32(index); - } - - /* new "etc/e820" file -- include ram too */ + /* new "etc/e820" file -- include ram and reserved entries */ e820_table = g_renew(struct e820_entry, e820_table, e820_entries + 1); e820_table[e820_entries].address = cpu_to_le64(address); e820_table[e820_entries].length = cpu_to_le64(length); diff --git a/hw/i386/e820_memory_layout.h b/hw/i386/e820_memory_layout.h index 04f9378..7c239aa 100644 --- a/hw/i386/e820_memory_layout.h +++ b/hw/i386/e820_memory_layout.h @@ -16,20 +16,12 @@ #define E820_NVS 4 #define E820_UNUSABLE 5 -#define E820_NR_ENTRIES 16 - struct e820_entry { uint64_t address; uint64_t length; uint32_t type; } QEMU_PACKED __attribute((__aligned__(4))); -struct e820_table { - uint32_t count; - struct e820_entry entry[E820_NR_ENTRIES]; -} QEMU_PACKED __attribute((__aligned__(4))); - -extern struct e820_table e820_reserve; extern struct e820_entry *e820_table; int e820_add_entry(uint64_t address, uint64_t length, uint32_t type); diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c index a283785..72a42f3 100644 --- a/hw/i386/fw_cfg.c +++ b/hw/i386/fw_cfg.c @@ -36,7 +36,6 @@ const char *fw_cfg_arch_key_name(uint16_t key) {FW_CFG_ACPI_TABLES, "acpi_tables"}, {FW_CFG_SMBIOS_ENTRIES, "smbios_entries"}, {FW_CFG_IRQ0_OVERRIDE, "irq0_override"}, - {FW_CFG_E820_TABLE, "e820_table"}, {FW_CFG_HPET, "hpet"}, }; @@ -127,8 +126,6 @@ FWCfgState *fw_cfg_arch_create(MachineState *ms, #endif fw_cfg_add_i32(fw_cfg, FW_CFG_IRQ0_OVERRIDE, 1); - fw_cfg_add_bytes(fw_cfg, FW_CFG_E820_TABLE, - &e820_reserve, sizeof(e820_reserve)); fw_cfg_add_file(fw_cfg, "etc/e820", e820_table, sizeof(struct e820_entry) * e820_get_num_entries()); diff --git a/hw/i386/fw_cfg.h b/hw/i386/fw_cfg.h index 275f15c..86ca7c1 100644 --- a/hw/i386/fw_cfg.h +++ b/hw/i386/fw_cfg.h @@ -17,7 +17,6 @@ #define FW_CFG_ACPI_TABLES (FW_CFG_ARCH_LOCAL + 0) #define FW_CFG_SMBIOS_ENTRIES (FW_CFG_ARCH_LOCAL + 1) #define FW_CFG_IRQ0_OVERRIDE (FW_CFG_ARCH_LOCAL + 2) -#define FW_CFG_E820_TABLE (FW_CFG_ARCH_LOCAL + 3) #define FW_CFG_HPET (FW_CFG_ARCH_LOCAL + 4) FWCfgState *fw_cfg_arch_create(MachineState *ms, diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 6524c2e..a08ee85 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -49,17 +49,24 @@ /* pe operations */ #define VTD_PE_GET_TYPE(pe) ((pe)->val[0] & VTD_SM_PASID_ENTRY_PGTT) #define VTD_PE_GET_LEVEL(pe) (2 + (((pe)->val[0] >> 2) & VTD_SM_PASID_ENTRY_AW)) -#define VTD_PE_GET_FPD_ERR(ret_fr, is_fpd_set, s, source_id, addr, is_write) {\ - if (ret_fr) { \ - ret_fr = -ret_fr; \ - if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) { \ - trace_vtd_fault_disabled(); \ - } else { \ - vtd_report_dmar_fault(s, source_id, addr, ret_fr, is_write); \ - } \ - goto error; \ - } \ -} + +/* + * PCI bus number (or SID) is not reliable since the device is usaully + * initalized before guest can configure the PCI bridge + * (SECONDARY_BUS_NUMBER). + */ +struct vtd_as_key { + PCIBus *bus; + uint8_t devfn; + uint32_t pasid; +}; + +struct vtd_iotlb_key { + uint64_t gfn; + uint32_t pasid; + uint32_t level; + uint16_t sid; +}; static void vtd_address_space_refresh_all(IntelIOMMUState *s); static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n); @@ -200,14 +207,46 @@ static inline gboolean vtd_as_has_map_notifier(VTDAddressSpace *as) } /* GHashTable functions */ -static gboolean vtd_uint64_equal(gconstpointer v1, gconstpointer v2) +static gboolean vtd_iotlb_equal(gconstpointer v1, gconstpointer v2) { - return *((const uint64_t *)v1) == *((const uint64_t *)v2); + const struct vtd_iotlb_key *key1 = v1; + const struct vtd_iotlb_key *key2 = v2; + + return key1->sid == key2->sid && + key1->pasid == key2->pasid && + key1->level == key2->level && + key1->gfn == key2->gfn; +} + +static guint vtd_iotlb_hash(gconstpointer v) +{ + const struct vtd_iotlb_key *key = v; + + return key->gfn | ((key->sid) << VTD_IOTLB_SID_SHIFT) | + (key->level) << VTD_IOTLB_LVL_SHIFT | + (key->pasid) << VTD_IOTLB_PASID_SHIFT; } -static guint vtd_uint64_hash(gconstpointer v) +static gboolean vtd_as_equal(gconstpointer v1, gconstpointer v2) +{ + const struct vtd_as_key *key1 = v1; + const struct vtd_as_key *key2 = v2; + + return (key1->bus == key2->bus) && (key1->devfn == key2->devfn) && + (key1->pasid == key2->pasid); +} + +/* + * Note that we use pointer to PCIBus as the key, so hashing/shifting + * based on the pointer value is intended. Note that we deal with + * collisions through vtd_as_equal(). + */ +static guint vtd_as_hash(gconstpointer v) { - return (guint)*(const uint64_t *)v; + const struct vtd_as_key *key = v; + guint value = (guint)(uintptr_t)key->bus; + + return (guint)(value << 8 | key->devfn); } static gboolean vtd_hash_remove_by_domain(gpointer key, gpointer value, @@ -248,22 +287,14 @@ static gboolean vtd_hash_remove_by_page(gpointer key, gpointer value, static void vtd_reset_context_cache_locked(IntelIOMMUState *s) { VTDAddressSpace *vtd_as; - VTDBus *vtd_bus; - GHashTableIter bus_it; - uint32_t devfn_it; + GHashTableIter as_it; trace_vtd_context_cache_reset(); - g_hash_table_iter_init(&bus_it, s->vtd_as_by_busptr); + g_hash_table_iter_init(&as_it, s->vtd_address_spaces); - while (g_hash_table_iter_next (&bus_it, NULL, (void**)&vtd_bus)) { - for (devfn_it = 0; devfn_it < PCI_DEVFN_MAX; ++devfn_it) { - vtd_as = vtd_bus->dev_as[devfn_it]; - if (!vtd_as) { - continue; - } - vtd_as->context_cache_entry.context_cache_gen = 0; - } + while (g_hash_table_iter_next(&as_it, NULL, (void **)&vtd_as)) { + vtd_as->context_cache_entry.context_cache_gen = 0; } s->context_cache_gen = 1; } @@ -290,13 +321,6 @@ static void vtd_reset_caches(IntelIOMMUState *s) vtd_iommu_unlock(s); } -static uint64_t vtd_get_iotlb_key(uint64_t gfn, uint16_t source_id, - uint32_t level) -{ - return gfn | ((uint64_t)(source_id) << VTD_IOTLB_SID_SHIFT) | - ((uint64_t)(level) << VTD_IOTLB_LVL_SHIFT); -} - static uint64_t vtd_get_iotlb_gfn(hwaddr addr, uint32_t level) { return (addr & vtd_slpt_level_page_mask(level)) >> VTD_PAGE_SHIFT_4K; @@ -304,15 +328,17 @@ static uint64_t vtd_get_iotlb_gfn(hwaddr addr, uint32_t level) /* Must be called with IOMMU lock held */ static VTDIOTLBEntry *vtd_lookup_iotlb(IntelIOMMUState *s, uint16_t source_id, - hwaddr addr) + uint32_t pasid, hwaddr addr) { + struct vtd_iotlb_key key; VTDIOTLBEntry *entry; - uint64_t key; int level; for (level = VTD_SL_PT_LEVEL; level < VTD_SL_PML4_LEVEL; level++) { - key = vtd_get_iotlb_key(vtd_get_iotlb_gfn(addr, level), - source_id, level); + key.gfn = vtd_get_iotlb_gfn(addr, level); + key.level = level; + key.sid = source_id; + key.pasid = pasid; entry = g_hash_table_lookup(s->iotlb, &key); if (entry) { goto out; @@ -326,10 +352,11 @@ out: /* Must be with IOMMU lock held */ static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id, uint16_t domain_id, hwaddr addr, uint64_t slpte, - uint8_t access_flags, uint32_t level) + uint8_t access_flags, uint32_t level, + uint32_t pasid) { VTDIOTLBEntry *entry = g_malloc(sizeof(*entry)); - uint64_t *key = g_malloc(sizeof(*key)); + struct vtd_iotlb_key *key = g_malloc(sizeof(*key)); uint64_t gfn = vtd_get_iotlb_gfn(addr, level); trace_vtd_iotlb_page_update(source_id, addr, slpte, domain_id); @@ -343,7 +370,13 @@ static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id, entry->slpte = slpte; entry->access_flags = access_flags; entry->mask = vtd_slpt_level_page_mask(level); - *key = vtd_get_iotlb_key(gfn, source_id, level); + entry->pasid = pasid; + + key->gfn = gfn; + key->sid = source_id; + key->level = level; + key->pasid = pasid; + g_hash_table_replace(s->iotlb, key, entry); } @@ -436,7 +469,8 @@ static void vtd_set_frcd_and_update_ppf(IntelIOMMUState *s, uint16_t index) /* Must not update F field now, should be done later */ static void vtd_record_frcd(IntelIOMMUState *s, uint16_t index, uint16_t source_id, hwaddr addr, - VTDFaultReason fault, bool is_write) + VTDFaultReason fault, bool is_write, + bool is_pasid, uint32_t pasid) { uint64_t hi = 0, lo; hwaddr frcd_reg_addr = DMAR_FRCD_REG_OFFSET + (((uint64_t)index) << 4); @@ -444,7 +478,8 @@ static void vtd_record_frcd(IntelIOMMUState *s, uint16_t index, assert(index < DMAR_FRCD_REG_NR); lo = VTD_FRCD_FI(addr); - hi = VTD_FRCD_SID(source_id) | VTD_FRCD_FR(fault); + hi = VTD_FRCD_SID(source_id) | VTD_FRCD_FR(fault) | + VTD_FRCD_PV(pasid) | VTD_FRCD_PP(is_pasid); if (!is_write) { hi |= VTD_FRCD_T; } @@ -475,7 +510,8 @@ static bool vtd_try_collapse_fault(IntelIOMMUState *s, uint16_t source_id) /* Log and report an DMAR (address translation) fault to software */ static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id, hwaddr addr, VTDFaultReason fault, - bool is_write) + bool is_write, bool is_pasid, + uint32_t pasid) { uint32_t fsts_reg = vtd_get_long_raw(s, DMAR_FSTS_REG); @@ -502,7 +538,8 @@ static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id, return; } - vtd_record_frcd(s, s->next_frcd_reg, source_id, addr, fault, is_write); + vtd_record_frcd(s, s->next_frcd_reg, source_id, addr, fault, + is_write, is_pasid, pasid); if (fsts_reg & VTD_FSTS_PPF) { error_report_once("There are pending faults already, " @@ -807,13 +844,15 @@ static int vtd_get_pe_from_pasid_table(IntelIOMMUState *s, static int vtd_ce_get_rid2pasid_entry(IntelIOMMUState *s, VTDContextEntry *ce, - VTDPASIDEntry *pe) + VTDPASIDEntry *pe, + uint32_t pasid) { - uint32_t pasid; dma_addr_t pasid_dir_base; int ret = 0; - pasid = VTD_CE_GET_RID2PASID(ce); + if (pasid == PCI_NO_PASID) { + pasid = VTD_CE_GET_RID2PASID(ce); + } pasid_dir_base = VTD_CE_GET_PASID_DIR_TABLE(ce); ret = vtd_get_pe_from_pasid_table(s, pasid_dir_base, pasid, pe); @@ -822,15 +861,17 @@ static int vtd_ce_get_rid2pasid_entry(IntelIOMMUState *s, static int vtd_ce_get_pasid_fpd(IntelIOMMUState *s, VTDContextEntry *ce, - bool *pe_fpd_set) + bool *pe_fpd_set, + uint32_t pasid) { int ret; - uint32_t pasid; dma_addr_t pasid_dir_base; VTDPASIDDirEntry pdire; VTDPASIDEntry pe; - pasid = VTD_CE_GET_RID2PASID(ce); + if (pasid == PCI_NO_PASID) { + pasid = VTD_CE_GET_RID2PASID(ce); + } pasid_dir_base = VTD_CE_GET_PASID_DIR_TABLE(ce); /* @@ -876,12 +917,13 @@ static inline uint32_t vtd_ce_get_level(VTDContextEntry *ce) } static uint32_t vtd_get_iova_level(IntelIOMMUState *s, - VTDContextEntry *ce) + VTDContextEntry *ce, + uint32_t pasid) { VTDPASIDEntry pe; if (s->root_scalable) { - vtd_ce_get_rid2pasid_entry(s, ce, &pe); + vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid); return VTD_PE_GET_LEVEL(&pe); } @@ -894,12 +936,13 @@ static inline uint32_t vtd_ce_get_agaw(VTDContextEntry *ce) } static uint32_t vtd_get_iova_agaw(IntelIOMMUState *s, - VTDContextEntry *ce) + VTDContextEntry *ce, + uint32_t pasid) { VTDPASIDEntry pe; if (s->root_scalable) { - vtd_ce_get_rid2pasid_entry(s, ce, &pe); + vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid); return 30 + ((pe.val[0] >> 2) & VTD_SM_PASID_ENTRY_AW) * 9; } @@ -941,31 +984,33 @@ static inline bool vtd_ce_type_check(X86IOMMUState *x86_iommu, } static inline uint64_t vtd_iova_limit(IntelIOMMUState *s, - VTDContextEntry *ce, uint8_t aw) + VTDContextEntry *ce, uint8_t aw, + uint32_t pasid) { - uint32_t ce_agaw = vtd_get_iova_agaw(s, ce); + uint32_t ce_agaw = vtd_get_iova_agaw(s, ce, pasid); return 1ULL << MIN(ce_agaw, aw); } /* Return true if IOVA passes range check, otherwise false. */ static inline bool vtd_iova_range_check(IntelIOMMUState *s, uint64_t iova, VTDContextEntry *ce, - uint8_t aw) + uint8_t aw, uint32_t pasid) { /* * Check if @iova is above 2^X-1, where X is the minimum of MGAW * in CAP_REG and AW in context-entry. */ - return !(iova & ~(vtd_iova_limit(s, ce, aw) - 1)); + return !(iova & ~(vtd_iova_limit(s, ce, aw, pasid) - 1)); } static dma_addr_t vtd_get_iova_pgtbl_base(IntelIOMMUState *s, - VTDContextEntry *ce) + VTDContextEntry *ce, + uint32_t pasid) { VTDPASIDEntry pe; if (s->root_scalable) { - vtd_ce_get_rid2pasid_entry(s, ce, &pe); + vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid); return pe.val[0] & VTD_SM_PASID_ENTRY_SLPTPTR; } @@ -993,50 +1038,25 @@ static bool vtd_slpte_nonzero_rsvd(uint64_t slpte, uint32_t level) return slpte & rsvd_mask; } -/* Find the VTD address space associated with a given bus number */ -static VTDBus *vtd_find_as_from_bus_num(IntelIOMMUState *s, uint8_t bus_num) -{ - VTDBus *vtd_bus = s->vtd_as_by_bus_num[bus_num]; - GHashTableIter iter; - - if (vtd_bus) { - return vtd_bus; - } - - /* - * Iterate over the registered buses to find the one which - * currently holds this bus number and update the bus_num - * lookup table. - */ - g_hash_table_iter_init(&iter, s->vtd_as_by_busptr); - while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_bus)) { - if (pci_bus_num(vtd_bus->bus) == bus_num) { - s->vtd_as_by_bus_num[bus_num] = vtd_bus; - return vtd_bus; - } - } - - return NULL; -} - /* Given the @iova, get relevant @slptep. @slpte_level will be the last level * of the translation, can be used for deciding the size of large page. */ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce, uint64_t iova, bool is_write, uint64_t *slptep, uint32_t *slpte_level, - bool *reads, bool *writes, uint8_t aw_bits) + bool *reads, bool *writes, uint8_t aw_bits, + uint32_t pasid) { - dma_addr_t addr = vtd_get_iova_pgtbl_base(s, ce); - uint32_t level = vtd_get_iova_level(s, ce); + dma_addr_t addr = vtd_get_iova_pgtbl_base(s, ce, pasid); + uint32_t level = vtd_get_iova_level(s, ce, pasid); uint32_t offset; uint64_t slpte; uint64_t access_right_check; uint64_t xlat, size; - if (!vtd_iova_range_check(s, iova, ce, aw_bits)) { - error_report_once("%s: detected IOVA overflow (iova=0x%" PRIx64 ")", - __func__, iova); + if (!vtd_iova_range_check(s, iova, ce, aw_bits, pasid)) { + error_report_once("%s: detected IOVA overflow (iova=0x%" PRIx64 "," + "pasid=0x%" PRIx32 ")", __func__, iova, pasid); return -VTD_FR_ADDR_BEYOND_MGAW; } @@ -1049,8 +1069,9 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce, if (slpte == (uint64_t)-1) { error_report_once("%s: detected read error on DMAR slpte " - "(iova=0x%" PRIx64 ")", __func__, iova); - if (level == vtd_get_iova_level(s, ce)) { + "(iova=0x%" PRIx64 ", pasid=0x%" PRIx32 ")", + __func__, iova, pasid); + if (level == vtd_get_iova_level(s, ce, pasid)) { /* Invalid programming of context-entry */ return -VTD_FR_CONTEXT_ENTRY_INV; } else { @@ -1062,15 +1083,16 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce, if (!(slpte & access_right_check)) { error_report_once("%s: detected slpte permission error " "(iova=0x%" PRIx64 ", level=0x%" PRIx32 ", " - "slpte=0x%" PRIx64 ", write=%d)", __func__, - iova, level, slpte, is_write); + "slpte=0x%" PRIx64 ", write=%d, pasid=0x%" + PRIx32 ")", __func__, iova, level, + slpte, is_write, pasid); return is_write ? -VTD_FR_WRITE : -VTD_FR_READ; } if (vtd_slpte_nonzero_rsvd(slpte, level)) { error_report_once("%s: detected splte reserve non-zero " "iova=0x%" PRIx64 ", level=0x%" PRIx32 - "slpte=0x%" PRIx64 ")", __func__, iova, - level, slpte); + "slpte=0x%" PRIx64 ", pasid=0x%" PRIX32 ")", + __func__, iova, level, slpte, pasid); return -VTD_FR_PAGING_ENTRY_RSVD; } @@ -1098,9 +1120,10 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce, error_report_once("%s: xlat address is in interrupt range " "(iova=0x%" PRIx64 ", level=0x%" PRIx32 ", " "slpte=0x%" PRIx64 ", write=%d, " - "xlat=0x%" PRIx64 ", size=0x%" PRIx64 ")", + "xlat=0x%" PRIx64 ", size=0x%" PRIx64 ", " + "pasid=0x%" PRIx32 ")", __func__, iova, level, slpte, is_write, - xlat, size); + xlat, size, pasid); return s->scalable_mode ? -VTD_FR_SM_INTERRUPT_ADDR : -VTD_FR_INTERRUPT_ADDR; } @@ -1314,18 +1337,19 @@ next: */ static int vtd_page_walk(IntelIOMMUState *s, VTDContextEntry *ce, uint64_t start, uint64_t end, - vtd_page_walk_info *info) + vtd_page_walk_info *info, + uint32_t pasid) { - dma_addr_t addr = vtd_get_iova_pgtbl_base(s, ce); - uint32_t level = vtd_get_iova_level(s, ce); + dma_addr_t addr = vtd_get_iova_pgtbl_base(s, ce, pasid); + uint32_t level = vtd_get_iova_level(s, ce, pasid); - if (!vtd_iova_range_check(s, start, ce, info->aw)) { + if (!vtd_iova_range_check(s, start, ce, info->aw, pasid)) { return -VTD_FR_ADDR_BEYOND_MGAW; } - if (!vtd_iova_range_check(s, end, ce, info->aw)) { + if (!vtd_iova_range_check(s, end, ce, info->aw, pasid)) { /* Fix end so that it reaches the maximum */ - end = vtd_iova_limit(s, ce, info->aw); + end = vtd_iova_limit(s, ce, info->aw, pasid); } return vtd_page_walk_level(addr, start, end, level, true, true, info); @@ -1393,7 +1417,7 @@ static int vtd_ce_rid2pasid_check(IntelIOMMUState *s, * has valid rid2pasid setting, which includes valid * rid2pasid field and corresponding pasid entry setting */ - return vtd_ce_get_rid2pasid_entry(s, ce, &pe); + return vtd_ce_get_rid2pasid_entry(s, ce, &pe, PCI_NO_PASID); } /* Map a device to its corresponding domain (context-entry) */ @@ -1476,12 +1500,13 @@ static int vtd_sync_shadow_page_hook(IOMMUTLBEvent *event, } static uint16_t vtd_get_domain_id(IntelIOMMUState *s, - VTDContextEntry *ce) + VTDContextEntry *ce, + uint32_t pasid) { VTDPASIDEntry pe; if (s->root_scalable) { - vtd_ce_get_rid2pasid_entry(s, ce, &pe); + vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid); return VTD_SM_PASID_ENTRY_DID(pe.val[1]); } @@ -1499,10 +1524,10 @@ static int vtd_sync_shadow_page_table_range(VTDAddressSpace *vtd_as, .notify_unmap = true, .aw = s->aw_bits, .as = vtd_as, - .domain_id = vtd_get_domain_id(s, ce), + .domain_id = vtd_get_domain_id(s, ce, vtd_as->pasid), }; - return vtd_page_walk(s, ce, addr, addr + size, &info); + return vtd_page_walk(s, ce, addr, addr + size, &info, vtd_as->pasid); } static int vtd_sync_shadow_page_table(VTDAddressSpace *vtd_as) @@ -1546,16 +1571,19 @@ static int vtd_sync_shadow_page_table(VTDAddressSpace *vtd_as) * 1st-level translation or 2nd-level translation, it depends * on PGTT setting. */ -static bool vtd_dev_pt_enabled(IntelIOMMUState *s, VTDContextEntry *ce) +static bool vtd_dev_pt_enabled(IntelIOMMUState *s, VTDContextEntry *ce, + uint32_t pasid) { VTDPASIDEntry pe; int ret; if (s->root_scalable) { - ret = vtd_ce_get_rid2pasid_entry(s, ce, &pe); + ret = vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid); if (ret) { - error_report_once("%s: vtd_ce_get_rid2pasid_entry error: %"PRId32, - __func__, ret); + /* + * This error is guest triggerable. We should assumt PT + * not enabled for safety. + */ return false; } return (VTD_PE_GET_TYPE(&pe) == VTD_SM_PASID_ENTRY_PT); @@ -1569,14 +1597,12 @@ static bool vtd_as_pt_enabled(VTDAddressSpace *as) { IntelIOMMUState *s; VTDContextEntry ce; - int ret; assert(as); s = as->iommu_state; - ret = vtd_dev_to_context_entry(s, pci_bus_num(as->bus), - as->devfn, &ce); - if (ret) { + if (vtd_dev_to_context_entry(s, pci_bus_num(as->bus), as->devfn, + &ce)) { /* * Possibly failed to parse the context entry for some reason * (e.g., during init, or any guest configuration errors on @@ -1586,19 +1612,20 @@ static bool vtd_as_pt_enabled(VTDAddressSpace *as) return false; } - return vtd_dev_pt_enabled(s, &ce); + return vtd_dev_pt_enabled(s, &ce, as->pasid); } /* Return whether the device is using IOMMU translation. */ static bool vtd_switch_address_space(VTDAddressSpace *as) { - bool use_iommu; + bool use_iommu, pt; /* Whether we need to take the BQL on our own */ bool take_bql = !qemu_mutex_iothread_locked(); assert(as); use_iommu = as->iommu_state->dmar_enabled && !vtd_as_pt_enabled(as); + pt = as->iommu_state->dmar_enabled && vtd_as_pt_enabled(as); trace_vtd_switch_address_space(pci_bus_num(as->bus), VTD_PCI_SLOT(as->devfn), @@ -1618,11 +1645,53 @@ static bool vtd_switch_address_space(VTDAddressSpace *as) if (use_iommu) { memory_region_set_enabled(&as->nodmar, false); memory_region_set_enabled(MEMORY_REGION(&as->iommu), true); + /* + * vt-d spec v3.4 3.14: + * + * """ + * Requests-with-PASID with input address in range 0xFEEx_xxxx + * are translated normally like any other request-with-PASID + * through DMA-remapping hardware. + * """ + * + * Need to disable ir for as with PASID. + */ + if (as->pasid != PCI_NO_PASID) { + memory_region_set_enabled(&as->iommu_ir, false); + } else { + memory_region_set_enabled(&as->iommu_ir, true); + } } else { memory_region_set_enabled(MEMORY_REGION(&as->iommu), false); memory_region_set_enabled(&as->nodmar, true); } + /* + * vtd-spec v3.4 3.14: + * + * """ + * Requests-with-PASID with input address in range 0xFEEx_xxxx are + * translated normally like any other request-with-PASID through + * DMA-remapping hardware. However, if such a request is processed + * using pass-through translation, it will be blocked as described + * in the paragraph below. + * + * Software must not program paging-structure entries to remap any + * address to the interrupt address range. Untranslated requests + * and translation requests that result in an address in the + * interrupt range will be blocked with condition code LGN.4 or + * SGN.8. + * """ + * + * We enable per as memory region (iommu_ir_fault) for catching + * the tranlsation for interrupt range through PASID + PT. + */ + if (pt && as->pasid != PCI_NO_PASID) { + memory_region_set_enabled(&as->iommu_ir_fault, true); + } else { + memory_region_set_enabled(&as->iommu_ir_fault, false); + } + if (take_bql) { qemu_mutex_unlock_iothread(); } @@ -1632,24 +1701,13 @@ static bool vtd_switch_address_space(VTDAddressSpace *as) static void vtd_switch_address_space_all(IntelIOMMUState *s) { + VTDAddressSpace *vtd_as; GHashTableIter iter; - VTDBus *vtd_bus; - int i; - - g_hash_table_iter_init(&iter, s->vtd_as_by_busptr); - while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_bus)) { - for (i = 0; i < PCI_DEVFN_MAX; i++) { - if (!vtd_bus->dev_as[i]) { - continue; - } - vtd_switch_address_space(vtd_bus->dev_as[i]); - } - } -} -static inline uint16_t vtd_make_source_id(uint8_t bus_num, uint8_t devfn) -{ - return ((bus_num & 0xffUL) << 8) | (devfn & 0xffUL); + g_hash_table_iter_init(&iter, s->vtd_address_spaces); + while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_as)) { + vtd_switch_address_space(vtd_as); + } } static const bool vtd_qualified_faults[] = { @@ -1686,18 +1744,37 @@ static inline bool vtd_is_interrupt_addr(hwaddr addr) return VTD_INTERRUPT_ADDR_FIRST <= addr && addr <= VTD_INTERRUPT_ADDR_LAST; } +static gboolean vtd_find_as_by_sid(gpointer key, gpointer value, + gpointer user_data) +{ + struct vtd_as_key *as_key = (struct vtd_as_key *)key; + uint16_t target_sid = *(uint16_t *)user_data; + uint16_t sid = PCI_BUILD_BDF(pci_bus_num(as_key->bus), as_key->devfn); + return sid == target_sid; +} + +static VTDAddressSpace *vtd_get_as_by_sid(IntelIOMMUState *s, uint16_t sid) +{ + uint8_t bus_num = PCI_BUS_NUM(sid); + VTDAddressSpace *vtd_as = s->vtd_as_cache[bus_num]; + + if (vtd_as && + (sid == PCI_BUILD_BDF(pci_bus_num(vtd_as->bus), vtd_as->devfn))) { + return vtd_as; + } + + vtd_as = g_hash_table_find(s->vtd_address_spaces, vtd_find_as_by_sid, &sid); + s->vtd_as_cache[bus_num] = vtd_as; + + return vtd_as; +} + static void vtd_pt_enable_fast_path(IntelIOMMUState *s, uint16_t source_id) { - VTDBus *vtd_bus; VTDAddressSpace *vtd_as; bool success = false; - vtd_bus = vtd_find_as_from_bus_num(s, VTD_SID_TO_BUS(source_id)); - if (!vtd_bus) { - goto out; - } - - vtd_as = vtd_bus->dev_as[VTD_SID_TO_DEVFN(source_id)]; + vtd_as = vtd_get_as_by_sid(s, source_id); if (!vtd_as) { goto out; } @@ -1711,6 +1788,22 @@ out: trace_vtd_pt_enable_fast_path(source_id, success); } +static void vtd_report_fault(IntelIOMMUState *s, + int err, bool is_fpd_set, + uint16_t source_id, + hwaddr addr, + bool is_write, + bool is_pasid, + uint32_t pasid) +{ + if (is_fpd_set && vtd_is_qualified_fault(err)) { + trace_vtd_fault_disabled(); + } else { + vtd_report_dmar_fault(s, source_id, addr, err, is_write, + is_pasid, pasid); + } +} + /* Map dev to context-entry then do a paging-structures walk to do a iommu * translation. * @@ -1732,13 +1825,14 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, uint8_t bus_num = pci_bus_num(bus); VTDContextCacheEntry *cc_entry; uint64_t slpte, page_mask; - uint32_t level; - uint16_t source_id = vtd_make_source_id(bus_num, devfn); + uint32_t level, pasid = vtd_as->pasid; + uint16_t source_id = PCI_BUILD_BDF(bus_num, devfn); int ret_fr; bool is_fpd_set = false; bool reads = true; bool writes = true; uint8_t access_flags; + bool rid2pasid = (pasid == PCI_NO_PASID) && s->root_scalable; VTDIOTLBEntry *iotlb_entry; /* @@ -1751,15 +1845,17 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, cc_entry = &vtd_as->context_cache_entry; - /* Try to fetch slpte form IOTLB */ - iotlb_entry = vtd_lookup_iotlb(s, source_id, addr); - if (iotlb_entry) { - trace_vtd_iotlb_page_hit(source_id, addr, iotlb_entry->slpte, - iotlb_entry->domain_id); - slpte = iotlb_entry->slpte; - access_flags = iotlb_entry->access_flags; - page_mask = iotlb_entry->mask; - goto out; + /* Try to fetch slpte form IOTLB, we don't need RID2PASID logic */ + if (!rid2pasid) { + iotlb_entry = vtd_lookup_iotlb(s, source_id, pasid, addr); + if (iotlb_entry) { + trace_vtd_iotlb_page_hit(source_id, addr, iotlb_entry->slpte, + iotlb_entry->domain_id); + slpte = iotlb_entry->slpte; + access_flags = iotlb_entry->access_flags; + page_mask = iotlb_entry->mask; + goto out; + } } /* Try to fetch context-entry from cache first */ @@ -1770,16 +1866,26 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, ce = cc_entry->context_entry; is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD; if (!is_fpd_set && s->root_scalable) { - ret_fr = vtd_ce_get_pasid_fpd(s, &ce, &is_fpd_set); - VTD_PE_GET_FPD_ERR(ret_fr, is_fpd_set, s, source_id, addr, is_write); + ret_fr = vtd_ce_get_pasid_fpd(s, &ce, &is_fpd_set, pasid); + if (ret_fr) { + vtd_report_fault(s, -ret_fr, is_fpd_set, + source_id, addr, is_write, + false, 0); + goto error; + } } } else { ret_fr = vtd_dev_to_context_entry(s, bus_num, devfn, &ce); is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD; if (!ret_fr && !is_fpd_set && s->root_scalable) { - ret_fr = vtd_ce_get_pasid_fpd(s, &ce, &is_fpd_set); + ret_fr = vtd_ce_get_pasid_fpd(s, &ce, &is_fpd_set, pasid); + } + if (ret_fr) { + vtd_report_fault(s, -ret_fr, is_fpd_set, + source_id, addr, is_write, + false, 0); + goto error; } - VTD_PE_GET_FPD_ERR(ret_fr, is_fpd_set, s, source_id, addr, is_write); /* Update context-cache */ trace_vtd_iotlb_cc_update(bus_num, devfn, ce.hi, ce.lo, cc_entry->context_cache_gen, @@ -1788,11 +1894,15 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, cc_entry->context_cache_gen = s->context_cache_gen; } + if (rid2pasid) { + pasid = VTD_CE_GET_RID2PASID(&ce); + } + /* * We don't need to translate for pass-through context entries. * Also, let's ignore IOTLB caching as well for PT devices. */ - if (vtd_dev_pt_enabled(s, &ce)) { + if (vtd_dev_pt_enabled(s, &ce, pasid)) { entry->iova = addr & VTD_PAGE_MASK_4K; entry->translated_addr = entry->iova; entry->addr_mask = ~VTD_PAGE_MASK_4K; @@ -1813,14 +1923,31 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, return true; } + /* Try to fetch slpte form IOTLB for RID2PASID slow path */ + if (rid2pasid) { + iotlb_entry = vtd_lookup_iotlb(s, source_id, pasid, addr); + if (iotlb_entry) { + trace_vtd_iotlb_page_hit(source_id, addr, iotlb_entry->slpte, + iotlb_entry->domain_id); + slpte = iotlb_entry->slpte; + access_flags = iotlb_entry->access_flags; + page_mask = iotlb_entry->mask; + goto out; + } + } + ret_fr = vtd_iova_to_slpte(s, &ce, addr, is_write, &slpte, &level, - &reads, &writes, s->aw_bits); - VTD_PE_GET_FPD_ERR(ret_fr, is_fpd_set, s, source_id, addr, is_write); + &reads, &writes, s->aw_bits, pasid); + if (ret_fr) { + vtd_report_fault(s, -ret_fr, is_fpd_set, source_id, + addr, is_write, pasid != PCI_NO_PASID, pasid); + goto error; + } page_mask = vtd_slpt_level_page_mask(level); access_flags = IOMMU_ACCESS_FLAG(reads, writes); - vtd_update_iotlb(s, source_id, vtd_get_domain_id(s, &ce), addr, slpte, - access_flags, level); + vtd_update_iotlb(s, source_id, vtd_get_domain_id(s, &ce, pasid), + addr, slpte, access_flags, level, pasid); out: vtd_iommu_unlock(s); entry->iova = addr & page_mask; @@ -1905,11 +2032,10 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s, uint16_t source_id, uint16_t func_mask) { + GHashTableIter as_it; uint16_t mask; - VTDBus *vtd_bus; VTDAddressSpace *vtd_as; uint8_t bus_n, devfn; - uint16_t devfn_it; trace_vtd_inv_desc_cc_devices(source_id, func_mask); @@ -1932,32 +2058,31 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s, mask = ~mask; bus_n = VTD_SID_TO_BUS(source_id); - vtd_bus = vtd_find_as_from_bus_num(s, bus_n); - if (vtd_bus) { - devfn = VTD_SID_TO_DEVFN(source_id); - for (devfn_it = 0; devfn_it < PCI_DEVFN_MAX; ++devfn_it) { - vtd_as = vtd_bus->dev_as[devfn_it]; - if (vtd_as && ((devfn_it & mask) == (devfn & mask))) { - trace_vtd_inv_desc_cc_device(bus_n, VTD_PCI_SLOT(devfn_it), - VTD_PCI_FUNC(devfn_it)); - vtd_iommu_lock(s); - vtd_as->context_cache_entry.context_cache_gen = 0; - vtd_iommu_unlock(s); - /* - * Do switch address space when needed, in case if the - * device passthrough bit is switched. - */ - vtd_switch_address_space(vtd_as); - /* - * So a device is moving out of (or moving into) a - * domain, resync the shadow page table. - * This won't bring bad even if we have no such - * notifier registered - the IOMMU notification - * framework will skip MAP notifications if that - * happened. - */ - vtd_sync_shadow_page_table(vtd_as); - } + devfn = VTD_SID_TO_DEVFN(source_id); + + g_hash_table_iter_init(&as_it, s->vtd_address_spaces); + while (g_hash_table_iter_next(&as_it, NULL, (void **)&vtd_as)) { + if ((pci_bus_num(vtd_as->bus) == bus_n) && + (vtd_as->devfn & mask) == (devfn & mask)) { + trace_vtd_inv_desc_cc_device(bus_n, VTD_PCI_SLOT(vtd_as->devfn), + VTD_PCI_FUNC(vtd_as->devfn)); + vtd_iommu_lock(s); + vtd_as->context_cache_entry.context_cache_gen = 0; + vtd_iommu_unlock(s); + /* + * Do switch address space when needed, in case if the + * device passthrough bit is switched. + */ + vtd_switch_address_space(vtd_as); + /* + * So a device is moving out of (or moving into) a + * domain, resync the shadow page table. + * This won't bring bad even if we have no such + * notifier registered - the IOMMU notification + * framework will skip MAP notifications if that + * happened. + */ + vtd_sync_shadow_page_table(vtd_as); } } } @@ -2014,7 +2139,7 @@ static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id) QLIST_FOREACH(vtd_as, &s->vtd_as_with_notifiers, next) { if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus), vtd_as->devfn, &ce) && - domain_id == vtd_get_domain_id(s, &ce)) { + domain_id == vtd_get_domain_id(s, &ce, vtd_as->pasid)) { vtd_sync_shadow_page_table(vtd_as); } } @@ -2022,7 +2147,7 @@ static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id) static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s, uint16_t domain_id, hwaddr addr, - uint8_t am) + uint8_t am, uint32_t pasid) { VTDAddressSpace *vtd_as; VTDContextEntry ce; @@ -2030,9 +2155,12 @@ static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s, hwaddr size = (1 << am) * VTD_PAGE_SIZE; QLIST_FOREACH(vtd_as, &(s->vtd_as_with_notifiers), next) { + if (pasid != PCI_NO_PASID && pasid != vtd_as->pasid) { + continue; + } ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus), vtd_as->devfn, &ce); - if (!ret && domain_id == vtd_get_domain_id(s, &ce)) { + if (!ret && domain_id == vtd_get_domain_id(s, &ce, vtd_as->pasid)) { if (vtd_as_has_map_notifier(vtd_as)) { /* * As long as we have MAP notifications registered in @@ -2076,7 +2204,7 @@ static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id, vtd_iommu_lock(s); g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page, &info); vtd_iommu_unlock(s); - vtd_iotlb_page_invalidate_notify(s, domain_id, addr, am); + vtd_iotlb_page_invalidate_notify(s, domain_id, addr, am, PCI_NO_PASID); } /* Flush IOTLB @@ -2473,18 +2601,13 @@ static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s, { VTDAddressSpace *vtd_dev_as; IOMMUTLBEvent event; - struct VTDBus *vtd_bus; hwaddr addr; uint64_t sz; uint16_t sid; - uint8_t devfn; bool size; - uint8_t bus_num; addr = VTD_INV_DESC_DEVICE_IOTLB_ADDR(inv_desc->hi); sid = VTD_INV_DESC_DEVICE_IOTLB_SID(inv_desc->lo); - devfn = sid & 0xff; - bus_num = sid >> 8; size = VTD_INV_DESC_DEVICE_IOTLB_SIZE(inv_desc->hi); if ((inv_desc->lo & VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO) || @@ -2495,12 +2618,11 @@ static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s, return false; } - vtd_bus = vtd_find_as_from_bus_num(s, bus_num); - if (!vtd_bus) { - goto done; - } - - vtd_dev_as = vtd_bus->dev_as[devfn]; + /* + * Using sid is OK since the guest should have finished the + * initialization of both the bus and device. + */ + vtd_dev_as = vtd_get_as_by_sid(s, sid); if (!vtd_dev_as) { goto done; } @@ -3151,6 +3273,7 @@ static Property vtd_properties[] = { DEFINE_PROP_BOOL("caching-mode", IntelIOMMUState, caching_mode, FALSE), DEFINE_PROP_BOOL("x-scalable-mode", IntelIOMMUState, scalable_mode, FALSE), DEFINE_PROP_BOOL("snoop-control", IntelIOMMUState, snoop_control, false), + DEFINE_PROP_BOOL("x-pasid-mode", IntelIOMMUState, pasid, false), DEFINE_PROP_BOOL("dma-drain", IntelIOMMUState, dma_drain, true), DEFINE_PROP_BOOL("dma-translation", IntelIOMMUState, dma_translation, true), DEFINE_PROP_END_OF_LIST(), @@ -3425,32 +3548,98 @@ static const MemoryRegionOps vtd_mem_ir_ops = { }, }; -VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn) +static void vtd_report_ir_illegal_access(VTDAddressSpace *vtd_as, + hwaddr addr, bool is_write) { - uintptr_t key = (uintptr_t)bus; - VTDBus *vtd_bus = g_hash_table_lookup(s->vtd_as_by_busptr, &key); - VTDAddressSpace *vtd_dev_as; - char name[128]; + IntelIOMMUState *s = vtd_as->iommu_state; + uint8_t bus_n = pci_bus_num(vtd_as->bus); + uint16_t sid = PCI_BUILD_BDF(bus_n, vtd_as->devfn); + bool is_fpd_set = false; + VTDContextEntry ce; - if (!vtd_bus) { - uintptr_t *new_key = g_malloc(sizeof(*new_key)); - *new_key = (uintptr_t)bus; - /* No corresponding free() */ - vtd_bus = g_malloc0(sizeof(VTDBus) + sizeof(VTDAddressSpace *) * \ - PCI_DEVFN_MAX); - vtd_bus->bus = bus; - g_hash_table_insert(s->vtd_as_by_busptr, new_key, vtd_bus); + assert(vtd_as->pasid != PCI_NO_PASID); + + /* Try out best to fetch FPD, we can't do anything more */ + if (vtd_dev_to_context_entry(s, bus_n, vtd_as->devfn, &ce) == 0) { + is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD; + if (!is_fpd_set && s->root_scalable) { + vtd_ce_get_pasid_fpd(s, &ce, &is_fpd_set, vtd_as->pasid); + } } - vtd_dev_as = vtd_bus->dev_as[devfn]; + vtd_report_fault(s, VTD_FR_SM_INTERRUPT_ADDR, + is_fpd_set, sid, addr, is_write, + true, vtd_as->pasid); +} + +static MemTxResult vtd_mem_ir_fault_read(void *opaque, hwaddr addr, + uint64_t *data, unsigned size, + MemTxAttrs attrs) +{ + vtd_report_ir_illegal_access(opaque, addr, false); + return MEMTX_ERROR; +} + +static MemTxResult vtd_mem_ir_fault_write(void *opaque, hwaddr addr, + uint64_t value, unsigned size, + MemTxAttrs attrs) +{ + vtd_report_ir_illegal_access(opaque, addr, true); + + return MEMTX_ERROR; +} + +static const MemoryRegionOps vtd_mem_ir_fault_ops = { + .read_with_attrs = vtd_mem_ir_fault_read, + .write_with_attrs = vtd_mem_ir_fault_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .impl = { + .min_access_size = 1, + .max_access_size = 8, + }, + .valid = { + .min_access_size = 1, + .max_access_size = 8, + }, +}; + +VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, + int devfn, unsigned int pasid) +{ + /* + * We can't simply use sid here since the bus number might not be + * initialized by the guest. + */ + struct vtd_as_key key = { + .bus = bus, + .devfn = devfn, + .pasid = pasid, + }; + VTDAddressSpace *vtd_dev_as; + char name[128]; + + vtd_dev_as = g_hash_table_lookup(s->vtd_address_spaces, &key); if (!vtd_dev_as) { - snprintf(name, sizeof(name), "vtd-%02x.%x", PCI_SLOT(devfn), - PCI_FUNC(devfn)); - vtd_bus->dev_as[devfn] = vtd_dev_as = g_new0(VTDAddressSpace, 1); + struct vtd_as_key *new_key = g_malloc(sizeof(*new_key)); + + new_key->bus = bus; + new_key->devfn = devfn; + new_key->pasid = pasid; + + if (pasid == PCI_NO_PASID) { + snprintf(name, sizeof(name), "vtd-%02x.%x", PCI_SLOT(devfn), + PCI_FUNC(devfn)); + } else { + snprintf(name, sizeof(name), "vtd-%02x.%x-pasid-%x", PCI_SLOT(devfn), + PCI_FUNC(devfn), pasid); + } + + vtd_dev_as = g_new0(VTDAddressSpace, 1); vtd_dev_as->bus = bus; vtd_dev_as->devfn = (uint8_t)devfn; + vtd_dev_as->pasid = pasid; vtd_dev_as->iommu_state = s; vtd_dev_as->context_cache_entry.context_cache_gen = 0; vtd_dev_as->iova_tree = iova_tree_new(); @@ -3492,6 +3681,24 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn) &vtd_dev_as->iommu_ir, 1); /* + * This region is used for catching fault to access interrupt + * range via passthrough + PASID. See also + * vtd_switch_address_space(). We can't use alias since we + * need to know the sid which is valid for MSI who uses + * bus_master_as (see msi_send_message()). + */ + memory_region_init_io(&vtd_dev_as->iommu_ir_fault, OBJECT(s), + &vtd_mem_ir_fault_ops, vtd_dev_as, "vtd-no-ir", + VTD_INTERRUPT_ADDR_SIZE); + /* + * Hook to root since when PT is enabled vtd_dev_as->iommu + * will be disabled. + */ + memory_region_add_subregion_overlap(MEMORY_REGION(&vtd_dev_as->root), + VTD_INTERRUPT_ADDR_FIRST, + &vtd_dev_as->iommu_ir_fault, 2); + + /* * Hook both the containers under the root container, we * switch between DMAR & noDMAR by enable/disable * corresponding sub-containers @@ -3503,6 +3710,8 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn) &vtd_dev_as->nodmar, 0); vtd_switch_address_space(vtd_dev_as); + + g_hash_table_insert(s->vtd_address_spaces, new_key, vtd_dev_as); } return vtd_dev_as; } @@ -3609,7 +3818,7 @@ static void vtd_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n) "legacy mode", bus_n, PCI_SLOT(vtd_as->devfn), PCI_FUNC(vtd_as->devfn), - vtd_get_domain_id(s, &ce), + vtd_get_domain_id(s, &ce, vtd_as->pasid), ce.hi, ce.lo); if (vtd_as_has_map_notifier(vtd_as)) { /* This is required only for MAP typed notifiers */ @@ -3619,10 +3828,10 @@ static void vtd_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n) .notify_unmap = false, .aw = s->aw_bits, .as = vtd_as, - .domain_id = vtd_get_domain_id(s, &ce), + .domain_id = vtd_get_domain_id(s, &ce, vtd_as->pasid), }; - vtd_page_walk(s, &ce, 0, ~0ULL, &info); + vtd_page_walk(s, &ce, 0, ~0ULL, &info, vtd_as->pasid); } } else { trace_vtd_replay_ce_invalid(bus_n, PCI_SLOT(vtd_as->devfn), @@ -3722,6 +3931,10 @@ static void vtd_init(IntelIOMMUState *s) s->ecap |= VTD_ECAP_SC; } + if (s->pasid) { + s->ecap |= VTD_ECAP_PASID; + } + vtd_reset_caches(s); /* Define registers with default values and bit semantics */ @@ -3795,7 +4008,7 @@ static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn) assert(0 <= devfn && devfn < PCI_DEVFN_MAX); - vtd_as = vtd_find_add_as(s, bus, devfn); + vtd_as = vtd_find_add_as(s, bus, devfn, PCI_NO_PASID); return &vtd_as->as; } @@ -3838,6 +4051,11 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp) return false; } + if (s->pasid && !s->scalable_mode) { + error_setg(errp, "Need to set scalable mode for PASID"); + return false; + } + return true; } @@ -3874,6 +4092,17 @@ static void vtd_realize(DeviceState *dev, Error **errp) X86MachineState *x86ms = X86_MACHINE(ms); PCIBus *bus = pcms->bus; IntelIOMMUState *s = INTEL_IOMMU_DEVICE(dev); + X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s); + + if (s->pasid && x86_iommu->dt_supported) { + /* + * PASID-based-Device-TLB Invalidate Descriptor is not + * implemented and it requires support from vhost layer which + * needs to be implemented in the future. + */ + error_setg(errp, "PASID based device IOTLB is not supported"); + return; + } if (!vtd_decide_config(s, errp)) { return; @@ -3881,7 +4110,6 @@ static void vtd_realize(DeviceState *dev, Error **errp) QLIST_INIT(&s->vtd_as_with_notifiers); qemu_mutex_init(&s->iommu_lock); - memset(s->vtd_as_by_bus_num, 0, sizeof(s->vtd_as_by_bus_num)); memory_region_init_io(&s->csrmem, OBJECT(s), &vtd_mem_ops, s, "intel_iommu", DMAR_REG_SIZE); @@ -3901,10 +4129,10 @@ static void vtd_realize(DeviceState *dev, Error **errp) sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->csrmem); /* No corresponding destroy */ - s->iotlb = g_hash_table_new_full(vtd_uint64_hash, vtd_uint64_equal, + s->iotlb = g_hash_table_new_full(vtd_iotlb_hash, vtd_iotlb_equal, g_free, g_free); - s->vtd_as_by_busptr = g_hash_table_new_full(vtd_uint64_hash, vtd_uint64_equal, - g_free, g_free); + s->vtd_address_spaces = g_hash_table_new_full(vtd_as_hash, vtd_as_equal, + g_free, g_free); vtd_init(s); sysbus_mmio_map(SYS_BUS_DEVICE(s), 0, Q35_HOST_BRIDGE_IOMMU_ADDR); pci_setup_iommu(bus, vtd_host_dma_iommu, dev); diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h index 930ce61..f090e61 100644 --- a/hw/i386/intel_iommu_internal.h +++ b/hw/i386/intel_iommu_internal.h @@ -114,8 +114,9 @@ VTD_INTERRUPT_ADDR_FIRST + 1) /* The shift of source_id in the key of IOTLB hash table */ -#define VTD_IOTLB_SID_SHIFT 36 -#define VTD_IOTLB_LVL_SHIFT 52 +#define VTD_IOTLB_SID_SHIFT 20 +#define VTD_IOTLB_LVL_SHIFT 28 +#define VTD_IOTLB_PASID_SHIFT 30 #define VTD_IOTLB_MAX_SIZE 1024 /* Max size of the hash table */ /* IOTLB_REG */ @@ -191,6 +192,7 @@ #define VTD_ECAP_SC (1ULL << 7) #define VTD_ECAP_MHMV (15ULL << 20) #define VTD_ECAP_SRS (1ULL << 31) +#define VTD_ECAP_PASID (1ULL << 40) #define VTD_ECAP_SMTS (1ULL << 43) #define VTD_ECAP_SLTS (1ULL << 46) @@ -211,6 +213,8 @@ #define VTD_CAP_DRAIN_READ (1ULL << 55) #define VTD_CAP_DRAIN (VTD_CAP_DRAIN_READ | VTD_CAP_DRAIN_WRITE) #define VTD_CAP_CM (1ULL << 7) +#define VTD_PASID_ID_SHIFT 20 +#define VTD_PASID_ID_MASK ((1ULL << VTD_PASID_ID_SHIFT) - 1) /* Supported Adjusted Guest Address Widths */ #define VTD_CAP_SAGAW_SHIFT 8 @@ -262,6 +266,8 @@ #define VTD_FRCD_SID(val) ((val) & VTD_FRCD_SID_MASK) /* For the low 64-bit of 128-bit */ #define VTD_FRCD_FI(val) ((val) & ~0xfffULL) +#define VTD_FRCD_PV(val) (((val) & 0xffffULL) << 40) +#define VTD_FRCD_PP(val) (((val) & 0x1) << 31) /* DMA Remapping Fault Conditions */ typedef enum VTDFaultReason { @@ -379,6 +385,11 @@ typedef union VTDInvDesc VTDInvDesc; #define VTD_INV_DESC_IOTLB_AM(val) ((val) & 0x3fULL) #define VTD_INV_DESC_IOTLB_RSVD_LO 0xffffffff0000ff00ULL #define VTD_INV_DESC_IOTLB_RSVD_HI 0xf80ULL +#define VTD_INV_DESC_IOTLB_PASID_PASID (2ULL << 4) +#define VTD_INV_DESC_IOTLB_PASID_PAGE (3ULL << 4) +#define VTD_INV_DESC_IOTLB_PASID(val) (((val) >> 32) & VTD_PASID_ID_MASK) +#define VTD_INV_DESC_IOTLB_PASID_RSVD_LO 0xfff00000000001c0ULL +#define VTD_INV_DESC_IOTLB_PASID_RSVD_HI 0xf80ULL /* Mask for Device IOTLB Invalidate Descriptor */ #define VTD_INV_DESC_DEVICE_IOTLB_ADDR(val) ((val) & 0xfffffffffffff000ULL) @@ -413,6 +424,7 @@ typedef union VTDInvDesc VTDInvDesc; /* Information about page-selective IOTLB invalidate */ struct VTDIOTLBPageInvInfo { uint16_t domain_id; + uint32_t pasid; uint64_t addr; uint8_t mask; }; diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c index ffd1884..170a331 100644 --- a/hw/i386/microvm.c +++ b/hw/i386/microvm.c @@ -324,8 +324,6 @@ static void microvm_memory_init(MicrovmMachineState *mms) fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, machine->smp.max_cpus); fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)machine->ram_size); fw_cfg_add_i32(fw_cfg, FW_CFG_IRQ0_OVERRIDE, 1); - fw_cfg_add_bytes(fw_cfg, FW_CFG_E820_TABLE, - &e820_reserve, sizeof(e820_reserve)); fw_cfg_add_file(fw_cfg, "etc/e820", e820_table, sizeof(struct e820_entry) * e820_get_num_entries()); diff --git a/hw/i386/pc.c b/hw/i386/pc.c index ef14da5..546b703 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1061,7 +1061,6 @@ void pc_memory_init(PCMachineState *pcms, hwaddr cxl_size = MiB; cxl_base = pc_get_cxl_range_start(pcms); - e820_add_entry(cxl_base, cxl_size, E820_RESERVED); memory_region_init(mr, OBJECT(machine), "cxl_host_reg", cxl_size); memory_region_add_subregion(system_memory, cxl_base, mr); cxl_resv_end = cxl_base + cxl_size; @@ -1077,7 +1076,6 @@ void pc_memory_init(PCMachineState *pcms, memory_region_init_io(&fw->mr, OBJECT(machine), &cfmws_ops, fw, "cxl-fixed-memory-region", fw->size); memory_region_add_subregion(system_memory, fw->base, &fw->mr); - e820_add_entry(fw->base, fw->size, E820_RESERVED); cxl_fmw_base += fw->size; cxl_resv_end = cxl_fmw_base; } diff --git a/hw/i386/trace-events b/hw/i386/trace-events index e49814d..04fd71b 100644 --- a/hw/i386/trace-events +++ b/hw/i386/trace-events @@ -12,6 +12,8 @@ vtd_inv_desc_cc_devices(uint16_t sid, uint16_t fmask) "context invalidate device vtd_inv_desc_iotlb_global(void) "iotlb invalidate global" vtd_inv_desc_iotlb_domain(uint16_t domain) "iotlb invalidate whole domain 0x%"PRIx16 vtd_inv_desc_iotlb_pages(uint16_t domain, uint64_t addr, uint8_t mask) "iotlb invalidate domain 0x%"PRIx16" addr 0x%"PRIx64" mask 0x%"PRIx8 +vtd_inv_desc_iotlb_pasid_pages(uint16_t domain, uint64_t addr, uint8_t mask, uint32_t pasid) "iotlb invalidate domain 0x%"PRIx16" addr 0x%"PRIx64" mask 0x%"PRIx8" pasid 0x%"PRIx32 +vtd_inv_desc_iotlb_pasid(uint16_t domain, uint32_t pasid) "iotlb invalidate domain 0x%"PRIx16" pasid 0x%"PRIx32 vtd_inv_desc_wait_sw(uint64_t addr, uint32_t data) "wait invalidate status write addr 0x%"PRIx64" data 0x%"PRIx32 vtd_inv_desc_wait_irq(const char *msg) "%s" vtd_inv_desc_wait_write_fail(uint64_t hi, uint64_t lo) "write fail for wait desc hi 0x%"PRIx64" lo 0x%"PRIx64 |