aboutsummaryrefslogtreecommitdiff
path: root/hw/i386
diff options
context:
space:
mode:
authorStefan Hajnoczi <stefanha@redhat.com>2022-11-07 18:43:56 -0500
committerStefan Hajnoczi <stefanha@redhat.com>2022-11-07 18:43:56 -0500
commitf21f1cfeb94b94ce044726856c291bed9391e3a4 (patch)
tree051f38512751eceb808446cfb01fe22ce2f3b259 /hw/i386
parent524fc737431d240f9d9f10aaf381003092868bac (diff)
parent1ef47f40dce3d5b176ddf76d57b5bfa2efb0b3c6 (diff)
downloadqemu-f21f1cfeb94b94ce044726856c291bed9391e3a4.zip
qemu-f21f1cfeb94b94ce044726856c291bed9391e3a4.tar.gz
qemu-f21f1cfeb94b94ce044726856c291bed9391e3a4.tar.bz2
Merge tag 'for_upstream' of https://git.kernel.org/pub/scm/virt/kvm/mst/qemu into staging
pci,pc,virtio: features, tests, fixes, cleanups lots of acpi rework first version of biosbits infrastructure ASID support in vhost-vdpa core_count2 support in smbios PCIe DOE emulation virtio vq reset HMAT support part of infrastructure for viommu support in vhost-vdpa VTD PASID support fixes, tests all over the place Signed-off-by: Michael S. Tsirkin <mst@redhat.com> # -----BEGIN PGP SIGNATURE----- # # iQFDBAABCAAtFiEEXQn9CHHI+FuUyooNKB8NuNKNVGkFAmNpXDkPHG1zdEByZWRo # YXQuY29tAAoJECgfDbjSjVRpD0AH/2G8ZPrgrxJC9y3uD5/5J6QRzO+TsDYbg5ut # uBf4rKSHHzcu6zdyAfsrhbAKKzyD4HrEGNXZrBjnKM1xCiB/SGBcDIWntwrca2+s # 5Dpbi4xvd4tg6tVD4b47XNDCcn2uUbeI0e2M5QIbtCmzdi/xKbFAfl5G8DQp431X # Kmz79G4CdKWyjVlM0HoYmdCw/4FxkdjD02tE/Uc5YMrePNaEg5Bw4hjCHbx1b6ur # 6gjeXAtncm9s4sO0l+sIdyiqlxiTry9FSr35WaQ0qPU+Og5zaf1EiWfdl8TRo4qU # EAATw5A4hyw11GfOGp7oOVkTGvcNB/H7aIxD7emdWZV8+BMRPKo= # =zTCn # -----END PGP SIGNATURE----- # gpg: Signature made Mon 07 Nov 2022 14:27:53 EST # gpg: using RSA key 5D09FD0871C8F85B94CA8A0D281F0DB8D28D5469 # gpg: issuer "mst@redhat.com" # gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>" [full] # gpg: aka "Michael S. Tsirkin <mst@redhat.com>" [full] # Primary key fingerprint: 0270 606B 6F3C DF3D 0B17 0970 C350 3912 AFBE 8E67 # Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA 8A0D 281F 0DB8 D28D 5469 * tag 'for_upstream' of https://git.kernel.org/pub/scm/virt/kvm/mst/qemu: (83 commits) checkpatch: better pattern for inline comments hw/virtio: introduce virtio_device_should_start tests/acpi: update tables for new core count test bios-tables-test: add test for number of cores > 255 tests/acpi: allow changes for core_count2 test bios-tables-test: teach test to use smbios 3.0 tables hw/smbios: add core_count2 to smbios table type 4 vhost-user: Support vhost_dev_start vhost: Change the sequence of device start intel-iommu: PASID support intel-iommu: convert VTD_PE_GET_FPD_ERR() to be a function intel-iommu: drop VTDBus intel-iommu: don't warn guest errors when getting rid2pasid entry vfio: move implement of vfio_get_xlat_addr() to memory.c tests: virt: Update expected *.acpihmatvirt tables tests: acpi: aarch64/virt: add a test for hmat nodes with no initiators hw/arm/virt: Enable HMAT on arm virt machine tests: Add HMAT AArch64/virt empty table files tests: acpi: q35: update expected blobs *.hmat-noinitiators expected HMAT: tests: acpi: q35: add test for hmat nodes without initiators ... Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Diffstat (limited to 'hw/i386')
-rw-r--r--hw/i386/acpi-build.c203
-rw-r--r--hw/i386/e820_memory_layout.c20
-rw-r--r--hw/i386/e820_memory_layout.h8
-rw-r--r--hw/i386/fw_cfg.c3
-rw-r--r--hw/i386/fw_cfg.h1
-rw-r--r--hw/i386/intel_iommu.c694
-rw-r--r--hw/i386/intel_iommu_internal.h16
-rw-r--r--hw/i386/microvm.c2
-rw-r--r--hw/i386/pc.c2
-rw-r--r--hw/i386/trace-events2
10 files changed, 514 insertions, 437 deletions
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 4f54b61..d9eaa5f 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -60,6 +60,7 @@
#include "hw/i386/fw_cfg.h"
#include "hw/i386/ich9.h"
#include "hw/pci/pci_bus.h"
+#include "hw/pci-host/i440fx.h"
#include "hw/pci-host/q35.h"
#include "hw/i386/x86-iommu.h"
@@ -112,7 +113,6 @@ typedef struct AcpiPmInfo {
} AcpiPmInfo;
typedef struct AcpiMiscInfo {
- bool is_piix4;
bool has_hpet;
#ifdef CONFIG_TPM
TPMVersion tpm_version;
@@ -121,13 +121,6 @@ typedef struct AcpiMiscInfo {
unsigned dsdt_size;
} AcpiMiscInfo;
-typedef struct AcpiBuildPciBusHotplugState {
- GArray *device_table;
- GArray *notify_table;
- struct AcpiBuildPciBusHotplugState *parent;
- bool pcihp_bridge_en;
-} AcpiBuildPciBusHotplugState;
-
typedef struct FwCfgTPMConfig {
uint32_t tpmppi_address;
uint8_t tpm_version;
@@ -288,17 +281,6 @@ static void acpi_get_pm_info(MachineState *machine, AcpiPmInfo *pm)
static void acpi_get_misc_info(AcpiMiscInfo *info)
{
- Object *piix = object_resolve_type_unambiguous(TYPE_PIIX4_PM);
- Object *lpc = object_resolve_type_unambiguous(TYPE_ICH9_LPC_DEVICE);
- assert(!!piix != !!lpc);
-
- if (piix) {
- info->is_piix4 = true;
- }
- if (lpc) {
- info->is_piix4 = false;
- }
-
info->has_hpet = hpet_find();
#ifdef CONFIG_TPM
info->tpm_version = tpm_get_version(tpm_find());
@@ -430,18 +412,11 @@ static void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus,
bool hotpluggbale_slot = false;
bool bridge_in_acpi = false;
bool cold_plugged_bridge = false;
- bool is_vga = false;
if (pdev) {
pc = PCI_DEVICE_GET_CLASS(pdev);
dc = DEVICE_GET_CLASS(pdev);
- if (pc->class_id == PCI_CLASS_BRIDGE_ISA) {
- continue;
- }
-
- is_vga = pc->class_id == PCI_CLASS_DISPLAY_VGA;
-
/*
* Cold plugged bridges aren't themselves hot-pluggable.
* Hotplugged bridges *are* hot-pluggable.
@@ -455,9 +430,10 @@ static void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus,
/*
* allow describing coldplugged bridges in ACPI even if they are not
* on function 0, as they are not unpluggable, for all other devices
- * generate description only for function 0 per slot
+ * generate description only for function 0 per slot, and for other
+ * functions if device on function provides its own AML
*/
- if (func && !bridge_in_acpi) {
+ if (func && !bridge_in_acpi && !get_dev_aml_func(DEVICE(pdev))) {
continue;
}
} else {
@@ -489,28 +465,7 @@ static void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus,
aml_append(dev, aml_pci_device_dsm());
}
- if (is_vga) {
- /* add VGA specific AML methods */
- int s3d;
-
- if (object_dynamic_cast(OBJECT(pdev), "qxl-vga")) {
- s3d = 3;
- } else {
- s3d = 0;
- }
-
- method = aml_method("_S1D", 0, AML_NOTSERIALIZED);
- aml_append(method, aml_return(aml_int(0)));
- aml_append(dev, method);
-
- method = aml_method("_S2D", 0, AML_NOTSERIALIZED);
- aml_append(method, aml_return(aml_int(0)));
- aml_append(dev, method);
-
- method = aml_method("_S3D", 0, AML_NOTSERIALIZED);
- aml_append(method, aml_return(aml_int(s3d)));
- aml_append(dev, method);
- }
+ call_dev_aml_func(DEVICE(pdev), dev);
bridge_in_acpi = cold_plugged_bridge && pcihp_bridge_en;
if (bridge_in_acpi) {
@@ -1030,7 +985,6 @@ static void build_piix4_pci0_int(Aml *table)
{
Aml *dev;
Aml *crs;
- Aml *field;
Aml *method;
uint32_t irqs;
Aml *sb_scope = aml_scope("_SB");
@@ -1039,13 +993,6 @@ static void build_piix4_pci0_int(Aml *table)
aml_append(pci0_scope, build_prt(true));
aml_append(sb_scope, pci0_scope);
- field = aml_field("PCI0.ISA.P40C", AML_BYTE_ACC, AML_NOLOCK, AML_PRESERVE);
- aml_append(field, aml_named_field("PRQ0", 8));
- aml_append(field, aml_named_field("PRQ1", 8));
- aml_append(field, aml_named_field("PRQ2", 8));
- aml_append(field, aml_named_field("PRQ3", 8));
- aml_append(sb_scope, field);
-
aml_append(sb_scope, build_irq_status_method());
aml_append(sb_scope, build_iqcr_method(true));
@@ -1149,7 +1096,6 @@ static Aml *build_q35_routing_table(const char *str)
static void build_q35_pci0_int(Aml *table)
{
- Aml *field;
Aml *method;
Aml *sb_scope = aml_scope("_SB");
Aml *pci0_scope = aml_scope("PCI0");
@@ -1186,18 +1132,6 @@ static void build_q35_pci0_int(Aml *table)
aml_append(pci0_scope, method);
aml_append(sb_scope, pci0_scope);
- field = aml_field("PCI0.ISA.PIRQ", AML_BYTE_ACC, AML_NOLOCK, AML_PRESERVE);
- aml_append(field, aml_named_field("PRQA", 8));
- aml_append(field, aml_named_field("PRQB", 8));
- aml_append(field, aml_named_field("PRQC", 8));
- aml_append(field, aml_named_field("PRQD", 8));
- aml_append(field, aml_reserved_field(0x20));
- aml_append(field, aml_named_field("PRQE", 8));
- aml_append(field, aml_named_field("PRQF", 8));
- aml_append(field, aml_named_field("PRQG", 8));
- aml_append(field, aml_named_field("PRQH", 8));
- aml_append(sb_scope, field);
-
aml_append(sb_scope, build_irq_status_method());
aml_append(sb_scope, build_iqcr_method(false));
@@ -1262,54 +1196,6 @@ static Aml *build_q35_dram_controller(const AcpiMcfgInfo *mcfg)
return dev;
}
-static void build_q35_isa_bridge(Aml *table)
-{
- Aml *dev;
- Aml *scope;
- Object *obj;
- bool ambiguous;
-
- /*
- * temporarily fish out isa bridge, build_q35_isa_bridge() will be dropped
- * once PCI is converted to AcpiDevAmlIf and would be ble to generate
- * AML for bridge itself
- */
- obj = object_resolve_path_type("", TYPE_ICH9_LPC_DEVICE, &ambiguous);
- assert(obj && !ambiguous);
-
- scope = aml_scope("_SB.PCI0");
- dev = aml_device("ISA");
- aml_append(dev, aml_name_decl("_ADR", aml_int(0x001F0000)));
-
- call_dev_aml_func(DEVICE(obj), dev);
- aml_append(scope, dev);
- aml_append(table, scope);
-}
-
-static void build_piix4_isa_bridge(Aml *table)
-{
- Aml *dev;
- Aml *scope;
- Object *obj;
- bool ambiguous;
-
- /*
- * temporarily fish out isa bridge, build_piix4_isa_bridge() will be dropped
- * once PCI is converted to AcpiDevAmlIf and would be ble to generate
- * AML for bridge itself
- */
- obj = object_resolve_path_type("", TYPE_PIIX3_PCI_DEVICE, &ambiguous);
- assert(obj && !ambiguous);
-
- scope = aml_scope("_SB.PCI0");
- dev = aml_device("ISA");
- aml_append(dev, aml_name_decl("_ADR", aml_int(0x00010000)));
-
- call_dev_aml_func(DEVICE(obj), dev);
- aml_append(scope, dev);
- aml_append(table, scope);
-}
-
static void build_x86_acpi_pci_hotplug(Aml *table, uint64_t pcihp_addr)
{
Aml *scope;
@@ -1416,25 +1302,6 @@ static Aml *build_q35_osc_method(bool enable_native_pcie_hotplug)
return method;
}
-static void build_smb0(Aml *table, int devnr, int func)
-{
- Aml *scope = aml_scope("_SB.PCI0");
- Aml *dev = aml_device("SMB0");
- bool ambiguous;
- Object *obj;
- /*
- * temporarily fish out device hosting SMBUS, build_smb0 will be gone once
- * PCI enumeration will be switched to call_dev_aml_func()
- */
- obj = object_resolve_path_type("", TYPE_ICH9_SMB_DEVICE, &ambiguous);
- assert(obj && !ambiguous);
-
- aml_append(dev, aml_name_decl("_ADR", aml_int(devnr << 16 | func)));
- call_dev_aml_func(DEVICE(obj), dev);
- aml_append(scope, dev);
- aml_append(table, scope);
-}
-
static void build_acpi0017(Aml *table)
{
Aml *dev, *scope, *method;
@@ -1456,6 +1323,8 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
AcpiPmInfo *pm, AcpiMiscInfo *misc,
Range *pci_hole, Range *pci_hole64, MachineState *machine)
{
+ Object *i440fx = object_resolve_type_unambiguous(TYPE_I440FX_PCI_HOST_BRIDGE);
+ Object *q35 = object_resolve_type_unambiguous(TYPE_Q35_HOST_DEVICE);
CrsRangeEntry *entry;
Aml *dsdt, *sb_scope, *scope, *dev, *method, *field, *pkg, *crs;
CrsRangeSet crs_range_set;
@@ -1476,11 +1345,13 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
AcpiTable table = { .sig = "DSDT", .rev = 1, .oem_id = x86ms->oem_id,
.oem_table_id = x86ms->oem_table_id };
+ assert(!!i440fx != !!q35);
+
acpi_table_begin(&table, table_data);
dsdt = init_aml_allocator();
build_dbg_aml(dsdt);
- if (misc->is_piix4) {
+ if (i440fx) {
sb_scope = aml_scope("_SB");
dev = aml_device("PCI0");
aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A03")));
@@ -1489,12 +1360,11 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
aml_append(sb_scope, dev);
aml_append(dsdt, sb_scope);
- build_piix4_isa_bridge(dsdt);
if (pm->pcihp_bridge_en || pm->pcihp_root_en) {
build_x86_acpi_pci_hotplug(dsdt, pm->pcihp_io_base);
}
build_piix4_pci0_int(dsdt);
- } else {
+ } else if (q35) {
sb_scope = aml_scope("_SB");
dev = aml_device("PCI0");
aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A08")));
@@ -1534,14 +1404,10 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
aml_append(dsdt, sb_scope);
- build_q35_isa_bridge(dsdt);
if (pm->pcihp_bridge_en) {
build_x86_acpi_pci_hotplug(dsdt, pm->pcihp_io_base);
}
build_q35_pci0_int(dsdt);
- if (pcms->smbus) {
- build_smb0(dsdt, ICH9_SMB_DEV, ICH9_SMB_FUNC);
- }
}
if (misc->has_hpet) {
@@ -1554,6 +1420,18 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
aml_append(dsdt, sb_scope);
}
+ scope = aml_scope("_GPE");
+ {
+ aml_append(scope, aml_name_decl("_HID", aml_string("ACPI0006")));
+ if (machine->nvdimms_state->is_enabled) {
+ method = aml_method("_E04", 0, AML_NOTSERIALIZED);
+ aml_append(method, aml_notify(aml_name("\\_SB.NVDR"),
+ aml_int(0x80)));
+ aml_append(scope, method);
+ }
+ }
+ aml_append(dsdt, scope);
+
if (pcmc->legacy_cpu_hotplug) {
build_legacy_cpu_hotplug_aml(dsdt, machine, pm->cpu_hp_io_base);
} else {
@@ -1572,28 +1450,6 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
pcms->memhp_io_base);
}
- scope = aml_scope("_GPE");
- {
- aml_append(scope, aml_name_decl("_HID", aml_string("ACPI0006")));
-
- if (pm->pcihp_bridge_en || pm->pcihp_root_en) {
- method = aml_method("_E01", 0, AML_NOTSERIALIZED);
- aml_append(method,
- aml_acquire(aml_name("\\_SB.PCI0.BLCK"), 0xFFFF));
- aml_append(method, aml_call0("\\_SB.PCI0.PCNT"));
- aml_append(method, aml_release(aml_name("\\_SB.PCI0.BLCK")));
- aml_append(scope, method);
- }
-
- if (machine->nvdimms_state->is_enabled) {
- method = aml_method("_E04", 0, AML_NOTSERIALIZED);
- aml_append(method, aml_notify(aml_name("\\_SB.NVDR"),
- aml_int(0x80)));
- aml_append(scope, method);
- }
- }
- aml_append(dsdt, scope);
-
crs_range_set_init(&crs_range_set);
bus = PC_MACHINE(machine)->bus;
if (bus) {
@@ -1872,6 +1728,19 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
}
aml_append(dsdt, sb_scope);
+ if (pm->pcihp_bridge_en || pm->pcihp_root_en) {
+ scope = aml_scope("_GPE");
+ {
+ method = aml_method("_E01", 0, AML_NOTSERIALIZED);
+ aml_append(method,
+ aml_acquire(aml_name("\\_SB.PCI0.BLCK"), 0xFFFF));
+ aml_append(method, aml_call0("\\_SB.PCI0.PCNT"));
+ aml_append(method, aml_release(aml_name("\\_SB.PCI0.BLCK")));
+ aml_append(scope, method);
+ }
+ aml_append(dsdt, scope);
+ }
+
/* copy AML table into ACPI tables blob and patch header there */
g_array_append_vals(table_data, dsdt->buf->data, dsdt->buf->len);
acpi_table_end(linker, &table);
diff --git a/hw/i386/e820_memory_layout.c b/hw/i386/e820_memory_layout.c
index bcf9eaf..06970ac 100644
--- a/hw/i386/e820_memory_layout.c
+++ b/hw/i386/e820_memory_layout.c
@@ -11,29 +11,11 @@
#include "e820_memory_layout.h"
static size_t e820_entries;
-struct e820_table e820_reserve;
struct e820_entry *e820_table;
int e820_add_entry(uint64_t address, uint64_t length, uint32_t type)
{
- int index = le32_to_cpu(e820_reserve.count);
- struct e820_entry *entry;
-
- if (type != E820_RAM) {
- /* old FW_CFG_E820_TABLE entry -- reservations only */
- if (index >= E820_NR_ENTRIES) {
- return -EBUSY;
- }
- entry = &e820_reserve.entry[index++];
-
- entry->address = cpu_to_le64(address);
- entry->length = cpu_to_le64(length);
- entry->type = cpu_to_le32(type);
-
- e820_reserve.count = cpu_to_le32(index);
- }
-
- /* new "etc/e820" file -- include ram too */
+ /* new "etc/e820" file -- include ram and reserved entries */
e820_table = g_renew(struct e820_entry, e820_table, e820_entries + 1);
e820_table[e820_entries].address = cpu_to_le64(address);
e820_table[e820_entries].length = cpu_to_le64(length);
diff --git a/hw/i386/e820_memory_layout.h b/hw/i386/e820_memory_layout.h
index 04f9378..7c239aa 100644
--- a/hw/i386/e820_memory_layout.h
+++ b/hw/i386/e820_memory_layout.h
@@ -16,20 +16,12 @@
#define E820_NVS 4
#define E820_UNUSABLE 5
-#define E820_NR_ENTRIES 16
-
struct e820_entry {
uint64_t address;
uint64_t length;
uint32_t type;
} QEMU_PACKED __attribute((__aligned__(4)));
-struct e820_table {
- uint32_t count;
- struct e820_entry entry[E820_NR_ENTRIES];
-} QEMU_PACKED __attribute((__aligned__(4)));
-
-extern struct e820_table e820_reserve;
extern struct e820_entry *e820_table;
int e820_add_entry(uint64_t address, uint64_t length, uint32_t type);
diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c
index a283785..72a42f3 100644
--- a/hw/i386/fw_cfg.c
+++ b/hw/i386/fw_cfg.c
@@ -36,7 +36,6 @@ const char *fw_cfg_arch_key_name(uint16_t key)
{FW_CFG_ACPI_TABLES, "acpi_tables"},
{FW_CFG_SMBIOS_ENTRIES, "smbios_entries"},
{FW_CFG_IRQ0_OVERRIDE, "irq0_override"},
- {FW_CFG_E820_TABLE, "e820_table"},
{FW_CFG_HPET, "hpet"},
};
@@ -127,8 +126,6 @@ FWCfgState *fw_cfg_arch_create(MachineState *ms,
#endif
fw_cfg_add_i32(fw_cfg, FW_CFG_IRQ0_OVERRIDE, 1);
- fw_cfg_add_bytes(fw_cfg, FW_CFG_E820_TABLE,
- &e820_reserve, sizeof(e820_reserve));
fw_cfg_add_file(fw_cfg, "etc/e820", e820_table,
sizeof(struct e820_entry) * e820_get_num_entries());
diff --git a/hw/i386/fw_cfg.h b/hw/i386/fw_cfg.h
index 275f15c..86ca7c1 100644
--- a/hw/i386/fw_cfg.h
+++ b/hw/i386/fw_cfg.h
@@ -17,7 +17,6 @@
#define FW_CFG_ACPI_TABLES (FW_CFG_ARCH_LOCAL + 0)
#define FW_CFG_SMBIOS_ENTRIES (FW_CFG_ARCH_LOCAL + 1)
#define FW_CFG_IRQ0_OVERRIDE (FW_CFG_ARCH_LOCAL + 2)
-#define FW_CFG_E820_TABLE (FW_CFG_ARCH_LOCAL + 3)
#define FW_CFG_HPET (FW_CFG_ARCH_LOCAL + 4)
FWCfgState *fw_cfg_arch_create(MachineState *ms,
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 6524c2e..a08ee85 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -49,17 +49,24 @@
/* pe operations */
#define VTD_PE_GET_TYPE(pe) ((pe)->val[0] & VTD_SM_PASID_ENTRY_PGTT)
#define VTD_PE_GET_LEVEL(pe) (2 + (((pe)->val[0] >> 2) & VTD_SM_PASID_ENTRY_AW))
-#define VTD_PE_GET_FPD_ERR(ret_fr, is_fpd_set, s, source_id, addr, is_write) {\
- if (ret_fr) { \
- ret_fr = -ret_fr; \
- if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) { \
- trace_vtd_fault_disabled(); \
- } else { \
- vtd_report_dmar_fault(s, source_id, addr, ret_fr, is_write); \
- } \
- goto error; \
- } \
-}
+
+/*
+ * PCI bus number (or SID) is not reliable since the device is usaully
+ * initalized before guest can configure the PCI bridge
+ * (SECONDARY_BUS_NUMBER).
+ */
+struct vtd_as_key {
+ PCIBus *bus;
+ uint8_t devfn;
+ uint32_t pasid;
+};
+
+struct vtd_iotlb_key {
+ uint64_t gfn;
+ uint32_t pasid;
+ uint32_t level;
+ uint16_t sid;
+};
static void vtd_address_space_refresh_all(IntelIOMMUState *s);
static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n);
@@ -200,14 +207,46 @@ static inline gboolean vtd_as_has_map_notifier(VTDAddressSpace *as)
}
/* GHashTable functions */
-static gboolean vtd_uint64_equal(gconstpointer v1, gconstpointer v2)
+static gboolean vtd_iotlb_equal(gconstpointer v1, gconstpointer v2)
{
- return *((const uint64_t *)v1) == *((const uint64_t *)v2);
+ const struct vtd_iotlb_key *key1 = v1;
+ const struct vtd_iotlb_key *key2 = v2;
+
+ return key1->sid == key2->sid &&
+ key1->pasid == key2->pasid &&
+ key1->level == key2->level &&
+ key1->gfn == key2->gfn;
+}
+
+static guint vtd_iotlb_hash(gconstpointer v)
+{
+ const struct vtd_iotlb_key *key = v;
+
+ return key->gfn | ((key->sid) << VTD_IOTLB_SID_SHIFT) |
+ (key->level) << VTD_IOTLB_LVL_SHIFT |
+ (key->pasid) << VTD_IOTLB_PASID_SHIFT;
}
-static guint vtd_uint64_hash(gconstpointer v)
+static gboolean vtd_as_equal(gconstpointer v1, gconstpointer v2)
+{
+ const struct vtd_as_key *key1 = v1;
+ const struct vtd_as_key *key2 = v2;
+
+ return (key1->bus == key2->bus) && (key1->devfn == key2->devfn) &&
+ (key1->pasid == key2->pasid);
+}
+
+/*
+ * Note that we use pointer to PCIBus as the key, so hashing/shifting
+ * based on the pointer value is intended. Note that we deal with
+ * collisions through vtd_as_equal().
+ */
+static guint vtd_as_hash(gconstpointer v)
{
- return (guint)*(const uint64_t *)v;
+ const struct vtd_as_key *key = v;
+ guint value = (guint)(uintptr_t)key->bus;
+
+ return (guint)(value << 8 | key->devfn);
}
static gboolean vtd_hash_remove_by_domain(gpointer key, gpointer value,
@@ -248,22 +287,14 @@ static gboolean vtd_hash_remove_by_page(gpointer key, gpointer value,
static void vtd_reset_context_cache_locked(IntelIOMMUState *s)
{
VTDAddressSpace *vtd_as;
- VTDBus *vtd_bus;
- GHashTableIter bus_it;
- uint32_t devfn_it;
+ GHashTableIter as_it;
trace_vtd_context_cache_reset();
- g_hash_table_iter_init(&bus_it, s->vtd_as_by_busptr);
+ g_hash_table_iter_init(&as_it, s->vtd_address_spaces);
- while (g_hash_table_iter_next (&bus_it, NULL, (void**)&vtd_bus)) {
- for (devfn_it = 0; devfn_it < PCI_DEVFN_MAX; ++devfn_it) {
- vtd_as = vtd_bus->dev_as[devfn_it];
- if (!vtd_as) {
- continue;
- }
- vtd_as->context_cache_entry.context_cache_gen = 0;
- }
+ while (g_hash_table_iter_next(&as_it, NULL, (void **)&vtd_as)) {
+ vtd_as->context_cache_entry.context_cache_gen = 0;
}
s->context_cache_gen = 1;
}
@@ -290,13 +321,6 @@ static void vtd_reset_caches(IntelIOMMUState *s)
vtd_iommu_unlock(s);
}
-static uint64_t vtd_get_iotlb_key(uint64_t gfn, uint16_t source_id,
- uint32_t level)
-{
- return gfn | ((uint64_t)(source_id) << VTD_IOTLB_SID_SHIFT) |
- ((uint64_t)(level) << VTD_IOTLB_LVL_SHIFT);
-}
-
static uint64_t vtd_get_iotlb_gfn(hwaddr addr, uint32_t level)
{
return (addr & vtd_slpt_level_page_mask(level)) >> VTD_PAGE_SHIFT_4K;
@@ -304,15 +328,17 @@ static uint64_t vtd_get_iotlb_gfn(hwaddr addr, uint32_t level)
/* Must be called with IOMMU lock held */
static VTDIOTLBEntry *vtd_lookup_iotlb(IntelIOMMUState *s, uint16_t source_id,
- hwaddr addr)
+ uint32_t pasid, hwaddr addr)
{
+ struct vtd_iotlb_key key;
VTDIOTLBEntry *entry;
- uint64_t key;
int level;
for (level = VTD_SL_PT_LEVEL; level < VTD_SL_PML4_LEVEL; level++) {
- key = vtd_get_iotlb_key(vtd_get_iotlb_gfn(addr, level),
- source_id, level);
+ key.gfn = vtd_get_iotlb_gfn(addr, level);
+ key.level = level;
+ key.sid = source_id;
+ key.pasid = pasid;
entry = g_hash_table_lookup(s->iotlb, &key);
if (entry) {
goto out;
@@ -326,10 +352,11 @@ out:
/* Must be with IOMMU lock held */
static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id,
uint16_t domain_id, hwaddr addr, uint64_t slpte,
- uint8_t access_flags, uint32_t level)
+ uint8_t access_flags, uint32_t level,
+ uint32_t pasid)
{
VTDIOTLBEntry *entry = g_malloc(sizeof(*entry));
- uint64_t *key = g_malloc(sizeof(*key));
+ struct vtd_iotlb_key *key = g_malloc(sizeof(*key));
uint64_t gfn = vtd_get_iotlb_gfn(addr, level);
trace_vtd_iotlb_page_update(source_id, addr, slpte, domain_id);
@@ -343,7 +370,13 @@ static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id,
entry->slpte = slpte;
entry->access_flags = access_flags;
entry->mask = vtd_slpt_level_page_mask(level);
- *key = vtd_get_iotlb_key(gfn, source_id, level);
+ entry->pasid = pasid;
+
+ key->gfn = gfn;
+ key->sid = source_id;
+ key->level = level;
+ key->pasid = pasid;
+
g_hash_table_replace(s->iotlb, key, entry);
}
@@ -436,7 +469,8 @@ static void vtd_set_frcd_and_update_ppf(IntelIOMMUState *s, uint16_t index)
/* Must not update F field now, should be done later */
static void vtd_record_frcd(IntelIOMMUState *s, uint16_t index,
uint16_t source_id, hwaddr addr,
- VTDFaultReason fault, bool is_write)
+ VTDFaultReason fault, bool is_write,
+ bool is_pasid, uint32_t pasid)
{
uint64_t hi = 0, lo;
hwaddr frcd_reg_addr = DMAR_FRCD_REG_OFFSET + (((uint64_t)index) << 4);
@@ -444,7 +478,8 @@ static void vtd_record_frcd(IntelIOMMUState *s, uint16_t index,
assert(index < DMAR_FRCD_REG_NR);
lo = VTD_FRCD_FI(addr);
- hi = VTD_FRCD_SID(source_id) | VTD_FRCD_FR(fault);
+ hi = VTD_FRCD_SID(source_id) | VTD_FRCD_FR(fault) |
+ VTD_FRCD_PV(pasid) | VTD_FRCD_PP(is_pasid);
if (!is_write) {
hi |= VTD_FRCD_T;
}
@@ -475,7 +510,8 @@ static bool vtd_try_collapse_fault(IntelIOMMUState *s, uint16_t source_id)
/* Log and report an DMAR (address translation) fault to software */
static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id,
hwaddr addr, VTDFaultReason fault,
- bool is_write)
+ bool is_write, bool is_pasid,
+ uint32_t pasid)
{
uint32_t fsts_reg = vtd_get_long_raw(s, DMAR_FSTS_REG);
@@ -502,7 +538,8 @@ static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id,
return;
}
- vtd_record_frcd(s, s->next_frcd_reg, source_id, addr, fault, is_write);
+ vtd_record_frcd(s, s->next_frcd_reg, source_id, addr, fault,
+ is_write, is_pasid, pasid);
if (fsts_reg & VTD_FSTS_PPF) {
error_report_once("There are pending faults already, "
@@ -807,13 +844,15 @@ static int vtd_get_pe_from_pasid_table(IntelIOMMUState *s,
static int vtd_ce_get_rid2pasid_entry(IntelIOMMUState *s,
VTDContextEntry *ce,
- VTDPASIDEntry *pe)
+ VTDPASIDEntry *pe,
+ uint32_t pasid)
{
- uint32_t pasid;
dma_addr_t pasid_dir_base;
int ret = 0;
- pasid = VTD_CE_GET_RID2PASID(ce);
+ if (pasid == PCI_NO_PASID) {
+ pasid = VTD_CE_GET_RID2PASID(ce);
+ }
pasid_dir_base = VTD_CE_GET_PASID_DIR_TABLE(ce);
ret = vtd_get_pe_from_pasid_table(s, pasid_dir_base, pasid, pe);
@@ -822,15 +861,17 @@ static int vtd_ce_get_rid2pasid_entry(IntelIOMMUState *s,
static int vtd_ce_get_pasid_fpd(IntelIOMMUState *s,
VTDContextEntry *ce,
- bool *pe_fpd_set)
+ bool *pe_fpd_set,
+ uint32_t pasid)
{
int ret;
- uint32_t pasid;
dma_addr_t pasid_dir_base;
VTDPASIDDirEntry pdire;
VTDPASIDEntry pe;
- pasid = VTD_CE_GET_RID2PASID(ce);
+ if (pasid == PCI_NO_PASID) {
+ pasid = VTD_CE_GET_RID2PASID(ce);
+ }
pasid_dir_base = VTD_CE_GET_PASID_DIR_TABLE(ce);
/*
@@ -876,12 +917,13 @@ static inline uint32_t vtd_ce_get_level(VTDContextEntry *ce)
}
static uint32_t vtd_get_iova_level(IntelIOMMUState *s,
- VTDContextEntry *ce)
+ VTDContextEntry *ce,
+ uint32_t pasid)
{
VTDPASIDEntry pe;
if (s->root_scalable) {
- vtd_ce_get_rid2pasid_entry(s, ce, &pe);
+ vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid);
return VTD_PE_GET_LEVEL(&pe);
}
@@ -894,12 +936,13 @@ static inline uint32_t vtd_ce_get_agaw(VTDContextEntry *ce)
}
static uint32_t vtd_get_iova_agaw(IntelIOMMUState *s,
- VTDContextEntry *ce)
+ VTDContextEntry *ce,
+ uint32_t pasid)
{
VTDPASIDEntry pe;
if (s->root_scalable) {
- vtd_ce_get_rid2pasid_entry(s, ce, &pe);
+ vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid);
return 30 + ((pe.val[0] >> 2) & VTD_SM_PASID_ENTRY_AW) * 9;
}
@@ -941,31 +984,33 @@ static inline bool vtd_ce_type_check(X86IOMMUState *x86_iommu,
}
static inline uint64_t vtd_iova_limit(IntelIOMMUState *s,
- VTDContextEntry *ce, uint8_t aw)
+ VTDContextEntry *ce, uint8_t aw,
+ uint32_t pasid)
{
- uint32_t ce_agaw = vtd_get_iova_agaw(s, ce);
+ uint32_t ce_agaw = vtd_get_iova_agaw(s, ce, pasid);
return 1ULL << MIN(ce_agaw, aw);
}
/* Return true if IOVA passes range check, otherwise false. */
static inline bool vtd_iova_range_check(IntelIOMMUState *s,
uint64_t iova, VTDContextEntry *ce,
- uint8_t aw)
+ uint8_t aw, uint32_t pasid)
{
/*
* Check if @iova is above 2^X-1, where X is the minimum of MGAW
* in CAP_REG and AW in context-entry.
*/
- return !(iova & ~(vtd_iova_limit(s, ce, aw) - 1));
+ return !(iova & ~(vtd_iova_limit(s, ce, aw, pasid) - 1));
}
static dma_addr_t vtd_get_iova_pgtbl_base(IntelIOMMUState *s,
- VTDContextEntry *ce)
+ VTDContextEntry *ce,
+ uint32_t pasid)
{
VTDPASIDEntry pe;
if (s->root_scalable) {
- vtd_ce_get_rid2pasid_entry(s, ce, &pe);
+ vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid);
return pe.val[0] & VTD_SM_PASID_ENTRY_SLPTPTR;
}
@@ -993,50 +1038,25 @@ static bool vtd_slpte_nonzero_rsvd(uint64_t slpte, uint32_t level)
return slpte & rsvd_mask;
}
-/* Find the VTD address space associated with a given bus number */
-static VTDBus *vtd_find_as_from_bus_num(IntelIOMMUState *s, uint8_t bus_num)
-{
- VTDBus *vtd_bus = s->vtd_as_by_bus_num[bus_num];
- GHashTableIter iter;
-
- if (vtd_bus) {
- return vtd_bus;
- }
-
- /*
- * Iterate over the registered buses to find the one which
- * currently holds this bus number and update the bus_num
- * lookup table.
- */
- g_hash_table_iter_init(&iter, s->vtd_as_by_busptr);
- while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_bus)) {
- if (pci_bus_num(vtd_bus->bus) == bus_num) {
- s->vtd_as_by_bus_num[bus_num] = vtd_bus;
- return vtd_bus;
- }
- }
-
- return NULL;
-}
-
/* Given the @iova, get relevant @slptep. @slpte_level will be the last level
* of the translation, can be used for deciding the size of large page.
*/
static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce,
uint64_t iova, bool is_write,
uint64_t *slptep, uint32_t *slpte_level,
- bool *reads, bool *writes, uint8_t aw_bits)
+ bool *reads, bool *writes, uint8_t aw_bits,
+ uint32_t pasid)
{
- dma_addr_t addr = vtd_get_iova_pgtbl_base(s, ce);
- uint32_t level = vtd_get_iova_level(s, ce);
+ dma_addr_t addr = vtd_get_iova_pgtbl_base(s, ce, pasid);
+ uint32_t level = vtd_get_iova_level(s, ce, pasid);
uint32_t offset;
uint64_t slpte;
uint64_t access_right_check;
uint64_t xlat, size;
- if (!vtd_iova_range_check(s, iova, ce, aw_bits)) {
- error_report_once("%s: detected IOVA overflow (iova=0x%" PRIx64 ")",
- __func__, iova);
+ if (!vtd_iova_range_check(s, iova, ce, aw_bits, pasid)) {
+ error_report_once("%s: detected IOVA overflow (iova=0x%" PRIx64 ","
+ "pasid=0x%" PRIx32 ")", __func__, iova, pasid);
return -VTD_FR_ADDR_BEYOND_MGAW;
}
@@ -1049,8 +1069,9 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce,
if (slpte == (uint64_t)-1) {
error_report_once("%s: detected read error on DMAR slpte "
- "(iova=0x%" PRIx64 ")", __func__, iova);
- if (level == vtd_get_iova_level(s, ce)) {
+ "(iova=0x%" PRIx64 ", pasid=0x%" PRIx32 ")",
+ __func__, iova, pasid);
+ if (level == vtd_get_iova_level(s, ce, pasid)) {
/* Invalid programming of context-entry */
return -VTD_FR_CONTEXT_ENTRY_INV;
} else {
@@ -1062,15 +1083,16 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce,
if (!(slpte & access_right_check)) {
error_report_once("%s: detected slpte permission error "
"(iova=0x%" PRIx64 ", level=0x%" PRIx32 ", "
- "slpte=0x%" PRIx64 ", write=%d)", __func__,
- iova, level, slpte, is_write);
+ "slpte=0x%" PRIx64 ", write=%d, pasid=0x%"
+ PRIx32 ")", __func__, iova, level,
+ slpte, is_write, pasid);
return is_write ? -VTD_FR_WRITE : -VTD_FR_READ;
}
if (vtd_slpte_nonzero_rsvd(slpte, level)) {
error_report_once("%s: detected splte reserve non-zero "
"iova=0x%" PRIx64 ", level=0x%" PRIx32
- "slpte=0x%" PRIx64 ")", __func__, iova,
- level, slpte);
+ "slpte=0x%" PRIx64 ", pasid=0x%" PRIX32 ")",
+ __func__, iova, level, slpte, pasid);
return -VTD_FR_PAGING_ENTRY_RSVD;
}
@@ -1098,9 +1120,10 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce,
error_report_once("%s: xlat address is in interrupt range "
"(iova=0x%" PRIx64 ", level=0x%" PRIx32 ", "
"slpte=0x%" PRIx64 ", write=%d, "
- "xlat=0x%" PRIx64 ", size=0x%" PRIx64 ")",
+ "xlat=0x%" PRIx64 ", size=0x%" PRIx64 ", "
+ "pasid=0x%" PRIx32 ")",
__func__, iova, level, slpte, is_write,
- xlat, size);
+ xlat, size, pasid);
return s->scalable_mode ? -VTD_FR_SM_INTERRUPT_ADDR :
-VTD_FR_INTERRUPT_ADDR;
}
@@ -1314,18 +1337,19 @@ next:
*/
static int vtd_page_walk(IntelIOMMUState *s, VTDContextEntry *ce,
uint64_t start, uint64_t end,
- vtd_page_walk_info *info)
+ vtd_page_walk_info *info,
+ uint32_t pasid)
{
- dma_addr_t addr = vtd_get_iova_pgtbl_base(s, ce);
- uint32_t level = vtd_get_iova_level(s, ce);
+ dma_addr_t addr = vtd_get_iova_pgtbl_base(s, ce, pasid);
+ uint32_t level = vtd_get_iova_level(s, ce, pasid);
- if (!vtd_iova_range_check(s, start, ce, info->aw)) {
+ if (!vtd_iova_range_check(s, start, ce, info->aw, pasid)) {
return -VTD_FR_ADDR_BEYOND_MGAW;
}
- if (!vtd_iova_range_check(s, end, ce, info->aw)) {
+ if (!vtd_iova_range_check(s, end, ce, info->aw, pasid)) {
/* Fix end so that it reaches the maximum */
- end = vtd_iova_limit(s, ce, info->aw);
+ end = vtd_iova_limit(s, ce, info->aw, pasid);
}
return vtd_page_walk_level(addr, start, end, level, true, true, info);
@@ -1393,7 +1417,7 @@ static int vtd_ce_rid2pasid_check(IntelIOMMUState *s,
* has valid rid2pasid setting, which includes valid
* rid2pasid field and corresponding pasid entry setting
*/
- return vtd_ce_get_rid2pasid_entry(s, ce, &pe);
+ return vtd_ce_get_rid2pasid_entry(s, ce, &pe, PCI_NO_PASID);
}
/* Map a device to its corresponding domain (context-entry) */
@@ -1476,12 +1500,13 @@ static int vtd_sync_shadow_page_hook(IOMMUTLBEvent *event,
}
static uint16_t vtd_get_domain_id(IntelIOMMUState *s,
- VTDContextEntry *ce)
+ VTDContextEntry *ce,
+ uint32_t pasid)
{
VTDPASIDEntry pe;
if (s->root_scalable) {
- vtd_ce_get_rid2pasid_entry(s, ce, &pe);
+ vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid);
return VTD_SM_PASID_ENTRY_DID(pe.val[1]);
}
@@ -1499,10 +1524,10 @@ static int vtd_sync_shadow_page_table_range(VTDAddressSpace *vtd_as,
.notify_unmap = true,
.aw = s->aw_bits,
.as = vtd_as,
- .domain_id = vtd_get_domain_id(s, ce),
+ .domain_id = vtd_get_domain_id(s, ce, vtd_as->pasid),
};
- return vtd_page_walk(s, ce, addr, addr + size, &info);
+ return vtd_page_walk(s, ce, addr, addr + size, &info, vtd_as->pasid);
}
static int vtd_sync_shadow_page_table(VTDAddressSpace *vtd_as)
@@ -1546,16 +1571,19 @@ static int vtd_sync_shadow_page_table(VTDAddressSpace *vtd_as)
* 1st-level translation or 2nd-level translation, it depends
* on PGTT setting.
*/
-static bool vtd_dev_pt_enabled(IntelIOMMUState *s, VTDContextEntry *ce)
+static bool vtd_dev_pt_enabled(IntelIOMMUState *s, VTDContextEntry *ce,
+ uint32_t pasid)
{
VTDPASIDEntry pe;
int ret;
if (s->root_scalable) {
- ret = vtd_ce_get_rid2pasid_entry(s, ce, &pe);
+ ret = vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid);
if (ret) {
- error_report_once("%s: vtd_ce_get_rid2pasid_entry error: %"PRId32,
- __func__, ret);
+ /*
+ * This error is guest triggerable. We should assumt PT
+ * not enabled for safety.
+ */
return false;
}
return (VTD_PE_GET_TYPE(&pe) == VTD_SM_PASID_ENTRY_PT);
@@ -1569,14 +1597,12 @@ static bool vtd_as_pt_enabled(VTDAddressSpace *as)
{
IntelIOMMUState *s;
VTDContextEntry ce;
- int ret;
assert(as);
s = as->iommu_state;
- ret = vtd_dev_to_context_entry(s, pci_bus_num(as->bus),
- as->devfn, &ce);
- if (ret) {
+ if (vtd_dev_to_context_entry(s, pci_bus_num(as->bus), as->devfn,
+ &ce)) {
/*
* Possibly failed to parse the context entry for some reason
* (e.g., during init, or any guest configuration errors on
@@ -1586,19 +1612,20 @@ static bool vtd_as_pt_enabled(VTDAddressSpace *as)
return false;
}
- return vtd_dev_pt_enabled(s, &ce);
+ return vtd_dev_pt_enabled(s, &ce, as->pasid);
}
/* Return whether the device is using IOMMU translation. */
static bool vtd_switch_address_space(VTDAddressSpace *as)
{
- bool use_iommu;
+ bool use_iommu, pt;
/* Whether we need to take the BQL on our own */
bool take_bql = !qemu_mutex_iothread_locked();
assert(as);
use_iommu = as->iommu_state->dmar_enabled && !vtd_as_pt_enabled(as);
+ pt = as->iommu_state->dmar_enabled && vtd_as_pt_enabled(as);
trace_vtd_switch_address_space(pci_bus_num(as->bus),
VTD_PCI_SLOT(as->devfn),
@@ -1618,11 +1645,53 @@ static bool vtd_switch_address_space(VTDAddressSpace *as)
if (use_iommu) {
memory_region_set_enabled(&as->nodmar, false);
memory_region_set_enabled(MEMORY_REGION(&as->iommu), true);
+ /*
+ * vt-d spec v3.4 3.14:
+ *
+ * """
+ * Requests-with-PASID with input address in range 0xFEEx_xxxx
+ * are translated normally like any other request-with-PASID
+ * through DMA-remapping hardware.
+ * """
+ *
+ * Need to disable ir for as with PASID.
+ */
+ if (as->pasid != PCI_NO_PASID) {
+ memory_region_set_enabled(&as->iommu_ir, false);
+ } else {
+ memory_region_set_enabled(&as->iommu_ir, true);
+ }
} else {
memory_region_set_enabled(MEMORY_REGION(&as->iommu), false);
memory_region_set_enabled(&as->nodmar, true);
}
+ /*
+ * vtd-spec v3.4 3.14:
+ *
+ * """
+ * Requests-with-PASID with input address in range 0xFEEx_xxxx are
+ * translated normally like any other request-with-PASID through
+ * DMA-remapping hardware. However, if such a request is processed
+ * using pass-through translation, it will be blocked as described
+ * in the paragraph below.
+ *
+ * Software must not program paging-structure entries to remap any
+ * address to the interrupt address range. Untranslated requests
+ * and translation requests that result in an address in the
+ * interrupt range will be blocked with condition code LGN.4 or
+ * SGN.8.
+ * """
+ *
+ * We enable per as memory region (iommu_ir_fault) for catching
+ * the tranlsation for interrupt range through PASID + PT.
+ */
+ if (pt && as->pasid != PCI_NO_PASID) {
+ memory_region_set_enabled(&as->iommu_ir_fault, true);
+ } else {
+ memory_region_set_enabled(&as->iommu_ir_fault, false);
+ }
+
if (take_bql) {
qemu_mutex_unlock_iothread();
}
@@ -1632,24 +1701,13 @@ static bool vtd_switch_address_space(VTDAddressSpace *as)
static void vtd_switch_address_space_all(IntelIOMMUState *s)
{
+ VTDAddressSpace *vtd_as;
GHashTableIter iter;
- VTDBus *vtd_bus;
- int i;
-
- g_hash_table_iter_init(&iter, s->vtd_as_by_busptr);
- while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_bus)) {
- for (i = 0; i < PCI_DEVFN_MAX; i++) {
- if (!vtd_bus->dev_as[i]) {
- continue;
- }
- vtd_switch_address_space(vtd_bus->dev_as[i]);
- }
- }
-}
-static inline uint16_t vtd_make_source_id(uint8_t bus_num, uint8_t devfn)
-{
- return ((bus_num & 0xffUL) << 8) | (devfn & 0xffUL);
+ g_hash_table_iter_init(&iter, s->vtd_address_spaces);
+ while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_as)) {
+ vtd_switch_address_space(vtd_as);
+ }
}
static const bool vtd_qualified_faults[] = {
@@ -1686,18 +1744,37 @@ static inline bool vtd_is_interrupt_addr(hwaddr addr)
return VTD_INTERRUPT_ADDR_FIRST <= addr && addr <= VTD_INTERRUPT_ADDR_LAST;
}
+static gboolean vtd_find_as_by_sid(gpointer key, gpointer value,
+ gpointer user_data)
+{
+ struct vtd_as_key *as_key = (struct vtd_as_key *)key;
+ uint16_t target_sid = *(uint16_t *)user_data;
+ uint16_t sid = PCI_BUILD_BDF(pci_bus_num(as_key->bus), as_key->devfn);
+ return sid == target_sid;
+}
+
+static VTDAddressSpace *vtd_get_as_by_sid(IntelIOMMUState *s, uint16_t sid)
+{
+ uint8_t bus_num = PCI_BUS_NUM(sid);
+ VTDAddressSpace *vtd_as = s->vtd_as_cache[bus_num];
+
+ if (vtd_as &&
+ (sid == PCI_BUILD_BDF(pci_bus_num(vtd_as->bus), vtd_as->devfn))) {
+ return vtd_as;
+ }
+
+ vtd_as = g_hash_table_find(s->vtd_address_spaces, vtd_find_as_by_sid, &sid);
+ s->vtd_as_cache[bus_num] = vtd_as;
+
+ return vtd_as;
+}
+
static void vtd_pt_enable_fast_path(IntelIOMMUState *s, uint16_t source_id)
{
- VTDBus *vtd_bus;
VTDAddressSpace *vtd_as;
bool success = false;
- vtd_bus = vtd_find_as_from_bus_num(s, VTD_SID_TO_BUS(source_id));
- if (!vtd_bus) {
- goto out;
- }
-
- vtd_as = vtd_bus->dev_as[VTD_SID_TO_DEVFN(source_id)];
+ vtd_as = vtd_get_as_by_sid(s, source_id);
if (!vtd_as) {
goto out;
}
@@ -1711,6 +1788,22 @@ out:
trace_vtd_pt_enable_fast_path(source_id, success);
}
+static void vtd_report_fault(IntelIOMMUState *s,
+ int err, bool is_fpd_set,
+ uint16_t source_id,
+ hwaddr addr,
+ bool is_write,
+ bool is_pasid,
+ uint32_t pasid)
+{
+ if (is_fpd_set && vtd_is_qualified_fault(err)) {
+ trace_vtd_fault_disabled();
+ } else {
+ vtd_report_dmar_fault(s, source_id, addr, err, is_write,
+ is_pasid, pasid);
+ }
+}
+
/* Map dev to context-entry then do a paging-structures walk to do a iommu
* translation.
*
@@ -1732,13 +1825,14 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
uint8_t bus_num = pci_bus_num(bus);
VTDContextCacheEntry *cc_entry;
uint64_t slpte, page_mask;
- uint32_t level;
- uint16_t source_id = vtd_make_source_id(bus_num, devfn);
+ uint32_t level, pasid = vtd_as->pasid;
+ uint16_t source_id = PCI_BUILD_BDF(bus_num, devfn);
int ret_fr;
bool is_fpd_set = false;
bool reads = true;
bool writes = true;
uint8_t access_flags;
+ bool rid2pasid = (pasid == PCI_NO_PASID) && s->root_scalable;
VTDIOTLBEntry *iotlb_entry;
/*
@@ -1751,15 +1845,17 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
cc_entry = &vtd_as->context_cache_entry;
- /* Try to fetch slpte form IOTLB */
- iotlb_entry = vtd_lookup_iotlb(s, source_id, addr);
- if (iotlb_entry) {
- trace_vtd_iotlb_page_hit(source_id, addr, iotlb_entry->slpte,
- iotlb_entry->domain_id);
- slpte = iotlb_entry->slpte;
- access_flags = iotlb_entry->access_flags;
- page_mask = iotlb_entry->mask;
- goto out;
+ /* Try to fetch slpte form IOTLB, we don't need RID2PASID logic */
+ if (!rid2pasid) {
+ iotlb_entry = vtd_lookup_iotlb(s, source_id, pasid, addr);
+ if (iotlb_entry) {
+ trace_vtd_iotlb_page_hit(source_id, addr, iotlb_entry->slpte,
+ iotlb_entry->domain_id);
+ slpte = iotlb_entry->slpte;
+ access_flags = iotlb_entry->access_flags;
+ page_mask = iotlb_entry->mask;
+ goto out;
+ }
}
/* Try to fetch context-entry from cache first */
@@ -1770,16 +1866,26 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
ce = cc_entry->context_entry;
is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD;
if (!is_fpd_set && s->root_scalable) {
- ret_fr = vtd_ce_get_pasid_fpd(s, &ce, &is_fpd_set);
- VTD_PE_GET_FPD_ERR(ret_fr, is_fpd_set, s, source_id, addr, is_write);
+ ret_fr = vtd_ce_get_pasid_fpd(s, &ce, &is_fpd_set, pasid);
+ if (ret_fr) {
+ vtd_report_fault(s, -ret_fr, is_fpd_set,
+ source_id, addr, is_write,
+ false, 0);
+ goto error;
+ }
}
} else {
ret_fr = vtd_dev_to_context_entry(s, bus_num, devfn, &ce);
is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD;
if (!ret_fr && !is_fpd_set && s->root_scalable) {
- ret_fr = vtd_ce_get_pasid_fpd(s, &ce, &is_fpd_set);
+ ret_fr = vtd_ce_get_pasid_fpd(s, &ce, &is_fpd_set, pasid);
+ }
+ if (ret_fr) {
+ vtd_report_fault(s, -ret_fr, is_fpd_set,
+ source_id, addr, is_write,
+ false, 0);
+ goto error;
}
- VTD_PE_GET_FPD_ERR(ret_fr, is_fpd_set, s, source_id, addr, is_write);
/* Update context-cache */
trace_vtd_iotlb_cc_update(bus_num, devfn, ce.hi, ce.lo,
cc_entry->context_cache_gen,
@@ -1788,11 +1894,15 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
cc_entry->context_cache_gen = s->context_cache_gen;
}
+ if (rid2pasid) {
+ pasid = VTD_CE_GET_RID2PASID(&ce);
+ }
+
/*
* We don't need to translate for pass-through context entries.
* Also, let's ignore IOTLB caching as well for PT devices.
*/
- if (vtd_dev_pt_enabled(s, &ce)) {
+ if (vtd_dev_pt_enabled(s, &ce, pasid)) {
entry->iova = addr & VTD_PAGE_MASK_4K;
entry->translated_addr = entry->iova;
entry->addr_mask = ~VTD_PAGE_MASK_4K;
@@ -1813,14 +1923,31 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
return true;
}
+ /* Try to fetch slpte form IOTLB for RID2PASID slow path */
+ if (rid2pasid) {
+ iotlb_entry = vtd_lookup_iotlb(s, source_id, pasid, addr);
+ if (iotlb_entry) {
+ trace_vtd_iotlb_page_hit(source_id, addr, iotlb_entry->slpte,
+ iotlb_entry->domain_id);
+ slpte = iotlb_entry->slpte;
+ access_flags = iotlb_entry->access_flags;
+ page_mask = iotlb_entry->mask;
+ goto out;
+ }
+ }
+
ret_fr = vtd_iova_to_slpte(s, &ce, addr, is_write, &slpte, &level,
- &reads, &writes, s->aw_bits);
- VTD_PE_GET_FPD_ERR(ret_fr, is_fpd_set, s, source_id, addr, is_write);
+ &reads, &writes, s->aw_bits, pasid);
+ if (ret_fr) {
+ vtd_report_fault(s, -ret_fr, is_fpd_set, source_id,
+ addr, is_write, pasid != PCI_NO_PASID, pasid);
+ goto error;
+ }
page_mask = vtd_slpt_level_page_mask(level);
access_flags = IOMMU_ACCESS_FLAG(reads, writes);
- vtd_update_iotlb(s, source_id, vtd_get_domain_id(s, &ce), addr, slpte,
- access_flags, level);
+ vtd_update_iotlb(s, source_id, vtd_get_domain_id(s, &ce, pasid),
+ addr, slpte, access_flags, level, pasid);
out:
vtd_iommu_unlock(s);
entry->iova = addr & page_mask;
@@ -1905,11 +2032,10 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s,
uint16_t source_id,
uint16_t func_mask)
{
+ GHashTableIter as_it;
uint16_t mask;
- VTDBus *vtd_bus;
VTDAddressSpace *vtd_as;
uint8_t bus_n, devfn;
- uint16_t devfn_it;
trace_vtd_inv_desc_cc_devices(source_id, func_mask);
@@ -1932,32 +2058,31 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s,
mask = ~mask;
bus_n = VTD_SID_TO_BUS(source_id);
- vtd_bus = vtd_find_as_from_bus_num(s, bus_n);
- if (vtd_bus) {
- devfn = VTD_SID_TO_DEVFN(source_id);
- for (devfn_it = 0; devfn_it < PCI_DEVFN_MAX; ++devfn_it) {
- vtd_as = vtd_bus->dev_as[devfn_it];
- if (vtd_as && ((devfn_it & mask) == (devfn & mask))) {
- trace_vtd_inv_desc_cc_device(bus_n, VTD_PCI_SLOT(devfn_it),
- VTD_PCI_FUNC(devfn_it));
- vtd_iommu_lock(s);
- vtd_as->context_cache_entry.context_cache_gen = 0;
- vtd_iommu_unlock(s);
- /*
- * Do switch address space when needed, in case if the
- * device passthrough bit is switched.
- */
- vtd_switch_address_space(vtd_as);
- /*
- * So a device is moving out of (or moving into) a
- * domain, resync the shadow page table.
- * This won't bring bad even if we have no such
- * notifier registered - the IOMMU notification
- * framework will skip MAP notifications if that
- * happened.
- */
- vtd_sync_shadow_page_table(vtd_as);
- }
+ devfn = VTD_SID_TO_DEVFN(source_id);
+
+ g_hash_table_iter_init(&as_it, s->vtd_address_spaces);
+ while (g_hash_table_iter_next(&as_it, NULL, (void **)&vtd_as)) {
+ if ((pci_bus_num(vtd_as->bus) == bus_n) &&
+ (vtd_as->devfn & mask) == (devfn & mask)) {
+ trace_vtd_inv_desc_cc_device(bus_n, VTD_PCI_SLOT(vtd_as->devfn),
+ VTD_PCI_FUNC(vtd_as->devfn));
+ vtd_iommu_lock(s);
+ vtd_as->context_cache_entry.context_cache_gen = 0;
+ vtd_iommu_unlock(s);
+ /*
+ * Do switch address space when needed, in case if the
+ * device passthrough bit is switched.
+ */
+ vtd_switch_address_space(vtd_as);
+ /*
+ * So a device is moving out of (or moving into) a
+ * domain, resync the shadow page table.
+ * This won't bring bad even if we have no such
+ * notifier registered - the IOMMU notification
+ * framework will skip MAP notifications if that
+ * happened.
+ */
+ vtd_sync_shadow_page_table(vtd_as);
}
}
}
@@ -2014,7 +2139,7 @@ static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id)
QLIST_FOREACH(vtd_as, &s->vtd_as_with_notifiers, next) {
if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
vtd_as->devfn, &ce) &&
- domain_id == vtd_get_domain_id(s, &ce)) {
+ domain_id == vtd_get_domain_id(s, &ce, vtd_as->pasid)) {
vtd_sync_shadow_page_table(vtd_as);
}
}
@@ -2022,7 +2147,7 @@ static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id)
static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s,
uint16_t domain_id, hwaddr addr,
- uint8_t am)
+ uint8_t am, uint32_t pasid)
{
VTDAddressSpace *vtd_as;
VTDContextEntry ce;
@@ -2030,9 +2155,12 @@ static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s,
hwaddr size = (1 << am) * VTD_PAGE_SIZE;
QLIST_FOREACH(vtd_as, &(s->vtd_as_with_notifiers), next) {
+ if (pasid != PCI_NO_PASID && pasid != vtd_as->pasid) {
+ continue;
+ }
ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
vtd_as->devfn, &ce);
- if (!ret && domain_id == vtd_get_domain_id(s, &ce)) {
+ if (!ret && domain_id == vtd_get_domain_id(s, &ce, vtd_as->pasid)) {
if (vtd_as_has_map_notifier(vtd_as)) {
/*
* As long as we have MAP notifications registered in
@@ -2076,7 +2204,7 @@ static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
vtd_iommu_lock(s);
g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page, &info);
vtd_iommu_unlock(s);
- vtd_iotlb_page_invalidate_notify(s, domain_id, addr, am);
+ vtd_iotlb_page_invalidate_notify(s, domain_id, addr, am, PCI_NO_PASID);
}
/* Flush IOTLB
@@ -2473,18 +2601,13 @@ static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s,
{
VTDAddressSpace *vtd_dev_as;
IOMMUTLBEvent event;
- struct VTDBus *vtd_bus;
hwaddr addr;
uint64_t sz;
uint16_t sid;
- uint8_t devfn;
bool size;
- uint8_t bus_num;
addr = VTD_INV_DESC_DEVICE_IOTLB_ADDR(inv_desc->hi);
sid = VTD_INV_DESC_DEVICE_IOTLB_SID(inv_desc->lo);
- devfn = sid & 0xff;
- bus_num = sid >> 8;
size = VTD_INV_DESC_DEVICE_IOTLB_SIZE(inv_desc->hi);
if ((inv_desc->lo & VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO) ||
@@ -2495,12 +2618,11 @@ static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s,
return false;
}
- vtd_bus = vtd_find_as_from_bus_num(s, bus_num);
- if (!vtd_bus) {
- goto done;
- }
-
- vtd_dev_as = vtd_bus->dev_as[devfn];
+ /*
+ * Using sid is OK since the guest should have finished the
+ * initialization of both the bus and device.
+ */
+ vtd_dev_as = vtd_get_as_by_sid(s, sid);
if (!vtd_dev_as) {
goto done;
}
@@ -3151,6 +3273,7 @@ static Property vtd_properties[] = {
DEFINE_PROP_BOOL("caching-mode", IntelIOMMUState, caching_mode, FALSE),
DEFINE_PROP_BOOL("x-scalable-mode", IntelIOMMUState, scalable_mode, FALSE),
DEFINE_PROP_BOOL("snoop-control", IntelIOMMUState, snoop_control, false),
+ DEFINE_PROP_BOOL("x-pasid-mode", IntelIOMMUState, pasid, false),
DEFINE_PROP_BOOL("dma-drain", IntelIOMMUState, dma_drain, true),
DEFINE_PROP_BOOL("dma-translation", IntelIOMMUState, dma_translation, true),
DEFINE_PROP_END_OF_LIST(),
@@ -3425,32 +3548,98 @@ static const MemoryRegionOps vtd_mem_ir_ops = {
},
};
-VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn)
+static void vtd_report_ir_illegal_access(VTDAddressSpace *vtd_as,
+ hwaddr addr, bool is_write)
{
- uintptr_t key = (uintptr_t)bus;
- VTDBus *vtd_bus = g_hash_table_lookup(s->vtd_as_by_busptr, &key);
- VTDAddressSpace *vtd_dev_as;
- char name[128];
+ IntelIOMMUState *s = vtd_as->iommu_state;
+ uint8_t bus_n = pci_bus_num(vtd_as->bus);
+ uint16_t sid = PCI_BUILD_BDF(bus_n, vtd_as->devfn);
+ bool is_fpd_set = false;
+ VTDContextEntry ce;
- if (!vtd_bus) {
- uintptr_t *new_key = g_malloc(sizeof(*new_key));
- *new_key = (uintptr_t)bus;
- /* No corresponding free() */
- vtd_bus = g_malloc0(sizeof(VTDBus) + sizeof(VTDAddressSpace *) * \
- PCI_DEVFN_MAX);
- vtd_bus->bus = bus;
- g_hash_table_insert(s->vtd_as_by_busptr, new_key, vtd_bus);
+ assert(vtd_as->pasid != PCI_NO_PASID);
+
+ /* Try out best to fetch FPD, we can't do anything more */
+ if (vtd_dev_to_context_entry(s, bus_n, vtd_as->devfn, &ce) == 0) {
+ is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD;
+ if (!is_fpd_set && s->root_scalable) {
+ vtd_ce_get_pasid_fpd(s, &ce, &is_fpd_set, vtd_as->pasid);
+ }
}
- vtd_dev_as = vtd_bus->dev_as[devfn];
+ vtd_report_fault(s, VTD_FR_SM_INTERRUPT_ADDR,
+ is_fpd_set, sid, addr, is_write,
+ true, vtd_as->pasid);
+}
+
+static MemTxResult vtd_mem_ir_fault_read(void *opaque, hwaddr addr,
+ uint64_t *data, unsigned size,
+ MemTxAttrs attrs)
+{
+ vtd_report_ir_illegal_access(opaque, addr, false);
+ return MEMTX_ERROR;
+}
+
+static MemTxResult vtd_mem_ir_fault_write(void *opaque, hwaddr addr,
+ uint64_t value, unsigned size,
+ MemTxAttrs attrs)
+{
+ vtd_report_ir_illegal_access(opaque, addr, true);
+
+ return MEMTX_ERROR;
+}
+
+static const MemoryRegionOps vtd_mem_ir_fault_ops = {
+ .read_with_attrs = vtd_mem_ir_fault_read,
+ .write_with_attrs = vtd_mem_ir_fault_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+ .impl = {
+ .min_access_size = 1,
+ .max_access_size = 8,
+ },
+ .valid = {
+ .min_access_size = 1,
+ .max_access_size = 8,
+ },
+};
+
+VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,
+ int devfn, unsigned int pasid)
+{
+ /*
+ * We can't simply use sid here since the bus number might not be
+ * initialized by the guest.
+ */
+ struct vtd_as_key key = {
+ .bus = bus,
+ .devfn = devfn,
+ .pasid = pasid,
+ };
+ VTDAddressSpace *vtd_dev_as;
+ char name[128];
+
+ vtd_dev_as = g_hash_table_lookup(s->vtd_address_spaces, &key);
if (!vtd_dev_as) {
- snprintf(name, sizeof(name), "vtd-%02x.%x", PCI_SLOT(devfn),
- PCI_FUNC(devfn));
- vtd_bus->dev_as[devfn] = vtd_dev_as = g_new0(VTDAddressSpace, 1);
+ struct vtd_as_key *new_key = g_malloc(sizeof(*new_key));
+
+ new_key->bus = bus;
+ new_key->devfn = devfn;
+ new_key->pasid = pasid;
+
+ if (pasid == PCI_NO_PASID) {
+ snprintf(name, sizeof(name), "vtd-%02x.%x", PCI_SLOT(devfn),
+ PCI_FUNC(devfn));
+ } else {
+ snprintf(name, sizeof(name), "vtd-%02x.%x-pasid-%x", PCI_SLOT(devfn),
+ PCI_FUNC(devfn), pasid);
+ }
+
+ vtd_dev_as = g_new0(VTDAddressSpace, 1);
vtd_dev_as->bus = bus;
vtd_dev_as->devfn = (uint8_t)devfn;
+ vtd_dev_as->pasid = pasid;
vtd_dev_as->iommu_state = s;
vtd_dev_as->context_cache_entry.context_cache_gen = 0;
vtd_dev_as->iova_tree = iova_tree_new();
@@ -3492,6 +3681,24 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn)
&vtd_dev_as->iommu_ir, 1);
/*
+ * This region is used for catching fault to access interrupt
+ * range via passthrough + PASID. See also
+ * vtd_switch_address_space(). We can't use alias since we
+ * need to know the sid which is valid for MSI who uses
+ * bus_master_as (see msi_send_message()).
+ */
+ memory_region_init_io(&vtd_dev_as->iommu_ir_fault, OBJECT(s),
+ &vtd_mem_ir_fault_ops, vtd_dev_as, "vtd-no-ir",
+ VTD_INTERRUPT_ADDR_SIZE);
+ /*
+ * Hook to root since when PT is enabled vtd_dev_as->iommu
+ * will be disabled.
+ */
+ memory_region_add_subregion_overlap(MEMORY_REGION(&vtd_dev_as->root),
+ VTD_INTERRUPT_ADDR_FIRST,
+ &vtd_dev_as->iommu_ir_fault, 2);
+
+ /*
* Hook both the containers under the root container, we
* switch between DMAR & noDMAR by enable/disable
* corresponding sub-containers
@@ -3503,6 +3710,8 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn)
&vtd_dev_as->nodmar, 0);
vtd_switch_address_space(vtd_dev_as);
+
+ g_hash_table_insert(s->vtd_address_spaces, new_key, vtd_dev_as);
}
return vtd_dev_as;
}
@@ -3609,7 +3818,7 @@ static void vtd_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n)
"legacy mode",
bus_n, PCI_SLOT(vtd_as->devfn),
PCI_FUNC(vtd_as->devfn),
- vtd_get_domain_id(s, &ce),
+ vtd_get_domain_id(s, &ce, vtd_as->pasid),
ce.hi, ce.lo);
if (vtd_as_has_map_notifier(vtd_as)) {
/* This is required only for MAP typed notifiers */
@@ -3619,10 +3828,10 @@ static void vtd_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n)
.notify_unmap = false,
.aw = s->aw_bits,
.as = vtd_as,
- .domain_id = vtd_get_domain_id(s, &ce),
+ .domain_id = vtd_get_domain_id(s, &ce, vtd_as->pasid),
};
- vtd_page_walk(s, &ce, 0, ~0ULL, &info);
+ vtd_page_walk(s, &ce, 0, ~0ULL, &info, vtd_as->pasid);
}
} else {
trace_vtd_replay_ce_invalid(bus_n, PCI_SLOT(vtd_as->devfn),
@@ -3722,6 +3931,10 @@ static void vtd_init(IntelIOMMUState *s)
s->ecap |= VTD_ECAP_SC;
}
+ if (s->pasid) {
+ s->ecap |= VTD_ECAP_PASID;
+ }
+
vtd_reset_caches(s);
/* Define registers with default values and bit semantics */
@@ -3795,7 +4008,7 @@ static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
assert(0 <= devfn && devfn < PCI_DEVFN_MAX);
- vtd_as = vtd_find_add_as(s, bus, devfn);
+ vtd_as = vtd_find_add_as(s, bus, devfn, PCI_NO_PASID);
return &vtd_as->as;
}
@@ -3838,6 +4051,11 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
return false;
}
+ if (s->pasid && !s->scalable_mode) {
+ error_setg(errp, "Need to set scalable mode for PASID");
+ return false;
+ }
+
return true;
}
@@ -3874,6 +4092,17 @@ static void vtd_realize(DeviceState *dev, Error **errp)
X86MachineState *x86ms = X86_MACHINE(ms);
PCIBus *bus = pcms->bus;
IntelIOMMUState *s = INTEL_IOMMU_DEVICE(dev);
+ X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
+
+ if (s->pasid && x86_iommu->dt_supported) {
+ /*
+ * PASID-based-Device-TLB Invalidate Descriptor is not
+ * implemented and it requires support from vhost layer which
+ * needs to be implemented in the future.
+ */
+ error_setg(errp, "PASID based device IOTLB is not supported");
+ return;
+ }
if (!vtd_decide_config(s, errp)) {
return;
@@ -3881,7 +4110,6 @@ static void vtd_realize(DeviceState *dev, Error **errp)
QLIST_INIT(&s->vtd_as_with_notifiers);
qemu_mutex_init(&s->iommu_lock);
- memset(s->vtd_as_by_bus_num, 0, sizeof(s->vtd_as_by_bus_num));
memory_region_init_io(&s->csrmem, OBJECT(s), &vtd_mem_ops, s,
"intel_iommu", DMAR_REG_SIZE);
@@ -3901,10 +4129,10 @@ static void vtd_realize(DeviceState *dev, Error **errp)
sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->csrmem);
/* No corresponding destroy */
- s->iotlb = g_hash_table_new_full(vtd_uint64_hash, vtd_uint64_equal,
+ s->iotlb = g_hash_table_new_full(vtd_iotlb_hash, vtd_iotlb_equal,
g_free, g_free);
- s->vtd_as_by_busptr = g_hash_table_new_full(vtd_uint64_hash, vtd_uint64_equal,
- g_free, g_free);
+ s->vtd_address_spaces = g_hash_table_new_full(vtd_as_hash, vtd_as_equal,
+ g_free, g_free);
vtd_init(s);
sysbus_mmio_map(SYS_BUS_DEVICE(s), 0, Q35_HOST_BRIDGE_IOMMU_ADDR);
pci_setup_iommu(bus, vtd_host_dma_iommu, dev);
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 930ce61..f090e61 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -114,8 +114,9 @@
VTD_INTERRUPT_ADDR_FIRST + 1)
/* The shift of source_id in the key of IOTLB hash table */
-#define VTD_IOTLB_SID_SHIFT 36
-#define VTD_IOTLB_LVL_SHIFT 52
+#define VTD_IOTLB_SID_SHIFT 20
+#define VTD_IOTLB_LVL_SHIFT 28
+#define VTD_IOTLB_PASID_SHIFT 30
#define VTD_IOTLB_MAX_SIZE 1024 /* Max size of the hash table */
/* IOTLB_REG */
@@ -191,6 +192,7 @@
#define VTD_ECAP_SC (1ULL << 7)
#define VTD_ECAP_MHMV (15ULL << 20)
#define VTD_ECAP_SRS (1ULL << 31)
+#define VTD_ECAP_PASID (1ULL << 40)
#define VTD_ECAP_SMTS (1ULL << 43)
#define VTD_ECAP_SLTS (1ULL << 46)
@@ -211,6 +213,8 @@
#define VTD_CAP_DRAIN_READ (1ULL << 55)
#define VTD_CAP_DRAIN (VTD_CAP_DRAIN_READ | VTD_CAP_DRAIN_WRITE)
#define VTD_CAP_CM (1ULL << 7)
+#define VTD_PASID_ID_SHIFT 20
+#define VTD_PASID_ID_MASK ((1ULL << VTD_PASID_ID_SHIFT) - 1)
/* Supported Adjusted Guest Address Widths */
#define VTD_CAP_SAGAW_SHIFT 8
@@ -262,6 +266,8 @@
#define VTD_FRCD_SID(val) ((val) & VTD_FRCD_SID_MASK)
/* For the low 64-bit of 128-bit */
#define VTD_FRCD_FI(val) ((val) & ~0xfffULL)
+#define VTD_FRCD_PV(val) (((val) & 0xffffULL) << 40)
+#define VTD_FRCD_PP(val) (((val) & 0x1) << 31)
/* DMA Remapping Fault Conditions */
typedef enum VTDFaultReason {
@@ -379,6 +385,11 @@ typedef union VTDInvDesc VTDInvDesc;
#define VTD_INV_DESC_IOTLB_AM(val) ((val) & 0x3fULL)
#define VTD_INV_DESC_IOTLB_RSVD_LO 0xffffffff0000ff00ULL
#define VTD_INV_DESC_IOTLB_RSVD_HI 0xf80ULL
+#define VTD_INV_DESC_IOTLB_PASID_PASID (2ULL << 4)
+#define VTD_INV_DESC_IOTLB_PASID_PAGE (3ULL << 4)
+#define VTD_INV_DESC_IOTLB_PASID(val) (((val) >> 32) & VTD_PASID_ID_MASK)
+#define VTD_INV_DESC_IOTLB_PASID_RSVD_LO 0xfff00000000001c0ULL
+#define VTD_INV_DESC_IOTLB_PASID_RSVD_HI 0xf80ULL
/* Mask for Device IOTLB Invalidate Descriptor */
#define VTD_INV_DESC_DEVICE_IOTLB_ADDR(val) ((val) & 0xfffffffffffff000ULL)
@@ -413,6 +424,7 @@ typedef union VTDInvDesc VTDInvDesc;
/* Information about page-selective IOTLB invalidate */
struct VTDIOTLBPageInvInfo {
uint16_t domain_id;
+ uint32_t pasid;
uint64_t addr;
uint8_t mask;
};
diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c
index ffd1884..170a331 100644
--- a/hw/i386/microvm.c
+++ b/hw/i386/microvm.c
@@ -324,8 +324,6 @@ static void microvm_memory_init(MicrovmMachineState *mms)
fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, machine->smp.max_cpus);
fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)machine->ram_size);
fw_cfg_add_i32(fw_cfg, FW_CFG_IRQ0_OVERRIDE, 1);
- fw_cfg_add_bytes(fw_cfg, FW_CFG_E820_TABLE,
- &e820_reserve, sizeof(e820_reserve));
fw_cfg_add_file(fw_cfg, "etc/e820", e820_table,
sizeof(struct e820_entry) * e820_get_num_entries());
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index ef14da5..546b703 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1061,7 +1061,6 @@ void pc_memory_init(PCMachineState *pcms,
hwaddr cxl_size = MiB;
cxl_base = pc_get_cxl_range_start(pcms);
- e820_add_entry(cxl_base, cxl_size, E820_RESERVED);
memory_region_init(mr, OBJECT(machine), "cxl_host_reg", cxl_size);
memory_region_add_subregion(system_memory, cxl_base, mr);
cxl_resv_end = cxl_base + cxl_size;
@@ -1077,7 +1076,6 @@ void pc_memory_init(PCMachineState *pcms,
memory_region_init_io(&fw->mr, OBJECT(machine), &cfmws_ops, fw,
"cxl-fixed-memory-region", fw->size);
memory_region_add_subregion(system_memory, fw->base, &fw->mr);
- e820_add_entry(fw->base, fw->size, E820_RESERVED);
cxl_fmw_base += fw->size;
cxl_resv_end = cxl_fmw_base;
}
diff --git a/hw/i386/trace-events b/hw/i386/trace-events
index e49814d..04fd71b 100644
--- a/hw/i386/trace-events
+++ b/hw/i386/trace-events
@@ -12,6 +12,8 @@ vtd_inv_desc_cc_devices(uint16_t sid, uint16_t fmask) "context invalidate device
vtd_inv_desc_iotlb_global(void) "iotlb invalidate global"
vtd_inv_desc_iotlb_domain(uint16_t domain) "iotlb invalidate whole domain 0x%"PRIx16
vtd_inv_desc_iotlb_pages(uint16_t domain, uint64_t addr, uint8_t mask) "iotlb invalidate domain 0x%"PRIx16" addr 0x%"PRIx64" mask 0x%"PRIx8
+vtd_inv_desc_iotlb_pasid_pages(uint16_t domain, uint64_t addr, uint8_t mask, uint32_t pasid) "iotlb invalidate domain 0x%"PRIx16" addr 0x%"PRIx64" mask 0x%"PRIx8" pasid 0x%"PRIx32
+vtd_inv_desc_iotlb_pasid(uint16_t domain, uint32_t pasid) "iotlb invalidate domain 0x%"PRIx16" pasid 0x%"PRIx32
vtd_inv_desc_wait_sw(uint64_t addr, uint32_t data) "wait invalidate status write addr 0x%"PRIx64" data 0x%"PRIx32
vtd_inv_desc_wait_irq(const char *msg) "%s"
vtd_inv_desc_wait_write_fail(uint64_t hi, uint64_t lo) "write fail for wait desc hi 0x%"PRIx64" lo 0x%"PRIx64