Diffstat (limited to 'hw/i386')
-rw-r--r--hw/i386/Kconfig17
-rw-r--r--hw/i386/acpi-build.c209
-rw-r--r--hw/i386/acpi-build.h2
-rw-r--r--hw/i386/acpi-common.c2
-rw-r--r--hw/i386/acpi-microvm.c2
-rw-r--r--hw/i386/amd_iommu.c229
-rw-r--r--hw/i386/amd_iommu.h12
-rw-r--r--hw/i386/e820_memory_layout.c17
-rw-r--r--hw/i386/e820_memory_layout.h8
-rw-r--r--hw/i386/fw_cfg.c34
-rw-r--r--hw/i386/fw_cfg.h1
-rw-r--r--hw/i386/intel_iommu.c1162
-rw-r--r--hw/i386/intel_iommu_internal.h134
-rw-r--r--hw/i386/kvm/apic.c13
-rw-r--r--hw/i386/kvm/clock.c16
-rw-r--r--hw/i386/kvm/i8254.c11
-rw-r--r--hw/i386/kvm/i8259.c6
-rw-r--r--hw/i386/kvm/ioapic.c9
-rw-r--r--hw/i386/kvm/xen-stubs.c13
-rw-r--r--hw/i386/kvm/xen_evtchn.c72
-rw-r--r--hw/i386/kvm/xen_evtchn.h2
-rw-r--r--hw/i386/kvm/xen_gnttab.c8
-rw-r--r--hw/i386/kvm/xen_overlay.c10
-rw-r--r--hw/i386/kvm/xen_primary_console.c6
-rw-r--r--hw/i386/kvm/xen_xenstore.c14
-rw-r--r--hw/i386/meson.build2
-rw-r--r--hw/i386/microvm-dt.c4
-rw-r--r--hw/i386/microvm.c94
-rw-r--r--hw/i386/monitor.c4
-rw-r--r--hw/i386/multiboot.c46
-rw-r--r--hw/i386/nitro_enclave.c353
-rw-r--r--hw/i386/pc.c154
-rw-r--r--hw/i386/pc_piix.c291
-rw-r--r--hw/i386/pc_q35.c275
-rw-r--r--hw/i386/pc_sysfw.c13
-rw-r--r--hw/i386/pc_sysfw_ovmf.c1
-rw-r--r--hw/i386/port92.c6
-rw-r--r--hw/i386/sgx-epc.c9
-rw-r--r--hw/i386/sgx-stub.c11
-rw-r--r--hw/i386/sgx.c60
-rw-r--r--hw/i386/tdvf-hob.c130
-rw-r--r--hw/i386/tdvf-hob.h26
-rw-r--r--hw/i386/tdvf.c189
-rw-r--r--hw/i386/trace-events1
-rw-r--r--hw/i386/vapic.c19
-rw-r--r--hw/i386/vmmouse.c7
-rw-r--r--hw/i386/vmport.c12
-rw-r--r--hw/i386/x86-common.c141
-rw-r--r--hw/i386/x86-cpu.c6
-rw-r--r--hw/i386/x86-iommu.c9
-rw-r--r--hw/i386/x86.c11
-rw-r--r--hw/i386/xen/meson.build1
-rw-r--r--hw/i386/xen/xen-hvm.c34
-rw-r--r--hw/i386/xen/xen-pvh.c125
-rw-r--r--hw/i386/xen/xen_apic.c4
-rw-r--r--hw/i386/xen/xen_platform.c12
-rw-r--r--hw/i386/xen/xen_pvdevice.c7
57 files changed, 2881 insertions, 1185 deletions
diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig
index f4a33b6..eb65bda 100644
--- a/hw/i386/Kconfig
+++ b/hw/i386/Kconfig
@@ -10,6 +10,11 @@ config SGX
bool
depends on KVM
+config TDX
+ bool
+ select X86_FW_OVMF
+ depends on KVM
+
config PC
bool
imply APPLESMC
@@ -26,6 +31,7 @@ config PC
imply QXL
imply SEV
imply SGX
+ imply TDX
imply TEST_DEVICES
imply TPM_CRB
imply TPM_TIS_ISA
@@ -43,6 +49,7 @@ config PC
select SERIAL_ISA
select ACPI_PCI
select ACPI_VMGENID
+ select ACPI_VMCLOCK
select VIRTIO_PMEM_SUPPORTED
select VIRTIO_MEM_SUPPORTED
select HV_BALLOON_SUPPORTED
@@ -129,6 +136,16 @@ config MICROVM
select USB_XHCI_SYSBUS
select I8254
+config NITRO_ENCLAVE
+ default y
+ depends on I386 && FDT # for MICROVM
+ depends on LIBCBOR && GNUTLS # for EIF and VIRTIO_NSM
+ depends on VHOST_USER # for VHOST_USER_VSOCK
+ select EIF
+ select MICROVM
+ select VHOST_USER_VSOCK
+ select VIRTIO_NSM
+
config X86_IOMMU
bool
depends on PC
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index f4e366f..61851cc 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -22,7 +22,7 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
-#include "qapi/qmp/qnum.h"
+#include "qobject/qnum.h"
#include "acpi-build.h"
#include "acpi-common.h"
#include "qemu/bitmap.h"
@@ -40,18 +40,19 @@
#include "hw/acpi/acpi_aml_interface.h"
#include "hw/input/i8042.h"
#include "hw/acpi/memory_hotplug.h"
-#include "sysemu/tpm.h"
+#include "system/tpm.h"
#include "hw/acpi/tpm.h"
#include "hw/acpi/vmgenid.h"
+#include "hw/acpi/vmclock.h"
#include "hw/acpi/erst.h"
#include "hw/acpi/piix4.h"
-#include "sysemu/tpm_backend.h"
+#include "system/tpm_backend.h"
#include "hw/rtc/mc146818rtc_regs.h"
#include "migration/vmstate.h"
#include "hw/mem/memory-device.h"
#include "hw/mem/nvdimm.h"
-#include "sysemu/numa.h"
-#include "sysemu/reset.h"
+#include "system/numa.h"
+#include "system/reset.h"
#include "hw/hyperv/vmbus-bridge.h"
/* Supported chipsets: */
@@ -68,7 +69,6 @@
#include "hw/acpi/utils.h"
#include "hw/acpi/pci.h"
#include "hw/acpi/cxl.h"
-#include "hw/acpi/acpi_generic_initiator.h"
#include "qom/qom-qobject.h"
#include "hw/i386/amd_iommu.h"
@@ -139,7 +139,7 @@ static void init_common_fadt_data(MachineState *ms, Object *o,
/*
* "ICH9-LPC" or "PIIX4_PM" has "smm-compat" property to keep the old
* behavior for compatibility irrelevant to smm_enabled, which doesn't
- * comforms to ACPI spec.
+ * conform to the ACPI spec.
*/
bool smm_enabled = object_property_get_bool(o, "smm-compat", NULL) ?
true : x86_machine_is_smm_enabled(x86ms);
@@ -589,8 +589,8 @@ void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus)
}
}
-static bool build_append_notfication_callback(Aml *parent_scope,
- const PCIBus *bus)
+static bool build_append_notification_callback(Aml *parent_scope,
+ const PCIBus *bus)
{
Aml *method;
PCIBus *sec;
@@ -604,7 +604,7 @@ static bool build_append_notfication_callback(Aml *parent_scope,
continue;
}
nr_notifiers = nr_notifiers +
- build_append_notfication_callback(br_scope, sec);
+ build_append_notification_callback(br_scope, sec);
/*
* add new child scope to parent
* and keep track of bus that have PCNT,
@@ -655,6 +655,7 @@ static Aml *aml_pci_pdsm(void)
Aml *acpi_index = aml_local(2);
Aml *zero = aml_int(0);
Aml *one = aml_int(1);
+ Aml *not_supp = aml_int(0xFFFFFFFF);
Aml *func = aml_arg(2);
Aml *params = aml_arg(4);
Aml *bnum = aml_derefof(aml_index(params, aml_int(0)));
@@ -679,7 +680,7 @@ static Aml *aml_pci_pdsm(void)
*/
ifctx1 = aml_if(aml_lnot(
aml_or(aml_equal(acpi_index, zero),
- aml_equal(acpi_index, aml_int(0xFFFFFFFF)), NULL)
+ aml_equal(acpi_index, not_supp), NULL)
));
{
/* have supported functions */
@@ -705,18 +706,30 @@ static Aml *aml_pci_pdsm(void)
{
Aml *pkg = aml_package(2);
- aml_append(pkg, zero);
- /*
- * optional, if not impl. should return null string
- */
- aml_append(pkg, aml_string("%s", ""));
- aml_append(ifctx, aml_store(pkg, ret));
-
aml_append(ifctx, aml_store(aml_call2("AIDX", bnum, sunum), acpi_index));
+ aml_append(ifctx, aml_store(pkg, ret));
/*
- * update acpi-index to actual value
+ * Windows calls func=7 without checking if it's available,
+ * so as a workaround Microsoft suggested returning an invalid Package
+ * for func 7. Therefore return a 2-element package, but initialize its
+ * elements only when acpi_index is supported; otherwise leave them
+ * as Uninitialized ObjectType, which should make Windows discard the
+ * result as unexpected and prevent it from setting a bogus
+ * 'PCI Label' on the device.
*/
- aml_append(ifctx, aml_store(acpi_index, aml_index(ret, zero)));
+ ifctx1 = aml_if(aml_lnot(aml_lor(
+ aml_equal(acpi_index, zero), aml_equal(acpi_index, not_supp)
+ )));
+ {
+ aml_append(ifctx1, aml_store(acpi_index, aml_index(ret, zero)));
+ /*
+ * optional, if not impl. should return null string
+ */
+ aml_append(ifctx1, aml_store(aml_string("%s", ""),
+ aml_index(ret, one)));
+ }
+ aml_append(ifctx, ifctx1);
+
aml_append(ifctx, aml_return(ret));
}
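
For reference, the AML emitted by the hunk above decompiles to roughly the following ASL (shown here as a C comment; assuming ret is Local0 and, as in the surrounding method, acpi_index is Local2 — the exact decompiler output may differ):

    /*
     *   Local2 = AIDX (...)                 // acpi-index for this slot
     *   Local0 = Package (0x02) {}          // both elements Uninitialized
     *   If ((Local2 != Zero) && (Local2 != 0xFFFFFFFF))
     *   {
     *       Local0 [Zero] = Local2          // acpi-index
     *       Local0 [One] = ""               // optional, empty label
     *   }
     *   Return (Local0)                     // Uninitialized elements when
     *                                       // acpi-index is unsupported
     */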
@@ -724,120 +737,45 @@ static Aml *aml_pci_pdsm(void)
return method;
}
-/**
- * build_prt_entry:
- * @link_name: link name for PCI route entry
- *
- * build AML package containing a PCI route entry for @link_name
- */
-static Aml *build_prt_entry(const char *link_name)
-{
- Aml *a_zero = aml_int(0);
- Aml *pkg = aml_package(4);
- aml_append(pkg, a_zero);
- aml_append(pkg, a_zero);
- aml_append(pkg, aml_name("%s", link_name));
- aml_append(pkg, a_zero);
- return pkg;
-}
-
/*
- * initialize_route - Initialize the interrupt routing rule
- * through a specific LINK:
- * if (lnk_idx == idx)
- * route using link 'link_name'
- */
-static Aml *initialize_route(Aml *route, const char *link_name,
- Aml *lnk_idx, int idx)
-{
- Aml *if_ctx = aml_if(aml_equal(lnk_idx, aml_int(idx)));
- Aml *pkg = build_prt_entry(link_name);
-
- aml_append(if_ctx, aml_store(pkg, route));
-
- return if_ctx;
-}
-
-/*
- * build_prt - Define interrupt rounting rules
+ * build_prt - Define interrupt routing rules
*
* Returns an array of 128 routes, one for each device,
* based on device location.
* The main goal is to equally distribute the interrupts
* over the 4 existing ACPI links (works only for i440fx).
- * The hash function is (slot + pin) & 3 -> "LNK[D|A|B|C]".
+ * The hash function is: (slot + pin) & 3 -> "LNK[D|A|B|C]".
*
*/
static Aml *build_prt(bool is_pci0_prt)
{
- Aml *method, *while_ctx, *pin, *res;
+ const int nroutes = 128;
+ Aml *rt_pkg, *method;
+ int pin;
method = aml_method("_PRT", 0, AML_NOTSERIALIZED);
- res = aml_local(0);
- pin = aml_local(1);
- aml_append(method, aml_store(aml_package(128), res));
- aml_append(method, aml_store(aml_int(0), pin));
-
- /* while (pin < 128) */
- while_ctx = aml_while(aml_lless(pin, aml_int(128)));
- {
- Aml *slot = aml_local(2);
- Aml *lnk_idx = aml_local(3);
- Aml *route = aml_local(4);
-
- /* slot = pin >> 2 */
- aml_append(while_ctx,
- aml_store(aml_shiftright(pin, aml_int(2), NULL), slot));
- /* lnk_idx = (slot + pin) & 3 */
- aml_append(while_ctx,
- aml_store(aml_and(aml_add(pin, slot, NULL), aml_int(3), NULL),
- lnk_idx));
-
- /* route[2] = "LNK[D|A|B|C]", selection based on pin % 3 */
- aml_append(while_ctx, initialize_route(route, "LNKD", lnk_idx, 0));
- if (is_pci0_prt) {
- Aml *if_device_1, *if_pin_4, *else_pin_4;
-
- /* device 1 is the power-management device, needs SCI */
- if_device_1 = aml_if(aml_equal(lnk_idx, aml_int(1)));
- {
- if_pin_4 = aml_if(aml_equal(pin, aml_int(4)));
- {
- aml_append(if_pin_4,
- aml_store(build_prt_entry("LNKS"), route));
- }
- aml_append(if_device_1, if_pin_4);
- else_pin_4 = aml_else();
- {
- aml_append(else_pin_4,
- aml_store(build_prt_entry("LNKA"), route));
- }
- aml_append(if_device_1, else_pin_4);
- }
- aml_append(while_ctx, if_device_1);
+ assert(nroutes < 256);
+ rt_pkg = aml_package(nroutes);
+
+ for (pin = 0; pin < nroutes; pin++) {
+ Aml *pkg = aml_package(4);
+ int slot = pin >> 2;
+
+ aml_append(pkg, aml_int((slot << 16) | 0xFFFF));
+ aml_append(pkg, aml_int(pin & 3));
+ /* device 1 is the power-management device, needs SCI */
+ if (is_pci0_prt && pin == 4) {
+ aml_append(pkg, aml_name("%s", "LNKS"));
} else {
- aml_append(while_ctx, initialize_route(route, "LNKA", lnk_idx, 1));
+ static const char link_name[][5] = {"LNKD", "LNKA", "LNKB", "LNKC"};
+ int hash = (slot + pin) & 3;
+ aml_append(pkg, aml_name("%s", link_name[hash]));
}
- aml_append(while_ctx, initialize_route(route, "LNKB", lnk_idx, 2));
- aml_append(while_ctx, initialize_route(route, "LNKC", lnk_idx, 3));
-
- /* route[0] = 0x[slot]FFFF */
- aml_append(while_ctx,
- aml_store(aml_or(aml_shiftleft(slot, aml_int(16)), aml_int(0xFFFF),
- NULL),
- aml_index(route, aml_int(0))));
- /* route[1] = pin & 3 */
- aml_append(while_ctx,
- aml_store(aml_and(pin, aml_int(3), NULL),
- aml_index(route, aml_int(1))));
- /* res[pin] = route */
- aml_append(while_ctx, aml_store(route, aml_index(res, pin)));
- /* pin++ */
- aml_append(while_ctx, aml_increment(pin));
+ aml_append(pkg, aml_int(0));
+ aml_append(rt_pkg, pkg);
}
- aml_append(method, while_ctx);
- /* return res*/
- aml_append(method, aml_return(res));
+
+ aml_append(method, aml_return(rt_pkg));
return method;
}
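
A minimal sketch of the mapping that the rewritten build_prt() bakes into the static package (the standalone helper below is illustrative and not part of the patch):

    #include <stdbool.h>

    /* For _PRT entry 'pin' (0..127): slot = pin >> 2 and the link is
     * chosen by (slot + pin) & 3, with the single exception that pin 4
     * on the PIIX host bridge routes to LNKS (device 1 is the
     * power-management device and needs SCI). */
    static const char *prt_link_for(int pin, bool is_pci0_prt)
    {
        static const char *const links[] = { "LNKD", "LNKA", "LNKB", "LNKC" };
        int slot = pin >> 2;

        if (is_pci0_prt && pin == 4) {
            return "LNKS";
        }
        return links[(slot + pin) & 3];
    }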
@@ -1536,7 +1474,8 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
.fw_unplugs_cpu = pm->smi_on_cpu_unplug,
};
build_cpus_aml(dsdt, machine, opts, pc_madt_cpu_entry,
- pm->cpu_hp_io_base, "\\_SB.PCI0", "\\_GPE._E02");
+ pm->cpu_hp_io_base, "\\_SB.PCI0", "\\_GPE._E02",
+ AML_SYSTEM_IO);
}
if (pcms->memhp_io_base && nr_mem) {
@@ -1551,6 +1490,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
QLIST_FOREACH(bus, &bus->child, sibling) {
uint8_t bus_num = pci_bus_num(bus);
uint8_t numa_node = pci_bus_numa_node(bus);
+ uint32_t uid;
/* look only for expander root buses */
if (!pci_bus_is_root(bus)) {
@@ -1561,6 +1501,8 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
root_bus_limit = bus_num - 1;
}
+ uid = object_property_get_uint(OBJECT(bus), "acpi_uid",
+ &error_fatal);
scope = aml_scope("\\_SB");
if (pci_bus_is_cxl(bus)) {
@@ -1568,7 +1510,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
} else {
dev = aml_device("PC%.02X", bus_num);
}
- aml_append(dev, aml_name_decl("_UID", aml_int(bus_num)));
+ aml_append(dev, aml_name_decl("_UID", aml_int(uid)));
aml_append(dev, aml_name_decl("_BBN", aml_int(bus_num)));
if (pci_bus_is_cxl(bus)) {
struct Aml *aml_pkg = aml_package(2);
@@ -1831,7 +1773,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
PCIBus *b = PCI_HOST_BRIDGE(pci_host)->bus;
scope = aml_scope("\\_SB.PCI0");
- has_pcnt = build_append_notfication_callback(scope, b);
+ has_pcnt = build_append_notification_callback(scope, b);
if (has_pcnt) {
aml_append(dsdt, scope);
}
@@ -2046,7 +1988,7 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
build_srat_memory(table_data, 0, 0, 0, MEM_AFFINITY_NOFLAGS);
}
- build_srat_generic_pci_initiator(table_data);
+ build_srat_generic_affinity_structures(table_data);
/*
* Entry is required for Windows to enable memory hotplug in OS
@@ -2391,12 +2333,12 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker, const char *oem_id,
build_append_int_noprefix(table_data, ivhd_blob->len + 24, 2);
/* DeviceID */
build_append_int_noprefix(table_data,
- object_property_get_int(OBJECT(&s->pci), "addr",
+ object_property_get_int(OBJECT(s->pci), "addr",
&error_abort), 2);
/* Capability offset */
- build_append_int_noprefix(table_data, s->pci.capab_offset, 2);
+ build_append_int_noprefix(table_data, s->pci->capab_offset, 2);
/* IOMMU base address */
- build_append_int_noprefix(table_data, s->mmio.addr, 8);
+ build_append_int_noprefix(table_data, s->mr_mmio.addr, 8);
/* PCI Segment Group */
build_append_int_noprefix(table_data, 0, 2);
/* IOMMU info */
@@ -2426,12 +2368,12 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker, const char *oem_id,
build_append_int_noprefix(table_data, ivhd_blob->len + 40, 2);
/* DeviceID */
build_append_int_noprefix(table_data,
- object_property_get_int(OBJECT(&s->pci), "addr",
+ object_property_get_int(OBJECT(s->pci), "addr",
&error_abort), 2);
/* Capability offset */
- build_append_int_noprefix(table_data, s->pci.capab_offset, 2);
+ build_append_int_noprefix(table_data, s->pci->capab_offset, 2);
/* IOMMU base address */
- build_append_int_noprefix(table_data, s->mmio.addr, 8);
+ build_append_int_noprefix(table_data, s->mr_mmio.addr, 8);
/* PCI Segment Group */
build_append_int_noprefix(table_data, 0, 2);
/* IOMMU info */
@@ -2504,7 +2446,7 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine)
uint8_t *u;
GArray *tables_blob = tables->table_data;
AcpiSlicOem slic_oem = { .id = NULL, .table_id = NULL };
- Object *vmgenid_dev;
+ Object *vmgenid_dev, *vmclock_dev;
char *oem_id;
char *oem_table_id;
@@ -2577,6 +2519,13 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine)
tables->vmgenid, tables->linker, x86ms->oem_id);
}
+ vmclock_dev = find_vmclock_dev();
+ if (vmclock_dev) {
+ acpi_add_table(table_offsets, tables_blob);
+ vmclock_build_acpi(VMCLOCK(vmclock_dev), tables_blob, tables->linker,
+ x86ms->oem_id);
+ }
+
if (misc.has_hpet) {
acpi_add_table(table_offsets, tables_blob);
build_hpet(tables_blob, tables->linker, x86ms->oem_id,
diff --git a/hw/i386/acpi-build.h b/hw/i386/acpi-build.h
index 0dce155..275ec05 100644
--- a/hw/i386/acpi-build.h
+++ b/hw/i386/acpi-build.h
@@ -5,7 +5,7 @@
extern const struct AcpiGenericAddress x86_nvdimm_acpi_dsmio;
-/* PCI Hot-plug registers bases. See docs/spec/acpi_pci_hotplug.txt */
+/* PCI Hot-plug registers' base. See docs/specs/acpi_pci_hotplug.rst */
#define ACPI_PCIHP_SEJ_BASE 0x8
#define ACPI_PCIHP_BNMR_BASE 0x10
diff --git a/hw/i386/acpi-common.c b/hw/i386/acpi-common.c
index 0cc2919..7bd0806 100644
--- a/hw/i386/acpi-common.c
+++ b/hw/i386/acpi-common.c
@@ -23,7 +23,7 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
-#include "exec/memory.h"
+#include "system/memory.h"
#include "hw/acpi/acpi.h"
#include "hw/acpi/aml-build.h"
#include "hw/acpi/utils.h"
diff --git a/hw/i386/acpi-microvm.c b/hw/i386/acpi-microvm.c
index 279da6b..bc65717 100644
--- a/hw/i386/acpi-microvm.c
+++ b/hw/i386/acpi-microvm.c
@@ -24,7 +24,7 @@
#include "qemu/cutils.h"
#include "qapi/error.h"
-#include "exec/memory.h"
+#include "system/memory.h"
#include "hw/acpi/acpi.h"
#include "hw/acpi/acpi_aml_interface.h"
#include "hw/acpi/aml-build.h"
diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index 6d4fde7..963aa24 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -32,6 +32,7 @@
#include "trace.h"
#include "hw/i386/apic-msidef.h"
#include "hw/qdev-properties.h"
+#include "kvm/kvm_i386.h"
/* used AMD-Vi MMIO registers */
const char *amdvi_mmio_low[] = {
@@ -60,8 +61,9 @@ struct AMDVIAddressSpace {
uint8_t bus_num; /* bus number */
uint8_t devfn; /* device function */
AMDVIState *iommu_state; /* AMDVI - one per machine */
- MemoryRegion root; /* AMDVI Root memory map region */
+ MemoryRegion root; /* AMDVI Root memory map region */
IOMMUMemoryRegion iommu; /* Device's address translation region */
+ MemoryRegion iommu_nodma; /* Alias of shared nodma memory region */
MemoryRegion iommu_ir; /* Device's interrupt remapping region */
AddressSpace as; /* device's corresponding address space */
};
@@ -165,11 +167,11 @@ static void amdvi_generate_msi_interrupt(AMDVIState *s)
{
MSIMessage msg = {};
MemTxAttrs attrs = {
- .requester_id = pci_requester_id(&s->pci.dev)
+ .requester_id = pci_requester_id(&s->pci->dev)
};
- if (msi_enabled(&s->pci.dev)) {
- msg = msi_get_message(&s->pci.dev, 0);
+ if (msi_enabled(&s->pci->dev)) {
+ msg = msi_get_message(&s->pci->dev, 0);
address_space_stl_le(&address_space_memory, msg.address, msg.data,
attrs, NULL);
}
@@ -237,7 +239,7 @@ static void amdvi_page_fault(AMDVIState *s, uint16_t devid,
info |= AMDVI_EVENT_IOPF_I | AMDVI_EVENT_IOPF;
amdvi_encode_event(evt, devid, addr, info);
amdvi_log_event(s, evt);
- pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS,
+ pci_word_test_and_set_mask(s->pci->dev.config + PCI_STATUS,
PCI_STATUS_SIG_TARGET_ABORT);
}
/*
@@ -254,7 +256,7 @@ static void amdvi_log_devtab_error(AMDVIState *s, uint16_t devid,
amdvi_encode_event(evt, devid, devtab, info);
amdvi_log_event(s, evt);
- pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS,
+ pci_word_test_and_set_mask(s->pci->dev.config + PCI_STATUS,
PCI_STATUS_SIG_TARGET_ABORT);
}
/* log an event trying to access command buffer
@@ -267,7 +269,7 @@ static void amdvi_log_command_error(AMDVIState *s, hwaddr addr)
amdvi_encode_event(evt, 0, addr, info);
amdvi_log_event(s, evt);
- pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS,
+ pci_word_test_and_set_mask(s->pci->dev.config + PCI_STATUS,
PCI_STATUS_SIG_TARGET_ABORT);
}
/* log an illegal command event
@@ -308,7 +310,7 @@ static void amdvi_log_pagetab_error(AMDVIState *s, uint16_t devid,
info |= AMDVI_EVENT_PAGE_TAB_HW_ERROR;
amdvi_encode_event(evt, devid, addr, info);
amdvi_log_event(s, evt);
- pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS,
+ pci_word_test_and_set_mask(s->pci->dev.config + PCI_STATUS,
PCI_STATUS_SIG_TARGET_ABORT);
}
@@ -357,12 +359,12 @@ static void amdvi_update_iotlb(AMDVIState *s, uint16_t devid,
uint64_t gpa, IOMMUTLBEntry to_cache,
uint16_t domid)
{
- AMDVIIOTLBEntry *entry = g_new(AMDVIIOTLBEntry, 1);
- uint64_t *key = g_new(uint64_t, 1);
- uint64_t gfn = gpa >> AMDVI_PAGE_SHIFT_4K;
-
/* don't cache erroneous translations */
if (to_cache.perm != IOMMU_NONE) {
+ AMDVIIOTLBEntry *entry = g_new(AMDVIIOTLBEntry, 1);
+ uint64_t *key = g_new(uint64_t, 1);
+ uint64_t gfn = gpa >> AMDVI_PAGE_SHIFT_4K;
+
trace_amdvi_cache_update(domid, PCI_BUS_NUM(devid), PCI_SLOT(devid),
PCI_FUNC(devid), gpa, to_cache.translated_addr);
@@ -430,6 +432,12 @@ static void amdvi_complete_ppr(AMDVIState *s, uint64_t *cmd)
trace_amdvi_ppr_exec();
}
+static void amdvi_intremap_inval_notify_all(AMDVIState *s, bool global,
+ uint32_t index, uint32_t mask)
+{
+ x86_iommu_iec_notify_all(X86_IOMMU_DEVICE(s), global, index, mask);
+}
+
static void amdvi_inval_all(AMDVIState *s, uint64_t *cmd)
{
if (extract64(cmd[0], 0, 60) || cmd[1]) {
@@ -437,6 +445,9 @@ static void amdvi_inval_all(AMDVIState *s, uint64_t *cmd)
s->cmdbuf + s->cmdbuf_head);
}
+ /* Notify global invalidation */
+ amdvi_intremap_inval_notify_all(s, true, 0, 0);
+
amdvi_iotlb_reset(s);
trace_amdvi_all_inval();
}
@@ -485,6 +496,9 @@ static void amdvi_inval_inttable(AMDVIState *s, uint64_t *cmd)
return;
}
+ /* Notify global invalidation */
+ amdvi_intremap_inval_notify_all(s, true, 0, 0);
+
trace_amdvi_intr_inval();
}
@@ -1295,15 +1309,15 @@ static int amdvi_int_remap_msi(AMDVIState *iommu,
ret = -AMDVI_IR_ERR;
break;
case AMDVI_IOAPIC_INT_TYPE_NMI:
- pass = dte[3] & AMDVI_DEV_NMI_PASS_MASK;
+ pass = dte[2] & AMDVI_DEV_NMI_PASS_MASK;
trace_amdvi_ir_delivery_mode("nmi");
break;
case AMDVI_IOAPIC_INT_TYPE_INIT:
- pass = dte[3] & AMDVI_DEV_INT_PASS_MASK;
+ pass = dte[2] & AMDVI_DEV_INT_PASS_MASK;
trace_amdvi_ir_delivery_mode("init");
break;
case AMDVI_IOAPIC_INT_TYPE_EINT:
- pass = dte[3] & AMDVI_DEV_EINT_PASS_MASK;
+ pass = dte[2] & AMDVI_DEV_EINT_PASS_MASK;
trace_amdvi_ir_delivery_mode("eint");
break;
default:
@@ -1436,13 +1450,13 @@ static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
* Memory region relationships look like (Address range shows
* only lower 32 bits to make it short in length...):
*
- * |-----------------+-------------------+----------|
- * | Name | Address range | Priority |
- * |-----------------+-------------------+----------+
- * | amdvi_root | 00000000-ffffffff | 0 |
- * | amdvi_iommu | 00000000-ffffffff | 1 |
- * | amdvi_iommu_ir | fee00000-feefffff | 64 |
- * |-----------------+-------------------+----------|
+ * |--------------------+-------------------+----------|
+ * | Name | Address range | Priority |
+ * |--------------------+-------------------+----------+
+ * | amdvi-root | 00000000-ffffffff | 0 |
+ * | amdvi-iommu_nodma | 00000000-ffffffff | 0 |
+ * | amdvi-iommu_ir | fee00000-feefffff | 1 |
+ * |--------------------+-------------------+----------|
*/
memory_region_init_iommu(&amdvi_dev_as->iommu,
sizeof(amdvi_dev_as->iommu),
@@ -1452,16 +1466,27 @@ static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
memory_region_init(&amdvi_dev_as->root, OBJECT(s),
"amdvi_root", UINT64_MAX);
address_space_init(&amdvi_dev_as->as, &amdvi_dev_as->root, name);
- memory_region_init_io(&amdvi_dev_as->iommu_ir, OBJECT(s),
- &amdvi_ir_ops, s, "amd_iommu_ir",
- AMDVI_INT_ADDR_SIZE);
- memory_region_add_subregion_overlap(&amdvi_dev_as->root,
- AMDVI_INT_ADDR_FIRST,
- &amdvi_dev_as->iommu_ir,
- 64);
memory_region_add_subregion_overlap(&amdvi_dev_as->root, 0,
MEMORY_REGION(&amdvi_dev_as->iommu),
- 1);
+ 0);
+
+ /* Build the DMA Disabled alias to shared memory */
+ memory_region_init_alias(&amdvi_dev_as->iommu_nodma, OBJECT(s),
+ "amdvi-sys", &s->mr_sys, 0,
+ memory_region_size(&s->mr_sys));
+ memory_region_add_subregion_overlap(&amdvi_dev_as->root, 0,
+ &amdvi_dev_as->iommu_nodma,
+ 0);
+ /* Build the Interrupt Remapping alias to shared memory */
+ memory_region_init_alias(&amdvi_dev_as->iommu_ir, OBJECT(s),
+ "amdvi-ir", &s->mr_ir, 0,
+ memory_region_size(&s->mr_ir));
+ memory_region_add_subregion_overlap(MEMORY_REGION(&amdvi_dev_as->iommu),
+ AMDVI_INT_ADDR_FIRST,
+ &amdvi_dev_as->iommu_ir, 1);
+
+ memory_region_set_enabled(&amdvi_dev_as->iommu_nodma, false);
+ memory_region_set_enabled(MEMORY_REGION(&amdvi_dev_as->iommu), true);
}
return &iommu_as[devfn]->as;
}
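
A minimal sketch of how the per-device aliases built above are meant to be used (the helper name is hypothetical, not part of the patch): translation can be switched per device by toggling region enables only, without rebuilding the memory topology, and the interrupt-remapping range stays reachable either way because amdvi-sys also contains the IR region.

    /* Hypothetical helper, assuming the AMDVIAddressSpace layout from the
     * hunk above: flip one device between translated DMA and the
     * DMA-disabled ("nodma") view of system memory. */
    static void amdvi_example_set_translation(AMDVIAddressSpace *as, bool on)
    {
        memory_region_set_enabled(&as->iommu_nodma, !on);
        memory_region_set_enabled(MEMORY_REGION(&as->iommu), on);
    }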
@@ -1560,9 +1585,9 @@ static void amdvi_pci_realize(PCIDevice *pdev, Error **errp)
/* reset AMDVI specific capabilities, all r/o */
pci_set_long(pdev->config + s->capab_offset, AMDVI_CAPAB_FEATURES);
pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_BAR_LOW,
- AMDVI_BASE_ADDR & ~(0xffff0000));
+ AMDVI_BASE_ADDR & MAKE_64BIT_MASK(14, 18));
pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_BAR_HIGH,
- (AMDVI_BASE_ADDR & ~(0xffff)) >> 16);
+ AMDVI_BASE_ADDR >> 32);
pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_RANGE,
0xff000000);
pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_MISC, 0);
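
A worked check of the new capability BAR encoding with the (now 64-bit) AMDVI_BASE_ADDR of 0xfed80000ULL; the compile-time asserts are illustrative and not part of the patch:

    #include "qemu/osdep.h"     /* QEMU_BUILD_BUG_ON */
    #include "qemu/bitops.h"    /* MAKE_64BIT_MASK */

    /* The low register keeps address bits 31:14, the high one bits 63:32. */
    QEMU_BUILD_BUG_ON((0xfed80000ULL & MAKE_64BIT_MASK(14, 18)) != 0xfed80000ULL);
    QEMU_BUILD_BUG_ON((0xfed80000ULL >> 32) != 0);
    /* The old low-register expression, base & ~0xffff0000, evaluated to 0. */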
@@ -1574,41 +1599,137 @@ static void amdvi_sysbus_reset(DeviceState *dev)
{
AMDVIState *s = AMD_IOMMU_DEVICE(dev);
- msi_reset(&s->pci.dev);
+ msi_reset(&s->pci->dev);
amdvi_init(s);
}
+static const VMStateDescription vmstate_amdvi_sysbus_migratable = {
+ .name = "amd-iommu",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .priority = MIG_PRI_IOMMU,
+ .fields = (VMStateField[]) {
+ /* Updated in amdvi_handle_control_write() */
+ VMSTATE_BOOL(enabled, AMDVIState),
+ VMSTATE_BOOL(ga_enabled, AMDVIState),
+ VMSTATE_BOOL(ats_enabled, AMDVIState),
+ VMSTATE_BOOL(cmdbuf_enabled, AMDVIState),
+ VMSTATE_BOOL(completion_wait_intr, AMDVIState),
+ VMSTATE_BOOL(evtlog_enabled, AMDVIState),
+ VMSTATE_BOOL(evtlog_intr, AMDVIState),
+ /* Updated in amdvi_handle_devtab_write() */
+ VMSTATE_UINT64(devtab, AMDVIState),
+ VMSTATE_UINT64(devtab_len, AMDVIState),
+ /* Updated in amdvi_handle_cmdbase_write() */
+ VMSTATE_UINT64(cmdbuf, AMDVIState),
+ VMSTATE_UINT64(cmdbuf_len, AMDVIState),
+ /* Updated in amdvi_handle_cmdhead_write() */
+ VMSTATE_UINT32(cmdbuf_head, AMDVIState),
+ /* Updated in amdvi_handle_cmdtail_write() */
+ VMSTATE_UINT32(cmdbuf_tail, AMDVIState),
+ /* Updated in amdvi_handle_evtbase_write() */
+ VMSTATE_UINT64(evtlog, AMDVIState),
+ VMSTATE_UINT32(evtlog_len, AMDVIState),
+ /* Updated in amdvi_handle_evthead_write() */
+ VMSTATE_UINT32(evtlog_head, AMDVIState),
+ /* Updated in amdvi_handle_evttail_write() */
+ VMSTATE_UINT32(evtlog_tail, AMDVIState),
+ /* Updated in amdvi_handle_pprbase_write() */
+ VMSTATE_UINT64(ppr_log, AMDVIState),
+ VMSTATE_UINT32(pprlog_len, AMDVIState),
+ /* Updated in amdvi_handle_pprhead_write() */
+ VMSTATE_UINT32(pprlog_head, AMDVIState),
+ /* Updated in amdvi_handle_tailhead_write() */
+ VMSTATE_UINT32(pprlog_tail, AMDVIState),
+ /* MMIO registers */
+ VMSTATE_UINT8_ARRAY(mmior, AMDVIState, AMDVI_MMIO_SIZE),
+ VMSTATE_UINT8_ARRAY(romask, AMDVIState, AMDVI_MMIO_SIZE),
+ VMSTATE_UINT8_ARRAY(w1cmask, AMDVIState, AMDVI_MMIO_SIZE),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
static void amdvi_sysbus_realize(DeviceState *dev, Error **errp)
{
+ DeviceClass *dc = (DeviceClass *) object_get_class(OBJECT(dev));
AMDVIState *s = AMD_IOMMU_DEVICE(dev);
MachineState *ms = MACHINE(qdev_get_machine());
PCMachineState *pcms = PC_MACHINE(ms);
X86MachineState *x86ms = X86_MACHINE(ms);
PCIBus *bus = pcms->pcibus;
- s->iotlb = g_hash_table_new_full(amdvi_uint64_hash,
- amdvi_uint64_equal, g_free, g_free);
+ if (s->pci_id) {
+ PCIDevice *pdev = NULL;
+ int ret = pci_qdev_find_device(s->pci_id, &pdev);
- /* This device should take care of IOMMU PCI properties */
- if (!qdev_realize(DEVICE(&s->pci), &bus->qbus, errp)) {
- return;
+ if (ret) {
+ error_report("Cannot find PCI device '%s'", s->pci_id);
+ return;
+ }
+
+ if (!object_dynamic_cast(OBJECT(pdev), TYPE_AMD_IOMMU_PCI)) {
+ error_report("Device '%s' must be an AMDVI-PCI device type", s->pci_id);
+ return;
+ }
+
+ s->pci = AMD_IOMMU_PCI(pdev);
+ dc->vmsd = &vmstate_amdvi_sysbus_migratable;
+ } else {
+ s->pci = AMD_IOMMU_PCI(object_new(TYPE_AMD_IOMMU_PCI));
+ /* This device should take care of IOMMU PCI properties */
+ if (!qdev_realize(DEVICE(s->pci), &bus->qbus, errp)) {
+ return;
+ }
}
+ s->iotlb = g_hash_table_new_full(amdvi_uint64_hash,
+ amdvi_uint64_equal, g_free, g_free);
+
/* Pseudo address space under root PCI bus. */
x86ms->ioapic_as = amdvi_host_dma_iommu(bus, s, AMDVI_IOAPIC_SB_DEVID);
/* set up MMIO */
- memory_region_init_io(&s->mmio, OBJECT(s), &mmio_mem_ops, s, "amdvi-mmio",
- AMDVI_MMIO_SIZE);
+ memory_region_init_io(&s->mr_mmio, OBJECT(s), &mmio_mem_ops, s,
+ "amdvi-mmio", AMDVI_MMIO_SIZE);
memory_region_add_subregion(get_system_memory(), AMDVI_BASE_ADDR,
- &s->mmio);
+ &s->mr_mmio);
+
+ /* Create the shared memory regions used by all devices */
+ memory_region_init(&s->mr_sys, OBJECT(s), "amdvi-sys", UINT64_MAX);
+
+ /* set up the DMA disabled memory region */
+ memory_region_init_alias(&s->mr_nodma, OBJECT(s),
+ "amdvi-nodma", get_system_memory(), 0,
+ memory_region_size(get_system_memory()));
+ memory_region_add_subregion_overlap(&s->mr_sys, 0,
+ &s->mr_nodma, 0);
+
+ /* set up the Interrupt Remapping memory region */
+ memory_region_init_io(&s->mr_ir, OBJECT(s), &amdvi_ir_ops,
+ s, "amdvi-ir", AMDVI_INT_ADDR_SIZE);
+ memory_region_add_subregion_overlap(&s->mr_sys, AMDVI_INT_ADDR_FIRST,
+ &s->mr_ir, 1);
+
+ if (kvm_enabled() && x86ms->apic_id_limit > 255 && !s->xtsup) {
+ error_report("AMD IOMMU with x2APIC configuration requires xtsup=on");
+ exit(EXIT_FAILURE);
+ }
+
+ if (s->xtsup) {
+ if (kvm_irqchip_is_split() && !kvm_enable_x2apic()) {
+ error_report("AMD IOMMU xtsup=on requires x2APIC support on "
+ "the KVM side");
+ exit(EXIT_FAILURE);
+ }
+ }
+
pci_setup_iommu(bus, &amdvi_iommu_ops, s);
amdvi_init(s);
}
-static Property amdvi_properties[] = {
+static const Property amdvi_properties[] = {
DEFINE_PROP_BOOL("xtsup", AMDVIState, xtsup, false),
- DEFINE_PROP_END_OF_LIST(),
+ DEFINE_PROP_STRING("pci-id", AMDVIState, pci_id),
};
static const VMStateDescription vmstate_amdvi_sysbus = {
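
With the new pci-id property the IOMMU can be attached to a user-created AMDVI-PCI function instead of instantiating one internally; a usage sketch (IDs and option ordering are illustrative, and the AMDVI-PCI device is expected to already exist when amd-iommu is realized):

    -device AMDVI-PCI,id=iommupci0 -device amd-iommu,pci-id=iommupci0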
@@ -1616,25 +1737,16 @@ static const VMStateDescription vmstate_amdvi_sysbus = {
.unmigratable = 1
};
-static void amdvi_sysbus_instance_init(Object *klass)
-{
- AMDVIState *s = AMD_IOMMU_DEVICE(klass);
-
- object_initialize(&s->pci, sizeof(s->pci), TYPE_AMD_IOMMU_PCI);
-}
-
-static void amdvi_sysbus_class_init(ObjectClass *klass, void *data)
+static void amdvi_sysbus_class_init(ObjectClass *klass, const void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
X86IOMMUClass *dc_class = X86_IOMMU_DEVICE_CLASS(klass);
- dc->reset = amdvi_sysbus_reset;
+ device_class_set_legacy_reset(dc, amdvi_sysbus_reset);
dc->vmsd = &vmstate_amdvi_sysbus;
dc->hotpluggable = false;
dc_class->realize = amdvi_sysbus_realize;
dc_class->int_remap = amdvi_int_remap;
- /* Supported by the pc-q35-* machine types */
- dc->user_creatable = true;
set_bit(DEVICE_CATEGORY_MISC, dc->categories);
dc->desc = "AMD IOMMU (AMD-Vi) DMA Remapping device";
device_class_set_props(dc, amdvi_properties);
@@ -1644,16 +1756,16 @@ static const TypeInfo amdvi_sysbus = {
.name = TYPE_AMD_IOMMU_DEVICE,
.parent = TYPE_X86_IOMMU_DEVICE,
.instance_size = sizeof(AMDVIState),
- .instance_init = amdvi_sysbus_instance_init,
.class_init = amdvi_sysbus_class_init
};
-static void amdvi_pci_class_init(ObjectClass *klass, void *data)
+static void amdvi_pci_class_init(ObjectClass *klass, const void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
k->vendor_id = PCI_VENDOR_ID_AMD;
+ k->device_id = 0x1419;
k->class_id = 0x0806;
k->realize = amdvi_pci_realize;
@@ -1666,13 +1778,14 @@ static const TypeInfo amdvi_pci = {
.parent = TYPE_PCI_DEVICE,
.instance_size = sizeof(AMDVIPCIState),
.class_init = amdvi_pci_class_init,
- .interfaces = (InterfaceInfo[]) {
+ .interfaces = (const InterfaceInfo[]) {
{ INTERFACE_CONVENTIONAL_PCI_DEVICE },
{ },
},
};
-static void amdvi_iommu_memory_region_class_init(ObjectClass *klass, void *data)
+static void amdvi_iommu_memory_region_class_init(ObjectClass *klass,
+ const void *data)
{
IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);
diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h
index 73619fe..5672bde 100644
--- a/hw/i386/amd_iommu.h
+++ b/hw/i386/amd_iommu.h
@@ -187,7 +187,7 @@
AMDVI_CAPAB_FLAG_HTTUNNEL | AMDVI_CAPAB_EFR_SUP)
/* AMDVI default address */
-#define AMDVI_BASE_ADDR 0xfed80000
+#define AMDVI_BASE_ADDR 0xfed80000ULL
/* page management constants */
#define AMDVI_PAGE_SHIFT 12
@@ -315,7 +315,8 @@ struct AMDVIPCIState {
struct AMDVIState {
X86IOMMUState iommu; /* IOMMU bus device */
- AMDVIPCIState pci; /* IOMMU PCI device */
+ AMDVIPCIState *pci; /* IOMMU PCI device */
+ char *pci_id; /* ID of AMDVI-PCI device, if user created */
uint32_t version;
@@ -328,7 +329,7 @@ struct AMDVIState {
bool excl_enabled;
hwaddr devtab; /* base address device table */
- size_t devtab_len; /* device table length */
+ uint64_t devtab_len; /* device table length */
hwaddr cmdbuf; /* command buffer base address */
uint64_t cmdbuf_len; /* command buffer length */
@@ -353,7 +354,10 @@ struct AMDVIState {
uint32_t pprlog_head; /* ppr log head */
uint32_t pprlog_tail; /* ppr log tail */
- MemoryRegion mmio; /* MMIO region */
+ MemoryRegion mr_mmio; /* MMIO region */
+ MemoryRegion mr_sys;
+ MemoryRegion mr_nodma;
+ MemoryRegion mr_ir;
uint8_t mmior[AMDVI_MMIO_SIZE]; /* read/write MMIO */
uint8_t w1cmask[AMDVI_MMIO_SIZE]; /* read/write 1 clear mask */
uint8_t romask[AMDVI_MMIO_SIZE]; /* MMIO read/only mask */
diff --git a/hw/i386/e820_memory_layout.c b/hw/i386/e820_memory_layout.c
index 06970ac..3e848fb 100644
--- a/hw/i386/e820_memory_layout.c
+++ b/hw/i386/e820_memory_layout.c
@@ -11,22 +11,29 @@
#include "e820_memory_layout.h"
static size_t e820_entries;
-struct e820_entry *e820_table;
+static struct e820_entry *e820_table;
+static gboolean e820_done;
-int e820_add_entry(uint64_t address, uint64_t length, uint32_t type)
+void e820_add_entry(uint64_t address, uint64_t length, uint32_t type)
{
+ assert(!e820_done);
+
/* new "etc/e820" file -- include ram and reserved entries */
e820_table = g_renew(struct e820_entry, e820_table, e820_entries + 1);
e820_table[e820_entries].address = cpu_to_le64(address);
e820_table[e820_entries].length = cpu_to_le64(length);
e820_table[e820_entries].type = cpu_to_le32(type);
e820_entries++;
-
- return e820_entries;
}
-int e820_get_num_entries(void)
+int e820_get_table(struct e820_entry **table)
{
+ e820_done = true;
+
+ if (table) {
+ *table = e820_table;
+ }
+
return e820_entries;
}
diff --git a/hw/i386/e820_memory_layout.h b/hw/i386/e820_memory_layout.h
index 7c239aa..b50acfa 100644
--- a/hw/i386/e820_memory_layout.h
+++ b/hw/i386/e820_memory_layout.h
@@ -22,13 +22,9 @@ struct e820_entry {
uint32_t type;
} QEMU_PACKED __attribute((__aligned__(4)));
-extern struct e820_entry *e820_table;
-
-int e820_add_entry(uint64_t address, uint64_t length, uint32_t type);
-int e820_get_num_entries(void);
+void e820_add_entry(uint64_t address, uint64_t length, uint32_t type);
bool e820_get_entry(int index, uint32_t type,
uint64_t *address, uint64_t *length);
-
-
+int e820_get_table(struct e820_entry **table);
#endif
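
A minimal sketch of the new producer/consumer contract (the function name and memory layout below are made up): producers call e820_add_entry() while the machine is being built, then a single late consumer freezes the table with e820_get_table(), after which any further e820_add_entry() would trip the assert above.

    #include "qemu/osdep.h"
    #include "e820_memory_layout.h"
    #include "hw/nvram/fw_cfg.h"

    /* Illustrative only; mirrors what fw_cfg_add_e820() does below. */
    static void example_publish_e820(FWCfgState *fw_cfg)
    {
        struct e820_entry *table;
        int nr_e820;

        e820_add_entry(0x0, 0x9fc00, E820_RAM);          /* made-up layout */
        e820_add_entry(0x100000, 0x3ff00000, E820_RAM);

        nr_e820 = e820_get_table(&table);                /* marks table done */
        fw_cfg_add_file(fw_cfg, "etc/e820", table,
                        nr_e820 * sizeof(*table));
    }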
diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c
index 7c43c32..5c0bcd5 100644
--- a/hw/i386/fw_cfg.c
+++ b/hw/i386/fw_cfg.c
@@ -13,7 +13,7 @@
*/
#include "qemu/osdep.h"
-#include "sysemu/numa.h"
+#include "system/numa.h"
#include "hw/acpi/acpi.h"
#include "hw/acpi/aml-build.h"
#include "hw/firmware/smbios.h"
@@ -26,7 +26,9 @@
#include CONFIG_DEVICES
#include "target/i386/cpu.h"
-struct hpet_fw_config hpet_cfg = {.count = UINT8_MAX};
+#if !defined(CONFIG_HPET)
+struct hpet_fw_config hpet_fw_cfg = {.count = UINT8_MAX};
+#endif
const char *fw_cfg_arch_key_name(uint16_t key)
{
@@ -48,6 +50,15 @@ const char *fw_cfg_arch_key_name(uint16_t key)
return NULL;
}
+/* Add etc/e820 late, once all regions should be present */
+void fw_cfg_add_e820(FWCfgState *fw_cfg)
+{
+ struct e820_entry *table;
+ int nr_e820 = e820_get_table(&table);
+
+ fw_cfg_add_file(fw_cfg, "etc/e820", table, nr_e820 * sizeof(*table));
+}
+
void fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg,
SmbiosEntryPointType ep_type)
{
@@ -60,6 +71,7 @@ void fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg,
PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
MachineClass *mc = MACHINE_GET_CLASS(pcms);
X86CPU *cpu = X86_CPU(ms->possible_cpus->cpus[0].cpu);
+ int nr_e820;
if (pcmc->smbios_defaults) {
/* These values are guest ABI, do not change */
@@ -78,8 +90,9 @@ void fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg,
}
/* build the array of physical mem area from e820 table */
- mem_array = g_malloc0(sizeof(*mem_array) * e820_get_num_entries());
- for (i = 0, array_count = 0; i < e820_get_num_entries(); i++) {
+ nr_e820 = e820_get_table(NULL);
+ mem_array = g_malloc0(sizeof(*mem_array) * nr_e820);
+ for (i = 0, array_count = 0; i < nr_e820; i++) {
uint64_t addr, len;
if (e820_get_entry(i, E820_RAM, &addr, &len)) {
@@ -132,16 +145,13 @@ FWCfgState *fw_cfg_arch_create(MachineState *ms,
*/
fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, apic_id_limit);
fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, ms->ram_size);
-#ifdef CONFIG_ACPI
- fw_cfg_add_bytes(fw_cfg, FW_CFG_ACPI_TABLES,
- acpi_tables, acpi_tables_len);
-#endif
+ if (acpi_builtin()) {
+ fw_cfg_add_bytes(fw_cfg, FW_CFG_ACPI_TABLES,
+ acpi_tables, acpi_tables_len);
+ }
fw_cfg_add_i32(fw_cfg, FW_CFG_IRQ0_OVERRIDE, 1);
- fw_cfg_add_file(fw_cfg, "etc/e820", e820_table,
- sizeof(struct e820_entry) * e820_get_num_entries());
-
- fw_cfg_add_bytes(fw_cfg, FW_CFG_HPET, &hpet_cfg, sizeof(hpet_cfg));
+ fw_cfg_add_bytes(fw_cfg, FW_CFG_HPET, &hpet_fw_cfg, sizeof(hpet_fw_cfg));
/* allocate memory for the NUMA channel: one (64bit) word for the number
* of nodes, one word for each VCPU->node and one word for each node to
* hold the amount of memory.
diff --git a/hw/i386/fw_cfg.h b/hw/i386/fw_cfg.h
index 92e310f..e560fd7 100644
--- a/hw/i386/fw_cfg.h
+++ b/hw/i386/fw_cfg.h
@@ -27,5 +27,6 @@ void fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg,
SmbiosEntryPointType ep_type);
void fw_cfg_build_feature_control(MachineState *ms, FWCfgState *fw_cfg);
void fw_cfg_add_acpi_dsdt(Aml *scope, FWCfgState *fw_cfg);
+void fw_cfg_add_e820(FWCfgState *fw_cfg);
#endif
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index c4350e0..69d72ad 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -32,9 +32,9 @@
#include "hw/i386/apic-msidef.h"
#include "hw/i386/x86-iommu.h"
#include "hw/pci-host/q35.h"
-#include "sysemu/kvm.h"
-#include "sysemu/dma.h"
-#include "sysemu/sysemu.h"
+#include "system/kvm.h"
+#include "system/dma.h"
+#include "system/system.h"
#include "hw/i386/apic_internal.h"
#include "kvm/kvm_i386.h"
#include "migration/vmstate.h"
@@ -48,7 +48,10 @@
/* pe operations */
#define VTD_PE_GET_TYPE(pe) ((pe)->val[0] & VTD_SM_PASID_ENTRY_PGTT)
-#define VTD_PE_GET_LEVEL(pe) (2 + (((pe)->val[0] >> 2) & VTD_SM_PASID_ENTRY_AW))
+#define VTD_PE_GET_FL_LEVEL(pe) \
+ (4 + (((pe)->val[2] >> 2) & VTD_SM_PASID_ENTRY_FLPM))
+#define VTD_PE_GET_SL_LEVEL(pe) \
+ (2 + (((pe)->val[0] >> 2) & VTD_SM_PASID_ENTRY_AW))
/*
* PCI bus number (or SID) is not reliable since the device is usually
@@ -61,6 +64,17 @@ struct vtd_as_key {
uint32_t pasid;
};
+/* bus/devfn is PCI device's real BDF not the aliased one */
+struct vtd_hiod_key {
+ PCIBus *bus;
+ uint8_t devfn;
+};
+
+struct vtd_as_raw_key {
+ uint16_t sid;
+ uint32_t pasid;
+};
+
struct vtd_iotlb_key {
uint64_t gfn;
uint32_t pasid;
@@ -250,6 +264,25 @@ static guint vtd_as_hash(gconstpointer v)
return (guint)(value << 8 | key->devfn);
}
+/* Same implementation as vtd_as_hash() */
+static guint vtd_hiod_hash(gconstpointer v)
+{
+ return vtd_as_hash(v);
+}
+
+static gboolean vtd_hiod_equal(gconstpointer v1, gconstpointer v2)
+{
+ const struct vtd_hiod_key *key1 = v1;
+ const struct vtd_hiod_key *key2 = v2;
+
+ return (key1->bus == key2->bus) && (key1->devfn == key2->devfn);
+}
+
+static void vtd_hiod_destroy(gpointer v)
+{
+ object_unref(v);
+}
+
static gboolean vtd_hash_remove_by_domain(gpointer key, gpointer value,
gpointer user_data)
{
@@ -259,15 +292,15 @@ static gboolean vtd_hash_remove_by_domain(gpointer key, gpointer value,
}
/* The shift of an addr for a certain level of paging structure */
-static inline uint32_t vtd_slpt_level_shift(uint32_t level)
+static inline uint32_t vtd_pt_level_shift(uint32_t level)
{
assert(level != 0);
- return VTD_PAGE_SHIFT_4K + (level - 1) * VTD_SL_LEVEL_BITS;
+ return VTD_PAGE_SHIFT_4K + (level - 1) * VTD_LEVEL_BITS;
}
-static inline uint64_t vtd_slpt_level_page_mask(uint32_t level)
+static inline uint64_t vtd_pt_level_page_mask(uint32_t level)
{
- return ~((1ULL << vtd_slpt_level_shift(level)) - 1);
+ return ~((1ULL << vtd_pt_level_shift(level)) - 1);
}
static gboolean vtd_hash_remove_by_page(gpointer key, gpointer value,
@@ -277,9 +310,43 @@ static gboolean vtd_hash_remove_by_page(gpointer key, gpointer value,
VTDIOTLBPageInvInfo *info = (VTDIOTLBPageInvInfo *)user_data;
uint64_t gfn = (info->addr >> VTD_PAGE_SHIFT_4K) & info->mask;
uint64_t gfn_tlb = (info->addr & entry->mask) >> VTD_PAGE_SHIFT_4K;
- return (entry->domain_id == info->domain_id) &&
- (((entry->gfn & info->mask) == gfn) ||
- (entry->gfn == gfn_tlb));
+
+ if (entry->domain_id != info->domain_id) {
+ return false;
+ }
+
+ /*
+ * According to spec, IOTLB entries caching first-stage (PGTT=001b) or
+ * nested (PGTT=011b) mapping associated with specified domain-id are
+ * invalidated. Nested isn't supported yet, so only need to check 001b.
+ */
+ if (entry->pgtt == VTD_SM_PASID_ENTRY_FLT) {
+ return true;
+ }
+
+ return (entry->gfn & info->mask) == gfn || entry->gfn == gfn_tlb;
+}
+
+static gboolean vtd_hash_remove_by_page_piotlb(gpointer key, gpointer value,
+ gpointer user_data)
+{
+ VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value;
+ VTDIOTLBPageInvInfo *info = (VTDIOTLBPageInvInfo *)user_data;
+ uint64_t gfn = (info->addr >> VTD_PAGE_SHIFT_4K) & info->mask;
+ uint64_t gfn_tlb = (info->addr & entry->mask) >> VTD_PAGE_SHIFT_4K;
+
+ /*
+ * According to spec, PASID-based-IOTLB Invalidation in page granularity
+ * doesn't invalidate IOTLB entries caching second-stage (PGTT=010b)
+ * or pass-through (PGTT=100b) mappings. Nested isn't supported yet,
+ * so only need to check first-stage (PGTT=001b) mappings.
+ */
+ if (entry->pgtt != VTD_SM_PASID_ENTRY_FLT) {
+ return false;
+ }
+
+ return entry->domain_id == info->domain_id && entry->pasid == info->pasid &&
+ ((entry->gfn & info->mask) == gfn || entry->gfn == gfn_tlb);
}
/* Reset all the gen of VTDAddressSpace to zero and set the gen of
@@ -324,7 +391,7 @@ static void vtd_reset_caches(IntelIOMMUState *s)
static uint64_t vtd_get_iotlb_gfn(hwaddr addr, uint32_t level)
{
- return (addr & vtd_slpt_level_page_mask(level)) >> VTD_PAGE_SHIFT_4K;
+ return (addr & vtd_pt_level_page_mask(level)) >> VTD_PAGE_SHIFT_4K;
}
/* Must be called with IOMMU lock held */
@@ -333,9 +400,9 @@ static VTDIOTLBEntry *vtd_lookup_iotlb(IntelIOMMUState *s, uint16_t source_id,
{
struct vtd_iotlb_key key;
VTDIOTLBEntry *entry;
- int level;
+ unsigned level;
- for (level = VTD_SL_PT_LEVEL; level < VTD_SL_PML4_LEVEL; level++) {
+ for (level = VTD_PT_LEVEL; level < VTD_PML4_LEVEL; level++) {
key.gfn = vtd_get_iotlb_gfn(addr, level);
key.level = level;
key.sid = source_id;
@@ -352,15 +419,15 @@ out:
/* Must be with IOMMU lock held */
static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id,
- uint16_t domain_id, hwaddr addr, uint64_t slpte,
+ uint16_t domain_id, hwaddr addr, uint64_t pte,
uint8_t access_flags, uint32_t level,
- uint32_t pasid)
+ uint32_t pasid, uint8_t pgtt)
{
VTDIOTLBEntry *entry = g_malloc(sizeof(*entry));
struct vtd_iotlb_key *key = g_malloc(sizeof(*key));
uint64_t gfn = vtd_get_iotlb_gfn(addr, level);
- trace_vtd_iotlb_page_update(source_id, addr, slpte, domain_id);
+ trace_vtd_iotlb_page_update(source_id, addr, pte, domain_id);
if (g_hash_table_size(s->iotlb) >= VTD_IOTLB_MAX_SIZE) {
trace_vtd_iotlb_reset("iotlb exceeds size limit");
vtd_reset_iotlb_locked(s);
@@ -368,10 +435,11 @@ static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id,
entry->gfn = gfn;
entry->domain_id = domain_id;
- entry->slpte = slpte;
+ entry->pte = pte;
entry->access_flags = access_flags;
- entry->mask = vtd_slpt_level_page_mask(level);
+ entry->mask = vtd_pt_level_page_mask(level);
entry->pasid = pasid;
+ entry->pgtt = pgtt;
key->gfn = gfn;
key->sid = source_id;
@@ -685,32 +753,32 @@ static inline dma_addr_t vtd_ce_get_slpt_base(VTDContextEntry *ce)
return ce->lo & VTD_CONTEXT_ENTRY_SLPTPTR;
}
-static inline uint64_t vtd_get_slpte_addr(uint64_t slpte, uint8_t aw)
+static inline uint64_t vtd_get_pte_addr(uint64_t pte, uint8_t aw)
{
- return slpte & VTD_SL_PT_BASE_ADDR_MASK(aw);
+ return pte & VTD_PT_BASE_ADDR_MASK(aw);
}
/* Whether the pte indicates the address of the page frame */
-static inline bool vtd_is_last_slpte(uint64_t slpte, uint32_t level)
+static inline bool vtd_is_last_pte(uint64_t pte, uint32_t level)
{
- return level == VTD_SL_PT_LEVEL || (slpte & VTD_SL_PT_PAGE_SIZE_MASK);
+ return level == VTD_PT_LEVEL || (pte & VTD_PT_PAGE_SIZE_MASK);
}
-/* Get the content of a spte located in @base_addr[@index] */
-static uint64_t vtd_get_slpte(dma_addr_t base_addr, uint32_t index)
+/* Get the content of a pte located in @base_addr[@index] */
+static uint64_t vtd_get_pte(dma_addr_t base_addr, uint32_t index)
{
- uint64_t slpte;
+ uint64_t pte;
- assert(index < VTD_SL_PT_ENTRY_NR);
+ assert(index < VTD_PT_ENTRY_NR);
if (dma_memory_read(&address_space_memory,
- base_addr + index * sizeof(slpte),
- &slpte, sizeof(slpte), MEMTXATTRS_UNSPECIFIED)) {
- slpte = (uint64_t)-1;
- return slpte;
+ base_addr + index * sizeof(pte),
+ &pte, sizeof(pte), MEMTXATTRS_UNSPECIFIED)) {
+ pte = (uint64_t)-1;
+ return pte;
}
- slpte = le64_to_cpu(slpte);
- return slpte;
+ pte = le64_to_cpu(pte);
+ return pte;
}
/* Given an iova and the level of paging structure, return the offset
@@ -718,36 +786,39 @@ static uint64_t vtd_get_slpte(dma_addr_t base_addr, uint32_t index)
*/
static inline uint32_t vtd_iova_level_offset(uint64_t iova, uint32_t level)
{
- return (iova >> vtd_slpt_level_shift(level)) &
- ((1ULL << VTD_SL_LEVEL_BITS) - 1);
+ return (iova >> vtd_pt_level_shift(level)) &
+ ((1ULL << VTD_LEVEL_BITS) - 1);
}
/* Check Capability Register to see if the @level of page-table is supported */
-static inline bool vtd_is_level_supported(IntelIOMMUState *s, uint32_t level)
+static inline bool vtd_is_sl_level_supported(IntelIOMMUState *s, uint32_t level)
{
return VTD_CAP_SAGAW_MASK & s->cap &
(1ULL << (level - 2 + VTD_CAP_SAGAW_SHIFT));
}
+static inline bool vtd_is_fl_level_supported(IntelIOMMUState *s, uint32_t level)
+{
+ return level == VTD_PML4_LEVEL;
+}
+
/* Return true if check passed, otherwise false */
-static inline bool vtd_pe_type_check(X86IOMMUState *x86_iommu,
- VTDPASIDEntry *pe)
+static inline bool vtd_pe_type_check(IntelIOMMUState *s, VTDPASIDEntry *pe)
{
switch (VTD_PE_GET_TYPE(pe)) {
case VTD_SM_PASID_ENTRY_FLT:
+ return !!(s->ecap & VTD_ECAP_FLTS);
case VTD_SM_PASID_ENTRY_SLT:
+ return !!(s->ecap & VTD_ECAP_SLTS);
case VTD_SM_PASID_ENTRY_NESTED:
- break;
+ /* Not support NESTED page table type yet */
+ return false;
case VTD_SM_PASID_ENTRY_PT:
- if (!x86_iommu->pt_supported) {
- return false;
- }
- break;
+ return !!(s->ecap & VTD_ECAP_PT);
default:
/* Unknown type */
return false;
}
- return true;
}
static inline bool vtd_pdire_present(VTDPASIDDirEntry *pdire)
@@ -771,7 +842,7 @@ static int vtd_get_pdire_from_pdir_table(dma_addr_t pasid_dir_base,
addr = pasid_dir_base + index * entry_size;
if (dma_memory_read(&address_space_memory, addr,
pdire, entry_size, MEMTXATTRS_UNSPECIFIED)) {
- return -VTD_FR_PASID_TABLE_INV;
+ return -VTD_FR_PASID_DIR_ACCESS_ERR;
}
pdire->val = le64_to_cpu(pdire->val);
@@ -789,28 +860,35 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
dma_addr_t addr,
VTDPASIDEntry *pe)
{
+ uint8_t pgtt;
uint32_t index;
dma_addr_t entry_size;
- X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
index = VTD_PASID_TABLE_INDEX(pasid);
entry_size = VTD_PASID_ENTRY_SIZE;
addr = addr + index * entry_size;
if (dma_memory_read(&address_space_memory, addr,
pe, entry_size, MEMTXATTRS_UNSPECIFIED)) {
- return -VTD_FR_PASID_TABLE_INV;
+ return -VTD_FR_PASID_TABLE_ACCESS_ERR;
}
for (size_t i = 0; i < ARRAY_SIZE(pe->val); i++) {
pe->val[i] = le64_to_cpu(pe->val[i]);
}
/* Do translation type check */
- if (!vtd_pe_type_check(x86_iommu, pe)) {
- return -VTD_FR_PASID_TABLE_INV;
+ if (!vtd_pe_type_check(s, pe)) {
+ return -VTD_FR_PASID_TABLE_ENTRY_INV;
+ }
+
+ pgtt = VTD_PE_GET_TYPE(pe);
+ if (pgtt == VTD_SM_PASID_ENTRY_SLT &&
+ !vtd_is_sl_level_supported(s, VTD_PE_GET_SL_LEVEL(pe))) {
+ return -VTD_FR_PASID_TABLE_ENTRY_INV;
}
- if (!vtd_is_level_supported(s, VTD_PE_GET_LEVEL(pe))) {
- return -VTD_FR_PASID_TABLE_INV;
+ if (pgtt == VTD_SM_PASID_ENTRY_FLT &&
+ !vtd_is_fl_level_supported(s, VTD_PE_GET_FL_LEVEL(pe))) {
+ return -VTD_FR_PASID_TABLE_ENTRY_INV;
}
return 0;
@@ -851,7 +929,7 @@ static int vtd_get_pe_from_pasid_table(IntelIOMMUState *s,
}
if (!vtd_pdire_present(&pdire)) {
- return -VTD_FR_PASID_TABLE_INV;
+ return -VTD_FR_PASID_DIR_ENTRY_P;
}
ret = vtd_get_pe_from_pdire(s, pasid, &pdire, pe);
@@ -860,7 +938,7 @@ static int vtd_get_pe_from_pasid_table(IntelIOMMUState *s,
}
if (!vtd_pe_present(pe)) {
- return -VTD_FR_PASID_TABLE_INV;
+ return -VTD_FR_PASID_ENTRY_P;
}
return 0;
@@ -913,7 +991,7 @@ static int vtd_ce_get_pasid_fpd(IntelIOMMUState *s,
}
if (!vtd_pdire_present(&pdire)) {
- return -VTD_FR_PASID_TABLE_INV;
+ return -VTD_FR_PASID_DIR_ENTRY_P;
}
/*
@@ -948,7 +1026,11 @@ static uint32_t vtd_get_iova_level(IntelIOMMUState *s,
if (s->root_scalable) {
vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid);
- return VTD_PE_GET_LEVEL(&pe);
+ if (s->flts) {
+ return VTD_PE_GET_FL_LEVEL(&pe);
+ } else {
+ return VTD_PE_GET_SL_LEVEL(&pe);
+ }
}
return vtd_ce_get_level(ce);
@@ -1016,9 +1098,9 @@ static inline uint64_t vtd_iova_limit(IntelIOMMUState *s,
}
/* Return true if IOVA passes range check, otherwise false. */
-static inline bool vtd_iova_range_check(IntelIOMMUState *s,
- uint64_t iova, VTDContextEntry *ce,
- uint8_t aw, uint32_t pasid)
+static inline bool vtd_iova_sl_range_check(IntelIOMMUState *s,
+ uint64_t iova, VTDContextEntry *ce,
+ uint8_t aw, uint32_t pasid)
{
/*
* Check if @iova is above 2^X-1, where X is the minimum of MGAW
@@ -1035,7 +1117,11 @@ static dma_addr_t vtd_get_iova_pgtbl_base(IntelIOMMUState *s,
if (s->root_scalable) {
vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid);
- return pe.val[0] & VTD_SM_PASID_ENTRY_SLPTPTR;
+ if (s->flts) {
+ return pe.val[2] & VTD_SM_PASID_ENTRY_FLPTPTR;
+ } else {
+ return pe.val[0] & VTD_SM_PASID_ENTRY_SLPTPTR;
+ }
}
return vtd_ce_get_slpt_base(ce);
@@ -1059,17 +1145,17 @@ static bool vtd_slpte_nonzero_rsvd(uint64_t slpte, uint32_t level)
/*
* We should have caught a guest-mis-programmed level earlier,
- * via vtd_is_level_supported.
+ * via vtd_is_sl_level_supported.
*/
assert(level < VTD_SPTE_RSVD_LEN);
/*
- * Zero level doesn't exist. The smallest level is VTD_SL_PT_LEVEL=1 and
- * checked by vtd_is_last_slpte().
+ * Zero level doesn't exist. The smallest level is VTD_PT_LEVEL=1 and
+ * checked by vtd_is_last_pte().
*/
assert(level);
- if ((level == VTD_SL_PD_LEVEL || level == VTD_SL_PDP_LEVEL) &&
- (slpte & VTD_SL_PT_PAGE_SIZE_MASK)) {
+ if ((level == VTD_PD_LEVEL || level == VTD_PDP_LEVEL) &&
+ (slpte & VTD_PT_PAGE_SIZE_MASK)) {
/* large page */
rsvd_mask = vtd_spte_rsvd_large[level];
} else {
@@ -1093,9 +1179,8 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce,
uint32_t offset;
uint64_t slpte;
uint64_t access_right_check;
- uint64_t xlat, size;
- if (!vtd_iova_range_check(s, iova, ce, aw_bits, pasid)) {
+ if (!vtd_iova_sl_range_check(s, iova, ce, aw_bits, pasid)) {
error_report_once("%s: detected IOVA overflow (iova=0x%" PRIx64 ","
"pasid=0x%" PRIx32 ")", __func__, iova, pasid);
return -VTD_FR_ADDR_BEYOND_MGAW;
@@ -1106,7 +1191,7 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce,
while (true) {
offset = vtd_iova_level_offset(iova, level);
- slpte = vtd_get_slpte(addr, offset);
+ slpte = vtd_get_pte(addr, offset);
if (slpte == (uint64_t)-1) {
error_report_once("%s: detected read error on DMAR slpte "
@@ -1137,37 +1222,16 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce,
return -VTD_FR_PAGING_ENTRY_RSVD;
}
- if (vtd_is_last_slpte(slpte, level)) {
+ if (vtd_is_last_pte(slpte, level)) {
*slptep = slpte;
*slpte_level = level;
break;
}
- addr = vtd_get_slpte_addr(slpte, aw_bits);
+ addr = vtd_get_pte_addr(slpte, aw_bits);
level--;
}
- xlat = vtd_get_slpte_addr(*slptep, aw_bits);
- size = ~vtd_slpt_level_page_mask(level) + 1;
-
- /*
- * From VT-d spec 3.14: Untranslated requests and translation
- * requests that result in an address in the interrupt range will be
- * blocked with condition code LGN.4 or SGN.8.
- */
- if ((xlat > VTD_INTERRUPT_ADDR_LAST ||
- xlat + size - 1 < VTD_INTERRUPT_ADDR_FIRST)) {
- return 0;
- } else {
- error_report_once("%s: xlat address is in interrupt range "
- "(iova=0x%" PRIx64 ", level=0x%" PRIx32 ", "
- "slpte=0x%" PRIx64 ", write=%d, "
- "xlat=0x%" PRIx64 ", size=0x%" PRIx64 ", "
- "pasid=0x%" PRIx32 ")",
- __func__, iova, level, slpte, is_write,
- xlat, size, pasid);
- return s->scalable_mode ? -VTD_FR_SM_INTERRUPT_ADDR :
- -VTD_FR_INTERRUPT_ADDR;
- }
+ return 0;
}
typedef int (*vtd_page_walk_hook)(const IOMMUTLBEvent *event, void *private);
@@ -1298,14 +1362,14 @@ static int vtd_page_walk_level(dma_addr_t addr, uint64_t start,
trace_vtd_page_walk_level(addr, level, start, end);
- subpage_size = 1ULL << vtd_slpt_level_shift(level);
- subpage_mask = vtd_slpt_level_page_mask(level);
+ subpage_size = 1ULL << vtd_pt_level_shift(level);
+ subpage_mask = vtd_pt_level_page_mask(level);
while (iova < end) {
iova_next = (iova & subpage_mask) + subpage_size;
offset = vtd_iova_level_offset(iova, level);
- slpte = vtd_get_slpte(addr, offset);
+ slpte = vtd_get_pte(addr, offset);
if (slpte == (uint64_t)-1) {
trace_vtd_page_walk_skip_read(iova, iova_next);
@@ -1328,12 +1392,12 @@ static int vtd_page_walk_level(dma_addr_t addr, uint64_t start,
*/
entry_valid = read_cur | write_cur;
- if (!vtd_is_last_slpte(slpte, level) && entry_valid) {
+ if (!vtd_is_last_pte(slpte, level) && entry_valid) {
/*
* This is a valid PDE (or even bigger than PDE). We need
* to walk one further level.
*/
- ret = vtd_page_walk_level(vtd_get_slpte_addr(slpte, info->aw),
+ ret = vtd_page_walk_level(vtd_get_pte_addr(slpte, info->aw),
iova, MIN(iova_next, end), level - 1,
read_cur, write_cur, info);
} else {
@@ -1350,7 +1414,7 @@ static int vtd_page_walk_level(dma_addr_t addr, uint64_t start,
event.entry.perm = IOMMU_ACCESS_FLAG(read_cur, write_cur);
event.entry.addr_mask = ~subpage_mask;
/* NOTE: this is only meaningful if entry_valid == true */
- event.entry.translated_addr = vtd_get_slpte_addr(slpte, info->aw);
+ event.entry.translated_addr = vtd_get_pte_addr(slpte, info->aw);
event.type = event.entry.perm ? IOMMU_NOTIFIER_MAP :
IOMMU_NOTIFIER_UNMAP;
ret = vtd_page_walk_one(&event, info);
@@ -1384,11 +1448,11 @@ static int vtd_page_walk(IntelIOMMUState *s, VTDContextEntry *ce,
dma_addr_t addr = vtd_get_iova_pgtbl_base(s, ce, pasid);
uint32_t level = vtd_get_iova_level(s, ce, pasid);
- if (!vtd_iova_range_check(s, start, ce, info->aw, pasid)) {
+ if (!vtd_iova_sl_range_check(s, start, ce, info->aw, pasid)) {
return -VTD_FR_ADDR_BEYOND_MGAW;
}
- if (!vtd_iova_range_check(s, end, ce, info->aw, pasid)) {
+ if (!vtd_iova_sl_range_check(s, end, ce, info->aw, pasid)) {
/* Fix end so that it reaches the maximum */
end = vtd_iova_limit(s, ce, info->aw, pasid);
}
@@ -1503,7 +1567,7 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
/* Check if the programming of context-entry is valid */
if (!s->root_scalable &&
- !vtd_is_level_supported(s, vtd_ce_get_level(ce))) {
+ !vtd_is_sl_level_supported(s, vtd_ce_get_level(ce))) {
error_report_once("%s: invalid context entry: hi=%"PRIx64
", lo=%"PRIx64" (level %d not supported)",
__func__, ce->hi, ce->lo,
@@ -1664,8 +1728,6 @@ static bool vtd_as_pt_enabled(VTDAddressSpace *as)
static bool vtd_switch_address_space(VTDAddressSpace *as)
{
bool use_iommu, pt;
- /* Whether we need to take the BQL on our own */
- bool take_bql = !bql_locked();
assert(as);
@@ -1682,9 +1744,7 @@ static bool vtd_switch_address_space(VTDAddressSpace *as)
* from vtd_pt_enable_fast_path(). However the memory APIs need
* it. We'd better make sure we have had it already, or, take it.
*/
- if (take_bql) {
- bql_lock();
- }
+ BQL_LOCK_GUARD();
/* Turn off first then on the other */
if (use_iommu) {
@@ -1737,10 +1797,6 @@ static bool vtd_switch_address_space(VTDAddressSpace *as)
memory_region_set_enabled(&as->iommu_ir_fault, false);
}
- if (take_bql) {
- bql_unlock();
- }
-
return use_iommu;
}
@@ -1770,8 +1826,20 @@ static const bool vtd_qualified_faults[] = {
[VTD_FR_ROOT_ENTRY_RSVD] = false,
[VTD_FR_PAGING_ENTRY_RSVD] = true,
[VTD_FR_CONTEXT_ENTRY_TT] = true,
- [VTD_FR_PASID_TABLE_INV] = false,
+ [VTD_FR_PASID_DIR_ACCESS_ERR] = false,
+ [VTD_FR_PASID_DIR_ENTRY_P] = true,
+ [VTD_FR_PASID_TABLE_ACCESS_ERR] = false,
+ [VTD_FR_PASID_ENTRY_P] = true,
+ [VTD_FR_PASID_TABLE_ENTRY_INV] = true,
+ [VTD_FR_FS_PAGING_ENTRY_INV] = true,
+ [VTD_FR_FS_PAGING_ENTRY_P] = true,
+ [VTD_FR_FS_PAGING_ENTRY_RSVD] = true,
+ [VTD_FR_PASID_ENTRY_FSPTPTR_INV] = true,
+ [VTD_FR_FS_NON_CANONICAL] = true,
+ [VTD_FR_FS_PAGING_ENTRY_US] = true,
+ [VTD_FR_SM_WRITE] = true,
[VTD_FR_SM_INTERRUPT_ADDR] = true,
+ [VTD_FR_FS_BIT_UPDATE_FAILED] = true,
[VTD_FR_MAX] = false,
};
@@ -1789,29 +1857,32 @@ static inline bool vtd_is_interrupt_addr(hwaddr addr)
return VTD_INTERRUPT_ADDR_FIRST <= addr && addr <= VTD_INTERRUPT_ADDR_LAST;
}
-static gboolean vtd_find_as_by_sid(gpointer key, gpointer value,
- gpointer user_data)
+static gboolean vtd_find_as_by_sid_and_pasid(gpointer key, gpointer value,
+ gpointer user_data)
{
struct vtd_as_key *as_key = (struct vtd_as_key *)key;
- uint16_t target_sid = *(uint16_t *)user_data;
+ struct vtd_as_raw_key *target = (struct vtd_as_raw_key *)user_data;
uint16_t sid = PCI_BUILD_BDF(pci_bus_num(as_key->bus), as_key->devfn);
- return sid == target_sid;
+
+ return (as_key->pasid == target->pasid) && (sid == target->sid);
}
-static VTDAddressSpace *vtd_get_as_by_sid(IntelIOMMUState *s, uint16_t sid)
+static VTDAddressSpace *vtd_get_as_by_sid_and_pasid(IntelIOMMUState *s,
+ uint16_t sid,
+ uint32_t pasid)
{
- uint8_t bus_num = PCI_BUS_NUM(sid);
- VTDAddressSpace *vtd_as = s->vtd_as_cache[bus_num];
-
- if (vtd_as &&
- (sid == PCI_BUILD_BDF(pci_bus_num(vtd_as->bus), vtd_as->devfn))) {
- return vtd_as;
- }
+ struct vtd_as_raw_key key = {
+ .sid = sid,
+ .pasid = pasid
+ };
- vtd_as = g_hash_table_find(s->vtd_address_spaces, vtd_find_as_by_sid, &sid);
- s->vtd_as_cache[bus_num] = vtd_as;
+ return g_hash_table_find(s->vtd_address_spaces,
+ vtd_find_as_by_sid_and_pasid, &key);
+}
- return vtd_as;
+static VTDAddressSpace *vtd_get_as_by_sid(IntelIOMMUState *s, uint16_t sid)
+{
+ return vtd_get_as_by_sid_and_pasid(s, sid, PCI_NO_PASID);
}
static void vtd_pt_enable_fast_path(IntelIOMMUState *s, uint16_t source_id)
@@ -1833,6 +1904,157 @@ out:
trace_vtd_pt_enable_fast_path(source_id, success);
}
+/*
+ * Rsvd field masks for fpte:
+ * vtd_fpte_rsvd 4k pages
+ * vtd_fpte_rsvd_large large pages
+ *
+ * We support only 4-level page tables.
+ */
+#define VTD_FPTE_RSVD_LEN 5
+static uint64_t vtd_fpte_rsvd[VTD_FPTE_RSVD_LEN];
+static uint64_t vtd_fpte_rsvd_large[VTD_FPTE_RSVD_LEN];
+
+static bool vtd_flpte_nonzero_rsvd(uint64_t flpte, uint32_t level)
+{
+ uint64_t rsvd_mask;
+
+ /*
+ * We should have caught a guest-mis-programmed level earlier,
+ * via vtd_is_fl_level_supported.
+ */
+ assert(level < VTD_FPTE_RSVD_LEN);
+ /*
+ * Zero level doesn't exist. The smallest level is VTD_PT_LEVEL=1 and
+ * is checked by vtd_is_last_pte().
+ */
+ assert(level);
+
+ if ((level == VTD_PD_LEVEL || level == VTD_PDP_LEVEL) &&
+ (flpte & VTD_PT_PAGE_SIZE_MASK)) {
+ /* large page */
+ rsvd_mask = vtd_fpte_rsvd_large[level];
+ } else {
+ rsvd_mask = vtd_fpte_rsvd[level];
+ }
+
+ return flpte & rsvd_mask;
+}
+
+static inline bool vtd_flpte_present(uint64_t flpte)
+{
+ return !!(flpte & VTD_FL_P);
+}
+
+/* Return true if IOVA is canonical, otherwise false. */
+static bool vtd_iova_fl_check_canonical(IntelIOMMUState *s, uint64_t iova,
+ VTDContextEntry *ce, uint32_t pasid)
+{
+ uint64_t iova_limit = vtd_iova_limit(s, ce, s->aw_bits, pasid);
+ uint64_t upper_bits_mask = ~(iova_limit - 1);
+ uint64_t upper_bits = iova & upper_bits_mask;
+ bool msb = ((iova & (iova_limit >> 1)) != 0);
+
+ if (msb) {
+ return upper_bits == upper_bits_mask;
+ } else {
+ return !upper_bits;
+ }
+}
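/*
 * [Editor's illustrative sketch, not part of the patch] The canonicality
 * rule encoded by vtd_iova_fl_check_canonical() above, specialised to a
 * 48-bit first-stage width: bit 47 of the IOVA must be sign-extended
 * through bit 63, so the upper bits are either all ones or all zeroes.
 * The helper name is hypothetical.
 */
static bool canonical_48_example(uint64_t iova)
{
    const uint64_t limit = 1ULL << 48;        /* iova_limit for aw == 48 */
    uint64_t upper = iova & ~(limit - 1);     /* bits 63:48 */

    /* MSB of the usable range set -> upper bits must all be 1, else all 0 */
    return (iova & (limit >> 1)) ? upper == ~(limit - 1) : upper == 0;
}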
+
+static MemTxResult vtd_set_flag_in_pte(dma_addr_t base_addr, uint32_t index,
+ uint64_t pte, uint64_t flag)
+{
+ if (pte & flag) {
+ return MEMTX_OK;
+ }
+ pte |= flag;
+ pte = cpu_to_le64(pte);
+ return dma_memory_write(&address_space_memory,
+ base_addr + index * sizeof(pte),
+ &pte, sizeof(pte),
+ MEMTXATTRS_UNSPECIFIED);
+}
+
+/*
+ * Given the @iova, get relevant @flptep. @flpte_level will be the last level
+ * of the translation, can be used for deciding the size of large page.
+ */
+static int vtd_iova_to_flpte(IntelIOMMUState *s, VTDContextEntry *ce,
+ uint64_t iova, bool is_write,
+ uint64_t *flptep, uint32_t *flpte_level,
+ bool *reads, bool *writes, uint8_t aw_bits,
+ uint32_t pasid)
+{
+ dma_addr_t addr = vtd_get_iova_pgtbl_base(s, ce, pasid);
+ uint32_t level = vtd_get_iova_level(s, ce, pasid);
+ uint32_t offset;
+ uint64_t flpte, flag_ad = VTD_FL_A;
+
+ if (!vtd_iova_fl_check_canonical(s, iova, ce, pasid)) {
+ error_report_once("%s: detected non canonical IOVA (iova=0x%" PRIx64 ","
+ "pasid=0x%" PRIx32 ")", __func__, iova, pasid);
+ return -VTD_FR_FS_NON_CANONICAL;
+ }
+
+ while (true) {
+ offset = vtd_iova_level_offset(iova, level);
+ flpte = vtd_get_pte(addr, offset);
+
+ if (flpte == (uint64_t)-1) {
+ if (level == vtd_get_iova_level(s, ce, pasid)) {
+ /* Invalid programming of pasid-entry */
+ return -VTD_FR_PASID_ENTRY_FSPTPTR_INV;
+ } else {
+ return -VTD_FR_FS_PAGING_ENTRY_INV;
+ }
+ }
+
+ if (!vtd_flpte_present(flpte)) {
+ *reads = false;
+ *writes = false;
+ return -VTD_FR_FS_PAGING_ENTRY_P;
+ }
+
+ /* No emulated device supports supervisor privilege request yet */
+ if (!(flpte & VTD_FL_US)) {
+ *reads = false;
+ *writes = false;
+ return -VTD_FR_FS_PAGING_ENTRY_US;
+ }
+
+ *reads = true;
+ *writes = (*writes) && (flpte & VTD_FL_RW);
+ if (is_write && !(flpte & VTD_FL_RW)) {
+ return -VTD_FR_SM_WRITE;
+ }
+ if (vtd_flpte_nonzero_rsvd(flpte, level)) {
+ error_report_once("%s: detected flpte reserved non-zero "
+ "iova=0x%" PRIx64 ", level=0x%" PRIx32
+ "flpte=0x%" PRIx64 ", pasid=0x%" PRIX32 ")",
+ __func__, iova, level, flpte, pasid);
+ return -VTD_FR_FS_PAGING_ENTRY_RSVD;
+ }
+
+ if (vtd_is_last_pte(flpte, level) && is_write) {
+ flag_ad |= VTD_FL_D;
+ }
+
+ if (vtd_set_flag_in_pte(addr, offset, flpte, flag_ad) != MEMTX_OK) {
+ return -VTD_FR_FS_BIT_UPDATE_FAILED;
+ }
+
+ if (vtd_is_last_pte(flpte, level)) {
+ *flptep = flpte;
+ *flpte_level = level;
+ return 0;
+ }
+
+ addr = vtd_get_pte_addr(flpte, aw_bits);
+ level--;
+ }
+}
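/*
 * [Editor's illustrative sketch, not part of the patch] How the walk
 * above indexes each paging level, assuming the usual 4KiB pages with
 * 9 index bits per level (VTD_LEVEL_BITS == 9, 512 entries per table):
 * level 4 (PML4) uses IOVA bits 47:39, level 1 (PT) uses bits 20:12.
 * The helper name is hypothetical; it mirrors vtd_iova_level_offset().
 */
static uint32_t level_offset_example(uint64_t iova, uint32_t level)
{
    unsigned shift = 12 + (level - 1) * 9;    /* 12 = 4KiB page shift */

    return (uint32_t)((iova >> shift) & 0x1ff);
}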
+
static void vtd_report_fault(IntelIOMMUState *s,
int err, bool is_fpd_set,
uint16_t source_id,
@@ -1869,16 +2091,17 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
VTDContextEntry ce;
uint8_t bus_num = pci_bus_num(bus);
VTDContextCacheEntry *cc_entry;
- uint64_t slpte, page_mask;
+ uint64_t pte, page_mask;
uint32_t level, pasid = vtd_as->pasid;
uint16_t source_id = PCI_BUILD_BDF(bus_num, devfn);
int ret_fr;
bool is_fpd_set = false;
bool reads = true;
bool writes = true;
- uint8_t access_flags;
+ uint8_t access_flags, pgtt;
bool rid2pasid = (pasid == PCI_NO_PASID) && s->root_scalable;
VTDIOTLBEntry *iotlb_entry;
+ uint64_t xlat, size;
/*
* We have standalone memory region for interrupt addresses, we
@@ -1890,13 +2113,13 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
cc_entry = &vtd_as->context_cache_entry;
- /* Try to fetch slpte form IOTLB, we don't need RID2PASID logic */
+ /* Try to fetch pte from IOTLB, we don't need RID2PASID logic */
if (!rid2pasid) {
iotlb_entry = vtd_lookup_iotlb(s, source_id, pasid, addr);
if (iotlb_entry) {
- trace_vtd_iotlb_page_hit(source_id, addr, iotlb_entry->slpte,
+ trace_vtd_iotlb_page_hit(source_id, addr, iotlb_entry->pte,
iotlb_entry->domain_id);
- slpte = iotlb_entry->slpte;
+ pte = iotlb_entry->pte;
access_flags = iotlb_entry->access_flags;
page_mask = iotlb_entry->mask;
goto out;
@@ -1968,35 +2191,65 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
return true;
}
- /* Try to fetch slpte form IOTLB for RID2PASID slow path */
+ /* Try to fetch pte from IOTLB for RID2PASID slow path */
if (rid2pasid) {
iotlb_entry = vtd_lookup_iotlb(s, source_id, pasid, addr);
if (iotlb_entry) {
- trace_vtd_iotlb_page_hit(source_id, addr, iotlb_entry->slpte,
+ trace_vtd_iotlb_page_hit(source_id, addr, iotlb_entry->pte,
iotlb_entry->domain_id);
- slpte = iotlb_entry->slpte;
+ pte = iotlb_entry->pte;
access_flags = iotlb_entry->access_flags;
page_mask = iotlb_entry->mask;
goto out;
}
}
- ret_fr = vtd_iova_to_slpte(s, &ce, addr, is_write, &slpte, &level,
- &reads, &writes, s->aw_bits, pasid);
+ if (s->flts && s->root_scalable) {
+ ret_fr = vtd_iova_to_flpte(s, &ce, addr, is_write, &pte, &level,
+ &reads, &writes, s->aw_bits, pasid);
+ pgtt = VTD_SM_PASID_ENTRY_FLT;
+ } else {
+ ret_fr = vtd_iova_to_slpte(s, &ce, addr, is_write, &pte, &level,
+ &reads, &writes, s->aw_bits, pasid);
+ pgtt = VTD_SM_PASID_ENTRY_SLT;
+ }
+ if (!ret_fr) {
+ xlat = vtd_get_pte_addr(pte, s->aw_bits);
+ size = ~vtd_pt_level_page_mask(level) + 1;
+
+ /*
+ * Per VT-d spec 4.1 section 3.15: Untranslated requests and translation
+ * requests that result in an address in the interrupt range will be
+ * blocked with condition code LGN.4 or SGN.8.
+ */
+ if ((xlat <= VTD_INTERRUPT_ADDR_LAST &&
+ xlat + size - 1 >= VTD_INTERRUPT_ADDR_FIRST)) {
+ error_report_once("%s: xlat address is in interrupt range "
+ "(iova=0x%" PRIx64 ", level=0x%" PRIx32 ", "
+ "pte=0x%" PRIx64 ", write=%d, "
+ "xlat=0x%" PRIx64 ", size=0x%" PRIx64 ", "
+ "pasid=0x%" PRIx32 ")",
+ __func__, addr, level, pte, is_write,
+ xlat, size, pasid);
+ ret_fr = s->scalable_mode ? -VTD_FR_SM_INTERRUPT_ADDR :
+ -VTD_FR_INTERRUPT_ADDR;
+ }
+ }
+
if (ret_fr) {
vtd_report_fault(s, -ret_fr, is_fpd_set, source_id,
addr, is_write, pasid != PCI_NO_PASID, pasid);
goto error;
}
- page_mask = vtd_slpt_level_page_mask(level);
+ page_mask = vtd_pt_level_page_mask(level);
access_flags = IOMMU_ACCESS_FLAG(reads, writes);
vtd_update_iotlb(s, source_id, vtd_get_domain_id(s, &ce, pasid),
- addr, slpte, access_flags, level, pasid);
+ addr, pte, access_flags, level, pasid, pgtt);
out:
vtd_iommu_unlock(s);
entry->iova = addr & page_mask;
- entry->translated_addr = vtd_get_slpte_addr(slpte, s->aw_bits) & page_mask;
+ entry->translated_addr = vtd_get_pte_addr(pte, s->aw_bits) & page_mask;
entry->addr_mask = ~page_mask;
entry->perm = access_flags;
return true;
@@ -2190,8 +2443,13 @@ static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id)
}
}
+/*
+ * The iotlb invalidation descriptor has no pasid field, so PCI_NO_PASID
+ * is passed as parameter. The piotlb invalidation descriptor does carry a
+ * pasid, which is passed instead and must not be PCI_NO_PASID.
+ */
static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s,
- uint16_t domain_id, hwaddr addr,
+ uint16_t domain_id, hwaddr addr,
uint8_t am, uint32_t pasid)
{
VTDAddressSpace *vtd_as;
@@ -2200,19 +2458,37 @@ static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s,
hwaddr size = (1 << am) * VTD_PAGE_SIZE;
QLIST_FOREACH(vtd_as, &(s->vtd_as_with_notifiers), next) {
- if (pasid != PCI_NO_PASID && pasid != vtd_as->pasid) {
- continue;
- }
ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
vtd_as->devfn, &ce);
if (!ret && domain_id == vtd_get_domain_id(s, &ce, vtd_as->pasid)) {
+ uint32_t rid2pasid = PCI_NO_PASID;
+
+ if (s->root_scalable) {
+ rid2pasid = VTD_CE_GET_RID2PASID(&ce);
+ }
+
+ /*
+ * In legacy mode, vtd_as->pasid == pasid is always true.
+ * In scalable mode, for vtd address space backing a PCI
+ * device without pasid, needs to compare pasid with
+ * rid2pasid of this device.
+ */
+ if (!(vtd_as->pasid == pasid ||
+ (vtd_as->pasid == PCI_NO_PASID && pasid == rid2pasid))) {
+ continue;
+ }
+
if (vtd_as_has_map_notifier(vtd_as)) {
/*
- * As long as we have MAP notifications registered in
- * any of our IOMMU notifiers, we need to sync the
- * shadow page table.
+ * When stage-1 translation is off, as long as we have MAP
+ * notifications registered in any of our IOMMU notifiers,
+ * we need to sync the shadow page table. Otherwise the VFIO
+ * device attaches to the nested page table instead of the shadow
+ * page table, so there is no need to sync.
*/
- vtd_sync_shadow_page_table_range(vtd_as, &ce, addr, size);
+ if (!s->flts || !s->root_scalable) {
+ vtd_sync_shadow_page_table_range(vtd_as, &ce, addr, size);
+ }
} else {
/*
* For UNMAP-only notifiers, we don't need to walk the
@@ -2507,15 +2783,51 @@ static bool vtd_get_inv_desc(IntelIOMMUState *s,
return true;
}
+static bool vtd_inv_desc_reserved_check(IntelIOMMUState *s,
+ VTDInvDesc *inv_desc,
+ uint64_t mask[4], bool dw,
+ const char *func_name,
+ const char *desc_type)
+{
+ if (s->iq_dw) {
+ if (inv_desc->val[0] & mask[0] || inv_desc->val[1] & mask[1] ||
+ inv_desc->val[2] & mask[2] || inv_desc->val[3] & mask[3]) {
+ error_report("%s: invalid %s desc val[3]: 0x%"PRIx64
+ " val[2]: 0x%"PRIx64" val[1]=0x%"PRIx64
+ " val[0]=0x%"PRIx64" (reserved nonzero)",
+ func_name, desc_type, inv_desc->val[3],
+ inv_desc->val[2], inv_desc->val[1],
+ inv_desc->val[0]);
+ return false;
+ }
+ } else {
+ if (dw) {
+ error_report("%s: 256-bit %s desc in 128-bit invalidation queue",
+ func_name, desc_type);
+ return false;
+ }
+
+ if (inv_desc->lo & mask[0] || inv_desc->hi & mask[1]) {
+ error_report("%s: invalid %s desc: hi=%"PRIx64", lo=%"PRIx64
+ " (reserved nonzero)", func_name, desc_type,
+ inv_desc->hi, inv_desc->lo);
+ return false;
+ }
+ }
+
+ return true;
+}
+
static bool vtd_process_wait_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
{
- if ((inv_desc->hi & VTD_INV_DESC_WAIT_RSVD_HI) ||
- (inv_desc->lo & VTD_INV_DESC_WAIT_RSVD_LO)) {
- error_report_once("%s: invalid wait desc: hi=%"PRIx64", lo=%"PRIx64
- " (reserved nonzero)", __func__, inv_desc->hi,
- inv_desc->lo);
+ uint64_t mask[4] = {VTD_INV_DESC_WAIT_RSVD_LO, VTD_INV_DESC_WAIT_RSVD_HI,
+ VTD_INV_DESC_ALL_ONE, VTD_INV_DESC_ALL_ONE};
+
+ if (!vtd_inv_desc_reserved_check(s, inv_desc, mask, false,
+ __func__, "wait")) {
return false;
}
+
if (inv_desc->lo & VTD_INV_DESC_WAIT_SW) {
/* Status Write */
uint32_t status_data = (uint32_t)(inv_desc->lo >>
@@ -2549,13 +2861,14 @@ static bool vtd_process_context_cache_desc(IntelIOMMUState *s,
VTDInvDesc *inv_desc)
{
uint16_t sid, fmask;
+ uint64_t mask[4] = {VTD_INV_DESC_CC_RSVD, VTD_INV_DESC_ALL_ONE,
+ VTD_INV_DESC_ALL_ONE, VTD_INV_DESC_ALL_ONE};
- if ((inv_desc->lo & VTD_INV_DESC_CC_RSVD) || inv_desc->hi) {
- error_report_once("%s: invalid cc inv desc: hi=%"PRIx64", lo=%"PRIx64
- " (reserved nonzero)", __func__, inv_desc->hi,
- inv_desc->lo);
+ if (!vtd_inv_desc_reserved_check(s, inv_desc, mask, false,
+ __func__, "cc inv")) {
return false;
}
+
switch (inv_desc->lo & VTD_INV_DESC_CC_G) {
case VTD_INV_DESC_CC_DOMAIN:
trace_vtd_inv_desc_cc_domain(
@@ -2585,12 +2898,11 @@ static bool vtd_process_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
uint16_t domain_id;
uint8_t am;
hwaddr addr;
+ uint64_t mask[4] = {VTD_INV_DESC_IOTLB_RSVD_LO, VTD_INV_DESC_IOTLB_RSVD_HI,
+ VTD_INV_DESC_ALL_ONE, VTD_INV_DESC_ALL_ONE};
- if ((inv_desc->lo & VTD_INV_DESC_IOTLB_RSVD_LO) ||
- (inv_desc->hi & VTD_INV_DESC_IOTLB_RSVD_HI)) {
- error_report_once("%s: invalid iotlb inv desc: hi=0x%"PRIx64
- ", lo=0x%"PRIx64" (reserved bits unzero)",
- __func__, inv_desc->hi, inv_desc->lo);
+ if (!vtd_inv_desc_reserved_check(s, inv_desc, mask, false,
+ __func__, "iotlb inv")) {
return false;
}
@@ -2628,9 +2940,117 @@ static bool vtd_process_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
return true;
}
+static gboolean vtd_hash_remove_by_pasid(gpointer key, gpointer value,
+ gpointer user_data)
+{
+ VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value;
+ VTDIOTLBPageInvInfo *info = (VTDIOTLBPageInvInfo *)user_data;
+
+ return ((entry->domain_id == info->domain_id) &&
+ (entry->pasid == info->pasid));
+}
+
+static void vtd_piotlb_pasid_invalidate(IntelIOMMUState *s,
+ uint16_t domain_id, uint32_t pasid)
+{
+ VTDIOTLBPageInvInfo info;
+ VTDAddressSpace *vtd_as;
+ VTDContextEntry ce;
+
+ info.domain_id = domain_id;
+ info.pasid = pasid;
+
+ vtd_iommu_lock(s);
+ g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_pasid,
+ &info);
+ vtd_iommu_unlock(s);
+
+ QLIST_FOREACH(vtd_as, &s->vtd_as_with_notifiers, next) {
+ if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
+ vtd_as->devfn, &ce) &&
+ domain_id == vtd_get_domain_id(s, &ce, vtd_as->pasid)) {
+ uint32_t rid2pasid = VTD_CE_GET_RID2PASID(&ce);
+
+ if ((vtd_as->pasid != PCI_NO_PASID || pasid != rid2pasid) &&
+ vtd_as->pasid != pasid) {
+ continue;
+ }
+
+ if (!s->flts || !vtd_as_has_map_notifier(vtd_as)) {
+ vtd_address_space_sync(vtd_as);
+ }
+ }
+ }
+}
+
+static void vtd_piotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
+ uint32_t pasid, hwaddr addr, uint8_t am)
+{
+ VTDIOTLBPageInvInfo info;
+
+ info.domain_id = domain_id;
+ info.pasid = pasid;
+ info.addr = addr;
+ info.mask = ~((1 << am) - 1);
+
+ vtd_iommu_lock(s);
+ g_hash_table_foreach_remove(s->iotlb,
+ vtd_hash_remove_by_page_piotlb, &info);
+ vtd_iommu_unlock(s);
+
+ vtd_iotlb_page_invalidate_notify(s, domain_id, addr, am, pasid);
+}
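/*
 * [Editor's illustrative sketch, not part of the patch] The "address
 * mask" (am) encoding used by the page-selective invalidation above:
 * an invalidation with mask am covers 2^am contiguous 4KiB pages, so
 * am == 0 is a single page and am == 9 spans 2MiB.  1ULL keeps the
 * shift 64-bit in this standalone example; names are hypothetical.
 */
static uint64_t piotlb_range_example(uint64_t addr, uint8_t am,
                                     uint64_t *base)
{
    uint64_t size = (1ULL << am) * 4096;

    *base = addr & ~(size - 1);               /* aligned start of range */
    return size;
}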
+
+static bool vtd_process_piotlb_desc(IntelIOMMUState *s,
+ VTDInvDesc *inv_desc)
+{
+ uint16_t domain_id;
+ uint32_t pasid;
+ hwaddr addr;
+ uint8_t am;
+ uint64_t mask[4] = {VTD_INV_DESC_PIOTLB_RSVD_VAL0,
+ VTD_INV_DESC_PIOTLB_RSVD_VAL1,
+ VTD_INV_DESC_ALL_ONE, VTD_INV_DESC_ALL_ONE};
+
+ if (!vtd_inv_desc_reserved_check(s, inv_desc, mask, true,
+ __func__, "piotlb inv")) {
+ return false;
+ }
+
+ domain_id = VTD_INV_DESC_PIOTLB_DID(inv_desc->val[0]);
+ pasid = VTD_INV_DESC_PIOTLB_PASID(inv_desc->val[0]);
+ switch (inv_desc->val[0] & VTD_INV_DESC_PIOTLB_G) {
+ case VTD_INV_DESC_PIOTLB_ALL_IN_PASID:
+ vtd_piotlb_pasid_invalidate(s, domain_id, pasid);
+ break;
+
+ case VTD_INV_DESC_PIOTLB_PSI_IN_PASID:
+ am = VTD_INV_DESC_PIOTLB_AM(inv_desc->val[1]);
+ addr = (hwaddr) VTD_INV_DESC_PIOTLB_ADDR(inv_desc->val[1]);
+ vtd_piotlb_page_invalidate(s, domain_id, pasid, addr, am);
+ break;
+
+ default:
+ error_report_once("%s: invalid piotlb inv desc: hi=0x%"PRIx64
+ ", lo=0x%"PRIx64" (type mismatch: 0x%llx)",
+ __func__, inv_desc->val[1], inv_desc->val[0],
+ inv_desc->val[0] & VTD_INV_DESC_IOTLB_G);
+ return false;
+ }
+ return true;
+}
+
static bool vtd_process_inv_iec_desc(IntelIOMMUState *s,
VTDInvDesc *inv_desc)
{
+ uint64_t mask[4] = {VTD_INV_DESC_IEC_RSVD, VTD_INV_DESC_ALL_ONE,
+ VTD_INV_DESC_ALL_ONE, VTD_INV_DESC_ALL_ONE};
+
+ if (!vtd_inv_desc_reserved_check(s, inv_desc, mask, false,
+ __func__, "iec inv")) {
+ return false;
+ }
+
trace_vtd_inv_desc_iec(inv_desc->iec.granularity,
inv_desc->iec.index,
inv_desc->iec.index_mask);
@@ -2641,38 +3061,11 @@ static bool vtd_process_inv_iec_desc(IntelIOMMUState *s,
return true;
}
-static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s,
- VTDInvDesc *inv_desc)
+static void do_invalidate_device_tlb(VTDAddressSpace *vtd_dev_as,
+ bool size, hwaddr addr)
{
- VTDAddressSpace *vtd_dev_as;
- IOMMUTLBEvent event;
- hwaddr addr;
- uint64_t sz;
- uint16_t sid;
- bool size;
-
- addr = VTD_INV_DESC_DEVICE_IOTLB_ADDR(inv_desc->hi);
- sid = VTD_INV_DESC_DEVICE_IOTLB_SID(inv_desc->lo);
- size = VTD_INV_DESC_DEVICE_IOTLB_SIZE(inv_desc->hi);
-
- if ((inv_desc->lo & VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO) ||
- (inv_desc->hi & VTD_INV_DESC_DEVICE_IOTLB_RSVD_HI)) {
- error_report_once("%s: invalid dev-iotlb inv desc: hi=%"PRIx64
- ", lo=%"PRIx64" (reserved nonzero)", __func__,
- inv_desc->hi, inv_desc->lo);
- return false;
- }
-
/*
- * Using sid is OK since the guest should have finished the
- * initialization of both the bus and device.
- */
- vtd_dev_as = vtd_get_as_by_sid(s, sid);
- if (!vtd_dev_as) {
- goto done;
- }
-
- /* According to ATS spec table 2.4:
+ * According to ATS spec table 2.4:
* S = 0, bits 15:12 = xxxx range size: 4K
* S = 1, bits 15:12 = xxx0 range size: 8K
* S = 1, bits 15:12 = xx01 range size: 16K
@@ -2680,6 +3073,10 @@ static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s,
* S = 1, bits 15:12 = 0111 range size: 64K
* ...
*/
+
+ IOMMUTLBEvent event;
+ uint64_t sz;
+
if (size) {
sz = (VTD_PAGE_SIZE * 2) << cto64(addr >> VTD_PAGE_SHIFT);
addr &= ~(sz - 1);
@@ -2694,6 +3091,81 @@ static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s,
event.entry.perm = IOMMU_NONE;
event.entry.translated_addr = 0;
memory_region_notify_iommu(&vtd_dev_as->iommu, 0, event);
+}
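/*
 * [Editor's illustrative sketch, not part of the patch] The ATS range
 * encoding decoded by do_invalidate_device_tlb() above.  With S == 1,
 * the number of trailing 1 bits starting at address bit 12 selects the
 * size: bits 15:12 == 0111 gives three trailing ones, i.e. 8K << 3 ==
 * 64K, and the base is the address with those low bits cleared.  The
 * helper name is hypothetical and avoids QEMU's cto64() on purpose.
 */
static uint64_t ats_range_size_example(uint64_t addr, bool s)
{
    uint64_t size = 4096;                     /* S == 0: one 4K page */

    if (s) {
        uint64_t bits = addr >> 12;
        unsigned ones = 0;

        while (bits & 1) {                    /* count trailing ones */
            ones++;
            bits >>= 1;
        }
        size = 8192ULL << ones;               /* (page size * 2) << ones */
    }
    return size;
}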
+
+static bool vtd_process_device_piotlb_desc(IntelIOMMUState *s,
+ VTDInvDesc *inv_desc)
+{
+ uint16_t sid;
+ VTDAddressSpace *vtd_dev_as;
+ bool size;
+ bool global;
+ hwaddr addr;
+ uint32_t pasid;
+ uint64_t mask[4] = {VTD_INV_DESC_PASID_DEVICE_IOTLB_RSVD_VAL0,
+ VTD_INV_DESC_PASID_DEVICE_IOTLB_RSVD_VAL1,
+ VTD_INV_DESC_ALL_ONE, VTD_INV_DESC_ALL_ONE};
+
+ if (!vtd_inv_desc_reserved_check(s, inv_desc, mask, true,
+ __func__, "device piotlb inv")) {
+ return false;
+ }
+
+ global = VTD_INV_DESC_PASID_DEVICE_IOTLB_GLOBAL(inv_desc->hi);
+ size = VTD_INV_DESC_PASID_DEVICE_IOTLB_SIZE(inv_desc->hi);
+ addr = VTD_INV_DESC_PASID_DEVICE_IOTLB_ADDR(inv_desc->hi);
+ sid = VTD_INV_DESC_PASID_DEVICE_IOTLB_SID(inv_desc->lo);
+ if (global) {
+ QLIST_FOREACH(vtd_dev_as, &s->vtd_as_with_notifiers, next) {
+ if ((vtd_dev_as->pasid != PCI_NO_PASID) &&
+ (PCI_BUILD_BDF(pci_bus_num(vtd_dev_as->bus),
+ vtd_dev_as->devfn) == sid)) {
+ do_invalidate_device_tlb(vtd_dev_as, size, addr);
+ }
+ }
+ } else {
+ pasid = VTD_INV_DESC_PASID_DEVICE_IOTLB_PASID(inv_desc->lo);
+ vtd_dev_as = vtd_get_as_by_sid_and_pasid(s, sid, pasid);
+ if (!vtd_dev_as) {
+ return true;
+ }
+
+ do_invalidate_device_tlb(vtd_dev_as, size, addr);
+ }
+
+ return true;
+}
+
+static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s,
+ VTDInvDesc *inv_desc)
+{
+ VTDAddressSpace *vtd_dev_as;
+ hwaddr addr;
+ uint16_t sid;
+ bool size;
+ uint64_t mask[4] = {VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO,
+ VTD_INV_DESC_DEVICE_IOTLB_RSVD_HI,
+ VTD_INV_DESC_ALL_ONE, VTD_INV_DESC_ALL_ONE};
+
+ if (!vtd_inv_desc_reserved_check(s, inv_desc, mask, false,
+ __func__, "dev-iotlb inv")) {
+ return false;
+ }
+
+ addr = VTD_INV_DESC_DEVICE_IOTLB_ADDR(inv_desc->hi);
+ sid = VTD_INV_DESC_DEVICE_IOTLB_SID(inv_desc->lo);
+ size = VTD_INV_DESC_DEVICE_IOTLB_SIZE(inv_desc->hi);
+
+ /*
+ * Using sid is OK since the guest should have finished the
+ * initialization of both the bus and device.
+ */
+ vtd_dev_as = vtd_get_as_by_sid(s, sid);
+ if (!vtd_dev_as) {
+ goto done;
+ }
+
+ do_invalidate_device_tlb(vtd_dev_as, size, addr);
done:
return true;
@@ -2710,7 +3182,7 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s)
return false;
}
- desc_type = inv_desc.lo & VTD_INV_DESC_TYPE;
+ desc_type = VTD_INV_DESC_TYPE(inv_desc.lo);
/* FIXME: should update at first or at last? */
s->iq_last_desc_type = desc_type;
@@ -2729,15 +3201,11 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s)
}
break;
- /*
- * TODO: the entity of below two cases will be implemented in future series.
- * To make guest (which integrates scalable mode support patch set in
- * iommu driver) work, just return true is enough so far.
- */
- case VTD_INV_DESC_PC:
- break;
-
case VTD_INV_DESC_PIOTLB:
+ trace_vtd_inv_desc("p-iotlb", inv_desc.val[1], inv_desc.val[0]);
+ if (!vtd_process_piotlb_desc(s, &inv_desc)) {
+ return false;
+ }
break;
case VTD_INV_DESC_WAIT:
@@ -2754,6 +3222,13 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s)
}
break;
+ case VTD_INV_DESC_DEV_PIOTLB:
+ trace_vtd_inv_desc("device-piotlb", inv_desc.hi, inv_desc.lo);
+ if (!vtd_process_device_piotlb_desc(s, &inv_desc)) {
+ return false;
+ }
+ break;
+
case VTD_INV_DESC_DEVICE:
trace_vtd_inv_desc("device", inv_desc.hi, inv_desc.lo);
if (!vtd_process_device_iotlb_desc(s, &inv_desc)) {
@@ -2761,6 +3236,16 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s)
}
break;
+ /*
+ * TODO: the body of the case below will be implemented in a future
+ * series. To make a guest (whose iommu driver integrates the scalable
+ * mode support patch set) work, just returning true is enough so far.
+ */
+ case VTD_INV_DESC_PC:
+ if (s->scalable_mode) {
+ break;
+ }
+ /* fallthrough */
default:
error_report_once("%s: invalid inv desc: hi=%"PRIx64", lo=%"PRIx64
" (unknown type)", __func__, inv_desc.hi,
@@ -2813,6 +3298,7 @@ static void vtd_handle_iqt_write(IntelIOMMUState *s)
if (s->iq_dw && (val & VTD_IQT_QT_256_RSV_BIT)) {
error_report_once("%s: RSV bit is set: val=0x%"PRIx64,
__func__, val);
+ vtd_handle_inv_queue_error(s);
return;
}
s->iq_tail = VTD_IQT_QT(s->iq_dw, val);
@@ -2913,7 +3399,9 @@ static uint64_t vtd_mem_read(void *opaque, hwaddr addr, unsigned size)
/* Invalidation Queue Address Register, 64-bit */
case DMAR_IQA_REG:
- val = s->iq | (vtd_get_quad(s, DMAR_IQA_REG) & VTD_IQA_QS);
+ val = s->iq |
+ (vtd_get_quad(s, DMAR_IQA_REG) &
+ (VTD_IQA_QS | VTD_IQA_DW_MASK));
if (size == 4) {
val = val & ((1ULL << 32) - 1);
}
@@ -3323,7 +3811,7 @@ static const MemoryRegionOps vtd_mem_ops = {
},
};
-static Property vtd_properties[] = {
+static const Property vtd_properties[] = {
DEFINE_PROP_UINT32("version", IntelIOMMUState, version, 0),
DEFINE_PROP_ON_OFF_AUTO("eim", IntelIOMMUState, intr_eim,
ON_OFF_AUTO_AUTO),
@@ -3332,11 +3820,13 @@ static Property vtd_properties[] = {
VTD_HOST_ADDRESS_WIDTH),
DEFINE_PROP_BOOL("caching-mode", IntelIOMMUState, caching_mode, FALSE),
DEFINE_PROP_BOOL("x-scalable-mode", IntelIOMMUState, scalable_mode, FALSE),
+ DEFINE_PROP_BOOL("x-flts", IntelIOMMUState, flts, FALSE),
DEFINE_PROP_BOOL("snoop-control", IntelIOMMUState, snoop_control, false),
DEFINE_PROP_BOOL("x-pasid-mode", IntelIOMMUState, pasid, false),
DEFINE_PROP_BOOL("dma-drain", IntelIOMMUState, dma_drain, true),
DEFINE_PROP_BOOL("dma-translation", IntelIOMMUState, dma_translation, true),
- DEFINE_PROP_END_OF_LIST(),
+ DEFINE_PROP_BOOL("stale-tm", IntelIOMMUState, stale_tm, false),
+ DEFINE_PROP_BOOL("fs1gp", IntelIOMMUState, fs1gp, true),
};
/* Read IRTE entry with specific index */
@@ -3715,9 +4205,30 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,
VTDAddressSpace *vtd_dev_as;
char name[128];
+ vtd_iommu_lock(s);
vtd_dev_as = g_hash_table_lookup(s->vtd_address_spaces, &key);
+ vtd_iommu_unlock(s);
+
if (!vtd_dev_as) {
- struct vtd_as_key *new_key = g_malloc(sizeof(*new_key));
+ struct vtd_as_key *new_key;
+ /* Slow path */
+
+ /*
+ * memory_region_add_subregion_overlap requires the bql,
+ * make sure we own it.
+ */
+ BQL_LOCK_GUARD();
+ vtd_iommu_lock(s);
+
+ /* Check again as we released the lock for a moment */
+ vtd_dev_as = g_hash_table_lookup(s->vtd_address_spaces, &key);
+ if (vtd_dev_as) {
+ vtd_iommu_unlock(s);
+ return vtd_dev_as;
+ }
+
+ /* Still nothing, allocate a new address space */
+ new_key = g_malloc(sizeof(*new_key));
new_key->bus = bus;
new_key->devfn = devfn;
@@ -3808,10 +4319,99 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,
vtd_switch_address_space(vtd_dev_as);
g_hash_table_insert(s->vtd_address_spaces, new_key, vtd_dev_as);
+
+ vtd_iommu_unlock(s);
}
return vtd_dev_as;
}
+static bool vtd_check_hiod(IntelIOMMUState *s, HostIOMMUDevice *hiod,
+ Error **errp)
+{
+ HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_GET_CLASS(hiod);
+ int ret;
+
+ if (!hiodc->get_cap) {
+ error_setg(errp, ".get_cap() not implemented");
+ return false;
+ }
+
+ /* Common checks */
+ ret = hiodc->get_cap(hiod, HOST_IOMMU_DEVICE_CAP_AW_BITS, errp);
+ if (ret < 0) {
+ return false;
+ }
+ if (s->aw_bits > ret) {
+ error_setg(errp, "aw-bits %d > host aw-bits %d", s->aw_bits, ret);
+ return false;
+ }
+
+ if (!s->flts) {
+ /* All checks requested by VTD stage-2 translation pass */
+ return true;
+ }
+
+ error_setg(errp, "host device is uncompatible with stage-1 translation");
+ return false;
+}
+
+static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
+ HostIOMMUDevice *hiod, Error **errp)
+{
+ IntelIOMMUState *s = opaque;
+ struct vtd_as_key key = {
+ .bus = bus,
+ .devfn = devfn,
+ };
+ struct vtd_as_key *new_key;
+
+ assert(hiod);
+
+ vtd_iommu_lock(s);
+
+ if (g_hash_table_lookup(s->vtd_host_iommu_dev, &key)) {
+ error_setg(errp, "Host IOMMU device already exist");
+ vtd_iommu_unlock(s);
+ return false;
+ }
+
+ if (!vtd_check_hiod(s, hiod, errp)) {
+ vtd_iommu_unlock(s);
+ return false;
+ }
+
+ new_key = g_malloc(sizeof(*new_key));
+ new_key->bus = bus;
+ new_key->devfn = devfn;
+
+ object_ref(hiod);
+ g_hash_table_insert(s->vtd_host_iommu_dev, new_key, hiod);
+
+ vtd_iommu_unlock(s);
+
+ return true;
+}
+
+static void vtd_dev_unset_iommu_device(PCIBus *bus, void *opaque, int devfn)
+{
+ IntelIOMMUState *s = opaque;
+ struct vtd_as_key key = {
+ .bus = bus,
+ .devfn = devfn,
+ };
+
+ vtd_iommu_lock(s);
+
+ if (!g_hash_table_lookup(s->vtd_host_iommu_dev, &key)) {
+ vtd_iommu_unlock(s);
+ return;
+ }
+
+ g_hash_table_remove(s->vtd_host_iommu_dev, &key);
+
+ vtd_iommu_unlock(s);
+}
+
/* Unmap the whole range in the notifier's scope. */
static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n)
{
@@ -3930,34 +4530,12 @@ static void vtd_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n)
trace_vtd_replay_ce_invalid(bus_n, PCI_SLOT(vtd_as->devfn),
PCI_FUNC(vtd_as->devfn));
}
-
- return;
}
-/* Do the initialization. It will also be called when reset, so pay
- * attention when adding new initialization stuff.
- */
-static void vtd_init(IntelIOMMUState *s)
+static void vtd_cap_init(IntelIOMMUState *s)
{
X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
- memset(s->csr, 0, DMAR_REG_SIZE);
- memset(s->wmask, 0, DMAR_REG_SIZE);
- memset(s->w1cmask, 0, DMAR_REG_SIZE);
- memset(s->womask, 0, DMAR_REG_SIZE);
-
- s->root = 0;
- s->root_scalable = false;
- s->dmar_enabled = false;
- s->intr_enabled = false;
- s->iq_head = 0;
- s->iq_tail = 0;
- s->iq = 0;
- s->iq_size = 0;
- s->qi_enabled = false;
- s->iq_last_desc_type = VTD_INV_DESC_NONE;
- s->iq_dw = false;
- s->next_frcd_reg = 0;
s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND |
VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS |
VTD_CAP_MGAW(s->aw_bits);
@@ -3974,27 +4552,6 @@ static void vtd_init(IntelIOMMUState *s)
}
s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO;
- /*
- * Rsvd field masks for spte
- */
- vtd_spte_rsvd[0] = ~0ULL;
- vtd_spte_rsvd[1] = VTD_SPTE_PAGE_L1_RSVD_MASK(s->aw_bits,
- x86_iommu->dt_supported);
- vtd_spte_rsvd[2] = VTD_SPTE_PAGE_L2_RSVD_MASK(s->aw_bits);
- vtd_spte_rsvd[3] = VTD_SPTE_PAGE_L3_RSVD_MASK(s->aw_bits);
- vtd_spte_rsvd[4] = VTD_SPTE_PAGE_L4_RSVD_MASK(s->aw_bits);
-
- vtd_spte_rsvd_large[2] = VTD_SPTE_LPAGE_L2_RSVD_MASK(s->aw_bits,
- x86_iommu->dt_supported);
- vtd_spte_rsvd_large[3] = VTD_SPTE_LPAGE_L3_RSVD_MASK(s->aw_bits,
- x86_iommu->dt_supported);
-
- if (s->scalable_mode || s->snoop_control) {
- vtd_spte_rsvd[1] &= ~VTD_SPTE_SNP;
- vtd_spte_rsvd_large[2] &= ~VTD_SPTE_SNP;
- vtd_spte_rsvd_large[3] &= ~VTD_SPTE_SNP;
- }
-
if (x86_iommu_ir_supported(x86_iommu)) {
s->ecap |= VTD_ECAP_IR | VTD_ECAP_MHMV;
if (s->intr_eim == ON_OFF_AUTO_ON) {
@@ -4016,7 +4573,12 @@ static void vtd_init(IntelIOMMUState *s)
}
/* TODO: read cap/ecap from host to decide which cap to be exposed. */
- if (s->scalable_mode) {
+ if (s->flts) {
+ s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_FLTS;
+ if (s->fs1gp) {
+ s->cap |= VTD_CAP_FS1GP;
+ }
+ } else if (s->scalable_mode) {
s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_SRS | VTD_ECAP_SLTS;
}
@@ -4027,6 +4589,68 @@ static void vtd_init(IntelIOMMUState *s)
if (s->pasid) {
s->ecap |= VTD_ECAP_PASID;
}
+}
+
+/*
+ * Do the initialization. It will also be called when reset, so pay
+ * attention when adding new initialization stuff.
+ */
+static void vtd_init(IntelIOMMUState *s)
+{
+ X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
+
+ memset(s->csr, 0, DMAR_REG_SIZE);
+ memset(s->wmask, 0, DMAR_REG_SIZE);
+ memset(s->w1cmask, 0, DMAR_REG_SIZE);
+ memset(s->womask, 0, DMAR_REG_SIZE);
+
+ s->root = 0;
+ s->root_scalable = false;
+ s->dmar_enabled = false;
+ s->intr_enabled = false;
+ s->iq_head = 0;
+ s->iq_tail = 0;
+ s->iq = 0;
+ s->iq_size = 0;
+ s->qi_enabled = false;
+ s->iq_last_desc_type = VTD_INV_DESC_NONE;
+ s->iq_dw = false;
+ s->next_frcd_reg = 0;
+
+ vtd_cap_init(s);
+
+ /*
+ * Rsvd field masks for spte
+ */
+ vtd_spte_rsvd[0] = ~0ULL;
+ vtd_spte_rsvd[1] = VTD_SPTE_PAGE_L1_RSVD_MASK(s->aw_bits,
+ x86_iommu->dt_supported && s->stale_tm);
+ vtd_spte_rsvd[2] = VTD_SPTE_PAGE_L2_RSVD_MASK(s->aw_bits);
+ vtd_spte_rsvd[3] = VTD_SPTE_PAGE_L3_RSVD_MASK(s->aw_bits);
+ vtd_spte_rsvd[4] = VTD_SPTE_PAGE_L4_RSVD_MASK(s->aw_bits);
+
+ vtd_spte_rsvd_large[2] = VTD_SPTE_LPAGE_L2_RSVD_MASK(s->aw_bits,
+ x86_iommu->dt_supported && s->stale_tm);
+ vtd_spte_rsvd_large[3] = VTD_SPTE_LPAGE_L3_RSVD_MASK(s->aw_bits,
+ x86_iommu->dt_supported && s->stale_tm);
+
+ /*
+ * Rsvd field masks for fpte
+ */
+ vtd_fpte_rsvd[0] = ~0ULL;
+ vtd_fpte_rsvd[1] = VTD_FPTE_PAGE_L1_RSVD_MASK(s->aw_bits);
+ vtd_fpte_rsvd[2] = VTD_FPTE_PAGE_L2_RSVD_MASK(s->aw_bits);
+ vtd_fpte_rsvd[3] = VTD_FPTE_PAGE_L3_RSVD_MASK(s->aw_bits);
+ vtd_fpte_rsvd[4] = VTD_FPTE_PAGE_L4_RSVD_MASK(s->aw_bits);
+
+ vtd_fpte_rsvd_large[2] = VTD_FPTE_LPAGE_L2_RSVD_MASK(s->aw_bits);
+ vtd_fpte_rsvd_large[3] = VTD_FPTE_LPAGE_L3_RSVD_MASK(s->aw_bits);
+
+ if (s->scalable_mode || s->snoop_control) {
+ vtd_spte_rsvd[1] &= ~VTD_SPTE_SNP;
+ vtd_spte_rsvd_large[2] &= ~VTD_SPTE_SNP;
+ vtd_spte_rsvd_large[3] &= ~VTD_SPTE_SNP;
+ }
vtd_reset_caches(s);
@@ -4086,10 +4710,11 @@ static void vtd_init(IntelIOMMUState *s)
/* Should not reset address_spaces when reset because devices will still use
* the address space they got at first (won't ask the bus again).
*/
-static void vtd_reset(DeviceState *dev)
+static void vtd_reset_exit(Object *obj, ResetType type)
{
- IntelIOMMUState *s = INTEL_IOMMU_DEVICE(dev);
+ IntelIOMMUState *s = INTEL_IOMMU_DEVICE(obj);
+ trace_vtd_reset_exit();
vtd_init(s);
vtd_address_space_refresh_all(s);
}
@@ -4107,6 +4732,8 @@ static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
static PCIIOMMUOps vtd_iommu_ops = {
.get_address_space = vtd_host_dma_iommu,
+ .set_iommu_device = vtd_dev_set_iommu_device,
+ .unset_iommu_device = vtd_dev_unset_iommu_device,
};
static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
@@ -4131,14 +4758,26 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
}
}
- /* Currently only address widths supported are 39 and 48 bits */
- if ((s->aw_bits != VTD_HOST_AW_39BIT) &&
- (s->aw_bits != VTD_HOST_AW_48BIT)) {
- error_setg(errp, "Supported values for aw-bits are: %d, %d",
+ if (!s->scalable_mode && s->flts) {
+ error_setg(errp, "x-flts is only available in scalable mode");
+ return false;
+ }
+
+ if (!s->flts && s->aw_bits != VTD_HOST_AW_39BIT &&
+ s->aw_bits != VTD_HOST_AW_48BIT) {
+ error_setg(errp, "%s: supported values for aw-bits are: %d, %d",
+ s->scalable_mode ? "Scalable mode (flts=off)" : "Legacy mode",
VTD_HOST_AW_39BIT, VTD_HOST_AW_48BIT);
return false;
}
+ if (s->flts && s->aw_bits != VTD_HOST_AW_48BIT) {
+ error_setg(errp,
+ "Scalable mode(flts=on): supported value for aw-bits is: %d",
+ VTD_HOST_AW_48BIT);
+ return false;
+ }
+
if (s->scalable_mode && !s->dma_drain) {
error_setg(errp, "Need to set dma_drain for scalable mode");
return false;
@@ -4226,6 +4865,8 @@ static void vtd_realize(DeviceState *dev, Error **errp)
g_free, g_free);
s->vtd_address_spaces = g_hash_table_new_full(vtd_as_hash, vtd_as_equal,
g_free, g_free);
+ s->vtd_host_iommu_dev = g_hash_table_new_full(vtd_hiod_hash, vtd_hiod_equal,
+ g_free, vtd_hiod_destroy);
vtd_init(s);
pci_setup_iommu(bus, &vtd_iommu_ops, dev);
/* Pseudo address space under root PCI bus. */
@@ -4233,19 +4874,22 @@ static void vtd_realize(DeviceState *dev, Error **errp)
qemu_add_machine_init_done_notifier(&vtd_machine_done_notify);
}
-static void vtd_class_init(ObjectClass *klass, void *data)
+static void vtd_class_init(ObjectClass *klass, const void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
X86IOMMUClass *x86_class = X86_IOMMU_DEVICE_CLASS(klass);
+ ResettableClass *rc = RESETTABLE_CLASS(klass);
- dc->reset = vtd_reset;
+ /*
+ * Use 'exit' reset phase to make sure all DMA requests
+ * have been quiesced during 'enter' or 'hold' phase
+ */
+ rc->phases.exit = vtd_reset_exit;
dc->vmsd = &vtd_vmstate;
device_class_set_props(dc, vtd_properties);
dc->hotpluggable = false;
x86_class->realize = vtd_realize;
x86_class->int_remap = vtd_int_remap;
- /* Supported by the pc-q35-* machine types */
- dc->user_creatable = true;
set_bit(DEVICE_CATEGORY_MISC, dc->categories);
dc->desc = "Intel IOMMU (VT-d) DMA Remapping device";
}
@@ -4258,7 +4902,7 @@ static const TypeInfo vtd_info = {
};
static void vtd_iommu_memory_region_class_init(ObjectClass *klass,
- void *data)
+ const void *data)
{
IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index f8cf99b..e8b211e 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -195,6 +195,7 @@
#define VTD_ECAP_PASID (1ULL << 40)
#define VTD_ECAP_SMTS (1ULL << 43)
#define VTD_ECAP_SLTS (1ULL << 46)
+#define VTD_ECAP_FLTS (1ULL << 47)
/* CAP_REG */
/* (offset >> 4) << 24 */
@@ -211,6 +212,7 @@
#define VTD_CAP_SLLPS ((1ULL << 34) | (1ULL << 35))
#define VTD_CAP_DRAIN_WRITE (1ULL << 54)
#define VTD_CAP_DRAIN_READ (1ULL << 55)
+#define VTD_CAP_FS1GP (1ULL << 56)
#define VTD_CAP_DRAIN (VTD_CAP_DRAIN_READ | VTD_CAP_DRAIN_WRITE)
#define VTD_CAP_CM (1ULL << 7)
#define VTD_PASID_ID_SHIFT 20
@@ -264,10 +266,10 @@
#define VTD_FRCD_FR(val) (((val) & 0xffULL) << 32)
#define VTD_FRCD_SID_MASK 0xffffULL
#define VTD_FRCD_SID(val) ((val) & VTD_FRCD_SID_MASK)
+#define VTD_FRCD_PV(val) (((val) & 0xffffULL) << 40)
+#define VTD_FRCD_PP(val) (((val) & 0x1ULL) << 31)
/* For the low 64-bit of 128-bit */
#define VTD_FRCD_FI(val) ((val) & ~0xfffULL)
-#define VTD_FRCD_PV(val) (((val) & 0xffffULL) << 40)
-#define VTD_FRCD_PP(val) (((val) & 0x1) << 31)
#define VTD_FRCD_IR_IDX(val) (((val) & 0xffffULL) << 48)
/* DMA Remapping Fault Conditions */
@@ -311,10 +313,28 @@ typedef enum VTDFaultReason {
* request while disabled */
VTD_FR_IR_SID_ERR = 0x26, /* Invalid Source-ID */
- VTD_FR_PASID_TABLE_INV = 0x58, /*Invalid PASID table entry */
+ /* PASID directory entry access failure */
+ VTD_FR_PASID_DIR_ACCESS_ERR = 0x50,
+ /* The Present(P) field of pasid directory entry is 0 */
+ VTD_FR_PASID_DIR_ENTRY_P = 0x51,
+ VTD_FR_PASID_TABLE_ACCESS_ERR = 0x58, /* PASID table entry access failure */
+ /* The Present(P) field of pasid table entry is 0 */
+ VTD_FR_PASID_ENTRY_P = 0x59,
+ VTD_FR_PASID_TABLE_ENTRY_INV = 0x5b, /* Invalid PASID table entry */
+
+ /* Fail to access a first-level paging entry (not FS_PML4E) */
+ VTD_FR_FS_PAGING_ENTRY_INV = 0x70,
+ VTD_FR_FS_PAGING_ENTRY_P = 0x71,
+ /* Non-zero reserved field in present first-stage paging entry */
+ VTD_FR_FS_PAGING_ENTRY_RSVD = 0x72,
+ VTD_FR_PASID_ENTRY_FSPTPTR_INV = 0x73, /* Invalid FSPTPTR in PASID entry */
+ VTD_FR_FS_NON_CANONICAL = 0x80, /* SNG.1: Address for FS not canonical. */
+ VTD_FR_FS_PAGING_ENTRY_US = 0x81, /* Privilege violation */
+ VTD_FR_SM_WRITE = 0x85, /* No write permission */
/* Output address in the interrupt address range for scalable mode */
VTD_FR_SM_INTERRUPT_ADDR = 0x87,
+ VTD_FR_FS_BIT_UPDATE_FAILED = 0x91, /* SFS.10 */
VTD_FR_MAX, /* Guard */
} VTDFaultReason;
@@ -356,7 +376,9 @@ union VTDInvDesc {
typedef union VTDInvDesc VTDInvDesc;
/* Masks for struct VTDInvDesc */
-#define VTD_INV_DESC_TYPE 0xf
+#define VTD_INV_DESC_ALL_ONE -1ULL
+#define VTD_INV_DESC_TYPE(val) ((((val) >> 5) & 0x70ULL) | \
+ ((val) & 0xfULL))
#define VTD_INV_DESC_CC 0x1 /* Context-cache Invalidate Desc */
#define VTD_INV_DESC_IOTLB 0x2
#define VTD_INV_DESC_DEVICE 0x3
@@ -365,6 +387,7 @@ typedef union VTDInvDesc VTDInvDesc;
#define VTD_INV_DESC_WAIT 0x5 /* Invalidation Wait Descriptor */
#define VTD_INV_DESC_PIOTLB 0x6 /* PASID-IOTLB Invalidate Desc */
#define VTD_INV_DESC_PC 0x7 /* PASID-cache Invalidate Desc */
+#define VTD_INV_DESC_DEV_PIOTLB 0x8 /* PASID-based-DIOTLB inv_desc */
#define VTD_INV_DESC_NONE 0 /* Not an Invalidate Descriptor */
/* Masks for Invalidation Wait Descriptor*/
@@ -372,7 +395,7 @@ typedef union VTDInvDesc VTDInvDesc;
#define VTD_INV_DESC_WAIT_IF (1ULL << 4)
#define VTD_INV_DESC_WAIT_FN (1ULL << 6)
#define VTD_INV_DESC_WAIT_DATA_SHIFT 32
-#define VTD_INV_DESC_WAIT_RSVD_LO 0Xffffff80ULL
+#define VTD_INV_DESC_WAIT_RSVD_LO 0Xfffff180ULL
#define VTD_INV_DESC_WAIT_RSVD_HI 3ULL
/* Masks for Context-cache Invalidation Descriptor */
@@ -383,7 +406,7 @@ typedef union VTDInvDesc VTDInvDesc;
#define VTD_INV_DESC_CC_DID(val) (((val) >> 16) & VTD_DOMAIN_ID_MASK)
#define VTD_INV_DESC_CC_SID(val) (((val) >> 32) & 0xffffUL)
#define VTD_INV_DESC_CC_FM(val) (((val) >> 48) & 3UL)
-#define VTD_INV_DESC_CC_RSVD 0xfffc00000000ffc0ULL
+#define VTD_INV_DESC_CC_RSVD 0xfffc00000000f1c0ULL
/* Masks for IOTLB Invalidate Descriptor */
#define VTD_INV_DESC_IOTLB_G (3ULL << 4)
@@ -393,26 +416,34 @@ typedef union VTDInvDesc VTDInvDesc;
#define VTD_INV_DESC_IOTLB_DID(val) (((val) >> 16) & VTD_DOMAIN_ID_MASK)
#define VTD_INV_DESC_IOTLB_ADDR(val) ((val) & ~0xfffULL)
#define VTD_INV_DESC_IOTLB_AM(val) ((val) & 0x3fULL)
-#define VTD_INV_DESC_IOTLB_RSVD_LO 0xffffffff0000ff00ULL
+#define VTD_INV_DESC_IOTLB_RSVD_LO 0xffffffff0000f100ULL
#define VTD_INV_DESC_IOTLB_RSVD_HI 0xf80ULL
-#define VTD_INV_DESC_IOTLB_PASID_PASID (2ULL << 4)
-#define VTD_INV_DESC_IOTLB_PASID_PAGE (3ULL << 4)
-#define VTD_INV_DESC_IOTLB_PASID(val) (((val) >> 32) & VTD_PASID_ID_MASK)
-#define VTD_INV_DESC_IOTLB_PASID_RSVD_LO 0xfff00000000001c0ULL
-#define VTD_INV_DESC_IOTLB_PASID_RSVD_HI 0xf80ULL
/* Mask for Device IOTLB Invalidate Descriptor */
#define VTD_INV_DESC_DEVICE_IOTLB_ADDR(val) ((val) & 0xfffffffffffff000ULL)
#define VTD_INV_DESC_DEVICE_IOTLB_SIZE(val) ((val) & 0x1)
#define VTD_INV_DESC_DEVICE_IOTLB_SID(val) (((val) >> 32) & 0xFFFFULL)
#define VTD_INV_DESC_DEVICE_IOTLB_RSVD_HI 0xffeULL
-#define VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO 0xffff0000ffe0fff8
+#define VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO 0xffff0000ffe0f1f0
+
+/* Masks for Interrupt Entry Invalidate Descriptor */
+#define VTD_INV_DESC_IEC_RSVD 0xffff000007fff1e0ULL
+
+/* Masks for PASID based Device IOTLB Invalidate Descriptor */
+#define VTD_INV_DESC_PASID_DEVICE_IOTLB_ADDR(val) ((val) & \
+ 0xfffffffffffff000ULL)
+#define VTD_INV_DESC_PASID_DEVICE_IOTLB_SIZE(val) (((val) >> 11) & 0x1)
+#define VTD_INV_DESC_PASID_DEVICE_IOTLB_GLOBAL(val) ((val) & 0x1)
+#define VTD_INV_DESC_PASID_DEVICE_IOTLB_SID(val) (((val) >> 16) & 0xffffULL)
+#define VTD_INV_DESC_PASID_DEVICE_IOTLB_PASID(val) (((val) >> 32) & 0xfffffULL)
+#define VTD_INV_DESC_PASID_DEVICE_IOTLB_RSVD_VAL0 0xfff000000000f000ULL
+#define VTD_INV_DESC_PASID_DEVICE_IOTLB_RSVD_VAL1 0x7feULL
/* Rsvd field masks for spte */
#define VTD_SPTE_SNP 0x800ULL
-#define VTD_SPTE_PAGE_L1_RSVD_MASK(aw, dt_supported) \
- dt_supported ? \
+#define VTD_SPTE_PAGE_L1_RSVD_MASK(aw, stale_tm) \
+ stale_tm ? \
(0x800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM | VTD_SL_TM)) : \
(0x800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM))
#define VTD_SPTE_PAGE_L2_RSVD_MASK(aw) \
@@ -422,21 +453,49 @@ typedef union VTDInvDesc VTDInvDesc;
#define VTD_SPTE_PAGE_L4_RSVD_MASK(aw) \
(0x880ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM))
-#define VTD_SPTE_LPAGE_L2_RSVD_MASK(aw, dt_supported) \
- dt_supported ? \
+#define VTD_SPTE_LPAGE_L2_RSVD_MASK(aw, stale_tm) \
+ stale_tm ? \
(0x1ff800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM | VTD_SL_TM)) : \
(0x1ff800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM))
-#define VTD_SPTE_LPAGE_L3_RSVD_MASK(aw, dt_supported) \
- dt_supported ? \
+#define VTD_SPTE_LPAGE_L3_RSVD_MASK(aw, stale_tm) \
+ stale_tm ? \
(0x3ffff800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM | VTD_SL_TM)) : \
(0x3ffff800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM))
+/* Rsvd field masks for fpte */
+#define VTD_FS_UPPER_IGNORED 0xfff0000000000000ULL
+#define VTD_FPTE_PAGE_L1_RSVD_MASK(aw) \
+ (~(VTD_HAW_MASK(aw) | VTD_FS_UPPER_IGNORED))
+#define VTD_FPTE_PAGE_L2_RSVD_MASK(aw) \
+ (~(VTD_HAW_MASK(aw) | VTD_FS_UPPER_IGNORED))
+#define VTD_FPTE_PAGE_L3_RSVD_MASK(aw) \
+ (~(VTD_HAW_MASK(aw) | VTD_FS_UPPER_IGNORED))
+#define VTD_FPTE_PAGE_L4_RSVD_MASK(aw) \
+ (0x80ULL | ~(VTD_HAW_MASK(aw) | VTD_FS_UPPER_IGNORED))
+
+#define VTD_FPTE_LPAGE_L2_RSVD_MASK(aw) \
+ (0x1fe000ULL | ~(VTD_HAW_MASK(aw) | VTD_FS_UPPER_IGNORED))
+#define VTD_FPTE_LPAGE_L3_RSVD_MASK(aw) \
+ (0x3fffe000ULL | ~(VTD_HAW_MASK(aw) | VTD_FS_UPPER_IGNORED))
+
+/* Masks for PIOTLB Invalidate Descriptor */
+#define VTD_INV_DESC_PIOTLB_G (3ULL << 4)
+#define VTD_INV_DESC_PIOTLB_ALL_IN_PASID (2ULL << 4)
+#define VTD_INV_DESC_PIOTLB_PSI_IN_PASID (3ULL << 4)
+#define VTD_INV_DESC_PIOTLB_DID(val) (((val) >> 16) & VTD_DOMAIN_ID_MASK)
+#define VTD_INV_DESC_PIOTLB_PASID(val) (((val) >> 32) & 0xfffffULL)
+#define VTD_INV_DESC_PIOTLB_AM(val) ((val) & 0x3fULL)
+#define VTD_INV_DESC_PIOTLB_IH(val) (((val) >> 6) & 0x1)
+#define VTD_INV_DESC_PIOTLB_ADDR(val) ((val) & ~0xfffULL)
+#define VTD_INV_DESC_PIOTLB_RSVD_VAL0 0xfff000000000f1c0ULL
+#define VTD_INV_DESC_PIOTLB_RSVD_VAL1 0xf80ULL
+
/* Information about page-selective IOTLB invalidate */
struct VTDIOTLBPageInvInfo {
uint16_t domain_id;
uint32_t pasid;
uint64_t addr;
- uint8_t mask;
+ uint64_t mask;
};
typedef struct VTDIOTLBPageInvInfo VTDIOTLBPageInvInfo;
@@ -514,27 +573,38 @@ typedef struct VTDRootEntry VTDRootEntry;
#define VTD_SM_PASID_ENTRY_AW 7ULL /* Adjusted guest-address-width */
#define VTD_SM_PASID_ENTRY_DID(val) ((val) & VTD_DOMAIN_ID_MASK)
+#define VTD_SM_PASID_ENTRY_FLPM 3ULL
+#define VTD_SM_PASID_ENTRY_FLPTPTR (~0xfffULL)
+
+/* First Level Paging Structure */
+/* Masks for First Level Paging Entry */
+#define VTD_FL_P 1ULL
+#define VTD_FL_RW (1ULL << 1)
+#define VTD_FL_US (1ULL << 2)
+#define VTD_FL_A (1ULL << 5)
+#define VTD_FL_D (1ULL << 6)
+
/* Second Level Page Translation Pointer*/
#define VTD_SM_PASID_ENTRY_SLPTPTR (~0xfffULL)
-/* Paging Structure common */
-#define VTD_SL_PT_PAGE_SIZE_MASK (1ULL << 7)
-/* Bits to decide the offset for each level */
-#define VTD_SL_LEVEL_BITS 9
-
/* Second Level Paging Structure */
-#define VTD_SL_PML4_LEVEL 4
-#define VTD_SL_PDP_LEVEL 3
-#define VTD_SL_PD_LEVEL 2
-#define VTD_SL_PT_LEVEL 1
-#define VTD_SL_PT_ENTRY_NR 512
-
/* Masks for Second Level Paging Entry */
#define VTD_SL_RW_MASK 3ULL
#define VTD_SL_R 1ULL
#define VTD_SL_W (1ULL << 1)
-#define VTD_SL_PT_BASE_ADDR_MASK(aw) (~(VTD_PAGE_SIZE - 1) & VTD_HAW_MASK(aw))
#define VTD_SL_IGN_COM 0xbff0000000000000ULL
#define VTD_SL_TM (1ULL << 62)
+/* Common for both First Level and Second Level */
+#define VTD_PML4_LEVEL 4
+#define VTD_PDP_LEVEL 3
+#define VTD_PD_LEVEL 2
+#define VTD_PT_LEVEL 1
+#define VTD_PT_ENTRY_NR 512
+#define VTD_PT_PAGE_SIZE_MASK (1ULL << 7)
+#define VTD_PT_BASE_ADDR_MASK(aw) (~(VTD_PAGE_SIZE - 1) & VTD_HAW_MASK(aw))
+
+/* Bits to decide the offset for each level */
+#define VTD_LEVEL_BITS 9
+
#endif
diff --git a/hw/i386/kvm/apic.c b/hw/i386/kvm/apic.c
index a72c28e..1be9bfe 100644
--- a/hw/i386/kvm/apic.c
+++ b/hw/i386/kvm/apic.c
@@ -14,9 +14,10 @@
#include "qemu/module.h"
#include "hw/i386/apic_internal.h"
#include "hw/pci/msi.h"
-#include "sysemu/hw_accel.h"
-#include "sysemu/kvm.h"
+#include "system/hw_accel.h"
+#include "system/kvm.h"
#include "kvm/kvm_i386.h"
+#include "kvm/tdx.h"
static inline void kvm_apic_set_reg(struct kvm_lapic_state *kapic,
int reg_id, uint32_t val)
@@ -141,6 +142,10 @@ static void kvm_apic_put(CPUState *cs, run_on_cpu_data data)
struct kvm_lapic_state kapic;
int ret;
+ if (is_tdx_vm()) {
+ return;
+ }
+
kvm_put_apicbase(s->cpu, s->apicbase);
kvm_put_apic_state(s, &kapic);
@@ -214,7 +219,7 @@ static void kvm_apic_mem_write(void *opaque, hwaddr addr,
static const MemoryRegionOps kvm_apic_io_ops = {
.read = kvm_apic_mem_read,
.write = kvm_apic_mem_write,
- .endianness = DEVICE_NATIVE_ENDIAN,
+ .endianness = DEVICE_LITTLE_ENDIAN,
};
static void kvm_apic_reset(APICCommonState *s)
@@ -240,7 +245,7 @@ static void kvm_apic_unrealize(DeviceState *dev)
{
}
-static void kvm_apic_class_init(ObjectClass *klass, void *data)
+static void kvm_apic_class_init(ObjectClass *klass, const void *data)
{
APICCommonClass *k = APIC_COMMON_CLASS(klass);
diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c
index 40aa9a3..f563827 100644
--- a/hw/i386/kvm/clock.c
+++ b/hw/i386/kvm/clock.c
@@ -16,9 +16,9 @@
#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "qemu/module.h"
-#include "sysemu/kvm.h"
-#include "sysemu/runstate.h"
-#include "sysemu/hw_accel.h"
+#include "system/kvm.h"
+#include "system/runstate.h"
+#include "system/hw_accel.h"
#include "kvm/kvm_i386.h"
#include "migration/vmstate.h"
#include "hw/sysbus.h"
@@ -27,7 +27,6 @@
#include "qapi/error.h"
#include <linux/kvm.h>
-#include "standard-headers/asm-x86/kvm_para.h"
#include "qom/object.h"
#define TYPE_KVM_CLOCK "kvmclock"
@@ -305,13 +304,12 @@ static const VMStateDescription kvmclock_vmsd = {
}
};
-static Property kvmclock_properties[] = {
+static const Property kvmclock_properties[] = {
DEFINE_PROP_BOOL("x-mach-use-reliable-get-clock", KVMClockState,
mach_use_reliable_get_clock, true),
- DEFINE_PROP_END_OF_LIST(),
};
-static void kvmclock_class_init(ObjectClass *klass, void *data)
+static void kvmclock_class_init(ObjectClass *klass, const void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
@@ -334,8 +332,8 @@ void kvmclock_create(bool create_always)
assert(kvm_enabled());
if (create_always ||
- cpu->env.features[FEAT_KVM] & ((1ULL << KVM_FEATURE_CLOCKSOURCE) |
- (1ULL << KVM_FEATURE_CLOCKSOURCE2))) {
+ cpu->env.features[FEAT_KVM] & (CPUID_KVM_CLOCK |
+ CPUID_KVM_CLOCK2)) {
sysbus_create_simple(TYPE_KVM_CLOCK, -1, NULL);
}
}
diff --git a/hw/i386/kvm/i8254.c b/hw/i386/kvm/i8254.c
index e49b9c4..14b78f3 100644
--- a/hw/i386/kvm/i8254.c
+++ b/hw/i386/kvm/i8254.c
@@ -29,11 +29,11 @@
#include "qapi/error.h"
#include "qemu/module.h"
#include "qemu/timer.h"
-#include "sysemu/runstate.h"
+#include "system/runstate.h"
#include "hw/timer/i8254.h"
#include "hw/timer/i8254_internal.h"
#include "hw/qdev-properties-system.h"
-#include "sysemu/kvm.h"
+#include "system/kvm.h"
#include "target/i386/kvm/kvm_i386.h"
#include "qom/object.h"
@@ -287,13 +287,12 @@ static void kvm_pit_realizefn(DeviceState *dev, Error **errp)
kpc->parent_realize(dev, errp);
}
-static Property kvm_pit_properties[] = {
+static const Property kvm_pit_properties[] = {
DEFINE_PROP_LOSTTICKPOLICY("lost_tick_policy", KVMPITState,
lost_tick_policy, LOST_TICK_POLICY_DELAY),
- DEFINE_PROP_END_OF_LIST(),
};
-static void kvm_pit_class_init(ObjectClass *klass, void *data)
+static void kvm_pit_class_init(ObjectClass *klass, const void *data)
{
KVMPITClass *kpc = KVM_PIT_CLASS(klass);
PITCommonClass *k = PIT_COMMON_CLASS(klass);
@@ -303,7 +302,7 @@ static void kvm_pit_class_init(ObjectClass *klass, void *data)
&kpc->parent_realize);
k->set_channel_gate = kvm_pit_set_gate;
k->get_channel_info = kvm_pit_get_channel_info;
- dc->reset = kvm_pit_reset;
+ device_class_set_legacy_reset(dc, kvm_pit_reset);
device_class_set_props(dc, kvm_pit_properties);
}
diff --git a/hw/i386/kvm/i8259.c b/hw/i386/kvm/i8259.c
index 3ca0e1f..8a72d6e 100644
--- a/hw/i386/kvm/i8259.c
+++ b/hw/i386/kvm/i8259.c
@@ -16,7 +16,7 @@
#include "qemu/module.h"
#include "hw/intc/kvm_irqcount.h"
#include "hw/irq.h"
-#include "sysemu/kvm.h"
+#include "system/kvm.h"
#include "qom/object.h"
#define TYPE_KVM_I8259 "kvm-i8259"
@@ -139,13 +139,13 @@ qemu_irq *kvm_i8259_init(ISABus *bus)
return qemu_allocate_irqs(kvm_pic_set_irq, NULL, ISA_NUM_IRQS);
}
-static void kvm_i8259_class_init(ObjectClass *klass, void *data)
+static void kvm_i8259_class_init(ObjectClass *klass, const void *data)
{
KVMPICClass *kpc = KVM_PIC_CLASS(klass);
PICCommonClass *k = PIC_COMMON_CLASS(klass);
DeviceClass *dc = DEVICE_CLASS(klass);
- dc->reset = kvm_pic_reset;
+ device_class_set_legacy_reset(dc, kvm_pic_reset);
device_class_set_parent_realize(dc, kvm_pic_realize, &kpc->parent_realize);
k->pre_save = kvm_pic_get;
k->post_load = kvm_pic_put;
diff --git a/hw/i386/kvm/ioapic.c b/hw/i386/kvm/ioapic.c
index b96fe84..693ee97 100644
--- a/hw/i386/kvm/ioapic.c
+++ b/hw/i386/kvm/ioapic.c
@@ -15,7 +15,7 @@
#include "hw/qdev-properties.h"
#include "hw/intc/ioapic_internal.h"
#include "hw/intc/kvm_irqcount.h"
-#include "sysemu/kvm.h"
+#include "system/kvm.h"
#include "kvm/kvm_i386.h"
/* PC Utility function */
@@ -133,12 +133,11 @@ static void kvm_ioapic_realize(DeviceState *dev, Error **errp)
qdev_init_gpio_in(dev, kvm_ioapic_set_irq, IOAPIC_NUM_PINS);
}
-static Property kvm_ioapic_properties[] = {
+static const Property kvm_ioapic_properties[] = {
DEFINE_PROP_UINT32("gsi_base", KVMIOAPICState, kvm_gsi_base, 0),
- DEFINE_PROP_END_OF_LIST()
};
-static void kvm_ioapic_class_init(ObjectClass *klass, void *data)
+static void kvm_ioapic_class_init(ObjectClass *klass, const void *data)
{
IOAPICCommonClass *k = IOAPIC_COMMON_CLASS(klass);
DeviceClass *dc = DEVICE_CLASS(klass);
@@ -146,7 +145,7 @@ static void kvm_ioapic_class_init(ObjectClass *klass, void *data)
k->realize = kvm_ioapic_realize;
k->pre_save = kvm_ioapic_get;
k->post_load = kvm_ioapic_put;
- dc->reset = kvm_ioapic_reset;
+ device_class_set_legacy_reset(dc, kvm_ioapic_reset);
device_class_set_props(dc, kvm_ioapic_properties);
}
diff --git a/hw/i386/kvm/xen-stubs.c b/hw/i386/kvm/xen-stubs.c
index d03131e..ce73119 100644
--- a/hw/i386/kvm/xen-stubs.c
+++ b/hw/i386/kvm/xen-stubs.c
@@ -12,7 +12,6 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
-#include "qapi/qapi-commands-misc-target.h"
#include "xen_evtchn.h"
#include "xen_primary_console.h"
@@ -38,15 +37,3 @@ void xen_primary_console_create(void)
void xen_primary_console_set_be_port(uint16_t port)
{
}
-#ifdef TARGET_I386
-EvtchnInfoList *qmp_xen_event_list(Error **errp)
-{
- error_setg(errp, "Xen event channel emulation not enabled");
- return NULL;
-}
-
-void qmp_xen_event_inject(uint32_t port, Error **errp)
-{
- error_setg(errp, "Xen event channel emulation not enabled");
-}
-#endif
diff --git a/hw/i386/kvm/xen_evtchn.c b/hw/i386/kvm/xen_evtchn.c
index 07bd0c9..dd566c4 100644
--- a/hw/i386/kvm/xen_evtchn.c
+++ b/hw/i386/kvm/xen_evtchn.c
@@ -19,11 +19,11 @@
#include "monitor/monitor.h"
#include "monitor/hmp.h"
#include "qapi/error.h"
-#include "qapi/qapi-commands-misc-target.h"
-#include "qapi/qmp/qdict.h"
+#include "qapi/qapi-commands-misc-i386.h"
+#include "qobject/qdict.h"
#include "qom/object.h"
#include "exec/target_page.h"
-#include "exec/address-spaces.h"
+#include "system/address-spaces.h"
#include "migration/vmstate.h"
#include "trace.h"
@@ -41,8 +41,8 @@
#include "xen_overlay.h"
#include "xen_xenstore.h"
-#include "sysemu/kvm.h"
-#include "sysemu/kvm_xen.h"
+#include "system/kvm.h"
+#include "system/kvm_xen.h"
#include <linux/kvm.h>
#include <sys/eventfd.h>
@@ -140,6 +140,8 @@ struct XenEvtchnState {
uint64_t callback_param;
bool evtchn_in_kernel;
+ bool setting_callback_gsi;
+ int extern_gsi_level;
uint32_t callback_gsi;
QEMUBH *gsi_bh;
@@ -269,7 +271,7 @@ static const VMStateDescription xen_evtchn_vmstate = {
}
};
-static void xen_evtchn_class_init(ObjectClass *klass, void *data)
+static void xen_evtchn_class_init(ObjectClass *klass, const void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
@@ -431,9 +433,22 @@ void xen_evtchn_set_callback_level(int level)
}
if (s->callback_gsi && s->callback_gsi < s->nr_callback_gsis) {
- qemu_set_irq(s->callback_gsis[s->callback_gsi], level);
- if (level) {
- /* Ensure the vCPU polls for deassertion */
+ /*
+ * Ugly, but since we hold the BQL we can set this flag so that
+ * xen_evtchn_set_gsi() can tell the difference between this code
+ * setting the GSI, and an external device (PCI INTx) doing so.
+ */
+ s->setting_callback_gsi = true;
+ /* Do not deassert the line if an external device is asserting it. */
+ qemu_set_irq(s->callback_gsis[s->callback_gsi],
+ level || s->extern_gsi_level);
+ s->setting_callback_gsi = false;
+
+ /*
+ * If the callback GSI is the only one asserted, ensure the status
+ * is polled for deassertion in kvm_arch_post_run().
+ */
+ if (level && !s->extern_gsi_level) {
kvm_xen_set_callback_asserted();
}
}
@@ -1596,7 +1611,7 @@ static int allocate_pirq(XenEvtchnState *s, int type, int gsi)
return pirq;
}
-bool xen_evtchn_set_gsi(int gsi, int level)
+bool xen_evtchn_set_gsi(int gsi, int *level)
{
XenEvtchnState *s = xen_evtchn_singleton;
int pirq;
@@ -1608,16 +1623,35 @@ bool xen_evtchn_set_gsi(int gsi, int level)
}
/*
- * Check that that it *isn't* the event channel GSI, and thus
- * that we are not recursing and it's safe to take s->port_lock.
- *
- * Locking aside, it's perfectly sane to bail out early for that
- * special case, as it would make no sense for the event channel
- * GSI to be routed back to event channels, when the delivery
- * method is to raise the GSI... that recursion wouldn't *just*
- * be a locking issue.
+ * For the callback_gsi we need to implement a logical OR of the event
+ * channel GSI and the external input (e.g. from PCI INTx), because
+ * QEMU itself doesn't support shared level interrupts via demux or
+ * resamplers.
*/
if (gsi && gsi == s->callback_gsi) {
+ /* Remember the external state of the GSI pin (e.g. from PCI INTx) */
+ if (!s->setting_callback_gsi) {
+ s->extern_gsi_level = *level;
+
+ /*
+ * Don't allow the external device to deassert the line if the
+ * event channel GSI should still be asserted.
+ */
+ if (!s->extern_gsi_level) {
+ struct vcpu_info *vi = kvm_xen_get_vcpu_info_hva(0);
+ if (vi && vi->evtchn_upcall_pending) {
+ /* Need to poll for deassertion */
+ kvm_xen_set_callback_asserted();
+ *level = 1;
+ }
+ }
+ }
+
+ /*
+ * The event channel GSI cannot be routed to PIRQ, as that would make
+ * no sense. It could also deadlock on s->port_lock, if we proceed.
+ * So bail out now.
+ */
return false;
}
@@ -1628,7 +1662,7 @@ bool xen_evtchn_set_gsi(int gsi, int level)
return false;
}
- if (level) {
+ if (*level) {
int port = s->pirq[pirq].port;
s->pirq_gsi_set |= (1U << gsi);
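
The hunk above turns the Xen callback GSI into a line shared by two drivers: the event channel upcall and an external device such as PCI INTx. A rough standalone model of the resulting OR semantics, with purely illustrative names rather than the QEMU types:

/* Minimal model of the shared level-triggered line: the pin must stay
 * asserted while either source is high, so deasserting one source must
 * not mask the other. */
#include <stdbool.h>
#include <stdio.h>

struct shared_gsi {
    bool evtchn_level;   /* driven by the event channel upcall */
    bool extern_level;   /* driven externally, e.g. PCI INTx   */
};

static bool shared_gsi_output(const struct shared_gsi *g)
{
    return g->evtchn_level || g->extern_level;
}

int main(void)
{
    struct shared_gsi g = { .evtchn_level = true };
    printf("line=%d\n", shared_gsi_output(&g));   /* 1: evtchn asserts      */
    g.evtchn_level = false;
    g.extern_level = true;
    printf("line=%d\n", shared_gsi_output(&g));   /* 1: INTx still holds it */
    g.extern_level = false;
    printf("line=%d\n", shared_gsi_output(&g));   /* 0: both deasserted     */
    return 0;
}
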
diff --git a/hw/i386/kvm/xen_evtchn.h b/hw/i386/kvm/xen_evtchn.h
index b740acf..0521ebc 100644
--- a/hw/i386/kvm/xen_evtchn.h
+++ b/hw/i386/kvm/xen_evtchn.h
@@ -23,7 +23,7 @@ void xen_evtchn_set_callback_level(int level);
int xen_evtchn_set_port(uint16_t port);
-bool xen_evtchn_set_gsi(int gsi, int level);
+bool xen_evtchn_set_gsi(int gsi, int *level);
void xen_evtchn_snoop_msi(PCIDevice *dev, bool is_msix, unsigned int vector,
uint64_t addr, uint32_t data, bool is_masked);
void xen_evtchn_remove_pci_device(PCIDevice *dev);
diff --git a/hw/i386/kvm/xen_gnttab.c b/hw/i386/kvm/xen_gnttab.c
index 245e4b1..4b9e272 100644
--- a/hw/i386/kvm/xen_gnttab.c
+++ b/hw/i386/kvm/xen_gnttab.c
@@ -17,7 +17,7 @@
#include "qapi/error.h"
#include "qom/object.h"
#include "exec/target_page.h"
-#include "exec/address-spaces.h"
+#include "system/address-spaces.h"
#include "migration/vmstate.h"
#include "hw/sysbus.h"
@@ -27,8 +27,8 @@
#include "xen_gnttab.h"
#include "xen_primary_console.h"
-#include "sysemu/kvm.h"
-#include "sysemu/kvm_xen.h"
+#include "system/kvm.h"
+#include "system/kvm_xen.h"
#include "hw/xen/interface/memory.h"
#include "hw/xen/interface/grant_table.h"
@@ -135,7 +135,7 @@ static const VMStateDescription xen_gnttab_vmstate = {
}
};
-static void xen_gnttab_class_init(ObjectClass *klass, void *data)
+static void xen_gnttab_class_init(ObjectClass *klass, const void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
diff --git a/hw/i386/kvm/xen_overlay.c b/hw/i386/kvm/xen_overlay.c
index c68e78a..3cb7361 100644
--- a/hw/i386/kvm/xen_overlay.c
+++ b/hw/i386/kvm/xen_overlay.c
@@ -16,15 +16,15 @@
#include "qapi/error.h"
#include "qom/object.h"
#include "exec/target_page.h"
-#include "exec/address-spaces.h"
+#include "system/address-spaces.h"
#include "migration/vmstate.h"
#include "hw/sysbus.h"
#include "hw/xen/xen.h"
#include "xen_overlay.h"
-#include "sysemu/kvm.h"
-#include "sysemu/kvm_xen.h"
+#include "system/kvm.h"
+#include "system/kvm_xen.h"
#include <linux/kvm.h>
#include "hw/xen/interface/memory.h"
@@ -151,11 +151,11 @@ static void xen_overlay_reset(DeviceState *dev)
kvm_xen_soft_reset();
}
-static void xen_overlay_class_init(ObjectClass *klass, void *data)
+static void xen_overlay_class_init(ObjectClass *klass, const void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
- dc->reset = xen_overlay_reset;
+ device_class_set_legacy_reset(dc, xen_overlay_reset);
dc->realize = xen_overlay_realize;
dc->vmsd = &xen_overlay_vmstate;
}
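
The xen_overlay change above switches from assigning dc->reset directly to using device_class_set_legacy_reset(). A simplified standalone sketch of the idea behind such a helper: the old single-function handler is kept and invoked from within a phased (enter/hold/exit) reset walk. The types and the phase chosen here are illustrative assumptions, not the actual hw/core implementation.

/* Illustrative adapter: install a legacy one-shot reset handler so it runs
 * during the hold phase of a phased reset. Not the QEMU implementation. */
#include <stdio.h>

typedef struct DemoDevice DemoDevice;

typedef struct {
    void (*enter)(DemoDevice *dev);
    void (*hold)(DemoDevice *dev);
    void (*exit)(DemoDevice *dev);
} ResetPhases;

struct DemoDevice {
    ResetPhases phases;
    void (*legacy_reset)(DemoDevice *dev);
};

static void legacy_reset_hold(DemoDevice *dev)
{
    dev->legacy_reset(dev);
}

static void set_legacy_reset(DemoDevice *dev, void (*fn)(DemoDevice *))
{
    dev->legacy_reset = fn;
    dev->phases.hold = legacy_reset_hold;
}

static void demo_reset(DemoDevice *dev)
{
    printf("device %p reset\n", (void *)dev);
}

int main(void)
{
    DemoDevice dev = { 0 };
    set_legacy_reset(&dev, demo_reset);
    /* A full reset walks enter -> hold -> exit over the device tree. */
    if (dev.phases.enter) dev.phases.enter(&dev);
    if (dev.phases.hold)  dev.phases.hold(&dev);
    if (dev.phases.exit)  dev.phases.exit(&dev);
    return 0;
}
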
diff --git a/hw/i386/kvm/xen_primary_console.c b/hw/i386/kvm/xen_primary_console.c
index abe79f5..6e9d641 100644
--- a/hw/i386/kvm/xen_primary_console.c
+++ b/hw/i386/kvm/xen_primary_console.c
@@ -20,8 +20,8 @@
#include "xen_overlay.h"
#include "xen_primary_console.h"
-#include "sysemu/kvm.h"
-#include "sysemu/kvm_xen.h"
+#include "system/kvm.h"
+#include "system/kvm_xen.h"
#include "trace.h"
@@ -67,7 +67,7 @@ static void xen_primary_console_realize(DeviceState *dev, Error **errp)
xen_primary_console_singleton = s;
}
-static void xen_primary_console_class_init(ObjectClass *klass, void *data)
+static void xen_primary_console_class_init(ObjectClass *klass, const void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
diff --git a/hw/i386/kvm/xen_xenstore.c b/hw/i386/kvm/xen_xenstore.c
index 1a9bc34..42955cc 100644
--- a/hw/i386/kvm/xen_xenstore.c
+++ b/hw/i386/kvm/xen_xenstore.c
@@ -28,8 +28,8 @@
#include "xen_primary_console.h"
#include "xen_xenstore.h"
-#include "sysemu/kvm.h"
-#include "sysemu/kvm_xen.h"
+#include "system/kvm.h"
+#include "system/kvm_xen.h"
#include "trace.h"
@@ -209,7 +209,6 @@ static int xen_xenstore_post_load(void *opaque, int ver)
{
XenXenstoreState *s = opaque;
GByteArray *save;
- int ret;
/*
* As qemu/dom0, rebind to the guest's port. The Windows drivers may
@@ -231,8 +230,7 @@ static int xen_xenstore_post_load(void *opaque, int ver)
s->impl_state = NULL;
s->impl_state_size = 0;
- ret = xs_impl_deserialize(s->impl, save, xen_domid, fire_watch_cb, s);
- return ret;
+ return xs_impl_deserialize(s->impl, save, xen_domid, fire_watch_cb, s);
}
static const VMStateDescription xen_xenstore_vmstate = {
@@ -261,7 +259,7 @@ static const VMStateDescription xen_xenstore_vmstate = {
}
};
-static void xen_xenstore_class_init(ObjectClass *klass, void *data)
+static void xen_xenstore_class_init(ObjectClass *klass, const void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
@@ -532,6 +530,10 @@ static void xs_read(XenXenstoreState *s, unsigned int req_id,
return;
}
+ if (!len) {
+ return;
+ }
+
memcpy(&rsp_data[rsp->len], data->data, len);
rsp->len += len;
}
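
The xs_read() hunk above adds an early return for zero-length reads before the memcpy(). Passing a NULL source pointer to memcpy() is undefined behaviour even when the count is zero, so guarding the copy is the safe pattern when the buffer pointer may be NULL for empty nodes (the motivation is inferred here, not stated in the patch). A standalone sketch of the guarded copy, with illustrative names:

/* Append src to dst, skipping the copy entirely when there is nothing to
 * copy so that a NULL src with length 0 is never handed to memcpy(). */
#include <stddef.h>
#include <string.h>

static size_t append_bytes(unsigned char *dst, size_t dst_len,
                           const unsigned char *src, size_t src_len)
{
    if (!src_len) {
        return dst_len;                    /* nothing to do, src may be NULL */
    }
    memcpy(dst + dst_len, src, src_len);
    return dst_len + src_len;
}

int main(void)
{
    unsigned char buf[8] = { 0 };
    size_t n = append_bytes(buf, 0, NULL, 0);                 /* safe no-op  */
    n = append_bytes(buf, n, (const unsigned char *)"ok", 2);
    return (int)n - 2;                                        /* 0 = success */
}
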
diff --git a/hw/i386/meson.build b/hw/i386/meson.build
index 03aad10..7896f34 100644
--- a/hw/i386/meson.build
+++ b/hw/i386/meson.build
@@ -15,6 +15,7 @@ i386_ss.add(when: 'CONFIG_AMD_IOMMU', if_true: files('amd_iommu.c'),
if_false: files('amd_iommu-stub.c'))
i386_ss.add(when: 'CONFIG_I440FX', if_true: files('pc_piix.c'))
i386_ss.add(when: 'CONFIG_MICROVM', if_true: files('x86-common.c', 'microvm.c', 'acpi-microvm.c', 'microvm-dt.c'))
+i386_ss.add(when: 'CONFIG_NITRO_ENCLAVE', if_true: files('nitro_enclave.c'))
i386_ss.add(when: 'CONFIG_Q35', if_true: files('pc_q35.c'))
i386_ss.add(when: 'CONFIG_VMMOUSE', if_true: files('vmmouse.c'))
i386_ss.add(when: 'CONFIG_VMPORT', if_true: files('vmport.c'))
@@ -31,6 +32,7 @@ i386_ss.add(when: 'CONFIG_PC', if_true: files(
'port92.c'))
i386_ss.add(when: 'CONFIG_X86_FW_OVMF', if_true: files('pc_sysfw_ovmf.c'),
if_false: files('pc_sysfw_ovmf-stubs.c'))
+i386_ss.add(when: 'CONFIG_TDX', if_true: files('tdvf.c', 'tdvf-hob.c'))
subdir('kvm')
subdir('xen')
diff --git a/hw/i386/microvm-dt.c b/hw/i386/microvm-dt.c
index b3049e4..cb27dfd 100644
--- a/hw/i386/microvm-dt.c
+++ b/hw/i386/microvm-dt.c
@@ -33,8 +33,8 @@
#include "qemu/osdep.h"
#include "qemu/cutils.h"
#include "qapi/error.h"
-#include "sysemu/device_tree.h"
-#include "hw/char/serial.h"
+#include "system/device_tree.h"
+#include "hw/char/serial-isa.h"
#include "hw/i386/fw_cfg.h"
#include "hw/rtc/mc146818rtc.h"
#include "hw/sysbus.h"
diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c
index fec63ca..e0daf0d 100644
--- a/hw/i386/microvm.c
+++ b/hw/i386/microvm.c
@@ -22,11 +22,11 @@
#include "qapi/error.h"
#include "qapi/visitor.h"
#include "qapi/qapi-visit-common.h"
-#include "sysemu/sysemu.h"
-#include "sysemu/cpus.h"
-#include "sysemu/numa.h"
-#include "sysemu/reset.h"
-#include "sysemu/runstate.h"
+#include "system/system.h"
+#include "system/cpus.h"
+#include "system/numa.h"
+#include "system/reset.h"
+#include "system/runstate.h"
#include "acpi-microvm.h"
#include "microvm-dt.h"
@@ -39,7 +39,7 @@
#include "hw/intc/i8259.h"
#include "hw/timer/i8254.h"
#include "hw/rtc/mc146818rtc.h"
-#include "hw/char/serial.h"
+#include "hw/char/serial-isa.h"
#include "hw/display/ramfb.h"
#include "hw/i386/topology.h"
#include "hw/i386/e820_memory_layout.h"
@@ -139,7 +139,7 @@ static void create_gpex(MicrovmMachineState *mms)
mms->gpex.mmio64.base, mmio64_alias);
}
- for (i = 0; i < GPEX_NUM_IRQS; i++) {
+ for (i = 0; i < PCI_NUM_PINS; i++) {
sysbus_connect_irq(SYS_BUS_DEVICE(dev), i,
x86ms->gsi[mms->gpex.irq + i]);
}
@@ -283,6 +283,7 @@ static void microvm_devices_init(MicrovmMachineState *mms)
static void microvm_memory_init(MicrovmMachineState *mms)
{
+ MicrovmMachineClass *mmc = MICROVM_MACHINE_GET_CLASS(mms);
MachineState *machine = MACHINE(mms);
X86MachineState *x86ms = X86_MACHINE(mms);
MemoryRegion *ram_below_4g, *ram_above_4g;
@@ -324,13 +325,11 @@ static void microvm_memory_init(MicrovmMachineState *mms)
fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, machine->smp.max_cpus);
fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)machine->ram_size);
fw_cfg_add_i32(fw_cfg, FW_CFG_IRQ0_OVERRIDE, 1);
- fw_cfg_add_file(fw_cfg, "etc/e820", e820_table,
- sizeof(struct e820_entry) * e820_get_num_entries());
rom_set_fw(fw_cfg);
if (machine->kernel_filename != NULL) {
- x86_load_linux(x86ms, fw_cfg, 0, true);
+ mmc->x86_load_linux(x86ms, fw_cfg, 0, true);
}
if (mms->option_roms) {
@@ -452,11 +451,44 @@ static HotplugHandler *microvm_get_hotplug_handler(MachineState *machine,
return NULL;
}
+static void microvm_machine_done(Notifier *notifier, void *data)
+{
+ MicrovmMachineState *mms = container_of(notifier, MicrovmMachineState,
+ machine_done);
+ X86MachineState *x86ms = X86_MACHINE(mms);
+
+ acpi_setup_microvm(mms);
+ dt_setup_microvm(mms);
+ fw_cfg_add_e820(x86ms->fw_cfg);
+}
+
+static void microvm_powerdown_req(Notifier *notifier, void *data)
+{
+ MicrovmMachineState *mms = container_of(notifier, MicrovmMachineState,
+ powerdown_req);
+ X86MachineState *x86ms = X86_MACHINE(mms);
+
+ if (x86ms->acpi_dev) {
+ Object *obj = OBJECT(x86ms->acpi_dev);
+ AcpiDeviceIfClass *adevc = ACPI_DEVICE_IF_GET_CLASS(obj);
+ adevc->send_event(ACPI_DEVICE_IF(x86ms->acpi_dev),
+ ACPI_POWER_DOWN_STATUS);
+ }
+}
+
static void microvm_machine_state_init(MachineState *machine)
{
MicrovmMachineState *mms = MICROVM_MACHINE(machine);
X86MachineState *x86ms = X86_MACHINE(machine);
+ /* State */
+ mms->kernel_cmdline_fixed = false;
+
+ mms->machine_done.notify = microvm_machine_done;
+ qemu_add_machine_init_done_notifier(&mms->machine_done);
+ mms->powerdown_req.notify = microvm_powerdown_req;
+ qemu_register_powerdown_notifier(&mms->powerdown_req);
+
microvm_memory_init(mms);
x86_cpus_init(x86ms, CPU_VERSION_LATEST);
@@ -464,7 +496,7 @@ static void microvm_machine_state_init(MachineState *machine)
microvm_devices_init(mms);
}
-static void microvm_machine_reset(MachineState *machine, ShutdownCause reason)
+static void microvm_machine_reset(MachineState *machine, ResetType type)
{
MicrovmMachineState *mms = MICROVM_MACHINE(machine);
CPUState *cs;
@@ -477,7 +509,7 @@ static void microvm_machine_reset(MachineState *machine, ShutdownCause reason)
mms->kernel_cmdline_fixed = true;
}
- qemu_devices_reset(reason);
+ qemu_devices_reset(type);
CPU_FOREACH(cs) {
cpu = X86_CPU(cs);
@@ -582,29 +614,6 @@ static void microvm_machine_set_auto_kernel_cmdline(Object *obj, bool value,
mms->auto_kernel_cmdline = value;
}
-static void microvm_machine_done(Notifier *notifier, void *data)
-{
- MicrovmMachineState *mms = container_of(notifier, MicrovmMachineState,
- machine_done);
-
- acpi_setup_microvm(mms);
- dt_setup_microvm(mms);
-}
-
-static void microvm_powerdown_req(Notifier *notifier, void *data)
-{
- MicrovmMachineState *mms = container_of(notifier, MicrovmMachineState,
- powerdown_req);
- X86MachineState *x86ms = X86_MACHINE(mms);
-
- if (x86ms->acpi_dev) {
- Object *obj = OBJECT(x86ms->acpi_dev);
- AcpiDeviceIfClass *adevc = ACPI_DEVICE_IF_GET_CLASS(obj);
- adevc->send_event(ACPI_DEVICE_IF(x86ms->acpi_dev),
- ACPI_POWER_DOWN_STATUS);
- }
-}
-
static void microvm_machine_initfn(Object *obj)
{
MicrovmMachineState *mms = MICROVM_MACHINE(obj);
@@ -616,14 +625,6 @@ static void microvm_machine_initfn(Object *obj)
mms->isa_serial = true;
mms->option_roms = true;
mms->auto_kernel_cmdline = true;
-
- /* State */
- mms->kernel_cmdline_fixed = false;
-
- mms->machine_done.notify = microvm_machine_done;
- qemu_add_machine_init_done_notifier(&mms->machine_done);
- mms->powerdown_req.notify = microvm_powerdown_req;
- qemu_register_powerdown_notifier(&mms->powerdown_req);
}
GlobalProperty microvm_properties[] = {
@@ -634,12 +635,15 @@ GlobalProperty microvm_properties[] = {
{ "pcie-root-port", "io-reserve", "0" },
};
-static void microvm_class_init(ObjectClass *oc, void *data)
+static void microvm_class_init(ObjectClass *oc, const void *data)
{
X86MachineClass *x86mc = X86_MACHINE_CLASS(oc);
+ MicrovmMachineClass *mmc = MICROVM_MACHINE_CLASS(oc);
MachineClass *mc = MACHINE_CLASS(oc);
HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
+ mmc->x86_load_linux = x86_load_linux;
+
mc->init = microvm_machine_state_init;
mc->family = "microvm_i386";
@@ -722,7 +726,7 @@ static const TypeInfo microvm_machine_info = {
.instance_init = microvm_machine_initfn,
.class_size = sizeof(MicrovmMachineClass),
.class_init = microvm_class_init,
- .interfaces = (InterfaceInfo[]) {
+ .interfaces = (const InterfaceInfo[]) {
{ TYPE_HOTPLUG_HANDLER },
{ }
},
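
Several of the microvm hunks above rely on the embedded-notifier idiom: a Notifier member lives inside the machine state, is registered at init time, and the callback recovers the enclosing structure with container_of(). A minimal standalone sketch of that idiom with generic stand-in types:

/* The callback receives a pointer to the embedded Notifier and converts it
 * back to the owning structure via offsetof() arithmetic (container_of). */
#include <stddef.h>
#include <stdio.h>

typedef struct Notifier Notifier;
struct Notifier {
    void (*notify)(Notifier *n, void *data);
};

#define container_of(ptr, type, member) \
    ((type *)((char *)(ptr) - offsetof(type, member)))

typedef struct {
    int boot_cpus;
    Notifier machine_done;   /* embedded, registered during machine init */
} DemoMachineState;

static void demo_machine_done(Notifier *n, void *data)
{
    DemoMachineState *s = container_of(n, DemoMachineState, machine_done);
    printf("machine done, boot_cpus=%d\n", s->boot_cpus);
}

int main(void)
{
    DemoMachineState s = { .boot_cpus = 4 };
    s.machine_done.notify = demo_machine_done;
    /* The real notifier list fires this once machine init has completed. */
    s.machine_done.notify(&s.machine_done, NULL);
    return 0;
}
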
diff --git a/hw/i386/monitor.c b/hw/i386/monitor.c
index 1ebd356..79df965 100644
--- a/hw/i386/monitor.c
+++ b/hw/i386/monitor.c
@@ -24,9 +24,9 @@
#include "qemu/osdep.h"
#include "monitor/monitor.h"
-#include "qapi/qmp/qdict.h"
+#include "qobject/qdict.h"
#include "qapi/error.h"
-#include "qapi/qapi-commands-misc-target.h"
+#include "qapi/qapi-commands-misc-i386.h"
#include "hw/i386/x86.h"
#include "hw/rtc/mc146818rtc.h"
diff --git a/hw/i386/multiboot.c b/hw/i386/multiboot.c
index 3332712..6e6b96b 100644
--- a/hw/i386/multiboot.c
+++ b/hw/i386/multiboot.c
@@ -29,7 +29,8 @@
#include "multiboot.h"
#include "hw/loader.h"
#include "elf.h"
-#include "sysemu/sysemu.h"
+#include "exec/target_page.h"
+#include "system/system.h"
#include "qemu/error-report.h"
/* Show multiboot debug output */
@@ -133,9 +134,9 @@ static void mb_add_mod(MultibootState *s,
p = (char *)s->mb_buf + s->offset_mbinfo + MB_MOD_SIZE * s->mb_mods_count;
- stl_p(p + MB_MOD_START, start);
- stl_p(p + MB_MOD_END, end);
- stl_p(p + MB_MOD_CMDLINE, cmdline_phys);
+ stl_le_p(p + MB_MOD_START, start);
+ stl_le_p(p + MB_MOD_END, end);
+ stl_le_p(p + MB_MOD_CMDLINE, cmdline_phys);
mb_debug("mod%02d: "HWADDR_FMT_plx" - "HWADDR_FMT_plx,
s->mb_mods_count, start, end);
@@ -168,9 +169,9 @@ int load_multiboot(X86MachineState *x86ms,
/* Ok, let's see if it is a multiboot image.
The header is 12x32bit long, so the latest entry may be 8192 - 48. */
for (i = 0; i < (8192 - 48); i += 4) {
- if (ldl_p(header+i) == 0x1BADB002) {
- uint32_t checksum = ldl_p(header+i+8);
- flags = ldl_p(header+i+4);
+ if (ldl_le_p(header + i) == 0x1BADB002) {
+ uint32_t checksum = ldl_le_p(header + i + 8);
+ flags = ldl_le_p(header + i + 4);
checksum += flags;
checksum += (uint32_t)0x1BADB002;
if (!checksum) {
@@ -202,8 +203,8 @@ int load_multiboot(X86MachineState *x86ms,
}
kernel_size = load_elf(kernel_filename, NULL, NULL, NULL, &elf_entry,
- &elf_low, &elf_high, NULL, 0, I386_ELF_MACHINE,
- 0, 0);
+ &elf_low, &elf_high, NULL,
+ ELFDATA2LSB, I386_ELF_MACHINE, 0, 0);
if (kernel_size < 0) {
error_report("Error while loading elf kernel");
exit(1);
@@ -223,11 +224,11 @@ int load_multiboot(X86MachineState *x86ms,
mb_kernel_size, (size_t)mh_entry_addr);
} else {
/* Valid if mh_flags sets MULTIBOOT_HEADER_HAS_ADDR. */
- uint32_t mh_header_addr = ldl_p(header+i+12);
- uint32_t mh_load_end_addr = ldl_p(header+i+20);
- uint32_t mh_bss_end_addr = ldl_p(header+i+24);
+ uint32_t mh_header_addr = ldl_le_p(header + i + 12);
+ uint32_t mh_load_end_addr = ldl_le_p(header + i + 20);
+ uint32_t mh_bss_end_addr = ldl_le_p(header + i + 24);
- mh_load_addr = ldl_p(header+i+16);
+ mh_load_addr = ldl_le_p(header + i + 16);
if (mh_header_addr < mh_load_addr) {
error_report("invalid load_addr address");
exit(1);
@@ -239,7 +240,7 @@ int load_multiboot(X86MachineState *x86ms,
uint32_t mb_kernel_text_offset = i - (mh_header_addr - mh_load_addr);
uint32_t mb_load_size = 0;
- mh_entry_addr = ldl_p(header+i+28);
+ mh_entry_addr = ldl_le_p(header + i + 28);
if (mh_load_end_addr) {
if (mh_load_end_addr < mh_load_addr) {
@@ -364,22 +365,21 @@ int load_multiboot(X86MachineState *x86ms,
/* Commandline support */
kcmdline = g_strdup_printf("%s %s", kernel_filename, kernel_cmdline);
- stl_p(bootinfo + MBI_CMDLINE, mb_add_cmdline(&mbs, kcmdline));
-
- stl_p(bootinfo + MBI_BOOTLOADER, mb_add_bootloader(&mbs, bootloader_name));
-
- stl_p(bootinfo + MBI_MODS_ADDR, mbs.mb_buf_phys + mbs.offset_mbinfo);
- stl_p(bootinfo + MBI_MODS_COUNT, mbs.mb_mods_count); /* mods_count */
+ stl_le_p(bootinfo + MBI_CMDLINE, mb_add_cmdline(&mbs, kcmdline));
+ stl_le_p(bootinfo + MBI_BOOTLOADER, mb_add_bootloader(&mbs,
+ bootloader_name));
+ stl_le_p(bootinfo + MBI_MODS_ADDR, mbs.mb_buf_phys + mbs.offset_mbinfo);
+ stl_le_p(bootinfo + MBI_MODS_COUNT, mbs.mb_mods_count); /* mods_count */
/* the kernel is where we want it to be now */
- stl_p(bootinfo + MBI_FLAGS, MULTIBOOT_FLAGS_MEMORY
+ stl_le_p(bootinfo + MBI_FLAGS, MULTIBOOT_FLAGS_MEMORY
| MULTIBOOT_FLAGS_BOOT_DEVICE
| MULTIBOOT_FLAGS_CMDLINE
| MULTIBOOT_FLAGS_MODULES
| MULTIBOOT_FLAGS_MMAP
| MULTIBOOT_FLAGS_BOOTLOADER);
- stl_p(bootinfo + MBI_BOOT_DEVICE, 0x8000ffff); /* XXX: use the -boot switch? */
- stl_p(bootinfo + MBI_MMAP_ADDR, ADDR_E820_MAP);
+ stl_le_p(bootinfo + MBI_BOOT_DEVICE, 0x8000ffff); /* XXX: use the -boot switch? */
+ stl_le_p(bootinfo + MBI_MMAP_ADDR, ADDR_E820_MAP);
mb_debug("multiboot: entry_addr = %#x", mh_entry_addr);
mb_debug(" mb_buf_phys = "HWADDR_FMT_plx, mbs.mb_buf_phys);
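
The multiboot.c hunks replace the target-endian stl_p()/ldl_p() accessors with explicitly little-endian ones: the Multiboot header and info structures are little-endian by definition, so the accesses are now spelled out explicitly rather than relying on the target byte order. A standalone illustration of an explicit little-endian 32-bit store and load that behaves the same on any host (this mirrors the intent, not the stl_le_p()/ldl_le_p() implementation):

/* Explicit little-endian 32-bit accessors built from byte stores/loads,
 * independent of host endianness. */
#include <stdint.h>
#include <stdio.h>

static void store_u32_le(void *p, uint32_t v)
{
    uint8_t *b = p;
    b[0] = v & 0xff;
    b[1] = (v >> 8) & 0xff;
    b[2] = (v >> 16) & 0xff;
    b[3] = (v >> 24) & 0xff;
}

static uint32_t load_u32_le(const void *p)
{
    const uint8_t *b = p;
    return (uint32_t)b[0] | ((uint32_t)b[1] << 8) |
           ((uint32_t)b[2] << 16) | ((uint32_t)b[3] << 24);
}

int main(void)
{
    uint8_t buf[4];
    store_u32_le(buf, 0x1BADB002);            /* the Multiboot magic value */
    printf("0x%08x\n", load_u32_le(buf));     /* 0x1badb002 on any host    */
    return 0;
}
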
diff --git a/hw/i386/nitro_enclave.c b/hw/i386/nitro_enclave.c
new file mode 100644
index 0000000..5ee50f3
--- /dev/null
+++ b/hw/i386/nitro_enclave.c
@@ -0,0 +1,353 @@
+/*
+ * AWS nitro-enclave machine
+ *
+ * Copyright (c) 2024 Dorjoy Chowdhury <dorjoychy111@gmail.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version. See the COPYING file in the
+ * top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+#include "qom/object_interfaces.h"
+
+#include "chardev/char.h"
+#include "hw/sysbus.h"
+#include "hw/core/eif.h"
+#include "hw/i386/x86.h"
+#include "hw/i386/microvm.h"
+#include "hw/i386/nitro_enclave.h"
+#include "hw/virtio/virtio-mmio.h"
+#include "hw/virtio/virtio-nsm.h"
+#include "hw/virtio/vhost-user-vsock.h"
+#include "system/hostmem.h"
+
+static BusState *find_free_virtio_mmio_bus(void)
+{
+ BusChild *kid;
+ BusState *bus = sysbus_get_default();
+
+ QTAILQ_FOREACH(kid, &bus->children, sibling) {
+ DeviceState *dev = kid->child;
+ if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MMIO)) {
+ VirtIOMMIOProxy *mmio = VIRTIO_MMIO(OBJECT(dev));
+ VirtioBusState *mmio_virtio_bus = &mmio->bus;
+ BusState *mmio_bus = &mmio_virtio_bus->parent_obj;
+ if (QTAILQ_EMPTY(&mmio_bus->children)) {
+ return mmio_bus;
+ }
+ }
+ }
+
+ return NULL;
+}
+
+static void vhost_user_vsock_init(NitroEnclaveMachineState *nems)
+{
+ DeviceState *dev = qdev_new(TYPE_VHOST_USER_VSOCK);
+ VHostUserVSock *vsock = VHOST_USER_VSOCK(dev);
+ BusState *bus;
+
+ if (!nems->vsock) {
+ error_report("A valid chardev id for vhost-user-vsock device must be "
+ "provided using the 'vsock' machine option");
+ exit(1);
+ }
+
+ bus = find_free_virtio_mmio_bus();
+ if (!bus) {
+ error_report("Failed to find bus for vhost-user-vsock device");
+ exit(1);
+ }
+
+ Chardev *chardev = qemu_chr_find(nems->vsock);
+ if (!chardev) {
+ error_report("Failed to find chardev with id %s", nems->vsock);
+ exit(1);
+ }
+
+ vsock->conf.chardev.chr = chardev;
+
+ qdev_realize_and_unref(dev, bus, &error_fatal);
+}
+
+static void virtio_nsm_init(NitroEnclaveMachineState *nems)
+{
+ DeviceState *dev = qdev_new(TYPE_VIRTIO_NSM);
+ VirtIONSM *vnsm = VIRTIO_NSM(dev);
+ BusState *bus = find_free_virtio_mmio_bus();
+
+ if (!bus) {
+ error_report("Failed to find bus for virtio-nsm device.");
+ exit(1);
+ }
+
+ qdev_prop_set_string(dev, "module-id", nems->id);
+
+ qdev_realize_and_unref(dev, bus, &error_fatal);
+ nems->vnsm = vnsm;
+}
+
+static void nitro_enclave_devices_init(NitroEnclaveMachineState *nems)
+{
+ vhost_user_vsock_init(nems);
+ virtio_nsm_init(nems);
+}
+
+static void nitro_enclave_machine_state_init(MachineState *machine)
+{
+ NitroEnclaveMachineClass *ne_class =
+ NITRO_ENCLAVE_MACHINE_GET_CLASS(machine);
+ NitroEnclaveMachineState *ne_state = NITRO_ENCLAVE_MACHINE(machine);
+
+ ne_class->parent_init(machine);
+ nitro_enclave_devices_init(ne_state);
+}
+
+static void nitro_enclave_machine_reset(MachineState *machine, ResetType type)
+{
+ NitroEnclaveMachineClass *ne_class =
+ NITRO_ENCLAVE_MACHINE_GET_CLASS(machine);
+ NitroEnclaveMachineState *ne_state = NITRO_ENCLAVE_MACHINE(machine);
+
+ ne_class->parent_reset(machine, type);
+
+ memset(ne_state->vnsm->pcrs, 0, sizeof(ne_state->vnsm->pcrs));
+
+ /* PCR0 */
+ ne_state->vnsm->extend_pcr(ne_state->vnsm, 0, ne_state->image_hash,
+ QCRYPTO_HASH_DIGEST_LEN_SHA384);
+ /* PCR1 */
+ ne_state->vnsm->extend_pcr(ne_state->vnsm, 1, ne_state->bootstrap_hash,
+ QCRYPTO_HASH_DIGEST_LEN_SHA384);
+ /* PCR2 */
+ ne_state->vnsm->extend_pcr(ne_state->vnsm, 2, ne_state->app_hash,
+ QCRYPTO_HASH_DIGEST_LEN_SHA384);
+ /* PCR3 */
+ if (ne_state->parent_role) {
+ ne_state->vnsm->extend_pcr(ne_state->vnsm, 3,
+ (uint8_t *) ne_state->parent_role,
+ strlen(ne_state->parent_role));
+ }
+ /* PCR4 */
+ if (ne_state->parent_id) {
+ ne_state->vnsm->extend_pcr(ne_state->vnsm, 4,
+ (uint8_t *) ne_state->parent_id,
+ strlen(ne_state->parent_id));
+ }
+ /* PCR8 */
+ if (ne_state->signature_found) {
+ ne_state->vnsm->extend_pcr(ne_state->vnsm, 8,
+ ne_state->fingerprint_hash,
+ QCRYPTO_HASH_DIGEST_LEN_SHA384);
+ }
+
+ /* The first 16 PCRs are locked from boot and reserved for the nitro enclave */
+ for (int i = 0; i < 16; ++i) {
+ ne_state->vnsm->lock_pcr(ne_state->vnsm, i);
+ }
+}
+
+static void nitro_enclave_machine_initfn(Object *obj)
+{
+ MicrovmMachineState *mms = MICROVM_MACHINE(obj);
+ X86MachineState *x86ms = X86_MACHINE(obj);
+ NitroEnclaveMachineState *nems = NITRO_ENCLAVE_MACHINE(obj);
+
+ nems->id = g_strdup("i-234-enc5678");
+
+ /* AWS nitro enclaves have PCIE and ACPI disabled */
+ mms->pcie = ON_OFF_AUTO_OFF;
+ x86ms->acpi = ON_OFF_AUTO_OFF;
+}
+
+static void x86_load_eif(X86MachineState *x86ms, FWCfgState *fw_cfg,
+ int acpi_data_size, bool pvh_enabled)
+{
+ Error *err = NULL;
+ char *eif_kernel, *eif_initrd, *eif_cmdline;
+ MachineState *machine = MACHINE(x86ms);
+ NitroEnclaveMachineState *nems = NITRO_ENCLAVE_MACHINE(x86ms);
+
+ if (!read_eif_file(machine->kernel_filename, machine->initrd_filename,
+ &eif_kernel, &eif_initrd, &eif_cmdline,
+ nems->image_hash, nems->bootstrap_hash,
+ nems->app_hash, nems->fingerprint_hash,
+ &(nems->signature_found), &err)) {
+ error_report_err(err);
+ exit(1);
+ }
+
+ g_free(machine->kernel_filename);
+ machine->kernel_filename = eif_kernel;
+ g_free(machine->initrd_filename);
+ machine->initrd_filename = eif_initrd;
+
+ /*
+ * If a kernel cmdline argument was provided, concatenate it to the
+ * extracted EIF kernel cmdline.
+ */
+ if (machine->kernel_cmdline != NULL) {
+ char *cmd = g_strdup_printf("%s %s", eif_cmdline,
+ machine->kernel_cmdline);
+ g_free(eif_cmdline);
+ g_free(machine->kernel_cmdline);
+ machine->kernel_cmdline = cmd;
+ } else {
+ machine->kernel_cmdline = eif_cmdline;
+ }
+
+ x86_load_linux(x86ms, fw_cfg, 0, true);
+
+ unlink(machine->kernel_filename);
+ unlink(machine->initrd_filename);
+}
+
+static bool create_memfd_backend(MachineState *ms, const char *path,
+ Error **errp)
+{
+ Object *obj;
+ MachineClass *mc = MACHINE_GET_CLASS(ms);
+ bool r = false;
+
+ obj = object_new(TYPE_MEMORY_BACKEND_MEMFD);
+ if (!object_property_set_int(obj, "size", ms->ram_size, errp)) {
+ goto out;
+ }
+ object_property_add_child(object_get_objects_root(), mc->default_ram_id,
+ obj);
+
+ if (!user_creatable_complete(USER_CREATABLE(obj), errp)) {
+ goto out;
+ }
+ r = object_property_set_link(OBJECT(ms), "memory-backend", obj, errp);
+
+out:
+ object_unref(obj);
+ return r;
+}
+
+static char *nitro_enclave_get_vsock_chardev_id(Object *obj, Error **errp)
+{
+ NitroEnclaveMachineState *nems = NITRO_ENCLAVE_MACHINE(obj);
+
+ return g_strdup(nems->vsock);
+}
+
+static void nitro_enclave_set_vsock_chardev_id(Object *obj, const char *value,
+ Error **errp)
+{
+ NitroEnclaveMachineState *nems = NITRO_ENCLAVE_MACHINE(obj);
+
+ g_free(nems->vsock);
+ nems->vsock = g_strdup(value);
+}
+
+static char *nitro_enclave_get_id(Object *obj, Error **errp)
+{
+ NitroEnclaveMachineState *nems = NITRO_ENCLAVE_MACHINE(obj);
+
+ return g_strdup(nems->id);
+}
+
+static void nitro_enclave_set_id(Object *obj, const char *value,
+ Error **errp)
+{
+ NitroEnclaveMachineState *nems = NITRO_ENCLAVE_MACHINE(obj);
+
+ g_free(nems->id);
+ nems->id = g_strdup(value);
+}
+
+static char *nitro_enclave_get_parent_role(Object *obj, Error **errp)
+{
+ NitroEnclaveMachineState *nems = NITRO_ENCLAVE_MACHINE(obj);
+
+ return g_strdup(nems->parent_role);
+}
+
+static void nitro_enclave_set_parent_role(Object *obj, const char *value,
+ Error **errp)
+{
+ NitroEnclaveMachineState *nems = NITRO_ENCLAVE_MACHINE(obj);
+
+ g_free(nems->parent_role);
+ nems->parent_role = g_strdup(value);
+}
+
+static char *nitro_enclave_get_parent_id(Object *obj, Error **errp)
+{
+ NitroEnclaveMachineState *nems = NITRO_ENCLAVE_MACHINE(obj);
+
+ return g_strdup(nems->parent_id);
+}
+
+static void nitro_enclave_set_parent_id(Object *obj, const char *value,
+ Error **errp)
+{
+ NitroEnclaveMachineState *nems = NITRO_ENCLAVE_MACHINE(obj);
+
+ g_free(nems->parent_id);
+ nems->parent_id = g_strdup(value);
+}
+
+static void nitro_enclave_class_init(ObjectClass *oc, const void *data)
+{
+ MachineClass *mc = MACHINE_CLASS(oc);
+ MicrovmMachineClass *mmc = MICROVM_MACHINE_CLASS(oc);
+ NitroEnclaveMachineClass *nemc = NITRO_ENCLAVE_MACHINE_CLASS(oc);
+
+ mmc->x86_load_linux = x86_load_eif;
+
+ mc->family = "nitro_enclave_i386";
+ mc->desc = "AWS Nitro Enclave";
+
+ nemc->parent_init = mc->init;
+ mc->init = nitro_enclave_machine_state_init;
+
+ nemc->parent_reset = mc->reset;
+ mc->reset = nitro_enclave_machine_reset;
+
+ mc->create_default_memdev = create_memfd_backend;
+
+ object_class_property_add_str(oc, NITRO_ENCLAVE_VSOCK_CHARDEV_ID,
+ nitro_enclave_get_vsock_chardev_id,
+ nitro_enclave_set_vsock_chardev_id);
+ object_class_property_set_description(oc, NITRO_ENCLAVE_VSOCK_CHARDEV_ID,
+ "Set chardev id for vhost-user-vsock "
+ "device");
+
+ object_class_property_add_str(oc, NITRO_ENCLAVE_ID, nitro_enclave_get_id,
+ nitro_enclave_set_id);
+ object_class_property_set_description(oc, NITRO_ENCLAVE_ID,
+ "Set enclave identifier");
+
+ object_class_property_add_str(oc, NITRO_ENCLAVE_PARENT_ROLE,
+ nitro_enclave_get_parent_role,
+ nitro_enclave_set_parent_role);
+ object_class_property_set_description(oc, NITRO_ENCLAVE_PARENT_ROLE,
+ "Set parent instance IAM role ARN");
+
+ object_class_property_add_str(oc, NITRO_ENCLAVE_PARENT_ID,
+ nitro_enclave_get_parent_id,
+ nitro_enclave_set_parent_id);
+ object_class_property_set_description(oc, NITRO_ENCLAVE_PARENT_ID,
+ "Set parent instance identifier");
+}
+
+static const TypeInfo nitro_enclave_machine_info = {
+ .name = TYPE_NITRO_ENCLAVE_MACHINE,
+ .parent = TYPE_MICROVM_MACHINE,
+ .instance_size = sizeof(NitroEnclaveMachineState),
+ .instance_init = nitro_enclave_machine_initfn,
+ .class_size = sizeof(NitroEnclaveMachineClass),
+ .class_init = nitro_enclave_class_init,
+};
+
+static void nitro_enclave_machine_init(void)
+{
+ type_register_static(&nitro_enclave_machine_info);
+}
+type_init(nitro_enclave_machine_init);
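
nitro_enclave_machine_reset() above seeds PCRs 0-4 and 8 with the hashes extracted from the EIF image and then locks the first 16 PCRs. Assuming the virtio-nsm device applies the usual TPM-style chaining rule, an extend amounts to new_pcr = SHA384(old_pcr || data); that rule is an assumption here, since the authoritative logic lives in hw/virtio/virtio-nsm.c, which this diff does not touch. A rough sketch of such an extend operation using GLib's checksum API:

/* TPM-style extend sketch: hash the old PCR value concatenated with the new
 * measurement and store the digest back. Assumes SHA-384 (48-byte) PCRs. */
#include <glib.h>
#include <string.h>

#define PCR_LEN 48   /* SHA-384 digest size */

static void pcr_extend(guint8 pcr[PCR_LEN], const guint8 *data, gsize len)
{
    GChecksum *cs = g_checksum_new(G_CHECKSUM_SHA384);
    guint8 digest[PCR_LEN];
    gsize dlen = sizeof(digest);

    g_checksum_update(cs, pcr, PCR_LEN);   /* old PCR value first ...      */
    g_checksum_update(cs, data, len);      /* ... then the new measurement */
    g_checksum_get_digest(cs, digest, &dlen);
    g_checksum_free(cs);

    memcpy(pcr, digest, PCR_LEN);
}

int main(void)
{
    guint8 pcr[PCR_LEN] = { 0 };
    pcr_extend(pcr, (const guint8 *)"image-hash-placeholder", 22);
    g_print("pcr[0..3] = %02x%02x%02x%02x\n", pcr[0], pcr[1], pcr[2], pcr[3]);
    return 0;
}
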
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 7741506..b211633 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -24,13 +24,14 @@
#include "qemu/osdep.h"
#include "qemu/units.h"
+#include "exec/target_page.h"
#include "hw/i386/pc.h"
-#include "hw/char/serial.h"
+#include "hw/char/serial-isa.h"
#include "hw/char/parallel.h"
#include "hw/hyperv/hv-balloon.h"
#include "hw/i386/fw_cfg.h"
#include "hw/i386/vmport.h"
-#include "sysemu/cpus.h"
+#include "system/cpus.h"
#include "hw/ide/ide-bus.h"
#include "hw/timer/hpet.h"
#include "hw/loader.h"
@@ -39,12 +40,13 @@
#include "hw/timer/i8254.h"
#include "hw/input/i8042.h"
#include "hw/audio/pcspk.h"
-#include "sysemu/sysemu.h"
-#include "sysemu/xen.h"
-#include "sysemu/reset.h"
+#include "system/system.h"
+#include "system/xen.h"
+#include "system/reset.h"
#include "kvm/kvm_i386.h"
+#include "kvm/tdx.h"
#include "hw/xen/xen.h"
-#include "qapi/qmp/qlist.h"
+#include "qobject/qlist.h"
#include "qemu/error-report.h"
#include "hw/acpi/cpu_hotplug.h"
#include "acpi-build.h"
@@ -79,10 +81,25 @@
{ "qemu64-" TYPE_X86_CPU, "model-id", "QEMU Virtual CPU version " v, },\
{ "athlon-" TYPE_X86_CPU, "model-id", "QEMU Virtual CPU version " v, },
+GlobalProperty pc_compat_10_0[] = {};
+const size_t pc_compat_10_0_len = G_N_ELEMENTS(pc_compat_10_0);
+
+GlobalProperty pc_compat_9_2[] = {};
+const size_t pc_compat_9_2_len = G_N_ELEMENTS(pc_compat_9_2);
+
+GlobalProperty pc_compat_9_1[] = {
+ { "ICH9-LPC", "x-smi-swsmi-timer", "off" },
+ { "ICH9-LPC", "x-smi-periodic-timer", "off" },
+ { TYPE_INTEL_IOMMU_DEVICE, "stale-tm", "on" },
+ { TYPE_INTEL_IOMMU_DEVICE, "aw-bits", "39" },
+};
+const size_t pc_compat_9_1_len = G_N_ELEMENTS(pc_compat_9_1);
+
GlobalProperty pc_compat_9_0[] = {
+ { TYPE_X86_CPU, "x-amd-topoext-features-only", "false" },
{ TYPE_X86_CPU, "x-l1-cache-per-thread", "false" },
{ TYPE_X86_CPU, "guest-phys-bits", "0" },
- { "sev-guest", "legacy-vm-type", "true" },
+ { "sev-guest", "legacy-vm-type", "on" },
{ TYPE_X86_CPU, "legacy-multi-node", "on" },
};
const size_t pc_compat_9_0_len = G_N_ELEMENTS(pc_compat_9_0);
@@ -243,28 +260,6 @@ GlobalProperty pc_compat_2_6[] = {
};
const size_t pc_compat_2_6_len = G_N_ELEMENTS(pc_compat_2_6);
-GlobalProperty pc_compat_2_5[] = {};
-const size_t pc_compat_2_5_len = G_N_ELEMENTS(pc_compat_2_5);
-
-GlobalProperty pc_compat_2_4[] = {
- PC_CPU_MODEL_IDS("2.4.0")
- { "Haswell-" TYPE_X86_CPU, "abm", "off" },
- { "Haswell-noTSX-" TYPE_X86_CPU, "abm", "off" },
- { "Broadwell-" TYPE_X86_CPU, "abm", "off" },
- { "Broadwell-noTSX-" TYPE_X86_CPU, "abm", "off" },
- { "host" "-" TYPE_X86_CPU, "host-cache-info", "on" },
- { TYPE_X86_CPU, "check", "off" },
- { "qemu64" "-" TYPE_X86_CPU, "sse4a", "on" },
- { "qemu64" "-" TYPE_X86_CPU, "abm", "on" },
- { "qemu64" "-" TYPE_X86_CPU, "popcnt", "on" },
- { "qemu32" "-" TYPE_X86_CPU, "popcnt", "on" },
- { "Opteron_G2" "-" TYPE_X86_CPU, "rdtscp", "on" },
- { "Opteron_G3" "-" TYPE_X86_CPU, "rdtscp", "on" },
- { "Opteron_G4" "-" TYPE_X86_CPU, "rdtscp", "on" },
- { "Opteron_G5" "-" TYPE_X86_CPU, "rdtscp", "on", }
-};
-const size_t pc_compat_2_4_len = G_N_ELEMENTS(pc_compat_2_4);
-
/*
* @PC_FW_DATA:
* Size of the chunk of memory at the top of RAM for the BIOS ACPI tables
@@ -452,7 +447,7 @@ static int check_fdc(Object *obj, void *opaque)
}
static const char * const fdc_container_path[] = {
- "/unattached", "/peripheral", "/peripheral-anon"
+ "unattached", "peripheral", "peripheral-anon"
};
/*
@@ -466,7 +461,7 @@ static ISADevice *pc_find_fdc0(void)
CheckFdcState state = { 0 };
for (i = 0; i < ARRAY_SIZE(fdc_container_path); i++) {
- container = container_get(qdev_get_machine(), fdc_container_path[i]);
+ container = machine_get_container(fdc_container_path[i]);
object_child_foreach(container, check_fdc, &state);
}
@@ -620,11 +615,13 @@ void pc_machine_done(Notifier *notifier, void *data)
/* set the number of CPUs */
x86_rtc_set_cpus_count(x86ms->rtc, x86ms->boot_cpus);
- fw_cfg_add_extra_pci_roots(pcms->pcibus, x86ms->fw_cfg);
+ pci_bus_add_fw_cfg_extra_pci_roots(x86ms->fw_cfg, pcms->pcibus,
+ &error_abort);
acpi_setup();
if (x86ms->fw_cfg) {
fw_cfg_build_smbios(pcms, x86ms->fw_cfg, pcms->smbios_entry_point_type);
+ fw_cfg_add_e820(x86ms->fw_cfg);
fw_cfg_build_feature_control(MACHINE(pcms), x86ms->fw_cfg);
/* update FW_CFG_NB_CPUS to account for -device added CPUs */
fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus);
@@ -958,21 +955,23 @@ void pc_memory_init(PCMachineState *pcms,
/* Initialize PC system firmware */
pc_system_firmware_init(pcms, rom_memory);
- option_rom_mr = g_malloc(sizeof(*option_rom_mr));
- if (machine_require_guest_memfd(machine)) {
- memory_region_init_ram_guest_memfd(option_rom_mr, NULL, "pc.rom",
- PC_ROM_SIZE, &error_fatal);
- } else {
- memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE,
- &error_fatal);
- if (pcmc->pci_enabled) {
- memory_region_set_readonly(option_rom_mr, true);
+ if (!is_tdx_vm()) {
+ option_rom_mr = g_malloc(sizeof(*option_rom_mr));
+ if (machine_require_guest_memfd(machine)) {
+ memory_region_init_ram_guest_memfd(option_rom_mr, NULL, "pc.rom",
+ PC_ROM_SIZE, &error_fatal);
+ } else {
+ memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE,
+ &error_fatal);
+ if (pcmc->pci_enabled) {
+ memory_region_set_readonly(option_rom_mr, true);
+ }
}
+ memory_region_add_subregion_overlap(rom_memory,
+ PC_ROM_MIN_VGA,
+ option_rom_mr,
+ 1);
}
- memory_region_add_subregion_overlap(rom_memory,
- PC_ROM_MIN_VGA,
- option_rom_mr,
- 1);
fw_cfg = fw_cfg_arch_create(machine,
x86ms->boot_cpus, x86ms->apic_id_limit);
@@ -981,14 +980,13 @@ void pc_memory_init(PCMachineState *pcms,
if (machine->device_memory) {
uint64_t *val = g_malloc(sizeof(*val));
- uint64_t res_mem_end = machine->device_memory->base;
-
- if (!pcmc->broken_reserved_end) {
- res_mem_end += memory_region_size(&machine->device_memory->mr);
- }
+ uint64_t res_mem_end;
if (pcms->cxl_devices_state.is_enabled) {
res_mem_end = cxl_resv_end;
+ } else {
+ res_mem_end = machine->device_memory->base
+ + memory_region_size(&machine->device_memory->mr);
}
*val = cpu_to_le64(ROUND_UP(res_mem_end, 1 * GiB));
fw_cfg_add_file(fw_cfg, "etc/reserved-memory-end", val, sizeof(*val));
@@ -1026,9 +1024,7 @@ uint64_t pc_pci_hole64_start(void)
hole64_start = pc_get_cxl_range_end(pcms);
} else if (pcmc->has_reserved_memory && (ms->ram_size < ms->maxram_size)) {
pc_get_device_memory_range(pcms, &hole64_start, &size);
- if (!pcmc->broken_reserved_end) {
- hole64_start += size;
- }
+ hole64_start += size;
} else {
hole64_start = pc_above_4g_end(pcms);
}
@@ -1040,7 +1036,6 @@ DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus)
{
DeviceState *dev = NULL;
- rom_set_order_override(FW_CFG_ORDER_OVERRIDE_VGA);
if (pci_bus) {
PCIDevice *pcidev = pci_vga_init(pci_bus);
dev = pcidev ? &pcidev->qdev : NULL;
@@ -1048,14 +1043,14 @@ DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus)
ISADevice *isadev = isa_vga_init(isa_bus);
dev = isadev ? DEVICE(isadev) : NULL;
}
- rom_reset_order_override();
+
return dev;
}
static const MemoryRegionOps ioport80_io_ops = {
.write = ioport80_write,
.read = ioport80_read,
- .endianness = DEVICE_NATIVE_ENDIAN,
+ .endianness = DEVICE_LITTLE_ENDIAN,
.impl = {
.min_access_size = 1,
.max_access_size = 1,
@@ -1065,7 +1060,7 @@ static const MemoryRegionOps ioport80_io_ops = {
static const MemoryRegionOps ioportF0_io_ops = {
.write = ioportF0_write,
.read = ioportF0_read,
- .endianness = DEVICE_NATIVE_ENDIAN,
+ .endianness = DEVICE_LITTLE_ENDIAN,
.impl = {
.min_access_size = 1,
.max_access_size = 1,
@@ -1073,7 +1068,7 @@ static const MemoryRegionOps ioportF0_io_ops = {
};
static void pc_superio_init(ISABus *isa_bus, bool create_fdctrl,
- bool create_i8042, bool no_vmport)
+ bool create_i8042, bool no_vmport, Error **errp)
{
int i;
DriveInfo *fd[MAX_FD];
@@ -1098,6 +1093,10 @@ static void pc_superio_init(ISABus *isa_bus, bool create_fdctrl,
}
if (!create_i8042) {
+ if (!no_vmport) {
+ error_setg(errp,
+ "vmport requires the i8042 controller to be enabled");
+ }
return;
}
@@ -1215,9 +1214,17 @@ void pc_basic_device_init(struct PCMachineState *pcms,
isa_realize_and_unref(pcms->pcspk, isa_bus, &error_fatal);
}
+ if (pcms->vmport == ON_OFF_AUTO_AUTO) {
+ pcms->vmport = (xen_enabled() || !pcms->i8042_enabled)
+ ? ON_OFF_AUTO_OFF : ON_OFF_AUTO_ON;
+ }
+
/* Super I/O */
pc_superio_init(isa_bus, create_fdctrl, pcms->i8042_enabled,
- pcms->vmport != ON_OFF_AUTO_ON);
+ pcms->vmport != ON_OFF_AUTO_ON, &error_fatal);
+
+ pcms->machine_done.notify = pc_machine_done;
+ qemu_add_machine_init_done_notifier(&pcms->machine_done);
}
void pc_nic_init(PCMachineClass *pcmc, ISABus *isa_bus, PCIBus *pci_bus)
@@ -1226,16 +1233,14 @@ void pc_nic_init(PCMachineClass *pcmc, ISABus *isa_bus, PCIBus *pci_bus)
bool default_is_ne2k = g_str_equal(mc->default_nic, TYPE_ISA_NE2000);
NICInfo *nd;
- rom_set_order_override(FW_CFG_ORDER_OVERRIDE_NIC);
-
while ((nd = qemu_find_nic_info(TYPE_ISA_NE2000, default_is_ne2k, NULL))) {
pc_init_ne2k_isa(isa_bus, nd, &error_fatal);
}
/* Anything remaining should be a PCI NIC */
- pci_init_nic_devices(pci_bus, mc->default_nic);
-
- rom_reset_order_override();
+ if (pci_bus) {
+ pci_init_nic_devices(pci_bus, mc->default_nic);
+ }
}
void pc_i8259_create(ISABus *isa_bus, qemu_irq *i8259_irqs)
@@ -1676,7 +1681,7 @@ static void pc_machine_initfn(Object *obj)
pcms->sata_enabled = true;
pcms->i8042_enabled = true;
pcms->max_fw_size = 8 * MiB;
-#ifdef CONFIG_HPET
+#if defined(CONFIG_HPET)
pcms->hpet_enabled = true;
#endif
pcms->fd_bootchk = true;
@@ -1689,17 +1694,14 @@ static void pc_machine_initfn(Object *obj)
if (pcmc->pci_enabled) {
cxl_machine_init(obj, &pcms->cxl_devices_state);
}
-
- pcms->machine_done.notify = pc_machine_done;
- qemu_add_machine_init_done_notifier(&pcms->machine_done);
}
-static void pc_machine_reset(MachineState *machine, ShutdownCause reason)
+static void pc_machine_reset(MachineState *machine, ResetType type)
{
CPUState *cs;
X86CPU *cpu;
- qemu_devices_reset(reason);
+ qemu_devices_reset(type);
/* Reset APIC after devices have been reset to cancel
* any changes that qemu_devices_reset() might have done.
@@ -1714,7 +1716,7 @@ static void pc_machine_reset(MachineState *machine, ShutdownCause reason)
static void pc_machine_wakeup(MachineState *machine)
{
cpu_synchronize_all_states();
- pc_machine_reset(machine, SHUTDOWN_CAUSE_NONE);
+ pc_machine_reset(machine, RESET_TYPE_WAKEUP);
cpu_synchronize_all_post_reset();
}
@@ -1737,7 +1739,7 @@ static bool pc_hotplug_allowed(MachineState *ms, DeviceState *dev, Error **errp)
return true;
}
-static void pc_machine_class_init(ObjectClass *oc, void *data)
+static void pc_machine_class_init(ObjectClass *oc, const void *data)
{
MachineClass *mc = MACHINE_CLASS(oc);
X86MachineClass *x86mc = X86_MACHINE_CLASS(oc);
@@ -1773,6 +1775,10 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
mc->nvdimm_supported = true;
mc->smp_props.dies_supported = true;
mc->smp_props.modules_supported = true;
+ mc->smp_props.cache_supported[CACHE_LEVEL_AND_TYPE_L1D] = true;
+ mc->smp_props.cache_supported[CACHE_LEVEL_AND_TYPE_L1I] = true;
+ mc->smp_props.cache_supported[CACHE_LEVEL_AND_TYPE_L2] = true;
+ mc->smp_props.cache_supported[CACHE_LEVEL_AND_TYPE_L3] = true;
mc->default_ram_id = "pc.ram";
pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_AUTO;
@@ -1805,6 +1811,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
object_class_property_add_bool(oc, PC_MACHINE_I8042,
pc_machine_get_i8042, pc_machine_set_i8042);
+ object_class_property_set_description(oc, PC_MACHINE_I8042,
+ "Enable/disable Intel 8042 PS/2 controller emulation");
object_class_property_add_bool(oc, "default-bus-bypass-iommu",
pc_machine_get_default_bus_bypass_iommu,
@@ -1835,7 +1843,7 @@ static const TypeInfo pc_machine_info = {
.instance_init = pc_machine_initfn,
.class_size = sizeof(PCMachineClass),
.class_init = pc_machine_class_init,
- .interfaces = (InterfaceInfo[]) {
+ .interfaces = (const InterfaceInfo[]) {
{ TYPE_HOTPLUG_HANDLER },
{ }
},
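
pc_basic_device_init() now resolves a vmport setting of "auto" itself: it becomes "off" under Xen or when the i8042 controller is disabled, and pc_superio_init() reports an error when vmport is explicitly requested without i8042. A standalone restatement of that decision logic, with illustrative enum and parameter names:

/* Resolve the vmport tri-state and enforce the i8042 dependency.
 * Returns 0 on success, -1 for the invalid explicit-on/no-i8042 case. */
#include <stdbool.h>
#include <stdio.h>

typedef enum { VMPORT_OFF, VMPORT_ON, VMPORT_AUTO } VmportSetting;

static int resolve_vmport(VmportSetting vmport, bool xen_enabled,
                          bool i8042_enabled, VmportSetting *out)
{
    if (vmport == VMPORT_AUTO) {
        vmport = (xen_enabled || !i8042_enabled) ? VMPORT_OFF : VMPORT_ON;
    }
    if (vmport == VMPORT_ON && !i8042_enabled) {
        return -1;   /* vmport requires the i8042 controller */
    }
    *out = vmport;
    return 0;
}

int main(void)
{
    VmportSetting r;
    printf("%d\n", resolve_vmport(VMPORT_AUTO, false, true, &r) == 0 && r == VMPORT_ON);
    printf("%d\n", resolve_vmport(VMPORT_AUTO, false, false, &r) == 0 && r == VMPORT_OFF);
    printf("%d\n", resolve_vmport(VMPORT_ON, false, false, &r) == -1);
    return 0;
}
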
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index e4930b7..ea7572e 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -43,15 +43,15 @@
#include "hw/ide/isa.h"
#include "hw/ide/pci.h"
#include "hw/irq.h"
-#include "sysemu/kvm.h"
+#include "system/kvm.h"
#include "hw/i386/kvm/clock.h"
#include "hw/sysbus.h"
#include "hw/i2c/smbus_eeprom.h"
-#include "exec/memory.h"
+#include "system/memory.h"
#include "hw/acpi/acpi.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
-#include "sysemu/xen.h"
+#include "system/xen.h"
#ifdef CONFIG_XEN
#include <xen/hvm/hvm_info_table.h>
#include "hw/xen/xen_pt.h"
@@ -61,10 +61,11 @@
#include "hw/xen/xen.h"
#include "migration/global_state.h"
#include "migration/misc.h"
-#include "sysemu/runstate.h"
-#include "sysemu/numa.h"
+#include "system/runstate.h"
+#include "system/numa.h"
#include "hw/hyperv/vmbus-bridge.h"
#include "hw/mem/nvdimm.h"
+#include "hw/uefi/var-service-api.h"
#include "hw/i386/acpi-build.h"
#include "target/i386/cpu.h"
@@ -284,6 +285,8 @@ static void pc_init1(MachineState *machine, const char *pci_type)
pcms->idebus[0] = qdev_get_child_bus(dev, "ide.0");
pcms->idebus[1] = qdev_get_child_bus(dev, "ide.1");
} else {
+ uint32_t irq;
+
isa_bus = isa_bus_new(NULL, system_memory, system_io,
&error_abort);
isa_bus_register_input_irqs(isa_bus, x86ms->gsi);
@@ -291,6 +294,9 @@ static void pc_init1(MachineState *machine, const char *pci_type)
x86ms->rtc = isa_new(TYPE_MC146818_RTC);
qdev_prop_set_int32(DEVICE(x86ms->rtc), "base_year", 2000);
isa_realize_and_unref(x86ms->rtc, isa_bus, &error_fatal);
+ irq = object_property_get_uint(OBJECT(x86ms->rtc), "irq",
+ &error_fatal);
+ isa_connect_gpio_out(ISA_DEVICE(x86ms->rtc), 0, irq);
i8257_dma_init(OBJECT(machine), isa_bus, 0);
pcms->hpet_enabled = false;
@@ -310,11 +316,6 @@ static void pc_init1(MachineState *machine, const char *pci_type)
pc_vga_init(isa_bus, pcmc->pci_enabled ? pcms->pcibus : NULL);
- assert(pcms->vmport != ON_OFF_AUTO__MAX);
- if (pcms->vmport == ON_OFF_AUTO_AUTO) {
- pcms->vmport = xen_enabled() ? ON_OFF_AUTO_OFF : ON_OFF_AUTO_ON;
- }
-
/* init basic PC hardware */
pc_basic_device_init(pcms, isa_bus, x86ms->gsi, x86ms->rtc,
!MACHINE_CLASS(pcmc)->no_floppy, 0x4);
@@ -445,12 +446,16 @@ static void pc_xen_hvm_init(MachineState *machine)
}
#endif
-#define DEFINE_I440FX_MACHINE(suffix, name, optionfn) \
- static void pc_init_##suffix(MachineState *machine) \
- { \
- pc_init1(machine, TYPE_I440FX_PCI_DEVICE); \
- } \
- DEFINE_PC_MACHINE(suffix, name, pc_init_##suffix, optionfn)
+static void pc_i440fx_init(MachineState *machine)
+{
+ pc_init1(machine, TYPE_I440FX_PCI_DEVICE);
+}
+
+#define DEFINE_I440FX_MACHINE(major, minor) \
+ DEFINE_PC_VER_MACHINE(pc_i440fx, "pc-i440fx", pc_i440fx_init, false, NULL, major, minor);
+
+#define DEFINE_I440FX_MACHINE_AS_LATEST(major, minor) \
+ DEFINE_PC_VER_MACHINE(pc_i440fx, "pc-i440fx", pc_i440fx_init, true, "pc", major, minor);
static void pc_i440fx_machine_options(MachineClass *m)
{
@@ -469,6 +474,7 @@ static void pc_i440fx_machine_options(MachineClass *m)
m->no_parallel = !module_object_class_by_name(TYPE_ISA_PARALLEL);
machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE);
machine_class_allow_dynamic_sysbus_dev(m, TYPE_VMBUS_BRIDGE);
+ machine_class_allow_dynamic_sysbus_dev(m, TYPE_UEFI_VARS_X64);
object_class_property_add_enum(oc, "x-south-bridge", "PCSouthBridgeOption",
&PCSouthBridgeOption_lookup,
@@ -478,37 +484,59 @@ static void pc_i440fx_machine_options(MachineClass *m)
"Use a different south bridge than PIIX3");
}
-static void pc_i440fx_9_1_machine_options(MachineClass *m)
+static void pc_i440fx_machine_10_1_options(MachineClass *m)
{
pc_i440fx_machine_options(m);
- m->alias = "pc";
- m->is_default = true;
}
-DEFINE_I440FX_MACHINE(v9_1, "pc-i440fx-9.1",
- pc_i440fx_9_1_machine_options);
+DEFINE_I440FX_MACHINE_AS_LATEST(10, 1);
+
+static void pc_i440fx_machine_10_0_options(MachineClass *m)
+{
+ pc_i440fx_machine_10_1_options(m);
+ compat_props_add(m->compat_props, hw_compat_10_0, hw_compat_10_0_len);
+ compat_props_add(m->compat_props, pc_compat_10_0, pc_compat_10_0_len);
+}
+
+DEFINE_I440FX_MACHINE(10, 0);
+
+static void pc_i440fx_machine_9_2_options(MachineClass *m)
+{
+ pc_i440fx_machine_10_0_options(m);
+ compat_props_add(m->compat_props, hw_compat_9_2, hw_compat_9_2_len);
+ compat_props_add(m->compat_props, pc_compat_9_2, pc_compat_9_2_len);
+}
+
+DEFINE_I440FX_MACHINE(9, 2);
-static void pc_i440fx_9_0_machine_options(MachineClass *m)
+static void pc_i440fx_machine_9_1_options(MachineClass *m)
+{
+ pc_i440fx_machine_9_2_options(m);
+ compat_props_add(m->compat_props, hw_compat_9_1, hw_compat_9_1_len);
+ compat_props_add(m->compat_props, pc_compat_9_1, pc_compat_9_1_len);
+}
+
+DEFINE_I440FX_MACHINE(9, 1);
+
+static void pc_i440fx_machine_9_0_options(MachineClass *m)
{
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
- pc_i440fx_9_1_machine_options(m);
- m->alias = NULL;
- m->is_default = false;
+ pc_i440fx_machine_9_1_options(m);
+ m->smbios_memory_device_size = 16 * GiB;
compat_props_add(m->compat_props, hw_compat_9_0, hw_compat_9_0_len);
compat_props_add(m->compat_props, pc_compat_9_0, pc_compat_9_0_len);
pcmc->isa_bios_alias = false;
}
-DEFINE_I440FX_MACHINE(v9_0, "pc-i440fx-9.0",
- pc_i440fx_9_0_machine_options);
+DEFINE_I440FX_MACHINE(9, 0);
-static void pc_i440fx_8_2_machine_options(MachineClass *m)
+static void pc_i440fx_machine_8_2_options(MachineClass *m)
{
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
- pc_i440fx_9_0_machine_options(m);
+ pc_i440fx_machine_9_0_options(m);
compat_props_add(m->compat_props, hw_compat_8_2, hw_compat_8_2_len);
compat_props_add(m->compat_props, pc_compat_8_2, pc_compat_8_2_len);
@@ -516,28 +544,26 @@ static void pc_i440fx_8_2_machine_options(MachineClass *m)
pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_64;
}
-DEFINE_I440FX_MACHINE(v8_2, "pc-i440fx-8.2",
- pc_i440fx_8_2_machine_options);
+DEFINE_I440FX_MACHINE(8, 2);
-static void pc_i440fx_8_1_machine_options(MachineClass *m)
+static void pc_i440fx_machine_8_1_options(MachineClass *m)
{
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
- pc_i440fx_8_2_machine_options(m);
+ pc_i440fx_machine_8_2_options(m);
pcmc->broken_32bit_mem_addr_check = true;
compat_props_add(m->compat_props, hw_compat_8_1, hw_compat_8_1_len);
compat_props_add(m->compat_props, pc_compat_8_1, pc_compat_8_1_len);
}
-DEFINE_I440FX_MACHINE(v8_1, "pc-i440fx-8.1",
- pc_i440fx_8_1_machine_options);
+DEFINE_I440FX_MACHINE(8, 1);
-static void pc_i440fx_8_0_machine_options(MachineClass *m)
+static void pc_i440fx_machine_8_0_options(MachineClass *m)
{
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
- pc_i440fx_8_1_machine_options(m);
+ pc_i440fx_machine_8_1_options(m);
compat_props_add(m->compat_props, hw_compat_8_0, hw_compat_8_0_len);
compat_props_add(m->compat_props, pc_compat_8_0, pc_compat_8_0_len);
@@ -545,268 +571,217 @@ static void pc_i440fx_8_0_machine_options(MachineClass *m)
pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_32;
}
-DEFINE_I440FX_MACHINE(v8_0, "pc-i440fx-8.0",
- pc_i440fx_8_0_machine_options);
+DEFINE_I440FX_MACHINE(8, 0);
-static void pc_i440fx_7_2_machine_options(MachineClass *m)
+static void pc_i440fx_machine_7_2_options(MachineClass *m)
{
- pc_i440fx_8_0_machine_options(m);
+ pc_i440fx_machine_8_0_options(m);
compat_props_add(m->compat_props, hw_compat_7_2, hw_compat_7_2_len);
compat_props_add(m->compat_props, pc_compat_7_2, pc_compat_7_2_len);
}
-DEFINE_I440FX_MACHINE(v7_2, "pc-i440fx-7.2",
- pc_i440fx_7_2_machine_options);
+DEFINE_I440FX_MACHINE(7, 2)
-static void pc_i440fx_7_1_machine_options(MachineClass *m)
+static void pc_i440fx_machine_7_1_options(MachineClass *m)
{
- pc_i440fx_7_2_machine_options(m);
+ pc_i440fx_machine_7_2_options(m);
compat_props_add(m->compat_props, hw_compat_7_1, hw_compat_7_1_len);
compat_props_add(m->compat_props, pc_compat_7_1, pc_compat_7_1_len);
}
-DEFINE_I440FX_MACHINE(v7_1, "pc-i440fx-7.1",
- pc_i440fx_7_1_machine_options);
+DEFINE_I440FX_MACHINE(7, 1);
-static void pc_i440fx_7_0_machine_options(MachineClass *m)
+static void pc_i440fx_machine_7_0_options(MachineClass *m)
{
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
- pc_i440fx_7_1_machine_options(m);
+ pc_i440fx_machine_7_1_options(m);
pcmc->enforce_amd_1tb_hole = false;
compat_props_add(m->compat_props, hw_compat_7_0, hw_compat_7_0_len);
compat_props_add(m->compat_props, pc_compat_7_0, pc_compat_7_0_len);
}
-DEFINE_I440FX_MACHINE(v7_0, "pc-i440fx-7.0",
- pc_i440fx_7_0_machine_options);
+DEFINE_I440FX_MACHINE(7, 0);
-static void pc_i440fx_6_2_machine_options(MachineClass *m)
+static void pc_i440fx_machine_6_2_options(MachineClass *m)
{
- pc_i440fx_7_0_machine_options(m);
+ pc_i440fx_machine_7_0_options(m);
compat_props_add(m->compat_props, hw_compat_6_2, hw_compat_6_2_len);
compat_props_add(m->compat_props, pc_compat_6_2, pc_compat_6_2_len);
}
-DEFINE_I440FX_MACHINE(v6_2, "pc-i440fx-6.2",
- pc_i440fx_6_2_machine_options);
+DEFINE_I440FX_MACHINE(6, 2);
-static void pc_i440fx_6_1_machine_options(MachineClass *m)
+static void pc_i440fx_machine_6_1_options(MachineClass *m)
{
- pc_i440fx_6_2_machine_options(m);
+ pc_i440fx_machine_6_2_options(m);
compat_props_add(m->compat_props, hw_compat_6_1, hw_compat_6_1_len);
compat_props_add(m->compat_props, pc_compat_6_1, pc_compat_6_1_len);
m->smp_props.prefer_sockets = true;
}
-DEFINE_I440FX_MACHINE(v6_1, "pc-i440fx-6.1",
- pc_i440fx_6_1_machine_options);
+DEFINE_I440FX_MACHINE(6, 1);
-static void pc_i440fx_6_0_machine_options(MachineClass *m)
+static void pc_i440fx_machine_6_0_options(MachineClass *m)
{
- pc_i440fx_6_1_machine_options(m);
+ pc_i440fx_machine_6_1_options(m);
compat_props_add(m->compat_props, hw_compat_6_0, hw_compat_6_0_len);
compat_props_add(m->compat_props, pc_compat_6_0, pc_compat_6_0_len);
}
-DEFINE_I440FX_MACHINE(v6_0, "pc-i440fx-6.0",
- pc_i440fx_6_0_machine_options);
+DEFINE_I440FX_MACHINE(6, 0);
-static void pc_i440fx_5_2_machine_options(MachineClass *m)
+static void pc_i440fx_machine_5_2_options(MachineClass *m)
{
- pc_i440fx_6_0_machine_options(m);
+ pc_i440fx_machine_6_0_options(m);
compat_props_add(m->compat_props, hw_compat_5_2, hw_compat_5_2_len);
compat_props_add(m->compat_props, pc_compat_5_2, pc_compat_5_2_len);
}
-DEFINE_I440FX_MACHINE(v5_2, "pc-i440fx-5.2",
- pc_i440fx_5_2_machine_options);
+DEFINE_I440FX_MACHINE(5, 2);
-static void pc_i440fx_5_1_machine_options(MachineClass *m)
+static void pc_i440fx_machine_5_1_options(MachineClass *m)
{
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
- pc_i440fx_5_2_machine_options(m);
+ pc_i440fx_machine_5_2_options(m);
compat_props_add(m->compat_props, hw_compat_5_1, hw_compat_5_1_len);
compat_props_add(m->compat_props, pc_compat_5_1, pc_compat_5_1_len);
pcmc->kvmclock_create_always = false;
pcmc->pci_root_uid = 1;
}
-DEFINE_I440FX_MACHINE(v5_1, "pc-i440fx-5.1",
- pc_i440fx_5_1_machine_options);
+DEFINE_I440FX_MACHINE(5, 1);
-static void pc_i440fx_5_0_machine_options(MachineClass *m)
+static void pc_i440fx_machine_5_0_options(MachineClass *m)
{
- pc_i440fx_5_1_machine_options(m);
+ pc_i440fx_machine_5_1_options(m);
m->numa_mem_supported = true;
compat_props_add(m->compat_props, hw_compat_5_0, hw_compat_5_0_len);
compat_props_add(m->compat_props, pc_compat_5_0, pc_compat_5_0_len);
m->auto_enable_numa_with_memdev = false;
}
-DEFINE_I440FX_MACHINE(v5_0, "pc-i440fx-5.0",
- pc_i440fx_5_0_machine_options);
+DEFINE_I440FX_MACHINE(5, 0);
-static void pc_i440fx_4_2_machine_options(MachineClass *m)
+static void pc_i440fx_machine_4_2_options(MachineClass *m)
{
- pc_i440fx_5_0_machine_options(m);
+ pc_i440fx_machine_5_0_options(m);
compat_props_add(m->compat_props, hw_compat_4_2, hw_compat_4_2_len);
compat_props_add(m->compat_props, pc_compat_4_2, pc_compat_4_2_len);
}
-DEFINE_I440FX_MACHINE(v4_2, "pc-i440fx-4.2",
- pc_i440fx_4_2_machine_options);
+DEFINE_I440FX_MACHINE(4, 2);
-static void pc_i440fx_4_1_machine_options(MachineClass *m)
+static void pc_i440fx_machine_4_1_options(MachineClass *m)
{
- pc_i440fx_4_2_machine_options(m);
+ pc_i440fx_machine_4_2_options(m);
compat_props_add(m->compat_props, hw_compat_4_1, hw_compat_4_1_len);
compat_props_add(m->compat_props, pc_compat_4_1, pc_compat_4_1_len);
}
-DEFINE_I440FX_MACHINE(v4_1, "pc-i440fx-4.1",
- pc_i440fx_4_1_machine_options);
+DEFINE_I440FX_MACHINE(4, 1);
-static void pc_i440fx_4_0_machine_options(MachineClass *m)
+static void pc_i440fx_machine_4_0_options(MachineClass *m)
{
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
- pc_i440fx_4_1_machine_options(m);
+ pc_i440fx_machine_4_1_options(m);
pcmc->default_cpu_version = CPU_VERSION_LEGACY;
compat_props_add(m->compat_props, hw_compat_4_0, hw_compat_4_0_len);
compat_props_add(m->compat_props, pc_compat_4_0, pc_compat_4_0_len);
}
-DEFINE_I440FX_MACHINE(v4_0, "pc-i440fx-4.0",
- pc_i440fx_4_0_machine_options);
+DEFINE_I440FX_MACHINE(4, 0);
-static void pc_i440fx_3_1_machine_options(MachineClass *m)
+static void pc_i440fx_machine_3_1_options(MachineClass *m)
{
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
- pc_i440fx_4_0_machine_options(m);
+ pc_i440fx_machine_4_0_options(m);
m->smbus_no_migration_support = true;
pcmc->pvh_enabled = false;
compat_props_add(m->compat_props, hw_compat_3_1, hw_compat_3_1_len);
compat_props_add(m->compat_props, pc_compat_3_1, pc_compat_3_1_len);
}
-DEFINE_I440FX_MACHINE(v3_1, "pc-i440fx-3.1",
- pc_i440fx_3_1_machine_options);
+DEFINE_I440FX_MACHINE(3, 1);
-static void pc_i440fx_3_0_machine_options(MachineClass *m)
+static void pc_i440fx_machine_3_0_options(MachineClass *m)
{
- pc_i440fx_3_1_machine_options(m);
+ pc_i440fx_machine_3_1_options(m);
compat_props_add(m->compat_props, hw_compat_3_0, hw_compat_3_0_len);
compat_props_add(m->compat_props, pc_compat_3_0, pc_compat_3_0_len);
}
-DEFINE_I440FX_MACHINE(v3_0, "pc-i440fx-3.0",
- pc_i440fx_3_0_machine_options);
+DEFINE_I440FX_MACHINE(3, 0);
-static void pc_i440fx_2_12_machine_options(MachineClass *m)
+static void pc_i440fx_machine_2_12_options(MachineClass *m)
{
- pc_i440fx_3_0_machine_options(m);
- m->deprecation_reason = "old and unattended - use a newer version instead";
+ pc_i440fx_machine_3_0_options(m);
compat_props_add(m->compat_props, hw_compat_2_12, hw_compat_2_12_len);
compat_props_add(m->compat_props, pc_compat_2_12, pc_compat_2_12_len);
}
-DEFINE_I440FX_MACHINE(v2_12, "pc-i440fx-2.12",
- pc_i440fx_2_12_machine_options);
+DEFINE_I440FX_MACHINE(2, 12);
-static void pc_i440fx_2_11_machine_options(MachineClass *m)
+static void pc_i440fx_machine_2_11_options(MachineClass *m)
{
- pc_i440fx_2_12_machine_options(m);
+ pc_i440fx_machine_2_12_options(m);
compat_props_add(m->compat_props, hw_compat_2_11, hw_compat_2_11_len);
compat_props_add(m->compat_props, pc_compat_2_11, pc_compat_2_11_len);
}
-DEFINE_I440FX_MACHINE(v2_11, "pc-i440fx-2.11",
- pc_i440fx_2_11_machine_options);
+DEFINE_I440FX_MACHINE(2, 11);
-static void pc_i440fx_2_10_machine_options(MachineClass *m)
+static void pc_i440fx_machine_2_10_options(MachineClass *m)
{
- pc_i440fx_2_11_machine_options(m);
+ pc_i440fx_machine_2_11_options(m);
compat_props_add(m->compat_props, hw_compat_2_10, hw_compat_2_10_len);
compat_props_add(m->compat_props, pc_compat_2_10, pc_compat_2_10_len);
m->auto_enable_numa_with_memhp = false;
}
-DEFINE_I440FX_MACHINE(v2_10, "pc-i440fx-2.10",
- pc_i440fx_2_10_machine_options);
+DEFINE_I440FX_MACHINE(2, 10);
-static void pc_i440fx_2_9_machine_options(MachineClass *m)
+static void pc_i440fx_machine_2_9_options(MachineClass *m)
{
- pc_i440fx_2_10_machine_options(m);
+ pc_i440fx_machine_2_10_options(m);
compat_props_add(m->compat_props, hw_compat_2_9, hw_compat_2_9_len);
compat_props_add(m->compat_props, pc_compat_2_9, pc_compat_2_9_len);
}
-DEFINE_I440FX_MACHINE(v2_9, "pc-i440fx-2.9",
- pc_i440fx_2_9_machine_options);
+DEFINE_I440FX_MACHINE(2, 9);
-static void pc_i440fx_2_8_machine_options(MachineClass *m)
+static void pc_i440fx_machine_2_8_options(MachineClass *m)
{
- pc_i440fx_2_9_machine_options(m);
+ pc_i440fx_machine_2_9_options(m);
compat_props_add(m->compat_props, hw_compat_2_8, hw_compat_2_8_len);
compat_props_add(m->compat_props, pc_compat_2_8, pc_compat_2_8_len);
}
-DEFINE_I440FX_MACHINE(v2_8, "pc-i440fx-2.8",
- pc_i440fx_2_8_machine_options);
+DEFINE_I440FX_MACHINE(2, 8);
-static void pc_i440fx_2_7_machine_options(MachineClass *m)
+static void pc_i440fx_machine_2_7_options(MachineClass *m)
{
- pc_i440fx_2_8_machine_options(m);
+ pc_i440fx_machine_2_8_options(m);
compat_props_add(m->compat_props, hw_compat_2_7, hw_compat_2_7_len);
compat_props_add(m->compat_props, pc_compat_2_7, pc_compat_2_7_len);
}
-DEFINE_I440FX_MACHINE(v2_7, "pc-i440fx-2.7",
- pc_i440fx_2_7_machine_options);
+DEFINE_I440FX_MACHINE(2, 7);
-static void pc_i440fx_2_6_machine_options(MachineClass *m)
+static void pc_i440fx_machine_2_6_options(MachineClass *m)
{
X86MachineClass *x86mc = X86_MACHINE_CLASS(m);
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
- pc_i440fx_2_7_machine_options(m);
+ pc_i440fx_machine_2_7_options(m);
pcmc->legacy_cpu_hotplug = true;
x86mc->fwcfg_dma_enabled = false;
compat_props_add(m->compat_props, hw_compat_2_6, hw_compat_2_6_len);
compat_props_add(m->compat_props, pc_compat_2_6, pc_compat_2_6_len);
}
-DEFINE_I440FX_MACHINE(v2_6, "pc-i440fx-2.6",
- pc_i440fx_2_6_machine_options);
-
-static void pc_i440fx_2_5_machine_options(MachineClass *m)
-{
- X86MachineClass *x86mc = X86_MACHINE_CLASS(m);
-
- pc_i440fx_2_6_machine_options(m);
- x86mc->save_tsc_khz = false;
- m->legacy_fw_cfg_order = 1;
- compat_props_add(m->compat_props, hw_compat_2_5, hw_compat_2_5_len);
- compat_props_add(m->compat_props, pc_compat_2_5, pc_compat_2_5_len);
-}
-
-DEFINE_I440FX_MACHINE(v2_5, "pc-i440fx-2.5",
- pc_i440fx_2_5_machine_options);
-
-static void pc_i440fx_2_4_machine_options(MachineClass *m)
-{
- PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
-
- pc_i440fx_2_5_machine_options(m);
- m->hw_version = "2.4.0";
- pcmc->broken_reserved_end = true;
- compat_props_add(m->compat_props, hw_compat_2_4, hw_compat_2_4_len);
- compat_props_add(m->compat_props, pc_compat_2_4, pc_compat_2_4_len);
-}
-
-DEFINE_I440FX_MACHINE(v2_4, "pc-i440fx-2.4",
- pc_i440fx_2_4_machine_options)
+DEFINE_I440FX_MACHINE(2, 6);
#ifdef CONFIG_ISAPC
static void isapc_machine_options(MachineClass *m)
@@ -833,20 +808,20 @@ DEFINE_PC_MACHINE(isapc, "isapc", pc_init_isa,
#endif
#ifdef CONFIG_XEN
-static void xenfv_4_2_machine_options(MachineClass *m)
+static void xenfv_machine_4_2_options(MachineClass *m)
{
- pc_i440fx_4_2_machine_options(m);
+ pc_i440fx_machine_4_2_options(m);
m->desc = "Xen Fully-virtualized PC";
m->max_cpus = HVM_MAX_VCPUS;
m->default_machine_opts = "accel=xen,suppress-vmdesc=on";
}
DEFINE_PC_MACHINE(xenfv_4_2, "xenfv-4.2", pc_xen_hvm_init,
- xenfv_4_2_machine_options);
+ xenfv_machine_4_2_options);
-static void xenfv_3_1_machine_options(MachineClass *m)
+static void xenfv_machine_3_1_options(MachineClass *m)
{
- pc_i440fx_3_1_machine_options(m);
+ pc_i440fx_machine_3_1_options(m);
m->desc = "Xen Fully-virtualized PC";
m->alias = "xenfv";
m->max_cpus = HVM_MAX_VCPUS;
@@ -854,5 +829,5 @@ static void xenfv_3_1_machine_options(MachineClass *m)
}
DEFINE_PC_MACHINE(xenfv, "xenfv-3.1", pc_xen_hvm_init,
- xenfv_3_1_machine_options);
+ xenfv_machine_3_1_options);
#endif
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index bd7db4a..33211b1 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -35,8 +35,8 @@
#include "hw/loader.h"
#include "hw/i2c/smbus_eeprom.h"
#include "hw/rtc/mc146818rtc.h"
-#include "sysemu/tcg.h"
-#include "sysemu/kvm.h"
+#include "system/tcg.h"
+#include "system/kvm.h"
#include "hw/i386/kvm/clock.h"
#include "hw/pci-host/q35.h"
#include "hw/pci/pcie_port.h"
@@ -55,9 +55,10 @@
#include "hw/usb/hcd-uhci.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
-#include "sysemu/numa.h"
+#include "system/numa.h"
#include "hw/hyperv/vmbus-bridge.h"
#include "hw/mem/nvdimm.h"
+#include "hw/uefi/var-service-api.h"
#include "hw/i386/acpi-build.h"
#include "target/i386/cpu.h"
@@ -276,11 +277,6 @@ static void pc_q35_init(MachineState *machine)
x86_register_ferr_irq(x86ms->gsi[13]);
}
- assert(pcms->vmport != ON_OFF_AUTO__MAX);
- if (pcms->vmport == ON_OFF_AUTO_AUTO) {
- pcms->vmport = ON_OFF_AUTO_ON;
- }
-
/* init basic PC hardware */
pc_basic_device_init(pcms, isa_bus, x86ms->gsi, x86ms->rtc, !mc->no_floppy,
0xff0104);
@@ -331,17 +327,14 @@ static void pc_q35_init(MachineState *machine)
}
}
-#define DEFINE_Q35_MACHINE(suffix, name, compatfn, optionfn) \
- static void pc_init_##suffix(MachineState *machine) \
- { \
- void (*compat)(MachineState *m) = (compatfn); \
- if (compat) { \
- compat(machine); \
- } \
- pc_q35_init(machine); \
- } \
- DEFINE_PC_MACHINE(suffix, name, pc_init_##suffix, optionfn)
+#define DEFINE_Q35_MACHINE(major, minor) \
+ DEFINE_PC_VER_MACHINE(pc_q35, "pc-q35", pc_q35_init, false, NULL, major, minor);
+#define DEFINE_Q35_MACHINE_AS_LATEST(major, minor) \
+ DEFINE_PC_VER_MACHINE(pc_q35, "pc-q35", pc_q35_init, false, "q35", major, minor);
+
+#define DEFINE_Q35_MACHINE_BUGFIX(major, minor, micro) \
+ DEFINE_PC_VER_MACHINE(pc_q35, "pc-q35", pc_q35_init, false, NULL, major, minor, micro);
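+
+/*
+ * Each DEFINE_Q35_MACHINE(major, minor) invocation below pairs with the
+ * pc_q35_machine_<major>_<minor>_options() callback of the same version.
+ */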
static void pc_q35_machine_options(MachineClass *m)
{
@@ -363,36 +356,61 @@ static void pc_q35_machine_options(MachineClass *m)
machine_class_allow_dynamic_sysbus_dev(m, TYPE_INTEL_IOMMU_DEVICE);
machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE);
machine_class_allow_dynamic_sysbus_dev(m, TYPE_VMBUS_BRIDGE);
+ machine_class_allow_dynamic_sysbus_dev(m, TYPE_UEFI_VARS_X64);
compat_props_add(m->compat_props,
pc_q35_compat_defaults, pc_q35_compat_defaults_len);
}
-static void pc_q35_9_1_machine_options(MachineClass *m)
+static void pc_q35_machine_10_1_options(MachineClass *m)
{
pc_q35_machine_options(m);
- m->alias = "q35";
}
-DEFINE_Q35_MACHINE(v9_1, "pc-q35-9.1", NULL,
- pc_q35_9_1_machine_options);
+DEFINE_Q35_MACHINE_AS_LATEST(10, 1);
+
+static void pc_q35_machine_10_0_options(MachineClass *m)
+{
+ pc_q35_machine_10_1_options(m);
+ compat_props_add(m->compat_props, hw_compat_10_0, hw_compat_10_0_len);
+ compat_props_add(m->compat_props, pc_compat_10_0, pc_compat_10_0_len);
+}
+
+DEFINE_Q35_MACHINE(10, 0);
-static void pc_q35_9_0_machine_options(MachineClass *m)
+static void pc_q35_machine_9_2_options(MachineClass *m)
+{
+ pc_q35_machine_10_0_options(m);
+ compat_props_add(m->compat_props, hw_compat_9_2, hw_compat_9_2_len);
+ compat_props_add(m->compat_props, pc_compat_9_2, pc_compat_9_2_len);
+}
+
+DEFINE_Q35_MACHINE(9, 2);
+
+static void pc_q35_machine_9_1_options(MachineClass *m)
+{
+ pc_q35_machine_9_2_options(m);
+ compat_props_add(m->compat_props, hw_compat_9_1, hw_compat_9_1_len);
+ compat_props_add(m->compat_props, pc_compat_9_1, pc_compat_9_1_len);
+}
+
+DEFINE_Q35_MACHINE(9, 1);
+
+static void pc_q35_machine_9_0_options(MachineClass *m)
{
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
- pc_q35_9_1_machine_options(m);
- m->alias = NULL;
+ pc_q35_machine_9_1_options(m);
+ m->smbios_memory_device_size = 16 * GiB;
compat_props_add(m->compat_props, hw_compat_9_0, hw_compat_9_0_len);
compat_props_add(m->compat_props, pc_compat_9_0, pc_compat_9_0_len);
pcmc->isa_bios_alias = false;
}
-DEFINE_Q35_MACHINE(v9_0, "pc-q35-9.0", NULL,
- pc_q35_9_0_machine_options);
+DEFINE_Q35_MACHINE(9, 0);
-static void pc_q35_8_2_machine_options(MachineClass *m)
+static void pc_q35_machine_8_2_options(MachineClass *m)
{
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
- pc_q35_9_0_machine_options(m);
+ pc_q35_machine_9_0_options(m);
m->max_cpus = 1024;
compat_props_add(m->compat_props, hw_compat_8_2, hw_compat_8_2_len);
compat_props_add(m->compat_props, pc_compat_8_2, pc_compat_8_2_len);
@@ -400,26 +418,24 @@ static void pc_q35_8_2_machine_options(MachineClass *m)
pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_64;
}
-DEFINE_Q35_MACHINE(v8_2, "pc-q35-8.2", NULL,
- pc_q35_8_2_machine_options);
+DEFINE_Q35_MACHINE(8, 2);
-static void pc_q35_8_1_machine_options(MachineClass *m)
+static void pc_q35_machine_8_1_options(MachineClass *m)
{
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
- pc_q35_8_2_machine_options(m);
+ pc_q35_machine_8_2_options(m);
pcmc->broken_32bit_mem_addr_check = true;
compat_props_add(m->compat_props, hw_compat_8_1, hw_compat_8_1_len);
compat_props_add(m->compat_props, pc_compat_8_1, pc_compat_8_1_len);
}
-DEFINE_Q35_MACHINE(v8_1, "pc-q35-8.1", NULL,
- pc_q35_8_1_machine_options);
+DEFINE_Q35_MACHINE(8, 1);
-static void pc_q35_8_0_machine_options(MachineClass *m)
+static void pc_q35_machine_8_0_options(MachineClass *m)
{
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
- pc_q35_8_1_machine_options(m);
+ pc_q35_machine_8_1_options(m);
compat_props_add(m->compat_props, hw_compat_8_0, hw_compat_8_0_len);
compat_props_add(m->compat_props, pc_compat_8_0, pc_compat_8_0_len);
@@ -428,132 +444,120 @@ static void pc_q35_8_0_machine_options(MachineClass *m)
m->max_cpus = 288;
}
-DEFINE_Q35_MACHINE(v8_0, "pc-q35-8.0", NULL,
- pc_q35_8_0_machine_options);
+DEFINE_Q35_MACHINE(8, 0);
-static void pc_q35_7_2_machine_options(MachineClass *m)
+static void pc_q35_machine_7_2_options(MachineClass *m)
{
- pc_q35_8_0_machine_options(m);
+ pc_q35_machine_8_0_options(m);
compat_props_add(m->compat_props, hw_compat_7_2, hw_compat_7_2_len);
compat_props_add(m->compat_props, pc_compat_7_2, pc_compat_7_2_len);
}
-DEFINE_Q35_MACHINE(v7_2, "pc-q35-7.2", NULL,
- pc_q35_7_2_machine_options);
+DEFINE_Q35_MACHINE(7, 2);
-static void pc_q35_7_1_machine_options(MachineClass *m)
+static void pc_q35_machine_7_1_options(MachineClass *m)
{
- pc_q35_7_2_machine_options(m);
+ pc_q35_machine_7_2_options(m);
compat_props_add(m->compat_props, hw_compat_7_1, hw_compat_7_1_len);
compat_props_add(m->compat_props, pc_compat_7_1, pc_compat_7_1_len);
}
-DEFINE_Q35_MACHINE(v7_1, "pc-q35-7.1", NULL,
- pc_q35_7_1_machine_options);
+DEFINE_Q35_MACHINE(7, 1);
-static void pc_q35_7_0_machine_options(MachineClass *m)
+static void pc_q35_machine_7_0_options(MachineClass *m)
{
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
- pc_q35_7_1_machine_options(m);
+ pc_q35_machine_7_1_options(m);
pcmc->enforce_amd_1tb_hole = false;
compat_props_add(m->compat_props, hw_compat_7_0, hw_compat_7_0_len);
compat_props_add(m->compat_props, pc_compat_7_0, pc_compat_7_0_len);
}
-DEFINE_Q35_MACHINE(v7_0, "pc-q35-7.0", NULL,
- pc_q35_7_0_machine_options);
+DEFINE_Q35_MACHINE(7, 0);
-static void pc_q35_6_2_machine_options(MachineClass *m)
+static void pc_q35_machine_6_2_options(MachineClass *m)
{
- pc_q35_7_0_machine_options(m);
+ pc_q35_machine_7_0_options(m);
compat_props_add(m->compat_props, hw_compat_6_2, hw_compat_6_2_len);
compat_props_add(m->compat_props, pc_compat_6_2, pc_compat_6_2_len);
}
-DEFINE_Q35_MACHINE(v6_2, "pc-q35-6.2", NULL,
- pc_q35_6_2_machine_options);
+DEFINE_Q35_MACHINE(6, 2);
-static void pc_q35_6_1_machine_options(MachineClass *m)
+static void pc_q35_machine_6_1_options(MachineClass *m)
{
- pc_q35_6_2_machine_options(m);
+ pc_q35_machine_6_2_options(m);
compat_props_add(m->compat_props, hw_compat_6_1, hw_compat_6_1_len);
compat_props_add(m->compat_props, pc_compat_6_1, pc_compat_6_1_len);
m->smp_props.prefer_sockets = true;
}
-DEFINE_Q35_MACHINE(v6_1, "pc-q35-6.1", NULL,
- pc_q35_6_1_machine_options);
+DEFINE_Q35_MACHINE(6, 1);
-static void pc_q35_6_0_machine_options(MachineClass *m)
+static void pc_q35_machine_6_0_options(MachineClass *m)
{
- pc_q35_6_1_machine_options(m);
+ pc_q35_machine_6_1_options(m);
compat_props_add(m->compat_props, hw_compat_6_0, hw_compat_6_0_len);
compat_props_add(m->compat_props, pc_compat_6_0, pc_compat_6_0_len);
}
-DEFINE_Q35_MACHINE(v6_0, "pc-q35-6.0", NULL,
- pc_q35_6_0_machine_options);
+DEFINE_Q35_MACHINE(6, 0);
-static void pc_q35_5_2_machine_options(MachineClass *m)
+static void pc_q35_machine_5_2_options(MachineClass *m)
{
- pc_q35_6_0_machine_options(m);
+ pc_q35_machine_6_0_options(m);
compat_props_add(m->compat_props, hw_compat_5_2, hw_compat_5_2_len);
compat_props_add(m->compat_props, pc_compat_5_2, pc_compat_5_2_len);
}
-DEFINE_Q35_MACHINE(v5_2, "pc-q35-5.2", NULL,
- pc_q35_5_2_machine_options);
+DEFINE_Q35_MACHINE(5, 2);
-static void pc_q35_5_1_machine_options(MachineClass *m)
+static void pc_q35_machine_5_1_options(MachineClass *m)
{
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
- pc_q35_5_2_machine_options(m);
+ pc_q35_machine_5_2_options(m);
compat_props_add(m->compat_props, hw_compat_5_1, hw_compat_5_1_len);
compat_props_add(m->compat_props, pc_compat_5_1, pc_compat_5_1_len);
pcmc->kvmclock_create_always = false;
pcmc->pci_root_uid = 1;
}
-DEFINE_Q35_MACHINE(v5_1, "pc-q35-5.1", NULL,
- pc_q35_5_1_machine_options);
+DEFINE_Q35_MACHINE(5, 1);
-static void pc_q35_5_0_machine_options(MachineClass *m)
+static void pc_q35_machine_5_0_options(MachineClass *m)
{
- pc_q35_5_1_machine_options(m);
+ pc_q35_machine_5_1_options(m);
m->numa_mem_supported = true;
compat_props_add(m->compat_props, hw_compat_5_0, hw_compat_5_0_len);
compat_props_add(m->compat_props, pc_compat_5_0, pc_compat_5_0_len);
m->auto_enable_numa_with_memdev = false;
}
-DEFINE_Q35_MACHINE(v5_0, "pc-q35-5.0", NULL,
- pc_q35_5_0_machine_options);
+DEFINE_Q35_MACHINE(5, 0);
-static void pc_q35_4_2_machine_options(MachineClass *m)
+static void pc_q35_machine_4_2_options(MachineClass *m)
{
- pc_q35_5_0_machine_options(m);
+ pc_q35_machine_5_0_options(m);
compat_props_add(m->compat_props, hw_compat_4_2, hw_compat_4_2_len);
compat_props_add(m->compat_props, pc_compat_4_2, pc_compat_4_2_len);
}
-DEFINE_Q35_MACHINE(v4_2, "pc-q35-4.2", NULL,
- pc_q35_4_2_machine_options);
+DEFINE_Q35_MACHINE(4, 2);
-static void pc_q35_4_1_machine_options(MachineClass *m)
+static void pc_q35_machine_4_1_options(MachineClass *m)
{
- pc_q35_4_2_machine_options(m);
+ pc_q35_machine_4_2_options(m);
compat_props_add(m->compat_props, hw_compat_4_1, hw_compat_4_1_len);
compat_props_add(m->compat_props, pc_compat_4_1, pc_compat_4_1_len);
}
-DEFINE_Q35_MACHINE(v4_1, "pc-q35-4.1", NULL,
- pc_q35_4_1_machine_options);
+DEFINE_Q35_MACHINE(4, 1);
-static void pc_q35_4_0_1_machine_options(MachineClass *m)
+static void pc_q35_machine_4_0_1_options(MachineClass *m)
{
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
- pc_q35_4_1_machine_options(m);
+ pc_q35_machine_4_1_options(m);
pcmc->default_cpu_version = CPU_VERSION_LEGACY;
/*
* This is the default machine for the 4.0-stable branch. It is basically
@@ -564,24 +568,22 @@ static void pc_q35_4_0_1_machine_options(MachineClass *m)
compat_props_add(m->compat_props, pc_compat_4_0, pc_compat_4_0_len);
}
-DEFINE_Q35_MACHINE(v4_0_1, "pc-q35-4.0.1", NULL,
- pc_q35_4_0_1_machine_options);
+DEFINE_Q35_MACHINE_BUGFIX(4, 0, 1);
-static void pc_q35_4_0_machine_options(MachineClass *m)
+static void pc_q35_machine_4_0_options(MachineClass *m)
{
- pc_q35_4_0_1_machine_options(m);
+ pc_q35_machine_4_0_1_options(m);
m->default_kernel_irqchip_split = true;
/* Compat props are applied by the 4.0.1 machine */
}
-DEFINE_Q35_MACHINE(v4_0, "pc-q35-4.0", NULL,
- pc_q35_4_0_machine_options);
+DEFINE_Q35_MACHINE(4, 0);
-static void pc_q35_3_1_machine_options(MachineClass *m)
+static void pc_q35_machine_3_1_options(MachineClass *m)
{
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
- pc_q35_4_0_machine_options(m);
+ pc_q35_machine_4_0_options(m);
m->default_kernel_irqchip_split = false;
m->smbus_no_migration_support = true;
pcmc->pvh_enabled = false;
@@ -589,121 +591,84 @@ static void pc_q35_3_1_machine_options(MachineClass *m)
compat_props_add(m->compat_props, pc_compat_3_1, pc_compat_3_1_len);
}
-DEFINE_Q35_MACHINE(v3_1, "pc-q35-3.1", NULL,
- pc_q35_3_1_machine_options);
+DEFINE_Q35_MACHINE(3, 1);
-static void pc_q35_3_0_machine_options(MachineClass *m)
+static void pc_q35_machine_3_0_options(MachineClass *m)
{
- pc_q35_3_1_machine_options(m);
+ pc_q35_machine_3_1_options(m);
compat_props_add(m->compat_props, hw_compat_3_0, hw_compat_3_0_len);
compat_props_add(m->compat_props, pc_compat_3_0, pc_compat_3_0_len);
}
-DEFINE_Q35_MACHINE(v3_0, "pc-q35-3.0", NULL,
- pc_q35_3_0_machine_options);
+DEFINE_Q35_MACHINE(3, 0);
-static void pc_q35_2_12_machine_options(MachineClass *m)
+static void pc_q35_machine_2_12_options(MachineClass *m)
{
- pc_q35_3_0_machine_options(m);
+ pc_q35_machine_3_0_options(m);
compat_props_add(m->compat_props, hw_compat_2_12, hw_compat_2_12_len);
compat_props_add(m->compat_props, pc_compat_2_12, pc_compat_2_12_len);
}
-DEFINE_Q35_MACHINE(v2_12, "pc-q35-2.12", NULL,
- pc_q35_2_12_machine_options);
+DEFINE_Q35_MACHINE(2, 12);
-static void pc_q35_2_11_machine_options(MachineClass *m)
+static void pc_q35_machine_2_11_options(MachineClass *m)
{
- pc_q35_2_12_machine_options(m);
+ pc_q35_machine_2_12_options(m);
m->default_nic = "e1000";
compat_props_add(m->compat_props, hw_compat_2_11, hw_compat_2_11_len);
compat_props_add(m->compat_props, pc_compat_2_11, pc_compat_2_11_len);
}
-DEFINE_Q35_MACHINE(v2_11, "pc-q35-2.11", NULL,
- pc_q35_2_11_machine_options);
+DEFINE_Q35_MACHINE(2, 11);
-static void pc_q35_2_10_machine_options(MachineClass *m)
+static void pc_q35_machine_2_10_options(MachineClass *m)
{
- pc_q35_2_11_machine_options(m);
+ pc_q35_machine_2_11_options(m);
compat_props_add(m->compat_props, hw_compat_2_10, hw_compat_2_10_len);
compat_props_add(m->compat_props, pc_compat_2_10, pc_compat_2_10_len);
m->auto_enable_numa_with_memhp = false;
}
-DEFINE_Q35_MACHINE(v2_10, "pc-q35-2.10", NULL,
- pc_q35_2_10_machine_options);
+DEFINE_Q35_MACHINE(2, 10);
-static void pc_q35_2_9_machine_options(MachineClass *m)
+static void pc_q35_machine_2_9_options(MachineClass *m)
{
- pc_q35_2_10_machine_options(m);
+ pc_q35_machine_2_10_options(m);
compat_props_add(m->compat_props, hw_compat_2_9, hw_compat_2_9_len);
compat_props_add(m->compat_props, pc_compat_2_9, pc_compat_2_9_len);
}
-DEFINE_Q35_MACHINE(v2_9, "pc-q35-2.9", NULL,
- pc_q35_2_9_machine_options);
+DEFINE_Q35_MACHINE(2, 9);
-static void pc_q35_2_8_machine_options(MachineClass *m)
+static void pc_q35_machine_2_8_options(MachineClass *m)
{
- pc_q35_2_9_machine_options(m);
+ pc_q35_machine_2_9_options(m);
compat_props_add(m->compat_props, hw_compat_2_8, hw_compat_2_8_len);
compat_props_add(m->compat_props, pc_compat_2_8, pc_compat_2_8_len);
}
-DEFINE_Q35_MACHINE(v2_8, "pc-q35-2.8", NULL,
- pc_q35_2_8_machine_options);
+DEFINE_Q35_MACHINE(2, 8);
-static void pc_q35_2_7_machine_options(MachineClass *m)
+static void pc_q35_machine_2_7_options(MachineClass *m)
{
- pc_q35_2_8_machine_options(m);
+ pc_q35_machine_2_8_options(m);
m->max_cpus = 255;
compat_props_add(m->compat_props, hw_compat_2_7, hw_compat_2_7_len);
compat_props_add(m->compat_props, pc_compat_2_7, pc_compat_2_7_len);
}
-DEFINE_Q35_MACHINE(v2_7, "pc-q35-2.7", NULL,
- pc_q35_2_7_machine_options);
+DEFINE_Q35_MACHINE(2, 7);
-static void pc_q35_2_6_machine_options(MachineClass *m)
+static void pc_q35_machine_2_6_options(MachineClass *m)
{
X86MachineClass *x86mc = X86_MACHINE_CLASS(m);
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
- pc_q35_2_7_machine_options(m);
+ pc_q35_machine_2_7_options(m);
pcmc->legacy_cpu_hotplug = true;
x86mc->fwcfg_dma_enabled = false;
compat_props_add(m->compat_props, hw_compat_2_6, hw_compat_2_6_len);
compat_props_add(m->compat_props, pc_compat_2_6, pc_compat_2_6_len);
}
-DEFINE_Q35_MACHINE(v2_6, "pc-q35-2.6", NULL,
- pc_q35_2_6_machine_options);
-
-static void pc_q35_2_5_machine_options(MachineClass *m)
-{
- X86MachineClass *x86mc = X86_MACHINE_CLASS(m);
-
- pc_q35_2_6_machine_options(m);
- x86mc->save_tsc_khz = false;
- m->legacy_fw_cfg_order = 1;
- compat_props_add(m->compat_props, hw_compat_2_5, hw_compat_2_5_len);
- compat_props_add(m->compat_props, pc_compat_2_5, pc_compat_2_5_len);
-}
-
-DEFINE_Q35_MACHINE(v2_5, "pc-q35-2.5", NULL,
- pc_q35_2_5_machine_options);
-
-static void pc_q35_2_4_machine_options(MachineClass *m)
-{
- PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
-
- pc_q35_2_5_machine_options(m);
- m->hw_version = "2.4.0";
- pcmc->broken_reserved_end = true;
- compat_props_add(m->compat_props, hw_compat_2_4, hw_compat_2_4_len);
- compat_props_add(m->compat_props, pc_compat_2_4, pc_compat_2_4_len);
-}
-
-DEFINE_Q35_MACHINE(v2_4, "pc-q35-2.4", NULL,
- pc_q35_2_4_machine_options);
+DEFINE_Q35_MACHINE(2, 6);
diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c
index ef80281..821396c 100644
--- a/hw/i386/pc_sysfw.c
+++ b/hw/i386/pc_sysfw.c
@@ -25,7 +25,7 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
-#include "sysemu/block-backend.h"
+#include "system/block-backend.h"
#include "qemu/error-report.h"
#include "qemu/option.h"
#include "qemu/units.h"
@@ -35,8 +35,9 @@
#include "hw/loader.h"
#include "hw/qdev-properties.h"
#include "hw/block/flash.h"
-#include "sysemu/kvm.h"
-#include "sev.h"
+#include "system/kvm.h"
+#include "target/i386/sev.h"
+#include "kvm/tdx.h"
#define FLASH_SECTOR_SIZE 4096
@@ -280,5 +281,11 @@ void x86_firmware_configure(hwaddr gpa, void *ptr, int size)
}
sev_encrypt_flash(gpa, ptr, size, &error_fatal);
+ } else if (is_tdx_vm()) {
+ ret = tdx_parse_tdvf(ptr, size);
+ if (ret) {
+ error_report("failed to parse TDVF for TDX VM");
+ exit(1);
+ }
}
}
diff --git a/hw/i386/pc_sysfw_ovmf.c b/hw/i386/pc_sysfw_ovmf.c
index 07a4c26..da947c3 100644
--- a/hw/i386/pc_sysfw_ovmf.c
+++ b/hw/i386/pc_sysfw_ovmf.c
@@ -26,6 +26,7 @@
#include "qemu/osdep.h"
#include "qemu/error-report.h"
#include "hw/i386/pc.h"
+#include "exec/target_page.h"
#include "cpu.h"
#define OVMF_TABLE_FOOTER_GUID "96b582de-1fb2-45f7-baea-a366c55a082d"
diff --git a/hw/i386/port92.c b/hw/i386/port92.c
index b25157f..39b6f31 100644
--- a/hw/i386/port92.c
+++ b/hw/i386/port92.c
@@ -7,7 +7,7 @@
*/
#include "qemu/osdep.h"
-#include "sysemu/runstate.h"
+#include "system/runstate.h"
#include "migration/vmstate.h"
#include "hw/irq.h"
#include "hw/isa/isa.h"
@@ -97,12 +97,12 @@ static void port92_realizefn(DeviceState *dev, Error **errp)
isa_register_ioport(isadev, &s->io, 0x92);
}
-static void port92_class_initfn(ObjectClass *klass, void *data)
+static void port92_class_initfn(ObjectClass *klass, const void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
dc->realize = port92_realizefn;
- dc->reset = port92_reset;
+ device_class_set_legacy_reset(dc, port92_reset);
dc->vmsd = &vmstate_port92_isa;
/*
* Reason: unlike ordinary ISA devices, this one needs additional
diff --git a/hw/i386/sgx-epc.c b/hw/i386/sgx-epc.c
index d664829..2b3b282 100644
--- a/hw/i386/sgx-epc.c
+++ b/hw/i386/sgx-epc.c
@@ -17,14 +17,13 @@
#include "qapi/error.h"
#include "qapi/visitor.h"
#include "target/i386/cpu.h"
-#include "exec/address-spaces.h"
+#include "system/address-spaces.h"
-static Property sgx_epc_properties[] = {
+static const Property sgx_epc_properties[] = {
DEFINE_PROP_UINT64(SGX_EPC_ADDR_PROP, SGXEPCDevice, addr, 0),
DEFINE_PROP_UINT32(SGX_EPC_NUMA_NODE_PROP, SGXEPCDevice, node, 0),
DEFINE_PROP_LINK(SGX_EPC_MEMDEV_PROP, SGXEPCDevice, hostmem,
TYPE_MEMORY_BACKEND_EPC, HostMemoryBackendEpc *),
- DEFINE_PROP_END_OF_LIST(),
};
static void sgx_epc_get_size(Object *obj, Visitor *v, const char *name,
@@ -148,7 +147,7 @@ static void sgx_epc_md_fill_device_info(const MemoryDeviceState *md,
info->type = MEMORY_DEVICE_INFO_KIND_SGX_EPC;
}
-static void sgx_epc_class_init(ObjectClass *oc, void *data)
+static void sgx_epc_class_init(ObjectClass *oc, const void *data)
{
DeviceClass *dc = DEVICE_CLASS(oc);
MemoryDeviceClass *mdc = MEMORY_DEVICE_CLASS(oc);
@@ -174,7 +173,7 @@ static const TypeInfo sgx_epc_info = {
.instance_init = sgx_epc_init,
.class_init = sgx_epc_class_init,
.class_size = sizeof(DeviceClass),
- .interfaces = (InterfaceInfo[]) {
+ .interfaces = (const InterfaceInfo[]) {
{ TYPE_MEMORY_DEVICE },
{ }
},
diff --git a/hw/i386/sgx-stub.c b/hw/i386/sgx-stub.c
index 16b1dfd..d295e54 100644
--- a/hw/i386/sgx-stub.c
+++ b/hw/i386/sgx-stub.c
@@ -3,20 +3,20 @@
#include "monitor/hmp-target.h"
#include "hw/i386/pc.h"
#include "hw/i386/sgx-epc.h"
+#include "qapi/qapi-commands-misc-i386.h"
#include "qapi/error.h"
-#include "qapi/qapi-commands-misc-target.h"
void sgx_epc_build_srat(GArray *table_data)
{
}
-SGXInfo *qmp_query_sgx(Error **errp)
+SgxInfo *qmp_query_sgx(Error **errp)
{
error_setg(errp, "SGX support is not compiled in");
return NULL;
}
-SGXInfo *qmp_query_sgx_capabilities(Error **errp)
+SgxInfo *qmp_query_sgx_capabilities(Error **errp)
{
error_setg(errp, "SGX support is not compiled in");
return NULL;
@@ -32,6 +32,11 @@ void pc_machine_init_sgx_epc(PCMachineState *pcms)
memset(&pcms->sgx_epc, 0, sizeof(SGXEPCState));
}
+bool check_sgx_support(void)
+{
+ return false;
+}
+
bool sgx_epc_get_section(int section_nr, uint64_t *addr, uint64_t *size)
{
return true;
diff --git a/hw/i386/sgx.c b/hw/i386/sgx.c
index de76397..e280154 100644
--- a/hw/i386/sgx.c
+++ b/hw/i386/sgx.c
@@ -19,10 +19,10 @@
#include "monitor/hmp-target.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
-#include "qapi/qapi-commands-misc-target.h"
-#include "exec/address-spaces.h"
-#include "sysemu/hw_accel.h"
-#include "sysemu/reset.h"
+#include "qapi/qapi-commands-misc-i386.h"
+#include "system/address-spaces.h"
+#include "system/hw_accel.h"
+#include "system/reset.h"
#include <sys/ioctl.h>
#include "hw/acpi/aml-build.h"
@@ -84,10 +84,10 @@ static uint64_t sgx_calc_section_metric(uint64_t low, uint64_t high)
((high & MAKE_64BIT_MASK(0, 20)) << 32);
}
-static SGXEPCSectionList *sgx_calc_host_epc_sections(void)
+static SgxEpcSectionList *sgx_calc_host_epc_sections(void)
{
- SGXEPCSectionList *head = NULL, **tail = &head;
- SGXEPCSection *section;
+ SgxEpcSectionList *head = NULL, **tail = &head;
+ SgxEpcSection *section;
uint32_t i, type;
uint32_t eax, ebx, ecx, edx;
uint32_t j = 0;
@@ -104,7 +104,7 @@ static SGXEPCSectionList *sgx_calc_host_epc_sections(void)
break;
}
- section = g_new0(SGXEPCSection, 1);
+ section = g_new0(SgxEpcSection, 1);
section->node = j++;
section->size = sgx_calc_section_metric(ecx, edx);
QAPI_LIST_APPEND(tail, section);
@@ -153,18 +153,20 @@ static void sgx_epc_reset(void *opaque)
}
}
-SGXInfo *qmp_query_sgx_capabilities(Error **errp)
+SgxInfo *qmp_query_sgx_capabilities(Error **errp)
{
- SGXInfo *info = NULL;
+ SgxInfo *info = NULL;
uint32_t eax, ebx, ecx, edx;
+ Error *local_err = NULL;
- int fd = qemu_open_old("/dev/sgx_vepc", O_RDWR);
+ int fd = qemu_open("/dev/sgx_vepc", O_RDWR, &local_err);
if (fd < 0) {
- error_setg(errp, "SGX is not enabled in KVM");
+ error_append_hint(&local_err, "SGX is not enabled in KVM");
+ error_propagate(errp, local_err);
return NULL;
}
- info = g_new0(SGXInfo, 1);
+ info = g_new0(SgxInfo, 1);
host_cpuid(0x7, 0, &eax, &ebx, &ecx, &edx);
info->sgx = ebx & (1U << 2) ? true : false;
@@ -181,17 +183,17 @@ SGXInfo *qmp_query_sgx_capabilities(Error **errp)
return info;
}
-static SGXEPCSectionList *sgx_get_epc_sections_list(void)
+static SgxEpcSectionList *sgx_get_epc_sections_list(void)
{
GSList *device_list = sgx_epc_get_device_list();
- SGXEPCSectionList *head = NULL, **tail = &head;
- SGXEPCSection *section;
+ SgxEpcSectionList *head = NULL, **tail = &head;
+ SgxEpcSection *section;
for (; device_list; device_list = device_list->next) {
DeviceState *dev = device_list->data;
Object *obj = OBJECT(dev);
- section = g_new0(SGXEPCSection, 1);
+ section = g_new0(SgxEpcSection, 1);
section->node = object_property_get_uint(obj, SGX_EPC_NUMA_NODE_PROP,
&error_abort);
section->size = object_property_get_uint(obj, SGX_EPC_SIZE_PROP,
@@ -203,9 +205,9 @@ static SGXEPCSectionList *sgx_get_epc_sections_list(void)
return head;
}
-SGXInfo *qmp_query_sgx(Error **errp)
+SgxInfo *qmp_query_sgx(Error **errp)
{
- SGXInfo *info = NULL;
+ SgxInfo *info = NULL;
X86MachineState *x86ms;
PCMachineState *pcms =
(PCMachineState *)object_dynamic_cast(qdev_get_machine(),
@@ -221,7 +223,7 @@ SGXInfo *qmp_query_sgx(Error **errp)
return NULL;
}
- info = g_new0(SGXInfo, 1);
+ info = g_new0(SgxInfo, 1);
info->sgx = true;
info->sgx1 = true;
@@ -235,8 +237,8 @@ SGXInfo *qmp_query_sgx(Error **errp)
void hmp_info_sgx(Monitor *mon, const QDict *qdict)
{
Error *err = NULL;
- SGXEPCSectionList *section_list, *section;
- g_autoptr(SGXInfo) info = qmp_query_sgx(&err);
+ SgxEpcSectionList *section_list, *section;
+ g_autoptr(SgxInfo) info = qmp_query_sgx(&err);
uint64_t size = 0;
if (err) {
@@ -264,12 +266,22 @@ void hmp_info_sgx(Monitor *mon, const QDict *qdict)
size);
}
+bool check_sgx_support(void)
+{
+ if (!object_dynamic_cast(qdev_get_machine(), TYPE_PC_MACHINE)) {
+ return false;
+ }
+ return true;
+}
+
bool sgx_epc_get_section(int section_nr, uint64_t *addr, uint64_t *size)
{
- PCMachineState *pcms = PC_MACHINE(qdev_get_machine());
+ PCMachineState *pcms =
+ (PCMachineState *)object_dynamic_cast(qdev_get_machine(),
+ TYPE_PC_MACHINE);
SGXEPCDevice *epc;
- if (pcms->sgx_epc.size == 0 || pcms->sgx_epc.nr_sections <= section_nr) {
+ if (!pcms || pcms->sgx_epc.size == 0 || pcms->sgx_epc.nr_sections <= section_nr) {
return true;
}
diff --git a/hw/i386/tdvf-hob.c b/hw/i386/tdvf-hob.c
new file mode 100644
index 0000000..782b3d1
--- /dev/null
+++ b/hw/i386/tdvf-hob.c
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2025 Intel Corporation
+ * Author: Isaku Yamahata <isaku.yamahata at gmail.com>
+ * <isaku.yamahata at intel.com>
+ * Xiaoyao Li <xiaoyao.li@intel.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "standard-headers/uefi/uefi.h"
+#include "hw/pci/pcie_host.h"
+#include "tdvf-hob.h"
+
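+/*
+ * The TD HOB (Hand-Off Block) list is built by QEMU and consumed by the TDVF
+ * firmware; it describes the memory resources assigned to the TD guest.
+ */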
+typedef struct TdvfHob {
+ hwaddr hob_addr;
+ void *ptr;
+ int size;
+
+ /* working area */
+ void *current;
+ void *end;
+} TdvfHob;
+
+static uint64_t tdvf_current_guest_addr(const TdvfHob *hob)
+{
+ return hob->hob_addr + (hob->current - hob->ptr);
+}
+
+static void tdvf_align(TdvfHob *hob, size_t align)
+{
+ hob->current = QEMU_ALIGN_PTR_UP(hob->current, align);
+}
+
+static void *tdvf_get_area(TdvfHob *hob, uint64_t size)
+{
+ void *ret;
+
+ if (hob->current + size > hob->end) {
+ error_report("TD_HOB overrun, size = 0x%" PRIx64, size);
+ exit(1);
+ }
+
+ ret = hob->current;
+ hob->current += size;
+ tdvf_align(hob, 8);
+ return ret;
+}
+
+static void tdvf_hob_add_memory_resources(TdxGuest *tdx, TdvfHob *hob)
+{
+ EFI_HOB_RESOURCE_DESCRIPTOR *region;
+ EFI_RESOURCE_ATTRIBUTE_TYPE attr;
+ EFI_RESOURCE_TYPE resource_type;
+
+ TdxRamEntry *e;
+ int i;
+
+ for (i = 0; i < tdx->nr_ram_entries; i++) {
+ e = &tdx->ram_entries[i];
+
+ if (e->type == TDX_RAM_UNACCEPTED) {
+ resource_type = EFI_RESOURCE_MEMORY_UNACCEPTED;
+ attr = EFI_RESOURCE_ATTRIBUTE_TDVF_UNACCEPTED;
+ } else if (e->type == TDX_RAM_ADDED) {
+ resource_type = EFI_RESOURCE_SYSTEM_MEMORY;
+ attr = EFI_RESOURCE_ATTRIBUTE_TDVF_PRIVATE;
+ } else {
+ error_report("unknown TDX_RAM_ENTRY type %d", e->type);
+ exit(1);
+ }
+
+ region = tdvf_get_area(hob, sizeof(*region));
+ *region = (EFI_HOB_RESOURCE_DESCRIPTOR) {
+ .Header = {
+ .HobType = EFI_HOB_TYPE_RESOURCE_DESCRIPTOR,
+ .HobLength = cpu_to_le16(sizeof(*region)),
+ .Reserved = cpu_to_le32(0),
+ },
+ .Owner = EFI_HOB_OWNER_ZERO,
+ .ResourceType = cpu_to_le32(resource_type),
+ .ResourceAttribute = cpu_to_le32(attr),
+ .PhysicalStart = cpu_to_le64(e->address),
+ .ResourceLength = cpu_to_le64(e->length),
+ };
+ }
+}
+
+void tdvf_hob_create(TdxGuest *tdx, TdxFirmwareEntry *td_hob)
+{
+ TdvfHob hob = {
+ .hob_addr = td_hob->address,
+ .size = td_hob->size,
+ .ptr = td_hob->mem_ptr,
+
+ .current = td_hob->mem_ptr,
+ .end = td_hob->mem_ptr + td_hob->size,
+ };
+
+ EFI_HOB_GENERIC_HEADER *last_hob;
+ EFI_HOB_HANDOFF_INFO_TABLE *hit;
+
+ /* Note, Efi{Free}Memory{Bottom,Top} are ignored, leave 'em zeroed. */
+ hit = tdvf_get_area(&hob, sizeof(*hit));
+ *hit = (EFI_HOB_HANDOFF_INFO_TABLE) {
+ .Header = {
+ .HobType = EFI_HOB_TYPE_HANDOFF,
+ .HobLength = cpu_to_le16(sizeof(*hit)),
+ .Reserved = cpu_to_le32(0),
+ },
+ .Version = cpu_to_le32(EFI_HOB_HANDOFF_TABLE_VERSION),
+ .BootMode = cpu_to_le32(0),
+ .EfiMemoryTop = cpu_to_le64(0),
+ .EfiMemoryBottom = cpu_to_le64(0),
+ .EfiFreeMemoryTop = cpu_to_le64(0),
+ .EfiFreeMemoryBottom = cpu_to_le64(0),
+ .EfiEndOfHobList = cpu_to_le64(0), /* initialized later */
+ };
+
+ tdvf_hob_add_memory_resources(tdx, &hob);
+
+ last_hob = tdvf_get_area(&hob, sizeof(*last_hob));
+ *last_hob = (EFI_HOB_GENERIC_HEADER) {
+ .HobType = EFI_HOB_TYPE_END_OF_HOB_LIST,
+ .HobLength = cpu_to_le16(sizeof(*last_hob)),
+ .Reserved = cpu_to_le32(0),
+ };
+ hit->EfiEndOfHobList = tdvf_current_guest_addr(&hob);
+}
diff --git a/hw/i386/tdvf-hob.h b/hw/i386/tdvf-hob.h
new file mode 100644
index 0000000..4fc6a37
--- /dev/null
+++ b/hw/i386/tdvf-hob.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef HW_I386_TD_HOB_H
+#define HW_I386_TD_HOB_H
+
+#include "hw/i386/tdvf.h"
+#include "target/i386/kvm/tdx.h"
+
+void tdvf_hob_create(TdxGuest *tdx, TdxFirmwareEntry *td_hob);
+
+#define EFI_RESOURCE_ATTRIBUTE_TDVF_PRIVATE \
+ (EFI_RESOURCE_ATTRIBUTE_PRESENT | \
+ EFI_RESOURCE_ATTRIBUTE_INITIALIZED | \
+ EFI_RESOURCE_ATTRIBUTE_TESTED)
+
+#define EFI_RESOURCE_ATTRIBUTE_TDVF_UNACCEPTED \
+ (EFI_RESOURCE_ATTRIBUTE_PRESENT | \
+ EFI_RESOURCE_ATTRIBUTE_INITIALIZED | \
+ EFI_RESOURCE_ATTRIBUTE_TESTED)
+
+#define EFI_RESOURCE_ATTRIBUTE_TDVF_MMIO \
+ (EFI_RESOURCE_ATTRIBUTE_PRESENT | \
+ EFI_RESOURCE_ATTRIBUTE_INITIALIZED | \
+ EFI_RESOURCE_ATTRIBUTE_UNCACHEABLE)
+
+#endif
diff --git a/hw/i386/tdvf.c b/hw/i386/tdvf.c
new file mode 100644
index 0000000..645d9d1
--- /dev/null
+++ b/hw/i386/tdvf.c
@@ -0,0 +1,189 @@
+/*
+ * Copyright (c) 2025 Intel Corporation
+ * Author: Isaku Yamahata <isaku.yamahata at gmail.com>
+ * <isaku.yamahata at intel.com>
+ * Xiaoyao Li <xiaoyao.li@intel.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+
+#include "hw/i386/pc.h"
+#include "hw/i386/tdvf.h"
+#include "system/kvm.h"
+
+#define TDX_METADATA_OFFSET_GUID "e47a6535-984a-4798-865e-4685a7bf8ec2"
+#define TDX_METADATA_VERSION 1
+#define TDVF_SIGNATURE 0x46564454 /* TDVF as little endian */
+#define TDVF_ALIGNMENT 4096
+
+/*
+ * The raw structs read from TDVF keep the naming convention of the
+ * TDVF Design Guide spec.
+ */
+typedef struct {
+ uint32_t DataOffset;
+ uint32_t RawDataSize;
+ uint64_t MemoryAddress;
+ uint64_t MemoryDataSize;
+ uint32_t Type;
+ uint32_t Attributes;
+} TdvfSectionEntry;
+
+typedef struct {
+ uint32_t Signature;
+ uint32_t Length;
+ uint32_t Version;
+ uint32_t NumberOfSectionEntries;
+ TdvfSectionEntry SectionEntries[];
+} TdvfMetadata;
+
+struct tdx_metadata_offset {
+ uint32_t offset;
+};
+
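+/*
+ * Locate the TDVF metadata: the TDX_METADATA_OFFSET_GUID entry in the OVMF
+ * GUID-ed table holds the offset of the metadata block, counted back from
+ * the end of the firmware image.
+ */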
+static TdvfMetadata *tdvf_get_metadata(void *flash_ptr, int size)
+{
+ TdvfMetadata *metadata;
+ uint32_t offset = 0;
+ uint8_t *data;
+
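+ /* The offset arithmetic below is 32-bit; reject images whose size doesn't fit. */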
+ if ((uint32_t) size != size) {
+ return NULL;
+ }
+
+ if (pc_system_ovmf_table_find(TDX_METADATA_OFFSET_GUID, &data, NULL)) {
+ offset = size - le32_to_cpu(((struct tdx_metadata_offset *)data)->offset);
+
+ if (offset + sizeof(*metadata) > size) {
+ return NULL;
+ }
+ } else {
+ error_report("Cannot find TDX_METADATA_OFFSET_GUID");
+ return NULL;
+ }
+
+ metadata = flash_ptr + offset;
+
+ /* Finally, verify the signature to determine if this is a TDVF image. */
+ metadata->Signature = le32_to_cpu(metadata->Signature);
+ if (metadata->Signature != TDVF_SIGNATURE) {
+ error_report("Invalid TDVF signature in metadata!");
+ return NULL;
+ }
+
+ /* Sanity check that the TDVF doesn't overlap its own metadata. */
+ metadata->Length = le32_to_cpu(metadata->Length);
+ if (offset + metadata->Length > size) {
+ return NULL;
+ }
+
+ /* Only version 1 is supported/defined. */
+ metadata->Version = le32_to_cpu(metadata->Version);
+ if (metadata->Version != TDX_METADATA_VERSION) {
+ return NULL;
+ }
+
+ return metadata;
+}
+
+static int tdvf_parse_and_check_section_entry(const TdvfSectionEntry *src,
+ TdxFirmwareEntry *entry)
+{
+ entry->data_offset = le32_to_cpu(src->DataOffset);
+ entry->data_len = le32_to_cpu(src->RawDataSize);
+ entry->address = le64_to_cpu(src->MemoryAddress);
+ entry->size = le64_to_cpu(src->MemoryDataSize);
+ entry->type = le32_to_cpu(src->Type);
+ entry->attributes = le32_to_cpu(src->Attributes);
+
+ /* sanity check */
+ if (entry->size < entry->data_len) {
+ error_report("Broken metadata RawDataSize 0x%x MemoryDataSize 0x%"PRIx64,
+ entry->data_len, entry->size);
+ return -1;
+ }
+ if (!QEMU_IS_ALIGNED(entry->address, TDVF_ALIGNMENT)) {
+ error_report("MemoryAddress 0x%"PRIx64" not page aligned", entry->address);
+ return -1;
+ }
+ if (!QEMU_IS_ALIGNED(entry->size, TDVF_ALIGNMENT)) {
+ error_report("MemoryDataSize 0x%"PRIx64" not page aligned", entry->size);
+ return -1;
+ }
+
+ switch (entry->type) {
+ case TDVF_SECTION_TYPE_BFV:
+ case TDVF_SECTION_TYPE_CFV:
+ /* Sections that must be copied from the firmware image into TD memory */
+ if (entry->data_len == 0) {
+ error_report("%d section with RawDataSize == 0", entry->type);
+ return -1;
+ }
+ break;
+ case TDVF_SECTION_TYPE_TD_HOB:
+ case TDVF_SECTION_TYPE_TEMP_MEM:
+ /* Sections that need not be copied from the firmware image */
+ if (entry->data_len != 0) {
+ error_report("%d section with RawDataSize 0x%x != 0",
+ entry->type, entry->data_len);
+ return -1;
+ }
+ break;
+ default:
+ error_report("TDVF contains unsupported section type %d", entry->type);
+ return -1;
+ }
+
+ return 0;
+}
+
+int tdvf_parse_metadata(TdxFirmware *fw, void *flash_ptr, int size)
+{
+ g_autofree TdvfSectionEntry *sections = NULL;
+ TdvfMetadata *metadata;
+ ssize_t entries_size;
+ int i;
+
+ metadata = tdvf_get_metadata(flash_ptr, size);
+ if (!metadata) {
+ return -EINVAL;
+ }
+
+ /* load and parse metadata entries */
+ fw->nr_entries = le32_to_cpu(metadata->NumberOfSectionEntries);
+ if (fw->nr_entries < 2) {
+ error_report("Invalid number of fw entries (%u) in TDVF Metadata",
+ fw->nr_entries);
+ return -EINVAL;
+ }
+
+ entries_size = fw->nr_entries * sizeof(TdvfSectionEntry);
+ if (metadata->Length != sizeof(*metadata) + entries_size) {
+ error_report("TDVF metadata len (0x%x) mismatch, expected (0x%x)",
+ metadata->Length,
+ (uint32_t)(sizeof(*metadata) + entries_size));
+ return -EINVAL;
+ }
+
+ fw->entries = g_new(TdxFirmwareEntry, fw->nr_entries);
+ sections = g_new(TdvfSectionEntry, fw->nr_entries);
+
+ memcpy(sections, (void *)metadata + sizeof(*metadata), entries_size);
+
+ for (i = 0; i < fw->nr_entries; i++) {
+ if (tdvf_parse_and_check_section_entry(&sections[i], &fw->entries[i])) {
+ goto err;
+ }
+ }
+
+ fw->mem_ptr = flash_ptr;
+ return 0;
+
+err:
+ g_free(fw->entries);
+ fw->entries = NULL;
+ return -EINVAL;
+}
diff --git a/hw/i386/trace-events b/hw/i386/trace-events
index 53c02d7..ac9e1a1 100644
--- a/hw/i386/trace-events
+++ b/hw/i386/trace-events
@@ -68,6 +68,7 @@ vtd_frr_new(int index, uint64_t hi, uint64_t lo) "index %d high 0x%"PRIx64" low
vtd_warn_invalid_qi_tail(uint16_t tail) "tail 0x%"PRIx16
vtd_warn_ir_vector(uint16_t sid, int index, int vec, int target) "sid 0x%"PRIx16" index %d vec %d (should be: %d)"
vtd_warn_ir_trigger(uint16_t sid, int index, int trig, int target) "sid 0x%"PRIx16" index %d trigger %d (should be: %d)"
+vtd_reset_exit(void) ""
# amd_iommu.c
amdvi_evntlog_fail(uint64_t addr, uint32_t head) "error: fail to write at addr 0x%"PRIx64" + offset 0x%"PRIx32
diff --git a/hw/i386/vapic.c b/hw/i386/vapic.c
index f5b1db7..0c1c92c 100644
--- a/hw/i386/vapic.c
+++ b/hw/i386/vapic.c
@@ -11,12 +11,13 @@
#include "qemu/osdep.h"
#include "qemu/module.h"
-#include "sysemu/sysemu.h"
-#include "sysemu/cpus.h"
-#include "sysemu/hw_accel.h"
-#include "sysemu/kvm.h"
-#include "sysemu/runstate.h"
-#include "exec/address-spaces.h"
+#include "exec/target_page.h"
+#include "system/system.h"
+#include "system/cpus.h"
+#include "system/hw_accel.h"
+#include "system/kvm.h"
+#include "system/runstate.h"
+#include "system/address-spaces.h"
#include "hw/i386/apic_internal.h"
#include "hw/sysbus.h"
#include "hw/boards.h"
@@ -718,7 +719,7 @@ static uint64_t vapic_read(void *opaque, hwaddr addr, unsigned size)
static const MemoryRegionOps vapic_ops = {
.write = vapic_write,
.read = vapic_read,
- .endianness = DEVICE_NATIVE_ENDIAN,
+ .endianness = DEVICE_LITTLE_ENDIAN,
};
static void vapic_realize(DeviceState *dev, Error **errp)
@@ -846,11 +847,11 @@ static const VMStateDescription vmstate_vapic = {
}
};
-static void vapic_class_init(ObjectClass *klass, void *data)
+static void vapic_class_init(ObjectClass *klass, const void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
- dc->reset = vapic_reset;
+ device_class_set_legacy_reset(dc, vapic_reset);
dc->vmsd = &vmstate_vapic;
dc->realize = vapic_realize;
}
diff --git a/hw/i386/vmmouse.c b/hw/i386/vmmouse.c
index a8d014d..3896159 100644
--- a/hw/i386/vmmouse.c
+++ b/hw/i386/vmmouse.c
@@ -317,17 +317,16 @@ static void vmmouse_realizefn(DeviceState *dev, Error **errp)
vmport_register(VMPORT_CMD_VMMOUSE_DATA, vmmouse_ioport_read, s);
}
-static Property vmmouse_properties[] = {
+static const Property vmmouse_properties[] = {
DEFINE_PROP_LINK("i8042", VMMouseState, i8042, TYPE_I8042, ISAKBDState *),
- DEFINE_PROP_END_OF_LIST(),
};
-static void vmmouse_class_initfn(ObjectClass *klass, void *data)
+static void vmmouse_class_initfn(ObjectClass *klass, const void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
dc->realize = vmmouse_realizefn;
- dc->reset = vmmouse_reset;
+ device_class_set_legacy_reset(dc, vmmouse_reset);
dc->vmsd = &vmstate_vmmouse;
device_class_set_props(dc, vmmouse_properties);
set_bit(DEVICE_CATEGORY_INPUT, dc->categories);
diff --git a/hw/i386/vmport.c b/hw/i386/vmport.c
index 7cc75db..6d93457 100644
--- a/hw/i386/vmport.c
+++ b/hw/i386/vmport.c
@@ -33,9 +33,9 @@
#include "hw/i386/vmport.h"
#include "hw/qdev-properties.h"
#include "hw/boards.h"
-#include "sysemu/sysemu.h"
-#include "sysemu/hw_accel.h"
-#include "sysemu/qtest.h"
+#include "system/system.h"
+#include "system/hw_accel.h"
+#include "system/qtest.h"
#include "qemu/log.h"
#include "trace.h"
#include "qom/object.h"
@@ -252,7 +252,7 @@ static void vmport_realizefn(DeviceState *dev, Error **errp)
}
}
-static Property vmport_properties[] = {
+static const Property vmport_properties[] = {
/* Used to enforce compatibility for migration */
DEFINE_PROP_BIT("x-read-set-eax", VMPortState, compat_flags,
VMPORT_COMPAT_READ_SET_EAX_BIT, true),
@@ -284,11 +284,9 @@ static Property vmport_properties[] = {
* 5 - ACE 1.x (Deprecated)
*/
DEFINE_PROP_UINT8("vmware-vmx-type", VMPortState, vmware_vmx_type, 2),
-
- DEFINE_PROP_END_OF_LIST(),
};
-static void vmport_class_initfn(ObjectClass *klass, void *data)
+static void vmport_class_initfn(ObjectClass *klass, const void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
diff --git a/hw/i386/x86-common.c b/hw/i386/x86-common.c
index c0c66a0..b1b5f11 100644
--- a/hw/i386/x86-common.c
+++ b/hw/i386/x86-common.c
@@ -26,9 +26,9 @@
#include "qemu/units.h"
#include "qemu/datadir.h"
#include "qapi/error.h"
-#include "sysemu/numa.h"
-#include "sysemu/sysemu.h"
-#include "sysemu/xen.h"
+#include "system/numa.h"
+#include "system/system.h"
+#include "system/xen.h"
#include "trace.h"
#include "hw/i386/x86.h"
@@ -44,6 +44,7 @@
#include "standard-headers/asm-x86/bootparam.h"
#include CONFIG_DEVICES
#include "kvm/kvm_i386.h"
+#include "kvm/tdx.h"
#ifdef CONFIG_XEN_EMU
#include "hw/xen/xen.h"
@@ -248,9 +249,7 @@ void x86_cpu_pre_plug(HotplugHandler *hotplug_dev,
CPUX86State *env = &cpu->env;
MachineState *ms = MACHINE(hotplug_dev);
X86MachineState *x86ms = X86_MACHINE(hotplug_dev);
- unsigned int smp_cores = ms->smp.cores;
- unsigned int smp_threads = ms->smp.threads;
- X86CPUTopoInfo topo_info;
+ X86CPUTopoInfo *topo_info = &env->topo_info;
if (!object_dynamic_cast(OBJECT(cpu), ms->cpu_type)) {
error_setg(errp, "Invalid CPU type, expected cpu type: '%s'",
@@ -269,16 +268,14 @@ void x86_cpu_pre_plug(HotplugHandler *hotplug_dev,
}
}
- init_topo_info(&topo_info, x86ms);
+ init_topo_info(topo_info, x86ms);
if (ms->smp.modules > 1) {
- env->nr_modules = ms->smp.modules;
- set_bit(CPU_TOPO_LEVEL_MODULE, env->avail_cpu_topo);
+ set_bit(CPU_TOPOLOGY_LEVEL_MODULE, env->avail_cpu_topo);
}
if (ms->smp.dies > 1) {
- env->nr_dies = ms->smp.dies;
- set_bit(CPU_TOPO_LEVEL_DIE, env->avail_cpu_topo);
+ set_bit(CPU_TOPOLOGY_LEVEL_DIE, env->avail_cpu_topo);
}
/*
@@ -329,17 +326,17 @@ void x86_cpu_pre_plug(HotplugHandler *hotplug_dev,
if (cpu->core_id < 0) {
error_setg(errp, "CPU core-id is not set");
return;
- } else if (cpu->core_id > (smp_cores - 1)) {
+ } else if (cpu->core_id > (ms->smp.cores - 1)) {
error_setg(errp, "Invalid CPU core-id: %u must be in range 0:%u",
- cpu->core_id, smp_cores - 1);
+ cpu->core_id, ms->smp.cores - 1);
return;
}
if (cpu->thread_id < 0) {
error_setg(errp, "CPU thread-id is not set");
return;
- } else if (cpu->thread_id > (smp_threads - 1)) {
+ } else if (cpu->thread_id > (ms->smp.threads - 1)) {
error_setg(errp, "Invalid CPU thread-id: %u must be in range 0:%u",
- cpu->thread_id, smp_threads - 1);
+ cpu->thread_id, ms->smp.threads - 1);
return;
}
@@ -348,12 +345,12 @@ void x86_cpu_pre_plug(HotplugHandler *hotplug_dev,
topo_ids.module_id = cpu->module_id;
topo_ids.core_id = cpu->core_id;
topo_ids.smt_id = cpu->thread_id;
- cpu->apic_id = x86_apicid_from_topo_ids(&topo_info, &topo_ids);
+ cpu->apic_id = x86_apicid_from_topo_ids(topo_info, &topo_ids);
}
cpu_slot = x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, &idx);
if (!cpu_slot) {
- x86_topo_ids_from_apicid(cpu->apic_id, &topo_info, &topo_ids);
+ x86_topo_ids_from_apicid(cpu->apic_id, topo_info, &topo_ids);
error_setg(errp,
"Invalid CPU [socket: %u, die: %u, module: %u, core: %u, thread: %u]"
@@ -376,7 +373,7 @@ void x86_cpu_pre_plug(HotplugHandler *hotplug_dev,
/* TODO: move socket_id/core_id/thread_id checks into x86_cpu_realizefn()
* once -smp refactoring is complete and there will be CPU private
* CPUState::nr_cores and CPUState::nr_threads fields instead of globals */
- x86_topo_ids_from_apicid(cpu->apic_id, &topo_info, &topo_ids);
+ x86_topo_ids_from_apicid(cpu->apic_id, topo_info, &topo_ids);
if (cpu->socket_id != -1 && cpu->socket_id != topo_ids.pkg_id) {
error_setg(errp, "property socket-id: %u doesn't match set apic-id:"
" 0x%x (socket-id: %u)", cpu->socket_id, cpu->apic_id,
@@ -450,8 +447,27 @@ static long get_file_size(FILE *f)
void gsi_handler(void *opaque, int n, int level)
{
GSIState *s = opaque;
+ bool bypass_ioapic = false;
trace_x86_gsi_interrupt(n, level);
+
+#ifdef CONFIG_XEN_EMU
+ /*
+ * Xen delivers the GSI to the Legacy PIC (not that Legacy PIC
+ * routing actually works properly under Xen). And then to
+ * *either* the PIRQ handling or the I/OAPIC depending on whether
+ * the former wants it.
+ *
+ * Additionally, this hook allows the Xen event channel GSI to
+ * work around QEMU's lack of support for shared level interrupts,
+ * by keeping track of the externally driven state of the pin and
+ * implementing a logical OR with the state of the evtchn GSI.
+ */
+ if (xen_mode == XEN_EMULATE) {
+ bypass_ioapic = xen_evtchn_set_gsi(n, &level);
+ }
+#endif
+
switch (n) {
case 0 ... ISA_NUM_IRQS - 1:
if (s->i8259_irq[n]) {
@@ -460,18 +476,9 @@ void gsi_handler(void *opaque, int n, int level)
}
/* fall through */
case ISA_NUM_IRQS ... IOAPIC_NUM_PINS - 1:
-#ifdef CONFIG_XEN_EMU
- /*
- * Xen delivers the GSI to the Legacy PIC (not that Legacy PIC
- * routing actually works properly under Xen). And then to
- * *either* the PIRQ handling or the I/OAPIC depending on
- * whether the former wants it.
- */
- if (xen_mode == XEN_EMULATE && xen_evtchn_set_gsi(n, level)) {
- break;
+ if (!bypass_ioapic) {
+ qemu_set_irq(s->ioapic_irq[n], level);
}
-#endif
- qemu_set_irq(s->ioapic_irq[n], level);
break;
case IO_APIC_SECONDARY_IRQBASE
... IO_APIC_SECONDARY_IRQBASE + IOAPIC_NUM_PINS - 1:
@@ -586,7 +593,7 @@ static bool load_elfboot(const char *kernel_filename,
uint64_t elf_low, elf_high;
int kernel_size;
- if (ldl_p(header) != 0x464c457f) {
+ if (ldl_le_p(header) != 0x464c457f) {
return false; /* no elfboot */
}
@@ -602,8 +609,8 @@ static bool load_elfboot(const char *kernel_filename,
uint64_t elf_note_type = XEN_ELFNOTE_PHYS32_ENTRY;
kernel_size = load_elf(kernel_filename, read_pvh_start_addr,
NULL, &elf_note_type, &elf_entry,
- &elf_low, &elf_high, NULL, 0, I386_ELF_MACHINE,
- 0, 0);
+ &elf_low, &elf_high, NULL,
+ ELFDATA2LSB, I386_ELF_MACHINE, 0, 0);
if (kernel_size < 0) {
error_report("Error while loading elf kernel");
@@ -665,9 +672,12 @@ void x86_load_linux(X86MachineState *x86ms,
exit(1);
}
- /* kernel protocol version */
- if (ldl_p(header + 0x202) == 0x53726448) {
- protocol = lduw_p(header + 0x206);
+ /*
+ * kernel protocol version.
+ * Please see https://www.kernel.org/doc/Documentation/x86/boot.txt
+ */
+ if (ldl_le_p(header + 0x202) == 0x53726448) /* Magic signature "HdrS" */ {
+ protocol = lduw_le_p(header + 0x206);
} else {
/*
* This could be a multiboot kernel. If it is, let's stop treating it
@@ -694,9 +704,11 @@ void x86_load_linux(X86MachineState *x86ms,
strlen(kernel_cmdline) + 1);
fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline);
+ setup = g_memdup2(header, sizeof(header));
+
fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, sizeof(header));
fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA,
- header, sizeof(header));
+ setup, sizeof(header));
/* load initrd */
if (initrd_filename) {
@@ -759,7 +771,7 @@ void x86_load_linux(X86MachineState *x86ms,
/* highest address for loading the initrd */
if (protocol >= 0x20c &&
- lduw_p(header + 0x236) & XLF_CAN_BE_LOADED_ABOVE_4G) {
+ lduw_le_p(header + 0x236) & XLF_CAN_BE_LOADED_ABOVE_4G) {
/*
* Linux has supported initrd up to 4 GB for a very long time (2007,
* long before XLF_CAN_BE_LOADED_ABOVE_4G which was added in 2013),
@@ -778,7 +790,7 @@ void x86_load_linux(X86MachineState *x86ms,
*/
initrd_max = UINT32_MAX;
} else if (protocol >= 0x203) {
- initrd_max = ldl_p(header + 0x22c);
+ initrd_max = ldl_le_p(header + 0x22c);
} else {
initrd_max = 0x37ffffff;
}
@@ -794,10 +806,10 @@ void x86_load_linux(X86MachineState *x86ms,
sev_load_ctx.cmdline_size = strlen(kernel_cmdline) + 1;
if (protocol >= 0x202) {
- stl_p(header + 0x228, cmdline_addr);
+ stl_le_p(header + 0x228, cmdline_addr);
} else {
- stw_p(header + 0x20, 0xA33F);
- stw_p(header + 0x22, cmdline_addr - real_addr);
+ stw_le_p(header + 0x20, 0xA33F);
+ stw_le_p(header + 0x22, cmdline_addr - real_addr);
}
/* handle vga= parameter */
@@ -821,7 +833,7 @@ void x86_load_linux(X86MachineState *x86ms,
exit(1);
}
}
- stw_p(header + 0x1fa, video_mode);
+ stw_le_p(header + 0x1fa, video_mode);
}
/* loader type */
@@ -836,7 +848,7 @@ void x86_load_linux(X86MachineState *x86ms,
/* heap */
if (protocol >= 0x201) {
header[0x211] |= 0x80; /* CAN_USE_HEAP */
- stw_p(header + 0x224, cmdline_addr - real_addr - 0x200);
+ stw_le_p(header + 0x224, cmdline_addr - real_addr - 0x200);
}
/* load initrd */
@@ -876,8 +888,8 @@ void x86_load_linux(X86MachineState *x86ms,
sev_load_ctx.initrd_data = initrd_data;
sev_load_ctx.initrd_size = initrd_size;
- stl_p(header + 0x218, initrd_addr);
- stl_p(header + 0x21c, initrd_size);
+ stl_le_p(header + 0x218, initrd_addr);
+ stl_le_p(header + 0x21c, initrd_size);
}
/* load kernel and setup */
@@ -890,7 +902,6 @@ void x86_load_linux(X86MachineState *x86ms,
fprintf(stderr, "qemu: invalid kernel header\n");
exit(1);
}
- kernel_size -= setup_size;
setup = g_malloc(setup_size);
kernel = g_malloc(kernel_size);
@@ -899,6 +910,7 @@ void x86_load_linux(X86MachineState *x86ms,
fprintf(stderr, "fread() failed\n");
exit(1);
}
+ fseek(f, 0, SEEK_SET);
if (fread(kernel, 1, kernel_size, f) != kernel_size) {
fprintf(stderr, "fread() failed\n");
exit(1);
@@ -923,7 +935,7 @@ void x86_load_linux(X86MachineState *x86ms,
kernel_size = setup_data_offset + sizeof(struct setup_data) + dtb_size;
kernel = g_realloc(kernel, kernel_size);
- stq_p(header + 0x250, prot_addr + setup_data_offset);
+ stq_le_p(header + 0x250, prot_addr + setup_data_offset);
setup_data = (struct setup_data *)(kernel + setup_data_offset);
setup_data->next = 0;
@@ -940,15 +952,16 @@ void x86_load_linux(X86MachineState *x86ms,
* kernel on the other side of the fw_cfg interface matches the hash of the
* file the user passed in.
*/
- if (!sev_enabled()) {
+ if (!sev_enabled() && protocol > 0) {
memcpy(setup, header, MIN(sizeof(header), setup_size));
}
fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr);
- fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size);
- fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size);
- sev_load_ctx.kernel_data = (char *)kernel;
- sev_load_ctx.kernel_size = kernel_size;
+ fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size - setup_size);
+ fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA,
+ kernel + setup_size, kernel_size - setup_size);
+ sev_load_ctx.kernel_data = (char *)kernel + setup_size;
+ sev_load_ctx.kernel_size = kernel_size - setup_size;
fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr);
fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, setup_size);
@@ -956,6 +969,25 @@ void x86_load_linux(X86MachineState *x86ms,
sev_load_ctx.setup_data = (char *)setup;
sev_load_ctx.setup_size = setup_size;
+ /* kernel without setup header patches */
+ fw_cfg_add_file(fw_cfg, "etc/boot/kernel", kernel, kernel_size);
+
+ if (machine->shim_filename) {
+ GMappedFile *mapped_file;
+ GError *gerr = NULL;
+
+ mapped_file = g_mapped_file_new(machine->shim_filename, false, &gerr);
+ if (!mapped_file) {
+ fprintf(stderr, "qemu: error reading shim %s: %s\n",
+ machine->shim_filename, gerr->message);
+ exit(1);
+ }
+
+ fw_cfg_add_file(fw_cfg, "etc/boot/shim",
+ g_mapped_file_get_contents(mapped_file),
+ g_mapped_file_get_length(mapped_file));
+ }
+
if (sev_enabled()) {
sev_add_kernel_loader_hashes(&sev_load_ctx, &error_fatal);
}
@@ -1004,11 +1036,14 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware,
if (machine_require_guest_memfd(MACHINE(x86ms))) {
memory_region_init_ram_guest_memfd(&x86ms->bios, NULL, "pc.bios",
bios_size, &error_fatal);
+ if (is_tdx_vm()) {
+ tdx_set_tdvf_region(&x86ms->bios);
+ }
} else {
memory_region_init_ram(&x86ms->bios, NULL, "pc.bios",
bios_size, &error_fatal);
}
- if (sev_enabled()) {
+ if (sev_enabled() || is_tdx_vm()) {
/*
* The concept of a "reset" simply doesn't exist for
* confidential computing guests, we have to destroy and
diff --git a/hw/i386/x86-cpu.c b/hw/i386/x86-cpu.c
index ab29205..c876e67 100644
--- a/hw/i386/x86-cpu.c
+++ b/hw/i386/x86-cpu.c
@@ -21,15 +21,15 @@
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
-#include "sysemu/whpx.h"
-#include "sysemu/cpu-timers.h"
+#include "system/whpx.h"
+#include "system/cpu-timers.h"
#include "trace.h"
#include "hw/i386/x86.h"
#include "target/i386/cpu.h"
#include "hw/intc/i8259.h"
#include "hw/irq.h"
-#include "sysemu/kvm.h"
+#include "system/kvm.h"
/* TSC handling */
uint64_t cpu_get_tsc(CPUX86State *env)
diff --git a/hw/i386/x86-iommu.c b/hw/i386/x86-iommu.c
index 60af896..d34a684 100644
--- a/hw/i386/x86-iommu.c
+++ b/hw/i386/x86-iommu.c
@@ -25,7 +25,7 @@
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "trace.h"
-#include "sysemu/kvm.h"
+#include "system/kvm.h"
void x86_iommu_iec_register_notifier(X86IOMMUState *iommu,
iec_notify_fn fn, void *data)
@@ -125,15 +125,14 @@ static void x86_iommu_realize(DeviceState *dev, Error **errp)
}
}
-static Property x86_iommu_properties[] = {
+static const Property x86_iommu_properties[] = {
DEFINE_PROP_ON_OFF_AUTO("intremap", X86IOMMUState,
intr_supported, ON_OFF_AUTO_AUTO),
DEFINE_PROP_BOOL("device-iotlb", X86IOMMUState, dt_supported, false),
DEFINE_PROP_BOOL("pt", X86IOMMUState, pt_supported, true),
- DEFINE_PROP_END_OF_LIST(),
};
-static void x86_iommu_class_init(ObjectClass *klass, void *data)
+static void x86_iommu_class_init(ObjectClass *klass, const void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
dc->realize = x86_iommu_realize;
@@ -147,7 +146,7 @@ bool x86_iommu_ir_supported(X86IOMMUState *s)
static const TypeInfo x86_iommu_info = {
.name = TYPE_X86_IOMMU_DEVICE,
- .parent = TYPE_SYS_BUS_DEVICE,
+ .parent = TYPE_DYNAMIC_SYS_BUS_DEVICE,
.instance_size = sizeof(X86IOMMUState),
.class_init = x86_iommu_class_init,
.class_size = sizeof(X86IOMMUClass),
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index a4aa8e0..f80533d 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -27,8 +27,8 @@
#include "qapi/qapi-visit-common.h"
#include "qapi/qapi-visit-machine.h"
#include "qapi/visitor.h"
-#include "sysemu/qtest.h"
-#include "sysemu/numa.h"
+#include "system/qtest.h"
+#include "system/numa.h"
#include "trace.h"
#include "hw/acpi/aml-build.h"
@@ -242,7 +242,7 @@ static void x86_machine_get_pit(Object *obj, Visitor *v, const char *name,
static void x86_machine_set_pit(Object *obj, Visitor *v, const char *name,
void *opaque, Error **errp)
{
- X86MachineState *x86ms = X86_MACHINE(obj);;
+ X86MachineState *x86ms = X86_MACHINE(obj);
visit_type_OnOffAuto(v, name, &x86ms->pit, errp);
}
@@ -372,7 +372,7 @@ static void x86_machine_initfn(Object *obj)
x86ms->above_4g_mem_start = 4 * GiB;
}
-static void x86_machine_class_init(ObjectClass *oc, void *data)
+static void x86_machine_class_init(ObjectClass *oc, const void *data)
{
MachineClass *mc = MACHINE_CLASS(oc);
X86MachineClass *x86mc = X86_MACHINE_CLASS(oc);
@@ -382,7 +382,6 @@ static void x86_machine_class_init(ObjectClass *oc, void *data)
mc->get_default_cpu_node_id = x86_get_default_cpu_node_id;
mc->possible_cpu_arch_ids = x86_possible_cpu_arch_ids;
mc->kvm_type = x86_kvm_type;
- x86mc->save_tsc_khz = true;
x86mc->fwcfg_dma_enabled = true;
nc->nmi_monitor_handler = x86_nmi;
@@ -450,7 +449,7 @@ static const TypeInfo x86_machine_info = {
.instance_init = x86_machine_initfn,
.class_size = sizeof(X86MachineClass),
.class_init = x86_machine_class_init,
- .interfaces = (InterfaceInfo[]) {
+ .interfaces = (const InterfaceInfo[]) {
{ TYPE_NMI },
{ }
},
diff --git a/hw/i386/xen/meson.build b/hw/i386/xen/meson.build
index 3f0df8b..c73c62b 100644
--- a/hw/i386/xen/meson.build
+++ b/hw/i386/xen/meson.build
@@ -4,6 +4,7 @@ i386_ss.add(when: 'CONFIG_XEN', if_true: files(
))
i386_ss.add(when: ['CONFIG_XEN', xen], if_true: files(
'xen-hvm.c',
+ 'xen-pvh.c',
))
i386_ss.add(when: 'CONFIG_XEN_BUS', if_true: files(
diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c
index 006d219..ceb2242 100644
--- a/hw/i386/xen/xen-hvm.c
+++ b/hw/i386/xen/xen-hvm.c
@@ -10,10 +10,12 @@
#include "qemu/osdep.h"
#include "qemu/units.h"
+#include "qemu/error-report.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-migration.h"
#include "trace.h"
+#include "hw/hw.h"
#include "hw/i386/pc.h"
#include "hw/irq.h"
#include "hw/i386/apic-msidef.h"
@@ -24,6 +26,10 @@
#include "hw/xen/arch_hvm.h"
#include <xen/hvm/e820.h>
#include "exec/target_page.h"
+#include "target/i386/cpu.h"
+#include "system/runstate.h"
+#include "system/xen-mapcache.h"
+#include "system/xen.h"
static MemoryRegion ram_640k, ram_lo, ram_hi;
static MemoryRegion *framebuffer;
@@ -584,6 +590,26 @@ static void xen_wakeup_notifier(Notifier *notifier, void *data)
xc_set_hvm_param(xen_xc, xen_domid, HVM_PARAM_ACPI_S_STATE, 0);
}
+static bool xen_check_stubdomain(struct xs_handle *xsh)
+{
+ char *dm_path = g_strdup_printf(
+ "/local/domain/%d/image/device-model-domid", xen_domid);
+ char *val;
+ int32_t dm_domid;
+ bool is_stubdom = false;
+
+ val = xs_read(xsh, 0, dm_path, NULL);
+ if (val) {
+ if (sscanf(val, "%d", &dm_domid) == 1) {
+ is_stubdom = dm_domid != 0;
+ }
+ free(val);
+ }
+
+ g_free(dm_path);
+ return is_stubdom;
+}
+
void xen_hvm_init_pc(PCMachineState *pcms, MemoryRegion **ram_memory)
{
MachineState *ms = MACHINE(pcms);
@@ -594,7 +620,11 @@ void xen_hvm_init_pc(PCMachineState *pcms, MemoryRegion **ram_memory)
state = g_new0(XenIOState, 1);
- xen_register_ioreq(state, max_cpus, &xen_memory_listener);
+ xen_register_ioreq(state, max_cpus,
+ HVM_IOREQSRV_BUFIOREQ_ATOMIC,
+ &xen_memory_listener);
+
+ xen_is_stubdomain = xen_check_stubdomain(state->xenstore);
QLIST_INIT(&xen_physmap);
xen_read_physmap(state);
@@ -728,6 +758,4 @@ void arch_handle_ioreq(XenIOState *state, ioreq_t *req)
default:
hw_error("Invalid ioreq type 0x%x\n", req->type);
}
-
- return;
}
diff --git a/hw/i386/xen/xen-pvh.c b/hw/i386/xen/xen-pvh.c
new file mode 100644
index 0000000..067f73e
--- /dev/null
+++ b/hw/i386/xen/xen-pvh.c
@@ -0,0 +1,125 @@
+/*
+ * QEMU Xen PVH x86 Machine
+ *
+ * Copyright (c) 2024 Advanced Micro Devices, Inc.
+ * Written by Edgar E. Iglesias <edgar.iglesias@amd.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "hw/boards.h"
+#include "system/system.h"
+#include "hw/xen/arch_hvm.h"
+#include <xen/hvm/hvm_info_table.h>
+#include "hw/xen/xen-pvh-common.h"
+#include "target/i386/cpu.h"
+
+#define TYPE_XEN_PVH_X86 MACHINE_TYPE_NAME("xenpvh")
+OBJECT_DECLARE_SIMPLE_TYPE(XenPVHx86State, XEN_PVH_X86)
+
+struct XenPVHx86State {
+ /*< private >*/
+ XenPVHMachineState parent;
+
+ DeviceState **cpu;
+};
+
+static DeviceState *xen_pvh_cpu_new(MachineState *ms,
+ int64_t apic_id)
+{
+ Object *cpu = object_new(ms->cpu_type);
+
+ object_property_add_child(OBJECT(ms), "cpu[*]", cpu);
+ object_property_set_uint(cpu, "apic-id", apic_id, &error_fatal);
+ qdev_realize(DEVICE(cpu), NULL, &error_fatal);
+ object_unref(cpu);
+
+ return DEVICE(cpu);
+}
+
+static void xen_pvh_init(MachineState *ms)
+{
+ XenPVHx86State *xp = XEN_PVH_X86(ms);
+ int i;
+
+ /* Create dummy cores. This will indirectly create the APIC MSI window. */
+ xp->cpu = g_malloc(sizeof xp->cpu[0] * ms->smp.max_cpus);
+ for (i = 0; i < ms->smp.max_cpus; i++) {
+ xp->cpu[i] = xen_pvh_cpu_new(ms, i);
+ }
+}
+
+static void xen_pvh_instance_init(Object *obj)
+{
+ XenPVHMachineState *s = XEN_PVH_MACHINE(obj);
+
+ /* Default values. */
+ s->cfg.ram_low = (MemMapEntry) { 0x0, 0x80000000U };
+ s->cfg.ram_high = (MemMapEntry) { 0xC000000000ULL, 0x4000000000ULL };
+ s->cfg.pci_intx_irq_base = 16;
+}
+
+/*
+ * Deliver INTX interrupts to Xen guest.
+ */
+static void xen_pvh_set_pci_intx_irq(void *opaque, int irq, int level)
+{
+ /*
+ * Since QEMU emulates all of the swizzling,
+ * we don't want Xen to do any additional swizzling in
+ * xen_set_pci_intx_level(), so we always set device to 0.
+ */
+ if (xen_set_pci_intx_level(xen_domid, 0, 0, 0, irq, level)) {
+ error_report("xendevicemodel_set_pci_intx_level failed");
+ }
+}
+
+static void xen_pvh_machine_class_init(ObjectClass *oc, const void *data)
+{
+ XenPVHMachineClass *xpc = XEN_PVH_MACHINE_CLASS(oc);
+ MachineClass *mc = MACHINE_CLASS(oc);
+
+ mc->desc = "Xen PVH x86 machine";
+ mc->default_cpu_type = TARGET_DEFAULT_CPU_TYPE;
+
+ /* mc->max_cpus holds the maximum value accepted by the -smp command-line option. */
+ mc->max_cpus = HVM_MAX_VCPUS;
+
+ /* We have an implementation-specific init to create CPU objects. */
+ xpc->init = xen_pvh_init;
+
+ /* Enable buffered IOREQs. */
+ xpc->handle_bufioreq = HVM_IOREQSRV_BUFIOREQ_ATOMIC;
+
+ /*
+ * PCI INTX routing.
+ *
+ * We describe the mapping between the four INTX interrupts and GSIs
+ * using xen_set_pci_link_route(). xen_pvh_set_pci_intx_irq() is
+ * used to deliver the interrupts.
+ */
+ xpc->set_pci_intx_irq = xen_pvh_set_pci_intx_irq;
+ xpc->set_pci_link_route = xen_set_pci_link_route;
+
+ /* List of supported features known to work on PVH x86. */
+ xpc->has_pci = true;
+
+ xen_pvh_class_setup_common_props(xpc);
+}
+
+static const TypeInfo xen_pvh_x86_machine_type = {
+ .name = TYPE_XEN_PVH_X86,
+ .parent = TYPE_XEN_PVH_MACHINE,
+ .class_init = xen_pvh_machine_class_init,
+ .instance_init = xen_pvh_instance_init,
+ .instance_size = sizeof(XenPVHx86State),
+};
+
+static void xen_pvh_machine_register_types(void)
+{
+ type_register_static(&xen_pvh_x86_machine_type);
+}
+
+type_init(xen_pvh_machine_register_types)
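Aside on the swizzling comment in xen_pvh_set_pci_intx_irq() above: by the time that callback runs, QEMU's PCI layer has already collapsed (slot, pin) onto one of the four INTX lines, which is why the device argument passed to Xen can stay 0. A minimal illustrative sketch of that standard swizzle (hypothetical hook name, not part of this patch):

#include "qemu/osdep.h"
#include "hw/pci/pci.h"

/*
 * Hypothetical host-bridge map-irq hook: the barber-pole swizzle maps
 * (slot, pin) onto one of the four INTX lines, so a machine-level INTX
 * handler only ever sees a post-swizzle line number in "irq".
 */
static int demo_pci_map_irq(PCIDevice *pci_dev, int pin)
{
    return pci_swizzle_map_irq_fn(pci_dev, pin);
}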
diff --git a/hw/i386/xen/xen_apic.c b/hw/i386/xen/xen_apic.c
index 101e16a..f30398f 100644
--- a/hw/i386/xen/xen_apic.c
+++ b/hw/i386/xen/xen_apic.c
@@ -36,7 +36,7 @@ static void xen_apic_mem_write(void *opaque, hwaddr addr,
static const MemoryRegionOps xen_apic_io_ops = {
.read = xen_apic_mem_read,
.write = xen_apic_mem_write,
- .endianness = DEVICE_NATIVE_ENDIAN,
+ .endianness = DEVICE_LITTLE_ENDIAN,
};
static void xen_apic_realize(DeviceState *dev, Error **errp)
@@ -76,7 +76,7 @@ static void xen_send_msi(MSIMessage *msi)
xen_hvm_inject_msi(msi->address, msi->data);
}
-static void xen_apic_class_init(ObjectClass *klass, void *data)
+static void xen_apic_class_init(ObjectClass *klass, const void *data)
{
APICCommonClass *k = APIC_COMMON_CLASS(klass);
diff --git a/hw/i386/xen/xen_platform.c b/hw/i386/xen/xen_platform.c
index 708488a..c8b852b 100644
--- a/hw/i386/xen/xen_platform.c
+++ b/hw/i386/xen/xen_platform.c
@@ -30,8 +30,8 @@
#include "migration/vmstate.h"
#include "net/net.h"
#include "trace.h"
-#include "sysemu/xen.h"
-#include "sysemu/block-backend.h"
+#include "system/xen.h"
+#include "system/block-backend.h"
#include "qemu/error-report.h"
#include "qemu/module.h"
#include "qom/object.h"
@@ -514,7 +514,7 @@ static void platform_mmio_write(void *opaque, hwaddr addr,
static const MemoryRegionOps platform_mmio_handler = {
.read = &platform_mmio_read,
.write = &platform_mmio_write,
- .endianness = DEVICE_NATIVE_ENDIAN,
+ .endianness = DEVICE_LITTLE_ENDIAN,
};
static void platform_mmio_setup(PCIXenPlatformState *d)
@@ -581,7 +581,7 @@ static void platform_reset(DeviceState *dev)
platform_fixed_ioport_reset(s);
}
-static void xen_platform_class_init(ObjectClass *klass, void *data)
+static void xen_platform_class_init(ObjectClass *klass, const void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
@@ -595,7 +595,7 @@ static void xen_platform_class_init(ObjectClass *klass, void *data)
k->revision = 1;
set_bit(DEVICE_CATEGORY_MISC, dc->categories);
dc->desc = "XEN platform pci device";
- dc->reset = platform_reset;
+ device_class_set_legacy_reset(dc, platform_reset);
dc->vmsd = &vmstate_xen_platform;
}
@@ -604,7 +604,7 @@ static const TypeInfo xen_platform_info = {
.parent = TYPE_PCI_DEVICE,
.instance_size = sizeof(PCIXenPlatformState),
.class_init = xen_platform_class_init,
- .interfaces = (InterfaceInfo[]) {
+ .interfaces = (const InterfaceInfo[]) {
{ INTERFACE_CONVENTIONAL_PCI_DEVICE },
{ },
},
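The reset change above replaces a direct assignment to the legacy dc->reset field with the device_class_set_legacy_reset() helper. A minimal sketch of the pattern (hypothetical device, not from this patch):

#include "qemu/osdep.h"
#include "hw/qdev-core.h"

static void demo_reset(DeviceState *dev)
{
    /* Return the device to its cold-reset state here. */
}

static void demo_class_init(ObjectClass *klass, const void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);

    /* Replaces the former "dc->reset = demo_reset;" assignment. */
    device_class_set_legacy_reset(dc, demo_reset);
}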
diff --git a/hw/i386/xen/xen_pvdevice.c b/hw/i386/xen/xen_pvdevice.c
index ed62153..87a974a 100644
--- a/hw/i386/xen/xen_pvdevice.c
+++ b/hw/i386/xen/xen_pvdevice.c
@@ -115,15 +115,14 @@ static void xen_pv_realize(PCIDevice *pci_dev, Error **errp)
&d->mmio);
}
-static Property xen_pv_props[] = {
+static const Property xen_pv_props[] = {
DEFINE_PROP_UINT16("vendor-id", XenPVDevice, vendor_id, PCI_VENDOR_ID_XEN),
DEFINE_PROP_UINT16("device-id", XenPVDevice, device_id, 0xffff),
DEFINE_PROP_UINT8("revision", XenPVDevice, revision, 0x01),
DEFINE_PROP_UINT32("size", XenPVDevice, size, 0x400000),
- DEFINE_PROP_END_OF_LIST()
};
-static void xen_pv_class_init(ObjectClass *klass, void *data)
+static void xen_pv_class_init(ObjectClass *klass, const void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
@@ -140,7 +139,7 @@ static const TypeInfo xen_pv_type_info = {
.parent = TYPE_PCI_DEVICE,
.instance_size = sizeof(XenPVDevice),
.class_init = xen_pv_class_init,
- .interfaces = (InterfaceInfo[]) {
+ .interfaces = (const InterfaceInfo[]) {
{ INTERFACE_CONVENTIONAL_PCI_DEVICE },
{ },
},
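The property change above makes the array const and drops the DEFINE_PROP_END_OF_LIST() terminator. A minimal sketch of the resulting convention (hypothetical device; the device_class_set_props() call is assumed to live in a class_init outside this hunk):

#include "qemu/osdep.h"
#include "hw/qdev-properties.h"

typedef struct DemoState {
    DeviceState parent_obj;
    uint32_t size;
} DemoState;

static const Property demo_props[] = {
    /* No DEFINE_PROP_END_OF_LIST() terminator any more. */
    DEFINE_PROP_UINT32("size", DemoState, size, 0x400000),
};

static void demo_class_init(ObjectClass *klass, const void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);

    device_class_set_props(dc, demo_props);
}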