aboutsummaryrefslogtreecommitdiff
path: root/hw
diff options
context:
space:
mode:
Diffstat (limited to 'hw')
-rw-r--r--hw/arm/meson.build4
-rw-r--r--hw/core/loader.c14
-rw-r--r--hw/core/machine.c18
-rw-r--r--hw/core/meson.build4
-rw-r--r--hw/core/qdev-properties-system.c1
-rw-r--r--hw/i386/Kconfig6
-rw-r--r--hw/i386/amd_iommu.c20
-rw-r--r--hw/i386/kvm/apic.c5
-rw-r--r--hw/i386/meson.build1
-rw-r--r--hw/i386/pc.c71
-rw-r--r--hw/i386/pc_piix.c31
-rw-r--r--hw/i386/pc_q35.c26
-rw-r--r--hw/i386/pc_sysfw.c7
-rw-r--r--hw/i386/tdvf-hob.c130
-rw-r--r--hw/i386/tdvf-hob.h26
-rw-r--r--hw/i386/tdvf.c189
-rw-r--r--hw/i386/x86-common.c6
-rw-r--r--hw/i386/x86.c1
-rw-r--r--hw/microblaze/petalogix_ml605_mmu.c15
-rw-r--r--hw/microblaze/petalogix_s3adsp1800_mmu.c42
-rw-r--r--hw/microblaze/xlnx-zynqmp-pmu.c7
-rw-r--r--hw/net/e1000.c95
-rw-r--r--hw/net/rocker/rocker.h14
-rw-r--r--hw/net/rocker/rocker_hw.h20
-rw-r--r--hw/net/rocker/rocker_of_dpa.c40
-rw-r--r--hw/net/vmxnet3.c44
-rw-r--r--hw/nvram/fw_cfg.c110
-rw-r--r--hw/pci/pci.c206
-rw-r--r--hw/pci/pcie.c78
-rw-r--r--hw/scsi/vmw_pvscsi.c67
-rw-r--r--hw/timer/hpet.c166
-rw-r--r--hw/vfio/container-base.c4
-rw-r--r--hw/vfio/container.c5
-rw-r--r--hw/vfio/cpr.c2
-rw-r--r--hw/vfio/igd.c22
-rw-r--r--hw/vfio/iommufd.c45
-rw-r--r--hw/vfio/listener.c74
-rw-r--r--hw/vfio/pci.c89
-rw-r--r--hw/vfio/vfio-cpr.h15
-rw-r--r--hw/virtio/vhost-vdpa.c116
-rw-r--r--hw/virtio/virtio-pci.c18
-rw-r--r--hw/virtio/virtio.c11
42 files changed, 1151 insertions, 714 deletions
diff --git a/hw/arm/meson.build b/hw/arm/meson.build
index 5098795..d90be8f 100644
--- a/hw/arm/meson.build
+++ b/hw/arm/meson.build
@@ -8,7 +8,7 @@ arm_common_ss.add(when: 'CONFIG_HIGHBANK', if_true: files('highbank.c'))
arm_common_ss.add(when: 'CONFIG_INTEGRATOR', if_true: files('integratorcp.c'))
arm_common_ss.add(when: 'CONFIG_MICROBIT', if_true: files('microbit.c'))
arm_common_ss.add(when: 'CONFIG_MPS3R', if_true: files('mps3r.c'))
-arm_common_ss.add(when: 'CONFIG_MUSICPAL', if_true: [pixman, files('musicpal.c')])
+arm_common_ss.add(when: 'CONFIG_MUSICPAL', if_true: [files('musicpal.c')])
arm_common_ss.add(when: 'CONFIG_NETDUINOPLUS2', if_true: files('netduinoplus2.c'))
arm_common_ss.add(when: 'CONFIG_OLIMEX_STM32_H405', if_true: files('olimex-stm32-h405.c'))
arm_common_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx.c', 'npcm7xx_boards.c'))
@@ -79,7 +79,7 @@ arm_common_ss.add(when: 'CONFIG_SX1', if_true: files('omap_sx1.c'))
arm_common_ss.add(when: 'CONFIG_VERSATILE', if_true: files('versatilepb.c'))
arm_common_ss.add(when: 'CONFIG_VEXPRESS', if_true: files('vexpress.c'))
-arm_common_ss.add(fdt, files('boot.c'))
+arm_common_ss.add(files('boot.c'))
hw_arch += {'arm': arm_ss}
hw_common_arch += {'arm': arm_common_ss}
diff --git a/hw/core/loader.c b/hw/core/loader.c
index b792a54..e7056ba 100644
--- a/hw/core/loader.c
+++ b/hw/core/loader.c
@@ -1333,20 +1333,6 @@ void rom_set_fw(FWCfgState *f)
fw_cfg = f;
}
-void rom_set_order_override(int order)
-{
- if (!fw_cfg)
- return;
- fw_cfg_set_order_override(fw_cfg, order);
-}
-
-void rom_reset_order_override(void)
-{
- if (!fw_cfg)
- return;
- fw_cfg_reset_order_override(fw_cfg);
-}
-
void rom_transaction_begin(void)
{
Rom *rom;
diff --git a/hw/core/machine.c b/hw/core/machine.c
index c3f3a50..e869821 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -285,24 +285,6 @@ GlobalProperty hw_compat_2_6[] = {
};
const size_t hw_compat_2_6_len = G_N_ELEMENTS(hw_compat_2_6);
-GlobalProperty hw_compat_2_5[] = {
- { "isa-fdc", "fallback", "144" },
- { "pvscsi", "x-old-pci-configuration", "on" },
- { "pvscsi", "x-disable-pcie", "on" },
- { "vmxnet3", "x-old-msi-offsets", "on" },
- { "vmxnet3", "x-disable-pcie", "on" },
-};
-const size_t hw_compat_2_5_len = G_N_ELEMENTS(hw_compat_2_5);
-
-GlobalProperty hw_compat_2_4[] = {
- { "e1000", "extra_mac_registers", "off" },
- { "virtio-pci", "x-disable-pcie", "on" },
- { "virtio-pci", "migrate-extra", "off" },
- { "fw_cfg_mem", "dma_enabled", "off" },
- { "fw_cfg_io", "dma_enabled", "off" }
-};
-const size_t hw_compat_2_4_len = G_N_ELEMENTS(hw_compat_2_4);
-
MachineState *current_machine;
static char *machine_get_kernel(Object *obj, Error **errp)
diff --git a/hw/core/meson.build b/hw/core/meson.build
index 547de65..b5a545a 100644
--- a/hw/core/meson.build
+++ b/hw/core/meson.build
@@ -26,7 +26,7 @@ system_ss.add(when: 'CONFIG_XILINX_AXI', if_true: files('stream.c'))
system_ss.add(when: 'CONFIG_PLATFORM_BUS', if_true: files('sysbus-fdt.c'))
system_ss.add(when: 'CONFIG_EIF', if_true: [files('eif.c'), zlib, libcbor, gnutls])
-libsystem_ss.add(files(
+system_ss.add(files(
'cpu-system.c',
'fw-path-provider.c',
'gpio.c',
@@ -46,7 +46,7 @@ libsystem_ss.add(files(
'vm-change-state-handler.c',
'clock-vmstate.c',
))
-libuser_ss.add(files(
+user_ss.add(files(
'cpu-user.c',
'qdev-user.c',
))
diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
index 8e11e63..24e145d 100644
--- a/hw/core/qdev-properties-system.c
+++ b/hw/core/qdev-properties-system.c
@@ -145,6 +145,7 @@ static void set_drive_helper(Object *obj, Visitor *v, const char *name,
if (ctx != bdrv_get_aio_context(bs)) {
error_setg(errp, "Different aio context is not supported for new "
"node");
+ return;
}
blk_replace_bs(blk, bs, errp);
diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig
index d34ce07..eb65bda 100644
--- a/hw/i386/Kconfig
+++ b/hw/i386/Kconfig
@@ -10,6 +10,11 @@ config SGX
bool
depends on KVM
+config TDX
+ bool
+ select X86_FW_OVMF
+ depends on KVM
+
config PC
bool
imply APPLESMC
@@ -26,6 +31,7 @@ config PC
imply QXL
imply SEV
imply SGX
+ imply TDX
imply TEST_DEVICES
imply TPM_CRB
imply TPM_TIS_ISA
diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index 0775c8f..963aa24 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -1426,7 +1426,6 @@ static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
AMDVIState *s = opaque;
AMDVIAddressSpace **iommu_as, *amdvi_dev_as;
int bus_num = pci_bus_num(bus);
- X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
iommu_as = s->address_spaces[bus_num];
@@ -1486,15 +1485,8 @@ static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
AMDVI_INT_ADDR_FIRST,
&amdvi_dev_as->iommu_ir, 1);
- if (!x86_iommu->pt_supported) {
- memory_region_set_enabled(&amdvi_dev_as->iommu_nodma, false);
- memory_region_set_enabled(MEMORY_REGION(&amdvi_dev_as->iommu),
- true);
- } else {
- memory_region_set_enabled(MEMORY_REGION(&amdvi_dev_as->iommu),
- false);
- memory_region_set_enabled(&amdvi_dev_as->iommu_nodma, true);
- }
+ memory_region_set_enabled(&amdvi_dev_as->iommu_nodma, false);
+ memory_region_set_enabled(MEMORY_REGION(&amdvi_dev_as->iommu), true);
}
return &iommu_as[devfn]->as;
}
@@ -1723,6 +1715,14 @@ static void amdvi_sysbus_realize(DeviceState *dev, Error **errp)
exit(EXIT_FAILURE);
}
+ if (s->xtsup) {
+ if (kvm_irqchip_is_split() && !kvm_enable_x2apic()) {
+ error_report("AMD IOMMU xtsup=on requires x2APIC support on "
+ "the KVM side");
+ exit(EXIT_FAILURE);
+ }
+ }
+
pci_setup_iommu(bus, &amdvi_iommu_ops, s);
amdvi_init(s);
}
diff --git a/hw/i386/kvm/apic.c b/hw/i386/kvm/apic.c
index 39035db..1be9bfe 100644
--- a/hw/i386/kvm/apic.c
+++ b/hw/i386/kvm/apic.c
@@ -17,6 +17,7 @@
#include "system/hw_accel.h"
#include "system/kvm.h"
#include "kvm/kvm_i386.h"
+#include "kvm/tdx.h"
static inline void kvm_apic_set_reg(struct kvm_lapic_state *kapic,
int reg_id, uint32_t val)
@@ -141,6 +142,10 @@ static void kvm_apic_put(CPUState *cs, run_on_cpu_data data)
struct kvm_lapic_state kapic;
int ret;
+ if (is_tdx_vm()) {
+ return;
+ }
+
kvm_put_apicbase(s->cpu, s->apicbase);
kvm_put_apic_state(s, &kapic);
diff --git a/hw/i386/meson.build b/hw/i386/meson.build
index 10bdfde..7896f34 100644
--- a/hw/i386/meson.build
+++ b/hw/i386/meson.build
@@ -32,6 +32,7 @@ i386_ss.add(when: 'CONFIG_PC', if_true: files(
'port92.c'))
i386_ss.add(when: 'CONFIG_X86_FW_OVMF', if_true: files('pc_sysfw_ovmf.c'),
if_false: files('pc_sysfw_ovmf-stubs.c'))
+i386_ss.add(when: 'CONFIG_TDX', if_true: files('tdvf.c', 'tdvf-hob.c'))
subdir('kvm')
subdir('xen')
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 7065615..b211633 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -44,6 +44,7 @@
#include "system/xen.h"
#include "system/reset.h"
#include "kvm/kvm_i386.h"
+#include "kvm/tdx.h"
#include "hw/xen/xen.h"
#include "qobject/qlist.h"
#include "qemu/error-report.h"
@@ -259,28 +260,6 @@ GlobalProperty pc_compat_2_6[] = {
};
const size_t pc_compat_2_6_len = G_N_ELEMENTS(pc_compat_2_6);
-GlobalProperty pc_compat_2_5[] = {};
-const size_t pc_compat_2_5_len = G_N_ELEMENTS(pc_compat_2_5);
-
-GlobalProperty pc_compat_2_4[] = {
- PC_CPU_MODEL_IDS("2.4.0")
- { "Haswell-" TYPE_X86_CPU, "abm", "off" },
- { "Haswell-noTSX-" TYPE_X86_CPU, "abm", "off" },
- { "Broadwell-" TYPE_X86_CPU, "abm", "off" },
- { "Broadwell-noTSX-" TYPE_X86_CPU, "abm", "off" },
- { "host" "-" TYPE_X86_CPU, "host-cache-info", "on" },
- { TYPE_X86_CPU, "check", "off" },
- { "qemu64" "-" TYPE_X86_CPU, "sse4a", "on" },
- { "qemu64" "-" TYPE_X86_CPU, "abm", "on" },
- { "qemu64" "-" TYPE_X86_CPU, "popcnt", "on" },
- { "qemu32" "-" TYPE_X86_CPU, "popcnt", "on" },
- { "Opteron_G2" "-" TYPE_X86_CPU, "rdtscp", "on" },
- { "Opteron_G3" "-" TYPE_X86_CPU, "rdtscp", "on" },
- { "Opteron_G4" "-" TYPE_X86_CPU, "rdtscp", "on" },
- { "Opteron_G5" "-" TYPE_X86_CPU, "rdtscp", "on", }
-};
-const size_t pc_compat_2_4_len = G_N_ELEMENTS(pc_compat_2_4);
-
/*
* @PC_FW_DATA:
* Size of the chunk of memory at the top of RAM for the BIOS ACPI tables
@@ -976,21 +955,23 @@ void pc_memory_init(PCMachineState *pcms,
/* Initialize PC system firmware */
pc_system_firmware_init(pcms, rom_memory);
- option_rom_mr = g_malloc(sizeof(*option_rom_mr));
- if (machine_require_guest_memfd(machine)) {
- memory_region_init_ram_guest_memfd(option_rom_mr, NULL, "pc.rom",
- PC_ROM_SIZE, &error_fatal);
- } else {
- memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE,
- &error_fatal);
- if (pcmc->pci_enabled) {
- memory_region_set_readonly(option_rom_mr, true);
+ if (!is_tdx_vm()) {
+ option_rom_mr = g_malloc(sizeof(*option_rom_mr));
+ if (machine_require_guest_memfd(machine)) {
+ memory_region_init_ram_guest_memfd(option_rom_mr, NULL, "pc.rom",
+ PC_ROM_SIZE, &error_fatal);
+ } else {
+ memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE,
+ &error_fatal);
+ if (pcmc->pci_enabled) {
+ memory_region_set_readonly(option_rom_mr, true);
+ }
}
+ memory_region_add_subregion_overlap(rom_memory,
+ PC_ROM_MIN_VGA,
+ option_rom_mr,
+ 1);
}
- memory_region_add_subregion_overlap(rom_memory,
- PC_ROM_MIN_VGA,
- option_rom_mr,
- 1);
fw_cfg = fw_cfg_arch_create(machine,
x86ms->boot_cpus, x86ms->apic_id_limit);
@@ -999,14 +980,13 @@ void pc_memory_init(PCMachineState *pcms,
if (machine->device_memory) {
uint64_t *val = g_malloc(sizeof(*val));
- uint64_t res_mem_end = machine->device_memory->base;
-
- if (!pcmc->broken_reserved_end) {
- res_mem_end += memory_region_size(&machine->device_memory->mr);
- }
+ uint64_t res_mem_end;
if (pcms->cxl_devices_state.is_enabled) {
res_mem_end = cxl_resv_end;
+ } else {
+ res_mem_end = machine->device_memory->base
+ + memory_region_size(&machine->device_memory->mr);
}
*val = cpu_to_le64(ROUND_UP(res_mem_end, 1 * GiB));
fw_cfg_add_file(fw_cfg, "etc/reserved-memory-end", val, sizeof(*val));
@@ -1044,9 +1024,7 @@ uint64_t pc_pci_hole64_start(void)
hole64_start = pc_get_cxl_range_end(pcms);
} else if (pcmc->has_reserved_memory && (ms->ram_size < ms->maxram_size)) {
pc_get_device_memory_range(pcms, &hole64_start, &size);
- if (!pcmc->broken_reserved_end) {
- hole64_start += size;
- }
+ hole64_start += size;
} else {
hole64_start = pc_above_4g_end(pcms);
}
@@ -1058,7 +1036,6 @@ DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus)
{
DeviceState *dev = NULL;
- rom_set_order_override(FW_CFG_ORDER_OVERRIDE_VGA);
if (pci_bus) {
PCIDevice *pcidev = pci_vga_init(pci_bus);
dev = pcidev ? &pcidev->qdev : NULL;
@@ -1066,7 +1043,7 @@ DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus)
ISADevice *isadev = isa_vga_init(isa_bus);
dev = isadev ? DEVICE(isadev) : NULL;
}
- rom_reset_order_override();
+
return dev;
}
@@ -1256,8 +1233,6 @@ void pc_nic_init(PCMachineClass *pcmc, ISABus *isa_bus, PCIBus *pci_bus)
bool default_is_ne2k = g_str_equal(mc->default_nic, TYPE_ISA_NE2000);
NICInfo *nd;
- rom_set_order_override(FW_CFG_ORDER_OVERRIDE_NIC);
-
while ((nd = qemu_find_nic_info(TYPE_ISA_NE2000, default_is_ne2k, NULL))) {
pc_init_ne2k_isa(isa_bus, nd, &error_fatal);
}
@@ -1266,8 +1241,6 @@ void pc_nic_init(PCMachineClass *pcmc, ISABus *isa_bus, PCIBus *pci_bus)
if (pci_bus) {
pci_init_nic_devices(pci_bus, mc->default_nic);
}
-
- rom_reset_order_override();
}
void pc_i8259_create(ISABus *isa_bus, qemu_irq *i8259_irqs)
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 0dce512..ea7572e 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -285,6 +285,8 @@ static void pc_init1(MachineState *machine, const char *pci_type)
pcms->idebus[0] = qdev_get_child_bus(dev, "ide.0");
pcms->idebus[1] = qdev_get_child_bus(dev, "ide.1");
} else {
+ uint32_t irq;
+
isa_bus = isa_bus_new(NULL, system_memory, system_io,
&error_abort);
isa_bus_register_input_irqs(isa_bus, x86ms->gsi);
@@ -292,6 +294,9 @@ static void pc_init1(MachineState *machine, const char *pci_type)
x86ms->rtc = isa_new(TYPE_MC146818_RTC);
qdev_prop_set_int32(DEVICE(x86ms->rtc), "base_year", 2000);
isa_realize_and_unref(x86ms->rtc, isa_bus, &error_fatal);
+ irq = object_property_get_uint(OBJECT(x86ms->rtc), "irq",
+ &error_fatal);
+ isa_connect_gpio_out(ISA_DEVICE(x86ms->rtc), 0, irq);
i8257_dma_init(OBJECT(machine), isa_bus, 0);
pcms->hpet_enabled = false;
@@ -778,32 +783,6 @@ static void pc_i440fx_machine_2_6_options(MachineClass *m)
DEFINE_I440FX_MACHINE(2, 6);
-static void pc_i440fx_machine_2_5_options(MachineClass *m)
-{
- X86MachineClass *x86mc = X86_MACHINE_CLASS(m);
-
- pc_i440fx_machine_2_6_options(m);
- x86mc->save_tsc_khz = false;
- m->legacy_fw_cfg_order = 1;
- compat_props_add(m->compat_props, hw_compat_2_5, hw_compat_2_5_len);
- compat_props_add(m->compat_props, pc_compat_2_5, pc_compat_2_5_len);
-}
-
-DEFINE_I440FX_MACHINE(2, 5);
-
-static void pc_i440fx_machine_2_4_options(MachineClass *m)
-{
- PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
-
- pc_i440fx_machine_2_5_options(m);
- m->hw_version = "2.4.0";
- pcmc->broken_reserved_end = true;
- compat_props_add(m->compat_props, hw_compat_2_4, hw_compat_2_4_len);
- compat_props_add(m->compat_props, pc_compat_2_4, pc_compat_2_4_len);
-}
-
-DEFINE_I440FX_MACHINE(2, 4);
-
#ifdef CONFIG_ISAPC
static void isapc_machine_options(MachineClass *m)
{
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index c538b3d..33211b1 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -672,29 +672,3 @@ static void pc_q35_machine_2_6_options(MachineClass *m)
}
DEFINE_Q35_MACHINE(2, 6);
-
-static void pc_q35_machine_2_5_options(MachineClass *m)
-{
- X86MachineClass *x86mc = X86_MACHINE_CLASS(m);
-
- pc_q35_machine_2_6_options(m);
- x86mc->save_tsc_khz = false;
- m->legacy_fw_cfg_order = 1;
- compat_props_add(m->compat_props, hw_compat_2_5, hw_compat_2_5_len);
- compat_props_add(m->compat_props, pc_compat_2_5, pc_compat_2_5_len);
-}
-
-DEFINE_Q35_MACHINE(2, 5);
-
-static void pc_q35_machine_2_4_options(MachineClass *m)
-{
- PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
-
- pc_q35_machine_2_5_options(m);
- m->hw_version = "2.4.0";
- pcmc->broken_reserved_end = true;
- compat_props_add(m->compat_props, hw_compat_2_4, hw_compat_2_4_len);
- compat_props_add(m->compat_props, pc_compat_2_4, pc_compat_2_4_len);
-}
-
-DEFINE_Q35_MACHINE(2, 4);
diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c
index 1eeb58a..821396c 100644
--- a/hw/i386/pc_sysfw.c
+++ b/hw/i386/pc_sysfw.c
@@ -37,6 +37,7 @@
#include "hw/block/flash.h"
#include "system/kvm.h"
#include "target/i386/sev.h"
+#include "kvm/tdx.h"
#define FLASH_SECTOR_SIZE 4096
@@ -280,5 +281,11 @@ void x86_firmware_configure(hwaddr gpa, void *ptr, int size)
}
sev_encrypt_flash(gpa, ptr, size, &error_fatal);
+ } else if (is_tdx_vm()) {
+ ret = tdx_parse_tdvf(ptr, size);
+ if (ret) {
+ error_report("failed to parse TDVF for TDX VM");
+ exit(1);
+ }
}
}
diff --git a/hw/i386/tdvf-hob.c b/hw/i386/tdvf-hob.c
new file mode 100644
index 0000000..782b3d1
--- /dev/null
+++ b/hw/i386/tdvf-hob.c
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2025 Intel Corporation
+ * Author: Isaku Yamahata <isaku.yamahata at gmail.com>
+ * <isaku.yamahata at intel.com>
+ * Xiaoyao Li <xiaoyao.li@intel.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "standard-headers/uefi/uefi.h"
+#include "hw/pci/pcie_host.h"
+#include "tdvf-hob.h"
+
+typedef struct TdvfHob {
+ hwaddr hob_addr;
+ void *ptr;
+ int size;
+
+ /* working area */
+ void *current;
+ void *end;
+} TdvfHob;
+
+static uint64_t tdvf_current_guest_addr(const TdvfHob *hob)
+{
+ return hob->hob_addr + (hob->current - hob->ptr);
+}
+
+static void tdvf_align(TdvfHob *hob, size_t align)
+{
+ hob->current = QEMU_ALIGN_PTR_UP(hob->current, align);
+}
+
+static void *tdvf_get_area(TdvfHob *hob, uint64_t size)
+{
+ void *ret;
+
+ if (hob->current + size > hob->end) {
+ error_report("TD_HOB overrun, size = 0x%" PRIx64, size);
+ exit(1);
+ }
+
+ ret = hob->current;
+ hob->current += size;
+ tdvf_align(hob, 8);
+ return ret;
+}
+
+static void tdvf_hob_add_memory_resources(TdxGuest *tdx, TdvfHob *hob)
+{
+ EFI_HOB_RESOURCE_DESCRIPTOR *region;
+ EFI_RESOURCE_ATTRIBUTE_TYPE attr;
+ EFI_RESOURCE_TYPE resource_type;
+
+ TdxRamEntry *e;
+ int i;
+
+ for (i = 0; i < tdx->nr_ram_entries; i++) {
+ e = &tdx->ram_entries[i];
+
+ if (e->type == TDX_RAM_UNACCEPTED) {
+ resource_type = EFI_RESOURCE_MEMORY_UNACCEPTED;
+ attr = EFI_RESOURCE_ATTRIBUTE_TDVF_UNACCEPTED;
+ } else if (e->type == TDX_RAM_ADDED) {
+ resource_type = EFI_RESOURCE_SYSTEM_MEMORY;
+ attr = EFI_RESOURCE_ATTRIBUTE_TDVF_PRIVATE;
+ } else {
+ error_report("unknown TDX_RAM_ENTRY type %d", e->type);
+ exit(1);
+ }
+
+ region = tdvf_get_area(hob, sizeof(*region));
+ *region = (EFI_HOB_RESOURCE_DESCRIPTOR) {
+ .Header = {
+ .HobType = EFI_HOB_TYPE_RESOURCE_DESCRIPTOR,
+ .HobLength = cpu_to_le16(sizeof(*region)),
+ .Reserved = cpu_to_le32(0),
+ },
+ .Owner = EFI_HOB_OWNER_ZERO,
+ .ResourceType = cpu_to_le32(resource_type),
+ .ResourceAttribute = cpu_to_le32(attr),
+ .PhysicalStart = cpu_to_le64(e->address),
+ .ResourceLength = cpu_to_le64(e->length),
+ };
+ }
+}
+
+void tdvf_hob_create(TdxGuest *tdx, TdxFirmwareEntry *td_hob)
+{
+ TdvfHob hob = {
+ .hob_addr = td_hob->address,
+ .size = td_hob->size,
+ .ptr = td_hob->mem_ptr,
+
+ .current = td_hob->mem_ptr,
+ .end = td_hob->mem_ptr + td_hob->size,
+ };
+
+ EFI_HOB_GENERIC_HEADER *last_hob;
+ EFI_HOB_HANDOFF_INFO_TABLE *hit;
+
+ /* Note, Efi{Free}Memory{Bottom,Top} are ignored, leave 'em zeroed. */
+ hit = tdvf_get_area(&hob, sizeof(*hit));
+ *hit = (EFI_HOB_HANDOFF_INFO_TABLE) {
+ .Header = {
+ .HobType = EFI_HOB_TYPE_HANDOFF,
+ .HobLength = cpu_to_le16(sizeof(*hit)),
+ .Reserved = cpu_to_le32(0),
+ },
+ .Version = cpu_to_le32(EFI_HOB_HANDOFF_TABLE_VERSION),
+ .BootMode = cpu_to_le32(0),
+ .EfiMemoryTop = cpu_to_le64(0),
+ .EfiMemoryBottom = cpu_to_le64(0),
+ .EfiFreeMemoryTop = cpu_to_le64(0),
+ .EfiFreeMemoryBottom = cpu_to_le64(0),
+ .EfiEndOfHobList = cpu_to_le64(0), /* initialized later */
+ };
+
+ tdvf_hob_add_memory_resources(tdx, &hob);
+
+ last_hob = tdvf_get_area(&hob, sizeof(*last_hob));
+ *last_hob = (EFI_HOB_GENERIC_HEADER) {
+ .HobType = EFI_HOB_TYPE_END_OF_HOB_LIST,
+ .HobLength = cpu_to_le16(sizeof(*last_hob)),
+ .Reserved = cpu_to_le32(0),
+ };
+ hit->EfiEndOfHobList = tdvf_current_guest_addr(&hob);
+}
diff --git a/hw/i386/tdvf-hob.h b/hw/i386/tdvf-hob.h
new file mode 100644
index 0000000..4fc6a37
--- /dev/null
+++ b/hw/i386/tdvf-hob.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef HW_I386_TD_HOB_H
+#define HW_I386_TD_HOB_H
+
+#include "hw/i386/tdvf.h"
+#include "target/i386/kvm/tdx.h"
+
+void tdvf_hob_create(TdxGuest *tdx, TdxFirmwareEntry *td_hob);
+
+#define EFI_RESOURCE_ATTRIBUTE_TDVF_PRIVATE \
+ (EFI_RESOURCE_ATTRIBUTE_PRESENT | \
+ EFI_RESOURCE_ATTRIBUTE_INITIALIZED | \
+ EFI_RESOURCE_ATTRIBUTE_TESTED)
+
+#define EFI_RESOURCE_ATTRIBUTE_TDVF_UNACCEPTED \
+ (EFI_RESOURCE_ATTRIBUTE_PRESENT | \
+ EFI_RESOURCE_ATTRIBUTE_INITIALIZED | \
+ EFI_RESOURCE_ATTRIBUTE_TESTED)
+
+#define EFI_RESOURCE_ATTRIBUTE_TDVF_MMIO \
+ (EFI_RESOURCE_ATTRIBUTE_PRESENT | \
+ EFI_RESOURCE_ATTRIBUTE_INITIALIZED | \
+ EFI_RESOURCE_ATTRIBUTE_UNCACHEABLE)
+
+#endif
diff --git a/hw/i386/tdvf.c b/hw/i386/tdvf.c
new file mode 100644
index 0000000..645d9d1
--- /dev/null
+++ b/hw/i386/tdvf.c
@@ -0,0 +1,189 @@
+/*
+ * Copyright (c) 2025 Intel Corporation
+ * Author: Isaku Yamahata <isaku.yamahata at gmail.com>
+ * <isaku.yamahata at intel.com>
+ * Xiaoyao Li <xiaoyao.li@intel.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+
+#include "hw/i386/pc.h"
+#include "hw/i386/tdvf.h"
+#include "system/kvm.h"
+
+#define TDX_METADATA_OFFSET_GUID "e47a6535-984a-4798-865e-4685a7bf8ec2"
+#define TDX_METADATA_VERSION 1
+#define TDVF_SIGNATURE 0x46564454 /* TDVF as little endian */
+#define TDVF_ALIGNMENT 4096
+
+/*
+ * the raw structs read from TDVF keeps the name convention in
+ * TDVF Design Guide spec.
+ */
+typedef struct {
+ uint32_t DataOffset;
+ uint32_t RawDataSize;
+ uint64_t MemoryAddress;
+ uint64_t MemoryDataSize;
+ uint32_t Type;
+ uint32_t Attributes;
+} TdvfSectionEntry;
+
+typedef struct {
+ uint32_t Signature;
+ uint32_t Length;
+ uint32_t Version;
+ uint32_t NumberOfSectionEntries;
+ TdvfSectionEntry SectionEntries[];
+} TdvfMetadata;
+
+struct tdx_metadata_offset {
+ uint32_t offset;
+};
+
+static TdvfMetadata *tdvf_get_metadata(void *flash_ptr, int size)
+{
+ TdvfMetadata *metadata;
+ uint32_t offset = 0;
+ uint8_t *data;
+
+ if ((uint32_t) size != size) {
+ return NULL;
+ }
+
+ if (pc_system_ovmf_table_find(TDX_METADATA_OFFSET_GUID, &data, NULL)) {
+ offset = size - le32_to_cpu(((struct tdx_metadata_offset *)data)->offset);
+
+ if (offset + sizeof(*metadata) > size) {
+ return NULL;
+ }
+ } else {
+ error_report("Cannot find TDX_METADATA_OFFSET_GUID");
+ return NULL;
+ }
+
+ metadata = flash_ptr + offset;
+
+ /* Finally, verify the signature to determine if this is a TDVF image. */
+ metadata->Signature = le32_to_cpu(metadata->Signature);
+ if (metadata->Signature != TDVF_SIGNATURE) {
+ error_report("Invalid TDVF signature in metadata!");
+ return NULL;
+ }
+
+ /* Sanity check that the TDVF doesn't overlap its own metadata. */
+ metadata->Length = le32_to_cpu(metadata->Length);
+ if (offset + metadata->Length > size) {
+ return NULL;
+ }
+
+ /* Only version 1 is supported/defined. */
+ metadata->Version = le32_to_cpu(metadata->Version);
+ if (metadata->Version != TDX_METADATA_VERSION) {
+ return NULL;
+ }
+
+ return metadata;
+}
+
+static int tdvf_parse_and_check_section_entry(const TdvfSectionEntry *src,
+ TdxFirmwareEntry *entry)
+{
+ entry->data_offset = le32_to_cpu(src->DataOffset);
+ entry->data_len = le32_to_cpu(src->RawDataSize);
+ entry->address = le64_to_cpu(src->MemoryAddress);
+ entry->size = le64_to_cpu(src->MemoryDataSize);
+ entry->type = le32_to_cpu(src->Type);
+ entry->attributes = le32_to_cpu(src->Attributes);
+
+ /* sanity check */
+ if (entry->size < entry->data_len) {
+ error_report("Broken metadata RawDataSize 0x%x MemoryDataSize 0x%"PRIx64,
+ entry->data_len, entry->size);
+ return -1;
+ }
+ if (!QEMU_IS_ALIGNED(entry->address, TDVF_ALIGNMENT)) {
+ error_report("MemoryAddress 0x%"PRIx64" not page aligned", entry->address);
+ return -1;
+ }
+ if (!QEMU_IS_ALIGNED(entry->size, TDVF_ALIGNMENT)) {
+ error_report("MemoryDataSize 0x%"PRIx64" not page aligned", entry->size);
+ return -1;
+ }
+
+ switch (entry->type) {
+ case TDVF_SECTION_TYPE_BFV:
+ case TDVF_SECTION_TYPE_CFV:
+ /* The sections that must be copied from firmware image to TD memory */
+ if (entry->data_len == 0) {
+ error_report("%d section with RawDataSize == 0", entry->type);
+ return -1;
+ }
+ break;
+ case TDVF_SECTION_TYPE_TD_HOB:
+ case TDVF_SECTION_TYPE_TEMP_MEM:
+ /* The sections that no need to be copied from firmware image */
+ if (entry->data_len != 0) {
+ error_report("%d section with RawDataSize 0x%x != 0",
+ entry->type, entry->data_len);
+ return -1;
+ }
+ break;
+ default:
+ error_report("TDVF contains unsupported section type %d", entry->type);
+ return -1;
+ }
+
+ return 0;
+}
+
+int tdvf_parse_metadata(TdxFirmware *fw, void *flash_ptr, int size)
+{
+ g_autofree TdvfSectionEntry *sections = NULL;
+ TdvfMetadata *metadata;
+ ssize_t entries_size;
+ int i;
+
+ metadata = tdvf_get_metadata(flash_ptr, size);
+ if (!metadata) {
+ return -EINVAL;
+ }
+
+ /* load and parse metadata entries */
+ fw->nr_entries = le32_to_cpu(metadata->NumberOfSectionEntries);
+ if (fw->nr_entries < 2) {
+ error_report("Invalid number of fw entries (%u) in TDVF Metadata",
+ fw->nr_entries);
+ return -EINVAL;
+ }
+
+ entries_size = fw->nr_entries * sizeof(TdvfSectionEntry);
+ if (metadata->Length != sizeof(*metadata) + entries_size) {
+ error_report("TDVF metadata len (0x%x) mismatch, expected (0x%x)",
+ metadata->Length,
+ (uint32_t)(sizeof(*metadata) + entries_size));
+ return -EINVAL;
+ }
+
+ fw->entries = g_new(TdxFirmwareEntry, fw->nr_entries);
+ sections = g_new(TdvfSectionEntry, fw->nr_entries);
+
+ memcpy(sections, (void *)metadata + sizeof(*metadata), entries_size);
+
+ for (i = 0; i < fw->nr_entries; i++) {
+ if (tdvf_parse_and_check_section_entry(&sections[i], &fw->entries[i])) {
+ goto err;
+ }
+ }
+
+ fw->mem_ptr = flash_ptr;
+ return 0;
+
+err:
+ fw->entries = 0;
+ g_free(fw->entries);
+ return -EINVAL;
+}
diff --git a/hw/i386/x86-common.c b/hw/i386/x86-common.c
index 1b0671c..b1b5f11 100644
--- a/hw/i386/x86-common.c
+++ b/hw/i386/x86-common.c
@@ -44,6 +44,7 @@
#include "standard-headers/asm-x86/bootparam.h"
#include CONFIG_DEVICES
#include "kvm/kvm_i386.h"
+#include "kvm/tdx.h"
#ifdef CONFIG_XEN_EMU
#include "hw/xen/xen.h"
@@ -1035,11 +1036,14 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware,
if (machine_require_guest_memfd(MACHINE(x86ms))) {
memory_region_init_ram_guest_memfd(&x86ms->bios, NULL, "pc.bios",
bios_size, &error_fatal);
+ if (is_tdx_vm()) {
+ tdx_set_tdvf_region(&x86ms->bios);
+ }
} else {
memory_region_init_ram(&x86ms->bios, NULL, "pc.bios",
bios_size, &error_fatal);
}
- if (sev_enabled()) {
+ if (sev_enabled() || is_tdx_vm()) {
/*
* The concept of a "reset" simply doesn't exist for
* confidential computing guests, we have to destroy and
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index e2d0409..f80533d 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -382,7 +382,6 @@ static void x86_machine_class_init(ObjectClass *oc, const void *data)
mc->get_default_cpu_node_id = x86_get_default_cpu_node_id;
mc->possible_cpu_arch_ids = x86_possible_cpu_arch_ids;
mc->kvm_type = x86_kvm_type;
- x86mc->save_tsc_khz = true;
x86mc->fwcfg_dma_enabled = true;
nc->nmi_monitor_handler = x86_nmi;
diff --git a/hw/microblaze/petalogix_ml605_mmu.c b/hw/microblaze/petalogix_ml605_mmu.c
index bea6b68..6e923c4 100644
--- a/hw/microblaze/petalogix_ml605_mmu.c
+++ b/hw/microblaze/petalogix_ml605_mmu.c
@@ -80,8 +80,6 @@ petalogix_ml605_init(MachineState *machine)
MemoryRegion *phys_lmb_bram = g_new(MemoryRegion, 1);
MemoryRegion *phys_ram = g_new(MemoryRegion, 1);
qemu_irq irq[32];
- EndianMode endianness = TARGET_BIG_ENDIAN ? ENDIAN_MODE_BIG
- : ENDIAN_MODE_LITTLE;
/* init CPUs */
cpu = MICROBLAZE_CPU(object_new(TYPE_MICROBLAZE_CPU));
@@ -113,7 +111,7 @@ petalogix_ml605_init(MachineState *machine)
dev = qdev_new("xlnx.xps-intc");
- qdev_prop_set_enum(dev, "endianness", endianness);
+ qdev_prop_set_enum(dev, "endianness", ENDIAN_MODE_LITTLE);
qdev_prop_set_uint32(dev, "kind-of-intr", 1 << TIMER_IRQ);
sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, INTC_BASEADDR);
@@ -129,7 +127,7 @@ petalogix_ml605_init(MachineState *machine)
/* 2 timers at irq 2 @ 100 Mhz. */
dev = qdev_new("xlnx.xps-timer");
- qdev_prop_set_enum(dev, "endianness", endianness);
+ qdev_prop_set_enum(dev, "endianness", ENDIAN_MODE_LITTLE);
qdev_prop_set_uint32(dev, "one-timer-only", 0);
qdev_prop_set_uint32(dev, "clock-frequency", 100 * 1000000);
sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
@@ -177,7 +175,7 @@ petalogix_ml605_init(MachineState *machine)
SSIBus *spi;
dev = qdev_new("xlnx.xps-spi");
- qdev_prop_set_enum(dev, "endianness", endianness);
+ qdev_prop_set_enum(dev, "endianness", ENDIAN_MODE_LITTLE);
qdev_prop_set_uint8(dev, "num-ss-bits", NUM_SPI_FLASHES);
busdev = SYS_BUS_DEVICE(dev);
sysbus_realize_and_unref(busdev, &error_fatal);
@@ -218,12 +216,7 @@ petalogix_ml605_init(MachineState *machine)
static void petalogix_ml605_machine_init(MachineClass *mc)
{
- if (TARGET_BIG_ENDIAN) {
- mc->desc = "PetaLogix linux refdesign for xilinx ml605 (big endian)";
- mc->deprecation_reason = "big endian support is not tested";
- } else {
- mc->desc = "PetaLogix linux refdesign for xilinx ml605 (little endian)";
- }
+ mc->desc = "PetaLogix linux refdesign for xilinx ml605 (little endian)";
mc->init = petalogix_ml605_init;
}
diff --git a/hw/microblaze/petalogix_s3adsp1800_mmu.c b/hw/microblaze/petalogix_s3adsp1800_mmu.c
index 032f6f7..e8d0ddf 100644
--- a/hw/microblaze/petalogix_s3adsp1800_mmu.c
+++ b/hw/microblaze/petalogix_s3adsp1800_mmu.c
@@ -58,9 +58,20 @@
#define TYPE_PETALOGIX_S3ADSP1800_MACHINE \
MACHINE_TYPE_NAME("petalogix-s3adsp1800")
+struct S3Adsp1800MachineState {
+ MachineState parent_class;
+
+ EndianMode endianness;
+};
+
+OBJECT_DECLARE_TYPE(S3Adsp1800MachineState, MachineClass,
+ PETALOGIX_S3ADSP1800_MACHINE)
+
+
static void
petalogix_s3adsp1800_init(MachineState *machine)
{
+ S3Adsp1800MachineState *psms = PETALOGIX_S3ADSP1800_MACHINE(machine);
ram_addr_t ram_size = machine->ram_size;
DeviceState *dev;
MicroBlazeCPU *cpu;
@@ -71,13 +82,12 @@ petalogix_s3adsp1800_init(MachineState *machine)
MemoryRegion *phys_ram = g_new(MemoryRegion, 1);
qemu_irq irq[32];
MemoryRegion *sysmem = get_system_memory();
- EndianMode endianness = TARGET_BIG_ENDIAN ? ENDIAN_MODE_BIG
- : ENDIAN_MODE_LITTLE;
+ EndianMode endianness = psms->endianness;
cpu = MICROBLAZE_CPU(object_new(TYPE_MICROBLAZE_CPU));
object_property_set_str(OBJECT(cpu), "version", "7.10.d", &error_abort);
object_property_set_bool(OBJECT(cpu), "little-endian",
- !TARGET_BIG_ENDIAN, &error_abort);
+ endianness == ENDIAN_MODE_LITTLE, &error_abort);
qdev_realize(DEVICE(cpu), NULL, &error_abort);
/* Attach emulated BRAM through the LMB. */
@@ -135,20 +145,41 @@ petalogix_s3adsp1800_init(MachineState *machine)
create_unimplemented_device("xps_gpio", GPIO_BASEADDR, 0x10000);
- microblaze_load_kernel(cpu, !TARGET_BIG_ENDIAN, ddr_base, ram_size,
- machine->initrd_filename,
+ microblaze_load_kernel(cpu, endianness == ENDIAN_MODE_LITTLE, ddr_base,
+ ram_size, machine->initrd_filename,
BINARY_DEVICE_TREE_FILE,
NULL);
}
+static int machine_get_endianness(Object *obj, Error **errp G_GNUC_UNUSED)
+{
+ S3Adsp1800MachineState *ms = PETALOGIX_S3ADSP1800_MACHINE(obj);
+ return ms->endianness;
+}
+
+static void machine_set_endianness(Object *obj, int endianness, Error **errp)
+{
+ S3Adsp1800MachineState *ms = PETALOGIX_S3ADSP1800_MACHINE(obj);
+ ms->endianness = endianness;
+}
+
static void petalogix_s3adsp1800_machine_class_init(ObjectClass *oc,
const void *data)
{
MachineClass *mc = MACHINE_CLASS(oc);
+ ObjectProperty *prop;
mc->desc = "PetaLogix linux refdesign for xilinx Spartan 3ADSP1800";
mc->init = petalogix_s3adsp1800_init;
mc->is_default = true;
+
+ prop = object_class_property_add_enum(oc, "endianness", "EndianMode",
+ &EndianMode_lookup,
+ machine_get_endianness,
+ machine_set_endianness);
+ object_property_set_default_str(prop, TARGET_BIG_ENDIAN ? "big" : "little");
+ object_class_property_set_description(oc, "endianness",
+ "Defines whether the machine runs in big or little endian mode");
}
static const TypeInfo petalogix_s3adsp1800_machine_types[] = {
@@ -156,6 +187,7 @@ static const TypeInfo petalogix_s3adsp1800_machine_types[] = {
.name = TYPE_PETALOGIX_S3ADSP1800_MACHINE,
.parent = TYPE_MACHINE,
.class_init = petalogix_s3adsp1800_machine_class_init,
+ .instance_size = sizeof(S3Adsp1800MachineState),
},
};
diff --git a/hw/microblaze/xlnx-zynqmp-pmu.c b/hw/microblaze/xlnx-zynqmp-pmu.c
index ed40b5f..e909802 100644
--- a/hw/microblaze/xlnx-zynqmp-pmu.c
+++ b/hw/microblaze/xlnx-zynqmp-pmu.c
@@ -181,12 +181,7 @@ static void xlnx_zynqmp_pmu_init(MachineState *machine)
static void xlnx_zynqmp_pmu_machine_init(MachineClass *mc)
{
- if (TARGET_BIG_ENDIAN) {
- mc->desc = "Xilinx ZynqMP PMU machine (big endian)";
- mc->deprecation_reason = "big endian support is not tested";
- } else {
- mc->desc = "Xilinx ZynqMP PMU machine (little endian)";
- }
+ mc->desc = "Xilinx ZynqMP PMU machine (little endian)";
mc->init = xlnx_zynqmp_pmu_init;
}
diff --git a/hw/net/e1000.c b/hw/net/e1000.c
index cba4999..a80a7b0 100644
--- a/hw/net/e1000.c
+++ b/hw/net/e1000.c
@@ -127,10 +127,8 @@ struct E1000State_st {
QEMUTimer *flush_queue_timer;
/* Compatibility flags for migration to/from qemu 1.3.0 and older */
-#define E1000_FLAG_MAC_BIT 2
#define E1000_FLAG_TSO_BIT 3
#define E1000_FLAG_VET_BIT 4
-#define E1000_FLAG_MAC (1 << E1000_FLAG_MAC_BIT)
#define E1000_FLAG_TSO (1 << E1000_FLAG_TSO_BIT)
#define E1000_FLAG_VET (1 << E1000_FLAG_VET_BIT)
@@ -1212,52 +1210,51 @@ enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
enum { MAC_ACCESS_PARTIAL = 1, MAC_ACCESS_FLAG_NEEDED = 2 };
-#define markflag(x) ((E1000_FLAG_##x << 2) | MAC_ACCESS_FLAG_NEEDED)
/* In the array below the meaning of the bits is: [f|f|f|f|f|f|n|p]
* f - flag bits (up to 6 possible flags)
* n - flag needed
- * p - partially implenented */
+ * p - partially implemented */
static const uint8_t mac_reg_access[0x8000] = {
- [IPAV] = markflag(MAC), [WUC] = markflag(MAC),
- [IP6AT] = markflag(MAC), [IP4AT] = markflag(MAC),
- [FFVT] = markflag(MAC), [WUPM] = markflag(MAC),
- [ECOL] = markflag(MAC), [MCC] = markflag(MAC),
- [DC] = markflag(MAC), [TNCRS] = markflag(MAC),
- [RLEC] = markflag(MAC), [XONRXC] = markflag(MAC),
- [XOFFTXC] = markflag(MAC), [RFC] = markflag(MAC),
- [TSCTFC] = markflag(MAC), [MGTPRC] = markflag(MAC),
- [WUS] = markflag(MAC), [AIT] = markflag(MAC),
- [FFLT] = markflag(MAC), [FFMT] = markflag(MAC),
- [SCC] = markflag(MAC), [FCRUC] = markflag(MAC),
- [LATECOL] = markflag(MAC), [COLC] = markflag(MAC),
- [SEQEC] = markflag(MAC), [CEXTERR] = markflag(MAC),
- [XONTXC] = markflag(MAC), [XOFFRXC] = markflag(MAC),
- [RJC] = markflag(MAC), [RNBC] = markflag(MAC),
- [MGTPDC] = markflag(MAC), [MGTPTC] = markflag(MAC),
- [RUC] = markflag(MAC), [ROC] = markflag(MAC),
- [GORCL] = markflag(MAC), [GORCH] = markflag(MAC),
- [GOTCL] = markflag(MAC), [GOTCH] = markflag(MAC),
- [BPRC] = markflag(MAC), [MPRC] = markflag(MAC),
- [TSCTC] = markflag(MAC), [PRC64] = markflag(MAC),
- [PRC127] = markflag(MAC), [PRC255] = markflag(MAC),
- [PRC511] = markflag(MAC), [PRC1023] = markflag(MAC),
- [PRC1522] = markflag(MAC), [PTC64] = markflag(MAC),
- [PTC127] = markflag(MAC), [PTC255] = markflag(MAC),
- [PTC511] = markflag(MAC), [PTC1023] = markflag(MAC),
- [PTC1522] = markflag(MAC), [MPTC] = markflag(MAC),
- [BPTC] = markflag(MAC),
-
- [TDFH] = markflag(MAC) | MAC_ACCESS_PARTIAL,
- [TDFT] = markflag(MAC) | MAC_ACCESS_PARTIAL,
- [TDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
- [TDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
- [TDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
- [RDFH] = markflag(MAC) | MAC_ACCESS_PARTIAL,
- [RDFT] = markflag(MAC) | MAC_ACCESS_PARTIAL,
- [RDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
- [RDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
- [RDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
- [PBM] = markflag(MAC) | MAC_ACCESS_PARTIAL,
+ [IPAV] = MAC_ACCESS_FLAG_NEEDED, [WUC] = MAC_ACCESS_FLAG_NEEDED,
+ [IP6AT] = MAC_ACCESS_FLAG_NEEDED, [IP4AT] = MAC_ACCESS_FLAG_NEEDED,
+ [FFVT] = MAC_ACCESS_FLAG_NEEDED, [WUPM] = MAC_ACCESS_FLAG_NEEDED,
+ [ECOL] = MAC_ACCESS_FLAG_NEEDED, [MCC] = MAC_ACCESS_FLAG_NEEDED,
+ [DC] = MAC_ACCESS_FLAG_NEEDED, [TNCRS] = MAC_ACCESS_FLAG_NEEDED,
+ [RLEC] = MAC_ACCESS_FLAG_NEEDED, [XONRXC] = MAC_ACCESS_FLAG_NEEDED,
+ [XOFFTXC] = MAC_ACCESS_FLAG_NEEDED, [RFC] = MAC_ACCESS_FLAG_NEEDED,
+ [TSCTFC] = MAC_ACCESS_FLAG_NEEDED, [MGTPRC] = MAC_ACCESS_FLAG_NEEDED,
+ [WUS] = MAC_ACCESS_FLAG_NEEDED, [AIT] = MAC_ACCESS_FLAG_NEEDED,
+ [FFLT] = MAC_ACCESS_FLAG_NEEDED, [FFMT] = MAC_ACCESS_FLAG_NEEDED,
+ [SCC] = MAC_ACCESS_FLAG_NEEDED, [FCRUC] = MAC_ACCESS_FLAG_NEEDED,
+ [LATECOL] = MAC_ACCESS_FLAG_NEEDED, [COLC] = MAC_ACCESS_FLAG_NEEDED,
+ [SEQEC] = MAC_ACCESS_FLAG_NEEDED, [CEXTERR] = MAC_ACCESS_FLAG_NEEDED,
+ [XONTXC] = MAC_ACCESS_FLAG_NEEDED, [XOFFRXC] = MAC_ACCESS_FLAG_NEEDED,
+ [RJC] = MAC_ACCESS_FLAG_NEEDED, [RNBC] = MAC_ACCESS_FLAG_NEEDED,
+ [MGTPDC] = MAC_ACCESS_FLAG_NEEDED, [MGTPTC] = MAC_ACCESS_FLAG_NEEDED,
+ [RUC] = MAC_ACCESS_FLAG_NEEDED, [ROC] = MAC_ACCESS_FLAG_NEEDED,
+ [GORCL] = MAC_ACCESS_FLAG_NEEDED, [GORCH] = MAC_ACCESS_FLAG_NEEDED,
+ [GOTCL] = MAC_ACCESS_FLAG_NEEDED, [GOTCH] = MAC_ACCESS_FLAG_NEEDED,
+ [BPRC] = MAC_ACCESS_FLAG_NEEDED, [MPRC] = MAC_ACCESS_FLAG_NEEDED,
+ [TSCTC] = MAC_ACCESS_FLAG_NEEDED, [PRC64] = MAC_ACCESS_FLAG_NEEDED,
+ [PRC127] = MAC_ACCESS_FLAG_NEEDED, [PRC255] = MAC_ACCESS_FLAG_NEEDED,
+ [PRC511] = MAC_ACCESS_FLAG_NEEDED, [PRC1023] = MAC_ACCESS_FLAG_NEEDED,
+ [PRC1522] = MAC_ACCESS_FLAG_NEEDED, [PTC64] = MAC_ACCESS_FLAG_NEEDED,
+ [PTC127] = MAC_ACCESS_FLAG_NEEDED, [PTC255] = MAC_ACCESS_FLAG_NEEDED,
+ [PTC511] = MAC_ACCESS_FLAG_NEEDED, [PTC1023] = MAC_ACCESS_FLAG_NEEDED,
+ [PTC1522] = MAC_ACCESS_FLAG_NEEDED, [MPTC] = MAC_ACCESS_FLAG_NEEDED,
+ [BPTC] = MAC_ACCESS_FLAG_NEEDED,
+
+ [TDFH] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL,
+ [TDFT] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL,
+ [TDFHS] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL,
+ [TDFTS] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL,
+ [TDFPC] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL,
+ [RDFH] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL,
+ [RDFT] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL,
+ [RDFHS] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL,
+ [RDFTS] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL,
+ [RDFPC] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL,
+ [PBM] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL,
};
static void
@@ -1419,13 +1416,6 @@ static int e1000_tx_tso_post_load(void *opaque, int version_id)
return 0;
}
-static bool e1000_full_mac_needed(void *opaque)
-{
- E1000State *s = opaque;
-
- return chkflag(MAC);
-}
-
static bool e1000_tso_state_needed(void *opaque)
{
E1000State *s = opaque;
@@ -1451,7 +1441,6 @@ static const VMStateDescription vmstate_e1000_full_mac_state = {
.name = "e1000/full_mac_state",
.version_id = 1,
.minimum_version_id = 1,
- .needed = e1000_full_mac_needed,
.fields = (const VMStateField[]) {
VMSTATE_UINT32_ARRAY(mac_reg, E1000State, 0x8000),
VMSTATE_END_OF_LIST()
@@ -1679,8 +1668,6 @@ static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp)
static const Property e1000_properties[] = {
DEFINE_NIC_PROPERTIES(E1000State, conf),
- DEFINE_PROP_BIT("extra_mac_registers", E1000State,
- compat_flags, E1000_FLAG_MAC_BIT, true),
DEFINE_PROP_BIT("migrate_tso_props", E1000State,
compat_flags, E1000_FLAG_TSO_BIT, true),
DEFINE_PROP_BIT("init-vet", E1000State,
diff --git a/hw/net/rocker/rocker.h b/hw/net/rocker/rocker.h
index 6e0962f..ae06c1c 100644
--- a/hw/net/rocker/rocker.h
+++ b/hw/net/rocker/rocker.h
@@ -36,15 +36,7 @@ static inline G_GNUC_PRINTF(1, 2) int DPRINTF(const char *fmt, ...)
}
#endif
-#define __le16 uint16_t
-#define __le32 uint32_t
-#define __le64 uint64_t
-
-#define __be16 uint16_t
-#define __be32 uint32_t
-#define __be64 uint64_t
-
-static inline bool ipv4_addr_is_multicast(__be32 addr)
+static inline bool ipv4_addr_is_multicast(uint32_t addr)
{
return (addr & htonl(0xf0000000)) == htonl(0xe0000000);
}
@@ -52,8 +44,8 @@ static inline bool ipv4_addr_is_multicast(__be32 addr)
typedef struct ipv6_addr {
union {
uint8_t addr8[16];
- __be16 addr16[8];
- __be32 addr32[4];
+ uint16_t addr16[8];
+ uint32_t addr32[4];
};
} Ipv6Addr;
diff --git a/hw/net/rocker/rocker_hw.h b/hw/net/rocker/rocker_hw.h
index 1786323..7ec6bfb 100644
--- a/hw/net/rocker/rocker_hw.h
+++ b/hw/net/rocker/rocker_hw.h
@@ -9,10 +9,6 @@
#ifndef ROCKER_HW_H
#define ROCKER_HW_H
-#define __le16 uint16_t
-#define __le32 uint32_t
-#define __le64 uint64_t
-
/*
* Return codes
*/
@@ -124,12 +120,12 @@ enum {
*/
typedef struct rocker_desc {
- __le64 buf_addr;
+ uint64_t buf_addr;
uint64_t cookie;
- __le16 buf_size;
- __le16 tlv_size;
- __le16 rsvd[5]; /* pad to 32 bytes */
- __le16 comp_err;
+ uint16_t buf_size;
+ uint16_t tlv_size;
+ uint16_t rsvd[5]; /* pad to 32 bytes */
+ uint16_t comp_err;
} __attribute__((packed, aligned(8))) RockerDesc;
/*
@@ -137,9 +133,9 @@ typedef struct rocker_desc {
*/
typedef struct rocker_tlv {
- __le32 type;
- __le16 len;
- __le16 rsvd;
+ uint32_t type;
+ uint16_t len;
+ uint16_t rsvd;
} __attribute__((packed, aligned(8))) RockerTlv;
/* cmd msg */
diff --git a/hw/net/rocker/rocker_of_dpa.c b/hw/net/rocker/rocker_of_dpa.c
index 3378f63..4aed178 100644
--- a/hw/net/rocker/rocker_of_dpa.c
+++ b/hw/net/rocker/rocker_of_dpa.c
@@ -52,10 +52,10 @@ typedef struct of_dpa_flow_key {
uint32_t tunnel_id; /* overlay tunnel id */
uint32_t tbl_id; /* table id */
struct {
- __be16 vlan_id; /* 0 if no VLAN */
+ uint16_t vlan_id; /* 0 if no VLAN */
MACAddr src; /* ethernet source address */
MACAddr dst; /* ethernet destination address */
- __be16 type; /* ethernet frame type */
+ uint16_t type; /* ethernet frame type */
} eth;
struct {
uint8_t proto; /* IP protocol or ARP opcode */
@@ -66,14 +66,14 @@ typedef struct of_dpa_flow_key {
union {
struct {
struct {
- __be32 src; /* IP source address */
- __be32 dst; /* IP destination address */
+ uint32_t src; /* IP source address */
+ uint32_t dst; /* IP destination address */
} addr;
union {
struct {
- __be16 src; /* TCP/UDP/SCTP source port */
- __be16 dst; /* TCP/UDP/SCTP destination port */
- __be16 flags; /* TCP flags */
+ uint16_t src; /* TCP/UDP/SCTP source port */
+ uint16_t dst; /* TCP/UDP/SCTP destination port */
+ uint16_t flags; /* TCP flags */
} tp;
struct {
MACAddr sha; /* ARP source hardware address */
@@ -86,11 +86,11 @@ typedef struct of_dpa_flow_key {
Ipv6Addr src; /* IPv6 source address */
Ipv6Addr dst; /* IPv6 destination address */
} addr;
- __be32 label; /* IPv6 flow label */
+ uint32_t label; /* IPv6 flow label */
struct {
- __be16 src; /* TCP/UDP/SCTP source port */
- __be16 dst; /* TCP/UDP/SCTP destination port */
- __be16 flags; /* TCP flags */
+ uint16_t src; /* TCP/UDP/SCTP source port */
+ uint16_t dst; /* TCP/UDP/SCTP destination port */
+ uint16_t flags; /* TCP flags */
} tp;
struct {
Ipv6Addr target; /* ND target address */
@@ -112,13 +112,13 @@ typedef struct of_dpa_flow_action {
struct {
uint32_t group_id;
uint32_t tun_log_lport;
- __be16 vlan_id;
+ uint16_t vlan_id;
} write;
struct {
- __be16 new_vlan_id;
+ uint16_t new_vlan_id;
uint32_t out_pport;
uint8_t copy_to_cpu;
- __be16 vlan_id;
+ uint16_t vlan_id;
} apply;
} OfDpaFlowAction;
@@ -143,7 +143,7 @@ typedef struct of_dpa_flow {
typedef struct of_dpa_flow_pkt_fields {
uint32_t tunnel_id;
struct eth_header *ethhdr;
- __be16 *h_proto;
+ uint16_t *h_proto;
struct vlan_header *vlanhdr;
struct ip_header *ipv4hdr;
struct ip6_header *ipv6hdr;
@@ -180,7 +180,7 @@ typedef struct of_dpa_group {
uint32_t group_id;
MACAddr src_mac;
MACAddr dst_mac;
- __be16 vlan_id;
+ uint16_t vlan_id;
} l2_rewrite;
struct {
uint16_t group_count;
@@ -190,13 +190,13 @@ typedef struct of_dpa_group {
uint32_t group_id;
MACAddr src_mac;
MACAddr dst_mac;
- __be16 vlan_id;
+ uint16_t vlan_id;
uint8_t ttl_check;
} l3_unicast;
};
} OfDpaGroup;
-static int of_dpa_mask2prefix(__be32 mask)
+static int of_dpa_mask2prefix(uint32_t mask)
{
int i;
int count = 32;
@@ -451,7 +451,7 @@ static void of_dpa_flow_pkt_parse(OfDpaFlowContext *fc,
fc->iovcnt = iovcnt + 2;
}
-static void of_dpa_flow_pkt_insert_vlan(OfDpaFlowContext *fc, __be16 vlan_id)
+static void of_dpa_flow_pkt_insert_vlan(OfDpaFlowContext *fc, uint16_t vlan_id)
{
OfDpaFlowPktFields *fields = &fc->fields;
uint16_t h_proto = fields->ethhdr->h_proto;
@@ -486,7 +486,7 @@ static void of_dpa_flow_pkt_strip_vlan(OfDpaFlowContext *fc)
static void of_dpa_flow_pkt_hdr_rewrite(OfDpaFlowContext *fc,
uint8_t *src_mac, uint8_t *dst_mac,
- __be16 vlan_id)
+ uint16_t vlan_id)
{
OfDpaFlowPktFields *fields = &fc->fields;
diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
index 83d942a..7c0ca56 100644
--- a/hw/net/vmxnet3.c
+++ b/hw/net/vmxnet3.c
@@ -41,19 +41,9 @@
#define PCI_DEVICE_ID_VMWARE_VMXNET3_REVISION 0x1
#define VMXNET3_MSIX_BAR_SIZE 0x2000
-/* Compatibility flags for migration */
-#define VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS_BIT 0
-#define VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS \
- (1 << VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS_BIT)
-#define VMXNET3_COMPAT_FLAG_DISABLE_PCIE_BIT 1
-#define VMXNET3_COMPAT_FLAG_DISABLE_PCIE \
- (1 << VMXNET3_COMPAT_FLAG_DISABLE_PCIE_BIT)
-
#define VMXNET3_EXP_EP_OFFSET (0x48)
-#define VMXNET3_MSI_OFFSET(s) \
- ((s)->compat_flags & VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS ? 0x50 : 0x84)
-#define VMXNET3_MSIX_OFFSET(s) \
- ((s)->compat_flags & VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS ? 0 : 0x9c)
+#define VMXNET3_MSI_OFFSET (0x84)
+#define VMXNET3_MSIX_OFFSET (0x9c)
#define VMXNET3_DSN_OFFSET (0x100)
#define VMXNET3_BAR0_IDX (0)
@@ -61,8 +51,7 @@
#define VMXNET3_MSIX_BAR_IDX (2)
#define VMXNET3_OFF_MSIX_TABLE (0x000)
-#define VMXNET3_OFF_MSIX_PBA(s) \
- ((s)->compat_flags & VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS ? 0x800 : 0x1000)
+#define VMXNET3_OFF_MSIX_PBA (0x1000)
/* Link speed in Mbps should be shifted by 16 */
#define VMXNET3_LINK_SPEED (1000 << 16)
@@ -2122,8 +2111,8 @@ vmxnet3_init_msix(VMXNET3State *s)
&s->msix_bar,
VMXNET3_MSIX_BAR_IDX, VMXNET3_OFF_MSIX_TABLE,
&s->msix_bar,
- VMXNET3_MSIX_BAR_IDX, VMXNET3_OFF_MSIX_PBA(s),
- VMXNET3_MSIX_OFFSET(s), NULL);
+ VMXNET3_MSIX_BAR_IDX, VMXNET3_OFF_MSIX_PBA,
+ VMXNET3_MSIX_OFFSET, NULL);
if (0 > res) {
VMW_WRPRN("Failed to initialize MSI-X, error %d", res);
@@ -2221,7 +2210,7 @@ static void vmxnet3_pci_realize(PCIDevice *pci_dev, Error **errp)
/* Interrupt pin A */
pci_dev->config[PCI_INTERRUPT_PIN] = 0x01;
- ret = msi_init(pci_dev, VMXNET3_MSI_OFFSET(s), VMXNET3_MAX_NMSIX_INTRS,
+ ret = msi_init(pci_dev, VMXNET3_MSI_OFFSET, VMXNET3_MAX_NMSIX_INTRS,
VMXNET3_USE_64BIT, VMXNET3_PER_VECTOR_MASK, NULL);
/* Any error other than -ENOTSUP(board's MSI support is broken)
* is a programming error. Fall back to INTx silently on -ENOTSUP */
@@ -2249,6 +2238,7 @@ static void vmxnet3_instance_init(Object *obj)
device_add_bootindex_property(obj, &s->conf.bootindex,
"bootindex", "/ethernet-phy@0",
DEVICE(obj));
+ PCI_DEVICE(obj)->cap_present |= QEMU_PCI_CAP_EXPRESS;
}
static void vmxnet3_pci_uninit(PCIDevice *pci_dev)
@@ -2472,30 +2462,12 @@ static const VMStateDescription vmstate_vmxnet3 = {
static const Property vmxnet3_properties[] = {
DEFINE_NIC_PROPERTIES(VMXNET3State, conf),
- DEFINE_PROP_BIT("x-old-msi-offsets", VMXNET3State, compat_flags,
- VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS_BIT, false),
- DEFINE_PROP_BIT("x-disable-pcie", VMXNET3State, compat_flags,
- VMXNET3_COMPAT_FLAG_DISABLE_PCIE_BIT, false),
};
-static void vmxnet3_realize(DeviceState *qdev, Error **errp)
-{
- VMXNET3Class *vc = VMXNET3_DEVICE_GET_CLASS(qdev);
- PCIDevice *pci_dev = PCI_DEVICE(qdev);
- VMXNET3State *s = VMXNET3(qdev);
-
- if (!(s->compat_flags & VMXNET3_COMPAT_FLAG_DISABLE_PCIE)) {
- pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
- }
-
- vc->parent_dc_realize(qdev, errp);
-}
-
static void vmxnet3_class_init(ObjectClass *class, const void *data)
{
DeviceClass *dc = DEVICE_CLASS(class);
PCIDeviceClass *c = PCI_DEVICE_CLASS(class);
- VMXNET3Class *vc = VMXNET3_DEVICE_CLASS(class);
c->realize = vmxnet3_pci_realize;
c->exit = vmxnet3_pci_uninit;
@@ -2506,8 +2478,6 @@ static void vmxnet3_class_init(ObjectClass *class, const void *data)
c->class_id = PCI_CLASS_NETWORK_ETHERNET;
c->subsystem_vendor_id = PCI_VENDOR_ID_VMWARE;
c->subsystem_id = PCI_DEVICE_ID_VMWARE_VMXNET3;
- device_class_set_parent_realize(dc, vmxnet3_realize,
- &vc->parent_dc_realize);
dc->desc = "VMWare Paravirtualized Ethernet v3";
device_class_set_legacy_reset(dc, vmxnet3_qdev_reset);
dc->vmsd = &vmstate_vmxnet3;
diff --git a/hw/nvram/fw_cfg.c b/hw/nvram/fw_cfg.c
index 237b9f7..aa24050 100644
--- a/hw/nvram/fw_cfg.c
+++ b/hw/nvram/fw_cfg.c
@@ -817,62 +817,6 @@ void fw_cfg_modify_i64(FWCfgState *s, uint16_t key, uint64_t value)
g_free(old);
}
-void fw_cfg_set_order_override(FWCfgState *s, int order)
-{
- assert(s->fw_cfg_order_override == 0);
- s->fw_cfg_order_override = order;
-}
-
-void fw_cfg_reset_order_override(FWCfgState *s)
-{
- assert(s->fw_cfg_order_override != 0);
- s->fw_cfg_order_override = 0;
-}
-
-/*
- * This is the legacy order list. For legacy systems, files are in
- * the fw_cfg in the order defined below, by the "order" value. Note
- * that some entries (VGA ROMs, NIC option ROMS, etc.) go into a
- * specific area, but there may be more than one and they occur in the
- * order that the user specifies them on the command line. Those are
- * handled in a special manner, using the order override above.
- *
- * For non-legacy, the files are sorted by filename to avoid this kind
- * of complexity in the future.
- *
- * This is only for x86, other arches don't implement versioning so
- * they won't set legacy mode.
- */
-static struct {
- const char *name;
- int order;
-} fw_cfg_order[] = {
- { "etc/boot-menu-wait", 10 },
- { "bootsplash.jpg", 11 },
- { "bootsplash.bmp", 12 },
- { "etc/boot-fail-wait", 15 },
- { "etc/smbios/smbios-tables", 20 },
- { "etc/smbios/smbios-anchor", 30 },
- { "etc/e820", 40 },
- { "etc/reserved-memory-end", 50 },
- { "genroms/kvmvapic.bin", 55 },
- { "genroms/linuxboot.bin", 60 },
- { }, /* VGA ROMs from pc_vga_init come here, 70. */
- { }, /* NIC option ROMs from pc_nic_init come here, 80. */
- { "etc/system-states", 90 },
- { }, /* User ROMs come here, 100. */
- { }, /* Device FW comes here, 110. */
- { "etc/extra-pci-roots", 120 },
- { "etc/acpi/tables", 130 },
- { "etc/table-loader", 140 },
- { "etc/tpm/log", 150 },
- { "etc/acpi/rsdp", 160 },
- { "bootorder", 170 },
- { "etc/msr_feature_control", 180 },
-
-#define FW_CFG_ORDER_OVERRIDE_LAST 200
-};
-
/*
* Any sub-page size update to these table MRs will be lost during migration,
* as we use aligned size in ram_load_precopy() -> qemu_ram_resize() path.
@@ -890,29 +834,6 @@ static void fw_cfg_acpi_mr_save(FWCfgState *s, const char *filename, size_t len)
}
}
-static int get_fw_cfg_order(FWCfgState *s, const char *name)
-{
- int i;
-
- if (s->fw_cfg_order_override > 0) {
- return s->fw_cfg_order_override;
- }
-
- for (i = 0; i < ARRAY_SIZE(fw_cfg_order); i++) {
- if (fw_cfg_order[i].name == NULL) {
- continue;
- }
-
- if (strcmp(name, fw_cfg_order[i].name) == 0) {
- return fw_cfg_order[i].order;
- }
- }
-
- /* Stick unknown stuff at the end. */
- warn_report("Unknown firmware file in legacy mode: %s", name);
- return FW_CFG_ORDER_OVERRIDE_LAST;
-}
-
void fw_cfg_add_file_callback(FWCfgState *s, const char *filename,
FWCfgCallback select_cb,
FWCfgWriteCallback write_cb,
@@ -921,7 +842,6 @@ void fw_cfg_add_file_callback(FWCfgState *s, const char *filename,
{
int i, index, count;
size_t dsize;
- MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine());
int order = 0;
if (!s->files) {
@@ -933,22 +853,11 @@ void fw_cfg_add_file_callback(FWCfgState *s, const char *filename,
count = be32_to_cpu(s->files->count);
assert(count < fw_cfg_file_slots(s));
- /* Find the insertion point. */
- if (mc->legacy_fw_cfg_order) {
- /*
- * Sort by order. For files with the same order, we keep them
- * in the sequence in which they were added.
- */
- order = get_fw_cfg_order(s, filename);
- for (index = count;
- index > 0 && order < s->entry_order[index - 1];
- index--);
- } else {
- /* Sort by file name. */
- for (index = count;
- index > 0 && strcmp(filename, s->files->f[index - 1].name) < 0;
- index--);
- }
+ /* Find the insertion point, sorting by file name. */
+ for (index = count;
+ index > 0 && strcmp(filename, s->files->f[index - 1].name) < 0;
+ index--)
+ ;
/*
* Move all the entries from the index point and after down one
@@ -1058,7 +967,6 @@ bool fw_cfg_add_file_from_generator(FWCfgState *s,
static void fw_cfg_machine_reset(void *opaque)
{
- MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine());
FWCfgState *s = opaque;
void *ptr;
size_t len;
@@ -1068,11 +976,9 @@ static void fw_cfg_machine_reset(void *opaque)
ptr = fw_cfg_modify_file(s, "bootorder", (uint8_t *)buf, len);
g_free(ptr);
- if (!mc->legacy_fw_cfg_order) {
- buf = get_boot_devices_lchs_list(&len);
- ptr = fw_cfg_modify_file(s, "bios-geometry", (uint8_t *)buf, len);
- g_free(ptr);
- }
+ buf = get_boot_devices_lchs_list(&len);
+ ptr = fw_cfg_modify_file(s, "bios-geometry", (uint8_t *)buf, len);
+ g_free(ptr);
}
static void fw_cfg_machine_ready(struct Notifier *n, void *data)
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index f5ab510..9b4bf48 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -128,6 +128,12 @@ static GSequence *pci_acpi_index_list(void)
return used_acpi_index_list;
}
+static void pci_set_master(PCIDevice *d, bool enable)
+{
+ memory_region_set_enabled(&d->bus_master_enable_region, enable);
+ d->is_master = enable; /* cache the status */
+}
+
static void pci_init_bus_master(PCIDevice *pci_dev)
{
AddressSpace *dma_as = pci_device_iommu_address_space(pci_dev);
@@ -135,7 +141,7 @@ static void pci_init_bus_master(PCIDevice *pci_dev)
memory_region_init_alias(&pci_dev->bus_master_enable_region,
OBJECT(pci_dev), "bus master",
dma_as->root, 0, memory_region_size(dma_as->root));
- memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
+ pci_set_master(pci_dev, false);
memory_region_add_subregion(&pci_dev->bus_master_container_region, 0,
&pci_dev->bus_master_enable_region);
}
@@ -804,9 +810,8 @@ static int get_pci_config_device(QEMUFile *f, void *pv, size_t size,
pci_bridge_update_mappings(PCI_BRIDGE(s));
}
- memory_region_set_enabled(&s->bus_master_enable_region,
- pci_get_word(s->config + PCI_COMMAND)
- & PCI_COMMAND_MASTER);
+ pci_set_master(s, pci_get_word(s->config + PCI_COMMAND)
+ & PCI_COMMAND_MASTER);
g_free(config);
return 0;
@@ -1725,7 +1730,7 @@ static void pci_update_mappings(PCIDevice *d)
pci_update_vga(d);
}
-static inline int pci_irq_disabled(PCIDevice *d)
+int pci_irq_disabled(PCIDevice *d)
{
return pci_get_word(d->config + PCI_COMMAND) & PCI_COMMAND_INTX_DISABLE;
}
@@ -1787,9 +1792,8 @@ void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int
if (ranges_overlap(addr, l, PCI_COMMAND, 2)) {
pci_update_irq_disabled(d, was_irq_disabled);
- memory_region_set_enabled(&d->bus_master_enable_region,
- (pci_get_word(d->config + PCI_COMMAND)
- & PCI_COMMAND_MASTER) && d->enabled);
+ pci_set_master(d, (pci_get_word(d->config + PCI_COMMAND) &
+ PCI_COMMAND_MASTER) && d->enabled);
}
msi_write_config(d, addr, val_in, l);
@@ -2935,6 +2939,23 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
return &address_space_memory;
}
+int pci_iommu_init_iotlb_notifier(PCIDevice *dev, IOMMUNotifier *n,
+ IOMMUNotify fn, void *opaque)
+{
+ PCIBus *bus;
+ PCIBus *iommu_bus;
+ int devfn;
+
+ pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
+ if (iommu_bus && iommu_bus->iommu_ops->init_iotlb_notifier) {
+ iommu_bus->iommu_ops->init_iotlb_notifier(bus, iommu_bus->iommu_opaque,
+ devfn, n, fn, opaque);
+ return 0;
+ }
+
+ return -ENODEV;
+}
+
bool pci_device_set_iommu_device(PCIDevice *dev, HostIOMMUDevice *hiod,
Error **errp)
{
@@ -2966,6 +2987,170 @@ void pci_device_unset_iommu_device(PCIDevice *dev)
}
}
+int pci_pri_request_page(PCIDevice *dev, uint32_t pasid, bool priv_req,
+ bool exec_req, hwaddr addr, bool lpig,
+ uint16_t prgi, bool is_read, bool is_write)
+{
+ PCIBus *bus;
+ PCIBus *iommu_bus;
+ int devfn;
+
+ if (!dev->is_master ||
+ ((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev))) {
+ return -EPERM;
+ }
+
+ if (!pcie_pri_enabled(dev)) {
+ return -EPERM;
+ }
+
+ pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
+ if (iommu_bus && iommu_bus->iommu_ops->pri_request_page) {
+ return iommu_bus->iommu_ops->pri_request_page(bus,
+ iommu_bus->iommu_opaque,
+ devfn, pasid, priv_req,
+ exec_req, addr, lpig, prgi,
+ is_read, is_write);
+ }
+
+ return -ENODEV;
+}
+
+int pci_pri_register_notifier(PCIDevice *dev, uint32_t pasid,
+ IOMMUPRINotifier *notifier)
+{
+ PCIBus *bus;
+ PCIBus *iommu_bus;
+ int devfn;
+
+ if (!dev->is_master ||
+ ((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev))) {
+ return -EPERM;
+ }
+
+ pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
+ if (iommu_bus && iommu_bus->iommu_ops->pri_register_notifier) {
+ iommu_bus->iommu_ops->pri_register_notifier(bus,
+ iommu_bus->iommu_opaque,
+ devfn, pasid, notifier);
+ return 0;
+ }
+
+ return -ENODEV;
+}
+
+void pci_pri_unregister_notifier(PCIDevice *dev, uint32_t pasid)
+{
+ PCIBus *bus;
+ PCIBus *iommu_bus;
+ int devfn;
+
+ pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
+ if (iommu_bus && iommu_bus->iommu_ops->pri_unregister_notifier) {
+ iommu_bus->iommu_ops->pri_unregister_notifier(bus,
+ iommu_bus->iommu_opaque,
+ devfn, pasid);
+ }
+}
+
+ssize_t pci_ats_request_translation(PCIDevice *dev, uint32_t pasid,
+ bool priv_req, bool exec_req,
+ hwaddr addr, size_t length,
+ bool no_write, IOMMUTLBEntry *result,
+ size_t result_length,
+ uint32_t *err_count)
+{
+ PCIBus *bus;
+ PCIBus *iommu_bus;
+ int devfn;
+
+ if (!dev->is_master ||
+ ((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev))) {
+ return -EPERM;
+ }
+
+ if (result_length == 0) {
+ return -ENOSPC;
+ }
+
+ if (!pcie_ats_enabled(dev)) {
+ return -EPERM;
+ }
+
+ pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
+ if (iommu_bus && iommu_bus->iommu_ops->ats_request_translation) {
+ return iommu_bus->iommu_ops->ats_request_translation(bus,
+ iommu_bus->iommu_opaque,
+ devfn, pasid, priv_req,
+ exec_req, addr, length,
+ no_write, result,
+ result_length, err_count);
+ }
+
+ return -ENODEV;
+}
+
+int pci_iommu_register_iotlb_notifier(PCIDevice *dev, uint32_t pasid,
+ IOMMUNotifier *n)
+{
+ PCIBus *bus;
+ PCIBus *iommu_bus;
+ int devfn;
+
+ if ((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev)) {
+ return -EPERM;
+ }
+
+ pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
+ if (iommu_bus && iommu_bus->iommu_ops->register_iotlb_notifier) {
+ iommu_bus->iommu_ops->register_iotlb_notifier(bus,
+ iommu_bus->iommu_opaque, devfn,
+ pasid, n);
+ return 0;
+ }
+
+ return -ENODEV;
+}
+
+int pci_iommu_unregister_iotlb_notifier(PCIDevice *dev, uint32_t pasid,
+ IOMMUNotifier *n)
+{
+ PCIBus *bus;
+ PCIBus *iommu_bus;
+ int devfn;
+
+ if ((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev)) {
+ return -EPERM;
+ }
+
+ pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
+ if (iommu_bus && iommu_bus->iommu_ops->unregister_iotlb_notifier) {
+ iommu_bus->iommu_ops->unregister_iotlb_notifier(bus,
+ iommu_bus->iommu_opaque,
+ devfn, pasid, n);
+ return 0;
+ }
+
+ return -ENODEV;
+}
+
+int pci_iommu_get_iotlb_info(PCIDevice *dev, uint8_t *addr_width,
+ uint32_t *min_page_size)
+{
+ PCIBus *bus;
+ PCIBus *iommu_bus;
+ int devfn;
+
+ pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
+ if (iommu_bus && iommu_bus->iommu_ops->get_iotlb_info) {
+ iommu_bus->iommu_ops->get_iotlb_info(iommu_bus->iommu_opaque,
+ addr_width, min_page_size);
+ return 0;
+ }
+
+ return -ENODEV;
+}
+
void pci_setup_iommu(PCIBus *bus, const PCIIOMMUOps *ops, void *opaque)
{
/*
@@ -3100,9 +3285,8 @@ void pci_set_enabled(PCIDevice *d, bool state)
d->enabled = state;
pci_update_mappings(d);
- memory_region_set_enabled(&d->bus_master_enable_region,
- (pci_get_word(d->config + PCI_COMMAND)
- & PCI_COMMAND_MASTER) && d->enabled);
+ pci_set_master(d, (pci_get_word(d->config + PCI_COMMAND)
+ & PCI_COMMAND_MASTER) && d->enabled);
if (qdev_is_realized(&d->qdev)) {
pci_device_reset(d);
}
diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index 1b12db6..eaeb688 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -1214,3 +1214,81 @@ void pcie_acs_reset(PCIDevice *dev)
pci_set_word(dev->config + dev->exp.acs_cap + PCI_ACS_CTRL, 0);
}
}
+
+/* PASID */
+void pcie_pasid_init(PCIDevice *dev, uint16_t offset, uint8_t pasid_width,
+ bool exec_perm, bool priv_mod)
+{
+ static const uint16_t control_reg_rw_mask = 0x07;
+ uint16_t capability_reg;
+
+ assert(pasid_width <= PCI_EXT_CAP_PASID_MAX_WIDTH);
+
+ pcie_add_capability(dev, PCI_EXT_CAP_ID_PASID, PCI_PASID_VER, offset,
+ PCI_EXT_CAP_PASID_SIZEOF);
+
+ capability_reg = ((uint16_t)pasid_width) << PCI_PASID_CAP_WIDTH_SHIFT;
+ capability_reg |= exec_perm ? PCI_PASID_CAP_EXEC : 0;
+ capability_reg |= priv_mod ? PCI_PASID_CAP_PRIV : 0;
+ pci_set_word(dev->config + offset + PCI_PASID_CAP, capability_reg);
+
+ /* Everything is disabled by default */
+ pci_set_word(dev->config + offset + PCI_PASID_CTRL, 0);
+
+ pci_set_word(dev->wmask + offset + PCI_PASID_CTRL, control_reg_rw_mask);
+
+ dev->exp.pasid_cap = offset;
+}
+
+/* PRI */
+void pcie_pri_init(PCIDevice *dev, uint16_t offset, uint32_t outstanding_pr_cap,
+ bool prg_response_pasid_req)
+{
+ static const uint16_t control_reg_rw_mask = 0x3;
+ static const uint16_t status_reg_rw1_mask = 0x3;
+ static const uint32_t pr_alloc_reg_rw_mask = 0xffffffff;
+ uint16_t status_reg;
+
+ status_reg = prg_response_pasid_req ? PCI_PRI_STATUS_PASID : 0;
+ status_reg |= PCI_PRI_STATUS_STOPPED; /* Stopped by default */
+
+ pcie_add_capability(dev, PCI_EXT_CAP_ID_PRI, PCI_PRI_VER, offset,
+ PCI_EXT_CAP_PRI_SIZEOF);
+ /* Disabled by default */
+
+ pci_set_word(dev->config + offset + PCI_PRI_STATUS, status_reg);
+ pci_set_long(dev->config + offset + PCI_PRI_MAX_REQ, outstanding_pr_cap);
+
+ pci_set_word(dev->wmask + offset + PCI_PRI_CTRL, control_reg_rw_mask);
+ pci_set_word(dev->w1cmask + offset + PCI_PRI_STATUS, status_reg_rw1_mask);
+ pci_set_long(dev->wmask + offset + PCI_PRI_ALLOC_REQ, pr_alloc_reg_rw_mask);
+
+ dev->exp.pri_cap = offset;
+}
+
+bool pcie_pri_enabled(const PCIDevice *dev)
+{
+ if (!pci_is_express(dev) || !dev->exp.pri_cap) {
+ return false;
+ }
+ return (pci_get_word(dev->config + dev->exp.pri_cap + PCI_PRI_CTRL) &
+ PCI_PRI_CTRL_ENABLE) != 0;
+}
+
+bool pcie_pasid_enabled(const PCIDevice *dev)
+{
+ if (!pci_is_express(dev) || !dev->exp.pasid_cap) {
+ return false;
+ }
+ return (pci_get_word(dev->config + dev->exp.pasid_cap + PCI_PASID_CTRL) &
+ PCI_PASID_CTRL_ENABLE) != 0;
+}
+
+bool pcie_ats_enabled(const PCIDevice *dev)
+{
+ if (!pci_is_express(dev) || !dev->exp.ats_cap) {
+ return false;
+ }
+ return (pci_get_word(dev->config + dev->exp.ats_cap + PCI_ATS_CTRL) &
+ PCI_ATS_CTRL_ENABLE) != 0;
+}
diff --git a/hw/scsi/vmw_pvscsi.c b/hw/scsi/vmw_pvscsi.c
index d5825b6..7c98b1b 100644
--- a/hw/scsi/vmw_pvscsi.c
+++ b/hw/scsi/vmw_pvscsi.c
@@ -68,18 +68,7 @@ struct PVSCSIClass {
OBJECT_DECLARE_TYPE(PVSCSIState, PVSCSIClass, PVSCSI)
-/* Compatibility flags for migration */
-#define PVSCSI_COMPAT_OLD_PCI_CONFIGURATION_BIT 0
-#define PVSCSI_COMPAT_OLD_PCI_CONFIGURATION \
- (1 << PVSCSI_COMPAT_OLD_PCI_CONFIGURATION_BIT)
-#define PVSCSI_COMPAT_DISABLE_PCIE_BIT 1
-#define PVSCSI_COMPAT_DISABLE_PCIE \
- (1 << PVSCSI_COMPAT_DISABLE_PCIE_BIT)
-
-#define PVSCSI_USE_OLD_PCI_CONFIGURATION(s) \
- ((s)->compat_flags & PVSCSI_COMPAT_OLD_PCI_CONFIGURATION)
-#define PVSCSI_MSI_OFFSET(s) \
- (PVSCSI_USE_OLD_PCI_CONFIGURATION(s) ? 0x50 : 0x7c)
+#define PVSCSI_MSI_OFFSET (0x7c)
#define PVSCSI_EXP_EP_OFFSET (0x40)
typedef struct PVSCSIRingInfo {
@@ -129,8 +118,6 @@ struct PVSCSIState {
uint8_t msi_used; /* For migration compatibility */
PVSCSIRingInfo rings; /* Data transfer rings manager */
uint32_t resetting; /* Reset in progress */
-
- uint32_t compat_flags;
};
typedef struct PVSCSIRequest {
@@ -1110,7 +1097,7 @@ pvscsi_init_msi(PVSCSIState *s)
int res;
PCIDevice *d = PCI_DEVICE(s);
- res = msi_init(d, PVSCSI_MSI_OFFSET(s), PVSCSI_MSIX_NUM_VECTORS,
+ res = msi_init(d, PVSCSI_MSI_OFFSET, PVSCSI_MSIX_NUM_VECTORS,
PVSCSI_USE_64BIT, PVSCSI_PER_VECTOR_MASK, NULL);
if (res < 0) {
trace_pvscsi_init_msi_fail(res);
@@ -1158,15 +1145,11 @@ pvscsi_realizefn(PCIDevice *pci_dev, Error **errp)
trace_pvscsi_state("init");
/* PCI subsystem ID, subsystem vendor ID, revision */
- if (PVSCSI_USE_OLD_PCI_CONFIGURATION(s)) {
- pci_set_word(pci_dev->config + PCI_SUBSYSTEM_ID, 0x1000);
- } else {
- pci_set_word(pci_dev->config + PCI_SUBSYSTEM_VENDOR_ID,
- PCI_VENDOR_ID_VMWARE);
- pci_set_word(pci_dev->config + PCI_SUBSYSTEM_ID,
- PCI_DEVICE_ID_VMWARE_PVSCSI);
- pci_config_set_revision(pci_dev->config, 0x2);
- }
+ pci_set_word(pci_dev->config + PCI_SUBSYSTEM_VENDOR_ID,
+ PCI_VENDOR_ID_VMWARE);
+ pci_set_word(pci_dev->config + PCI_SUBSYSTEM_ID,
+ PCI_DEVICE_ID_VMWARE_PVSCSI);
+ pci_config_set_revision(pci_dev->config, 0x2);
/* PCI latency timer = 255 */
pci_dev->config[PCI_LATENCY_TIMER] = 0xff;
@@ -1234,21 +1217,8 @@ pvscsi_post_load(void *opaque, int version_id)
return 0;
}
-static bool pvscsi_vmstate_need_pcie_device(void *opaque)
-{
- PVSCSIState *s = PVSCSI(opaque);
-
- return !(s->compat_flags & PVSCSI_COMPAT_DISABLE_PCIE);
-}
-
-static bool pvscsi_vmstate_test_pci_device(void *opaque, int version_id)
-{
- return !pvscsi_vmstate_need_pcie_device(opaque);
-}
-
static const VMStateDescription vmstate_pvscsi_pcie_device = {
.name = "pvscsi/pcie",
- .needed = pvscsi_vmstate_need_pcie_device,
.fields = (const VMStateField[]) {
VMSTATE_PCI_DEVICE(parent_obj, PVSCSIState),
VMSTATE_END_OF_LIST()
@@ -1262,9 +1232,6 @@ static const VMStateDescription vmstate_pvscsi = {
.pre_save = pvscsi_pre_save,
.post_load = pvscsi_post_load,
.fields = (const VMStateField[]) {
- VMSTATE_STRUCT_TEST(parent_obj, PVSCSIState,
- pvscsi_vmstate_test_pci_device, 0,
- vmstate_pci_device, PCIDevice),
VMSTATE_UINT8(msi_used, PVSCSIState),
VMSTATE_UINT32(resetting, PVSCSIState),
VMSTATE_UINT64(reg_interrupt_status, PVSCSIState),
@@ -1298,30 +1265,17 @@ static const VMStateDescription vmstate_pvscsi = {
static const Property pvscsi_properties[] = {
DEFINE_PROP_UINT8("use_msg", PVSCSIState, use_msg, 1),
- DEFINE_PROP_BIT("x-old-pci-configuration", PVSCSIState, compat_flags,
- PVSCSI_COMPAT_OLD_PCI_CONFIGURATION_BIT, false),
- DEFINE_PROP_BIT("x-disable-pcie", PVSCSIState, compat_flags,
- PVSCSI_COMPAT_DISABLE_PCIE_BIT, false),
};
-static void pvscsi_realize(DeviceState *qdev, Error **errp)
+static void pvscsi_instance_init(Object *obj)
{
- PVSCSIClass *pvs_c = PVSCSI_GET_CLASS(qdev);
- PCIDevice *pci_dev = PCI_DEVICE(qdev);
- PVSCSIState *s = PVSCSI(qdev);
-
- if (!(s->compat_flags & PVSCSI_COMPAT_DISABLE_PCIE)) {
- pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
- }
-
- pvs_c->parent_dc_realize(qdev, errp);
+ PCI_DEVICE(obj)->cap_present |= QEMU_PCI_CAP_EXPRESS;
}
static void pvscsi_class_init(ObjectClass *klass, const void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
- PVSCSIClass *pvs_k = PVSCSI_CLASS(klass);
HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(klass);
k->realize = pvscsi_realizefn;
@@ -1330,8 +1284,6 @@ static void pvscsi_class_init(ObjectClass *klass, const void *data)
k->device_id = PCI_DEVICE_ID_VMWARE_PVSCSI;
k->class_id = PCI_CLASS_STORAGE_SCSI;
k->subsystem_id = 0x1000;
- device_class_set_parent_realize(dc, pvscsi_realize,
- &pvs_k->parent_dc_realize);
device_class_set_legacy_reset(dc, pvscsi_reset);
dc->vmsd = &vmstate_pvscsi;
device_class_set_props(dc, pvscsi_properties);
@@ -1346,6 +1298,7 @@ static const TypeInfo pvscsi_info = {
.class_size = sizeof(PVSCSIClass),
.instance_size = sizeof(PVSCSIState),
.class_init = pvscsi_class_init,
+ .instance_init = pvscsi_instance_init,
.interfaces = (const InterfaceInfo[]) {
{ TYPE_HOTPLUG_HANDLER },
{ INTERFACE_PCIE_DEVICE },
diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c
index d1b7bc5..0fd1337 100644
--- a/hw/timer/hpet.c
+++ b/hw/timer/hpet.c
@@ -426,30 +426,11 @@ static uint64_t hpet_ram_read(void *opaque, hwaddr addr,
uint64_t cur_tick;
trace_hpet_ram_read(addr);
+ addr &= ~4;
- /*address range of all TN regs*/
- if (addr >= 0x100 && addr <= 0x3ff) {
- uint8_t timer_id = (addr - 0x100) / 0x20;
- HPETTimer *timer = &s->timer[timer_id];
-
- if (timer_id > s->num_timers) {
- trace_hpet_timer_id_out_of_range(timer_id);
- return 0;
- }
-
- switch (addr & 0x18) {
- case HPET_TN_CFG: // including interrupt capabilities
- return timer->config >> shift;
- case HPET_TN_CMP: // comparator register
- return timer->cmp >> shift;
- case HPET_TN_ROUTE:
- return timer->fsb >> shift;
- default:
- trace_hpet_ram_read_invalid();
- break;
- }
- } else {
- switch (addr & ~4) {
+ /*address range of all global regs*/
+ if (addr <= 0xff) {
+ switch (addr) {
case HPET_ID: // including HPET_PERIOD
return s->capability >> shift;
case HPET_CFG:
@@ -468,6 +449,26 @@ static uint64_t hpet_ram_read(void *opaque, hwaddr addr,
trace_hpet_ram_read_invalid();
break;
}
+ } else {
+ uint8_t timer_id = (addr - 0x100) / 0x20;
+ HPETTimer *timer = &s->timer[timer_id];
+
+ if (timer_id > s->num_timers) {
+ trace_hpet_timer_id_out_of_range(timer_id);
+ return 0;
+ }
+
+ switch (addr & 0x1f) {
+ case HPET_TN_CFG: // including interrupt capabilities
+ return timer->config >> shift;
+ case HPET_TN_CMP: // comparator register
+ return timer->cmp >> shift;
+ case HPET_TN_ROUTE:
+ return timer->fsb >> shift;
+ default:
+ trace_hpet_ram_read_invalid();
+ break;
+ }
}
return 0;
}
@@ -482,9 +483,67 @@ static void hpet_ram_write(void *opaque, hwaddr addr,
uint64_t old_val, new_val, cleared;
trace_hpet_ram_write(addr, value);
+ addr &= ~4;
- /*address range of all TN regs*/
- if (addr >= 0x100 && addr <= 0x3ff) {
+ /*address range of all global regs*/
+ if (addr <= 0xff) {
+ switch (addr) {
+ case HPET_ID:
+ return;
+ case HPET_CFG:
+ old_val = s->config;
+ new_val = deposit64(old_val, shift, len, value);
+ new_val = hpet_fixup_reg(new_val, old_val, HPET_CFG_WRITE_MASK);
+ s->config = new_val;
+ if (activating_bit(old_val, new_val, HPET_CFG_ENABLE)) {
+ /* Enable main counter and interrupt generation. */
+ s->hpet_offset =
+ ticks_to_ns(s->hpet_counter) - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+ for (i = 0; i < s->num_timers; i++) {
+ if (timer_enabled(&s->timer[i]) && (s->isr & (1 << i))) {
+ update_irq(&s->timer[i], 1);
+ }
+ hpet_set_timer(&s->timer[i]);
+ }
+ } else if (deactivating_bit(old_val, new_val, HPET_CFG_ENABLE)) {
+ /* Halt main counter and disable interrupt generation. */
+ s->hpet_counter = hpet_get_ticks(s);
+ for (i = 0; i < s->num_timers; i++) {
+ hpet_del_timer(&s->timer[i]);
+ }
+ }
+ /* i8254 and RTC output pins are disabled
+ * when HPET is in legacy mode */
+ if (activating_bit(old_val, new_val, HPET_CFG_LEGACY)) {
+ qemu_set_irq(s->pit_enabled, 0);
+ qemu_irq_lower(s->irqs[0]);
+ qemu_irq_lower(s->irqs[RTC_ISA_IRQ]);
+ } else if (deactivating_bit(old_val, new_val, HPET_CFG_LEGACY)) {
+ qemu_irq_lower(s->irqs[0]);
+ qemu_set_irq(s->pit_enabled, 1);
+ qemu_set_irq(s->irqs[RTC_ISA_IRQ], s->rtc_irq_level);
+ }
+ break;
+ case HPET_STATUS:
+ new_val = value << shift;
+ cleared = new_val & s->isr;
+ for (i = 0; i < s->num_timers; i++) {
+ if (cleared & (1 << i)) {
+ update_irq(&s->timer[i], 0);
+ }
+ }
+ break;
+ case HPET_COUNTER:
+ if (hpet_enabled(s)) {
+ trace_hpet_ram_write_counter_write_while_enabled();
+ }
+ s->hpet_counter = deposit64(s->hpet_counter, shift, len, value);
+ break;
+ default:
+ trace_hpet_ram_write_invalid();
+ break;
+ }
+ } else {
uint8_t timer_id = (addr - 0x100) / 0x20;
HPETTimer *timer = &s->timer[timer_id];
@@ -550,63 +609,6 @@ static void hpet_ram_write(void *opaque, hwaddr addr,
break;
}
return;
- } else {
- switch (addr & ~4) {
- case HPET_ID:
- return;
- case HPET_CFG:
- old_val = s->config;
- new_val = deposit64(old_val, shift, len, value);
- new_val = hpet_fixup_reg(new_val, old_val, HPET_CFG_WRITE_MASK);
- s->config = new_val;
- if (activating_bit(old_val, new_val, HPET_CFG_ENABLE)) {
- /* Enable main counter and interrupt generation. */
- s->hpet_offset =
- ticks_to_ns(s->hpet_counter) - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
- for (i = 0; i < s->num_timers; i++) {
- if (timer_enabled(&s->timer[i]) && (s->isr & (1 << i))) {
- update_irq(&s->timer[i], 1);
- }
- hpet_set_timer(&s->timer[i]);
- }
- } else if (deactivating_bit(old_val, new_val, HPET_CFG_ENABLE)) {
- /* Halt main counter and disable interrupt generation. */
- s->hpet_counter = hpet_get_ticks(s);
- for (i = 0; i < s->num_timers; i++) {
- hpet_del_timer(&s->timer[i]);
- }
- }
- /* i8254 and RTC output pins are disabled
- * when HPET is in legacy mode */
- if (activating_bit(old_val, new_val, HPET_CFG_LEGACY)) {
- qemu_set_irq(s->pit_enabled, 0);
- qemu_irq_lower(s->irqs[0]);
- qemu_irq_lower(s->irqs[RTC_ISA_IRQ]);
- } else if (deactivating_bit(old_val, new_val, HPET_CFG_LEGACY)) {
- qemu_irq_lower(s->irqs[0]);
- qemu_set_irq(s->pit_enabled, 1);
- qemu_set_irq(s->irqs[RTC_ISA_IRQ], s->rtc_irq_level);
- }
- break;
- case HPET_STATUS:
- new_val = value << shift;
- cleared = new_val & s->isr;
- for (i = 0; i < s->num_timers; i++) {
- if (cleared & (1 << i)) {
- update_irq(&s->timer[i], 0);
- }
- }
- break;
- case HPET_COUNTER:
- if (hpet_enabled(s)) {
- trace_hpet_ram_write_counter_write_while_enabled();
- }
- s->hpet_counter = deposit64(s->hpet_counter, shift, len, value);
- break;
- default:
- trace_hpet_ram_write_invalid();
- break;
- }
}
}
diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c
index 1c6ca94..d834bd4 100644
--- a/hw/vfio/container-base.c
+++ b/hw/vfio/container-base.c
@@ -75,12 +75,12 @@ void vfio_address_space_insert(VFIOAddressSpace *space,
int vfio_container_dma_map(VFIOContainerBase *bcontainer,
hwaddr iova, ram_addr_t size,
- void *vaddr, bool readonly)
+ void *vaddr, bool readonly, MemoryRegion *mr)
{
VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
g_assert(vioc->dma_map);
- return vioc->dma_map(bcontainer, iova, size, vaddr, readonly);
+ return vioc->dma_map(bcontainer, iova, size, vaddr, readonly, mr);
}
int vfio_container_dma_unmap(VFIOContainerBase *bcontainer,
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index a9f0dba..0f948d0 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -33,8 +33,8 @@
#include "qapi/error.h"
#include "pci.h"
#include "hw/vfio/vfio-container.h"
+#include "hw/vfio/vfio-cpr.h"
#include "vfio-helpers.h"
-#include "vfio-cpr.h"
#include "vfio-listener.h"
#define TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO TYPE_HOST_IOMMU_DEVICE "-legacy-vfio"
@@ -207,7 +207,8 @@ static int vfio_legacy_dma_unmap(const VFIOContainerBase *bcontainer,
}
static int vfio_legacy_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova,
- ram_addr_t size, void *vaddr, bool readonly)
+ ram_addr_t size, void *vaddr, bool readonly,
+ MemoryRegion *mr)
{
const VFIOContainer *container = container_of(bcontainer, VFIOContainer,
bcontainer);
diff --git a/hw/vfio/cpr.c b/hw/vfio/cpr.c
index 3214184..0210e76 100644
--- a/hw/vfio/cpr.c
+++ b/hw/vfio/cpr.c
@@ -8,9 +8,9 @@
#include "qemu/osdep.h"
#include "hw/vfio/vfio-device.h"
#include "migration/misc.h"
+#include "hw/vfio/vfio-cpr.h"
#include "qapi/error.h"
#include "system/runstate.h"
-#include "vfio-cpr.h"
static int vfio_cpr_reboot_notifier(NotifierWithReturn *notifier,
MigrationEvent *e, Error **errp)
diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c
index e7952d1..e7a9d1f 100644
--- a/hw/vfio/igd.c
+++ b/hw/vfio/igd.c
@@ -187,23 +187,21 @@ static bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev,
}
static bool vfio_pci_igd_opregion_detect(VFIOPCIDevice *vdev,
- struct vfio_region_info **opregion,
- Error **errp)
+ struct vfio_region_info **opregion)
{
int ret;
- /* Hotplugging is not supported for opregion access */
- if (vdev->pdev.qdev.hotplugged) {
- error_setg(errp, "IGD OpRegion is not supported on hotplugged device");
- return false;
- }
-
ret = vfio_device_get_region_info_type(&vdev->vbasedev,
VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL,
VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, opregion);
if (ret) {
- error_setg_errno(errp, -ret,
- "Device does not supports IGD OpRegion feature");
+ return false;
+ }
+
+ /* Hotplugging is not supported for opregion access */
+ if (vdev->pdev.qdev.hotplugged) {
+ warn_report("IGD device detected, but OpRegion is not supported "
+ "on hotplugged device.");
return false;
}
@@ -524,7 +522,7 @@ static bool vfio_pci_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp)
}
/* IGD device always comes with OpRegion */
- if (!vfio_pci_igd_opregion_detect(vdev, &opregion, errp)) {
+ if (!vfio_pci_igd_opregion_detect(vdev, &opregion)) {
return true;
}
info_report("OpRegion detected on Intel display %x.", vdev->device_id);
@@ -695,7 +693,7 @@ static bool vfio_pci_kvmgt_config_quirk(VFIOPCIDevice *vdev, Error **errp)
return true;
}
- if (!vfio_pci_igd_opregion_detect(vdev, &opregion, errp)) {
+ if (!vfio_pci_igd_opregion_detect(vdev, &opregion)) {
/* Should never reach here, KVMGT always emulates OpRegion */
return false;
}
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index af1c7ab..d3efef7 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -21,20 +21,21 @@
#include "qapi/error.h"
#include "system/iommufd.h"
#include "hw/qdev-core.h"
+#include "hw/vfio/vfio-cpr.h"
#include "system/reset.h"
#include "qemu/cutils.h"
#include "qemu/chardev_open.h"
#include "pci.h"
#include "vfio-iommufd.h"
#include "vfio-helpers.h"
-#include "vfio-cpr.h"
#include "vfio-listener.h"
#define TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO \
TYPE_HOST_IOMMU_DEVICE_IOMMUFD "-vfio"
static int iommufd_cdev_map(const VFIOContainerBase *bcontainer, hwaddr iova,
- ram_addr_t size, void *vaddr, bool readonly)
+ ram_addr_t size, void *vaddr, bool readonly,
+ MemoryRegion *mr)
{
const VFIOIOMMUFDContainer *container =
container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
@@ -592,6 +593,10 @@ found_container:
goto err_listener_register;
}
+ /*
+ * Do not move this code before attachment! The nested IOMMU support
+ * needs device and hwpt id which are generated only after attachment.
+ */
if (!vfio_device_hiod_create_and_realize(vbasedev,
TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO, errp)) {
goto err_listener_register;
@@ -810,21 +815,38 @@ static void vfio_iommu_iommufd_class_init(ObjectClass *klass, const void *data)
vioc->query_dirty_bitmap = iommufd_query_dirty_bitmap;
};
+static bool
+host_iommu_device_iommufd_vfio_attach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
+ uint32_t hwpt_id, Error **errp)
+{
+ VFIODevice *vbasedev = HOST_IOMMU_DEVICE(idev)->agent;
+
+ return !iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt_id, errp);
+}
+
+static bool
+host_iommu_device_iommufd_vfio_detach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
+ Error **errp)
+{
+ VFIODevice *vbasedev = HOST_IOMMU_DEVICE(idev)->agent;
+
+ return iommufd_cdev_detach_ioas_hwpt(vbasedev, errp);
+}
+
static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque,
Error **errp)
{
VFIODevice *vdev = opaque;
+ HostIOMMUDeviceIOMMUFD *idev;
HostIOMMUDeviceCaps *caps = &hiod->caps;
+ VendorCaps *vendor_caps = &caps->vendor_caps;
enum iommu_hw_info_type type;
- union {
- struct iommu_hw_info_vtd vtd;
- } data;
uint64_t hw_caps;
hiod->agent = opaque;
- if (!iommufd_backend_get_device_info(vdev->iommufd, vdev->devid,
- &type, &data, sizeof(data),
+ if (!iommufd_backend_get_device_info(vdev->iommufd, vdev->devid, &type,
+ vendor_caps, sizeof(*vendor_caps),
&hw_caps, errp)) {
return false;
}
@@ -833,6 +855,11 @@ static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque,
caps->type = type;
caps->hw_caps = hw_caps;
+ idev = HOST_IOMMU_DEVICE_IOMMUFD(hiod);
+ idev->iommufd = vdev->iommufd;
+ idev->devid = vdev->devid;
+ idev->hwpt_id = vdev->hwpt->hwpt_id;
+
return true;
}
@@ -858,10 +885,14 @@ hiod_iommufd_vfio_get_page_size_mask(HostIOMMUDevice *hiod)
static void hiod_iommufd_vfio_class_init(ObjectClass *oc, const void *data)
{
HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_CLASS(oc);
+ HostIOMMUDeviceIOMMUFDClass *idevc = HOST_IOMMU_DEVICE_IOMMUFD_CLASS(oc);
hiodc->realize = hiod_iommufd_vfio_realize;
hiodc->get_iova_ranges = hiod_iommufd_vfio_get_iova_ranges;
hiodc->get_page_size_mask = hiod_iommufd_vfio_get_page_size_mask;
+
+ idevc->attach_hwpt = host_iommu_device_iommufd_vfio_attach_hwpt;
+ idevc->detach_hwpt = host_iommu_device_iommufd_vfio_detach_hwpt;
};
static const TypeInfo types[] = {
diff --git a/hw/vfio/listener.c b/hw/vfio/listener.c
index bfacb3d..203ed03 100644
--- a/hw/vfio/listener.c
+++ b/hw/vfio/listener.c
@@ -90,16 +90,17 @@ static bool vfio_listener_skipped_section(MemoryRegionSection *section)
section->offset_within_address_space & (1ULL << 63);
}
-/* Called with rcu_read_lock held. */
-static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
- ram_addr_t *ram_addr, bool *read_only,
- Error **errp)
+/*
+ * Called with rcu_read_lock held.
+ * The returned MemoryRegion must not be accessed after calling rcu_read_unlock.
+ */
+static MemoryRegion *vfio_translate_iotlb(IOMMUTLBEntry *iotlb, hwaddr *xlat_p,
+ Error **errp)
{
- bool ret, mr_has_discard_manager;
+ MemoryRegion *mr;
- ret = memory_get_xlat_addr(iotlb, vaddr, ram_addr, read_only,
- &mr_has_discard_manager, errp);
- if (ret && mr_has_discard_manager) {
+ mr = memory_translate_iotlb(iotlb, xlat_p, errp);
+ if (mr && memory_region_has_ram_discard_manager(mr)) {
/*
* Malicious VMs might trigger discarding of IOMMU-mapped memory. The
* pages will remain pinned inside vfio until unmapped, resulting in a
@@ -118,7 +119,7 @@ static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
" intended via an IOMMU. It's possible to mitigate "
" by setting/adjusting RLIMIT_MEMLOCK.");
}
- return ret;
+ return mr;
}
static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
@@ -126,6 +127,8 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
VFIOContainerBase *bcontainer = giommu->bcontainer;
hwaddr iova = iotlb->iova + giommu->iommu_offset;
+ MemoryRegion *mr;
+ hwaddr xlat;
void *vaddr;
int ret;
Error *local_err = NULL;
@@ -150,10 +153,14 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
bool read_only;
- if (!vfio_get_xlat_addr(iotlb, &vaddr, NULL, &read_only, &local_err)) {
+ mr = vfio_translate_iotlb(iotlb, &xlat, &local_err);
+ if (!mr) {
error_report_err(local_err);
goto out;
}
+ vaddr = memory_region_get_ram_ptr(mr) + xlat;
+ read_only = !(iotlb->perm & IOMMU_WO) || mr->readonly;
+
/*
* vaddr is only valid until rcu_read_unlock(). But after
* vfio_dma_map has set up the mapping the pages will be
@@ -163,7 +170,7 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
*/
ret = vfio_container_dma_map(bcontainer, iova,
iotlb->addr_mask + 1, vaddr,
- read_only);
+ read_only, mr);
if (ret) {
error_report("vfio_container_dma_map(%p, 0x%"HWADDR_PRIx", "
"0x%"HWADDR_PRIx", %p) = %d (%s)",
@@ -233,7 +240,7 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl,
vaddr = memory_region_get_ram_ptr(section->mr) + start;
ret = vfio_container_dma_map(bcontainer, iova, next - start,
- vaddr, section->readonly);
+ vaddr, section->readonly, section->mr);
if (ret) {
/* Rollback */
vfio_ram_discard_notify_discard(rdl, section);
@@ -449,6 +456,26 @@ static void vfio_device_error_append(VFIODevice *vbasedev, Error **errp)
}
}
+VFIORamDiscardListener *vfio_find_ram_discard_listener(
+ VFIOContainerBase *bcontainer, MemoryRegionSection *section)
+{
+ VFIORamDiscardListener *vrdl = NULL;
+
+ QLIST_FOREACH(vrdl, &bcontainer->vrdl_list, next) {
+ if (vrdl->mr == section->mr &&
+ vrdl->offset_within_address_space ==
+ section->offset_within_address_space) {
+ break;
+ }
+ }
+
+ if (!vrdl) {
+ hw_error("vfio: Trying to sync missing RAM discard listener");
+ /* does not return */
+ }
+ return vrdl;
+}
+
static void vfio_listener_region_add(MemoryListener *listener,
MemoryRegionSection *section)
{
@@ -557,7 +584,7 @@ static void vfio_listener_region_add(MemoryListener *listener,
}
ret = vfio_container_dma_map(bcontainer, iova, int128_get64(llsize),
- vaddr, section->readonly);
+ vaddr, section->readonly, section->mr);
if (ret) {
error_setg(&err, "vfio_container_dma_map(%p, 0x%"HWADDR_PRIx", "
"0x%"HWADDR_PRIx", %p) = %d (%s)",
@@ -1010,6 +1037,8 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
ram_addr_t translated_addr;
Error *local_err = NULL;
int ret = -EINVAL;
+ MemoryRegion *mr;
+ hwaddr xlat;
trace_vfio_iommu_map_dirty_notify(iova, iova + iotlb->addr_mask);
@@ -1021,9 +1050,11 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
}
rcu_read_lock();
- if (!vfio_get_xlat_addr(iotlb, NULL, &translated_addr, NULL, &local_err)) {
+ mr = vfio_translate_iotlb(iotlb, &xlat, &local_err);
+ if (!mr) {
goto out_unlock;
}
+ translated_addr = memory_region_get_ram_addr(mr) + xlat;
ret = vfio_container_query_dirty_bitmap(bcontainer, iova, iotlb->addr_mask + 1,
translated_addr, &local_err);
@@ -1075,19 +1106,8 @@ vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainerBase *bcontainer,
MemoryRegionSection *section)
{
RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr);
- VFIORamDiscardListener *vrdl = NULL;
-
- QLIST_FOREACH(vrdl, &bcontainer->vrdl_list, next) {
- if (vrdl->mr == section->mr &&
- vrdl->offset_within_address_space ==
- section->offset_within_address_space) {
- break;
- }
- }
-
- if (!vrdl) {
- hw_error("vfio: Trying to sync missing RAM discard listener");
- }
+ VFIORamDiscardListener *vrdl =
+ vfio_find_ram_discard_listener(bcontainer, section);
/*
* We only want/can synchronize the bitmap for actually mapped parts -
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index a1bfdfe..b1250d8 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -511,6 +511,25 @@ static void vfio_update_kvm_msi_virq(VFIOMSIVector *vector, MSIMessage msg,
kvm_irqchip_commit_routes(kvm_state);
}
+static void set_irq_signalling(VFIODevice *vbasedev, VFIOMSIVector *vector,
+ unsigned int nr)
+{
+ Error *err = NULL;
+ int32_t fd;
+
+ if (vector->virq >= 0) {
+ fd = event_notifier_get_fd(&vector->kvm_interrupt);
+ } else {
+ fd = event_notifier_get_fd(&vector->interrupt);
+ }
+
+ if (!vfio_device_irq_set_signaling(vbasedev, VFIO_PCI_MSIX_IRQ_INDEX, nr,
+ VFIO_IRQ_SET_ACTION_TRIGGER,
+ fd, &err)) {
+ error_reportf_err(err, VFIO_MSG_PREFIX, vbasedev->name);
+ }
+}
+
static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
MSIMessage *msg, IOHandler *handler)
{
@@ -583,21 +602,7 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
strerror(-ret));
}
} else {
- Error *err = NULL;
- int32_t fd;
-
- if (vector->virq >= 0) {
- fd = event_notifier_get_fd(&vector->kvm_interrupt);
- } else {
- fd = event_notifier_get_fd(&vector->interrupt);
- }
-
- if (!vfio_device_irq_set_signaling(&vdev->vbasedev,
- VFIO_PCI_MSIX_IRQ_INDEX, nr,
- VFIO_IRQ_SET_ACTION_TRIGGER, fd,
- &err)) {
- error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
- }
+ set_irq_signalling(&vdev->vbasedev, vector, nr);
}
}
@@ -2854,6 +2859,18 @@ static bool vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
static void vfio_pci_put_device(VFIOPCIDevice *vdev)
{
+ vfio_display_finalize(vdev);
+ vfio_bars_finalize(vdev);
+ g_free(vdev->emulated_config_bits);
+ g_free(vdev->rom);
+ /*
+ * XXX Leaking igd_opregion is not an oversight, we can't remove the
+ * fw_cfg entry therefore leaking this allocation seems like the safest
+ * option.
+ *
+ * g_free(vdev->igd_opregion);
+ */
+
vfio_device_detach(&vdev->vbasedev);
g_free(vdev->vbasedev.name);
@@ -3005,6 +3022,19 @@ static bool vfio_pci_config_setup(VFIOPCIDevice *vdev, Error **errp)
{
PCIDevice *pdev = &vdev->pdev;
VFIODevice *vbasedev = &vdev->vbasedev;
+ uint32_t config_space_size;
+ int ret;
+
+ config_space_size = MIN(pci_config_size(&vdev->pdev), vdev->config_size);
+
+ /* Get a copy of config space */
+ ret = vfio_pci_config_space_read(vdev, 0, config_space_size,
+ vdev->pdev.config);
+ if (ret < (int)config_space_size) {
+ ret = ret < 0 ? -ret : EFAULT;
+ error_setg_errno(errp, ret, "failed to read device config space");
+ return false;
+ }
/* vfio emulates a lot for us, but some bits need extra love */
vdev->emulated_config_bits = g_malloc0(vdev->config_size);
@@ -3126,15 +3156,14 @@ static bool vfio_interrupt_setup(VFIOPCIDevice *vdev, Error **errp)
return true;
}
-static void vfio_realize(PCIDevice *pdev, Error **errp)
+static void vfio_pci_realize(PCIDevice *pdev, Error **errp)
{
ERRP_GUARD();
VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev);
VFIODevice *vbasedev = &vdev->vbasedev;
- int i, ret;
+ int i;
char uuid[UUID_STR_LEN];
g_autofree char *name = NULL;
- uint32_t config_space_size;
if (vbasedev->fd < 0 && !vbasedev->sysfsdev) {
if (!(~vdev->host.domain || ~vdev->host.bus ||
@@ -3189,17 +3218,6 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
goto error;
}
- config_space_size = MIN(pci_config_size(&vdev->pdev), vdev->config_size);
-
- /* Get a copy of config space */
- ret = vfio_pci_config_space_read(vdev, 0, config_space_size,
- vdev->pdev.config);
- if (ret < (int)config_space_size) {
- ret = ret < 0 ? -ret : EFAULT;
- error_setg_errno(errp, ret, "failed to read device config space");
- goto error;
- }
-
if (!vfio_pci_config_setup(vdev, errp)) {
goto error;
}
@@ -3302,17 +3320,6 @@ static void vfio_instance_finalize(Object *obj)
{
VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj);
- vfio_display_finalize(vdev);
- vfio_bars_finalize(vdev);
- g_free(vdev->emulated_config_bits);
- g_free(vdev->rom);
- /*
- * XXX Leaking igd_opregion is not an oversight, we can't remove the
- * fw_cfg entry therefore leaking this allocation seems like the safest
- * option.
- *
- * g_free(vdev->igd_opregion);
- */
vfio_pci_put_device(vdev);
}
@@ -3514,7 +3521,7 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, const void *data)
object_class_property_add_str(klass, "fd", NULL, vfio_pci_set_fd);
#endif
dc->desc = "VFIO-based PCI device assignment";
- pdc->realize = vfio_realize;
+ pdc->realize = vfio_pci_realize;
object_class_property_set_description(klass, /* 1.3 */
"host",
diff --git a/hw/vfio/vfio-cpr.h b/hw/vfio/vfio-cpr.h
deleted file mode 100644
index 134b83a..0000000
--- a/hw/vfio/vfio-cpr.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * VFIO CPR
- *
- * Copyright (c) 2025 Oracle and/or its affiliates.
- *
- * SPDX-License-Identifier: GPL-2.0-or-later
- */
-
-#ifndef HW_VFIO_CPR_H
-#define HW_VFIO_CPR_H
-
-bool vfio_cpr_register_container(VFIOContainerBase *bcontainer, Error **errp);
-void vfio_cpr_unregister_container(VFIOContainerBase *bcontainer);
-
-#endif /* HW_VFIO_CPR_H */
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 1ab2c11..7061b6e 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -209,6 +209,8 @@ static void vhost_vdpa_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
int ret;
Int128 llend;
Error *local_err = NULL;
+ MemoryRegion *mr;
+ hwaddr xlat;
if (iotlb->target_as != &address_space_memory) {
error_report("Wrong target AS \"%s\", only system memory is allowed",
@@ -228,11 +230,14 @@ static void vhost_vdpa_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
bool read_only;
- if (!memory_get_xlat_addr(iotlb, &vaddr, NULL, &read_only, NULL,
- &local_err)) {
+ mr = memory_translate_iotlb(iotlb, &xlat, &local_err);
+ if (!mr) {
error_report_err(local_err);
return;
}
+ vaddr = memory_region_get_ram_ptr(mr) + xlat;
+ read_only = !(iotlb->perm & IOMMU_WO) || mr->readonly;
+
ret = vhost_vdpa_dma_map(s, VHOST_VDPA_GUEST_PA_ASID, iova,
iotlb->addr_mask + 1, vaddr, read_only);
if (ret) {
@@ -594,6 +599,36 @@ static void vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v)
v->shadow_vqs = g_steal_pointer(&shadow_vqs);
}
+static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev)
+{
+ struct vhost_vdpa *v = dev->opaque;
+
+ uint64_t features;
+ uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 |
+ 0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH |
+ 0x1ULL << VHOST_BACKEND_F_IOTLB_ASID |
+ 0x1ULL << VHOST_BACKEND_F_SUSPEND;
+ int r;
+
+ if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) {
+ return -EFAULT;
+ }
+
+ features &= f;
+
+ if (vhost_vdpa_first_dev(dev)) {
+ r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features);
+ if (r) {
+ return -EFAULT;
+ }
+ }
+
+ dev->backend_cap = features;
+ v->shared->backend_cap = features;
+
+ return 0;
+}
+
static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
{
struct vhost_vdpa *v = opaque;
@@ -603,7 +638,12 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
v->dev = dev;
dev->opaque = opaque ;
- v->shared->listener = vhost_vdpa_memory_listener;
+
+ ret = vhost_vdpa_set_backend_cap(dev);
+ if (unlikely(ret != 0)) {
+ return ret;
+ }
+
vhost_vdpa_init_svq(dev, v);
error_propagate(&dev->migration_blocker, v->migration_blocker);
@@ -639,6 +679,7 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
VIRTIO_CONFIG_S_DRIVER);
+ v->shared->listener = vhost_vdpa_memory_listener;
return 0;
}
@@ -841,36 +882,6 @@ static int vhost_vdpa_set_features(struct vhost_dev *dev,
return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
}
-static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev)
-{
- struct vhost_vdpa *v = dev->opaque;
-
- uint64_t features;
- uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 |
- 0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH |
- 0x1ULL << VHOST_BACKEND_F_IOTLB_ASID |
- 0x1ULL << VHOST_BACKEND_F_SUSPEND;
- int r;
-
- if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) {
- return -EFAULT;
- }
-
- features &= f;
-
- if (vhost_vdpa_first_dev(dev)) {
- r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features);
- if (r) {
- return -EFAULT;
- }
- }
-
- dev->backend_cap = features;
- v->shared->backend_cap = features;
-
- return 0;
-}
-
static int vhost_vdpa_get_device_id(struct vhost_dev *dev,
uint32_t *device_id)
{
@@ -888,8 +899,14 @@ static int vhost_vdpa_reset_device(struct vhost_dev *dev)
ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status);
trace_vhost_vdpa_reset_device(dev);
+ if (ret) {
+ return ret;
+ }
+
+ memory_listener_unregister(&v->shared->listener);
+ v->shared->listener_registered = false;
v->suspended = false;
- return ret;
+ return 0;
}
static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx)
@@ -1373,7 +1390,15 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
"IOMMU and try again");
return -1;
}
- memory_listener_register(&v->shared->listener, dev->vdev->dma_as);
+ if (v->shared->listener_registered &&
+ dev->vdev->dma_as != v->shared->listener.address_space) {
+ memory_listener_unregister(&v->shared->listener);
+ v->shared->listener_registered = false;
+ }
+ if (!v->shared->listener_registered) {
+ memory_listener_register(&v->shared->listener, dev->vdev->dma_as);
+ v->shared->listener_registered = true;
+ }
return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
}
@@ -1383,8 +1408,6 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
static void vhost_vdpa_reset_status(struct vhost_dev *dev)
{
- struct vhost_vdpa *v = dev->opaque;
-
if (!vhost_vdpa_last_dev(dev)) {
return;
}
@@ -1392,7 +1415,6 @@ static void vhost_vdpa_reset_status(struct vhost_dev *dev)
vhost_vdpa_reset_device(dev);
vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
VIRTIO_CONFIG_S_DRIVER);
- memory_listener_unregister(&v->shared->listener);
}
static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
@@ -1526,12 +1548,27 @@ static int vhost_vdpa_get_features(struct vhost_dev *dev,
static int vhost_vdpa_set_owner(struct vhost_dev *dev)
{
+ int r;
+ struct vhost_vdpa *v;
+
if (!vhost_vdpa_first_dev(dev)) {
return 0;
}
trace_vhost_vdpa_set_owner(dev);
- return vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL);
+ r = vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL);
+ if (unlikely(r < 0)) {
+ return r;
+ }
+
+ /*
+ * Being optimistic and listening address space memory. If the device
+ * uses vIOMMU, it is changed at vhost_vdpa_dev_start.
+ */
+ v = dev->opaque;
+ memory_listener_register(&v->shared->listener, &address_space_memory);
+ v->shared->listener_registered = true;
+ return 0;
}
static int vhost_vdpa_vq_get_addr(struct vhost_dev *dev,
@@ -1563,7 +1600,6 @@ const VhostOps vdpa_ops = {
.vhost_set_vring_kick = vhost_vdpa_set_vring_kick,
.vhost_set_vring_call = vhost_vdpa_set_vring_call,
.vhost_get_features = vhost_vdpa_get_features,
- .vhost_set_backend_cap = vhost_vdpa_set_backend_cap,
.vhost_set_owner = vhost_vdpa_set_owner,
.vhost_set_vring_endian = NULL,
.vhost_backend_memslots_limit = vhost_vdpa_memslots_limit,
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 9b48aa8..fba2372 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -146,9 +146,7 @@ static const VMStateDescription vmstate_virtio_pci = {
static bool virtio_pci_has_extra_state(DeviceState *d)
{
- VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
-
- return proxy->flags & VIRTIO_PCI_FLAG_MIGRATE_EXTRA;
+ return true;
}
static void virtio_pci_save_extra_state(DeviceState *d, QEMUFile *f)
@@ -1215,7 +1213,12 @@ static int virtio_pci_set_guest_notifier(DeviceState *d, int n, bool assign,
static bool virtio_pci_query_guest_notifiers(DeviceState *d)
{
VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
- return msix_enabled(&proxy->pci_dev);
+
+ if (msix_enabled(&proxy->pci_dev)) {
+ return true;
+ } else {
+ return pci_irq_disabled(&proxy->pci_dev);
+ }
}
static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign)
@@ -2363,12 +2366,8 @@ static void virtio_pci_bus_reset_hold(Object *obj, ResetType type)
static const Property virtio_pci_properties[] = {
DEFINE_PROP_BIT("virtio-pci-bus-master-bug-migration", VirtIOPCIProxy, flags,
VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION_BIT, false),
- DEFINE_PROP_BIT("migrate-extra", VirtIOPCIProxy, flags,
- VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT, true),
DEFINE_PROP_BIT("modern-pio-notify", VirtIOPCIProxy, flags,
VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT, false),
- DEFINE_PROP_BIT("x-disable-pcie", VirtIOPCIProxy, flags,
- VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT, false),
DEFINE_PROP_BIT("page-per-vq", VirtIOPCIProxy, flags,
VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT, false),
DEFINE_PROP_BOOL("x-ignore-backend-features", VirtIOPCIProxy,
@@ -2397,8 +2396,7 @@ static void virtio_pci_dc_realize(DeviceState *qdev, Error **errp)
VirtIOPCIProxy *proxy = VIRTIO_PCI(qdev);
PCIDevice *pci_dev = &proxy->pci_dev;
- if (!(proxy->flags & VIRTIO_PCI_FLAG_DISABLE_PCIE) &&
- virtio_pci_modern(proxy)) {
+ if (virtio_pci_modern(proxy)) {
pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
}
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 2e98cec..5534251 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -205,6 +205,15 @@ static const char *virtio_id_to_name(uint16_t device_id)
return name;
}
+static void virtio_check_indirect_feature(VirtIODevice *vdev)
+{
+ if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC)) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "Device %s: indirect_desc was not negotiated!\n",
+ vdev->name);
+ }
+}
+
/* Called within call_rcu(). */
static void virtio_free_region_cache(VRingMemoryRegionCaches *caches)
{
@@ -1733,6 +1742,7 @@ static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
virtio_error(vdev, "Invalid size for indirect buffer table");
goto done;
}
+ virtio_check_indirect_feature(vdev);
/* loop over the indirect descriptor table */
len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
@@ -1870,6 +1880,7 @@ static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz)
virtio_error(vdev, "Invalid size for indirect buffer table");
goto done;
}
+ virtio_check_indirect_feature(vdev);
/* loop over the indirect descriptor table */
len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,