aboutsummaryrefslogtreecommitdiff
path: root/hw
diff options
context:
space:
mode:
authorStefan Hajnoczi <stefanha@redhat.com>2025-02-22 05:06:39 +0800
committerStefan Hajnoczi <stefanha@redhat.com>2025-02-22 05:06:39 +0800
commitb69801dd6b1eb4d107f7c2f643adf0a4e3ec9124 (patch)
tree9620500ad85d8368314501a3969e20624916db17 /hw
parentf41af4c5857b6983766aaffc041580ff170d0679 (diff)
parentdd6d545e8f2d9a0e8a8c287ec16469f03ef5c198 (diff)
downloadqemu-b69801dd6b1eb4d107f7c2f643adf0a4e3ec9124.zip
qemu-b69801dd6b1eb4d107f7c2f643adf0a4e3ec9124.tar.gz
qemu-b69801dd6b1eb4d107f7c2f643adf0a4e3ec9124.tar.bz2
Merge tag 'for_upstream' of https://git.kernel.org/pub/scm/virt/kvm/mst/qemu into staging
virtio,pc,pci: features, fixes, cleanups Features: SR-IOV emulation for pci virtio-mem-pci support for s390 interleave support for cxl big endian support for vdpa svq new QAPI events for vhost-user Also vIOMMU reset order fixups are in. Fixes, cleanups all over the place. Signed-off-by: Michael S. Tsirkin <mst@redhat.com> # -----BEGIN PGP SIGNATURE----- # # iQFDBAABCAAtFiEEXQn9CHHI+FuUyooNKB8NuNKNVGkFAme4b8sPHG1zdEByZWRo # YXQuY29tAAoJECgfDbjSjVRpHKcIAKPJsVqPdda2dJ7b7FdyRT0Q+uwezXqaGHd4 # 7Lzih1wsxYNkwIAyPtEb76/21qiS7BluqlUCfCB66R9xWjP5/KfvAFj4/r4AEduE # fxAgYzotNpv55zcRbcflMyvQ42WGiZZHC+o5Lp7vDXUP3pIyHrl0Ydh5WmcD+hwS # BjXvda58TirQpPJ7rUL+sSfLih17zQkkDcfv5/AgorDy1wK09RBKwMx/gq7wG8yJ # twy8eBY2CmfmFD7eTM+EKqBD2T0kwLEeLfS/F/tl5Fyg6lAiYgYtCbGLpAmWErsg # XZvfZmwqL7CNzWexGvPFnnLyqwC33WUP0k0kT88Y5wh3/h98blw= # =tej8 # -----END PGP SIGNATURE----- # gpg: Signature made Fri 21 Feb 2025 20:21:31 HKT # gpg: using RSA key 5D09FD0871C8F85B94CA8A0D281F0DB8D28D5469 # gpg: issuer "mst@redhat.com" # gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>" [full] # gpg: aka "Michael S. Tsirkin <mst@redhat.com>" [full] # Primary key fingerprint: 0270 606B 6F3C DF3D 0B17 0970 C350 3912 AFBE 8E67 # Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA 8A0D 281F 0DB8 D28D 5469 * tag 'for_upstream' of https://git.kernel.org/pub/scm/virt/kvm/mst/qemu: (41 commits) docs/devel/reset: Document reset expectations for DMA and IOMMU hw/vfio/common: Add a trace point in vfio_reset_handler hw/arm/smmuv3: Move reset to exit phase hw/i386/intel-iommu: Migrate to 3-phase reset hw/virtio/virtio-iommu: Migrate to 3-phase reset vhost-user-snd: correct the calculation of config_size net: vhost-user: add QAPI events to report connection state hw/virtio/virtio-nsm: Respond with correct length vdpa: Fix endian bugs in shadow virtqueue MAINTAINERS: add more files to `vhost` cryptodev/vhost: allocate CryptoDevBackendVhost using g_mem0() vhost-iova-tree: Update documentation vhost-iova-tree, svq: Implement GPA->IOVA & partial IOVA->HVA trees vhost-iova-tree: Implement an IOVA-only tree amd_iommu: Use correct bitmask to set capability BAR amd_iommu: Use correct DTE field for interrupt passthrough hw/virtio: reset virtio balloon stats on machine reset mem/cxl_type3: support 3, 6, 12 and 16 interleave ways hw/mem/cxl_type3: Ensure errp is set on realization failure hw/mem/cxl_type3: Fix special_ops memory leak on msix_init_exclusive_bar() failure ... Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Diffstat (limited to 'hw')
-rw-r--r--hw/arm/smmu-common.c9
-rw-r--r--hw/arm/smmuv3.c14
-rw-r--r--hw/arm/trace-events1
-rw-r--r--hw/cxl/cxl-component-utils.c9
-rw-r--r--hw/cxl/cxl-device-utils.c12
-rw-r--r--hw/cxl/switch-mailbox-cci.c4
-rw-r--r--hw/i386/amd_iommu.c10
-rw-r--r--hw/i386/amd_iommu.h2
-rw-r--r--hw/i386/intel_iommu.c12
-rw-r--r--hw/i386/microvm.c66
-rw-r--r--hw/i386/pc.c6
-rw-r--r--hw/i386/trace-events1
-rw-r--r--hw/mem/cxl_type3.c45
-rw-r--r--hw/net/igb.c10
-rw-r--r--hw/net/virtio-net.c43
-rw-r--r--hw/nvme/ctrl.c22
-rw-r--r--hw/pci/msix.c9
-rw-r--r--hw/pci/pci.c22
-rw-r--r--hw/pci/pcie_sriov.c159
-rw-r--r--hw/pci/trace-events2
-rw-r--r--hw/ppc/spapr_pci.c20
-rw-r--r--hw/s390x/s390-pci-bus.c42
-rw-r--r--hw/vfio/common.c1
-rw-r--r--hw/vfio/trace-events1
-rw-r--r--hw/virtio/trace-events2
-rw-r--r--hw/virtio/vhost-iova-tree.c115
-rw-r--r--hw/virtio/vhost-iova-tree.h8
-rw-r--r--hw/virtio/vhost-shadow-virtqueue.c73
-rw-r--r--hw/virtio/vhost-shadow-virtqueue.h5
-rw-r--r--hw/virtio/vhost-user-snd.c18
-rw-r--r--hw/virtio/vhost-vdpa.c40
-rw-r--r--hw/virtio/virtio-balloon.c30
-rw-r--r--hw/virtio/virtio-iommu.c14
-rw-r--r--hw/virtio/virtio-nsm.c2
34 files changed, 561 insertions, 268 deletions
diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index dd74c2e..8c1b407 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -924,7 +924,12 @@ static void smmu_base_realize(DeviceState *dev, Error **errp)
}
}
-static void smmu_base_reset_hold(Object *obj, ResetType type)
+/*
+ * Make sure the IOMMU is reset in 'exit' phase after
+ * all outstanding DMA requests have been quiesced during
+ * the 'enter' or 'hold' reset phases
+ */
+static void smmu_base_reset_exit(Object *obj, ResetType type)
{
SMMUState *s = ARM_SMMU(obj);
@@ -949,7 +954,7 @@ static void smmu_base_class_init(ObjectClass *klass, void *data)
device_class_set_props(dc, smmu_dev_properties);
device_class_set_parent_realize(dc, smmu_base_realize,
&sbc->parent_realize);
- rc->phases.hold = smmu_base_reset_hold;
+ rc->phases.exit = smmu_base_reset_exit;
}
static const TypeInfo smmu_base_info = {
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index c0cf5df..b49a59b 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -1870,13 +1870,19 @@ static void smmu_init_irq(SMMUv3State *s, SysBusDevice *dev)
}
}
-static void smmu_reset_hold(Object *obj, ResetType type)
+/*
+ * Make sure the IOMMU is reset in 'exit' phase after
+ * all outstanding DMA requests have been quiesced during
+ * the 'enter' or 'hold' reset phases
+ */
+static void smmu_reset_exit(Object *obj, ResetType type)
{
SMMUv3State *s = ARM_SMMUV3(obj);
SMMUv3Class *c = ARM_SMMUV3_GET_CLASS(s);
- if (c->parent_phases.hold) {
- c->parent_phases.hold(obj, type);
+ trace_smmu_reset_exit();
+ if (c->parent_phases.exit) {
+ c->parent_phases.exit(obj, type);
}
smmuv3_init_regs(s);
@@ -1999,7 +2005,7 @@ static void smmuv3_class_init(ObjectClass *klass, void *data)
SMMUv3Class *c = ARM_SMMUV3_CLASS(klass);
dc->vmsd = &vmstate_smmuv3;
- resettable_class_set_parent_phases(rc, NULL, smmu_reset_hold, NULL,
+ resettable_class_set_parent_phases(rc, NULL, NULL, smmu_reset_exit,
&c->parent_phases);
device_class_set_parent_realize(dc, smmu_realize,
&c->parent_realize);
diff --git a/hw/arm/trace-events b/hw/arm/trace-events
index c64ad34..7790db7 100644
--- a/hw/arm/trace-events
+++ b/hw/arm/trace-events
@@ -56,6 +56,7 @@ smmuv3_config_cache_inv(uint32_t sid) "Config cache INV for sid=0x%x"
smmuv3_notify_flag_add(const char *iommu) "ADD SMMUNotifier node for iommu mr=%s"
smmuv3_notify_flag_del(const char *iommu) "DEL SMMUNotifier node for iommu mr=%s"
smmuv3_inv_notifiers_iova(const char *name, int asid, int vmid, uint64_t iova, uint8_t tg, uint64_t num_pages, int stage) "iommu mr=%s asid=%d vmid=%d iova=0x%"PRIx64" tg=%d num_pages=0x%"PRIx64" stage=%d"
+smmu_reset_exit(void) ""
# strongarm.c
strongarm_uart_update_parameters(const char *label, int speed, char parity, int data_bits, int stop_bits) "%s speed=%d parity=%c data=%d stop=%d"
diff --git a/hw/cxl/cxl-component-utils.c b/hw/cxl/cxl-component-utils.c
index cd116c0..4738959 100644
--- a/hw/cxl/cxl-component-utils.c
+++ b/hw/cxl/cxl-component-utils.c
@@ -243,8 +243,13 @@ static void hdm_init_common(uint32_t *reg_state, uint32_t *write_msk,
ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, INTERLEAVE_4K, 1);
ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY,
POISON_ON_ERR_CAP, 0);
- ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, 3_6_12_WAY, 0);
- ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, 16_WAY, 0);
+ if (type == CXL2_TYPE3_DEVICE) {
+ ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, 3_6_12_WAY, 1);
+ ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, 16_WAY, 1);
+ } else {
+ ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, 3_6_12_WAY, 0);
+ ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, 16_WAY, 0);
+ }
ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, UIO, 0);
ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY,
UIO_DECODER_COUNT, 0);
diff --git a/hw/cxl/cxl-device-utils.c b/hw/cxl/cxl-device-utils.c
index 035d034..52ad1e4 100644
--- a/hw/cxl/cxl-device-utils.c
+++ b/hw/cxl/cxl-device-utils.c
@@ -352,10 +352,8 @@ static void device_reg_init_common(CXLDeviceState *cxl_dstate)
}
}
-static void mailbox_reg_init_common(CXLDeviceState *cxl_dstate)
+static void mailbox_reg_init_common(CXLDeviceState *cxl_dstate, int msi_n)
{
- const uint8_t msi_n = 9;
-
/* 2048 payload size */
ARRAY_FIELD_DP32(cxl_dstate->mbox_reg_state32, CXL_DEV_MAILBOX_CAP,
PAYLOAD_SIZE, CXL_MAILBOX_PAYLOAD_SHIFT);
@@ -382,7 +380,7 @@ static void memdev_reg_init_common(CXLDeviceState *cxl_dstate)
cxl_dstate->memdev_status = memdev_status_reg;
}
-void cxl_device_register_init_t3(CXLType3Dev *ct3d)
+void cxl_device_register_init_t3(CXLType3Dev *ct3d, int msi_n)
{
CXLDeviceState *cxl_dstate = &ct3d->cxl_dstate;
uint64_t *cap_h = cxl_dstate->caps_reg_state64;
@@ -398,7 +396,7 @@ void cxl_device_register_init_t3(CXLType3Dev *ct3d)
device_reg_init_common(cxl_dstate);
cxl_device_cap_init(cxl_dstate, MAILBOX, 2, CXL_DEV_MAILBOX_VERSION);
- mailbox_reg_init_common(cxl_dstate);
+ mailbox_reg_init_common(cxl_dstate, msi_n);
cxl_device_cap_init(cxl_dstate, MEMORY_DEVICE, 0x4000,
CXL_MEM_DEV_STATUS_VERSION);
@@ -408,7 +406,7 @@ void cxl_device_register_init_t3(CXLType3Dev *ct3d)
CXL_MAILBOX_MAX_PAYLOAD_SIZE);
}
-void cxl_device_register_init_swcci(CSWMBCCIDev *sw)
+void cxl_device_register_init_swcci(CSWMBCCIDev *sw, int msi_n)
{
CXLDeviceState *cxl_dstate = &sw->cxl_dstate;
uint64_t *cap_h = cxl_dstate->caps_reg_state64;
@@ -423,7 +421,7 @@ void cxl_device_register_init_swcci(CSWMBCCIDev *sw)
device_reg_init_common(cxl_dstate);
cxl_device_cap_init(cxl_dstate, MAILBOX, 2, 1);
- mailbox_reg_init_common(cxl_dstate);
+ mailbox_reg_init_common(cxl_dstate, msi_n);
cxl_device_cap_init(cxl_dstate, MEMORY_DEVICE, 0x4000, 1);
memdev_reg_init_common(cxl_dstate);
diff --git a/hw/cxl/switch-mailbox-cci.c b/hw/cxl/switch-mailbox-cci.c
index 65cdac6..833b824 100644
--- a/hw/cxl/switch-mailbox-cci.c
+++ b/hw/cxl/switch-mailbox-cci.c
@@ -17,10 +17,12 @@
#include "hw/qdev-properties.h"
#include "hw/cxl/cxl.h"
+#define CXL_SWCCI_MSIX_MBOX 3
+
static void cswmbcci_reset(DeviceState *dev)
{
CSWMBCCIDev *cswmb = CXL_SWITCH_MAILBOX_CCI(dev);
- cxl_device_register_init_swcci(cswmb);
+ cxl_device_register_init_swcci(cswmb, CXL_SWCCI_MSIX_MBOX);
}
static void cswbcci_realize(PCIDevice *pci_dev, Error **errp)
diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index e8e084c..5b21cf1 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -1309,15 +1309,15 @@ static int amdvi_int_remap_msi(AMDVIState *iommu,
ret = -AMDVI_IR_ERR;
break;
case AMDVI_IOAPIC_INT_TYPE_NMI:
- pass = dte[3] & AMDVI_DEV_NMI_PASS_MASK;
+ pass = dte[2] & AMDVI_DEV_NMI_PASS_MASK;
trace_amdvi_ir_delivery_mode("nmi");
break;
case AMDVI_IOAPIC_INT_TYPE_INIT:
- pass = dte[3] & AMDVI_DEV_INT_PASS_MASK;
+ pass = dte[2] & AMDVI_DEV_INT_PASS_MASK;
trace_amdvi_ir_delivery_mode("init");
break;
case AMDVI_IOAPIC_INT_TYPE_EINT:
- pass = dte[3] & AMDVI_DEV_EINT_PASS_MASK;
+ pass = dte[2] & AMDVI_DEV_EINT_PASS_MASK;
trace_amdvi_ir_delivery_mode("eint");
break;
default:
@@ -1593,9 +1593,9 @@ static void amdvi_pci_realize(PCIDevice *pdev, Error **errp)
/* reset AMDVI specific capabilities, all r/o */
pci_set_long(pdev->config + s->capab_offset, AMDVI_CAPAB_FEATURES);
pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_BAR_LOW,
- AMDVI_BASE_ADDR & ~(0xffff0000));
+ AMDVI_BASE_ADDR & MAKE_64BIT_MASK(14, 18));
pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_BAR_HIGH,
- (AMDVI_BASE_ADDR & ~(0xffff)) >> 16);
+ AMDVI_BASE_ADDR >> 32);
pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_RANGE,
0xff000000);
pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_MISC, 0);
diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h
index e0dac4d..2812513 100644
--- a/hw/i386/amd_iommu.h
+++ b/hw/i386/amd_iommu.h
@@ -187,7 +187,7 @@
AMDVI_CAPAB_FLAG_HTTUNNEL | AMDVI_CAPAB_EFR_SUP)
/* AMDVI default address */
-#define AMDVI_BASE_ADDR 0xfed80000
+#define AMDVI_BASE_ADDR 0xfed80000ULL
/* page management constants */
#define AMDVI_PAGE_SHIFT 12
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 7fde060..dffd7ee 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -4697,10 +4697,11 @@ static void vtd_init(IntelIOMMUState *s)
/* Should not reset address_spaces when reset because devices will still use
* the address space they got at first (won't ask the bus again).
*/
-static void vtd_reset(DeviceState *dev)
+static void vtd_reset_exit(Object *obj, ResetType type)
{
- IntelIOMMUState *s = INTEL_IOMMU_DEVICE(dev);
+ IntelIOMMUState *s = INTEL_IOMMU_DEVICE(obj);
+ trace_vtd_reset_exit();
vtd_init(s);
vtd_address_space_refresh_all(s);
}
@@ -4864,8 +4865,13 @@ static void vtd_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
X86IOMMUClass *x86_class = X86_IOMMU_DEVICE_CLASS(klass);
+ ResettableClass *rc = RESETTABLE_CLASS(klass);
- device_class_set_legacy_reset(dc, vtd_reset);
+ /*
+ * Use 'exit' reset phase to make sure all DMA requests
+ * have been quiesced during 'enter' or 'hold' phase
+ */
+ rc->phases.exit = vtd_reset_exit;
dc->vmsd = &vtd_vmstate;
device_class_set_props(dc, vtd_properties);
dc->hotpluggable = false;
diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c
index a8d354a..d0a236c 100644
--- a/hw/i386/microvm.c
+++ b/hw/i386/microvm.c
@@ -451,11 +451,44 @@ static HotplugHandler *microvm_get_hotplug_handler(MachineState *machine,
return NULL;
}
+static void microvm_machine_done(Notifier *notifier, void *data)
+{
+ MicrovmMachineState *mms = container_of(notifier, MicrovmMachineState,
+ machine_done);
+ X86MachineState *x86ms = X86_MACHINE(mms);
+
+ acpi_setup_microvm(mms);
+ dt_setup_microvm(mms);
+ fw_cfg_add_e820(x86ms->fw_cfg);
+}
+
+static void microvm_powerdown_req(Notifier *notifier, void *data)
+{
+ MicrovmMachineState *mms = container_of(notifier, MicrovmMachineState,
+ powerdown_req);
+ X86MachineState *x86ms = X86_MACHINE(mms);
+
+ if (x86ms->acpi_dev) {
+ Object *obj = OBJECT(x86ms->acpi_dev);
+ AcpiDeviceIfClass *adevc = ACPI_DEVICE_IF_GET_CLASS(obj);
+ adevc->send_event(ACPI_DEVICE_IF(x86ms->acpi_dev),
+ ACPI_POWER_DOWN_STATUS);
+ }
+}
+
static void microvm_machine_state_init(MachineState *machine)
{
MicrovmMachineState *mms = MICROVM_MACHINE(machine);
X86MachineState *x86ms = X86_MACHINE(machine);
+ /* State */
+ mms->kernel_cmdline_fixed = false;
+
+ mms->machine_done.notify = microvm_machine_done;
+ qemu_add_machine_init_done_notifier(&mms->machine_done);
+ mms->powerdown_req.notify = microvm_powerdown_req;
+ qemu_register_powerdown_notifier(&mms->powerdown_req);
+
microvm_memory_init(mms);
x86_cpus_init(x86ms, CPU_VERSION_LATEST);
@@ -581,31 +614,6 @@ static void microvm_machine_set_auto_kernel_cmdline(Object *obj, bool value,
mms->auto_kernel_cmdline = value;
}
-static void microvm_machine_done(Notifier *notifier, void *data)
-{
- MicrovmMachineState *mms = container_of(notifier, MicrovmMachineState,
- machine_done);
- X86MachineState *x86ms = X86_MACHINE(mms);
-
- acpi_setup_microvm(mms);
- dt_setup_microvm(mms);
- fw_cfg_add_e820(x86ms->fw_cfg);
-}
-
-static void microvm_powerdown_req(Notifier *notifier, void *data)
-{
- MicrovmMachineState *mms = container_of(notifier, MicrovmMachineState,
- powerdown_req);
- X86MachineState *x86ms = X86_MACHINE(mms);
-
- if (x86ms->acpi_dev) {
- Object *obj = OBJECT(x86ms->acpi_dev);
- AcpiDeviceIfClass *adevc = ACPI_DEVICE_IF_GET_CLASS(obj);
- adevc->send_event(ACPI_DEVICE_IF(x86ms->acpi_dev),
- ACPI_POWER_DOWN_STATUS);
- }
-}
-
static void microvm_machine_initfn(Object *obj)
{
MicrovmMachineState *mms = MICROVM_MACHINE(obj);
@@ -617,14 +625,6 @@ static void microvm_machine_initfn(Object *obj)
mms->isa_serial = true;
mms->option_roms = true;
mms->auto_kernel_cmdline = true;
-
- /* State */
- mms->kernel_cmdline_fixed = false;
-
- mms->machine_done.notify = microvm_machine_done;
- qemu_add_machine_init_done_notifier(&mms->machine_done);
- mms->powerdown_req.notify = microvm_powerdown_req;
- qemu_register_powerdown_notifier(&mms->powerdown_req);
}
GlobalProperty microvm_properties[] = {
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 22641e6..f199a8c 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1241,6 +1241,9 @@ void pc_basic_device_init(struct PCMachineState *pcms,
/* Super I/O */
pc_superio_init(isa_bus, create_fdctrl, pcms->i8042_enabled,
pcms->vmport != ON_OFF_AUTO_ON, &error_fatal);
+
+ pcms->machine_done.notify = pc_machine_done;
+ qemu_add_machine_init_done_notifier(&pcms->machine_done);
}
void pc_nic_init(PCMachineClass *pcmc, ISABus *isa_bus, PCIBus *pci_bus)
@@ -1714,9 +1717,6 @@ static void pc_machine_initfn(Object *obj)
if (pcmc->pci_enabled) {
cxl_machine_init(obj, &pcms->cxl_devices_state);
}
-
- pcms->machine_done.notify = pc_machine_done;
- qemu_add_machine_init_done_notifier(&pcms->machine_done);
}
static void pc_machine_reset(MachineState *machine, ResetType type)
diff --git a/hw/i386/trace-events b/hw/i386/trace-events
index 53c02d7..ac9e1a1 100644
--- a/hw/i386/trace-events
+++ b/hw/i386/trace-events
@@ -68,6 +68,7 @@ vtd_frr_new(int index, uint64_t hi, uint64_t lo) "index %d high 0x%"PRIx64" low
vtd_warn_invalid_qi_tail(uint16_t tail) "tail 0x%"PRIx16
vtd_warn_ir_vector(uint16_t sid, int index, int vec, int target) "sid 0x%"PRIx16" index %d vec %d (should be: %d)"
vtd_warn_ir_trigger(uint16_t sid, int index, int trig, int target) "sid 0x%"PRIx16" index %d trigger %d (should be: %d)"
+vtd_reset_exit(void) ""
# amd_iommu.c
amdvi_evntlog_fail(uint64_t addr, uint32_t head) "error: fail to write at addr 0x%"PRIx64" + offset 0x%"PRIx32
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 0ae1704..6fffa21 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -30,6 +30,14 @@
#include "hw/cxl/cxl.h"
#include "hw/pci/msix.h"
+/* type3 device private */
+enum CXL_T3_MSIX_VECTOR {
+ CXL_T3_MSIX_PCIE_DOE_TABLE_ACCESS = 0,
+ CXL_T3_MSIX_EVENT_START = 2,
+ CXL_T3_MSIX_MBOX = CXL_T3_MSIX_EVENT_START + CXL_EVENT_TYPE_MAX,
+ CXL_T3_MSIX_VECTOR_NR
+};
+
#define DWORD_BYTE 4
#define CXL_CAPACITY_MULTIPLIER (256 * MiB)
@@ -843,7 +851,6 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp)
ComponentRegisters *regs = &cxl_cstate->crb;
MemoryRegion *mr = &regs->component_registers;
uint8_t *pci_conf = pci_dev->config;
- unsigned short msix_num = 10;
int i, rc;
uint16_t count;
@@ -884,31 +891,32 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp)
&ct3d->cxl_dstate.device_registers);
/* MSI(-X) Initialization */
- rc = msix_init_exclusive_bar(pci_dev, msix_num, 4, NULL);
+ rc = msix_init_exclusive_bar(pci_dev, CXL_T3_MSIX_VECTOR_NR, 4, errp);
if (rc) {
- goto err_address_space_free;
+ goto err_free_special_ops;
}
- for (i = 0; i < msix_num; i++) {
+ for (i = 0; i < CXL_T3_MSIX_VECTOR_NR; i++) {
msix_vector_use(pci_dev, i);
}
/* DOE Initialization */
- pcie_doe_init(pci_dev, &ct3d->doe_cdat, 0x190, doe_cdat_prot, true, 0);
+ pcie_doe_init(pci_dev, &ct3d->doe_cdat, 0x190, doe_cdat_prot, true,
+ CXL_T3_MSIX_PCIE_DOE_TABLE_ACCESS);
cxl_cstate->cdat.build_cdat_table = ct3_build_cdat_table;
cxl_cstate->cdat.free_cdat_table = ct3_free_cdat_table;
cxl_cstate->cdat.private = ct3d;
if (!cxl_doe_cdat_init(cxl_cstate, errp)) {
- goto err_free_special_ops;
+ goto err_msix_uninit;
}
pcie_cap_deverr_init(pci_dev);
/* Leave a bit of room for expansion */
- rc = pcie_aer_init(pci_dev, PCI_ERR_VER, 0x200, PCI_ERR_SIZEOF, NULL);
+ rc = pcie_aer_init(pci_dev, PCI_ERR_VER, 0x200, PCI_ERR_SIZEOF, errp);
if (rc) {
goto err_release_cdat;
}
- cxl_event_init(&ct3d->cxl_dstate, 2);
+ cxl_event_init(&ct3d->cxl_dstate, CXL_T3_MSIX_EVENT_START);
/* Set default value for patrol scrub attributes */
ct3d->patrol_scrub_attrs.scrub_cycle_cap =
@@ -935,9 +943,10 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp)
err_release_cdat:
cxl_doe_cdat_release(cxl_cstate);
+err_msix_uninit:
+ msix_uninit_exclusive_bar(pci_dev);
err_free_special_ops:
g_free(regs->special_ops);
-err_address_space_free:
if (ct3d->dc.host_dc) {
cxl_destroy_dc_regions(ct3d);
address_space_destroy(&ct3d->dc.host_dc_as);
@@ -959,6 +968,7 @@ static void ct3_exit(PCIDevice *pci_dev)
pcie_aer_exit(pci_dev);
cxl_doe_cdat_release(cxl_cstate);
+ msix_uninit_exclusive_bar(pci_dev);
g_free(regs->special_ops);
if (ct3d->dc.host_dc) {
cxl_destroy_dc_regions(ct3d);
@@ -1090,10 +1100,17 @@ static bool cxl_type3_dpa(CXLType3Dev *ct3d, hwaddr host_addr, uint64_t *dpa)
continue;
}
- *dpa = dpa_base +
- ((MAKE_64BIT_MASK(0, 8 + ig) & hpa_offset) |
- ((MAKE_64BIT_MASK(8 + ig + iw, 64 - 8 - ig - iw) & hpa_offset)
- >> iw));
+ if (iw < 8) {
+ *dpa = dpa_base +
+ ((MAKE_64BIT_MASK(0, 8 + ig) & hpa_offset) |
+ ((MAKE_64BIT_MASK(8 + ig + iw, 64 - 8 - ig - iw) & hpa_offset)
+ >> iw));
+ } else {
+ *dpa = dpa_base +
+ ((MAKE_64BIT_MASK(0, 8 + ig) & hpa_offset) |
+ ((((MAKE_64BIT_MASK(ig + iw, 64 - ig - iw) & hpa_offset)
+ >> (ig + iw)) / 3) << (ig + 8)));
+ }
return true;
}
@@ -1202,7 +1219,7 @@ static void ct3d_reset(DeviceState *dev)
pcie_cap_fill_link_ep_usp(PCI_DEVICE(dev), ct3d->width, ct3d->speed);
cxl_component_register_init_common(reg_state, write_msk, CXL2_TYPE3_DEVICE);
- cxl_device_register_init_t3(ct3d);
+ cxl_device_register_init_t3(ct3d, CXL_T3_MSIX_MBOX);
/*
* Bring up an endpoint to target with MCTP over VDM.
diff --git a/hw/net/igb.c b/hw/net/igb.c
index 4d93ce6..c965fc2 100644
--- a/hw/net/igb.c
+++ b/hw/net/igb.c
@@ -446,9 +446,13 @@ static void igb_pci_realize(PCIDevice *pci_dev, Error **errp)
pcie_ari_init(pci_dev, 0x150);
- pcie_sriov_pf_init(pci_dev, IGB_CAP_SRIOV_OFFSET, TYPE_IGBVF,
- IGB_82576_VF_DEV_ID, IGB_MAX_VF_FUNCTIONS, IGB_MAX_VF_FUNCTIONS,
- IGB_VF_OFFSET, IGB_VF_STRIDE);
+ if (!pcie_sriov_pf_init(pci_dev, IGB_CAP_SRIOV_OFFSET, TYPE_IGBVF,
+ IGB_82576_VF_DEV_ID, IGB_MAX_VF_FUNCTIONS,
+ IGB_MAX_VF_FUNCTIONS, IGB_VF_OFFSET, IGB_VF_STRIDE,
+ errp)) {
+ igb_cleanup_msix(s);
+ return;
+ }
pcie_sriov_pf_init_vf_bar(pci_dev, IGBVF_MMIO_BAR_IDX,
PCI_BASE_ADDRESS_MEM_TYPE_64 | PCI_BASE_ADDRESS_MEM_PREFETCH,
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index d847429..de87cfa 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -1352,18 +1352,25 @@ exit:
static bool virtio_net_load_ebpf(VirtIONet *n, Error **errp)
{
- bool ret = false;
+ if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
+ return true;
+ }
- if (virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
- trace_virtio_net_rss_load(n, n->nr_ebpf_rss_fds, n->ebpf_rss_fds);
- if (n->ebpf_rss_fds) {
- ret = virtio_net_load_ebpf_fds(n, errp);
- } else {
- ret = ebpf_rss_load(&n->ebpf_rss, errp);
- }
+ trace_virtio_net_rss_load(n, n->nr_ebpf_rss_fds, n->ebpf_rss_fds);
+
+ /*
+ * If user explicitly gave QEMU RSS FDs to use, then
+ * failing to use them must be considered a fatal
+ * error. If no RSS FDs were provided, QEMU is trying
+ * eBPF on a "best effort" basis only, so report a
+ * warning and allow fallback to software RSS.
+ */
+ if (n->ebpf_rss_fds) {
+ return virtio_net_load_ebpf_fds(n, errp);
}
- return ret;
+ ebpf_rss_load(&n->ebpf_rss, &error_warn);
+ return true;
}
static void virtio_net_unload_ebpf(VirtIONet *n)
@@ -3913,23 +3920,7 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
net_rx_pkt_init(&n->rx_pkt);
if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
- Error *err = NULL;
- if (!virtio_net_load_ebpf(n, &err)) {
- /*
- * If user explicitly gave QEMU RSS FDs to use, then
- * failing to use them must be considered a fatal
- * error. If no RSS FDs were provided, QEMU is trying
- * eBPF on a "best effort" basis only, so report a
- * warning and allow fallback to software RSS.
- */
- if (n->ebpf_rss_fds) {
- error_propagate(errp, err);
- } else {
- warn_report("unable to load eBPF RSS: %s",
- error_get_pretty(err));
- error_free(err);
- }
- }
+ virtio_net_load_ebpf(n, errp);
}
}
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 68903d1..8175751 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -8481,7 +8481,8 @@ out:
return pow2ceil(bar_size);
}
-static void nvme_init_sriov(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t offset)
+static bool nvme_init_sriov(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t offset,
+ Error **errp)
{
uint16_t vf_dev_id = n->params.use_intel_id ?
PCI_DEVICE_ID_INTEL_NVME : PCI_DEVICE_ID_REDHAT_NVME;
@@ -8490,12 +8491,16 @@ static void nvme_init_sriov(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t offset)
le16_to_cpu(cap->vifrsm),
NULL, NULL);
- pcie_sriov_pf_init(pci_dev, offset, "nvme", vf_dev_id,
- n->params.sriov_max_vfs, n->params.sriov_max_vfs,
- NVME_VF_OFFSET, NVME_VF_STRIDE);
+ if (!pcie_sriov_pf_init(pci_dev, offset, "nvme", vf_dev_id,
+ n->params.sriov_max_vfs, n->params.sriov_max_vfs,
+ NVME_VF_OFFSET, NVME_VF_STRIDE, errp)) {
+ return false;
+ }
pcie_sriov_pf_init_vf_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY |
PCI_BASE_ADDRESS_MEM_TYPE_64, bar_size);
+
+ return true;
}
static int nvme_add_pm_capability(PCIDevice *pci_dev, uint8_t offset)
@@ -8620,6 +8625,11 @@ static bool nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp)
return false;
}
+ if (!pci_is_vf(pci_dev) && n->params.sriov_max_vfs &&
+ !nvme_init_sriov(n, pci_dev, 0x120, errp)) {
+ return false;
+ }
+
nvme_update_msixcap_ts(pci_dev, n->conf_msix_qsize);
pcie_cap_deverr_init(pci_dev);
@@ -8649,10 +8659,6 @@ static bool nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp)
nvme_init_pmr(n, pci_dev);
}
- if (!pci_is_vf(pci_dev) && n->params.sriov_max_vfs) {
- nvme_init_sriov(n, pci_dev, 0x120);
- }
-
return true;
}
diff --git a/hw/pci/msix.c b/hw/pci/msix.c
index 57ec708..66f27b9 100644
--- a/hw/pci/msix.c
+++ b/hw/pci/msix.c
@@ -15,6 +15,7 @@
*/
#include "qemu/osdep.h"
+#include "qemu/log.h"
#include "hw/pci/msi.h"
#include "hw/pci/msix.h"
#include "hw/pci/pci.h"
@@ -260,6 +261,14 @@ static uint64_t msix_pba_mmio_read(void *opaque, hwaddr addr,
static void msix_pba_mmio_write(void *opaque, hwaddr addr,
uint64_t val, unsigned size)
{
+ PCIDevice *dev = opaque;
+
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "PCI [%s:%02x:%02x.%x] attempt to write to MSI-X "
+ "PBA at 0x%" FMT_PCIBUS ", ignoring.\n",
+ pci_root_bus_path(dev), pci_dev_bus_num(dev),
+ PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
+ addr);
}
static const MemoryRegionOps msix_pba_mmio_ops = {
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 2afa423..1d42847 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -803,10 +803,17 @@ static bool migrate_is_not_pcie(void *opaque, int version_id)
return !pci_is_express((PCIDevice *)opaque);
}
+static int pci_post_load(void *opaque, int version_id)
+{
+ pcie_sriov_pf_post_load(opaque);
+ return 0;
+}
+
const VMStateDescription vmstate_pci_device = {
.name = "PCIDevice",
.version_id = 2,
.minimum_version_id = 1,
+ .post_load = pci_post_load,
.fields = (const VMStateField[]) {
VMSTATE_INT32_POSITIVE_LE(version_id, PCIDevice),
VMSTATE_BUFFER_UNSAFE_INFO_TEST(config, PCIDevice,
@@ -1391,6 +1398,7 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num,
assert(hdr_type != PCI_HEADER_TYPE_BRIDGE || region_num < 2);
r = &pci_dev->io_regions[region_num];
+ assert(!r->size);
r->addr = PCI_BAR_UNMAPPED;
r->size = size;
r->type = type;
@@ -2963,7 +2971,17 @@ MSIMessage pci_get_msi_message(PCIDevice *dev, int vector)
void pci_set_power(PCIDevice *d, bool state)
{
- pci_set_enabled(d, state);
+ /*
+ * Don't change the enabled state of VFs when powering on/off the device.
+ *
+ * When powering on, VFs must not be enabled immediately but they must
+ * wait until the guest configures SR-IOV.
+ * When powering off, their corresponding PFs will be reset and disable
+ * VFs.
+ */
+ if (!pci_is_vf(d)) {
+ pci_set_enabled(d, state);
+ }
}
void pci_set_enabled(PCIDevice *d, bool state)
@@ -2977,7 +2995,7 @@ void pci_set_enabled(PCIDevice *d, bool state)
memory_region_set_enabled(&d->bus_master_enable_region,
(pci_get_word(d->config + PCI_COMMAND)
& PCI_COMMAND_MASTER) && d->enabled);
- if (!d->enabled) {
+ if (qdev_is_realized(&d->qdev)) {
pci_device_reset(d);
}
}
diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c
index e9b2322..1eb4358 100644
--- a/hw/pci/pcie_sriov.c
+++ b/hw/pci/pcie_sriov.c
@@ -20,23 +20,37 @@
#include "qapi/error.h"
#include "trace.h"
-static PCIDevice *register_vf(PCIDevice *pf, int devfn,
- const char *name, uint16_t vf_num);
-static void unregister_vfs(PCIDevice *dev);
+static void unparent_vfs(PCIDevice *dev, uint16_t total_vfs)
+{
+ for (uint16_t i = 0; i < total_vfs; i++) {
+ PCIDevice *vf = dev->exp.sriov_pf.vf[i];
+ object_unparent(OBJECT(vf));
+ object_unref(OBJECT(vf));
+ }
+ g_free(dev->exp.sriov_pf.vf);
+ dev->exp.sriov_pf.vf = NULL;
+}
-void pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
+bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
const char *vfname, uint16_t vf_dev_id,
uint16_t init_vfs, uint16_t total_vfs,
- uint16_t vf_offset, uint16_t vf_stride)
+ uint16_t vf_offset, uint16_t vf_stride,
+ Error **errp)
{
+ BusState *bus = qdev_get_parent_bus(&dev->qdev);
+ int32_t devfn = dev->devfn + vf_offset;
uint8_t *cfg = dev->config + offset;
uint8_t *wmask;
+ if (total_vfs &&
+ (uint32_t)devfn + (uint32_t)(total_vfs - 1) * vf_stride >= PCI_DEVFN_MAX) {
+ error_setg(errp, "VF addr overflows");
+ return false;
+ }
+
pcie_add_capability(dev, PCI_EXT_CAP_ID_SRIOV, 1,
offset, PCI_EXT_CAP_SRIOV_SIZEOF);
dev->exp.sriov_cap = offset;
- dev->exp.sriov_pf.num_vfs = 0;
- dev->exp.sriov_pf.vfname = g_strdup(vfname);
dev->exp.sriov_pf.vf = NULL;
pci_set_word(cfg + PCI_SRIOV_VF_OFFSET, vf_offset);
@@ -69,13 +83,37 @@ void pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
pci_set_word(wmask + PCI_SRIOV_SYS_PGSIZE, 0x553);
qdev_prop_set_bit(&dev->qdev, "multifunction", true);
+
+ dev->exp.sriov_pf.vf = g_new(PCIDevice *, total_vfs);
+
+ for (uint16_t i = 0; i < total_vfs; i++) {
+ PCIDevice *vf = pci_new(devfn, vfname);
+ vf->exp.sriov_vf.pf = dev;
+ vf->exp.sriov_vf.vf_number = i;
+
+ if (!qdev_realize(&vf->qdev, bus, errp)) {
+ object_unparent(OBJECT(vf));
+ object_unref(vf);
+ unparent_vfs(dev, i);
+ return false;
+ }
+
+ /* set vid/did according to sr/iov spec - they are not used */
+ pci_config_set_vendor_id(vf->config, 0xffff);
+ pci_config_set_device_id(vf->config, 0xffff);
+
+ dev->exp.sriov_pf.vf[i] = vf;
+ devfn += vf_stride;
+ }
+
+ return true;
}
void pcie_sriov_pf_exit(PCIDevice *dev)
{
- unregister_vfs(dev);
- g_free((char *)dev->exp.sriov_pf.vfname);
- dev->exp.sriov_pf.vfname = NULL;
+ uint8_t *cfg = dev->config + dev->exp.sriov_cap;
+
+ unparent_vfs(dev, pci_get_word(cfg + PCI_SRIOV_TOTAL_VF));
}
void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num,
@@ -141,80 +179,36 @@ void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num,
}
}
-static PCIDevice *register_vf(PCIDevice *pf, int devfn, const char *name,
- uint16_t vf_num)
-{
- PCIDevice *dev = pci_new(devfn, name);
- dev->exp.sriov_vf.pf = pf;
- dev->exp.sriov_vf.vf_number = vf_num;
- PCIBus *bus = pci_get_bus(pf);
- Error *local_err = NULL;
-
- qdev_realize(&dev->qdev, &bus->qbus, &local_err);
- if (local_err) {
- error_report_err(local_err);
- return NULL;
- }
-
- /* set vid/did according to sr/iov spec - they are not used */
- pci_config_set_vendor_id(dev->config, 0xffff);
- pci_config_set_device_id(dev->config, 0xffff);
-
- return dev;
-}
-
static void register_vfs(PCIDevice *dev)
{
uint16_t num_vfs;
uint16_t i;
uint16_t sriov_cap = dev->exp.sriov_cap;
- uint16_t vf_offset =
- pci_get_word(dev->config + sriov_cap + PCI_SRIOV_VF_OFFSET);
- uint16_t vf_stride =
- pci_get_word(dev->config + sriov_cap + PCI_SRIOV_VF_STRIDE);
- int32_t devfn = dev->devfn + vf_offset;
assert(sriov_cap > 0);
num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF);
- if (num_vfs > pci_get_word(dev->config + sriov_cap + PCI_SRIOV_TOTAL_VF)) {
- return;
- }
-
- dev->exp.sriov_pf.vf = g_new(PCIDevice *, num_vfs);
trace_sriov_register_vfs(dev->name, PCI_SLOT(dev->devfn),
PCI_FUNC(dev->devfn), num_vfs);
for (i = 0; i < num_vfs; i++) {
- dev->exp.sriov_pf.vf[i] = register_vf(dev, devfn,
- dev->exp.sriov_pf.vfname, i);
- if (!dev->exp.sriov_pf.vf[i]) {
- num_vfs = i;
- break;
- }
- devfn += vf_stride;
+ pci_set_enabled(dev->exp.sriov_pf.vf[i], true);
}
- dev->exp.sriov_pf.num_vfs = num_vfs;
+
+ pci_set_word(dev->wmask + sriov_cap + PCI_SRIOV_NUM_VF, 0);
}
static void unregister_vfs(PCIDevice *dev)
{
- uint16_t num_vfs = dev->exp.sriov_pf.num_vfs;
+ uint8_t *cfg = dev->config + dev->exp.sriov_cap;
uint16_t i;
trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), num_vfs);
- for (i = 0; i < num_vfs; i++) {
- Error *err = NULL;
- PCIDevice *vf = dev->exp.sriov_pf.vf[i];
- if (!object_property_set_bool(OBJECT(vf), "realized", false, &err)) {
- error_reportf_err(err, "Failed to unplug: ");
- }
- object_unparent(OBJECT(vf));
- object_unref(OBJECT(vf));
+ PCI_FUNC(dev->devfn));
+ for (i = 0; i < pci_get_word(cfg + PCI_SRIOV_TOTAL_VF); i++) {
+ pci_set_enabled(dev->exp.sriov_pf.vf[i], false);
}
- g_free(dev->exp.sriov_pf.vf);
- dev->exp.sriov_pf.vf = NULL;
- dev->exp.sriov_pf.num_vfs = 0;
+
+ pci_set_word(dev->wmask + dev->exp.sriov_cap + PCI_SRIOV_NUM_VF, 0xffff);
}
void pcie_sriov_config_write(PCIDevice *dev, uint32_t address,
@@ -235,15 +229,29 @@ void pcie_sriov_config_write(PCIDevice *dev, uint32_t address,
PCI_FUNC(dev->devfn), off, val, len);
if (range_covers_byte(off, len, PCI_SRIOV_CTRL)) {
- if (dev->exp.sriov_pf.num_vfs) {
- if (!(val & PCI_SRIOV_CTRL_VFE)) {
- unregister_vfs(dev);
- }
+ if (val & PCI_SRIOV_CTRL_VFE) {
+ register_vfs(dev);
} else {
- if (val & PCI_SRIOV_CTRL_VFE) {
- register_vfs(dev);
- }
+ unregister_vfs(dev);
}
+ } else if (range_covers_byte(off, len, PCI_SRIOV_NUM_VF)) {
+ uint8_t *cfg = dev->config + sriov_cap;
+ uint8_t *wmask = dev->wmask + sriov_cap;
+ uint16_t num_vfs = pci_get_word(cfg + PCI_SRIOV_NUM_VF);
+ uint16_t wmask_val = PCI_SRIOV_CTRL_MSE | PCI_SRIOV_CTRL_ARI;
+
+ if (num_vfs <= pci_get_word(cfg + PCI_SRIOV_TOTAL_VF)) {
+ wmask_val |= PCI_SRIOV_CTRL_VFE;
+ }
+
+ pci_set_word(wmask + PCI_SRIOV_CTRL, wmask_val);
+ }
+}
+
+void pcie_sriov_pf_post_load(PCIDevice *dev)
+{
+ if (dev->exp.sriov_cap) {
+ register_vfs(dev);
}
}
@@ -260,6 +268,8 @@ void pcie_sriov_pf_reset(PCIDevice *dev)
unregister_vfs(dev);
pci_set_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF, 0);
+ pci_set_word(dev->wmask + sriov_cap + PCI_SRIOV_CTRL,
+ PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE | PCI_SRIOV_CTRL_ARI);
/*
* Default is to use 4K pages, software can modify it
@@ -306,7 +316,7 @@ PCIDevice *pcie_sriov_get_pf(PCIDevice *dev)
PCIDevice *pcie_sriov_get_vf_at_index(PCIDevice *dev, int n)
{
assert(!pci_is_vf(dev));
- if (n < dev->exp.sriov_pf.num_vfs) {
+ if (n < pcie_sriov_num_vfs(dev)) {
return dev->exp.sriov_pf.vf[n];
}
return NULL;
@@ -314,5 +324,10 @@ PCIDevice *pcie_sriov_get_vf_at_index(PCIDevice *dev, int n)
uint16_t pcie_sriov_num_vfs(PCIDevice *dev)
{
- return dev->exp.sriov_pf.num_vfs;
+ uint16_t sriov_cap = dev->exp.sriov_cap;
+ uint8_t *cfg = dev->config + sriov_cap;
+
+ return sriov_cap &&
+ (pci_get_word(cfg + PCI_SRIOV_CTRL) & PCI_SRIOV_CTRL_VFE) ?
+ pci_get_word(cfg + PCI_SRIOV_NUM_VF) : 0;
}
diff --git a/hw/pci/trace-events b/hw/pci/trace-events
index 19643aa..e98f575 100644
--- a/hw/pci/trace-events
+++ b/hw/pci/trace-events
@@ -14,7 +14,7 @@ msix_write_config(char *name, bool enabled, bool masked) "dev %s enabled %d mask
# hw/pci/pcie_sriov.c
sriov_register_vfs(const char *name, int slot, int function, int num_vfs) "%s %02x:%x: creating %d vf devs"
-sriov_unregister_vfs(const char *name, int slot, int function, int num_vfs) "%s %02x:%x: Unregistering %d vf devs"
+sriov_unregister_vfs(const char *name, int slot, int function) "%s %02x:%x: Unregistering vf devs"
sriov_config_write(const char *name, int slot, int fun, uint32_t offset, uint32_t val, uint32_t len) "%s %02x:%x: sriov offset 0x%x val 0x%x len %d"
# pcie.c
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 904227d..e0a9d50 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -1283,8 +1283,7 @@ static void spapr_dt_pci_device_cb(PCIBus *bus, PCIDevice *pdev,
PciWalkFdt *p = opaque;
int err;
- if (p->err) {
- /* Something's already broken, don't keep going */
+ if (p->err || !pdev->enabled) {
return;
}
@@ -1550,7 +1549,9 @@ static void spapr_pci_pre_plug(HotplugHandler *plug_handler,
* hotplug, we do not allow functions to be hotplugged to a
* slot that already has function 0 present
*/
- if (plugged_dev->hotplugged && bus->devices[PCI_DEVFN(slotnr, 0)] &&
+ if (plugged_dev->hotplugged &&
+ !pci_is_vf(pdev) &&
+ bus->devices[PCI_DEVFN(slotnr, 0)] &&
PCI_FUNC(pdev->devfn) != 0) {
error_setg(errp, "PCI: slot %d function 0 already occupied by %s,"
" additional functions can no longer be exposed to guest.",
@@ -1572,6 +1573,14 @@ static void spapr_pci_plug(HotplugHandler *plug_handler,
SpaprDrc *drc = drc_from_dev(phb, pdev);
uint32_t slotnr = PCI_SLOT(pdev->devfn);
+ /*
+ * If DR or the PCI device is disabled we don't need to do anything
+ * in the case of hotplug or coldplug callbacks.
+ */
+ if (!pdev->enabled) {
+ return;
+ }
+
g_assert(drc);
if (IS_PCI_BRIDGE(plugged_dev)) {
@@ -1647,6 +1656,11 @@ static void spapr_pci_unplug_request(HotplugHandler *plug_handler,
SpaprDrc *drc = drc_from_dev(phb, pdev);
g_assert(drc);
+
+ if (!drc->dev) {
+ return;
+ }
+
g_assert(drc->dev == plugged_dev);
if (!spapr_drc_unplug_requested(drc)) {
diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
index eead269..913d72c 100644
--- a/hw/s390x/s390-pci-bus.c
+++ b/hw/s390x/s390-pci-bus.c
@@ -971,14 +971,7 @@ static void s390_pcihost_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
"this device");
}
- if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
- PCIDevice *pdev = PCI_DEVICE(dev);
-
- if (pdev->cap_present & QEMU_PCI_CAP_MULTIFUNCTION) {
- error_setg(errp, "multifunction not supported in s390");
- return;
- }
- } else if (object_dynamic_cast(OBJECT(dev), TYPE_S390_PCI_DEVICE)) {
+ if (object_dynamic_cast(OBJECT(dev), TYPE_S390_PCI_DEVICE)) {
S390PCIBusDevice *pbdev = S390_PCI_DEVICE(dev);
if (!s390_pci_alloc_idx(s, pbdev)) {
@@ -1069,6 +1062,18 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
} else if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
pdev = PCI_DEVICE(dev);
+ /*
+ * Multifunction is not supported due to the lack of CLP. However,
+ * do not check for multifunction capability for SR-IOV devices because
+ * SR-IOV devices automatically add the multifunction capability whether
+ * the user intends to use the functions other than the PF.
+ */
+ if (pdev->cap_present & QEMU_PCI_CAP_MULTIFUNCTION &&
+ !pdev->exp.sriov_cap) {
+ error_setg(errp, "multifunction not supported in s390");
+ return;
+ }
+
if (!dev->id) {
/* In the case the PCI device does not define an id */
/* we generate one based on the PCI address */
@@ -1080,6 +1085,16 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
pbdev = s390_pci_find_dev_by_target(s, dev->id);
if (!pbdev) {
+ /*
+ * VFs are automatically created by PF, and creating zpci for them
+ * will result in unexpected usage of fids. Currently QEMU does not
+ * support multifunction for s390x so we don't need zpci for VFs
+ * anyway.
+ */
+ if (pci_is_vf(pdev)) {
+ return;
+ }
+
pbdev = s390_pci_device_new(s, dev->id, errp);
if (!pbdev) {
return;
@@ -1167,7 +1182,10 @@ static void s390_pcihost_unplug(HotplugHandler *hotplug_dev, DeviceState *dev,
int32_t devfn;
pbdev = s390_pci_find_dev_by_pci(s, PCI_DEVICE(dev));
- g_assert(pbdev);
+ if (!pbdev) {
+ g_assert(pci_is_vf(pci_dev));
+ return;
+ }
s390_pci_generate_plug_event(HP_EVENT_STANDBY_TO_RESERVED,
pbdev->fh, pbdev->fid);
@@ -1206,7 +1224,11 @@ static void s390_pcihost_unplug_request(HotplugHandler *hotplug_dev,
* we've checked the PCI device already (to prevent endless recursion).
*/
pbdev = s390_pci_find_dev_by_pci(s, PCI_DEVICE(dev));
- g_assert(pbdev);
+ if (!pbdev) {
+ g_assert(pci_is_vf(PCI_DEVICE(dev)));
+ return;
+ }
+
pbdev->pci_unplug_request_processed = true;
qdev_unplug(DEVICE(pbdev), errp);
} else if (object_dynamic_cast(OBJECT(dev), TYPE_S390_PCI_DEVICE)) {
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index abbdc56..7a4010e 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1404,6 +1404,7 @@ void vfio_reset_handler(void *opaque)
{
VFIODevice *vbasedev;
+ trace_vfio_reset_handler();
QLIST_FOREACH(vbasedev, &vfio_device_list, global_next) {
if (vbasedev->dev->realized) {
vbasedev->ops->vfio_compute_needs_reset(vbasedev);
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index cab1cf1..c5385e1 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -120,6 +120,7 @@ vfio_get_dev_region(const char *name, int index, uint32_t type, uint32_t subtype
vfio_legacy_dma_unmap_overflow_workaround(void) ""
vfio_get_dirty_bitmap(uint64_t iova, uint64_t size, uint64_t bitmap_size, uint64_t start, uint64_t dirty_pages) "iova=0x%"PRIx64" size= 0x%"PRIx64" bitmap_size=0x%"PRIx64" start=0x%"PRIx64" dirty_pages=%"PRIu64
vfio_iommu_map_dirty_notify(uint64_t iova_start, uint64_t iova_end) "iommu dirty @ 0x%"PRIx64" - 0x%"PRIx64
+vfio_reset_handler(void) ""
# platform.c
vfio_platform_realize(char *name, char *compat) "vfio device %s, compat = %s"
diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
index 04e36ae..76f0d45 100644
--- a/hw/virtio/trace-events
+++ b/hw/virtio/trace-events
@@ -108,7 +108,7 @@ virtio_pci_notify_write(uint64_t addr, uint64_t val, unsigned int size) "0x%" PR
virtio_pci_notify_write_pio(uint64_t addr, uint64_t val, unsigned int size) "0x%" PRIx64" = 0x%" PRIx64 " (%d)"
# hw/virtio/virtio-iommu.c
-virtio_iommu_device_reset(void) "reset!"
+virtio_iommu_device_reset_exit(void) "reset!"
virtio_iommu_system_reset(void) "system reset!"
virtio_iommu_get_features(uint64_t features) "device supports features=0x%"PRIx64
virtio_iommu_device_status(uint8_t status) "driver status = %d"
diff --git a/hw/virtio/vhost-iova-tree.c b/hw/virtio/vhost-iova-tree.c
index 3d03395..fa4147b 100644
--- a/hw/virtio/vhost-iova-tree.c
+++ b/hw/virtio/vhost-iova-tree.c
@@ -28,12 +28,18 @@ struct VhostIOVATree {
/* IOVA address to qemu memory maps. */
IOVATree *iova_taddr_map;
+
+ /* Allocated IOVA addresses */
+ IOVATree *iova_map;
+
+ /* GPA->IOVA address memory maps */
+ IOVATree *gpa_iova_map;
};
/**
- * Create a new IOVA tree
+ * Create a new VhostIOVATree
*
- * Returns the new IOVA tree
+ * Returns the new VhostIOVATree.
*/
VhostIOVATree *vhost_iova_tree_new(hwaddr iova_first, hwaddr iova_last)
{
@@ -44,25 +50,29 @@ VhostIOVATree *vhost_iova_tree_new(hwaddr iova_first, hwaddr iova_last)
tree->iova_last = iova_last;
tree->iova_taddr_map = iova_tree_new();
+ tree->iova_map = iova_tree_new();
+ tree->gpa_iova_map = gpa_tree_new();
return tree;
}
/**
- * Delete an iova tree
+ * Delete a VhostIOVATree
*/
void vhost_iova_tree_delete(VhostIOVATree *iova_tree)
{
iova_tree_destroy(iova_tree->iova_taddr_map);
+ iova_tree_destroy(iova_tree->iova_map);
+ iova_tree_destroy(iova_tree->gpa_iova_map);
g_free(iova_tree);
}
/**
* Find the IOVA address stored from a memory address
*
- * @tree: The iova tree
+ * @tree: The VhostIOVATree
* @map: The map with the memory address
*
- * Return the stored mapping, or NULL if not found.
+ * Returns the stored IOVA->HVA mapping, or NULL if not found.
*/
const DMAMap *vhost_iova_tree_find_iova(const VhostIOVATree *tree,
const DMAMap *map)
@@ -71,40 +81,111 @@ const DMAMap *vhost_iova_tree_find_iova(const VhostIOVATree *tree,
}
/**
- * Allocate a new mapping
+ * Allocate a new IOVA range and add the mapping to the IOVA->HVA tree
*
- * @tree: The iova tree
- * @map: The iova map
+ * @tree: The VhostIOVATree
+ * @map: The IOVA mapping
+ * @taddr: The translated address (HVA)
*
* Returns:
* - IOVA_OK if the map fits in the container
* - IOVA_ERR_INVALID if the map does not make sense (like size overflow)
* - IOVA_ERR_NOMEM if tree cannot allocate more space.
*
- * It returns assignated iova in map->iova if return value is VHOST_DMA_MAP_OK.
+ * It returns an assigned IOVA in map->iova if the return value is IOVA_OK.
*/
-int vhost_iova_tree_map_alloc(VhostIOVATree *tree, DMAMap *map)
+int vhost_iova_tree_map_alloc(VhostIOVATree *tree, DMAMap *map, hwaddr taddr)
{
+ int ret;
+
/* Some vhost devices do not like addr 0. Skip first page */
hwaddr iova_first = tree->iova_first ?: qemu_real_host_page_size();
- if (map->translated_addr + map->size < map->translated_addr ||
- map->perm == IOMMU_NONE) {
+ if (taddr + map->size < taddr || map->perm == IOMMU_NONE) {
return IOVA_ERR_INVALID;
}
- /* Allocate a node in IOVA address */
- return iova_tree_alloc_map(tree->iova_taddr_map, map, iova_first,
- tree->iova_last);
+ /* Allocate a node in the IOVA-only tree */
+ ret = iova_tree_alloc_map(tree->iova_map, map, iova_first, tree->iova_last);
+ if (unlikely(ret != IOVA_OK)) {
+ return ret;
+ }
+
+ /* Insert a node in the IOVA->HVA tree */
+ map->translated_addr = taddr;
+ return iova_tree_insert(tree->iova_taddr_map, map);
}
/**
- * Remove existing mappings from iova tree
+ * Remove existing mappings from the IOVA-only and IOVA->HVA trees
*
- * @iova_tree: The vhost iova tree
+ * @iova_tree: The VhostIOVATree
* @map: The map to remove
*/
void vhost_iova_tree_remove(VhostIOVATree *iova_tree, DMAMap map)
{
iova_tree_remove(iova_tree->iova_taddr_map, map);
+ iova_tree_remove(iova_tree->iova_map, map);
+}
+
+/**
+ * Find the IOVA address stored from a guest memory address (GPA)
+ *
+ * @tree: The VhostIOVATree
+ * @map: The map with the guest memory address
+ *
+ * Returns the stored GPA->IOVA mapping, or NULL if not found.
+ */
+const DMAMap *vhost_iova_tree_find_gpa(const VhostIOVATree *tree,
+ const DMAMap *map)
+{
+ return iova_tree_find_iova(tree->gpa_iova_map, map);
+}
+
+/**
+ * Allocate a new IOVA range and add the mapping to the GPA->IOVA tree
+ *
+ * @tree: The VhostIOVATree
+ * @map: The IOVA mapping
+ * @taddr: The translated address (GPA)
+ *
+ * Returns:
+ * - IOVA_OK if the map fits both containers
+ * - IOVA_ERR_INVALID if the map does not make sense (like size overflow)
+ * - IOVA_ERR_NOMEM if the IOVA-only tree cannot allocate more space
+ *
+ * It returns an assigned IOVA in map->iova if the return value is IOVA_OK.
+ */
+int vhost_iova_tree_map_alloc_gpa(VhostIOVATree *tree, DMAMap *map, hwaddr taddr)
+{
+ int ret;
+
+ /* Some vhost devices don't like addr 0. Skip first page */
+ hwaddr iova_first = tree->iova_first ?: qemu_real_host_page_size();
+
+ if (taddr + map->size < taddr || map->perm == IOMMU_NONE) {
+ return IOVA_ERR_INVALID;
+ }
+
+ /* Allocate a node in the IOVA-only tree */
+ ret = iova_tree_alloc_map(tree->iova_map, map, iova_first, tree->iova_last);
+ if (unlikely(ret != IOVA_OK)) {
+ return ret;
+ }
+
+ /* Insert a node in the GPA->IOVA tree */
+ map->translated_addr = taddr;
+ return gpa_tree_insert(tree->gpa_iova_map, map);
+}
+
+/**
+ * Remove existing mappings from the IOVA-only and GPA->IOVA trees
+ *
+ * @tree: The VhostIOVATree
+ * @map: The map to remove
+ */
+void vhost_iova_tree_remove_gpa(VhostIOVATree *iova_tree, DMAMap map)
+{
+ iova_tree_remove(iova_tree->gpa_iova_map, map);
+ iova_tree_remove(iova_tree->iova_map, map);
}
diff --git a/hw/virtio/vhost-iova-tree.h b/hw/virtio/vhost-iova-tree.h
index 4adfd79..0c4ba5a 100644
--- a/hw/virtio/vhost-iova-tree.h
+++ b/hw/virtio/vhost-iova-tree.h
@@ -21,7 +21,13 @@ G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostIOVATree, vhost_iova_tree_delete);
const DMAMap *vhost_iova_tree_find_iova(const VhostIOVATree *iova_tree,
const DMAMap *map);
-int vhost_iova_tree_map_alloc(VhostIOVATree *iova_tree, DMAMap *map);
+int vhost_iova_tree_map_alloc(VhostIOVATree *iova_tree, DMAMap *map,
+ hwaddr taddr);
void vhost_iova_tree_remove(VhostIOVATree *iova_tree, DMAMap map);
+const DMAMap *vhost_iova_tree_find_gpa(const VhostIOVATree *iova_tree,
+ const DMAMap *map);
+int vhost_iova_tree_map_alloc_gpa(VhostIOVATree *iova_tree, DMAMap *map,
+ hwaddr taddr);
+void vhost_iova_tree_remove_gpa(VhostIOVATree *iova_tree, DMAMap map);
#endif
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
index 37aca8b..2481d49 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -78,24 +78,39 @@ uint16_t vhost_svq_available_slots(const VhostShadowVirtqueue *svq)
* @vaddr: Translated IOVA addresses
* @iovec: Source qemu's VA addresses
* @num: Length of iovec and minimum length of vaddr
+ * @gpas: Descriptors' GPAs, if backed by guest memory
*/
static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq,
hwaddr *addrs, const struct iovec *iovec,
- size_t num)
+ size_t num, const hwaddr *gpas)
{
if (num == 0) {
return true;
}
for (size_t i = 0; i < num; ++i) {
- DMAMap needle = {
- .translated_addr = (hwaddr)(uintptr_t)iovec[i].iov_base,
- .size = iovec[i].iov_len,
- };
Int128 needle_last, map_last;
size_t off;
+ const DMAMap *map;
+ DMAMap needle;
+
+ /* Check if the descriptor is backed by guest memory */
+ if (gpas) {
+ /* Search the GPA->IOVA tree */
+ needle = (DMAMap) {
+ .translated_addr = gpas[i],
+ .size = iovec[i].iov_len,
+ };
+ map = vhost_iova_tree_find_gpa(svq->iova_tree, &needle);
+ } else {
+ /* Search the IOVA->HVA tree */
+ needle = (DMAMap) {
+ .translated_addr = (hwaddr)(uintptr_t)iovec[i].iov_base,
+ .size = iovec[i].iov_len,
+ };
+ map = vhost_iova_tree_find_iova(svq->iova_tree, &needle);
+ }
- const DMAMap *map = vhost_iova_tree_find_iova(svq->iova_tree, &needle);
/*
* Map cannot be NULL since iova map contains all guest space and
* qemu already has a physical address mapped
@@ -130,6 +145,7 @@ static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq,
* @sg: Cache for hwaddr
* @iovec: The iovec from the guest
* @num: iovec length
+ * @addr: Descriptors' GPAs, if backed by guest memory
* @more_descs: True if more descriptors come in the chain
* @write: True if they are writeable descriptors
*
@@ -137,7 +153,8 @@ static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq,
*/
static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
const struct iovec *iovec, size_t num,
- bool more_descs, bool write)
+ const hwaddr *addr, bool more_descs,
+ bool write)
{
uint16_t i = svq->free_head, last = svq->free_head;
unsigned n;
@@ -149,7 +166,7 @@ static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
return true;
}
- ok = vhost_svq_translate_addr(svq, sg, iovec, num);
+ ok = vhost_svq_translate_addr(svq, sg, iovec, num, addr);
if (unlikely(!ok)) {
return false;
}
@@ -165,17 +182,18 @@ static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
descs[i].len = cpu_to_le32(iovec[n].iov_len);
last = i;
- i = cpu_to_le16(svq->desc_next[i]);
+ i = svq->desc_next[i];
}
- svq->free_head = le16_to_cpu(svq->desc_next[last]);
+ svq->free_head = svq->desc_next[last];
return true;
}
static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
const struct iovec *out_sg, size_t out_num,
+ const hwaddr *out_addr,
const struct iovec *in_sg, size_t in_num,
- unsigned *head)
+ const hwaddr *in_addr, unsigned *head)
{
unsigned avail_idx;
vring_avail_t *avail = svq->vring.avail;
@@ -191,13 +209,14 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
return false;
}
- ok = vhost_svq_vring_write_descs(svq, sgs, out_sg, out_num, in_num > 0,
- false);
+ ok = vhost_svq_vring_write_descs(svq, sgs, out_sg, out_num, out_addr,
+ in_num > 0, false);
if (unlikely(!ok)) {
return false;
}
- ok = vhost_svq_vring_write_descs(svq, sgs, in_sg, in_num, false, true);
+ ok = vhost_svq_vring_write_descs(svq, sgs, in_sg, in_num, in_addr, false,
+ true);
if (unlikely(!ok)) {
return false;
}
@@ -228,10 +247,12 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq)
smp_mb();
if (virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
- uint16_t avail_event = *(uint16_t *)(&svq->vring.used->ring[svq->vring.num]);
+ uint16_t avail_event = le16_to_cpu(
+ *(uint16_t *)(&svq->vring.used->ring[svq->vring.num]));
needs_kick = vring_need_event(avail_event, svq->shadow_avail_idx, svq->shadow_avail_idx - 1);
} else {
- needs_kick = !(svq->vring.used->flags & VRING_USED_F_NO_NOTIFY);
+ needs_kick =
+ !(svq->vring.used->flags & cpu_to_le16(VRING_USED_F_NO_NOTIFY));
}
if (!needs_kick) {
@@ -247,8 +268,9 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq)
* Return -EINVAL if element is invalid, -ENOSPC if dev queue is full
*/
int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
- size_t out_num, const struct iovec *in_sg, size_t in_num,
- VirtQueueElement *elem)
+ size_t out_num, const hwaddr *out_addr,
+ const struct iovec *in_sg, size_t in_num,
+ const hwaddr *in_addr, VirtQueueElement *elem)
{
unsigned qemu_head;
unsigned ndescs = in_num + out_num;
@@ -258,7 +280,8 @@ int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
return -ENOSPC;
}
- ok = vhost_svq_add_split(svq, out_sg, out_num, in_sg, in_num, &qemu_head);
+ ok = vhost_svq_add_split(svq, out_sg, out_num, out_addr, in_sg, in_num,
+ in_addr, &qemu_head);
if (unlikely(!ok)) {
return -EINVAL;
}
@@ -274,8 +297,8 @@ int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
static int vhost_svq_add_element(VhostShadowVirtqueue *svq,
VirtQueueElement *elem)
{
- return vhost_svq_add(svq, elem->out_sg, elem->out_num, elem->in_sg,
- elem->in_num, elem);
+ return vhost_svq_add(svq, elem->out_sg, elem->out_num, elem->out_addr,
+ elem->in_sg, elem->in_num, elem->in_addr, elem);
}
/**
@@ -365,7 +388,7 @@ static bool vhost_svq_more_used(VhostShadowVirtqueue *svq)
return true;
}
- svq->shadow_used_idx = cpu_to_le16(*(volatile uint16_t *)used_idx);
+ svq->shadow_used_idx = le16_to_cpu(*(volatile uint16_t *)used_idx);
return svq->last_used_idx != svq->shadow_used_idx;
}
@@ -383,7 +406,7 @@ static bool vhost_svq_enable_notification(VhostShadowVirtqueue *svq)
{
if (virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
uint16_t *used_event = (uint16_t *)&svq->vring.avail->ring[svq->vring.num];
- *used_event = svq->shadow_used_idx;
+ *used_event = cpu_to_le16(svq->shadow_used_idx);
} else {
svq->vring.avail->flags &= ~cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT);
}
@@ -408,7 +431,7 @@ static uint16_t vhost_svq_last_desc_of_chain(const VhostShadowVirtqueue *svq,
uint16_t num, uint16_t i)
{
for (uint16_t j = 0; j < (num - 1); ++j) {
- i = le16_to_cpu(svq->desc_next[i]);
+ i = svq->desc_next[i];
}
return i;
@@ -683,7 +706,7 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
svq->desc_state = g_new0(SVQDescState, svq->vring.num);
svq->desc_next = g_new0(uint16_t, svq->vring.num);
for (unsigned i = 0; i < svq->vring.num - 1; i++) {
- svq->desc_next[i] = cpu_to_le16(i + 1);
+ svq->desc_next[i] = i + 1;
}
}
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
index 19c842a..9c27373 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -118,8 +118,9 @@ uint16_t vhost_svq_available_slots(const VhostShadowVirtqueue *svq);
void vhost_svq_push_elem(VhostShadowVirtqueue *svq,
const VirtQueueElement *elem, uint32_t len);
int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
- size_t out_num, const struct iovec *in_sg, size_t in_num,
- VirtQueueElement *elem);
+ size_t out_num, const hwaddr *out_addr,
+ const struct iovec *in_sg, size_t in_num,
+ const hwaddr *in_addr, VirtQueueElement *elem);
size_t vhost_svq_poll(VhostShadowVirtqueue *svq, size_t num);
void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
diff --git a/hw/virtio/vhost-user-snd.c b/hw/virtio/vhost-user-snd.c
index 8610370..b414c75 100644
--- a/hw/virtio/vhost-user-snd.c
+++ b/hw/virtio/vhost-user-snd.c
@@ -16,6 +16,18 @@
#include "standard-headers/linux/virtio_ids.h"
#include "standard-headers/linux/virtio_snd.h"
+static const VirtIOFeature feature_sizes[] = {
+ {.flags = 1ULL << VIRTIO_SND_F_CTLS,
+ .end = endof(struct virtio_snd_config, controls)},
+ {}
+};
+
+static const VirtIOConfigSizeParams cfg_size_params = {
+ .min_size = endof(struct virtio_snd_config, chmaps),
+ .max_size = sizeof(struct virtio_snd_config),
+ .feature_sizes = feature_sizes
+};
+
static const VMStateDescription vu_snd_vmstate = {
.name = "vhost-user-snd",
.unmigratable = 1,
@@ -23,16 +35,20 @@ static const VMStateDescription vu_snd_vmstate = {
static const Property vsnd_properties[] = {
DEFINE_PROP_CHR("chardev", VHostUserBase, chardev),
+ DEFINE_PROP_BIT64("controls", VHostUserBase,
+ parent_obj.host_features, VIRTIO_SND_F_CTLS, false),
};
static void vu_snd_base_realize(DeviceState *dev, Error **errp)
{
VHostUserBase *vub = VHOST_USER_BASE(dev);
VHostUserBaseClass *vubs = VHOST_USER_BASE_GET_CLASS(dev);
+ VirtIODevice *vdev = &vub->parent_obj;
vub->virtio_id = VIRTIO_ID_SOUND;
vub->num_vqs = 4;
- vub->config_size = sizeof(struct virtio_snd_config);
+ vub->config_size = virtio_get_config_size(&cfg_size_params,
+ vdev->host_features);
vub->vq_size = 64;
vubs->parent_realize(dev, errp);
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 3cdaa12..7efbde3 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -360,14 +360,20 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener,
llsize = int128_sub(llend, int128_make64(iova));
if (s->shadow_data) {
int r;
+ hwaddr gpa = section->offset_within_address_space;
- mem_region.translated_addr = (hwaddr)(uintptr_t)vaddr,
mem_region.size = int128_get64(llsize) - 1,
mem_region.perm = IOMMU_ACCESS_FLAG(true, section->readonly),
- r = vhost_iova_tree_map_alloc(s->iova_tree, &mem_region);
+ r = vhost_iova_tree_map_alloc_gpa(s->iova_tree, &mem_region, gpa);
if (unlikely(r != IOVA_OK)) {
error_report("Can't allocate a mapping (%d)", r);
+
+ if (mem_region.translated_addr == gpa) {
+ error_report("Insertion to GPA->IOVA tree failed");
+ /* Remove the mapping from the IOVA-only tree */
+ goto fail_map;
+ }
goto fail;
}
@@ -386,7 +392,7 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener,
fail_map:
if (s->shadow_data) {
- vhost_iova_tree_remove(s->iova_tree, mem_region);
+ vhost_iova_tree_remove_gpa(s->iova_tree, mem_region);
}
fail:
@@ -440,21 +446,18 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener,
if (s->shadow_data) {
const DMAMap *result;
- const void *vaddr = memory_region_get_ram_ptr(section->mr) +
- section->offset_within_region +
- (iova - section->offset_within_address_space);
DMAMap mem_region = {
- .translated_addr = (hwaddr)(uintptr_t)vaddr,
+ .translated_addr = section->offset_within_address_space,
.size = int128_get64(llsize) - 1,
};
- result = vhost_iova_tree_find_iova(s->iova_tree, &mem_region);
+ result = vhost_iova_tree_find_gpa(s->iova_tree, &mem_region);
if (!result) {
/* The memory listener map wasn't mapped */
return;
}
iova = result->iova;
- vhost_iova_tree_remove(s->iova_tree, *result);
+ vhost_iova_tree_remove_gpa(s->iova_tree, *result);
}
vhost_vdpa_iotlb_batch_begin_once(s);
/*
@@ -1142,16 +1145,23 @@ static void vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev,
*
* @v: Vhost-vdpa device
* @needle: The area to search iova
+ * @taddr: The translated address (HVA)
* @errorp: Error pointer
*/
static bool vhost_vdpa_svq_map_ring(struct vhost_vdpa *v, DMAMap *needle,
- Error **errp)
+ hwaddr taddr, Error **errp)
{
int r;
- r = vhost_iova_tree_map_alloc(v->shared->iova_tree, needle);
+ r = vhost_iova_tree_map_alloc(v->shared->iova_tree, needle, taddr);
if (unlikely(r != IOVA_OK)) {
error_setg(errp, "Cannot allocate iova (%d)", r);
+
+ if (needle->translated_addr == taddr) {
+ error_append_hint(errp, "Insertion to IOVA->HVA tree failed");
+ /* Remove the mapping from the IOVA-only tree */
+ vhost_iova_tree_remove(v->shared->iova_tree, *needle);
+ }
return false;
}
@@ -1192,11 +1202,11 @@ static bool vhost_vdpa_svq_map_rings(struct vhost_dev *dev,
vhost_svq_get_vring_addr(svq, &svq_addr);
driver_region = (DMAMap) {
- .translated_addr = svq_addr.desc_user_addr,
.size = driver_size - 1,
.perm = IOMMU_RO,
};
- ok = vhost_vdpa_svq_map_ring(v, &driver_region, errp);
+ ok = vhost_vdpa_svq_map_ring(v, &driver_region, svq_addr.desc_user_addr,
+ errp);
if (unlikely(!ok)) {
error_prepend(errp, "Cannot create vq driver region: ");
return false;
@@ -1206,11 +1216,11 @@ static bool vhost_vdpa_svq_map_rings(struct vhost_dev *dev,
addr->avail_user_addr = driver_region.iova + avail_offset;
device_region = (DMAMap) {
- .translated_addr = svq_addr.used_user_addr,
.size = device_size - 1,
.perm = IOMMU_RW,
};
- ok = vhost_vdpa_svq_map_ring(v, &device_region, errp);
+ ok = vhost_vdpa_svq_map_ring(v, &device_region, svq_addr.used_user_addr,
+ errp);
if (unlikely(!ok)) {
error_prepend(errp, "Cannot create vq device region: ");
vhost_vdpa_svq_unmap_ring(v, driver_region.translated_addr);
diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
index ad05768..2eb5a14 100644
--- a/hw/virtio/virtio-balloon.c
+++ b/hw/virtio/virtio-balloon.c
@@ -31,7 +31,7 @@
#include "trace.h"
#include "qemu/error-report.h"
#include "migration/misc.h"
-
+#include "system/reset.h"
#include "hw/virtio/virtio-bus.h"
#include "hw/virtio/virtio-access.h"
@@ -910,6 +910,8 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
}
reset_stats(s);
+ s->stats_last_update = 0;
+ qemu_register_resettable(OBJECT(dev));
}
static void virtio_balloon_device_unrealize(DeviceState *dev)
@@ -917,6 +919,7 @@ static void virtio_balloon_device_unrealize(DeviceState *dev)
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
VirtIOBalloon *s = VIRTIO_BALLOON(dev);
+ qemu_unregister_resettable(OBJECT(dev));
if (s->free_page_bh) {
qemu_bh_delete(s->free_page_bh);
object_unref(OBJECT(s->iothread));
@@ -987,6 +990,27 @@ static void virtio_balloon_set_status(VirtIODevice *vdev, uint8_t status)
}
}
+static ResettableState *virtio_balloon_get_reset_state(Object *obj)
+{
+ VirtIOBalloon *s = VIRTIO_BALLOON(obj);
+ return &s->reset_state;
+}
+
+static void virtio_balloon_reset_enter(Object *obj, ResetType type)
+{
+ VirtIOBalloon *s = VIRTIO_BALLOON(obj);
+
+ /*
+ * When waking up from standby/suspend-to-ram, do not reset stats.
+ */
+ if (type == RESET_TYPE_WAKEUP) {
+ return;
+ }
+
+ reset_stats(s);
+ s->stats_last_update = 0;
+}
+
static void virtio_balloon_instance_init(Object *obj)
{
VirtIOBalloon *s = VIRTIO_BALLOON(obj);
@@ -1038,6 +1062,7 @@ static void virtio_balloon_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
+ ResettableClass *rc = RESETTABLE_CLASS(klass);
device_class_set_props(dc, virtio_balloon_properties);
dc->vmsd = &vmstate_virtio_balloon;
@@ -1050,6 +1075,9 @@ static void virtio_balloon_class_init(ObjectClass *klass, void *data)
vdc->get_features = virtio_balloon_get_features;
vdc->set_status = virtio_balloon_set_status;
vdc->vmsd = &vmstate_virtio_balloon_device;
+
+ rc->get_state = virtio_balloon_get_reset_state;
+ rc->phases.enter = virtio_balloon_reset_enter;
}
static const TypeInfo virtio_balloon_info = {
diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
index f41104a..b6e7e01 100644
--- a/hw/virtio/virtio-iommu.c
+++ b/hw/virtio/virtio-iommu.c
@@ -1504,11 +1504,11 @@ static void virtio_iommu_device_unrealize(DeviceState *dev)
virtio_cleanup(vdev);
}
-static void virtio_iommu_device_reset(VirtIODevice *vdev)
+static void virtio_iommu_device_reset_exit(Object *obj, ResetType type)
{
- VirtIOIOMMU *s = VIRTIO_IOMMU(vdev);
+ VirtIOIOMMU *s = VIRTIO_IOMMU(obj);
- trace_virtio_iommu_device_reset();
+ trace_virtio_iommu_device_reset_exit();
if (s->domains) {
g_tree_destroy(s->domains);
@@ -1668,6 +1668,7 @@ static void virtio_iommu_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
+ ResettableClass *rc = RESETTABLE_CLASS(klass);
device_class_set_props(dc, virtio_iommu_properties);
dc->vmsd = &vmstate_virtio_iommu;
@@ -1675,7 +1676,12 @@ static void virtio_iommu_class_init(ObjectClass *klass, void *data)
set_bit(DEVICE_CATEGORY_MISC, dc->categories);
vdc->realize = virtio_iommu_device_realize;
vdc->unrealize = virtio_iommu_device_unrealize;
- vdc->reset = virtio_iommu_device_reset;
+
+ /*
+ * Use 'exit' reset phase to make sure all DMA requests
+ * have been quiesced during 'enter' or 'hold' phase
+ */
+ rc->phases.exit = virtio_iommu_device_reset_exit;
vdc->get_config = virtio_iommu_get_config;
vdc->set_config = virtio_iommu_set_config;
vdc->get_features = virtio_iommu_get_features;
diff --git a/hw/virtio/virtio-nsm.c b/hw/virtio/virtio-nsm.c
index 098e1ae..b22aa74 100644
--- a/hw/virtio/virtio-nsm.c
+++ b/hw/virtio/virtio-nsm.c
@@ -1596,7 +1596,7 @@ static void handle_input(VirtIODevice *vdev, VirtQueue *vq)
g_free(req.iov_base);
g_free(res.iov_base);
virtqueue_push(vq, out_elem, 0);
- virtqueue_push(vq, in_elem, in_elem->in_sg->iov_len);
+ virtqueue_push(vq, in_elem, sz);
virtio_notify(vdev, vq);
return;