diff options
author | Stefan Hajnoczi <stefanha@redhat.com> | 2025-02-22 05:06:39 +0800 |
---|---|---|
committer | Stefan Hajnoczi <stefanha@redhat.com> | 2025-02-22 05:06:39 +0800 |
commit | b69801dd6b1eb4d107f7c2f643adf0a4e3ec9124 (patch) | |
tree | 9620500ad85d8368314501a3969e20624916db17 /hw/pci | |
parent | f41af4c5857b6983766aaffc041580ff170d0679 (diff) | |
parent | dd6d545e8f2d9a0e8a8c287ec16469f03ef5c198 (diff) | |
download | qemu-b69801dd6b1eb4d107f7c2f643adf0a4e3ec9124.zip qemu-b69801dd6b1eb4d107f7c2f643adf0a4e3ec9124.tar.gz qemu-b69801dd6b1eb4d107f7c2f643adf0a4e3ec9124.tar.bz2 |
Merge tag 'for_upstream' of https://git.kernel.org/pub/scm/virt/kvm/mst/qemu into staging
virtio,pc,pci: features, fixes, cleanups
Features:
SR-IOV emulation for pci
virtio-mem-pci support for s390
interleave support for cxl
big endian support for vdpa svq
new QAPI events for vhost-user
Also vIOMMU reset order fixups are in.
Fixes, cleanups all over the place.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
# -----BEGIN PGP SIGNATURE-----
#
# iQFDBAABCAAtFiEEXQn9CHHI+FuUyooNKB8NuNKNVGkFAme4b8sPHG1zdEByZWRo
# YXQuY29tAAoJECgfDbjSjVRpHKcIAKPJsVqPdda2dJ7b7FdyRT0Q+uwezXqaGHd4
# 7Lzih1wsxYNkwIAyPtEb76/21qiS7BluqlUCfCB66R9xWjP5/KfvAFj4/r4AEduE
# fxAgYzotNpv55zcRbcflMyvQ42WGiZZHC+o5Lp7vDXUP3pIyHrl0Ydh5WmcD+hwS
# BjXvda58TirQpPJ7rUL+sSfLih17zQkkDcfv5/AgorDy1wK09RBKwMx/gq7wG8yJ
# twy8eBY2CmfmFD7eTM+EKqBD2T0kwLEeLfS/F/tl5Fyg6lAiYgYtCbGLpAmWErsg
# XZvfZmwqL7CNzWexGvPFnnLyqwC33WUP0k0kT88Y5wh3/h98blw=
# =tej8
# -----END PGP SIGNATURE-----
# gpg: Signature made Fri 21 Feb 2025 20:21:31 HKT
# gpg: using RSA key 5D09FD0871C8F85B94CA8A0D281F0DB8D28D5469
# gpg: issuer "mst@redhat.com"
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>" [full]
# gpg: aka "Michael S. Tsirkin <mst@redhat.com>" [full]
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17 0970 C350 3912 AFBE 8E67
# Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA 8A0D 281F 0DB8 D28D 5469
* tag 'for_upstream' of https://git.kernel.org/pub/scm/virt/kvm/mst/qemu: (41 commits)
docs/devel/reset: Document reset expectations for DMA and IOMMU
hw/vfio/common: Add a trace point in vfio_reset_handler
hw/arm/smmuv3: Move reset to exit phase
hw/i386/intel-iommu: Migrate to 3-phase reset
hw/virtio/virtio-iommu: Migrate to 3-phase reset
vhost-user-snd: correct the calculation of config_size
net: vhost-user: add QAPI events to report connection state
hw/virtio/virtio-nsm: Respond with correct length
vdpa: Fix endian bugs in shadow virtqueue
MAINTAINERS: add more files to `vhost`
cryptodev/vhost: allocate CryptoDevBackendVhost using g_mem0()
vhost-iova-tree: Update documentation
vhost-iova-tree, svq: Implement GPA->IOVA & partial IOVA->HVA trees
vhost-iova-tree: Implement an IOVA-only tree
amd_iommu: Use correct bitmask to set capability BAR
amd_iommu: Use correct DTE field for interrupt passthrough
hw/virtio: reset virtio balloon stats on machine reset
mem/cxl_type3: support 3, 6, 12 and 16 interleave ways
hw/mem/cxl_type3: Ensure errp is set on realization failure
hw/mem/cxl_type3: Fix special_ops memory leak on msix_init_exclusive_bar() failure
...
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Diffstat (limited to 'hw/pci')
-rw-r--r-- | hw/pci/msix.c | 9 | ||||
-rw-r--r-- | hw/pci/pci.c | 22 | ||||
-rw-r--r-- | hw/pci/pcie_sriov.c | 159 | ||||
-rw-r--r-- | hw/pci/trace-events | 2 |
4 files changed, 117 insertions, 75 deletions
diff --git a/hw/pci/msix.c b/hw/pci/msix.c index 57ec708..66f27b9 100644 --- a/hw/pci/msix.c +++ b/hw/pci/msix.c @@ -15,6 +15,7 @@ */ #include "qemu/osdep.h" +#include "qemu/log.h" #include "hw/pci/msi.h" #include "hw/pci/msix.h" #include "hw/pci/pci.h" @@ -260,6 +261,14 @@ static uint64_t msix_pba_mmio_read(void *opaque, hwaddr addr, static void msix_pba_mmio_write(void *opaque, hwaddr addr, uint64_t val, unsigned size) { + PCIDevice *dev = opaque; + + qemu_log_mask(LOG_GUEST_ERROR, + "PCI [%s:%02x:%02x.%x] attempt to write to MSI-X " + "PBA at 0x%" FMT_PCIBUS ", ignoring.\n", + pci_root_bus_path(dev), pci_dev_bus_num(dev), + PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn), + addr); } static const MemoryRegionOps msix_pba_mmio_ops = { diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 2afa423..1d42847 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -803,10 +803,17 @@ static bool migrate_is_not_pcie(void *opaque, int version_id) return !pci_is_express((PCIDevice *)opaque); } +static int pci_post_load(void *opaque, int version_id) +{ + pcie_sriov_pf_post_load(opaque); + return 0; +} + const VMStateDescription vmstate_pci_device = { .name = "PCIDevice", .version_id = 2, .minimum_version_id = 1, + .post_load = pci_post_load, .fields = (const VMStateField[]) { VMSTATE_INT32_POSITIVE_LE(version_id, PCIDevice), VMSTATE_BUFFER_UNSAFE_INFO_TEST(config, PCIDevice, @@ -1391,6 +1398,7 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num, assert(hdr_type != PCI_HEADER_TYPE_BRIDGE || region_num < 2); r = &pci_dev->io_regions[region_num]; + assert(!r->size); r->addr = PCI_BAR_UNMAPPED; r->size = size; r->type = type; @@ -2963,7 +2971,17 @@ MSIMessage pci_get_msi_message(PCIDevice *dev, int vector) void pci_set_power(PCIDevice *d, bool state) { - pci_set_enabled(d, state); + /* + * Don't change the enabled state of VFs when powering on/off the device. + * + * When powering on, VFs must not be enabled immediately but they must + * wait until the guest configures SR-IOV. + * When powering off, their corresponding PFs will be reset and disable + * VFs. + */ + if (!pci_is_vf(d)) { + pci_set_enabled(d, state); + } } void pci_set_enabled(PCIDevice *d, bool state) @@ -2977,7 +2995,7 @@ void pci_set_enabled(PCIDevice *d, bool state) memory_region_set_enabled(&d->bus_master_enable_region, (pci_get_word(d->config + PCI_COMMAND) & PCI_COMMAND_MASTER) && d->enabled); - if (!d->enabled) { + if (qdev_is_realized(&d->qdev)) { pci_device_reset(d); } } diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c index e9b2322..1eb4358 100644 --- a/hw/pci/pcie_sriov.c +++ b/hw/pci/pcie_sriov.c @@ -20,23 +20,37 @@ #include "qapi/error.h" #include "trace.h" -static PCIDevice *register_vf(PCIDevice *pf, int devfn, - const char *name, uint16_t vf_num); -static void unregister_vfs(PCIDevice *dev); +static void unparent_vfs(PCIDevice *dev, uint16_t total_vfs) +{ + for (uint16_t i = 0; i < total_vfs; i++) { + PCIDevice *vf = dev->exp.sriov_pf.vf[i]; + object_unparent(OBJECT(vf)); + object_unref(OBJECT(vf)); + } + g_free(dev->exp.sriov_pf.vf); + dev->exp.sriov_pf.vf = NULL; +} -void pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, +bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, const char *vfname, uint16_t vf_dev_id, uint16_t init_vfs, uint16_t total_vfs, - uint16_t vf_offset, uint16_t vf_stride) + uint16_t vf_offset, uint16_t vf_stride, + Error **errp) { + BusState *bus = qdev_get_parent_bus(&dev->qdev); + int32_t devfn = dev->devfn + vf_offset; uint8_t *cfg = dev->config + offset; uint8_t *wmask; + if (total_vfs && + (uint32_t)devfn + (uint32_t)(total_vfs - 1) * vf_stride >= PCI_DEVFN_MAX) { + error_setg(errp, "VF addr overflows"); + return false; + } + pcie_add_capability(dev, PCI_EXT_CAP_ID_SRIOV, 1, offset, PCI_EXT_CAP_SRIOV_SIZEOF); dev->exp.sriov_cap = offset; - dev->exp.sriov_pf.num_vfs = 0; - dev->exp.sriov_pf.vfname = g_strdup(vfname); dev->exp.sriov_pf.vf = NULL; pci_set_word(cfg + PCI_SRIOV_VF_OFFSET, vf_offset); @@ -69,13 +83,37 @@ void pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, pci_set_word(wmask + PCI_SRIOV_SYS_PGSIZE, 0x553); qdev_prop_set_bit(&dev->qdev, "multifunction", true); + + dev->exp.sriov_pf.vf = g_new(PCIDevice *, total_vfs); + + for (uint16_t i = 0; i < total_vfs; i++) { + PCIDevice *vf = pci_new(devfn, vfname); + vf->exp.sriov_vf.pf = dev; + vf->exp.sriov_vf.vf_number = i; + + if (!qdev_realize(&vf->qdev, bus, errp)) { + object_unparent(OBJECT(vf)); + object_unref(vf); + unparent_vfs(dev, i); + return false; + } + + /* set vid/did according to sr/iov spec - they are not used */ + pci_config_set_vendor_id(vf->config, 0xffff); + pci_config_set_device_id(vf->config, 0xffff); + + dev->exp.sriov_pf.vf[i] = vf; + devfn += vf_stride; + } + + return true; } void pcie_sriov_pf_exit(PCIDevice *dev) { - unregister_vfs(dev); - g_free((char *)dev->exp.sriov_pf.vfname); - dev->exp.sriov_pf.vfname = NULL; + uint8_t *cfg = dev->config + dev->exp.sriov_cap; + + unparent_vfs(dev, pci_get_word(cfg + PCI_SRIOV_TOTAL_VF)); } void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num, @@ -141,80 +179,36 @@ void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num, } } -static PCIDevice *register_vf(PCIDevice *pf, int devfn, const char *name, - uint16_t vf_num) -{ - PCIDevice *dev = pci_new(devfn, name); - dev->exp.sriov_vf.pf = pf; - dev->exp.sriov_vf.vf_number = vf_num; - PCIBus *bus = pci_get_bus(pf); - Error *local_err = NULL; - - qdev_realize(&dev->qdev, &bus->qbus, &local_err); - if (local_err) { - error_report_err(local_err); - return NULL; - } - - /* set vid/did according to sr/iov spec - they are not used */ - pci_config_set_vendor_id(dev->config, 0xffff); - pci_config_set_device_id(dev->config, 0xffff); - - return dev; -} - static void register_vfs(PCIDevice *dev) { uint16_t num_vfs; uint16_t i; uint16_t sriov_cap = dev->exp.sriov_cap; - uint16_t vf_offset = - pci_get_word(dev->config + sriov_cap + PCI_SRIOV_VF_OFFSET); - uint16_t vf_stride = - pci_get_word(dev->config + sriov_cap + PCI_SRIOV_VF_STRIDE); - int32_t devfn = dev->devfn + vf_offset; assert(sriov_cap > 0); num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF); - if (num_vfs > pci_get_word(dev->config + sriov_cap + PCI_SRIOV_TOTAL_VF)) { - return; - } - - dev->exp.sriov_pf.vf = g_new(PCIDevice *, num_vfs); trace_sriov_register_vfs(dev->name, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn), num_vfs); for (i = 0; i < num_vfs; i++) { - dev->exp.sriov_pf.vf[i] = register_vf(dev, devfn, - dev->exp.sriov_pf.vfname, i); - if (!dev->exp.sriov_pf.vf[i]) { - num_vfs = i; - break; - } - devfn += vf_stride; + pci_set_enabled(dev->exp.sriov_pf.vf[i], true); } - dev->exp.sriov_pf.num_vfs = num_vfs; + + pci_set_word(dev->wmask + sriov_cap + PCI_SRIOV_NUM_VF, 0); } static void unregister_vfs(PCIDevice *dev) { - uint16_t num_vfs = dev->exp.sriov_pf.num_vfs; + uint8_t *cfg = dev->config + dev->exp.sriov_cap; uint16_t i; trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn), num_vfs); - for (i = 0; i < num_vfs; i++) { - Error *err = NULL; - PCIDevice *vf = dev->exp.sriov_pf.vf[i]; - if (!object_property_set_bool(OBJECT(vf), "realized", false, &err)) { - error_reportf_err(err, "Failed to unplug: "); - } - object_unparent(OBJECT(vf)); - object_unref(OBJECT(vf)); + PCI_FUNC(dev->devfn)); + for (i = 0; i < pci_get_word(cfg + PCI_SRIOV_TOTAL_VF); i++) { + pci_set_enabled(dev->exp.sriov_pf.vf[i], false); } - g_free(dev->exp.sriov_pf.vf); - dev->exp.sriov_pf.vf = NULL; - dev->exp.sriov_pf.num_vfs = 0; + + pci_set_word(dev->wmask + dev->exp.sriov_cap + PCI_SRIOV_NUM_VF, 0xffff); } void pcie_sriov_config_write(PCIDevice *dev, uint32_t address, @@ -235,15 +229,29 @@ void pcie_sriov_config_write(PCIDevice *dev, uint32_t address, PCI_FUNC(dev->devfn), off, val, len); if (range_covers_byte(off, len, PCI_SRIOV_CTRL)) { - if (dev->exp.sriov_pf.num_vfs) { - if (!(val & PCI_SRIOV_CTRL_VFE)) { - unregister_vfs(dev); - } + if (val & PCI_SRIOV_CTRL_VFE) { + register_vfs(dev); } else { - if (val & PCI_SRIOV_CTRL_VFE) { - register_vfs(dev); - } + unregister_vfs(dev); } + } else if (range_covers_byte(off, len, PCI_SRIOV_NUM_VF)) { + uint8_t *cfg = dev->config + sriov_cap; + uint8_t *wmask = dev->wmask + sriov_cap; + uint16_t num_vfs = pci_get_word(cfg + PCI_SRIOV_NUM_VF); + uint16_t wmask_val = PCI_SRIOV_CTRL_MSE | PCI_SRIOV_CTRL_ARI; + + if (num_vfs <= pci_get_word(cfg + PCI_SRIOV_TOTAL_VF)) { + wmask_val |= PCI_SRIOV_CTRL_VFE; + } + + pci_set_word(wmask + PCI_SRIOV_CTRL, wmask_val); + } +} + +void pcie_sriov_pf_post_load(PCIDevice *dev) +{ + if (dev->exp.sriov_cap) { + register_vfs(dev); } } @@ -260,6 +268,8 @@ void pcie_sriov_pf_reset(PCIDevice *dev) unregister_vfs(dev); pci_set_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF, 0); + pci_set_word(dev->wmask + sriov_cap + PCI_SRIOV_CTRL, + PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE | PCI_SRIOV_CTRL_ARI); /* * Default is to use 4K pages, software can modify it @@ -306,7 +316,7 @@ PCIDevice *pcie_sriov_get_pf(PCIDevice *dev) PCIDevice *pcie_sriov_get_vf_at_index(PCIDevice *dev, int n) { assert(!pci_is_vf(dev)); - if (n < dev->exp.sriov_pf.num_vfs) { + if (n < pcie_sriov_num_vfs(dev)) { return dev->exp.sriov_pf.vf[n]; } return NULL; @@ -314,5 +324,10 @@ PCIDevice *pcie_sriov_get_vf_at_index(PCIDevice *dev, int n) uint16_t pcie_sriov_num_vfs(PCIDevice *dev) { - return dev->exp.sriov_pf.num_vfs; + uint16_t sriov_cap = dev->exp.sriov_cap; + uint8_t *cfg = dev->config + sriov_cap; + + return sriov_cap && + (pci_get_word(cfg + PCI_SRIOV_CTRL) & PCI_SRIOV_CTRL_VFE) ? + pci_get_word(cfg + PCI_SRIOV_NUM_VF) : 0; } diff --git a/hw/pci/trace-events b/hw/pci/trace-events index 19643aa..e98f575 100644 --- a/hw/pci/trace-events +++ b/hw/pci/trace-events @@ -14,7 +14,7 @@ msix_write_config(char *name, bool enabled, bool masked) "dev %s enabled %d mask # hw/pci/pcie_sriov.c sriov_register_vfs(const char *name, int slot, int function, int num_vfs) "%s %02x:%x: creating %d vf devs" -sriov_unregister_vfs(const char *name, int slot, int function, int num_vfs) "%s %02x:%x: Unregistering %d vf devs" +sriov_unregister_vfs(const char *name, int slot, int function) "%s %02x:%x: Unregistering vf devs" sriov_config_write(const char *name, int slot, int fun, uint32_t offset, uint32_t val, uint32_t len) "%s %02x:%x: sriov offset 0x%x val 0x%x len %d" # pcie.c |