aboutsummaryrefslogtreecommitdiff
path: root/hw/pci
diff options
context:
space:
mode:
authorStefan Hajnoczi <stefanha@redhat.com>2025-02-22 05:06:39 +0800
committerStefan Hajnoczi <stefanha@redhat.com>2025-02-22 05:06:39 +0800
commitb69801dd6b1eb4d107f7c2f643adf0a4e3ec9124 (patch)
tree9620500ad85d8368314501a3969e20624916db17 /hw/pci
parentf41af4c5857b6983766aaffc041580ff170d0679 (diff)
parentdd6d545e8f2d9a0e8a8c287ec16469f03ef5c198 (diff)
downloadqemu-b69801dd6b1eb4d107f7c2f643adf0a4e3ec9124.zip
qemu-b69801dd6b1eb4d107f7c2f643adf0a4e3ec9124.tar.gz
qemu-b69801dd6b1eb4d107f7c2f643adf0a4e3ec9124.tar.bz2
Merge tag 'for_upstream' of https://git.kernel.org/pub/scm/virt/kvm/mst/qemu into staging
virtio,pc,pci: features, fixes, cleanups Features: SR-IOV emulation for pci virtio-mem-pci support for s390 interleave support for cxl big endian support for vdpa svq new QAPI events for vhost-user Also vIOMMU reset order fixups are in. Fixes, cleanups all over the place. Signed-off-by: Michael S. Tsirkin <mst@redhat.com> # -----BEGIN PGP SIGNATURE----- # # iQFDBAABCAAtFiEEXQn9CHHI+FuUyooNKB8NuNKNVGkFAme4b8sPHG1zdEByZWRo # YXQuY29tAAoJECgfDbjSjVRpHKcIAKPJsVqPdda2dJ7b7FdyRT0Q+uwezXqaGHd4 # 7Lzih1wsxYNkwIAyPtEb76/21qiS7BluqlUCfCB66R9xWjP5/KfvAFj4/r4AEduE # fxAgYzotNpv55zcRbcflMyvQ42WGiZZHC+o5Lp7vDXUP3pIyHrl0Ydh5WmcD+hwS # BjXvda58TirQpPJ7rUL+sSfLih17zQkkDcfv5/AgorDy1wK09RBKwMx/gq7wG8yJ # twy8eBY2CmfmFD7eTM+EKqBD2T0kwLEeLfS/F/tl5Fyg6lAiYgYtCbGLpAmWErsg # XZvfZmwqL7CNzWexGvPFnnLyqwC33WUP0k0kT88Y5wh3/h98blw= # =tej8 # -----END PGP SIGNATURE----- # gpg: Signature made Fri 21 Feb 2025 20:21:31 HKT # gpg: using RSA key 5D09FD0871C8F85B94CA8A0D281F0DB8D28D5469 # gpg: issuer "mst@redhat.com" # gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>" [full] # gpg: aka "Michael S. Tsirkin <mst@redhat.com>" [full] # Primary key fingerprint: 0270 606B 6F3C DF3D 0B17 0970 C350 3912 AFBE 8E67 # Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA 8A0D 281F 0DB8 D28D 5469 * tag 'for_upstream' of https://git.kernel.org/pub/scm/virt/kvm/mst/qemu: (41 commits) docs/devel/reset: Document reset expectations for DMA and IOMMU hw/vfio/common: Add a trace point in vfio_reset_handler hw/arm/smmuv3: Move reset to exit phase hw/i386/intel-iommu: Migrate to 3-phase reset hw/virtio/virtio-iommu: Migrate to 3-phase reset vhost-user-snd: correct the calculation of config_size net: vhost-user: add QAPI events to report connection state hw/virtio/virtio-nsm: Respond with correct length vdpa: Fix endian bugs in shadow virtqueue MAINTAINERS: add more files to `vhost` cryptodev/vhost: allocate CryptoDevBackendVhost using g_mem0() vhost-iova-tree: Update documentation vhost-iova-tree, svq: Implement GPA->IOVA & partial IOVA->HVA trees vhost-iova-tree: Implement an IOVA-only tree amd_iommu: Use correct bitmask to set capability BAR amd_iommu: Use correct DTE field for interrupt passthrough hw/virtio: reset virtio balloon stats on machine reset mem/cxl_type3: support 3, 6, 12 and 16 interleave ways hw/mem/cxl_type3: Ensure errp is set on realization failure hw/mem/cxl_type3: Fix special_ops memory leak on msix_init_exclusive_bar() failure ... Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Diffstat (limited to 'hw/pci')
-rw-r--r--hw/pci/msix.c9
-rw-r--r--hw/pci/pci.c22
-rw-r--r--hw/pci/pcie_sriov.c159
-rw-r--r--hw/pci/trace-events2
4 files changed, 117 insertions, 75 deletions
diff --git a/hw/pci/msix.c b/hw/pci/msix.c
index 57ec708..66f27b9 100644
--- a/hw/pci/msix.c
+++ b/hw/pci/msix.c
@@ -15,6 +15,7 @@
*/
#include "qemu/osdep.h"
+#include "qemu/log.h"
#include "hw/pci/msi.h"
#include "hw/pci/msix.h"
#include "hw/pci/pci.h"
@@ -260,6 +261,14 @@ static uint64_t msix_pba_mmio_read(void *opaque, hwaddr addr,
static void msix_pba_mmio_write(void *opaque, hwaddr addr,
uint64_t val, unsigned size)
{
+ PCIDevice *dev = opaque;
+
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "PCI [%s:%02x:%02x.%x] attempt to write to MSI-X "
+ "PBA at 0x%" FMT_PCIBUS ", ignoring.\n",
+ pci_root_bus_path(dev), pci_dev_bus_num(dev),
+ PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
+ addr);
}
static const MemoryRegionOps msix_pba_mmio_ops = {
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 2afa423..1d42847 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -803,10 +803,17 @@ static bool migrate_is_not_pcie(void *opaque, int version_id)
return !pci_is_express((PCIDevice *)opaque);
}
+static int pci_post_load(void *opaque, int version_id)
+{
+ pcie_sriov_pf_post_load(opaque);
+ return 0;
+}
+
const VMStateDescription vmstate_pci_device = {
.name = "PCIDevice",
.version_id = 2,
.minimum_version_id = 1,
+ .post_load = pci_post_load,
.fields = (const VMStateField[]) {
VMSTATE_INT32_POSITIVE_LE(version_id, PCIDevice),
VMSTATE_BUFFER_UNSAFE_INFO_TEST(config, PCIDevice,
@@ -1391,6 +1398,7 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num,
assert(hdr_type != PCI_HEADER_TYPE_BRIDGE || region_num < 2);
r = &pci_dev->io_regions[region_num];
+ assert(!r->size);
r->addr = PCI_BAR_UNMAPPED;
r->size = size;
r->type = type;
@@ -2963,7 +2971,17 @@ MSIMessage pci_get_msi_message(PCIDevice *dev, int vector)
void pci_set_power(PCIDevice *d, bool state)
{
- pci_set_enabled(d, state);
+ /*
+ * Don't change the enabled state of VFs when powering on/off the device.
+ *
+ * When powering on, VFs must not be enabled immediately but they must
+ * wait until the guest configures SR-IOV.
+ * When powering off, their corresponding PFs will be reset and disable
+ * VFs.
+ */
+ if (!pci_is_vf(d)) {
+ pci_set_enabled(d, state);
+ }
}
void pci_set_enabled(PCIDevice *d, bool state)
@@ -2977,7 +2995,7 @@ void pci_set_enabled(PCIDevice *d, bool state)
memory_region_set_enabled(&d->bus_master_enable_region,
(pci_get_word(d->config + PCI_COMMAND)
& PCI_COMMAND_MASTER) && d->enabled);
- if (!d->enabled) {
+ if (qdev_is_realized(&d->qdev)) {
pci_device_reset(d);
}
}
diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c
index e9b2322..1eb4358 100644
--- a/hw/pci/pcie_sriov.c
+++ b/hw/pci/pcie_sriov.c
@@ -20,23 +20,37 @@
#include "qapi/error.h"
#include "trace.h"
-static PCIDevice *register_vf(PCIDevice *pf, int devfn,
- const char *name, uint16_t vf_num);
-static void unregister_vfs(PCIDevice *dev);
+static void unparent_vfs(PCIDevice *dev, uint16_t total_vfs)
+{
+ for (uint16_t i = 0; i < total_vfs; i++) {
+ PCIDevice *vf = dev->exp.sriov_pf.vf[i];
+ object_unparent(OBJECT(vf));
+ object_unref(OBJECT(vf));
+ }
+ g_free(dev->exp.sriov_pf.vf);
+ dev->exp.sriov_pf.vf = NULL;
+}
-void pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
+bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
const char *vfname, uint16_t vf_dev_id,
uint16_t init_vfs, uint16_t total_vfs,
- uint16_t vf_offset, uint16_t vf_stride)
+ uint16_t vf_offset, uint16_t vf_stride,
+ Error **errp)
{
+ BusState *bus = qdev_get_parent_bus(&dev->qdev);
+ int32_t devfn = dev->devfn + vf_offset;
uint8_t *cfg = dev->config + offset;
uint8_t *wmask;
+ if (total_vfs &&
+ (uint32_t)devfn + (uint32_t)(total_vfs - 1) * vf_stride >= PCI_DEVFN_MAX) {
+ error_setg(errp, "VF addr overflows");
+ return false;
+ }
+
pcie_add_capability(dev, PCI_EXT_CAP_ID_SRIOV, 1,
offset, PCI_EXT_CAP_SRIOV_SIZEOF);
dev->exp.sriov_cap = offset;
- dev->exp.sriov_pf.num_vfs = 0;
- dev->exp.sriov_pf.vfname = g_strdup(vfname);
dev->exp.sriov_pf.vf = NULL;
pci_set_word(cfg + PCI_SRIOV_VF_OFFSET, vf_offset);
@@ -69,13 +83,37 @@ void pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
pci_set_word(wmask + PCI_SRIOV_SYS_PGSIZE, 0x553);
qdev_prop_set_bit(&dev->qdev, "multifunction", true);
+
+ dev->exp.sriov_pf.vf = g_new(PCIDevice *, total_vfs);
+
+ for (uint16_t i = 0; i < total_vfs; i++) {
+ PCIDevice *vf = pci_new(devfn, vfname);
+ vf->exp.sriov_vf.pf = dev;
+ vf->exp.sriov_vf.vf_number = i;
+
+ if (!qdev_realize(&vf->qdev, bus, errp)) {
+ object_unparent(OBJECT(vf));
+ object_unref(vf);
+ unparent_vfs(dev, i);
+ return false;
+ }
+
+ /* set vid/did according to sr/iov spec - they are not used */
+ pci_config_set_vendor_id(vf->config, 0xffff);
+ pci_config_set_device_id(vf->config, 0xffff);
+
+ dev->exp.sriov_pf.vf[i] = vf;
+ devfn += vf_stride;
+ }
+
+ return true;
}
void pcie_sriov_pf_exit(PCIDevice *dev)
{
- unregister_vfs(dev);
- g_free((char *)dev->exp.sriov_pf.vfname);
- dev->exp.sriov_pf.vfname = NULL;
+ uint8_t *cfg = dev->config + dev->exp.sriov_cap;
+
+ unparent_vfs(dev, pci_get_word(cfg + PCI_SRIOV_TOTAL_VF));
}
void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num,
@@ -141,80 +179,36 @@ void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num,
}
}
-static PCIDevice *register_vf(PCIDevice *pf, int devfn, const char *name,
- uint16_t vf_num)
-{
- PCIDevice *dev = pci_new(devfn, name);
- dev->exp.sriov_vf.pf = pf;
- dev->exp.sriov_vf.vf_number = vf_num;
- PCIBus *bus = pci_get_bus(pf);
- Error *local_err = NULL;
-
- qdev_realize(&dev->qdev, &bus->qbus, &local_err);
- if (local_err) {
- error_report_err(local_err);
- return NULL;
- }
-
- /* set vid/did according to sr/iov spec - they are not used */
- pci_config_set_vendor_id(dev->config, 0xffff);
- pci_config_set_device_id(dev->config, 0xffff);
-
- return dev;
-}
-
static void register_vfs(PCIDevice *dev)
{
uint16_t num_vfs;
uint16_t i;
uint16_t sriov_cap = dev->exp.sriov_cap;
- uint16_t vf_offset =
- pci_get_word(dev->config + sriov_cap + PCI_SRIOV_VF_OFFSET);
- uint16_t vf_stride =
- pci_get_word(dev->config + sriov_cap + PCI_SRIOV_VF_STRIDE);
- int32_t devfn = dev->devfn + vf_offset;
assert(sriov_cap > 0);
num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF);
- if (num_vfs > pci_get_word(dev->config + sriov_cap + PCI_SRIOV_TOTAL_VF)) {
- return;
- }
-
- dev->exp.sriov_pf.vf = g_new(PCIDevice *, num_vfs);
trace_sriov_register_vfs(dev->name, PCI_SLOT(dev->devfn),
PCI_FUNC(dev->devfn), num_vfs);
for (i = 0; i < num_vfs; i++) {
- dev->exp.sriov_pf.vf[i] = register_vf(dev, devfn,
- dev->exp.sriov_pf.vfname, i);
- if (!dev->exp.sriov_pf.vf[i]) {
- num_vfs = i;
- break;
- }
- devfn += vf_stride;
+ pci_set_enabled(dev->exp.sriov_pf.vf[i], true);
}
- dev->exp.sriov_pf.num_vfs = num_vfs;
+
+ pci_set_word(dev->wmask + sriov_cap + PCI_SRIOV_NUM_VF, 0);
}
static void unregister_vfs(PCIDevice *dev)
{
- uint16_t num_vfs = dev->exp.sriov_pf.num_vfs;
+ uint8_t *cfg = dev->config + dev->exp.sriov_cap;
uint16_t i;
trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), num_vfs);
- for (i = 0; i < num_vfs; i++) {
- Error *err = NULL;
- PCIDevice *vf = dev->exp.sriov_pf.vf[i];
- if (!object_property_set_bool(OBJECT(vf), "realized", false, &err)) {
- error_reportf_err(err, "Failed to unplug: ");
- }
- object_unparent(OBJECT(vf));
- object_unref(OBJECT(vf));
+ PCI_FUNC(dev->devfn));
+ for (i = 0; i < pci_get_word(cfg + PCI_SRIOV_TOTAL_VF); i++) {
+ pci_set_enabled(dev->exp.sriov_pf.vf[i], false);
}
- g_free(dev->exp.sriov_pf.vf);
- dev->exp.sriov_pf.vf = NULL;
- dev->exp.sriov_pf.num_vfs = 0;
+
+ pci_set_word(dev->wmask + dev->exp.sriov_cap + PCI_SRIOV_NUM_VF, 0xffff);
}
void pcie_sriov_config_write(PCIDevice *dev, uint32_t address,
@@ -235,15 +229,29 @@ void pcie_sriov_config_write(PCIDevice *dev, uint32_t address,
PCI_FUNC(dev->devfn), off, val, len);
if (range_covers_byte(off, len, PCI_SRIOV_CTRL)) {
- if (dev->exp.sriov_pf.num_vfs) {
- if (!(val & PCI_SRIOV_CTRL_VFE)) {
- unregister_vfs(dev);
- }
+ if (val & PCI_SRIOV_CTRL_VFE) {
+ register_vfs(dev);
} else {
- if (val & PCI_SRIOV_CTRL_VFE) {
- register_vfs(dev);
- }
+ unregister_vfs(dev);
}
+ } else if (range_covers_byte(off, len, PCI_SRIOV_NUM_VF)) {
+ uint8_t *cfg = dev->config + sriov_cap;
+ uint8_t *wmask = dev->wmask + sriov_cap;
+ uint16_t num_vfs = pci_get_word(cfg + PCI_SRIOV_NUM_VF);
+ uint16_t wmask_val = PCI_SRIOV_CTRL_MSE | PCI_SRIOV_CTRL_ARI;
+
+ if (num_vfs <= pci_get_word(cfg + PCI_SRIOV_TOTAL_VF)) {
+ wmask_val |= PCI_SRIOV_CTRL_VFE;
+ }
+
+ pci_set_word(wmask + PCI_SRIOV_CTRL, wmask_val);
+ }
+}
+
+void pcie_sriov_pf_post_load(PCIDevice *dev)
+{
+ if (dev->exp.sriov_cap) {
+ register_vfs(dev);
}
}
@@ -260,6 +268,8 @@ void pcie_sriov_pf_reset(PCIDevice *dev)
unregister_vfs(dev);
pci_set_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF, 0);
+ pci_set_word(dev->wmask + sriov_cap + PCI_SRIOV_CTRL,
+ PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE | PCI_SRIOV_CTRL_ARI);
/*
* Default is to use 4K pages, software can modify it
@@ -306,7 +316,7 @@ PCIDevice *pcie_sriov_get_pf(PCIDevice *dev)
PCIDevice *pcie_sriov_get_vf_at_index(PCIDevice *dev, int n)
{
assert(!pci_is_vf(dev));
- if (n < dev->exp.sriov_pf.num_vfs) {
+ if (n < pcie_sriov_num_vfs(dev)) {
return dev->exp.sriov_pf.vf[n];
}
return NULL;
@@ -314,5 +324,10 @@ PCIDevice *pcie_sriov_get_vf_at_index(PCIDevice *dev, int n)
uint16_t pcie_sriov_num_vfs(PCIDevice *dev)
{
- return dev->exp.sriov_pf.num_vfs;
+ uint16_t sriov_cap = dev->exp.sriov_cap;
+ uint8_t *cfg = dev->config + sriov_cap;
+
+ return sriov_cap &&
+ (pci_get_word(cfg + PCI_SRIOV_CTRL) & PCI_SRIOV_CTRL_VFE) ?
+ pci_get_word(cfg + PCI_SRIOV_NUM_VF) : 0;
}
diff --git a/hw/pci/trace-events b/hw/pci/trace-events
index 19643aa..e98f575 100644
--- a/hw/pci/trace-events
+++ b/hw/pci/trace-events
@@ -14,7 +14,7 @@ msix_write_config(char *name, bool enabled, bool masked) "dev %s enabled %d mask
# hw/pci/pcie_sriov.c
sriov_register_vfs(const char *name, int slot, int function, int num_vfs) "%s %02x:%x: creating %d vf devs"
-sriov_unregister_vfs(const char *name, int slot, int function, int num_vfs) "%s %02x:%x: Unregistering %d vf devs"
+sriov_unregister_vfs(const char *name, int slot, int function) "%s %02x:%x: Unregistering vf devs"
sriov_config_write(const char *name, int slot, int fun, uint32_t offset, uint32_t val, uint32_t len) "%s %02x:%x: sriov offset 0x%x val 0x%x len %d"
# pcie.c