aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStefan Hajnoczi <stefanha@redhat.com>2025-05-15 13:41:56 -0400
committerStefan Hajnoczi <stefanha@redhat.com>2025-05-15 13:41:56 -0400
commit21596064081e8d0c0153f68714981c7f0e040973 (patch)
tree4598685872d139d53c780aebfa9f4b972e2bff7b
parent92fbc2ffc92f387c2ccb00b38ac800ca924c079a (diff)
parent28931c2e1591deb4bfaaf744fdc8813e96c230f1 (diff)
downloadqemu-21596064081e8d0c0153f68714981c7f0e040973.zip
qemu-21596064081e8d0c0153f68714981c7f0e040973.tar.gz
qemu-21596064081e8d0c0153f68714981c7f0e040973.tar.bz2
Merge tag 'for_upstream' of https://git.kernel.org/pub/scm/virt/kvm/mst/qemu into staging
virtio,pci,pc: fixes, features vhost-scsi now supports scsi hotplug cxl gained a bag of new operations, motably media operations virtio-net now supports SR-IOV emulation pci-testdev now supports backing memory bar with host memory amd iommu now supports migration fixes all over the place Signed-off-by: Michael S. Tsirkin <mst@redhat.com> # -----BEGIN PGP SIGNATURE----- # # iQFDBAABCgAtFiEEXQn9CHHI+FuUyooNKB8NuNKNVGkFAmgkg0UPHG1zdEByZWRo # YXQuY29tAAoJECgfDbjSjVRpcDIH+wbrq7DzG+BVOraYtmD69BQCzYszby1mAWry # 2OUYuAx9Oh+DsAwbzwbBdh9+SmJoi1oJ/d8rzSK328hdDrpCaPmc7bcBdAWJ3YcB # bGNPyJ+9eJLRXtlceGIhfAOMLIB0ugXGkHLQ61zlVCTg4Xwnj7/dQp2tAQ1BkTwW # Azc7ujBoJOBF3WVpa1Pqw0t1m3K74bwanOlkIg/JUWXk27sgP2YMnyrcpOu9Iz1T # VazgobyHo5y15V0wvd05w4Bk7cJSHwgW+y3DtgTtIffetIaAbSRgl3Pl5Ic1yKcX # ofg9aDFN6m0S8tv4WgFc+rT3Xaa/aPue9awjD5sEEldRasWKKNo= # =847R # -----END PGP SIGNATURE----- # gpg: Signature made Wed 14 May 2025 07:49:25 EDT # gpg: using RSA key 5D09FD0871C8F85B94CA8A0D281F0DB8D28D5469 # gpg: issuer "mst@redhat.com" # gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>" [full] # gpg: aka "Michael S. Tsirkin <mst@redhat.com>" [full] # Primary key fingerprint: 0270 606B 6F3C DF3D 0B17 0970 C350 3912 AFBE 8E67 # Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA 8A0D 281F 0DB8 D28D 5469 * tag 'for_upstream' of https://git.kernel.org/pub/scm/virt/kvm/mst/qemu: (27 commits) hw/i386/amd_iommu: Allow migration when explicitly create the AMDVI-PCI device hw/i386/amd_iommu: Isolate AMDVI-PCI from amd-iommu device to allow full control over the PCI device creation intel_iommu: Take locks when looking for and creating address spaces intel_iommu: Use BQL_LOCK_GUARD to manage cleanup automatically virtio: Move virtio_reset() virtio: Call set_features during reset vhost-scsi: support VIRTIO_SCSI_F_HOTPLUG vhost-user: return failure if backend crash when live migration vhost: return failure if stop virtqueue failed in vhost_dev_stop system/runstate: add VM state change cb with return value pci-testdev.c: Add membar-backed option for backing membar pcie_sriov: Make a PCI device with user-created VF ARI-capable docs: Document composable SR-IOV device virtio-net: Implement SR-IOV VF virtio-pci: Implement SR-IOV PF pcie_sriov: Allow user to create SR-IOV device pcie_sriov: Check PCI Express for SR-IOV PF pcie_sriov: Ensure PF and VF are mutually exclusive hw/pci: Fix SR-IOV VF number calculation hw/pci: Do not add ROM BAR for SR-IOV VF ... Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
-rw-r--r--MAINTAINERS1
-rw-r--r--backends/vhost-user.c20
-rw-r--r--docs/system/devices/cxl.rst18
-rw-r--r--docs/system/index.rst1
-rw-r--r--docs/system/sriov.rst37
-rw-r--r--hw/block/vhost-user-blk.c27
-rw-r--r--hw/block/virtio-blk.c7
-rw-r--r--hw/char/virtio-serial-bus.c3
-rw-r--r--hw/core/vm-change-state-handler.c18
-rw-r--r--hw/cxl/cxl-device-utils.c14
-rw-r--r--hw/cxl/cxl-mailbox-utils.c623
-rw-r--r--hw/display/vhost-user-gpu.c12
-rw-r--r--hw/i386/acpi-build.c8
-rw-r--r--hw/i386/amd_iommu.c101
-rw-r--r--hw/i386/amd_iommu.h5
-rw-r--r--hw/i386/intel_iommu.c35
-rw-r--r--hw/input/virtio-input.c3
-rw-r--r--hw/mem/cxl_type3.c22
-rw-r--r--hw/misc/pci-testdev.c12
-rw-r--r--hw/net/virtio-net.c3
-rw-r--r--hw/pci/pci.c76
-rw-r--r--hw/pci/pcie_sriov.c294
-rw-r--r--hw/scsi/scsi-bus.c2
-rw-r--r--hw/scsi/vhost-scsi-common.c13
-rw-r--r--hw/scsi/vhost-scsi.c8
-rw-r--r--hw/scsi/vhost-user-scsi.c18
-rw-r--r--hw/vfio/migration.c2
-rw-r--r--hw/virtio/vdpa-dev.c5
-rw-r--r--hw/virtio/vhost-user-base.c23
-rw-r--r--hw/virtio/vhost-user-fs.c23
-rw-r--r--hw/virtio/vhost-user-scmi.c27
-rw-r--r--hw/virtio/vhost-user-vsock.c15
-rw-r--r--hw/virtio/vhost-vsock-common.c12
-rw-r--r--hw/virtio/vhost-vsock.c11
-rw-r--r--hw/virtio/vhost.c23
-rw-r--r--hw/virtio/virtio-balloon.c3
-rw-r--r--hw/virtio/virtio-crypto.c3
-rw-r--r--hw/virtio/virtio-iommu.c3
-rw-r--r--hw/virtio/virtio-net-pci.c1
-rw-r--r--hw/virtio/virtio-pci.c24
-rw-r--r--hw/virtio/virtio-rng.c5
-rw-r--r--hw/virtio/virtio.c108
-rw-r--r--include/hw/cxl/cxl_device.h23
-rw-r--r--include/hw/cxl/cxl_mailbox.h1
-rw-r--r--include/hw/pci/pci_device.h6
-rw-r--r--include/hw/pci/pcie_sriov.h21
-rw-r--r--include/hw/virtio/vhost-scsi-common.h2
-rw-r--r--include/hw/virtio/vhost-vsock-common.h2
-rw-r--r--include/hw/virtio/vhost.h8
-rw-r--r--include/hw/virtio/virtio-pci.h1
-rw-r--r--include/hw/virtio/virtio.h2
-rw-r--r--include/system/runstate.h13
-rw-r--r--include/system/vhost-user-backend.h2
-rw-r--r--system/cpus.c8
-rw-r--r--system/runstate.c35
55 files changed, 1437 insertions, 356 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 6dacd6d..b579358 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2058,6 +2058,7 @@ F: hw/pci-bridge/*
F: qapi/pci.json
F: docs/pci*
F: docs/specs/*pci*
+F: docs/system/sriov.rst
PCIE DOE
M: Huai-Cheng Kuo <hchkuo@avery-design.com.tw>
diff --git a/backends/vhost-user.c b/backends/vhost-user.c
index 94274a6..4284532 100644
--- a/backends/vhost-user.c
+++ b/backends/vhost-user.c
@@ -97,30 +97,28 @@ err_host_notifiers:
vhost_dev_disable_notifiers(&b->dev, b->vdev);
}
-void
+int
vhost_user_backend_stop(VhostUserBackend *b)
{
BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(b->vdev)));
VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
- int ret = 0;
+ int ret;
if (!b->started) {
- return;
+ return 0;
}
- vhost_dev_stop(&b->dev, b->vdev, true);
+ ret = vhost_dev_stop(&b->dev, b->vdev, true);
- if (k->set_guest_notifiers) {
- ret = k->set_guest_notifiers(qbus->parent,
- b->dev.nvqs, false);
- if (ret < 0) {
- error_report("vhost guest notifier cleanup failed: %d", ret);
- }
+ if (k->set_guest_notifiers &&
+ k->set_guest_notifiers(qbus->parent, b->dev.nvqs, false) < 0) {
+ error_report("vhost guest notifier cleanup failed: %d", ret);
+ return -1;
}
- assert(ret >= 0);
vhost_dev_disable_notifiers(&b->dev, b->vdev);
b->started = false;
+ return ret;
}
static void set_chardev(Object *obj, const char *value, Error **errp)
diff --git a/docs/system/devices/cxl.rst b/docs/system/devices/cxl.rst
index 882b036..e307caf 100644
--- a/docs/system/devices/cxl.rst
+++ b/docs/system/devices/cxl.rst
@@ -308,7 +308,7 @@ A very simple setup with just one directly attached CXL Type 3 Persistent Memory
-object memory-backend-file,id=cxl-lsa1,share=on,mem-path=/tmp/lsa.raw,size=256M \
-device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \
-device cxl-rp,port=0,bus=cxl.1,id=root_port13,chassis=0,slot=2 \
- -device cxl-type3,bus=root_port13,persistent-memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem0 \
+ -device cxl-type3,bus=root_port13,persistent-memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem0,sn=0x1 \
-M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G
A very simple setup with just one directly attached CXL Type 3 Volatile Memory device::
@@ -349,13 +349,13 @@ the CXL Type3 device directly attached (no switches).::
-device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \
-device pxb-cxl,bus_nr=222,bus=pcie.0,id=cxl.2 \
-device cxl-rp,port=0,bus=cxl.1,id=root_port13,chassis=0,slot=2 \
- -device cxl-type3,bus=root_port13,persistent-memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem0 \
+ -device cxl-type3,bus=root_port13,persistent-memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem0,sn=0x1 \
-device cxl-rp,port=1,bus=cxl.1,id=root_port14,chassis=0,slot=3 \
- -device cxl-type3,bus=root_port14,persistent-memdev=cxl-mem2,lsa=cxl-lsa2,id=cxl-pmem1 \
+ -device cxl-type3,bus=root_port14,persistent-memdev=cxl-mem2,lsa=cxl-lsa2,id=cxl-pmem1,sn=0x2 \
-device cxl-rp,port=0,bus=cxl.2,id=root_port15,chassis=0,slot=5 \
- -device cxl-type3,bus=root_port15,persistent-memdev=cxl-mem3,lsa=cxl-lsa3,id=cxl-pmem2 \
+ -device cxl-type3,bus=root_port15,persistent-memdev=cxl-mem3,lsa=cxl-lsa3,id=cxl-pmem2,sn=0x3 \
-device cxl-rp,port=1,bus=cxl.2,id=root_port16,chassis=0,slot=6 \
- -device cxl-type3,bus=root_port16,persistent-memdev=cxl-mem4,lsa=cxl-lsa4,id=cxl-pmem3 \
+ -device cxl-type3,bus=root_port16,persistent-memdev=cxl-mem4,lsa=cxl-lsa4,id=cxl-pmem3,sn=0x4 \
-M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.targets.1=cxl.2,cxl-fmw.0.size=4G,cxl-fmw.0.interleave-granularity=8k
An example of 4 devices below a switch suitable for 1, 2 or 4 way interleave::
@@ -375,13 +375,13 @@ An example of 4 devices below a switch suitable for 1, 2 or 4 way interleave::
-device cxl-rp,port=1,bus=cxl.1,id=root_port1,chassis=0,slot=1 \
-device cxl-upstream,bus=root_port0,id=us0 \
-device cxl-downstream,port=0,bus=us0,id=swport0,chassis=0,slot=4 \
- -device cxl-type3,bus=swport0,persistent-memdev=cxl-mem0,lsa=cxl-lsa0,id=cxl-pmem0 \
+ -device cxl-type3,bus=swport0,persistent-memdev=cxl-mem0,lsa=cxl-lsa0,id=cxl-pmem0,sn=0x1 \
-device cxl-downstream,port=1,bus=us0,id=swport1,chassis=0,slot=5 \
- -device cxl-type3,bus=swport1,persistent-memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem1 \
+ -device cxl-type3,bus=swport1,persistent-memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem1,sn=0x2 \
-device cxl-downstream,port=2,bus=us0,id=swport2,chassis=0,slot=6 \
- -device cxl-type3,bus=swport2,persistent-memdev=cxl-mem2,lsa=cxl-lsa2,id=cxl-pmem2 \
+ -device cxl-type3,bus=swport2,persistent-memdev=cxl-mem2,lsa=cxl-lsa2,id=cxl-pmem2,sn=0x3 \
-device cxl-downstream,port=3,bus=us0,id=swport3,chassis=0,slot=7 \
- -device cxl-type3,bus=swport3,persistent-memdev=cxl-mem3,lsa=cxl-lsa3,id=cxl-pmem3 \
+ -device cxl-type3,bus=swport3,persistent-memdev=cxl-mem3,lsa=cxl-lsa3,id=cxl-pmem3,sn=0x4 \
-M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G,cxl-fmw.0.interleave-granularity=4k
Deprecations
diff --git a/docs/system/index.rst b/docs/system/index.rst
index c21065e..718e9d3 100644
--- a/docs/system/index.rst
+++ b/docs/system/index.rst
@@ -39,3 +39,4 @@ or Hypervisor.Framework.
multi-process
confidential-guest-support
vm-templating
+ sriov
diff --git a/docs/system/sriov.rst b/docs/system/sriov.rst
new file mode 100644
index 0000000..d12178f
--- /dev/null
+++ b/docs/system/sriov.rst
@@ -0,0 +1,37 @@
+.. SPDX-License-Identifier: GPL-2.0-or-later
+
+Compsable SR-IOV device
+=======================
+
+SR-IOV (Single Root I/O Virtualization) is an optional extended capability of a
+PCI Express device. It allows a single physical function (PF) to appear as
+multiple virtual functions (VFs) for the main purpose of eliminating software
+overhead in I/O from virtual machines.
+
+There are devices with predefined SR-IOV configurations, but it is also possible
+to compose an SR-IOV device yourself. Composing an SR-IOV device is currently
+only supported by virtio-net-pci.
+
+Users can configure an SR-IOV-capable virtio-net device by adding
+virtio-net-pci functions to a bus. Below is a command line example:
+
+.. code-block:: shell
+
+ -netdev user,id=n -netdev user,id=o
+ -netdev user,id=p -netdev user,id=q
+ -device pcie-root-port,id=b
+ -device virtio-net-pci,bus=b,addr=0x0.0x3,netdev=q,sriov-pf=f
+ -device virtio-net-pci,bus=b,addr=0x0.0x2,netdev=p,sriov-pf=f
+ -device virtio-net-pci,bus=b,addr=0x0.0x1,netdev=o,sriov-pf=f
+ -device virtio-net-pci,bus=b,addr=0x0.0x0,netdev=n,id=f
+
+The VFs specify the paired PF with ``sriov-pf`` property. The PF must be
+added after all VFs. It is the user's responsibility to ensure that VFs have
+function numbers larger than one of the PF, and that the function numbers
+have a consistent stride. Both the PF and VFs are ARI-capable so you can have
+255 VFs at maximum.
+
+You may also need to perform additional steps to activate the SR-IOV feature on
+your guest. For Linux, refer to [1]_.
+
+.. [1] https://docs.kernel.org/PCI/pci-iov-howto.html
diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
index 4bb5ed2..0eebbcd 100644
--- a/hw/block/vhost-user-blk.c
+++ b/hw/block/vhost-user-blk.c
@@ -204,7 +204,7 @@ err_host_notifiers:
return ret;
}
-static void vhost_user_blk_stop(VirtIODevice *vdev)
+static int vhost_user_blk_stop(VirtIODevice *vdev)
{
VHostUserBlk *s = VHOST_USER_BLK(vdev);
BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
@@ -212,26 +212,26 @@ static void vhost_user_blk_stop(VirtIODevice *vdev)
int ret;
if (!s->started_vu) {
- return;
+ return 0;
}
s->started_vu = false;
if (!k->set_guest_notifiers) {
- return;
+ return 0;
}
- vhost_dev_stop(&s->dev, vdev, true);
+ ret = vhost_dev_stop(&s->dev, vdev, true);
- ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false);
- if (ret < 0) {
+ if (k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false) < 0) {
error_report("vhost guest notifier cleanup failed: %d", ret);
- return;
+ return -1;
}
vhost_dev_disable_notifiers(&s->dev, vdev);
+ return ret;
}
-static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status)
+static int vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status)
{
VHostUserBlk *s = VHOST_USER_BLK(vdev);
bool should_start = virtio_device_should_start(vdev, status);
@@ -239,11 +239,11 @@ static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status)
int ret;
if (!s->connected) {
- return;
+ return -1;
}
if (vhost_dev_is_started(&s->dev) == should_start) {
- return;
+ return 0;
}
if (should_start) {
@@ -253,9 +253,12 @@ static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status)
qemu_chr_fe_disconnect(&s->chardev);
}
} else {
- vhost_user_blk_stop(vdev);
+ ret = vhost_user_blk_stop(vdev);
+ if (ret < 0) {
+ return ret;
+ }
}
-
+ return 0;
}
static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev,
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index b54d01d..9bab271 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -1270,7 +1270,7 @@ static uint64_t virtio_blk_get_features(VirtIODevice *vdev, uint64_t features,
return features;
}
-static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status)
+static int virtio_blk_set_status(VirtIODevice *vdev, uint8_t status)
{
VirtIOBlock *s = VIRTIO_BLK(vdev);
@@ -1279,7 +1279,7 @@ static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status)
}
if (!(status & VIRTIO_CONFIG_S_DRIVER_OK)) {
- return;
+ return 0;
}
/* A guest that supports VIRTIO_BLK_F_CONFIG_WCE must be able to send
@@ -1302,6 +1302,7 @@ static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status)
virtio_vdev_has_feature(vdev,
VIRTIO_BLK_F_WCE));
}
+ return 0;
}
static void virtio_blk_save_device(VirtIODevice *vdev, QEMUFile *f)
@@ -1802,7 +1803,7 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
* called after ->start_ioeventfd() has already set blk's AioContext.
*/
s->change =
- qdev_add_vm_change_state_handler(dev, virtio_blk_dma_restart_cb, s);
+ qdev_add_vm_change_state_handler(dev, virtio_blk_dma_restart_cb, NULL, s);
blk_ram_registrar_init(&s->blk_ram_registrar, s->blk);
blk_set_dev_ops(s->blk, &virtio_block_ops, s);
diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c
index eb79f52..673c50f 100644
--- a/hw/char/virtio-serial-bus.c
+++ b/hw/char/virtio-serial-bus.c
@@ -622,7 +622,7 @@ static void guest_reset(VirtIOSerial *vser)
}
}
-static void set_status(VirtIODevice *vdev, uint8_t status)
+static int set_status(VirtIODevice *vdev, uint8_t status)
{
VirtIOSerial *vser;
VirtIOSerialPort *port;
@@ -650,6 +650,7 @@ static void set_status(VirtIODevice *vdev, uint8_t status)
vsc->enable_backend(port, vdev->vm_running);
}
}
+ return 0;
}
static void vser_reset(VirtIODevice *vdev)
diff --git a/hw/core/vm-change-state-handler.c b/hw/core/vm-change-state-handler.c
index 7064995..99c642b 100644
--- a/hw/core/vm-change-state-handler.c
+++ b/hw/core/vm-change-state-handler.c
@@ -40,6 +40,7 @@ static int qdev_get_dev_tree_depth(DeviceState *dev)
* qdev_add_vm_change_state_handler:
* @dev: the device that owns this handler
* @cb: the callback function to be invoked
+ * @cb_ret: the callback function with return value to be invoked
* @opaque: user data passed to the callback function
*
* This function works like qemu_add_vm_change_state_handler() except callbacks
@@ -50,25 +51,30 @@ static int qdev_get_dev_tree_depth(DeviceState *dev)
* controller's callback is invoked before the children on its bus when the VM
* starts running. The order is reversed when the VM stops running.
*
+ * Note that the parameter `cb` and `cb_ret` are mutually exclusive.
+ *
* Returns: an entry to be freed with qemu_del_vm_change_state_handler()
*/
VMChangeStateEntry *qdev_add_vm_change_state_handler(DeviceState *dev,
VMChangeStateHandler *cb,
+ VMChangeStateHandlerWithRet *cb_ret,
void *opaque)
{
- return qdev_add_vm_change_state_handler_full(dev, cb, NULL, opaque);
+ assert(!cb || !cb_ret);
+ return qdev_add_vm_change_state_handler_full(dev, cb, NULL, cb_ret, opaque);
}
/*
* Exactly like qdev_add_vm_change_state_handler() but passes a prepare_cb
- * argument too.
+ * and the cb_ret arguments too.
*/
VMChangeStateEntry *qdev_add_vm_change_state_handler_full(
- DeviceState *dev, VMChangeStateHandler *cb,
- VMChangeStateHandler *prepare_cb, void *opaque)
+ DeviceState *dev, VMChangeStateHandler *cb, VMChangeStateHandler *prepare_cb,
+ VMChangeStateHandlerWithRet *cb_ret, void *opaque)
{
int depth = qdev_get_dev_tree_depth(dev);
- return qemu_add_vm_change_state_handler_prio_full(cb, prepare_cb, opaque,
- depth);
+ assert(!cb || !cb_ret);
+ return qemu_add_vm_change_state_handler_prio_full(cb, prepare_cb, cb_ret,
+ opaque, depth);
}
diff --git a/hw/cxl/cxl-device-utils.c b/hw/cxl/cxl-device-utils.c
index 52ad1e4..e150d74 100644
--- a/hw/cxl/cxl-device-utils.c
+++ b/hw/cxl/cxl-device-utils.c
@@ -95,11 +95,15 @@ static uint64_t mailbox_reg_read(void *opaque, hwaddr offset, unsigned size)
}
if (offset == A_CXL_DEV_MAILBOX_STS) {
uint64_t status_reg = cxl_dstate->mbox_reg_state64[offset / size];
- if (cci->bg.complete_pct) {
- status_reg = FIELD_DP64(status_reg, CXL_DEV_MAILBOX_STS, BG_OP,
- 0);
- cxl_dstate->mbox_reg_state64[offset / size] = status_reg;
- }
+ int bgop;
+
+ qemu_mutex_lock(&cci->bg.lock);
+ bgop = !(cci->bg.complete_pct == 100 || cci->bg.aborted);
+
+ status_reg = FIELD_DP64(status_reg, CXL_DEV_MAILBOX_STS, BG_OP,
+ bgop);
+ cxl_dstate->mbox_reg_state64[offset / size] = status_reg;
+ qemu_mutex_unlock(&cci->bg.lock);
}
return cxl_dstate->mbox_reg_state64[offset / size];
default:
diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 516c01d..299f232 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -7,6 +7,8 @@
* COPYING file in the top-level directory.
*/
+#include <math.h>
+
#include "qemu/osdep.h"
#include "hw/pci/msi.h"
#include "hw/pci/msix.h"
@@ -26,6 +28,11 @@
#define CXL_DC_EVENT_LOG_SIZE 8
#define CXL_NUM_EXTENTS_SUPPORTED 512
#define CXL_NUM_TAGS_SUPPORTED 0
+#define CXL_ALERTS_LIFE_USED_WARN_THRESH (1 << 0)
+#define CXL_ALERTS_OVER_TEMP_WARN_THRESH (1 << 1)
+#define CXL_ALERTS_UNDER_TEMP_WARN_THRESH (1 << 2)
+#define CXL_ALERTS_COR_VMEM_ERR_WARN_THRESH (1 << 3)
+#define CXL_ALERTS_COR_PMEM_ERR_WARN_THRESH (1 << 4)
/*
* How to add a new command, example. The command set FOO, with cmd BAR.
@@ -56,6 +63,9 @@ enum {
INFOSTAT = 0x00,
#define IS_IDENTIFY 0x1
#define BACKGROUND_OPERATION_STATUS 0x2
+ #define GET_RESPONSE_MSG_LIMIT 0x3
+ #define SET_RESPONSE_MSG_LIMIT 0x4
+ #define BACKGROUND_OPERATION_ABORT 0x5
EVENTS = 0x01,
#define GET_RECORDS 0x0
#define CLEAR_RECORDS 0x1
@@ -81,9 +91,13 @@ enum {
#define GET_PARTITION_INFO 0x0
#define GET_LSA 0x2
#define SET_LSA 0x3
+ HEALTH_INFO_ALERTS = 0x42,
+ #define GET_ALERT_CONFIG 0x1
+ #define SET_ALERT_CONFIG 0x2
SANITIZE = 0x44,
#define OVERWRITE 0x0
#define SECURE_ERASE 0x1
+ #define MEDIA_OPERATIONS 0x2
PERSISTENT_MEM = 0x45,
#define GET_SECURITY_STATE 0x0
MEDIA_AND_POISON = 0x43,
@@ -412,12 +426,58 @@ static CXLRetCode cmd_infostat_identify(const struct cxl_cmd *cmd,
is_identify->component_type = 0x3; /* Type 3 */
}
- /* TODO: Allow this to vary across different CCIs */
- is_identify->max_message_size = 9; /* 512 bytes - MCTP_CXL_MAILBOX_BYTES */
+ is_identify->max_message_size = (uint8_t)log2(cci->payload_max);
*len_out = sizeof(*is_identify);
return CXL_MBOX_SUCCESS;
}
+/* CXL r3.1 section 8.2.9.1.3: Get Response Message Limit (Opcode 0003h) */
+static CXLRetCode cmd_get_response_msg_limit(const struct cxl_cmd *cmd,
+ uint8_t *payload_in,
+ size_t len_in,
+ uint8_t *payload_out,
+ size_t *len_out,
+ CXLCCI *cci)
+{
+ struct {
+ uint8_t rsp_limit;
+ } QEMU_PACKED *get_rsp_msg_limit = (void *)payload_out;
+ QEMU_BUILD_BUG_ON(sizeof(*get_rsp_msg_limit) != 1);
+
+ get_rsp_msg_limit->rsp_limit = (uint8_t)log2(cci->payload_max);
+
+ *len_out = sizeof(*get_rsp_msg_limit);
+ return CXL_MBOX_SUCCESS;
+}
+
+/* CXL r3.1 section 8.2.9.1.4: Set Response Message Limit (Opcode 0004h) */
+static CXLRetCode cmd_set_response_msg_limit(const struct cxl_cmd *cmd,
+ uint8_t *payload_in,
+ size_t len_in,
+ uint8_t *payload_out,
+ size_t *len_out,
+ CXLCCI *cci)
+{
+ struct {
+ uint8_t rsp_limit;
+ } QEMU_PACKED *in = (void *)payload_in;
+ QEMU_BUILD_BUG_ON(sizeof(*in) != 1);
+ struct {
+ uint8_t rsp_limit;
+ } QEMU_PACKED *out = (void *)payload_out;
+ QEMU_BUILD_BUG_ON(sizeof(*out) != 1);
+
+ if (in->rsp_limit < 8 || in->rsp_limit > 10) {
+ return CXL_MBOX_INVALID_INPUT;
+ }
+
+ cci->payload_max = 1 << in->rsp_limit;
+ out->rsp_limit = in->rsp_limit;
+
+ *len_out = sizeof(*out);
+ return CXL_MBOX_SUCCESS;
+}
+
static void cxl_set_dsp_active_bm(PCIBus *b, PCIDevice *d,
void *private)
{
@@ -636,6 +696,41 @@ static CXLRetCode cmd_infostat_bg_op_sts(const struct cxl_cmd *cmd,
return CXL_MBOX_SUCCESS;
}
+/*
+ * CXL r3.1 Section 8.2.9.1.5:
+ * Request Abort Background Operation (Opcode 0005h)
+ */
+static CXLRetCode cmd_infostat_bg_op_abort(const struct cxl_cmd *cmd,
+ uint8_t *payload_in,
+ size_t len_in,
+ uint8_t *payload_out,
+ size_t *len_out,
+ CXLCCI *cci)
+{
+ int bg_set = cci->bg.opcode >> 8;
+ int bg_cmd = cci->bg.opcode & 0xff;
+ const struct cxl_cmd *bg_c = &cci->cxl_cmd_set[bg_set][bg_cmd];
+
+ if (!(bg_c->effect & CXL_MBOX_BACKGROUND_OPERATION_ABORT)) {
+ return CXL_MBOX_REQUEST_ABORT_NOTSUP;
+ }
+
+ qemu_mutex_lock(&cci->bg.lock);
+ if (cci->bg.runtime) {
+ /* operation is near complete, let it finish */
+ if (cci->bg.complete_pct < 85) {
+ timer_del(cci->bg.timer);
+ cci->bg.ret_code = CXL_MBOX_ABORTED;
+ cci->bg.starttime = 0;
+ cci->bg.runtime = 0;
+ cci->bg.aborted = true;
+ }
+ }
+ qemu_mutex_unlock(&cci->bg.lock);
+
+ return CXL_MBOX_SUCCESS;
+}
+
#define CXL_FW_SLOTS 2
#define CXL_FW_SIZE 0x02000000 /* 32 mb */
@@ -1523,6 +1618,97 @@ static CXLRetCode cmd_ccls_set_lsa(const struct cxl_cmd *cmd,
return CXL_MBOX_SUCCESS;
}
+/* CXL r3.2 Section 8.2.10.9.3.2 Get Alert Configuration (Opcode 4201h) */
+static CXLRetCode cmd_get_alert_config(const struct cxl_cmd *cmd,
+ uint8_t *payload_in,
+ size_t len_in,
+ uint8_t *payload_out,
+ size_t *len_out,
+ CXLCCI *cci)
+{
+ CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
+ CXLAlertConfig *out = (CXLAlertConfig *)payload_out;
+
+ memcpy(out, &ct3d->alert_config, sizeof(ct3d->alert_config));
+ *len_out = sizeof(ct3d->alert_config);
+
+ return CXL_MBOX_SUCCESS;
+}
+
+/* CXL r3.2 Section 8.2.10.9.3.3 Set Alert Configuration (Opcode 4202h) */
+static CXLRetCode cmd_set_alert_config(const struct cxl_cmd *cmd,
+ uint8_t *payload_in,
+ size_t len_in,
+ uint8_t *payload_out,
+ size_t *len_out,
+ CXLCCI *cci)
+{
+ CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
+ CXLAlertConfig *alert_config = &ct3d->alert_config;
+ struct {
+ uint8_t valid_alert_actions;
+ uint8_t enable_alert_actions;
+ uint8_t life_used_warn_thresh;
+ uint8_t rsvd;
+ uint16_t over_temp_warn_thresh;
+ uint16_t under_temp_warn_thresh;
+ uint16_t cor_vmem_err_warn_thresh;
+ uint16_t cor_pmem_err_warn_thresh;
+ } QEMU_PACKED *in = (void *)payload_in;
+
+ if (in->valid_alert_actions & CXL_ALERTS_LIFE_USED_WARN_THRESH) {
+ /*
+ * CXL r3.2 Table 8-149 The life used warning threshold shall be
+ * less than the life used critical alert value.
+ */
+ if (in->life_used_warn_thresh >=
+ alert_config->life_used_crit_alert_thresh) {
+ return CXL_MBOX_INVALID_INPUT;
+ }
+ alert_config->life_used_warn_thresh = in->life_used_warn_thresh;
+ alert_config->enable_alerts |= CXL_ALERTS_LIFE_USED_WARN_THRESH;
+ }
+
+ if (in->valid_alert_actions & CXL_ALERTS_OVER_TEMP_WARN_THRESH) {
+ /*
+ * CXL r3.2 Table 8-149 The Device Over-Temperature Warning Threshold
+ * shall be less than the the Device Over-Temperature Critical
+ * Alert Threshold.
+ */
+ if (in->over_temp_warn_thresh >=
+ alert_config->over_temp_crit_alert_thresh) {
+ return CXL_MBOX_INVALID_INPUT;
+ }
+ alert_config->over_temp_warn_thresh = in->over_temp_warn_thresh;
+ alert_config->enable_alerts |= CXL_ALERTS_OVER_TEMP_WARN_THRESH;
+ }
+
+ if (in->valid_alert_actions & CXL_ALERTS_UNDER_TEMP_WARN_THRESH) {
+ /*
+ * CXL r3.2 Table 8-149 The Device Under-Temperature Warning Threshold
+ * shall be higher than the the Device Under-Temperature Critical
+ * Alert Threshold.
+ */
+ if (in->under_temp_warn_thresh <=
+ alert_config->under_temp_crit_alert_thresh) {
+ return CXL_MBOX_INVALID_INPUT;
+ }
+ alert_config->under_temp_warn_thresh = in->under_temp_warn_thresh;
+ alert_config->enable_alerts |= CXL_ALERTS_UNDER_TEMP_WARN_THRESH;
+ }
+
+ if (in->valid_alert_actions & CXL_ALERTS_COR_VMEM_ERR_WARN_THRESH) {
+ alert_config->cor_vmem_err_warn_thresh = in->cor_vmem_err_warn_thresh;
+ alert_config->enable_alerts |= CXL_ALERTS_COR_VMEM_ERR_WARN_THRESH;
+ }
+
+ if (in->valid_alert_actions & CXL_ALERTS_COR_PMEM_ERR_WARN_THRESH) {
+ alert_config->cor_pmem_err_warn_thresh = in->cor_pmem_err_warn_thresh;
+ alert_config->enable_alerts |= CXL_ALERTS_COR_PMEM_ERR_WARN_THRESH;
+ }
+ return CXL_MBOX_SUCCESS;
+}
+
/* Perform the actual device zeroing */
static void __do_sanitization(CXLType3Dev *ct3d)
{
@@ -1553,34 +1739,10 @@ static void __do_sanitization(CXLType3Dev *ct3d)
cxl_discard_all_event_records(&ct3d->cxl_dstate);
}
-/*
- * CXL r3.1 Section 8.2.9.9.5.1: Sanitize (Opcode 4400h)
- *
- * Once the Sanitize command has started successfully, the device shall be
- * placed in the media disabled state. If the command fails or is interrupted
- * by a reset or power failure, it shall remain in the media disabled state
- * until a successful Sanitize command has been completed. During this state:
- *
- * 1. Memory writes to the device will have no effect, and all memory reads
- * will return random values (no user data returned, even for locations that
- * the failed Sanitize operation didn’t sanitize yet).
- *
- * 2. Mailbox commands shall still be processed in the disabled state, except
- * that commands that access Sanitized areas shall fail with the Media Disabled
- * error code.
- */
-static CXLRetCode cmd_sanitize_overwrite(const struct cxl_cmd *cmd,
- uint8_t *payload_in,
- size_t len_in,
- uint8_t *payload_out,
- size_t *len_out,
- CXLCCI *cci)
+static int get_sanitize_duration(uint64_t total_mem)
{
- CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
- uint64_t total_mem; /* in Mb */
- int secs;
+ int secs = 0;
- total_mem = (ct3d->cxl_dstate.vmem_size + ct3d->cxl_dstate.pmem_size) >> 20;
if (total_mem <= 512) {
secs = 4;
} else if (total_mem <= 1024) {
@@ -1609,6 +1771,39 @@ static CXLRetCode cmd_sanitize_overwrite(const struct cxl_cmd *cmd,
secs = 240 * 60; /* max 4 hrs */
}
+ return secs;
+}
+
+/*
+ * CXL r3.1 Section 8.2.9.9.5.1: Sanitize (Opcode 4400h)
+ *
+ * Once the Sanitize command has started successfully, the device shall be
+ * placed in the media disabled state. If the command fails or is interrupted
+ * by a reset or power failure, it shall remain in the media disabled state
+ * until a successful Sanitize command has been completed. During this state:
+ *
+ * 1. Memory writes to the device will have no effect, and all memory reads
+ * will return random values (no user data returned, even for locations that
+ * the failed Sanitize operation didn’t sanitize yet).
+ *
+ * 2. Mailbox commands shall still be processed in the disabled state, except
+ * that commands that access Sanitized areas shall fail with the Media Disabled
+ * error code.
+ */
+static CXLRetCode cmd_sanitize_overwrite(const struct cxl_cmd *cmd,
+ uint8_t *payload_in,
+ size_t len_in,
+ uint8_t *payload_out,
+ size_t *len_out,
+ CXLCCI *cci)
+{
+ CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
+ uint64_t total_mem; /* in Mb */
+ int secs;
+
+ total_mem = (ct3d->cxl_dstate.vmem_size + ct3d->cxl_dstate.pmem_size) >> 20;
+ secs = get_sanitize_duration(total_mem);
+
/* EBUSY other bg cmds as of now */
cci->bg.runtime = secs * 1000UL;
*len_out = 0;
@@ -1619,6 +1814,324 @@ static CXLRetCode cmd_sanitize_overwrite(const struct cxl_cmd *cmd,
return CXL_MBOX_BG_STARTED;
}
+struct dpa_range_list_entry {
+ uint64_t starting_dpa;
+ uint64_t length;
+} QEMU_PACKED;
+
+struct CXLSanitizeInfo {
+ uint32_t dpa_range_count;
+ uint8_t fill_value;
+ struct dpa_range_list_entry dpa_range_list[];
+} QEMU_PACKED;
+
+static uint64_t get_vmr_size(CXLType3Dev *ct3d, MemoryRegion **vmr)
+{
+ MemoryRegion *mr;
+ if (ct3d->hostvmem) {
+ mr = host_memory_backend_get_memory(ct3d->hostvmem);
+ if (vmr) {
+ *vmr = mr;
+ }
+ return memory_region_size(mr);
+ }
+ return 0;
+}
+
+static uint64_t get_pmr_size(CXLType3Dev *ct3d, MemoryRegion **pmr)
+{
+ MemoryRegion *mr;
+ if (ct3d->hostpmem) {
+ mr = host_memory_backend_get_memory(ct3d->hostpmem);
+ if (pmr) {
+ *pmr = mr;
+ }
+ return memory_region_size(mr);
+ }
+ return 0;
+}
+
+static uint64_t get_dc_size(CXLType3Dev *ct3d, MemoryRegion **dc_mr)
+{
+ MemoryRegion *mr;
+ if (ct3d->dc.host_dc) {
+ mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
+ if (dc_mr) {
+ *dc_mr = mr;
+ }
+ return memory_region_size(mr);
+ }
+ return 0;
+}
+
+static int validate_dpa_addr(CXLType3Dev *ct3d, uint64_t dpa_addr,
+ size_t length)
+{
+ uint64_t vmr_size, pmr_size, dc_size;
+
+ if ((dpa_addr % CXL_CACHE_LINE_SIZE) ||
+ (length % CXL_CACHE_LINE_SIZE) ||
+ (length <= 0)) {
+ return -EINVAL;
+ }
+
+ vmr_size = get_vmr_size(ct3d, NULL);
+ pmr_size = get_pmr_size(ct3d, NULL);
+ dc_size = get_dc_size(ct3d, NULL);
+
+ if (dpa_addr + length > vmr_size + pmr_size + dc_size) {
+ return -EINVAL;
+ }
+
+ if (dpa_addr > vmr_size + pmr_size) {
+ if (!ct3_test_region_block_backed(ct3d, dpa_addr, length)) {
+ return -ENODEV;
+ }
+ }
+
+ return 0;
+}
+
+static int sanitize_range(CXLType3Dev *ct3d, uint64_t dpa_addr, size_t length,
+ uint8_t fill_value)
+{
+
+ uint64_t vmr_size, pmr_size;
+ AddressSpace *as = NULL;
+ MemTxAttrs mem_attrs = {};
+
+ vmr_size = get_vmr_size(ct3d, NULL);
+ pmr_size = get_pmr_size(ct3d, NULL);
+
+ if (dpa_addr < vmr_size) {
+ as = &ct3d->hostvmem_as;
+ } else if (dpa_addr < vmr_size + pmr_size) {
+ as = &ct3d->hostpmem_as;
+ } else {
+ if (!ct3_test_region_block_backed(ct3d, dpa_addr, length)) {
+ return -ENODEV;
+ }
+ as = &ct3d->dc.host_dc_as;
+ }
+
+ return address_space_set(as, dpa_addr, fill_value, length, mem_attrs);
+}
+
+/* Perform the actual device zeroing */
+static void __do_sanitize(CXLType3Dev *ct3d)
+{
+ struct CXLSanitizeInfo *san_info = ct3d->media_op_sanitize;
+ int dpa_range_count = san_info->dpa_range_count;
+ int rc = 0;
+ int i;
+
+ for (i = 0; i < dpa_range_count; i++) {
+ rc = sanitize_range(ct3d, san_info->dpa_range_list[i].starting_dpa,
+ san_info->dpa_range_list[i].length,
+ san_info->fill_value);
+ if (rc) {
+ goto exit;
+ }
+ }
+exit:
+ g_free(ct3d->media_op_sanitize);
+ ct3d->media_op_sanitize = NULL;
+ return;
+}
+
+enum {
+ MEDIA_OP_CLASS_GENERAL = 0x0,
+ #define MEDIA_OP_GEN_SUBC_DISCOVERY 0x0
+ MEDIA_OP_CLASS_SANITIZE = 0x1,
+ #define MEDIA_OP_SAN_SUBC_SANITIZE 0x0
+ #define MEDIA_OP_SAN_SUBC_ZERO 0x1
+};
+
+struct media_op_supported_list_entry {
+ uint8_t media_op_class;
+ uint8_t media_op_subclass;
+};
+
+struct media_op_discovery_out_pl {
+ uint64_t dpa_range_granularity;
+ uint16_t total_supported_operations;
+ uint16_t num_of_supported_operations;
+ struct media_op_supported_list_entry entry[];
+} QEMU_PACKED;
+
+static const struct media_op_supported_list_entry media_op_matrix[] = {
+ { MEDIA_OP_CLASS_GENERAL, MEDIA_OP_GEN_SUBC_DISCOVERY },
+ { MEDIA_OP_CLASS_SANITIZE, MEDIA_OP_SAN_SUBC_SANITIZE },
+ { MEDIA_OP_CLASS_SANITIZE, MEDIA_OP_SAN_SUBC_ZERO },
+};
+
+static CXLRetCode media_operations_discovery(uint8_t *payload_in,
+ size_t len_in,
+ uint8_t *payload_out,
+ size_t *len_out)
+{
+ struct {
+ uint8_t media_operation_class;
+ uint8_t media_operation_subclass;
+ uint8_t rsvd[2];
+ uint32_t dpa_range_count;
+ struct {
+ uint16_t start_index;
+ uint16_t num_ops;
+ } discovery_osa;
+ } QEMU_PACKED *media_op_in_disc_pl = (void *)payload_in;
+ struct media_op_discovery_out_pl *media_out_pl =
+ (struct media_op_discovery_out_pl *)payload_out;
+ int num_ops, start_index, i;
+ int count = 0;
+
+ if (len_in < sizeof(*media_op_in_disc_pl)) {
+ return CXL_MBOX_INVALID_PAYLOAD_LENGTH;
+ }
+
+ num_ops = media_op_in_disc_pl->discovery_osa.num_ops;
+ start_index = media_op_in_disc_pl->discovery_osa.start_index;
+
+ /*
+ * As per spec CXL r3.2 8.2.10.9.5.3 dpa_range_count should be zero and
+ * start index should not exceed the total number of entries for discovery
+ * sub class command.
+ */
+ if (media_op_in_disc_pl->dpa_range_count ||
+ start_index > ARRAY_SIZE(media_op_matrix)) {
+ return CXL_MBOX_INVALID_INPUT;
+ }
+
+ media_out_pl->dpa_range_granularity = CXL_CACHE_LINE_SIZE;
+ media_out_pl->total_supported_operations =
+ ARRAY_SIZE(media_op_matrix);
+ if (num_ops > 0) {
+ for (i = start_index; i < start_index + num_ops; i++) {
+ media_out_pl->entry[count].media_op_class =
+ media_op_matrix[i].media_op_class;
+ media_out_pl->entry[count].media_op_subclass =
+ media_op_matrix[i].media_op_subclass;
+ count++;
+ if (count == num_ops) {
+ break;
+ }
+ }
+ }
+
+ media_out_pl->num_of_supported_operations = count;
+ *len_out = sizeof(*media_out_pl) + count * sizeof(*media_out_pl->entry);
+ return CXL_MBOX_SUCCESS;
+}
+
+static CXLRetCode media_operations_sanitize(CXLType3Dev *ct3d,
+ uint8_t *payload_in,
+ size_t len_in,
+ uint8_t *payload_out,
+ size_t *len_out,
+ uint8_t fill_value,
+ CXLCCI *cci)
+{
+ struct media_operations_sanitize {
+ uint8_t media_operation_class;
+ uint8_t media_operation_subclass;
+ uint8_t rsvd[2];
+ uint32_t dpa_range_count;
+ struct dpa_range_list_entry dpa_range_list[];
+ } QEMU_PACKED *media_op_in_sanitize_pl = (void *)payload_in;
+ uint32_t dpa_range_count = media_op_in_sanitize_pl->dpa_range_count;
+ uint64_t total_mem = 0;
+ size_t dpa_range_list_size;
+ int secs = 0, i;
+
+ if (dpa_range_count == 0) {
+ return CXL_MBOX_SUCCESS;
+ }
+
+ dpa_range_list_size = dpa_range_count * sizeof(struct dpa_range_list_entry);
+ if (len_in < (sizeof(*media_op_in_sanitize_pl) + dpa_range_list_size)) {
+ return CXL_MBOX_INVALID_PAYLOAD_LENGTH;
+ }
+
+ for (i = 0; i < dpa_range_count; i++) {
+ uint64_t start_dpa =
+ media_op_in_sanitize_pl->dpa_range_list[i].starting_dpa;
+ uint64_t length = media_op_in_sanitize_pl->dpa_range_list[i].length;
+
+ if (validate_dpa_addr(ct3d, start_dpa, length)) {
+ return CXL_MBOX_INVALID_INPUT;
+ }
+ total_mem += length;
+ }
+ ct3d->media_op_sanitize = g_malloc0(sizeof(struct CXLSanitizeInfo) +
+ dpa_range_list_size);
+
+ ct3d->media_op_sanitize->dpa_range_count = dpa_range_count;
+ ct3d->media_op_sanitize->fill_value = fill_value;
+ memcpy(ct3d->media_op_sanitize->dpa_range_list,
+ media_op_in_sanitize_pl->dpa_range_list,
+ dpa_range_list_size);
+ secs = get_sanitize_duration(total_mem >> 20);
+
+ /* EBUSY other bg cmds as of now */
+ cci->bg.runtime = secs * 1000UL;
+ *len_out = 0;
+ /*
+ * media op sanitize is targeted so no need to disable media or
+ * clear event logs
+ */
+ return CXL_MBOX_BG_STARTED;
+}
+
+static CXLRetCode cmd_media_operations(const struct cxl_cmd *cmd,
+ uint8_t *payload_in,
+ size_t len_in,
+ uint8_t *payload_out,
+ size_t *len_out,
+ CXLCCI *cci)
+{
+ struct {
+ uint8_t media_operation_class;
+ uint8_t media_operation_subclass;
+ uint8_t rsvd[2];
+ uint32_t dpa_range_count;
+ } QEMU_PACKED *media_op_in_common_pl = (void *)payload_in;
+ CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
+ uint8_t media_op_cl = 0;
+ uint8_t media_op_subclass = 0;
+
+ if (len_in < sizeof(*media_op_in_common_pl)) {
+ return CXL_MBOX_INVALID_PAYLOAD_LENGTH;
+ }
+
+ media_op_cl = media_op_in_common_pl->media_operation_class;
+ media_op_subclass = media_op_in_common_pl->media_operation_subclass;
+
+ switch (media_op_cl) {
+ case MEDIA_OP_CLASS_GENERAL:
+ if (media_op_subclass != MEDIA_OP_GEN_SUBC_DISCOVERY) {
+ return CXL_MBOX_UNSUPPORTED;
+ }
+
+ return media_operations_discovery(payload_in, len_in, payload_out,
+ len_out);
+ case MEDIA_OP_CLASS_SANITIZE:
+ switch (media_op_subclass) {
+ case MEDIA_OP_SAN_SUBC_SANITIZE:
+ return media_operations_sanitize(ct3d, payload_in, len_in,
+ payload_out, len_out, 0xF,
+ cci);
+ case MEDIA_OP_SAN_SUBC_ZERO:
+ return media_operations_sanitize(ct3d, payload_in, len_in,
+ payload_out, len_out, 0,
+ cci);
+ default:
+ return CXL_MBOX_UNSUPPORTED;
+ }
+ default:
+ return CXL_MBOX_UNSUPPORTED;
+ }
+}
+
static CXLRetCode cmd_get_security_state(const struct cxl_cmd *cmd,
uint8_t *payload_in,
size_t len_in,
@@ -2715,6 +3228,8 @@ static CXLRetCode cmd_dcd_release_dyn_cap(const struct cxl_cmd *cmd,
}
static const struct cxl_cmd cxl_cmd_set[256][256] = {
+ [INFOSTAT][BACKGROUND_OPERATION_ABORT] = { "BACKGROUND_OPERATION_ABORT",
+ cmd_infostat_bg_op_abort, 0, 0 },
[EVENTS][GET_RECORDS] = { "EVENTS_GET_RECORDS",
cmd_events_get_records, 1, 0 },
[EVENTS][CLEAR_RECORDS] = { "EVENTS_CLEAR_RECORDS",
@@ -2727,9 +3242,11 @@ static const struct cxl_cmd cxl_cmd_set[256][256] = {
[FIRMWARE_UPDATE][GET_INFO] = { "FIRMWARE_UPDATE_GET_INFO",
cmd_firmware_update_get_info, 0, 0 },
[FIRMWARE_UPDATE][TRANSFER] = { "FIRMWARE_UPDATE_TRANSFER",
- cmd_firmware_update_transfer, ~0, CXL_MBOX_BACKGROUND_OPERATION },
+ cmd_firmware_update_transfer, ~0,
+ CXL_MBOX_BACKGROUND_OPERATION | CXL_MBOX_BACKGROUND_OPERATION_ABORT },
[FIRMWARE_UPDATE][ACTIVATE] = { "FIRMWARE_UPDATE_ACTIVATE",
- cmd_firmware_update_activate, 2, CXL_MBOX_BACKGROUND_OPERATION },
+ cmd_firmware_update_activate, 2,
+ CXL_MBOX_BACKGROUND_OPERATION | CXL_MBOX_BACKGROUND_OPERATION_ABORT },
[TIMESTAMP][GET] = { "TIMESTAMP_GET", cmd_timestamp_get, 0, 0 },
[TIMESTAMP][SET] = { "TIMESTAMP_SET", cmd_timestamp_set,
8, CXL_MBOX_IMMEDIATE_POLICY_CHANGE },
@@ -2755,9 +3272,20 @@ static const struct cxl_cmd cxl_cmd_set[256][256] = {
[CCLS][GET_LSA] = { "CCLS_GET_LSA", cmd_ccls_get_lsa, 8, 0 },
[CCLS][SET_LSA] = { "CCLS_SET_LSA", cmd_ccls_set_lsa,
~0, CXL_MBOX_IMMEDIATE_CONFIG_CHANGE | CXL_MBOX_IMMEDIATE_DATA_CHANGE },
+ [HEALTH_INFO_ALERTS][GET_ALERT_CONFIG] = {
+ "HEALTH_INFO_ALERTS_GET_ALERT_CONFIG",
+ cmd_get_alert_config, 0, 0 },
+ [HEALTH_INFO_ALERTS][SET_ALERT_CONFIG] = {
+ "HEALTH_INFO_ALERTS_SET_ALERT_CONFIG",
+ cmd_set_alert_config, 12, CXL_MBOX_IMMEDIATE_POLICY_CHANGE },
[SANITIZE][OVERWRITE] = { "SANITIZE_OVERWRITE", cmd_sanitize_overwrite, 0,
(CXL_MBOX_IMMEDIATE_DATA_CHANGE |
CXL_MBOX_SECURITY_STATE_CHANGE |
+ CXL_MBOX_BACKGROUND_OPERATION |
+ CXL_MBOX_BACKGROUND_OPERATION_ABORT)},
+ [SANITIZE][MEDIA_OPERATIONS] = { "MEDIA_OPERATIONS", cmd_media_operations,
+ ~0,
+ (CXL_MBOX_IMMEDIATE_DATA_CHANGE |
CXL_MBOX_BACKGROUND_OPERATION)},
[PERSISTENT_MEM][GET_SECURITY_STATE] = { "GET_SECURITY_STATE",
cmd_get_security_state, 0, 0 },
@@ -2771,7 +3299,8 @@ static const struct cxl_cmd cxl_cmd_set[256][256] = {
"MEDIA_AND_POISON_GET_SCAN_MEDIA_CAPABILITIES",
cmd_media_get_scan_media_capabilities, 16, 0 },
[MEDIA_AND_POISON][SCAN_MEDIA] = { "MEDIA_AND_POISON_SCAN_MEDIA",
- cmd_media_scan_media, 17, CXL_MBOX_BACKGROUND_OPERATION },
+ cmd_media_scan_media, 17,
+ (CXL_MBOX_BACKGROUND_OPERATION | CXL_MBOX_BACKGROUND_OPERATION_ABORT)},
[MEDIA_AND_POISON][GET_SCAN_MEDIA_RESULTS] = {
"MEDIA_AND_POISON_GET_SCAN_MEDIA_RESULTS",
cmd_media_get_scan_media_results, 0, 0 },
@@ -2795,6 +3324,8 @@ static const struct cxl_cmd cxl_cmd_set_sw[256][256] = {
[INFOSTAT][IS_IDENTIFY] = { "IDENTIFY", cmd_infostat_identify, 0, 0 },
[INFOSTAT][BACKGROUND_OPERATION_STATUS] = { "BACKGROUND_OPERATION_STATUS",
cmd_infostat_bg_op_sts, 0, 0 },
+ [INFOSTAT][BACKGROUND_OPERATION_ABORT] = { "BACKGROUND_OPERATION_ABORT",
+ cmd_infostat_bg_op_abort, 0, 0 },
[TIMESTAMP][GET] = { "TIMESTAMP_GET", cmd_timestamp_get, 0, 0 },
[TIMESTAMP][SET] = { "TIMESTAMP_SET", cmd_timestamp_set, 8,
CXL_MBOX_IMMEDIATE_POLICY_CHANGE },
@@ -2881,6 +3412,7 @@ int cxl_process_cci_message(CXLCCI *cci, uint8_t set, uint8_t cmd,
cci->bg.opcode = (set << 8) | cmd;
cci->bg.complete_pct = 0;
+ cci->bg.aborted = false;
cci->bg.ret_code = 0;
now = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
@@ -2894,10 +3426,12 @@ int cxl_process_cci_message(CXLCCI *cci, uint8_t set, uint8_t cmd,
static void bg_timercb(void *opaque)
{
CXLCCI *cci = opaque;
- uint64_t now = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
- uint64_t total_time = cci->bg.starttime + cci->bg.runtime;
+ uint64_t now, total_time;
- assert(cci->bg.runtime > 0);
+ qemu_mutex_lock(&cci->bg.lock);
+
+ now = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
+ total_time = cci->bg.starttime + cci->bg.runtime;
if (now >= total_time) { /* we are done */
uint16_t ret = CXL_MBOX_SUCCESS;
@@ -2916,6 +3450,12 @@ static void bg_timercb(void *opaque)
cxl_dev_enable_media(&ct3d->cxl_dstate);
}
break;
+ case 0x4402: /* Media Operations sanitize */
+ {
+ CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
+ __do_sanitize(ct3d);
+ }
+ break;
case 0x4304: /* scan media */
{
CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
@@ -2950,6 +3490,8 @@ static void bg_timercb(void *opaque)
msi_notify(pdev, cxl_dstate->mbox_msi_n);
}
}
+
+ qemu_mutex_unlock(&cci->bg.lock);
}
static void cxl_rebuild_cel(CXLCCI *cci)
@@ -2978,12 +3520,21 @@ void cxl_init_cci(CXLCCI *cci, size_t payload_max)
cci->bg.complete_pct = 0;
cci->bg.starttime = 0;
cci->bg.runtime = 0;
+ cci->bg.aborted = false;
cci->bg.timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
bg_timercb, cci);
+ qemu_mutex_init(&cci->bg.lock);
memset(&cci->fw, 0, sizeof(cci->fw));
cci->fw.active_slot = 1;
cci->fw.slot[cci->fw.active_slot - 1] = true;
+ cci->initialized = true;
+}
+
+void cxl_destroy_cci(CXLCCI *cci)
+{
+ qemu_mutex_destroy(&cci->bg.lock);
+ cci->initialized = false;
}
static void cxl_copy_cci_commands(CXLCCI *cci, const struct cxl_cmd (*cxl_cmds)[256])
@@ -3047,6 +3598,10 @@ void cxl_initialize_t3_ld_cci(CXLCCI *cci, DeviceState *d, DeviceState *intf,
static const struct cxl_cmd cxl_cmd_set_t3_fm_owned_ld_mctp[256][256] = {
[INFOSTAT][IS_IDENTIFY] = { "IDENTIFY", cmd_infostat_identify, 0, 0},
+ [INFOSTAT][GET_RESPONSE_MSG_LIMIT] = { "GET_RESPONSE_MSG_LIMIT",
+ cmd_get_response_msg_limit, 0, 0 },
+ [INFOSTAT][SET_RESPONSE_MSG_LIMIT] = { "SET_RESPONSE_MSG_LIMIT",
+ cmd_set_response_msg_limit, 1, 0 },
[LOGS][GET_SUPPORTED] = { "LOGS_GET_SUPPORTED", cmd_logs_get_supported, 0,
0 },
[LOGS][GET_LOG] = { "LOGS_GET_LOG", cmd_logs_get_log, 0x18, 0 },
diff --git a/hw/display/vhost-user-gpu.c b/hw/display/vhost-user-gpu.c
index 43d4c08..9fc6bbc 100644
--- a/hw/display/vhost-user-gpu.c
+++ b/hw/display/vhost-user-gpu.c
@@ -516,7 +516,7 @@ vhost_user_gpu_set_config(VirtIODevice *vdev,
}
}
-static void
+static int
vhost_user_gpu_set_status(VirtIODevice *vdev, uint8_t val)
{
VhostUserGPU *g = VHOST_USER_GPU(vdev);
@@ -525,18 +525,24 @@ vhost_user_gpu_set_status(VirtIODevice *vdev, uint8_t val)
if (val & VIRTIO_CONFIG_S_DRIVER_OK && vdev->vm_running) {
if (!vhost_user_gpu_do_set_socket(g, &err)) {
error_report_err(err);
- return;
+ return 0;
}
vhost_user_backend_start(g->vhost);
} else {
+ int ret;
+
/* unblock any wait and stop processing */
if (g->vhost_gpu_fd != -1) {
vhost_user_gpu_update_blocked(g, true);
qemu_chr_fe_deinit(&g->vhost_chr, true);
g->vhost_gpu_fd = -1;
}
- vhost_user_backend_stop(g->vhost);
+ ret = vhost_user_backend_stop(g->vhost);
+ if (ret < 0) {
+ return ret;
+ }
}
+ return 0;
}
static bool
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index f40ad06..61851cc 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -2333,10 +2333,10 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker, const char *oem_id,
build_append_int_noprefix(table_data, ivhd_blob->len + 24, 2);
/* DeviceID */
build_append_int_noprefix(table_data,
- object_property_get_int(OBJECT(&s->pci), "addr",
+ object_property_get_int(OBJECT(s->pci), "addr",
&error_abort), 2);
/* Capability offset */
- build_append_int_noprefix(table_data, s->pci.capab_offset, 2);
+ build_append_int_noprefix(table_data, s->pci->capab_offset, 2);
/* IOMMU base address */
build_append_int_noprefix(table_data, s->mr_mmio.addr, 8);
/* PCI Segment Group */
@@ -2368,10 +2368,10 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker, const char *oem_id,
build_append_int_noprefix(table_data, ivhd_blob->len + 40, 2);
/* DeviceID */
build_append_int_noprefix(table_data,
- object_property_get_int(OBJECT(&s->pci), "addr",
+ object_property_get_int(OBJECT(s->pci), "addr",
&error_abort), 2);
/* Capability offset */
- build_append_int_noprefix(table_data, s->pci.capab_offset, 2);
+ build_append_int_noprefix(table_data, s->pci->capab_offset, 2);
/* IOMMU base address */
build_append_int_noprefix(table_data, s->mr_mmio.addr, 8);
/* PCI Segment Group */
diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index 2cf7e24..0775c8f 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -167,11 +167,11 @@ static void amdvi_generate_msi_interrupt(AMDVIState *s)
{
MSIMessage msg = {};
MemTxAttrs attrs = {
- .requester_id = pci_requester_id(&s->pci.dev)
+ .requester_id = pci_requester_id(&s->pci->dev)
};
- if (msi_enabled(&s->pci.dev)) {
- msg = msi_get_message(&s->pci.dev, 0);
+ if (msi_enabled(&s->pci->dev)) {
+ msg = msi_get_message(&s->pci->dev, 0);
address_space_stl_le(&address_space_memory, msg.address, msg.data,
attrs, NULL);
}
@@ -239,7 +239,7 @@ static void amdvi_page_fault(AMDVIState *s, uint16_t devid,
info |= AMDVI_EVENT_IOPF_I | AMDVI_EVENT_IOPF;
amdvi_encode_event(evt, devid, addr, info);
amdvi_log_event(s, evt);
- pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS,
+ pci_word_test_and_set_mask(s->pci->dev.config + PCI_STATUS,
PCI_STATUS_SIG_TARGET_ABORT);
}
/*
@@ -256,7 +256,7 @@ static void amdvi_log_devtab_error(AMDVIState *s, uint16_t devid,
amdvi_encode_event(evt, devid, devtab, info);
amdvi_log_event(s, evt);
- pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS,
+ pci_word_test_and_set_mask(s->pci->dev.config + PCI_STATUS,
PCI_STATUS_SIG_TARGET_ABORT);
}
/* log an event trying to access command buffer
@@ -269,7 +269,7 @@ static void amdvi_log_command_error(AMDVIState *s, hwaddr addr)
amdvi_encode_event(evt, 0, addr, info);
amdvi_log_event(s, evt);
- pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS,
+ pci_word_test_and_set_mask(s->pci->dev.config + PCI_STATUS,
PCI_STATUS_SIG_TARGET_ABORT);
}
/* log an illegal command event
@@ -310,7 +310,7 @@ static void amdvi_log_pagetab_error(AMDVIState *s, uint16_t devid,
info |= AMDVI_EVENT_PAGE_TAB_HW_ERROR;
amdvi_encode_event(evt, devid, addr, info);
amdvi_log_event(s, evt);
- pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS,
+ pci_word_test_and_set_mask(s->pci->dev.config + PCI_STATUS,
PCI_STATUS_SIG_TARGET_ABORT);
}
@@ -1607,26 +1607,92 @@ static void amdvi_sysbus_reset(DeviceState *dev)
{
AMDVIState *s = AMD_IOMMU_DEVICE(dev);
- msi_reset(&s->pci.dev);
+ msi_reset(&s->pci->dev);
amdvi_init(s);
}
+static const VMStateDescription vmstate_amdvi_sysbus_migratable = {
+ .name = "amd-iommu",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .priority = MIG_PRI_IOMMU,
+ .fields = (VMStateField[]) {
+ /* Updated in amdvi_handle_control_write() */
+ VMSTATE_BOOL(enabled, AMDVIState),
+ VMSTATE_BOOL(ga_enabled, AMDVIState),
+ VMSTATE_BOOL(ats_enabled, AMDVIState),
+ VMSTATE_BOOL(cmdbuf_enabled, AMDVIState),
+ VMSTATE_BOOL(completion_wait_intr, AMDVIState),
+ VMSTATE_BOOL(evtlog_enabled, AMDVIState),
+ VMSTATE_BOOL(evtlog_intr, AMDVIState),
+ /* Updated in amdvi_handle_devtab_write() */
+ VMSTATE_UINT64(devtab, AMDVIState),
+ VMSTATE_UINT64(devtab_len, AMDVIState),
+ /* Updated in amdvi_handle_cmdbase_write() */
+ VMSTATE_UINT64(cmdbuf, AMDVIState),
+ VMSTATE_UINT64(cmdbuf_len, AMDVIState),
+ /* Updated in amdvi_handle_cmdhead_write() */
+ VMSTATE_UINT32(cmdbuf_head, AMDVIState),
+ /* Updated in amdvi_handle_cmdtail_write() */
+ VMSTATE_UINT32(cmdbuf_tail, AMDVIState),
+ /* Updated in amdvi_handle_evtbase_write() */
+ VMSTATE_UINT64(evtlog, AMDVIState),
+ VMSTATE_UINT32(evtlog_len, AMDVIState),
+ /* Updated in amdvi_handle_evthead_write() */
+ VMSTATE_UINT32(evtlog_head, AMDVIState),
+ /* Updated in amdvi_handle_evttail_write() */
+ VMSTATE_UINT32(evtlog_tail, AMDVIState),
+ /* Updated in amdvi_handle_pprbase_write() */
+ VMSTATE_UINT64(ppr_log, AMDVIState),
+ VMSTATE_UINT32(pprlog_len, AMDVIState),
+ /* Updated in amdvi_handle_pprhead_write() */
+ VMSTATE_UINT32(pprlog_head, AMDVIState),
+ /* Updated in amdvi_handle_tailhead_write() */
+ VMSTATE_UINT32(pprlog_tail, AMDVIState),
+ /* MMIO registers */
+ VMSTATE_UINT8_ARRAY(mmior, AMDVIState, AMDVI_MMIO_SIZE),
+ VMSTATE_UINT8_ARRAY(romask, AMDVIState, AMDVI_MMIO_SIZE),
+ VMSTATE_UINT8_ARRAY(w1cmask, AMDVIState, AMDVI_MMIO_SIZE),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
static void amdvi_sysbus_realize(DeviceState *dev, Error **errp)
{
+ DeviceClass *dc = (DeviceClass *) object_get_class(OBJECT(dev));
AMDVIState *s = AMD_IOMMU_DEVICE(dev);
MachineState *ms = MACHINE(qdev_get_machine());
PCMachineState *pcms = PC_MACHINE(ms);
X86MachineState *x86ms = X86_MACHINE(ms);
PCIBus *bus = pcms->pcibus;
- s->iotlb = g_hash_table_new_full(amdvi_uint64_hash,
- amdvi_uint64_equal, g_free, g_free);
+ if (s->pci_id) {
+ PCIDevice *pdev = NULL;
+ int ret = pci_qdev_find_device(s->pci_id, &pdev);
- /* This device should take care of IOMMU PCI properties */
- if (!qdev_realize(DEVICE(&s->pci), &bus->qbus, errp)) {
- return;
+ if (ret) {
+ error_report("Cannot find PCI device '%s'", s->pci_id);
+ return;
+ }
+
+ if (!object_dynamic_cast(OBJECT(pdev), TYPE_AMD_IOMMU_PCI)) {
+ error_report("Device '%s' must be an AMDVI-PCI device type", s->pci_id);
+ return;
+ }
+
+ s->pci = AMD_IOMMU_PCI(pdev);
+ dc->vmsd = &vmstate_amdvi_sysbus_migratable;
+ } else {
+ s->pci = AMD_IOMMU_PCI(object_new(TYPE_AMD_IOMMU_PCI));
+ /* This device should take care of IOMMU PCI properties */
+ if (!qdev_realize(DEVICE(s->pci), &bus->qbus, errp)) {
+ return;
+ }
}
+ s->iotlb = g_hash_table_new_full(amdvi_uint64_hash,
+ amdvi_uint64_equal, g_free, g_free);
+
/* Pseudo address space under root PCI bus. */
x86ms->ioapic_as = amdvi_host_dma_iommu(bus, s, AMDVI_IOAPIC_SB_DEVID);
@@ -1663,6 +1729,7 @@ static void amdvi_sysbus_realize(DeviceState *dev, Error **errp)
static const Property amdvi_properties[] = {
DEFINE_PROP_BOOL("xtsup", AMDVIState, xtsup, false),
+ DEFINE_PROP_STRING("pci-id", AMDVIState, pci_id),
};
static const VMStateDescription vmstate_amdvi_sysbus = {
@@ -1670,13 +1737,6 @@ static const VMStateDescription vmstate_amdvi_sysbus = {
.unmigratable = 1
};
-static void amdvi_sysbus_instance_init(Object *klass)
-{
- AMDVIState *s = AMD_IOMMU_DEVICE(klass);
-
- object_initialize(&s->pci, sizeof(s->pci), TYPE_AMD_IOMMU_PCI);
-}
-
static void amdvi_sysbus_class_init(ObjectClass *klass, const void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
@@ -1696,7 +1756,6 @@ static const TypeInfo amdvi_sysbus = {
.name = TYPE_AMD_IOMMU_DEVICE,
.parent = TYPE_X86_IOMMU_DEVICE,
.instance_size = sizeof(AMDVIState),
- .instance_init = amdvi_sysbus_instance_init,
.class_init = amdvi_sysbus_class_init
};
diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h
index 2812513..5672bde 100644
--- a/hw/i386/amd_iommu.h
+++ b/hw/i386/amd_iommu.h
@@ -315,7 +315,8 @@ struct AMDVIPCIState {
struct AMDVIState {
X86IOMMUState iommu; /* IOMMU bus device */
- AMDVIPCIState pci; /* IOMMU PCI device */
+ AMDVIPCIState *pci; /* IOMMU PCI device */
+ char *pci_id; /* ID of AMDVI-PCI device, if user created */
uint32_t version;
@@ -328,7 +329,7 @@ struct AMDVIState {
bool excl_enabled;
hwaddr devtab; /* base address device table */
- size_t devtab_len; /* device table length */
+ uint64_t devtab_len; /* device table length */
hwaddr cmdbuf; /* command buffer base address */
uint64_t cmdbuf_len; /* command buffer length */
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 5f8ed12..69d72ad 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -1728,8 +1728,6 @@ static bool vtd_as_pt_enabled(VTDAddressSpace *as)
static bool vtd_switch_address_space(VTDAddressSpace *as)
{
bool use_iommu, pt;
- /* Whether we need to take the BQL on our own */
- bool take_bql = !bql_locked();
assert(as);
@@ -1746,9 +1744,7 @@ static bool vtd_switch_address_space(VTDAddressSpace *as)
* from vtd_pt_enable_fast_path(). However the memory APIs need
* it. We'd better make sure we have had it already, or, take it.
*/
- if (take_bql) {
- bql_lock();
- }
+ BQL_LOCK_GUARD();
/* Turn off first then on the other */
if (use_iommu) {
@@ -1801,10 +1797,6 @@ static bool vtd_switch_address_space(VTDAddressSpace *as)
memory_region_set_enabled(&as->iommu_ir_fault, false);
}
- if (take_bql) {
- bql_unlock();
- }
-
return use_iommu;
}
@@ -4213,9 +4205,30 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,
VTDAddressSpace *vtd_dev_as;
char name[128];
+ vtd_iommu_lock(s);
vtd_dev_as = g_hash_table_lookup(s->vtd_address_spaces, &key);
+ vtd_iommu_unlock(s);
+
if (!vtd_dev_as) {
- struct vtd_as_key *new_key = g_malloc(sizeof(*new_key));
+ struct vtd_as_key *new_key;
+ /* Slow path */
+
+ /*
+ * memory_region_add_subregion_overlap requires the bql,
+ * make sure we own it.
+ */
+ BQL_LOCK_GUARD();
+ vtd_iommu_lock(s);
+
+ /* Check again as we released the lock for a moment */
+ vtd_dev_as = g_hash_table_lookup(s->vtd_address_spaces, &key);
+ if (vtd_dev_as) {
+ vtd_iommu_unlock(s);
+ return vtd_dev_as;
+ }
+
+ /* Still nothing, allocate a new address space */
+ new_key = g_malloc(sizeof(*new_key));
new_key->bus = bus;
new_key->devfn = devfn;
@@ -4306,6 +4319,8 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,
vtd_switch_address_space(vtd_dev_as);
g_hash_table_insert(s->vtd_address_spaces, new_key, vtd_dev_as);
+
+ vtd_iommu_unlock(s);
}
return vtd_dev_as;
}
diff --git a/hw/input/virtio-input.c b/hw/input/virtio-input.c
index 1818cbd..a3f554f 100644
--- a/hw/input/virtio-input.c
+++ b/hw/input/virtio-input.c
@@ -189,7 +189,7 @@ static uint64_t virtio_input_get_features(VirtIODevice *vdev, uint64_t f,
return f;
}
-static void virtio_input_set_status(VirtIODevice *vdev, uint8_t val)
+static int virtio_input_set_status(VirtIODevice *vdev, uint8_t val)
{
VirtIOInputClass *vic = VIRTIO_INPUT_GET_CLASS(vdev);
VirtIOInput *vinput = VIRTIO_INPUT(vdev);
@@ -202,6 +202,7 @@ static void virtio_input_set_status(VirtIODevice *vdev, uint8_t val)
}
}
}
+ return 0;
}
static void virtio_input_reset(VirtIODevice *vdev)
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index bba923f..94e7274 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -843,6 +843,19 @@ static DOEProtocol doe_cdat_prot[] = {
{ }
};
+/* Initialize CXL device alerts with default threshold values. */
+static void init_alert_config(CXLType3Dev *ct3d)
+{
+ ct3d->alert_config = (CXLAlertConfig) {
+ .life_used_crit_alert_thresh = 75,
+ .life_used_warn_thresh = 40,
+ .over_temp_crit_alert_thresh = 35,
+ .under_temp_crit_alert_thresh = 10,
+ .over_temp_warn_thresh = 25,
+ .under_temp_warn_thresh = 20
+ };
+}
+
static void ct3_realize(PCIDevice *pci_dev, Error **errp)
{
ERRP_GUARD();
@@ -910,6 +923,7 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp)
goto err_msix_uninit;
}
+ init_alert_config(ct3d);
pcie_cap_deverr_init(pci_dev);
/* Leave a bit of room for expansion */
rc = pcie_aer_init(pci_dev, PCI_ERR_VER, 0x200, PCI_ERR_SIZEOF, errp);
@@ -969,6 +983,7 @@ static void ct3_exit(PCIDevice *pci_dev)
cxl_doe_cdat_release(cxl_cstate);
msix_uninit_exclusive_bar(pci_dev);
g_free(regs->special_ops);
+ cxl_destroy_cci(&ct3d->cci);
if (ct3d->dc.host_dc) {
cxl_destroy_dc_regions(ct3d);
address_space_destroy(&ct3d->dc.host_dc_as);
@@ -1224,12 +1239,17 @@ static void ct3d_reset(DeviceState *dev)
* Bring up an endpoint to target with MCTP over VDM.
* This device is emulating an MLD with single LD for now.
*/
+ if (ct3d->vdm_fm_owned_ld_mctp_cci.initialized) {
+ cxl_destroy_cci(&ct3d->vdm_fm_owned_ld_mctp_cci);
+ }
cxl_initialize_t3_fm_owned_ld_mctpcci(&ct3d->vdm_fm_owned_ld_mctp_cci,
DEVICE(ct3d), DEVICE(ct3d),
512); /* Max payload made up */
+ if (ct3d->ld0_cci.initialized) {
+ cxl_destroy_cci(&ct3d->ld0_cci);
+ }
cxl_initialize_t3_ld_cci(&ct3d->ld0_cci, DEVICE(ct3d), DEVICE(ct3d),
512); /* Max payload made up */
-
}
static const Property ct3_props[] = {
diff --git a/hw/misc/pci-testdev.c b/hw/misc/pci-testdev.c
index 3f6a8bb..ba71c50 100644
--- a/hw/misc/pci-testdev.c
+++ b/hw/misc/pci-testdev.c
@@ -90,6 +90,7 @@ struct PCITestDevState {
int current;
uint64_t membar_size;
+ bool membar_backed;
MemoryRegion membar;
};
@@ -258,8 +259,14 @@ static void pci_testdev_realize(PCIDevice *pci_dev, Error **errp)
pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->portio);
if (d->membar_size) {
- memory_region_init(&d->membar, OBJECT(d), "pci-testdev-membar",
- d->membar_size);
+ if (d->membar_backed)
+ memory_region_init_ram(&d->membar, OBJECT(d),
+ "pci-testdev-membar-backed",
+ d->membar_size, NULL);
+ else
+ memory_region_init(&d->membar, OBJECT(d),
+ "pci-testdev-membar",
+ d->membar_size);
pci_register_bar(pci_dev, 2,
PCI_BASE_ADDRESS_SPACE_MEMORY |
PCI_BASE_ADDRESS_MEM_PREFETCH |
@@ -321,6 +328,7 @@ static void qdev_pci_testdev_reset(DeviceState *dev)
static const Property pci_testdev_properties[] = {
DEFINE_PROP_SIZE("membar", PCITestDevState, membar_size, 0),
+ DEFINE_PROP_BOOL("membar-backed", PCITestDevState, membar_backed, false),
};
static void pci_testdev_class_init(ObjectClass *klass, const void *data)
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 2de037c..221252e 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -382,7 +382,7 @@ static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
}
}
-static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
+static int virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
VirtIONet *n = VIRTIO_NET(vdev);
VirtIONetQueue *q;
@@ -437,6 +437,7 @@ static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
}
}
}
+ return 0;
}
static void virtio_net_set_link_status(NetClientState *nc)
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 352b3d1..f5ab510 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -94,6 +94,7 @@ static const Property pci_props[] = {
QEMU_PCIE_ARI_NEXTFN_1_BITNR, false),
DEFINE_PROP_SIZE32("x-max-bounce-buffer-size", PCIDevice,
max_bounce_buffer_size, DEFAULT_MAX_BOUNCE_BUFFER_SIZE),
+ DEFINE_PROP_STRING("sriov-pf", PCIDevice, sriov_pf),
DEFINE_PROP_BIT("x-pcie-ext-tag", PCIDevice, cap_present,
QEMU_PCIE_EXT_TAG_BITNR, true),
{ .name = "busnr", .info = &prop_pci_busnr },
@@ -1105,13 +1106,8 @@ static void pci_init_multifunction(PCIBus *bus, PCIDevice *dev, Error **errp)
dev->config[PCI_HEADER_TYPE] |= PCI_HEADER_TYPE_MULTI_FUNCTION;
}
- /*
- * With SR/IOV and ARI, a device at function 0 need not be a multifunction
- * device, as it may just be a VF that ended up with function 0 in
- * the legacy PCI interpretation. Avoid failing in such cases:
- */
- if (pci_is_vf(dev) &&
- dev->exp.sriov_vf.pf->cap_present & QEMU_PCI_CAP_MULTIFUNCTION) {
+ /* SR/IOV is not handled here. */
+ if (pci_is_vf(dev)) {
return;
}
@@ -1144,7 +1140,8 @@ static void pci_init_multifunction(PCIBus *bus, PCIDevice *dev, Error **errp)
}
/* function 0 indicates single function, so function > 0 must be NULL */
for (func = 1; func < PCI_FUNC_MAX; ++func) {
- if (bus->devices[PCI_DEVFN(slot, func)]) {
+ PCIDevice *device = bus->devices[PCI_DEVFN(slot, func)];
+ if (device && !pci_is_vf(device)) {
error_setg(errp, "PCI: %x.0 indicates single function, "
"but %x.%x is already populated.",
slot, slot, func);
@@ -1432,6 +1429,7 @@ static void pci_qdev_unrealize(DeviceState *dev)
pci_unregister_io_regions(pci_dev);
pci_del_option_rom(pci_dev);
+ pcie_sriov_unregister_device(pci_dev);
if (pc->exit) {
pc->exit(pci_dev);
@@ -1463,7 +1461,6 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num,
pcibus_t size = memory_region_size(memory);
uint8_t hdr_type;
- assert(!pci_is_vf(pci_dev)); /* VFs must use pcie_sriov_vf_register_bar */
assert(region_num >= 0);
assert(region_num < PCI_NUM_REGIONS);
assert(is_power_of_2(size));
@@ -1475,7 +1472,6 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num,
r = &pci_dev->io_regions[region_num];
assert(!r->size);
- r->addr = PCI_BAR_UNMAPPED;
r->size = size;
r->type = type;
r->memory = memory;
@@ -1483,22 +1479,35 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num,
? pci_get_bus(pci_dev)->address_space_io
: pci_get_bus(pci_dev)->address_space_mem;
- wmask = ~(size - 1);
- if (region_num == PCI_ROM_SLOT) {
- /* ROM enable bit is writable */
- wmask |= PCI_ROM_ADDRESS_ENABLE;
- }
+ if (pci_is_vf(pci_dev)) {
+ PCIDevice *pf = pci_dev->exp.sriov_vf.pf;
+ assert(!pf || type == pf->exp.sriov_pf.vf_bar_type[region_num]);
- addr = pci_bar(pci_dev, region_num);
- pci_set_long(pci_dev->config + addr, type);
-
- if (!(r->type & PCI_BASE_ADDRESS_SPACE_IO) &&
- r->type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
- pci_set_quad(pci_dev->wmask + addr, wmask);
- pci_set_quad(pci_dev->cmask + addr, ~0ULL);
+ r->addr = pci_bar_address(pci_dev, region_num, r->type, r->size);
+ if (r->addr != PCI_BAR_UNMAPPED) {
+ memory_region_add_subregion_overlap(r->address_space,
+ r->addr, r->memory, 1);
+ }
} else {
- pci_set_long(pci_dev->wmask + addr, wmask & 0xffffffff);
- pci_set_long(pci_dev->cmask + addr, 0xffffffff);
+ r->addr = PCI_BAR_UNMAPPED;
+
+ wmask = ~(size - 1);
+ if (region_num == PCI_ROM_SLOT) {
+ /* ROM enable bit is writable */
+ wmask |= PCI_ROM_ADDRESS_ENABLE;
+ }
+
+ addr = pci_bar(pci_dev, region_num);
+ pci_set_long(pci_dev->config + addr, type);
+
+ if (!(r->type & PCI_BASE_ADDRESS_SPACE_IO) &&
+ r->type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
+ pci_set_quad(pci_dev->wmask + addr, wmask);
+ pci_set_quad(pci_dev->cmask + addr, ~0ULL);
+ } else {
+ pci_set_long(pci_dev->wmask + addr, wmask & 0xffffffff);
+ pci_set_long(pci_dev->cmask + addr, 0xffffffff);
+ }
}
}
@@ -1587,7 +1596,11 @@ static pcibus_t pci_config_get_bar_addr(PCIDevice *d, int reg,
pci_get_word(pf->config + sriov_cap + PCI_SRIOV_VF_OFFSET);
uint16_t vf_stride =
pci_get_word(pf->config + sriov_cap + PCI_SRIOV_VF_STRIDE);
- uint32_t vf_num = (d->devfn - (pf->devfn + vf_offset)) / vf_stride;
+ uint32_t vf_num = d->devfn - (pf->devfn + vf_offset);
+
+ if (vf_num) {
+ vf_num /= vf_stride;
+ }
if (type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
new_addr = pci_get_quad(pf->config + bar);
@@ -2261,6 +2274,11 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
}
}
+ if (!pcie_sriov_register_device(pci_dev, errp)) {
+ pci_qdev_unrealize(DEVICE(pci_dev));
+ return;
+ }
+
/*
* A PCIe Downstream Port that do not have ARI Forwarding enabled must
* associate only Device 0 with the device attached to the bus
@@ -2515,6 +2533,14 @@ static void pci_add_option_rom(PCIDevice *pdev, bool is_default_rom,
return;
}
+ if (pci_is_vf(pdev)) {
+ if (pdev->rom_bar > 0) {
+ error_setg(errp, "ROM BAR cannot be enabled for SR-IOV VF");
+ }
+
+ return;
+ }
+
if (load_file || pdev->romsize == UINT32_MAX) {
path = qemu_find_file(QEMU_FILE_TYPE_BIOS, pdev->romfile);
if (path == NULL) {
diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c
index 1eb4358..3ad1874 100644
--- a/hw/pci/pcie_sriov.c
+++ b/hw/pci/pcie_sriov.c
@@ -15,11 +15,12 @@
#include "hw/pci/pcie.h"
#include "hw/pci/pci_bus.h"
#include "hw/qdev-properties.h"
-#include "qemu/error-report.h"
#include "qemu/range.h"
#include "qapi/error.h"
#include "trace.h"
+static GHashTable *pfs;
+
static void unparent_vfs(PCIDevice *dev, uint16_t total_vfs)
{
for (uint16_t i = 0; i < total_vfs; i++) {
@@ -31,17 +32,57 @@ static void unparent_vfs(PCIDevice *dev, uint16_t total_vfs)
dev->exp.sriov_pf.vf = NULL;
}
-bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
- const char *vfname, uint16_t vf_dev_id,
- uint16_t init_vfs, uint16_t total_vfs,
- uint16_t vf_offset, uint16_t vf_stride,
- Error **errp)
+static void register_vfs(PCIDevice *dev)
+{
+ uint16_t num_vfs;
+ uint16_t i;
+ uint16_t sriov_cap = dev->exp.sriov_cap;
+
+ assert(sriov_cap > 0);
+ num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF);
+
+ trace_sriov_register_vfs(dev->name, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), num_vfs);
+ for (i = 0; i < num_vfs; i++) {
+ pci_set_enabled(dev->exp.sriov_pf.vf[i], true);
+ }
+
+ pci_set_word(dev->wmask + sriov_cap + PCI_SRIOV_NUM_VF, 0);
+}
+
+static void unregister_vfs(PCIDevice *dev)
+{
+ uint8_t *cfg = dev->config + dev->exp.sriov_cap;
+ uint16_t i;
+
+ trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn));
+ for (i = 0; i < pci_get_word(cfg + PCI_SRIOV_TOTAL_VF); i++) {
+ pci_set_enabled(dev->exp.sriov_pf.vf[i], false);
+ }
+
+ pci_set_word(dev->wmask + dev->exp.sriov_cap + PCI_SRIOV_NUM_VF, 0xffff);
+}
+
+static bool pcie_sriov_pf_init_common(PCIDevice *dev, uint16_t offset,
+ uint16_t vf_dev_id, uint16_t init_vfs,
+ uint16_t total_vfs, uint16_t vf_offset,
+ uint16_t vf_stride, Error **errp)
{
- BusState *bus = qdev_get_parent_bus(&dev->qdev);
int32_t devfn = dev->devfn + vf_offset;
uint8_t *cfg = dev->config + offset;
uint8_t *wmask;
+ if (!pci_is_express(dev)) {
+ error_setg(errp, "PCI Express is required for SR-IOV PF");
+ return false;
+ }
+
+ if (pci_is_vf(dev)) {
+ error_setg(errp, "a device cannot be a SR-IOV PF and a VF at the same time");
+ return false;
+ }
+
if (total_vfs &&
(uint32_t)devfn + (uint32_t)(total_vfs - 1) * vf_stride >= PCI_DEVFN_MAX) {
error_setg(errp, "VF addr overflows");
@@ -84,6 +125,28 @@ bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
qdev_prop_set_bit(&dev->qdev, "multifunction", true);
+ return true;
+}
+
+bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
+ const char *vfname, uint16_t vf_dev_id,
+ uint16_t init_vfs, uint16_t total_vfs,
+ uint16_t vf_offset, uint16_t vf_stride,
+ Error **errp)
+{
+ BusState *bus = qdev_get_parent_bus(&dev->qdev);
+ int32_t devfn = dev->devfn + vf_offset;
+
+ if (pfs && g_hash_table_contains(pfs, dev->qdev.id)) {
+ error_setg(errp, "attaching user-created SR-IOV VF unsupported");
+ return false;
+ }
+
+ if (!pcie_sriov_pf_init_common(dev, offset, vf_dev_id, init_vfs,
+ total_vfs, vf_offset, vf_stride, errp)) {
+ return false;
+ }
+
dev->exp.sriov_pf.vf = g_new(PCIDevice *, total_vfs);
for (uint16_t i = 0; i < total_vfs; i++) {
@@ -113,7 +176,22 @@ void pcie_sriov_pf_exit(PCIDevice *dev)
{
uint8_t *cfg = dev->config + dev->exp.sriov_cap;
- unparent_vfs(dev, pci_get_word(cfg + PCI_SRIOV_TOTAL_VF));
+ if (dev->exp.sriov_pf.vf_user_created) {
+ uint16_t ven_id = pci_get_word(dev->config + PCI_VENDOR_ID);
+ uint16_t total_vfs = pci_get_word(dev->config + PCI_SRIOV_TOTAL_VF);
+ uint16_t vf_dev_id = pci_get_word(dev->config + PCI_SRIOV_VF_DID);
+
+ unregister_vfs(dev);
+
+ for (uint16_t i = 0; i < total_vfs; i++) {
+ dev->exp.sriov_pf.vf[i]->exp.sriov_vf.pf = NULL;
+
+ pci_config_set_vendor_id(dev->exp.sriov_pf.vf[i]->config, ven_id);
+ pci_config_set_device_id(dev->exp.sriov_pf.vf[i]->config, vf_dev_id);
+ }
+ } else {
+ unparent_vfs(dev, pci_get_word(cfg + PCI_SRIOV_TOTAL_VF));
+ }
}
void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num,
@@ -146,69 +224,179 @@ void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num,
void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num,
MemoryRegion *memory)
{
- PCIIORegion *r;
- PCIBus *bus = pci_get_bus(dev);
uint8_t type;
- pcibus_t size = memory_region_size(memory);
- assert(pci_is_vf(dev)); /* PFs must use pci_register_bar */
- assert(region_num >= 0);
- assert(region_num < PCI_NUM_REGIONS);
+ assert(dev->exp.sriov_vf.pf);
type = dev->exp.sriov_vf.pf->exp.sriov_pf.vf_bar_type[region_num];
- if (!is_power_of_2(size)) {
- error_report("%s: PCI region size must be a power"
- " of two - type=0x%x, size=0x%"FMT_PCIBUS,
- __func__, type, size);
- exit(1);
- }
+ return pci_register_bar(dev, region_num, type, memory);
+}
- r = &dev->io_regions[region_num];
- r->memory = memory;
- r->address_space =
- type & PCI_BASE_ADDRESS_SPACE_IO
- ? bus->address_space_io
- : bus->address_space_mem;
- r->size = size;
- r->type = type;
-
- r->addr = pci_bar_address(dev, region_num, r->type, r->size);
- if (r->addr != PCI_BAR_UNMAPPED) {
- memory_region_add_subregion_overlap(r->address_space,
- r->addr, r->memory, 1);
- }
+static gint compare_vf_devfns(gconstpointer a, gconstpointer b)
+{
+ return (*(PCIDevice **)a)->devfn - (*(PCIDevice **)b)->devfn;
}
-static void register_vfs(PCIDevice *dev)
+int16_t pcie_sriov_pf_init_from_user_created_vfs(PCIDevice *dev,
+ uint16_t offset,
+ Error **errp)
{
- uint16_t num_vfs;
+ GPtrArray *pf;
+ PCIDevice **vfs;
+ BusState *bus = qdev_get_parent_bus(DEVICE(dev));
+ uint16_t ven_id = pci_get_word(dev->config + PCI_VENDOR_ID);
+ uint16_t size = PCI_EXT_CAP_SRIOV_SIZEOF;
+ uint16_t vf_dev_id;
+ uint16_t vf_offset;
+ uint16_t vf_stride;
uint16_t i;
- uint16_t sriov_cap = dev->exp.sriov_cap;
- assert(sriov_cap > 0);
- num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF);
+ if (!pfs || !dev->qdev.id) {
+ return 0;
+ }
- trace_sriov_register_vfs(dev->name, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), num_vfs);
- for (i = 0; i < num_vfs; i++) {
- pci_set_enabled(dev->exp.sriov_pf.vf[i], true);
+ pf = g_hash_table_lookup(pfs, dev->qdev.id);
+ if (!pf) {
+ return 0;
}
- pci_set_word(dev->wmask + sriov_cap + PCI_SRIOV_NUM_VF, 0);
+ if (pf->len > UINT16_MAX) {
+ error_setg(errp, "too many VFs");
+ return -1;
+ }
+
+ g_ptr_array_sort(pf, compare_vf_devfns);
+ vfs = (void *)pf->pdata;
+
+ if (vfs[0]->devfn <= dev->devfn) {
+ error_setg(errp, "a VF function number is less than the PF function number");
+ return -1;
+ }
+
+ vf_dev_id = pci_get_word(vfs[0]->config + PCI_DEVICE_ID);
+ vf_offset = vfs[0]->devfn - dev->devfn;
+ vf_stride = pf->len < 2 ? 0 : vfs[1]->devfn - vfs[0]->devfn;
+
+ for (i = 0; i < pf->len; i++) {
+ if (bus != qdev_get_parent_bus(&vfs[i]->qdev)) {
+ error_setg(errp, "SR-IOV VF parent bus mismatches with PF");
+ return -1;
+ }
+
+ if (ven_id != pci_get_word(vfs[i]->config + PCI_VENDOR_ID)) {
+ error_setg(errp, "SR-IOV VF vendor ID mismatches with PF");
+ return -1;
+ }
+
+ if (vf_dev_id != pci_get_word(vfs[i]->config + PCI_DEVICE_ID)) {
+ error_setg(errp, "inconsistent SR-IOV VF device IDs");
+ return -1;
+ }
+
+ for (size_t j = 0; j < PCI_NUM_REGIONS; j++) {
+ if (vfs[i]->io_regions[j].size != vfs[0]->io_regions[j].size ||
+ vfs[i]->io_regions[j].type != vfs[0]->io_regions[j].type) {
+ error_setg(errp, "inconsistent SR-IOV BARs");
+ return -1;
+ }
+ }
+
+ if (vfs[i]->devfn - vfs[0]->devfn != vf_stride * i) {
+ error_setg(errp, "inconsistent SR-IOV stride");
+ return -1;
+ }
+ }
+
+ if (!pcie_sriov_pf_init_common(dev, offset, vf_dev_id, pf->len,
+ pf->len, vf_offset, vf_stride, errp)) {
+ return -1;
+ }
+
+ if (!pcie_find_capability(dev, PCI_EXT_CAP_ID_ARI)) {
+ pcie_ari_init(dev, offset + size);
+ size += PCI_ARI_SIZEOF;
+ }
+
+ for (i = 0; i < pf->len; i++) {
+ vfs[i]->exp.sriov_vf.pf = dev;
+ vfs[i]->exp.sriov_vf.vf_number = i;
+
+ /* set vid/did according to sr/iov spec - they are not used */
+ pci_config_set_vendor_id(vfs[i]->config, 0xffff);
+ pci_config_set_device_id(vfs[i]->config, 0xffff);
+ }
+
+ dev->exp.sriov_pf.vf = vfs;
+ dev->exp.sriov_pf.vf_user_created = true;
+
+ for (i = 0; i < PCI_NUM_REGIONS; i++) {
+ PCIIORegion *region = &vfs[0]->io_regions[i];
+
+ if (region->size) {
+ pcie_sriov_pf_init_vf_bar(dev, i, region->type, region->size);
+ }
+ }
+
+ return size;
}
-static void unregister_vfs(PCIDevice *dev)
+bool pcie_sriov_register_device(PCIDevice *dev, Error **errp)
{
- uint8_t *cfg = dev->config + dev->exp.sriov_cap;
- uint16_t i;
+ if (!dev->exp.sriov_pf.vf && dev->qdev.id &&
+ pfs && g_hash_table_contains(pfs, dev->qdev.id)) {
+ error_setg(errp, "attaching user-created SR-IOV VF unsupported");
+ return false;
+ }
- trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn));
- for (i = 0; i < pci_get_word(cfg + PCI_SRIOV_TOTAL_VF); i++) {
- pci_set_enabled(dev->exp.sriov_pf.vf[i], false);
+ if (dev->sriov_pf) {
+ PCIDevice *pci_pf;
+ GPtrArray *pf;
+
+ if (!PCI_DEVICE_GET_CLASS(dev)->sriov_vf_user_creatable) {
+ error_setg(errp, "user cannot create SR-IOV VF with this device type");
+ return false;
+ }
+
+ if (!pci_is_express(dev)) {
+ error_setg(errp, "PCI Express is required for SR-IOV VF");
+ return false;
+ }
+
+ if (!pci_qdev_find_device(dev->sriov_pf, &pci_pf)) {
+ error_setg(errp, "PCI device specified as SR-IOV PF already exists");
+ return false;
+ }
+
+ if (!pfs) {
+ pfs = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, NULL);
+ }
+
+ pf = g_hash_table_lookup(pfs, dev->sriov_pf);
+ if (!pf) {
+ pf = g_ptr_array_new();
+ g_hash_table_insert(pfs, g_strdup(dev->sriov_pf), pf);
+ }
+
+ g_ptr_array_add(pf, dev);
}
- pci_set_word(dev->wmask + dev->exp.sriov_cap + PCI_SRIOV_NUM_VF, 0xffff);
+ return true;
+}
+
+void pcie_sriov_unregister_device(PCIDevice *dev)
+{
+ if (dev->sriov_pf && pfs) {
+ GPtrArray *pf = g_hash_table_lookup(pfs, dev->sriov_pf);
+
+ if (pf) {
+ g_ptr_array_remove_fast(pf, dev);
+
+ if (!pf->len) {
+ g_hash_table_remove(pfs, dev->sriov_pf);
+ g_ptr_array_free(pf, FALSE);
+ }
+ }
+ }
}
void pcie_sriov_config_write(PCIDevice *dev, uint32_t address,
@@ -304,7 +492,7 @@ void pcie_sriov_pf_add_sup_pgsize(PCIDevice *dev, uint16_t opt_sup_pgsize)
uint16_t pcie_sriov_vf_number(PCIDevice *dev)
{
- assert(pci_is_vf(dev));
+ assert(dev->exp.sriov_vf.pf);
return dev->exp.sriov_vf.vf_number;
}
diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c
index 70be4a7..9b12ee7 100644
--- a/hw/scsi/scsi-bus.c
+++ b/hw/scsi/scsi-bus.c
@@ -400,7 +400,7 @@ static void scsi_qdev_realize(DeviceState *qdev, Error **errp)
return;
}
dev->vmsentry = qdev_add_vm_change_state_handler(DEVICE(dev),
- scsi_dma_restart_cb, dev);
+ scsi_dma_restart_cb, NULL, dev);
}
static void scsi_qdev_unrealize(DeviceState *qdev)
diff --git a/hw/scsi/vhost-scsi-common.c b/hw/scsi/vhost-scsi-common.c
index 4c86370..43525ba 100644
--- a/hw/scsi/vhost-scsi-common.c
+++ b/hw/scsi/vhost-scsi-common.c
@@ -101,24 +101,25 @@ err_host_notifiers:
return ret;
}
-void vhost_scsi_common_stop(VHostSCSICommon *vsc)
+int vhost_scsi_common_stop(VHostSCSICommon *vsc)
{
VirtIODevice *vdev = VIRTIO_DEVICE(vsc);
BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
int ret = 0;
- vhost_dev_stop(&vsc->dev, vdev, true);
+ ret = vhost_dev_stop(&vsc->dev, vdev, true);
if (k->set_guest_notifiers) {
- ret = k->set_guest_notifiers(qbus->parent, vsc->dev.nvqs, false);
- if (ret < 0) {
- error_report("vhost guest notifier cleanup failed: %d", ret);
+ int r = k->set_guest_notifiers(qbus->parent, vsc->dev.nvqs, false);
+ if (r < 0) {
+ error_report("vhost guest notifier cleanup failed: %d", ret);
+ return r;
}
}
- assert(ret >= 0);
vhost_dev_disable_notifiers(&vsc->dev, vdev);
+ return ret;
}
uint64_t vhost_scsi_common_get_features(VirtIODevice *vdev, uint64_t features,
diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c
index 10fde8e..cdf405b 100644
--- a/hw/scsi/vhost-scsi.c
+++ b/hw/scsi/vhost-scsi.c
@@ -114,7 +114,7 @@ static void vhost_scsi_stop(VHostSCSI *s)
vhost_scsi_common_stop(vsc);
}
-static void vhost_scsi_set_status(VirtIODevice *vdev, uint8_t val)
+static int vhost_scsi_set_status(VirtIODevice *vdev, uint8_t val)
{
VHostSCSI *s = VHOST_SCSI(vdev);
VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s);
@@ -125,7 +125,7 @@ static void vhost_scsi_set_status(VirtIODevice *vdev, uint8_t val)
}
if (vhost_dev_is_started(&vsc->dev) == start) {
- return;
+ return 0;
}
if (start) {
@@ -139,6 +139,7 @@ static void vhost_scsi_set_status(VirtIODevice *vdev, uint8_t val)
} else {
vhost_scsi_stop(s);
}
+ return 0;
}
static void vhost_dummy_handle_output(VirtIODevice *vdev, VirtQueue *vq)
@@ -358,6 +359,9 @@ static const Property vhost_scsi_properties[] = {
DEFINE_PROP_BIT64("t10_pi", VHostSCSICommon, host_features,
VIRTIO_SCSI_F_T10_PI,
false),
+ DEFINE_PROP_BIT64("hotplug", VHostSCSICommon, host_features,
+ VIRTIO_SCSI_F_HOTPLUG,
+ false),
DEFINE_PROP_BOOL("migratable", VHostSCSICommon, migratable, false),
DEFINE_PROP_BOOL("worker_per_virtqueue", VirtIOSCSICommon,
conf.worker_per_virtqueue, false),
diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c
index 8298e8c..25f2d89 100644
--- a/hw/scsi/vhost-user-scsi.c
+++ b/hw/scsi/vhost-user-scsi.c
@@ -52,19 +52,19 @@ static int vhost_user_scsi_start(VHostUserSCSI *s, Error **errp)
return ret;
}
-static void vhost_user_scsi_stop(VHostUserSCSI *s)
+static int vhost_user_scsi_stop(VHostUserSCSI *s)
{
VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s);
if (!s->started_vu) {
- return;
+ return 0;
}
s->started_vu = false;
- vhost_scsi_common_stop(vsc);
+ return vhost_scsi_common_stop(vsc);
}
-static void vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status)
+static int vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status)
{
VHostUserSCSI *s = (VHostUserSCSI *)vdev;
DeviceState *dev = DEVICE(vdev);
@@ -75,11 +75,11 @@ static void vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status)
int ret;
if (!s->connected) {
- return;
+ return -1;
}
if (vhost_dev_is_started(&vsc->dev) == should_start) {
- return;
+ return 0;
}
if (should_start) {
@@ -91,8 +91,12 @@ static void vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status)
qemu_chr_fe_disconnect(&vs->conf.chardev);
}
} else {
- vhost_user_scsi_stop(s);
+ ret = vhost_user_scsi_stop(s);
+ if (ret) {
+ return ret;
+ }
}
+ return 0;
}
static void vhost_user_scsi_handle_output(VirtIODevice *vdev, VirtQueue *vq)
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 1dceab1..b76697bd 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -1016,7 +1016,7 @@ static int vfio_migration_init(VFIODevice *vbasedev)
vfio_vmstate_change_prepare :
NULL;
migration->vm_state = qdev_add_vm_change_state_handler_full(
- vbasedev->dev, vfio_vmstate_change, prepare_cb, vbasedev);
+ vbasedev->dev, vfio_vmstate_change, prepare_cb, NULL, vbasedev);
migration_add_notifier(&migration->migration_state,
vfio_migration_state_notifier);
diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c
index dd8837c..d1da40a 100644
--- a/hw/virtio/vdpa-dev.c
+++ b/hw/virtio/vdpa-dev.c
@@ -312,7 +312,7 @@ static void vhost_vdpa_device_stop(VirtIODevice *vdev)
vhost_dev_disable_notifiers(&s->dev, vdev);
}
-static void vhost_vdpa_device_set_status(VirtIODevice *vdev, uint8_t status)
+static int vhost_vdpa_device_set_status(VirtIODevice *vdev, uint8_t status)
{
VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev);
bool should_start = virtio_device_started(vdev, status);
@@ -324,7 +324,7 @@ static void vhost_vdpa_device_set_status(VirtIODevice *vdev, uint8_t status)
}
if (s->started == should_start) {
- return;
+ return 0;
}
if (should_start) {
@@ -335,6 +335,7 @@ static void vhost_vdpa_device_set_status(VirtIODevice *vdev, uint8_t status)
} else {
vhost_vdpa_device_stop(vdev);
}
+ return 0;
}
static const Property vhost_vdpa_device_properties[] = {
diff --git a/hw/virtio/vhost-user-base.c b/hw/virtio/vhost-user-base.c
index 7714332..ff67a02 100644
--- a/hw/virtio/vhost-user-base.c
+++ b/hw/virtio/vhost-user-base.c
@@ -66,7 +66,7 @@ err_host_notifiers:
vhost_dev_disable_notifiers(&vub->vhost_dev, vdev);
}
-static void vub_stop(VirtIODevice *vdev)
+static int vub_stop(VirtIODevice *vdev)
{
VHostUserBase *vub = VHOST_USER_BASE(vdev);
BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
@@ -74,34 +74,39 @@ static void vub_stop(VirtIODevice *vdev)
int ret;
if (!k->set_guest_notifiers) {
- return;
+ return 0;
}
- vhost_dev_stop(&vub->vhost_dev, vdev, true);
+ ret = vhost_dev_stop(&vub->vhost_dev, vdev, true);
- ret = k->set_guest_notifiers(qbus->parent, vub->vhost_dev.nvqs, false);
- if (ret < 0) {
+ if (k->set_guest_notifiers(qbus->parent, vub->vhost_dev.nvqs, false) < 0) {
error_report("vhost guest notifier cleanup failed: %d", ret);
- return;
+ return -1;
}
vhost_dev_disable_notifiers(&vub->vhost_dev, vdev);
+ return ret;
}
-static void vub_set_status(VirtIODevice *vdev, uint8_t status)
+static int vub_set_status(VirtIODevice *vdev, uint8_t status)
{
VHostUserBase *vub = VHOST_USER_BASE(vdev);
bool should_start = virtio_device_should_start(vdev, status);
if (vhost_dev_is_started(&vub->vhost_dev) == should_start) {
- return;
+ return 0;
}
if (should_start) {
vub_start(vdev);
} else {
- vub_stop(vdev);
+ int ret;
+ ret = vub_stop(vdev);
+ if (ret < 0) {
+ return ret;
+ }
}
+ return 0;
}
/*
diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c
index f6d1fc8..e77c69e 100644
--- a/hw/virtio/vhost-user-fs.c
+++ b/hw/virtio/vhost-user-fs.c
@@ -100,7 +100,7 @@ err_host_notifiers:
vhost_dev_disable_notifiers(&fs->vhost_dev, vdev);
}
-static void vuf_stop(VirtIODevice *vdev)
+static int vuf_stop(VirtIODevice *vdev)
{
VHostUserFS *fs = VHOST_USER_FS(vdev);
BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
@@ -108,34 +108,39 @@ static void vuf_stop(VirtIODevice *vdev)
int ret;
if (!k->set_guest_notifiers) {
- return;
+ return 0;
}
- vhost_dev_stop(&fs->vhost_dev, vdev, true);
+ ret = vhost_dev_stop(&fs->vhost_dev, vdev, true);
- ret = k->set_guest_notifiers(qbus->parent, fs->vhost_dev.nvqs, false);
- if (ret < 0) {
+ if (k->set_guest_notifiers(qbus->parent, fs->vhost_dev.nvqs, false) < 0) {
error_report("vhost guest notifier cleanup failed: %d", ret);
- return;
+ return -1;
}
vhost_dev_disable_notifiers(&fs->vhost_dev, vdev);
+ return ret;
}
-static void vuf_set_status(VirtIODevice *vdev, uint8_t status)
+static int vuf_set_status(VirtIODevice *vdev, uint8_t status)
{
VHostUserFS *fs = VHOST_USER_FS(vdev);
bool should_start = virtio_device_should_start(vdev, status);
if (vhost_dev_is_started(&fs->vhost_dev) == should_start) {
- return;
+ return 0;
}
if (should_start) {
vuf_start(vdev);
} else {
- vuf_stop(vdev);
+ int ret;
+ ret = vuf_stop(vdev);
+ if (ret < 0) {
+ return ret;
+ }
}
+ return 0;
}
static uint64_t vuf_get_features(VirtIODevice *vdev,
diff --git a/hw/virtio/vhost-user-scmi.c b/hw/virtio/vhost-user-scmi.c
index 7a0f622..f9264c4 100644
--- a/hw/virtio/vhost-user-scmi.c
+++ b/hw/virtio/vhost-user-scmi.c
@@ -83,7 +83,7 @@ err_host_notifiers:
return ret;
}
-static void vu_scmi_stop(VirtIODevice *vdev)
+static int vu_scmi_stop(VirtIODevice *vdev)
{
VHostUserSCMI *scmi = VHOST_USER_SCMI(vdev);
BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
@@ -93,41 +93,46 @@ static void vu_scmi_stop(VirtIODevice *vdev)
/* vhost_dev_is_started() check in the callers is not fully reliable. */
if (!scmi->started_vu) {
- return;
+ return 0;
}
scmi->started_vu = false;
if (!k->set_guest_notifiers) {
- return;
+ return 0;
}
- vhost_dev_stop(vhost_dev, vdev, true);
+ ret = vhost_dev_stop(vhost_dev, vdev, true);
- ret = k->set_guest_notifiers(qbus->parent, vhost_dev->nvqs, false);
- if (ret < 0) {
+ if (k->set_guest_notifiers(qbus->parent, vhost_dev->nvqs, false) < 0) {
error_report("vhost guest notifier cleanup failed: %d", ret);
- return;
+ return -1;
}
vhost_dev_disable_notifiers(vhost_dev, vdev);
+ return ret;
}
-static void vu_scmi_set_status(VirtIODevice *vdev, uint8_t status)
+static int vu_scmi_set_status(VirtIODevice *vdev, uint8_t status)
{
VHostUserSCMI *scmi = VHOST_USER_SCMI(vdev);
bool should_start = virtio_device_should_start(vdev, status);
if (!scmi->connected) {
- return;
+ return -1;
}
if (vhost_dev_is_started(&scmi->vhost_dev) == should_start) {
- return;
+ return 0;
}
if (should_start) {
vu_scmi_start(vdev);
} else {
- vu_scmi_stop(vdev);
+ int ret;
+ ret = vu_scmi_stop(vdev);
+ if (ret < 0) {
+ return ret;
+ }
}
+ return 0;
}
static uint64_t vu_scmi_get_features(VirtIODevice *vdev, uint64_t features,
diff --git a/hw/virtio/vhost-user-vsock.c b/hw/virtio/vhost-user-vsock.c
index 2776792..993c287 100644
--- a/hw/virtio/vhost-user-vsock.c
+++ b/hw/virtio/vhost-user-vsock.c
@@ -54,23 +54,28 @@ const VhostDevConfigOps vsock_ops = {
.vhost_dev_config_notifier = vuv_handle_config_change,
};
-static void vuv_set_status(VirtIODevice *vdev, uint8_t status)
+static int vuv_set_status(VirtIODevice *vdev, uint8_t status)
{
VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev);
bool should_start = virtio_device_should_start(vdev, status);
+ int ret;
if (vhost_dev_is_started(&vvc->vhost_dev) == should_start) {
- return;
+ return 0;
}
if (should_start) {
- int ret = vhost_vsock_common_start(vdev);
+ ret = vhost_vsock_common_start(vdev);
if (ret < 0) {
- return;
+ return ret;
}
} else {
- vhost_vsock_common_stop(vdev);
+ ret = vhost_vsock_common_stop(vdev);
+ if (ret < 0) {
+ return ret;
+ }
}
+ return 0;
}
static uint64_t vuv_get_features(VirtIODevice *vdev,
diff --git a/hw/virtio/vhost-vsock-common.c b/hw/virtio/vhost-vsock-common.c
index 4b4fbb4..c6c44d8 100644
--- a/hw/virtio/vhost-vsock-common.c
+++ b/hw/virtio/vhost-vsock-common.c
@@ -95,7 +95,7 @@ err_host_notifiers:
return ret;
}
-void vhost_vsock_common_stop(VirtIODevice *vdev)
+int vhost_vsock_common_stop(VirtIODevice *vdev)
{
VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev);
BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
@@ -103,18 +103,18 @@ void vhost_vsock_common_stop(VirtIODevice *vdev)
int ret;
if (!k->set_guest_notifiers) {
- return;
+ return 0;
}
- vhost_dev_stop(&vvc->vhost_dev, vdev, true);
+ ret = vhost_dev_stop(&vvc->vhost_dev, vdev, true);
- ret = k->set_guest_notifiers(qbus->parent, vvc->vhost_dev.nvqs, false);
- if (ret < 0) {
+ if (k->set_guest_notifiers(qbus->parent, vvc->vhost_dev.nvqs, false) < 0) {
error_report("vhost guest notifier cleanup failed: %d", ret);
- return;
+ return -1;
}
vhost_dev_disable_notifiers(&vvc->vhost_dev, vdev);
+ return ret;
}
diff --git a/hw/virtio/vhost-vsock.c b/hw/virtio/vhost-vsock.c
index b73dc72..6e40888 100644
--- a/hw/virtio/vhost-vsock.c
+++ b/hw/virtio/vhost-vsock.c
@@ -67,37 +67,38 @@ static int vhost_vsock_set_running(VirtIODevice *vdev, int start)
}
-static void vhost_vsock_set_status(VirtIODevice *vdev, uint8_t status)
+static int vhost_vsock_set_status(VirtIODevice *vdev, uint8_t status)
{
VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev);
bool should_start = virtio_device_should_start(vdev, status);
int ret;
if (vhost_dev_is_started(&vvc->vhost_dev) == should_start) {
- return;
+ return 0;
}
if (should_start) {
ret = vhost_vsock_common_start(vdev);
if (ret < 0) {
- return;
+ return 0;
}
ret = vhost_vsock_set_running(vdev, 1);
if (ret < 0) {
vhost_vsock_common_stop(vdev);
error_report("Error starting vhost vsock: %d", -ret);
- return;
+ return 0;
}
} else {
ret = vhost_vsock_set_running(vdev, 0);
if (ret < 0) {
error_report("vhost vsock set running failed: %d", ret);
- return;
+ return 0;
}
vhost_vsock_common_stop(vdev);
}
+ return 0;
}
static uint64_t vhost_vsock_get_features(VirtIODevice *vdev,
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 4cae7c1..fc43853 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -1367,10 +1367,10 @@ fail_alloc_desc:
return r;
}
-void vhost_virtqueue_stop(struct vhost_dev *dev,
- struct VirtIODevice *vdev,
- struct vhost_virtqueue *vq,
- unsigned idx)
+int vhost_virtqueue_stop(struct vhost_dev *dev,
+ struct VirtIODevice *vdev,
+ struct vhost_virtqueue *vq,
+ unsigned idx)
{
int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, idx);
struct vhost_vring_state state = {
@@ -1380,7 +1380,7 @@ void vhost_virtqueue_stop(struct vhost_dev *dev,
if (virtio_queue_get_desc_addr(vdev, idx) == 0) {
/* Don't stop the virtqueue which might have not been started */
- return;
+ return 0;
}
r = dev->vhost_ops->vhost_get_vring_base(dev, &state);
@@ -1411,6 +1411,7 @@ void vhost_virtqueue_stop(struct vhost_dev *dev,
0, virtio_queue_get_avail_size(vdev, idx));
vhost_memory_unmap(dev, vq->desc, virtio_queue_get_desc_size(vdev, idx),
0, virtio_queue_get_desc_size(vdev, idx));
+ return r;
}
static int vhost_virtqueue_set_busyloop_timeout(struct vhost_dev *dev,
@@ -2135,9 +2136,10 @@ fail_features:
}
/* Host notifiers must be enabled at this point. */
-void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
+int vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
{
int i;
+ int rc = 0;
/* should only be called after backend is connected */
assert(hdev->vhost_ops);
@@ -2156,10 +2158,10 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
vhost_dev_set_vring_enable(hdev, false);
}
for (i = 0; i < hdev->nvqs; ++i) {
- vhost_virtqueue_stop(hdev,
- vdev,
- hdev->vqs + i,
- hdev->vq_index + i);
+ rc |= vhost_virtqueue_stop(hdev,
+ vdev,
+ hdev->vqs + i,
+ hdev->vq_index + i);
}
if (hdev->vhost_ops->vhost_reset_status) {
hdev->vhost_ops->vhost_reset_status(hdev);
@@ -2176,6 +2178,7 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
hdev->started = false;
vdev->vhost_started = false;
hdev->vdev = NULL;
+ return rc;
}
int vhost_net_set_backend(struct vhost_dev *hdev,
diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
index 91510ec..db787d0 100644
--- a/hw/virtio/virtio-balloon.c
+++ b/hw/virtio/virtio-balloon.c
@@ -958,7 +958,7 @@ static void virtio_balloon_device_reset(VirtIODevice *vdev)
s->poison_val = 0;
}
-static void virtio_balloon_set_status(VirtIODevice *vdev, uint8_t status)
+static int virtio_balloon_set_status(VirtIODevice *vdev, uint8_t status)
{
VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
@@ -988,6 +988,7 @@ static void virtio_balloon_set_status(VirtIODevice *vdev, uint8_t status)
qemu_mutex_unlock(&s->free_page_lock);
}
}
+ return 0;
}
static ResettableState *virtio_balloon_get_reset_state(Object *obj)
diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c
index e24d691..517f208 100644
--- a/hw/virtio/virtio-crypto.c
+++ b/hw/virtio/virtio-crypto.c
@@ -1197,11 +1197,12 @@ static void virtio_crypto_vhost_status(VirtIOCrypto *c, uint8_t status)
}
}
-static void virtio_crypto_set_status(VirtIODevice *vdev, uint8_t status)
+static int virtio_crypto_set_status(VirtIODevice *vdev, uint8_t status)
{
VirtIOCrypto *vcrypto = VIRTIO_CRYPTO(vdev);
virtio_crypto_vhost_status(vcrypto, status);
+ return 0;
}
static void virtio_crypto_guest_notifier_mask(VirtIODevice *vdev, int idx,
diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
index 5406098..3500f1b 100644
--- a/hw/virtio/virtio-iommu.c
+++ b/hw/virtio/virtio-iommu.c
@@ -1522,9 +1522,10 @@ static void virtio_iommu_device_reset_exit(Object *obj, ResetType type)
NULL, NULL, virtio_iommu_put_endpoint);
}
-static void virtio_iommu_set_status(VirtIODevice *vdev, uint8_t status)
+static int virtio_iommu_set_status(VirtIODevice *vdev, uint8_t status)
{
trace_virtio_iommu_device_status(status);
+ return 0;
}
static void virtio_iommu_instance_init(Object *obj)
diff --git a/hw/virtio/virtio-net-pci.c b/hw/virtio/virtio-net-pci.c
index 8cf9788..f857a84 100644
--- a/hw/virtio/virtio-net-pci.c
+++ b/hw/virtio/virtio-net-pci.c
@@ -74,6 +74,7 @@ static void virtio_net_pci_class_init(ObjectClass *klass, const void *data)
k->device_id = PCI_DEVICE_ID_VIRTIO_NET;
k->revision = VIRTIO_PCI_ABI_VERSION;
k->class_id = PCI_CLASS_NETWORK_ETHERNET;
+ k->sriov_vf_user_creatable = true;
set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
device_class_set_props(dc, virtio_net_properties);
vpciklass->realize = virtio_net_pci_realize;
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 0fa8fe4..9b48aa8 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -1962,6 +1962,7 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp)
uint8_t *config;
uint32_t size;
VirtIODevice *vdev = virtio_bus_get_device(bus);
+ int16_t res;
/*
* Virtio capabilities present without
@@ -2109,6 +2110,18 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp)
pci_register_bar(&proxy->pci_dev, proxy->legacy_io_bar_idx,
PCI_BASE_ADDRESS_SPACE_IO, &proxy->bar);
}
+
+ if (pci_is_vf(&proxy->pci_dev)) {
+ pcie_ari_init(&proxy->pci_dev, proxy->last_pcie_cap_offset);
+ proxy->last_pcie_cap_offset += PCI_ARI_SIZEOF;
+ } else {
+ res = pcie_sriov_pf_init_from_user_created_vfs(
+ &proxy->pci_dev, proxy->last_pcie_cap_offset, errp);
+ if (res > 0) {
+ proxy->last_pcie_cap_offset += res;
+ virtio_add_feature(&vdev->host_features, VIRTIO_F_SR_IOV);
+ }
+ }
}
static void virtio_pci_device_unplugged(DeviceState *d)
@@ -2199,7 +2212,7 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp)
if (pcie_port && pci_is_express(pci_dev)) {
int pos;
- uint16_t last_pcie_cap_offset = PCI_CONFIG_SPACE_SIZE;
+ proxy->last_pcie_cap_offset = PCI_CONFIG_SPACE_SIZE;
pos = pcie_endpoint_cap_init(pci_dev, 0);
assert(pos > 0);
@@ -2216,9 +2229,9 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp)
pci_set_word(pci_dev->config + pos + PCI_PM_PMC, 0x3);
if (proxy->flags & VIRTIO_PCI_FLAG_AER) {
- pcie_aer_init(pci_dev, PCI_ERR_VER, last_pcie_cap_offset,
+ pcie_aer_init(pci_dev, PCI_ERR_VER, proxy->last_pcie_cap_offset,
PCI_ERR_SIZEOF, NULL);
- last_pcie_cap_offset += PCI_ERR_SIZEOF;
+ proxy->last_pcie_cap_offset += PCI_ERR_SIZEOF;
}
if (proxy->flags & VIRTIO_PCI_FLAG_INIT_DEVERR) {
@@ -2243,9 +2256,9 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp)
}
if (proxy->flags & VIRTIO_PCI_FLAG_ATS) {
- pcie_ats_init(pci_dev, last_pcie_cap_offset,
+ pcie_ats_init(pci_dev, proxy->last_pcie_cap_offset,
proxy->flags & VIRTIO_PCI_FLAG_ATS_PAGE_ALIGNED);
- last_pcie_cap_offset += PCI_EXT_CAP_ATS_SIZEOF;
+ proxy->last_pcie_cap_offset += PCI_EXT_CAP_ATS_SIZEOF;
}
if (proxy->flags & VIRTIO_PCI_FLAG_INIT_FLR) {
@@ -2273,6 +2286,7 @@ static void virtio_pci_exit(PCIDevice *pci_dev)
!pci_bus_is_root(pci_get_bus(pci_dev));
bool modern_pio = proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY;
+ pcie_sriov_pf_exit(&proxy->pci_dev);
msix_uninit_exclusive_bar(pci_dev);
if (proxy->flags & VIRTIO_PCI_FLAG_AER && pcie_port &&
pci_is_express(pci_dev)) {
diff --git a/hw/virtio/virtio-rng.c b/hw/virtio/virtio-rng.c
index dcb3c71..3df5d25 100644
--- a/hw/virtio/virtio-rng.c
+++ b/hw/virtio/virtio-rng.c
@@ -159,17 +159,18 @@ static void check_rate_limit(void *opaque)
vrng->activate_timer = true;
}
-static void virtio_rng_set_status(VirtIODevice *vdev, uint8_t status)
+static int virtio_rng_set_status(VirtIODevice *vdev, uint8_t status)
{
VirtIORNG *vrng = VIRTIO_RNG(vdev);
if (!vdev->vm_running) {
- return;
+ return 0;
}
vdev->status = status;
/* Something changed, try to process buffers */
virtio_rng_process(vrng);
+ return 0;
}
static void virtio_rng_device_realize(DeviceState *dev, Error **errp)
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 480c2e5..2e98cec 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -2221,12 +2221,12 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val)
{
VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
trace_virtio_set_status(vdev, val);
+ int ret = 0;
if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
val & VIRTIO_CONFIG_S_FEATURES_OK) {
- int ret = virtio_validate_features(vdev);
-
+ ret = virtio_validate_features(vdev);
if (ret) {
return ret;
}
@@ -2239,11 +2239,15 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val)
}
if (k->set_status) {
- k->set_status(vdev, val);
+ ret = k->set_status(vdev, val);
+ if (ret) {
+ qemu_log("set %s status to %d failed, old status: %d\n",
+ vdev->name, val, vdev->status);
+ }
}
vdev->status = val;
- return 0;
+ return ret;
}
static enum virtio_device_endian virtio_default_endian(void)
@@ -2316,49 +2320,6 @@ void virtio_queue_enable(VirtIODevice *vdev, uint32_t queue_index)
}
}
-void virtio_reset(void *opaque)
-{
- VirtIODevice *vdev = opaque;
- VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
- int i;
-
- virtio_set_status(vdev, 0);
- if (current_cpu) {
- /* Guest initiated reset */
- vdev->device_endian = virtio_current_cpu_endian();
- } else {
- /* System reset */
- vdev->device_endian = virtio_default_endian();
- }
-
- if (k->get_vhost) {
- struct vhost_dev *hdev = k->get_vhost(vdev);
- /* Only reset when vhost back-end is connected */
- if (hdev && hdev->vhost_ops) {
- vhost_reset_device(hdev);
- }
- }
-
- if (k->reset) {
- k->reset(vdev);
- }
-
- vdev->start_on_kick = false;
- vdev->started = false;
- vdev->broken = false;
- vdev->guest_features = 0;
- vdev->queue_sel = 0;
- vdev->status = 0;
- vdev->disabled = false;
- qatomic_set(&vdev->isr, 0);
- vdev->config_vector = VIRTIO_NO_VECTOR;
- virtio_notify_vector(vdev, vdev->config_vector);
-
- for(i = 0; i < VIRTIO_QUEUE_MAX; i++) {
- __virtio_queue_reset(vdev, i);
- }
-}
-
void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
{
if (!vdev->vq[n].vring.num) {
@@ -3169,6 +3130,49 @@ int virtio_set_features(VirtIODevice *vdev, uint64_t val)
return ret;
}
+void virtio_reset(void *opaque)
+{
+ VirtIODevice *vdev = opaque;
+ VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
+ int i;
+
+ virtio_set_status(vdev, 0);
+ if (current_cpu) {
+ /* Guest initiated reset */
+ vdev->device_endian = virtio_current_cpu_endian();
+ } else {
+ /* System reset */
+ vdev->device_endian = virtio_default_endian();
+ }
+
+ if (k->get_vhost) {
+ struct vhost_dev *hdev = k->get_vhost(vdev);
+ /* Only reset when vhost back-end is connected */
+ if (hdev && hdev->vhost_ops) {
+ vhost_reset_device(hdev);
+ }
+ }
+
+ if (k->reset) {
+ k->reset(vdev);
+ }
+
+ vdev->start_on_kick = false;
+ vdev->started = false;
+ vdev->broken = false;
+ virtio_set_features_nocheck(vdev, 0);
+ vdev->queue_sel = 0;
+ vdev->status = 0;
+ vdev->disabled = false;
+ qatomic_set(&vdev->isr, 0);
+ vdev->config_vector = VIRTIO_NO_VECTOR;
+ virtio_notify_vector(vdev, vdev->config_vector);
+
+ for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
+ __virtio_queue_reset(vdev, i);
+ }
+}
+
static void virtio_device_check_notification_compatibility(VirtIODevice *vdev,
Error **errp)
{
@@ -3419,7 +3423,7 @@ void virtio_cleanup(VirtIODevice *vdev)
qemu_del_vm_change_state_handler(vdev->vmstate);
}
-static void virtio_vmstate_change(void *opaque, bool running, RunState state)
+static int virtio_vmstate_change(void *opaque, bool running, RunState state)
{
VirtIODevice *vdev = opaque;
BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
@@ -3436,8 +3440,12 @@ static void virtio_vmstate_change(void *opaque, bool running, RunState state)
}
if (!backend_run) {
- virtio_set_status(vdev, vdev->status);
+ int ret = virtio_set_status(vdev, vdev->status);
+ if (ret) {
+ return ret;
+ }
}
+ return 0;
}
void virtio_instance_init_common(Object *proxy_obj, void *data,
@@ -3489,7 +3497,7 @@ void virtio_init(VirtIODevice *vdev, uint16_t device_id, size_t config_size)
vdev->config = NULL;
}
vdev->vmstate = qdev_add_vm_change_state_handler(DEVICE(vdev),
- virtio_vmstate_change, vdev);
+ NULL, virtio_vmstate_change, vdev);
vdev->device_endian = virtio_default_endian();
vdev->use_guest_notifier_mask = true;
}
diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index 3a0ee7e..ed6cd50 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -176,10 +176,12 @@ typedef struct CXLCCI {
uint16_t opcode;
uint16_t complete_pct;
uint16_t ret_code; /* Current value of retcode */
+ bool aborted;
uint64_t starttime;
/* set by each bg cmd, cleared by the bg_timer when complete */
uint64_t runtime;
QEMUTimer *timer;
+ QemuMutex lock; /* serializes mbox abort vs timer cb */
} bg;
/* firmware update */
@@ -201,6 +203,7 @@ typedef struct CXLCCI {
DeviceState *d;
/* Pointer to the device hosting the protocol conversion */
DeviceState *intf;
+ bool initialized;
} CXLCCI;
typedef struct cxl_device_state {
@@ -316,6 +319,7 @@ void cxl_initialize_mailbox_t3(CXLCCI *cci, DeviceState *d, size_t payload_max);
void cxl_initialize_mailbox_swcci(CXLCCI *cci, DeviceState *intf,
DeviceState *d, size_t payload_max);
void cxl_init_cci(CXLCCI *cci, size_t payload_max);
+void cxl_destroy_cci(CXLCCI *cci);
void cxl_add_cci_commands(CXLCCI *cci, const struct cxl_cmd (*cxl_cmd_set)[256],
size_t payload_max);
int cxl_process_cci_message(CXLCCI *cci, uint8_t set, uint8_t cmd,
@@ -536,6 +540,21 @@ typedef struct CXLSetFeatureInfo {
size_t data_size;
} CXLSetFeatureInfo;
+struct CXLSanitizeInfo;
+
+typedef struct CXLAlertConfig {
+ uint8_t valid_alerts;
+ uint8_t enable_alerts;
+ uint8_t life_used_crit_alert_thresh;
+ uint8_t life_used_warn_thresh;
+ uint16_t over_temp_crit_alert_thresh;
+ uint16_t under_temp_crit_alert_thresh;
+ uint16_t over_temp_warn_thresh;
+ uint16_t under_temp_warn_thresh;
+ uint16_t cor_vmem_err_warn_thresh;
+ uint16_t cor_pmem_err_warn_thresh;
+} QEMU_PACKED CXLAlertConfig;
+
struct CXLType3Dev {
/* Private */
PCIDevice parent_obj;
@@ -557,6 +576,8 @@ struct CXLType3Dev {
CXLCCI vdm_fm_owned_ld_mctp_cci;
CXLCCI ld0_cci;
+ CXLAlertConfig alert_config;
+
/* PCIe link characteristics */
PCIExpLinkSpeed speed;
PCIExpLinkWidth width;
@@ -602,6 +623,8 @@ struct CXLType3Dev {
uint8_t num_regions; /* 0-8 regions */
CXLDCRegion regions[DCD_MAX_NUM_REGION];
} dc;
+
+ struct CXLSanitizeInfo *media_op_sanitize;
};
#define TYPE_CXL_TYPE3 "cxl-type3"
diff --git a/include/hw/cxl/cxl_mailbox.h b/include/hw/cxl/cxl_mailbox.h
index beb0480..9008402 100644
--- a/include/hw/cxl/cxl_mailbox.h
+++ b/include/hw/cxl/cxl_mailbox.h
@@ -14,5 +14,6 @@
#define CXL_MBOX_IMMEDIATE_LOG_CHANGE (1 << 4)
#define CXL_MBOX_SECURITY_STATE_CHANGE (1 << 5)
#define CXL_MBOX_BACKGROUND_OPERATION (1 << 6)
+#define CXL_MBOX_BACKGROUND_OPERATION_ABORT (1 << 7)
#endif
diff --git a/include/hw/pci/pci_device.h b/include/hw/pci/pci_device.h
index 345b12e..e41d95b 100644
--- a/include/hw/pci/pci_device.h
+++ b/include/hw/pci/pci_device.h
@@ -38,6 +38,8 @@ struct PCIDeviceClass {
uint16_t subsystem_id; /* only for header type = 0 */
const char *romfile; /* rom bar */
+
+ bool sriov_vf_user_creatable;
};
enum PCIReqIDType {
@@ -177,6 +179,8 @@ struct PCIDevice {
* realizing the device.
*/
uint32_t max_bounce_buffer_size;
+
+ char *sriov_pf;
};
static inline int pci_intx(PCIDevice *pci_dev)
@@ -209,7 +213,7 @@ static inline int pci_is_express_downstream_port(const PCIDevice *d)
static inline int pci_is_vf(const PCIDevice *d)
{
- return d->exp.sriov_vf.pf != NULL;
+ return d->sriov_pf || d->exp.sriov_vf.pf != NULL;
}
static inline uint32_t pci_config_size(const PCIDevice *d)
diff --git a/include/hw/pci/pcie_sriov.h b/include/hw/pci/pcie_sriov.h
index c5d2d31..aeaa38c 100644
--- a/include/hw/pci/pcie_sriov.h
+++ b/include/hw/pci/pcie_sriov.h
@@ -18,6 +18,7 @@
typedef struct PCIESriovPF {
uint8_t vf_bar_type[PCI_NUM_REGIONS]; /* Store type for each VF bar */
PCIDevice **vf; /* Pointer to an array of num_vfs VF devices */
+ bool vf_user_created; /* If VFs are created by user */
} PCIESriovPF;
typedef struct PCIESriovVF {
@@ -40,6 +41,26 @@ void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num,
void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num,
MemoryRegion *memory);
+/**
+ * pcie_sriov_pf_init_from_user_created_vfs() - Initialize PF with user-created
+ * VFs, adding ARI to PF
+ * @dev: A PCIe device being realized.
+ * @offset: The offset of the SR-IOV capability.
+ * @errp: pointer to Error*, to store an error if it happens.
+ *
+ * Initializes a PF with user-created VFs, adding the ARI extended capability to
+ * the PF. The VFs should call pcie_ari_init() to form an ARI device.
+ *
+ * Return: The size of added capabilities. 0 if the user did not create VFs.
+ * -1 if failed.
+ */
+int16_t pcie_sriov_pf_init_from_user_created_vfs(PCIDevice *dev,
+ uint16_t offset,
+ Error **errp);
+
+bool pcie_sriov_register_device(PCIDevice *dev, Error **errp);
+void pcie_sriov_unregister_device(PCIDevice *dev);
+
/*
* Default (minimal) page size support values
* as required by the SR/IOV standard:
diff --git a/include/hw/virtio/vhost-scsi-common.h b/include/hw/virtio/vhost-scsi-common.h
index c5d2c09..d54d9c9 100644
--- a/include/hw/virtio/vhost-scsi-common.h
+++ b/include/hw/virtio/vhost-scsi-common.h
@@ -40,7 +40,7 @@ struct VHostSCSICommon {
};
int vhost_scsi_common_start(VHostSCSICommon *vsc, Error **errp);
-void vhost_scsi_common_stop(VHostSCSICommon *vsc);
+int vhost_scsi_common_stop(VHostSCSICommon *vsc);
char *vhost_scsi_common_get_fw_dev_path(FWPathProvider *p, BusState *bus,
DeviceState *dev);
void vhost_scsi_common_set_config(VirtIODevice *vdev, const uint8_t *config);
diff --git a/include/hw/virtio/vhost-vsock-common.h b/include/hw/virtio/vhost-vsock-common.h
index 75a74e8..01bf606 100644
--- a/include/hw/virtio/vhost-vsock-common.h
+++ b/include/hw/virtio/vhost-vsock-common.h
@@ -42,7 +42,7 @@ struct VHostVSockCommon {
};
int vhost_vsock_common_start(VirtIODevice *vdev);
-void vhost_vsock_common_stop(VirtIODevice *vdev);
+int vhost_vsock_common_stop(VirtIODevice *vdev);
int vhost_vsock_common_pre_save(void *opaque);
int vhost_vsock_common_post_load(void *opaque, int version_id);
void vhost_vsock_common_realize(VirtIODevice *vdev);
diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
index bb4b58e..38800a7 100644
--- a/include/hw/virtio/vhost.h
+++ b/include/hw/virtio/vhost.h
@@ -232,8 +232,10 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings);
* Stop the vhost device. After the device is stopped the notifiers
* can be disabled (@vhost_dev_disable_notifiers) and the device can
* be torn down (@vhost_dev_cleanup).
+ *
+ * Return: 0 on success, != 0 on error when stopping dev.
*/
-void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings);
+int vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings);
/**
* DOC: vhost device configuration handling
@@ -333,8 +335,8 @@ int vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write);
int vhost_virtqueue_start(struct vhost_dev *dev, struct VirtIODevice *vdev,
struct vhost_virtqueue *vq, unsigned idx);
-void vhost_virtqueue_stop(struct vhost_dev *dev, struct VirtIODevice *vdev,
- struct vhost_virtqueue *vq, unsigned idx);
+int vhost_virtqueue_stop(struct vhost_dev *dev, struct VirtIODevice *vdev,
+ struct vhost_virtqueue *vq, unsigned idx);
void vhost_dev_reset_inflight(struct vhost_inflight *inflight);
void vhost_dev_free_inflight(struct vhost_inflight *inflight);
diff --git a/include/hw/virtio/virtio-pci.h b/include/hw/virtio/virtio-pci.h
index 31ec144..1dbc385 100644
--- a/include/hw/virtio/virtio-pci.h
+++ b/include/hw/virtio/virtio-pci.h
@@ -155,6 +155,7 @@ struct VirtIOPCIProxy {
uint32_t modern_io_bar_idx;
uint32_t modern_mem_bar_idx;
int config_cap;
+ uint16_t last_pcie_cap_offset;
uint32_t flags;
bool disable_modern;
bool ignore_backend_features;
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index 7e0c471..214d4a7 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -186,7 +186,7 @@ struct VirtioDeviceClass {
void (*get_config)(VirtIODevice *vdev, uint8_t *config);
void (*set_config)(VirtIODevice *vdev, const uint8_t *config);
void (*reset)(VirtIODevice *vdev);
- void (*set_status)(VirtIODevice *vdev, uint8_t val);
+ int (*set_status)(VirtIODevice *vdev, uint8_t val);
/* Device must validate queue_index. */
void (*queue_reset)(VirtIODevice *vdev, uint32_t queue_index);
/* Device must validate queue_index. */
diff --git a/include/system/runstate.h b/include/system/runstate.h
index bffc371..fdd5c4a 100644
--- a/include/system/runstate.h
+++ b/include/system/runstate.h
@@ -12,6 +12,7 @@ bool runstate_needs_reset(void);
void runstate_replay_enable(void);
typedef void VMChangeStateHandler(void *opaque, bool running, RunState state);
+typedef int VMChangeStateHandlerWithRet(void *opaque, bool running, RunState state);
VMChangeStateEntry *qemu_add_vm_change_state_handler(VMChangeStateHandler *cb,
void *opaque);
@@ -20,21 +21,27 @@ VMChangeStateEntry *qemu_add_vm_change_state_handler_prio(
VMChangeStateEntry *
qemu_add_vm_change_state_handler_prio_full(VMChangeStateHandler *cb,
VMChangeStateHandler *prepare_cb,
+ VMChangeStateHandlerWithRet *cb_ret,
void *opaque, int priority);
VMChangeStateEntry *qdev_add_vm_change_state_handler(DeviceState *dev,
VMChangeStateHandler *cb,
+ VMChangeStateHandlerWithRet *cb_ret,
void *opaque);
VMChangeStateEntry *qdev_add_vm_change_state_handler_full(
- DeviceState *dev, VMChangeStateHandler *cb,
- VMChangeStateHandler *prepare_cb, void *opaque);
+ DeviceState *dev, VMChangeStateHandler *cb, VMChangeStateHandler *prepare_cb,
+ VMChangeStateHandlerWithRet *cb_ret, void *opaque);
void qemu_del_vm_change_state_handler(VMChangeStateEntry *e);
/**
* vm_state_notify: Notify the state of the VM
*
* @running: whether the VM is running or not.
* @state: the #RunState of the VM.
+ *
+ * Return the result of the callback which has return value.
+ * If no callback has return value, still return 0 and the
+ * upper layer should not do additional processing.
*/
-void vm_state_notify(bool running, RunState state);
+int vm_state_notify(bool running, RunState state);
static inline bool shutdown_caused_by_guest(ShutdownCause cause)
{
diff --git a/include/system/vhost-user-backend.h b/include/system/vhost-user-backend.h
index 5ed953c..5634ebd 100644
--- a/include/system/vhost-user-backend.h
+++ b/include/system/vhost-user-backend.h
@@ -43,6 +43,6 @@ struct VhostUserBackend {
int vhost_user_backend_dev_init(VhostUserBackend *b, VirtIODevice *vdev,
unsigned nvqs, Error **errp);
void vhost_user_backend_start(VhostUserBackend *b);
-void vhost_user_backend_stop(VhostUserBackend *b);
+int vhost_user_backend_stop(VhostUserBackend *b);
#endif
diff --git a/system/cpus.c b/system/cpus.c
index 2cc5f88..d16b0df 100644
--- a/system/cpus.c
+++ b/system/cpus.c
@@ -299,14 +299,18 @@ static int do_vm_stop(RunState state, bool send_stop)
if (oldstate == RUN_STATE_RUNNING) {
pause_all_vcpus();
}
- vm_state_notify(0, state);
+ ret = vm_state_notify(0, state);
if (send_stop) {
qapi_event_send_stop();
}
}
bdrv_drain_all();
- ret = bdrv_flush_all();
+ /*
+ * Even if vm_state_notify() return failure,
+ * it would be better to flush as before.
+ */
+ ret |= bdrv_flush_all();
trace_vm_stop_flush_all(ret);
return ret;
diff --git a/system/runstate.c b/system/runstate.c
index 272801d..de74d96 100644
--- a/system/runstate.c
+++ b/system/runstate.c
@@ -297,6 +297,7 @@ void qemu_system_vmstop_request(RunState state)
struct VMChangeStateEntry {
VMChangeStateHandler *cb;
VMChangeStateHandler *prepare_cb;
+ VMChangeStateHandlerWithRet *cb_ret;
void *opaque;
QTAILQ_ENTRY(VMChangeStateEntry) entries;
int priority;
@@ -320,14 +321,15 @@ static QTAILQ_HEAD(, VMChangeStateEntry) vm_change_state_head =
VMChangeStateEntry *qemu_add_vm_change_state_handler_prio(
VMChangeStateHandler *cb, void *opaque, int priority)
{
- return qemu_add_vm_change_state_handler_prio_full(cb, NULL, opaque,
- priority);
+ return qemu_add_vm_change_state_handler_prio_full(cb, NULL, NULL,
+ opaque, priority);
}
/**
* qemu_add_vm_change_state_handler_prio_full:
* @cb: the main callback to invoke
* @prepare_cb: a callback to invoke before the main callback
+ * @cb_ret: the main callback to invoke with return value
* @opaque: user data passed to the callbacks
* @priority: low priorities execute first when the vm runs and the reverse is
* true when the vm stops
@@ -344,6 +346,7 @@ VMChangeStateEntry *qemu_add_vm_change_state_handler_prio(
VMChangeStateEntry *
qemu_add_vm_change_state_handler_prio_full(VMChangeStateHandler *cb,
VMChangeStateHandler *prepare_cb,
+ VMChangeStateHandlerWithRet *cb_ret,
void *opaque, int priority)
{
VMChangeStateEntry *e;
@@ -352,6 +355,7 @@ qemu_add_vm_change_state_handler_prio_full(VMChangeStateHandler *cb,
e = g_malloc0(sizeof(*e));
e->cb = cb;
e->prepare_cb = prepare_cb;
+ e->cb_ret = cb_ret;
e->opaque = opaque;
e->priority = priority;
@@ -379,9 +383,10 @@ void qemu_del_vm_change_state_handler(VMChangeStateEntry *e)
g_free(e);
}
-void vm_state_notify(bool running, RunState state)
+int vm_state_notify(bool running, RunState state)
{
VMChangeStateEntry *e, *next;
+ int ret = 0;
trace_vm_state_notify(running, state, RunState_str(state));
@@ -393,7 +398,17 @@ void vm_state_notify(bool running, RunState state)
}
QTAILQ_FOREACH_SAFE(e, &vm_change_state_head, entries, next) {
- e->cb(e->opaque, running, state);
+ if (e->cb) {
+ e->cb(e->opaque, running, state);
+ } else if (e->cb_ret) {
+ /*
+ * Here ignore the return value of cb_ret because
+ * we only care about the stopping the device during
+ * the VM live migration to indicate whether the
+ * connection between qemu and backend is normal.
+ */
+ e->cb_ret(e->opaque, running, state);
+ }
}
} else {
QTAILQ_FOREACH_REVERSE_SAFE(e, &vm_change_state_head, entries, next) {
@@ -403,9 +418,19 @@ void vm_state_notify(bool running, RunState state)
}
QTAILQ_FOREACH_REVERSE_SAFE(e, &vm_change_state_head, entries, next) {
- e->cb(e->opaque, running, state);
+ if (e->cb) {
+ e->cb(e->opaque, running, state);
+ } else if (e->cb_ret) {
+ /*
+ * We should execute all registered callbacks even if
+ * one of them returns failure, otherwise, some cleanup
+ * work of the device will be skipped.
+ */
+ ret |= e->cb_ret(e->opaque, running, state);
+ }
}
}
+ return ret;
}
static ShutdownCause reset_requested;