diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2018-01-19 10:17:20 +0000 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2018-01-19 10:17:20 +0000 |
commit | 3e5bdc6573edf0585e4085e6a4e349b135abf3b4 (patch) | |
tree | d5a30ac7e2374d362cc7a927665d1938ed1e03ba /hw | |
parent | b4d6ed1c5ae519d3efb5297be3ef6625ca2a20f4 (diff) | |
parent | f4bf56fb78ed0e9f60fa1ed656c14ff4c494da5a (diff) | |
download | qemu-3e5bdc6573edf0585e4085e6a4e349b135abf3b4.zip qemu-3e5bdc6573edf0585e4085e6a4e349b135abf3b4.tar.gz qemu-3e5bdc6573edf0585e4085e6a4e349b135abf3b4.tar.bz2 |
Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging
pc, pci, virtio: features, fixes, cleanups
A bunch of fixes, cleanus and new features all over the place.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
# gpg: Signature made Thu 18 Jan 2018 20:41:03 GMT
# gpg: using RSA key 0x281F0DB8D28D5469
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>"
# gpg: aka "Michael S. Tsirkin <mst@redhat.com>"
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17 0970 C350 3912 AFBE 8E67
# Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA 8A0D 281F 0DB8 D28D 5469
* remotes/mst/tags/for_upstream: (29 commits)
vhost: remove assertion to prevent crash
vhost-user: fix misaligned access to payload
vhost-user: factor out msg head and payload
tests: acpi: add comments to fetch_rsdt_referenced_tables/data->tables usage
tests: acpi: rename test_acpi_tables()/test_dst_table() to reflect its usage
tests: acpi: init table descriptor in test_dst_table()
tests: acpi: move tested tables array allocation outside of test_acpi_dsdt_table()
x86_iommu: check if machine has PCI bus
x86_iommu: Move machine check to x86_iommu_realize()
vhost-user-test: use init_virtio_dev in multiqueue test
vhost-user-test: make features mask an init_virtio_dev() argument
vhost-user-test: setup virtqueues in all tests
vhost-user-test: extract read-guest-mem test from main loop
vhost-user-test: fix features mask
hw/acpi-build: Make next_base easy to follow
ACPI/unit-test: Add a testcase for RAM allocation in numa node
hw/pci-bridge: fix QEMU crash because of pcie-root-port
intel-iommu: Extend address width to 48 bits
intel-iommu: Redefine macros to enable supporting 48 bit address width
vhost-user: fix multiple queue specification
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'hw')
-rw-r--r-- | hw/block/Makefile.objs | 3 | ||||
-rw-r--r-- | hw/block/vhost-user-blk.c | 359 | ||||
-rw-r--r-- | hw/i386/acpi-build.c | 5 | ||||
-rw-r--r-- | hw/i386/amd_iommu.c | 13 | ||||
-rw-r--r-- | hw/i386/intel_iommu.c | 136 | ||||
-rw-r--r-- | hw/i386/intel_iommu_internal.h | 43 | ||||
-rw-r--r-- | hw/i386/x86-iommu.c | 13 | ||||
-rw-r--r-- | hw/pci-bridge/gen_pcie_root_port.c | 7 | ||||
-rw-r--r-- | hw/pci/shpc.c | 13 | ||||
-rw-r--r-- | hw/virtio/vhost-user.c | 318 | ||||
-rw-r--r-- | hw/virtio/vhost.c | 32 | ||||
-rw-r--r-- | hw/virtio/virtio-bus.c | 19 | ||||
-rw-r--r-- | hw/virtio/virtio-pci.c | 55 | ||||
-rw-r--r-- | hw/virtio/virtio-pci.h | 18 | ||||
-rw-r--r-- | hw/virtio/virtio.c | 5 |
15 files changed, 840 insertions, 199 deletions
diff --git a/hw/block/Makefile.objs b/hw/block/Makefile.objs index e0ed980..4c19a58 100644 --- a/hw/block/Makefile.objs +++ b/hw/block/Makefile.objs @@ -13,3 +13,6 @@ obj-$(CONFIG_SH4) += tc58128.o obj-$(CONFIG_VIRTIO) += virtio-blk.o obj-$(CONFIG_VIRTIO) += dataplane/ +ifeq ($(CONFIG_VIRTIO),y) +obj-$(CONFIG_VHOST_USER_BLK) += vhost-user-blk.o +endif diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c new file mode 100644 index 0000000..b53b4c9 --- /dev/null +++ b/hw/block/vhost-user-blk.c @@ -0,0 +1,359 @@ +/* + * vhost-user-blk host device + * + * Copyright(C) 2017 Intel Corporation. + * + * Authors: + * Changpeng Liu <changpeng.liu@intel.com> + * + * Largely based on the "vhost-user-scsi.c" and "vhost-scsi.c" implemented by: + * Felipe Franciosi <felipe@nutanix.com> + * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com> + * Nicholas Bellinger <nab@risingtidesystems.com> + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "qemu/typedefs.h" +#include "qemu/cutils.h" +#include "qom/object.h" +#include "hw/qdev-core.h" +#include "hw/virtio/vhost.h" +#include "hw/virtio/vhost-user-blk.h" +#include "hw/virtio/virtio.h" +#include "hw/virtio/virtio-bus.h" +#include "hw/virtio/virtio-access.h" + +static const int user_feature_bits[] = { + VIRTIO_BLK_F_SIZE_MAX, + VIRTIO_BLK_F_SEG_MAX, + VIRTIO_BLK_F_GEOMETRY, + VIRTIO_BLK_F_BLK_SIZE, + VIRTIO_BLK_F_TOPOLOGY, + VIRTIO_BLK_F_MQ, + VIRTIO_BLK_F_RO, + VIRTIO_BLK_F_FLUSH, + VIRTIO_BLK_F_CONFIG_WCE, + VIRTIO_F_VERSION_1, + VIRTIO_RING_F_INDIRECT_DESC, + VIRTIO_RING_F_EVENT_IDX, + VIRTIO_F_NOTIFY_ON_EMPTY, + VHOST_INVALID_FEATURE_BIT +}; + +static void vhost_user_blk_update_config(VirtIODevice *vdev, uint8_t *config) +{ + VHostUserBlk *s = VHOST_USER_BLK(vdev); + + memcpy(config, &s->blkcfg, sizeof(struct virtio_blk_config)); +} + +static void vhost_user_blk_set_config(VirtIODevice *vdev, const uint8_t *config) +{ + VHostUserBlk *s = VHOST_USER_BLK(vdev); + struct virtio_blk_config *blkcfg = (struct virtio_blk_config *)config; + int ret; + + if (blkcfg->wce == s->blkcfg.wce) { + return; + } + + ret = vhost_dev_set_config(&s->dev, &blkcfg->wce, + offsetof(struct virtio_blk_config, wce), + sizeof(blkcfg->wce), + VHOST_SET_CONFIG_TYPE_MASTER); + if (ret) { + error_report("set device config space failed"); + return; + } + + s->blkcfg.wce = blkcfg->wce; +} + +static int vhost_user_blk_handle_config_change(struct vhost_dev *dev) +{ + int ret; + struct virtio_blk_config blkcfg; + VHostUserBlk *s = VHOST_USER_BLK(dev->vdev); + + ret = vhost_dev_get_config(dev, (uint8_t *)&blkcfg, + sizeof(struct virtio_blk_config)); + if (ret < 0) { + error_report("get config space failed"); + return -1; + } + + /* valid for resize only */ + if (blkcfg.capacity != s->blkcfg.capacity) { + s->blkcfg.capacity = blkcfg.capacity; + memcpy(dev->vdev->config, &s->blkcfg, sizeof(struct virtio_blk_config)); + virtio_notify_config(dev->vdev); + } + + return 0; +} + +const VhostDevConfigOps blk_ops = { + .vhost_dev_config_notifier = vhost_user_blk_handle_config_change, +}; + +static void vhost_user_blk_start(VirtIODevice *vdev) +{ + VHostUserBlk *s = VHOST_USER_BLK(vdev); + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + int i, ret; + + if (!k->set_guest_notifiers) { + error_report("binding does not support guest notifiers"); + return; + } + + ret = vhost_dev_enable_notifiers(&s->dev, vdev); + if (ret < 0) { + error_report("Error enabling host notifiers: %d", -ret); + return; + } + + ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, true); + if (ret < 0) { + error_report("Error binding guest notifier: %d", -ret); + goto err_host_notifiers; + } + + s->dev.acked_features = vdev->guest_features; + ret = vhost_dev_start(&s->dev, vdev); + if (ret < 0) { + error_report("Error starting vhost: %d", -ret); + goto err_guest_notifiers; + } + + /* guest_notifier_mask/pending not used yet, so just unmask + * everything here. virtio-pci will do the right thing by + * enabling/disabling irqfd. + */ + for (i = 0; i < s->dev.nvqs; i++) { + vhost_virtqueue_mask(&s->dev, vdev, i, false); + } + + return; + +err_guest_notifiers: + k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false); +err_host_notifiers: + vhost_dev_disable_notifiers(&s->dev, vdev); +} + +static void vhost_user_blk_stop(VirtIODevice *vdev) +{ + VHostUserBlk *s = VHOST_USER_BLK(vdev); + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + int ret; + + if (!k->set_guest_notifiers) { + return; + } + + vhost_dev_stop(&s->dev, vdev); + + ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false); + if (ret < 0) { + error_report("vhost guest notifier cleanup failed: %d", ret); + return; + } + + vhost_dev_disable_notifiers(&s->dev, vdev); +} + +static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status) +{ + VHostUserBlk *s = VHOST_USER_BLK(vdev); + bool should_start = status & VIRTIO_CONFIG_S_DRIVER_OK; + + if (!vdev->vm_running) { + should_start = false; + } + + if (s->dev.started == should_start) { + return; + } + + if (should_start) { + vhost_user_blk_start(vdev); + } else { + vhost_user_blk_stop(vdev); + } + +} + +static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev, + uint64_t features, + Error **errp) +{ + VHostUserBlk *s = VHOST_USER_BLK(vdev); + uint64_t get_features; + + /* Turn on pre-defined features */ + virtio_add_feature(&features, VIRTIO_BLK_F_SEG_MAX); + virtio_add_feature(&features, VIRTIO_BLK_F_GEOMETRY); + virtio_add_feature(&features, VIRTIO_BLK_F_TOPOLOGY); + virtio_add_feature(&features, VIRTIO_BLK_F_BLK_SIZE); + virtio_add_feature(&features, VIRTIO_BLK_F_FLUSH); + + if (s->config_wce) { + virtio_add_feature(&features, VIRTIO_BLK_F_CONFIG_WCE); + } + if (s->config_ro) { + virtio_add_feature(&features, VIRTIO_BLK_F_RO); + } + if (s->num_queues > 1) { + virtio_add_feature(&features, VIRTIO_BLK_F_MQ); + } + + get_features = vhost_get_features(&s->dev, user_feature_bits, features); + + return get_features; +} + +static void vhost_user_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) +{ + +} + +static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserBlk *s = VHOST_USER_BLK(vdev); + int i, ret; + + if (!s->chardev.chr) { + error_setg(errp, "vhost-user-blk: chardev is mandatory"); + return; + } + + if (!s->num_queues || s->num_queues > VIRTIO_QUEUE_MAX) { + error_setg(errp, "vhost-user-blk: invalid number of IO queues"); + return; + } + + if (!s->queue_size) { + error_setg(errp, "vhost-user-blk: queue size must be non-zero"); + return; + } + + virtio_init(vdev, "virtio-blk", VIRTIO_ID_BLOCK, + sizeof(struct virtio_blk_config)); + + for (i = 0; i < s->num_queues; i++) { + virtio_add_queue(vdev, s->queue_size, + vhost_user_blk_handle_output); + } + + s->dev.nvqs = s->num_queues; + s->dev.vqs = g_new(struct vhost_virtqueue, s->dev.nvqs); + s->dev.vq_index = 0; + s->dev.backend_features = 0; + + ret = vhost_dev_init(&s->dev, &s->chardev, VHOST_BACKEND_TYPE_USER, 0); + if (ret < 0) { + error_setg(errp, "vhost-user-blk: vhost initialization failed: %s", + strerror(-ret)); + goto virtio_err; + } + + ret = vhost_dev_get_config(&s->dev, (uint8_t *)&s->blkcfg, + sizeof(struct virtio_blk_config)); + if (ret < 0) { + error_setg(errp, "vhost-user-blk: get block config failed"); + goto vhost_err; + } + + if (s->blkcfg.num_queues != s->num_queues) { + s->blkcfg.num_queues = s->num_queues; + } + + vhost_dev_set_config_notifier(&s->dev, &blk_ops); + + return; + +vhost_err: + vhost_dev_cleanup(&s->dev); +virtio_err: + g_free(s->dev.vqs); + virtio_cleanup(vdev); +} + +static void vhost_user_blk_device_unrealize(DeviceState *dev, Error **errp) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserBlk *s = VHOST_USER_BLK(dev); + + vhost_user_blk_set_status(vdev, 0); + vhost_dev_cleanup(&s->dev); + g_free(s->dev.vqs); + virtio_cleanup(vdev); +} + +static void vhost_user_blk_instance_init(Object *obj) +{ + VHostUserBlk *s = VHOST_USER_BLK(obj); + + device_add_bootindex_property(obj, &s->bootindex, "bootindex", + "/disk@0,0", DEVICE(obj), NULL); +} + +static const VMStateDescription vmstate_vhost_user_blk = { + .name = "vhost-user-blk", + .minimum_version_id = 1, + .version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_VIRTIO_DEVICE, + VMSTATE_END_OF_LIST() + }, +}; + +static Property vhost_user_blk_properties[] = { + DEFINE_PROP_CHR("chardev", VHostUserBlk, chardev), + DEFINE_PROP_UINT16("num-queues", VHostUserBlk, num_queues, 1), + DEFINE_PROP_UINT32("queue-size", VHostUserBlk, queue_size, 128), + DEFINE_PROP_BIT("config-wce", VHostUserBlk, config_wce, 0, true), + DEFINE_PROP_BIT("config-ro", VHostUserBlk, config_ro, 0, false), + DEFINE_PROP_END_OF_LIST(), +}; + +static void vhost_user_blk_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); + + dc->props = vhost_user_blk_properties; + dc->vmsd = &vmstate_vhost_user_blk; + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); + vdc->realize = vhost_user_blk_device_realize; + vdc->unrealize = vhost_user_blk_device_unrealize; + vdc->get_config = vhost_user_blk_update_config; + vdc->set_config = vhost_user_blk_set_config; + vdc->get_features = vhost_user_blk_get_features; + vdc->set_status = vhost_user_blk_set_status; +} + +static const TypeInfo vhost_user_blk_info = { + .name = TYPE_VHOST_USER_BLK, + .parent = TYPE_VIRTIO_DEVICE, + .instance_size = sizeof(VHostUserBlk), + .instance_init = vhost_user_blk_instance_init, + .class_init = vhost_user_blk_class_init, +}; + +static void virtio_register_types(void) +{ + type_register_static(&vhost_user_blk_info); +} + +type_init(virtio_register_types) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 18b939e..dc4b2b9 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -2394,7 +2394,7 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) } mem_base = 1ULL << 32; mem_len = next_base - pcms->below_4g_mem_size; - next_base += (1ULL << 32) - pcms->below_4g_mem_size; + next_base = mem_base + mem_len; } numamem = acpi_data_push(table_data, sizeof *numamem); build_srat_memory(numamem, mem_base, mem_len, i - 1, @@ -2473,6 +2473,7 @@ build_dmar_q35(GArray *table_data, BIOSLinker *linker) AcpiDmarDeviceScope *scope = NULL; /* Root complex IOAPIC use one path[0] only */ size_t ioapic_scope_size = sizeof(*scope) + sizeof(scope->path[0]); + IntelIOMMUState *intel_iommu = INTEL_IOMMU_DEVICE(iommu); assert(iommu); if (iommu->intr_supported) { @@ -2480,7 +2481,7 @@ build_dmar_q35(GArray *table_data, BIOSLinker *linker) } dmar = acpi_data_push(table_data, sizeof(*dmar)); - dmar->host_address_width = VTD_HOST_ADDRESS_WIDTH - 1; + dmar->host_address_width = intel_iommu->aw_bits - 1; dmar->flags = dmar_flags; /* DMAR Remapping Hardware Unit Definition structure */ diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c index eeaf0e0..63d46ff 100644 --- a/hw/i386/amd_iommu.c +++ b/hw/i386/amd_iommu.c @@ -1144,18 +1144,9 @@ static void amdvi_realize(DeviceState *dev, Error **err) AMDVIState *s = AMD_IOMMU_DEVICE(dev); X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev); MachineState *ms = MACHINE(qdev_get_machine()); - MachineClass *mc = MACHINE_GET_CLASS(ms); - PCMachineState *pcms = - PC_MACHINE(object_dynamic_cast(OBJECT(ms), TYPE_PC_MACHINE)); - PCIBus *bus; - - if (!pcms) { - error_setg(err, "Machine-type '%s' not supported by amd-iommu", - mc->name); - return; - } + PCMachineState *pcms = PC_MACHINE(ms); + PCIBus *bus = pcms->bus; - bus = pcms->bus; s->iotlb = g_hash_table_new_full(amdvi_uint64_hash, amdvi_uint64_equal, g_free, g_free); diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index fe15d3b..2e841cd 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -521,9 +521,9 @@ static inline dma_addr_t vtd_ce_get_slpt_base(VTDContextEntry *ce) return ce->lo & VTD_CONTEXT_ENTRY_SLPTPTR; } -static inline uint64_t vtd_get_slpte_addr(uint64_t slpte) +static inline uint64_t vtd_get_slpte_addr(uint64_t slpte, uint8_t aw) { - return slpte & VTD_SL_PT_BASE_ADDR_MASK; + return slpte & VTD_SL_PT_BASE_ADDR_MASK(aw); } /* Whether the pte indicates the address of the page frame */ @@ -608,35 +608,29 @@ static inline bool vtd_ce_type_check(X86IOMMUState *x86_iommu, return true; } -static inline uint64_t vtd_iova_limit(VTDContextEntry *ce) +static inline uint64_t vtd_iova_limit(VTDContextEntry *ce, uint8_t aw) { uint32_t ce_agaw = vtd_ce_get_agaw(ce); - return 1ULL << MIN(ce_agaw, VTD_MGAW); + return 1ULL << MIN(ce_agaw, aw); } /* Return true if IOVA passes range check, otherwise false. */ -static inline bool vtd_iova_range_check(uint64_t iova, VTDContextEntry *ce) +static inline bool vtd_iova_range_check(uint64_t iova, VTDContextEntry *ce, + uint8_t aw) { /* * Check if @iova is above 2^X-1, where X is the minimum of MGAW * in CAP_REG and AW in context-entry. */ - return !(iova & ~(vtd_iova_limit(ce) - 1)); -} - -static const uint64_t vtd_paging_entry_rsvd_field[] = { - [0] = ~0ULL, - /* For not large page */ - [1] = 0x800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), - [2] = 0x800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), - [3] = 0x800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), - [4] = 0x880ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), - /* For large page */ - [5] = 0x800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), - [6] = 0x1ff800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), - [7] = 0x3ffff800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), - [8] = 0x880ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), -}; + return !(iova & ~(vtd_iova_limit(ce, aw) - 1)); +} + +/* + * Rsvd field masks for spte: + * Index [1] to [4] 4k pages + * Index [5] to [8] large pages + */ +static uint64_t vtd_paging_entry_rsvd_field[9]; static bool vtd_slpte_nonzero_rsvd(uint64_t slpte, uint32_t level) { @@ -676,7 +670,7 @@ static VTDBus *vtd_find_as_from_bus_num(IntelIOMMUState *s, uint8_t bus_num) */ static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write, uint64_t *slptep, uint32_t *slpte_level, - bool *reads, bool *writes) + bool *reads, bool *writes, uint8_t aw_bits) { dma_addr_t addr = vtd_ce_get_slpt_base(ce); uint32_t level = vtd_ce_get_level(ce); @@ -684,7 +678,7 @@ static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write, uint64_t slpte; uint64_t access_right_check; - if (!vtd_iova_range_check(iova, ce)) { + if (!vtd_iova_range_check(iova, ce, aw_bits)) { trace_vtd_err_dmar_iova_overflow(iova); return -VTD_FR_ADDR_BEYOND_MGAW; } @@ -721,7 +715,7 @@ static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write, *slpte_level = level; return 0; } - addr = vtd_get_slpte_addr(slpte); + addr = vtd_get_slpte_addr(slpte, aw_bits); level--; } } @@ -739,11 +733,12 @@ typedef int (*vtd_page_walk_hook)(IOMMUTLBEntry *entry, void *private); * @read: whether parent level has read permission * @write: whether parent level has write permission * @notify_unmap: whether we should notify invalid entries + * @aw: maximum address width */ static int vtd_page_walk_level(dma_addr_t addr, uint64_t start, uint64_t end, vtd_page_walk_hook hook_fn, - void *private, uint32_t level, - bool read, bool write, bool notify_unmap) + void *private, uint32_t level, bool read, + bool write, bool notify_unmap, uint8_t aw) { bool read_cur, write_cur, entry_valid; uint32_t offset; @@ -790,7 +785,7 @@ static int vtd_page_walk_level(dma_addr_t addr, uint64_t start, entry.target_as = &address_space_memory; entry.iova = iova & subpage_mask; /* NOTE: this is only meaningful if entry_valid == true */ - entry.translated_addr = vtd_get_slpte_addr(slpte); + entry.translated_addr = vtd_get_slpte_addr(slpte, aw); entry.addr_mask = ~subpage_mask; entry.perm = IOMMU_ACCESS_FLAG(read_cur, write_cur); if (!entry_valid && !notify_unmap) { @@ -810,10 +805,10 @@ static int vtd_page_walk_level(dma_addr_t addr, uint64_t start, trace_vtd_page_walk_skip_perm(iova, iova_next); goto next; } - ret = vtd_page_walk_level(vtd_get_slpte_addr(slpte), iova, + ret = vtd_page_walk_level(vtd_get_slpte_addr(slpte, aw), iova, MIN(iova_next, end), hook_fn, private, level - 1, read_cur, write_cur, - notify_unmap); + notify_unmap, aw); if (ret < 0) { return ret; } @@ -834,25 +829,26 @@ next: * @end: IOVA range end address (start <= addr < end) * @hook_fn: the hook that to be called for each detected area * @private: private data for the hook function + * @aw: maximum address width */ static int vtd_page_walk(VTDContextEntry *ce, uint64_t start, uint64_t end, vtd_page_walk_hook hook_fn, void *private, - bool notify_unmap) + bool notify_unmap, uint8_t aw) { dma_addr_t addr = vtd_ce_get_slpt_base(ce); uint32_t level = vtd_ce_get_level(ce); - if (!vtd_iova_range_check(start, ce)) { + if (!vtd_iova_range_check(start, ce, aw)) { return -VTD_FR_ADDR_BEYOND_MGAW; } - if (!vtd_iova_range_check(end, ce)) { + if (!vtd_iova_range_check(end, ce, aw)) { /* Fix end so that it reaches the maximum */ - end = vtd_iova_limit(ce); + end = vtd_iova_limit(ce, aw); } return vtd_page_walk_level(addr, start, end, hook_fn, private, - level, true, true, notify_unmap); + level, true, true, notify_unmap, aw); } /* Map a device to its corresponding domain (context-entry) */ @@ -874,7 +870,7 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num, return -VTD_FR_ROOT_ENTRY_P; } - if (re.rsvd || (re.val & VTD_ROOT_ENTRY_RSVD)) { + if (re.rsvd || (re.val & VTD_ROOT_ENTRY_RSVD(s->aw_bits))) { trace_vtd_re_invalid(re.rsvd, re.val); return -VTD_FR_ROOT_ENTRY_RSVD; } @@ -891,7 +887,7 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num, } if ((ce->hi & VTD_CONTEXT_ENTRY_RSVD_HI) || - (ce->lo & VTD_CONTEXT_ENTRY_RSVD_LO)) { + (ce->lo & VTD_CONTEXT_ENTRY_RSVD_LO(s->aw_bits))) { trace_vtd_ce_invalid(ce->hi, ce->lo); return -VTD_FR_CONTEXT_ENTRY_RSVD; } @@ -1173,7 +1169,7 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, } ret_fr = vtd_iova_to_slpte(&ce, addr, is_write, &slpte, &level, - &reads, &writes); + &reads, &writes, s->aw_bits); if (ret_fr) { ret_fr = -ret_fr; if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) { @@ -1190,7 +1186,7 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, access_flags, level); out: entry->iova = addr & page_mask; - entry->translated_addr = vtd_get_slpte_addr(slpte) & page_mask; + entry->translated_addr = vtd_get_slpte_addr(slpte, s->aw_bits) & page_mask; entry->addr_mask = ~page_mask; entry->perm = access_flags; return true; @@ -1207,7 +1203,7 @@ static void vtd_root_table_setup(IntelIOMMUState *s) { s->root = vtd_get_quad_raw(s, DMAR_RTADDR_REG); s->root_extended = s->root & VTD_RTADDR_RTT; - s->root &= VTD_RTADDR_ADDR_MASK; + s->root &= VTD_RTADDR_ADDR_MASK(s->aw_bits); trace_vtd_reg_dmar_root(s->root, s->root_extended); } @@ -1223,7 +1219,7 @@ static void vtd_interrupt_remap_table_setup(IntelIOMMUState *s) uint64_t value = 0; value = vtd_get_quad_raw(s, DMAR_IRTA_REG); s->intr_size = 1UL << ((value & VTD_IRTA_SIZE_MASK) + 1); - s->intr_root = value & VTD_IRTA_ADDR_MASK; + s->intr_root = value & VTD_IRTA_ADDR_MASK(s->aw_bits); s->intr_eime = value & VTD_IRTA_EIME; /* Notify global invalidation */ @@ -1399,7 +1395,7 @@ static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s, if (!ret && domain_id == VTD_CONTEXT_ENTRY_DID(ce.hi)) { vtd_page_walk(&ce, addr, addr + (1 << am) * VTD_PAGE_SIZE, vtd_page_invalidate_notify_hook, - (void *)&vtd_as->iommu, true); + (void *)&vtd_as->iommu, true, s->aw_bits); } } } @@ -1479,7 +1475,7 @@ static void vtd_handle_gcmd_qie(IntelIOMMUState *s, bool en) trace_vtd_inv_qi_enable(en); if (en) { - s->iq = iqa_val & VTD_IQA_IQA_MASK; + s->iq = iqa_val & VTD_IQA_IQA_MASK(s->aw_bits); /* 2^(x+8) entries */ s->iq_size = 1UL << ((iqa_val & VTD_IQA_QS) + 8); s->qi_enabled = true; @@ -2410,6 +2406,8 @@ static Property vtd_properties[] = { DEFINE_PROP_ON_OFF_AUTO("eim", IntelIOMMUState, intr_eim, ON_OFF_AUTO_AUTO), DEFINE_PROP_BOOL("x-buggy-eim", IntelIOMMUState, buggy_eim, false), + DEFINE_PROP_UINT8("x-aw-bits", IntelIOMMUState, aw_bits, + VTD_HOST_ADDRESS_WIDTH), DEFINE_PROP_BOOL("caching-mode", IntelIOMMUState, caching_mode, FALSE), DEFINE_PROP_END_OF_LIST(), }; @@ -2765,6 +2763,7 @@ static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n) hwaddr size; hwaddr start = n->start; hwaddr end = n->end; + IntelIOMMUState *s = as->iommu_state; /* * Note: all the codes in this function has a assumption that IOVA @@ -2772,12 +2771,12 @@ static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n) * VT-d spec), otherwise we need to consider overflow of 64 bits. */ - if (end > VTD_ADDRESS_SIZE) { + if (end > VTD_ADDRESS_SIZE(s->aw_bits)) { /* * Don't need to unmap regions that is bigger than the whole * VT-d supported address space size */ - end = VTD_ADDRESS_SIZE; + end = VTD_ADDRESS_SIZE(s->aw_bits); } assert(start <= end); @@ -2789,9 +2788,9 @@ static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n) * suite the minimum available mask. */ int n = 64 - clz64(size); - if (n > VTD_MGAW) { + if (n > s->aw_bits) { /* should not happen, but in case it happens, limit it */ - n = VTD_MGAW; + n = s->aw_bits; } size = 1ULL << n; } @@ -2851,7 +2850,8 @@ static void vtd_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n) PCI_FUNC(vtd_as->devfn), VTD_CONTEXT_ENTRY_DID(ce.hi), ce.hi, ce.lo); - vtd_page_walk(&ce, 0, ~0ULL, vtd_replay_hook, (void *)n, false); + vtd_page_walk(&ce, 0, ~0ULL, vtd_replay_hook, (void *)n, false, + s->aw_bits); } else { trace_vtd_replay_ce_invalid(bus_n, PCI_SLOT(vtd_as->devfn), PCI_FUNC(vtd_as->devfn)); @@ -2882,10 +2882,27 @@ static void vtd_init(IntelIOMMUState *s) s->qi_enabled = false; s->iq_last_desc_type = VTD_INV_DESC_NONE; s->next_frcd_reg = 0; - s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND | VTD_CAP_MGAW | - VTD_CAP_SAGAW | VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS; + s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND | + VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS | + VTD_CAP_SAGAW_39bit | VTD_CAP_MGAW(s->aw_bits); + if (s->aw_bits == VTD_HOST_AW_48BIT) { + s->cap |= VTD_CAP_SAGAW_48bit; + } s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO; + /* + * Rsvd field masks for spte + */ + vtd_paging_entry_rsvd_field[0] = ~0ULL; + vtd_paging_entry_rsvd_field[1] = VTD_SPTE_PAGE_L1_RSVD_MASK(s->aw_bits); + vtd_paging_entry_rsvd_field[2] = VTD_SPTE_PAGE_L2_RSVD_MASK(s->aw_bits); + vtd_paging_entry_rsvd_field[3] = VTD_SPTE_PAGE_L3_RSVD_MASK(s->aw_bits); + vtd_paging_entry_rsvd_field[4] = VTD_SPTE_PAGE_L4_RSVD_MASK(s->aw_bits); + vtd_paging_entry_rsvd_field[5] = VTD_SPTE_LPAGE_L1_RSVD_MASK(s->aw_bits); + vtd_paging_entry_rsvd_field[6] = VTD_SPTE_LPAGE_L2_RSVD_MASK(s->aw_bits); + vtd_paging_entry_rsvd_field[7] = VTD_SPTE_LPAGE_L3_RSVD_MASK(s->aw_bits); + vtd_paging_entry_rsvd_field[8] = VTD_SPTE_LPAGE_L4_RSVD_MASK(s->aw_bits); + if (x86_iommu->intr_supported) { s->ecap |= VTD_ECAP_IR | VTD_ECAP_MHMV; if (s->intr_eim == ON_OFF_AUTO_ON) { @@ -3021,26 +3038,25 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp) } } + /* Currently only address widths supported are 39 and 48 bits */ + if ((s->aw_bits != VTD_HOST_AW_39BIT) && + (s->aw_bits != VTD_HOST_AW_48BIT)) { + error_setg(errp, "Supported values for x-aw-bits are: %d, %d", + VTD_HOST_AW_39BIT, VTD_HOST_AW_48BIT); + return false; + } + return true; } static void vtd_realize(DeviceState *dev, Error **errp) { MachineState *ms = MACHINE(qdev_get_machine()); - MachineClass *mc = MACHINE_GET_CLASS(ms); - PCMachineState *pcms = - PC_MACHINE(object_dynamic_cast(OBJECT(ms), TYPE_PC_MACHINE)); - PCIBus *bus; + PCMachineState *pcms = PC_MACHINE(ms); + PCIBus *bus = pcms->bus; IntelIOMMUState *s = INTEL_IOMMU_DEVICE(dev); X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev); - if (!pcms) { - error_setg(errp, "Machine-type '%s' not supported by intel-iommu", - mc->name); - return; - } - - bus = pcms->bus; x86_iommu->type = TYPE_INTEL; if (!vtd_decide_config(s, errp)) { diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h index 0e73a65..d084099 100644 --- a/hw/i386/intel_iommu_internal.h +++ b/hw/i386/intel_iommu_internal.h @@ -131,7 +131,7 @@ #define VTD_TLB_DID(val) (((val) >> 32) & VTD_DOMAIN_ID_MASK) /* IVA_REG */ -#define VTD_IVA_ADDR(val) ((val) & ~0xfffULL & ((1ULL << VTD_MGAW) - 1)) +#define VTD_IVA_ADDR(val) ((val) & ~0xfffULL) #define VTD_IVA_AM(val) ((val) & 0x3fULL) /* GCMD_REG */ @@ -172,10 +172,10 @@ /* RTADDR_REG */ #define VTD_RTADDR_RTT (1ULL << 11) -#define VTD_RTADDR_ADDR_MASK (VTD_HAW_MASK ^ 0xfffULL) +#define VTD_RTADDR_ADDR_MASK(aw) (VTD_HAW_MASK(aw) ^ 0xfffULL) /* IRTA_REG */ -#define VTD_IRTA_ADDR_MASK (VTD_HAW_MASK ^ 0xfffULL) +#define VTD_IRTA_ADDR_MASK(aw) (VTD_HAW_MASK(aw) ^ 0xfffULL) #define VTD_IRTA_EIME (1ULL << 11) #define VTD_IRTA_SIZE_MASK (0xfULL) @@ -197,9 +197,8 @@ #define VTD_DOMAIN_ID_SHIFT 16 /* 16-bit domain id for 64K domains */ #define VTD_DOMAIN_ID_MASK ((1UL << VTD_DOMAIN_ID_SHIFT) - 1) #define VTD_CAP_ND (((VTD_DOMAIN_ID_SHIFT - 4) / 2) & 7ULL) -#define VTD_MGAW 39 /* Maximum Guest Address Width */ -#define VTD_ADDRESS_SIZE (1ULL << VTD_MGAW) -#define VTD_CAP_MGAW (((VTD_MGAW - 1) & 0x3fULL) << 16) +#define VTD_ADDRESS_SIZE(aw) (1ULL << (aw)) +#define VTD_CAP_MGAW(aw) ((((aw) - 1) & 0x3fULL) << 16) #define VTD_MAMV 18ULL #define VTD_CAP_MAMV (VTD_MAMV << 48) #define VTD_CAP_PSI (1ULL << 39) @@ -213,13 +212,12 @@ #define VTD_CAP_SAGAW_39bit (0x2ULL << VTD_CAP_SAGAW_SHIFT) /* 48-bit AGAW, 4-level page-table */ #define VTD_CAP_SAGAW_48bit (0x4ULL << VTD_CAP_SAGAW_SHIFT) -#define VTD_CAP_SAGAW VTD_CAP_SAGAW_39bit /* IQT_REG */ #define VTD_IQT_QT(val) (((val) >> 4) & 0x7fffULL) /* IQA_REG */ -#define VTD_IQA_IQA_MASK (VTD_HAW_MASK ^ 0xfffULL) +#define VTD_IQA_IQA_MASK(aw) (VTD_HAW_MASK(aw) ^ 0xfffULL) #define VTD_IQA_QS 0x7ULL /* IQH_REG */ @@ -252,7 +250,7 @@ #define VTD_FRCD_SID_MASK 0xffffULL #define VTD_FRCD_SID(val) ((val) & VTD_FRCD_SID_MASK) /* For the low 64-bit of 128-bit */ -#define VTD_FRCD_FI(val) ((val) & (((1ULL << VTD_MGAW) - 1) ^ 0xfffULL)) +#define VTD_FRCD_FI(val) ((val) & ~0xfffULL) /* DMA Remapping Fault Conditions */ typedef enum VTDFaultReason { @@ -360,8 +358,7 @@ typedef union VTDInvDesc VTDInvDesc; #define VTD_INV_DESC_IOTLB_DOMAIN (2ULL << 4) #define VTD_INV_DESC_IOTLB_PAGE (3ULL << 4) #define VTD_INV_DESC_IOTLB_DID(val) (((val) >> 16) & VTD_DOMAIN_ID_MASK) -#define VTD_INV_DESC_IOTLB_ADDR(val) ((val) & ~0xfffULL & \ - ((1ULL << VTD_MGAW) - 1)) +#define VTD_INV_DESC_IOTLB_ADDR(val) ((val) & ~0xfffULL) #define VTD_INV_DESC_IOTLB_AM(val) ((val) & 0x3fULL) #define VTD_INV_DESC_IOTLB_RSVD_LO 0xffffffff0000ff00ULL #define VTD_INV_DESC_IOTLB_RSVD_HI 0xf80ULL @@ -373,6 +370,24 @@ typedef union VTDInvDesc VTDInvDesc; #define VTD_INV_DESC_DEVICE_IOTLB_RSVD_HI 0xffeULL #define VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO 0xffff0000ffe0fff8 +/* Rsvd field masks for spte */ +#define VTD_SPTE_PAGE_L1_RSVD_MASK(aw) \ + (0x800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM)) +#define VTD_SPTE_PAGE_L2_RSVD_MASK(aw) \ + (0x800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM)) +#define VTD_SPTE_PAGE_L3_RSVD_MASK(aw) \ + (0x800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM)) +#define VTD_SPTE_PAGE_L4_RSVD_MASK(aw) \ + (0x880ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM)) +#define VTD_SPTE_LPAGE_L1_RSVD_MASK(aw) \ + (0x800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM)) +#define VTD_SPTE_LPAGE_L2_RSVD_MASK(aw) \ + (0x1ff800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM)) +#define VTD_SPTE_LPAGE_L3_RSVD_MASK(aw) \ + (0x3ffff800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM)) +#define VTD_SPTE_LPAGE_L4_RSVD_MASK(aw) \ + (0x880ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM)) + /* Information about page-selective IOTLB invalidate */ struct VTDIOTLBPageInvInfo { uint16_t domain_id; @@ -403,7 +418,7 @@ typedef struct VTDRootEntry VTDRootEntry; #define VTD_ROOT_ENTRY_CTP (~0xfffULL) #define VTD_ROOT_ENTRY_NR (VTD_PAGE_SIZE / sizeof(VTDRootEntry)) -#define VTD_ROOT_ENTRY_RSVD (0xffeULL | ~VTD_HAW_MASK) +#define VTD_ROOT_ENTRY_RSVD(aw) (0xffeULL | ~VTD_HAW_MASK(aw)) /* Masks for struct VTDContextEntry */ /* lo */ @@ -415,7 +430,7 @@ typedef struct VTDRootEntry VTDRootEntry; #define VTD_CONTEXT_TT_PASS_THROUGH (2ULL << 2) /* Second Level Page Translation Pointer*/ #define VTD_CONTEXT_ENTRY_SLPTPTR (~0xfffULL) -#define VTD_CONTEXT_ENTRY_RSVD_LO (0xff0ULL | ~VTD_HAW_MASK) +#define VTD_CONTEXT_ENTRY_RSVD_LO(aw) (0xff0ULL | ~VTD_HAW_MASK(aw)) /* hi */ #define VTD_CONTEXT_ENTRY_AW 7ULL /* Adjusted guest-address-width */ #define VTD_CONTEXT_ENTRY_DID(val) (((val) >> 8) & VTD_DOMAIN_ID_MASK) @@ -439,7 +454,7 @@ typedef struct VTDRootEntry VTDRootEntry; #define VTD_SL_RW_MASK 3ULL #define VTD_SL_R 1ULL #define VTD_SL_W (1ULL << 1) -#define VTD_SL_PT_BASE_ADDR_MASK (~(VTD_PAGE_SIZE - 1) & VTD_HAW_MASK) +#define VTD_SL_PT_BASE_ADDR_MASK(aw) (~(VTD_PAGE_SIZE - 1) & VTD_HAW_MASK(aw)) #define VTD_SL_IGN_COM 0xbff0000000000000ULL #endif diff --git a/hw/i386/x86-iommu.c b/hw/i386/x86-iommu.c index 293caf8..8a01a2d 100644 --- a/hw/i386/x86-iommu.c +++ b/hw/i386/x86-iommu.c @@ -21,6 +21,8 @@ #include "hw/sysbus.h" #include "hw/boards.h" #include "hw/i386/x86-iommu.h" +#include "hw/i386/pc.h" +#include "qapi/error.h" #include "qemu/error-report.h" #include "trace.h" @@ -80,7 +82,18 @@ static void x86_iommu_realize(DeviceState *dev, Error **errp) { X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev); X86IOMMUClass *x86_class = X86_IOMMU_GET_CLASS(dev); + MachineState *ms = MACHINE(qdev_get_machine()); + MachineClass *mc = MACHINE_GET_CLASS(ms); + PCMachineState *pcms = + PC_MACHINE(object_dynamic_cast(OBJECT(ms), TYPE_PC_MACHINE)); QLIST_INIT(&x86_iommu->iec_notifiers); + + if (!pcms || !pcms->bus) { + error_setg(errp, "Machine-type '%s' not supported by IOMMU", + mc->name); + return; + } + if (x86_class->realize) { x86_class->realize(dev, errp); } diff --git a/hw/pci-bridge/gen_pcie_root_port.c b/hw/pci-bridge/gen_pcie_root_port.c index ad4e6aa..0e2f2e8 100644 --- a/hw/pci-bridge/gen_pcie_root_port.c +++ b/hw/pci-bridge/gen_pcie_root_port.c @@ -74,8 +74,13 @@ static void gen_rp_realize(DeviceState *dev, Error **errp) PCIDevice *d = PCI_DEVICE(dev); GenPCIERootPort *grp = GEN_PCIE_ROOT_PORT(d); PCIERootPortClass *rpc = PCIE_ROOT_PORT_GET_CLASS(d); + Error *local_err = NULL; - rpc->parent_realize(dev, errp); + rpc->parent_realize(dev, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } int rc = pci_bridge_qemu_reserve_cap_init(d, 0, grp->bus_reserve, grp->io_reserve, grp->mem_reserve, grp->pref32_reserve, diff --git a/hw/pci/shpc.c b/hw/pci/shpc.c index 69fc14b..a8462d4 100644 --- a/hw/pci/shpc.c +++ b/hw/pci/shpc.c @@ -1,6 +1,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "qemu-common.h" +#include "qemu/host-utils.h" #include "qemu/range.h" #include "qemu/error-report.h" #include "hw/pci/shpc.h" @@ -122,16 +123,6 @@ #define SHPC_PCI_TO_IDX(pci_slot) ((pci_slot) - 1) #define SHPC_IDX_TO_PHYSICAL(slot) ((slot) + 1) -static int roundup_pow_of_two(int x) -{ - x |= (x >> 1); - x |= (x >> 2); - x |= (x >> 4); - x |= (x >> 8); - x |= (x >> 16); - return x + 1; -} - static uint16_t shpc_get_status(SHPCDevice *shpc, int slot, uint16_t msk) { uint8_t *status = shpc->config + SHPC_SLOT_STATUS(slot); @@ -656,7 +647,7 @@ int shpc_init(PCIDevice *d, PCIBus *sec_bus, MemoryRegion *bar, int shpc_bar_size(PCIDevice *d) { - return roundup_pow_of_two(SHPC_SLOT_REG(SHPC_MAX_SLOTS)); + return pow2roundup32(SHPC_SLOT_REG(SHPC_MAX_SLOTS)); } void shpc_cleanup(PCIDevice *d, MemoryRegion *bar) diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index 093675e..6eb9798 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -26,6 +26,11 @@ #define VHOST_MEMORY_MAX_NREGIONS 8 #define VHOST_USER_F_PROTOCOL_FEATURES 30 +/* + * Maximum size of virtio device config space + */ +#define VHOST_USER_MAX_CONFIG_SIZE 256 + enum VhostUserProtocolFeature { VHOST_USER_PROTOCOL_F_MQ = 0, VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1, @@ -65,12 +70,15 @@ typedef enum VhostUserRequest { VHOST_USER_SET_SLAVE_REQ_FD = 21, VHOST_USER_IOTLB_MSG = 22, VHOST_USER_SET_VRING_ENDIAN = 23, + VHOST_USER_GET_CONFIG = 24, + VHOST_USER_SET_CONFIG = 25, VHOST_USER_MAX } VhostUserRequest; typedef enum VhostUserSlaveRequest { VHOST_USER_SLAVE_NONE = 0, VHOST_USER_SLAVE_IOTLB_MSG = 1, + VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2, VHOST_USER_SLAVE_MAX } VhostUserSlaveRequest; @@ -92,7 +100,19 @@ typedef struct VhostUserLog { uint64_t mmap_offset; } VhostUserLog; -typedef struct VhostUserMsg { +typedef struct VhostUserConfig { + uint32_t offset; + uint32_t size; + uint32_t flags; + uint8_t region[VHOST_USER_MAX_CONFIG_SIZE]; +} VhostUserConfig; + +static VhostUserConfig c __attribute__ ((unused)); +#define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \ + + sizeof(c.size) \ + + sizeof(c.flags)) + +typedef struct { VhostUserRequest request; #define VHOST_USER_VERSION_MASK (0x3) @@ -100,7 +120,9 @@ typedef struct VhostUserMsg { #define VHOST_USER_NEED_REPLY_MASK (0x1 << 3) uint32_t flags; uint32_t size; /* the following payload size */ - union { +} QEMU_PACKED VhostUserHeader; + +typedef union { #define VHOST_USER_VRING_IDX_MASK (0xff) #define VHOST_USER_VRING_NOFD_MASK (0x1<<8) uint64_t u64; @@ -109,15 +131,18 @@ typedef struct VhostUserMsg { VhostUserMemory memory; VhostUserLog log; struct vhost_iotlb_msg iotlb; - } payload; + VhostUserConfig config; +} VhostUserPayload; + +typedef struct VhostUserMsg { + VhostUserHeader hdr; + VhostUserPayload payload; } QEMU_PACKED VhostUserMsg; static VhostUserMsg m __attribute__ ((unused)); -#define VHOST_USER_HDR_SIZE (sizeof(m.request) \ - + sizeof(m.flags) \ - + sizeof(m.size)) +#define VHOST_USER_HDR_SIZE (sizeof(VhostUserHeader)) -#define VHOST_USER_PAYLOAD_SIZE (sizeof(m) - VHOST_USER_HDR_SIZE) +#define VHOST_USER_PAYLOAD_SIZE (sizeof(VhostUserPayload)) /* The version of the protocol we support */ #define VHOST_USER_VERSION (0x1) @@ -142,33 +167,33 @@ static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) r = qemu_chr_fe_read_all(chr, p, size); if (r != size) { error_report("Failed to read msg header. Read %d instead of %d." - " Original request %d.", r, size, msg->request); + " Original request %d.", r, size, msg->hdr.request); goto fail; } /* validate received flags */ - if (msg->flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) { + if (msg->hdr.flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) { error_report("Failed to read msg header." - " Flags 0x%x instead of 0x%x.", msg->flags, + " Flags 0x%x instead of 0x%x.", msg->hdr.flags, VHOST_USER_REPLY_MASK | VHOST_USER_VERSION); goto fail; } /* validate message size is sane */ - if (msg->size > VHOST_USER_PAYLOAD_SIZE) { + if (msg->hdr.size > VHOST_USER_PAYLOAD_SIZE) { error_report("Failed to read msg header." - " Size %d exceeds the maximum %zu.", msg->size, + " Size %d exceeds the maximum %zu.", msg->hdr.size, VHOST_USER_PAYLOAD_SIZE); goto fail; } - if (msg->size) { + if (msg->hdr.size) { p += VHOST_USER_HDR_SIZE; - size = msg->size; + size = msg->hdr.size; r = qemu_chr_fe_read_all(chr, p, size); if (r != size) { error_report("Failed to read msg payload." - " Read %d instead of %d.", r, msg->size); + " Read %d instead of %d.", r, msg->hdr.size); goto fail; } } @@ -184,7 +209,7 @@ static int process_message_reply(struct vhost_dev *dev, { VhostUserMsg msg_reply; - if ((msg->flags & VHOST_USER_NEED_REPLY_MASK) == 0) { + if ((msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) == 0) { return 0; } @@ -192,10 +217,10 @@ static int process_message_reply(struct vhost_dev *dev, return -1; } - if (msg_reply.request != msg->request) { + if (msg_reply.hdr.request != msg->hdr.request) { error_report("Received unexpected msg type." "Expected %d received %d", - msg->request, msg_reply.request); + msg->hdr.request, msg_reply.hdr.request); return -1; } @@ -222,15 +247,15 @@ static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg, { struct vhost_user *u = dev->opaque; CharBackend *chr = u->chr; - int ret, size = VHOST_USER_HDR_SIZE + msg->size; + int ret, size = VHOST_USER_HDR_SIZE + msg->hdr.size; /* * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE, * we just need send it once in the first time. For later such * request, we just ignore it. */ - if (vhost_user_one_time_request(msg->request) && dev->vq_index != 0) { - msg->flags &= ~VHOST_USER_NEED_REPLY_MASK; + if (vhost_user_one_time_request(msg->hdr.request) && dev->vq_index != 0) { + msg->hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK; return 0; } @@ -257,11 +282,11 @@ static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base, bool shmfd = virtio_has_feature(dev->protocol_features, VHOST_USER_PROTOCOL_F_LOG_SHMFD); VhostUserMsg msg = { - .request = VHOST_USER_SET_LOG_BASE, - .flags = VHOST_USER_VERSION, + .hdr.request = VHOST_USER_SET_LOG_BASE, + .hdr.flags = VHOST_USER_VERSION, .payload.log.mmap_size = log->size * sizeof(*(log->log)), .payload.log.mmap_offset = 0, - .size = sizeof(msg.payload.log), + .hdr.size = sizeof(msg.payload.log), }; if (shmfd && log->fd != -1) { @@ -273,15 +298,15 @@ static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base, } if (shmfd) { - msg.size = 0; + msg.hdr.size = 0; if (vhost_user_read(dev, &msg) < 0) { return -1; } - if (msg.request != VHOST_USER_SET_LOG_BASE) { + if (msg.hdr.request != VHOST_USER_SET_LOG_BASE) { error_report("Received unexpected msg type. " "Expected %d received %d", - VHOST_USER_SET_LOG_BASE, msg.request); + VHOST_USER_SET_LOG_BASE, msg.hdr.request); return -1; } } @@ -299,12 +324,12 @@ static int vhost_user_set_mem_table(struct vhost_dev *dev, VHOST_USER_PROTOCOL_F_REPLY_ACK); VhostUserMsg msg = { - .request = VHOST_USER_SET_MEM_TABLE, - .flags = VHOST_USER_VERSION, + .hdr.request = VHOST_USER_SET_MEM_TABLE, + .hdr.flags = VHOST_USER_VERSION, }; if (reply_supported) { - msg.flags |= VHOST_USER_NEED_REPLY_MASK; + msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; } for (i = 0; i < dev->mem->nregions; ++i) { @@ -317,11 +342,14 @@ static int vhost_user_set_mem_table(struct vhost_dev *dev, &offset); fd = memory_region_get_fd(mr); if (fd > 0) { + if (fd_num == VHOST_MEMORY_MAX_NREGIONS) { + error_report("Failed preparing vhost-user memory table msg"); + return -1; + } msg.payload.memory.regions[fd_num].userspace_addr = reg->userspace_addr; msg.payload.memory.regions[fd_num].memory_size = reg->memory_size; msg.payload.memory.regions[fd_num].guest_phys_addr = reg->guest_phys_addr; msg.payload.memory.regions[fd_num].mmap_offset = offset; - assert(fd_num < VHOST_MEMORY_MAX_NREGIONS); fds[fd_num++] = fd; } } @@ -334,9 +362,9 @@ static int vhost_user_set_mem_table(struct vhost_dev *dev, return -1; } - msg.size = sizeof(msg.payload.memory.nregions); - msg.size += sizeof(msg.payload.memory.padding); - msg.size += fd_num * sizeof(VhostUserMemoryRegion); + msg.hdr.size = sizeof(msg.payload.memory.nregions); + msg.hdr.size += sizeof(msg.payload.memory.padding); + msg.hdr.size += fd_num * sizeof(VhostUserMemoryRegion); if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { return -1; @@ -353,10 +381,10 @@ static int vhost_user_set_vring_addr(struct vhost_dev *dev, struct vhost_vring_addr *addr) { VhostUserMsg msg = { - .request = VHOST_USER_SET_VRING_ADDR, - .flags = VHOST_USER_VERSION, + .hdr.request = VHOST_USER_SET_VRING_ADDR, + .hdr.flags = VHOST_USER_VERSION, .payload.addr = *addr, - .size = sizeof(msg.payload.addr), + .hdr.size = sizeof(msg.payload.addr), }; if (vhost_user_write(dev, &msg, NULL, 0) < 0) { @@ -372,10 +400,10 @@ static int vhost_user_set_vring_endian(struct vhost_dev *dev, bool cross_endian = virtio_has_feature(dev->protocol_features, VHOST_USER_PROTOCOL_F_CROSS_ENDIAN); VhostUserMsg msg = { - .request = VHOST_USER_SET_VRING_ENDIAN, - .flags = VHOST_USER_VERSION, + .hdr.request = VHOST_USER_SET_VRING_ENDIAN, + .hdr.flags = VHOST_USER_VERSION, .payload.state = *ring, - .size = sizeof(msg.payload.state), + .hdr.size = sizeof(msg.payload.state), }; if (!cross_endian) { @@ -395,10 +423,10 @@ static int vhost_set_vring(struct vhost_dev *dev, struct vhost_vring_state *ring) { VhostUserMsg msg = { - .request = request, - .flags = VHOST_USER_VERSION, + .hdr.request = request, + .hdr.flags = VHOST_USER_VERSION, .payload.state = *ring, - .size = sizeof(msg.payload.state), + .hdr.size = sizeof(msg.payload.state), }; if (vhost_user_write(dev, &msg, NULL, 0) < 0) { @@ -444,10 +472,10 @@ static int vhost_user_get_vring_base(struct vhost_dev *dev, struct vhost_vring_state *ring) { VhostUserMsg msg = { - .request = VHOST_USER_GET_VRING_BASE, - .flags = VHOST_USER_VERSION, + .hdr.request = VHOST_USER_GET_VRING_BASE, + .hdr.flags = VHOST_USER_VERSION, .payload.state = *ring, - .size = sizeof(msg.payload.state), + .hdr.size = sizeof(msg.payload.state), }; if (vhost_user_write(dev, &msg, NULL, 0) < 0) { @@ -458,13 +486,13 @@ static int vhost_user_get_vring_base(struct vhost_dev *dev, return -1; } - if (msg.request != VHOST_USER_GET_VRING_BASE) { + if (msg.hdr.request != VHOST_USER_GET_VRING_BASE) { error_report("Received unexpected msg type. Expected %d received %d", - VHOST_USER_GET_VRING_BASE, msg.request); + VHOST_USER_GET_VRING_BASE, msg.hdr.request); return -1; } - if (msg.size != sizeof(msg.payload.state)) { + if (msg.hdr.size != sizeof(msg.payload.state)) { error_report("Received bad msg size."); return -1; } @@ -481,10 +509,10 @@ static int vhost_set_vring_file(struct vhost_dev *dev, int fds[VHOST_MEMORY_MAX_NREGIONS]; size_t fd_num = 0; VhostUserMsg msg = { - .request = request, - .flags = VHOST_USER_VERSION, + .hdr.request = request, + .hdr.flags = VHOST_USER_VERSION, .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK, - .size = sizeof(msg.payload.u64), + .hdr.size = sizeof(msg.payload.u64), }; if (ioeventfd_enabled() && file->fd > 0) { @@ -515,10 +543,10 @@ static int vhost_user_set_vring_call(struct vhost_dev *dev, static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64) { VhostUserMsg msg = { - .request = request, - .flags = VHOST_USER_VERSION, + .hdr.request = request, + .hdr.flags = VHOST_USER_VERSION, .payload.u64 = u64, - .size = sizeof(msg.payload.u64), + .hdr.size = sizeof(msg.payload.u64), }; if (vhost_user_write(dev, &msg, NULL, 0) < 0) { @@ -543,8 +571,8 @@ static int vhost_user_set_protocol_features(struct vhost_dev *dev, static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64) { VhostUserMsg msg = { - .request = request, - .flags = VHOST_USER_VERSION, + .hdr.request = request, + .hdr.flags = VHOST_USER_VERSION, }; if (vhost_user_one_time_request(request) && dev->vq_index != 0) { @@ -559,13 +587,13 @@ static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64) return -1; } - if (msg.request != request) { + if (msg.hdr.request != request) { error_report("Received unexpected msg type. Expected %d received %d", - request, msg.request); + request, msg.hdr.request); return -1; } - if (msg.size != sizeof(msg.payload.u64)) { + if (msg.hdr.size != sizeof(msg.payload.u64)) { error_report("Received bad msg size."); return -1; } @@ -583,8 +611,8 @@ static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features) static int vhost_user_set_owner(struct vhost_dev *dev) { VhostUserMsg msg = { - .request = VHOST_USER_SET_OWNER, - .flags = VHOST_USER_VERSION, + .hdr.request = VHOST_USER_SET_OWNER, + .hdr.flags = VHOST_USER_VERSION, }; if (vhost_user_write(dev, &msg, NULL, 0) < 0) { @@ -597,8 +625,8 @@ static int vhost_user_set_owner(struct vhost_dev *dev) static int vhost_user_reset_device(struct vhost_dev *dev) { VhostUserMsg msg = { - .request = VHOST_USER_RESET_OWNER, - .flags = VHOST_USER_VERSION, + .hdr.request = VHOST_USER_RESET_OWNER, + .hdr.flags = VHOST_USER_VERSION, }; if (vhost_user_write(dev, &msg, NULL, 0) < 0) { @@ -608,37 +636,56 @@ static int vhost_user_reset_device(struct vhost_dev *dev) return 0; } +static int vhost_user_slave_handle_config_change(struct vhost_dev *dev) +{ + int ret = -1; + + if (!dev->config_ops) { + return -1; + } + + if (dev->config_ops->vhost_dev_config_notifier) { + ret = dev->config_ops->vhost_dev_config_notifier(dev); + } + + return ret; +} + static void slave_read(void *opaque) { struct vhost_dev *dev = opaque; struct vhost_user *u = dev->opaque; - VhostUserMsg msg = { 0, }; + VhostUserHeader hdr = { 0, }; + VhostUserPayload payload = { 0, }; int size, ret = 0; /* Read header */ - size = read(u->slave_fd, &msg, VHOST_USER_HDR_SIZE); + size = read(u->slave_fd, &hdr, VHOST_USER_HDR_SIZE); if (size != VHOST_USER_HDR_SIZE) { error_report("Failed to read from slave."); goto err; } - if (msg.size > VHOST_USER_PAYLOAD_SIZE) { + if (hdr.size > VHOST_USER_PAYLOAD_SIZE) { error_report("Failed to read msg header." - " Size %d exceeds the maximum %zu.", msg.size, + " Size %d exceeds the maximum %zu.", hdr.size, VHOST_USER_PAYLOAD_SIZE); goto err; } /* Read payload */ - size = read(u->slave_fd, &msg.payload, msg.size); - if (size != msg.size) { + size = read(u->slave_fd, &payload, hdr.size); + if (size != hdr.size) { error_report("Failed to read payload from slave."); goto err; } - switch (msg.request) { + switch (hdr.request) { case VHOST_USER_SLAVE_IOTLB_MSG: - ret = vhost_backend_handle_iotlb_msg(dev, &msg.payload.iotlb); + ret = vhost_backend_handle_iotlb_msg(dev, &payload.iotlb); + break; + case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG : + ret = vhost_user_slave_handle_config_change(dev); break; default: error_report("Received unexpected msg type."); @@ -649,15 +696,23 @@ static void slave_read(void *opaque) * REPLY_ACK feature handling. Other reply types has to be managed * directly in their request handlers. */ - if (msg.flags & VHOST_USER_NEED_REPLY_MASK) { - msg.flags &= ~VHOST_USER_NEED_REPLY_MASK; - msg.flags |= VHOST_USER_REPLY_MASK; + if (hdr.flags & VHOST_USER_NEED_REPLY_MASK) { + struct iovec iovec[2]; + + + hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK; + hdr.flags |= VHOST_USER_REPLY_MASK; + + payload.u64 = !!ret; + hdr.size = sizeof(payload.u64); - msg.payload.u64 = !!ret; - msg.size = sizeof(msg.payload.u64); + iovec[0].iov_base = &hdr; + iovec[0].iov_len = VHOST_USER_HDR_SIZE; + iovec[1].iov_base = &payload; + iovec[1].iov_len = hdr.size; - size = write(u->slave_fd, &msg, VHOST_USER_HDR_SIZE + msg.size); - if (size != VHOST_USER_HDR_SIZE + msg.size) { + size = writev(u->slave_fd, iovec, ARRAY_SIZE(iovec)); + if (size != VHOST_USER_HDR_SIZE + hdr.size) { error_report("Failed to send msg reply to slave."); goto err; } @@ -675,8 +730,8 @@ err: static int vhost_setup_slave_channel(struct vhost_dev *dev) { VhostUserMsg msg = { - .request = VHOST_USER_SET_SLAVE_REQ_FD, - .flags = VHOST_USER_VERSION, + .hdr.request = VHOST_USER_SET_SLAVE_REQ_FD, + .hdr.flags = VHOST_USER_VERSION, }; struct vhost_user *u = dev->opaque; int sv[2], ret = 0; @@ -697,7 +752,7 @@ static int vhost_setup_slave_channel(struct vhost_dev *dev) qemu_set_fd_handler(u->slave_fd, slave_read, NULL, dev); if (reply_supported) { - msg.flags |= VHOST_USER_NEED_REPLY_MASK; + msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; } ret = vhost_user_write(dev, &msg, &sv[1], 1); @@ -842,10 +897,10 @@ static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr) /* if backend supports VHOST_USER_PROTOCOL_F_RARP ask it to send the RARP */ if (virtio_has_feature(dev->protocol_features, VHOST_USER_PROTOCOL_F_RARP)) { - msg.request = VHOST_USER_SEND_RARP; - msg.flags = VHOST_USER_VERSION; + msg.hdr.request = VHOST_USER_SEND_RARP; + msg.hdr.flags = VHOST_USER_VERSION; memcpy((char *)&msg.payload.u64, mac_addr, 6); - msg.size = sizeof(msg.payload.u64); + msg.hdr.size = sizeof(msg.payload.u64); return vhost_user_write(dev, &msg, NULL, 0); } @@ -879,12 +934,12 @@ static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu) return 0; } - msg.request = VHOST_USER_NET_SET_MTU; + msg.hdr.request = VHOST_USER_NET_SET_MTU; msg.payload.u64 = mtu; - msg.size = sizeof(msg.payload.u64); - msg.flags = VHOST_USER_VERSION; + msg.hdr.size = sizeof(msg.payload.u64); + msg.hdr.flags = VHOST_USER_VERSION; if (reply_supported) { - msg.flags |= VHOST_USER_NEED_REPLY_MASK; + msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; } if (vhost_user_write(dev, &msg, NULL, 0) < 0) { @@ -903,9 +958,9 @@ static int vhost_user_send_device_iotlb_msg(struct vhost_dev *dev, struct vhost_iotlb_msg *imsg) { VhostUserMsg msg = { - .request = VHOST_USER_IOTLB_MSG, - .size = sizeof(msg.payload.iotlb), - .flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, + .hdr.request = VHOST_USER_IOTLB_MSG, + .hdr.size = sizeof(msg.payload.iotlb), + .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, .payload.iotlb = *imsg, }; @@ -922,6 +977,83 @@ static void vhost_user_set_iotlb_callback(struct vhost_dev *dev, int enabled) /* No-op as the receive channel is not dedicated to IOTLB messages. */ } +static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config, + uint32_t config_len) +{ + VhostUserMsg msg = { + .hdr.request = VHOST_USER_GET_CONFIG, + .hdr.flags = VHOST_USER_VERSION, + .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + config_len, + }; + + if (config_len > VHOST_USER_MAX_CONFIG_SIZE) { + return -1; + } + + msg.payload.config.offset = 0; + msg.payload.config.size = config_len; + if (vhost_user_write(dev, &msg, NULL, 0) < 0) { + return -1; + } + + if (vhost_user_read(dev, &msg) < 0) { + return -1; + } + + if (msg.hdr.request != VHOST_USER_GET_CONFIG) { + error_report("Received unexpected msg type. Expected %d received %d", + VHOST_USER_GET_CONFIG, msg.hdr.request); + return -1; + } + + if (msg.hdr.size != VHOST_USER_CONFIG_HDR_SIZE + config_len) { + error_report("Received bad msg size."); + return -1; + } + + memcpy(config, msg.payload.config.region, config_len); + + return 0; +} + +static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data, + uint32_t offset, uint32_t size, uint32_t flags) +{ + uint8_t *p; + bool reply_supported = virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_REPLY_ACK); + + VhostUserMsg msg = { + .hdr.request = VHOST_USER_SET_CONFIG, + .hdr.flags = VHOST_USER_VERSION, + .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + size, + }; + + if (reply_supported) { + msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; + } + + if (size > VHOST_USER_MAX_CONFIG_SIZE) { + return -1; + } + + msg.payload.config.offset = offset, + msg.payload.config.size = size, + msg.payload.config.flags = flags, + p = msg.payload.config.region; + memcpy(p, data, size); + + if (vhost_user_write(dev, &msg, NULL, 0) < 0) { + return -1; + } + + if (reply_supported) { + return process_message_reply(dev, &msg); + } + + return 0; +} + const VhostOps user_ops = { .backend_type = VHOST_BACKEND_TYPE_USER, .vhost_backend_init = vhost_user_init, @@ -948,4 +1080,6 @@ const VhostOps user_ops = { .vhost_net_set_mtu = vhost_user_net_set_mtu, .vhost_set_iotlb_callback = vhost_user_set_iotlb_callback, .vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg, + .vhost_get_config = vhost_user_get_config, + .vhost_set_config = vhost_user_set_config, }; diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index e4290ce..386aef8 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -1505,6 +1505,38 @@ void vhost_ack_features(struct vhost_dev *hdev, const int *feature_bits, } } +int vhost_dev_get_config(struct vhost_dev *hdev, uint8_t *config, + uint32_t config_len) +{ + assert(hdev->vhost_ops); + + if (hdev->vhost_ops->vhost_get_config) { + return hdev->vhost_ops->vhost_get_config(hdev, config, config_len); + } + + return -1; +} + +int vhost_dev_set_config(struct vhost_dev *hdev, const uint8_t *data, + uint32_t offset, uint32_t size, uint32_t flags) +{ + assert(hdev->vhost_ops); + + if (hdev->vhost_ops->vhost_set_config) { + return hdev->vhost_ops->vhost_set_config(hdev, data, offset, + size, flags); + } + + return -1; +} + +void vhost_dev_set_config_notifier(struct vhost_dev *hdev, + const VhostDevConfigOps *ops) +{ + assert(hdev->vhost_ops); + hdev->config_ops = ops; +} + /* Host notifiers must be enabled at this point. */ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev) { diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c index 3042232..8106346 100644 --- a/hw/virtio/virtio-bus.c +++ b/hw/virtio/virtio-bus.c @@ -256,6 +256,15 @@ bool virtio_bus_ioeventfd_enabled(VirtioBusState *bus) return k->ioeventfd_assign && k->ioeventfd_enabled(proxy); } +static void virtio_bus_cleanup_event_notifier(EventNotifier *notifier) +{ + /* Test and clear notifier after disabling event, + * in case poll callback didn't have time to run. + */ + virtio_queue_host_notifier_read(notifier); + event_notifier_cleanup(notifier); +} + /* * This function switches ioeventfd on/off in the device. * The caller must set or clear the handlers for the EventNotifier. @@ -283,19 +292,13 @@ int virtio_bus_set_host_notifier(VirtioBusState *bus, int n, bool assign) r = k->ioeventfd_assign(proxy, notifier, n, true); if (r < 0) { error_report("%s: unable to assign ioeventfd: %d", __func__, r); - goto cleanup_event_notifier; + virtio_bus_cleanup_event_notifier(notifier); } - return 0; } else { + notifier->cleanup = virtio_bus_cleanup_event_notifier; k->ioeventfd_assign(proxy, notifier, n, false); } -cleanup_event_notifier: - /* Test and clear notifier after disabling event, - * in case poll callback didn't have time to run. - */ - virtio_queue_host_notifier_read(notifier); - event_notifier_cleanup(notifier); return r; } diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 6c75cca..9ae10f0 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -1978,6 +1978,58 @@ static const TypeInfo virtio_blk_pci_info = { .class_init = virtio_blk_pci_class_init, }; +#if defined(CONFIG_VHOST_USER) && defined(CONFIG_LINUX) +/* vhost-user-blk */ + +static Property vhost_user_blk_pci_properties[] = { + DEFINE_PROP_UINT32("class", VirtIOPCIProxy, class_code, 0), + DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 2), + DEFINE_PROP_END_OF_LIST(), +}; + +static void vhost_user_blk_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) +{ + VHostUserBlkPCI *dev = VHOST_USER_BLK_PCI(vpci_dev); + DeviceState *vdev = DEVICE(&dev->vdev); + + qdev_set_parent_bus(vdev, BUS(&vpci_dev->bus)); + object_property_set_bool(OBJECT(vdev), true, "realized", errp); +} + +static void vhost_user_blk_pci_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); + PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass); + + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); + dc->props = vhost_user_blk_pci_properties; + k->realize = vhost_user_blk_pci_realize; + pcidev_k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET; + pcidev_k->device_id = PCI_DEVICE_ID_VIRTIO_BLOCK; + pcidev_k->revision = VIRTIO_PCI_ABI_VERSION; + pcidev_k->class_id = PCI_CLASS_STORAGE_SCSI; +} + +static void vhost_user_blk_pci_instance_init(Object *obj) +{ + VHostUserBlkPCI *dev = VHOST_USER_BLK_PCI(obj); + + virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), + TYPE_VHOST_USER_BLK); + object_property_add_alias(obj, "bootindex", OBJECT(&dev->vdev), + "bootindex", &error_abort); +} + +static const TypeInfo vhost_user_blk_pci_info = { + .name = TYPE_VHOST_USER_BLK_PCI, + .parent = TYPE_VIRTIO_PCI, + .instance_size = sizeof(VHostUserBlkPCI), + .instance_init = vhost_user_blk_pci_instance_init, + .class_init = vhost_user_blk_pci_class_init, +}; +#endif + /* virtio-scsi-pci */ static Property virtio_scsi_pci_properties[] = { @@ -2624,6 +2676,9 @@ static void virtio_pci_register_types(void) type_register_static(&virtio_9p_pci_info); #endif type_register_static(&virtio_blk_pci_info); +#if defined(CONFIG_VHOST_USER) && defined(CONFIG_LINUX) + type_register_static(&vhost_user_blk_pci_info); +#endif type_register_static(&virtio_scsi_pci_info); type_register_static(&virtio_balloon_pci_info); type_register_static(&virtio_serial_pci_info); diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h index 12d3a90..813082b 100644 --- a/hw/virtio/virtio-pci.h +++ b/hw/virtio/virtio-pci.h @@ -27,6 +27,9 @@ #include "hw/virtio/virtio-gpu.h" #include "hw/virtio/virtio-crypto.h" #include "hw/virtio/vhost-user-scsi.h" +#if defined(CONFIG_VHOST_USER) && defined(CONFIG_LINUX) +#include "hw/virtio/vhost-user-blk.h" +#endif #ifdef CONFIG_VIRTFS #include "hw/9pfs/virtio-9p.h" @@ -46,6 +49,7 @@ typedef struct VirtIOSerialPCI VirtIOSerialPCI; typedef struct VirtIONetPCI VirtIONetPCI; typedef struct VHostSCSIPCI VHostSCSIPCI; typedef struct VHostUserSCSIPCI VHostUserSCSIPCI; +typedef struct VHostUserBlkPCI VHostUserBlkPCI; typedef struct VirtIORngPCI VirtIORngPCI; typedef struct VirtIOInputPCI VirtIOInputPCI; typedef struct VirtIOInputHIDPCI VirtIOInputHIDPCI; @@ -244,6 +248,20 @@ struct VHostUserSCSIPCI { VHostUserSCSI vdev; }; +#if defined(CONFIG_VHOST_USER) && defined(CONFIG_LINUX) +/* + * vhost-user-blk-pci: This extends VirtioPCIProxy. + */ +#define TYPE_VHOST_USER_BLK_PCI "vhost-user-blk-pci" +#define VHOST_USER_BLK_PCI(obj) \ + OBJECT_CHECK(VHostUserBlkPCI, (obj), TYPE_VHOST_USER_BLK_PCI) + +struct VHostUserBlkPCI { + VirtIOPCIProxy parent_obj; + VHostUserBlk vdev; +}; +#endif + /* * virtio-blk-pci: This extends VirtioPCIProxy. */ diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index d6002ee..3ac3491 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -2574,6 +2574,7 @@ static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev) VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev))); int n, r, err; + memory_region_transaction_begin(); for (n = 0; n < VIRTIO_QUEUE_MAX; n++) { VirtQueue *vq = &vdev->vq[n]; if (!virtio_queue_get_num(vdev, n)) { @@ -2596,6 +2597,7 @@ static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev) } event_notifier_set(&vq->host_notifier); } + memory_region_transaction_commit(); return 0; assign_error: @@ -2609,6 +2611,7 @@ assign_error: r = virtio_bus_set_host_notifier(qbus, n, false); assert(r >= 0); } + memory_region_transaction_commit(); return err; } @@ -2625,6 +2628,7 @@ static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev) VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev))); int n, r; + memory_region_transaction_begin(); for (n = 0; n < VIRTIO_QUEUE_MAX; n++) { VirtQueue *vq = &vdev->vq[n]; @@ -2635,6 +2639,7 @@ static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev) r = virtio_bus_set_host_notifier(qbus, n, false); assert(r >= 0); } + memory_region_transaction_commit(); } void virtio_device_stop_ioeventfd(VirtIODevice *vdev) |