diff options
49 files changed, 250 insertions, 76 deletions
diff --git a/configs/targets/s390x-linux-user.mak b/configs/targets/s390x-linux-user.mak index e297824..24c04c8 100644 --- a/configs/targets/s390x-linux-user.mak +++ b/configs/targets/s390x-linux-user.mak @@ -2,4 +2,4 @@ TARGET_ARCH=s390x TARGET_SYSTBL_ABI=common,64 TARGET_SYSTBL=syscall.tbl TARGET_BIG_ENDIAN=y -TARGET_XML_FILES= gdb-xml/s390x-core64.xml gdb-xml/s390-acr.xml gdb-xml/s390-fpr.xml gdb-xml/s390-vx.xml gdb-xml/s390-cr.xml gdb-xml/s390-virt.xml gdb-xml/s390-gs.xml +TARGET_XML_FILES= gdb-xml/s390x-core64.xml gdb-xml/s390-acr.xml gdb-xml/s390-fpr.xml gdb-xml/s390-vx.xml gdb-xml/s390-cr.xml gdb-xml/s390-virt.xml gdb-xml/s390-virt-kvm.xml gdb-xml/s390-gs.xml diff --git a/configs/targets/s390x-softmmu.mak b/configs/targets/s390x-softmmu.mak index 258b4cf..70d2f9f 100644 --- a/configs/targets/s390x-softmmu.mak +++ b/configs/targets/s390x-softmmu.mak @@ -1,4 +1,4 @@ TARGET_ARCH=s390x TARGET_BIG_ENDIAN=y TARGET_SUPPORTS_MTTCG=y -TARGET_XML_FILES= gdb-xml/s390x-core64.xml gdb-xml/s390-acr.xml gdb-xml/s390-fpr.xml gdb-xml/s390-vx.xml gdb-xml/s390-cr.xml gdb-xml/s390-virt.xml gdb-xml/s390-gs.xml +TARGET_XML_FILES= gdb-xml/s390x-core64.xml gdb-xml/s390-acr.xml gdb-xml/s390-fpr.xml gdb-xml/s390-vx.xml gdb-xml/s390-cr.xml gdb-xml/s390-virt.xml gdb-xml/s390-virt-kvm.xml gdb-xml/s390-gs.xml diff --git a/docs/devel/multiple-iothreads.txt b/docs/devel/multiple-iothreads.txt index 343120f..a3e949f 100644 --- a/docs/devel/multiple-iothreads.txt +++ b/docs/devel/multiple-iothreads.txt @@ -61,6 +61,7 @@ There are several old APIs that use the main loop AioContext: * LEGACY qemu_aio_set_event_notifier() - monitor an event notifier * LEGACY timer_new_ms() - create a timer * LEGACY qemu_bh_new() - create a BH + * LEGACY qemu_bh_new_guarded() - create a BH with a device re-entrancy guard * LEGACY qemu_aio_wait() - run an event loop iteration Since they implicitly work on the main loop they cannot be used in code that @@ -72,8 +73,14 @@ Instead, use the AioContext functions directly (see include/block/aio.h): * aio_set_event_notifier() - monitor an event notifier * aio_timer_new() - create a timer * aio_bh_new() - create a BH + * aio_bh_new_guarded() - create a BH with a device re-entrancy guard * aio_poll() - run an event loop iteration +The qemu_bh_new_guarded/aio_bh_new_guarded APIs accept a "MemReentrancyGuard" +argument, which is used to check for and prevent re-entrancy problems. For +BHs associated with devices, the reentrancy-guard is contained in the +corresponding DeviceState and named "mem_reentrancy_guard". + The AioContext can be obtained from the IOThread using iothread_get_aio_context() or for the main loop using qemu_get_aio_context(). Code that takes an AioContext argument works both in IOThreads or the main diff --git a/gdb-xml/s390-virt-kvm.xml b/gdb-xml/s390-virt-kvm.xml new file mode 100644 index 0000000..a256edd --- /dev/null +++ b/gdb-xml/s390-virt-kvm.xml @@ -0,0 +1,14 @@ +<?xml version="1.0"?> +<!-- Copyright 2023 IBM Corp. + + This work is licensed under the terms of the GNU GPL, version 2 or + (at your option) any later version. See the COPYING file in the + top-level directory. --> + +<!DOCTYPE feature SYSTEM "gdb-target.dtd"> +<feature name="org.gnu.gdb.s390.virt.kvm"> + <reg name="pp" bitsize="64" type="uint64" group="system"/> + <reg name="pfault_token" bitsize="64" type="uint64" group="system"/> + <reg name="pfault_select" bitsize="64" type="uint64" group="system"/> + <reg name="pfault_compare" bitsize="64" type="uint64" group="system"/> +</feature> diff --git a/gdb-xml/s390-virt.xml b/gdb-xml/s390-virt.xml index e2e9a7a..438eb68 100644 --- a/gdb-xml/s390-virt.xml +++ b/gdb-xml/s390-virt.xml @@ -11,8 +11,4 @@ <reg name="cputm" bitsize="64" type="uint64" group="system"/> <reg name="last_break" bitsize="64" type="code_ptr" group="system"/> <reg name="prefix" bitsize="64" type="data_ptr" group="system"/> - <reg name="pp" bitsize="64" type="uint64" group="system"/> - <reg name="pfault_token" bitsize="64" type="uint64" group="system"/> - <reg name="pfault_select" bitsize="64" type="uint64" group="system"/> - <reg name="pfault_compare" bitsize="64" type="uint64" group="system"/> </feature> diff --git a/hw/9pfs/xen-9p-backend.c b/hw/9pfs/xen-9p-backend.c index 74f3a05..0e266c5 100644 --- a/hw/9pfs/xen-9p-backend.c +++ b/hw/9pfs/xen-9p-backend.c @@ -61,6 +61,7 @@ typedef struct Xen9pfsDev { int num_rings; Xen9pfsRing *rings; + MemReentrancyGuard mem_reentrancy_guard; } Xen9pfsDev; static void xen_9pfs_disconnect(struct XenLegacyDevice *xendev); @@ -443,7 +444,9 @@ static int xen_9pfs_connect(struct XenLegacyDevice *xendev) xen_9pdev->rings[i].ring.out = xen_9pdev->rings[i].data + XEN_FLEX_RING_SIZE(ring_order); - xen_9pdev->rings[i].bh = qemu_bh_new(xen_9pfs_bh, &xen_9pdev->rings[i]); + xen_9pdev->rings[i].bh = qemu_bh_new_guarded(xen_9pfs_bh, + &xen_9pdev->rings[i], + &xen_9pdev->mem_reentrancy_guard); xen_9pdev->rings[i].out_cons = 0; xen_9pdev->rings[i].out_size = 0; xen_9pdev->rings[i].inprogress = false; diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c index b28d817..a620299 100644 --- a/hw/block/dataplane/virtio-blk.c +++ b/hw/block/dataplane/virtio-blk.c @@ -127,7 +127,8 @@ bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, } else { s->ctx = qemu_get_aio_context(); } - s->bh = aio_bh_new(s->ctx, notify_guest_bh, s); + s->bh = aio_bh_new_guarded(s->ctx, notify_guest_bh, s, + &DEVICE(vdev)->mem_reentrancy_guard); s->batch_notify_vqs = bitmap_new(conf->num_queues); *dataplane = s; diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c index 734da42..d8bc39d 100644 --- a/hw/block/dataplane/xen-block.c +++ b/hw/block/dataplane/xen-block.c @@ -633,8 +633,9 @@ XenBlockDataPlane *xen_block_dataplane_create(XenDevice *xendev, } else { dataplane->ctx = qemu_get_aio_context(); } - dataplane->bh = aio_bh_new(dataplane->ctx, xen_block_dataplane_bh, - dataplane); + dataplane->bh = aio_bh_new_guarded(dataplane->ctx, xen_block_dataplane_bh, + dataplane, + &DEVICE(xendev)->mem_reentrancy_guard); return dataplane; } diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c index 7d4601c..dd619f0 100644 --- a/hw/char/virtio-serial-bus.c +++ b/hw/char/virtio-serial-bus.c @@ -985,7 +985,8 @@ static void virtser_port_device_realize(DeviceState *dev, Error **errp) return; } - port->bh = qemu_bh_new(flush_queued_data_bh, port); + port->bh = qemu_bh_new_guarded(flush_queued_data_bh, port, + &dev->mem_reentrancy_guard); port->elem = NULL; } diff --git a/hw/display/qxl.c b/hw/display/qxl.c index 80ce1e9..f1c0eb7 100644 --- a/hw/display/qxl.c +++ b/hw/display/qxl.c @@ -2201,11 +2201,14 @@ static void qxl_realize_common(PCIQXLDevice *qxl, Error **errp) qemu_add_vm_change_state_handler(qxl_vm_change_state_handler, qxl); - qxl->update_irq = qemu_bh_new(qxl_update_irq_bh, qxl); + qxl->update_irq = qemu_bh_new_guarded(qxl_update_irq_bh, qxl, + &DEVICE(qxl)->mem_reentrancy_guard); qxl_reset_state(qxl); - qxl->update_area_bh = qemu_bh_new(qxl_render_update_area_bh, qxl); - qxl->ssd.cursor_bh = qemu_bh_new(qemu_spice_cursor_refresh_bh, &qxl->ssd); + qxl->update_area_bh = qemu_bh_new_guarded(qxl_render_update_area_bh, qxl, + &DEVICE(qxl)->mem_reentrancy_guard); + qxl->ssd.cursor_bh = qemu_bh_new_guarded(qemu_spice_cursor_refresh_bh, &qxl->ssd, + &DEVICE(qxl)->mem_reentrancy_guard); } static void qxl_realize_primary(PCIDevice *dev, Error **errp) diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c index 5e15c79..66ac9b6 100644 --- a/hw/display/virtio-gpu.c +++ b/hw/display/virtio-gpu.c @@ -1339,8 +1339,10 @@ void virtio_gpu_device_realize(DeviceState *qdev, Error **errp) g->ctrl_vq = virtio_get_queue(vdev, 0); g->cursor_vq = virtio_get_queue(vdev, 1); - g->ctrl_bh = qemu_bh_new(virtio_gpu_ctrl_bh, g); - g->cursor_bh = qemu_bh_new(virtio_gpu_cursor_bh, g); + g->ctrl_bh = qemu_bh_new_guarded(virtio_gpu_ctrl_bh, g, + &qdev->mem_reentrancy_guard); + g->cursor_bh = qemu_bh_new_guarded(virtio_gpu_cursor_bh, g, + &qdev->mem_reentrancy_guard); QTAILQ_INIT(&g->reslist); QTAILQ_INIT(&g->cmdq); QTAILQ_INIT(&g->fenceq); diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c index 55902e1..4e76d6b 100644 --- a/hw/ide/ahci.c +++ b/hw/ide/ahci.c @@ -1509,7 +1509,8 @@ static void ahci_cmd_done(const IDEDMA *dma) ahci_write_fis_d2h(ad); if (ad->port_regs.cmd_issue && !ad->check_bh) { - ad->check_bh = qemu_bh_new(ahci_check_cmd_bh, ad); + ad->check_bh = qemu_bh_new_guarded(ahci_check_cmd_bh, ad, + &ad->mem_reentrancy_guard); qemu_bh_schedule(ad->check_bh); } } diff --git a/hw/ide/ahci_internal.h b/hw/ide/ahci_internal.h index 303fcd7..2480455 100644 --- a/hw/ide/ahci_internal.h +++ b/hw/ide/ahci_internal.h @@ -321,6 +321,7 @@ struct AHCIDevice { bool init_d2h_sent; AHCICmdHdr *cur_cmd; NCQTransferState ncq_tfs[AHCI_MAX_CMDS]; + MemReentrancyGuard mem_reentrancy_guard; }; struct AHCIPCIState { diff --git a/hw/ide/core.c b/hw/ide/core.c index 45d14a2..de48ff9 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -513,6 +513,7 @@ BlockAIOCB *ide_issue_trim( BlockCompletionFunc *cb, void *cb_opaque, void *opaque) { IDEState *s = opaque; + IDEDevice *dev = s->unit ? s->bus->slave : s->bus->master; TrimAIOCB *iocb; /* Paired with a decrement in ide_trim_bh_cb() */ @@ -520,7 +521,8 @@ BlockAIOCB *ide_issue_trim( iocb = blk_aio_get(&trim_aiocb_info, s->blk, cb, cb_opaque); iocb->s = s; - iocb->bh = qemu_bh_new(ide_trim_bh_cb, iocb); + iocb->bh = qemu_bh_new_guarded(ide_trim_bh_cb, iocb, + &DEVICE(dev)->mem_reentrancy_guard); iocb->ret = 0; iocb->qiov = qiov; iocb->i = -1; diff --git a/hw/intc/apic.c b/hw/intc/apic.c index 20b5a94..ac3d47d 100644 --- a/hw/intc/apic.c +++ b/hw/intc/apic.c @@ -885,6 +885,13 @@ static void apic_realize(DeviceState *dev, Error **errp) memory_region_init_io(&s->io_memory, OBJECT(s), &apic_io_ops, s, "apic-msi", APIC_SPACE_SIZE); + /* + * apic-msi's apic_mem_write can call into ioapic_eoi_broadcast, which can + * write back to apic-msi. As such mark the apic-msi region re-entrancy + * safe. + */ + s->io_memory.disable_reentrancy_guard = true; + s->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, apic_timer, s); local_apics[s->id] = s; diff --git a/hw/misc/bcm2835_property.c b/hw/misc/bcm2835_property.c index 890ae7b..de056ea 100644 --- a/hw/misc/bcm2835_property.c +++ b/hw/misc/bcm2835_property.c @@ -382,6 +382,13 @@ static void bcm2835_property_init(Object *obj) memory_region_init_io(&s->iomem, OBJECT(s), &bcm2835_property_ops, s, TYPE_BCM2835_PROPERTY, 0x10); + + /* + * bcm2835_property_ops call into bcm2835_mbox, which in-turn reads from + * iomem. As such, mark iomem as re-entracy safe. + */ + s->iomem.disable_reentrancy_guard = true; + sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->iomem); sysbus_init_irq(SYS_BUS_DEVICE(s), &s->mbox_irq); } diff --git a/hw/misc/imx_rngc.c b/hw/misc/imx_rngc.c index 632c037..082c698 100644 --- a/hw/misc/imx_rngc.c +++ b/hw/misc/imx_rngc.c @@ -228,8 +228,10 @@ static void imx_rngc_realize(DeviceState *dev, Error **errp) sysbus_init_mmio(sbd, &s->iomem); sysbus_init_irq(sbd, &s->irq); - s->self_test_bh = qemu_bh_new(imx_rngc_self_test, s); - s->seed_bh = qemu_bh_new(imx_rngc_seed, s); + s->self_test_bh = qemu_bh_new_guarded(imx_rngc_self_test, s, + &dev->mem_reentrancy_guard); + s->seed_bh = qemu_bh_new_guarded(imx_rngc_seed, s, + &dev->mem_reentrancy_guard); } static void imx_rngc_reset(DeviceState *dev) diff --git a/hw/misc/macio/mac_dbdma.c b/hw/misc/macio/mac_dbdma.c index 43bb1f5..80a789f 100644 --- a/hw/misc/macio/mac_dbdma.c +++ b/hw/misc/macio/mac_dbdma.c @@ -914,7 +914,7 @@ static void mac_dbdma_realize(DeviceState *dev, Error **errp) { DBDMAState *s = MAC_DBDMA(dev); - s->bh = qemu_bh_new(DBDMA_run_bh, s); + s->bh = qemu_bh_new_guarded(DBDMA_run_bh, s, &dev->mem_reentrancy_guard); } static void mac_dbdma_class_init(ObjectClass *oc, void *data) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 53e1c32..447f669 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -2917,7 +2917,8 @@ static void virtio_net_add_queue(VirtIONet *n, int index) n->vqs[index].tx_vq = virtio_add_queue(vdev, n->net_conf.tx_queue_size, virtio_net_handle_tx_bh); - n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]); + n->vqs[index].tx_bh = qemu_bh_new_guarded(virtio_net_tx_bh, &n->vqs[index], + &DEVICE(vdev)->mem_reentrancy_guard); } n->vqs[index].tx_waiting = 0; diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index f59dfe1..fd917fc 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -4607,7 +4607,8 @@ static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr, QTAILQ_INSERT_TAIL(&(sq->req_list), &sq->io_req[i], entry); } - sq->bh = qemu_bh_new(nvme_process_sq, sq); + sq->bh = qemu_bh_new_guarded(nvme_process_sq, sq, + &DEVICE(sq->ctrl)->mem_reentrancy_guard); if (n->dbbuf_enabled) { sq->db_addr = n->dbbuf_dbs + (sqid << 3); @@ -5253,7 +5254,8 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr, } } n->cq[cqid] = cq; - cq->bh = qemu_bh_new(nvme_post_cqes, cq); + cq->bh = qemu_bh_new_guarded(nvme_post_cqes, cq, + &DEVICE(cq->ctrl)->mem_reentrancy_guard); } static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest *req) diff --git a/hw/pci-host/raven.c b/hw/pci-host/raven.c index 072ffe3..9a11ac4 100644 --- a/hw/pci-host/raven.c +++ b/hw/pci-host/raven.c @@ -294,6 +294,13 @@ static void raven_pcihost_initfn(Object *obj) memory_region_init(&s->pci_memory, obj, "pci-memory", 0x3f000000); address_space_init(&s->pci_io_as, &s->pci_io, "raven-io"); + /* + * Raven's raven_io_ops use the address-space API to access pci-conf-idx + * (which is also owned by the raven device). As such, mark the + * pci_io_non_contiguous as re-entrancy safe. + */ + s->pci_io_non_contiguous.disable_reentrancy_guard = true; + /* CPU address space */ memory_region_add_subregion(address_space_mem, PCI_IO_BASE_ADDR, &s->pci_io); diff --git a/hw/rdma/Kconfig b/hw/rdma/Kconfig index 8e22112..840320b 100644 --- a/hw/rdma/Kconfig +++ b/hw/rdma/Kconfig @@ -1,3 +1,3 @@ config VMW_PVRDMA default y if PCI_DEVICES - depends on PVRDMA && PCI && MSI_NONBROKEN + depends on PVRDMA && MSI_NONBROKEN && VMXNET3_PCI diff --git a/hw/rdma/meson.build b/hw/rdma/meson.build index 7325f40..fc79171 100644 --- a/hw/rdma/meson.build +++ b/hw/rdma/meson.build @@ -1,10 +1,12 @@ -specific_ss.add(when: 'CONFIG_VMW_PVRDMA', if_true: files( +softmmu_ss.add(when: 'CONFIG_VMW_PVRDMA', if_true: files( 'rdma.c', 'rdma_backend.c', - 'rdma_rm.c', 'rdma_utils.c', + 'vmw/pvrdma_qp_ops.c', +)) +specific_ss.add(when: 'CONFIG_VMW_PVRDMA', if_true: files( + 'rdma_rm.c', 'vmw/pvrdma_cmd.c', 'vmw/pvrdma_dev_ring.c', 'vmw/pvrdma_main.c', - 'vmw/pvrdma_qp_ops.c', )) diff --git a/hw/rdma/rdma_rm.c b/hw/rdma/rdma_rm.c index cfd85de..038d564 100644 --- a/hw/rdma/rdma_rm.c +++ b/hw/rdma/rdma_rm.c @@ -23,10 +23,6 @@ #include "rdma_backend.h" #include "rdma_rm.h" -/* Page directory and page tables */ -#define PG_DIR_SZ { TARGET_PAGE_SIZE / sizeof(__u64) } -#define PG_TBL_SZ { TARGET_PAGE_SIZE / sizeof(__u64) } - void rdma_format_device_counters(RdmaDeviceResources *dev_res, GString *buf) { g_string_append_printf(buf, "\ttx : %" PRId64 "\n", diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c index af93557..db27872 100644 --- a/hw/scsi/lsi53c895a.c +++ b/hw/scsi/lsi53c895a.c @@ -2302,6 +2302,12 @@ static void lsi_scsi_realize(PCIDevice *dev, Error **errp) memory_region_init_io(&s->io_io, OBJECT(s), &lsi_io_ops, s, "lsi-io", 256); + /* + * Since we use the address-space API to interact with ram_io, disable the + * re-entrancy guard. + */ + s->ram_io.disable_reentrancy_guard = true; + address_space_init(&s->pci_io_as, pci_address_space_io(dev), "lsi-pci-io"); qdev_init_gpio_out(d, &s->ext_irq, 1); diff --git a/hw/scsi/mptsas.c b/hw/scsi/mptsas.c index c485da7..3de288b 100644 --- a/hw/scsi/mptsas.c +++ b/hw/scsi/mptsas.c @@ -1322,7 +1322,8 @@ static void mptsas_scsi_realize(PCIDevice *dev, Error **errp) } s->max_devices = MPTSAS_NUM_PORTS; - s->request_bh = qemu_bh_new(mptsas_fetch_requests, s); + s->request_bh = qemu_bh_new_guarded(mptsas_fetch_requests, s, + &DEVICE(dev)->mem_reentrancy_guard); scsi_bus_init(&s->bus, sizeof(s->bus), &dev->qdev, &mptsas_scsi_info); } diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c index c971761..3c20b47 100644 --- a/hw/scsi/scsi-bus.c +++ b/hw/scsi/scsi-bus.c @@ -193,7 +193,8 @@ static void scsi_dma_restart_cb(void *opaque, bool running, RunState state) AioContext *ctx = blk_get_aio_context(s->conf.blk); /* The reference is dropped in scsi_dma_restart_bh.*/ object_ref(OBJECT(s)); - s->bh = aio_bh_new(ctx, scsi_dma_restart_bh, s); + s->bh = aio_bh_new_guarded(ctx, scsi_dma_restart_bh, s, + &DEVICE(s)->mem_reentrancy_guard); qemu_bh_schedule(s->bh); } } diff --git a/hw/scsi/vmw_pvscsi.c b/hw/scsi/vmw_pvscsi.c index fa76696..4de3453 100644 --- a/hw/scsi/vmw_pvscsi.c +++ b/hw/scsi/vmw_pvscsi.c @@ -1184,7 +1184,8 @@ pvscsi_realizefn(PCIDevice *pci_dev, Error **errp) pcie_endpoint_cap_init(pci_dev, PVSCSI_EXP_EP_OFFSET); } - s->completion_worker = qemu_bh_new(pvscsi_process_completion_queue, s); + s->completion_worker = qemu_bh_new_guarded(pvscsi_process_completion_queue, s, + &DEVICE(pci_dev)->mem_reentrancy_guard); scsi_bus_init(&s->bus, sizeof(s->bus), DEVICE(pci_dev), &pvscsi_scsi_info); /* override default SCSI bus hotplug-handler, with pvscsi's one */ diff --git a/hw/usb/dev-uas.c b/hw/usb/dev-uas.c index 88f99c0..f013ded 100644 --- a/hw/usb/dev-uas.c +++ b/hw/usb/dev-uas.c @@ -937,7 +937,8 @@ static void usb_uas_realize(USBDevice *dev, Error **errp) QTAILQ_INIT(&uas->results); QTAILQ_INIT(&uas->requests); - uas->status_bh = qemu_bh_new(usb_uas_send_status_bh, uas); + uas->status_bh = qemu_bh_new_guarded(usb_uas_send_status_bh, uas, + &d->mem_reentrancy_guard); dev->flags |= (1 << USB_DEV_FLAG_IS_SCSI_STORAGE); scsi_bus_init(&uas->bus, sizeof(uas->bus), DEVICE(dev), &usb_uas_scsi_info); diff --git a/hw/usb/hcd-dwc2.c b/hw/usb/hcd-dwc2.c index 8755e9c..a0c4e78 100644 --- a/hw/usb/hcd-dwc2.c +++ b/hw/usb/hcd-dwc2.c @@ -1364,7 +1364,8 @@ static void dwc2_realize(DeviceState *dev, Error **errp) s->fi = USB_FRMINTVL - 1; s->eof_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, dwc2_frame_boundary, s); s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, dwc2_work_timer, s); - s->async_bh = qemu_bh_new(dwc2_work_bh, s); + s->async_bh = qemu_bh_new_guarded(dwc2_work_bh, s, + &dev->mem_reentrancy_guard); sysbus_init_irq(sbd, &s->irq); } diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c index d4da8dc..c930c60 100644 --- a/hw/usb/hcd-ehci.c +++ b/hw/usb/hcd-ehci.c @@ -2533,7 +2533,8 @@ void usb_ehci_realize(EHCIState *s, DeviceState *dev, Error **errp) } s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, ehci_work_timer, s); - s->async_bh = qemu_bh_new(ehci_work_bh, s); + s->async_bh = qemu_bh_new_guarded(ehci_work_bh, s, + &dev->mem_reentrancy_guard); s->device = dev; s->vmstate = qemu_add_vm_change_state_handler(usb_ehci_vm_state_change, s); diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c index 8ac1175..77baaa7 100644 --- a/hw/usb/hcd-uhci.c +++ b/hw/usb/hcd-uhci.c @@ -1190,7 +1190,7 @@ void usb_uhci_common_realize(PCIDevice *dev, Error **errp) USB_SPEED_MASK_LOW | USB_SPEED_MASK_FULL); } } - s->bh = qemu_bh_new(uhci_bh, s); + s->bh = qemu_bh_new_guarded(uhci_bh, s, &DEVICE(dev)->mem_reentrancy_guard); s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, uhci_frame_timer, s); s->num_ports_vmstate = NB_PORTS; QTAILQ_INIT(&s->queues); diff --git a/hw/usb/host-libusb.c b/hw/usb/host-libusb.c index 176868d..f500db8 100644 --- a/hw/usb/host-libusb.c +++ b/hw/usb/host-libusb.c @@ -1141,7 +1141,8 @@ static void usb_host_nodev_bh(void *opaque) static void usb_host_nodev(USBHostDevice *s) { if (!s->bh_nodev) { - s->bh_nodev = qemu_bh_new(usb_host_nodev_bh, s); + s->bh_nodev = qemu_bh_new_guarded(usb_host_nodev_bh, s, + &DEVICE(s)->mem_reentrancy_guard); } qemu_bh_schedule(s->bh_nodev); } @@ -1739,7 +1740,8 @@ static int usb_host_post_load(void *opaque, int version_id) USBHostDevice *dev = opaque; if (!dev->bh_postld) { - dev->bh_postld = qemu_bh_new(usb_host_post_load_bh, dev); + dev->bh_postld = qemu_bh_new_guarded(usb_host_post_load_bh, dev, + &DEVICE(dev)->mem_reentrancy_guard); } qemu_bh_schedule(dev->bh_postld); dev->bh_postld_pending = true; diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c index fd7df59..39fbaaa 100644 --- a/hw/usb/redirect.c +++ b/hw/usb/redirect.c @@ -1441,8 +1441,10 @@ static void usbredir_realize(USBDevice *udev, Error **errp) } } - dev->chardev_close_bh = qemu_bh_new(usbredir_chardev_close_bh, dev); - dev->device_reject_bh = qemu_bh_new(usbredir_device_reject_bh, dev); + dev->chardev_close_bh = qemu_bh_new_guarded(usbredir_chardev_close_bh, dev, + &DEVICE(dev)->mem_reentrancy_guard); + dev->device_reject_bh = qemu_bh_new_guarded(usbredir_device_reject_bh, dev, + &DEVICE(dev)->mem_reentrancy_guard); dev->attach_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, usbredir_do_attach, dev); packet_id_queue_init(&dev->cancelled, dev, "cancelled"); diff --git a/hw/usb/xen-usb.c b/hw/usb/xen-usb.c index 66cb3f7..38ee660 100644 --- a/hw/usb/xen-usb.c +++ b/hw/usb/xen-usb.c @@ -1032,7 +1032,8 @@ static void usbback_alloc(struct XenLegacyDevice *xendev) QTAILQ_INIT(&usbif->req_free_q); QSIMPLEQ_INIT(&usbif->hotplug_q); - usbif->bh = qemu_bh_new(usbback_bh, usbif); + usbif->bh = qemu_bh_new_guarded(usbback_bh, usbif, + &DEVICE(xendev)->mem_reentrancy_guard); } static int usbback_free(struct XenLegacyDevice *xendev) diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c index fd06fcf..d004cf2 100644 --- a/hw/virtio/virtio-balloon.c +++ b/hw/virtio/virtio-balloon.c @@ -886,8 +886,9 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp) precopy_add_notifier(&s->free_page_hint_notify); object_ref(OBJECT(s->iothread)); - s->free_page_bh = aio_bh_new(iothread_get_aio_context(s->iothread), - virtio_ballloon_get_free_page_hints, s); + s->free_page_bh = aio_bh_new_guarded(iothread_get_aio_context(s->iothread), + virtio_ballloon_get_free_page_hints, s, + &dev->mem_reentrancy_guard); } if (virtio_has_feature(s->host_features, VIRTIO_BALLOON_F_REPORTING)) { diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c index 802e1b9..2fe8045 100644 --- a/hw/virtio/virtio-crypto.c +++ b/hw/virtio/virtio-crypto.c @@ -1074,7 +1074,8 @@ static void virtio_crypto_device_realize(DeviceState *dev, Error **errp) vcrypto->vqs[i].dataq = virtio_add_queue(vdev, 1024, virtio_crypto_handle_dataq_bh); vcrypto->vqs[i].dataq_bh = - qemu_bh_new(virtio_crypto_dataq_bh, &vcrypto->vqs[i]); + qemu_bh_new_guarded(virtio_crypto_dataq_bh, &vcrypto->vqs[i], + &dev->mem_reentrancy_guard); vcrypto->vqs[i].vcrypto = vcrypto; } diff --git a/include/block/aio.h b/include/block/aio.h index e267d91..89bbc53 100644 --- a/include/block/aio.h +++ b/include/block/aio.h @@ -23,6 +23,8 @@ #include "qemu/thread.h" #include "qemu/timer.h" #include "block/graph-lock.h" +#include "hw/qdev-core.h" + typedef struct BlockAIOCB BlockAIOCB; typedef void BlockCompletionFunc(void *opaque, int ret); @@ -323,9 +325,11 @@ void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque, * is opaque and must be allocated prior to its use. * * @name: A human-readable identifier for debugging purposes. + * @reentrancy_guard: A guard set when entering a cb to prevent + * device-reentrancy issues */ QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque, - const char *name); + const char *name, MemReentrancyGuard *reentrancy_guard); /** * aio_bh_new: Allocate a new bottom half structure @@ -334,7 +338,17 @@ QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque, * string. */ #define aio_bh_new(ctx, cb, opaque) \ - aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb))) + aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb)), NULL) + +/** + * aio_bh_new_guarded: Allocate a new bottom half structure with a + * reentrancy_guard + * + * A convenience wrapper for aio_bh_new_full() that uses the cb as the name + * string. + */ +#define aio_bh_new_guarded(ctx, cb, opaque, guard) \ + aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb)), guard) /** * aio_notify: Force processing of pending events. diff --git a/include/exec/memory.h b/include/exec/memory.h index 15ade91..e45ce60 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -767,6 +767,8 @@ struct MemoryRegion { bool is_iommu; RAMBlock *ram_block; Object *owner; + /* owner as TYPE_DEVICE. Used for re-entrancy checks in MR access hotpath */ + DeviceState *dev; const MemoryRegionOps *ops; void *opaque; @@ -791,6 +793,9 @@ struct MemoryRegion { unsigned ioeventfd_nb; MemoryRegionIoeventfd *ioeventfds; RamDiscardManager *rdm; /* Only for RAM */ + + /* For devices designed to perform re-entrant IO into their own IO MRs */ + bool disable_reentrancy_guard; }; struct IOMMUMemoryRegion { diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h index bd50ad5..7623703 100644 --- a/include/hw/qdev-core.h +++ b/include/hw/qdev-core.h @@ -162,6 +162,10 @@ struct NamedClockList { QLIST_ENTRY(NamedClockList) node; }; +typedef struct { + bool engaged_in_io; +} MemReentrancyGuard; + /** * DeviceState: * @realized: Indicates whether the device has been fully constructed. @@ -194,6 +198,9 @@ struct DeviceState { int alias_required_for_version; ResettableState reset; GSList *unplug_blockers; + + /* Is the device currently in mmio/pio/dma? Used to prevent re-entrancy */ + MemReentrancyGuard mem_reentrancy_guard; }; struct DeviceListener { diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h index b3e54e0..68e70e6 100644 --- a/include/qemu/main-loop.h +++ b/include/qemu/main-loop.h @@ -387,9 +387,12 @@ void qemu_cond_timedwait_iothread(QemuCond *cond, int ms); /* internal interfaces */ +#define qemu_bh_new_guarded(cb, opaque, guard) \ + qemu_bh_new_full((cb), (opaque), (stringify(cb)), guard) #define qemu_bh_new(cb, opaque) \ - qemu_bh_new_full((cb), (opaque), (stringify(cb))) -QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name); + qemu_bh_new_full((cb), (opaque), (stringify(cb)), NULL) +QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name, + MemReentrancyGuard *reentrancy_guard); void qemu_bh_schedule_idle(QEMUBH *bh); enum { diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index d768171..eeaec43 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -2865,6 +2865,14 @@ sub process { if ($line =~ /\bsignal\s*\(/ && !($line =~ /SIG_(?:IGN|DFL)/)) { ERROR("use sigaction to establish signal handlers; signal is not portable\n" . $herecurr); } +# recommend qemu_bh_new_guarded instead of qemu_bh_new + if ($realfile =~ /.*\/hw\/.*/ && $line =~ /\bqemu_bh_new\s*\(/) { + ERROR("use qemu_bh_new_guarded() instead of qemu_bh_new() to avoid reentrancy problems\n" . $herecurr); + } +# recommend aio_bh_new_guarded instead of aio_bh_new + if ($realfile =~ /.*\/hw\/.*/ && $line =~ /\baio_bh_new\s*\(/) { + ERROR("use aio_bh_new_guarded() instead of aio_bh_new() to avoid reentrancy problems\n" . $herecurr); + } # check for module_init(), use category-specific init macros explicitly please if ($line =~ /^module_init\s*\(/) { ERROR("please use block_init(), type_init() etc. instead of module_init()\n" . $herecurr); diff --git a/softmmu/memory.c b/softmmu/memory.c index b1a6cae..b7b3386 100644 --- a/softmmu/memory.c +++ b/softmmu/memory.c @@ -542,6 +542,18 @@ static MemTxResult access_with_adjusted_size(hwaddr addr, access_size_max = 4; } + /* Do not allow more than one simultaneous access to a device's IO Regions */ + if (mr->dev && !mr->disable_reentrancy_guard && + !mr->ram_device && !mr->ram && !mr->rom_device && !mr->readonly) { + if (mr->dev->mem_reentrancy_guard.engaged_in_io) { + warn_report_once("Blocked re-entrant IO on MemoryRegion: " + "%s at addr: 0x%" HWADDR_PRIX, + memory_region_name(mr), addr); + return MEMTX_ACCESS_ERROR; + } + mr->dev->mem_reentrancy_guard.engaged_in_io = true; + } + /* FIXME: support unaligned access? */ access_size = MAX(MIN(size, access_size_max), access_size_min); access_mask = MAKE_64BIT_MASK(0, access_size * 8); @@ -556,6 +568,9 @@ static MemTxResult access_with_adjusted_size(hwaddr addr, access_mask, attrs); } } + if (mr->dev) { + mr->dev->mem_reentrancy_guard.engaged_in_io = false; + } return r; } @@ -1170,6 +1185,7 @@ static void memory_region_do_init(MemoryRegion *mr, } mr->name = g_strdup(name); mr->owner = owner; + mr->dev = (DeviceState *) object_dynamic_cast(mr->owner, TYPE_DEVICE); mr->ram_block = NULL; if (name) { diff --git a/target/s390x/gdbstub.c b/target/s390x/gdbstub.c index 0cb6939..6fbfd41 100644 --- a/target/s390x/gdbstub.c +++ b/target/s390x/gdbstub.c @@ -206,12 +206,8 @@ static int cpu_write_c_reg(CPUS390XState *env, uint8_t *mem_buf, int n) #define S390_VIRT_CPUTM_REGNUM 1 #define S390_VIRT_BEA_REGNUM 2 #define S390_VIRT_PREFIX_REGNUM 3 -#define S390_VIRT_PP_REGNUM 4 -#define S390_VIRT_PFT_REGNUM 5 -#define S390_VIRT_PFS_REGNUM 6 -#define S390_VIRT_PFC_REGNUM 7 /* total number of registers in s390-virt.xml */ -#define S390_NUM_VIRT_REGS 8 +#define S390_NUM_VIRT_REGS 4 static int cpu_read_virt_reg(CPUS390XState *env, GByteArray *mem_buf, int n) { @@ -224,14 +220,6 @@ static int cpu_read_virt_reg(CPUS390XState *env, GByteArray *mem_buf, int n) return gdb_get_regl(mem_buf, env->gbea); case S390_VIRT_PREFIX_REGNUM: return gdb_get_regl(mem_buf, env->psa); - case S390_VIRT_PP_REGNUM: - return gdb_get_regl(mem_buf, env->pp); - case S390_VIRT_PFT_REGNUM: - return gdb_get_regl(mem_buf, env->pfault_token); - case S390_VIRT_PFS_REGNUM: - return gdb_get_regl(mem_buf, env->pfault_select); - case S390_VIRT_PFC_REGNUM: - return gdb_get_regl(mem_buf, env->pfault_compare); default: return 0; } @@ -256,19 +244,51 @@ static int cpu_write_virt_reg(CPUS390XState *env, uint8_t *mem_buf, int n) env->psa = ldtul_p(mem_buf); cpu_synchronize_post_init(env_cpu(env)); return 8; - case S390_VIRT_PP_REGNUM: + default: + return 0; + } +} + +/* the values represent the positions in s390-virt-kvm.xml */ +#define S390_VIRT_KVM_PP_REGNUM 0 +#define S390_VIRT_KVM_PFT_REGNUM 1 +#define S390_VIRT_KVM_PFS_REGNUM 2 +#define S390_VIRT_KVM_PFC_REGNUM 3 +/* total number of registers in s390-virt-kvm.xml */ +#define S390_NUM_VIRT_KVM_REGS 4 + +static int cpu_read_virt_kvm_reg(CPUS390XState *env, GByteArray *mem_buf, int n) +{ + switch (n) { + case S390_VIRT_KVM_PP_REGNUM: + return gdb_get_regl(mem_buf, env->pp); + case S390_VIRT_KVM_PFT_REGNUM: + return gdb_get_regl(mem_buf, env->pfault_token); + case S390_VIRT_KVM_PFS_REGNUM: + return gdb_get_regl(mem_buf, env->pfault_select); + case S390_VIRT_KVM_PFC_REGNUM: + return gdb_get_regl(mem_buf, env->pfault_compare); + default: + return 0; + } +} + +static int cpu_write_virt_kvm_reg(CPUS390XState *env, uint8_t *mem_buf, int n) +{ + switch (n) { + case S390_VIRT_KVM_PP_REGNUM: env->pp = ldtul_p(mem_buf); cpu_synchronize_post_init(env_cpu(env)); return 8; - case S390_VIRT_PFT_REGNUM: + case S390_VIRT_KVM_PFT_REGNUM: env->pfault_token = ldtul_p(mem_buf); cpu_synchronize_post_init(env_cpu(env)); return 8; - case S390_VIRT_PFS_REGNUM: + case S390_VIRT_KVM_PFS_REGNUM: env->pfault_select = ldtul_p(mem_buf); cpu_synchronize_post_init(env_cpu(env)); return 8; - case S390_VIRT_PFC_REGNUM: + case S390_VIRT_KVM_PFC_REGNUM: env->pfault_compare = ldtul_p(mem_buf); cpu_synchronize_post_init(env_cpu(env)); return 8; @@ -321,10 +341,15 @@ void s390_cpu_gdb_init(CPUState *cs) cpu_write_c_reg, S390_NUM_C_REGS, "s390-cr.xml", 0); + gdb_register_coprocessor(cs, cpu_read_virt_reg, + cpu_write_virt_reg, + S390_NUM_VIRT_REGS, "s390-virt.xml", 0); + if (kvm_enabled()) { - gdb_register_coprocessor(cs, cpu_read_virt_reg, - cpu_write_virt_reg, - S390_NUM_VIRT_REGS, "s390-virt.xml", 0); + gdb_register_coprocessor(cs, cpu_read_virt_kvm_reg, + cpu_write_virt_kvm_reg, + S390_NUM_VIRT_KVM_REGS, "s390-virt-kvm.xml", + 0); } #endif } diff --git a/tests/qtest/vhost-user-test.c b/tests/qtest/vhost-user-test.c index bf9f7c4..e4f95b2 100644 --- a/tests/qtest/vhost-user-test.c +++ b/tests/qtest/vhost-user-test.c @@ -351,7 +351,7 @@ static void chr_read(void *opaque, const uint8_t *buf, int size) if (size != msg.size) { qos_printf("%s: Wrong message size received %d != %d\n", __func__, size, msg.size); - return; + goto out; } } @@ -509,6 +509,7 @@ static void chr_read(void *opaque, const uint8_t *buf, int size) break; } +out: g_mutex_unlock(&s->data_mutex); } diff --git a/tests/unit/ptimer-test-stubs.c b/tests/unit/ptimer-test-stubs.c index f2bfced..8c9407c 100644 --- a/tests/unit/ptimer-test-stubs.c +++ b/tests/unit/ptimer-test-stubs.c @@ -107,7 +107,8 @@ int64_t qemu_clock_deadline_ns_all(QEMUClockType type, int attr_mask) return deadline; } -QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name) +QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name, + MemReentrancyGuard *reentrancy_guard) { QEMUBH *bh = g_new(QEMUBH, 1); diff --git a/util/async.c b/util/async.c index 21016a1..a9b528c 100644 --- a/util/async.c +++ b/util/async.c @@ -65,6 +65,7 @@ struct QEMUBH { void *opaque; QSLIST_ENTRY(QEMUBH) next; unsigned flags; + MemReentrancyGuard *reentrancy_guard; }; /* Called concurrently from any thread */ @@ -137,7 +138,7 @@ void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb, } QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque, - const char *name) + const char *name, MemReentrancyGuard *reentrancy_guard) { QEMUBH *bh; bh = g_new(QEMUBH, 1); @@ -146,13 +147,28 @@ QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque, .cb = cb, .opaque = opaque, .name = name, + .reentrancy_guard = reentrancy_guard, }; return bh; } void aio_bh_call(QEMUBH *bh) { + bool last_engaged_in_io = false; + + if (bh->reentrancy_guard) { + last_engaged_in_io = bh->reentrancy_guard->engaged_in_io; + if (bh->reentrancy_guard->engaged_in_io) { + trace_reentrant_aio(bh->ctx, bh->name); + } + bh->reentrancy_guard->engaged_in_io = true; + } + bh->cb(bh->opaque); + + if (bh->reentrancy_guard) { + bh->reentrancy_guard->engaged_in_io = last_engaged_in_io; + } } /* Multiple occurrences of aio_bh_poll cannot be called concurrently. */ diff --git a/util/main-loop.c b/util/main-loop.c index e180c85..7022f02 100644 --- a/util/main-loop.c +++ b/util/main-loop.c @@ -605,9 +605,11 @@ void main_loop_wait(int nonblocking) /* Functions to operate on the main QEMU AioContext. */ -QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name) +QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name, + MemReentrancyGuard *reentrancy_guard) { - return aio_bh_new_full(qemu_aio_context, cb, opaque, name); + return aio_bh_new_full(qemu_aio_context, cb, opaque, name, + reentrancy_guard); } /* diff --git a/util/trace-events b/util/trace-events index 16f78d8..3f7e766 100644 --- a/util/trace-events +++ b/util/trace-events @@ -11,6 +11,7 @@ poll_remove(void *ctx, void *node, int fd) "ctx %p node %p fd %d" # async.c aio_co_schedule(void *ctx, void *co) "ctx %p co %p" aio_co_schedule_bh_cb(void *ctx, void *co) "ctx %p co %p" +reentrant_aio(void *ctx, const char *name) "ctx %p name %s" # thread-pool.c thread_pool_submit(void *pool, void *req, void *opaque) "pool %p req %p opaque %p" |