-rw-r--r--  MAINTAINERS | 10
-rw-r--r--  accel/kvm/kvm-all.c | 2
-rw-r--r--  backends/iommufd.c | 58
-rw-r--r--  backends/trace-events | 1
-rw-r--r--  docs/devel/rust.rst | 7
-rw-r--r--  docs/sphinx/qapi_domain.py | 25
-rw-r--r--  docs/sphinx/qapidoc.py | 5
-rw-r--r--  hw/display/apple-gfx.m | 10
-rw-r--r--  hw/timer/hpet.c | 21
-rw-r--r--  hw/vfio/container-base.c | 4
-rw-r--r--  hw/vfio/container.c | 5
-rw-r--r--  hw/vfio/cpr.c | 2
-rw-r--r--  hw/vfio/igd.c | 22
-rw-r--r--  hw/vfio/iommufd.c | 45
-rw-r--r--  hw/vfio/listener.c | 74
-rw-r--r--  hw/vfio/pci.c | 89
-rw-r--r--  hw/vfio/vfio-cpr.h | 15
-rw-r--r--  hw/virtio/vhost-vdpa.c | 9
-rw-r--r--  include/hw/vfio/vfio-container-base.h | 83
-rw-r--r--  include/hw/vfio/vfio-cpr.h | 18
-rw-r--r--  include/qapi/error-internal.h | 35
-rw-r--r--  include/qemu/futex.h | 44
-rw-r--r--  include/qemu/lockcnt.h | 2
-rw-r--r--  include/qemu/thread-posix.h | 9
-rw-r--r--  include/qemu/thread-win32.h | 6
-rw-r--r--  include/qemu/thread.h | 21
-rw-r--r--  include/system/host_iommu_device.h | 15
-rw-r--r--  include/system/iommufd.h | 54
-rw-r--r--  include/system/kvm.h | 1
-rw-r--r--  include/system/memory.h | 19
-rw-r--r--  meson.build | 7
-rw-r--r--  migration/colo.c | 20
-rw-r--r--  migration/migration.c | 21
-rw-r--r--  migration/migration.h | 12
-rw-r--r--  migration/postcopy-ram.c | 10
-rw-r--r--  migration/savevm.c | 2
-rw-r--r--  python/setup.cfg | 2
-rw-r--r--  python/tests/minreqs.txt | 31
-rwxr-xr-x  python/tests/qapi-flake8.sh | 6
-rwxr-xr-x  python/tests/qapi-isort.sh | 8
-rwxr-xr-x  python/tests/qapi-mypy.sh | 4
-rwxr-xr-x  python/tests/qapi-pylint.sh | 8
-rw-r--r--  rust/Cargo.lock | 17
-rw-r--r--  rust/Cargo.toml | 1
-rw-r--r--  rust/hw/char/pl011/src/device.rs | 5
-rw-r--r--  rust/hw/timer/hpet/src/device.rs | 62
-rw-r--r--  rust/hw/timer/hpet/src/fw_cfg.rs | 7
-rw-r--r--  rust/meson.build | 4
-rw-r--r--  rust/qemu-api-macros/src/lib.rs | 7
-rw-r--r--  rust/qemu-api/Cargo.toml | 2
-rw-r--r--  rust/qemu-api/meson.build | 3
-rw-r--r--  rust/qemu-api/src/error.rs | 416
-rw-r--r--  rust/qemu-api/src/lib.rs | 3
-rw-r--r--  rust/qemu-api/src/qdev.rs | 12
-rw-r--r--  rust/wrapper.h | 1
-rwxr-xr-x  scripts/archive-source.sh | 5
-rwxr-xr-x  scripts/make-release | 5
-rw-r--r--  scripts/qapi/.flake8 | 3
-rw-r--r--  scripts/qapi/.isort.cfg | 7
-rw-r--r--  scripts/qapi/backend.py | 2
-rw-r--r--  scripts/qapi/mypy.ini | 4
-rw-r--r--  scripts/qapi/pylintrc | 1
-rw-r--r--  subprojects/.gitignore | 2
-rw-r--r--  subprojects/anyhow-1-rs.wrap | 7
-rw-r--r--  subprojects/foreign-0.3-rs.wrap | 7
-rw-r--r--  subprojects/packagefiles/anyhow-1-rs/meson.build | 33
-rw-r--r--  subprojects/packagefiles/foreign-0.3-rs/meson.build | 26
-rw-r--r--  system/memory.c | 32
-rw-r--r--  target/i386/kvm/kvm.c | 31
-rw-r--r--  target/i386/ops_sse.h | 16
-rw-r--r--  target/i386/tcg/fpu_helper.c | 101
-rw-r--r--  tests/tcg/x86_64/fma.c | 17
-rw-r--r--  tests/unit/test-aio-multithread.c | 6
-rw-r--r--  util/error.c | 20
-rw-r--r--  util/event.c | 171
-rw-r--r--  util/lockcnt.c | 9
-rw-r--r--  util/meson.build | 3
-rw-r--r--  util/qemu-thread-posix.c | 148
-rw-r--r--  util/qemu-thread-win32.c | 129
79 files changed, 1502 insertions(+), 665 deletions(-)
diff --git a/MAINTAINERS b/MAINTAINERS
index 16af379..aa67630 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3032,6 +3032,16 @@ F: include/qemu/co-shared-resource.h
T: git https://gitlab.com/jsnow/qemu.git jobs
T: git https://gitlab.com/vsementsov/qemu.git block
+CheckPoint and Restart (CPR)
+R: Steve Sistare <steven.sistare@oracle.com>
+S: Supported
+F: hw/vfio/cpr*
+F: include/hw/vfio/vfio-cpr.h
+F: include/migration/cpr.h
+F: migration/cpr*
+F: tests/qtest/migration/cpr*
+F: docs/devel/migration/CPR.rst
+
Compute Express Link
M: Jonathan Cameron <jonathan.cameron@huawei.com>
R: Fan Ni <fan.ni@samsung.com>
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 51526d3..a317783 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -99,6 +99,7 @@ bool kvm_allowed;
bool kvm_readonly_mem_allowed;
bool kvm_vm_attributes_allowed;
bool kvm_msi_use_devid;
+bool kvm_pre_fault_memory_supported;
static bool kvm_has_guest_debug;
static int kvm_sstep_flags;
static bool kvm_immediate_exit;
@@ -2745,6 +2746,7 @@ static int kvm_init(MachineState *ms)
kvm_check_extension(s, KVM_CAP_GUEST_MEMFD) &&
kvm_check_extension(s, KVM_CAP_USER_MEMORY2) &&
(kvm_supported_memory_attributes & KVM_MEMORY_ATTRIBUTE_PRIVATE);
+ kvm_pre_fault_memory_supported = kvm_vm_check_extension(s, KVM_CAP_PRE_FAULT_MEMORY);
if (s->kernel_irqchip_split == ON_OFF_AUTO_AUTO) {
s->kernel_irqchip_split = mc->default_kernel_irqchip_split ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
diff --git a/backends/iommufd.c b/backends/iommufd.c
index b73f75c..c2c47ab 100644
--- a/backends/iommufd.c
+++ b/backends/iommufd.c
@@ -311,6 +311,62 @@ bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid,
return true;
}
+bool iommufd_backend_invalidate_cache(IOMMUFDBackend *be, uint32_t id,
+ uint32_t data_type, uint32_t entry_len,
+ uint32_t *entry_num, void *data,
+ Error **errp)
+{
+ int ret, fd = be->fd;
+ uint32_t total_entries = *entry_num;
+ struct iommu_hwpt_invalidate cache = {
+ .size = sizeof(cache),
+ .hwpt_id = id,
+ .data_type = data_type,
+ .entry_len = entry_len,
+ .entry_num = total_entries,
+ .data_uptr = (uintptr_t)data,
+ };
+
+ ret = ioctl(fd, IOMMU_HWPT_INVALIDATE, &cache);
+ trace_iommufd_backend_invalidate_cache(fd, id, data_type, entry_len,
+ total_entries, cache.entry_num,
+ (uintptr_t)data, ret ? errno : 0);
+ *entry_num = cache.entry_num;
+
+ if (ret) {
+ error_setg_errno(errp, errno, "IOMMU_HWPT_INVALIDATE failed:"
+ " total %d entries, processed %d entries",
+ total_entries, cache.entry_num);
+ } else if (total_entries != cache.entry_num) {
+ error_setg(errp, "IOMMU_HWPT_INVALIDATE succeed but with unprocessed"
+ " entries: total %d entries, processed %d entries."
+ " Kernel BUG?!", total_entries, cache.entry_num);
+ return false;
+ }
+
+ return !ret;
+}
+
+bool host_iommu_device_iommufd_attach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
+ uint32_t hwpt_id, Error **errp)
+{
+ HostIOMMUDeviceIOMMUFDClass *idevc =
+ HOST_IOMMU_DEVICE_IOMMUFD_GET_CLASS(idev);
+
+ g_assert(idevc->attach_hwpt);
+ return idevc->attach_hwpt(idev, hwpt_id, errp);
+}
+
+bool host_iommu_device_iommufd_detach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
+ Error **errp)
+{
+ HostIOMMUDeviceIOMMUFDClass *idevc =
+ HOST_IOMMU_DEVICE_IOMMUFD_GET_CLASS(idev);
+
+ g_assert(idevc->detach_hwpt);
+ return idevc->detach_hwpt(idev, errp);
+}
+
static int hiod_iommufd_get_cap(HostIOMMUDevice *hiod, int cap, Error **errp)
{
HostIOMMUDeviceCaps *caps = &hiod->caps;
@@ -349,6 +405,8 @@ static const TypeInfo types[] = {
}, {
.name = TYPE_HOST_IOMMU_DEVICE_IOMMUFD,
.parent = TYPE_HOST_IOMMU_DEVICE,
+ .instance_size = sizeof(HostIOMMUDeviceIOMMUFD),
+ .class_size = sizeof(HostIOMMUDeviceIOMMUFDClass),
.class_init = hiod_iommufd_class_init,
.abstract = true,
}
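
The new iommufd_backend_invalidate_cache() helper takes the entry array plus an in/out entry count, mirroring the kernel's IOMMU_HWPT_INVALIDATE semantics. As a hedged sketch of how a vIOMMU caller might drive it (the flush_hwpt_cache() wrapper below and the origin of entries/data_type are illustrative assumptions, not part of this patch):

/*
 * Illustrative only: "entries", "entry_len" and "data_type" would come from
 * the vIOMMU emulation; iommufd_backend_invalidate_cache() is the new API.
 */
static bool flush_hwpt_cache(IOMMUFDBackend *be, uint32_t hwpt_id,
                             uint32_t data_type, void *entries,
                             uint32_t entry_len, uint32_t num, Error **errp)
{
    uint32_t done = num;    /* in: entries submitted, out: entries processed */

    return iommufd_backend_invalidate_cache(be, hwpt_id, data_type,
                                            entry_len, &done, entries, errp);
}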
diff --git a/backends/trace-events b/backends/trace-events
index 40811a3..7278214 100644
--- a/backends/trace-events
+++ b/backends/trace-events
@@ -18,3 +18,4 @@ iommufd_backend_alloc_hwpt(int iommufd, uint32_t dev_id, uint32_t pt_id, uint32_
iommufd_backend_free_id(int iommufd, uint32_t id, int ret) " iommufd=%d id=%d (%d)"
iommufd_backend_set_dirty(int iommufd, uint32_t hwpt_id, bool start, int ret) " iommufd=%d hwpt=%u enable=%d (%d)"
iommufd_backend_get_dirty_bitmap(int iommufd, uint32_t hwpt_id, uint64_t iova, uint64_t size, uint64_t page_size, int ret) " iommufd=%d hwpt=%u iova=0x%"PRIx64" size=0x%"PRIx64" page_size=0x%"PRIx64" (%d)"
+iommufd_backend_invalidate_cache(int iommufd, uint32_t id, uint32_t data_type, uint32_t entry_len, uint32_t entry_num, uint32_t done_num, uint64_t data_ptr, int ret) " iommufd=%d id=%u data_type=%u entry_len=%u entry_num=%u done_num=%u data_ptr=0x%"PRIx64" (%d)"
diff --git a/docs/devel/rust.rst b/docs/devel/rust.rst
index 34d9c79..47e9677 100644
--- a/docs/devel/rust.rst
+++ b/docs/devel/rust.rst
@@ -96,6 +96,11 @@ are missing:
architecture (VMState). Right now, VMState lacks type safety because
it is hard to place the ``VMStateField`` definitions in traits.
+* NUL-terminated file names with ``#[track_caller]`` are scheduled for
+ inclusion as ``#![feature(location_file_nul)]``, but it will be a while
+ before QEMU can use them. For now, there is special code in
+ ``util/error.c`` to support non-NUL-terminated file names.
+
* associated const equality would be nice to have for some users of
``callbacks::FnCall``, but is still experimental. ``ASSERT_IS_SOME``
replaces it.
@@ -155,10 +160,10 @@ module status
``callbacks`` complete
``cell`` stable
``errno`` complete
+``error`` stable
``irq`` complete
``memory`` stable
``module`` complete
-``offset_of`` stable
``qdev`` stable
``qom`` stable
``sysbus`` stable
diff --git a/docs/sphinx/qapi_domain.py b/docs/sphinx/qapi_domain.py
index c94af57..ebc46a7 100644
--- a/docs/sphinx/qapi_domain.py
+++ b/docs/sphinx/qapi_domain.py
@@ -20,16 +20,6 @@ from typing import (
from docutils import nodes
from docutils.parsers.rst import directives
-
-from compat import (
- CompatField,
- CompatGroupedField,
- CompatTypedField,
- KeywordNode,
- ParserFix,
- Signature,
- SpaceNode,
-)
from sphinx import addnodes
from sphinx.directives import ObjectDescription
from sphinx.domains import (
@@ -44,6 +34,16 @@ from sphinx.util import logging
from sphinx.util.docutils import SphinxDirective
from sphinx.util.nodes import make_id, make_refnode
+from compat import (
+ CompatField,
+ CompatGroupedField,
+ CompatTypedField,
+ KeywordNode,
+ ParserFix,
+ Signature,
+ SpaceNode,
+)
+
if TYPE_CHECKING:
from typing import (
@@ -56,7 +56,6 @@ if TYPE_CHECKING:
)
from docutils.nodes import Element, Node
-
from sphinx.addnodes import desc_signature, pending_xref
from sphinx.application import Sphinx
from sphinx.builders import Builder
@@ -168,6 +167,8 @@ class QAPIDescription(ParserFix):
"""
def handle_signature(self, sig: str, signode: desc_signature) -> Signature:
+ # pylint: disable=unused-argument
+
# Do nothing. The return value here is the "name" of the entity
# being documented; for QAPI, this is the same as the
# "signature", which is just a name.
@@ -210,6 +211,8 @@ class QAPIDescription(ParserFix):
def add_target_and_index(
self, name: Signature, sig: str, signode: desc_signature
) -> None:
+ # pylint: disable=unused-argument
+
# name is the return value of handle_signature.
# sig is the original, raw text argument to handle_signature.
# For QAPI, these are identical, currently.
diff --git a/docs/sphinx/qapidoc.py b/docs/sphinx/qapidoc.py
index 661b2c4..8011ac9 100644
--- a/docs/sphinx/qapidoc.py
+++ b/docs/sphinx/qapidoc.py
@@ -27,6 +27,7 @@ https://www.sphinx-doc.org/en/master/development/index.html
from __future__ import annotations
+
__version__ = "2.0"
from contextlib import contextmanager
@@ -56,8 +57,6 @@ from qapi.schema import (
QAPISchemaVisitor,
)
from qapi.source import QAPISourceInfo
-
-from qapidoc_legacy import QAPISchemaGenRSTVisitor # type: ignore
from sphinx import addnodes
from sphinx.directives.code import CodeBlock
from sphinx.errors import ExtensionError
@@ -65,6 +64,8 @@ from sphinx.util import logging
from sphinx.util.docutils import SphinxDirective, switch_source_input
from sphinx.util.nodes import nested_parse_with_titles
+from qapidoc_legacy import QAPISchemaGenRSTVisitor # type: ignore
+
if TYPE_CHECKING:
from typing import (
diff --git a/hw/display/apple-gfx.m b/hw/display/apple-gfx.m
index 8dde1f1..174d56a 100644
--- a/hw/display/apple-gfx.m
+++ b/hw/display/apple-gfx.m
@@ -454,7 +454,7 @@ static void set_cursor_glyph(void *opaque)
/* ------ DMA (device reading system memory) ------ */
typedef struct AppleGFXReadMemoryJob {
- QemuSemaphore sem;
+ QemuEvent event;
hwaddr physical_address;
uint64_t length;
void *dst;
@@ -470,7 +470,7 @@ static void apple_gfx_do_read_memory(void *opaque)
job->dst, job->length, MEMTXATTRS_UNSPECIFIED);
job->success = (r == MEMTX_OK);
- qemu_sem_post(&job->sem);
+ qemu_event_set(&job->event);
}
static bool apple_gfx_read_memory(AppleGFXState *s, hwaddr physical_address,
@@ -483,11 +483,11 @@ static bool apple_gfx_read_memory(AppleGFXState *s, hwaddr physical_address,
trace_apple_gfx_read_memory(physical_address, length, dst);
/* Performing DMA requires BQL, so do it in a BH. */
- qemu_sem_init(&job.sem, 0);
+ qemu_event_init(&job.event, 0);
aio_bh_schedule_oneshot(qemu_get_aio_context(),
apple_gfx_do_read_memory, &job);
- qemu_sem_wait(&job.sem);
- qemu_sem_destroy(&job.sem);
+ qemu_event_wait(&job.event);
+ qemu_event_destroy(&job.event);
return job.success;
}
diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c
index 0fd1337..cb48cc1 100644
--- a/hw/timer/hpet.c
+++ b/hw/timer/hpet.c
@@ -328,16 +328,16 @@ static const VMStateDescription vmstate_hpet_timer = {
static const VMStateDescription vmstate_hpet = {
.name = "hpet",
.version_id = 2,
- .minimum_version_id = 1,
+ .minimum_version_id = 2,
.pre_save = hpet_pre_save,
.post_load = hpet_post_load,
.fields = (const VMStateField[]) {
VMSTATE_UINT64(config, HPETState),
VMSTATE_UINT64(isr, HPETState),
VMSTATE_UINT64(hpet_counter, HPETState),
- VMSTATE_UINT8_V(num_timers_save, HPETState, 2),
+ VMSTATE_UINT8(num_timers_save, HPETState),
VMSTATE_VALIDATE("num_timers must match", hpet_validate_num_timers),
- VMSTATE_STRUCT_VARRAY_UINT8(timer, HPETState, num_timers, 0,
+ VMSTATE_STRUCT_VARRAY_UINT8(timer, HPETState, num_timers_save, 0,
vmstate_hpet_timer, HPETTimer),
VMSTATE_END_OF_LIST()
},
@@ -691,8 +691,14 @@ static void hpet_realize(DeviceState *dev, Error **errp)
int i;
HPETTimer *timer;
+ if (s->num_timers < HPET_MIN_TIMERS || s->num_timers > HPET_MAX_TIMERS) {
+ error_setg(errp, "hpet.num_timers must be between %d and %d",
+ HPET_MIN_TIMERS, HPET_MAX_TIMERS);
+ return;
+ }
if (!s->intcap) {
- warn_report("Hpet's intcap not initialized");
+ error_setg(errp, "hpet.hpet-intcap not initialized");
+ return;
}
if (hpet_fw_cfg.count == UINT8_MAX) {
/* first instance */
@@ -700,7 +706,7 @@ static void hpet_realize(DeviceState *dev, Error **errp)
}
if (hpet_fw_cfg.count == 8) {
- error_setg(errp, "Only 8 instances of HPET is allowed");
+ error_setg(errp, "Only 8 instances of HPET are allowed");
return;
}
@@ -710,11 +716,6 @@ static void hpet_realize(DeviceState *dev, Error **errp)
sysbus_init_irq(sbd, &s->irqs[i]);
}
- if (s->num_timers < HPET_MIN_TIMERS) {
- s->num_timers = HPET_MIN_TIMERS;
- } else if (s->num_timers > HPET_MAX_TIMERS) {
- s->num_timers = HPET_MAX_TIMERS;
- }
for (i = 0; i < HPET_MAX_TIMERS; i++) {
timer = &s->timer[i];
timer->qemu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, hpet_timer, timer);
diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c
index 1c6ca94..d834bd4 100644
--- a/hw/vfio/container-base.c
+++ b/hw/vfio/container-base.c
@@ -75,12 +75,12 @@ void vfio_address_space_insert(VFIOAddressSpace *space,
int vfio_container_dma_map(VFIOContainerBase *bcontainer,
hwaddr iova, ram_addr_t size,
- void *vaddr, bool readonly)
+ void *vaddr, bool readonly, MemoryRegion *mr)
{
VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
g_assert(vioc->dma_map);
- return vioc->dma_map(bcontainer, iova, size, vaddr, readonly);
+ return vioc->dma_map(bcontainer, iova, size, vaddr, readonly, mr);
}
int vfio_container_dma_unmap(VFIOContainerBase *bcontainer,
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index a9f0dba..0f948d0 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -33,8 +33,8 @@
#include "qapi/error.h"
#include "pci.h"
#include "hw/vfio/vfio-container.h"
+#include "hw/vfio/vfio-cpr.h"
#include "vfio-helpers.h"
-#include "vfio-cpr.h"
#include "vfio-listener.h"
#define TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO TYPE_HOST_IOMMU_DEVICE "-legacy-vfio"
@@ -207,7 +207,8 @@ static int vfio_legacy_dma_unmap(const VFIOContainerBase *bcontainer,
}
static int vfio_legacy_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova,
- ram_addr_t size, void *vaddr, bool readonly)
+ ram_addr_t size, void *vaddr, bool readonly,
+ MemoryRegion *mr)
{
const VFIOContainer *container = container_of(bcontainer, VFIOContainer,
bcontainer);
diff --git a/hw/vfio/cpr.c b/hw/vfio/cpr.c
index 3214184..0210e76 100644
--- a/hw/vfio/cpr.c
+++ b/hw/vfio/cpr.c
@@ -8,9 +8,9 @@
#include "qemu/osdep.h"
#include "hw/vfio/vfio-device.h"
#include "migration/misc.h"
+#include "hw/vfio/vfio-cpr.h"
#include "qapi/error.h"
#include "system/runstate.h"
-#include "vfio-cpr.h"
static int vfio_cpr_reboot_notifier(NotifierWithReturn *notifier,
MigrationEvent *e, Error **errp)
diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c
index e7952d1..e7a9d1f 100644
--- a/hw/vfio/igd.c
+++ b/hw/vfio/igd.c
@@ -187,23 +187,21 @@ static bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev,
}
static bool vfio_pci_igd_opregion_detect(VFIOPCIDevice *vdev,
- struct vfio_region_info **opregion,
- Error **errp)
+ struct vfio_region_info **opregion)
{
int ret;
- /* Hotplugging is not supported for opregion access */
- if (vdev->pdev.qdev.hotplugged) {
- error_setg(errp, "IGD OpRegion is not supported on hotplugged device");
- return false;
- }
-
ret = vfio_device_get_region_info_type(&vdev->vbasedev,
VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL,
VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, opregion);
if (ret) {
- error_setg_errno(errp, -ret,
- "Device does not supports IGD OpRegion feature");
+ return false;
+ }
+
+ /* Hotplugging is not supported for opregion access */
+ if (vdev->pdev.qdev.hotplugged) {
+ warn_report("IGD device detected, but OpRegion is not supported "
+ "on hotplugged device.");
return false;
}
@@ -524,7 +522,7 @@ static bool vfio_pci_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp)
}
/* IGD device always comes with OpRegion */
- if (!vfio_pci_igd_opregion_detect(vdev, &opregion, errp)) {
+ if (!vfio_pci_igd_opregion_detect(vdev, &opregion)) {
return true;
}
info_report("OpRegion detected on Intel display %x.", vdev->device_id);
@@ -695,7 +693,7 @@ static bool vfio_pci_kvmgt_config_quirk(VFIOPCIDevice *vdev, Error **errp)
return true;
}
- if (!vfio_pci_igd_opregion_detect(vdev, &opregion, errp)) {
+ if (!vfio_pci_igd_opregion_detect(vdev, &opregion)) {
/* Should never reach here, KVMGT always emulates OpRegion */
return false;
}
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index af1c7ab..d3efef7 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -21,20 +21,21 @@
#include "qapi/error.h"
#include "system/iommufd.h"
#include "hw/qdev-core.h"
+#include "hw/vfio/vfio-cpr.h"
#include "system/reset.h"
#include "qemu/cutils.h"
#include "qemu/chardev_open.h"
#include "pci.h"
#include "vfio-iommufd.h"
#include "vfio-helpers.h"
-#include "vfio-cpr.h"
#include "vfio-listener.h"
#define TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO \
TYPE_HOST_IOMMU_DEVICE_IOMMUFD "-vfio"
static int iommufd_cdev_map(const VFIOContainerBase *bcontainer, hwaddr iova,
- ram_addr_t size, void *vaddr, bool readonly)
+ ram_addr_t size, void *vaddr, bool readonly,
+ MemoryRegion *mr)
{
const VFIOIOMMUFDContainer *container =
container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
@@ -592,6 +593,10 @@ found_container:
goto err_listener_register;
}
+ /*
+ * Do not move this code before attachment! The nested IOMMU support
+ * needs device and hwpt id which are generated only after attachment.
+ */
if (!vfio_device_hiod_create_and_realize(vbasedev,
TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO, errp)) {
goto err_listener_register;
@@ -810,21 +815,38 @@ static void vfio_iommu_iommufd_class_init(ObjectClass *klass, const void *data)
vioc->query_dirty_bitmap = iommufd_query_dirty_bitmap;
};
+static bool
+host_iommu_device_iommufd_vfio_attach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
+ uint32_t hwpt_id, Error **errp)
+{
+ VFIODevice *vbasedev = HOST_IOMMU_DEVICE(idev)->agent;
+
+ return !iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt_id, errp);
+}
+
+static bool
+host_iommu_device_iommufd_vfio_detach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
+ Error **errp)
+{
+ VFIODevice *vbasedev = HOST_IOMMU_DEVICE(idev)->agent;
+
+ return iommufd_cdev_detach_ioas_hwpt(vbasedev, errp);
+}
+
static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque,
Error **errp)
{
VFIODevice *vdev = opaque;
+ HostIOMMUDeviceIOMMUFD *idev;
HostIOMMUDeviceCaps *caps = &hiod->caps;
+ VendorCaps *vendor_caps = &caps->vendor_caps;
enum iommu_hw_info_type type;
- union {
- struct iommu_hw_info_vtd vtd;
- } data;
uint64_t hw_caps;
hiod->agent = opaque;
- if (!iommufd_backend_get_device_info(vdev->iommufd, vdev->devid,
- &type, &data, sizeof(data),
+ if (!iommufd_backend_get_device_info(vdev->iommufd, vdev->devid, &type,
+ vendor_caps, sizeof(*vendor_caps),
&hw_caps, errp)) {
return false;
}
@@ -833,6 +855,11 @@ static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque,
caps->type = type;
caps->hw_caps = hw_caps;
+ idev = HOST_IOMMU_DEVICE_IOMMUFD(hiod);
+ idev->iommufd = vdev->iommufd;
+ idev->devid = vdev->devid;
+ idev->hwpt_id = vdev->hwpt->hwpt_id;
+
return true;
}
@@ -858,10 +885,14 @@ hiod_iommufd_vfio_get_page_size_mask(HostIOMMUDevice *hiod)
static void hiod_iommufd_vfio_class_init(ObjectClass *oc, const void *data)
{
HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_CLASS(oc);
+ HostIOMMUDeviceIOMMUFDClass *idevc = HOST_IOMMU_DEVICE_IOMMUFD_CLASS(oc);
hiodc->realize = hiod_iommufd_vfio_realize;
hiodc->get_iova_ranges = hiod_iommufd_vfio_get_iova_ranges;
hiodc->get_page_size_mask = hiod_iommufd_vfio_get_page_size_mask;
+
+ idevc->attach_hwpt = host_iommu_device_iommufd_vfio_attach_hwpt;
+ idevc->detach_hwpt = host_iommu_device_iommufd_vfio_detach_hwpt;
};
static const TypeInfo types[] = {
diff --git a/hw/vfio/listener.c b/hw/vfio/listener.c
index bfacb3d..203ed03 100644
--- a/hw/vfio/listener.c
+++ b/hw/vfio/listener.c
@@ -90,16 +90,17 @@ static bool vfio_listener_skipped_section(MemoryRegionSection *section)
section->offset_within_address_space & (1ULL << 63);
}
-/* Called with rcu_read_lock held. */
-static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
- ram_addr_t *ram_addr, bool *read_only,
- Error **errp)
+/*
+ * Called with rcu_read_lock held.
+ * The returned MemoryRegion must not be accessed after calling rcu_read_unlock.
+ */
+static MemoryRegion *vfio_translate_iotlb(IOMMUTLBEntry *iotlb, hwaddr *xlat_p,
+ Error **errp)
{
- bool ret, mr_has_discard_manager;
+ MemoryRegion *mr;
- ret = memory_get_xlat_addr(iotlb, vaddr, ram_addr, read_only,
- &mr_has_discard_manager, errp);
- if (ret && mr_has_discard_manager) {
+ mr = memory_translate_iotlb(iotlb, xlat_p, errp);
+ if (mr && memory_region_has_ram_discard_manager(mr)) {
/*
* Malicious VMs might trigger discarding of IOMMU-mapped memory. The
* pages will remain pinned inside vfio until unmapped, resulting in a
@@ -118,7 +119,7 @@ static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
" intended via an IOMMU. It's possible to mitigate "
" by setting/adjusting RLIMIT_MEMLOCK.");
}
- return ret;
+ return mr;
}
static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
@@ -126,6 +127,8 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
VFIOContainerBase *bcontainer = giommu->bcontainer;
hwaddr iova = iotlb->iova + giommu->iommu_offset;
+ MemoryRegion *mr;
+ hwaddr xlat;
void *vaddr;
int ret;
Error *local_err = NULL;
@@ -150,10 +153,14 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
bool read_only;
- if (!vfio_get_xlat_addr(iotlb, &vaddr, NULL, &read_only, &local_err)) {
+ mr = vfio_translate_iotlb(iotlb, &xlat, &local_err);
+ if (!mr) {
error_report_err(local_err);
goto out;
}
+ vaddr = memory_region_get_ram_ptr(mr) + xlat;
+ read_only = !(iotlb->perm & IOMMU_WO) || mr->readonly;
+
/*
* vaddr is only valid until rcu_read_unlock(). But after
* vfio_dma_map has set up the mapping the pages will be
@@ -163,7 +170,7 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
*/
ret = vfio_container_dma_map(bcontainer, iova,
iotlb->addr_mask + 1, vaddr,
- read_only);
+ read_only, mr);
if (ret) {
error_report("vfio_container_dma_map(%p, 0x%"HWADDR_PRIx", "
"0x%"HWADDR_PRIx", %p) = %d (%s)",
@@ -233,7 +240,7 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl,
vaddr = memory_region_get_ram_ptr(section->mr) + start;
ret = vfio_container_dma_map(bcontainer, iova, next - start,
- vaddr, section->readonly);
+ vaddr, section->readonly, section->mr);
if (ret) {
/* Rollback */
vfio_ram_discard_notify_discard(rdl, section);
@@ -449,6 +456,26 @@ static void vfio_device_error_append(VFIODevice *vbasedev, Error **errp)
}
}
+VFIORamDiscardListener *vfio_find_ram_discard_listener(
+ VFIOContainerBase *bcontainer, MemoryRegionSection *section)
+{
+ VFIORamDiscardListener *vrdl = NULL;
+
+ QLIST_FOREACH(vrdl, &bcontainer->vrdl_list, next) {
+ if (vrdl->mr == section->mr &&
+ vrdl->offset_within_address_space ==
+ section->offset_within_address_space) {
+ break;
+ }
+ }
+
+ if (!vrdl) {
+ hw_error("vfio: Trying to sync missing RAM discard listener");
+ /* does not return */
+ }
+ return vrdl;
+}
+
static void vfio_listener_region_add(MemoryListener *listener,
MemoryRegionSection *section)
{
@@ -557,7 +584,7 @@ static void vfio_listener_region_add(MemoryListener *listener,
}
ret = vfio_container_dma_map(bcontainer, iova, int128_get64(llsize),
- vaddr, section->readonly);
+ vaddr, section->readonly, section->mr);
if (ret) {
error_setg(&err, "vfio_container_dma_map(%p, 0x%"HWADDR_PRIx", "
"0x%"HWADDR_PRIx", %p) = %d (%s)",
@@ -1010,6 +1037,8 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
ram_addr_t translated_addr;
Error *local_err = NULL;
int ret = -EINVAL;
+ MemoryRegion *mr;
+ hwaddr xlat;
trace_vfio_iommu_map_dirty_notify(iova, iova + iotlb->addr_mask);
@@ -1021,9 +1050,11 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
}
rcu_read_lock();
- if (!vfio_get_xlat_addr(iotlb, NULL, &translated_addr, NULL, &local_err)) {
+ mr = vfio_translate_iotlb(iotlb, &xlat, &local_err);
+ if (!mr) {
goto out_unlock;
}
+ translated_addr = memory_region_get_ram_addr(mr) + xlat;
ret = vfio_container_query_dirty_bitmap(bcontainer, iova, iotlb->addr_mask + 1,
translated_addr, &local_err);
@@ -1075,19 +1106,8 @@ vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainerBase *bcontainer,
MemoryRegionSection *section)
{
RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr);
- VFIORamDiscardListener *vrdl = NULL;
-
- QLIST_FOREACH(vrdl, &bcontainer->vrdl_list, next) {
- if (vrdl->mr == section->mr &&
- vrdl->offset_within_address_space ==
- section->offset_within_address_space) {
- break;
- }
- }
-
- if (!vrdl) {
- hw_error("vfio: Trying to sync missing RAM discard listener");
- }
+ VFIORamDiscardListener *vrdl =
+ vfio_find_ram_discard_listener(bcontainer, section);
/*
* We only want/can synchronize the bitmap for actually mapped parts -
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index a1bfdfe..b1250d8 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -511,6 +511,25 @@ static void vfio_update_kvm_msi_virq(VFIOMSIVector *vector, MSIMessage msg,
kvm_irqchip_commit_routes(kvm_state);
}
+static void set_irq_signalling(VFIODevice *vbasedev, VFIOMSIVector *vector,
+ unsigned int nr)
+{
+ Error *err = NULL;
+ int32_t fd;
+
+ if (vector->virq >= 0) {
+ fd = event_notifier_get_fd(&vector->kvm_interrupt);
+ } else {
+ fd = event_notifier_get_fd(&vector->interrupt);
+ }
+
+ if (!vfio_device_irq_set_signaling(vbasedev, VFIO_PCI_MSIX_IRQ_INDEX, nr,
+ VFIO_IRQ_SET_ACTION_TRIGGER,
+ fd, &err)) {
+ error_reportf_err(err, VFIO_MSG_PREFIX, vbasedev->name);
+ }
+}
+
static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
MSIMessage *msg, IOHandler *handler)
{
@@ -583,21 +602,7 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
strerror(-ret));
}
} else {
- Error *err = NULL;
- int32_t fd;
-
- if (vector->virq >= 0) {
- fd = event_notifier_get_fd(&vector->kvm_interrupt);
- } else {
- fd = event_notifier_get_fd(&vector->interrupt);
- }
-
- if (!vfio_device_irq_set_signaling(&vdev->vbasedev,
- VFIO_PCI_MSIX_IRQ_INDEX, nr,
- VFIO_IRQ_SET_ACTION_TRIGGER, fd,
- &err)) {
- error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
- }
+ set_irq_signalling(&vdev->vbasedev, vector, nr);
}
}
@@ -2854,6 +2859,18 @@ static bool vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
static void vfio_pci_put_device(VFIOPCIDevice *vdev)
{
+ vfio_display_finalize(vdev);
+ vfio_bars_finalize(vdev);
+ g_free(vdev->emulated_config_bits);
+ g_free(vdev->rom);
+ /*
+ * XXX Leaking igd_opregion is not an oversight, we can't remove the
+ * fw_cfg entry therefore leaking this allocation seems like the safest
+ * option.
+ *
+ * g_free(vdev->igd_opregion);
+ */
+
vfio_device_detach(&vdev->vbasedev);
g_free(vdev->vbasedev.name);
@@ -3005,6 +3022,19 @@ static bool vfio_pci_config_setup(VFIOPCIDevice *vdev, Error **errp)
{
PCIDevice *pdev = &vdev->pdev;
VFIODevice *vbasedev = &vdev->vbasedev;
+ uint32_t config_space_size;
+ int ret;
+
+ config_space_size = MIN(pci_config_size(&vdev->pdev), vdev->config_size);
+
+ /* Get a copy of config space */
+ ret = vfio_pci_config_space_read(vdev, 0, config_space_size,
+ vdev->pdev.config);
+ if (ret < (int)config_space_size) {
+ ret = ret < 0 ? -ret : EFAULT;
+ error_setg_errno(errp, ret, "failed to read device config space");
+ return false;
+ }
/* vfio emulates a lot for us, but some bits need extra love */
vdev->emulated_config_bits = g_malloc0(vdev->config_size);
@@ -3126,15 +3156,14 @@ static bool vfio_interrupt_setup(VFIOPCIDevice *vdev, Error **errp)
return true;
}
-static void vfio_realize(PCIDevice *pdev, Error **errp)
+static void vfio_pci_realize(PCIDevice *pdev, Error **errp)
{
ERRP_GUARD();
VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev);
VFIODevice *vbasedev = &vdev->vbasedev;
- int i, ret;
+ int i;
char uuid[UUID_STR_LEN];
g_autofree char *name = NULL;
- uint32_t config_space_size;
if (vbasedev->fd < 0 && !vbasedev->sysfsdev) {
if (!(~vdev->host.domain || ~vdev->host.bus ||
@@ -3189,17 +3218,6 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
goto error;
}
- config_space_size = MIN(pci_config_size(&vdev->pdev), vdev->config_size);
-
- /* Get a copy of config space */
- ret = vfio_pci_config_space_read(vdev, 0, config_space_size,
- vdev->pdev.config);
- if (ret < (int)config_space_size) {
- ret = ret < 0 ? -ret : EFAULT;
- error_setg_errno(errp, ret, "failed to read device config space");
- goto error;
- }
-
if (!vfio_pci_config_setup(vdev, errp)) {
goto error;
}
@@ -3302,17 +3320,6 @@ static void vfio_instance_finalize(Object *obj)
{
VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj);
- vfio_display_finalize(vdev);
- vfio_bars_finalize(vdev);
- g_free(vdev->emulated_config_bits);
- g_free(vdev->rom);
- /*
- * XXX Leaking igd_opregion is not an oversight, we can't remove the
- * fw_cfg entry therefore leaking this allocation seems like the safest
- * option.
- *
- * g_free(vdev->igd_opregion);
- */
vfio_pci_put_device(vdev);
}
@@ -3514,7 +3521,7 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, const void *data)
object_class_property_add_str(klass, "fd", NULL, vfio_pci_set_fd);
#endif
dc->desc = "VFIO-based PCI device assignment";
- pdc->realize = vfio_realize;
+ pdc->realize = vfio_pci_realize;
object_class_property_set_description(klass, /* 1.3 */
"host",
diff --git a/hw/vfio/vfio-cpr.h b/hw/vfio/vfio-cpr.h
deleted file mode 100644
index 134b83a..0000000
--- a/hw/vfio/vfio-cpr.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * VFIO CPR
- *
- * Copyright (c) 2025 Oracle and/or its affiliates.
- *
- * SPDX-License-Identifier: GPL-2.0-or-later
- */
-
-#ifndef HW_VFIO_CPR_H
-#define HW_VFIO_CPR_H
-
-bool vfio_cpr_register_container(VFIOContainerBase *bcontainer, Error **errp);
-void vfio_cpr_unregister_container(VFIOContainerBase *bcontainer);
-
-#endif /* HW_VFIO_CPR_H */
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index e20da95..7061b6e 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -209,6 +209,8 @@ static void vhost_vdpa_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
int ret;
Int128 llend;
Error *local_err = NULL;
+ MemoryRegion *mr;
+ hwaddr xlat;
if (iotlb->target_as != &address_space_memory) {
error_report("Wrong target AS \"%s\", only system memory is allowed",
@@ -228,11 +230,14 @@ static void vhost_vdpa_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
bool read_only;
- if (!memory_get_xlat_addr(iotlb, &vaddr, NULL, &read_only, NULL,
- &local_err)) {
+ mr = memory_translate_iotlb(iotlb, &xlat, &local_err);
+ if (!mr) {
error_report_err(local_err);
return;
}
+ vaddr = memory_region_get_ram_ptr(mr) + xlat;
+ read_only = !(iotlb->perm & IOMMU_WO) || mr->readonly;
+
ret = vhost_vdpa_dma_map(s, VHOST_VDPA_GUEST_PA_ASID, iova,
iotlb->addr_mask + 1, vaddr, read_only);
if (ret) {
diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h
index 3d392b0..9d37f86 100644
--- a/include/hw/vfio/vfio-container-base.h
+++ b/include/hw/vfio/vfio-container-base.h
@@ -78,7 +78,7 @@ void vfio_address_space_insert(VFIOAddressSpace *space,
int vfio_container_dma_map(VFIOContainerBase *bcontainer,
hwaddr iova, ram_addr_t size,
- void *vaddr, bool readonly);
+ void *vaddr, bool readonly, MemoryRegion *mr);
int vfio_container_dma_unmap(VFIOContainerBase *bcontainer,
hwaddr iova, ram_addr_t size,
IOMMUTLBEntry *iotlb, bool unmap_all);
@@ -115,13 +115,57 @@ OBJECT_DECLARE_TYPE(VFIOContainerBase, VFIOIOMMUClass, VFIO_IOMMU)
struct VFIOIOMMUClass {
ObjectClass parent_class;
- /* basic feature */
+ /**
+ * @setup
+ *
+ * Perform basic setup of the container, including configuring IOMMU
+ * capabilities, IOVA ranges, supported page sizes, etc.
+ *
+ * @bcontainer: #VFIOContainerBase
+ * @errp: pointer to Error*, to store an error if it happens.
+ *
+ * Returns true to indicate success and false for error.
+ */
bool (*setup)(VFIOContainerBase *bcontainer, Error **errp);
+
+ /**
+ * @listener_begin
+ *
+ * Called at the beginning of an address space update transaction.
+ * See #MemoryListener.
+ *
+ * @bcontainer: #VFIOContainerBase
+ */
void (*listener_begin)(VFIOContainerBase *bcontainer);
+
+ /**
+ * @listener_commit
+ *
+ * Called at the end of an address space update transaction,
+ * See #MemoryListener.
+ *
+ * @bcontainer: #VFIOContainerBase
+ */
void (*listener_commit)(VFIOContainerBase *bcontainer);
+
+ /**
+ * @dma_map
+ *
+ * Map an address range into the container. Note that the memory region is
+ * referenced within an RCU read lock region across this call.
+ *
+ * @bcontainer: #VFIOContainerBase to use
+ * @iova: start address to map
+ * @size: size of the range to map
+ * @vaddr: process virtual address of mapping
+ * @readonly: true if mapping should be readonly
+ * @mr: the memory region for this mapping
+ *
+ * Returns 0 to indicate success and -errno otherwise.
+ */
int (*dma_map)(const VFIOContainerBase *bcontainer,
hwaddr iova, ram_addr_t size,
- void *vaddr, bool readonly);
+ void *vaddr, bool readonly, MemoryRegion *mr);
/**
* @dma_unmap
*
@@ -132,12 +176,38 @@ struct VFIOIOMMUClass {
* @size: size of the range to unmap
* @iotlb: The IOMMU TLB mapping entry (or NULL)
* @unmap_all: if set, unmap the entire address space
+ *
+ * Returns 0 to indicate success and -errno otherwise.
*/
int (*dma_unmap)(const VFIOContainerBase *bcontainer,
hwaddr iova, ram_addr_t size,
IOMMUTLBEntry *iotlb, bool unmap_all);
+
+
+ /**
+ * @attach_device
+ *
+ * Associate the given device with a container and do some related
+ * initialization of the device context.
+ *
+ * @name: name of the device
+ * @vbasedev: the device
+ * @as: address space to use
+ * @errp: pointer to Error*, to store an error if it happens.
+ *
+ * Returns true to indicate success and false for error.
+ */
bool (*attach_device)(const char *name, VFIODevice *vbasedev,
AddressSpace *as, Error **errp);
+
+ /*
+ * @detach_device
+ *
+ * Detach the given device from its container and clean up any necessary
+ * state.
+ *
+ * @vbasedev: the device to disassociate
+ */
void (*detach_device)(VFIODevice *vbasedev);
/* migration feature */
@@ -152,7 +222,7 @@ struct VFIOIOMMUClass {
* @start: indicates whether to start or stop dirty pages tracking
* @errp: pointer to Error*, to store an error if it happens.
*
- * Returns zero to indicate success and negative for error
+ * Returns zero to indicate success and negative for error.
*/
int (*set_dirty_page_tracking)(const VFIOContainerBase *bcontainer,
bool start, Error **errp);
@@ -167,7 +237,7 @@ struct VFIOIOMMUClass {
* @size: size of iova range
* @errp: pointer to Error*, to store an error if it happens.
*
- * Returns zero to indicate success and negative for error
+ * Returns zero to indicate success and negative for error.
*/
int (*query_dirty_bitmap)(const VFIOContainerBase *bcontainer,
VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp);
@@ -183,4 +253,7 @@ struct VFIOIOMMUClass {
void (*release)(VFIOContainerBase *bcontainer);
};
+VFIORamDiscardListener *vfio_find_ram_discard_listener(
+ VFIOContainerBase *bcontainer, MemoryRegionSection *section);
+
#endif /* HW_VFIO_VFIO_CONTAINER_BASE_H */
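
With the extra MemoryRegion argument, a backend's dma_map implementation now receives the region alongside the host virtual address. A minimal, hedged skeleton of the callback contract documented above (my_backend_map() is a placeholder, not a real interface):

static int my_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova,
                      ram_addr_t size, void *vaddr, bool readonly,
                      MemoryRegion *mr)
{
    /*
     * mr is only guaranteed valid while the caller holds the RCU read
     * lock; take a reference here if the mapping outlives that window.
     */
    if (my_backend_map(iova, size, vaddr, readonly) < 0) {
        return -errno;      /* contract: 0 on success, -errno on failure */
    }
    return 0;
}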
diff --git a/include/hw/vfio/vfio-cpr.h b/include/hw/vfio/vfio-cpr.h
new file mode 100644
index 0000000..750ea5b
--- /dev/null
+++ b/include/hw/vfio/vfio-cpr.h
@@ -0,0 +1,18 @@
+/*
+ * VFIO CPR
+ *
+ * Copyright (c) 2025 Oracle and/or its affiliates.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HW_VFIO_VFIO_CPR_H
+#define HW_VFIO_VFIO_CPR_H
+
+struct VFIOContainerBase;
+
+bool vfio_cpr_register_container(struct VFIOContainerBase *bcontainer,
+ Error **errp);
+void vfio_cpr_unregister_container(struct VFIOContainerBase *bcontainer);
+
+#endif /* HW_VFIO_VFIO_CPR_H */
diff --git a/include/qapi/error-internal.h b/include/qapi/error-internal.h
new file mode 100644
index 0000000..ff18a20
--- /dev/null
+++ b/include/qapi/error-internal.h
@@ -0,0 +1,35 @@
+/*
+ * QEMU Error Objects - struct definition
+ *
+ * Copyright IBM, Corp. 2011
+ * Copyright (C) 2011-2015 Red Hat, Inc.
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ * Markus Armbruster <armbru@redhat.com>,
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2. See
+ * the COPYING.LIB file in the top-level directory.
+ */
+
+#ifndef QAPI_ERROR_INTERNAL_H
+
+struct Error
+{
+ char *msg;
+ ErrorClass err_class;
+
+ /* Used for error_abort only, may be NULL. */
+ const char *func;
+
+ /*
+ * src might be NUL-terminated or not. If it is, src_len is negative.
+ * If it is not, src_len is the length.
+ */
+ const char *src;
+ int src_len;
+ int line;
+ GString *hint;
+};
+
+#endif
diff --git a/include/qemu/futex.h b/include/qemu/futex.h
index 91ae889..607613e 100644
--- a/include/qemu/futex.h
+++ b/include/qemu/futex.h
@@ -1,5 +1,5 @@
/*
- * Wrappers around Linux futex syscall
+ * Wrappers around Linux futex syscall and similar
*
* Copyright Red Hat, Inc. 2017
*
@@ -11,17 +11,35 @@
*
*/
+/*
+ * Note that a wake-up can also be caused by common futex usage patterns in
+ * unrelated code that happened to have previously used the futex word's
+ * memory location (e.g., typical futex-based implementations of Pthreads
+ * mutexes can cause this under some conditions). Therefore, qemu_futex_wait()
+ * callers should always conservatively assume that it is a spurious wake-up,
+ * and use the futex word's value (i.e., the user-space synchronization scheme)
+ * to decide whether to continue to block or not.
+ */
+
#ifndef QEMU_FUTEX_H
#define QEMU_FUTEX_H
+#define HAVE_FUTEX
+
+#ifdef CONFIG_LINUX
#include <sys/syscall.h>
#include <linux/futex.h>
#define qemu_futex(...) syscall(__NR_futex, __VA_ARGS__)
-static inline void qemu_futex_wake(void *f, int n)
+static inline void qemu_futex_wake_all(void *f)
{
- qemu_futex(f, FUTEX_WAKE, n, NULL, NULL, 0);
+ qemu_futex(f, FUTEX_WAKE, INT_MAX, NULL, NULL, 0);
+}
+
+static inline void qemu_futex_wake_single(void *f)
+{
+ qemu_futex(f, FUTEX_WAKE, 1, NULL, NULL, 0);
}
static inline void qemu_futex_wait(void *f, unsigned val)
@@ -37,5 +55,25 @@ static inline void qemu_futex_wait(void *f, unsigned val)
}
}
}
+#elif defined(CONFIG_WIN32)
+#include <synchapi.h>
+
+static inline void qemu_futex_wake_all(void *f)
+{
+ WakeByAddressAll(f);
+}
+
+static inline void qemu_futex_wake_single(void *f)
+{
+ WakeByAddressSingle(f);
+}
+
+static inline void qemu_futex_wait(void *f, unsigned val)
+{
+ WaitOnAddress(f, &val, sizeof(val), INFINITE);
+}
+#else
+#undef HAVE_FUTEX
+#endif
#endif /* QEMU_FUTEX_H */
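
With HAVE_FUTEX now covering both Linux futexes and Win32 WaitOnAddress, a waiter must still re-check the futex word after qemu_futex_wait() returns, as the new comment above warns. A minimal sketch of that pattern (wait_for_flag()/set_flag() are illustrative helpers, not part of the patch):

static void wait_for_flag(unsigned *word)
{
    /* Wake-ups may be spurious: keep waiting while the word is still 0. */
    while (qatomic_read(word) == 0) {
        qemu_futex_wait(word, 0);
    }
}

static void set_flag(unsigned *word)
{
    qatomic_set(word, 1);
    qemu_futex_wake_all(word);  /* or qemu_futex_wake_single() for one waiter */
}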
diff --git a/include/qemu/lockcnt.h b/include/qemu/lockcnt.h
index f4b62a3..5a2800e 100644
--- a/include/qemu/lockcnt.h
+++ b/include/qemu/lockcnt.h
@@ -17,7 +17,7 @@
typedef struct QemuLockCnt QemuLockCnt;
struct QemuLockCnt {
-#ifndef CONFIG_LINUX
+#ifndef HAVE_FUTEX
QemuMutex mutex;
#endif
unsigned count;
diff --git a/include/qemu/thread-posix.h b/include/qemu/thread-posix.h
index 5f2f3d1..758808b 100644
--- a/include/qemu/thread-posix.h
+++ b/include/qemu/thread-posix.h
@@ -32,15 +32,6 @@ struct QemuSemaphore {
unsigned int count;
};
-struct QemuEvent {
-#ifndef __linux__
- pthread_mutex_t lock;
- pthread_cond_t cond;
-#endif
- unsigned value;
- bool initialized;
-};
-
struct QemuThread {
pthread_t thread;
};
diff --git a/include/qemu/thread-win32.h b/include/qemu/thread-win32.h
index d95af44..da9e732 100644
--- a/include/qemu/thread-win32.h
+++ b/include/qemu/thread-win32.h
@@ -28,12 +28,6 @@ struct QemuSemaphore {
bool initialized;
};
-struct QemuEvent {
- int value;
- HANDLE event;
- bool initialized;
-};
-
typedef struct QemuThreadData QemuThreadData;
struct QemuThread {
QemuThreadData *data;
diff --git a/include/qemu/thread.h b/include/qemu/thread.h
index 6f800aa..f0302ed 100644
--- a/include/qemu/thread.h
+++ b/include/qemu/thread.h
@@ -3,13 +3,32 @@
#include "qemu/processor.h"
#include "qemu/atomic.h"
+#include "qemu/futex.h"
typedef struct QemuCond QemuCond;
typedef struct QemuSemaphore QemuSemaphore;
-typedef struct QemuEvent QemuEvent;
typedef struct QemuLockCnt QemuLockCnt;
typedef struct QemuThread QemuThread;
+/*
+ * QemuEvent
+ * =========
+ *
+ * QemuEvent is an implementation of Win32 manual-reset event object.
+ * For details, refer to:
+ * https://learn.microsoft.com/en-us/windows/win32/sync/using-event-objects
+ *
+ * QemuEvent is more lightweight than QemuSemaphore when HAVE_FUTEX is defined.
+ */
+typedef struct QemuEvent {
+#ifndef HAVE_FUTEX
+ pthread_mutex_t lock;
+ pthread_cond_t cond;
+#endif
+ unsigned value;
+ bool initialized;
+} QemuEvent;
+
#ifdef _WIN32
#include "qemu/thread-win32.h"
#else
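
The QemuEvent struct now lives in thread.h so the futex-based fast path can be shared between the POSIX and Win32 implementations. As the comment above says, it behaves like a manual-reset event; a small hedged usage sketch (done_ev and the worker/waiter split are illustrative):

static QemuEvent done_ev;           /* qemu_event_init(&done_ev, false) at setup */

static void worker_finish(void)
{
    qemu_event_set(&done_ev);       /* wakes current and future waiters */
}

static void wait_for_worker(void)
{
    qemu_event_wait(&done_ev);      /* returns immediately once already set */
    qemu_event_reset(&done_ev);     /* re-arm before reusing the event */
}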
diff --git a/include/system/host_iommu_device.h b/include/system/host_iommu_device.h
index 809cced..ab849a4 100644
--- a/include/system/host_iommu_device.h
+++ b/include/system/host_iommu_device.h
@@ -14,6 +14,13 @@
#include "qom/object.h"
#include "qapi/error.h"
+#ifdef CONFIG_LINUX
+#include "linux/iommufd.h"
+
+typedef union VendorCaps {
+ struct iommu_hw_info_vtd vtd;
+ struct iommu_hw_info_arm_smmuv3 smmuv3;
+} VendorCaps;
/**
* struct HostIOMMUDeviceCaps - Define host IOMMU device capabilities.
@@ -22,11 +29,17 @@
*
* @hw_caps: host platform IOMMU capabilities (e.g. on IOMMUFD this represents
* the @out_capabilities value returned from IOMMU_GET_HW_INFO ioctl)
+ *
+ * @vendor_caps: host platform IOMMU vendor specific capabilities (e.g. on
+ * IOMMUFD this represents a user-space buffer filled by kernel
+ * with host IOMMU @type specific hardware information data)
*/
typedef struct HostIOMMUDeviceCaps {
uint32_t type;
uint64_t hw_caps;
+ VendorCaps vendor_caps;
} HostIOMMUDeviceCaps;
+#endif
#define TYPE_HOST_IOMMU_DEVICE "host-iommu-device"
OBJECT_DECLARE_TYPE(HostIOMMUDevice, HostIOMMUDeviceClass, HOST_IOMMU_DEVICE)
@@ -38,7 +51,9 @@ struct HostIOMMUDevice {
void *agent; /* pointer to agent device, ie. VFIO or VDPA device */
PCIBus *aliased_bus;
int aliased_devfn;
+#ifdef CONFIG_LINUX
HostIOMMUDeviceCaps caps;
+#endif
};
/**
diff --git a/include/system/iommufd.h b/include/system/iommufd.h
index cbab75b..283861b 100644
--- a/include/system/iommufd.h
+++ b/include/system/iommufd.h
@@ -61,6 +61,60 @@ bool iommufd_backend_get_dirty_bitmap(IOMMUFDBackend *be, uint32_t hwpt_id,
uint64_t iova, ram_addr_t size,
uint64_t page_size, uint64_t *data,
Error **errp);
+bool iommufd_backend_invalidate_cache(IOMMUFDBackend *be, uint32_t id,
+ uint32_t data_type, uint32_t entry_len,
+ uint32_t *entry_num, void *data,
+ Error **errp);
#define TYPE_HOST_IOMMU_DEVICE_IOMMUFD TYPE_HOST_IOMMU_DEVICE "-iommufd"
+OBJECT_DECLARE_TYPE(HostIOMMUDeviceIOMMUFD, HostIOMMUDeviceIOMMUFDClass,
+ HOST_IOMMU_DEVICE_IOMMUFD)
+
+/* Overload of the host IOMMU device for the iommufd backend */
+struct HostIOMMUDeviceIOMMUFD {
+ HostIOMMUDevice parent_obj;
+
+ IOMMUFDBackend *iommufd;
+ uint32_t devid;
+ uint32_t hwpt_id;
+};
+
+struct HostIOMMUDeviceIOMMUFDClass {
+ HostIOMMUDeviceClass parent_class;
+
+ /**
+ * @attach_hwpt: attach host IOMMU device to IOMMUFD hardware page table.
+ * VFIO and VDPA device can have different implementation.
+ *
+ * Mandatory callback.
+ *
+ * @idev: host IOMMU device backed by IOMMUFD backend.
+ *
+ * @hwpt_id: ID of IOMMUFD hardware page table.
+ *
+ * @errp: pass an Error out when attachment fails.
+ *
+ * Returns: true on success, false on failure.
+ */
+ bool (*attach_hwpt)(HostIOMMUDeviceIOMMUFD *idev, uint32_t hwpt_id,
+ Error **errp);
+ /**
+ * @detach_hwpt: detach host IOMMU device from IOMMUFD hardware page table.
+ * VFIO and VDPA device can have different implementation.
+ *
+ * Mandatory callback.
+ *
+ * @idev: host IOMMU device backed by IOMMUFD backend.
+ *
+ * @errp: pass an Error out when attachment fails.
+ *
+ * Returns: true on success, false on failure.
+ */
+ bool (*detach_hwpt)(HostIOMMUDeviceIOMMUFD *idev, Error **errp);
+};
+
+bool host_iommu_device_iommufd_attach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
+ uint32_t hwpt_id, Error **errp);
+bool host_iommu_device_iommufd_detach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
+ Error **errp);
#endif
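
The two wrappers declared above simply dispatch to the class hooks, as the backends/iommufd.c hunk shows. A hedged sketch of how a caller could move a device onto a new hardware page table (the switch_hwpt() helper and its policy are assumptions, not part of this patch):

static bool switch_hwpt(HostIOMMUDeviceIOMMUFD *idev, uint32_t new_hwpt_id,
                        Error **errp)
{
    if (!host_iommu_device_iommufd_detach_hwpt(idev, errp)) {
        return false;
    }
    if (!host_iommu_device_iommufd_attach_hwpt(idev, new_hwpt_id, errp)) {
        return false;
    }
    idev->hwpt_id = new_hwpt_id;    /* keep the cached id in sync */
    return true;
}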
diff --git a/include/system/kvm.h b/include/system/kvm.h
index 62ec131..7cc60d2 100644
--- a/include/system/kvm.h
+++ b/include/system/kvm.h
@@ -42,6 +42,7 @@ extern bool kvm_gsi_routing_allowed;
extern bool kvm_gsi_direct_mapping;
extern bool kvm_readonly_mem_allowed;
extern bool kvm_msi_use_devid;
+extern bool kvm_pre_fault_memory_supported;
#define kvm_enabled() (kvm_allowed)
/**
diff --git a/include/system/memory.h b/include/system/memory.h
index fc35a0d..0848690 100644
--- a/include/system/memory.h
+++ b/include/system/memory.h
@@ -739,21 +739,20 @@ void ram_discard_manager_unregister_listener(RamDiscardManager *rdm,
RamDiscardListener *rdl);
/**
- * memory_get_xlat_addr: Extract addresses from a TLB entry
+ * memory_translate_iotlb: Extract addresses from a TLB entry.
+ * Called with rcu_read_lock held.
*
* @iotlb: pointer to an #IOMMUTLBEntry
- * @vaddr: virtual address
- * @ram_addr: RAM address
- * @read_only: indicates if writes are allowed
- * @mr_has_discard_manager: indicates memory is controlled by a
- * RamDiscardManager
+ * @xlat_p: return the offset of the entry from the start of the returned
+ * MemoryRegion.
* @errp: pointer to Error*, to store an error if it happens.
*
- * Return: true on success, else false setting @errp with error.
+ * Return: On success, return the MemoryRegion containing the @iotlb translated
+ * addr. The MemoryRegion must not be accessed after rcu_read_unlock.
+ * On failure, return NULL, setting @errp with error.
*/
-bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
- ram_addr_t *ram_addr, bool *read_only,
- bool *mr_has_discard_manager, Error **errp);
+MemoryRegion *memory_translate_iotlb(IOMMUTLBEntry *iotlb, hwaddr *xlat_p,
+ Error **errp);
typedef struct CoalescedMemoryRange CoalescedMemoryRange;
typedef struct MemoryRegionIoeventfd MemoryRegionIoeventfd;
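
Callers of the renamed helper now get back a MemoryRegion plus an offset instead of a pre-computed vaddr/ram_addr. A hedged sketch of the new calling convention, condensed from the hw/vfio/listener.c hunk above (map_one() is an illustrative wrapper name):

static int map_one(VFIOContainerBase *bcontainer, IOMMUTLBEntry *iotlb,
                   hwaddr iova)
{
    MemoryRegion *mr;
    hwaddr xlat;
    Error *local_err = NULL;
    int ret = -EINVAL;

    rcu_read_lock();
    mr = memory_translate_iotlb(iotlb, &xlat, &local_err);
    if (mr) {
        void *vaddr = memory_region_get_ram_ptr(mr) + xlat;
        bool read_only = !(iotlb->perm & IOMMU_WO) || mr->readonly;

        /* mr must not be used after rcu_read_unlock() */
        ret = vfio_container_dma_map(bcontainer, iova, iotlb->addr_mask + 1,
                                     vaddr, read_only, mr);
    } else {
        error_report_err(local_err);
    }
    rcu_read_unlock();
    return ret;
}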
diff --git a/meson.build b/meson.build
index 967a10e..34729c2 100644
--- a/meson.build
+++ b/meson.build
@@ -838,11 +838,18 @@ emulator_link_args = []
midl = not_found
widl = not_found
pathcch = not_found
+synchronization = not_found
host_dsosuf = '.so'
if host_os == 'windows'
midl = find_program('midl', required: false)
widl = find_program('widl', required: false)
pathcch = cc.find_library('pathcch')
+ synchronization = cc.find_library('Synchronization', required: false)
+ if not synchronization.found()
+ # The library name is lowercase on mingw
+ synchronization = cc.find_library('synchronization', required: true)
+ endif
+
socket = cc.find_library('ws2_32')
winmm = cc.find_library('winmm')
diff --git a/migration/colo.c b/migration/colo.c
index c976b3f..e0f713c 100644
--- a/migration/colo.c
+++ b/migration/colo.c
@@ -146,7 +146,7 @@ static void secondary_vm_do_failover(void)
return;
}
/* Notify COLO incoming thread that failover work is finished */
- qemu_sem_post(&mis->colo_incoming_sem);
+ qemu_event_set(&mis->colo_incoming_event);
/* For Secondary VM, jump to incoming co */
if (mis->colo_incoming_co) {
@@ -195,7 +195,7 @@ static void primary_vm_do_failover(void)
}
/* Notify COLO thread that failover work is finished */
- qemu_sem_post(&s->colo_exit_sem);
+ qemu_event_set(&s->colo_exit_event);
}
COLOMode get_colo_mode(void)
@@ -620,8 +620,8 @@ out:
}
/* Hope this not to be too long to wait here */
- qemu_sem_wait(&s->colo_exit_sem);
- qemu_sem_destroy(&s->colo_exit_sem);
+ qemu_event_wait(&s->colo_exit_event);
+ qemu_event_destroy(&s->colo_exit_event);
/*
* It is safe to unregister notifier after failover finished.
@@ -651,7 +651,7 @@ void migrate_start_colo_process(MigrationState *s)
s->colo_delay_timer = timer_new_ms(QEMU_CLOCK_HOST,
colo_checkpoint_notify_timer, NULL);
- qemu_sem_init(&s->colo_exit_sem, 0);
+ qemu_event_init(&s->colo_exit_event, false);
colo_process_checkpoint(s);
bql_lock();
}
@@ -808,11 +808,11 @@ void colo_shutdown(void)
case COLO_MODE_PRIMARY:
s = migrate_get_current();
qemu_event_set(&s->colo_checkpoint_event);
- qemu_sem_post(&s->colo_exit_sem);
+ qemu_event_set(&s->colo_exit_event);
break;
case COLO_MODE_SECONDARY:
mis = migration_incoming_get_current();
- qemu_sem_post(&mis->colo_incoming_sem);
+ qemu_event_set(&mis->colo_incoming_event);
break;
default:
break;
@@ -827,7 +827,7 @@ static void *colo_process_incoming_thread(void *opaque)
Error *local_err = NULL;
rcu_register_thread();
- qemu_sem_init(&mis->colo_incoming_sem, 0);
+ qemu_event_init(&mis->colo_incoming_event, false);
migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
MIGRATION_STATUS_COLO);
@@ -923,8 +923,8 @@ out:
}
/* Hope this not to be too long to loop here */
- qemu_sem_wait(&mis->colo_incoming_sem);
- qemu_sem_destroy(&mis->colo_incoming_sem);
+ qemu_event_wait(&mis->colo_incoming_event);
+ qemu_event_destroy(&mis->colo_incoming_event);
rcu_unregister_thread();
return NULL;
diff --git a/migration/migration.c b/migration/migration.c
index 4697732..4098870 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1630,7 +1630,7 @@ void migration_cancel(void)
}
/* If the migration is paused, kick it out of the pause */
if (old_state == MIGRATION_STATUS_PRE_SWITCHOVER) {
- qemu_sem_post(&s->pause_sem);
+ qemu_event_set(&s->pause_event);
}
migrate_set_state(&s->state, old_state, MIGRATION_STATUS_CANCELLING);
} while (s->state != MIGRATION_STATUS_CANCELLING);
@@ -2342,7 +2342,7 @@ void qmp_migrate_continue(MigrationStatus state, Error **errp)
MigrationStatus_str(s->state));
return;
}
- qemu_sem_post(&s->pause_sem);
+ qemu_event_set(&s->pause_event);
}
int migration_rp_wait(MigrationState *s)
@@ -2911,21 +2911,18 @@ static bool migration_switchover_prepare(MigrationState *s)
return true;
}
- /* Since leaving this state is not atomic with posting the semaphore
+ /*
+ * Since leaving this state is not atomic with setting the event
* it's possible that someone could have issued multiple migrate_continue
- * and the semaphore is incorrectly positive at this point;
- * the docs say it's undefined to reinit a semaphore that's already
- * init'd, so use timedwait to eat up any existing posts.
+ * and the event is incorrectly set at this point so reset it.
*/
- while (qemu_sem_timedwait(&s->pause_sem, 1) == 0) {
- /* This block intentionally left blank */
- }
+ qemu_event_reset(&s->pause_event);
/* Update [POSTCOPY_]ACTIVE to PRE_SWITCHOVER */
migrate_set_state(&s->state, s->state, MIGRATION_STATUS_PRE_SWITCHOVER);
bql_unlock();
- qemu_sem_wait(&s->pause_sem);
+ qemu_event_wait(&s->pause_event);
bql_lock();
/*
@@ -4057,7 +4054,7 @@ static void migration_instance_finalize(Object *obj)
qemu_mutex_destroy(&ms->qemu_file_lock);
qemu_sem_destroy(&ms->wait_unplug_sem);
qemu_sem_destroy(&ms->rate_limit_sem);
- qemu_sem_destroy(&ms->pause_sem);
+ qemu_event_destroy(&ms->pause_event);
qemu_sem_destroy(&ms->postcopy_pause_sem);
qemu_sem_destroy(&ms->rp_state.rp_sem);
qemu_sem_destroy(&ms->rp_state.rp_pong_acks);
@@ -4072,7 +4069,7 @@ static void migration_instance_init(Object *obj)
ms->state = MIGRATION_STATUS_NONE;
ms->mbps = -1;
ms->pages_per_second = -1;
- qemu_sem_init(&ms->pause_sem, 0);
+ qemu_event_init(&ms->pause_event, false);
qemu_mutex_init(&ms->error_mutex);
migrate_params_init(&ms->parameters);
diff --git a/migration/migration.h b/migration/migration.h
index d53f7ca..739289d 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -98,9 +98,9 @@ struct MigrationIncomingState {
void (*transport_cleanup)(void *data);
/*
* Used to sync thread creations. Note that we can't create threads in
- * parallel with this sem.
+ * parallel with this event.
*/
- QemuSemaphore thread_sync_sem;
+ QemuEvent thread_sync_event;
/*
* Free at the start of the main state load, set as the main thread finishes
* loading state.
@@ -186,7 +186,7 @@ struct MigrationIncomingState {
/* The coroutine we should enter (back) after failover */
Coroutine *colo_incoming_co;
- QemuSemaphore colo_incoming_sem;
+ QemuEvent colo_incoming_event;
/* Optional load threads pool and its thread exit request flag */
ThreadPool *load_threads;
@@ -379,10 +379,10 @@ struct MigrationState {
QemuSemaphore wait_unplug_sem;
/* Migration is paused due to pause-before-switchover */
- QemuSemaphore pause_sem;
+ QemuEvent pause_event;
- /* The semaphore is used to notify COLO thread that failover is finished */
- QemuSemaphore colo_exit_sem;
+ /* The event is used to notify COLO thread that failover is finished */
+ QemuEvent colo_exit_event;
/* The event is used to notify COLO thread to do checkpoint */
QemuEvent colo_checkpoint_event;
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 995614b..75fd310 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -90,10 +90,10 @@ void postcopy_thread_create(MigrationIncomingState *mis,
QemuThread *thread, const char *name,
void *(*fn)(void *), int joinable)
{
- qemu_sem_init(&mis->thread_sync_sem, 0);
+ qemu_event_init(&mis->thread_sync_event, false);
qemu_thread_create(thread, name, fn, mis, joinable);
- qemu_sem_wait(&mis->thread_sync_sem);
- qemu_sem_destroy(&mis->thread_sync_sem);
+ qemu_event_wait(&mis->thread_sync_event);
+ qemu_event_destroy(&mis->thread_sync_event);
}
/* Postcopy needs to detect accesses to pages that haven't yet been copied
@@ -964,7 +964,7 @@ static void *postcopy_ram_fault_thread(void *opaque)
trace_postcopy_ram_fault_thread_entry();
rcu_register_thread();
mis->last_rb = NULL; /* last RAMBlock we sent part of */
- qemu_sem_post(&mis->thread_sync_sem);
+ qemu_event_set(&mis->thread_sync_event);
struct pollfd *pfd;
size_t pfd_len = 2 + mis->postcopy_remote_fds->len;
@@ -1716,7 +1716,7 @@ void *postcopy_preempt_thread(void *opaque)
rcu_register_thread();
- qemu_sem_post(&mis->thread_sync_sem);
+ qemu_event_set(&mis->thread_sync_event);
/*
* The preempt channel is established in asynchronous way. Wait
diff --git a/migration/savevm.c b/migration/savevm.c
index 006514c..52105dd 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -2078,7 +2078,7 @@ static void *postcopy_ram_listen_thread(void *opaque)
migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
MIGRATION_STATUS_POSTCOPY_ACTIVE);
- qemu_sem_post(&mis->thread_sync_sem);
+ qemu_event_set(&mis->thread_sync_event);
trace_postcopy_ram_listen_thread_start();
rcu_register_thread();
diff --git a/python/setup.cfg b/python/setup.cfg
index c48dff2..d7f5dc7 100644
--- a/python/setup.cfg
+++ b/python/setup.cfg
@@ -46,6 +46,7 @@ devel =
urwid >= 2.1.2
urwid-readline >= 0.13
Pygments >= 2.9.0
+ sphinx >= 3.4.3
# Provides qom-fuse functionality
fuse =
@@ -78,7 +79,6 @@ exclude = __pycache__,
[mypy]
strict = True
python_version = 3.9
-warn_unused_configs = True
namespace_packages = True
warn_unused_ignores = False
diff --git a/python/tests/minreqs.txt b/python/tests/minreqs.txt
index 6445407..cd2e2a8 100644
--- a/python/tests/minreqs.txt
+++ b/python/tests/minreqs.txt
@@ -11,6 +11,15 @@
# When adding new dependencies, pin the very oldest non-yanked version
# on PyPI that allows the test suite to pass.
+# For some reason, the presence of packaging==14.0 below requires us to
+# also pin setuptools to version 70 or below; otherwise, installation of
+# the QEMU package itself fails because setuptools cannot be found.
+setuptools<=70
+
+# Dependencies for qapidoc/qapi_domain et al
+sphinx==3.4.3
+
# Dependencies for the TUI addon (Required for successful linting)
urwid==2.1.2
urwid-readline==0.13
@@ -38,10 +47,32 @@ pyflakes==2.5.0
# Transitive mypy dependencies
mypy-extensions==1.0.0
+tomli==1.1.0
typing-extensions==4.7.1
# Transitive pylint dependencies
astroid==2.15.4
+dill==0.2
lazy-object-proxy==1.4.0
+platformdirs==2.2.0
toml==0.10.0
+tomlkit==0.10.1
wrapt==1.14.0
+
+# Transitive sphinx dependencies
+Jinja2==2.7
+MarkupSafe==1.1.0
+alabaster==0.7.1
+babel==1.3
+docutils==0.12
+imagesize==0.5.0
+packaging==14.0
+pytz==2011b0
+requests==2.5.0
+snowballstemmer==1.1
+sphinxcontrib-applehelp==1.0.0
+sphinxcontrib-devhelp==1.0.0
+sphinxcontrib-htmlhelp==1.0.0
+sphinxcontrib-jsmath==1.0.0
+sphinxcontrib-qthelp==1.0.0
+sphinxcontrib-serializinghtml==1.0.0
diff --git a/python/tests/qapi-flake8.sh b/python/tests/qapi-flake8.sh
new file mode 100755
index 0000000..c69f9ea
--- /dev/null
+++ b/python/tests/qapi-flake8.sh
@@ -0,0 +1,6 @@
+#!/bin/sh -e
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+python3 -m flake8 ../scripts/qapi/ \
+ ../docs/sphinx/qapidoc.py \
+ ../docs/sphinx/qapi_domain.py
diff --git a/python/tests/qapi-isort.sh b/python/tests/qapi-isort.sh
new file mode 100755
index 0000000..78dd947
--- /dev/null
+++ b/python/tests/qapi-isort.sh
@@ -0,0 +1,8 @@
+#!/bin/sh -e
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+python3 -m isort --sp . -c ../scripts/qapi/
+# Force isort to recognize "compat" as a local module and not third-party
+python3 -m isort --sp . -c -p compat -p qapidoc_legacy \
+ ../docs/sphinx/qapi_domain.py \
+ ../docs/sphinx/qapidoc.py
diff --git a/python/tests/qapi-mypy.sh b/python/tests/qapi-mypy.sh
new file mode 100755
index 0000000..363dbaf
--- /dev/null
+++ b/python/tests/qapi-mypy.sh
@@ -0,0 +1,4 @@
+#!/bin/sh -e
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+python3 -m mypy ../scripts/qapi
diff --git a/python/tests/qapi-pylint.sh b/python/tests/qapi-pylint.sh
new file mode 100755
index 0000000..8767d9d
--- /dev/null
+++ b/python/tests/qapi-pylint.sh
@@ -0,0 +1,8 @@
+#!/bin/sh -e
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+SETUPTOOLS_USE_DISTUTILS=stdlib python3 -m pylint \
+ --rcfile=../scripts/qapi/pylintrc \
+ ../scripts/qapi/ \
+ ../docs/sphinx/qapidoc.py \
+ ../docs/sphinx/qapi_domain.py
diff --git a/rust/Cargo.lock b/rust/Cargo.lock
index bccfe85..b785c71 100644
--- a/rust/Cargo.lock
+++ b/rust/Cargo.lock
@@ -3,6 +3,12 @@
version = 3
[[package]]
+name = "anyhow"
+version = "1.0.98"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487"
+
+[[package]]
name = "arbitrary-int"
version = "1.2.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -45,6 +51,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b"
[[package]]
+name = "foreign"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "17ca1b5be8c9d320daf386f1809c7acc0cb09accbae795c2001953fa50585846"
+dependencies = [
+ "libc",
+]
+
+[[package]]
name = "hpet"
version = "0.1.0"
dependencies = [
@@ -114,6 +129,8 @@ dependencies = [
name = "qemu_api"
version = "0.1.0"
dependencies = [
+ "anyhow",
+ "foreign",
"libc",
"qemu_api_macros",
]
diff --git a/rust/Cargo.toml b/rust/Cargo.toml
index fd4c2fb..0868e1b 100644
--- a/rust/Cargo.toml
+++ b/rust/Cargo.toml
@@ -67,6 +67,7 @@ missing_safety_doc = "deny"
mut_mut = "deny"
needless_bitwise_bool = "deny"
needless_pass_by_ref_mut = "deny"
+needless_update = "deny"
no_effect_underscore_binding = "deny"
option_option = "deny"
or_fun_call = "deny"
diff --git a/rust/hw/char/pl011/src/device.rs b/rust/hw/char/pl011/src/device.rs
index 0501fa5..be8387f 100644
--- a/rust/hw/char/pl011/src/device.rs
+++ b/rust/hw/char/pl011/src/device.rs
@@ -175,7 +175,7 @@ impl DeviceImpl for PL011State {
fn vmsd() -> Option<&'static VMStateDescription> {
Some(&device_class::VMSTATE_PL011)
}
- const REALIZE: Option<fn(&Self)> = Some(Self::realize);
+ const REALIZE: Option<fn(&Self) -> qemu_api::Result<()>> = Some(Self::realize);
}
impl ResettablePhasesImpl for PL011State {
@@ -619,9 +619,10 @@ impl PL011State {
}
}
- fn realize(&self) {
+ fn realize(&self) -> qemu_api::Result<()> {
self.char_backend
.enable_handlers(self, Self::can_receive, Self::receive, Self::event);
+ Ok(())
}
fn reset_hold(&self, _type: ResetType) {
diff --git a/rust/hw/timer/hpet/src/device.rs b/rust/hw/timer/hpet/src/device.rs
index e3ba62b..735b2fb 100644
--- a/rust/hw/timer/hpet/src/device.rs
+++ b/rust/hw/timer/hpet/src/device.rs
@@ -12,7 +12,7 @@ use std::{
use qemu_api::{
bindings::{
address_space_memory, address_space_stl_le, qdev_prop_bit, qdev_prop_bool,
- qdev_prop_uint32, qdev_prop_uint8,
+ qdev_prop_uint32, qdev_prop_usize,
},
cell::{BqlCell, BqlRefCell},
irq::InterruptSource,
@@ -36,9 +36,9 @@ use crate::fw_cfg::HPETFwConfig;
const HPET_REG_SPACE_LEN: u64 = 0x400; // 1024 bytes
/// Minimum recommended hardware implementation.
-const HPET_MIN_TIMERS: u8 = 3;
+const HPET_MIN_TIMERS: usize = 3;
/// Maximum timers in each timer block.
-const HPET_MAX_TIMERS: u8 = 32;
+const HPET_MAX_TIMERS: usize = 32;
/// Flags that HPETState.flags supports.
const HPET_FLAG_MSI_SUPPORT_SHIFT: usize = 0;
@@ -561,8 +561,8 @@ pub struct HPETState {
/// HPET timer array managed by this timer block.
#[doc(alias = "timer")]
- timers: [BqlRefCell<HPETTimer>; HPET_MAX_TIMERS as usize],
- num_timers: BqlCell<u8>,
+ timers: [BqlRefCell<HPETTimer>; HPET_MAX_TIMERS],
+ num_timers: usize,
num_timers_save: BqlCell<u8>,
/// Instance id (HPET timer block ID).
@@ -570,11 +570,6 @@ pub struct HPETState {
}
impl HPETState {
- // Get num_timers with `usize` type, which is useful to play with array index.
- fn get_num_timers(&self) -> usize {
- self.num_timers.get().into()
- }
-
const fn has_msi_flag(&self) -> bool {
self.flags & (1 << HPET_FLAG_MSI_SUPPORT_SHIFT) != 0
}
@@ -636,7 +631,7 @@ impl HPETState {
self.hpet_offset
.set(ticks_to_ns(self.counter.get()) - CLOCK_VIRTUAL.get_ns());
- for timer in self.timers.iter().take(self.get_num_timers()) {
+ for timer in self.timers.iter().take(self.num_timers) {
let mut t = timer.borrow_mut();
if t.is_int_enabled() && t.is_int_active() {
@@ -648,7 +643,7 @@ impl HPETState {
// Halt main counter and disable interrupt generation.
self.counter.set(self.get_ticks());
- for timer in self.timers.iter().take(self.get_num_timers()) {
+ for timer in self.timers.iter().take(self.num_timers) {
timer.borrow_mut().del_timer();
}
}
@@ -671,7 +666,7 @@ impl HPETState {
let new_val = val << shift;
let cleared = new_val & self.int_status.get();
- for (index, timer) in self.timers.iter().take(self.get_num_timers()).enumerate() {
+ for (index, timer) in self.timers.iter().take(self.num_timers).enumerate() {
if cleared & (1 << index) != 0 {
timer.borrow_mut().update_irq(false);
}
@@ -724,19 +719,17 @@ impl HPETState {
}
}
- fn realize(&self) {
+ fn realize(&self) -> qemu_api::Result<()> {
+ if self.num_timers < HPET_MIN_TIMERS || self.num_timers > HPET_MAX_TIMERS {
+ Err(format!(
+ "hpet.num_timers must be between {HPET_MIN_TIMERS} and {HPET_MAX_TIMERS}"
+ ))?;
+ }
if self.int_route_cap == 0 {
- // TODO: Add error binding: warn_report()
- println!("Hpet's hpet-intcap property not initialized");
+ Err("hpet.hpet-intcap property not initialized")?;
}
- self.hpet_id.set(HPETFwConfig::assign_hpet_id());
-
- if self.num_timers.get() < HPET_MIN_TIMERS {
- self.num_timers.set(HPET_MIN_TIMERS);
- } else if self.num_timers.get() > HPET_MAX_TIMERS {
- self.num_timers.set(HPET_MAX_TIMERS);
- }
+ self.hpet_id.set(HPETFwConfig::assign_hpet_id()?);
self.init_timer();
// 64-bit General Capabilities and ID Register; LegacyReplacementRoute.
@@ -745,16 +738,17 @@ impl HPETState {
1 << HPET_CAP_COUNT_SIZE_CAP_SHIFT |
1 << HPET_CAP_LEG_RT_CAP_SHIFT |
HPET_CAP_VENDER_ID_VALUE << HPET_CAP_VENDER_ID_SHIFT |
- ((self.get_num_timers() - 1) as u64) << HPET_CAP_NUM_TIM_SHIFT | // indicate the last timer
+ ((self.num_timers - 1) as u64) << HPET_CAP_NUM_TIM_SHIFT | // indicate the last timer
(HPET_CLK_PERIOD * FS_PER_NS) << HPET_CAP_CNT_CLK_PERIOD_SHIFT, // 10 ns
);
self.init_gpio_in(2, HPETState::handle_legacy_irq);
self.init_gpio_out(from_ref(&self.pit_enabled));
+ Ok(())
}
fn reset_hold(&self, _type: ResetType) {
- for timer in self.timers.iter().take(self.get_num_timers()) {
+ for timer in self.timers.iter().take(self.num_timers) {
timer.borrow_mut().reset();
}
@@ -782,7 +776,7 @@ impl HPETState {
GlobalRegister::try_from(addr).map(HPETRegister::Global)
} else {
let timer_id: usize = ((addr - 0x100) / 0x20) as usize;
- if timer_id <= self.get_num_timers() {
+ if timer_id < self.num_timers {
// TODO: Add trace point - trace_hpet_ram_[read|write]_timer_id(timer_id)
TimerRegister::try_from(addr & 0x18)
.map(|reg| HPETRegister::Timer(&self.timers[timer_id], reg))
@@ -853,12 +847,12 @@ impl HPETState {
* also added to the migration stream. Check that it matches the value
* that was configured.
*/
- self.num_timers_save.set(self.num_timers.get());
+ self.num_timers_save.set(self.num_timers as u8);
0
}
fn post_load(&self, _version_id: u8) -> i32 {
- for timer in self.timers.iter().take(self.get_num_timers()) {
+ for timer in self.timers.iter().take(self.num_timers) {
let mut t = timer.borrow_mut();
t.cmp64 = t.calculate_cmp64(t.get_state().counter.get(), t.cmp);
@@ -883,7 +877,7 @@ impl HPETState {
}
fn validate_num_timers(&self, _version_id: u8) -> bool {
- self.num_timers.get() == self.num_timers_save.get()
+ self.num_timers == self.num_timers_save.get().into()
}
}
@@ -910,7 +904,7 @@ qemu_api::declare_properties! {
c"timers",
HPETState,
num_timers,
- unsafe { &qdev_prop_uint8 },
+ unsafe { &qdev_prop_usize },
u8,
default = HPET_MIN_TIMERS
),
@@ -1015,16 +1009,16 @@ const VALIDATE_TIMERS_NAME: &CStr = c"num_timers must match";
static VMSTATE_HPET: VMStateDescription = VMStateDescription {
name: c"hpet".as_ptr(),
version_id: 2,
- minimum_version_id: 1,
+ minimum_version_id: 2,
pre_save: Some(hpet_pre_save),
post_load: Some(hpet_post_load),
fields: vmstate_fields! {
vmstate_of!(HPETState, config),
vmstate_of!(HPETState, int_status),
vmstate_of!(HPETState, counter),
- vmstate_of!(HPETState, num_timers_save).with_version_id(2),
+ vmstate_of!(HPETState, num_timers_save),
vmstate_validate!(HPETState, VALIDATE_TIMERS_NAME, HPETState::validate_num_timers),
- vmstate_struct!(HPETState, timers[0 .. num_timers], &VMSTATE_HPET_TIMER, BqlRefCell<HPETTimer>, HPETState::validate_num_timers).with_version_id(0),
+ vmstate_struct!(HPETState, timers[0 .. num_timers_save], &VMSTATE_HPET_TIMER, BqlRefCell<HPETTimer>, HPETState::validate_num_timers).with_version_id(0),
},
subsections: vmstate_subsections! {
VMSTATE_HPET_RTC_IRQ_LEVEL,
@@ -1042,7 +1036,7 @@ impl DeviceImpl for HPETState {
Some(&VMSTATE_HPET)
}
- const REALIZE: Option<fn(&Self)> = Some(Self::realize);
+ const REALIZE: Option<fn(&Self) -> qemu_api::Result<()>> = Some(Self::realize);
}
impl ResettablePhasesImpl for HPETState {
diff --git a/rust/hw/timer/hpet/src/fw_cfg.rs b/rust/hw/timer/hpet/src/fw_cfg.rs
index 6c10316..619d662 100644
--- a/rust/hw/timer/hpet/src/fw_cfg.rs
+++ b/rust/hw/timer/hpet/src/fw_cfg.rs
@@ -36,7 +36,7 @@ pub static mut hpet_fw_cfg: HPETFwConfig = HPETFwConfig {
};
impl HPETFwConfig {
- pub(crate) fn assign_hpet_id() -> usize {
+ pub(crate) fn assign_hpet_id() -> Result<usize, &'static str> {
assert!(bql_locked());
// SAFETY: all accesses go through these methods, which guarantee
// that the accesses are protected by the BQL.
@@ -48,13 +48,12 @@ impl HPETFwConfig {
}
if fw_cfg.count == 8 {
- // TODO: Add error binding: error_setg()
- panic!("Only 8 instances of HPET is allowed");
+ Err("Only 8 instances of HPET are allowed")?;
}
let id: usize = fw_cfg.count.into();
fw_cfg.count += 1;
- id
+ Ok(id)
}
pub(crate) fn update_hpet_cfg(hpet_id: usize, timer_block_id: u32, address: u64) {
diff --git a/rust/meson.build b/rust/meson.build
index b1b3315..99ae795 100644
--- a/rust/meson.build
+++ b/rust/meson.build
@@ -1,9 +1,13 @@
+subproject('anyhow-1-rs', required: true)
subproject('bilge-0.2-rs', required: true)
subproject('bilge-impl-0.2-rs', required: true)
+subproject('foreign-0.3-rs', required: true)
subproject('libc-0.2-rs', required: true)
+anyhow_rs = dependency('anyhow-1-rs')
bilge_rs = dependency('bilge-0.2-rs')
bilge_impl_rs = dependency('bilge-impl-0.2-rs')
+foreign_rs = dependency('foreign-0.3-rs')
libc_rs = dependency('libc-0.2-rs')
subproject('proc-macro2-1-rs', required: true)
diff --git a/rust/qemu-api-macros/src/lib.rs b/rust/qemu-api-macros/src/lib.rs
index 1034707..c18bb4e 100644
--- a/rust/qemu-api-macros/src/lib.rs
+++ b/rust/qemu-api-macros/src/lib.rs
@@ -203,8 +203,8 @@ fn derive_tryinto_body(
Ok(quote! {
#(const #discriminants: #repr = #name::#discriminants as #repr;)*;
match value {
- #(#discriminants => Ok(#name::#discriminants),)*
- _ => Err(value),
+ #(#discriminants => core::result::Result::Ok(#name::#discriminants),)*
+ _ => core::result::Result::Err(value),
}
})
}
@@ -236,7 +236,8 @@ fn derive_tryinto_or_error(input: DeriveInput) -> Result<proc_macro2::TokenStrea
impl core::convert::TryFrom<#repr> for #name {
type Error = #repr;
- fn try_from(value: #repr) -> Result<Self, Self::Error> {
+ #[allow(ambiguous_associated_items)]
+ fn try_from(value: #repr) -> Result<Self, #repr> {
#body
}
}
diff --git a/rust/qemu-api/Cargo.toml b/rust/qemu-api/Cargo.toml
index c96cf50..db7000d 100644
--- a/rust/qemu-api/Cargo.toml
+++ b/rust/qemu-api/Cargo.toml
@@ -15,7 +15,9 @@ rust-version.workspace = true
[dependencies]
qemu_api_macros = { path = "../qemu-api-macros" }
+anyhow = "~1.0"
libc = "0.2.162"
+foreign = "~0.3.1"
[features]
default = ["debug_cell"]
diff --git a/rust/qemu-api/meson.build b/rust/qemu-api/meson.build
index b532281..cac8595 100644
--- a/rust/qemu-api/meson.build
+++ b/rust/qemu-api/meson.build
@@ -19,6 +19,7 @@ _qemu_api_rs = static_library(
'src/cell.rs',
'src/chardev.rs',
'src/errno.rs',
+ 'src/error.rs',
'src/irq.rs',
'src/memory.rs',
'src/module.rs',
@@ -35,7 +36,7 @@ _qemu_api_rs = static_library(
override_options: ['rust_std=2021', 'build.rust_std=2021'],
rust_abi: 'rust',
rust_args: _qemu_api_cfg,
- dependencies: [libc_rs, qemu_api_macros, qemuutil_rs,
+ dependencies: [anyhow_rs, foreign_rs, libc_rs, qemu_api_macros, qemuutil_rs,
qom, hwcore, chardev, migration],
)
diff --git a/rust/qemu-api/src/error.rs b/rust/qemu-api/src/error.rs
new file mode 100644
index 0000000..e114fc4
--- /dev/null
+++ b/rust/qemu-api/src/error.rs
@@ -0,0 +1,416 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+//! Error propagation for QEMU Rust code
+//!
+//! This module contains [`Error`], the bridge between Rust errors and
+//! [`Result`](std::result::Result)s on one side and QEMU's C
+//! [`Error`](bindings::Error) struct on the other.
+//!
+//! For FFI code, [`Error`] provides functions to simplify conversion between
+//! the Rust ([`Result<>`](std::result::Result)) and C (`Error**`) conventions:
+//!
+//! * [`ok_or_propagate`](crate::Error::ok_or_propagate),
+//! [`bool_or_propagate`](crate::Error::bool_or_propagate),
+//! [`ptr_or_propagate`](crate::Error::ptr_or_propagate) can be used to build
+//! a C return value while also propagating an error condition
+//!
+//! * [`err_or_else`](crate::Error::err_or_else) and
+//! [`err_or_unit`](crate::Error::err_or_unit) can be used to build a `Result`
+//!
+//! This module is most commonly used at the boundary between C and Rust code;
+//! other code will usually access it through the
+//! [`qemu_api::Result`](crate::Result) type alias, and will use the
+//! [`std::error::Error`] interface to let C errors participate in Rust's error
+//! handling functionality.
+//!
+//! Rust code can also use this module to create an error object that
+//! will be passed up to C code, though in most cases this will be done
+//! transparently through the `?` operator. Errors can be constructed from a
+//! simple error string, from an [`anyhow::Error`] to pass any other Rust error
+//! type up to C code, or from a combination of the two.
+//!
+//! The third case, corresponding to [`Error::with_error`], is the only one that
+//! requires mentioning [`qemu_api::Error`](crate::Error) explicitly. Similar
+//! to how QEMU's C code handles errno values, the string and the
+//! `anyhow::Error` object will be concatenated with `:` as the separator.
+
+use std::{
+ borrow::Cow,
+ ffi::{c_char, c_int, c_void, CStr},
+ fmt::{self, Display},
+ panic, ptr,
+};
+
+use foreign::{prelude::*, OwnedPointer};
+
+use crate::bindings;
+
+pub type Result<T> = std::result::Result<T, Error>;
+
+#[derive(Debug)]
+pub struct Error {
+ msg: Option<Cow<'static, str>>,
+    /// Optional cause; when present, its display string is appended to the msg
+ cause: Option<anyhow::Error>,
+ file: &'static str,
+ line: u32,
+}
+
+impl std::error::Error for Error {
+ fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
+ self.cause.as_ref().map(AsRef::as_ref)
+ }
+
+ #[allow(deprecated)]
+ fn description(&self) -> &str {
+ self.msg
+ .as_deref()
+ .or_else(|| self.cause.as_deref().map(std::error::Error::description))
+ .expect("no message nor cause?")
+ }
+}
+
+impl Display for Error {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ let mut prefix = "";
+ if let Some(ref msg) = self.msg {
+ write!(f, "{msg}")?;
+ prefix = ": ";
+ }
+ if let Some(ref cause) = self.cause {
+ write!(f, "{prefix}{cause}")?;
+ } else if prefix.is_empty() {
+ panic!("no message nor cause?");
+ }
+ Ok(())
+ }
+}
+
+impl From<String> for Error {
+ #[track_caller]
+ fn from(msg: String) -> Self {
+ let location = panic::Location::caller();
+ Error {
+ msg: Some(Cow::Owned(msg)),
+ cause: None,
+ file: location.file(),
+ line: location.line(),
+ }
+ }
+}
+
+impl From<&'static str> for Error {
+ #[track_caller]
+ fn from(msg: &'static str) -> Self {
+ let location = panic::Location::caller();
+ Error {
+ msg: Some(Cow::Borrowed(msg)),
+ cause: None,
+ file: location.file(),
+ line: location.line(),
+ }
+ }
+}
+
+impl From<anyhow::Error> for Error {
+ #[track_caller]
+ fn from(error: anyhow::Error) -> Self {
+ let location = panic::Location::caller();
+ Error {
+ msg: None,
+ cause: Some(error),
+ file: location.file(),
+ line: location.line(),
+ }
+ }
+}
+
+impl Error {
+ /// Create a new error, prepending `msg` to the
+ /// description of `cause`
+ #[track_caller]
+ pub fn with_error(msg: impl Into<Cow<'static, str>>, cause: impl Into<anyhow::Error>) -> Self {
+ let location = panic::Location::caller();
+ Error {
+ msg: Some(msg.into()),
+ cause: Some(cause.into()),
+ file: location.file(),
+ line: location.line(),
+ }
+ }
+
+ /// Consume a result, returning `false` if it is an error and
+ /// `true` if it is successful. The error is propagated into
+ /// `errp` like the C API `error_propagate` would do.
+ ///
+ /// # Safety
+ ///
+ /// `errp` must be a valid argument to `error_propagate`;
+ /// typically it is received from C code and need not be
+ /// checked further at the Rust↔C boundary.
+ pub unsafe fn bool_or_propagate(result: Result<()>, errp: *mut *mut bindings::Error) -> bool {
+ // SAFETY: caller guarantees errp is valid
+ unsafe { Self::ok_or_propagate(result, errp) }.is_some()
+ }
+
+ /// Consume a result, returning a `NULL` pointer if it is an error and
+ /// a C representation of the contents if it is successful. This is
+ /// similar to the C API `error_propagate`, but it panics if `*errp`
+ /// is not `NULL`.
+ ///
+ /// # Safety
+ ///
+ /// `errp` must be a valid argument to `error_propagate`;
+ /// typically it is received from C code and need not be
+ /// checked further at the Rust↔C boundary.
+ ///
+ /// See [`propagate`](Error::propagate) for more information.
+ #[must_use]
+ pub unsafe fn ptr_or_propagate<T: CloneToForeign>(
+ result: Result<T>,
+ errp: *mut *mut bindings::Error,
+ ) -> *mut T::Foreign {
+ // SAFETY: caller guarantees errp is valid
+ unsafe { Self::ok_or_propagate(result, errp) }.clone_to_foreign_ptr()
+ }
+
+ /// Consume a result in the same way as `self.ok()`, but also propagate
+ /// a possible error into `errp`. This is similar to the C API
+ /// `error_propagate`, but it panics if `*errp` is not `NULL`.
+ ///
+ /// # Safety
+ ///
+ /// `errp` must be a valid argument to `error_propagate`;
+ /// typically it is received from C code and need not be
+ /// checked further at the Rust↔C boundary.
+ ///
+ /// See [`propagate`](Error::propagate) for more information.
+ pub unsafe fn ok_or_propagate<T>(
+ result: Result<T>,
+ errp: *mut *mut bindings::Error,
+ ) -> Option<T> {
+ result.map_err(|err| unsafe { err.propagate(errp) }).ok()
+ }
+
+ /// Equivalent of the C function `error_propagate`. Fill `*errp`
+    /// with the information contained in `self` if `errp` is not NULL;
+ /// then consume it.
+ ///
+ /// This is similar to the C API `error_propagate`, but it panics if
+ /// `*errp` is not `NULL`.
+ ///
+ /// # Safety
+ ///
+ /// `errp` must be a valid argument to `error_propagate`; it can be
+ /// `NULL` or it can point to any of:
+ /// * `error_abort`
+ /// * `error_fatal`
+ /// * a local variable of (C) type `Error *`
+ ///
+ /// Typically `errp` is received from C code and need not be
+ /// checked further at the Rust↔C boundary.
+ pub unsafe fn propagate(self, errp: *mut *mut bindings::Error) {
+ if errp.is_null() {
+ return;
+ }
+
+ // SAFETY: caller guarantees that errp and *errp are valid
+ unsafe {
+ assert_eq!(*errp, ptr::null_mut());
+ bindings::error_propagate(errp, self.clone_to_foreign_ptr());
+ }
+ }
+
+ /// Convert a C `Error*` into a Rust `Result`, using
+ /// `Ok(())` if `c_error` is NULL. Free the `Error*`.
+ ///
+ /// # Safety
+ ///
+ /// `c_error` must be `NULL` or valid; typically it was initialized
+ /// with `ptr::null_mut()` and passed by reference to a C function.
+ pub unsafe fn err_or_unit(c_error: *mut bindings::Error) -> Result<()> {
+ // SAFETY: caller guarantees c_error is valid
+ unsafe { Self::err_or_else(c_error, || ()) }
+ }
+
+ /// Convert a C `Error*` into a Rust `Result`, calling `f()` to
+ /// obtain an `Ok` value if `c_error` is NULL. Free the `Error*`.
+ ///
+ /// # Safety
+ ///
+ /// `c_error` must be `NULL` or point to a valid C [`struct
+ /// Error`](bindings::Error); typically it was initialized with
+ /// `ptr::null_mut()` and passed by reference to a C function.
+ pub unsafe fn err_or_else<T, F: FnOnce() -> T>(
+ c_error: *mut bindings::Error,
+ f: F,
+ ) -> Result<T> {
+ // SAFETY: caller guarantees c_error is valid
+ let err = unsafe { Option::<Self>::from_foreign(c_error) };
+ match err {
+ None => Ok(f()),
+ Some(err) => Err(err),
+ }
+ }
+}
+
+impl FreeForeign for Error {
+ type Foreign = bindings::Error;
+
+ unsafe fn free_foreign(p: *mut bindings::Error) {
+ // SAFETY: caller guarantees p is valid
+ unsafe {
+ bindings::error_free(p);
+ }
+ }
+}
+
+impl CloneToForeign for Error {
+ fn clone_to_foreign(&self) -> OwnedPointer<Self> {
+ // SAFETY: all arguments are controlled by this function
+ unsafe {
+ let err: *mut c_void = libc::malloc(std::mem::size_of::<bindings::Error>());
+ let err: &mut bindings::Error = &mut *err.cast();
+ *err = bindings::Error {
+ msg: format!("{self}").clone_to_foreign_ptr(),
+ err_class: bindings::ERROR_CLASS_GENERIC_ERROR,
+ src_len: self.file.len() as c_int,
+ src: self.file.as_ptr().cast::<c_char>(),
+ line: self.line as c_int,
+ func: ptr::null_mut(),
+ hint: ptr::null_mut(),
+ };
+ OwnedPointer::new(err)
+ }
+ }
+}
+
+impl FromForeign for Error {
+ unsafe fn cloned_from_foreign(c_error: *const bindings::Error) -> Self {
+ // SAFETY: caller guarantees c_error is valid
+ unsafe {
+ let error = &*c_error;
+ let file = if error.src_len < 0 {
+ // NUL-terminated
+ CStr::from_ptr(error.src).to_str()
+ } else {
+ // Can become str::from_utf8 with Rust 1.87.0
+ std::str::from_utf8(std::slice::from_raw_parts(
+ &*error.src.cast::<u8>(),
+ error.src_len as usize,
+ ))
+ };
+
+ Error {
+ msg: FromForeign::cloned_from_foreign(error.msg),
+ cause: None,
+ file: file.unwrap(),
+ line: error.line as u32,
+ }
+ }
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use std::ffi::CStr;
+
+ use anyhow::anyhow;
+ use foreign::OwnedPointer;
+
+ use super::*;
+ use crate::{assert_match, bindings};
+
+ #[track_caller]
+ fn error_for_test(msg: &CStr) -> OwnedPointer<Error> {
+ // SAFETY: all arguments are controlled by this function
+ let location = panic::Location::caller();
+ unsafe {
+ let err: *mut c_void = libc::malloc(std::mem::size_of::<bindings::Error>());
+ let err: &mut bindings::Error = &mut *err.cast();
+ *err = bindings::Error {
+ msg: msg.clone_to_foreign_ptr(),
+ err_class: bindings::ERROR_CLASS_GENERIC_ERROR,
+ src_len: location.file().len() as c_int,
+ src: location.file().as_ptr().cast::<c_char>(),
+ line: location.line() as c_int,
+ func: ptr::null_mut(),
+ hint: ptr::null_mut(),
+ };
+ OwnedPointer::new(err)
+ }
+ }
+
+ unsafe fn error_get_pretty<'a>(local_err: *mut bindings::Error) -> &'a CStr {
+ unsafe { CStr::from_ptr(bindings::error_get_pretty(local_err)) }
+ }
+
+ #[test]
+ #[allow(deprecated)]
+ fn test_description() {
+ use std::error::Error;
+
+ assert_eq!(super::Error::from("msg").description(), "msg");
+ assert_eq!(super::Error::from("msg".to_owned()).description(), "msg");
+ }
+
+ #[test]
+ fn test_display() {
+ assert_eq!(&*format!("{}", Error::from("msg")), "msg");
+ assert_eq!(&*format!("{}", Error::from("msg".to_owned())), "msg");
+ assert_eq!(&*format!("{}", Error::from(anyhow!("msg"))), "msg");
+
+ assert_eq!(
+ &*format!("{}", Error::with_error("msg", anyhow!("cause"))),
+ "msg: cause"
+ );
+ }
+
+ #[test]
+ fn test_bool_or_propagate() {
+ unsafe {
+ let mut local_err: *mut bindings::Error = ptr::null_mut();
+
+ assert!(Error::bool_or_propagate(Ok(()), &mut local_err));
+ assert_eq!(local_err, ptr::null_mut());
+
+ let my_err = Error::from("msg");
+ assert!(!Error::bool_or_propagate(Err(my_err), &mut local_err));
+ assert_ne!(local_err, ptr::null_mut());
+ assert_eq!(error_get_pretty(local_err), c"msg");
+ bindings::error_free(local_err);
+ }
+ }
+
+ #[test]
+ fn test_ptr_or_propagate() {
+ unsafe {
+ let mut local_err: *mut bindings::Error = ptr::null_mut();
+
+ let ret = Error::ptr_or_propagate(Ok("abc".to_owned()), &mut local_err);
+ assert_eq!(String::from_foreign(ret), "abc");
+ assert_eq!(local_err, ptr::null_mut());
+
+ let my_err = Error::from("msg");
+ assert_eq!(
+ Error::ptr_or_propagate(Err::<String, _>(my_err), &mut local_err),
+ ptr::null_mut()
+ );
+ assert_ne!(local_err, ptr::null_mut());
+ assert_eq!(error_get_pretty(local_err), c"msg");
+ bindings::error_free(local_err);
+ }
+ }
+
+ #[test]
+ fn test_err_or_unit() {
+ unsafe {
+ let result = Error::err_or_unit(ptr::null_mut());
+ assert_match!(result, Ok(()));
+
+ let err = error_for_test(c"msg");
+ let err = Error::err_or_unit(err.into_inner()).unwrap_err();
+ assert_eq!(&*format!("{err}"), "msg");
+ }
+ }
+}
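
Together with the qdev.rs changes below, this module lets a device's realize() return qemu_api::Result<()> and rely on the From conversions and the ? operator instead of hand-rolled error_setg() calls. A rough usage sketch (DemoState and its num-irqs check are hypothetical, shown only to illustrate the API):

    use qemu_api::Result;

    // Hypothetical device state, used only for illustration.
    struct DemoState {
        num_irqs: u32,
    }

    impl DemoState {
        fn realize(&self) -> Result<()> {
            if self.num_irqs == 0 {
                // A &'static str or String converts into qemu_api::Error via From,
                // so an early return (or the `?` operator) is enough to fail realize.
                return Err("demo-device.num-irqs must be non-zero".into());
            }
            Ok(())
        }
    }

    // At an explicit FFI boundary, the helpers above bridge the two conventions,
    // e.g. in an `unsafe extern "C"` callback receiving `errp: *mut *mut bindings::Error`:
    //     unsafe { qemu_api::Error::ok_or_propagate(state.realize(), errp) };
    // while a C `Error *` produced by a call into C becomes a Result with:
    //     let r: qemu_api::Result<()> = unsafe { qemu_api::Error::err_or_unit(local_err) };
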
diff --git a/rust/qemu-api/src/lib.rs b/rust/qemu-api/src/lib.rs
index 234a94e..93902fc 100644
--- a/rust/qemu-api/src/lib.rs
+++ b/rust/qemu-api/src/lib.rs
@@ -19,6 +19,7 @@ pub mod callbacks;
pub mod cell;
pub mod chardev;
pub mod errno;
+pub mod error;
pub mod irq;
pub mod memory;
pub mod module;
@@ -34,6 +35,8 @@ use std::{
ffi::c_void,
};
+pub use error::{Error, Result};
+
#[cfg(HAVE_GLIB_WITH_ALIGNED_ALLOC)]
extern "C" {
fn g_aligned_alloc0(
diff --git a/rust/qemu-api/src/qdev.rs b/rust/qemu-api/src/qdev.rs
index 1279d7a..0610959 100644
--- a/rust/qemu-api/src/qdev.rs
+++ b/rust/qemu-api/src/qdev.rs
@@ -12,10 +12,11 @@ use std::{
pub use bindings::{ClockEvent, DeviceClass, Property, ResetType};
use crate::{
- bindings::{self, qdev_init_gpio_in, qdev_init_gpio_out, Error, ResettableClass},
+ bindings::{self, qdev_init_gpio_in, qdev_init_gpio_out, ResettableClass},
callbacks::FnCall,
cell::{bql_locked, Opaque},
chardev::Chardev,
+ error::{Error, Result},
irq::InterruptSource,
prelude::*,
qom::{ObjectClass, ObjectImpl, Owned},
@@ -108,7 +109,7 @@ pub trait DeviceImpl: ObjectImpl + ResettablePhasesImpl + IsA<DeviceState> {
///
/// If not `None`, the parent class's `realize` method is overridden
/// with the function pointed to by `REALIZE`.
- const REALIZE: Option<fn(&Self)> = None;
+ const REALIZE: Option<fn(&Self) -> Result<()>> = None;
/// An array providing the properties that the user can set on the
/// device. Not a `const` because referencing statics in constants
@@ -134,10 +135,13 @@ pub trait DeviceImpl: ObjectImpl + ResettablePhasesImpl + IsA<DeviceState> {
/// readable/writeable from one thread at any time.
unsafe extern "C" fn rust_realize_fn<T: DeviceImpl>(
dev: *mut bindings::DeviceState,
- _errp: *mut *mut Error,
+ errp: *mut *mut bindings::Error,
) {
let state = NonNull::new(dev).unwrap().cast::<T>();
- T::REALIZE.unwrap()(unsafe { state.as_ref() });
+ let result = T::REALIZE.unwrap()(unsafe { state.as_ref() });
+ unsafe {
+ Error::ok_or_propagate(result, errp);
+ }
}
unsafe impl InterfaceType for ResettableClass {
diff --git a/rust/wrapper.h b/rust/wrapper.h
index beddd9a..6060d3b 100644
--- a/rust/wrapper.h
+++ b/rust/wrapper.h
@@ -60,6 +60,7 @@ typedef enum memory_order {
#include "hw/qdev-properties-system.h"
#include "hw/irq.h"
#include "qapi/error.h"
+#include "qapi/error-internal.h"
#include "migration/vmstate.h"
#include "chardev/char-serial.h"
#include "exec/memattrs.h"
diff --git a/scripts/archive-source.sh b/scripts/archive-source.sh
index e461c15..035828c 100755
--- a/scripts/archive-source.sh
+++ b/scripts/archive-source.sh
@@ -27,8 +27,9 @@ sub_file="${sub_tdir}/submodule.tar"
# in their checkout, because the build environment is completely
# different to the host OS.
subprojects="keycodemapdb libvfio-user berkeley-softfloat-3
- berkeley-testfloat-3 arbitrary-int-1-rs bilge-0.2-rs
- bilge-impl-0.2-rs either-1-rs itertools-0.11-rs libc-0.2-rs proc-macro2-1-rs
+ berkeley-testfloat-3 anyhow-1-rs arbitrary-int-1-rs bilge-0.2-rs
+ bilge-impl-0.2-rs either-1-rs foreign-0.3-rs itertools-0.11-rs
+ libc-0.2-rs proc-macro2-1-rs
proc-macro-error-1-rs proc-macro-error-attr-1-rs quote-1-rs
syn-2-rs unicode-ident-1-rs"
sub_deinit=""
diff --git a/scripts/make-release b/scripts/make-release
index 8c3594a..4509a9f 100755
--- a/scripts/make-release
+++ b/scripts/make-release
@@ -40,8 +40,9 @@ fi
# Only include wraps that are invoked with subproject()
SUBPROJECTS="libvfio-user keycodemapdb berkeley-softfloat-3
- berkeley-testfloat-3 arbitrary-int-1-rs bilge-0.2-rs
- bilge-impl-0.2-rs either-1-rs itertools-0.11-rs libc-0.2-rs proc-macro2-1-rs
+ berkeley-testfloat-3 anyhow-1-rs arbitrary-int-1-rs bilge-0.2-rs
+ bilge-impl-0.2-rs either-1-rs foreign-0.3-rs itertools-0.11-rs
+ libc-0.2-rs proc-macro2-1-rs
proc-macro-error-1-rs proc-macro-error-attr-1-rs quote-1-rs
syn-2-rs unicode-ident-1-rs"
diff --git a/scripts/qapi/.flake8 b/scripts/qapi/.flake8
deleted file mode 100644
index a873ff6..0000000
--- a/scripts/qapi/.flake8
+++ /dev/null
@@ -1,3 +0,0 @@
-[flake8]
-# Prefer pylint's bare-except checks to flake8's
-extend-ignore = E722
diff --git a/scripts/qapi/.isort.cfg b/scripts/qapi/.isort.cfg
deleted file mode 100644
index 643caa1..0000000
--- a/scripts/qapi/.isort.cfg
+++ /dev/null
@@ -1,7 +0,0 @@
-[settings]
-force_grid_wrap=4
-force_sort_within_sections=True
-include_trailing_comma=True
-line_length=72
-lines_after_imports=2
-multi_line_output=3
diff --git a/scripts/qapi/backend.py b/scripts/qapi/backend.py
index 14e60aa6..49ae6ec 100644
--- a/scripts/qapi/backend.py
+++ b/scripts/qapi/backend.py
@@ -13,6 +13,7 @@ from .visit import gen_visit
class QAPIBackend(ABC):
+ # pylint: disable=too-few-public-methods
@abstractmethod
def generate(self,
@@ -36,6 +37,7 @@ class QAPIBackend(ABC):
class QAPICBackend(QAPIBackend):
+ # pylint: disable=too-few-public-methods
def generate(self,
schema: QAPISchema,
diff --git a/scripts/qapi/mypy.ini b/scripts/qapi/mypy.ini
deleted file mode 100644
index c9dbcec..0000000
--- a/scripts/qapi/mypy.ini
+++ /dev/null
@@ -1,4 +0,0 @@
-[mypy]
-strict = True
-disallow_untyped_calls = False
-python_version = 3.9
diff --git a/scripts/qapi/pylintrc b/scripts/qapi/pylintrc
index d24eece..e16283a 100644
--- a/scripts/qapi/pylintrc
+++ b/scripts/qapi/pylintrc
@@ -19,6 +19,7 @@ disable=consider-using-f-string,
too-many-instance-attributes,
too-many-positional-arguments,
too-many-statements,
+ unknown-option-value,
useless-option-value,
[REPORTS]
diff --git a/subprojects/.gitignore b/subprojects/.gitignore
index d12d346..f428193 100644
--- a/subprojects/.gitignore
+++ b/subprojects/.gitignore
@@ -6,10 +6,12 @@
/keycodemapdb
/libvfio-user
/slirp
+/anyhow-1.0.98
/arbitrary-int-1.2.7
/bilge-0.2.0
/bilge-impl-0.2.0
/either-1.12.0
+/foreign-0.3.1
/itertools-0.11.0
/libc-0.2.162
/proc-macro-error-1.0.4
diff --git a/subprojects/anyhow-1-rs.wrap b/subprojects/anyhow-1-rs.wrap
new file mode 100644
index 0000000..a69a364
--- /dev/null
+++ b/subprojects/anyhow-1-rs.wrap
@@ -0,0 +1,7 @@
+[wrap-file]
+directory = anyhow-1.0.98
+source_url = https://crates.io/api/v1/crates/anyhow/1.0.98/download
+source_filename = anyhow-1.0.98.tar.gz
+source_hash = e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487
+#method = cargo
+patch_directory = anyhow-1-rs
diff --git a/subprojects/foreign-0.3-rs.wrap b/subprojects/foreign-0.3-rs.wrap
new file mode 100644
index 0000000..0d218ec
--- /dev/null
+++ b/subprojects/foreign-0.3-rs.wrap
@@ -0,0 +1,7 @@
+[wrap-file]
+directory = foreign-0.3.1
+source_url = https://crates.io/api/v1/crates/foreign/0.3.1/download
+source_filename = foreign-0.3.1.tar.gz
+source_hash = 17ca1b5be8c9d320daf386f1809c7acc0cb09accbae795c2001953fa50585846
+#method = cargo
+patch_directory = foreign-0.3-rs
diff --git a/subprojects/packagefiles/anyhow-1-rs/meson.build b/subprojects/packagefiles/anyhow-1-rs/meson.build
new file mode 100644
index 0000000..348bab9
--- /dev/null
+++ b/subprojects/packagefiles/anyhow-1-rs/meson.build
@@ -0,0 +1,33 @@
+project('anyhow-1-rs', 'rust',
+ meson_version: '>=1.5.0',
+ version: '1.0.98',
+ license: 'MIT OR Apache-2.0',
+ default_options: [])
+
+rustc = meson.get_compiler('rust')
+
+rust_args = ['--cap-lints', 'allow']
+rust_args += ['--cfg', 'feature="std"']
+if rustc.version().version_compare('<1.65.0')
+ error('rustc version ' + rustc.version() + ' is unsupported. Please upgrade to at least 1.65.0')
+endif
+rust_args += [ '--cfg', 'std_backtrace' ] # >= 1.65.0
+if rustc.version().version_compare('<1.81.0')
+ rust_args += [ '--cfg', 'anyhow_no_core_error' ]
+endif
+
+_anyhow_rs = static_library(
+ 'anyhow',
+ files('src/lib.rs'),
+ gnu_symbol_visibility: 'hidden',
+ override_options: ['rust_std=2018', 'build.rust_std=2018'],
+ rust_abi: 'rust',
+ rust_args: rust_args,
+ dependencies: [],
+)
+
+anyhow_dep = declare_dependency(
+ link_with: _anyhow_rs,
+)
+
+meson.override_dependency('anyhow-1-rs', anyhow_dep)
diff --git a/subprojects/packagefiles/foreign-0.3-rs/meson.build b/subprojects/packagefiles/foreign-0.3-rs/meson.build
new file mode 100644
index 0000000..0901c02
--- /dev/null
+++ b/subprojects/packagefiles/foreign-0.3-rs/meson.build
@@ -0,0 +1,26 @@
+project('foreign-0.3-rs', 'rust',
+ meson_version: '>=1.5.0',
+  version: '0.3.1',
+ license: 'MIT OR Apache-2.0',
+ default_options: [])
+
+subproject('libc-0.2-rs', required: true)
+libc_rs = dependency('libc-0.2-rs')
+
+_foreign_rs = static_library(
+ 'foreign',
+ files('src/lib.rs'),
+ gnu_symbol_visibility: 'hidden',
+ override_options: ['rust_std=2021', 'build.rust_std=2021'],
+ rust_abi: 'rust',
+ rust_args: [
+ '--cap-lints', 'allow',
+ ],
+ dependencies: [libc_rs],
+)
+
+foreign_dep = declare_dependency(
+ link_with: _foreign_rs,
+)
+
+meson.override_dependency('foreign-0.3-rs', foreign_dep)
diff --git a/system/memory.c b/system/memory.c
index 63b983e..306e9ff 100644
--- a/system/memory.c
+++ b/system/memory.c
@@ -2174,18 +2174,14 @@ void ram_discard_manager_unregister_listener(RamDiscardManager *rdm,
}
/* Called with rcu_read_lock held. */
-bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
- ram_addr_t *ram_addr, bool *read_only,
- bool *mr_has_discard_manager, Error **errp)
+MemoryRegion *memory_translate_iotlb(IOMMUTLBEntry *iotlb, hwaddr *xlat_p,
+ Error **errp)
{
MemoryRegion *mr;
hwaddr xlat;
hwaddr len = iotlb->addr_mask + 1;
bool writable = iotlb->perm & IOMMU_WO;
- if (mr_has_discard_manager) {
- *mr_has_discard_manager = false;
- }
/*
* The IOMMU TLB entry we have just covers translation through
* this IOMMU to its immediate target. We need to translate
@@ -2195,7 +2191,7 @@ bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
&xlat, &len, writable, MEMTXATTRS_UNSPECIFIED);
if (!memory_region_is_ram(mr)) {
error_setg(errp, "iommu map to non memory area %" HWADDR_PRIx "", xlat);
- return false;
+ return NULL;
} else if (memory_region_has_ram_discard_manager(mr)) {
RamDiscardManager *rdm = memory_region_get_ram_discard_manager(mr);
MemoryRegionSection tmp = {
@@ -2203,9 +2199,6 @@ bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
.offset_within_region = xlat,
.size = int128_make64(len),
};
- if (mr_has_discard_manager) {
- *mr_has_discard_manager = true;
- }
/*
* Malicious VMs can map memory into the IOMMU, which is expected
* to remain discarded. vfio will pin all pages, populating memory.
@@ -2216,7 +2209,7 @@ bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
error_setg(errp, "iommu map to discarded memory (e.g., unplugged"
" via virtio-mem): %" HWADDR_PRIx "",
iotlb->translated_addr);
- return false;
+ return NULL;
}
}
@@ -2226,22 +2219,11 @@ bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
*/
if (len & iotlb->addr_mask) {
error_setg(errp, "iommu has granularity incompatible with target AS");
- return false;
- }
-
- if (vaddr) {
- *vaddr = memory_region_get_ram_ptr(mr) + xlat;
- }
-
- if (ram_addr) {
- *ram_addr = memory_region_get_ram_addr(mr) + xlat;
- }
-
- if (read_only) {
- *read_only = !writable || mr->readonly;
+ return NULL;
}
- return true;
+ *xlat_p = xlat;
+ return mr;
}
void memory_region_set_log(MemoryRegion *mr, bool log, unsigned client)
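
Callers of the old memory_get_xlat_addr() now derive the host pointer, RAM address and read-only flag themselves from the MemoryRegion and offset returned by memory_translate_iotlb(); the vfio listener updated elsewhere in this series is the in-tree user. A hedged sketch of the new calling convention, assuming iotlb is already in scope inside the caller:

    hwaddr xlat;
    Error *local_err = NULL;
    MemoryRegion *mr = memory_translate_iotlb(iotlb, &xlat, &local_err);

    if (!mr) {
        error_report_err(local_err);
        return;
    }

    /* Equivalents of the out-parameters that were removed above: */
    void *vaddr = memory_region_get_ram_ptr(mr) + xlat;
    ram_addr_t ram_addr = memory_region_get_ram_addr(mr) + xlat;
    bool read_only = !(iotlb->perm & IOMMU_WO) || mr->readonly;
    bool has_rdm = memory_region_has_ram_discard_manager(mr);
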
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index a6bc089..56a6b9b 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -6018,9 +6018,11 @@ static bool host_supports_vmx(void)
* because private/shared page tracking is already provided through other
* means, these 2 use-cases should be treated as being mutually-exclusive.
*/
-static int kvm_handle_hc_map_gpa_range(struct kvm_run *run)
+static int kvm_handle_hc_map_gpa_range(X86CPU *cpu, struct kvm_run *run)
{
+ struct kvm_pre_fault_memory mem;
uint64_t gpa, size, attributes;
+ int ret;
if (!machine_require_guest_memfd(current_machine))
return -EINVAL;
@@ -6031,13 +6033,32 @@ static int kvm_handle_hc_map_gpa_range(struct kvm_run *run)
trace_kvm_hc_map_gpa_range(gpa, size, attributes, run->hypercall.flags);
- return kvm_convert_memory(gpa, size, attributes & KVM_MAP_GPA_RANGE_ENCRYPTED);
+ ret = kvm_convert_memory(gpa, size, attributes & KVM_MAP_GPA_RANGE_ENCRYPTED);
+ if (ret || !kvm_pre_fault_memory_supported) {
+ return ret;
+ }
+
+ /*
+     * Opportunistically pre-fault memory in. Failures are ignored so that
+     * any errors in faulting in the memory are instead reported by the KVM
+     * page fault path when the guest first accesses the page.
+ */
+ memset(&mem, 0, sizeof(mem));
+ mem.gpa = gpa;
+ mem.size = size;
+ while (mem.size) {
+ if (kvm_vcpu_ioctl(CPU(cpu), KVM_PRE_FAULT_MEMORY, &mem)) {
+ break;
+ }
+ }
+
+ return 0;
}
-static int kvm_handle_hypercall(struct kvm_run *run)
+static int kvm_handle_hypercall(X86CPU *cpu, struct kvm_run *run)
{
if (run->hypercall.nr == KVM_HC_MAP_GPA_RANGE)
- return kvm_handle_hc_map_gpa_range(run);
+ return kvm_handle_hc_map_gpa_range(cpu, run);
return -EINVAL;
}
@@ -6137,7 +6158,7 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
break;
#endif
case KVM_EXIT_HYPERCALL:
- ret = kvm_handle_hypercall(run);
+ ret = kvm_handle_hypercall(cpu, run);
break;
case KVM_EXIT_SYSTEM_EVENT:
switch (run->system_event.type) {
diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index f0aa189..a2e4d48 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -842,7 +842,7 @@ int64_t helper_cvttsd2sq(CPUX86State *env, ZMMReg *s)
void glue(helper_rsqrtps, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s)
{
- uint8_t old_flags = get_float_exception_flags(&env->sse_status);
+ int old_flags = get_float_exception_flags(&env->sse_status);
int i;
for (i = 0; i < 2 << SHIFT; i++) {
d->ZMM_S(i) = float32_div(float32_one,
@@ -855,7 +855,7 @@ void glue(helper_rsqrtps, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s)
#if SHIFT == 1
void helper_rsqrtss(CPUX86State *env, ZMMReg *d, ZMMReg *v, ZMMReg *s)
{
- uint8_t old_flags = get_float_exception_flags(&env->sse_status);
+ int old_flags = get_float_exception_flags(&env->sse_status);
int i;
d->ZMM_S(0) = float32_div(float32_one,
float32_sqrt(s->ZMM_S(0), &env->sse_status),
@@ -869,7 +869,7 @@ void helper_rsqrtss(CPUX86State *env, ZMMReg *d, ZMMReg *v, ZMMReg *s)
void glue(helper_rcpps, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s)
{
- uint8_t old_flags = get_float_exception_flags(&env->sse_status);
+ int old_flags = get_float_exception_flags(&env->sse_status);
int i;
for (i = 0; i < 2 << SHIFT; i++) {
d->ZMM_S(i) = float32_div(float32_one, s->ZMM_S(i), &env->sse_status);
@@ -880,7 +880,7 @@ void glue(helper_rcpps, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s)
#if SHIFT == 1
void helper_rcpss(CPUX86State *env, ZMMReg *d, ZMMReg *v, ZMMReg *s)
{
- uint8_t old_flags = get_float_exception_flags(&env->sse_status);
+ int old_flags = get_float_exception_flags(&env->sse_status);
int i;
d->ZMM_S(0) = float32_div(float32_one, s->ZMM_S(0), &env->sse_status);
for (i = 1; i < 2 << SHIFT; i++) {
@@ -1714,7 +1714,7 @@ void glue(helper_phminposuw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
void glue(helper_roundps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
uint32_t mode)
{
- uint8_t old_flags = get_float_exception_flags(&env->sse_status);
+ int old_flags = get_float_exception_flags(&env->sse_status);
signed char prev_rounding_mode;
int i;
@@ -1738,7 +1738,7 @@ void glue(helper_roundps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
void glue(helper_roundpd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
uint32_t mode)
{
- uint8_t old_flags = get_float_exception_flags(&env->sse_status);
+ int old_flags = get_float_exception_flags(&env->sse_status);
signed char prev_rounding_mode;
int i;
@@ -1763,7 +1763,7 @@ void glue(helper_roundpd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
void glue(helper_roundss, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s,
uint32_t mode)
{
- uint8_t old_flags = get_float_exception_flags(&env->sse_status);
+ int old_flags = get_float_exception_flags(&env->sse_status);
signed char prev_rounding_mode;
int i;
@@ -1788,7 +1788,7 @@ void glue(helper_roundss, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s,
void glue(helper_roundsd, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s,
uint32_t mode)
{
- uint8_t old_flags = get_float_exception_flags(&env->sse_status);
+ int old_flags = get_float_exception_flags(&env->sse_status);
signed char prev_rounding_mode;
int i;
diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c
index 1cbadb1..b3b2382 100644
--- a/target/i386/tcg/fpu_helper.c
+++ b/target/i386/tcg/fpu_helper.c
@@ -189,25 +189,25 @@ void cpu_init_fp_statuses(CPUX86State *env)
set_float_default_nan_pattern(0b11000000, &env->mmx_status);
set_float_default_nan_pattern(0b11000000, &env->sse_status);
/*
- * TODO: x86 does flush-to-zero detection after rounding (the SDM
+ * x86 does flush-to-zero detection after rounding (the SDM
* section 10.2.3.3 on the FTZ bit of MXCSR says that we flush
* when we detect underflow, which x86 does after rounding).
*/
- set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status);
- set_float_ftz_detection(float_ftz_before_rounding, &env->mmx_status);
- set_float_ftz_detection(float_ftz_before_rounding, &env->sse_status);
+ set_float_ftz_detection(float_ftz_after_rounding, &env->fp_status);
+ set_float_ftz_detection(float_ftz_after_rounding, &env->mmx_status);
+ set_float_ftz_detection(float_ftz_after_rounding, &env->sse_status);
}
-static inline uint8_t save_exception_flags(CPUX86State *env)
+static inline int save_exception_flags(CPUX86State *env)
{
- uint8_t old_flags = get_float_exception_flags(&env->fp_status);
+ int old_flags = get_float_exception_flags(&env->fp_status);
set_float_exception_flags(0, &env->fp_status);
return old_flags;
}
-static void merge_exception_flags(CPUX86State *env, uint8_t old_flags)
+static void merge_exception_flags(CPUX86State *env, int old_flags)
{
- uint8_t new_flags = get_float_exception_flags(&env->fp_status);
+ int new_flags = get_float_exception_flags(&env->fp_status);
float_raise(old_flags, &env->fp_status);
fpu_set_exception(env,
((new_flags & float_flag_invalid ? FPUS_IE : 0) |
@@ -215,12 +215,12 @@ static void merge_exception_flags(CPUX86State *env, uint8_t old_flags)
(new_flags & float_flag_overflow ? FPUS_OE : 0) |
(new_flags & float_flag_underflow ? FPUS_UE : 0) |
(new_flags & float_flag_inexact ? FPUS_PE : 0) |
- (new_flags & float_flag_input_denormal_flushed ? FPUS_DE : 0)));
+ (new_flags & float_flag_input_denormal_used ? FPUS_DE : 0)));
}
static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
floatx80 ret = floatx80_div(a, b, &env->fp_status);
merge_exception_flags(env, old_flags);
return ret;
@@ -240,7 +240,7 @@ static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
void helper_flds_FT0(CPUX86State *env, uint32_t val)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
union {
float32 f;
uint32_t i;
@@ -253,7 +253,7 @@ void helper_flds_FT0(CPUX86State *env, uint32_t val)
void helper_fldl_FT0(CPUX86State *env, uint64_t val)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
union {
float64 f;
uint64_t i;
@@ -271,7 +271,7 @@ void helper_fildl_FT0(CPUX86State *env, int32_t val)
void helper_flds_ST0(CPUX86State *env, uint32_t val)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
int new_fpstt;
union {
float32 f;
@@ -288,7 +288,7 @@ void helper_flds_ST0(CPUX86State *env, uint32_t val)
void helper_fldl_ST0(CPUX86State *env, uint64_t val)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
int new_fpstt;
union {
float64 f;
@@ -338,7 +338,7 @@ void helper_fildll_ST0(CPUX86State *env, int64_t val)
uint32_t helper_fsts_ST0(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
union {
float32 f;
uint32_t i;
@@ -351,7 +351,7 @@ uint32_t helper_fsts_ST0(CPUX86State *env)
uint64_t helper_fstl_ST0(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
union {
float64 f;
uint64_t i;
@@ -364,7 +364,7 @@ uint64_t helper_fstl_ST0(CPUX86State *env)
int32_t helper_fist_ST0(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
int32_t val;
val = floatx80_to_int32(ST0, &env->fp_status);
@@ -378,7 +378,7 @@ int32_t helper_fist_ST0(CPUX86State *env)
int32_t helper_fistl_ST0(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
int32_t val;
val = floatx80_to_int32(ST0, &env->fp_status);
@@ -391,7 +391,7 @@ int32_t helper_fistl_ST0(CPUX86State *env)
int64_t helper_fistll_ST0(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
int64_t val;
val = floatx80_to_int64(ST0, &env->fp_status);
@@ -404,7 +404,7 @@ int64_t helper_fistll_ST0(CPUX86State *env)
int32_t helper_fistt_ST0(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
int32_t val;
val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
@@ -418,7 +418,7 @@ int32_t helper_fistt_ST0(CPUX86State *env)
int32_t helper_fisttl_ST0(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
int32_t val;
val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
@@ -431,7 +431,7 @@ int32_t helper_fisttl_ST0(CPUX86State *env)
int64_t helper_fisttll_ST0(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
int64_t val;
val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
@@ -527,7 +527,7 @@ static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};
void helper_fcom_ST0_FT0(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
FloatRelation ret;
ret = floatx80_compare(ST0, FT0, &env->fp_status);
@@ -537,7 +537,7 @@ void helper_fcom_ST0_FT0(CPUX86State *env)
void helper_fucom_ST0_FT0(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
FloatRelation ret;
ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
@@ -549,7 +549,7 @@ static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
void helper_fcomi_ST0_FT0(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
int eflags;
FloatRelation ret;
@@ -562,7 +562,7 @@ void helper_fcomi_ST0_FT0(CPUX86State *env)
void helper_fucomi_ST0_FT0(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
int eflags;
FloatRelation ret;
@@ -575,28 +575,28 @@ void helper_fucomi_ST0_FT0(CPUX86State *env)
void helper_fadd_ST0_FT0(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
ST0 = floatx80_add(ST0, FT0, &env->fp_status);
merge_exception_flags(env, old_flags);
}
void helper_fmul_ST0_FT0(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
merge_exception_flags(env, old_flags);
}
void helper_fsub_ST0_FT0(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
merge_exception_flags(env, old_flags);
}
void helper_fsubr_ST0_FT0(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
merge_exception_flags(env, old_flags);
}
@@ -615,28 +615,28 @@ void helper_fdivr_ST0_FT0(CPUX86State *env)
void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
merge_exception_flags(env, old_flags);
}
void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
merge_exception_flags(env, old_flags);
}
void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
merge_exception_flags(env, old_flags);
}
void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status);
merge_exception_flags(env, old_flags);
}
@@ -861,7 +861,7 @@ void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
int v;
target_ulong mem_ref, mem_end;
int64_t val;
@@ -1136,7 +1136,7 @@ static const struct f2xm1_data f2xm1_table[65] = {
void helper_f2xm1(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
uint64_t sig = extractFloatx80Frac(ST0);
int32_t exp = extractFloatx80Exp(ST0);
bool sign = extractFloatx80Sign(ST0);
@@ -1369,7 +1369,7 @@ static const struct fpatan_data fpatan_table[9] = {
void helper_fpatan(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
uint64_t arg0_sig = extractFloatx80Frac(ST0);
int32_t arg0_exp = extractFloatx80Exp(ST0);
bool arg0_sign = extractFloatx80Sign(ST0);
@@ -1808,7 +1808,7 @@ void helper_fpatan(CPUX86State *env)
void helper_fxtract(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
CPU_LDoubleU temp;
temp.d = ST0;
@@ -1857,7 +1857,7 @@ void helper_fxtract(CPUX86State *env)
static void helper_fprem_common(CPUX86State *env, bool mod)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
uint64_t quotient;
CPU_LDoubleU temp0, temp1;
int exp0, exp1, expdiff;
@@ -2053,7 +2053,7 @@ static void helper_fyl2x_common(CPUX86State *env, floatx80 arg, int32_t *exp,
void helper_fyl2xp1(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
uint64_t arg0_sig = extractFloatx80Frac(ST0);
int32_t arg0_exp = extractFloatx80Exp(ST0);
bool arg0_sign = extractFloatx80Sign(ST0);
@@ -2151,7 +2151,7 @@ void helper_fyl2xp1(CPUX86State *env)
void helper_fyl2x(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
uint64_t arg0_sig = extractFloatx80Frac(ST0);
int32_t arg0_exp = extractFloatx80Exp(ST0);
bool arg0_sign = extractFloatx80Sign(ST0);
@@ -2298,7 +2298,7 @@ void helper_fyl2x(CPUX86State *env)
void helper_fsqrt(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
if (floatx80_is_neg(ST0)) {
env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
env->fpus |= 0x400;
@@ -2324,14 +2324,14 @@ void helper_fsincos(CPUX86State *env)
void helper_frndint(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
ST0 = floatx80_round_to_int(ST0, &env->fp_status);
merge_exception_flags(env, old_flags);
}
void helper_fscale(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
if (floatx80_invalid_encoding(ST1, &env->fp_status) ||
floatx80_invalid_encoding(ST0, &env->fp_status)) {
float_raise(float_flag_invalid, &env->fp_status);
@@ -2369,7 +2369,7 @@ void helper_fscale(CPUX86State *env)
} else {
int n;
FloatX80RoundPrec save = env->fp_status.floatx80_rounding_precision;
- uint8_t save_flags = get_float_exception_flags(&env->fp_status);
+ int save_flags = get_float_exception_flags(&env->fp_status);
set_float_exception_flags(0, &env->fp_status);
n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
set_float_exception_flags(save_flags, &env->fp_status);
@@ -3254,6 +3254,7 @@ void update_mxcsr_status(CPUX86State *env)
/* Set exception flags. */
set_float_exception_flags((mxcsr & FPUS_IE ? float_flag_invalid : 0) |
+ (mxcsr & FPUS_DE ? float_flag_input_denormal_used : 0) |
(mxcsr & FPUS_ZE ? float_flag_divbyzero : 0) |
(mxcsr & FPUS_OE ? float_flag_overflow : 0) |
(mxcsr & FPUS_UE ? float_flag_underflow : 0) |
@@ -3269,15 +3270,9 @@ void update_mxcsr_status(CPUX86State *env)
void update_mxcsr_from_sse_status(CPUX86State *env)
{
- uint8_t flags = get_float_exception_flags(&env->sse_status);
- /*
- * The MXCSR denormal flag has opposite semantics to
- * float_flag_input_denormal_flushed (the softfloat code sets that flag
- * only when flushing input denormals to zero, but SSE sets it
- * only when not flushing them to zero), so is not converted
- * here.
- */
+ int flags = get_float_exception_flags(&env->sse_status);
env->mxcsr |= ((flags & float_flag_invalid ? FPUS_IE : 0) |
+ (flags & float_flag_input_denormal_used ? FPUS_DE : 0) |
(flags & float_flag_divbyzero ? FPUS_ZE : 0) |
(flags & float_flag_overflow ? FPUS_OE : 0) |
(flags & float_flag_underflow ? FPUS_UE : 0) |
diff --git a/tests/tcg/x86_64/fma.c b/tests/tcg/x86_64/fma.c
index 09c622e..3421961 100644
--- a/tests/tcg/x86_64/fma.c
+++ b/tests/tcg/x86_64/fma.c
@@ -79,14 +79,21 @@ static testdata tests[] = {
/*
* Flushing of denormal outputs to zero should also happen after
* rounding, so setting FTZ should not affect the result or the flags.
- * QEMU currently does not emulate this correctly because we do the
- * flush-to-zero check before rounding, so we incorrectly produce a
- * zero result and set Underflow as well as Precision.
*/
-#ifdef ENABLE_FAILING_TESTS
{ 0x3fdfffffffffffff, 0x001fffffffffffff, 0x801fffffffffffff, true,
0x8010000000000000, 0x20 }, /* Enabling FTZ shouldn't change flags */
-#endif
+ /*
+ * normal * 0 + a denormal. With FTZ disabled this gives an exact
+ * result (equal to the input denormal); because a denormal input was
+ * consumed, only the Denormal flag is raised.
+ */
+ { 0x3cc8000000000000, 0x0000000000000000, 0x8008000000000000, false,
+ 0x8008000000000000, 0x2 }, /* Denormal */
+ /*
+ * With FTZ enabled, this consumes the denormal, returns zero (because
+ * the result is flushed) and also indicates Underflow and Precision.
+ */
+ { 0x3cc8000000000000, 0x0000000000000000, 0x8008000000000000, true,
+ 0x8000000000000000, 0x32 }, /* Precision, Underflow, Denormal */
};
int main(void)
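
The new vectors are raw binary64 bit patterns plus an expected MXCSR status byte. A small decoding sketch (not part of the test; the helper name is made up) shows why 0x8008000000000000 is a denormal and how the 0x32 flags byte breaks down:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    /* The expected-flags bytes use the architectural MXCSR status bits:
     * IE=0x01 DE=0x02 ZE=0x04 OE=0x08 UE=0x10 PE=0x20. */
    static void decode(uint64_t bits)
    {
        int sign = bits >> 63;
        int exp = (bits >> 52) & 0x7ff;
        uint64_t frac = bits & 0xfffffffffffffULL;
        printf("%#018" PRIx64 ": sign=%d exp=%d frac=%#" PRIx64 " (%s)\n",
               bits, sign, exp, frac,
               (exp == 0 && frac != 0) ? "denormal" : "not a denormal");
    }

    int main(void)
    {
        decode(0x8008000000000000ULL);  /* sign=1 exp=0 frac!=0 -> -2^-1023      */
        decode(0x3cc8000000000000ULL);  /* the normal multiplicand, 1.5 * 2^-51  */
        /* Expected flags 0x32 above = PE(0x20) | UE(0x10) | DE(0x02). */
        return 0;
    }
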
diff --git a/tests/unit/test-aio-multithread.c b/tests/unit/test-aio-multithread.c
index 08d4570..0ead6bf 100644
--- a/tests/unit/test-aio-multithread.c
+++ b/tests/unit/test-aio-multithread.c
@@ -305,7 +305,9 @@ static void mcs_mutex_lock(void)
prev = qatomic_xchg(&mutex_head, id);
if (prev != -1) {
qatomic_set(&nodes[prev].next, id);
- qemu_futex_wait(&nodes[id].locked, 1);
+ while (qatomic_read(&nodes[id].locked) == 1) {
+ qemu_futex_wait(&nodes[id].locked, 1);
+ }
}
}
@@ -328,7 +330,7 @@ static void mcs_mutex_unlock(void)
/* Wake up the next in line. */
next = qatomic_read(&nodes[id].next);
nodes[next].locked = 0;
- qemu_futex_wake(&nodes[next].locked, 1);
+ qemu_futex_wake_single(&nodes[next].locked);
}
static void test_multi_fair_mutex_entry(void *opaque)
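
Wrapping qemu_futex_wait() in a loop follows the usual futex discipline: the wait may return spuriously (or because the value changed and changed back), so the woken thread must re-check the predicate before proceeding; qemu_futex_wake_single() suffices here because each MCS node has exactly one waiter. A generic sketch of the pattern using the raw Linux futex(2) call, which is assumed to be what qemu_futex_wait() wraps on Linux (the helper name is made up):

    #define _GNU_SOURCE
    #include <linux/futex.h>
    #include <stdatomic.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    /* FUTEX_WAIT returns immediately if *addr no longer holds 'busy', and it
     * may also return spuriously, so the caller always re-checks the value
     * in a loop before trusting the wake-up. */
    static void wait_while_equal(atomic_int *addr, int busy)
    {
        while (atomic_load(addr) == busy) {
            syscall(SYS_futex, addr, FUTEX_WAIT, busy, NULL, NULL, 0);
        }
    }
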
diff --git a/util/error.c b/util/error.c
index 673011b..daea214 100644
--- a/util/error.c
+++ b/util/error.c
@@ -15,15 +15,7 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
-
-struct Error
-{
- char *msg;
- ErrorClass err_class;
- const char *src, *func;
- int line;
- GString *hint;
-};
+#include "qapi/error-internal.h"
Error *error_abort;
Error *error_fatal;
@@ -32,8 +24,13 @@ Error *error_warn;
static void error_handle(Error **errp, Error *err)
{
if (errp == &error_abort) {
- fprintf(stderr, "Unexpected error in %s() at %s:%d:\n",
- err->func, err->src, err->line);
+ if (err->func) {
+ fprintf(stderr, "Unexpected error in %s() at %.*s:%d:\n",
+ err->func, err->src_len, err->src, err->line);
+ } else {
+ fprintf(stderr, "Unexpected error at %.*s:%d:\n",
+ err->src_len, err->src, err->line);
+ }
error_report("%s", error_get_pretty(err));
if (err->hint) {
error_printf("%s", err->hint->str);
@@ -75,6 +72,7 @@ static void error_setv(Error **errp,
g_free(msg);
}
err->err_class = err_class;
+ err->src_len = -1;
err->src = src;
err->line = line;
err->func = func;
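
error_handle() now prints the source file with "%.*s" and the new src_len field, which defaults to -1. That relies on a C printf guarantee: a negative precision supplied through '*' is treated as if no precision were given, so ordinary NUL-terminated sources still print in full, while a caller that fills in src_len (presumably the Rust bindings elsewhere in this series, which carry non-NUL-terminated string data) gets an exact-length print. A standalone demonstration of that printf behaviour:

    #include <stdio.h>

    int main(void)
    {
        const char *src = "util/error.c";
        /* Negative precision via '*' == "no precision": print the whole string. */
        printf("%.*s\n", -1, src);          /* util/error.c */
        /* A non-negative precision limits the output to that many bytes. */
        printf("%.*s\n", 4, src);           /* util */
        return 0;
    }
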
diff --git a/util/event.c b/util/event.c
new file mode 100644
index 0000000..5a8141c
--- /dev/null
+++ b/util/event.c
@@ -0,0 +1,171 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include "qemu/osdep.h"
+#include "qemu/thread.h"
+
+/*
+ * Valid transitions:
+ * - FREE -> SET (qemu_event_set)
+ * - BUSY -> SET (qemu_event_set)
+ * - SET -> FREE (qemu_event_reset)
+ * - FREE -> BUSY (qemu_event_wait)
+ *
+ * With futex, the waking and blocking operations follow
+ * BUSY -> SET and FREE -> BUSY, respectively.
+ *
+ * Without futex, BUSY -> SET and FREE -> BUSY never happen. Instead, the waking
+ * operation follows FREE -> SET and the blocking operation will happen in
+ * qemu_event_wait() if the event is not SET.
+ *
+ * SET -> BUSY does not happen (it can be observed from the outside but
+ * it really is SET -> FREE -> BUSY).
+ *
+ * BUSY -> FREE provably cannot happen; to enforce it, the SET -> FREE
+ * transition is done with an OR, which becomes a no-op if the event has
+ * concurrently transitioned to FREE or BUSY.
+ */
+
+#define EV_SET 0
+#define EV_FREE 1
+#define EV_BUSY -1
+
+void qemu_event_init(QemuEvent *ev, bool init)
+{
+#ifndef HAVE_FUTEX
+ pthread_mutex_init(&ev->lock, NULL);
+ pthread_cond_init(&ev->cond, NULL);
+#endif
+
+ ev->value = (init ? EV_SET : EV_FREE);
+ ev->initialized = true;
+}
+
+void qemu_event_destroy(QemuEvent *ev)
+{
+ assert(ev->initialized);
+ ev->initialized = false;
+#ifndef HAVE_FUTEX
+ pthread_mutex_destroy(&ev->lock);
+ pthread_cond_destroy(&ev->cond);
+#endif
+}
+
+void qemu_event_set(QemuEvent *ev)
+{
+ assert(ev->initialized);
+
+#ifdef HAVE_FUTEX
+ /*
+ * Pairs with both qemu_event_reset() and qemu_event_wait().
+ *
+ * qemu_event_set has release semantics, but because it *loads*
+ * ev->value we need a full memory barrier here.
+ */
+ smp_mb();
+ if (qatomic_read(&ev->value) != EV_SET) {
+ int old = qatomic_xchg(&ev->value, EV_SET);
+
+ /* Pairs with memory barrier in kernel futex_wait system call. */
+ smp_mb__after_rmw();
+ if (old == EV_BUSY) {
+ /* There were waiters, wake them up. */
+ qemu_futex_wake_all(ev);
+ }
+ }
+#else
+ pthread_mutex_lock(&ev->lock);
+ /* Pairs with qemu_event_reset()'s load acquire. */
+ qatomic_store_release(&ev->value, EV_SET);
+ pthread_cond_broadcast(&ev->cond);
+ pthread_mutex_unlock(&ev->lock);
+#endif
+}
+
+void qemu_event_reset(QemuEvent *ev)
+{
+ assert(ev->initialized);
+
+#ifdef HAVE_FUTEX
+ /*
+ * If there was a concurrent reset (or even reset+wait),
+ * do nothing. Otherwise change EV_SET->EV_FREE.
+ */
+ qatomic_or(&ev->value, EV_FREE);
+
+ /*
+ * Order reset before checking the condition in the caller.
+ * Pairs with the first memory barrier in qemu_event_set().
+ */
+ smp_mb__after_rmw();
+#else
+ /*
+ * If futexes are not available, there are no EV_FREE->EV_BUSY
+ * transitions because wakeups are done entirely through the
+ * condition variable. Since qatomic_set() only writes EV_FREE,
+ * the load seems useless but in reality, the acquire synchronizes
+ * with qemu_event_set()'s store release: if qemu_event_reset()
+ * sees EV_SET here, then the caller will certainly see a
+ * successful condition and skip qemu_event_wait():
+ *
+ * done = 1; if (done == 0)
+ * qemu_event_set() { qemu_event_reset() {
+ * lock();
+ * ev->value = EV_SET -----> load ev->value
+ * ev->value = old value | EV_FREE
+ * cond_broadcast()
+ * unlock(); }
+ * } if (done == 0)
+ * // qemu_event_wait() not called
+ */
+ qatomic_set(&ev->value, qatomic_load_acquire(&ev->value) | EV_FREE);
+#endif
+}
+
+void qemu_event_wait(QemuEvent *ev)
+{
+ assert(ev->initialized);
+
+#ifdef HAVE_FUTEX
+ while (true) {
+ /*
+ * qemu_event_wait must synchronize with qemu_event_set even if it does
+ * not go down the slow path, so this load-acquire is needed; it
+ * synchronizes with the first memory barrier in qemu_event_set().
+ */
+ unsigned value = qatomic_load_acquire(&ev->value);
+ if (value == EV_SET) {
+ break;
+ }
+
+ if (value == EV_FREE) {
+ /*
+ * Leave the event reset and tell qemu_event_set that there are
+ * waiters. No need to retry, because there cannot be a concurrent
+ * busy->free transition. After the CAS, the event will be either
+ * set or busy.
+ *
+ * This cmpxchg doesn't have particular ordering requirements if it
+ * succeeds (moving the store earlier can only cause
+ * qemu_event_set() to issue _more_ wakeups); the failing case needs
+ * acquire semantics like the load above.
+ */
+ if (qatomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) {
+ break;
+ }
+ }
+
+ /*
+ * This is the final check for a concurrent set, so it does need
+ * a smp_mb() pairing with the second barrier of qemu_event_set().
+ * The barrier is inside the FUTEX_WAIT system call.
+ */
+ qemu_futex_wait(ev, EV_BUSY);
+ }
+#else
+ pthread_mutex_lock(&ev->lock);
+ while (qatomic_read(&ev->value) != EV_SET) {
+ pthread_cond_wait(&ev->cond, &ev->lock);
+ }
+ pthread_mutex_unlock(&ev->lock);
+#endif
+}
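
util/event.c is built around one calling convention, hinted at by the transition list at the top of the file: a waiter resets the event, re-checks its own condition, and only then waits, while the signalling side updates the condition before calling qemu_event_set(). A sketch of that shape (the 'done' flag and function names are hypothetical, not QEMU code):

    #include "qemu/osdep.h"
    #include "qemu/atomic.h"
    #include "qemu/thread.h"

    static QemuEvent ev;     /* qemu_event_init(&ev, false) done elsewhere */
    static int done;

    static void waiter(void)
    {
        while (!qatomic_read(&done)) {
            qemu_event_reset(&ev);           /* SET -> FREE                    */
            if (qatomic_read(&done)) {       /* re-check after the reset       */
                break;
            }
            qemu_event_wait(&ev);            /* FREE -> BUSY, block if needed  */
        }
    }

    static void signaller(void)
    {
        qatomic_set(&done, 1);               /* publish the condition first    */
        qemu_event_set(&ev);                 /* FREE/BUSY -> SET, wake waiters  */
    }
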
diff --git a/util/lockcnt.c b/util/lockcnt.c
index d07c6cc..92c9f8c 100644
--- a/util/lockcnt.c
+++ b/util/lockcnt.c
@@ -12,10 +12,11 @@
#include "qemu/atomic.h"
#include "trace.h"
-#ifdef CONFIG_LINUX
-#include "qemu/futex.h"
+#ifdef HAVE_FUTEX
-/* On Linux, bits 0-1 are a futex-based lock, bits 2-31 are the counter.
+/*
+ * When futex is available, bits 0-1 are a futex-based lock, bits 2-31 are the
+ * counter.
* For the mutex algorithm see Ulrich Drepper's "Futexes Are Tricky" (ok,
* this is not the most relaxing citation I could make...). It is similar
* to mutex2 in the paper.
@@ -106,7 +107,7 @@ static bool qemu_lockcnt_cmpxchg_or_wait(QemuLockCnt *lockcnt, int *val,
static void lockcnt_wake(QemuLockCnt *lockcnt)
{
trace_lockcnt_futex_wake(lockcnt);
- qemu_futex_wake(&lockcnt->count, 1);
+ qemu_futex_wake_single(&lockcnt->count);
}
void qemu_lockcnt_inc(QemuLockCnt *lockcnt)
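
The comment describes one 32-bit word doing double duty so that a single futex word covers both pieces: the low two bits are the lock, everything above is the counter, so a reference-count increment adds 4 rather than 1. A sketch of manipulating such a packed word (constant and helper names are illustrative, not necessarily those used by lockcnt.c):

    #include <stdint.h>

    #define LOCK_STATE_MASK   0x3u        /* bits 0-1: futex-based lock        */
    #define COUNT_STEP        0x4u        /* bits 2-31: the counter, so one    */
                                          /* increment adds 4, not 1           */

    static inline unsigned lockcnt_state(uint32_t word)
    {
        return word & LOCK_STATE_MASK;    /* current lock state                */
    }

    static inline unsigned lockcnt_count(uint32_t word)
    {
        return word >> 2;                 /* the reference count               */
    }

    static inline uint32_t lockcnt_inc(uint32_t word)
    {
        return word + COUNT_STEP;         /* leaves the lock bits untouched    */
    }
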
diff --git a/util/meson.build b/util/meson.build
index 1adff96..3502938 100644
--- a/util/meson.build
+++ b/util/meson.build
@@ -27,7 +27,7 @@ else
util_ss.add(files('event_notifier-win32.c'))
util_ss.add(files('oslib-win32.c'))
util_ss.add(files('qemu-thread-win32.c'))
- util_ss.add(winmm, pathcch)
+ util_ss.add(winmm, pathcch, synchronization)
endif
util_ss.add(when: linux_io_uring, if_true: files('fdmon-io_uring.c'))
if glib_has_gslice
@@ -35,6 +35,7 @@ if glib_has_gslice
endif
util_ss.add(files('defer-call.c'))
util_ss.add(files('envlist.c', 'path.c', 'module.c'))
+util_ss.add(files('event.c'))
util_ss.add(files('host-utils.c'))
util_ss.add(files('bitmap.c', 'bitops.c'))
util_ss.add(files('fifo8.c'))
diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c
index b2e26e2..ba72544 100644
--- a/util/qemu-thread-posix.c
+++ b/util/qemu-thread-posix.c
@@ -317,154 +317,6 @@ void qemu_sem_wait(QemuSemaphore *sem)
qemu_mutex_unlock(&sem->mutex);
}
-#ifdef __linux__
-#include "qemu/futex.h"
-#else
-static inline void qemu_futex_wake(QemuEvent *ev, int n)
-{
- assert(ev->initialized);
- pthread_mutex_lock(&ev->lock);
- if (n == 1) {
- pthread_cond_signal(&ev->cond);
- } else {
- pthread_cond_broadcast(&ev->cond);
- }
- pthread_mutex_unlock(&ev->lock);
-}
-
-static inline void qemu_futex_wait(QemuEvent *ev, unsigned val)
-{
- assert(ev->initialized);
- pthread_mutex_lock(&ev->lock);
- if (ev->value == val) {
- pthread_cond_wait(&ev->cond, &ev->lock);
- }
- pthread_mutex_unlock(&ev->lock);
-}
-#endif
-
-/* Valid transitions:
- * - free->set, when setting the event
- * - busy->set, when setting the event, followed by qemu_futex_wake
- * - set->free, when resetting the event
- * - free->busy, when waiting
- *
- * set->busy does not happen (it can be observed from the outside but
- * it really is set->free->busy).
- *
- * busy->free provably cannot happen; to enforce it, the set->free transition
- * is done with an OR, which becomes a no-op if the event has concurrently
- * transitioned to free or busy.
- */
-
-#define EV_SET 0
-#define EV_FREE 1
-#define EV_BUSY -1
-
-void qemu_event_init(QemuEvent *ev, bool init)
-{
-#ifndef __linux__
- pthread_mutex_init(&ev->lock, NULL);
- pthread_cond_init(&ev->cond, NULL);
-#endif
-
- ev->value = (init ? EV_SET : EV_FREE);
- ev->initialized = true;
-}
-
-void qemu_event_destroy(QemuEvent *ev)
-{
- assert(ev->initialized);
- ev->initialized = false;
-#ifndef __linux__
- pthread_mutex_destroy(&ev->lock);
- pthread_cond_destroy(&ev->cond);
-#endif
-}
-
-void qemu_event_set(QemuEvent *ev)
-{
- assert(ev->initialized);
-
- /*
- * Pairs with both qemu_event_reset() and qemu_event_wait().
- *
- * qemu_event_set has release semantics, but because it *loads*
- * ev->value we need a full memory barrier here.
- */
- smp_mb();
- if (qatomic_read(&ev->value) != EV_SET) {
- int old = qatomic_xchg(&ev->value, EV_SET);
-
- /* Pairs with memory barrier in kernel futex_wait system call. */
- smp_mb__after_rmw();
- if (old == EV_BUSY) {
- /* There were waiters, wake them up. */
- qemu_futex_wake(ev, INT_MAX);
- }
- }
-}
-
-void qemu_event_reset(QemuEvent *ev)
-{
- assert(ev->initialized);
-
- /*
- * If there was a concurrent reset (or even reset+wait),
- * do nothing. Otherwise change EV_SET->EV_FREE.
- */
- qatomic_or(&ev->value, EV_FREE);
-
- /*
- * Order reset before checking the condition in the caller.
- * Pairs with the first memory barrier in qemu_event_set().
- */
- smp_mb__after_rmw();
-}
-
-void qemu_event_wait(QemuEvent *ev)
-{
- unsigned value;
-
- assert(ev->initialized);
-
- /*
- * qemu_event_wait must synchronize with qemu_event_set even if it does
- * not go down the slow path, so this load-acquire is needed that
- * synchronizes with the first memory barrier in qemu_event_set().
- *
- * If we do go down the slow path, there is no requirement at all: we
- * might miss a qemu_event_set() here but ultimately the memory barrier in
- * qemu_futex_wait() will ensure the check is done correctly.
- */
- value = qatomic_load_acquire(&ev->value);
- if (value != EV_SET) {
- if (value == EV_FREE) {
- /*
- * Leave the event reset and tell qemu_event_set that there are
- * waiters. No need to retry, because there cannot be a concurrent
- * busy->free transition. After the CAS, the event will be either
- * set or busy.
- *
- * This cmpxchg doesn't have particular ordering requirements if it
- * succeeds (moving the store earlier can only cause qemu_event_set()
- * to issue _more_ wakeups), the failing case needs acquire semantics
- * like the load above.
- */
- if (qatomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) {
- return;
- }
- }
-
- /*
- * This is the final check for a concurrent set, so it does need
- * a smp_mb() pairing with the second barrier of qemu_event_set().
- * The barrier is inside the FUTEX_WAIT system call.
- */
- qemu_futex_wait(ev, EV_BUSY);
- }
-}
-
static __thread NotifierList thread_exit;
/*
diff --git a/util/qemu-thread-win32.c b/util/qemu-thread-win32.c
index a7fe3cc..ca2e0b5 100644
--- a/util/qemu-thread-win32.c
+++ b/util/qemu-thread-win32.c
@@ -231,135 +231,6 @@ void qemu_sem_wait(QemuSemaphore *sem)
}
}
-/* Wrap a Win32 manual-reset event with a fast userspace path. The idea
- * is to reset the Win32 event lazily, as part of a test-reset-test-wait
- * sequence. Such a sequence is, indeed, how QemuEvents are used by
- * RCU and other subsystems!
- *
- * Valid transitions:
- * - free->set, when setting the event
- * - busy->set, when setting the event, followed by SetEvent
- * - set->free, when resetting the event
- * - free->busy, when waiting
- *
- * set->busy does not happen (it can be observed from the outside but
- * it really is set->free->busy).
- *
- * busy->free provably cannot happen; to enforce it, the set->free transition
- * is done with an OR, which becomes a no-op if the event has concurrently
- * transitioned to free or busy (and is faster than cmpxchg).
- */
-
-#define EV_SET 0
-#define EV_FREE 1
-#define EV_BUSY -1
-
-void qemu_event_init(QemuEvent *ev, bool init)
-{
- /* Manual reset. */
- ev->event = CreateEvent(NULL, TRUE, TRUE, NULL);
- ev->value = (init ? EV_SET : EV_FREE);
- ev->initialized = true;
-}
-
-void qemu_event_destroy(QemuEvent *ev)
-{
- assert(ev->initialized);
- ev->initialized = false;
- CloseHandle(ev->event);
-}
-
-void qemu_event_set(QemuEvent *ev)
-{
- assert(ev->initialized);
-
- /*
- * Pairs with both qemu_event_reset() and qemu_event_wait().
- *
- * qemu_event_set has release semantics, but because it *loads*
- * ev->value we need a full memory barrier here.
- */
- smp_mb();
- if (qatomic_read(&ev->value) != EV_SET) {
- int old = qatomic_xchg(&ev->value, EV_SET);
-
- /* Pairs with memory barrier after ResetEvent. */
- smp_mb__after_rmw();
- if (old == EV_BUSY) {
- /* There were waiters, wake them up. */
- SetEvent(ev->event);
- }
- }
-}
-
-void qemu_event_reset(QemuEvent *ev)
-{
- assert(ev->initialized);
-
- /*
- * If there was a concurrent reset (or even reset+wait),
- * do nothing. Otherwise change EV_SET->EV_FREE.
- */
- qatomic_or(&ev->value, EV_FREE);
-
- /*
- * Order reset before checking the condition in the caller.
- * Pairs with the first memory barrier in qemu_event_set().
- */
- smp_mb__after_rmw();
-}
-
-void qemu_event_wait(QemuEvent *ev)
-{
- unsigned value;
-
- assert(ev->initialized);
-
- /*
- * qemu_event_wait must synchronize with qemu_event_set even if it does
- * not go down the slow path, so this load-acquire is needed that
- * synchronizes with the first memory barrier in qemu_event_set().
- *
- * If we do go down the slow path, there is no requirement at all: we
- * might miss a qemu_event_set() here but ultimately the memory barrier in
- * qemu_futex_wait() will ensure the check is done correctly.
- */
- value = qatomic_load_acquire(&ev->value);
- if (value != EV_SET) {
- if (value == EV_FREE) {
- /*
- * Here the underlying kernel event is reset, but qemu_event_set is
- * not yet going to call SetEvent. However, there will be another
- * check for EV_SET below when setting EV_BUSY. At that point it
- * is safe to call WaitForSingleObject.
- */
- ResetEvent(ev->event);
-
- /*
- * It is not clear whether ResetEvent provides this barrier; kernel
- * APIs (KeResetEvent/KeClearEvent) do not. Better safe than sorry!
- */
- smp_mb();
-
- /*
- * Leave the event reset and tell qemu_event_set that there are
- * waiters. No need to retry, because there cannot be a concurrent
- * busy->free transition. After the CAS, the event will be either
- * set or busy.
- */
- if (qatomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) {
- return;
- }
- }
-
- /*
- * ev->value is now EV_BUSY. Since we didn't observe EV_SET,
- * qemu_event_set() must observe EV_BUSY and call SetEvent().
- */
- WaitForSingleObject(ev->event, INFINITE);
- }
-}
-
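
The hand-rolled manual-reset-event wrapper can go because the shared util/event.c implementation now covers Windows too: the meson.build change that links 'synchronization' points at the WaitOnAddress()/WakeByAddress*() family, which provides futex-style wait-on-value semantics on Win32. A minimal sketch of that documented API (this is the Win32 interface itself, not necessarily the exact shape of QEMU's qemu_futex_* wrappers):

    #include <windows.h>        /* WaitOnAddress lives in synchapi.h;          */
                                /* link with -lsynchronization                  */

    /* Block while *addr still holds 'expected'; spurious returns are allowed,
     * so real callers loop and re-check, as util/event.c does. */
    static void futex_like_wait(volatile LONG *addr, LONG expected)
    {
        WaitOnAddress(addr, &expected, sizeof(*addr), INFINITE);
    }

    static void futex_like_wake_all(volatile LONG *addr)
    {
        WakeByAddressAll((PVOID)addr);
    }
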
struct QemuThreadData {
/* Passed to win32_start_routine. */
void *(*start_routine)(void *);