diff options
Diffstat (limited to 'include')
-rw-r--r-- | include/exec/memop.h | 4 | ||||
-rw-r--r-- | include/gdbstub/commands.h | 2 | ||||
-rw-r--r-- | include/hw/vfio/vfio-container-base.h | 83 | ||||
-rw-r--r-- | include/hw/vfio/vfio-cpr.h | 18 | ||||
-rw-r--r-- | include/qapi/error-internal.h | 35 | ||||
-rw-r--r-- | include/qemu/futex.h | 44 | ||||
-rw-r--r-- | include/qemu/lockcnt.h | 2 | ||||
-rw-r--r-- | include/qemu/thread-posix.h | 9 | ||||
-rw-r--r-- | include/qemu/thread-win32.h | 6 | ||||
-rw-r--r-- | include/qemu/thread.h | 21 | ||||
-rw-r--r-- | include/system/host_iommu_device.h | 15 | ||||
-rw-r--r-- | include/system/iommufd.h | 54 | ||||
-rw-r--r-- | include/system/kvm.h | 1 | ||||
-rw-r--r-- | include/system/memory.h | 19 |
14 files changed, 275 insertions, 38 deletions
diff --git a/include/exec/memop.h b/include/exec/memop.h index 407a47d..cf7da33 100644 --- a/include/exec/memop.h +++ b/include/exec/memop.h @@ -162,8 +162,8 @@ static inline unsigned memop_size(MemOp op) static inline MemOp size_memop(unsigned size) { #ifdef CONFIG_DEBUG_TCG - /* Power of 2 up to 8. */ - assert((size & (size - 1)) == 0 && size >= 1 && size <= 8); + /* Power of 2 up to 1024 */ + assert(is_power_of_2(size) && size >= 1 && size <= (1 << MO_SIZE)); #endif return (MemOp)ctz32(size); } diff --git a/include/gdbstub/commands.h b/include/gdbstub/commands.h index 40f0514..bff3674 100644 --- a/include/gdbstub/commands.h +++ b/include/gdbstub/commands.h @@ -1,5 +1,5 @@ #ifndef GDBSTUB_COMMANDS_H -#define GDBSTUB +#define GDBSTUB_COMMANDS_H typedef void (*GdbCmdHandler)(GArray *params, void *user_ctx); diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h index 3d392b0..9d37f86 100644 --- a/include/hw/vfio/vfio-container-base.h +++ b/include/hw/vfio/vfio-container-base.h @@ -78,7 +78,7 @@ void vfio_address_space_insert(VFIOAddressSpace *space, int vfio_container_dma_map(VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, - void *vaddr, bool readonly); + void *vaddr, bool readonly, MemoryRegion *mr); int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, IOMMUTLBEntry *iotlb, bool unmap_all); @@ -115,13 +115,57 @@ OBJECT_DECLARE_TYPE(VFIOContainerBase, VFIOIOMMUClass, VFIO_IOMMU) struct VFIOIOMMUClass { ObjectClass parent_class; - /* basic feature */ + /** + * @setup + * + * Perform basic setup of the container, including configuring IOMMU + * capabilities, IOVA ranges, supported page sizes, etc. + * + * @bcontainer: #VFIOContainerBase + * @errp: pointer to Error*, to store an error if it happens. + * + * Returns true to indicate success and false for error. + */ bool (*setup)(VFIOContainerBase *bcontainer, Error **errp); + + /** + * @listener_begin + * + * Called at the beginning of an address space update transaction. + * See #MemoryListener. + * + * @bcontainer: #VFIOContainerBase + */ void (*listener_begin)(VFIOContainerBase *bcontainer); + + /** + * @listener_commit + * + * Called at the end of an address space update transaction, + * See #MemoryListener. + * + * @bcontainer: #VFIOContainerBase + */ void (*listener_commit)(VFIOContainerBase *bcontainer); + + /** + * @dma_map + * + * Map an address range into the container. Note that the memory region is + * referenced within an RCU read lock region across this call. + * + * @bcontainer: #VFIOContainerBase to use + * @iova: start address to map + * @size: size of the range to map + * @vaddr: process virtual address of mapping + * @readonly: true if mapping should be readonly + * @mr: the memory region for this mapping + * + * Returns 0 to indicate success and -errno otherwise. + */ int (*dma_map)(const VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, - void *vaddr, bool readonly); + void *vaddr, bool readonly, MemoryRegion *mr); /** * @dma_unmap * @@ -132,12 +176,38 @@ struct VFIOIOMMUClass { * @size: size of the range to unmap * @iotlb: The IOMMU TLB mapping entry (or NULL) * @unmap_all: if set, unmap the entire address space + * + * Returns 0 to indicate success and -errno otherwise. */ int (*dma_unmap)(const VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, IOMMUTLBEntry *iotlb, bool unmap_all); + + + /** + * @attach_device + * + * Associate the given device with a container and do some related + * initialization of the device context. + * + * @name: name of the device + * @vbasedev: the device + * @as: address space to use + * @errp: pointer to Error*, to store an error if it happens. + * + * Returns true to indicate success and false for error. + */ bool (*attach_device)(const char *name, VFIODevice *vbasedev, AddressSpace *as, Error **errp); + + /* + * @detach_device + * + * Detach the given device from its container and clean up any necessary + * state. + * + * @vbasedev: the device to disassociate + */ void (*detach_device)(VFIODevice *vbasedev); /* migration feature */ @@ -152,7 +222,7 @@ struct VFIOIOMMUClass { * @start: indicates whether to start or stop dirty pages tracking * @errp: pointer to Error*, to store an error if it happens. * - * Returns zero to indicate success and negative for error + * Returns zero to indicate success and negative for error. */ int (*set_dirty_page_tracking)(const VFIOContainerBase *bcontainer, bool start, Error **errp); @@ -167,7 +237,7 @@ struct VFIOIOMMUClass { * @size: size of iova range * @errp: pointer to Error*, to store an error if it happens. * - * Returns zero to indicate success and negative for error + * Returns zero to indicate success and negative for error. */ int (*query_dirty_bitmap)(const VFIOContainerBase *bcontainer, VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp); @@ -183,4 +253,7 @@ struct VFIOIOMMUClass { void (*release)(VFIOContainerBase *bcontainer); }; +VFIORamDiscardListener *vfio_find_ram_discard_listener( + VFIOContainerBase *bcontainer, MemoryRegionSection *section); + #endif /* HW_VFIO_VFIO_CONTAINER_BASE_H */ diff --git a/include/hw/vfio/vfio-cpr.h b/include/hw/vfio/vfio-cpr.h new file mode 100644 index 0000000..750ea5b --- /dev/null +++ b/include/hw/vfio/vfio-cpr.h @@ -0,0 +1,18 @@ +/* + * VFIO CPR + * + * Copyright (c) 2025 Oracle and/or its affiliates. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef HW_VFIO_VFIO_CPR_H +#define HW_VFIO_VFIO_CPR_H + +struct VFIOContainerBase; + +bool vfio_cpr_register_container(struct VFIOContainerBase *bcontainer, + Error **errp); +void vfio_cpr_unregister_container(struct VFIOContainerBase *bcontainer); + +#endif /* HW_VFIO_VFIO_CPR_H */ diff --git a/include/qapi/error-internal.h b/include/qapi/error-internal.h new file mode 100644 index 0000000..ff18a20 --- /dev/null +++ b/include/qapi/error-internal.h @@ -0,0 +1,35 @@ +/* + * QEMU Error Objects - struct definition + * + * Copyright IBM, Corp. 2011 + * Copyright (C) 2011-2015 Red Hat, Inc. + * + * Authors: + * Anthony Liguori <aliguori@us.ibm.com> + * Markus Armbruster <armbru@redhat.com>, + * + * This work is licensed under the terms of the GNU LGPL, version 2. See + * the COPYING.LIB file in the top-level directory. + */ + +#ifndef QAPI_ERROR_INTERNAL_H + +struct Error +{ + char *msg; + ErrorClass err_class; + + /* Used for error_abort only, may be NULL. */ + const char *func; + + /* + * src might be NUL-terminated or not. If it is, src_len is negative. + * If it is not, src_len is the length. + */ + const char *src; + int src_len; + int line; + GString *hint; +}; + +#endif diff --git a/include/qemu/futex.h b/include/qemu/futex.h index 91ae889..607613e 100644 --- a/include/qemu/futex.h +++ b/include/qemu/futex.h @@ -1,5 +1,5 @@ /* - * Wrappers around Linux futex syscall + * Wrappers around Linux futex syscall and similar * * Copyright Red Hat, Inc. 2017 * @@ -11,17 +11,35 @@ * */ +/* + * Note that a wake-up can also be caused by common futex usage patterns in + * unrelated code that happened to have previously used the futex word's + * memory location (e.g., typical futex-based implementations of Pthreads + * mutexes can cause this under some conditions). Therefore, qemu_futex_wait() + * callers should always conservatively assume that it is a spurious wake-up, + * and use the futex word's value (i.e., the user-space synchronization scheme) + * to decide whether to continue to block or not. + */ + #ifndef QEMU_FUTEX_H #define QEMU_FUTEX_H +#define HAVE_FUTEX + +#ifdef CONFIG_LINUX #include <sys/syscall.h> #include <linux/futex.h> #define qemu_futex(...) syscall(__NR_futex, __VA_ARGS__) -static inline void qemu_futex_wake(void *f, int n) +static inline void qemu_futex_wake_all(void *f) { - qemu_futex(f, FUTEX_WAKE, n, NULL, NULL, 0); + qemu_futex(f, FUTEX_WAKE, INT_MAX, NULL, NULL, 0); +} + +static inline void qemu_futex_wake_single(void *f) +{ + qemu_futex(f, FUTEX_WAKE, 1, NULL, NULL, 0); } static inline void qemu_futex_wait(void *f, unsigned val) @@ -37,5 +55,25 @@ static inline void qemu_futex_wait(void *f, unsigned val) } } } +#elif defined(CONFIG_WIN32) +#include <synchapi.h> + +static inline void qemu_futex_wake_all(void *f) +{ + WakeByAddressAll(f); +} + +static inline void qemu_futex_wake_single(void *f) +{ + WakeByAddressSingle(f); +} + +static inline void qemu_futex_wait(void *f, unsigned val) +{ + WaitOnAddress(f, &val, sizeof(val), INFINITE); +} +#else +#undef HAVE_FUTEX +#endif #endif /* QEMU_FUTEX_H */ diff --git a/include/qemu/lockcnt.h b/include/qemu/lockcnt.h index f4b62a3..5a2800e 100644 --- a/include/qemu/lockcnt.h +++ b/include/qemu/lockcnt.h @@ -17,7 +17,7 @@ typedef struct QemuLockCnt QemuLockCnt; struct QemuLockCnt { -#ifndef CONFIG_LINUX +#ifndef HAVE_FUTEX QemuMutex mutex; #endif unsigned count; diff --git a/include/qemu/thread-posix.h b/include/qemu/thread-posix.h index 5f2f3d1..758808b 100644 --- a/include/qemu/thread-posix.h +++ b/include/qemu/thread-posix.h @@ -32,15 +32,6 @@ struct QemuSemaphore { unsigned int count; }; -struct QemuEvent { -#ifndef __linux__ - pthread_mutex_t lock; - pthread_cond_t cond; -#endif - unsigned value; - bool initialized; -}; - struct QemuThread { pthread_t thread; }; diff --git a/include/qemu/thread-win32.h b/include/qemu/thread-win32.h index d95af44..da9e732 100644 --- a/include/qemu/thread-win32.h +++ b/include/qemu/thread-win32.h @@ -28,12 +28,6 @@ struct QemuSemaphore { bool initialized; }; -struct QemuEvent { - int value; - HANDLE event; - bool initialized; -}; - typedef struct QemuThreadData QemuThreadData; struct QemuThread { QemuThreadData *data; diff --git a/include/qemu/thread.h b/include/qemu/thread.h index 6f800aa..f0302ed 100644 --- a/include/qemu/thread.h +++ b/include/qemu/thread.h @@ -3,13 +3,32 @@ #include "qemu/processor.h" #include "qemu/atomic.h" +#include "qemu/futex.h" typedef struct QemuCond QemuCond; typedef struct QemuSemaphore QemuSemaphore; -typedef struct QemuEvent QemuEvent; typedef struct QemuLockCnt QemuLockCnt; typedef struct QemuThread QemuThread; +/* + * QemuEvent + * ========= + * + * QemuEvent is an implementation of Win32 manual-reset event object. + * For details, refer to: + * https://learn.microsoft.com/en-us/windows/win32/sync/using-event-objects + * + * QemuEvent is more lightweight than QemuSemaphore when HAVE_FUTEX is defined. + */ +typedef struct QemuEvent { +#ifndef HAVE_FUTEX + pthread_mutex_t lock; + pthread_cond_t cond; +#endif + unsigned value; + bool initialized; +} QemuEvent; + #ifdef _WIN32 #include "qemu/thread-win32.h" #else diff --git a/include/system/host_iommu_device.h b/include/system/host_iommu_device.h index 809cced..ab849a4 100644 --- a/include/system/host_iommu_device.h +++ b/include/system/host_iommu_device.h @@ -14,6 +14,13 @@ #include "qom/object.h" #include "qapi/error.h" +#ifdef CONFIG_LINUX +#include "linux/iommufd.h" + +typedef union VendorCaps { + struct iommu_hw_info_vtd vtd; + struct iommu_hw_info_arm_smmuv3 smmuv3; +} VendorCaps; /** * struct HostIOMMUDeviceCaps - Define host IOMMU device capabilities. @@ -22,11 +29,17 @@ * * @hw_caps: host platform IOMMU capabilities (e.g. on IOMMUFD this represents * the @out_capabilities value returned from IOMMU_GET_HW_INFO ioctl) + * + * @vendor_caps: host platform IOMMU vendor specific capabilities (e.g. on + * IOMMUFD this represents a user-space buffer filled by kernel + * with host IOMMU @type specific hardware information data) */ typedef struct HostIOMMUDeviceCaps { uint32_t type; uint64_t hw_caps; + VendorCaps vendor_caps; } HostIOMMUDeviceCaps; +#endif #define TYPE_HOST_IOMMU_DEVICE "host-iommu-device" OBJECT_DECLARE_TYPE(HostIOMMUDevice, HostIOMMUDeviceClass, HOST_IOMMU_DEVICE) @@ -38,7 +51,9 @@ struct HostIOMMUDevice { void *agent; /* pointer to agent device, ie. VFIO or VDPA device */ PCIBus *aliased_bus; int aliased_devfn; +#ifdef CONFIG_LINUX HostIOMMUDeviceCaps caps; +#endif }; /** diff --git a/include/system/iommufd.h b/include/system/iommufd.h index cbab75b..283861b 100644 --- a/include/system/iommufd.h +++ b/include/system/iommufd.h @@ -61,6 +61,60 @@ bool iommufd_backend_get_dirty_bitmap(IOMMUFDBackend *be, uint32_t hwpt_id, uint64_t iova, ram_addr_t size, uint64_t page_size, uint64_t *data, Error **errp); +bool iommufd_backend_invalidate_cache(IOMMUFDBackend *be, uint32_t id, + uint32_t data_type, uint32_t entry_len, + uint32_t *entry_num, void *data, + Error **errp); #define TYPE_HOST_IOMMU_DEVICE_IOMMUFD TYPE_HOST_IOMMU_DEVICE "-iommufd" +OBJECT_DECLARE_TYPE(HostIOMMUDeviceIOMMUFD, HostIOMMUDeviceIOMMUFDClass, + HOST_IOMMU_DEVICE_IOMMUFD) + +/* Overload of the host IOMMU device for the iommufd backend */ +struct HostIOMMUDeviceIOMMUFD { + HostIOMMUDevice parent_obj; + + IOMMUFDBackend *iommufd; + uint32_t devid; + uint32_t hwpt_id; +}; + +struct HostIOMMUDeviceIOMMUFDClass { + HostIOMMUDeviceClass parent_class; + + /** + * @attach_hwpt: attach host IOMMU device to IOMMUFD hardware page table. + * VFIO and VDPA device can have different implementation. + * + * Mandatory callback. + * + * @idev: host IOMMU device backed by IOMMUFD backend. + * + * @hwpt_id: ID of IOMMUFD hardware page table. + * + * @errp: pass an Error out when attachment fails. + * + * Returns: true on success, false on failure. + */ + bool (*attach_hwpt)(HostIOMMUDeviceIOMMUFD *idev, uint32_t hwpt_id, + Error **errp); + /** + * @detach_hwpt: detach host IOMMU device from IOMMUFD hardware page table. + * VFIO and VDPA device can have different implementation. + * + * Mandatory callback. + * + * @idev: host IOMMU device backed by IOMMUFD backend. + * + * @errp: pass an Error out when attachment fails. + * + * Returns: true on success, false on failure. + */ + bool (*detach_hwpt)(HostIOMMUDeviceIOMMUFD *idev, Error **errp); +}; + +bool host_iommu_device_iommufd_attach_hwpt(HostIOMMUDeviceIOMMUFD *idev, + uint32_t hwpt_id, Error **errp); +bool host_iommu_device_iommufd_detach_hwpt(HostIOMMUDeviceIOMMUFD *idev, + Error **errp); #endif diff --git a/include/system/kvm.h b/include/system/kvm.h index 62ec131..7cc60d2 100644 --- a/include/system/kvm.h +++ b/include/system/kvm.h @@ -42,6 +42,7 @@ extern bool kvm_gsi_routing_allowed; extern bool kvm_gsi_direct_mapping; extern bool kvm_readonly_mem_allowed; extern bool kvm_msi_use_devid; +extern bool kvm_pre_fault_memory_supported; #define kvm_enabled() (kvm_allowed) /** diff --git a/include/system/memory.h b/include/system/memory.h index fc35a0d..0848690 100644 --- a/include/system/memory.h +++ b/include/system/memory.h @@ -739,21 +739,20 @@ void ram_discard_manager_unregister_listener(RamDiscardManager *rdm, RamDiscardListener *rdl); /** - * memory_get_xlat_addr: Extract addresses from a TLB entry + * memory_translate_iotlb: Extract addresses from a TLB entry. + * Called with rcu_read_lock held. * * @iotlb: pointer to an #IOMMUTLBEntry - * @vaddr: virtual address - * @ram_addr: RAM address - * @read_only: indicates if writes are allowed - * @mr_has_discard_manager: indicates memory is controlled by a - * RamDiscardManager + * @xlat_p: return the offset of the entry from the start of the returned + * MemoryRegion. * @errp: pointer to Error*, to store an error if it happens. * - * Return: true on success, else false setting @errp with error. + * Return: On success, return the MemoryRegion containing the @iotlb translated + * addr. The MemoryRegion must not be accessed after rcu_read_unlock. + * On failure, return NULL, setting @errp with error. */ -bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr, - ram_addr_t *ram_addr, bool *read_only, - bool *mr_has_discard_manager, Error **errp); +MemoryRegion *memory_translate_iotlb(IOMMUTLBEntry *iotlb, hwaddr *xlat_p, + Error **errp); typedef struct CoalescedMemoryRange CoalescedMemoryRange; typedef struct MemoryRegionIoeventfd MemoryRegionIoeventfd; |