diff options
Diffstat (limited to 'include/system')
-rw-r--r-- | include/system/accel-ops.h | 73 | ||||
-rw-r--r-- | include/system/block-backend-global-state.h | 8 | ||||
-rw-r--r-- | include/system/confidential-guest-support.h | 88 | ||||
-rw-r--r-- | include/system/cpus.h | 5 | ||||
-rw-r--r-- | include/system/host_iommu_device.h | 15 | ||||
-rw-r--r-- | include/system/hvf.h | 50 | ||||
-rw-r--r-- | include/system/hvf_int.h | 41 | ||||
-rw-r--r-- | include/system/hw_accel.h | 19 | ||||
-rw-r--r-- | include/system/igvm-cfg.h | 49 | ||||
-rw-r--r-- | include/system/iommufd.h | 61 | ||||
-rw-r--r-- | include/system/kvm.h | 11 | ||||
-rw-r--r-- | include/system/kvm_int.h | 1 | ||||
-rw-r--r-- | include/system/memory.h | 155 | ||||
-rw-r--r-- | include/system/nvmm.h | 23 | ||||
-rw-r--r-- | include/system/os-wasm.h | 104 | ||||
-rw-r--r-- | include/system/ram_addr.h | 1 | ||||
-rw-r--r-- | include/system/ramblock.h | 22 | ||||
-rw-r--r-- | include/system/runstate.h | 54 | ||||
-rw-r--r-- | include/system/vhost-user-backend.h | 2 | ||||
-rw-r--r-- | include/system/whpx.h | 23 |
20 files changed, 611 insertions, 194 deletions
diff --git a/include/system/accel-ops.h b/include/system/accel-ops.h deleted file mode 100644 index 4c99d25..0000000 --- a/include/system/accel-ops.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Accelerator OPS, used for cpus.c module - * - * Copyright 2021 SUSE LLC - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - */ - -#ifndef ACCEL_OPS_H -#define ACCEL_OPS_H - -#include "exec/vaddr.h" -#include "qom/object.h" - -#define ACCEL_OPS_SUFFIX "-ops" -#define TYPE_ACCEL_OPS "accel" ACCEL_OPS_SUFFIX -#define ACCEL_OPS_NAME(name) (name "-" TYPE_ACCEL_OPS) - -DECLARE_CLASS_CHECKERS(AccelOpsClass, ACCEL_OPS, TYPE_ACCEL_OPS) - -/** - * struct AccelOpsClass - accelerator interfaces - * - * This structure is used to abstract accelerator differences from the - * core CPU code. Not all have to be implemented. - */ -struct AccelOpsClass { - /*< private >*/ - ObjectClass parent_class; - /*< public >*/ - - /* initialization function called when accel is chosen */ - void (*ops_init)(AccelOpsClass *ops); - - bool (*cpus_are_resettable)(void); - void (*cpu_reset_hold)(CPUState *cpu); - - void (*create_vcpu_thread)(CPUState *cpu); /* MANDATORY NON-NULL */ - void (*kick_vcpu_thread)(CPUState *cpu); - bool (*cpu_thread_is_idle)(CPUState *cpu); - - void (*synchronize_post_reset)(CPUState *cpu); - void (*synchronize_post_init)(CPUState *cpu); - void (*synchronize_state)(CPUState *cpu); - void (*synchronize_pre_loadvm)(CPUState *cpu); - void (*synchronize_pre_resume)(bool step_pending); - - void (*handle_interrupt)(CPUState *cpu, int mask); - - /** - * @get_virtual_clock: fetch virtual clock - * @set_virtual_clock: set virtual clock - * - * These allow the timer subsystem to defer to the accelerator to - * fetch time. The set function is needed if the accelerator wants - * to track the changes to time as the timer is warped through - * various timer events. - */ - int64_t (*get_virtual_clock)(void); - void (*set_virtual_clock)(int64_t time); - - int64_t (*get_elapsed_ticks)(void); - - /* gdbstub hooks */ - bool (*supports_guest_debug)(void); - int (*update_guest_debug)(CPUState *cpu); - int (*insert_breakpoint)(CPUState *cpu, int type, vaddr addr, vaddr len); - int (*remove_breakpoint)(CPUState *cpu, int type, vaddr addr, vaddr len); - void (*remove_all_breakpoints)(CPUState *cpu); -}; - -#endif /* ACCEL_OPS_H */ diff --git a/include/system/block-backend-global-state.h b/include/system/block-backend-global-state.h index 35b5e83..c384964 100644 --- a/include/system/block-backend-global-state.h +++ b/include/system/block-backend-global-state.h @@ -55,7 +55,7 @@ void monitor_remove_blk(BlockBackend *blk); BlockBackendPublic *blk_get_public(BlockBackend *blk); -void blk_remove_bs(BlockBackend *blk); +void GRAPH_UNLOCKED blk_remove_bs(BlockBackend *blk); int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp); int blk_replace_bs(BlockBackend *blk, BlockDriverState *new_bs, Error **errp); bool GRAPH_RDLOCK bdrv_has_blk(BlockDriverState *bs); @@ -78,8 +78,8 @@ int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags); void blk_aio_cancel(BlockAIOCB *acb); int blk_commit_all(void); bool blk_in_drain(BlockBackend *blk); -void blk_drain(BlockBackend *blk); -void blk_drain_all(void); +void GRAPH_UNLOCKED blk_drain(BlockBackend *blk); +void GRAPH_UNLOCKED blk_drain_all(void); void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error, BlockdevOnError on_write_error); bool blk_supports_write_perm(BlockBackend *blk); @@ -109,7 +109,7 @@ int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz); int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo); void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg); -void blk_io_limits_disable(BlockBackend *blk); +void GRAPH_UNLOCKED blk_io_limits_disable(BlockBackend *blk); void blk_io_limits_enable(BlockBackend *blk, const char *group); void blk_io_limits_update_group(BlockBackend *blk, const char *group); void blk_set_force_allow_inactivate(BlockBackend *blk); diff --git a/include/system/confidential-guest-support.h b/include/system/confidential-guest-support.h index ea46b50..0cc8b26 100644 --- a/include/system/confidential-guest-support.h +++ b/include/system/confidential-guest-support.h @@ -19,6 +19,7 @@ #define QEMU_CONFIDENTIAL_GUEST_SUPPORT_H #include "qom/object.h" +#include "exec/hwaddr.h" #define TYPE_CONFIDENTIAL_GUEST_SUPPORT "confidential-guest-support" OBJECT_DECLARE_TYPE(ConfidentialGuestSupport, @@ -26,6 +27,40 @@ OBJECT_DECLARE_TYPE(ConfidentialGuestSupport, CONFIDENTIAL_GUEST_SUPPORT) +typedef enum ConfidentialGuestPlatformType { + CGS_PLATFORM_SEV, + CGS_PLATFORM_SEV_ES, + CGS_PLATFORM_SEV_SNP, +} ConfidentialGuestPlatformType; + +typedef enum ConfidentialGuestMemoryType { + CGS_MEM_RAM, + CGS_MEM_RESERVED, + CGS_MEM_ACPI, + CGS_MEM_NVS, + CGS_MEM_UNUSABLE, +} ConfidentialGuestMemoryType; + +typedef struct ConfidentialGuestMemoryMapEntry { + uint64_t gpa; + uint64_t size; + ConfidentialGuestMemoryType type; +} ConfidentialGuestMemoryMapEntry; + +typedef enum ConfidentialGuestPageType { + CGS_PAGE_TYPE_NORMAL, + CGS_PAGE_TYPE_VMSA, + CGS_PAGE_TYPE_ZERO, + CGS_PAGE_TYPE_UNMEASURED, + CGS_PAGE_TYPE_SECRETS, + CGS_PAGE_TYPE_CPUID, + CGS_PAGE_TYPE_REQUIRED_MEMORY, +} ConfidentialGuestPageType; + +typedef enum ConfidentialGuestPolicyType { + GUEST_POLICY_SEV, +} ConfidentialGuestPolicyType; + struct ConfidentialGuestSupport { Object parent; @@ -64,6 +99,59 @@ typedef struct ConfidentialGuestSupportClass { int (*kvm_init)(ConfidentialGuestSupport *cgs, Error **errp); int (*kvm_reset)(ConfidentialGuestSupport *cgs, Error **errp); + + /* + * Check to see if this confidential guest supports a particular + * platform or configuration. + * + * Return true if supported or false if not supported. + */ + bool (*check_support)(ConfidentialGuestPlatformType platform, + uint16_t platform_version, uint8_t highest_vtl, + uint64_t shared_gpa_boundary); + + /* + * Configure part of the state of a guest for a particular set of data, page + * type and gpa. This can be used for example to pre-populate and measure + * guest memory contents, define private ranges or set the initial CPU state + * for one or more CPUs. + * + * If memory_type is CGS_PAGE_TYPE_VMSA then ptr points to the initial CPU + * context for a virtual CPU. The format of the data depends on the type of + * confidential virtual machine. For example, for SEV-ES ptr will point to a + * vmcb_save_area structure that should be copied into guest memory at the + * address specified in gpa. The cpu_index parameter contains the index of + * the CPU the VMSA applies to. + */ + int (*set_guest_state)(hwaddr gpa, uint8_t *ptr, uint64_t len, + ConfidentialGuestPageType memory_type, + uint16_t cpu_index, Error **errp); + + /* + * Set the guest policy. The policy can be used to configure the + * confidential platform, such as if debug is enabled or not and can contain + * information about expected launch measurements, signed verification of + * guest configuration and other platform data. + * + * The format of the policy data is specific to each platform. For example, + * SEV-SNP uses a policy bitfield in the 'policy' argument and provides an + * ID block and ID authentication in the 'policy_data' parameters. The type + * of policy data is identified by the 'policy_type' argument. + */ + int (*set_guest_policy)(ConfidentialGuestPolicyType policy_type, + uint64_t policy, + void *policy_data1, uint32_t policy_data1_size, + void *policy_data2, uint32_t policy_data2_size, + Error **errp); + + /* + * Iterate the system memory map, getting the entry with the given index + * that can be populated into guest memory. + * + * Returns 0 for ok, 1 if the index is out of range and -1 on error. + */ + int (*get_mem_map_entry)(int index, ConfidentialGuestMemoryMapEntry *entry, + Error **errp); } ConfidentialGuestSupportClass; static inline int confidential_guest_kvm_init(ConfidentialGuestSupport *cgs, diff --git a/include/system/cpus.h b/include/system/cpus.h index 3226c76..69be6a7 100644 --- a/include/system/cpus.h +++ b/include/system/cpus.h @@ -7,11 +7,6 @@ void cpus_register_accel(const AccelOpsClass *i); /* return registers ops */ const AccelOpsClass *cpus_get_accel(void); -/* accel/dummy-cpus.c */ - -/* Create a dummy vcpu for AccelOpsClass->create_vcpu_thread */ -void dummy_start_vcpu_thread(CPUState *); - /* interface available for cpus accelerator threads */ /* For temporary buffers for forming a name */ diff --git a/include/system/host_iommu_device.h b/include/system/host_iommu_device.h index 809cced..ab849a4 100644 --- a/include/system/host_iommu_device.h +++ b/include/system/host_iommu_device.h @@ -14,6 +14,13 @@ #include "qom/object.h" #include "qapi/error.h" +#ifdef CONFIG_LINUX +#include "linux/iommufd.h" + +typedef union VendorCaps { + struct iommu_hw_info_vtd vtd; + struct iommu_hw_info_arm_smmuv3 smmuv3; +} VendorCaps; /** * struct HostIOMMUDeviceCaps - Define host IOMMU device capabilities. @@ -22,11 +29,17 @@ * * @hw_caps: host platform IOMMU capabilities (e.g. on IOMMUFD this represents * the @out_capabilities value returned from IOMMU_GET_HW_INFO ioctl) + * + * @vendor_caps: host platform IOMMU vendor specific capabilities (e.g. on + * IOMMUFD this represents a user-space buffer filled by kernel + * with host IOMMU @type specific hardware information data) */ typedef struct HostIOMMUDeviceCaps { uint32_t type; uint64_t hw_caps; + VendorCaps vendor_caps; } HostIOMMUDeviceCaps; +#endif #define TYPE_HOST_IOMMU_DEVICE "host-iommu-device" OBJECT_DECLARE_TYPE(HostIOMMUDevice, HostIOMMUDeviceClass, HOST_IOMMU_DEVICE) @@ -38,7 +51,9 @@ struct HostIOMMUDevice { void *agent; /* pointer to agent device, ie. VFIO or VDPA device */ PCIBus *aliased_bus; int aliased_devfn; +#ifdef CONFIG_LINUX HostIOMMUDeviceCaps caps; +#endif }; /** diff --git a/include/system/hvf.h b/include/system/hvf.h index 730f927..d3dcf08 100644 --- a/include/system/hvf.h +++ b/include/system/hvf.h @@ -14,19 +14,21 @@ #define HVF_H #include "qemu/accel.h" -#include "qom/object.h" #ifdef COMPILING_PER_TARGET -#include "cpu.h" +# ifdef CONFIG_HVF +# define CONFIG_HVF_IS_POSSIBLE +# endif /* !CONFIG_HVF */ +#else +# define CONFIG_HVF_IS_POSSIBLE +#endif /* COMPILING_PER_TARGET */ -#ifdef CONFIG_HVF +#ifdef CONFIG_HVF_IS_POSSIBLE extern bool hvf_allowed; #define hvf_enabled() (hvf_allowed) -#else /* !CONFIG_HVF */ +#else /* !CONFIG_HVF_IS_POSSIBLE */ #define hvf_enabled() 0 -#endif /* !CONFIG_HVF */ - -#endif /* COMPILING_PER_TARGET */ +#endif /* !CONFIG_HVF_IS_POSSIBLE */ #define TYPE_HVF_ACCEL ACCEL_CLASS_NAME("hvf") @@ -34,38 +36,4 @@ typedef struct HVFState HVFState; DECLARE_INSTANCE_CHECKER(HVFState, HVF_STATE, TYPE_HVF_ACCEL) -#ifdef COMPILING_PER_TARGET -struct hvf_sw_breakpoint { - vaddr pc; - vaddr saved_insn; - int use_count; - QTAILQ_ENTRY(hvf_sw_breakpoint) entry; -}; - -struct hvf_sw_breakpoint *hvf_find_sw_breakpoint(CPUState *cpu, - vaddr pc); -int hvf_sw_breakpoints_active(CPUState *cpu); - -int hvf_arch_insert_sw_breakpoint(CPUState *cpu, struct hvf_sw_breakpoint *bp); -int hvf_arch_remove_sw_breakpoint(CPUState *cpu, struct hvf_sw_breakpoint *bp); -int hvf_arch_insert_hw_breakpoint(vaddr addr, vaddr len, int type); -int hvf_arch_remove_hw_breakpoint(vaddr addr, vaddr len, int type); -void hvf_arch_remove_all_hw_breakpoints(void); - -/* - * hvf_update_guest_debug: - * @cs: CPUState for the CPU to update - * - * Update guest to enable or disable debugging. Per-arch specifics will be - * handled by calling down to hvf_arch_update_guest_debug. - */ -int hvf_update_guest_debug(CPUState *cpu); -void hvf_arch_update_guest_debug(CPUState *cpu); - -/* - * Return whether the guest supports debugging. - */ -bool hvf_arch_supports_guest_debug(void); -#endif /* COMPILING_PER_TARGET */ - #endif diff --git a/include/system/hvf_int.h b/include/system/hvf_int.h index 42ae184..a3b06a3 100644 --- a/include/system/hvf_int.h +++ b/include/system/hvf_int.h @@ -11,6 +11,11 @@ #ifndef HVF_INT_H #define HVF_INT_H +#include "qemu/queue.h" +#include "exec/vaddr.h" +#include "qom/object.h" +#include "accel/accel-ops.h" + #ifdef __aarch64__ #include <Hypervisor/Hypervisor.h> typedef hv_vcpu_t hvf_vcpuid; @@ -41,7 +46,8 @@ typedef struct hvf_vcpu_caps { } hvf_vcpu_caps; struct HVFState { - AccelState parent; + AccelState parent_obj; + hvf_slot slots[32]; int num_slots; @@ -57,7 +63,6 @@ struct AccelCPUState { bool vtimer_masked; sigset_t unblock_ipi_mask; bool guest_debug_enabled; - bool dirty; }; void assert_hvf_ok_impl(hv_return_t ret, const char *file, unsigned int line, @@ -74,4 +79,36 @@ int hvf_put_registers(CPUState *); int hvf_get_registers(CPUState *); void hvf_kick_vcpu_thread(CPUState *cpu); +struct hvf_sw_breakpoint { + vaddr pc; + vaddr saved_insn; + int use_count; + QTAILQ_ENTRY(hvf_sw_breakpoint) entry; +}; + +struct hvf_sw_breakpoint *hvf_find_sw_breakpoint(CPUState *cpu, + vaddr pc); +int hvf_sw_breakpoints_active(CPUState *cpu); + +int hvf_arch_insert_sw_breakpoint(CPUState *cpu, struct hvf_sw_breakpoint *bp); +int hvf_arch_remove_sw_breakpoint(CPUState *cpu, struct hvf_sw_breakpoint *bp); +int hvf_arch_insert_hw_breakpoint(vaddr addr, vaddr len, int type); +int hvf_arch_remove_hw_breakpoint(vaddr addr, vaddr len, int type); +void hvf_arch_remove_all_hw_breakpoints(void); + +/* + * hvf_update_guest_debug: + * @cs: CPUState for the CPU to update + * + * Update guest to enable or disable debugging. Per-arch specifics will be + * handled by calling down to hvf_arch_update_guest_debug. + */ +int hvf_update_guest_debug(CPUState *cpu); +void hvf_arch_update_guest_debug(CPUState *cpu); + +/* + * Return whether the guest supports debugging. + */ +bool hvf_arch_supports_guest_debug(void); + #endif diff --git a/include/system/hw_accel.h b/include/system/hw_accel.h index 380e9e6..fa9228d 100644 --- a/include/system/hw_accel.h +++ b/include/system/hw_accel.h @@ -17,9 +17,26 @@ #include "system/whpx.h" #include "system/nvmm.h" +/** + * cpu_synchronize_state: + * cpu_synchronize_pre_loadvm: + * @cpu: The vCPU to synchronize. + * + * Request to synchronize QEMU vCPU registers from the hardware accelerator + * (the hardware accelerator is the reference). + */ void cpu_synchronize_state(CPUState *cpu); +void cpu_synchronize_pre_loadvm(CPUState *cpu); + +/** + * cpu_synchronize_post_reset: + * cpu_synchronize_post_init: + * @cpu: The vCPU to synchronize. + * + * Request to synchronize QEMU vCPU registers to the hardware accelerator + * (QEMU is the reference). + */ void cpu_synchronize_post_reset(CPUState *cpu); void cpu_synchronize_post_init(CPUState *cpu); -void cpu_synchronize_pre_loadvm(CPUState *cpu); #endif /* QEMU_HW_ACCEL_H */ diff --git a/include/system/igvm-cfg.h b/include/system/igvm-cfg.h new file mode 100644 index 0000000..944f23a --- /dev/null +++ b/include/system/igvm-cfg.h @@ -0,0 +1,49 @@ +/* + * QEMU IGVM interface + * + * Copyright (C) 2024 SUSE + * + * Authors: + * Roy Hopkins <roy.hopkins@randomman.co.uk> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef QEMU_IGVM_CFG_H +#define QEMU_IGVM_CFG_H + +#include "qom/object.h" + +typedef struct IgvmCfg { + ObjectClass parent_class; + + /* + * filename: Filename that specifies a file that contains the configuration + * of the guest in Independent Guest Virtual Machine (IGVM) + * format. + */ + char *filename; +} IgvmCfg; + +typedef struct IgvmCfgClass { + ObjectClass parent_class; + + /* + * If an IGVM filename has been specified then process the IGVM file. + * Performs a no-op if no filename has been specified. + * If onlyVpContext is true then only the IGVM_VHT_VP_CONTEXT entries + * in the IGVM file will be processed, allowing information about the + * CPU state to be determined before processing the entire file. + * + * Returns 0 for ok and -1 on error. + */ + int (*process)(IgvmCfg *cfg, ConfidentialGuestSupport *cgs, + bool onlyVpContext, Error **errp); + +} IgvmCfgClass; + +#define TYPE_IGVM_CFG "igvm-cfg" + +OBJECT_DECLARE_TYPE(IgvmCfg, IgvmCfgClass, IGVM_CFG) + +#endif diff --git a/include/system/iommufd.h b/include/system/iommufd.h index cbab75b..c9c72ff 100644 --- a/include/system/iommufd.h +++ b/include/system/iommufd.h @@ -32,6 +32,7 @@ struct IOMMUFDBackend { /*< protected >*/ int fd; /* /dev/iommu file descriptor */ bool owned; /* is the /dev/iommu opened internally */ + Error *cpr_blocker;/* set if be does not support CPR */ uint32_t users; /*< public >*/ @@ -43,6 +44,9 @@ void iommufd_backend_disconnect(IOMMUFDBackend *be); bool iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id, Error **errp); void iommufd_backend_free_id(IOMMUFDBackend *be, uint32_t id); +int iommufd_backend_map_file_dma(IOMMUFDBackend *be, uint32_t ioas_id, + hwaddr iova, ram_addr_t size, int fd, + unsigned long start, bool readonly); int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova, ram_addr_t size, void *vaddr, bool readonly); int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id, @@ -61,6 +65,63 @@ bool iommufd_backend_get_dirty_bitmap(IOMMUFDBackend *be, uint32_t hwpt_id, uint64_t iova, ram_addr_t size, uint64_t page_size, uint64_t *data, Error **errp); +bool iommufd_backend_invalidate_cache(IOMMUFDBackend *be, uint32_t id, + uint32_t data_type, uint32_t entry_len, + uint32_t *entry_num, void *data, + Error **errp); + +bool iommufd_change_process_capable(IOMMUFDBackend *be); +bool iommufd_change_process(IOMMUFDBackend *be, Error **errp); #define TYPE_HOST_IOMMU_DEVICE_IOMMUFD TYPE_HOST_IOMMU_DEVICE "-iommufd" +OBJECT_DECLARE_TYPE(HostIOMMUDeviceIOMMUFD, HostIOMMUDeviceIOMMUFDClass, + HOST_IOMMU_DEVICE_IOMMUFD) + +/* Overload of the host IOMMU device for the iommufd backend */ +struct HostIOMMUDeviceIOMMUFD { + HostIOMMUDevice parent_obj; + + IOMMUFDBackend *iommufd; + uint32_t devid; + uint32_t hwpt_id; +}; + +struct HostIOMMUDeviceIOMMUFDClass { + HostIOMMUDeviceClass parent_class; + + /** + * @attach_hwpt: attach host IOMMU device to IOMMUFD hardware page table. + * VFIO and VDPA device can have different implementation. + * + * Mandatory callback. + * + * @idev: host IOMMU device backed by IOMMUFD backend. + * + * @hwpt_id: ID of IOMMUFD hardware page table. + * + * @errp: pass an Error out when attachment fails. + * + * Returns: true on success, false on failure. + */ + bool (*attach_hwpt)(HostIOMMUDeviceIOMMUFD *idev, uint32_t hwpt_id, + Error **errp); + /** + * @detach_hwpt: detach host IOMMU device from IOMMUFD hardware page table. + * VFIO and VDPA device can have different implementation. + * + * Mandatory callback. + * + * @idev: host IOMMU device backed by IOMMUFD backend. + * + * @errp: pass an Error out when attachment fails. + * + * Returns: true on success, false on failure. + */ + bool (*detach_hwpt)(HostIOMMUDeviceIOMMUFD *idev, Error **errp); +}; + +bool host_iommu_device_iommufd_attach_hwpt(HostIOMMUDeviceIOMMUFD *idev, + uint32_t hwpt_id, Error **errp); +bool host_iommu_device_iommufd_detach_hwpt(HostIOMMUDeviceIOMMUFD *idev, + Error **errp); #endif diff --git a/include/system/kvm.h b/include/system/kvm.h index b690dda..3c7d314 100644 --- a/include/system/kvm.h +++ b/include/system/kvm.h @@ -42,6 +42,7 @@ extern bool kvm_gsi_routing_allowed; extern bool kvm_gsi_direct_mapping; extern bool kvm_readonly_mem_allowed; extern bool kvm_msi_use_devid; +extern bool kvm_pre_fault_memory_supported; #define kvm_enabled() (kvm_allowed) /** @@ -194,6 +195,7 @@ bool kvm_has_sync_mmu(void); int kvm_has_vcpu_events(void); int kvm_max_nested_state_length(void); int kvm_has_gsi_routing(void); +void kvm_close(void); /** * kvm_arm_supports_user_irq @@ -316,14 +318,6 @@ int kvm_create_device(KVMState *s, uint64_t type, bool test); bool kvm_device_supported(int vmfd, uint64_t type); /** - * kvm_create_vcpu - Gets a parked KVM vCPU or creates a KVM vCPU - * @cpu: QOM CPUState object for which KVM vCPU has to be fetched/created. - * - * @returns: 0 when success, errno (<0) when failed. - */ -int kvm_create_vcpu(CPUState *cpu); - -/** * kvm_park_vcpu - Park QEMU KVM vCPU context * @cpu: QOM CPUState object for which QEMU KVM vCPU context has to be parked. * @@ -376,6 +370,7 @@ int kvm_arch_get_default_type(MachineState *ms); int kvm_arch_init(MachineState *ms, KVMState *s); +int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp); int kvm_arch_init_vcpu(CPUState *cpu); int kvm_arch_destroy_vcpu(CPUState *cpu); diff --git a/include/system/kvm_int.h b/include/system/kvm_int.h index 756a3c0..9247493 100644 --- a/include/system/kvm_int.h +++ b/include/system/kvm_int.h @@ -14,6 +14,7 @@ #include "qemu/accel.h" #include "qemu/queue.h" #include "system/kvm.h" +#include "accel/accel-ops.h" #include "hw/boards.h" #include "hw/i386/topology.h" #include "io/channel-socket.h" diff --git a/include/system/memory.h b/include/system/memory.h index fbbf4cf..e2cd6ed 100644 --- a/include/system/memory.h +++ b/include/system/memory.h @@ -19,7 +19,6 @@ #include "exec/memattrs.h" #include "exec/memop.h" #include "exec/ramlist.h" -#include "exec/tswap.h" #include "qemu/bswap.h" #include "qemu/queue.h" #include "qemu/int128.h" @@ -109,15 +108,34 @@ struct MemoryRegionSection { typedef struct IOMMUTLBEntry IOMMUTLBEntry; -/* See address_space_translate: bit 0 is read, bit 1 is write. */ +/* + * See address_space_translate: + * - bit 0 : read + * - bit 1 : write + * - bit 2 : exec + * - bit 3 : priv + * - bit 4 : global + * - bit 5 : untranslated only + */ typedef enum { IOMMU_NONE = 0, IOMMU_RO = 1, IOMMU_WO = 2, IOMMU_RW = 3, + IOMMU_EXEC = 4, + IOMMU_PRIV = 8, + IOMMU_GLOBAL = 16, + IOMMU_UNTRANSLATED_ONLY = 32, } IOMMUAccessFlags; -#define IOMMU_ACCESS_FLAG(r, w) (((r) ? IOMMU_RO : 0) | ((w) ? IOMMU_WO : 0)) +#define IOMMU_ACCESS_FLAG(r, w) (((r) ? IOMMU_RO : 0) | \ + ((w) ? IOMMU_WO : 0)) +#define IOMMU_ACCESS_FLAG_FULL(r, w, x, p, g, uo) \ + (IOMMU_ACCESS_FLAG(r, w) | \ + ((x) ? IOMMU_EXEC : 0) | \ + ((p) ? IOMMU_PRIV : 0) | \ + ((g) ? IOMMU_GLOBAL : 0) | \ + ((uo) ? IOMMU_UNTRANSLATED_ONLY : 0)) struct IOMMUTLBEntry { AddressSpace *target_as; @@ -125,6 +143,7 @@ struct IOMMUTLBEntry { hwaddr translated_addr; hwaddr addr_mask; /* 0xfff = 4k translation */ IOMMUAccessFlags perm; + uint32_t pasid; }; /* @@ -183,6 +202,7 @@ struct IOMMUNotifier { hwaddr start; hwaddr end; int iommu_idx; + void *opaque; QLIST_ENTRY(IOMMUNotifier) node; }; typedef struct IOMMUNotifier IOMMUNotifier; @@ -575,8 +595,20 @@ static inline void ram_discard_listener_init(RamDiscardListener *rdl, rdl->double_discard_supported = double_discard_supported; } -typedef int (*ReplayRamPopulate)(MemoryRegionSection *section, void *opaque); -typedef void (*ReplayRamDiscard)(MemoryRegionSection *section, void *opaque); +/** + * typedef ReplayRamDiscardState: + * + * The callback handler for #RamDiscardManagerClass.replay_populated/ + * #RamDiscardManagerClass.replay_discarded to invoke on populated/discarded + * parts. + * + * @section: the #MemoryRegionSection of populated/discarded part + * @opaque: pointer to forward to the callback + * + * Returns 0 on success, or a negative error if failed. + */ +typedef int (*ReplayRamDiscardState)(MemoryRegionSection *section, + void *opaque); /* * RamDiscardManagerClass: @@ -650,36 +682,38 @@ struct RamDiscardManagerClass { /** * @replay_populated: * - * Call the #ReplayRamPopulate callback for all populated parts within the - * #MemoryRegionSection via the #RamDiscardManager. + * Call the #ReplayRamDiscardState callback for all populated parts within + * the #MemoryRegionSection via the #RamDiscardManager. * * In case any call fails, no further calls are made. * * @rdm: the #RamDiscardManager * @section: the #MemoryRegionSection - * @replay_fn: the #ReplayRamPopulate callback + * @replay_fn: the #ReplayRamDiscardState callback * @opaque: pointer to forward to the callback * * Returns 0 on success, or a negative error if any notification failed. */ int (*replay_populated)(const RamDiscardManager *rdm, MemoryRegionSection *section, - ReplayRamPopulate replay_fn, void *opaque); + ReplayRamDiscardState replay_fn, void *opaque); /** * @replay_discarded: * - * Call the #ReplayRamDiscard callback for all discarded parts within the - * #MemoryRegionSection via the #RamDiscardManager. + * Call the #ReplayRamDiscardState callback for all discarded parts within + * the #MemoryRegionSection via the #RamDiscardManager. * * @rdm: the #RamDiscardManager * @section: the #MemoryRegionSection - * @replay_fn: the #ReplayRamDiscard callback + * @replay_fn: the #ReplayRamDiscardState callback * @opaque: pointer to forward to the callback + * + * Returns 0 on success, or a negative error if any notification failed. */ - void (*replay_discarded)(const RamDiscardManager *rdm, - MemoryRegionSection *section, - ReplayRamDiscard replay_fn, void *opaque); + int (*replay_discarded)(const RamDiscardManager *rdm, + MemoryRegionSection *section, + ReplayRamDiscardState replay_fn, void *opaque); /** * @register_listener: @@ -720,15 +754,41 @@ uint64_t ram_discard_manager_get_min_granularity(const RamDiscardManager *rdm, bool ram_discard_manager_is_populated(const RamDiscardManager *rdm, const MemoryRegionSection *section); +/** + * ram_discard_manager_replay_populated: + * + * A wrapper to call the #RamDiscardManagerClass.replay_populated callback + * of the #RamDiscardManager. + * + * @rdm: the #RamDiscardManager + * @section: the #MemoryRegionSection + * @replay_fn: the #ReplayRamDiscardState callback + * @opaque: pointer to forward to the callback + * + * Returns 0 on success, or a negative error if any notification failed. + */ int ram_discard_manager_replay_populated(const RamDiscardManager *rdm, MemoryRegionSection *section, - ReplayRamPopulate replay_fn, + ReplayRamDiscardState replay_fn, void *opaque); -void ram_discard_manager_replay_discarded(const RamDiscardManager *rdm, - MemoryRegionSection *section, - ReplayRamDiscard replay_fn, - void *opaque); +/** + * ram_discard_manager_replay_discarded: + * + * A wrapper to call the #RamDiscardManagerClass.replay_discarded callback + * of the #RamDiscardManager. + * + * @rdm: the #RamDiscardManager + * @section: the #MemoryRegionSection + * @replay_fn: the #ReplayRamDiscardState callback + * @opaque: pointer to forward to the callback + * + * Returns 0 on success, or a negative error if any notification failed. + */ +int ram_discard_manager_replay_discarded(const RamDiscardManager *rdm, + MemoryRegionSection *section, + ReplayRamDiscardState replay_fn, + void *opaque); void ram_discard_manager_register_listener(RamDiscardManager *rdm, RamDiscardListener *rdl, @@ -738,21 +798,20 @@ void ram_discard_manager_unregister_listener(RamDiscardManager *rdm, RamDiscardListener *rdl); /** - * memory_get_xlat_addr: Extract addresses from a TLB entry + * memory_translate_iotlb: Extract addresses from a TLB entry. + * Called with rcu_read_lock held. * * @iotlb: pointer to an #IOMMUTLBEntry - * @vaddr: virtual address - * @ram_addr: RAM address - * @read_only: indicates if writes are allowed - * @mr_has_discard_manager: indicates memory is controlled by a - * RamDiscardManager + * @xlat_p: return the offset of the entry from the start of the returned + * MemoryRegion. * @errp: pointer to Error*, to store an error if it happens. * - * Return: true on success, else false setting @errp with error. + * Return: On success, return the MemoryRegion containing the @iotlb translated + * addr. The MemoryRegion must not be accessed after rcu_read_unlock. + * On failure, return NULL, setting @errp with error. */ -bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr, - ram_addr_t *ram_addr, bool *read_only, - bool *mr_has_discard_manager, Error **errp); +MemoryRegion *memory_translate_iotlb(IOMMUTLBEntry *iotlb, hwaddr *xlat_p, + Error **errp); typedef struct CoalescedMemoryRange CoalescedMemoryRange; typedef struct MemoryRegionIoeventfd MemoryRegionIoeventfd; @@ -1212,6 +1271,36 @@ MemoryRegionSection *memory_region_section_new_copy(MemoryRegionSection *s); void memory_region_section_free_copy(MemoryRegionSection *s); /** + * memory_region_section_intersect_range: Adjust the memory section to cover + * the intersection with the given range. + * + * @s: the #MemoryRegionSection to be adjusted + * @offset: the offset of the given range in the memory region + * @size: the size of the given range + * + * Returns false if the intersection is empty, otherwise returns true. + */ +static inline bool memory_region_section_intersect_range(MemoryRegionSection *s, + uint64_t offset, + uint64_t size) +{ + uint64_t start = MAX(s->offset_within_region, offset); + Int128 end = int128_min(int128_add(int128_make64(s->offset_within_region), + s->size), + int128_add(int128_make64(offset), + int128_make64(size))); + + if (int128_le(end, int128_make64(start))) { + return false; + } + + s->offset_within_address_space += start - s->offset_within_region; + s->offset_within_region = start; + s->size = int128_sub(end, int128_make64(start)); + return true; +} + +/** * memory_region_init: Initialize a memory region * * The region typically acts as a container for other memory regions. Use @@ -2469,13 +2558,13 @@ static inline bool memory_region_has_ram_discard_manager(MemoryRegion *mr) * * This function must not be called for a mapped #MemoryRegion, a #MemoryRegion * that does not cover RAM, or a #MemoryRegion that already has a - * #RamDiscardManager assigned. + * #RamDiscardManager assigned. Return 0 if the rdm is set successfully. * * @mr: the #MemoryRegion * @rdm: #RamDiscardManager to set */ -void memory_region_set_ram_discard_manager(MemoryRegion *mr, - RamDiscardManager *rdm); +int memory_region_set_ram_discard_manager(MemoryRegion *mr, + RamDiscardManager *rdm); /** * memory_region_find: translate an address/size relative to a diff --git a/include/system/nvmm.h b/include/system/nvmm.h index 6971ddb..7390def 100644 --- a/include/system/nvmm.h +++ b/include/system/nvmm.h @@ -13,17 +13,18 @@ #define QEMU_NVMM_H #ifdef COMPILING_PER_TARGET - -#ifdef CONFIG_NVMM - -int nvmm_enabled(void); - -#else /* CONFIG_NVMM */ - -#define nvmm_enabled() (0) - -#endif /* CONFIG_NVMM */ - +# ifdef CONFIG_NVMM +# define CONFIG_NVMM_IS_POSSIBLE +# endif /* !CONFIG_NVMM */ +#else +# define CONFIG_NVMM_IS_POSSIBLE #endif /* COMPILING_PER_TARGET */ +#ifdef CONFIG_NVMM_IS_POSSIBLE +extern bool nvmm_allowed; +#define nvmm_enabled() (nvmm_allowed) +#else /* !CONFIG_NVMM_IS_POSSIBLE */ +#define nvmm_enabled() 0 +#endif /* !CONFIG_NVMM_IS_POSSIBLE */ + #endif /* QEMU_NVMM_H */ diff --git a/include/system/os-wasm.h b/include/system/os-wasm.h new file mode 100644 index 0000000..3abb3aa --- /dev/null +++ b/include/system/os-wasm.h @@ -0,0 +1,104 @@ +/* SPDX-License-Identifier: MIT */ +/* + * posix specific declarations forked from os-posix.h, removing functions not + * working on Emscripten + * + * Copyright (c) 2003-2008 Fabrice Bellard + * Copyright (c) 2010 Jes Sorensen <Jes.Sorensen@redhat.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef QEMU_OS_WASM_H +#define QEMU_OS_WASM_H + +#include <sys/mman.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <netinet/tcp.h> +#include <arpa/inet.h> +#include <netdb.h> +#include <sys/un.h> + +#ifdef CONFIG_SYSMACROS +#include <sys/sysmacros.h> +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +void os_set_line_buffering(void); +void os_setup_early_signal_handling(void); +void os_set_proc_name(const char *s); +void os_setup_signal_handling(void); +void os_setup_limits(void); +void os_setup_post(void); +int os_mlock(bool on_fault); +static inline int os_set_daemonize(bool d) +{ + return -1; +}; +bool is_daemonized(void); +static inline void os_daemonize(void) {} + +/** + * qemu_alloc_stack: + * @sz: pointer to a size_t holding the requested usable stack size + * + * Allocate memory that can be used as a stack, for instance for + * coroutines. If the memory cannot be allocated, this function + * will abort (like g_malloc()). This function also inserts an + * additional guard page to catch a potential stack overflow. + * Note that the memory required for the guard page and alignment + * and minimal stack size restrictions will increase the value of sz. + * + * The allocated stack must be freed with qemu_free_stack(). + * + * Returns: pointer to (the lowest address of) the stack memory. + */ +void *qemu_alloc_stack(size_t *sz); + +/** + * qemu_free_stack: + * @stack: stack to free + * @sz: size of stack in bytes + * + * Free a stack allocated via qemu_alloc_stack(). Note that sz must + * be exactly the adjusted stack size returned by qemu_alloc_stack. + */ +void qemu_free_stack(void *stack, size_t sz); + +/* POSIX and Mingw32 differ in the name of the stdio lock functions. */ + +static inline void qemu_flockfile(FILE *f) +{ + flockfile(f); +} + +static inline void qemu_funlockfile(FILE *f) +{ + funlockfile(f); +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/system/ram_addr.h b/include/system/ram_addr.h index b4e4425..15a1b1a 100644 --- a/include/system/ram_addr.h +++ b/include/system/ram_addr.h @@ -24,7 +24,6 @@ #include "exec/cputlb.h" #include "exec/ramlist.h" #include "system/ramblock.h" -#include "exec/exec-all.h" #include "system/memory.h" #include "exec/target_page.h" #include "qemu/rcu.h" diff --git a/include/system/ramblock.h b/include/system/ramblock.h index d8a116b..87e847e 100644 --- a/include/system/ramblock.h +++ b/include/system/ramblock.h @@ -22,6 +22,10 @@ #include "exec/cpu-common.h" #include "qemu/rcu.h" #include "exec/ramlist.h" +#include "system/hostmem.h" + +#define TYPE_RAM_BLOCK_ATTRIBUTES "ram-block-attributes" +OBJECT_DECLARE_SIMPLE_TYPE(RamBlockAttributes, RAM_BLOCK_ATTRIBUTES) struct RAMBlock { struct rcu_head rcu; @@ -42,6 +46,7 @@ struct RAMBlock { int fd; uint64_t fd_offset; int guest_memfd; + RamBlockAttributes *attributes; size_t page_size; /* dirty bitmap used during migration */ unsigned long *bmap; @@ -91,4 +96,21 @@ struct RAMBlock { ram_addr_t postcopy_length; }; +struct RamBlockAttributes { + Object parent; + + RAMBlock *ram_block; + + /* 1-setting of the bitmap represents ram is populated (shared) */ + unsigned bitmap_size; + unsigned long *bitmap; + + QLIST_HEAD(, RamDiscardListener) rdl_list; +}; + +RamBlockAttributes *ram_block_attributes_create(RAMBlock *ram_block); +void ram_block_attributes_destroy(RamBlockAttributes *attr); +int ram_block_attributes_state_change(RamBlockAttributes *attr, uint64_t offset, + uint64_t size, bool to_discard); + #endif diff --git a/include/system/runstate.h b/include/system/runstate.h index bffc371..929379a 100644 --- a/include/system/runstate.h +++ b/include/system/runstate.h @@ -12,29 +12,76 @@ bool runstate_needs_reset(void); void runstate_replay_enable(void); typedef void VMChangeStateHandler(void *opaque, bool running, RunState state); +typedef int VMChangeStateHandlerWithRet(void *opaque, bool running, RunState state); +/** + * qemu_add_vm_change_state_handler: + * @cb: the callback to invoke + * @opaque: user data passed to the callback + * + * Register a callback function that is invoked when the vm starts or stops + * running. + * + * Returns: an entry to be freed using qemu_del_vm_change_state_handler() + */ VMChangeStateEntry *qemu_add_vm_change_state_handler(VMChangeStateHandler *cb, void *opaque); +/** + * qemu_add_vm_change_state_handler_prio: + * @cb: the callback to invoke + * @opaque: user data passed to the callback + * @priority: low priorities execute first when the vm runs and the reverse is + * true when the vm stops + * + * Register a callback function that is invoked when the vm starts or stops + * running. + * + * Returns: an entry to be freed using qemu_del_vm_change_state_handler() + */ VMChangeStateEntry *qemu_add_vm_change_state_handler_prio( VMChangeStateHandler *cb, void *opaque, int priority); VMChangeStateEntry * +/** + * qemu_add_vm_change_state_handler_prio_full: + * @cb: the main callback to invoke + * @prepare_cb: a callback to invoke before the main callback + * @cb_ret: the main callback to invoke with return value + * @opaque: user data passed to the callbacks + * @priority: low priorities execute first when the vm runs and the reverse is + * true when the vm stops + * + * Register a main callback function and an optional prepare callback function + * that are invoked when the vm starts or stops running. The main callback and + * the prepare callback are called in two separate phases: First all prepare + * callbacks are called and only then all main callbacks are called. As its + * name suggests, the prepare callback can be used to do some preparatory work + * before invoking the main callback. + * + * Returns: an entry to be freed using qemu_del_vm_change_state_handler() + */ qemu_add_vm_change_state_handler_prio_full(VMChangeStateHandler *cb, VMChangeStateHandler *prepare_cb, + VMChangeStateHandlerWithRet *cb_ret, void *opaque, int priority); VMChangeStateEntry *qdev_add_vm_change_state_handler(DeviceState *dev, VMChangeStateHandler *cb, + VMChangeStateHandlerWithRet *cb_ret, void *opaque); VMChangeStateEntry *qdev_add_vm_change_state_handler_full( - DeviceState *dev, VMChangeStateHandler *cb, - VMChangeStateHandler *prepare_cb, void *opaque); + DeviceState *dev, VMChangeStateHandler *cb, VMChangeStateHandler *prepare_cb, + VMChangeStateHandlerWithRet *cb_ret, void *opaque); void qemu_del_vm_change_state_handler(VMChangeStateEntry *e); /** * vm_state_notify: Notify the state of the VM * * @running: whether the VM is running or not. * @state: the #RunState of the VM. + * + * Return the result of the callback which has return value. + * If no callback has return value, still return 0 and the + * upper layer should not do additional processing. */ -void vm_state_notify(bool running, RunState state); +int vm_state_notify(bool running, RunState state); static inline bool shutdown_caused_by_guest(ShutdownCause cause) { @@ -100,6 +147,7 @@ void qemu_system_vmstop_request(RunState reason); void qemu_system_vmstop_request_prepare(void); bool qemu_vmstop_requested(RunState *r); ShutdownCause qemu_shutdown_requested_get(void); +bool qemu_force_shutdown_requested(void); ShutdownCause qemu_reset_requested_get(void); void qemu_system_killed(int signal, pid_t pid); void qemu_system_reset(ShutdownCause reason); diff --git a/include/system/vhost-user-backend.h b/include/system/vhost-user-backend.h index 5ed953c..5634ebd 100644 --- a/include/system/vhost-user-backend.h +++ b/include/system/vhost-user-backend.h @@ -43,6 +43,6 @@ struct VhostUserBackend { int vhost_user_backend_dev_init(VhostUserBackend *b, VirtIODevice *vdev, unsigned nvqs, Error **errp); void vhost_user_backend_start(VhostUserBackend *b); -void vhost_user_backend_stop(VhostUserBackend *b); +int vhost_user_backend_stop(VhostUserBackend *b); #endif diff --git a/include/system/whpx.h b/include/system/whpx.h index 00ff409..00f6a3e 100644 --- a/include/system/whpx.h +++ b/include/system/whpx.h @@ -16,19 +16,20 @@ #define QEMU_WHPX_H #ifdef COMPILING_PER_TARGET +# ifdef CONFIG_WHPX +# define CONFIG_WHPX_IS_POSSIBLE +# endif /* !CONFIG_WHPX */ +#else +# define CONFIG_WHPX_IS_POSSIBLE +#endif /* COMPILING_PER_TARGET */ -#ifdef CONFIG_WHPX - -int whpx_enabled(void); +#ifdef CONFIG_WHPX_IS_POSSIBLE +extern bool whpx_allowed; +#define whpx_enabled() (whpx_allowed) bool whpx_apic_in_platform(void); - -#else /* CONFIG_WHPX */ - -#define whpx_enabled() (0) +#else /* !CONFIG_WHPX_IS_POSSIBLE */ +#define whpx_enabled() 0 #define whpx_apic_in_platform() (0) - -#endif /* CONFIG_WHPX */ - -#endif /* COMPILING_PER_TARGET */ +#endif /* !CONFIG_WHPX_IS_POSSIBLE */ #endif /* QEMU_WHPX_H */ |