From 3df4c28860e100011db3a51a8a331506a3fe51f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Mon, 8 Apr 2024 17:53:18 +0200 Subject: util/qemu-config: Extract QMP commands to qemu-config-qmp.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit QMP is irrelevant for user emulation. Extract the code related to QMP into a different source file, which won't be built for user emulation binaries. This avoids pulling pointless code. Signed-off-by: Philippe Mathieu-Daudé Message-ID: <20240404194757.9343-5-philmd@linaro.org> Signed-off-by: Paolo Bonzini Message-ID: <20240408155330.522792-7-pbonzini@redhat.com> Signed-off-by: Paolo Bonzini --- include/qemu/config-file.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/qemu/config-file.h b/include/qemu/config-file.h index b82a778..51b310f 100644 --- a/include/qemu/config-file.h +++ b/include/qemu/config-file.h @@ -8,6 +8,9 @@ QemuOptsList *qemu_find_opts(const char *group); QemuOptsList *qemu_find_opts_err(const char *group, Error **errp); QemuOpts *qemu_find_opts_singleton(const char *group); +extern QemuOptsList *vm_config_groups[]; +extern QemuOptsList *drive_config_groups[]; + void qemu_add_opts(QemuOptsList *list); void qemu_add_drive_opts(QemuOptsList *list); int qemu_global_option(const char *str); -- cgit v1.1 From d4d0ebb7da0489a2fff8c7150a38fa897ca3eea5 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 8 Apr 2024 17:53:21 +0200 Subject: stubs: remove obsolete stubs These files define functions that are not called from common code anymore. Delete those functions and, if applicable, the entire files. 
Signed-off-by: Paolo Bonzini Acked-by: Richard Henderson Message-ID: <20240408155330.522792-10-pbonzini@redhat.com> Signed-off-by: Paolo Bonzini --- include/sysemu/sysemu.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h index eb1dc1e..5b4397e 100644 --- a/include/sysemu/sysemu.h +++ b/include/sysemu/sysemu.h @@ -71,8 +71,6 @@ Chardev *serial_hd(int i); extern Chardev *parallel_hds[MAX_PARALLEL_PORTS]; -void hmp_info_usb(Monitor *mon, const QDict *qdict); - void add_boot_device_path(int32_t bootindex, DeviceState *dev, const char *suffix); char *get_boot_devices_list(size_t *size); -- cgit v1.1 From 1e1e48792a92652af05f59b4df2f643542c71d90 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 22 Mar 2024 19:29:07 +0100 Subject: kvm: use configs/ definition to conditionalize debug support If an architecture adds support for KVM_CAP_SET_GUEST_DEBUG but QEMU does not have the necessary code, QEMU will fail to build after updating kernel headers. Avoid this by using a #define in config-target.h instead of KVM_CAP_SET_GUEST_DEBUG. Signed-off-by: Paolo Bonzini --- include/sysemu/kvm.h | 2 +- include/sysemu/kvm_int.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index fad9a7e..2cba899 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -224,7 +224,7 @@ void kvm_flush_coalesced_mmio_buffer(void); * calling down to kvm_arch_update_guest_debug after the generic * fields have been set. 
*/ -#ifdef KVM_CAP_SET_GUEST_DEBUG +#ifdef TARGET_KVM_HAVE_GUEST_DEBUG int kvm_update_guest_debug(CPUState *cpu, unsigned long reinject_trap); #else static inline int kvm_update_guest_debug(CPUState *cpu, unsigned long reinject_trap) diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h index 882e37e..94488d2 100644 --- a/include/sysemu/kvm_int.h +++ b/include/sysemu/kvm_int.h @@ -78,7 +78,7 @@ struct KVMState struct kvm_coalesced_mmio_ring *coalesced_mmio_ring; bool coalesced_flush_in_progress; int vcpu_events; -#ifdef KVM_CAP_SET_GUEST_DEBUG +#ifdef TARGET_KVM_HAVE_GUEST_DEBUG QTAILQ_HEAD(, kvm_sw_breakpoint) kvm_sw_breakpoints; #endif int max_nested_state_len; -- cgit v1.1 From 85fa9acda887438feb0711970bb528959a37568e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 25 Mar 2024 15:01:51 +0100 Subject: hw: Add compat machines for 9.1 Add 9.1 machine types for arm/i440fx/m68k/q35/s390x/spapr. Reviewed-by: Cornelia Huck Acked-by: Thomas Huth Reviewed-by: Harsh Prateek Bora Reviewed-by: Zhao Liu Cc: Gavin Shan Signed-off-by: Paolo Bonzini --- include/hw/boards.h | 3 +++ include/hw/i386/pc.h | 3 +++ 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/include/hw/boards.h b/include/hw/boards.h index 8b8f6d5..50e0cf4 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -425,6 +425,9 @@ struct MachineState { } \ type_init(machine_initfn##_register_types) +extern GlobalProperty hw_compat_9_0[]; +extern const size_t hw_compat_9_0_len; + extern GlobalProperty hw_compat_8_2[]; extern const size_t hw_compat_8_2_len; diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 27a6807..349f79d 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -198,6 +198,9 @@ void pc_system_parse_ovmf_flash(uint8_t *flash_ptr, size_t flash_size); /* sgx.c */ void pc_machine_init_sgx_epc(PCMachineState *pcms); +extern GlobalProperty pc_compat_9_0[]; +extern const size_t pc_compat_9_0_len; + extern GlobalProperty pc_compat_8_2[]; extern 
const size_t pc_compat_8_2_len; -- cgit v1.1 From b07bf7b73fd02d24a7baa64a580f4974b86bbc86 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Wed, 20 Mar 2024 03:39:14 -0500 Subject: q35: Introduce smm_ranges property for q35-pci-host Add a q35 property to check whether or not SMM ranges, e.g. SMRAM, TSEG, etc... exist for the target platform. TDX doesn't support SMM and doesn't play nice with QEMU modifying related guest memory ranges. Signed-off-by: Isaku Yamahata Co-developed-by: Sean Christopherson Signed-off-by: Sean Christopherson Signed-off-by: Xiaoyao Li Signed-off-by: Michael Roth Message-ID: <20240320083945.991426-19-michael.roth@amd.com> Signed-off-by: Paolo Bonzini --- include/hw/i386/pc.h | 1 + include/hw/pci-host/q35.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 349f79d..e522909 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -161,6 +161,7 @@ void pc_acpi_smi_interrupt(void *opaque, int irq, int level); #define PCI_HOST_PROP_PCI_HOLE64_SIZE "pci-hole64-size" #define PCI_HOST_BELOW_4G_MEM_SIZE "below-4g-mem-size" #define PCI_HOST_ABOVE_4G_MEM_SIZE "above-4g-mem-size" +#define PCI_HOST_PROP_SMM_RANGES "smm-ranges" void pc_pci_as_mapping_init(MemoryRegion *system_memory, diff --git a/include/hw/pci-host/q35.h b/include/hw/pci-host/q35.h index bafcbe6..22fadfa 100644 --- a/include/hw/pci-host/q35.h +++ b/include/hw/pci-host/q35.h @@ -50,6 +50,7 @@ struct MCHPCIState { MemoryRegion tseg_blackhole, tseg_window; MemoryRegion smbase_blackhole, smbase_window; bool has_smram_at_smbase; + bool has_smm_ranges; Range pci_hole; uint64_t below_4g_mem_size; uint64_t above_4g_mem_size; -- cgit v1.1 From 41a605944e3fecae43ca18ded95ec31f28e0c7fe Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 29 Feb 2024 01:00:35 -0500 Subject: confidential guest support: Add kvm_init() and kvm_reset() in class Different confidential VMs in different architectures all have the same 
needs to do their specific initialization (and maybe resetting) stuffs with KVM. Currently each of them exposes individual *_kvm_init() functions and lets machine code or kvm code call it. To facilitate the introduction of confidential guest technology from different x86 vendors, add two virtual functions, kvm_init() and kvm_reset() in ConfidentialGuestSupportClass, and expose two helper functions for invoking them. Signed-off-by: Xiaoyao Li Message-Id: <20240229060038.606591-1-xiaoyao.li@intel.com> Signed-off-by: Paolo Bonzini --- include/exec/confidential-guest-support.h | 34 ++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/exec/confidential-guest-support.h b/include/exec/confidential-guest-support.h index ba2dd4b..e5b188c 100644 --- a/include/exec/confidential-guest-support.h +++ b/include/exec/confidential-guest-support.h @@ -23,7 +23,10 @@ #include "qom/object.h" #define TYPE_CONFIDENTIAL_GUEST_SUPPORT "confidential-guest-support" -OBJECT_DECLARE_SIMPLE_TYPE(ConfidentialGuestSupport, CONFIDENTIAL_GUEST_SUPPORT) +OBJECT_DECLARE_TYPE(ConfidentialGuestSupport, + ConfidentialGuestSupportClass, + CONFIDENTIAL_GUEST_SUPPORT) + struct ConfidentialGuestSupport { Object parent; @@ -55,8 +58,37 @@ struct ConfidentialGuestSupport { typedef struct ConfidentialGuestSupportClass { ObjectClass parent; + + int (*kvm_init)(ConfidentialGuestSupport *cgs, Error **errp); + int (*kvm_reset)(ConfidentialGuestSupport *cgs, Error **errp); } ConfidentialGuestSupportClass; +static inline int confidential_guest_kvm_init(ConfidentialGuestSupport *cgs, + Error **errp) +{ + ConfidentialGuestSupportClass *klass; + + klass = CONFIDENTIAL_GUEST_SUPPORT_GET_CLASS(cgs); + if (klass->kvm_init) { + return klass->kvm_init(cgs, errp); + } + + return 0; +} + +static inline int confidential_guest_kvm_reset(ConfidentialGuestSupport *cgs, + Error **errp) +{ + ConfidentialGuestSupportClass *klass; + + klass = 
CONFIDENTIAL_GUEST_SUPPORT_GET_CLASS(cgs); + if (klass->kvm_reset) { + return klass->kvm_reset(cgs, errp); + } + + return 0; +} + #endif /* !CONFIG_USER_ONLY */ #endif /* QEMU_CONFIDENTIAL_GUEST_SUPPORT_H */ -- cgit v1.1 From 00a238b1a845fd5f0acd771664c5e184a63ed9b6 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 29 Feb 2024 01:00:37 -0500 Subject: ppc/pef: switch to use confidential_guest_kvm_init/reset() Use the unified interface to call confidential guest related kvm_init() and kvm_reset(), to avoid exposing pef specific functions. As a bonus, pef.h goes away since there is no direct call from sPAPR board code to PEF code anymore. Signed-off-by: Xiaoyao Li Signed-off-by: Paolo Bonzini --- include/hw/ppc/pef.h | 17 ----------------- 1 file changed, 17 deletions(-) delete mode 100644 include/hw/ppc/pef.h (limited to 'include') diff --git a/include/hw/ppc/pef.h b/include/hw/ppc/pef.h deleted file mode 100644 index 707dbe5..0000000 --- a/include/hw/ppc/pef.h +++ /dev/null @@ -1,17 +0,0 @@ -/* - * PEF (Protected Execution Facility) for POWER support - * - * Copyright Red Hat. - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. 
- * - */ - -#ifndef HW_PPC_PEF_H -#define HW_PPC_PEF_H - -int pef_kvm_init(ConfidentialGuestSupport *cgs, Error **errp); -int pef_kvm_reset(ConfidentialGuestSupport *cgs, Error **errp); - -#endif /* HW_PPC_PEF_H */ -- cgit v1.1 From ab0c7fb22b56523f24d6e127cd4d10ecff67bf85 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 23 Apr 2024 11:46:47 +0200 Subject: linux-headers: update to current kvm/next Signed-off-by: Paolo Bonzini --- include/standard-headers/asm-x86/bootparam.h | 17 +-- include/standard-headers/asm-x86/kvm_para.h | 3 +- include/standard-headers/asm-x86/setup_data.h | 83 +++++++++++ include/standard-headers/linux/ethtool.h | 48 +++++++ include/standard-headers/linux/fuse.h | 39 +++++- include/standard-headers/linux/input-event-codes.h | 1 + include/standard-headers/linux/virtio_gpu.h | 2 + include/standard-headers/linux/virtio_pci.h | 10 +- include/standard-headers/linux/virtio_snd.h | 154 +++++++++++++++++++++ 9 files changed, 332 insertions(+), 25 deletions(-) create mode 100644 include/standard-headers/asm-x86/setup_data.h (limited to 'include') diff --git a/include/standard-headers/asm-x86/bootparam.h b/include/standard-headers/asm-x86/bootparam.h index 0b06d2b..b582a10 100644 --- a/include/standard-headers/asm-x86/bootparam.h +++ b/include/standard-headers/asm-x86/bootparam.h @@ -2,21 +2,7 @@ #ifndef _ASM_X86_BOOTPARAM_H #define _ASM_X86_BOOTPARAM_H -/* setup_data/setup_indirect types */ -#define SETUP_NONE 0 -#define SETUP_E820_EXT 1 -#define SETUP_DTB 2 -#define SETUP_PCI 3 -#define SETUP_EFI 4 -#define SETUP_APPLE_PROPERTIES 5 -#define SETUP_JAILHOUSE 6 -#define SETUP_CC_BLOB 7 -#define SETUP_IMA 8 -#define SETUP_RNG_SEED 9 -#define SETUP_ENUM_MAX SETUP_RNG_SEED - -#define SETUP_INDIRECT (1<<31) -#define SETUP_TYPE_MAX (SETUP_ENUM_MAX | SETUP_INDIRECT) +#include "standard-headers/asm-x86/setup_data.h" /* ram_size flags */ #define RAMDISK_IMAGE_START_MASK 0x07FF @@ -38,6 +24,7 @@ #define XLF_EFI_KEXEC (1<<4) #define XLF_5LEVEL (1<<5) 
#define XLF_5LEVEL_ENABLED (1<<6) +#define XLF_MEM_ENCRYPTION (1<<7) #endif /* _ASM_X86_BOOTPARAM_H */ diff --git a/include/standard-headers/asm-x86/kvm_para.h b/include/standard-headers/asm-x86/kvm_para.h index f0235e5..9a011d2 100644 --- a/include/standard-headers/asm-x86/kvm_para.h +++ b/include/standard-headers/asm-x86/kvm_para.h @@ -92,7 +92,7 @@ struct kvm_clock_pairing { #define KVM_ASYNC_PF_DELIVERY_AS_INT (1 << 3) /* MSR_KVM_ASYNC_PF_INT */ -#define KVM_ASYNC_PF_VEC_MASK GENMASK(7, 0) +#define KVM_ASYNC_PF_VEC_MASK __GENMASK(7, 0) /* MSR_KVM_MIGRATION_CONTROL */ #define KVM_MIGRATION_READY (1 << 0) @@ -142,7 +142,6 @@ struct kvm_vcpu_pv_apf_data { uint32_t token; uint8_t pad[56]; - uint32_t enabled; }; #define KVM_PV_EOI_BIT 0 diff --git a/include/standard-headers/asm-x86/setup_data.h b/include/standard-headers/asm-x86/setup_data.h new file mode 100644 index 0000000..09355f5 --- /dev/null +++ b/include/standard-headers/asm-x86/setup_data.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ASM_X86_SETUP_DATA_H +#define _ASM_X86_SETUP_DATA_H + +/* setup_data/setup_indirect types */ +#define SETUP_NONE 0 +#define SETUP_E820_EXT 1 +#define SETUP_DTB 2 +#define SETUP_PCI 3 +#define SETUP_EFI 4 +#define SETUP_APPLE_PROPERTIES 5 +#define SETUP_JAILHOUSE 6 +#define SETUP_CC_BLOB 7 +#define SETUP_IMA 8 +#define SETUP_RNG_SEED 9 +#define SETUP_ENUM_MAX SETUP_RNG_SEED + +#define SETUP_INDIRECT (1<<31) +#define SETUP_TYPE_MAX (SETUP_ENUM_MAX | SETUP_INDIRECT) + +#ifndef __ASSEMBLY__ + +#include "standard-headers/linux/types.h" + +/* extensible setup data list node */ +struct setup_data { + uint64_t next; + uint32_t type; + uint32_t len; + uint8_t data[]; +}; + +/* extensible setup indirect data node */ +struct setup_indirect { + uint32_t type; + uint32_t reserved; /* Reserved, must be set to zero. 
*/ + uint64_t len; + uint64_t addr; +}; + +/* + * The E820 memory region entry of the boot protocol ABI: + */ +struct boot_e820_entry { + uint64_t addr; + uint64_t size; + uint32_t type; +} QEMU_PACKED; + +/* + * The boot loader is passing platform information via this Jailhouse-specific + * setup data structure. + */ +struct jailhouse_setup_data { + struct { + uint16_t version; + uint16_t compatible_version; + } QEMU_PACKED hdr; + struct { + uint16_t pm_timer_address; + uint16_t num_cpus; + uint64_t pci_mmconfig_base; + uint32_t tsc_khz; + uint32_t apic_khz; + uint8_t standard_ioapic; + uint8_t cpu_ids[255]; + } QEMU_PACKED v1; + struct { + uint32_t flags; + } QEMU_PACKED v2; +} QEMU_PACKED; + +/* + * IMA buffer setup data information from the previous kernel during kexec + */ +struct ima_setup_data { + uint64_t addr; + uint64_t size; +} QEMU_PACKED; + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_X86_SETUP_DATA_H */ diff --git a/include/standard-headers/linux/ethtool.h b/include/standard-headers/linux/ethtool.h index dfb54ef..0150378 100644 --- a/include/standard-headers/linux/ethtool.h +++ b/include/standard-headers/linux/ethtool.h @@ -2023,6 +2023,53 @@ static inline int ethtool_validate_duplex(uint8_t duplex) #define IPV4_FLOW 0x10 /* hash only */ #define IPV6_FLOW 0x11 /* hash only */ #define ETHER_FLOW 0x12 /* spec only (ether_spec) */ + +/* Used for GTP-U IPv4 and IPv6. + * The format of GTP packets only includes + * elements such as TEID and GTP version. + * It is primarily intended for data communication of the UE. + */ +#define GTPU_V4_FLOW 0x13 /* hash only */ +#define GTPU_V6_FLOW 0x14 /* hash only */ + +/* Use for GTP-C IPv4 and v6. + * The format of these GTP packets does not include TEID. + * Primarily expected to be used for communication + * to create sessions for UE data communication, + * commonly referred to as CSR (Create Session Request). 
+ */ +#define GTPC_V4_FLOW 0x15 /* hash only */ +#define GTPC_V6_FLOW 0x16 /* hash only */ + +/* Use for GTP-C IPv4 and v6. + * Unlike GTPC_V4_FLOW, the format of these GTP packets includes TEID. + * After session creation, it becomes this packet. + * This is mainly used for requests to realize UE handover. + */ +#define GTPC_TEID_V4_FLOW 0x17 /* hash only */ +#define GTPC_TEID_V6_FLOW 0x18 /* hash only */ + +/* Use for GTP-U and extended headers for the PSC (PDU Session Container). + * The format of these GTP packets includes TEID and QFI. + * In 5G communication using UPF (User Plane Function), + * data communication with this extended header is performed. + */ +#define GTPU_EH_V4_FLOW 0x19 /* hash only */ +#define GTPU_EH_V6_FLOW 0x1a /* hash only */ + +/* Use for GTP-U IPv4 and v6 PSC (PDU Session Container) extended headers. + * This differs from GTPU_EH_V(4|6)_FLOW in that it is distinguished by + * UL/DL included in the PSC. + * There are differences in the data included based on Downlink/Uplink, + * and can be used to distinguish packets. + * The functions described so far are useful when you want to + * handle communication from the mobile network in UPF, PGW, etc. 
+ */ +#define GTPU_UL_V4_FLOW 0x1b /* hash only */ +#define GTPU_UL_V6_FLOW 0x1c /* hash only */ +#define GTPU_DL_V4_FLOW 0x1d /* hash only */ +#define GTPU_DL_V6_FLOW 0x1e /* hash only */ + /* Flag to enable additional fields in struct ethtool_rx_flow_spec */ #define FLOW_EXT 0x80000000 #define FLOW_MAC_EXT 0x40000000 @@ -2037,6 +2084,7 @@ static inline int ethtool_validate_duplex(uint8_t duplex) #define RXH_IP_DST (1 << 5) #define RXH_L4_B_0_1 (1 << 6) /* src port in case of TCP/UDP/SCTP */ #define RXH_L4_B_2_3 (1 << 7) /* dst port in case of TCP/UDP/SCTP */ +#define RXH_GTP_TEID (1 << 8) /* teid in case of GTP */ #define RXH_DISCARD (1 << 31) #define RX_CLS_FLOW_DISC 0xffffffffffffffffULL diff --git a/include/standard-headers/linux/fuse.h b/include/standard-headers/linux/fuse.h index fc0dcd1..bac9dbc 100644 --- a/include/standard-headers/linux/fuse.h +++ b/include/standard-headers/linux/fuse.h @@ -211,6 +211,12 @@ * 7.39 * - add FUSE_DIRECT_IO_ALLOW_MMAP * - add FUSE_STATX and related structures + * + * 7.40 + * - add max_stack_depth to fuse_init_out, add FUSE_PASSTHROUGH init flag + * - add backing_id to fuse_open_out, add FOPEN_PASSTHROUGH open flag + * - add FUSE_NO_EXPORT_SUPPORT init flag + * - add FUSE_NOTIFY_RESEND, add FUSE_HAS_RESEND init flag */ #ifndef _LINUX_FUSE_H @@ -242,7 +248,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 39 +#define FUSE_KERNEL_MINOR_VERSION 40 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -349,6 +355,7 @@ struct fuse_file_lock { * FOPEN_STREAM: the file is stream-like (no file position at all) * FOPEN_NOFLUSH: don't flush data cache on close (unless FUSE_WRITEBACK_CACHE) * FOPEN_PARALLEL_DIRECT_WRITES: Allow concurrent direct writes on the same inode + * FOPEN_PASSTHROUGH: passthrough read/write io for this open file */ #define FOPEN_DIRECT_IO (1 << 0) #define FOPEN_KEEP_CACHE (1 << 1) @@ -357,6 +364,7 @@ struct fuse_file_lock { #define 
FOPEN_STREAM (1 << 4) #define FOPEN_NOFLUSH (1 << 5) #define FOPEN_PARALLEL_DIRECT_WRITES (1 << 6) +#define FOPEN_PASSTHROUGH (1 << 7) /** * INIT request/reply flags @@ -406,6 +414,9 @@ struct fuse_file_lock { * symlink and mknod (single group that matches parent) * FUSE_HAS_EXPIRE_ONLY: kernel supports expiry-only entry invalidation * FUSE_DIRECT_IO_ALLOW_MMAP: allow shared mmap in FOPEN_DIRECT_IO mode. + * FUSE_NO_EXPORT_SUPPORT: explicitly disable export support + * FUSE_HAS_RESEND: kernel supports resending pending requests, and the high bit + * of the request ID indicates resend requests */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -445,6 +456,9 @@ struct fuse_file_lock { #define FUSE_CREATE_SUPP_GROUP (1ULL << 34) #define FUSE_HAS_EXPIRE_ONLY (1ULL << 35) #define FUSE_DIRECT_IO_ALLOW_MMAP (1ULL << 36) +#define FUSE_PASSTHROUGH (1ULL << 37) +#define FUSE_NO_EXPORT_SUPPORT (1ULL << 38) +#define FUSE_HAS_RESEND (1ULL << 39) /* Obsolete alias for FUSE_DIRECT_IO_ALLOW_MMAP */ #define FUSE_DIRECT_IO_RELAX FUSE_DIRECT_IO_ALLOW_MMAP @@ -631,6 +645,7 @@ enum fuse_notify_code { FUSE_NOTIFY_STORE = 4, FUSE_NOTIFY_RETRIEVE = 5, FUSE_NOTIFY_DELETE = 6, + FUSE_NOTIFY_RESEND = 7, FUSE_NOTIFY_CODE_MAX, }; @@ -757,7 +772,7 @@ struct fuse_create_in { struct fuse_open_out { uint64_t fh; uint32_t open_flags; - uint32_t padding; + int32_t backing_id; }; struct fuse_release_in { @@ -873,7 +888,8 @@ struct fuse_init_out { uint16_t max_pages; uint16_t map_alignment; uint32_t flags2; - uint32_t unused[7]; + uint32_t max_stack_depth; + uint32_t unused[6]; }; #define CUSE_INIT_INFO_MAX 4096 @@ -956,6 +972,14 @@ struct fuse_fallocate_in { uint32_t padding; }; +/** + * FUSE request unique ID flag + * + * Indicates whether this is a resend request. The receiver should handle this + * request accordingly. 
+ */ +#define FUSE_UNIQUE_RESEND (1ULL << 63) + struct fuse_in_header { uint32_t len; uint32_t opcode; @@ -1045,9 +1069,18 @@ struct fuse_notify_retrieve_in { uint64_t dummy4; }; +struct fuse_backing_map { + int32_t fd; + uint32_t flags; + uint64_t padding; +}; + /* Device ioctls: */ #define FUSE_DEV_IOC_MAGIC 229 #define FUSE_DEV_IOC_CLONE _IOR(FUSE_DEV_IOC_MAGIC, 0, uint32_t) +#define FUSE_DEV_IOC_BACKING_OPEN _IOW(FUSE_DEV_IOC_MAGIC, 1, \ + struct fuse_backing_map) +#define FUSE_DEV_IOC_BACKING_CLOSE _IOW(FUSE_DEV_IOC_MAGIC, 2, uint32_t) struct fuse_lseek_in { uint64_t fh; diff --git a/include/standard-headers/linux/input-event-codes.h b/include/standard-headers/linux/input-event-codes.h index f6bab08..2221b0c 100644 --- a/include/standard-headers/linux/input-event-codes.h +++ b/include/standard-headers/linux/input-event-codes.h @@ -602,6 +602,7 @@ #define KEY_ALS_TOGGLE 0x230 /* Ambient light sensor */ #define KEY_ROTATE_LOCK_TOGGLE 0x231 /* Display rotation lock */ +#define KEY_REFRESH_RATE_TOGGLE 0x232 /* Display refresh rate toggle */ #define KEY_BUTTONCONFIG 0x240 /* AL Button Configuration */ #define KEY_TASKMANAGER 0x241 /* AL Task/Project Manager */ diff --git a/include/standard-headers/linux/virtio_gpu.h b/include/standard-headers/linux/virtio_gpu.h index 2da48d3..2db643e 100644 --- a/include/standard-headers/linux/virtio_gpu.h +++ b/include/standard-headers/linux/virtio_gpu.h @@ -309,6 +309,8 @@ struct virtio_gpu_cmd_submit { #define VIRTIO_GPU_CAPSET_VIRGL 1 #define VIRTIO_GPU_CAPSET_VIRGL2 2 +/* 3 is reserved for gfxstream */ +#define VIRTIO_GPU_CAPSET_VENUS 4 /* VIRTIO_GPU_CMD_GET_CAPSET_INFO */ struct virtio_gpu_get_capset_info { diff --git a/include/standard-headers/linux/virtio_pci.h b/include/standard-headers/linux/virtio_pci.h index 3e2bc2c..4010216 100644 --- a/include/standard-headers/linux/virtio_pci.h +++ b/include/standard-headers/linux/virtio_pci.h @@ -240,7 +240,7 @@ struct virtio_pci_cfg_cap { #define 
VIRTIO_ADMIN_CMD_LEGACY_DEV_CFG_READ 0x5 #define VIRTIO_ADMIN_CMD_LEGACY_NOTIFY_INFO 0x6 -struct QEMU_PACKED virtio_admin_cmd_hdr { +struct virtio_admin_cmd_hdr { uint16_t opcode; /* * 1 - SR-IOV @@ -252,20 +252,20 @@ struct QEMU_PACKED virtio_admin_cmd_hdr { uint64_t group_member_id; }; -struct QEMU_PACKED virtio_admin_cmd_status { +struct virtio_admin_cmd_status { uint16_t status; uint16_t status_qualifier; /* Unused, reserved for future extensions. */ uint8_t reserved2[4]; }; -struct QEMU_PACKED virtio_admin_cmd_legacy_wr_data { +struct virtio_admin_cmd_legacy_wr_data { uint8_t offset; /* Starting offset of the register(s) to write. */ uint8_t reserved[7]; uint8_t registers[]; }; -struct QEMU_PACKED virtio_admin_cmd_legacy_rd_data { +struct virtio_admin_cmd_legacy_rd_data { uint8_t offset; /* Starting offset of the register(s) to read. */ }; @@ -275,7 +275,7 @@ struct QEMU_PACKED virtio_admin_cmd_legacy_rd_data { #define VIRTIO_ADMIN_CMD_MAX_NOTIFY_INFO 4 -struct QEMU_PACKED virtio_admin_cmd_notify_info_data { +struct virtio_admin_cmd_notify_info_data { uint8_t flags; /* 0 = end of list, 1 = owner device, 2 = member device */ uint8_t bar; /* BAR of the member or the owner device */ uint8_t padding[6]; diff --git a/include/standard-headers/linux/virtio_snd.h b/include/standard-headers/linux/virtio_snd.h index 1af96b9..860f12e 100644 --- a/include/standard-headers/linux/virtio_snd.h +++ b/include/standard-headers/linux/virtio_snd.h @@ -8,6 +8,14 @@ #include "standard-headers/linux/virtio_types.h" /******************************************************************************* + * FEATURE BITS + */ +enum { + /* device supports control elements */ + VIRTIO_SND_F_CTLS = 0 +}; + +/******************************************************************************* * CONFIGURATION SPACE */ struct virtio_snd_config { @@ -17,6 +25,8 @@ struct virtio_snd_config { uint32_t streams; /* # of available channel maps */ uint32_t chmaps; + /* # of available control elements */ + 
uint32_t controls; }; enum { @@ -55,6 +65,15 @@ enum { /* channel map control request types */ VIRTIO_SND_R_CHMAP_INFO = 0x0200, + /* control element request types */ + VIRTIO_SND_R_CTL_INFO = 0x0300, + VIRTIO_SND_R_CTL_ENUM_ITEMS, + VIRTIO_SND_R_CTL_READ, + VIRTIO_SND_R_CTL_WRITE, + VIRTIO_SND_R_CTL_TLV_READ, + VIRTIO_SND_R_CTL_TLV_WRITE, + VIRTIO_SND_R_CTL_TLV_COMMAND, + /* jack event types */ VIRTIO_SND_EVT_JACK_CONNECTED = 0x1000, VIRTIO_SND_EVT_JACK_DISCONNECTED, @@ -63,6 +82,9 @@ enum { VIRTIO_SND_EVT_PCM_PERIOD_ELAPSED = 0x1100, VIRTIO_SND_EVT_PCM_XRUN, + /* control element event types */ + VIRTIO_SND_EVT_CTL_NOTIFY = 0x1200, + /* common status codes */ VIRTIO_SND_S_OK = 0x8000, VIRTIO_SND_S_BAD_MSG, @@ -331,4 +353,136 @@ struct virtio_snd_chmap_info { uint8_t positions[VIRTIO_SND_CHMAP_MAX_SIZE]; }; +/******************************************************************************* + * CONTROL ELEMENTS MESSAGES + */ +struct virtio_snd_ctl_hdr { + /* VIRTIO_SND_R_CTL_XXX */ + struct virtio_snd_hdr hdr; + /* 0 ... 
virtio_snd_config::controls - 1 */ + uint32_t control_id; +}; + +/* supported roles for control elements */ +enum { + VIRTIO_SND_CTL_ROLE_UNDEFINED = 0, + VIRTIO_SND_CTL_ROLE_VOLUME, + VIRTIO_SND_CTL_ROLE_MUTE, + VIRTIO_SND_CTL_ROLE_GAIN +}; + +/* supported value types for control elements */ +enum { + VIRTIO_SND_CTL_TYPE_BOOLEAN = 0, + VIRTIO_SND_CTL_TYPE_INTEGER, + VIRTIO_SND_CTL_TYPE_INTEGER64, + VIRTIO_SND_CTL_TYPE_ENUMERATED, + VIRTIO_SND_CTL_TYPE_BYTES, + VIRTIO_SND_CTL_TYPE_IEC958 +}; + +/* supported access rights for control elements */ +enum { + VIRTIO_SND_CTL_ACCESS_READ = 0, + VIRTIO_SND_CTL_ACCESS_WRITE, + VIRTIO_SND_CTL_ACCESS_VOLATILE, + VIRTIO_SND_CTL_ACCESS_INACTIVE, + VIRTIO_SND_CTL_ACCESS_TLV_READ, + VIRTIO_SND_CTL_ACCESS_TLV_WRITE, + VIRTIO_SND_CTL_ACCESS_TLV_COMMAND +}; + +struct virtio_snd_ctl_info { + /* common header */ + struct virtio_snd_info hdr; + /* element role (VIRTIO_SND_CTL_ROLE_XXX) */ + uint32_t role; + /* element value type (VIRTIO_SND_CTL_TYPE_XXX) */ + uint32_t type; + /* element access right bit map (1 << VIRTIO_SND_CTL_ACCESS_XXX) */ + uint32_t access; + /* # of members in the element value */ + uint32_t count; + /* index for an element with a non-unique name */ + uint32_t index; + /* name identifier string for the element */ + uint8_t name[44]; + /* additional information about the element's value */ + union { + /* VIRTIO_SND_CTL_TYPE_INTEGER */ + struct { + /* minimum supported value */ + uint32_t min; + /* maximum supported value */ + uint32_t max; + /* fixed step size for value (0 = variable size) */ + uint32_t step; + } integer; + /* VIRTIO_SND_CTL_TYPE_INTEGER64 */ + struct { + /* minimum supported value */ + uint64_t min; + /* maximum supported value */ + uint64_t max; + /* fixed step size for value (0 = variable size) */ + uint64_t step; + } integer64; + /* VIRTIO_SND_CTL_TYPE_ENUMERATED */ + struct { + /* # of options supported for value */ + uint32_t items; + } enumerated; + } value; +}; + +struct 
virtio_snd_ctl_enum_item { + /* option name */ + uint8_t item[64]; +}; + +struct virtio_snd_ctl_iec958 { + /* AES/IEC958 channel status bits */ + uint8_t status[24]; + /* AES/IEC958 subcode bits */ + uint8_t subcode[147]; + /* nothing */ + uint8_t pad; + /* AES/IEC958 subframe bits */ + uint8_t dig_subframe[4]; +}; + +struct virtio_snd_ctl_value { + union { + /* VIRTIO_SND_CTL_TYPE_BOOLEAN|INTEGER value */ + uint32_t integer[128]; + /* VIRTIO_SND_CTL_TYPE_INTEGER64 value */ + uint64_t integer64[64]; + /* VIRTIO_SND_CTL_TYPE_ENUMERATED value (option indexes) */ + uint32_t enumerated[128]; + /* VIRTIO_SND_CTL_TYPE_BYTES value */ + uint8_t bytes[512]; + /* VIRTIO_SND_CTL_TYPE_IEC958 value */ + struct virtio_snd_ctl_iec958 iec958; + } value; +}; + +/* supported event reason types */ +enum { + /* element's value has changed */ + VIRTIO_SND_CTL_EVT_MASK_VALUE = 0, + /* element's information has changed */ + VIRTIO_SND_CTL_EVT_MASK_INFO, + /* element's metadata has changed */ + VIRTIO_SND_CTL_EVT_MASK_TLV +}; + +struct virtio_snd_ctl_event { + /* VIRTIO_SND_EVT_CTL_NOTIFY */ + struct virtio_snd_hdr hdr; + /* 0 ... virtio_snd_config::controls - 1 */ + uint16_t control_id; + /* event reason bit map (1 << VIRTIO_SND_CTL_EVT_MASK_XXX) */ + uint16_t mask; +}; + #endif /* VIRTIO_SND_IF_H */ -- cgit v1.1 From 5c3131c392f84c660033d511ec39872d8beb4b1e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 18 Mar 2024 14:41:10 -0400 Subject: KVM: track whether guest state is encrypted MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit So far, KVM has allowed KVM_GET/SET_* ioctls to execute even if the guest state is encrypted, in which case they do nothing. For the new API using VM types, instead, the ioctls will fail which is a safer and more robust approach. The new API will be the only one available for SEV-SNP and TDX, but it is also usable for SEV and SEV-ES. 
In preparation for that, require architecture-specific KVM code to communicate the point at which guest state is protected (which must be after kvm_cpu_synchronize_post_init(), though that might change in the future in order to support migration). From that point, skip reading registers so that cpu->vcpu_dirty is never true: if it ever becomes true, kvm_arch_put_registers() will fail miserably. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Paolo Bonzini --- include/sysemu/kvm.h | 2 ++ include/sysemu/kvm_int.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include') diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index 2cba899..14b1ddb 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -539,6 +539,8 @@ bool kvm_dirty_ring_enabled(void); uint32_t kvm_dirty_ring_size(void); +void kvm_mark_guest_state_protected(void); + /** * kvm_hwpoisoned_mem - indicate if there is any hwpoisoned page * reported for the VM. diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h index 94488d2..227b61f 100644 --- a/include/sysemu/kvm_int.h +++ b/include/sysemu/kvm_int.h @@ -87,6 +87,7 @@ struct KVMState bool kernel_irqchip_required; OnOffAuto kernel_irqchip_split; bool sync_mmu; + bool guest_state_protected; uint64_t manual_dirty_log_protect; /* The man page (and posix) say ioctl numbers are signed int, but * they're not. Linux, glibc and *BSD all treat ioctl numbers as -- cgit v1.1 From a99c0c66ebe7d8db3af6f16689ade9375247e43e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 18 Mar 2024 14:41:33 -0400 Subject: KVM: remove kvm_arch_cpu_check_are_resettable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Board reset requires writing a fresh CPU state. As far as KVM is concerned, the only thing that blocks reset is that CPU state is encrypted; therefore, kvm_cpus_are_resettable() can simply check if that is the case. 
Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Paolo Bonzini --- include/sysemu/kvm.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index 14b1ddb..bd247f3 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -525,16 +525,6 @@ int kvm_get_one_reg(CPUState *cs, uint64_t id, void *target); /* Notify resamplefd for EOI of specific interrupts. */ void kvm_resample_fd_notify(int gsi); -/** - * kvm_cpu_check_are_resettable - return whether CPUs can be reset - * - * Returns: true: CPUs are resettable - * false: CPUs are not resettable - */ -bool kvm_cpu_check_are_resettable(void); - -bool kvm_arch_cpu_check_are_resettable(void); - bool kvm_dirty_ring_enabled(void); uint32_t kvm_dirty_ring_size(void); -- cgit v1.1 From 0811baed49010a9b651b8029ab6b9828b09a884f Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Wed, 20 Mar 2024 03:39:06 -0500 Subject: kvm: Introduce support for memory_attributes Introduce the helper functions to set the attributes of a range of memory to private or shared. This is necessary to notify KVM of the private/shared attribute of each gpa range. KVM needs the information to decide whether the GPA needs to be mapped at hva-based shared memory or guest_memfd based private memory. Signed-off-by: Xiaoyao Li Message-ID: <20240320083945.991426-11-michael.roth@amd.com> Signed-off-by: Paolo Bonzini --- include/sysemu/kvm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index bd247f3..594ae9b 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -536,4 +536,8 @@ void kvm_mark_guest_state_protected(void); * reported for the VM.
*/ bool kvm_hwpoisoned_mem(void); + +int kvm_set_memory_attributes_private(hwaddr start, uint64_t size); +int kvm_set_memory_attributes_shared(hwaddr start, uint64_t size); + #endif -- cgit v1.1 From 15f7a80c49cb3637f62fa37fa4a17da913bd91ff Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Wed, 20 Mar 2024 03:39:02 -0500 Subject: RAMBlock: Add support of KVM private guest memfd Add KVM guest_memfd support to RAMBlock so both normal hva based memory and kvm guest memfd based private memory can be associated in one RAMBlock. Introduce a new flag RAM_GUEST_MEMFD. When it's set, it calls KVM ioctl to create private guest_memfd during RAMBlock setup. Allocating a new RAM_GUEST_MEMFD flag to instruct the setup of guest memfd is more flexible and extensible than simply relying on the VM type because in the future we may have the case that not all the memory of a VM needs guest memfd. As a benefit, it also avoids getting MachineState in the memory subsystem. Note, RAM_GUEST_MEMFD is supposed to be set for memory backends of confidential guests, such as TDX VM. How and when to set it for memory backends will be implemented in the following patches. Introduce memory_region_has_guest_memfd() to query if the MemoryRegion has KVM guest_memfd allocated.
Signed-off-by: Xiaoyao Li Reviewed-by: David Hildenbrand Message-ID: <20240320083945.991426-7-michael.roth@amd.com> Signed-off-by: Paolo Bonzini --- include/exec/memory.h | 20 +++++++++++++++++--- include/exec/ram_addr.h | 2 +- include/exec/ramblock.h | 1 + include/sysemu/kvm.h | 2 ++ 4 files changed, 21 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/exec/memory.h b/include/exec/memory.h index 8626a35..679a847 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -243,6 +243,9 @@ typedef struct IOMMUTLBEvent { /* RAM FD is opened read-only */ #define RAM_READONLY_FD (1 << 11) +/* RAM can be private that has kvm guest memfd backend */ +#define RAM_GUEST_MEMFD (1 << 12) + static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn, IOMMUNotifierFlag flags, hwaddr start, hwaddr end, @@ -1307,7 +1310,8 @@ bool memory_region_init_ram_nomigrate(MemoryRegion *mr, * @name: Region name, becomes part of RAMBlock name used in migration stream * must be unique within any device * @size: size of the region. - * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_NORESERVE. + * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_NORESERVE, + * RAM_GUEST_MEMFD. * @errp: pointer to Error*, to store an error if it happens. * * Note that this function does not do anything to cause the data in the @@ -1369,7 +1373,7 @@ bool memory_region_init_resizeable_ram(MemoryRegion *mr, * (getpagesize()) will be used. * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM, * RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY, - * RAM_READONLY_FD + * RAM_READONLY_FD, RAM_GUEST_MEMFD * @path: the path in which to allocate the RAM. * @offset: offset within the file referenced by path * @errp: pointer to Error*, to store an error if it happens. @@ -1399,7 +1403,7 @@ bool memory_region_init_ram_from_file(MemoryRegion *mr, * @size: size of the region. * @ram_flags: RamBlock flags. 
Supported flags: RAM_SHARED, RAM_PMEM, * RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY, - * RAM_READONLY_FD + * RAM_READONLY_FD, RAM_GUEST_MEMFD * @fd: the fd to mmap. * @offset: offset within the file referenced by fd * @errp: pointer to Error*, to store an error if it happens. @@ -1723,6 +1727,16 @@ static inline bool memory_region_is_romd(MemoryRegion *mr) bool memory_region_is_protected(MemoryRegion *mr); /** + * memory_region_has_guest_memfd: check whether a memory region has guest_memfd + * associated + * + * Returns %true if a memory region's ram_block has valid guest_memfd assigned. + * + * @mr: the memory region being queried + */ +bool memory_region_has_guest_memfd(MemoryRegion *mr); + +/** * memory_region_get_iommu: check whether a memory region is an iommu * * Returns pointer to IOMMUMemoryRegion if a memory region is an iommu, diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h index de45ba7..07c8f86 100644 --- a/include/exec/ram_addr.h +++ b/include/exec/ram_addr.h @@ -110,7 +110,7 @@ long qemu_maxrampagesize(void); * @mr: the memory region where the ram block is * @ram_flags: RamBlock flags. 
Supported flags: RAM_SHARED, RAM_PMEM, * RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY, - * RAM_READONLY_FD + * RAM_READONLY_FD, RAM_GUEST_MEMFD * @mem_path or @fd: specify the backing file or device * @offset: Offset into target file * @errp: pointer to Error*, to store an error if it happens diff --git a/include/exec/ramblock.h b/include/exec/ramblock.h index 848915e..459c891 100644 --- a/include/exec/ramblock.h +++ b/include/exec/ramblock.h @@ -41,6 +41,7 @@ struct RAMBlock { QLIST_HEAD(, RAMBlockNotifier) ramblock_notifiers; int fd; uint64_t fd_offset; + int guest_memfd; size_t page_size; /* dirty bitmap used during migration */ unsigned long *bmap; diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index 594ae9b..217f3fe 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -537,6 +537,8 @@ void kvm_mark_guest_state_protected(void); */ bool kvm_hwpoisoned_mem(void); +int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp); + int kvm_set_memory_attributes_private(hwaddr start, uint64_t size); int kvm_set_memory_attributes_shared(hwaddr start, uint64_t size); -- cgit v1.1 From ce5a983233b4ca94ced88c9581014346509b5c71 Mon Sep 17 00:00:00 2001 From: Chao Peng Date: Wed, 20 Mar 2024 03:39:05 -0500 Subject: kvm: Enable KVM_SET_USER_MEMORY_REGION2 for memslot Switch to KVM_SET_USER_MEMORY_REGION2 when supported by KVM. With KVM_SET_USER_MEMORY_REGION2, QEMU can set up a memory region that is backed both by hva-based shared memory and guest memfd based private memory.
Signed-off-by: Chao Peng Co-developed-by: Xiaoyao Li Signed-off-by: Xiaoyao Li Message-ID: <20240320083945.991426-10-michael.roth@amd.com> Signed-off-by: Paolo Bonzini --- include/sysemu/kvm_int.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h index 227b61f..3f3d13f 100644 --- a/include/sysemu/kvm_int.h +++ b/include/sysemu/kvm_int.h @@ -30,6 +30,8 @@ typedef struct KVMSlot int as_id; /* Cache of the offset in ram address space */ ram_addr_t ram_start_offset; + int guest_memfd; + hwaddr guest_memfd_offset; } KVMSlot; typedef struct KVMMemoryUpdate { -- cgit v1.1 From 37662d85b0b7dded0ebdf6747bef6c3bb7ed6a0c Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Wed, 20 Mar 2024 03:39:03 -0500 Subject: HostMem: Add mechanism to opt in kvm guest memfd via MachineState Add a new member "guest_memfd" to memory backends. When it's set to true, it enables RAM_GUEST_MEMFD in ram_flags, thus private kvm guest_memfd will be allocated during RAMBlock allocation. Memory backend's @guest_memfd is wired with @require_guest_memfd field of MachineState. This avoids looking up the machine in physmem.c. MachineState::require_guest_memfd is supposed to be set by any VM that requires KVM guest memfd as private memory, e.g., TDX VM.
Signed-off-by: Xiaoyao Li Reviewed-by: David Hildenbrand Message-ID: <20240320083945.991426-8-michael.roth@amd.com> Signed-off-by: Paolo Bonzini --- include/hw/boards.h | 2 ++ include/sysemu/hostmem.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include') diff --git a/include/hw/boards.h b/include/hw/boards.h index 50e0cf4..69c1ba4 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -36,6 +36,7 @@ bool machine_usb(MachineState *machine); int machine_phandle_start(MachineState *machine); bool machine_dump_guest_core(MachineState *machine); bool machine_mem_merge(MachineState *machine); +bool machine_require_guest_memfd(MachineState *machine); HotpluggableCPUList *machine_query_hotpluggable_cpus(MachineState *machine); void machine_set_cpu_numa_node(MachineState *machine, const CpuInstanceProperties *props, @@ -370,6 +371,7 @@ struct MachineState { char *dt_compatible; bool dump_guest_core; bool mem_merge; + bool require_guest_memfd; bool usb; bool usb_disabled; char *firmware; diff --git a/include/sysemu/hostmem.h b/include/sysemu/hostmem.h index 0e411aa..04b884b 100644 --- a/include/sysemu/hostmem.h +++ b/include/sysemu/hostmem.h @@ -74,6 +74,7 @@ struct HostMemoryBackend { uint64_t size; bool merge, dump, use_canonical_path; bool prealloc, is_mapped, share, reserve; + bool guest_memfd; uint32_t prealloc_threads; ThreadContext *prealloc_context; DECLARE_BITMAP(host_nodes, MAX_NODES + 1); -- cgit v1.1 From b2e9426c04fdd32d93a3a37db6b0c2e67c88c335 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Wed, 20 Mar 2024 03:39:07 -0500 Subject: physmem: Introduce ram_block_discard_guest_memfd_range() When a memory page is converted from private to shared, the original private memory is backed by guest_memfd. Introduce ram_block_discard_guest_memfd_range() for discarding memory in guest_memfd. Based on a patch by Isaku Yamahata.
Signed-off-by: Xiaoyao Li Reviewed-by: David Hildenbrand Signed-off-by: Michael Roth Message-ID: <20240320083945.991426-12-michael.roth@amd.com> Signed-off-by: Paolo Bonzini --- include/exec/cpu-common.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h index 6346df1..6d53188 100644 --- a/include/exec/cpu-common.h +++ b/include/exec/cpu-common.h @@ -159,6 +159,8 @@ typedef int (RAMBlockIterFunc)(RAMBlock *rb, void *opaque); int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque); int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length); +int ram_block_discard_guest_memfd_range(RAMBlock *rb, uint64_t start, + size_t length); #endif -- cgit v1.1 From c15e5684071d93174e446be318f49d8d59b15d6d Mon Sep 17 00:00:00 2001 From: Chao Peng Date: Wed, 20 Mar 2024 03:39:08 -0500 Subject: kvm: handle KVM_EXIT_MEMORY_FAULT Upon a KVM_EXIT_MEMORY_FAULT exit, userspace needs to do the memory conversion on the RAMBlock to turn the memory into the desired attribute, switching between private and shared. Currently only KVM_MEMORY_EXIT_FLAG_PRIVATE in flags is valid when KVM_EXIT_MEMORY_FAULT happens. Note, KVM_EXIT_MEMORY_FAULT makes sense only when the RAMBlock has a guest_memfd memory backend. Note, KVM_EXIT_MEMORY_FAULT returns with -EFAULT, so special handling is added. When a page is converted from shared to private, the original shared memory can be discarded via ram_block_discard_range(). Note, shared memory can be discarded only when it's not backed by hugetlb because hugetlb is supposed to be pre-allocated and there is no need for discarding.
Signed-off-by: Chao Peng Co-developed-by: Xiaoyao Li Signed-off-by: Xiaoyao Li Message-ID: <20240320083945.991426-13-michael.roth@amd.com> Signed-off-by: Paolo Bonzini --- include/sysemu/kvm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index 217f3fe..47f9e8b 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -542,4 +542,6 @@ int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp); int kvm_set_memory_attributes_private(hwaddr start, uint64_t size); int kvm_set_memory_attributes_shared(hwaddr start, uint64_t size); +int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private); + #endif -- cgit v1.1