aboutsummaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorStefan Hajnoczi <stefanha@redhat.com>2023-10-16 12:34:17 -0400
committerStefan Hajnoczi <stefanha@redhat.com>2023-10-16 12:34:17 -0400
commitbc2b89b38582b1cc7198428c9174fbbbf31245ad (patch)
treed15cdfa911a8b6d6eecec0d0ecaa1d1eed346075 /include
parent63011373ad22c794a013da69663c03f1297a5c56 (diff)
parentee6398d862c108f8136a26d93d26680f3d222a3a (diff)
downloadqemu-bc2b89b38582b1cc7198428c9174fbbbf31245ad.zip
qemu-bc2b89b38582b1cc7198428c9174fbbbf31245ad.tar.gz
qemu-bc2b89b38582b1cc7198428c9174fbbbf31245ad.tar.bz2
Merge tag 'mem-2023-10-12' of https://github.com/davidhildenbrand/qemu into staging
Hi, "Host Memory Backends" and "Memory devices" queue ("mem"): - Support memory devices with multiple memslots - Support memory devices that dynamically consume memslots - Support memory devices that can automatically decide on the number of memslots to use - virtio-mem support for exposing memory dynamically via multiple memslots - Some required cleanups/refactorings # -----BEGIN PGP SIGNATURE----- # # iQJFBAABCAAvFiEEG9nKrXNcTDpGDfzKTd4Q9wD/g1oFAmUn+XMRHGRhdmlkQHJl # ZGhhdC5jb20ACgkQTd4Q9wD/g1qDHA//T01suTa+uzrcoJHoMWN11S47WnAmbuTo # vVakucLBPMJAa9xZeCy3OavXaVGpHkw+t6g3OFknof0LfQ5/j9iE3Q1PxURN7g5j # SJ2WJXCoceM6T4TMhPvVvgEaYjFmESqZB5FZgedMT0QRyhAxMuF9pCkWhk1O3OAV # JqQKqLFiGcv60AEuBYGZGzgiOUv8EJ5gKwRF4VOdyHIxqZDw1aZXzlcd4TzFZBQ7 # rwW/3ef+sFmUJdmfrSrqcIlQSRrqZ2w95xATDzLTIEEUT3SWqh/E95EZWIz1M0oQ # NgWgFiLCR1KOj7bWFhLXT7IfyLh0mEysD+P/hY6QwQ4RewWG7EW5UK+JFswssdcZ # rEj5XpHZzev/wx7hM4bWsoQ+VIvrH7j3uYGyWkcgYRbdDEkWDv2rsT23lwGYNhht # oBsrdEBELRw6v4C8doq/+sCmHmuxUMqTGwbArCQVnB1XnLxOEkuqlnfq5MORkzNF # fxbIRx+LRluOllC0HVaDQd8qxRq1+UC5WIpAcDcrouy4HGgi1onWKrXpgjIAbVyH # M6cENkK7rnRk96gpeXdmrf0h9HqRciAOY8oUsFsvLyKBOCPBWDrLyOQEY5UoSdtD # m4QpEVgywCy2z1uU/UObeT/UxJy/9EL/Zb+DHoEK06iEhwONoUJjEBYMJD38RMkk # mwPTB4UAk9g= # =s69t # -----END PGP SIGNATURE----- # gpg: Signature made Thu 12 Oct 2023 09:49:39 EDT # gpg: using RSA key 1BD9CAAD735C4C3A460DFCCA4DDE10F700FF835A # gpg: issuer "david@redhat.com" # gpg: Good signature from "David Hildenbrand <david@redhat.com>" [unknown] # gpg: aka "David Hildenbrand <davidhildenbrand@gmail.com>" [full] # gpg: aka "David Hildenbrand <hildenbr@in.tum.de>" [unknown] # gpg: WARNING: The key's User ID is not certified with a trusted signature! # gpg: There is no indication that the signature belongs to the owner. 
# Primary key fingerprint: 1BD9 CAAD 735C 4C3A 460D FCCA 4DDE 10F7 00FF 835A * tag 'mem-2023-10-12' of https://github.com/davidhildenbrand/qemu: virtio-mem: Mark memslot alias memory regions unmergeable memory,vhost: Allow for marking memory device memory regions unmergeable virtio-mem: Expose device memory dynamically via multiple memslots if enabled virtio-mem: Update state to match bitmap as soon as it's been migrated virtio-mem: Pass non-const VirtIOMEM via virtio_mem_range_cb memory: Clarify mapping requirements for RamDiscardManager memory-device,vhost: Support automatic decision on the number of memslots vhost: Add vhost_get_max_memslots() kvm: Add stub for kvm_get_max_memslots() memory-device,vhost: Support memory devices that dynamically consume memslots memory-device: Track required and actually used memslots in DeviceMemoryState stubs: Rename qmp_memory_device.c to memory_device.c memory-device: Support memory devices with multiple memslots vhost: Return number of free memslots kvm: Return number of free memslots softmmu/physmem: Fixup qemu_ram_block_from_host() documentation vhost: Remove vhost_backend_can_merge() callback vhost: Rework memslot filtering and fix "used_memslot" tracking Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Diffstat (limited to 'include')
-rw-r--r--include/exec/cpu-common.h15
-rw-r--r--include/exec/memory.h27
-rw-r--r--include/hw/boards.h14
-rw-r--r--include/hw/mem/memory-device.h57
-rw-r--r--include/hw/virtio/vhost-backend.h9
-rw-r--r--include/hw/virtio/vhost.h3
-rw-r--r--include/hw/virtio/virtio-mem.h32
-rw-r--r--include/sysemu/kvm.h4
-rw-r--r--include/sysemu/kvm_int.h1
9 files changed, 148 insertions, 14 deletions
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index 605b160..30c376a 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -83,6 +83,21 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length);
ram_addr_t qemu_ram_addr_from_host(void *ptr);
ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr);
RAMBlock *qemu_ram_block_by_name(const char *name);
+
+/*
+ * Translates a host ptr back to a RAMBlock and an offset in that RAMBlock.
+ *
+ * @ptr: The host pointer to translate.
+ * @round_offset: Whether to round the result offset down to a target page
+ * @offset: Will be set to the offset within the returned RAMBlock.
+ *
+ * Returns: RAMBlock (or NULL if not found)
+ *
+ * By the time this function returns, the returned pointer is not protected
+ * by RCU anymore. If the caller is not within an RCU critical section and
+ * does not hold the iothread lock, it must have other means of protecting the
+ * pointer, such as a reference to the memory region that owns the RAMBlock.
+ */
RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
ram_addr_t *offset);
ram_addr_t qemu_ram_block_host_offset(RAMBlock *rb, void *host);
diff --git a/include/exec/memory.h b/include/exec/memory.h
index c99842d..653a32e 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -95,6 +95,7 @@ struct ReservedRegion {
* relative to the region's address space
* @readonly: writes to this section are ignored
* @nonvolatile: this section is non-volatile
+ * @unmergeable: this section should not get merged with adjacent sections
*/
struct MemoryRegionSection {
Int128 size;
@@ -104,6 +105,7 @@ struct MemoryRegionSection {
hwaddr offset_within_address_space;
bool readonly;
bool nonvolatile;
+ bool unmergeable;
};
typedef struct IOMMUTLBEntry IOMMUTLBEntry;
@@ -599,8 +601,9 @@ typedef void (*ReplayRamDiscard)(MemoryRegionSection *section, void *opaque);
* populated (consuming memory), to be used/accessed by the VM.
*
* A #RamDiscardManager can only be set for a RAM #MemoryRegion while the
- * #MemoryRegion isn't mapped yet; it cannot change while the #MemoryRegion is
- * mapped.
+ * #MemoryRegion isn't mapped into an address space yet (either directly
+ * or via an alias); it cannot change while the #MemoryRegion is
+ * mapped into an address space.
*
* The #RamDiscardManager is intended to be used by technologies that are
* incompatible with discarding of RAM (e.g., VFIO, which may pin all
@@ -772,6 +775,7 @@ struct MemoryRegion {
bool nonvolatile;
bool rom_device;
bool flush_coalesced_mmio;
+ bool unmergeable;
uint8_t dirty_log_mask;
bool is_iommu;
RAMBlock *ram_block;
@@ -2350,6 +2354,25 @@ void memory_region_set_size(MemoryRegion *mr, uint64_t size);
void memory_region_set_alias_offset(MemoryRegion *mr,
hwaddr offset);
+/*
+ * memory_region_set_unmergeable: Set a memory region unmergeable
+ *
+ * Mark a memory region unmergeable, resulting in the memory region (or
+ * everything contained in a memory region container) not getting merged when
+ * simplifying the address space and notifying memory listeners. Consequently,
+ * memory listeners will never get notified about ranges that are larger than
+ * the original memory regions.
+ *
+ * This is primarily useful when multiple aliases to a RAM memory region are
+ * mapped into a memory region container, and updates (e.g., enable/disable or
+ * map/unmap) of individual memory region aliases are not supposed to affect
+ * other memory regions in the same container.
+ *
+ * @mr: the #MemoryRegion to be updated
+ * @unmergeable: whether to mark the #MemoryRegion unmergeable
+ */
+void memory_region_set_unmergeable(MemoryRegion *mr, bool unmergeable);
+
/**
* memory_region_present: checks if an address relative to a @container
* translates into #MemoryRegion within @container
diff --git a/include/hw/boards.h b/include/hw/boards.h
index 55a64a1..43a56dc 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -297,15 +297,27 @@ struct MachineClass {
* DeviceMemoryState:
* @base: address in guest physical address space where the memory
* address space for memory devices starts
- * @mr: address space container for memory devices
+ * @mr: memory region container for memory devices
+ * @as: address space for memory devices
+ * @listener: memory listener used to track used memslots in the address space
* @dimm_size: the sum of plugged DIMMs' sizes
* @used_region_size: the part of @mr already used by memory devices
+ * @required_memslots: the number of memslots required by memory devices
+ * @used_memslots: the number of memslots currently used by memory devices
+ * @memslot_auto_decision_active: whether any plugged memory device
+ * automatically decided to use more than
+ * one memslot
*/
typedef struct DeviceMemoryState {
hwaddr base;
MemoryRegion mr;
+ AddressSpace as;
+ MemoryListener listener;
uint64_t dimm_size;
uint64_t used_region_size;
+ unsigned int required_memslots;
+ unsigned int used_memslots;
+ unsigned int memslot_auto_decision_active;
} DeviceMemoryState;
/**
diff --git a/include/hw/mem/memory-device.h b/include/hw/mem/memory-device.h
index 48d2611..3354d6c 100644
--- a/include/hw/mem/memory-device.h
+++ b/include/hw/mem/memory-device.h
@@ -14,6 +14,7 @@
#define MEMORY_DEVICE_H
#include "hw/qdev-core.h"
+#include "qemu/typedefs.h"
#include "qapi/qapi-types-machine.h"
#include "qom/object.h"
@@ -41,6 +42,17 @@ typedef struct MemoryDeviceState MemoryDeviceState;
* successive memory regions are used, a covering memory region has to
* be provided. Scattered memory regions are not supported for single
* devices.
+ *
+ * The device memory region returned via @get_memory_region may either be a
+ * single RAM memory region or a memory region container with subregions
+ * that are RAM memory regions or aliases to RAM memory regions. Other
+ * memory regions or subregions are not supported.
+ *
+ * If the device memory region returned via @get_memory_region is a
+ * memory region container, subregions may be (un)mapped dynamically
+ * as long as the number of memslots returned by @get_memslots is not
+ * exceeded and as long as all memory regions are of the same kind (e.g.,
+ * all RAM or all ROM).
*/
struct MemoryDeviceClass {
/* private */
@@ -89,6 +101,28 @@ struct MemoryDeviceClass {
MemoryRegion *(*get_memory_region)(MemoryDeviceState *md, Error **errp);
/*
+ * Optional: Instruct the memory device to decide how many memory slots
+ * it requires, not exceeding the given limit.
+ *
+ * Called exactly once when pre-plugging the memory device, before
+ * querying the number of memslots using @get_memslots the first time.
+ */
+ void (*decide_memslots)(MemoryDeviceState *md, unsigned int limit);
+
+ /*
+ * Optional for memory devices that require only a single memslot,
+ * required for all other memory devices: Return the number of memslots
+ * (distinct RAM memory regions in the device memory region) that are
+ * required by the device.
+ *
+ * If this function is not implemented, the assumption is "1".
+ *
+ * Called when (un)plugging the memory device, to check if the requirements
+ * can be satisfied, and to do proper accounting.
+ */
+ unsigned int (*get_memslots)(MemoryDeviceState *md);
+
+ /*
* Optional: Return the desired minimum alignment of the device in guest
* physical address space. The final alignment is computed based on this
* alignment and the alignment requirements of the memory region.
@@ -105,8 +139,31 @@ struct MemoryDeviceClass {
MemoryDeviceInfo *info);
};
+/*
+ * Traditionally, KVM/vhost in many setups supported 509 memslots, whereby
+ * 253 memslots were "reserved" for boot memory and other devices (such
+ * as PCI BARs, which can get mapped dynamically) and 256 memslots were
+ * dedicated for DIMMs. These magic numbers worked reliably in the past.
+ *
+ * Further, using many memslots can negatively affect performance, so setting
+ * the soft-limit of memslots used by memory devices to the traditional
+ * DIMM limit of 256 sounds reasonable.
+ *
+ * If we have fewer than 509 memslots, we will instruct memory devices that
+ * support automatically deciding how many memslots to use to only use a single
+ * one.
+ *
+ * Hotplugging vhost devices with at least 509 memslots is not expected to
+ * cause problems, not even when devices automatically decided how many memslots
+ * to use.
+ */
+#define MEMORY_DEVICES_SOFT_MEMSLOT_LIMIT 256
+#define MEMORY_DEVICES_SAFE_MAX_MEMSLOTS 509
+
MemoryDeviceInfoList *qmp_memory_device_list(void);
uint64_t get_plugged_memory_size(void);
+unsigned int memory_devices_get_reserved_memslots(void);
+bool memory_devices_memslot_auto_decision_active(void);
void memory_device_pre_plug(MemoryDeviceState *md, MachineState *ms,
const uint64_t *legacy_align, Error **errp);
void memory_device_plug(MemoryDeviceState *md, MachineState *ms);
diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h
index 1860b54..96ccc18 100644
--- a/include/hw/virtio/vhost-backend.h
+++ b/include/hw/virtio/vhost-backend.h
@@ -86,9 +86,6 @@ typedef int (*vhost_set_vring_enable_op)(struct vhost_dev *dev,
typedef bool (*vhost_requires_shm_log_op)(struct vhost_dev *dev);
typedef int (*vhost_migration_done_op)(struct vhost_dev *dev,
char *mac_addr);
-typedef bool (*vhost_backend_can_merge_op)(struct vhost_dev *dev,
- uint64_t start1, uint64_t size1,
- uint64_t start2, uint64_t size2);
typedef int (*vhost_vsock_set_guest_cid_op)(struct vhost_dev *dev,
uint64_t guest_cid);
typedef int (*vhost_vsock_set_running_op)(struct vhost_dev *dev, int start);
@@ -108,8 +105,7 @@ typedef int (*vhost_crypto_create_session_op)(struct vhost_dev *dev,
typedef int (*vhost_crypto_close_session_op)(struct vhost_dev *dev,
uint64_t session_id);
-typedef bool (*vhost_backend_mem_section_filter_op)(struct vhost_dev *dev,
- MemoryRegionSection *section);
+typedef bool (*vhost_backend_no_private_memslots_op)(struct vhost_dev *dev);
typedef int (*vhost_get_inflight_fd_op)(struct vhost_dev *dev,
uint16_t queue_size,
@@ -138,6 +134,7 @@ typedef struct VhostOps {
vhost_backend_init vhost_backend_init;
vhost_backend_cleanup vhost_backend_cleanup;
vhost_backend_memslots_limit vhost_backend_memslots_limit;
+ vhost_backend_no_private_memslots_op vhost_backend_no_private_memslots;
vhost_net_set_backend_op vhost_net_set_backend;
vhost_net_set_mtu_op vhost_net_set_mtu;
vhost_scsi_set_endpoint_op vhost_scsi_set_endpoint;
@@ -163,7 +160,6 @@ typedef struct VhostOps {
vhost_set_vring_enable_op vhost_set_vring_enable;
vhost_requires_shm_log_op vhost_requires_shm_log;
vhost_migration_done_op vhost_migration_done;
- vhost_backend_can_merge_op vhost_backend_can_merge;
vhost_vsock_set_guest_cid_op vhost_vsock_set_guest_cid;
vhost_vsock_set_running_op vhost_vsock_set_running;
vhost_set_iotlb_callback_op vhost_set_iotlb_callback;
@@ -172,7 +168,6 @@ typedef struct VhostOps {
vhost_set_config_op vhost_set_config;
vhost_crypto_create_session_op vhost_crypto_create_session;
vhost_crypto_close_session_op vhost_crypto_close_session;
- vhost_backend_mem_section_filter_op vhost_backend_mem_section_filter;
vhost_get_inflight_fd_op vhost_get_inflight_fd;
vhost_set_inflight_fd_op vhost_set_inflight_fd;
vhost_dev_start_op vhost_dev_start;
diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
index 6a173cb..c7e54676 100644
--- a/include/hw/virtio/vhost.h
+++ b/include/hw/virtio/vhost.h
@@ -315,7 +315,8 @@ uint64_t vhost_get_features(struct vhost_dev *hdev, const int *feature_bits,
*/
void vhost_ack_features(struct vhost_dev *hdev, const int *feature_bits,
uint64_t features);
-bool vhost_has_free_slot(void);
+unsigned int vhost_get_max_memslots(void);
+unsigned int vhost_get_free_memslots(void);
int vhost_net_set_backend(struct vhost_dev *hdev,
struct vhost_vring_file *file);
diff --git a/include/hw/virtio/virtio-mem.h b/include/hw/virtio/virtio-mem.h
index ab0fe2b..5f5b02b 100644
--- a/include/hw/virtio/virtio-mem.h
+++ b/include/hw/virtio/virtio-mem.h
@@ -33,6 +33,7 @@ OBJECT_DECLARE_TYPE(VirtIOMEM, VirtIOMEMClass,
#define VIRTIO_MEM_UNPLUGGED_INACCESSIBLE_PROP "unplugged-inaccessible"
#define VIRTIO_MEM_EARLY_MIGRATION_PROP "x-early-migration"
#define VIRTIO_MEM_PREALLOC_PROP "prealloc"
+#define VIRTIO_MEM_DYNAMIC_MEMSLOTS_PROP "dynamic-memslots"
struct VirtIOMEM {
VirtIODevice parent_obj;
@@ -44,7 +45,28 @@ struct VirtIOMEM {
int32_t bitmap_size;
unsigned long *bitmap;
- /* assigned memory backend and memory region */
+ /*
+ * With "dynamic-memslots=on": Device memory region in which we dynamically
+ * map the memslots.
+ */
+ MemoryRegion *mr;
+
+ /*
+ * With "dynamic-memslots=on": The individual memslots (aliases into the
+ * memory backend).
+ */
+ MemoryRegion *memslots;
+
+ /* With "dynamic-memslots=on": The total number of memslots. */
+ uint16_t nb_memslots;
+
+ /*
+ * With "dynamic-memslots=on": Size of one memslot (the size of the
+ * last one can differ).
+ */
+ uint64_t memslot_size;
+
+ /* Assigned memory backend with the RAM memory region. */
HostMemoryBackend *memdev;
/* NUMA node */
@@ -82,6 +104,12 @@ struct VirtIOMEM {
*/
bool early_migration;
+ /*
+ * Whether we dynamically map (multiple, if possible) memslots instead of
+ * statically mapping the whole RAM memory region.
+ */
+ bool dynamic_memslots;
+
/* notifiers to notify when "size" changes */
NotifierList size_change_notifiers;
@@ -96,6 +124,8 @@ struct VirtIOMEMClass {
/* public */
void (*fill_device_info)(const VirtIOMEM *vmen, VirtioMEMDeviceInfo *vi);
MemoryRegion *(*get_memory_region)(VirtIOMEM *vmem, Error **errp);
+ void (*decide_memslots)(VirtIOMEM *vmem, unsigned int limit);
+ unsigned int (*get_memslots)(VirtIOMEM *vmem);
void (*add_size_change_notifier)(VirtIOMEM *vmem, Notifier *notifier);
void (*remove_size_change_notifier)(VirtIOMEM *vmem, Notifier *notifier);
void (*unplug_request_check)(VirtIOMEM *vmem, Error **errp);
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index ee9025f..97a8a4f 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -215,7 +215,8 @@ typedef struct KVMRouteChange {
/* external API */
-bool kvm_has_free_slot(MachineState *ms);
+unsigned int kvm_get_max_memslots(void);
+unsigned int kvm_get_free_memslots(void);
bool kvm_has_sync_mmu(void);
int kvm_has_vcpu_events(void);
int kvm_has_robust_singlestep(void);
@@ -552,7 +553,6 @@ int kvm_set_one_reg(CPUState *cs, uint64_t id, void *source);
*/
int kvm_get_one_reg(CPUState *cs, uint64_t id, void *target);
struct ppc_radix_page_info *kvm_get_radix_page_info(void);
-int kvm_get_max_memslots(void);
/* Notify resamplefd for EOI of specific interrupts. */
void kvm_resample_fd_notify(int gsi);
diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h
index a5b9122..075939a 100644
--- a/include/sysemu/kvm_int.h
+++ b/include/sysemu/kvm_int.h
@@ -40,6 +40,7 @@ typedef struct KVMMemoryUpdate {
typedef struct KVMMemoryListener {
MemoryListener listener;
KVMSlot *slots;
+ unsigned int nr_used_slots;
int as_id;
QSIMPLEQ_HEAD(, KVMMemoryUpdate) transaction_add;
QSIMPLEQ_HEAD(, KVMMemoryUpdate) transaction_del;