aboutsummaryrefslogtreecommitdiff
path: root/hw/mem
diff options
context:
space:
mode:
authorStefan Hajnoczi <stefanha@redhat.com>2023-10-16 12:34:17 -0400
committerStefan Hajnoczi <stefanha@redhat.com>2023-10-16 12:34:17 -0400
commitbc2b89b38582b1cc7198428c9174fbbbf31245ad (patch)
treed15cdfa911a8b6d6eecec0d0ecaa1d1eed346075 /hw/mem
parent63011373ad22c794a013da69663c03f1297a5c56 (diff)
parentee6398d862c108f8136a26d93d26680f3d222a3a (diff)
downloadqemu-bc2b89b38582b1cc7198428c9174fbbbf31245ad.zip
qemu-bc2b89b38582b1cc7198428c9174fbbbf31245ad.tar.gz
qemu-bc2b89b38582b1cc7198428c9174fbbbf31245ad.tar.bz2
Merge tag 'mem-2023-10-12' of https://github.com/davidhildenbrand/qemu into staging
Hi, "Host Memory Backends" and "Memory devices" queue ("mem"): - Support memory devices with multiple memslots - Support memory devices that dynamically consume memslots - Support memory devices that can automatically decide on the number of memslots to use - virtio-mem support for exposing memory dynamically via multiple memslots - Some required cleanups/refactorings # -----BEGIN PGP SIGNATURE----- # # iQJFBAABCAAvFiEEG9nKrXNcTDpGDfzKTd4Q9wD/g1oFAmUn+XMRHGRhdmlkQHJl # ZGhhdC5jb20ACgkQTd4Q9wD/g1qDHA//T01suTa+uzrcoJHoMWN11S47WnAmbuTo # vVakucLBPMJAa9xZeCy3OavXaVGpHkw+t6g3OFknof0LfQ5/j9iE3Q1PxURN7g5j # SJ2WJXCoceM6T4TMhPvVvgEaYjFmESqZB5FZgedMT0QRyhAxMuF9pCkWhk1O3OAV # JqQKqLFiGcv60AEuBYGZGzgiOUv8EJ5gKwRF4VOdyHIxqZDw1aZXzlcd4TzFZBQ7 # rwW/3ef+sFmUJdmfrSrqcIlQSRrqZ2w95xATDzLTIEEUT3SWqh/E95EZWIz1M0oQ # NgWgFiLCR1KOj7bWFhLXT7IfyLh0mEysD+P/hY6QwQ4RewWG7EW5UK+JFswssdcZ # rEj5XpHZzev/wx7hM4bWsoQ+VIvrH7j3uYGyWkcgYRbdDEkWDv2rsT23lwGYNhht # oBsrdEBELRw6v4C8doq/+sCmHmuxUMqTGwbArCQVnB1XnLxOEkuqlnfq5MORkzNF # fxbIRx+LRluOllC0HVaDQd8qxRq1+UC5WIpAcDcrouy4HGgi1onWKrXpgjIAbVyH # M6cENkK7rnRk96gpeXdmrf0h9HqRciAOY8oUsFsvLyKBOCPBWDrLyOQEY5UoSdtD # m4QpEVgywCy2z1uU/UObeT/UxJy/9EL/Zb+DHoEK06iEhwONoUJjEBYMJD38RMkk # mwPTB4UAk9g= # =s69t # -----END PGP SIGNATURE----- # gpg: Signature made Thu 12 Oct 2023 09:49:39 EDT # gpg: using RSA key 1BD9CAAD735C4C3A460DFCCA4DDE10F700FF835A # gpg: issuer "david@redhat.com" # gpg: Good signature from "David Hildenbrand <david@redhat.com>" [unknown] # gpg: aka "David Hildenbrand <davidhildenbrand@gmail.com>" [full] # gpg: aka "David Hildenbrand <hildenbr@in.tum.de>" [unknown] # gpg: WARNING: The key's User ID is not certified with a trusted signature! # gpg: There is no indication that the signature belongs to the owner. # Primary key fingerprint: 1BD9 CAAD 735C 4C3A 460D FCCA 4DDE 10F7 00FF 835A * tag 'mem-2023-10-12' of https://github.com/davidhildenbrand/qemu: virtio-mem: Mark memslot alias memory regions unmergeable memory,vhost: Allow for marking memory device memory regions unmergeable virtio-mem: Expose device memory dynamically via multiple memslots if enabled virtio-mem: Update state to match bitmap as soon as it's been migrated virtio-mem: Pass non-const VirtIOMEM via virtio_mem_range_cb memory: Clarify mapping requirements for RamDiscardManager memory-device,vhost: Support automatic decision on the number of memslots vhost: Add vhost_get_max_memslots() kvm: Add stub for kvm_get_max_memslots() memory-device,vhost: Support memory devices that dynamically consume memslots memory-device: Track required and actually used memslots in DeviceMemoryState stubs: Rename qmp_memory_device.c to memory_device.c memory-device: Support memory devices with multiple memslots vhost: Return number of free memslots kvm: Return number of free memslots softmmu/physmem: Fixup qemu_ram_block_from_host() documentation vhost: Remove vhost_backend_can_merge() callback vhost: Rework memslot filtering and fix "used_memslot" tracking Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Diffstat (limited to 'hw/mem')
-rw-r--r--hw/mem/memory-device.c196
1 files changed, 188 insertions, 8 deletions
diff --git a/hw/mem/memory-device.c b/hw/mem/memory-device.c
index 667d56b..ae38f48 100644
--- a/hw/mem/memory-device.c
+++ b/hw/mem/memory-device.c
@@ -52,19 +52,135 @@ static int memory_device_build_list(Object *obj, void *opaque)
return 0;
}
-static void memory_device_check_addable(MachineState *ms, MemoryRegion *mr,
- Error **errp)
+static unsigned int memory_device_get_memslots(MemoryDeviceState *md)
{
+ const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md);
+
+ if (mdc->get_memslots) {
+ return mdc->get_memslots(md);
+ }
+ return 1;
+}
+
+/*
+ * Memslots that are reserved by memory devices (required but still reported
+ * as free from KVM / vhost).
+ */
+static unsigned int get_reserved_memslots(MachineState *ms)
+{
+ if (ms->device_memory->used_memslots >
+ ms->device_memory->required_memslots) {
+ /* This is unexpected, and we warned already in the memory notifier. */
+ return 0;
+ }
+ return ms->device_memory->required_memslots -
+ ms->device_memory->used_memslots;
+}
+
+unsigned int memory_devices_get_reserved_memslots(void)
+{
+ if (!current_machine->device_memory) {
+ return 0;
+ }
+ return get_reserved_memslots(current_machine);
+}
+
+bool memory_devices_memslot_auto_decision_active(void)
+{
+ if (!current_machine->device_memory) {
+ return false;
+ }
+
+ return current_machine->device_memory->memslot_auto_decision_active;
+}
+
+static unsigned int memory_device_memslot_decision_limit(MachineState *ms,
+ MemoryRegion *mr)
+{
+ const unsigned int reserved = get_reserved_memslots(ms);
+ const uint64_t size = memory_region_size(mr);
+ unsigned int max = vhost_get_max_memslots();
+ unsigned int free = vhost_get_free_memslots();
+ uint64_t available_space;
+ unsigned int memslots;
+
+ if (kvm_enabled()) {
+ max = MIN(max, kvm_get_max_memslots());
+ free = MIN(free, kvm_get_free_memslots());
+ }
+
+ /*
+ * If we only have less overall memslots than what we consider reasonable,
+ * just keep it to a minimum.
+ */
+ if (max < MEMORY_DEVICES_SAFE_MAX_MEMSLOTS) {
+ return 1;
+ }
+
+ /*
+ * Consider our soft-limit across all memory devices. We don't really
+ * expect to exceed this limit in reasonable configurations.
+ */
+ if (MEMORY_DEVICES_SOFT_MEMSLOT_LIMIT <=
+ ms->device_memory->required_memslots) {
+ return 1;
+ }
+ memslots = MEMORY_DEVICES_SOFT_MEMSLOT_LIMIT -
+ ms->device_memory->required_memslots;
+
+ /*
+ * Consider the actually still free memslots. This is only relevant if
+ * other memslot consumers would consume *significantly* more memslots than
+ * what we prepared for (> 253). Unlikely, but let's just handle it
+ * cleanly.
+ */
+ memslots = MIN(memslots, free - reserved);
+ if (memslots < 1 || unlikely(free < reserved)) {
+ return 1;
+ }
+
+ /* We cannot have any other memory devices? So give all to this device. */
+ if (size == ms->maxram_size - ms->ram_size) {
+ return memslots;
+ }
+
+ /*
+ * Simple heuristic: equally distribute the memslots over the space
+ * still available for memory devices.
+ */
+ available_space = ms->maxram_size - ms->ram_size -
+ ms->device_memory->used_region_size;
+ memslots = (double)memslots * size / available_space;
+ return memslots < 1 ? 1 : memslots;
+}
+
+static void memory_device_check_addable(MachineState *ms, MemoryDeviceState *md,
+ MemoryRegion *mr, Error **errp)
+{
+ const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md);
const uint64_t used_region_size = ms->device_memory->used_region_size;
const uint64_t size = memory_region_size(mr);
+ const unsigned int reserved_memslots = get_reserved_memslots(ms);
+ unsigned int required_memslots, memslot_limit;
+
+ /*
+ * Instruct the device to decide how many memslots to use, if applicable,
+ * before we query the number of required memslots the first time.
+ */
+ if (mdc->decide_memslots) {
+ memslot_limit = memory_device_memslot_decision_limit(ms, mr);
+ mdc->decide_memslots(md, memslot_limit);
+ }
+ required_memslots = memory_device_get_memslots(md);
- /* we will need a new memory slot for kvm and vhost */
- if (kvm_enabled() && !kvm_has_free_slot(ms)) {
- error_setg(errp, "hypervisor has no free memory slots left");
+ /* we will need memory slots for kvm and vhost */
+ if (kvm_enabled() &&
+ kvm_get_free_memslots() < required_memslots + reserved_memslots) {
+ error_setg(errp, "hypervisor has not enough free memory slots left");
return;
}
- if (!vhost_has_free_slot()) {
- error_setg(errp, "a used vhost backend has no free memory slots left");
+ if (vhost_get_free_memslots() < required_memslots + reserved_memslots) {
+ error_setg(errp, "a used vhost backend has not enough free memory slots left");
return;
}
@@ -233,7 +349,7 @@ void memory_device_pre_plug(MemoryDeviceState *md, MachineState *ms,
goto out;
}
- memory_device_check_addable(ms, mr, &local_err);
+ memory_device_check_addable(ms, md, mr, &local_err);
if (local_err) {
goto out;
}
@@ -264,6 +380,7 @@ out:
void memory_device_plug(MemoryDeviceState *md, MachineState *ms)
{
const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md);
+ const unsigned int memslots = memory_device_get_memslots(md);
const uint64_t addr = mdc->get_addr(md);
MemoryRegion *mr;
@@ -275,6 +392,11 @@ void memory_device_plug(MemoryDeviceState *md, MachineState *ms)
g_assert(ms->device_memory);
ms->device_memory->used_region_size += memory_region_size(mr);
+ ms->device_memory->required_memslots += memslots;
+ if (mdc->decide_memslots && memslots > 1) {
+ ms->device_memory->memslot_auto_decision_active++;
+ }
+
memory_region_add_subregion(&ms->device_memory->mr,
addr - ms->device_memory->base, mr);
trace_memory_device_plug(DEVICE(md)->id ? DEVICE(md)->id : "", addr);
@@ -283,6 +405,7 @@ void memory_device_plug(MemoryDeviceState *md, MachineState *ms)
void memory_device_unplug(MemoryDeviceState *md, MachineState *ms)
{
const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md);
+ const unsigned int memslots = memory_device_get_memslots(md);
MemoryRegion *mr;
/*
@@ -293,7 +416,12 @@ void memory_device_unplug(MemoryDeviceState *md, MachineState *ms)
g_assert(ms->device_memory);
memory_region_del_subregion(&ms->device_memory->mr, mr);
+
+ if (mdc->decide_memslots && memslots > 1) {
+ ms->device_memory->memslot_auto_decision_active--;
+ }
ms->device_memory->used_region_size -= memory_region_size(mr);
+ ms->device_memory->required_memslots -= memslots;
trace_memory_device_unplug(DEVICE(md)->id ? DEVICE(md)->id : "",
mdc->get_addr(md));
}
@@ -313,6 +441,50 @@ uint64_t memory_device_get_region_size(const MemoryDeviceState *md,
return memory_region_size(mr);
}
+static void memory_devices_region_mod(MemoryListener *listener,
+ MemoryRegionSection *mrs, bool add)
+{
+ DeviceMemoryState *dms = container_of(listener, DeviceMemoryState,
+ listener);
+
+ if (!memory_region_is_ram(mrs->mr)) {
+ warn_report("Unexpected memory region mapped into device memory region.");
+ return;
+ }
+
+ /*
+ * The expectation is that each distinct RAM memory region section in
+ * our region for memory devices consumes exactly one memslot in KVM
+ * and in vhost. For vhost, this is true, except:
+ * * ROM memory regions don't consume a memslot. These get used very
+ * rarely for memory devices (R/O NVDIMMs).
+ * * Memslots without a fd (memory-backend-ram) don't necessarily
+ * consume a memslot. Such setups are quite rare and possibly bogus:
+ * the memory would be inaccessible by such vhost devices.
+ *
+ * So for vhost, in corner cases we might over-estimate the number of
+ * memslots that are currently used or that might still be reserved
+ * (required - used).
+ */
+ dms->used_memslots += add ? 1 : -1;
+
+ if (dms->used_memslots > dms->required_memslots) {
+ warn_report("Memory devices use more memory slots than indicated as required.");
+ }
+}
+
+static void memory_devices_region_add(MemoryListener *listener,
+ MemoryRegionSection *mrs)
+{
+ return memory_devices_region_mod(listener, mrs, true);
+}
+
+static void memory_devices_region_del(MemoryListener *listener,
+ MemoryRegionSection *mrs)
+{
+ return memory_devices_region_mod(listener, mrs, false);
+}
+
void machine_memory_devices_init(MachineState *ms, hwaddr base, uint64_t size)
{
g_assert(size);
@@ -322,8 +494,16 @@ void machine_memory_devices_init(MachineState *ms, hwaddr base, uint64_t size)
memory_region_init(&ms->device_memory->mr, OBJECT(ms), "device-memory",
size);
+ address_space_init(&ms->device_memory->as, &ms->device_memory->mr,
+ "device-memory");
memory_region_add_subregion(get_system_memory(), ms->device_memory->base,
&ms->device_memory->mr);
+
+ /* Track the number of memslots used by memory devices. */
+ ms->device_memory->listener.region_add = memory_devices_region_add;
+ ms->device_memory->listener.region_del = memory_devices_region_del;
+ memory_listener_register(&ms->device_memory->listener,
+ &ms->device_memory->as);
}
static const TypeInfo memory_device_info = {