diff options
author | Stefan Hajnoczi <stefanha@redhat.com> | 2023-10-16 12:34:17 -0400 |
---|---|---|
committer | Stefan Hajnoczi <stefanha@redhat.com> | 2023-10-16 12:34:17 -0400 |
commit | bc2b89b38582b1cc7198428c9174fbbbf31245ad (patch) | |
tree | d15cdfa911a8b6d6eecec0d0ecaa1d1eed346075 /hw/mem | |
parent | 63011373ad22c794a013da69663c03f1297a5c56 (diff) | |
parent | ee6398d862c108f8136a26d93d26680f3d222a3a (diff) | |
download | qemu-bc2b89b38582b1cc7198428c9174fbbbf31245ad.zip qemu-bc2b89b38582b1cc7198428c9174fbbbf31245ad.tar.gz qemu-bc2b89b38582b1cc7198428c9174fbbbf31245ad.tar.bz2 |
Merge tag 'mem-2023-10-12' of https://github.com/davidhildenbrand/qemu into staging
Hi,
"Host Memory Backends" and "Memory devices" queue ("mem"):
- Support memory devices with multiple memslots
- Support memory devices that dynamically consume memslots
- Support memory devices that can automatically decide on the number of
  memslots to use
- virtio-mem support for exposing memory dynamically via multiple
  memslots
- Some required cleanups/refactorings
# -----BEGIN PGP SIGNATURE-----
#
# iQJFBAABCAAvFiEEG9nKrXNcTDpGDfzKTd4Q9wD/g1oFAmUn+XMRHGRhdmlkQHJl
# ZGhhdC5jb20ACgkQTd4Q9wD/g1qDHA//T01suTa+uzrcoJHoMWN11S47WnAmbuTo
# vVakucLBPMJAa9xZeCy3OavXaVGpHkw+t6g3OFknof0LfQ5/j9iE3Q1PxURN7g5j
# SJ2WJXCoceM6T4TMhPvVvgEaYjFmESqZB5FZgedMT0QRyhAxMuF9pCkWhk1O3OAV
# JqQKqLFiGcv60AEuBYGZGzgiOUv8EJ5gKwRF4VOdyHIxqZDw1aZXzlcd4TzFZBQ7
# rwW/3ef+sFmUJdmfrSrqcIlQSRrqZ2w95xATDzLTIEEUT3SWqh/E95EZWIz1M0oQ
# NgWgFiLCR1KOj7bWFhLXT7IfyLh0mEysD+P/hY6QwQ4RewWG7EW5UK+JFswssdcZ
# rEj5XpHZzev/wx7hM4bWsoQ+VIvrH7j3uYGyWkcgYRbdDEkWDv2rsT23lwGYNhht
# oBsrdEBELRw6v4C8doq/+sCmHmuxUMqTGwbArCQVnB1XnLxOEkuqlnfq5MORkzNF
# fxbIRx+LRluOllC0HVaDQd8qxRq1+UC5WIpAcDcrouy4HGgi1onWKrXpgjIAbVyH
# M6cENkK7rnRk96gpeXdmrf0h9HqRciAOY8oUsFsvLyKBOCPBWDrLyOQEY5UoSdtD
# m4QpEVgywCy2z1uU/UObeT/UxJy/9EL/Zb+DHoEK06iEhwONoUJjEBYMJD38RMkk
# mwPTB4UAk9g=
# =s69t
# -----END PGP SIGNATURE-----
# gpg: Signature made Thu 12 Oct 2023 09:49:39 EDT
# gpg: using RSA key 1BD9CAAD735C4C3A460DFCCA4DDE10F700FF835A
# gpg: issuer "david@redhat.com"
# gpg: Good signature from "David Hildenbrand <david@redhat.com>" [unknown]
# gpg: aka "David Hildenbrand <davidhildenbrand@gmail.com>" [full]
# gpg: aka "David Hildenbrand <hildenbr@in.tum.de>" [unknown]
# gpg: WARNING: The key's User ID is not certified with a trusted signature!
# gpg: There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 1BD9 CAAD 735C 4C3A 460D FCCA 4DDE 10F7 00FF 835A
* tag 'mem-2023-10-12' of https://github.com/davidhildenbrand/qemu:
virtio-mem: Mark memslot alias memory regions unmergeable
memory,vhost: Allow for marking memory device memory regions unmergeable
virtio-mem: Expose device memory dynamically via multiple memslots if enabled
virtio-mem: Update state to match bitmap as soon as it's been migrated
virtio-mem: Pass non-const VirtIOMEM via virtio_mem_range_cb
memory: Clarify mapping requirements for RamDiscardManager
memory-device,vhost: Support automatic decision on the number of memslots
vhost: Add vhost_get_max_memslots()
kvm: Add stub for kvm_get_max_memslots()
memory-device,vhost: Support memory devices that dynamically consume memslots
memory-device: Track required and actually used memslots in DeviceMemoryState
stubs: Rename qmp_memory_device.c to memory_device.c
memory-device: Support memory devices with multiple memslots
vhost: Return number of free memslots
kvm: Return number of free memslots
softmmu/physmem: Fixup qemu_ram_block_from_host() documentation
vhost: Remove vhost_backend_can_merge() callback
vhost: Rework memslot filtering and fix "used_memslot" tracking
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Diffstat (limited to 'hw/mem')
-rw-r--r-- | hw/mem/memory-device.c | 196 |
1 file changed, 188 insertions, 8 deletions
diff --git a/hw/mem/memory-device.c b/hw/mem/memory-device.c index 667d56b..ae38f48 100644 --- a/hw/mem/memory-device.c +++ b/hw/mem/memory-device.c @@ -52,19 +52,135 @@ static int memory_device_build_list(Object *obj, void *opaque) return 0; } -static void memory_device_check_addable(MachineState *ms, MemoryRegion *mr, - Error **errp) +static unsigned int memory_device_get_memslots(MemoryDeviceState *md) { + const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md); + + if (mdc->get_memslots) { + return mdc->get_memslots(md); + } + return 1; +} + +/* + * Memslots that are reserved by memory devices (required but still reported + * as free from KVM / vhost). + */ +static unsigned int get_reserved_memslots(MachineState *ms) +{ + if (ms->device_memory->used_memslots > + ms->device_memory->required_memslots) { + /* This is unexpected, and we warned already in the memory notifier. */ + return 0; + } + return ms->device_memory->required_memslots - + ms->device_memory->used_memslots; +} + +unsigned int memory_devices_get_reserved_memslots(void) +{ + if (!current_machine->device_memory) { + return 0; + } + return get_reserved_memslots(current_machine); +} + +bool memory_devices_memslot_auto_decision_active(void) +{ + if (!current_machine->device_memory) { + return false; + } + + return current_machine->device_memory->memslot_auto_decision_active; +} + +static unsigned int memory_device_memslot_decision_limit(MachineState *ms, + MemoryRegion *mr) +{ + const unsigned int reserved = get_reserved_memslots(ms); + const uint64_t size = memory_region_size(mr); + unsigned int max = vhost_get_max_memslots(); + unsigned int free = vhost_get_free_memslots(); + uint64_t available_space; + unsigned int memslots; + + if (kvm_enabled()) { + max = MIN(max, kvm_get_max_memslots()); + free = MIN(free, kvm_get_free_memslots()); + } + + /* + * If we only have less overall memslots than what we consider reasonable, + * just keep it to a minimum. 
+ */ + if (max < MEMORY_DEVICES_SAFE_MAX_MEMSLOTS) { + return 1; + } + + /* + * Consider our soft-limit across all memory devices. We don't really + * expect to exceed this limit in reasonable configurations. + */ + if (MEMORY_DEVICES_SOFT_MEMSLOT_LIMIT <= + ms->device_memory->required_memslots) { + return 1; + } + memslots = MEMORY_DEVICES_SOFT_MEMSLOT_LIMIT - + ms->device_memory->required_memslots; + + /* + * Consider the actually still free memslots. This is only relevant if + * other memslot consumers would consume *significantly* more memslots than + * what we prepared for (> 253). Unlikely, but let's just handle it + * cleanly. + */ + memslots = MIN(memslots, free - reserved); + if (memslots < 1 || unlikely(free < reserved)) { + return 1; + } + + /* We cannot have any other memory devices? So give all to this device. */ + if (size == ms->maxram_size - ms->ram_size) { + return memslots; + } + + /* + * Simple heuristic: equally distribute the memslots over the space + * still available for memory devices. + */ + available_space = ms->maxram_size - ms->ram_size - + ms->device_memory->used_region_size; + memslots = (double)memslots * size / available_space; + return memslots < 1 ? 1 : memslots; +} + +static void memory_device_check_addable(MachineState *ms, MemoryDeviceState *md, + MemoryRegion *mr, Error **errp) +{ + const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md); const uint64_t used_region_size = ms->device_memory->used_region_size; const uint64_t size = memory_region_size(mr); + const unsigned int reserved_memslots = get_reserved_memslots(ms); + unsigned int required_memslots, memslot_limit; + + /* + * Instruct the device to decide how many memslots to use, if applicable, + * before we query the number of required memslots the first time. 
+ */ + if (mdc->decide_memslots) { + memslot_limit = memory_device_memslot_decision_limit(ms, mr); + mdc->decide_memslots(md, memslot_limit); + } + required_memslots = memory_device_get_memslots(md); - /* we will need a new memory slot for kvm and vhost */ - if (kvm_enabled() && !kvm_has_free_slot(ms)) { - error_setg(errp, "hypervisor has no free memory slots left"); + /* we will need memory slots for kvm and vhost */ + if (kvm_enabled() && + kvm_get_free_memslots() < required_memslots + reserved_memslots) { + error_setg(errp, "hypervisor has not enough free memory slots left"); return; } - if (!vhost_has_free_slot()) { - error_setg(errp, "a used vhost backend has no free memory slots left"); + if (vhost_get_free_memslots() < required_memslots + reserved_memslots) { + error_setg(errp, "a used vhost backend has not enough free memory slots left"); return; } @@ -233,7 +349,7 @@ void memory_device_pre_plug(MemoryDeviceState *md, MachineState *ms, goto out; } - memory_device_check_addable(ms, mr, &local_err); + memory_device_check_addable(ms, md, mr, &local_err); if (local_err) { goto out; } @@ -264,6 +380,7 @@ out: void memory_device_plug(MemoryDeviceState *md, MachineState *ms) { const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md); + const unsigned int memslots = memory_device_get_memslots(md); const uint64_t addr = mdc->get_addr(md); MemoryRegion *mr; @@ -275,6 +392,11 @@ void memory_device_plug(MemoryDeviceState *md, MachineState *ms) g_assert(ms->device_memory); ms->device_memory->used_region_size += memory_region_size(mr); + ms->device_memory->required_memslots += memslots; + if (mdc->decide_memslots && memslots > 1) { + ms->device_memory->memslot_auto_decision_active++; + } + memory_region_add_subregion(&ms->device_memory->mr, addr - ms->device_memory->base, mr); trace_memory_device_plug(DEVICE(md)->id ? 
DEVICE(md)->id : "", addr); @@ -283,6 +405,7 @@ void memory_device_plug(MemoryDeviceState *md, MachineState *ms) void memory_device_unplug(MemoryDeviceState *md, MachineState *ms) { const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md); + const unsigned int memslots = memory_device_get_memslots(md); MemoryRegion *mr; /* @@ -293,7 +416,12 @@ void memory_device_unplug(MemoryDeviceState *md, MachineState *ms) g_assert(ms->device_memory); memory_region_del_subregion(&ms->device_memory->mr, mr); + + if (mdc->decide_memslots && memslots > 1) { + ms->device_memory->memslot_auto_decision_active--; + } ms->device_memory->used_region_size -= memory_region_size(mr); + ms->device_memory->required_memslots -= memslots; trace_memory_device_unplug(DEVICE(md)->id ? DEVICE(md)->id : "", mdc->get_addr(md)); } @@ -313,6 +441,50 @@ uint64_t memory_device_get_region_size(const MemoryDeviceState *md, return memory_region_size(mr); } +static void memory_devices_region_mod(MemoryListener *listener, + MemoryRegionSection *mrs, bool add) +{ + DeviceMemoryState *dms = container_of(listener, DeviceMemoryState, + listener); + + if (!memory_region_is_ram(mrs->mr)) { + warn_report("Unexpected memory region mapped into device memory region."); + return; + } + + /* + * The expectation is that each distinct RAM memory region section in + * our region for memory devices consumes exactly one memslot in KVM + * and in vhost. For vhost, this is true, except: + * * ROM memory regions don't consume a memslot. These get used very + * rarely for memory devices (R/O NVDIMMs). + * * Memslots without a fd (memory-backend-ram) don't necessarily + * consume a memslot. Such setups are quite rare and possibly bogus: + * the memory would be inaccessible by such vhost devices. + * + * So for vhost, in corner cases we might over-estimate the number of + * memslots that are currently used or that might still be reserved + * (required - used). + */ + dms->used_memslots += add ? 
1 : -1; + + if (dms->used_memslots > dms->required_memslots) { + warn_report("Memory devices use more memory slots than indicated as required."); + } +} + +static void memory_devices_region_add(MemoryListener *listener, + MemoryRegionSection *mrs) +{ + return memory_devices_region_mod(listener, mrs, true); +} + +static void memory_devices_region_del(MemoryListener *listener, + MemoryRegionSection *mrs) +{ + return memory_devices_region_mod(listener, mrs, false); +} + void machine_memory_devices_init(MachineState *ms, hwaddr base, uint64_t size) { g_assert(size); @@ -322,8 +494,16 @@ void machine_memory_devices_init(MachineState *ms, hwaddr base, uint64_t size) memory_region_init(&ms->device_memory->mr, OBJECT(ms), "device-memory", size); + address_space_init(&ms->device_memory->as, &ms->device_memory->mr, + "device-memory"); memory_region_add_subregion(get_system_memory(), ms->device_memory->base, &ms->device_memory->mr); + + /* Track the number of memslots used by memory devices. */ + ms->device_memory->listener.region_add = memory_devices_region_add; + ms->device_memory->listener.region_del = memory_devices_region_del; + memory_listener_register(&ms->device_memory->listener, + &ms->device_memory->as); } static const TypeInfo memory_device_info = { |