Diffstat (limited to 'hw/virtio/virtio-mem.c')
-rw-r--r-- | hw/virtio/virtio-mem.c | 203
1 file changed, 126 insertions, 77 deletions
diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c
index ef64bf1..c46f6f9 100644
--- a/hw/virtio/virtio-mem.c
+++ b/hw/virtio/virtio-mem.c
@@ -15,20 +15,20 @@
 #include "qemu/cutils.h"
 #include "qemu/error-report.h"
 #include "qemu/units.h"
-#include "sysemu/numa.h"
-#include "sysemu/sysemu.h"
-#include "sysemu/reset.h"
-#include "sysemu/runstate.h"
+#include "system/numa.h"
+#include "system/system.h"
+#include "system/reset.h"
+#include "system/runstate.h"
 #include "hw/virtio/virtio.h"
 #include "hw/virtio/virtio-bus.h"
 #include "hw/virtio/virtio-mem.h"
 #include "qapi/error.h"
 #include "qapi/visitor.h"
-#include "exec/ram_addr.h"
+#include "system/ram_addr.h"
 #include "migration/misc.h"
 #include "hw/boards.h"
 #include "hw/qdev-properties.h"
-#include CONFIG_DEVICES
+#include "hw/acpi/acpi.h"
 #include "trace.h"
 
 static const VMStateDescription vmstate_virtio_mem_device_early;
@@ -61,6 +61,8 @@ static uint32_t virtio_mem_default_thp_size(void)
     } else if (qemu_real_host_page_size() == 64 * KiB) {
         default_thp_size = 512 * MiB;
     }
+#elif defined(__s390x__)
+    default_thp_size = 1 * MiB;
 #endif
 
     return default_thp_size;
@@ -88,6 +90,7 @@ static uint32_t virtio_mem_default_thp_size(void)
 static uint32_t thp_size;
 
 #define HPAGE_PMD_SIZE_PATH "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size"
+#define HPAGE_PATH "/sys/kernel/mm/transparent_hugepage/"
 static uint32_t virtio_mem_thp_size(void)
 {
     gchar *content = NULL;
@@ -98,6 +101,12 @@ static uint32_t virtio_mem_thp_size(void)
         return thp_size;
     }
 
+    /* No THP -> no restrictions. */
+    if (!g_file_test(HPAGE_PATH, G_FILE_TEST_EXISTS)) {
+        thp_size = VIRTIO_MEM_MIN_BLOCK_SIZE;
+        return thp_size;
+    }
+
     /*
      * Try to probe the actual THP size, fallback to (sane but eventually
      * incorrect) default sizes.
@@ -161,7 +170,7 @@ static bool virtio_mem_has_shared_zeropage(RAMBlock *rb)
  * necessary (as the section size can change). But it's more likely that the
  * section size will rather get smaller and not bigger over time.
  */
-#if defined(TARGET_X86_64) || defined(TARGET_I386)
+#if defined(TARGET_X86_64) || defined(TARGET_I386) || defined(TARGET_S390X)
 #define VIRTIO_MEM_USABLE_EXTENT (2 * (128 * MiB))
 #elif defined(TARGET_ARM)
 #define VIRTIO_MEM_USABLE_EXTENT (2 * (512 * MiB))
@@ -181,7 +190,7 @@ static bool virtio_mem_is_busy(void)
      * after plugging them) until we're running on the destination (as we didn't
      * migrate these blocks when they were unplugged).
      */
-    return migration_in_incoming_postcopy() || !migration_is_idle();
+    return migration_in_incoming_postcopy() || migration_is_running();
 }
 
 typedef int (*virtio_mem_range_cb)(VirtIOMEM *vmem, void *arg,
@@ -235,28 +244,6 @@ static int virtio_mem_for_each_plugged_range(VirtIOMEM *vmem, void *arg,
     return ret;
 }
 
-/*
- * Adjust the memory section to cover the intersection with the given range.
- *
- * Returns false if the intersection is empty, otherwise returns true.
- */
-static bool virtio_mem_intersect_memory_section(MemoryRegionSection *s,
-                                                uint64_t offset, uint64_t size)
-{
-    uint64_t start = MAX(s->offset_within_region, offset);
-    uint64_t end = MIN(s->offset_within_region + int128_get64(s->size),
-                       offset + size);
-
-    if (end <= start) {
-        return false;
-    }
-
-    s->offset_within_address_space += start - s->offset_within_region;
-    s->offset_within_region = start;
-    s->size = int128_make64(end - start);
-    return true;
-}
-
 typedef int (*virtio_mem_section_cb)(MemoryRegionSection *s, void *arg);
 
 static int virtio_mem_for_each_plugged_section(const VirtIOMEM *vmem,
@@ -278,7 +265,7 @@ static int virtio_mem_for_each_plugged_section(const VirtIOMEM *vmem,
                                       first_bit + 1) - 1;
         size = (last_bit - first_bit + 1) * vmem->block_size;
 
-        if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
+        if (!memory_region_section_intersect_range(&tmp, offset, size)) {
             break;
         }
         ret = cb(&tmp, arg);
@@ -310,7 +297,7 @@ static int virtio_mem_for_each_unplugged_section(const VirtIOMEM *vmem,
                                       first_bit + 1) - 1;
         size = (last_bit - first_bit + 1) * vmem->block_size;
 
-        if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
+        if (!memory_region_section_intersect_range(&tmp, offset, size)) {
             break;
         }
         ret = cb(&tmp, arg);
@@ -346,7 +333,7 @@ static void virtio_mem_notify_unplug(VirtIOMEM *vmem, uint64_t offset,
     QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
         MemoryRegionSection tmp = *rdl->section;
 
-        if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
+        if (!memory_region_section_intersect_range(&tmp, offset, size)) {
             continue;
         }
         rdl->notify_discard(rdl, &tmp);
@@ -362,7 +349,7 @@ static int virtio_mem_notify_plug(VirtIOMEM *vmem, uint64_t offset,
     QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
         MemoryRegionSection tmp = *rdl->section;
 
-        if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
+        if (!memory_region_section_intersect_range(&tmp, offset, size)) {
             continue;
         }
         ret = rdl->notify_populate(rdl, &tmp);
@@ -379,7 +366,7 @@ static int virtio_mem_notify_plug(VirtIOMEM *vmem, uint64_t offset,
             if (rdl2 == rdl) {
                 break;
             }
-            if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
+            if (!memory_region_section_intersect_range(&tmp, offset, size)) {
                 continue;
             }
             rdl2->notify_discard(rdl2, &tmp);
@@ -874,15 +861,16 @@ static uint64_t virtio_mem_get_features(VirtIODevice *vdev, uint64_t features,
     MachineState *ms = MACHINE(qdev_get_machine());
     VirtIOMEM *vmem = VIRTIO_MEM(vdev);
 
-    if (ms->numa_state) {
-#if defined(CONFIG_ACPI)
+    if (ms->numa_state && acpi_builtin()) {
         virtio_add_feature(&features, VIRTIO_MEM_F_ACPI_PXM);
-#endif
     }
     assert(vmem->unplugged_inaccessible != ON_OFF_AUTO_AUTO);
     if (vmem->unplugged_inaccessible == ON_OFF_AUTO_ON) {
         virtio_add_feature(&features, VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE);
     }
+    if (qemu_wakeup_suspend_enabled()) {
+        virtio_add_feature(&features, VIRTIO_MEM_F_PERSISTENT_SUSPEND);
+    }
     return features;
 }
 
@@ -895,18 +883,6 @@ static int virtio_mem_validate_features(VirtIODevice *vdev)
     return 0;
 }
 
-static void virtio_mem_system_reset(void *opaque)
-{
-    VirtIOMEM *vmem = VIRTIO_MEM(opaque);
-
-    /*
-     * During usual resets, we will unplug all memory and shrink the usable
-     * region size. This is, however, not possible in all scenarios. Then,
-     * the guest has to deal with this manually (VIRTIO_MEM_REQ_UNPLUG_ALL).
-     */
-    virtio_mem_unplug_all(vmem);
-}
-
 static void virtio_mem_prepare_mr(VirtIOMEM *vmem)
 {
     const uint64_t region_size = memory_region_size(&vmem->memdev->mr);
@@ -958,6 +934,7 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
     VirtIOMEM *vmem = VIRTIO_MEM(dev);
     uint64_t page_size;
     RAMBlock *rb;
+    Object *obj;
     int ret;
 
     if (!vmem->memdev) {
@@ -990,7 +967,7 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
         return;
     }
 
-    if (enable_mlock) {
+    if (should_mlock(mlock_state)) {
         error_setg(errp, "Incompatible with mlock");
         return;
     }
@@ -1071,6 +1048,17 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
     }
 
     /*
+     * Set ourselves as RamDiscardManager before the plug handler maps the
+     * memory region and exposes it via an address space.
+     */
+    if (memory_region_set_ram_discard_manager(&vmem->memdev->mr,
+                                              RAM_DISCARD_MANAGER(vmem))) {
+        error_setg(errp, "Failed to set RamDiscardManager");
+        ram_block_coordinated_discard_require(false);
+        return;
+    }
+
+    /*
      * We don't know at this point whether shared RAM is migrated using
      * QEMU or migrated using the file content. "x-ignore-shared" will be
      * configured after realizing the device. So in case we have an
@@ -1084,6 +1072,7 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
         ret = ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb));
         if (ret) {
             error_setg_errno(errp, -ret, "Unexpected error discarding RAM");
+            memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL);
             ram_block_coordinated_discard_require(false);
             return;
         }
@@ -1123,14 +1112,28 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
         vmstate_register_any(VMSTATE_IF(vmem), &vmstate_virtio_mem_device_early,
                              vmem);
     }
-    qemu_register_reset(virtio_mem_system_reset, vmem);
 
     /*
-     * Set ourselves as RamDiscardManager before the plug handler maps the
-     * memory region and exposes it via an address space.
+     * We only want to unplug all memory to start with a clean slate when
+     * it is safe for the guest -- during system resets that call
+     * qemu_devices_reset().
+     *
+     * We'll filter out selected qemu_devices_reset() calls used for other
+     * purposes, like resetting all devices during wakeup from suspend on
+     * x86 based on the reset type passed to qemu_devices_reset().
+     *
+     * Unplugging all memory during simple device resets can result in the VM
+     * unexpectedly losing RAM, corrupting VM state.
+     *
+     * Simple device resets (or resets triggered by getting a parent device
+     * reset) must not change the state of plugged memory blocks. Therefore,
+     * we need a dedicated reset object that only gets called during
+     * qemu_devices_reset().
      */
-    memory_region_set_ram_discard_manager(&vmem->memdev->mr,
-                                          RAM_DISCARD_MANAGER(vmem));
+    obj = object_new(TYPE_VIRTIO_MEM_SYSTEM_RESET);
+    vmem->system_reset = VIRTIO_MEM_SYSTEM_RESET(obj);
+    vmem->system_reset->vmem = vmem;
+    qemu_register_resettable(obj);
 }
 
 static void virtio_mem_device_unrealize(DeviceState *dev)
@@ -1138,12 +1141,9 @@
     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
     VirtIOMEM *vmem = VIRTIO_MEM(dev);
 
-    /*
-     * The unplug handler unmapped the memory region, it cannot be
-     * found via an address space anymore. Unset ourselves.
-     */
-    memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL);
-    qemu_unregister_reset(virtio_mem_system_reset, vmem);
+    qemu_unregister_resettable(OBJECT(vmem->system_reset));
+    object_unref(OBJECT(vmem->system_reset));
+
     if (vmem->early_migration) {
         vmstate_unregister(VMSTATE_IF(vmem), &vmstate_virtio_mem_device_early,
                            vmem);
@@ -1153,6 +1153,11 @@
     virtio_del_queue(vdev, 0);
     virtio_cleanup(vdev);
     g_free(vmem->bitmap);
+    /*
+     * The unplug handler unmapped the memory region, it cannot be
+     * found via an address space anymore. Unset ourselves.
+     */
+    memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL);
     ram_block_coordinated_discard_require(false);
 }
 
@@ -1682,7 +1687,7 @@ static void virtio_mem_instance_finalize(Object *obj)
     vmem->mr = NULL;
 }
 
-static Property virtio_mem_properties[] = {
+static const Property virtio_mem_properties[] = {
     DEFINE_PROP_UINT64(VIRTIO_MEM_ADDR_PROP, VirtIOMEM, addr, 0),
     DEFINE_PROP_UINT32(VIRTIO_MEM_NODE_PROP, VirtIOMEM, node, 0),
     DEFINE_PROP_BOOL(VIRTIO_MEM_PREALLOC_PROP, VirtIOMEM, prealloc, false),
@@ -1696,7 +1701,6 @@ static Property virtio_mem_properties[] = {
                      early_migration, true),
     DEFINE_PROP_BOOL(VIRTIO_MEM_DYNAMIC_MEMSLOTS_PROP, VirtIOMEM,
                      dynamic_memslots, false),
-    DEFINE_PROP_END_OF_LIST(),
 };
 
 static uint64_t virtio_mem_rdm_get_min_granularity(const RamDiscardManager *rdm,
@@ -1728,7 +1732,7 @@ static bool virtio_mem_rdm_is_populated(const RamDiscardManager *rdm,
 }
 
 struct VirtIOMEMReplayData {
-    void *fn;
+    ReplayRamDiscardState fn;
     void *opaque;
 };
 
@@ -1736,12 +1740,12 @@ static int virtio_mem_rdm_replay_populated_cb(MemoryRegionSection *s,
                                               void *arg)
 {
     struct VirtIOMEMReplayData *data = arg;
-    return ((ReplayRamPopulate)data->fn)(s, data->opaque);
+    return data->fn(s, data->opaque);
 }
 
 static int virtio_mem_rdm_replay_populated(const RamDiscardManager *rdm,
                                            MemoryRegionSection *s,
-                                           ReplayRamPopulate replay_fn,
+                                           ReplayRamDiscardState replay_fn,
                                            void *opaque)
 {
     const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
@@ -1760,14 +1764,13 @@ static int virtio_mem_rdm_replay_discarded_cb(MemoryRegionSection *s,
 {
     struct VirtIOMEMReplayData *data = arg;
 
-    ((ReplayRamDiscard)data->fn)(s, data->opaque);
-    return 0;
+    return data->fn(s, data->opaque);
 }
 
-static void virtio_mem_rdm_replay_discarded(const RamDiscardManager *rdm,
-                                            MemoryRegionSection *s,
-                                            ReplayRamDiscard replay_fn,
-                                            void *opaque)
+static int virtio_mem_rdm_replay_discarded(const RamDiscardManager *rdm,
+                                           MemoryRegionSection *s,
+                                           ReplayRamDiscardState replay_fn,
+                                           void *opaque)
 {
     const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
     struct VirtIOMEMReplayData data = {
@@ -1776,8 +1779,8 @@ static void virtio_mem_rdm_replay_discarded(const RamDiscardManager *rdm,
     };
 
     g_assert(s->mr == &vmem->memdev->mr);
-    virtio_mem_for_each_unplugged_section(vmem, s, &data,
-                                          virtio_mem_rdm_replay_discarded_cb);
+    return virtio_mem_for_each_unplugged_section(vmem, s, &data,
+                                          virtio_mem_rdm_replay_discarded_cb);
 }
 
 static void virtio_mem_rdm_register_listener(RamDiscardManager *rdm,
@@ -1843,7 +1846,7 @@ static void virtio_mem_unplug_request_check(VirtIOMEM *vmem, Error **errp)
     }
 }
 
-static void virtio_mem_class_init(ObjectClass *klass, void *data)
+static void virtio_mem_class_init(ObjectClass *klass, const void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
@@ -1885,7 +1888,7 @@ static const TypeInfo virtio_mem_info = {
     .instance_finalize = virtio_mem_instance_finalize,
     .class_init = virtio_mem_class_init,
     .class_size = sizeof(VirtIOMEMClass),
-    .interfaces = (InterfaceInfo[]) {
+    .interfaces = (const InterfaceInfo[]) {
         { TYPE_RAM_DISCARD_MANAGER },
        { }
    },
@@ -1897,3 +1900,49 @@ static void virtio_register_types(void)
 }
 
 type_init(virtio_register_types)
+
+OBJECT_DEFINE_SIMPLE_TYPE_WITH_INTERFACES(VirtioMemSystemReset, virtio_mem_system_reset, VIRTIO_MEM_SYSTEM_RESET, OBJECT, { TYPE_RESETTABLE_INTERFACE }, { })
+
+static void virtio_mem_system_reset_init(Object *obj)
+{
+}
+
+static void virtio_mem_system_reset_finalize(Object *obj)
+{
+}
+
+static ResettableState *virtio_mem_system_reset_get_state(Object *obj)
+{
+    VirtioMemSystemReset *vmem_reset = VIRTIO_MEM_SYSTEM_RESET(obj);
+
+    return &vmem_reset->reset_state;
+}
+
+static void virtio_mem_system_reset_hold(Object *obj, ResetType type)
+{
+    VirtioMemSystemReset *vmem_reset = VIRTIO_MEM_SYSTEM_RESET(obj);
+    VirtIOMEM *vmem = vmem_reset->vmem;
+
+    /*
+     * When waking up from standby/suspend-to-ram, do not unplug any memory.
+     */
+    if (type == RESET_TYPE_WAKEUP) {
+        return;
+    }
+
+    /*
+     * During usual resets, we will unplug all memory and shrink the usable
+     * region size. This is, however, not possible in all scenarios. Then,
+     * the guest has to deal with this manually (VIRTIO_MEM_REQ_UNPLUG_ALL).
+     */
+    virtio_mem_unplug_all(vmem);
+}
+
+static void virtio_mem_system_reset_class_init(ObjectClass *klass,
+                                               const void *data)
+{
+    ResettableClass *rc = RESETTABLE_CLASS(klass);
+
+    rc->get_state = virtio_mem_system_reset_get_state;
+    rc->phases.hold = virtio_mem_system_reset_hold;
+}
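
Reader's note, illustrative sketch only (not part of the diff above): several hunks replace the file-local virtio_mem_intersect_memory_section() with the shared memory_region_section_intersect_range() helper. The fragment below restates, outside of diff markup, the clamping behaviour those callers rely on. It simply mirrors the removed local helper, so the function name intersect_section_sketch and the assumption that the shared helper behaves the same way are illustrative, not a statement about its actual implementation.

/*
 * Sketch of the expected semantics: clamp *s to its intersection with
 * [offset, offset + size) and report whether that intersection is non-empty.
 * Mirrors the virtio_mem_intersect_memory_section() removed above.
 */
#include "qemu/osdep.h"     /* MAX(), MIN() */
#include "exec/memory.h"    /* MemoryRegionSection; "system/memory.h" in newer trees */

static bool intersect_section_sketch(MemoryRegionSection *s,
                                     uint64_t offset, uint64_t size)
{
    uint64_t start = MAX(s->offset_within_region, offset);
    uint64_t end = MIN(s->offset_within_region + int128_get64(s->size),
                       offset + size);

    if (end <= start) {
        /* The section lies entirely outside the given block range. */
        return false;
    }

    /* Shift the section so it describes only the overlapping part. */
    s->offset_within_address_space += start - s->offset_within_region;
    s->offset_within_region = start;
    s->size = int128_make64(end - start);
    return true;
}

This clamping is what lets virtio_mem_for_each_plugged_section(), virtio_mem_for_each_unplugged_section() and the notify_plug()/notify_unplug() paths hand each RamDiscardListener only the slice of its registered section that overlaps the affected block range.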