diff options
Diffstat (limited to 'accel/mshv/mem.c')
-rw-r--r-- | accel/mshv/mem.c | 563 |
1 files changed, 563 insertions, 0 deletions
diff --git a/accel/mshv/mem.c b/accel/mshv/mem.c new file mode 100644 index 0000000..0e2164a --- /dev/null +++ b/accel/mshv/mem.c @@ -0,0 +1,563 @@ +/* + * QEMU MSHV support + * + * Copyright Microsoft, Corp. 2025 + * + * Authors: + * Magnus Kulke <magnuskulke@microsoft.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + * + */ + +#include "qemu/osdep.h" +#include "qemu/lockable.h" +#include "qemu/error-report.h" +#include "qemu/rcu.h" +#include "linux/mshv.h" +#include "system/address-spaces.h" +#include "system/mshv.h" +#include "system/mshv_int.h" +#include "exec/memattrs.h" +#include <sys/ioctl.h> +#include "trace.h" + +typedef struct SlotsRCUReclaim { + struct rcu_head rcu; + GList *old_head; + MshvMemorySlot *removed_slot; +} SlotsRCUReclaim; + +static void rcu_reclaim_slotlist(struct rcu_head *rcu) +{ + SlotsRCUReclaim *r = container_of(rcu, SlotsRCUReclaim, rcu); + g_list_free(r->old_head); + g_free(r->removed_slot); + g_free(r); +} + +static void publish_slots(GList *new_head, GList *old_head, + MshvMemorySlot *removed_slot) +{ + MshvMemorySlotManager *manager = &mshv_state->msm; + + assert(manager); + qatomic_store_release(&manager->slots, new_head); + + SlotsRCUReclaim *r = g_new(SlotsRCUReclaim, 1); + r->old_head = old_head; + r->removed_slot = removed_slot; + + call_rcu1(&r->rcu, rcu_reclaim_slotlist); +} + +/* Needs to be called with mshv_state->msm.mutex held */ +static int remove_slot(MshvMemorySlot *slot) +{ + GList *old_head, *new_head; + MshvMemorySlotManager *manager = &mshv_state->msm; + + assert(manager); + old_head = qatomic_load_acquire(&manager->slots); + + if (!g_list_find(old_head, slot)) { + error_report("slot requested for removal not found"); + return -1; + } + + new_head = g_list_copy(old_head); + new_head = g_list_remove(new_head, slot); + manager->n_slots--; + + publish_slots(new_head, old_head, slot); + + return 0; +} + +/* Needs to be called with mshv_state->msm.mutex held */ +static MshvMemorySlot *append_slot(uint64_t gpa, uint64_t userspace_addr, + uint64_t size, bool readonly) +{ + GList *old_head, *new_head; + MshvMemorySlot *slot; + MshvMemorySlotManager *manager = &mshv_state->msm; + + assert(manager); + + old_head = qatomic_load_acquire(&manager->slots); + + if (manager->n_slots >= MSHV_MAX_MEM_SLOTS) { + error_report("no free memory slots available"); + return NULL; + } + + slot = g_new0(MshvMemorySlot, 1); + slot->guest_phys_addr = gpa; + slot->userspace_addr = userspace_addr; + slot->memory_size = size; + slot->readonly = readonly; + + new_head = g_list_copy(old_head); + new_head = g_list_append(new_head, slot); + manager->n_slots++; + + publish_slots(new_head, old_head, NULL); + + return slot; +} + +static int slot_overlaps(const MshvMemorySlot *slot1, + const MshvMemorySlot *slot2) +{ + uint64_t start_1 = slot1->userspace_addr, + start_2 = slot2->userspace_addr; + size_t len_1 = slot1->memory_size, + len_2 = slot2->memory_size; + + if (slot1 == slot2) { + return -1; + } + + return ranges_overlap(start_1, len_1, start_2, len_2) ? 0 : -1; +} + +static bool is_mapped(MshvMemorySlot *slot) +{ + /* Subsequent reads of mapped field see a fully-initialized slot */ + return qatomic_load_acquire(&slot->mapped); +} + +/* + * Find slot that is: + * - overlapping in userspace + * - currently mapped in the guest + * + * Needs to be called with mshv_state->msm.mutex or RCU read lock held. + */ +static MshvMemorySlot *find_overlap_mem_slot(GList *head, MshvMemorySlot *slot) +{ + GList *found; + MshvMemorySlot *overlap_slot; + + found = g_list_find_custom(head, slot, (GCompareFunc) slot_overlaps); + + if (!found) { + return NULL; + } + + overlap_slot = found->data; + if (!overlap_slot || !is_mapped(overlap_slot)) { + return NULL; + } + + return overlap_slot; +} + +static int set_guest_memory(int vm_fd, + const struct mshv_user_mem_region *region) +{ + int ret; + + ret = ioctl(vm_fd, MSHV_SET_GUEST_MEMORY, region); + if (ret < 0) { + error_report("failed to set guest memory: %s", strerror(errno)); + return -1; + } + + return 0; +} + +static int map_or_unmap(int vm_fd, const MshvMemorySlot *slot, bool map) +{ + struct mshv_user_mem_region region = {0}; + + region.guest_pfn = slot->guest_phys_addr >> MSHV_PAGE_SHIFT; + region.size = slot->memory_size; + region.userspace_addr = slot->userspace_addr; + + if (!map) { + region.flags |= (1 << MSHV_SET_MEM_BIT_UNMAP); + trace_mshv_unmap_memory(slot->userspace_addr, slot->guest_phys_addr, + slot->memory_size); + return set_guest_memory(vm_fd, ®ion); + } + + region.flags = BIT(MSHV_SET_MEM_BIT_EXECUTABLE); + if (!slot->readonly) { + region.flags |= BIT(MSHV_SET_MEM_BIT_WRITABLE); + } + + trace_mshv_map_memory(slot->userspace_addr, slot->guest_phys_addr, + slot->memory_size); + return set_guest_memory(vm_fd, ®ion); +} + +static int slot_matches_region(const MshvMemorySlot *slot1, + const MshvMemorySlot *slot2) +{ + return (slot1->guest_phys_addr == slot2->guest_phys_addr && + slot1->userspace_addr == slot2->userspace_addr && + slot1->memory_size == slot2->memory_size) ? 0 : -1; +} + +/* Needs to be called with mshv_state->msm.mutex held */ +static MshvMemorySlot *find_mem_slot_by_region(uint64_t gpa, uint64_t size, + uint64_t userspace_addr) +{ + MshvMemorySlot ref_slot = { + .guest_phys_addr = gpa, + .userspace_addr = userspace_addr, + .memory_size = size, + }; + GList *found; + MshvMemorySlotManager *manager = &mshv_state->msm; + + assert(manager); + found = g_list_find_custom(manager->slots, &ref_slot, + (GCompareFunc) slot_matches_region); + + return found ? found->data : NULL; +} + +static int slot_covers_gpa(const MshvMemorySlot *slot, uint64_t *gpa_p) +{ + uint64_t gpa_offset, gpa = *gpa_p; + + gpa_offset = gpa - slot->guest_phys_addr; + return (slot->guest_phys_addr <= gpa && gpa_offset < slot->memory_size) + ? 0 : -1; +} + +/* Needs to be called with mshv_state->msm.mutex or RCU read lock held */ +static MshvMemorySlot *find_mem_slot_by_gpa(GList *head, uint64_t gpa) +{ + GList *found; + MshvMemorySlot *slot; + + trace_mshv_find_slot_by_gpa(gpa); + + found = g_list_find_custom(head, &gpa, (GCompareFunc) slot_covers_gpa); + if (found) { + slot = found->data; + trace_mshv_found_slot(slot->userspace_addr, slot->guest_phys_addr, + slot->memory_size); + return slot; + } + + return NULL; +} + +/* Needs to be called with mshv_state->msm.mutex held */ +static void set_mapped(MshvMemorySlot *slot, bool mapped) +{ + /* prior writes to mapped field becomes visible before readers see slot */ + qatomic_store_release(&slot->mapped, mapped); +} + +MshvRemapResult mshv_remap_overlap_region(int vm_fd, uint64_t gpa) +{ + MshvMemorySlot *gpa_slot, *overlap_slot; + GList *head; + int ret; + MshvMemorySlotManager *manager = &mshv_state->msm; + + /* fast path, called often by unmapped_gpa vm exit */ + WITH_RCU_READ_LOCK_GUARD() { + assert(manager); + head = qatomic_load_acquire(&manager->slots); + /* return early if no slot is found */ + gpa_slot = find_mem_slot_by_gpa(head, gpa); + if (gpa_slot == NULL) { + return MshvRemapNoMapping; + } + + /* return early if no overlapping slot is found */ + overlap_slot = find_overlap_mem_slot(head, gpa_slot); + if (overlap_slot == NULL) { + return MshvRemapNoOverlap; + } + } + + /* + * We'll modify the mapping list, so we need to upgrade to mutex and + * recheck. + */ + assert(manager); + QEMU_LOCK_GUARD(&manager->mutex); + + /* return early if no slot is found */ + gpa_slot = find_mem_slot_by_gpa(manager->slots, gpa); + if (gpa_slot == NULL) { + return MshvRemapNoMapping; + } + + /* return early if no overlapping slot is found */ + overlap_slot = find_overlap_mem_slot(manager->slots, gpa_slot); + if (overlap_slot == NULL) { + return MshvRemapNoOverlap; + } + + /* unmap overlapping slot */ + ret = map_or_unmap(vm_fd, overlap_slot, false); + if (ret < 0) { + error_report("failed to unmap overlap region"); + abort(); + } + set_mapped(overlap_slot, false); + warn_report("mapped out userspace_addr=0x%016lx gpa=0x%010lx size=0x%lx", + overlap_slot->userspace_addr, + overlap_slot->guest_phys_addr, + overlap_slot->memory_size); + + /* map region for gpa */ + ret = map_or_unmap(vm_fd, gpa_slot, true); + if (ret < 0) { + error_report("failed to map new region"); + abort(); + } + set_mapped(gpa_slot, true); + warn_report("mapped in userspace_addr=0x%016lx gpa=0x%010lx size=0x%lx", + gpa_slot->userspace_addr, gpa_slot->guest_phys_addr, + gpa_slot->memory_size); + + return MshvRemapOk; +} + +static int handle_unmapped_mmio_region_read(uint64_t gpa, uint64_t size, + uint8_t *data) +{ + warn_report("read from unmapped mmio region gpa=0x%lx size=%lu", gpa, size); + + if (size == 0 || size > 8) { + error_report("invalid size %lu for reading from unmapped mmio region", + size); + return -1; + } + + memset(data, 0xFF, size); + + return 0; +} + +int mshv_guest_mem_read(uint64_t gpa, uint8_t *data, uintptr_t size, + bool is_secure_mode, bool instruction_fetch) +{ + int ret; + MemTxAttrs memattr = { .secure = is_secure_mode }; + + if (instruction_fetch) { + trace_mshv_insn_fetch(gpa, size); + } else { + trace_mshv_mem_read(gpa, size); + } + + ret = address_space_rw(&address_space_memory, gpa, memattr, (void *)data, + size, false); + if (ret == MEMTX_OK) { + return 0; + } + + if (ret == MEMTX_DECODE_ERROR) { + return handle_unmapped_mmio_region_read(gpa, size, data); + } + + error_report("failed to read guest memory at 0x%lx", gpa); + return -1; +} + +int mshv_guest_mem_write(uint64_t gpa, const uint8_t *data, uintptr_t size, + bool is_secure_mode) +{ + int ret; + MemTxAttrs memattr = { .secure = is_secure_mode }; + + trace_mshv_mem_write(gpa, size); + ret = address_space_rw(&address_space_memory, gpa, memattr, (void *)data, + size, true); + if (ret == MEMTX_OK) { + return 0; + } + + if (ret == MEMTX_DECODE_ERROR) { + warn_report("write to unmapped mmio region gpa=0x%lx size=%lu", gpa, + size); + return 0; + } + + error_report("Failed to write guest memory"); + return -1; +} + +static int tracked_unmap(int vm_fd, uint64_t gpa, uint64_t size, + uint64_t userspace_addr) +{ + int ret; + MshvMemorySlot *slot; + MshvMemorySlotManager *manager = &mshv_state->msm; + + assert(manager); + + QEMU_LOCK_GUARD(&manager->mutex); + + slot = find_mem_slot_by_region(gpa, size, userspace_addr); + if (!slot) { + trace_mshv_skip_unset_mem(userspace_addr, gpa, size); + /* no work to do */ + return 0; + } + + if (!is_mapped(slot)) { + /* remove slot, no need to unmap */ + return remove_slot(slot); + } + + ret = map_or_unmap(vm_fd, slot, false); + if (ret < 0) { + error_report("failed to unmap memory region"); + return ret; + } + return remove_slot(slot); +} + +static int tracked_map(int vm_fd, uint64_t gpa, uint64_t size, bool readonly, + uint64_t userspace_addr) +{ + MshvMemorySlot *slot, *overlap_slot; + int ret; + MshvMemorySlotManager *manager = &mshv_state->msm; + + assert(manager); + + QEMU_LOCK_GUARD(&manager->mutex); + + slot = find_mem_slot_by_region(gpa, size, userspace_addr); + if (slot) { + error_report("memory region already mapped at gpa=0x%lx, " + "userspace_addr=0x%lx, size=0x%lx", + slot->guest_phys_addr, slot->userspace_addr, + slot->memory_size); + return -1; + } + + slot = append_slot(gpa, userspace_addr, size, readonly); + + overlap_slot = find_overlap_mem_slot(manager->slots, slot); + if (overlap_slot) { + trace_mshv_remap_attempt(slot->userspace_addr, + slot->guest_phys_addr, + slot->memory_size); + warn_report("attempt to map region [0x%lx-0x%lx], while " + "[0x%lx-0x%lx] is already mapped in the guest", + userspace_addr, userspace_addr + size - 1, + overlap_slot->userspace_addr, + overlap_slot->userspace_addr + + overlap_slot->memory_size - 1); + + /* do not register mem slot in hv, but record for later swap-in */ + set_mapped(slot, false); + + return 0; + } + + ret = map_or_unmap(vm_fd, slot, true); + if (ret < 0) { + error_report("failed to map memory region"); + return -1; + } + set_mapped(slot, true); + + return 0; +} + +static int set_memory(uint64_t gpa, uint64_t size, bool readonly, + uint64_t userspace_addr, bool add) +{ + int vm_fd = mshv_state->vm; + + if (add) { + return tracked_map(vm_fd, gpa, size, readonly, userspace_addr); + } + + return tracked_unmap(vm_fd, gpa, size, userspace_addr); +} + +/* + * Calculate and align the start address and the size of the section. + * Return the size. If the size is 0, the aligned section is empty. + */ +static hwaddr align_section(MemoryRegionSection *section, hwaddr *start) +{ + hwaddr size = int128_get64(section->size); + hwaddr delta, aligned; + + /* + * works in page size chunks, but the function may be called + * with sub-page size and unaligned start address. Pad the start + * address to next and truncate size to previous page boundary. + */ + aligned = ROUND_UP(section->offset_within_address_space, + qemu_real_host_page_size()); + delta = aligned - section->offset_within_address_space; + *start = aligned; + if (delta > size) { + return 0; + } + + return (size - delta) & qemu_real_host_page_mask(); +} + +void mshv_set_phys_mem(MshvMemoryListener *mml, MemoryRegionSection *section, + bool add) +{ + int ret = 0; + MemoryRegion *area = section->mr; + bool writable = !area->readonly && !area->rom_device; + hwaddr start_addr, mr_offset, size; + void *ram; + + size = align_section(section, &start_addr); + trace_mshv_set_phys_mem(add, section->mr->name, start_addr); + + size = align_section(section, &start_addr); + trace_mshv_set_phys_mem(add, section->mr->name, start_addr); + + /* + * If the memory device is a writable non-ram area, we do not + * want to map it into the guest memory. If it is not a ROM device, + * we want to remove mshv memory mapping, so accesses will trap. + */ + if (!memory_region_is_ram(area)) { + if (writable) { + return; + } else if (!area->romd_mode) { + add = false; + } + } + + if (!size) { + return; + } + + mr_offset = section->offset_within_region + start_addr - + section->offset_within_address_space; + + ram = memory_region_get_ram_ptr(area) + mr_offset; + + ret = set_memory(start_addr, size, !writable, (uint64_t)ram, add); + if (ret < 0) { + error_report("failed to set memory region"); + abort(); + } +} + +void mshv_init_memory_slot_manager(MshvState *mshv_state) +{ + MshvMemorySlotManager *manager; + + assert(mshv_state); + manager = &mshv_state->msm; + + manager->n_slots = 0; + manager->slots = NULL; + qemu_mutex_init(&manager->mutex); +} |