aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--accel/kvm/kvm-all.c2
-rw-r--r--hw/core/cpu-system.c13
-rw-r--r--hw/core/loader.c2
-rw-r--r--hw/display/apple-gfx.m3
-rw-r--r--hw/remote/vfio-user-obj.c2
-rw-r--r--hw/virtio/virtio-mem.c2
-rw-r--r--include/exec/cpu-common.h2
-rw-r--r--include/exec/memattrs.h5
-rw-r--r--include/exec/memory.h35
-rw-r--r--include/system/os-posix.h2
-rw-r--r--include/system/os-win32.h2
-rw-r--r--include/system/system.h12
-rw-r--r--meson.build6
-rw-r--r--migration/postcopy-ram.c4
-rw-r--r--monitor/hmp-cmds-target.c3
-rw-r--r--os-posix.c15
-rw-r--r--qemu-options.hx14
-rw-r--r--system/globals.c12
-rw-r--r--system/memory_ldst.c.inc18
-rw-r--r--system/physmem.c121
-rw-r--r--system/vl.c52
21 files changed, 229 insertions, 98 deletions
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index c65b790..f89568b 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -1288,7 +1288,7 @@ static void kvm_unpoison_all(void *param)
QLIST_FOREACH_SAFE(page, &hwpoison_page_list, list, next_page) {
QLIST_REMOVE(page, list);
- qemu_ram_remap(page->ram_addr, TARGET_PAGE_SIZE);
+ qemu_ram_remap(page->ram_addr);
g_free(page);
}
}
diff --git a/hw/core/cpu-system.c b/hw/core/cpu-system.c
index 6aae28a..6e307c8 100644
--- a/hw/core/cpu-system.c
+++ b/hw/core/cpu-system.c
@@ -51,13 +51,18 @@ hwaddr cpu_get_phys_page_attrs_debug(CPUState *cpu, vaddr addr,
MemTxAttrs *attrs)
{
CPUClass *cc = CPU_GET_CLASS(cpu);
+ hwaddr paddr;
if (cc->sysemu_ops->get_phys_page_attrs_debug) {
- return cc->sysemu_ops->get_phys_page_attrs_debug(cpu, addr, attrs);
+ paddr = cc->sysemu_ops->get_phys_page_attrs_debug(cpu, addr, attrs);
+ } else {
+ /* Fallback for CPUs which don't implement the _attrs_ hook */
+ *attrs = MEMTXATTRS_UNSPECIFIED;
+ paddr = cc->sysemu_ops->get_phys_page_debug(cpu, addr);
}
- /* Fallback for CPUs which don't implement the _attrs_ hook */
- *attrs = MEMTXATTRS_UNSPECIFIED;
- return cc->sysemu_ops->get_phys_page_debug(cpu, addr);
+ /* Indicate that this is a debug access. */
+ attrs->debug = 1;
+ return paddr;
}
hwaddr cpu_get_phys_page_debug(CPUState *cpu, vaddr addr)
diff --git a/hw/core/loader.c b/hw/core/loader.c
index fd25c5e..332b879 100644
--- a/hw/core/loader.c
+++ b/hw/core/loader.c
@@ -144,7 +144,7 @@ ssize_t load_image_mr(const char *filename, MemoryRegion *mr)
{
ssize_t size;
- if (!memory_access_is_direct(mr, false)) {
+ if (!memory_access_is_direct(mr, false, MEMTXATTRS_UNSPECIFIED)) {
/* Can only load an image into RAM or ROM */
return -1;
}
diff --git a/hw/display/apple-gfx.m b/hw/display/apple-gfx.m
index aa1455b..1554f3b 100644
--- a/hw/display/apple-gfx.m
+++ b/hw/display/apple-gfx.m
@@ -137,7 +137,8 @@ void *apple_gfx_host_ptr_for_gpa_range(uint64_t guest_physical,
MEMTXATTRS_UNSPECIFIED);
if (!ram_region || ram_region_length < length ||
- !memory_access_is_direct(ram_region, !read_only)) {
+ !memory_access_is_direct(ram_region, !read_only,
+ MEMTXATTRS_UNSPECIFIED)) {
return NULL;
}
diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c
index 9e5ff6d..6e51a92 100644
--- a/hw/remote/vfio-user-obj.c
+++ b/hw/remote/vfio-user-obj.c
@@ -358,7 +358,7 @@ static int vfu_object_mr_rw(MemoryRegion *mr, uint8_t *buf, hwaddr offset,
int access_size;
uint64_t val;
- if (memory_access_is_direct(mr, is_write)) {
+ if (memory_access_is_direct(mr, is_write, MEMTXATTRS_UNSPECIFIED)) {
/**
* Some devices expose a PCI expansion ROM, which could be buffer
* based as compared to other regions which are primarily based on
diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c
index b1a0037..7b140ad 100644
--- a/hw/virtio/virtio-mem.c
+++ b/hw/virtio/virtio-mem.c
@@ -991,7 +991,7 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
return;
}
- if (enable_mlock) {
+ if (should_mlock(mlock_state)) {
error_setg(errp, "Incompatible with mlock");
return;
}
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index b1d76d6..3771b21 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -67,7 +67,7 @@ typedef uintptr_t ram_addr_t;
/* memory API */
-void qemu_ram_remap(ram_addr_t addr, ram_addr_t length);
+void qemu_ram_remap(ram_addr_t addr);
/* This should not be used by devices. */
ram_addr_t qemu_ram_addr_from_host(void *ptr);
ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr);
diff --git a/include/exec/memattrs.h b/include/exec/memattrs.h
index 060b7e7..8db1d30 100644
--- a/include/exec/memattrs.h
+++ b/include/exec/memattrs.h
@@ -44,6 +44,8 @@ typedef struct MemTxAttrs {
* (see MEMTX_ACCESS_ERROR).
*/
unsigned int memory:1;
+ /* Debug access that can even write to ROM. */
+ unsigned int debug:1;
/* Requester ID (for MSI for example) */
unsigned int requester_id:16;
@@ -56,7 +58,8 @@ typedef struct MemTxAttrs {
* Bus masters which don't specify any attributes will get this
* (via the MEMTXATTRS_UNSPECIFIED constant), so that we can
* distinguish "all attributes deliberately clear" from
- * "didn't specify" if necessary.
+ * "didn't specify" if necessary. "debug" can be set alongside
+ * "unspecified".
*/
bool unspecified;
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 9f73b59..78c4e0a 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -2995,15 +2995,34 @@ MemTxResult address_space_write_cached_slow(MemoryRegionCache *cache,
int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr);
bool prepare_mmio_access(MemoryRegion *mr);
-static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
+static inline bool memory_region_supports_direct_access(MemoryRegion *mr)
{
- if (is_write) {
- return memory_region_is_ram(mr) && !mr->readonly &&
- !mr->rom_device && !memory_region_is_ram_device(mr);
- } else {
- return (memory_region_is_ram(mr) && !memory_region_is_ram_device(mr)) ||
- memory_region_is_romd(mr);
+ /* ROM DEVICE regions only allow direct access if in ROMD mode. */
+ if (memory_region_is_romd(mr)) {
+ return true;
+ }
+ if (!memory_region_is_ram(mr)) {
+ return false;
+ }
+ /*
+ * RAM DEVICE regions can be accessed directly using memcpy, but it might
+ * be MMIO and access using mempy can be wrong (e.g., using instructions not
+ * intended for MMIO access). So we treat this as IO.
+ */
+ return !memory_region_is_ram_device(mr);
+}
+
+static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write,
+ MemTxAttrs attrs)
+{
+ if (!memory_region_supports_direct_access(mr)) {
+ return false;
+ }
+ /* Debug access can write to ROM. */
+ if (is_write && !attrs.debug) {
+ return !mr->readonly && !mr->rom_device;
}
+ return true;
}
/**
@@ -3036,7 +3055,7 @@ MemTxResult address_space_read(AddressSpace *as, hwaddr addr,
fv = address_space_to_flatview(as);
l = len;
mr = flatview_translate(fv, addr, &addr1, &l, false, attrs);
- if (len == l && memory_access_is_direct(mr, false)) {
+ if (len == l && memory_access_is_direct(mr, false, attrs)) {
ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
memcpy(buf, ptr, len);
} else {
diff --git a/include/system/os-posix.h b/include/system/os-posix.h
index b881ac6..ce5b3bc 100644
--- a/include/system/os-posix.h
+++ b/include/system/os-posix.h
@@ -53,7 +53,7 @@ bool os_set_runas(const char *user_id);
void os_set_chroot(const char *path);
void os_setup_limits(void);
void os_setup_post(void);
-int os_mlock(void);
+int os_mlock(bool on_fault);
/**
* qemu_alloc_stack:
diff --git a/include/system/os-win32.h b/include/system/os-win32.h
index b82a5d3..bc62306 100644
--- a/include/system/os-win32.h
+++ b/include/system/os-win32.h
@@ -123,7 +123,7 @@ static inline bool is_daemonized(void)
return false;
}
-static inline int os_mlock(void)
+static inline int os_mlock(bool on_fault G_GNUC_UNUSED)
{
return -ENOSYS;
}
diff --git a/include/system/system.h b/include/system/system.h
index 0cbb43e..a7effe7 100644
--- a/include/system/system.h
+++ b/include/system/system.h
@@ -44,10 +44,20 @@ extern int display_opengl;
extern const char *keyboard_layout;
extern int old_param;
extern uint8_t *boot_splash_filedata;
-extern bool enable_mlock;
extern bool enable_cpu_pm;
extern QEMUClockType rtc_clock;
+typedef enum {
+ MLOCK_OFF = 0,
+ MLOCK_ON,
+ MLOCK_ON_FAULT,
+} MlockState;
+
+bool should_mlock(MlockState);
+bool is_mlock_on_fault(MlockState);
+
+extern MlockState mlock_state;
+
#define MAX_OPTION_ROMS 16
typedef struct QEMUOptionRom {
const char *name;
diff --git a/meson.build b/meson.build
index 8ed10b6..0ee79c6 100644
--- a/meson.build
+++ b/meson.build
@@ -2885,6 +2885,12 @@ config_host_data.set('HAVE_MLOCKALL', cc.links(gnu_source_prefix + '''
return mlockall(MCL_FUTURE);
}'''))
+config_host_data.set('HAVE_MLOCK_ONFAULT', cc.links(gnu_source_prefix + '''
+ #include <sys/mman.h>
+ int main(void) {
+ return mlockall(MCL_FUTURE | MCL_ONFAULT);
+ }'''))
+
have_l2tpv3 = false
if get_option('l2tpv3').allowed() and have_system
have_l2tpv3 = cc.has_type('struct mmsghdr',
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 6a6da6b..5d3edfc 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -651,8 +651,8 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
mis->have_fault_thread = false;
}
- if (enable_mlock) {
- if (os_mlock() < 0) {
+ if (should_mlock(mlock_state)) {
+ if (os_mlock(is_mlock_on_fault(mlock_state)) < 0) {
error_report("mlock: %s", strerror(errno));
/*
* It doesn't feel right to fail at this point, we have a valid
diff --git a/monitor/hmp-cmds-target.c b/monitor/hmp-cmds-target.c
index 27ffe61..239c2a6 100644
--- a/monitor/hmp-cmds-target.c
+++ b/monitor/hmp-cmds-target.c
@@ -301,7 +301,6 @@ void hmp_gpa2hva(Monitor *mon, const QDict *qdict)
void hmp_gva2gpa(Monitor *mon, const QDict *qdict)
{
target_ulong addr = qdict_get_int(qdict, "addr");
- MemTxAttrs attrs;
CPUState *cs = mon_get_cpu(mon);
hwaddr gpa;
@@ -310,7 +309,7 @@ void hmp_gva2gpa(Monitor *mon, const QDict *qdict)
return;
}
- gpa = cpu_get_phys_page_attrs_debug(cs, addr & TARGET_PAGE_MASK, &attrs);
+ gpa = cpu_get_phys_page_debug(cs, addr & TARGET_PAGE_MASK);
if (gpa == -1) {
monitor_printf(mon, "Unmapped\n");
} else {
diff --git a/os-posix.c b/os-posix.c
index 9cce55f..52925c2 100644
--- a/os-posix.c
+++ b/os-posix.c
@@ -327,18 +327,29 @@ void os_set_line_buffering(void)
setvbuf(stdout, NULL, _IOLBF, 0);
}
-int os_mlock(void)
+int os_mlock(bool on_fault)
{
#ifdef HAVE_MLOCKALL
int ret = 0;
+ int flags = MCL_CURRENT | MCL_FUTURE;
- ret = mlockall(MCL_CURRENT | MCL_FUTURE);
+ if (on_fault) {
+#ifdef HAVE_MLOCK_ONFAULT
+ flags |= MCL_ONFAULT;
+#else
+ error_report("mlockall: on_fault not supported");
+ return -EINVAL;
+#endif
+ }
+
+ ret = mlockall(flags);
if (ret < 0) {
error_report("mlockall: %s", strerror(errno));
}
return ret;
#else
+ (void)on_fault;
return -ENOSYS;
#endif
}
diff --git a/qemu-options.hx b/qemu-options.hx
index 1b26ad5..61270e3 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -4632,21 +4632,25 @@ SRST
ERST
DEF("overcommit", HAS_ARG, QEMU_OPTION_overcommit,
- "-overcommit [mem-lock=on|off][cpu-pm=on|off]\n"
+ "-overcommit [mem-lock=on|off|on-fault][cpu-pm=on|off]\n"
" run qemu with overcommit hints\n"
- " mem-lock=on|off controls memory lock support (default: off)\n"
+ " mem-lock=on|off|on-fault controls memory lock support (default: off)\n"
" cpu-pm=on|off controls cpu power management (default: off)\n",
QEMU_ARCH_ALL)
SRST
-``-overcommit mem-lock=on|off``
+``-overcommit mem-lock=on|off|on-fault``
\
``-overcommit cpu-pm=on|off``
Run qemu with hints about host resource overcommit. The default is
to assume that host overcommits all resources.
Locking qemu and guest memory can be enabled via ``mem-lock=on``
- (disabled by default). This works when host memory is not
- overcommitted and reduces the worst-case latency for guest.
+ or ``mem-lock=on-fault`` (disabled by default). This works when
+ host memory is not overcommitted and reduces the worst-case latency for
+ guest. The on-fault option is better for reducing the memory footprint
+ since it makes allocations lazy, but the pages still get locked in place
+ once faulted by the guest or QEMU. Note that the two options are mutually
+ exclusive.
Guest ability to manage power state of host cpus (increasing latency
for other processes on the same host cpu, but decreasing latency for
diff --git a/system/globals.c b/system/globals.c
index 4867c93..316623b 100644
--- a/system/globals.c
+++ b/system/globals.c
@@ -31,10 +31,20 @@
#include "system/cpus.h"
#include "system/system.h"
+bool should_mlock(MlockState state)
+{
+ return state == MLOCK_ON || state == MLOCK_ON_FAULT;
+}
+
+bool is_mlock_on_fault(MlockState state)
+{
+ return state == MLOCK_ON_FAULT;
+}
+
enum vga_retrace_method vga_retrace_method = VGA_RETRACE_DUMB;
int display_opengl;
const char* keyboard_layout;
-bool enable_mlock;
+MlockState mlock_state;
bool enable_cpu_pm;
int autostart = 1;
int vga_interface_type = VGA_NONE;
diff --git a/system/memory_ldst.c.inc b/system/memory_ldst.c.inc
index 0e6f394..7f32d3d 100644
--- a/system/memory_ldst.c.inc
+++ b/system/memory_ldst.c.inc
@@ -34,7 +34,7 @@ static inline uint32_t glue(address_space_ldl_internal, SUFFIX)(ARG1_DECL,
RCU_READ_LOCK();
mr = TRANSLATE(addr, &addr1, &l, false, attrs);
- if (l < 4 || !memory_access_is_direct(mr, false)) {
+ if (l < 4 || !memory_access_is_direct(mr, false, attrs)) {
release_lock |= prepare_mmio_access(mr);
/* I/O case */
@@ -103,7 +103,7 @@ static inline uint64_t glue(address_space_ldq_internal, SUFFIX)(ARG1_DECL,
RCU_READ_LOCK();
mr = TRANSLATE(addr, &addr1, &l, false, attrs);
- if (l < 8 || !memory_access_is_direct(mr, false)) {
+ if (l < 8 || !memory_access_is_direct(mr, false, attrs)) {
release_lock |= prepare_mmio_access(mr);
/* I/O case */
@@ -170,7 +170,7 @@ uint8_t glue(address_space_ldub, SUFFIX)(ARG1_DECL,
RCU_READ_LOCK();
mr = TRANSLATE(addr, &addr1, &l, false, attrs);
- if (!memory_access_is_direct(mr, false)) {
+ if (!memory_access_is_direct(mr, false, attrs)) {
release_lock |= prepare_mmio_access(mr);
/* I/O case */
@@ -207,7 +207,7 @@ static inline uint16_t glue(address_space_lduw_internal, SUFFIX)(ARG1_DECL,
RCU_READ_LOCK();
mr = TRANSLATE(addr, &addr1, &l, false, attrs);
- if (l < 2 || !memory_access_is_direct(mr, false)) {
+ if (l < 2 || !memory_access_is_direct(mr, false, attrs)) {
release_lock |= prepare_mmio_access(mr);
/* I/O case */
@@ -277,7 +277,7 @@ void glue(address_space_stl_notdirty, SUFFIX)(ARG1_DECL,
RCU_READ_LOCK();
mr = TRANSLATE(addr, &addr1, &l, true, attrs);
- if (l < 4 || !memory_access_is_direct(mr, true)) {
+ if (l < 4 || !memory_access_is_direct(mr, true, attrs)) {
release_lock |= prepare_mmio_access(mr);
r = memory_region_dispatch_write(mr, addr1, val, MO_32, attrs);
@@ -314,7 +314,7 @@ static inline void glue(address_space_stl_internal, SUFFIX)(ARG1_DECL,
RCU_READ_LOCK();
mr = TRANSLATE(addr, &addr1, &l, true, attrs);
- if (l < 4 || !memory_access_is_direct(mr, true)) {
+ if (l < 4 || !memory_access_is_direct(mr, true, attrs)) {
release_lock |= prepare_mmio_access(mr);
r = memory_region_dispatch_write(mr, addr1, val,
MO_32 | devend_memop(endian), attrs);
@@ -377,7 +377,7 @@ void glue(address_space_stb, SUFFIX)(ARG1_DECL,
RCU_READ_LOCK();
mr = TRANSLATE(addr, &addr1, &l, true, attrs);
- if (!memory_access_is_direct(mr, true)) {
+ if (!memory_access_is_direct(mr, true, attrs)) {
release_lock |= prepare_mmio_access(mr);
r = memory_region_dispatch_write(mr, addr1, val, MO_8, attrs);
} else {
@@ -410,7 +410,7 @@ static inline void glue(address_space_stw_internal, SUFFIX)(ARG1_DECL,
RCU_READ_LOCK();
mr = TRANSLATE(addr, &addr1, &l, true, attrs);
- if (l < 2 || !memory_access_is_direct(mr, true)) {
+ if (l < 2 || !memory_access_is_direct(mr, true, attrs)) {
release_lock |= prepare_mmio_access(mr);
r = memory_region_dispatch_write(mr, addr1, val,
MO_16 | devend_memop(endian), attrs);
@@ -474,7 +474,7 @@ static void glue(address_space_stq_internal, SUFFIX)(ARG1_DECL,
RCU_READ_LOCK();
mr = TRANSLATE(addr, &addr1, &l, true, attrs);
- if (l < 8 || !memory_access_is_direct(mr, true)) {
+ if (l < 8 || !memory_access_is_direct(mr, true, attrs)) {
release_lock |= prepare_mmio_access(mr);
r = memory_region_dispatch_write(mr, addr1, val,
MO_64 | devend_memop(endian), attrs);
diff --git a/system/physmem.c b/system/physmem.c
index 67c9db9..67bdf63 100644
--- a/system/physmem.c
+++ b/system/physmem.c
@@ -573,7 +573,7 @@ MemoryRegion *flatview_translate(FlatView *fv, hwaddr addr, hwaddr *xlat,
is_write, true, &as, attrs);
mr = section.mr;
- if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
+ if (xen_enabled() && memory_access_is_direct(mr, is_write, attrs)) {
hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
*plen = MIN(page, *plen);
}
@@ -2275,45 +2275,80 @@ void qemu_ram_free(RAMBlock *block)
}
#ifndef _WIN32
-void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
+/* Simply remap the given VM memory location from start to start+length */
+static int qemu_ram_remap_mmap(RAMBlock *block, uint64_t start, size_t length)
+{
+ int flags, prot;
+ void *area;
+ void *host_startaddr = block->host + start;
+
+ assert(block->fd < 0);
+ flags = MAP_FIXED | MAP_ANONYMOUS;
+ flags |= block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE;
+ flags |= block->flags & RAM_NORESERVE ? MAP_NORESERVE : 0;
+ prot = PROT_READ;
+ prot |= block->flags & RAM_READONLY ? 0 : PROT_WRITE;
+ area = mmap(host_startaddr, length, prot, flags, -1, 0);
+ return area != host_startaddr ? -errno : 0;
+}
+
+/*
+ * qemu_ram_remap - remap a single RAM page
+ *
+ * @addr: address in ram_addr_t address space.
+ *
+ * This function will try remapping a single page of guest RAM identified by
+ * @addr, essentially discarding memory to recover from previously poisoned
+ * memory (MCE). The page size depends on the RAMBlock (i.e., hugetlb). @addr
+ * does not have to point at the start of the page.
+ *
+ * This function is only to be used during system resets; it will kill the
+ * VM if remapping failed.
+ */
+void qemu_ram_remap(ram_addr_t addr)
{
RAMBlock *block;
- ram_addr_t offset;
- int flags;
- void *area, *vaddr;
- int prot;
+ uint64_t offset;
+ void *vaddr;
+ size_t page_size;
RAMBLOCK_FOREACH(block) {
offset = addr - block->offset;
if (offset < block->max_length) {
+ /* Respect the pagesize of our RAMBlock */
+ page_size = qemu_ram_pagesize(block);
+ offset = QEMU_ALIGN_DOWN(offset, page_size);
+
vaddr = ramblock_ptr(block, offset);
if (block->flags & RAM_PREALLOC) {
;
} else if (xen_enabled()) {
abort();
} else {
- flags = MAP_FIXED;
- flags |= block->flags & RAM_SHARED ?
- MAP_SHARED : MAP_PRIVATE;
- flags |= block->flags & RAM_NORESERVE ? MAP_NORESERVE : 0;
- prot = PROT_READ;
- prot |= block->flags & RAM_READONLY ? 0 : PROT_WRITE;
- if (block->fd >= 0) {
- area = mmap(vaddr, length, prot, flags, block->fd,
- offset + block->fd_offset);
- } else {
- flags |= MAP_ANONYMOUS;
- area = mmap(vaddr, length, prot, flags, -1, 0);
- }
- if (area != vaddr) {
- error_report("Could not remap addr: "
- RAM_ADDR_FMT "@" RAM_ADDR_FMT "",
- length, addr);
- exit(1);
+ if (ram_block_discard_range(block, offset, page_size) != 0) {
+ /*
+ * Fall back to using mmap() only for anonymous mapping,
+ * as if a backing file is associated we may not be able
+ * to recover the memory in all cases.
+ * So don't take the risk of using only mmap and fail now.
+ */
+ if (block->fd >= 0) {
+ error_report("Could not remap RAM %s:%" PRIx64 "+%"
+ PRIx64 " +%zx", block->idstr, offset,
+ block->fd_offset, page_size);
+ exit(1);
+ }
+ if (qemu_ram_remap_mmap(block, offset, page_size) != 0) {
+ error_report("Could not remap RAM %s:%" PRIx64 " +%zx",
+ block->idstr, offset, page_size);
+ exit(1);
+ }
}
- memory_try_enable_merging(vaddr, length);
- qemu_ram_setup_dump(vaddr, length);
+ memory_try_enable_merging(vaddr, page_size);
+ qemu_ram_setup_dump(vaddr, page_size);
}
+
+ break;
}
}
}
@@ -2869,7 +2904,7 @@ static MemTxResult flatview_write_continue_step(MemTxAttrs attrs,
return MEMTX_ACCESS_ERROR;
}
- if (!memory_access_is_direct(mr, true)) {
+ if (!memory_access_is_direct(mr, true, attrs)) {
uint64_t val;
MemTxResult result;
bool release_lock = prepare_mmio_access(mr);
@@ -2965,7 +3000,7 @@ static MemTxResult flatview_read_continue_step(MemTxAttrs attrs, uint8_t *buf,
return MEMTX_ACCESS_ERROR;
}
- if (!memory_access_is_direct(mr, false)) {
+ if (!memory_access_is_direct(mr, false, attrs)) {
/* I/O case */
uint64_t val;
MemTxResult result;
@@ -3137,8 +3172,7 @@ static inline MemTxResult address_space_write_rom_internal(AddressSpace *as,
l = len;
mr = address_space_translate(as, addr, &addr1, &l, true, attrs);
- if (!(memory_region_is_ram(mr) ||
- memory_region_is_romd(mr))) {
+ if (!memory_region_supports_direct_access(mr)) {
l = memory_access_size(mr, l, addr1);
} else {
/* ROM/RAM case */
@@ -3275,7 +3309,7 @@ static bool flatview_access_valid(FlatView *fv, hwaddr addr, hwaddr len,
while (len > 0) {
l = len;
mr = flatview_translate(fv, addr, &xlat, &l, is_write, attrs);
- if (!memory_access_is_direct(mr, is_write)) {
+ if (!memory_access_is_direct(mr, is_write, attrs)) {
l = memory_access_size(mr, l, addr);
if (!memory_region_access_valid(mr, xlat, l, is_write, attrs)) {
return false;
@@ -3355,7 +3389,7 @@ void *address_space_map(AddressSpace *as,
fv = address_space_to_flatview(as);
mr = flatview_translate(fv, addr, &xlat, &l, is_write, attrs);
- if (!memory_access_is_direct(mr, is_write)) {
+ if (!memory_access_is_direct(mr, is_write, attrs)) {
size_t used = qatomic_read(&as->bounce_buffer_size);
for (;;) {
hwaddr alloc = MIN(as->max_bounce_buffer_size - used, l);
@@ -3488,7 +3522,7 @@ int64_t address_space_cache_init(MemoryRegionCache *cache,
mr = cache->mrs.mr;
memory_region_ref(mr);
- if (memory_access_is_direct(mr, is_write)) {
+ if (memory_access_is_direct(mr, is_write, MEMTXATTRS_UNSPECIFIED)) {
/* We don't care about the memory attributes here as we're only
* doing this if we found actual RAM, which behaves the same
* regardless of attributes; so UNSPECIFIED is fine.
@@ -3681,13 +3715,8 @@ int cpu_memory_rw_debug(CPUState *cpu, vaddr addr,
if (l > len)
l = len;
phys_addr += (addr & ~TARGET_PAGE_MASK);
- if (is_write) {
- res = address_space_write_rom(cpu->cpu_ases[asidx].as, phys_addr,
- attrs, buf, l);
- } else {
- res = address_space_read(cpu->cpu_ases[asidx].as, phys_addr,
- attrs, buf, l);
- }
+ res = address_space_rw(cpu->cpu_ases[asidx].as, phys_addr, attrs, buf,
+ l, is_write);
if (res != MEMTX_OK) {
return -1;
}
@@ -3797,18 +3826,19 @@ int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length)
}
ret = fallocate(rb->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
- start, length);
+ start + rb->fd_offset, length);
if (ret) {
ret = -errno;
- error_report("%s: Failed to fallocate %s:%" PRIx64 " +%zx (%d)",
- __func__, rb->idstr, start, length, ret);
+ error_report("%s: Failed to fallocate %s:%" PRIx64 "+%" PRIx64
+ " +%zx (%d)", __func__, rb->idstr, start,
+ rb->fd_offset, length, ret);
goto err;
}
#else
ret = -ENOSYS;
error_report("%s: fallocate not available/file"
- "%s:%" PRIx64 " +%zx (%d)",
- __func__, rb->idstr, start, length, ret);
+ "%s:%" PRIx64 "+%" PRIx64 " +%zx (%d)", __func__,
+ rb->idstr, start, rb->fd_offset, length, ret);
goto err;
#endif
}
@@ -3855,6 +3885,7 @@ int ram_block_discard_guest_memfd_range(RAMBlock *rb, uint64_t start,
int ret = -1;
#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
+ /* ignore fd_offset with guest_memfd */
ret = fallocate(rb->guest_memfd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
start, length);
diff --git a/system/vl.c b/system/vl.c
index 2a570ed..8f77668 100644
--- a/system/vl.c
+++ b/system/vl.c
@@ -352,7 +352,7 @@ static QemuOptsList qemu_overcommit_opts = {
.desc = {
{
.name = "mem-lock",
- .type = QEMU_OPT_BOOL,
+ .type = QEMU_OPT_STRING,
},
{
.name = "cpu-pm",
@@ -797,8 +797,8 @@ static QemuOptsList qemu_run_with_opts = {
static void realtime_init(void)
{
- if (enable_mlock) {
- if (os_mlock() < 0) {
+ if (should_mlock(mlock_state)) {
+ if (os_mlock(is_mlock_on_fault(mlock_state)) < 0) {
error_report("locking memory failed");
exit(1);
}
@@ -1876,6 +1876,44 @@ static void object_option_parse(const char *str)
visit_free(v);
}
+static void overcommit_parse(const char *str)
+{
+ QemuOpts *opts;
+ const char *mem_lock_opt;
+
+ opts = qemu_opts_parse_noisily(qemu_find_opts("overcommit"),
+ str, false);
+ if (!opts) {
+ exit(1);
+ }
+
+ enable_cpu_pm = qemu_opt_get_bool(opts, "cpu-pm", enable_cpu_pm);
+
+ mem_lock_opt = qemu_opt_get(opts, "mem-lock");
+ if (!mem_lock_opt) {
+ return;
+ }
+
+ if (strcmp(mem_lock_opt, "on") == 0) {
+ mlock_state = MLOCK_ON;
+ return;
+ }
+
+ if (strcmp(mem_lock_opt, "off") == 0) {
+ mlock_state = MLOCK_OFF;
+ return;
+ }
+
+ if (strcmp(mem_lock_opt, "on-fault") == 0) {
+ mlock_state = MLOCK_ON_FAULT;
+ return;
+ }
+
+ error_report("parameter 'mem-lock' expects one of "
+ "'on', 'off', 'on-fault'");
+ exit(1);
+}
+
/*
* Very early object creation, before the sandbox options have been activated.
*/
@@ -3591,13 +3629,7 @@ void qemu_init(int argc, char **argv)
object_option_parse(optarg);
break;
case QEMU_OPTION_overcommit:
- opts = qemu_opts_parse_noisily(qemu_find_opts("overcommit"),
- optarg, false);
- if (!opts) {
- exit(1);
- }
- enable_mlock = qemu_opt_get_bool(opts, "mem-lock", enable_mlock);
- enable_cpu_pm = qemu_opt_get_bool(opts, "cpu-pm", enable_cpu_pm);
+ overcommit_parse(optarg);
break;
case QEMU_OPTION_compat:
{