From 5c52a219bbd38724650e27e14741190d3004e26b Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 6 Sep 2023 14:04:54 +0200 Subject: softmmu/physmem: Distinguish between file access mode and mmap protection There is a difference between how we open a file and how we mmap it, and we want to support writable private mappings of readonly files. Let's define RAM_READONLY and RAM_READONLY_FD flags, to replace the single "readonly" parameter for file-related functions. In memory_region_init_ram_from_fd() and memory_region_init_ram_from_file(), initialize mr->readonly based on the new RAM_READONLY flag. While at it, add some RAM_* flags we missed to add to the list of accepted flags in the documentation of some functions. No change in functionality intended. We'll make use of both flags next and start setting them independently for memory-backend-file. Message-ID: <20230906120503.359863-3-david@redhat.com> Acked-by: Peter Xu Signed-off-by: David Hildenbrand --- softmmu/memory.c | 8 ++++---- softmmu/physmem.c | 21 ++++++++++----------- 2 files changed, 14 insertions(+), 15 deletions(-) (limited to 'softmmu') diff --git a/softmmu/memory.c b/softmmu/memory.c index 7d9494c..2cb60ec 100644 --- a/softmmu/memory.c +++ b/softmmu/memory.c @@ -1620,18 +1620,17 @@ void memory_region_init_ram_from_file(MemoryRegion *mr, uint32_t ram_flags, const char *path, ram_addr_t offset, - bool readonly, Error **errp) { Error *err = NULL; memory_region_init(mr, owner, name, size); mr->ram = true; - mr->readonly = readonly; + mr->readonly = !!(ram_flags & RAM_READONLY); mr->terminates = true; mr->destructor = memory_region_destructor_ram; mr->align = align; mr->ram_block = qemu_ram_alloc_from_file(size, mr, ram_flags, path, - offset, readonly, &err); + offset, &err); if (err) { mr->size = int128_zero(); object_unparent(OBJECT(mr)); @@ -1651,10 +1650,11 @@ void memory_region_init_ram_from_fd(MemoryRegion *mr, Error *err = NULL; memory_region_init(mr, owner, name, size); mr->ram = true; + mr->readonly = !!(ram_flags & RAM_READONLY); mr->terminates = true; mr->destructor = memory_region_destructor_ram; mr->ram_block = qemu_ram_alloc_from_fd(size, mr, ram_flags, fd, offset, - false, &err); + &err); if (err) { mr->size = int128_zero(); object_unparent(OBJECT(mr)); diff --git a/softmmu/physmem.c b/softmmu/physmem.c index 18277dd..7e03ed7 100644 --- a/softmmu/physmem.c +++ b/softmmu/physmem.c @@ -1350,7 +1350,6 @@ static int file_ram_open(const char *path, static void *file_ram_alloc(RAMBlock *block, ram_addr_t memory, int fd, - bool readonly, bool truncate, off_t offset, Error **errp) @@ -1408,7 +1407,7 @@ static void *file_ram_alloc(RAMBlock *block, perror("ftruncate"); } - qemu_map_flags = readonly ? QEMU_MAP_READONLY : 0; + qemu_map_flags = (block->flags & RAM_READONLY) ? QEMU_MAP_READONLY : 0; qemu_map_flags |= (block->flags & RAM_SHARED) ? QEMU_MAP_SHARED : 0; qemu_map_flags |= (block->flags & RAM_PMEM) ? QEMU_MAP_SYNC : 0; qemu_map_flags |= (block->flags & RAM_NORESERVE) ? QEMU_MAP_NORESERVE : 0; @@ -1876,7 +1875,7 @@ static void ram_block_add(RAMBlock *new_block, Error **errp) #ifdef CONFIG_POSIX RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr, uint32_t ram_flags, int fd, off_t offset, - bool readonly, Error **errp) + Error **errp) { RAMBlock *new_block; Error *local_err = NULL; @@ -1884,7 +1883,8 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr, /* Just support these ram flags by now. */ assert((ram_flags & ~(RAM_SHARED | RAM_PMEM | RAM_NORESERVE | - RAM_PROTECTED | RAM_NAMED_FILE)) == 0); + RAM_PROTECTED | RAM_NAMED_FILE | RAM_READONLY | + RAM_READONLY_FD)) == 0); if (xen_enabled()) { error_setg(errp, "-mem-path not supported with Xen"); @@ -1919,8 +1919,8 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr, new_block->used_length = size; new_block->max_length = size; new_block->flags = ram_flags; - new_block->host = file_ram_alloc(new_block, size, fd, readonly, - !file_size, offset, errp); + new_block->host = file_ram_alloc(new_block, size, fd, !file_size, offset, + errp); if (!new_block->host) { g_free(new_block); return NULL; @@ -1939,20 +1939,19 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr, RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, uint32_t ram_flags, const char *mem_path, - off_t offset, bool readonly, Error **errp) + off_t offset, Error **errp) { int fd; bool created; RAMBlock *block; - fd = file_ram_open(mem_path, memory_region_name(mr), readonly, &created, - errp); + fd = file_ram_open(mem_path, memory_region_name(mr), + !!(ram_flags & RAM_READONLY_FD), &created, errp); if (fd < 0) { return NULL; } - block = qemu_ram_alloc_from_fd(size, mr, ram_flags, fd, offset, readonly, - errp); + block = qemu_ram_alloc_from_fd(size, mr, ram_flags, fd, offset, errp); if (!block) { if (created) { unlink(mem_path); -- cgit v1.1 From 9e6b9f379130c77a08c8ea67a09fc90cb30f8d09 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 6 Sep 2023 14:04:56 +0200 Subject: softmmu/physmem: Remap with proper protection in qemu_ram_remap() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's remap with the proper protection that we can derive from RAM_READONLY. Message-ID: <20230906120503.359863-5-david@redhat.com> Reviewed-by: Peter Xu Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: David Hildenbrand --- softmmu/physmem.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'softmmu') diff --git a/softmmu/physmem.c b/softmmu/physmem.c index 7e03ed7..88482bd 100644 --- a/softmmu/physmem.c +++ b/softmmu/physmem.c @@ -2069,6 +2069,7 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length) ram_addr_t offset; int flags; void *area, *vaddr; + int prot; RAMBLOCK_FOREACH(block) { offset = addr - block->offset; @@ -2083,13 +2084,14 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length) flags |= block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE; flags |= block->flags & RAM_NORESERVE ? MAP_NORESERVE : 0; + prot = PROT_READ; + prot |= block->flags & RAM_READONLY ? 0 : PROT_WRITE; if (block->fd >= 0) { - area = mmap(vaddr, length, PROT_READ | PROT_WRITE, - flags, block->fd, offset + block->fd_offset); + area = mmap(vaddr, length, prot, flags, block->fd, + offset + block->fd_offset); } else { flags |= MAP_ANONYMOUS; - area = mmap(vaddr, length, PROT_READ | PROT_WRITE, - flags, -1, 0); + area = mmap(vaddr, length, prot, flags, -1, 0); } if (area != vaddr) { error_report("Could not remap addr: " -- cgit v1.1 From b2cccb52bd9bef2948a150d204b20119b6c3ad58 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 6 Sep 2023 14:04:57 +0200 Subject: softmmu/physmem: Bail out early in ram_block_discard_range() with readonly files fallocate() will fail, let's print a nicer error message. Message-ID: <20230906120503.359863-6-david@redhat.com> Suggested-by: Peter Xu Reviewed-by: Peter Xu Signed-off-by: David Hildenbrand --- softmmu/physmem.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'softmmu') diff --git a/softmmu/physmem.c b/softmmu/physmem.c index 88482bd..c520c2a 100644 --- a/softmmu/physmem.c +++ b/softmmu/physmem.c @@ -3482,6 +3482,16 @@ int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length) */ #ifdef CONFIG_FALLOCATE_PUNCH_HOLE /* + * fallocate() will fail with readonly files. Let's print a + * proper error message. + */ + if (rb->flags & RAM_READONLY_FD) { + error_report("ram_block_discard_range: Discarding RAM" + " with readonly files is not supported"); + goto err; + + } + /* * We'll discard data from the actual file, even though we only * have a MAP_PRIVATE mapping, possibly messing with other * MAP_PRIVATE/MAP_SHARED mappings. There is no easy way to -- cgit v1.1 From 4d6b23f7e2b7a34a6ab3b7c40693d8b1a0dee0b5 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 6 Sep 2023 14:04:58 +0200 Subject: softmmu/physmem: Fail creation of new files in file_ram_open() with readonly=true Currently, if a file does not exist yet, file_ram_open() will create new empty file and open it writable. However, it even does that when readonly=true was specified. Specifying O_RDONLY instead to create a new readonly file would theoretically work, however, ftruncate() will refuse to resize the new empty file and we'll get a warning: ftruncate: Invalid argument And later eventually more problems when actually mmap'ing that file and accessing it. If someone intends to let QEMU open+mmap a file read-only, better create+resize+fill that file ahead of time outside of QEMU context. We'll now fail with: ./qemu-system-x86_64 \ -object memory-backend-file,id=ram0,mem-path=tmp,readonly=true,size=1g qemu-system-x86_64: can't open backing store tmp for guest RAM: No such file or directory All use cases of readonly files (R/O NVDIMMs, VM templating) work on existing files, so silently creating new files might just hide user errors when accidentally specifying a non-existent file. Note that the only memory-backend-file will end up calling memory_region_init_ram_from_file() -> qemu_ram_alloc_from_file() -> file_ram_open(). Move error reporting to the single caller. Message-ID: <20230906120503.359863-7-david@redhat.com> Acked-by: Peter Xu Signed-off-by: David Hildenbrand --- softmmu/physmem.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'softmmu') diff --git a/softmmu/physmem.c b/softmmu/physmem.c index c520c2a..138402b 100644 --- a/softmmu/physmem.c +++ b/softmmu/physmem.c @@ -1288,8 +1288,7 @@ static int64_t get_file_align(int fd) static int file_ram_open(const char *path, const char *region_name, bool readonly, - bool *created, - Error **errp) + bool *created) { char *filename; char *sanitized_name; @@ -1304,6 +1303,10 @@ static int file_ram_open(const char *path, break; } if (errno == ENOENT) { + if (readonly) { + /* Refuse to create new, readonly files. */ + return -ENOENT; + } /* @path names a file that doesn't exist, create it */ fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644); if (fd >= 0) { @@ -1333,10 +1336,7 @@ static int file_ram_open(const char *path, g_free(filename); } if (errno != EEXIST && errno != EINTR) { - error_setg_errno(errp, errno, - "can't open backing store %s for guest RAM", - path); - return -1; + return -errno; } /* * Try again on EINTR and EEXIST. The latter happens when @@ -1946,8 +1946,10 @@ RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, RAMBlock *block; fd = file_ram_open(mem_path, memory_region_name(mr), - !!(ram_flags & RAM_READONLY_FD), &created, errp); + !!(ram_flags & RAM_READONLY_FD), &created); if (fd < 0) { + error_setg_errno(errp, -fd, "can't open backing store %s for guest RAM", + mem_path); return NULL; } -- cgit v1.1 From ca01f1b89b0884e39a58d8bc1d3fbd7ca91272a1 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 6 Sep 2023 14:04:59 +0200 Subject: softmmu/physmem: Never return directories from file_ram_open() open() does not fail on directories when opening them readonly (O_RDONLY). Currently, we succeed opening such directories and fail later during mmap(), resulting in a misleading error message. $ ./qemu-system-x86_64 \ -object memory-backend-file,id=ram0,mem-path=tmp,readonly=true,size=1g qemu-system-x86_64: unable to map backing store for guest RAM: No such device To identify directories and handle them accordingly in file_ram_open() also when readonly=true was specified, detect if we just opened a directory using fstat() instead. Then, fail file_ram_open() right away, similarly to how we now fail if the file does not exist and we want to open the file readonly. With this change, we get a nicer error message: qemu-system-x86_64: can't open backing store tmp for guest RAM: Is a directory Note that the only memory-backend-file will end up calling memory_region_init_ram_from_file() -> qemu_ram_alloc_from_file() -> file_ram_open(). Message-ID: <20230906120503.359863-8-david@redhat.com> Reported-by: Thiner Logoer Reviewed-by: Peter Xu Tested-by: Mario Casquero Signed-off-by: David Hildenbrand --- softmmu/physmem.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'softmmu') diff --git a/softmmu/physmem.c b/softmmu/physmem.c index 138402b..f1cd3ec 100644 --- a/softmmu/physmem.c +++ b/softmmu/physmem.c @@ -1299,6 +1299,25 @@ static int file_ram_open(const char *path, for (;;) { fd = open(path, readonly ? O_RDONLY : O_RDWR); if (fd >= 0) { + /* + * open(O_RDONLY) won't fail with EISDIR. Check manually if we + * opened a directory and fail similarly to how we fail ENOENT + * in readonly mode. Note that mkstemp() would imply O_RDWR. + */ + if (readonly) { + struct stat file_stat; + + if (fstat(fd, &file_stat)) { + close(fd); + if (errno == EINTR) { + continue; + } + return -errno; + } else if (S_ISDIR(file_stat.st_mode)) { + close(fd); + return -EISDIR; + } + } /* @path names an existing file, use it */ break; } -- cgit v1.1 From 6da4b1c25d89cc8e8d1be40b96602d08a3e88e0c Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 6 Sep 2023 14:05:02 +0200 Subject: softmmu/physmem: Hint that "readonly=on,rom=off" exists when opening file R/W for private mapping fails It's easy to miss that memory-backend-file with "share=off" (default) will always try opening the file R/W as default, and fail if we don't have write permissions to the file. In that case, the user has to explicit specify "readonly=on,rom=off" to get usable RAM, for example, for VM templating. Let's hint that '-object memory-backend-file,readonly=on,rom=off,...' exists to consume R/O files in a private mapping to create writable RAM, but only if we have permissions to open the file read-only. Message-ID: <20230906120503.359863-11-david@redhat.com> Suggested-by: ThinerLogoer Signed-off-by: David Hildenbrand --- softmmu/physmem.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'softmmu') diff --git a/softmmu/physmem.c b/softmmu/physmem.c index f1cd3ec..4f6ca65 100644 --- a/softmmu/physmem.c +++ b/softmmu/physmem.c @@ -1969,6 +1969,25 @@ RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, if (fd < 0) { error_setg_errno(errp, -fd, "can't open backing store %s for guest RAM", mem_path); + if (!(ram_flags & RAM_READONLY_FD) && !(ram_flags & RAM_SHARED) && + fd == -EACCES) { + /* + * If we can open the file R/O (note: will never create a new file) + * and we are dealing with a private mapping, there are still ways + * to consume such files and get RAM instead of ROM. + */ + fd = file_ram_open(mem_path, memory_region_name(mr), true, + &created); + if (fd < 0) { + return NULL; + } + assert(!created); + close(fd); + error_append_hint(errp, "Consider opening the backing store" + " read-only but still creating writable RAM using" + " '-object memory-backend-file,readonly=on,rom=off...'" + " (see \"VM templating\" documentation)\n"); + } return NULL; } -- cgit v1.1 From 544cff46c018036cd66e98ffb224dd9f098065c8 Mon Sep 17 00:00:00 2001 From: hongmianquan Date: Wed, 30 Aug 2023 11:29:06 +0800 Subject: memory: avoid updating ioeventfds for some address_space When updating ioeventfds, we need to iterate all address spaces, but some address spaces do not register eventfd_add|del call when memory_listener_register() and they do nothing when updating ioeventfds. So we can skip these AS in address_space_update_ioeventfds(). The overhead of memory_region_transaction_commit() can be significantly reduced. For example, a VM with 8 vhost net devices and each one has 64 vectors, can reduce the time spent on memory_region_transaction_commit by 20%. Message-ID: <20230830032906.12488-1-hongmianquan@bytedance.com> Reviewed-by: Peter Xu Signed-off-by: hongmianquan Signed-off-by: David Hildenbrand --- softmmu/memory.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'softmmu') diff --git a/softmmu/memory.c b/softmmu/memory.c index 2cb60ec..c0383a1 100644 --- a/softmmu/memory.c +++ b/softmmu/memory.c @@ -842,6 +842,10 @@ static void address_space_update_ioeventfds(AddressSpace *as) AddrRange tmp; unsigned i; + if (!as->ioeventfd_notifiers) { + return; + } + /* * It is likely that the number of ioeventfds hasn't changed much, so use * the previous size as the starting value, with some headroom to avoid @@ -3075,6 +3079,10 @@ void memory_listener_register(MemoryListener *listener, AddressSpace *as) } listener_add_address_space(listener, as); + + if (listener->eventfd_add || listener->eventfd_del) { + as->ioeventfd_notifiers++; + } } void memory_listener_unregister(MemoryListener *listener) @@ -3083,6 +3091,10 @@ void memory_listener_unregister(MemoryListener *listener) return; } + if (listener->eventfd_add || listener->eventfd_del) { + listener->address_space->ioeventfd_notifiers--; + } + listener_del_address_space(listener, listener->address_space); QTAILQ_REMOVE(&memory_listeners, listener, link); QTAILQ_REMOVE(&listener->address_space->listeners, listener, link_as); -- cgit v1.1