aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--docs/nvdimm.txt22
-rw-r--r--qemu-options.hx5
-rw-r--r--util/mmap-alloc.c41
3 files changed, 64 insertions, 4 deletions
diff --git a/docs/nvdimm.txt b/docs/nvdimm.txt
index 7231c2d..b531cac 100644
--- a/docs/nvdimm.txt
+++ b/docs/nvdimm.txt
@@ -144,9 +144,25 @@ Guest Data Persistence
----------------------
Though QEMU supports multiple types of vNVDIMM backends on Linux,
-currently the only one that can guarantee the guest write persistence
-is the device DAX on the real NVDIMM device (e.g., /dev/dax0.0), to
-which all guest access do not involve any host-side kernel cache.
+the only backends that can guarantee the guest write persistence are:
+
+A. DAX device (e.g., /dev/dax0.0), or
+B. DAX file (mounted with dax option)
+
+When using B (a file supporting direct mapping of persistent memory)
+as a backend, write persistence is guaranteed if the host kernel has
+support for the MAP_SYNC flag in the mmap system call (available
+since Linux 4.15 and on certain distro kernels) and additionally
+both 'pmem' and 'share' flags are set to 'on' on the backend.
+
+If these conditions are not satisfied, i.e. if either 'pmem' or 'share'
+is not set, if the backend file does not support DAX, or if MAP_SYNC
+is not supported by the host kernel, write persistence is not
+guaranteed after a system crash. For compatibility reasons, a failure
+to satisfy these conditions is not treated as an error. Currently, no
+way is provided to test for them.
+For more details, please refer to the mmap(2) man page:
+http://man7.org/linux/man-pages/man2/mmap.2.html.
When using other types of backends, it's suggested to set 'unarmed'
option of '-device nvdimm' to 'on', which sets the unarmed flag of the
diff --git a/qemu-options.hx b/qemu-options.hx
index 08749a3..bdc74c0 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -4233,6 +4233,11 @@ using the SNIA NVM programming model (e.g. Intel NVDIMM).
If @option{pmem} is set to 'on', QEMU will take necessary operations to
guarantee the persistence of its own writes to @option{mem-path}
(e.g. in vNVDIMM label emulation and live migration).
+Also, QEMU will map the backend file with the MAP_SYNC flag, which ensures
+the file metadata is in sync for @option{mem-path} in case of a host crash
+or a power failure. MAP_SYNC requires support from both the host kernel
+(since Linux kernel 4.15) and the filesystem of @option{mem-path}, which
+must be mounted with the DAX option.
@item -object memory-backend-ram,id=@var{id},merge=@var{on|off},dump=@var{on|off},share=@var{on|off},prealloc=@var{on|off},size=@var{size},host-nodes=@var{host-nodes},policy=@var{default|preferred|bind|interleave}
diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c
index 9713f4b..f7f177d 100644
--- a/util/mmap-alloc.c
+++ b/util/mmap-alloc.c
@@ -10,6 +10,13 @@
* later. See the COPYING file in the top-level directory.
*/
+#ifdef CONFIG_LINUX
+#include <linux/mman.h>
+#else /* !CONFIG_LINUX */
+#define MAP_SYNC 0x0
+#define MAP_SHARED_VALIDATE 0x0
+#endif /* CONFIG_LINUX */
+
#include "qemu/osdep.h"
#include "qemu/mmap-alloc.h"
#include "qemu/host-utils.h"
@@ -82,6 +89,7 @@ void *qemu_ram_mmap(int fd,
bool is_pmem)
{
int flags;
+ int map_sync_flags = 0;
int guardfd;
size_t offset;
size_t pagesize;
@@ -132,9 +140,40 @@ void *qemu_ram_mmap(int fd,
flags = MAP_FIXED;
flags |= fd == -1 ? MAP_ANONYMOUS : 0;
flags |= shared ? MAP_SHARED : MAP_PRIVATE;
+ if (shared && is_pmem) {
+ map_sync_flags = MAP_SYNC | MAP_SHARED_VALIDATE;
+ }
+
offset = QEMU_ALIGN_UP((uintptr_t)guardptr, align) - (uintptr_t)guardptr;
- ptr = mmap(guardptr + offset, size, PROT_READ | PROT_WRITE, flags, fd, 0);
+ ptr = mmap(guardptr + offset, size, PROT_READ | PROT_WRITE,
+ flags | map_sync_flags, fd, 0);
+
+ if (ptr == MAP_FAILED && map_sync_flags) {
+ if (errno == ENOTSUP) {
+ char *proc_link, *file_name;
+ int len;
+ proc_link = g_strdup_printf("/proc/self/fd/%d", fd);
+ file_name = g_malloc0(PATH_MAX);
+ len = readlink(proc_link, file_name, PATH_MAX - 1);
+ if (len < 0) {
+ len = 0;
+ }
+ file_name[len] = '\0';
+ fprintf(stderr, "Warning: requesting persistence across crashes "
+ "for backend file %s failed. Proceeding without "
+ "persistence, data might become corrupted in case of host "
+ "crash.\n", file_name);
+ g_free(proc_link);
+ g_free(file_name);
+ }
+ /*
+ * if map failed with MAP_SHARED_VALIDATE | MAP_SYNC,
+ * we will remove these flags to handle compatibility.
+ */
+ ptr = mmap(guardptr + offset, size, PROT_READ | PROT_WRITE,
+ flags, fd, 0);
+ }
if (ptr == MAP_FAILED) {
munmap(guardptr, total);