aboutsummaryrefslogtreecommitdiff
path: root/util/userfaultfd.c
diff options
context:
space:
mode:
authorPeter Xu <peterx@redhat.com>2023-02-07 15:57:11 -0500
committerJuan Quintela <quintela@redhat.com>2023-02-11 16:51:09 +0100
commitc40c0463413b941c13fe5f99a90c02d7d6584828 (patch)
treec34bd6ea8ec1f18b2313db12c20fc5d120c564fa /util/userfaultfd.c
parent93e0932b7be2498024cd6ba8446a0fa2cb1769bc (diff)
downloadqemu-c40c0463413b941c13fe5f99a90c02d7d6584828.zip
qemu-c40c0463413b941c13fe5f99a90c02d7d6584828.tar.gz
qemu-c40c0463413b941c13fe5f99a90c02d7d6584828.tar.bz2
util/userfaultfd: Support /dev/userfaultfd
Teach QEMU to use /dev/userfaultfd when it exists, and fall back to the system call if either the device is not there or QEMU doesn't have enough permission to open it. Firstly, as long as the app has permission to access /dev/userfaultfd, it always has the ability to trap kernel faults, which is what QEMU mostly wants. Meanwhile, in some contexts (e.g. containers) the userfaultfd syscall can be forbidden, so /dev/userfaultfd can be the main way to use postcopy in a restricted environment with a strict seccomp setup. Signed-off-by: Peter Xu <peterx@redhat.com> Reviewed-by: Juan Quintela <quintela@redhat.com> Signed-off-by: Juan Quintela <quintela@redhat.com>
Diffstat (limited to 'util/userfaultfd.c')
-rw-r--r--util/userfaultfd.c32
1 files changed, 32 insertions, 0 deletions
diff --git a/util/userfaultfd.c b/util/userfaultfd.c
index 4953b31..fdff486 100644
--- a/util/userfaultfd.c
+++ b/util/userfaultfd.c
@@ -18,10 +18,42 @@
#include <poll.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
+#include <fcntl.h>
+
+typedef enum { /* How uffd_open() obtains a new userfaultfd descriptor */
+ UFFD_UNINITIALIZED = 0, /* Not probed yet; the zero value of the static state */
+ UFFD_USE_DEV_PATH, /* /dev/userfaultfd opened OK; use USERFAULTFD_IOC_NEW on it */
+ UFFD_USE_SYSCALL, /* Opening /dev/userfaultfd failed; use the userfaultfd syscall */
+} uffd_open_mode;
int uffd_open(int flags)
{
#if defined(__NR_userfaultfd)
+ static uffd_open_mode open_mode;
+ static int uffd_dev;
+
+ /* Detect how to create uffd descriptors the first time this is called */
+ if (open_mode == UFFD_UNINITIALIZED) {
+ /*
+ * Make /dev/userfaultfd the default approach because it has better
+ * permission controls, meanwhile allows kernel faults without any
+ * privilege requirement (e.g. CAP_SYS_PTRACE).
+ */
+ uffd_dev = open("/dev/userfaultfd", O_RDWR | O_CLOEXEC);
+ if (uffd_dev >= 0) {
+ open_mode = UFFD_USE_DEV_PATH;
+ } else {
+ /* Fallback to the system call */
+ open_mode = UFFD_USE_SYSCALL;
+ }
+ trace_uffd_detect_open_mode(open_mode);
+ }
+
+ if (open_mode == UFFD_USE_DEV_PATH) {
+ assert(uffd_dev >= 0);
+ return ioctl(uffd_dev, USERFAULTFD_IOC_NEW, flags);
+ }
+
return syscall(__NR_userfaultfd, flags);
#else
return -EINVAL;