aboutsummaryrefslogtreecommitdiff
path: root/tools/virtiofsd
diff options
context:
space:
mode:
Diffstat (limited to 'tools/virtiofsd')
-rw-r--r--tools/virtiofsd/fuse_common.h9
-rw-r--r--tools/virtiofsd/fuse_i.h7
-rw-r--r--tools/virtiofsd/fuse_lowlevel.c180
-rw-r--r--tools/virtiofsd/fuse_lowlevel.h13
-rw-r--r--tools/virtiofsd/helper.c1
-rw-r--r--tools/virtiofsd/passthrough_ll.c467
-rw-r--r--tools/virtiofsd/passthrough_seccomp.c1
7 files changed, 622 insertions, 56 deletions
diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h
index 0c2665b..bf46954 100644
--- a/tools/virtiofsd/fuse_common.h
+++ b/tools/virtiofsd/fuse_common.h
@@ -378,6 +378,11 @@ struct fuse_file_info {
#define FUSE_CAP_SETXATTR_EXT (1 << 29)
/**
+ * Indicates that file server supports creating file security context
+ */
+#define FUSE_CAP_SECURITY_CTX (1ULL << 32)
+
+/**
* Ioctl flags
*
* FUSE_IOCTL_COMPAT: 32bit compat ioctl on 64bit machine
@@ -439,7 +444,7 @@ struct fuse_conn_info {
/**
* Capability flags that the kernel supports (read-only)
*/
- unsigned capable;
+ uint64_t capable;
/**
* Capability flags that the filesystem wants to enable.
@@ -447,7 +452,7 @@ struct fuse_conn_info {
* libfuse attempts to initialize this field with
* reasonable default values before calling the init() handler.
*/
- unsigned want;
+ uint64_t want;
/**
* Maximum number of pending "background" requests. A
diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h
index 492e002..a5572fa 100644
--- a/tools/virtiofsd/fuse_i.h
+++ b/tools/virtiofsd/fuse_i.h
@@ -15,6 +15,12 @@
struct fv_VuDev;
struct fv_QueueInfo;
+struct fuse_security_context { /* security context parsed from a request */
+ const char *name; /* xattr name under which the context is stored */
+ uint32_t ctxlen; /* size of ctx in bytes; 0 is treated as "no context" */
+ const void *ctx; /* raw context bytes, as returned by the mbuf iterator */
+};
+
struct fuse_req {
struct fuse_session *se;
uint64_t unique;
@@ -35,6 +41,7 @@ struct fuse_req {
} u;
struct fuse_req *next;
struct fuse_req *prev;
+ struct fuse_security_context secctx;
};
struct fuse_notify_req {
diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c
index e4679c7..7529287 100644
--- a/tools/virtiofsd/fuse_lowlevel.c
+++ b/tools/virtiofsd/fuse_lowlevel.c
@@ -886,11 +886,63 @@ static void do_readlink(fuse_req_t req, fuse_ino_t nodeid,
}
}
+/*
+ * Parse the security context that trails a creation request and record it
+ * in req->secctx.
+ *
+ * Consumed from iter, in order: a struct fuse_secctx_header and, when it
+ * announces one context, a struct fuse_secctx, the xattr name string and
+ * fsecctx->size bytes of context data.
+ *
+ * Returns 0 on success (also when no context was sent, in which case
+ * req->secctx is not modified) and -EINVAL on malformed input.
+ */
+static int parse_secctx_fill_req(fuse_req_t req, struct fuse_mbuf_iter *iter)
+{
+    struct fuse_secctx_header *hdr;
+    struct fuse_secctx *entry;
+    const char *ctx_name;
+    const void *ctx_data;
+
+    hdr = fuse_mbuf_iter_advance(iter, sizeof(*hdr));
+    if (hdr == NULL) {
+        return -EINVAL;
+    }
+
+    /* Nothing was sent. Maybe no LSM supports it */
+    if (hdr->nr_secctx == 0) {
+        return 0;
+    }
+
+    /* At most one security context is supported for now */
+    if (hdr->nr_secctx > 1) {
+        return -EINVAL;
+    }
+
+    entry = fuse_mbuf_iter_advance(iter, sizeof(*entry));
+    /* An entry announcing a zero sized context is not expected */
+    if (entry == NULL || entry->size == 0) {
+        return -EINVAL;
+    }
+
+    ctx_name = fuse_mbuf_iter_advance_str(iter);
+    if (ctx_name == NULL) {
+        return -EINVAL;
+    }
+
+    ctx_data = fuse_mbuf_iter_advance(iter, entry->size);
+    if (ctx_data == NULL) {
+        return -EINVAL;
+    }
+
+    req->secctx.name = ctx_name;
+    req->secctx.ctx = ctx_data;
+    req->secctx.ctxlen = entry->size;
+    return 0;
+}
+
static void do_mknod(fuse_req_t req, fuse_ino_t nodeid,
struct fuse_mbuf_iter *iter)
{
struct fuse_mknod_in *arg;
const char *name;
+ bool secctx_enabled = req->se->conn.want & FUSE_CAP_SECURITY_CTX;
+ int err;
arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
name = fuse_mbuf_iter_advance_str(iter);
@@ -901,6 +953,14 @@ static void do_mknod(fuse_req_t req, fuse_ino_t nodeid,
req->ctx.umask = arg->umask;
+ if (secctx_enabled) {
+ err = parse_secctx_fill_req(req, iter);
+ if (err) {
+ fuse_reply_err(req, -err);
+ return;
+ }
+ }
+
if (req->se->op.mknod) {
req->se->op.mknod(req, nodeid, name, arg->mode, arg->rdev);
} else {
@@ -913,6 +973,8 @@ static void do_mkdir(fuse_req_t req, fuse_ino_t nodeid,
{
struct fuse_mkdir_in *arg;
const char *name;
+ bool secctx_enabled = req->se->conn.want & FUSE_CAP_SECURITY_CTX;
+ int err;
arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
name = fuse_mbuf_iter_advance_str(iter);
@@ -923,6 +985,14 @@ static void do_mkdir(fuse_req_t req, fuse_ino_t nodeid,
req->ctx.umask = arg->umask;
+    if (secctx_enabled) {
+        /*
+         * parse_secctx_fill_req() returns a negative errno, while
+         * fuse_reply_err() takes a positive one: flip the sign.
+         */
+        err = parse_secctx_fill_req(req, iter);
+        if (err) {
+            fuse_reply_err(req, -err);
+            return;
+        }
+    }
+
if (req->se->op.mkdir) {
req->se->op.mkdir(req, nodeid, name, arg->mode);
} else {
@@ -969,12 +1039,22 @@ static void do_symlink(fuse_req_t req, fuse_ino_t nodeid,
{
const char *name = fuse_mbuf_iter_advance_str(iter);
const char *linkname = fuse_mbuf_iter_advance_str(iter);
+ bool secctx_enabled = req->se->conn.want & FUSE_CAP_SECURITY_CTX;
+ int err;
if (!name || !linkname) {
fuse_reply_err(req, EINVAL);
return;
}
+    if (secctx_enabled) {
+        /*
+         * parse_secctx_fill_req() returns a negative errno, while
+         * fuse_reply_err() takes a positive one: flip the sign.
+         */
+        err = parse_secctx_fill_req(req, iter);
+        if (err) {
+            fuse_reply_err(req, -err);
+            return;
+        }
+    }
+
if (req->se->op.symlink) {
req->se->op.symlink(req, linkname, nodeid, name);
} else {
@@ -1048,6 +1128,8 @@ static void do_link(fuse_req_t req, fuse_ino_t nodeid,
static void do_create(fuse_req_t req, fuse_ino_t nodeid,
struct fuse_mbuf_iter *iter)
{
+ bool secctx_enabled = req->se->conn.want & FUSE_CAP_SECURITY_CTX;
+
if (req->se->op.create) {
struct fuse_create_in *arg;
struct fuse_file_info fi;
@@ -1060,6 +1142,15 @@ static void do_create(fuse_req_t req, fuse_ino_t nodeid,
return;
}
+        if (secctx_enabled) {
+            int err;
+
+            /*
+             * parse_secctx_fill_req() returns a negative errno, while
+             * fuse_reply_err() takes a positive one: flip the sign.
+             */
+            err = parse_secctx_fill_req(req, iter);
+            if (err) {
+                fuse_reply_err(req, -err);
+                return;
+            }
+        }
+
memset(&fi, 0, sizeof(fi));
fi.flags = arg->flags;
fi.kill_priv = arg->open_flags & FUSE_OPEN_KILL_SUIDGID;
@@ -1876,15 +1967,30 @@ static void do_lseek(fuse_req_t req, fuse_ino_t nodeid,
}
}
+/*
+ * FUSE_SYNCFS dispatcher: forward the request to the filesystem's syncfs
+ * handler, or answer ENOSYS when none is registered. The request payload
+ * (iter) is not consumed.
+ */
+static void do_syncfs(fuse_req_t req, fuse_ino_t nodeid,
+                      struct fuse_mbuf_iter *iter)
+{
+    if (!req->se->op.syncfs) {
+        fuse_reply_err(req, ENOSYS);
+        return;
+    }
+
+    req->se->op.syncfs(req, nodeid);
+}
+
static void do_init(fuse_req_t req, fuse_ino_t nodeid,
struct fuse_mbuf_iter *iter)
{
size_t compat_size = offsetof(struct fuse_init_in, max_readahead);
+ size_t compat2_size = offsetof(struct fuse_init_in, flags) +
+ sizeof(uint32_t);
+ /* Fuse structure extended with minor version 36 */
+ size_t compat3_size = endof(struct fuse_init_in, unused);
struct fuse_init_in *arg;
struct fuse_init_out outarg;
struct fuse_session *se = req->se;
size_t bufsize = se->bufsize;
size_t outargsize = sizeof(outarg);
+ uint64_t flags = 0;
(void)nodeid;
@@ -1897,15 +2003,29 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid,
/* ...and now consume the new fields. */
if (arg->major == 7 && arg->minor >= 6) {
- if (!fuse_mbuf_iter_advance(iter, sizeof(*arg) - compat_size)) {
+ if (!fuse_mbuf_iter_advance(iter, compat2_size - compat_size)) {
+ fuse_reply_err(req, EINVAL);
+ return;
+ }
+ flags |= arg->flags;
+ }
+
+ /*
+ * fuse_init_in was extended again with minor version 36. Just read
+ * current known size of fuse_init so that future extension and
+ * header rebase does not cause breakage.
+ */
+ if (sizeof(*arg) > compat2_size && (arg->flags & FUSE_INIT_EXT)) {
+ if (!fuse_mbuf_iter_advance(iter, compat3_size - compat2_size)) {
fuse_reply_err(req, EINVAL);
return;
}
+ flags |= (uint64_t) arg->flags2 << 32;
}
fuse_log(FUSE_LOG_DEBUG, "INIT: %u.%u\n", arg->major, arg->minor);
if (arg->major == 7 && arg->minor >= 6) {
- fuse_log(FUSE_LOG_DEBUG, "flags=0x%08x\n", arg->flags);
+ fuse_log(FUSE_LOG_DEBUG, "flags=0x%016llx\n", flags);
fuse_log(FUSE_LOG_DEBUG, "max_readahead=0x%08x\n", arg->max_readahead);
}
se->conn.proto_major = arg->major;
@@ -1933,70 +2053,73 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid,
if (arg->max_readahead < se->conn.max_readahead) {
se->conn.max_readahead = arg->max_readahead;
}
- if (arg->flags & FUSE_ASYNC_READ) {
+ if (flags & FUSE_ASYNC_READ) {
se->conn.capable |= FUSE_CAP_ASYNC_READ;
}
- if (arg->flags & FUSE_POSIX_LOCKS) {
+ if (flags & FUSE_POSIX_LOCKS) {
se->conn.capable |= FUSE_CAP_POSIX_LOCKS;
}
- if (arg->flags & FUSE_ATOMIC_O_TRUNC) {
+ if (flags & FUSE_ATOMIC_O_TRUNC) {
se->conn.capable |= FUSE_CAP_ATOMIC_O_TRUNC;
}
- if (arg->flags & FUSE_EXPORT_SUPPORT) {
+ if (flags & FUSE_EXPORT_SUPPORT) {
se->conn.capable |= FUSE_CAP_EXPORT_SUPPORT;
}
- if (arg->flags & FUSE_DONT_MASK) {
+ if (flags & FUSE_DONT_MASK) {
se->conn.capable |= FUSE_CAP_DONT_MASK;
}
- if (arg->flags & FUSE_FLOCK_LOCKS) {
+ if (flags & FUSE_FLOCK_LOCKS) {
se->conn.capable |= FUSE_CAP_FLOCK_LOCKS;
}
- if (arg->flags & FUSE_AUTO_INVAL_DATA) {
+ if (flags & FUSE_AUTO_INVAL_DATA) {
se->conn.capable |= FUSE_CAP_AUTO_INVAL_DATA;
}
- if (arg->flags & FUSE_DO_READDIRPLUS) {
+ if (flags & FUSE_DO_READDIRPLUS) {
se->conn.capable |= FUSE_CAP_READDIRPLUS;
}
- if (arg->flags & FUSE_READDIRPLUS_AUTO) {
+ if (flags & FUSE_READDIRPLUS_AUTO) {
se->conn.capable |= FUSE_CAP_READDIRPLUS_AUTO;
}
- if (arg->flags & FUSE_ASYNC_DIO) {
+ if (flags & FUSE_ASYNC_DIO) {
se->conn.capable |= FUSE_CAP_ASYNC_DIO;
}
- if (arg->flags & FUSE_WRITEBACK_CACHE) {
+ if (flags & FUSE_WRITEBACK_CACHE) {
se->conn.capable |= FUSE_CAP_WRITEBACK_CACHE;
}
- if (arg->flags & FUSE_NO_OPEN_SUPPORT) {
+ if (flags & FUSE_NO_OPEN_SUPPORT) {
se->conn.capable |= FUSE_CAP_NO_OPEN_SUPPORT;
}
- if (arg->flags & FUSE_PARALLEL_DIROPS) {
+ if (flags & FUSE_PARALLEL_DIROPS) {
se->conn.capable |= FUSE_CAP_PARALLEL_DIROPS;
}
- if (arg->flags & FUSE_POSIX_ACL) {
+ if (flags & FUSE_POSIX_ACL) {
se->conn.capable |= FUSE_CAP_POSIX_ACL;
}
- if (arg->flags & FUSE_HANDLE_KILLPRIV) {
+ if (flags & FUSE_HANDLE_KILLPRIV) {
se->conn.capable |= FUSE_CAP_HANDLE_KILLPRIV;
}
- if (arg->flags & FUSE_NO_OPENDIR_SUPPORT) {
+ if (flags & FUSE_NO_OPENDIR_SUPPORT) {
se->conn.capable |= FUSE_CAP_NO_OPENDIR_SUPPORT;
}
- if (!(arg->flags & FUSE_MAX_PAGES)) {
+ if (!(flags & FUSE_MAX_PAGES)) {
size_t max_bufsize = FUSE_DEFAULT_MAX_PAGES_PER_REQ * getpagesize() +
FUSE_BUFFER_HEADER_SIZE;
if (bufsize > max_bufsize) {
bufsize = max_bufsize;
}
}
- if (arg->flags & FUSE_SUBMOUNTS) {
+ if (flags & FUSE_SUBMOUNTS) {
se->conn.capable |= FUSE_CAP_SUBMOUNTS;
}
- if (arg->flags & FUSE_HANDLE_KILLPRIV_V2) {
+ if (flags & FUSE_HANDLE_KILLPRIV_V2) {
se->conn.capable |= FUSE_CAP_HANDLE_KILLPRIV_V2;
}
- if (arg->flags & FUSE_SETXATTR_EXT) {
+ if (flags & FUSE_SETXATTR_EXT) {
se->conn.capable |= FUSE_CAP_SETXATTR_EXT;
}
+ if (flags & FUSE_SECURITY_CTX) {
+ se->conn.capable |= FUSE_CAP_SECURITY_CTX;
+ }
#ifdef HAVE_SPLICE
#ifdef HAVE_VMSPLICE
se->conn.capable |= FUSE_CAP_SPLICE_WRITE | FUSE_CAP_SPLICE_MOVE;
@@ -2051,7 +2174,7 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid,
if (se->conn.want & (~se->conn.capable)) {
fuse_log(FUSE_LOG_ERR,
"fuse: error: filesystem requested capabilities "
- "0x%x that are not supported by kernel, aborting.\n",
+ "0x%llx that are not supported by kernel, aborting.\n",
se->conn.want & (~se->conn.capable));
fuse_reply_err(req, EPROTO);
se->error = -EPROTO;
@@ -2062,7 +2185,7 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid,
if (se->conn.max_write < bufsize - FUSE_BUFFER_HEADER_SIZE) {
se->bufsize = se->conn.max_write + FUSE_BUFFER_HEADER_SIZE;
}
- if (arg->flags & FUSE_MAX_PAGES) {
+ if (flags & FUSE_MAX_PAGES) {
outarg.flags |= FUSE_MAX_PAGES;
outarg.max_pages = (se->conn.max_write - 1) / getpagesize() + 1;
}
@@ -2136,8 +2259,14 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid,
outarg.flags |= FUSE_SETXATTR_EXT;
}
+ if (se->conn.want & FUSE_CAP_SECURITY_CTX) {
+ /* bits 32..63 get shifted down 32 bits into the flags2 field */
+ outarg.flags2 |= FUSE_SECURITY_CTX >> 32;
+ }
+
fuse_log(FUSE_LOG_DEBUG, " INIT: %u.%u\n", outarg.major, outarg.minor);
- fuse_log(FUSE_LOG_DEBUG, " flags=0x%08x\n", outarg.flags);
+ fuse_log(FUSE_LOG_DEBUG, " flags2=0x%08x flags=0x%08x\n", outarg.flags2,
+ outarg.flags);
fuse_log(FUSE_LOG_DEBUG, " max_readahead=0x%08x\n", outarg.max_readahead);
fuse_log(FUSE_LOG_DEBUG, " max_write=0x%08x\n", outarg.max_write);
fuse_log(FUSE_LOG_DEBUG, " max_background=%i\n", outarg.max_background);
@@ -2280,6 +2409,7 @@ static struct {
[FUSE_RENAME2] = { do_rename2, "RENAME2" },
[FUSE_COPY_FILE_RANGE] = { do_copy_file_range, "COPY_FILE_RANGE" },
[FUSE_LSEEK] = { do_lseek, "LSEEK" },
+ [FUSE_SYNCFS] = { do_syncfs, "SYNCFS" },
};
#define FUSE_MAXOP (sizeof(fuse_ll_ops) / sizeof(fuse_ll_ops[0]))
diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h
index c55c0ca..b889dae 100644
--- a/tools/virtiofsd/fuse_lowlevel.h
+++ b/tools/virtiofsd/fuse_lowlevel.h
@@ -1226,6 +1226,19 @@ struct fuse_lowlevel_ops {
*/
void (*lseek)(fuse_req_t req, fuse_ino_t ino, off_t off, int whence,
struct fuse_file_info *fi);
+
+ /**
+ * Synchronize file system content
+ *
+ * If this request is answered with an error code of ENOSYS,
+ * this is treated as success and future calls to syncfs() will
+ * succeed automatically without being sent to the filesystem
+ * process.
+ *
+ * @param req request handle
+ * @param ino the inode number
+ */
+ void (*syncfs)(fuse_req_t req, fuse_ino_t ino);
};
/**
diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c
index a8295d9..e226fc5 100644
--- a/tools/virtiofsd/helper.c
+++ b/tools/virtiofsd/helper.c
@@ -187,6 +187,7 @@ void fuse_cmdline_help(void)
" default: no_allow_direct_io\n"
" -o announce_submounts Announce sub-mount points to the guest\n"
" -o posix_acl/no_posix_acl Enable/Disable posix_acl. (default: disabled)\n"
+ " -o security_label/no_security_label Enable/Disable security label. (default: disabled)\n"
);
}
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index b3d0674..dfa2fc2 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -173,10 +173,15 @@ struct lo_data {
/* An O_PATH file descriptor to /proc/self/fd/ */
int proc_self_fd;
+ /* An O_PATH file descriptor to /proc/self/task/ */
+ int proc_self_task;
int user_killpriv_v2, killpriv_v2;
/* If set, virtiofsd is responsible for setting umask during creation */
bool change_umask;
int user_posix_acl, posix_acl;
+ /* Keeps track if /proc/<pid>/attr/fscreate should be used or not */
+ bool use_fscreate;
+ int user_security_label;
};
static const struct fuse_opt lo_opts[] = {
@@ -211,6 +216,8 @@ static const struct fuse_opt lo_opts[] = {
{ "no_killpriv_v2", offsetof(struct lo_data, user_killpriv_v2), 0 },
{ "posix_acl", offsetof(struct lo_data, user_posix_acl), 1 },
{ "no_posix_acl", offsetof(struct lo_data, user_posix_acl), 0 },
+ { "security_label", offsetof(struct lo_data, user_security_label), 1 },
+ { "no_security_label", offsetof(struct lo_data, user_security_label), 0 },
FUSE_OPT_END
};
static bool use_syslog = false;
@@ -230,6 +237,11 @@ static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st,
static int xattr_map_client(const struct lo_data *lo, const char *client_name,
char **out_name);
+#define FCHDIR_NOFAIL(fd) do { /* fchdir(fd) and assert that it worked */ \
+ int fchdir_res = fchdir(fd); /* call kept outside the assert() */ \
+ assert(fchdir_res == 0); \
+ } while (0)
+
static bool is_dot_or_dotdot(const char *name)
{
return name[0] == '.' &&
@@ -257,6 +269,70 @@ static struct lo_data *lo_data(fuse_req_t req)
}
/*
+ * Tries to figure out if /proc/<pid>/attr/fscreate is usable or not. With
+ * selinux=0, read from fscreate returns -EINVAL.
+ *
+ * TODO: Link with libselinux and use is_selinux_enabled() instead down
+ * the line. It probably will be more reliable indicator.
+ */
+/*
+ * Probe whether this thread's <tid>/attr/fscreate file (looked up relative
+ * to lo->proc_self_task) can be opened read-write and read from.
+ *
+ * Returns true when both the open and the read succeed, false otherwise.
+ */
+static bool is_fscreate_usable(struct lo_data *lo)
+{
+    char procname[64];
+    int fscreate_fd;
+    ssize_t bytes_read;    /* read() returns ssize_t; -1 signals failure */
+
+    snprintf(procname, sizeof(procname), "%ld/attr/fscreate",
+             syscall(SYS_gettid));
+    fscreate_fd = openat(lo->proc_self_task, procname, O_RDWR);
+    if (fscreate_fd == -1) {
+        return false;
+    }
+
+    /* The content is irrelevant; procname is merely reused as scratch space */
+    bytes_read = read(fscreate_fd, procname, sizeof(procname));
+    close(fscreate_fd);
+
+    return bytes_read != -1;
+}
+
+/* Helpers to set/reset fscreate */
+/*
+ * Open this thread's <tid>/attr/fscreate file (relative to
+ * lo->proc_self_task) and write the security context ctx (ctxlen bytes)
+ * to it.
+ *
+ * On success returns 0 and stores the still-open descriptor in *fd, to be
+ * handed to close_reset_proc_fscreate() later. On failure returns a positive
+ * errno value, closes the descriptor and leaves *fd untouched.
+ */
+static int open_set_proc_fscreate(struct lo_data *lo, const void *ctx,
+                                  size_t ctxlen, int *fd)
+{
+    char procname[64];
+    int fscreate_fd, err;
+    ssize_t written;    /* write() returns ssize_t; -1 signals failure */
+
+    snprintf(procname, sizeof(procname), "%ld/attr/fscreate",
+             syscall(SYS_gettid));
+    fscreate_fd = openat(lo->proc_self_task, procname, O_WRONLY);
+    if (fscreate_fd == -1) {
+        return errno;
+    }
+
+    written = write(fscreate_fd, ctx, ctxlen);
+    if (written == -1) {
+        err = errno;    /* save before close() can overwrite it */
+        close(fscreate_fd);
+        return err;
+    }
+
+    *fd = fscreate_fd;
+    return 0;
+}
+
+/*
+ * Counterpart of open_set_proc_fscreate(): issue a zero-length write on fd
+ * to reset fscreate, warn if that write fails, then close fd.
+ */
+static void close_reset_proc_fscreate(int fd)
+{
+    ssize_t res = write(fd, NULL, 0);
+
+    if (res == -1) {
+        fuse_log(FUSE_LOG_WARNING, "Failed to reset fscreate. err=%d\n", errno);
+    }
+
+    close(fd);
+}
+
+/*
* Load capng's state from our saved state if the current thread
* hadn't previously been loaded.
* returns 0 on success
@@ -735,6 +811,17 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn)
fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling posix_acl\n");
conn->want &= ~FUSE_CAP_POSIX_ACL;
}
+
+ if (lo->user_security_label == 1) {
+ if (!(conn->capable & FUSE_CAP_SECURITY_CTX)) {
+ fuse_log(FUSE_LOG_ERR, "lo_init: Can not enable security label."
+ " kernel does not support FUSE_SECURITY_CTX capability.\n");
+ }
+ conn->want |= FUSE_CAP_SECURITY_CTX;
+ } else {
+ fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling security label\n");
+ conn->want &= ~FUSE_CAP_SECURITY_CTX;
+ }
}
static void lo_getattr(fuse_req_t req, fuse_ino_t ino,
@@ -1284,16 +1371,103 @@ static void lo_restore_cred_gain_cap(struct lo_cred *old, bool restore_umask,
}
}
+/*
+ * Label the already created entry name in dir with the request's security
+ * context, by setting the xattr secctx_name on it via its O_PATH descriptor
+ * under lo->proc_self_fd.
+ *
+ * Does nothing when the request carries no security context. Returns 0 on
+ * success or a positive errno value.
+ */
+static int do_mknod_symlink_secctx(fuse_req_t req, struct lo_inode *dir,
+                                   const char *name, const char *secctx_name)
+{
+    struct lo_data *lo = lo_data(req);
+    char fd_str[64];
+    int o_path_fd, ret;
+
+    if (req->secctx.ctxlen == 0) {
+        return 0;
+    }
+
+    /* O_PATH handle on the new entry; a symlink itself must not be followed */
+    o_path_fd = openat(dir->fd, name, O_PATH | O_NOFOLLOW);
+    ret = (o_path_fd == -1) ? errno : 0;
+    if (ret != 0) {
+        return ret;
+    }
+
+    sprintf(fd_str, "%i", o_path_fd);
+
+    /* Setting the context here is not atomic w.r.t file creation */
+    FCHDIR_NOFAIL(lo->proc_self_fd);
+    ret = setxattr(fd_str, secctx_name, req->secctx.ctx, req->secctx.ctxlen,
+                   0) ? errno : 0;
+    FCHDIR_NOFAIL(lo->root.fd);
+
+    close(o_path_fd);
+    return ret;
+}
+
+/*
+ * Create the entry name in dir through mknod_wrapper() and, when the request
+ * carries a security context, label it.
+ *
+ * Labelling strategy:
+ *  - context xattr not remapped and lo->use_fscreate set: write the context
+ *    to fscreate before creation, no setxattr() afterwards;
+ *  - otherwise: create first, then set the xattr; the new entry is removed
+ *    again if that fails.
+ *
+ * Returns 0 on success or a positive errno value.
+ */
+static int do_mknod_symlink(fuse_req_t req, struct lo_inode *dir,
+                            const char *name, mode_t mode, dev_t rdev,
+                            const char *link)
+{
+    struct lo_data *lo = lo_data(req);
+    struct lo_cred saved_cred = {};
+    const char *xattr_name = req->secctx.name;
+    char *remapped = NULL;
+    bool have_secctx = req->secctx.ctxlen;
+    bool via_fscreate = false;
+    int fscreate_fd = -1;
+    int ret;
+
+    if (have_secctx && lo->xattrmap) {
+        ret = xattr_map_client(lo, req->secctx.name, &remapped);
+        if (ret < 0) {
+            return -ret;
+        }
+        /*
+         * NOTE(review): if xattr_map_client() can succeed without setting
+         * the out name, xattr_name is NULL from here on - confirm.
+         */
+        xattr_name = remapped;
+    }
+
+    if (have_secctx && !remapped && lo->use_fscreate) {
+        via_fscreate = true;
+        ret = open_set_proc_fscreate(lo, req->secctx.ctx, req->secctx.ctxlen,
+                                     &fscreate_fd);
+        if (ret) {
+            goto cleanup;
+        }
+    }
+
+    ret = lo_change_cred(req, &saved_cred,
+                         lo->change_umask && !S_ISLNK(mode));
+    if (ret) {
+        goto cleanup;
+    }
+
+    /* errno is captured before the credentials are restored */
+    ret = mknod_wrapper(dir->fd, name, link, mode, rdev) == -1 ? errno : 0;
+    lo_restore_cred(&saved_cred, lo->change_umask && !S_ISLNK(mode));
+    if (ret) {
+        goto cleanup;
+    }
+
+    if (!via_fscreate) {
+        ret = do_mknod_symlink_secctx(req, dir, name, xattr_name);
+        if (ret) {
+            /* Labelling failed: do not leave the new entry behind */
+            unlinkat(dir->fd, name, S_ISDIR(mode) ? AT_REMOVEDIR : 0);
+        }
+    }
+
+cleanup:
+    if (fscreate_fd != -1) {
+        close_reset_proc_fscreate(fscreate_fd);
+    }
+    g_free(remapped);
+    return ret;
+}
+
static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent,
const char *name, mode_t mode, dev_t rdev,
const char *link)
{
- int res;
int saverr;
struct lo_data *lo = lo_data(req);
struct lo_inode *dir;
struct fuse_entry_param e;
- struct lo_cred old = {};
if (is_empty(name)) {
fuse_reply_err(req, ENOENT);
@@ -1311,21 +1485,11 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent,
return;
}
- saverr = lo_change_cred(req, &old, lo->change_umask && !S_ISLNK(mode));
+ saverr = do_mknod_symlink(req, dir, name, mode, rdev, link);
if (saverr) {
goto out;
}
- res = mknod_wrapper(dir->fd, name, link, mode, rdev);
-
- saverr = errno;
-
- lo_restore_cred(&old, lo->change_umask && !S_ISLNK(mode));
-
- if (res == -1) {
- goto out;
- }
-
saverr = lo_do_lookup(req, parent, name, &e, NULL);
if (saverr) {
goto out;
@@ -2001,6 +2165,190 @@ static int lo_do_open(struct lo_data *lo, struct lo_inode *inode,
return 0;
}
+/*
+ * Create and open a file below parent_inode with the credentials of the
+ * request, without any security context handling.
+ *
+ * With tmpfile set, an unnamed O_TMPFILE file is created (name is then the
+ * directory to create it in); otherwise name is created with
+ * O_CREAT | O_EXCL.
+ *
+ * Returns 0 and stores the descriptor in *open_fd on success, a positive
+ * errno value otherwise.
+ */
+static int do_create_nosecctx(fuse_req_t req, struct lo_inode *parent_inode,
+                              const char *name, mode_t mode,
+                              struct fuse_file_info *fi, int *open_fd,
+                              bool tmpfile)
+{
+    struct lo_data *lo = lo_data(req);
+    struct lo_cred saved_cred = {};
+    int open_flags = fi->flags;
+    int new_fd, ret;
+
+    if (!tmpfile) {
+        open_flags |= O_CREAT | O_EXCL;
+    } else {
+        /*
+         * O_EXCL is dropped because the file gets linked later, and O_CREAT
+         * because openat() would otherwise return -EINVAL.
+         */
+        open_flags = (open_flags | O_TMPFILE) & ~(O_CREAT | O_EXCL);
+
+        /* O_TMPFILE needs either O_RDWR or O_WRONLY */
+        if ((open_flags & O_ACCMODE) == O_RDONLY) {
+            open_flags |= O_RDWR;
+        }
+    }
+
+    ret = lo_change_cred(req, &saved_cred, lo->change_umask);
+    if (ret) {
+        return ret;
+    }
+
+    /* Try to create a new file but don't open existing files */
+    new_fd = openat(parent_inode->fd, name, open_flags, mode);
+    ret = (new_fd == -1) ? errno : 0;
+    lo_restore_cred(&saved_cred, lo->change_umask);
+
+    if (ret == 0) {
+        *open_fd = new_fd;
+    }
+    return ret;
+}
+
+/*
+ * Create name below parent_inode with the request's security context
+ * written to fscreate beforehand; fscreate is reset again right after the
+ * creation attempt, whether or not it succeeded.
+ *
+ * Returns 0 and stores the descriptor in *open_fd on success, a positive
+ * errno value otherwise.
+ */
+static int do_create_secctx_fscreate(fuse_req_t req,
+                                     struct lo_inode *parent_inode,
+                                     const char *name, mode_t mode,
+                                     struct fuse_file_info *fi, int *open_fd)
+{
+    struct lo_data *lo = lo_data(req);
+    int fscreate_fd = -1;
+    int new_fd = -1;
+    int ret;
+
+    ret = open_set_proc_fscreate(lo, req->secctx.ctx, req->secctx.ctxlen,
+                                 &fscreate_fd);
+    if (ret != 0) {
+        return ret;
+    }
+
+    ret = do_create_nosecctx(req, parent_inode, name, mode, fi, &new_fd,
+                             false);
+    close_reset_proc_fscreate(fscreate_fd);
+    if (ret != 0) {
+        return ret;
+    }
+
+    *open_fd = new_fd;
+    return 0;
+}
+
+/*
+ * Create name below parent_inode in three steps: open an unnamed O_TMPFILE
+ * file in the directory, set the xattr secctx_name on it, then link it in
+ * place as name through its descriptor under lo->proc_self_fd.
+ *
+ * Returns 0 and stores the descriptor in *open_fd on success; on failure
+ * the temporary file is closed and a positive errno value is returned.
+ */
+static int do_create_secctx_tmpfile(fuse_req_t req,
+                                    struct lo_inode *parent_inode,
+                                    const char *name, mode_t mode,
+                                    struct fuse_file_info *fi,
+                                    const char *secctx_name, int *open_fd)
+{
+    struct lo_data *lo = lo_data(req);
+    char fd_str[64];
+    int tmp_fd = -1;
+    int ret;
+
+    ret = do_create_nosecctx(req, parent_inode, ".", mode, fi, &tmp_fd, true);
+    if (ret) {
+        return ret;
+    }
+
+    if (fsetxattr(tmp_fd, secctx_name, req->secctx.ctx, req->secctx.ctxlen,
+                  0)) {
+        ret = errno;
+    } else {
+        /* Security context set on file. Link it in place */
+        sprintf(fd_str, "%d", tmp_fd);
+        FCHDIR_NOFAIL(lo->proc_self_fd);
+        ret = linkat(AT_FDCWD, fd_str, parent_inode->fd, name,
+                     AT_SYMLINK_FOLLOW) == -1 ? errno : 0;
+        FCHDIR_NOFAIL(lo->root.fd);
+    }
+
+    if (ret == 0) {
+        *open_fd = tmp_fd;
+    } else if (tmp_fd != -1) {
+        close(tmp_fd);
+    }
+    return ret;
+}
+
+/*
+ * Create name below parent_inode and set the xattr secctx_name on it
+ * afterwards. The two steps are not atomic; if the xattr cannot be set, the
+ * new file is closed and unlinked again.
+ *
+ * Returns 0 and stores the descriptor in *open_fd on success, a positive
+ * errno value otherwise.
+ */
+static int do_create_secctx_noatomic(fuse_req_t req,
+                                     struct lo_inode *parent_inode,
+                                     const char *name, mode_t mode,
+                                     struct fuse_file_info *fi,
+                                     const char *secctx_name, int *open_fd)
+{
+    int new_fd = -1;
+    int ret;
+
+    ret = do_create_nosecctx(req, parent_inode, name, mode, fi, &new_fd,
+                             false);
+    if (ret == 0) {
+        /* Set security context. This is not atomic w.r.t file creation */
+        ret = fsetxattr(new_fd, secctx_name, req->secctx.ctx,
+                        req->secctx.ctxlen, 0) == -1 ? errno : 0;
+    }
+
+    if (ret == 0) {
+        *open_fd = new_fd;
+        return 0;
+    }
+
+    /* Only undo the creation when a file was actually created */
+    if (new_fd != -1) {
+        close(new_fd);
+        unlinkat(parent_inode->fd, name, 0);
+    }
+    return ret;
+}
+
+/*
+ * Create and open name below parent_inode, picking the labelling method
+ * that fits the request:
+ *
+ *  - no security context in the request: plain creation;
+ *  - context xattr not remapped and lo->use_fscreate set: set the context
+ *    through fscreate before creation;
+ *  - regular file: O_TMPFILE + fsetxattr + linkat; if that reports
+ *    EOPNOTSUPP, fall through to the next method;
+ *  - anything else: non-atomic creation followed by xattr setting.
+ *
+ * Returns 0 and stores the descriptor in *open_fd on success, a positive
+ * errno value otherwise.
+ */
+static int do_lo_create(fuse_req_t req, struct lo_inode *parent_inode,
+                        const char *name, mode_t mode,
+                        struct fuse_file_info *fi, int *open_fd)
+{
+    struct lo_data *lo = lo_data(req);
+    const char *xattr_name = req->secctx.name;
+    char *remapped = NULL;
+    bool have_secctx = req->secctx.ctxlen;
+    int ret;
+
+    if (!have_secctx) {
+        return do_create_nosecctx(req, parent_inode, name, mode, fi, open_fd,
+                                  false);
+    }
+
+    if (lo->xattrmap) {
+        ret = xattr_map_client(lo, req->secctx.name, &remapped);
+        if (ret < 0) {
+            return -ret;
+        }
+        /*
+         * NOTE(review): if xattr_map_client() can succeed without setting
+         * the out name, xattr_name is NULL from here on - confirm.
+         */
+        xattr_name = remapped;
+    }
+
+    if (!remapped && lo->use_fscreate) {
+        ret = do_create_secctx_fscreate(req, parent_inode, name, mode, fi,
+                                        open_fd);
+    } else {
+        bool need_fallback = true;
+
+        if (S_ISREG(mode)) {
+            ret = do_create_secctx_tmpfile(req, parent_inode, name, mode, fi,
+                                           xattr_name, open_fd);
+            /* Only a missing O_TMPFILE implementation triggers the fallback */
+            need_fallback = (ret == EOPNOTSUPP);
+        }
+
+        if (need_fallback) {
+            ret = do_create_secctx_noatomic(req, parent_inode, name, mode, fi,
+                                            xattr_name, open_fd);
+        }
+    }
+
+    g_free(remapped);
+    return ret;
+}
+
static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
mode_t mode, struct fuse_file_info *fi)
{
@@ -2010,7 +2358,6 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
struct lo_inode *inode = NULL;
struct fuse_entry_param e;
int err;
- struct lo_cred old = {};
fuse_log(FUSE_LOG_DEBUG, "lo_create(parent=%" PRIu64 ", name=%s)"
" kill_priv=%d\n", parent, name, fi->kill_priv);
@@ -2026,18 +2373,9 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
return;
}
- err = lo_change_cred(req, &old, lo->change_umask);
- if (err) {
- goto out;
- }
-
update_open_flags(lo->writeback, lo->allow_direct_io, fi);
- /* Try to create a new file but don't open existing files */
- fd = openat(parent_inode->fd, name, fi->flags | O_CREAT | O_EXCL, mode);
- err = fd == -1 ? errno : 0;
-
- lo_restore_cred(&old, lo->change_umask);
+ err = do_lo_create(req, parent_inode, name, mode, fi, &fd);
/* Ignore the error if file exists and O_EXCL was not given */
if (err && (err != EEXIST || (fi->flags & O_EXCL))) {
@@ -2467,6 +2805,15 @@ static void lo_flock(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi,
int res;
(void)ino;
+ if (!(op & LOCK_NB)) {
+ /*
+ * Blocking flock can deadlock as there is only one thread
+ * serving the queue.
+ */
+ fuse_reply_err(req, EOPNOTSUPP);
+ return;
+ }
+
res = flock(lo_fi_fd(req, fi), op);
fuse_reply_err(req, res == -1 ? errno : 0);
@@ -2842,11 +3189,6 @@ static int xattr_map_server(const struct lo_data *lo, const char *server_name,
return -ENODATA;
}
-#define FCHDIR_NOFAIL(fd) do { \
- int fchdir_res = fchdir(fd); \
- assert(fchdir_res == 0); \
- } while (0)
-
static bool block_xattr(struct lo_data *lo, const char *name)
{
/*
@@ -3357,6 +3699,49 @@ static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int whence,
}
}
+/*
+ * Run syncfs() on the filesystem that holds inode, using a temporary
+ * read-only descriptor obtained from lo_inode_open().
+ *
+ * Returns 0 on success or a positive errno value.
+ */
+static int lo_do_syncfs(struct lo_data *lo, struct lo_inode *inode)
+{
+    int sync_fd;
+    int status;
+
+    fuse_log(FUSE_LOG_DEBUG, "lo_do_syncfs(ino=%" PRIu64 ")\n",
+             inode->fuse_ino);
+
+    sync_fd = lo_inode_open(lo, inode, O_RDONLY);
+    if (sync_fd < 0) {
+        /* lo_inode_open() hands back a negated error code */
+        return -sync_fd;
+    }
+
+    status = syncfs(sync_fd) < 0 ? errno : 0;
+
+    close(sync_fd);
+    return status;
+}
+
+/*
+ * syncfs handler: sync the filesystem holding inode ino and reply with the
+ * outcome, or with EBADF when ino does not resolve to an inode.
+ *
+ * If submounts aren't announced, the client only sends a request to sync
+ * the root inode. TODO: Track submounts internally and iterate over them
+ * as well.
+ */
+static void lo_syncfs(fuse_req_t req, fuse_ino_t ino)
+{
+    struct lo_data *lo = lo_data(req);
+    struct lo_inode *target = lo_inode(req, ino);
+    int status = EBADF;
+
+    if (target) {
+        status = lo_do_syncfs(lo, target);
+        lo_inode_put(lo, &target);
+    }
+
+    fuse_reply_err(req, status);
+}
+
static void lo_destroy(void *userdata)
{
struct lo_data *lo = (struct lo_data *)userdata;
@@ -3417,6 +3802,7 @@ static struct fuse_lowlevel_ops lo_oper = {
.copy_file_range = lo_copy_file_range,
#endif
.lseek = lo_lseek,
+ .syncfs = lo_syncfs,
.destroy = lo_destroy,
};
@@ -3508,6 +3894,15 @@ static void setup_namespaces(struct lo_data *lo, struct fuse_session *se)
exit(1);
}
+ /* Get the /proc/self/task descriptor */
+ lo->proc_self_task = open("/proc/self/task/", O_PATH);
+ if (lo->proc_self_task == -1) {
+ fuse_log(FUSE_LOG_ERR, "open(/proc/self/task, O_PATH): %m\n");
+ exit(1);
+ }
+
+ lo->use_fscreate = is_fscreate_usable(lo);
+
/*
* We only need /proc/self/fd. Prevent ".." from accessing parent
* directories of /proc/self/fd by bind-mounting it over /proc. Since / was
@@ -3724,6 +4119,14 @@ static void setup_chroot(struct lo_data *lo)
exit(1);
}
+    lo->proc_self_task = open("/proc/self/task", O_PATH);
+    /* Check the descriptor that was just opened, not proc_self_fd */
+    if (lo->proc_self_task == -1) {
+        fuse_log(FUSE_LOG_ERR, "open(\"/proc/self/task\", O_PATH): %m\n");
+        exit(1);
+    }
+
+ lo->use_fscreate = is_fscreate_usable(lo);
+
/*
* Make the shared directory the file system root so that FUSE_OPEN
* (lo_open()) cannot escape the shared directory by opening a symlink.
@@ -3909,6 +4312,10 @@ static void fuse_lo_data_cleanup(struct lo_data *lo)
close(lo->proc_self_fd);
}
+ if (lo->proc_self_task >= 0) {
+ close(lo->proc_self_task);
+ }
+
if (lo->root.fd >= 0) {
close(lo->root.fd);
}
@@ -3936,8 +4343,10 @@ int main(int argc, char *argv[])
.posix_lock = 0,
.allow_direct_io = 0,
.proc_self_fd = -1,
+ .proc_self_task = -1,
.user_killpriv_v2 = -1,
.user_posix_acl = -1,
+ .user_security_label = -1,
};
struct lo_map_elem *root_elem;
struct lo_map_elem *reserve_elem;
diff --git a/tools/virtiofsd/passthrough_seccomp.c b/tools/virtiofsd/passthrough_seccomp.c
index 2bc0127..888295c 100644
--- a/tools/virtiofsd/passthrough_seccomp.c
+++ b/tools/virtiofsd/passthrough_seccomp.c
@@ -111,6 +111,7 @@ static const int syscall_allowlist[] = {
SCMP_SYS(set_robust_list),
SCMP_SYS(setxattr),
SCMP_SYS(symlinkat),
+ SCMP_SYS(syncfs),
SCMP_SYS(time), /* Rarely needed, except on static builds */
SCMP_SYS(tgkill),
SCMP_SYS(unlinkat),