diff options
author | Stefan Hajnoczi <stefanha@redhat.com> | 2020-10-08 09:55:34 +0100 |
---|---|---|
committer | Dr. David Alan Gilbert <dgilbert@redhat.com> | 2020-10-26 18:35:32 +0000 |
commit | 06844584b62a43384642f7243b0fc01c9fff0fc7 (patch) | |
tree | 969594181ec3954c7ea04dad346117c077ada794 /tools | |
parent | 800ad114f10d0bf94e49b1441e1a13064a45a967 (diff) | |
download | qemu-06844584b62a43384642f7243b0fc01c9fff0fc7.zip qemu-06844584b62a43384642f7243b0fc01c9fff0fc7.tar.gz qemu-06844584b62a43384642f7243b0fc01c9fff0fc7.tar.bz2 |
virtiofsd: add container-friendly -o sandbox=chroot option
virtiofsd cannot run in a container because CAP_SYS_ADMIN is required to
create namespaces.
Introduce a weaker sandbox mode that is sufficient in container
environments because the container runtime already sets up namespaces.
Use chroot to restrict path traversal to the shared directory.
virtiofsd loses the following:
1. Mount namespace. The process chroots to the shared directory but
leaves the mounts in place. Seccomp rejects mount(2)/umount(2)
syscalls.
2. Pid namespace. This should be fine because virtiofsd is the only
process running in the container.
3. Network namespace. This should be fine because seccomp already
rejects the connect(2) syscall, but an additional layer of security
is lost. Container runtime-specific network security policies can be
used drop network traffic (except for the vhost-user UNIX domain
socket).
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20201008085534.16070-1-stefanha@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Diffstat (limited to 'tools')
-rw-r--r-- | tools/virtiofsd/helper.c | 8 | ||||
-rw-r--r-- | tools/virtiofsd/passthrough_ll.c | 57 |
2 files changed, 63 insertions, 2 deletions
diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c index 85770d6..2e181a4 100644 --- a/tools/virtiofsd/helper.c +++ b/tools/virtiofsd/helper.c @@ -166,6 +166,14 @@ void fuse_cmdline_help(void) " enable/disable readirplus\n" " default: readdirplus except with " "cache=none\n" + " -o sandbox=namespace|chroot\n" + " sandboxing mode:\n" + " - namespace: mount, pid, and net\n" + " namespaces with pivot_root(2)\n" + " into shared directory\n" + " - chroot: chroot(2) into shared\n" + " directory (use in containers)\n" + " default: namespace\n" " -o timeout=<number> I/O timeout (seconds)\n" " default: depends on cache= option.\n" " -o writeback|no_writeback enable/disable writeback cache\n" diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c index 13fdb12..f03b1f9 100644 --- a/tools/virtiofsd/passthrough_ll.c +++ b/tools/virtiofsd/passthrough_ll.c @@ -137,8 +137,14 @@ enum { CACHE_ALWAYS, }; +enum { + SANDBOX_NAMESPACE, + SANDBOX_CHROOT, +}; + struct lo_data { pthread_mutex_t mutex; + int sandbox; int debug; int writeback; int flock; @@ -163,6 +169,12 @@ struct lo_data { }; static const struct fuse_opt lo_opts[] = { + { "sandbox=namespace", + offsetof(struct lo_data, sandbox), + SANDBOX_NAMESPACE }, + { "sandbox=chroot", + offsetof(struct lo_data, sandbox), + SANDBOX_CHROOT }, { "writeback", offsetof(struct lo_data, writeback), 1 }, { "no_writeback", offsetof(struct lo_data, writeback), 0 }, { "source=%s", offsetof(struct lo_data, source), 0 }, @@ -2661,14 +2673,54 @@ static void setup_capabilities(char *modcaps_in) } /* + * Use chroot as a weaker sandbox for environments where the process is + * launched without CAP_SYS_ADMIN. + */ +static void setup_chroot(struct lo_data *lo) +{ + lo->proc_self_fd = open("/proc/self/fd", O_PATH); + if (lo->proc_self_fd == -1) { + fuse_log(FUSE_LOG_ERR, "open(\"/proc/self/fd\", O_PATH): %m\n"); + exit(1); + } + + /* + * Make the shared directory the file system root so that FUSE_OPEN + * (lo_open()) cannot escape the shared directory by opening a symlink. + * + * The chroot(2) syscall is later disabled by seccomp and the + * CAP_SYS_CHROOT capability is dropped so that tampering with the chroot + * is not possible. + * + * However, it's still possible to escape the chroot via lo->proc_self_fd + * but that requires first gaining control of the process. + */ + if (chroot(lo->source) != 0) { + fuse_log(FUSE_LOG_ERR, "chroot(\"%s\"): %m\n", lo->source); + exit(1); + } + + /* Move into the chroot */ + if (chdir("/") != 0) { + fuse_log(FUSE_LOG_ERR, "chdir(\"/\"): %m\n"); + exit(1); + } +} + +/* * Lock down this process to prevent access to other processes or files outside * source directory. This reduces the impact of arbitrary code execution bugs. */ static void setup_sandbox(struct lo_data *lo, struct fuse_session *se, bool enable_syslog) { - setup_namespaces(lo, se); - setup_mounts(lo->source); + if (lo->sandbox == SANDBOX_NAMESPACE) { + setup_namespaces(lo, se); + setup_mounts(lo->source); + } else { + setup_chroot(lo); + } + setup_seccomp(enable_syslog); setup_capabilities(g_strdup(lo->modcaps)); } @@ -2815,6 +2867,7 @@ int main(int argc, char *argv[]) struct fuse_session *se; struct fuse_cmdline_opts opts; struct lo_data lo = { + .sandbox = SANDBOX_NAMESPACE, .debug = 0, .writeback = 0, .posix_lock = 0, |