/* * Present a block device as a raw image through FUSE * * Copyright (c) 2020 Max Reitz <mreitz@redhat.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; under version 2 or later of the License. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #define FUSE_USE_VERSION 31 #include "qemu/osdep.h" #include "qemu/memalign.h" #include "block/aio.h" #include "block/block_int-common.h" #include "block/export.h" #include "block/fuse.h" #include "block/qapi.h" #include "qapi/error.h" #include "qapi/qapi-commands-block.h" #include "qemu/main-loop.h" #include "sysemu/block-backend.h" #include <fuse.h> #include <fuse_lowlevel.h> #if defined(CONFIG_FALLOCATE_ZERO_RANGE) #include <linux/falloc.h> #endif #ifdef __linux__ #include <linux/fs.h> #endif /* Prevent overly long bounce buffer allocations */ #define FUSE_MAX_BOUNCE_BYTES (MIN(BDRV_REQUEST_MAX_BYTES, 64 * 1024 * 1024)) typedef struct FuseExport { BlockExport common; struct fuse_session *fuse_session; struct fuse_buf fuse_buf; bool mounted, fd_handler_set_up; char *mountpoint; bool writable; bool growable; /* Whether allow_other was used as a mount option or not */ bool allow_other; mode_t st_mode; uid_t st_uid; gid_t st_gid; } FuseExport; static GHashTable *exports; static const struct fuse_lowlevel_ops fuse_ops; static void fuse_export_shutdown(BlockExport *exp); static void fuse_export_delete(BlockExport *exp); static void init_exports_table(void); static int setup_fuse_export(FuseExport *exp, const char *mountpoint, bool allow_other, Error **errp); static 
void read_from_fuse_export(void *opaque); static bool is_regular_file(const char *path, Error **errp); static int fuse_export_create(BlockExport *blk_exp, BlockExportOptions *blk_exp_args, Error **errp) { FuseExport *exp = container_of(blk_exp, FuseExport, common); BlockExportOptionsFuse *args = &blk_exp_args->u.fuse; int ret; assert(blk_exp_args->type == BLOCK_EXPORT_TYPE_FUSE); /* For growable and writable exports, take the RESIZE permission */ if (args->growable || blk_exp_args->writable) { uint64_t blk_perm, blk_shared_perm; blk_get_perm(exp->common.blk, &blk_perm, &blk_shared_perm); ret = blk_set_perm(exp->common.blk, blk_perm | BLK_PERM_RESIZE, blk_shared_perm, errp); if (ret < 0) { return ret; } } init_exports_table(); /* * It is important to do this check before calling is_regular_file() -- * that function will do a stat(), which we would have to handle if we * already exported something on @mountpoint. But we cannot, because * we are currently caught up here. * (Note that ideally we would want to resolve relative paths here, * but bdrv_make_absolute_filename() might do the wrong thing for * paths that contain colons, and realpath() would resolve symlinks, * which we do not want: The mount point is not going to be the * symlink's destination, but the link itself.) * So this will not catch all potential clashes, but hopefully at * least the most common one of specifying exactly the same path * string twice. 
*/ if (g_hash_table_contains(exports, args->mountpoint)) { error_setg(errp, "There already is a FUSE export on '%s'", args->mountpoint); ret = -EEXIST; goto fail; } if (!is_regular_file(args->mountpoint, errp)) { ret = -EINVAL; goto fail; } exp->mountpoint = g_strdup(args->mountpoint); exp->writable = blk_exp_args->writable; exp->growable = args->growable; /* set default */ if (!args->has_allow_other) { args->allow_other = FUSE_EXPORT_ALLOW_OTHER_AUTO; } exp->st_mode = S_IFREG | S_IRUSR; if (exp->writable) { exp->st_mode |= S_IWUSR; } exp->st_uid = getuid(); exp->st_gid = getgid(); if (args->allow_other == FUSE_EXPORT_ALLOW_OTHER_AUTO) { /* Ignore errors on our first attempt */ ret = setup_fuse_export(exp, args->mountpoint, true, NULL); exp->allow_other = ret == 0; if (ret < 0) { ret = setup_fuse_export(exp, args->mountpoint, false, errp); } } else { exp->allow_other = args->allow_other == FUSE_EXPORT_ALLOW_OTHER_ON; ret = setup_fuse_export(exp, args->mountpoint, exp->allow_other, errp); } if (ret < 0) { goto fail; } return 0; fail: fuse_export_delete(blk_exp); return ret; } /** * Allocates the global @exports hash table. */ static void init_exports_table(void) { if (exports) { return; } exports = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, NULL); } /** * Create exp->fuse_session and mount it. */ static int setup_fuse_export(FuseExport *exp, const char *mountpoint, bool allow_other, Error **errp) { const char *fuse_argv[4]; char *mount_opts; struct fuse_args fuse_args; int ret; /* * max_read needs to match what fuse_init() sets. * max_write need not be supplied. */ mount_opts = g_strdup_printf("max_read=%zu,default_permissions%s", FUSE_MAX_BOUNCE_BYTES, allow_other ? 
",allow_other" : ""); fuse_argv[0] = ""; /* Dummy program name */ fuse_argv[1] = "-o"; fuse_argv[2] = mount_opts; fuse_argv[3] = NULL; fuse_args = (struct fuse_args)FUSE_ARGS_INIT(3, (char **)fuse_argv); exp->fuse_session = fuse_session_new(&fuse_args, &fuse_ops, sizeof(fuse_ops), exp); g_free(mount_opts); if (!exp->fuse_session) { error_setg(errp, "Failed to set up FUSE session"); ret = -EIO; goto fail; } ret = fuse_session_mount(exp->fuse_session, mountpoint); if (ret < 0) { error_setg(errp, "Failed to mount FUSE session to export"); ret = -EIO; goto fail; } exp->mounted = true; g_hash_table_insert(exports, g_strdup(mountpoint), NULL); aio_set_fd_handler(exp->common.ctx, fuse_session_fd(exp->fuse_session), true, read_from_fuse_export, NULL, NULL, NULL, exp); exp->fd_handler_set_up = true; return 0; fail: fuse_export_shutdown(&exp->common); return ret; } /** * Callback to be invoked when the FUSE session FD can be read from. * (This is basically the FUSE event loop.) */ static void read_from_fuse_export(void *opaque) { FuseExport *exp = opaque; int ret; blk_exp_ref(&exp->common); do { ret = fuse_session_receive_buf(exp->fuse_session, &exp->fuse_buf); } while (ret == -EINTR); if (ret < 0) { goto out; } fuse_session_process_buf(exp->fuse_session, &exp->fuse_buf); out: blk_exp_unref(&exp->common); } static void fuse_export_shutdown(BlockExport *blk_exp) { FuseExport *exp = container_of(blk_exp, FuseExport, common); if (exp->fuse_session) { fuse_session_exit(exp->fuse_session); if (exp->fd_handler_set_up) { aio_set_fd_handler(exp->common.ctx, fuse_session_fd(exp->fuse_session), true, NULL, NULL, NULL, NULL, NULL); exp->fd_handler_set_up = false; } } if (exp->mountpoint) { /* * Safe to drop now, because we will not handle any requests * for this export anymore anyway. 
*/ g_hash_table_remove(exports, exp->mountpoint); } } static void fuse_export_delete(BlockExport *blk_exp) { FuseExport *exp = container_of(blk_exp, FuseExport, common); if (exp->fuse_session) { if (exp->mounted) { fuse_session_unmount(exp->fuse_session); } fuse_session_destroy(exp->fuse_session); } free(exp->fuse_buf.mem); g_free(exp->mountpoint); } /** * Check whether @path points to a regular file. If not, put an * appropriate message into *errp. */ static bool is_regular_file(const char *path, Error **errp) { struct stat statbuf; int ret; ret = stat(path, &statbuf); if (ret < 0) { error_setg_errno(errp, errno, "Failed to stat '%s'", path); return false; } if (!S_ISREG(statbuf.st_mode)) { error_setg(errp, "'%s' is not a regular file", path); return false; } return true; } /** * A chance to set change some parameters supplied to FUSE_INIT. */ static void fuse_init(void *userdata, struct fuse_conn_info *conn) { /* * MIN_NON_ZERO() would not be wrong here, but what we set here * must equal what has been passed to fuse_session_new(). * Therefore, as long as max_read must be passed as a mount option * (which libfuse claims will be changed at some point), we have * to set max_read to a fixed value here. */ conn->max_read = FUSE_MAX_BOUNCE_BYTES; conn->max_write = MIN_NON_ZERO(BDRV_REQUEST_MAX_BYTES, conn->max_write); } /** * Let clients look up files. Always return ENOENT because we only * care about the mountpoint itself. */ static void fuse_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) { fuse_reply_err(req, ENOENT); } /** * Let clients get file attributes (i.e., stat() the file). 
*/
static void fuse_getattr(fuse_req_t req, fuse_ino_t inode,
                         struct fuse_file_info *fi)
{
    /* All three timestamps report the time of this request */
    const time_t timestamp = time(NULL);
    FuseExport *exp = fuse_req_userdata(req);
    int64_t image_size, allocated_bytes, size_basis;
    struct stat attrs = { 0 };

    image_size = blk_getlength(exp->common.blk);
    if (image_size < 0) {
        fuse_reply_err(req, -image_size);
        return;
    }

    /*
     * Report the allocated size in 512-byte units; if it is unknown or
     * zero, fall back to the full image length.
     */
    allocated_bytes = bdrv_get_allocated_file_size(blk_bs(exp->common.blk));
    size_basis = allocated_bytes > 0 ? allocated_bytes : image_size;

    attrs.st_ino = inode;
    attrs.st_mode = exp->st_mode;
    attrs.st_nlink = 1;
    attrs.st_uid = exp->st_uid;
    attrs.st_gid = exp->st_gid;
    attrs.st_size = image_size;
    attrs.st_blksize = blk_bs(exp->common.blk)->bl.request_alignment;
    attrs.st_blocks = DIV_ROUND_UP(size_basis, 512);
    attrs.st_atime = timestamp;
    attrs.st_mtime = timestamp;
    attrs.st_ctime = timestamp;

    fuse_reply_attr(req, &attrs, 1.);
}

/**
 * Resize the exported image to @size bytes.
 *
 * @req_zero_write requests BDRV_REQ_ZERO_WRITE for the truncate, and
 * @prealloc is passed through to blk_truncate().
 *
 * Exports that are neither growable nor writable do not hold the RESIZE
 * permission; for those it is taken just for the duration of the
 * truncate and given up again afterwards.
 *
 * Returns blk_truncate()'s result, or a negative errno value if the
 * RESIZE permission could not be obtained.
 */
static int fuse_do_truncate(const FuseExport *exp, int64_t size,
                            bool req_zero_write, PreallocMode prealloc)
{
    /* Growable and writable exports have a permanent RESIZE permission */
    const bool need_temp_resize = !(exp->growable || exp->writable);
    const BdrvRequestFlags flags = req_zero_write ? BDRV_REQ_ZERO_WRITE : 0;
    uint64_t perm, shared_perm;
    int ret;

    if (need_temp_resize) {
        /* Changing permissions like below only works in the main thread */
        if (!qemu_in_main_thread()) {
            return -EPERM;
        }

        blk_get_perm(exp->common.blk, &perm, &shared_perm);

        ret = blk_set_perm(exp->common.blk, perm | BLK_PERM_RESIZE,
                           shared_perm, NULL);
        if (ret < 0) {
            return ret;
        }
    }

    ret = blk_truncate(exp->common.blk, size, true, prealloc, flags, NULL);

    if (need_temp_resize) {
        /* Must succeed, because we are only giving up the RESIZE permission */
        int restore_ret = blk_set_perm(exp->common.blk, perm, shared_perm,
                                       &error_abort);
        assert(restore_ret == 0);
    }

    return ret;
}

/**
 * Let clients set file attributes.
Only resizing and changing
 * permissions (st_mode, st_uid, st_gid) is allowed.
 * Changing permissions is only allowed as far as it will actually
 * permit access: Read-only exports cannot be given +w, and exports
 * without allow_other cannot be given a different UID or GID, and
 * they cannot be given non-owner access.
 *
 * All requested changes are validated (and the resize, if any, is carried
 * out) before the stored mode/UID/GID are touched, so a rejected request
 * leaves them unmodified.  On success, the reply is the one produced by
 * fuse_getattr().
 */
static void fuse_setattr(fuse_req_t req, fuse_ino_t inode, struct stat *statbuf,
                         int to_set, struct fuse_file_info *fi)
{
    FuseExport *exp = fuse_req_userdata(req);
    const bool set_mode = to_set & FUSE_SET_ATTR_MODE;
    const bool set_size = to_set & FUSE_SET_ATTR_SIZE;
    int allowed = FUSE_SET_ATTR_SIZE | FUSE_SET_ATTR_MODE;
    int err = 0;

    /* Ownership may only be changed when other users can access the export */
    if (exp->allow_other) {
        allowed |= FUSE_SET_ATTR_UID | FUSE_SET_ATTR_GID;
    }

    if (to_set & ~allowed) {
        fuse_reply_err(req, ENOTSUP);
        return;
    }

    /* Do some argument checks first before committing to anything */
    if (set_mode) {
        const mode_t non_owner_bits = S_IRWXG | S_IRWXO;
        const mode_t write_bits = S_IWUSR | S_IWGRP | S_IWOTH;

        if (!exp->allow_other && (statbuf->st_mode & non_owner_bits)) {
            /*
             * Without allow_other, non-owners can never access the export,
             * so do not allow setting permissions for them
             */
            err = EPERM;
        } else if (!exp->writable && (statbuf->st_mode & write_bits)) {
            /* +w for read-only exports makes no sense, disallow it */
            err = EROFS;
        }
    }

    if (!err && set_size) {
        if (!exp->writable) {
            err = EACCES;
        } else {
            int ret = fuse_do_truncate(exp, statbuf->st_size, true,
                                       PREALLOC_MODE_OFF);
            if (ret < 0) {
                err = -ret;
            }
        }
    }

    if (err) {
        fuse_reply_err(req, err);
        return;
    }

    /* Everything checked out, now apply the attribute changes */
    if (set_mode) {
        /* Ignore FUSE-supplied file type, only change the mode */
        exp->st_mode = (statbuf->st_mode & 07777) | S_IFREG;
    }
    if (to_set & FUSE_SET_ATTR_UID) {
        exp->st_uid = statbuf->st_uid;
    }
    if (to_set & FUSE_SET_ATTR_GID) {
        exp->st_gid = statbuf->st_gid;
    }

    fuse_getattr(req, inode, fi);
}

/**
 * Let clients open a file (i.e., the exported image).
*/ static void fuse_open(fuse_req_t req, fuse_ino_t inode, struct fuse_file_info *fi) { fuse_reply_open(req, fi); } /** * Handle client reads from the exported image. */ static void fuse_read(fuse_req_t req, fuse_ino_t inode, size_t size, off_t offset, struct fuse_file_info *fi) { FuseExport *exp = fuse_req_userdata(req); int64_t length; void *buf; int ret; /* Limited by max_read, should not happen */ if (size > FUSE_MAX_BOUNCE_BYTES) { fuse_reply_err(req, EINVAL); return; } /** * Clients will expect short reads at EOF, so we have to limit * offset+size to the image length. */ length = blk_getlength(exp->common.blk); if (length < 0) { fuse_reply_err(req, -length); return; } if (offset + size > length) { size = length - offset; } buf = qemu_try_blockalign(blk_bs(exp->common.blk), size); if (!buf) { fuse_reply_err(req, ENOMEM); return; } ret = blk_pread(exp->common.blk, offset, size, buf, 0); if (ret >= 0) { fuse_reply_buf(req, buf, size); } else { fuse_reply_err(req, -ret); } qemu_vfree(buf); } /** * Handle client writes to the exported image. */ static void fuse_write(fuse_req_t req, fuse_ino_t inode, const char *buf, size_t size, off_t offset, struct fuse_file_info *fi) { FuseExport *exp = fuse_req_userdata(req); int64_t length; int ret; /* Limited by max_write, should not happen */ if (size > BDRV_REQUEST_MAX_BYTES) { fuse_reply_err(req, EINVAL); return; } if (!exp->writable) { fuse_reply_err(req, EACCES); return; } /** * Clients will expect short writes at EOF, so we have to limit * offset+size to the image length. 
*/ length = blk_getlength(exp->common.blk); if (length < 0) { fuse_reply_err(req, -length); return; } if (offset + size > length) { if (exp->growable) { ret = fuse_do_truncate(exp, offset + size, true, PREALLOC_MODE_OFF); if (ret < 0) { fuse_reply_err(req, -ret); return; } } else { size = length - offset; } } ret = blk_pwrite(exp->common.blk, offset, size, buf, 0); if (ret >= 0) { fuse_reply_write(req, size); } else { fuse_reply_err(req, -ret); } } /** * Let clients perform various fallocate() operations. */ static void fuse_fallocate(fuse_req_t req, fuse_ino_t inode, int mode, off_t offset, off_t length, struct fuse_file_info *fi) { FuseExport *exp = fuse_req_userdata(req); int64_t blk_len; int ret; if (!exp->writable) { fuse_reply_err(req, EACCES); return; } blk_len = blk_getlength(exp->common.blk); if (blk_len < 0) { fuse_reply_err(req, -blk_len); return; } #ifdef CONFIG_FALLOCATE_PUNCH_HOLE if (mode & FALLOC_FL_KEEP_SIZE) { length = MIN(length, blk_len - offset); } #endif /* CONFIG_FALLOCATE_PUNCH_HOLE */ if (!mode) { /* We can only fallocate at the EOF with a truncate */ if (offset < blk_len) { fuse_reply_err(req, EOPNOTSUPP); return; } if (offset > blk_len) { /* No preallocation needed here */ ret = fuse_do_truncate(exp, offset, true, PREALLOC_MODE_OFF); if (ret < 0) { fuse_reply_err(req, -ret); return; } } ret = fuse_do_truncate(exp, offset + length, true, PREALLOC_MODE_FALLOC); } #ifdef CONFIG_FALLOCATE_PUNCH_HOLE else if (mode & FALLOC_FL_PUNCH_HOLE) { if (!(mode & FALLOC_FL_KEEP_SIZE)) { fuse_reply_err(req, EINVAL); return; } do { int size = MIN(length, BDRV_REQUEST_MAX_BYTES); ret = blk_pwrite_zeroes(exp->common.blk, offset, size, BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK); if (ret == -ENOTSUP) { /* * fallocate() specifies to return EOPNOTSUPP for unsupported * operations */ ret = -EOPNOTSUPP; } offset += size; length -= size; } while (ret == 0 && length > 0); } #endif /* CONFIG_FALLOCATE_PUNCH_HOLE */ #ifdef CONFIG_FALLOCATE_ZERO_RANGE else if (mode & 
FALLOC_FL_ZERO_RANGE) { if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + length > blk_len) { /* No need for zeroes, we are going to write them ourselves */ ret = fuse_do_truncate(exp, offset + length, false, PREALLOC_MODE_OFF); if (ret < 0) { fuse_reply_err(req, -ret); return; } } do { int size = MIN(length, BDRV_REQUEST_MAX_BYTES); ret = blk_pwrite_zeroes(exp->common.blk, offset, size, 0); offset += size; length -= size; } while (ret == 0 && length > 0); } #endif /* CONFIG_FALLOCATE_ZERO_RANGE */ else { ret = -EOPNOTSUPP; } fuse_reply_err(req, ret < 0 ? -ret : 0); } /** * Let clients fsync the exported image. */ static void fuse_fsync(fuse_req_t req, fuse_ino_t inode, int datasync, struct fuse_file_info *fi) { FuseExport *exp = fuse_req_userdata(req); int ret; ret = blk_flush(exp->common.blk); fuse_reply_err(req, ret < 0 ? -ret : 0); } /** * Called before an FD to the exported image is closed. (libfuse * notes this to be a way to return last-minute errors.) */ static void fuse_flush(fuse_req_t req, fuse_ino_t inode, struct fuse_file_info *fi) { fuse_fsync(req, inode, 1, fi); } #ifdef CONFIG_FUSE_LSEEK /** * Let clients inquire allocation status. */ static void fuse_lseek(fuse_req_t req, fuse_ino_t inode, off_t offset, int whence, struct fuse_file_info *fi) { FuseExport *exp = fuse_req_userdata(req); if (whence != SEEK_HOLE && whence != SEEK_DATA) { fuse_reply_err(req, EINVAL); return; } while (true) { int64_t pnum; int ret; ret = bdrv_block_status_above(blk_bs(exp->common.blk), NULL, offset, INT64_MAX, &pnum, NULL, NULL); if (ret < 0) { fuse_reply_err(req, -ret); return; } if (!pnum && (ret & BDRV_BLOCK_EOF)) { int64_t blk_len; /* * If blk_getlength() rounds (e.g. by sectors), then the * export length will be rounded, too. However, * bdrv_block_status_above() may return EOF at unaligned * offsets. We must not let this become visible and thus * always simulate a hole between @offset (the real EOF) * and @blk_len (the client-visible EOF). 
*/ blk_len = blk_getlength(exp->common.blk); if (blk_len < 0) { fuse_reply_err(req, -blk_len); return; } if (offset > blk_len || whence == SEEK_DATA) { fuse_reply_err(req, ENXIO); } else { fuse_reply_lseek(req, offset); } return; } if (ret & BDRV_BLOCK_DATA) { if (whence == SEEK_DATA) { fuse_reply_lseek(req, offset); return; } } else { if (whence == SEEK_HOLE) { fuse_reply_lseek(req, offset); return; } } /* Safety check against infinite loops */ if (!pnum) { fuse_reply_err(req, ENXIO); return; } offset += pnum; } } #endif static const struct fuse_lowlevel_ops fuse_ops = { .init = fuse_init, .lookup = fuse_lookup, .getattr = fuse_getattr, .setattr = fuse_setattr, .open = fuse_open, .read = fuse_read, .write = fuse_write, .fallocate = fuse_fallocate, .flush = fuse_flush, .fsync = fuse_fsync, #ifdef CONFIG_FUSE_LSEEK .lseek = fuse_lseek, #endif }; const BlockExportDriver blk_exp_fuse = { .type = BLOCK_EXPORT_TYPE_FUSE, .instance_size = sizeof(FuseExport), .create = fuse_export_create, .delete = fuse_export_delete, .request_shutdown = fuse_export_shutdown, };