From a08b4a9fe6cb3c23755db764c9a40510a40a8731 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Tue, 25 Feb 2020 10:45:14 +0000 Subject: docs: Move tools documentation to tools manual Move the following tools documentation files to the new tools manual: docs/interop/qemu-img.rst docs/interop/qemu-nbd.rst docs/interop/virtfs-proxy-helper.rst docs/interop/qemu-trace-stap.rst docs/interop/virtiofsd.rst Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Acked-by: Paolo Bonzini Message-id: 20200217155415.30949-4-peter.maydell@linaro.org --- Makefile | 34 +- docs/interop/conf.py | 12 - docs/interop/index.rst | 5 - docs/interop/qemu-img.rst | 832 ----------------------------------- docs/interop/qemu-nbd.rst | 256 ----------- docs/interop/qemu-trace-stap.rst | 124 ------ docs/interop/virtfs-proxy-helper.rst | 72 --- docs/interop/virtiofsd.rst | 120 ----- docs/tools/conf.py | 17 + docs/tools/index.rst | 6 + docs/tools/qemu-img.rst | 832 +++++++++++++++++++++++++++++++++++ docs/tools/qemu-nbd.rst | 256 +++++++++++ docs/tools/qemu-trace-stap.rst | 124 ++++++ docs/tools/virtfs-proxy-helper.rst | 72 +++ docs/tools/virtiofsd.rst | 120 +++++ 15 files changed, 1445 insertions(+), 1437 deletions(-) delete mode 100644 docs/interop/qemu-img.rst delete mode 100644 docs/interop/qemu-nbd.rst delete mode 100644 docs/interop/qemu-trace-stap.rst delete mode 100644 docs/interop/virtfs-proxy-helper.rst delete mode 100644 docs/interop/virtiofsd.rst create mode 100644 docs/tools/qemu-img.rst create mode 100644 docs/tools/qemu-nbd.rst create mode 100644 docs/tools/qemu-trace-stap.rst create mode 100644 docs/tools/virtfs-proxy-helper.rst create mode 100644 docs/tools/virtiofsd.rst diff --git a/Makefile b/Makefile index 4a27be1..aa9cc0b 100644 --- a/Makefile +++ b/Makefile @@ -345,11 +345,11 @@ endif ifdef BUILD_DOCS DOCS=qemu-doc.html qemu-doc.txt qemu.1 -DOCS+=$(MANUAL_BUILDDIR)/interop/qemu-img.1 -DOCS+=$(MANUAL_BUILDDIR)/interop/qemu-nbd.8 +DOCS+=$(MANUAL_BUILDDIR)/tools/qemu-img.1 +DOCS+=$(MANUAL_BUILDDIR)/tools/qemu-nbd.8 DOCS+=$(MANUAL_BUILDDIR)/interop/qemu-ga.8 ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP)$(CONFIG_LIBCAP_NG),yyy) -DOCS+=$(MANUAL_BUILDDIR)/interop/virtiofsd.1 +DOCS+=$(MANUAL_BUILDDIR)/tools/virtiofsd.1 endif DOCS+=$(MANUAL_BUILDDIR)/system/qemu-block-drivers.7 DOCS+=docs/interop/qemu-qmp-ref.html docs/interop/qemu-qmp-ref.txt docs/interop/qemu-qmp-ref.7 @@ -357,10 +357,10 @@ DOCS+=docs/interop/qemu-ga-ref.html docs/interop/qemu-ga-ref.txt docs/interop/qe DOCS+=docs/qemu-cpu-models.7 DOCS+=$(MANUAL_BUILDDIR)/index.html ifdef CONFIG_VIRTFS -DOCS+=$(MANUAL_BUILDDIR)/interop/virtfs-proxy-helper.1 +DOCS+=$(MANUAL_BUILDDIR)/tools/virtfs-proxy-helper.1 endif ifdef CONFIG_TRACE_SYSTEMTAP -DOCS+=$(MANUAL_BUILDDIR)/interop/qemu-trace-stap.1 +DOCS+=$(MANUAL_BUILDDIR)/tools/qemu-trace-stap.1 endif else DOCS= @@ -861,12 +861,12 @@ ifdef CONFIG_POSIX $(INSTALL_DATA) $(MANUAL_BUILDDIR)/system/qemu-block-drivers.7 "$(DESTDIR)$(mandir)/man7" $(INSTALL_DATA) docs/qemu-cpu-models.7 "$(DESTDIR)$(mandir)/man7" ifeq ($(CONFIG_TOOLS),y) - $(INSTALL_DATA) $(MANUAL_BUILDDIR)/interop/qemu-img.1 "$(DESTDIR)$(mandir)/man1" + $(INSTALL_DATA) $(MANUAL_BUILDDIR)/tools/qemu-img.1 "$(DESTDIR)$(mandir)/man1" $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man8" - $(INSTALL_DATA) $(MANUAL_BUILDDIR)/interop/qemu-nbd.8 "$(DESTDIR)$(mandir)/man8" + $(INSTALL_DATA) $(MANUAL_BUILDDIR)/tools/qemu-nbd.8 "$(DESTDIR)$(mandir)/man8" endif ifdef CONFIG_TRACE_SYSTEMTAP - $(INSTALL_DATA) $(MANUAL_BUILDDIR)/interop/qemu-trace-stap.1 "$(DESTDIR)$(mandir)/man1" + $(INSTALL_DATA) $(MANUAL_BUILDDIR)/tools/qemu-trace-stap.1 "$(DESTDIR)$(mandir)/man1" endif ifneq (,$(findstring qemu-ga,$(TOOLS))) $(INSTALL_DATA) $(MANUAL_BUILDDIR)/interop/qemu-ga.8 "$(DESTDIR)$(mandir)/man8" @@ -877,10 +877,10 @@ endif endif ifdef CONFIG_VIRTFS $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1" - $(INSTALL_DATA) $(MANUAL_BUILDDIR)/interop/virtfs-proxy-helper.1 "$(DESTDIR)$(mandir)/man1" + $(INSTALL_DATA) $(MANUAL_BUILDDIR)/tools/virtfs-proxy-helper.1 "$(DESTDIR)$(mandir)/man1" endif ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP)$(CONFIG_LIBCAP_NG),yyy) - $(INSTALL_DATA) $(MANUAL_BUILDDIR)/interop/virtiofsd.1 "$(DESTDIR)$(mandir)/man1" + $(INSTALL_DATA) $(MANUAL_BUILDDIR)/tools/virtiofsd.1 "$(DESTDIR)$(mandir)/man1" endif install-datadir: @@ -1062,7 +1062,7 @@ endef $(MANUAL_BUILDDIR)/devel/index.html: $(call manual-deps,devel) $(call build-manual,devel,html) -$(MANUAL_BUILDDIR)/interop/index.html: $(call manual-deps,interop) $(SRC_PATH)/qemu-img-cmds.hx $(SRC_PATH)/docs/qemu-option-trace.rst.inc +$(MANUAL_BUILDDIR)/interop/index.html: $(call manual-deps,interop) $(call build-manual,interop,html) $(MANUAL_BUILDDIR)/specs/index.html: $(call manual-deps,specs) @@ -1071,16 +1071,18 @@ $(MANUAL_BUILDDIR)/specs/index.html: $(call manual-deps,specs) $(MANUAL_BUILDDIR)/system/index.html: $(call manual-deps,system) $(call build-manual,system,html) -$(MANUAL_BUILDDIR)/tools/index.html: $(call manual-deps,tools) +$(MANUAL_BUILDDIR)/tools/index.html: $(call manual-deps,tools) $(SRC_PATH)/qemu-img-cmds.hx $(SRC_PATH)/docs/qemu-option-trace.rst.inc $(call build-manual,tools,html) -$(call define-manpage-rule,interop,\ - qemu-ga.8 qemu-img.1 qemu-nbd.8 qemu-trace-stap.1\ - virtiofsd.1 virtfs-proxy-helper.1,\ - $(SRC_PATH)/qemu-img-cmds.hx $(SRC_PATH)/docs/qemu-option-trace.rst.inc) +$(call define-manpage-rule,interop,qemu-ga.8) $(call define-manpage-rule,system,qemu-block-drivers.7) +$(call define-manpage-rule,tools,\ + qemu-img.1 qemu-nbd.8 qemu-trace-stap.1\ + virtiofsd.1 virtfs-proxy-helper.1,\ + $(SRC_PATH)/qemu-img-cmds.hx $(SRC_PATH)/docs/qemu-option-trace.rst.inc) + $(MANUAL_BUILDDIR)/index.html: $(SRC_PATH)/docs/index.html.in qemu-version.h @mkdir -p "$(MANUAL_BUILDDIR)" $(call quiet-command, sed "s|@@VERSION@@|${VERSION}|g" $< >$@, \ diff --git a/docs/interop/conf.py b/docs/interop/conf.py index b3cda17..42ce7e3 100644 --- a/docs/interop/conf.py +++ b/docs/interop/conf.py @@ -19,16 +19,4 @@ html_theme_options['description'] = u'System Emulation Management and Interopera man_pages = [ ('qemu-ga', 'qemu-ga', u'QEMU Guest Agent', ['Michael Roth '], 8), - ('qemu-img', 'qemu-img', u'QEMU disk image utility', - ['Fabrice Bellard'], 1), - ('qemu-nbd', 'qemu-nbd', u'QEMU Disk Network Block Device Server', - ['Anthony Liguori '], 8), - ('qemu-trace-stap', 'qemu-trace-stap', u'QEMU SystemTap trace tool', - [], 1), - ('virtfs-proxy-helper', 'virtfs-proxy-helper', - u'QEMU 9p virtfs proxy filesystem helper', - ['M. Mohan Kumar'], 1), - ('virtiofsd', 'virtiofsd', u'QEMU virtio-fs shared file system daemon', - ['Stefan Hajnoczi ', - 'Masayoshi Mizuma '], 1), ] diff --git a/docs/interop/index.rst b/docs/interop/index.rst index e8455b4..049387a 100644 --- a/docs/interop/index.rst +++ b/docs/interop/index.rst @@ -18,10 +18,5 @@ Contents: live-block-operations pr-helper qemu-ga - qemu-img - qemu-nbd - qemu-trace-stap vhost-user vhost-user-gpu - virtfs-proxy-helper - virtiofsd diff --git a/docs/interop/qemu-img.rst b/docs/interop/qemu-img.rst deleted file mode 100644 index 0080f83..0000000 --- a/docs/interop/qemu-img.rst +++ /dev/null @@ -1,832 +0,0 @@ -QEMU disk image utility -======================= - -Synopsis --------- - -**qemu-img** [*standard options*] *command* [*command options*] - -Description ------------ - -qemu-img allows you to create, convert and modify images offline. It can handle -all image formats supported by QEMU. - -**Warning:** Never use qemu-img to modify images in use by a running virtual -machine or any other process; this may destroy the image. Also, be aware that -querying an image that is being modified by another process may encounter -inconsistent state. - -Options -------- - -.. program:: qemu-img - -Standard options: - -.. option:: -h, --help - - Display this help and exit - -.. option:: -V, --version - - Display version information and exit - -.. option:: -T, --trace [[enable=]PATTERN][,events=FILE][,file=FILE] - - .. include:: ../qemu-option-trace.rst.inc - -The following commands are supported: - -.. hxtool-doc:: qemu-img-cmds.hx - -Command parameters: - -*FILENAME* is a disk image filename. - -*FMT* is the disk image format. It is guessed automatically in most -cases. See below for a description of the supported disk formats. - -*SIZE* is the disk image size in bytes. Optional suffixes ``k`` or -``K`` (kilobyte, 1024) ``M`` (megabyte, 1024k) and ``G`` (gigabyte, -1024M) and T (terabyte, 1024G) are supported. ``b`` is ignored. - -*OUTPUT_FILENAME* is the destination disk image filename. - -*OUTPUT_FMT* is the destination format. - -*OPTIONS* is a comma separated list of format specific options in a -name=value format. Use ``-o ?`` for an overview of the options supported -by the used format or see the format descriptions below for details. - -*SNAPSHOT_PARAM* is param used for internal snapshot, format is -'snapshot.id=[ID],snapshot.name=[NAME]' or '[ID_OR_NAME]'. - -.. - Note the use of a new 'program'; otherwise Sphinx complains about - the -h option appearing both in the above option list and this one. - -.. program:: qemu-img-common-opts - -.. option:: --object OBJECTDEF - - is a QEMU user creatable object definition. See the :manpage:`qemu(1)` - manual page for a description of the object properties. The most common - object type is a ``secret``, which is used to supply passwords and/or - encryption keys. - -.. option:: --image-opts - - Indicates that the source *FILENAME* parameter is to be interpreted as a - full option string, not a plain filename. This parameter is mutually - exclusive with the *-f* parameter. - -.. option:: --target-image-opts - - Indicates that the OUTPUT_FILENAME parameter(s) are to be interpreted as - a full option string, not a plain filename. This parameter is mutually - exclusive with the *-O* parameters. It is currently required to also use - the *-n* parameter to skip image creation. This restriction may be relaxed - in a future release. - -.. option:: --force-share (-U) - - If specified, ``qemu-img`` will open the image in shared mode, allowing - other QEMU processes to open it in write mode. For example, this can be used to - get the image information (with 'info' subcommand) when the image is used by a - running guest. Note that this could produce inconsistent results because of - concurrent metadata changes, etc. This option is only allowed when opening - images in read-only mode. - -.. option:: --backing-chain - - Will enumerate information about backing files in a disk image chain. Refer - below for further description. - -.. option:: -c - - Indicates that target image must be compressed (qcow format only). - -.. option:: -h - - With or without a command, shows help and lists the supported formats. - -.. option:: -p - - Display progress bar (compare, convert and rebase commands only). - If the *-p* option is not used for a command that supports it, the - progress is reported when the process receives a ``SIGUSR1`` or - ``SIGINFO`` signal. - -.. option:: -q - - Quiet mode - do not print any output (except errors). There's no progress bar - in case both *-q* and *-p* options are used. - -.. option:: -S SIZE - - Indicates the consecutive number of bytes that must contain only zeros - for qemu-img to create a sparse image during conversion. This value is rounded - down to the nearest 512 bytes. You may use the common size suffixes like - ``k`` for kilobytes. - -.. option:: -t CACHE - - Specifies the cache mode that should be used with the (destination) file. See - the documentation of the emulator's ``-drive cache=...`` option for allowed - values. - -.. option:: -T SRC_CACHE - - Specifies the cache mode that should be used with the source file(s). See - the documentation of the emulator's ``-drive cache=...`` option for allowed - values. - -Parameters to snapshot subcommand: - -.. program:: qemu-img-snapshot - -.. option:: snapshot - - Is the name of the snapshot to create, apply or delete - -.. option:: -a - - Applies a snapshot (revert disk to saved state) - -.. option:: -c - - Creates a snapshot - -.. option:: -d - - Deletes a snapshot - -.. option:: -l - - Lists all snapshots in the given image - -Parameters to compare subcommand: - -.. program:: qemu-img-compare - -.. option:: -f - - First image format - -.. option:: -F - - Second image format - -.. option:: -s - - Strict mode - fail on different image size or sector allocation - -Parameters to convert subcommand: - -.. program:: qemu-img-convert - -.. option:: -n - - Skip the creation of the target volume - -.. option:: -m - - Number of parallel coroutines for the convert process - -.. option:: -W - - Allow out-of-order writes to the destination. This option improves performance, - but is only recommended for preallocated devices like host devices or other - raw block devices. - -.. option:: -C - - Try to use copy offloading to move data from source image to target. This may - improve performance if the data is remote, such as with NFS or iSCSI backends, - but will not automatically sparsify zero sectors, and may result in a fully - allocated target image depending on the host support for getting allocation - information. - -.. option:: --salvage - - Try to ignore I/O errors when reading. Unless in quiet mode (``-q``), errors - will still be printed. Areas that cannot be read from the source will be - treated as containing only zeroes. - -.. option:: --target-is-zero - - Assume that reading the destination image will always return - zeros. This parameter is mutually exclusive with a destination image - that has a backing file. It is required to also use the ``-n`` - parameter to skip image creation. - -Parameters to dd subcommand: - -.. program:: qemu-img-dd - -.. option:: bs=BLOCK_SIZE - - Defines the block size - -.. option:: count=BLOCKS - - Sets the number of input blocks to copy - -.. option:: if=INPUT - - Sets the input file - -.. option:: of=OUTPUT - - Sets the output file - -.. option:: skip=BLOCKS - - Sets the number of input blocks to skip - -Command description: - -.. program:: qemu-img-commands - -.. option:: amend [--object OBJECTDEF] [--image-opts] [-p] [-q] [-f FMT] [-t CACHE] -o OPTIONS FILENAME - - Amends the image format specific *OPTIONS* for the image file - *FILENAME*. Not all file formats support this operation. - -.. option:: bench [-c COUNT] [-d DEPTH] [-f FMT] [--flush-interval=FLUSH_INTERVAL] [-i AIO] [-n] [--no-drain] [-o OFFSET] [--pattern=PATTERN] [-q] [-s BUFFER_SIZE] [-S STEP_SIZE] [-t CACHE] [-w] [-U] FILENAME - - Run a simple sequential I/O benchmark on the specified image. If ``-w`` is - specified, a write test is performed, otherwise a read test is performed. - - A total number of *COUNT* I/O requests is performed, each *BUFFER_SIZE* - bytes in size, and with *DEPTH* requests in parallel. The first request - starts at the position given by *OFFSET*, each following request increases - the current position by *STEP_SIZE*. If *STEP_SIZE* is not given, - *BUFFER_SIZE* is used for its value. - - If *FLUSH_INTERVAL* is specified for a write test, the request queue is - drained and a flush is issued before new writes are made whenever the number of - remaining requests is a multiple of *FLUSH_INTERVAL*. If additionally - ``--no-drain`` is specified, a flush is issued without draining the request - queue first. - - if ``-i`` is specified, *AIO* option can be used to specify different - AIO backends: ``threads``, ``native`` or ``io_uring``. - - If ``-n`` is specified, the native AIO backend is used if possible. On - Linux, this option only works if ``-t none`` or ``-t directsync`` is - specified as well. - - For write tests, by default a buffer filled with zeros is written. This can be - overridden with a pattern byte specified by *PATTERN*. - -.. option:: check [--object OBJECTDEF] [--image-opts] [-q] [-f FMT] [--output=OFMT] [-r [leaks | all]] [-T SRC_CACHE] [-U] FILENAME - - Perform a consistency check on the disk image *FILENAME*. The command can - output in the format *OFMT* which is either ``human`` or ``json``. - The JSON output is an object of QAPI type ``ImageCheck``. - - If ``-r`` is specified, qemu-img tries to repair any inconsistencies found - during the check. ``-r leaks`` repairs only cluster leaks, whereas - ``-r all`` fixes all kinds of errors, with a higher risk of choosing the - wrong fix or hiding corruption that has already occurred. - - Only the formats ``qcow2``, ``qed`` and ``vdi`` support - consistency checks. - - In case the image does not have any inconsistencies, check exits with ``0``. - Other exit codes indicate the kind of inconsistency found or if another error - occurred. The following table summarizes all exit codes of the check subcommand: - - 0 - Check completed, the image is (now) consistent - 1 - Check not completed because of internal errors - 2 - Check completed, image is corrupted - 3 - Check completed, image has leaked clusters, but is not corrupted - 63 - Checks are not supported by the image format - - If ``-r`` is specified, exit codes representing the image state refer to the - state after (the attempt at) repairing it. That is, a successful ``-r all`` - will yield the exit code 0, independently of the image state before. - -.. option:: commit [--object OBJECTDEF] [--image-opts] [-q] [-f FMT] [-t CACHE] [-b BASE] [-d] [-p] FILENAME - - Commit the changes recorded in *FILENAME* in its base image or backing file. - If the backing file is smaller than the snapshot, then the backing file will be - resized to be the same size as the snapshot. If the snapshot is smaller than - the backing file, the backing file will not be truncated. If you want the - backing file to match the size of the smaller snapshot, you can safely truncate - it yourself once the commit operation successfully completes. - - The image *FILENAME* is emptied after the operation has succeeded. If you do - not need *FILENAME* afterwards and intend to drop it, you may skip emptying - *FILENAME* by specifying the ``-d`` flag. - - If the backing chain of the given image file *FILENAME* has more than one - layer, the backing file into which the changes will be committed may be - specified as *BASE* (which has to be part of *FILENAME*'s backing - chain). If *BASE* is not specified, the immediate backing file of the top - image (which is *FILENAME*) will be used. Note that after a commit operation - all images between *BASE* and the top image will be invalid and may return - garbage data when read. For this reason, ``-b`` implies ``-d`` (so that - the top image stays valid). - -.. option:: compare [--object OBJECTDEF] [--image-opts] [-f FMT] [-F FMT] [-T SRC_CACHE] [-p] [-q] [-s] [-U] FILENAME1 FILENAME2 - - Check if two images have the same content. You can compare images with - different format or settings. - - The format is probed unless you specify it by ``-f`` (used for - *FILENAME1*) and/or ``-F`` (used for *FILENAME2*) option. - - By default, images with different size are considered identical if the larger - image contains only unallocated and/or zeroed sectors in the area after the end - of the other image. In addition, if any sector is not allocated in one image - and contains only zero bytes in the second one, it is evaluated as equal. You - can use Strict mode by specifying the ``-s`` option. When compare runs in - Strict mode, it fails in case image size differs or a sector is allocated in - one image and is not allocated in the second one. - - By default, compare prints out a result message. This message displays - information that both images are same or the position of the first different - byte. In addition, result message can report different image size in case - Strict mode is used. - - Compare exits with ``0`` in case the images are equal and with ``1`` - in case the images differ. Other exit codes mean an error occurred during - execution and standard error output should contain an error message. - The following table sumarizes all exit codes of the compare subcommand: - - 0 - Images are identical - 1 - Images differ - 2 - Error on opening an image - 3 - Error on checking a sector allocation - 4 - Error on reading data - -.. option:: convert [--object OBJECTDEF] [--image-opts] [--target-image-opts] [--target-is-zero] [-U] [-C] [-c] [-p] [-q] [-n] [-f FMT] [-t CACHE] [-T SRC_CACHE] [-O OUTPUT_FMT] [-B BACKING_FILE] [-o OPTIONS] [-l SNAPSHOT_PARAM] [-S SPARSE_SIZE] [-m NUM_COROUTINES] [-W] FILENAME [FILENAME2 [...]] OUTPUT_FILENAME - - Convert the disk image *FILENAME* or a snapshot *SNAPSHOT_PARAM* - to disk image *OUTPUT_FILENAME* using format *OUTPUT_FMT*. It can - be optionally compressed (``-c`` option) or use any format specific - options like encryption (``-o`` option). - - Only the formats ``qcow`` and ``qcow2`` support compression. The - compression is read-only. It means that if a compressed sector is - rewritten, then it is rewritten as uncompressed data. - - Image conversion is also useful to get smaller image when using a - growable format such as ``qcow``: the empty sectors are detected and - suppressed from the destination image. - - *SPARSE_SIZE* indicates the consecutive number of bytes (defaults to 4k) - that must contain only zeros for qemu-img to create a sparse image during - conversion. If *SPARSE_SIZE* is 0, the source will not be scanned for - unallocated or zero sectors, and the destination image will always be - fully allocated. - - You can use the *BACKING_FILE* option to force the output image to be - created as a copy on write image of the specified base image; the - *BACKING_FILE* should have the same content as the input's base image, - however the path, image format, etc may differ. - - If a relative path name is given, the backing file is looked up relative to - the directory containing *OUTPUT_FILENAME*. - - If the ``-n`` option is specified, the target volume creation will be - skipped. This is useful for formats such as ``rbd`` if the target - volume has already been created with site specific options that cannot - be supplied through qemu-img. - - Out of order writes can be enabled with ``-W`` to improve performance. - This is only recommended for preallocated devices like host devices or other - raw block devices. Out of order write does not work in combination with - creating compressed images. - - *NUM_COROUTINES* specifies how many coroutines work in parallel during - the convert process (defaults to 8). - -.. option:: create [--object OBJECTDEF] [-q] [-f FMT] [-b BACKING_FILE] [-F BACKING_FMT] [-u] [-o OPTIONS] FILENAME [SIZE] - - Create the new disk image *FILENAME* of size *SIZE* and format - *FMT*. Depending on the file format, you can add one or more *OPTIONS* - that enable additional features of this format. - - If the option *BACKING_FILE* is specified, then the image will record - only the differences from *BACKING_FILE*. No size needs to be specified in - this case. *BACKING_FILE* will never be modified unless you use the - ``commit`` monitor command (or qemu-img commit). - - If a relative path name is given, the backing file is looked up relative to - the directory containing *FILENAME*. - - Note that a given backing file will be opened to check that it is valid. Use - the ``-u`` option to enable unsafe backing file mode, which means that the - image will be created even if the associated backing file cannot be opened. A - matching backing file must be created or additional options be used to make the - backing file specification valid when you want to use an image created this - way. - - The size can also be specified using the *SIZE* option with ``-o``, - it doesn't need to be specified separately in this case. - - -.. option:: dd [--image-opts] [-U] [-f FMT] [-O OUTPUT_FMT] [bs=BLOCK_SIZE] [count=BLOCKS] [skip=BLOCKS] if=INPUT of=OUTPUT - - dd copies from *INPUT* file to *OUTPUT* file converting it from - *FMT* format to *OUTPUT_FMT* format. - - The data is by default read and written using blocks of 512 bytes but can be - modified by specifying *BLOCK_SIZE*. If count=\ *BLOCKS* is specified - dd will stop reading input after reading *BLOCKS* input blocks. - - The size syntax is similar to :manpage:`dd(1)`'s size syntax. - -.. option:: info [--object OBJECTDEF] [--image-opts] [-f FMT] [--output=OFMT] [--backing-chain] [-U] FILENAME - - Give information about the disk image *FILENAME*. Use it in - particular to know the size reserved on disk which can be different - from the displayed size. If VM snapshots are stored in the disk image, - they are displayed too. - - If a disk image has a backing file chain, information about each disk image in - the chain can be recursively enumerated by using the option ``--backing-chain``. - - For instance, if you have an image chain like: - - :: - - base.qcow2 <- snap1.qcow2 <- snap2.qcow2 - - To enumerate information about each disk image in the above chain, starting from top to base, do: - - :: - - qemu-img info --backing-chain snap2.qcow2 - - The command can output in the format *OFMT* which is either ``human`` or - ``json``. The JSON output is an object of QAPI type ``ImageInfo``; with - ``--backing-chain``, it is an array of ``ImageInfo`` objects. - - ``--output=human`` reports the following information (for every image in the - chain): - - *image* - The image file name - - *file format* - The image format - - *virtual size* - The size of the guest disk - - *disk size* - How much space the image file occupies on the host file system (may be - shown as 0 if this information is unavailable, e.g. because there is no - file system) - - *cluster_size* - Cluster size of the image format, if applicable - - *encrypted* - Whether the image is encrypted (only present if so) - - *cleanly shut down* - This is shown as ``no`` if the image is dirty and will have to be - auto-repaired the next time it is opened in qemu. - - *backing file* - The backing file name, if present - - *backing file format* - The format of the backing file, if the image enforces it - - *Snapshot list* - A list of all internal snapshots - - *Format specific information* - Further information whose structure depends on the image format. This - section is a textual representation of the respective - ``ImageInfoSpecific*`` QAPI object (e.g. ``ImageInfoSpecificQCow2`` - for qcow2 images). - -.. option:: map [--object OBJECTDEF] [--image-opts] [-f FMT] [--output=OFMT] [-U] FILENAME - - Dump the metadata of image *FILENAME* and its backing file chain. - In particular, this commands dumps the allocation state of every sector - of *FILENAME*, together with the topmost file that allocates it in - the backing file chain. - - Two option formats are possible. The default format (``human``) - only dumps known-nonzero areas of the file. Known-zero parts of the - file are omitted altogether, and likewise for parts that are not allocated - throughout the chain. ``qemu-img`` output will identify a file - from where the data can be read, and the offset in the file. Each line - will include four fields, the first three of which are hexadecimal - numbers. For example the first line of: - - :: - - Offset Length Mapped to File - 0 0x20000 0x50000 /tmp/overlay.qcow2 - 0x100000 0x10000 0x95380000 /tmp/backing.qcow2 - - means that 0x20000 (131072) bytes starting at offset 0 in the image are - available in /tmp/overlay.qcow2 (opened in ``raw`` format) starting - at offset 0x50000 (327680). Data that is compressed, encrypted, or - otherwise not available in raw format will cause an error if ``human`` - format is in use. Note that file names can include newlines, thus it is - not safe to parse this output format in scripts. - - The alternative format ``json`` will return an array of dictionaries - in JSON format. It will include similar information in - the ``start``, ``length``, ``offset`` fields; - it will also include other more specific information: - - - whether the sectors contain actual data or not (boolean field ``data``; - if false, the sectors are either unallocated or stored as optimized - all-zero clusters); - - whether the data is known to read as zero (boolean field ``zero``); - - in order to make the output shorter, the target file is expressed as - a ``depth``; for example, a depth of 2 refers to the backing file - of the backing file of *FILENAME*. - - In JSON format, the ``offset`` field is optional; it is absent in - cases where ``human`` format would omit the entry or exit with an error. - If ``data`` is false and the ``offset`` field is present, the - corresponding sectors in the file are not yet in use, but they are - preallocated. - - For more information, consult ``include/block/block.h`` in QEMU's - source code. - -.. option:: measure [--output=OFMT] [-O OUTPUT_FMT] [-o OPTIONS] [--size N | [--object OBJECTDEF] [--image-opts] [-f FMT] [-l SNAPSHOT_PARAM] FILENAME] - - Calculate the file size required for a new image. This information - can be used to size logical volumes or SAN LUNs appropriately for - the image that will be placed in them. The values reported are - guaranteed to be large enough to fit the image. The command can - output in the format *OFMT* which is either ``human`` or ``json``. - The JSON output is an object of QAPI type ``BlockMeasureInfo``. - - If the size *N* is given then act as if creating a new empty image file - using ``qemu-img create``. If *FILENAME* is given then act as if - converting an existing image file using ``qemu-img convert``. The format - of the new file is given by *OUTPUT_FMT* while the format of an existing - file is given by *FMT*. - - A snapshot in an existing image can be specified using *SNAPSHOT_PARAM*. - - The following fields are reported: - - :: - - required size: 524288 - fully allocated size: 1074069504 - - The ``required size`` is the file size of the new image. It may be smaller - than the virtual disk size if the image format supports compact representation. - - The ``fully allocated size`` is the file size of the new image once data has - been written to all sectors. This is the maximum size that the image file can - occupy with the exception of internal snapshots, dirty bitmaps, vmstate data, - and other advanced image format features. - -.. option:: snapshot [--object OBJECTDEF] [--image-opts] [-U] [-q] [-l | -a SNAPSHOT | -c SNAPSHOT | -d SNAPSHOT] FILENAME - - List, apply, create or delete snapshots in image *FILENAME*. - -.. option:: rebase [--object OBJECTDEF] [--image-opts] [-U] [-q] [-f FMT] [-t CACHE] [-T SRC_CACHE] [-p] [-u] -b BACKING_FILE [-F BACKING_FMT] FILENAME - - Changes the backing file of an image. Only the formats ``qcow2`` and - ``qed`` support changing the backing file. - - The backing file is changed to *BACKING_FILE* and (if the image format of - *FILENAME* supports this) the backing file format is changed to - *BACKING_FMT*. If *BACKING_FILE* is specified as "" (the empty - string), then the image is rebased onto no backing file (i.e. it will exist - independently of any backing file). - - If a relative path name is given, the backing file is looked up relative to - the directory containing *FILENAME*. - - *CACHE* specifies the cache mode to be used for *FILENAME*, whereas - *SRC_CACHE* specifies the cache mode for reading backing files. - - There are two different modes in which ``rebase`` can operate: - - Safe mode - This is the default mode and performs a real rebase operation. The - new backing file may differ from the old one and qemu-img rebase - will take care of keeping the guest-visible content of *FILENAME* - unchanged. - - In order to achieve this, any clusters that differ between - *BACKING_FILE* and the old backing file of *FILENAME* are merged - into *FILENAME* before actually changing the backing file. - - Note that the safe mode is an expensive operation, comparable to - converting an image. It only works if the old backing file still - exists. - - Unsafe mode - qemu-img uses the unsafe mode if ``-u`` is specified. In this - mode, only the backing file name and format of *FILENAME* is changed - without any checks on the file contents. The user must take care of - specifying the correct new backing file, or the guest-visible - content of the image will be corrupted. - - This mode is useful for renaming or moving the backing file to - somewhere else. It can be used without an accessible old backing - file, i.e. you can use it to fix an image whose backing file has - already been moved/renamed. - - You can use ``rebase`` to perform a "diff" operation on two - disk images. This can be useful when you have copied or cloned - a guest, and you want to get back to a thin image on top of a - template or base image. - - Say that ``base.img`` has been cloned as ``modified.img`` by - copying it, and that the ``modified.img`` guest has run so there - are now some changes compared to ``base.img``. To construct a thin - image called ``diff.qcow2`` that contains just the differences, do: - - :: - - qemu-img create -f qcow2 -b modified.img diff.qcow2 - qemu-img rebase -b base.img diff.qcow2 - - At this point, ``modified.img`` can be discarded, since - ``base.img + diff.qcow2`` contains the same information. - -.. option:: resize [--object OBJECTDEF] [--image-opts] [-f FMT] [--preallocation=PREALLOC] [-q] [--shrink] FILENAME [+ | -]SIZE - - Change the disk image as if it had been created with *SIZE*. - - Before using this command to shrink a disk image, you MUST use file system and - partitioning tools inside the VM to reduce allocated file systems and partition - sizes accordingly. Failure to do so will result in data loss! - - When shrinking images, the ``--shrink`` option must be given. This informs - qemu-img that the user acknowledges all loss of data beyond the truncated - image's end. - - After using this command to grow a disk image, you must use file system and - partitioning tools inside the VM to actually begin using the new space on the - device. - - When growing an image, the ``--preallocation`` option may be used to specify - how the additional image area should be allocated on the host. See the format - description in the :ref:`notes` section which values are allowed. Using this - option may result in slightly more data being allocated than necessary. - -.. _notes: - -Notes ------ - -Supported image file formats: - -``raw`` - - Raw disk image format (default). This format has the advantage of - being simple and easily exportable to all other emulators. If your - file system supports *holes* (for example in ext2 or ext3 on - Linux or NTFS on Windows), then only the written sectors will reserve - space. Use ``qemu-img info`` to know the real size used by the - image or ``ls -ls`` on Unix/Linux. - - Supported options: - - ``preallocation`` - Preallocation mode (allowed values: ``off``, ``falloc``, - ``full``). ``falloc`` mode preallocates space for image by - calling ``posix_fallocate()``. ``full`` mode preallocates space - for image by writing data to underlying storage. This data may or - may not be zero, depending on the storage location. - -``qcow2`` - - QEMU image format, the most versatile format. Use it to have smaller - images (useful if your filesystem does not supports holes, for example - on Windows), optional AES encryption, zlib based compression and - support of multiple VM snapshots. - - Supported options: - - ``compat`` - Determines the qcow2 version to use. ``compat=0.10`` uses the - traditional image format that can be read by any QEMU since 0.10. - ``compat=1.1`` enables image format extensions that only QEMU 1.1 and - newer understand (this is the default). Amongst others, this includes zero - clusters, which allow efficient copy-on-read for sparse images. - - ``backing_file`` - File name of a base image (see ``create`` subcommand) - - ``backing_fmt`` - Image format of the base image - - ``encryption`` - If this option is set to ``on``, the image is encrypted with - 128-bit AES-CBC. - - The use of encryption in qcow and qcow2 images is considered to be - flawed by modern cryptography standards, suffering from a number - of design problems: - - - The AES-CBC cipher is used with predictable initialization - vectors based on the sector number. This makes it vulnerable to - chosen plaintext attacks which can reveal the existence of - encrypted data. - - - The user passphrase is directly used as the encryption key. A - poorly chosen or short passphrase will compromise the security - of the encryption. - - - In the event of the passphrase being compromised there is no way - to change the passphrase to protect data in any qcow images. The - files must be cloned, using a different encryption passphrase in - the new file. The original file must then be securely erased - using a program like shred, though even this is ineffective with - many modern storage technologies. - - - Initialization vectors used to encrypt sectors are based on the - guest virtual sector number, instead of the host physical - sector. When a disk image has multiple internal snapshots this - means that data in multiple physical sectors is encrypted with - the same initialization vector. With the CBC mode, this opens - the possibility of watermarking attacks if the attack can - collect multiple sectors encrypted with the same IV and some - predictable data. Having multiple qcow2 images with the same - passphrase also exposes this weakness since the passphrase is - directly used as the key. - - Use of qcow / qcow2 encryption is thus strongly discouraged. Users are - recommended to use an alternative encryption technology such as the - Linux dm-crypt / LUKS system. - - ``cluster_size`` - Changes the qcow2 cluster size (must be between 512 and - 2M). Smaller cluster sizes can improve the image file size whereas - larger cluster sizes generally provide better performance. - - ``preallocation`` - Preallocation mode (allowed values: ``off``, ``metadata``, - ``falloc``, ``full``). An image with preallocated metadata is - initially larger but can improve performance when the image needs - to grow. ``falloc`` and ``full`` preallocations are like the same - options of ``raw`` format, but sets up metadata also. - - ``lazy_refcounts`` - If this option is set to ``on``, reference count updates are - postponed with the goal of avoiding metadata I/O and improving - performance. This is particularly interesting with - ``cache=writethrough`` which doesn't batch metadata - updates. The tradeoff is that after a host crash, the reference - count tables must be rebuilt, i.e. on the next open an (automatic) - ``qemu-img check -r all`` is required, which may take some time. - - This option can only be enabled if ``compat=1.1`` is specified. - - ``nocow`` - If this option is set to ``on``, it will turn off COW of the file. It's - only valid on btrfs, no effect on other file systems. - - Btrfs has low performance when hosting a VM image file, even more - when the guest on the VM also using btrfs as file system. Turning - off COW is a way to mitigate this bad performance. Generally there - are two ways to turn off COW on btrfs: - - - Disable it by mounting with nodatacow, then all newly created files - will be NOCOW - - For an empty file, add the NOCOW file attribute. That's what this - option does. - - Note: this option is only valid to new or empty files. If there is - an existing file which is COW and has data blocks already, it - couldn't be changed to NOCOW by setting ``nocow=on``. One can - issue ``lsattr filename`` to check if the NOCOW flag is set or not - (Capital 'C' is NOCOW flag). - -``Other`` - - QEMU also supports various other image file formats for - compatibility with older QEMU versions or other hypervisors, - including VMDK, VDI, VHD (vpc), VHDX, qcow1 and QED. For a full list - of supported formats see ``qemu-img --help``. For a more detailed - description of these formats, see the QEMU block drivers reference - documentation. - - The main purpose of the block drivers for these formats is image - conversion. For running VMs, it is recommended to convert the disk - images to either raw or qcow2 in order to achieve good performance. diff --git a/docs/interop/qemu-nbd.rst b/docs/interop/qemu-nbd.rst deleted file mode 100644 index 667861c..0000000 --- a/docs/interop/qemu-nbd.rst +++ /dev/null @@ -1,256 +0,0 @@ -QEMU Disk Network Block Device Server -===================================== - -Synopsis --------- - -**qemu-nbd** [*OPTION*]... *filename* - -**qemu-nbd** -L [*OPTION*]... - -**qemu-nbd** -d *dev* - -Description ------------ - -Export a QEMU disk image using the NBD protocol. - -Other uses: - -- Bind a /dev/nbdX block device to a QEMU server (on Linux). -- As a client to query exports of a remote NBD server. - -Options -------- - -.. program:: qemu-nbd - -*filename* is a disk image filename, or a set of block -driver options if ``--image-opts`` is specified. - -*dev* is an NBD device. - -.. option:: --object type,id=ID,...props... - - Define a new instance of the *type* object class identified by *ID*. - See the :manpage:`qemu(1)` manual page for full details of the properties - supported. The common object types that it makes sense to define are the - ``secret`` object, which is used to supply passwords and/or encryption - keys, and the ``tls-creds`` object, which is used to supply TLS - credentials for the qemu-nbd server or client. - -.. option:: -p, --port=PORT - - TCP port to listen on as a server, or connect to as a client - (default ``10809``). - -.. option:: -o, --offset=OFFSET - - The offset into the image. - -.. option:: -b, --bind=IFACE - - The interface to bind to as a server, or connect to as a client - (default ``0.0.0.0``). - -.. option:: -k, --socket=PATH - - Use a unix socket with path *PATH*. - -.. option:: --image-opts - - Treat *filename* as a set of image options, instead of a plain - filename. If this flag is specified, the ``-f`` flag should - not be used, instead the :option:`format=` option should be set. - -.. option:: -f, --format=FMT - - Force the use of the block driver for format *FMT* instead of - auto-detecting. - -.. option:: -r, --read-only - - Export the disk as read-only. - -.. option:: -B, --bitmap=NAME - - If *filename* has a qcow2 persistent bitmap *NAME*, expose - that bitmap via the ``qemu:dirty-bitmap:NAME`` context - accessible through NBD_OPT_SET_META_CONTEXT. - -.. option:: -s, --snapshot - - Use *filename* as an external snapshot, create a temporary - file with ``backing_file=``\ *filename*, redirect the write to - the temporary one. - -.. option:: -l, --load-snapshot=SNAPSHOT_PARAM - - Load an internal snapshot inside *filename* and export it - as an read-only device, SNAPSHOT_PARAM format is - ``snapshot.id=[ID],snapshot.name=[NAME]`` or ``[ID_OR_NAME]`` - -.. option:: --cache=CACHE - - The cache mode to be used with the file. See the documentation of - the emulator's ``-drive cache=...`` option for allowed values. - -.. option:: -n, --nocache - - Equivalent to :option:`--cache=none`. - -.. option:: --aio=AIO - - Set the asynchronous I/O mode between ``threads`` (the default), - ``native`` (Linux only), and ``io_uring`` (Linux 5.1+). - -.. option:: --discard=DISCARD - - Control whether ``discard`` (also known as ``trim`` or ``unmap``) - requests are ignored or passed to the filesystem. *DISCARD* is one of - ``ignore`` (or ``off``), ``unmap`` (or ``on``). The default is - ``ignore``. - -.. option:: --detect-zeroes=DETECT_ZEROES - - Control the automatic conversion of plain zero writes by the OS to - driver-specific optimized zero write commands. *DETECT_ZEROES* is one of - ``off``, ``on``, or ``unmap``. ``unmap`` - converts a zero write to an unmap operation and can only be used if - *DISCARD* is set to ``unmap``. The default is ``off``. - -.. option:: -c, --connect=DEV - - Connect *filename* to NBD device *DEV* (Linux only). - -.. option:: -d, --disconnect - - Disconnect the device *DEV* (Linux only). - -.. option:: -e, --shared=NUM - - Allow up to *NUM* clients to share the device (default - ``1``). Safe for readers, but for now, consistency is not - guaranteed between multiple writers. - -.. option:: -t, --persistent - - Don't exit on the last connection. - -.. option:: -x, --export-name=NAME - - Set the NBD volume export name (default of a zero-length string). - -.. option:: -D, --description=DESCRIPTION - - Set the NBD volume export description, as a human-readable - string. - -.. option:: -L, --list - - Connect as a client and list all details about the exports exposed by - a remote NBD server. This enables list mode, and is incompatible - with options that change behavior related to a specific export (such as - :option:`--export-name`, :option:`--offset`, ...). - -.. option:: --tls-creds=ID - - Enable mandatory TLS encryption for the server by setting the ID - of the TLS credentials object previously created with the --object - option; or provide the credentials needed for connecting as a client - in list mode. - -.. option:: --fork - - Fork off the server process and exit the parent once the server is running. - -.. option:: --pid-file=PATH - - Store the server's process ID in the given file. - -.. option:: --tls-authz=ID - - Specify the ID of a qauthz object previously created with the - :option:`--object` option. This will be used to authorize connecting users - against their x509 distinguished name. - -.. option:: -v, --verbose - - Display extra debugging information. - -.. option:: -h, --help - - Display this help and exit. - -.. option:: -V, --version - - Display version information and exit. - -.. option:: -T, --trace [[enable=]PATTERN][,events=FILE][,file=FILE] - - .. include:: ../qemu-option-trace.rst.inc - -Examples --------- - -Start a server listening on port 10809 that exposes only the -guest-visible contents of a qcow2 file, with no TLS encryption, and -with the default export name (an empty string). The command is -one-shot, and will block until the first successful client -disconnects: - -:: - - qemu-nbd -f qcow2 file.qcow2 - -Start a long-running server listening with encryption on port 10810, -and whitelist clients with a specific X.509 certificate to connect to -a 1 megabyte subset of a raw file, using the export name 'subset': - -:: - - qemu-nbd \ - --object tls-creds-x509,id=tls0,endpoint=server,dir=/path/to/qemutls \ - --object 'authz-simple,id=auth0,identity=CN=laptop.example.com,,\ - O=Example Org,,L=London,,ST=London,,C=GB' \ - --tls-creds tls0 --tls-authz auth0 \ - -t -x subset -p 10810 \ - --image-opts driver=raw,offset=1M,size=1M,file.driver=file,file.filename=file.raw - -Serve a read-only copy of a guest image over a Unix socket with as -many as 5 simultaneous readers, with a persistent process forked as a -daemon: - -:: - - qemu-nbd --fork --persistent --shared=5 --socket=/path/to/sock \ - --read-only --format=qcow2 file.qcow2 - -Expose the guest-visible contents of a qcow2 file via a block device -/dev/nbd0 (and possibly creating /dev/nbd0p1 and friends for -partitions found within), then disconnect the device when done. -Access to bind qemu-nbd to an /dev/nbd device generally requires root -privileges, and may also require the execution of ``modprobe nbd`` -to enable the kernel NBD client module. *CAUTION*: Do not use -this method to mount filesystems from an untrusted guest image - a -malicious guest may have prepared the image to attempt to trigger -kernel bugs in partition probing or file system mounting. - -:: - - qemu-nbd -c /dev/nbd0 -f qcow2 file.qcow2 - qemu-nbd -d /dev/nbd0 - -Query a remote server to see details about what export(s) it is -serving on port 10809, and authenticating via PSK: - -:: - - qemu-nbd \ - --object tls-creds-psk,id=tls0,dir=/tmp/keys,username=eblake,endpoint=client \ - --tls-creds tls0 -L -b remote.example.com - -See also --------- - -:manpage:`qemu(1)`, :manpage:`qemu-img(1)` diff --git a/docs/interop/qemu-trace-stap.rst b/docs/interop/qemu-trace-stap.rst deleted file mode 100644 index fb70445..0000000 --- a/docs/interop/qemu-trace-stap.rst +++ /dev/null @@ -1,124 +0,0 @@ -QEMU SystemTap trace tool -========================= - -Synopsis --------- - -**qemu-trace-stap** [*GLOBAL-OPTIONS*] *COMMAND* [*COMMAND-OPTIONS*] *ARGS*... - -Description ------------ - -The ``qemu-trace-stap`` program facilitates tracing of the execution -of QEMU emulators using SystemTap. - -It is required to have the SystemTap runtime environment installed to use -this program, since it is a wrapper around execution of the ``stap`` -program. - -Options -------- - -.. program:: qemu-trace-stap - -The following global options may be used regardless of which command -is executed: - -.. option:: --verbose, -v - - Display verbose information about command execution. - -The following commands are valid: - -.. option:: list BINARY PATTERN... - - List all the probe names provided by *BINARY* that match - *PATTERN*. - - If *BINARY* is not an absolute path, it will be located by searching - the directories listed in the ``$PATH`` environment variable. - - *PATTERN* is a plain string that is used to filter the results of - this command. It may optionally contain a ``*`` wildcard to facilitate - matching multiple probes without listing each one explicitly. Multiple - *PATTERN* arguments may be given, causing listing of probes that match - any of the listed names. If no *PATTERN* is given, the all possible - probes will be listed. - - For example, to list all probes available in the ``qemu-system-x86_64`` - binary: - - :: - - $ qemu-trace-stap list qemu-system-x86_64 - - To filter the list to only cover probes related to QEMU's cryptographic - subsystem, in a binary outside ``$PATH`` - - :: - - $ qemu-trace-stap list /opt/qemu/4.0.0/bin/qemu-system-x86_64 'qcrypto*' - -.. option:: run OPTIONS BINARY PATTERN... - - Run a trace session, printing formatted output any time a process that is - executing *BINARY* triggers a probe matching *PATTERN*. - - If *BINARY* is not an absolute path, it will be located by searching - the directories listed in the ``$PATH`` environment variable. - - *PATTERN* is a plain string that matches a probe name shown by the - *LIST* command. It may optionally contain a ``*`` wildcard to - facilitate matching multiple probes without listing each one explicitly. - Multiple *PATTERN* arguments may be given, causing all matching probes - to be monitored. At least one *PATTERN* is required, since stap is not - capable of tracing all known QEMU probes concurrently without overflowing - its trace buffer. - - Invocation of this command does not need to be synchronized with - invocation of the QEMU process(es). It will match probes on all - existing running processes and all future launched processes, - unless told to only monitor a specific process. - - Valid command specific options are: - - .. program:: qemu-trace-stap-run - - .. option:: --pid=PID, -p PID - - Restrict the tracing session so that it only triggers for the process - identified by *PID*. - - For example, to monitor all processes executing ``qemu-system-x86_64`` - as found on ``$PATH``, displaying all I/O related probes: - - :: - - $ qemu-trace-stap run qemu-system-x86_64 'qio*' - - To monitor only the QEMU process with PID 1732 - - :: - - $ qemu-trace-stap run --pid=1732 qemu-system-x86_64 'qio*' - - To monitor QEMU processes running an alternative binary outside of - ``$PATH``, displaying verbose information about setup of the - tracing environment: - - :: - - $ qemu-trace-stap -v run /opt/qemu/4.0.0/qemu-system-x86_64 'qio*' - -See also --------- - -:manpage:`qemu(1)`, :manpage:`stap(1)` - -.. - Copyright (C) 2019 Red Hat, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. diff --git a/docs/interop/virtfs-proxy-helper.rst b/docs/interop/virtfs-proxy-helper.rst deleted file mode 100644 index 6cdeedf..0000000 --- a/docs/interop/virtfs-proxy-helper.rst +++ /dev/null @@ -1,72 +0,0 @@ -QEMU 9p virtfs proxy filesystem helper -====================================== - -Synopsis --------- - -**virtfs-proxy-helper** [*OPTIONS*] - -Description ------------ - -Pass-through security model in QEMU 9p server needs root privilege to do -few file operations (like chown, chmod to any mode/uid:gid). There are two -issues in pass-through security model: - -- TOCTTOU vulnerability: Following symbolic links in the server could - provide access to files beyond 9p export path. - -- Running QEMU with root privilege could be a security issue. - -To overcome above issues, following approach is used: A new filesystem -type 'proxy' is introduced. Proxy FS uses chroot + socket combination -for securing the vulnerability known with following symbolic links. -Intention of adding a new filesystem type is to allow qemu to run -in non-root mode, but doing privileged operations using socket IO. - -Proxy helper (a stand alone binary part of qemu) is invoked with -root privileges. Proxy helper chroots into 9p export path and creates -a socket pair or a named socket based on the command line parameter. -QEMU and proxy helper communicate using this socket. QEMU proxy fs -driver sends filesystem request to proxy helper and receives the -response from it. - -The proxy helper is designed so that it can drop root privileges except -for the capabilities needed for doing filesystem operations. - -Options -------- - -The following options are supported: - -.. program:: virtfs-proxy-helper - -.. option:: -h - - Display help and exit - -.. option:: -p, --path PATH - - Path to export for proxy filesystem driver - -.. option:: -f, --fd SOCKET_ID - - Use given file descriptor as socket descriptor for communicating with - qemu proxy fs drier. Usually a helper like libvirt will create - socketpair and pass one of the fds as parameter to this option. - -.. option:: -s, --socket SOCKET_FILE - - Creates named socket file for communicating with qemu proxy fs driver - -.. option:: -u, --uid UID - - uid to give access to named socket file; used in combination with -g. - -.. option:: -g, --gid GID - - gid to give access to named socket file; used in combination with -u. - -.. option:: -n, --nodaemon - - Run as a normal program. By default program will run in daemon mode diff --git a/docs/interop/virtiofsd.rst b/docs/interop/virtiofsd.rst deleted file mode 100644 index 378594c..0000000 --- a/docs/interop/virtiofsd.rst +++ /dev/null @@ -1,120 +0,0 @@ -QEMU virtio-fs shared file system daemon -======================================== - -Synopsis --------- - -**virtiofsd** [*OPTIONS*] - -Description ------------ - -Share a host directory tree with a guest through a virtio-fs device. This -program is a vhost-user backend that implements the virtio-fs device. Each -virtio-fs device instance requires its own virtiofsd process. - -This program is designed to work with QEMU's ``--device vhost-user-fs-pci`` -but should work with any virtual machine monitor (VMM) that supports -vhost-user. See the Examples section below. - -This program must be run as the root user. Upon startup the program will -switch into a new file system namespace with the shared directory tree as its -root. This prevents "file system escapes" due to symlinks and other file -system objects that might lead to files outside the shared directory. The -program also sandboxes itself using seccomp(2) to prevent ptrace(2) and other -vectors that could allow an attacker to compromise the system after gaining -control of the virtiofsd process. - -Options -------- - -.. program:: virtiofsd - -.. option:: -h, --help - - Print help. - -.. option:: -V, --version - - Print version. - -.. option:: -d - - Enable debug output. - -.. option:: --syslog - - Print log messages to syslog instead of stderr. - -.. option:: -o OPTION - - * debug - - Enable debug output. - - * flock|no_flock - - Enable/disable flock. The default is ``no_flock``. - - * log_level=LEVEL - - Print only log messages matching LEVEL or more severe. LEVEL is one of - ``err``, ``warn``, ``info``, or ``debug``. The default is ``info``. - - * norace - - Disable racy fallback. The default is false. - - * posix_lock|no_posix_lock - - Enable/disable remote POSIX locks. The default is ``posix_lock``. - - * readdirplus|no_readdirplus - - Enable/disable readdirplus. The default is ``readdirplus``. - - * source=PATH - - Share host directory tree located at PATH. This option is required. - - * timeout=TIMEOUT - - I/O timeout in seconds. The default depends on cache= option. - - * writeback|no_writeback - - Enable/disable writeback cache. The cache alows the FUSE client to buffer - and merge write requests. The default is ``no_writeback``. - - * xattr|no_xattr - - Enable/disable extended attributes (xattr) on files and directories. The - default is ``no_xattr``. - -.. option:: --socket-path=PATH - - Listen on vhost-user UNIX domain socket at PATH. - -.. option:: --fd=FDNUM - - Accept connections from vhost-user UNIX domain socket file descriptor FDNUM. - The file descriptor must already be listening for connections. - -.. option:: --thread-pool-size=NUM - - Restrict the number of worker threads per request queue to NUM. The default - is 64. - -.. option:: --cache=none|auto|always - - Select the desired trade-off between coherency and performance. ``none`` - forbids the FUSE client from caching to achieve best coherency at the cost of - performance. ``auto`` acts similar to NFS with a 1 second metadata cache - timeout. ``always`` sets a long cache lifetime at the expense of coherency. - -Examples --------- - -Export ``/var/lib/fs/vm001/`` on vhost-user UNIX domain socket -``/var/run/vm001-vhost-fs.sock``: - -:: - - host# virtiofsd --socket-path=/var/run/vm001-vhost-fs.sock -o source=/var/lib/fs/vm001 - host# qemu-system-x86_64 \ - -chardev socket,id=char0,path=/var/run/vm001-vhost-fs.sock \ - -device vhost-user-fs-pci,chardev=char0,tag=myfs \ - -object memory-backend-memfd,id=mem,size=4G,share=on \ - -numa node,memdev=mem \ - ... - guest# mount -t virtiofs myfs /mnt diff --git a/docs/tools/conf.py b/docs/tools/conf.py index 5646111..9052d17 100644 --- a/docs/tools/conf.py +++ b/docs/tools/conf.py @@ -14,3 +14,20 @@ exec(compile(open(parent_config, "rb").read(), parent_config, 'exec')) # the manual title to appear in the sidebar. html_theme_options['description'] = \ u'Tools Guide' + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + ('qemu-img', 'qemu-img', u'QEMU disk image utility', + ['Fabrice Bellard'], 1), + ('qemu-nbd', 'qemu-nbd', u'QEMU Disk Network Block Device Server', + ['Anthony Liguori '], 8), + ('qemu-trace-stap', 'qemu-trace-stap', u'QEMU SystemTap trace tool', + [], 1), + ('virtfs-proxy-helper', 'virtfs-proxy-helper', + u'QEMU 9p virtfs proxy filesystem helper', + ['M. Mohan Kumar'], 1), + ('virtiofsd', 'virtiofsd', u'QEMU virtio-fs shared file system daemon', + ['Stefan Hajnoczi ', + 'Masayoshi Mizuma '], 1), +] diff --git a/docs/tools/index.rst b/docs/tools/index.rst index c5a4a13..232ce9f 100644 --- a/docs/tools/index.rst +++ b/docs/tools/index.rst @@ -9,3 +9,9 @@ Contents: .. toctree:: :maxdepth: 2 + + qemu-img + qemu-nbd + qemu-trace-stap + virtfs-proxy-helper + virtiofsd diff --git a/docs/tools/qemu-img.rst b/docs/tools/qemu-img.rst new file mode 100644 index 0000000..0080f83 --- /dev/null +++ b/docs/tools/qemu-img.rst @@ -0,0 +1,832 @@ +QEMU disk image utility +======================= + +Synopsis +-------- + +**qemu-img** [*standard options*] *command* [*command options*] + +Description +----------- + +qemu-img allows you to create, convert and modify images offline. It can handle +all image formats supported by QEMU. + +**Warning:** Never use qemu-img to modify images in use by a running virtual +machine or any other process; this may destroy the image. Also, be aware that +querying an image that is being modified by another process may encounter +inconsistent state. + +Options +------- + +.. program:: qemu-img + +Standard options: + +.. option:: -h, --help + + Display this help and exit + +.. option:: -V, --version + + Display version information and exit + +.. option:: -T, --trace [[enable=]PATTERN][,events=FILE][,file=FILE] + + .. include:: ../qemu-option-trace.rst.inc + +The following commands are supported: + +.. hxtool-doc:: qemu-img-cmds.hx + +Command parameters: + +*FILENAME* is a disk image filename. + +*FMT* is the disk image format. It is guessed automatically in most +cases. See below for a description of the supported disk formats. + +*SIZE* is the disk image size in bytes. Optional suffixes ``k`` or +``K`` (kilobyte, 1024) ``M`` (megabyte, 1024k) and ``G`` (gigabyte, +1024M) and T (terabyte, 1024G) are supported. ``b`` is ignored. + +*OUTPUT_FILENAME* is the destination disk image filename. + +*OUTPUT_FMT* is the destination format. + +*OPTIONS* is a comma separated list of format specific options in a +name=value format. Use ``-o ?`` for an overview of the options supported +by the used format or see the format descriptions below for details. + +*SNAPSHOT_PARAM* is param used for internal snapshot, format is +'snapshot.id=[ID],snapshot.name=[NAME]' or '[ID_OR_NAME]'. + +.. + Note the use of a new 'program'; otherwise Sphinx complains about + the -h option appearing both in the above option list and this one. + +.. program:: qemu-img-common-opts + +.. option:: --object OBJECTDEF + + is a QEMU user creatable object definition. See the :manpage:`qemu(1)` + manual page for a description of the object properties. The most common + object type is a ``secret``, which is used to supply passwords and/or + encryption keys. + +.. option:: --image-opts + + Indicates that the source *FILENAME* parameter is to be interpreted as a + full option string, not a plain filename. This parameter is mutually + exclusive with the *-f* parameter. + +.. option:: --target-image-opts + + Indicates that the OUTPUT_FILENAME parameter(s) are to be interpreted as + a full option string, not a plain filename. This parameter is mutually + exclusive with the *-O* parameters. It is currently required to also use + the *-n* parameter to skip image creation. This restriction may be relaxed + in a future release. + +.. option:: --force-share (-U) + + If specified, ``qemu-img`` will open the image in shared mode, allowing + other QEMU processes to open it in write mode. For example, this can be used to + get the image information (with 'info' subcommand) when the image is used by a + running guest. Note that this could produce inconsistent results because of + concurrent metadata changes, etc. This option is only allowed when opening + images in read-only mode. + +.. option:: --backing-chain + + Will enumerate information about backing files in a disk image chain. Refer + below for further description. + +.. option:: -c + + Indicates that target image must be compressed (qcow format only). + +.. option:: -h + + With or without a command, shows help and lists the supported formats. + +.. option:: -p + + Display progress bar (compare, convert and rebase commands only). + If the *-p* option is not used for a command that supports it, the + progress is reported when the process receives a ``SIGUSR1`` or + ``SIGINFO`` signal. + +.. option:: -q + + Quiet mode - do not print any output (except errors). There's no progress bar + in case both *-q* and *-p* options are used. + +.. option:: -S SIZE + + Indicates the consecutive number of bytes that must contain only zeros + for qemu-img to create a sparse image during conversion. This value is rounded + down to the nearest 512 bytes. You may use the common size suffixes like + ``k`` for kilobytes. + +.. option:: -t CACHE + + Specifies the cache mode that should be used with the (destination) file. See + the documentation of the emulator's ``-drive cache=...`` option for allowed + values. + +.. option:: -T SRC_CACHE + + Specifies the cache mode that should be used with the source file(s). See + the documentation of the emulator's ``-drive cache=...`` option for allowed + values. + +Parameters to snapshot subcommand: + +.. program:: qemu-img-snapshot + +.. option:: snapshot + + Is the name of the snapshot to create, apply or delete + +.. option:: -a + + Applies a snapshot (revert disk to saved state) + +.. option:: -c + + Creates a snapshot + +.. option:: -d + + Deletes a snapshot + +.. option:: -l + + Lists all snapshots in the given image + +Parameters to compare subcommand: + +.. program:: qemu-img-compare + +.. option:: -f + + First image format + +.. option:: -F + + Second image format + +.. option:: -s + + Strict mode - fail on different image size or sector allocation + +Parameters to convert subcommand: + +.. program:: qemu-img-convert + +.. option:: -n + + Skip the creation of the target volume + +.. option:: -m + + Number of parallel coroutines for the convert process + +.. option:: -W + + Allow out-of-order writes to the destination. This option improves performance, + but is only recommended for preallocated devices like host devices or other + raw block devices. + +.. option:: -C + + Try to use copy offloading to move data from source image to target. This may + improve performance if the data is remote, such as with NFS or iSCSI backends, + but will not automatically sparsify zero sectors, and may result in a fully + allocated target image depending on the host support for getting allocation + information. + +.. option:: --salvage + + Try to ignore I/O errors when reading. Unless in quiet mode (``-q``), errors + will still be printed. Areas that cannot be read from the source will be + treated as containing only zeroes. + +.. option:: --target-is-zero + + Assume that reading the destination image will always return + zeros. This parameter is mutually exclusive with a destination image + that has a backing file. It is required to also use the ``-n`` + parameter to skip image creation. + +Parameters to dd subcommand: + +.. program:: qemu-img-dd + +.. option:: bs=BLOCK_SIZE + + Defines the block size + +.. option:: count=BLOCKS + + Sets the number of input blocks to copy + +.. option:: if=INPUT + + Sets the input file + +.. option:: of=OUTPUT + + Sets the output file + +.. option:: skip=BLOCKS + + Sets the number of input blocks to skip + +Command description: + +.. program:: qemu-img-commands + +.. option:: amend [--object OBJECTDEF] [--image-opts] [-p] [-q] [-f FMT] [-t CACHE] -o OPTIONS FILENAME + + Amends the image format specific *OPTIONS* for the image file + *FILENAME*. Not all file formats support this operation. + +.. option:: bench [-c COUNT] [-d DEPTH] [-f FMT] [--flush-interval=FLUSH_INTERVAL] [-i AIO] [-n] [--no-drain] [-o OFFSET] [--pattern=PATTERN] [-q] [-s BUFFER_SIZE] [-S STEP_SIZE] [-t CACHE] [-w] [-U] FILENAME + + Run a simple sequential I/O benchmark on the specified image. If ``-w`` is + specified, a write test is performed, otherwise a read test is performed. + + A total number of *COUNT* I/O requests is performed, each *BUFFER_SIZE* + bytes in size, and with *DEPTH* requests in parallel. The first request + starts at the position given by *OFFSET*, each following request increases + the current position by *STEP_SIZE*. If *STEP_SIZE* is not given, + *BUFFER_SIZE* is used for its value. + + If *FLUSH_INTERVAL* is specified for a write test, the request queue is + drained and a flush is issued before new writes are made whenever the number of + remaining requests is a multiple of *FLUSH_INTERVAL*. If additionally + ``--no-drain`` is specified, a flush is issued without draining the request + queue first. + + if ``-i`` is specified, *AIO* option can be used to specify different + AIO backends: ``threads``, ``native`` or ``io_uring``. + + If ``-n`` is specified, the native AIO backend is used if possible. On + Linux, this option only works if ``-t none`` or ``-t directsync`` is + specified as well. + + For write tests, by default a buffer filled with zeros is written. This can be + overridden with a pattern byte specified by *PATTERN*. + +.. option:: check [--object OBJECTDEF] [--image-opts] [-q] [-f FMT] [--output=OFMT] [-r [leaks | all]] [-T SRC_CACHE] [-U] FILENAME + + Perform a consistency check on the disk image *FILENAME*. The command can + output in the format *OFMT* which is either ``human`` or ``json``. + The JSON output is an object of QAPI type ``ImageCheck``. + + If ``-r`` is specified, qemu-img tries to repair any inconsistencies found + during the check. ``-r leaks`` repairs only cluster leaks, whereas + ``-r all`` fixes all kinds of errors, with a higher risk of choosing the + wrong fix or hiding corruption that has already occurred. + + Only the formats ``qcow2``, ``qed`` and ``vdi`` support + consistency checks. + + In case the image does not have any inconsistencies, check exits with ``0``. + Other exit codes indicate the kind of inconsistency found or if another error + occurred. The following table summarizes all exit codes of the check subcommand: + + 0 + Check completed, the image is (now) consistent + 1 + Check not completed because of internal errors + 2 + Check completed, image is corrupted + 3 + Check completed, image has leaked clusters, but is not corrupted + 63 + Checks are not supported by the image format + + If ``-r`` is specified, exit codes representing the image state refer to the + state after (the attempt at) repairing it. That is, a successful ``-r all`` + will yield the exit code 0, independently of the image state before. + +.. option:: commit [--object OBJECTDEF] [--image-opts] [-q] [-f FMT] [-t CACHE] [-b BASE] [-d] [-p] FILENAME + + Commit the changes recorded in *FILENAME* in its base image or backing file. + If the backing file is smaller than the snapshot, then the backing file will be + resized to be the same size as the snapshot. If the snapshot is smaller than + the backing file, the backing file will not be truncated. If you want the + backing file to match the size of the smaller snapshot, you can safely truncate + it yourself once the commit operation successfully completes. + + The image *FILENAME* is emptied after the operation has succeeded. If you do + not need *FILENAME* afterwards and intend to drop it, you may skip emptying + *FILENAME* by specifying the ``-d`` flag. + + If the backing chain of the given image file *FILENAME* has more than one + layer, the backing file into which the changes will be committed may be + specified as *BASE* (which has to be part of *FILENAME*'s backing + chain). If *BASE* is not specified, the immediate backing file of the top + image (which is *FILENAME*) will be used. Note that after a commit operation + all images between *BASE* and the top image will be invalid and may return + garbage data when read. For this reason, ``-b`` implies ``-d`` (so that + the top image stays valid). + +.. option:: compare [--object OBJECTDEF] [--image-opts] [-f FMT] [-F FMT] [-T SRC_CACHE] [-p] [-q] [-s] [-U] FILENAME1 FILENAME2 + + Check if two images have the same content. You can compare images with + different format or settings. + + The format is probed unless you specify it by ``-f`` (used for + *FILENAME1*) and/or ``-F`` (used for *FILENAME2*) option. + + By default, images with different size are considered identical if the larger + image contains only unallocated and/or zeroed sectors in the area after the end + of the other image. In addition, if any sector is not allocated in one image + and contains only zero bytes in the second one, it is evaluated as equal. You + can use Strict mode by specifying the ``-s`` option. When compare runs in + Strict mode, it fails in case image size differs or a sector is allocated in + one image and is not allocated in the second one. + + By default, compare prints out a result message. This message displays + information that both images are same or the position of the first different + byte. In addition, result message can report different image size in case + Strict mode is used. + + Compare exits with ``0`` in case the images are equal and with ``1`` + in case the images differ. Other exit codes mean an error occurred during + execution and standard error output should contain an error message. + The following table sumarizes all exit codes of the compare subcommand: + + 0 + Images are identical + 1 + Images differ + 2 + Error on opening an image + 3 + Error on checking a sector allocation + 4 + Error on reading data + +.. option:: convert [--object OBJECTDEF] [--image-opts] [--target-image-opts] [--target-is-zero] [-U] [-C] [-c] [-p] [-q] [-n] [-f FMT] [-t CACHE] [-T SRC_CACHE] [-O OUTPUT_FMT] [-B BACKING_FILE] [-o OPTIONS] [-l SNAPSHOT_PARAM] [-S SPARSE_SIZE] [-m NUM_COROUTINES] [-W] FILENAME [FILENAME2 [...]] OUTPUT_FILENAME + + Convert the disk image *FILENAME* or a snapshot *SNAPSHOT_PARAM* + to disk image *OUTPUT_FILENAME* using format *OUTPUT_FMT*. It can + be optionally compressed (``-c`` option) or use any format specific + options like encryption (``-o`` option). + + Only the formats ``qcow`` and ``qcow2`` support compression. The + compression is read-only. It means that if a compressed sector is + rewritten, then it is rewritten as uncompressed data. + + Image conversion is also useful to get smaller image when using a + growable format such as ``qcow``: the empty sectors are detected and + suppressed from the destination image. + + *SPARSE_SIZE* indicates the consecutive number of bytes (defaults to 4k) + that must contain only zeros for qemu-img to create a sparse image during + conversion. If *SPARSE_SIZE* is 0, the source will not be scanned for + unallocated or zero sectors, and the destination image will always be + fully allocated. + + You can use the *BACKING_FILE* option to force the output image to be + created as a copy on write image of the specified base image; the + *BACKING_FILE* should have the same content as the input's base image, + however the path, image format, etc may differ. + + If a relative path name is given, the backing file is looked up relative to + the directory containing *OUTPUT_FILENAME*. + + If the ``-n`` option is specified, the target volume creation will be + skipped. This is useful for formats such as ``rbd`` if the target + volume has already been created with site specific options that cannot + be supplied through qemu-img. + + Out of order writes can be enabled with ``-W`` to improve performance. + This is only recommended for preallocated devices like host devices or other + raw block devices. Out of order write does not work in combination with + creating compressed images. + + *NUM_COROUTINES* specifies how many coroutines work in parallel during + the convert process (defaults to 8). + +.. option:: create [--object OBJECTDEF] [-q] [-f FMT] [-b BACKING_FILE] [-F BACKING_FMT] [-u] [-o OPTIONS] FILENAME [SIZE] + + Create the new disk image *FILENAME* of size *SIZE* and format + *FMT*. Depending on the file format, you can add one or more *OPTIONS* + that enable additional features of this format. + + If the option *BACKING_FILE* is specified, then the image will record + only the differences from *BACKING_FILE*. No size needs to be specified in + this case. *BACKING_FILE* will never be modified unless you use the + ``commit`` monitor command (or qemu-img commit). + + If a relative path name is given, the backing file is looked up relative to + the directory containing *FILENAME*. + + Note that a given backing file will be opened to check that it is valid. Use + the ``-u`` option to enable unsafe backing file mode, which means that the + image will be created even if the associated backing file cannot be opened. A + matching backing file must be created or additional options be used to make the + backing file specification valid when you want to use an image created this + way. + + The size can also be specified using the *SIZE* option with ``-o``, + it doesn't need to be specified separately in this case. + + +.. option:: dd [--image-opts] [-U] [-f FMT] [-O OUTPUT_FMT] [bs=BLOCK_SIZE] [count=BLOCKS] [skip=BLOCKS] if=INPUT of=OUTPUT + + dd copies from *INPUT* file to *OUTPUT* file converting it from + *FMT* format to *OUTPUT_FMT* format. + + The data is by default read and written using blocks of 512 bytes but can be + modified by specifying *BLOCK_SIZE*. If count=\ *BLOCKS* is specified + dd will stop reading input after reading *BLOCKS* input blocks. + + The size syntax is similar to :manpage:`dd(1)`'s size syntax. + +.. option:: info [--object OBJECTDEF] [--image-opts] [-f FMT] [--output=OFMT] [--backing-chain] [-U] FILENAME + + Give information about the disk image *FILENAME*. Use it in + particular to know the size reserved on disk which can be different + from the displayed size. If VM snapshots are stored in the disk image, + they are displayed too. + + If a disk image has a backing file chain, information about each disk image in + the chain can be recursively enumerated by using the option ``--backing-chain``. + + For instance, if you have an image chain like: + + :: + + base.qcow2 <- snap1.qcow2 <- snap2.qcow2 + + To enumerate information about each disk image in the above chain, starting from top to base, do: + + :: + + qemu-img info --backing-chain snap2.qcow2 + + The command can output in the format *OFMT* which is either ``human`` or + ``json``. The JSON output is an object of QAPI type ``ImageInfo``; with + ``--backing-chain``, it is an array of ``ImageInfo`` objects. + + ``--output=human`` reports the following information (for every image in the + chain): + + *image* + The image file name + + *file format* + The image format + + *virtual size* + The size of the guest disk + + *disk size* + How much space the image file occupies on the host file system (may be + shown as 0 if this information is unavailable, e.g. because there is no + file system) + + *cluster_size* + Cluster size of the image format, if applicable + + *encrypted* + Whether the image is encrypted (only present if so) + + *cleanly shut down* + This is shown as ``no`` if the image is dirty and will have to be + auto-repaired the next time it is opened in qemu. + + *backing file* + The backing file name, if present + + *backing file format* + The format of the backing file, if the image enforces it + + *Snapshot list* + A list of all internal snapshots + + *Format specific information* + Further information whose structure depends on the image format. This + section is a textual representation of the respective + ``ImageInfoSpecific*`` QAPI object (e.g. ``ImageInfoSpecificQCow2`` + for qcow2 images). + +.. option:: map [--object OBJECTDEF] [--image-opts] [-f FMT] [--output=OFMT] [-U] FILENAME + + Dump the metadata of image *FILENAME* and its backing file chain. + In particular, this commands dumps the allocation state of every sector + of *FILENAME*, together with the topmost file that allocates it in + the backing file chain. + + Two option formats are possible. The default format (``human``) + only dumps known-nonzero areas of the file. Known-zero parts of the + file are omitted altogether, and likewise for parts that are not allocated + throughout the chain. ``qemu-img`` output will identify a file + from where the data can be read, and the offset in the file. Each line + will include four fields, the first three of which are hexadecimal + numbers. For example the first line of: + + :: + + Offset Length Mapped to File + 0 0x20000 0x50000 /tmp/overlay.qcow2 + 0x100000 0x10000 0x95380000 /tmp/backing.qcow2 + + means that 0x20000 (131072) bytes starting at offset 0 in the image are + available in /tmp/overlay.qcow2 (opened in ``raw`` format) starting + at offset 0x50000 (327680). Data that is compressed, encrypted, or + otherwise not available in raw format will cause an error if ``human`` + format is in use. Note that file names can include newlines, thus it is + not safe to parse this output format in scripts. + + The alternative format ``json`` will return an array of dictionaries + in JSON format. It will include similar information in + the ``start``, ``length``, ``offset`` fields; + it will also include other more specific information: + + - whether the sectors contain actual data or not (boolean field ``data``; + if false, the sectors are either unallocated or stored as optimized + all-zero clusters); + - whether the data is known to read as zero (boolean field ``zero``); + - in order to make the output shorter, the target file is expressed as + a ``depth``; for example, a depth of 2 refers to the backing file + of the backing file of *FILENAME*. + + In JSON format, the ``offset`` field is optional; it is absent in + cases where ``human`` format would omit the entry or exit with an error. + If ``data`` is false and the ``offset`` field is present, the + corresponding sectors in the file are not yet in use, but they are + preallocated. + + For more information, consult ``include/block/block.h`` in QEMU's + source code. + +.. option:: measure [--output=OFMT] [-O OUTPUT_FMT] [-o OPTIONS] [--size N | [--object OBJECTDEF] [--image-opts] [-f FMT] [-l SNAPSHOT_PARAM] FILENAME] + + Calculate the file size required for a new image. This information + can be used to size logical volumes or SAN LUNs appropriately for + the image that will be placed in them. The values reported are + guaranteed to be large enough to fit the image. The command can + output in the format *OFMT* which is either ``human`` or ``json``. + The JSON output is an object of QAPI type ``BlockMeasureInfo``. + + If the size *N* is given then act as if creating a new empty image file + using ``qemu-img create``. If *FILENAME* is given then act as if + converting an existing image file using ``qemu-img convert``. The format + of the new file is given by *OUTPUT_FMT* while the format of an existing + file is given by *FMT*. + + A snapshot in an existing image can be specified using *SNAPSHOT_PARAM*. + + The following fields are reported: + + :: + + required size: 524288 + fully allocated size: 1074069504 + + The ``required size`` is the file size of the new image. It may be smaller + than the virtual disk size if the image format supports compact representation. + + The ``fully allocated size`` is the file size of the new image once data has + been written to all sectors. This is the maximum size that the image file can + occupy with the exception of internal snapshots, dirty bitmaps, vmstate data, + and other advanced image format features. + +.. option:: snapshot [--object OBJECTDEF] [--image-opts] [-U] [-q] [-l | -a SNAPSHOT | -c SNAPSHOT | -d SNAPSHOT] FILENAME + + List, apply, create or delete snapshots in image *FILENAME*. + +.. option:: rebase [--object OBJECTDEF] [--image-opts] [-U] [-q] [-f FMT] [-t CACHE] [-T SRC_CACHE] [-p] [-u] -b BACKING_FILE [-F BACKING_FMT] FILENAME + + Changes the backing file of an image. Only the formats ``qcow2`` and + ``qed`` support changing the backing file. + + The backing file is changed to *BACKING_FILE* and (if the image format of + *FILENAME* supports this) the backing file format is changed to + *BACKING_FMT*. If *BACKING_FILE* is specified as "" (the empty + string), then the image is rebased onto no backing file (i.e. it will exist + independently of any backing file). + + If a relative path name is given, the backing file is looked up relative to + the directory containing *FILENAME*. + + *CACHE* specifies the cache mode to be used for *FILENAME*, whereas + *SRC_CACHE* specifies the cache mode for reading backing files. + + There are two different modes in which ``rebase`` can operate: + + Safe mode + This is the default mode and performs a real rebase operation. The + new backing file may differ from the old one and qemu-img rebase + will take care of keeping the guest-visible content of *FILENAME* + unchanged. + + In order to achieve this, any clusters that differ between + *BACKING_FILE* and the old backing file of *FILENAME* are merged + into *FILENAME* before actually changing the backing file. + + Note that the safe mode is an expensive operation, comparable to + converting an image. It only works if the old backing file still + exists. + + Unsafe mode + qemu-img uses the unsafe mode if ``-u`` is specified. In this + mode, only the backing file name and format of *FILENAME* is changed + without any checks on the file contents. The user must take care of + specifying the correct new backing file, or the guest-visible + content of the image will be corrupted. + + This mode is useful for renaming or moving the backing file to + somewhere else. It can be used without an accessible old backing + file, i.e. you can use it to fix an image whose backing file has + already been moved/renamed. + + You can use ``rebase`` to perform a "diff" operation on two + disk images. This can be useful when you have copied or cloned + a guest, and you want to get back to a thin image on top of a + template or base image. + + Say that ``base.img`` has been cloned as ``modified.img`` by + copying it, and that the ``modified.img`` guest has run so there + are now some changes compared to ``base.img``. To construct a thin + image called ``diff.qcow2`` that contains just the differences, do: + + :: + + qemu-img create -f qcow2 -b modified.img diff.qcow2 + qemu-img rebase -b base.img diff.qcow2 + + At this point, ``modified.img`` can be discarded, since + ``base.img + diff.qcow2`` contains the same information. + +.. option:: resize [--object OBJECTDEF] [--image-opts] [-f FMT] [--preallocation=PREALLOC] [-q] [--shrink] FILENAME [+ | -]SIZE + + Change the disk image as if it had been created with *SIZE*. + + Before using this command to shrink a disk image, you MUST use file system and + partitioning tools inside the VM to reduce allocated file systems and partition + sizes accordingly. Failure to do so will result in data loss! + + When shrinking images, the ``--shrink`` option must be given. This informs + qemu-img that the user acknowledges all loss of data beyond the truncated + image's end. + + After using this command to grow a disk image, you must use file system and + partitioning tools inside the VM to actually begin using the new space on the + device. + + When growing an image, the ``--preallocation`` option may be used to specify + how the additional image area should be allocated on the host. See the format + description in the :ref:`notes` section which values are allowed. Using this + option may result in slightly more data being allocated than necessary. + +.. _notes: + +Notes +----- + +Supported image file formats: + +``raw`` + + Raw disk image format (default). This format has the advantage of + being simple and easily exportable to all other emulators. If your + file system supports *holes* (for example in ext2 or ext3 on + Linux or NTFS on Windows), then only the written sectors will reserve + space. Use ``qemu-img info`` to know the real size used by the + image or ``ls -ls`` on Unix/Linux. + + Supported options: + + ``preallocation`` + Preallocation mode (allowed values: ``off``, ``falloc``, + ``full``). ``falloc`` mode preallocates space for image by + calling ``posix_fallocate()``. ``full`` mode preallocates space + for image by writing data to underlying storage. This data may or + may not be zero, depending on the storage location. + +``qcow2`` + + QEMU image format, the most versatile format. Use it to have smaller + images (useful if your filesystem does not supports holes, for example + on Windows), optional AES encryption, zlib based compression and + support of multiple VM snapshots. + + Supported options: + + ``compat`` + Determines the qcow2 version to use. ``compat=0.10`` uses the + traditional image format that can be read by any QEMU since 0.10. + ``compat=1.1`` enables image format extensions that only QEMU 1.1 and + newer understand (this is the default). Amongst others, this includes zero + clusters, which allow efficient copy-on-read for sparse images. + + ``backing_file`` + File name of a base image (see ``create`` subcommand) + + ``backing_fmt`` + Image format of the base image + + ``encryption`` + If this option is set to ``on``, the image is encrypted with + 128-bit AES-CBC. + + The use of encryption in qcow and qcow2 images is considered to be + flawed by modern cryptography standards, suffering from a number + of design problems: + + - The AES-CBC cipher is used with predictable initialization + vectors based on the sector number. This makes it vulnerable to + chosen plaintext attacks which can reveal the existence of + encrypted data. + + - The user passphrase is directly used as the encryption key. A + poorly chosen or short passphrase will compromise the security + of the encryption. + + - In the event of the passphrase being compromised there is no way + to change the passphrase to protect data in any qcow images. The + files must be cloned, using a different encryption passphrase in + the new file. The original file must then be securely erased + using a program like shred, though even this is ineffective with + many modern storage technologies. + + - Initialization vectors used to encrypt sectors are based on the + guest virtual sector number, instead of the host physical + sector. When a disk image has multiple internal snapshots this + means that data in multiple physical sectors is encrypted with + the same initialization vector. With the CBC mode, this opens + the possibility of watermarking attacks if the attack can + collect multiple sectors encrypted with the same IV and some + predictable data. Having multiple qcow2 images with the same + passphrase also exposes this weakness since the passphrase is + directly used as the key. + + Use of qcow / qcow2 encryption is thus strongly discouraged. Users are + recommended to use an alternative encryption technology such as the + Linux dm-crypt / LUKS system. + + ``cluster_size`` + Changes the qcow2 cluster size (must be between 512 and + 2M). Smaller cluster sizes can improve the image file size whereas + larger cluster sizes generally provide better performance. + + ``preallocation`` + Preallocation mode (allowed values: ``off``, ``metadata``, + ``falloc``, ``full``). An image with preallocated metadata is + initially larger but can improve performance when the image needs + to grow. ``falloc`` and ``full`` preallocations are like the same + options of ``raw`` format, but sets up metadata also. + + ``lazy_refcounts`` + If this option is set to ``on``, reference count updates are + postponed with the goal of avoiding metadata I/O and improving + performance. This is particularly interesting with + ``cache=writethrough`` which doesn't batch metadata + updates. The tradeoff is that after a host crash, the reference + count tables must be rebuilt, i.e. on the next open an (automatic) + ``qemu-img check -r all`` is required, which may take some time. + + This option can only be enabled if ``compat=1.1`` is specified. + + ``nocow`` + If this option is set to ``on``, it will turn off COW of the file. It's + only valid on btrfs, no effect on other file systems. + + Btrfs has low performance when hosting a VM image file, even more + when the guest on the VM also using btrfs as file system. Turning + off COW is a way to mitigate this bad performance. Generally there + are two ways to turn off COW on btrfs: + + - Disable it by mounting with nodatacow, then all newly created files + will be NOCOW + - For an empty file, add the NOCOW file attribute. That's what this + option does. + + Note: this option is only valid to new or empty files. If there is + an existing file which is COW and has data blocks already, it + couldn't be changed to NOCOW by setting ``nocow=on``. One can + issue ``lsattr filename`` to check if the NOCOW flag is set or not + (Capital 'C' is NOCOW flag). + +``Other`` + + QEMU also supports various other image file formats for + compatibility with older QEMU versions or other hypervisors, + including VMDK, VDI, VHD (vpc), VHDX, qcow1 and QED. For a full list + of supported formats see ``qemu-img --help``. For a more detailed + description of these formats, see the QEMU block drivers reference + documentation. + + The main purpose of the block drivers for these formats is image + conversion. For running VMs, it is recommended to convert the disk + images to either raw or qcow2 in order to achieve good performance. diff --git a/docs/tools/qemu-nbd.rst b/docs/tools/qemu-nbd.rst new file mode 100644 index 0000000..667861c --- /dev/null +++ b/docs/tools/qemu-nbd.rst @@ -0,0 +1,256 @@ +QEMU Disk Network Block Device Server +===================================== + +Synopsis +-------- + +**qemu-nbd** [*OPTION*]... *filename* + +**qemu-nbd** -L [*OPTION*]... + +**qemu-nbd** -d *dev* + +Description +----------- + +Export a QEMU disk image using the NBD protocol. + +Other uses: + +- Bind a /dev/nbdX block device to a QEMU server (on Linux). +- As a client to query exports of a remote NBD server. + +Options +------- + +.. program:: qemu-nbd + +*filename* is a disk image filename, or a set of block +driver options if ``--image-opts`` is specified. + +*dev* is an NBD device. + +.. option:: --object type,id=ID,...props... + + Define a new instance of the *type* object class identified by *ID*. + See the :manpage:`qemu(1)` manual page for full details of the properties + supported. The common object types that it makes sense to define are the + ``secret`` object, which is used to supply passwords and/or encryption + keys, and the ``tls-creds`` object, which is used to supply TLS + credentials for the qemu-nbd server or client. + +.. option:: -p, --port=PORT + + TCP port to listen on as a server, or connect to as a client + (default ``10809``). + +.. option:: -o, --offset=OFFSET + + The offset into the image. + +.. option:: -b, --bind=IFACE + + The interface to bind to as a server, or connect to as a client + (default ``0.0.0.0``). + +.. option:: -k, --socket=PATH + + Use a unix socket with path *PATH*. + +.. option:: --image-opts + + Treat *filename* as a set of image options, instead of a plain + filename. If this flag is specified, the ``-f`` flag should + not be used, instead the :option:`format=` option should be set. + +.. option:: -f, --format=FMT + + Force the use of the block driver for format *FMT* instead of + auto-detecting. + +.. option:: -r, --read-only + + Export the disk as read-only. + +.. option:: -B, --bitmap=NAME + + If *filename* has a qcow2 persistent bitmap *NAME*, expose + that bitmap via the ``qemu:dirty-bitmap:NAME`` context + accessible through NBD_OPT_SET_META_CONTEXT. + +.. option:: -s, --snapshot + + Use *filename* as an external snapshot, create a temporary + file with ``backing_file=``\ *filename*, redirect the write to + the temporary one. + +.. option:: -l, --load-snapshot=SNAPSHOT_PARAM + + Load an internal snapshot inside *filename* and export it + as an read-only device, SNAPSHOT_PARAM format is + ``snapshot.id=[ID],snapshot.name=[NAME]`` or ``[ID_OR_NAME]`` + +.. option:: --cache=CACHE + + The cache mode to be used with the file. See the documentation of + the emulator's ``-drive cache=...`` option for allowed values. + +.. option:: -n, --nocache + + Equivalent to :option:`--cache=none`. + +.. option:: --aio=AIO + + Set the asynchronous I/O mode between ``threads`` (the default), + ``native`` (Linux only), and ``io_uring`` (Linux 5.1+). + +.. option:: --discard=DISCARD + + Control whether ``discard`` (also known as ``trim`` or ``unmap``) + requests are ignored or passed to the filesystem. *DISCARD* is one of + ``ignore`` (or ``off``), ``unmap`` (or ``on``). The default is + ``ignore``. + +.. option:: --detect-zeroes=DETECT_ZEROES + + Control the automatic conversion of plain zero writes by the OS to + driver-specific optimized zero write commands. *DETECT_ZEROES* is one of + ``off``, ``on``, or ``unmap``. ``unmap`` + converts a zero write to an unmap operation and can only be used if + *DISCARD* is set to ``unmap``. The default is ``off``. + +.. option:: -c, --connect=DEV + + Connect *filename* to NBD device *DEV* (Linux only). + +.. option:: -d, --disconnect + + Disconnect the device *DEV* (Linux only). + +.. option:: -e, --shared=NUM + + Allow up to *NUM* clients to share the device (default + ``1``). Safe for readers, but for now, consistency is not + guaranteed between multiple writers. + +.. option:: -t, --persistent + + Don't exit on the last connection. + +.. option:: -x, --export-name=NAME + + Set the NBD volume export name (default of a zero-length string). + +.. option:: -D, --description=DESCRIPTION + + Set the NBD volume export description, as a human-readable + string. + +.. option:: -L, --list + + Connect as a client and list all details about the exports exposed by + a remote NBD server. This enables list mode, and is incompatible + with options that change behavior related to a specific export (such as + :option:`--export-name`, :option:`--offset`, ...). + +.. option:: --tls-creds=ID + + Enable mandatory TLS encryption for the server by setting the ID + of the TLS credentials object previously created with the --object + option; or provide the credentials needed for connecting as a client + in list mode. + +.. option:: --fork + + Fork off the server process and exit the parent once the server is running. + +.. option:: --pid-file=PATH + + Store the server's process ID in the given file. + +.. option:: --tls-authz=ID + + Specify the ID of a qauthz object previously created with the + :option:`--object` option. This will be used to authorize connecting users + against their x509 distinguished name. + +.. option:: -v, --verbose + + Display extra debugging information. + +.. option:: -h, --help + + Display this help and exit. + +.. option:: -V, --version + + Display version information and exit. + +.. option:: -T, --trace [[enable=]PATTERN][,events=FILE][,file=FILE] + + .. include:: ../qemu-option-trace.rst.inc + +Examples +-------- + +Start a server listening on port 10809 that exposes only the +guest-visible contents of a qcow2 file, with no TLS encryption, and +with the default export name (an empty string). The command is +one-shot, and will block until the first successful client +disconnects: + +:: + + qemu-nbd -f qcow2 file.qcow2 + +Start a long-running server listening with encryption on port 10810, +and whitelist clients with a specific X.509 certificate to connect to +a 1 megabyte subset of a raw file, using the export name 'subset': + +:: + + qemu-nbd \ + --object tls-creds-x509,id=tls0,endpoint=server,dir=/path/to/qemutls \ + --object 'authz-simple,id=auth0,identity=CN=laptop.example.com,,\ + O=Example Org,,L=London,,ST=London,,C=GB' \ + --tls-creds tls0 --tls-authz auth0 \ + -t -x subset -p 10810 \ + --image-opts driver=raw,offset=1M,size=1M,file.driver=file,file.filename=file.raw + +Serve a read-only copy of a guest image over a Unix socket with as +many as 5 simultaneous readers, with a persistent process forked as a +daemon: + +:: + + qemu-nbd --fork --persistent --shared=5 --socket=/path/to/sock \ + --read-only --format=qcow2 file.qcow2 + +Expose the guest-visible contents of a qcow2 file via a block device +/dev/nbd0 (and possibly creating /dev/nbd0p1 and friends for +partitions found within), then disconnect the device when done. +Access to bind qemu-nbd to an /dev/nbd device generally requires root +privileges, and may also require the execution of ``modprobe nbd`` +to enable the kernel NBD client module. *CAUTION*: Do not use +this method to mount filesystems from an untrusted guest image - a +malicious guest may have prepared the image to attempt to trigger +kernel bugs in partition probing or file system mounting. + +:: + + qemu-nbd -c /dev/nbd0 -f qcow2 file.qcow2 + qemu-nbd -d /dev/nbd0 + +Query a remote server to see details about what export(s) it is +serving on port 10809, and authenticating via PSK: + +:: + + qemu-nbd \ + --object tls-creds-psk,id=tls0,dir=/tmp/keys,username=eblake,endpoint=client \ + --tls-creds tls0 -L -b remote.example.com + +See also +-------- + +:manpage:`qemu(1)`, :manpage:`qemu-img(1)` diff --git a/docs/tools/qemu-trace-stap.rst b/docs/tools/qemu-trace-stap.rst new file mode 100644 index 0000000..fb70445 --- /dev/null +++ b/docs/tools/qemu-trace-stap.rst @@ -0,0 +1,124 @@ +QEMU SystemTap trace tool +========================= + +Synopsis +-------- + +**qemu-trace-stap** [*GLOBAL-OPTIONS*] *COMMAND* [*COMMAND-OPTIONS*] *ARGS*... + +Description +----------- + +The ``qemu-trace-stap`` program facilitates tracing of the execution +of QEMU emulators using SystemTap. + +It is required to have the SystemTap runtime environment installed to use +this program, since it is a wrapper around execution of the ``stap`` +program. + +Options +------- + +.. program:: qemu-trace-stap + +The following global options may be used regardless of which command +is executed: + +.. option:: --verbose, -v + + Display verbose information about command execution. + +The following commands are valid: + +.. option:: list BINARY PATTERN... + + List all the probe names provided by *BINARY* that match + *PATTERN*. + + If *BINARY* is not an absolute path, it will be located by searching + the directories listed in the ``$PATH`` environment variable. + + *PATTERN* is a plain string that is used to filter the results of + this command. It may optionally contain a ``*`` wildcard to facilitate + matching multiple probes without listing each one explicitly. Multiple + *PATTERN* arguments may be given, causing listing of probes that match + any of the listed names. If no *PATTERN* is given, the all possible + probes will be listed. + + For example, to list all probes available in the ``qemu-system-x86_64`` + binary: + + :: + + $ qemu-trace-stap list qemu-system-x86_64 + + To filter the list to only cover probes related to QEMU's cryptographic + subsystem, in a binary outside ``$PATH`` + + :: + + $ qemu-trace-stap list /opt/qemu/4.0.0/bin/qemu-system-x86_64 'qcrypto*' + +.. option:: run OPTIONS BINARY PATTERN... + + Run a trace session, printing formatted output any time a process that is + executing *BINARY* triggers a probe matching *PATTERN*. + + If *BINARY* is not an absolute path, it will be located by searching + the directories listed in the ``$PATH`` environment variable. + + *PATTERN* is a plain string that matches a probe name shown by the + *LIST* command. It may optionally contain a ``*`` wildcard to + facilitate matching multiple probes without listing each one explicitly. + Multiple *PATTERN* arguments may be given, causing all matching probes + to be monitored. At least one *PATTERN* is required, since stap is not + capable of tracing all known QEMU probes concurrently without overflowing + its trace buffer. + + Invocation of this command does not need to be synchronized with + invocation of the QEMU process(es). It will match probes on all + existing running processes and all future launched processes, + unless told to only monitor a specific process. + + Valid command specific options are: + + .. program:: qemu-trace-stap-run + + .. option:: --pid=PID, -p PID + + Restrict the tracing session so that it only triggers for the process + identified by *PID*. + + For example, to monitor all processes executing ``qemu-system-x86_64`` + as found on ``$PATH``, displaying all I/O related probes: + + :: + + $ qemu-trace-stap run qemu-system-x86_64 'qio*' + + To monitor only the QEMU process with PID 1732 + + :: + + $ qemu-trace-stap run --pid=1732 qemu-system-x86_64 'qio*' + + To monitor QEMU processes running an alternative binary outside of + ``$PATH``, displaying verbose information about setup of the + tracing environment: + + :: + + $ qemu-trace-stap -v run /opt/qemu/4.0.0/qemu-system-x86_64 'qio*' + +See also +-------- + +:manpage:`qemu(1)`, :manpage:`stap(1)` + +.. + Copyright (C) 2019 Red Hat, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. diff --git a/docs/tools/virtfs-proxy-helper.rst b/docs/tools/virtfs-proxy-helper.rst new file mode 100644 index 0000000..6cdeedf --- /dev/null +++ b/docs/tools/virtfs-proxy-helper.rst @@ -0,0 +1,72 @@ +QEMU 9p virtfs proxy filesystem helper +====================================== + +Synopsis +-------- + +**virtfs-proxy-helper** [*OPTIONS*] + +Description +----------- + +Pass-through security model in QEMU 9p server needs root privilege to do +few file operations (like chown, chmod to any mode/uid:gid). There are two +issues in pass-through security model: + +- TOCTTOU vulnerability: Following symbolic links in the server could + provide access to files beyond 9p export path. + +- Running QEMU with root privilege could be a security issue. + +To overcome above issues, following approach is used: A new filesystem +type 'proxy' is introduced. Proxy FS uses chroot + socket combination +for securing the vulnerability known with following symbolic links. +Intention of adding a new filesystem type is to allow qemu to run +in non-root mode, but doing privileged operations using socket IO. + +Proxy helper (a stand alone binary part of qemu) is invoked with +root privileges. Proxy helper chroots into 9p export path and creates +a socket pair or a named socket based on the command line parameter. +QEMU and proxy helper communicate using this socket. QEMU proxy fs +driver sends filesystem request to proxy helper and receives the +response from it. + +The proxy helper is designed so that it can drop root privileges except +for the capabilities needed for doing filesystem operations. + +Options +------- + +The following options are supported: + +.. program:: virtfs-proxy-helper + +.. option:: -h + + Display help and exit + +.. option:: -p, --path PATH + + Path to export for proxy filesystem driver + +.. option:: -f, --fd SOCKET_ID + + Use given file descriptor as socket descriptor for communicating with + qemu proxy fs drier. Usually a helper like libvirt will create + socketpair and pass one of the fds as parameter to this option. + +.. option:: -s, --socket SOCKET_FILE + + Creates named socket file for communicating with qemu proxy fs driver + +.. option:: -u, --uid UID + + uid to give access to named socket file; used in combination with -g. + +.. option:: -g, --gid GID + + gid to give access to named socket file; used in combination with -u. + +.. option:: -n, --nodaemon + + Run as a normal program. By default program will run in daemon mode diff --git a/docs/tools/virtiofsd.rst b/docs/tools/virtiofsd.rst new file mode 100644 index 0000000..378594c --- /dev/null +++ b/docs/tools/virtiofsd.rst @@ -0,0 +1,120 @@ +QEMU virtio-fs shared file system daemon +======================================== + +Synopsis +-------- + +**virtiofsd** [*OPTIONS*] + +Description +----------- + +Share a host directory tree with a guest through a virtio-fs device. This +program is a vhost-user backend that implements the virtio-fs device. Each +virtio-fs device instance requires its own virtiofsd process. + +This program is designed to work with QEMU's ``--device vhost-user-fs-pci`` +but should work with any virtual machine monitor (VMM) that supports +vhost-user. See the Examples section below. + +This program must be run as the root user. Upon startup the program will +switch into a new file system namespace with the shared directory tree as its +root. This prevents "file system escapes" due to symlinks and other file +system objects that might lead to files outside the shared directory. The +program also sandboxes itself using seccomp(2) to prevent ptrace(2) and other +vectors that could allow an attacker to compromise the system after gaining +control of the virtiofsd process. + +Options +------- + +.. program:: virtiofsd + +.. option:: -h, --help + + Print help. + +.. option:: -V, --version + + Print version. + +.. option:: -d + + Enable debug output. + +.. option:: --syslog + + Print log messages to syslog instead of stderr. + +.. option:: -o OPTION + + * debug - + Enable debug output. + + * flock|no_flock - + Enable/disable flock. The default is ``no_flock``. + + * log_level=LEVEL - + Print only log messages matching LEVEL or more severe. LEVEL is one of + ``err``, ``warn``, ``info``, or ``debug``. The default is ``info``. + + * norace - + Disable racy fallback. The default is false. + + * posix_lock|no_posix_lock - + Enable/disable remote POSIX locks. The default is ``posix_lock``. + + * readdirplus|no_readdirplus - + Enable/disable readdirplus. The default is ``readdirplus``. + + * source=PATH - + Share host directory tree located at PATH. This option is required. + + * timeout=TIMEOUT - + I/O timeout in seconds. The default depends on cache= option. + + * writeback|no_writeback - + Enable/disable writeback cache. The cache alows the FUSE client to buffer + and merge write requests. The default is ``no_writeback``. + + * xattr|no_xattr - + Enable/disable extended attributes (xattr) on files and directories. The + default is ``no_xattr``. + +.. option:: --socket-path=PATH + + Listen on vhost-user UNIX domain socket at PATH. + +.. option:: --fd=FDNUM + + Accept connections from vhost-user UNIX domain socket file descriptor FDNUM. + The file descriptor must already be listening for connections. + +.. option:: --thread-pool-size=NUM + + Restrict the number of worker threads per request queue to NUM. The default + is 64. + +.. option:: --cache=none|auto|always + + Select the desired trade-off between coherency and performance. ``none`` + forbids the FUSE client from caching to achieve best coherency at the cost of + performance. ``auto`` acts similar to NFS with a 1 second metadata cache + timeout. ``always`` sets a long cache lifetime at the expense of coherency. + +Examples +-------- + +Export ``/var/lib/fs/vm001/`` on vhost-user UNIX domain socket +``/var/run/vm001-vhost-fs.sock``: + +:: + + host# virtiofsd --socket-path=/var/run/vm001-vhost-fs.sock -o source=/var/lib/fs/vm001 + host# qemu-system-x86_64 \ + -chardev socket,id=char0,path=/var/run/vm001-vhost-fs.sock \ + -device vhost-user-fs-pci,chardev=char0,tag=myfs \ + -object memory-backend-memfd,id=mem,size=4G,share=on \ + -numa node,memdev=mem \ + ... + guest# mount -t virtiofs myfs /mnt -- cgit v1.1