From 4ab1559085d688dddf4de77f1ead3102e243e668 Mon Sep 17 00:00:00 2001 From: Chunyan Liu Date: Mon, 30 Jun 2014 14:29:58 +0800 Subject: qemu-img create: add 'nocow' option Add 'nocow' option so that users could have a chance to set NOCOW flag to newly created files. It's useful on btrfs file system to enhance performance. Btrfs has low performance when hosting VM images, even more when the guest in those VM are also using btrfs as file system. One way to mitigate this bad performance is to turn off COW attributes on VM files. Generally, there are two ways to turn off NOCOW on btrfs: a) by mounting fs with nodatacow, then all newly created files will be NOCOW. b) per file. Add the NOCOW file attribute. It could only be done to empty or new files. This patch tries the second way, according to the option, it could add NOCOW per file. For most block drivers, since the create file step is in raw-posix.c, so we can do setting NOCOW flag ioctl in raw-posix.c only. But there are some exceptions, like block/vpc.c and block/vdi.c, they are creating file by calling qemu_open directly. For them, do the same setting NOCOW flag ioctl work in them separately. [Fixed up 082.out due to the new 'nocow' creation option --Stefan] Signed-off-by: Chunyan Liu Signed-off-by: Stefan Hajnoczi --- block/qed.c | 6 +++--- block/raw-posix.c | 25 +++++++++++++++++++++++++ block/vdi.c | 29 +++++++++++++++++++++++++++++ block/vmdk.c | 6 +++--- block/vpc.c | 29 +++++++++++++++++++++++++++++ include/block/block_int.h | 1 + qemu-doc.texi | 16 ++++++++++++++++ qemu-img.texi | 16 ++++++++++++++++ tests/qemu-iotests/082.out | 24 ++++++++++++++++++++++++ 9 files changed, 146 insertions(+), 6 deletions(-) diff --git a/block/qed.c b/block/qed.c index eddae92..b69374b 100644 --- a/block/qed.c +++ b/block/qed.c @@ -567,7 +567,7 @@ static void bdrv_qed_close(BlockDriverState *bs) static int qed_create(const char *filename, uint32_t cluster_size, uint64_t image_size, uint32_t table_size, const char *backing_file, const char *backing_fmt, - Error **errp) + QemuOpts *opts, Error **errp) { QEDHeader header = { .magic = QED_MAGIC, @@ -586,7 +586,7 @@ static int qed_create(const char *filename, uint32_t cluster_size, int ret = 0; BlockDriverState *bs; - ret = bdrv_create_file(filename, NULL, &local_err); + ret = bdrv_create_file(filename, opts, &local_err); if (ret < 0) { error_propagate(errp, local_err); return ret; @@ -682,7 +682,7 @@ static int bdrv_qed_create(const char *filename, QemuOpts *opts, Error **errp) } ret = qed_create(filename, cluster_size, image_size, table_size, - backing_file, backing_fmt, errp); + backing_file, backing_fmt, opts, errp); finish: g_free(backing_file); diff --git a/block/raw-posix.c b/block/raw-posix.c index dacf4fb..825a0c8 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -55,6 +55,9 @@ #include #include #include +#ifndef FS_NOCOW_FL +#define FS_NOCOW_FL 0x00800000 /* Do not cow file */ +#endif #endif #ifdef CONFIG_FIEMAP #include @@ -1278,12 +1281,14 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp) int fd; int result = 0; int64_t total_size = 0; + bool nocow = false; strstart(filename, "file:", &filename); /* Read out options */ total_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0) / BDRV_SECTOR_SIZE; + nocow = qemu_opt_get_bool(opts, BLOCK_OPT_NOCOW, false); fd = qemu_open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644); @@ -1291,6 +1296,21 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp) result = -errno; error_setg_errno(errp, -result, "Could not create file"); } else { + if (nocow) { +#ifdef __linux__ + /* Set NOCOW flag to solve performance issue on fs like btrfs. + * This is an optimisation. The FS_IOC_SETFLAGS ioctl return value + * will be ignored since any failure of this operation should not + * block the left work. + */ + int attr; + if (ioctl(fd, FS_IOC_GETFLAGS, &attr) == 0) { + attr |= FS_NOCOW_FL; + ioctl(fd, FS_IOC_SETFLAGS, &attr); + } +#endif + } + if (ftruncate(fd, total_size * BDRV_SECTOR_SIZE) != 0) { result = -errno; error_setg_errno(errp, -result, "Could not resize file"); @@ -1477,6 +1497,11 @@ static QemuOptsList raw_create_opts = { .type = QEMU_OPT_SIZE, .help = "Virtual disk size" }, + { + .name = BLOCK_OPT_NOCOW, + .type = QEMU_OPT_BOOL, + .help = "Turn off copy-on-write (valid only on btrfs)" + }, { /* end of list */ } } }; diff --git a/block/vdi.c b/block/vdi.c index 01fe22e..197bd77 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -53,6 +53,13 @@ #include "block/block_int.h" #include "qemu/module.h" #include "migration/migration.h" +#ifdef __linux__ +#include +#include +#ifndef FS_NOCOW_FL +#define FS_NOCOW_FL 0x00800000 /* Do not cow file */ +#endif +#endif #if defined(CONFIG_UUID) #include @@ -683,6 +690,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp) VdiHeader header; size_t i; size_t bmap_size; + bool nocow = false; logout("\n"); @@ -699,6 +707,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp) image_type = VDI_TYPE_STATIC; } #endif + nocow = qemu_opt_get_bool_del(opts, BLOCK_OPT_NOCOW, false); if (bytes > VDI_DISK_SIZE_MAX) { result = -ENOTSUP; @@ -716,6 +725,21 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp) goto exit; } + if (nocow) { +#ifdef __linux__ + /* Set NOCOW flag to solve performance issue on fs like btrfs. + * This is an optimisation. The FS_IOC_SETFLAGS ioctl return value will + * be ignored since any failure of this operation should not block the + * left work. + */ + int attr; + if (ioctl(fd, FS_IOC_GETFLAGS, &attr) == 0) { + attr |= FS_NOCOW_FL; + ioctl(fd, FS_IOC_SETFLAGS, &attr); + } +#endif + } + /* We need enough blocks to store the given disk size, so always round up. */ blocks = (bytes + block_size - 1) / block_size; @@ -818,6 +842,11 @@ static QemuOptsList vdi_create_opts = { .def_value_str = "off" }, #endif + { + .name = BLOCK_OPT_NOCOW, + .type = QEMU_OPT_BOOL, + .help = "Turn off copy-on-write (valid only on btrfs)" + }, /* TODO: An additional option to set UUID values might be useful. */ { /* end of list */ } } diff --git a/block/vmdk.c b/block/vmdk.c index d0de019..27a78da 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -1529,7 +1529,7 @@ static int coroutine_fn vmdk_co_write_zeroes(BlockDriverState *bs, static int vmdk_create_extent(const char *filename, int64_t filesize, bool flat, bool compress, bool zeroed_grain, - Error **errp) + QemuOpts *opts, Error **errp) { int ret, i; BlockDriverState *bs = NULL; @@ -1539,7 +1539,7 @@ static int vmdk_create_extent(const char *filename, int64_t filesize, uint32_t *gd_buf = NULL; int gd_buf_size; - ret = bdrv_create_file(filename, NULL, &local_err); + ret = bdrv_create_file(filename, opts, &local_err); if (ret < 0) { error_propagate(errp, local_err); goto exit; @@ -1845,7 +1845,7 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp) path, desc_filename); if (vmdk_create_extent(ext_filename, size, - flat, compress, zeroed_grain, errp)) { + flat, compress, zeroed_grain, opts, errp)) { ret = -EINVAL; goto exit; } diff --git a/block/vpc.c b/block/vpc.c index 798d854..8b376a4 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -29,6 +29,13 @@ #if defined(CONFIG_UUID) #include #endif +#ifdef __linux__ +#include +#include +#ifndef FS_NOCOW_FL +#define FS_NOCOW_FL 0x00800000 /* Do not cow file */ +#endif +#endif /**************************************************************/ @@ -751,6 +758,7 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp) int64_t total_size; int disk_type; int ret = -EIO; + bool nocow = false; /* Read out options */ total_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0); @@ -767,6 +775,7 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp) } else { disk_type = VHD_DYNAMIC; } + nocow = qemu_opt_get_bool_del(opts, BLOCK_OPT_NOCOW, false); /* Create the file */ fd = qemu_open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644); @@ -775,6 +784,21 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp) goto out; } + if (nocow) { +#ifdef __linux__ + /* Set NOCOW flag to solve performance issue on fs like btrfs. + * This is an optimisation. The FS_IOC_SETFLAGS ioctl return value will + * be ignored since any failure of this operation should not block the + * left work. + */ + int attr; + if (ioctl(fd, FS_IOC_GETFLAGS, &attr) == 0) { + attr |= FS_NOCOW_FL; + ioctl(fd, FS_IOC_SETFLAGS, &attr); + } +#endif + } + /* * Calculate matching total_size and geometry. Increase the number of * sectors requested until we get enough (or fail). This ensures that @@ -884,6 +908,11 @@ static QemuOptsList vpc_create_opts = { "Type of virtual hard disk format. Supported formats are " "{dynamic (default) | fixed} " }, + { + .name = BLOCK_OPT_NOCOW, + .type = QEMU_OPT_BOOL, + .help = "Turn off copy-on-write (valid only on btrfs)" + }, { /* end of list */ } } }; diff --git a/include/block/block_int.h b/include/block/block_int.h index 53e77cf..eaf6e31 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -54,6 +54,7 @@ #define BLOCK_OPT_LAZY_REFCOUNTS "lazy_refcounts" #define BLOCK_OPT_ADAPTER_TYPE "adapter_type" #define BLOCK_OPT_REDUNDANCY "redundancy" +#define BLOCK_OPT_NOCOW "nocow" typedef struct BdrvTrackedRequest { BlockDriverState *bs; diff --git a/qemu-doc.texi b/qemu-doc.texi index 88ec9bb..ad92c85 100644 --- a/qemu-doc.texi +++ b/qemu-doc.texi @@ -589,6 +589,22 @@ check -r all} is required, which may take some time. This option can only be enabled if @code{compat=1.1} is specified. +@item nocow +If this option is set to @code{on}, it will trun off COW of the file. It's only +valid on btrfs, no effect on other file systems. + +Btrfs has low performance when hosting a VM image file, even more when the guest +on the VM also using btrfs as file system. Turning off COW is a way to mitigate +this bad performance. Generally there are two ways to turn off COW on btrfs: +a) Disable it by mounting with nodatacow, then all newly created files will be +NOCOW. b) For an empty file, add the NOCOW file attribute. That's what this option +does. + +Note: this option is only valid to new or empty files. If there is an existing +file which is COW and has data blocks already, it couldn't be changed to NOCOW +by setting @code{nocow=on}. One can issue @code{lsattr filename} to check if +the NOCOW flag is set or not (Capitabl 'C' is NOCOW flag). + @end table @item qed diff --git a/qemu-img.texi b/qemu-img.texi index c68b541..8496f3b 100644 --- a/qemu-img.texi +++ b/qemu-img.texi @@ -474,6 +474,22 @@ check -r all} is required, which may take some time. This option can only be enabled if @code{compat=1.1} is specified. +@item nocow +If this option is set to @code{on}, it will trun off COW of the file. It's only +valid on btrfs, no effect on other file systems. + +Btrfs has low performance when hosting a VM image file, even more when the guest +on the VM also using btrfs as file system. Turning off COW is a way to mitigate +this bad performance. Generally there are two ways to turn off COW on btrfs: +a) Disable it by mounting with nodatacow, then all newly created files will be +NOCOW. b) For an empty file, add the NOCOW file attribute. That's what this option +does. + +Note: this option is only valid to new or empty files. If there is an existing +file which is COW and has data blocks already, it couldn't be changed to NOCOW +by setting @code{nocow=on}. One can issue @code{lsattr filename} to check if +the NOCOW flag is set or not (Capitabl 'C' is NOCOW flag). + @end table @item Other diff --git a/tests/qemu-iotests/082.out b/tests/qemu-iotests/082.out index 28309a0..413e7ef 100644 --- a/tests/qemu-iotests/082.out +++ b/tests/qemu-iotests/082.out @@ -66,6 +66,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: create -f qcow2 -o ? TEST_DIR/t.qcow2 128M Supported options: @@ -77,6 +78,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: create -f qcow2 -o cluster_size=4k,help TEST_DIR/t.qcow2 128M Supported options: @@ -88,6 +90,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: create -f qcow2 -o cluster_size=4k,? TEST_DIR/t.qcow2 128M Supported options: @@ -99,6 +102,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: create -f qcow2 -o help,cluster_size=4k TEST_DIR/t.qcow2 128M Supported options: @@ -110,6 +114,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: create -f qcow2 -o ?,cluster_size=4k TEST_DIR/t.qcow2 128M Supported options: @@ -121,6 +126,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: create -f qcow2 -o cluster_size=4k -o help TEST_DIR/t.qcow2 128M Supported options: @@ -132,6 +138,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: create -f qcow2 -o cluster_size=4k -o ? TEST_DIR/t.qcow2 128M Supported options: @@ -143,6 +150,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: create -f qcow2 -o backing_file=TEST_DIR/t.qcow2,,help TEST_DIR/t.qcow2 128M Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=134217728 backing_file='TEST_DIR/t.qcow2,help' encryption=off cluster_size=65536 lazy_refcounts=off @@ -247,6 +255,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: convert -O qcow2 -o ? TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.base Supported options: @@ -258,6 +267,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: convert -O qcow2 -o cluster_size=4k,help TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.base Supported options: @@ -269,6 +279,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: convert -O qcow2 -o cluster_size=4k,? TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.base Supported options: @@ -280,6 +291,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: convert -O qcow2 -o help,cluster_size=4k TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.base Supported options: @@ -291,6 +303,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: convert -O qcow2 -o ?,cluster_size=4k TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.base Supported options: @@ -302,6 +315,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: convert -O qcow2 -o cluster_size=4k -o help TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.base Supported options: @@ -313,6 +327,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: convert -O qcow2 -o cluster_size=4k -o ? TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.base Supported options: @@ -324,6 +339,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: convert -O qcow2 -o backing_file=TEST_DIR/t.qcow2,,help TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.base qemu-img: Could not open 'TEST_DIR/t.qcow2.base': Could not open backing file: Could not open 'TEST_DIR/t.qcow2,help': No such file or directory @@ -417,6 +433,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: amend -f qcow2 -o ? TEST_DIR/t.qcow2 Supported options: @@ -428,6 +445,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: amend -f qcow2 -o cluster_size=4k,help TEST_DIR/t.qcow2 Supported options: @@ -439,6 +457,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: amend -f qcow2 -o cluster_size=4k,? TEST_DIR/t.qcow2 Supported options: @@ -450,6 +469,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: amend -f qcow2 -o help,cluster_size=4k TEST_DIR/t.qcow2 Supported options: @@ -461,6 +481,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: amend -f qcow2 -o ?,cluster_size=4k TEST_DIR/t.qcow2 Supported options: @@ -472,6 +493,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: amend -f qcow2 -o cluster_size=4k -o help TEST_DIR/t.qcow2 Supported options: @@ -483,6 +505,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: amend -f qcow2 -o cluster_size=4k -o ? TEST_DIR/t.qcow2 Supported options: @@ -494,6 +517,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata) lazy_refcounts Postpone refcount updates +nocow Turn off copy-on-write (valid only on btrfs) Testing: amend -f qcow2 -o backing_file=TEST_DIR/t.qcow2,,help TEST_DIR/t.qcow2 -- cgit v1.1