From 1f71049523f4fc0738f96c74bfcce012521fa0f0 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 12 Oct 2012 14:29:18 +0200 Subject: qemu-img: Fix division by zero for zero size images Signed-off-by: Kevin Wolf Reviewed-by: Paolo Bonzini --- qemu-img.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/qemu-img.c b/qemu-img.c index f17f187..849eb41 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -674,7 +674,7 @@ static int img_convert(int argc, char **argv) QEMUOptionParameter *out_baseimg_param; char *options = NULL; const char *snapshot_name = NULL; - float local_progress; + float local_progress = 0; int min_sparse = 8; /* Need at least 4k of zeros for sparse detection */ fmt = NULL; @@ -914,8 +914,10 @@ static int img_convert(int argc, char **argv) sector_num = 0; nb_sectors = total_sectors; - local_progress = (float)100 / - (nb_sectors / MIN(nb_sectors, cluster_sectors)); + if (nb_sectors != 0) { + local_progress = (float)100 / + (nb_sectors / MIN(nb_sectors, cluster_sectors)); + } for(;;) { int64_t bs_num; @@ -986,8 +988,10 @@ static int img_convert(int argc, char **argv) sector_num = 0; // total number of sectors converted so far nb_sectors = total_sectors - sector_num; - local_progress = (float)100 / - (nb_sectors / MIN(nb_sectors, IO_BUF_SIZE / 512)); + if (nb_sectors != 0) { + local_progress = (float)100 / + (nb_sectors / MIN(nb_sectors, IO_BUF_SIZE / 512)); + } for(;;) { nb_sectors = total_sectors - sector_num; @@ -1585,7 +1589,7 @@ static int img_rebase(int argc, char **argv) int n; uint8_t * buf_old; uint8_t * buf_new; - float local_progress; + float local_progress = 0; buf_old = qemu_blockalign(bs, IO_BUF_SIZE); buf_new = qemu_blockalign(bs, IO_BUF_SIZE); @@ -1594,8 +1598,11 @@ static int img_rebase(int argc, char **argv) bdrv_get_geometry(bs_old_backing, &old_backing_num_sectors); bdrv_get_geometry(bs_new_backing, &new_backing_num_sectors); - local_progress = (float)100 / - (num_sectors / MIN(num_sectors, IO_BUF_SIZE / 512)); + if (num_sectors != 0) { + local_progress = (float)100 / + (num_sectors / MIN(num_sectors, IO_BUF_SIZE / 512)); + } + for (sector = 0; sector < num_sectors; sector += n) { /* How many sectors can we handle with the next read? */ -- cgit v1.1 From ee17f9d5fd44fba950ab2290dd30f38d1cd2b625 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 12 Oct 2012 14:24:42 +0200 Subject: qemu-iotests: Test qemu-img operation on zero size image Signed-off-by: Kevin Wolf Reviewed-by: Paolo Bonzini --- tests/qemu-iotests/042 | 78 ++++++++++++++++++++++++++++++++++++++++++++++ tests/qemu-iotests/042.out | 15 +++++++++ tests/qemu-iotests/group | 1 + 3 files changed, 94 insertions(+) create mode 100755 tests/qemu-iotests/042 create mode 100644 tests/qemu-iotests/042.out diff --git a/tests/qemu-iotests/042 b/tests/qemu-iotests/042 new file mode 100755 index 0000000..c3c3ca8 --- /dev/null +++ b/tests/qemu-iotests/042 @@ -0,0 +1,78 @@ +#!/bin/bash +# +# Test qemu-img operation on zero size images +# +# Copyright (C) 2012 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +# creator +owner=kwolf@redhat.com + +seq=`basename $0` +echo "QA output created by $seq" + +here=`pwd` +tmp=/tmp/$$ +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter + +_supported_fmt qcow2 qcow qed vmdk +_supported_proto file +_supported_os Linux + +echo +echo "== Creating zero size image ==" + +_make_test_img 0 +_check_test_img + +mv $TEST_IMG $TEST_IMG.orig + +echo +echo "== Converting the image ==" + +$QEMU_IMG convert -O $IMGFMT $TEST_IMG.orig $TEST_IMG +_check_test_img + +echo +echo "== Converting the image, compressed ==" + +if [ "$IMGFMT" == "qcow2" ]; then + $QEMU_IMG convert -c -O $IMGFMT $TEST_IMG.orig $TEST_IMG +fi +_check_test_img + +echo +echo "== Rebasing the image ==" + +$QEMU_IMG rebase -u -b $TEST_IMG.orig $TEST_IMG +$QEMU_IMG rebase -b $TEST_IMG.orig $TEST_IMG +_check_test_img + +# success, all done +echo "*** done" +rm -f $seq.full +status=0 + diff --git a/tests/qemu-iotests/042.out b/tests/qemu-iotests/042.out new file mode 100644 index 0000000..dc80f4b --- /dev/null +++ b/tests/qemu-iotests/042.out @@ -0,0 +1,15 @@ +QA output created by 042 + +== Creating zero size image == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=0 +No errors were found on the image. + +== Converting the image == +No errors were found on the image. + +== Converting the image, compressed == +No errors were found on the image. + +== Rebasing the image == +No errors were found on the image. +*** done diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index 66d2ba9..d5f9ab0 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -47,3 +47,4 @@ 038 rw auto backing 039 rw auto 040 rw auto +042 rw auto quick -- cgit v1.1 From b3d0380ec245d73e5233366f541497ef92b2e283 Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Mon, 15 Oct 2012 16:58:02 -0400 Subject: qmp: fix __accept() in qmp.py In QEMUMonitorProtocol, commit e9d17b6 removed the __sockfile creation from __negotiate_capabilities(), which breaks _accept(). This causes failures in qemu-io python based tests (i.e. tests 030 and 040). This patch creates the sockfile in __accept() as well. Signed-off-by: Jeff Cody Reviewed-by: Stefan Hajnoczi Acked-by: Luiz Capitulino Signed-off-by: Kevin Wolf --- QMP/qmp.py | 1 + 1 file changed, 1 insertion(+) diff --git a/QMP/qmp.py b/QMP/qmp.py index 33c7d36..32510a1 100644 --- a/QMP/qmp.py +++ b/QMP/qmp.py @@ -96,6 +96,7 @@ class QEMUMonitorProtocol: @raise QMPCapabilitiesError if fails to negotiate capabilities """ self.__sock, _ = self.__sock.accept() + self.__sockfile = self.__sock.makefile() return self.__negotiate_capabilities() def cmd_obj(self, qmp_cmd): -- cgit v1.1 From a616673dd1c2e00db5e3458d2ba4b6619b78876a Mon Sep 17 00:00:00 2001 From: Alex Bligh Date: Tue, 16 Oct 2012 13:46:18 +0100 Subject: qemu-img rebase: use empty string to rebase without backing file This patch allows an empty filename to be passed as the new base image name for qemu-img rebase to mean base the image on no backing file (i.e. independent of any backing file). According to Eric Blake, qemu-img rebase already supports this when '-u' is used; this adds support when -u is not used. Signed-off-by: Alex Bligh Signed-off-by: Kevin Wolf --- qemu-img.c | 29 +++++++++++++++++++---------- qemu-img.texi | 4 +++- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/qemu-img.c b/qemu-img.c index 849eb41..c092ccf 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -1562,13 +1562,15 @@ static int img_rebase(int argc, char **argv) error_report("Could not open old backing file '%s'", backing_name); goto out; } - - bs_new_backing = bdrv_new("new_backing"); - ret = bdrv_open(bs_new_backing, out_baseimg, BDRV_O_FLAGS, + if (out_baseimg[0]) { + bs_new_backing = bdrv_new("new_backing"); + ret = bdrv_open(bs_new_backing, out_baseimg, BDRV_O_FLAGS, new_backing_drv); - if (ret) { - error_report("Could not open new backing file '%s'", out_baseimg); - goto out; + if (ret) { + error_report("Could not open new backing file '%s'", + out_baseimg); + goto out; + } } } @@ -1584,7 +1586,7 @@ static int img_rebase(int argc, char **argv) if (!unsafe) { uint64_t num_sectors; uint64_t old_backing_num_sectors; - uint64_t new_backing_num_sectors; + uint64_t new_backing_num_sectors = 0; uint64_t sector; int n; uint8_t * buf_old; @@ -1596,7 +1598,9 @@ static int img_rebase(int argc, char **argv) bdrv_get_geometry(bs, &num_sectors); bdrv_get_geometry(bs_old_backing, &old_backing_num_sectors); - bdrv_get_geometry(bs_new_backing, &new_backing_num_sectors); + if (bs_new_backing) { + bdrv_get_geometry(bs_new_backing, &new_backing_num_sectors); + } if (num_sectors != 0) { local_progress = (float)100 / @@ -1636,7 +1640,7 @@ static int img_rebase(int argc, char **argv) } } - if (sector >= new_backing_num_sectors) { + if (sector >= new_backing_num_sectors || !bs_new_backing) { memset(buf_new, 0, n * BDRV_SECTOR_SIZE); } else { if (sector + n > new_backing_num_sectors) { @@ -1682,7 +1686,12 @@ static int img_rebase(int argc, char **argv) * backing file are overwritten in the COW file now, so the visible content * doesn't change when we switch the backing file. */ - ret = bdrv_change_backing_file(bs, out_baseimg, out_basefmt); + if (out_baseimg && *out_baseimg) { + ret = bdrv_change_backing_file(bs, out_baseimg, out_basefmt); + } else { + ret = bdrv_change_backing_file(bs, NULL, NULL); + } + if (ret == -ENOSPC) { error_report("Could not change the backing file to '%s': No " "space left in the file header", out_baseimg); diff --git a/qemu-img.texi b/qemu-img.texi index 8b05f2c..42ec392 100644 --- a/qemu-img.texi +++ b/qemu-img.texi @@ -148,7 +148,9 @@ Changes the backing file of an image. Only the formats @code{qcow2} and The backing file is changed to @var{backing_file} and (if the image format of @var{filename} supports this) the backing file format is changed to -@var{backing_fmt}. +@var{backing_fmt}. If @var{backing_file} is specified as ``'' (the empty +string), then the image is rebased onto no backing file (i.e. it will exist +independently of any backing file). There are two different modes in which @code{rebase} can operate: @table @option -- cgit v1.1 From b1b1d783eabdb6ac4e4578b2c04b0c24483dce77 Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Tue, 16 Oct 2012 15:49:09 -0400 Subject: block: make bdrv_find_backing_image compare canonical filenames Currently, bdrv_find_backing_image compares bs->backing_file with what is passed in as a backing_file name. Mismatches may occur, however, when bs->backing_file and backing_file are not both absolute or relative. Use path_combine() to make sure any relative backing filenames are relative to the current image filename being searched, and then use realpath() to make all comparisons based on absolute filenames. If either backing_file or bs->backing_file is determine to be a protocol, then no filename normalization is performed. This also changes bdrv_find_backing_image to no longer be recursive, but iterative. Signed-off-by: Jeff Cody Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- block.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 54 insertions(+), 6 deletions(-) diff --git a/block.c b/block.c index e95f613..5e7fc9e 100644 --- a/block.c +++ b/block.c @@ -3123,22 +3123,70 @@ int bdrv_snapshot_load_tmp(BlockDriverState *bs, return -ENOTSUP; } +/* backing_file can either be relative, or absolute, or a protocol. If it is + * relative, it must be relative to the chain. So, passing in bs->filename + * from a BDS as backing_file should not be done, as that may be relative to + * the CWD rather than the chain. */ BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs, const char *backing_file) { - if (!bs->drv) { + char *filename_full = NULL; + char *backing_file_full = NULL; + char *filename_tmp = NULL; + int is_protocol = 0; + BlockDriverState *curr_bs = NULL; + BlockDriverState *retval = NULL; + + if (!bs || !bs->drv || !backing_file) { return NULL; } - if (bs->backing_hd) { - if (strcmp(bs->backing_file, backing_file) == 0) { - return bs->backing_hd; + filename_full = g_malloc(PATH_MAX); + backing_file_full = g_malloc(PATH_MAX); + filename_tmp = g_malloc(PATH_MAX); + + is_protocol = path_has_protocol(backing_file); + + for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) { + + /* If either of the filename paths is actually a protocol, then + * compare unmodified paths; otherwise make paths relative */ + if (is_protocol || path_has_protocol(curr_bs->backing_file)) { + if (strcmp(backing_file, curr_bs->backing_file) == 0) { + retval = curr_bs->backing_hd; + break; + } } else { - return bdrv_find_backing_image(bs->backing_hd, backing_file); + /* If not an absolute filename path, make it relative to the current + * image's filename path */ + path_combine(filename_tmp, PATH_MAX, curr_bs->filename, + backing_file); + + /* We are going to compare absolute pathnames */ + if (!realpath(filename_tmp, filename_full)) { + continue; + } + + /* We need to make sure the backing filename we are comparing against + * is relative to the current image filename (or absolute) */ + path_combine(filename_tmp, PATH_MAX, curr_bs->filename, + curr_bs->backing_file); + + if (!realpath(filename_tmp, backing_file_full)) { + continue; + } + + if (strcmp(backing_file_full, filename_full) == 0) { + retval = curr_bs->backing_hd; + break; + } } } - return NULL; + g_free(filename_full); + g_free(backing_file_full); + g_free(filename_tmp); + return retval; } int bdrv_get_backing_file_depth(BlockDriverState *bs) -- cgit v1.1 From d5208c45be38ab858db6ec5a5097aa1c1a8ebbc9 Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Tue, 16 Oct 2012 15:49:10 -0400 Subject: block: in commit, determine base image from the top image This simplifies some code and error checking, and also fixes a bug. bdrv_find_backing_image() should only be passed absolute filenames, or filenames relative to the chain. In the QMP message handler for block commit, when looking up the base do so from the determined top image, so we know it is reachable from top. Some of the error messages put out by block-commit have changed slightly, which causes 2 tests cases for block-commit to fail. This patch updates the test cases to look for the correct error output. Signed-off-by: Jeff Cody Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- block/commit.c | 9 --------- blockdev.c | 21 +++++++++++---------- tests/qemu-iotests/040 | 4 ++-- 3 files changed, 13 insertions(+), 21 deletions(-) diff --git a/block/commit.c b/block/commit.c index 733c914..13d9e82 100644 --- a/block/commit.c +++ b/block/commit.c @@ -211,15 +211,6 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base, return; } - /* top and base may be valid, but let's make sure that base is reachable - * from top */ - if (bdrv_find_backing_image(top, base->filename) != base) { - error_setg(errp, - "Base (%s) is not reachable from top (%s)", - base->filename, top->filename); - return; - } - overlay_bs = bdrv_find_overlay(bs, top); if (overlay_bs == NULL) { diff --git a/blockdev.c b/blockdev.c index 99828ad..46e4bbd 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1157,16 +1157,6 @@ void qmp_block_commit(const char *device, error_set(errp, QERR_DEVICE_NOT_FOUND, device); return; } - if (base && has_base) { - base_bs = bdrv_find_backing_image(bs, base); - } else { - base_bs = bdrv_find_base(bs); - } - - if (base_bs == NULL) { - error_set(errp, QERR_BASE_NOT_FOUND, base ? base : "NULL"); - return; - } /* default top_bs is the active layer */ top_bs = bs; @@ -1182,6 +1172,17 @@ void qmp_block_commit(const char *device, return; } + if (has_base && base) { + base_bs = bdrv_find_backing_image(top_bs, base); + } else { + base_bs = bdrv_find_base(top_bs); + } + + if (base_bs == NULL) { + error_set(errp, QERR_BASE_NOT_FOUND, base ? base : "NULL"); + return; + } + commit_start(bs, base_bs, top_bs, speed, on_error, block_job_cb, bs, &local_err); if (local_err != NULL) { diff --git a/tests/qemu-iotests/040 b/tests/qemu-iotests/040 index 258e7ea..0fa6441 100755 --- a/tests/qemu-iotests/040 +++ b/tests/qemu-iotests/040 @@ -111,7 +111,7 @@ class TestSingleDrive(ImageCommitTestCase): self.assert_no_active_commit() result = self.vm.qmp('block-commit', device='drive0', top='%s' % backing_img, base='%s' % backing_img) self.assert_qmp(result, 'error/class', 'GenericError') - self.assert_qmp(result, 'error/desc', 'Invalid files for merge: top and base are the same') + self.assert_qmp(result, 'error/desc', 'Base \'%s\' not found' % backing_img) def test_top_invalid(self): self.assert_no_active_commit() @@ -135,7 +135,7 @@ class TestSingleDrive(ImageCommitTestCase): self.assert_no_active_commit() result = self.vm.qmp('block-commit', device='drive0', top='%s' % backing_img, base='%s' % mid_img) self.assert_qmp(result, 'error/class', 'GenericError') - self.assert_qmp(result, 'error/desc', 'Base (%(1)s) is not reachable from top (%(2)s)' % {"1" : mid_img, "2" : backing_img}) + self.assert_qmp(result, 'error/desc', 'Base \'%s\' not found' % mid_img) def test_top_omitted(self): self.assert_no_active_commit() -- cgit v1.1 From 6bf0d1f478a5a425c0c024994375592805d591c0 Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Tue, 16 Oct 2012 15:49:12 -0400 Subject: qemu-iotests: add relative backing file tests for block-commit (040) The previous block commit used absolute filenames for all block-commit images and commands; this adds relative filenames for the same tests. Signed-off-by: Jeff Cody Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- tests/qemu-iotests/040 | 102 +++++++++++++++++++++++++++++++++++++++++++++ tests/qemu-iotests/040.out | 4 +- 2 files changed, 104 insertions(+), 2 deletions(-) diff --git a/tests/qemu-iotests/040 b/tests/qemu-iotests/040 index 0fa6441..aad535a 100755 --- a/tests/qemu-iotests/040 +++ b/tests/qemu-iotests/040 @@ -26,6 +26,7 @@ import os import iotests from iotests import qemu_img, qemu_io import struct +import errno backing_img = os.path.join(iotests.test_dir, 'backing.img') mid_img = os.path.join(iotests.test_dir, 'mid.img') @@ -143,6 +144,107 @@ class TestSingleDrive(ImageCommitTestCase): self.assert_qmp(result, 'error/class', 'GenericError') self.assert_qmp(result, 'error/desc', "Parameter 'top' is missing") +class TestRelativePaths(ImageCommitTestCase): + image_len = 1 * 1024 * 1024 + test_len = 1 * 1024 * 256 + + dir1 = "dir1" + dir2 = "dir2/" + dir3 = "dir2/dir3/" + + test_img = os.path.join(iotests.test_dir, dir3, 'test.img') + mid_img = "../mid.img" + backing_img = "../dir1/backing.img" + + backing_img_abs = os.path.join(iotests.test_dir, dir1, 'backing.img') + mid_img_abs = os.path.join(iotests.test_dir, dir2, 'mid.img') + + def setUp(self): + try: + os.mkdir(os.path.join(iotests.test_dir, self.dir1)) + os.mkdir(os.path.join(iotests.test_dir, self.dir2)) + os.mkdir(os.path.join(iotests.test_dir, self.dir3)) + except OSError as exception: + if exception.errno != errno.EEXIST: + raise + self.create_image(self.backing_img_abs, TestRelativePaths.image_len) + qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % self.backing_img_abs, self.mid_img_abs) + qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % self.mid_img_abs, self.test_img) + qemu_img('rebase', '-u', '-b', self.backing_img, self.mid_img_abs) + qemu_img('rebase', '-u', '-b', self.mid_img, self.test_img) + qemu_io('-c', 'write -P 0xab 0 524288', self.backing_img_abs) + qemu_io('-c', 'write -P 0xef 524288 524288', self.mid_img_abs) + self.vm = iotests.VM().add_drive(self.test_img) + self.vm.launch() + + def tearDown(self): + self.vm.shutdown() + os.remove(self.test_img) + os.remove(self.mid_img_abs) + os.remove(self.backing_img_abs) + try: + os.rmdir(os.path.join(iotests.test_dir, self.dir1)) + os.rmdir(os.path.join(iotests.test_dir, self.dir3)) + os.rmdir(os.path.join(iotests.test_dir, self.dir2)) + except OSError as exception: + if exception.errno != errno.EEXIST and exception.errno != errno.ENOTEMPTY: + raise + + def test_commit(self): + self.assert_no_active_commit() + result = self.vm.qmp('block-commit', device='drive0', top='%s' % self.mid_img) + self.assert_qmp(result, 'return', {}) + + completed = False + while not completed: + for event in self.vm.get_qmp_events(wait=True): + if event['event'] == 'BLOCK_JOB_COMPLETED': + self.assert_qmp(event, 'data/type', 'commit') + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp(event, 'data/offset', self.image_len) + self.assert_qmp(event, 'data/len', self.image_len) + completed = True + + self.assert_no_active_commit() + self.vm.shutdown() + + self.assertEqual(-1, qemu_io('-c', 'read -P 0xab 0 524288', self.backing_img_abs).find("verification failed")) + self.assertEqual(-1, qemu_io('-c', 'read -P 0xef 524288 524288', self.backing_img_abs).find("verification failed")) + + def test_device_not_found(self): + result = self.vm.qmp('block-commit', device='nonexistent', top='%s' % self.mid_img) + self.assert_qmp(result, 'error/class', 'DeviceNotFound') + + def test_top_same_base(self): + self.assert_no_active_commit() + result = self.vm.qmp('block-commit', device='drive0', top='%s' % self.mid_img, base='%s' % self.mid_img) + self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 'error/desc', 'Base \'%s\' not found' % self.mid_img) + + def test_top_invalid(self): + self.assert_no_active_commit() + result = self.vm.qmp('block-commit', device='drive0', top='badfile', base='%s' % self.backing_img) + self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 'error/desc', 'Top image file badfile not found') + + def test_base_invalid(self): + self.assert_no_active_commit() + result = self.vm.qmp('block-commit', device='drive0', top='%s' % self.mid_img, base='badfile') + self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 'error/desc', 'Base \'badfile\' not found') + + def test_top_is_active(self): + self.assert_no_active_commit() + result = self.vm.qmp('block-commit', device='drive0', top='%s' % self.test_img, base='%s' % self.backing_img) + self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 'error/desc', 'Top image as the active layer is currently unsupported') + + def test_top_and_base_reversed(self): + self.assert_no_active_commit() + result = self.vm.qmp('block-commit', device='drive0', top='%s' % self.backing_img, base='%s' % self.mid_img) + self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 'error/desc', 'Base \'%s\' not found' % self.mid_img) + class TestSetSpeed(ImageCommitTestCase): image_len = 80 * 1024 * 1024 # MB diff --git a/tests/qemu-iotests/040.out b/tests/qemu-iotests/040.out index dae404e..b6f2576 100644 --- a/tests/qemu-iotests/040.out +++ b/tests/qemu-iotests/040.out @@ -1,5 +1,5 @@ -......... +................ ---------------------------------------------------------------------- -Ran 9 tests +Ran 16 tests OK -- cgit v1.1 From 9699bf0d06eb42625779216586a28d19d8fc005d Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 17 Oct 2012 14:02:31 +0200 Subject: qemu-img: Add --backing-chain option to info command The qemu-img info --backing-chain option enumerates the backing file chain. For example, for base.qcow2 <- snap1.qcow2 <- snap2.qcow2 the output becomes: $ qemu-img info --backing-chain snap2.qcow2 image: snap2.qcow2 file format: qcow2 virtual size: 100M (104857600 bytes) disk size: 196K cluster_size: 65536 backing file: snap1.qcow2 backing file format: qcow2 image: snap1.qcow2 file format: qcow2 virtual size: 100M (104857600 bytes) disk size: 196K cluster_size: 65536 backing file: base.qcow2 backing file format: qcow2 image: base.qcow2 file format: qcow2 virtual size: 100M (104857600 bytes) disk size: 136K cluster_size: 65536 Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- qemu-img.c | 167 +++++++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 153 insertions(+), 14 deletions(-) diff --git a/qemu-img.c b/qemu-img.c index c092ccf..b17bddd 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -1112,6 +1112,23 @@ static void dump_snapshots(BlockDriverState *bs) g_free(sn_tab); } +static void dump_json_image_info_list(ImageInfoList *list) +{ + Error *errp = NULL; + QString *str; + QmpOutputVisitor *ov = qmp_output_visitor_new(); + QObject *obj; + visit_type_ImageInfoList(qmp_output_get_visitor(ov), + &list, NULL, &errp); + obj = qmp_output_get_qobject(ov); + str = qobject_to_json_pretty(obj); + assert(str != NULL); + printf("%s\n", qstring_get_str(str)); + qobject_decref(obj); + qmp_output_visitor_cleanup(ov); + QDECREF(str); +} + static void collect_snapshots(BlockDriverState *bs , ImageInfo *info) { int i, sn_count; @@ -1251,9 +1268,129 @@ static void dump_human_image_info(ImageInfo *info) printf("backing file format: %s\n", info->backing_filename_format); } } + + if (info->has_snapshots) { + SnapshotInfoList *elem; + char buf[256]; + + printf("Snapshot list:\n"); + printf("%s\n", bdrv_snapshot_dump(buf, sizeof(buf), NULL)); + + /* Ideally bdrv_snapshot_dump() would operate on SnapshotInfoList but + * we convert to the block layer's native QEMUSnapshotInfo for now. + */ + for (elem = info->snapshots; elem; elem = elem->next) { + QEMUSnapshotInfo sn = { + .vm_state_size = elem->value->vm_state_size, + .date_sec = elem->value->date_sec, + .date_nsec = elem->value->date_nsec, + .vm_clock_nsec = elem->value->vm_clock_sec * 1000000000ULL + + elem->value->vm_clock_nsec, + }; + + pstrcpy(sn.id_str, sizeof(sn.id_str), elem->value->id); + pstrcpy(sn.name, sizeof(sn.name), elem->value->name); + printf("%s\n", bdrv_snapshot_dump(buf, sizeof(buf), &sn)); + } + } +} + +static void dump_human_image_info_list(ImageInfoList *list) +{ + ImageInfoList *elem; + bool delim = false; + + for (elem = list; elem; elem = elem->next) { + if (delim) { + printf("\n"); + } + delim = true; + + dump_human_image_info(elem->value); + } +} + +static gboolean str_equal_func(gconstpointer a, gconstpointer b) +{ + return strcmp(a, b) == 0; +} + +/** + * Open an image file chain and return an ImageInfoList + * + * @filename: topmost image filename + * @fmt: topmost image format (may be NULL to autodetect) + * @chain: true - enumerate entire backing file chain + * false - only topmost image file + * + * Returns a list of ImageInfo objects or NULL if there was an error opening an + * image file. If there was an error a message will have been printed to + * stderr. + */ +static ImageInfoList *collect_image_info_list(const char *filename, + const char *fmt, + bool chain) +{ + ImageInfoList *head = NULL; + ImageInfoList **last = &head; + GHashTable *filenames; + + filenames = g_hash_table_new_full(g_str_hash, str_equal_func, NULL, NULL); + + while (filename) { + BlockDriverState *bs; + ImageInfo *info; + ImageInfoList *elem; + + if (g_hash_table_lookup_extended(filenames, filename, NULL, NULL)) { + error_report("Backing file '%s' creates an infinite loop.", + filename); + goto err; + } + g_hash_table_insert(filenames, (gpointer)filename, NULL); + + bs = bdrv_new_open(filename, fmt, BDRV_O_FLAGS | BDRV_O_NO_BACKING, + false); + if (!bs) { + goto err; + } + + info = g_new0(ImageInfo, 1); + collect_image_info(bs, info, filename, fmt); + collect_snapshots(bs, info); + + elem = g_new0(ImageInfoList, 1); + elem->value = info; + *last = elem; + last = &elem->next; + + bdrv_delete(bs); + + filename = fmt = NULL; + if (chain) { + if (info->has_full_backing_filename) { + filename = info->full_backing_filename; + } else if (info->has_backing_filename) { + filename = info->backing_filename; + } + if (info->has_backing_filename_format) { + fmt = info->backing_filename_format; + } + } + } + g_hash_table_destroy(filenames); + return head; + +err: + qapi_free_ImageInfoList(head); + g_hash_table_destroy(filenames); + return NULL; } -enum {OPTION_OUTPUT = 256}; +enum { + OPTION_OUTPUT = 256, + OPTION_BACKING_CHAIN = 257, +}; typedef enum OutputFormat { OFORMAT_JSON, @@ -1264,9 +1401,9 @@ static int img_info(int argc, char **argv) { int c; OutputFormat output_format = OFORMAT_HUMAN; + bool chain = false; const char *filename, *fmt, *output; - BlockDriverState *bs; - ImageInfo *info; + ImageInfoList *list; fmt = NULL; output = NULL; @@ -1276,6 +1413,7 @@ static int img_info(int argc, char **argv) {"help", no_argument, 0, 'h'}, {"format", required_argument, 0, 'f'}, {"output", required_argument, 0, OPTION_OUTPUT}, + {"backing-chain", no_argument, 0, OPTION_BACKING_CHAIN}, {0, 0, 0, 0} }; c = getopt_long(argc, argv, "f:h", @@ -1294,6 +1432,9 @@ static int img_info(int argc, char **argv) case OPTION_OUTPUT: output = optarg; break; + case OPTION_BACKING_CHAIN: + chain = true; + break; } } if (optind >= argc) { @@ -1310,27 +1451,25 @@ static int img_info(int argc, char **argv) return 1; } - bs = bdrv_new_open(filename, fmt, BDRV_O_FLAGS | BDRV_O_NO_BACKING, false); - if (!bs) { + list = collect_image_info_list(filename, fmt, chain); + if (!list) { return 1; } - info = g_new0(ImageInfo, 1); - collect_image_info(bs, info, filename, fmt); - switch (output_format) { case OFORMAT_HUMAN: - dump_human_image_info(info); - dump_snapshots(bs); + dump_human_image_info_list(list); break; case OFORMAT_JSON: - collect_snapshots(bs, info); - dump_json_image_info(info); + if (chain) { + dump_json_image_info_list(list); + } else { + dump_json_image_info(list->value); + } break; } - qapi_free_ImageInfo(info); - bdrv_delete(bs); + qapi_free_ImageInfoList(list); return 0; } -- cgit v1.1 From 514d9da5a9a820b43a2cb90b439dd570a7835114 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 17 Oct 2012 14:02:32 +0200 Subject: qemu-iotests: Add 043 backing file chain infinite loop test This new test verifies that qemu-img info --backing-chain safely aborts when an image file has a backing file infinite loop. Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- tests/qemu-iotests/043 | 95 ++++++++++++++++++++++++++++++++++++++++++++ tests/qemu-iotests/043.out | 66 ++++++++++++++++++++++++++++++ tests/qemu-iotests/common.rc | 10 +++++ tests/qemu-iotests/group | 1 + 4 files changed, 172 insertions(+) create mode 100755 tests/qemu-iotests/043 create mode 100644 tests/qemu-iotests/043.out diff --git a/tests/qemu-iotests/043 b/tests/qemu-iotests/043 new file mode 100755 index 0000000..3ba08dc --- /dev/null +++ b/tests/qemu-iotests/043 @@ -0,0 +1,95 @@ +#!/bin/bash +# +# Test that qemu-img info --backing-chain detects infinite loops +# +# Copyright (C) 2012 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +# creator +owner=stefanha@redhat.com + +seq=`basename $0` +echo "QA output created by $seq" + +here=`pwd` +tmp=/tmp/$$ +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img + rm -f $TEST_IMG.[123].base +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter + +# Any format supporting backing files +_supported_fmt qcow qcow2 vmdk qed +_supported_proto generic +_supported_os Linux + + +size=128M +_make_test_img $size +$QEMU_IMG rebase -u -b $TEST_IMG $TEST_IMG + +echo +echo "== backing file references self ==" +_img_info --backing-chain + +_make_test_img $size +mv $TEST_IMG $TEST_IMG.base +_make_test_img -b $TEST_IMG.base $size +$QEMU_IMG rebase -u -b $TEST_IMG $TEST_IMG.base + +echo +echo "== parent references self ==" +_img_info --backing-chain + +_make_test_img $size +mv $TEST_IMG $TEST_IMG.1.base +_make_test_img -b $TEST_IMG.1.base $size +mv $TEST_IMG $TEST_IMG.2.base +_make_test_img -b $TEST_IMG.2.base $size +mv $TEST_IMG $TEST_IMG.3.base +_make_test_img -b $TEST_IMG.3.base $size +$QEMU_IMG rebase -u -b $TEST_IMG.2.base $TEST_IMG.1.base + +echo +echo "== ancestor references another ancestor ==" +_img_info --backing-chain + +_make_test_img $size +mv $TEST_IMG $TEST_IMG.1.base +_make_test_img -b $TEST_IMG.1.base $size +mv $TEST_IMG $TEST_IMG.2.base +_make_test_img -b $TEST_IMG.2.base $size + +echo +echo "== finite chain of length 3 (human) ==" +_img_info --backing-chain + +echo +echo "== finite chain of length 3 (json) ==" +_img_info --backing-chain --output=json + +# success, all done +echo "*** done" +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/043.out b/tests/qemu-iotests/043.out new file mode 100644 index 0000000..ad23337 --- /dev/null +++ b/tests/qemu-iotests/043.out @@ -0,0 +1,66 @@ +QA output created by 043 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 + +== backing file references self == +qemu-img: Backing file 'TEST_DIR/t.IMGFMT' creates an infinite loop. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file='TEST_DIR/t.IMGFMT.base' + +== parent references self == +qemu-img: Backing file 'TEST_DIR/t.IMGFMT' creates an infinite loop. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file='TEST_DIR/t.IMGFMT.1.base' +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file='TEST_DIR/t.IMGFMT.2.base' +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file='TEST_DIR/t.IMGFMT.3.base' + +== ancestor references another ancestor == +qemu-img: Backing file 'TEST_DIR/t.IMGFMT.2.base' creates an infinite loop. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file='TEST_DIR/t.IMGFMT.1.base' +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file='TEST_DIR/t.IMGFMT.2.base' + +== finite chain of length 3 (human) == +image: TEST_DIR/t.IMGFMT +file format: IMGFMT +virtual size: 128M (134217728 bytes) +cluster_size: 65536 +backing file: TEST_DIR/t.IMGFMT.2.base + +image: TEST_DIR/t.IMGFMT.2.base +file format: IMGFMT +virtual size: 128M (134217728 bytes) +cluster_size: 65536 +backing file: TEST_DIR/t.IMGFMT.1.base + +image: TEST_DIR/t.IMGFMT.1.base +file format: IMGFMT +virtual size: 128M (134217728 bytes) +cluster_size: 65536 + +== finite chain of length 3 (json) == +[ + { + "virtual-size": 134217728, + "filename": "TEST_DIR/t.IMGFMT", + "cluster-size": 65536, + "format": "IMGFMT", + "backing-filename": "TEST_DIR/t.IMGFMT.2.base", + "dirty-flag": false + }, + { + "virtual-size": 134217728, + "filename": "TEST_DIR/t.IMGFMT.2.base", + "cluster-size": 65536, + "format": "IMGFMT", + "backing-filename": "TEST_DIR/t.IMGFMT.1.base", + "dirty-flag": false + }, + { + "virtual-size": 134217728, + "filename": "TEST_DIR/t.IMGFMT.1.base", + "cluster-size": 65536, + "format": "IMGFMT", + "dirty-flag": false + } +] +*** done diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc index d534e94..334534f 100644 --- a/tests/qemu-iotests/common.rc +++ b/tests/qemu-iotests/common.rc @@ -145,6 +145,16 @@ _check_test_img() sed -e 's/qemu-img\: This image format does not support checks/No errors were found on the image./' } +_img_info() +{ + $QEMU_IMG info "$@" $TEST_IMG 2>&1 | \ + sed -e "s#$IMGPROTO:$TEST_DIR#TEST_DIR#g" \ + -e "s#$TEST_DIR#TEST_DIR#g" \ + -e "s#$IMGFMT#IMGFMT#g" \ + -e "/^disk size:/ D" \ + -e "/actual-size/ D" +} + _get_pids_by_name() { if [ $# -ne 1 ] diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index d5f9ab0..7407492 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -48,3 +48,4 @@ 039 rw auto 040 rw auto 042 rw auto quick +043 rw auto backing -- cgit v1.1 From e53575606aa5567bde3246cdc3af1fdc757f77c8 Mon Sep 17 00:00:00 2001 From: Kashyap Chamarthy Date: Thu, 18 Oct 2012 11:25:34 +0530 Subject: qemu-img: document 'info --backing-chain' Signed-off-by: Kashyap Chamarthy Reviewed-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- qemu-img-cmds.hx | 4 ++-- qemu-img.texi | 21 ++++++++++++++++++++- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx index 0ef82e9..a181363 100644 --- a/qemu-img-cmds.hx +++ b/qemu-img-cmds.hx @@ -34,9 +34,9 @@ STEXI ETEXI DEF("info", img_info, - "info [-f fmt] [--output=ofmt] filename") + "info [-f fmt] [--output=ofmt] [--backing-chain] filename") STEXI -@item info [-f @var{fmt}] [--output=@var{ofmt}] @var{filename} +@item info [-f @var{fmt}] [--output=@var{ofmt}] [--backing-chain] @var{filename} ETEXI DEF("snapshot", img_snapshot, diff --git a/qemu-img.texi b/qemu-img.texi index 42ec392..60b83fc 100644 --- a/qemu-img.texi +++ b/qemu-img.texi @@ -28,6 +28,10 @@ Command parameters: is the disk image format. It is guessed automatically in most cases. See below for a description of the supported disk formats. +@item --backing-chain +will enumerate information about backing files in a disk image chain. Refer +below for further description. + @item size is the disk image size in bytes. Optional suffixes @code{k} or @code{K} (kilobyte, 1024) @code{M} (megabyte, 1024k) and @code{G} (gigabyte, 1024M) @@ -129,7 +133,7 @@ created as a copy on write image of the specified base image; the @var{backing_file} should have the same content as the input's base image, however the path, image format, etc may differ. -@item info [-f @var{fmt}] [--output=@var{ofmt}] @var{filename} +@item info [-f @var{fmt}] [--output=@var{ofmt}] [--backing-chain] @var{filename} Give information about the disk image @var{filename}. Use it in particular to know the size reserved on disk which can be different @@ -137,6 +141,21 @@ from the displayed size. If VM snapshots are stored in the disk image, they are displayed too. The command can output in the format @var{ofmt} which is either @code{human} or @code{json}. +If a disk image has a backing file chain, information about each disk image in +the chain can be recursively enumerated by using the option @code{--backing-chain}. + +For instance, if you have an image chain like: + +@example +base.qcow2 <- snap1.qcow2 <- snap2.qcow2 +@end example + +To enumerate information about each disk image in the above chain, starting from top to base, do: + +@example +qemu-img info --backing-chain snap2.qcow2 +@end example + @item snapshot [-l | -a @var{snapshot} | -c @var{snapshot} | -d @var{snapshot} ] @var{filename} List, apply, create or delete snapshots in image @var{filename}. -- cgit v1.1 From 80168bff43760bde98388480dc7c93f94693421c Mon Sep 17 00:00:00 2001 From: Luiz Capitulino Date: Wed, 17 Oct 2012 16:45:25 -0300 Subject: block: bdrv_create(): don't leak cco.filename on error Signed-off-by: Luiz Capitulino Reviewed-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- block.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/block.c b/block.c index 5e7fc9e..7e26b6f 100644 --- a/block.c +++ b/block.c @@ -379,7 +379,8 @@ int bdrv_create(BlockDriver *drv, const char* filename, }; if (!drv->bdrv_create) { - return -ENOTSUP; + ret = -ENOTSUP; + goto out; } if (qemu_in_coroutine()) { @@ -394,8 +395,9 @@ int bdrv_create(BlockDriver *drv, const char* filename, } ret = cco.ret; - g_free(cco.filename); +out: + g_free(cco.filename); return ret; } -- cgit v1.1 From 9ac54af0c35d3f931653efae5698ef0f465eac7c Mon Sep 17 00:00:00 2001 From: Corey Bryant Date: Thu, 18 Oct 2012 15:19:31 -0400 Subject: monitor: Allow add-fd to any specified fd set The first call to add an fd to an fd set was previously not allowed to choose the fd set ID. The ID was generated as the first available and ensuing calls could add more fds by specifying the fd set ID. This change allows users to choose the fd set ID on the first call. Signed-off-by: Corey Bryant Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- monitor.c | 54 +++++++++++++++++++++++++++++++++++++----------------- qapi-schema.json | 2 +- 2 files changed, 38 insertions(+), 18 deletions(-) diff --git a/monitor.c b/monitor.c index d17ae2d..3519b39 100644 --- a/monitor.c +++ b/monitor.c @@ -2135,7 +2135,7 @@ AddfdInfo *qmp_add_fd(bool has_fdset_id, int64_t fdset_id, bool has_opaque, { int fd; Monitor *mon = cur_mon; - MonFdset *mon_fdset; + MonFdset *mon_fdset = NULL; MonFdsetFd *mon_fdset_fd; AddfdInfo *fdinfo; @@ -2147,34 +2147,54 @@ AddfdInfo *qmp_add_fd(bool has_fdset_id, int64_t fdset_id, bool has_opaque, if (has_fdset_id) { QLIST_FOREACH(mon_fdset, &mon_fdsets, next) { - if (mon_fdset->id == fdset_id) { + /* Break if match found or match impossible due to ordering by ID */ + if (fdset_id <= mon_fdset->id) { + if (fdset_id < mon_fdset->id) { + mon_fdset = NULL; + } break; } } - if (mon_fdset == NULL) { - error_set(errp, QERR_INVALID_PARAMETER_VALUE, "fdset-id", - "an existing fdset-id"); - goto error; - } - } else { + } + + if (mon_fdset == NULL) { int64_t fdset_id_prev = -1; MonFdset *mon_fdset_cur = QLIST_FIRST(&mon_fdsets); - /* Use first available fdset ID */ - QLIST_FOREACH(mon_fdset, &mon_fdsets, next) { - mon_fdset_cur = mon_fdset; - if (fdset_id_prev == mon_fdset_cur->id - 1) { - fdset_id_prev = mon_fdset_cur->id; - continue; + if (has_fdset_id) { + if (fdset_id < 0) { + error_set(errp, QERR_INVALID_PARAMETER_VALUE, "fdset-id", + "a non-negative value"); + goto error; + } + /* Use specified fdset ID */ + QLIST_FOREACH(mon_fdset, &mon_fdsets, next) { + mon_fdset_cur = mon_fdset; + if (fdset_id < mon_fdset_cur->id) { + break; + } + } + } else { + /* Use first available fdset ID */ + QLIST_FOREACH(mon_fdset, &mon_fdsets, next) { + mon_fdset_cur = mon_fdset; + if (fdset_id_prev == mon_fdset_cur->id - 1) { + fdset_id_prev = mon_fdset_cur->id; + continue; + } + break; } - break; } mon_fdset = g_malloc0(sizeof(*mon_fdset)); - mon_fdset->id = fdset_id_prev + 1; + if (has_fdset_id) { + mon_fdset->id = fdset_id; + } else { + mon_fdset->id = fdset_id_prev + 1; + } /* The fdset list is ordered by fdset ID */ - if (mon_fdset->id == 0) { + if (!mon_fdset_cur) { QLIST_INSERT_HEAD(&mon_fdsets, mon_fdset, next); } else if (mon_fdset->id < mon_fdset_cur->id) { QLIST_INSERT_BEFORE(mon_fdset_cur, mon_fdset, next); diff --git a/qapi-schema.json b/qapi-schema.json index c615ee2..36278cc 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -2625,7 +2625,7 @@ # # Returns: @AddfdInfo on success # If file descriptor was not received, FdNotSupplied -# If @fdset-id does not exist, InvalidParameterValue +# If @fdset-id is a negative value, InvalidParameterValue # # Notes: The list of fd sets is shared by all monitor connections. # -- cgit v1.1 From e446f70d54b4920e8ca5af509271b69eab86e37b Mon Sep 17 00:00:00 2001 From: Corey Bryant Date: Thu, 18 Oct 2012 15:19:32 -0400 Subject: monitor: Enable adding an inherited fd to an fd set qmp_add_fd() gets an fd that was received over a socket with SCM_RIGHTS and adds it to an fd set. This patch adds support that will enable adding an fd that was inherited on the command line to an fd set. Note: All of the code added to monitor_fdset_add_fd(), with the exception of the error path for non-valid fdset-id, is code motion from qmp_add_fd(). Signed-off-by: Corey Bryant Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- monitor.c | 157 ++++++++++++++++++++++++++++++++++---------------------------- monitor.h | 3 ++ 2 files changed, 88 insertions(+), 72 deletions(-) diff --git a/monitor.c b/monitor.c index 3519b39..6e99049 100644 --- a/monitor.c +++ b/monitor.c @@ -2135,8 +2135,6 @@ AddfdInfo *qmp_add_fd(bool has_fdset_id, int64_t fdset_id, bool has_opaque, { int fd; Monitor *mon = cur_mon; - MonFdset *mon_fdset = NULL; - MonFdsetFd *mon_fdset_fd; AddfdInfo *fdinfo; fd = qemu_chr_fe_get_msgfd(mon->chr); @@ -2145,78 +2143,12 @@ AddfdInfo *qmp_add_fd(bool has_fdset_id, int64_t fdset_id, bool has_opaque, goto error; } - if (has_fdset_id) { - QLIST_FOREACH(mon_fdset, &mon_fdsets, next) { - /* Break if match found or match impossible due to ordering by ID */ - if (fdset_id <= mon_fdset->id) { - if (fdset_id < mon_fdset->id) { - mon_fdset = NULL; - } - break; - } - } + fdinfo = monitor_fdset_add_fd(fd, has_fdset_id, fdset_id, + has_opaque, opaque, errp); + if (fdinfo) { + return fdinfo; } - if (mon_fdset == NULL) { - int64_t fdset_id_prev = -1; - MonFdset *mon_fdset_cur = QLIST_FIRST(&mon_fdsets); - - if (has_fdset_id) { - if (fdset_id < 0) { - error_set(errp, QERR_INVALID_PARAMETER_VALUE, "fdset-id", - "a non-negative value"); - goto error; - } - /* Use specified fdset ID */ - QLIST_FOREACH(mon_fdset, &mon_fdsets, next) { - mon_fdset_cur = mon_fdset; - if (fdset_id < mon_fdset_cur->id) { - break; - } - } - } else { - /* Use first available fdset ID */ - QLIST_FOREACH(mon_fdset, &mon_fdsets, next) { - mon_fdset_cur = mon_fdset; - if (fdset_id_prev == mon_fdset_cur->id - 1) { - fdset_id_prev = mon_fdset_cur->id; - continue; - } - break; - } - } - - mon_fdset = g_malloc0(sizeof(*mon_fdset)); - if (has_fdset_id) { - mon_fdset->id = fdset_id; - } else { - mon_fdset->id = fdset_id_prev + 1; - } - - /* The fdset list is ordered by fdset ID */ - if (!mon_fdset_cur) { - QLIST_INSERT_HEAD(&mon_fdsets, mon_fdset, next); - } else if (mon_fdset->id < mon_fdset_cur->id) { - QLIST_INSERT_BEFORE(mon_fdset_cur, mon_fdset, next); - } else { - QLIST_INSERT_AFTER(mon_fdset_cur, mon_fdset, next); - } - } - - mon_fdset_fd = g_malloc0(sizeof(*mon_fdset_fd)); - mon_fdset_fd->fd = fd; - mon_fdset_fd->removed = false; - if (has_opaque) { - mon_fdset_fd->opaque = g_strdup(opaque); - } - QLIST_INSERT_HEAD(&mon_fdset->fds, mon_fdset_fd, next); - - fdinfo = g_malloc0(sizeof(*fdinfo)); - fdinfo->fdset_id = mon_fdset->id; - fdinfo->fd = mon_fdset_fd->fd; - - return fdinfo; - error: if (fd != -1) { close(fd); @@ -2301,6 +2233,87 @@ FdsetInfoList *qmp_query_fdsets(Error **errp) return fdset_list; } +AddfdInfo *monitor_fdset_add_fd(int fd, bool has_fdset_id, int64_t fdset_id, + bool has_opaque, const char *opaque, + Error **errp) +{ + MonFdset *mon_fdset = NULL; + MonFdsetFd *mon_fdset_fd; + AddfdInfo *fdinfo; + + if (has_fdset_id) { + QLIST_FOREACH(mon_fdset, &mon_fdsets, next) { + /* Break if match found or match impossible due to ordering by ID */ + if (fdset_id <= mon_fdset->id) { + if (fdset_id < mon_fdset->id) { + mon_fdset = NULL; + } + break; + } + } + } + + if (mon_fdset == NULL) { + int64_t fdset_id_prev = -1; + MonFdset *mon_fdset_cur = QLIST_FIRST(&mon_fdsets); + + if (has_fdset_id) { + if (fdset_id < 0) { + error_set(errp, QERR_INVALID_PARAMETER_VALUE, "fdset-id", + "a non-negative value"); + return NULL; + } + /* Use specified fdset ID */ + QLIST_FOREACH(mon_fdset, &mon_fdsets, next) { + mon_fdset_cur = mon_fdset; + if (fdset_id < mon_fdset_cur->id) { + break; + } + } + } else { + /* Use first available fdset ID */ + QLIST_FOREACH(mon_fdset, &mon_fdsets, next) { + mon_fdset_cur = mon_fdset; + if (fdset_id_prev == mon_fdset_cur->id - 1) { + fdset_id_prev = mon_fdset_cur->id; + continue; + } + break; + } + } + + mon_fdset = g_malloc0(sizeof(*mon_fdset)); + if (has_fdset_id) { + mon_fdset->id = fdset_id; + } else { + mon_fdset->id = fdset_id_prev + 1; + } + + /* The fdset list is ordered by fdset ID */ + if (!mon_fdset_cur) { + QLIST_INSERT_HEAD(&mon_fdsets, mon_fdset, next); + } else if (mon_fdset->id < mon_fdset_cur->id) { + QLIST_INSERT_BEFORE(mon_fdset_cur, mon_fdset, next); + } else { + QLIST_INSERT_AFTER(mon_fdset_cur, mon_fdset, next); + } + } + + mon_fdset_fd = g_malloc0(sizeof(*mon_fdset_fd)); + mon_fdset_fd->fd = fd; + mon_fdset_fd->removed = false; + if (has_opaque) { + mon_fdset_fd->opaque = g_strdup(opaque); + } + QLIST_INSERT_HEAD(&mon_fdset->fds, mon_fdset_fd, next); + + fdinfo = g_malloc0(sizeof(*fdinfo)); + fdinfo->fdset_id = mon_fdset->id; + fdinfo->fd = mon_fdset_fd->fd; + + return fdinfo; +} + int monitor_fdset_get_fd(int64_t fdset_id, int flags) { #ifndef _WIN32 diff --git a/monitor.h b/monitor.h index b6e7d95..d4c017e 100644 --- a/monitor.h +++ b/monitor.h @@ -90,6 +90,9 @@ int qmp_qom_set(Monitor *mon, const QDict *qdict, QObject **ret); int qmp_qom_get(Monitor *mon, const QDict *qdict, QObject **ret); +AddfdInfo *monitor_fdset_add_fd(int fd, bool has_fdset_id, int64_t fdset_id, + bool has_opaque, const char *opaque, + Error **errp); int monitor_fdset_get_fd(int64_t fdset_id, int flags); int monitor_fdset_dup_fd_add(int64_t fdset_id, int dup_fd); int monitor_fdset_dup_fd_remove(int dup_fd); -- cgit v1.1 From ebe52b592dd5867fce7238f49b8c0416c3eedb6c Mon Sep 17 00:00:00 2001 From: Corey Bryant Date: Thu, 18 Oct 2012 15:19:33 -0400 Subject: monitor: Prevent removing fd from set during init If an fd is added to an fd set via the command line, and it is not referenced by another command line option (ie. -drive), then clean it up after QEMU initialization is complete. Signed-off-by: Corey Bryant Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- monitor.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/monitor.c b/monitor.c index 6e99049..1d3dd9a 100644 --- a/monitor.c +++ b/monitor.c @@ -2105,8 +2105,9 @@ static void monitor_fdset_cleanup(MonFdset *mon_fdset) MonFdsetFd *mon_fdset_fd_next; QLIST_FOREACH_SAFE(mon_fdset_fd, &mon_fdset->fds, next, mon_fdset_fd_next) { - if (mon_fdset_fd->removed || - (QLIST_EMPTY(&mon_fdset->dup_fds) && mon_refcount == 0)) { + if ((mon_fdset_fd->removed || + (QLIST_EMPTY(&mon_fdset->dup_fds) && mon_refcount == 0)) && + runstate_is_running()) { close(mon_fdset_fd->fd); g_free(mon_fdset_fd->opaque); QLIST_REMOVE(mon_fdset_fd, next); -- cgit v1.1 From 587ed6be0b6331b11169da8846b8442840d5428c Mon Sep 17 00:00:00 2001 From: Corey Bryant Date: Thu, 18 Oct 2012 15:19:34 -0400 Subject: qemu-config: Add new -add-fd command line option This option can be used for passing file descriptors on the command line. It mirrors the existing add-fd QMP command which allows an fd to be passed to QEMU via SCM_RIGHTS and added to an fd set. This can be combined with commands such as -drive to link file descriptors in an fd set to a drive: qemu-kvm -add-fd fd=3,set=2,opaque="rdwr:/path/to/file" -add-fd fd=4,set=2,opaque="rdonly:/path/to/file" -drive file=/dev/fdset/2,index=0,media=disk This example adds dups of fds 3 and 4, and the accompanying opaque strings to the fd set with ID=2. qemu_open() already knows how to handle a filename of this format. qemu_open() searches the corresponding fd set for an fd and when it finds a match, QEMU goes on to use a dup of that fd just like it would have used an fd that it opened itself. Signed-off-by: Corey Bryant Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- qemu-config.c | 22 ++++++++++++++ qemu-options.hx | 36 ++++++++++++++++++++++ vl.c | 94 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 152 insertions(+) diff --git a/qemu-config.c b/qemu-config.c index cd1ec21..601237d 100644 --- a/qemu-config.c +++ b/qemu-config.c @@ -653,6 +653,27 @@ QemuOptsList qemu_boot_opts = { }, }; +static QemuOptsList qemu_add_fd_opts = { + .name = "add-fd", + .head = QTAILQ_HEAD_INITIALIZER(qemu_add_fd_opts.head), + .desc = { + { + .name = "fd", + .type = QEMU_OPT_NUMBER, + .help = "file descriptor of which a duplicate is added to fd set", + },{ + .name = "set", + .type = QEMU_OPT_NUMBER, + .help = "ID of the fd set to add fd to", + },{ + .name = "opaque", + .type = QEMU_OPT_STRING, + .help = "free-form string used to describe fd", + }, + { /* end of list */ } + }, +}; + static QemuOptsList *vm_config_groups[32] = { &qemu_drive_opts, &qemu_chardev_opts, @@ -669,6 +690,7 @@ static QemuOptsList *vm_config_groups[32] = { &qemu_boot_opts, &qemu_iscsi_opts, &qemu_sandbox_opts, + &qemu_add_fd_opts, NULL, }; diff --git a/qemu-options.hx b/qemu-options.hx index 46f0539..a67a255 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -253,6 +253,14 @@ qemu-system-i386 -drive file=file,index=2,media=disk qemu-system-i386 -drive file=file,index=3,media=disk @end example +You can open an image using pre-opened file descriptors from an fd set: +@example +qemu-system-i386 +-add-fd fd=3,set=2,opaque="rdwr:/path/to/file" +-add-fd fd=4,set=2,opaque="rdonly:/path/to/file" +-drive file=/dev/fdset/2,index=0,media=disk +@end example + You can connect a CDROM to the slave of ide0: @example qemu-system-i386 -drive file=file,if=ide,index=1,media=cdrom @@ -285,6 +293,34 @@ qemu-system-i386 -hda a -hdb b @end example ETEXI +DEF("add-fd", HAS_ARG, QEMU_OPTION_add_fd, + "-add-fd fd=fd,set=set[,opaque=opaque]\n" + " Add 'fd' to fd 'set'\n", QEMU_ARCH_ALL) +STEXI +@item -add-fd fd=@var{fd},set=@var{set}[,opaque=@var{opaque}] +@findex -add-fd + +Add a file descriptor to an fd set. Valid options are: + +@table @option +@item fd=@var{fd} +This option defines the file descriptor of which a duplicate is added to fd set. +The file descriptor cannot be stdin, stdout, or stderr. +@item set=@var{set} +This option defines the ID of the fd set to add the file descriptor to. +@item opaque=@var{opaque} +This option defines a free-form string that can be used to describe @var{fd}. +@end table + +You can open an image using pre-opened file descriptors from an fd set: +@example +qemu-system-i386 +-add-fd fd=3,set=2,opaque="rdwr:/path/to/file" +-add-fd fd=4,set=2,opaque="rdonly:/path/to/file" +-drive file=/dev/fdset/2,index=0,media=disk +@end example +ETEXI + DEF("set", HAS_ARG, QEMU_OPTION_set, "-set group.id.arg=value\n" " set parameter for item of type \n" diff --git a/vl.c b/vl.c index ee3c43a..b870caf 100644 --- a/vl.c +++ b/vl.c @@ -790,6 +790,78 @@ static int parse_sandbox(QemuOpts *opts, void *opaque) return 0; } +#ifndef _WIN32 +static int parse_add_fd(QemuOpts *opts, void *opaque) +{ + int fd, dupfd, flags; + int64_t fdset_id; + const char *fd_opaque = NULL; + + fd = qemu_opt_get_number(opts, "fd", -1); + fdset_id = qemu_opt_get_number(opts, "set", -1); + fd_opaque = qemu_opt_get(opts, "opaque"); + + if (fd < 0) { + qerror_report(ERROR_CLASS_GENERIC_ERROR, + "fd option is required and must be non-negative"); + return -1; + } + + if (fd <= STDERR_FILENO) { + qerror_report(ERROR_CLASS_GENERIC_ERROR, + "fd cannot be a standard I/O stream"); + return -1; + } + + /* + * All fds inherited across exec() necessarily have FD_CLOEXEC + * clear, while qemu sets FD_CLOEXEC on all other fds used internally. + */ + flags = fcntl(fd, F_GETFD); + if (flags == -1 || (flags & FD_CLOEXEC)) { + qerror_report(ERROR_CLASS_GENERIC_ERROR, + "fd is not valid or already in use"); + return -1; + } + + if (fdset_id < 0) { + qerror_report(ERROR_CLASS_GENERIC_ERROR, + "set option is required and must be non-negative"); + return -1; + } + +#ifdef F_DUPFD_CLOEXEC + dupfd = fcntl(fd, F_DUPFD_CLOEXEC, 0); +#else + dupfd = dup(fd); + if (dupfd != -1) { + qemu_set_cloexec(dupfd); + } +#endif + if (dupfd == -1) { + qerror_report(ERROR_CLASS_GENERIC_ERROR, + "Error duplicating fd: %s", strerror(errno)); + return -1; + } + + /* add the duplicate fd, and optionally the opaque string, to the fd set */ + monitor_fdset_add_fd(dupfd, true, fdset_id, fd_opaque ? true : false, + fd_opaque, NULL); + + return 0; +} + +static int cleanup_add_fd(QemuOpts *opts, void *opaque) +{ + int fd; + + fd = qemu_opt_get_number(opts, "fd", -1); + close(fd); + + return 0; +} +#endif + /***********************************************************/ /* QEMU Block devices */ @@ -3309,6 +3381,18 @@ int main(int argc, char **argv, char **envp) exit(0); } break; + case QEMU_OPTION_add_fd: +#ifndef _WIN32 + opts = qemu_opts_parse(qemu_find_opts("add-fd"), optarg, 0); + if (!opts) { + exit(0); + } +#else + error_report("File descriptor passing is disabled on this " + "platform"); + exit(1); +#endif + break; default: os_parse_cmd_args(popt->index, optarg); } @@ -3320,6 +3404,16 @@ int main(int argc, char **argv, char **envp) exit(1); } +#ifndef _WIN32 + if (qemu_opts_foreach(qemu_find_opts("add-fd"), parse_add_fd, NULL, 1)) { + exit(1); + } + + if (qemu_opts_foreach(qemu_find_opts("add-fd"), cleanup_add_fd, NULL, 1)) { + exit(1); + } +#endif + if (machine == NULL) { fprintf(stderr, "No machine found.\n"); exit(1); -- cgit v1.1 From ac84adac48963653638a1c95dd7c6ab8b4d5a4f3 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 18 Oct 2012 16:49:15 +0200 Subject: block: add bdrv_query_info Extract it out of the implementation of "info block". Signed-off-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- block.c | 106 ++++++++++++++++++++++++++++++++-------------------------------- block.h | 1 + 2 files changed, 54 insertions(+), 53 deletions(-) diff --git a/block.c b/block.c index 7e26b6f..2ba83db 100644 --- a/block.c +++ b/block.c @@ -2799,69 +2799,69 @@ int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *top, return 0; } -BlockInfoList *qmp_query_block(Error **errp) +BlockInfo *bdrv_query_info(BlockDriverState *bs) { - BlockInfoList *head = NULL, *cur_item = NULL; - BlockDriverState *bs; + BlockInfo *info = g_malloc0(sizeof(*info)); + info->device = g_strdup(bs->device_name); + info->type = g_strdup("unknown"); + info->locked = bdrv_dev_is_medium_locked(bs); + info->removable = bdrv_dev_has_removable_media(bs); - QTAILQ_FOREACH(bs, &bdrv_states, list) { - BlockInfoList *info = g_malloc0(sizeof(*info)); + if (bdrv_dev_has_removable_media(bs)) { + info->has_tray_open = true; + info->tray_open = bdrv_dev_is_tray_open(bs); + } - info->value = g_malloc0(sizeof(*info->value)); - info->value->device = g_strdup(bs->device_name); - info->value->type = g_strdup("unknown"); - info->value->locked = bdrv_dev_is_medium_locked(bs); - info->value->removable = bdrv_dev_has_removable_media(bs); + if (bdrv_iostatus_is_enabled(bs)) { + info->has_io_status = true; + info->io_status = bs->iostatus; + } - if (bdrv_dev_has_removable_media(bs)) { - info->value->has_tray_open = true; - info->value->tray_open = bdrv_dev_is_tray_open(bs); + if (bs->drv) { + info->has_inserted = true; + info->inserted = g_malloc0(sizeof(*info->inserted)); + info->inserted->file = g_strdup(bs->filename); + info->inserted->ro = bs->read_only; + info->inserted->drv = g_strdup(bs->drv->format_name); + info->inserted->encrypted = bs->encrypted; + info->inserted->encryption_key_missing = bdrv_key_required(bs); + + if (bs->backing_file[0]) { + info->inserted->has_backing_file = true; + info->inserted->backing_file = g_strdup(bs->backing_file); } - if (bdrv_iostatus_is_enabled(bs)) { - info->value->has_io_status = true; - info->value->io_status = bs->iostatus; + info->inserted->backing_file_depth = bdrv_get_backing_file_depth(bs); + + if (bs->io_limits_enabled) { + info->inserted->bps = + bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]; + info->inserted->bps_rd = + bs->io_limits.bps[BLOCK_IO_LIMIT_READ]; + info->inserted->bps_wr = + bs->io_limits.bps[BLOCK_IO_LIMIT_WRITE]; + info->inserted->iops = + bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]; + info->inserted->iops_rd = + bs->io_limits.iops[BLOCK_IO_LIMIT_READ]; + info->inserted->iops_wr = + bs->io_limits.iops[BLOCK_IO_LIMIT_WRITE]; } + } + return info; +} - if (bs->drv) { - info->value->has_inserted = true; - info->value->inserted = g_malloc0(sizeof(*info->value->inserted)); - info->value->inserted->file = g_strdup(bs->filename); - info->value->inserted->ro = bs->read_only; - info->value->inserted->drv = g_strdup(bs->drv->format_name); - info->value->inserted->encrypted = bs->encrypted; - info->value->inserted->encryption_key_missing = bdrv_key_required(bs); - if (bs->backing_file[0]) { - info->value->inserted->has_backing_file = true; - info->value->inserted->backing_file = g_strdup(bs->backing_file); - } +BlockInfoList *qmp_query_block(Error **errp) +{ + BlockInfoList *head = NULL, **p_next = &head; + BlockDriverState *bs; - info->value->inserted->backing_file_depth = - bdrv_get_backing_file_depth(bs); - - if (bs->io_limits_enabled) { - info->value->inserted->bps = - bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]; - info->value->inserted->bps_rd = - bs->io_limits.bps[BLOCK_IO_LIMIT_READ]; - info->value->inserted->bps_wr = - bs->io_limits.bps[BLOCK_IO_LIMIT_WRITE]; - info->value->inserted->iops = - bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]; - info->value->inserted->iops_rd = - bs->io_limits.iops[BLOCK_IO_LIMIT_READ]; - info->value->inserted->iops_wr = - bs->io_limits.iops[BLOCK_IO_LIMIT_WRITE]; - } - } + QTAILQ_FOREACH(bs, &bdrv_states, list) { + BlockInfoList *info = g_malloc0(sizeof(*info)); + info->value = bdrv_query_info(bs); - /* XXX: waiting for the qapi to support GSList */ - if (!cur_item) { - head = cur_item = info; - } else { - cur_item->next = info; - cur_item = info; - } + *p_next = info; + p_next = &info->next; } return head; diff --git a/block.h b/block.h index e2d89d7..2ede16d 100644 --- a/block.h +++ b/block.h @@ -313,6 +313,7 @@ void bdrv_get_backing_filename(BlockDriverState *bs, char *filename, int filename_size); void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz); +BlockInfo *bdrv_query_info(BlockDriverState *s); int bdrv_can_snapshot(BlockDriverState *bs); int bdrv_is_snapshot(BlockDriverState *bs); BlockDriverState *bdrv_snapshots(void); -- cgit v1.1 From 9887b616619f62977682e76927a9b5a6134cc8bf Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 18 Oct 2012 16:49:16 +0200 Subject: block: add bdrv_query_stats qmp_query_blockstat cannot have errors, remove the Error argument and create a new public function bdrv_query_stats out of it. Signed-off-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- block.c | 18 ++++++------------ block.h | 1 + 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/block.c b/block.c index 2ba83db..a723be2 100644 --- a/block.c +++ b/block.c @@ -2867,8 +2867,7 @@ BlockInfoList *qmp_query_block(Error **errp) return head; } -/* Consider exposing this as a full fledged QMP command */ -static BlockStats *qmp_query_blockstat(const BlockDriverState *bs, Error **errp) +BlockStats *bdrv_query_stats(const BlockDriverState *bs) { BlockStats *s; @@ -2892,7 +2891,7 @@ static BlockStats *qmp_query_blockstat(const BlockDriverState *bs, Error **errp) if (bs->file) { s->has_parent = true; - s->parent = qmp_query_blockstat(bs->file, NULL); + s->parent = bdrv_query_stats(bs->file); } return s; @@ -2900,20 +2899,15 @@ static BlockStats *qmp_query_blockstat(const BlockDriverState *bs, Error **errp) BlockStatsList *qmp_query_blockstats(Error **errp) { - BlockStatsList *head = NULL, *cur_item = NULL; + BlockStatsList *head = NULL, **p_next = &head; BlockDriverState *bs; QTAILQ_FOREACH(bs, &bdrv_states, list) { BlockStatsList *info = g_malloc0(sizeof(*info)); - info->value = qmp_query_blockstat(bs, NULL); + info->value = bdrv_query_stats(bs); - /* XXX: waiting for the qapi to support GSList */ - if (!cur_item) { - head = cur_item = info; - } else { - cur_item->next = info; - cur_item = info; - } + *p_next = info; + p_next = &info->next; } return head; diff --git a/block.h b/block.h index 2ede16d..ed4b90d 100644 --- a/block.h +++ b/block.h @@ -314,6 +314,7 @@ void bdrv_get_backing_filename(BlockDriverState *bs, void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz); BlockInfo *bdrv_query_info(BlockDriverState *s); +BlockStats *bdrv_query_stats(const BlockDriverState *bs); int bdrv_can_snapshot(BlockDriverState *bs); int bdrv_is_snapshot(BlockDriverState *bs); BlockDriverState *bdrv_snapshots(void); -- cgit v1.1 From 9156df12a4f3b3db63d1b292d081d814f02e311a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 18 Oct 2012 16:49:17 +0200 Subject: block: add bdrv_open_backing_file Mirroring runs without the backing file so that it can be copied outside QEMU. However, we need to add it at the time the job is completed and QEMU switches to the target. Factor out the common bits of opening an image and completing a mirroring operation. The new function does not assume that the file is closed immediately after it returns failure, so it keeps the BDRV_O_NO_BACKING flag up-to-date. Signed-off-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- block.c | 56 ++++++++++++++++++++++++++++++++++++++------------------ block.h | 1 + 2 files changed, 39 insertions(+), 18 deletions(-) diff --git a/block.c b/block.c index a723be2..e06d78f 100644 --- a/block.c +++ b/block.c @@ -736,6 +736,42 @@ int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags) return 0; } +int bdrv_open_backing_file(BlockDriverState *bs) +{ + char backing_filename[PATH_MAX]; + int back_flags, ret; + BlockDriver *back_drv = NULL; + + if (bs->backing_hd != NULL) { + return 0; + } + + bs->open_flags &= ~BDRV_O_NO_BACKING; + if (bs->backing_file[0] == '\0') { + return 0; + } + + bs->backing_hd = bdrv_new(""); + bdrv_get_full_backing_filename(bs, backing_filename, + sizeof(backing_filename)); + + if (bs->backing_format[0] != '\0') { + back_drv = bdrv_find_format(bs->backing_format); + } + + /* backing files always opened read-only */ + back_flags = bs->open_flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT); + + ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv); + if (ret < 0) { + bdrv_delete(bs->backing_hd); + bs->backing_hd = NULL; + bs->open_flags |= BDRV_O_NO_BACKING; + return ret; + } + return 0; +} + /* * Opens a disk image (raw, qcow2, vmdk, ...) */ @@ -823,24 +859,8 @@ int bdrv_open(BlockDriverState *bs, const char *filename, int flags, } /* If there is a backing file, use it */ - if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') { - char backing_filename[PATH_MAX]; - int back_flags; - BlockDriver *back_drv = NULL; - - bs->backing_hd = bdrv_new(""); - bdrv_get_full_backing_filename(bs, backing_filename, - sizeof(backing_filename)); - - if (bs->backing_format[0] != '\0') { - back_drv = bdrv_find_format(bs->backing_format); - } - - /* backing files always opened read-only */ - back_flags = - flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING); - - ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv); + if ((flags & BDRV_O_NO_BACKING) == 0) { + ret = bdrv_open_backing_file(bs); if (ret < 0) { bdrv_close(bs); return ret; diff --git a/block.h b/block.h index ed4b90d..096fa09 100644 --- a/block.h +++ b/block.h @@ -133,6 +133,7 @@ void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top); void bdrv_delete(BlockDriverState *bs); int bdrv_parse_cache_flags(const char *mode, int *flags); int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags); +int bdrv_open_backing_file(BlockDriverState *bs); int bdrv_open(BlockDriverState *bs, const char *filename, int flags, BlockDriver *drv); BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, -- cgit v1.1 From 1755da16e32c15b22a521e8a38539e4b5cf367f3 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 18 Oct 2012 16:49:18 +0200 Subject: block: introduce new dirty bitmap functionality Assert that write_compressed is never used with the dirty bitmap. Setting the bits early is wrong, because a coroutine might concurrently examine them and copy incomplete data from the source. Signed-off-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- block.c | 51 +++++++++++++++++++++++++++++++++++++++++++++------ block.h | 5 +++-- 2 files changed, 48 insertions(+), 8 deletions(-) diff --git a/block.c b/block.c index e06d78f..2c3d3da 100644 --- a/block.c +++ b/block.c @@ -2391,7 +2391,7 @@ static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs, } if (bs->dirty_bitmap) { - set_dirty_bitmap(bs, sector_num, nb_sectors, 1); + bdrv_set_dirty(bs, sector_num, nb_sectors); } if (bs->wr_highest_sector < sector_num + nb_sectors - 1) { @@ -2960,9 +2960,7 @@ int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num, if (bdrv_check_request(bs, sector_num, nb_sectors)) return -EIO; - if (bs->dirty_bitmap) { - set_dirty_bitmap(bs, sector_num, nb_sectors, 1); - } + assert(!bs->dirty_bitmap); return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors); } @@ -4269,13 +4267,54 @@ int bdrv_get_dirty(BlockDriverState *bs, int64_t sector) if (bs->dirty_bitmap && (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) { - return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] & - (1UL << (chunk % (sizeof(unsigned long) * 8)))); + return !!(bs->dirty_bitmap[chunk / BITS_PER_LONG] & + (1UL << (chunk % BITS_PER_LONG))); } else { return 0; } } +int64_t bdrv_get_next_dirty(BlockDriverState *bs, int64_t sector) +{ + int64_t chunk; + int bit, elem; + + /* Avoid an infinite loop. */ + assert(bs->dirty_count > 0); + + sector = (sector | (BDRV_SECTORS_PER_DIRTY_CHUNK - 1)) + 1; + chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK; + + QEMU_BUILD_BUG_ON(sizeof(bs->dirty_bitmap[0]) * 8 != BITS_PER_LONG); + elem = chunk / BITS_PER_LONG; + bit = chunk % BITS_PER_LONG; + for (;;) { + if (sector >= bs->total_sectors) { + sector = 0; + bit = elem = 0; + } + if (bit == 0 && bs->dirty_bitmap[elem] == 0) { + sector += BDRV_SECTORS_PER_DIRTY_CHUNK * BITS_PER_LONG; + elem++; + } else { + if (bs->dirty_bitmap[elem] & (1UL << bit)) { + return sector; + } + sector += BDRV_SECTORS_PER_DIRTY_CHUNK; + if (++bit == BITS_PER_LONG) { + bit = 0; + elem++; + } + } + } +} + +void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, + int nr_sectors) +{ + set_dirty_bitmap(bs, cur_sector, nr_sectors, 1); +} + void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors) { diff --git a/block.h b/block.h index 096fa09..27b8f80 100644 --- a/block.h +++ b/block.h @@ -353,8 +353,9 @@ void *qemu_blockalign(BlockDriverState *bs, size_t size); void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable); int bdrv_get_dirty(BlockDriverState *bs, int64_t sector); -void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, - int nr_sectors); +void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors); +void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors); +int64_t bdrv_get_next_dirty(BlockDriverState *bs, int64_t sector); int64_t bdrv_get_dirty_count(BlockDriverState *bs); void bdrv_enable_copy_on_read(BlockDriverState *bs); -- cgit v1.1 From b9a9b3a4626aa099f829e2a6036bfaa0c8e47700 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 1 Aug 2012 15:23:44 +0200 Subject: block: export dirty bitmap information in query-block Signed-off-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- block.c | 7 +++++++ qapi-schema.json | 18 +++++++++++++++++- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/block.c b/block.c index 2c3d3da..ad5e240 100644 --- a/block.c +++ b/block.c @@ -2837,6 +2837,13 @@ BlockInfo *bdrv_query_info(BlockDriverState *bs) info->io_status = bs->iostatus; } + if (bs->dirty_bitmap) { + info->has_dirty = true; + info->dirty = g_malloc0(sizeof(*info->dirty)); + info->dirty->count = bdrv_get_dirty_count(bs) * + BDRV_SECTORS_PER_DIRTY_CHUNK * BDRV_SECTOR_SIZE; + } + if (bs->drv) { info->has_inserted = true; info->inserted = g_malloc0(sizeof(*info->inserted)); diff --git a/qapi-schema.json b/qapi-schema.json index 36278cc..dfcbb67 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -661,6 +661,18 @@ { 'enum': 'BlockDeviceIoStatus', 'data': [ 'ok', 'failed', 'nospace' ] } ## +# @BlockDirtyInfo: +# +# Block dirty bitmap information. +# +# @count: number of dirty bytes according to the dirty bitmap +# +# Since: 1.3 +## +{ 'type': 'BlockDirtyInfo', + 'data': {'count': 'int'} } + +## # @BlockInfo: # # Block device information. This structure describes a virtual device and @@ -679,6 +691,9 @@ # @tray_open: #optional True if the device has a tray and it is open # (only present if removable is true) # +# @dirty: #optional dirty bitmap information (only present if the dirty +# bitmap is enabled) +# # @io-status: #optional @BlockDeviceIoStatus. Only present if the device # supports it and the VM is configured to stop on errors # @@ -690,7 +705,8 @@ { 'type': 'BlockInfo', 'data': {'device': 'str', 'type': 'str', 'removable': 'bool', 'locked': 'bool', '*inserted': 'BlockDeviceInfo', - '*tray_open': 'bool', '*io-status': 'BlockDeviceIoStatus'} } + '*tray_open': 'bool', '*io-status': 'BlockDeviceIoStatus', + '*dirty': 'BlockDirtyInfo' } } ## # @query-block: -- cgit v1.1 From 65f4632243f526958aa1f6b3911add98329c3796 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 18 Oct 2012 16:49:20 +0200 Subject: block: rename block_job_complete to block_job_completed The imperative will be used for the QMP command. Signed-off-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- block/commit.c | 2 +- block/stream.c | 4 ++-- blockjob.c | 2 +- blockjob.h | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/block/commit.c b/block/commit.c index 13d9e82..fae7958 100644 --- a/block/commit.c +++ b/block/commit.c @@ -160,7 +160,7 @@ exit_restore_reopen: bdrv_reopen(overlay_bs, s->orig_overlay_flags, NULL); } - block_job_complete(&s->common, ret); + block_job_completed(&s->common, ret); } static void commit_set_speed(BlockJob *job, int64_t speed, Error **errp) diff --git a/block/stream.c b/block/stream.c index 7926652..0c0fc7a 100644 --- a/block/stream.c +++ b/block/stream.c @@ -86,7 +86,7 @@ static void coroutine_fn stream_run(void *opaque) s->common.len = bdrv_getlength(bs); if (s->common.len < 0) { - block_job_complete(&s->common, s->common.len); + block_job_completed(&s->common, s->common.len); return; } @@ -184,7 +184,7 @@ wait: } qemu_vfree(buf); - block_job_complete(&s->common, ret); + block_job_completed(&s->common, ret); } static void stream_set_speed(BlockJob *job, int64_t speed, Error **errp) diff --git a/blockjob.c b/blockjob.c index f55f55a..b5c16f3 100644 --- a/blockjob.c +++ b/blockjob.c @@ -71,7 +71,7 @@ void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs, return job; } -void block_job_complete(BlockJob *job, int ret) +void block_job_completed(BlockJob *job, int ret) { BlockDriverState *bs = job->bs; diff --git a/blockjob.h b/blockjob.h index 930cc3c..c2261a9 100644 --- a/blockjob.h +++ b/blockjob.h @@ -135,14 +135,14 @@ void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs, void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns); /** - * block_job_complete: + * block_job_completed: * @job: The job being completed. * @ret: The status code. * * Call the completion function that was registered at creation time, and * free @job. */ -void block_job_complete(BlockJob *job, int ret); +void block_job_completed(BlockJob *job, int ret); /** * block_job_set_speed: -- cgit v1.1 From aeae883baf2377b714a41529f94905046fa058f3 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 18 Oct 2012 16:49:21 +0200 Subject: block: add block-job-complete While streaming can be dropped as soon as it progressed through the whole image, mirroring needs to be completed manually for two reasons: 1) so that management knows exactly when the VM switches to the target; 2) because for other use cases such as replication, we may leave the operation running for the whole life of the virtual machine. Add a new block job command that manually completes background operations. Signed-off-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- blockdev.c | 13 +++++++++++++ blockjob.c | 10 ++++++++++ blockjob.h | 15 +++++++++++++++ hmp-commands.hx | 17 ++++++++++++++++- hmp.c | 10 ++++++++++ hmp.h | 1 + qapi-schema.json | 25 +++++++++++++++++++++++++ qerror.h | 3 +++ qmp-commands.hx | 5 +++++ trace-events | 1 + 10 files changed, 99 insertions(+), 1 deletion(-) diff --git a/blockdev.c b/blockdev.c index 46e4bbd..02d3e0b 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1266,6 +1266,19 @@ void qmp_block_job_resume(const char *device, Error **errp) block_job_resume(job); } +void qmp_block_job_complete(const char *device, Error **errp) +{ + BlockJob *job = find_block_job(device); + + if (!job) { + error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device); + return; + } + + trace_qmp_block_job_complete(job); + block_job_complete(job, errp); +} + static void do_qmp_query_block_jobs_one(void *opaque, BlockDriverState *bs) { BlockJobInfoList **prev = opaque; diff --git a/blockjob.c b/blockjob.c index b5c16f3..c93a0e0 100644 --- a/blockjob.c +++ b/blockjob.c @@ -99,6 +99,16 @@ void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp) job->speed = speed; } +void block_job_complete(BlockJob *job, Error **errp) +{ + if (job->paused || job->cancelled || !job->job_type->complete) { + error_set(errp, QERR_BLOCK_JOB_NOT_READY, job->bs->device_name); + return; + } + + job->job_type->complete(job, errp); +} + void block_job_pause(BlockJob *job) { job->paused = true; diff --git a/blockjob.h b/blockjob.h index c2261a9..c44e2ea 100644 --- a/blockjob.h +++ b/blockjob.h @@ -41,6 +41,12 @@ typedef struct BlockJobType { /** Optional callback for job types that support setting a speed limit */ void (*set_speed)(BlockJob *job, int64_t speed, Error **errp); + + /** + * Optional callback for job types whose completion must be triggered + * manually. + */ + void (*complete)(BlockJob *job, Error **errp); } BlockJobType; /** @@ -164,6 +170,15 @@ void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp); void block_job_cancel(BlockJob *job); /** + * block_job_complete: + * @job: The job to be completed. + * @errp: Error object. + * + * Asynchronously complete the specified job. + */ +void block_job_complete(BlockJob *job, Error **errp); + +/** * block_job_is_cancelled: * @job: The job being queried. * diff --git a/hmp-commands.hx b/hmp-commands.hx index e0b537d..fb2f237 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -109,7 +109,22 @@ ETEXI STEXI @item block_job_cancel @findex block_job_cancel -Stop an active block streaming operation. +Stop an active background block operation (streaming, mirroring). +ETEXI + + { + .name = "block_job_complete", + .args_type = "device:B", + .params = "device", + .help = "stop an active background block operation", + .mhandler.cmd = hmp_block_job_complete, + }, + +STEXI +@item block_job_complete +@findex block_job_complete +Manually trigger completion of an active background block operation. +For mirroring, this will switch the device to the destination path. ETEXI { diff --git a/hmp.c b/hmp.c index 2b97982..574517a 100644 --- a/hmp.c +++ b/hmp.c @@ -990,6 +990,16 @@ void hmp_block_job_resume(Monitor *mon, const QDict *qdict) hmp_handle_error(mon, &error); } +void hmp_block_job_complete(Monitor *mon, const QDict *qdict) +{ + Error *error = NULL; + const char *device = qdict_get_str(qdict, "device"); + + qmp_block_job_complete(device, &error); + + hmp_handle_error(mon, &error); +} + typedef struct MigrationStatus { QEMUTimer *timer; diff --git a/hmp.h b/hmp.h index 71ea384..7bdd23c 100644 --- a/hmp.h +++ b/hmp.h @@ -66,6 +66,7 @@ void hmp_block_job_set_speed(Monitor *mon, const QDict *qdict); void hmp_block_job_cancel(Monitor *mon, const QDict *qdict); void hmp_block_job_pause(Monitor *mon, const QDict *qdict); void hmp_block_job_resume(Monitor *mon, const QDict *qdict); +void hmp_block_job_complete(Monitor *mon, const QDict *qdict); void hmp_migrate(Monitor *mon, const QDict *qdict); void hmp_device_del(Monitor *mon, const QDict *qdict); void hmp_dump_guest_memory(Monitor *mon, const QDict *qdict); diff --git a/qapi-schema.json b/qapi-schema.json index dfcbb67..9482976 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -2033,6 +2033,31 @@ { 'command': 'block-job-resume', 'data': { 'device': 'str' } } ## +# @block-job-complete: +# +# Manually trigger completion of an active background block operation. This +# is supported for drive mirroring, where it also switches the device to +# write to the target path only. +# +# This command completes an active background block operation synchronously. +# The ordering of this command's return with the BLOCK_JOB_COMPLETED event +# is not defined. Note that if an I/O error occurs during the processing of +# this command: 1) the command itself will fail; 2) the error will be processed +# according to the rerror/werror arguments that were specified when starting +# the operation. +# +# A cancelled or paused job cannot be completed. +# +# @device: the device name +# +# Returns: Nothing on success +# If no background operation is active on this device, DeviceNotActive +# +# Since: 1.3 +## +{ 'command': 'block-job-complete', 'data': { 'device': 'str' } } + +## # @ObjectTypeInfo: # # This structure describes a search result from @qom-list-types diff --git a/qerror.h b/qerror.h index c91708c..dacac52 100644 --- a/qerror.h +++ b/qerror.h @@ -54,6 +54,9 @@ void assert_no_error(Error *err); #define QERR_BLOCK_JOB_PAUSED \ ERROR_CLASS_GENERIC_ERROR, "The block job for device '%s' is currently paused" +#define QERR_BLOCK_JOB_NOT_READY \ + ERROR_CLASS_GENERIC_ERROR, "The active block job for device '%s' cannot be completed" + #define QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED \ ERROR_CLASS_GENERIC_ERROR, "Block format '%s' used by device '%s' does not support feature '%s'" diff --git a/qmp-commands.hx b/qmp-commands.hx index 5ba8c48..4f9c711 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -843,6 +843,11 @@ EQMP .mhandler.cmd_new = qmp_marshal_input_block_job_resume, }, { + .name = "block-job-complete", + .args_type = "device:B", + .mhandler.cmd_new = qmp_marshal_input_block_job_complete, + }, + { .name = "transaction", .args_type = "actions:q", .mhandler.cmd_new = qmp_marshal_input_transaction, diff --git a/trace-events b/trace-events index e2d4580..9ab8e27 100644 --- a/trace-events +++ b/trace-events @@ -81,6 +81,7 @@ commit_start(void *bs, void *base, void *top, void *s, void *co, void *opaque) " qmp_block_job_cancel(void *job) "job %p" qmp_block_job_pause(void *job) "job %p" qmp_block_job_resume(void *job) "job %p" +qmp_block_job_complete(void *job) "job %p" block_job_cb(void *bs, void *job, int ret) "bs %p job %p ret %d" qmp_block_stream(void *bs, void *job) "bs %p job %p" -- cgit v1.1 From a66a2a368383e627b929bf42d1b972822491404b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 23 Jul 2012 15:15:47 +0200 Subject: block: introduce BLOCK_JOB_READY event Even for jobs that need to be manually completed, management may want to take care itself of the completion, not requiring the user to issue a command to terminate the job. In this case we want to avoid that they poll us continuously, waiting for completion to become available. Thus, add a new event that signals the phase switch and the availability of the block-job-complete command. Signed-off-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- QMP/qmp-events.txt | 18 ++++++++++++++++++ blockdev.c | 14 -------------- blockjob.c | 21 +++++++++++++++++++++ blockjob.h | 16 ++++++++++++++++ monitor.c | 1 + monitor.h | 1 + qapi-schema.json | 3 ++- 7 files changed, 59 insertions(+), 15 deletions(-) diff --git a/QMP/qmp-events.txt b/QMP/qmp-events.txt index 987c575..b2698e4 100644 --- a/QMP/qmp-events.txt +++ b/QMP/qmp-events.txt @@ -118,6 +118,24 @@ Example: "action": "stop" }, "timestamp": { "seconds": 1265044230, "microseconds": 450486 } } +BLOCK_JOB_READY +--------------- + +Emitted when a block job is ready to complete. + +Data: + +- "device": device name (json-string) + +Example: + +{ "event": "BLOCK_JOB_READY", + "data": { "device": "ide0-hd1" }, + "timestamp": { "seconds": 1265044230, "microseconds": 450486 } } + +Note: The "ready to complete" status is always reset by a BLOCK_JOB_ERROR +event. + DEVICE_TRAY_MOVED ----------------- diff --git a/blockdev.c b/blockdev.c index 02d3e0b..248d5f6 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1056,20 +1056,6 @@ void qmp_block_resize(const char *device, int64_t size, Error **errp) } } -static QObject *qobject_from_block_job(BlockJob *job) -{ - return qobject_from_jsonf("{ 'type': %s," - "'device': %s," - "'len': %" PRId64 "," - "'offset': %" PRId64 "," - "'speed': %" PRId64 " }", - job->job_type->job_type, - bdrv_get_device_name(job->bs), - job->len, - job->offset, - job->speed); -} - static void block_job_cb(void *opaque, int ret) { BlockDriverState *bs = opaque; diff --git a/blockjob.c b/blockjob.c index c93a0e0..fbb7e1c 100644 --- a/blockjob.c +++ b/blockjob.c @@ -225,6 +225,27 @@ static void block_job_iostatus_set_err(BlockJob *job, int error) } +QObject *qobject_from_block_job(BlockJob *job) +{ + return qobject_from_jsonf("{ 'type': %s," + "'device': %s," + "'len': %" PRId64 "," + "'offset': %" PRId64 "," + "'speed': %" PRId64 " }", + job->job_type->job_type, + bdrv_get_device_name(job->bs), + job->len, + job->offset, + job->speed); +} + +void block_job_ready(BlockJob *job) +{ + QObject *data = qobject_from_block_job(job); + monitor_protocol_event(QEVENT_BLOCK_JOB_READY, data); + qobject_decref(data); +} + BlockErrorAction block_job_error_action(BlockJob *job, BlockDriverState *bs, BlockdevOnError on_err, int is_read, int error) diff --git a/blockjob.h b/blockjob.h index c44e2ea..fb2392e 100644 --- a/blockjob.h +++ b/blockjob.h @@ -211,6 +211,22 @@ void block_job_pause(BlockJob *job); void block_job_resume(BlockJob *job); /** + * qobject_from_block_job: + * @job: The job whose information is requested. + * + * Return a QDict corresponding to @job's query-block-jobs entry. + */ +QObject *qobject_from_block_job(BlockJob *job); + +/** + * block_job_ready: + * @job: The job which is now ready to complete. + * + * Send a BLOCK_JOB_READY event for the specified job. + */ +void block_job_ready(BlockJob *job); + +/** * block_job_is_paused: * @job: The job being queried. * diff --git a/monitor.c b/monitor.c index 1d3dd9a..eeef32e 100644 --- a/monitor.c +++ b/monitor.c @@ -451,6 +451,7 @@ static const char *monitor_event_names[] = { [QEVENT_BLOCK_JOB_COMPLETED] = "BLOCK_JOB_COMPLETED", [QEVENT_BLOCK_JOB_CANCELLED] = "BLOCK_JOB_CANCELLED", [QEVENT_BLOCK_JOB_ERROR] = "BLOCK_JOB_ERROR", + [QEVENT_BLOCK_JOB_READY] = "BLOCK_JOB_READY", [QEVENT_DEVICE_TRAY_MOVED] = "DEVICE_TRAY_MOVED", [QEVENT_SUSPEND] = "SUSPEND", [QEVENT_SUSPEND_DISK] = "SUSPEND_DISK", diff --git a/monitor.h b/monitor.h index d4c017e..b4ef955 100644 --- a/monitor.h +++ b/monitor.h @@ -39,6 +39,7 @@ typedef enum MonitorEvent { QEVENT_BLOCK_JOB_COMPLETED, QEVENT_BLOCK_JOB_CANCELLED, QEVENT_BLOCK_JOB_ERROR, + QEVENT_BLOCK_JOB_READY, QEVENT_DEVICE_TRAY_MOVED, QEVENT_SUSPEND, QEVENT_SUSPEND_DISK, diff --git a/qapi-schema.json b/qapi-schema.json index 9482976..37bbeca 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -2037,7 +2037,8 @@ # # Manually trigger completion of an active background block operation. This # is supported for drive mirroring, where it also switches the device to -# write to the target path only. +# write to the target path only. The ability to complete is signaled with +# a BLOCK_JOB_READY event. # # This command completes an active background block operation synchronously. # The ordering of this command's return with the BLOCK_JOB_COMPLETED event -- cgit v1.1 From 893f7ebafe4e8afc0ce4dbd9e64b3752f3036bbb Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 18 Oct 2012 16:49:23 +0200 Subject: mirror: introduce mirror job This patch adds the implementation of a new job that mirrors a disk to a new image while letting the guest continue using the old image. The target is treated as a "black box" and data is copied from the source to the target in the background. This can be used for several purposes, including storage migration, continuous replication, and observation of the guest I/O in an external program. It is also a first step in replacing the inefficient block migration code that is part of QEMU. The job is possibly never-ending, but it is logically structured into two phases: 1) copy all data as fast as possible until the target first gets in sync with the source; 2) keep target in sync and ensure that reopening to the target gets a correct (full) copy of the source data. The second phase is indicated by the progress in "info block-jobs" reporting the current offset to be equal to the length of the file. When the job is cancelled in the second phase, QEMU will run the job until the source is clean and quiescent, then it will report successful completion of the job. In other words, the BLOCK_JOB_CANCELLED event means that the target may _not_ be consistent with a past state of the source; the BLOCK_JOB_COMPLETED event means that the target is consistent with a past state of the source. (Note that it could already happen that management lost the race against QEMU and got a completion event instead of cancellation). It is not yet possible to complete the job and switch over to the target disk. The next patches will fix this and add many refinements to the basic idea introduced here. These include improved error management, some tunable knobs and performance optimizations. Signed-off-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- block/Makefile.objs | 1 + block/mirror.c | 235 ++++++++++++++++++++++++++++++++++++++++++++++++++++ block_int.h | 20 +++++ qapi-schema.json | 17 ++++ trace-events | 7 ++ 5 files changed, 280 insertions(+) create mode 100644 block/mirror.c diff --git a/block/Makefile.objs b/block/Makefile.objs index 554f429..806e526 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -12,3 +12,4 @@ block-obj-$(CONFIG_GLUSTERFS) += gluster.o common-obj-y += stream.o common-obj-y += commit.o +common-obj-y += mirror.o diff --git a/block/mirror.c b/block/mirror.c new file mode 100644 index 0000000..b353798 --- /dev/null +++ b/block/mirror.c @@ -0,0 +1,235 @@ +/* + * Image mirroring + * + * Copyright Red Hat, Inc. 2012 + * + * Authors: + * Paolo Bonzini + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#include "trace.h" +#include "blockjob.h" +#include "block_int.h" +#include "qemu/ratelimit.h" + +enum { + /* + * Size of data buffer for populating the image file. This should be large + * enough to process multiple clusters in a single call, so that populating + * contiguous regions of the image is efficient. + */ + BLOCK_SIZE = 512 * BDRV_SECTORS_PER_DIRTY_CHUNK, /* in bytes */ +}; + +#define SLICE_TIME 100000000ULL /* ns */ + +typedef struct MirrorBlockJob { + BlockJob common; + RateLimit limit; + BlockDriverState *target; + MirrorSyncMode mode; + int64_t sector_num; + uint8_t *buf; +} MirrorBlockJob; + +static int coroutine_fn mirror_iteration(MirrorBlockJob *s) +{ + BlockDriverState *source = s->common.bs; + BlockDriverState *target = s->target; + QEMUIOVector qiov; + int ret, nb_sectors; + int64_t end; + struct iovec iov; + + end = s->common.len >> BDRV_SECTOR_BITS; + s->sector_num = bdrv_get_next_dirty(source, s->sector_num); + nb_sectors = MIN(BDRV_SECTORS_PER_DIRTY_CHUNK, end - s->sector_num); + bdrv_reset_dirty(source, s->sector_num, nb_sectors); + + /* Copy the dirty cluster. */ + iov.iov_base = s->buf; + iov.iov_len = nb_sectors * 512; + qemu_iovec_init_external(&qiov, &iov, 1); + + trace_mirror_one_iteration(s, s->sector_num, nb_sectors); + ret = bdrv_co_readv(source, s->sector_num, nb_sectors, &qiov); + if (ret < 0) { + return ret; + } + return bdrv_co_writev(target, s->sector_num, nb_sectors, &qiov); +} + +static void coroutine_fn mirror_run(void *opaque) +{ + MirrorBlockJob *s = opaque; + BlockDriverState *bs = s->common.bs; + int64_t sector_num, end; + int ret = 0; + int n; + bool synced = false; + + if (block_job_is_cancelled(&s->common)) { + goto immediate_exit; + } + + s->common.len = bdrv_getlength(bs); + if (s->common.len < 0) { + block_job_completed(&s->common, s->common.len); + return; + } + + end = s->common.len >> BDRV_SECTOR_BITS; + s->buf = qemu_blockalign(bs, BLOCK_SIZE); + + if (s->mode != MIRROR_SYNC_MODE_NONE) { + /* First part, loop on the sectors and initialize the dirty bitmap. */ + BlockDriverState *base; + base = s->mode == MIRROR_SYNC_MODE_FULL ? NULL : bs->backing_hd; + for (sector_num = 0; sector_num < end; ) { + int64_t next = (sector_num | (BDRV_SECTORS_PER_DIRTY_CHUNK - 1)) + 1; + ret = bdrv_co_is_allocated_above(bs, base, + sector_num, next - sector_num, &n); + + if (ret < 0) { + goto immediate_exit; + } + + assert(n > 0); + if (ret == 1) { + bdrv_set_dirty(bs, sector_num, n); + sector_num = next; + } else { + sector_num += n; + } + } + } + + s->sector_num = -1; + for (;;) { + uint64_t delay_ns; + int64_t cnt; + bool should_complete; + + cnt = bdrv_get_dirty_count(bs); + if (cnt != 0) { + ret = mirror_iteration(s); + if (ret < 0) { + goto immediate_exit; + } + cnt = bdrv_get_dirty_count(bs); + } + + should_complete = false; + if (cnt == 0) { + trace_mirror_before_flush(s); + ret = bdrv_flush(s->target); + if (ret < 0) { + goto immediate_exit; + } + + /* We're out of the streaming phase. From now on, if the job + * is cancelled we will actually complete all pending I/O and + * report completion. This way, block-job-cancel will leave + * the target in a consistent state. + */ + synced = true; + s->common.offset = end * BDRV_SECTOR_SIZE; + should_complete = block_job_is_cancelled(&s->common); + cnt = bdrv_get_dirty_count(bs); + } + + if (cnt == 0 && should_complete) { + /* The dirty bitmap is not updated while operations are pending. + * If we're about to exit, wait for pending operations before + * calling bdrv_get_dirty_count(bs), or we may exit while the + * source has dirty data to copy! + * + * Note that I/O can be submitted by the guest while + * mirror_populate runs. + */ + trace_mirror_before_drain(s, cnt); + bdrv_drain_all(); + cnt = bdrv_get_dirty_count(bs); + } + + ret = 0; + trace_mirror_before_sleep(s, cnt, synced); + if (!synced) { + /* Publish progress */ + s->common.offset = end * BDRV_SECTOR_SIZE - cnt * BLOCK_SIZE; + + if (s->common.speed) { + delay_ns = ratelimit_calculate_delay(&s->limit, BDRV_SECTORS_PER_DIRTY_CHUNK); + } else { + delay_ns = 0; + } + + /* Note that even when no rate limit is applied we need to yield + * with no pending I/O here so that qemu_aio_flush() returns. + */ + block_job_sleep_ns(&s->common, rt_clock, delay_ns); + if (block_job_is_cancelled(&s->common)) { + break; + } + } else if (!should_complete) { + delay_ns = (cnt == 0 ? SLICE_TIME : 0); + block_job_sleep_ns(&s->common, rt_clock, delay_ns); + } else if (cnt == 0) { + /* The two disks are in sync. Exit and report successful + * completion. + */ + assert(QLIST_EMPTY(&bs->tracked_requests)); + s->common.cancelled = false; + break; + } + } + +immediate_exit: + g_free(s->buf); + bdrv_set_dirty_tracking(bs, false); + bdrv_close(s->target); + bdrv_delete(s->target); + block_job_completed(&s->common, ret); +} + +static void mirror_set_speed(BlockJob *job, int64_t speed, Error **errp) +{ + MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); + + if (speed < 0) { + error_set(errp, QERR_INVALID_PARAMETER, "speed"); + return; + } + ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME); +} + +static BlockJobType mirror_job_type = { + .instance_size = sizeof(MirrorBlockJob), + .job_type = "mirror", + .set_speed = mirror_set_speed, +}; + +void mirror_start(BlockDriverState *bs, BlockDriverState *target, + int64_t speed, MirrorSyncMode mode, + BlockDriverCompletionFunc *cb, + void *opaque, Error **errp) +{ + MirrorBlockJob *s; + + s = block_job_create(&mirror_job_type, bs, speed, cb, opaque, errp); + if (!s) { + return; + } + + s->target = target; + s->mode = mode; + bdrv_set_dirty_tracking(bs, true); + bdrv_set_enable_write_cache(s->target, true); + s->common.co = qemu_coroutine_create(mirror_run); + trace_mirror_start(bs, s, s->common.co, opaque); + qemu_coroutine_enter(s->common.co, s); +} diff --git a/block_int.h b/block_int.h index f4bae04..aaa46a8 100644 --- a/block_int.h +++ b/block_int.h @@ -331,4 +331,24 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base, BlockdevOnError on_error, BlockDriverCompletionFunc *cb, void *opaque, Error **errp); +/* + * mirror_start: + * @bs: Block device to operate on. + * @target: Block device to write to. + * @speed: The maximum speed, in bytes per second, or 0 for unlimited. + * @mode: Whether to collapse all images in the chain to the target. + * @cb: Completion function for the job. + * @opaque: Opaque pointer value passed to @cb. + * @errp: Error object. + * + * Start a mirroring operation on @bs. Clusters that are allocated + * in @bs will be written to @bs until the job is cancelled or + * manually completed. At the end of a successful mirroring job, + * @bs will be switched to read from @target. + */ +void mirror_start(BlockDriverState *bs, BlockDriverState *target, + int64_t speed, MirrorSyncMode mode, + BlockDriverCompletionFunc *cb, + void *opaque, Error **errp); + #endif /* BLOCK_INT_H */ diff --git a/qapi-schema.json b/qapi-schema.json index 37bbeca..8c4b7c8 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -1166,6 +1166,23 @@ 'data': ['report', 'ignore', 'enospc', 'stop'] } ## +# @MirrorSyncMode: +# +# An enumeration of possible behaviors for the initial synchronization +# phase of storage mirroring. +# +# @top: copies data in the topmost image to the destination +# +# @full: copies data from all images to the destination +# +# @none: only copy data written from now on +# +# Since: 1.3 +## +{ 'enum': 'MirrorSyncMode', + 'data': ['top', 'full', 'none'] } + +## # @BlockJobInfo: # # Information about a long-running block device operation. diff --git a/trace-events b/trace-events index 9ab8e27..09b5d55 100644 --- a/trace-events +++ b/trace-events @@ -77,6 +77,13 @@ stream_start(void *bs, void *base, void *s, void *co, void *opaque) "bs %p base commit_one_iteration(void *s, int64_t sector_num, int nb_sectors, int is_allocated) "s %p sector_num %"PRId64" nb_sectors %d is_allocated %d" commit_start(void *bs, void *base, void *top, void *s, void *co, void *opaque) "bs %p base %p top %p s %p co %p opaque %p" +# block/mirror.c +mirror_start(void *bs, void *s, void *co, void *opaque) "bs %p s %p co %p opaque %p" +mirror_before_flush(void *s) "s %p" +mirror_before_drain(void *s, int64_t cnt) "s %p dirty count %"PRId64 +mirror_before_sleep(void *s, int64_t cnt, int synced) "s %p dirty count %"PRId64" synced %d" +mirror_one_iteration(void *s, int64_t sector_num, int nb_sectors) "s %p sector_num %"PRId64" nb_sectors %d" + # blockdev.c qmp_block_job_cancel(void *job) "job %p" qmp_block_job_pause(void *job) "job %p" -- cgit v1.1 From d9b902db3fb71fdcdfbb2aa9cc94a410dee864e5 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 18 Oct 2012 16:49:24 +0200 Subject: qmp: add drive-mirror command This adds the monitor commands that start the mirroring job. Signed-off-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- blockdev.c | 124 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ hmp-commands.hx | 21 ++++++++++ hmp.c | 28 +++++++++++++ hmp.h | 1 + qapi-schema.json | 34 +++++++++++++++ qmp-commands.hx | 42 +++++++++++++++++++ 6 files changed, 250 insertions(+) diff --git a/blockdev.c b/blockdev.c index 248d5f6..431c678 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1181,6 +1181,130 @@ void qmp_block_commit(const char *device, drive_get_ref(drive_get_by_blockdev(bs)); } +void qmp_drive_mirror(const char *device, const char *target, + bool has_format, const char *format, + enum MirrorSyncMode sync, + bool has_mode, enum NewImageMode mode, + bool has_speed, int64_t speed, Error **errp) +{ + BlockDriverInfo bdi; + BlockDriverState *bs; + BlockDriverState *source, *target_bs; + BlockDriver *proto_drv; + BlockDriver *drv = NULL; + Error *local_err = NULL; + int flags; + uint64_t size; + int ret; + + if (!has_speed) { + speed = 0; + } + if (!has_mode) { + mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; + } + + bs = bdrv_find(device); + if (!bs) { + error_set(errp, QERR_DEVICE_NOT_FOUND, device); + return; + } + + if (!bdrv_is_inserted(bs)) { + error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, device); + return; + } + + if (!has_format) { + format = mode == NEW_IMAGE_MODE_EXISTING ? NULL : bs->drv->format_name; + } + if (format) { + drv = bdrv_find_format(format); + if (!drv) { + error_set(errp, QERR_INVALID_BLOCK_FORMAT, format); + return; + } + } + + if (bdrv_in_use(bs)) { + error_set(errp, QERR_DEVICE_IN_USE, device); + return; + } + + flags = bs->open_flags | BDRV_O_RDWR; + source = bs->backing_hd; + if (!source && sync == MIRROR_SYNC_MODE_TOP) { + sync = MIRROR_SYNC_MODE_FULL; + } + + proto_drv = bdrv_find_protocol(target); + if (!proto_drv) { + error_set(errp, QERR_INVALID_BLOCK_FORMAT, format); + return; + } + + if (sync == MIRROR_SYNC_MODE_FULL && mode != NEW_IMAGE_MODE_EXISTING) { + /* create new image w/o backing file */ + assert(format && drv); + bdrv_get_geometry(bs, &size); + size *= 512; + ret = bdrv_img_create(target, format, + NULL, NULL, NULL, size, flags); + } else { + switch (mode) { + case NEW_IMAGE_MODE_EXISTING: + ret = 0; + break; + case NEW_IMAGE_MODE_ABSOLUTE_PATHS: + /* create new image with backing file */ + ret = bdrv_img_create(target, format, + source->filename, + source->drv->format_name, + NULL, -1, flags); + break; + default: + abort(); + } + } + + if (ret) { + error_set(errp, QERR_OPEN_FILE_FAILED, target); + return; + } + + target_bs = bdrv_new(""); + ret = bdrv_open(target_bs, target, flags | BDRV_O_NO_BACKING, drv); + + if (ret < 0) { + bdrv_delete(target_bs); + error_set(errp, QERR_OPEN_FILE_FAILED, target); + return; + } + + /* We need a backing file if we will copy parts of a cluster. */ + if (bdrv_get_info(target_bs, &bdi) >= 0 && bdi.cluster_size != 0 && + bdi.cluster_size >= BDRV_SECTORS_PER_DIRTY_CHUNK * 512) { + ret = bdrv_open_backing_file(target_bs); + if (ret < 0) { + bdrv_delete(target_bs); + error_set(errp, QERR_OPEN_FILE_FAILED, target); + return; + } + } + + mirror_start(bs, target_bs, speed, sync, block_job_cb, bs, &local_err); + if (local_err != NULL) { + bdrv_delete(target_bs); + error_propagate(errp, local_err); + return; + } + + /* Grab a reference so hotplug does not delete the BlockDriverState from + * underneath us. + */ + drive_get_ref(drive_get_by_blockdev(bs)); +} + static BlockJob *find_block_job(const char *device) { BlockDriverState *bs; diff --git a/hmp-commands.hx b/hmp-commands.hx index fb2f237..f916385 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -1004,6 +1004,27 @@ Snapshot device, using snapshot file as target if provided ETEXI { + .name = "drive_mirror", + .args_type = "reuse:-n,full:-f,device:B,target:s,format:s?", + .params = "[-n] [-f] device target [format]", + .help = "initiates live storage\n\t\t\t" + "migration for a device. The device's contents are\n\t\t\t" + "copied to the new image file, including data that\n\t\t\t" + "is written after the command is started.\n\t\t\t" + "The -n flag requests QEMU to reuse the image found\n\t\t\t" + "in new-image-file, instead of recreating it from scratch.\n\t\t\t" + "The -f flag requests QEMU to copy the whole disk,\n\t\t\t" + "so that the result does not need a backing file.\n\t\t\t", + .mhandler.cmd = hmp_drive_mirror, + }, +STEXI +@item drive_mirror +@findex drive_mirror +Start mirroring a block device's writes to a new destination, +using the specified target. +ETEXI + + { .name = "drive_add", .args_type = "pci_addr:s,opts:s", .params = "[[:]:]\n" diff --git a/hmp.c b/hmp.c index 574517a..e530253 100644 --- a/hmp.c +++ b/hmp.c @@ -771,6 +771,34 @@ void hmp_block_resize(Monitor *mon, const QDict *qdict) hmp_handle_error(mon, &errp); } +void hmp_drive_mirror(Monitor *mon, const QDict *qdict) +{ + const char *device = qdict_get_str(qdict, "device"); + const char *filename = qdict_get_str(qdict, "target"); + const char *format = qdict_get_try_str(qdict, "format"); + int reuse = qdict_get_try_bool(qdict, "reuse", 0); + int full = qdict_get_try_bool(qdict, "full", 0); + enum NewImageMode mode; + Error *errp = NULL; + + if (!filename) { + error_set(&errp, QERR_MISSING_PARAMETER, "target"); + hmp_handle_error(mon, &errp); + return; + } + + if (reuse) { + mode = NEW_IMAGE_MODE_EXISTING; + } else { + mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; + } + + qmp_drive_mirror(device, filename, !!format, format, + full ? MIRROR_SYNC_MODE_FULL : MIRROR_SYNC_MODE_TOP, + true, mode, false, 0, &errp); + hmp_handle_error(mon, &errp); +} + void hmp_snapshot_blkdev(Monitor *mon, const QDict *qdict) { const char *device = qdict_get_str(qdict, "device"); diff --git a/hmp.h b/hmp.h index 7bdd23c..34eb2b3 100644 --- a/hmp.h +++ b/hmp.h @@ -51,6 +51,7 @@ void hmp_block_passwd(Monitor *mon, const QDict *qdict); void hmp_balloon(Monitor *mon, const QDict *qdict); void hmp_block_resize(Monitor *mon, const QDict *qdict); void hmp_snapshot_blkdev(Monitor *mon, const QDict *qdict); +void hmp_drive_mirror(Monitor *mon, const QDict *qdict); void hmp_migrate_cancel(Monitor *mon, const QDict *qdict); void hmp_migrate_set_downtime(Monitor *mon, const QDict *qdict); void hmp_migrate_set_speed(Monitor *mon, const QDict *qdict); diff --git a/qapi-schema.json b/qapi-schema.json index 8c4b7c8..a066cd5 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -1606,6 +1606,40 @@ 'data': { 'device': 'str', '*base': 'str', 'top': 'str', '*speed': 'int' } } +## +# @drive-mirror +# +# Start mirroring a block device's writes to a new destination. +# +# @device: the name of the device whose writes should be mirrored. +# +# @target: the target of the new image. If the file exists, or if it +# is a device, the existing file/device will be used as the new +# destination. If it does not exist, a new file will be created. +# +# @format: #optional the format of the new destination, default is to +# probe if @mode is 'existing', else the format of the source +# +# @mode: #optional whether and how QEMU should create a new image, default is +# 'absolute-paths'. +# +# @speed: #optional the maximum speed, in bytes per second +# +# @sync: what parts of the disk image should be copied to the destination +# (all the disk, only the sectors allocated in the topmost image, or +# only new I/O). +# +# Returns: nothing on success +# If @device is not a valid block device, DeviceNotFound +# +# Since 1.3 +## +{ 'command': 'drive-mirror', + 'data': { 'device': 'str', 'target': 'str', '*format': 'str', + 'sync': 'MirrorSyncMode', '*mode': 'NewImageMode', + '*speed': 'int' } } + +## # @migrate_cancel # # Cancel the current executing migration process. diff --git a/qmp-commands.hx b/qmp-commands.hx index 4f9c711..614baea 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -936,6 +936,48 @@ Example: EQMP { + .name = "drive-mirror", + .args_type = "sync:s,device:B,target:s,speed:i?,mode:s?,format:s?", + .mhandler.cmd_new = qmp_marshal_input_drive_mirror, + }, + +SQMP +drive-mirror +------------ + +Start mirroring a block device's writes to a new destination. target +specifies the target of the new image. If the file exists, or if it is +a device, it will be used as the new destination for writes. If it does not +exist, a new file will be created. format specifies the format of the +mirror image, default is to probe if mode='existing', else the format +of the source. + +Arguments: + +- "device": device name to operate on (json-string) +- "target": name of new image file (json-string) +- "format": format of new image (json-string, optional) +- "mode": how an image file should be created into the target + file/device (NewImageMode, optional, default 'absolute-paths') +- "speed": maximum speed of the streaming job, in bytes per second + (json-int) +- "sync": what parts of the disk image should be copied to the destination; + possibilities include "full" for all the disk, "top" for only the sectors + allocated in the topmost image, or "none" to only replicate new I/O + (MirrorSyncMode). + + +Example: + +-> { "execute": "drive-mirror", "arguments": { "device": "ide-hd0", + "target": "/some/place/my-image", + "sync": "full", + "format": "qcow2" } } +<- { "return": {} } + +EQMP + + { .name = "balloon", .args_type = "value:M", .mhandler.cmd_new = qmp_marshal_input_balloon, -- cgit v1.1 From d63ffd87acad618a4a64b8812b64ad88577ae9b1 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 18 Oct 2012 16:49:25 +0200 Subject: mirror: implement completion Switching to the target of the migration is done mostly asynchronously, and reported to management via the BLOCK_JOB_COMPLETED event; the only synchronous phase is opening the backing files. bdrv_open_backing_file can always be done, even for migration of the full image (aka sync: 'full'). In this case, qmp_drive_mirror will create the target disk with no backing file at all, and bdrv_open_backing_file will be a no-op. Signed-off-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- block/mirror.c | 45 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 40 insertions(+), 5 deletions(-) diff --git a/block/mirror.c b/block/mirror.c index b353798..6320f6a 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -32,6 +32,8 @@ typedef struct MirrorBlockJob { RateLimit limit; BlockDriverState *target; MirrorSyncMode mode; + bool synced; + bool should_complete; int64_t sector_num; uint8_t *buf; } MirrorBlockJob; @@ -70,7 +72,6 @@ static void coroutine_fn mirror_run(void *opaque) int64_t sector_num, end; int ret = 0; int n; - bool synced = false; if (block_job_is_cancelled(&s->common)) { goto immediate_exit; @@ -136,9 +137,14 @@ static void coroutine_fn mirror_run(void *opaque) * report completion. This way, block-job-cancel will leave * the target in a consistent state. */ - synced = true; s->common.offset = end * BDRV_SECTOR_SIZE; - should_complete = block_job_is_cancelled(&s->common); + if (!s->synced) { + block_job_ready(&s->common); + s->synced = true; + } + + should_complete = s->should_complete || + block_job_is_cancelled(&s->common); cnt = bdrv_get_dirty_count(bs); } @@ -157,8 +163,8 @@ static void coroutine_fn mirror_run(void *opaque) } ret = 0; - trace_mirror_before_sleep(s, cnt, synced); - if (!synced) { + trace_mirror_before_sleep(s, cnt, s->synced); + if (!s->synced) { /* Publish progress */ s->common.offset = end * BDRV_SECTOR_SIZE - cnt * BLOCK_SIZE; @@ -191,6 +197,12 @@ static void coroutine_fn mirror_run(void *opaque) immediate_exit: g_free(s->buf); bdrv_set_dirty_tracking(bs, false); + if (s->should_complete && ret == 0) { + if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) { + bdrv_reopen(s->target, bdrv_get_flags(s->common.bs), NULL); + } + bdrv_swap(s->target, s->common.bs); + } bdrv_close(s->target); bdrv_delete(s->target); block_job_completed(&s->common, ret); @@ -207,10 +219,33 @@ static void mirror_set_speed(BlockJob *job, int64_t speed, Error **errp) ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME); } +static void mirror_complete(BlockJob *job, Error **errp) +{ + MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); + int ret; + + ret = bdrv_open_backing_file(s->target); + if (ret < 0) { + char backing_filename[PATH_MAX]; + bdrv_get_full_backing_filename(s->target, backing_filename, + sizeof(backing_filename)); + error_set(errp, QERR_OPEN_FILE_FAILED, backing_filename); + return; + } + if (!s->synced) { + error_set(errp, QERR_BLOCK_JOB_NOT_READY, job->bs->device_name); + return; + } + + s->should_complete = true; + block_job_resume(job); +} + static BlockJobType mirror_job_type = { .instance_size = sizeof(MirrorBlockJob), .job_type = "mirror", .set_speed = mirror_set_speed, + .complete = mirror_complete, }; void mirror_start(BlockDriverState *bs, BlockDriverState *target, -- cgit v1.1 From 44c7ca5ebfb20c8086ac78f72a1bc6566828f956 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 23 Oct 2012 16:39:22 +0200 Subject: qemu-iotests: add mirroring test case Signed-off-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- tests/qemu-iotests/041 | 362 +++++++++++++++++++++++++++++++++++++++++++++ tests/qemu-iotests/041.out | 5 + tests/qemu-iotests/group | 1 + 3 files changed, 368 insertions(+) create mode 100755 tests/qemu-iotests/041 create mode 100644 tests/qemu-iotests/041.out diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041 new file mode 100755 index 0000000..fd9760a --- /dev/null +++ b/tests/qemu-iotests/041 @@ -0,0 +1,362 @@ +#!/usr/bin/env python +# +# Tests for image mirroring. +# +# Copyright (C) 2012 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +import time +import os +import iotests +from iotests import qemu_img, qemu_io +import struct + +backing_img = os.path.join(iotests.test_dir, 'backing.img') +target_backing_img = os.path.join(iotests.test_dir, 'target-backing.img') +test_img = os.path.join(iotests.test_dir, 'test.img') +target_img = os.path.join(iotests.test_dir, 'target.img') + +class ImageMirroringTestCase(iotests.QMPTestCase): + '''Abstract base class for image mirroring test cases''' + + def assert_no_active_mirrors(self): + result = self.vm.qmp('query-block-jobs') + self.assert_qmp(result, 'return', []) + + def cancel_and_wait(self, drive='drive0', wait_ready=True): + '''Cancel a block job and wait for it to finish''' + if wait_ready: + ready = False + while not ready: + for event in self.vm.get_qmp_events(wait=True): + if event['event'] == 'BLOCK_JOB_READY': + self.assert_qmp(event, 'data/type', 'mirror') + self.assert_qmp(event, 'data/device', drive) + ready = True + + result = self.vm.qmp('block-job-cancel', device=drive, + force=not wait_ready) + self.assert_qmp(result, 'return', {}) + + cancelled = False + while not cancelled: + for event in self.vm.get_qmp_events(wait=True): + if event['event'] == 'BLOCK_JOB_COMPLETED' or \ + event['event'] == 'BLOCK_JOB_CANCELLED': + self.assert_qmp(event, 'data/type', 'mirror') + self.assert_qmp(event, 'data/device', drive) + if wait_ready: + self.assertEquals(event['event'], 'BLOCK_JOB_COMPLETED') + self.assert_qmp(event, 'data/offset', self.image_len) + self.assert_qmp(event, 'data/len', self.image_len) + cancelled = True + + self.assert_no_active_mirrors() + + def complete_and_wait(self, drive='drive0', wait_ready=True): + '''Complete a block job and wait for it to finish''' + if wait_ready: + ready = False + while not ready: + for event in self.vm.get_qmp_events(wait=True): + if event['event'] == 'BLOCK_JOB_READY': + self.assert_qmp(event, 'data/type', 'mirror') + self.assert_qmp(event, 'data/device', drive) + ready = True + + result = self.vm.qmp('block-job-complete', device=drive) + self.assert_qmp(result, 'return', {}) + + completed = False + while not completed: + for event in self.vm.get_qmp_events(wait=True): + if event['event'] == 'BLOCK_JOB_COMPLETED': + self.assert_qmp(event, 'data/type', 'mirror') + self.assert_qmp(event, 'data/device', drive) + self.assert_qmp_absent(event, 'data/error') + self.assert_qmp(event, 'data/offset', self.image_len) + self.assert_qmp(event, 'data/len', self.image_len) + completed = True + + self.assert_no_active_mirrors() + + def create_image(self, name, size): + file = open(name, 'w') + i = 0 + while i < size: + sector = struct.pack('>l504xl', i / 512, i / 512) + file.write(sector) + i = i + 512 + file.close() + + def compare_images(self, img1, img2): + try: + qemu_img('convert', '-f', iotests.imgfmt, '-O', 'raw', img1, img1 + '.raw') + qemu_img('convert', '-f', iotests.imgfmt, '-O', 'raw', img2, img2 + '.raw') + file1 = open(img1 + '.raw', 'r') + file2 = open(img2 + '.raw', 'r') + return file1.read() == file2.read() + finally: + if file1 is not None: + file1.close() + if file2 is not None: + file2.close() + try: + os.remove(img1 + '.raw') + except OSError: + pass + try: + os.remove(img2 + '.raw') + except OSError: + pass + +class TestSingleDrive(ImageMirroringTestCase): + image_len = 1 * 1024 * 1024 # MB + + def setUp(self): + self.create_image(backing_img, TestSingleDrive.image_len) + qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % backing_img, test_img) + self.vm = iotests.VM().add_drive(test_img) + self.vm.launch() + + def tearDown(self): + self.vm.shutdown() + os.remove(test_img) + os.remove(backing_img) + try: + os.remove(target_img) + except OSError: + pass + + def test_complete(self): + self.assert_no_active_mirrors() + + result = self.vm.qmp('drive-mirror', device='drive0', sync='full', + target=target_img) + self.assert_qmp(result, 'return', {}) + + self.complete_and_wait() + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/inserted/file', target_img) + self.vm.shutdown() + self.assertTrue(self.compare_images(test_img, target_img), + 'target image does not match source after mirroring') + + def test_cancel(self): + self.assert_no_active_mirrors() + + result = self.vm.qmp('drive-mirror', device='drive0', sync='full', + target=target_img) + self.assert_qmp(result, 'return', {}) + + self.cancel_and_wait(wait_ready=False) + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/inserted/file', test_img) + self.vm.shutdown() + + def test_cancel_after_ready(self): + self.assert_no_active_mirrors() + + result = self.vm.qmp('drive-mirror', device='drive0', sync='full', + target=target_img) + self.assert_qmp(result, 'return', {}) + + self.cancel_and_wait() + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/inserted/file', test_img) + self.vm.shutdown() + self.assertTrue(self.compare_images(test_img, target_img), + 'target image does not match source after mirroring') + + def test_pause(self): + self.assert_no_active_mirrors() + + result = self.vm.qmp('drive-mirror', device='drive0', sync='full', + target=target_img) + self.assert_qmp(result, 'return', {}) + + result = self.vm.qmp('block-job-pause', device='drive0') + self.assert_qmp(result, 'return', {}) + + time.sleep(1) + result = self.vm.qmp('query-block-jobs') + offset = self.dictpath(result, 'return[0]/offset') + + time.sleep(1) + result = self.vm.qmp('query-block-jobs') + self.assert_qmp(result, 'return[0]/offset', offset) + + result = self.vm.qmp('block-job-resume', device='drive0') + self.assert_qmp(result, 'return', {}) + + self.complete_and_wait() + self.vm.shutdown() + self.assertTrue(self.compare_images(test_img, target_img), + 'target image does not match source after mirroring') + + def test_large_cluster(self): + self.assert_no_active_mirrors() + + qemu_img('create', '-f', iotests.imgfmt, '-o', 'cluster_size=%d,backing_file=%s' + % (TestSingleDrive.image_len, backing_img), target_img) + result = self.vm.qmp('drive-mirror', device='drive0', sync='full', + mode='existing', target=target_img) + self.assert_qmp(result, 'return', {}) + + self.complete_and_wait() + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/inserted/file', target_img) + self.vm.shutdown() + self.assertTrue(self.compare_images(test_img, target_img), + 'target image does not match source after mirroring') + + def test_medium_not_found(self): + result = self.vm.qmp('drive-mirror', device='ide1-cd0', sync='full', + target=target_img) + self.assert_qmp(result, 'error/class', 'GenericError') + + def test_image_not_found(self): + result = self.vm.qmp('drive-mirror', device='drive0', sync='full', + mode='existing', target=target_img) + self.assert_qmp(result, 'error/class', 'GenericError') + + def test_device_not_found(self): + result = self.vm.qmp('drive-mirror', device='nonexistent', sync='full', + target=target_img) + self.assert_qmp(result, 'error/class', 'DeviceNotFound') + +class TestMirrorNoBacking(ImageMirroringTestCase): + image_len = 2 * 1024 * 1024 # MB + + def complete_and_wait(self, drive='drive0', wait_ready=True): + self.create_image(target_backing_img, TestMirrorNoBacking.image_len) + return ImageMirroringTestCase.complete_and_wait(self, drive, wait_ready) + + def compare_images(self, img1, img2): + self.create_image(target_backing_img, TestMirrorNoBacking.image_len) + return ImageMirroringTestCase.compare_images(self, img1, img2) + + def setUp(self): + self.create_image(backing_img, TestMirrorNoBacking.image_len) + qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % backing_img, test_img) + self.vm = iotests.VM().add_drive(test_img) + self.vm.launch() + + def tearDown(self): + self.vm.shutdown() + os.remove(test_img) + os.remove(backing_img) + os.remove(target_backing_img) + os.remove(target_img) + + def test_complete(self): + self.assert_no_active_mirrors() + + qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % backing_img, target_img) + result = self.vm.qmp('drive-mirror', device='drive0', sync='full', + mode='existing', target=target_img) + self.assert_qmp(result, 'return', {}) + + self.complete_and_wait() + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/inserted/file', target_img) + self.vm.shutdown() + self.assertTrue(self.compare_images(test_img, target_img), + 'target image does not match source after mirroring') + + def test_cancel(self): + self.assert_no_active_mirrors() + + qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % backing_img, target_img) + result = self.vm.qmp('drive-mirror', device='drive0', sync='full', + mode='existing', target=target_img) + self.assert_qmp(result, 'return', {}) + + self.cancel_and_wait() + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/inserted/file', test_img) + self.vm.shutdown() + self.assertTrue(self.compare_images(test_img, target_img), + 'target image does not match source after mirroring') + +class TestSetSpeed(ImageMirroringTestCase): + image_len = 80 * 1024 * 1024 # MB + + def setUp(self): + qemu_img('create', backing_img, str(TestSetSpeed.image_len)) + qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % backing_img, test_img) + self.vm = iotests.VM().add_drive(test_img) + self.vm.launch() + + def tearDown(self): + self.vm.shutdown() + os.remove(test_img) + os.remove(backing_img) + os.remove(target_img) + + def test_set_speed(self): + self.assert_no_active_mirrors() + + result = self.vm.qmp('drive-mirror', device='drive0', sync='full', + target=target_img) + self.assert_qmp(result, 'return', {}) + + # Default speed is 0 + result = self.vm.qmp('query-block-jobs') + self.assert_qmp(result, 'return[0]/device', 'drive0') + self.assert_qmp(result, 'return[0]/speed', 0) + + result = self.vm.qmp('block-job-set-speed', device='drive0', speed=8 * 1024 * 1024) + self.assert_qmp(result, 'return', {}) + + # Ensure the speed we set was accepted + result = self.vm.qmp('query-block-jobs') + self.assert_qmp(result, 'return[0]/device', 'drive0') + self.assert_qmp(result, 'return[0]/speed', 8 * 1024 * 1024) + + self.cancel_and_wait() + + # Check setting speed in drive-mirror works + result = self.vm.qmp('drive-mirror', device='drive0', sync='full', + target=target_img, speed=4*1024*1024) + self.assert_qmp(result, 'return', {}) + + result = self.vm.qmp('query-block-jobs') + self.assert_qmp(result, 'return[0]/device', 'drive0') + self.assert_qmp(result, 'return[0]/speed', 4 * 1024 * 1024) + + self.cancel_and_wait() + + def test_set_speed_invalid(self): + self.assert_no_active_mirrors() + + result = self.vm.qmp('drive-mirror', device='drive0', sync='full', + target=target_img, speed=-1) + self.assert_qmp(result, 'error/class', 'GenericError') + + self.assert_no_active_mirrors() + + result = self.vm.qmp('drive-mirror', device='drive0', sync='full', + target=target_img) + self.assert_qmp(result, 'return', {}) + + result = self.vm.qmp('block-job-set-speed', device='drive0', speed=-1) + self.assert_qmp(result, 'error/class', 'GenericError') + + self.cancel_and_wait() + +if __name__ == '__main__': + iotests.main(supported_fmts=['qcow2', 'qed']) diff --git a/tests/qemu-iotests/041.out b/tests/qemu-iotests/041.out new file mode 100644 index 0000000..281b69e --- /dev/null +++ b/tests/qemu-iotests/041.out @@ -0,0 +1,5 @@ +............ +---------------------------------------------------------------------- +Ran 12 tests + +OK diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index 7407492..ac86f54 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -47,5 +47,6 @@ 038 rw auto backing 039 rw auto 040 rw auto +041 rw auto backing 042 rw auto quick 043 rw auto backing -- cgit v1.1 From 3bd293c3fdc8b4052b9fc357e0b28cba20e73099 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 18 Oct 2012 16:49:27 +0200 Subject: iostatus: forward block_job_iostatus_reset to block job Signed-off-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- block.c | 3 +++ blockjob.c | 3 +++ blockjob.h | 6 +++++- 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/block.c b/block.c index ad5e240..1564137 100644 --- a/block.c +++ b/block.c @@ -4369,6 +4369,9 @@ void bdrv_iostatus_reset(BlockDriverState *bs) { if (bdrv_iostatus_is_enabled(bs)) { bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK; + if (bs->job) { + block_job_iostatus_reset(bs->job); + } } } diff --git a/blockjob.c b/blockjob.c index fbb7e1c..cda12c6 100644 --- a/blockjob.c +++ b/blockjob.c @@ -142,6 +142,9 @@ bool block_job_is_cancelled(BlockJob *job) void block_job_iostatus_reset(BlockJob *job) { job->iostatus = BLOCK_DEVICE_IO_STATUS_OK; + if (job->job_type->iostatus_reset) { + job->job_type->iostatus_reset(job); + } } struct BlockCancelData { diff --git a/blockjob.h b/blockjob.h index fb2392e..3792b73 100644 --- a/blockjob.h +++ b/blockjob.h @@ -42,6 +42,9 @@ typedef struct BlockJobType { /** Optional callback for job types that support setting a speed limit */ void (*set_speed)(BlockJob *job, int64_t speed, Error **errp); + /** Optional callback for job types that need to forward I/O status reset */ + void (*iostatus_reset)(BlockJob *job); + /** * Optional callback for job types whose completion must be triggered * manually. @@ -253,7 +256,8 @@ int block_job_cancel_sync(BlockJob *job); * block_job_iostatus_reset: * @job: The job whose I/O status should be reset. * - * Reset I/O status on @job. + * Reset I/O status on @job and on BlockDriverState objects it uses, + * other than job->bs. */ void block_job_iostatus_reset(BlockJob *job); -- cgit v1.1 From b952b5589a36114e06201c0d2e82c293dbad2b1f Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 18 Oct 2012 16:49:28 +0200 Subject: mirror: add support for on-source-error/on-target-error Error management is important for mirroring; otherwise, an error on the target (even something as "innocent" as ENOSPC) requires to start again with a full copy. Similar to on_read_error/on_write_error, two separate knobs are provided for on_source_error (reads) and on_target_error (writes). The default is 'report' for both. The 'ignore' policy will leave the sector dirty, so that it will be retried later. Thus, it will not cause corruption. Signed-off-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- block/mirror.c | 94 +++++++++++++++++++++++++++++++++++++++++++------------- block_int.h | 4 +++ blockdev.c | 14 +++++++-- hmp.c | 3 +- qapi-schema.json | 11 ++++++- qmp-commands.hx | 8 ++++- 6 files changed, 108 insertions(+), 26 deletions(-) diff --git a/block/mirror.c b/block/mirror.c index 6320f6a..d6618a4 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -32,13 +32,28 @@ typedef struct MirrorBlockJob { RateLimit limit; BlockDriverState *target; MirrorSyncMode mode; + BlockdevOnError on_source_error, on_target_error; bool synced; bool should_complete; int64_t sector_num; uint8_t *buf; } MirrorBlockJob; -static int coroutine_fn mirror_iteration(MirrorBlockJob *s) +static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read, + int error) +{ + s->synced = false; + if (read) { + return block_job_error_action(&s->common, s->common.bs, + s->on_source_error, true, error); + } else { + return block_job_error_action(&s->common, s->target, + s->on_target_error, false, error); + } +} + +static int coroutine_fn mirror_iteration(MirrorBlockJob *s, + BlockErrorAction *p_action) { BlockDriverState *source = s->common.bs; BlockDriverState *target = s->target; @@ -60,9 +75,21 @@ static int coroutine_fn mirror_iteration(MirrorBlockJob *s) trace_mirror_one_iteration(s, s->sector_num, nb_sectors); ret = bdrv_co_readv(source, s->sector_num, nb_sectors, &qiov); if (ret < 0) { - return ret; + *p_action = mirror_error_action(s, true, -ret); + goto fail; + } + ret = bdrv_co_writev(target, s->sector_num, nb_sectors, &qiov); + if (ret < 0) { + *p_action = mirror_error_action(s, false, -ret); + s->synced = false; + goto fail; } - return bdrv_co_writev(target, s->sector_num, nb_sectors, &qiov); + return 0; + +fail: + /* Try again later. */ + bdrv_set_dirty(source, s->sector_num, nb_sectors); + return ret; } static void coroutine_fn mirror_run(void *opaque) @@ -117,8 +144,9 @@ static void coroutine_fn mirror_run(void *opaque) cnt = bdrv_get_dirty_count(bs); if (cnt != 0) { - ret = mirror_iteration(s); - if (ret < 0) { + BlockErrorAction action = BDRV_ACTION_REPORT; + ret = mirror_iteration(s, &action); + if (ret < 0 && action == BDRV_ACTION_REPORT) { goto immediate_exit; } cnt = bdrv_get_dirty_count(bs); @@ -129,23 +157,25 @@ static void coroutine_fn mirror_run(void *opaque) trace_mirror_before_flush(s); ret = bdrv_flush(s->target); if (ret < 0) { - goto immediate_exit; - } - - /* We're out of the streaming phase. From now on, if the job - * is cancelled we will actually complete all pending I/O and - * report completion. This way, block-job-cancel will leave - * the target in a consistent state. - */ - s->common.offset = end * BDRV_SECTOR_SIZE; - if (!s->synced) { - block_job_ready(&s->common); - s->synced = true; + if (mirror_error_action(s, false, -ret) == BDRV_ACTION_REPORT) { + goto immediate_exit; + } + } else { + /* We're out of the streaming phase. From now on, if the job + * is cancelled we will actually complete all pending I/O and + * report completion. This way, block-job-cancel will leave + * the target in a consistent state. + */ + s->common.offset = end * BDRV_SECTOR_SIZE; + if (!s->synced) { + block_job_ready(&s->common); + s->synced = true; + } + + should_complete = s->should_complete || + block_job_is_cancelled(&s->common); + cnt = bdrv_get_dirty_count(bs); } - - should_complete = s->should_complete || - block_job_is_cancelled(&s->common); - cnt = bdrv_get_dirty_count(bs); } if (cnt == 0 && should_complete) { @@ -197,6 +227,7 @@ static void coroutine_fn mirror_run(void *opaque) immediate_exit: g_free(s->buf); bdrv_set_dirty_tracking(bs, false); + bdrv_iostatus_disable(s->target); if (s->should_complete && ret == 0) { if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) { bdrv_reopen(s->target, bdrv_get_flags(s->common.bs), NULL); @@ -219,6 +250,13 @@ static void mirror_set_speed(BlockJob *job, int64_t speed, Error **errp) ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME); } +static void mirror_iostatus_reset(BlockJob *job) +{ + MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); + + bdrv_iostatus_reset(s->target); +} + static void mirror_complete(BlockJob *job, Error **errp) { MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); @@ -245,25 +283,39 @@ static BlockJobType mirror_job_type = { .instance_size = sizeof(MirrorBlockJob), .job_type = "mirror", .set_speed = mirror_set_speed, + .iostatus_reset= mirror_iostatus_reset, .complete = mirror_complete, }; void mirror_start(BlockDriverState *bs, BlockDriverState *target, int64_t speed, MirrorSyncMode mode, + BlockdevOnError on_source_error, + BlockdevOnError on_target_error, BlockDriverCompletionFunc *cb, void *opaque, Error **errp) { MirrorBlockJob *s; + if ((on_source_error == BLOCKDEV_ON_ERROR_STOP || + on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) && + !bdrv_iostatus_is_enabled(bs)) { + error_set(errp, QERR_INVALID_PARAMETER, "on-source-error"); + return; + } + s = block_job_create(&mirror_job_type, bs, speed, cb, opaque, errp); if (!s) { return; } + s->on_source_error = on_source_error; + s->on_target_error = on_target_error; s->target = target; s->mode = mode; bdrv_set_dirty_tracking(bs, true); bdrv_set_enable_write_cache(s->target, true); + bdrv_set_on_error(s->target, on_target_error, on_target_error); + bdrv_iostatus_enable(s->target); s->common.co = qemu_coroutine_create(mirror_run); trace_mirror_start(bs, s, s->common.co, opaque); qemu_coroutine_enter(s->common.co, s); diff --git a/block_int.h b/block_int.h index aaa46a8..00204eb 100644 --- a/block_int.h +++ b/block_int.h @@ -337,6 +337,8 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base, * @target: Block device to write to. * @speed: The maximum speed, in bytes per second, or 0 for unlimited. * @mode: Whether to collapse all images in the chain to the target. + * @on_source_error: The action to take upon error reading from the source. + * @on_target_error: The action to take upon error writing to the target. * @cb: Completion function for the job. * @opaque: Opaque pointer value passed to @cb. * @errp: Error object. @@ -348,6 +350,8 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base, */ void mirror_start(BlockDriverState *bs, BlockDriverState *target, int64_t speed, MirrorSyncMode mode, + BlockdevOnError on_source_error, + BlockdevOnError on_target_error, BlockDriverCompletionFunc *cb, void *opaque, Error **errp); diff --git a/blockdev.c b/blockdev.c index 431c678..a068a4b 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1185,7 +1185,10 @@ void qmp_drive_mirror(const char *device, const char *target, bool has_format, const char *format, enum MirrorSyncMode sync, bool has_mode, enum NewImageMode mode, - bool has_speed, int64_t speed, Error **errp) + bool has_speed, int64_t speed, + bool has_on_source_error, BlockdevOnError on_source_error, + bool has_on_target_error, BlockdevOnError on_target_error, + Error **errp) { BlockDriverInfo bdi; BlockDriverState *bs; @@ -1200,6 +1203,12 @@ void qmp_drive_mirror(const char *device, const char *target, if (!has_speed) { speed = 0; } + if (!has_on_source_error) { + on_source_error = BLOCKDEV_ON_ERROR_REPORT; + } + if (!has_on_target_error) { + on_target_error = BLOCKDEV_ON_ERROR_REPORT; + } if (!has_mode) { mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; } @@ -1292,7 +1301,8 @@ void qmp_drive_mirror(const char *device, const char *target, } } - mirror_start(bs, target_bs, speed, sync, block_job_cb, bs, &local_err); + mirror_start(bs, target_bs, speed, sync, on_source_error, on_target_error, + block_job_cb, bs, &local_err); if (local_err != NULL) { bdrv_delete(target_bs); error_propagate(errp, local_err); diff --git a/hmp.c b/hmp.c index e530253..4a458ac 100644 --- a/hmp.c +++ b/hmp.c @@ -795,7 +795,8 @@ void hmp_drive_mirror(Monitor *mon, const QDict *qdict) qmp_drive_mirror(device, filename, !!format, format, full ? MIRROR_SYNC_MODE_FULL : MIRROR_SYNC_MODE_TOP, - true, mode, false, 0, &errp); + true, mode, false, 0, + false, 0, false, 0, &errp); hmp_handle_error(mon, &errp); } diff --git a/qapi-schema.json b/qapi-schema.json index a066cd5..6aa443e 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -1629,6 +1629,14 @@ # (all the disk, only the sectors allocated in the topmost image, or # only new I/O). # +# @on-source-error: #optional the action to take on an error on the source, +# default 'report'. 'stop' and 'enospc' can only be used +# if the block device supports io-status (see BlockInfo). +# +# @on-target-error: #optional the action to take on an error on the target, +# default 'report' (no limitations, since this applies to +# a different block device than @device). +# # Returns: nothing on success # If @device is not a valid block device, DeviceNotFound # @@ -1637,7 +1645,8 @@ { 'command': 'drive-mirror', 'data': { 'device': 'str', 'target': 'str', '*format': 'str', 'sync': 'MirrorSyncMode', '*mode': 'NewImageMode', - '*speed': 'int' } } + '*speed': 'int', '*on-source-error': 'BlockdevOnError', + '*on-target-error': 'BlockdevOnError' } } ## # @migrate_cancel diff --git a/qmp-commands.hx b/qmp-commands.hx index 614baea..c31312f 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -937,7 +937,8 @@ EQMP { .name = "drive-mirror", - .args_type = "sync:s,device:B,target:s,speed:i?,mode:s?,format:s?", + .args_type = "sync:s,device:B,target:s,speed:i?,mode:s?,format:s?," + "on-source-error:s?,on-target-error:s?", .mhandler.cmd_new = qmp_marshal_input_drive_mirror, }, @@ -965,6 +966,11 @@ Arguments: possibilities include "full" for all the disk, "top" for only the sectors allocated in the topmost image, or "none" to only replicate new I/O (MirrorSyncMode). +- "on-source-error": the action to take on an error on the source + (BlockdevOnError, default 'report') +- "on-target-error": the action to take on an error on the target + (BlockdevOnError, default 'report') + Example: -- cgit v1.1 From 9eb80eadd498fae2ad9eecbb89646b0cc4929cac Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 18 Oct 2012 16:49:29 +0200 Subject: qmp: add pull_event function This function is unlike get_events in that it makes it easy to process one event at a time. This is useful in the mirroring test cases, where we want to process just one event (BLOCK_JOB_ERROR) and leave the others to a helper function. Acked-by: Luiz Capitulino Signed-off-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- QMP/qmp.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/QMP/qmp.py b/QMP/qmp.py index 32510a1..c551df1 100644 --- a/QMP/qmp.py +++ b/QMP/qmp.py @@ -136,6 +136,26 @@ class QEMUMonitorProtocol: raise Exception(ret['error']['desc']) return ret['return'] + def pull_event(self, wait=False): + """ + Get and delete the first available QMP event. + + @param wait: block until an event is available (bool) + """ + self.__sock.setblocking(0) + try: + self.__json_read() + except socket.error, err: + if err[0] == errno.EAGAIN: + # No data available + pass + self.__sock.setblocking(1) + if not self.__events and wait: + self.__json_read(only_event=True) + event = self.__events[0] + del self.__events[0] + return event + def get_events(self, wait=False): """ Get a list of available QMP events. -- cgit v1.1 From 9dfa9f593045d70572ebfd94f59cd33ec99e847c Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 23 Oct 2012 16:39:23 +0200 Subject: qemu-iotests: add testcases for mirroring on-source-error/on-target-error The new options are tested with blkdebug on both the source and the target. Signed-off-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- tests/qemu-iotests/041 | 253 ++++++++++++++++++++++++++++++++++++++++++ tests/qemu-iotests/041.out | 4 +- tests/qemu-iotests/iotests.py | 4 + 3 files changed, 259 insertions(+), 2 deletions(-) diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041 index fd9760a..c6eb851 100755 --- a/tests/qemu-iotests/041 +++ b/tests/qemu-iotests/041 @@ -292,6 +292,259 @@ class TestMirrorNoBacking(ImageMirroringTestCase): self.assertTrue(self.compare_images(test_img, target_img), 'target image does not match source after mirroring') +class TestReadErrors(ImageMirroringTestCase): + image_len = 2 * 1024 * 1024 # MB + + # this should be a multiple of twice the default granularity + # so that we hit this offset first in state 1 + MIRROR_GRANULARITY = 1024 * 1024 + + def create_blkdebug_file(self, name, event, errno): + file = open(name, 'w') + file.write(''' +[inject-error] +state = "1" +event = "%s" +errno = "%d" +immediately = "off" +once = "on" +sector = "%d" + +[set-state] +state = "1" +event = "%s" +new_state = "2" + +[set-state] +state = "2" +event = "%s" +new_state = "1" +''' % (event, errno, self.MIRROR_GRANULARITY / 512, event, event)) + file.close() + + def setUp(self): + self.blkdebug_file = backing_img + ".blkdebug" + self.create_image(backing_img, TestReadErrors.image_len) + self.create_blkdebug_file(self.blkdebug_file, "read_aio", 5) + qemu_img('create', '-f', iotests.imgfmt, + '-o', 'backing_file=blkdebug:%s:%s,backing_fmt=raw' + % (self.blkdebug_file, backing_img), + test_img) + self.vm = iotests.VM().add_drive(test_img) + self.vm.launch() + + def tearDown(self): + self.vm.shutdown() + os.remove(test_img) + os.remove(backing_img) + os.remove(self.blkdebug_file) + + def test_report_read(self): + self.assert_no_active_mirrors() + + result = self.vm.qmp('drive-mirror', device='drive0', sync='full', + target=target_img) + self.assert_qmp(result, 'return', {}) + + completed = False + error = False + while not completed: + for event in self.vm.get_qmp_events(wait=True): + if event['event'] == 'BLOCK_JOB_ERROR': + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp(event, 'data/operation', 'read') + error = True + elif event['event'] == 'BLOCK_JOB_READY': + self.assertTrue(False, 'job completed unexpectedly') + elif event['event'] == 'BLOCK_JOB_COMPLETED': + self.assertTrue(error, 'job completed unexpectedly') + self.assert_qmp(event, 'data/type', 'mirror') + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp(event, 'data/error', 'Input/output error') + self.assert_qmp(event, 'data/len', self.image_len) + completed = True + + self.assert_no_active_mirrors() + self.vm.shutdown() + + def test_ignore_read(self): + self.assert_no_active_mirrors() + + result = self.vm.qmp('drive-mirror', device='drive0', sync='full', + target=target_img, on_source_error='ignore') + self.assert_qmp(result, 'return', {}) + + event = self.vm.get_qmp_event(wait=True) + self.assertEquals(event['event'], 'BLOCK_JOB_ERROR') + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp(event, 'data/operation', 'read') + result = self.vm.qmp('query-block-jobs') + self.assert_qmp(result, 'return[0]/paused', False) + self.complete_and_wait() + self.vm.shutdown() + + def test_stop_read(self): + self.assert_no_active_mirrors() + + result = self.vm.qmp('drive-mirror', device='drive0', sync='full', + target=target_img, on_source_error='stop') + self.assert_qmp(result, 'return', {}) + + error = False + ready = False + while not ready: + for event in self.vm.get_qmp_events(wait=True): + if event['event'] == 'BLOCK_JOB_ERROR': + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp(event, 'data/operation', 'read') + + result = self.vm.qmp('query-block-jobs') + self.assert_qmp(result, 'return[0]/paused', True) + self.assert_qmp(result, 'return[0]/io-status', 'failed') + + result = self.vm.qmp('block-job-resume', device='drive0') + self.assert_qmp(result, 'return', {}) + error = True + elif event['event'] == 'BLOCK_JOB_READY': + self.assertTrue(error, 'job completed unexpectedly') + self.assert_qmp(event, 'data/device', 'drive0') + ready = True + + result = self.vm.qmp('query-block-jobs') + self.assert_qmp(result, 'return[0]/paused', False) + self.assert_qmp(result, 'return[0]/io-status', 'ok') + + self.complete_and_wait(wait_ready=False) + self.assert_no_active_mirrors() + self.vm.shutdown() + +class TestWriteErrors(ImageMirroringTestCase): + image_len = 2 * 1024 * 1024 # MB + + # this should be a multiple of twice the default granularity + # so that we hit this offset first in state 1 + MIRROR_GRANULARITY = 1024 * 1024 + + def create_blkdebug_file(self, name, event, errno): + file = open(name, 'w') + file.write(''' +[inject-error] +state = "1" +event = "%s" +errno = "%d" +immediately = "off" +once = "on" +sector = "%d" + +[set-state] +state = "1" +event = "%s" +new_state = "2" + +[set-state] +state = "2" +event = "%s" +new_state = "1" +''' % (event, errno, self.MIRROR_GRANULARITY / 512, event, event)) + file.close() + + def setUp(self): + self.blkdebug_file = target_img + ".blkdebug" + self.create_image(backing_img, TestWriteErrors.image_len) + self.create_blkdebug_file(self.blkdebug_file, "write_aio", 5) + qemu_img('create', '-f', iotests.imgfmt, '-obacking_file=%s' %(backing_img), test_img) + self.vm = iotests.VM().add_drive(test_img) + self.target_img = 'blkdebug:%s:%s' % (self.blkdebug_file, target_img) + qemu_img('create', '-f', iotests.imgfmt, '-osize=%d' %(TestWriteErrors.image_len), target_img) + self.vm.launch() + + def tearDown(self): + self.vm.shutdown() + os.remove(test_img) + os.remove(backing_img) + os.remove(self.blkdebug_file) + + def test_report_write(self): + self.assert_no_active_mirrors() + + result = self.vm.qmp('drive-mirror', device='drive0', sync='full', + mode='existing', target=self.target_img) + self.assert_qmp(result, 'return', {}) + + completed = False + error = False + while not completed: + for event in self.vm.get_qmp_events(wait=True): + if event['event'] == 'BLOCK_JOB_ERROR': + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp(event, 'data/operation', 'write') + error = True + elif event['event'] == 'BLOCK_JOB_READY': + self.assertTrue(False, 'job completed unexpectedly') + elif event['event'] == 'BLOCK_JOB_COMPLETED': + self.assertTrue(error, 'job completed unexpectedly') + self.assert_qmp(event, 'data/type', 'mirror') + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp(event, 'data/error', 'Input/output error') + self.assert_qmp(event, 'data/len', self.image_len) + completed = True + + self.assert_no_active_mirrors() + self.vm.shutdown() + + def test_ignore_write(self): + self.assert_no_active_mirrors() + + result = self.vm.qmp('drive-mirror', device='drive0', sync='full', + mode='existing', target=self.target_img, + on_target_error='ignore') + self.assert_qmp(result, 'return', {}) + + event = self.vm.get_qmp_event(wait=True) + self.assertEquals(event['event'], 'BLOCK_JOB_ERROR') + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp(event, 'data/operation', 'write') + result = self.vm.qmp('query-block-jobs') + self.assert_qmp(result, 'return[0]/paused', False) + self.complete_and_wait() + self.vm.shutdown() + + def test_stop_write(self): + self.assert_no_active_mirrors() + + result = self.vm.qmp('drive-mirror', device='drive0', sync='full', + mode='existing', target=self.target_img, + on_target_error='stop') + self.assert_qmp(result, 'return', {}) + + error = False + ready = False + while not ready: + for event in self.vm.get_qmp_events(wait=True): + if event['event'] == 'BLOCK_JOB_ERROR': + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp(event, 'data/operation', 'write') + + result = self.vm.qmp('query-block-jobs') + self.assert_qmp(result, 'return[0]/paused', True) + self.assert_qmp(result, 'return[0]/io-status', 'failed') + + result = self.vm.qmp('block-job-resume', device='drive0') + self.assert_qmp(result, 'return', {}) + + result = self.vm.qmp('query-block-jobs') + self.assert_qmp(result, 'return[0]/paused', False) + self.assert_qmp(result, 'return[0]/io-status', 'ok') + error = True + elif event['event'] == 'BLOCK_JOB_READY': + self.assertTrue(error, 'job completed unexpectedly') + self.assert_qmp(event, 'data/device', 'drive0') + ready = True + + self.complete_and_wait(wait_ready=False) + self.assert_no_active_mirrors() + self.vm.shutdown() + class TestSetSpeed(ImageMirroringTestCase): image_len = 80 * 1024 * 1024 # MB diff --git a/tests/qemu-iotests/041.out b/tests/qemu-iotests/041.out index 281b69e..71009c2 100644 --- a/tests/qemu-iotests/041.out +++ b/tests/qemu-iotests/041.out @@ -1,5 +1,5 @@ -............ +.................. ---------------------------------------------------------------------- -Ran 12 tests +Ran 18 tests OK diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py index 3c60b2d..735c674 100644 --- a/tests/qemu-iotests/iotests.py +++ b/tests/qemu-iotests/iotests.py @@ -106,6 +106,10 @@ class VM(object): return self._qmp.cmd(cmd, args=qmp_args) + def get_qmp_event(self, wait=False): + '''Poll for one queued QMP events and return it''' + return self._qmp.pull_event(wait=wait) + def get_qmp_events(self, wait=False): '''Poll for queued QMP events and return a list of dicts''' events = self._qmp.get_events(wait=wait) -- cgit v1.1 From 3b6eda2f57a5b7ed047077b6272c2b5a9e3531ca Mon Sep 17 00:00:00 2001 From: Corey Bryant Date: Thu, 18 Oct 2012 16:41:04 -0400 Subject: osdep: Less restrictive F_SEFL in qemu_dup_flags() qemu_dup_flags() currently limits the flags that can be set on the fcntl() F_SETFL call to those that we currently know can be set with fcntl() F_SETFL. The problem with this is that it will prevent use of new flags in the future without a code update. This patch relaxes the checking and lets fcntl() determine the flags it can set. Signed-off-by: Corey Bryant Signed-off-by: Kevin Wolf --- osdep.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/osdep.c b/osdep.c index 3b25297..c822a01 100644 --- a/osdep.c +++ b/osdep.c @@ -88,7 +88,6 @@ static int qemu_dup_flags(int fd, int flags) int ret; int serrno; int dup_flags; - int setfl_flags; #ifdef F_DUPFD_CLOEXEC ret = fcntl(fd, F_DUPFD_CLOEXEC, 0); @@ -113,16 +112,7 @@ static int qemu_dup_flags(int fd, int flags) } /* Set/unset flags that we can with fcntl */ - setfl_flags = O_APPEND | O_ASYNC | O_NONBLOCK; -#ifdef O_NOATIME - setfl_flags |= O_NOATIME; -#endif -#ifdef O_DIRECT - setfl_flags |= O_DIRECT; -#endif - dup_flags &= ~setfl_flags; - dup_flags |= (flags & setfl_flags); - if (fcntl(ret, F_SETFL, dup_flags) == -1) { + if (fcntl(ret, F_SETFL, flags) == -1) { goto fail; } -- cgit v1.1