-rw-r--r-- | block.c                        |  63
-rw-r--r-- | block/backup.c                 |   1
-rw-r--r-- | block/block-backend.c          |  69
-rw-r--r-- | block/commit.c                 |   2
-rw-r--r-- | block/file-posix.c             |  36
-rw-r--r-- | block/mirror.c                 |   7
-rw-r--r-- | blockjob.c                     |   3
-rw-r--r-- | hw/block/hd-geometry.c         |   7
-rw-r--r-- | include/sysemu/block-backend.h |   3
-rw-r--r-- | qemu-deprecated.texi           |   7
-rw-r--r-- | qemu-img.c                     |   5
-rwxr-xr-x | tests/qemu-iotests/118         |  84
-rw-r--r-- | tests/qemu-iotests/118.out     |   4
-rwxr-xr-x | tests/qemu-iotests/234         |  30
-rwxr-xr-x | tests/qemu-iotests/258         | 163
-rw-r--r-- | tests/qemu-iotests/258.out     |  33
-rwxr-xr-x | tests/qemu-iotests/262         |  82
-rw-r--r-- | tests/qemu-iotests/262.out     |  17
-rw-r--r-- | tests/qemu-iotests/group       |   2
-rw-r--r-- | tests/qemu-iotests/iotests.py  |  16
-rw-r--r-- | tests/test-bdrv-drain.c        | 476
21 files changed, 983 insertions, 127 deletions
@@ -2169,16 +2169,8 @@ void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c, uint64_t perm, uint64_t shared, uint64_t *nperm, uint64_t *nshared) { - if (c == NULL) { - *nperm = perm & DEFAULT_PERM_PASSTHROUGH; - *nshared = (shared & DEFAULT_PERM_PASSTHROUGH) | DEFAULT_PERM_UNCHANGED; - return; - } - - *nperm = (perm & DEFAULT_PERM_PASSTHROUGH) | - (c->perm & DEFAULT_PERM_UNCHANGED); - *nshared = (shared & DEFAULT_PERM_PASSTHROUGH) | - (c->shared_perm & DEFAULT_PERM_UNCHANGED); + *nperm = perm & DEFAULT_PERM_PASSTHROUGH; + *nshared = (shared & DEFAULT_PERM_PASSTHROUGH) | DEFAULT_PERM_UNCHANGED; } void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c, @@ -2239,13 +2231,27 @@ static void bdrv_replace_child_noperm(BdrvChild *child, BlockDriverState *new_bs) { BlockDriverState *old_bs = child->bs; - int i; + int new_bs_quiesce_counter; + int drain_saldo; assert(!child->frozen); if (old_bs && new_bs) { assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)); } + + new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0); + drain_saldo = new_bs_quiesce_counter - child->parent_quiesce_counter; + + /* + * If the new child node is drained but the old one was not, flush + * all outstanding requests to the old child node. + */ + while (drain_saldo > 0 && child->role->drained_begin) { + bdrv_parent_drained_begin_single(child, true); + drain_saldo--; + } + if (old_bs) { /* Detach first so that the recursive drain sections coming from @child * are already gone and we only end the drain sections that came from @@ -2253,28 +2259,22 @@ static void bdrv_replace_child_noperm(BdrvChild *child, if (child->role->detach) { child->role->detach(child); } - while (child->parent_quiesce_counter) { - bdrv_parent_drained_end_single(child); - } QLIST_REMOVE(child, next_parent); - } else { - assert(child->parent_quiesce_counter == 0); } child->bs = new_bs; if (new_bs) { QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent); - if (new_bs->quiesce_counter) { - int num = new_bs->quiesce_counter; - if (child->role->parent_is_bds) { - num -= bdrv_drain_all_count; - } - assert(num >= 0); - for (i = 0; i < num; i++) { - bdrv_parent_drained_begin_single(child, true); - } - } + + /* + * Detaching the old node may have led to the new node's + * quiesce_counter having been decreased. Not a problem, we + * just need to recognize this here and then invoke + * drained_end appropriately more often. + */ + assert(new_bs->quiesce_counter <= new_bs_quiesce_counter); + drain_saldo += new_bs->quiesce_counter - new_bs_quiesce_counter; /* Attach only after starting new drained sections, so that recursive * drain sections coming from @child don't get an extra .drained_begin @@ -2283,6 +2283,15 @@ static void bdrv_replace_child_noperm(BdrvChild *child, child->role->attach(child); } } + + /* + * If the old child node was drained but the new one is not, allow + * requests to come in only after the new node has been attached. 
+ */ + while (drain_saldo < 0 && child->role->drained_end) { + bdrv_parent_drained_end_single(child); + drain_saldo++; + } } /* @@ -4500,6 +4509,7 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base, int ret = -EIO; bdrv_ref(top); + bdrv_subtree_drained_begin(top); if (!top->drv || !base->drv) { goto exit; @@ -4571,6 +4581,7 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base, ret = 0; exit: + bdrv_subtree_drained_end(top); bdrv_unref(top); return ret; } diff --git a/block/backup.c b/block/backup.c index b26c22c..4743c8f 100644 --- a/block/backup.c +++ b/block/backup.c @@ -644,6 +644,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, if (ret < 0) { goto error; } + blk_set_disable_request_queuing(job->target, true); job->on_source_error = on_source_error; job->on_target_error = on_target_error; diff --git a/block/block-backend.c b/block/block-backend.c index 84e76bf..1c605d5 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -81,6 +81,9 @@ struct BlockBackend { QLIST_HEAD(, BlockBackendAioNotifier) aio_notifiers; int quiesce_counter; + CoQueue queued_requests; + bool disable_request_queuing; + VMChangeStateEntry *vmsh; bool force_allow_inactivate; @@ -341,6 +344,7 @@ BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm) block_acct_init(&blk->stats); + qemu_co_queue_init(&blk->queued_requests); notifier_list_init(&blk->remove_bs_notifiers); notifier_list_init(&blk->insert_bs_notifiers); QLIST_INIT(&blk->aio_notifiers); @@ -1098,6 +1102,11 @@ void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow) blk->allow_aio_context_change = allow; } +void blk_set_disable_request_queuing(BlockBackend *blk, bool disable) +{ + blk->disable_request_queuing = disable; +} + static int blk_check_byte_request(BlockBackend *blk, int64_t offset, size_t size) { @@ -1129,13 +1138,24 @@ static int blk_check_byte_request(BlockBackend *blk, int64_t offset, return 0; } +static void coroutine_fn blk_wait_while_drained(BlockBackend *blk) +{ + if (blk->quiesce_counter && !blk->disable_request_queuing) { + qemu_co_queue_wait(&blk->queued_requests, NULL); + } +} + int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset, unsigned int bytes, QEMUIOVector *qiov, BdrvRequestFlags flags) { int ret; - BlockDriverState *bs = blk_bs(blk); + BlockDriverState *bs; + + blk_wait_while_drained(blk); + /* Call blk_bs() only after waiting, the graph may have changed */ + bs = blk_bs(blk); trace_blk_co_preadv(blk, bs, offset, bytes, flags); ret = blk_check_byte_request(blk, offset, bytes); @@ -1161,8 +1181,12 @@ int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset, BdrvRequestFlags flags) { int ret; - BlockDriverState *bs = blk_bs(blk); + BlockDriverState *bs; + + blk_wait_while_drained(blk); + /* Call blk_bs() only after waiting, the graph may have changed */ + bs = blk_bs(blk); trace_blk_co_pwritev(blk, bs, offset, bytes, flags); ret = blk_check_byte_request(blk, offset, bytes); @@ -1239,22 +1263,6 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf, return rwco.ret; } -int blk_pread_unthrottled(BlockBackend *blk, int64_t offset, uint8_t *buf, - int count) -{ - int ret; - - ret = blk_check_byte_request(blk, offset, count); - if (ret < 0) { - return ret; - } - - blk_root_drained_begin(blk->root); - ret = blk_pread(blk, offset, buf, count); - blk_root_drained_end(blk->root, NULL); - return ret; -} - int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset, int bytes, 
BdrvRequestFlags flags) { @@ -1367,6 +1375,12 @@ static void blk_aio_read_entry(void *opaque) BlkRwCo *rwco = &acb->rwco; QEMUIOVector *qiov = rwco->iobuf; + if (rwco->blk->quiesce_counter) { + blk_dec_in_flight(rwco->blk); + blk_wait_while_drained(rwco->blk); + blk_inc_in_flight(rwco->blk); + } + assert(qiov->size == acb->bytes); rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, acb->bytes, qiov, rwco->flags); @@ -1379,6 +1393,12 @@ static void blk_aio_write_entry(void *opaque) BlkRwCo *rwco = &acb->rwco; QEMUIOVector *qiov = rwco->iobuf; + if (rwco->blk->quiesce_counter) { + blk_dec_in_flight(rwco->blk); + blk_wait_while_drained(rwco->blk); + blk_inc_in_flight(rwco->blk); + } + assert(!qiov || qiov->size == acb->bytes); rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, acb->bytes, qiov, rwco->flags); @@ -1500,6 +1520,8 @@ void blk_aio_cancel_async(BlockAIOCB *acb) int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf) { + blk_wait_while_drained(blk); + if (!blk_is_available(blk)) { return -ENOMEDIUM; } @@ -1540,7 +1562,11 @@ BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf, int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes) { - int ret = blk_check_byte_request(blk, offset, bytes); + int ret; + + blk_wait_while_drained(blk); + + ret = blk_check_byte_request(blk, offset, bytes); if (ret < 0) { return ret; } @@ -1550,6 +1576,8 @@ int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes) int blk_co_flush(BlockBackend *blk) { + blk_wait_while_drained(blk); + if (!blk_is_available(blk)) { return -ENOMEDIUM; } @@ -2250,6 +2278,9 @@ static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter) if (blk->dev_ops && blk->dev_ops->drained_end) { blk->dev_ops->drained_end(blk->dev_opaque); } + while (qemu_co_enter_next(&blk->queued_requests, NULL)) { + /* Resume all queued requests */ + } } } diff --git a/block/commit.c b/block/commit.c index 2c5a6d4..408ae15 100644 --- a/block/commit.c +++ b/block/commit.c @@ -350,6 +350,7 @@ void commit_start(const char *job_id, BlockDriverState *bs, if (ret < 0) { goto fail; } + blk_set_disable_request_queuing(s->base, true); s->base_bs = base; /* Required permissions are already taken with block_job_add_bdrv() */ @@ -358,6 +359,7 @@ void commit_start(const char *job_id, BlockDriverState *bs, if (ret < 0) { goto fail; } + blk_set_disable_request_queuing(s->top, true); s->backing_file_str = g_strdup(backing_file_str); s->on_error = on_error; diff --git a/block/file-posix.c b/block/file-posix.c index 4479cc7..b8b4dad 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -323,6 +323,7 @@ static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp) BDRVRawState *s = bs->opaque; char *buf; size_t max_align = MAX(MAX_BLOCKSIZE, getpagesize()); + size_t alignments[] = {1, 512, 1024, 2048, 4096}; /* For SCSI generic devices the alignment is not really used. With buffered I/O, we don't have any restrictions. */ @@ -349,25 +350,38 @@ static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp) } #endif - /* If we could not get the sizes so far, we can only guess them */ - if (!s->buf_align) { + /* + * If we could not get the sizes so far, we can only guess them. First try + * to detect request alignment, since it is more likely to succeed. Then + * try to detect buf_align, which cannot be detected in some cases (e.g. + * Gluster). If buf_align cannot be detected, we fallback to the value of + * request_alignment. 
+ */ + + if (!bs->bl.request_alignment) { + int i; size_t align; - buf = qemu_memalign(max_align, 2 * max_align); - for (align = 512; align <= max_align; align <<= 1) { - if (raw_is_io_aligned(fd, buf + align, max_align)) { - s->buf_align = align; + buf = qemu_memalign(max_align, max_align); + for (i = 0; i < ARRAY_SIZE(alignments); i++) { + align = alignments[i]; + if (raw_is_io_aligned(fd, buf, align)) { + /* Fallback to safe value. */ + bs->bl.request_alignment = (align != 1) ? align : max_align; break; } } qemu_vfree(buf); } - if (!bs->bl.request_alignment) { + if (!s->buf_align) { + int i; size_t align; - buf = qemu_memalign(s->buf_align, max_align); - for (align = 512; align <= max_align; align <<= 1) { - if (raw_is_io_aligned(fd, buf, align)) { - bs->bl.request_alignment = align; + buf = qemu_memalign(max_align, 2 * max_align); + for (i = 0; i < ARRAY_SIZE(alignments); i++) { + align = alignments[i]; + if (raw_is_io_aligned(fd, buf + align, max_align)) { + /* Fallback to request_aligment. */ + s->buf_align = (align != 1) ? align : bs->bl.request_alignment; break; } } diff --git a/block/mirror.c b/block/mirror.c index 9f5c59e..9b36391 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -656,7 +656,10 @@ static int mirror_exit_common(Job *job) s->target = NULL; /* We don't access the source any more. Dropping any WRITE/RESIZE is - * required before it could become a backing file of target_bs. */ + * required before it could become a backing file of target_bs. Not having + * these permissions any more means that we can't allow any new requests on + * mirror_top_bs from now on, so keep it drained. */ + bdrv_drained_begin(mirror_top_bs); bs_opaque->stop = true; bdrv_child_refresh_perms(mirror_top_bs, mirror_top_bs->backing, &error_abort); @@ -724,6 +727,7 @@ static int mirror_exit_common(Job *job) bs_opaque->job = NULL; bdrv_drained_end(src); + bdrv_drained_end(mirror_top_bs); s->in_drain = false; bdrv_unref(mirror_top_bs); bdrv_unref(src); @@ -1632,6 +1636,7 @@ static BlockJob *mirror_start_job( blk_set_force_allow_inactivate(s->target); } blk_set_allow_aio_context_change(s->target, true); + blk_set_disable_request_queuing(s->target, true); s->replaces = g_strdup(replaces); s->on_source_error = on_source_error; @@ -446,6 +446,9 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker); + /* Disable request queuing in the BlockBackend to avoid deadlocks on drain: + * The job reports that it's busy until it reaches a pause point. */ + blk_set_disable_request_queuing(blk, true); blk_set_allow_aio_context_change(blk, true); /* Only set speed when necessary to avoid NotSupported error */ diff --git a/hw/block/hd-geometry.c b/hw/block/hd-geometry.c index 79384a2..dcbccee 100644 --- a/hw/block/hd-geometry.c +++ b/hw/block/hd-geometry.c @@ -63,12 +63,7 @@ static int guess_disk_lchs(BlockBackend *blk, blk_get_geometry(blk, &nb_sectors); - /** - * The function will be invoked during startup not only in sync I/O mode, - * but also in async I/O mode. So the I/O throttling function has to - * be disabled temporarily here, not permanently. 
- */ - if (blk_pread_unthrottled(blk, 0, buf, BDRV_SECTOR_SIZE) < 0) { + if (blk_pread(blk, 0, buf, BDRV_SECTOR_SIZE) < 0) { return -1; } /* test msdos magic */ diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index 733c495..368d53a 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -104,6 +104,7 @@ void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm); void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow); void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow); +void blk_set_disable_request_queuing(BlockBackend *blk, bool disable); void blk_iostatus_enable(BlockBackend *blk); bool blk_iostatus_is_enabled(const BlockBackend *blk); BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk); @@ -117,8 +118,6 @@ char *blk_get_attached_dev_id(BlockBackend *blk); BlockBackend *blk_by_dev(void *dev); BlockBackend *blk_by_qdev_id(const char *id, Error **errp); void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, void *opaque); -int blk_pread_unthrottled(BlockBackend *blk, int64_t offset, uint8_t *buf, - int bytes); int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset, unsigned int bytes, QEMUIOVector *qiov, BdrvRequestFlags flags); diff --git a/qemu-deprecated.texi b/qemu-deprecated.texi index fff07bb..f7680c0 100644 --- a/qemu-deprecated.texi +++ b/qemu-deprecated.texi @@ -305,6 +305,13 @@ to just export the entire image and then mount only /dev/nbd0p1 than it is to reinvoke @command{qemu-nbd -c /dev/nbd0} limited to just a subset of the image. +@subsection qemu-img convert -n -o (since 4.2.0) + +All options specified in @option{-o} are image creation options, so +they have no effect when used with @option{-n} to skip image creation. +Silently ignored options can be confusing, so this combination of +options will be made an error in future versions. + @section Build system @subsection Python 2 support (since 4.1.0) @@ -2231,6 +2231,11 @@ static int img_convert(int argc, char **argv) goto fail_getopt; } + if (skip_create && options) { + warn_report("-o has no effect when skipping image creation"); + warn_report("This will become an error in future QEMU versions."); + } + s.src_num = argc - optind - 1; out_filename = s.src_num >= 1 ? 
argv[argc - 1] : NULL; diff --git a/tests/qemu-iotests/118 b/tests/qemu-iotests/118 index 499c5f0..6f45779 100755 --- a/tests/qemu-iotests/118 +++ b/tests/qemu-iotests/118 @@ -33,6 +33,8 @@ def interface_to_device_name(interface): return 'ide-cd' elif interface == 'floppy': return 'floppy' + elif interface == 'scsi': + return 'scsi-cd' else: return None @@ -40,10 +42,14 @@ class ChangeBaseClass(iotests.QMPTestCase): has_opened = False has_closed = False + device_name = 'qdev0' + use_drive = False + def process_events(self): for event in self.vm.get_qmp_events(wait=False): if (event['event'] == 'DEVICE_TRAY_MOVED' and - event['data']['device'] == 'drive0'): + (event['data']['device'] == 'drive0' or + event['data']['id'] == self.device_name)): if event['data']['tray-open'] == False: self.has_closed = True else: @@ -67,9 +73,11 @@ class ChangeBaseClass(iotests.QMPTestCase): class GeneralChangeTestsBaseClass(ChangeBaseClass): - device_name = 'qdev0' - def test_change(self): + # 'change' requires a drive name, so skip the test for blockdev + if not self.use_drive: + return + result = self.vm.qmp('change', device='drive0', target=new_img, arg=iotests.imgfmt) self.assert_qmp(result, 'return', {}) @@ -292,13 +300,21 @@ class GeneralChangeTestsBaseClass(ChangeBaseClass): class TestInitiallyFilled(GeneralChangeTestsBaseClass): was_empty = False - def setUp(self, media, interface): + def setUp(self): qemu_img('create', '-f', iotests.imgfmt, old_img, '1440k') qemu_img('create', '-f', iotests.imgfmt, new_img, '1440k') self.vm = iotests.VM() - self.vm.add_drive(old_img, 'media=%s' % media, 'none') + if self.use_drive: + self.vm.add_drive(old_img, 'media=%s' % self.media, 'none') + else: + self.vm.add_blockdev([ 'node-name=drive0', + 'driver=%s' % iotests.imgfmt, + 'file.driver=file', + 'file.filename=%s' % old_img ]) + if self.interface == 'scsi': + self.vm.add_device('virtio-scsi-pci') self.vm.add_device('%s,drive=drive0,id=%s' % - (interface_to_device_name(interface), + (interface_to_device_name(self.interface), self.device_name)) self.vm.launch() @@ -327,11 +343,16 @@ class TestInitiallyFilled(GeneralChangeTestsBaseClass): class TestInitiallyEmpty(GeneralChangeTestsBaseClass): was_empty = True - def setUp(self, media, interface): + def setUp(self): qemu_img('create', '-f', iotests.imgfmt, new_img, '1440k') - self.vm = iotests.VM().add_drive(None, 'media=%s' % media, 'none') - self.vm.add_device('%s,drive=drive0,id=%s' % - (interface_to_device_name(interface), + self.vm = iotests.VM() + if self.use_drive: + self.vm.add_drive(None, 'media=%s' % self.media, 'none') + if self.interface == 'scsi': + self.vm.add_device('virtio-scsi-pci') + self.vm.add_device('%s,%sid=%s' % + (interface_to_device_name(self.interface), + 'drive=drive0,' if self.use_drive else '', self.device_name)) self.vm.launch() @@ -349,36 +370,25 @@ class TestInitiallyEmpty(GeneralChangeTestsBaseClass): # Should be a no-op self.assert_qmp(result, 'return', {}) -class TestCDInitiallyFilled(TestInitiallyFilled): - TestInitiallyFilled = TestInitiallyFilled - has_real_tray = True - - def setUp(self): - self.TestInitiallyFilled.setUp(self, 'cdrom', 'ide') +# Do this in a function to avoid leaking variables like case into the global +# name space (otherwise tests would be run for the abstract base classes) +def create_basic_test_classes(): + for (media, interface, has_real_tray) in [ ('cdrom', 'ide', True), + ('cdrom', 'scsi', True), + ('disk', 'floppy', False) ]: -class TestCDInitiallyEmpty(TestInitiallyEmpty): - TestInitiallyEmpty = 
TestInitiallyEmpty - has_real_tray = True - - def setUp(self): - self.TestInitiallyEmpty.setUp(self, 'cdrom', 'ide') + for case in [ TestInitiallyFilled, TestInitiallyEmpty ]: + for use_drive in [ True, False ]: + attr = { 'media': media, + 'interface': interface, + 'has_real_tray': has_real_tray, + 'use_drive': use_drive } -class TestFloppyInitiallyFilled(TestInitiallyFilled): - TestInitiallyFilled = TestInitiallyFilled - has_real_tray = False + name = '%s_%s_%s_%s' % (case.__name__, media, interface, + 'drive' if use_drive else 'blockdev') + globals()[name] = type(name, (case, ), attr) - def setUp(self): - self.TestInitiallyFilled.setUp(self, 'disk', 'floppy') - -class TestFloppyInitiallyEmpty(TestInitiallyEmpty): - TestInitiallyEmpty = TestInitiallyEmpty - has_real_tray = False - - def setUp(self): - self.TestInitiallyEmpty.setUp(self, 'disk', 'floppy') - # FDDs not having a real tray and there not being a medium inside the - # tray at startup means the tray will be considered open - self.has_opened = True +create_basic_test_classes() class TestChangeReadOnly(ChangeBaseClass): device_name = 'qdev0' diff --git a/tests/qemu-iotests/118.out b/tests/qemu-iotests/118.out index 4823c11..bf5bfd5 100644 --- a/tests/qemu-iotests/118.out +++ b/tests/qemu-iotests/118.out @@ -1,5 +1,5 @@ -............................................................... +....................................................................................................................................................................... ---------------------------------------------------------------------- -Ran 63 tests +Ran 167 tests OK diff --git a/tests/qemu-iotests/234 b/tests/qemu-iotests/234 index c4c26bc..34c818c 100755 --- a/tests/qemu-iotests/234 +++ b/tests/qemu-iotests/234 @@ -26,22 +26,6 @@ import os iotests.verify_image_format(supported_fmts=['qcow2']) iotests.verify_platform(['linux']) -def enable_migration_events(vm, name): - iotests.log('Enabling migration QMP events on %s...' % name) - iotests.log(vm.qmp('migrate-set-capabilities', capabilities=[ - { - 'capability': 'events', - 'state': True - } - ])) - -def wait_migration(vm): - while True: - event = vm.event_wait('MIGRATION') - iotests.log(event, filters=[iotests.filter_qmp_event]) - if event['data']['status'] == 'completed': - break - with iotests.FilePath('img') as img_path, \ iotests.FilePath('backing') as backing_path, \ iotests.FilePath('mig_fifo_a') as fifo_a, \ @@ -62,7 +46,7 @@ with iotests.FilePath('img') as img_path, \ .add_blockdev('%s,file=drive0-backing-file,node-name=drive0-backing' % (iotests.imgfmt)) .launch()) - enable_migration_events(vm_a, 'A') + vm_a.enable_migration_events('A') iotests.log('Launching destination VM...') (vm_b.add_blockdev('file,filename=%s,node-name=drive0-file' % (img_path)) @@ -72,7 +56,7 @@ with iotests.FilePath('img') as img_path, \ .add_incoming("exec: cat '%s'" % (fifo_a)) .launch()) - enable_migration_events(vm_b, 'B') + vm_b.enable_migration_events('B') # Add a child node that was created after the parent node. The reverse case # is covered by the -blockdev options above. 
@@ -85,9 +69,9 @@ with iotests.FilePath('img') as img_path, \ iotests.log(vm_a.qmp('migrate', uri='exec:cat >%s' % (fifo_a))) with iotests.Timeout(3, 'Migration does not complete'): # Wait for the source first (which includes setup=setup) - wait_migration(vm_a) + vm_a.wait_migration() # Wait for the destination second (which does not) - wait_migration(vm_b) + vm_b.wait_migration() iotests.log(vm_a.qmp('query-migrate')['return']['status']) iotests.log(vm_b.qmp('query-migrate')['return']['status']) @@ -105,7 +89,7 @@ with iotests.FilePath('img') as img_path, \ .add_incoming("exec: cat '%s'" % (fifo_b)) .launch()) - enable_migration_events(vm_a, 'A') + vm_a.enable_migration_events('A') iotests.log(vm_a.qmp('blockdev-snapshot', node='drive0-backing', overlay='drive0')) @@ -114,9 +98,9 @@ with iotests.FilePath('img') as img_path, \ iotests.log(vm_b.qmp('migrate', uri='exec:cat >%s' % (fifo_b))) with iotests.Timeout(3, 'Migration does not complete'): # Wait for the source first (which includes setup=setup) - wait_migration(vm_b) + vm_b.wait_migration() # Wait for the destination second (which does not) - wait_migration(vm_a) + vm_a.wait_migration() iotests.log(vm_a.qmp('query-migrate')['return']['status']) iotests.log(vm_b.qmp('query-migrate')['return']['status']) diff --git a/tests/qemu-iotests/258 b/tests/qemu-iotests/258 new file mode 100755 index 0000000..b84cf02 --- /dev/null +++ b/tests/qemu-iotests/258 @@ -0,0 +1,163 @@ +#!/usr/bin/env python +# +# Very specific tests for adjacent commit/stream block jobs +# +# Copyright (C) 2019 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+# +# Creator/Owner: Max Reitz <mreitz@redhat.com> + +import iotests +from iotests import log, qemu_img, qemu_io_silent, \ + filter_qmp_testfiles, filter_qmp_imgfmt + +# Need backing file and change-backing-file support +iotests.verify_image_format(supported_fmts=['qcow2', 'qed']) +iotests.verify_platform(['linux']) + + +# Returns a node for blockdev-add +def node(node_name, path, backing=None, fmt=None, throttle=None): + if fmt is None: + fmt = iotests.imgfmt + + res = { + 'node-name': node_name, + 'driver': fmt, + 'file': { + 'driver': 'file', + 'filename': path + } + } + + if backing is not None: + res['backing'] = backing + + if throttle: + res['file'] = { + 'driver': 'throttle', + 'throttle-group': throttle, + 'file': res['file'] + } + + return res + +# Finds a node in the debug block graph +def find_graph_node(graph, node_id): + return next(node for node in graph['nodes'] if node['id'] == node_id) + + +def test_concurrent_finish(write_to_stream_node): + log('') + log('=== Commit and stream finish concurrently (letting %s write) ===' % \ + ('stream' if write_to_stream_node else 'commit')) + log('') + + # All chosen in such a way that when the commit job wants to + # finish, it polls and thus makes stream finish concurrently -- + # and the other way around, depending on whether the commit job + # is finalized before stream completes or not. + + with iotests.FilePath('node4.img') as node4_path, \ + iotests.FilePath('node3.img') as node3_path, \ + iotests.FilePath('node2.img') as node2_path, \ + iotests.FilePath('node1.img') as node1_path, \ + iotests.FilePath('node0.img') as node0_path, \ + iotests.VM() as vm: + + # It is important to use raw for the base layer (so that + # permissions are just handed through to the protocol layer) + assert qemu_img('create', '-f', 'raw', node0_path, '64M') == 0 + + stream_throttle=None + commit_throttle=None + + for path in [node1_path, node2_path, node3_path, node4_path]: + assert qemu_img('create', '-f', iotests.imgfmt, path, '64M') == 0 + + if write_to_stream_node: + # This is what (most of the time) makes commit finish + # earlier and then pull in stream + assert qemu_io_silent(node2_path, + '-c', 'write %iK 64K' % (65536 - 192), + '-c', 'write %iK 64K' % (65536 - 64)) == 0 + + stream_throttle='tg' + else: + # And this makes stream finish earlier + assert qemu_io_silent(node1_path, + '-c', 'write %iK 64K' % (65536 - 64)) == 0 + + commit_throttle='tg' + + vm.launch() + + vm.qmp_log('object-add', + qom_type='throttle-group', + id='tg', + props={ + 'x-iops-write': 1, + 'x-iops-write-max': 1 + }) + + vm.qmp_log('blockdev-add', + filters=[filter_qmp_testfiles, filter_qmp_imgfmt], + **node('node4', node4_path, throttle=stream_throttle, + backing=node('node3', node3_path, + backing=node('node2', node2_path, + backing=node('node1', node1_path, + backing=node('node0', node0_path, throttle=commit_throttle, + fmt='raw')))))) + + vm.qmp_log('block-commit', + job_id='commit', + device='node4', + filter_node_name='commit-filter', + top_node='node1', + base_node='node0', + auto_finalize=False) + + vm.qmp_log('block-stream', + job_id='stream', + device='node3', + base_node='commit-filter') + + if write_to_stream_node: + vm.run_job('commit', auto_finalize=False, auto_dismiss=True) + vm.run_job('stream', auto_finalize=True, auto_dismiss=True) + else: + # No, the jobs do not really finish concurrently here, + # the stream job does complete strictly before commit. + # But still, this is close enough for what we want to + # test. 
+ vm.run_job('stream', auto_finalize=True, auto_dismiss=True) + vm.run_job('commit', auto_finalize=False, auto_dismiss=True) + + # Assert that the backing node of node3 is node 0 now + graph = vm.qmp('x-debug-query-block-graph')['return'] + for edge in graph['edges']: + if edge['name'] == 'backing' and \ + find_graph_node(graph, edge['parent'])['name'] == 'node3': + assert find_graph_node(graph, edge['child'])['name'] == 'node0' + break + + +def main(): + log('Running tests:') + test_concurrent_finish(True) + test_concurrent_finish(False) + +if __name__ == '__main__': + main() diff --git a/tests/qemu-iotests/258.out b/tests/qemu-iotests/258.out new file mode 100644 index 0000000..ce6e9ba --- /dev/null +++ b/tests/qemu-iotests/258.out @@ -0,0 +1,33 @@ +Running tests: + +=== Commit and stream finish concurrently (letting stream write) === + +{"execute": "object-add", "arguments": {"id": "tg", "props": {"x-iops-write": 1, "x-iops-write-max": 1}, "qom-type": "throttle-group"}} +{"return": {}} +{"execute": "blockdev-add", "arguments": {"backing": {"backing": {"backing": {"backing": {"driver": "raw", "file": {"driver": "file", "filename": "TEST_DIR/PID-node0.img"}, "node-name": "node0"}, "driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-node1.img"}, "node-name": "node1"}, "driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-node2.img"}, "node-name": "node2"}, "driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-node3.img"}, "node-name": "node3"}, "driver": "IMGFMT", "file": {"driver": "throttle", "file": {"driver": "file", "filename": "TEST_DIR/PID-node4.img"}, "throttle-group": "tg"}, "node-name": "node4"}} +{"return": {}} +{"execute": "block-commit", "arguments": {"auto-finalize": false, "base-node": "node0", "device": "node4", "filter-node-name": "commit-filter", "job-id": "commit", "top-node": "node1"}} +{"return": {}} +{"execute": "block-stream", "arguments": {"base-node": "commit-filter", "device": "node3", "job-id": "stream"}} +{"return": {}} +{"execute": "job-finalize", "arguments": {"id": "commit"}} +{"return": {}} +{"data": {"id": "commit", "type": "commit"}, "event": "BLOCK_JOB_PENDING", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} +{"data": {"device": "commit", "len": 67108864, "offset": 67108864, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} +{"data": {"device": "stream", "len": 67108864, "offset": 67108864, "speed": 0, "type": "stream"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} + +=== Commit and stream finish concurrently (letting commit write) === + +{"execute": "object-add", "arguments": {"id": "tg", "props": {"x-iops-write": 1, "x-iops-write-max": 1}, "qom-type": "throttle-group"}} +{"return": {}} +{"execute": "blockdev-add", "arguments": {"backing": {"backing": {"backing": {"backing": {"driver": "raw", "file": {"driver": "throttle", "file": {"driver": "file", "filename": "TEST_DIR/PID-node0.img"}, "throttle-group": "tg"}, "node-name": "node0"}, "driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-node1.img"}, "node-name": "node1"}, "driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-node2.img"}, "node-name": "node2"}, "driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-node3.img"}, "node-name": "node3"}, "driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-node4.img"}, "node-name": "node4"}} 
+{"return": {}} +{"execute": "block-commit", "arguments": {"auto-finalize": false, "base-node": "node0", "device": "node4", "filter-node-name": "commit-filter", "job-id": "commit", "top-node": "node1"}} +{"return": {}} +{"execute": "block-stream", "arguments": {"base-node": "commit-filter", "device": "node3", "job-id": "stream"}} +{"return": {}} +{"data": {"device": "stream", "len": 67108864, "offset": 67108864, "speed": 0, "type": "stream"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} +{"execute": "job-finalize", "arguments": {"id": "commit"}} +{"return": {}} +{"data": {"id": "commit", "type": "commit"}, "event": "BLOCK_JOB_PENDING", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} +{"data": {"device": "commit", "len": 67108864, "offset": 67108864, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} diff --git a/tests/qemu-iotests/262 b/tests/qemu-iotests/262 new file mode 100755 index 0000000..398f635 --- /dev/null +++ b/tests/qemu-iotests/262 @@ -0,0 +1,82 @@ +#!/usr/bin/env python +# +# Copyright (C) 2019 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +# Creator/Owner: Kevin Wolf <kwolf@redhat.com> +# +# Test migration with filter drivers present. Keep everything in an +# iothread just for fun. 
+ +import iotests +import os + +iotests.verify_image_format(supported_fmts=['qcow2']) +iotests.verify_platform(['linux']) + +with iotests.FilePath('img') as img_path, \ + iotests.FilePath('mig_fifo') as fifo, \ + iotests.VM(path_suffix='a') as vm_a, \ + iotests.VM(path_suffix='b') as vm_b: + + def add_opts(vm): + vm.add_object('iothread,id=iothread0') + vm.add_object('throttle-group,id=tg0,x-bps-total=65536') + vm.add_blockdev('file,filename=%s,node-name=drive0-file' % (img_path)) + vm.add_blockdev('%s,file=drive0-file,node-name=drive0-fmt' % (iotests.imgfmt)) + vm.add_blockdev('copy-on-read,file=drive0-fmt,node-name=drive0-cor') + vm.add_blockdev('throttle,file=drive0-cor,node-name=drive0-throttle,throttle-group=tg0') + vm.add_blockdev('blkdebug,image=drive0-throttle,node-name=drive0-dbg') + vm.add_blockdev('null-co,node-name=null,read-zeroes=on') + vm.add_blockdev('blkverify,test=drive0-dbg,raw=null,node-name=drive0-verify') + + if iotests.supports_quorum(): + vm.add_blockdev('quorum,children.0=drive0-verify,vote-threshold=1,node-name=drive0-quorum') + root = "drive0-quorum" + else: + root = "drive0-verify" + + vm.add_device('virtio-blk,drive=%s,iothread=iothread0' % root) + + iotests.qemu_img_pipe('create', '-f', iotests.imgfmt, img_path, '64M') + + os.mkfifo(fifo) + + iotests.log('Launching source VM...') + add_opts(vm_a) + vm_a.launch() + + vm_a.enable_migration_events('A') + + iotests.log('Launching destination VM...') + add_opts(vm_b) + vm_b.add_incoming("exec: cat '%s'" % (fifo)) + vm_b.launch() + + vm_b.enable_migration_events('B') + + iotests.log('Starting migration to B...') + iotests.log(vm_a.qmp('migrate', uri='exec:cat >%s' % (fifo))) + with iotests.Timeout(3, 'Migration does not complete'): + # Wait for the source first (which includes setup=setup) + vm_a.wait_migration() + # Wait for the destination second (which does not) + vm_b.wait_migration() + + iotests.log(vm_a.qmp('query-migrate')['return']['status']) + iotests.log(vm_b.qmp('query-migrate')['return']['status']) + + iotests.log(vm_a.qmp('query-status')) + iotests.log(vm_b.qmp('query-status')) diff --git a/tests/qemu-iotests/262.out b/tests/qemu-iotests/262.out new file mode 100644 index 0000000..5a58e5e --- /dev/null +++ b/tests/qemu-iotests/262.out @@ -0,0 +1,17 @@ +Launching source VM... +Enabling migration QMP events on A... +{"return": {}} +Launching destination VM... +Enabling migration QMP events on B... +{"return": {}} +Starting migration to B... 
+{"return": {}} +{"data": {"status": "setup"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} +{"data": {"status": "active"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} +{"data": {"status": "completed"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} +{"data": {"status": "active"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} +{"data": {"status": "completed"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} +completed +completed +{"return": {"running": false, "singlestep": false, "status": "postmigrate"}} +{"return": {"running": true, "singlestep": false, "status": "running"}} diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index f13e5f2..5a37839 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -271,3 +271,5 @@ 254 rw backing quick 255 rw quick 256 rw quick +258 rw quick +262 rw quick migration diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py index ce74177..91172c3 100644 --- a/tests/qemu-iotests/iotests.py +++ b/tests/qemu-iotests/iotests.py @@ -583,6 +583,22 @@ class VM(qtest.QEMUQtestMachine): elif status == 'null': return error + def enable_migration_events(self, name): + log('Enabling migration QMP events on %s...' % name) + log(self.qmp('migrate-set-capabilities', capabilities=[ + { + 'capability': 'events', + 'state': True + } + ])) + + def wait_migration(self): + while True: + event = self.event_wait('MIGRATION') + log(event, filters=[filter_qmp_event]) + if event['data']['status'] == 'completed': + break + def node_info(self, node_name): nodes = self.qmp('query-named-block-nodes') for x in nodes['return']: diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c index 481b750..374bef6 100644 --- a/tests/test-bdrv-drain.c +++ b/tests/test-bdrv-drain.c @@ -101,6 +101,13 @@ static void bdrv_test_child_perm(BlockDriverState *bs, BdrvChild *c, nperm, nshared); } +static int bdrv_test_change_backing_file(BlockDriverState *bs, + const char *backing_file, + const char *backing_fmt) +{ + return 0; +} + static BlockDriver bdrv_test = { .format_name = "test", .instance_size = sizeof(BDRVTestState), @@ -112,6 +119,8 @@ static BlockDriver bdrv_test = { .bdrv_co_drain_end = bdrv_test_co_drain_end, .bdrv_child_perm = bdrv_test_child_perm, + + .bdrv_change_backing_file = bdrv_test_change_backing_file, }; static void aio_ret_cb(void *opaque, int ret) @@ -678,6 +687,7 @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread) &error_abort); s = bs->opaque; blk_insert_bs(blk, bs, &error_abort); + blk_set_disable_request_queuing(blk, true); blk_set_aio_context(blk, ctx_a, &error_abort); aio_context_acquire(ctx_a); @@ -1672,6 +1682,466 @@ static void test_blockjob_commit_by_drained_end(void) bdrv_unref(bs_child); } + +typedef struct TestSimpleBlockJob { + BlockJob common; + bool should_complete; + bool *did_complete; +} TestSimpleBlockJob; + +static int coroutine_fn test_simple_job_run(Job *job, Error **errp) +{ + TestSimpleBlockJob *s = container_of(job, TestSimpleBlockJob, common.job); + + while (!s->should_complete) { + job_sleep_ns(job, 0); + } + + return 0; +} + +static void test_simple_job_clean(Job *job) +{ + TestSimpleBlockJob *s = container_of(job, TestSimpleBlockJob, common.job); + *s->did_complete = true; +} + +static const BlockJobDriver test_simple_job_driver = { + .job_driver = { + .instance_size = 
sizeof(TestSimpleBlockJob), + .free = block_job_free, + .user_resume = block_job_user_resume, + .drain = block_job_drain, + .run = test_simple_job_run, + .clean = test_simple_job_clean, + }, +}; + +static int drop_intermediate_poll_update_filename(BdrvChild *child, + BlockDriverState *new_base, + const char *filename, + Error **errp) +{ + /* + * We are free to poll here, which may change the block graph, if + * it is not drained. + */ + + /* If the job is not drained: Complete it, schedule job_exit() */ + aio_poll(qemu_get_current_aio_context(), false); + /* If the job is not drained: Run job_exit(), finish the job */ + aio_poll(qemu_get_current_aio_context(), false); + + return 0; +} + +/** + * Test a poll in the midst of bdrv_drop_intermediate(). + * + * bdrv_drop_intermediate() calls BdrvChildRole.update_filename(), + * which can yield or poll. This may lead to graph changes, unless + * the whole subtree in question is drained. + * + * We test this on the following graph: + * + * Job + * + * | + * job-node + * | + * v + * + * job-node + * + * | + * backing + * | + * v + * + * node-2 --chain--> node-1 --chain--> node-0 + * + * We drop node-1 with bdrv_drop_intermediate(top=node-1, base=node-0). + * + * This first updates node-2's backing filename by invoking + * drop_intermediate_poll_update_filename(), which polls twice. This + * causes the job to finish, which in turns causes the job-node to be + * deleted. + * + * bdrv_drop_intermediate() uses a QLIST_FOREACH_SAFE() loop, so it + * already has a pointer to the BdrvChild edge between job-node and + * node-1. When it tries to handle that edge, we probably get a + * segmentation fault because the object no longer exists. + * + * + * The solution is for bdrv_drop_intermediate() to drain top's + * subtree. This prevents graph changes from happening just because + * BdrvChildRole.update_filename() yields or polls. Thus, the block + * job is paused during that drained section and must finish before or + * after. + * + * (In addition, bdrv_replace_child() must keep the job paused.) 
+ */ +static void test_drop_intermediate_poll(void) +{ + static BdrvChildRole chain_child_role; + BlockDriverState *chain[3]; + TestSimpleBlockJob *job; + BlockDriverState *job_node; + bool job_has_completed = false; + int i; + int ret; + + chain_child_role = child_backing; + chain_child_role.update_filename = drop_intermediate_poll_update_filename; + + for (i = 0; i < 3; i++) { + char name[32]; + snprintf(name, 32, "node-%i", i); + + chain[i] = bdrv_new_open_driver(&bdrv_test, name, 0, &error_abort); + } + + job_node = bdrv_new_open_driver(&bdrv_test, "job-node", BDRV_O_RDWR, + &error_abort); + bdrv_set_backing_hd(job_node, chain[1], &error_abort); + + /* + * Establish the chain last, so the chain links are the first + * elements in the BDS.parents lists + */ + for (i = 0; i < 3; i++) { + if (i) { + /* Takes the reference to chain[i - 1] */ + chain[i]->backing = bdrv_attach_child(chain[i], chain[i - 1], + "chain", &chain_child_role, + &error_abort); + } + } + + job = block_job_create("job", &test_simple_job_driver, NULL, job_node, + 0, BLK_PERM_ALL, 0, 0, NULL, NULL, &error_abort); + + /* The job has a reference now */ + bdrv_unref(job_node); + + job->did_complete = &job_has_completed; + + job_start(&job->common.job); + job->should_complete = true; + + g_assert(!job_has_completed); + ret = bdrv_drop_intermediate(chain[1], chain[0], NULL); + g_assert(ret == 0); + g_assert(job_has_completed); + + bdrv_unref(chain[2]); +} + + +typedef struct BDRVReplaceTestState { + bool was_drained; + bool was_undrained; + bool has_read; + + int drain_count; + + bool yield_before_read; + Coroutine *io_co; + Coroutine *drain_co; +} BDRVReplaceTestState; + +static void bdrv_replace_test_close(BlockDriverState *bs) +{ +} + +/** + * If @bs has a backing file: + * Yield if .yield_before_read is true (and wait for drain_begin to + * wake us up). + * Forward the read to bs->backing. Set .has_read to true. + * If drain_begin has woken us, wake it in turn. + * + * Otherwise: + * Set .has_read to true and return success. + */ +static int coroutine_fn bdrv_replace_test_co_preadv(BlockDriverState *bs, + uint64_t offset, + uint64_t bytes, + QEMUIOVector *qiov, + int flags) +{ + BDRVReplaceTestState *s = bs->opaque; + + if (bs->backing) { + int ret; + + g_assert(!s->drain_count); + + s->io_co = qemu_coroutine_self(); + if (s->yield_before_read) { + s->yield_before_read = false; + qemu_coroutine_yield(); + } + s->io_co = NULL; + + ret = bdrv_preadv(bs->backing, offset, qiov); + s->has_read = true; + + /* Wake up drain_co if it runs */ + if (s->drain_co) { + aio_co_wake(s->drain_co); + } + + return ret; + } + + s->has_read = true; + return 0; +} + +/** + * If .drain_count is 0, wake up .io_co if there is one; and set + * .was_drained. + * Increment .drain_count. + */ +static void coroutine_fn bdrv_replace_test_co_drain_begin(BlockDriverState *bs) +{ + BDRVReplaceTestState *s = bs->opaque; + + if (!s->drain_count) { + /* Keep waking io_co up until it is done */ + s->drain_co = qemu_coroutine_self(); + while (s->io_co) { + aio_co_wake(s->io_co); + s->io_co = NULL; + qemu_coroutine_yield(); + } + s->drain_co = NULL; + + s->was_drained = true; + } + s->drain_count++; +} + +/** + * Reduce .drain_count, set .was_undrained once it reaches 0. + * If .drain_count reaches 0 and the node has a backing file, issue a + * read request. 
+ */ +static void coroutine_fn bdrv_replace_test_co_drain_end(BlockDriverState *bs) +{ + BDRVReplaceTestState *s = bs->opaque; + + g_assert(s->drain_count > 0); + if (!--s->drain_count) { + int ret; + + s->was_undrained = true; + + if (bs->backing) { + char data; + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, &data, 1); + + /* Queue a read request post-drain */ + ret = bdrv_replace_test_co_preadv(bs, 0, 1, &qiov, 0); + g_assert(ret >= 0); + } + } +} + +static BlockDriver bdrv_replace_test = { + .format_name = "replace_test", + .instance_size = sizeof(BDRVReplaceTestState), + + .bdrv_close = bdrv_replace_test_close, + .bdrv_co_preadv = bdrv_replace_test_co_preadv, + + .bdrv_co_drain_begin = bdrv_replace_test_co_drain_begin, + .bdrv_co_drain_end = bdrv_replace_test_co_drain_end, + + .bdrv_child_perm = bdrv_format_default_perms, +}; + +static void coroutine_fn test_replace_child_mid_drain_read_co(void *opaque) +{ + int ret; + char data; + + ret = blk_co_pread(opaque, 0, 1, &data, 0); + g_assert(ret >= 0); +} + +/** + * We test two things: + * (1) bdrv_replace_child_noperm() must not undrain the parent if both + * children are drained. + * (2) bdrv_replace_child_noperm() must never flush I/O requests to a + * drained child. If the old child is drained, it must flush I/O + * requests after the new one has been attached. If the new child + * is drained, it must flush I/O requests before the old one is + * detached. + * + * To do so, we create one parent node and two child nodes; then + * attach one of the children (old_child_bs) to the parent, then + * drain both old_child_bs and new_child_bs according to + * old_drain_count and new_drain_count, respectively, and finally + * we invoke bdrv_replace_node() to replace old_child_bs by + * new_child_bs. + * + * The test block driver we use here (bdrv_replace_test) has a read + * function that: + * - For the parent node, can optionally yield, and then forwards the + * read to bdrv_preadv(), + * - For the child node, just returns immediately. + * + * If the read yields, the drain_begin function will wake it up. + * + * The drain_end function issues a read on the parent once it is fully + * undrained (which simulates requests starting to come in again). 
+ */ +static void do_test_replace_child_mid_drain(int old_drain_count, + int new_drain_count) +{ + BlockBackend *parent_blk; + BlockDriverState *parent_bs; + BlockDriverState *old_child_bs, *new_child_bs; + BDRVReplaceTestState *parent_s; + BDRVReplaceTestState *old_child_s, *new_child_s; + Coroutine *io_co; + int i; + + parent_bs = bdrv_new_open_driver(&bdrv_replace_test, "parent", 0, + &error_abort); + parent_s = parent_bs->opaque; + + parent_blk = blk_new(qemu_get_aio_context(), + BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL); + blk_insert_bs(parent_blk, parent_bs, &error_abort); + + old_child_bs = bdrv_new_open_driver(&bdrv_replace_test, "old-child", 0, + &error_abort); + new_child_bs = bdrv_new_open_driver(&bdrv_replace_test, "new-child", 0, + &error_abort); + old_child_s = old_child_bs->opaque; + new_child_s = new_child_bs->opaque; + + /* So that we can read something */ + parent_bs->total_sectors = 1; + old_child_bs->total_sectors = 1; + new_child_bs->total_sectors = 1; + + bdrv_ref(old_child_bs); + parent_bs->backing = bdrv_attach_child(parent_bs, old_child_bs, "child", + &child_backing, &error_abort); + + for (i = 0; i < old_drain_count; i++) { + bdrv_drained_begin(old_child_bs); + } + for (i = 0; i < new_drain_count; i++) { + bdrv_drained_begin(new_child_bs); + } + + if (!old_drain_count) { + /* + * Start a read operation that will yield, so it will not + * complete before the node is drained. + */ + parent_s->yield_before_read = true; + io_co = qemu_coroutine_create(test_replace_child_mid_drain_read_co, + parent_blk); + qemu_coroutine_enter(io_co); + } + + /* If we have started a read operation, it should have yielded */ + g_assert(!parent_s->has_read); + + /* Reset drained status so we can see what bdrv_replace_node() does */ + parent_s->was_drained = false; + parent_s->was_undrained = false; + + g_assert(parent_bs->quiesce_counter == old_drain_count); + bdrv_replace_node(old_child_bs, new_child_bs, &error_abort); + g_assert(parent_bs->quiesce_counter == new_drain_count); + + if (!old_drain_count && !new_drain_count) { + /* + * From undrained to undrained drains and undrains the parent, + * because bdrv_replace_node() contains a drained section for + * @old_child_bs. + */ + g_assert(parent_s->was_drained && parent_s->was_undrained); + } else if (!old_drain_count && new_drain_count) { + /* + * From undrained to drained should drain the parent and keep + * it that way. + */ + g_assert(parent_s->was_drained && !parent_s->was_undrained); + } else if (old_drain_count && !new_drain_count) { + /* + * From drained to undrained should undrain the parent and + * keep it that way. + */ + g_assert(!parent_s->was_drained && parent_s->was_undrained); + } else /* if (old_drain_count && new_drain_count) */ { + /* + * From drained to drained must not undrain the parent at any + * point + */ + g_assert(!parent_s->was_drained && !parent_s->was_undrained); + } + + if (!old_drain_count || !new_drain_count) { + /* + * If !old_drain_count, we have started a read request before + * bdrv_replace_node(). If !new_drain_count, the parent must + * have been undrained at some point, and + * bdrv_replace_test_co_drain_end() starts a read request + * then. + */ + g_assert(parent_s->has_read); + } else { + /* + * If the parent was never undrained, there is no way to start + * a read request. 
+ */ + g_assert(!parent_s->has_read); + } + + /* A drained child must have not received any request */ + g_assert(!(old_drain_count && old_child_s->has_read)); + g_assert(!(new_drain_count && new_child_s->has_read)); + + for (i = 0; i < new_drain_count; i++) { + bdrv_drained_end(new_child_bs); + } + for (i = 0; i < old_drain_count; i++) { + bdrv_drained_end(old_child_bs); + } + + /* + * By now, bdrv_replace_test_co_drain_end() must have been called + * at some point while the new child was attached to the parent. + */ + g_assert(parent_s->has_read); + g_assert(new_child_s->has_read); + + blk_unref(parent_blk); + bdrv_unref(parent_bs); + bdrv_unref(old_child_bs); + bdrv_unref(new_child_bs); +} + +static void test_replace_child_mid_drain(void) +{ + int old_drain_count, new_drain_count; + + for (old_drain_count = 0; old_drain_count < 2; old_drain_count++) { + for (new_drain_count = 0; new_drain_count < 2; new_drain_count++) { + do_test_replace_child_mid_drain(old_drain_count, new_drain_count); + } + } +} + int main(int argc, char **argv) { int ret; @@ -1758,6 +2228,12 @@ int main(int argc, char **argv) g_test_add_func("/bdrv-drain/blockjob/commit_by_drained_end", test_blockjob_commit_by_drained_end); + g_test_add_func("/bdrv-drain/bdrv_drop_intermediate/poll", + test_drop_intermediate_poll); + + g_test_add_func("/bdrv-drain/replace_child/mid-drain", + test_replace_child_mid_drain); + ret = g_test_run(); qemu_event_destroy(&done_event); return ret; |
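
The headline change of this series is request queuing in BlockBackend: while a BlockBackend is drained, coroutine requests wait on blk->queued_requests in blk_wait_while_drained() and are woken up again from blk_root_drained_end(); block jobs opt out via the new blk_set_disable_request_queuing() to avoid deadlocks on drain. The toy program below only models that behaviour in plain C (no coroutines; Backend, submit(), drained_begin() and drained_end() are invented names for illustration). It is not QEMU code, but it reproduces the same control flow.

#include <stdio.h>

enum { MAX_QUEUED = 16 };

typedef struct Backend {
    int quiesce_counter;           /* > 0 while the backend is drained     */
    int disable_request_queuing;   /* e.g. set for block job BlockBackends */
    int queued[MAX_QUEUED];        /* requests parked during the drain     */
    int n_queued;
} Backend;

static void submit(Backend *blk, int id)
{
    if (blk->quiesce_counter && !blk->disable_request_queuing) {
        /* Models blk_wait_while_drained(): park the request */
        blk->queued[blk->n_queued++] = id;
        printf("request %d queued\n", id);
    } else {
        printf("request %d issued\n", id);
    }
}

static void drained_begin(Backend *blk)
{
    blk->quiesce_counter++;
}

static void drained_end(Backend *blk)
{
    if (--blk->quiesce_counter == 0) {
        /* Models the loop added to blk_root_drained_end():
         * resume everything that was parked, in order */
        for (int i = 0; i < blk->n_queued; i++) {
            printf("request %d resumed\n", blk->queued[i]);
        }
        blk->n_queued = 0;
    }
}

int main(void)
{
    Backend blk = {0};

    submit(&blk, 1);        /* issued immediately              */
    drained_begin(&blk);
    submit(&blk, 2);        /* parked: the backend is drained  */
    drained_end(&blk);      /* request 2 is resumed only here  */
    return 0;
}

The bdrv_replace_child_noperm() hunk at the top of the diff can be read as balancing a "drain saldo": the parent has already received one drained_begin per drained section of the old child and must end up with one per drained section of the new child. Extra drained_begin calls are issued before the old child is detached (so outstanding requests are flushed to it first), and extra drained_end calls only after the new child is attached (so no request can slip in in between). A minimal, self-contained sketch of that balancing (function and variable names are made up for illustration):

#include <stdio.h>

/* Returns the parent's quiesce counter after the child switch. */
static int replace_child(int parent_quiesce_counter, int new_child_quiesce_counter)
{
    int drain_saldo = new_child_quiesce_counter - parent_quiesce_counter;

    while (drain_saldo > 0) {          /* new child is "more drained"  */
        parent_quiesce_counter++;      /* -> parent drained_begin      */
        drain_saldo--;
    }

    /* detach old child / attach new child would happen here */

    while (drain_saldo < 0) {          /* new child is "less drained"  */
        parent_quiesce_counter--;      /* -> parent drained_end        */
        drain_saldo++;
    }

    return parent_quiesce_counter;
}

int main(void)
{
    printf("%d\n", replace_child(0, 2));  /* undrained -> drained child: 2 */
    printf("%d\n", replace_child(1, 0));  /* drained -> undrained child: 0 */
    return 0;
}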