diff options
author | Anthony Liguori <aliguori@us.ibm.com> | 2013-07-22 10:13:34 -0500 |
---|---|---|
committer | Anthony Liguori <aliguori@us.ibm.com> | 2013-07-22 10:13:34 -0500 |
commit | 5447a9afc4150693d3909a8632891061147e170d (patch) | |
tree | 497739f22cd788660db5a927eb50d7c6de38b9af | |
parent | 293706dd682f578b457d052988cf3c20b4eab82d (diff) | |
parent | a23fdf355969d331f60593fa5b857d43aec25aac (diff) | |
download | qemu-5447a9afc4150693d3909a8632891061147e170d.zip qemu-5447a9afc4150693d3909a8632891061147e170d.tar.gz qemu-5447a9afc4150693d3909a8632891061147e170d.tar.bz2 |
Merge remote-tracking branch 'stefanha/block' into staging
# By Peter Lieven (5) and others
# Via Stefan Hajnoczi
* stefanha/block:
block/raw: add .bdrv_get_info
block: fix bdrv_read_unthrottled()
cpus: Let vm_stop[_force_state]() always flush block devices
block-migration: efficiently encode zero blocks
block/raw: add bdrv_co_write_zeroes
block: add bdrv_write_zeroes()
block: fix vvfat error path for enable_write_target
QEMUBH: make AioContext's bh re-entrant
dataplane: sync virtio.c and vring.c virtqueue state
gluster: Add discard support for GlusterFS block driver.
gluster: Use pkg-config to configure GlusterFS block driver
Message-id: 1374223132-29107-1-git-send-email-stefanha@redhat.com
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
-rw-r--r-- | async.c | 33 | ||||
-rw-r--r-- | block-migration.c | 32 | ||||
-rw-r--r-- | block.c | 29 | ||||
-rw-r--r-- | block/gluster.c | 45 | ||||
-rw-r--r-- | block/raw.c | 14 | ||||
-rw-r--r-- | block/vvfat.c | 25 | ||||
-rwxr-xr-x | configure | 25 | ||||
-rw-r--r-- | cpus.c | 9 | ||||
-rw-r--r-- | hw/block/dataplane/virtio-blk.c | 2 | ||||
-rw-r--r-- | hw/virtio/dataplane/vring.c | 8 | ||||
-rw-r--r-- | include/block/aio.h | 7 | ||||
-rw-r--r-- | include/block/block.h | 2 | ||||
-rw-r--r-- | include/hw/virtio/dataplane/vring.h | 2 | ||||
-rw-r--r-- | include/migration/migration.h | 1 | ||||
-rw-r--r-- | migration.c | 9 | ||||
-rw-r--r-- | qapi-schema.json | 8 |
16 files changed, 203 insertions, 48 deletions
@@ -47,11 +47,16 @@ QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque) bh->ctx = ctx; bh->cb = cb; bh->opaque = opaque; + qemu_mutex_lock(&ctx->bh_lock); bh->next = ctx->first_bh; + /* Make sure that the members are ready before putting bh into list */ + smp_wmb(); ctx->first_bh = bh; + qemu_mutex_unlock(&ctx->bh_lock); return bh; } +/* Multiple occurrences of aio_bh_poll cannot be called concurrently */ int aio_bh_poll(AioContext *ctx) { QEMUBH *bh, **bhp, *next; @@ -61,9 +66,15 @@ int aio_bh_poll(AioContext *ctx) ret = 0; for (bh = ctx->first_bh; bh; bh = next) { + /* Make sure that fetching bh happens before accessing its members */ + smp_read_barrier_depends(); next = bh->next; if (!bh->deleted && bh->scheduled) { bh->scheduled = 0; + /* Paired with write barrier in bh schedule to ensure reading for + * idle & callbacks coming after bh's scheduling. + */ + smp_rmb(); if (!bh->idle) ret = 1; bh->idle = 0; @@ -75,6 +86,7 @@ int aio_bh_poll(AioContext *ctx) /* remove deleted bhs */ if (!ctx->walking_bh) { + qemu_mutex_lock(&ctx->bh_lock); bhp = &ctx->first_bh; while (*bhp) { bh = *bhp; @@ -85,6 +97,7 @@ int aio_bh_poll(AioContext *ctx) bhp = &bh->next; } } + qemu_mutex_unlock(&ctx->bh_lock); } return ret; @@ -94,24 +107,38 @@ void qemu_bh_schedule_idle(QEMUBH *bh) { if (bh->scheduled) return; - bh->scheduled = 1; bh->idle = 1; + /* Make sure that idle & any writes needed by the callback are done + * before the locations are read in the aio_bh_poll. + */ + smp_wmb(); + bh->scheduled = 1; } void qemu_bh_schedule(QEMUBH *bh) { if (bh->scheduled) return; - bh->scheduled = 1; bh->idle = 0; + /* Make sure that idle & any writes needed by the callback are done + * before the locations are read in the aio_bh_poll. + */ + smp_wmb(); + bh->scheduled = 1; aio_notify(bh->ctx); } + +/* This func is async. + */ void qemu_bh_cancel(QEMUBH *bh) { bh->scheduled = 0; } +/* This func is async.The bottom half will do the delete action at the finial + * end. + */ void qemu_bh_delete(QEMUBH *bh) { bh->scheduled = 0; @@ -176,6 +203,7 @@ aio_ctx_finalize(GSource *source) thread_pool_free(ctx->thread_pool); aio_set_event_notifier(ctx, &ctx->notifier, NULL, NULL); event_notifier_cleanup(&ctx->notifier); + qemu_mutex_destroy(&ctx->bh_lock); g_array_free(ctx->pollfds, TRUE); } @@ -211,6 +239,7 @@ AioContext *aio_context_new(void) ctx = (AioContext *) g_source_new(&aio_source_funcs, sizeof(AioContext)); ctx->pollfds = g_array_new(FALSE, FALSE, sizeof(GPollFD)); ctx->thread_pool = NULL; + qemu_mutex_init(&ctx->bh_lock); event_notifier_init(&ctx->notifier, false); aio_set_event_notifier(ctx, &ctx->notifier, (EventNotifierHandler *) diff --git a/block-migration.c b/block-migration.c index 2fd7699..f803f20 100644 --- a/block-migration.c +++ b/block-migration.c @@ -29,6 +29,7 @@ #define BLK_MIG_FLAG_DEVICE_BLOCK 0x01 #define BLK_MIG_FLAG_EOS 0x02 #define BLK_MIG_FLAG_PROGRESS 0x04 +#define BLK_MIG_FLAG_ZERO_BLOCK 0x08 #define MAX_IS_ALLOCATED_SEARCH 65536 @@ -80,6 +81,7 @@ typedef struct BlkMigState { int shared_base; QSIMPLEQ_HEAD(bmds_list, BlkMigDevState) bmds_list; int64_t total_sector_sum; + bool zero_blocks; /* Protected by lock. */ QSIMPLEQ_HEAD(blk_list, BlkMigBlock) blk_list; @@ -114,16 +116,30 @@ static void blk_mig_unlock(void) static void blk_send(QEMUFile *f, BlkMigBlock * blk) { int len; + uint64_t flags = BLK_MIG_FLAG_DEVICE_BLOCK; + + if (block_mig_state.zero_blocks && + buffer_is_zero(blk->buf, BLOCK_SIZE)) { + flags |= BLK_MIG_FLAG_ZERO_BLOCK; + } /* sector number and flags */ qemu_put_be64(f, (blk->sector << BDRV_SECTOR_BITS) - | BLK_MIG_FLAG_DEVICE_BLOCK); + | flags); /* device name */ len = strlen(blk->bmds->bs->device_name); qemu_put_byte(f, len); qemu_put_buffer(f, (uint8_t *)blk->bmds->bs->device_name, len); + /* if a block is zero we need to flush here since the network + * bandwidth is now a lot higher than the storage device bandwidth. + * thus if we queue zero blocks we slow down the migration */ + if (flags & BLK_MIG_FLAG_ZERO_BLOCK) { + qemu_fflush(f); + return; + } + qemu_put_buffer(f, blk->buf, BLOCK_SIZE); } @@ -344,6 +360,7 @@ static void init_blk_migration(QEMUFile *f) block_mig_state.total_sector_sum = 0; block_mig_state.prev_progress = -1; block_mig_state.bulk_completed = 0; + block_mig_state.zero_blocks = migrate_zero_blocks(); bdrv_iterate(init_blk_migration_it, NULL); } @@ -762,12 +779,15 @@ static int block_load(QEMUFile *f, void *opaque, int version_id) nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK; } - buf = g_malloc(BLOCK_SIZE); - - qemu_get_buffer(f, buf, BLOCK_SIZE); - ret = bdrv_write(bs, addr, buf, nr_sectors); + if (flags & BLK_MIG_FLAG_ZERO_BLOCK) { + ret = bdrv_write_zeroes(bs, addr, nr_sectors); + } else { + buf = g_malloc(BLOCK_SIZE); + qemu_get_buffer(f, buf, BLOCK_SIZE); + ret = bdrv_write(bs, addr, buf, nr_sectors); + g_free(buf); + } - g_free(buf); if (ret < 0) { return ret; } @@ -2162,6 +2162,7 @@ typedef struct RwCo { QEMUIOVector *qiov; bool is_write; int ret; + BdrvRequestFlags flags; } RwCo; static void coroutine_fn bdrv_rw_co_entry(void *opaque) @@ -2170,10 +2171,12 @@ static void coroutine_fn bdrv_rw_co_entry(void *opaque) if (!rwco->is_write) { rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num, - rwco->nb_sectors, rwco->qiov, 0); + rwco->nb_sectors, rwco->qiov, + rwco->flags); } else { rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num, - rwco->nb_sectors, rwco->qiov, 0); + rwco->nb_sectors, rwco->qiov, + rwco->flags); } } @@ -2181,7 +2184,8 @@ static void coroutine_fn bdrv_rw_co_entry(void *opaque) * Process a vectored synchronous request using coroutines */ static int bdrv_rwv_co(BlockDriverState *bs, int64_t sector_num, - QEMUIOVector *qiov, bool is_write) + QEMUIOVector *qiov, bool is_write, + BdrvRequestFlags flags) { Coroutine *co; RwCo rwco = { @@ -2191,6 +2195,7 @@ static int bdrv_rwv_co(BlockDriverState *bs, int64_t sector_num, .qiov = qiov, .is_write = is_write, .ret = NOT_DONE, + .flags = flags, }; assert((qiov->size & (BDRV_SECTOR_SIZE - 1)) == 0); @@ -2222,7 +2227,7 @@ static int bdrv_rwv_co(BlockDriverState *bs, int64_t sector_num, * Process a synchronous request using coroutines */ static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf, - int nb_sectors, bool is_write) + int nb_sectors, bool is_write, BdrvRequestFlags flags) { QEMUIOVector qiov; struct iovec iov = { @@ -2231,14 +2236,14 @@ static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf, }; qemu_iovec_init_external(&qiov, &iov, 1); - return bdrv_rwv_co(bs, sector_num, &qiov, is_write); + return bdrv_rwv_co(bs, sector_num, &qiov, is_write, flags); } /* return < 0 if error. See bdrv_write() for the return codes */ int bdrv_read(BlockDriverState *bs, int64_t sector_num, uint8_t *buf, int nb_sectors) { - return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false); + return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false, 0); } /* Just like bdrv_read(), but with I/O throttling temporarily disabled */ @@ -2250,7 +2255,7 @@ int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num, enabled = bs->io_limits_enabled; bs->io_limits_enabled = false; - ret = bdrv_read(bs, 0, buf, 1); + ret = bdrv_read(bs, sector_num, buf, nb_sectors); bs->io_limits_enabled = enabled; return ret; } @@ -2264,12 +2269,18 @@ int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num, int bdrv_write(BlockDriverState *bs, int64_t sector_num, const uint8_t *buf, int nb_sectors) { - return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true); + return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true, 0); } int bdrv_writev(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov) { - return bdrv_rwv_co(bs, sector_num, qiov, true); + return bdrv_rwv_co(bs, sector_num, qiov, true, 0); +} + +int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num, int nb_sectors) +{ + return bdrv_rw_co(bs, sector_num, NULL, nb_sectors, true, + BDRV_REQ_ZERO_WRITE); } int bdrv_pread(BlockDriverState *bs, int64_t offset, diff --git a/block/gluster.c b/block/gluster.c index 61424bc..6de418c 100644 --- a/block/gluster.c +++ b/block/gluster.c @@ -532,6 +532,39 @@ out: return NULL; } +#ifdef CONFIG_GLUSTERFS_DISCARD +static BlockDriverAIOCB *qemu_gluster_aio_discard(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, BlockDriverCompletionFunc *cb, + void *opaque) +{ + int ret; + GlusterAIOCB *acb; + BDRVGlusterState *s = bs->opaque; + size_t size; + off_t offset; + + offset = sector_num * BDRV_SECTOR_SIZE; + size = nb_sectors * BDRV_SECTOR_SIZE; + + acb = qemu_aio_get(&gluster_aiocb_info, bs, cb, opaque); + acb->size = 0; + acb->ret = 0; + acb->finished = NULL; + s->qemu_aio_count++; + + ret = glfs_discard_async(s->fd, offset, size, &gluster_finish_aiocb, acb); + if (ret < 0) { + goto out; + } + return &acb->common; + +out: + s->qemu_aio_count--; + qemu_aio_release(acb); + return NULL; +} +#endif + static int64_t qemu_gluster_getlength(BlockDriverState *bs) { BDRVGlusterState *s = bs->opaque; @@ -602,6 +635,9 @@ static BlockDriver bdrv_gluster = { .bdrv_aio_writev = qemu_gluster_aio_writev, .bdrv_aio_flush = qemu_gluster_aio_flush, .bdrv_has_zero_init = qemu_gluster_has_zero_init, +#ifdef CONFIG_GLUSTERFS_DISCARD + .bdrv_aio_discard = qemu_gluster_aio_discard, +#endif .create_options = qemu_gluster_create_options, }; @@ -618,6 +654,9 @@ static BlockDriver bdrv_gluster_tcp = { .bdrv_aio_writev = qemu_gluster_aio_writev, .bdrv_aio_flush = qemu_gluster_aio_flush, .bdrv_has_zero_init = qemu_gluster_has_zero_init, +#ifdef CONFIG_GLUSTERFS_DISCARD + .bdrv_aio_discard = qemu_gluster_aio_discard, +#endif .create_options = qemu_gluster_create_options, }; @@ -634,6 +673,9 @@ static BlockDriver bdrv_gluster_unix = { .bdrv_aio_writev = qemu_gluster_aio_writev, .bdrv_aio_flush = qemu_gluster_aio_flush, .bdrv_has_zero_init = qemu_gluster_has_zero_init, +#ifdef CONFIG_GLUSTERFS_DISCARD + .bdrv_aio_discard = qemu_gluster_aio_discard, +#endif .create_options = qemu_gluster_create_options, }; @@ -650,6 +692,9 @@ static BlockDriver bdrv_gluster_rdma = { .bdrv_aio_writev = qemu_gluster_aio_writev, .bdrv_aio_flush = qemu_gluster_aio_flush, .bdrv_has_zero_init = qemu_gluster_has_zero_init, +#ifdef CONFIG_GLUSTERFS_DISCARD + .bdrv_aio_discard = qemu_gluster_aio_discard, +#endif .create_options = qemu_gluster_create_options, }; diff --git a/block/raw.c b/block/raw.c index ce10422..f1682d4 100644 --- a/block/raw.c +++ b/block/raw.c @@ -42,6 +42,13 @@ static int coroutine_fn raw_co_is_allocated(BlockDriverState *bs, return bdrv_co_is_allocated(bs->file, sector_num, nb_sectors, pnum); } +static int coroutine_fn raw_co_write_zeroes(BlockDriverState *bs, + int64_t sector_num, + int nb_sectors) +{ + return bdrv_co_write_zeroes(bs->file, sector_num, nb_sectors); +} + static int64_t raw_getlength(BlockDriverState *bs) { return bdrv_getlength(bs->file); @@ -114,6 +121,11 @@ static int raw_has_zero_init(BlockDriverState *bs) return bdrv_has_zero_init(bs->file); } +static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) +{ + return bdrv_get_info(bs->file, bdi); +} + static BlockDriver bdrv_raw = { .format_name = "raw", @@ -128,10 +140,12 @@ static BlockDriver bdrv_raw = { .bdrv_co_readv = raw_co_readv, .bdrv_co_writev = raw_co_writev, .bdrv_co_is_allocated = raw_co_is_allocated, + .bdrv_co_write_zeroes = raw_co_write_zeroes, .bdrv_co_discard = raw_co_discard, .bdrv_probe = raw_probe, .bdrv_getlength = raw_getlength, + .bdrv_get_info = raw_get_info, .bdrv_truncate = raw_truncate, .bdrv_is_inserted = raw_is_inserted, diff --git a/block/vvfat.c b/block/vvfat.c index 87b0279..cd3b8ed 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -1164,8 +1164,8 @@ DLOG(if (stderr == NULL) { s->sector_count = cyls * heads * secs - (s->first_sectors_number - 1); if (qemu_opt_get_bool(opts, "rw", false)) { - if (enable_write_target(s)) { - ret = -EIO; + ret = enable_write_target(s); + if (ret < 0) { goto fail; } bs->read_only = 0; @@ -2917,9 +2917,7 @@ static int enable_write_target(BDRVVVFATState *s) s->qcow_filename = g_malloc(1024); ret = get_tmp_filename(s->qcow_filename, 1024); if (ret < 0) { - g_free(s->qcow_filename); - s->qcow_filename = NULL; - return ret; + goto err; } bdrv_qcow = bdrv_find_format("qcow"); @@ -2927,18 +2925,18 @@ static int enable_write_target(BDRVVVFATState *s) set_option_parameter_int(options, BLOCK_OPT_SIZE, s->sector_count * 512); set_option_parameter(options, BLOCK_OPT_BACKING_FILE, "fat:"); - if (bdrv_create(bdrv_qcow, s->qcow_filename, options) < 0) - return -1; + ret = bdrv_create(bdrv_qcow, s->qcow_filename, options); + if (ret < 0) { + goto err; + } s->qcow = bdrv_new(""); - if (s->qcow == NULL) { - return -1; - } ret = bdrv_open(s->qcow, s->qcow_filename, NULL, BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH, bdrv_qcow); if (ret < 0) { - return ret; + bdrv_delete(s->qcow); + goto err; } #ifndef _WIN32 @@ -2951,6 +2949,11 @@ static int enable_write_target(BDRVVVFATState *s) *(void**)s->bs->backing_hd->opaque = s; return 0; + +err: + g_free(s->qcow_filename); + s->qcow_filename = NULL; + return ret; } static void vvfat_close(BlockDriverState *bs) @@ -237,6 +237,7 @@ libiscsi="" coroutine="" seccomp="" glusterfs="" +glusterfs_discard="no" virtio_blk_data_plane="" gtk="" gtkabi="2.0" @@ -2570,23 +2571,21 @@ fi ########################################## # glusterfs probe if test "$glusterfs" != "no" ; then - cat > $TMPC <<EOF -#include <glusterfs/api/glfs.h> -int main(void) { - (void) glfs_new("volume"); - return 0; -} -EOF - glusterfs_libs="-lgfapi -lgfrpc -lgfxdr" - if compile_prog "" "$glusterfs_libs" ; then - glusterfs=yes + if $pkg_config --atleast-version=3 glusterfs-api >/dev/null 2>&1; then + glusterfs="yes" + glusterfs_cflags=`$pkg_config --cflags glusterfs-api 2>/dev/null` + glusterfs_libs=`$pkg_config --libs glusterfs-api 2>/dev/null` + CFLAGS="$CFLAGS $glusterfs_cflags" libs_tools="$glusterfs_libs $libs_tools" libs_softmmu="$glusterfs_libs $libs_softmmu" + if $pkg_config --atleast-version=5 glusterfs-api >/dev/null 2>&1; then + glusterfs_discard="yes" + fi else if test "$glusterfs" = "yes" ; then feature_not_found "GlusterFS backend support" fi - glusterfs=no + glusterfs="no" fi fi @@ -3969,6 +3968,10 @@ if test "$glusterfs" = "yes" ; then echo "CONFIG_GLUSTERFS=y" >> $config_host_mak fi +if test "$glusterfs_discard" = "yes" ; then + echo "CONFIG_GLUSTERFS_DISCARD=y" >> $config_host_mak +fi + if test "$libssh2" = "yes" ; then echo "CONFIG_LIBSSH2=y" >> $config_host_mak fi @@ -443,11 +443,12 @@ static int do_vm_stop(RunState state) pause_all_vcpus(); runstate_set(state); vm_state_notify(0, state); - bdrv_drain_all(); - ret = bdrv_flush_all(); monitor_protocol_event(QEVENT_STOP, NULL); } + bdrv_drain_all(); + ret = bdrv_flush_all(); + return ret; } @@ -1126,7 +1127,9 @@ int vm_stop_force_state(RunState state) return vm_stop(state); } else { runstate_set(state); - return 0; + /* Make sure to return an error if the flush in a previous vm_stop() + * failed. */ + return bdrv_flush_all(); } } diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c index 0356665..2faed43 100644 --- a/hw/block/dataplane/virtio-blk.c +++ b/hw/block/dataplane/virtio-blk.c @@ -537,7 +537,7 @@ void virtio_blk_data_plane_stop(VirtIOBlockDataPlane *s) /* Clean up guest notifier (irq) */ k->set_guest_notifiers(qbus->parent, 1, false); - vring_teardown(&s->vring); + vring_teardown(&s->vring, s->vdev, 0); s->started = false; s->stopping = false; } diff --git a/hw/virtio/dataplane/vring.c b/hw/virtio/dataplane/vring.c index e0d6e83..82cc151 100644 --- a/hw/virtio/dataplane/vring.c +++ b/hw/virtio/dataplane/vring.c @@ -39,8 +39,8 @@ bool vring_setup(Vring *vring, VirtIODevice *vdev, int n) vring_init(&vring->vr, virtio_queue_get_num(vdev, n), vring_ptr, 4096); - vring->last_avail_idx = 0; - vring->last_used_idx = 0; + vring->last_avail_idx = virtio_queue_get_last_avail_idx(vdev, n); + vring->last_used_idx = vring->vr.used->idx; vring->signalled_used = 0; vring->signalled_used_valid = false; @@ -49,8 +49,10 @@ bool vring_setup(Vring *vring, VirtIODevice *vdev, int n) return true; } -void vring_teardown(Vring *vring) +void vring_teardown(Vring *vring, VirtIODevice *vdev, int n) { + virtio_queue_set_last_avail_idx(vdev, n, vring->last_avail_idx); + hostmem_finalize(&vring->hostmem); } diff --git a/include/block/aio.h b/include/block/aio.h index 1836793..cc77771 100644 --- a/include/block/aio.h +++ b/include/block/aio.h @@ -17,6 +17,7 @@ #include "qemu-common.h" #include "qemu/queue.h" #include "qemu/event_notifier.h" +#include "qemu/thread.h" typedef struct BlockDriverAIOCB BlockDriverAIOCB; typedef void BlockDriverCompletionFunc(void *opaque, int ret); @@ -53,6 +54,8 @@ typedef struct AioContext { */ int walking_handlers; + /* lock to protect between bh's adders and deleter */ + QemuMutex bh_lock; /* Anchor of the list of Bottom Halves belonging to the context */ struct QEMUBH *first_bh; @@ -127,6 +130,8 @@ void aio_notify(AioContext *ctx); * aio_bh_poll: Poll bottom halves for an AioContext. * * These are internal functions used by the QEMU main loop. + * And notice that multiple occurrences of aio_bh_poll cannot + * be called concurrently */ int aio_bh_poll(AioContext *ctx); @@ -163,6 +168,8 @@ void qemu_bh_cancel(QEMUBH *bh); * Deleting a bottom half frees the memory that was allocated for it by * qemu_bh_new. It also implies canceling the bottom half if it was * scheduled. + * This func is async. The bottom half will do the delete action at the finial + * end. * * @bh: The bottom half to be deleted. */ diff --git a/include/block/block.h b/include/block/block.h index b6b9014..742fce5 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -157,6 +157,8 @@ int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num, uint8_t *buf, int nb_sectors); int bdrv_write(BlockDriverState *bs, int64_t sector_num, const uint8_t *buf, int nb_sectors); +int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num, + int nb_sectors); int bdrv_writev(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov); int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int count); diff --git a/include/hw/virtio/dataplane/vring.h b/include/hw/virtio/dataplane/vring.h index 9380cb5..c0b69ff 100644 --- a/include/hw/virtio/dataplane/vring.h +++ b/include/hw/virtio/dataplane/vring.h @@ -50,7 +50,7 @@ static inline void vring_set_broken(Vring *vring) } bool vring_setup(Vring *vring, VirtIODevice *vdev, int n); -void vring_teardown(Vring *vring); +void vring_teardown(Vring *vring, VirtIODevice *vdev, int n); void vring_disable_notification(VirtIODevice *vdev, Vring *vring); bool vring_enable_notification(VirtIODevice *vdev, Vring *vring); bool vring_should_notify(VirtIODevice *vdev, Vring *vring); diff --git a/include/migration/migration.h b/include/migration/migration.h index bc9fde0..701709a1 100644 --- a/include/migration/migration.h +++ b/include/migration/migration.h @@ -124,6 +124,7 @@ void migrate_add_blocker(Error *reason); void migrate_del_blocker(Error *reason); bool migrate_rdma_pin_all(void); +bool migrate_zero_blocks(void); bool migrate_auto_converge(void); diff --git a/migration.c b/migration.c index 9f5a423..a9c0421 100644 --- a/migration.c +++ b/migration.c @@ -493,6 +493,15 @@ bool migrate_auto_converge(void) return s->enabled_capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE]; } +bool migrate_zero_blocks(void) +{ + MigrationState *s; + + s = migrate_get_current(); + + return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS]; +} + int migrate_use_xbzrle(void) { MigrationState *s; diff --git a/qapi-schema.json b/qapi-schema.json index 8d33d52..592bb9c 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -613,10 +613,16 @@ # Disabled by default. Experimental: may (or may not) be renamed after # further testing is complete. (since 1.6) # +# @zero-blocks: During storage migration encode blocks of zeroes efficiently. This +# essentially saves 1MB of zeroes per block on the wire. Enabling requires +# source and target VM to support this feature. To enable it is sufficient +# to enable the capability on the source VM. The feature is disabled by +# default. (since 1.6) +# # Since: 1.2 ## { 'enum': 'MigrationCapability', - 'data': ['xbzrle', 'x-rdma-pin-all', 'auto-converge'] } + 'data': ['xbzrle', 'x-rdma-pin-all', 'auto-converge', 'zero-blocks'] } ## # @MigrationCapabilityStatus |