aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKevin Wolf <kwolf@redhat.com>2018-06-18 17:20:41 +0200
committerKevin Wolf <kwolf@redhat.com>2018-06-18 17:20:42 +0200
commit4c790afe2503eab12874508acab5b388d7babfd2 (patch)
tree4873e48e955cda582dd62c2bb5680c032b59793e
parentf45280cbf66d8e58224f6a253d0ae2aa72cc6280 (diff)
parente38da02091eeed56bb370ec9d72c4367d4e9ada3 (diff)
downloadqemu-4c790afe2503eab12874508acab5b388d7babfd2.zip
qemu-4c790afe2503eab12874508acab5b388d7babfd2.tar.gz
qemu-4c790afe2503eab12874508acab5b388d7babfd2.tar.bz2
Merge remote-tracking branch 'mreitz/tags/pull-block-2018-06-18' into queue-block
Block patches: - Active mirror (blockdev-mirror copy-mode=write-blocking) # gpg: Signature made Mon Jun 18 17:08:19 2018 CEST # gpg: using RSA key F407DB0061D5CF40 # gpg: Good signature from "Max Reitz <mreitz@redhat.com>" # Primary key fingerprint: 91BE B60A 30DB 3E88 57D1 1829 F407 DB00 61D5 CF40 * mreitz/tags/pull-block-2018-06-18: iotests: Add test for active mirroring block/mirror: Add copy mode QAPI interface block/mirror: Add active mirroring job: Add job_progress_increase_remaining() block/mirror: Add MirrorBDSOpaque block/dirty-bitmap: Add bdrv_dirty_iter_next_area test-hbitmap: Add non-advancing iter_next tests hbitmap: Add @advance param to hbitmap_iter_next() block: Generalize should_update_child() rule block/mirror: Use source as a BdrvChild block/mirror: Wait for in-flight op conflicts block/mirror: Use CoQueue to wait on in-flight ops block/mirror: Convert to coroutines block/mirror: Pull out mirror_perform() Signed-off-by: Kevin Wolf <kwolf@redhat.com>
-rw-r--r--block.c44
-rw-r--r--block/backup.c2
-rw-r--r--block/dirty-bitmap.c57
-rw-r--r--block/mirror.c605
-rw-r--r--blockdev.c9
-rw-r--r--include/block/block_int.h4
-rw-r--r--include/block/dirty-bitmap.h2
-rw-r--r--include/qemu/hbitmap.h5
-rw-r--r--include/qemu/job.h15
-rw-r--r--job.c5
-rw-r--r--qapi/block-core.json29
-rwxr-xr-xtests/qemu-iotests/151120
-rw-r--r--tests/qemu-iotests/151.out5
-rw-r--r--tests/qemu-iotests/group1
-rw-r--r--tests/test-hbitmap.c38
-rw-r--r--util/hbitmap.c10
16 files changed, 799 insertions, 152 deletions
diff --git a/block.c b/block.c
index 6c12800..1b8147c 100644
--- a/block.c
+++ b/block.c
@@ -3427,16 +3427,39 @@ static bool should_update_child(BdrvChild *c, BlockDriverState *to)
return false;
}
- if (c->role == &child_backing) {
- /* If @from is a backing file of @to, ignore the child to avoid
- * creating a loop. We only want to change the pointer of other
- * parents. */
- QLIST_FOREACH(to_c, &to->children, next) {
- if (to_c == c) {
- break;
- }
- }
- if (to_c) {
+ /* If the child @c belongs to the BDS @to, replacing the current
+ * c->bs by @to would mean to create a loop.
+ *
+ * Such a case occurs when appending a BDS to a backing chain.
+ * For instance, imagine the following chain:
+ *
+ * guest device -> node A -> further backing chain...
+ *
+ * Now we create a new BDS B which we want to put on top of this
+ * chain, so we first attach A as its backing node:
+ *
+ * node B
+ * |
+ * v
+ * guest device -> node A -> further backing chain...
+ *
+ * Finally we want to replace A by B. When doing that, we want to
+ * replace all pointers to A by pointers to B -- except for the
+ * pointer from B because (1) that would create a loop, and (2)
+ * that pointer should simply stay intact:
+ *
+ * guest device -> node B
+ * |
+ * v
+ * node A -> further backing chain...
+ *
+ * In general, when replacing a node A (c->bs) by a node B (@to),
+ * if A is a child of B, that means we cannot replace A by B there
+ * because that would create a loop. Silently detaching A from B
+ * is also not really an option. So overall just leaving A in
+ * place there is the most sensible choice. */
+ QLIST_FOREACH(to_c, &to->children, next) {
+ if (to_c == c) {
return false;
}
}
@@ -3462,6 +3485,7 @@ void bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
/* Put all parents into @list and calculate their cumulative permissions */
QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
+ assert(c->bs == from);
if (!should_update_child(c, to)) {
continue;
}
diff --git a/block/backup.c b/block/backup.c
index 5661435..d18be40 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -354,7 +354,7 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
HBitmapIter hbi;
hbitmap_iter_init(&hbi, job->copy_bitmap, 0);
- while ((cluster = hbitmap_iter_next(&hbi)) != -1) {
+ while ((cluster = hbitmap_iter_next(&hbi, true)) != -1) {
do {
if (yield_and_check(job)) {
return 0;
diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index 383d742..db1782e 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -519,7 +519,62 @@ void bdrv_dirty_iter_free(BdrvDirtyBitmapIter *iter)
int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter)
{
- return hbitmap_iter_next(&iter->hbi);
+ return hbitmap_iter_next(&iter->hbi, true);
+}
+
+/**
+ * Return the next consecutively dirty area in the dirty bitmap
+ * belonging to the given iterator @iter.
+ *
+ * @max_offset: Maximum value that may be returned for
+ * *offset + *bytes
+ * @offset: Will contain the start offset of the next dirty area
+ * @bytes: Will contain the length of the next dirty area
+ *
+ * Returns: True if a dirty area could be found before max_offset
+ * (which means that *offset and *bytes then contain valid
+ * values), false otherwise.
+ *
+ * Note that @iter is never advanced if false is returned. If an area
+ * is found (which means that true is returned), it will be advanced
+ * past that area.
+ */
+bool bdrv_dirty_iter_next_area(BdrvDirtyBitmapIter *iter, uint64_t max_offset,
+ uint64_t *offset, int *bytes)
+{
+ uint32_t granularity = bdrv_dirty_bitmap_granularity(iter->bitmap);
+ uint64_t gran_max_offset;
+ int64_t ret;
+ int size;
+
+ if (max_offset == iter->bitmap->size) {
+ /* If max_offset points to the image end, round it up by the
+ * bitmap granularity */
+ gran_max_offset = ROUND_UP(max_offset, granularity);
+ } else {
+ gran_max_offset = max_offset;
+ }
+
+ ret = hbitmap_iter_next(&iter->hbi, false);
+ if (ret < 0 || ret + granularity > gran_max_offset) {
+ return false;
+ }
+
+ *offset = ret;
+ size = 0;
+
+ assert(granularity <= INT_MAX);
+
+ do {
+ /* Advance iterator */
+ ret = hbitmap_iter_next(&iter->hbi, true);
+ size += granularity;
+ } while (ret + granularity <= gran_max_offset &&
+ hbitmap_iter_next(&iter->hbi, false) == ret + granularity &&
+ size <= INT_MAX - granularity);
+
+ *bytes = MIN(size, max_offset - *offset);
+ return true;
}
/* Called within bdrv_dirty_bitmap_lock..unlock */
diff --git a/block/mirror.c b/block/mirror.c
index c2146c1..61bd9f3 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -13,6 +13,8 @@
#include "qemu/osdep.h"
#include "qemu/cutils.h"
+#include "qemu/coroutine.h"
+#include "qemu/range.h"
#include "trace.h"
#include "block/blockjob_int.h"
#include "block/block_int.h"
@@ -33,11 +35,12 @@ typedef struct MirrorBuffer {
QSIMPLEQ_ENTRY(MirrorBuffer) next;
} MirrorBuffer;
+typedef struct MirrorOp MirrorOp;
+
typedef struct MirrorBlockJob {
BlockJob common;
BlockBackend *target;
BlockDriverState *mirror_top_bs;
- BlockDriverState *source;
BlockDriverState *base;
/* The name of the graph node to replace */
@@ -48,8 +51,12 @@ typedef struct MirrorBlockJob {
Error *replace_blocker;
bool is_none_mode;
BlockMirrorBackingMode backing_mode;
+ MirrorCopyMode copy_mode;
BlockdevOnError on_source_error, on_target_error;
bool synced;
+ /* Set when the target is synced (dirty bitmap is clean, nothing
+ * in flight) and the job is running in active mode */
+ bool actively_synced;
bool should_complete;
int64_t granularity;
size_t buf_size;
@@ -65,25 +72,47 @@ typedef struct MirrorBlockJob {
unsigned long *in_flight_bitmap;
int in_flight;
int64_t bytes_in_flight;
+ QTAILQ_HEAD(MirrorOpList, MirrorOp) ops_in_flight;
int ret;
bool unmap;
- bool waiting_for_io;
int target_cluster_size;
int max_iov;
bool initial_zeroing_ongoing;
+ int in_active_write_counter;
} MirrorBlockJob;
-typedef struct MirrorOp {
+typedef struct MirrorBDSOpaque {
+ MirrorBlockJob *job;
+} MirrorBDSOpaque;
+
+struct MirrorOp {
MirrorBlockJob *s;
QEMUIOVector qiov;
int64_t offset;
uint64_t bytes;
-} MirrorOp;
+
+ /* The pointee is set by mirror_co_read(), mirror_co_zero(), and
+ * mirror_co_discard() before yielding for the first time */
+ int64_t *bytes_handled;
+
+ bool is_pseudo_op;
+ bool is_active_write;
+ CoQueue waiting_requests;
+
+ QTAILQ_ENTRY(MirrorOp) next;
+};
+
+typedef enum MirrorMethod {
+ MIRROR_METHOD_COPY,
+ MIRROR_METHOD_ZERO,
+ MIRROR_METHOD_DISCARD,
+} MirrorMethod;
static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read,
int error)
{
s->synced = false;
+ s->actively_synced = false;
if (read) {
return block_job_error_action(&s->common, s->on_source_error,
true, error);
@@ -93,7 +122,42 @@ static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read,
}
}
-static void mirror_iteration_done(MirrorOp *op, int ret)
+static void coroutine_fn mirror_wait_on_conflicts(MirrorOp *self,
+ MirrorBlockJob *s,
+ uint64_t offset,
+ uint64_t bytes)
+{
+ uint64_t self_start_chunk = offset / s->granularity;
+ uint64_t self_end_chunk = DIV_ROUND_UP(offset + bytes, s->granularity);
+ uint64_t self_nb_chunks = self_end_chunk - self_start_chunk;
+
+ while (find_next_bit(s->in_flight_bitmap, self_end_chunk,
+ self_start_chunk) < self_end_chunk &&
+ s->ret >= 0)
+ {
+ MirrorOp *op;
+
+ QTAILQ_FOREACH(op, &s->ops_in_flight, next) {
+ uint64_t op_start_chunk = op->offset / s->granularity;
+ uint64_t op_nb_chunks = DIV_ROUND_UP(op->offset + op->bytes,
+ s->granularity) -
+ op_start_chunk;
+
+ if (op == self) {
+ continue;
+ }
+
+ if (ranges_overlap(self_start_chunk, self_nb_chunks,
+ op_start_chunk, op_nb_chunks))
+ {
+ qemu_co_queue_wait(&op->waiting_requests, NULL);
+ break;
+ }
+ }
+ }
+}
+
+static void coroutine_fn mirror_iteration_done(MirrorOp *op, int ret)
{
MirrorBlockJob *s = op->s;
struct iovec *iov;
@@ -113,7 +177,9 @@ static void mirror_iteration_done(MirrorOp *op, int ret)
chunk_num = op->offset / s->granularity;
nb_chunks = DIV_ROUND_UP(op->bytes, s->granularity);
+
bitmap_clear(s->in_flight_bitmap, chunk_num, nb_chunks);
+ QTAILQ_REMOVE(&s->ops_in_flight, op, next);
if (ret >= 0) {
if (s->cow_bitmap) {
bitmap_set(s->cow_bitmap, chunk_num, nb_chunks);
@@ -123,16 +189,13 @@ static void mirror_iteration_done(MirrorOp *op, int ret)
}
}
qemu_iovec_destroy(&op->qiov);
- g_free(op);
- if (s->waiting_for_io) {
- qemu_coroutine_enter(s->common.job.co);
- }
+ qemu_co_queue_restart_all(&op->waiting_requests);
+ g_free(op);
}
-static void mirror_write_complete(void *opaque, int ret)
+static void coroutine_fn mirror_write_complete(MirrorOp *op, int ret)
{
- MirrorOp *op = opaque;
MirrorBlockJob *s = op->s;
aio_context_acquire(blk_get_aio_context(s->common.blk));
@@ -149,9 +212,8 @@ static void mirror_write_complete(void *opaque, int ret)
aio_context_release(blk_get_aio_context(s->common.blk));
}
-static void mirror_read_complete(void *opaque, int ret)
+static void coroutine_fn mirror_read_complete(MirrorOp *op, int ret)
{
- MirrorOp *op = opaque;
MirrorBlockJob *s = op->s;
aio_context_acquire(blk_get_aio_context(s->common.blk));
@@ -166,8 +228,9 @@ static void mirror_read_complete(void *opaque, int ret)
mirror_iteration_done(op, ret);
} else {
- blk_aio_pwritev(s->target, op->offset, &op->qiov,
- 0, mirror_write_complete, op);
+ ret = blk_co_pwritev(s->target, op->offset,
+ op->qiov.size, &op->qiov, 0);
+ mirror_write_complete(op, ret);
}
aio_context_release(blk_get_aio_context(s->common.blk));
}
@@ -216,68 +279,80 @@ static int mirror_cow_align(MirrorBlockJob *s, int64_t *offset,
return ret;
}
-static inline void mirror_wait_for_io(MirrorBlockJob *s)
+static inline void mirror_wait_for_any_operation(MirrorBlockJob *s, bool active)
+{
+ MirrorOp *op;
+
+ QTAILQ_FOREACH(op, &s->ops_in_flight, next) {
+ /* Do not wait on pseudo ops, because it may in turn wait on
+ * some other operation to start, which may in fact be the
+ * caller of this function. Since there is only one pseudo op
+ * at any given time, we will always find some real operation
+ * to wait on. */
+ if (!op->is_pseudo_op && op->is_active_write == active) {
+ qemu_co_queue_wait(&op->waiting_requests, NULL);
+ return;
+ }
+ }
+ abort();
+}
+
+static inline void mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s)
{
- assert(!s->waiting_for_io);
- s->waiting_for_io = true;
- qemu_coroutine_yield();
- s->waiting_for_io = false;
+ /* Only non-active operations use up in-flight slots */
+ mirror_wait_for_any_operation(s, false);
}
-/* Submit async read while handling COW.
- * Returns: The number of bytes copied after and including offset,
- * excluding any bytes copied prior to offset due to alignment.
- * This will be @bytes if no alignment is necessary, or
- * (new_end - offset) if tail is rounded up or down due to
- * alignment or buffer limit.
+/* Perform a mirror copy operation.
+ *
+ * *op->bytes_handled is set to the number of bytes copied after and
+ * including offset, excluding any bytes copied prior to offset due
+ * to alignment. This will be op->bytes if no alignment is necessary,
+ * or (new_end - op->offset) if the tail is rounded up or down due to
+ * alignment or buffer limit.
*/
-static uint64_t mirror_do_read(MirrorBlockJob *s, int64_t offset,
- uint64_t bytes)
+static void coroutine_fn mirror_co_read(void *opaque)
{
- BlockBackend *source = s->common.blk;
+ MirrorOp *op = opaque;
+ MirrorBlockJob *s = op->s;
int nb_chunks;
uint64_t ret;
- MirrorOp *op;
uint64_t max_bytes;
max_bytes = s->granularity * s->max_iov;
/* We can only handle as much as buf_size at a time. */
- bytes = MIN(s->buf_size, MIN(max_bytes, bytes));
- assert(bytes);
- assert(bytes < BDRV_REQUEST_MAX_BYTES);
- ret = bytes;
+ op->bytes = MIN(s->buf_size, MIN(max_bytes, op->bytes));
+ assert(op->bytes);
+ assert(op->bytes < BDRV_REQUEST_MAX_BYTES);
+ *op->bytes_handled = op->bytes;
if (s->cow_bitmap) {
- ret += mirror_cow_align(s, &offset, &bytes);
+ *op->bytes_handled += mirror_cow_align(s, &op->offset, &op->bytes);
}
- assert(bytes <= s->buf_size);
+ /* Cannot exceed BDRV_REQUEST_MAX_BYTES + INT_MAX */
+ assert(*op->bytes_handled <= UINT_MAX);
+ assert(op->bytes <= s->buf_size);
/* The offset is granularity-aligned because:
* 1) Caller passes in aligned values;
* 2) mirror_cow_align is used only when target cluster is larger. */
- assert(QEMU_IS_ALIGNED(offset, s->granularity));
+ assert(QEMU_IS_ALIGNED(op->offset, s->granularity));
/* The range is sector-aligned, since bdrv_getlength() rounds up. */
- assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE));
- nb_chunks = DIV_ROUND_UP(bytes, s->granularity);
+ assert(QEMU_IS_ALIGNED(op->bytes, BDRV_SECTOR_SIZE));
+ nb_chunks = DIV_ROUND_UP(op->bytes, s->granularity);
while (s->buf_free_count < nb_chunks) {
- trace_mirror_yield_in_flight(s, offset, s->in_flight);
- mirror_wait_for_io(s);
+ trace_mirror_yield_in_flight(s, op->offset, s->in_flight);
+ mirror_wait_for_free_in_flight_slot(s);
}
- /* Allocate a MirrorOp that is used as an AIO callback. */
- op = g_new(MirrorOp, 1);
- op->s = s;
- op->offset = offset;
- op->bytes = bytes;
-
/* Now make a QEMUIOVector taking enough granularity-sized chunks
* from s->buf_free.
*/
qemu_iovec_init(&op->qiov, nb_chunks);
while (nb_chunks-- > 0) {
MirrorBuffer *buf = QSIMPLEQ_FIRST(&s->buf_free);
- size_t remaining = bytes - op->qiov.size;
+ size_t remaining = op->bytes - op->qiov.size;
QSIMPLEQ_REMOVE_HEAD(&s->buf_free, next);
s->buf_free_count--;
@@ -286,44 +361,92 @@ static uint64_t mirror_do_read(MirrorBlockJob *s, int64_t offset,
/* Copy the dirty cluster. */
s->in_flight++;
- s->bytes_in_flight += bytes;
- trace_mirror_one_iteration(s, offset, bytes);
+ s->bytes_in_flight += op->bytes;
+ trace_mirror_one_iteration(s, op->offset, op->bytes);
- blk_aio_preadv(source, offset, &op->qiov, 0, mirror_read_complete, op);
- return ret;
+ ret = bdrv_co_preadv(s->mirror_top_bs->backing, op->offset, op->bytes,
+ &op->qiov, 0);
+ mirror_read_complete(op, ret);
}
-static void mirror_do_zero_or_discard(MirrorBlockJob *s,
- int64_t offset,
- uint64_t bytes,
- bool is_discard)
+static void coroutine_fn mirror_co_zero(void *opaque)
{
- MirrorOp *op;
+ MirrorOp *op = opaque;
+ int ret;
- /* Allocate a MirrorOp that is used as an AIO callback. The qiov is zeroed
- * so the freeing in mirror_iteration_done is nop. */
- op = g_new0(MirrorOp, 1);
- op->s = s;
- op->offset = offset;
- op->bytes = bytes;
+ op->s->in_flight++;
+ op->s->bytes_in_flight += op->bytes;
+ *op->bytes_handled = op->bytes;
- s->in_flight++;
- s->bytes_in_flight += bytes;
- if (is_discard) {
- blk_aio_pdiscard(s->target, offset,
- op->bytes, mirror_write_complete, op);
- } else {
- blk_aio_pwrite_zeroes(s->target, offset,
- op->bytes, s->unmap ? BDRV_REQ_MAY_UNMAP : 0,
- mirror_write_complete, op);
+ ret = blk_co_pwrite_zeroes(op->s->target, op->offset, op->bytes,
+ op->s->unmap ? BDRV_REQ_MAY_UNMAP : 0);
+ mirror_write_complete(op, ret);
+}
+
+static void coroutine_fn mirror_co_discard(void *opaque)
+{
+ MirrorOp *op = opaque;
+ int ret;
+
+ op->s->in_flight++;
+ op->s->bytes_in_flight += op->bytes;
+ *op->bytes_handled = op->bytes;
+
+ ret = blk_co_pdiscard(op->s->target, op->offset, op->bytes);
+ mirror_write_complete(op, ret);
+}
+
+static unsigned mirror_perform(MirrorBlockJob *s, int64_t offset,
+ unsigned bytes, MirrorMethod mirror_method)
+{
+ MirrorOp *op;
+ Coroutine *co;
+ int64_t bytes_handled = -1;
+
+ op = g_new(MirrorOp, 1);
+ *op = (MirrorOp){
+ .s = s,
+ .offset = offset,
+ .bytes = bytes,
+ .bytes_handled = &bytes_handled,
+ };
+ qemu_co_queue_init(&op->waiting_requests);
+
+ switch (mirror_method) {
+ case MIRROR_METHOD_COPY:
+ co = qemu_coroutine_create(mirror_co_read, op);
+ break;
+ case MIRROR_METHOD_ZERO:
+ co = qemu_coroutine_create(mirror_co_zero, op);
+ break;
+ case MIRROR_METHOD_DISCARD:
+ co = qemu_coroutine_create(mirror_co_discard, op);
+ break;
+ default:
+ abort();
}
+
+ QTAILQ_INSERT_TAIL(&s->ops_in_flight, op, next);
+ qemu_coroutine_enter(co);
+ /* At this point, ownership of op has been moved to the coroutine
+ * and the object may already be freed */
+
+ /* Assert that this value has been set */
+ assert(bytes_handled >= 0);
+
+ /* Same assertion as in mirror_co_read() (and for mirror_co_read()
+ * and mirror_co_discard(), bytes_handled == op->bytes, which
+ * is the @bytes parameter given to this function) */
+ assert(bytes_handled <= UINT_MAX);
+ return bytes_handled;
}
static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
{
- BlockDriverState *source = s->source;
- int64_t offset, first_chunk;
- uint64_t delay_ns = 0;
+ BlockDriverState *source = s->mirror_top_bs->backing->bs;
+ MirrorOp *pseudo_op;
+ int64_t offset;
+ uint64_t delay_ns = 0, ret = 0;
/* At least the first dirty chunk is mirrored in one iteration. */
int nb_chunks = 1;
bool write_zeroes_ok = bdrv_can_write_zeroes_with_unmap(blk_bs(s->target));
@@ -339,11 +462,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
}
bdrv_dirty_bitmap_unlock(s->dirty_bitmap);
- first_chunk = offset / s->granularity;
- while (test_bit(first_chunk, s->in_flight_bitmap)) {
- trace_mirror_yield_in_flight(s, offset, s->in_flight);
- mirror_wait_for_io(s);
- }
+ mirror_wait_on_conflicts(NULL, s, offset, 1);
job_pause_point(&s->common.job);
@@ -380,16 +499,27 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
nb_chunks * s->granularity);
bdrv_dirty_bitmap_unlock(s->dirty_bitmap);
+ /* Before claiming an area in the in-flight bitmap, we have to
+ * create a MirrorOp for it so that conflicting requests can wait
+ * for it. mirror_perform() will create the real MirrorOps later,
+ * for now we just create a pseudo operation that will wake up all
+ * conflicting requests once all real operations have been
+ * launched. */
+ pseudo_op = g_new(MirrorOp, 1);
+ *pseudo_op = (MirrorOp){
+ .offset = offset,
+ .bytes = nb_chunks * s->granularity,
+ .is_pseudo_op = true,
+ };
+ qemu_co_queue_init(&pseudo_op->waiting_requests);
+ QTAILQ_INSERT_TAIL(&s->ops_in_flight, pseudo_op, next);
+
bitmap_set(s->in_flight_bitmap, offset / s->granularity, nb_chunks);
while (nb_chunks > 0 && offset < s->bdev_length) {
int ret;
int64_t io_bytes;
int64_t io_bytes_acct;
- enum MirrorMethod {
- MIRROR_METHOD_COPY,
- MIRROR_METHOD_ZERO,
- MIRROR_METHOD_DISCARD
- } mirror_method = MIRROR_METHOD_COPY;
+ MirrorMethod mirror_method = MIRROR_METHOD_COPY;
assert(!(offset % s->granularity));
ret = bdrv_block_status_above(source, NULL, offset,
@@ -419,37 +549,34 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
while (s->in_flight >= MAX_IN_FLIGHT) {
trace_mirror_yield_in_flight(s, offset, s->in_flight);
- mirror_wait_for_io(s);
+ mirror_wait_for_free_in_flight_slot(s);
}
if (s->ret < 0) {
- return 0;
+ ret = 0;
+ goto fail;
}
io_bytes = mirror_clip_bytes(s, offset, io_bytes);
- switch (mirror_method) {
- case MIRROR_METHOD_COPY:
- io_bytes = io_bytes_acct = mirror_do_read(s, offset, io_bytes);
- break;
- case MIRROR_METHOD_ZERO:
- case MIRROR_METHOD_DISCARD:
- mirror_do_zero_or_discard(s, offset, io_bytes,
- mirror_method == MIRROR_METHOD_DISCARD);
- if (write_zeroes_ok) {
- io_bytes_acct = 0;
- } else {
- io_bytes_acct = io_bytes;
- }
- break;
- default:
- abort();
+ io_bytes = mirror_perform(s, offset, io_bytes, mirror_method);
+ if (mirror_method != MIRROR_METHOD_COPY && write_zeroes_ok) {
+ io_bytes_acct = 0;
+ } else {
+ io_bytes_acct = io_bytes;
}
assert(io_bytes);
offset += io_bytes;
nb_chunks -= DIV_ROUND_UP(io_bytes, s->granularity);
delay_ns = block_job_ratelimit_get_delay(&s->common, io_bytes_acct);
}
- return delay_ns;
+
+ ret = delay_ns;
+fail:
+ QTAILQ_REMOVE(&s->ops_in_flight, pseudo_op, next);
+ qemu_co_queue_restart_all(&pseudo_op->waiting_requests);
+ g_free(pseudo_op);
+
+ return ret;
}
static void mirror_free_init(MirrorBlockJob *s)
@@ -476,7 +603,7 @@ static void mirror_free_init(MirrorBlockJob *s)
static void mirror_wait_for_all_io(MirrorBlockJob *s)
{
while (s->in_flight > 0) {
- mirror_wait_for_io(s);
+ mirror_wait_for_free_in_flight_slot(s);
}
}
@@ -489,8 +616,9 @@ static void mirror_exit(Job *job, void *opaque)
MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job);
BlockJob *bjob = &s->common;
MirrorExitData *data = opaque;
+ MirrorBDSOpaque *bs_opaque = s->mirror_top_bs->opaque;
AioContext *replace_aio_context = NULL;
- BlockDriverState *src = s->source;
+ BlockDriverState *src = s->mirror_top_bs->backing->bs;
BlockDriverState *target_bs = blk_bs(s->target);
BlockDriverState *mirror_top_bs = s->mirror_top_bs;
Error *local_err = NULL;
@@ -581,6 +709,7 @@ static void mirror_exit(Job *job, void *opaque)
blk_set_perm(bjob->blk, 0, BLK_PERM_ALL, &error_abort);
blk_insert_bs(bjob->blk, mirror_top_bs, &error_abort);
+ bs_opaque->job = NULL;
job_completed(job, data->ret, NULL);
g_free(data);
@@ -605,7 +734,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
{
int64_t offset;
BlockDriverState *base = s->base;
- BlockDriverState *bs = s->source;
+ BlockDriverState *bs = s->mirror_top_bs->backing->bs;
BlockDriverState *target_bs = blk_bs(s->target);
int ret;
int64_t count;
@@ -631,11 +760,11 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
if (s->in_flight >= MAX_IN_FLIGHT) {
trace_mirror_yield(s, UINT64_MAX, s->buf_free_count,
s->in_flight);
- mirror_wait_for_io(s);
+ mirror_wait_for_free_in_flight_slot(s);
continue;
}
- mirror_do_zero_or_discard(s, offset, bytes, false);
+ mirror_perform(s, offset, bytes, MIRROR_METHOD_ZERO);
offset += bytes;
}
@@ -687,7 +816,7 @@ static void coroutine_fn mirror_run(void *opaque)
{
MirrorBlockJob *s = opaque;
MirrorExitData *data;
- BlockDriverState *bs = s->source;
+ BlockDriverState *bs = s->mirror_top_bs->backing->bs;
BlockDriverState *target_bs = blk_bs(s->target);
bool need_drain = true;
int64_t length;
@@ -730,6 +859,7 @@ static void coroutine_fn mirror_run(void *opaque)
/* Transition to the READY state and wait for complete. */
job_transition_to_ready(&s->common.job);
s->synced = true;
+ s->actively_synced = true;
while (!job_is_cancelled(&s->common.job) && !s->should_complete) {
job_yield(&s->common.job);
}
@@ -781,6 +911,12 @@ static void coroutine_fn mirror_run(void *opaque)
int64_t cnt, delta;
bool should_complete;
+ /* Do not start passive operations while there are active
+ * writes in progress */
+ while (s->in_active_write_counter) {
+ mirror_wait_for_any_operation(s, true);
+ }
+
if (s->ret < 0) {
ret = s->ret;
goto immediate_exit;
@@ -804,7 +940,7 @@ static void coroutine_fn mirror_run(void *opaque)
if (s->in_flight >= MAX_IN_FLIGHT || s->buf_free_count == 0 ||
(cnt == 0 && s->in_flight > 0)) {
trace_mirror_yield(s, cnt, s->buf_free_count, s->in_flight);
- mirror_wait_for_io(s);
+ mirror_wait_for_free_in_flight_slot(s);
continue;
} else if (cnt != 0) {
delay_ns = mirror_iteration(s);
@@ -826,6 +962,9 @@ static void coroutine_fn mirror_run(void *opaque)
*/
job_transition_to_ready(&s->common.job);
s->synced = true;
+ if (s->copy_mode != MIRROR_COPY_MODE_BACKGROUND) {
+ s->actively_synced = true;
+ }
}
should_complete = s->should_complete ||
@@ -1024,16 +1163,232 @@ static const BlockJobDriver commit_active_job_driver = {
.drain = mirror_drain,
};
+static void do_sync_target_write(MirrorBlockJob *job, MirrorMethod method,
+ uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
+{
+ BdrvDirtyBitmapIter *iter;
+ QEMUIOVector target_qiov;
+ uint64_t dirty_offset;
+ int dirty_bytes;
+
+ if (qiov) {
+ qemu_iovec_init(&target_qiov, qiov->niov);
+ }
+
+ iter = bdrv_dirty_iter_new(job->dirty_bitmap);
+ bdrv_set_dirty_iter(iter, offset);
+
+ while (true) {
+ bool valid_area;
+ int ret;
+
+ bdrv_dirty_bitmap_lock(job->dirty_bitmap);
+ valid_area = bdrv_dirty_iter_next_area(iter, offset + bytes,
+ &dirty_offset, &dirty_bytes);
+ if (!valid_area) {
+ bdrv_dirty_bitmap_unlock(job->dirty_bitmap);
+ break;
+ }
+
+ bdrv_reset_dirty_bitmap_locked(job->dirty_bitmap,
+ dirty_offset, dirty_bytes);
+ bdrv_dirty_bitmap_unlock(job->dirty_bitmap);
+
+ job_progress_increase_remaining(&job->common.job, dirty_bytes);
+
+ assert(dirty_offset - offset <= SIZE_MAX);
+ if (qiov) {
+ qemu_iovec_reset(&target_qiov);
+ qemu_iovec_concat(&target_qiov, qiov,
+ dirty_offset - offset, dirty_bytes);
+ }
+
+ switch (method) {
+ case MIRROR_METHOD_COPY:
+ ret = blk_co_pwritev(job->target, dirty_offset, dirty_bytes,
+ qiov ? &target_qiov : NULL, flags);
+ break;
+
+ case MIRROR_METHOD_ZERO:
+ assert(!qiov);
+ ret = blk_co_pwrite_zeroes(job->target, dirty_offset, dirty_bytes,
+ flags);
+ break;
+
+ case MIRROR_METHOD_DISCARD:
+ assert(!qiov);
+ ret = blk_co_pdiscard(job->target, dirty_offset, dirty_bytes);
+ break;
+
+ default:
+ abort();
+ }
+
+ if (ret >= 0) {
+ job_progress_update(&job->common.job, dirty_bytes);
+ } else {
+ BlockErrorAction action;
+
+ bdrv_set_dirty_bitmap(job->dirty_bitmap, dirty_offset, dirty_bytes);
+ job->actively_synced = false;
+
+ action = mirror_error_action(job, false, -ret);
+ if (action == BLOCK_ERROR_ACTION_REPORT) {
+ if (!job->ret) {
+ job->ret = ret;
+ }
+ break;
+ }
+ }
+ }
+
+ bdrv_dirty_iter_free(iter);
+ if (qiov) {
+ qemu_iovec_destroy(&target_qiov);
+ }
+}
+
+static MirrorOp *coroutine_fn active_write_prepare(MirrorBlockJob *s,
+ uint64_t offset,
+ uint64_t bytes)
+{
+ MirrorOp *op;
+ uint64_t start_chunk = offset / s->granularity;
+ uint64_t end_chunk = DIV_ROUND_UP(offset + bytes, s->granularity);
+
+ op = g_new(MirrorOp, 1);
+ *op = (MirrorOp){
+ .s = s,
+ .offset = offset,
+ .bytes = bytes,
+ .is_active_write = true,
+ };
+ qemu_co_queue_init(&op->waiting_requests);
+ QTAILQ_INSERT_TAIL(&s->ops_in_flight, op, next);
+
+ s->in_active_write_counter++;
+
+ mirror_wait_on_conflicts(op, s, offset, bytes);
+
+ bitmap_set(s->in_flight_bitmap, start_chunk, end_chunk - start_chunk);
+
+ return op;
+}
+
+static void coroutine_fn active_write_settle(MirrorOp *op)
+{
+ uint64_t start_chunk = op->offset / op->s->granularity;
+ uint64_t end_chunk = DIV_ROUND_UP(op->offset + op->bytes,
+ op->s->granularity);
+
+ if (!--op->s->in_active_write_counter && op->s->actively_synced) {
+ BdrvChild *source = op->s->mirror_top_bs->backing;
+
+ if (QLIST_FIRST(&source->bs->parents) == source &&
+ QLIST_NEXT(source, next_parent) == NULL)
+ {
+ /* Assert that we are back in sync once all active write
+ * operations are settled.
+ * Note that we can only assert this if the mirror node
+ * is the source node's only parent. */
+ assert(!bdrv_get_dirty_count(op->s->dirty_bitmap));
+ }
+ }
+ bitmap_clear(op->s->in_flight_bitmap, start_chunk, end_chunk - start_chunk);
+ QTAILQ_REMOVE(&op->s->ops_in_flight, op, next);
+ qemu_co_queue_restart_all(&op->waiting_requests);
+ g_free(op);
+}
+
static int coroutine_fn bdrv_mirror_top_preadv(BlockDriverState *bs,
uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
{
return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags);
}
+static int coroutine_fn bdrv_mirror_top_do_write(BlockDriverState *bs,
+ MirrorMethod method, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov,
+ int flags)
+{
+ MirrorOp *op = NULL;
+ MirrorBDSOpaque *s = bs->opaque;
+ int ret = 0;
+ bool copy_to_target;
+
+ copy_to_target = s->job->ret >= 0 &&
+ s->job->copy_mode == MIRROR_COPY_MODE_WRITE_BLOCKING;
+
+ if (copy_to_target) {
+ op = active_write_prepare(s->job, offset, bytes);
+ }
+
+ switch (method) {
+ case MIRROR_METHOD_COPY:
+ ret = bdrv_co_pwritev(bs->backing, offset, bytes, qiov, flags);
+ break;
+
+ case MIRROR_METHOD_ZERO:
+ ret = bdrv_co_pwrite_zeroes(bs->backing, offset, bytes, flags);
+ break;
+
+ case MIRROR_METHOD_DISCARD:
+ ret = bdrv_co_pdiscard(bs->backing->bs, offset, bytes);
+ break;
+
+ default:
+ abort();
+ }
+
+ if (ret < 0) {
+ goto out;
+ }
+
+ if (copy_to_target) {
+ do_sync_target_write(s->job, method, offset, bytes, qiov, flags);
+ }
+
+out:
+ if (copy_to_target) {
+ active_write_settle(op);
+ }
+ return ret;
+}
+
static int coroutine_fn bdrv_mirror_top_pwritev(BlockDriverState *bs,
uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
{
- return bdrv_co_pwritev(bs->backing, offset, bytes, qiov, flags);
+ MirrorBDSOpaque *s = bs->opaque;
+ QEMUIOVector bounce_qiov;
+ void *bounce_buf;
+ int ret = 0;
+ bool copy_to_target;
+
+ copy_to_target = s->job->ret >= 0 &&
+ s->job->copy_mode == MIRROR_COPY_MODE_WRITE_BLOCKING;
+
+ if (copy_to_target) {
+ /* The guest might concurrently modify the data to write; but
+ * the data on source and destination must match, so we have
+ * to use a bounce buffer if we are going to write to the
+ * target now. */
+ bounce_buf = qemu_blockalign(bs, bytes);
+ iov_to_buf_full(qiov->iov, qiov->niov, 0, bounce_buf, bytes);
+
+ qemu_iovec_init(&bounce_qiov, 1);
+ qemu_iovec_add(&bounce_qiov, bounce_buf, bytes);
+ qiov = &bounce_qiov;
+ }
+
+ ret = bdrv_mirror_top_do_write(bs, MIRROR_METHOD_COPY, offset, bytes, qiov,
+ flags);
+
+ if (copy_to_target) {
+ qemu_iovec_destroy(&bounce_qiov);
+ qemu_vfree(bounce_buf);
+ }
+
+ return ret;
}
static int coroutine_fn bdrv_mirror_top_flush(BlockDriverState *bs)
@@ -1048,13 +1403,15 @@ static int coroutine_fn bdrv_mirror_top_flush(BlockDriverState *bs)
static int coroutine_fn bdrv_mirror_top_pwrite_zeroes(BlockDriverState *bs,
int64_t offset, int bytes, BdrvRequestFlags flags)
{
- return bdrv_co_pwrite_zeroes(bs->backing, offset, bytes, flags);
+ return bdrv_mirror_top_do_write(bs, MIRROR_METHOD_ZERO, offset, bytes, NULL,
+ flags);
}
static int coroutine_fn bdrv_mirror_top_pdiscard(BlockDriverState *bs,
int64_t offset, int bytes)
{
- return bdrv_co_pdiscard(bs->backing->bs, offset, bytes);
+ return bdrv_mirror_top_do_write(bs, MIRROR_METHOD_DISCARD, offset, bytes,
+ NULL, 0);
}
static void bdrv_mirror_top_refresh_filename(BlockDriverState *bs, QDict *opts)
@@ -1116,10 +1473,11 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
const BlockJobDriver *driver,
bool is_none_mode, BlockDriverState *base,
bool auto_complete, const char *filter_node_name,
- bool is_mirror,
+ bool is_mirror, MirrorCopyMode copy_mode,
Error **errp)
{
MirrorBlockJob *s;
+ MirrorBDSOpaque *bs_opaque;
BlockDriverState *mirror_top_bs;
bool target_graph_mod;
bool target_is_backing;
@@ -1155,6 +1513,8 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
mirror_top_bs->total_sectors = bs->total_sectors;
mirror_top_bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED;
mirror_top_bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED;
+ bs_opaque = g_new0(MirrorBDSOpaque, 1);
+ mirror_top_bs->opaque = bs_opaque;
bdrv_set_aio_context(mirror_top_bs, bdrv_get_aio_context(bs));
/* bdrv_append takes ownership of the mirror_top_bs reference, need to keep
@@ -1179,10 +1539,11 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
if (!s) {
goto fail;
}
+ bs_opaque->job = s;
+
/* The block job now has a reference to this node */
bdrv_unref(mirror_top_bs);
- s->source = bs;
s->mirror_top_bs = mirror_top_bs;
/* No resize for the target either; while the mirror is still running, a
@@ -1220,6 +1581,7 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
s->on_target_error = on_target_error;
s->is_none_mode = is_none_mode;
s->backing_mode = backing_mode;
+ s->copy_mode = copy_mode;
s->base = base;
s->granularity = granularity;
s->buf_size = ROUND_UP(buf_size, granularity);
@@ -1255,6 +1617,8 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
}
}
+ QTAILQ_INIT(&s->ops_in_flight);
+
trace_mirror_start(bs, s, opaque);
job_start(&s->common.job);
return;
@@ -1267,6 +1631,7 @@ fail:
g_free(s->replaces);
blk_unref(s->target);
+ bs_opaque->job = NULL;
job_early_fail(&s->common.job);
}
@@ -1283,7 +1648,8 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
MirrorSyncMode mode, BlockMirrorBackingMode backing_mode,
BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
- bool unmap, const char *filter_node_name, Error **errp)
+ bool unmap, const char *filter_node_name,
+ MirrorCopyMode copy_mode, Error **errp)
{
bool is_none_mode;
BlockDriverState *base;
@@ -1298,7 +1664,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
speed, granularity, buf_size, backing_mode,
on_source_error, on_target_error, unmap, NULL, NULL,
&mirror_job_driver, is_none_mode, base, false,
- filter_node_name, true, errp);
+ filter_node_name, true, copy_mode, errp);
}
void commit_active_start(const char *job_id, BlockDriverState *bs,
@@ -1321,7 +1687,8 @@ void commit_active_start(const char *job_id, BlockDriverState *bs,
MIRROR_LEAVE_BACKING_CHAIN,
on_error, on_error, true, cb, opaque,
&commit_active_job_driver, false, base, auto_complete,
- filter_node_name, false, &local_err);
+ filter_node_name, false, MIRROR_COPY_MODE_BACKGROUND,
+ &local_err);
if (local_err) {
error_propagate(errp, local_err);
goto error_restore_flags;
diff --git a/blockdev.c b/blockdev.c
index 7f65cd7..58d7570 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -3586,6 +3586,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
bool has_unmap, bool unmap,
bool has_filter_node_name,
const char *filter_node_name,
+ bool has_copy_mode, MirrorCopyMode copy_mode,
Error **errp)
{
@@ -3610,6 +3611,9 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
if (!has_filter_node_name) {
filter_node_name = NULL;
}
+ if (!has_copy_mode) {
+ copy_mode = MIRROR_COPY_MODE_BACKGROUND;
+ }
if (granularity != 0 && (granularity < 512 || granularity > 1048576 * 64)) {
error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "granularity",
@@ -3640,7 +3644,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
has_replaces ? replaces : NULL,
speed, granularity, buf_size, sync, backing_mode,
on_source_error, on_target_error, unmap, filter_node_name,
- errp);
+ copy_mode, errp);
}
void qmp_drive_mirror(DriveMirror *arg, Error **errp)
@@ -3786,6 +3790,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
arg->has_on_target_error, arg->on_target_error,
arg->has_unmap, arg->unmap,
false, NULL,
+ arg->has_copy_mode, arg->copy_mode,
&local_err);
bdrv_unref(target_bs);
error_propagate(errp, local_err);
@@ -3806,6 +3811,7 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
BlockdevOnError on_target_error,
bool has_filter_node_name,
const char *filter_node_name,
+ bool has_copy_mode, MirrorCopyMode copy_mode,
Error **errp)
{
BlockDriverState *bs;
@@ -3838,6 +3844,7 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
has_on_target_error, on_target_error,
true, true,
has_filter_node_name, filter_node_name,
+ has_copy_mode, copy_mode,
&local_err);
error_propagate(errp, local_err);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 7cd7eed..74646ed 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -1031,6 +1031,7 @@ void commit_active_start(const char *job_id, BlockDriverState *bs,
* @filter_node_name: The node name that should be assigned to the filter
* driver that the mirror job inserts into the graph above @bs. NULL means that
* a node name should be autogenerated.
+ * @copy_mode: When to trigger writes to the target.
* @errp: Error object.
*
* Start a mirroring operation on @bs. Clusters that are allocated
@@ -1044,7 +1045,8 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
MirrorSyncMode mode, BlockMirrorBackingMode backing_mode,
BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
- bool unmap, const char *filter_node_name, Error **errp);
+ bool unmap, const char *filter_node_name,
+ MirrorCopyMode copy_mode, Error **errp);
/*
* backup_job_create:
diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index 02e0cba..288dc6a 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -82,6 +82,8 @@ void bdrv_set_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
void bdrv_reset_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
int64_t offset, int64_t bytes);
int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter);
+bool bdrv_dirty_iter_next_area(BdrvDirtyBitmapIter *iter, uint64_t max_offset,
+ uint64_t *offset, int *bytes);
void bdrv_set_dirty_iter(BdrvDirtyBitmapIter *hbi, int64_t offset);
int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap);
int64_t bdrv_get_meta_dirty_count(BdrvDirtyBitmap *bitmap);
diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h
index 6b6490e..ddca52c 100644
--- a/include/qemu/hbitmap.h
+++ b/include/qemu/hbitmap.h
@@ -324,11 +324,14 @@ void hbitmap_free_meta(HBitmap *hb);
/**
* hbitmap_iter_next:
* @hbi: HBitmapIter to operate on.
+ * @advance: If true, advance the iterator. Otherwise, the next call
+ * of this function will return the same result (if that
+ * position is still dirty).
*
* Return the next bit that is set in @hbi's associated HBitmap,
* or -1 if all remaining bits are zero.
*/
-int64_t hbitmap_iter_next(HBitmapIter *hbi);
+int64_t hbitmap_iter_next(HBitmapIter *hbi, bool advance);
/**
* hbitmap_iter_next_word:
diff --git a/include/qemu/job.h b/include/qemu/job.h
index 1d82053..18c9223 100644
--- a/include/qemu/job.h
+++ b/include/qemu/job.h
@@ -335,6 +335,21 @@ void job_progress_update(Job *job, uint64_t done);
*/
void job_progress_set_remaining(Job *job, uint64_t remaining);
+/**
+ * @job: The job whose expected progress end value is updated
+ * @delta: Value which is to be added to the current expected end
+ * value
+ *
+ * Increases the expected end value of the progress counter of a job.
+ * This is useful for parenthesis operations: If a job has to
+ * conditionally perform a high-priority operation as part of its
+ * progress, it calls this function with the expected operation's
+ * length before, and job_progress_update() afterwards.
+ * (So the operation acts as a parenthesis in regards to the main job
+ * operation running in background.)
+ */
+void job_progress_increase_remaining(Job *job, uint64_t delta);
+
/** To be called when a cancelled job is finalised. */
void job_event_cancelled(Job *job);
diff --git a/job.c b/job.c
index 84e1402..fa671b4 100644
--- a/job.c
+++ b/job.c
@@ -385,6 +385,11 @@ void job_progress_set_remaining(Job *job, uint64_t remaining)
job->progress_total = job->progress_current + remaining;
}
+void job_progress_increase_remaining(Job *job, uint64_t delta)
+{
+ job->progress_total += delta;
+}
+
void job_event_cancelled(Job *job)
{
notifier_list_notify(&job->on_finalize_cancelled, job);
diff --git a/qapi/block-core.json b/qapi/block-core.json
index ab629d1..cc3ede0 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1051,6 +1051,24 @@
'data': ['top', 'full', 'none', 'incremental'] }
##
+# @MirrorCopyMode:
+#
+# An enumeration whose values tell the mirror block job when to
+# trigger writes to the target.
+#
+# @background: copy data in background only.
+#
+# @write-blocking: when data is written to the source, write it
+# (synchronously) to the target as well. In
+# addition, data is copied in background just like in
+# @background mode.
+#
+# Since: 3.0
+##
+{ 'enum': 'MirrorCopyMode',
+ 'data': ['background', 'write-blocking'] }
+
+##
# @BlockJobInfo:
#
# Information about a long-running block device operation.
@@ -1692,6 +1710,9 @@
# written. Both will result in identical contents.
# Default is true. (Since 2.4)
#
+# @copy-mode: when to copy data to the destination; defaults to 'background'
+# (Since: 3.0)
+#
# Since: 1.3
##
{ 'struct': 'DriveMirror',
@@ -1701,7 +1722,7 @@
'*speed': 'int', '*granularity': 'uint32',
'*buf-size': 'int', '*on-source-error': 'BlockdevOnError',
'*on-target-error': 'BlockdevOnError',
- '*unmap': 'bool' } }
+ '*unmap': 'bool', '*copy-mode': 'MirrorCopyMode' } }
##
# @BlockDirtyBitmap:
@@ -1964,6 +1985,9 @@
# above @device. If this option is not given, a node name is
# autogenerated. (Since: 2.9)
#
+# @copy-mode: when to copy data to the destination; defaults to 'background'
+# (Since: 3.0)
+#
# Returns: nothing on success.
#
# Since: 2.6
@@ -1984,7 +2008,8 @@
'*speed': 'int', '*granularity': 'uint32',
'*buf-size': 'int', '*on-source-error': 'BlockdevOnError',
'*on-target-error': 'BlockdevOnError',
- '*filter-node-name': 'str' } }
+ '*filter-node-name': 'str',
+ '*copy-mode': 'MirrorCopyMode' } }
##
# @block_set_io_throttle:
diff --git a/tests/qemu-iotests/151 b/tests/qemu-iotests/151
new file mode 100755
index 0000000..fe53b9f
--- /dev/null
+++ b/tests/qemu-iotests/151
@@ -0,0 +1,120 @@
+#!/usr/bin/env python
+#
+# Tests for active mirroring
+#
+# Copyright (C) 2018 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+import os
+import iotests
+from iotests import qemu_img
+
+source_img = os.path.join(iotests.test_dir, 'source.' + iotests.imgfmt)
+target_img = os.path.join(iotests.test_dir, 'target.' + iotests.imgfmt)
+
+class TestActiveMirror(iotests.QMPTestCase):
+ image_len = 128 * 1024 * 1024 # MB
+ potential_writes_in_flight = True
+
+ def setUp(self):
+ qemu_img('create', '-f', iotests.imgfmt, source_img, '128M')
+ qemu_img('create', '-f', iotests.imgfmt, target_img, '128M')
+
+ blk_source = {'id': 'source',
+ 'if': 'none',
+ 'node-name': 'source-node',
+ 'driver': iotests.imgfmt,
+ 'file': {'driver': 'file',
+ 'filename': source_img}}
+
+ blk_target = {'node-name': 'target-node',
+ 'driver': iotests.imgfmt,
+ 'file': {'driver': 'file',
+ 'filename': target_img}}
+
+ self.vm = iotests.VM()
+ self.vm.add_drive_raw(self.vm.qmp_to_opts(blk_source))
+ self.vm.add_blockdev(self.vm.qmp_to_opts(blk_target))
+ self.vm.add_device('virtio-blk,drive=source')
+ self.vm.launch()
+
+ def tearDown(self):
+ self.vm.shutdown()
+
+ if not self.potential_writes_in_flight:
+ self.assertTrue(iotests.compare_images(source_img, target_img),
+ 'mirror target does not match source')
+
+ os.remove(source_img)
+ os.remove(target_img)
+
+ def doActiveIO(self, sync_source_and_target):
+ # Fill the source image
+ self.vm.hmp_qemu_io('source',
+ 'write -P 1 0 %i' % self.image_len);
+
+ # Start some background requests
+ for offset in range(1 * self.image_len / 8, 3 * self.image_len / 8, 1024 * 1024):
+ self.vm.hmp_qemu_io('source', 'aio_write -P 2 %i 1M' % offset)
+ for offset in range(2 * self.image_len / 8, 3 * self.image_len / 8, 1024 * 1024):
+ self.vm.hmp_qemu_io('source', 'aio_write -z %i 1M' % offset)
+
+ # Start the block job
+ result = self.vm.qmp('blockdev-mirror',
+ job_id='mirror',
+ filter_node_name='mirror-node',
+ device='source-node',
+ target='target-node',
+ sync='full',
+ copy_mode='write-blocking')
+ self.assert_qmp(result, 'return', {})
+
+ # Start some more requests
+ for offset in range(3 * self.image_len / 8, 5 * self.image_len / 8, 1024 * 1024):
+ self.vm.hmp_qemu_io('source', 'aio_write -P 3 %i 1M' % offset)
+ for offset in range(4 * self.image_len / 8, 5 * self.image_len / 8, 1024 * 1024):
+ self.vm.hmp_qemu_io('source', 'aio_write -z %i 1M' % offset)
+
+ # Wait for the READY event
+ self.wait_ready(drive='mirror')
+
+ # Now start some final requests; all of these (which land on
+ # the source) should be settled using the active mechanism.
+ # The mirror code itself asserts that the source BDS's dirty
+ # bitmap will stay clean between READY and COMPLETED.
+ for offset in range(5 * self.image_len / 8, 7 * self.image_len / 8, 1024 * 1024):
+ self.vm.hmp_qemu_io('source', 'aio_write -P 3 %i 1M' % offset)
+ for offset in range(6 * self.image_len / 8, 7 * self.image_len / 8, 1024 * 1024):
+ self.vm.hmp_qemu_io('source', 'aio_write -z %i 1M' % offset)
+
+ if sync_source_and_target:
+ # If source and target should be in sync after the mirror,
+ # we have to flush before completion
+ self.vm.hmp_qemu_io('source', 'aio_flush')
+ self.potential_writes_in_flight = False
+
+ self.complete_and_wait(drive='mirror', wait_ready=False)
+
+ def testActiveIO(self):
+ self.doActiveIO(False)
+
+ def testActiveIOFlushed(self):
+ self.doActiveIO(True)
+
+
+
+if __name__ == '__main__':
+ iotests.main(supported_fmts=['qcow2', 'raw'])
diff --git a/tests/qemu-iotests/151.out b/tests/qemu-iotests/151.out
new file mode 100644
index 0000000..fbc63e6
--- /dev/null
+++ b/tests/qemu-iotests/151.out
@@ -0,0 +1,5 @@
+..
+----------------------------------------------------------------------
+Ran 2 tests
+
+OK
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index 937a3d0..eea7581 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -157,6 +157,7 @@
148 rw auto quick
149 rw auto sudo
150 rw auto quick
+151 rw auto
152 rw auto quick
153 rw auto quick
154 rw auto backing quick
diff --git a/tests/test-hbitmap.c b/tests/test-hbitmap.c
index f29631f..5e67ac1 100644
--- a/tests/test-hbitmap.c
+++ b/tests/test-hbitmap.c
@@ -30,6 +30,18 @@ typedef struct TestHBitmapData {
} TestHBitmapData;
+static int64_t check_hbitmap_iter_next(HBitmapIter *hbi)
+{
+ int next0, next1;
+
+ next0 = hbitmap_iter_next(hbi, false);
+ next1 = hbitmap_iter_next(hbi, true);
+
+ g_assert_cmpint(next0, ==, next1);
+
+ return next0;
+}
+
/* Check that the HBitmap and the shadow bitmap contain the same data,
* ignoring the same "first" bits.
*/
@@ -46,7 +58,7 @@ static void hbitmap_test_check(TestHBitmapData *data,
i = first;
for (;;) {
- next = hbitmap_iter_next(&hbi);
+ next = check_hbitmap_iter_next(&hbi);
if (next < 0) {
next = data->size;
}
@@ -435,25 +447,25 @@ static void test_hbitmap_iter_granularity(TestHBitmapData *data,
/* Note that hbitmap_test_check has to be invoked manually in this test. */
hbitmap_test_init(data, 131072 << 7, 7);
hbitmap_iter_init(&hbi, data->hb, 0);
- g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0);
+ g_assert_cmpint(check_hbitmap_iter_next(&hbi), <, 0);
hbitmap_test_set(data, ((L2 + L1 + 1) << 7) + 8, 8);
hbitmap_iter_init(&hbi, data->hb, 0);
- g_assert_cmpint(hbitmap_iter_next(&hbi), ==, (L2 + L1 + 1) << 7);
- g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0);
+ g_assert_cmpint(check_hbitmap_iter_next(&hbi), ==, (L2 + L1 + 1) << 7);
+ g_assert_cmpint(check_hbitmap_iter_next(&hbi), <, 0);
hbitmap_iter_init(&hbi, data->hb, (L2 + L1 + 2) << 7);
- g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0);
+ g_assert_cmpint(hbitmap_iter_next(&hbi, true), <, 0);
hbitmap_test_set(data, (131072 << 7) - 8, 8);
hbitmap_iter_init(&hbi, data->hb, 0);
- g_assert_cmpint(hbitmap_iter_next(&hbi), ==, (L2 + L1 + 1) << 7);
- g_assert_cmpint(hbitmap_iter_next(&hbi), ==, 131071 << 7);
- g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0);
+ g_assert_cmpint(check_hbitmap_iter_next(&hbi), ==, (L2 + L1 + 1) << 7);
+ g_assert_cmpint(check_hbitmap_iter_next(&hbi), ==, 131071 << 7);
+ g_assert_cmpint(check_hbitmap_iter_next(&hbi), <, 0);
hbitmap_iter_init(&hbi, data->hb, (L2 + L1 + 2) << 7);
- g_assert_cmpint(hbitmap_iter_next(&hbi), ==, 131071 << 7);
- g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0);
+ g_assert_cmpint(check_hbitmap_iter_next(&hbi), ==, 131071 << 7);
+ g_assert_cmpint(check_hbitmap_iter_next(&hbi), <, 0);
}
static void hbitmap_test_set_boundary_bits(TestHBitmapData *data, ssize_t diff)
@@ -893,7 +905,7 @@ static void test_hbitmap_serialize_zeroes(TestHBitmapData *data,
for (i = 0; i < num_positions; i++) {
hbitmap_deserialize_zeroes(data->hb, positions[i], min_l1, true);
hbitmap_iter_init(&iter, data->hb, 0);
- next = hbitmap_iter_next(&iter);
+ next = check_hbitmap_iter_next(&iter);
if (i == num_positions - 1) {
g_assert_cmpint(next, ==, -1);
} else {
@@ -919,10 +931,10 @@ static void test_hbitmap_iter_and_reset(TestHBitmapData *data,
hbitmap_iter_init(&hbi, data->hb, BITS_PER_LONG - 1);
- hbitmap_iter_next(&hbi);
+ check_hbitmap_iter_next(&hbi);
hbitmap_reset_all(data->hb);
- hbitmap_iter_next(&hbi);
+ check_hbitmap_iter_next(&hbi);
}
static void test_hbitmap_next_zero_check(TestHBitmapData *data, int64_t start)
diff --git a/util/hbitmap.c b/util/hbitmap.c
index 58a2c93..bcd3040 100644
--- a/util/hbitmap.c
+++ b/util/hbitmap.c
@@ -141,7 +141,7 @@ unsigned long hbitmap_iter_skip_words(HBitmapIter *hbi)
return cur;
}
-int64_t hbitmap_iter_next(HBitmapIter *hbi)
+int64_t hbitmap_iter_next(HBitmapIter *hbi, bool advance)
{
unsigned long cur = hbi->cur[HBITMAP_LEVELS - 1] &
hbi->hb->levels[HBITMAP_LEVELS - 1][hbi->pos];
@@ -154,8 +154,12 @@ int64_t hbitmap_iter_next(HBitmapIter *hbi)
}
}
- /* The next call will resume work from the next bit. */
- hbi->cur[HBITMAP_LEVELS - 1] = cur & (cur - 1);
+ if (advance) {
+ /* The next call will resume work from the next bit. */
+ hbi->cur[HBITMAP_LEVELS - 1] = cur & (cur - 1);
+ } else {
+ hbi->cur[HBITMAP_LEVELS - 1] = cur;
+ }
item = ((uint64_t)hbi->pos << BITS_PER_LEVEL) + ctzl(cur);
return item << hbi->granularity;