aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--block/mirror.c102
-rw-r--r--trace-events4
2 files changed, 94 insertions, 12 deletions
diff --git a/block/mirror.c b/block/mirror.c
index 896972c..4696dc8 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -17,7 +17,15 @@
#include "qemu/ratelimit.h"
#include "qemu/bitmap.h"
-#define SLICE_TIME 100000000ULL /* ns */
+#define SLICE_TIME 100000000ULL /* ns */
+#define MAX_IN_FLIGHT 16
+
+/* The mirroring buffer is a list of granularity-sized chunks.
+ * Free chunks are organized in a list.
+ */
+typedef struct MirrorBuffer {
+ QSIMPLEQ_ENTRY(MirrorBuffer) next;
+} MirrorBuffer;
typedef struct MirrorBlockJob {
BlockJob common;
@@ -33,7 +41,10 @@ typedef struct MirrorBlockJob {
unsigned long *cow_bitmap;
HBitmapIter hbi;
uint8_t *buf;
+ QSIMPLEQ_HEAD(, MirrorBuffer) buf_free;
+ int buf_free_count;
+ unsigned long *in_flight_bitmap;
int in_flight;
int ret;
} MirrorBlockJob;
@@ -41,7 +52,6 @@ typedef struct MirrorBlockJob {
typedef struct MirrorOp {
MirrorBlockJob *s;
QEMUIOVector qiov;
- struct iovec iov;
int64_t sector_num;
int nb_sectors;
} MirrorOp;
@@ -62,15 +72,24 @@ static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read,
static void mirror_iteration_done(MirrorOp *op, int ret)
{
MirrorBlockJob *s = op->s;
+ struct iovec *iov;
int64_t chunk_num;
- int nb_chunks, sectors_per_chunk;
+ int i, nb_chunks, sectors_per_chunk;
trace_mirror_iteration_done(s, op->sector_num, op->nb_sectors, ret);
s->in_flight--;
+ iov = op->qiov.iov;
+ for (i = 0; i < op->qiov.niov; i++) {
+ MirrorBuffer *buf = (MirrorBuffer *) iov[i].iov_base;
+ QSIMPLEQ_INSERT_TAIL(&s->buf_free, buf, next);
+ s->buf_free_count++;
+ }
+
sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
chunk_num = op->sector_num / sectors_per_chunk;
nb_chunks = op->nb_sectors / sectors_per_chunk;
+ bitmap_clear(s->in_flight_bitmap, chunk_num, nb_chunks);
if (s->cow_bitmap && ret >= 0) {
bitmap_set(s->cow_bitmap, chunk_num, nb_chunks);
}
@@ -120,8 +139,8 @@ static void mirror_read_complete(void *opaque, int ret)
static void coroutine_fn mirror_iteration(MirrorBlockJob *s)
{
BlockDriverState *source = s->common.bs;
- int nb_sectors, sectors_per_chunk;
- int64_t end, sector_num, chunk_num;
+ int nb_sectors, sectors_per_chunk, nb_chunks;
+ int64_t end, sector_num, chunk_num, next_sector, hbitmap_next_sector;
MirrorOp *op;
s->sector_num = hbitmap_iter_next(&s->hbi);
@@ -132,6 +151,8 @@ static void coroutine_fn mirror_iteration(MirrorBlockJob *s)
assert(s->sector_num >= 0);
}
+ hbitmap_next_sector = s->sector_num;
+
/* If we have no backing file yet in the destination, and the cluster size
* is very large, we need to do COW ourselves. The first time a cluster is
* copied, copy it entirely.
@@ -147,19 +168,56 @@ static void coroutine_fn mirror_iteration(MirrorBlockJob *s)
bdrv_round_to_clusters(s->target,
sector_num, sectors_per_chunk,
&sector_num, &nb_sectors);
+
+ /* The rounding may make us copy sectors before the
+ * first dirty one.
+ */
+ chunk_num = sector_num / sectors_per_chunk;
+ }
+
+ /* Wait for I/O to this cluster (from a previous iteration) to be done. */
+ while (test_bit(chunk_num, s->in_flight_bitmap)) {
+ trace_mirror_yield_in_flight(s, sector_num, s->in_flight);
+ qemu_coroutine_yield();
}
end = s->common.len >> BDRV_SECTOR_BITS;
nb_sectors = MIN(nb_sectors, end - sector_num);
+ nb_chunks = (nb_sectors + sectors_per_chunk - 1) / sectors_per_chunk;
+ while (s->buf_free_count < nb_chunks) {
+ trace_mirror_yield_buf_busy(s, nb_chunks, s->in_flight);
+ qemu_coroutine_yield();
+ }
+
+ /* We have enough free space to copy these sectors. */
+ bitmap_set(s->in_flight_bitmap, chunk_num, nb_chunks);
/* Allocate a MirrorOp that is used as an AIO callback. */
op = g_slice_new(MirrorOp);
op->s = s;
- op->iov.iov_base = s->buf;
- op->iov.iov_len = nb_sectors * 512;
op->sector_num = sector_num;
op->nb_sectors = nb_sectors;
- qemu_iovec_init_external(&op->qiov, &op->iov, 1);
+
+ /* Now make a QEMUIOVector taking enough granularity-sized chunks
+ * from s->buf_free.
+ */
+ qemu_iovec_init(&op->qiov, nb_chunks);
+ next_sector = sector_num;
+ while (nb_chunks-- > 0) {
+ MirrorBuffer *buf = QSIMPLEQ_FIRST(&s->buf_free);
+ QSIMPLEQ_REMOVE_HEAD(&s->buf_free, next);
+ s->buf_free_count--;
+ qemu_iovec_add(&op->qiov, buf, s->granularity);
+
+ /* Advance the HBitmapIter in parallel, so that we do not examine
+ * the same sector twice.
+ */
+ if (next_sector > hbitmap_next_sector && bdrv_get_dirty(source, next_sector)) {
+ hbitmap_next_sector = hbitmap_iter_next(&s->hbi);
+ }
+
+ next_sector += sectors_per_chunk;
+ }
bdrv_reset_dirty(source, sector_num, nb_sectors);
@@ -170,6 +228,23 @@ static void coroutine_fn mirror_iteration(MirrorBlockJob *s)
mirror_read_complete, op);
}
+static void mirror_free_init(MirrorBlockJob *s)
+{
+ int granularity = s->granularity;
+ size_t buf_size = s->buf_size;
+ uint8_t *buf = s->buf;
+
+ assert(s->buf_free_count == 0);
+ QSIMPLEQ_INIT(&s->buf_free);
+ while (buf_size != 0) {
+ MirrorBuffer *cur = (MirrorBuffer *)buf;
+ QSIMPLEQ_INSERT_TAIL(&s->buf_free, cur, next);
+ s->buf_free_count++;
+ buf_size -= granularity;
+ buf += granularity;
+ }
+}
+
static void mirror_drain(MirrorBlockJob *s)
{
while (s->in_flight > 0) {
@@ -198,6 +273,9 @@ static void coroutine_fn mirror_run(void *opaque)
return;
}
+ length = (bdrv_getlength(bs) + s->granularity - 1) / s->granularity;
+ s->in_flight_bitmap = bitmap_new(length);
+
/* If we have no backing file yet in the destination, we cannot let
* the destination do COW. Instead, we copy sectors around the
* dirty data if needed. We need a bitmap to do that.
@@ -208,7 +286,6 @@ static void coroutine_fn mirror_run(void *opaque)
bdrv_get_info(s->target, &bdi);
if (s->granularity < bdi.cluster_size) {
s->buf_size = MAX(s->buf_size, bdi.cluster_size);
- length = (bdrv_getlength(bs) + s->granularity - 1) / s->granularity;
s->cow_bitmap = bitmap_new(length);
}
}
@@ -216,6 +293,7 @@ static void coroutine_fn mirror_run(void *opaque)
end = s->common.len >> BDRV_SECTOR_BITS;
s->buf = qemu_blockalign(bs, s->buf_size);
sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
+ mirror_free_init(s);
if (s->mode != MIRROR_SYNC_MODE_NONE) {
/* First part, loop on the sectors and initialize the dirty bitmap. */
@@ -261,8 +339,9 @@ static void coroutine_fn mirror_run(void *opaque)
*/
if (qemu_get_clock_ns(rt_clock) - last_pause_ns < SLICE_TIME &&
s->common.iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
- if (s->in_flight > 0) {
- trace_mirror_yield(s, s->in_flight, cnt);
+ if (s->in_flight == MAX_IN_FLIGHT || s->buf_free_count == 0 ||
+ (cnt == 0 && s->in_flight > 0)) {
+ trace_mirror_yield(s, s->in_flight, s->buf_free_count, cnt);
qemu_coroutine_yield();
continue;
} else if (cnt != 0) {
@@ -354,6 +433,7 @@ immediate_exit:
assert(s->in_flight == 0);
qemu_vfree(s->buf);
g_free(s->cow_bitmap);
+ g_free(s->in_flight_bitmap);
bdrv_set_dirty_tracking(bs, 0);
bdrv_iostatus_disable(s->target);
if (s->should_complete && ret == 0) {
diff --git a/trace-events b/trace-events
index 43e6b73..b4a23cd 100644
--- a/trace-events
+++ b/trace-events
@@ -86,7 +86,9 @@ mirror_before_sleep(void *s, int64_t cnt, int synced) "s %p dirty count %"PRId64
mirror_one_iteration(void *s, int64_t sector_num, int nb_sectors) "s %p sector_num %"PRId64" nb_sectors %d"
mirror_cow(void *s, int64_t sector_num) "s %p sector_num %"PRId64
mirror_iteration_done(void *s, int64_t sector_num, int nb_sectors, int ret) "s %p sector_num %"PRId64" nb_sectors %d ret %d"
-mirror_yield(void *s, int64_t cnt, int in_flight) "s %p dirty count %"PRId64" in_flight %d"
+mirror_yield(void *s, int64_t cnt, int buf_free_count, int in_flight) "s %p dirty count %"PRId64" free buffers %d in_flight %d"
+mirror_yield_in_flight(void *s, int64_t sector_num, int in_flight) "s %p sector_num %"PRId64" in_flight %d"
+mirror_yield_buf_busy(void *s, int nb_chunks, int in_flight) "s %p requested chunks %d in_flight %d"
# blockdev.c
qmp_block_job_cancel(void *job) "job %p"