From 765d9df9626f45a821f221f7a46ef524354b3600 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Thu, 28 Sep 2017 14:13:25 -0500 Subject: block: Typo fix in copy_on_readv() Signed-off-by: Eric Blake Signed-off-by: Kevin Wolf --- block/io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block') diff --git a/block/io.c b/block/io.c index 4378ae4..d633b0f 100644 --- a/block/io.c +++ b/block/io.c @@ -954,7 +954,7 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child, /* FIXME We cannot require callers to have write permissions when all they * are doing is a read request. If we did things right, write permissions * would be obtained anyway, but internally by the copy-on-read code. As - * long as it is implemented here rather than in a separat filter driver, + * long as it is implemented here rather than in a separate filter driver, * the copy-on-read code doesn't have its own BdrvChild, however, for which * it could request permissions. Therefore we have to bypass the permission * system for the moment. */ -- cgit v1.1 From ecbfa2817d61fd102d291487ec1e61571b3b6581 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Mon, 25 Sep 2017 09:55:08 -0500 Subject: hbitmap: Rename serialization_granularity to serialization_align The only client of hbitmap_serialization_granularity() is dirty-bitmap's bdrv_dirty_bitmap_serialization_align(). Keeping the two names consistent is worthwhile, and the shorter name is more representative of what the function returns (the required alignment to be used for start/count of other serialization functions, where violating the alignment causes assertion failures). Signed-off-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Kevin Wolf Reviewed-by: Fam Zheng Signed-off-by: Kevin Wolf --- block/dirty-bitmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block') diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c index 30462d4..0490ca3 100644 --- a/block/dirty-bitmap.c +++ b/block/dirty-bitmap.c @@ -617,7 +617,7 @@ uint64_t bdrv_dirty_bitmap_serialization_size(const BdrvDirtyBitmap *bitmap, uint64_t bdrv_dirty_bitmap_serialization_align(const BdrvDirtyBitmap *bitmap) { - return hbitmap_serialization_granularity(bitmap->bitmap); + return hbitmap_serialization_align(bitmap->bitmap); } void bdrv_dirty_bitmap_serialize_part(const BdrvDirtyBitmap *bitmap, -- cgit v1.1 From 113754f3a83dac7989ca94c4408a494560df1d73 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Mon, 25 Sep 2017 09:55:09 -0500 Subject: qcow2: Ensure bitmap serialization is aligned When subdividing a bitmap serialization, the code in hbitmap.c enforces that start/count parameters are aligned (except that count can end early at end-of-bitmap). We exposed this required alignment through bdrv_dirty_bitmap_serialization_align(), but forgot to actually check that we comply with it. Fortunately, qcow2 is never dividing bitmap serialization smaller than one cluster (which is a minimum of 512 bytes); so we are always compliant with the serialization alignment (which insists that we partition at least 64 bits per chunk) because we are doing at least 4k bits per chunk. Still, it's safer to add an assertion (for the unlikely case that we'd ever support a cluster smaller than 512 bytes, or if the hbitmap implementation changes what it considers to be aligned), rather than leaving bdrv_dirty_bitmap_serialization_align() without a caller. 
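To illustrate the arithmetic behind the claim above, here is a minimal standalone sketch (the 512-byte minimum cluster size and the 64-bit alignment figure come from the commit message; everything else is assumed for illustration and is not part of the patch):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t min_cluster_bytes = 512;                  /* smallest qcow2 cluster */
        uint64_t bits_per_chunk = min_cluster_bytes * 8;   /* 4096 bits of bitmap data per cluster */
        uint64_t align_bits = 64;                          /* serialization insists on 64-bit multiples */

        /* 4096 bits per chunk is always a multiple of the 64-bit alignment,
         * so the new assertion cannot fire with today's cluster sizes */
        assert(bits_per_chunk % align_bits == 0);
        return 0;
    }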
Signed-off-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Kevin Wolf Reviewed-by: Fam Zheng Signed-off-by: Kevin Wolf --- block/qcow2-bitmap.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'block') diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c index 14f41d0..1fc375b 100644 --- a/block/qcow2-bitmap.c +++ b/block/qcow2-bitmap.c @@ -274,10 +274,13 @@ static int free_bitmap_clusters(BlockDriverState *bs, Qcow2BitmapTable *tb) static uint64_t sectors_covered_by_bitmap_cluster(const BDRVQcow2State *s, const BdrvDirtyBitmap *bitmap) { - uint32_t sector_granularity = + uint64_t sector_granularity = bdrv_dirty_bitmap_granularity(bitmap) >> BDRV_SECTOR_BITS; + uint64_t sbc = sector_granularity * (s->cluster_size << 3); - return (uint64_t)sector_granularity * (s->cluster_size << 3); + assert(QEMU_IS_ALIGNED(sbc, + bdrv_dirty_bitmap_serialization_align(bitmap))); + return sbc; } /* load_bitmap_data -- cgit v1.1 From dfe55c35772b7ebc6f282a94d4048c9f85b7b26b Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Mon, 25 Sep 2017 09:55:10 -0500 Subject: dirty-bitmap: Drop unused functions We had several functions that no one is currently using, and which use sector-based interfaces. I'm trying to convert towards byte-based interfaces, so it's easier to just drop the unused functions: bdrv_dirty_bitmap_get_meta bdrv_dirty_bitmap_get_meta_locked bdrv_dirty_bitmap_reset_meta bdrv_dirty_bitmap_meta_granularity Signed-off-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Kevin Wolf Reviewed-by: Fam Zheng Signed-off-by: Kevin Wolf --- block/dirty-bitmap.c | 44 -------------------------------------------- 1 file changed, 44 deletions(-) (limited to 'block') diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c index 0490ca3..42a55e4 100644 --- a/block/dirty-bitmap.c +++ b/block/dirty-bitmap.c @@ -173,45 +173,6 @@ void bdrv_release_meta_dirty_bitmap(BdrvDirtyBitmap *bitmap) qemu_mutex_unlock(bitmap->mutex); } -int bdrv_dirty_bitmap_get_meta_locked(BlockDriverState *bs, - BdrvDirtyBitmap *bitmap, int64_t sector, - int nb_sectors) -{ - uint64_t i; - int sectors_per_bit = 1 << hbitmap_granularity(bitmap->meta); - - /* To optimize: we can make hbitmap to internally check the range in a - * coarse level, or at least do it word by word. 
*/ - for (i = sector; i < sector + nb_sectors; i += sectors_per_bit) { - if (hbitmap_get(bitmap->meta, i)) { - return true; - } - } - return false; -} - -int bdrv_dirty_bitmap_get_meta(BlockDriverState *bs, - BdrvDirtyBitmap *bitmap, int64_t sector, - int nb_sectors) -{ - bool dirty; - - qemu_mutex_lock(bitmap->mutex); - dirty = bdrv_dirty_bitmap_get_meta_locked(bs, bitmap, sector, nb_sectors); - qemu_mutex_unlock(bitmap->mutex); - - return dirty; -} - -void bdrv_dirty_bitmap_reset_meta(BlockDriverState *bs, - BdrvDirtyBitmap *bitmap, int64_t sector, - int nb_sectors) -{ - qemu_mutex_lock(bitmap->mutex); - hbitmap_reset(bitmap->meta, sector, nb_sectors); - qemu_mutex_unlock(bitmap->mutex); -} - int64_t bdrv_dirty_bitmap_size(const BdrvDirtyBitmap *bitmap) { return bitmap->size; @@ -511,11 +472,6 @@ uint32_t bdrv_dirty_bitmap_granularity(const BdrvDirtyBitmap *bitmap) return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap); } -uint32_t bdrv_dirty_bitmap_meta_granularity(BdrvDirtyBitmap *bitmap) -{ - return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->meta); -} - BdrvDirtyBitmapIter *bdrv_dirty_iter_new(BdrvDirtyBitmap *bitmap, uint64_t first_sector) { -- cgit v1.1 From 1b6cc579ded4f9d7923d9d59147512edd623f4c0 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Mon, 25 Sep 2017 09:55:11 -0500 Subject: dirty-bitmap: Avoid size query failure during truncate We've previously fixed several places where we failed to account for possible errors from bdrv_nb_sectors(). Fix another one by making bdrv_dirty_bitmap_truncate() take the new size from the caller instead of querying itself; then adjust the sole caller bdrv_truncate() to pass the size just determined by a successful resize, or to reuse the size given to the original truncate operation when refresh_total_sectors() was not able to confirm the actual size (the two sizes can potentially differ according to rounding constraints), thus avoiding sizing the bitmaps to -1. This also fixes a bug where not all failure paths in bdrv_truncate() would set errp. Note that bdrv_truncate() is still a bit awkward. We may want to revisit it later and clean up things to better guarantee that a resize attempt either fails cleanly up front, or cannot fail after guest-visible changes have been made (if temporary changes are made, then they need to be cleanly rolled back). But that is a task for another day; for now, the goal is the bare minimum fix to ensure that just bdrv_dirty_bitmap_truncate() cannot fail. Signed-off-by: Eric Blake Reviewed-by: Vladimir Sementsov-Ogievskiy Reviewed-by: John Snow Reviewed-by: Fam Zheng Signed-off-by: Kevin Wolf --- block/dirty-bitmap.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'block') diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c index 42a55e4..ee164fb 100644 --- a/block/dirty-bitmap.c +++ b/block/dirty-bitmap.c @@ -1,7 +1,7 @@ /* * Block Dirty Bitmap * - * Copyright (c) 2016 Red Hat. Inc + * Copyright (c) 2016-2017 Red Hat. Inc * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -302,10 +302,10 @@ BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs, * Truncates _all_ bitmaps attached to a BDS. * Called with BQL taken. 
*/ -void bdrv_dirty_bitmap_truncate(BlockDriverState *bs) +void bdrv_dirty_bitmap_truncate(BlockDriverState *bs, int64_t bytes) { BdrvDirtyBitmap *bitmap; - uint64_t size = bdrv_nb_sectors(bs); + int64_t size = DIV_ROUND_UP(bytes, BDRV_SECTOR_SIZE); bdrv_dirty_bitmaps_lock(bs); QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { -- cgit v1.1 From ebfcd2e75f719c5d74ba72bbca84fa9854b6698f Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Mon, 25 Sep 2017 09:55:12 -0500 Subject: dirty-bitmap: Change bdrv_dirty_bitmap_size() to report bytes We're already reporting bytes for bdrv_dirty_bitmap_granularity(); mixing bytes and sectors in our return values is a recipe for confusion. A later cleanup will convert dirty bitmap internals to be entirely byte-based, but in the meantime, we should report the bitmap size in bytes. The only external caller in qcow2-bitmap.c is temporarily more verbose (because it is still using sector-based math), but will later be switched to track progress by bytes instead of sectors. Signed-off-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Kevin Wolf Reviewed-by: Fam Zheng Signed-off-by: Kevin Wolf --- block/dirty-bitmap.c | 2 +- block/qcow2-bitmap.c | 14 ++++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) (limited to 'block') diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c index ee164fb..755555a 100644 --- a/block/dirty-bitmap.c +++ b/block/dirty-bitmap.c @@ -175,7 +175,7 @@ void bdrv_release_meta_dirty_bitmap(BdrvDirtyBitmap *bitmap) int64_t bdrv_dirty_bitmap_size(const BdrvDirtyBitmap *bitmap) { - return bitmap->size; + return bitmap->size * BDRV_SECTOR_SIZE; } const char *bdrv_dirty_bitmap_name(const BdrvDirtyBitmap *bitmap) diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c index 1fc375b..9c185b5 100644 --- a/block/qcow2-bitmap.c +++ b/block/qcow2-bitmap.c @@ -295,10 +295,11 @@ static int load_bitmap_data(BlockDriverState *bs, BDRVQcow2State *s = bs->opaque; uint64_t sector, sbc; uint64_t bm_size = bdrv_dirty_bitmap_size(bitmap); + uint64_t bm_sectors = DIV_ROUND_UP(bm_size, BDRV_SECTOR_SIZE); uint8_t *buf = NULL; uint64_t i, tab_size = size_to_clusters(s, - bdrv_dirty_bitmap_serialization_size(bitmap, 0, bm_size)); + bdrv_dirty_bitmap_serialization_size(bitmap, 0, bm_sectors)); if (tab_size != bitmap_table_size || tab_size > BME_MAX_TABLE_SIZE) { return -EINVAL; @@ -307,7 +308,7 @@ static int load_bitmap_data(BlockDriverState *bs, buf = g_malloc(s->cluster_size); sbc = sectors_covered_by_bitmap_cluster(s, bitmap); for (i = 0, sector = 0; i < tab_size; ++i, sector += sbc) { - uint64_t count = MIN(bm_size - sector, sbc); + uint64_t count = MIN(bm_sectors - sector, sbc); uint64_t entry = bitmap_table[i]; uint64_t offset = entry & BME_TABLE_ENTRY_OFFSET_MASK; @@ -1077,13 +1078,14 @@ static uint64_t *store_bitmap_data(BlockDriverState *bs, int64_t sector; uint64_t sbc; uint64_t bm_size = bdrv_dirty_bitmap_size(bitmap); + uint64_t bm_sectors = DIV_ROUND_UP(bm_size, BDRV_SECTOR_SIZE); const char *bm_name = bdrv_dirty_bitmap_name(bitmap); uint8_t *buf = NULL; BdrvDirtyBitmapIter *dbi; uint64_t *tb; uint64_t tb_size = size_to_clusters(s, - bdrv_dirty_bitmap_serialization_size(bitmap, 0, bm_size)); + bdrv_dirty_bitmap_serialization_size(bitmap, 0, bm_sectors)); if (tb_size > BME_MAX_TABLE_SIZE || tb_size * s->cluster_size > BME_MAX_PHYS_SIZE) @@ -1101,7 +1103,7 @@ static uint64_t *store_bitmap_data(BlockDriverState *bs, dbi = bdrv_dirty_iter_new(bitmap, 0); buf = g_malloc(s->cluster_size); sbc = sectors_covered_by_bitmap_cluster(s, bitmap); - 
assert(DIV_ROUND_UP(bm_size, sbc) == tb_size); + assert(DIV_ROUND_UP(bm_sectors, sbc) == tb_size); while ((sector = bdrv_dirty_iter_next(dbi)) != -1) { uint64_t cluster = sector / sbc; @@ -1109,7 +1111,7 @@ static uint64_t *store_bitmap_data(BlockDriverState *bs, int64_t off; sector = cluster * sbc; - end = MIN(bm_size, sector + sbc); + end = MIN(bm_sectors, sector + sbc); write_size = bdrv_dirty_bitmap_serialization_size(bitmap, sector, end - sector); assert(write_size <= s->cluster_size); @@ -1141,7 +1143,7 @@ static uint64_t *store_bitmap_data(BlockDriverState *bs, goto fail; } - if (end >= bm_size) { + if (end >= bm_sectors) { break; } -- cgit v1.1 From 993e6525bfcc67ba48fe55bd64ec043a4b721e1d Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Mon, 25 Sep 2017 09:55:13 -0500 Subject: dirty-bitmap: Track bitmap size by bytes We are still using an internal hbitmap that tracks a size in sectors, with the granularity scaled down accordingly, because it lets us use a shortcut for our iterators which are currently sector-based. But there's no reason we can't track the dirty bitmap size in bytes, since it is (mostly) an internal-only variable (remember, the size is how many bytes are covered by the bitmap, not how many bytes the bitmap occupies). A later cleanup will convert dirty bitmap internals to be entirely byte-based, eliminating the intermediate sector rounding added here; and technically, since bdrv_getlength() already rounds up to sectors, our use of DIV_ROUND_UP is more for theoretical completeness than for any actual rounding. Use is_power_of_2() while at it, instead of open-coding that. Signed-off-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Kevin Wolf Reviewed-by: Fam Zheng Signed-off-by: Kevin Wolf --- block/dirty-bitmap.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) (limited to 'block') diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c index 755555a..6d32554 100644 --- a/block/dirty-bitmap.c +++ b/block/dirty-bitmap.c @@ -42,7 +42,7 @@ struct BdrvDirtyBitmap { HBitmap *meta; /* Meta dirty bitmap */ BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */ char *name; /* Optional non-empty unique ID */ - int64_t size; /* Size of the bitmap (Number of sectors) */ + int64_t size; /* Size of the bitmap, in bytes */ bool disabled; /* Bitmap is disabled. It ignores all writes to the device */ int active_iterators; /* How many iterators are active */ @@ -115,17 +115,14 @@ BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, { int64_t bitmap_size; BdrvDirtyBitmap *bitmap; - uint32_t sector_granularity; - assert((granularity & (granularity - 1)) == 0); + assert(is_power_of_2(granularity) && granularity >= BDRV_SECTOR_SIZE); if (name && bdrv_find_dirty_bitmap(bs, name)) { error_setg(errp, "Bitmap already exists: %s", name); return NULL; } - sector_granularity = granularity >> BDRV_SECTOR_BITS; - assert(sector_granularity); - bitmap_size = bdrv_nb_sectors(bs); + bitmap_size = bdrv_getlength(bs); if (bitmap_size < 0) { error_setg_errno(errp, -bitmap_size, "could not get length of device"); errno = -bitmap_size; @@ -133,7 +130,12 @@ BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, } bitmap = g_new0(BdrvDirtyBitmap, 1); bitmap->mutex = &bs->dirty_bitmap_mutex; - bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity)); + /* + * TODO - let hbitmap track full granularity. For now, it is tracking + * only sector granularity, as a shortcut for our iterators. 
+ */ + bitmap->bitmap = hbitmap_alloc(DIV_ROUND_UP(bitmap_size, BDRV_SECTOR_SIZE), + ctz32(granularity) - BDRV_SECTOR_BITS); bitmap->size = bitmap_size; bitmap->name = g_strdup(name); bitmap->disabled = false; @@ -175,7 +177,7 @@ void bdrv_release_meta_dirty_bitmap(BdrvDirtyBitmap *bitmap) int64_t bdrv_dirty_bitmap_size(const BdrvDirtyBitmap *bitmap) { - return bitmap->size * BDRV_SECTOR_SIZE; + return bitmap->size; } const char *bdrv_dirty_bitmap_name(const BdrvDirtyBitmap *bitmap) @@ -305,14 +307,13 @@ BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs, void bdrv_dirty_bitmap_truncate(BlockDriverState *bs, int64_t bytes) { BdrvDirtyBitmap *bitmap; - int64_t size = DIV_ROUND_UP(bytes, BDRV_SECTOR_SIZE); bdrv_dirty_bitmaps_lock(bs); QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { assert(!bdrv_dirty_bitmap_frozen(bitmap)); assert(!bitmap->active_iterators); - hbitmap_truncate(bitmap->bitmap, size); - bitmap->size = size; + hbitmap_truncate(bitmap->bitmap, DIV_ROUND_UP(bytes, BDRV_SECTOR_SIZE)); + bitmap->size = bytes; } bdrv_dirty_bitmaps_unlock(bs); } @@ -549,7 +550,8 @@ void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out) hbitmap_reset_all(bitmap->bitmap); } else { HBitmap *backup = bitmap->bitmap; - bitmap->bitmap = hbitmap_alloc(bitmap->size, + bitmap->bitmap = hbitmap_alloc(DIV_ROUND_UP(bitmap->size, + BDRV_SECTOR_SIZE), hbitmap_granularity(backup)); *out = backup; } -- cgit v1.1 From 86f6ae67e157362f3b141649874213ce01dcc622 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Mon, 25 Sep 2017 09:55:14 -0500 Subject: dirty-bitmap: Change bdrv_dirty_bitmap_*serialize*() to take bytes Right now, the dirty-bitmap code exposes the fact that we use a scale of sector granularity in the underlying hbitmap to anything that wants to serialize a dirty bitmap. It's nicer to uniformly expose bytes as our dirty-bitmap interface, matching the previous change to bitmap size. The only caller to serialization is currently qcow2-cluster.c, which becomes a bit more verbose because it is still tracking sectors for other reasons, but a later patch will fix that to more uniformly use byte offsets everywhere. Likewise, within dirty-bitmap, we have to add more assertions that we are not truncating incorrectly, which can go away once the internal hbitmap is byte-based rather than sector-based. 
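The wrapper pattern added by this patch is always the same: assert that the incoming byte values are sector-aligned, then shift down before calling the sector-based internals. A minimal self-contained sketch of that pattern (the constants and the callee are stand-ins for BDRV_SECTOR_SIZE/BDRV_SECTOR_BITS and the hbitmap functions, not the real API):

    #include <assert.h>
    #include <stdint.h>

    #define SECTOR_SIZE 512
    #define SECTOR_BITS 9

    /* stand-in for a sector-based internal serializer */
    static void serialize_sectors(uint64_t start_sector, uint64_t nr_sectors)
    {
        (void)start_sector;
        (void)nr_sectors;
    }

    /* byte-based public wrapper: assert alignment, then scale down */
    static void serialize_bytes(uint64_t offset, uint64_t bytes)
    {
        /* OR-ing the two values checks both at once: the low bits of
         * (offset | bytes) are zero only if both are sector-aligned */
        assert(((offset | bytes) & (SECTOR_SIZE - 1)) == 0);
        serialize_sectors(offset >> SECTOR_BITS, bytes >> SECTOR_BITS);
    }

    int main(void)
    {
        serialize_bytes(4096, 65536);   /* aligned values pass straight through */
        return 0;
    }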
Signed-off-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Kevin Wolf Reviewed-by: Fam Zheng Signed-off-by: Kevin Wolf --- block/dirty-bitmap.c | 37 ++++++++++++++++++++++++------------- block/qcow2-bitmap.c | 22 ++++++++++++++-------- 2 files changed, 38 insertions(+), 21 deletions(-) (limited to 'block') diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c index 6d32554..555c736 100644 --- a/block/dirty-bitmap.c +++ b/block/dirty-bitmap.c @@ -568,42 +568,53 @@ void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in) } uint64_t bdrv_dirty_bitmap_serialization_size(const BdrvDirtyBitmap *bitmap, - uint64_t start, uint64_t count) + uint64_t offset, uint64_t bytes) { - return hbitmap_serialization_size(bitmap->bitmap, start, count); + assert(QEMU_IS_ALIGNED(offset | bytes, BDRV_SECTOR_SIZE)); + return hbitmap_serialization_size(bitmap->bitmap, + offset >> BDRV_SECTOR_BITS, + bytes >> BDRV_SECTOR_BITS); } uint64_t bdrv_dirty_bitmap_serialization_align(const BdrvDirtyBitmap *bitmap) { - return hbitmap_serialization_align(bitmap->bitmap); + return hbitmap_serialization_align(bitmap->bitmap) * BDRV_SECTOR_SIZE; } void bdrv_dirty_bitmap_serialize_part(const BdrvDirtyBitmap *bitmap, - uint8_t *buf, uint64_t start, - uint64_t count) + uint8_t *buf, uint64_t offset, + uint64_t bytes) { - hbitmap_serialize_part(bitmap->bitmap, buf, start, count); + assert(QEMU_IS_ALIGNED(offset | bytes, BDRV_SECTOR_SIZE)); + hbitmap_serialize_part(bitmap->bitmap, buf, offset >> BDRV_SECTOR_BITS, + bytes >> BDRV_SECTOR_BITS); } void bdrv_dirty_bitmap_deserialize_part(BdrvDirtyBitmap *bitmap, - uint8_t *buf, uint64_t start, - uint64_t count, bool finish) + uint8_t *buf, uint64_t offset, + uint64_t bytes, bool finish) { - hbitmap_deserialize_part(bitmap->bitmap, buf, start, count, finish); + assert(QEMU_IS_ALIGNED(offset | bytes, BDRV_SECTOR_SIZE)); + hbitmap_deserialize_part(bitmap->bitmap, buf, offset >> BDRV_SECTOR_BITS, + bytes >> BDRV_SECTOR_BITS, finish); } void bdrv_dirty_bitmap_deserialize_zeroes(BdrvDirtyBitmap *bitmap, - uint64_t start, uint64_t count, + uint64_t offset, uint64_t bytes, bool finish) { - hbitmap_deserialize_zeroes(bitmap->bitmap, start, count, finish); + assert(QEMU_IS_ALIGNED(offset | bytes, BDRV_SECTOR_SIZE)); + hbitmap_deserialize_zeroes(bitmap->bitmap, offset >> BDRV_SECTOR_BITS, + bytes >> BDRV_SECTOR_BITS, finish); } void bdrv_dirty_bitmap_deserialize_ones(BdrvDirtyBitmap *bitmap, - uint64_t start, uint64_t count, + uint64_t offset, uint64_t bytes, bool finish) { - hbitmap_deserialize_ones(bitmap->bitmap, start, count, finish); + assert(QEMU_IS_ALIGNED(offset | bytes, BDRV_SECTOR_SIZE)); + hbitmap_deserialize_ones(bitmap->bitmap, offset >> BDRV_SECTOR_BITS, + bytes >> BDRV_SECTOR_BITS, finish); } void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap) diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c index 9c185b5..9e79999 100644 --- a/block/qcow2-bitmap.c +++ b/block/qcow2-bitmap.c @@ -278,7 +278,7 @@ static uint64_t sectors_covered_by_bitmap_cluster(const BDRVQcow2State *s, bdrv_dirty_bitmap_granularity(bitmap) >> BDRV_SECTOR_BITS; uint64_t sbc = sector_granularity * (s->cluster_size << 3); - assert(QEMU_IS_ALIGNED(sbc, + assert(QEMU_IS_ALIGNED(sbc << BDRV_SECTOR_BITS, bdrv_dirty_bitmap_serialization_align(bitmap))); return sbc; } @@ -299,7 +299,7 @@ static int load_bitmap_data(BlockDriverState *bs, uint8_t *buf = NULL; uint64_t i, tab_size = size_to_clusters(s, - bdrv_dirty_bitmap_serialization_size(bitmap, 0, bm_sectors)); + 
bdrv_dirty_bitmap_serialization_size(bitmap, 0, bm_size)); if (tab_size != bitmap_table_size || tab_size > BME_MAX_TABLE_SIZE) { return -EINVAL; @@ -316,7 +316,9 @@ static int load_bitmap_data(BlockDriverState *bs, if (offset == 0) { if (entry & BME_TABLE_ENTRY_FLAG_ALL_ONES) { - bdrv_dirty_bitmap_deserialize_ones(bitmap, sector, count, + bdrv_dirty_bitmap_deserialize_ones(bitmap, + sector * BDRV_SECTOR_SIZE, + count * BDRV_SECTOR_SIZE, false); } else { /* No need to deserialize zeros because the dirty bitmap is @@ -327,7 +329,9 @@ static int load_bitmap_data(BlockDriverState *bs, if (ret < 0) { goto finish; } - bdrv_dirty_bitmap_deserialize_part(bitmap, buf, sector, count, + bdrv_dirty_bitmap_deserialize_part(bitmap, buf, + sector * BDRV_SECTOR_SIZE, + count * BDRV_SECTOR_SIZE, false); } } @@ -1085,7 +1089,7 @@ static uint64_t *store_bitmap_data(BlockDriverState *bs, uint64_t *tb; uint64_t tb_size = size_to_clusters(s, - bdrv_dirty_bitmap_serialization_size(bitmap, 0, bm_sectors)); + bdrv_dirty_bitmap_serialization_size(bitmap, 0, bm_size)); if (tb_size > BME_MAX_TABLE_SIZE || tb_size * s->cluster_size > BME_MAX_PHYS_SIZE) @@ -1112,8 +1116,8 @@ static uint64_t *store_bitmap_data(BlockDriverState *bs, sector = cluster * sbc; end = MIN(bm_sectors, sector + sbc); - write_size = - bdrv_dirty_bitmap_serialization_size(bitmap, sector, end - sector); + write_size = bdrv_dirty_bitmap_serialization_size(bitmap, + sector * BDRV_SECTOR_SIZE, (end - sector) * BDRV_SECTOR_SIZE); assert(write_size <= s->cluster_size); off = qcow2_alloc_clusters(bs, s->cluster_size); @@ -1125,7 +1129,9 @@ static uint64_t *store_bitmap_data(BlockDriverState *bs, } tb[cluster] = off; - bdrv_dirty_bitmap_serialize_part(bitmap, buf, sector, end - sector); + bdrv_dirty_bitmap_serialize_part(bitmap, buf, + sector * BDRV_SECTOR_SIZE, + (end - sector) * BDRV_SECTOR_SIZE); if (write_size < s->cluster_size) { memset(buf + write_size, 0, s->cluster_size - write_size); } -- cgit v1.1 From c7e7c87ac8e03fe088f4b1f820da30e85a07fd6b Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Mon, 25 Sep 2017 09:55:15 -0500 Subject: qcow2: Switch sectors_covered_by_bitmap_cluster() to byte-based We are gradually converting to byte-based interfaces, as they are easier to reason about than sector-based. Change the qcow2 bitmap helper function sectors_covered_by_bitmap_cluster(), renaming it to bytes_covered_by_bitmap_cluster() in the process. Signed-off-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Kevin Wolf Reviewed-by: Fam Zheng Signed-off-by: Kevin Wolf --- block/qcow2-bitmap.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'block') diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c index 9e79999..8324468 100644 --- a/block/qcow2-bitmap.c +++ b/block/qcow2-bitmap.c @@ -269,18 +269,16 @@ static int free_bitmap_clusters(BlockDriverState *bs, Qcow2BitmapTable *tb) return 0; } -/* This function returns the number of disk sectors covered by a single qcow2 - * cluster of bitmap data. */ -static uint64_t sectors_covered_by_bitmap_cluster(const BDRVQcow2State *s, - const BdrvDirtyBitmap *bitmap) +/* Return the disk size covered by a single qcow2 cluster of bitmap data. 
*/ +static uint64_t bytes_covered_by_bitmap_cluster(const BDRVQcow2State *s, + const BdrvDirtyBitmap *bitmap) { - uint64_t sector_granularity = - bdrv_dirty_bitmap_granularity(bitmap) >> BDRV_SECTOR_BITS; - uint64_t sbc = sector_granularity * (s->cluster_size << 3); + uint64_t granularity = bdrv_dirty_bitmap_granularity(bitmap); + uint64_t limit = granularity * (s->cluster_size << 3); - assert(QEMU_IS_ALIGNED(sbc << BDRV_SECTOR_BITS, + assert(QEMU_IS_ALIGNED(limit, bdrv_dirty_bitmap_serialization_align(bitmap))); - return sbc; + return limit; } /* load_bitmap_data @@ -293,7 +291,7 @@ static int load_bitmap_data(BlockDriverState *bs, { int ret = 0; BDRVQcow2State *s = bs->opaque; - uint64_t sector, sbc; + uint64_t sector, limit, sbc; uint64_t bm_size = bdrv_dirty_bitmap_size(bitmap); uint64_t bm_sectors = DIV_ROUND_UP(bm_size, BDRV_SECTOR_SIZE); uint8_t *buf = NULL; @@ -306,7 +304,8 @@ static int load_bitmap_data(BlockDriverState *bs, } buf = g_malloc(s->cluster_size); - sbc = sectors_covered_by_bitmap_cluster(s, bitmap); + limit = bytes_covered_by_bitmap_cluster(s, bitmap); + sbc = limit >> BDRV_SECTOR_BITS; for (i = 0, sector = 0; i < tab_size; ++i, sector += sbc) { uint64_t count = MIN(bm_sectors - sector, sbc); uint64_t entry = bitmap_table[i]; @@ -1080,7 +1079,7 @@ static uint64_t *store_bitmap_data(BlockDriverState *bs, int ret; BDRVQcow2State *s = bs->opaque; int64_t sector; - uint64_t sbc; + uint64_t limit, sbc; uint64_t bm_size = bdrv_dirty_bitmap_size(bitmap); uint64_t bm_sectors = DIV_ROUND_UP(bm_size, BDRV_SECTOR_SIZE); const char *bm_name = bdrv_dirty_bitmap_name(bitmap); @@ -1106,8 +1105,9 @@ static uint64_t *store_bitmap_data(BlockDriverState *bs, dbi = bdrv_dirty_iter_new(bitmap, 0); buf = g_malloc(s->cluster_size); - sbc = sectors_covered_by_bitmap_cluster(s, bitmap); - assert(DIV_ROUND_UP(bm_sectors, sbc) == tb_size); + limit = bytes_covered_by_bitmap_cluster(s, bitmap); + sbc = limit >> BDRV_SECTOR_BITS; + assert(DIV_ROUND_UP(bm_size, limit) == tb_size); while ((sector = bdrv_dirty_iter_next(dbi)) != -1) { uint64_t cluster = sector / sbc; -- cgit v1.1 From 715a74d819926af38bfeddb3ae29c9fe6b7736bb Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Mon, 25 Sep 2017 09:55:16 -0500 Subject: dirty-bitmap: Set iterator start by offset, not sector All callers to bdrv_dirty_iter_new() passed 0 for their initial starting point, drop that parameter. Most callers to bdrv_set_dirty_iter() were scaling a byte offset to a sector number; the exception qcow2-bitmap will be converted later to use byte rather than sector iteration. Move the scaling to occur internally to dirty bitmap code instead, so that callers now pass in bytes. 
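Internally the repositioning is now just a shift; a tiny self-contained sketch of the byte/sector scaling this series keeps pushing down into the dirty-bitmap code (constants assumed to mirror BDRV_SECTOR_SIZE and BDRV_SECTOR_BITS):

    #include <assert.h>
    #include <stdint.h>

    #define SECTOR_SIZE 512
    #define SECTOR_BITS 9

    static int64_t bytes_to_sectors(int64_t offset)  { return offset >> SECTOR_BITS; }
    static int64_t sectors_to_bytes(int64_t sectors) { return sectors * SECTOR_SIZE; }

    int main(void)
    {
        /* a sector-aligned byte offset round-trips exactly */
        assert(sectors_to_bytes(bytes_to_sectors(1 << 20)) == 1 << 20);
        /* an unaligned byte offset is rounded down to its containing sector,
         * which is harmless for an iterator start but is why the serialization
         * wrappers in the previous patch assert alignment instead */
        assert(sectors_to_bytes(bytes_to_sectors(1000)) == 512);
        return 0;
    }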
Signed-off-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Kevin Wolf Reviewed-by: Fam Zheng Signed-off-by: Kevin Wolf --- block/backup.c | 5 ++--- block/dirty-bitmap.c | 9 ++++----- block/mirror.c | 4 ++-- block/qcow2-bitmap.c | 4 ++-- 4 files changed, 10 insertions(+), 12 deletions(-) (limited to 'block') diff --git a/block/backup.c b/block/backup.c index 517c300..ac9c018 100644 --- a/block/backup.c +++ b/block/backup.c @@ -372,7 +372,7 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job) granularity = bdrv_dirty_bitmap_granularity(job->sync_bitmap); clusters_per_iter = MAX((granularity / job->cluster_size), 1); - dbi = bdrv_dirty_iter_new(job->sync_bitmap, 0); + dbi = bdrv_dirty_iter_new(job->sync_bitmap); /* Find the next dirty sector(s) */ while ((offset = bdrv_dirty_iter_next(dbi) * BDRV_SECTOR_SIZE) >= 0) { @@ -403,8 +403,7 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job) /* If the bitmap granularity is smaller than the backup granularity, * we need to advance the iterator pointer to the next cluster. */ if (granularity < job->cluster_size) { - bdrv_set_dirty_iter(dbi, - cluster * job->cluster_size / BDRV_SECTOR_SIZE); + bdrv_set_dirty_iter(dbi, cluster * job->cluster_size); } last_cluster = cluster - 1; diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c index 555c736..8450947 100644 --- a/block/dirty-bitmap.c +++ b/block/dirty-bitmap.c @@ -473,11 +473,10 @@ uint32_t bdrv_dirty_bitmap_granularity(const BdrvDirtyBitmap *bitmap) return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap); } -BdrvDirtyBitmapIter *bdrv_dirty_iter_new(BdrvDirtyBitmap *bitmap, - uint64_t first_sector) +BdrvDirtyBitmapIter *bdrv_dirty_iter_new(BdrvDirtyBitmap *bitmap) { BdrvDirtyBitmapIter *iter = g_new(BdrvDirtyBitmapIter, 1); - hbitmap_iter_init(&iter->hbi, bitmap->bitmap, first_sector); + hbitmap_iter_init(&iter->hbi, bitmap->bitmap, 0); iter->bitmap = bitmap; bitmap->active_iterators++; return iter; @@ -645,9 +644,9 @@ void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, /** * Advance a BdrvDirtyBitmapIter to an arbitrary offset. 
*/ -void bdrv_set_dirty_iter(BdrvDirtyBitmapIter *iter, int64_t sector_num) +void bdrv_set_dirty_iter(BdrvDirtyBitmapIter *iter, int64_t offset) { - hbitmap_iter_init(&iter->hbi, iter->hbi.hb, sector_num); + hbitmap_iter_init(&iter->hbi, iter->hbi.hb, offset >> BDRV_SECTOR_BITS); } int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap) diff --git a/block/mirror.c b/block/mirror.c index 6f5cb9f..0c705e0 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -373,7 +373,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) next_dirty = bdrv_dirty_iter_next(s->dbi) * BDRV_SECTOR_SIZE; if (next_dirty > next_offset || next_dirty < 0) { /* The bitmap iterator's cache is stale, refresh it */ - bdrv_set_dirty_iter(s->dbi, next_offset >> BDRV_SECTOR_BITS); + bdrv_set_dirty_iter(s->dbi, next_offset); next_dirty = bdrv_dirty_iter_next(s->dbi) * BDRV_SECTOR_SIZE; } assert(next_dirty == next_offset); @@ -796,7 +796,7 @@ static void coroutine_fn mirror_run(void *opaque) } assert(!s->dbi); - s->dbi = bdrv_dirty_iter_new(s->dirty_bitmap, 0); + s->dbi = bdrv_dirty_iter_new(s->dirty_bitmap); for (;;) { uint64_t delay_ns = 0; int64_t cnt, delta; diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c index 8324468..90756cf 100644 --- a/block/qcow2-bitmap.c +++ b/block/qcow2-bitmap.c @@ -1103,7 +1103,7 @@ static uint64_t *store_bitmap_data(BlockDriverState *bs, return NULL; } - dbi = bdrv_dirty_iter_new(bitmap, 0); + dbi = bdrv_dirty_iter_new(bitmap); buf = g_malloc(s->cluster_size); limit = bytes_covered_by_bitmap_cluster(s, bitmap); sbc = limit >> BDRV_SECTOR_BITS; @@ -1153,7 +1153,7 @@ static uint64_t *store_bitmap_data(BlockDriverState *bs, break; } - bdrv_set_dirty_iter(dbi, end); + bdrv_set_dirty_iter(dbi, end * BDRV_SECTOR_SIZE); } *bitmap_table_size = tb_size; -- cgit v1.1 From f798184cfdcb7f92a38c5f717d675bd75e1fd3ac Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Mon, 25 Sep 2017 09:55:17 -0500 Subject: dirty-bitmap: Change bdrv_dirty_iter_next() to report byte offset Thanks to recent cleanups, most callers were scaling a return value of sectors into bytes (the exception, in qcow2-bitmap, will be converted to byte-based iteration later). Update the interface to do the scaling internally instead. In qcow2-bitmap, the code was specifically checking for an error return of -1. To avoid a regression, we either have to make sure we continue to return -1 (rather than a scaled -512) on error, or we have to fix the caller to treat all negative values as error rather than just one magic value. It's easy enough to make both changes at the same time, even though either one in isolation would work. 
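The -1 subtlety described above is easy to see with a toy version of the conversion (purely illustrative; the 512-byte sector size is assumed):

    #include <assert.h>
    #include <stdint.h>

    #define SECTOR_SIZE 512

    /* naive scaling would turn the -1 "no more dirty bits" marker into -512 */
    static int64_t next_naive(int64_t sector_ret)
    {
        return sector_ret * SECTOR_SIZE;
    }

    /* the patch keeps errors as exactly -1 and scales only valid offsets */
    static int64_t next_fixed(int64_t sector_ret)
    {
        return sector_ret < 0 ? -1 : sector_ret * SECTOR_SIZE;
    }

    int main(void)
    {
        assert(next_naive(-1) == -512);    /* a caller testing '!= -1' would treat this as valid */
        assert(next_fixed(-1) == -1);      /* ...hence both halves of the fix */
        assert(next_fixed(7) == 7 * SECTOR_SIZE);
        return 0;
    }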
Signed-off-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Kevin Wolf Reviewed-by: Fam Zheng Signed-off-by: Kevin Wolf --- block/backup.c | 2 +- block/dirty-bitmap.c | 3 ++- block/mirror.c | 8 ++++---- block/qcow2-bitmap.c | 2 +- 4 files changed, 8 insertions(+), 7 deletions(-) (limited to 'block') diff --git a/block/backup.c b/block/backup.c index ac9c018..06ddbfd 100644 --- a/block/backup.c +++ b/block/backup.c @@ -375,7 +375,7 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job) dbi = bdrv_dirty_iter_new(job->sync_bitmap); /* Find the next dirty sector(s) */ - while ((offset = bdrv_dirty_iter_next(dbi) * BDRV_SECTOR_SIZE) >= 0) { + while ((offset = bdrv_dirty_iter_next(dbi)) >= 0) { cluster = offset / job->cluster_size; /* Fake progress updates for any clusters we skipped */ diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c index 8450947..e451916 100644 --- a/block/dirty-bitmap.c +++ b/block/dirty-bitmap.c @@ -503,7 +503,8 @@ void bdrv_dirty_iter_free(BdrvDirtyBitmapIter *iter) int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter) { - return hbitmap_iter_next(&iter->hbi); + int64_t ret = hbitmap_iter_next(&iter->hbi); + return ret < 0 ? -1 : ret * BDRV_SECTOR_SIZE; } /* Called within bdrv_dirty_bitmap_lock..unlock */ diff --git a/block/mirror.c b/block/mirror.c index 0c705e0..de0a027 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -336,10 +336,10 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) int max_io_bytes = MAX(s->buf_size / MAX_IN_FLIGHT, MAX_IO_BYTES); bdrv_dirty_bitmap_lock(s->dirty_bitmap); - offset = bdrv_dirty_iter_next(s->dbi) * BDRV_SECTOR_SIZE; + offset = bdrv_dirty_iter_next(s->dbi); if (offset < 0) { bdrv_set_dirty_iter(s->dbi, 0); - offset = bdrv_dirty_iter_next(s->dbi) * BDRV_SECTOR_SIZE; + offset = bdrv_dirty_iter_next(s->dbi); trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap) * BDRV_SECTOR_SIZE); assert(offset >= 0); @@ -370,11 +370,11 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) break; } - next_dirty = bdrv_dirty_iter_next(s->dbi) * BDRV_SECTOR_SIZE; + next_dirty = bdrv_dirty_iter_next(s->dbi); if (next_dirty > next_offset || next_dirty < 0) { /* The bitmap iterator's cache is stale, refresh it */ bdrv_set_dirty_iter(s->dbi, next_offset); - next_dirty = bdrv_dirty_iter_next(s->dbi) * BDRV_SECTOR_SIZE; + next_dirty = bdrv_dirty_iter_next(s->dbi); } assert(next_dirty == next_offset); nb_chunks++; diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c index 90756cf..2d8dcba 100644 --- a/block/qcow2-bitmap.c +++ b/block/qcow2-bitmap.c @@ -1109,7 +1109,7 @@ static uint64_t *store_bitmap_data(BlockDriverState *bs, sbc = limit >> BDRV_SECTOR_BITS; assert(DIV_ROUND_UP(bm_size, limit) == tb_size); - while ((sector = bdrv_dirty_iter_next(dbi)) != -1) { + while ((sector = bdrv_dirty_iter_next(dbi) >> BDRV_SECTOR_BITS) >= 0) { uint64_t cluster = sector / sbc; uint64_t end, write_size; int64_t off; -- cgit v1.1 From 9a46dba7b76f5198555819905d1d8235947ba98f Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Mon, 25 Sep 2017 09:55:18 -0500 Subject: dirty-bitmap: Change bdrv_get_dirty_count() to report bytes Thanks to recent cleanups, all callers were scaling a return value of sectors into bytes; do the scaling internally instead. 
Signed-off-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Kevin Wolf Reviewed-by: Fam Zheng Signed-off-by: Kevin Wolf --- block/dirty-bitmap.c | 4 ++-- block/mirror.c | 16 ++++++---------- 2 files changed, 8 insertions(+), 12 deletions(-) (limited to 'block') diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c index e451916..8322e23 100644 --- a/block/dirty-bitmap.c +++ b/block/dirty-bitmap.c @@ -423,7 +423,7 @@ BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs) QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) { BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1); BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1); - info->count = bdrv_get_dirty_count(bm) << BDRV_SECTOR_BITS; + info->count = bdrv_get_dirty_count(bm); info->granularity = bdrv_dirty_bitmap_granularity(bm); info->has_name = !!bm->name; info->name = g_strdup(bm->name); @@ -652,7 +652,7 @@ void bdrv_set_dirty_iter(BdrvDirtyBitmapIter *iter, int64_t offset) int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap) { - return hbitmap_count(bitmap->bitmap); + return hbitmap_count(bitmap->bitmap) << BDRV_SECTOR_BITS; } int64_t bdrv_get_meta_dirty_count(BdrvDirtyBitmap *bitmap) diff --git a/block/mirror.c b/block/mirror.c index de0a027..c5212d2 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -340,8 +340,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) if (offset < 0) { bdrv_set_dirty_iter(s->dbi, 0); offset = bdrv_dirty_iter_next(s->dbi); - trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap) * - BDRV_SECTOR_SIZE); + trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap)); assert(offset >= 0); } bdrv_dirty_bitmap_unlock(s->dirty_bitmap); @@ -811,11 +810,10 @@ static void coroutine_fn mirror_run(void *opaque) cnt = bdrv_get_dirty_count(s->dirty_bitmap); /* s->common.offset contains the number of bytes already processed so - * far, cnt is the number of dirty sectors remaining and + * far, cnt is the number of dirty bytes remaining and * s->bytes_in_flight is the number of bytes currently being * processed; together those are the current total operation length */ - s->common.len = s->common.offset + s->bytes_in_flight + - cnt * BDRV_SECTOR_SIZE; + s->common.len = s->common.offset + s->bytes_in_flight + cnt; /* Note that even when no rate limit is applied we need to yield * periodically with no pending I/O so that bdrv_drain_all() returns. @@ -827,8 +825,7 @@ static void coroutine_fn mirror_run(void *opaque) s->common.iostatus == BLOCK_DEVICE_IO_STATUS_OK) { if (s->in_flight >= MAX_IN_FLIGHT || s->buf_free_count == 0 || (cnt == 0 && s->in_flight > 0)) { - trace_mirror_yield(s, cnt * BDRV_SECTOR_SIZE, - s->buf_free_count, s->in_flight); + trace_mirror_yield(s, cnt, s->buf_free_count, s->in_flight); mirror_wait_for_io(s); continue; } else if (cnt != 0) { @@ -869,7 +866,7 @@ static void coroutine_fn mirror_run(void *opaque) * whether to switch to target check one last time if I/O has * come in the meanwhile, and if not flush the data to disk. 
*/ - trace_mirror_before_drain(s, cnt * BDRV_SECTOR_SIZE); + trace_mirror_before_drain(s, cnt); bdrv_drained_begin(bs); cnt = bdrv_get_dirty_count(s->dirty_bitmap); @@ -888,8 +885,7 @@ static void coroutine_fn mirror_run(void *opaque) } ret = 0; - trace_mirror_before_sleep(s, cnt * BDRV_SECTOR_SIZE, - s->synced, delay_ns); + trace_mirror_before_sleep(s, cnt, s->synced, delay_ns); if (!s->synced) { block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns); if (block_job_is_cancelled(&s->common)) { -- cgit v1.1 From 3b5d4df0c6b52746c6194bd2ea65828822db8438 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Mon, 25 Sep 2017 09:55:19 -0500 Subject: dirty-bitmap: Change bdrv_get_dirty_locked() to take bytes Half the callers were already scaling bytes to sectors; the other half can eventually be simplified to use byte iteration. Both callers were already using the result as a bool, so make that explicit. Making the change also makes it easier for a future dirty-bitmap patch to offload scaling over to the internal hbitmap. Remember, asking whether a byte is dirty is effectively asking whether the entire granularity containing the byte is dirty, since we only track dirtiness by granularity. Signed-off-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Juan Quintela Reviewed-by: Kevin Wolf Reviewed-by: Fam Zheng Signed-off-by: Kevin Wolf --- block/dirty-bitmap.c | 8 ++++---- block/mirror.c | 3 +-- 2 files changed, 5 insertions(+), 6 deletions(-) (limited to 'block') diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c index 8322e23..ad559c6 100644 --- a/block/dirty-bitmap.c +++ b/block/dirty-bitmap.c @@ -438,13 +438,13 @@ BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs) } /* Called within bdrv_dirty_bitmap_lock..unlock */ -int bdrv_get_dirty_locked(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, - int64_t sector) +bool bdrv_get_dirty_locked(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, + int64_t offset) { if (bitmap) { - return hbitmap_get(bitmap->bitmap, sector); + return hbitmap_get(bitmap->bitmap, offset >> BDRV_SECTOR_BITS); } else { - return 0; + return false; } } diff --git a/block/mirror.c b/block/mirror.c index c5212d2..545dfc5 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -361,8 +361,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) int64_t next_offset = offset + nb_chunks * s->granularity; int64_t next_chunk = next_offset / s->granularity; if (next_offset >= s->bdev_length || - !bdrv_get_dirty_locked(source, s->dirty_bitmap, - next_offset >> BDRV_SECTOR_BITS)) { + !bdrv_get_dirty_locked(source, s->dirty_bitmap, next_offset)) { break; } if (test_bit(next_chunk, s->in_flight_bitmap)) { -- cgit v1.1 From e0d7f73e63427521aabd7311a86fe90fd02897c0 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Mon, 25 Sep 2017 09:55:20 -0500 Subject: dirty-bitmap: Change bdrv_[re]set_dirty_bitmap() to use bytes Some of the callers were already scaling bytes to sectors; others can be easily converted to pass byte offsets, all in our shift towards a consistent byte interface everywhere. Making the change will also make it easier to write the hold-out callers to use byte rather than sectors for their iterations; it also makes it easier for a future dirty-bitmap patch to offload scaling over to the internal hbitmap. Although all callers happen to pass sector-aligned values, make the internal scaling robust to any sub-sector requests. 
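The robustness comes from widening the byte range outward to whole sectors before touching the still sector-based hbitmap; a self-contained sketch of that widening arithmetic (the macros are simplified stand-ins for the QEMU ones):

    #include <assert.h>
    #include <stdint.h>

    #define SECTOR_SIZE 512
    #define SECTOR_BITS 9
    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    /* widen [offset, offset + bytes) outward to whole sectors */
    static void widen_to_sectors(int64_t offset, int64_t bytes,
                                 int64_t *first_sector, int64_t *nr_sectors)
    {
        int64_t end_sector = DIV_ROUND_UP(offset + bytes, SECTOR_SIZE);

        *first_sector = offset >> SECTOR_BITS;
        *nr_sectors = end_sector - *first_sector;
    }

    int main(void)
    {
        int64_t first, nr;

        /* a 10-byte request straddling a sector boundary dirties both sectors */
        widen_to_sectors(510, 10, &first, &nr);
        assert(first == 0 && nr == 2);

        /* an already aligned request maps 1:1 */
        widen_to_sectors(512, 1024, &first, &nr);
        assert(first == 1 && nr == 2);
        return 0;
    }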
Signed-off-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Kevin Wolf Reviewed-by: Fam Zheng Signed-off-by: Kevin Wolf --- block/dirty-bitmap.c | 22 ++++++++++++++-------- block/mirror.c | 16 ++++++++-------- 2 files changed, 22 insertions(+), 16 deletions(-) (limited to 'block') diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c index ad559c6..117837b 100644 --- a/block/dirty-bitmap.c +++ b/block/dirty-bitmap.c @@ -509,35 +509,41 @@ int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter) /* Called within bdrv_dirty_bitmap_lock..unlock */ void bdrv_set_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap, - int64_t cur_sector, int64_t nr_sectors) + int64_t offset, int64_t bytes) { + int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE); + assert(bdrv_dirty_bitmap_enabled(bitmap)); assert(!bdrv_dirty_bitmap_readonly(bitmap)); - hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors); + hbitmap_set(bitmap->bitmap, offset >> BDRV_SECTOR_BITS, + end_sector - (offset >> BDRV_SECTOR_BITS)); } void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap, - int64_t cur_sector, int64_t nr_sectors) + int64_t offset, int64_t bytes) { bdrv_dirty_bitmap_lock(bitmap); - bdrv_set_dirty_bitmap_locked(bitmap, cur_sector, nr_sectors); + bdrv_set_dirty_bitmap_locked(bitmap, offset, bytes); bdrv_dirty_bitmap_unlock(bitmap); } /* Called within bdrv_dirty_bitmap_lock..unlock */ void bdrv_reset_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap, - int64_t cur_sector, int64_t nr_sectors) + int64_t offset, int64_t bytes) { + int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE); + assert(bdrv_dirty_bitmap_enabled(bitmap)); assert(!bdrv_dirty_bitmap_readonly(bitmap)); - hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors); + hbitmap_reset(bitmap->bitmap, offset >> BDRV_SECTOR_BITS, + end_sector - (offset >> BDRV_SECTOR_BITS)); } void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap, - int64_t cur_sector, int64_t nr_sectors) + int64_t offset, int64_t bytes) { bdrv_dirty_bitmap_lock(bitmap); - bdrv_reset_dirty_bitmap_locked(bitmap, cur_sector, nr_sectors); + bdrv_reset_dirty_bitmap_locked(bitmap, offset, bytes); bdrv_dirty_bitmap_unlock(bitmap); } diff --git a/block/mirror.c b/block/mirror.c index 545dfc5..5c0f79f 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -141,8 +141,7 @@ static void mirror_write_complete(void *opaque, int ret) if (ret < 0) { BlockErrorAction action; - bdrv_set_dirty_bitmap(s->dirty_bitmap, op->offset >> BDRV_SECTOR_BITS, - op->bytes >> BDRV_SECTOR_BITS); + bdrv_set_dirty_bitmap(s->dirty_bitmap, op->offset, op->bytes); action = mirror_error_action(s, false, -ret); if (action == BLOCK_ERROR_ACTION_REPORT && s->ret >= 0) { s->ret = ret; @@ -161,8 +160,7 @@ static void mirror_read_complete(void *opaque, int ret) if (ret < 0) { BlockErrorAction action; - bdrv_set_dirty_bitmap(s->dirty_bitmap, op->offset >> BDRV_SECTOR_BITS, - op->bytes >> BDRV_SECTOR_BITS); + bdrv_set_dirty_bitmap(s->dirty_bitmap, op->offset, op->bytes); action = mirror_error_action(s, true, -ret); if (action == BLOCK_ERROR_ACTION_REPORT && s->ret >= 0) { s->ret = ret; @@ -382,8 +380,8 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) * calling bdrv_get_block_status_above could yield - if some blocks are * marked dirty in this window, we need to know. 
*/ - bdrv_reset_dirty_bitmap_locked(s->dirty_bitmap, offset >> BDRV_SECTOR_BITS, - nb_chunks * sectors_per_chunk); + bdrv_reset_dirty_bitmap_locked(s->dirty_bitmap, offset, + nb_chunks * s->granularity); bdrv_dirty_bitmap_unlock(s->dirty_bitmap); bitmap_set(s->in_flight_bitmap, offset / s->granularity, nb_chunks); @@ -625,7 +623,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) if (base == NULL && !bdrv_has_zero_init(target_bs)) { if (!bdrv_can_write_zeroes_with_unmap(target_bs)) { - bdrv_set_dirty_bitmap(s->dirty_bitmap, 0, end); + bdrv_set_dirty_bitmap(s->dirty_bitmap, 0, s->bdev_length); return 0; } @@ -681,7 +679,9 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) n = count >> BDRV_SECTOR_BITS; assert(n > 0); if (ret == 1) { - bdrv_set_dirty_bitmap(s->dirty_bitmap, sector_num, n); + bdrv_set_dirty_bitmap(s->dirty_bitmap, + sector_num * BDRV_SECTOR_SIZE, + n * BDRV_SECTOR_SIZE); } sector_num += n; } -- cgit v1.1 From 23ca459a455c83ffadb03ab1eedf0b6cff62bfeb Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Mon, 25 Sep 2017 09:55:21 -0500 Subject: mirror: Switch mirror_dirty_init() to byte-based iteration Now that we have adjusted the majority of the calls this function makes to be byte-based, it is easier to read the code if it makes passes over the image using bytes rather than sectors. Signed-off-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Kevin Wolf Reviewed-by: Fam Zheng Signed-off-by: Kevin Wolf --- block/mirror.c | 38 ++++++++++++++------------------------ 1 file changed, 14 insertions(+), 24 deletions(-) (limited to 'block') diff --git a/block/mirror.c b/block/mirror.c index 5c0f79f..459b80f 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -612,15 +612,13 @@ static void mirror_throttle(MirrorBlockJob *s) static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) { - int64_t sector_num, end; + int64_t offset; BlockDriverState *base = s->base; BlockDriverState *bs = s->source; BlockDriverState *target_bs = blk_bs(s->target); - int ret, n; + int ret; int64_t count; - end = s->bdev_length / BDRV_SECTOR_SIZE; - if (base == NULL && !bdrv_has_zero_init(target_bs)) { if (!bdrv_can_write_zeroes_with_unmap(target_bs)) { bdrv_set_dirty_bitmap(s->dirty_bitmap, 0, s->bdev_length); @@ -628,9 +626,9 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) } s->initial_zeroing_ongoing = true; - for (sector_num = 0; sector_num < end; ) { - int nb_sectors = MIN(end - sector_num, - QEMU_ALIGN_DOWN(INT_MAX, s->granularity) >> BDRV_SECTOR_BITS); + for (offset = 0; offset < s->bdev_length; ) { + int bytes = MIN(s->bdev_length - offset, + QEMU_ALIGN_DOWN(INT_MAX, s->granularity)); mirror_throttle(s); @@ -646,9 +644,8 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) continue; } - mirror_do_zero_or_discard(s, sector_num * BDRV_SECTOR_SIZE, - nb_sectors * BDRV_SECTOR_SIZE, false); - sector_num += nb_sectors; + mirror_do_zero_or_discard(s, offset, bytes, false); + offset += bytes; } mirror_wait_for_all_io(s); @@ -656,10 +653,10 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) } /* First part, loop on the sectors and initialize the dirty bitmap. */ - for (sector_num = 0; sector_num < end; ) { + for (offset = 0; offset < s->bdev_length; ) { /* Just to make sure we are not exceeding int limit. 
*/ - int nb_sectors = MIN(INT_MAX >> BDRV_SECTOR_BITS, - end - sector_num); + int bytes = MIN(s->bdev_length - offset, + QEMU_ALIGN_DOWN(INT_MAX, s->granularity)); mirror_throttle(s); @@ -667,23 +664,16 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) return 0; } - ret = bdrv_is_allocated_above(bs, base, sector_num * BDRV_SECTOR_SIZE, - nb_sectors * BDRV_SECTOR_SIZE, &count); + ret = bdrv_is_allocated_above(bs, base, offset, bytes, &count); if (ret < 0) { return ret; } - /* TODO: Relax this once bdrv_is_allocated_above and dirty - * bitmaps no longer require sector alignment. */ - assert(QEMU_IS_ALIGNED(count, BDRV_SECTOR_SIZE)); - n = count >> BDRV_SECTOR_BITS; - assert(n > 0); + assert(count); if (ret == 1) { - bdrv_set_dirty_bitmap(s->dirty_bitmap, - sector_num * BDRV_SECTOR_SIZE, - n * BDRV_SECTOR_SIZE); + bdrv_set_dirty_bitmap(s->dirty_bitmap, offset, count); } - sector_num += n; + offset += count; } return 0; } -- cgit v1.1 From b85ee45334585c49c5489a303336df119408763a Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Mon, 25 Sep 2017 09:55:22 -0500 Subject: qcow2: Switch qcow2_measure() to byte-based iteration This is new code, but it is easier to read if it makes passes over the image using bytes rather than sectors (and will get easier in the future when bdrv_get_block_status is converted to byte-based). Signed-off-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Kevin Wolf Reviewed-by: Fam Zheng Signed-off-by: Kevin Wolf --- block/qcow2.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) (limited to 'block') diff --git a/block/qcow2.c b/block/qcow2.c index 970006f..b8da8ca 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -3673,20 +3673,19 @@ static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs, */ required = virtual_size; } else { - int cluster_sectors = cluster_size / BDRV_SECTOR_SIZE; - int64_t sector_num; + int64_t offset; int pnum = 0; - for (sector_num = 0; - sector_num < ssize / BDRV_SECTOR_SIZE; - sector_num += pnum) { - int nb_sectors = MIN(ssize / BDRV_SECTOR_SIZE - sector_num, - BDRV_REQUEST_MAX_SECTORS); + for (offset = 0; offset < ssize; + offset += pnum * BDRV_SECTOR_SIZE) { + int nb_sectors = MIN(ssize - offset, + BDRV_REQUEST_MAX_BYTES) / BDRV_SECTOR_SIZE; BlockDriverState *file; int64_t ret; ret = bdrv_get_block_status_above(in_bs, NULL, - sector_num, nb_sectors, + offset >> BDRV_SECTOR_BITS, + nb_sectors, &pnum, &file); if (ret < 0) { error_setg_errno(&local_err, -ret, @@ -3699,12 +3698,11 @@ static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs, } else if ((ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) == (BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) { /* Extend pnum to end of cluster for next iteration */ - pnum = ROUND_UP(sector_num + pnum, cluster_sectors) - - sector_num; + pnum = (ROUND_UP(offset + pnum * BDRV_SECTOR_SIZE, + cluster_size) - offset) >> BDRV_SECTOR_BITS; /* Count clusters we've seen */ - required += (sector_num % cluster_sectors + pnum) * - BDRV_SECTOR_SIZE; + required += offset % cluster_size + pnum * BDRV_SECTOR_SIZE; } } } -- cgit v1.1 From ab94db6f7609e562b5f68cc71807d5d50e24e224 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Mon, 25 Sep 2017 09:55:23 -0500 Subject: qcow2: Switch load_bitmap_data() to byte-based iteration Now that we have adjusted the majority of the calls this function makes to be byte-based, it is easier to read the code if it makes passes over the image using bytes rather than sectors. 
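After the conversion, load_bitmap_data() walks the image in limit-sized strides, where limit is the byte span covered by one cluster of bitmap data, with only the final stride cut short at the end of the bitmap. A self-contained sketch of that stride/tail pattern (the 32 GiB limit and 100 GiB bitmap size are assumed example values; the macros are simplified stand-ins):

    #include <assert.h>
    #include <stdint.h>

    #define MIN(a, b) ((a) < (b) ? (a) : (b))
    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    int main(void)
    {
        uint64_t limit   = 32ULL << 30;   /* bytes covered by one cluster of bitmap data */
        uint64_t bm_size = 100ULL << 30;  /* bytes covered by the whole dirty bitmap */
        uint64_t tab_size = DIV_ROUND_UP(bm_size, limit);
        uint64_t i, offset, total = 0;

        for (i = 0, offset = 0; i < tab_size; i++, offset += limit) {
            uint64_t count = MIN(bm_size - offset, limit);   /* only the tail is short */

            /* ...deserialize 'count' bytes worth of bitmap at 'offset' here... */
            total += count;
        }
        assert(tab_size == 4);        /* ceil(100 / 32) table entries */
        assert(total == bm_size);     /* the strides tile the image exactly */
        return 0;
    }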
Signed-off-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Kevin Wolf Reviewed-by: Fam Zheng Signed-off-by: Kevin Wolf --- block/qcow2-bitmap.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) (limited to 'block') diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c index 2d8dcba..02512a2 100644 --- a/block/qcow2-bitmap.c +++ b/block/qcow2-bitmap.c @@ -291,9 +291,8 @@ static int load_bitmap_data(BlockDriverState *bs, { int ret = 0; BDRVQcow2State *s = bs->opaque; - uint64_t sector, limit, sbc; + uint64_t offset, limit; uint64_t bm_size = bdrv_dirty_bitmap_size(bitmap); - uint64_t bm_sectors = DIV_ROUND_UP(bm_size, BDRV_SECTOR_SIZE); uint8_t *buf = NULL; uint64_t i, tab_size = size_to_clusters(s, @@ -305,32 +304,27 @@ static int load_bitmap_data(BlockDriverState *bs, buf = g_malloc(s->cluster_size); limit = bytes_covered_by_bitmap_cluster(s, bitmap); - sbc = limit >> BDRV_SECTOR_BITS; - for (i = 0, sector = 0; i < tab_size; ++i, sector += sbc) { - uint64_t count = MIN(bm_sectors - sector, sbc); + for (i = 0, offset = 0; i < tab_size; ++i, offset += limit) { + uint64_t count = MIN(bm_size - offset, limit); uint64_t entry = bitmap_table[i]; - uint64_t offset = entry & BME_TABLE_ENTRY_OFFSET_MASK; + uint64_t data_offset = entry & BME_TABLE_ENTRY_OFFSET_MASK; assert(check_table_entry(entry, s->cluster_size) == 0); - if (offset == 0) { + if (data_offset == 0) { if (entry & BME_TABLE_ENTRY_FLAG_ALL_ONES) { - bdrv_dirty_bitmap_deserialize_ones(bitmap, - sector * BDRV_SECTOR_SIZE, - count * BDRV_SECTOR_SIZE, + bdrv_dirty_bitmap_deserialize_ones(bitmap, offset, count, false); } else { /* No need to deserialize zeros because the dirty bitmap is * already cleared */ } } else { - ret = bdrv_pread(bs->file, offset, buf, s->cluster_size); + ret = bdrv_pread(bs->file, data_offset, buf, s->cluster_size); if (ret < 0) { goto finish; } - bdrv_dirty_bitmap_deserialize_part(bitmap, buf, - sector * BDRV_SECTOR_SIZE, - count * BDRV_SECTOR_SIZE, + bdrv_dirty_bitmap_deserialize_part(bitmap, buf, offset, count, false); } } -- cgit v1.1 From 49d741b5041b79214db58f364cebe2f367517711 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Mon, 25 Sep 2017 09:55:24 -0500 Subject: qcow2: Switch store_bitmap_data() to byte-based iteration Now that we have adjusted the majority of the calls this function makes to be byte-based, it is easier to read the code if it makes passes over the image using bytes rather than sectors. iotests 165 was rather weak - on a default 64k-cluster image, where bitmap granularity also defaults to 64k bytes, a single cluster of the bitmap table thus covers (64*1024*8) bits which each cover 64k bytes, or 32G of image space. But the test only uses a 1G image, so it cannot trigger any more than one loop of the code in store_bitmap_data(); and it was writing to the first cluster. In order to test that we are properly aligning which portions of the bitmap are being written to the file, we really want to test a case where the first dirty bit returned by bdrv_dirty_iter_next() is not aligned to the start of a cluster, which we can do by modifying the test to write data that doesn't happen to fall in the first cluster of the image. 
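The 32G figure above is just the 'limit' computed by bytes_covered_by_bitmap_cluster(); a short sketch of the coverage arithmetic (the 64k cluster size and 64k granularity are the defaults named in the commit message):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t cluster_size = 64 * 1024;   /* bytes per qcow2 cluster */
        uint64_t granularity  = 64 * 1024;   /* image bytes tracked per bitmap bit */

        uint64_t bits_per_cluster = cluster_size * 8;             /* 524288 bits */
        uint64_t bytes_covered    = bits_per_cluster * granularity;

        /* one cluster of serialized bitmap data covers 32 GiB of image,
         * so a 1G test image never leaves the first bitmap cluster */
        assert(bytes_covered == 32ULL << 30);
        return 0;
    }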
Signed-off-by: Eric Blake Reviewed-by: Vladimir Sementsov-Ogievskiy Reviewed-by: John Snow Reviewed-by: Fam Zheng Signed-off-by: Kevin Wolf --- block/qcow2-bitmap.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) (limited to 'block') diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c index 02512a2..f45e46c 100644 --- a/block/qcow2-bitmap.c +++ b/block/qcow2-bitmap.c @@ -1072,10 +1072,9 @@ static uint64_t *store_bitmap_data(BlockDriverState *bs, { int ret; BDRVQcow2State *s = bs->opaque; - int64_t sector; - uint64_t limit, sbc; + int64_t offset; + uint64_t limit; uint64_t bm_size = bdrv_dirty_bitmap_size(bitmap); - uint64_t bm_sectors = DIV_ROUND_UP(bm_size, BDRV_SECTOR_SIZE); const char *bm_name = bdrv_dirty_bitmap_name(bitmap); uint8_t *buf = NULL; BdrvDirtyBitmapIter *dbi; @@ -1100,18 +1099,22 @@ static uint64_t *store_bitmap_data(BlockDriverState *bs, dbi = bdrv_dirty_iter_new(bitmap); buf = g_malloc(s->cluster_size); limit = bytes_covered_by_bitmap_cluster(s, bitmap); - sbc = limit >> BDRV_SECTOR_BITS; assert(DIV_ROUND_UP(bm_size, limit) == tb_size); - while ((sector = bdrv_dirty_iter_next(dbi) >> BDRV_SECTOR_BITS) >= 0) { - uint64_t cluster = sector / sbc; + while ((offset = bdrv_dirty_iter_next(dbi)) >= 0) { + uint64_t cluster = offset / limit; uint64_t end, write_size; int64_t off; - sector = cluster * sbc; - end = MIN(bm_sectors, sector + sbc); - write_size = bdrv_dirty_bitmap_serialization_size(bitmap, - sector * BDRV_SECTOR_SIZE, (end - sector) * BDRV_SECTOR_SIZE); + /* + * We found the first dirty offset, but want to write out the + * entire cluster of the bitmap that includes that offset, + * including any leading zero bits. + */ + offset = QEMU_ALIGN_DOWN(offset, limit); + end = MIN(bm_size, offset + limit); + write_size = bdrv_dirty_bitmap_serialization_size(bitmap, offset, + end - offset); assert(write_size <= s->cluster_size); off = qcow2_alloc_clusters(bs, s->cluster_size); @@ -1123,9 +1126,7 @@ static uint64_t *store_bitmap_data(BlockDriverState *bs, } tb[cluster] = off; - bdrv_dirty_bitmap_serialize_part(bitmap, buf, - sector * BDRV_SECTOR_SIZE, - (end - sector) * BDRV_SECTOR_SIZE); + bdrv_dirty_bitmap_serialize_part(bitmap, buf, offset, end - offset); if (write_size < s->cluster_size) { memset(buf + write_size, 0, s->cluster_size - write_size); } @@ -1143,11 +1144,11 @@ static uint64_t *store_bitmap_data(BlockDriverState *bs, goto fail; } - if (end >= bm_sectors) { + if (end >= bm_size) { break; } - bdrv_set_dirty_iter(dbi, end * BDRV_SECTOR_SIZE); + bdrv_set_dirty_iter(dbi, end); } *bitmap_table_size = tb_size; -- cgit v1.1 From 0fdf1a4f68e5f27df9bb10a7223bd519c8d30e31 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Mon, 25 Sep 2017 09:55:25 -0500 Subject: dirty-bitmap: Switch bdrv_set_dirty() to bytes Both callers already had bytes available, but were scaling to sectors. Move the scaling to internal code. In the case of bdrv_aligned_pwritev(), we are now passing the exact offset rather than a rounded sector-aligned value, but that's okay as long as dirty bitmap widens start/bytes to granularity boundaries. 
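The widening relied on here is inherent to granularity-based tracking: setting any byte dirty marks the entire granularity-sized chunk containing it, so passing an exact byte offset instead of a sector-rounded one cannot lose information. A toy model of that behaviour (one bool per chunk; deliberately much simpler than the real hbitmap; the 64k granularity is an assumed example):

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    #define GRANULARITY 65536   /* image bytes per dirty bit */
    #define NB_CHUNKS   16

    static bool dirty[NB_CHUNKS];

    /* mark [offset, offset + bytes) dirty, widening to chunk boundaries */
    static void set_dirty(uint64_t offset, uint64_t bytes)
    {
        uint64_t chunk;

        for (chunk = offset / GRANULARITY;
             chunk <= (offset + bytes - 1) / GRANULARITY; chunk++) {
            dirty[chunk] = true;
        }
    }

    int main(void)
    {
        /* an unaligned one-byte write... */
        set_dirty(GRANULARITY + 100, 1);
        /* ...dirties the whole second chunk and nothing else, so exact
         * byte offsets from bdrv_aligned_pwritev() are fine */
        assert(dirty[1] && !dirty[0] && !dirty[2]);
        return 0;
    }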
Signed-off-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Kevin Wolf Reviewed-by: Fam Zheng Signed-off-by: Kevin Wolf --- block/dirty-bitmap.c | 7 ++++--- block/io.c | 6 ++---- 2 files changed, 6 insertions(+), 7 deletions(-) (limited to 'block') diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c index 117837b..58a3f33 100644 --- a/block/dirty-bitmap.c +++ b/block/dirty-bitmap.c @@ -628,10 +628,10 @@ void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap) hbitmap_deserialize_finish(bitmap->bitmap); } -void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, - int64_t nr_sectors) +void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes) { BdrvDirtyBitmap *bitmap; + int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE); if (QLIST_EMPTY(&bs->dirty_bitmaps)) { return; @@ -643,7 +643,8 @@ void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, continue; } assert(!bdrv_dirty_bitmap_readonly(bitmap)); - hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors); + hbitmap_set(bitmap->bitmap, offset >> BDRV_SECTOR_BITS, + end_sector - (offset >> BDRV_SECTOR_BITS)); } bdrv_dirty_bitmaps_unlock(bs); } diff --git a/block/io.c b/block/io.c index d633b0f..e0f9045 100644 --- a/block/io.c +++ b/block/io.c @@ -1334,7 +1334,6 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child, bool waited; int ret; - int64_t start_sector = offset >> BDRV_SECTOR_BITS; int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE); uint64_t bytes_remaining = bytes; int max_transfer; @@ -1409,7 +1408,7 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child, bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE); atomic_inc(&bs->write_gen); - bdrv_set_dirty(bs, start_sector, end_sector - start_sector); + bdrv_set_dirty(bs, offset, bytes); stat64_max(&bs->wr_highest_offset, offset + bytes); @@ -2438,8 +2437,7 @@ int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset, ret = 0; out: atomic_inc(&bs->write_gen); - bdrv_set_dirty(bs, req.offset >> BDRV_SECTOR_BITS, - req.bytes >> BDRV_SECTOR_BITS); + bdrv_set_dirty(bs, req.offset, req.bytes); tracked_request_end(&req); bdrv_dec_in_flight(bs); return ret; -- cgit v1.1 From ca759622441da1d19107efaa62d9d63fb83ebb79 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Mon, 25 Sep 2017 09:55:26 -0500 Subject: dirty-bitmap: Convert internal hbitmap size/granularity Now that all callers are using byte-based interfaces, there's no reason for our internal hbitmap to remain with sector-based granularity. It also simplifies our internal scaling, since we already know that hbitmap widens requests out to granularity boundaries. 
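For reference, the hbitmap granularity parameter is just log2 of the byte granularity, which is why the allocation in the diff below can pass ctz32(granularity) directly instead of subtracting BDRV_SECTOR_BITS. A small illustration of that relationship (using the GCC/Clang builtin rather than QEMU's ctz32(), purely as a sketch):

    #include <stdio.h>

    int main(void)
    {
        unsigned granularity = 64 * 1024;    /* power of two, in bytes */

        /* the count of trailing zero bits equals log2 for a power of two;
         * this is the per-bit shift the hbitmap applies internally */
        int shift = __builtin_ctz(granularity);

        printf("shift=%d, bytes per bit=%u\n", shift, 1u << shift);
        /* prints: shift=16, bytes per bit=65536 */
        return 0;
    }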
Signed-off-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Kevin Wolf Reviewed-by: Fam Zheng Signed-off-by: Kevin Wolf --- block/dirty-bitmap.c | 62 +++++++++++++++------------------------------------- 1 file changed, 18 insertions(+), 44 deletions(-) (limited to 'block') diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c index 58a3f33..bd04e99 100644 --- a/block/dirty-bitmap.c +++ b/block/dirty-bitmap.c @@ -38,7 +38,7 @@ */ struct BdrvDirtyBitmap { QemuMutex *mutex; - HBitmap *bitmap; /* Dirty sector bitmap implementation */ + HBitmap *bitmap; /* Dirty bitmap implementation */ HBitmap *meta; /* Meta dirty bitmap */ BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */ char *name; /* Optional non-empty unique ID */ @@ -130,12 +130,7 @@ BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, } bitmap = g_new0(BdrvDirtyBitmap, 1); bitmap->mutex = &bs->dirty_bitmap_mutex; - /* - * TODO - let hbitmap track full granularity. For now, it is tracking - * only sector granularity, as a shortcut for our iterators. - */ - bitmap->bitmap = hbitmap_alloc(DIV_ROUND_UP(bitmap_size, BDRV_SECTOR_SIZE), - ctz32(granularity) - BDRV_SECTOR_BITS); + bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(granularity)); bitmap->size = bitmap_size; bitmap->name = g_strdup(name); bitmap->disabled = false; @@ -312,7 +307,7 @@ void bdrv_dirty_bitmap_truncate(BlockDriverState *bs, int64_t bytes) QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { assert(!bdrv_dirty_bitmap_frozen(bitmap)); assert(!bitmap->active_iterators); - hbitmap_truncate(bitmap->bitmap, DIV_ROUND_UP(bytes, BDRV_SECTOR_SIZE)); + hbitmap_truncate(bitmap->bitmap, bytes); bitmap->size = bytes; } bdrv_dirty_bitmaps_unlock(bs); @@ -442,7 +437,7 @@ bool bdrv_get_dirty_locked(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t offset) { if (bitmap) { - return hbitmap_get(bitmap->bitmap, offset >> BDRV_SECTOR_BITS); + return hbitmap_get(bitmap->bitmap, offset); } else { return false; } @@ -470,7 +465,7 @@ uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs) uint32_t bdrv_dirty_bitmap_granularity(const BdrvDirtyBitmap *bitmap) { - return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap); + return 1U << hbitmap_granularity(bitmap->bitmap); } BdrvDirtyBitmapIter *bdrv_dirty_iter_new(BdrvDirtyBitmap *bitmap) @@ -503,20 +498,16 @@ void bdrv_dirty_iter_free(BdrvDirtyBitmapIter *iter) int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter) { - int64_t ret = hbitmap_iter_next(&iter->hbi); - return ret < 0 ? 
-1 : ret * BDRV_SECTOR_SIZE; + return hbitmap_iter_next(&iter->hbi); } /* Called within bdrv_dirty_bitmap_lock..unlock */ void bdrv_set_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap, int64_t offset, int64_t bytes) { - int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE); - assert(bdrv_dirty_bitmap_enabled(bitmap)); assert(!bdrv_dirty_bitmap_readonly(bitmap)); - hbitmap_set(bitmap->bitmap, offset >> BDRV_SECTOR_BITS, - end_sector - (offset >> BDRV_SECTOR_BITS)); + hbitmap_set(bitmap->bitmap, offset, bytes); } void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap, @@ -531,12 +522,9 @@ void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap, void bdrv_reset_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap, int64_t offset, int64_t bytes) { - int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE); - assert(bdrv_dirty_bitmap_enabled(bitmap)); assert(!bdrv_dirty_bitmap_readonly(bitmap)); - hbitmap_reset(bitmap->bitmap, offset >> BDRV_SECTOR_BITS, - end_sector - (offset >> BDRV_SECTOR_BITS)); + hbitmap_reset(bitmap->bitmap, offset, bytes); } void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap, @@ -556,8 +544,7 @@ void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out) hbitmap_reset_all(bitmap->bitmap); } else { HBitmap *backup = bitmap->bitmap; - bitmap->bitmap = hbitmap_alloc(DIV_ROUND_UP(bitmap->size, - BDRV_SECTOR_SIZE), + bitmap->bitmap = hbitmap_alloc(bitmap->size, hbitmap_granularity(backup)); *out = backup; } @@ -576,51 +563,40 @@ void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in) uint64_t bdrv_dirty_bitmap_serialization_size(const BdrvDirtyBitmap *bitmap, uint64_t offset, uint64_t bytes) { - assert(QEMU_IS_ALIGNED(offset | bytes, BDRV_SECTOR_SIZE)); - return hbitmap_serialization_size(bitmap->bitmap, - offset >> BDRV_SECTOR_BITS, - bytes >> BDRV_SECTOR_BITS); + return hbitmap_serialization_size(bitmap->bitmap, offset, bytes); } uint64_t bdrv_dirty_bitmap_serialization_align(const BdrvDirtyBitmap *bitmap) { - return hbitmap_serialization_align(bitmap->bitmap) * BDRV_SECTOR_SIZE; + return hbitmap_serialization_align(bitmap->bitmap); } void bdrv_dirty_bitmap_serialize_part(const BdrvDirtyBitmap *bitmap, uint8_t *buf, uint64_t offset, uint64_t bytes) { - assert(QEMU_IS_ALIGNED(offset | bytes, BDRV_SECTOR_SIZE)); - hbitmap_serialize_part(bitmap->bitmap, buf, offset >> BDRV_SECTOR_BITS, - bytes >> BDRV_SECTOR_BITS); + hbitmap_serialize_part(bitmap->bitmap, buf, offset, bytes); } void bdrv_dirty_bitmap_deserialize_part(BdrvDirtyBitmap *bitmap, uint8_t *buf, uint64_t offset, uint64_t bytes, bool finish) { - assert(QEMU_IS_ALIGNED(offset | bytes, BDRV_SECTOR_SIZE)); - hbitmap_deserialize_part(bitmap->bitmap, buf, offset >> BDRV_SECTOR_BITS, - bytes >> BDRV_SECTOR_BITS, finish); + hbitmap_deserialize_part(bitmap->bitmap, buf, offset, bytes, finish); } void bdrv_dirty_bitmap_deserialize_zeroes(BdrvDirtyBitmap *bitmap, uint64_t offset, uint64_t bytes, bool finish) { - assert(QEMU_IS_ALIGNED(offset | bytes, BDRV_SECTOR_SIZE)); - hbitmap_deserialize_zeroes(bitmap->bitmap, offset >> BDRV_SECTOR_BITS, - bytes >> BDRV_SECTOR_BITS, finish); + hbitmap_deserialize_zeroes(bitmap->bitmap, offset, bytes, finish); } void bdrv_dirty_bitmap_deserialize_ones(BdrvDirtyBitmap *bitmap, uint64_t offset, uint64_t bytes, bool finish) { - assert(QEMU_IS_ALIGNED(offset | bytes, BDRV_SECTOR_SIZE)); - hbitmap_deserialize_ones(bitmap->bitmap, offset >> BDRV_SECTOR_BITS, - bytes >> BDRV_SECTOR_BITS, finish); + hbitmap_deserialize_ones(bitmap->bitmap, offset, bytes, finish); } void 
bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap) @@ -631,7 +607,6 @@ void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap) void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes) { BdrvDirtyBitmap *bitmap; - int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE); if (QLIST_EMPTY(&bs->dirty_bitmaps)) { return; @@ -643,8 +618,7 @@ void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes) continue; } assert(!bdrv_dirty_bitmap_readonly(bitmap)); - hbitmap_set(bitmap->bitmap, offset >> BDRV_SECTOR_BITS, - end_sector - (offset >> BDRV_SECTOR_BITS)); + hbitmap_set(bitmap->bitmap, offset, bytes); } bdrv_dirty_bitmaps_unlock(bs); } @@ -654,12 +628,12 @@ void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes) */ void bdrv_set_dirty_iter(BdrvDirtyBitmapIter *iter, int64_t offset) { - hbitmap_iter_init(&iter->hbi, iter->hbi.hb, offset >> BDRV_SECTOR_BITS); + hbitmap_iter_init(&iter->hbi, iter->hbi.hb, offset); } int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap) { - return hbitmap_count(bitmap->bitmap) << BDRV_SECTOR_BITS; + return hbitmap_count(bitmap->bitmap); } int64_t bdrv_get_meta_dirty_count(BdrvDirtyBitmap *bitmap) -- cgit v1.1 From 61f09cea01391eaa23ea3bc78ab37a7d2da565fb Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 19 Sep 2017 16:22:54 +0200 Subject: commit: Support multiple roots above top node This changes the commit block job to support operation in a graph where there is more than a single active layer that references the top node. This involves inserting the commit filter node not only on the path between the given active node and the top node, but between the top node and all of its parents. On completion, bdrv_drop_intermediate() must consider all parents for updating the backing file link. These parents may be backing files themselves and as such read-only; reopen them temporarily if necessary. Previously this was achieved by the bdrv_reopen() calls in the commit block job that made overlay_bs read-write for the whole duration of the block job, even though write access is only needed on completion. Now that we consider all parents, overlay_bs is meaningless. It is left in place in this commit, but we'll remove it soon. Signed-off-by: Kevin Wolf --- block/commit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block') diff --git a/block/commit.c b/block/commit.c index 8f0e835..610e1cd 100644 --- a/block/commit.c +++ b/block/commit.c @@ -350,7 +350,7 @@ void commit_start(const char *job_id, BlockDriverState *bs, error_propagate(errp, local_err); goto fail; } - bdrv_set_backing_hd(overlay_bs, commit_top_bs, &local_err); + bdrv_replace_node(top, commit_top_bs, &local_err); if (local_err) { bdrv_unref(commit_top_bs); commit_top_bs = NULL; -- cgit v1.1 From bde70715b67cc5183b00b445b811c1dfc0f74d2e Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 27 Jun 2017 20:36:18 +0200 Subject: commit: Remove overlay_bs We don't need to make any assumptions about the graph layout above the top node of the commit operation any more. Remove the use of bdrv_find_overlay() and related variables from the commit job code. bdrv_drop_intermediate() doesn't use the 'active' parameter any more, so we can just drop it. The overlay node was previously added to the block job to get a BLK_PERM_GRAPH_MOD. 
We really need to respect those permissions in bdrv_drop_intermediate() now, but as long as we haven't figured out yet how BLK_PERM_GRAPH_MOD is actually supposed to work, just leave a TODO comment there. With this change, it is now possible to perform another block job on an overlay node without conflicts. qemu-iotests 030 is changed accordingly. Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake --- block/commit.c | 62 ++++++++++++++-------------------------------------------- 1 file changed, 15 insertions(+), 47 deletions(-) (limited to 'block') diff --git a/block/commit.c b/block/commit.c index 610e1cd..5036eec 100644 --- a/block/commit.c +++ b/block/commit.c @@ -36,13 +36,11 @@ enum { typedef struct CommitBlockJob { BlockJob common; RateLimit limit; - BlockDriverState *active; BlockDriverState *commit_top_bs; BlockBackend *top; BlockBackend *base; BlockdevOnError on_error; int base_flags; - int orig_overlay_flags; char *backing_file_str; } CommitBlockJob; @@ -81,18 +79,15 @@ static void commit_complete(BlockJob *job, void *opaque) { CommitBlockJob *s = container_of(job, CommitBlockJob, common); CommitCompleteData *data = opaque; - BlockDriverState *active = s->active; BlockDriverState *top = blk_bs(s->top); BlockDriverState *base = blk_bs(s->base); - BlockDriverState *overlay_bs = bdrv_find_overlay(active, s->commit_top_bs); + BlockDriverState *commit_top_bs = s->commit_top_bs; int ret = data->ret; bool remove_commit_top_bs = false; - /* Make sure overlay_bs and top stay around until bdrv_set_backing_hd() */ + /* Make sure commit_top_bs and top stay around until bdrv_replace_node() */ bdrv_ref(top); - if (overlay_bs) { - bdrv_ref(overlay_bs); - } + bdrv_ref(commit_top_bs); /* Remove base node parent that still uses BLK_PERM_WRITE/RESIZE before * the normal backing chain can be restored. */ @@ -100,9 +95,9 @@ static void commit_complete(BlockJob *job, void *opaque) if (!block_job_is_cancelled(&s->common) && ret == 0) { /* success */ - ret = bdrv_drop_intermediate(active, s->commit_top_bs, base, + ret = bdrv_drop_intermediate(s->commit_top_bs, base, s->backing_file_str); - } else if (overlay_bs) { + } else { /* XXX Can (or should) we somehow keep 'consistent read' blocked even * after the failed/cancelled commit job is gone? If we already wrote * something to base, the intermediate images aren't valid any more. */ @@ -115,9 +110,6 @@ static void commit_complete(BlockJob *job, void *opaque) if (s->base_flags != bdrv_get_flags(base)) { bdrv_reopen(base, s->base_flags, NULL); } - if (overlay_bs && s->orig_overlay_flags != bdrv_get_flags(overlay_bs)) { - bdrv_reopen(overlay_bs, s->orig_overlay_flags, NULL); - } g_free(s->backing_file_str); blk_unref(s->top); @@ -134,10 +126,13 @@ static void commit_complete(BlockJob *job, void *opaque) * filter driver from the backing chain. Do this as the final step so that * the 'consistent read' permission can be granted. 
*/ if (remove_commit_top_bs) { - bdrv_set_backing_hd(overlay_bs, top, &error_abort); + bdrv_child_try_set_perm(commit_top_bs->backing, 0, BLK_PERM_ALL, + &error_abort); + bdrv_replace_node(commit_top_bs, backing_bs(commit_top_bs), + &error_abort); } - bdrv_unref(overlay_bs); + bdrv_unref(commit_top_bs); bdrv_unref(top); } @@ -283,10 +278,8 @@ void commit_start(const char *job_id, BlockDriverState *bs, { CommitBlockJob *s; BlockReopenQueue *reopen_queue = NULL; - int orig_overlay_flags; int orig_base_flags; BlockDriverState *iter; - BlockDriverState *overlay_bs; BlockDriverState *commit_top_bs = NULL; Error *local_err = NULL; int ret; @@ -297,31 +290,19 @@ void commit_start(const char *job_id, BlockDriverState *bs, return; } - overlay_bs = bdrv_find_overlay(bs, top); - - if (overlay_bs == NULL) { - error_setg(errp, "Could not find overlay image for %s:", top->filename); - return; - } - s = block_job_create(job_id, &commit_job_driver, bs, 0, BLK_PERM_ALL, speed, BLOCK_JOB_DEFAULT, NULL, NULL, errp); if (!s) { return; } - orig_base_flags = bdrv_get_flags(base); - orig_overlay_flags = bdrv_get_flags(overlay_bs); - - /* convert base & overlay_bs to r/w, if necessary */ + /* convert base to r/w, if necessary */ + orig_base_flags = bdrv_get_flags(base); if (!(orig_base_flags & BDRV_O_RDWR)) { reopen_queue = bdrv_reopen_queue(reopen_queue, base, NULL, orig_base_flags | BDRV_O_RDWR); } - if (!(orig_overlay_flags & BDRV_O_RDWR)) { - reopen_queue = bdrv_reopen_queue(reopen_queue, overlay_bs, NULL, - orig_overlay_flags | BDRV_O_RDWR); - } + if (reopen_queue) { bdrv_reopen_multiple(bdrv_get_aio_context(bs), reopen_queue, &local_err); if (local_err != NULL) { @@ -382,14 +363,6 @@ void commit_start(const char *job_id, BlockDriverState *bs, goto fail; } - /* overlay_bs must be blocked because it needs to be modified to - * update the backing image string. */ - ret = block_job_add_bdrv(&s->common, "overlay of top", overlay_bs, - BLK_PERM_GRAPH_MOD, BLK_PERM_ALL, errp); - if (ret < 0) { - goto fail; - } - s->base = blk_new(BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE | BLK_PERM_RESIZE, @@ -408,13 +381,8 @@ void commit_start(const char *job_id, BlockDriverState *bs, goto fail; } - s->active = bs; - - s->base_flags = orig_base_flags; - s->orig_overlay_flags = orig_overlay_flags; - + s->base_flags = orig_base_flags; s->backing_file_str = g_strdup(backing_file_str); - s->on_error = on_error; trace_commit_start(bs, base, top, s); @@ -429,7 +397,7 @@ fail: blk_unref(s->top); } if (commit_top_bs) { - bdrv_set_backing_hd(overlay_bs, top, &error_abort); + bdrv_replace_node(commit_top_bs, top, &error_abort); } block_job_early_fail(&s->common); } -- cgit v1.1 From 9cdcfd9f7afd0274919af95a164178ac6ee847ca Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Thu, 5 Oct 2017 14:02:44 -0500 Subject: block: Uniform handling of 0-length bdrv_get_block_status() Handle a 0-length block status request up front, with a uniform return value claiming the area is not allocated. Most callers don't pass a length of 0 to bdrv_get_block_status() and friends; but it definitely happens with a 0-length read when copy-on-read is enabled. While we could audit all callers to ensure that they never make a 0-length request, and then assert that fact, it was just as easy to fix things to always report success (as long as the callers are careful to not go into an infinite loop). 
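What that caller-side care amounts to, roughly: a loop that scans an image by repeatedly querying status must only keep going while there are bytes left to ask about, since a zero-length query now always answers "zero bytes, unallocated". A standalone sketch with a hypothetical stub in place of the real query (not the QEMU API):

    #include <stdint.h>
    #include <stdio.h>

    /* Stand-in for the block-status query: 0-length requests uniformly
     * report "0 bytes, unallocated" rather than driver-dependent results. */
    static int query_status(int64_t offset, int64_t bytes, int64_t *pnum)
    {
        if (bytes == 0) {
            *pnum = 0;
            return 0;                /* nothing examined, not allocated */
        }
        *pnum = bytes;               /* pretend the range is one extent */
        return 1;                    /* allocated */
    }

    int main(void)
    {
        int64_t offset = 0, remaining = 4096, pnum;

        /* Looping on 'remaining > 0' (not on the returned status) is what
         * keeps the 0-byte answer from turning into an endless loop. */
        while (remaining > 0) {
            int allocated = query_status(offset, remaining, &pnum);
            printf("offset=%lld bytes=%lld allocated=%d\n",
                   (long long)offset, (long long)pnum, allocated);
            offset += pnum;
            remaining -= pnum;
        }
        return 0;
    }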
However, we had inconsistent behavior on whether the status is reported as allocated or defers to the backing layer, depending on what callbacks the driver implements, and possibly wasting quite a few CPU cycles to get to that answer. Consistently reporting unallocated up front doesn't really hurt anything, and makes it easier both for callers (0-length requests now have well-defined behavior) and for drivers (drivers don't have to deal with 0-length requests). Signed-off-by: Eric Blake Reviewed-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- block/io.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'block') diff --git a/block/io.c b/block/io.c index e0f9045..94f7470 100644 --- a/block/io.c +++ b/block/io.c @@ -1777,6 +1777,10 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs, *pnum = 0; return BDRV_BLOCK_EOF; } + if (!nb_sectors) { + *pnum = 0; + return 0; + } n = total_sectors - sector_num; if (n < nb_sectors) { -- cgit v1.1 From d855ebcd3cca4080a81aeec9c0a27af006734280 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Thu, 5 Oct 2017 14:02:46 -0500 Subject: block: Add blkdebug hook for copy-on-read Make it possible to inject errors on writes performed during a read operation due to copy-on-read semantics. Signed-off-by: Eric Blake Reviewed-by: Jeff Cody Reviewed-by: Kevin Wolf Reviewed-by: John Snow Reviewed-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- block/io.c | 1 + 1 file changed, 1 insertion(+) (limited to 'block') diff --git a/block/io.c b/block/io.c index 94f7470..a5598ed 100644 --- a/block/io.c +++ b/block/io.c @@ -983,6 +983,7 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child, goto err; } + bdrv_debug_event(bs, BLKDBG_COR_WRITE); if (drv->bdrv_co_pwrite_zeroes && buffer_is_zero(bounce_buffer, iov.iov_len)) { /* FIXME: Should we (perhaps conditionally) be setting -- cgit v1.1 From cb2e28780c7080af489e72227683fe374f05022d Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Thu, 5 Oct 2017 14:02:47 -0500 Subject: block: Perform copy-on-read in loop Improve our braindead copy-on-read implementation. Pre-patch, we have multiple issues: - we create a bounce buffer and perform a write for the entire request, even if the active image already has 99% of the clusters occupied, and really only needs to copy-on-read the remaining 1% of the clusters - our bounce buffer was as large as the read request, and can needlessly exhaust our memory by using double the memory of the request size (the original request plus our bounce buffer), rather than a capped maximum overhead beyond the original - if a driver has a max_transfer limit, we are bypassing the normal code in bdrv_aligned_preadv() that fragments to that limit, and instead attempt to read the entire buffer from the driver in one go, which some drivers may assert on - a client can request a large request of nearly 2G such that rounding the request out to cluster boundaries results in a byte count larger than 2G. While this cannot exceed 32 bits, it DOES have some follow-on problems: -- the call to bdrv_driver_pread() can assert for exceeding BDRV_REQUEST_MAX_BYTES, if the driver is old and lacks .bdrv_co_preadv -- if the buffer is all zeroes, the subsequent call to bdrv_co_do_pwrite_zeroes is a no-op due to a negative size, which means we did not actually copy on read Fix all of these issues by breaking up the action into a loop, where each iteration is capped to sane limits. 
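The shape of that loop, as a rough standalone sketch (the constants are examples, not the exact values from the patch, and the real code additionally consults the allocation status as described next):

    #include <stdint.h>
    #include <stdio.h>

    #define MIN(a, b) ((a) < (b) ? (a) : (b))

    int main(void)
    {
        uint64_t max_bounce    = 32768u << 9;          /* 16 MiB ceiling */
        uint64_t max_transfer  = 2 * 1024 * 1024;      /* example driver limit */
        uint64_t cluster_bytes = 3ull << 30;           /* ~3 GiB rounded request */

        /* Each iteration is capped by the driver's max transfer, by the
         * bounce-buffer ceiling and by what is left of the request, so
         * neither memory use nor a single driver call can blow up. */
        while (cluster_bytes) {
            uint64_t chunk = MIN(MIN(max_transfer, cluster_bytes), max_bounce);
            /* ... read 'chunk' bytes, write them back if copy-on-read ... */
            cluster_bytes -= chunk;
        }
        printf("done\n");
        return 0;
    }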
Also, querying the allocation status allows us to optimize: when data is already present in the active layer, we don't need to bounce. Note that the code has a telling comment that copy-on-read should probably be a filter driver rather than a bolt-on hack in io.c; but that remains a task for another day. CC: qemu-stable@nongnu.org Signed-off-by: Eric Blake Reviewed-by: Kevin Wolf Reviewed-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- block/io.c | 120 +++++++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 82 insertions(+), 38 deletions(-) (limited to 'block') diff --git a/block/io.c b/block/io.c index a5598ed..8e41907 100644 --- a/block/io.c +++ b/block/io.c @@ -34,6 +34,9 @@ #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */ +/* Maximum bounce buffer for copy-on-read and write zeroes, in bytes */ +#define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS) + static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes, BdrvRequestFlags flags); @@ -945,11 +948,14 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child, BlockDriver *drv = bs->drv; struct iovec iov; - QEMUIOVector bounce_qiov; + QEMUIOVector local_qiov; int64_t cluster_offset; unsigned int cluster_bytes; size_t skip_bytes; int ret; + int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer, + BDRV_REQUEST_MAX_BYTES); + unsigned int progress = 0; /* FIXME We cannot require callers to have write permissions when all they * are doing is a read request. If we did things right, write permissions @@ -961,53 +967,95 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child, // assert(child->perm & (BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE)); /* Cover entire cluster so no additional backing file I/O is required when - * allocating cluster in the image file. + * allocating cluster in the image file. Note that this value may exceed + * BDRV_REQUEST_MAX_BYTES (even when the original read did not), which + * is one reason we loop rather than doing it all at once. */ bdrv_round_to_clusters(bs, offset, bytes, &cluster_offset, &cluster_bytes); + skip_bytes = offset - cluster_offset; trace_bdrv_co_do_copy_on_readv(bs, offset, bytes, cluster_offset, cluster_bytes); - iov.iov_len = cluster_bytes; - iov.iov_base = bounce_buffer = qemu_try_blockalign(bs, iov.iov_len); + bounce_buffer = qemu_try_blockalign(bs, + MIN(MIN(max_transfer, cluster_bytes), + MAX_BOUNCE_BUFFER)); if (bounce_buffer == NULL) { ret = -ENOMEM; goto err; } - qemu_iovec_init_external(&bounce_qiov, &iov, 1); + while (cluster_bytes) { + int64_t pnum; - ret = bdrv_driver_preadv(bs, cluster_offset, cluster_bytes, - &bounce_qiov, 0); - if (ret < 0) { - goto err; - } + ret = bdrv_is_allocated(bs, cluster_offset, + MIN(cluster_bytes, max_transfer), &pnum); + if (ret < 0) { + /* Safe to treat errors in querying allocation as if + * unallocated; we'll probably fail again soon on the + * read, but at least that will set a decent errno. + */ + pnum = MIN(cluster_bytes, max_transfer); + } - bdrv_debug_event(bs, BLKDBG_COR_WRITE); - if (drv->bdrv_co_pwrite_zeroes && - buffer_is_zero(bounce_buffer, iov.iov_len)) { - /* FIXME: Should we (perhaps conditionally) be setting - * BDRV_REQ_MAY_UNMAP, if it will allow for a sparser copy - * that still correctly reads as zero? */ - ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, cluster_bytes, 0); - } else { - /* This does not change the data on the disk, it is not necessary - * to flush even in cache=writethrough mode. 
- */ - ret = bdrv_driver_pwritev(bs, cluster_offset, cluster_bytes, - &bounce_qiov, 0); - } + assert(skip_bytes < pnum); - if (ret < 0) { - /* It might be okay to ignore write errors for guest requests. If this - * is a deliberate copy-on-read then we don't want to ignore the error. - * Simply report it in all cases. - */ - goto err; - } + if (ret <= 0) { + /* Must copy-on-read; use the bounce buffer */ + iov.iov_base = bounce_buffer; + iov.iov_len = pnum = MIN(pnum, MAX_BOUNCE_BUFFER); + qemu_iovec_init_external(&local_qiov, &iov, 1); - skip_bytes = offset - cluster_offset; - qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes, bytes); + ret = bdrv_driver_preadv(bs, cluster_offset, pnum, + &local_qiov, 0); + if (ret < 0) { + goto err; + } + + bdrv_debug_event(bs, BLKDBG_COR_WRITE); + if (drv->bdrv_co_pwrite_zeroes && + buffer_is_zero(bounce_buffer, pnum)) { + /* FIXME: Should we (perhaps conditionally) be setting + * BDRV_REQ_MAY_UNMAP, if it will allow for a sparser copy + * that still correctly reads as zero? */ + ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, pnum, 0); + } else { + /* This does not change the data on the disk, it is not + * necessary to flush even in cache=writethrough mode. + */ + ret = bdrv_driver_pwritev(bs, cluster_offset, pnum, + &local_qiov, 0); + } + + if (ret < 0) { + /* It might be okay to ignore write errors for guest + * requests. If this is a deliberate copy-on-read + * then we don't want to ignore the error. Simply + * report it in all cases. + */ + goto err; + } + + qemu_iovec_from_buf(qiov, progress, bounce_buffer + skip_bytes, + pnum - skip_bytes); + } else { + /* Read directly into the destination */ + qemu_iovec_init(&local_qiov, qiov->niov); + qemu_iovec_concat(&local_qiov, qiov, progress, pnum - skip_bytes); + ret = bdrv_driver_preadv(bs, offset + progress, local_qiov.size, + &local_qiov, 0); + qemu_iovec_destroy(&local_qiov); + if (ret < 0) { + goto err; + } + } + + cluster_offset += pnum; + cluster_bytes -= pnum; + progress += pnum - skip_bytes; + skip_bytes = 0; + } + ret = 0; err: qemu_vfree(bounce_buffer); @@ -1213,9 +1261,6 @@ int coroutine_fn bdrv_co_readv(BdrvChild *child, int64_t sector_num, return bdrv_co_do_readv(child, sector_num, nb_sectors, qiov, 0); } -/* Maximum buffer for write zeroes fallback, in bytes */ -#define MAX_WRITE_ZEROES_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS) - static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes, BdrvRequestFlags flags) { @@ -1230,8 +1275,7 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, int max_write_zeroes = MIN_NON_ZERO(bs->bl.max_pwrite_zeroes, INT_MAX); int alignment = MAX(bs->bl.pwrite_zeroes_alignment, bs->bl.request_alignment); - int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer, - MAX_WRITE_ZEROES_BOUNCE_BUFFER); + int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer, MAX_BOUNCE_BUFFER); assert(alignment % bs->bl.request_alignment == 0); head = offset % alignment; -- cgit v1.1 From 161253e2d0a83a1b33bca019c6e926013e1a03db Mon Sep 17 00:00:00 2001 From: "Daniel P. Berrange" Date: Wed, 27 Sep 2017 13:53:35 +0100 Subject: block: use 1 MB bounce buffers for crypto instead of 16KB Using 16KB bounce buffers creates a significant performance penalty for I/O to encrypted volumes on storage which high I/O latency (rotating rust & network drives), because it triggers lots of fairly small I/O operations. 
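Roughly why that hurts: every bounce-buffer-sized chunk becomes a separate encrypt-plus-write to the underlying file, so the buffer size directly sets how many I/Os a single guest request fans out into. A standalone illustration with a made-up 1 MiB guest write:

    #include <stdio.h>

    int main(void)
    {
        unsigned request = 1024 * 1024;      /* one 1 MiB guest write */
        unsigned old_buf = 32 * 512;         /* 32 sectors = 16 KiB */
        unsigned new_buf = 1024 * 1024;      /* 1 MiB */

        printf("16 KiB buffer: %u I/Os, 1 MiB buffer: %u I/O\n",
               request / old_buf, request / new_buf);
        /* prints: 16 KiB buffer: 64 I/Os, 1 MiB buffer: 1 I/O */
        return 0;
    }

On high-latency storage it is those extra round trips that dominate the time spent.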
On tests with rotating rust, and cache=none|directsync, write speed increased from 2MiB/s to 32MiB/s, on a par with that achieved by the in-kernel luks driver. With other cache modes the in-kernel driver is still notably faster because it is able to report completion of the I/O request before any encryption is done, while the in-QEMU driver must encrypt the data before completion. Signed-off-by: Daniel P. Berrange Message-id: 20170927125340.12360-2-berrange@redhat.com Reviewed-by: Eric Blake Reviewed-by: Max Reitz Signed-off-by: Max Reitz --- block/crypto.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) (limited to 'block') diff --git a/block/crypto.c b/block/crypto.c index 58ef6f2..684cabe 100644 --- a/block/crypto.c +++ b/block/crypto.c @@ -379,7 +379,11 @@ static void block_crypto_close(BlockDriverState *bs) } -#define BLOCK_CRYPTO_MAX_SECTORS 32 +/* + * 1 MB bounce buffer gives good performance / memory tradeoff + * when using cache=none|directsync. + */ +#define BLOCK_CRYPTO_MAX_IO_SIZE (1024 * 1024) static coroutine_fn int block_crypto_co_readv(BlockDriverState *bs, int64_t sector_num, @@ -396,12 +400,11 @@ block_crypto_co_readv(BlockDriverState *bs, int64_t sector_num, qemu_iovec_init(&hd_qiov, qiov->niov); - /* Bounce buffer so we have a linear mem region for - * entire sector. XXX optimize so we avoid bounce - * buffer in case that qiov->niov == 1 + /* Bounce buffer because we don't wish to expose cipher text + * in qiov which points to guest memory. */ cipher_data = - qemu_try_blockalign(bs->file->bs, MIN(BLOCK_CRYPTO_MAX_SECTORS * 512, + qemu_try_blockalign(bs->file->bs, MIN(BLOCK_CRYPTO_MAX_IO_SIZE, qiov->size)); if (cipher_data == NULL) { ret = -ENOMEM; @@ -411,8 +414,8 @@ block_crypto_co_readv(BlockDriverState *bs, int64_t sector_num, while (remaining_sectors) { cur_nr_sectors = remaining_sectors; - if (cur_nr_sectors > BLOCK_CRYPTO_MAX_SECTORS) { - cur_nr_sectors = BLOCK_CRYPTO_MAX_SECTORS; + if (cur_nr_sectors > (BLOCK_CRYPTO_MAX_IO_SIZE / 512)) { + cur_nr_sectors = (BLOCK_CRYPTO_MAX_IO_SIZE / 512); } qemu_iovec_reset(&hd_qiov); @@ -464,12 +467,11 @@ block_crypto_co_writev(BlockDriverState *bs, int64_t sector_num, qemu_iovec_init(&hd_qiov, qiov->niov); - /* Bounce buffer so we have a linear mem region for - * entire sector. XXX optimize so we avoid bounce - * buffer in case that qiov->niov == 1 + /* Bounce buffer because we're not permitted to touch + * contents of qiov - it points to guest memory. */ cipher_data = - qemu_try_blockalign(bs->file->bs, MIN(BLOCK_CRYPTO_MAX_SECTORS * 512, + qemu_try_blockalign(bs->file->bs, MIN(BLOCK_CRYPTO_MAX_IO_SIZE, qiov->size)); if (cipher_data == NULL) { ret = -ENOMEM; @@ -479,8 +481,8 @@ block_crypto_co_writev(BlockDriverState *bs, int64_t sector_num, while (remaining_sectors) { cur_nr_sectors = remaining_sectors; - if (cur_nr_sectors > BLOCK_CRYPTO_MAX_SECTORS) { - cur_nr_sectors = BLOCK_CRYPTO_MAX_SECTORS; + if (cur_nr_sectors > (BLOCK_CRYPTO_MAX_IO_SIZE / 512)) { + cur_nr_sectors = (BLOCK_CRYPTO_MAX_IO_SIZE / 512); } qemu_iovec_to_buf(qiov, bytes_done, -- cgit v1.1 From 31376555c7b447afb1bf9084eacbb8f566ff6b9d Mon Sep 17 00:00:00 2001 From: "Daniel P. Berrange" Date: Wed, 27 Sep 2017 13:53:37 +0100 Subject: block: fix data type casting for crypto payload offset The crypto APIs report the offset of the data payload as an uint64_t type, but the block driver is casting to size_t or ssize_t which will potentially truncate. 
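What "potentially truncate" means in practice, modelling a 32-bit size_t with uint32_t so the result is the same on any host (the offset value is artificial and far larger than any real LUKS header, purely to show the mechanics):

    #include <inttypes.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t payload_offset = 5ull * 1024 * 1024 * 1024;  /* 5 GiB */

        /* A 32-bit size_t keeps only the low 32 bits, so the offset
         * silently wraps from 5 GiB down to 1 GiB. */
        uint32_t truncated = (uint32_t)payload_offset;

        printf("%" PRIu64 " -> %" PRIu32 "\n", payload_offset, truncated);
        /* prints: 5368709120 -> 1073741824 */
        return 0;
    }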
Most of the block APIs use int64_t for offsets meanwhile, so even if using uint64_t in the crypto block driver we are still at risk of truncation. Change the block crypto driver to use uint64_t, but add asserts that the value is less than INT64_MAX. Reviewed-by: Max Reitz Reviewed-by: Eric Blake Signed-off-by: Daniel P. Berrange Message-id: 20170927125340.12360-4-berrange@redhat.com Signed-off-by: Max Reitz --- block/crypto.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'block') diff --git a/block/crypto.c b/block/crypto.c index 684cabe..61f5d77 100644 --- a/block/crypto.c +++ b/block/crypto.c @@ -364,8 +364,9 @@ static int block_crypto_truncate(BlockDriverState *bs, int64_t offset, PreallocMode prealloc, Error **errp) { BlockCrypto *crypto = bs->opaque; - size_t payload_offset = + uint64_t payload_offset = qcrypto_block_get_payload_offset(crypto->block); + assert(payload_offset < (INT64_MAX - offset)); offset += payload_offset; @@ -395,8 +396,9 @@ block_crypto_co_readv(BlockDriverState *bs, int64_t sector_num, uint8_t *cipher_data = NULL; QEMUIOVector hd_qiov; int ret = 0; - size_t payload_offset = + uint64_t payload_offset = qcrypto_block_get_payload_offset(crypto->block) / 512; + assert(payload_offset < (INT64_MAX / 512)); qemu_iovec_init(&hd_qiov, qiov->niov); @@ -462,8 +464,9 @@ block_crypto_co_writev(BlockDriverState *bs, int64_t sector_num, uint8_t *cipher_data = NULL; QEMUIOVector hd_qiov; int ret = 0; - size_t payload_offset = + uint64_t payload_offset = qcrypto_block_get_payload_offset(crypto->block) / 512; + assert(payload_offset < (INT64_MAX / 512)); qemu_iovec_init(&hd_qiov, qiov->niov); @@ -524,7 +527,9 @@ static int64_t block_crypto_getlength(BlockDriverState *bs) BlockCrypto *crypto = bs->opaque; int64_t len = bdrv_getlength(bs->file->bs); - ssize_t offset = qcrypto_block_get_payload_offset(crypto->block); + uint64_t offset = qcrypto_block_get_payload_offset(crypto->block); + assert(offset < INT64_MAX); + assert(offset < len); len -= offset; -- cgit v1.1 From a73466fbad6d48ba356940474cd72da602373304 Mon Sep 17 00:00:00 2001 From: "Daniel P. Berrange" Date: Wed, 27 Sep 2017 13:53:38 +0100 Subject: block: convert crypto driver to bdrv_co_preadv|pwritev Make the crypto driver implement the bdrv_co_preadv|pwritev callbacks, and also use bdrv_co_preadv|pwritev for I/O with the protocol driver beneath. This replaces sector based I/O with byte based I/O, and allows us to stop assuming the physical sector size matches the encryption sector size. Signed-off-by: Daniel P. 
Berrange Message-id: 20170927125340.12360-5-berrange@redhat.com Reviewed-by: Eric Blake Reviewed-by: Max Reitz Signed-off-by: Max Reitz --- block/crypto.c | 106 +++++++++++++++++++++++++++++---------------------------- 1 file changed, 54 insertions(+), 52 deletions(-) (limited to 'block') diff --git a/block/crypto.c b/block/crypto.c index 61f5d77..965c173 100644 --- a/block/crypto.c +++ b/block/crypto.c @@ -387,18 +387,23 @@ static void block_crypto_close(BlockDriverState *bs) #define BLOCK_CRYPTO_MAX_IO_SIZE (1024 * 1024) static coroutine_fn int -block_crypto_co_readv(BlockDriverState *bs, int64_t sector_num, - int remaining_sectors, QEMUIOVector *qiov) +block_crypto_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags) { BlockCrypto *crypto = bs->opaque; - int cur_nr_sectors; /* number of sectors in current iteration */ + uint64_t cur_bytes; /* number of bytes in current iteration */ uint64_t bytes_done = 0; uint8_t *cipher_data = NULL; QEMUIOVector hd_qiov; int ret = 0; - uint64_t payload_offset = - qcrypto_block_get_payload_offset(crypto->block) / 512; - assert(payload_offset < (INT64_MAX / 512)); + uint64_t sector_size = qcrypto_block_get_sector_size(crypto->block); + uint64_t payload_offset = qcrypto_block_get_payload_offset(crypto->block); + uint64_t sector_num = offset / sector_size; + + assert(!flags); + assert(payload_offset < INT64_MAX); + assert(QEMU_IS_ALIGNED(offset, sector_size)); + assert(QEMU_IS_ALIGNED(bytes, sector_size)); qemu_iovec_init(&hd_qiov, qiov->niov); @@ -413,37 +418,29 @@ block_crypto_co_readv(BlockDriverState *bs, int64_t sector_num, goto cleanup; } - while (remaining_sectors) { - cur_nr_sectors = remaining_sectors; - - if (cur_nr_sectors > (BLOCK_CRYPTO_MAX_IO_SIZE / 512)) { - cur_nr_sectors = (BLOCK_CRYPTO_MAX_IO_SIZE / 512); - } + while (bytes) { + cur_bytes = MIN(bytes, BLOCK_CRYPTO_MAX_IO_SIZE); qemu_iovec_reset(&hd_qiov); - qemu_iovec_add(&hd_qiov, cipher_data, cur_nr_sectors * 512); + qemu_iovec_add(&hd_qiov, cipher_data, cur_bytes); - ret = bdrv_co_readv(bs->file, - payload_offset + sector_num, - cur_nr_sectors, &hd_qiov); + ret = bdrv_co_preadv(bs->file, payload_offset + offset + bytes_done, + cur_bytes, &hd_qiov, 0); if (ret < 0) { goto cleanup; } - if (qcrypto_block_decrypt(crypto->block, - sector_num, - cipher_data, cur_nr_sectors * 512, - NULL) < 0) { + if (qcrypto_block_decrypt(crypto->block, sector_num, cipher_data, + cur_bytes, NULL) < 0) { ret = -EIO; goto cleanup; } - qemu_iovec_from_buf(qiov, bytes_done, - cipher_data, cur_nr_sectors * 512); + qemu_iovec_from_buf(qiov, bytes_done, cipher_data, cur_bytes); - remaining_sectors -= cur_nr_sectors; - sector_num += cur_nr_sectors; - bytes_done += cur_nr_sectors * 512; + sector_num += cur_bytes / sector_size; + bytes -= cur_bytes; + bytes_done += cur_bytes; } cleanup: @@ -455,18 +452,23 @@ block_crypto_co_readv(BlockDriverState *bs, int64_t sector_num, static coroutine_fn int -block_crypto_co_writev(BlockDriverState *bs, int64_t sector_num, - int remaining_sectors, QEMUIOVector *qiov) +block_crypto_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags) { BlockCrypto *crypto = bs->opaque; - int cur_nr_sectors; /* number of sectors in current iteration */ + uint64_t cur_bytes; /* number of bytes in current iteration */ uint64_t bytes_done = 0; uint8_t *cipher_data = NULL; QEMUIOVector hd_qiov; int ret = 0; - uint64_t payload_offset = - qcrypto_block_get_payload_offset(crypto->block) / 512; - assert(payload_offset 
< (INT64_MAX / 512)); + uint64_t sector_size = qcrypto_block_get_sector_size(crypto->block); + uint64_t payload_offset = qcrypto_block_get_payload_offset(crypto->block); + uint64_t sector_num = offset / sector_size; + + assert(!flags); + assert(payload_offset < INT64_MAX); + assert(QEMU_IS_ALIGNED(offset, sector_size)); + assert(QEMU_IS_ALIGNED(bytes, sector_size)); qemu_iovec_init(&hd_qiov, qiov->niov); @@ -481,37 +483,29 @@ block_crypto_co_writev(BlockDriverState *bs, int64_t sector_num, goto cleanup; } - while (remaining_sectors) { - cur_nr_sectors = remaining_sectors; + while (bytes) { + cur_bytes = MIN(bytes, BLOCK_CRYPTO_MAX_IO_SIZE); - if (cur_nr_sectors > (BLOCK_CRYPTO_MAX_IO_SIZE / 512)) { - cur_nr_sectors = (BLOCK_CRYPTO_MAX_IO_SIZE / 512); - } - - qemu_iovec_to_buf(qiov, bytes_done, - cipher_data, cur_nr_sectors * 512); + qemu_iovec_to_buf(qiov, bytes_done, cipher_data, cur_bytes); - if (qcrypto_block_encrypt(crypto->block, - sector_num, - cipher_data, cur_nr_sectors * 512, - NULL) < 0) { + if (qcrypto_block_encrypt(crypto->block, sector_num, cipher_data, + cur_bytes, NULL) < 0) { ret = -EIO; goto cleanup; } qemu_iovec_reset(&hd_qiov); - qemu_iovec_add(&hd_qiov, cipher_data, cur_nr_sectors * 512); + qemu_iovec_add(&hd_qiov, cipher_data, cur_bytes); - ret = bdrv_co_writev(bs->file, - payload_offset + sector_num, - cur_nr_sectors, &hd_qiov); + ret = bdrv_co_pwritev(bs->file, payload_offset + offset + bytes_done, + cur_bytes, &hd_qiov, 0); if (ret < 0) { goto cleanup; } - remaining_sectors -= cur_nr_sectors; - sector_num += cur_nr_sectors; - bytes_done += cur_nr_sectors * 512; + sector_num += cur_bytes / sector_size; + bytes -= cur_bytes; + bytes_done += cur_bytes; } cleanup: @@ -521,6 +515,13 @@ block_crypto_co_writev(BlockDriverState *bs, int64_t sector_num, return ret; } +static void block_crypto_refresh_limits(BlockDriverState *bs, Error **errp) +{ + BlockCrypto *crypto = bs->opaque; + uint64_t sector_size = qcrypto_block_get_sector_size(crypto->block); + bs->bl.request_alignment = sector_size; /* No sub-sector I/O */ +} + static int64_t block_crypto_getlength(BlockDriverState *bs) { @@ -620,8 +621,9 @@ BlockDriver bdrv_crypto_luks = { .bdrv_truncate = block_crypto_truncate, .create_opts = &block_crypto_create_opts_luks, - .bdrv_co_readv = block_crypto_co_readv, - .bdrv_co_writev = block_crypto_co_writev, + .bdrv_refresh_limits = block_crypto_refresh_limits, + .bdrv_co_preadv = block_crypto_co_preadv, + .bdrv_co_pwritev = block_crypto_co_pwritev, .bdrv_getlength = block_crypto_getlength, .bdrv_get_info = block_crypto_get_info_luks, .bdrv_get_specific_info = block_crypto_get_specific_info_luks, -- cgit v1.1 From 4609742a495d98ac358098e10d91890185dcdc60 Mon Sep 17 00:00:00 2001 From: "Daniel P. Berrange" Date: Wed, 27 Sep 2017 13:53:39 +0100 Subject: block: convert qcrypto_block_encrypt|decrypt to take bytes offset Instead of sector offset, take the bytes offset when encrypting or decrypting data. Signed-off-by: Daniel P. 
Berrange Message-id: 20170927125340.12360-6-berrange@redhat.com Reviewed-by: Eric Blake Reviewed-by: Max Reitz Signed-off-by: Max Reitz --- block/crypto.c | 12 ++++-------- block/qcow.c | 11 +++++++---- block/qcow2-cluster.c | 8 +++----- block/qcow2.c | 4 ++-- 4 files changed, 16 insertions(+), 19 deletions(-) (limited to 'block') diff --git a/block/crypto.c b/block/crypto.c index 965c173..edf53d4 100644 --- a/block/crypto.c +++ b/block/crypto.c @@ -398,7 +398,6 @@ block_crypto_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, int ret = 0; uint64_t sector_size = qcrypto_block_get_sector_size(crypto->block); uint64_t payload_offset = qcrypto_block_get_payload_offset(crypto->block); - uint64_t sector_num = offset / sector_size; assert(!flags); assert(payload_offset < INT64_MAX); @@ -430,15 +429,14 @@ block_crypto_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, goto cleanup; } - if (qcrypto_block_decrypt(crypto->block, sector_num, cipher_data, - cur_bytes, NULL) < 0) { + if (qcrypto_block_decrypt(crypto->block, offset + bytes_done, + cipher_data, cur_bytes, NULL) < 0) { ret = -EIO; goto cleanup; } qemu_iovec_from_buf(qiov, bytes_done, cipher_data, cur_bytes); - sector_num += cur_bytes / sector_size; bytes -= cur_bytes; bytes_done += cur_bytes; } @@ -463,7 +461,6 @@ block_crypto_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, int ret = 0; uint64_t sector_size = qcrypto_block_get_sector_size(crypto->block); uint64_t payload_offset = qcrypto_block_get_payload_offset(crypto->block); - uint64_t sector_num = offset / sector_size; assert(!flags); assert(payload_offset < INT64_MAX); @@ -488,8 +485,8 @@ block_crypto_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, qemu_iovec_to_buf(qiov, bytes_done, cipher_data, cur_bytes); - if (qcrypto_block_encrypt(crypto->block, sector_num, cipher_data, - cur_bytes, NULL) < 0) { + if (qcrypto_block_encrypt(crypto->block, offset + bytes_done, + cipher_data, cur_bytes, NULL) < 0) { ret = -EIO; goto cleanup; } @@ -503,7 +500,6 @@ block_crypto_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, goto cleanup; } - sector_num += cur_bytes / sector_size; bytes -= cur_bytes; bytes_done += cur_bytes; } diff --git a/block/qcow.c b/block/qcow.c index f450b00..9569dee 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -478,7 +478,9 @@ static int get_cluster_offset(BlockDriverState *bs, for(i = 0; i < s->cluster_sectors; i++) { if (i < n_start || i >= n_end) { memset(s->cluster_data, 0x00, 512); - if (qcrypto_block_encrypt(s->crypto, start_sect + i, + if (qcrypto_block_encrypt(s->crypto, + (start_sect + i) * + BDRV_SECTOR_SIZE, s->cluster_data, BDRV_SECTOR_SIZE, NULL) < 0) { @@ -668,7 +670,8 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num, } if (bs->encrypted) { assert(s->crypto); - if (qcrypto_block_decrypt(s->crypto, sector_num, buf, + if (qcrypto_block_decrypt(s->crypto, + sector_num * BDRV_SECTOR_SIZE, buf, n * BDRV_SECTOR_SIZE, NULL) < 0) { ret = -EIO; break; @@ -740,8 +743,8 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num, } if (bs->encrypted) { assert(s->crypto); - if (qcrypto_block_encrypt(s->crypto, sector_num, buf, - n * BDRV_SECTOR_SIZE, NULL) < 0) { + if (qcrypto_block_encrypt(s->crypto, sector_num * BDRV_SECTOR_SIZE, + buf, n * BDRV_SECTOR_SIZE, NULL) < 0) { ret = -EIO; break; } diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index d2518d1..0e5aec8 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c 
@@ -446,15 +446,13 @@ static bool coroutine_fn do_perform_cow_encrypt(BlockDriverState *bs, { if (bytes && bs->encrypted) { BDRVQcow2State *s = bs->opaque; - int64_t sector = (s->crypt_physical_offset ? + int64_t offset = (s->crypt_physical_offset ? (cluster_offset + offset_in_cluster) : - (src_cluster_offset + offset_in_cluster)) - >> BDRV_SECTOR_BITS; + (src_cluster_offset + offset_in_cluster)); assert((offset_in_cluster & ~BDRV_SECTOR_MASK) == 0); assert((bytes & ~BDRV_SECTOR_MASK) == 0); assert(s->crypto); - if (qcrypto_block_encrypt(s->crypto, sector, buffer, - bytes, NULL) < 0) { + if (qcrypto_block_encrypt(s->crypto, offset, buffer, bytes, NULL) < 0) { return false; } } diff --git a/block/qcow2.c b/block/qcow2.c index b8da8ca..3359739 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1811,7 +1811,7 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset, if (qcrypto_block_decrypt(s->crypto, (s->crypt_physical_offset ? cluster_offset + offset_in_cluster : - offset) >> BDRV_SECTOR_BITS, + offset), cluster_data, cur_bytes, NULL) < 0) { @@ -1946,7 +1946,7 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset, if (qcrypto_block_encrypt(s->crypto, (s->crypt_physical_offset ? cluster_offset + offset_in_cluster : - offset) >> BDRV_SECTOR_BITS, + offset), cluster_data, cur_bytes, NULL) < 0) { ret = -EIO; -- cgit v1.1 From d67a6b09b4ac27a4fac07544ded79b40d2717a0d Mon Sep 17 00:00:00 2001 From: "Daniel P. Berrange" Date: Wed, 27 Sep 2017 13:53:40 +0100 Subject: block: support passthrough of BDRV_REQ_FUA in crypto driver The BDRV_REQ_FUA flag can trivially be allowed in the crypt driver as a passthrough to the underlying block driver. Reviewed-by: Max Reitz Reviewed-by: Eric Blake Signed-off-by: Daniel P. Berrange Message-id: 20170927125340.12360-7-berrange@redhat.com Signed-off-by: Max Reitz --- block/crypto.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'block') diff --git a/block/crypto.c b/block/crypto.c index edf53d4..60ddf86 100644 --- a/block/crypto.c +++ b/block/crypto.c @@ -279,6 +279,9 @@ static int block_crypto_open_generic(QCryptoBlockFormat format, return -EINVAL; } + bs->supported_write_flags = BDRV_REQ_FUA & + bs->file->bs->supported_write_flags; + opts = qemu_opts_create(opts_spec, NULL, 0, &error_abort); qemu_opts_absorb_qdict(opts, options, &local_err); if (local_err) { @@ -462,7 +465,7 @@ block_crypto_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, uint64_t sector_size = qcrypto_block_get_sector_size(crypto->block); uint64_t payload_offset = qcrypto_block_get_payload_offset(crypto->block); - assert(!flags); + assert(!(flags & ~BDRV_REQ_FUA)); assert(payload_offset < INT64_MAX); assert(QEMU_IS_ALIGNED(offset, sector_size)); assert(QEMU_IS_ALIGNED(bytes, sector_size)); @@ -495,7 +498,7 @@ block_crypto_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, qemu_iovec_add(&hd_qiov, cipher_data, cur_bytes); ret = bdrv_co_pwritev(bs->file, payload_offset + offset + bytes_done, - cur_bytes, &hd_qiov, 0); + cur_bytes, &hd_qiov, flags); if (ret < 0) { goto cleanup; } -- cgit v1.1 From 18775ff32697ab6e1fd47989673bf1de54d0d942 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Thu, 28 Sep 2017 15:03:00 +0300 Subject: block/mirror: check backing in bdrv_mirror_top_refresh_filename Backing may be zero after failed bdrv_attach_child in bdrv_set_backing_hd, which leads to SIGSEGV. 
Signed-off-by: Vladimir Sementsov-Ogievskiy Message-id: 20170928120300.58164-1-vsementsov@virtuozzo.com Reviewed-by: John Snow Signed-off-by: Max Reitz --- block/mirror.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'block') diff --git a/block/mirror.c b/block/mirror.c index 459b80f..3b6f0c5 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -1058,6 +1058,11 @@ static int coroutine_fn bdrv_mirror_top_pdiscard(BlockDriverState *bs, static void bdrv_mirror_top_refresh_filename(BlockDriverState *bs, QDict *opts) { + if (bs->backing == NULL) { + /* we can be here after failed bdrv_attach_child in + * bdrv_set_backing_hd */ + return; + } bdrv_refresh_filename(bs->backing->bs); pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->backing->bs->filename); -- cgit v1.1 From 76a2a30a99c670e9ec1b4a5d976868059c6bc258 Mon Sep 17 00:00:00 2001 From: Pavel Butsykin Date: Fri, 29 Sep 2017 15:16:12 +0300 Subject: qcow2: fix return error code in qcow2_truncate() Signed-off-by: Pavel Butsykin Reviewed-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Max Reitz Message-id: 20170929121613.25997-2-pbutsykin@virtuozzo.com Signed-off-by: Max Reitz --- block/qcow2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'block') diff --git a/block/qcow2.c b/block/qcow2.c index 3359739..960b3ab 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -3167,7 +3167,7 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset, if (old_file_size < 0) { error_setg_errno(errp, -old_file_size, "Failed to inquire current file length"); - return ret; + return old_file_size; } nb_new_data_clusters = DIV_ROUND_UP(offset - old_length, @@ -3196,7 +3196,7 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset, if (allocation_start < 0) { error_setg_errno(errp, -allocation_start, "Failed to resize refcount structures"); - return -allocation_start; + return allocation_start; } clusters_allocated = qcow2_alloc_clusters_at(bs, allocation_start, -- cgit v1.1 From 163bc39d2c2921430e5c23f4d0a0966d62f67a02 Mon Sep 17 00:00:00 2001 From: Pavel Butsykin Date: Fri, 29 Sep 2017 15:16:13 +0300 Subject: qcow2: truncate the tail of the image file after shrinking the image Now after shrinking the image, at the end of the image file, there might be a tail that probably will never be used. So we can find the last used cluster and cut the tail. 
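The arithmetic behind "cut the tail", as a standalone sketch with made-up numbers: once the last referenced cluster is known, everything past (last_cluster + 1) * cluster_size is dead weight and the file can be truncated there (the patch only does so when that point lies before the current file length):

    #include <inttypes.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t cluster_size  = 64 * 1024;            /* qcow2 default */
        int64_t  last_cluster  = 1000;                 /* example: last in-use cluster */
        uint64_t old_file_size = 256ull * 1024 * 1024; /* example current length */

        uint64_t new_size = (uint64_t)(last_cluster + 1) * cluster_size;
        if (new_size < old_file_size) {
            printf("truncate %" PRIu64 " -> %" PRIu64 " bytes\n",
                   old_file_size, new_size);
            /* prints: truncate 268435456 -> 65601536 bytes */
        }
        return 0;
    }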
Signed-off-by: Pavel Butsykin Reviewed-by: John Snow Message-id: 20170929121613.25997-3-pbutsykin@virtuozzo.com Signed-off-by: Max Reitz --- block/qcow2-refcount.c | 22 ++++++++++++++++++++++ block/qcow2.c | 23 +++++++++++++++++++++++ block/qcow2.h | 1 + 3 files changed, 46 insertions(+) (limited to 'block') diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 88d5a3f..aa3fd6c 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -3181,3 +3181,25 @@ out: g_free(reftable_tmp); return ret; } + +int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size) +{ + BDRVQcow2State *s = bs->opaque; + int64_t i; + + for (i = size_to_clusters(s, size) - 1; i >= 0; i--) { + uint64_t refcount; + int ret = qcow2_get_refcount(bs, i, &refcount); + if (ret < 0) { + fprintf(stderr, "Can't get refcount for cluster %" PRId64 ": %s\n", + i, strerror(-ret)); + return ret; + } + if (refcount > 0) { + return i; + } + } + qcow2_signal_corruption(bs, true, -1, -1, + "There are no references in the refcount table."); + return -EIO; +} diff --git a/block/qcow2.c b/block/qcow2.c index 960b3ab..f63d183 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -3107,6 +3107,7 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset, new_l1_size = size_to_l1(s, offset); if (offset < old_length) { + int64_t last_cluster, old_file_size; if (prealloc != PREALLOC_MODE_OFF) { error_setg(errp, "Preallocation can't be used for shrinking an image"); @@ -3135,6 +3136,28 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset, "Failed to discard unused refblocks"); return ret; } + + old_file_size = bdrv_getlength(bs->file->bs); + if (old_file_size < 0) { + error_setg_errno(errp, -old_file_size, + "Failed to inquire current file length"); + return old_file_size; + } + last_cluster = qcow2_get_last_cluster(bs, old_file_size); + if (last_cluster < 0) { + error_setg_errno(errp, -last_cluster, + "Failed to find the last cluster"); + return last_cluster; + } + if ((last_cluster + 1) * s->cluster_size < old_file_size) { + ret = bdrv_truncate(bs->file, (last_cluster + 1) * s->cluster_size, + PREALLOC_MODE_OFF, NULL); + if (ret < 0) { + warn_report("Failed to truncate the tail of the image: %s", + strerror(-ret)); + ret = 0; + } + } } else { ret = qcow2_grow_l1_table(bs, new_l1_size, true); if (ret < 0) { diff --git a/block/qcow2.h b/block/qcow2.h index 5a289a8..782a206 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -597,6 +597,7 @@ int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order, BlockDriverAmendStatusCB *status_cb, void *cb_opaque, Error **errp); int qcow2_shrink_reftable(BlockDriverState *bs); +int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size); /* qcow2-cluster.c functions */ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size, -- cgit v1.1 From ce960aa9062a407d0ca15aee3dcd3bd84a4e24f9 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Fri, 29 Sep 2017 18:22:55 +0300 Subject: block/mirror: check backing in bdrv_mirror_top_flush Backing may be zero after failed bdrv_append in mirror_start_job, which leads to SIGSEGV. 
Signed-off-by: Vladimir Sementsov-Ogievskiy Message-id: 20170929152255.5431-1-vsementsov@virtuozzo.com Signed-off-by: Max Reitz --- block/mirror.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'block') diff --git a/block/mirror.c b/block/mirror.c index 3b6f0c5..153758c 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -1041,6 +1041,10 @@ static int coroutine_fn bdrv_mirror_top_pwritev(BlockDriverState *bs, static int coroutine_fn bdrv_mirror_top_flush(BlockDriverState *bs) { + if (bs->backing == NULL) { + /* we can be here after failed bdrv_append in mirror_start_job */ + return 0; + } return bdrv_co_flush(bs->backing->bs); } -- cgit v1.1