diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2018-07-10 17:28:29 +0100 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2018-07-10 17:28:29 +0100 |
commit | 7851f1a70657245fab837615087ceaf4541df71f (patch) | |
tree | b3b4359e4764fdf80a9f29bc6a35b27f8cd29749 /block | |
parent | 0956ee3e4027d9063b08477a1c4f2b719380aac4 (diff) | |
parent | cd47d792d7a27a57f4b621e2ff1ed8f4e83de1e9 (diff) | |
download | qemu-7851f1a70657245fab837615087ceaf4541df71f.zip qemu-7851f1a70657245fab837615087ceaf4541df71f.tar.gz qemu-7851f1a70657245fab837615087ceaf4541df71f.tar.bz2 |
Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging
Block layer patches:
- Copy offloading fixes for when the copy increases the image size
- Temporary revert of the removal of deprecated -drive options
- Fix request serialisation in the image fleecing scenario
- Fix copy-on-read crash with unaligned image size
- Fix another drain crash
# gpg: Signature made Tue 10 Jul 2018 16:37:52 BST
# gpg: using RSA key 7F09B272C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>"
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74 56FE 7F09 B272 C88F 2FD6
* remotes/kevin/tags/for-upstream: (24 commits)
block: Use common write req handling in truncate
block: Fix bdrv_co_truncate overlap check
block: Use common req handling in copy offloading
block: Use common req handling for discard
block: Fix handling of image enlarging write
block: Extract common write req handling
block: Use uint64_t for BdrvTrackedRequest byte fields
block: Use BdrvChild to discard
block: Add copy offloading trace points
block: Prefix file driver trace points with "file_"
Revert "block: Remove deprecated -drive geometry options"
Revert "block: Remove deprecated -drive option addr"
Revert "block: Remove deprecated -drive option serial"
Revert "block: Remove dead deprecation warning code"
block/blklogwrites: Make sure the log sector size is not too small
qapi/block-core.json: Add missing documentation for blklogwrites log-append option
block/backup: fix fleecing scheme: use serialized writes
block: add BDRV_REQ_SERIALISING flag
block: split flags in copy_range
block/io: fix copy_range
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'block')
-rw-r--r-- | block/backup.c | 20 | ||||
-rw-r--r-- | block/blkdebug.c | 2 | ||||
-rw-r--r-- | block/blklogwrites.c | 7 | ||||
-rwxr-xr-x | block/blkreplay.c | 2 | ||||
-rw-r--r-- | block/block-backend.c | 8 | ||||
-rw-r--r-- | block/copy-on-read.c | 2 | ||||
-rw-r--r-- | block/file-posix.c | 25 | ||||
-rw-r--r-- | block/file-win32.c | 2 | ||||
-rw-r--r-- | block/io.c | 318 | ||||
-rw-r--r-- | block/iscsi.c | 12 | ||||
-rw-r--r-- | block/mirror.c | 2 | ||||
-rw-r--r-- | block/qcow2-refcount.c | 2 | ||||
-rw-r--r-- | block/qcow2.c | 20 | ||||
-rw-r--r-- | block/raw-format.c | 26 | ||||
-rw-r--r-- | block/throttle.c | 2 | ||||
-rw-r--r-- | block/trace-events | 10 |
16 files changed, 311 insertions, 149 deletions
diff --git a/block/backup.c b/block/backup.c index 81895dd..319fc92 100644 --- a/block/backup.c +++ b/block/backup.c @@ -47,6 +47,8 @@ typedef struct BackupBlockJob { HBitmap *copy_bitmap; bool use_copy_range; int64_t copy_range_size; + + bool serialize_target_writes; } BackupBlockJob; static const BlockJobDriver backup_job_driver; @@ -102,6 +104,8 @@ static int coroutine_fn backup_cow_with_bounce_buffer(BackupBlockJob *job, QEMUIOVector qiov; BlockBackend *blk = job->common.blk; int nbytes; + int read_flags = is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0; + int write_flags = job->serialize_target_writes ? BDRV_REQ_SERIALISING : 0; hbitmap_reset(job->copy_bitmap, start / job->cluster_size, 1); nbytes = MIN(job->cluster_size, job->len - start); @@ -112,8 +116,7 @@ static int coroutine_fn backup_cow_with_bounce_buffer(BackupBlockJob *job, iov.iov_len = nbytes; qemu_iovec_init_external(&qiov, &iov, 1); - ret = blk_co_preadv(blk, start, qiov.size, &qiov, - is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0); + ret = blk_co_preadv(blk, start, qiov.size, &qiov, read_flags); if (ret < 0) { trace_backup_do_cow_read_fail(job, start, ret); if (error_is_read) { @@ -124,11 +127,11 @@ static int coroutine_fn backup_cow_with_bounce_buffer(BackupBlockJob *job, if (qemu_iovec_is_zero(&qiov)) { ret = blk_co_pwrite_zeroes(job->target, start, - qiov.size, BDRV_REQ_MAY_UNMAP); + qiov.size, write_flags | BDRV_REQ_MAY_UNMAP); } else { ret = blk_co_pwritev(job->target, start, - qiov.size, &qiov, - job->compress ? BDRV_REQ_WRITE_COMPRESSED : 0); + qiov.size, &qiov, write_flags | + (job->compress ? BDRV_REQ_WRITE_COMPRESSED : 0)); } if (ret < 0) { trace_backup_do_cow_write_fail(job, start, ret); @@ -156,6 +159,8 @@ static int coroutine_fn backup_cow_with_offload(BackupBlockJob *job, int nr_clusters; BlockBackend *blk = job->common.blk; int nbytes; + int read_flags = is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0; + int write_flags = job->serialize_target_writes ? 
BDRV_REQ_SERIALISING : 0; assert(QEMU_IS_ALIGNED(job->copy_range_size, job->cluster_size)); nbytes = MIN(job->copy_range_size, end - start); @@ -163,7 +168,7 @@ static int coroutine_fn backup_cow_with_offload(BackupBlockJob *job, hbitmap_reset(job->copy_bitmap, start / job->cluster_size, nr_clusters); ret = blk_co_copy_range(blk, start, job->target, start, nbytes, - is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0); + read_flags, write_flags); if (ret < 0) { trace_backup_do_cow_copy_range_fail(job, start, ret); hbitmap_set(job->copy_bitmap, start / job->cluster_size, @@ -701,6 +706,9 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, sync_bitmap : NULL; job->compress = compress; + /* Detect image-fleecing (and similar) schemes */ + job->serialize_target_writes = bdrv_chain_contains(target, bs); + /* If there is no backing file on the target, we cannot rely on COW if our * backup cluster size is smaller than the target cluster size. Even for * targets with a backing file, try to avoid COW if possible. 
*/ diff --git a/block/blkdebug.c b/block/blkdebug.c index 526af2a..0457bf5 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -625,7 +625,7 @@ static int coroutine_fn blkdebug_co_pdiscard(BlockDriverState *bs, return err; } - return bdrv_co_pdiscard(bs->file->bs, offset, bytes); + return bdrv_co_pdiscard(bs->file, offset, bytes); } static int coroutine_fn blkdebug_co_block_status(BlockDriverState *bs, diff --git a/block/blklogwrites.c b/block/blklogwrites.c index 63bf6b3..ff98cd5 100644 --- a/block/blklogwrites.c +++ b/block/blklogwrites.c @@ -89,7 +89,10 @@ static inline uint32_t blk_log_writes_log2(uint32_t value) static inline bool blk_log_writes_sector_size_valid(uint32_t sector_size) { - return sector_size < (1ull << 24) && is_power_of_2(sector_size); + return is_power_of_2(sector_size) && + sector_size >= sizeof(struct log_write_super) && + sector_size >= sizeof(struct log_write_entry) && + sector_size < (1ull << 24); } static uint64_t blk_log_writes_find_cur_log_sector(BdrvChild *log, @@ -483,7 +486,7 @@ static int coroutine_fn blk_log_writes_co_do_file_flush(BlkLogWritesFileReq *fr) static int coroutine_fn blk_log_writes_co_do_file_pdiscard(BlkLogWritesFileReq *fr) { - return bdrv_co_pdiscard(fr->bs->file->bs, fr->offset, fr->bytes); + return bdrv_co_pdiscard(fr->bs->file, fr->offset, fr->bytes); } static int coroutine_fn diff --git a/block/blkreplay.c b/block/blkreplay.c index b016dbe..766150a 100755 --- a/block/blkreplay.c +++ b/block/blkreplay.c @@ -113,7 +113,7 @@ static int coroutine_fn blkreplay_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes) { uint64_t reqid = blkreplay_next_id(); - int ret = bdrv_co_pdiscard(bs->file->bs, offset, bytes); + int ret = bdrv_co_pdiscard(bs->file, offset, bytes); block_request_create(reqid, bs, qemu_coroutine_self()); qemu_coroutine_yield(); diff --git a/block/block-backend.c b/block/block-backend.c index 6b75bca..f2f75a9 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -419,6 +419,7 @@ 
static void drive_info_del(DriveInfo *dinfo) return; } qemu_opts_del(dinfo->opts); + g_free(dinfo->serial); g_free(dinfo); } @@ -1559,7 +1560,7 @@ int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes) return ret; } - return bdrv_co_pdiscard(blk_bs(blk), offset, bytes); + return bdrv_co_pdiscard(blk->root, offset, bytes); } int blk_co_flush(BlockBackend *blk) @@ -2218,7 +2219,8 @@ void blk_unregister_buf(BlockBackend *blk, void *host) int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in, BlockBackend *blk_out, int64_t off_out, - int bytes, BdrvRequestFlags flags) + int bytes, BdrvRequestFlags read_flags, + BdrvRequestFlags write_flags) { int r; r = blk_check_byte_request(blk_in, off_in, bytes); @@ -2231,5 +2233,5 @@ int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in, } return bdrv_co_copy_range(blk_in->root, off_in, blk_out->root, off_out, - bytes, flags); + bytes, read_flags, write_flags); } diff --git a/block/copy-on-read.c b/block/copy-on-read.c index 1dcdaee..a19164f 100644 --- a/block/copy-on-read.c +++ b/block/copy-on-read.c @@ -116,7 +116,7 @@ static int coroutine_fn cor_co_pwrite_zeroes(BlockDriverState *bs, static int coroutine_fn cor_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes) { - return bdrv_co_pdiscard(bs->file->bs, offset, bytes); + return bdrv_co_pdiscard(bs->file, offset, bytes); } diff --git a/block/file-posix.c b/block/file-posix.c index 349f77a..28824aa 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -1488,6 +1488,8 @@ static ssize_t handle_aiocb_copy_range(RawPosixAIOData *aiocb) ssize_t ret = copy_file_range(aiocb->aio_fildes, &in_off, aiocb->aio_fd2, &out_off, bytes, 0); + trace_file_copy_file_range(aiocb->bs, aiocb->aio_fildes, in_off, + aiocb->aio_fd2, out_off, bytes, 0, ret); if (ret == 0) { /* No progress (e.g. when beyond EOF), let the caller fall back to * buffer I/O. 
*/ @@ -1743,7 +1745,7 @@ static int paio_submit_co_full(BlockDriverState *bs, int fd, assert(qiov->size == bytes); } - trace_paio_submit_co(offset, bytes, type); + trace_file_paio_submit_co(offset, bytes, type); pool = aio_get_thread_pool(bdrv_get_aio_context(bs)); return thread_pool_submit_co(pool, aio_worker, acb); } @@ -2589,18 +2591,23 @@ static void raw_abort_perm_update(BlockDriverState *bs) raw_handle_perm_lock(bs, RAW_PL_ABORT, 0, 0, NULL); } -static int coroutine_fn raw_co_copy_range_from(BlockDriverState *bs, - BdrvChild *src, uint64_t src_offset, - BdrvChild *dst, uint64_t dst_offset, - uint64_t bytes, BdrvRequestFlags flags) +static int coroutine_fn raw_co_copy_range_from( + BlockDriverState *bs, BdrvChild *src, uint64_t src_offset, + BdrvChild *dst, uint64_t dst_offset, uint64_t bytes, + BdrvRequestFlags read_flags, BdrvRequestFlags write_flags) { - return bdrv_co_copy_range_to(src, src_offset, dst, dst_offset, bytes, flags); + return bdrv_co_copy_range_to(src, src_offset, dst, dst_offset, bytes, + read_flags, write_flags); } static int coroutine_fn raw_co_copy_range_to(BlockDriverState *bs, - BdrvChild *src, uint64_t src_offset, - BdrvChild *dst, uint64_t dst_offset, - uint64_t bytes, BdrvRequestFlags flags) + BdrvChild *src, + uint64_t src_offset, + BdrvChild *dst, + uint64_t dst_offset, + uint64_t bytes, + BdrvRequestFlags read_flags, + BdrvRequestFlags write_flags) { BDRVRawState *s = bs->opaque; BDRVRawState *src_s; diff --git a/block/file-win32.c b/block/file-win32.c index 0411fe8..f1e2187 100644 --- a/block/file-win32.c +++ b/block/file-win32.c @@ -162,7 +162,7 @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, HANDLE hfile, acb->aio_nbytes = count; acb->aio_offset = offset; - trace_paio_submit(acb, opaque, offset, count, type); + trace_file_paio_submit(acb, opaque, offset, count, type); pool = aio_get_thread_pool(bdrv_get_aio_context(bs)); return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque); } @@ -40,6 +40,7 @@ static AioWait 
drain_all_aio_wait; +static void bdrv_parent_cb_resize(BlockDriverState *bs); static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes, BdrvRequestFlags flags); @@ -52,9 +53,7 @@ void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore, if (c == ignore || (ignore_bds_parents && c->role->parent_is_bds)) { continue; } - if (c->role->drained_begin) { - c->role->drained_begin(c); - } + bdrv_parent_drained_begin_single(c, false); } } @@ -73,6 +72,14 @@ void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore, } } +static bool bdrv_parent_drained_poll_single(BdrvChild *c) +{ + if (c->role->drained_poll) { + return c->role->drained_poll(c); + } + return false; +} + static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore, bool ignore_bds_parents) { @@ -83,14 +90,22 @@ static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore, if (c == ignore || (ignore_bds_parents && c->role->parent_is_bds)) { continue; } - if (c->role->drained_poll) { - busy |= c->role->drained_poll(c); - } + busy |= bdrv_parent_drained_poll_single(c); } return busy; } +void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll) +{ + if (c->role->drained_begin) { + c->role->drained_begin(c); + } + if (poll) { + BDRV_POLL_WHILE(c->bs, bdrv_parent_drained_poll_single(c)); + } +} + static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src) { dst->opt_transfer = MAX(dst->opt_transfer, src->opt_transfer); @@ -587,9 +602,11 @@ static void tracked_request_end(BdrvTrackedRequest *req) static void tracked_request_begin(BdrvTrackedRequest *req, BlockDriverState *bs, int64_t offset, - unsigned int bytes, + uint64_t bytes, enum BdrvTrackedRequestType type) { + assert(bytes <= INT64_MAX && offset <= INT64_MAX - bytes); + *req = (BdrvTrackedRequest){ .bs = bs, .offset = offset, @@ -611,7 +628,7 @@ static void tracked_request_begin(BdrvTrackedRequest *req, static void 
mark_request_serialising(BdrvTrackedRequest *req, uint64_t align) { int64_t overlap_offset = req->offset & ~(align - 1); - unsigned int overlap_bytes = ROUND_UP(req->offset + req->bytes, align) + uint64_t overlap_bytes = ROUND_UP(req->offset + req->bytes, align) - overlap_offset; if (!req->serialising) { @@ -623,6 +640,18 @@ static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align) req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes); } +static bool is_request_serialising_and_aligned(BdrvTrackedRequest *req) +{ + /* + * If the request is serialising, overlap_offset and overlap_bytes are set, + * so we can check if the request is aligned. Otherwise, don't care and + * return false. + */ + + return req->serialising && (req->offset == req->overlap_offset) && + (req->bytes == req->overlap_bytes); +} + /** * Round a region to cluster boundaries */ @@ -657,7 +686,7 @@ static int bdrv_get_cluster_size(BlockDriverState *bs) } static bool tracked_request_overlaps(BdrvTrackedRequest *req, - int64_t offset, unsigned int bytes) + int64_t offset, uint64_t bytes) { /* aaaa bbbb */ if (offset >= req->overlap_offset + req->overlap_bytes) { @@ -1186,6 +1215,12 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child, pnum = MIN(cluster_bytes, max_transfer); } + /* Stop at EOF if the image ends in the middle of the cluster */ + if (ret == 0 && pnum == 0) { + assert(progress >= bytes); + break; + } + assert(skip_bytes < pnum); if (ret <= 0) { @@ -1291,6 +1326,9 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child, mark_request_serialising(req, bdrv_get_cluster_size(bs)); } + /* BDRV_REQ_SERIALISING is only for write operation */ + assert(!(flags & BDRV_REQ_SERIALISING)); + if (!(flags & BDRV_REQ_NO_SERIALISING)) { wait_serialising_requests(req); } @@ -1538,6 +1576,92 @@ fail: return ret; } +static inline int coroutine_fn +bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, uint64_t bytes, + BdrvTrackedRequest *req, int flags) 
+{ + BlockDriverState *bs = child->bs; + bool waited; + int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE); + + if (bs->read_only) { + return -EPERM; + } + + /* BDRV_REQ_NO_SERIALISING is only for read operation */ + assert(!(flags & BDRV_REQ_NO_SERIALISING)); + assert(!(bs->open_flags & BDRV_O_INACTIVE)); + assert((bs->open_flags & BDRV_O_NO_IO) == 0); + assert(!(flags & ~BDRV_REQ_MASK)); + + if (flags & BDRV_REQ_SERIALISING) { + mark_request_serialising(req, bdrv_get_cluster_size(bs)); + } + + waited = wait_serialising_requests(req); + + assert(!waited || !req->serialising || + is_request_serialising_and_aligned(req)); + assert(req->overlap_offset <= offset); + assert(offset + bytes <= req->overlap_offset + req->overlap_bytes); + assert(end_sector <= bs->total_sectors || child->perm & BLK_PERM_RESIZE); + + switch (req->type) { + case BDRV_TRACKED_WRITE: + case BDRV_TRACKED_DISCARD: + if (flags & BDRV_REQ_WRITE_UNCHANGED) { + assert(child->perm & (BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE)); + } else { + assert(child->perm & BLK_PERM_WRITE); + } + return notifier_with_return_list_notify(&bs->before_write_notifiers, + req); + case BDRV_TRACKED_TRUNCATE: + assert(child->perm & BLK_PERM_RESIZE); + return 0; + default: + abort(); + } +} + +static inline void coroutine_fn +bdrv_co_write_req_finish(BdrvChild *child, int64_t offset, uint64_t bytes, + BdrvTrackedRequest *req, int ret) +{ + int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE); + BlockDriverState *bs = child->bs; + + atomic_inc(&bs->write_gen); + + /* + * Discard cannot extend the image, but in error handling cases, such as + * when reverting a qcow2 cluster allocation, the discarded range can pass + * the end of image file, so we cannot assert about BDRV_TRACKED_DISCARD + * here. Instead, just skip it, since semantically a discard request + * beyond EOF cannot expand the image anyway. 
+ */ + if (ret == 0 && + (req->type == BDRV_TRACKED_TRUNCATE || + end_sector > bs->total_sectors) && + req->type != BDRV_TRACKED_DISCARD) { + bs->total_sectors = end_sector; + bdrv_parent_cb_resize(bs); + bdrv_dirty_bitmap_truncate(bs, end_sector << BDRV_SECTOR_BITS); + } + if (req->bytes) { + switch (req->type) { + case BDRV_TRACKED_WRITE: + stat64_max(&bs->wr_highest_offset, offset + bytes); + /* fall through, to set dirty bits */ + case BDRV_TRACKED_DISCARD: + bdrv_set_dirty(bs, offset, bytes); + break; + default: + break; + } + } +} + /* * Forwards an already correctly aligned write request to the BlockDriver, * after possibly fragmenting it. @@ -1548,10 +1672,8 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child, { BlockDriverState *bs = child->bs; BlockDriver *drv = bs->drv; - bool waited; int ret; - int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE); uint64_t bytes_remaining = bytes; int max_transfer; @@ -1567,23 +1689,10 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child, assert((offset & (align - 1)) == 0); assert((bytes & (align - 1)) == 0); assert(!qiov || bytes == qiov->size); - assert((bs->open_flags & BDRV_O_NO_IO) == 0); - assert(!(flags & ~BDRV_REQ_MASK)); max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX), align); - waited = wait_serialising_requests(req); - assert(!waited || !req->serialising); - assert(req->overlap_offset <= offset); - assert(offset + bytes <= req->overlap_offset + req->overlap_bytes); - if (flags & BDRV_REQ_WRITE_UNCHANGED) { - assert(child->perm & (BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE)); - } else { - assert(child->perm & BLK_PERM_WRITE); - } - assert(end_sector <= bs->total_sectors || child->perm & BLK_PERM_RESIZE); - - ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req); + ret = bdrv_co_write_req_prepare(child, offset, bytes, req, flags); if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF && !(flags & 
BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_pwrite_zeroes && @@ -1632,15 +1741,10 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child, } bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE); - atomic_inc(&bs->write_gen); - bdrv_set_dirty(bs, offset, bytes); - - stat64_max(&bs->wr_highest_offset, offset + bytes); - if (ret >= 0) { - bs->total_sectors = MAX(bs->total_sectors, end_sector); ret = 0; } + bdrv_co_write_req_finish(child, offset, bytes, req, ret); return ret; } @@ -1755,10 +1859,6 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, if (!bs->drv) { return -ENOMEDIUM; } - if (bs->read_only) { - return -EPERM; - } - assert(!(bs->open_flags & BDRV_O_INACTIVE)); ret = bdrv_check_byte_request(bs, offset, bytes); if (ret < 0) { @@ -2590,7 +2690,7 @@ int bdrv_flush(BlockDriverState *bs) } typedef struct DiscardCo { - BlockDriverState *bs; + BdrvChild *child; int64_t offset; int bytes; int ret; @@ -2599,17 +2699,17 @@ static void coroutine_fn bdrv_pdiscard_co_entry(void *opaque) { DiscardCo *rwco = opaque; - rwco->ret = bdrv_co_pdiscard(rwco->bs, rwco->offset, rwco->bytes); + rwco->ret = bdrv_co_pdiscard(rwco->child, rwco->offset, rwco->bytes); } -int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset, - int bytes) +int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int bytes) { BdrvTrackedRequest req; int max_pdiscard, ret; int head, tail, align; + BlockDriverState *bs = child->bs; - if (!bs->drv) { + if (!bs || !bs->drv) { return -ENOMEDIUM; } @@ -2620,10 +2720,7 @@ int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset, ret = bdrv_check_byte_request(bs, offset, bytes); if (ret < 0) { return ret; - } else if (bs->read_only) { - return -EPERM; } - assert(!(bs->open_flags & BDRV_O_INACTIVE)); /* Do nothing if disabled. 
*/ if (!(bs->open_flags & BDRV_O_UNMAP)) { @@ -2647,7 +2744,7 @@ int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset, bdrv_inc_in_flight(bs); tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_DISCARD); - ret = notifier_with_return_list_notify(&bs->before_write_notifiers, &req); + ret = bdrv_co_write_req_prepare(child, offset, bytes, &req, 0); if (ret < 0) { goto out; } @@ -2713,18 +2810,17 @@ int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset, } ret = 0; out: - atomic_inc(&bs->write_gen); - bdrv_set_dirty(bs, req.offset, req.bytes); + bdrv_co_write_req_finish(child, req.offset, req.bytes, &req, ret); tracked_request_end(&req); bdrv_dec_in_flight(bs); return ret; } -int bdrv_pdiscard(BlockDriverState *bs, int64_t offset, int bytes) +int bdrv_pdiscard(BdrvChild *child, int64_t offset, int bytes) { Coroutine *co; DiscardCo rwco = { - .bs = bs, + .child = child, .offset = offset, .bytes = bytes, .ret = NOT_DONE, @@ -2735,8 +2831,8 @@ int bdrv_pdiscard(BlockDriverState *bs, int64_t offset, int bytes) bdrv_pdiscard_co_entry(&rwco); } else { co = qemu_coroutine_create(bdrv_pdiscard_co_entry, &rwco); - bdrv_coroutine_enter(bs, co); - BDRV_POLL_WHILE(bs, rwco.ret == NOT_DONE); + bdrv_coroutine_enter(child->bs, co); + BDRV_POLL_WHILE(child->bs, rwco.ret == NOT_DONE); } return rwco.ret; @@ -2888,15 +2984,13 @@ void bdrv_unregister_buf(BlockDriverState *bs, void *host) } } -static int coroutine_fn bdrv_co_copy_range_internal(BdrvChild *src, - uint64_t src_offset, - BdrvChild *dst, - uint64_t dst_offset, - uint64_t bytes, - BdrvRequestFlags flags, - bool recurse_src) +static int coroutine_fn bdrv_co_copy_range_internal( + BdrvChild *src, uint64_t src_offset, BdrvChild *dst, + uint64_t dst_offset, uint64_t bytes, + BdrvRequestFlags read_flags, BdrvRequestFlags write_flags, + bool recurse_src) { - BdrvTrackedRequest src_req, dst_req; + BdrvTrackedRequest req; int ret; if (!dst || !dst->bs) { @@ -2906,8 +3000,8 @@ static int 
coroutine_fn bdrv_co_copy_range_internal(BdrvChild *src, if (ret) { return ret; } - if (flags & BDRV_REQ_ZERO_WRITE) { - return bdrv_co_pwrite_zeroes(dst, dst_offset, bytes, flags); + if (write_flags & BDRV_REQ_ZERO_WRITE) { + return bdrv_co_pwrite_zeroes(dst, dst_offset, bytes, write_flags); } if (!src || !src->bs) { @@ -2923,32 +3017,44 @@ static int coroutine_fn bdrv_co_copy_range_internal(BdrvChild *src, || src->bs->encrypted || dst->bs->encrypted) { return -ENOTSUP; } - bdrv_inc_in_flight(src->bs); - bdrv_inc_in_flight(dst->bs); - tracked_request_begin(&src_req, src->bs, src_offset, - bytes, BDRV_TRACKED_READ); - tracked_request_begin(&dst_req, dst->bs, dst_offset, - bytes, BDRV_TRACKED_WRITE); - if (!(flags & BDRV_REQ_NO_SERIALISING)) { - wait_serialising_requests(&src_req); - wait_serialising_requests(&dst_req); - } if (recurse_src) { + bdrv_inc_in_flight(src->bs); + tracked_request_begin(&req, src->bs, src_offset, bytes, + BDRV_TRACKED_READ); + + /* BDRV_REQ_SERIALISING is only for write operation */ + assert(!(read_flags & BDRV_REQ_SERIALISING)); + if (!(read_flags & BDRV_REQ_NO_SERIALISING)) { + wait_serialising_requests(&req); + } + ret = src->bs->drv->bdrv_co_copy_range_from(src->bs, src, src_offset, dst, dst_offset, - bytes, flags); + bytes, + read_flags, write_flags); + + tracked_request_end(&req); + bdrv_dec_in_flight(src->bs); } else { - ret = dst->bs->drv->bdrv_co_copy_range_to(dst->bs, - src, src_offset, - dst, dst_offset, - bytes, flags); - } - tracked_request_end(&src_req); - tracked_request_end(&dst_req); - bdrv_dec_in_flight(src->bs); - bdrv_dec_in_flight(dst->bs); + bdrv_inc_in_flight(dst->bs); + tracked_request_begin(&req, dst->bs, dst_offset, bytes, + BDRV_TRACKED_WRITE); + ret = bdrv_co_write_req_prepare(dst, dst_offset, bytes, &req, + write_flags); + if (!ret) { + ret = dst->bs->drv->bdrv_co_copy_range_to(dst->bs, + src, src_offset, + dst, dst_offset, + bytes, + read_flags, write_flags); + } + bdrv_co_write_req_finish(dst, dst_offset, 
bytes, &req, ret); + tracked_request_end(&req); + bdrv_dec_in_flight(dst->bs); + } + return ret; } @@ -2958,10 +3064,14 @@ static int coroutine_fn bdrv_co_copy_range_internal(BdrvChild *src, * semantics. */ int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, uint64_t src_offset, BdrvChild *dst, uint64_t dst_offset, - uint64_t bytes, BdrvRequestFlags flags) + uint64_t bytes, + BdrvRequestFlags read_flags, + BdrvRequestFlags write_flags) { + trace_bdrv_co_copy_range_from(src, src_offset, dst, dst_offset, bytes, + read_flags, write_flags); return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset, - bytes, flags, true); + bytes, read_flags, write_flags, true); } /* Copy range from @src to @dst. @@ -2970,19 +3080,24 @@ int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, uint64_t src_offset, * semantics. */ int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, uint64_t src_offset, BdrvChild *dst, uint64_t dst_offset, - uint64_t bytes, BdrvRequestFlags flags) + uint64_t bytes, + BdrvRequestFlags read_flags, + BdrvRequestFlags write_flags) { + trace_bdrv_co_copy_range_to(src, src_offset, dst, dst_offset, bytes, + read_flags, write_flags); return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset, - bytes, flags, false); + bytes, read_flags, write_flags, false); } int coroutine_fn bdrv_co_copy_range(BdrvChild *src, uint64_t src_offset, BdrvChild *dst, uint64_t dst_offset, - uint64_t bytes, BdrvRequestFlags flags) + uint64_t bytes, BdrvRequestFlags read_flags, + BdrvRequestFlags write_flags) { return bdrv_co_copy_range_from(src, src_offset, dst, dst_offset, - bytes, flags); + bytes, read_flags, write_flags); } static void bdrv_parent_cb_resize(BlockDriverState *bs) @@ -3007,7 +3122,6 @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, int64_t old_size, new_bytes; int ret; - assert(child->perm & BLK_PERM_RESIZE); /* if bs->drv == NULL, bs is closed, so there's nothing to do here */ if (!drv) { @@ -3032,14 +3146,26 @@ int 
coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, } bdrv_inc_in_flight(bs); - tracked_request_begin(&req, bs, offset, new_bytes, BDRV_TRACKED_TRUNCATE); + tracked_request_begin(&req, bs, offset - new_bytes, new_bytes, + BDRV_TRACKED_TRUNCATE); /* If we are growing the image and potentially using preallocation for the * new area, we need to make sure that no write requests are made to it * concurrently or they might be overwritten by preallocation. */ if (new_bytes) { mark_request_serialising(&req, 1); - wait_serialising_requests(&req); + } + if (bs->read_only) { + error_setg(errp, "Image is read-only"); + ret = -EACCES; + goto out; + } + ret = bdrv_co_write_req_prepare(child, offset - new_bytes, new_bytes, &req, + 0); + if (ret < 0) { + error_setg_errno(errp, -ret, + "Failed to prepare request for truncation"); + goto out; } if (!drv->bdrv_co_truncate) { @@ -3051,13 +3177,6 @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, ret = -ENOTSUP; goto out; } - if (bs->read_only) { - error_setg(errp, "Image is read-only"); - ret = -EACCES; - goto out; - } - - assert(!(bs->open_flags & BDRV_O_INACTIVE)); ret = drv->bdrv_co_truncate(bs, offset, prealloc, errp); if (ret < 0) { @@ -3069,9 +3188,10 @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, } else { offset = bs->total_sectors * BDRV_SECTOR_SIZE; } - bdrv_dirty_bitmap_truncate(bs, offset); - bdrv_parent_cb_resize(bs); - atomic_inc(&bs->write_gen); + /* It's possible that truncation succeeded but refresh_total_sectors + * failed, but the latter doesn't affect how we should finish the request. + * Pass 0 as the last parameter so that dirty bitmaps etc. are handled. 
*/ + bdrv_co_write_req_finish(child, offset - new_bytes, new_bytes, &req, 0); out: tracked_request_end(&req); diff --git a/block/iscsi.c b/block/iscsi.c index ead2bd5..bb69faf 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -44,6 +44,7 @@ #include "qapi/qmp/qstring.h" #include "crypto/secret.h" #include "scsi/utils.h" +#include "trace.h" /* Conflict between scsi/utils.h and libiscsi! :( */ #define SCSI_XFER_NONE ISCSI_XFER_NONE @@ -2193,9 +2194,11 @@ static int coroutine_fn iscsi_co_copy_range_from(BlockDriverState *bs, BdrvChild *dst, uint64_t dst_offset, uint64_t bytes, - BdrvRequestFlags flags) + BdrvRequestFlags read_flags, + BdrvRequestFlags write_flags) { - return bdrv_co_copy_range_to(src, src_offset, dst, dst_offset, bytes, flags); + return bdrv_co_copy_range_to(src, src_offset, dst, dst_offset, bytes, + read_flags, write_flags); } static struct scsi_task *iscsi_xcopy_task(int param_len) @@ -2332,7 +2335,8 @@ static int coroutine_fn iscsi_co_copy_range_to(BlockDriverState *bs, BdrvChild *dst, uint64_t dst_offset, uint64_t bytes, - BdrvRequestFlags flags) + BdrvRequestFlags read_flags, + BdrvRequestFlags write_flags) { IscsiLun *dst_lun = dst->bs->opaque; IscsiLun *src_lun; @@ -2396,6 +2400,8 @@ retry: } out_unlock: + + trace_iscsi_xcopy(src_lun, src_offset, dst_lun, dst_offset, bytes, r); g_free(iscsi_task.task); qemu_mutex_unlock(&dst_lun->mutex); g_free(iscsi_task.err_str); diff --git a/block/mirror.c b/block/mirror.c index 61bd9f3..b48c3f8 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -1333,7 +1333,7 @@ static int coroutine_fn bdrv_mirror_top_do_write(BlockDriverState *bs, break; case MIRROR_METHOD_DISCARD: - ret = bdrv_co_pdiscard(bs->backing->bs, offset, bytes); + ret = bdrv_co_pdiscard(bs->backing, offset, bytes); break; default: diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 1b9ecb1..3c539f0 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -734,7 +734,7 @@ void qcow2_process_discards(BlockDriverState 
*bs, int ret) /* Discard is optional, ignore the return value */ if (ret >= 0) { - bdrv_pdiscard(bs->file->bs, d->offset, d->bytes); + bdrv_pdiscard(bs->file, d->offset, d->bytes); } g_free(d); diff --git a/block/qcow2.c b/block/qcow2.c index 5d668fc..6162ed8 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -3259,13 +3259,14 @@ static int coroutine_fn qcow2_co_copy_range_from(BlockDriverState *bs, BdrvChild *src, uint64_t src_offset, BdrvChild *dst, uint64_t dst_offset, - uint64_t bytes, BdrvRequestFlags flags) + uint64_t bytes, BdrvRequestFlags read_flags, + BdrvRequestFlags write_flags) { BDRVQcow2State *s = bs->opaque; int ret; unsigned int cur_bytes; /* number of bytes in current iteration */ BdrvChild *child = NULL; - BdrvRequestFlags cur_flags; + BdrvRequestFlags cur_write_flags; assert(!bs->encrypted); qemu_co_mutex_lock(&s->lock); @@ -3274,7 +3275,7 @@ qcow2_co_copy_range_from(BlockDriverState *bs, uint64_t copy_offset = 0; /* prepare next request */ cur_bytes = MIN(bytes, INT_MAX); - cur_flags = flags; + cur_write_flags = write_flags; ret = qcow2_get_cluster_offset(bs, src_offset, &cur_bytes, &copy_offset); if (ret < 0) { @@ -3286,20 +3287,20 @@ qcow2_co_copy_range_from(BlockDriverState *bs, if (bs->backing && bs->backing->bs) { int64_t backing_length = bdrv_getlength(bs->backing->bs); if (src_offset >= backing_length) { - cur_flags |= BDRV_REQ_ZERO_WRITE; + cur_write_flags |= BDRV_REQ_ZERO_WRITE; } else { child = bs->backing; cur_bytes = MIN(cur_bytes, backing_length - src_offset); copy_offset = src_offset; } } else { - cur_flags |= BDRV_REQ_ZERO_WRITE; + cur_write_flags |= BDRV_REQ_ZERO_WRITE; } break; case QCOW2_CLUSTER_ZERO_PLAIN: case QCOW2_CLUSTER_ZERO_ALLOC: - cur_flags |= BDRV_REQ_ZERO_WRITE; + cur_write_flags |= BDRV_REQ_ZERO_WRITE; break; case QCOW2_CLUSTER_COMPRESSED: @@ -3322,7 +3323,7 @@ qcow2_co_copy_range_from(BlockDriverState *bs, ret = bdrv_co_copy_range_from(child, copy_offset, dst, dst_offset, - cur_bytes, cur_flags); + cur_bytes, 
read_flags, cur_write_flags); qemu_co_mutex_lock(&s->lock); if (ret < 0) { goto out; @@ -3343,7 +3344,8 @@ static int coroutine_fn qcow2_co_copy_range_to(BlockDriverState *bs, BdrvChild *src, uint64_t src_offset, BdrvChild *dst, uint64_t dst_offset, - uint64_t bytes, BdrvRequestFlags flags) + uint64_t bytes, BdrvRequestFlags read_flags, + BdrvRequestFlags write_flags) { BDRVQcow2State *s = bs->opaque; int offset_in_cluster; @@ -3386,7 +3388,7 @@ qcow2_co_copy_range_to(BlockDriverState *bs, ret = bdrv_co_copy_range_to(src, src_offset, bs->file, cluster_offset + offset_in_cluster, - cur_bytes, flags); + cur_bytes, read_flags, write_flags); qemu_co_mutex_lock(&s->lock); if (ret < 0) { goto fail; diff --git a/block/raw-format.c b/block/raw-format.c index 8e648a5..2fd69cd 100644 --- a/block/raw-format.c +++ b/block/raw-format.c @@ -297,7 +297,7 @@ static int coroutine_fn raw_co_pdiscard(BlockDriverState *bs, if (ret) { return ret; } - return bdrv_co_pdiscard(bs->file->bs, offset, bytes); + return bdrv_co_pdiscard(bs->file, offset, bytes); } static int64_t raw_getlength(BlockDriverState *bs) @@ -498,9 +498,13 @@ static int raw_probe_geometry(BlockDriverState *bs, HDGeometry *geo) } static int coroutine_fn raw_co_copy_range_from(BlockDriverState *bs, - BdrvChild *src, uint64_t src_offset, - BdrvChild *dst, uint64_t dst_offset, - uint64_t bytes, BdrvRequestFlags flags) + BdrvChild *src, + uint64_t src_offset, + BdrvChild *dst, + uint64_t dst_offset, + uint64_t bytes, + BdrvRequestFlags read_flags, + BdrvRequestFlags write_flags) { int ret; @@ -509,13 +513,17 @@ static int coroutine_fn raw_co_copy_range_from(BlockDriverState *bs, return ret; } return bdrv_co_copy_range_from(bs->file, src_offset, dst, dst_offset, - bytes, flags); + bytes, read_flags, write_flags); } static int coroutine_fn raw_co_copy_range_to(BlockDriverState *bs, - BdrvChild *src, uint64_t src_offset, - BdrvChild *dst, uint64_t dst_offset, - uint64_t bytes, BdrvRequestFlags flags) + BdrvChild *src, + 
uint64_t src_offset, + BdrvChild *dst, + uint64_t dst_offset, + uint64_t bytes, + BdrvRequestFlags read_flags, + BdrvRequestFlags write_flags) { int ret; @@ -524,7 +532,7 @@ static int coroutine_fn raw_co_copy_range_to(BlockDriverState *bs, return ret; } return bdrv_co_copy_range_to(src, src_offset, bs->file, dst_offset, bytes, - flags); + read_flags, write_flags); } BlockDriver bdrv_raw = { diff --git a/block/throttle.c b/block/throttle.c index f617f23..636c976 100644 --- a/block/throttle.c +++ b/block/throttle.c @@ -149,7 +149,7 @@ static int coroutine_fn throttle_co_pdiscard(BlockDriverState *bs, ThrottleGroupMember *tgm = bs->opaque; throttle_group_co_io_limits_intercept(tgm, bytes, true); - return bdrv_co_pdiscard(bs->file->bs, offset, bytes); + return bdrv_co_pdiscard(bs->file, offset, bytes); } static int throttle_co_flush(BlockDriverState *bs) diff --git a/block/trace-events b/block/trace-events index c35287b..3e8c47b 100644 --- a/block/trace-events +++ b/block/trace-events @@ -15,6 +15,8 @@ bdrv_co_preadv(void *bs, int64_t offset, int64_t nbytes, unsigned int flags) "bs bdrv_co_pwritev(void *bs, int64_t offset, int64_t nbytes, unsigned int flags) "bs %p offset %"PRId64" nbytes %"PRId64" flags 0x%x" bdrv_co_pwrite_zeroes(void *bs, int64_t offset, int count, int flags) "bs %p offset %"PRId64" count %d flags 0x%x" bdrv_co_do_copy_on_readv(void *bs, int64_t offset, unsigned int bytes, int64_t cluster_offset, int64_t cluster_bytes) "bs %p offset %"PRId64" bytes %u cluster_offset %"PRId64" cluster_bytes %"PRId64 +bdrv_co_copy_range_from(void *src, uint64_t src_offset, void *dst, uint64_t dst_offset, uint64_t bytes, int read_flags, int write_flags) "src %p offset %"PRIu64" dst %p offset %"PRIu64" bytes %"PRIu64" rw flags 0x%x 0x%x" +bdrv_co_copy_range_to(void *src, uint64_t src_offset, void *dst, uint64_t dst_offset, uint64_t bytes, int read_flags, int write_flags) "src %p offset %"PRIu64" dst %p offset %"PRIu64" bytes %"PRIu64" rw flags 0x%x 0x%x" # 
block/stream.c stream_one_iteration(void *s, int64_t offset, uint64_t bytes, int is_allocated) "s %p offset %" PRId64 " bytes %" PRIu64 " is_allocated %d" @@ -55,8 +57,9 @@ qmp_block_stream(void *bs, void *job) "bs %p job %p" # block/file-win32.c # block/file-posix.c -paio_submit_co(int64_t offset, int count, int type) "offset %"PRId64" count %d type %d" -paio_submit(void *acb, void *opaque, int64_t offset, int count, int type) "acb %p opaque %p offset %"PRId64" count %d type %d" +file_paio_submit_co(int64_t offset, int count, int type) "offset %"PRId64" count %d type %d" +file_paio_submit(void *acb, void *opaque, int64_t offset, int count, int type) "acb %p opaque %p offset %"PRId64" count %d type %d" +file_copy_file_range(void *bs, int src, int64_t src_off, int dst, int64_t dst_off, int64_t bytes, int flags, int64_t ret) "bs %p src_fd %d offset %"PRIu64" dst_fd %d offset %"PRIu64" bytes %"PRIu64" flags %d ret %"PRId64 # block/qcow2.c qcow2_writev_start_req(void *co, int64_t offset, int bytes) "co %p offset 0x%" PRIx64 " bytes %d" @@ -150,3 +153,6 @@ nvme_free_req_queue_wait(void *q) "q %p" nvme_cmd_map_qiov(void *s, void *cmd, void *req, void *qiov, int entries) "s %p cmd %p req %p qiov %p entries %d" nvme_cmd_map_qiov_pages(void *s, int i, uint64_t page) "s %p page[%d] 0x%"PRIx64 nvme_cmd_map_qiov_iov(void *s, int i, void *page, int pages) "s %p iov[%d] %p pages %d" + +# block/iscsi.c +iscsi_xcopy(void *src_lun, uint64_t src_off, void *dst_lun, uint64_t dst_off, uint64_t bytes, int ret) "src_lun %p offset %"PRIu64" dst_lun %p offset %"PRIu64" bytes %"PRIu64" ret %d" |