diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2019-11-05 20:59:47 +0000 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2019-11-05 20:59:47 +0000 |
commit | e9102eb84dbc82ffe4bfb379a5ae233a967a900b (patch) | |
tree | 7256ddf3fa38ce94ae58233af899e175426cd0db | |
parent | 412fbef3d076c43e56451bacb28c4544858c66a3 (diff) | |
parent | 292d06b925b2787ee6f2430996b95651cae42fce (diff) | |
download | qemu-e9102eb84dbc82ffe4bfb379a5ae233a967a900b.zip qemu-e9102eb84dbc82ffe4bfb379a5ae233a967a900b.tar.gz qemu-e9102eb84dbc82ffe4bfb379a5ae233a967a900b.tar.bz2 |
Merge remote-tracking branch 'remotes/maxreitz/tags/pull-block-2019-11-04' into staging
Block patches for 4.2-rc0:
- Work around XFS write-zeroes bug in file-posix block driver
- Fix backup job with compression
- Fix to the NVMe block driver header
# gpg: Signature made Mon 04 Nov 2019 09:01:16 GMT
# gpg: using RSA key 91BEB60A30DB3E8857D11829F407DB0061D5CF40
# gpg: issuer "mreitz@redhat.com"
# gpg: Good signature from "Max Reitz <mreitz@redhat.com>" [full]
# Primary key fingerprint: 91BE B60A 30DB 3E88 57D1 1829 F407 DB00 61D5 CF40
* remotes/maxreitz/tags/pull-block-2019-11-04:
  block/file-posix: Let post-EOF fallocate serialize
  block: Add bdrv_co_get_self_request()
  block: Make wait/mark serialising requests public
  block/block-copy: fix s->copy_size for compressed cluster
  nvme: fix NSSRS offset in CAP register
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r-- | block/block-copy.c | 4 | ||||
-rw-r--r-- | block/file-posix.c | 36 | ||||
-rw-r--r-- | block/io.c | 42 | ||||
-rw-r--r-- | include/block/block_int.h | 4 | ||||
-rw-r--r-- | include/block/nvme.h | 2 |
5 files changed, 73 insertions, 15 deletions
diff --git a/block/block-copy.c b/block/block-copy.c index c39cc9c..79798a1 100644 --- a/block/block-copy.c +++ b/block/block-copy.c @@ -109,9 +109,9 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target, s->use_copy_range = false; s->copy_size = cluster_size; } else if (write_flags & BDRV_REQ_WRITE_COMPRESSED) { - /* Compression is not supported for copy_range */ + /* Compression supports only cluster-size writes and no copy-range. */ s->use_copy_range = false; - s->copy_size = MAX(cluster_size, BLOCK_COPY_MAX_BUFFER); + s->copy_size = cluster_size; } else { /* * copy_range does not respect max_transfer (it's a TODO), so we factor diff --git a/block/file-posix.c b/block/file-posix.c index 0b7e904..1f0f61a 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -2721,6 +2721,42 @@ raw_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes, RawPosixAIOData acb; ThreadPoolFunc *handler; +#ifdef CONFIG_FALLOCATE + if (offset + bytes > bs->total_sectors * BDRV_SECTOR_SIZE) { + BdrvTrackedRequest *req; + uint64_t end; + + /* + * This is a workaround for a bug in the Linux XFS driver, + * where writes submitted through the AIO interface will be + * discarded if they happen beyond a concurrently running + * fallocate() that increases the file length (i.e., both the + * write and the fallocate() happen beyond the EOF). + * + * To work around it, we extend the tracked request for this + * zero write until INT64_MAX (effectively infinity), and mark + * it as serializing. + * + * We have to enable this workaround for all filesystems and + * AIO modes (not just XFS with aio=native), because for + * remote filesystems we do not know the host configuration. 
+ */ + + req = bdrv_co_get_self_request(bs); + assert(req); + assert(req->type == BDRV_TRACKED_WRITE); + assert(req->offset <= offset); + assert(req->offset + req->bytes >= offset + bytes); + + end = INT64_MAX & -(uint64_t)bs->bl.request_alignment; + req->bytes = end - req->offset; + req->overlap_bytes = req->bytes; + + bdrv_mark_request_serialising(req, bs->bl.request_alignment); + bdrv_wait_serialising_requests(req); + } +#endif + acb = (RawPosixAIOData) { .bs = bs, .aio_fildes = s->fd, @@ -715,7 +715,7 @@ static void tracked_request_begin(BdrvTrackedRequest *req, qemu_co_mutex_unlock(&bs->reqs_lock); } -static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align) +void bdrv_mark_request_serialising(BdrvTrackedRequest *req, uint64_t align) { int64_t overlap_offset = req->offset & ~(align - 1); uint64_t overlap_bytes = ROUND_UP(req->offset + req->bytes, align) @@ -743,6 +743,24 @@ static bool is_request_serialising_and_aligned(BdrvTrackedRequest *req) } /** + * Return the tracked request on @bs for the current coroutine, or + * NULL if there is none. + */ +BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState *bs) +{ + BdrvTrackedRequest *req; + Coroutine *self = qemu_coroutine_self(); + + QLIST_FOREACH(req, &bs->tracked_requests, list) { + if (req->co == self) { + return req; + } + } + + return NULL; +} + +/** * Round a region to cluster boundaries */ void bdrv_round_to_clusters(BlockDriverState *bs, @@ -805,7 +823,7 @@ void bdrv_dec_in_flight(BlockDriverState *bs) bdrv_wakeup(bs); } -static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self) +bool coroutine_fn bdrv_wait_serialising_requests(BdrvTrackedRequest *self) { BlockDriverState *bs = self->bs; BdrvTrackedRequest *req; @@ -1437,14 +1455,14 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child, * with each other for the same cluster. 
For example, in copy-on-read * it ensures that the CoR read and write operations are atomic and * guest writes cannot interleave between them. */ - mark_request_serialising(req, bdrv_get_cluster_size(bs)); + bdrv_mark_request_serialising(req, bdrv_get_cluster_size(bs)); } /* BDRV_REQ_SERIALISING is only for write operation */ assert(!(flags & BDRV_REQ_SERIALISING)); if (!(flags & BDRV_REQ_NO_SERIALISING)) { - wait_serialising_requests(req); + bdrv_wait_serialising_requests(req); } if (flags & BDRV_REQ_COPY_ON_READ) { @@ -1841,10 +1859,10 @@ bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, uint64_t bytes, assert(!(flags & ~BDRV_REQ_MASK)); if (flags & BDRV_REQ_SERIALISING) { - mark_request_serialising(req, bdrv_get_cluster_size(bs)); + bdrv_mark_request_serialising(req, bdrv_get_cluster_size(bs)); } - waited = wait_serialising_requests(req); + waited = bdrv_wait_serialising_requests(req); assert(!waited || !req->serialising || is_request_serialising_and_aligned(req)); @@ -2008,8 +2026,8 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child, padding = bdrv_init_padding(bs, offset, bytes, &pad); if (padding) { - mark_request_serialising(req, align); - wait_serialising_requests(req); + bdrv_mark_request_serialising(req, align); + bdrv_wait_serialising_requests(req); bdrv_padding_rmw_read(child, req, &pad, true); @@ -2111,8 +2129,8 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child, } if (bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad)) { - mark_request_serialising(&req, align); - wait_serialising_requests(&req); + bdrv_mark_request_serialising(&req, align); + bdrv_wait_serialising_requests(&req); bdrv_padding_rmw_read(child, &req, &pad, false); } @@ -3205,7 +3223,7 @@ static int coroutine_fn bdrv_co_copy_range_internal( /* BDRV_REQ_SERIALISING is only for write operation */ assert(!(read_flags & BDRV_REQ_SERIALISING)); if (!(read_flags & BDRV_REQ_NO_SERIALISING)) { - wait_serialising_requests(&req); + 
bdrv_wait_serialising_requests(&req); } ret = src->bs->drv->bdrv_co_copy_range_from(src->bs, @@ -3336,7 +3354,7 @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact, * new area, we need to make sure that no write requests are made to it * concurrently or they might be overwritten by preallocation. */ if (new_bytes) { - mark_request_serialising(&req, 1); + bdrv_mark_request_serialising(&req, 1); } if (bs->read_only) { error_setg(errp, "Image is read-only"); diff --git a/include/block/block_int.h b/include/block/block_int.h index 02dc003..dd033d0 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -999,6 +999,10 @@ extern unsigned int bdrv_drain_all_count; void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent); void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent); +bool coroutine_fn bdrv_wait_serialising_requests(BdrvTrackedRequest *self); +void bdrv_mark_request_serialising(BdrvTrackedRequest *req, uint64_t align); +BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState *bs); + int get_tmp_filename(char *filename, int size); BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size, const char *filename); diff --git a/include/block/nvme.h b/include/block/nvme.h index ab5943b..8fb941c 100644 --- a/include/block/nvme.h +++ b/include/block/nvme.h @@ -23,7 +23,7 @@ enum NvmeCapShift { CAP_AMS_SHIFT = 17, CAP_TO_SHIFT = 24, CAP_DSTRD_SHIFT = 32, - CAP_NSSRS_SHIFT = 33, + CAP_NSSRS_SHIFT = 36, CAP_CSS_SHIFT = 37, CAP_MPSMIN_SHIFT = 48, CAP_MPSMAX_SHIFT = 52, |