diff options
author | Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | 2021-09-03 13:28:06 +0300 |
---|---|---|
committer | Eric Blake <eblake@redhat.com> | 2021-09-29 13:46:32 -0500 |
commit | 0c8022876f2183f93e23a7314862140c94ee62e7 (patch) | |
tree | 2edf3dde61173cc11417cf5ac44b279c2c0c8e5e /block | |
parent | 39af49c0d7e0a2a285f1bcbd3db0db88f15b1d8c (diff) | |
download | qemu-0c8022876f2183f93e23a7314862140c94ee62e7.zip qemu-0c8022876f2183f93e23a7314862140c94ee62e7.tar.gz qemu-0c8022876f2183f93e23a7314862140c94ee62e7.tar.bz2 |
block: use int64_t instead of int in driver discard handlers
We are generally moving to int64_t for both offset and bytes parameters
on all io paths.
The main motivation is to implement a 64-bit write_zeroes operation for
fast zeroing of large disk chunks, up to the whole disk.
We chose signed type, to be consistent with off_t (which is signed) and
with possibility for signed return type (where negative value means
error).
So, convert driver discard handlers bytes parameter to int64_t.
The only caller of all the updated functions is bdrv_co_pdiscard in
block/io.c. It is already prepared to work with 64bit requests, but
passes at most min(bs->bl.max_pdiscard, INT_MAX) to the driver.
Let's look at all updated functions:
blkdebug: all calculations are still OK, thanks to
bdrv_check_qiov_request().
both rule_check and bdrv_co_pdiscard are 64bit
blklogwrites: pass to blk_log_writes_co_log which is 64bit
blkreplay, copy-on-read, filter-compress: pass to bdrv_co_pdiscard, OK
copy-before-write: pass to bdrv_co_pdiscard which is 64bit and to
cbw_do_copy_before_write which is 64bit
file-posix: one handler calls raw_account_discard(), which is 64bit, and
both handlers call raw_do_pdiscard(). Update raw_do_pdiscard, which passes
to RawPosixAIOData::aio_nbytes, which is 64bit (and calls
raw_account_discard())
gluster: somehow, third argument of glfs_discard_async is size_t.
Let's set max_pdiscard accordingly.
iscsi: iscsi_allocmap_set_invalid is 64bit,
!is_byte_request_lun_aligned is 64bit.
list.num is uint32_t. Let's clarify max_pdiscard and
pdiscard_alignment.
mirror_top: pass to bdrv_mirror_top_do_write() which is
64bit
nbd: protocol limitation. max_pdiscard is already set strict enough,
keep it as is for now.
nvme: buf.nlb is uint32_t and we do shift. So, add corresponding limits
to nvme_refresh_limits().
preallocate: pass to bdrv_co_pdiscard() which is 64bit.
rbd: pass to qemu_rbd_start_co() which is 64bit.
qcow2: calculations are still OK, thanks to bdrv_check_qiov_request(),
qcow2_cluster_discard() is 64bit.
raw-format: raw_adjust_offset() is 64bit, bdrv_co_pdiscard too.
throttle: pass to bdrv_co_pdiscard() which is 64bit and to
throttle_group_co_io_limits_intercept() which is 64bit as well.
test-block-iothread: bytes argument is unused
Great! Now all drivers are prepared to handle 64bit discard requests,
or else have explicit max_pdiscard limits.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-Id: <20210903102807.27127-11-vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
Diffstat (limited to 'block')
-rw-r--r-- | block/blkdebug.c | 2 | ||||
-rw-r--r-- | block/blklogwrites.c | 4 | ||||
-rw-r--r-- | block/blkreplay.c | 2 | ||||
-rw-r--r-- | block/copy-before-write.c | 2 | ||||
-rw-r--r-- | block/copy-on-read.c | 2 | ||||
-rw-r--r-- | block/file-posix.c | 7 | ||||
-rw-r--r-- | block/filter-compress.c | 2 | ||||
-rw-r--r-- | block/gluster.c | 7 | ||||
-rw-r--r-- | block/iscsi.c | 16 | ||||
-rw-r--r-- | block/mirror.c | 2 | ||||
-rw-r--r-- | block/nbd.c | 6 | ||||
-rw-r--r-- | block/nvme.c | 14 | ||||
-rw-r--r-- | block/preallocate.c | 2 | ||||
-rw-r--r-- | block/qcow2.c | 2 | ||||
-rw-r--r-- | block/raw-format.c | 2 | ||||
-rw-r--r-- | block/rbd.c | 4 | ||||
-rw-r--r-- | block/throttle.c | 2 | ||||
-rw-r--r-- | block/trace-events | 4 |
18 files changed, 53 insertions, 29 deletions
diff --git a/block/blkdebug.c b/block/blkdebug.c index 742b4a3..bbf2948 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -717,7 +717,7 @@ static int coroutine_fn blkdebug_co_pwrite_zeroes(BlockDriverState *bs, } static int coroutine_fn blkdebug_co_pdiscard(BlockDriverState *bs, - int64_t offset, int bytes) + int64_t offset, int64_t bytes) { uint32_t align = bs->bl.pdiscard_alignment; int err; diff --git a/block/blklogwrites.c b/block/blklogwrites.c index d7ae64c..f7a251e 100644 --- a/block/blklogwrites.c +++ b/block/blklogwrites.c @@ -484,9 +484,9 @@ static int coroutine_fn blk_log_writes_co_flush_to_disk(BlockDriverState *bs) } static int coroutine_fn -blk_log_writes_co_pdiscard(BlockDriverState *bs, int64_t offset, int count) +blk_log_writes_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes) { - return blk_log_writes_co_log(bs, offset, count, NULL, 0, + return blk_log_writes_co_log(bs, offset, bytes, NULL, 0, blk_log_writes_co_do_file_pdiscard, LOG_DISCARD_FLAG, false); } diff --git a/block/blkreplay.c b/block/blkreplay.c index 89d74a3..dcbe780 100644 --- a/block/blkreplay.c +++ b/block/blkreplay.c @@ -105,7 +105,7 @@ static int coroutine_fn blkreplay_co_pwrite_zeroes(BlockDriverState *bs, } static int coroutine_fn blkreplay_co_pdiscard(BlockDriverState *bs, - int64_t offset, int bytes) + int64_t offset, int64_t bytes) { uint64_t reqid = blkreplay_next_id(); int ret = bdrv_co_pdiscard(bs->file, offset, bytes); diff --git a/block/copy-before-write.c b/block/copy-before-write.c index d210e87..c30a5ff 100644 --- a/block/copy-before-write.c +++ b/block/copy-before-write.c @@ -64,7 +64,7 @@ static coroutine_fn int cbw_do_copy_before_write(BlockDriverState *bs, } static int coroutine_fn cbw_co_pdiscard(BlockDriverState *bs, - int64_t offset, int bytes) + int64_t offset, int64_t bytes) { int ret = cbw_do_copy_before_write(bs, offset, bytes, 0); if (ret < 0) { diff --git a/block/copy-on-read.c b/block/copy-on-read.c index f83dd83..1fc7fb3 100644 --- 
a/block/copy-on-read.c +++ b/block/copy-on-read.c @@ -201,7 +201,7 @@ static int coroutine_fn cor_co_pwrite_zeroes(BlockDriverState *bs, static int coroutine_fn cor_co_pdiscard(BlockDriverState *bs, - int64_t offset, int bytes) + int64_t offset, int64_t bytes) { return bdrv_co_pdiscard(bs->file, offset, bytes); } diff --git a/block/file-posix.c b/block/file-posix.c index f375070..c62e427 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -2942,7 +2942,8 @@ static void raw_account_discard(BDRVRawState *s, uint64_t nbytes, int ret) } static coroutine_fn int -raw_do_pdiscard(BlockDriverState *bs, int64_t offset, int bytes, bool blkdev) +raw_do_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes, + bool blkdev) { BDRVRawState *s = bs->opaque; RawPosixAIOData acb; @@ -2966,7 +2967,7 @@ raw_do_pdiscard(BlockDriverState *bs, int64_t offset, int bytes, bool blkdev) } static coroutine_fn int -raw_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes) +raw_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes) { return raw_do_pdiscard(bs, offset, bytes, false); } @@ -3591,7 +3592,7 @@ hdev_co_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) #endif /* linux */ static coroutine_fn int -hdev_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes) +hdev_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes) { BDRVRawState *s = bs->opaque; int ret; diff --git a/block/filter-compress.c b/block/filter-compress.c index fb85686..d5be538 100644 --- a/block/filter-compress.c +++ b/block/filter-compress.c @@ -94,7 +94,7 @@ static int coroutine_fn compress_co_pwrite_zeroes(BlockDriverState *bs, static int coroutine_fn compress_co_pdiscard(BlockDriverState *bs, - int64_t offset, int bytes) + int64_t offset, int64_t bytes) { return bdrv_co_pdiscard(bs->file, offset, bytes); } diff --git a/block/gluster.c b/block/gluster.c index 4e3c9cd..398976b 100644 --- a/block/gluster.c +++ b/block/gluster.c @@ -891,6 +891,7 @@ out: static 
void qemu_gluster_refresh_limits(BlockDriverState *bs, Error **errp) { bs->bl.max_transfer = GLUSTER_MAX_TRANSFER; + bs->bl.max_pdiscard = SIZE_MAX; } static int qemu_gluster_reopen_prepare(BDRVReopenState *state, @@ -1297,18 +1298,20 @@ error: #ifdef CONFIG_GLUSTERFS_DISCARD static coroutine_fn int qemu_gluster_co_pdiscard(BlockDriverState *bs, - int64_t offset, int size) + int64_t offset, int64_t bytes) { int ret; GlusterAIOCB acb; BDRVGlusterState *s = bs->opaque; + assert(bytes <= SIZE_MAX); /* rely on max_pdiscard */ + acb.size = 0; acb.ret = 0; acb.coroutine = qemu_coroutine_self(); acb.aio_context = bdrv_get_aio_context(bs); - ret = glfs_discard_async(s->fd, offset, size, gluster_finish_aiocb, &acb); + ret = glfs_discard_async(s->fd, offset, bytes, gluster_finish_aiocb, &acb); if (ret < 0) { return -errno; } diff --git a/block/iscsi.c b/block/iscsi.c index 74ff7e3..57aa07a 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -1138,7 +1138,8 @@ iscsi_getlength(BlockDriverState *bs) } static int -coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes) +coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, + int64_t bytes) { IscsiLun *iscsilun = bs->opaque; struct IscsiTask iTask; @@ -1154,6 +1155,12 @@ coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes) return 0; } + /* + * We don't want to overflow list.num which is uint32_t. + * We rely on our max_pdiscard. 
+ */ + assert(bytes / iscsilun->block_size <= UINT32_MAX); + list.lba = offset / iscsilun->block_size; list.num = bytes / iscsilun->block_size; @@ -2071,10 +2078,9 @@ static void iscsi_refresh_limits(BlockDriverState *bs, Error **errp) } if (iscsilun->lbp.lbpu) { - if (iscsilun->bl.max_unmap < 0xffffffff / block_size) { - bs->bl.max_pdiscard = - iscsilun->bl.max_unmap * iscsilun->block_size; - } + bs->bl.max_pdiscard = + MIN_NON_ZERO(iscsilun->bl.max_unmap * iscsilun->block_size, + (uint64_t)UINT32_MAX * iscsilun->block_size); bs->bl.pdiscard_alignment = iscsilun->bl.opt_unmap_gran * iscsilun->block_size; } else { diff --git a/block/mirror.c b/block/mirror.c index fab7508..c962e8b 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -1508,7 +1508,7 @@ static int coroutine_fn bdrv_mirror_top_pwrite_zeroes(BlockDriverState *bs, } static int coroutine_fn bdrv_mirror_top_pdiscard(BlockDriverState *bs, - int64_t offset, int bytes) + int64_t offset, int64_t bytes) { return bdrv_mirror_top_do_write(bs, MIRROR_METHOD_DISCARD, offset, bytes, NULL, 0); diff --git a/block/nbd.c b/block/nbd.c index c0c479a..a66b2c2 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -1457,15 +1457,17 @@ static int nbd_client_co_flush(BlockDriverState *bs) } static int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, - int bytes) + int64_t bytes) { BDRVNBDState *s = (BDRVNBDState *)bs->opaque; NBDRequest request = { .type = NBD_CMD_TRIM, .from = offset, - .len = bytes, + .len = bytes, /* len is uint32_t */ }; + assert(bytes <= UINT32_MAX); /* rely on max_pdiscard */ + assert(!(s->info.flags & NBD_FLAG_READ_ONLY)); if (!(s->info.flags & NBD_FLAG_SEND_TRIM) || !bytes) { return 0; diff --git a/block/nvme.c b/block/nvme.c index 2e0fd9e..1cc7b62 100644 --- a/block/nvme.c +++ b/block/nvme.c @@ -1360,7 +1360,7 @@ static coroutine_fn int nvme_co_pwrite_zeroes(BlockDriverState *bs, static int coroutine_fn nvme_co_pdiscard(BlockDriverState *bs, int64_t offset, - int bytes) + int64_t bytes) { 
BDRVNVMeState *s = bs->opaque; NVMeQueuePair *ioq = s->queues[INDEX_IO(0)]; @@ -1387,6 +1387,14 @@ static int coroutine_fn nvme_co_pdiscard(BlockDriverState *bs, assert(s->queue_count > 1); + /* + * Filling the @buf requires @offset and @bytes to satisfy restrictions + * defined in nvme_refresh_limits(). + */ + assert(QEMU_IS_ALIGNED(bytes, 1UL << s->blkshift)); + assert(QEMU_IS_ALIGNED(offset, 1UL << s->blkshift)); + assert((bytes >> s->blkshift) <= UINT32_MAX); + buf = qemu_try_memalign(s->page_size, s->page_size); if (!buf) { return -ENOMEM; @@ -1490,6 +1498,10 @@ static void nvme_refresh_limits(BlockDriverState *bs, Error **errp) bs->bl.max_pwrite_zeroes = 1ULL << (s->blkshift + 16); bs->bl.pwrite_zeroes_alignment = MAX(bs->bl.request_alignment, 1UL << s->blkshift); + + bs->bl.max_pdiscard = (uint64_t)UINT32_MAX << s->blkshift; + bs->bl.pdiscard_alignment = MAX(bs->bl.request_alignment, + 1UL << s->blkshift); } static void nvme_detach_aio_context(BlockDriverState *bs) diff --git a/block/preallocate.c b/block/preallocate.c index 99e28d9..1d4233f 100644 --- a/block/preallocate.c +++ b/block/preallocate.c @@ -235,7 +235,7 @@ static coroutine_fn int preallocate_co_preadv_part( } static int coroutine_fn preallocate_co_pdiscard(BlockDriverState *bs, - int64_t offset, int bytes) + int64_t offset, int64_t bytes) { return bdrv_co_pdiscard(bs->file, offset, bytes); } diff --git a/block/qcow2.c b/block/qcow2.c index 4b2e869..d509016 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -3996,7 +3996,7 @@ static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs, } static coroutine_fn int qcow2_co_pdiscard(BlockDriverState *bs, - int64_t offset, int bytes) + int64_t offset, int64_t bytes) { int ret; BDRVQcow2State *s = bs->opaque; diff --git a/block/raw-format.c b/block/raw-format.c index a2485926..bda757f 100644 --- a/block/raw-format.c +++ b/block/raw-format.c @@ -302,7 +302,7 @@ static int coroutine_fn raw_co_pwrite_zeroes(BlockDriverState *bs, } static int 
coroutine_fn raw_co_pdiscard(BlockDriverState *bs, - int64_t offset, int bytes) + int64_t offset, int64_t bytes) { int ret; diff --git a/block/rbd.c b/block/rbd.c index 053eb8e..701fbf2 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -1197,9 +1197,9 @@ static int coroutine_fn qemu_rbd_co_flush(BlockDriverState *bs) } static int coroutine_fn qemu_rbd_co_pdiscard(BlockDriverState *bs, - int64_t offset, int count) + int64_t offset, int64_t bytes) { - return qemu_rbd_start_co(bs, offset, count, NULL, 0, RBD_AIO_DISCARD); + return qemu_rbd_start_co(bs, offset, bytes, NULL, 0, RBD_AIO_DISCARD); } #ifdef LIBRBD_SUPPORTS_WRITE_ZEROES diff --git a/block/throttle.c b/block/throttle.c index c13fe90..6e8d52f 100644 --- a/block/throttle.c +++ b/block/throttle.c @@ -145,7 +145,7 @@ static int coroutine_fn throttle_co_pwrite_zeroes(BlockDriverState *bs, } static int coroutine_fn throttle_co_pdiscard(BlockDriverState *bs, - int64_t offset, int bytes) + int64_t offset, int64_t bytes) { ThrottleGroupMember *tgm = bs->opaque; throttle_group_co_io_limits_intercept(tgm, bytes, true); diff --git a/block/trace-events b/block/trace-events index d8a0856..f2d0a9b 100644 --- a/block/trace-events +++ b/block/trace-events @@ -152,8 +152,8 @@ nvme_write_zeroes(void *s, uint64_t offset, uint64_t bytes, int flags) "s %p off nvme_qiov_unaligned(const void *qiov, int n, void *base, size_t size, int align) "qiov %p n %d base %p size 0x%zx align 0x%x" nvme_prw_buffered(void *s, uint64_t offset, uint64_t bytes, int niov, int is_write) "s %p offset 0x%"PRIx64" bytes %"PRId64" niov %d is_write %d" nvme_rw_done(void *s, int is_write, uint64_t offset, uint64_t bytes, int ret) "s %p is_write %d offset 0x%"PRIx64" bytes %"PRId64" ret %d" -nvme_dsm(void *s, uint64_t offset, uint64_t bytes) "s %p offset 0x%"PRIx64" bytes %"PRId64"" -nvme_dsm_done(void *s, uint64_t offset, uint64_t bytes, int ret) "s %p offset 0x%"PRIx64" bytes %"PRId64" ret %d" +nvme_dsm(void *s, int64_t offset, int64_t bytes) "s %p offset 
0x%"PRIx64" bytes %"PRId64"" +nvme_dsm_done(void *s, int64_t offset, int64_t bytes, int ret) "s %p offset 0x%"PRIx64" bytes %"PRId64" ret %d" nvme_dma_map_flush(void *s) "s %p" nvme_free_req_queue_wait(void *s, unsigned q_index) "s %p q #%u" nvme_create_queue_pair(unsigned q_index, void *q, size_t size, void *aio_context, int fd) "index %u q %p size %zu aioctx %p fd %d" |