author     Eric Blake <eblake@redhat.com>    2023-09-25 14:22:42 -0500
committer  Eric Blake <eblake@redhat.com>    2023-10-05 11:02:08 -0500
commit     2dcbb11b399ada51f734229b612e4f561a2aae0a
tree       49c6f465cdb59bd7a969bafe5645bbafcec0122c /nbd
parent     1dec4643d13cb3ba8315e07ce89c7fd4faaff2de
nbd/server: Add FLAG_PAYLOAD support to CMD_BLOCK_STATUS
Allow a client to request a subset of negotiated meta contexts. For example, a client may ask to use a single connection to learn about both block status and dirty bitmaps, but where the dirty bitmap queries only need to be performed on a subset of the disk; forcing the server to compute that information on block status queries in the rest of the disk is wasted effort (both at the server, and in the amount of traffic sent over the wire to be parsed and ignored by the client).

Qemu as an NBD client never requests to use more than one meta context, so it has no need to use block status payloads. Testing this instead requires support from libnbd, which CAN access multiple meta contexts in parallel from a single NBD connection; an interop test submitted to the libnbd project at the same time as this patch demonstrates the feature working, as well as testing some corner cases (for example, when the payload length is longer than the export length), although other corner cases (like passing the same id duplicated) require a protocol fuzzer, because libnbd is not wired up to break the protocol that badly.

This also includes tweaks to 'qemu-nbd --list' to show when a server is advertising the capability, and to the testsuite to reflect the addition to that output.

Of note: qemu will always advertise the new feature bit during NBD_OPT_INFO if extended headers have already been negotiated (regardless of whether any NBD_OPT_SET_META_CONTEXT negotiation has occurred); but for NBD_OPT_GO, qemu only advertises the feature if block status is also enabled (that is, if the client does not negotiate any contexts, then NBD_CMD_BLOCK_STATUS cannot be used, so the feature is not advertised).

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-ID: <20230925192229.3186470-26-eblake@redhat.com>
[eblake: fix logic to reject unnegotiated contexts]
Signed-off-by: Eric Blake <eblake@redhat.com>
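[Editor's illustration, not part of this patch.] Based on the parsing done by nbd_co_block_status_payload_read() in the diff below, the client's NBD_CMD_BLOCK_STATUS payload is a fixed NBDBlockStatusPayload header carrying a 64-bit big-endian effective length, followed by one 32-bit big-endian id per requested meta context. A minimal client-side sketch of encoding such a payload could look like the following; the function name encode_block_status_payload is hypothetical and is not qemu or libnbd API.

/*
 * Hypothetical sketch: encode an NBD_CMD_BLOCK_STATUS payload as the
 * server-side parser in this patch expects it -- a 64-bit big-endian
 * effective length, then one 32-bit big-endian id per requested
 * meta context.  Returns the number of bytes written, or 0 if the
 * caller's buffer is too small.
 */
#include <stddef.h>
#include <stdint.h>

static size_t encode_block_status_payload(uint8_t *buf, size_t buflen,
                                          uint64_t effect_length,
                                          const uint32_t *ids, size_t nr_ids)
{
    size_t need = 8 + 4 * nr_ids;
    size_t i;

    if (buflen < need) {
        return 0;                   /* caller's buffer is too small */
    }
    for (i = 0; i < 8; i++) {       /* effective length, big-endian */
        buf[i] = (uint8_t)(effect_length >> (56 - 8 * i));
    }
    for (i = 0; i < nr_ids; i++) {  /* context ids, big-endian */
        uint32_t id = ids[i];
        buf[8 + 4 * i + 0] = (uint8_t)(id >> 24);
        buf[8 + 4 * i + 1] = (uint8_t)(id >> 16);
        buf[8 + 4 * i + 2] = (uint8_t)(id >> 8);
        buf[8 + 4 * i + 3] = (uint8_t)(id >> 0);
    }
    return need;                    /* bytes sent as the command's payload */
}

On receipt, the server clamps request->len to the encoded effective length and answers only for the listed context ids; a malformed payload is traced and later turned into an NBD_EINVAL reply, as the new function in the diff below shows.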
Diffstat (limited to 'nbd')
-rw-r--r--  nbd/server.c      | 117
-rw-r--r--  nbd/trace-events  |   1
2 files changed, 111 insertions(+), 7 deletions(-)
diff --git a/nbd/server.c b/nbd/server.c
index 2dce9c3..859c163 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -512,6 +512,9 @@ static int nbd_negotiate_handle_export_name(NBDClient *client, bool no_zeroes,
     if (client->mode >= NBD_MODE_STRUCTURED) {
         myflags |= NBD_FLAG_SEND_DF;
     }
+    if (client->mode >= NBD_MODE_EXTENDED && client->contexts.count) {
+        myflags |= NBD_FLAG_BLOCK_STAT_PAYLOAD;
+    }
     trace_nbd_negotiate_new_style_size_flags(client->exp->size, myflags);
     stq_be_p(buf, client->exp->size);
     stw_be_p(buf + 8, myflags);
@@ -699,6 +702,10 @@ static int nbd_negotiate_handle_info(NBDClient *client, Error **errp)
     if (client->mode >= NBD_MODE_STRUCTURED) {
         myflags |= NBD_FLAG_SEND_DF;
     }
+    if (client->mode >= NBD_MODE_EXTENDED &&
+        (client->contexts.count || client->opt == NBD_OPT_INFO)) {
+        myflags |= NBD_FLAG_BLOCK_STAT_PAYLOAD;
+    }
     trace_nbd_negotiate_new_style_size_flags(exp->size, myflags);
     stq_be_p(buf, exp->size);
     stw_be_p(buf + 8, myflags);
@@ -2420,6 +2427,90 @@ static int coroutine_fn nbd_co_send_bitmap(NBDClient *client,
     return nbd_co_send_extents(client, request, ea, last, context_id, errp);
 }
 
+/*
+ * nbd_co_block_status_payload_read
+ * Called when a client wants a subset of negotiated contexts via a
+ * BLOCK_STATUS payload. Check the payload for valid length and
+ * contents. On success, return 0 with request updated to effective
+ * length. If request was invalid but all payload consumed, return 0
+ * with request->len and request->contexts->count set to 0 (which will
+ * trigger an appropriate NBD_EINVAL response later on). Return
+ * negative errno if the payload was not fully consumed.
+ */
+static int
+nbd_co_block_status_payload_read(NBDClient *client, NBDRequest *request,
+                                 Error **errp)
+{
+    uint64_t payload_len = request->len;
+    g_autofree char *buf = NULL;
+    size_t count, i, nr_bitmaps;
+    uint32_t id;
+
+    if (payload_len > NBD_MAX_BUFFER_SIZE) {
+        error_setg(errp, "len (%" PRIu64 ") is larger than max len (%u)",
+                   request->len, NBD_MAX_BUFFER_SIZE);
+        return -EINVAL;
+    }
+
+    assert(client->contexts.exp == client->exp);
+    nr_bitmaps = client->exp->nr_export_bitmaps;
+    request->contexts = g_new0(NBDMetaContexts, 1);
+    request->contexts->exp = client->exp;
+
+    if (payload_len % sizeof(uint32_t) ||
+        payload_len < sizeof(NBDBlockStatusPayload) ||
+        payload_len > (sizeof(NBDBlockStatusPayload) +
+                       sizeof(id) * client->contexts.count)) {
+        goto skip;
+    }
+
+    buf = g_malloc(payload_len);
+    if (nbd_read(client->ioc, buf, payload_len,
+                 "CMD_BLOCK_STATUS data", errp) < 0) {
+        return -EIO;
+    }
+    trace_nbd_co_receive_request_payload_received(request->cookie,
+                                                  payload_len);
+    request->contexts->bitmaps = g_new0(bool, nr_bitmaps);
+    count = (payload_len - sizeof(NBDBlockStatusPayload)) / sizeof(id);
+    payload_len = 0;
+
+    for (i = 0; i < count; i++) {
+        id = ldl_be_p(buf + sizeof(NBDBlockStatusPayload) + sizeof(id) * i);
+        if (id == NBD_META_ID_BASE_ALLOCATION) {
+            if (!client->contexts.base_allocation ||
+                request->contexts->base_allocation) {
+                goto skip;
+            }
+            request->contexts->base_allocation = true;
+        } else if (id == NBD_META_ID_ALLOCATION_DEPTH) {
+            if (!client->contexts.allocation_depth ||
+                request->contexts->allocation_depth) {
+                goto skip;
+            }
+            request->contexts->allocation_depth = true;
+        } else {
+            unsigned idx = id - NBD_META_ID_DIRTY_BITMAP;
+
+            if (idx >= nr_bitmaps || !client->contexts.bitmaps[idx] ||
+                request->contexts->bitmaps[idx]) {
+                goto skip;
+            }
+            request->contexts->bitmaps[idx] = true;
+        }
+    }
+
+    request->len = ldq_be_p(buf);
+    request->contexts->count = count;
+    return 0;
+
+ skip:
+    trace_nbd_co_receive_block_status_payload_compliance(request->from,
+                                                         request->len);
+    request->len = request->contexts->count = 0;
+    return nbd_drop(client->ioc, payload_len, errp);
+}
+
 /* nbd_co_receive_request
  * Collect a client request. Return 0 if request looks valid, -EIO to drop
  * connection right away, -EAGAIN to indicate we were interrupted and the
@@ -2505,7 +2596,18 @@ static int coroutine_fn nbd_co_receive_request(NBDRequestData *req,
         break;
 
     case NBD_CMD_BLOCK_STATUS:
-        request->contexts = &client->contexts;
+        if (extended_with_payload) {
+            ret = nbd_co_block_status_payload_read(client, request, errp);
+            if (ret < 0) {
+                return ret;
+            }
+            /* payload now consumed */
+            check_length = false;
+            payload_len = 0;
+            valid_flags |= NBD_CMD_FLAG_PAYLOAD_LEN;
+        } else {
+            request->contexts = &client->contexts;
+        }
         valid_flags |= NBD_CMD_FLAG_REQ_ONE;
         break;
 
@@ -2750,16 +2852,16 @@ static coroutine_fn int nbd_handle_request(NBDClient *client,
 
     case NBD_CMD_BLOCK_STATUS:
         assert(request->contexts);
-        if (!request->len) {
-            return nbd_send_generic_reply(client, request, -EINVAL,
-                                          "need non-zero length", errp);
-        }
         assert(client->mode >= NBD_MODE_EXTENDED ||
               request->len <= UINT32_MAX);
         if (request->contexts->count) {
             bool dont_fragment = request->flags & NBD_CMD_FLAG_REQ_ONE;
             int contexts_remaining = request->contexts->count;
 
+            if (!request->len) {
+                return nbd_send_generic_reply(client, request, -EINVAL,
+                                              "need non-zero length", errp);
+            }
             if (request->contexts->base_allocation) {
                 ret = nbd_co_send_block_status(client, request,
                                                exp->common.blk,
@@ -2896,8 +2998,9 @@ static coroutine_fn void nbd_trip(void *opaque)
         goto disconnect;
     }
 
-    /* We must disconnect after NBD_CMD_WRITE if we did not
-     * read the payload.
+    /*
+     * We must disconnect after NBD_CMD_WRITE or BLOCK_STATUS with
+     * payload if we did not read the payload.
      */
     if (!req->complete) {
         error_setg(&local_err, "Request handling failed in intermediate state");
diff --git a/nbd/trace-events b/nbd/trace-events
index cb5d719..00ae321 100644
--- a/nbd/trace-events
+++ b/nbd/trace-events
@@ -70,6 +70,7 @@ nbd_co_send_chunk_read(uint64_t cookie, uint64_t offset, void *data, uint64_t si
 nbd_co_send_chunk_read_hole(uint64_t cookie, uint64_t offset, uint64_t size) "Send structured read hole reply: cookie = %" PRIu64 ", offset = %" PRIu64 ", len = %" PRIu64
 nbd_co_send_extents(uint64_t cookie, unsigned int extents, uint32_t id, uint64_t length, int last) "Send block status reply: cookie = %" PRIu64 ", extents = %u, context = %d (extents cover %" PRIu64 " bytes, last chunk = %d)"
 nbd_co_send_chunk_error(uint64_t cookie, int err, const char *errname, const char *msg) "Send structured error reply: cookie = %" PRIu64 ", error = %d (%s), msg = '%s'"
+nbd_co_receive_block_status_payload_compliance(uint64_t from, uint64_t len) "client sent unusable block status payload: from=0x%" PRIx64 ", len=0x%" PRIx64
 nbd_co_receive_request_decode_type(uint64_t cookie, uint16_t type, const char *name) "Decoding type: cookie = %" PRIu64 ", type = %" PRIu16 " (%s)"
 nbd_co_receive_request_payload_received(uint64_t cookie, uint64_t len) "Payload received: cookie = %" PRIu64 ", len = %" PRIu64
 nbd_co_receive_ext_payload_compliance(uint64_t from, uint64_t len) "client sent non-compliant write without payload flag: from=0x%" PRIx64 ", len=0x%" PRIx64