aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2019-04-02 03:46:30 +0100
committerPeter Maydell <peter.maydell@linaro.org>2019-04-02 03:46:30 +0100
commit47175951a691a8a2d483f0cbc7f31b4e3814bffa (patch)
tree647d88581930fed3c16baf82cd4d9c816ea59024
parent230ce19814ecc6bff8edac3b5b86e7c82f422c6c (diff)
parent75d34eb98ca0bb2f49d607fcaefd9c482e56b15d (diff)
downloadqemu-47175951a691a8a2d483f0cbc7f31b4e3814bffa.zip
qemu-47175951a691a8a2d483f0cbc7f31b4e3814bffa.tar.gz
qemu-47175951a691a8a2d483f0cbc7f31b4e3814bffa.tar.bz2
Merge remote-tracking branch 'remotes/ericb/tags/pull-nbd-2019-04-01' into staging
nbd patches for 2019-04-01 - Better behavior of qemu-img map on NBD images - Fixes for NBD protocol alignment corner cases: - the server has fewer places where it sends reads or block status not aligned to its advertised block size - the client has more cases where it can work around server non-compliance present in qemu 3.1 - the client now avoids non-compliant requests when interoperating with nbdkit or other servers not advertising block size # gpg: Signature made Mon 01 Apr 2019 15:06:54 BST # gpg: using RSA key A7A16B4A2527436A # gpg: Good signature from "Eric Blake <eblake@redhat.com>" [full] # gpg: aka "Eric Blake (Free Software Programmer) <ebb9@byu.net>" [full] # gpg: aka "[jpeg image of size 6874]" [full] # Primary key fingerprint: 71C2 CC22 B1C4 6029 27D2 F3AA A7A1 6B4A 2527 436A * remotes/ericb/tags/pull-nbd-2019-04-01: nbd/client: Trace server noncompliance on structured reads nbd/server: Advertise actual minimum block size block: Add bdrv_get_request_alignment() nbd/client: Support qemu-img convert from unaligned size nbd/client: Reject inaccessible tail of inconsistent server nbd/client: Report offsets in bdrv_block_status nbd/client: Lower min_block for block-status, unaligned size iotests: Add 241 to test NBD on unaligned images nbd-client: Work around server BLOCK_STATUS misalignment at EOF qemu-img: Gracefully shutdown when map can't finish nbd: Permit simple error to NBD_CMD_BLOCK_STATUS nbd: Don't lose server's error to NBD_CMD_BLOCK_STATUS nbd: Tolerate some server non-compliance in NBD_CMD_BLOCK_STATUS qemu-img: Report bdrv_block_status failures Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r--block/block-backend.c7
-rw-r--r--block/nbd-client.c124
-rw-r--r--block/nbd.c19
-rw-r--r--block/trace-events2
-rw-r--r--include/sysemu/block-backend.h1
-rw-r--r--nbd/client.c8
-rw-r--r--nbd/server.c13
-rw-r--r--qemu-img.c16
-rw-r--r--tests/qemu-iotests/209.out4
-rw-r--r--tests/qemu-iotests/223.out22
-rw-r--r--tests/qemu-iotests/233.out2
-rwxr-xr-xtests/qemu-iotests/241100
-rw-r--r--tests/qemu-iotests/241.out28
-rw-r--r--tests/qemu-iotests/group1
14 files changed, 299 insertions, 48 deletions
diff --git a/block/block-backend.c b/block/block-backend.c
index edad02a..f78e82a 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -1764,6 +1764,13 @@ int blk_get_flags(BlockBackend *blk)
}
}
+/* Returns the minimum request alignment, in bytes; guaranteed nonzero */
+uint32_t blk_get_request_alignment(BlockBackend *blk)
+{
+ BlockDriverState *bs = blk_bs(blk);
+ return bs ? bs->bl.request_alignment : BDRV_SECTOR_SIZE;
+}
+
/* Returns the maximum transfer length, in bytes; guaranteed nonzero */
uint32_t blk_get_max_transfer(BlockBackend *blk)
{
diff --git a/block/nbd-client.c b/block/nbd-client.c
index bfbaf7e..790ecc1 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -211,7 +211,8 @@ static inline uint64_t payload_advance64(uint8_t **payload)
return ldq_be_p(*payload - 8);
}
-static int nbd_parse_offset_hole_payload(NBDStructuredReplyChunk *chunk,
+static int nbd_parse_offset_hole_payload(NBDClientSession *client,
+ NBDStructuredReplyChunk *chunk,
uint8_t *payload, uint64_t orig_offset,
QEMUIOVector *qiov, Error **errp)
{
@@ -233,6 +234,10 @@ static int nbd_parse_offset_hole_payload(NBDStructuredReplyChunk *chunk,
" region");
return -EINVAL;
}
+ if (client->info.min_block &&
+ !QEMU_IS_ALIGNED(hole_size, client->info.min_block)) {
+ trace_nbd_structured_read_compliance("hole");
+ }
qemu_iovec_memset(qiov, offset - orig_offset, 0, hole_size);
@@ -240,8 +245,8 @@ static int nbd_parse_offset_hole_payload(NBDStructuredReplyChunk *chunk,
}
/* nbd_parse_blockstatus_payload
- * support only one extent in reply and only for
- * base:allocation context
+ * Based on our request, we expect only one extent in reply, for the
+ * base:allocation context.
*/
static int nbd_parse_blockstatus_payload(NBDClientSession *client,
NBDStructuredReplyChunk *chunk,
@@ -250,7 +255,8 @@ static int nbd_parse_blockstatus_payload(NBDClientSession *client,
{
uint32_t context_id;
- if (chunk->length != sizeof(context_id) + sizeof(*extent)) {
+ /* The server succeeded, so it must have sent [at least] one extent */
+ if (chunk->length < sizeof(context_id) + sizeof(*extent)) {
error_setg(errp, "Protocol error: invalid payload for "
"NBD_REPLY_TYPE_BLOCK_STATUS");
return -EINVAL;
@@ -268,18 +274,50 @@ static int nbd_parse_blockstatus_payload(NBDClientSession *client,
extent->length = payload_advance32(&payload);
extent->flags = payload_advance32(&payload);
- if (extent->length == 0 ||
- (client->info.min_block && !QEMU_IS_ALIGNED(extent->length,
- client->info.min_block))) {
+ if (extent->length == 0) {
error_setg(errp, "Protocol error: server sent status chunk with "
- "invalid length");
+ "zero length");
return -EINVAL;
}
- /* The server is allowed to send us extra information on the final
- * extent; just clamp it to the length we requested. */
+ /*
+ * A server sending unaligned block status is in violation of the
+ * protocol, but as qemu-nbd 3.1 is such a server (at least for
+ * POSIX files that are not a multiple of 512 bytes, since qemu
+ * rounds files up to 512-byte multiples but lseek(SEEK_HOLE)
+ * still sees an implicit hole beyond the real EOF), it's nicer to
+ * work around the misbehaving server. If the request included
+ * more than the final unaligned block, truncate it back to an
+ * aligned result; if the request was only the final block, round
+ * up to the full block and change the status to fully-allocated
+ * (always a safe status, even if it loses information).
+ */
+ if (client->info.min_block && !QEMU_IS_ALIGNED(extent->length,
+ client->info.min_block)) {
+ trace_nbd_parse_blockstatus_compliance("extent length is unaligned");
+ if (extent->length > client->info.min_block) {
+ extent->length = QEMU_ALIGN_DOWN(extent->length,
+ client->info.min_block);
+ } else {
+ extent->length = client->info.min_block;
+ extent->flags = 0;
+ }
+ }
+
+ /*
+ * We used NBD_CMD_FLAG_REQ_ONE, so the server should not have
+ * sent us any more than one extent, nor should it have included
+ * status beyond our request in that extent. However, it's easy
+ * enough to ignore the server's noncompliance without killing the
+ * connection; just ignore trailing extents, and clamp things to
+ * the length of our request.
+ */
+ if (chunk->length > sizeof(context_id) + sizeof(*extent)) {
+ trace_nbd_parse_blockstatus_compliance("more than one extent");
+ }
if (extent->length > orig_length) {
extent->length = orig_length;
+ trace_nbd_parse_blockstatus_compliance("extent length too large");
}
return 0;
@@ -357,6 +395,9 @@ static int nbd_co_receive_offset_data_payload(NBDClientSession *s,
" region");
return -EINVAL;
}
+ if (s->info.min_block && !QEMU_IS_ALIGNED(data_size, s->info.min_block)) {
+ trace_nbd_structured_read_compliance("data");
+ }
qemu_iovec_init(&sub_qiov, qiov->niov);
qemu_iovec_concat(&sub_qiov, qiov, offset - orig_offset, data_size);
@@ -679,7 +720,7 @@ static int nbd_co_receive_cmdread_reply(NBDClientSession *s, uint64_t handle,
* in qiov */
break;
case NBD_REPLY_TYPE_OFFSET_HOLE:
- ret = nbd_parse_offset_hole_payload(&reply.structured, payload,
+ ret = nbd_parse_offset_hole_payload(s, &reply.structured, payload,
offset, qiov, &local_err);
if (ret < 0) {
s->quit = true;
@@ -718,9 +759,7 @@ static int nbd_co_receive_blockstatus_reply(NBDClientSession *s,
bool received = false;
assert(!extent->length);
- NBD_FOREACH_REPLY_CHUNK(s, iter, handle, s->info.structured_reply,
- NULL, &reply, &payload)
- {
+ NBD_FOREACH_REPLY_CHUNK(s, iter, handle, false, NULL, &reply, &payload) {
int ret;
NBDStructuredReplyChunk *chunk = &reply.structured;
@@ -758,12 +797,9 @@ static int nbd_co_receive_blockstatus_reply(NBDClientSession *s,
payload = NULL;
}
- if (!extent->length && !iter.err) {
- error_setg(&iter.err,
- "Server did not reply with any status extents");
- if (!iter.ret) {
- iter.ret = -EIO;
- }
+ if (!extent->length && !iter.request_ret) {
+ error_setg(&local_err, "Server did not reply with any status extents");
+ nbd_iter_channel_error(&iter, -EIO, &local_err);
}
error_propagate(errp, iter.err);
@@ -820,6 +856,25 @@ int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
if (!bytes) {
return 0;
}
+ /*
+ * Work around the fact that the block layer doesn't do
+ * byte-accurate sizing yet - if the read exceeds the server's
+ * advertised size because the block layer rounded size up, then
+ * truncate the request to the server and tail-pad with zero.
+ */
+ if (offset >= client->info.size) {
+ assert(bytes < BDRV_SECTOR_SIZE);
+ qemu_iovec_memset(qiov, 0, 0, bytes);
+ return 0;
+ }
+ if (offset + bytes > client->info.size) {
+ uint64_t slop = offset + bytes - client->info.size;
+
+ assert(slop < BDRV_SECTOR_SIZE);
+ qemu_iovec_memset(qiov, bytes - slop, 0, slop);
+ request.len -= slop;
+ }
+
ret = nbd_co_send_request(bs, &request, NULL);
if (ret < 0) {
return ret;
@@ -938,15 +993,35 @@ int coroutine_fn nbd_client_co_block_status(BlockDriverState *bs,
.from = offset,
.len = MIN(MIN_NON_ZERO(QEMU_ALIGN_DOWN(INT_MAX,
bs->bl.request_alignment),
- client->info.max_block), bytes),
+ client->info.max_block),
+ MIN(bytes, client->info.size - offset)),
.flags = NBD_CMD_FLAG_REQ_ONE,
};
if (!client->info.base_allocation) {
*pnum = bytes;
- return BDRV_BLOCK_DATA;
+ *map = offset;
+ *file = bs;
+ return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
}
+ /*
+ * Work around the fact that the block layer doesn't do
+ * byte-accurate sizing yet - if the status request exceeds the
+ * server's advertised size because the block layer rounded size
+ * up, we truncated the request to the server (above), or are
+ * called on just the hole.
+ */
+ if (offset >= client->info.size) {
+ *pnum = bytes;
+ assert(bytes < BDRV_SECTOR_SIZE);
+ /* Intentionally don't report offset_valid for the hole */
+ return BDRV_BLOCK_ZERO;
+ }
+
+ if (client->info.min_block) {
+ assert(QEMU_IS_ALIGNED(request.len, client->info.min_block));
+ }
ret = nbd_co_send_request(bs, &request, NULL);
if (ret < 0) {
return ret;
@@ -967,8 +1042,11 @@ int coroutine_fn nbd_client_co_block_status(BlockDriverState *bs,
assert(extent.length);
*pnum = extent.length;
+ *map = offset;
+ *file = bs;
return (extent.flags & NBD_STATE_HOLE ? 0 : BDRV_BLOCK_DATA) |
- (extent.flags & NBD_STATE_ZERO ? BDRV_BLOCK_ZERO : 0);
+ (extent.flags & NBD_STATE_ZERO ? BDRV_BLOCK_ZERO : 0) |
+ BDRV_BLOCK_OFFSET_VALID;
}
void nbd_client_detach_aio_context(BlockDriverState *bs)
diff --git a/block/nbd.c b/block/nbd.c
index 2e72df5..208be59 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -437,7 +437,24 @@ static void nbd_refresh_limits(BlockDriverState *bs, Error **errp)
uint32_t min = s->info.min_block;
uint32_t max = MIN_NON_ZERO(NBD_MAX_BUFFER_SIZE, s->info.max_block);
- bs->bl.request_alignment = min ? min : BDRV_SECTOR_SIZE;
+ /*
+ * If the server did not advertise an alignment:
+ * - a size that is not sector-aligned implies that an alignment
+ * of 1 can be used to access those tail bytes
+ * - advertisement of block status requires an alignment of 1, so
+ * that we don't violate block layer constraints that block
+ * status is always aligned (as we can't control whether the
+ * server will report sub-sector extents, such as a hole at EOF
+ * on an unaligned POSIX file)
+ * - otherwise, assume the server is so old that we are safer avoiding
+ * sub-sector requests
+ */
+ if (!min) {
+ min = (!QEMU_IS_ALIGNED(s->info.size, BDRV_SECTOR_SIZE) ||
+ s->info.base_allocation) ? 1 : BDRV_SECTOR_SIZE;
+ }
+
+ bs->bl.request_alignment = min;
bs->bl.max_pdiscard = max;
bs->bl.max_pwrite_zeroes = max;
bs->bl.max_transfer = max;
diff --git a/block/trace-events b/block/trace-events
index e6bb5a8..7335a42 100644
--- a/block/trace-events
+++ b/block/trace-events
@@ -157,6 +157,8 @@ nvme_cmd_map_qiov_iov(void *s, int i, void *page, int pages) "s %p iov[%d] %p pa
iscsi_xcopy(void *src_lun, uint64_t src_off, void *dst_lun, uint64_t dst_off, uint64_t bytes, int ret) "src_lun %p offset %"PRIu64" dst_lun %p offset %"PRIu64" bytes %"PRIu64" ret %d"
# nbd-client.c
+nbd_parse_blockstatus_compliance(const char *err) "ignoring extra data from non-compliant server: %s"
+nbd_structured_read_compliance(const char *type) "server sent non-compliant unaligned read %s chunk"
nbd_read_reply_entry_fail(int ret, const char *err) "ret = %d, err: %s"
nbd_co_request_fail(uint64_t from, uint32_t len, uint64_t handle, uint16_t flags, uint16_t type, const char *name, int ret, const char *err) "Request failed { .from = %" PRIu64", .len = %" PRIu32 ", .handle = %" PRIu64 ", .flags = 0x%" PRIx16 ", .type = %" PRIu16 " (%s) } ret = %d, err: %s"
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index e2066eb..3be05c2 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -177,6 +177,7 @@ bool blk_is_available(BlockBackend *blk);
void blk_lock_medium(BlockBackend *blk, bool locked);
void blk_eject(BlockBackend *blk, bool eject_flag);
int blk_get_flags(BlockBackend *blk);
+uint32_t blk_get_request_alignment(BlockBackend *blk);
uint32_t blk_get_max_transfer(BlockBackend *blk);
int blk_get_max_iov(BlockBackend *blk);
void blk_set_guest_block_size(BlockBackend *blk, int align);
diff --git a/nbd/client.c b/nbd/client.c
index de7da48..427980b 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -426,6 +426,14 @@ static int nbd_opt_info_or_go(QIOChannel *ioc, uint32_t opt,
nbd_send_opt_abort(ioc);
return -1;
}
+ if (info->min_block &&
+ !QEMU_IS_ALIGNED(info->size, info->min_block)) {
+ error_setg(errp, "export size %" PRIu64 "is not multiple of "
+ "minimum block size %" PRIu32, info->size,
+ info->min_block);
+ nbd_send_opt_abort(ioc);
+ return -1;
+ }
trace_nbd_receive_negotiate_size_flags(info->size, info->flags);
break;
diff --git a/nbd/server.c b/nbd/server.c
index fd013a2..218a2aa 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -607,13 +607,16 @@ static int nbd_negotiate_handle_info(NBDClient *client, uint16_t myflags,
/* Send NBD_INFO_BLOCK_SIZE always, but tweak the minimum size
* according to whether the client requested it, and according to
* whether this is OPT_INFO or OPT_GO. */
- /* minimum - 1 for back-compat, or 512 if client is new enough.
- * TODO: consult blk_bs(blk)->bl.request_alignment? */
- sizes[0] =
- (client->opt == NBD_OPT_INFO || blocksize) ? BDRV_SECTOR_SIZE : 1;
+ /* minimum - 1 for back-compat, or actual if client will obey it. */
+ if (client->opt == NBD_OPT_INFO || blocksize) {
+ sizes[0] = blk_get_request_alignment(exp->blk);
+ } else {
+ sizes[0] = 1;
+ }
+ assert(sizes[0] <= NBD_MAX_BUFFER_SIZE);
/* preferred - Hard-code to 4096 for now.
* TODO: is blk_bs(blk)->bl.opt_transfer appropriate? */
- sizes[1] = 4096;
+ sizes[1] = MAX(4096, sizes[0]);
/* maximum - At most 32M, but smaller as appropriate. */
sizes[2] = MIN(blk_get_max_transfer(exp->blk), NBD_MAX_BUFFER_SIZE);
trace_nbd_negotiate_handle_info_block_size(sizes[0], sizes[1], sizes[2]);
diff --git a/qemu-img.c b/qemu-img.c
index 8ee63da..76a961d 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -1630,6 +1630,8 @@ static int convert_iteration_sectors(ImgConvertState *s, int64_t sector_num)
count, &count, NULL, NULL);
}
if (ret < 0) {
+ error_report("error while reading block status of sector %" PRId64
+ ": %s", sector_num, strerror(-ret));
return ret;
}
n = DIV_ROUND_UP(count, BDRV_SECTOR_SIZE);
@@ -2734,14 +2736,14 @@ static int img_info(int argc, char **argv)
return 0;
}
-static void dump_map_entry(OutputFormat output_format, MapEntry *e,
- MapEntry *next)
+static int dump_map_entry(OutputFormat output_format, MapEntry *e,
+ MapEntry *next)
{
switch (output_format) {
case OFORMAT_HUMAN:
if (e->data && !e->has_offset) {
error_report("File contains external, encrypted or compressed clusters.");
- exit(1);
+ return -1;
}
if (e->data && !e->zero) {
printf("%#-16"PRIx64"%#-16"PRIx64"%#-16"PRIx64"%s\n",
@@ -2774,6 +2776,7 @@ static void dump_map_entry(OutputFormat output_format, MapEntry *e,
}
break;
}
+ return 0;
}
static int get_block_status(BlockDriverState *bs, int64_t offset,
@@ -2966,12 +2969,15 @@ static int img_map(int argc, char **argv)
}
if (curr.length > 0) {
- dump_map_entry(output_format, &curr, &next);
+ ret = dump_map_entry(output_format, &curr, &next);
+ if (ret < 0) {
+ goto out;
+ }
}
curr = next;
}
- dump_map_entry(output_format, &curr, NULL);
+ ret = dump_map_entry(output_format, &curr, NULL);
out:
blk_unref(blk);
diff --git a/tests/qemu-iotests/209.out b/tests/qemu-iotests/209.out
index 0d29724..214e27b 100644
--- a/tests/qemu-iotests/209.out
+++ b/tests/qemu-iotests/209.out
@@ -1,2 +1,2 @@
-[{ "start": 0, "length": 524288, "depth": 0, "zero": false, "data": true},
-{ "start": 524288, "length": 524288, "depth": 0, "zero": true, "data": false}]
+[{ "start": 0, "length": 524288, "depth": 0, "zero": false, "data": true, "offset": 0},
+{ "start": 524288, "length": 524288, "depth": 0, "zero": true, "data": false, "offset": 524288}]
diff --git a/tests/qemu-iotests/223.out b/tests/qemu-iotests/223.out
index 95c40a1..d5201b2 100644
--- a/tests/qemu-iotests/223.out
+++ b/tests/qemu-iotests/223.out
@@ -41,7 +41,7 @@ exports available: 2
export: 'n'
size: 4194304
flags: 0x4ef ( readonly flush fua trim zeroes df cache )
- min block: 512
+ min block: 1
opt block: 4096
max block: 33554432
available meta contexts: 2
@@ -50,7 +50,7 @@ exports available: 2
export: 'n2'
size: 4194304
flags: 0x4ed ( flush fua trim zeroes df cache )
- min block: 512
+ min block: 1
opt block: 4096
max block: 33554432
available meta contexts: 2
@@ -67,18 +67,18 @@ read 1048576/1048576 bytes at offset 1048576
1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 2097152/2097152 bytes at offset 2097152
2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-[{ "start": 0, "length": 4096, "depth": 0, "zero": false, "data": true},
-{ "start": 4096, "length": 1044480, "depth": 0, "zero": true, "data": false},
-{ "start": 1048576, "length": 3145728, "depth": 0, "zero": false, "data": true}]
+[{ "start": 0, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET},
+{ "start": 4096, "length": 1044480, "depth": 0, "zero": true, "data": false, "offset": OFFSET},
+{ "start": 1048576, "length": 3145728, "depth": 0, "zero": false, "data": true, "offset": OFFSET}]
[{ "start": 0, "length": 65536, "depth": 0, "zero": false, "data": false},
-{ "start": 65536, "length": 2031616, "depth": 0, "zero": false, "data": true},
+{ "start": 65536, "length": 2031616, "depth": 0, "zero": false, "data": true, "offset": OFFSET},
{ "start": 2097152, "length": 2097152, "depth": 0, "zero": false, "data": false}]
=== Contrast to small granularity dirty-bitmap ===
-[{ "start": 0, "length": 512, "depth": 0, "zero": false, "data": true},
+[{ "start": 0, "length": 512, "depth": 0, "zero": false, "data": true, "offset": OFFSET},
{ "start": 512, "length": 512, "depth": 0, "zero": false, "data": false},
-{ "start": 1024, "length": 2096128, "depth": 0, "zero": false, "data": true},
+{ "start": 1024, "length": 2096128, "depth": 0, "zero": false, "data": true, "offset": OFFSET},
{ "start": 2097152, "length": 2097152, "depth": 0, "zero": false, "data": false}]
=== End qemu NBD server ===
@@ -94,10 +94,10 @@ read 2097152/2097152 bytes at offset 2097152
=== Use qemu-nbd as server ===
[{ "start": 0, "length": 65536, "depth": 0, "zero": false, "data": false},
-{ "start": 65536, "length": 2031616, "depth": 0, "zero": false, "data": true},
+{ "start": 65536, "length": 2031616, "depth": 0, "zero": false, "data": true, "offset": OFFSET},
{ "start": 2097152, "length": 2097152, "depth": 0, "zero": false, "data": false}]
-[{ "start": 0, "length": 512, "depth": 0, "zero": false, "data": true},
+[{ "start": 0, "length": 512, "depth": 0, "zero": false, "data": true, "offset": OFFSET},
{ "start": 512, "length": 512, "depth": 0, "zero": false, "data": false},
-{ "start": 1024, "length": 2096128, "depth": 0, "zero": false, "data": true},
+{ "start": 1024, "length": 2096128, "depth": 0, "zero": false, "data": true, "offset": OFFSET},
{ "start": 2097152, "length": 2097152, "depth": 0, "zero": false, "data": false}]
*** done
diff --git a/tests/qemu-iotests/233.out b/tests/qemu-iotests/233.out
index 5acbc13..9511b6e 100644
--- a/tests/qemu-iotests/233.out
+++ b/tests/qemu-iotests/233.out
@@ -38,7 +38,7 @@ exports available: 1
export: ''
size: 67108864
flags: 0x4ed ( flush fua trim zeroes df cache )
- min block: 512
+ min block: 1
opt block: 4096
max block: 33554432
available meta contexts: 1
diff --git a/tests/qemu-iotests/241 b/tests/qemu-iotests/241
new file mode 100755
index 0000000..4b19685
--- /dev/null
+++ b/tests/qemu-iotests/241
@@ -0,0 +1,100 @@
+#!/bin/bash
+#
+# Test qemu-nbd vs. unaligned images
+#
+# Copyright (C) 2018-2019 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+seq="$(basename $0)"
+echo "QA output created by $seq"
+
+status=1 # failure is the default!
+
+nbd_unix_socket=$TEST_DIR/test_qemu_nbd_socket
+
+_cleanup()
+{
+ _cleanup_test_img
+ nbd_server_stop
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+. ./common.nbd
+
+_supported_fmt raw
+_supported_proto nbd
+_supported_os Linux
+_require_command QEMU_NBD
+
+# can't use _make_test_img, because qemu-img rounds image size up,
+# and because we want to use Unix socket rather than TCP port. Likewise,
+# we have to redirect TEST_IMG to our server.
+# This tests that we can deal with the hole at the end of an unaligned
+# raw file (either because the server doesn't advertise alignment too
+# large, or because the client ignores the server's noncompliance - even
+# though we can't actually wire iotests into checking trace messages).
+printf %01000d 0 > "$TEST_IMG_FILE"
+TEST_IMG="nbd:unix:$nbd_unix_socket"
+
+echo
+echo "=== Exporting unaligned raw image, natural alignment ==="
+echo
+
+nbd_server_start_unix_socket -f $IMGFMT "$TEST_IMG_FILE"
+
+$QEMU_NBD_PROG --list -k $nbd_unix_socket | grep '\(size\|min\)'
+$QEMU_IMG map -f raw --output=json "$TEST_IMG" | _filter_qemu_img_map
+$QEMU_IO -f raw -c map "$TEST_IMG"
+nbd_server_stop
+
+echo
+echo "=== Exporting unaligned raw image, forced server sector alignment ==="
+echo
+
+# Intentionally omit '-f' to force image probing, which in turn forces
+# sector alignment, here at the server.
+nbd_server_start_unix_socket "$TEST_IMG_FILE"
+
+$QEMU_NBD_PROG --list -k $nbd_unix_socket | grep '\(size\|min\)'
+$QEMU_IMG map -f raw --output=json "$TEST_IMG" | _filter_qemu_img_map
+$QEMU_IO -f raw -c map "$TEST_IMG"
+nbd_server_stop
+
+echo
+echo "=== Exporting unaligned raw image, forced client sector alignment ==="
+echo
+
+# Now force sector alignment at the client.
+nbd_server_start_unix_socket -f $IMGFMT "$TEST_IMG_FILE"
+
+$QEMU_NBD_PROG --list -k $nbd_unix_socket | grep '\(size\|min\)'
+$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map
+$QEMU_IO -c map "$TEST_IMG"
+nbd_server_stop
+
+# Not tested yet: we also want to ensure that qemu as NBD client does
+# not access beyond the end of a server's advertised unaligned size:
+# nbdkit -U - memory size=513 --run 'qemu-io -f raw -c "r 512 512" $nbd'
+# However, since qemu as server always rounds up to a sector alignment,
+# we would have to use nbdkit to provoke the current client failures.
+
+# success, all done
+echo '*** done'
+rm -f $seq.full
+status=0
diff --git a/tests/qemu-iotests/241.out b/tests/qemu-iotests/241.out
new file mode 100644
index 0000000..f481074
--- /dev/null
+++ b/tests/qemu-iotests/241.out
@@ -0,0 +1,28 @@
+QA output created by 241
+
+=== Exporting unaligned raw image, natural alignment ===
+
+ size: 1024
+ min block: 1
+[{ "start": 0, "length": 1000, "depth": 0, "zero": false, "data": true, "offset": OFFSET},
+{ "start": 1000, "length": 24, "depth": 0, "zero": true, "data": true, "offset": OFFSET}]
+1 KiB (0x400) bytes allocated at offset 0 bytes (0x0)
+
+=== Exporting unaligned raw image, forced server sector alignment ===
+
+WARNING: Image format was not specified for '/home/eblake/qemu/tests/qemu-iotests/scratch/t.raw' and probing guessed raw.
+ Automatically detecting the format is dangerous for raw images, write operations on block 0 will be restricted.
+ Specify the 'raw' format explicitly to remove the restrictions.
+ size: 1024
+ min block: 512
+[{ "start": 0, "length": 1024, "depth": 0, "zero": false, "data": true, "offset": OFFSET}]
+1 KiB (0x400) bytes allocated at offset 0 bytes (0x0)
+
+=== Exporting unaligned raw image, forced client sector alignment ===
+
+ size: 1024
+ min block: 1
+[{ "start": 0, "length": 1000, "depth": 0, "zero": false, "data": true, "offset": OFFSET},
+{ "start": 1000, "length": 24, "depth": 0, "zero": true, "data": true, "offset": OFFSET}]
+1 KiB (0x400) bytes allocated at offset 0 bytes (0x0)
+*** done
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index 41da10c..bae7718 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -240,6 +240,7 @@
238 auto quick
239 rw auto quick
240 auto quick
+241 rw auto quick
242 rw auto quick
243 rw auto quick
244 rw auto quick