From 0e7106d8b5f7ef4f9df10baf1dfb3db482bcd046 Mon Sep 17 00:00:00 2001 From: Liu Yuan Date: Thu, 10 Jan 2013 16:03:47 +0800 Subject: sheepdog: implement direct write semantics Sheepdog supports both writeback/writethrough write but has not yet supported DIRECTIO semantics which bypass the cache completely even if Sheepdog daemon is set up with cache enabled. Suppose cache is enabled on Sheepdog daemon size, the new cache control is cache=writeback # enable the writeback semantics for write cache=writethrough # enable the emulated writethrough semantics for write cache=directsync # disable cache competely Guest WCE toggling on the run time to toggle writeback/writethrough is also supported. Cc: MORITA Kazutaka Cc: Kevin Wolf Cc: Stefan Hajnoczi Signed-off-by: Liu Yuan Reviewed-by: Stefan Hajnoczi Reviewed-by: MORITA Kazutaka Signed-off-by: Kevin Wolf --- block/sheepdog.c | 70 ++++++++++++++++++++++++++++++++------------------------ 1 file changed, 40 insertions(+), 30 deletions(-) (limited to 'block') diff --git a/block/sheepdog.c b/block/sheepdog.c index e821746..462c4b2 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -36,7 +36,8 @@ #define SD_FLAG_CMD_WRITE 0x01 #define SD_FLAG_CMD_COW 0x02 -#define SD_FLAG_CMD_CACHE 0x04 +#define SD_FLAG_CMD_CACHE 0x04 /* Writeback mode for cache */ +#define SD_FLAG_CMD_DIRECT 0x08 /* Don't use cache */ #define SD_RES_SUCCESS 0x00 /* Success */ #define SD_RES_UNKNOWN 0x01 /* Unknown error */ @@ -293,7 +294,7 @@ typedef struct BDRVSheepdogState { char name[SD_MAX_VDI_LEN]; bool is_snapshot; - bool cache_enabled; + uint32_t cache_flags; char *addr; char *port; @@ -977,8 +978,8 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, hdr.flags = SD_FLAG_CMD_WRITE | flags; } - if (s->cache_enabled) { - hdr.flags |= SD_FLAG_CMD_CACHE; + if (s->cache_flags) { + hdr.flags |= s->cache_flags; } hdr.oid = oid; @@ -1023,7 +1024,7 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, static int read_write_object(int fd, char *buf, uint64_t oid, int copies, unsigned int datalen, uint64_t offset, - bool write, bool create, bool cache) + bool write, bool create, uint32_t cache_flags) { SheepdogObjReq hdr; SheepdogObjRsp *rsp = (SheepdogObjRsp *)&hdr; @@ -1047,9 +1048,7 @@ static int read_write_object(int fd, char *buf, uint64_t oid, int copies, hdr.opcode = SD_OP_READ_OBJ; } - if (cache) { - hdr.flags |= SD_FLAG_CMD_CACHE; - } + hdr.flags |= cache_flags; hdr.oid = oid; hdr.data_length = datalen; @@ -1072,18 +1071,19 @@ static int read_write_object(int fd, char *buf, uint64_t oid, int copies, } static int read_object(int fd, char *buf, uint64_t oid, int copies, - unsigned int datalen, uint64_t offset, bool cache) + unsigned int datalen, uint64_t offset, + uint32_t cache_flags) { return read_write_object(fd, buf, oid, copies, datalen, offset, false, - false, cache); + false, cache_flags); } static int write_object(int fd, char *buf, uint64_t oid, int copies, unsigned int datalen, uint64_t offset, bool create, - bool cache) + uint32_t cache_flags) { return read_write_object(fd, buf, oid, copies, datalen, offset, true, - create, cache); + create, cache_flags); } static int sd_open(BlockDriverState *bs, const char *filename, int flags) @@ -1118,12 +1118,22 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags) goto out; } - s->cache_enabled = true; - s->flush_fd = connect_to_sdog(s->addr, s->port); - if (s->flush_fd < 0) { - error_report("failed to connect"); - ret = s->flush_fd; - goto out; + /* + * QEMU block layer emulates writethrough cache as 'writeback + flush', so + * we always set SD_FLAG_CMD_CACHE (writeback cache) as default. + */ + s->cache_flags = SD_FLAG_CMD_CACHE; + if (flags & BDRV_O_NOCACHE) { + s->cache_flags = SD_FLAG_CMD_DIRECT; + } + + if (s->cache_flags == SD_FLAG_CMD_CACHE) { + s->flush_fd = connect_to_sdog(s->addr, s->port); + if (s->flush_fd < 0) { + error_report("failed to connect"); + ret = s->flush_fd; + goto out; + } } if (snapid || tag[0] != '\0') { @@ -1140,7 +1150,7 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags) buf = g_malloc(SD_INODE_SIZE); ret = read_object(fd, buf, vid_to_vdi_oid(vid), 0, SD_INODE_SIZE, 0, - s->cache_enabled); + s->cache_flags); closesocket(fd); @@ -1387,7 +1397,7 @@ static void sd_close(BlockDriverState *bs) qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL, NULL); closesocket(s->fd); - if (s->cache_enabled) { + if (s->cache_flags) { closesocket(s->flush_fd); } g_free(s->addr); @@ -1423,7 +1433,7 @@ static int sd_truncate(BlockDriverState *bs, int64_t offset) datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id); s->inode.vdi_size = offset; ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(s->inode.vdi_id), - s->inode.nr_copies, datalen, 0, false, s->cache_enabled); + s->inode.nr_copies, datalen, 0, false, s->cache_flags); close(fd); if (ret < 0) { @@ -1506,7 +1516,7 @@ static int sd_create_branch(BDRVSheepdogState *s) } ret = read_object(fd, buf, vid_to_vdi_oid(vid), s->inode.nr_copies, - SD_INODE_SIZE, 0, s->cache_enabled); + SD_INODE_SIZE, 0, s->cache_flags); closesocket(fd); @@ -1707,7 +1717,7 @@ static int coroutine_fn sd_co_flush_to_disk(BlockDriverState *bs) int ret; unsigned int wlen = 0, rlen = 0; - if (!s->cache_enabled) { + if (s->cache_flags != SD_FLAG_CMD_CACHE) { return 0; } @@ -1723,7 +1733,7 @@ static int coroutine_fn sd_co_flush_to_disk(BlockDriverState *bs) if (rsp->result == SD_RES_INVALID_PARMS) { dprintf("disable write cache since the server doesn't support it\n"); - s->cache_enabled = false; + s->cache_flags = SD_FLAG_CMD_DIRECT; closesocket(s->flush_fd); return 0; } @@ -1774,7 +1784,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) } ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(s->inode.vdi_id), - s->inode.nr_copies, datalen, 0, false, s->cache_enabled); + s->inode.nr_copies, datalen, 0, false, s->cache_flags); if (ret < 0) { error_report("failed to write snapshot's inode."); goto cleanup; @@ -1791,7 +1801,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) inode = (SheepdogInode *)g_malloc(datalen); ret = read_object(fd, (char *)inode, vid_to_vdi_oid(new_vid), - s->inode.nr_copies, datalen, 0, s->cache_enabled); + s->inode.nr_copies, datalen, 0, s->cache_flags); if (ret < 0) { error_report("failed to read new inode info. %s", strerror(errno)); @@ -1845,7 +1855,7 @@ static int sd_snapshot_goto(BlockDriverState *bs, const char *snapshot_id) buf = g_malloc(SD_INODE_SIZE); ret = read_object(fd, buf, vid_to_vdi_oid(vid), s->inode.nr_copies, - SD_INODE_SIZE, 0, s->cache_enabled); + SD_INODE_SIZE, 0, s->cache_flags); closesocket(fd); @@ -1942,7 +1952,7 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab) /* we don't need to read entire object */ ret = read_object(fd, (char *)&inode, vid_to_vdi_oid(vid), 0, SD_INODE_SIZE - sizeof(inode.data_vdi_id), 0, - s->cache_enabled); + s->cache_flags); if (ret) { continue; @@ -2003,11 +2013,11 @@ static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data, if (load) { ret = read_object(fd, (char *)data, vmstate_oid, s->inode.nr_copies, data_len, offset, - s->cache_enabled); + s->cache_flags); } else { ret = write_object(fd, (char *)data, vmstate_oid, s->inode.nr_copies, data_len, offset, create, - s->cache_enabled); + s->cache_flags); } if (ret < 0) { -- cgit v1.1 From b608c8dc02c78ee95455a0989bdf1b41c768b2ef Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 10 Jan 2013 15:28:35 +0100 Subject: raw-posix: fix bdrv_aio_ioctl When the raw-posix aio=thread code was moved from posix-aio-compat.c to block/raw-posix.c, there was an unintended change to the ioctl code. The code used to return the ioctl command, which posix_aio_read() would later morph into a zero. This hack is not necessary anymore, and in fact breaks scsi-generic (which expects a zero return code). Remove it. Cc: qemu-stable@nongnu.org Signed-off-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- block/raw-posix.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'block') diff --git a/block/raw-posix.c b/block/raw-posix.c index 87d888e..0e705ba 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -455,15 +455,7 @@ static ssize_t handle_aiocb_ioctl(RawPosixAIOData *aiocb) return -errno; } - /* - * This looks weird, but the aio code only considers a request - * successful if it has written the full number of bytes. - * - * Now we overload aio_nbytes as aio_ioctl_cmd for the ioctl command, - * so in fact we return the ioctl command here to make posix_aio_read() - * happy.. - */ - return aiocb->aio_nbytes; + return 0; } static ssize_t handle_aiocb_flush(RawPosixAIOData *aiocb) -- cgit v1.1 From c53b1c5114bdf7fc945cbf11436da61789ca2267 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Fri, 11 Jan 2013 16:41:27 +0100 Subject: block: make qiov_is_aligned() public The qiov_is_aligned() function checks whether a QEMUIOVector meets a BlockDriverState's alignment requirements. This is needed by virtio-blk-data-plane so: 1. Move the function from block/raw-posix.c to block/block.c. 2. Make it public in block/block.h. 3. Rename to bdrv_qiov_is_aligned(). 4. Change return type from int to bool. Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- block/raw-posix.c | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) (limited to 'block') diff --git a/block/raw-posix.c b/block/raw-posix.c index 0e705ba..c3d7fda 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -430,22 +430,6 @@ static void raw_reopen_abort(BDRVReopenState *state) #endif */ -/* - * Check if all memory in this vector is sector aligned. - */ -static int qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov) -{ - int i; - - for (i = 0; i < qiov->niov; i++) { - if ((uintptr_t) qiov->iov[i].iov_base % bs->buffer_alignment) { - return 0; - } - } - - return 1; -} - static ssize_t handle_aiocb_ioctl(RawPosixAIOData *aiocb) { int ret; @@ -714,7 +698,7 @@ static BlockDriverAIOCB *raw_aio_submit(BlockDriverState *bs, * driver that it needs to copy the buffer. */ if ((bs->open_flags & BDRV_O_NOCACHE)) { - if (!qiov_is_aligned(bs, qiov)) { + if (!bdrv_qiov_is_aligned(bs, qiov)) { type |= QEMU_AIO_MISALIGNED; #ifdef CONFIG_LINUX_AIO } else if (s->use_aio) { -- cgit v1.1