aboutsummaryrefslogtreecommitdiff
path: root/hw
diff options
context:
space:
mode:
authorKlaus Jensen <k.jensen@samsung.com>2021-02-07 21:06:01 +0100
committerKlaus Jensen <k.jensen@samsung.com>2021-03-09 11:00:57 +0100
commitf80a1c331ac2fe7428ddd76b1f059642d6a91338 (patch)
tree54d51acfc14d54e0918036aca5603b027d0035a8 /hw
parent569dbe19c415865a3b2a1ca806f780d1bd5da2db (diff)
downloadqemu-f80a1c331ac2fe7428ddd76b1f059642d6a91338.zip
qemu-f80a1c331ac2fe7428ddd76b1f059642d6a91338.tar.gz
qemu-f80a1c331ac2fe7428ddd76b1f059642d6a91338.tar.bz2
hw/block/nvme: try to deal with the iov/qsg duality
Introduce NvmeSg and try to deal with that pesky qsg/iov duality that haunts all the memory-related functions. Signed-off-by: Klaus Jensen <k.jensen@samsung.com> Reviewed-by: Keith Busch <kbusch@kernel.org>
Diffstat (limited to 'hw')
-rw-r--r--hw/block/nvme.c191
-rw-r--r--hw/block/nvme.h17
2 files changed, 117 insertions, 91 deletions
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 8f27fa7..a1e28c6 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -441,15 +441,31 @@ static void nvme_req_clear(NvmeRequest *req)
req->status = NVME_SUCCESS;
}
-static void nvme_req_exit(NvmeRequest *req)
+static inline void nvme_sg_init(NvmeCtrl *n, NvmeSg *sg, bool dma)
{
- if (req->qsg.sg) {
- qemu_sglist_destroy(&req->qsg);
+ if (dma) {
+ pci_dma_sglist_init(&sg->qsg, &n->parent_obj, 0);
+ sg->flags = NVME_SG_DMA;
+ } else {
+ qemu_iovec_init(&sg->iov, 0);
+ }
+
+ sg->flags |= NVME_SG_ALLOC;
+}
+
+static inline void nvme_sg_unmap(NvmeSg *sg)
+{
+ if (!(sg->flags & NVME_SG_ALLOC)) {
+ return;
}
- if (req->iov.iov) {
- qemu_iovec_destroy(&req->iov);
+ if (sg->flags & NVME_SG_DMA) {
+ qemu_sglist_destroy(&sg->qsg);
+ } else {
+ qemu_iovec_destroy(&sg->iov);
}
+
+ memset(sg, 0x0, sizeof(*sg));
}
static uint16_t nvme_map_addr_cmb(NvmeCtrl *n, QEMUIOVector *iov, hwaddr addr,
@@ -486,8 +502,7 @@ static uint16_t nvme_map_addr_pmr(NvmeCtrl *n, QEMUIOVector *iov, hwaddr addr,
return NVME_SUCCESS;
}
-static uint16_t nvme_map_addr(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
- hwaddr addr, size_t len)
+static uint16_t nvme_map_addr(NvmeCtrl *n, NvmeSg *sg, hwaddr addr, size_t len)
{
bool cmb = false, pmr = false;
@@ -504,38 +519,31 @@ static uint16_t nvme_map_addr(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
}
if (cmb || pmr) {
- if (qsg && qsg->sg) {
+ if (sg->flags & NVME_SG_DMA) {
return NVME_INVALID_USE_OF_CMB | NVME_DNR;
}
- assert(iov);
-
- if (!iov->iov) {
- qemu_iovec_init(iov, 1);
- }
-
if (cmb) {
- return nvme_map_addr_cmb(n, iov, addr, len);
+ return nvme_map_addr_cmb(n, &sg->iov, addr, len);
} else {
- return nvme_map_addr_pmr(n, iov, addr, len);
+ return nvme_map_addr_pmr(n, &sg->iov, addr, len);
}
}
- if (iov && iov->iov) {
+ if (!(sg->flags & NVME_SG_DMA)) {
return NVME_INVALID_USE_OF_CMB | NVME_DNR;
}
- assert(qsg);
-
- if (!qsg->sg) {
- pci_dma_sglist_init(qsg, &n->parent_obj, 1);
- }
-
- qemu_sglist_add(qsg, addr, len);
+ qemu_sglist_add(&sg->qsg, addr, len);
return NVME_SUCCESS;
}
+static inline bool nvme_addr_is_dma(NvmeCtrl *n, hwaddr addr)
+{
+ return !(nvme_addr_is_cmb(n, addr) || nvme_addr_is_pmr(n, addr));
+}
+
static uint16_t nvme_map_prp(NvmeCtrl *n, uint64_t prp1, uint64_t prp2,
uint32_t len, NvmeRequest *req)
{
@@ -545,20 +553,13 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, uint64_t prp1, uint64_t prp2,
uint16_t status;
int ret;
- QEMUSGList *qsg = &req->qsg;
- QEMUIOVector *iov = &req->iov;
-
trace_pci_nvme_map_prp(trans_len, len, prp1, prp2, num_prps);
- if (nvme_addr_is_cmb(n, prp1) || (nvme_addr_is_pmr(n, prp1))) {
- qemu_iovec_init(iov, num_prps);
- } else {
- pci_dma_sglist_init(qsg, &n->parent_obj, num_prps);
- }
+ nvme_sg_init(n, &req->sg, nvme_addr_is_dma(n, prp1));
- status = nvme_map_addr(n, qsg, iov, prp1, trans_len);
+ status = nvme_map_addr(n, &req->sg, prp1, trans_len);
if (status) {
- return status;
+ goto unmap;
}
len -= trans_len;
@@ -573,7 +574,8 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, uint64_t prp1, uint64_t prp2,
ret = nvme_addr_read(n, prp2, (void *)prp_list, prp_trans);
if (ret) {
trace_pci_nvme_err_addr_read(prp2);
- return NVME_DATA_TRAS_ERROR;
+ status = NVME_DATA_TRAS_ERROR;
+ goto unmap;
}
while (len != 0) {
uint64_t prp_ent = le64_to_cpu(prp_list[i]);
@@ -581,7 +583,8 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, uint64_t prp1, uint64_t prp2,
if (i == n->max_prp_ents - 1 && len > n->page_size) {
if (unlikely(prp_ent & (n->page_size - 1))) {
trace_pci_nvme_err_invalid_prplist_ent(prp_ent);
- return NVME_INVALID_PRP_OFFSET | NVME_DNR;
+ status = NVME_INVALID_PRP_OFFSET | NVME_DNR;
+ goto unmap;
}
i = 0;
@@ -591,20 +594,22 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, uint64_t prp1, uint64_t prp2,
prp_trans);
if (ret) {
trace_pci_nvme_err_addr_read(prp_ent);
- return NVME_DATA_TRAS_ERROR;
+ status = NVME_DATA_TRAS_ERROR;
+ goto unmap;
}
prp_ent = le64_to_cpu(prp_list[i]);
}
if (unlikely(prp_ent & (n->page_size - 1))) {
trace_pci_nvme_err_invalid_prplist_ent(prp_ent);
- return NVME_INVALID_PRP_OFFSET | NVME_DNR;
+ status = NVME_INVALID_PRP_OFFSET | NVME_DNR;
+ goto unmap;
}
trans_len = MIN(len, n->page_size);
- status = nvme_map_addr(n, qsg, iov, prp_ent, trans_len);
+ status = nvme_map_addr(n, &req->sg, prp_ent, trans_len);
if (status) {
- return status;
+ goto unmap;
}
len -= trans_len;
@@ -613,24 +618,28 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, uint64_t prp1, uint64_t prp2,
} else {
if (unlikely(prp2 & (n->page_size - 1))) {
trace_pci_nvme_err_invalid_prp2_align(prp2);
- return NVME_INVALID_PRP_OFFSET | NVME_DNR;
+ status = NVME_INVALID_PRP_OFFSET | NVME_DNR;
+ goto unmap;
}
- status = nvme_map_addr(n, qsg, iov, prp2, len);
+ status = nvme_map_addr(n, &req->sg, prp2, len);
if (status) {
- return status;
+ goto unmap;
}
}
}
return NVME_SUCCESS;
+
+unmap:
+ nvme_sg_unmap(&req->sg);
+ return status;
}
/*
* Map 'nsgld' data descriptors from 'segment'. The function will subtract the
* number of bytes mapped in len.
*/
-static uint16_t nvme_map_sgl_data(NvmeCtrl *n, QEMUSGList *qsg,
- QEMUIOVector *iov,
+static uint16_t nvme_map_sgl_data(NvmeCtrl *n, NvmeSg *sg,
NvmeSglDescriptor *segment, uint64_t nsgld,
size_t *len, NvmeRequest *req)
{
@@ -688,7 +697,7 @@ static uint16_t nvme_map_sgl_data(NvmeCtrl *n, QEMUSGList *qsg,
return NVME_DATA_SGL_LEN_INVALID | NVME_DNR;
}
- status = nvme_map_addr(n, qsg, iov, addr, trans_len);
+ status = nvme_map_addr(n, sg, addr, trans_len);
if (status) {
return status;
}
@@ -700,9 +709,8 @@ next:
return NVME_SUCCESS;
}
-static uint16_t nvme_map_sgl(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
- NvmeSglDescriptor sgl, size_t len,
- NvmeRequest *req)
+static uint16_t nvme_map_sgl(NvmeCtrl *n, NvmeSg *sg, NvmeSglDescriptor sgl,
+ size_t len, NvmeRequest *req)
{
/*
* Read the segment in chunks of 256 descriptors (one 4k page) to avoid
@@ -725,12 +733,14 @@ static uint16_t nvme_map_sgl(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
trace_pci_nvme_map_sgl(nvme_cid(req), NVME_SGL_TYPE(sgl.type), len);
+ nvme_sg_init(n, sg, nvme_addr_is_dma(n, addr));
+
/*
* If the entire transfer can be described with a single data block it can
* be mapped directly.
*/
if (NVME_SGL_TYPE(sgl.type) == NVME_SGL_DESCR_TYPE_DATA_BLOCK) {
- status = nvme_map_sgl_data(n, qsg, iov, sgld, 1, &len, req);
+ status = nvme_map_sgl_data(n, sg, sgld, 1, &len, req);
if (status) {
goto unmap;
}
@@ -768,7 +778,7 @@ static uint16_t nvme_map_sgl(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
goto unmap;
}
- status = nvme_map_sgl_data(n, qsg, iov, segment, SEG_CHUNK_SIZE,
+ status = nvme_map_sgl_data(n, sg, segment, SEG_CHUNK_SIZE,
&len, req);
if (status) {
goto unmap;
@@ -795,7 +805,7 @@ static uint16_t nvme_map_sgl(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
switch (NVME_SGL_TYPE(last_sgld->type)) {
case NVME_SGL_DESCR_TYPE_DATA_BLOCK:
case NVME_SGL_DESCR_TYPE_BIT_BUCKET:
- status = nvme_map_sgl_data(n, qsg, iov, segment, nsgld, &len, req);
+ status = nvme_map_sgl_data(n, sg, segment, nsgld, &len, req);
if (status) {
goto unmap;
}
@@ -822,7 +832,7 @@ static uint16_t nvme_map_sgl(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
* Do not map the last descriptor; it will be a Segment or Last Segment
* descriptor and is handled by the next iteration.
*/
- status = nvme_map_sgl_data(n, qsg, iov, segment, nsgld - 1, &len, req);
+ status = nvme_map_sgl_data(n, sg, segment, nsgld - 1, &len, req);
if (status) {
goto unmap;
}
@@ -838,14 +848,7 @@ out:
return NVME_SUCCESS;
unmap:
- if (iov->iov) {
- qemu_iovec_destroy(iov);
- }
-
- if (qsg->sg) {
- qemu_sglist_destroy(qsg);
- }
-
+ nvme_sg_unmap(sg);
return status;
}
@@ -866,8 +869,7 @@ static uint16_t nvme_map_dptr(NvmeCtrl *n, size_t len, NvmeRequest *req)
return NVME_INVALID_FIELD | NVME_DNR;
}
- return nvme_map_sgl(n, &req->qsg, &req->iov, req->cmd.dptr.sgl, len,
- req);
+ return nvme_map_sgl(n, &req->sg, req->cmd.dptr.sgl, len, req);
default:
return NVME_INVALID_FIELD;
}
@@ -883,16 +885,13 @@ static uint16_t nvme_dma(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
return status;
}
- /* assert that only one of qsg and iov carries data */
- assert((req->qsg.nsg > 0) != (req->iov.niov > 0));
-
- if (req->qsg.nsg > 0) {
+ if (req->sg.flags & NVME_SG_DMA) {
uint64_t residual;
if (dir == DMA_DIRECTION_TO_DEVICE) {
- residual = dma_buf_write(ptr, len, &req->qsg);
+ residual = dma_buf_write(ptr, len, &req->sg.qsg);
} else {
- residual = dma_buf_read(ptr, len, &req->qsg);
+ residual = dma_buf_read(ptr, len, &req->sg.qsg);
}
if (unlikely(residual)) {
@@ -903,9 +902,9 @@ static uint16_t nvme_dma(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
size_t bytes;
if (dir == DMA_DIRECTION_TO_DEVICE) {
- bytes = qemu_iovec_to_buf(&req->iov, 0, ptr, len);
+ bytes = qemu_iovec_to_buf(&req->sg.iov, 0, ptr, len);
} else {
- bytes = qemu_iovec_from_buf(&req->iov, 0, ptr, len);
+ bytes = qemu_iovec_from_buf(&req->sg.iov, 0, ptr, len);
}
if (unlikely(bytes != len)) {
@@ -917,6 +916,32 @@ static uint16_t nvme_dma(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
return status;
}
+static inline void nvme_blk_read(BlockBackend *blk, int64_t offset,
+ BlockCompletionFunc *cb, NvmeRequest *req)
+{
+ assert(req->sg.flags & NVME_SG_ALLOC);
+
+ if (req->sg.flags & NVME_SG_DMA) {
+ req->aiocb = dma_blk_read(blk, &req->sg.qsg, offset, BDRV_SECTOR_SIZE,
+ cb, req);
+ } else {
+ req->aiocb = blk_aio_preadv(blk, offset, &req->sg.iov, 0, cb, req);
+ }
+}
+
+static inline void nvme_blk_write(BlockBackend *blk, int64_t offset,
+ BlockCompletionFunc *cb, NvmeRequest *req)
+{
+ assert(req->sg.flags & NVME_SG_ALLOC);
+
+ if (req->sg.flags & NVME_SG_DMA) {
+ req->aiocb = dma_blk_write(blk, &req->sg.qsg, offset, BDRV_SECTOR_SIZE,
+ cb, req);
+ } else {
+ req->aiocb = blk_aio_pwritev(blk, offset, &req->sg.iov, 0, cb, req);
+ }
+}
+
static void nvme_post_cqes(void *opaque)
{
NvmeCQueue *cq = opaque;
@@ -947,7 +972,7 @@ static void nvme_post_cqes(void *opaque)
}
QTAILQ_REMOVE(&cq->req_list, req, entry);
nvme_inc_cq_tail(cq);
- nvme_req_exit(req);
+ nvme_sg_unmap(&req->sg);
QTAILQ_INSERT_TAIL(&sq->req_list, req, entry);
}
if (cq->tail != cq->head) {
@@ -1644,14 +1669,14 @@ static void nvme_copy_in_complete(NvmeRequest *req)
zone->w_ptr += ctx->nlb;
}
- qemu_iovec_init(&req->iov, 1);
- qemu_iovec_add(&req->iov, ctx->bounce, nvme_l2b(ns, ctx->nlb));
+ qemu_iovec_init(&req->sg.iov, 1);
+ qemu_iovec_add(&req->sg.iov, ctx->bounce, nvme_l2b(ns, ctx->nlb));
block_acct_start(blk_get_stats(ns->blkconf.blk), &req->acct, 0,
BLOCK_ACCT_WRITE);
req->aiocb = blk_aio_pwritev(ns->blkconf.blk, nvme_l2b(ns, sdlba),
- &req->iov, 0, nvme_copy_cb, req);
+ &req->sg.iov, 0, nvme_copy_cb, req);
return;
@@ -2087,13 +2112,7 @@ static uint16_t nvme_read(NvmeCtrl *n, NvmeRequest *req)
block_acct_start(blk_get_stats(blk), &req->acct, data_size,
BLOCK_ACCT_READ);
- if (req->qsg.sg) {
- req->aiocb = dma_blk_read(blk, &req->qsg, data_offset,
- BDRV_SECTOR_SIZE, nvme_rw_cb, req);
- } else {
- req->aiocb = blk_aio_preadv(blk, data_offset, &req->iov, 0,
- nvme_rw_cb, req);
- }
+ nvme_blk_read(blk, data_offset, nvme_rw_cb, req);
return NVME_NO_COMPLETE;
invalid:
@@ -2173,13 +2192,7 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append,
block_acct_start(blk_get_stats(blk), &req->acct, data_size,
BLOCK_ACCT_WRITE);
- if (req->qsg.sg) {
- req->aiocb = dma_blk_write(blk, &req->qsg, data_offset,
- BDRV_SECTOR_SIZE, nvme_rw_cb, req);
- } else {
- req->aiocb = blk_aio_pwritev(blk, data_offset, &req->iov, 0,
- nvme_rw_cb, req);
- }
+ nvme_blk_write(blk, data_offset, nvme_rw_cb, req);
} else {
req->aiocb = blk_aio_pwrite_zeroes(blk, data_offset, data_size,
BDRV_REQ_MAY_UNMAP, nvme_rw_cb,
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index 294fac1..96afefa 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -29,6 +29,20 @@ typedef struct NvmeAsyncEvent {
NvmeAerResult result;
} NvmeAsyncEvent;
+enum {
+ NVME_SG_ALLOC = 1 << 0,
+ NVME_SG_DMA = 1 << 1,
+};
+
+typedef struct NvmeSg {
+ int flags;
+
+ union {
+ QEMUSGList qsg;
+ QEMUIOVector iov;
+ };
+} NvmeSg;
+
typedef struct NvmeRequest {
struct NvmeSQueue *sq;
struct NvmeNamespace *ns;
@@ -38,8 +52,7 @@ typedef struct NvmeRequest {
NvmeCqe cqe;
NvmeCmd cmd;
BlockAcctCookie acct;
- QEMUSGList qsg;
- QEMUIOVector iov;
+ NvmeSg sg;
QTAILQ_ENTRY(NvmeRequest)entry;
} NvmeRequest;