diff options
author | Klaus Jensen <k.jensen@samsung.com> | 2021-06-17 21:06:56 +0200 |
---|---|---|
committer | Klaus Jensen <k.jensen@samsung.com> | 2021-06-29 07:16:25 +0200 |
commit | 3bcf26d3d6192e350b165c681aff13f6697f30fb (patch) | |
tree | bd1e5feed0865d40a958ae4ce31bad29fe9befa3 /hw | |
parent | 63d96e4ffd71a4ac2769448a567d46ed71e5bdd6 (diff) | |
download | qemu-3bcf26d3d6192e350b165c681aff13f6697f30fb.zip qemu-3bcf26d3d6192e350b165c681aff13f6697f30fb.tar.gz qemu-3bcf26d3d6192e350b165c681aff13f6697f30fb.tar.bz2 |
hw/nvme: reimplement format nvm to allow cancellation
Prior to this patch, the AIOs associated with a broadcast Format NVM are
submitted anonymously (no aiocb reference is saved from the
blk_aio_pwrite_zeroes call), so they cannot be cancelled. Fix this by
formatting the namespaces one after another, saving a reference to the
aiocb for each.
Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Diffstat (limited to 'hw')
-rw-r--r-- | hw/nvme/ctrl.c | 263 | ||||
-rw-r--r-- | hw/nvme/trace-events | 4 |
2 files changed, 143 insertions, 124 deletions
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 5b550ec..3b8c542 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -1924,42 +1924,6 @@ out: nvme_rw_complete_cb(req, ret); } -struct nvme_aio_format_ctx { - NvmeRequest *req; - NvmeNamespace *ns; - - /* number of outstanding write zeroes for this namespace */ - int *count; -}; - -static void nvme_aio_format_cb(void *opaque, int ret) -{ - struct nvme_aio_format_ctx *ctx = opaque; - NvmeRequest *req = ctx->req; - NvmeNamespace *ns = ctx->ns; - uintptr_t *num_formats = (uintptr_t *)&req->opaque; - int *count = ctx->count; - - g_free(ctx); - - if (ret) { - nvme_aio_err(req, ret); - } - - if (--(*count)) { - return; - } - - g_free(count); - ns->status = 0x0; - - if (--(*num_formats)) { - return; - } - - nvme_enqueue_req_completion(nvme_cq(req), req); -} - static void nvme_verify_cb(void *opaque, int ret) { NvmeBounceContext *ctx = opaque; @@ -5272,138 +5236,195 @@ static uint16_t nvme_ns_attachment(NvmeCtrl *n, NvmeRequest *req) return NVME_SUCCESS; } -static uint16_t nvme_format_ns(NvmeCtrl *n, NvmeNamespace *ns, uint8_t lbaf, - uint8_t mset, uint8_t pi, uint8_t pil, - NvmeRequest *req) -{ - int64_t len, offset; - struct nvme_aio_format_ctx *ctx; - BlockBackend *blk = ns->blkconf.blk; - uint16_t ms; - uintptr_t *num_formats = (uintptr_t *)&req->opaque; - int *count; - - if (ns->params.zoned) { - return NVME_INVALID_FORMAT | NVME_DNR; - } +typedef struct NvmeFormatAIOCB { + BlockAIOCB common; + BlockAIOCB *aiocb; + QEMUBH *bh; + NvmeRequest *req; + int ret; - trace_pci_nvme_format_ns(nvme_cid(req), nvme_nsid(ns), lbaf, mset, pi, pil); + NvmeNamespace *ns; + uint32_t nsid; + bool broadcast; + int64_t offset; +} NvmeFormatAIOCB; - if (lbaf > ns->id_ns.nlbaf) { - return NVME_INVALID_FORMAT | NVME_DNR; - } +static void nvme_format_bh(void *opaque); - ms = ns->id_ns.lbaf[lbaf].ms; +static void nvme_format_cancel(BlockAIOCB *aiocb) +{ + NvmeFormatAIOCB *iocb = container_of(aiocb, NvmeFormatAIOCB, common); - if (pi && (ms < 
sizeof(NvmeDifTuple))) { - return NVME_INVALID_FORMAT | NVME_DNR; + if (iocb->aiocb) { + blk_aio_cancel_async(iocb->aiocb); } +} - if (pi && pi > NVME_ID_NS_DPS_TYPE_3) { - return NVME_INVALID_FIELD | NVME_DNR; - } +static const AIOCBInfo nvme_format_aiocb_info = { + .aiocb_size = sizeof(NvmeFormatAIOCB), + .cancel_async = nvme_format_cancel, + .get_aio_context = nvme_get_aio_context, +}; + +static void nvme_format_set(NvmeNamespace *ns, NvmeCmd *cmd) +{ + uint32_t dw10 = le32_to_cpu(cmd->cdw10); + uint8_t lbaf = dw10 & 0xf; + uint8_t pi = (dw10 >> 5) & 0x7; + uint8_t mset = (dw10 >> 4) & 0x1; + uint8_t pil = (dw10 >> 8) & 0x1; - nvme_ns_drain(ns); - nvme_ns_shutdown(ns); - nvme_ns_cleanup(ns); + trace_pci_nvme_format_set(ns->params.nsid, lbaf, mset, pi, pil); ns->id_ns.dps = (pil << 3) | pi; ns->id_ns.flbas = lbaf | (mset << 4); nvme_ns_init_format(ns); +} - ns->status = NVME_FORMAT_IN_PROGRESS; +static void nvme_format_ns_cb(void *opaque, int ret) +{ + NvmeFormatAIOCB *iocb = opaque; + NvmeRequest *req = iocb->req; + NvmeNamespace *ns = iocb->ns; + int bytes; - len = ns->size; - offset = 0; + if (ret < 0) { + iocb->ret = ret; + goto done; + } - count = g_new(int, 1); - *count = 1; + assert(ns); - (*num_formats)++; + if (iocb->offset < ns->size) { + bytes = MIN(BDRV_REQUEST_MAX_BYTES, ns->size - iocb->offset); - while (len) { - ctx = g_new(struct nvme_aio_format_ctx, 1); - ctx->req = req; - ctx->ns = ns; - ctx->count = count; + iocb->aiocb = blk_aio_pwrite_zeroes(ns->blkconf.blk, iocb->offset, + bytes, BDRV_REQ_MAY_UNMAP, + nvme_format_ns_cb, iocb); - size_t bytes = MIN(BDRV_REQUEST_MAX_BYTES, len); + iocb->offset += bytes; + return; + } - (*count)++; + nvme_format_set(ns, &req->cmd); + ns->status = 0x0; + iocb->ns = NULL; + iocb->offset = 0; - blk_aio_pwrite_zeroes(blk, offset, bytes, BDRV_REQ_MAY_UNMAP, - nvme_aio_format_cb, ctx); +done: + iocb->aiocb = NULL; + qemu_bh_schedule(iocb->bh); +} - offset += bytes; - len -= bytes; +static uint16_t 
nvme_format_check(NvmeNamespace *ns, uint8_t lbaf, uint8_t pi) +{ + if (ns->params.zoned) { + return NVME_INVALID_FORMAT | NVME_DNR; + } + if (lbaf > ns->id_ns.nlbaf) { + return NVME_INVALID_FORMAT | NVME_DNR; } - if (--(*count)) { - return NVME_NO_COMPLETE; + if (pi && (ns->id_ns.lbaf[lbaf].ms < sizeof(NvmeDifTuple))) { + return NVME_INVALID_FORMAT | NVME_DNR; } - g_free(count); - ns->status = 0x0; - (*num_formats)--; + if (pi && pi > NVME_ID_NS_DPS_TYPE_3) { + return NVME_INVALID_FIELD | NVME_DNR; + } return NVME_SUCCESS; } -static uint16_t nvme_format(NvmeCtrl *n, NvmeRequest *req) +static void nvme_format_bh(void *opaque) { - NvmeNamespace *ns; + NvmeFormatAIOCB *iocb = opaque; + NvmeRequest *req = iocb->req; + NvmeCtrl *n = nvme_ctrl(req); uint32_t dw10 = le32_to_cpu(req->cmd.cdw10); - uint32_t nsid = le32_to_cpu(req->cmd.nsid); uint8_t lbaf = dw10 & 0xf; - uint8_t mset = (dw10 >> 4) & 0x1; uint8_t pi = (dw10 >> 5) & 0x7; - uint8_t pil = (dw10 >> 8) & 0x1; - uintptr_t *num_formats = (uintptr_t *)&req->opaque; uint16_t status; int i; - trace_pci_nvme_format(nvme_cid(req), nsid, lbaf, mset, pi, pil); - - /* 1-initialize; see the comment in nvme_dsm */ - *num_formats = 1; + if (iocb->ret < 0) { + goto done; + } - if (nsid != NVME_NSID_BROADCAST) { - if (!nvme_nsid_valid(n, nsid)) { - return NVME_INVALID_NSID | NVME_DNR; + if (iocb->broadcast) { + for (i = iocb->nsid + 1; i <= NVME_MAX_NAMESPACES; i++) { + iocb->ns = nvme_ns(n, i); + if (iocb->ns) { + iocb->nsid = i; + break; + } } + } - ns = nvme_ns(n, nsid); - if (!ns) { - return NVME_INVALID_FIELD | NVME_DNR; - } + if (!iocb->ns) { + goto done; + } - status = nvme_format_ns(n, ns, lbaf, mset, pi, pil, req); - if (status && status != NVME_NO_COMPLETE) { - req->status = status; + status = nvme_format_check(iocb->ns, lbaf, pi); + if (status) { + req->status = status; + goto done; + } + + iocb->ns->status = NVME_FORMAT_IN_PROGRESS; + nvme_format_ns_cb(iocb, 0); + return; + +done: + qemu_bh_delete(iocb->bh); + 
iocb->bh = NULL; + + iocb->common.cb(iocb->common.opaque, iocb->ret); + + qemu_aio_unref(iocb); +} + +static uint16_t nvme_format(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeFormatAIOCB *iocb; + uint32_t nsid = le32_to_cpu(req->cmd.nsid); + uint16_t status; + + iocb = qemu_aio_get(&nvme_format_aiocb_info, NULL, nvme_misc_cb, req); + + iocb->req = req; + iocb->bh = qemu_bh_new(nvme_format_bh, iocb); + iocb->ret = 0; + iocb->ns = NULL; + iocb->nsid = 0; + iocb->broadcast = (nsid == NVME_NSID_BROADCAST); + iocb->offset = 0; + + if (!iocb->broadcast) { + if (!nvme_nsid_valid(n, nsid)) { + status = NVME_INVALID_NSID | NVME_DNR; + goto out; } - } else { - for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { - ns = nvme_ns(n, i); - if (!ns) { - continue; - } - status = nvme_format_ns(n, ns, lbaf, mset, pi, pil, req); - if (status && status != NVME_NO_COMPLETE) { - req->status = status; - break; - } + iocb->ns = nvme_ns(n, nsid); + if (!iocb->ns) { + status = NVME_INVALID_FIELD | NVME_DNR; + goto out; } } - /* account for the 1-initialization */ - if (--(*num_formats)) { - return NVME_NO_COMPLETE; - } + req->aiocb = &iocb->common; + qemu_bh_schedule(iocb->bh); + + return NVME_NO_COMPLETE; - return req->status; +out: + qemu_bh_delete(iocb->bh); + iocb->bh = NULL; + qemu_aio_unref(iocb); + return status; } static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req) diff --git a/hw/nvme/trace-events b/hw/nvme/trace-events index dc00c28..48d10c3 100644 --- a/hw/nvme/trace-events +++ b/hw/nvme/trace-events @@ -10,9 +10,7 @@ pci_nvme_map_sgl(uint8_t typ, uint64_t len) "type 0x%"PRIx8" len %"PRIu64"" pci_nvme_io_cmd(uint16_t cid, uint32_t nsid, uint16_t sqid, uint8_t opcode, const char *opname) "cid %"PRIu16" nsid 0x%"PRIx32" sqid %"PRIu16" opc 0x%"PRIx8" opname '%s'" pci_nvme_admin_cmd(uint16_t cid, uint16_t sqid, uint8_t opcode, const char *opname) "cid %"PRIu16" sqid %"PRIu16" opc 0x%"PRIx8" opname '%s'" pci_nvme_flush_ns(uint32_t nsid) "nsid 0x%"PRIx32"" -pci_nvme_format(uint16_t cid, 
uint32_t nsid, uint8_t lbaf, uint8_t mset, uint8_t pi, uint8_t pil) "cid %"PRIu16" nsid %"PRIu32" lbaf %"PRIu8" mset %"PRIu8" pi %"PRIu8" pil %"PRIu8"" -pci_nvme_format_ns(uint16_t cid, uint32_t nsid, uint8_t lbaf, uint8_t mset, uint8_t pi, uint8_t pil) "cid %"PRIu16" nsid %"PRIu32" lbaf %"PRIu8" mset %"PRIu8" pi %"PRIu8" pil %"PRIu8"" -pci_nvme_format_cb(uint16_t cid, uint32_t nsid) "cid %"PRIu16" nsid %"PRIu32"" +pci_nvme_format_set(uint32_t nsid, uint8_t lbaf, uint8_t mset, uint8_t pi, uint8_t pil) "nsid %"PRIu32" lbaf %"PRIu8" mset %"PRIu8" pi %"PRIu8" pil %"PRIu8"" pci_nvme_read(uint16_t cid, uint32_t nsid, uint32_t nlb, uint64_t count, uint64_t lba) "cid %"PRIu16" nsid %"PRIu32" nlb %"PRIu32" count %"PRIu64" lba 0x%"PRIx64"" pci_nvme_write(uint16_t cid, const char *verb, uint32_t nsid, uint32_t nlb, uint64_t count, uint64_t lba) "cid %"PRIu16" opname '%s' nsid %"PRIu32" nlb %"PRIu32" count %"PRIu64" lba 0x%"PRIx64"" pci_nvme_rw_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" |