diff options
author | Klaus Jensen <k.jensen@samsung.com> | 2021-02-04 09:55:48 +0100 |
---|---|---|
committer | Klaus Jensen <k.jensen@samsung.com> | 2021-03-18 12:34:51 +0100 |
commit | 146f720c55637410062041f68dc908645cd18aaa (patch) | |
tree | cee4032d0543f13643b2cec0dd47687902c0fb60 /hw/block/nvme.c | |
parent | bc3a65e99254cfe001bd16a569a5aa7d20f930e8 (diff) | |
download | qemu-146f720c55637410062041f68dc908645cd18aaa.zip qemu-146f720c55637410062041f68dc908645cd18aaa.tar.gz qemu-146f720c55637410062041f68dc908645cd18aaa.tar.bz2 |
hw/block/nvme: end-to-end data protection
Add support for namespaces formatted with protection information. The
type of end-to-end data protection (i.e. Type 1, Type 2 or Type 3) is
selected with the `pi` nvme-ns device parameter. If the number of
metadata bytes is larger than 8, the `pil` nvme-ns device parameter may
be used to control the location of the 8-byte DIF tuple. The default
`pil` value of '0', causes the DIF tuple to be transferred as the last
8 bytes of the metadata. Set to 1 to store this in the first eight bytes
instead.
Co-authored-by: Gollu Appalanaidu <anaidu.gollu@samsung.com>
Signed-off-by: Gollu Appalanaidu <anaidu.gollu@samsung.com>
Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Diffstat (limited to 'hw/block/nvme.c')
-rw-r--r-- | hw/block/nvme.c | 258 |
1 files changed, 219 insertions, 39 deletions
diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 2c4757a..7af651d 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -144,6 +144,7 @@ #include "trace.h" #include "nvme.h" #include "nvme-ns.h" +#include "nvme-dif.h" #define NVME_MAX_IOQPAIRS 0xffff #define NVME_DB_SIZE 4 @@ -226,15 +227,6 @@ static const uint32_t nvme_cse_iocs_zoned[256] = { static void nvme_process_sq(void *opaque); -static uint16_t nvme_cid(NvmeRequest *req) -{ - if (!req) { - return 0xffff; - } - - return le16_to_cpu(req->cqe.cid); -} - static uint16_t nvme_sqid(NvmeRequest *req) { return le16_to_cpu(req->sq->sqid); @@ -933,8 +925,8 @@ unmap: return status; } -static uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len, - NvmeCmd *cmd) +uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len, + NvmeCmd *cmd) { uint64_t prp1, prp2; @@ -986,9 +978,16 @@ static uint16_t nvme_map_mptr(NvmeCtrl *n, NvmeSg *sg, size_t len, static uint16_t nvme_map_data(NvmeCtrl *n, uint32_t nlb, NvmeRequest *req) { NvmeNamespace *ns = req->ns; + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + uint16_t ctrl = le16_to_cpu(rw->control); size_t len = nvme_l2b(ns, nlb); uint16_t status; + if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) && + (ctrl & NVME_RW_PRINFO_PRACT && nvme_msize(ns) == 8)) { + goto out; + } + if (nvme_ns_ext(ns)) { NvmeSg sg; @@ -1006,6 +1005,7 @@ static uint16_t nvme_map_data(NvmeCtrl *n, uint32_t nlb, NvmeRequest *req) return NVME_SUCCESS; } +out: return nvme_map_dptr(n, &req->sg, len, &req->cmd); } @@ -1035,11 +1035,6 @@ static uint16_t nvme_map_mdata(NvmeCtrl *n, uint32_t nlb, NvmeRequest *req) return nvme_map_mptr(n, &req->sg, len, &req->cmd); } -typedef enum NvmeTxDirection { - NVME_TX_DIRECTION_TO_DEVICE = 0, - NVME_TX_DIRECTION_FROM_DEVICE = 1, -} NvmeTxDirection; - static uint16_t nvme_tx_interleaved(NvmeCtrl *n, NvmeSg *sg, uint8_t *ptr, uint32_t len, uint32_t bytes, int32_t skip_bytes, int64_t offset, @@ -1164,12 +1159,15 @@ static inline uint16_t nvme_h2c(NvmeCtrl *n, uint8_t *ptr, uint32_t len, return nvme_tx(n, &req->sg, ptr, len, NVME_TX_DIRECTION_TO_DEVICE); } -static uint16_t nvme_bounce_data(NvmeCtrl *n, uint8_t *ptr, uint32_t len, - NvmeTxDirection dir, NvmeRequest *req) +uint16_t nvme_bounce_data(NvmeCtrl *n, uint8_t *ptr, uint32_t len, + NvmeTxDirection dir, NvmeRequest *req) { NvmeNamespace *ns = req->ns; + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + uint16_t ctrl = le16_to_cpu(rw->control); - if (nvme_ns_ext(ns)) { + if (nvme_ns_ext(ns) && + !(ctrl & NVME_RW_PRINFO_PRACT && nvme_msize(ns) == 8)) { size_t lsize = nvme_lsize(ns); size_t msize = nvme_msize(ns); @@ -1180,8 +1178,8 @@ static uint16_t nvme_bounce_data(NvmeCtrl *n, uint8_t *ptr, uint32_t len, return nvme_tx(n, &req->sg, ptr, len, dir); } -static uint16_t nvme_bounce_mdata(NvmeCtrl *n, uint8_t *ptr, uint32_t len, - NvmeTxDirection dir, NvmeRequest *req) +uint16_t nvme_bounce_mdata(NvmeCtrl *n, uint8_t *ptr, uint32_t len, + NvmeTxDirection dir, NvmeRequest *req) { NvmeNamespace *ns = req->ns; uint16_t status; @@ -1777,7 +1775,7 @@ static void nvme_misc_cb(void *opaque, int ret) nvme_enqueue_req_completion(nvme_cq(req), req); } -static void nvme_rw_complete_cb(void *opaque, int ret) +void nvme_rw_complete_cb(void *opaque, int ret) { NvmeRequest *req = opaque; NvmeNamespace *ns = req->ns; @@ -1984,11 +1982,13 @@ struct nvme_copy_ctx { uint8_t *bounce; uint8_t *mbounce; uint32_t nlb; + NvmeCopySourceRange *ranges; }; struct nvme_copy_in_ctx { NvmeRequest *req; QEMUIOVector iov; + NvmeCopySourceRange *range; }; static void nvme_copy_complete_cb(void *opaque, int ret) @@ -2062,6 +2062,70 @@ static void nvme_copy_in_complete(NvmeRequest *req) block_acct_done(blk_get_stats(ns->blkconf.blk), &req->acct); + if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { + uint16_t prinfor = (copy->control[0] >> 4) & 0xf; + uint16_t prinfow = (copy->control[2] >> 2) & 0xf; + uint16_t nr = copy->nr + 1; + NvmeCopySourceRange *range; + uint64_t slba; + uint32_t nlb; + uint16_t apptag, appmask; + uint32_t reftag; + uint8_t *buf = ctx->bounce, *mbuf = ctx->mbounce; + size_t len, mlen; + int i; + + /* + * The dif helpers expects prinfo to be similar to the control field of + * the NvmeRwCmd, so shift by 10 to fake it. + */ + prinfor = prinfor << 10; + prinfow = prinfow << 10; + + for (i = 0; i < nr; i++) { + range = &ctx->ranges[i]; + slba = le64_to_cpu(range->slba); + nlb = le16_to_cpu(range->nlb) + 1; + len = nvme_l2b(ns, nlb); + mlen = nvme_m2b(ns, nlb); + apptag = le16_to_cpu(range->apptag); + appmask = le16_to_cpu(range->appmask); + reftag = le32_to_cpu(range->reftag); + + status = nvme_dif_check(ns, buf, len, mbuf, mlen, prinfor, slba, + apptag, appmask, reftag); + if (status) { + goto invalid; + } + + buf += len; + mbuf += mlen; + } + + apptag = le16_to_cpu(copy->apptag); + appmask = le16_to_cpu(copy->appmask); + reftag = le32_to_cpu(copy->reftag); + + if (prinfow & NVME_RW_PRINFO_PRACT) { + size_t len = nvme_l2b(ns, ctx->nlb); + size_t mlen = nvme_m2b(ns, ctx->nlb); + + status = nvme_check_prinfo(ns, prinfow, sdlba, reftag); + if (status) { + goto invalid; + } + + nvme_dif_pract_generate_dif(ns, ctx->bounce, len, ctx->mbounce, + mlen, apptag, reftag); + } else { + status = nvme_dif_check(ns, ctx->bounce, len, ctx->mbounce, mlen, + prinfow, sdlba, apptag, appmask, reftag); + if (status) { + goto invalid; + } + } + } + status = nvme_check_bounds(ns, sdlba, ctx->nlb); if (status) { trace_pci_nvme_err_invalid_lba_range(sdlba, ctx->nlb, ns->id_ns.nsze); @@ -2156,7 +2220,13 @@ struct nvme_compare_ctx { static void nvme_compare_mdata_cb(void *opaque, int ret) { NvmeRequest *req = opaque; + NvmeNamespace *ns = req->ns; NvmeCtrl *n = nvme_ctrl(req); + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + uint16_t ctrl = le16_to_cpu(rw->control); + uint16_t apptag = le16_to_cpu(rw->apptag); + uint16_t appmask = le16_to_cpu(rw->appmask); + uint32_t reftag = le32_to_cpu(rw->reftag); struct nvme_compare_ctx *ctx = req->opaque; g_autofree uint8_t *buf = NULL; uint16_t status = NVME_SUCCESS; @@ -2172,6 +2242,40 @@ static void nvme_compare_mdata_cb(void *opaque, int ret) goto out; } + if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { + uint64_t slba = le64_to_cpu(rw->slba); + uint8_t *bufp; + uint8_t *mbufp = ctx->mdata.bounce; + uint8_t *end = mbufp + ctx->mdata.iov.size; + size_t msize = nvme_msize(ns); + int16_t pil = 0; + + status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size, + ctx->mdata.bounce, ctx->mdata.iov.size, ctrl, + slba, apptag, appmask, reftag); + if (status) { + req->status = status; + goto out; + } + + /* + * When formatted with protection information, do not compare the DIF + * tuple. + */ + if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) { + pil = nvme_msize(ns) - sizeof(NvmeDifTuple); + } + + for (bufp = buf; mbufp < end; bufp += msize, mbufp += msize) { + if (memcmp(bufp + pil, mbufp + pil, msize - pil)) { + req->status = NVME_CMP_FAILURE; + goto out; + } + } + + goto out; + } + if (memcmp(buf, ctx->mdata.bounce, ctx->mdata.iov.size)) { req->status = NVME_CMP_FAILURE; goto out; @@ -2331,12 +2435,18 @@ static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req) { NvmeNamespace *ns = req->ns; NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd; - g_autofree NvmeCopySourceRange *range = NULL; uint16_t nr = copy->nr + 1; uint8_t format = copy->control[0] & 0xf; - uint32_t nlb = 0; + /* + * Shift the PRINFOR/PRINFOW values by 10 to allow reusing the + * NVME_RW_PRINFO constants. + */ + uint16_t prinfor = ((copy->control[0] >> 4) & 0xf) << 10; + uint16_t prinfow = ((copy->control[2] >> 2) & 0xf) << 10; + + uint32_t nlb = 0; uint8_t *bounce = NULL, *bouncep = NULL; uint8_t *mbounce = NULL, *mbouncep = NULL; struct nvme_copy_ctx *ctx; @@ -2345,6 +2455,11 @@ static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req) trace_pci_nvme_copy(nvme_cid(req), nvme_nsid(ns), nr, format); + if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) && + ((prinfor & NVME_RW_PRINFO_PRACT) != (prinfow & NVME_RW_PRINFO_PRACT))) { + return NVME_INVALID_FIELD | NVME_DNR; + } + if (!(n->id_ctrl.ocfs & (1 << format))) { trace_pci_nvme_err_copy_invalid_format(format); return NVME_INVALID_FIELD | NVME_DNR; @@ -2354,39 +2469,41 @@ static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req) return NVME_CMD_SIZE_LIMIT | NVME_DNR; } - range = g_new(NvmeCopySourceRange, nr); + ctx = g_new(struct nvme_copy_ctx, 1); + ctx->ranges = g_new(NvmeCopySourceRange, nr); - status = nvme_h2c(n, (uint8_t *)range, nr * sizeof(NvmeCopySourceRange), - req); + status = nvme_h2c(n, (uint8_t *)ctx->ranges, + nr * sizeof(NvmeCopySourceRange), req); if (status) { - return status; + goto out; } for (i = 0; i < nr; i++) { - uint64_t slba = le64_to_cpu(range[i].slba); - uint32_t _nlb = le16_to_cpu(range[i].nlb) + 1; + uint64_t slba = le64_to_cpu(ctx->ranges[i].slba); + uint32_t _nlb = le16_to_cpu(ctx->ranges[i].nlb) + 1; if (_nlb > le16_to_cpu(ns->id_ns.mssrl)) { - return NVME_CMD_SIZE_LIMIT | NVME_DNR; + status = NVME_CMD_SIZE_LIMIT | NVME_DNR; + goto out; } status = nvme_check_bounds(ns, slba, _nlb); if (status) { trace_pci_nvme_err_invalid_lba_range(slba, _nlb, ns->id_ns.nsze); - return status; + goto out; } if (NVME_ERR_REC_DULBE(ns->features.err_rec)) { status = nvme_check_dulbe(ns, slba, _nlb); if (status) { - return status; + goto out; } } if (ns->params.zoned) { status = nvme_check_zone_read(ns, slba, _nlb); if (status) { - return status; + goto out; } } @@ -2394,7 +2511,8 @@ static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req) } if (nlb > le32_to_cpu(ns->id_ns.mcl)) { - return NVME_CMD_SIZE_LIMIT | NVME_DNR; + status = NVME_CMD_SIZE_LIMIT | NVME_DNR; + goto out; } bounce = bouncep = g_malloc(nvme_l2b(ns, nlb)); @@ -2405,8 +2523,6 @@ static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req) block_acct_start(blk_get_stats(ns->blkconf.blk), &req->acct, 0, BLOCK_ACCT_READ); - ctx = g_new(struct nvme_copy_ctx, 1); - ctx->bounce = bounce; ctx->mbounce = mbounce; ctx->nlb = nlb; @@ -2415,8 +2531,8 @@ static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req) req->opaque = ctx; for (i = 0; i < nr; i++) { - uint64_t slba = le64_to_cpu(range[i].slba); - uint32_t nlb = le16_to_cpu(range[i].nlb) + 1; + uint64_t slba = le64_to_cpu(ctx->ranges[i].slba); + uint32_t nlb = le16_to_cpu(ctx->ranges[i].nlb) + 1; size_t len = nvme_l2b(ns, nlb); int64_t offset = nvme_l2b(ns, slba); @@ -2463,6 +2579,12 @@ static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req) } return NVME_NO_COMPLETE; + +out: + g_free(ctx->ranges); + g_free(ctx); + + return status; } static uint16_t nvme_compare(NvmeCtrl *n, NvmeRequest *req) @@ -2472,6 +2594,7 @@ static uint16_t nvme_compare(NvmeCtrl *n, NvmeRequest *req) BlockBackend *blk = ns->blkconf.blk; uint64_t slba = le64_to_cpu(rw->slba); uint32_t nlb = le16_to_cpu(rw->nlb) + 1; + uint16_t ctrl = le16_to_cpu(rw->control); size_t data_len = nvme_l2b(ns, nlb); size_t len = data_len; int64_t offset = nvme_l2b(ns, slba); @@ -2480,6 +2603,10 @@ static uint16_t nvme_compare(NvmeCtrl *n, NvmeRequest *req) trace_pci_nvme_compare(nvme_cid(req), nvme_nsid(ns), slba, nlb); + if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) && (ctrl & NVME_RW_PRINFO_PRACT)) { + return NVME_INVALID_PROT_INFO | NVME_DNR; + } + if (nvme_ns_ext(ns)) { len += nvme_m2b(ns, nlb); } @@ -2582,6 +2709,7 @@ static uint16_t nvme_read(NvmeCtrl *n, NvmeRequest *req) NvmeNamespace *ns = req->ns; uint64_t slba = le64_to_cpu(rw->slba); uint32_t nlb = (uint32_t)le16_to_cpu(rw->nlb) + 1; + uint16_t ctrl = le16_to_cpu(rw->control); uint64_t data_size = nvme_l2b(ns, nlb); uint64_t mapped_size = data_size; uint64_t data_offset; @@ -2590,6 +2718,14 @@ static uint16_t nvme_read(NvmeCtrl *n, NvmeRequest *req) if (nvme_ns_ext(ns)) { mapped_size += nvme_m2b(ns, nlb); + + if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { + bool pract = ctrl & NVME_RW_PRINFO_PRACT; + + if (pract && nvme_msize(ns) == 8) { + mapped_size = data_size; + } + } } trace_pci_nvme_read(nvme_cid(req), nvme_nsid(ns), nlb, mapped_size, slba); @@ -2620,6 +2756,10 @@ static uint16_t nvme_read(NvmeCtrl *n, NvmeRequest *req) } } + if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { + return nvme_dif_rw(n, req); + } + status = nvme_map_data(n, nlb, req); if (status) { goto invalid; @@ -2644,6 +2784,7 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append, NvmeNamespace *ns = req->ns; uint64_t slba = le64_to_cpu(rw->slba); uint32_t nlb = (uint32_t)le16_to_cpu(rw->nlb) + 1; + uint16_t ctrl = le16_to_cpu(rw->control); uint64_t data_size = nvme_l2b(ns, nlb); uint64_t mapped_size = data_size; uint64_t data_offset; @@ -2654,6 +2795,14 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append, if (nvme_ns_ext(ns)) { mapped_size += nvme_m2b(ns, nlb); + + if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { + bool pract = ctrl & NVME_RW_PRINFO_PRACT; + + if (pract && nvme_msize(ns) == 8) { + mapped_size -= nvme_m2b(ns, nlb); + } + } } trace_pci_nvme_write(nvme_cid(req), nvme_io_opc_str(rw->opcode), @@ -2676,6 +2825,8 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append, zone = nvme_get_zone_by_slba(ns, slba); if (append) { + bool piremap = !!(ctrl & NVME_RW_PIREMAP); + if (unlikely(slba != zone->d.zslba)) { trace_pci_nvme_err_append_not_at_start(slba, zone->d.zslba); status = NVME_INVALID_FIELD; @@ -2689,7 +2840,32 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append, } slba = zone->w_ptr; + rw->slba = cpu_to_le64(slba); res->slba = cpu_to_le64(slba); + + switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { + case NVME_ID_NS_DPS_TYPE_1: + if (!piremap) { + return NVME_INVALID_PROT_INFO | NVME_DNR; + } + + /* fallthrough */ + + case NVME_ID_NS_DPS_TYPE_2: + if (piremap) { + uint32_t reftag = le32_to_cpu(rw->reftag); + rw->reftag = cpu_to_le32(reftag + (slba - zone->d.zslba)); + } + + break; + + case NVME_ID_NS_DPS_TYPE_3: + if (piremap) { + return NVME_INVALID_PROT_INFO | NVME_DNR; + } + + break; + } } status = nvme_check_zone_write(ns, zone, slba, nlb); @@ -2707,6 +2883,10 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append, data_offset = nvme_l2b(ns, slba); + if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { + return nvme_dif_rw(n, req); + } + if (!wrz) { status = nvme_map_data(n, nlb, req); if (status) { |