aboutsummaryrefslogtreecommitdiff
path: root/hw/block/nvme.c
diff options
context:
space:
mode:
authorKlaus Jensen <k.jensen@samsung.com>2021-02-04 09:55:48 +0100
committerKlaus Jensen <k.jensen@samsung.com>2021-03-18 12:34:51 +0100
commit146f720c55637410062041f68dc908645cd18aaa (patch)
treecee4032d0543f13643b2cec0dd47687902c0fb60 /hw/block/nvme.c
parentbc3a65e99254cfe001bd16a569a5aa7d20f930e8 (diff)
downloadqemu-146f720c55637410062041f68dc908645cd18aaa.zip
qemu-146f720c55637410062041f68dc908645cd18aaa.tar.gz
qemu-146f720c55637410062041f68dc908645cd18aaa.tar.bz2
hw/block/nvme: end-to-end data protection
Add support for namespaces formatted with protection information. The type of end-to-end data protection (i.e. Type 1, Type 2 or Type 3) is selected with the `pi` nvme-ns device parameter. If the number of metadata bytes is larger than 8, the `pil` nvme-ns device parameter may be used to control the location of the 8-byte DIF tuple. The default `pil` value of '0', causes the DIF tuple to be transferred as the last 8 bytes of the metadata. Set to 1 to store this in the first eight bytes instead. Co-authored-by: Gollu Appalanaidu <anaidu.gollu@samsung.com> Signed-off-by: Gollu Appalanaidu <anaidu.gollu@samsung.com> Signed-off-by: Klaus Jensen <k.jensen@samsung.com> Reviewed-by: Keith Busch <kbusch@kernel.org>
Diffstat (limited to 'hw/block/nvme.c')
-rw-r--r--hw/block/nvme.c258
1 files changed, 219 insertions, 39 deletions
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 2c4757a..7af651d 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -144,6 +144,7 @@
#include "trace.h"
#include "nvme.h"
#include "nvme-ns.h"
+#include "nvme-dif.h"
#define NVME_MAX_IOQPAIRS 0xffff
#define NVME_DB_SIZE 4
@@ -226,15 +227,6 @@ static const uint32_t nvme_cse_iocs_zoned[256] = {
static void nvme_process_sq(void *opaque);
-static uint16_t nvme_cid(NvmeRequest *req)
-{
- if (!req) {
- return 0xffff;
- }
-
- return le16_to_cpu(req->cqe.cid);
-}
-
static uint16_t nvme_sqid(NvmeRequest *req)
{
return le16_to_cpu(req->sq->sqid);
@@ -933,8 +925,8 @@ unmap:
return status;
}
-static uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len,
- NvmeCmd *cmd)
+uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len,
+ NvmeCmd *cmd)
{
uint64_t prp1, prp2;
@@ -986,9 +978,16 @@ static uint16_t nvme_map_mptr(NvmeCtrl *n, NvmeSg *sg, size_t len,
static uint16_t nvme_map_data(NvmeCtrl *n, uint32_t nlb, NvmeRequest *req)
{
NvmeNamespace *ns = req->ns;
+ NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
+ uint16_t ctrl = le16_to_cpu(rw->control);
size_t len = nvme_l2b(ns, nlb);
uint16_t status;
+ if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) &&
+ (ctrl & NVME_RW_PRINFO_PRACT && nvme_msize(ns) == 8)) {
+ goto out;
+ }
+
if (nvme_ns_ext(ns)) {
NvmeSg sg;
@@ -1006,6 +1005,7 @@ static uint16_t nvme_map_data(NvmeCtrl *n, uint32_t nlb, NvmeRequest *req)
return NVME_SUCCESS;
}
+out:
return nvme_map_dptr(n, &req->sg, len, &req->cmd);
}
@@ -1035,11 +1035,6 @@ static uint16_t nvme_map_mdata(NvmeCtrl *n, uint32_t nlb, NvmeRequest *req)
return nvme_map_mptr(n, &req->sg, len, &req->cmd);
}
-typedef enum NvmeTxDirection {
- NVME_TX_DIRECTION_TO_DEVICE = 0,
- NVME_TX_DIRECTION_FROM_DEVICE = 1,
-} NvmeTxDirection;
-
static uint16_t nvme_tx_interleaved(NvmeCtrl *n, NvmeSg *sg, uint8_t *ptr,
uint32_t len, uint32_t bytes,
int32_t skip_bytes, int64_t offset,
@@ -1164,12 +1159,15 @@ static inline uint16_t nvme_h2c(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
return nvme_tx(n, &req->sg, ptr, len, NVME_TX_DIRECTION_TO_DEVICE);
}
-static uint16_t nvme_bounce_data(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
- NvmeTxDirection dir, NvmeRequest *req)
+uint16_t nvme_bounce_data(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
+ NvmeTxDirection dir, NvmeRequest *req)
{
NvmeNamespace *ns = req->ns;
+ NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
+ uint16_t ctrl = le16_to_cpu(rw->control);
- if (nvme_ns_ext(ns)) {
+ if (nvme_ns_ext(ns) &&
+ !(ctrl & NVME_RW_PRINFO_PRACT && nvme_msize(ns) == 8)) {
size_t lsize = nvme_lsize(ns);
size_t msize = nvme_msize(ns);
@@ -1180,8 +1178,8 @@ static uint16_t nvme_bounce_data(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
return nvme_tx(n, &req->sg, ptr, len, dir);
}
-static uint16_t nvme_bounce_mdata(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
- NvmeTxDirection dir, NvmeRequest *req)
+uint16_t nvme_bounce_mdata(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
+ NvmeTxDirection dir, NvmeRequest *req)
{
NvmeNamespace *ns = req->ns;
uint16_t status;
@@ -1777,7 +1775,7 @@ static void nvme_misc_cb(void *opaque, int ret)
nvme_enqueue_req_completion(nvme_cq(req), req);
}
-static void nvme_rw_complete_cb(void *opaque, int ret)
+void nvme_rw_complete_cb(void *opaque, int ret)
{
NvmeRequest *req = opaque;
NvmeNamespace *ns = req->ns;
@@ -1984,11 +1982,13 @@ struct nvme_copy_ctx {
uint8_t *bounce;
uint8_t *mbounce;
uint32_t nlb;
+ NvmeCopySourceRange *ranges;
};
struct nvme_copy_in_ctx {
NvmeRequest *req;
QEMUIOVector iov;
+ NvmeCopySourceRange *range;
};
static void nvme_copy_complete_cb(void *opaque, int ret)
@@ -2062,6 +2062,70 @@ static void nvme_copy_in_complete(NvmeRequest *req)
block_acct_done(blk_get_stats(ns->blkconf.blk), &req->acct);
+ if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
+ uint16_t prinfor = (copy->control[0] >> 4) & 0xf;
+ uint16_t prinfow = (copy->control[2] >> 2) & 0xf;
+ uint16_t nr = copy->nr + 1;
+ NvmeCopySourceRange *range;
+ uint64_t slba;
+ uint32_t nlb;
+ uint16_t apptag, appmask;
+ uint32_t reftag;
+ uint8_t *buf = ctx->bounce, *mbuf = ctx->mbounce;
+ size_t len, mlen;
+ int i;
+
+ /*
+ * The dif helpers expects prinfo to be similar to the control field of
+ * the NvmeRwCmd, so shift by 10 to fake it.
+ */
+ prinfor = prinfor << 10;
+ prinfow = prinfow << 10;
+
+ for (i = 0; i < nr; i++) {
+ range = &ctx->ranges[i];
+ slba = le64_to_cpu(range->slba);
+ nlb = le16_to_cpu(range->nlb) + 1;
+ len = nvme_l2b(ns, nlb);
+ mlen = nvme_m2b(ns, nlb);
+ apptag = le16_to_cpu(range->apptag);
+ appmask = le16_to_cpu(range->appmask);
+ reftag = le32_to_cpu(range->reftag);
+
+ status = nvme_dif_check(ns, buf, len, mbuf, mlen, prinfor, slba,
+ apptag, appmask, reftag);
+ if (status) {
+ goto invalid;
+ }
+
+ buf += len;
+ mbuf += mlen;
+ }
+
+ apptag = le16_to_cpu(copy->apptag);
+ appmask = le16_to_cpu(copy->appmask);
+ reftag = le32_to_cpu(copy->reftag);
+
+ if (prinfow & NVME_RW_PRINFO_PRACT) {
+ size_t len = nvme_l2b(ns, ctx->nlb);
+ size_t mlen = nvme_m2b(ns, ctx->nlb);
+
+ status = nvme_check_prinfo(ns, prinfow, sdlba, reftag);
+ if (status) {
+ goto invalid;
+ }
+
+ nvme_dif_pract_generate_dif(ns, ctx->bounce, len, ctx->mbounce,
+ mlen, apptag, reftag);
+ } else {
+ status = nvme_dif_check(ns, ctx->bounce, len, ctx->mbounce, mlen,
+ prinfow, sdlba, apptag, appmask, reftag);
+ if (status) {
+ goto invalid;
+ }
+ }
+ }
+
status = nvme_check_bounds(ns, sdlba, ctx->nlb);
if (status) {
trace_pci_nvme_err_invalid_lba_range(sdlba, ctx->nlb, ns->id_ns.nsze);
@@ -2156,7 +2220,13 @@ struct nvme_compare_ctx {
static void nvme_compare_mdata_cb(void *opaque, int ret)
{
NvmeRequest *req = opaque;
+ NvmeNamespace *ns = req->ns;
NvmeCtrl *n = nvme_ctrl(req);
+ NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
+ uint16_t ctrl = le16_to_cpu(rw->control);
+ uint16_t apptag = le16_to_cpu(rw->apptag);
+ uint16_t appmask = le16_to_cpu(rw->appmask);
+ uint32_t reftag = le32_to_cpu(rw->reftag);
struct nvme_compare_ctx *ctx = req->opaque;
g_autofree uint8_t *buf = NULL;
uint16_t status = NVME_SUCCESS;
@@ -2172,6 +2242,40 @@ static void nvme_compare_mdata_cb(void *opaque, int ret)
goto out;
}
+ if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
+ uint64_t slba = le64_to_cpu(rw->slba);
+ uint8_t *bufp;
+ uint8_t *mbufp = ctx->mdata.bounce;
+ uint8_t *end = mbufp + ctx->mdata.iov.size;
+ size_t msize = nvme_msize(ns);
+ int16_t pil = 0;
+
+ status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size,
+ ctx->mdata.bounce, ctx->mdata.iov.size, ctrl,
+ slba, apptag, appmask, reftag);
+ if (status) {
+ req->status = status;
+ goto out;
+ }
+
+ /*
+ * When formatted with protection information, do not compare the DIF
+ * tuple.
+ */
+ if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) {
+ pil = nvme_msize(ns) - sizeof(NvmeDifTuple);
+ }
+
+ for (bufp = buf; mbufp < end; bufp += msize, mbufp += msize) {
+ if (memcmp(bufp + pil, mbufp + pil, msize - pil)) {
+ req->status = NVME_CMP_FAILURE;
+ goto out;
+ }
+ }
+
+ goto out;
+ }
+
if (memcmp(buf, ctx->mdata.bounce, ctx->mdata.iov.size)) {
req->status = NVME_CMP_FAILURE;
goto out;
@@ -2331,12 +2435,18 @@ static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req)
{
NvmeNamespace *ns = req->ns;
NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd;
- g_autofree NvmeCopySourceRange *range = NULL;
uint16_t nr = copy->nr + 1;
uint8_t format = copy->control[0] & 0xf;
- uint32_t nlb = 0;
+ /*
+ * Shift the PRINFOR/PRINFOW values by 10 to allow reusing the
+ * NVME_RW_PRINFO constants.
+ */
+ uint16_t prinfor = ((copy->control[0] >> 4) & 0xf) << 10;
+ uint16_t prinfow = ((copy->control[2] >> 2) & 0xf) << 10;
+
+ uint32_t nlb = 0;
uint8_t *bounce = NULL, *bouncep = NULL;
uint8_t *mbounce = NULL, *mbouncep = NULL;
struct nvme_copy_ctx *ctx;
@@ -2345,6 +2455,11 @@ static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req)
trace_pci_nvme_copy(nvme_cid(req), nvme_nsid(ns), nr, format);
+ if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) &&
+ ((prinfor & NVME_RW_PRINFO_PRACT) != (prinfow & NVME_RW_PRINFO_PRACT))) {
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+
if (!(n->id_ctrl.ocfs & (1 << format))) {
trace_pci_nvme_err_copy_invalid_format(format);
return NVME_INVALID_FIELD | NVME_DNR;
@@ -2354,39 +2469,41 @@ static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req)
return NVME_CMD_SIZE_LIMIT | NVME_DNR;
}
- range = g_new(NvmeCopySourceRange, nr);
+ ctx = g_new(struct nvme_copy_ctx, 1);
+ ctx->ranges = g_new(NvmeCopySourceRange, nr);
- status = nvme_h2c(n, (uint8_t *)range, nr * sizeof(NvmeCopySourceRange),
- req);
+ status = nvme_h2c(n, (uint8_t *)ctx->ranges,
+ nr * sizeof(NvmeCopySourceRange), req);
if (status) {
- return status;
+ goto out;
}
for (i = 0; i < nr; i++) {
- uint64_t slba = le64_to_cpu(range[i].slba);
- uint32_t _nlb = le16_to_cpu(range[i].nlb) + 1;
+ uint64_t slba = le64_to_cpu(ctx->ranges[i].slba);
+ uint32_t _nlb = le16_to_cpu(ctx->ranges[i].nlb) + 1;
if (_nlb > le16_to_cpu(ns->id_ns.mssrl)) {
- return NVME_CMD_SIZE_LIMIT | NVME_DNR;
+ status = NVME_CMD_SIZE_LIMIT | NVME_DNR;
+ goto out;
}
status = nvme_check_bounds(ns, slba, _nlb);
if (status) {
trace_pci_nvme_err_invalid_lba_range(slba, _nlb, ns->id_ns.nsze);
- return status;
+ goto out;
}
if (NVME_ERR_REC_DULBE(ns->features.err_rec)) {
status = nvme_check_dulbe(ns, slba, _nlb);
if (status) {
- return status;
+ goto out;
}
}
if (ns->params.zoned) {
status = nvme_check_zone_read(ns, slba, _nlb);
if (status) {
- return status;
+ goto out;
}
}
@@ -2394,7 +2511,8 @@ static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req)
}
if (nlb > le32_to_cpu(ns->id_ns.mcl)) {
- return NVME_CMD_SIZE_LIMIT | NVME_DNR;
+ status = NVME_CMD_SIZE_LIMIT | NVME_DNR;
+ goto out;
}
bounce = bouncep = g_malloc(nvme_l2b(ns, nlb));
@@ -2405,8 +2523,6 @@ static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req)
block_acct_start(blk_get_stats(ns->blkconf.blk), &req->acct, 0,
BLOCK_ACCT_READ);
- ctx = g_new(struct nvme_copy_ctx, 1);
-
ctx->bounce = bounce;
ctx->mbounce = mbounce;
ctx->nlb = nlb;
@@ -2415,8 +2531,8 @@ static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req)
req->opaque = ctx;
for (i = 0; i < nr; i++) {
- uint64_t slba = le64_to_cpu(range[i].slba);
- uint32_t nlb = le16_to_cpu(range[i].nlb) + 1;
+ uint64_t slba = le64_to_cpu(ctx->ranges[i].slba);
+ uint32_t nlb = le16_to_cpu(ctx->ranges[i].nlb) + 1;
size_t len = nvme_l2b(ns, nlb);
int64_t offset = nvme_l2b(ns, slba);
@@ -2463,6 +2579,12 @@ static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req)
}
return NVME_NO_COMPLETE;
+
+out:
+ g_free(ctx->ranges);
+ g_free(ctx);
+
+ return status;
}
static uint16_t nvme_compare(NvmeCtrl *n, NvmeRequest *req)
@@ -2472,6 +2594,7 @@ static uint16_t nvme_compare(NvmeCtrl *n, NvmeRequest *req)
BlockBackend *blk = ns->blkconf.blk;
uint64_t slba = le64_to_cpu(rw->slba);
uint32_t nlb = le16_to_cpu(rw->nlb) + 1;
+ uint16_t ctrl = le16_to_cpu(rw->control);
size_t data_len = nvme_l2b(ns, nlb);
size_t len = data_len;
int64_t offset = nvme_l2b(ns, slba);
@@ -2480,6 +2603,10 @@ static uint16_t nvme_compare(NvmeCtrl *n, NvmeRequest *req)
trace_pci_nvme_compare(nvme_cid(req), nvme_nsid(ns), slba, nlb);
+ if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) && (ctrl & NVME_RW_PRINFO_PRACT)) {
+ return NVME_INVALID_PROT_INFO | NVME_DNR;
+ }
+
if (nvme_ns_ext(ns)) {
len += nvme_m2b(ns, nlb);
}
@@ -2582,6 +2709,7 @@ static uint16_t nvme_read(NvmeCtrl *n, NvmeRequest *req)
NvmeNamespace *ns = req->ns;
uint64_t slba = le64_to_cpu(rw->slba);
uint32_t nlb = (uint32_t)le16_to_cpu(rw->nlb) + 1;
+ uint16_t ctrl = le16_to_cpu(rw->control);
uint64_t data_size = nvme_l2b(ns, nlb);
uint64_t mapped_size = data_size;
uint64_t data_offset;
@@ -2590,6 +2718,14 @@ static uint16_t nvme_read(NvmeCtrl *n, NvmeRequest *req)
if (nvme_ns_ext(ns)) {
mapped_size += nvme_m2b(ns, nlb);
+
+ if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
+ bool pract = ctrl & NVME_RW_PRINFO_PRACT;
+
+ if (pract && nvme_msize(ns) == 8) {
+ mapped_size = data_size;
+ }
+ }
}
trace_pci_nvme_read(nvme_cid(req), nvme_nsid(ns), nlb, mapped_size, slba);
@@ -2620,6 +2756,10 @@ static uint16_t nvme_read(NvmeCtrl *n, NvmeRequest *req)
}
}
+ if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
+ return nvme_dif_rw(n, req);
+ }
+
status = nvme_map_data(n, nlb, req);
if (status) {
goto invalid;
@@ -2644,6 +2784,7 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append,
NvmeNamespace *ns = req->ns;
uint64_t slba = le64_to_cpu(rw->slba);
uint32_t nlb = (uint32_t)le16_to_cpu(rw->nlb) + 1;
+ uint16_t ctrl = le16_to_cpu(rw->control);
uint64_t data_size = nvme_l2b(ns, nlb);
uint64_t mapped_size = data_size;
uint64_t data_offset;
@@ -2654,6 +2795,14 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append,
if (nvme_ns_ext(ns)) {
mapped_size += nvme_m2b(ns, nlb);
+
+ if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
+ bool pract = ctrl & NVME_RW_PRINFO_PRACT;
+
+ if (pract && nvme_msize(ns) == 8) {
+ mapped_size -= nvme_m2b(ns, nlb);
+ }
+ }
}
trace_pci_nvme_write(nvme_cid(req), nvme_io_opc_str(rw->opcode),
@@ -2676,6 +2825,8 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append,
zone = nvme_get_zone_by_slba(ns, slba);
if (append) {
+ bool piremap = !!(ctrl & NVME_RW_PIREMAP);
+
if (unlikely(slba != zone->d.zslba)) {
trace_pci_nvme_err_append_not_at_start(slba, zone->d.zslba);
status = NVME_INVALID_FIELD;
@@ -2689,7 +2840,32 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append,
}
slba = zone->w_ptr;
+ rw->slba = cpu_to_le64(slba);
res->slba = cpu_to_le64(slba);
+
+ switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
+ case NVME_ID_NS_DPS_TYPE_1:
+ if (!piremap) {
+ return NVME_INVALID_PROT_INFO | NVME_DNR;
+ }
+
+ /* fallthrough */
+
+ case NVME_ID_NS_DPS_TYPE_2:
+ if (piremap) {
+ uint32_t reftag = le32_to_cpu(rw->reftag);
+ rw->reftag = cpu_to_le32(reftag + (slba - zone->d.zslba));
+ }
+
+ break;
+
+ case NVME_ID_NS_DPS_TYPE_3:
+ if (piremap) {
+ return NVME_INVALID_PROT_INFO | NVME_DNR;
+ }
+
+ break;
+ }
}
status = nvme_check_zone_write(ns, zone, slba, nlb);
@@ -2707,6 +2883,10 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append,
data_offset = nvme_l2b(ns, slba);
+ if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
+ return nvme_dif_rw(n, req);
+ }
+
if (!wrz) {
status = nvme_map_data(n, nlb, req);
if (status) {