From f42be416cd0d50bcb2987031fbab09ee257a5058 Mon Sep 17 00:00:00 2001 From: Gollu Appalanaidu Date: Fri, 21 May 2021 11:38:42 +0530 Subject: hw/nvme: fix style Identify command related functions style fix. Signed-off-by: Gollu Appalanaidu Signed-off-by: Klaus Jensen --- hw/nvme/ctrl.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'hw') diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 0bcaf71..40a7efc 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -4291,7 +4291,7 @@ static uint16_t nvme_identify_ns_attached_list(NvmeCtrl *n, NvmeRequest *req) } static uint16_t nvme_identify_ns_csi(NvmeCtrl *n, NvmeRequest *req, - bool active) + bool active) { NvmeNamespace *ns; NvmeIdentify *c = (NvmeIdentify *)&req->cmd; @@ -4326,7 +4326,7 @@ static uint16_t nvme_identify_ns_csi(NvmeCtrl *n, NvmeRequest *req, } static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeRequest *req, - bool active) + bool active) { NvmeNamespace *ns; NvmeIdentify *c = (NvmeIdentify *)&req->cmd; @@ -4373,7 +4373,7 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeRequest *req, } static uint16_t nvme_identify_nslist_csi(NvmeCtrl *n, NvmeRequest *req, - bool active) + bool active) { NvmeNamespace *ns; NvmeIdentify *c = (NvmeIdentify *)&req->cmd; -- cgit v1.1 From 18de1526baa606789fbe11f17087543ab20a9c49 Mon Sep 17 00:00:00 2001 From: Gollu Appalanaidu Date: Wed, 21 Apr 2021 18:26:46 +0530 Subject: hw/nvme: add identify namespace flbas/mc enums Add enums for the Identify Namespace FLBAS and MC fields. Signed-off-by: Gollu Appalanaidu [k.jensen: squashed separate flbas/mc commits into one] Signed-off-by: Klaus Jensen --- hw/nvme/ns.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'hw') diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c index 992e5a1..8066e31 100644 --- a/hw/nvme/ns.c +++ b/hw/nvme/ns.c @@ -82,10 +82,10 @@ static int nvme_ns_init(NvmeNamespace *ns, Error **errp) ms = ns->params.ms; if (ns->params.ms) { - id_ns->mc = 0x3; + id_ns->mc = NVME_ID_NS_MC_EXTENDED | NVME_ID_NS_MC_SEPARATE; if (ns->params.mset) { - id_ns->flbas |= 0x10; + id_ns->flbas |= NVME_ID_NS_FLBAS_EXTENDED; } id_ns->dpc = 0x1f; -- cgit v1.1 From 421a30927140945c6aa957c2c0e7ad695984483d Mon Sep 17 00:00:00 2001 From: Gollu Appalanaidu Date: Wed, 21 Apr 2021 18:21:00 +0530 Subject: hw/nvme: fix lbaf formats initialization Currently LBAF formats are being intialized based on metadata size if and only if nvme-ns "ms" parameter is non-zero value. Since FormatNVM command being supported device parameter "ms" may not be the criteria to initialize the supported LBAFs. And make LBAF array as read-only. Signed-off-by: Gollu Appalanaidu Reviewed-by: Klaus Jensen Signed-off-by: Klaus Jensen --- hw/nvme/ns.c | 51 ++++++++++++++++++++++----------------------------- 1 file changed, 22 insertions(+), 29 deletions(-) (limited to 'hw') diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c index 8066e31..3fec9c6 100644 --- a/hw/nvme/ns.c +++ b/hw/nvme/ns.c @@ -81,39 +81,32 @@ static int nvme_ns_init(NvmeNamespace *ns, Error **errp) ds = 31 - clz32(ns->blkconf.logical_block_size); ms = ns->params.ms; - if (ns->params.ms) { - id_ns->mc = NVME_ID_NS_MC_EXTENDED | NVME_ID_NS_MC_SEPARATE; + id_ns->mc = NVME_ID_NS_MC_EXTENDED | NVME_ID_NS_MC_SEPARATE; - if (ns->params.mset) { - id_ns->flbas |= NVME_ID_NS_FLBAS_EXTENDED; - } - - id_ns->dpc = 0x1f; - id_ns->dps = ((ns->params.pil & 0x1) << 3) | ns->params.pi; - - NvmeLBAF lbaf[16] = { - [0] = { .ds = 9 }, - [1] = { .ds = 9, .ms = 8 }, - [2] = { .ds = 9, .ms = 16 }, - [3] = { .ds = 9, .ms = 64 }, - [4] = { .ds = 12 }, - [5] = { .ds = 12, .ms = 8 }, - [6] = { .ds = 12, .ms = 16 }, - [7] = { .ds = 12, .ms = 64 }, - }; - - memcpy(&id_ns->lbaf, &lbaf, sizeof(lbaf)); - id_ns->nlbaf = 7; - } else { - NvmeLBAF lbaf[16] = { - [0] = { .ds = 9 }, - [1] = { .ds = 12 }, - }; + if (ms && ns->params.mset) { + id_ns->flbas |= NVME_ID_NS_FLBAS_EXTENDED; + } - memcpy(&id_ns->lbaf, &lbaf, sizeof(lbaf)); - id_ns->nlbaf = 1; + id_ns->dpc = 0x1f; + id_ns->dps = ns->params.pi; + if (ns->params.pi && ns->params.pil) { + id_ns->dps |= NVME_ID_NS_DPS_FIRST_EIGHT; } + static const NvmeLBAF lbaf[16] = { + [0] = { .ds = 9 }, + [1] = { .ds = 9, .ms = 8 }, + [2] = { .ds = 9, .ms = 16 }, + [3] = { .ds = 9, .ms = 64 }, + [4] = { .ds = 12 }, + [5] = { .ds = 12, .ms = 8 }, + [6] = { .ds = 12, .ms = 16 }, + [7] = { .ds = 12, .ms = 64 }, + }; + + memcpy(&id_ns->lbaf, &lbaf, sizeof(lbaf)); + id_ns->nlbaf = 7; + for (i = 0; i <= id_ns->nlbaf; i++) { NvmeLBAF *lbaf = &id_ns->lbaf[i]; if (lbaf->ds == ds) { -- cgit v1.1 From cccc2651f4222e587582867e966b054dd4b51a0e Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Fri, 28 May 2021 11:05:07 +0000 Subject: hw/nvme: add param to control auto zone transitioning to zone state closed In the Zoned Namespace Command Set Specification, chapter 2.5.1 Managing resources "The controller may transition zones in the ZSIO:Implicitly Opened state to the ZSC:Closed state for resource management purposes." The word may in this sentence means that automatically transitioning an implicitly opened zone to closed is completely optional. Add a new parameter so that the user can control if this automatic transitioning should be performed or not. Being able to control this can help with verifying that e.g. a user-space program behaves properly even without this optional ZNS feature. The default value is set to true, in order to not change the existing behavior. Signed-off-by: Niklas Cassel [k.jensen: moved parameter to controller] Signed-off-by: Klaus Jensen --- hw/nvme/ctrl.c | 32 ++++++++++++++++++++++---------- hw/nvme/nvme.h | 1 + 2 files changed, 23 insertions(+), 10 deletions(-) (limited to 'hw') diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 40a7efc..8dd9cb2 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -34,6 +34,7 @@ * aerl=,aer_max_queued=, \ * mdts=,vsl=, \ * zoned.zasl=, \ + * zoned.auto_transition=, \ * subsys= * -device nvme-ns,drive=,bus=,nsid=,\ * zoned=, \ @@ -100,6 +101,11 @@ * the minimum memory page size (CAP.MPSMIN). The default value is 0 (i.e. * defaulting to the value of `mdts`). * + * - `zoned.auto_transition` + * Indicates if zones in zone state implicitly opened can be automatically + * transitioned to zone state closed for resource management purposes. + * Defaults to 'on'. + * * nvme namespace device parameters * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * - `shared` @@ -1686,8 +1692,8 @@ enum { NVME_ZRM_AUTO = 1 << 0, }; -static uint16_t nvme_zrm_open_flags(NvmeNamespace *ns, NvmeZone *zone, - int flags) +static uint16_t nvme_zrm_open_flags(NvmeCtrl *n, NvmeNamespace *ns, + NvmeZone *zone, int flags) { int act = 0; uint16_t status; @@ -1699,7 +1705,9 @@ static uint16_t nvme_zrm_open_flags(NvmeNamespace *ns, NvmeZone *zone, /* fallthrough */ case NVME_ZONE_STATE_CLOSED: - nvme_zrm_auto_transition_zone(ns); + if (n->params.auto_transition_zones) { + nvme_zrm_auto_transition_zone(ns); + } status = nvme_aor_check(ns, act, 1); if (status) { return status; @@ -1735,14 +1743,16 @@ static uint16_t nvme_zrm_open_flags(NvmeNamespace *ns, NvmeZone *zone, } } -static inline uint16_t nvme_zrm_auto(NvmeNamespace *ns, NvmeZone *zone) +static inline uint16_t nvme_zrm_auto(NvmeCtrl *n, NvmeNamespace *ns, + NvmeZone *zone) { - return nvme_zrm_open_flags(ns, zone, NVME_ZRM_AUTO); + return nvme_zrm_open_flags(n, ns, zone, NVME_ZRM_AUTO); } -static inline uint16_t nvme_zrm_open(NvmeNamespace *ns, NvmeZone *zone) +static inline uint16_t nvme_zrm_open(NvmeCtrl *n, NvmeNamespace *ns, + NvmeZone *zone) { - return nvme_zrm_open_flags(ns, zone, 0); + return nvme_zrm_open_flags(n, ns, zone, 0); } static void nvme_advance_zone_wp(NvmeNamespace *ns, NvmeZone *zone, @@ -2283,7 +2293,7 @@ static void nvme_copy_in_complete(NvmeRequest *req) goto invalid; } - status = nvme_zrm_auto(ns, zone); + status = nvme_zrm_auto(nvme_ctrl(req), ns, zone); if (status) { goto invalid; } @@ -3080,7 +3090,7 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append, goto invalid; } - status = nvme_zrm_auto(ns, zone); + status = nvme_zrm_auto(n, ns, zone); if (status) { goto invalid; } @@ -3169,7 +3179,7 @@ enum NvmeZoneProcessingMask { static uint16_t nvme_open_zone(NvmeNamespace *ns, NvmeZone *zone, NvmeZoneState state, NvmeRequest *req) { - return nvme_zrm_open(ns, zone); + return nvme_zrm_open(nvme_ctrl(req), ns, zone); } static uint16_t nvme_close_zone(NvmeNamespace *ns, NvmeZone *zone, @@ -6259,6 +6269,8 @@ static Property nvme_props[] = { DEFINE_PROP_BOOL("use-intel-id", NvmeCtrl, params.use_intel_id, false), DEFINE_PROP_BOOL("legacy-cmb", NvmeCtrl, params.legacy_cmb, false), DEFINE_PROP_UINT8("zoned.zasl", NvmeCtrl, params.zasl, 0), + DEFINE_PROP_BOOL("zoned.auto_transition", NvmeCtrl, + params.auto_transition_zones, true), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h index 81a35cd..93a7e0e 100644 --- a/hw/nvme/nvme.h +++ b/hw/nvme/nvme.h @@ -382,6 +382,7 @@ typedef struct NvmeParams { uint8_t vsl; bool use_intel_id; uint8_t zasl; + bool auto_transition_zones; bool legacy_cmb; } NvmeParams; -- cgit v1.1 From 3553c48fcb6c8469d41de6410b01298d5f9d975e Mon Sep 17 00:00:00 2001 From: Gollu Appalanaidu Date: Tue, 27 Apr 2021 12:00:52 +0530 Subject: hw/nvme: fix csi field for cns 0x00 and 0x11 As per the TP 4056d Namespace types CNS 0x00 and CNS 0x11 CSI field shouldn't use but it is being used for these two Identify command CNS values, fix that. Remove 'nvme_csi_has_nvm_support()' helper as suggested by Klaus we can safely assume NVM command set support for all namespaces. Suggested-by: Klaus Jensen Signed-off-by: Gollu Appalanaidu Reviewed-by: Klaus Jensen Signed-off-by: Klaus Jensen --- hw/nvme/ctrl.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) (limited to 'hw') diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 8dd9cb2..7dea64b 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -4188,16 +4188,6 @@ static uint16_t nvme_rpt_empty_id_struct(NvmeCtrl *n, NvmeRequest *req) return nvme_c2h(n, id, sizeof(id), req); } -static inline bool nvme_csi_has_nvm_support(NvmeNamespace *ns) -{ - switch (ns->csi) { - case NVME_CSI_NVM: - case NVME_CSI_ZONED: - return true; - } - return false; -} - static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeRequest *req) { trace_pci_nvme_identify_ctrl(); @@ -4254,7 +4244,7 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeRequest *req, bool active) } } - if (c->csi == NVME_CSI_NVM && nvme_csi_has_nvm_support(ns)) { + if (active || ns->csi == NVME_CSI_NVM) { return nvme_c2h(n, (uint8_t *)&ns->id_ns, sizeof(NvmeIdNs), req); } @@ -4325,7 +4315,7 @@ static uint16_t nvme_identify_ns_csi(NvmeCtrl *n, NvmeRequest *req, } } - if (c->csi == NVME_CSI_NVM && nvme_csi_has_nvm_support(ns)) { + if (c->csi == NVME_CSI_NVM) { return nvme_rpt_empty_id_struct(n, req); } else if (c->csi == NVME_CSI_ZONED && ns->csi == NVME_CSI_ZONED) { return nvme_c2h(n, (uint8_t *)ns->id_ns_zoned, sizeof(NvmeIdNsZoned), -- cgit v1.1 From 6870cfb8140d8cf545a464b1e21521fdbeeb3417 Mon Sep 17 00:00:00 2001 From: Heinrich Schuchardt Date: Mon, 14 Jun 2021 22:19:00 +0200 Subject: hw/nvme: namespace parameter for EUI-64 The EUI-64 field is the only identifier for NVMe namespaces in UEFI device paths. Add a new namespace property "eui64", that provides the user the option to specify the EUI-64. Signed-off-by: Heinrich Schuchardt Acked-by: Klaus Jensen Reviewed-by: Klaus Jensen --- hw/nvme/ctrl.c | 58 +++++++++++++++++++++++++++++++++++----------------------- hw/nvme/ns.c | 2 ++ hw/nvme/nvme.h | 1 + 3 files changed, 38 insertions(+), 23 deletions(-) (limited to 'hw') diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 7dea64b..762bb82 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -4426,19 +4426,19 @@ static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeRequest *req) NvmeIdentify *c = (NvmeIdentify *)&req->cmd; uint32_t nsid = le32_to_cpu(c->nsid); uint8_t list[NVME_IDENTIFY_DATA_SIZE] = {}; - - struct data { - struct { - NvmeIdNsDescr hdr; - uint8_t v[NVME_NIDL_UUID]; - } uuid; - struct { - NvmeIdNsDescr hdr; - uint8_t v; - } csi; - }; - - struct data *ns_descrs = (struct data *)list; + uint8_t *pos = list; + struct { + NvmeIdNsDescr hdr; + uint8_t v[NVME_NIDL_UUID]; + } QEMU_PACKED uuid; + struct { + NvmeIdNsDescr hdr; + uint64_t v; + } QEMU_PACKED eui64; + struct { + NvmeIdNsDescr hdr; + uint8_t v; + } QEMU_PACKED csi; trace_pci_nvme_identify_ns_descr_list(nsid); @@ -4452,17 +4452,29 @@ static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeRequest *req) } /* - * Because the NGUID and EUI64 fields are 0 in the Identify Namespace data - * structure, a Namespace UUID (nidt = 3h) must be reported in the - * Namespace Identification Descriptor. Add the namespace UUID here. + * If the EUI-64 field is 0 and the NGUID field is 0, the namespace must + * provide a valid Namespace UUID in the Namespace Identification Descriptor + * data structure. QEMU does not yet support setting NGUID. */ - ns_descrs->uuid.hdr.nidt = NVME_NIDT_UUID; - ns_descrs->uuid.hdr.nidl = NVME_NIDL_UUID; - memcpy(&ns_descrs->uuid.v, ns->params.uuid.data, NVME_NIDL_UUID); - - ns_descrs->csi.hdr.nidt = NVME_NIDT_CSI; - ns_descrs->csi.hdr.nidl = NVME_NIDL_CSI; - ns_descrs->csi.v = ns->csi; + uuid.hdr.nidt = NVME_NIDT_UUID; + uuid.hdr.nidl = NVME_NIDL_UUID; + memcpy(uuid.v, ns->params.uuid.data, NVME_NIDL_UUID); + memcpy(pos, &uuid, sizeof(uuid)); + pos += sizeof(uuid); + + if (ns->params.eui64) { + eui64.hdr.nidt = NVME_NIDT_EUI64; + eui64.hdr.nidl = NVME_NIDL_EUI64; + eui64.v = cpu_to_be64(ns->params.eui64); + memcpy(pos, &eui64, sizeof(eui64)); + pos += sizeof(eui64); + } + + csi.hdr.nidt = NVME_NIDT_CSI; + csi.hdr.nidl = NVME_NIDL_CSI; + csi.v = ns->csi; + memcpy(pos, &csi, sizeof(csi)); + pos += sizeof(csi); return nvme_c2h(n, list, sizeof(list), req); } diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c index 3fec9c6..45e457d 100644 --- a/hw/nvme/ns.c +++ b/hw/nvme/ns.c @@ -77,6 +77,7 @@ static int nvme_ns_init(NvmeNamespace *ns, Error **errp) id_ns->mssrl = cpu_to_le16(ns->params.mssrl); id_ns->mcl = cpu_to_le32(ns->params.mcl); id_ns->msrc = ns->params.msrc; + id_ns->eui64 = cpu_to_be64(ns->params.eui64); ds = 31 - clz32(ns->blkconf.logical_block_size); ms = ns->params.ms; @@ -511,6 +512,7 @@ static Property nvme_ns_props[] = { DEFINE_PROP_BOOL("shared", NvmeNamespace, params.shared, false), DEFINE_PROP_UINT32("nsid", NvmeNamespace, params.nsid, 0), DEFINE_PROP_UUID("uuid", NvmeNamespace, params.uuid), + DEFINE_PROP_UINT64("eui64", NvmeNamespace, params.eui64, 0), DEFINE_PROP_UINT16("ms", NvmeNamespace, params.ms, 0), DEFINE_PROP_UINT8("mset", NvmeNamespace, params.mset, 0), DEFINE_PROP_UINT8("pi", NvmeNamespace, params.pi, 0), diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h index 93a7e0e..ac90e13 100644 --- a/hw/nvme/nvme.h +++ b/hw/nvme/nvme.h @@ -83,6 +83,7 @@ typedef struct NvmeNamespaceParams { bool shared; uint32_t nsid; QemuUUID uuid; + uint64_t eui64; uint16_t ms; uint8_t mset; -- cgit v1.1 From 3276dde4f262588f3645f2adbc84d07cb6981d3e Mon Sep 17 00:00:00 2001 From: Heinrich Schuchardt Date: Mon, 14 Jun 2021 22:19:01 +0200 Subject: hw/nvme: default for namespace EUI-64 On machines with version > 6.0 replace a missing EUI-64 by a generated value. Signed-off-by: Heinrich Schuchardt Reviewed-by: Klaus Jensen --- hw/core/machine.c | 1 + hw/nvme/ns.c | 9 +++++++++ hw/nvme/nvme.h | 2 ++ 3 files changed, 12 insertions(+) (limited to 'hw') diff --git a/hw/core/machine.c b/hw/core/machine.c index 55b9bc7..d0e9348 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -39,6 +39,7 @@ GlobalProperty hw_compat_6_0[] = { { "gpex-pcihost", "allow-unmapped-accesses", "false" }, { "i8042", "extended-state", "false"}, + { "nvme-ns", "eui64-default", "off"}, }; const size_t hw_compat_6_0_len = G_N_ELEMENTS(hw_compat_6_0); diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c index 45e457d..4275c3d 100644 --- a/hw/nvme/ns.c +++ b/hw/nvme/ns.c @@ -56,6 +56,7 @@ void nvme_ns_init_format(NvmeNamespace *ns) static int nvme_ns_init(NvmeNamespace *ns, Error **errp) { + static uint64_t ns_count; NvmeIdNs *id_ns = &ns->id_ns; uint8_t ds; uint16_t ms; @@ -73,6 +74,12 @@ static int nvme_ns_init(NvmeNamespace *ns, Error **errp) id_ns->nmic |= NVME_NMIC_NS_SHARED; } + /* Substitute a missing EUI-64 by an autogenerated one */ + ++ns_count; + if (!ns->params.eui64 && ns->params.eui64_default) { + ns->params.eui64 = ns_count + NVME_EUI64_DEFAULT; + } + /* simple copy */ id_ns->mssrl = cpu_to_le16(ns->params.mssrl); id_ns->mcl = cpu_to_le32(ns->params.mcl); @@ -533,6 +540,8 @@ static Property nvme_ns_props[] = { params.max_open_zones, 0), DEFINE_PROP_UINT32("zoned.descr_ext_size", NvmeNamespace, params.zd_extension_size, 0), + DEFINE_PROP_BOOL("eui64-default", NvmeNamespace, params.eui64_default, + true), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h index ac90e13..371ac9b 100644 --- a/hw/nvme/nvme.h +++ b/hw/nvme/nvme.h @@ -26,6 +26,7 @@ #define NVME_MAX_CONTROLLERS 32 #define NVME_MAX_NAMESPACES 256 +#define NVME_EUI64_DEFAULT ((uint64_t)0x5254000000000000) typedef struct NvmeCtrl NvmeCtrl; typedef struct NvmeNamespace NvmeNamespace; @@ -84,6 +85,7 @@ typedef struct NvmeNamespaceParams { uint32_t nsid; QemuUUID uuid; uint64_t eui64; + bool eui64_default; uint16_t ms; uint8_t mset; -- cgit v1.1 From 38f4ac65ac88ebf4f1869c1361f40b1817b4a39c Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Thu, 17 Jun 2021 21:06:47 +0200 Subject: hw/nvme: reimplement flush to allow cancellation Prior to this patch, a broadcast flush would result in submitting multiple "fire and forget" aios (no reference saved to the aiocbs returned from the blk_aio_flush calls). Fix this by issuing the flushes one after another. Signed-off-by: Klaus Jensen Reviewed-by: Keith Busch --- hw/nvme/ctrl.c | 204 +++++++++++++++++++++++++++++++-------------------- hw/nvme/nvme.h | 2 + hw/nvme/trace-events | 6 +- 3 files changed, 129 insertions(+), 83 deletions(-) (limited to 'hw') diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 762bb82..26c65a1 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -1788,22 +1788,19 @@ static inline bool nvme_is_write(NvmeRequest *req) rw->opcode == NVME_CMD_WRITE_ZEROES; } +static AioContext *nvme_get_aio_context(BlockAIOCB *acb) +{ + return qemu_get_aio_context(); +} + static void nvme_misc_cb(void *opaque, int ret) { NvmeRequest *req = opaque; - NvmeNamespace *ns = req->ns; - BlockBackend *blk = ns->blkconf.blk; - BlockAcctCookie *acct = &req->acct; - BlockAcctStats *stats = blk_get_stats(blk); - - trace_pci_nvme_misc_cb(nvme_cid(req), blk_name(blk)); + trace_pci_nvme_misc_cb(nvme_cid(req)); if (ret) { - block_acct_failed(stats, acct); nvme_aio_err(req, ret); - } else { - block_acct_done(stats, acct); } nvme_enqueue_req_completion(nvme_cq(req), req); @@ -1919,41 +1916,6 @@ static void nvme_aio_format_cb(void *opaque, int ret) nvme_enqueue_req_completion(nvme_cq(req), req); } -struct nvme_aio_flush_ctx { - NvmeRequest *req; - NvmeNamespace *ns; - BlockAcctCookie acct; -}; - -static void nvme_aio_flush_cb(void *opaque, int ret) -{ - struct nvme_aio_flush_ctx *ctx = opaque; - NvmeRequest *req = ctx->req; - uintptr_t *num_flushes = (uintptr_t *)&req->opaque; - - BlockBackend *blk = ctx->ns->blkconf.blk; - BlockAcctCookie *acct = &ctx->acct; - BlockAcctStats *stats = blk_get_stats(blk); - - trace_pci_nvme_aio_flush_cb(nvme_cid(req), blk_name(blk)); - - if (!ret) { - block_acct_done(stats, acct); - } else { - block_acct_failed(stats, acct); - nvme_aio_err(req, ret); - } - - (*num_flushes)--; - g_free(ctx); - - if (*num_flushes) { - return; - } - - nvme_enqueue_req_completion(nvme_cq(req), req); -} - static void nvme_verify_cb(void *opaque, int ret) { NvmeBounceContext *ctx = opaque; @@ -2868,57 +2830,139 @@ static uint16_t nvme_compare(NvmeCtrl *n, NvmeRequest *req) return NVME_NO_COMPLETE; } -static uint16_t nvme_flush(NvmeCtrl *n, NvmeRequest *req) -{ - uint32_t nsid = le32_to_cpu(req->cmd.nsid); - uintptr_t *num_flushes = (uintptr_t *)&req->opaque; - uint16_t status; - struct nvme_aio_flush_ctx *ctx; +typedef struct NvmeFlushAIOCB { + BlockAIOCB common; + BlockAIOCB *aiocb; + NvmeRequest *req; + QEMUBH *bh; + int ret; + NvmeNamespace *ns; + uint32_t nsid; + bool broadcast; +} NvmeFlushAIOCB; - trace_pci_nvme_flush(nvme_cid(req), nsid); +static void nvme_flush_cancel(BlockAIOCB *acb) +{ + NvmeFlushAIOCB *iocb = container_of(acb, NvmeFlushAIOCB, common); - if (nsid != NVME_NSID_BROADCAST) { - req->ns = nvme_ns(n, nsid); - if (unlikely(!req->ns)) { - return NVME_INVALID_FIELD | NVME_DNR; - } + iocb->ret = -ECANCELED; - block_acct_start(blk_get_stats(req->ns->blkconf.blk), &req->acct, 0, - BLOCK_ACCT_FLUSH); - req->aiocb = blk_aio_flush(req->ns->blkconf.blk, nvme_misc_cb, req); - return NVME_NO_COMPLETE; + if (iocb->aiocb) { + blk_aio_cancel_async(iocb->aiocb); } +} - /* 1-initialize; see comment in nvme_dsm */ - *num_flushes = 1; +static const AIOCBInfo nvme_flush_aiocb_info = { + .aiocb_size = sizeof(NvmeFlushAIOCB), + .cancel_async = nvme_flush_cancel, + .get_aio_context = nvme_get_aio_context, +}; - for (int i = 1; i <= NVME_MAX_NAMESPACES; i++) { - ns = nvme_ns(n, i); - if (!ns) { - continue; - } +static void nvme_flush_ns_cb(void *opaque, int ret) +{ + NvmeFlushAIOCB *iocb = opaque; + NvmeNamespace *ns = iocb->ns; - ctx = g_new(struct nvme_aio_flush_ctx, 1); - ctx->req = req; - ctx->ns = ns; + if (ret < 0) { + iocb->ret = ret; + goto out; + } else if (iocb->ret < 0) { + goto out; + } - (*num_flushes)++; + if (ns) { + trace_pci_nvme_flush_ns(iocb->nsid); - block_acct_start(blk_get_stats(ns->blkconf.blk), &ctx->acct, 0, - BLOCK_ACCT_FLUSH); - blk_aio_flush(ns->blkconf.blk, nvme_aio_flush_cb, ctx); + iocb->ns = NULL; + iocb->aiocb = blk_aio_flush(ns->blkconf.blk, nvme_flush_ns_cb, iocb); + return; } - /* account for the 1-initialization */ - (*num_flushes)--; +out: + iocb->aiocb = NULL; + qemu_bh_schedule(iocb->bh); +} - if (*num_flushes) { - status = NVME_NO_COMPLETE; - } else { - status = req->status; +static void nvme_flush_bh(void *opaque) +{ + NvmeFlushAIOCB *iocb = opaque; + NvmeRequest *req = iocb->req; + NvmeCtrl *n = nvme_ctrl(req); + int i; + + if (iocb->ret < 0) { + goto done; + } + + if (iocb->broadcast) { + for (i = iocb->nsid + 1; i <= NVME_MAX_NAMESPACES; i++) { + iocb->ns = nvme_ns(n, i); + if (iocb->ns) { + iocb->nsid = i; + break; + } + } + } + + if (!iocb->ns) { + goto done; + } + + nvme_flush_ns_cb(iocb, 0); + return; + +done: + qemu_bh_delete(iocb->bh); + iocb->bh = NULL; + + iocb->common.cb(iocb->common.opaque, iocb->ret); + + qemu_aio_unref(iocb); + + return; +} + +static uint16_t nvme_flush(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeFlushAIOCB *iocb; + uint32_t nsid = le32_to_cpu(req->cmd.nsid); + uint16_t status; + + iocb = qemu_aio_get(&nvme_flush_aiocb_info, NULL, nvme_misc_cb, req); + + iocb->req = req; + iocb->bh = qemu_bh_new(nvme_flush_bh, iocb); + iocb->ret = 0; + iocb->ns = NULL; + iocb->nsid = 0; + iocb->broadcast = (nsid == NVME_NSID_BROADCAST); + + if (!iocb->broadcast) { + if (!nvme_nsid_valid(n, nsid)) { + status = NVME_INVALID_NSID | NVME_DNR; + goto out; + } + + iocb->ns = nvme_ns(n, nsid); + if (!iocb->ns) { + status = NVME_INVALID_FIELD | NVME_DNR; + goto out; + } + + iocb->nsid = nsid; } + req->aiocb = &iocb->common; + qemu_bh_schedule(iocb->bh); + + return NVME_NO_COMPLETE; + +out: + qemu_bh_delete(iocb->bh); + iocb->bh = NULL; + qemu_aio_unref(iocb); + return status; } diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h index 371ac9b..7f3d0a1 100644 --- a/hw/nvme/nvme.h +++ b/hw/nvme/nvme.h @@ -28,6 +28,8 @@ #define NVME_MAX_NAMESPACES 256 #define NVME_EUI64_DEFAULT ((uint64_t)0x5254000000000000) +QEMU_BUILD_BUG_ON(NVME_MAX_NAMESPACES > NVME_NSID_BROADCAST - 1); + typedef struct NvmeCtrl NvmeCtrl; typedef struct NvmeNamespace NvmeNamespace; diff --git a/hw/nvme/trace-events b/hw/nvme/trace-events index ea33d0c..ce6b6ff 100644 --- a/hw/nvme/trace-events +++ b/hw/nvme/trace-events @@ -7,16 +7,16 @@ pci_nvme_map_addr(uint64_t addr, uint64_t len) "addr 0x%"PRIx64" len %"PRIu64"" pci_nvme_map_addr_cmb(uint64_t addr, uint64_t len) "addr 0x%"PRIx64" len %"PRIu64"" pci_nvme_map_prp(uint64_t trans_len, uint32_t len, uint64_t prp1, uint64_t prp2, int num_prps) "trans_len %"PRIu64" len %"PRIu32" prp1 0x%"PRIx64" prp2 0x%"PRIx64" num_prps %d" pci_nvme_map_sgl(uint8_t typ, uint64_t len) "type 0x%"PRIx8" len %"PRIu64"" -pci_nvme_io_cmd(uint16_t cid, uint32_t nsid, uint16_t sqid, uint8_t opcode, const char *opname) "cid %"PRIu16" nsid %"PRIu32" sqid %"PRIu16" opc 0x%"PRIx8" opname '%s'" +pci_nvme_io_cmd(uint16_t cid, uint32_t nsid, uint16_t sqid, uint8_t opcode, const char *opname) "cid %"PRIu16" nsid 0x%"PRIx32" sqid %"PRIu16" opc 0x%"PRIx8" opname '%s'" pci_nvme_admin_cmd(uint16_t cid, uint16_t sqid, uint8_t opcode, const char *opname) "cid %"PRIu16" sqid %"PRIu16" opc 0x%"PRIx8" opname '%s'" -pci_nvme_flush(uint16_t cid, uint32_t nsid) "cid %"PRIu16" nsid %"PRIu32"" +pci_nvme_flush_ns(uint32_t nsid) "nsid 0x%"PRIx32"" pci_nvme_format(uint16_t cid, uint32_t nsid, uint8_t lbaf, uint8_t mset, uint8_t pi, uint8_t pil) "cid %"PRIu16" nsid %"PRIu32" lbaf %"PRIu8" mset %"PRIu8" pi %"PRIu8" pil %"PRIu8"" pci_nvme_format_ns(uint16_t cid, uint32_t nsid, uint8_t lbaf, uint8_t mset, uint8_t pi, uint8_t pil) "cid %"PRIu16" nsid %"PRIu32" lbaf %"PRIu8" mset %"PRIu8" pi %"PRIu8" pil %"PRIu8"" pci_nvme_format_cb(uint16_t cid, uint32_t nsid) "cid %"PRIu16" nsid %"PRIu32"" pci_nvme_read(uint16_t cid, uint32_t nsid, uint32_t nlb, uint64_t count, uint64_t lba) "cid %"PRIu16" nsid %"PRIu32" nlb %"PRIu32" count %"PRIu64" lba 0x%"PRIx64"" pci_nvme_write(uint16_t cid, const char *verb, uint32_t nsid, uint32_t nlb, uint64_t count, uint64_t lba) "cid %"PRIu16" opname '%s' nsid %"PRIu32" nlb %"PRIu32" count %"PRIu64" lba 0x%"PRIx64"" pci_nvme_rw_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" -pci_nvme_misc_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" +pci_nvme_misc_cb(uint16_t cid) "cid %"PRIu16"" pci_nvme_dif_rw(uint8_t pract, uint8_t prinfo) "pract 0x%"PRIx8" prinfo 0x%"PRIx8"" pci_nvme_dif_rw_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" pci_nvme_dif_rw_mdata_in_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" -- cgit v1.1 From ff0ac2c8b89dfcfbb4186d8ae5469d4e80a10da9 Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Thu, 17 Jun 2021 21:06:48 +0200 Subject: hw/nvme: add nvme_block_status_all helper Pull the gist of nvme_check_dulbe() into a helper function. This is in preparation for dsm refactoring. Signed-off-by: Klaus Jensen Reviewed-by: Keith Busch --- hw/nvme/ctrl.c | 41 ++++++++++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 13 deletions(-) (limited to 'hw') diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 26c65a1..5777c7e 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -1438,18 +1438,15 @@ static inline uint16_t nvme_check_bounds(NvmeNamespace *ns, uint64_t slba, return NVME_SUCCESS; } -static uint16_t nvme_check_dulbe(NvmeNamespace *ns, uint64_t slba, - uint32_t nlb) +static int nvme_block_status_all(NvmeNamespace *ns, uint64_t slba, + uint32_t nlb, int flags) { BlockDriverState *bs = blk_bs(ns->blkconf.blk); int64_t pnum = 0, bytes = nvme_l2b(ns, nlb); int64_t offset = nvme_l2b(ns, slba); - bool zeroed; int ret; - Error *local_err = NULL; - /* * `pnum` holds the number of bytes after offset that shares the same * allocation status as the byte at offset. If `pnum` is different from @@ -1461,23 +1458,41 @@ static uint16_t nvme_check_dulbe(NvmeNamespace *ns, uint64_t slba, ret = bdrv_block_status(bs, offset, bytes, &pnum, NULL, NULL); if (ret < 0) { - error_setg_errno(&local_err, -ret, "unable to get block status"); - error_report_err(local_err); - - return NVME_INTERNAL_DEV_ERROR; + return ret; } - zeroed = !!(ret & BDRV_BLOCK_ZERO); - trace_pci_nvme_block_status(offset, bytes, pnum, ret, zeroed); + trace_pci_nvme_block_status(offset, bytes, pnum, ret, + !!(ret & BDRV_BLOCK_ZERO)); - if (zeroed) { - return NVME_DULB; + if (!(ret & flags)) { + return 1; } offset += pnum; } while (pnum != bytes); + return 0; +} + +static uint16_t nvme_check_dulbe(NvmeNamespace *ns, uint64_t slba, + uint32_t nlb) +{ + int ret; + Error *err = NULL; + + ret = nvme_block_status_all(ns, slba, nlb, BDRV_BLOCK_DATA); + if (ret) { + if (ret < 0) { + error_setg_errno(&err, -ret, "unable to get block status"); + error_report_err(err); + + return NVME_INTERNAL_DEV_ERROR; + } + + return NVME_DULB; + } + return NVME_SUCCESS; } -- cgit v1.1 From d7d1474fd85d6c3fce46fa2edd051a6ff3f17cd1 Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Thu, 17 Jun 2021 21:06:49 +0200 Subject: hw/nvme: reimplement dsm to allow cancellation Prior to this patch, a loop was used to issue multiple "fire and forget" aios for each range in the command. Without a reference to the aiocb returned from the blk_aio_pdiscard calls, the aios cannot be canceled. Fix this by processing the ranges one after another. As a bonus, this fixes how metadata is cleared (i.e. we only zero it out if the data was succesfully discarded). Signed-off-by: Klaus Jensen Reviewed-by: Keith Busch --- hw/nvme/ctrl.c | 223 +++++++++++++++++++++++++++++++++++---------------- hw/nvme/trace-events | 4 +- 2 files changed, 157 insertions(+), 70 deletions(-) (limited to 'hw') diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 5777c7e..bb9ee93 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -2015,26 +2015,6 @@ out: nvme_verify_cb(ctx, ret); } -static void nvme_aio_discard_cb(void *opaque, int ret) -{ - NvmeRequest *req = opaque; - uintptr_t *discards = (uintptr_t *)&req->opaque; - - trace_pci_nvme_aio_discard_cb(nvme_cid(req)); - - if (ret) { - nvme_aio_err(req, ret); - } - - (*discards)--; - - if (*discards) { - return; - } - - nvme_enqueue_req_completion(nvme_cq(req), req); -} - struct nvme_zone_reset_ctx { NvmeRequest *req; NvmeZone *zone; @@ -2495,75 +2475,182 @@ out: nvme_enqueue_req_completion(nvme_cq(req), req); } -static uint16_t nvme_dsm(NvmeCtrl *n, NvmeRequest *req) +typedef struct NvmeDSMAIOCB { + BlockAIOCB common; + BlockAIOCB *aiocb; + NvmeRequest *req; + QEMUBH *bh; + int ret; + + NvmeDsmRange *range; + unsigned int nr; + unsigned int idx; +} NvmeDSMAIOCB; + +static void nvme_dsm_cancel(BlockAIOCB *aiocb) { + NvmeDSMAIOCB *iocb = container_of(aiocb, NvmeDSMAIOCB, common); + + /* break nvme_dsm_cb loop */ + iocb->idx = iocb->nr; + iocb->ret = -ECANCELED; + + if (iocb->aiocb) { + blk_aio_cancel_async(iocb->aiocb); + iocb->aiocb = NULL; + } else { + /* + * We only reach this if nvme_dsm_cancel() has already been called or + * the command ran to completion and nvme_dsm_bh is scheduled to run. + */ + assert(iocb->idx == iocb->nr); + } +} + +static const AIOCBInfo nvme_dsm_aiocb_info = { + .aiocb_size = sizeof(NvmeDSMAIOCB), + .cancel_async = nvme_dsm_cancel, +}; + +static void nvme_dsm_bh(void *opaque) +{ + NvmeDSMAIOCB *iocb = opaque; + + iocb->common.cb(iocb->common.opaque, iocb->ret); + + qemu_bh_delete(iocb->bh); + iocb->bh = NULL; + qemu_aio_unref(iocb); +} + +static void nvme_dsm_cb(void *opaque, int ret); + +static void nvme_dsm_md_cb(void *opaque, int ret) +{ + NvmeDSMAIOCB *iocb = opaque; + NvmeRequest *req = iocb->req; NvmeNamespace *ns = req->ns; - NvmeDsmCmd *dsm = (NvmeDsmCmd *) &req->cmd; + NvmeDsmRange *range; + uint64_t slba; + uint32_t nlb; - uint32_t attr = le32_to_cpu(dsm->attributes); - uint32_t nr = (le32_to_cpu(dsm->nr) & 0xff) + 1; + if (ret < 0) { + iocb->ret = ret; + goto done; + } - uint16_t status = NVME_SUCCESS; + if (!ns->lbaf.ms) { + nvme_dsm_cb(iocb, 0); + return; + } - trace_pci_nvme_dsm(nvme_cid(req), nvme_nsid(ns), nr, attr); + range = &iocb->range[iocb->idx - 1]; + slba = le64_to_cpu(range->slba); + nlb = le32_to_cpu(range->nlb); - if (attr & NVME_DSMGMT_AD) { - int64_t offset; - size_t len; - NvmeDsmRange range[nr]; - uintptr_t *discards = (uintptr_t *)&req->opaque; + /* + * Check that all block were discarded (zeroed); otherwise we do not zero + * the metadata. + */ - status = nvme_h2c(n, (uint8_t *)range, sizeof(range), req); - if (status) { - return status; + ret = nvme_block_status_all(ns, slba, nlb, BDRV_BLOCK_ZERO); + if (ret) { + if (ret < 0) { + iocb->ret = ret; + goto done; } - /* - * AIO callbacks may be called immediately, so initialize discards to 1 - * to make sure the the callback does not complete the request before - * all discards have been issued. - */ - *discards = 1; + nvme_dsm_cb(iocb, 0); + } - for (int i = 0; i < nr; i++) { - uint64_t slba = le64_to_cpu(range[i].slba); - uint32_t nlb = le32_to_cpu(range[i].nlb); + iocb->aiocb = blk_aio_pwrite_zeroes(ns->blkconf.blk, nvme_moff(ns, slba), + nvme_m2b(ns, nlb), BDRV_REQ_MAY_UNMAP, + nvme_dsm_cb, iocb); + return; - if (nvme_check_bounds(ns, slba, nlb)) { - continue; - } +done: + iocb->aiocb = NULL; + qemu_bh_schedule(iocb->bh); +} - trace_pci_nvme_dsm_deallocate(nvme_cid(req), nvme_nsid(ns), slba, - nlb); +static void nvme_dsm_cb(void *opaque, int ret) +{ + NvmeDSMAIOCB *iocb = opaque; + NvmeRequest *req = iocb->req; + NvmeCtrl *n = nvme_ctrl(req); + NvmeNamespace *ns = req->ns; + NvmeDsmRange *range; + uint64_t slba; + uint32_t nlb; - if (nlb > n->dmrsl) { - trace_pci_nvme_dsm_single_range_limit_exceeded(nlb, n->dmrsl); - } + if (ret < 0) { + iocb->ret = ret; + goto done; + } - offset = nvme_l2b(ns, slba); - len = nvme_l2b(ns, nlb); +next: + if (iocb->idx == iocb->nr) { + goto done; + } - while (len) { - size_t bytes = MIN(BDRV_REQUEST_MAX_BYTES, len); + range = &iocb->range[iocb->idx++]; + slba = le64_to_cpu(range->slba); + nlb = le32_to_cpu(range->nlb); - (*discards)++; + trace_pci_nvme_dsm_deallocate(slba, nlb); - blk_aio_pdiscard(ns->blkconf.blk, offset, bytes, - nvme_aio_discard_cb, req); + if (nlb > n->dmrsl) { + trace_pci_nvme_dsm_single_range_limit_exceeded(nlb, n->dmrsl); + goto next; + } - offset += bytes; - len -= bytes; - } - } + if (nvme_check_bounds(ns, slba, nlb)) { + trace_pci_nvme_err_invalid_lba_range(slba, nlb, + ns->id_ns.nsze); + goto next; + } - /* account for the 1-initialization */ - (*discards)--; + iocb->aiocb = blk_aio_pdiscard(ns->blkconf.blk, nvme_l2b(ns, slba), + nvme_l2b(ns, nlb), + nvme_dsm_md_cb, iocb); + return; - if (*discards) { - status = NVME_NO_COMPLETE; - } else { - status = req->status; +done: + iocb->aiocb = NULL; + qemu_bh_schedule(iocb->bh); +} + +static uint16_t nvme_dsm(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeNamespace *ns = req->ns; + NvmeDsmCmd *dsm = (NvmeDsmCmd *) &req->cmd; + uint32_t attr = le32_to_cpu(dsm->attributes); + uint32_t nr = (le32_to_cpu(dsm->nr) & 0xff) + 1; + uint16_t status = NVME_SUCCESS; + + trace_pci_nvme_dsm(nr, attr); + + if (attr & NVME_DSMGMT_AD) { + NvmeDSMAIOCB *iocb = blk_aio_get(&nvme_dsm_aiocb_info, ns->blkconf.blk, + nvme_misc_cb, req); + + iocb->req = req; + iocb->bh = qemu_bh_new(nvme_dsm_bh, iocb); + iocb->ret = 0; + iocb->range = g_new(NvmeDsmRange, nr); + iocb->nr = nr; + iocb->idx = 0; + + status = nvme_h2c(n, (uint8_t *)iocb->range, sizeof(NvmeDsmRange) * nr, + req); + if (status) { + return status; } + + req->aiocb = &iocb->common; + nvme_dsm_cb(iocb, 0); + + return NVME_NO_COMPLETE; } return status; diff --git a/hw/nvme/trace-events b/hw/nvme/trace-events index ce6b6ff..eea4e31 100644 --- a/hw/nvme/trace-events +++ b/hw/nvme/trace-events @@ -37,8 +37,8 @@ pci_nvme_verify_mdata_in_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" bl pci_nvme_verify_cb(uint16_t cid, uint8_t prinfo, uint16_t apptag, uint16_t appmask, uint32_t reftag) "cid %"PRIu16" prinfo 0x%"PRIx8" apptag 0x%"PRIx16" appmask 0x%"PRIx16" reftag 0x%"PRIx32"" pci_nvme_rw_complete_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" pci_nvme_block_status(int64_t offset, int64_t bytes, int64_t pnum, int ret, bool zeroed) "offset %"PRId64" bytes %"PRId64" pnum %"PRId64" ret 0x%x zeroed %d" -pci_nvme_dsm(uint16_t cid, uint32_t nsid, uint32_t nr, uint32_t attr) "cid %"PRIu16" nsid %"PRIu32" nr %"PRIu32" attr 0x%"PRIx32"" -pci_nvme_dsm_deallocate(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t nlb) "cid %"PRIu16" nsid %"PRIu32" slba %"PRIu64" nlb %"PRIu32"" +pci_nvme_dsm(uint32_t nr, uint32_t attr) "nr %"PRIu32" attr 0x%"PRIx32"" +pci_nvme_dsm_deallocate(uint64_t slba, uint32_t nlb) "slba %"PRIu64" nlb %"PRIu32"" pci_nvme_dsm_single_range_limit_exceeded(uint32_t nlb, uint32_t dmrsl) "nlb %"PRIu32" dmrsl %"PRIu32"" pci_nvme_compare(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t nlb) "cid %"PRIu16" nsid %"PRIu32" slba 0x%"PRIx64" nlb %"PRIu32"" pci_nvme_compare_data_cb(uint16_t cid) "cid %"PRIu16"" -- cgit v1.1 From 0ca5c3ccac72ce58a11f97b74f304934df72e90a Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Thu, 17 Jun 2021 21:06:50 +0200 Subject: hw/nvme: save reftag when generating pi Prepare nvme_dif_pract_generate_dif() and nvme_dif_check() to be callable in smaller increments by making the reftag a pointer parameter updated by the function. Signed-off-by: Klaus Jensen Reviewed-by: Keith Busch --- hw/nvme/ctrl.c | 10 +++++----- hw/nvme/dif.c | 22 +++++++++++----------- hw/nvme/nvme.h | 4 ++-- 3 files changed, 18 insertions(+), 18 deletions(-) (limited to 'hw') diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index bb9ee93..699919a 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -1968,7 +1968,7 @@ static void nvme_verify_cb(void *opaque, int ret) req->status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size, ctx->mdata.bounce, ctx->mdata.iov.size, - ctrl, slba, apptag, appmask, reftag); + ctrl, slba, apptag, appmask, &reftag); } out: @@ -2204,7 +2204,7 @@ static void nvme_copy_in_complete(NvmeRequest *req) reftag = le32_to_cpu(range->reftag); status = nvme_dif_check(ns, buf, len, mbuf, mlen, prinfor, slba, - apptag, appmask, reftag); + apptag, appmask, &reftag); if (status) { goto invalid; } @@ -2227,10 +2227,10 @@ static void nvme_copy_in_complete(NvmeRequest *req) } nvme_dif_pract_generate_dif(ns, ctx->bounce, len, ctx->mbounce, - mlen, apptag, reftag); + mlen, apptag, &reftag); } else { status = nvme_dif_check(ns, ctx->bounce, len, ctx->mbounce, mlen, - prinfow, sdlba, apptag, appmask, reftag); + prinfow, sdlba, apptag, appmask, &reftag); if (status) { goto invalid; } @@ -2370,7 +2370,7 @@ static void nvme_compare_mdata_cb(void *opaque, int ret) status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size, ctx->mdata.bounce, ctx->mdata.iov.size, ctrl, - slba, apptag, appmask, reftag); + slba, apptag, appmask, &reftag); if (status) { req->status = status; goto out; diff --git a/hw/nvme/dif.c b/hw/nvme/dif.c index 88efcbe..f5f86f1 100644 --- a/hw/nvme/dif.c +++ b/hw/nvme/dif.c @@ -41,7 +41,7 @@ static uint16_t crc_t10dif(uint16_t crc, const unsigned char *buffer, void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len, uint8_t *mbuf, size_t mlen, uint16_t apptag, - uint32_t reftag) + uint32_t *reftag) { uint8_t *end = buf + len; int16_t pil = 0; @@ -51,7 +51,7 @@ void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len, } trace_pci_nvme_dif_pract_generate_dif(len, ns->lbasz, ns->lbasz + pil, - apptag, reftag); + apptag, *reftag); for (; buf < end; buf += ns->lbasz, mbuf += ns->lbaf.ms) { NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil); @@ -63,10 +63,10 @@ void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len, dif->guard = cpu_to_be16(crc); dif->apptag = cpu_to_be16(apptag); - dif->reftag = cpu_to_be32(reftag); + dif->reftag = cpu_to_be32(*reftag); if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) != NVME_ID_NS_DPS_TYPE_3) { - reftag++; + (*reftag)++; } } } @@ -132,13 +132,13 @@ static uint16_t nvme_dif_prchk(NvmeNamespace *ns, NvmeDifTuple *dif, uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len, uint8_t *mbuf, size_t mlen, uint16_t ctrl, uint64_t slba, uint16_t apptag, - uint16_t appmask, uint32_t reftag) + uint16_t appmask, uint32_t *reftag) { uint8_t *end = buf + len; int16_t pil = 0; uint16_t status; - status = nvme_check_prinfo(ns, ctrl, slba, reftag); + status = nvme_check_prinfo(ns, ctrl, slba, *reftag); if (status) { return status; } @@ -153,13 +153,13 @@ uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len, NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil); status = nvme_dif_prchk(ns, dif, buf, mbuf, pil, ctrl, apptag, - appmask, reftag); + appmask, *reftag); if (status) { return status; } if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) != NVME_ID_NS_DPS_TYPE_3) { - reftag++; + (*reftag)++; } } @@ -270,7 +270,7 @@ static void nvme_dif_rw_check_cb(void *opaque, int ret) status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size, ctx->mdata.bounce, ctx->mdata.iov.size, ctrl, - slba, apptag, appmask, reftag); + slba, apptag, appmask, &reftag); if (status) { req->status = status; goto out; @@ -478,11 +478,11 @@ uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req) /* splice generated protection information into the buffer */ nvme_dif_pract_generate_dif(ns, ctx->data.bounce, ctx->data.iov.size, ctx->mdata.bounce, ctx->mdata.iov.size, - apptag, reftag); + apptag, &reftag); } else { status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size, ctx->mdata.bounce, ctx->mdata.iov.size, ctrl, - slba, apptag, appmask, reftag); + slba, apptag, appmask, &reftag); if (status) { goto err; } diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h index 7f3d0a1..98a7d1c 100644 --- a/hw/nvme/nvme.h +++ b/hw/nvme/nvme.h @@ -542,11 +542,11 @@ uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t *mbuf, size_t mlen, uint64_t slba); void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len, uint8_t *mbuf, size_t mlen, uint16_t apptag, - uint32_t reftag); + uint32_t *reftag); uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len, uint8_t *mbuf, size_t mlen, uint16_t ctrl, uint64_t slba, uint16_t apptag, - uint16_t appmask, uint32_t reftag); + uint16_t appmask, uint32_t *reftag); uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req); -- cgit v1.1 From 189a8bf7f64aff9c2e26a6e6f2adc654eb9bbe81 Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Thu, 17 Jun 2021 21:06:51 +0200 Subject: hw/nvme: remove assert from nvme_get_zone_by_slba Make nvme_get_zone_by_slba() return NULL if the slba is out of range. This allows the function to be used without guarding the call with a call to nvme_check_bounds(), in preparation for the next patch. Add asserts after calling nvme_get_zone_by_slba() instead. Signed-off-by: Klaus Jensen Reviewed-by: Keith Busch --- hw/nvme/ctrl.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'hw') diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 699919a..a984b80 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -1542,7 +1542,10 @@ static inline NvmeZone *nvme_get_zone_by_slba(NvmeNamespace *ns, uint64_t slba) { uint32_t zone_idx = nvme_zone_idx(ns, slba); - assert(zone_idx < ns->num_zones); + if (zone_idx >= ns->num_zones) { + return NULL; + } + return &ns->zone_array[zone_idx]; } @@ -1619,11 +1622,16 @@ static uint16_t nvme_check_zone_state_for_read(NvmeZone *zone) static uint16_t nvme_check_zone_read(NvmeNamespace *ns, uint64_t slba, uint32_t nlb) { - NvmeZone *zone = nvme_get_zone_by_slba(ns, slba); - uint64_t bndry = nvme_zone_rd_boundary(ns, zone); - uint64_t end = slba + nlb; + NvmeZone *zone; + uint64_t bndry, end; uint16_t status; + zone = nvme_get_zone_by_slba(ns, slba); + assert(zone); + + bndry = nvme_zone_rd_boundary(ns, zone); + end = slba + nlb; + status = nvme_check_zone_state_for_read(zone); if (status) { ; @@ -1790,6 +1798,7 @@ static void nvme_finalize_zoned_write(NvmeNamespace *ns, NvmeRequest *req) slba = le64_to_cpu(rw->slba); nlb = le16_to_cpu(rw->nlb) + 1; zone = nvme_get_zone_by_slba(ns, slba); + assert(zone); nvme_advance_zone_wp(ns, zone, nlb); } @@ -3186,6 +3195,7 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append, if (ns->params.zoned) { zone = nvme_get_zone_by_slba(ns, slba); + assert(zone); if (append) { bool piremap = !!(ctrl & NVME_RW_PIREMAP); -- cgit v1.1 From 2a132309e45dfce6dcae901388c05c3cc3cb8d73 Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Thu, 17 Jun 2021 21:06:52 +0200 Subject: hw/nvme: use prinfo directly in nvme_check_prinfo and nvme_dif_check The nvme_check_prinfo() and nvme_dif_check() functions operate on the 16 bit "control" member of the NvmeCmd. These functions do not otherwise operate on an NvmeCmd or an NvmeRequest, so change them to expect the actual 4 bit PRINFO field and add constants that work on this field as well. Signed-off-by: Klaus Jensen Reviewed-by: Keith Busch --- hw/nvme/ctrl.c | 67 ++++++++++++++++++++++------------------------------------ hw/nvme/dif.c | 44 +++++++++++++++++++------------------- hw/nvme/nvme.h | 4 ++-- 3 files changed, 49 insertions(+), 66 deletions(-) (limited to 'hw') diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index a984b80..589aad5 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -1010,16 +1010,12 @@ static uint16_t nvme_map_data(NvmeCtrl *n, uint32_t nlb, NvmeRequest *req) { NvmeNamespace *ns = req->ns; NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; - uint16_t ctrl = le16_to_cpu(rw->control); + bool pi = !!NVME_ID_NS_DPS_TYPE(ns->id_ns.dps); + bool pract = !!(le16_to_cpu(rw->control) & NVME_RW_PRINFO_PRACT); size_t len = nvme_l2b(ns, nlb); uint16_t status; - if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) && - (ctrl & NVME_RW_PRINFO_PRACT && ns->lbaf.ms == 8)) { - goto out; - } - - if (nvme_ns_ext(ns)) { + if (nvme_ns_ext(ns) && !(pi && pract && ns->lbaf.ms == 8)) { NvmeSg sg; len += nvme_m2b(ns, nlb); @@ -1036,7 +1032,6 @@ static uint16_t nvme_map_data(NvmeCtrl *n, uint32_t nlb, NvmeRequest *req) return NVME_SUCCESS; } -out: return nvme_map_dptr(n, &req->sg, len, &req->cmd); } @@ -1195,10 +1190,10 @@ uint16_t nvme_bounce_data(NvmeCtrl *n, uint8_t *ptr, uint32_t len, { NvmeNamespace *ns = req->ns; NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; - uint16_t ctrl = le16_to_cpu(rw->control); + bool pi = !!NVME_ID_NS_DPS_TYPE(ns->id_ns.dps); + bool pract = !!(le16_to_cpu(rw->control) & NVME_RW_PRINFO_PRACT); - if (nvme_ns_ext(ns) && - !(ctrl & NVME_RW_PRINFO_PRACT && ns->lbaf.ms == 8)) { + if (nvme_ns_ext(ns) && !(pi && pract && ns->lbaf.ms == 8)) { return nvme_tx_interleaved(n, &req->sg, ptr, len, ns->lbasz, ns->lbaf.ms, 0, dir); } @@ -1950,14 +1945,13 @@ static void nvme_verify_cb(void *opaque, int ret) BlockAcctStats *stats = blk_get_stats(blk); NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; uint64_t slba = le64_to_cpu(rw->slba); - uint16_t ctrl = le16_to_cpu(rw->control); + uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control)); uint16_t apptag = le16_to_cpu(rw->apptag); uint16_t appmask = le16_to_cpu(rw->appmask); uint32_t reftag = le32_to_cpu(rw->reftag); uint16_t status; - trace_pci_nvme_verify_cb(nvme_cid(req), NVME_RW_PRINFO(ctrl), apptag, - appmask, reftag); + trace_pci_nvme_verify_cb(nvme_cid(req), prinfo, apptag, appmask, reftag); if (ret) { block_acct_failed(stats, acct); @@ -1977,7 +1971,7 @@ static void nvme_verify_cb(void *opaque, int ret) req->status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size, ctx->mdata.bounce, ctx->mdata.iov.size, - ctrl, slba, apptag, appmask, &reftag); + prinfo, slba, apptag, appmask, &reftag); } out: @@ -2183,8 +2177,8 @@ static void nvme_copy_in_complete(NvmeRequest *req) block_acct_done(blk_get_stats(ns->blkconf.blk), &req->acct); if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { - uint16_t prinfor = (copy->control[0] >> 4) & 0xf; - uint16_t prinfow = (copy->control[2] >> 2) & 0xf; + uint8_t prinfor = (copy->control[0] >> 4) & 0xf; + uint8_t prinfow = (copy->control[2] >> 2) & 0xf; uint16_t nr = copy->nr + 1; NvmeCopySourceRange *range; uint64_t slba; @@ -2195,13 +2189,6 @@ static void nvme_copy_in_complete(NvmeRequest *req) size_t len, mlen; int i; - /* - * The dif helpers expects prinfo to be similar to the control field of - * the NvmeRwCmd, so shift by 10 to fake it. - */ - prinfor = prinfor << 10; - prinfow = prinfow << 10; - for (i = 0; i < nr; i++) { range = &ctx->ranges[i]; slba = le64_to_cpu(range->slba); @@ -2342,7 +2329,7 @@ static void nvme_compare_mdata_cb(void *opaque, int ret) NvmeNamespace *ns = req->ns; NvmeCtrl *n = nvme_ctrl(req); NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; - uint16_t ctrl = le16_to_cpu(rw->control); + uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control)); uint16_t apptag = le16_to_cpu(rw->apptag); uint16_t appmask = le16_to_cpu(rw->appmask); uint32_t reftag = le32_to_cpu(rw->reftag); @@ -2378,7 +2365,7 @@ static void nvme_compare_mdata_cb(void *opaque, int ret) int16_t pil = 0; status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size, - ctx->mdata.bounce, ctx->mdata.iov.size, ctrl, + ctx->mdata.bounce, ctx->mdata.iov.size, prinfo, slba, apptag, appmask, &reftag); if (status) { req->status = status; @@ -2674,7 +2661,7 @@ static uint16_t nvme_verify(NvmeCtrl *n, NvmeRequest *req) uint32_t nlb = le16_to_cpu(rw->nlb) + 1; size_t len = nvme_l2b(ns, nlb); int64_t offset = nvme_l2b(ns, slba); - uint16_t ctrl = le16_to_cpu(rw->control); + uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control)); uint32_t reftag = le32_to_cpu(rw->reftag); NvmeBounceContext *ctx = NULL; uint16_t status; @@ -2682,12 +2669,12 @@ static uint16_t nvme_verify(NvmeCtrl *n, NvmeRequest *req) trace_pci_nvme_verify(nvme_cid(req), nvme_nsid(ns), slba, nlb); if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { - status = nvme_check_prinfo(ns, ctrl, slba, reftag); + status = nvme_check_prinfo(ns, prinfo, slba, reftag); if (status) { return status; } - if (ctrl & NVME_RW_PRINFO_PRACT) { + if (prinfo & NVME_PRINFO_PRACT) { return NVME_INVALID_PROT_INFO | NVME_DNR; } } @@ -2731,13 +2718,8 @@ static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req) uint16_t nr = copy->nr + 1; uint8_t format = copy->control[0] & 0xf; - - /* - * Shift the PRINFOR/PRINFOW values by 10 to allow reusing the - * NVME_RW_PRINFO constants. - */ - uint16_t prinfor = ((copy->control[0] >> 4) & 0xf) << 10; - uint16_t prinfow = ((copy->control[2] >> 2) & 0xf) << 10; + uint8_t prinfor = (copy->control[0] >> 4) & 0xf; + uint8_t prinfow = (copy->control[2] >> 2) & 0xf; uint32_t nlb = 0; uint8_t *bounce = NULL, *bouncep = NULL; @@ -2749,7 +2731,7 @@ static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req) trace_pci_nvme_copy(nvme_cid(req), nvme_nsid(ns), nr, format); if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) && - ((prinfor & NVME_RW_PRINFO_PRACT) != (prinfow & NVME_RW_PRINFO_PRACT))) { + ((prinfor & NVME_PRINFO_PRACT) != (prinfow & NVME_PRINFO_PRACT))) { return NVME_INVALID_FIELD | NVME_DNR; } @@ -2886,7 +2868,7 @@ static uint16_t nvme_compare(NvmeCtrl *n, NvmeRequest *req) BlockBackend *blk = ns->blkconf.blk; uint64_t slba = le64_to_cpu(rw->slba); uint32_t nlb = le16_to_cpu(rw->nlb) + 1; - uint16_t ctrl = le16_to_cpu(rw->control); + uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control)); size_t data_len = nvme_l2b(ns, nlb); size_t len = data_len; int64_t offset = nvme_l2b(ns, slba); @@ -2895,7 +2877,7 @@ static uint16_t nvme_compare(NvmeCtrl *n, NvmeRequest *req) trace_pci_nvme_compare(nvme_cid(req), nvme_nsid(ns), slba, nlb); - if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) && (ctrl & NVME_RW_PRINFO_PRACT)) { + if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) && (prinfo & NVME_PRINFO_PRACT)) { return NVME_INVALID_PROT_INFO | NVME_DNR; } @@ -3083,7 +3065,7 @@ static uint16_t nvme_read(NvmeCtrl *n, NvmeRequest *req) NvmeNamespace *ns = req->ns; uint64_t slba = le64_to_cpu(rw->slba); uint32_t nlb = (uint32_t)le16_to_cpu(rw->nlb) + 1; - uint16_t ctrl = le16_to_cpu(rw->control); + uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control)); uint64_t data_size = nvme_l2b(ns, nlb); uint64_t mapped_size = data_size; uint64_t data_offset; @@ -3094,7 +3076,7 @@ static uint16_t nvme_read(NvmeCtrl *n, NvmeRequest *req) mapped_size += nvme_m2b(ns, nlb); if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { - bool pract = ctrl & NVME_RW_PRINFO_PRACT; + bool pract = prinfo & NVME_PRINFO_PRACT; if (pract && ns->lbaf.ms == 8) { mapped_size = data_size; @@ -3158,6 +3140,7 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append, uint64_t slba = le64_to_cpu(rw->slba); uint32_t nlb = (uint32_t)le16_to_cpu(rw->nlb) + 1; uint16_t ctrl = le16_to_cpu(rw->control); + uint8_t prinfo = NVME_RW_PRINFO(ctrl); uint64_t data_size = nvme_l2b(ns, nlb); uint64_t mapped_size = data_size; uint64_t data_offset; @@ -3170,7 +3153,7 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append, mapped_size += nvme_m2b(ns, nlb); if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { - bool pract = ctrl & NVME_RW_PRINFO_PRACT; + bool pract = prinfo & NVME_PRINFO_PRACT; if (pract && ns->lbaf.ms == 8) { mapped_size -= nvme_m2b(ns, nlb); diff --git a/hw/nvme/dif.c b/hw/nvme/dif.c index f5f86f1..5dbd18b 100644 --- a/hw/nvme/dif.c +++ b/hw/nvme/dif.c @@ -15,11 +15,11 @@ #include "nvme.h" #include "trace.h" -uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint16_t ctrl, uint64_t slba, +uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint8_t prinfo, uint64_t slba, uint32_t reftag) { if ((NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) == NVME_ID_NS_DPS_TYPE_1) && - (ctrl & NVME_RW_PRINFO_PRCHK_REF) && (slba & 0xffffffff) != reftag) { + (prinfo & NVME_PRINFO_PRCHK_REF) && (slba & 0xffffffff) != reftag) { return NVME_INVALID_PROT_INFO | NVME_DNR; } @@ -73,7 +73,7 @@ void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len, static uint16_t nvme_dif_prchk(NvmeNamespace *ns, NvmeDifTuple *dif, uint8_t *buf, uint8_t *mbuf, size_t pil, - uint16_t ctrl, uint16_t apptag, + uint8_t prinfo, uint16_t apptag, uint16_t appmask, uint32_t reftag) { switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { @@ -95,7 +95,7 @@ static uint16_t nvme_dif_prchk(NvmeNamespace *ns, NvmeDifTuple *dif, return NVME_SUCCESS; } - if (ctrl & NVME_RW_PRINFO_PRCHK_GUARD) { + if (prinfo & NVME_PRINFO_PRCHK_GUARD) { uint16_t crc = crc_t10dif(0x0, buf, ns->lbasz); if (pil) { @@ -109,7 +109,7 @@ static uint16_t nvme_dif_prchk(NvmeNamespace *ns, NvmeDifTuple *dif, } } - if (ctrl & NVME_RW_PRINFO_PRCHK_APP) { + if (prinfo & NVME_PRINFO_PRCHK_APP) { trace_pci_nvme_dif_prchk_apptag(be16_to_cpu(dif->apptag), apptag, appmask); @@ -118,7 +118,7 @@ static uint16_t nvme_dif_prchk(NvmeNamespace *ns, NvmeDifTuple *dif, } } - if (ctrl & NVME_RW_PRINFO_PRCHK_REF) { + if (prinfo & NVME_PRINFO_PRCHK_REF) { trace_pci_nvme_dif_prchk_reftag(be32_to_cpu(dif->reftag), reftag); if (be32_to_cpu(dif->reftag) != reftag) { @@ -130,7 +130,7 @@ static uint16_t nvme_dif_prchk(NvmeNamespace *ns, NvmeDifTuple *dif, } uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len, - uint8_t *mbuf, size_t mlen, uint16_t ctrl, + uint8_t *mbuf, size_t mlen, uint8_t prinfo, uint64_t slba, uint16_t apptag, uint16_t appmask, uint32_t *reftag) { @@ -138,7 +138,7 @@ uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len, int16_t pil = 0; uint16_t status; - status = nvme_check_prinfo(ns, ctrl, slba, *reftag); + status = nvme_check_prinfo(ns, prinfo, slba, *reftag); if (status) { return status; } @@ -147,12 +147,12 @@ uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len, pil = ns->lbaf.ms - sizeof(NvmeDifTuple); } - trace_pci_nvme_dif_check(NVME_RW_PRINFO(ctrl), ns->lbasz + pil); + trace_pci_nvme_dif_check(prinfo, ns->lbasz + pil); for (; buf < end; buf += ns->lbasz, mbuf += ns->lbaf.ms) { NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil); - status = nvme_dif_prchk(ns, dif, buf, mbuf, pil, ctrl, apptag, + status = nvme_dif_prchk(ns, dif, buf, mbuf, pil, prinfo, apptag, appmask, *reftag); if (status) { return status; @@ -248,14 +248,14 @@ static void nvme_dif_rw_check_cb(void *opaque, int ret) NvmeCtrl *n = nvme_ctrl(req); NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; uint64_t slba = le64_to_cpu(rw->slba); - uint16_t ctrl = le16_to_cpu(rw->control); + uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control)); uint16_t apptag = le16_to_cpu(rw->apptag); uint16_t appmask = le16_to_cpu(rw->appmask); uint32_t reftag = le32_to_cpu(rw->reftag); uint16_t status; - trace_pci_nvme_dif_rw_check_cb(nvme_cid(req), NVME_RW_PRINFO(ctrl), apptag, - appmask, reftag); + trace_pci_nvme_dif_rw_check_cb(nvme_cid(req), prinfo, apptag, appmask, + reftag); if (ret) { goto out; @@ -269,7 +269,7 @@ static void nvme_dif_rw_check_cb(void *opaque, int ret) } status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size, - ctx->mdata.bounce, ctx->mdata.iov.size, ctrl, + ctx->mdata.bounce, ctx->mdata.iov.size, prinfo, slba, apptag, appmask, &reftag); if (status) { req->status = status; @@ -283,7 +283,7 @@ static void nvme_dif_rw_check_cb(void *opaque, int ret) goto out; } - if (ctrl & NVME_RW_PRINFO_PRACT && ns->lbaf.ms == 8) { + if (prinfo & NVME_PRINFO_PRACT && ns->lbaf.ms == 8) { goto out; } @@ -364,15 +364,15 @@ uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req) size_t mlen = nvme_m2b(ns, nlb); size_t mapped_len = len; int64_t offset = nvme_l2b(ns, slba); - uint16_t ctrl = le16_to_cpu(rw->control); + uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control)); uint16_t apptag = le16_to_cpu(rw->apptag); uint16_t appmask = le16_to_cpu(rw->appmask); uint32_t reftag = le32_to_cpu(rw->reftag); - bool pract = !!(ctrl & NVME_RW_PRINFO_PRACT); + bool pract = !!(prinfo & NVME_PRINFO_PRACT); NvmeBounceContext *ctx; uint16_t status; - trace_pci_nvme_dif_rw(pract, NVME_RW_PRINFO(ctrl)); + trace_pci_nvme_dif_rw(pract, prinfo); ctx = g_new0(NvmeBounceContext, 1); ctx->req = req; @@ -380,7 +380,7 @@ uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req) if (wrz) { BdrvRequestFlags flags = BDRV_REQ_MAY_UNMAP; - if (ctrl & NVME_RW_PRINFO_PRCHK_MASK) { + if (prinfo & NVME_PRINFO_PRCHK_MASK) { status = NVME_INVALID_PROT_INFO | NVME_DNR; goto err; } @@ -389,7 +389,7 @@ uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req) uint8_t *mbuf, *end; int16_t pil = ns->lbaf.ms - sizeof(NvmeDifTuple); - status = nvme_check_prinfo(ns, ctrl, slba, reftag); + status = nvme_check_prinfo(ns, prinfo, slba, reftag); if (status) { goto err; } @@ -469,7 +469,7 @@ uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req) } } - status = nvme_check_prinfo(ns, ctrl, slba, reftag); + status = nvme_check_prinfo(ns, prinfo, slba, reftag); if (status) { goto err; } @@ -481,7 +481,7 @@ uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req) apptag, &reftag); } else { status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size, - ctx->mdata.bounce, ctx->mdata.iov.size, ctrl, + ctx->mdata.bounce, ctx->mdata.iov.size, prinfo, slba, apptag, appmask, &reftag); if (status) { goto err; diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h index 98a7d1c..2509b8b 100644 --- a/hw/nvme/nvme.h +++ b/hw/nvme/nvme.h @@ -536,7 +536,7 @@ static const uint16_t t10_dif_crc_table[256] = { 0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3 }; -uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint16_t ctrl, uint64_t slba, +uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint8_t prinfo, uint64_t slba, uint32_t reftag); uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t *mbuf, size_t mlen, uint64_t slba); @@ -544,7 +544,7 @@ void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len, uint8_t *mbuf, size_t mlen, uint16_t apptag, uint32_t *reftag); uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len, - uint8_t *mbuf, size_t mlen, uint16_t ctrl, + uint8_t *mbuf, size_t mlen, uint8_t prinfo, uint64_t slba, uint16_t apptag, uint16_t appmask, uint32_t *reftag); uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req); -- cgit v1.1 From f1c97407c5728e284b4e2b331e08c0d88ba568da Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Thu, 17 Jun 2021 21:06:53 +0200 Subject: hw/nvme: add dw0/1 to the req completion trace event Some commands report additional useful information in dw0 and dw1 of the completion queue entry. Add them to the trace. Signed-off-by: Klaus Jensen Reviewed-by: Keith Busch --- hw/nvme/ctrl.c | 2 ++ hw/nvme/trace-events | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'hw') diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 589aad5..c1d95e9 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -1290,6 +1290,8 @@ static void nvme_enqueue_req_completion(NvmeCQueue *cq, NvmeRequest *req) { assert(cq->cqid == req->sq->cqid); trace_pci_nvme_enqueue_req_completion(nvme_cid(req), cq->cqid, + le32_to_cpu(req->cqe.result), + le32_to_cpu(req->cqe.dw1), req->status); if (req->status) { diff --git a/hw/nvme/trace-events b/hw/nvme/trace-events index eea4e31..a3e1134 100644 --- a/hw/nvme/trace-events +++ b/hw/nvme/trace-events @@ -80,7 +80,7 @@ pci_nvme_enqueue_event(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PR pci_nvme_enqueue_event_noqueue(int queued) "queued %d" pci_nvme_enqueue_event_masked(uint8_t typ) "type 0x%"PRIx8"" pci_nvme_no_outstanding_aers(void) "ignoring event; no outstanding AERs" -pci_nvme_enqueue_req_completion(uint16_t cid, uint16_t cqid, uint16_t status) "cid %"PRIu16" cqid %"PRIu16" status 0x%"PRIx16"" +pci_nvme_enqueue_req_completion(uint16_t cid, uint16_t cqid, uint32_t dw0, uint32_t dw1, uint16_t status) "cid %"PRIu16" cqid %"PRIu16" dw0 0x%"PRIx32" dw1 0x%"PRIx32" status 0x%"PRIx16"" pci_nvme_mmio_read(uint64_t addr, unsigned size) "addr 0x%"PRIx64" size %d" pci_nvme_mmio_write(uint64_t addr, uint64_t data, unsigned size) "addr 0x%"PRIx64" data 0x%"PRIx64" size %d" pci_nvme_mmio_doorbell_cq(uint16_t cqid, uint16_t new_head) "cqid %"PRIu16" new_head %"PRIu16"" -- cgit v1.1 From 796d20681d9bef4f863565f1a2f2dbe28e2322c7 Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Thu, 17 Jun 2021 21:06:54 +0200 Subject: hw/nvme: reimplement the copy command to allow aio cancellation Before this patch the code would issue several aios simultaneously without saving a reference to the aiocb. Without the aiocb reference the individual copies cannot be canceled. Fix this by issuing copies of the ranges one after another. Signed-off-by: Klaus Jensen Reviewed-by: Keith Busch --- hw/nvme/ctrl.c | 678 ++++++++++++++++++++++++++++----------------------- hw/nvme/trace-events | 3 +- 2 files changed, 370 insertions(+), 311 deletions(-) (limited to 'hw') diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index c1d95e9..b0cc8c4 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -2093,226 +2093,6 @@ out: nvme_aio_zone_reset_complete_cb(opaque, ret); } -struct nvme_copy_ctx { - int copies; - uint8_t *bounce; - uint8_t *mbounce; - uint32_t nlb; - NvmeCopySourceRange *ranges; -}; - -struct nvme_copy_in_ctx { - NvmeRequest *req; - QEMUIOVector iov; - NvmeCopySourceRange *range; -}; - -static void nvme_copy_complete_cb(void *opaque, int ret) -{ - NvmeRequest *req = opaque; - NvmeNamespace *ns = req->ns; - struct nvme_copy_ctx *ctx = req->opaque; - - if (ret) { - block_acct_failed(blk_get_stats(ns->blkconf.blk), &req->acct); - nvme_aio_err(req, ret); - goto out; - } - - block_acct_done(blk_get_stats(ns->blkconf.blk), &req->acct); - -out: - if (ns->params.zoned) { - NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd; - uint64_t sdlba = le64_to_cpu(copy->sdlba); - NvmeZone *zone = nvme_get_zone_by_slba(ns, sdlba); - - nvme_advance_zone_wp(ns, zone, ctx->nlb); - } - - g_free(ctx->bounce); - g_free(ctx->mbounce); - g_free(ctx); - - nvme_enqueue_req_completion(nvme_cq(req), req); -} - -static void nvme_copy_cb(void *opaque, int ret) -{ - NvmeRequest *req = opaque; - NvmeNamespace *ns = req->ns; - struct nvme_copy_ctx *ctx = req->opaque; - - trace_pci_nvme_copy_cb(nvme_cid(req)); - - if (ret) { - goto out; - } - - if (ns->lbaf.ms) { - NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd; - uint64_t sdlba = le64_to_cpu(copy->sdlba); - int64_t offset = nvme_moff(ns, sdlba); - - qemu_iovec_reset(&req->sg.iov); - qemu_iovec_add(&req->sg.iov, ctx->mbounce, nvme_m2b(ns, ctx->nlb)); - - req->aiocb = blk_aio_pwritev(ns->blkconf.blk, offset, &req->sg.iov, 0, - nvme_copy_complete_cb, req); - return; - } - -out: - nvme_copy_complete_cb(opaque, ret); -} - -static void nvme_copy_in_complete(NvmeRequest *req) -{ - NvmeNamespace *ns = req->ns; - NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd; - struct nvme_copy_ctx *ctx = req->opaque; - uint64_t sdlba = le64_to_cpu(copy->sdlba); - uint16_t status; - - trace_pci_nvme_copy_in_complete(nvme_cid(req)); - - block_acct_done(blk_get_stats(ns->blkconf.blk), &req->acct); - - if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { - uint8_t prinfor = (copy->control[0] >> 4) & 0xf; - uint8_t prinfow = (copy->control[2] >> 2) & 0xf; - uint16_t nr = copy->nr + 1; - NvmeCopySourceRange *range; - uint64_t slba; - uint32_t nlb; - uint16_t apptag, appmask; - uint32_t reftag; - uint8_t *buf = ctx->bounce, *mbuf = ctx->mbounce; - size_t len, mlen; - int i; - - for (i = 0; i < nr; i++) { - range = &ctx->ranges[i]; - slba = le64_to_cpu(range->slba); - nlb = le16_to_cpu(range->nlb) + 1; - len = nvme_l2b(ns, nlb); - mlen = nvme_m2b(ns, nlb); - apptag = le16_to_cpu(range->apptag); - appmask = le16_to_cpu(range->appmask); - reftag = le32_to_cpu(range->reftag); - - status = nvme_dif_check(ns, buf, len, mbuf, mlen, prinfor, slba, - apptag, appmask, &reftag); - if (status) { - goto invalid; - } - - buf += len; - mbuf += mlen; - } - - apptag = le16_to_cpu(copy->apptag); - appmask = le16_to_cpu(copy->appmask); - reftag = le32_to_cpu(copy->reftag); - - if (prinfow & NVME_RW_PRINFO_PRACT) { - size_t len = nvme_l2b(ns, ctx->nlb); - size_t mlen = nvme_m2b(ns, ctx->nlb); - - status = nvme_check_prinfo(ns, prinfow, sdlba, reftag); - if (status) { - goto invalid; - } - - nvme_dif_pract_generate_dif(ns, ctx->bounce, len, ctx->mbounce, - mlen, apptag, &reftag); - } else { - status = nvme_dif_check(ns, ctx->bounce, len, ctx->mbounce, mlen, - prinfow, sdlba, apptag, appmask, &reftag); - if (status) { - goto invalid; - } - } - } - - status = nvme_check_bounds(ns, sdlba, ctx->nlb); - if (status) { - goto invalid; - } - - if (ns->params.zoned) { - NvmeZone *zone = nvme_get_zone_by_slba(ns, sdlba); - - status = nvme_check_zone_write(ns, zone, sdlba, ctx->nlb); - if (status) { - goto invalid; - } - - status = nvme_zrm_auto(nvme_ctrl(req), ns, zone); - if (status) { - goto invalid; - } - - zone->w_ptr += ctx->nlb; - } - - qemu_iovec_init(&req->sg.iov, 1); - qemu_iovec_add(&req->sg.iov, ctx->bounce, nvme_l2b(ns, ctx->nlb)); - - block_acct_start(blk_get_stats(ns->blkconf.blk), &req->acct, 0, - BLOCK_ACCT_WRITE); - - req->aiocb = blk_aio_pwritev(ns->blkconf.blk, nvme_l2b(ns, sdlba), - &req->sg.iov, 0, nvme_copy_cb, req); - - return; - -invalid: - req->status = status; - - g_free(ctx->bounce); - g_free(ctx); - - nvme_enqueue_req_completion(nvme_cq(req), req); -} - -static void nvme_aio_copy_in_cb(void *opaque, int ret) -{ - struct nvme_copy_in_ctx *in_ctx = opaque; - NvmeRequest *req = in_ctx->req; - NvmeNamespace *ns = req->ns; - struct nvme_copy_ctx *ctx = req->opaque; - - qemu_iovec_destroy(&in_ctx->iov); - g_free(in_ctx); - - trace_pci_nvme_aio_copy_in_cb(nvme_cid(req)); - - if (ret) { - nvme_aio_err(req, ret); - } - - ctx->copies--; - - if (ctx->copies) { - return; - } - - if (req->status) { - block_acct_failed(blk_get_stats(ns->blkconf.blk), &req->acct); - - g_free(ctx->bounce); - g_free(ctx->mbounce); - g_free(ctx); - - nvme_enqueue_req_completion(nvme_cq(req), req); - - return; - } - - nvme_copy_in_complete(req); -} - struct nvme_compare_ctx { struct { QEMUIOVector iov; @@ -2713,153 +2493,433 @@ static uint16_t nvme_verify(NvmeCtrl *n, NvmeRequest *req) return NVME_NO_COMPLETE; } -static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req) +typedef struct NvmeCopyAIOCB { + BlockAIOCB common; + BlockAIOCB *aiocb; + NvmeRequest *req; + QEMUBH *bh; + int ret; + + NvmeCopySourceRange *ranges; + int nr; + int idx; + + uint8_t *bounce; + QEMUIOVector iov; + struct { + BlockAcctCookie read; + BlockAcctCookie write; + } acct; + + uint32_t reftag; + uint64_t slba; + + NvmeZone *zone; +} NvmeCopyAIOCB; + +static void nvme_copy_cancel(BlockAIOCB *aiocb) { + NvmeCopyAIOCB *iocb = container_of(aiocb, NvmeCopyAIOCB, common); + + iocb->ret = -ECANCELED; + + if (iocb->aiocb) { + blk_aio_cancel_async(iocb->aiocb); + iocb->aiocb = NULL; + } +} + +static const AIOCBInfo nvme_copy_aiocb_info = { + .aiocb_size = sizeof(NvmeCopyAIOCB), + .cancel_async = nvme_copy_cancel, +}; + +static void nvme_copy_bh(void *opaque) +{ + NvmeCopyAIOCB *iocb = opaque; + NvmeRequest *req = iocb->req; NvmeNamespace *ns = req->ns; - NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd; + BlockAcctStats *stats = blk_get_stats(ns->blkconf.blk); - uint16_t nr = copy->nr + 1; - uint8_t format = copy->control[0] & 0xf; - uint8_t prinfor = (copy->control[0] >> 4) & 0xf; - uint8_t prinfow = (copy->control[2] >> 2) & 0xf; + if (iocb->idx != iocb->nr) { + req->cqe.result = cpu_to_le32(iocb->idx); + } - uint32_t nlb = 0; - uint8_t *bounce = NULL, *bouncep = NULL; - uint8_t *mbounce = NULL, *mbouncep = NULL; - struct nvme_copy_ctx *ctx; - uint16_t status; - int i; + qemu_iovec_destroy(&iocb->iov); + g_free(iocb->bounce); - trace_pci_nvme_copy(nvme_cid(req), nvme_nsid(ns), nr, format); + qemu_bh_delete(iocb->bh); + iocb->bh = NULL; - if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) && - ((prinfor & NVME_PRINFO_PRACT) != (prinfow & NVME_PRINFO_PRACT))) { - return NVME_INVALID_FIELD | NVME_DNR; + if (iocb->ret < 0) { + block_acct_failed(stats, &iocb->acct.read); + block_acct_failed(stats, &iocb->acct.write); + } else { + block_acct_done(stats, &iocb->acct.read); + block_acct_done(stats, &iocb->acct.write); } - if (!(n->id_ctrl.ocfs & (1 << format))) { - trace_pci_nvme_err_copy_invalid_format(format); - return NVME_INVALID_FIELD | NVME_DNR; + iocb->common.cb(iocb->common.opaque, iocb->ret); + qemu_aio_unref(iocb); +} + +static void nvme_copy_cb(void *opaque, int ret); + +static void nvme_copy_out_completed_cb(void *opaque, int ret) +{ + NvmeCopyAIOCB *iocb = opaque; + NvmeRequest *req = iocb->req; + NvmeNamespace *ns = req->ns; + NvmeCopySourceRange *range = &iocb->ranges[iocb->idx]; + uint32_t nlb = le32_to_cpu(range->nlb) + 1; + + if (ret < 0) { + iocb->ret = ret; + goto out; + } else if (iocb->ret < 0) { + goto out; } - if (nr > ns->id_ns.msrc + 1) { - return NVME_CMD_SIZE_LIMIT | NVME_DNR; + if (ns->params.zoned) { + nvme_advance_zone_wp(ns, iocb->zone, nlb); } - ctx = g_new(struct nvme_copy_ctx, 1); - ctx->ranges = g_new(NvmeCopySourceRange, nr); + iocb->idx++; + iocb->slba += nlb; +out: + nvme_copy_cb(iocb, iocb->ret); +} - status = nvme_h2c(n, (uint8_t *)ctx->ranges, - nr * sizeof(NvmeCopySourceRange), req); - if (status) { +static void nvme_copy_out_cb(void *opaque, int ret) +{ + NvmeCopyAIOCB *iocb = opaque; + NvmeRequest *req = iocb->req; + NvmeNamespace *ns = req->ns; + NvmeCopySourceRange *range; + uint32_t nlb; + size_t mlen; + uint8_t *mbounce; + + if (ret < 0) { + iocb->ret = ret; + goto out; + } else if (iocb->ret < 0) { goto out; } - for (i = 0; i < nr; i++) { - uint64_t slba = le64_to_cpu(ctx->ranges[i].slba); - uint32_t _nlb = le16_to_cpu(ctx->ranges[i].nlb) + 1; + if (!ns->lbaf.ms) { + nvme_copy_out_completed_cb(iocb, 0); + return; + } + + range = &iocb->ranges[iocb->idx]; + nlb = le32_to_cpu(range->nlb) + 1; - if (_nlb > le16_to_cpu(ns->id_ns.mssrl)) { - status = NVME_CMD_SIZE_LIMIT | NVME_DNR; - goto out; - } + mlen = nvme_m2b(ns, nlb); + mbounce = iocb->bounce + nvme_l2b(ns, nlb); + + qemu_iovec_reset(&iocb->iov); + qemu_iovec_add(&iocb->iov, mbounce, mlen); + + iocb->aiocb = blk_aio_pwritev(ns->blkconf.blk, nvme_moff(ns, iocb->slba), + &iocb->iov, 0, nvme_copy_out_completed_cb, + iocb); + + return; - status = nvme_check_bounds(ns, slba, _nlb); +out: + nvme_copy_cb(iocb, ret); +} + +static void nvme_copy_in_completed_cb(void *opaque, int ret) +{ + NvmeCopyAIOCB *iocb = opaque; + NvmeRequest *req = iocb->req; + NvmeNamespace *ns = req->ns; + NvmeCopySourceRange *range; + uint32_t nlb; + size_t len; + uint16_t status; + + if (ret < 0) { + iocb->ret = ret; + goto out; + } else if (iocb->ret < 0) { + goto out; + } + + range = &iocb->ranges[iocb->idx]; + nlb = le32_to_cpu(range->nlb) + 1; + len = nvme_l2b(ns, nlb); + + trace_pci_nvme_copy_out(iocb->slba, nlb); + + if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { + NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd; + + uint16_t prinfor = ((copy->control[0] >> 4) & 0xf); + uint16_t prinfow = ((copy->control[2] >> 2) & 0xf); + + uint16_t apptag = le16_to_cpu(range->apptag); + uint16_t appmask = le16_to_cpu(range->appmask); + uint32_t reftag = le32_to_cpu(range->reftag); + + uint64_t slba = le64_to_cpu(range->slba); + size_t mlen = nvme_m2b(ns, nlb); + uint8_t *mbounce = iocb->bounce + nvme_l2b(ns, nlb); + + status = nvme_dif_check(ns, iocb->bounce, len, mbounce, mlen, prinfor, + slba, apptag, appmask, &reftag); if (status) { - goto out; + goto invalid; } - if (NVME_ERR_REC_DULBE(ns->features.err_rec)) { - status = nvme_check_dulbe(ns, slba, _nlb); + apptag = le16_to_cpu(copy->apptag); + appmask = le16_to_cpu(copy->appmask); + + if (prinfow & NVME_PRINFO_PRACT) { + status = nvme_check_prinfo(ns, prinfow, iocb->slba, iocb->reftag); if (status) { - goto out; + goto invalid; } - } - if (ns->params.zoned) { - status = nvme_check_zone_read(ns, slba, _nlb); + nvme_dif_pract_generate_dif(ns, iocb->bounce, len, mbounce, mlen, + apptag, &iocb->reftag); + } else { + status = nvme_dif_check(ns, iocb->bounce, len, mbounce, mlen, + prinfow, iocb->slba, apptag, appmask, + &iocb->reftag); if (status) { - goto out; + goto invalid; } } + } - nlb += _nlb; + status = nvme_check_bounds(ns, iocb->slba, nlb); + if (status) { + goto invalid; } - if (nlb > le32_to_cpu(ns->id_ns.mcl)) { - status = NVME_CMD_SIZE_LIMIT | NVME_DNR; - goto out; + if (ns->params.zoned) { + status = nvme_check_zone_write(ns, iocb->zone, iocb->slba, nlb); + if (status) { + goto invalid; + } + + iocb->zone->w_ptr += nlb; } - bounce = bouncep = g_malloc(nvme_l2b(ns, nlb)); - if (ns->lbaf.ms) { - mbounce = mbouncep = g_malloc(nvme_m2b(ns, nlb)); + qemu_iovec_reset(&iocb->iov); + qemu_iovec_add(&iocb->iov, iocb->bounce, len); + + iocb->aiocb = blk_aio_pwritev(ns->blkconf.blk, nvme_l2b(ns, iocb->slba), + &iocb->iov, 0, nvme_copy_out_cb, iocb); + + return; + +invalid: + req->status = status; + iocb->aiocb = NULL; + if (iocb->bh) { + qemu_bh_schedule(iocb->bh); } - block_acct_start(blk_get_stats(ns->blkconf.blk), &req->acct, 0, - BLOCK_ACCT_READ); + return; - ctx->bounce = bounce; - ctx->mbounce = mbounce; - ctx->nlb = nlb; - ctx->copies = 1; +out: + nvme_copy_cb(iocb, ret); +} - req->opaque = ctx; +static void nvme_copy_in_cb(void *opaque, int ret) +{ + NvmeCopyAIOCB *iocb = opaque; + NvmeRequest *req = iocb->req; + NvmeNamespace *ns = req->ns; + NvmeCopySourceRange *range; + uint64_t slba; + uint32_t nlb; - for (i = 0; i < nr; i++) { - uint64_t slba = le64_to_cpu(ctx->ranges[i].slba); - uint32_t nlb = le16_to_cpu(ctx->ranges[i].nlb) + 1; + if (ret < 0) { + iocb->ret = ret; + goto out; + } else if (iocb->ret < 0) { + goto out; + } - size_t len = nvme_l2b(ns, nlb); - int64_t offset = nvme_l2b(ns, slba); + if (!ns->lbaf.ms) { + nvme_copy_in_completed_cb(iocb, 0); + return; + } - trace_pci_nvme_copy_source_range(slba, nlb); + range = &iocb->ranges[iocb->idx]; + slba = le64_to_cpu(range->slba); + nlb = le32_to_cpu(range->nlb) + 1; - struct nvme_copy_in_ctx *in_ctx = g_new(struct nvme_copy_in_ctx, 1); - in_ctx->req = req; + qemu_iovec_reset(&iocb->iov); + qemu_iovec_add(&iocb->iov, iocb->bounce + nvme_l2b(ns, nlb), + nvme_m2b(ns, nlb)); - qemu_iovec_init(&in_ctx->iov, 1); - qemu_iovec_add(&in_ctx->iov, bouncep, len); + iocb->aiocb = blk_aio_preadv(ns->blkconf.blk, nvme_moff(ns, slba), + &iocb->iov, 0, nvme_copy_in_completed_cb, + iocb); + return; - ctx->copies++; +out: + nvme_copy_cb(iocb, iocb->ret); +} - blk_aio_preadv(ns->blkconf.blk, offset, &in_ctx->iov, 0, - nvme_aio_copy_in_cb, in_ctx); +static void nvme_copy_cb(void *opaque, int ret) +{ + NvmeCopyAIOCB *iocb = opaque; + NvmeRequest *req = iocb->req; + NvmeNamespace *ns = req->ns; + NvmeCopySourceRange *range; + uint64_t slba; + uint32_t nlb; + size_t len; + uint16_t status; - bouncep += len; + if (ret < 0) { + iocb->ret = ret; + goto done; + } else if (iocb->ret < 0) { + goto done; + } - if (ns->lbaf.ms) { - len = nvme_m2b(ns, nlb); - offset = nvme_moff(ns, slba); + if (iocb->idx == iocb->nr) { + goto done; + } - in_ctx = g_new(struct nvme_copy_in_ctx, 1); - in_ctx->req = req; + range = &iocb->ranges[iocb->idx]; + slba = le64_to_cpu(range->slba); + nlb = le32_to_cpu(range->nlb) + 1; + len = nvme_l2b(ns, nlb); + + trace_pci_nvme_copy_source_range(slba, nlb); - qemu_iovec_init(&in_ctx->iov, 1); - qemu_iovec_add(&in_ctx->iov, mbouncep, len); + if (nlb > le16_to_cpu(ns->id_ns.mssrl)) { + status = NVME_CMD_SIZE_LIMIT | NVME_DNR; + goto invalid; + } - ctx->copies++; + status = nvme_check_bounds(ns, slba, nlb); + if (status) { + goto invalid; + } - blk_aio_preadv(ns->blkconf.blk, offset, &in_ctx->iov, 0, - nvme_aio_copy_in_cb, in_ctx); + if (NVME_ERR_REC_DULBE(ns->features.err_rec)) { + status = nvme_check_dulbe(ns, slba, nlb); + if (status) { + goto invalid; + } + } - mbouncep += len; + if (ns->params.zoned) { + status = nvme_check_zone_read(ns, slba, nlb); + if (status) { + goto invalid; } } - /* account for the 1-initialization */ - ctx->copies--; + qemu_iovec_reset(&iocb->iov); + qemu_iovec_add(&iocb->iov, iocb->bounce, len); + + iocb->aiocb = blk_aio_preadv(ns->blkconf.blk, nvme_l2b(ns, slba), + &iocb->iov, 0, nvme_copy_in_cb, iocb); + return; - if (!ctx->copies) { - nvme_copy_in_complete(req); +invalid: + req->status = status; +done: + iocb->aiocb = NULL; + if (iocb->bh) { + qemu_bh_schedule(iocb->bh); } +} - return NVME_NO_COMPLETE; -out: - g_free(ctx->ranges); - g_free(ctx); +static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeNamespace *ns = req->ns; + NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd; + NvmeCopyAIOCB *iocb = blk_aio_get(&nvme_copy_aiocb_info, ns->blkconf.blk, + nvme_misc_cb, req); + uint16_t nr = copy->nr + 1; + uint8_t format = copy->control[0] & 0xf; + uint16_t prinfor = ((copy->control[0] >> 4) & 0xf); + uint16_t prinfow = ((copy->control[2] >> 2) & 0xf); + + uint16_t status; + + trace_pci_nvme_copy(nvme_cid(req), nvme_nsid(ns), nr, format); + + iocb->ranges = NULL; + iocb->zone = NULL; + + if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) && + ((prinfor & NVME_PRINFO_PRACT) != (prinfow & NVME_PRINFO_PRACT))) { + status = NVME_INVALID_FIELD | NVME_DNR; + goto invalid; + } + + if (!(n->id_ctrl.ocfs & (1 << format))) { + trace_pci_nvme_err_copy_invalid_format(format); + status = NVME_INVALID_FIELD | NVME_DNR; + goto invalid; + } + + if (nr > ns->id_ns.msrc + 1) { + status = NVME_CMD_SIZE_LIMIT | NVME_DNR; + goto invalid; + } + + iocb->ranges = g_new(NvmeCopySourceRange, nr); + + status = nvme_h2c(n, (uint8_t *)iocb->ranges, + sizeof(NvmeCopySourceRange) * nr, req); + if (status) { + goto invalid; + } + + iocb->slba = le64_to_cpu(copy->sdlba); + + if (ns->params.zoned) { + iocb->zone = nvme_get_zone_by_slba(ns, iocb->slba); + if (!iocb->zone) { + status = NVME_LBA_RANGE | NVME_DNR; + goto invalid; + } + status = nvme_zrm_auto(n, ns, iocb->zone); + if (status) { + goto invalid; + } + } + + iocb->req = req; + iocb->bh = qemu_bh_new(nvme_copy_bh, iocb); + iocb->ret = 0; + iocb->nr = nr; + iocb->idx = 0; + iocb->reftag = le32_to_cpu(copy->reftag); + iocb->bounce = g_malloc_n(le16_to_cpu(ns->id_ns.mssrl), + ns->lbasz + ns->lbaf.ms); + + qemu_iovec_init(&iocb->iov, 1); + + block_acct_start(blk_get_stats(ns->blkconf.blk), &iocb->acct.read, 0, + BLOCK_ACCT_READ); + block_acct_start(blk_get_stats(ns->blkconf.blk), &iocb->acct.write, 0, + BLOCK_ACCT_WRITE); + + req->aiocb = &iocb->common; + nvme_copy_cb(iocb, 0); + + return NVME_NO_COMPLETE; + +invalid: + g_free(iocb->ranges); + qemu_aio_unref(iocb); return status; } diff --git a/hw/nvme/trace-events b/hw/nvme/trace-events index a3e1134..cd65f8b 100644 --- a/hw/nvme/trace-events +++ b/hw/nvme/trace-events @@ -30,8 +30,7 @@ pci_nvme_dif_prchk_apptag(uint16_t apptag, uint16_t elbat, uint16_t elbatm) "app pci_nvme_dif_prchk_reftag(uint32_t reftag, uint32_t elbrt) "reftag 0x%"PRIx32" elbrt 0x%"PRIx32"" pci_nvme_copy(uint16_t cid, uint32_t nsid, uint16_t nr, uint8_t format) "cid %"PRIu16" nsid %"PRIu32" nr %"PRIu16" format 0x%"PRIx8"" pci_nvme_copy_source_range(uint64_t slba, uint32_t nlb) "slba 0x%"PRIx64" nlb %"PRIu32"" -pci_nvme_copy_in_complete(uint16_t cid) "cid %"PRIu16"" -pci_nvme_copy_cb(uint16_t cid) "cid %"PRIu16"" +pci_nvme_copy_out(uint64_t slba, uint32_t nlb) "slba 0x%"PRIx64" nlb %"PRIu32"" pci_nvme_verify(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t nlb) "cid %"PRIu16" nsid %"PRIu32" slba 0x%"PRIx64" nlb %"PRIu32"" pci_nvme_verify_mdata_in_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" pci_nvme_verify_cb(uint16_t cid, uint8_t prinfo, uint16_t apptag, uint16_t appmask, uint32_t reftag) "cid %"PRIu16" prinfo 0x%"PRIx8" apptag 0x%"PRIx16" appmask 0x%"PRIx16" reftag 0x%"PRIx32"" -- cgit v1.1 From 63d96e4ffd71a4ac2769448a567d46ed71e5bdd6 Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Thu, 17 Jun 2021 21:06:55 +0200 Subject: hw/nvme: reimplement zone reset to allow cancellation Prior to this patch, the aios associated with zone reset are submitted anonymously (no reference saved to the aiocb from the blk_aio call). Fix this by resetting the zones one after another, saving a reference to the aiocb for each reset. Signed-off-by: Klaus Jensen Reviewed-by: Keith Busch --- hw/nvme/ctrl.c | 288 ++++++++++++++++++++++++++++++--------------------- hw/nvme/trace-events | 2 +- 2 files changed, 169 insertions(+), 121 deletions(-) (limited to 'hw') diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index b0cc8c4..5b550ec 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -1691,6 +1691,29 @@ static uint16_t nvme_zrm_close(NvmeNamespace *ns, NvmeZone *zone) } } +static uint16_t nvme_zrm_reset(NvmeNamespace *ns, NvmeZone *zone) +{ + switch (nvme_get_zone_state(zone)) { + case NVME_ZONE_STATE_EXPLICITLY_OPEN: + case NVME_ZONE_STATE_IMPLICITLY_OPEN: + nvme_aor_dec_open(ns); + /* fallthrough */ + case NVME_ZONE_STATE_CLOSED: + nvme_aor_dec_active(ns); + /* fallthrough */ + case NVME_ZONE_STATE_FULL: + zone->w_ptr = zone->d.zslba; + zone->d.wp = zone->w_ptr; + nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_EMPTY); + /* fallthrough */ + case NVME_ZONE_STATE_EMPTY: + return NVME_SUCCESS; + + default: + return NVME_ZONE_INVAL_TRANSITION; + } +} + static void nvme_zrm_auto_transition_zone(NvmeNamespace *ns) { NvmeZone *zone; @@ -2020,79 +2043,6 @@ out: nvme_verify_cb(ctx, ret); } -struct nvme_zone_reset_ctx { - NvmeRequest *req; - NvmeZone *zone; -}; - -static void nvme_aio_zone_reset_complete_cb(void *opaque, int ret) -{ - struct nvme_zone_reset_ctx *ctx = opaque; - NvmeRequest *req = ctx->req; - NvmeNamespace *ns = req->ns; - NvmeZone *zone = ctx->zone; - uintptr_t *resets = (uintptr_t *)&req->opaque; - - if (ret) { - nvme_aio_err(req, ret); - goto out; - } - - switch (nvme_get_zone_state(zone)) { - case NVME_ZONE_STATE_EXPLICITLY_OPEN: - case NVME_ZONE_STATE_IMPLICITLY_OPEN: - nvme_aor_dec_open(ns); - /* fall through */ - case NVME_ZONE_STATE_CLOSED: - nvme_aor_dec_active(ns); - /* fall through */ - case NVME_ZONE_STATE_FULL: - zone->w_ptr = zone->d.zslba; - zone->d.wp = zone->w_ptr; - nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_EMPTY); - /* fall through */ - default: - break; - } - -out: - g_free(ctx); - - (*resets)--; - - if (*resets) { - return; - } - - nvme_enqueue_req_completion(nvme_cq(req), req); -} - -static void nvme_aio_zone_reset_cb(void *opaque, int ret) -{ - struct nvme_zone_reset_ctx *ctx = opaque; - NvmeRequest *req = ctx->req; - NvmeNamespace *ns = req->ns; - NvmeZone *zone = ctx->zone; - - trace_pci_nvme_aio_zone_reset_cb(nvme_cid(req), zone->d.zslba); - - if (ret) { - goto out; - } - - if (ns->lbaf.ms) { - int64_t offset = nvme_moff(ns, zone->d.zslba); - - blk_aio_pwrite_zeroes(ns->blkconf.blk, offset, - nvme_m2b(ns, ns->zone_size), BDRV_REQ_MAY_UNMAP, - nvme_aio_zone_reset_complete_cb, ctx); - return; - } - -out: - nvme_aio_zone_reset_complete_cb(opaque, ret); -} - struct nvme_compare_ctx { struct { QEMUIOVector iov; @@ -3395,41 +3345,6 @@ static uint16_t nvme_finish_zone(NvmeNamespace *ns, NvmeZone *zone, return nvme_zrm_finish(ns, zone); } -static uint16_t nvme_reset_zone(NvmeNamespace *ns, NvmeZone *zone, - NvmeZoneState state, NvmeRequest *req) -{ - uintptr_t *resets = (uintptr_t *)&req->opaque; - struct nvme_zone_reset_ctx *ctx; - - switch (state) { - case NVME_ZONE_STATE_EMPTY: - return NVME_SUCCESS; - case NVME_ZONE_STATE_EXPLICITLY_OPEN: - case NVME_ZONE_STATE_IMPLICITLY_OPEN: - case NVME_ZONE_STATE_CLOSED: - case NVME_ZONE_STATE_FULL: - break; - default: - return NVME_ZONE_INVAL_TRANSITION; - } - - /* - * The zone reset aio callback needs to know the zone that is being reset - * in order to transition the zone on completion. - */ - ctx = g_new(struct nvme_zone_reset_ctx, 1); - ctx->req = req; - ctx->zone = zone; - - (*resets)++; - - blk_aio_pwrite_zeroes(ns->blkconf.blk, nvme_l2b(ns, zone->d.zslba), - nvme_l2b(ns, ns->zone_size), BDRV_REQ_MAY_UNMAP, - nvme_aio_zone_reset_cb, ctx); - - return NVME_NO_COMPLETE; -} - static uint16_t nvme_offline_zone(NvmeNamespace *ns, NvmeZone *zone, NvmeZoneState state, NvmeRequest *req) { @@ -3558,12 +3473,144 @@ out: return status; } +typedef struct NvmeZoneResetAIOCB { + BlockAIOCB common; + BlockAIOCB *aiocb; + NvmeRequest *req; + QEMUBH *bh; + int ret; + + bool all; + int idx; + NvmeZone *zone; +} NvmeZoneResetAIOCB; + +static void nvme_zone_reset_cancel(BlockAIOCB *aiocb) +{ + NvmeZoneResetAIOCB *iocb = container_of(aiocb, NvmeZoneResetAIOCB, common); + NvmeRequest *req = iocb->req; + NvmeNamespace *ns = req->ns; + + iocb->idx = ns->num_zones; + + iocb->ret = -ECANCELED; + + if (iocb->aiocb) { + blk_aio_cancel_async(iocb->aiocb); + iocb->aiocb = NULL; + } +} + +static const AIOCBInfo nvme_zone_reset_aiocb_info = { + .aiocb_size = sizeof(NvmeZoneResetAIOCB), + .cancel_async = nvme_zone_reset_cancel, +}; + +static void nvme_zone_reset_bh(void *opaque) +{ + NvmeZoneResetAIOCB *iocb = opaque; + + iocb->common.cb(iocb->common.opaque, iocb->ret); + + qemu_bh_delete(iocb->bh); + iocb->bh = NULL; + qemu_aio_unref(iocb); +} + +static void nvme_zone_reset_cb(void *opaque, int ret); + +static void nvme_zone_reset_epilogue_cb(void *opaque, int ret) +{ + NvmeZoneResetAIOCB *iocb = opaque; + NvmeRequest *req = iocb->req; + NvmeNamespace *ns = req->ns; + int64_t moff; + int count; + + if (ret < 0) { + nvme_zone_reset_cb(iocb, ret); + return; + } + + if (!ns->lbaf.ms) { + nvme_zone_reset_cb(iocb, 0); + return; + } + + moff = nvme_moff(ns, iocb->zone->d.zslba); + count = nvme_m2b(ns, ns->zone_size); + + iocb->aiocb = blk_aio_pwrite_zeroes(ns->blkconf.blk, moff, count, + BDRV_REQ_MAY_UNMAP, + nvme_zone_reset_cb, iocb); + return; +} + +static void nvme_zone_reset_cb(void *opaque, int ret) +{ + NvmeZoneResetAIOCB *iocb = opaque; + NvmeRequest *req = iocb->req; + NvmeNamespace *ns = req->ns; + + if (ret < 0) { + iocb->ret = ret; + goto done; + } + + if (iocb->zone) { + nvme_zrm_reset(ns, iocb->zone); + + if (!iocb->all) { + goto done; + } + } + + while (iocb->idx < ns->num_zones) { + NvmeZone *zone = &ns->zone_array[iocb->idx++]; + + switch (nvme_get_zone_state(zone)) { + case NVME_ZONE_STATE_EMPTY: + if (!iocb->all) { + goto done; + } + + continue; + + case NVME_ZONE_STATE_EXPLICITLY_OPEN: + case NVME_ZONE_STATE_IMPLICITLY_OPEN: + case NVME_ZONE_STATE_CLOSED: + case NVME_ZONE_STATE_FULL: + iocb->zone = zone; + break; + + default: + continue; + } + + trace_pci_nvme_zns_zone_reset(zone->d.zslba); + + iocb->aiocb = blk_aio_pwrite_zeroes(ns->blkconf.blk, + nvme_l2b(ns, zone->d.zslba), + nvme_l2b(ns, ns->zone_size), + BDRV_REQ_MAY_UNMAP, + nvme_zone_reset_epilogue_cb, + iocb); + return; + } + +done: + iocb->aiocb = NULL; + if (iocb->bh) { + qemu_bh_schedule(iocb->bh); + } +} + static uint16_t nvme_zone_mgmt_send(NvmeCtrl *n, NvmeRequest *req) { NvmeCmd *cmd = (NvmeCmd *)&req->cmd; NvmeNamespace *ns = req->ns; NvmeZone *zone; - uintptr_t *resets; + NvmeZoneResetAIOCB *iocb; uint8_t *zd_ext; uint32_t dw13 = le32_to_cpu(cmd->cdw13); uint64_t slba = 0; @@ -3574,7 +3621,7 @@ static uint16_t nvme_zone_mgmt_send(NvmeCtrl *n, NvmeRequest *req) enum NvmeZoneProcessingMask proc_mask = NVME_PROC_CURRENT_ZONE; action = dw13 & 0xff; - all = dw13 & 0x100; + all = !!(dw13 & 0x100); req->status = NVME_SUCCESS; @@ -3618,21 +3665,22 @@ static uint16_t nvme_zone_mgmt_send(NvmeCtrl *n, NvmeRequest *req) break; case NVME_ZONE_ACTION_RESET: - resets = (uintptr_t *)&req->opaque; - - if (all) { - proc_mask = NVME_PROC_OPENED_ZONES | NVME_PROC_CLOSED_ZONES | - NVME_PROC_FULL_ZONES; - } trace_pci_nvme_reset_zone(slba, zone_idx, all); - *resets = 1; + iocb = blk_aio_get(&nvme_zone_reset_aiocb_info, ns->blkconf.blk, + nvme_misc_cb, req); - status = nvme_do_zone_op(ns, zone, proc_mask, nvme_reset_zone, req); + iocb->req = req; + iocb->bh = qemu_bh_new(nvme_zone_reset_bh, iocb); + iocb->ret = 0; + iocb->all = all; + iocb->idx = zone_idx; + iocb->zone = NULL; - (*resets)--; + req->aiocb = &iocb->common; + nvme_zone_reset_cb(iocb, 0); - return *resets ? NVME_NO_COMPLETE : req->status; + return NVME_NO_COMPLETE; case NVME_ZONE_ACTION_OFFLINE: if (all) { diff --git a/hw/nvme/trace-events b/hw/nvme/trace-events index cd65f8b..dc00c28 100644 --- a/hw/nvme/trace-events +++ b/hw/nvme/trace-events @@ -44,7 +44,6 @@ pci_nvme_compare_data_cb(uint16_t cid) "cid %"PRIu16"" pci_nvme_compare_mdata_cb(uint16_t cid) "cid %"PRIu16"" pci_nvme_aio_discard_cb(uint16_t cid) "cid %"PRIu16"" pci_nvme_aio_copy_in_cb(uint16_t cid) "cid %"PRIu16"" -pci_nvme_aio_zone_reset_cb(uint16_t cid, uint64_t zslba) "cid %"PRIu16" zslba 0x%"PRIx64"" pci_nvme_aio_flush_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" pci_nvme_create_sq(uint64_t addr, uint16_t sqid, uint16_t cqid, uint16_t qsize, uint16_t qflags) "create submission queue, addr=0x%"PRIx64", sqid=%"PRIu16", cqid=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16"" pci_nvme_create_cq(uint64_t addr, uint16_t cqid, uint16_t vector, uint16_t size, uint16_t qflags, int ien) "create completion queue, addr=0x%"PRIx64", cqid=%"PRIu16", vector=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16", ien=%d" @@ -100,6 +99,7 @@ pci_nvme_open_zone(uint64_t slba, uint32_t zone_idx, int all) "open zone, slba=% pci_nvme_close_zone(uint64_t slba, uint32_t zone_idx, int all) "close zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32"" pci_nvme_finish_zone(uint64_t slba, uint32_t zone_idx, int all) "finish zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32"" pci_nvme_reset_zone(uint64_t slba, uint32_t zone_idx, int all) "reset zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32"" +pci_nvme_zns_zone_reset(uint64_t zslba) "zslba 0x%"PRIx64"" pci_nvme_offline_zone(uint64_t slba, uint32_t zone_idx, int all) "offline zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32"" pci_nvme_set_descriptor_extension(uint64_t slba, uint32_t zone_idx) "set zone descriptor extension, slba=%"PRIu64", idx=%"PRIu32"" pci_nvme_zd_extension_set(uint32_t zone_idx) "set descriptor extension for zone_idx=%"PRIu32"" -- cgit v1.1 From 3bcf26d3d6192e350b165c681aff13f6697f30fb Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Thu, 17 Jun 2021 21:06:56 +0200 Subject: hw/nvme: reimplement format nvm to allow cancellation Prior to this patch, the aios associated with broadcast format are submitted anonymously (no aiocb reference saved from the blk_aio call). Fix this by formatting the namespaces one after another, saving a reference to the aiocb for each. Signed-off-by: Klaus Jensen Reviewed-by: Keith Busch --- hw/nvme/ctrl.c | 263 +++++++++++++++++++++++++++------------------------ hw/nvme/trace-events | 4 +- 2 files changed, 143 insertions(+), 124 deletions(-) (limited to 'hw') diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 5b550ec..3b8c542 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -1924,42 +1924,6 @@ out: nvme_rw_complete_cb(req, ret); } -struct nvme_aio_format_ctx { - NvmeRequest *req; - NvmeNamespace *ns; - - /* number of outstanding write zeroes for this namespace */ - int *count; -}; - -static void nvme_aio_format_cb(void *opaque, int ret) -{ - struct nvme_aio_format_ctx *ctx = opaque; - NvmeRequest *req = ctx->req; - NvmeNamespace *ns = ctx->ns; - uintptr_t *num_formats = (uintptr_t *)&req->opaque; - int *count = ctx->count; - - g_free(ctx); - - if (ret) { - nvme_aio_err(req, ret); - } - - if (--(*count)) { - return; - } - - g_free(count); - ns->status = 0x0; - - if (--(*num_formats)) { - return; - } - - nvme_enqueue_req_completion(nvme_cq(req), req); -} - static void nvme_verify_cb(void *opaque, int ret) { NvmeBounceContext *ctx = opaque; @@ -5272,138 +5236,195 @@ static uint16_t nvme_ns_attachment(NvmeCtrl *n, NvmeRequest *req) return NVME_SUCCESS; } -static uint16_t nvme_format_ns(NvmeCtrl *n, NvmeNamespace *ns, uint8_t lbaf, - uint8_t mset, uint8_t pi, uint8_t pil, - NvmeRequest *req) -{ - int64_t len, offset; - struct nvme_aio_format_ctx *ctx; - BlockBackend *blk = ns->blkconf.blk; - uint16_t ms; - uintptr_t *num_formats = (uintptr_t *)&req->opaque; - int *count; - - if (ns->params.zoned) { - return NVME_INVALID_FORMAT | NVME_DNR; - } +typedef struct NvmeFormatAIOCB { + BlockAIOCB common; + BlockAIOCB *aiocb; + QEMUBH *bh; + NvmeRequest *req; + int ret; - trace_pci_nvme_format_ns(nvme_cid(req), nvme_nsid(ns), lbaf, mset, pi, pil); + NvmeNamespace *ns; + uint32_t nsid; + bool broadcast; + int64_t offset; +} NvmeFormatAIOCB; - if (lbaf > ns->id_ns.nlbaf) { - return NVME_INVALID_FORMAT | NVME_DNR; - } +static void nvme_format_bh(void *opaque); - ms = ns->id_ns.lbaf[lbaf].ms; +static void nvme_format_cancel(BlockAIOCB *aiocb) +{ + NvmeFormatAIOCB *iocb = container_of(aiocb, NvmeFormatAIOCB, common); - if (pi && (ms < sizeof(NvmeDifTuple))) { - return NVME_INVALID_FORMAT | NVME_DNR; + if (iocb->aiocb) { + blk_aio_cancel_async(iocb->aiocb); } +} - if (pi && pi > NVME_ID_NS_DPS_TYPE_3) { - return NVME_INVALID_FIELD | NVME_DNR; - } +static const AIOCBInfo nvme_format_aiocb_info = { + .aiocb_size = sizeof(NvmeFormatAIOCB), + .cancel_async = nvme_format_cancel, + .get_aio_context = nvme_get_aio_context, +}; + +static void nvme_format_set(NvmeNamespace *ns, NvmeCmd *cmd) +{ + uint32_t dw10 = le32_to_cpu(cmd->cdw10); + uint8_t lbaf = dw10 & 0xf; + uint8_t pi = (dw10 >> 5) & 0x7; + uint8_t mset = (dw10 >> 4) & 0x1; + uint8_t pil = (dw10 >> 8) & 0x1; - nvme_ns_drain(ns); - nvme_ns_shutdown(ns); - nvme_ns_cleanup(ns); + trace_pci_nvme_format_set(ns->params.nsid, lbaf, mset, pi, pil); ns->id_ns.dps = (pil << 3) | pi; ns->id_ns.flbas = lbaf | (mset << 4); nvme_ns_init_format(ns); +} - ns->status = NVME_FORMAT_IN_PROGRESS; +static void nvme_format_ns_cb(void *opaque, int ret) +{ + NvmeFormatAIOCB *iocb = opaque; + NvmeRequest *req = iocb->req; + NvmeNamespace *ns = iocb->ns; + int bytes; - len = ns->size; - offset = 0; + if (ret < 0) { + iocb->ret = ret; + goto done; + } - count = g_new(int, 1); - *count = 1; + assert(ns); - (*num_formats)++; + if (iocb->offset < ns->size) { + bytes = MIN(BDRV_REQUEST_MAX_BYTES, ns->size - iocb->offset); - while (len) { - ctx = g_new(struct nvme_aio_format_ctx, 1); - ctx->req = req; - ctx->ns = ns; - ctx->count = count; + iocb->aiocb = blk_aio_pwrite_zeroes(ns->blkconf.blk, iocb->offset, + bytes, BDRV_REQ_MAY_UNMAP, + nvme_format_ns_cb, iocb); - size_t bytes = MIN(BDRV_REQUEST_MAX_BYTES, len); + iocb->offset += bytes; + return; + } - (*count)++; + nvme_format_set(ns, &req->cmd); + ns->status = 0x0; + iocb->ns = NULL; + iocb->offset = 0; - blk_aio_pwrite_zeroes(blk, offset, bytes, BDRV_REQ_MAY_UNMAP, - nvme_aio_format_cb, ctx); +done: + iocb->aiocb = NULL; + qemu_bh_schedule(iocb->bh); +} - offset += bytes; - len -= bytes; +static uint16_t nvme_format_check(NvmeNamespace *ns, uint8_t lbaf, uint8_t pi) +{ + if (ns->params.zoned) { + return NVME_INVALID_FORMAT | NVME_DNR; + } + if (lbaf > ns->id_ns.nlbaf) { + return NVME_INVALID_FORMAT | NVME_DNR; } - if (--(*count)) { - return NVME_NO_COMPLETE; + if (pi && (ns->id_ns.lbaf[lbaf].ms < sizeof(NvmeDifTuple))) { + return NVME_INVALID_FORMAT | NVME_DNR; } - g_free(count); - ns->status = 0x0; - (*num_formats)--; + if (pi && pi > NVME_ID_NS_DPS_TYPE_3) { + return NVME_INVALID_FIELD | NVME_DNR; + } return NVME_SUCCESS; } -static uint16_t nvme_format(NvmeCtrl *n, NvmeRequest *req) +static void nvme_format_bh(void *opaque) { - NvmeNamespace *ns; + NvmeFormatAIOCB *iocb = opaque; + NvmeRequest *req = iocb->req; + NvmeCtrl *n = nvme_ctrl(req); uint32_t dw10 = le32_to_cpu(req->cmd.cdw10); - uint32_t nsid = le32_to_cpu(req->cmd.nsid); uint8_t lbaf = dw10 & 0xf; - uint8_t mset = (dw10 >> 4) & 0x1; uint8_t pi = (dw10 >> 5) & 0x7; - uint8_t pil = (dw10 >> 8) & 0x1; - uintptr_t *num_formats = (uintptr_t *)&req->opaque; uint16_t status; int i; - trace_pci_nvme_format(nvme_cid(req), nsid, lbaf, mset, pi, pil); - - /* 1-initialize; see the comment in nvme_dsm */ - *num_formats = 1; + if (iocb->ret < 0) { + goto done; + } - if (nsid != NVME_NSID_BROADCAST) { - if (!nvme_nsid_valid(n, nsid)) { - return NVME_INVALID_NSID | NVME_DNR; + if (iocb->broadcast) { + for (i = iocb->nsid + 1; i <= NVME_MAX_NAMESPACES; i++) { + iocb->ns = nvme_ns(n, i); + if (iocb->ns) { + iocb->nsid = i; + break; + } } + } - ns = nvme_ns(n, nsid); - if (!ns) { - return NVME_INVALID_FIELD | NVME_DNR; - } + if (!iocb->ns) { + goto done; + } - status = nvme_format_ns(n, ns, lbaf, mset, pi, pil, req); - if (status && status != NVME_NO_COMPLETE) { - req->status = status; + status = nvme_format_check(iocb->ns, lbaf, pi); + if (status) { + req->status = status; + goto done; + } + + iocb->ns->status = NVME_FORMAT_IN_PROGRESS; + nvme_format_ns_cb(iocb, 0); + return; + +done: + qemu_bh_delete(iocb->bh); + iocb->bh = NULL; + + iocb->common.cb(iocb->common.opaque, iocb->ret); + + qemu_aio_unref(iocb); +} + +static uint16_t nvme_format(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeFormatAIOCB *iocb; + uint32_t nsid = le32_to_cpu(req->cmd.nsid); + uint16_t status; + + iocb = qemu_aio_get(&nvme_format_aiocb_info, NULL, nvme_misc_cb, req); + + iocb->req = req; + iocb->bh = qemu_bh_new(nvme_format_bh, iocb); + iocb->ret = 0; + iocb->ns = NULL; + iocb->nsid = 0; + iocb->broadcast = (nsid == NVME_NSID_BROADCAST); + iocb->offset = 0; + + if (!iocb->broadcast) { + if (!nvme_nsid_valid(n, nsid)) { + status = NVME_INVALID_NSID | NVME_DNR; + goto out; } - } else { - for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { - ns = nvme_ns(n, i); - if (!ns) { - continue; - } - status = nvme_format_ns(n, ns, lbaf, mset, pi, pil, req); - if (status && status != NVME_NO_COMPLETE) { - req->status = status; - break; - } + iocb->ns = nvme_ns(n, nsid); + if (!iocb->ns) { + status = NVME_INVALID_FIELD | NVME_DNR; + goto out; } } - /* account for the 1-initialization */ - if (--(*num_formats)) { - return NVME_NO_COMPLETE; - } + req->aiocb = &iocb->common; + qemu_bh_schedule(iocb->bh); + + return NVME_NO_COMPLETE; - return req->status; +out: + qemu_bh_delete(iocb->bh); + iocb->bh = NULL; + qemu_aio_unref(iocb); + return status; } static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req) diff --git a/hw/nvme/trace-events b/hw/nvme/trace-events index dc00c28..48d10c3 100644 --- a/hw/nvme/trace-events +++ b/hw/nvme/trace-events @@ -10,9 +10,7 @@ pci_nvme_map_sgl(uint8_t typ, uint64_t len) "type 0x%"PRIx8" len %"PRIu64"" pci_nvme_io_cmd(uint16_t cid, uint32_t nsid, uint16_t sqid, uint8_t opcode, const char *opname) "cid %"PRIu16" nsid 0x%"PRIx32" sqid %"PRIu16" opc 0x%"PRIx8" opname '%s'" pci_nvme_admin_cmd(uint16_t cid, uint16_t sqid, uint8_t opcode, const char *opname) "cid %"PRIu16" sqid %"PRIu16" opc 0x%"PRIx8" opname '%s'" pci_nvme_flush_ns(uint32_t nsid) "nsid 0x%"PRIx32"" -pci_nvme_format(uint16_t cid, uint32_t nsid, uint8_t lbaf, uint8_t mset, uint8_t pi, uint8_t pil) "cid %"PRIu16" nsid %"PRIu32" lbaf %"PRIu8" mset %"PRIu8" pi %"PRIu8" pil %"PRIu8"" -pci_nvme_format_ns(uint16_t cid, uint32_t nsid, uint8_t lbaf, uint8_t mset, uint8_t pi, uint8_t pil) "cid %"PRIu16" nsid %"PRIu32" lbaf %"PRIu8" mset %"PRIu8" pi %"PRIu8" pil %"PRIu8"" -pci_nvme_format_cb(uint16_t cid, uint32_t nsid) "cid %"PRIu16" nsid %"PRIu32"" +pci_nvme_format_set(uint32_t nsid, uint8_t lbaf, uint8_t mset, uint8_t pi, uint8_t pil) "nsid %"PRIu32" lbaf %"PRIu8" mset %"PRIu8" pi %"PRIu8" pil %"PRIu8"" pci_nvme_read(uint16_t cid, uint32_t nsid, uint32_t nlb, uint64_t count, uint64_t lba) "cid %"PRIu16" nsid %"PRIu32" nlb %"PRIu32" count %"PRIu64" lba 0x%"PRIx64"" pci_nvme_write(uint16_t cid, const char *verb, uint32_t nsid, uint32_t nlb, uint64_t count, uint64_t lba) "cid %"PRIu16" opname '%s' nsid %"PRIu32" nlb %"PRIu32" count %"PRIu64" lba 0x%"PRIx64"" pci_nvme_rw_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" -- cgit v1.1 From e76fb260ca8fc2420a4ce792324af0544628b331 Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Thu, 17 Jun 2021 21:06:57 +0200 Subject: Partially revert "hw/block/nvme: drain namespaces on sq deletion" This partially reverts commit 98f84f5a4eca5c03e32fff20f246d9b4b96d6422. Since all "multi aio" commands are now reimplemented to properly track the nested aiocbs, we can revert the "hack" that was introduced to make sure all requests we're properly drained upon sq deletion. The revert is partial since we keep the assert that no outstanding requests remain on the submission queue after the explicit cancellation. Signed-off-by: Klaus Jensen Reviewed-by: Keith Busch --- hw/nvme/ctrl.c | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) (limited to 'hw') diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 3b8c542..6a0c2cc 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -3918,7 +3918,6 @@ static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeRequest *req) NvmeSQueue *sq; NvmeCQueue *cq; uint16_t qid = le16_to_cpu(c->qid); - uint32_t nsid; if (unlikely(!qid || nvme_check_sqid(n, qid))) { trace_pci_nvme_err_invalid_del_sq(qid); @@ -3930,22 +3929,8 @@ static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeRequest *req) sq = n->sq[qid]; while (!QTAILQ_EMPTY(&sq->out_req_list)) { r = QTAILQ_FIRST(&sq->out_req_list); - if (r->aiocb) { - blk_aio_cancel(r->aiocb); - } - } - - /* - * Drain all namespaces if there are still outstanding requests that we - * could not cancel explicitly. - */ - if (!QTAILQ_EMPTY(&sq->out_req_list)) { - for (nsid = 1; nsid <= NVME_MAX_NAMESPACES; nsid++) { - NvmeNamespace *ns = nvme_ns(n, nsid); - if (ns) { - nvme_ns_drain(ns); - } - } + assert(r->aiocb); + blk_aio_cancel(r->aiocb); } assert(QTAILQ_EMPTY(&sq->out_req_list)); -- cgit v1.1 From 5f4eb94dbb2251b290a1e8b89dc7732865e8a43b Mon Sep 17 00:00:00 2001 From: Gollu Appalanaidu Date: Mon, 14 Jun 2021 21:59:26 +0530 Subject: hw/nvme: fix endianess conversion and add controller list Add the controller identifiers list CNS 0x13, available list of ctrls in NVM Subsystem that may or may not be attached to namespaces. In Identify Ctrl List of the CNS 0x12 and 0x13 no endian conversion for the nsid field. These two CNS values shows affect when there exists a Subsystem. Added condition if there is no Subsystem return invalid field in command. Signed-off-by: Gollu Appalanaidu Reviewed-by: Klaus Jensen Signed-off-by: Klaus Jensen --- hw/nvme/ctrl.c | 26 ++++++++++++++++++-------- hw/nvme/trace-events | 2 +- 2 files changed, 19 insertions(+), 9 deletions(-) (limited to 'hw') diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 6a0c2cc..541c081 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -4449,9 +4449,11 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeRequest *req, bool active) return NVME_INVALID_CMD_SET | NVME_DNR; } -static uint16_t nvme_identify_ns_attached_list(NvmeCtrl *n, NvmeRequest *req) +static uint16_t nvme_identify_ctrl_list(NvmeCtrl *n, NvmeRequest *req, + bool attached) { NvmeIdentify *c = (NvmeIdentify *)&req->cmd; + uint32_t nsid = le32_to_cpu(c->nsid); uint16_t min_id = le16_to_cpu(c->ctrlid); uint16_t list[NVME_CONTROLLER_LIST_SIZE] = {}; uint16_t *ids = &list[1]; @@ -4459,15 +4461,21 @@ static uint16_t nvme_identify_ns_attached_list(NvmeCtrl *n, NvmeRequest *req) NvmeCtrl *ctrl; int cntlid, nr_ids = 0; - trace_pci_nvme_identify_ns_attached_list(min_id); + trace_pci_nvme_identify_ctrl_list(c->cns, min_id); - if (c->nsid == NVME_NSID_BROADCAST) { + if (!n->subsys) { return NVME_INVALID_FIELD | NVME_DNR; } - ns = nvme_subsys_ns(n->subsys, c->nsid); - if (!ns) { - return NVME_INVALID_FIELD | NVME_DNR; + if (attached) { + if (nsid == NVME_NSID_BROADCAST) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + ns = nvme_subsys_ns(n->subsys, nsid); + if (!ns) { + return NVME_INVALID_FIELD | NVME_DNR; + } } for (cntlid = min_id; cntlid < ARRAY_SIZE(n->subsys->ctrls); cntlid++) { @@ -4476,7 +4484,7 @@ static uint16_t nvme_identify_ns_attached_list(NvmeCtrl *n, NvmeRequest *req) continue; } - if (!nvme_ns(ctrl, c->nsid)) { + if (attached && !nvme_ns(ctrl, nsid)) { continue; } @@ -4703,7 +4711,9 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeRequest *req) case NVME_ID_CNS_NS_PRESENT: return nvme_identify_ns(n, req, false); case NVME_ID_CNS_NS_ATTACHED_CTRL_LIST: - return nvme_identify_ns_attached_list(n, req); + return nvme_identify_ctrl_list(n, req, true); + case NVME_ID_CNS_CTRL_LIST: + return nvme_identify_ctrl_list(n, req, false); case NVME_ID_CNS_CS_NS: return nvme_identify_ns_csi(n, req, true); case NVME_ID_CNS_CS_NS_PRESENT: diff --git a/hw/nvme/trace-events b/hw/nvme/trace-events index 48d10c3..f9a1f14 100644 --- a/hw/nvme/trace-events +++ b/hw/nvme/trace-events @@ -51,7 +51,7 @@ pci_nvme_identify(uint16_t cid, uint8_t cns, uint16_t ctrlid, uint8_t csi) "cid pci_nvme_identify_ctrl(void) "identify controller" pci_nvme_identify_ctrl_csi(uint8_t csi) "identify controller, csi=0x%"PRIx8"" pci_nvme_identify_ns(uint32_t ns) "nsid %"PRIu32"" -pci_nvme_identify_ns_attached_list(uint16_t cntid) "cntid=%"PRIu16"" +pci_nvme_identify_ctrl_list(uint8_t cns, uint16_t cntid) "cns 0x%"PRIx8" cntid %"PRIu16"" pci_nvme_identify_ns_csi(uint32_t ns, uint8_t csi) "nsid=%"PRIu32", csi=0x%"PRIx8"" pci_nvme_identify_nslist(uint32_t ns) "nsid %"PRIu32"" pci_nvme_identify_nslist_csi(uint16_t ns, uint8_t csi) "nsid=%"PRIu16", csi=0x%"PRIx8"" -- cgit v1.1 From eeef43290d1598363f886acb4c78652073271011 Mon Sep 17 00:00:00 2001 From: Gollu Appalanaidu Date: Mon, 14 Jun 2021 21:59:27 +0530 Subject: hw/nvme: documentation fix In the documentation of the '-detached' param "be" and "not" has been used side by side, fix that. Signed-off-by: Gollu Appalanaidu Reviewed-by: Klaus Jensen Signed-off-by: Klaus Jensen --- hw/nvme/ctrl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'hw') diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 541c081..dd2b4fa 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -120,7 +120,7 @@ * This parameter is only valid together with the `subsys` parameter. If left * at the default value (`false/off`), the namespace will be attached to all * controllers in the NVMe subsystem at boot-up. If set to `true/on`, the - * namespace will be be available in the subsystem not not attached to any + * namespace will be available in the subsystem but not attached to any * controllers. * * Setting `zoned` to true selects Zoned Command Set at the namespace. -- cgit v1.1 From 2b02aabc9d02f9e95946cf639f546bb61f1721b7 Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Mon, 7 Jun 2021 11:47:57 +0200 Subject: hw/nvme: fix missing check for PMR capability Qiang Liu reported that an access on an unknown address is triggered in memory_region_set_enabled because a check on CAP.PMRS is missing for the PMRCTL register write when no PMR is configured. Cc: qemu-stable@nongnu.org Fixes: 75c3c9de961d ("hw/block/nvme: disable PMR at boot up") Resolves: https://gitlab.com/qemu-project/qemu/-/issues/362 Signed-off-by: Klaus Jensen Reviewed-by: Keith Busch --- hw/nvme/ctrl.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'hw') diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index dd2b4fa..5c6c7d3 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -5860,6 +5860,10 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, "invalid write to PMRCAP register, ignored"); return; case 0xe04: /* PMRCTL */ + if (!NVME_CAP_PMRS(n->bar.cap)) { + return; + } + n->bar.pmrctl = data; if (NVME_PMRCTL_EN(data)) { memory_region_set_enabled(&n->pmr.dev->mr, true); -- cgit v1.1 From 83d7ed5c570d4c1d5163951b3057cac2ae7da4ff Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Thu, 17 Jun 2021 20:55:42 +0200 Subject: hw/nvme: fix pin-based interrupt behavior (again) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Jakub noticed[1] that, when using pin-based interrupts, the device will unconditionally deasssert when any CQEs are acknowledged. However, the pin should not be deasserted if other completion queues still holds unacknowledged CQEs. The bug is an artifact of commit ca247d35098d ("hw/block/nvme: fix pin-based interrupt behavior") which fixed one bug but introduced another. This is the third time someone tries to fix pin-based interrupts (see commit 5e9aa92eb1a5 ("hw/block: Fix pin-based interrupt behaviour of NVMe"))... Third time's the charm, so fix it, again, by keeping track of how many CQs have unacknowledged CQEs and only deassert when all are cleared. [1]: <20210610114624.304681-1-jakub.jermar@kernkonzept.com> Cc: qemu-stable@nongnu.org Fixes: ca247d35098d ("hw/block/nvme: fix pin-based interrupt behavior") Reported-by: Jakub Jermář Signed-off-by: Klaus Jensen Reviewed-by: Keith Busch --- hw/nvme/ctrl.c | 18 +++++++++++++++++- hw/nvme/nvme.h | 1 + 2 files changed, 18 insertions(+), 1 deletion(-) (limited to 'hw') diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 5c6c7d3..629b0d3 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -473,7 +473,9 @@ static void nvme_irq_deassert(NvmeCtrl *n, NvmeCQueue *cq) return; } else { assert(cq->vector < 32); - n->irq_status &= ~(1 << cq->vector); + if (!n->cq_pending) { + n->irq_status &= ~(1 << cq->vector); + } nvme_irq_check(n); } } @@ -1253,6 +1255,7 @@ static void nvme_post_cqes(void *opaque) NvmeCQueue *cq = opaque; NvmeCtrl *n = cq->ctrl; NvmeRequest *req, *next; + bool pending = cq->head != cq->tail; int ret; QTAILQ_FOREACH_SAFE(req, &cq->req_list, entry, next) { @@ -1282,6 +1285,10 @@ static void nvme_post_cqes(void *opaque) QTAILQ_INSERT_TAIL(&sq->req_list, req, entry); } if (cq->tail != cq->head) { + if (cq->irq_enabled && !pending) { + n->cq_pending++; + } + nvme_irq_assert(n, cq); } } @@ -4297,6 +4304,11 @@ static uint16_t nvme_del_cq(NvmeCtrl *n, NvmeRequest *req) trace_pci_nvme_err_invalid_del_cq_notempty(qid); return NVME_INVALID_QUEUE_DEL; } + + if (cq->irq_enabled && cq->tail != cq->head) { + n->cq_pending--; + } + nvme_irq_deassert(n, cq); trace_pci_nvme_del_cq(qid); nvme_free_cq(cq, n); @@ -6039,6 +6051,10 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) } if (cq->tail == cq->head) { + if (cq->irq_enabled) { + n->cq_pending--; + } + nvme_irq_deassert(n, cq); } } else { diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h index 2509b8b..56f8ece 100644 --- a/hw/nvme/nvme.h +++ b/hw/nvme/nvme.h @@ -410,6 +410,7 @@ typedef struct NvmeCtrl { uint32_t max_q_ents; uint8_t outstanding_aers; uint32_t irq_status; + int cq_pending; uint64_t host_timestamp; /* Timestamp sent by the host */ uint64_t timestamp_set_qemu_clock_ms; /* QEMU clock time */ uint64_t starttime_ms; -- cgit v1.1