aboutsummaryrefslogtreecommitdiff
path: root/hw/block/nvme-ns.c
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2021-02-09 13:24:37 +0000
committerPeter Maydell <peter.maydell@linaro.org>2021-02-09 13:24:37 +0000
commit1214d55d1c41fbab3a9973a05085b8760647e411 (patch)
tree9d4a512e685025beed402d9f54c417555f305c97 /hw/block/nvme-ns.c
parent41d306ec7d9885752fec434904df08b9c1aa3add (diff)
parent3e22762edc74be3e1ecafc361351a9640d114978 (diff)
downloadqemu-1214d55d1c41fbab3a9973a05085b8760647e411.zip
qemu-1214d55d1c41fbab3a9973a05085b8760647e411.tar.gz
qemu-1214d55d1c41fbab3a9973a05085b8760647e411.tar.bz2
Merge remote-tracking branch 'remotes/nvme/tags/nvme-next-pull-request' into staging
Emulated NVMe device updates * deallocate or unwritten logical block error feature (me) * dataset management command (me) * compare command (Gollu Appalanaidu) * namespace types (Niklas Cassel) * zoned namespaces (Dmitry Fomichev) * smart critical warning toggle (Zhenwei Pi) * allow cmb and pmr to coexist (me) * pmr rds/wds support (Naveen Nagar) * cmb v1.4 logic (Padmakar Kalghatgi) And a lot of smaller fixes from Gollu Appalanaidu and Minwoo Im. # gpg: Signature made Tue 09 Feb 2021 07:25:18 GMT # gpg: using RSA key 522833AA75E2DCE6A24766C04DE1AF316D4F0DE9 # gpg: Good signature from "Klaus Jensen <its@irrelevant.dk>" [unknown] # gpg: aka "Klaus Jensen <k.jensen@samsung.com>" [unknown] # gpg: WARNING: This key is not certified with a trusted signature! # gpg: There is no indication that the signature belongs to the owner. # Primary key fingerprint: DDCA 4D9C 9EF9 31CC 3468 4272 63D5 6FC5 E55D A838 # Subkey fingerprint: 5228 33AA 75E2 DCE6 A247 66C0 4DE1 AF31 6D4F 0DE9 * remotes/nvme/tags/nvme-next-pull-request: (56 commits) hw/block/nvme: refactor the logic for zone write checks hw/block/nvme: fix zone boundary check for append hw/block/nvme: fix wrong parameter name 'cross_read' hw/block/nvme: align with existing style hw/block/nvme: fix set feature save field check hw/block/nvme: fix set feature for error recovery hw/block/nvme: error if drive less than a zone size hw/block/nvme: lift cmb restrictions hw/block/nvme: bump to v1.4 hw/block/nvme: move cmb logic to v1.4 hw/block/nvme: add PMR RDS/WDS support hw/block/nvme: disable PMR at boot up hw/block/nvme: remove redundant zeroing of PMR registers hw/block/nvme: rename PMR/CMB shift/mask fields hw/block/nvme: allow cmb and pmr to coexist hw/block/nvme: move msix table and pba to BAR 0 hw/block/nvme: indicate CMB support through controller capabilities register hw/block/nvme: fix 64 bit register hi/lo split writes hw/block/nvme: add size to mmio read/write trace events hw/block/nvme: trigger async event during 
injecting smart warning ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'hw/block/nvme-ns.c')
-rw-r--r--hw/block/nvme-ns.c290
1 file changed, 277 insertions(+), 13 deletions(-)
diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c
index 2670787..dfed71a 100644
--- a/hw/block/nvme-ns.c
+++ b/hw/block/nvme-ns.c
@@ -16,6 +16,7 @@
#include "qemu/units.h"
#include "qemu/cutils.h"
#include "qemu/log.h"
+#include "qemu/error-report.h"
#include "hw/block/block.h"
#include "hw/pci/pci.h"
#include "sysemu/sysemu.h"
@@ -25,28 +26,47 @@
#include "hw/qdev-properties.h"
#include "hw/qdev-core.h"
+#include "trace.h"
#include "nvme.h"
#include "nvme-ns.h"
-static void nvme_ns_init(NvmeNamespace *ns)
+#define MIN_DISCARD_GRANULARITY (4 * KiB)
+
/*
 * Populate the Identify Namespace data structure for an NVM command set
 * namespace. Always returns 0; errp is taken for symmetry with the other
 * init helpers (no failure path here yet).
 */
static int nvme_ns_init(NvmeNamespace *ns, Error **errp)
{
    BlockDriverInfo bdi;
    NvmeIdNs *id_ns = &ns->id_ns;
    int lba_index = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas);
    int npdg;

    /*
     * DLFEAT 0x9: per the NVMe spec, deallocated blocks read back as
     * zeroes and Write Zeroes supports the Deallocate bit.
     */
    ns->id_ns.dlfeat = 0x9;

    /* LBA data size is a power of two, stored as log2 */
    id_ns->lbaf[lba_index].ds = 31 - clz32(ns->blkconf.logical_block_size);

    id_ns->nsze = cpu_to_le64(nvme_ns_nlbas(ns));

    ns->csi = NVME_CSI_NVM;

    /* no thin provisioning */
    id_ns->ncap = id_ns->nsze;
    id_ns->nuse = id_ns->ncap;

    /* support DULBE and I/O optimization fields */
    id_ns->nsfeat |= (0x4 | 0x10);

    /* NPDG/NPDA are zeroes-based counts in units of logical blocks */
    npdg = ns->blkconf.discard_granularity / ns->blkconf.logical_block_size;

    if (bdrv_get_info(blk_bs(ns->blkconf.blk), &bdi) >= 0 &&
        bdi.cluster_size > ns->blkconf.discard_granularity) {
        /* prefer the image's cluster size when it is coarser */
        npdg = bdi.cluster_size / ns->blkconf.logical_block_size;
    }

    id_ns->npda = id_ns->npdg = npdg - 1;

    return 0;
}
-static int nvme_ns_init_blk(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
+static int nvme_ns_init_blk(NvmeNamespace *ns, Error **errp)
{
bool read_only;
@@ -59,19 +79,225 @@ static int nvme_ns_init_blk(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
return -1;
}
+ if (ns->blkconf.discard_granularity == -1) {
+ ns->blkconf.discard_granularity =
+ MAX(ns->blkconf.logical_block_size, MIN_DISCARD_GRANULARITY);
+ }
+
ns->size = blk_getlength(ns->blkconf.blk);
if (ns->size < 0) {
error_setg_errno(errp, -ns->size, "could not get blockdev size");
return -1;
}
- if (blk_enable_write_cache(ns->blkconf.blk)) {
- n->features.vwc = 0x1;
+ return 0;
+}
+
+static int nvme_ns_zoned_check_calc_geometry(NvmeNamespace *ns, Error **errp)
+{
+ uint64_t zone_size, zone_cap;
+ uint32_t lbasz = ns->blkconf.logical_block_size;
+
+ /* Make sure that the values of ZNS properties are sane */
+ if (ns->params.zone_size_bs) {
+ zone_size = ns->params.zone_size_bs;
+ } else {
+ zone_size = NVME_DEFAULT_ZONE_SIZE;
+ }
+ if (ns->params.zone_cap_bs) {
+ zone_cap = ns->params.zone_cap_bs;
+ } else {
+ zone_cap = zone_size;
+ }
+ if (zone_cap > zone_size) {
+ error_setg(errp, "zone capacity %"PRIu64"B exceeds "
+ "zone size %"PRIu64"B", zone_cap, zone_size);
+ return -1;
+ }
+ if (zone_size < lbasz) {
+ error_setg(errp, "zone size %"PRIu64"B too small, "
+ "must be at least %"PRIu32"B", zone_size, lbasz);
+ return -1;
+ }
+ if (zone_cap < lbasz) {
+ error_setg(errp, "zone capacity %"PRIu64"B too small, "
+ "must be at least %"PRIu32"B", zone_cap, lbasz);
+ return -1;
+ }
+
+ /*
+ * Save the main zone geometry values to avoid
+ * calculating them later again.
+ */
+ ns->zone_size = zone_size / lbasz;
+ ns->zone_capacity = zone_cap / lbasz;
+ ns->num_zones = ns->size / lbasz / ns->zone_size;
+
+ /* Do a few more sanity checks of ZNS properties */
+ if (!ns->num_zones) {
+ error_setg(errp,
+ "insufficient drive capacity, must be at least the size "
+ "of one zone (%"PRIu64"B)", zone_size);
+ return -1;
+ }
+
+ if (ns->params.max_open_zones > ns->num_zones) {
+ error_setg(errp,
+ "max_open_zones value %u exceeds the number of zones %u",
+ ns->params.max_open_zones, ns->num_zones);
+ return -1;
+ }
+ if (ns->params.max_active_zones > ns->num_zones) {
+ error_setg(errp,
+ "max_active_zones value %u exceeds the number of zones %u",
+ ns->params.max_active_zones, ns->num_zones);
+ return -1;
+ }
+
+ if (ns->params.zd_extension_size) {
+ if (ns->params.zd_extension_size & 0x3f) {
+ error_setg(errp,
+ "zone descriptor extension size must be a multiple of 64B");
+ return -1;
+ }
+ if ((ns->params.zd_extension_size >> 6) > 0xff) {
+ error_setg(errp, "zone descriptor extension size is too large");
+ return -1;
+ }
}
return 0;
}
+static void nvme_ns_zoned_init_state(NvmeNamespace *ns)
+{
+ uint64_t start = 0, zone_size = ns->zone_size;
+ uint64_t capacity = ns->num_zones * zone_size;
+ NvmeZone *zone;
+ int i;
+
+ ns->zone_array = g_new0(NvmeZone, ns->num_zones);
+ if (ns->params.zd_extension_size) {
+ ns->zd_extensions = g_malloc0(ns->params.zd_extension_size *
+ ns->num_zones);
+ }
+
+ QTAILQ_INIT(&ns->exp_open_zones);
+ QTAILQ_INIT(&ns->imp_open_zones);
+ QTAILQ_INIT(&ns->closed_zones);
+ QTAILQ_INIT(&ns->full_zones);
+
+ zone = ns->zone_array;
+ for (i = 0; i < ns->num_zones; i++, zone++) {
+ if (start + zone_size > capacity) {
+ zone_size = capacity - start;
+ }
+ zone->d.zt = NVME_ZONE_TYPE_SEQ_WRITE;
+ nvme_set_zone_state(zone, NVME_ZONE_STATE_EMPTY);
+ zone->d.za = 0;
+ zone->d.zcap = ns->zone_capacity;
+ zone->d.zslba = start;
+ zone->d.wp = start;
+ zone->w_ptr = start;
+ start += zone_size;
+ }
+
+ ns->zone_size_log2 = 0;
+ if (is_power_of_2(ns->zone_size)) {
+ ns->zone_size_log2 = 63 - clz64(ns->zone_size);
+ }
+}
+
/*
 * Build the Identify Namespace (Zoned Command Set) structure and switch
 * the namespace to the zoned command set. Must run after nvme_ns_init()
 * (reads npdg/nsfeat) and nvme_ns_zoned_check_calc_geometry() (reads the
 * derived zone geometry).
 */
static void nvme_ns_init_zoned(NvmeNamespace *ns, int lba_index)
{
    NvmeIdNsZoned *id_ns_z;

    nvme_ns_zoned_init_state(ns);

    id_ns_z = g_malloc0(sizeof(NvmeIdNsZoned));

    /* MAR/MOR are zeroes-based, 0xffffffff means no limit */
    id_ns_z->mar = cpu_to_le32(ns->params.max_active_zones - 1);
    id_ns_z->mor = cpu_to_le32(ns->params.max_open_zones - 1);
    id_ns_z->zoc = 0;
    /* OZCS bit 0: allow reads to cross zone boundaries */
    id_ns_z->ozcs = ns->params.cross_zone_read ? 0x01 : 0x00;

    id_ns_z->lbafe[lba_index].zsze = cpu_to_le64(ns->zone_size);
    id_ns_z->lbafe[lba_index].zdes =
        ns->params.zd_extension_size >> 6; /* Units of 64B */

    ns->csi = NVME_CSI_ZONED;
    /* usable size is the zone-aligned portion of the drive */
    ns->id_ns.nsze = cpu_to_le64(ns->num_zones * ns->zone_size);
    ns->id_ns.ncap = ns->id_ns.nsze;
    ns->id_ns.nuse = ns->id_ns.ncap;

    /*
     * The device uses the BDRV_BLOCK_ZERO flag to determine the "deallocated"
     * status of logical blocks. Since the spec defines that logical blocks
     * SHALL be deallocated when the zone is in the Empty or Offline states,
     * we can only support DULBE if the zone size is a multiple of the
     * calculated NPDG.
     */
    if (ns->zone_size % (ns->id_ns.npdg + 1)) {
        warn_report("the zone size (%"PRIu64" blocks) is not a multiple of "
                    "the calculated deallocation granularity (%d blocks); "
                    "DULBE support disabled",
                    ns->zone_size, ns->id_ns.npdg + 1);

        /* clear the DULBE bit set in nvme_ns_init() */
        ns->id_ns.nsfeat &= ~0x4;
    }

    ns->id_ns_zoned = id_ns_z;
}
+
/*
 * Re-file a zone during namespace shutdown/reset: a zone with a non-start
 * write pointer or a valid descriptor extension is kept active as Closed
 * (so its data/metadata survives); anything else is reset to Empty.
 */
static void nvme_clear_zone(NvmeNamespace *ns, NvmeZone *zone)
{
    uint8_t state;

    /* roll the cached write pointer back to the recorded one */
    zone->w_ptr = zone->d.wp;
    state = nvme_get_zone_state(zone);
    if (zone->d.wp != zone->d.zslba ||
        (zone->d.za & NVME_ZA_ZD_EXT_VALID)) {
        if (state != NVME_ZONE_STATE_CLOSED) {
            trace_pci_nvme_clear_ns_close(state, zone->d.zslba);
            nvme_set_zone_state(zone, NVME_ZONE_STATE_CLOSED);
        }
        /* the zone stays active: account for it and track it as closed */
        nvme_aor_inc_active(ns);
        QTAILQ_INSERT_HEAD(&ns->closed_zones, zone, entry);
    } else {
        trace_pci_nvme_clear_ns_reset(state, zone->d.zslba);
        nvme_set_zone_state(zone, NVME_ZONE_STATE_EMPTY);
    }
}
+
/*
 * Close all the zones that are currently open: drain each per-state zone
 * list, releasing the open/active resource counts taken for the zone, and
 * let nvme_clear_zone() re-file the zone as Closed or Empty.
 */
static void nvme_zoned_ns_shutdown(NvmeNamespace *ns)
{
    NvmeZone *zone, *next;

    QTAILQ_FOREACH_SAFE(zone, &ns->closed_zones, entry, next) {
        QTAILQ_REMOVE(&ns->closed_zones, zone, entry);
        nvme_aor_dec_active(ns);
        nvme_clear_zone(ns, zone);
    }
    QTAILQ_FOREACH_SAFE(zone, &ns->imp_open_zones, entry, next) {
        QTAILQ_REMOVE(&ns->imp_open_zones, zone, entry);
        nvme_aor_dec_open(ns);
        nvme_aor_dec_active(ns);
        nvme_clear_zone(ns, zone);
    }
    QTAILQ_FOREACH_SAFE(zone, &ns->exp_open_zones, entry, next) {
        QTAILQ_REMOVE(&ns->exp_open_zones, zone, entry);
        nvme_aor_dec_open(ns);
        nvme_aor_dec_active(ns);
        nvme_clear_zone(ns, zone);
    }

    /* by now every open zone must have been retired */
    assert(ns->nr_open_zones == 0);
}
+
static int nvme_ns_check_constraints(NvmeNamespace *ns, Error **errp)
{
if (!ns->blkconf.blk) {
@@ -82,20 +308,25 @@ static int nvme_ns_check_constraints(NvmeNamespace *ns, Error **errp)
return 0;
}
-int nvme_ns_setup(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
+int nvme_ns_setup(NvmeNamespace *ns, Error **errp)
{
if (nvme_ns_check_constraints(ns, errp)) {
return -1;
}
- if (nvme_ns_init_blk(n, ns, errp)) {
+ if (nvme_ns_init_blk(ns, errp)) {
return -1;
}
- nvme_ns_init(ns);
- if (nvme_register_namespace(n, ns, errp)) {
+ if (nvme_ns_init(ns, errp)) {
return -1;
}
+ if (ns->params.zoned) {
+ if (nvme_ns_zoned_check_calc_geometry(ns, errp) != 0) {
+ return -1;
+ }
+ nvme_ns_init_zoned(ns, 0);
+ }
return 0;
}
@@ -105,9 +336,21 @@ void nvme_ns_drain(NvmeNamespace *ns)
blk_drain(ns->blkconf.blk);
}
-void nvme_ns_flush(NvmeNamespace *ns)
/*
 * Quiesce a namespace on controller shutdown: flush pending writes and,
 * for zoned namespaces, move all open zones back to Closed/Empty state.
 */
void nvme_ns_shutdown(NvmeNamespace *ns)
{
    blk_flush(ns->blkconf.blk);
    if (ns->params.zoned) {
        nvme_zoned_ns_shutdown(ns);
    }
}
+
+void nvme_ns_cleanup(NvmeNamespace *ns)
+{
+ if (ns->params.zoned) {
+ g_free(ns->id_ns_zoned);
+ g_free(ns->zone_array);
+ g_free(ns->zd_extensions);
+ }
}
static void nvme_ns_realize(DeviceState *dev, Error **errp)
@@ -117,16 +360,37 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp)
NvmeCtrl *n = NVME(s->parent);
Error *local_err = NULL;
- if (nvme_ns_setup(n, ns, &local_err)) {
+ if (nvme_ns_setup(ns, &local_err)) {
error_propagate_prepend(errp, local_err,
"could not setup namespace: ");
return;
}
+
+ if (nvme_register_namespace(n, ns, errp)) {
+ error_propagate_prepend(errp, local_err,
+ "could not register namespace: ");
+ return;
+ }
+
}
static Property nvme_ns_props[] = {
    DEFINE_BLOCK_PROPERTIES(NvmeNamespace, blkconf),
    DEFINE_PROP_UINT32("nsid", NvmeNamespace, params.nsid, 0),
    DEFINE_PROP_UUID("uuid", NvmeNamespace, params.uuid),
    /* Zoned Namespace Command Set configuration */
    DEFINE_PROP_BOOL("zoned", NvmeNamespace, params.zoned, false),
    DEFINE_PROP_SIZE("zoned.zone_size", NvmeNamespace, params.zone_size_bs,
                     NVME_DEFAULT_ZONE_SIZE),
    /* 0: zone capacity defaults to the zone size */
    DEFINE_PROP_SIZE("zoned.zone_capacity", NvmeNamespace, params.zone_cap_bs,
                     0),
    DEFINE_PROP_BOOL("zoned.cross_read", NvmeNamespace,
                     params.cross_zone_read, false),
    /* 0: no limit on active/open zones (reported as MAR/MOR 0xffffffff) */
    DEFINE_PROP_UINT32("zoned.max_active", NvmeNamespace,
                       params.max_active_zones, 0),
    DEFINE_PROP_UINT32("zoned.max_open", NvmeNamespace,
                       params.max_open_zones, 0),
    /* bytes per zone descriptor extension; multiple of 64, at most 16320 */
    DEFINE_PROP_UINT32("zoned.descr_ext_size", NvmeNamespace,
                       params.zd_extension_size, 0),
    DEFINE_PROP_END_OF_LIST(),
};