diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2021-02-09 13:24:37 +0000 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2021-02-09 13:24:37 +0000 |
commit | 1214d55d1c41fbab3a9973a05085b8760647e411 (patch) | |
tree | 9d4a512e685025beed402d9f54c417555f305c97 /hw/block/nvme-ns.c | |
parent | 41d306ec7d9885752fec434904df08b9c1aa3add (diff) | |
parent | 3e22762edc74be3e1ecafc361351a9640d114978 (diff) | |
download | qemu-1214d55d1c41fbab3a9973a05085b8760647e411.zip qemu-1214d55d1c41fbab3a9973a05085b8760647e411.tar.gz qemu-1214d55d1c41fbab3a9973a05085b8760647e411.tar.bz2 |
Merge remote-tracking branch 'remotes/nvme/tags/nvme-next-pull-request' into staging
Emulated NVMe device updates
* deallocated or unwritten logical block error feature (me)
* dataset management command (me)
* compare command (Gollu Appalanaidu)
* namespace types (Niklas Cassel)
* zoned namespaces (Dmitry Fomichev)
* smart critical warning toggle (Zhenwei Pi)
* allow cmb and pmr to coexist (me)
* pmr rds/wds support (Naveen Nagar)
* cmb v1.4 logic (Padmakar Kalghatgi)
And a lot of smaller fixes from Gollu Appalanaidu and Minwoo Im.
# gpg: Signature made Tue 09 Feb 2021 07:25:18 GMT
# gpg: using RSA key 522833AA75E2DCE6A24766C04DE1AF316D4F0DE9
# gpg: Good signature from "Klaus Jensen <its@irrelevant.dk>" [unknown]
# gpg: aka "Klaus Jensen <k.jensen@samsung.com>" [unknown]
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg: There is no indication that the signature belongs to the owner.
# Primary key fingerprint: DDCA 4D9C 9EF9 31CC 3468 4272 63D5 6FC5 E55D A838
# Subkey fingerprint: 5228 33AA 75E2 DCE6 A247 66C0 4DE1 AF31 6D4F 0DE9
* remotes/nvme/tags/nvme-next-pull-request: (56 commits)
hw/block/nvme: refactor the logic for zone write checks
hw/block/nvme: fix zone boundary check for append
hw/block/nvme: fix wrong parameter name 'cross_read'
hw/block/nvme: align with existing style
hw/block/nvme: fix set feature save field check
hw/block/nvme: fix set feature for error recovery
hw/block/nvme: error if drive less than a zone size
hw/block/nvme: lift cmb restrictions
hw/block/nvme: bump to v1.4
hw/block/nvme: move cmb logic to v1.4
hw/block/nvme: add PMR RDS/WDS support
hw/block/nvme: disable PMR at boot up
hw/block/nvme: remove redundant zeroing of PMR registers
hw/block/nvme: rename PMR/CMB shift/mask fields
hw/block/nvme: allow cmb and pmr to coexist
hw/block/nvme: move msix table and pba to BAR 0
hw/block/nvme: indicate CMB support through controller capabilities register
hw/block/nvme: fix 64 bit register hi/lo split writes
hw/block/nvme: add size to mmio read/write trace events
hw/block/nvme: trigger async event during injecting smart warning
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'hw/block/nvme-ns.c')
-rw-r--r-- | hw/block/nvme-ns.c | 290 |
1 files changed, 277 insertions, 13 deletions
diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c index 2670787..dfed71a 100644 --- a/hw/block/nvme-ns.c +++ b/hw/block/nvme-ns.c @@ -16,6 +16,7 @@ #include "qemu/units.h" #include "qemu/cutils.h" #include "qemu/log.h" +#include "qemu/error-report.h" #include "hw/block/block.h" #include "hw/pci/pci.h" #include "sysemu/sysemu.h" @@ -25,28 +26,47 @@ #include "hw/qdev-properties.h" #include "hw/qdev-core.h" +#include "trace.h" #include "nvme.h" #include "nvme-ns.h" -static void nvme_ns_init(NvmeNamespace *ns) +#define MIN_DISCARD_GRANULARITY (4 * KiB) + +static int nvme_ns_init(NvmeNamespace *ns, Error **errp) { + BlockDriverInfo bdi; NvmeIdNs *id_ns = &ns->id_ns; int lba_index = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas); + int npdg; - if (blk_get_flags(ns->blkconf.blk) & BDRV_O_UNMAP) { - ns->id_ns.dlfeat = 0x9; - } + ns->id_ns.dlfeat = 0x9; id_ns->lbaf[lba_index].ds = 31 - clz32(ns->blkconf.logical_block_size); id_ns->nsze = cpu_to_le64(nvme_ns_nlbas(ns)); + ns->csi = NVME_CSI_NVM; + /* no thin provisioning */ id_ns->ncap = id_ns->nsze; id_ns->nuse = id_ns->ncap; + + /* support DULBE and I/O optimization fields */ + id_ns->nsfeat |= (0x4 | 0x10); + + npdg = ns->blkconf.discard_granularity / ns->blkconf.logical_block_size; + + if (bdrv_get_info(blk_bs(ns->blkconf.blk), &bdi) >= 0 && + bdi.cluster_size > ns->blkconf.discard_granularity) { + npdg = bdi.cluster_size / ns->blkconf.logical_block_size; + } + + id_ns->npda = id_ns->npdg = npdg - 1; + + return 0; } -static int nvme_ns_init_blk(NvmeCtrl *n, NvmeNamespace *ns, Error **errp) +static int nvme_ns_init_blk(NvmeNamespace *ns, Error **errp) { bool read_only; @@ -59,19 +79,225 @@ static int nvme_ns_init_blk(NvmeCtrl *n, NvmeNamespace *ns, Error **errp) return -1; } + if (ns->blkconf.discard_granularity == -1) { + ns->blkconf.discard_granularity = + MAX(ns->blkconf.logical_block_size, MIN_DISCARD_GRANULARITY); + } + ns->size = blk_getlength(ns->blkconf.blk); if (ns->size < 0) { error_setg_errno(errp, -ns->size, 
"could not get blockdev size"); return -1; } - if (blk_enable_write_cache(ns->blkconf.blk)) { - n->features.vwc = 0x1; + return 0; +} + +static int nvme_ns_zoned_check_calc_geometry(NvmeNamespace *ns, Error **errp) +{ + uint64_t zone_size, zone_cap; + uint32_t lbasz = ns->blkconf.logical_block_size; + + /* Make sure that the values of ZNS properties are sane */ + if (ns->params.zone_size_bs) { + zone_size = ns->params.zone_size_bs; + } else { + zone_size = NVME_DEFAULT_ZONE_SIZE; + } + if (ns->params.zone_cap_bs) { + zone_cap = ns->params.zone_cap_bs; + } else { + zone_cap = zone_size; + } + if (zone_cap > zone_size) { + error_setg(errp, "zone capacity %"PRIu64"B exceeds " + "zone size %"PRIu64"B", zone_cap, zone_size); + return -1; + } + if (zone_size < lbasz) { + error_setg(errp, "zone size %"PRIu64"B too small, " + "must be at least %"PRIu32"B", zone_size, lbasz); + return -1; + } + if (zone_cap < lbasz) { + error_setg(errp, "zone capacity %"PRIu64"B too small, " + "must be at least %"PRIu32"B", zone_cap, lbasz); + return -1; + } + + /* + * Save the main zone geometry values to avoid + * calculating them later again. 
+ */ + ns->zone_size = zone_size / lbasz; + ns->zone_capacity = zone_cap / lbasz; + ns->num_zones = ns->size / lbasz / ns->zone_size; + + /* Do a few more sanity checks of ZNS properties */ + if (!ns->num_zones) { + error_setg(errp, + "insufficient drive capacity, must be at least the size " + "of one zone (%"PRIu64"B)", zone_size); + return -1; + } + + if (ns->params.max_open_zones > ns->num_zones) { + error_setg(errp, + "max_open_zones value %u exceeds the number of zones %u", + ns->params.max_open_zones, ns->num_zones); + return -1; + } + if (ns->params.max_active_zones > ns->num_zones) { + error_setg(errp, + "max_active_zones value %u exceeds the number of zones %u", + ns->params.max_active_zones, ns->num_zones); + return -1; + } + + if (ns->params.zd_extension_size) { + if (ns->params.zd_extension_size & 0x3f) { + error_setg(errp, + "zone descriptor extension size must be a multiple of 64B"); + return -1; + } + if ((ns->params.zd_extension_size >> 6) > 0xff) { + error_setg(errp, "zone descriptor extension size is too large"); + return -1; + } } return 0; } +static void nvme_ns_zoned_init_state(NvmeNamespace *ns) +{ + uint64_t start = 0, zone_size = ns->zone_size; + uint64_t capacity = ns->num_zones * zone_size; + NvmeZone *zone; + int i; + + ns->zone_array = g_new0(NvmeZone, ns->num_zones); + if (ns->params.zd_extension_size) { + ns->zd_extensions = g_malloc0(ns->params.zd_extension_size * + ns->num_zones); + } + + QTAILQ_INIT(&ns->exp_open_zones); + QTAILQ_INIT(&ns->imp_open_zones); + QTAILQ_INIT(&ns->closed_zones); + QTAILQ_INIT(&ns->full_zones); + + zone = ns->zone_array; + for (i = 0; i < ns->num_zones; i++, zone++) { + if (start + zone_size > capacity) { + zone_size = capacity - start; + } + zone->d.zt = NVME_ZONE_TYPE_SEQ_WRITE; + nvme_set_zone_state(zone, NVME_ZONE_STATE_EMPTY); + zone->d.za = 0; + zone->d.zcap = ns->zone_capacity; + zone->d.zslba = start; + zone->d.wp = start; + zone->w_ptr = start; + start += zone_size; + } + + ns->zone_size_log2 = 0; 
+ if (is_power_of_2(ns->zone_size)) { + ns->zone_size_log2 = 63 - clz64(ns->zone_size); + } +} + +static void nvme_ns_init_zoned(NvmeNamespace *ns, int lba_index) +{ + NvmeIdNsZoned *id_ns_z; + + nvme_ns_zoned_init_state(ns); + + id_ns_z = g_malloc0(sizeof(NvmeIdNsZoned)); + + /* MAR/MOR are zeroes-based, 0xffffffff means no limit */ + id_ns_z->mar = cpu_to_le32(ns->params.max_active_zones - 1); + id_ns_z->mor = cpu_to_le32(ns->params.max_open_zones - 1); + id_ns_z->zoc = 0; + id_ns_z->ozcs = ns->params.cross_zone_read ? 0x01 : 0x00; + + id_ns_z->lbafe[lba_index].zsze = cpu_to_le64(ns->zone_size); + id_ns_z->lbafe[lba_index].zdes = + ns->params.zd_extension_size >> 6; /* Units of 64B */ + + ns->csi = NVME_CSI_ZONED; + ns->id_ns.nsze = cpu_to_le64(ns->num_zones * ns->zone_size); + ns->id_ns.ncap = ns->id_ns.nsze; + ns->id_ns.nuse = ns->id_ns.ncap; + + /* + * The device uses the BDRV_BLOCK_ZERO flag to determine the "deallocated" + * status of logical blocks. Since the spec defines that logical blocks + * SHALL be deallocated when then zone is in the Empty or Offline states, + * we can only support DULBE if the zone size is a multiple of the + * calculated NPDG. 
+ */ + if (ns->zone_size % (ns->id_ns.npdg + 1)) { + warn_report("the zone size (%"PRIu64" blocks) is not a multiple of " + "the calculated deallocation granularity (%d blocks); " + "DULBE support disabled", + ns->zone_size, ns->id_ns.npdg + 1); + + ns->id_ns.nsfeat &= ~0x4; + } + + ns->id_ns_zoned = id_ns_z; +} + +static void nvme_clear_zone(NvmeNamespace *ns, NvmeZone *zone) +{ + uint8_t state; + + zone->w_ptr = zone->d.wp; + state = nvme_get_zone_state(zone); + if (zone->d.wp != zone->d.zslba || + (zone->d.za & NVME_ZA_ZD_EXT_VALID)) { + if (state != NVME_ZONE_STATE_CLOSED) { + trace_pci_nvme_clear_ns_close(state, zone->d.zslba); + nvme_set_zone_state(zone, NVME_ZONE_STATE_CLOSED); + } + nvme_aor_inc_active(ns); + QTAILQ_INSERT_HEAD(&ns->closed_zones, zone, entry); + } else { + trace_pci_nvme_clear_ns_reset(state, zone->d.zslba); + nvme_set_zone_state(zone, NVME_ZONE_STATE_EMPTY); + } +} + +/* + * Close all the zones that are currently open. + */ +static void nvme_zoned_ns_shutdown(NvmeNamespace *ns) +{ + NvmeZone *zone, *next; + + QTAILQ_FOREACH_SAFE(zone, &ns->closed_zones, entry, next) { + QTAILQ_REMOVE(&ns->closed_zones, zone, entry); + nvme_aor_dec_active(ns); + nvme_clear_zone(ns, zone); + } + QTAILQ_FOREACH_SAFE(zone, &ns->imp_open_zones, entry, next) { + QTAILQ_REMOVE(&ns->imp_open_zones, zone, entry); + nvme_aor_dec_open(ns); + nvme_aor_dec_active(ns); + nvme_clear_zone(ns, zone); + } + QTAILQ_FOREACH_SAFE(zone, &ns->exp_open_zones, entry, next) { + QTAILQ_REMOVE(&ns->exp_open_zones, zone, entry); + nvme_aor_dec_open(ns); + nvme_aor_dec_active(ns); + nvme_clear_zone(ns, zone); + } + + assert(ns->nr_open_zones == 0); +} + static int nvme_ns_check_constraints(NvmeNamespace *ns, Error **errp) { if (!ns->blkconf.blk) { @@ -82,20 +308,25 @@ static int nvme_ns_check_constraints(NvmeNamespace *ns, Error **errp) return 0; } -int nvme_ns_setup(NvmeCtrl *n, NvmeNamespace *ns, Error **errp) +int nvme_ns_setup(NvmeNamespace *ns, Error **errp) { if 
(nvme_ns_check_constraints(ns, errp)) { return -1; } - if (nvme_ns_init_blk(n, ns, errp)) { + if (nvme_ns_init_blk(ns, errp)) { return -1; } - nvme_ns_init(ns); - if (nvme_register_namespace(n, ns, errp)) { + if (nvme_ns_init(ns, errp)) { return -1; } + if (ns->params.zoned) { + if (nvme_ns_zoned_check_calc_geometry(ns, errp) != 0) { + return -1; + } + nvme_ns_init_zoned(ns, 0); + } return 0; } @@ -105,9 +336,21 @@ void nvme_ns_drain(NvmeNamespace *ns) blk_drain(ns->blkconf.blk); } -void nvme_ns_flush(NvmeNamespace *ns) +void nvme_ns_shutdown(NvmeNamespace *ns) { blk_flush(ns->blkconf.blk); + if (ns->params.zoned) { + nvme_zoned_ns_shutdown(ns); + } +} + +void nvme_ns_cleanup(NvmeNamespace *ns) +{ + if (ns->params.zoned) { + g_free(ns->id_ns_zoned); + g_free(ns->zone_array); + g_free(ns->zd_extensions); + } } static void nvme_ns_realize(DeviceState *dev, Error **errp) @@ -117,16 +360,37 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp) NvmeCtrl *n = NVME(s->parent); Error *local_err = NULL; - if (nvme_ns_setup(n, ns, &local_err)) { + if (nvme_ns_setup(ns, &local_err)) { error_propagate_prepend(errp, local_err, "could not setup namespace: "); return; } + + if (nvme_register_namespace(n, ns, errp)) { + error_propagate_prepend(errp, local_err, + "could not register namespace: "); + return; + } + } static Property nvme_ns_props[] = { DEFINE_BLOCK_PROPERTIES(NvmeNamespace, blkconf), DEFINE_PROP_UINT32("nsid", NvmeNamespace, params.nsid, 0), + DEFINE_PROP_UUID("uuid", NvmeNamespace, params.uuid), + DEFINE_PROP_BOOL("zoned", NvmeNamespace, params.zoned, false), + DEFINE_PROP_SIZE("zoned.zone_size", NvmeNamespace, params.zone_size_bs, + NVME_DEFAULT_ZONE_SIZE), + DEFINE_PROP_SIZE("zoned.zone_capacity", NvmeNamespace, params.zone_cap_bs, + 0), + DEFINE_PROP_BOOL("zoned.cross_read", NvmeNamespace, + params.cross_zone_read, false), + DEFINE_PROP_UINT32("zoned.max_active", NvmeNamespace, + params.max_active_zones, 0), + DEFINE_PROP_UINT32("zoned.max_open", 
NvmeNamespace, + params.max_open_zones, 0), + DEFINE_PROP_UINT32("zoned.descr_ext_size", NvmeNamespace, + params.zd_extension_size, 0), DEFINE_PROP_END_OF_LIST(), }; |