-rw-r--r--  hw/block/meson.build    |    2
-rw-r--r--  hw/block/nvme-ns.c      |   38
-rw-r--r--  hw/block/nvme-ns.h      |   13
-rw-r--r--  hw/block/nvme-subsys.c  |  116
-rw-r--r--  hw/block/nvme-subsys.h  |   60
-rw-r--r--  hw/block/nvme.c         | 1439
-rw-r--r--  hw/block/nvme.h         |   63
-rw-r--r--  hw/block/trace-events   |   21
-rw-r--r--  include/block/nvme.h    |   88
9 files changed, 1445 insertions(+), 395 deletions(-)
diff --git a/hw/block/meson.build b/hw/block/meson.build
index 4bf994c..5492829 100644
--- a/hw/block/meson.build
+++ b/hw/block/meson.build
@@ -13,7 +13,7 @@ softmmu_ss.add(when: 'CONFIG_SSI_M25P80', if_true: files('m25p80.c'))
softmmu_ss.add(when: 'CONFIG_SWIM', if_true: files('swim.c'))
softmmu_ss.add(when: 'CONFIG_XEN', if_true: files('xen-block.c'))
softmmu_ss.add(when: 'CONFIG_TC58128', if_true: files('tc58128.c'))
-softmmu_ss.add(when: 'CONFIG_NVME_PCI', if_true: files('nvme.c', 'nvme-ns.c'))
+softmmu_ss.add(when: 'CONFIG_NVME_PCI', if_true: files('nvme.c', 'nvme-ns.c', 'nvme-subsys.c'))
specific_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk.c'))
specific_ss.add(when: 'CONFIG_VHOST_USER_BLK', if_true: files('vhost-user-blk.c'))
diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c
index 93ac6e1..eda6a0c 100644
--- a/hw/block/nvme-ns.c
+++ b/hw/block/nvme-ns.c
@@ -63,6 +63,15 @@ static int nvme_ns_init(NvmeNamespace *ns, Error **errp)
id_ns->npda = id_ns->npdg = npdg - 1;
+ if (nvme_ns_shared(ns)) {
+ id_ns->nmic |= NVME_NMIC_NS_SHARED;
+ }
+
+ /* Simple Copy command limits */
+ id_ns->mssrl = cpu_to_le16(ns->params.mssrl);
+ id_ns->mcl = cpu_to_le32(ns->params.mcl);
+ id_ns->msrc = ns->params.msrc;
+
return 0;
}
@@ -154,6 +163,18 @@ static int nvme_ns_zoned_check_calc_geometry(NvmeNamespace *ns, Error **errp)
return -1;
}
+ if (ns->params.max_active_zones) {
+ if (ns->params.max_open_zones > ns->params.max_active_zones) {
+ error_setg(errp, "max_open_zones (%u) exceeds max_active_zones (%u)",
+ ns->params.max_open_zones, ns->params.max_active_zones);
+ return -1;
+ }
+
+ if (!ns->params.max_open_zones) {
+ ns->params.max_open_zones = ns->params.max_active_zones;
+ }
+ }
+
if (ns->params.zd_extension_size) {
if (ns->params.zd_extension_size & 0x3f) {
error_setg(errp,
@@ -363,16 +384,27 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp)
return;
}
- if (nvme_register_namespace(n, ns, errp)) {
- return;
+ if (ns->subsys) {
+ if (nvme_subsys_register_ns(ns, errp)) {
+ return;
+ }
+ } else {
+ if (nvme_register_namespace(n, ns, errp)) {
+ return;
+ }
}
-
}
static Property nvme_ns_props[] = {
DEFINE_BLOCK_PROPERTIES(NvmeNamespace, blkconf),
+ DEFINE_PROP_LINK("subsys", NvmeNamespace, subsys, TYPE_NVME_SUBSYS,
+ NvmeSubsystem *),
+ DEFINE_PROP_BOOL("detached", NvmeNamespace, params.detached, false),
DEFINE_PROP_UINT32("nsid", NvmeNamespace, params.nsid, 0),
DEFINE_PROP_UUID("uuid", NvmeNamespace, params.uuid),
+ DEFINE_PROP_UINT16("mssrl", NvmeNamespace, params.mssrl, 128),
+ DEFINE_PROP_UINT32("mcl", NvmeNamespace, params.mcl, 128),
+ DEFINE_PROP_UINT8("msrc", NvmeNamespace, params.msrc, 127),
DEFINE_PROP_BOOL("zoned", NvmeNamespace, params.zoned, false),
DEFINE_PROP_SIZE("zoned.zone_size", NvmeNamespace, params.zone_size_bs,
NVME_DEFAULT_ZONE_SIZE),
diff --git a/hw/block/nvme-ns.h b/hw/block/nvme-ns.h
index 293ac99..318d3ae 100644
--- a/hw/block/nvme-ns.h
+++ b/hw/block/nvme-ns.h
@@ -26,9 +26,14 @@ typedef struct NvmeZone {
} NvmeZone;
typedef struct NvmeNamespaceParams {
+ bool detached;
uint32_t nsid;
QemuUUID uuid;
+ uint16_t mssrl;
+ uint32_t mcl;
+ uint8_t msrc;
+
bool zoned;
bool cross_zone_read;
uint64_t zone_size_bs;
@@ -47,6 +52,9 @@ typedef struct NvmeNamespace {
const uint32_t *iocs;
uint8_t csi;
+ NvmeSubsystem *subsys;
+ QTAILQ_ENTRY(NvmeNamespace) entry;
+
NvmeIdNsZoned *id_ns_zoned;
NvmeZone *zone_array;
QTAILQ_HEAD(, NvmeZone) exp_open_zones;
@@ -77,6 +85,11 @@ static inline uint32_t nvme_nsid(NvmeNamespace *ns)
return -1;
}
+static inline bool nvme_ns_shared(NvmeNamespace *ns)
+{
+ return !!ns->subsys;
+}
+
static inline NvmeLBAF *nvme_ns_lbaf(NvmeNamespace *ns)
{
NvmeIdNs *id_ns = &ns->id_ns;
diff --git a/hw/block/nvme-subsys.c b/hw/block/nvme-subsys.c
new file mode 100644
index 0000000..af4804a
--- /dev/null
+++ b/hw/block/nvme-subsys.c
@@ -0,0 +1,116 @@
+/*
+ * QEMU NVM Express Subsystem: nvme-subsys
+ *
+ * Copyright (c) 2021 Minwoo Im <minwoo.im.dev@gmail.com>
+ *
+ * This code is licensed under the GNU GPL v2. Refer COPYING.
+ */
+
+#include "qemu/units.h"
+#include "qemu/osdep.h"
+#include "qemu/uuid.h"
+#include "qemu/iov.h"
+#include "qemu/cutils.h"
+#include "qapi/error.h"
+#include "hw/qdev-properties.h"
+#include "hw/qdev-core.h"
+#include "hw/block/block.h"
+#include "block/aio.h"
+#include "block/accounting.h"
+#include "sysemu/sysemu.h"
+#include "hw/pci/pci.h"
+#include "nvme.h"
+#include "nvme-subsys.h"
+
+int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp)
+{
+ NvmeSubsystem *subsys = n->subsys;
+ int cntlid;
+
+ for (cntlid = 0; cntlid < ARRAY_SIZE(subsys->ctrls); cntlid++) {
+ if (!subsys->ctrls[cntlid]) {
+ break;
+ }
+ }
+
+ if (cntlid == ARRAY_SIZE(subsys->ctrls)) {
+ error_setg(errp, "no more free controller id");
+ return -1;
+ }
+
+ subsys->ctrls[cntlid] = n;
+
+ return cntlid;
+}
+
+int nvme_subsys_register_ns(NvmeNamespace *ns, Error **errp)
+{
+ NvmeSubsystem *subsys = ns->subsys;
+ NvmeCtrl *n;
+ int i;
+
+ if (subsys->namespaces[nvme_nsid(ns)]) {
+ error_setg(errp, "namespace %d already registerd to subsy %s",
+ nvme_nsid(ns), subsys->parent_obj.id);
+ return -1;
+ }
+
+ subsys->namespaces[nvme_nsid(ns)] = ns;
+
+ for (i = 0; i < ARRAY_SIZE(subsys->ctrls); i++) {
+ n = subsys->ctrls[i];
+
+ if (n && nvme_register_namespace(n, ns, errp)) {
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static void nvme_subsys_setup(NvmeSubsystem *subsys)
+{
+ const char *nqn = subsys->params.nqn ?
+ subsys->params.nqn : subsys->parent_obj.id;
+
+ snprintf((char *)subsys->subnqn, sizeof(subsys->subnqn),
+ "nqn.2019-08.org.qemu:%s", nqn);
+}
+
+static void nvme_subsys_realize(DeviceState *dev, Error **errp)
+{
+ NvmeSubsystem *subsys = NVME_SUBSYS(dev);
+
+ nvme_subsys_setup(subsys);
+}
+
+static Property nvme_subsystem_props[] = {
+ DEFINE_PROP_STRING("nqn", NvmeSubsystem, params.nqn),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+static void nvme_subsys_class_init(ObjectClass *oc, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(oc);
+
+ set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
+
+ dc->realize = nvme_subsys_realize;
+ dc->desc = "Virtual NVMe subsystem";
+
+ device_class_set_props(dc, nvme_subsystem_props);
+}
+
+static const TypeInfo nvme_subsys_info = {
+ .name = TYPE_NVME_SUBSYS,
+ .parent = TYPE_DEVICE,
+ .class_init = nvme_subsys_class_init,
+ .instance_size = sizeof(NvmeSubsystem),
+};
+
+static void nvme_subsys_register_types(void)
+{
+ type_register_static(&nvme_subsys_info);
+}
+
+type_init(nvme_subsys_register_types)
diff --git a/hw/block/nvme-subsys.h b/hw/block/nvme-subsys.h
new file mode 100644
index 0000000..fb66ae7
--- /dev/null
+++ b/hw/block/nvme-subsys.h
@@ -0,0 +1,60 @@
+/*
+ * QEMU NVM Express Subsystem: nvme-subsys
+ *
+ * Copyright (c) 2021 Minwoo Im <minwoo.im.dev@gmail.com>
+ *
+ * This code is licensed under the GNU GPL v2. Refer COPYING.
+ */
+
+#ifndef NVME_SUBSYS_H
+#define NVME_SUBSYS_H
+
+#define TYPE_NVME_SUBSYS "nvme-subsys"
+#define NVME_SUBSYS(obj) \
+ OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS)
+
+#define NVME_SUBSYS_MAX_CTRLS 32
+#define NVME_SUBSYS_MAX_NAMESPACES 256
+
+typedef struct NvmeCtrl NvmeCtrl;
+typedef struct NvmeNamespace NvmeNamespace;
+typedef struct NvmeSubsystem {
+ DeviceState parent_obj;
+ uint8_t subnqn[256];
+
+ NvmeCtrl *ctrls[NVME_SUBSYS_MAX_CTRLS];
+ /* Allocated namespaces for this subsystem */
+ NvmeNamespace *namespaces[NVME_SUBSYS_MAX_NAMESPACES + 1];
+
+ struct {
+ char *nqn;
+ } params;
+} NvmeSubsystem;
+
+int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp);
+int nvme_subsys_register_ns(NvmeNamespace *ns, Error **errp);
+
+static inline NvmeCtrl *nvme_subsys_ctrl(NvmeSubsystem *subsys,
+ uint32_t cntlid)
+{
+ if (!subsys) {
+ return NULL;
+ }
+
+ return subsys->ctrls[cntlid];
+}
+
+/*
+ * Return allocated namespace of the specified nsid in the subsystem.
+ */
+static inline NvmeNamespace *nvme_subsys_ns(NvmeSubsystem *subsys,
+ uint32_t nsid)
+{
+ if (!subsys) {
+ return NULL;
+ }
+
+ return subsys->namespaces[nsid];
+}
+
+#endif /* NVME_SUBSYS_H */
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index fb83636..d439e44 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -17,14 +17,17 @@
/**
* Usage: add options:
* -drive file=<file>,if=none,id=<drive_id>
+ * -device nvme-subsys,id=<subsys_id>,nqn=<nqn_id>
* -device nvme,serial=<serial>,id=<bus_name>, \
* cmb_size_mb=<cmb_size_mb[optional]>, \
* [pmrdev=<mem_backend_file_id>,] \
* max_ioqpairs=<N[optional]>, \
- * aerl=<N[optional]>, aer_max_queued=<N[optional]>, \
- * mdts=<N[optional]>,zoned.append_size_limit=<N[optional]> \
+ * aerl=<N[optional]>,aer_max_queued=<N[optional]>, \
+ * mdts=<N[optional]>,zoned.zasl=<N[optional]>, \
+ * subsys=<subsys_id>
* -device nvme-ns,drive=<drive_id>,bus=<bus_name>,nsid=<nsid>,\
- * zoned=<true|false[optional]>
+ * zoned=<true|false[optional]>, \
+ * subsys=<subsys_id>,detached=<true|false[optional]>
*
* Note cmb_size_mb denotes size of CMB in MB. CMB is assumed to be at
* offset 0 in BAR2 and supports only WDS, RDS and SQS for now. By default, the
@@ -38,9 +41,27 @@
*
* The PMR will use BAR 4/5 exclusively.
*
+ * To place controller(s) and namespace(s) in a subsystem, provide the
+ * nvme-subsys device as shown above.
+ *
+ * nvme subsystem device parameters
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * - `nqn`
+ * This parameter provides the `<nqn_id>` part of the string
+ * `nqn.2019-08.org.qemu:<nqn_id>` which will be reported in the SUBNQN field
+ * of subsystem controllers. Note that `<nqn_id>` should be unique per
+ * subsystem, but this is not enforced by QEMU. If not specified, it will
+ * default to the value of the `id` parameter (`<subsys_id>`).
*
* nvme device parameters
* ~~~~~~~~~~~~~~~~~~~~~~
+ * - `subsys`
+ * Specifying this parameter attaches the controller to the subsystem and
+ * the SUBNQN field in the controller will report the NQN of the subsystem
+ * device. This also enables the multi-controller capability, reported in the
+ * CMIC (Controller Multi-path I/O and Namespace Sharing Capabilities) field
+ * of the Identify Controller data structure.
+ *
* - `aerl`
* The Asynchronous Event Request Limit (AERL). Indicates the maximum number
* of concurrently outstanding Asynchronous Event Request commands support
@@ -51,13 +72,31 @@
* completion when there are no outstanding AERs. When the maximum number of
* enqueued events are reached, subsequent events will be dropped.
*
- * - `zoned.append_size_limit`
- * The maximum I/O size in bytes that is allowed in Zone Append command.
- * The default is 128KiB. Since internally this this value is maintained as
- * ZASL = log2(<maximum append size> / <page size>), some values assigned
- * to this property may be rounded down and result in a lower maximum ZA
- * data size being in effect. By setting this property to 0, users can make
- * ZASL to be equal to MDTS. This property only affects zoned namespaces.
+ * - `mdts`
+ * Indicates the maximum data transfer size for a command that transfers data
+ * between host-accessible memory and the controller. The value is specified
+ * as a power of two (2^n) and is in units of the minimum memory page size
+ * (CAP.MPSMIN). The default value is 7 (i.e. 512 KiB).
+ *
+ * - `zoned.zasl`
+ * Indicates the maximum data transfer size for the Zone Append command. Like
+ * `mdts`, the value is specified as a power of two (2^n) and is in units of
+ * the minimum memory page size (CAP.MPSMIN). The default value is 0 (i.e.
+ * defaulting to the value of `mdts`).
+ *
+ * nvme namespace device parameters
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * - `subsys`
+ * If given, the namespace will be attached to all controllers in the
+ * subsystem. Otherwise, `bus` must be given to attach this namespace to a
+ * specific controller as a non-shared namespace.
+ *
+ * - `detached`
+ * This parameter is only valid together with the `subsys` parameter. If left
+ * at the default value (`false/off`), the namespace will be attached to all
+ * controllers in the NVMe subsystem at boot-up. If set to `true/on`, the
+ * namespace will be available in the subsystem but not attached to any
+ * controllers.
*
* Setting `zoned` to true selects Zoned Command Set at the namespace.
* In this case, the following namespace properties are available to configure
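A minimal usage sketch of the options documented above (not part of the patch;
drive files and device IDs are illustrative): two controllers share namespace 1
through a common subsystem, while namespace 2 is left detached and can later be
attached with the Namespace Attachment admin command added by this series. With
the default mdts=7 and a 4 KiB CAP.MPSMIN page size, the maximum data transfer
size works out to 2^7 * 4 KiB = 512 KiB.

    -device nvme-subsys,id=subsys0,nqn=subsys0
    -device nvme,serial=deadbeef,id=nvme0,subsys=subsys0
    -device nvme,serial=cafebabe,id=nvme1,subsys=subsys0
    -drive file=ns1.img,if=none,id=drv1
    -device nvme-ns,drive=drv1,nsid=1,subsys=subsys0
    -drive file=ns2.img,if=none,id=drv2
    -device nvme-ns,drive=drv2,nsid=2,subsys=subsys0,detached=true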
@@ -157,6 +196,7 @@ static const uint32_t nvme_cse_acs[256] = {
[NVME_ADM_CMD_SET_FEATURES] = NVME_CMD_EFF_CSUPP,
[NVME_ADM_CMD_GET_FEATURES] = NVME_CMD_EFF_CSUPP,
[NVME_ADM_CMD_ASYNC_EV_REQ] = NVME_CMD_EFF_CSUPP,
+ [NVME_ADM_CMD_NS_ATTACHMENT] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_NIC,
};
static const uint32_t nvme_cse_iocs_none[256];
@@ -167,6 +207,7 @@ static const uint32_t nvme_cse_iocs_nvm[256] = {
[NVME_CMD_WRITE] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
[NVME_CMD_READ] = NVME_CMD_EFF_CSUPP,
[NVME_CMD_DSM] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
+ [NVME_CMD_COPY] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
[NVME_CMD_COMPARE] = NVME_CMD_EFF_CSUPP,
};
@@ -176,6 +217,7 @@ static const uint32_t nvme_cse_iocs_zoned[256] = {
[NVME_CMD_WRITE] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
[NVME_CMD_READ] = NVME_CMD_EFF_CSUPP,
[NVME_CMD_DSM] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
+ [NVME_CMD_COPY] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
[NVME_CMD_COMPARE] = NVME_CMD_EFF_CSUPP,
[NVME_CMD_ZONE_APPEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
[NVME_CMD_ZONE_MGMT_SEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
@@ -407,15 +449,31 @@ static void nvme_req_clear(NvmeRequest *req)
req->status = NVME_SUCCESS;
}
-static void nvme_req_exit(NvmeRequest *req)
+static inline void nvme_sg_init(NvmeCtrl *n, NvmeSg *sg, bool dma)
{
- if (req->qsg.sg) {
- qemu_sglist_destroy(&req->qsg);
+ if (dma) {
+ pci_dma_sglist_init(&sg->qsg, &n->parent_obj, 0);
+ sg->flags = NVME_SG_DMA;
+ } else {
+ qemu_iovec_init(&sg->iov, 0);
}
- if (req->iov.iov) {
- qemu_iovec_destroy(&req->iov);
+ sg->flags |= NVME_SG_ALLOC;
+}
+
+static inline void nvme_sg_unmap(NvmeSg *sg)
+{
+ if (!(sg->flags & NVME_SG_ALLOC)) {
+ return;
}
+
+ if (sg->flags & NVME_SG_DMA) {
+ qemu_sglist_destroy(&sg->qsg);
+ } else {
+ qemu_iovec_destroy(&sg->iov);
+ }
+
+ memset(sg, 0x0, sizeof(*sg));
}
static uint16_t nvme_map_addr_cmb(NvmeCtrl *n, QEMUIOVector *iov, hwaddr addr,
@@ -452,8 +510,7 @@ static uint16_t nvme_map_addr_pmr(NvmeCtrl *n, QEMUIOVector *iov, hwaddr addr,
return NVME_SUCCESS;
}
-static uint16_t nvme_map_addr(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
- hwaddr addr, size_t len)
+static uint16_t nvme_map_addr(NvmeCtrl *n, NvmeSg *sg, hwaddr addr, size_t len)
{
bool cmb = false, pmr = false;
@@ -470,40 +527,33 @@ static uint16_t nvme_map_addr(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
}
if (cmb || pmr) {
- if (qsg && qsg->sg) {
+ if (sg->flags & NVME_SG_DMA) {
return NVME_INVALID_USE_OF_CMB | NVME_DNR;
}
- assert(iov);
-
- if (!iov->iov) {
- qemu_iovec_init(iov, 1);
- }
-
if (cmb) {
- return nvme_map_addr_cmb(n, iov, addr, len);
+ return nvme_map_addr_cmb(n, &sg->iov, addr, len);
} else {
- return nvme_map_addr_pmr(n, iov, addr, len);
+ return nvme_map_addr_pmr(n, &sg->iov, addr, len);
}
}
- if (iov && iov->iov) {
+ if (!(sg->flags & NVME_SG_DMA)) {
return NVME_INVALID_USE_OF_CMB | NVME_DNR;
}
- assert(qsg);
-
- if (!qsg->sg) {
- pci_dma_sglist_init(qsg, &n->parent_obj, 1);
- }
-
- qemu_sglist_add(qsg, addr, len);
+ qemu_sglist_add(&sg->qsg, addr, len);
return NVME_SUCCESS;
}
-static uint16_t nvme_map_prp(NvmeCtrl *n, uint64_t prp1, uint64_t prp2,
- uint32_t len, NvmeRequest *req)
+static inline bool nvme_addr_is_dma(NvmeCtrl *n, hwaddr addr)
+{
+ return !(nvme_addr_is_cmb(n, addr) || nvme_addr_is_pmr(n, addr));
+}
+
+static uint16_t nvme_map_prp(NvmeCtrl *n, NvmeSg *sg, uint64_t prp1,
+ uint64_t prp2, uint32_t len)
{
hwaddr trans_len = n->page_size - (prp1 % n->page_size);
trans_len = MIN(len, trans_len);
@@ -511,20 +561,13 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, uint64_t prp1, uint64_t prp2,
uint16_t status;
int ret;
- QEMUSGList *qsg = &req->qsg;
- QEMUIOVector *iov = &req->iov;
-
trace_pci_nvme_map_prp(trans_len, len, prp1, prp2, num_prps);
- if (nvme_addr_is_cmb(n, prp1) || (nvme_addr_is_pmr(n, prp1))) {
- qemu_iovec_init(iov, num_prps);
- } else {
- pci_dma_sglist_init(qsg, &n->parent_obj, num_prps);
- }
+ nvme_sg_init(n, sg, nvme_addr_is_dma(n, prp1));
- status = nvme_map_addr(n, qsg, iov, prp1, trans_len);
+ status = nvme_map_addr(n, sg, prp1, trans_len);
if (status) {
- return status;
+ goto unmap;
}
len -= trans_len;
@@ -539,7 +582,8 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, uint64_t prp1, uint64_t prp2,
ret = nvme_addr_read(n, prp2, (void *)prp_list, prp_trans);
if (ret) {
trace_pci_nvme_err_addr_read(prp2);
- return NVME_DATA_TRAS_ERROR;
+ status = NVME_DATA_TRAS_ERROR;
+ goto unmap;
}
while (len != 0) {
uint64_t prp_ent = le64_to_cpu(prp_list[i]);
@@ -547,7 +591,8 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, uint64_t prp1, uint64_t prp2,
if (i == n->max_prp_ents - 1 && len > n->page_size) {
if (unlikely(prp_ent & (n->page_size - 1))) {
trace_pci_nvme_err_invalid_prplist_ent(prp_ent);
- return NVME_INVALID_PRP_OFFSET | NVME_DNR;
+ status = NVME_INVALID_PRP_OFFSET | NVME_DNR;
+ goto unmap;
}
i = 0;
@@ -557,20 +602,22 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, uint64_t prp1, uint64_t prp2,
prp_trans);
if (ret) {
trace_pci_nvme_err_addr_read(prp_ent);
- return NVME_DATA_TRAS_ERROR;
+ status = NVME_DATA_TRAS_ERROR;
+ goto unmap;
}
prp_ent = le64_to_cpu(prp_list[i]);
}
if (unlikely(prp_ent & (n->page_size - 1))) {
trace_pci_nvme_err_invalid_prplist_ent(prp_ent);
- return NVME_INVALID_PRP_OFFSET | NVME_DNR;
+ status = NVME_INVALID_PRP_OFFSET | NVME_DNR;
+ goto unmap;
}
trans_len = MIN(len, n->page_size);
- status = nvme_map_addr(n, qsg, iov, prp_ent, trans_len);
+ status = nvme_map_addr(n, sg, prp_ent, trans_len);
if (status) {
- return status;
+ goto unmap;
}
len -= trans_len;
@@ -579,26 +626,30 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, uint64_t prp1, uint64_t prp2,
} else {
if (unlikely(prp2 & (n->page_size - 1))) {
trace_pci_nvme_err_invalid_prp2_align(prp2);
- return NVME_INVALID_PRP_OFFSET | NVME_DNR;
+ status = NVME_INVALID_PRP_OFFSET | NVME_DNR;
+ goto unmap;
}
- status = nvme_map_addr(n, qsg, iov, prp2, len);
+ status = nvme_map_addr(n, sg, prp2, len);
if (status) {
- return status;
+ goto unmap;
}
}
}
return NVME_SUCCESS;
+
+unmap:
+ nvme_sg_unmap(sg);
+ return status;
}
/*
* Map 'nsgld' data descriptors from 'segment'. The function will subtract the
* number of bytes mapped in len.
*/
-static uint16_t nvme_map_sgl_data(NvmeCtrl *n, QEMUSGList *qsg,
- QEMUIOVector *iov,
+static uint16_t nvme_map_sgl_data(NvmeCtrl *n, NvmeSg *sg,
NvmeSglDescriptor *segment, uint64_t nsgld,
- size_t *len, NvmeRequest *req)
+ size_t *len, NvmeCmd *cmd)
{
dma_addr_t addr, trans_len;
uint32_t dlen;
@@ -609,7 +660,7 @@ static uint16_t nvme_map_sgl_data(NvmeCtrl *n, QEMUSGList *qsg,
switch (type) {
case NVME_SGL_DESCR_TYPE_BIT_BUCKET:
- if (req->cmd.opcode == NVME_CMD_WRITE) {
+ if (cmd->opcode == NVME_CMD_WRITE) {
continue;
}
case NVME_SGL_DESCR_TYPE_DATA_BLOCK:
@@ -638,7 +689,7 @@ static uint16_t nvme_map_sgl_data(NvmeCtrl *n, QEMUSGList *qsg,
break;
}
- trace_pci_nvme_err_invalid_sgl_excess_length(nvme_cid(req));
+ trace_pci_nvme_err_invalid_sgl_excess_length(dlen);
return NVME_DATA_SGL_LEN_INVALID | NVME_DNR;
}
@@ -654,7 +705,7 @@ static uint16_t nvme_map_sgl_data(NvmeCtrl *n, QEMUSGList *qsg,
return NVME_DATA_SGL_LEN_INVALID | NVME_DNR;
}
- status = nvme_map_addr(n, qsg, iov, addr, trans_len);
+ status = nvme_map_addr(n, sg, addr, trans_len);
if (status) {
return status;
}
@@ -666,9 +717,8 @@ next:
return NVME_SUCCESS;
}
-static uint16_t nvme_map_sgl(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
- NvmeSglDescriptor sgl, size_t len,
- NvmeRequest *req)
+static uint16_t nvme_map_sgl(NvmeCtrl *n, NvmeSg *sg, NvmeSglDescriptor sgl,
+ size_t len, NvmeCmd *cmd)
{
/*
* Read the segment in chunks of 256 descriptors (one 4k page) to avoid
@@ -689,14 +739,16 @@ static uint16_t nvme_map_sgl(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
sgld = &sgl;
addr = le64_to_cpu(sgl.addr);
- trace_pci_nvme_map_sgl(nvme_cid(req), NVME_SGL_TYPE(sgl.type), len);
+ trace_pci_nvme_map_sgl(NVME_SGL_TYPE(sgl.type), len);
+
+ nvme_sg_init(n, sg, nvme_addr_is_dma(n, addr));
/*
* If the entire transfer can be described with a single data block it can
* be mapped directly.
*/
if (NVME_SGL_TYPE(sgl.type) == NVME_SGL_DESCR_TYPE_DATA_BLOCK) {
- status = nvme_map_sgl_data(n, qsg, iov, sgld, 1, &len, req);
+ status = nvme_map_sgl_data(n, sg, sgld, 1, &len, cmd);
if (status) {
goto unmap;
}
@@ -734,8 +786,8 @@ static uint16_t nvme_map_sgl(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
goto unmap;
}
- status = nvme_map_sgl_data(n, qsg, iov, segment, SEG_CHUNK_SIZE,
- &len, req);
+ status = nvme_map_sgl_data(n, sg, segment, SEG_CHUNK_SIZE,
+ &len, cmd);
if (status) {
goto unmap;
}
@@ -761,7 +813,7 @@ static uint16_t nvme_map_sgl(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
switch (NVME_SGL_TYPE(last_sgld->type)) {
case NVME_SGL_DESCR_TYPE_DATA_BLOCK:
case NVME_SGL_DESCR_TYPE_BIT_BUCKET:
- status = nvme_map_sgl_data(n, qsg, iov, segment, nsgld, &len, req);
+ status = nvme_map_sgl_data(n, sg, segment, nsgld, &len, cmd);
if (status) {
goto unmap;
}
@@ -788,7 +840,7 @@ static uint16_t nvme_map_sgl(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
* Do not map the last descriptor; it will be a Segment or Last Segment
* descriptor and is handled by the next iteration.
*/
- status = nvme_map_sgl_data(n, qsg, iov, segment, nsgld - 1, &len, req);
+ status = nvme_map_sgl_data(n, sg, segment, nsgld - 1, &len, cmd);
if (status) {
goto unmap;
}
@@ -804,83 +856,120 @@ out:
return NVME_SUCCESS;
unmap:
- if (iov->iov) {
- qemu_iovec_destroy(iov);
- }
-
- if (qsg->sg) {
- qemu_sglist_destroy(qsg);
- }
-
+ nvme_sg_unmap(sg);
return status;
}
-static uint16_t nvme_map_dptr(NvmeCtrl *n, size_t len, NvmeRequest *req)
+static uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len,
+ NvmeCmd *cmd)
{
uint64_t prp1, prp2;
- switch (NVME_CMD_FLAGS_PSDT(req->cmd.flags)) {
+ switch (NVME_CMD_FLAGS_PSDT(cmd->flags)) {
case NVME_PSDT_PRP:
- prp1 = le64_to_cpu(req->cmd.dptr.prp1);
- prp2 = le64_to_cpu(req->cmd.dptr.prp2);
+ prp1 = le64_to_cpu(cmd->dptr.prp1);
+ prp2 = le64_to_cpu(cmd->dptr.prp2);
- return nvme_map_prp(n, prp1, prp2, len, req);
+ return nvme_map_prp(n, sg, prp1, prp2, len);
case NVME_PSDT_SGL_MPTR_CONTIGUOUS:
case NVME_PSDT_SGL_MPTR_SGL:
- /* SGLs shall not be used for Admin commands in NVMe over PCIe */
- if (!req->sq->sqid) {
- return NVME_INVALID_FIELD | NVME_DNR;
- }
-
- return nvme_map_sgl(n, &req->qsg, &req->iov, req->cmd.dptr.sgl, len,
- req);
+ return nvme_map_sgl(n, sg, cmd->dptr.sgl, len, cmd);
default:
return NVME_INVALID_FIELD;
}
}
-static uint16_t nvme_dma(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
- DMADirection dir, NvmeRequest *req)
-{
- uint16_t status = NVME_SUCCESS;
+typedef enum NvmeTxDirection {
+ NVME_TX_DIRECTION_TO_DEVICE = 0,
+ NVME_TX_DIRECTION_FROM_DEVICE = 1,
+} NvmeTxDirection;
- status = nvme_map_dptr(n, len, req);
- if (status) {
- return status;
- }
-
- /* assert that only one of qsg and iov carries data */
- assert((req->qsg.nsg > 0) != (req->iov.niov > 0));
+static uint16_t nvme_tx(NvmeCtrl *n, NvmeSg *sg, uint8_t *ptr, uint32_t len,
+ NvmeTxDirection dir)
+{
+ assert(sg->flags & NVME_SG_ALLOC);
- if (req->qsg.nsg > 0) {
+ if (sg->flags & NVME_SG_DMA) {
uint64_t residual;
- if (dir == DMA_DIRECTION_TO_DEVICE) {
- residual = dma_buf_write(ptr, len, &req->qsg);
+ if (dir == NVME_TX_DIRECTION_TO_DEVICE) {
+ residual = dma_buf_write(ptr, len, &sg->qsg);
} else {
- residual = dma_buf_read(ptr, len, &req->qsg);
+ residual = dma_buf_read(ptr, len, &sg->qsg);
}
if (unlikely(residual)) {
trace_pci_nvme_err_invalid_dma();
- status = NVME_INVALID_FIELD | NVME_DNR;
+ return NVME_INVALID_FIELD | NVME_DNR;
}
} else {
size_t bytes;
- if (dir == DMA_DIRECTION_TO_DEVICE) {
- bytes = qemu_iovec_to_buf(&req->iov, 0, ptr, len);
+ if (dir == NVME_TX_DIRECTION_TO_DEVICE) {
+ bytes = qemu_iovec_to_buf(&sg->iov, 0, ptr, len);
} else {
- bytes = qemu_iovec_from_buf(&req->iov, 0, ptr, len);
+ bytes = qemu_iovec_from_buf(&sg->iov, 0, ptr, len);
}
if (unlikely(bytes != len)) {
trace_pci_nvme_err_invalid_dma();
- status = NVME_INVALID_FIELD | NVME_DNR;
+ return NVME_INVALID_FIELD | NVME_DNR;
}
}
- return status;
+ return NVME_SUCCESS;
+}
+
+static inline uint16_t nvme_c2h(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
+ NvmeRequest *req)
+{
+ uint16_t status;
+
+ status = nvme_map_dptr(n, &req->sg, len, &req->cmd);
+ if (status) {
+ return status;
+ }
+
+ return nvme_tx(n, &req->sg, ptr, len, NVME_TX_DIRECTION_FROM_DEVICE);
+}
+
+static inline uint16_t nvme_h2c(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
+ NvmeRequest *req)
+{
+ uint16_t status;
+
+ status = nvme_map_dptr(n, &req->sg, len, &req->cmd);
+ if (status) {
+ return status;
+ }
+
+ return nvme_tx(n, &req->sg, ptr, len, NVME_TX_DIRECTION_TO_DEVICE);
+}
+
+static inline void nvme_blk_read(BlockBackend *blk, int64_t offset,
+ BlockCompletionFunc *cb, NvmeRequest *req)
+{
+ assert(req->sg.flags & NVME_SG_ALLOC);
+
+ if (req->sg.flags & NVME_SG_DMA) {
+ req->aiocb = dma_blk_read(blk, &req->sg.qsg, offset, BDRV_SECTOR_SIZE,
+ cb, req);
+ } else {
+ req->aiocb = blk_aio_preadv(blk, offset, &req->sg.iov, 0, cb, req);
+ }
+}
+
+static inline void nvme_blk_write(BlockBackend *blk, int64_t offset,
+ BlockCompletionFunc *cb, NvmeRequest *req)
+{
+ assert(req->sg.flags & NVME_SG_ALLOC);
+
+ if (req->sg.flags & NVME_SG_DMA) {
+ req->aiocb = dma_blk_write(blk, &req->sg.qsg, offset, BDRV_SECTOR_SIZE,
+ cb, req);
+ } else {
+ req->aiocb = blk_aio_pwritev(blk, offset, &req->sg.iov, 0, cb, req);
+ }
}
static void nvme_post_cqes(void *opaque)
@@ -913,7 +1002,7 @@ static void nvme_post_cqes(void *opaque)
}
QTAILQ_REMOVE(&cq->req_list, req, entry);
nvme_inc_cq_tail(cq);
- nvme_req_exit(req);
+ nvme_sg_unmap(&req->sg);
QTAILQ_INSERT_TAIL(&sq->req_list, req, entry);
}
if (cq->tail != cq->head) {
@@ -1048,6 +1137,7 @@ static inline uint16_t nvme_check_mdts(NvmeCtrl *n, size_t len)
uint8_t mdts = n->params.mdts;
if (mdts && len > n->page_size << mdts) {
+ trace_pci_nvme_err_mdts(len);
return NVME_INVALID_FIELD | NVME_DNR;
}
@@ -1129,7 +1219,7 @@ static void nvme_aio_err(NvmeRequest *req, int ret)
break;
}
- trace_pci_nvme_err_aio(nvme_cid(req), strerror(ret), status);
+ trace_pci_nvme_err_aio(nvme_cid(req), strerror(-ret), status);
error_setg_errno(&local_err, -ret, "aio failed");
error_report_err(local_err);
@@ -1185,9 +1275,8 @@ static uint16_t nvme_check_zone_state_for_write(NvmeZone *zone)
return NVME_INTERNAL_DEV_ERROR;
}
-static uint16_t nvme_check_zone_write(NvmeCtrl *n, NvmeNamespace *ns,
- NvmeZone *zone, uint64_t slba,
- uint32_t nlb)
+static uint16_t nvme_check_zone_write(NvmeNamespace *ns, NvmeZone *zone,
+ uint64_t slba, uint32_t nlb)
{
uint64_t zcap = nvme_zone_wr_boundary(zone);
uint16_t status;
@@ -1212,8 +1301,6 @@ static uint16_t nvme_check_zone_write(NvmeCtrl *n, NvmeNamespace *ns,
static uint16_t nvme_check_zone_state_for_read(NvmeZone *zone)
{
- uint16_t status;
-
switch (nvme_get_zone_state(zone)) {
case NVME_ZONE_STATE_EMPTY:
case NVME_ZONE_STATE_IMPLICITLY_OPEN:
@@ -1221,16 +1308,15 @@ static uint16_t nvme_check_zone_state_for_read(NvmeZone *zone)
case NVME_ZONE_STATE_FULL:
case NVME_ZONE_STATE_CLOSED:
case NVME_ZONE_STATE_READ_ONLY:
- status = NVME_SUCCESS;
- break;
+ return NVME_SUCCESS;
case NVME_ZONE_STATE_OFFLINE:
- status = NVME_ZONE_OFFLINE;
- break;
+ trace_pci_nvme_err_zone_is_offline(zone->d.zslba);
+ return NVME_ZONE_OFFLINE;
default:
assert(false);
}
- return status;
+ return NVME_INTERNAL_DEV_ERROR;
}
static uint16_t nvme_check_zone_read(NvmeNamespace *ns, uint64_t slba,
@@ -1265,7 +1351,45 @@ static uint16_t nvme_check_zone_read(NvmeNamespace *ns, uint64_t slba,
return status;
}
-static void nvme_auto_transition_zone(NvmeNamespace *ns)
+static uint16_t nvme_zrm_finish(NvmeNamespace *ns, NvmeZone *zone)
+{
+ switch (nvme_get_zone_state(zone)) {
+ case NVME_ZONE_STATE_FULL:
+ return NVME_SUCCESS;
+
+ case NVME_ZONE_STATE_IMPLICITLY_OPEN:
+ case NVME_ZONE_STATE_EXPLICITLY_OPEN:
+ nvme_aor_dec_open(ns);
+ /* fallthrough */
+ case NVME_ZONE_STATE_CLOSED:
+ nvme_aor_dec_active(ns);
+ /* fallthrough */
+ case NVME_ZONE_STATE_EMPTY:
+ nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_FULL);
+ return NVME_SUCCESS;
+
+ default:
+ return NVME_ZONE_INVAL_TRANSITION;
+ }
+}
+
+static uint16_t nvme_zrm_close(NvmeNamespace *ns, NvmeZone *zone)
+{
+ switch (nvme_get_zone_state(zone)) {
+ case NVME_ZONE_STATE_EXPLICITLY_OPEN:
+ case NVME_ZONE_STATE_IMPLICITLY_OPEN:
+ nvme_aor_dec_open(ns);
+ nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_CLOSED);
+ /* fall through */
+ case NVME_ZONE_STATE_CLOSED:
+ return NVME_SUCCESS;
+
+ default:
+ return NVME_ZONE_INVAL_TRANSITION;
+ }
+}
+
+static void nvme_zrm_auto_transition_zone(NvmeNamespace *ns)
{
NvmeZone *zone;
@@ -1277,85 +1401,92 @@ static void nvme_auto_transition_zone(NvmeNamespace *ns)
* Automatically close this implicitly open zone.
*/
QTAILQ_REMOVE(&ns->imp_open_zones, zone, entry);
- nvme_aor_dec_open(ns);
- nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_CLOSED);
+ nvme_zrm_close(ns, zone);
}
}
}
-static uint16_t nvme_auto_open_zone(NvmeNamespace *ns, NvmeZone *zone)
+static uint16_t __nvme_zrm_open(NvmeNamespace *ns, NvmeZone *zone,
+ bool implicit)
{
- uint16_t status = NVME_SUCCESS;
- uint8_t zs = nvme_get_zone_state(zone);
+ int act = 0;
+ uint16_t status;
- if (zs == NVME_ZONE_STATE_EMPTY) {
- nvme_auto_transition_zone(ns);
- status = nvme_aor_check(ns, 1, 1);
- } else if (zs == NVME_ZONE_STATE_CLOSED) {
- nvme_auto_transition_zone(ns);
- status = nvme_aor_check(ns, 0, 1);
- }
+ switch (nvme_get_zone_state(zone)) {
+ case NVME_ZONE_STATE_EMPTY:
+ act = 1;
- return status;
+ /* fallthrough */
+
+ case NVME_ZONE_STATE_CLOSED:
+ nvme_zrm_auto_transition_zone(ns);
+ status = nvme_aor_check(ns, act, 1);
+ if (status) {
+ return status;
+ }
+
+ if (act) {
+ nvme_aor_inc_active(ns);
+ }
+
+ nvme_aor_inc_open(ns);
+
+ if (implicit) {
+ nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_IMPLICITLY_OPEN);
+ return NVME_SUCCESS;
+ }
+
+ /* fallthrough */
+
+ case NVME_ZONE_STATE_IMPLICITLY_OPEN:
+ if (implicit) {
+ return NVME_SUCCESS;
+ }
+
+ nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_EXPLICITLY_OPEN);
+
+ /* fallthrough */
+
+ case NVME_ZONE_STATE_EXPLICITLY_OPEN:
+ return NVME_SUCCESS;
+
+ default:
+ return NVME_ZONE_INVAL_TRANSITION;
+ }
}
-static void nvme_finalize_zoned_write(NvmeNamespace *ns, NvmeRequest *req,
- bool failed)
+static inline uint16_t nvme_zrm_auto(NvmeNamespace *ns, NvmeZone *zone)
{
- NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
- NvmeZone *zone;
- NvmeZonedResult *res = (NvmeZonedResult *)&req->cqe;
- uint64_t slba;
- uint32_t nlb;
+ return __nvme_zrm_open(ns, zone, true);
+}
- slba = le64_to_cpu(rw->slba);
- nlb = le16_to_cpu(rw->nlb) + 1;
- zone = nvme_get_zone_by_slba(ns, slba);
+static inline uint16_t nvme_zrm_open(NvmeNamespace *ns, NvmeZone *zone)
+{
+ return __nvme_zrm_open(ns, zone, false);
+}
+static void __nvme_advance_zone_wp(NvmeNamespace *ns, NvmeZone *zone,
+ uint32_t nlb)
+{
zone->d.wp += nlb;
- if (failed) {
- res->slba = 0;
- }
-
if (zone->d.wp == nvme_zone_wr_boundary(zone)) {
- switch (nvme_get_zone_state(zone)) {
- case NVME_ZONE_STATE_IMPLICITLY_OPEN:
- case NVME_ZONE_STATE_EXPLICITLY_OPEN:
- nvme_aor_dec_open(ns);
- /* fall through */
- case NVME_ZONE_STATE_CLOSED:
- nvme_aor_dec_active(ns);
- /* fall through */
- case NVME_ZONE_STATE_EMPTY:
- nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_FULL);
- /* fall through */
- case NVME_ZONE_STATE_FULL:
- break;
- default:
- assert(false);
- }
+ nvme_zrm_finish(ns, zone);
}
}
-static void nvme_advance_zone_wp(NvmeNamespace *ns, NvmeZone *zone,
- uint32_t nlb)
+static void nvme_finalize_zoned_write(NvmeNamespace *ns, NvmeRequest *req)
{
- uint8_t zs;
+ NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
+ NvmeZone *zone;
+ uint64_t slba;
+ uint32_t nlb;
- zone->w_ptr += nlb;
+ slba = le64_to_cpu(rw->slba);
+ nlb = le16_to_cpu(rw->nlb) + 1;
+ zone = nvme_get_zone_by_slba(ns, slba);
- if (zone->w_ptr < nvme_zone_wr_boundary(zone)) {
- zs = nvme_get_zone_state(zone);
- switch (zs) {
- case NVME_ZONE_STATE_EMPTY:
- nvme_aor_inc_active(ns);
- /* fall through */
- case NVME_ZONE_STATE_CLOSED:
- nvme_aor_inc_open(ns);
- nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_IMPLICITLY_OPEN);
- }
- }
+ __nvme_advance_zone_wp(ns, zone, nlb);
}
static inline bool nvme_is_write(NvmeRequest *req)
@@ -1379,9 +1510,37 @@ static void nvme_rw_cb(void *opaque, int ret)
trace_pci_nvme_rw_cb(nvme_cid(req), blk_name(blk));
if (ns->params.zoned && nvme_is_write(req)) {
- nvme_finalize_zoned_write(ns, req, ret != 0);
+ nvme_finalize_zoned_write(ns, req);
+ }
+
+ if (!ret) {
+ block_acct_done(stats, acct);
+ } else {
+ block_acct_failed(stats, acct);
+ nvme_aio_err(req, ret);
}
+ nvme_enqueue_req_completion(nvme_cq(req), req);
+}
+
+struct nvme_aio_flush_ctx {
+ NvmeRequest *req;
+ NvmeNamespace *ns;
+ BlockAcctCookie acct;
+};
+
+static void nvme_aio_flush_cb(void *opaque, int ret)
+{
+ struct nvme_aio_flush_ctx *ctx = opaque;
+ NvmeRequest *req = ctx->req;
+ uintptr_t *num_flushes = (uintptr_t *)&req->opaque;
+
+ BlockBackend *blk = ctx->ns->blkconf.blk;
+ BlockAcctCookie *acct = &ctx->acct;
+ BlockAcctStats *stats = blk_get_stats(blk);
+
+ trace_pci_nvme_aio_flush_cb(nvme_cid(req), blk_name(blk));
+
if (!ret) {
block_acct_done(stats, acct);
} else {
@@ -1389,6 +1548,13 @@ static void nvme_rw_cb(void *opaque, int ret)
nvme_aio_err(req, ret);
}
+ (*num_flushes)--;
+ g_free(ctx);
+
+ if (*num_flushes) {
+ return;
+ }
+
nvme_enqueue_req_completion(nvme_cq(req), req);
}
@@ -1459,10 +1625,139 @@ static void nvme_aio_zone_reset_cb(void *opaque, int ret)
nvme_enqueue_req_completion(nvme_cq(req), req);
}
+struct nvme_copy_ctx {
+ int copies;
+ uint8_t *bounce;
+ uint32_t nlb;
+};
+
+struct nvme_copy_in_ctx {
+ NvmeRequest *req;
+ QEMUIOVector iov;
+};
+
+static void nvme_copy_cb(void *opaque, int ret)
+{
+ NvmeRequest *req = opaque;
+ NvmeNamespace *ns = req->ns;
+ struct nvme_copy_ctx *ctx = req->opaque;
+
+ trace_pci_nvme_copy_cb(nvme_cid(req));
+
+ if (ns->params.zoned) {
+ NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd;
+ uint64_t sdlba = le64_to_cpu(copy->sdlba);
+ NvmeZone *zone = nvme_get_zone_by_slba(ns, sdlba);
+
+ __nvme_advance_zone_wp(ns, zone, ctx->nlb);
+ }
+
+ if (!ret) {
+ block_acct_done(blk_get_stats(ns->blkconf.blk), &req->acct);
+ } else {
+ block_acct_failed(blk_get_stats(ns->blkconf.blk), &req->acct);
+ nvme_aio_err(req, ret);
+ }
+
+ g_free(ctx->bounce);
+ g_free(ctx);
+
+ nvme_enqueue_req_completion(nvme_cq(req), req);
+}
+
+static void nvme_copy_in_complete(NvmeRequest *req)
+{
+ NvmeNamespace *ns = req->ns;
+ NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd;
+ struct nvme_copy_ctx *ctx = req->opaque;
+ uint64_t sdlba = le64_to_cpu(copy->sdlba);
+ uint16_t status;
+
+ trace_pci_nvme_copy_in_complete(nvme_cid(req));
+
+ block_acct_done(blk_get_stats(ns->blkconf.blk), &req->acct);
+
+ status = nvme_check_bounds(ns, sdlba, ctx->nlb);
+ if (status) {
+ trace_pci_nvme_err_invalid_lba_range(sdlba, ctx->nlb, ns->id_ns.nsze);
+ goto invalid;
+ }
+
+ if (ns->params.zoned) {
+ NvmeZone *zone = nvme_get_zone_by_slba(ns, sdlba);
+
+ status = nvme_check_zone_write(ns, zone, sdlba, ctx->nlb);
+ if (status) {
+ goto invalid;
+ }
+
+ status = nvme_zrm_auto(ns, zone);
+ if (status) {
+ goto invalid;
+ }
+
+ zone->w_ptr += ctx->nlb;
+ }
+
+ qemu_iovec_init(&req->sg.iov, 1);
+ qemu_iovec_add(&req->sg.iov, ctx->bounce, nvme_l2b(ns, ctx->nlb));
+
+ block_acct_start(blk_get_stats(ns->blkconf.blk), &req->acct, 0,
+ BLOCK_ACCT_WRITE);
+
+ req->aiocb = blk_aio_pwritev(ns->blkconf.blk, nvme_l2b(ns, sdlba),
+ &req->sg.iov, 0, nvme_copy_cb, req);
+
+ return;
+
+invalid:
+ req->status = status;
+
+ g_free(ctx->bounce);
+ g_free(ctx);
+
+ nvme_enqueue_req_completion(nvme_cq(req), req);
+}
+
+static void nvme_aio_copy_in_cb(void *opaque, int ret)
+{
+ struct nvme_copy_in_ctx *in_ctx = opaque;
+ NvmeRequest *req = in_ctx->req;
+ NvmeNamespace *ns = req->ns;
+ struct nvme_copy_ctx *ctx = req->opaque;
+
+ qemu_iovec_destroy(&in_ctx->iov);
+ g_free(in_ctx);
+
+ trace_pci_nvme_aio_copy_in_cb(nvme_cid(req));
+
+ if (ret) {
+ nvme_aio_err(req, ret);
+ }
+
+ ctx->copies--;
+
+ if (ctx->copies) {
+ return;
+ }
+
+ if (req->status) {
+ block_acct_failed(blk_get_stats(ns->blkconf.blk), &req->acct);
+
+ g_free(ctx->bounce);
+ g_free(ctx);
+
+ nvme_enqueue_req_completion(nvme_cq(req), req);
+
+ return;
+ }
+
+ nvme_copy_in_complete(req);
+}
+
struct nvme_compare_ctx {
QEMUIOVector iov;
uint8_t *bounce;
- size_t len;
};
static void nvme_compare_cb(void *opaque, int ret)
@@ -1483,16 +1778,15 @@ static void nvme_compare_cb(void *opaque, int ret)
goto out;
}
- buf = g_malloc(ctx->len);
+ buf = g_malloc(ctx->iov.size);
- status = nvme_dma(nvme_ctrl(req), buf, ctx->len, DMA_DIRECTION_TO_DEVICE,
- req);
+ status = nvme_h2c(nvme_ctrl(req), buf, ctx->iov.size, req);
if (status) {
req->status = status;
goto out;
}
- if (memcmp(buf, ctx->bounce, ctx->len)) {
+ if (memcmp(buf, ctx->bounce, ctx->iov.size)) {
req->status = NVME_CMP_FAILURE;
}
@@ -1522,8 +1816,7 @@ static uint16_t nvme_dsm(NvmeCtrl *n, NvmeRequest *req)
NvmeDsmRange range[nr];
uintptr_t *discards = (uintptr_t *)&req->opaque;
- status = nvme_dma(n, (uint8_t *)range, sizeof(range),
- DMA_DIRECTION_TO_DEVICE, req);
+ status = nvme_h2c(n, (uint8_t *)range, sizeof(range), req);
if (status) {
return status;
}
@@ -1548,6 +1841,10 @@ static uint16_t nvme_dsm(NvmeCtrl *n, NvmeRequest *req)
trace_pci_nvme_dsm_deallocate(nvme_cid(req), nvme_nsid(ns), slba,
nlb);
+ if (nlb > n->dmrsl) {
+ trace_pci_nvme_dsm_single_range_limit_exceeded(nlb, n->dmrsl);
+ }
+
offset = nvme_l2b(ns, slba);
len = nvme_l2b(ns, nlb);
@@ -1577,6 +1874,121 @@ static uint16_t nvme_dsm(NvmeCtrl *n, NvmeRequest *req)
return status;
}
+static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req)
+{
+ NvmeNamespace *ns = req->ns;
+ NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd;
+ g_autofree NvmeCopySourceRange *range = NULL;
+
+ uint16_t nr = copy->nr + 1;
+ uint8_t format = copy->control[0] & 0xf;
+ uint32_t nlb = 0;
+
+ uint8_t *bounce = NULL, *bouncep = NULL;
+ struct nvme_copy_ctx *ctx;
+ uint16_t status;
+ int i;
+
+ trace_pci_nvme_copy(nvme_cid(req), nvme_nsid(ns), nr, format);
+
+ if (!(n->id_ctrl.ocfs & (1 << format))) {
+ trace_pci_nvme_err_copy_invalid_format(format);
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+
+ if (nr > ns->id_ns.msrc + 1) {
+ return NVME_CMD_SIZE_LIMIT | NVME_DNR;
+ }
+
+ range = g_new(NvmeCopySourceRange, nr);
+
+ status = nvme_h2c(n, (uint8_t *)range, nr * sizeof(NvmeCopySourceRange),
+ req);
+ if (status) {
+ return status;
+ }
+
+ for (i = 0; i < nr; i++) {
+ uint64_t slba = le64_to_cpu(range[i].slba);
+ uint32_t _nlb = le16_to_cpu(range[i].nlb) + 1;
+
+ if (_nlb > le16_to_cpu(ns->id_ns.mssrl)) {
+ return NVME_CMD_SIZE_LIMIT | NVME_DNR;
+ }
+
+ status = nvme_check_bounds(ns, slba, _nlb);
+ if (status) {
+ trace_pci_nvme_err_invalid_lba_range(slba, _nlb, ns->id_ns.nsze);
+ return status;
+ }
+
+ if (NVME_ERR_REC_DULBE(ns->features.err_rec)) {
+ status = nvme_check_dulbe(ns, slba, _nlb);
+ if (status) {
+ return status;
+ }
+ }
+
+ if (ns->params.zoned) {
+ status = nvme_check_zone_read(ns, slba, _nlb);
+ if (status) {
+ return status;
+ }
+ }
+
+ nlb += _nlb;
+ }
+
+ if (nlb > le32_to_cpu(ns->id_ns.mcl)) {
+ return NVME_CMD_SIZE_LIMIT | NVME_DNR;
+ }
+
+ bounce = bouncep = g_malloc(nvme_l2b(ns, nlb));
+
+ block_acct_start(blk_get_stats(ns->blkconf.blk), &req->acct, 0,
+ BLOCK_ACCT_READ);
+
+ ctx = g_new(struct nvme_copy_ctx, 1);
+
+ ctx->bounce = bounce;
+ ctx->nlb = nlb;
+ ctx->copies = 1;
+
+ req->opaque = ctx;
+
+ for (i = 0; i < nr; i++) {
+ uint64_t slba = le64_to_cpu(range[i].slba);
+ uint32_t nlb = le16_to_cpu(range[i].nlb) + 1;
+
+ size_t len = nvme_l2b(ns, nlb);
+ int64_t offset = nvme_l2b(ns, slba);
+
+ trace_pci_nvme_copy_source_range(slba, nlb);
+
+ struct nvme_copy_in_ctx *in_ctx = g_new(struct nvme_copy_in_ctx, 1);
+ in_ctx->req = req;
+
+ qemu_iovec_init(&in_ctx->iov, 1);
+ qemu_iovec_add(&in_ctx->iov, bouncep, len);
+
+ ctx->copies++;
+
+ blk_aio_preadv(ns->blkconf.blk, offset, &in_ctx->iov, 0,
+ nvme_aio_copy_in_cb, in_ctx);
+
+ bouncep += len;
+ }
+
+ /* account for the 1-initialization */
+ ctx->copies--;
+
+ if (!ctx->copies) {
+ nvme_copy_in_complete(req);
+ }
+
+ return NVME_NO_COMPLETE;
+}
+
static uint16_t nvme_compare(NvmeCtrl *n, NvmeRequest *req)
{
NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
@@ -1594,7 +2006,6 @@ static uint16_t nvme_compare(NvmeCtrl *n, NvmeRequest *req)
status = nvme_check_mdts(n, len);
if (status) {
- trace_pci_nvme_err_mdts(nvme_cid(req), len);
return status;
}
@@ -1615,7 +2026,6 @@ static uint16_t nvme_compare(NvmeCtrl *n, NvmeRequest *req)
ctx = g_new(struct nvme_compare_ctx, 1);
ctx->bounce = bounce;
- ctx->len = len;
req->opaque = ctx;
@@ -1630,10 +2040,56 @@ static uint16_t nvme_compare(NvmeCtrl *n, NvmeRequest *req)
static uint16_t nvme_flush(NvmeCtrl *n, NvmeRequest *req)
{
- block_acct_start(blk_get_stats(req->ns->blkconf.blk), &req->acct, 0,
- BLOCK_ACCT_FLUSH);
- req->aiocb = blk_aio_flush(req->ns->blkconf.blk, nvme_rw_cb, req);
- return NVME_NO_COMPLETE;
+ uint32_t nsid = le32_to_cpu(req->cmd.nsid);
+ uintptr_t *num_flushes = (uintptr_t *)&req->opaque;
+ uint16_t status;
+ struct nvme_aio_flush_ctx *ctx;
+ NvmeNamespace *ns;
+
+ trace_pci_nvme_flush(nvme_cid(req), nsid);
+
+ if (nsid != NVME_NSID_BROADCAST) {
+ req->ns = nvme_ns(n, nsid);
+ if (unlikely(!req->ns)) {
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+
+ block_acct_start(blk_get_stats(req->ns->blkconf.blk), &req->acct, 0,
+ BLOCK_ACCT_FLUSH);
+ req->aiocb = blk_aio_flush(req->ns->blkconf.blk, nvme_rw_cb, req);
+ return NVME_NO_COMPLETE;
+ }
+
+ /* 1-initialize; see comment in nvme_dsm */
+ *num_flushes = 1;
+
+ for (int i = 1; i <= n->num_namespaces; i++) {
+ ns = nvme_ns(n, i);
+ if (!ns) {
+ continue;
+ }
+
+ ctx = g_new(struct nvme_aio_flush_ctx, 1);
+ ctx->req = req;
+ ctx->ns = ns;
+
+ (*num_flushes)++;
+
+ block_acct_start(blk_get_stats(ns->blkconf.blk), &ctx->acct, 0,
+ BLOCK_ACCT_FLUSH);
+ blk_aio_flush(ns->blkconf.blk, nvme_aio_flush_cb, ctx);
+ }
+
+ /* account for the 1-initialization */
+ (*num_flushes)--;
+
+ if (*num_flushes) {
+ status = NVME_NO_COMPLETE;
+ } else {
+ status = req->status;
+ }
+
+ return status;
}
static uint16_t nvme_read(NvmeCtrl *n, NvmeRequest *req)
@@ -1651,7 +2107,6 @@ static uint16_t nvme_read(NvmeCtrl *n, NvmeRequest *req)
status = nvme_check_mdts(n, data_size);
if (status) {
- trace_pci_nvme_err_mdts(nvme_cid(req), data_size);
goto invalid;
}
@@ -1669,7 +2124,7 @@ static uint16_t nvme_read(NvmeCtrl *n, NvmeRequest *req)
}
}
- status = nvme_map_dptr(n, data_size, req);
+ status = nvme_map_dptr(n, &req->sg, data_size, &req->cmd);
if (status) {
goto invalid;
}
@@ -1685,13 +2140,7 @@ static uint16_t nvme_read(NvmeCtrl *n, NvmeRequest *req)
block_acct_start(blk_get_stats(blk), &req->acct, data_size,
BLOCK_ACCT_READ);
- if (req->qsg.sg) {
- req->aiocb = dma_blk_read(blk, &req->qsg, data_offset,
- BDRV_SECTOR_SIZE, nvme_rw_cb, req);
- } else {
- req->aiocb = blk_aio_preadv(blk, data_offset, &req->iov, 0,
- nvme_rw_cb, req);
- }
+ nvme_blk_read(blk, data_offset, nvme_rw_cb, req);
return NVME_NO_COMPLETE;
invalid:
@@ -1719,7 +2168,6 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append,
if (!wrz) {
status = nvme_check_mdts(n, data_size);
if (status) {
- trace_pci_nvme_err_mdts(nvme_cid(req), data_size);
goto invalid;
}
}
@@ -1740,48 +2188,40 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append,
goto invalid;
}
- if (nvme_l2b(ns, nlb) > (n->page_size << n->zasl)) {
- trace_pci_nvme_err_append_too_large(slba, nlb, n->zasl);
- status = NVME_INVALID_FIELD;
- goto invalid;
+ if (n->params.zasl && data_size > n->page_size << n->params.zasl) {
+ trace_pci_nvme_err_zasl(data_size);
+ return NVME_INVALID_FIELD | NVME_DNR;
}
slba = zone->w_ptr;
res->slba = cpu_to_le64(slba);
}
- status = nvme_check_zone_write(n, ns, zone, slba, nlb);
+ status = nvme_check_zone_write(ns, zone, slba, nlb);
if (status) {
goto invalid;
}
- status = nvme_auto_open_zone(ns, zone);
+ status = nvme_zrm_auto(ns, zone);
if (status) {
goto invalid;
}
- nvme_advance_zone_wp(ns, zone, nlb);
+ zone->w_ptr += nlb;
}
data_offset = nvme_l2b(ns, slba);
if (!wrz) {
- status = nvme_map_dptr(n, data_size, req);
+ status = nvme_map_dptr(n, &req->sg, data_size, &req->cmd);
if (status) {
goto invalid;
}
block_acct_start(blk_get_stats(blk), &req->acct, data_size,
BLOCK_ACCT_WRITE);
- if (req->qsg.sg) {
- req->aiocb = dma_blk_write(blk, &req->qsg, data_offset,
- BDRV_SECTOR_SIZE, nvme_rw_cb, req);
- } else {
- req->aiocb = blk_aio_pwritev(blk, data_offset, &req->iov, 0,
- nvme_rw_cb, req);
- }
+ nvme_blk_write(blk, data_offset, nvme_rw_cb, req);
} else {
- block_acct_start(blk_get_stats(blk), &req->acct, 0, BLOCK_ACCT_WRITE);
req->aiocb = blk_aio_pwrite_zeroes(blk, data_offset, data_size,
BDRV_REQ_MAY_UNMAP, nvme_rw_cb,
req);
@@ -1846,73 +2286,19 @@ enum NvmeZoneProcessingMask {
static uint16_t nvme_open_zone(NvmeNamespace *ns, NvmeZone *zone,
NvmeZoneState state, NvmeRequest *req)
{
- uint16_t status;
-
- switch (state) {
- case NVME_ZONE_STATE_EMPTY:
- status = nvme_aor_check(ns, 1, 0);
- if (status) {
- return status;
- }
- nvme_aor_inc_active(ns);
- /* fall through */
- case NVME_ZONE_STATE_CLOSED:
- status = nvme_aor_check(ns, 0, 1);
- if (status) {
- if (state == NVME_ZONE_STATE_EMPTY) {
- nvme_aor_dec_active(ns);
- }
- return status;
- }
- nvme_aor_inc_open(ns);
- /* fall through */
- case NVME_ZONE_STATE_IMPLICITLY_OPEN:
- nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_EXPLICITLY_OPEN);
- /* fall through */
- case NVME_ZONE_STATE_EXPLICITLY_OPEN:
- return NVME_SUCCESS;
- default:
- return NVME_ZONE_INVAL_TRANSITION;
- }
+ return nvme_zrm_open(ns, zone);
}
static uint16_t nvme_close_zone(NvmeNamespace *ns, NvmeZone *zone,
NvmeZoneState state, NvmeRequest *req)
{
- switch (state) {
- case NVME_ZONE_STATE_EXPLICITLY_OPEN:
- case NVME_ZONE_STATE_IMPLICITLY_OPEN:
- nvme_aor_dec_open(ns);
- nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_CLOSED);
- /* fall through */
- case NVME_ZONE_STATE_CLOSED:
- return NVME_SUCCESS;
- default:
- return NVME_ZONE_INVAL_TRANSITION;
- }
+ return nvme_zrm_close(ns, zone);
}
static uint16_t nvme_finish_zone(NvmeNamespace *ns, NvmeZone *zone,
NvmeZoneState state, NvmeRequest *req)
{
- switch (state) {
- case NVME_ZONE_STATE_EXPLICITLY_OPEN:
- case NVME_ZONE_STATE_IMPLICITLY_OPEN:
- nvme_aor_dec_open(ns);
- /* fall through */
- case NVME_ZONE_STATE_CLOSED:
- nvme_aor_dec_active(ns);
- /* fall through */
- case NVME_ZONE_STATE_EMPTY:
- zone->w_ptr = nvme_zone_wr_boundary(zone);
- zone->d.wp = zone->w_ptr;
- nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_FULL);
- /* fall through */
- case NVME_ZONE_STATE_FULL:
- return NVME_SUCCESS;
- default:
- return NVME_ZONE_INVAL_TRANSITION;
- }
+ return nvme_zrm_finish(ns, zone);
}
static uint16_t nvme_reset_zone(NvmeNamespace *ns, NvmeZone *zone,
@@ -2168,8 +2554,7 @@ static uint16_t nvme_zone_mgmt_send(NvmeCtrl *n, NvmeRequest *req)
return NVME_INVALID_FIELD | NVME_DNR;
}
zd_ext = nvme_get_zd_extension(ns, zone_idx);
- status = nvme_dma(n, zd_ext, ns->params.zd_extension_size,
- DMA_DIRECTION_TO_DEVICE, req);
+ status = nvme_h2c(n, zd_ext, ns->params.zd_extension_size, req);
if (status) {
trace_pci_nvme_err_zd_extension_map_error(zone_idx);
return status;
@@ -2267,7 +2652,6 @@ static uint16_t nvme_zone_mgmt_recv(NvmeCtrl *n, NvmeRequest *req)
status = nvme_check_mdts(n, data_size);
if (status) {
- trace_pci_nvme_err_mdts(nvme_cid(req), data_size);
return status;
}
@@ -2324,8 +2708,7 @@ static uint16_t nvme_zone_mgmt_recv(NvmeCtrl *n, NvmeRequest *req)
}
}
- status = nvme_dma(n, (uint8_t *)buf, data_size,
- DMA_DIRECTION_FROM_DEVICE, req);
+ status = nvme_c2h(n, (uint8_t *)buf, data_size, req);
g_free(buf);
@@ -2343,6 +2726,29 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req)
return NVME_INVALID_NSID | NVME_DNR;
}
+ /*
+ * In the base NVM command set, Flush may apply to all namespaces
+ * (indicated by NSID being set to 0xFFFFFFFF). But if that feature is used
+ * along with TP 4056 (Namespace Types), the semantics become murky.
+ *
+ * If NSID is indeed set to 0xFFFFFFFF, we simply cannot associate the
+ * opcode with a specific command since we cannot determine a unique I/O
+ * command set. Opcode 0x0 could have any other meaning than something
+ * equivalent to flushing and say it DOES have completely different
+ * semantics in some other command set - does an NSID of 0xFFFFFFFF then
+ * mean "for all namespaces, apply whatever command set specific command
+ * that uses the 0x0 opcode?" Or does it mean "for all namespaces, apply
+ * whatever command that uses the 0x0 opcode if, and only if, it allows
+ * NSID to be 0xFFFFFFFF"?
+ *
+ * Anyway (and luckily), for now, we do not care about this since the
+ * device only supports namespace types that includes the NVM Flush command
+ * (NVM and Zoned), so always do an NVM Flush.
+ */
+ if (req->cmd.opcode == NVME_CMD_FLUSH) {
+ return nvme_flush(n, req);
+ }
+
req->ns = nvme_ns(n, nsid);
if (unlikely(!req->ns)) {
return NVME_INVALID_FIELD | NVME_DNR;
@@ -2354,8 +2760,6 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req)
}
switch (req->cmd.opcode) {
- case NVME_CMD_FLUSH:
- return nvme_flush(n, req);
case NVME_CMD_WRITE_ZEROES:
return nvme_write_zeroes(n, req);
case NVME_CMD_ZONE_APPEND:
@@ -2368,6 +2772,8 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req)
return nvme_compare(n, req);
case NVME_CMD_DSM:
return nvme_dsm(n, req);
+ case NVME_CMD_COPY:
+ return nvme_copy(n, req);
case NVME_CMD_ZONE_MGMT_SEND:
return nvme_zone_mgmt_send(n, req);
case NVME_CMD_ZONE_MGMT_RECV:
@@ -2568,8 +2974,7 @@ static uint16_t nvme_smart_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len,
nvme_clear_events(n, NVME_AER_TYPE_SMART);
}
- return nvme_dma(n, (uint8_t *) &smart + off, trans_len,
- DMA_DIRECTION_FROM_DEVICE, req);
+ return nvme_c2h(n, (uint8_t *) &smart + off, trans_len, req);
}
static uint16_t nvme_fw_log_info(NvmeCtrl *n, uint32_t buf_len, uint64_t off,
@@ -2587,8 +2992,7 @@ static uint16_t nvme_fw_log_info(NvmeCtrl *n, uint32_t buf_len, uint64_t off,
strpadcpy((char *)&fw_log.frs1, sizeof(fw_log.frs1), "1.0", ' ');
trans_len = MIN(sizeof(fw_log) - off, buf_len);
- return nvme_dma(n, (uint8_t *) &fw_log + off, trans_len,
- DMA_DIRECTION_FROM_DEVICE, req);
+ return nvme_c2h(n, (uint8_t *) &fw_log + off, trans_len, req);
}
static uint16_t nvme_error_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len,
@@ -2608,8 +3012,49 @@ static uint16_t nvme_error_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len,
memset(&errlog, 0x0, sizeof(errlog));
trans_len = MIN(sizeof(errlog) - off, buf_len);
- return nvme_dma(n, (uint8_t *)&errlog, trans_len,
- DMA_DIRECTION_FROM_DEVICE, req);
+ return nvme_c2h(n, (uint8_t *)&errlog, trans_len, req);
+}
+
+static uint16_t nvme_changed_nslist(NvmeCtrl *n, uint8_t rae, uint32_t buf_len,
+ uint64_t off, NvmeRequest *req)
+{
+ uint32_t nslist[1024];
+ uint32_t trans_len;
+ int i = 0;
+ uint32_t nsid;
+
+ memset(nslist, 0x0, sizeof(nslist));
+ trans_len = MIN(sizeof(nslist) - off, buf_len);
+
+ while ((nsid = find_first_bit(n->changed_nsids, NVME_CHANGED_NSID_SIZE)) !=
+ NVME_CHANGED_NSID_SIZE) {
+ /*
+ * If there are more than 1024 namespaces, the spec requires the first
+ * entry in the log page to be set to 0xffffffff and the rest to 0.
+ */
+ if (i == ARRAY_SIZE(nslist)) {
+ memset(nslist, 0x0, sizeof(nslist));
+ nslist[0] = 0xffffffff;
+ break;
+ }
+
+ nslist[i++] = nsid;
+ clear_bit(nsid, n->changed_nsids);
+ }
+
+ /*
+ * Remove all remaining entries if we bailed out above because there were
+ * more than 1024 namespaces.
+ */
+ if (nslist[0] == 0xffffffff) {
+ bitmap_zero(n->changed_nsids, NVME_CHANGED_NSID_SIZE);
+ }
+
+ if (!rae) {
+ nvme_clear_events(n, NVME_AER_TYPE_NOTICE);
+ }
+
+ return nvme_c2h(n, ((uint8_t *)nslist) + off, trans_len, req);
}
static uint16_t nvme_cmd_effects(NvmeCtrl *n, uint8_t csi, uint32_t buf_len,
@@ -2649,8 +3094,7 @@ static uint16_t nvme_cmd_effects(NvmeCtrl *n, uint8_t csi, uint32_t buf_len,
trans_len = MIN(sizeof(log) - off, buf_len);
- return nvme_dma(n, ((uint8_t *)&log) + off, trans_len,
- DMA_DIRECTION_FROM_DEVICE, req);
+ return nvme_c2h(n, ((uint8_t *)&log) + off, trans_len, req);
}
static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req)
@@ -2686,7 +3130,6 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req)
status = nvme_check_mdts(n, len);
if (status) {
- trace_pci_nvme_err_mdts(nvme_cid(req), len);
return status;
}
@@ -2697,6 +3140,8 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req)
return nvme_smart_info(n, rae, len, off, req);
case NVME_LOG_FW_SLOT_INFO:
return nvme_fw_log_info(n, len, off, req);
+ case NVME_LOG_CHANGED_NSLIST:
+ return nvme_changed_nslist(n, rae, len, off, req);
case NVME_LOG_CMD_EFFECTS:
return nvme_cmd_effects(n, csi, len, off, req);
default:
@@ -2819,7 +3264,7 @@ static uint16_t nvme_rpt_empty_id_struct(NvmeCtrl *n, NvmeRequest *req)
{
uint8_t id[NVME_IDENTIFY_DATA_SIZE] = {};
- return nvme_dma(n, id, sizeof(id), DMA_DIRECTION_FROM_DEVICE, req);
+ return nvme_c2h(n, id, sizeof(id), req);
}
static inline bool nvme_csi_has_nvm_support(NvmeNamespace *ns)
@@ -2836,31 +3281,33 @@ static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeRequest *req)
{
trace_pci_nvme_identify_ctrl();
- return nvme_dma(n, (uint8_t *)&n->id_ctrl, sizeof(n->id_ctrl),
- DMA_DIRECTION_FROM_DEVICE, req);
+ return nvme_c2h(n, (uint8_t *)&n->id_ctrl, sizeof(n->id_ctrl), req);
}
static uint16_t nvme_identify_ctrl_csi(NvmeCtrl *n, NvmeRequest *req)
{
NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
- NvmeIdCtrlZoned id = {};
+ uint8_t id[NVME_IDENTIFY_DATA_SIZE] = {};
trace_pci_nvme_identify_ctrl_csi(c->csi);
- if (c->csi == NVME_CSI_NVM) {
- return nvme_rpt_empty_id_struct(n, req);
- } else if (c->csi == NVME_CSI_ZONED) {
- if (n->params.zasl_bs) {
- id.zasl = n->zasl;
- }
- return nvme_dma(n, (uint8_t *)&id, sizeof(id),
- DMA_DIRECTION_FROM_DEVICE, req);
+ switch (c->csi) {
+ case NVME_CSI_NVM:
+ ((NvmeIdCtrlNvm *)&id)->dmrsl = cpu_to_le32(n->dmrsl);
+ break;
+
+ case NVME_CSI_ZONED:
+ ((NvmeIdCtrlZoned *)&id)->zasl = n->params.zasl;
+ break;
+
+ default:
+ return NVME_INVALID_FIELD | NVME_DNR;
}
- return NVME_INVALID_FIELD | NVME_DNR;
+ return nvme_c2h(n, id, sizeof(id), req);
}
-static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeRequest *req)
+static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeRequest *req, bool active)
{
NvmeNamespace *ns;
NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
@@ -2874,18 +3321,64 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeRequest *req)
ns = nvme_ns(n, nsid);
if (unlikely(!ns)) {
- return nvme_rpt_empty_id_struct(n, req);
+ if (!active) {
+ ns = nvme_subsys_ns(n->subsys, nsid);
+ if (!ns) {
+ return nvme_rpt_empty_id_struct(n, req);
+ }
+ } else {
+ return nvme_rpt_empty_id_struct(n, req);
+ }
}
if (c->csi == NVME_CSI_NVM && nvme_csi_has_nvm_support(ns)) {
- return nvme_dma(n, (uint8_t *)&ns->id_ns, sizeof(NvmeIdNs),
- DMA_DIRECTION_FROM_DEVICE, req);
+ return nvme_c2h(n, (uint8_t *)&ns->id_ns, sizeof(NvmeIdNs), req);
}
return NVME_INVALID_CMD_SET | NVME_DNR;
}
-static uint16_t nvme_identify_ns_csi(NvmeCtrl *n, NvmeRequest *req)
+static uint16_t nvme_identify_ns_attached_list(NvmeCtrl *n, NvmeRequest *req)
+{
+ NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
+ uint16_t min_id = le16_to_cpu(c->ctrlid);
+ uint16_t list[NVME_CONTROLLER_LIST_SIZE] = {};
+ uint16_t *ids = &list[1];
+ NvmeNamespace *ns;
+ NvmeCtrl *ctrl;
+ int cntlid, nr_ids = 0;
+
+ trace_pci_nvme_identify_ns_attached_list(min_id);
+
+ if (c->nsid == NVME_NSID_BROADCAST) {
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+
+ ns = nvme_subsys_ns(n->subsys, c->nsid);
+ if (!ns) {
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+
+ for (cntlid = min_id; cntlid < ARRAY_SIZE(n->subsys->ctrls); cntlid++) {
+ ctrl = nvme_subsys_ctrl(n->subsys, cntlid);
+ if (!ctrl) {
+ continue;
+ }
+
+ if (!nvme_ns_is_attached(ctrl, ns)) {
+ continue;
+ }
+
+ ids[nr_ids++] = cntlid;
+ }
+
+ list[0] = nr_ids;
+
+ return nvme_c2h(n, (uint8_t *)list, sizeof(list), req);
+}
+
+static uint16_t nvme_identify_ns_csi(NvmeCtrl *n, NvmeRequest *req,
+ bool active)
{
NvmeNamespace *ns;
NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
@@ -2899,20 +3392,28 @@ static uint16_t nvme_identify_ns_csi(NvmeCtrl *n, NvmeRequest *req)
ns = nvme_ns(n, nsid);
if (unlikely(!ns)) {
- return nvme_rpt_empty_id_struct(n, req);
+ if (!active) {
+ ns = nvme_subsys_ns(n->subsys, nsid);
+ if (!ns) {
+ return nvme_rpt_empty_id_struct(n, req);
+ }
+ } else {
+ return nvme_rpt_empty_id_struct(n, req);
+ }
}
if (c->csi == NVME_CSI_NVM && nvme_csi_has_nvm_support(ns)) {
return nvme_rpt_empty_id_struct(n, req);
} else if (c->csi == NVME_CSI_ZONED && ns->csi == NVME_CSI_ZONED) {
- return nvme_dma(n, (uint8_t *)ns->id_ns_zoned, sizeof(NvmeIdNsZoned),
- DMA_DIRECTION_FROM_DEVICE, req);
+ return nvme_c2h(n, (uint8_t *)ns->id_ns_zoned, sizeof(NvmeIdNsZoned),
+ req);
}
return NVME_INVALID_FIELD | NVME_DNR;
}
-static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeRequest *req)
+static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeRequest *req,
+ bool active)
{
NvmeNamespace *ns;
NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
@@ -2937,7 +3438,14 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeRequest *req)
for (i = 1; i <= n->num_namespaces; i++) {
ns = nvme_ns(n, i);
if (!ns) {
- continue;
+ if (!active) {
+ ns = nvme_subsys_ns(n->subsys, i);
+ if (!ns) {
+ continue;
+ }
+ } else {
+ continue;
+ }
}
if (ns->params.nsid <= min_nsid) {
continue;
@@ -2948,10 +3456,11 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeRequest *req)
}
}
- return nvme_dma(n, list, data_len, DMA_DIRECTION_FROM_DEVICE, req);
+ return nvme_c2h(n, list, data_len, req);
}
-static uint16_t nvme_identify_nslist_csi(NvmeCtrl *n, NvmeRequest *req)
+static uint16_t nvme_identify_nslist_csi(NvmeCtrl *n, NvmeRequest *req,
+ bool active)
{
NvmeNamespace *ns;
NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
@@ -2977,7 +3486,14 @@ static uint16_t nvme_identify_nslist_csi(NvmeCtrl *n, NvmeRequest *req)
for (i = 1; i <= n->num_namespaces; i++) {
ns = nvme_ns(n, i);
if (!ns) {
- continue;
+ if (!active) {
+ ns = nvme_subsys_ns(n->subsys, i);
+ if (!ns) {
+ continue;
+ }
+ } else {
+ continue;
+ }
}
if (ns->params.nsid <= min_nsid || c->csi != ns->csi) {
continue;
@@ -2988,7 +3504,7 @@ static uint16_t nvme_identify_nslist_csi(NvmeCtrl *n, NvmeRequest *req)
}
}
- return nvme_dma(n, list, data_len, DMA_DIRECTION_FROM_DEVICE, req);
+ return nvme_c2h(n, list, data_len, req);
}
static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeRequest *req)
@@ -3035,7 +3551,7 @@ static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeRequest *req)
ns_descrs->csi.hdr.nidl = NVME_NIDL_CSI;
ns_descrs->csi.v = ns->csi;
- return nvme_dma(n, list, sizeof(list), DMA_DIRECTION_FROM_DEVICE, req);
+ return nvme_c2h(n, list, sizeof(list), req);
}
static uint16_t nvme_identify_cmd_set(NvmeCtrl *n, NvmeRequest *req)
@@ -3048,34 +3564,39 @@ static uint16_t nvme_identify_cmd_set(NvmeCtrl *n, NvmeRequest *req)
NVME_SET_CSI(*list, NVME_CSI_NVM);
NVME_SET_CSI(*list, NVME_CSI_ZONED);
- return nvme_dma(n, list, data_len, DMA_DIRECTION_FROM_DEVICE, req);
+ return nvme_c2h(n, list, data_len, req);
}
static uint16_t nvme_identify(NvmeCtrl *n, NvmeRequest *req)
{
NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
- switch (le32_to_cpu(c->cns)) {
+ trace_pci_nvme_identify(nvme_cid(req), c->cns, le16_to_cpu(c->ctrlid),
+ c->csi);
+
+ switch (c->cns) {
case NVME_ID_CNS_NS:
- /* fall through */
+ return nvme_identify_ns(n, req, true);
case NVME_ID_CNS_NS_PRESENT:
- return nvme_identify_ns(n, req);
+ return nvme_identify_ns(n, req, false);
+ case NVME_ID_CNS_NS_ATTACHED_CTRL_LIST:
+ return nvme_identify_ns_attached_list(n, req);
case NVME_ID_CNS_CS_NS:
- /* fall through */
+ return nvme_identify_ns_csi(n, req, true);
case NVME_ID_CNS_CS_NS_PRESENT:
- return nvme_identify_ns_csi(n, req);
+ return nvme_identify_ns_csi(n, req, false);
case NVME_ID_CNS_CTRL:
return nvme_identify_ctrl(n, req);
case NVME_ID_CNS_CS_CTRL:
return nvme_identify_ctrl_csi(n, req);
case NVME_ID_CNS_NS_ACTIVE_LIST:
- /* fall through */
+ return nvme_identify_nslist(n, req, true);
case NVME_ID_CNS_NS_PRESENT_LIST:
- return nvme_identify_nslist(n, req);
+ return nvme_identify_nslist(n, req, false);
case NVME_ID_CNS_CS_NS_ACTIVE_LIST:
- /* fall through */
+ return nvme_identify_nslist_csi(n, req, true);
case NVME_ID_CNS_CS_NS_PRESENT_LIST:
- return nvme_identify_nslist_csi(n, req);
+ return nvme_identify_nslist_csi(n, req, false);
case NVME_ID_CNS_NS_DESCR_LIST:
return nvme_identify_ns_descr_list(n, req);
case NVME_ID_CNS_IO_COMMAND_SET:
@@ -3137,8 +3658,7 @@ static uint16_t nvme_get_feature_timestamp(NvmeCtrl *n, NvmeRequest *req)
{
uint64_t timestamp = nvme_get_timestamp(n);
- return nvme_dma(n, (uint8_t *)&timestamp, sizeof(timestamp),
- DMA_DIRECTION_FROM_DEVICE, req);
+ return nvme_c2h(n, (uint8_t *)&timestamp, sizeof(timestamp), req);
}
static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeRequest *req)
@@ -3299,8 +3819,7 @@ static uint16_t nvme_set_feature_timestamp(NvmeCtrl *n, NvmeRequest *req)
uint16_t ret;
uint64_t timestamp;
- ret = nvme_dma(n, (uint8_t *)&timestamp, sizeof(timestamp),
- DMA_DIRECTION_TO_DEVICE, req);
+ ret = nvme_h2c(n, (uint8_t *)&timestamp, sizeof(timestamp), req);
if (ret) {
return ret;
}
@@ -3472,6 +3991,71 @@ static uint16_t nvme_aer(NvmeCtrl *n, NvmeRequest *req)
return NVME_NO_COMPLETE;
}
+static void __nvme_select_ns_iocs(NvmeCtrl *n, NvmeNamespace *ns);
+static uint16_t nvme_ns_attachment(NvmeCtrl *n, NvmeRequest *req)
+{
+ NvmeNamespace *ns;
+ NvmeCtrl *ctrl;
+ uint16_t list[NVME_CONTROLLER_LIST_SIZE] = {};
+ uint32_t nsid = le32_to_cpu(req->cmd.nsid);
+ uint32_t dw10 = le32_to_cpu(req->cmd.cdw10);
+ bool attach = !(dw10 & 0xf);
+ uint16_t *nr_ids = &list[0];
+ uint16_t *ids = &list[1];
+ uint16_t ret;
+ int i;
+
+ trace_pci_nvme_ns_attachment(nvme_cid(req), dw10 & 0xf);
+
+ ns = nvme_subsys_ns(n->subsys, nsid);
+ if (!ns) {
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+
+ ret = nvme_h2c(n, (uint8_t *)list, 4096, req);
+ if (ret) {
+ return ret;
+ }
+
+ if (!*nr_ids) {
+ return NVME_NS_CTRL_LIST_INVALID | NVME_DNR;
+ }
+
+ for (i = 0; i < *nr_ids; i++) {
+ ctrl = nvme_subsys_ctrl(n->subsys, ids[i]);
+ if (!ctrl) {
+ return NVME_NS_CTRL_LIST_INVALID | NVME_DNR;
+ }
+
+ if (attach) {
+ if (nvme_ns_is_attached(ctrl, ns)) {
+ return NVME_NS_ALREADY_ATTACHED | NVME_DNR;
+ }
+
+ nvme_ns_attach(ctrl, ns);
+ __nvme_select_ns_iocs(ctrl, ns);
+ } else {
+ if (!nvme_ns_is_attached(ctrl, ns)) {
+ return NVME_NS_NOT_ATTACHED | NVME_DNR;
+ }
+
+ nvme_ns_detach(ctrl, ns);
+ }
+
+ /*
+ * Add namespace id to the changed namespace id list for event clearing
+ * via Get Log Page command.
+ */
+ if (!test_and_set_bit(nsid, ctrl->changed_nsids)) {
+ nvme_enqueue_event(ctrl, NVME_AER_TYPE_NOTICE,
+ NVME_AER_INFO_NOTICE_NS_ATTR_CHANGED,
+ NVME_LOG_CHANGED_NSLIST);
+ }
+ }
+
+ return NVME_SUCCESS;
+}
+
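
The select field in CDW10 bits 3:0 distinguishes attach (0x0) from detach (0x1), and the host supplies the same controller-list layout described earlier as a 4096-byte data buffer. A hedged host-side sketch of preparing that payload for a single target controller (the helper name is illustrative, and byte-order conversion is omitted on the assumption of a little-endian host):

    #include <stdint.h>
    #include <string.h>

    /* Illustrative only: build the 4096-byte controller list payload for a
     * Namespace Attachment command that targets a single controller. */
    static void build_ns_attach_payload(uint8_t buf[4096], uint16_t cntlid)
    {
        uint16_t *list = (uint16_t *)buf;

        memset(buf, 0, 4096);
        list[0] = 1;        /* number of controller identifiers in the list */
        list[1] = cntlid;   /* the controller to attach to (or detach from) */
    }
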
static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req)
{
trace_pci_nvme_admin_cmd(nvme_cid(req), nvme_sqid(req), req->cmd.opcode,
@@ -3482,6 +4066,11 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req)
return NVME_INVALID_OPCODE | NVME_DNR;
}
+ /* SGLs shall not be used for Admin commands in NVMe over PCIe */
+ if (NVME_CMD_FLAGS_PSDT(req->cmd.flags) != NVME_PSDT_PRP) {
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+
switch (req->cmd.opcode) {
case NVME_ADM_CMD_DELETE_SQ:
return nvme_del_sq(n, req);
@@ -3503,6 +4092,8 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req)
return nvme_get_feature(n, req);
case NVME_ADM_CMD_ASYNC_EV_REQ:
return nvme_aer(n, req);
+ case NVME_ADM_CMD_NS_ATTACHMENT:
+ return nvme_ns_attachment(n, req);
default:
assert(false);
}
@@ -3604,6 +4195,25 @@ static void nvme_ctrl_shutdown(NvmeCtrl *n)
}
}
+static void __nvme_select_ns_iocs(NvmeCtrl *n, NvmeNamespace *ns)
+{
+ ns->iocs = nvme_cse_iocs_none;
+ switch (ns->csi) {
+ case NVME_CSI_NVM:
+ if (NVME_CC_CSS(n->bar.cc) != NVME_CC_CSS_ADMIN_ONLY) {
+ ns->iocs = nvme_cse_iocs_nvm;
+ }
+ break;
+ case NVME_CSI_ZONED:
+ if (NVME_CC_CSS(n->bar.cc) == NVME_CC_CSS_CSI) {
+ ns->iocs = nvme_cse_iocs_zoned;
+ } else if (NVME_CC_CSS(n->bar.cc) == NVME_CC_CSS_NVM) {
+ ns->iocs = nvme_cse_iocs_nvm;
+ }
+ break;
+ }
+}
+
static void nvme_select_ns_iocs(NvmeCtrl *n)
{
NvmeNamespace *ns;
@@ -3614,21 +4224,8 @@ static void nvme_select_ns_iocs(NvmeCtrl *n)
if (!ns) {
continue;
}
- ns->iocs = nvme_cse_iocs_none;
- switch (ns->csi) {
- case NVME_CSI_NVM:
- if (NVME_CC_CSS(n->bar.cc) != NVME_CC_CSS_ADMIN_ONLY) {
- ns->iocs = nvme_cse_iocs_nvm;
- }
- break;
- case NVME_CSI_ZONED:
- if (NVME_CC_CSS(n->bar.cc) == NVME_CC_CSS_CSI) {
- ns->iocs = nvme_cse_iocs_zoned;
- } else if (NVME_CC_CSS(n->bar.cc) == NVME_CC_CSS_NVM) {
- ns->iocs = nvme_cse_iocs_nvm;
- }
- break;
- }
+
+ __nvme_select_ns_iocs(n, ns);
}
}
@@ -3726,17 +4323,6 @@ static int nvme_start_ctrl(NvmeCtrl *n)
nvme_init_sq(&n->admin_sq, n, n->bar.asq, 0, 0,
NVME_AQA_ASQS(n->bar.aqa) + 1);
- if (!n->params.zasl_bs) {
- n->zasl = n->params.mdts;
- } else {
- if (n->params.zasl_bs < n->page_size) {
- trace_pci_nvme_err_startfail_zasl_too_small(n->params.zasl_bs,
- n->page_size);
- return -1;
- }
- n->zasl = 31 - clz32(n->params.zasl_bs / n->page_size);
- }
-
nvme_set_timestamp(n, 0ULL);
QTAILQ_INIT(&n->aer_queue);
@@ -4245,11 +4831,10 @@ static void nvme_check_constraints(NvmeCtrl *n, Error **errp)
host_memory_backend_set_mapped(n->pmr.dev, true);
}
- if (n->params.zasl_bs) {
- if (!is_power_of_2(n->params.zasl_bs)) {
- error_setg(errp, "zone append size limit has to be a power of 2");
- return;
- }
+ if (n->params.zasl > n->params.mdts) {
+ error_setg(errp, "zoned.zasl (Zone Append Size Limit) must be less "
+ "than or equal to mdts (Maximum Data Transfer Size)");
+ return;
}
}
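
With this change zoned.zasl is an exponent rather than a byte size: like mdts, it expresses the limit as a power of two in units of the minimum memory page size, and a value of 0 means zone appends are bounded by mdts alone. A minimal sketch of that interpretation, under the assumption that mdts == 0 means no limit is reported (helper name is illustrative):

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>

    /* Illustrative only: check a Zone Append payload length against the
     * zoned.zasl / mdts exponents. Both are powers of two in units of the
     * memory page size; zasl == 0 falls back to mdts, mdts == 0 is unlimited. */
    static bool zone_append_len_ok(size_t len, uint8_t zasl, uint8_t mdts,
                                   size_t page_size)
    {
        uint8_t exp = zasl ? zasl : mdts;

        if (!exp) {
            return true;
        }

        return len <= page_size << exp;
    }
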
@@ -4267,6 +4852,20 @@ static void nvme_init_state(NvmeCtrl *n)
n->aer_reqs = g_new0(NvmeRequest *, n->params.aerl + 1);
}
+static int nvme_attach_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
+{
+ if (nvme_ns_is_attached(n, ns)) {
+ error_setg(errp,
+ "namespace %d is already attached to controller %d",
+ nvme_nsid(ns), n->cntlid);
+ return -1;
+ }
+
+ nvme_ns_attach(n, ns);
+
+ return 0;
+}
+
int nvme_register_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
{
uint32_t nsid = nvme_nsid(ns);
@@ -4298,7 +4897,26 @@ int nvme_register_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
trace_pci_nvme_register_namespace(nsid);
- n->namespaces[nsid - 1] = ns;
+ /*
+ * If subsys is not given, the namespace is always attached to the controller
+ * because there's no subsystem to manage namespace allocation.
+ */
+ if (!n->subsys) {
+ if (ns->params.detached) {
+ error_setg(errp,
+ "detached needs nvme-subsys specified nvme or nvme-ns");
+ return -1;
+ }
+
+ return nvme_attach_namespace(n, ns, errp);
+ } else {
+ if (!ns->params.detached) {
+ return nvme_attach_namespace(n, ns, errp);
+ }
+ }
+
+ n->dmrsl = MIN_NON_ZERO(n->dmrsl,
+ BDRV_REQUEST_MAX_BYTES / nvme_l2b(ns, 1));
return 0;
}
@@ -4405,24 +5023,49 @@ static int nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp)
return 0;
}
+static void nvme_init_subnqn(NvmeCtrl *n)
+{
+ NvmeSubsystem *subsys = n->subsys;
+ NvmeIdCtrl *id = &n->id_ctrl;
+
+ if (!subsys) {
+ snprintf((char *)id->subnqn, sizeof(id->subnqn),
+ "nqn.2019-08.org.qemu:%s", n->params.serial);
+ } else {
+ pstrcpy((char *)id->subnqn, sizeof(id->subnqn), (char *)subsys->subnqn);
+ }
+}
+
static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
{
NvmeIdCtrl *id = &n->id_ctrl;
uint8_t *pci_conf = pci_dev->config;
- char *subnqn;
id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID));
id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID));
strpadcpy((char *)id->mn, sizeof(id->mn), "QEMU NVMe Ctrl", ' ');
strpadcpy((char *)id->fr, sizeof(id->fr), "1.0", ' ');
strpadcpy((char *)id->sn, sizeof(id->sn), n->params.serial, ' ');
+
+ id->cntlid = cpu_to_le16(n->cntlid);
+
+ id->oaes = cpu_to_le32(NVME_OAES_NS_ATTR);
+
id->rab = 6;
- id->ieee[0] = 0x00;
- id->ieee[1] = 0x02;
- id->ieee[2] = 0xb3;
+
+ if (n->params.use_intel_id) {
+ id->ieee[0] = 0xb3;
+ id->ieee[1] = 0x02;
+ id->ieee[2] = 0x00;
+ } else {
+ id->ieee[0] = 0x00;
+ id->ieee[1] = 0x54;
+ id->ieee[2] = 0x52;
+ }
+
id->mdts = n->params.mdts;
id->ver = cpu_to_le32(NVME_SPEC_VER);
- id->oacs = cpu_to_le16(0);
+ id->oacs = cpu_to_le16(NVME_OACS_NS_MGMT);
id->cntrltype = 0x1;
/*
@@ -4450,20 +5093,31 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
id->nn = cpu_to_le32(n->num_namespaces);
id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROES | NVME_ONCS_TIMESTAMP |
NVME_ONCS_FEATURES | NVME_ONCS_DSM |
- NVME_ONCS_COMPARE);
+ NVME_ONCS_COMPARE | NVME_ONCS_COPY);
- id->vwc = (0x2 << 1) | 0x1;
+ /*
+ * NOTE: If this device ever supports a command set that does NOT use 0x0
+ * as a Flush-equivalent operation, support for the broadcast NSID in Flush
+ * should probably be removed.
+ *
+ * See comment in nvme_io_cmd.
+ */
+ id->vwc = NVME_VWC_NSID_BROADCAST_SUPPORT | NVME_VWC_PRESENT;
+
+ id->ocfs = cpu_to_le16(NVME_OCFS_COPY_FORMAT_0);
id->sgls = cpu_to_le32(NVME_CTRL_SGLS_SUPPORT_NO_ALIGN |
NVME_CTRL_SGLS_BITBUCKET);
- subnqn = g_strdup_printf("nqn.2019-08.org.qemu:%s", n->params.serial);
- strpadcpy((char *)id->subnqn, sizeof(id->subnqn), subnqn, '\0');
- g_free(subnqn);
+ nvme_init_subnqn(n);
id->psd[0].mp = cpu_to_le16(0x9c4);
id->psd[0].enlat = cpu_to_le32(0x10);
id->psd[0].exlat = cpu_to_le32(0x4);
+ if (n->subsys) {
+ id->cmic |= NVME_CMIC_MULTI_CTRL;
+ }
+
NVME_CAP_SET_MQES(n->bar.cap, 0x7ff);
NVME_CAP_SET_CQR(n->bar.cap, 1);
NVME_CAP_SET_TO(n->bar.cap, 0xf);
@@ -4478,6 +5132,24 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
n->bar.intmc = n->bar.intms = 0;
}
+static int nvme_init_subsys(NvmeCtrl *n, Error **errp)
+{
+ int cntlid;
+
+ if (!n->subsys) {
+ return 0;
+ }
+
+ cntlid = nvme_subsys_register_ctrl(n, errp);
+ if (cntlid < 0) {
+ return -1;
+ }
+
+ n->cntlid = cntlid;
+
+ return 0;
+}
+
static void nvme_realize(PCIDevice *pci_dev, Error **errp)
{
NvmeCtrl *n = NVME(pci_dev);
@@ -4498,6 +5170,10 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
return;
}
+ if (nvme_init_subsys(n, errp)) {
+ error_propagate(errp, local_err);
+ return;
+ }
nvme_init_ctrl(n, pci_dev);
/* setup a namespace if the controller drive property was given */
@@ -4550,6 +5226,8 @@ static Property nvme_props[] = {
DEFINE_BLOCK_PROPERTIES(NvmeCtrl, namespace.blkconf),
DEFINE_PROP_LINK("pmrdev", NvmeCtrl, pmr.dev, TYPE_MEMORY_BACKEND,
HostMemoryBackend *),
+ DEFINE_PROP_LINK("subsys", NvmeCtrl, subsys, TYPE_NVME_SUBSYS,
+ NvmeSubsystem *),
DEFINE_PROP_STRING("serial", NvmeCtrl, params.serial),
DEFINE_PROP_UINT32("cmb_size_mb", NvmeCtrl, params.cmb_size_mb, 0),
DEFINE_PROP_UINT32("num_queues", NvmeCtrl, params.num_queues, 0),
@@ -4560,8 +5238,7 @@ static Property nvme_props[] = {
DEFINE_PROP_UINT8("mdts", NvmeCtrl, params.mdts, 7),
DEFINE_PROP_BOOL("use-intel-id", NvmeCtrl, params.use_intel_id, false),
DEFINE_PROP_BOOL("legacy-cmb", NvmeCtrl, params.legacy_cmb, false),
- DEFINE_PROP_SIZE32("zoned.append_size_limit", NvmeCtrl, params.zasl_bs,
- NVME_DEFAULT_MAX_ZA_SIZE),
+ DEFINE_PROP_UINT8("zoned.zasl", NvmeCtrl, params.zasl, 0),
DEFINE_PROP_END_OF_LIST(),
};
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index dee6092..4955d64 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -2,6 +2,7 @@
#define HW_NVME_H
#include "block/nvme.h"
+#include "nvme-subsys.h"
#include "nvme-ns.h"
#define NVME_MAX_NAMESPACES 256
@@ -9,6 +10,12 @@
#define NVME_DEFAULT_ZONE_SIZE (128 * MiB)
#define NVME_DEFAULT_MAX_ZA_SIZE (128 * KiB)
+/*
+ * The subsystem's list of allocated namespaces must be at least as large as
+ * a controller's list of attached namespaces.
+ */
+QEMU_BUILD_BUG_ON(NVME_MAX_NAMESPACES > NVME_SUBSYS_MAX_NAMESPACES);
+
typedef struct NvmeParams {
char *serial;
uint32_t num_queues; /* deprecated since 5.1 */
@@ -19,7 +26,7 @@ typedef struct NvmeParams {
uint32_t aer_max_queued;
uint8_t mdts;
bool use_intel_id;
- uint32_t zasl_bs;
+ uint8_t zasl;
bool legacy_cmb;
} NvmeParams;
@@ -28,6 +35,20 @@ typedef struct NvmeAsyncEvent {
NvmeAerResult result;
} NvmeAsyncEvent;
+enum {
+ NVME_SG_ALLOC = 1 << 0,
+ NVME_SG_DMA = 1 << 1,
+};
+
+typedef struct NvmeSg {
+ int flags;
+
+ union {
+ QEMUSGList qsg;
+ QEMUIOVector iov;
+ };
+} NvmeSg;
+
typedef struct NvmeRequest {
struct NvmeSQueue *sq;
struct NvmeNamespace *ns;
@@ -37,8 +58,7 @@ typedef struct NvmeRequest {
NvmeCqe cqe;
NvmeCmd cmd;
BlockAcctCookie acct;
- QEMUSGList qsg;
- QEMUIOVector iov;
+ NvmeSg sg;
QTAILQ_ENTRY(NvmeRequest)entry;
} NvmeRequest;
@@ -68,6 +88,7 @@ static inline const char *nvme_io_opc_str(uint8_t opc)
case NVME_CMD_COMPARE: return "NVME_NVM_CMD_COMPARE";
case NVME_CMD_WRITE_ZEROES: return "NVME_NVM_CMD_WRITE_ZEROES";
case NVME_CMD_DSM: return "NVME_NVM_CMD_DSM";
+ case NVME_CMD_COPY: return "NVME_NVM_CMD_COPY";
case NVME_CMD_ZONE_MGMT_SEND: return "NVME_ZONED_CMD_MGMT_SEND";
case NVME_CMD_ZONE_MGMT_RECV: return "NVME_ZONED_CMD_MGMT_RECV";
case NVME_CMD_ZONE_APPEND: return "NVME_ZONED_CMD_ZONE_APPEND";
@@ -133,6 +154,7 @@ typedef struct NvmeCtrl {
NvmeBus bus;
BlockConf conf;
+ uint16_t cntlid;
bool qs_created;
uint32_t page_size;
uint16_t page_bits;
@@ -168,9 +190,19 @@ typedef struct NvmeCtrl {
QTAILQ_HEAD(, NvmeAsyncEvent) aer_queue;
int aer_queued;
- uint8_t zasl;
+ uint32_t dmrsl;
+
+ /* Namespace IDs start at 1, so the bitmap is sized and indexed 1-based */
+#define NVME_CHANGED_NSID_SIZE (NVME_MAX_NAMESPACES + 1)
+ DECLARE_BITMAP(changed_nsids, NVME_CHANGED_NSID_SIZE);
+
+ NvmeSubsystem *subsys;
NvmeNamespace namespace;
+ /*
+ * Namespaces attached to this controller. If subsys is not given, all
+ * namespaces in this list are always attached.
+ */
NvmeNamespace *namespaces[NVME_MAX_NAMESPACES];
NvmeSQueue **sq;
NvmeCQueue **cq;
@@ -189,6 +221,29 @@ static inline NvmeNamespace *nvme_ns(NvmeCtrl *n, uint32_t nsid)
return n->namespaces[nsid - 1];
}
+static inline bool nvme_ns_is_attached(NvmeCtrl *n, NvmeNamespace *ns)
+{
+ int nsid;
+
+ for (nsid = 1; nsid <= n->num_namespaces; nsid++) {
+ if (nvme_ns(n, nsid) == ns) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static inline void nvme_ns_attach(NvmeCtrl *n, NvmeNamespace *ns)
+{
+ n->namespaces[nvme_nsid(ns) - 1] = ns;
+}
+
+static inline void nvme_ns_detach(NvmeCtrl *n, NvmeNamespace *ns)
+{
+ n->namespaces[nvme_nsid(ns) - 1] = NULL;
+}
+
static inline NvmeCQueue *nvme_cq(NvmeRequest *req)
{
NvmeSQueue *sq = req->sq;
diff --git a/hw/block/trace-events b/hw/block/trace-events
index d32475c..ef06d2e 100644
--- a/hw/block/trace-events
+++ b/hw/block/trace-events
@@ -37,26 +37,36 @@ pci_nvme_dma_read(uint64_t prp1, uint64_t prp2) "DMA read, prp1=0x%"PRIx64" prp2
pci_nvme_map_addr(uint64_t addr, uint64_t len) "addr 0x%"PRIx64" len %"PRIu64""
pci_nvme_map_addr_cmb(uint64_t addr, uint64_t len) "addr 0x%"PRIx64" len %"PRIu64""
pci_nvme_map_prp(uint64_t trans_len, uint32_t len, uint64_t prp1, uint64_t prp2, int num_prps) "trans_len %"PRIu64" len %"PRIu32" prp1 0x%"PRIx64" prp2 0x%"PRIx64" num_prps %d"
-pci_nvme_map_sgl(uint16_t cid, uint8_t typ, uint64_t len) "cid %"PRIu16" type 0x%"PRIx8" len %"PRIu64""
+pci_nvme_map_sgl(uint8_t typ, uint64_t len) "type 0x%"PRIx8" len %"PRIu64""
pci_nvme_io_cmd(uint16_t cid, uint32_t nsid, uint16_t sqid, uint8_t opcode, const char *opname) "cid %"PRIu16" nsid %"PRIu32" sqid %"PRIu16" opc 0x%"PRIx8" opname '%s'"
pci_nvme_admin_cmd(uint16_t cid, uint16_t sqid, uint8_t opcode, const char *opname) "cid %"PRIu16" sqid %"PRIu16" opc 0x%"PRIx8" opname '%s'"
+pci_nvme_flush(uint16_t cid, uint32_t nsid) "cid %"PRIu16" nsid %"PRIu32""
pci_nvme_read(uint16_t cid, uint32_t nsid, uint32_t nlb, uint64_t count, uint64_t lba) "cid %"PRIu16" nsid %"PRIu32" nlb %"PRIu32" count %"PRIu64" lba 0x%"PRIx64""
pci_nvme_write(uint16_t cid, const char *verb, uint32_t nsid, uint32_t nlb, uint64_t count, uint64_t lba) "cid %"PRIu16" opname '%s' nsid %"PRIu32" nlb %"PRIu32" count %"PRIu64" lba 0x%"PRIx64""
pci_nvme_rw_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'"
+pci_nvme_copy(uint16_t cid, uint32_t nsid, uint16_t nr, uint8_t format) "cid %"PRIu16" nsid %"PRIu32" nr %"PRIu16" format 0x%"PRIx8""
+pci_nvme_copy_source_range(uint64_t slba, uint32_t nlb) "slba 0x%"PRIx64" nlb %"PRIu32""
+pci_nvme_copy_in_complete(uint16_t cid) "cid %"PRIu16""
+pci_nvme_copy_cb(uint16_t cid) "cid %"PRIu16""
pci_nvme_block_status(int64_t offset, int64_t bytes, int64_t pnum, int ret, bool zeroed) "offset %"PRId64" bytes %"PRId64" pnum %"PRId64" ret 0x%x zeroed %d"
pci_nvme_dsm(uint16_t cid, uint32_t nsid, uint32_t nr, uint32_t attr) "cid %"PRIu16" nsid %"PRIu32" nr %"PRIu32" attr 0x%"PRIx32""
pci_nvme_dsm_deallocate(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t nlb) "cid %"PRIu16" nsid %"PRIu32" slba %"PRIu64" nlb %"PRIu32""
+pci_nvme_dsm_single_range_limit_exceeded(uint32_t nlb, uint32_t dmrsl) "nlb %"PRIu32" dmrsl %"PRIu32""
pci_nvme_compare(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t nlb) "cid %"PRIu16" nsid %"PRIu32" slba 0x%"PRIx64" nlb %"PRIu32""
pci_nvme_compare_cb(uint16_t cid) "cid %"PRIu16""
pci_nvme_aio_discard_cb(uint16_t cid) "cid %"PRIu16""
+pci_nvme_aio_copy_in_cb(uint16_t cid) "cid %"PRIu16""
pci_nvme_aio_zone_reset_cb(uint16_t cid, uint64_t zslba) "cid %"PRIu16" zslba 0x%"PRIx64""
+pci_nvme_aio_flush_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'"
pci_nvme_create_sq(uint64_t addr, uint16_t sqid, uint16_t cqid, uint16_t qsize, uint16_t qflags) "create submission queue, addr=0x%"PRIx64", sqid=%"PRIu16", cqid=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16""
pci_nvme_create_cq(uint64_t addr, uint16_t cqid, uint16_t vector, uint16_t size, uint16_t qflags, int ien) "create completion queue, addr=0x%"PRIx64", cqid=%"PRIu16", vector=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16", ien=%d"
pci_nvme_del_sq(uint16_t qid) "deleting submission queue sqid=%"PRIu16""
pci_nvme_del_cq(uint16_t cqid) "deleted completion queue, cqid=%"PRIu16""
+pci_nvme_identify(uint16_t cid, uint8_t cns, uint16_t ctrlid, uint8_t csi) "cid %"PRIu16" cns 0x%"PRIx8" ctrlid %"PRIu16" csi 0x%"PRIx8""
pci_nvme_identify_ctrl(void) "identify controller"
pci_nvme_identify_ctrl_csi(uint8_t csi) "identify controller, csi=0x%"PRIx8""
pci_nvme_identify_ns(uint32_t ns) "nsid %"PRIu32""
+pci_nvme_identify_ns_attached_list(uint16_t cntid) "cntid=%"PRIu16""
pci_nvme_identify_ns_csi(uint32_t ns, uint8_t csi) "nsid=%"PRIu32", csi=0x%"PRIx8""
pci_nvme_identify_nslist(uint32_t ns) "nsid %"PRIu32""
pci_nvme_identify_nslist_csi(uint16_t ns, uint8_t csi) "nsid=%"PRIu16", csi=0x%"PRIx8""
@@ -75,6 +85,8 @@ pci_nvme_aer(uint16_t cid) "cid %"PRIu16""
pci_nvme_aer_aerl_exceeded(void) "aerl exceeded"
pci_nvme_aer_masked(uint8_t type, uint8_t mask) "type 0x%"PRIx8" mask 0x%"PRIx8""
pci_nvme_aer_post_cqe(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8""
+pci_nvme_ns_attachment(uint16_t cid, uint8_t sel) "cid %"PRIu16", sel=0x%"PRIx8""
+pci_nvme_ns_attachment_attach(uint16_t cntlid, uint32_t nsid) "cntlid=0x%"PRIx16", nsid=0x%"PRIx32""
pci_nvme_enqueue_event(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8""
pci_nvme_enqueue_event_noqueue(int queued) "queued %d"
pci_nvme_enqueue_event_masked(uint8_t typ) "type 0x%"PRIx8""
@@ -107,15 +119,17 @@ pci_nvme_clear_ns_close(uint32_t state, uint64_t slba) "zone state=%"PRIu32", sl
pci_nvme_clear_ns_reset(uint32_t state, uint64_t slba) "zone state=%"PRIu32", slba=%"PRIu64" transitioned to Empty state"
# nvme traces for error conditions
-pci_nvme_err_mdts(uint16_t cid, size_t len) "cid %"PRIu16" len %zu"
+pci_nvme_err_mdts(size_t len) "len %zu"
+pci_nvme_err_zasl(size_t len) "len %zu"
pci_nvme_err_req_status(uint16_t cid, uint32_t nsid, uint16_t status, uint8_t opc) "cid %"PRIu16" nsid %"PRIu32" status 0x%"PRIx16" opc 0x%"PRIx8""
pci_nvme_err_addr_read(uint64_t addr) "addr 0x%"PRIx64""
pci_nvme_err_addr_write(uint64_t addr) "addr 0x%"PRIx64""
pci_nvme_err_cfs(void) "controller fatal status"
pci_nvme_err_aio(uint16_t cid, const char *errname, uint16_t status) "cid %"PRIu16" err '%s' status 0x%"PRIx16""
+pci_nvme_err_copy_invalid_format(uint8_t format) "format 0x%"PRIx8""
pci_nvme_err_invalid_sgld(uint16_t cid, uint8_t typ) "cid %"PRIu16" type 0x%"PRIx8""
pci_nvme_err_invalid_num_sgld(uint16_t cid, uint8_t typ) "cid %"PRIu16" type 0x%"PRIx8""
-pci_nvme_err_invalid_sgl_excess_length(uint16_t cid) "cid %"PRIu16""
+pci_nvme_err_invalid_sgl_excess_length(uint32_t residual) "residual %"PRIu32""
pci_nvme_err_invalid_dma(void) "PRP/SGL is too small for transfer size"
pci_nvme_err_invalid_prplist_ent(uint64_t prplist) "PRP list entry is not page aligned: 0x%"PRIx64""
pci_nvme_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page aligned: 0x%"PRIx64""
@@ -136,7 +150,6 @@ pci_nvme_err_zone_boundary(uint64_t slba, uint32_t nlb, uint64_t zcap) "lba 0x%"
pci_nvme_err_zone_invalid_write(uint64_t slba, uint64_t wp) "lba 0x%"PRIx64" wp 0x%"PRIx64""
pci_nvme_err_zone_write_not_ok(uint64_t slba, uint32_t nlb, uint16_t status) "slba=%"PRIu64", nlb=%"PRIu32", status=0x%"PRIx16""
pci_nvme_err_zone_read_not_ok(uint64_t slba, uint32_t nlb, uint16_t status) "slba=%"PRIu64", nlb=%"PRIu32", status=0x%"PRIx16""
-pci_nvme_err_append_too_large(uint64_t slba, uint32_t nlb, uint8_t zasl) "slba=%"PRIu64", nlb=%"PRIu32", zasl=%"PRIu8""
pci_nvme_err_insuff_active_res(uint32_t max_active) "max_active=%"PRIu32" zone limit exceeded"
pci_nvme_err_insuff_open_res(uint32_t max_open) "max_open=%"PRIu32" zone limit exceeded"
pci_nvme_err_zd_extension_map_error(uint32_t zone_idx) "can't map descriptor extension for zone_idx=%"PRIu32""
diff --git a/include/block/nvme.h b/include/block/nvme.h
index 07cfc92..372d0f2 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -566,6 +566,7 @@ enum NvmeAdminCommands {
NVME_ADM_CMD_ASYNC_EV_REQ = 0x0c,
NVME_ADM_CMD_ACTIVATE_FW = 0x10,
NVME_ADM_CMD_DOWNLOAD_FW = 0x11,
+ NVME_ADM_CMD_NS_ATTACHMENT = 0x15,
NVME_ADM_CMD_FORMAT_NVM = 0x80,
NVME_ADM_CMD_SECURITY_SEND = 0x81,
NVME_ADM_CMD_SECURITY_RECV = 0x82,
@@ -579,6 +580,7 @@ enum NvmeIoCommands {
NVME_CMD_COMPARE = 0x05,
NVME_CMD_WRITE_ZEROES = 0x08,
NVME_CMD_DSM = 0x09,
+ NVME_CMD_COPY = 0x19,
NVME_CMD_ZONE_MGMT_SEND = 0x79,
NVME_CMD_ZONE_MGMT_RECV = 0x7a,
NVME_CMD_ZONE_APPEND = 0x7d,
@@ -724,9 +726,41 @@ typedef struct QEMU_PACKED NvmeDsmRange {
uint64_t slba;
} NvmeDsmRange;
+enum {
+ NVME_COPY_FORMAT_0 = 0x0,
+};
+
+typedef struct QEMU_PACKED NvmeCopyCmd {
+ uint8_t opcode;
+ uint8_t flags;
+ uint16_t cid;
+ uint32_t nsid;
+ uint32_t rsvd2[4];
+ NvmeCmdDptr dptr;
+ uint64_t sdlba;
+ uint8_t nr;
+ uint8_t control[3];
+ uint16_t rsvd13;
+ uint16_t dspec;
+ uint32_t reftag;
+ uint16_t apptag;
+ uint16_t appmask;
+} NvmeCopyCmd;
+
+typedef struct QEMU_PACKED NvmeCopySourceRange {
+ uint8_t rsvd0[8];
+ uint64_t slba;
+ uint16_t nlb;
+ uint8_t rsvd18[6];
+ uint32_t reftag;
+ uint16_t apptag;
+ uint16_t appmask;
+} NvmeCopySourceRange;
+
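
The Copy command carries the destination SLBA in the command itself and reads a list of these 32-byte source range descriptors through the data pointer; nr and each descriptor's nlb are 0's-based counts. A hedged sketch of filling a single-range command with the structures defined above (it assumes this header is included and leaves out the cpu_to_le*() conversions for brevity):

    #include <string.h>

    /* Illustrative only: prepare a one-range Copy command. The source range
     * list itself is transferred separately via cmd->dptr. */
    static void fill_copy_cmd(NvmeCopyCmd *cmd, NvmeCopySourceRange *range,
                              uint32_t nsid, uint64_t sdlba,
                              uint64_t slba, uint16_t nlb)
    {
        memset(cmd, 0, sizeof(*cmd));
        memset(range, 0, sizeof(*range));

        cmd->opcode = NVME_CMD_COPY;
        cmd->nsid = nsid;
        cmd->sdlba = sdlba;
        cmd->nr = 0;              /* 0's-based: a single source range */

        range->slba = slba;
        range->nlb = nlb - 1;     /* 0's-based number of logical blocks */
    }
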
enum NvmeAsyncEventRequest {
NVME_AER_TYPE_ERROR = 0,
NVME_AER_TYPE_SMART = 1,
+ NVME_AER_TYPE_NOTICE = 2,
NVME_AER_TYPE_IO_SPECIFIC = 6,
NVME_AER_TYPE_VENDOR_SPECIFIC = 7,
NVME_AER_INFO_ERR_INVALID_DB_REGISTER = 0,
@@ -738,6 +772,7 @@ enum NvmeAsyncEventRequest {
NVME_AER_INFO_SMART_RELIABILITY = 0,
NVME_AER_INFO_SMART_TEMP_THRESH = 1,
NVME_AER_INFO_SMART_SPARE_THRESH = 2,
+ NVME_AER_INFO_NOTICE_NS_ATTR_CHANGED = 0,
};
typedef struct QEMU_PACKED NvmeAerResult {
@@ -804,9 +839,13 @@ enum NvmeStatusCodes {
NVME_FEAT_NOT_CHANGEABLE = 0x010e,
NVME_FEAT_NOT_NS_SPEC = 0x010f,
NVME_FW_REQ_SUSYSTEM_RESET = 0x0110,
+ NVME_NS_ALREADY_ATTACHED = 0x0118,
+ NVME_NS_NOT_ATTACHED = 0x011A,
+ NVME_NS_CTRL_LIST_INVALID = 0x011C,
NVME_CONFLICTING_ATTRS = 0x0180,
NVME_INVALID_PROT_INFO = 0x0181,
NVME_WRITE_TO_RO = 0x0182,
+ NVME_CMD_SIZE_LIMIT = 0x0183,
NVME_ZONE_BOUNDARY_ERROR = 0x01b8,
NVME_ZONE_FULL = 0x01b9,
NVME_ZONE_READ_ONLY = 0x01ba,
@@ -903,6 +942,7 @@ enum NvmeLogIdentifier {
NVME_LOG_ERROR_INFO = 0x01,
NVME_LOG_SMART_INFO = 0x02,
NVME_LOG_FW_SLOT_INFO = 0x03,
+ NVME_LOG_CHANGED_NSLIST = 0x04,
NVME_LOG_CMD_EFFECTS = 0x05,
};
@@ -918,6 +958,7 @@ typedef struct QEMU_PACKED NvmePSD {
uint8_t resv[16];
} NvmePSD;
+#define NVME_CONTROLLER_LIST_SIZE 2048
#define NVME_IDENTIFY_DATA_SIZE 4096
enum NvmeIdCns {
@@ -930,6 +971,7 @@ enum NvmeIdCns {
NVME_ID_CNS_CS_NS_ACTIVE_LIST = 0x07,
NVME_ID_CNS_NS_PRESENT_LIST = 0x10,
NVME_ID_CNS_NS_PRESENT = 0x11,
+ NVME_ID_CNS_NS_ATTACHED_CTRL_LIST = 0x12,
NVME_ID_CNS_CS_NS_PRESENT_LIST = 0x1a,
NVME_ID_CNS_CS_NS_PRESENT = 0x1b,
NVME_ID_CNS_IO_COMMAND_SET = 0x1c,
@@ -994,7 +1036,7 @@ typedef struct QEMU_PACKED NvmeIdCtrl {
uint8_t nvscc;
uint8_t rsvd531;
uint16_t acwu;
- uint8_t rsvd534[2];
+ uint16_t ocfs;
uint32_t sgls;
uint8_t rsvd540[228];
uint8_t subnqn[256];
@@ -1008,10 +1050,25 @@ typedef struct NvmeIdCtrlZoned {
uint8_t rsvd1[4095];
} NvmeIdCtrlZoned;
+typedef struct NvmeIdCtrlNvm {
+ uint8_t vsl;
+ uint8_t wzsl;
+ uint8_t wusl;
+ uint8_t dmrl;
+ uint32_t dmrsl;
+ uint64_t dmsl;
+ uint8_t rsvd16[4080];
+} NvmeIdCtrlNvm;
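
dmrsl (Dataset Management Ranges Size Limit) advertises the largest number of logical blocks a host should place in a single DSM range, with 0 meaning no limit is reported; the controller code earlier in this patch derives it from BDRV_REQUEST_MAX_BYTES. A trivial sketch of a host honouring it (helper name is illustrative):

    #include <stdint.h>

    /* Illustrative only: clamp one DSM range to the advertised DMRSL.
     * A dmrsl of 0 means the controller reports no per-range limit. */
    static uint32_t clamp_dsm_range_nlb(uint32_t nlb, uint32_t dmrsl)
    {
        if (dmrsl && nlb > dmrsl) {
            return dmrsl;
        }

        return nlb;
    }
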
+
+enum NvmeIdCtrlOaes {
+ NVME_OAES_NS_ATTR = 1 << 8,
+};
+
enum NvmeIdCtrlOacs {
NVME_OACS_SECURITY = 1 << 0,
NVME_OACS_FORMAT = 1 << 1,
NVME_OACS_FW = 1 << 2,
+ NVME_OACS_NS_MGMT = 1 << 3,
};
enum NvmeIdCtrlOncs {
@@ -1022,6 +1079,19 @@ enum NvmeIdCtrlOncs {
NVME_ONCS_FEATURES = 1 << 4,
NVME_ONCS_RESRVATIONS = 1 << 5,
NVME_ONCS_TIMESTAMP = 1 << 6,
+ NVME_ONCS_COPY = 1 << 8,
+};
+
+enum NvmeIdCtrlOcfs {
+ NVME_OCFS_COPY_FORMAT_0 = 1 << 0,
+};
+
+enum NvmeIdctrlVwc {
+ NVME_VWC_PRESENT = 1 << 0,
+ NVME_VWC_NSID_BROADCAST_NO_SUPPORT = 0 << 1,
+ NVME_VWC_NSID_BROADCAST_RESERVED = 1 << 1,
+ NVME_VWC_NSID_BROADCAST_CTRL_SPEC = 2 << 1,
+ NVME_VWC_NSID_BROADCAST_SUPPORT = 3 << 1,
};
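
These names decode bits 2:1 of the Identify Controller VWC field, which state whether NSID 0xffffffff may be used with Flush; the device above now reports 11b (supported) together with bit 0 (volatile write cache present). A one-line sketch of the check a host might perform (assumes this header is included):

    #include <stdbool.h>
    #include <stdint.h>

    /* Illustrative only: is the broadcast NSID accepted for Flush? */
    static bool vwc_flush_broadcast_supported(uint8_t vwc)
    {
        return (vwc & 0x6) == NVME_VWC_NSID_BROADCAST_SUPPORT;
    }
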
enum NvmeIdCtrlFrmw {
@@ -1034,6 +1104,10 @@ enum NvmeIdCtrlLpa {
NVME_LPA_EXTENDED = 1 << 2,
};
+enum NvmeIdCtrlCmic {
+ NVME_CMIC_MULTI_CTRL = 1 << 1,
+};
+
#define NVME_CTRL_SQES_MIN(sqes) ((sqes) & 0xf)
#define NVME_CTRL_SQES_MAX(sqes) (((sqes) >> 4) & 0xf)
#define NVME_CTRL_CQES_MIN(cqes) ((cqes) & 0xf)
@@ -1171,7 +1245,10 @@ typedef struct QEMU_PACKED NvmeIdNs {
uint16_t npdg;
uint16_t npda;
uint16_t nows;
- uint8_t rsvd74[30];
+ uint16_t mssrl;
+ uint32_t mcl;
+ uint8_t msrc;
+ uint8_t rsvd81[23];
uint8_t nguid[16];
uint64_t eui64;
NvmeLBAF lbaf[16];
@@ -1199,6 +1276,10 @@ enum NvmeNsIdentifierType {
NVME_NIDT_CSI = 0x04,
};
+enum NvmeIdNsNmic {
+ NVME_NMIC_NS_SHARED = 1 << 0,
+};
+
enum NvmeCsi {
NVME_CSI_NVM = 0x00,
NVME_CSI_ZONED = 0x02,
@@ -1323,6 +1404,7 @@ static inline void _nvme_check_size(void)
QEMU_BUILD_BUG_ON(sizeof(NvmeZonedResult) != 8);
QEMU_BUILD_BUG_ON(sizeof(NvmeCqe) != 16);
QEMU_BUILD_BUG_ON(sizeof(NvmeDsmRange) != 16);
+ QEMU_BUILD_BUG_ON(sizeof(NvmeCopySourceRange) != 32);
QEMU_BUILD_BUG_ON(sizeof(NvmeCmd) != 64);
QEMU_BUILD_BUG_ON(sizeof(NvmeDeleteQ) != 64);
QEMU_BUILD_BUG_ON(sizeof(NvmeCreateCq) != 64);
@@ -1330,6 +1412,7 @@ static inline void _nvme_check_size(void)
QEMU_BUILD_BUG_ON(sizeof(NvmeIdentify) != 64);
QEMU_BUILD_BUG_ON(sizeof(NvmeRwCmd) != 64);
QEMU_BUILD_BUG_ON(sizeof(NvmeDsmCmd) != 64);
+ QEMU_BUILD_BUG_ON(sizeof(NvmeCopyCmd) != 64);
QEMU_BUILD_BUG_ON(sizeof(NvmeRangeType) != 64);
QEMU_BUILD_BUG_ON(sizeof(NvmeErrorLog) != 64);
QEMU_BUILD_BUG_ON(sizeof(NvmeFwSlotInfoLog) != 512);
@@ -1337,6 +1420,7 @@ static inline void _nvme_check_size(void)
QEMU_BUILD_BUG_ON(sizeof(NvmeEffectsLog) != 4096);
QEMU_BUILD_BUG_ON(sizeof(NvmeIdCtrl) != 4096);
QEMU_BUILD_BUG_ON(sizeof(NvmeIdCtrlZoned) != 4096);
+ QEMU_BUILD_BUG_ON(sizeof(NvmeIdCtrlNvm) != 4096);
QEMU_BUILD_BUG_ON(sizeof(NvmeLBAF) != 4);
QEMU_BUILD_BUG_ON(sizeof(NvmeLBAFE) != 16);
QEMU_BUILD_BUG_ON(sizeof(NvmeIdNs) != 4096);