aboutsummaryrefslogtreecommitdiff
path: root/hw
diff options
context:
space:
mode:
authorStefan Hajnoczi <stefanha@redhat.com>2025-03-03 10:20:48 +0800
committerStefan Hajnoczi <stefanha@redhat.com>2025-03-03 10:20:48 +0800
commit70fc2bde913ef8a6b77ded6831534d9c79541e17 (patch)
treef8f275258af4b12f6c23d177837870344aa5bd31 /hw
parentef3b5f5cd89fb573729a6dc10f1a1c49b20d0113 (diff)
parentcad58ada8f104bf342097a7a683ef594ac949c8d (diff)
downloadqemu-70fc2bde913ef8a6b77ded6831534d9c79541e17.zip
qemu-70fc2bde913ef8a6b77ded6831534d9c79541e17.tar.gz
qemu-70fc2bde913ef8a6b77ded6831534d9c79541e17.tar.bz2
Merge tag 'pull-nvme-20250227' of https://gitlab.com/birkelund/qemu into staging
nvme queue # -----BEGIN PGP SIGNATURE----- # # iQEzBAABCgAdFiEEUigzqnXi3OaiR2bATeGvMW1PDekFAmfAMFEACgkQTeGvMW1P # DenstAf9GuLVxVUhKDlAJwyRl9Z3lrPMkKwoYF2B75fmqJhW0wZh5VSh6z/s5Qx7 # h/5soFrAMlcZPg5FO0OkY9d4psPlDHBPnuGqX2zLxx0zZnpC/QThSa6hzmETDwfv # mXEMA/AnXar9MqjrbeR2QjVRphP9mzWpaK7JLmvX9KYvMVxqXSEq5TuylbIeyBQ5 # rSWlVnuKvVLRVtavDpZjHAk5q6CgO1nQ0N3IyIjZmllphCgrJVX5PMtiLur3dPSF # nYv2TR3uZJmlHR9qsFEc1aIBKNSBhwBJljRuIJe+yFTI8rxCClNlqMQOfgBJp4z6 # GYHm0w0p0NLn/V5dTqLsJoHs20u46A== # =703u # -----END PGP SIGNATURE----- # gpg: Signature made Thu 27 Feb 2025 17:28:49 HKT # gpg: using RSA key 522833AA75E2DCE6A24766C04DE1AF316D4F0DE9 # gpg: Good signature from "Klaus Jensen <its@irrelevant.dk>" [unknown] # gpg: aka "Klaus Jensen <k.jensen@samsung.com>" [unknown] # gpg: WARNING: This key is not certified with a trusted signature! # gpg: There is no indication that the signature belongs to the owner. # Primary key fingerprint: DDCA 4D9C 9EF9 31CC 3468 4272 63D5 6FC5 E55D A838 # Subkey fingerprint: 5228 33AA 75E2 DCE6 A247 66C0 4DE1 AF31 6D4F 0DE9 * tag 'pull-nvme-20250227' of https://gitlab.com/birkelund/qemu: hw/nvme: remove nvme_aio_err() hw/nvme: set error status code explicitly for misc commands hw/nvme: only set command abort requested when cancelled due to Abort hw/nvme: rework csi handling hw/nvme: be compliant wrt. dsm processing limits nvme: fix iocs status code values hw/nvme: add knob for doorbell buffer config support hw/nvme: make oacs dynamic hw/nvme: always initialize a subsystem hw/nvme: Add OCP SMART / Health Information Extended Log Page Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Diffstat (limited to 'hw')
-rw-r--r--hw/nvme/ctrl.c460
-rw-r--r--hw/nvme/ns.c62
-rw-r--r--hw/nvme/nvme.h11
3 files changed, 311 insertions, 222 deletions
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 8175751..e62c6a3 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -266,7 +266,7 @@ static const uint32_t nvme_feature_cap[NVME_FID_MAX] = {
[NVME_FDP_EVENTS] = NVME_FEAT_CAP_CHANGE | NVME_FEAT_CAP_NS,
};
-static const uint32_t nvme_cse_acs[256] = {
+static const uint32_t nvme_cse_acs_default[256] = {
[NVME_ADM_CMD_DELETE_SQ] = NVME_CMD_EFF_CSUPP,
[NVME_ADM_CMD_CREATE_SQ] = NVME_CMD_EFF_CSUPP,
[NVME_ADM_CMD_GET_LOG_PAGE] = NVME_CMD_EFF_CSUPP,
@@ -277,17 +277,14 @@ static const uint32_t nvme_cse_acs[256] = {
[NVME_ADM_CMD_SET_FEATURES] = NVME_CMD_EFF_CSUPP,
[NVME_ADM_CMD_GET_FEATURES] = NVME_CMD_EFF_CSUPP,
[NVME_ADM_CMD_ASYNC_EV_REQ] = NVME_CMD_EFF_CSUPP,
- [NVME_ADM_CMD_NS_ATTACHMENT] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_NIC,
- [NVME_ADM_CMD_VIRT_MNGMT] = NVME_CMD_EFF_CSUPP,
- [NVME_ADM_CMD_DBBUF_CONFIG] = NVME_CMD_EFF_CSUPP,
+ [NVME_ADM_CMD_NS_ATTACHMENT] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_NIC |
+ NVME_CMD_EFF_CCC,
[NVME_ADM_CMD_FORMAT_NVM] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
[NVME_ADM_CMD_DIRECTIVE_RECV] = NVME_CMD_EFF_CSUPP,
[NVME_ADM_CMD_DIRECTIVE_SEND] = NVME_CMD_EFF_CSUPP,
};
-static const uint32_t nvme_cse_iocs_none[256];
-
-static const uint32_t nvme_cse_iocs_nvm[256] = {
+static const uint32_t nvme_cse_iocs_nvm_default[256] = {
[NVME_CMD_FLUSH] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
[NVME_CMD_WRITE_ZEROES] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
[NVME_CMD_WRITE] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
@@ -300,7 +297,7 @@ static const uint32_t nvme_cse_iocs_nvm[256] = {
[NVME_CMD_IO_MGMT_SEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
};
-static const uint32_t nvme_cse_iocs_zoned[256] = {
+static const uint32_t nvme_cse_iocs_zoned_default[256] = {
[NVME_CMD_FLUSH] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
[NVME_CMD_WRITE_ZEROES] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
[NVME_CMD_WRITE] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
@@ -309,6 +306,9 @@ static const uint32_t nvme_cse_iocs_zoned[256] = {
[NVME_CMD_VERIFY] = NVME_CMD_EFF_CSUPP,
[NVME_CMD_COPY] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
[NVME_CMD_COMPARE] = NVME_CMD_EFF_CSUPP,
+ [NVME_CMD_IO_MGMT_RECV] = NVME_CMD_EFF_CSUPP,
+ [NVME_CMD_IO_MGMT_SEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
+
[NVME_CMD_ZONE_APPEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
[NVME_CMD_ZONE_MGMT_SEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
[NVME_CMD_ZONE_MGMT_RECV] = NVME_CMD_EFF_CSUPP,
@@ -1762,47 +1762,6 @@ static uint16_t nvme_check_dulbe(NvmeNamespace *ns, uint64_t slba,
return NVME_SUCCESS;
}
-static void nvme_aio_err(NvmeRequest *req, int ret)
-{
- uint16_t status = NVME_SUCCESS;
- Error *local_err = NULL;
-
- switch (req->cmd.opcode) {
- case NVME_CMD_READ:
- status = NVME_UNRECOVERED_READ;
- break;
- case NVME_CMD_FLUSH:
- case NVME_CMD_WRITE:
- case NVME_CMD_WRITE_ZEROES:
- case NVME_CMD_ZONE_APPEND:
- case NVME_CMD_COPY:
- status = NVME_WRITE_FAULT;
- break;
- default:
- status = NVME_INTERNAL_DEV_ERROR;
- break;
- }
-
- if (ret == -ECANCELED) {
- status = NVME_CMD_ABORT_REQ;
- }
-
- trace_pci_nvme_err_aio(nvme_cid(req), strerror(-ret), status);
-
- error_setg_errno(&local_err, -ret, "aio failed");
- error_report_err(local_err);
-
- /*
- * Set the command status code to the first encountered error but allow a
- * subsequent Internal Device Error to trump it.
- */
- if (req->status && status != NVME_INTERNAL_DEV_ERROR) {
- return;
- }
-
- req->status = status;
-}
-
static inline uint32_t nvme_zone_idx(NvmeNamespace *ns, uint64_t slba)
{
return ns->zone_size_log2 > 0 ? slba >> ns->zone_size_log2 :
@@ -2161,11 +2120,16 @@ static inline bool nvme_is_write(NvmeRequest *req)
static void nvme_misc_cb(void *opaque, int ret)
{
NvmeRequest *req = opaque;
+ uint16_t cid = nvme_cid(req);
- trace_pci_nvme_misc_cb(nvme_cid(req));
+ trace_pci_nvme_misc_cb(cid);
if (ret) {
- nvme_aio_err(req, ret);
+ if (!req->status) {
+ req->status = NVME_INTERNAL_DEV_ERROR;
+ }
+
+ trace_pci_nvme_err_aio(cid, strerror(-ret), req->status);
}
nvme_enqueue_req_completion(nvme_cq(req), req);
@@ -2182,8 +2146,30 @@ void nvme_rw_complete_cb(void *opaque, int ret)
trace_pci_nvme_rw_complete_cb(nvme_cid(req), blk_name(blk));
if (ret) {
+ Error *err = NULL;
+
block_acct_failed(stats, acct);
- nvme_aio_err(req, ret);
+
+ switch (req->cmd.opcode) {
+ case NVME_CMD_READ:
+ req->status = NVME_UNRECOVERED_READ;
+ break;
+
+ case NVME_CMD_WRITE:
+ case NVME_CMD_WRITE_ZEROES:
+ case NVME_CMD_ZONE_APPEND:
+ req->status = NVME_WRITE_FAULT;
+ break;
+
+ default:
+ req->status = NVME_INTERNAL_DEV_ERROR;
+ break;
+ }
+
+ trace_pci_nvme_err_aio(nvme_cid(req), strerror(-ret), req->status);
+
+ error_setg_errno(&err, -ret, "aio failed");
+ error_report_err(err);
} else {
block_acct_done(stats, acct);
}
@@ -2268,7 +2254,10 @@ static void nvme_verify_cb(void *opaque, int ret)
if (ret) {
block_acct_failed(stats, acct);
- nvme_aio_err(req, ret);
+ req->status = NVME_UNRECOVERED_READ;
+
+ trace_pci_nvme_err_aio(nvme_cid(req), strerror(-ret), req->status);
+
goto out;
}
@@ -2367,7 +2356,10 @@ static void nvme_compare_mdata_cb(void *opaque, int ret)
if (ret) {
block_acct_failed(stats, acct);
- nvme_aio_err(req, ret);
+ req->status = NVME_UNRECOVERED_READ;
+
+ trace_pci_nvme_err_aio(nvme_cid(req), strerror(-ret), req->status);
+
goto out;
}
@@ -2449,7 +2441,10 @@ static void nvme_compare_data_cb(void *opaque, int ret)
if (ret) {
block_acct_failed(stats, acct);
- nvme_aio_err(req, ret);
+ req->status = NVME_UNRECOVERED_READ;
+
+ trace_pci_nvme_err_aio(nvme_cid(req), strerror(-ret), req->status);
+
goto out;
}
@@ -2928,6 +2923,7 @@ static void nvme_copy_out_completed_cb(void *opaque, int ret)
if (ret < 0) {
iocb->ret = ret;
+ req->status = NVME_WRITE_FAULT;
goto out;
} else if (iocb->ret < 0) {
goto out;
@@ -2992,6 +2988,7 @@ static void nvme_copy_in_completed_cb(void *opaque, int ret)
if (ret < 0) {
iocb->ret = ret;
+ req->status = NVME_UNRECOVERED_READ;
goto out;
} else if (iocb->ret < 0) {
goto out;
@@ -3514,6 +3511,7 @@ static void nvme_flush_ns_cb(void *opaque, int ret)
if (ret < 0) {
iocb->ret = ret;
+ iocb->req->status = NVME_WRITE_FAULT;
goto out;
} else if (iocb->ret < 0) {
goto out;
@@ -4605,6 +4603,61 @@ static uint16_t nvme_io_mgmt_send(NvmeCtrl *n, NvmeRequest *req)
};
}
+static uint16_t __nvme_io_cmd_nvm(NvmeCtrl *n, NvmeRequest *req)
+{
+ switch (req->cmd.opcode) {
+ case NVME_CMD_WRITE:
+ return nvme_write(n, req);
+ case NVME_CMD_READ:
+ return nvme_read(n, req);
+ case NVME_CMD_COMPARE:
+ return nvme_compare(n, req);
+ case NVME_CMD_WRITE_ZEROES:
+ return nvme_write_zeroes(n, req);
+ case NVME_CMD_DSM:
+ return nvme_dsm(n, req);
+ case NVME_CMD_VERIFY:
+ return nvme_verify(n, req);
+ case NVME_CMD_COPY:
+ return nvme_copy(n, req);
+ case NVME_CMD_IO_MGMT_RECV:
+ return nvme_io_mgmt_recv(n, req);
+ case NVME_CMD_IO_MGMT_SEND:
+ return nvme_io_mgmt_send(n, req);
+ }
+
+ g_assert_not_reached();
+}
+
+static uint16_t nvme_io_cmd_nvm(NvmeCtrl *n, NvmeRequest *req)
+{
+ if (!(n->cse.iocs.nvm[req->cmd.opcode] & NVME_CMD_EFF_CSUPP)) {
+ trace_pci_nvme_err_invalid_opc(req->cmd.opcode);
+ return NVME_INVALID_OPCODE | NVME_DNR;
+ }
+
+ return __nvme_io_cmd_nvm(n, req);
+}
+
+static uint16_t nvme_io_cmd_zoned(NvmeCtrl *n, NvmeRequest *req)
+{
+ if (!(n->cse.iocs.zoned[req->cmd.opcode] & NVME_CMD_EFF_CSUPP)) {
+ trace_pci_nvme_err_invalid_opc(req->cmd.opcode);
+ return NVME_INVALID_OPCODE | NVME_DNR;
+ }
+
+ switch (req->cmd.opcode) {
+ case NVME_CMD_ZONE_APPEND:
+ return nvme_zone_append(n, req);
+ case NVME_CMD_ZONE_MGMT_SEND:
+ return nvme_zone_mgmt_send(n, req);
+ case NVME_CMD_ZONE_MGMT_RECV:
+ return nvme_zone_mgmt_recv(n, req);
+ }
+
+ return __nvme_io_cmd_nvm(n, req);
+}
+
static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req)
{
NvmeNamespace *ns;
@@ -4646,11 +4699,6 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req)
return NVME_INVALID_FIELD | NVME_DNR;
}
- if (!(ns->iocs[req->cmd.opcode] & NVME_CMD_EFF_CSUPP)) {
- trace_pci_nvme_err_invalid_opc(req->cmd.opcode);
- return NVME_INVALID_OPCODE | NVME_DNR;
- }
-
if (ns->status) {
return ns->status;
}
@@ -4661,36 +4709,14 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req)
req->ns = ns;
- switch (req->cmd.opcode) {
- case NVME_CMD_WRITE_ZEROES:
- return nvme_write_zeroes(n, req);
- case NVME_CMD_ZONE_APPEND:
- return nvme_zone_append(n, req);
- case NVME_CMD_WRITE:
- return nvme_write(n, req);
- case NVME_CMD_READ:
- return nvme_read(n, req);
- case NVME_CMD_COMPARE:
- return nvme_compare(n, req);
- case NVME_CMD_DSM:
- return nvme_dsm(n, req);
- case NVME_CMD_VERIFY:
- return nvme_verify(n, req);
- case NVME_CMD_COPY:
- return nvme_copy(n, req);
- case NVME_CMD_ZONE_MGMT_SEND:
- return nvme_zone_mgmt_send(n, req);
- case NVME_CMD_ZONE_MGMT_RECV:
- return nvme_zone_mgmt_recv(n, req);
- case NVME_CMD_IO_MGMT_RECV:
- return nvme_io_mgmt_recv(n, req);
- case NVME_CMD_IO_MGMT_SEND:
- return nvme_io_mgmt_send(n, req);
- default:
- g_assert_not_reached();
+ switch (ns->csi) {
+ case NVME_CSI_NVM:
+ return nvme_io_cmd_nvm(n, req);
+ case NVME_CSI_ZONED:
+ return nvme_io_cmd_zoned(n, req);
}
- return NVME_INVALID_OPCODE | NVME_DNR;
+ g_assert_not_reached();
}
static void nvme_cq_notifier(EventNotifier *e)
@@ -4799,6 +4825,7 @@ static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeRequest *req)
while (!QTAILQ_EMPTY(&sq->out_req_list)) {
r = QTAILQ_FIRST(&sq->out_req_list);
assert(r->aiocb);
+ r->status = NVME_CMD_ABORT_SQ_DEL;
blk_aio_cancel(r->aiocb);
}
@@ -4917,6 +4944,45 @@ static void nvme_set_blk_stats(NvmeNamespace *ns, struct nvme_stats *stats)
stats->write_commands += s->nr_ops[BLOCK_ACCT_WRITE];
}
+static uint16_t nvme_ocp_extended_smart_info(NvmeCtrl *n, uint8_t rae,
+ uint32_t buf_len, uint64_t off,
+ NvmeRequest *req)
+{
+ NvmeNamespace *ns = NULL;
+ NvmeSmartLogExtended smart_l = { 0 };
+ struct nvme_stats stats = { 0 };
+ uint32_t trans_len;
+
+ if (off >= sizeof(smart_l)) {
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+
+ /* accumulate all stats from all namespaces */
+ for (int i = 1; i <= NVME_MAX_NAMESPACES; i++) {
+ ns = nvme_ns(n, i);
+ if (ns) {
+ nvme_set_blk_stats(ns, &stats);
+ }
+ }
+
+ smart_l.physical_media_units_written[0] = cpu_to_le64(stats.units_written);
+ smart_l.physical_media_units_read[0] = cpu_to_le64(stats.units_read);
+ smart_l.log_page_version = 0x0005;
+
+ static const uint8_t guid[16] = {
+ 0xC5, 0xAF, 0x10, 0x28, 0xEA, 0xBF, 0xF2, 0xA4,
+ 0x9C, 0x4F, 0x6F, 0x7C, 0xC9, 0x14, 0xD5, 0xAF
+ };
+ memcpy(smart_l.log_page_guid, guid, sizeof(smart_l.log_page_guid));
+
+ if (!rae) {
+ nvme_clear_events(n, NVME_AER_TYPE_SMART);
+ }
+
+ trans_len = MIN(sizeof(smart_l) - off, buf_len);
+ return nvme_c2h(n, (uint8_t *) &smart_l + off, trans_len, req);
+}
+
static uint16_t nvme_smart_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len,
uint64_t off, NvmeRequest *req)
{
@@ -5110,7 +5176,7 @@ static uint16_t nvme_cmd_effects(NvmeCtrl *n, uint8_t csi, uint32_t buf_len,
uint64_t off, NvmeRequest *req)
{
NvmeEffectsLog log = {};
- const uint32_t *src_iocs = NULL;
+ const uint32_t *iocs = NULL;
uint32_t trans_len;
if (off >= sizeof(log)) {
@@ -5120,25 +5186,26 @@ static uint16_t nvme_cmd_effects(NvmeCtrl *n, uint8_t csi, uint32_t buf_len,
switch (NVME_CC_CSS(ldl_le_p(&n->bar.cc))) {
case NVME_CC_CSS_NVM:
- src_iocs = nvme_cse_iocs_nvm;
- /* fall through */
- case NVME_CC_CSS_ADMIN_ONLY:
+ iocs = n->cse.iocs.nvm;
break;
- case NVME_CC_CSS_CSI:
+
+ case NVME_CC_CSS_ALL:
switch (csi) {
case NVME_CSI_NVM:
- src_iocs = nvme_cse_iocs_nvm;
+ iocs = n->cse.iocs.nvm;
break;
case NVME_CSI_ZONED:
- src_iocs = nvme_cse_iocs_zoned;
+ iocs = n->cse.iocs.zoned;
break;
}
+
+ break;
}
- memcpy(log.acs, nvme_cse_acs, sizeof(nvme_cse_acs));
+ memcpy(log.acs, n->cse.acs, sizeof(log.acs));
- if (src_iocs) {
- memcpy(log.iocs, src_iocs, sizeof(log.iocs));
+ if (iocs) {
+ memcpy(log.iocs, iocs, sizeof(log.iocs));
}
trans_len = MIN(sizeof(log) - off, buf_len);
@@ -5146,6 +5213,23 @@ static uint16_t nvme_cmd_effects(NvmeCtrl *n, uint8_t csi, uint32_t buf_len,
return nvme_c2h(n, ((uint8_t *)&log) + off, trans_len, req);
}
+static uint16_t nvme_vendor_specific_log(NvmeCtrl *n, uint8_t rae,
+ uint32_t buf_len, uint64_t off,
+ NvmeRequest *req, uint8_t lid)
+{
+ switch (lid) {
+ case NVME_OCP_EXTENDED_SMART_INFO:
+ if (n->params.ocp) {
+ return nvme_ocp_extended_smart_info(n, rae, buf_len, off, req);
+ }
+ break;
+ /* add a case for each additional vendor specific log id */
+ }
+
+ trace_pci_nvme_err_invalid_log_page(nvme_cid(req), lid);
+ return NVME_INVALID_FIELD | NVME_DNR;
+}
+
static size_t sizeof_fdp_conf_descr(size_t nruh, size_t vss)
{
size_t entry_siz = sizeof(NvmeFdpDescrHdr) + nruh * sizeof(NvmeRuhDescr)
@@ -5396,6 +5480,8 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req)
return nvme_smart_info(n, rae, len, off, req);
case NVME_LOG_FW_SLOT_INFO:
return nvme_fw_log_info(n, len, off, req);
+ case NVME_LOG_VENDOR_START...NVME_LOG_VENDOR_END:
+ return nvme_vendor_specific_log(n, rae, len, off, req, lid);
case NVME_LOG_CHANGED_NSLIST:
return nvme_changed_nslist(n, rae, len, off, req);
case NVME_LOG_CMD_EFFECTS:
@@ -5583,7 +5669,9 @@ static uint16_t nvme_identify_ctrl_csi(NvmeCtrl *n, NvmeRequest *req)
switch (c->csi) {
case NVME_CSI_NVM:
id_nvm->vsl = n->params.vsl;
+ id_nvm->dmrl = NVME_ID_CTRL_NVM_DMRL_MAX;
id_nvm->dmrsl = cpu_to_le32(n->dmrsl);
+ id_nvm->dmsl = NVME_ID_CTRL_NVM_DMRL_MAX * n->dmrsl;
break;
case NVME_CSI_ZONED:
@@ -5625,7 +5713,7 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeRequest *req, bool active)
return nvme_c2h(n, (uint8_t *)&ns->id_ns, sizeof(NvmeIdNs), req);
}
- return NVME_INVALID_CMD_SET | NVME_DNR;
+ return NVME_INVALID_IOCS | NVME_DNR;
}
static uint16_t nvme_identify_ctrl_list(NvmeCtrl *n, NvmeRequest *req,
@@ -6048,6 +6136,7 @@ static uint16_t nvme_abort(NvmeCtrl *n, NvmeRequest *req)
QTAILQ_FOREACH_SAFE(r, &sq->out_req_list, entry, next) {
if (r->cqe.cid == cid) {
if (r->aiocb) {
+ r->status = NVME_CMD_ABORT_REQ;
blk_aio_cancel_async(r->aiocb);
}
break;
@@ -6591,7 +6680,7 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest *req)
case NVME_COMMAND_SET_PROFILE:
if (dw11 & 0x1ff) {
trace_pci_nvme_err_invalid_iocsci(dw11 & 0x1ff);
- return NVME_CMD_SET_CMB_REJECTED | NVME_DNR;
+ return NVME_IOCS_COMBINATION_REJECTED | NVME_DNR;
}
break;
case NVME_FDP_MODE:
@@ -6640,40 +6729,49 @@ static uint16_t nvme_aer(NvmeCtrl *n, NvmeRequest *req)
return NVME_NO_COMPLETE;
}
-static void nvme_update_dmrsl(NvmeCtrl *n)
+static void nvme_update_dsm_limits(NvmeCtrl *n, NvmeNamespace *ns)
{
- int nsid;
+ if (ns) {
+ n->dmrsl =
+ MIN_NON_ZERO(n->dmrsl, BDRV_REQUEST_MAX_BYTES / nvme_l2b(ns, 1));
+
+ return;
+ }
- for (nsid = 1; nsid <= NVME_MAX_NAMESPACES; nsid++) {
- NvmeNamespace *ns = nvme_ns(n, nsid);
+ for (uint32_t nsid = 1; nsid <= NVME_MAX_NAMESPACES; nsid++) {
+ ns = nvme_ns(n, nsid);
if (!ns) {
continue;
}
- n->dmrsl = MIN_NON_ZERO(n->dmrsl,
- BDRV_REQUEST_MAX_BYTES / nvme_l2b(ns, 1));
+ n->dmrsl =
+ MIN_NON_ZERO(n->dmrsl, BDRV_REQUEST_MAX_BYTES / nvme_l2b(ns, 1));
}
}
-static void nvme_select_iocs_ns(NvmeCtrl *n, NvmeNamespace *ns)
+static bool nvme_csi_supported(NvmeCtrl *n, uint8_t csi)
{
- uint32_t cc = ldl_le_p(&n->bar.cc);
+ uint32_t cc;
- ns->iocs = nvme_cse_iocs_none;
- switch (ns->csi) {
+ switch (csi) {
case NVME_CSI_NVM:
- if (NVME_CC_CSS(cc) != NVME_CC_CSS_ADMIN_ONLY) {
- ns->iocs = nvme_cse_iocs_nvm;
- }
- break;
+ return true;
+
case NVME_CSI_ZONED:
- if (NVME_CC_CSS(cc) == NVME_CC_CSS_CSI) {
- ns->iocs = nvme_cse_iocs_zoned;
- } else if (NVME_CC_CSS(cc) == NVME_CC_CSS_NVM) {
- ns->iocs = nvme_cse_iocs_nvm;
- }
- break;
+ cc = ldl_le_p(&n->bar.cc);
+
+ return NVME_CC_CSS(cc) == NVME_CC_CSS_ALL;
}
+
+ g_assert_not_reached();
+}
+
+static void nvme_detach_ns(NvmeCtrl *n, NvmeNamespace *ns)
+{
+ assert(ns->attached > 0);
+
+ n->namespaces[ns->params.nsid] = NULL;
+ ns->attached--;
}
static uint16_t nvme_ns_attachment(NvmeCtrl *n, NvmeRequest *req)
@@ -6718,7 +6816,7 @@ static uint16_t nvme_ns_attachment(NvmeCtrl *n, NvmeRequest *req)
switch (sel) {
case NVME_NS_ATTACHMENT_ATTACH:
- if (nvme_ns(ctrl, nsid)) {
+ if (nvme_ns(n, nsid)) {
return NVME_NS_ALREADY_ATTACHED | NVME_DNR;
}
@@ -6726,20 +6824,18 @@ static uint16_t nvme_ns_attachment(NvmeCtrl *n, NvmeRequest *req)
return NVME_NS_PRIVATE | NVME_DNR;
}
+ if (!nvme_csi_supported(n, ns->csi)) {
+ return NVME_IOCS_NOT_SUPPORTED | NVME_DNR;
+ }
+
nvme_attach_ns(ctrl, ns);
- nvme_select_iocs_ns(ctrl, ns);
+ nvme_update_dsm_limits(ctrl, ns);
break;
case NVME_NS_ATTACHMENT_DETACH:
- if (!nvme_ns(ctrl, nsid)) {
- return NVME_NS_NOT_ATTACHED | NVME_DNR;
- }
-
- ctrl->namespaces[nsid] = NULL;
- ns->attached--;
-
- nvme_update_dmrsl(ctrl);
+ nvme_detach_ns(ctrl, ns);
+ nvme_update_dsm_limits(ctrl, NULL);
break;
@@ -7242,7 +7338,7 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req)
trace_pci_nvme_admin_cmd(nvme_cid(req), nvme_sqid(req), req->cmd.opcode,
nvme_adm_opc_str(req->cmd.opcode));
- if (!(nvme_cse_acs[req->cmd.opcode] & NVME_CMD_EFF_CSUPP)) {
+ if (!(n->cse.acs[req->cmd.opcode] & NVME_CMD_EFF_CSUPP)) {
trace_pci_nvme_err_invalid_admin_opc(req->cmd.opcode);
return NVME_INVALID_OPCODE | NVME_DNR;
}
@@ -7589,21 +7685,6 @@ static void nvme_ctrl_shutdown(NvmeCtrl *n)
}
}
-static void nvme_select_iocs(NvmeCtrl *n)
-{
- NvmeNamespace *ns;
- int i;
-
- for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
- ns = nvme_ns(n, i);
- if (!ns) {
- continue;
- }
-
- nvme_select_iocs_ns(n, ns);
- }
-}
-
static int nvme_start_ctrl(NvmeCtrl *n)
{
uint64_t cap = ldq_le_p(&n->bar.cap);
@@ -7670,7 +7751,18 @@ static int nvme_start_ctrl(NvmeCtrl *n)
nvme_set_timestamp(n, 0ULL);
- nvme_select_iocs(n);
+ /* verify that the command sets of attached namespaces are supported */
+ for (int i = 1; i <= NVME_MAX_NAMESPACES; i++) {
+ NvmeNamespace *ns = nvme_subsys_ns(n->subsys, i);
+
+ if (ns && nvme_csi_supported(n, ns->csi) && !ns->params.detached) {
+ if (!ns->attached || ns->params.shared) {
+ nvme_attach_ns(n, ns);
+ }
+ }
+ }
+
+ nvme_update_dsm_limits(n, NULL);
return 0;
}
@@ -8682,6 +8774,12 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
uint64_t cap = ldq_le_p(&n->bar.cap);
NvmeSecCtrlEntry *sctrl = nvme_sctrl(n);
uint32_t ctratt;
+ uint16_t oacs;
+
+ memcpy(n->cse.acs, nvme_cse_acs_default, sizeof(n->cse.acs));
+ memcpy(n->cse.iocs.nvm, nvme_cse_iocs_nvm_default, sizeof(n->cse.iocs.nvm));
+ memcpy(n->cse.iocs.zoned, nvme_cse_iocs_zoned_default,
+ sizeof(n->cse.iocs.zoned));
id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID));
id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID));
@@ -8712,9 +8810,23 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
id->mdts = n->params.mdts;
id->ver = cpu_to_le32(NVME_SPEC_VER);
- id->oacs =
- cpu_to_le16(NVME_OACS_NS_MGMT | NVME_OACS_FORMAT | NVME_OACS_DBBUF |
- NVME_OACS_DIRECTIVES);
+
+ oacs = NVME_OACS_NMS | NVME_OACS_FORMAT | NVME_OACS_DIRECTIVES;
+
+ if (n->params.dbcs) {
+ oacs |= NVME_OACS_DBCS;
+
+ n->cse.acs[NVME_ADM_CMD_DBBUF_CONFIG] = NVME_CMD_EFF_CSUPP;
+ }
+
+ if (n->params.sriov_max_vfs) {
+ oacs |= NVME_OACS_VMS;
+
+ n->cse.acs[NVME_ADM_CMD_VIRT_MNGMT] = NVME_CMD_EFF_CSUPP;
+ }
+
+ id->oacs = cpu_to_le16(oacs);
+
id->cntrltype = 0x1;
/*
@@ -8765,15 +8877,13 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
id->psd[0].enlat = cpu_to_le32(0x10);
id->psd[0].exlat = cpu_to_le32(0x4);
- if (n->subsys) {
- id->cmic |= NVME_CMIC_MULTI_CTRL;
- ctratt |= NVME_CTRATT_ENDGRPS;
+ id->cmic |= NVME_CMIC_MULTI_CTRL;
+ ctratt |= NVME_CTRATT_ENDGRPS;
- id->endgidmax = cpu_to_le16(0x1);
+ id->endgidmax = cpu_to_le16(0x1);
- if (n->subsys->endgrp.fdp.enabled) {
- ctratt |= NVME_CTRATT_FDPS;
- }
+ if (n->subsys->endgrp.fdp.enabled) {
+ ctratt |= NVME_CTRATT_FDPS;
}
id->ctratt = cpu_to_le32(ctratt);
@@ -8781,9 +8891,8 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
NVME_CAP_SET_MQES(cap, n->params.mqes);
NVME_CAP_SET_CQR(cap, 1);
NVME_CAP_SET_TO(cap, 0xf);
- NVME_CAP_SET_CSS(cap, NVME_CAP_CSS_NVM);
- NVME_CAP_SET_CSS(cap, NVME_CAP_CSS_CSI_SUPP);
- NVME_CAP_SET_CSS(cap, NVME_CAP_CSS_ADMIN_ONLY);
+ NVME_CAP_SET_CSS(cap, NVME_CAP_CSS_NCSS);
+ NVME_CAP_SET_CSS(cap, NVME_CAP_CSS_IOCSS);
NVME_CAP_SET_MPSMAX(cap, 4);
NVME_CAP_SET_CMBS(cap, n->params.cmb_size_mb ? 1 : 0);
NVME_CAP_SET_PMRS(cap, n->pmr.dev ? 1 : 0);
@@ -8802,7 +8911,15 @@ static int nvme_init_subsys(NvmeCtrl *n, Error **errp)
int cntlid;
if (!n->subsys) {
- return 0;
+ DeviceState *dev = qdev_new(TYPE_NVME_SUBSYS);
+
+ qdev_prop_set_string(dev, "nqn", n->params.serial);
+
+ if (!qdev_realize(dev, NULL, errp)) {
+ return -1;
+ }
+
+ n->subsys = NVME_SUBSYS(dev);
}
cntlid = nvme_subsys_register_ctrl(n, errp);
@@ -8822,9 +8939,6 @@ void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns)
n->namespaces[nsid] = ns;
ns->attached++;
-
- n->dmrsl = MIN_NON_ZERO(n->dmrsl,
- BDRV_REQUEST_MAX_BYTES / nvme_l2b(ns, 1));
}
static void nvme_realize(PCIDevice *pci_dev, Error **errp)
@@ -8880,7 +8994,7 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
return;
}
- nvme_attach_ns(n, ns);
+ n->subsys->namespaces[ns->params.nsid] = ns;
}
}
@@ -8892,17 +9006,15 @@ static void nvme_exit(PCIDevice *pci_dev)
nvme_ctrl_reset(n, NVME_RESET_FUNCTION);
- if (n->subsys) {
- for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
- ns = nvme_ns(n, i);
- if (ns) {
- ns->attached--;
- }
+ for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
+ ns = nvme_ns(n, i);
+ if (ns) {
+ ns->attached--;
}
-
- nvme_subsys_unregister_ctrl(n->subsys, n);
}
+ nvme_subsys_unregister_ctrl(n->subsys, n);
+
g_free(n->cq);
g_free(n->sq);
g_free(n->aer_reqs);
@@ -8951,6 +9063,7 @@ static const Property nvme_props[] = {
DEFINE_PROP_BOOL("use-intel-id", NvmeCtrl, params.use_intel_id, false),
DEFINE_PROP_BOOL("legacy-cmb", NvmeCtrl, params.legacy_cmb, false),
DEFINE_PROP_BOOL("ioeventfd", NvmeCtrl, params.ioeventfd, false),
+ DEFINE_PROP_BOOL("dbcs", NvmeCtrl, params.dbcs, true),
DEFINE_PROP_UINT8("zoned.zasl", NvmeCtrl, params.zasl, 0),
DEFINE_PROP_BOOL("zoned.auto_transition", NvmeCtrl,
params.auto_transition_zones, true),
@@ -8971,6 +9084,7 @@ static const Property nvme_props[] = {
DEFINE_PROP_BOOL("atomic.dn", NvmeCtrl, params.atomic_dn, 0),
DEFINE_PROP_UINT16("atomic.awun", NvmeCtrl, params.atomic_awun, 0),
DEFINE_PROP_UINT16("atomic.awupf", NvmeCtrl, params.atomic_awupf, 0),
+ DEFINE_PROP_BOOL("ocp", NvmeCtrl, params.ocp, false),
};
static void nvme_get_smart_warning(Object *obj, Visitor *v, const char *name,
diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c
index 410df29..98c1e75 100644
--- a/hw/nvme/ns.c
+++ b/hw/nvme/ns.c
@@ -727,25 +727,14 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp)
uint32_t nsid = ns->params.nsid;
int i;
- if (!n->subsys) {
- /* If no subsys, the ns cannot be attached to more than one ctrl. */
- ns->params.shared = false;
- if (ns->params.detached) {
- error_setg(errp, "detached requires that the nvme device is "
- "linked to an nvme-subsys device");
- return;
- }
- } else {
- /*
- * If this namespace belongs to a subsystem (through a link on the
- * controller device), reparent the device.
- */
- if (!qdev_set_parent_bus(dev, &subsys->bus.parent_bus, errp)) {
- return;
- }
- ns->subsys = subsys;
- ns->endgrp = &subsys->endgrp;
+ assert(subsys);
+
+ /* reparent to subsystem bus */
+ if (!qdev_set_parent_bus(dev, &subsys->bus.parent_bus, errp)) {
+ return;
}
+ ns->subsys = subsys;
+ ns->endgrp = &subsys->endgrp;
if (nvme_ns_setup(ns, errp)) {
return;
@@ -753,7 +742,7 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp)
if (!nsid) {
for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
- if (nvme_ns(n, i) || nvme_subsys_ns(subsys, i)) {
+ if (nvme_subsys_ns(subsys, i)) {
continue;
}
@@ -765,38 +754,15 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp)
error_setg(errp, "no free namespace id");
return;
}
- } else {
- if (nvme_ns(n, nsid) || nvme_subsys_ns(subsys, nsid)) {
- error_setg(errp, "namespace id '%d' already allocated", nsid);
- return;
- }
+ } else if (nvme_subsys_ns(subsys, nsid)) {
+ error_setg(errp, "namespace id '%d' already allocated", nsid);
+ return;
}
- if (subsys) {
- subsys->namespaces[nsid] = ns;
-
- ns->id_ns.endgid = cpu_to_le16(0x1);
- ns->id_ns_ind.endgrpid = cpu_to_le16(0x1);
-
- if (ns->params.detached) {
- return;
- }
-
- if (ns->params.shared) {
- for (i = 0; i < ARRAY_SIZE(subsys->ctrls); i++) {
- NvmeCtrl *ctrl = subsys->ctrls[i];
-
- if (ctrl && ctrl != SUBSYS_SLOT_RSVD) {
- nvme_attach_ns(ctrl, ns);
- }
- }
-
- return;
- }
-
- }
+ subsys->namespaces[nsid] = ns;
- nvme_attach_ns(n, ns);
+ ns->id_ns.endgid = cpu_to_le16(0x1);
+ ns->id_ns_ind.endgrpid = cpu_to_le16(0x1);
}
static const Property nvme_ns_props[] = {
diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
index 7242206..6f782ba 100644
--- a/hw/nvme/nvme.h
+++ b/hw/nvme/nvme.h
@@ -237,7 +237,6 @@ typedef struct NvmeNamespace {
NvmeLBAF lbaf;
unsigned int nlbaf;
size_t lbasz;
- const uint32_t *iocs;
uint8_t csi;
uint16_t status;
int attached;
@@ -539,12 +538,14 @@ typedef struct NvmeParams {
bool auto_transition_zones;
bool legacy_cmb;
bool ioeventfd;
+ bool dbcs;
uint16_t sriov_max_vfs;
uint16_t sriov_vq_flexible;
uint16_t sriov_vi_flexible;
uint32_t sriov_max_vq_per_vf;
uint32_t sriov_max_vi_per_vf;
bool msix_exclusive_bar;
+ bool ocp;
struct {
bool mem;
@@ -584,6 +585,14 @@ typedef struct NvmeCtrl {
bool dbbuf_enabled;
struct {
+ uint32_t acs[256];
+ struct {
+ uint32_t nvm[256];
+ uint32_t zoned[256];
+ } iocs;
+ } cse;
+
+ struct {
MemoryRegion mem;
uint8_t *buf;
bool cmse;