diff options
author | Stefan Hajnoczi <stefanha@redhat.com> | 2025-03-03 10:20:48 +0800 |
---|---|---|
committer | Stefan Hajnoczi <stefanha@redhat.com> | 2025-03-03 10:20:48 +0800 |
commit | 70fc2bde913ef8a6b77ded6831534d9c79541e17 (patch) | |
tree | f8f275258af4b12f6c23d177837870344aa5bd31 /hw | |
parent | ef3b5f5cd89fb573729a6dc10f1a1c49b20d0113 (diff) | |
parent | cad58ada8f104bf342097a7a683ef594ac949c8d (diff) | |
download | qemu-70fc2bde913ef8a6b77ded6831534d9c79541e17.zip qemu-70fc2bde913ef8a6b77ded6831534d9c79541e17.tar.gz qemu-70fc2bde913ef8a6b77ded6831534d9c79541e17.tar.bz2 |
Merge tag 'pull-nvme-20250227' of https://gitlab.com/birkelund/qemu into staging
nvme queue
# -----BEGIN PGP SIGNATURE-----
#
# iQEzBAABCgAdFiEEUigzqnXi3OaiR2bATeGvMW1PDekFAmfAMFEACgkQTeGvMW1P
# DenstAf9GuLVxVUhKDlAJwyRl9Z3lrPMkKwoYF2B75fmqJhW0wZh5VSh6z/s5Qx7
# h/5soFrAMlcZPg5FO0OkY9d4psPlDHBPnuGqX2zLxx0zZnpC/QThSa6hzmETDwfv
# mXEMA/AnXar9MqjrbeR2QjVRphP9mzWpaK7JLmvX9KYvMVxqXSEq5TuylbIeyBQ5
# rSWlVnuKvVLRVtavDpZjHAk5q6CgO1nQ0N3IyIjZmllphCgrJVX5PMtiLur3dPSF
# nYv2TR3uZJmlHR9qsFEc1aIBKNSBhwBJljRuIJe+yFTI8rxCClNlqMQOfgBJp4z6
# GYHm0w0p0NLn/V5dTqLsJoHs20u46A==
# =703u
# -----END PGP SIGNATURE-----
# gpg: Signature made Thu 27 Feb 2025 17:28:49 HKT
# gpg: using RSA key 522833AA75E2DCE6A24766C04DE1AF316D4F0DE9
# gpg: Good signature from "Klaus Jensen <its@irrelevant.dk>" [unknown]
# gpg: aka "Klaus Jensen <k.jensen@samsung.com>" [unknown]
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg: There is no indication that the signature belongs to the owner.
# Primary key fingerprint: DDCA 4D9C 9EF9 31CC 3468 4272 63D5 6FC5 E55D A838
# Subkey fingerprint: 5228 33AA 75E2 DCE6 A247 66C0 4DE1 AF31 6D4F 0DE9
* tag 'pull-nvme-20250227' of https://gitlab.com/birkelund/qemu:
hw/nvme: remove nvme_aio_err()
hw/nvme: set error status code explicitly for misc commands
hw/nvme: only set command abort requested when cancelled due to Abort
hw/nvme: rework csi handling
hw/nvme: be compliant wrt. dsm processing limits
nvme: fix iocs status code values
hw/nvme: add knob for doorbell buffer config support
hw/nvme: make oacs dynamic
hw/nvme: always initialize a subsystem
hw/nvme: Add OCP SMART / Health Information Extended Log Page
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Diffstat (limited to 'hw')
-rw-r--r-- | hw/nvme/ctrl.c | 460 | ||||
-rw-r--r-- | hw/nvme/ns.c | 62 | ||||
-rw-r--r-- | hw/nvme/nvme.h | 11 |
3 files changed, 311 insertions, 222 deletions
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 8175751..e62c6a3 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -266,7 +266,7 @@ static const uint32_t nvme_feature_cap[NVME_FID_MAX] = { [NVME_FDP_EVENTS] = NVME_FEAT_CAP_CHANGE | NVME_FEAT_CAP_NS, }; -static const uint32_t nvme_cse_acs[256] = { +static const uint32_t nvme_cse_acs_default[256] = { [NVME_ADM_CMD_DELETE_SQ] = NVME_CMD_EFF_CSUPP, [NVME_ADM_CMD_CREATE_SQ] = NVME_CMD_EFF_CSUPP, [NVME_ADM_CMD_GET_LOG_PAGE] = NVME_CMD_EFF_CSUPP, @@ -277,17 +277,14 @@ static const uint32_t nvme_cse_acs[256] = { [NVME_ADM_CMD_SET_FEATURES] = NVME_CMD_EFF_CSUPP, [NVME_ADM_CMD_GET_FEATURES] = NVME_CMD_EFF_CSUPP, [NVME_ADM_CMD_ASYNC_EV_REQ] = NVME_CMD_EFF_CSUPP, - [NVME_ADM_CMD_NS_ATTACHMENT] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_NIC, - [NVME_ADM_CMD_VIRT_MNGMT] = NVME_CMD_EFF_CSUPP, - [NVME_ADM_CMD_DBBUF_CONFIG] = NVME_CMD_EFF_CSUPP, + [NVME_ADM_CMD_NS_ATTACHMENT] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_NIC | + NVME_CMD_EFF_CCC, [NVME_ADM_CMD_FORMAT_NVM] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, [NVME_ADM_CMD_DIRECTIVE_RECV] = NVME_CMD_EFF_CSUPP, [NVME_ADM_CMD_DIRECTIVE_SEND] = NVME_CMD_EFF_CSUPP, }; -static const uint32_t nvme_cse_iocs_none[256]; - -static const uint32_t nvme_cse_iocs_nvm[256] = { +static const uint32_t nvme_cse_iocs_nvm_default[256] = { [NVME_CMD_FLUSH] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, [NVME_CMD_WRITE_ZEROES] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, [NVME_CMD_WRITE] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, @@ -300,7 +297,7 @@ static const uint32_t nvme_cse_iocs_nvm[256] = { [NVME_CMD_IO_MGMT_SEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, }; -static const uint32_t nvme_cse_iocs_zoned[256] = { +static const uint32_t nvme_cse_iocs_zoned_default[256] = { [NVME_CMD_FLUSH] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, [NVME_CMD_WRITE_ZEROES] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, [NVME_CMD_WRITE] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, @@ -309,6 +306,9 @@ static const uint32_t nvme_cse_iocs_zoned[256] = { [NVME_CMD_VERIFY] = NVME_CMD_EFF_CSUPP, [NVME_CMD_COPY] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, [NVME_CMD_COMPARE] = NVME_CMD_EFF_CSUPP, + [NVME_CMD_IO_MGMT_RECV] = NVME_CMD_EFF_CSUPP, + [NVME_CMD_IO_MGMT_SEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, + [NVME_CMD_ZONE_APPEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, [NVME_CMD_ZONE_MGMT_SEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, [NVME_CMD_ZONE_MGMT_RECV] = NVME_CMD_EFF_CSUPP, @@ -1762,47 +1762,6 @@ static uint16_t nvme_check_dulbe(NvmeNamespace *ns, uint64_t slba, return NVME_SUCCESS; } -static void nvme_aio_err(NvmeRequest *req, int ret) -{ - uint16_t status = NVME_SUCCESS; - Error *local_err = NULL; - - switch (req->cmd.opcode) { - case NVME_CMD_READ: - status = NVME_UNRECOVERED_READ; - break; - case NVME_CMD_FLUSH: - case NVME_CMD_WRITE: - case NVME_CMD_WRITE_ZEROES: - case NVME_CMD_ZONE_APPEND: - case NVME_CMD_COPY: - status = NVME_WRITE_FAULT; - break; - default: - status = NVME_INTERNAL_DEV_ERROR; - break; - } - - if (ret == -ECANCELED) { - status = NVME_CMD_ABORT_REQ; - } - - trace_pci_nvme_err_aio(nvme_cid(req), strerror(-ret), status); - - error_setg_errno(&local_err, -ret, "aio failed"); - error_report_err(local_err); - - /* - * Set the command status code to the first encountered error but allow a - * subsequent Internal Device Error to trump it. - */ - if (req->status && status != NVME_INTERNAL_DEV_ERROR) { - return; - } - - req->status = status; -} - static inline uint32_t nvme_zone_idx(NvmeNamespace *ns, uint64_t slba) { return ns->zone_size_log2 > 0 ? slba >> ns->zone_size_log2 : @@ -2161,11 +2120,16 @@ static inline bool nvme_is_write(NvmeRequest *req) static void nvme_misc_cb(void *opaque, int ret) { NvmeRequest *req = opaque; + uint16_t cid = nvme_cid(req); - trace_pci_nvme_misc_cb(nvme_cid(req)); + trace_pci_nvme_misc_cb(cid); if (ret) { - nvme_aio_err(req, ret); + if (!req->status) { + req->status = NVME_INTERNAL_DEV_ERROR; + } + + trace_pci_nvme_err_aio(cid, strerror(-ret), req->status); } nvme_enqueue_req_completion(nvme_cq(req), req); @@ -2182,8 +2146,30 @@ void nvme_rw_complete_cb(void *opaque, int ret) trace_pci_nvme_rw_complete_cb(nvme_cid(req), blk_name(blk)); if (ret) { + Error *err = NULL; + block_acct_failed(stats, acct); - nvme_aio_err(req, ret); + + switch (req->cmd.opcode) { + case NVME_CMD_READ: + req->status = NVME_UNRECOVERED_READ; + break; + + case NVME_CMD_WRITE: + case NVME_CMD_WRITE_ZEROES: + case NVME_CMD_ZONE_APPEND: + req->status = NVME_WRITE_FAULT; + break; + + default: + req->status = NVME_INTERNAL_DEV_ERROR; + break; + } + + trace_pci_nvme_err_aio(nvme_cid(req), strerror(-ret), req->status); + + error_setg_errno(&err, -ret, "aio failed"); + error_report_err(err); } else { block_acct_done(stats, acct); } @@ -2268,7 +2254,10 @@ static void nvme_verify_cb(void *opaque, int ret) if (ret) { block_acct_failed(stats, acct); - nvme_aio_err(req, ret); + req->status = NVME_UNRECOVERED_READ; + + trace_pci_nvme_err_aio(nvme_cid(req), strerror(-ret), req->status); + goto out; } @@ -2367,7 +2356,10 @@ static void nvme_compare_mdata_cb(void *opaque, int ret) if (ret) { block_acct_failed(stats, acct); - nvme_aio_err(req, ret); + req->status = NVME_UNRECOVERED_READ; + + trace_pci_nvme_err_aio(nvme_cid(req), strerror(-ret), req->status); + goto out; } @@ -2449,7 +2441,10 @@ static void nvme_compare_data_cb(void *opaque, int ret) if (ret) { block_acct_failed(stats, acct); - nvme_aio_err(req, ret); + req->status = NVME_UNRECOVERED_READ; + + trace_pci_nvme_err_aio(nvme_cid(req), strerror(-ret), req->status); + goto out; } @@ -2928,6 +2923,7 @@ static void nvme_copy_out_completed_cb(void *opaque, int ret) if (ret < 0) { iocb->ret = ret; + req->status = NVME_WRITE_FAULT; goto out; } else if (iocb->ret < 0) { goto out; @@ -2992,6 +2988,7 @@ static void nvme_copy_in_completed_cb(void *opaque, int ret) if (ret < 0) { iocb->ret = ret; + req->status = NVME_UNRECOVERED_READ; goto out; } else if (iocb->ret < 0) { goto out; @@ -3514,6 +3511,7 @@ static void nvme_flush_ns_cb(void *opaque, int ret) if (ret < 0) { iocb->ret = ret; + iocb->req->status = NVME_WRITE_FAULT; goto out; } else if (iocb->ret < 0) { goto out; @@ -4605,6 +4603,61 @@ static uint16_t nvme_io_mgmt_send(NvmeCtrl *n, NvmeRequest *req) }; } +static uint16_t __nvme_io_cmd_nvm(NvmeCtrl *n, NvmeRequest *req) +{ + switch (req->cmd.opcode) { + case NVME_CMD_WRITE: + return nvme_write(n, req); + case NVME_CMD_READ: + return nvme_read(n, req); + case NVME_CMD_COMPARE: + return nvme_compare(n, req); + case NVME_CMD_WRITE_ZEROES: + return nvme_write_zeroes(n, req); + case NVME_CMD_DSM: + return nvme_dsm(n, req); + case NVME_CMD_VERIFY: + return nvme_verify(n, req); + case NVME_CMD_COPY: + return nvme_copy(n, req); + case NVME_CMD_IO_MGMT_RECV: + return nvme_io_mgmt_recv(n, req); + case NVME_CMD_IO_MGMT_SEND: + return nvme_io_mgmt_send(n, req); + } + + g_assert_not_reached(); +} + +static uint16_t nvme_io_cmd_nvm(NvmeCtrl *n, NvmeRequest *req) +{ + if (!(n->cse.iocs.nvm[req->cmd.opcode] & NVME_CMD_EFF_CSUPP)) { + trace_pci_nvme_err_invalid_opc(req->cmd.opcode); + return NVME_INVALID_OPCODE | NVME_DNR; + } + + return __nvme_io_cmd_nvm(n, req); +} + +static uint16_t nvme_io_cmd_zoned(NvmeCtrl *n, NvmeRequest *req) +{ + if (!(n->cse.iocs.zoned[req->cmd.opcode] & NVME_CMD_EFF_CSUPP)) { + trace_pci_nvme_err_invalid_opc(req->cmd.opcode); + return NVME_INVALID_OPCODE | NVME_DNR; + } + + switch (req->cmd.opcode) { + case NVME_CMD_ZONE_APPEND: + return nvme_zone_append(n, req); + case NVME_CMD_ZONE_MGMT_SEND: + return nvme_zone_mgmt_send(n, req); + case NVME_CMD_ZONE_MGMT_RECV: + return nvme_zone_mgmt_recv(n, req); + } + + return __nvme_io_cmd_nvm(n, req); +} + static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req) { NvmeNamespace *ns; @@ -4646,11 +4699,6 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req) return NVME_INVALID_FIELD | NVME_DNR; } - if (!(ns->iocs[req->cmd.opcode] & NVME_CMD_EFF_CSUPP)) { - trace_pci_nvme_err_invalid_opc(req->cmd.opcode); - return NVME_INVALID_OPCODE | NVME_DNR; - } - if (ns->status) { return ns->status; } @@ -4661,36 +4709,14 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req) req->ns = ns; - switch (req->cmd.opcode) { - case NVME_CMD_WRITE_ZEROES: - return nvme_write_zeroes(n, req); - case NVME_CMD_ZONE_APPEND: - return nvme_zone_append(n, req); - case NVME_CMD_WRITE: - return nvme_write(n, req); - case NVME_CMD_READ: - return nvme_read(n, req); - case NVME_CMD_COMPARE: - return nvme_compare(n, req); - case NVME_CMD_DSM: - return nvme_dsm(n, req); - case NVME_CMD_VERIFY: - return nvme_verify(n, req); - case NVME_CMD_COPY: - return nvme_copy(n, req); - case NVME_CMD_ZONE_MGMT_SEND: - return nvme_zone_mgmt_send(n, req); - case NVME_CMD_ZONE_MGMT_RECV: - return nvme_zone_mgmt_recv(n, req); - case NVME_CMD_IO_MGMT_RECV: - return nvme_io_mgmt_recv(n, req); - case NVME_CMD_IO_MGMT_SEND: - return nvme_io_mgmt_send(n, req); - default: - g_assert_not_reached(); + switch (ns->csi) { + case NVME_CSI_NVM: + return nvme_io_cmd_nvm(n, req); + case NVME_CSI_ZONED: + return nvme_io_cmd_zoned(n, req); } - return NVME_INVALID_OPCODE | NVME_DNR; + g_assert_not_reached(); } static void nvme_cq_notifier(EventNotifier *e) @@ -4799,6 +4825,7 @@ static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeRequest *req) while (!QTAILQ_EMPTY(&sq->out_req_list)) { r = QTAILQ_FIRST(&sq->out_req_list); assert(r->aiocb); + r->status = NVME_CMD_ABORT_SQ_DEL; blk_aio_cancel(r->aiocb); } @@ -4917,6 +4944,45 @@ static void nvme_set_blk_stats(NvmeNamespace *ns, struct nvme_stats *stats) stats->write_commands += s->nr_ops[BLOCK_ACCT_WRITE]; } +static uint16_t nvme_ocp_extended_smart_info(NvmeCtrl *n, uint8_t rae, + uint32_t buf_len, uint64_t off, + NvmeRequest *req) +{ + NvmeNamespace *ns = NULL; + NvmeSmartLogExtended smart_l = { 0 }; + struct nvme_stats stats = { 0 }; + uint32_t trans_len; + + if (off >= sizeof(smart_l)) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + /* accumulate all stats from all namespaces */ + for (int i = 1; i <= NVME_MAX_NAMESPACES; i++) { + ns = nvme_ns(n, i); + if (ns) { + nvme_set_blk_stats(ns, &stats); + } + } + + smart_l.physical_media_units_written[0] = cpu_to_le64(stats.units_written); + smart_l.physical_media_units_read[0] = cpu_to_le64(stats.units_read); + smart_l.log_page_version = 0x0005; + + static const uint8_t guid[16] = { + 0xC5, 0xAF, 0x10, 0x28, 0xEA, 0xBF, 0xF2, 0xA4, + 0x9C, 0x4F, 0x6F, 0x7C, 0xC9, 0x14, 0xD5, 0xAF + }; + memcpy(smart_l.log_page_guid, guid, sizeof(smart_l.log_page_guid)); + + if (!rae) { + nvme_clear_events(n, NVME_AER_TYPE_SMART); + } + + trans_len = MIN(sizeof(smart_l) - off, buf_len); + return nvme_c2h(n, (uint8_t *) &smart_l + off, trans_len, req); +} + static uint16_t nvme_smart_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len, uint64_t off, NvmeRequest *req) { @@ -5110,7 +5176,7 @@ static uint16_t nvme_cmd_effects(NvmeCtrl *n, uint8_t csi, uint32_t buf_len, uint64_t off, NvmeRequest *req) { NvmeEffectsLog log = {}; - const uint32_t *src_iocs = NULL; + const uint32_t *iocs = NULL; uint32_t trans_len; if (off >= sizeof(log)) { @@ -5120,25 +5186,26 @@ static uint16_t nvme_cmd_effects(NvmeCtrl *n, uint8_t csi, uint32_t buf_len, switch (NVME_CC_CSS(ldl_le_p(&n->bar.cc))) { case NVME_CC_CSS_NVM: - src_iocs = nvme_cse_iocs_nvm; - /* fall through */ - case NVME_CC_CSS_ADMIN_ONLY: + iocs = n->cse.iocs.nvm; break; - case NVME_CC_CSS_CSI: + + case NVME_CC_CSS_ALL: switch (csi) { case NVME_CSI_NVM: - src_iocs = nvme_cse_iocs_nvm; + iocs = n->cse.iocs.nvm; break; case NVME_CSI_ZONED: - src_iocs = nvme_cse_iocs_zoned; + iocs = n->cse.iocs.zoned; break; } + + break; } - memcpy(log.acs, nvme_cse_acs, sizeof(nvme_cse_acs)); + memcpy(log.acs, n->cse.acs, sizeof(log.acs)); - if (src_iocs) { - memcpy(log.iocs, src_iocs, sizeof(log.iocs)); + if (iocs) { + memcpy(log.iocs, iocs, sizeof(log.iocs)); } trans_len = MIN(sizeof(log) - off, buf_len); @@ -5146,6 +5213,23 @@ static uint16_t nvme_cmd_effects(NvmeCtrl *n, uint8_t csi, uint32_t buf_len, return nvme_c2h(n, ((uint8_t *)&log) + off, trans_len, req); } +static uint16_t nvme_vendor_specific_log(NvmeCtrl *n, uint8_t rae, + uint32_t buf_len, uint64_t off, + NvmeRequest *req, uint8_t lid) +{ + switch (lid) { + case NVME_OCP_EXTENDED_SMART_INFO: + if (n->params.ocp) { + return nvme_ocp_extended_smart_info(n, rae, buf_len, off, req); + } + break; + /* add a case for each additional vendor specific log id */ + } + + trace_pci_nvme_err_invalid_log_page(nvme_cid(req), lid); + return NVME_INVALID_FIELD | NVME_DNR; +} + static size_t sizeof_fdp_conf_descr(size_t nruh, size_t vss) { size_t entry_siz = sizeof(NvmeFdpDescrHdr) + nruh * sizeof(NvmeRuhDescr) @@ -5396,6 +5480,8 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req) return nvme_smart_info(n, rae, len, off, req); case NVME_LOG_FW_SLOT_INFO: return nvme_fw_log_info(n, len, off, req); + case NVME_LOG_VENDOR_START...NVME_LOG_VENDOR_END: + return nvme_vendor_specific_log(n, rae, len, off, req, lid); case NVME_LOG_CHANGED_NSLIST: return nvme_changed_nslist(n, rae, len, off, req); case NVME_LOG_CMD_EFFECTS: @@ -5583,7 +5669,9 @@ static uint16_t nvme_identify_ctrl_csi(NvmeCtrl *n, NvmeRequest *req) switch (c->csi) { case NVME_CSI_NVM: id_nvm->vsl = n->params.vsl; + id_nvm->dmrl = NVME_ID_CTRL_NVM_DMRL_MAX; id_nvm->dmrsl = cpu_to_le32(n->dmrsl); + id_nvm->dmsl = NVME_ID_CTRL_NVM_DMRL_MAX * n->dmrsl; break; case NVME_CSI_ZONED: @@ -5625,7 +5713,7 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeRequest *req, bool active) return nvme_c2h(n, (uint8_t *)&ns->id_ns, sizeof(NvmeIdNs), req); } - return NVME_INVALID_CMD_SET | NVME_DNR; + return NVME_INVALID_IOCS | NVME_DNR; } static uint16_t nvme_identify_ctrl_list(NvmeCtrl *n, NvmeRequest *req, @@ -6048,6 +6136,7 @@ static uint16_t nvme_abort(NvmeCtrl *n, NvmeRequest *req) QTAILQ_FOREACH_SAFE(r, &sq->out_req_list, entry, next) { if (r->cqe.cid == cid) { if (r->aiocb) { + r->status = NVME_CMD_ABORT_REQ; blk_aio_cancel_async(r->aiocb); } break; @@ -6591,7 +6680,7 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest *req) case NVME_COMMAND_SET_PROFILE: if (dw11 & 0x1ff) { trace_pci_nvme_err_invalid_iocsci(dw11 & 0x1ff); - return NVME_CMD_SET_CMB_REJECTED | NVME_DNR; + return NVME_IOCS_COMBINATION_REJECTED | NVME_DNR; } break; case NVME_FDP_MODE: @@ -6640,40 +6729,49 @@ static uint16_t nvme_aer(NvmeCtrl *n, NvmeRequest *req) return NVME_NO_COMPLETE; } -static void nvme_update_dmrsl(NvmeCtrl *n) +static void nvme_update_dsm_limits(NvmeCtrl *n, NvmeNamespace *ns) { - int nsid; + if (ns) { + n->dmrsl = + MIN_NON_ZERO(n->dmrsl, BDRV_REQUEST_MAX_BYTES / nvme_l2b(ns, 1)); + + return; + } - for (nsid = 1; nsid <= NVME_MAX_NAMESPACES; nsid++) { - NvmeNamespace *ns = nvme_ns(n, nsid); + for (uint32_t nsid = 1; nsid <= NVME_MAX_NAMESPACES; nsid++) { + ns = nvme_ns(n, nsid); if (!ns) { continue; } - n->dmrsl = MIN_NON_ZERO(n->dmrsl, - BDRV_REQUEST_MAX_BYTES / nvme_l2b(ns, 1)); + n->dmrsl = + MIN_NON_ZERO(n->dmrsl, BDRV_REQUEST_MAX_BYTES / nvme_l2b(ns, 1)); } } -static void nvme_select_iocs_ns(NvmeCtrl *n, NvmeNamespace *ns) +static bool nvme_csi_supported(NvmeCtrl *n, uint8_t csi) { - uint32_t cc = ldl_le_p(&n->bar.cc); + uint32_t cc; - ns->iocs = nvme_cse_iocs_none; - switch (ns->csi) { + switch (csi) { case NVME_CSI_NVM: - if (NVME_CC_CSS(cc) != NVME_CC_CSS_ADMIN_ONLY) { - ns->iocs = nvme_cse_iocs_nvm; - } - break; + return true; + case NVME_CSI_ZONED: - if (NVME_CC_CSS(cc) == NVME_CC_CSS_CSI) { - ns->iocs = nvme_cse_iocs_zoned; - } else if (NVME_CC_CSS(cc) == NVME_CC_CSS_NVM) { - ns->iocs = nvme_cse_iocs_nvm; - } - break; + cc = ldl_le_p(&n->bar.cc); + + return NVME_CC_CSS(cc) == NVME_CC_CSS_ALL; } + + g_assert_not_reached(); +} + +static void nvme_detach_ns(NvmeCtrl *n, NvmeNamespace *ns) +{ + assert(ns->attached > 0); + + n->namespaces[ns->params.nsid] = NULL; + ns->attached--; } static uint16_t nvme_ns_attachment(NvmeCtrl *n, NvmeRequest *req) @@ -6718,7 +6816,7 @@ static uint16_t nvme_ns_attachment(NvmeCtrl *n, NvmeRequest *req) switch (sel) { case NVME_NS_ATTACHMENT_ATTACH: - if (nvme_ns(ctrl, nsid)) { + if (nvme_ns(n, nsid)) { return NVME_NS_ALREADY_ATTACHED | NVME_DNR; } @@ -6726,20 +6824,18 @@ static uint16_t nvme_ns_attachment(NvmeCtrl *n, NvmeRequest *req) return NVME_NS_PRIVATE | NVME_DNR; } + if (!nvme_csi_supported(n, ns->csi)) { + return NVME_IOCS_NOT_SUPPORTED | NVME_DNR; + } + nvme_attach_ns(ctrl, ns); - nvme_select_iocs_ns(ctrl, ns); + nvme_update_dsm_limits(ctrl, ns); break; case NVME_NS_ATTACHMENT_DETACH: - if (!nvme_ns(ctrl, nsid)) { - return NVME_NS_NOT_ATTACHED | NVME_DNR; - } - - ctrl->namespaces[nsid] = NULL; - ns->attached--; - - nvme_update_dmrsl(ctrl); + nvme_detach_ns(ctrl, ns); + nvme_update_dsm_limits(ctrl, NULL); break; @@ -7242,7 +7338,7 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req) trace_pci_nvme_admin_cmd(nvme_cid(req), nvme_sqid(req), req->cmd.opcode, nvme_adm_opc_str(req->cmd.opcode)); - if (!(nvme_cse_acs[req->cmd.opcode] & NVME_CMD_EFF_CSUPP)) { + if (!(n->cse.acs[req->cmd.opcode] & NVME_CMD_EFF_CSUPP)) { trace_pci_nvme_err_invalid_admin_opc(req->cmd.opcode); return NVME_INVALID_OPCODE | NVME_DNR; } @@ -7589,21 +7685,6 @@ static void nvme_ctrl_shutdown(NvmeCtrl *n) } } -static void nvme_select_iocs(NvmeCtrl *n) -{ - NvmeNamespace *ns; - int i; - - for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { - ns = nvme_ns(n, i); - if (!ns) { - continue; - } - - nvme_select_iocs_ns(n, ns); - } -} - static int nvme_start_ctrl(NvmeCtrl *n) { uint64_t cap = ldq_le_p(&n->bar.cap); @@ -7670,7 +7751,18 @@ static int nvme_start_ctrl(NvmeCtrl *n) nvme_set_timestamp(n, 0ULL); - nvme_select_iocs(n); + /* verify that the command sets of attached namespaces are supported */ + for (int i = 1; i <= NVME_MAX_NAMESPACES; i++) { + NvmeNamespace *ns = nvme_subsys_ns(n->subsys, i); + + if (ns && nvme_csi_supported(n, ns->csi) && !ns->params.detached) { + if (!ns->attached || ns->params.shared) { + nvme_attach_ns(n, ns); + } + } + } + + nvme_update_dsm_limits(n, NULL); return 0; } @@ -8682,6 +8774,12 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev) uint64_t cap = ldq_le_p(&n->bar.cap); NvmeSecCtrlEntry *sctrl = nvme_sctrl(n); uint32_t ctratt; + uint16_t oacs; + + memcpy(n->cse.acs, nvme_cse_acs_default, sizeof(n->cse.acs)); + memcpy(n->cse.iocs.nvm, nvme_cse_iocs_nvm_default, sizeof(n->cse.iocs.nvm)); + memcpy(n->cse.iocs.zoned, nvme_cse_iocs_zoned_default, + sizeof(n->cse.iocs.zoned)); id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID)); id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID)); @@ -8712,9 +8810,23 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev) id->mdts = n->params.mdts; id->ver = cpu_to_le32(NVME_SPEC_VER); - id->oacs = - cpu_to_le16(NVME_OACS_NS_MGMT | NVME_OACS_FORMAT | NVME_OACS_DBBUF | - NVME_OACS_DIRECTIVES); + + oacs = NVME_OACS_NMS | NVME_OACS_FORMAT | NVME_OACS_DIRECTIVES; + + if (n->params.dbcs) { + oacs |= NVME_OACS_DBCS; + + n->cse.acs[NVME_ADM_CMD_DBBUF_CONFIG] = NVME_CMD_EFF_CSUPP; + } + + if (n->params.sriov_max_vfs) { + oacs |= NVME_OACS_VMS; + + n->cse.acs[NVME_ADM_CMD_VIRT_MNGMT] = NVME_CMD_EFF_CSUPP; + } + + id->oacs = cpu_to_le16(oacs); + id->cntrltype = 0x1; /* @@ -8765,15 +8877,13 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev) id->psd[0].enlat = cpu_to_le32(0x10); id->psd[0].exlat = cpu_to_le32(0x4); - if (n->subsys) { - id->cmic |= NVME_CMIC_MULTI_CTRL; - ctratt |= NVME_CTRATT_ENDGRPS; + id->cmic |= NVME_CMIC_MULTI_CTRL; + ctratt |= NVME_CTRATT_ENDGRPS; - id->endgidmax = cpu_to_le16(0x1); + id->endgidmax = cpu_to_le16(0x1); - if (n->subsys->endgrp.fdp.enabled) { - ctratt |= NVME_CTRATT_FDPS; - } + if (n->subsys->endgrp.fdp.enabled) { + ctratt |= NVME_CTRATT_FDPS; } id->ctratt = cpu_to_le32(ctratt); @@ -8781,9 +8891,8 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev) NVME_CAP_SET_MQES(cap, n->params.mqes); NVME_CAP_SET_CQR(cap, 1); NVME_CAP_SET_TO(cap, 0xf); - NVME_CAP_SET_CSS(cap, NVME_CAP_CSS_NVM); - NVME_CAP_SET_CSS(cap, NVME_CAP_CSS_CSI_SUPP); - NVME_CAP_SET_CSS(cap, NVME_CAP_CSS_ADMIN_ONLY); + NVME_CAP_SET_CSS(cap, NVME_CAP_CSS_NCSS); + NVME_CAP_SET_CSS(cap, NVME_CAP_CSS_IOCSS); NVME_CAP_SET_MPSMAX(cap, 4); NVME_CAP_SET_CMBS(cap, n->params.cmb_size_mb ? 1 : 0); NVME_CAP_SET_PMRS(cap, n->pmr.dev ? 1 : 0); @@ -8802,7 +8911,15 @@ static int nvme_init_subsys(NvmeCtrl *n, Error **errp) int cntlid; if (!n->subsys) { - return 0; + DeviceState *dev = qdev_new(TYPE_NVME_SUBSYS); + + qdev_prop_set_string(dev, "nqn", n->params.serial); + + if (!qdev_realize(dev, NULL, errp)) { + return -1; + } + + n->subsys = NVME_SUBSYS(dev); } cntlid = nvme_subsys_register_ctrl(n, errp); @@ -8822,9 +8939,6 @@ void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns) n->namespaces[nsid] = ns; ns->attached++; - - n->dmrsl = MIN_NON_ZERO(n->dmrsl, - BDRV_REQUEST_MAX_BYTES / nvme_l2b(ns, 1)); } static void nvme_realize(PCIDevice *pci_dev, Error **errp) @@ -8880,7 +8994,7 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) return; } - nvme_attach_ns(n, ns); + n->subsys->namespaces[ns->params.nsid] = ns; } } @@ -8892,17 +9006,15 @@ static void nvme_exit(PCIDevice *pci_dev) nvme_ctrl_reset(n, NVME_RESET_FUNCTION); - if (n->subsys) { - for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { - ns = nvme_ns(n, i); - if (ns) { - ns->attached--; - } + for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { + ns = nvme_ns(n, i); + if (ns) { + ns->attached--; } - - nvme_subsys_unregister_ctrl(n->subsys, n); } + nvme_subsys_unregister_ctrl(n->subsys, n); + g_free(n->cq); g_free(n->sq); g_free(n->aer_reqs); @@ -8951,6 +9063,7 @@ static const Property nvme_props[] = { DEFINE_PROP_BOOL("use-intel-id", NvmeCtrl, params.use_intel_id, false), DEFINE_PROP_BOOL("legacy-cmb", NvmeCtrl, params.legacy_cmb, false), DEFINE_PROP_BOOL("ioeventfd", NvmeCtrl, params.ioeventfd, false), + DEFINE_PROP_BOOL("dbcs", NvmeCtrl, params.dbcs, true), DEFINE_PROP_UINT8("zoned.zasl", NvmeCtrl, params.zasl, 0), DEFINE_PROP_BOOL("zoned.auto_transition", NvmeCtrl, params.auto_transition_zones, true), @@ -8971,6 +9084,7 @@ static const Property nvme_props[] = { DEFINE_PROP_BOOL("atomic.dn", NvmeCtrl, params.atomic_dn, 0), DEFINE_PROP_UINT16("atomic.awun", NvmeCtrl, params.atomic_awun, 0), DEFINE_PROP_UINT16("atomic.awupf", NvmeCtrl, params.atomic_awupf, 0), + DEFINE_PROP_BOOL("ocp", NvmeCtrl, params.ocp, false), }; static void nvme_get_smart_warning(Object *obj, Visitor *v, const char *name, diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c index 410df29..98c1e75 100644 --- a/hw/nvme/ns.c +++ b/hw/nvme/ns.c @@ -727,25 +727,14 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp) uint32_t nsid = ns->params.nsid; int i; - if (!n->subsys) { - /* If no subsys, the ns cannot be attached to more than one ctrl. */ - ns->params.shared = false; - if (ns->params.detached) { - error_setg(errp, "detached requires that the nvme device is " - "linked to an nvme-subsys device"); - return; - } - } else { - /* - * If this namespace belongs to a subsystem (through a link on the - * controller device), reparent the device. - */ - if (!qdev_set_parent_bus(dev, &subsys->bus.parent_bus, errp)) { - return; - } - ns->subsys = subsys; - ns->endgrp = &subsys->endgrp; + assert(subsys); + + /* reparent to subsystem bus */ + if (!qdev_set_parent_bus(dev, &subsys->bus.parent_bus, errp)) { + return; } + ns->subsys = subsys; + ns->endgrp = &subsys->endgrp; if (nvme_ns_setup(ns, errp)) { return; @@ -753,7 +742,7 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp) if (!nsid) { for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { - if (nvme_ns(n, i) || nvme_subsys_ns(subsys, i)) { + if (nvme_subsys_ns(subsys, i)) { continue; } @@ -765,38 +754,15 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp) error_setg(errp, "no free namespace id"); return; } - } else { - if (nvme_ns(n, nsid) || nvme_subsys_ns(subsys, nsid)) { - error_setg(errp, "namespace id '%d' already allocated", nsid); - return; - } + } else if (nvme_subsys_ns(subsys, nsid)) { + error_setg(errp, "namespace id '%d' already allocated", nsid); + return; } - if (subsys) { - subsys->namespaces[nsid] = ns; - - ns->id_ns.endgid = cpu_to_le16(0x1); - ns->id_ns_ind.endgrpid = cpu_to_le16(0x1); - - if (ns->params.detached) { - return; - } - - if (ns->params.shared) { - for (i = 0; i < ARRAY_SIZE(subsys->ctrls); i++) { - NvmeCtrl *ctrl = subsys->ctrls[i]; - - if (ctrl && ctrl != SUBSYS_SLOT_RSVD) { - nvme_attach_ns(ctrl, ns); - } - } - - return; - } - - } + subsys->namespaces[nsid] = ns; - nvme_attach_ns(n, ns); + ns->id_ns.endgid = cpu_to_le16(0x1); + ns->id_ns_ind.endgrpid = cpu_to_le16(0x1); } static const Property nvme_ns_props[] = { diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h index 7242206..6f782ba 100644 --- a/hw/nvme/nvme.h +++ b/hw/nvme/nvme.h @@ -237,7 +237,6 @@ typedef struct NvmeNamespace { NvmeLBAF lbaf; unsigned int nlbaf; size_t lbasz; - const uint32_t *iocs; uint8_t csi; uint16_t status; int attached; @@ -539,12 +538,14 @@ typedef struct NvmeParams { bool auto_transition_zones; bool legacy_cmb; bool ioeventfd; + bool dbcs; uint16_t sriov_max_vfs; uint16_t sriov_vq_flexible; uint16_t sriov_vi_flexible; uint32_t sriov_max_vq_per_vf; uint32_t sriov_max_vi_per_vf; bool msix_exclusive_bar; + bool ocp; struct { bool mem; @@ -584,6 +585,14 @@ typedef struct NvmeCtrl { bool dbbuf_enabled; struct { + uint32_t acs[256]; + struct { + uint32_t nvm[256]; + uint32_t zoned[256]; + } iocs; + } cse; + + struct { MemoryRegion mem; uint8_t *buf; bool cmse; |