diff options
Diffstat (limited to 'hw/nvme/ctrl.c')
-rw-r--r-- | hw/nvme/ctrl.c | 502 |
1 files changed, 313 insertions, 189 deletions
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 68903d1..e764ec7 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -22,7 +22,7 @@ * * Usage * ----- - * See docs/system/nvme.rst for extensive documentation. + * See docs/system/devices/nvme.rst for extensive documentation. * * Add options: * -drive file=<file>,if=none,id=<drive_id> @@ -266,7 +266,7 @@ static const uint32_t nvme_feature_cap[NVME_FID_MAX] = { [NVME_FDP_EVENTS] = NVME_FEAT_CAP_CHANGE | NVME_FEAT_CAP_NS, }; -static const uint32_t nvme_cse_acs[256] = { +static const uint32_t nvme_cse_acs_default[256] = { [NVME_ADM_CMD_DELETE_SQ] = NVME_CMD_EFF_CSUPP, [NVME_ADM_CMD_CREATE_SQ] = NVME_CMD_EFF_CSUPP, [NVME_ADM_CMD_GET_LOG_PAGE] = NVME_CMD_EFF_CSUPP, @@ -277,17 +277,14 @@ static const uint32_t nvme_cse_acs[256] = { [NVME_ADM_CMD_SET_FEATURES] = NVME_CMD_EFF_CSUPP, [NVME_ADM_CMD_GET_FEATURES] = NVME_CMD_EFF_CSUPP, [NVME_ADM_CMD_ASYNC_EV_REQ] = NVME_CMD_EFF_CSUPP, - [NVME_ADM_CMD_NS_ATTACHMENT] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_NIC, - [NVME_ADM_CMD_VIRT_MNGMT] = NVME_CMD_EFF_CSUPP, - [NVME_ADM_CMD_DBBUF_CONFIG] = NVME_CMD_EFF_CSUPP, + [NVME_ADM_CMD_NS_ATTACHMENT] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_NIC | + NVME_CMD_EFF_CCC, [NVME_ADM_CMD_FORMAT_NVM] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, [NVME_ADM_CMD_DIRECTIVE_RECV] = NVME_CMD_EFF_CSUPP, [NVME_ADM_CMD_DIRECTIVE_SEND] = NVME_CMD_EFF_CSUPP, }; -static const uint32_t nvme_cse_iocs_none[256]; - -static const uint32_t nvme_cse_iocs_nvm[256] = { +static const uint32_t nvme_cse_iocs_nvm_default[256] = { [NVME_CMD_FLUSH] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, [NVME_CMD_WRITE_ZEROES] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, [NVME_CMD_WRITE] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, @@ -300,7 +297,7 @@ static const uint32_t nvme_cse_iocs_nvm[256] = { [NVME_CMD_IO_MGMT_SEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, }; -static const uint32_t nvme_cse_iocs_zoned[256] = { +static const uint32_t nvme_cse_iocs_zoned_default[256] = { [NVME_CMD_FLUSH] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, [NVME_CMD_WRITE_ZEROES] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, [NVME_CMD_WRITE] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, @@ -309,6 +306,9 @@ static const uint32_t nvme_cse_iocs_zoned[256] = { [NVME_CMD_VERIFY] = NVME_CMD_EFF_CSUPP, [NVME_CMD_COPY] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, [NVME_CMD_COMPARE] = NVME_CMD_EFF_CSUPP, + [NVME_CMD_IO_MGMT_RECV] = NVME_CMD_EFF_CSUPP, + [NVME_CMD_IO_MGMT_SEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, + [NVME_CMD_ZONE_APPEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, [NVME_CMD_ZONE_MGMT_SEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, [NVME_CMD_ZONE_MGMT_RECV] = NVME_CMD_EFF_CSUPP, @@ -1057,7 +1057,8 @@ static uint16_t nvme_map_sgl(NvmeCtrl *n, NvmeSg *sg, NvmeSglDescriptor sgl, */ #define SEG_CHUNK_SIZE 256 - NvmeSglDescriptor segment[SEG_CHUNK_SIZE], *sgld, *last_sgld; + QEMU_UNINITIALIZED NvmeSglDescriptor segment[SEG_CHUNK_SIZE]; + NvmeSglDescriptor *sgld, *last_sgld; uint64_t nsgld; uint32_t seg_len; uint16_t status; @@ -1762,47 +1763,6 @@ static uint16_t nvme_check_dulbe(NvmeNamespace *ns, uint64_t slba, return NVME_SUCCESS; } -static void nvme_aio_err(NvmeRequest *req, int ret) -{ - uint16_t status = NVME_SUCCESS; - Error *local_err = NULL; - - switch (req->cmd.opcode) { - case NVME_CMD_READ: - status = NVME_UNRECOVERED_READ; - break; - case NVME_CMD_FLUSH: - case NVME_CMD_WRITE: - case NVME_CMD_WRITE_ZEROES: - case NVME_CMD_ZONE_APPEND: - case NVME_CMD_COPY: - status = NVME_WRITE_FAULT; - break; - default: - status = NVME_INTERNAL_DEV_ERROR; - break; - } - - if (ret == -ECANCELED) { - status = NVME_CMD_ABORT_REQ; - } - - trace_pci_nvme_err_aio(nvme_cid(req), strerror(-ret), status); - - error_setg_errno(&local_err, -ret, "aio failed"); - error_report_err(local_err); - - /* - * Set the command status code to the first encountered error but allow a - * subsequent Internal Device Error to trump it. - */ - if (req->status && status != NVME_INTERNAL_DEV_ERROR) { - return; - } - - req->status = status; -} - static inline uint32_t nvme_zone_idx(NvmeNamespace *ns, uint64_t slba) { return ns->zone_size_log2 > 0 ? slba >> ns->zone_size_log2 : @@ -2161,11 +2121,16 @@ static inline bool nvme_is_write(NvmeRequest *req) static void nvme_misc_cb(void *opaque, int ret) { NvmeRequest *req = opaque; + uint16_t cid = nvme_cid(req); - trace_pci_nvme_misc_cb(nvme_cid(req)); + trace_pci_nvme_misc_cb(cid); if (ret) { - nvme_aio_err(req, ret); + if (!req->status) { + req->status = NVME_INTERNAL_DEV_ERROR; + } + + trace_pci_nvme_err_aio(cid, strerror(-ret), req->status); } nvme_enqueue_req_completion(nvme_cq(req), req); @@ -2182,8 +2147,30 @@ void nvme_rw_complete_cb(void *opaque, int ret) trace_pci_nvme_rw_complete_cb(nvme_cid(req), blk_name(blk)); if (ret) { + Error *err = NULL; + block_acct_failed(stats, acct); - nvme_aio_err(req, ret); + + switch (req->cmd.opcode) { + case NVME_CMD_READ: + req->status = NVME_UNRECOVERED_READ; + break; + + case NVME_CMD_WRITE: + case NVME_CMD_WRITE_ZEROES: + case NVME_CMD_ZONE_APPEND: + req->status = NVME_WRITE_FAULT; + break; + + default: + req->status = NVME_INTERNAL_DEV_ERROR; + break; + } + + trace_pci_nvme_err_aio(nvme_cid(req), strerror(-ret), req->status); + + error_setg_errno(&err, -ret, "aio failed"); + error_report_err(err); } else { block_acct_done(stats, acct); } @@ -2268,7 +2255,10 @@ static void nvme_verify_cb(void *opaque, int ret) if (ret) { block_acct_failed(stats, acct); - nvme_aio_err(req, ret); + req->status = NVME_UNRECOVERED_READ; + + trace_pci_nvme_err_aio(nvme_cid(req), strerror(-ret), req->status); + goto out; } @@ -2367,7 +2357,10 @@ static void nvme_compare_mdata_cb(void *opaque, int ret) if (ret) { block_acct_failed(stats, acct); - nvme_aio_err(req, ret); + req->status = NVME_UNRECOVERED_READ; + + trace_pci_nvme_err_aio(nvme_cid(req), strerror(-ret), req->status); + goto out; } @@ -2449,7 +2442,10 @@ static void nvme_compare_data_cb(void *opaque, int ret) if (ret) { block_acct_failed(stats, acct); - nvme_aio_err(req, ret); + req->status = NVME_UNRECOVERED_READ; + + trace_pci_nvme_err_aio(nvme_cid(req), strerror(-ret), req->status); + goto out; } @@ -2928,6 +2924,7 @@ static void nvme_copy_out_completed_cb(void *opaque, int ret) if (ret < 0) { iocb->ret = ret; + req->status = NVME_WRITE_FAULT; goto out; } else if (iocb->ret < 0) { goto out; @@ -2992,6 +2989,7 @@ static void nvme_copy_in_completed_cb(void *opaque, int ret) if (ret < 0) { iocb->ret = ret; + req->status = NVME_UNRECOVERED_READ; goto out; } else if (iocb->ret < 0) { goto out; @@ -3514,6 +3512,7 @@ static void nvme_flush_ns_cb(void *opaque, int ret) if (ret < 0) { iocb->ret = ret; + iocb->req->status = NVME_WRITE_FAULT; goto out; } else if (iocb->ret < 0) { goto out; @@ -4605,6 +4604,61 @@ static uint16_t nvme_io_mgmt_send(NvmeCtrl *n, NvmeRequest *req) }; } +static uint16_t __nvme_io_cmd_nvm(NvmeCtrl *n, NvmeRequest *req) +{ + switch (req->cmd.opcode) { + case NVME_CMD_WRITE: + return nvme_write(n, req); + case NVME_CMD_READ: + return nvme_read(n, req); + case NVME_CMD_COMPARE: + return nvme_compare(n, req); + case NVME_CMD_WRITE_ZEROES: + return nvme_write_zeroes(n, req); + case NVME_CMD_DSM: + return nvme_dsm(n, req); + case NVME_CMD_VERIFY: + return nvme_verify(n, req); + case NVME_CMD_COPY: + return nvme_copy(n, req); + case NVME_CMD_IO_MGMT_RECV: + return nvme_io_mgmt_recv(n, req); + case NVME_CMD_IO_MGMT_SEND: + return nvme_io_mgmt_send(n, req); + } + + g_assert_not_reached(); +} + +static uint16_t nvme_io_cmd_nvm(NvmeCtrl *n, NvmeRequest *req) +{ + if (!(n->cse.iocs.nvm[req->cmd.opcode] & NVME_CMD_EFF_CSUPP)) { + trace_pci_nvme_err_invalid_opc(req->cmd.opcode); + return NVME_INVALID_OPCODE | NVME_DNR; + } + + return __nvme_io_cmd_nvm(n, req); +} + +static uint16_t nvme_io_cmd_zoned(NvmeCtrl *n, NvmeRequest *req) +{ + if (!(n->cse.iocs.zoned[req->cmd.opcode] & NVME_CMD_EFF_CSUPP)) { + trace_pci_nvme_err_invalid_opc(req->cmd.opcode); + return NVME_INVALID_OPCODE | NVME_DNR; + } + + switch (req->cmd.opcode) { + case NVME_CMD_ZONE_APPEND: + return nvme_zone_append(n, req); + case NVME_CMD_ZONE_MGMT_SEND: + return nvme_zone_mgmt_send(n, req); + case NVME_CMD_ZONE_MGMT_RECV: + return nvme_zone_mgmt_recv(n, req); + } + + return __nvme_io_cmd_nvm(n, req); +} + static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req) { NvmeNamespace *ns; @@ -4646,11 +4700,6 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req) return NVME_INVALID_FIELD | NVME_DNR; } - if (!(ns->iocs[req->cmd.opcode] & NVME_CMD_EFF_CSUPP)) { - trace_pci_nvme_err_invalid_opc(req->cmd.opcode); - return NVME_INVALID_OPCODE | NVME_DNR; - } - if (ns->status) { return ns->status; } @@ -4661,36 +4710,14 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req) req->ns = ns; - switch (req->cmd.opcode) { - case NVME_CMD_WRITE_ZEROES: - return nvme_write_zeroes(n, req); - case NVME_CMD_ZONE_APPEND: - return nvme_zone_append(n, req); - case NVME_CMD_WRITE: - return nvme_write(n, req); - case NVME_CMD_READ: - return nvme_read(n, req); - case NVME_CMD_COMPARE: - return nvme_compare(n, req); - case NVME_CMD_DSM: - return nvme_dsm(n, req); - case NVME_CMD_VERIFY: - return nvme_verify(n, req); - case NVME_CMD_COPY: - return nvme_copy(n, req); - case NVME_CMD_ZONE_MGMT_SEND: - return nvme_zone_mgmt_send(n, req); - case NVME_CMD_ZONE_MGMT_RECV: - return nvme_zone_mgmt_recv(n, req); - case NVME_CMD_IO_MGMT_RECV: - return nvme_io_mgmt_recv(n, req); - case NVME_CMD_IO_MGMT_SEND: - return nvme_io_mgmt_send(n, req); - default: - g_assert_not_reached(); + switch (ns->csi) { + case NVME_CSI_NVM: + return nvme_io_cmd_nvm(n, req); + case NVME_CSI_ZONED: + return nvme_io_cmd_zoned(n, req); } - return NVME_INVALID_OPCODE | NVME_DNR; + g_assert_not_reached(); } static void nvme_cq_notifier(EventNotifier *e) @@ -4799,6 +4826,7 @@ static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeRequest *req) while (!QTAILQ_EMPTY(&sq->out_req_list)) { r = QTAILQ_FIRST(&sq->out_req_list); assert(r->aiocb); + r->status = NVME_CMD_ABORT_SQ_DEL; blk_aio_cancel(r->aiocb); } @@ -4917,6 +4945,45 @@ static void nvme_set_blk_stats(NvmeNamespace *ns, struct nvme_stats *stats) stats->write_commands += s->nr_ops[BLOCK_ACCT_WRITE]; } +static uint16_t nvme_ocp_extended_smart_info(NvmeCtrl *n, uint8_t rae, + uint32_t buf_len, uint64_t off, + NvmeRequest *req) +{ + NvmeNamespace *ns = NULL; + NvmeSmartLogExtended smart_l = { 0 }; + struct nvme_stats stats = { 0 }; + uint32_t trans_len; + + if (off >= sizeof(smart_l)) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + /* accumulate all stats from all namespaces */ + for (int i = 1; i <= NVME_MAX_NAMESPACES; i++) { + ns = nvme_ns(n, i); + if (ns) { + nvme_set_blk_stats(ns, &stats); + } + } + + smart_l.physical_media_units_written[0] = cpu_to_le64(stats.units_written); + smart_l.physical_media_units_read[0] = cpu_to_le64(stats.units_read); + smart_l.log_page_version = 0x0005; + + static const uint8_t guid[16] = { + 0xC5, 0xAF, 0x10, 0x28, 0xEA, 0xBF, 0xF2, 0xA4, + 0x9C, 0x4F, 0x6F, 0x7C, 0xC9, 0x14, 0xD5, 0xAF + }; + memcpy(smart_l.log_page_guid, guid, sizeof(smart_l.log_page_guid)); + + if (!rae) { + nvme_clear_events(n, NVME_AER_TYPE_SMART); + } + + trans_len = MIN(sizeof(smart_l) - off, buf_len); + return nvme_c2h(n, (uint8_t *) &smart_l + off, trans_len, req); +} + static uint16_t nvme_smart_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len, uint64_t off, NvmeRequest *req) { @@ -5062,7 +5129,7 @@ static uint16_t nvme_error_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len, static uint16_t nvme_changed_nslist(NvmeCtrl *n, uint8_t rae, uint32_t buf_len, uint64_t off, NvmeRequest *req) { - uint32_t nslist[1024]; + uint32_t nslist[1024] = {}; uint32_t trans_len; int i = 0; uint32_t nsid; @@ -5072,7 +5139,6 @@ static uint16_t nvme_changed_nslist(NvmeCtrl *n, uint8_t rae, uint32_t buf_len, return NVME_INVALID_FIELD | NVME_DNR; } - memset(nslist, 0x0, sizeof(nslist)); trans_len = MIN(sizeof(nslist) - off, buf_len); while ((nsid = find_first_bit(n->changed_nsids, NVME_CHANGED_NSID_SIZE)) != @@ -5110,7 +5176,7 @@ static uint16_t nvme_cmd_effects(NvmeCtrl *n, uint8_t csi, uint32_t buf_len, uint64_t off, NvmeRequest *req) { NvmeEffectsLog log = {}; - const uint32_t *src_iocs = NULL; + const uint32_t *iocs = NULL; uint32_t trans_len; if (off >= sizeof(log)) { @@ -5120,25 +5186,26 @@ static uint16_t nvme_cmd_effects(NvmeCtrl *n, uint8_t csi, uint32_t buf_len, switch (NVME_CC_CSS(ldl_le_p(&n->bar.cc))) { case NVME_CC_CSS_NVM: - src_iocs = nvme_cse_iocs_nvm; - /* fall through */ - case NVME_CC_CSS_ADMIN_ONLY: + iocs = n->cse.iocs.nvm; break; - case NVME_CC_CSS_CSI: + + case NVME_CC_CSS_ALL: switch (csi) { case NVME_CSI_NVM: - src_iocs = nvme_cse_iocs_nvm; + iocs = n->cse.iocs.nvm; break; case NVME_CSI_ZONED: - src_iocs = nvme_cse_iocs_zoned; + iocs = n->cse.iocs.zoned; break; } + + break; } - memcpy(log.acs, nvme_cse_acs, sizeof(nvme_cse_acs)); + memcpy(log.acs, n->cse.acs, sizeof(log.acs)); - if (src_iocs) { - memcpy(log.iocs, src_iocs, sizeof(log.iocs)); + if (iocs) { + memcpy(log.iocs, iocs, sizeof(log.iocs)); } trans_len = MIN(sizeof(log) - off, buf_len); @@ -5146,6 +5213,23 @@ static uint16_t nvme_cmd_effects(NvmeCtrl *n, uint8_t csi, uint32_t buf_len, return nvme_c2h(n, ((uint8_t *)&log) + off, trans_len, req); } +static uint16_t nvme_vendor_specific_log(NvmeCtrl *n, uint8_t rae, + uint32_t buf_len, uint64_t off, + NvmeRequest *req, uint8_t lid) +{ + switch (lid) { + case NVME_OCP_EXTENDED_SMART_INFO: + if (n->params.ocp) { + return nvme_ocp_extended_smart_info(n, rae, buf_len, off, req); + } + break; + /* add a case for each additional vendor specific log id */ + } + + trace_pci_nvme_err_invalid_log_page(nvme_cid(req), lid); + return NVME_INVALID_FIELD | NVME_DNR; +} + static size_t sizeof_fdp_conf_descr(size_t nruh, size_t vss) { size_t entry_siz = sizeof(NvmeFdpDescrHdr) + nruh * sizeof(NvmeRuhDescr) @@ -5396,6 +5480,8 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req) return nvme_smart_info(n, rae, len, off, req); case NVME_LOG_FW_SLOT_INFO: return nvme_fw_log_info(n, len, off, req); + case NVME_LOG_VENDOR_START...NVME_LOG_VENDOR_END: + return nvme_vendor_specific_log(n, rae, len, off, req, lid); case NVME_LOG_CHANGED_NSLIST: return nvme_changed_nslist(n, rae, len, off, req); case NVME_LOG_CMD_EFFECTS: @@ -5583,7 +5669,9 @@ static uint16_t nvme_identify_ctrl_csi(NvmeCtrl *n, NvmeRequest *req) switch (c->csi) { case NVME_CSI_NVM: id_nvm->vsl = n->params.vsl; + id_nvm->dmrl = NVME_ID_CTRL_NVM_DMRL_MAX; id_nvm->dmrsl = cpu_to_le32(n->dmrsl); + id_nvm->dmsl = NVME_ID_CTRL_NVM_DMRL_MAX * n->dmrsl; break; case NVME_CSI_ZONED: @@ -5625,7 +5713,7 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeRequest *req, bool active) return nvme_c2h(n, (uint8_t *)&ns->id_ns, sizeof(NvmeIdNs), req); } - return NVME_INVALID_CMD_SET | NVME_DNR; + return NVME_INVALID_IOCS | NVME_DNR; } static uint16_t nvme_identify_ctrl_list(NvmeCtrl *n, NvmeRequest *req, @@ -6048,6 +6136,7 @@ static uint16_t nvme_abort(NvmeCtrl *n, NvmeRequest *req) QTAILQ_FOREACH_SAFE(r, &sq->out_req_list, entry, next) { if (r->cqe.cid == cid) { if (r->aiocb) { + r->status = NVME_CMD_ABORT_REQ; blk_aio_cancel_async(r->aiocb); } break; @@ -6591,7 +6680,7 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest *req) case NVME_COMMAND_SET_PROFILE: if (dw11 & 0x1ff) { trace_pci_nvme_err_invalid_iocsci(dw11 & 0x1ff); - return NVME_CMD_SET_CMB_REJECTED | NVME_DNR; + return NVME_IOCS_COMBINATION_REJECTED | NVME_DNR; } break; case NVME_FDP_MODE: @@ -6640,40 +6729,49 @@ static uint16_t nvme_aer(NvmeCtrl *n, NvmeRequest *req) return NVME_NO_COMPLETE; } -static void nvme_update_dmrsl(NvmeCtrl *n) +static void nvme_update_dsm_limits(NvmeCtrl *n, NvmeNamespace *ns) { - int nsid; + if (ns) { + n->dmrsl = + MIN_NON_ZERO(n->dmrsl, BDRV_REQUEST_MAX_BYTES / nvme_l2b(ns, 1)); + + return; + } - for (nsid = 1; nsid <= NVME_MAX_NAMESPACES; nsid++) { - NvmeNamespace *ns = nvme_ns(n, nsid); + for (uint32_t nsid = 1; nsid <= NVME_MAX_NAMESPACES; nsid++) { + ns = nvme_ns(n, nsid); if (!ns) { continue; } - n->dmrsl = MIN_NON_ZERO(n->dmrsl, - BDRV_REQUEST_MAX_BYTES / nvme_l2b(ns, 1)); + n->dmrsl = + MIN_NON_ZERO(n->dmrsl, BDRV_REQUEST_MAX_BYTES / nvme_l2b(ns, 1)); } } -static void nvme_select_iocs_ns(NvmeCtrl *n, NvmeNamespace *ns) +static bool nvme_csi_supported(NvmeCtrl *n, uint8_t csi) { - uint32_t cc = ldl_le_p(&n->bar.cc); + uint32_t cc; - ns->iocs = nvme_cse_iocs_none; - switch (ns->csi) { + switch (csi) { case NVME_CSI_NVM: - if (NVME_CC_CSS(cc) != NVME_CC_CSS_ADMIN_ONLY) { - ns->iocs = nvme_cse_iocs_nvm; - } - break; + return true; + case NVME_CSI_ZONED: - if (NVME_CC_CSS(cc) == NVME_CC_CSS_CSI) { - ns->iocs = nvme_cse_iocs_zoned; - } else if (NVME_CC_CSS(cc) == NVME_CC_CSS_NVM) { - ns->iocs = nvme_cse_iocs_nvm; - } - break; + cc = ldl_le_p(&n->bar.cc); + + return NVME_CC_CSS(cc) == NVME_CC_CSS_ALL; } + + g_assert_not_reached(); +} + +static void nvme_detach_ns(NvmeCtrl *n, NvmeNamespace *ns) +{ + assert(ns->attached > 0); + + n->namespaces[ns->params.nsid] = NULL; + ns->attached--; } static uint16_t nvme_ns_attachment(NvmeCtrl *n, NvmeRequest *req) @@ -6718,7 +6816,7 @@ static uint16_t nvme_ns_attachment(NvmeCtrl *n, NvmeRequest *req) switch (sel) { case NVME_NS_ATTACHMENT_ATTACH: - if (nvme_ns(ctrl, nsid)) { + if (nvme_ns(n, nsid)) { return NVME_NS_ALREADY_ATTACHED | NVME_DNR; } @@ -6726,20 +6824,18 @@ static uint16_t nvme_ns_attachment(NvmeCtrl *n, NvmeRequest *req) return NVME_NS_PRIVATE | NVME_DNR; } + if (!nvme_csi_supported(n, ns->csi)) { + return NVME_IOCS_NOT_SUPPORTED | NVME_DNR; + } + nvme_attach_ns(ctrl, ns); - nvme_select_iocs_ns(ctrl, ns); + nvme_update_dsm_limits(ctrl, ns); break; case NVME_NS_ATTACHMENT_DETACH: - if (!nvme_ns(ctrl, nsid)) { - return NVME_NS_NOT_ATTACHED | NVME_DNR; - } - - ctrl->namespaces[nsid] = NULL; - ns->attached--; - - nvme_update_dmrsl(ctrl); + nvme_detach_ns(ctrl, ns); + nvme_update_dsm_limits(ctrl, NULL); break; @@ -7242,7 +7338,7 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req) trace_pci_nvme_admin_cmd(nvme_cid(req), nvme_sqid(req), req->cmd.opcode, nvme_adm_opc_str(req->cmd.opcode)); - if (!(nvme_cse_acs[req->cmd.opcode] & NVME_CMD_EFF_CSUPP)) { + if (!(n->cse.acs[req->cmd.opcode] & NVME_CMD_EFF_CSUPP)) { trace_pci_nvme_err_invalid_admin_opc(req->cmd.opcode); return NVME_INVALID_OPCODE | NVME_DNR; } @@ -7589,21 +7685,6 @@ static void nvme_ctrl_shutdown(NvmeCtrl *n) } } -static void nvme_select_iocs(NvmeCtrl *n) -{ - NvmeNamespace *ns; - int i; - - for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { - ns = nvme_ns(n, i); - if (!ns) { - continue; - } - - nvme_select_iocs_ns(n, ns); - } -} - static int nvme_start_ctrl(NvmeCtrl *n) { uint64_t cap = ldq_le_p(&n->bar.cap); @@ -7670,7 +7751,22 @@ static int nvme_start_ctrl(NvmeCtrl *n) nvme_set_timestamp(n, 0ULL); - nvme_select_iocs(n); + /* verify that the command sets of attached namespaces are supported */ + for (int i = 1; i <= NVME_MAX_NAMESPACES; i++) { + NvmeNamespace *ns = nvme_subsys_ns(n->subsys, i); + + if (!ns || (!ns->params.shared && ns->ctrl != n)) { + continue; + } + + if (nvme_csi_supported(n, ns->csi) && !ns->params.detached) { + if (!ns->attached || ns->params.shared) { + nvme_attach_ns(n, ns); + } + } + } + + nvme_update_dsm_limits(n, NULL); return 0; } @@ -8481,7 +8577,8 @@ out: return pow2ceil(bar_size); } -static void nvme_init_sriov(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t offset) +static bool nvme_init_sriov(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t offset, + Error **errp) { uint16_t vf_dev_id = n->params.use_intel_id ? PCI_DEVICE_ID_INTEL_NVME : PCI_DEVICE_ID_REDHAT_NVME; @@ -8490,12 +8587,16 @@ static void nvme_init_sriov(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t offset) le16_to_cpu(cap->vifrsm), NULL, NULL); - pcie_sriov_pf_init(pci_dev, offset, "nvme", vf_dev_id, - n->params.sriov_max_vfs, n->params.sriov_max_vfs, - NVME_VF_OFFSET, NVME_VF_STRIDE); + if (!pcie_sriov_pf_init(pci_dev, offset, "nvme", vf_dev_id, + n->params.sriov_max_vfs, n->params.sriov_max_vfs, + NVME_VF_OFFSET, NVME_VF_STRIDE, errp)) { + return false; + } pcie_sriov_pf_init_vf_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64, bar_size); + + return true; } static int nvme_add_pm_capability(PCIDevice *pci_dev, uint8_t offset) @@ -8503,8 +8604,7 @@ static int nvme_add_pm_capability(PCIDevice *pci_dev, uint8_t offset) Error *err = NULL; int ret; - ret = pci_add_capability(pci_dev, PCI_CAP_ID_PM, offset, - PCI_PM_SIZEOF, &err); + ret = pci_pm_init(pci_dev, offset, &err); if (err) { error_report_err(err); return ret; @@ -8620,6 +8720,11 @@ static bool nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp) return false; } + if (!pci_is_vf(pci_dev) && n->params.sriov_max_vfs && + !nvme_init_sriov(n, pci_dev, 0x120, errp)) { + return false; + } + nvme_update_msixcap_ts(pci_dev, n->conf_msix_qsize); pcie_cap_deverr_init(pci_dev); @@ -8649,10 +8754,6 @@ static bool nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp) nvme_init_pmr(n, pci_dev); } - if (!pci_is_vf(pci_dev) && n->params.sriov_max_vfs) { - nvme_init_sriov(n, pci_dev, 0x120); - } - return true; } @@ -8676,6 +8777,12 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev) uint64_t cap = ldq_le_p(&n->bar.cap); NvmeSecCtrlEntry *sctrl = nvme_sctrl(n); uint32_t ctratt; + uint16_t oacs; + + memcpy(n->cse.acs, nvme_cse_acs_default, sizeof(n->cse.acs)); + memcpy(n->cse.iocs.nvm, nvme_cse_iocs_nvm_default, sizeof(n->cse.iocs.nvm)); + memcpy(n->cse.iocs.zoned, nvme_cse_iocs_zoned_default, + sizeof(n->cse.iocs.zoned)); id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID)); id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID)); @@ -8706,9 +8813,23 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev) id->mdts = n->params.mdts; id->ver = cpu_to_le32(NVME_SPEC_VER); - id->oacs = - cpu_to_le16(NVME_OACS_NS_MGMT | NVME_OACS_FORMAT | NVME_OACS_DBBUF | - NVME_OACS_DIRECTIVES); + + oacs = NVME_OACS_NMS | NVME_OACS_FORMAT | NVME_OACS_DIRECTIVES; + + if (n->params.dbcs) { + oacs |= NVME_OACS_DBCS; + + n->cse.acs[NVME_ADM_CMD_DBBUF_CONFIG] = NVME_CMD_EFF_CSUPP; + } + + if (n->params.sriov_max_vfs) { + oacs |= NVME_OACS_VMS; + + n->cse.acs[NVME_ADM_CMD_VIRT_MNGMT] = NVME_CMD_EFF_CSUPP; + } + + id->oacs = cpu_to_le16(oacs); + id->cntrltype = 0x1; /* @@ -8759,15 +8880,13 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev) id->psd[0].enlat = cpu_to_le32(0x10); id->psd[0].exlat = cpu_to_le32(0x4); - if (n->subsys) { - id->cmic |= NVME_CMIC_MULTI_CTRL; - ctratt |= NVME_CTRATT_ENDGRPS; + id->cmic |= NVME_CMIC_MULTI_CTRL; + ctratt |= NVME_CTRATT_ENDGRPS; - id->endgidmax = cpu_to_le16(0x1); + id->endgidmax = cpu_to_le16(0x1); - if (n->subsys->endgrp.fdp.enabled) { - ctratt |= NVME_CTRATT_FDPS; - } + if (n->subsys->endgrp.fdp.enabled) { + ctratt |= NVME_CTRATT_FDPS; } id->ctratt = cpu_to_le32(ctratt); @@ -8775,9 +8894,8 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev) NVME_CAP_SET_MQES(cap, n->params.mqes); NVME_CAP_SET_CQR(cap, 1); NVME_CAP_SET_TO(cap, 0xf); - NVME_CAP_SET_CSS(cap, NVME_CAP_CSS_NVM); - NVME_CAP_SET_CSS(cap, NVME_CAP_CSS_CSI_SUPP); - NVME_CAP_SET_CSS(cap, NVME_CAP_CSS_ADMIN_ONLY); + NVME_CAP_SET_CSS(cap, NVME_CAP_CSS_NCSS); + NVME_CAP_SET_CSS(cap, NVME_CAP_CSS_IOCSS); NVME_CAP_SET_MPSMAX(cap, 4); NVME_CAP_SET_CMBS(cap, n->params.cmb_size_mb ? 1 : 0); NVME_CAP_SET_PMRS(cap, n->pmr.dev ? 1 : 0); @@ -8796,7 +8914,15 @@ static int nvme_init_subsys(NvmeCtrl *n, Error **errp) int cntlid; if (!n->subsys) { - return 0; + DeviceState *dev = qdev_new(TYPE_NVME_SUBSYS); + + qdev_prop_set_string(dev, "nqn", n->params.serial); + + if (!qdev_realize(dev, NULL, errp)) { + return -1; + } + + n->subsys = NVME_SUBSYS(dev); } cntlid = nvme_subsys_register_ctrl(n, errp); @@ -8816,9 +8942,6 @@ void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns) n->namespaces[nsid] = ns; ns->attached++; - - n->dmrsl = MIN_NON_ZERO(n->dmrsl, - BDRV_REQUEST_MAX_BYTES / nvme_l2b(ns, 1)); } static void nvme_realize(PCIDevice *pci_dev, Error **errp) @@ -8869,12 +8992,13 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) if (n->namespace.blkconf.blk) { ns = &n->namespace; ns->params.nsid = 1; + ns->ctrl = n; if (nvme_ns_setup(ns, errp)) { return; } - nvme_attach_ns(n, ns); + n->subsys->namespaces[ns->params.nsid] = ns; } } @@ -8886,17 +9010,15 @@ static void nvme_exit(PCIDevice *pci_dev) nvme_ctrl_reset(n, NVME_RESET_FUNCTION); - if (n->subsys) { - for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { - ns = nvme_ns(n, i); - if (ns) { - ns->attached--; - } + for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { + ns = nvme_ns(n, i); + if (ns) { + ns->attached--; } - - nvme_subsys_unregister_ctrl(n->subsys, n); } + nvme_subsys_unregister_ctrl(n->subsys, n); + g_free(n->cq); g_free(n->sq); g_free(n->aer_reqs); @@ -8945,6 +9067,7 @@ static const Property nvme_props[] = { DEFINE_PROP_BOOL("use-intel-id", NvmeCtrl, params.use_intel_id, false), DEFINE_PROP_BOOL("legacy-cmb", NvmeCtrl, params.legacy_cmb, false), DEFINE_PROP_BOOL("ioeventfd", NvmeCtrl, params.ioeventfd, false), + DEFINE_PROP_BOOL("dbcs", NvmeCtrl, params.dbcs, true), DEFINE_PROP_UINT8("zoned.zasl", NvmeCtrl, params.zasl, 0), DEFINE_PROP_BOOL("zoned.auto_transition", NvmeCtrl, params.auto_transition_zones, true), @@ -8965,6 +9088,7 @@ static const Property nvme_props[] = { DEFINE_PROP_BOOL("atomic.dn", NvmeCtrl, params.atomic_dn, 0), DEFINE_PROP_UINT16("atomic.awun", NvmeCtrl, params.atomic_awun, 0), DEFINE_PROP_UINT16("atomic.awupf", NvmeCtrl, params.atomic_awupf, 0), + DEFINE_PROP_BOOL("ocp", NvmeCtrl, params.ocp, false), }; static void nvme_get_smart_warning(Object *obj, Visitor *v, const char *name, @@ -9059,7 +9183,7 @@ static const VMStateDescription nvme_vmstate = { .unmigratable = 1, }; -static void nvme_class_init(ObjectClass *oc, void *data) +static void nvme_class_init(ObjectClass *oc, const void *data) { DeviceClass *dc = DEVICE_CLASS(oc); PCIDeviceClass *pc = PCI_DEVICE_CLASS(oc); @@ -9097,7 +9221,7 @@ static const TypeInfo nvme_info = { .instance_size = sizeof(NvmeCtrl), .instance_init = nvme_instance_init, .class_init = nvme_class_init, - .interfaces = (InterfaceInfo[]) { + .interfaces = (const InterfaceInfo[]) { { INTERFACE_PCIE_DEVICE }, { } }, |