Diffstat (limited to 'hw/nvme/ctrl.c')
-rw-r--r-- | hw/nvme/ctrl.c | 1218
1 file changed, 915 insertions(+), 303 deletions(-)
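Before the diff proper: this commit adds atomic write controls to the controller, documented in the updated usage comment in the first hunk below. An illustrative invocation fragment, in the synopsis style of that comment and with made-up values (serial, drive and subsys ids are placeholders, not defaults), would be:

 * -device nvme,serial=deadbeef,drive=nvm0, \
 *              atomic.dn=off, \
 *              atomic.awun=63, \
 *              atomic.awupf=15, \
 *              subsys=subsys0

The AWUN/AWUPF fields are zero-based, so atomic.awun=63 advertises a 64-block untorn write limit. Per the new nvme_init_state() logic further down, an awupf configured larger than awun is reset to 0, and atomic write tracking is only enabled when the effective maximum atomic write size exceeds one logical block.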
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 5b1b0ca..2200028 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -40,6 +40,9 @@ * sriov_vi_flexible=<N[optional]> \ * sriov_max_vi_per_vf=<N[optional]> \ * sriov_max_vq_per_vf=<N[optional]> \ + * atomic.dn=<on|off[optional]>, \ + * atomic.awun<N[optional]>, \ + * atomic.awupf<N[optional]>, \ * subsys=<subsys_id> * -device nvme-ns,drive=<drive_id>,bus=<bus_name>,nsid=<nsid>,\ * zoned=<true|false[optional]>, \ @@ -198,11 +201,12 @@ #include "qemu/range.h" #include "qapi/error.h" #include "qapi/visitor.h" -#include "sysemu/sysemu.h" -#include "sysemu/block-backend.h" -#include "sysemu/hostmem.h" +#include "system/system.h" +#include "system/block-backend.h" +#include "system/hostmem.h" #include "hw/pci/msix.h" #include "hw/pci/pcie_sriov.h" +#include "system/spdm-socket.h" #include "migration/vmstate.h" #include "nvme.h" @@ -253,6 +257,7 @@ static const uint32_t nvme_feature_cap[NVME_FID_MAX] = { [NVME_ERROR_RECOVERY] = NVME_FEAT_CAP_CHANGE | NVME_FEAT_CAP_NS, [NVME_VOLATILE_WRITE_CACHE] = NVME_FEAT_CAP_CHANGE, [NVME_NUMBER_OF_QUEUES] = NVME_FEAT_CAP_CHANGE, + [NVME_WRITE_ATOMICITY] = NVME_FEAT_CAP_CHANGE, [NVME_ASYNCHRONOUS_EVENT_CONF] = NVME_FEAT_CAP_CHANGE, [NVME_TIMESTAMP] = NVME_FEAT_CAP_CHANGE, [NVME_HOST_BEHAVIOR_SUPPORT] = NVME_FEAT_CAP_CHANGE, @@ -261,7 +266,7 @@ static const uint32_t nvme_feature_cap[NVME_FID_MAX] = { [NVME_FDP_EVENTS] = NVME_FEAT_CAP_CHANGE | NVME_FEAT_CAP_NS, }; -static const uint32_t nvme_cse_acs[256] = { +static const uint32_t nvme_cse_acs_default[256] = { [NVME_ADM_CMD_DELETE_SQ] = NVME_CMD_EFF_CSUPP, [NVME_ADM_CMD_CREATE_SQ] = NVME_CMD_EFF_CSUPP, [NVME_ADM_CMD_GET_LOG_PAGE] = NVME_CMD_EFF_CSUPP, @@ -272,17 +277,14 @@ static const uint32_t nvme_cse_acs[256] = { [NVME_ADM_CMD_SET_FEATURES] = NVME_CMD_EFF_CSUPP, [NVME_ADM_CMD_GET_FEATURES] = NVME_CMD_EFF_CSUPP, [NVME_ADM_CMD_ASYNC_EV_REQ] = NVME_CMD_EFF_CSUPP, - [NVME_ADM_CMD_NS_ATTACHMENT] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_NIC, - [NVME_ADM_CMD_VIRT_MNGMT] = NVME_CMD_EFF_CSUPP, - [NVME_ADM_CMD_DBBUF_CONFIG] = NVME_CMD_EFF_CSUPP, + [NVME_ADM_CMD_NS_ATTACHMENT] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_NIC | + NVME_CMD_EFF_CCC, [NVME_ADM_CMD_FORMAT_NVM] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, [NVME_ADM_CMD_DIRECTIVE_RECV] = NVME_CMD_EFF_CSUPP, [NVME_ADM_CMD_DIRECTIVE_SEND] = NVME_CMD_EFF_CSUPP, }; -static const uint32_t nvme_cse_iocs_none[256]; - -static const uint32_t nvme_cse_iocs_nvm[256] = { +static const uint32_t nvme_cse_iocs_nvm_default[256] = { [NVME_CMD_FLUSH] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, [NVME_CMD_WRITE_ZEROES] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, [NVME_CMD_WRITE] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, @@ -295,7 +297,7 @@ static const uint32_t nvme_cse_iocs_nvm[256] = { [NVME_CMD_IO_MGMT_SEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, }; -static const uint32_t nvme_cse_iocs_zoned[256] = { +static const uint32_t nvme_cse_iocs_zoned_default[256] = { [NVME_CMD_FLUSH] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, [NVME_CMD_WRITE_ZEROES] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, [NVME_CMD_WRITE] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, @@ -304,6 +306,9 @@ static const uint32_t nvme_cse_iocs_zoned[256] = { [NVME_CMD_VERIFY] = NVME_CMD_EFF_CSUPP, [NVME_CMD_COPY] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, [NVME_CMD_COMPARE] = NVME_CMD_EFF_CSUPP, + [NVME_CMD_IO_MGMT_RECV] = NVME_CMD_EFF_CSUPP, + [NVME_CMD_IO_MGMT_SEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, + [NVME_CMD_ZONE_APPEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, 
[NVME_CMD_ZONE_MGMT_SEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, [NVME_CMD_ZONE_MGMT_RECV] = NVME_CMD_EFF_CSUPP, @@ -651,6 +656,12 @@ static void nvme_irq_check(NvmeCtrl *n) if (msix_enabled(pci)) { return; } + + /* vfs does not implement intx */ + if (pci_is_vf(pci)) { + return; + } + if (~intms & n->irq_status) { pci_irq_assert(pci); } else { @@ -1046,7 +1057,8 @@ static uint16_t nvme_map_sgl(NvmeCtrl *n, NvmeSg *sg, NvmeSglDescriptor sgl, */ #define SEG_CHUNK_SIZE 256 - NvmeSglDescriptor segment[SEG_CHUNK_SIZE], *sgld, *last_sgld; + QEMU_UNINITIALIZED NvmeSglDescriptor segment[SEG_CHUNK_SIZE]; + NvmeSglDescriptor *sgld, *last_sgld; uint64_t nsgld; uint32_t seg_len; uint16_t status; @@ -1515,9 +1527,16 @@ static void nvme_post_cqes(void *opaque) stl_le_p(&n->bar.csts, NVME_CSTS_FAILED); break; } + QTAILQ_REMOVE(&cq->req_list, req, entry); + nvme_inc_cq_tail(cq); nvme_sg_unmap(&req->sg); + + if (QTAILQ_EMPTY(&sq->req_list) && !nvme_sq_empty(sq)) { + qemu_bh_schedule(sq->bh); + } + QTAILQ_INSERT_TAIL(&sq->req_list, req, entry); } if (cq->tail != cq->head) { @@ -1648,9 +1667,16 @@ static void nvme_smart_event(NvmeCtrl *n, uint8_t event) static void nvme_clear_events(NvmeCtrl *n, uint8_t event_type) { + NvmeAsyncEvent *event, *next; + n->aer_mask &= ~(1 << event_type); - if (!QTAILQ_EMPTY(&n->aer_queue)) { - nvme_process_aers(n); + + QTAILQ_FOREACH_SAFE(event, &n->aer_queue, entry, next) { + if (event->result.event_type == event_type) { + QTAILQ_REMOVE(&n->aer_queue, event, entry); + n->aer_queued--; + g_free(event); + } } } @@ -1737,43 +1763,6 @@ static uint16_t nvme_check_dulbe(NvmeNamespace *ns, uint64_t slba, return NVME_SUCCESS; } -static void nvme_aio_err(NvmeRequest *req, int ret) -{ - uint16_t status = NVME_SUCCESS; - Error *local_err = NULL; - - switch (req->cmd.opcode) { - case NVME_CMD_READ: - status = NVME_UNRECOVERED_READ; - break; - case NVME_CMD_FLUSH: - case NVME_CMD_WRITE: - case NVME_CMD_WRITE_ZEROES: - case NVME_CMD_ZONE_APPEND: - case NVME_CMD_COPY: - status = NVME_WRITE_FAULT; - break; - default: - status = NVME_INTERNAL_DEV_ERROR; - break; - } - - trace_pci_nvme_err_aio(nvme_cid(req), strerror(-ret), status); - - error_setg_errno(&local_err, -ret, "aio failed"); - error_report_err(local_err); - - /* - * Set the command status code to the first encountered error but allow a - * subsequent Internal Device Error to trump it. - */ - if (req->status && status != NVME_INTERNAL_DEV_ERROR) { - return; - } - - req->status = status; -} - static inline uint32_t nvme_zone_idx(NvmeNamespace *ns, uint64_t slba) { return ns->zone_size_log2 > 0 ? 
slba >> ns->zone_size_log2 : @@ -1811,7 +1800,7 @@ static uint16_t nvme_check_zone_state_for_write(NvmeZone *zone) trace_pci_nvme_err_zone_is_read_only(zslba); return NVME_ZONE_READ_ONLY; default: - assert(false); + g_assert_not_reached(); } return NVME_INTERNAL_DEV_ERROR; @@ -1865,7 +1854,7 @@ static uint16_t nvme_check_zone_state_for_read(NvmeZone *zone) trace_pci_nvme_err_zone_is_offline(zone->d.zslba); return NVME_ZONE_OFFLINE; default: - assert(false); + g_assert_not_reached(); } return NVME_INTERNAL_DEV_ERROR; @@ -2132,11 +2121,16 @@ static inline bool nvme_is_write(NvmeRequest *req) static void nvme_misc_cb(void *opaque, int ret) { NvmeRequest *req = opaque; + uint16_t cid = nvme_cid(req); - trace_pci_nvme_misc_cb(nvme_cid(req)); + trace_pci_nvme_misc_cb(cid); if (ret) { - nvme_aio_err(req, ret); + if (!req->status) { + req->status = NVME_INTERNAL_DEV_ERROR; + } + + trace_pci_nvme_err_aio(cid, strerror(-ret), req->status); } nvme_enqueue_req_completion(nvme_cq(req), req); @@ -2153,8 +2147,30 @@ void nvme_rw_complete_cb(void *opaque, int ret) trace_pci_nvme_rw_complete_cb(nvme_cid(req), blk_name(blk)); if (ret) { + Error *err = NULL; + block_acct_failed(stats, acct); - nvme_aio_err(req, ret); + + switch (req->cmd.opcode) { + case NVME_CMD_READ: + req->status = NVME_UNRECOVERED_READ; + break; + + case NVME_CMD_WRITE: + case NVME_CMD_WRITE_ZEROES: + case NVME_CMD_ZONE_APPEND: + req->status = NVME_WRITE_FAULT; + break; + + default: + req->status = NVME_INTERNAL_DEV_ERROR; + break; + } + + trace_pci_nvme_err_aio(nvme_cid(req), strerror(-ret), req->status); + + error_setg_errno(&err, -ret, "aio failed"); + error_report_err(err); } else { block_acct_done(stats, acct); } @@ -2239,7 +2255,10 @@ static void nvme_verify_cb(void *opaque, int ret) if (ret) { block_acct_failed(stats, acct); - nvme_aio_err(req, ret); + req->status = NVME_UNRECOVERED_READ; + + trace_pci_nvme_err_aio(nvme_cid(req), strerror(-ret), req->status); + goto out; } @@ -2338,7 +2357,10 @@ static void nvme_compare_mdata_cb(void *opaque, int ret) if (ret) { block_acct_failed(stats, acct); - nvme_aio_err(req, ret); + req->status = NVME_UNRECOVERED_READ; + + trace_pci_nvme_err_aio(nvme_cid(req), strerror(-ret), req->status); + goto out; } @@ -2420,7 +2442,10 @@ static void nvme_compare_data_cb(void *opaque, int ret) if (ret) { block_acct_failed(stats, acct); - nvme_aio_err(req, ret); + req->status = NVME_UNRECOVERED_READ; + + trace_pci_nvme_err_aio(nvme_cid(req), strerror(-ret), req->status); + goto out; } @@ -2591,6 +2616,7 @@ next: done: iocb->aiocb = NULL; iocb->common.cb(iocb->common.opaque, iocb->ret); + g_free(iocb->range); qemu_aio_unref(iocb); } @@ -2640,6 +2666,7 @@ static uint16_t nvme_verify(NvmeCtrl *n, NvmeRequest *req) uint64_t slba = le64_to_cpu(rw->slba); uint32_t nlb = le16_to_cpu(rw->nlb) + 1; size_t len = nvme_l2b(ns, nlb); + size_t data_len = len; int64_t offset = nvme_l2b(ns, slba); uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control)); uint32_t reftag = le32_to_cpu(rw->reftag); @@ -2659,7 +2686,11 @@ static uint16_t nvme_verify(NvmeCtrl *n, NvmeRequest *req) } } - if (len > n->page_size << n->params.vsl) { + if (nvme_ns_ext(ns) && !(NVME_ID_CTRL_CTRATT_MEM(n->id_ctrl.ctratt))) { + data_len += nvme_m2b(ns, nlb); + } + + if (data_len > (n->page_size << n->params.vsl)) { return NVME_INVALID_FIELD | NVME_DNR; } @@ -2695,6 +2726,7 @@ typedef struct NvmeCopyAIOCB { BlockAIOCB common; BlockAIOCB *aiocb; NvmeRequest *req; + NvmeCtrl *n; int ret; void *ranges; @@ -2713,6 +2745,8 @@ typedef struct NvmeCopyAIOCB 
{ uint64_t slba; NvmeZone *zone; + NvmeNamespace *sns; + uint32_t tcl; } NvmeCopyAIOCB; static void nvme_copy_cancel(BlockAIOCB *aiocb) @@ -2759,13 +2793,19 @@ static void nvme_copy_done(NvmeCopyAIOCB *iocb) static void nvme_do_copy(NvmeCopyAIOCB *iocb); -static void nvme_copy_source_range_parse_format0(void *ranges, int idx, - uint64_t *slba, uint32_t *nlb, - uint16_t *apptag, - uint16_t *appmask, - uint64_t *reftag) +static void nvme_copy_source_range_parse_format0_2(void *ranges, + int idx, uint64_t *slba, + uint32_t *nlb, + uint32_t *snsid, + uint16_t *apptag, + uint16_t *appmask, + uint64_t *reftag) { - NvmeCopySourceRangeFormat0 *_ranges = ranges; + NvmeCopySourceRangeFormat0_2 *_ranges = ranges; + + if (snsid) { + *snsid = le32_to_cpu(_ranges[idx].sparams); + } if (slba) { *slba = le64_to_cpu(_ranges[idx].slba); @@ -2788,13 +2828,19 @@ static void nvme_copy_source_range_parse_format0(void *ranges, int idx, } } -static void nvme_copy_source_range_parse_format1(void *ranges, int idx, - uint64_t *slba, uint32_t *nlb, - uint16_t *apptag, - uint16_t *appmask, - uint64_t *reftag) +static void nvme_copy_source_range_parse_format1_3(void *ranges, int idx, + uint64_t *slba, + uint32_t *nlb, + uint32_t *snsid, + uint16_t *apptag, + uint16_t *appmask, + uint64_t *reftag) { - NvmeCopySourceRangeFormat1 *_ranges = ranges; + NvmeCopySourceRangeFormat1_3 *_ranges = ranges; + + if (snsid) { + *snsid = le32_to_cpu(_ranges[idx].sparams); + } if (slba) { *slba = le64_to_cpu(_ranges[idx].slba); @@ -2826,18 +2872,20 @@ static void nvme_copy_source_range_parse_format1(void *ranges, int idx, static void nvme_copy_source_range_parse(void *ranges, int idx, uint8_t format, uint64_t *slba, uint32_t *nlb, - uint16_t *apptag, uint16_t *appmask, - uint64_t *reftag) + uint32_t *snsid, uint16_t *apptag, + uint16_t *appmask, uint64_t *reftag) { switch (format) { case NVME_COPY_FORMAT_0: - nvme_copy_source_range_parse_format0(ranges, idx, slba, nlb, apptag, - appmask, reftag); + case NVME_COPY_FORMAT_2: + nvme_copy_source_range_parse_format0_2(ranges, idx, slba, nlb, snsid, + apptag, appmask, reftag); break; case NVME_COPY_FORMAT_1: - nvme_copy_source_range_parse_format1(ranges, idx, slba, nlb, apptag, - appmask, reftag); + case NVME_COPY_FORMAT_3: + nvme_copy_source_range_parse_format1_3(ranges, idx, slba, nlb, snsid, + apptag, appmask, reftag); break; default: @@ -2853,10 +2901,10 @@ static inline uint16_t nvme_check_copy_mcl(NvmeNamespace *ns, for (int idx = 0; idx < nr; idx++) { uint32_t nlb; nvme_copy_source_range_parse(iocb->ranges, idx, iocb->format, NULL, - &nlb, NULL, NULL, NULL); + &nlb, NULL, NULL, NULL, NULL); copy_len += nlb; } - + iocb->tcl = copy_len; if (copy_len > ns->id_ns.mcl) { return NVME_CMD_SIZE_LIMIT | NVME_DNR; } @@ -2868,21 +2916,22 @@ static void nvme_copy_out_completed_cb(void *opaque, int ret) { NvmeCopyAIOCB *iocb = opaque; NvmeRequest *req = iocb->req; - NvmeNamespace *ns = req->ns; + NvmeNamespace *dns = req->ns; uint32_t nlb; nvme_copy_source_range_parse(iocb->ranges, iocb->idx, iocb->format, NULL, - &nlb, NULL, NULL, NULL); + &nlb, NULL, NULL, NULL, NULL); if (ret < 0) { iocb->ret = ret; + req->status = NVME_WRITE_FAULT; goto out; } else if (iocb->ret < 0) { goto out; } - if (ns->params.zoned) { - nvme_advance_zone_wp(ns, iocb->zone, nlb); + if (dns->params.zoned) { + nvme_advance_zone_wp(dns, iocb->zone, nlb); } iocb->idx++; @@ -2895,25 +2944,25 @@ static void nvme_copy_out_cb(void *opaque, int ret) { NvmeCopyAIOCB *iocb = opaque; NvmeRequest *req = iocb->req; - NvmeNamespace *ns = 
req->ns; + NvmeNamespace *dns = req->ns; uint32_t nlb; size_t mlen; uint8_t *mbounce; - if (ret < 0 || iocb->ret < 0 || !ns->lbaf.ms) { + if (ret < 0 || iocb->ret < 0 || !dns->lbaf.ms) { goto out; } nvme_copy_source_range_parse(iocb->ranges, iocb->idx, iocb->format, NULL, - &nlb, NULL, NULL, NULL); + &nlb, NULL, NULL, NULL, NULL); - mlen = nvme_m2b(ns, nlb); - mbounce = iocb->bounce + nvme_l2b(ns, nlb); + mlen = nvme_m2b(dns, nlb); + mbounce = iocb->bounce + nvme_l2b(dns, nlb); qemu_iovec_reset(&iocb->iov); qemu_iovec_add(&iocb->iov, mbounce, mlen); - iocb->aiocb = blk_aio_pwritev(ns->blkconf.blk, nvme_moff(ns, iocb->slba), + iocb->aiocb = blk_aio_pwritev(dns->blkconf.blk, nvme_moff(dns, iocb->slba), &iocb->iov, 0, nvme_copy_out_completed_cb, iocb); @@ -2927,59 +2976,71 @@ static void nvme_copy_in_completed_cb(void *opaque, int ret) { NvmeCopyAIOCB *iocb = opaque; NvmeRequest *req = iocb->req; - NvmeNamespace *ns = req->ns; + NvmeNamespace *sns = iocb->sns; + NvmeNamespace *dns = req->ns; + NvmeCopyCmd *copy = NULL; + uint8_t *mbounce = NULL; uint32_t nlb; uint64_t slba; uint16_t apptag, appmask; uint64_t reftag; - size_t len; + size_t len, mlen; uint16_t status; if (ret < 0) { iocb->ret = ret; + req->status = NVME_UNRECOVERED_READ; goto out; } else if (iocb->ret < 0) { goto out; } nvme_copy_source_range_parse(iocb->ranges, iocb->idx, iocb->format, &slba, - &nlb, &apptag, &appmask, &reftag); - len = nvme_l2b(ns, nlb); + &nlb, NULL, &apptag, &appmask, &reftag); trace_pci_nvme_copy_out(iocb->slba, nlb); - if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { - NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd; + len = nvme_l2b(sns, nlb); + + if (NVME_ID_NS_DPS_TYPE(sns->id_ns.dps)) { + copy = (NvmeCopyCmd *)&req->cmd; uint16_t prinfor = ((copy->control[0] >> 4) & 0xf); - uint16_t prinfow = ((copy->control[2] >> 2) & 0xf); - size_t mlen = nvme_m2b(ns, nlb); - uint8_t *mbounce = iocb->bounce + nvme_l2b(ns, nlb); + mlen = nvme_m2b(sns, nlb); + mbounce = iocb->bounce + nvme_l2b(sns, nlb); - status = nvme_dif_mangle_mdata(ns, mbounce, mlen, slba); + status = nvme_dif_mangle_mdata(sns, mbounce, mlen, slba); if (status) { goto invalid; } - status = nvme_dif_check(ns, iocb->bounce, len, mbounce, mlen, prinfor, + status = nvme_dif_check(sns, iocb->bounce, len, mbounce, mlen, prinfor, slba, apptag, appmask, &reftag); if (status) { goto invalid; } + } + + if (NVME_ID_NS_DPS_TYPE(dns->id_ns.dps)) { + copy = (NvmeCopyCmd *)&req->cmd; + uint16_t prinfow = ((copy->control[2] >> 2) & 0xf); + + mlen = nvme_m2b(dns, nlb); + mbounce = iocb->bounce + nvme_l2b(dns, nlb); apptag = le16_to_cpu(copy->apptag); appmask = le16_to_cpu(copy->appmask); if (prinfow & NVME_PRINFO_PRACT) { - status = nvme_check_prinfo(ns, prinfow, iocb->slba, iocb->reftag); + status = nvme_check_prinfo(dns, prinfow, iocb->slba, iocb->reftag); if (status) { goto invalid; } - nvme_dif_pract_generate_dif(ns, iocb->bounce, len, mbounce, mlen, + nvme_dif_pract_generate_dif(dns, iocb->bounce, len, mbounce, mlen, apptag, &iocb->reftag); } else { - status = nvme_dif_check(ns, iocb->bounce, len, mbounce, mlen, + status = nvme_dif_check(dns, iocb->bounce, len, mbounce, mlen, prinfow, iocb->slba, apptag, appmask, &iocb->reftag); if (status) { @@ -2988,13 +3049,13 @@ static void nvme_copy_in_completed_cb(void *opaque, int ret) } } - status = nvme_check_bounds(ns, iocb->slba, nlb); + status = nvme_check_bounds(dns, iocb->slba, nlb); if (status) { goto invalid; } - if (ns->params.zoned) { - status = nvme_check_zone_write(ns, iocb->zone, iocb->slba, nlb); + if 
(dns->params.zoned) { + status = nvme_check_zone_write(dns, iocb->zone, iocb->slba, nlb); if (status) { goto invalid; } @@ -3007,7 +3068,10 @@ static void nvme_copy_in_completed_cb(void *opaque, int ret) qemu_iovec_reset(&iocb->iov); qemu_iovec_add(&iocb->iov, iocb->bounce, len); - iocb->aiocb = blk_aio_pwritev(ns->blkconf.blk, nvme_l2b(ns, iocb->slba), + block_acct_start(blk_get_stats(dns->blkconf.blk), &iocb->acct.write, 0, + BLOCK_ACCT_WRITE); + + iocb->aiocb = blk_aio_pwritev(dns->blkconf.blk, nvme_l2b(dns, iocb->slba), &iocb->iov, 0, nvme_copy_out_cb, iocb); return; @@ -3022,23 +3086,22 @@ out: static void nvme_copy_in_cb(void *opaque, int ret) { NvmeCopyAIOCB *iocb = opaque; - NvmeRequest *req = iocb->req; - NvmeNamespace *ns = req->ns; + NvmeNamespace *sns = iocb->sns; uint64_t slba; uint32_t nlb; - if (ret < 0 || iocb->ret < 0 || !ns->lbaf.ms) { + if (ret < 0 || iocb->ret < 0 || !sns->lbaf.ms) { goto out; } nvme_copy_source_range_parse(iocb->ranges, iocb->idx, iocb->format, &slba, - &nlb, NULL, NULL, NULL); + &nlb, NULL, NULL, NULL, NULL); qemu_iovec_reset(&iocb->iov); - qemu_iovec_add(&iocb->iov, iocb->bounce + nvme_l2b(ns, nlb), - nvme_m2b(ns, nlb)); + qemu_iovec_add(&iocb->iov, iocb->bounce + nvme_l2b(sns, nlb), + nvme_m2b(sns, nlb)); - iocb->aiocb = blk_aio_preadv(ns->blkconf.blk, nvme_moff(ns, slba), + iocb->aiocb = blk_aio_preadv(sns->blkconf.blk, nvme_moff(sns, slba), &iocb->iov, 0, nvme_copy_in_completed_cb, iocb); return; @@ -3047,14 +3110,78 @@ out: nvme_copy_in_completed_cb(iocb, ret); } +static inline bool nvme_csi_supports_copy(uint8_t csi) +{ + return csi == NVME_CSI_NVM || csi == NVME_CSI_ZONED; +} + +static inline bool nvme_copy_ns_format_match(NvmeNamespace *sns, + NvmeNamespace *dns) +{ + return sns->lbaf.ds == dns->lbaf.ds && sns->lbaf.ms == dns->lbaf.ms; +} + +static bool nvme_copy_matching_ns_format(NvmeNamespace *sns, NvmeNamespace *dns, + bool pi_enable) +{ + if (!nvme_csi_supports_copy(sns->csi) || + !nvme_csi_supports_copy(dns->csi)) { + return false; + } + + if (!pi_enable && !nvme_copy_ns_format_match(sns, dns)) { + return false; + } + + if (pi_enable && (!nvme_copy_ns_format_match(sns, dns) || + sns->id_ns.dps != dns->id_ns.dps)) { + return false; + } + + return true; +} + +static inline bool nvme_copy_corresp_pi_match(NvmeNamespace *sns, + NvmeNamespace *dns) +{ + return sns->lbaf.ms == 0 && + ((dns->lbaf.ms == 8 && dns->pif == 0) || + (dns->lbaf.ms == 16 && dns->pif == 1)); +} + +static bool nvme_copy_corresp_pi_format(NvmeNamespace *sns, NvmeNamespace *dns, + bool sns_pi_en) +{ + if (!nvme_csi_supports_copy(sns->csi) || + !nvme_csi_supports_copy(dns->csi)) { + return false; + } + + if (!sns_pi_en && !nvme_copy_corresp_pi_match(sns, dns)) { + return false; + } + + if (sns_pi_en && !nvme_copy_corresp_pi_match(dns, sns)) { + return false; + } + + return true; +} + static void nvme_do_copy(NvmeCopyAIOCB *iocb) { NvmeRequest *req = iocb->req; - NvmeNamespace *ns = req->ns; + NvmeNamespace *sns; + NvmeNamespace *dns = req->ns; + NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd; + uint16_t prinfor = ((copy->control[0] >> 4) & 0xf); + uint16_t prinfow = ((copy->control[2] >> 2) & 0xf); uint64_t slba; uint32_t nlb; size_t len; uint16_t status; + uint32_t dnsid = le32_to_cpu(req->cmd.nsid); + uint32_t snsid = dnsid; if (iocb->ret < 0) { goto done; @@ -3064,40 +3191,124 @@ static void nvme_do_copy(NvmeCopyAIOCB *iocb) goto done; } - nvme_copy_source_range_parse(iocb->ranges, iocb->idx, iocb->format, &slba, - &nlb, NULL, NULL, NULL); - len = nvme_l2b(ns, nlb); + if 
(iocb->format == 2 || iocb->format == 3) { + nvme_copy_source_range_parse(iocb->ranges, iocb->idx, iocb->format, + &slba, &nlb, &snsid, NULL, NULL, NULL); + if (snsid != dnsid) { + if (snsid == NVME_NSID_BROADCAST || + !nvme_nsid_valid(iocb->n, snsid)) { + status = NVME_INVALID_NSID | NVME_DNR; + goto invalid; + } + iocb->sns = nvme_ns(iocb->n, snsid); + if (unlikely(!iocb->sns)) { + status = NVME_INVALID_FIELD | NVME_DNR; + goto invalid; + } + } else { + if (((slba + nlb) > iocb->slba) && + ((slba + nlb) < (iocb->slba + iocb->tcl))) { + status = NVME_CMD_OVERLAP_IO_RANGE | NVME_DNR; + goto invalid; + } + } + } else { + nvme_copy_source_range_parse(iocb->ranges, iocb->idx, iocb->format, + &slba, &nlb, NULL, NULL, NULL, NULL); + } + + sns = iocb->sns; + if ((snsid == dnsid) && NVME_ID_NS_DPS_TYPE(sns->id_ns.dps) && + ((prinfor & NVME_PRINFO_PRACT) != (prinfow & NVME_PRINFO_PRACT))) { + status = NVME_INVALID_FIELD | NVME_DNR; + goto invalid; + } else if (snsid != dnsid) { + if (!NVME_ID_NS_DPS_TYPE(sns->id_ns.dps) && + !NVME_ID_NS_DPS_TYPE(dns->id_ns.dps)) { + if (!nvme_copy_matching_ns_format(sns, dns, false)) { + status = NVME_CMD_INCOMP_NS_OR_FMT | NVME_DNR; + goto invalid; + } + } + if (NVME_ID_NS_DPS_TYPE(sns->id_ns.dps) && + NVME_ID_NS_DPS_TYPE(dns->id_ns.dps)) { + if ((prinfor & NVME_PRINFO_PRACT) != + (prinfow & NVME_PRINFO_PRACT)) { + status = NVME_CMD_INCOMP_NS_OR_FMT | NVME_DNR; + goto invalid; + } else { + if (!nvme_copy_matching_ns_format(sns, dns, true)) { + status = NVME_CMD_INCOMP_NS_OR_FMT | NVME_DNR; + goto invalid; + } + } + } + + if (!NVME_ID_NS_DPS_TYPE(sns->id_ns.dps) && + NVME_ID_NS_DPS_TYPE(dns->id_ns.dps)) { + if (!(prinfow & NVME_PRINFO_PRACT)) { + status = NVME_CMD_INCOMP_NS_OR_FMT | NVME_DNR; + goto invalid; + } else { + if (!nvme_copy_corresp_pi_format(sns, dns, false)) { + status = NVME_CMD_INCOMP_NS_OR_FMT | NVME_DNR; + goto invalid; + } + } + } + + if (NVME_ID_NS_DPS_TYPE(sns->id_ns.dps) && + !NVME_ID_NS_DPS_TYPE(dns->id_ns.dps)) { + if (!(prinfor & NVME_PRINFO_PRACT)) { + status = NVME_CMD_INCOMP_NS_OR_FMT | NVME_DNR; + goto invalid; + } else { + if (!nvme_copy_corresp_pi_format(sns, dns, true)) { + status = NVME_CMD_INCOMP_NS_OR_FMT | NVME_DNR; + goto invalid; + } + } + } + } + len = nvme_l2b(sns, nlb); trace_pci_nvme_copy_source_range(slba, nlb); - if (nlb > le16_to_cpu(ns->id_ns.mssrl)) { + if (nlb > le16_to_cpu(sns->id_ns.mssrl)) { status = NVME_CMD_SIZE_LIMIT | NVME_DNR; goto invalid; } - status = nvme_check_bounds(ns, slba, nlb); + status = nvme_check_bounds(sns, slba, nlb); if (status) { goto invalid; } - if (NVME_ERR_REC_DULBE(ns->features.err_rec)) { - status = nvme_check_dulbe(ns, slba, nlb); + if (NVME_ERR_REC_DULBE(sns->features.err_rec)) { + status = nvme_check_dulbe(sns, slba, nlb); if (status) { goto invalid; } } - if (ns->params.zoned) { - status = nvme_check_zone_read(ns, slba, nlb); + if (sns->params.zoned) { + status = nvme_check_zone_read(sns, slba, nlb); if (status) { goto invalid; } } + g_free(iocb->bounce); + iocb->bounce = g_malloc_n(le16_to_cpu(sns->id_ns.mssrl), + sns->lbasz + sns->lbaf.ms); + qemu_iovec_reset(&iocb->iov); qemu_iovec_add(&iocb->iov, iocb->bounce, len); - iocb->aiocb = blk_aio_preadv(ns->blkconf.blk, nvme_l2b(ns, slba), + block_acct_start(blk_get_stats(sns->blkconf.blk), &iocb->acct.read, 0, + BLOCK_ACCT_READ); + + iocb->aiocb = blk_aio_preadv(sns->blkconf.blk, nvme_l2b(sns, slba), &iocb->iov, 0, nvme_copy_in_cb, iocb); return; @@ -3116,9 +3327,7 @@ static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req) 
nvme_misc_cb, req); uint16_t nr = copy->nr + 1; uint8_t format = copy->control[0] & 0xf; - uint16_t prinfor = ((copy->control[0] >> 4) & 0xf); - uint16_t prinfow = ((copy->control[2] >> 2) & 0xf); - size_t len = sizeof(NvmeCopySourceRangeFormat0); + size_t len = sizeof(NvmeCopySourceRangeFormat0_2); uint16_t status; @@ -3127,13 +3336,9 @@ static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req) iocb->ranges = NULL; iocb->zone = NULL; - if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) && - ((prinfor & NVME_PRINFO_PRACT) != (prinfow & NVME_PRINFO_PRACT))) { - status = NVME_INVALID_FIELD | NVME_DNR; - goto invalid; - } - - if (!(n->id_ctrl.ocfs & (1 << format))) { + if (!(n->id_ctrl.ocfs & (1 << format)) || + ((format == 2 || format == 3) && + !(n->features.hbs.cdfe & (1 << format)))) { trace_pci_nvme_err_copy_invalid_format(format); status = NVME_INVALID_FIELD | NVME_DNR; goto invalid; @@ -3144,14 +3349,14 @@ static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req) goto invalid; } - if ((ns->pif == 0x0 && format != 0x0) || - (ns->pif != 0x0 && format != 0x1)) { + if ((ns->pif == 0x0 && (format != 0x0 && format != 0x2)) || + (ns->pif != 0x0 && (format != 0x1 && format != 0x3))) { status = NVME_INVALID_FORMAT | NVME_DNR; goto invalid; } if (ns->pif) { - len = sizeof(NvmeCopySourceRangeFormat1); + len = sizeof(NvmeCopySourceRangeFormat1_3); } iocb->format = format; @@ -3187,17 +3392,13 @@ static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req) iocb->idx = 0; iocb->reftag = le32_to_cpu(copy->reftag); iocb->reftag |= (uint64_t)le32_to_cpu(copy->cdw3) << 32; - iocb->bounce = g_malloc_n(le16_to_cpu(ns->id_ns.mssrl), - ns->lbasz + ns->lbaf.ms); qemu_iovec_init(&iocb->iov, 1); - block_acct_start(blk_get_stats(ns->blkconf.blk), &iocb->acct.read, 0, - BLOCK_ACCT_READ); - block_acct_start(blk_get_stats(ns->blkconf.blk), &iocb->acct.write, 0, - BLOCK_ACCT_WRITE); - req->aiocb = &iocb->common; + iocb->sns = req->ns; + iocb->n = n; + iocb->bounce = NULL; nvme_do_copy(iocb); return NVME_NO_COMPLETE; @@ -3232,7 +3433,11 @@ static uint16_t nvme_compare(NvmeCtrl *n, NvmeRequest *req) len += nvme_m2b(ns, nlb); } - status = nvme_check_mdts(n, len); + if (NVME_ID_CTRL_CTRATT_MEM(n->id_ctrl.ctratt)) { + status = nvme_check_mdts(n, data_len); + } else { + status = nvme_check_mdts(n, len); + } if (status) { return status; } @@ -3307,6 +3512,7 @@ static void nvme_flush_ns_cb(void *opaque, int ret) if (ret < 0) { iocb->ret = ret; + iocb->req->status = NVME_WRITE_FAULT; goto out; } else if (iocb->ret < 0) { goto out; @@ -3409,7 +3615,7 @@ static uint16_t nvme_read(NvmeCtrl *n, NvmeRequest *req) BlockBackend *blk = ns->blkconf.blk; uint16_t status; - if (nvme_ns_ext(ns)) { + if (nvme_ns_ext(ns) && !(NVME_ID_CTRL_CTRATT_MEM(n->id_ctrl.ctratt))) { mapped_size += nvme_m2b(ns, nlb); if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { @@ -3521,7 +3727,7 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append, BlockBackend *blk = ns->blkconf.blk; uint16_t status; - if (nvme_ns_ext(ns)) { + if (nvme_ns_ext(ns) && !(NVME_ID_CTRL_CTRATT_MEM(n->id_ctrl.ctratt))) { mapped_size += nvme_m2b(ns, nlb); if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { @@ -4167,7 +4373,7 @@ static bool nvme_zone_matches_filter(uint32_t zafs, NvmeZone *zl) static uint16_t nvme_zone_mgmt_recv(NvmeCtrl *n, NvmeRequest *req) { - NvmeCmd *cmd = (NvmeCmd *)&req->cmd; + NvmeCmd *cmd = &req->cmd; NvmeNamespace *ns = req->ns; /* cdw12 is zero-based number of dwords to return. 
Convert to bytes */ uint32_t data_size = (le32_to_cpu(cmd->cdw12) + 1) << 2; @@ -4300,7 +4506,7 @@ static uint16_t nvme_io_mgmt_recv_ruhs(NvmeCtrl *n, NvmeRequest *req, nruhsd = ns->fdp.nphs * endgrp->fdp.nrg; trans_len = sizeof(NvmeRuhStatus) + nruhsd * sizeof(NvmeRuhStatusDescr); - buf = g_malloc(trans_len); + buf = g_malloc0(trans_len); trans_len = MIN(trans_len, len); @@ -4398,6 +4604,61 @@ static uint16_t nvme_io_mgmt_send(NvmeCtrl *n, NvmeRequest *req) }; } +static uint16_t __nvme_io_cmd_nvm(NvmeCtrl *n, NvmeRequest *req) +{ + switch (req->cmd.opcode) { + case NVME_CMD_WRITE: + return nvme_write(n, req); + case NVME_CMD_READ: + return nvme_read(n, req); + case NVME_CMD_COMPARE: + return nvme_compare(n, req); + case NVME_CMD_WRITE_ZEROES: + return nvme_write_zeroes(n, req); + case NVME_CMD_DSM: + return nvme_dsm(n, req); + case NVME_CMD_VERIFY: + return nvme_verify(n, req); + case NVME_CMD_COPY: + return nvme_copy(n, req); + case NVME_CMD_IO_MGMT_RECV: + return nvme_io_mgmt_recv(n, req); + case NVME_CMD_IO_MGMT_SEND: + return nvme_io_mgmt_send(n, req); + } + + g_assert_not_reached(); +} + +static uint16_t nvme_io_cmd_nvm(NvmeCtrl *n, NvmeRequest *req) +{ + if (!(n->cse.iocs.nvm[req->cmd.opcode] & NVME_CMD_EFF_CSUPP)) { + trace_pci_nvme_err_invalid_opc(req->cmd.opcode); + return NVME_INVALID_OPCODE | NVME_DNR; + } + + return __nvme_io_cmd_nvm(n, req); +} + +static uint16_t nvme_io_cmd_zoned(NvmeCtrl *n, NvmeRequest *req) +{ + if (!(n->cse.iocs.zoned[req->cmd.opcode] & NVME_CMD_EFF_CSUPP)) { + trace_pci_nvme_err_invalid_opc(req->cmd.opcode); + return NVME_INVALID_OPCODE | NVME_DNR; + } + + switch (req->cmd.opcode) { + case NVME_CMD_ZONE_APPEND: + return nvme_zone_append(n, req); + case NVME_CMD_ZONE_MGMT_SEND: + return nvme_zone_mgmt_send(n, req); + case NVME_CMD_ZONE_MGMT_RECV: + return nvme_zone_mgmt_recv(n, req); + } + + return __nvme_io_cmd_nvm(n, req); +} + static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req) { NvmeNamespace *ns; @@ -4406,10 +4667,6 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req) trace_pci_nvme_io_cmd(nvme_cid(req), nsid, nvme_sqid(req), req->cmd.opcode, nvme_io_opc_str(req->cmd.opcode)); - if (!nvme_nsid_valid(n, nsid)) { - return NVME_INVALID_NSID | NVME_DNR; - } - /* * In the base NVM command set, Flush may apply to all namespaces * (indicated by NSID being set to FFFFFFFFh). But if that feature is used @@ -4429,20 +4686,20 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req) * device only supports namespace types that includes the NVM Flush command * (NVM and Zoned), so always do an NVM Flush. 
*/ + if (req->cmd.opcode == NVME_CMD_FLUSH) { return nvme_flush(n, req); } + if (!nvme_nsid_valid(n, nsid) || nsid == NVME_NSID_BROADCAST) { + return NVME_INVALID_NSID | NVME_DNR; + } + ns = nvme_ns(n, nsid); if (unlikely(!ns)) { return NVME_INVALID_FIELD | NVME_DNR; } - if (!(ns->iocs[req->cmd.opcode] & NVME_CMD_EFF_CSUPP)) { - trace_pci_nvme_err_invalid_opc(req->cmd.opcode); - return NVME_INVALID_OPCODE | NVME_DNR; - } - if (ns->status) { return ns->status; } @@ -4453,36 +4710,14 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req) req->ns = ns; - switch (req->cmd.opcode) { - case NVME_CMD_WRITE_ZEROES: - return nvme_write_zeroes(n, req); - case NVME_CMD_ZONE_APPEND: - return nvme_zone_append(n, req); - case NVME_CMD_WRITE: - return nvme_write(n, req); - case NVME_CMD_READ: - return nvme_read(n, req); - case NVME_CMD_COMPARE: - return nvme_compare(n, req); - case NVME_CMD_DSM: - return nvme_dsm(n, req); - case NVME_CMD_VERIFY: - return nvme_verify(n, req); - case NVME_CMD_COPY: - return nvme_copy(n, req); - case NVME_CMD_ZONE_MGMT_SEND: - return nvme_zone_mgmt_send(n, req); - case NVME_CMD_ZONE_MGMT_RECV: - return nvme_zone_mgmt_recv(n, req); - case NVME_CMD_IO_MGMT_RECV: - return nvme_io_mgmt_recv(n, req); - case NVME_CMD_IO_MGMT_SEND: - return nvme_io_mgmt_send(n, req); - default: - assert(false); + switch (ns->csi) { + case NVME_CSI_NVM: + return nvme_io_cmd_nvm(n, req); + case NVME_CSI_ZONED: + return nvme_io_cmd_zoned(n, req); } - return NVME_INVALID_OPCODE | NVME_DNR; + g_assert_not_reached(); } static void nvme_cq_notifier(EventNotifier *e) @@ -4591,6 +4826,7 @@ static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeRequest *req) while (!QTAILQ_EMPTY(&sq->out_req_list)) { r = QTAILQ_FIRST(&sq->out_req_list); assert(r->aiocb); + r->status = NVME_CMD_ABORT_SQ_DEL; blk_aio_cancel(r->aiocb); } @@ -4709,6 +4945,45 @@ static void nvme_set_blk_stats(NvmeNamespace *ns, struct nvme_stats *stats) stats->write_commands += s->nr_ops[BLOCK_ACCT_WRITE]; } +static uint16_t nvme_ocp_extended_smart_info(NvmeCtrl *n, uint8_t rae, + uint32_t buf_len, uint64_t off, + NvmeRequest *req) +{ + NvmeNamespace *ns = NULL; + NvmeSmartLogExtended smart_l = { 0 }; + struct nvme_stats stats = { 0 }; + uint32_t trans_len; + + if (off >= sizeof(smart_l)) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + /* accumulate all stats from all namespaces */ + for (int i = 1; i <= NVME_MAX_NAMESPACES; i++) { + ns = nvme_ns(n, i); + if (ns) { + nvme_set_blk_stats(ns, &stats); + } + } + + smart_l.physical_media_units_written[0] = cpu_to_le64(stats.units_written); + smart_l.physical_media_units_read[0] = cpu_to_le64(stats.units_read); + smart_l.log_page_version = 0x0005; + + static const uint8_t guid[16] = { + 0xC5, 0xAF, 0x10, 0x28, 0xEA, 0xBF, 0xF2, 0xA4, + 0x9C, 0x4F, 0x6F, 0x7C, 0xC9, 0x14, 0xD5, 0xAF + }; + memcpy(smart_l.log_page_guid, guid, sizeof(smart_l.log_page_guid)); + + if (!rae) { + nvme_clear_events(n, NVME_AER_TYPE_SMART); + } + + trans_len = MIN(sizeof(smart_l) - off, buf_len); + return nvme_c2h(n, (uint8_t *) &smart_l + off, trans_len, req); +} + static uint16_t nvme_smart_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len, uint64_t off, NvmeRequest *req) { @@ -4854,7 +5129,7 @@ static uint16_t nvme_error_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len, static uint16_t nvme_changed_nslist(NvmeCtrl *n, uint8_t rae, uint32_t buf_len, uint64_t off, NvmeRequest *req) { - uint32_t nslist[1024]; + uint32_t nslist[1024] = {}; uint32_t trans_len; int i = 0; uint32_t nsid; @@ -4864,7 +5139,6 @@ static uint16_t 
nvme_changed_nslist(NvmeCtrl *n, uint8_t rae, uint32_t buf_len, return NVME_INVALID_FIELD | NVME_DNR; } - memset(nslist, 0x0, sizeof(nslist)); trans_len = MIN(sizeof(nslist) - off, buf_len); while ((nsid = find_first_bit(n->changed_nsids, NVME_CHANGED_NSID_SIZE)) != @@ -4902,7 +5176,7 @@ static uint16_t nvme_cmd_effects(NvmeCtrl *n, uint8_t csi, uint32_t buf_len, uint64_t off, NvmeRequest *req) { NvmeEffectsLog log = {}; - const uint32_t *src_iocs = NULL; + const uint32_t *iocs = NULL; uint32_t trans_len; if (off >= sizeof(log)) { @@ -4912,25 +5186,26 @@ static uint16_t nvme_cmd_effects(NvmeCtrl *n, uint8_t csi, uint32_t buf_len, switch (NVME_CC_CSS(ldl_le_p(&n->bar.cc))) { case NVME_CC_CSS_NVM: - src_iocs = nvme_cse_iocs_nvm; - /* fall through */ - case NVME_CC_CSS_ADMIN_ONLY: + iocs = n->cse.iocs.nvm; break; - case NVME_CC_CSS_CSI: + + case NVME_CC_CSS_ALL: switch (csi) { case NVME_CSI_NVM: - src_iocs = nvme_cse_iocs_nvm; + iocs = n->cse.iocs.nvm; break; case NVME_CSI_ZONED: - src_iocs = nvme_cse_iocs_zoned; + iocs = n->cse.iocs.zoned; break; } + + break; } - memcpy(log.acs, nvme_cse_acs, sizeof(nvme_cse_acs)); + memcpy(log.acs, n->cse.acs, sizeof(log.acs)); - if (src_iocs) { - memcpy(log.iocs, src_iocs, sizeof(log.iocs)); + if (iocs) { + memcpy(log.iocs, iocs, sizeof(log.iocs)); } trans_len = MIN(sizeof(log) - off, buf_len); @@ -4938,6 +5213,23 @@ static uint16_t nvme_cmd_effects(NvmeCtrl *n, uint8_t csi, uint32_t buf_len, return nvme_c2h(n, ((uint8_t *)&log) + off, trans_len, req); } +static uint16_t nvme_vendor_specific_log(NvmeCtrl *n, uint8_t rae, + uint32_t buf_len, uint64_t off, + NvmeRequest *req, uint8_t lid) +{ + switch (lid) { + case NVME_OCP_EXTENDED_SMART_INFO: + if (n->params.ocp) { + return nvme_ocp_extended_smart_info(n, rae, buf_len, off, req); + } + break; + /* add a case for each additional vendor specific log id */ + } + + trace_pci_nvme_err_invalid_log_page(nvme_cid(req), lid); + return NVME_INVALID_FIELD | NVME_DNR; +} + static size_t sizeof_fdp_conf_descr(size_t nruh, size_t vss) { size_t entry_siz = sizeof(NvmeFdpDescrHdr) + nruh * sizeof(NvmeRuhDescr) @@ -5188,6 +5480,8 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req) return nvme_smart_info(n, rae, len, off, req); case NVME_LOG_FW_SLOT_INFO: return nvme_fw_log_info(n, len, off, req); + case NVME_LOG_VENDOR_START...NVME_LOG_VENDOR_END: + return nvme_vendor_specific_log(n, rae, len, off, req, lid); case NVME_LOG_CHANGED_NSLIST: return nvme_changed_nslist(n, rae, len, off, req); case NVME_LOG_CMD_EFFECTS: @@ -5221,7 +5515,7 @@ static void nvme_free_cq(NvmeCQueue *cq, NvmeCtrl *n) event_notifier_set_handler(&cq->notifier, NULL); event_notifier_cleanup(&cq->notifier); } - if (msix_enabled(pci)) { + if (msix_enabled(pci) && cq->irq_enabled) { msix_vector_unuse(pci, cq->vector); } if (cq->cqid) { @@ -5262,9 +5556,10 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr, { PCIDevice *pci = PCI_DEVICE(n); - if (msix_enabled(pci)) { + if (msix_enabled(pci) && irq_enabled) { msix_vector_use(pci, vector); } + cq->ctrl = n; cq->cqid = cqid; cq->size = size; @@ -5374,7 +5669,9 @@ static uint16_t nvme_identify_ctrl_csi(NvmeCtrl *n, NvmeRequest *req) switch (c->csi) { case NVME_CSI_NVM: id_nvm->vsl = n->params.vsl; + id_nvm->dmrl = NVME_ID_CTRL_NVM_DMRL_MAX; id_nvm->dmrsl = cpu_to_le32(n->dmrsl); + id_nvm->dmsl = NVME_ID_CTRL_NVM_DMRL_MAX * n->dmrsl; break; case NVME_CSI_ZONED: @@ -5416,7 +5713,7 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeRequest *req, bool active) return nvme_c2h(n, 
(uint8_t *)&ns->id_ns, sizeof(NvmeIdNs), req); } - return NVME_INVALID_CMD_SET | NVME_DNR; + return NVME_INVALID_IOCS | NVME_DNR; } static uint16_t nvme_identify_ctrl_list(NvmeCtrl *n, NvmeRequest *req, @@ -5497,6 +5794,33 @@ static uint16_t nvme_identify_sec_ctrl_list(NvmeCtrl *n, NvmeRequest *req) return nvme_c2h(n, (uint8_t *)&list, sizeof(list), req); } +static uint16_t nvme_identify_ns_ind(NvmeCtrl *n, NvmeRequest *req, bool alloc) +{ + NvmeNamespace *ns; + NvmeIdentify *c = (NvmeIdentify *)&req->cmd; + uint32_t nsid = le32_to_cpu(c->nsid); + + trace_pci_nvme_identify_ns_ind(nsid); + + if (!nvme_nsid_valid(n, nsid) || nsid == NVME_NSID_BROADCAST) { + return NVME_INVALID_NSID | NVME_DNR; + } + + ns = nvme_ns(n, nsid); + if (unlikely(!ns)) { + if (alloc) { + ns = nvme_subsys_ns(n->subsys, nsid); + if (!ns) { + return nvme_rpt_empty_id_struct(n, req); + } + } else { + return nvme_rpt_empty_id_struct(n, req); + } + } + + return nvme_c2h(n, (uint8_t *)&ns->id_ns_ind, sizeof(NvmeIdNsInd), req); +} + static uint16_t nvme_identify_ns_csi(NvmeCtrl *n, NvmeRequest *req, bool active) { @@ -5751,6 +6075,10 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeRequest *req) return nvme_identify_sec_ctrl_list(n, req); case NVME_ID_CNS_CS_NS: return nvme_identify_ns_csi(n, req, true); + case NVME_ID_CNS_CS_IND_NS: + return nvme_identify_ns_ind(n, req, false); + case NVME_ID_CNS_CS_IND_NS_ALLOCATED: + return nvme_identify_ns_ind(n, req, true); case NVME_ID_CNS_CS_NS_PRESENT: return nvme_identify_ns_csi(n, req, false); case NVME_ID_CNS_CTRL: @@ -5780,12 +6108,41 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeRequest *req) static uint16_t nvme_abort(NvmeCtrl *n, NvmeRequest *req) { uint16_t sqid = le32_to_cpu(req->cmd.cdw10) & 0xffff; + uint16_t cid = (le32_to_cpu(req->cmd.cdw10) >> 16) & 0xffff; + NvmeSQueue *sq = n->sq[sqid]; + NvmeRequest *r, *next; + int i; req->cqe.result = 1; if (nvme_check_sqid(n, sqid)) { return NVME_INVALID_FIELD | NVME_DNR; } + if (sqid == 0) { + for (i = 0; i < n->outstanding_aers; i++) { + NvmeRequest *re = n->aer_reqs[i]; + if (re->cqe.cid == cid) { + memmove(n->aer_reqs + i, n->aer_reqs + i + 1, + (n->outstanding_aers - i - 1) * sizeof(NvmeRequest *)); + n->outstanding_aers--; + re->status = NVME_CMD_ABORT_REQ; + req->cqe.result = 0; + nvme_enqueue_req_completion(&n->admin_cq, re); + return NVME_SUCCESS; + } + } + } + + QTAILQ_FOREACH_SAFE(r, &sq->out_req_list, entry, next) { + if (r->cqe.cid == cid) { + if (r->aiocb) { + r->status = NVME_CMD_ABORT_REQ; + blk_aio_cancel_async(r->aiocb); + } + break; + } + } + return NVME_SUCCESS; } @@ -6090,8 +6447,10 @@ defaults: if (ret) { return ret; } - goto out; + break; + case NVME_WRITE_ATOMICITY: + result = n->dn; break; default: result = nvme_feature_default[fid]; @@ -6175,6 +6534,8 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest *req) uint8_t save = NVME_SETFEAT_SAVE(dw10); uint16_t status; int i; + NvmeIdCtrl *id = &n->id_ctrl; + NvmeAtomic *atomic = &n->atomic; trace_pci_nvme_setfeat(nvme_cid(req), nsid, fid, save, dw11); @@ -6319,7 +6680,7 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest *req) case NVME_COMMAND_SET_PROFILE: if (dw11 & 0x1ff) { trace_pci_nvme_err_invalid_iocsci(dw11 & 0x1ff); - return NVME_CMD_SET_CMB_REJECTED | NVME_DNR; + return NVME_IOCS_COMBINATION_REJECTED | NVME_DNR; } break; case NVME_FDP_MODE: @@ -6327,6 +6688,22 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest *req) return NVME_CMD_SEQ_ERROR | NVME_DNR; case NVME_FDP_EVENTS: return nvme_set_feature_fdp_events(n, ns, 
req); + case NVME_WRITE_ATOMICITY: + + n->dn = 0x1 & dw11; + + if (n->dn) { + atomic->atomic_max_write_size = le16_to_cpu(id->awupf) + 1; + } else { + atomic->atomic_max_write_size = le16_to_cpu(id->awun) + 1; + } + + if (atomic->atomic_max_write_size == 1) { + atomic->atomic_writes = 0; + } else { + atomic->atomic_writes = 1; + } + break; default: return NVME_FEAT_NOT_CHANGEABLE | NVME_DNR; } @@ -6352,40 +6729,49 @@ static uint16_t nvme_aer(NvmeCtrl *n, NvmeRequest *req) return NVME_NO_COMPLETE; } -static void nvme_update_dmrsl(NvmeCtrl *n) +static void nvme_update_dsm_limits(NvmeCtrl *n, NvmeNamespace *ns) { - int nsid; + if (ns) { + n->dmrsl = + MIN_NON_ZERO(n->dmrsl, BDRV_REQUEST_MAX_BYTES / nvme_l2b(ns, 1)); - for (nsid = 1; nsid <= NVME_MAX_NAMESPACES; nsid++) { - NvmeNamespace *ns = nvme_ns(n, nsid); + return; + } + + for (uint32_t nsid = 1; nsid <= NVME_MAX_NAMESPACES; nsid++) { + ns = nvme_ns(n, nsid); if (!ns) { continue; } - n->dmrsl = MIN_NON_ZERO(n->dmrsl, - BDRV_REQUEST_MAX_BYTES / nvme_l2b(ns, 1)); + n->dmrsl = + MIN_NON_ZERO(n->dmrsl, BDRV_REQUEST_MAX_BYTES / nvme_l2b(ns, 1)); } } -static void nvme_select_iocs_ns(NvmeCtrl *n, NvmeNamespace *ns) +static bool nvme_csi_supported(NvmeCtrl *n, uint8_t csi) { - uint32_t cc = ldl_le_p(&n->bar.cc); + uint32_t cc; - ns->iocs = nvme_cse_iocs_none; - switch (ns->csi) { + switch (csi) { case NVME_CSI_NVM: - if (NVME_CC_CSS(cc) != NVME_CC_CSS_ADMIN_ONLY) { - ns->iocs = nvme_cse_iocs_nvm; - } - break; + return true; + case NVME_CSI_ZONED: - if (NVME_CC_CSS(cc) == NVME_CC_CSS_CSI) { - ns->iocs = nvme_cse_iocs_zoned; - } else if (NVME_CC_CSS(cc) == NVME_CC_CSS_NVM) { - ns->iocs = nvme_cse_iocs_nvm; - } - break; + cc = ldl_le_p(&n->bar.cc); + + return NVME_CC_CSS(cc) == NVME_CC_CSS_ALL; } + + g_assert_not_reached(); +} + +static void nvme_detach_ns(NvmeCtrl *n, NvmeNamespace *ns) +{ + assert(ns->attached > 0); + + n->namespaces[ns->params.nsid] = NULL; + ns->attached--; } static uint16_t nvme_ns_attachment(NvmeCtrl *n, NvmeRequest *req) @@ -6430,7 +6816,7 @@ static uint16_t nvme_ns_attachment(NvmeCtrl *n, NvmeRequest *req) switch (sel) { case NVME_NS_ATTACHMENT_ATTACH: - if (nvme_ns(ctrl, nsid)) { + if (nvme_ns(n, nsid)) { return NVME_NS_ALREADY_ATTACHED | NVME_DNR; } @@ -6438,20 +6824,18 @@ static uint16_t nvme_ns_attachment(NvmeCtrl *n, NvmeRequest *req) return NVME_NS_PRIVATE | NVME_DNR; } + if (!nvme_csi_supported(n, ns->csi)) { + return NVME_IOCS_NOT_SUPPORTED | NVME_DNR; + } + nvme_attach_ns(ctrl, ns); - nvme_select_iocs_ns(ctrl, ns); + nvme_update_dsm_limits(ctrl, ns); break; case NVME_NS_ATTACHMENT_DETACH: - if (!nvme_ns(ctrl, nsid)) { - return NVME_NS_NOT_ATTACHED | NVME_DNR; - } - - ctrl->namespaces[nsid] = NULL; - ns->attached--; - - nvme_update_dmrsl(ctrl); + nvme_detach_ns(ctrl, ns); + nvme_update_dsm_limits(ctrl, NULL); break; @@ -6954,7 +7338,7 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req) trace_pci_nvme_admin_cmd(nvme_cid(req), nvme_sqid(req), req->cmd.opcode, nvme_adm_opc_str(req->cmd.opcode)); - if (!(nvme_cse_acs[req->cmd.opcode] & NVME_CMD_EFF_CSUPP)) { + if (!(n->cse.acs[req->cmd.opcode] & NVME_CMD_EFF_CSUPP)) { trace_pci_nvme_err_invalid_admin_opc(req->cmd.opcode); return NVME_INVALID_OPCODE | NVME_DNR; } @@ -7002,7 +7386,7 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req) case NVME_ADM_CMD_DIRECTIVE_RECV: return nvme_directive_receive(n, req); default: - assert(false); + g_assert_not_reached(); } return NVME_INVALID_OPCODE | NVME_DNR; @@ -7024,6 +7408,81 @@ static void 
nvme_update_sq_tail(NvmeSQueue *sq) trace_pci_nvme_update_sq_tail(sq->sqid, sq->tail); } +#define NVME_ATOMIC_NO_START 0 +#define NVME_ATOMIC_START_ATOMIC 1 +#define NVME_ATOMIC_START_NONATOMIC 2 + +static int nvme_atomic_write_check(NvmeCtrl *n, NvmeCmd *cmd, + NvmeAtomic *atomic) +{ + NvmeRwCmd *rw = (NvmeRwCmd *)cmd; + uint64_t slba = le64_to_cpu(rw->slba); + uint32_t nlb = (uint32_t)le16_to_cpu(rw->nlb); + uint64_t elba = slba + nlb; + bool cmd_atomic_wr = true; + int i; + + if ((cmd->opcode == NVME_CMD_READ) || ((cmd->opcode == NVME_CMD_WRITE) && + ((rw->nlb + 1) > atomic->atomic_max_write_size))) { + cmd_atomic_wr = false; + } + + /* + * Walk the queues to see if there are any atomic conflicts. + */ + for (i = 1; i < n->params.max_ioqpairs + 1; i++) { + NvmeSQueue *sq; + NvmeRequest *req; + NvmeRwCmd *req_rw; + uint64_t req_slba; + uint32_t req_nlb; + uint64_t req_elba; + + sq = n->sq[i]; + if (!sq) { + continue; + } + + /* + * Walk all the requests on a given queue. + */ + QTAILQ_FOREACH(req, &sq->out_req_list, entry) { + req_rw = (NvmeRwCmd *)&req->cmd; + + if (((req_rw->opcode == NVME_CMD_WRITE) || + (req_rw->opcode == NVME_CMD_READ)) && + (cmd->nsid == req->ns->params.nsid)) { + req_slba = le64_to_cpu(req_rw->slba); + req_nlb = (uint32_t)le16_to_cpu(req_rw->nlb); + req_elba = req_slba + req_nlb; + + if (cmd_atomic_wr) { + if ((elba >= req_slba) && (slba <= req_elba)) { + return NVME_ATOMIC_NO_START; + } + } else { + if (req->atomic_write && ((elba >= req_slba) && + (slba <= req_elba))) { + return NVME_ATOMIC_NO_START; + } + } + } + } + } + if (cmd_atomic_wr) { + return NVME_ATOMIC_START_ATOMIC; + } + return NVME_ATOMIC_START_NONATOMIC; +} + +static NvmeAtomic *nvme_get_atomic(NvmeCtrl *n, NvmeCmd *cmd) +{ + if (n->atomic.atomic_writes) { + return &n->atomic; + } + return NULL; +} + static void nvme_process_sq(void *opaque) { NvmeSQueue *sq = opaque; @@ -7040,6 +7499,9 @@ static void nvme_process_sq(void *opaque) } while (!(nvme_sq_empty(sq) || QTAILQ_EMPTY(&sq->req_list))) { + NvmeAtomic *atomic; + bool cmd_is_atomic; + addr = sq->dma_addr + (sq->head << NVME_SQES); if (nvme_addr_read(n, addr, (void *)&cmd, sizeof(cmd))) { trace_pci_nvme_err_addr_read(addr); @@ -7047,6 +7509,26 @@ static void nvme_process_sq(void *opaque) stl_le_p(&n->bar.csts, NVME_CSTS_FAILED); break; } + + atomic = nvme_get_atomic(n, &cmd); + + cmd_is_atomic = false; + if (sq->sqid && atomic) { + int ret; + + ret = nvme_atomic_write_check(n, &cmd, atomic); + switch (ret) { + case NVME_ATOMIC_NO_START: + qemu_bh_schedule(sq->bh); + return; + case NVME_ATOMIC_START_ATOMIC: + cmd_is_atomic = true; + break; + case NVME_ATOMIC_START_NONATOMIC: + default: + break; + } + } nvme_inc_sq_head(sq); req = QTAILQ_FIRST(&sq->req_list); @@ -7056,6 +7538,10 @@ static void nvme_process_sq(void *opaque) req->cqe.cid = cmd.cid; memcpy(&req->cmd, &cmd, sizeof(NvmeCmd)); + if (sq->sqid && atomic) { + req->atomic_write = cmd_is_atomic; + } + status = sq->sqid ? 
nvme_io_cmd(n, req) : nvme_admin_cmd(n, req); if (status != NVME_NO_COMPLETE) { @@ -7159,6 +7645,8 @@ static void nvme_ctrl_reset(NvmeCtrl *n, NvmeResetType rst) n->outstanding_aers = 0; n->qs_created = false; + n->dn = n->params.atomic_dn; /* Set Disable Normal */ + nvme_update_msixcap_ts(pci_dev, n->conf_msix_qsize); if (pci_is_vf(pci_dev)) { @@ -7197,21 +7685,6 @@ static void nvme_ctrl_shutdown(NvmeCtrl *n) } } -static void nvme_select_iocs(NvmeCtrl *n) -{ - NvmeNamespace *ns; - int i; - - for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { - ns = nvme_ns(n, i); - if (!ns) { - continue; - } - - nvme_select_iocs_ns(n, ns); - } -} - static int nvme_start_ctrl(NvmeCtrl *n) { uint64_t cap = ldq_le_p(&n->bar.cap); @@ -7278,7 +7751,22 @@ static int nvme_start_ctrl(NvmeCtrl *n) nvme_set_timestamp(n, 0ULL); - nvme_select_iocs(n); + /* verify that the command sets of attached namespaces are supported */ + for (int i = 1; i <= NVME_MAX_NAMESPACES; i++) { + NvmeNamespace *ns = nvme_subsys_ns(n->subsys, i); + + if (!ns || (!ns->params.shared && ns->ctrl != n)) { + continue; + } + + if (nvme_csi_supported(n, ns->csi) && !ns->params.detached) { + if (!ns->attached || ns->params.shared) { + nvme_attach_ns(n, ns); + } + } + } + + nvme_update_dsm_limits(n, NULL); return 0; } @@ -7603,7 +8091,6 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) /* Completion queue doorbell write */ uint16_t new_head = val & 0xffff; - int start_sqs; NvmeCQueue *cq; qid = (addr - (0x1000 + (1 << 2))) >> 3; @@ -7654,18 +8141,15 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) trace_pci_nvme_mmio_doorbell_cq(cq->cqid, new_head); - start_sqs = nvme_cq_full(cq) ? 1 : 0; + /* scheduled deferred cqe posting if queue was previously full */ + if (nvme_cq_full(cq)) { + qemu_bh_schedule(cq->bh); + } + cq->head = new_head; if (!qid && n->dbbuf_enabled) { stl_le_pci_dma(pci, cq->db_addr, cq->head, MEMTXATTRS_UNSPECIFIED); } - if (start_sqs) { - NvmeSQueue *sq; - QTAILQ_FOREACH(sq, &cq->sq_list, entry) { - qemu_bh_schedule(sq->bh); - } - qemu_bh_schedule(cq->bh); - } if (cq->tail == cq->head) { if (cq->irq_enabled) { @@ -7935,6 +8419,8 @@ static void nvme_init_state(NvmeCtrl *n) NvmeSecCtrlEntry *list = n->sec_ctrl_list; NvmeSecCtrlEntry *sctrl; PCIDevice *pci = PCI_DEVICE(n); + NvmeAtomic *atomic = &n->atomic; + NvmeIdCtrl *id = &n->id_ctrl; uint8_t max_vfs; int i; @@ -7992,6 +8478,29 @@ static void nvme_init_state(NvmeCtrl *n) cpu_to_le16(n->params.sriov_max_vi_per_vf) : cap->vifrt / MAX(max_vfs, 1); } + + /* Atomic Write */ + id->awun = cpu_to_le16(n->params.atomic_awun); + id->awupf = cpu_to_le16(n->params.atomic_awupf); + n->dn = n->params.atomic_dn; + + if (id->awun || id->awupf) { + if (id->awupf > id->awun) { + id->awupf = 0; + } + + if (n->dn) { + atomic->atomic_max_write_size = id->awupf + 1; + } else { + atomic->atomic_max_write_size = id->awun + 1; + } + + if (atomic->atomic_max_write_size == 1) { + atomic->atomic_writes = 0; + } else { + atomic->atomic_writes = 1; + } + } } static void nvme_init_cmb(NvmeCtrl *n, PCIDevice *pci_dev) @@ -8080,8 +8589,7 @@ static bool nvme_init_sriov(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t offset, if (!pcie_sriov_pf_init(pci_dev, offset, "nvme", vf_dev_id, n->params.sriov_max_vfs, n->params.sriov_max_vfs, - NVME_VF_OFFSET, NVME_VF_STRIDE, - errp)) { + NVME_VF_OFFSET, NVME_VF_STRIDE, errp)) { return false; } @@ -8096,8 +8604,7 @@ static int nvme_add_pm_capability(PCIDevice *pci_dev, uint8_t offset) Error *err = NULL; int ret; - ret = pci_add_capability(pci_dev, 
PCI_CAP_ID_PM, offset, - PCI_PM_SIZEOF, &err); + ret = pci_pm_init(pci_dev, offset, &err); if (err) { error_report_err(err); return ret; @@ -8113,6 +8620,27 @@ static int nvme_add_pm_capability(PCIDevice *pci_dev, uint8_t offset) return 0; } +static bool pcie_doe_spdm_rsp(DOECap *doe_cap) +{ + void *req = pcie_doe_get_write_mbox_ptr(doe_cap); + uint32_t req_len = pcie_doe_get_obj_len(req) * 4; + void *rsp = doe_cap->read_mbox; + uint32_t rsp_len = SPDM_SOCKET_MAX_MESSAGE_BUFFER_SIZE; + + uint32_t recvd = spdm_socket_rsp(doe_cap->spdm_socket, + SPDM_SOCKET_TRANSPORT_TYPE_PCI_DOE, + req, req_len, rsp, rsp_len); + doe_cap->read_mbox_len += DIV_ROUND_UP(recvd, 4); + + return recvd != 0; +} + +static DOEProtocol doe_spdm_prot[] = { + { PCI_VENDOR_ID_PCI_SIG, PCI_SIG_DOE_CMA, pcie_doe_spdm_rsp }, + { PCI_VENDOR_ID_PCI_SIG, PCI_SIG_DOE_SECURED_CMA, pcie_doe_spdm_rsp }, + { } +}; + static bool nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp) { ERRP_GUARD(); @@ -8122,7 +8650,7 @@ static bool nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp) unsigned nr_vectors; int ret; - pci_conf[PCI_INTERRUPT_PIN] = 1; + pci_conf[PCI_INTERRUPT_PIN] = pci_is_vf(pci_dev) ? 0 : 1; pci_config_set_prog_interface(pci_conf, 0x2); if (n->params.use_intel_id) { @@ -8194,12 +8722,30 @@ static bool nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp) if (!pci_is_vf(pci_dev) && n->params.sriov_max_vfs && !nvme_init_sriov(n, pci_dev, 0x120, errp)) { - msix_uninit(pci_dev, &n->bar0, &n->bar0); return false; } nvme_update_msixcap_ts(pci_dev, n->conf_msix_qsize); + pcie_cap_deverr_init(pci_dev); + + /* DOE Initialisation */ + if (pci_dev->spdm_port) { + uint16_t doe_offset = n->params.sriov_max_vfs ? + PCI_CONFIG_SPACE_SIZE + PCI_ARI_SIZEOF + : PCI_CONFIG_SPACE_SIZE; + + pcie_doe_init(pci_dev, &pci_dev->doe_spdm, doe_offset, + doe_spdm_prot, true, 0); + + pci_dev->doe_spdm.spdm_socket = spdm_socket_connect(pci_dev->spdm_port, + errp); + + if (pci_dev->doe_spdm.spdm_socket < 0) { + return false; + } + } + if (n->params.cmb_size_mb) { nvme_init_cmb(n, pci_dev); } @@ -8231,6 +8777,12 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev) uint64_t cap = ldq_le_p(&n->bar.cap); NvmeSecCtrlEntry *sctrl = nvme_sctrl(n); uint32_t ctratt; + uint16_t oacs; + + memcpy(n->cse.acs, nvme_cse_acs_default, sizeof(n->cse.acs)); + memcpy(n->cse.iocs.nvm, nvme_cse_iocs_nvm_default, sizeof(n->cse.iocs.nvm)); + memcpy(n->cse.iocs.zoned, nvme_cse_iocs_zoned_default, + sizeof(n->cse.iocs.zoned)); id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID)); id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID)); @@ -8241,7 +8793,11 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev) id->cntlid = cpu_to_le16(n->cntlid); id->oaes = cpu_to_le32(NVME_OAES_NS_ATTR); + ctratt = NVME_CTRATT_ELBAS; + if (n->params.ctratt.mem) { + ctratt |= NVME_CTRATT_MEM; + } id->rab = 6; @@ -8257,9 +8813,23 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev) id->mdts = n->params.mdts; id->ver = cpu_to_le32(NVME_SPEC_VER); - id->oacs = - cpu_to_le16(NVME_OACS_NS_MGMT | NVME_OACS_FORMAT | NVME_OACS_DBBUF | - NVME_OACS_DIRECTIVES); + + oacs = NVME_OACS_NMS | NVME_OACS_FORMAT | NVME_OACS_DIRECTIVES; + + if (n->params.dbcs) { + oacs |= NVME_OACS_DBCS; + + n->cse.acs[NVME_ADM_CMD_DBBUF_CONFIG] = NVME_CMD_EFF_CSUPP; + } + + if (n->params.sriov_max_vfs) { + oacs |= NVME_OACS_VMS; + + n->cse.acs[NVME_ADM_CMD_VIRT_MNGMT] = NVME_CMD_EFF_CSUPP; + } + + id->oacs = cpu_to_le16(oacs); + id->cntrltype = 0x1; /* 
@@ -8287,7 +8857,8 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev) id->nn = cpu_to_le32(NVME_MAX_NAMESPACES); id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROES | NVME_ONCS_TIMESTAMP | NVME_ONCS_FEATURES | NVME_ONCS_DSM | - NVME_ONCS_COMPARE | NVME_ONCS_COPY); + NVME_ONCS_COMPARE | NVME_ONCS_COPY | + NVME_ONCS_NVMCSA | NVME_ONCS_NVMAFC); /* * NOTE: If this device ever supports a command set that does NOT use 0x0 @@ -8298,8 +8869,10 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev) */ id->vwc = NVME_VWC_NSID_BROADCAST_SUPPORT | NVME_VWC_PRESENT; - id->ocfs = cpu_to_le16(NVME_OCFS_COPY_FORMAT_0 | NVME_OCFS_COPY_FORMAT_1); - id->sgls = cpu_to_le32(NVME_CTRL_SGLS_SUPPORT_NO_ALIGN); + id->ocfs = cpu_to_le16(NVME_OCFS_COPY_FORMAT_0 | NVME_OCFS_COPY_FORMAT_1 | + NVME_OCFS_COPY_FORMAT_2 | NVME_OCFS_COPY_FORMAT_3); + id->sgls = cpu_to_le32(NVME_CTRL_SGLS_SUPPORT_NO_ALIGN | + NVME_CTRL_SGLS_MPTR_SGL); nvme_init_subnqn(n); @@ -8307,15 +8880,13 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev) id->psd[0].enlat = cpu_to_le32(0x10); id->psd[0].exlat = cpu_to_le32(0x4); - if (n->subsys) { - id->cmic |= NVME_CMIC_MULTI_CTRL; - ctratt |= NVME_CTRATT_ENDGRPS; + id->cmic |= NVME_CMIC_MULTI_CTRL; + ctratt |= NVME_CTRATT_ENDGRPS; - id->endgidmax = cpu_to_le16(0x1); + id->endgidmax = cpu_to_le16(0x1); - if (n->subsys->endgrp.fdp.enabled) { - ctratt |= NVME_CTRATT_FDPS; - } + if (n->subsys->endgrp.fdp.enabled) { + ctratt |= NVME_CTRATT_FDPS; } id->ctratt = cpu_to_le32(ctratt); @@ -8323,9 +8894,8 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev) NVME_CAP_SET_MQES(cap, n->params.mqes); NVME_CAP_SET_CQR(cap, 1); NVME_CAP_SET_TO(cap, 0xf); - NVME_CAP_SET_CSS(cap, NVME_CAP_CSS_NVM); - NVME_CAP_SET_CSS(cap, NVME_CAP_CSS_CSI_SUPP); - NVME_CAP_SET_CSS(cap, NVME_CAP_CSS_ADMIN_ONLY); + NVME_CAP_SET_CSS(cap, NVME_CAP_CSS_NCSS); + NVME_CAP_SET_CSS(cap, NVME_CAP_CSS_IOCSS); NVME_CAP_SET_MPSMAX(cap, 4); NVME_CAP_SET_CMBS(cap, n->params.cmb_size_mb ? 1 : 0); NVME_CAP_SET_PMRS(cap, n->pmr.dev ? 1 : 0); @@ -8344,7 +8914,15 @@ static int nvme_init_subsys(NvmeCtrl *n, Error **errp) int cntlid; if (!n->subsys) { - return 0; + DeviceState *dev = qdev_new(TYPE_NVME_SUBSYS); + + qdev_prop_set_string(dev, "nqn", n->params.serial); + + if (!qdev_realize(dev, NULL, errp)) { + return -1; + } + + n->subsys = NVME_SUBSYS(dev); } cntlid = nvme_subsys_register_ctrl(n, errp); @@ -8364,9 +8942,6 @@ void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns) n->namespaces[nsid] = ns; ns->attached++; - - n->dmrsl = MIN_NON_ZERO(n->dmrsl, - BDRV_REQUEST_MAX_BYTES / nvme_l2b(ns, 1)); } static void nvme_realize(PCIDevice *pci_dev, Error **errp) @@ -8389,6 +8964,13 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) */ n->params.serial = g_strdup(pn->params.serial); n->subsys = pn->subsys; + + /* + * Assigning this link (strong link) causes an `object_unref` later in + * `object_release_link_property`. Increment the refcount to balance + * this out. 
+ */ + object_ref(OBJECT(pn->subsys)); } if (!nvme_check_params(n, errp)) { @@ -8410,12 +8992,13 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) if (n->namespace.blkconf.blk) { ns = &n->namespace; ns->params.nsid = 1; + ns->ctrl = n; if (nvme_ns_setup(ns, errp)) { return; } - nvme_attach_ns(n, ns); + n->subsys->namespaces[ns->params.nsid] = ns; } } @@ -8427,17 +9010,15 @@ static void nvme_exit(PCIDevice *pci_dev) nvme_ctrl_reset(n, NVME_RESET_FUNCTION); - if (n->subsys) { - for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { - ns = nvme_ns(n, i); - if (ns) { - ns->attached--; - } + for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { + ns = nvme_ns(n, i); + if (ns) { + ns->attached--; } - - nvme_subsys_unregister_ctrl(n->subsys, n); } + nvme_subsys_unregister_ctrl(n->subsys, n); + g_free(n->cq); g_free(n->sq); g_free(n->aer_reqs); @@ -8446,6 +9027,11 @@ static void nvme_exit(PCIDevice *pci_dev) g_free(n->cmb.buf); } + if (pci_dev->doe_spdm.spdm_socket > 0) { + spdm_socket_close(pci_dev->doe_spdm.spdm_socket, + SPDM_SOCKET_TRANSPORT_TYPE_PCI_DOE); + } + if (n->pmr.dev) { host_memory_backend_set_mapped(n->pmr.dev, false); } @@ -8454,11 +9040,16 @@ static void nvme_exit(PCIDevice *pci_dev) pcie_sriov_pf_exit(pci_dev); } - msix_uninit(pci_dev, &n->bar0, &n->bar0); + if (n->params.msix_exclusive_bar && !pci_is_vf(pci_dev)) { + msix_uninit_exclusive_bar(pci_dev); + } else { + msix_uninit(pci_dev, &n->bar0, &n->bar0); + } + memory_region_del_subregion(&n->bar0, &n->iomem); } -static Property nvme_props[] = { +static const Property nvme_props[] = { DEFINE_BLOCK_PROPERTIES(NvmeCtrl, namespace.blkconf), DEFINE_PROP_LINK("pmrdev", NvmeCtrl, pmr.dev, TYPE_MEMORY_BACKEND, HostMemoryBackend *), @@ -8476,6 +9067,7 @@ static Property nvme_props[] = { DEFINE_PROP_BOOL("use-intel-id", NvmeCtrl, params.use_intel_id, false), DEFINE_PROP_BOOL("legacy-cmb", NvmeCtrl, params.legacy_cmb, false), DEFINE_PROP_BOOL("ioeventfd", NvmeCtrl, params.ioeventfd, false), + DEFINE_PROP_BOOL("dbcs", NvmeCtrl, params.dbcs, true), DEFINE_PROP_UINT8("zoned.zasl", NvmeCtrl, params.zasl, 0), DEFINE_PROP_BOOL("zoned.auto_transition", NvmeCtrl, params.auto_transition_zones, true), @@ -8491,7 +9083,12 @@ static Property nvme_props[] = { DEFINE_PROP_BOOL("msix-exclusive-bar", NvmeCtrl, params.msix_exclusive_bar, false), DEFINE_PROP_UINT16("mqes", NvmeCtrl, params.mqes, 0x7ff), - DEFINE_PROP_END_OF_LIST(), + DEFINE_PROP_UINT16("spdm_port", PCIDevice, spdm_port, 0), + DEFINE_PROP_BOOL("ctratt.mem", NvmeCtrl, params.ctratt.mem, false), + DEFINE_PROP_BOOL("atomic.dn", NvmeCtrl, params.atomic_dn, 0), + DEFINE_PROP_UINT16("atomic.awun", NvmeCtrl, params.atomic_awun, 0), + DEFINE_PROP_UINT16("atomic.awupf", NvmeCtrl, params.atomic_awupf, 0), + DEFINE_PROP_BOOL("ocp", NvmeCtrl, params.ocp, false), }; static void nvme_get_smart_warning(Object *obj, Visitor *v, const char *name, @@ -8562,23 +9159,38 @@ static void nvme_pci_write_config(PCIDevice *dev, uint32_t address, { uint16_t old_num_vfs = pcie_sriov_num_vfs(dev); + if (pcie_find_capability(dev, PCI_EXT_CAP_ID_DOE)) { + pcie_doe_write_config(&dev->doe_spdm, address, val, len); + } pci_default_write_config(dev, address, val, len); pcie_cap_flr_write_config(dev, address, val, len); nvme_sriov_post_write_config(dev, old_num_vfs); } +static uint32_t nvme_pci_read_config(PCIDevice *dev, uint32_t address, int len) +{ + uint32_t val; + if (dev->spdm_port && pcie_find_capability(dev, PCI_EXT_CAP_ID_DOE)) { + if (pcie_doe_read_config(&dev->doe_spdm, address, len, &val)) { + return val; + } + } + 
return pci_default_read_config(dev, address, len); +} + static const VMStateDescription nvme_vmstate = { .name = "nvme", .unmigratable = 1, }; -static void nvme_class_init(ObjectClass *oc, void *data) +static void nvme_class_init(ObjectClass *oc, const void *data) { DeviceClass *dc = DEVICE_CLASS(oc); PCIDeviceClass *pc = PCI_DEVICE_CLASS(oc); pc->realize = nvme_realize; pc->config_write = nvme_pci_write_config; + pc->config_read = nvme_pci_read_config; pc->exit = nvme_exit; pc->class_id = PCI_CLASS_STORAGE_EXPRESS; pc->revision = 2; @@ -8587,7 +9199,7 @@ static void nvme_class_init(ObjectClass *oc, void *data) dc->desc = "Non-Volatile Memory Express"; device_class_set_props(dc, nvme_props); dc->vmsd = &nvme_vmstate; - dc->reset = nvme_pci_reset; + device_class_set_legacy_reset(dc, nvme_pci_reset); } static void nvme_instance_init(Object *obj) @@ -8609,7 +9221,7 @@ static const TypeInfo nvme_info = { .instance_size = sizeof(NvmeCtrl), .instance_init = nvme_instance_init, .class_init = nvme_class_init, - .interfaces = (InterfaceInfo[]) { + .interfaces = (const InterfaceInfo[]) { { INTERFACE_PCIE_DEVICE }, { } },
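A note on the atomic write gating added above: nvme_atomic_write_check() classifies a fetched command as atomic when it is a Write spanning no more than atomic_max_write_size blocks (reads and oversized writes are non-atomic), then walks all outstanding requests. An atomic write may not start while its LBA range overlaps any in-flight read or write; a non-atomic command is only held off by an overlapping in-flight atomic write. The conflict test itself is a closed-interval intersection on zero-based NVMe ranges. Below is a minimal, self-contained C sketch of just that predicate; LbaRange and lba_ranges_overlap are invented names for illustration, not QEMU API:

    /*
     * Simplified model of the range test in nvme_atomic_write_check().
     * NVMe encodes a transfer as [slba, slba + nlb] with nlb zero-based
     * (nlb == 0 means one block), so two commands conflict when their
     * closed intervals intersect.
     */
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef struct {
        uint64_t slba; /* starting LBA */
        uint32_t nlb;  /* zero-based number of logical blocks */
    } LbaRange;

    /* mirrors the diff's (elba >= req_slba) && (slba <= req_elba) */
    static bool lba_ranges_overlap(LbaRange a, LbaRange b)
    {
        uint64_t a_last = a.slba + a.nlb;
        uint64_t b_last = b.slba + b.nlb;

        return a_last >= b.slba && a.slba <= b_last;
    }

    int main(void)
    {
        LbaRange atomic_wr = { .slba = 0,  .nlb = 7 }; /* LBAs 0..7   */
        LbaRange inflight  = { .slba = 4,  .nlb = 3 }; /* LBAs 4..7   */
        LbaRange disjoint  = { .slba = 16, .nlb = 3 }; /* LBAs 16..19 */

        /* overlap -> the command is not started (NVME_ATOMIC_NO_START) */
        printf("conflicts with in-flight: %d\n",
               lba_ranges_overlap(atomic_wr, inflight));
        /* no overlap -> it starts atomically (NVME_ATOMIC_START_ATOMIC) */
        printf("conflicts with disjoint:  %d\n",
               lba_ranges_overlap(atomic_wr, disjoint));
        return 0;
    }

When the check returns NVME_ATOMIC_NO_START, nvme_process_sq() reschedules the submission queue bottom half rather than advancing the head, so the conflicting command is retried after outstanding requests complete.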