Diffstat (limited to 'hw/nvme/ctrl.c')
 hw/nvme/ctrl.c | 1218 ++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 915 insertions(+), 303 deletions(-)
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 5b1b0ca..2200028 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -40,6 +40,9 @@
* sriov_vi_flexible=<N[optional]> \
* sriov_max_vi_per_vf=<N[optional]> \
* sriov_max_vq_per_vf=<N[optional]> \
+ * atomic.dn=<on|off[optional]>, \
+ * atomic.awun=<N[optional]>, \
+ * atomic.awupf=<N[optional]>, \
* subsys=<subsys_id>
* -device nvme-ns,drive=<drive_id>,bus=<bus_name>,nsid=<nsid>,\
* zoned=<true|false[optional]>, \
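The three atomic.* parameters introduced above feed the controller's AWUN/AWUPF identify fields and the Write Atomicity feature's Disable Normal bit (wired up in nvme_init_state further down). A hypothetical invocation exercising them, with illustrative values only:

    -device nvme,serial=deadbeef,drive=nvm,atomic.dn=off,atomic.awun=255,atomic.awupf=63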
@@ -198,11 +201,12 @@
#include "qemu/range.h"
#include "qapi/error.h"
#include "qapi/visitor.h"
-#include "sysemu/sysemu.h"
-#include "sysemu/block-backend.h"
-#include "sysemu/hostmem.h"
+#include "system/system.h"
+#include "system/block-backend.h"
+#include "system/hostmem.h"
#include "hw/pci/msix.h"
#include "hw/pci/pcie_sriov.h"
+#include "system/spdm-socket.h"
#include "migration/vmstate.h"
#include "nvme.h"
@@ -253,6 +257,7 @@ static const uint32_t nvme_feature_cap[NVME_FID_MAX] = {
[NVME_ERROR_RECOVERY] = NVME_FEAT_CAP_CHANGE | NVME_FEAT_CAP_NS,
[NVME_VOLATILE_WRITE_CACHE] = NVME_FEAT_CAP_CHANGE,
[NVME_NUMBER_OF_QUEUES] = NVME_FEAT_CAP_CHANGE,
+ [NVME_WRITE_ATOMICITY] = NVME_FEAT_CAP_CHANGE,
[NVME_ASYNCHRONOUS_EVENT_CONF] = NVME_FEAT_CAP_CHANGE,
[NVME_TIMESTAMP] = NVME_FEAT_CAP_CHANGE,
[NVME_HOST_BEHAVIOR_SUPPORT] = NVME_FEAT_CAP_CHANGE,
@@ -261,7 +266,7 @@ static const uint32_t nvme_feature_cap[NVME_FID_MAX] = {
[NVME_FDP_EVENTS] = NVME_FEAT_CAP_CHANGE | NVME_FEAT_CAP_NS,
};
-static const uint32_t nvme_cse_acs[256] = {
+static const uint32_t nvme_cse_acs_default[256] = {
[NVME_ADM_CMD_DELETE_SQ] = NVME_CMD_EFF_CSUPP,
[NVME_ADM_CMD_CREATE_SQ] = NVME_CMD_EFF_CSUPP,
[NVME_ADM_CMD_GET_LOG_PAGE] = NVME_CMD_EFF_CSUPP,
@@ -272,17 +277,14 @@ static const uint32_t nvme_cse_acs[256] = {
[NVME_ADM_CMD_SET_FEATURES] = NVME_CMD_EFF_CSUPP,
[NVME_ADM_CMD_GET_FEATURES] = NVME_CMD_EFF_CSUPP,
[NVME_ADM_CMD_ASYNC_EV_REQ] = NVME_CMD_EFF_CSUPP,
- [NVME_ADM_CMD_NS_ATTACHMENT] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_NIC,
- [NVME_ADM_CMD_VIRT_MNGMT] = NVME_CMD_EFF_CSUPP,
- [NVME_ADM_CMD_DBBUF_CONFIG] = NVME_CMD_EFF_CSUPP,
+ [NVME_ADM_CMD_NS_ATTACHMENT] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_NIC |
+ NVME_CMD_EFF_CCC,
[NVME_ADM_CMD_FORMAT_NVM] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
[NVME_ADM_CMD_DIRECTIVE_RECV] = NVME_CMD_EFF_CSUPP,
[NVME_ADM_CMD_DIRECTIVE_SEND] = NVME_CMD_EFF_CSUPP,
};
-static const uint32_t nvme_cse_iocs_none[256];
-
-static const uint32_t nvme_cse_iocs_nvm[256] = {
+static const uint32_t nvme_cse_iocs_nvm_default[256] = {
[NVME_CMD_FLUSH] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
[NVME_CMD_WRITE_ZEROES] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
[NVME_CMD_WRITE] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
@@ -295,7 +297,7 @@ static const uint32_t nvme_cse_iocs_nvm[256] = {
[NVME_CMD_IO_MGMT_SEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
};
-static const uint32_t nvme_cse_iocs_zoned[256] = {
+static const uint32_t nvme_cse_iocs_zoned_default[256] = {
[NVME_CMD_FLUSH] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
[NVME_CMD_WRITE_ZEROES] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
[NVME_CMD_WRITE] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
@@ -304,6 +306,9 @@ static const uint32_t nvme_cse_iocs_zoned[256] = {
[NVME_CMD_VERIFY] = NVME_CMD_EFF_CSUPP,
[NVME_CMD_COPY] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
[NVME_CMD_COMPARE] = NVME_CMD_EFF_CSUPP,
+ [NVME_CMD_IO_MGMT_RECV] = NVME_CMD_EFF_CSUPP,
+ [NVME_CMD_IO_MGMT_SEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
+
[NVME_CMD_ZONE_APPEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
[NVME_CMD_ZONE_MGMT_SEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
[NVME_CMD_ZONE_MGMT_RECV] = NVME_CMD_EFF_CSUPP,
@@ -651,6 +656,12 @@ static void nvme_irq_check(NvmeCtrl *n)
if (msix_enabled(pci)) {
return;
}
+
+ /* VFs do not implement INTx */
+ if (pci_is_vf(pci)) {
+ return;
+ }
+
if (~intms & n->irq_status) {
pci_irq_assert(pci);
} else {
@@ -1046,7 +1057,8 @@ static uint16_t nvme_map_sgl(NvmeCtrl *n, NvmeSg *sg, NvmeSglDescriptor sgl,
*/
#define SEG_CHUNK_SIZE 256
- NvmeSglDescriptor segment[SEG_CHUNK_SIZE], *sgld, *last_sgld;
+ QEMU_UNINITIALIZED NvmeSglDescriptor segment[SEG_CHUNK_SIZE];
+ NvmeSglDescriptor *sgld, *last_sgld;
uint64_t nsgld;
uint32_t seg_len;
uint16_t status;
@@ -1515,9 +1527,16 @@ static void nvme_post_cqes(void *opaque)
stl_le_p(&n->bar.csts, NVME_CSTS_FAILED);
break;
}
+
QTAILQ_REMOVE(&cq->req_list, req, entry);
+
nvme_inc_cq_tail(cq);
nvme_sg_unmap(&req->sg);
+
+ if (QTAILQ_EMPTY(&sq->req_list) && !nvme_sq_empty(sq)) {
+ qemu_bh_schedule(sq->bh);
+ }
+
QTAILQ_INSERT_TAIL(&sq->req_list, req, entry);
}
if (cq->tail != cq->head) {
@@ -1648,9 +1667,16 @@ static void nvme_smart_event(NvmeCtrl *n, uint8_t event)
static void nvme_clear_events(NvmeCtrl *n, uint8_t event_type)
{
+ NvmeAsyncEvent *event, *next;
+
n->aer_mask &= ~(1 << event_type);
- if (!QTAILQ_EMPTY(&n->aer_queue)) {
- nvme_process_aers(n);
+
+ QTAILQ_FOREACH_SAFE(event, &n->aer_queue, entry, next) {
+ if (event->result.event_type == event_type) {
+ QTAILQ_REMOVE(&n->aer_queue, event, entry);
+ n->aer_queued--;
+ g_free(event);
+ }
}
}
@@ -1737,43 +1763,6 @@ static uint16_t nvme_check_dulbe(NvmeNamespace *ns, uint64_t slba,
return NVME_SUCCESS;
}
-static void nvme_aio_err(NvmeRequest *req, int ret)
-{
- uint16_t status = NVME_SUCCESS;
- Error *local_err = NULL;
-
- switch (req->cmd.opcode) {
- case NVME_CMD_READ:
- status = NVME_UNRECOVERED_READ;
- break;
- case NVME_CMD_FLUSH:
- case NVME_CMD_WRITE:
- case NVME_CMD_WRITE_ZEROES:
- case NVME_CMD_ZONE_APPEND:
- case NVME_CMD_COPY:
- status = NVME_WRITE_FAULT;
- break;
- default:
- status = NVME_INTERNAL_DEV_ERROR;
- break;
- }
-
- trace_pci_nvme_err_aio(nvme_cid(req), strerror(-ret), status);
-
- error_setg_errno(&local_err, -ret, "aio failed");
- error_report_err(local_err);
-
- /*
- * Set the command status code to the first encountered error but allow a
- * subsequent Internal Device Error to trump it.
- */
- if (req->status && status != NVME_INTERNAL_DEV_ERROR) {
- return;
- }
-
- req->status = status;
-}
-
static inline uint32_t nvme_zone_idx(NvmeNamespace *ns, uint64_t slba)
{
return ns->zone_size_log2 > 0 ? slba >> ns->zone_size_log2 :
@@ -1811,7 +1800,7 @@ static uint16_t nvme_check_zone_state_for_write(NvmeZone *zone)
trace_pci_nvme_err_zone_is_read_only(zslba);
return NVME_ZONE_READ_ONLY;
default:
- assert(false);
+ g_assert_not_reached();
}
return NVME_INTERNAL_DEV_ERROR;
@@ -1865,7 +1854,7 @@ static uint16_t nvme_check_zone_state_for_read(NvmeZone *zone)
trace_pci_nvme_err_zone_is_offline(zone->d.zslba);
return NVME_ZONE_OFFLINE;
default:
- assert(false);
+ g_assert_not_reached();
}
return NVME_INTERNAL_DEV_ERROR;
@@ -2132,11 +2121,16 @@ static inline bool nvme_is_write(NvmeRequest *req)
static void nvme_misc_cb(void *opaque, int ret)
{
NvmeRequest *req = opaque;
+ uint16_t cid = nvme_cid(req);
- trace_pci_nvme_misc_cb(nvme_cid(req));
+ trace_pci_nvme_misc_cb(cid);
if (ret) {
- nvme_aio_err(req, ret);
+ if (!req->status) {
+ req->status = NVME_INTERNAL_DEV_ERROR;
+ }
+
+ trace_pci_nvme_err_aio(cid, strerror(-ret), req->status);
}
nvme_enqueue_req_completion(nvme_cq(req), req);
@@ -2153,8 +2147,30 @@ void nvme_rw_complete_cb(void *opaque, int ret)
trace_pci_nvme_rw_complete_cb(nvme_cid(req), blk_name(blk));
if (ret) {
+ Error *err = NULL;
+
block_acct_failed(stats, acct);
- nvme_aio_err(req, ret);
+
+ switch (req->cmd.opcode) {
+ case NVME_CMD_READ:
+ req->status = NVME_UNRECOVERED_READ;
+ break;
+
+ case NVME_CMD_WRITE:
+ case NVME_CMD_WRITE_ZEROES:
+ case NVME_CMD_ZONE_APPEND:
+ req->status = NVME_WRITE_FAULT;
+ break;
+
+ default:
+ req->status = NVME_INTERNAL_DEV_ERROR;
+ break;
+ }
+
+ trace_pci_nvme_err_aio(nvme_cid(req), strerror(-ret), req->status);
+
+ error_setg_errno(&err, -ret, "aio failed");
+ error_report_err(err);
} else {
block_acct_done(stats, acct);
}
@@ -2239,7 +2255,10 @@ static void nvme_verify_cb(void *opaque, int ret)
if (ret) {
block_acct_failed(stats, acct);
- nvme_aio_err(req, ret);
+ req->status = NVME_UNRECOVERED_READ;
+
+ trace_pci_nvme_err_aio(nvme_cid(req), strerror(-ret), req->status);
+
goto out;
}
@@ -2338,7 +2357,10 @@ static void nvme_compare_mdata_cb(void *opaque, int ret)
if (ret) {
block_acct_failed(stats, acct);
- nvme_aio_err(req, ret);
+ req->status = NVME_UNRECOVERED_READ;
+
+ trace_pci_nvme_err_aio(nvme_cid(req), strerror(-ret), req->status);
+
goto out;
}
@@ -2420,7 +2442,10 @@ static void nvme_compare_data_cb(void *opaque, int ret)
if (ret) {
block_acct_failed(stats, acct);
- nvme_aio_err(req, ret);
+ req->status = NVME_UNRECOVERED_READ;
+
+ trace_pci_nvme_err_aio(nvme_cid(req), strerror(-ret), req->status);
+
goto out;
}
@@ -2591,6 +2616,7 @@ next:
done:
iocb->aiocb = NULL;
iocb->common.cb(iocb->common.opaque, iocb->ret);
+ g_free(iocb->range);
qemu_aio_unref(iocb);
}
@@ -2640,6 +2666,7 @@ static uint16_t nvme_verify(NvmeCtrl *n, NvmeRequest *req)
uint64_t slba = le64_to_cpu(rw->slba);
uint32_t nlb = le16_to_cpu(rw->nlb) + 1;
size_t len = nvme_l2b(ns, nlb);
+ size_t data_len = len;
int64_t offset = nvme_l2b(ns, slba);
uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control));
uint32_t reftag = le32_to_cpu(rw->reftag);
@@ -2659,7 +2686,11 @@ static uint16_t nvme_verify(NvmeCtrl *n, NvmeRequest *req)
}
}
- if (len > n->page_size << n->params.vsl) {
+ if (nvme_ns_ext(ns) && !(NVME_ID_CTRL_CTRATT_MEM(n->id_ctrl.ctratt))) {
+ data_len += nvme_m2b(ns, nlb);
+ }
+
+ if (data_len > (n->page_size << n->params.vsl)) {
return NVME_INVALID_FIELD | NVME_DNR;
}
@@ -2695,6 +2726,7 @@ typedef struct NvmeCopyAIOCB {
BlockAIOCB common;
BlockAIOCB *aiocb;
NvmeRequest *req;
+ NvmeCtrl *n;
int ret;
void *ranges;
@@ -2713,6 +2745,8 @@ typedef struct NvmeCopyAIOCB {
uint64_t slba;
NvmeZone *zone;
+ NvmeNamespace *sns;
+ uint32_t tcl;
} NvmeCopyAIOCB;
static void nvme_copy_cancel(BlockAIOCB *aiocb)
@@ -2759,13 +2793,19 @@ static void nvme_copy_done(NvmeCopyAIOCB *iocb)
static void nvme_do_copy(NvmeCopyAIOCB *iocb);
-static void nvme_copy_source_range_parse_format0(void *ranges, int idx,
- uint64_t *slba, uint32_t *nlb,
- uint16_t *apptag,
- uint16_t *appmask,
- uint64_t *reftag)
+static void nvme_copy_source_range_parse_format0_2(void *ranges,
+ int idx, uint64_t *slba,
+ uint32_t *nlb,
+ uint32_t *snsid,
+ uint16_t *apptag,
+ uint16_t *appmask,
+ uint64_t *reftag)
{
- NvmeCopySourceRangeFormat0 *_ranges = ranges;
+ NvmeCopySourceRangeFormat0_2 *_ranges = ranges;
+
+ if (snsid) {
+ *snsid = le32_to_cpu(_ranges[idx].sparams);
+ }
if (slba) {
*slba = le64_to_cpu(_ranges[idx].slba);
@@ -2788,13 +2828,19 @@ static void nvme_copy_source_range_parse_format0(void *ranges, int idx,
}
}
-static void nvme_copy_source_range_parse_format1(void *ranges, int idx,
- uint64_t *slba, uint32_t *nlb,
- uint16_t *apptag,
- uint16_t *appmask,
- uint64_t *reftag)
+static void nvme_copy_source_range_parse_format1_3(void *ranges, int idx,
+ uint64_t *slba,
+ uint32_t *nlb,
+ uint32_t *snsid,
+ uint16_t *apptag,
+ uint16_t *appmask,
+ uint64_t *reftag)
{
- NvmeCopySourceRangeFormat1 *_ranges = ranges;
+ NvmeCopySourceRangeFormat1_3 *_ranges = ranges;
+
+ if (snsid) {
+ *snsid = le32_to_cpu(_ranges[idx].sparams);
+ }
if (slba) {
*slba = le64_to_cpu(_ranges[idx].slba);
@@ -2826,18 +2872,20 @@ static void nvme_copy_source_range_parse_format1(void *ranges, int idx,
static void nvme_copy_source_range_parse(void *ranges, int idx, uint8_t format,
uint64_t *slba, uint32_t *nlb,
- uint16_t *apptag, uint16_t *appmask,
- uint64_t *reftag)
+ uint32_t *snsid, uint16_t *apptag,
+ uint16_t *appmask, uint64_t *reftag)
{
switch (format) {
case NVME_COPY_FORMAT_0:
- nvme_copy_source_range_parse_format0(ranges, idx, slba, nlb, apptag,
- appmask, reftag);
+ case NVME_COPY_FORMAT_2:
+ nvme_copy_source_range_parse_format0_2(ranges, idx, slba, nlb, snsid,
+ apptag, appmask, reftag);
break;
case NVME_COPY_FORMAT_1:
- nvme_copy_source_range_parse_format1(ranges, idx, slba, nlb, apptag,
- appmask, reftag);
+ case NVME_COPY_FORMAT_3:
+ nvme_copy_source_range_parse_format1_3(ranges, idx, slba, nlb, snsid,
+ apptag, appmask, reftag);
break;
default:
@@ -2853,10 +2901,10 @@ static inline uint16_t nvme_check_copy_mcl(NvmeNamespace *ns,
for (int idx = 0; idx < nr; idx++) {
uint32_t nlb;
nvme_copy_source_range_parse(iocb->ranges, idx, iocb->format, NULL,
- &nlb, NULL, NULL, NULL);
+ &nlb, NULL, NULL, NULL, NULL);
copy_len += nlb;
}
-
+ iocb->tcl = copy_len;
if (copy_len > ns->id_ns.mcl) {
return NVME_CMD_SIZE_LIMIT | NVME_DNR;
}
@@ -2868,21 +2916,22 @@ static void nvme_copy_out_completed_cb(void *opaque, int ret)
{
NvmeCopyAIOCB *iocb = opaque;
NvmeRequest *req = iocb->req;
- NvmeNamespace *ns = req->ns;
+ NvmeNamespace *dns = req->ns;
uint32_t nlb;
nvme_copy_source_range_parse(iocb->ranges, iocb->idx, iocb->format, NULL,
- &nlb, NULL, NULL, NULL);
+ &nlb, NULL, NULL, NULL, NULL);
if (ret < 0) {
iocb->ret = ret;
+ req->status = NVME_WRITE_FAULT;
goto out;
} else if (iocb->ret < 0) {
goto out;
}
- if (ns->params.zoned) {
- nvme_advance_zone_wp(ns, iocb->zone, nlb);
+ if (dns->params.zoned) {
+ nvme_advance_zone_wp(dns, iocb->zone, nlb);
}
iocb->idx++;
@@ -2895,25 +2944,25 @@ static void nvme_copy_out_cb(void *opaque, int ret)
{
NvmeCopyAIOCB *iocb = opaque;
NvmeRequest *req = iocb->req;
- NvmeNamespace *ns = req->ns;
+ NvmeNamespace *dns = req->ns;
uint32_t nlb;
size_t mlen;
uint8_t *mbounce;
- if (ret < 0 || iocb->ret < 0 || !ns->lbaf.ms) {
+ if (ret < 0 || iocb->ret < 0 || !dns->lbaf.ms) {
goto out;
}
nvme_copy_source_range_parse(iocb->ranges, iocb->idx, iocb->format, NULL,
- &nlb, NULL, NULL, NULL);
+ &nlb, NULL, NULL, NULL, NULL);
- mlen = nvme_m2b(ns, nlb);
- mbounce = iocb->bounce + nvme_l2b(ns, nlb);
+ mlen = nvme_m2b(dns, nlb);
+ mbounce = iocb->bounce + nvme_l2b(dns, nlb);
qemu_iovec_reset(&iocb->iov);
qemu_iovec_add(&iocb->iov, mbounce, mlen);
- iocb->aiocb = blk_aio_pwritev(ns->blkconf.blk, nvme_moff(ns, iocb->slba),
+ iocb->aiocb = blk_aio_pwritev(dns->blkconf.blk, nvme_moff(dns, iocb->slba),
&iocb->iov, 0, nvme_copy_out_completed_cb,
iocb);
@@ -2927,59 +2976,71 @@ static void nvme_copy_in_completed_cb(void *opaque, int ret)
{
NvmeCopyAIOCB *iocb = opaque;
NvmeRequest *req = iocb->req;
- NvmeNamespace *ns = req->ns;
+ NvmeNamespace *sns = iocb->sns;
+ NvmeNamespace *dns = req->ns;
+ NvmeCopyCmd *copy = NULL;
+ uint8_t *mbounce = NULL;
uint32_t nlb;
uint64_t slba;
uint16_t apptag, appmask;
uint64_t reftag;
- size_t len;
+ size_t len, mlen;
uint16_t status;
if (ret < 0) {
iocb->ret = ret;
+ req->status = NVME_UNRECOVERED_READ;
goto out;
} else if (iocb->ret < 0) {
goto out;
}
nvme_copy_source_range_parse(iocb->ranges, iocb->idx, iocb->format, &slba,
- &nlb, &apptag, &appmask, &reftag);
- len = nvme_l2b(ns, nlb);
+ &nlb, NULL, &apptag, &appmask, &reftag);
trace_pci_nvme_copy_out(iocb->slba, nlb);
- if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
- NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd;
+ len = nvme_l2b(sns, nlb);
+
+ if (NVME_ID_NS_DPS_TYPE(sns->id_ns.dps)) {
+ copy = (NvmeCopyCmd *)&req->cmd;
uint16_t prinfor = ((copy->control[0] >> 4) & 0xf);
- uint16_t prinfow = ((copy->control[2] >> 2) & 0xf);
- size_t mlen = nvme_m2b(ns, nlb);
- uint8_t *mbounce = iocb->bounce + nvme_l2b(ns, nlb);
+ mlen = nvme_m2b(sns, nlb);
+ mbounce = iocb->bounce + nvme_l2b(sns, nlb);
- status = nvme_dif_mangle_mdata(ns, mbounce, mlen, slba);
+ status = nvme_dif_mangle_mdata(sns, mbounce, mlen, slba);
if (status) {
goto invalid;
}
- status = nvme_dif_check(ns, iocb->bounce, len, mbounce, mlen, prinfor,
+ status = nvme_dif_check(sns, iocb->bounce, len, mbounce, mlen, prinfor,
slba, apptag, appmask, &reftag);
if (status) {
goto invalid;
}
+ }
+
+ if (NVME_ID_NS_DPS_TYPE(dns->id_ns.dps)) {
+ copy = (NvmeCopyCmd *)&req->cmd;
+ uint16_t prinfow = ((copy->control[2] >> 2) & 0xf);
+
+ mlen = nvme_m2b(dns, nlb);
+ mbounce = iocb->bounce + nvme_l2b(dns, nlb);
apptag = le16_to_cpu(copy->apptag);
appmask = le16_to_cpu(copy->appmask);
if (prinfow & NVME_PRINFO_PRACT) {
- status = nvme_check_prinfo(ns, prinfow, iocb->slba, iocb->reftag);
+ status = nvme_check_prinfo(dns, prinfow, iocb->slba, iocb->reftag);
if (status) {
goto invalid;
}
- nvme_dif_pract_generate_dif(ns, iocb->bounce, len, mbounce, mlen,
+ nvme_dif_pract_generate_dif(dns, iocb->bounce, len, mbounce, mlen,
apptag, &iocb->reftag);
} else {
- status = nvme_dif_check(ns, iocb->bounce, len, mbounce, mlen,
+ status = nvme_dif_check(dns, iocb->bounce, len, mbounce, mlen,
prinfow, iocb->slba, apptag, appmask,
&iocb->reftag);
if (status) {
@@ -2988,13 +3049,13 @@ static void nvme_copy_in_completed_cb(void *opaque, int ret)
}
}
- status = nvme_check_bounds(ns, iocb->slba, nlb);
+ status = nvme_check_bounds(dns, iocb->slba, nlb);
if (status) {
goto invalid;
}
- if (ns->params.zoned) {
- status = nvme_check_zone_write(ns, iocb->zone, iocb->slba, nlb);
+ if (dns->params.zoned) {
+ status = nvme_check_zone_write(dns, iocb->zone, iocb->slba, nlb);
if (status) {
goto invalid;
}
@@ -3007,7 +3068,10 @@ static void nvme_copy_in_completed_cb(void *opaque, int ret)
qemu_iovec_reset(&iocb->iov);
qemu_iovec_add(&iocb->iov, iocb->bounce, len);
- iocb->aiocb = blk_aio_pwritev(ns->blkconf.blk, nvme_l2b(ns, iocb->slba),
+ block_acct_start(blk_get_stats(dns->blkconf.blk), &iocb->acct.write, 0,
+ BLOCK_ACCT_WRITE);
+
+ iocb->aiocb = blk_aio_pwritev(dns->blkconf.blk, nvme_l2b(dns, iocb->slba),
&iocb->iov, 0, nvme_copy_out_cb, iocb);
return;
@@ -3022,23 +3086,22 @@ out:
static void nvme_copy_in_cb(void *opaque, int ret)
{
NvmeCopyAIOCB *iocb = opaque;
- NvmeRequest *req = iocb->req;
- NvmeNamespace *ns = req->ns;
+ NvmeNamespace *sns = iocb->sns;
uint64_t slba;
uint32_t nlb;
- if (ret < 0 || iocb->ret < 0 || !ns->lbaf.ms) {
+ if (ret < 0 || iocb->ret < 0 || !sns->lbaf.ms) {
goto out;
}
nvme_copy_source_range_parse(iocb->ranges, iocb->idx, iocb->format, &slba,
- &nlb, NULL, NULL, NULL);
+ &nlb, NULL, NULL, NULL, NULL);
qemu_iovec_reset(&iocb->iov);
- qemu_iovec_add(&iocb->iov, iocb->bounce + nvme_l2b(ns, nlb),
- nvme_m2b(ns, nlb));
+ qemu_iovec_add(&iocb->iov, iocb->bounce + nvme_l2b(sns, nlb),
+ nvme_m2b(sns, nlb));
- iocb->aiocb = blk_aio_preadv(ns->blkconf.blk, nvme_moff(ns, slba),
+ iocb->aiocb = blk_aio_preadv(sns->blkconf.blk, nvme_moff(sns, slba),
&iocb->iov, 0, nvme_copy_in_completed_cb,
iocb);
return;
@@ -3047,14 +3110,78 @@ out:
nvme_copy_in_completed_cb(iocb, ret);
}
+static inline bool nvme_csi_supports_copy(uint8_t csi)
+{
+ return csi == NVME_CSI_NVM || csi == NVME_CSI_ZONED;
+}
+
+static inline bool nvme_copy_ns_format_match(NvmeNamespace *sns,
+ NvmeNamespace *dns)
+{
+ return sns->lbaf.ds == dns->lbaf.ds && sns->lbaf.ms == dns->lbaf.ms;
+}
+
+static bool nvme_copy_matching_ns_format(NvmeNamespace *sns, NvmeNamespace *dns,
+ bool pi_enable)
+{
+ if (!nvme_csi_supports_copy(sns->csi) ||
+ !nvme_csi_supports_copy(dns->csi)) {
+ return false;
+ }
+
+ if (!pi_enable && !nvme_copy_ns_format_match(sns, dns)) {
+ return false;
+ }
+
+ if (pi_enable && (!nvme_copy_ns_format_match(sns, dns) ||
+ sns->id_ns.dps != dns->id_ns.dps)) {
+ return false;
+ }
+
+ return true;
+}
+
+static inline bool nvme_copy_corresp_pi_match(NvmeNamespace *sns,
+ NvmeNamespace *dns)
+{
+ return sns->lbaf.ms == 0 &&
+ ((dns->lbaf.ms == 8 && dns->pif == 0) ||
+ (dns->lbaf.ms == 16 && dns->pif == 1));
+}
+
+static bool nvme_copy_corresp_pi_format(NvmeNamespace *sns, NvmeNamespace *dns,
+ bool sns_pi_en)
+{
+ if (!nvme_csi_supports_copy(sns->csi) ||
+ !nvme_csi_supports_copy(dns->csi)) {
+ return false;
+ }
+
+ if (!sns_pi_en && !nvme_copy_corresp_pi_match(sns, dns)) {
+ return false;
+ }
+
+ if (sns_pi_en && !nvme_copy_corresp_pi_match(dns, sns)) {
+ return false;
+ }
+
+ return true;
+}
+
static void nvme_do_copy(NvmeCopyAIOCB *iocb)
{
NvmeRequest *req = iocb->req;
- NvmeNamespace *ns = req->ns;
+ NvmeNamespace *sns;
+ NvmeNamespace *dns = req->ns;
+ NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd;
+ uint16_t prinfor = ((copy->control[0] >> 4) & 0xf);
+ uint16_t prinfow = ((copy->control[2] >> 2) & 0xf);
uint64_t slba;
uint32_t nlb;
size_t len;
uint16_t status;
+ uint32_t dnsid = le32_to_cpu(req->cmd.nsid);
+ uint32_t snsid = dnsid;
if (iocb->ret < 0) {
goto done;
@@ -3064,40 +3191,124 @@ static void nvme_do_copy(NvmeCopyAIOCB *iocb)
goto done;
}
- nvme_copy_source_range_parse(iocb->ranges, iocb->idx, iocb->format, &slba,
- &nlb, NULL, NULL, NULL);
- len = nvme_l2b(ns, nlb);
+ if (iocb->format == 2 || iocb->format == 3) {
+ nvme_copy_source_range_parse(iocb->ranges, iocb->idx, iocb->format,
+ &slba, &nlb, &snsid, NULL, NULL, NULL);
+ if (snsid != dnsid) {
+ if (snsid == NVME_NSID_BROADCAST ||
+ !nvme_nsid_valid(iocb->n, snsid)) {
+ status = NVME_INVALID_NSID | NVME_DNR;
+ goto invalid;
+ }
+ iocb->sns = nvme_ns(iocb->n, snsid);
+ if (unlikely(!iocb->sns)) {
+ status = NVME_INVALID_FIELD | NVME_DNR;
+ goto invalid;
+ }
+ } else {
+ if (((slba + nlb) > iocb->slba) &&
+ ((slba + nlb) < (iocb->slba + iocb->tcl))) {
+ status = NVME_CMD_OVERLAP_IO_RANGE | NVME_DNR;
+ goto invalid;
+ }
+ }
+ } else {
+ nvme_copy_source_range_parse(iocb->ranges, iocb->idx, iocb->format,
+ &slba, &nlb, NULL, NULL, NULL, NULL);
+ }
+
+ sns = iocb->sns;
+ if ((snsid == dnsid) && NVME_ID_NS_DPS_TYPE(sns->id_ns.dps) &&
+ ((prinfor & NVME_PRINFO_PRACT) != (prinfow & NVME_PRINFO_PRACT))) {
+ status = NVME_INVALID_FIELD | NVME_DNR;
+ goto invalid;
+ } else if (snsid != dnsid) {
+ if (!NVME_ID_NS_DPS_TYPE(sns->id_ns.dps) &&
+ !NVME_ID_NS_DPS_TYPE(dns->id_ns.dps)) {
+ if (!nvme_copy_matching_ns_format(sns, dns, false)) {
+ status = NVME_CMD_INCOMP_NS_OR_FMT | NVME_DNR;
+ goto invalid;
+ }
+ }
+ if (NVME_ID_NS_DPS_TYPE(sns->id_ns.dps) &&
+ NVME_ID_NS_DPS_TYPE(dns->id_ns.dps)) {
+ if ((prinfor & NVME_PRINFO_PRACT) !=
+ (prinfow & NVME_PRINFO_PRACT)) {
+ status = NVME_CMD_INCOMP_NS_OR_FMT | NVME_DNR;
+ goto invalid;
+ } else {
+ if (!nvme_copy_matching_ns_format(sns, dns, true)) {
+ status = NVME_CMD_INCOMP_NS_OR_FMT | NVME_DNR;
+ goto invalid;
+ }
+ }
+ }
+
+ if (!NVME_ID_NS_DPS_TYPE(sns->id_ns.dps) &&
+ NVME_ID_NS_DPS_TYPE(dns->id_ns.dps)) {
+ if (!(prinfow & NVME_PRINFO_PRACT)) {
+ status = NVME_CMD_INCOMP_NS_OR_FMT | NVME_DNR;
+ goto invalid;
+ } else {
+ if (!nvme_copy_corresp_pi_format(sns, dns, false)) {
+ status = NVME_CMD_INCOMP_NS_OR_FMT | NVME_DNR;
+ goto invalid;
+ }
+ }
+ }
+
+ if (NVME_ID_NS_DPS_TYPE(sns->id_ns.dps) &&
+ !NVME_ID_NS_DPS_TYPE(dns->id_ns.dps)) {
+ if (!(prinfor & NVME_PRINFO_PRACT)) {
+ status = NVME_CMD_INCOMP_NS_OR_FMT | NVME_DNR;
+ goto invalid;
+ } else {
+ if (!nvme_copy_corresp_pi_format(sns, dns, true)) {
+ status = NVME_CMD_INCOMP_NS_OR_FMT | NVME_DNR;
+ goto invalid;
+ }
+ }
+ }
+ }
+ len = nvme_l2b(sns, nlb);
trace_pci_nvme_copy_source_range(slba, nlb);
- if (nlb > le16_to_cpu(ns->id_ns.mssrl)) {
+ if (nlb > le16_to_cpu(sns->id_ns.mssrl)) {
status = NVME_CMD_SIZE_LIMIT | NVME_DNR;
goto invalid;
}
- status = nvme_check_bounds(ns, slba, nlb);
+ status = nvme_check_bounds(sns, slba, nlb);
if (status) {
goto invalid;
}
- if (NVME_ERR_REC_DULBE(ns->features.err_rec)) {
- status = nvme_check_dulbe(ns, slba, nlb);
+ if (NVME_ERR_REC_DULBE(sns->features.err_rec)) {
+ status = nvme_check_dulbe(sns, slba, nlb);
if (status) {
goto invalid;
}
}
- if (ns->params.zoned) {
- status = nvme_check_zone_read(ns, slba, nlb);
+ if (sns->params.zoned) {
+ status = nvme_check_zone_read(sns, slba, nlb);
if (status) {
goto invalid;
}
}
+ g_free(iocb->bounce);
+ iocb->bounce = g_malloc_n(le16_to_cpu(sns->id_ns.mssrl),
+ sns->lbasz + sns->lbaf.ms);
+
qemu_iovec_reset(&iocb->iov);
qemu_iovec_add(&iocb->iov, iocb->bounce, len);
- iocb->aiocb = blk_aio_preadv(ns->blkconf.blk, nvme_l2b(ns, slba),
+ block_acct_start(blk_get_stats(sns->blkconf.blk), &iocb->acct.read, 0,
+ BLOCK_ACCT_READ);
+
+ iocb->aiocb = blk_aio_preadv(sns->blkconf.blk, nvme_l2b(sns, slba),
&iocb->iov, 0, nvme_copy_in_cb, iocb);
return;
@@ -3116,9 +3327,7 @@ static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req)
nvme_misc_cb, req);
uint16_t nr = copy->nr + 1;
uint8_t format = copy->control[0] & 0xf;
- uint16_t prinfor = ((copy->control[0] >> 4) & 0xf);
- uint16_t prinfow = ((copy->control[2] >> 2) & 0xf);
- size_t len = sizeof(NvmeCopySourceRangeFormat0);
+ size_t len = sizeof(NvmeCopySourceRangeFormat0_2);
uint16_t status;
@@ -3127,13 +3336,9 @@ static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req)
iocb->ranges = NULL;
iocb->zone = NULL;
- if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) &&
- ((prinfor & NVME_PRINFO_PRACT) != (prinfow & NVME_PRINFO_PRACT))) {
- status = NVME_INVALID_FIELD | NVME_DNR;
- goto invalid;
- }
-
- if (!(n->id_ctrl.ocfs & (1 << format))) {
+ if (!(n->id_ctrl.ocfs & (1 << format)) ||
+ ((format == 2 || format == 3) &&
+ !(n->features.hbs.cdfe & (1 << format)))) {
trace_pci_nvme_err_copy_invalid_format(format);
status = NVME_INVALID_FIELD | NVME_DNR;
goto invalid;
@@ -3144,14 +3349,14 @@ static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req)
goto invalid;
}
- if ((ns->pif == 0x0 && format != 0x0) ||
- (ns->pif != 0x0 && format != 0x1)) {
+ if ((ns->pif == 0x0 && (format != 0x0 && format != 0x2)) ||
+ (ns->pif != 0x0 && (format != 0x1 && format != 0x3))) {
status = NVME_INVALID_FORMAT | NVME_DNR;
goto invalid;
}
if (ns->pif) {
- len = sizeof(NvmeCopySourceRangeFormat1);
+ len = sizeof(NvmeCopySourceRangeFormat1_3);
}
iocb->format = format;
@@ -3187,17 +3392,13 @@ static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req)
iocb->idx = 0;
iocb->reftag = le32_to_cpu(copy->reftag);
iocb->reftag |= (uint64_t)le32_to_cpu(copy->cdw3) << 32;
- iocb->bounce = g_malloc_n(le16_to_cpu(ns->id_ns.mssrl),
- ns->lbasz + ns->lbaf.ms);
qemu_iovec_init(&iocb->iov, 1);
- block_acct_start(blk_get_stats(ns->blkconf.blk), &iocb->acct.read, 0,
- BLOCK_ACCT_READ);
- block_acct_start(blk_get_stats(ns->blkconf.blk), &iocb->acct.write, 0,
- BLOCK_ACCT_WRITE);
-
req->aiocb = &iocb->common;
+ iocb->sns = req->ns;
+ iocb->n = n;
+ iocb->bounce = NULL;
nvme_do_copy(iocb);
return NVME_NO_COMPLETE;
@@ -3232,7 +3433,11 @@ static uint16_t nvme_compare(NvmeCtrl *n, NvmeRequest *req)
len += nvme_m2b(ns, nlb);
}
- status = nvme_check_mdts(n, len);
+ if (NVME_ID_CTRL_CTRATT_MEM(n->id_ctrl.ctratt)) {
+ status = nvme_check_mdts(n, data_len);
+ } else {
+ status = nvme_check_mdts(n, len);
+ }
if (status) {
return status;
}
@@ -3307,6 +3512,7 @@ static void nvme_flush_ns_cb(void *opaque, int ret)
if (ret < 0) {
iocb->ret = ret;
+ iocb->req->status = NVME_WRITE_FAULT;
goto out;
} else if (iocb->ret < 0) {
goto out;
@@ -3409,7 +3615,7 @@ static uint16_t nvme_read(NvmeCtrl *n, NvmeRequest *req)
BlockBackend *blk = ns->blkconf.blk;
uint16_t status;
- if (nvme_ns_ext(ns)) {
+ if (nvme_ns_ext(ns) && !(NVME_ID_CTRL_CTRATT_MEM(n->id_ctrl.ctratt))) {
mapped_size += nvme_m2b(ns, nlb);
if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
@@ -3521,7 +3727,7 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append,
BlockBackend *blk = ns->blkconf.blk;
uint16_t status;
- if (nvme_ns_ext(ns)) {
+ if (nvme_ns_ext(ns) && !(NVME_ID_CTRL_CTRATT_MEM(n->id_ctrl.ctratt))) {
mapped_size += nvme_m2b(ns, nlb);
if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
@@ -4167,7 +4373,7 @@ static bool nvme_zone_matches_filter(uint32_t zafs, NvmeZone *zl)
static uint16_t nvme_zone_mgmt_recv(NvmeCtrl *n, NvmeRequest *req)
{
- NvmeCmd *cmd = (NvmeCmd *)&req->cmd;
+ NvmeCmd *cmd = &req->cmd;
NvmeNamespace *ns = req->ns;
/* cdw12 is zero-based number of dwords to return. Convert to bytes */
uint32_t data_size = (le32_to_cpu(cmd->cdw12) + 1) << 2;
@@ -4300,7 +4506,7 @@ static uint16_t nvme_io_mgmt_recv_ruhs(NvmeCtrl *n, NvmeRequest *req,
nruhsd = ns->fdp.nphs * endgrp->fdp.nrg;
trans_len = sizeof(NvmeRuhStatus) + nruhsd * sizeof(NvmeRuhStatusDescr);
- buf = g_malloc(trans_len);
+ buf = g_malloc0(trans_len);
trans_len = MIN(trans_len, len);
@@ -4398,6 +4604,61 @@ static uint16_t nvme_io_mgmt_send(NvmeCtrl *n, NvmeRequest *req)
};
}
+static uint16_t __nvme_io_cmd_nvm(NvmeCtrl *n, NvmeRequest *req)
+{
+ switch (req->cmd.opcode) {
+ case NVME_CMD_WRITE:
+ return nvme_write(n, req);
+ case NVME_CMD_READ:
+ return nvme_read(n, req);
+ case NVME_CMD_COMPARE:
+ return nvme_compare(n, req);
+ case NVME_CMD_WRITE_ZEROES:
+ return nvme_write_zeroes(n, req);
+ case NVME_CMD_DSM:
+ return nvme_dsm(n, req);
+ case NVME_CMD_VERIFY:
+ return nvme_verify(n, req);
+ case NVME_CMD_COPY:
+ return nvme_copy(n, req);
+ case NVME_CMD_IO_MGMT_RECV:
+ return nvme_io_mgmt_recv(n, req);
+ case NVME_CMD_IO_MGMT_SEND:
+ return nvme_io_mgmt_send(n, req);
+ }
+
+ g_assert_not_reached();
+}
+
+static uint16_t nvme_io_cmd_nvm(NvmeCtrl *n, NvmeRequest *req)
+{
+ if (!(n->cse.iocs.nvm[req->cmd.opcode] & NVME_CMD_EFF_CSUPP)) {
+ trace_pci_nvme_err_invalid_opc(req->cmd.opcode);
+ return NVME_INVALID_OPCODE | NVME_DNR;
+ }
+
+ return __nvme_io_cmd_nvm(n, req);
+}
+
+static uint16_t nvme_io_cmd_zoned(NvmeCtrl *n, NvmeRequest *req)
+{
+ if (!(n->cse.iocs.zoned[req->cmd.opcode] & NVME_CMD_EFF_CSUPP)) {
+ trace_pci_nvme_err_invalid_opc(req->cmd.opcode);
+ return NVME_INVALID_OPCODE | NVME_DNR;
+ }
+
+ switch (req->cmd.opcode) {
+ case NVME_CMD_ZONE_APPEND:
+ return nvme_zone_append(n, req);
+ case NVME_CMD_ZONE_MGMT_SEND:
+ return nvme_zone_mgmt_send(n, req);
+ case NVME_CMD_ZONE_MGMT_RECV:
+ return nvme_zone_mgmt_recv(n, req);
+ }
+
+ return __nvme_io_cmd_nvm(n, req);
+}
+
static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req)
{
NvmeNamespace *ns;
@@ -4406,10 +4667,6 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req)
trace_pci_nvme_io_cmd(nvme_cid(req), nsid, nvme_sqid(req),
req->cmd.opcode, nvme_io_opc_str(req->cmd.opcode));
- if (!nvme_nsid_valid(n, nsid)) {
- return NVME_INVALID_NSID | NVME_DNR;
- }
-
/*
* In the base NVM command set, Flush may apply to all namespaces
* (indicated by NSID being set to FFFFFFFFh). But if that feature is used
@@ -4429,20 +4686,20 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req)
 * device only supports namespace types that include the NVM Flush command
* (NVM and Zoned), so always do an NVM Flush.
*/
+
if (req->cmd.opcode == NVME_CMD_FLUSH) {
return nvme_flush(n, req);
}
+ if (!nvme_nsid_valid(n, nsid) || nsid == NVME_NSID_BROADCAST) {
+ return NVME_INVALID_NSID | NVME_DNR;
+ }
+
ns = nvme_ns(n, nsid);
if (unlikely(!ns)) {
return NVME_INVALID_FIELD | NVME_DNR;
}
- if (!(ns->iocs[req->cmd.opcode] & NVME_CMD_EFF_CSUPP)) {
- trace_pci_nvme_err_invalid_opc(req->cmd.opcode);
- return NVME_INVALID_OPCODE | NVME_DNR;
- }
-
if (ns->status) {
return ns->status;
}
@@ -4453,36 +4710,14 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req)
req->ns = ns;
- switch (req->cmd.opcode) {
- case NVME_CMD_WRITE_ZEROES:
- return nvme_write_zeroes(n, req);
- case NVME_CMD_ZONE_APPEND:
- return nvme_zone_append(n, req);
- case NVME_CMD_WRITE:
- return nvme_write(n, req);
- case NVME_CMD_READ:
- return nvme_read(n, req);
- case NVME_CMD_COMPARE:
- return nvme_compare(n, req);
- case NVME_CMD_DSM:
- return nvme_dsm(n, req);
- case NVME_CMD_VERIFY:
- return nvme_verify(n, req);
- case NVME_CMD_COPY:
- return nvme_copy(n, req);
- case NVME_CMD_ZONE_MGMT_SEND:
- return nvme_zone_mgmt_send(n, req);
- case NVME_CMD_ZONE_MGMT_RECV:
- return nvme_zone_mgmt_recv(n, req);
- case NVME_CMD_IO_MGMT_RECV:
- return nvme_io_mgmt_recv(n, req);
- case NVME_CMD_IO_MGMT_SEND:
- return nvme_io_mgmt_send(n, req);
- default:
- assert(false);
+ switch (ns->csi) {
+ case NVME_CSI_NVM:
+ return nvme_io_cmd_nvm(n, req);
+ case NVME_CSI_ZONED:
+ return nvme_io_cmd_zoned(n, req);
}
- return NVME_INVALID_OPCODE | NVME_DNR;
+ g_assert_not_reached();
}
static void nvme_cq_notifier(EventNotifier *e)
@@ -4591,6 +4826,7 @@ static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeRequest *req)
while (!QTAILQ_EMPTY(&sq->out_req_list)) {
r = QTAILQ_FIRST(&sq->out_req_list);
assert(r->aiocb);
+ r->status = NVME_CMD_ABORT_SQ_DEL;
blk_aio_cancel(r->aiocb);
}
@@ -4709,6 +4945,45 @@ static void nvme_set_blk_stats(NvmeNamespace *ns, struct nvme_stats *stats)
stats->write_commands += s->nr_ops[BLOCK_ACCT_WRITE];
}
+static uint16_t nvme_ocp_extended_smart_info(NvmeCtrl *n, uint8_t rae,
+ uint32_t buf_len, uint64_t off,
+ NvmeRequest *req)
+{
+ NvmeNamespace *ns = NULL;
+ NvmeSmartLogExtended smart_l = { 0 };
+ struct nvme_stats stats = { 0 };
+ uint32_t trans_len;
+
+ if (off >= sizeof(smart_l)) {
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+
+ /* accumulate all stats from all namespaces */
+ for (int i = 1; i <= NVME_MAX_NAMESPACES; i++) {
+ ns = nvme_ns(n, i);
+ if (ns) {
+ nvme_set_blk_stats(ns, &stats);
+ }
+ }
+
+ smart_l.physical_media_units_written[0] = cpu_to_le64(stats.units_written);
+ smart_l.physical_media_units_read[0] = cpu_to_le64(stats.units_read);
+ smart_l.log_page_version = 0x0005;
+
+ static const uint8_t guid[16] = {
+ 0xC5, 0xAF, 0x10, 0x28, 0xEA, 0xBF, 0xF2, 0xA4,
+ 0x9C, 0x4F, 0x6F, 0x7C, 0xC9, 0x14, 0xD5, 0xAF
+ };
+ memcpy(smart_l.log_page_guid, guid, sizeof(smart_l.log_page_guid));
+
+ if (!rae) {
+ nvme_clear_events(n, NVME_AER_TYPE_SMART);
+ }
+
+ trans_len = MIN(sizeof(smart_l) - off, buf_len);
+ return nvme_c2h(n, (uint8_t *) &smart_l + off, trans_len, req);
+}
+
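This page is only reachable when the new ocp device property (added to nvme_props below) is set; nvme_vendor_specific_log() otherwise rejects the LID. A hypothetical invocation, values illustrative:

    -device nvme,serial=deadbeef,drive=nvm,ocp=on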
static uint16_t nvme_smart_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len,
uint64_t off, NvmeRequest *req)
{
@@ -4854,7 +5129,7 @@ static uint16_t nvme_error_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len,
static uint16_t nvme_changed_nslist(NvmeCtrl *n, uint8_t rae, uint32_t buf_len,
uint64_t off, NvmeRequest *req)
{
- uint32_t nslist[1024];
+ uint32_t nslist[1024] = {};
uint32_t trans_len;
int i = 0;
uint32_t nsid;
@@ -4864,7 +5139,6 @@ static uint16_t nvme_changed_nslist(NvmeCtrl *n, uint8_t rae, uint32_t buf_len,
return NVME_INVALID_FIELD | NVME_DNR;
}
- memset(nslist, 0x0, sizeof(nslist));
trans_len = MIN(sizeof(nslist) - off, buf_len);
while ((nsid = find_first_bit(n->changed_nsids, NVME_CHANGED_NSID_SIZE)) !=
@@ -4902,7 +5176,7 @@ static uint16_t nvme_cmd_effects(NvmeCtrl *n, uint8_t csi, uint32_t buf_len,
uint64_t off, NvmeRequest *req)
{
NvmeEffectsLog log = {};
- const uint32_t *src_iocs = NULL;
+ const uint32_t *iocs = NULL;
uint32_t trans_len;
if (off >= sizeof(log)) {
@@ -4912,25 +5186,26 @@ static uint16_t nvme_cmd_effects(NvmeCtrl *n, uint8_t csi, uint32_t buf_len,
switch (NVME_CC_CSS(ldl_le_p(&n->bar.cc))) {
case NVME_CC_CSS_NVM:
- src_iocs = nvme_cse_iocs_nvm;
- /* fall through */
- case NVME_CC_CSS_ADMIN_ONLY:
+ iocs = n->cse.iocs.nvm;
break;
- case NVME_CC_CSS_CSI:
+
+ case NVME_CC_CSS_ALL:
switch (csi) {
case NVME_CSI_NVM:
- src_iocs = nvme_cse_iocs_nvm;
+ iocs = n->cse.iocs.nvm;
break;
case NVME_CSI_ZONED:
- src_iocs = nvme_cse_iocs_zoned;
+ iocs = n->cse.iocs.zoned;
break;
}
+
+ break;
}
- memcpy(log.acs, nvme_cse_acs, sizeof(nvme_cse_acs));
+ memcpy(log.acs, n->cse.acs, sizeof(log.acs));
- if (src_iocs) {
- memcpy(log.iocs, src_iocs, sizeof(log.iocs));
+ if (iocs) {
+ memcpy(log.iocs, iocs, sizeof(log.iocs));
}
trans_len = MIN(sizeof(log) - off, buf_len);
@@ -4938,6 +5213,23 @@ static uint16_t nvme_cmd_effects(NvmeCtrl *n, uint8_t csi, uint32_t buf_len,
return nvme_c2h(n, ((uint8_t *)&log) + off, trans_len, req);
}
+static uint16_t nvme_vendor_specific_log(NvmeCtrl *n, uint8_t rae,
+ uint32_t buf_len, uint64_t off,
+ NvmeRequest *req, uint8_t lid)
+{
+ switch (lid) {
+ case NVME_OCP_EXTENDED_SMART_INFO:
+ if (n->params.ocp) {
+ return nvme_ocp_extended_smart_info(n, rae, buf_len, off, req);
+ }
+ break;
+ /* add a case for each additional vendor specific log id */
+ }
+
+ trace_pci_nvme_err_invalid_log_page(nvme_cid(req), lid);
+ return NVME_INVALID_FIELD | NVME_DNR;
+}
+
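The dispatch above is deliberately open-ended; a hypothetical extra vendor log, with an illustrative LID constant and handler name that are not part of this patch, would slot in as another case:

    case NVME_OCP_HYPOTHETICAL_LOG: /* illustrative only, not in this patch */
        if (n->params.ocp) {
            return nvme_ocp_hypothetical_log(n, rae, buf_len, off, req);
        }
        break;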
static size_t sizeof_fdp_conf_descr(size_t nruh, size_t vss)
{
size_t entry_siz = sizeof(NvmeFdpDescrHdr) + nruh * sizeof(NvmeRuhDescr)
@@ -5188,6 +5480,8 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req)
return nvme_smart_info(n, rae, len, off, req);
case NVME_LOG_FW_SLOT_INFO:
return nvme_fw_log_info(n, len, off, req);
+ case NVME_LOG_VENDOR_START...NVME_LOG_VENDOR_END:
+ return nvme_vendor_specific_log(n, rae, len, off, req, lid);
case NVME_LOG_CHANGED_NSLIST:
return nvme_changed_nslist(n, rae, len, off, req);
case NVME_LOG_CMD_EFFECTS:
@@ -5221,7 +5515,7 @@ static void nvme_free_cq(NvmeCQueue *cq, NvmeCtrl *n)
event_notifier_set_handler(&cq->notifier, NULL);
event_notifier_cleanup(&cq->notifier);
}
- if (msix_enabled(pci)) {
+ if (msix_enabled(pci) && cq->irq_enabled) {
msix_vector_unuse(pci, cq->vector);
}
if (cq->cqid) {
@@ -5262,9 +5556,10 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr,
{
PCIDevice *pci = PCI_DEVICE(n);
- if (msix_enabled(pci)) {
+ if (msix_enabled(pci) && irq_enabled) {
msix_vector_use(pci, vector);
}
+
cq->ctrl = n;
cq->cqid = cqid;
cq->size = size;
@@ -5374,7 +5669,9 @@ static uint16_t nvme_identify_ctrl_csi(NvmeCtrl *n, NvmeRequest *req)
switch (c->csi) {
case NVME_CSI_NVM:
id_nvm->vsl = n->params.vsl;
+ id_nvm->dmrl = NVME_ID_CTRL_NVM_DMRL_MAX;
id_nvm->dmrsl = cpu_to_le32(n->dmrsl);
+ id_nvm->dmsl = NVME_ID_CTRL_NVM_DMRL_MAX * n->dmrsl;
break;
case NVME_CSI_ZONED:
@@ -5416,7 +5713,7 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeRequest *req, bool active)
return nvme_c2h(n, (uint8_t *)&ns->id_ns, sizeof(NvmeIdNs), req);
}
- return NVME_INVALID_CMD_SET | NVME_DNR;
+ return NVME_INVALID_IOCS | NVME_DNR;
}
static uint16_t nvme_identify_ctrl_list(NvmeCtrl *n, NvmeRequest *req,
@@ -5497,6 +5794,33 @@ static uint16_t nvme_identify_sec_ctrl_list(NvmeCtrl *n, NvmeRequest *req)
return nvme_c2h(n, (uint8_t *)&list, sizeof(list), req);
}
+static uint16_t nvme_identify_ns_ind(NvmeCtrl *n, NvmeRequest *req, bool alloc)
+{
+ NvmeNamespace *ns;
+ NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
+ uint32_t nsid = le32_to_cpu(c->nsid);
+
+ trace_pci_nvme_identify_ns_ind(nsid);
+
+ if (!nvme_nsid_valid(n, nsid) || nsid == NVME_NSID_BROADCAST) {
+ return NVME_INVALID_NSID | NVME_DNR;
+ }
+
+ ns = nvme_ns(n, nsid);
+ if (unlikely(!ns)) {
+ if (alloc) {
+ ns = nvme_subsys_ns(n->subsys, nsid);
+ if (!ns) {
+ return nvme_rpt_empty_id_struct(n, req);
+ }
+ } else {
+ return nvme_rpt_empty_id_struct(n, req);
+ }
+ }
+
+ return nvme_c2h(n, (uint8_t *)&ns->id_ns_ind, sizeof(NvmeIdNsInd), req);
+}
+
static uint16_t nvme_identify_ns_csi(NvmeCtrl *n, NvmeRequest *req,
bool active)
{
@@ -5751,6 +6075,10 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeRequest *req)
return nvme_identify_sec_ctrl_list(n, req);
case NVME_ID_CNS_CS_NS:
return nvme_identify_ns_csi(n, req, true);
+ case NVME_ID_CNS_CS_IND_NS:
+ return nvme_identify_ns_ind(n, req, false);
+ case NVME_ID_CNS_CS_IND_NS_ALLOCATED:
+ return nvme_identify_ns_ind(n, req, true);
case NVME_ID_CNS_CS_NS_PRESENT:
return nvme_identify_ns_csi(n, req, false);
case NVME_ID_CNS_CTRL:
@@ -5780,12 +6108,41 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeRequest *req)
static uint16_t nvme_abort(NvmeCtrl *n, NvmeRequest *req)
{
uint16_t sqid = le32_to_cpu(req->cmd.cdw10) & 0xffff;
+ uint16_t cid = (le32_to_cpu(req->cmd.cdw10) >> 16) & 0xffff;
+ NvmeSQueue *sq = n->sq[sqid];
+ NvmeRequest *r, *next;
+ int i;
req->cqe.result = 1;
if (nvme_check_sqid(n, sqid)) {
return NVME_INVALID_FIELD | NVME_DNR;
}
+ if (sqid == 0) {
+ for (i = 0; i < n->outstanding_aers; i++) {
+ NvmeRequest *re = n->aer_reqs[i];
+ if (re->cqe.cid == cid) {
+ memmove(n->aer_reqs + i, n->aer_reqs + i + 1,
+ (n->outstanding_aers - i - 1) * sizeof(NvmeRequest *));
+ n->outstanding_aers--;
+ re->status = NVME_CMD_ABORT_REQ;
+ req->cqe.result = 0;
+ nvme_enqueue_req_completion(&n->admin_cq, re);
+ return NVME_SUCCESS;
+ }
+ }
+ }
+
+ QTAILQ_FOREACH_SAFE(r, &sq->out_req_list, entry, next) {
+ if (r->cqe.cid == cid) {
+ if (r->aiocb) {
+ r->status = NVME_CMD_ABORT_REQ;
+ blk_aio_cancel_async(r->aiocb);
+ }
+ break;
+ }
+ }
+
return NVME_SUCCESS;
}
@@ -6090,8 +6447,10 @@ defaults:
if (ret) {
return ret;
}
- goto out;
+ break;
+ case NVME_WRITE_ATOMICITY:
+ result = n->dn;
break;
default:
result = nvme_feature_default[fid];
@@ -6175,6 +6534,8 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest *req)
uint8_t save = NVME_SETFEAT_SAVE(dw10);
uint16_t status;
int i;
+ NvmeIdCtrl *id = &n->id_ctrl;
+ NvmeAtomic *atomic = &n->atomic;
trace_pci_nvme_setfeat(nvme_cid(req), nsid, fid, save, dw11);
@@ -6319,7 +6680,7 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest *req)
case NVME_COMMAND_SET_PROFILE:
if (dw11 & 0x1ff) {
trace_pci_nvme_err_invalid_iocsci(dw11 & 0x1ff);
- return NVME_CMD_SET_CMB_REJECTED | NVME_DNR;
+ return NVME_IOCS_COMBINATION_REJECTED | NVME_DNR;
}
break;
case NVME_FDP_MODE:
@@ -6327,6 +6688,22 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest *req)
return NVME_CMD_SEQ_ERROR | NVME_DNR;
case NVME_FDP_EVENTS:
return nvme_set_feature_fdp_events(n, ns, req);
+ case NVME_WRITE_ATOMICITY:
+
+ n->dn = 0x1 & dw11;
+
+ if (n->dn) {
+ atomic->atomic_max_write_size = le16_to_cpu(id->awupf) + 1;
+ } else {
+ atomic->atomic_max_write_size = le16_to_cpu(id->awun) + 1;
+ }
+
+ if (atomic->atomic_max_write_size == 1) {
+ atomic->atomic_writes = 0;
+ } else {
+ atomic->atomic_writes = 1;
+ }
+ break;
default:
return NVME_FEAT_NOT_CHANGEABLE | NVME_DNR;
}
@@ -6352,40 +6729,49 @@ static uint16_t nvme_aer(NvmeCtrl *n, NvmeRequest *req)
return NVME_NO_COMPLETE;
}
-static void nvme_update_dmrsl(NvmeCtrl *n)
+static void nvme_update_dsm_limits(NvmeCtrl *n, NvmeNamespace *ns)
{
- int nsid;
+ if (ns) {
+ n->dmrsl =
+ MIN_NON_ZERO(n->dmrsl, BDRV_REQUEST_MAX_BYTES / nvme_l2b(ns, 1));
- for (nsid = 1; nsid <= NVME_MAX_NAMESPACES; nsid++) {
- NvmeNamespace *ns = nvme_ns(n, nsid);
+ return;
+ }
+
+ for (uint32_t nsid = 1; nsid <= NVME_MAX_NAMESPACES; nsid++) {
+ ns = nvme_ns(n, nsid);
if (!ns) {
continue;
}
- n->dmrsl = MIN_NON_ZERO(n->dmrsl,
- BDRV_REQUEST_MAX_BYTES / nvme_l2b(ns, 1));
+ n->dmrsl =
+ MIN_NON_ZERO(n->dmrsl, BDRV_REQUEST_MAX_BYTES / nvme_l2b(ns, 1));
}
}
-static void nvme_select_iocs_ns(NvmeCtrl *n, NvmeNamespace *ns)
+static bool nvme_csi_supported(NvmeCtrl *n, uint8_t csi)
{
- uint32_t cc = ldl_le_p(&n->bar.cc);
+ uint32_t cc;
- ns->iocs = nvme_cse_iocs_none;
- switch (ns->csi) {
+ switch (csi) {
case NVME_CSI_NVM:
- if (NVME_CC_CSS(cc) != NVME_CC_CSS_ADMIN_ONLY) {
- ns->iocs = nvme_cse_iocs_nvm;
- }
- break;
+ return true;
+
case NVME_CSI_ZONED:
- if (NVME_CC_CSS(cc) == NVME_CC_CSS_CSI) {
- ns->iocs = nvme_cse_iocs_zoned;
- } else if (NVME_CC_CSS(cc) == NVME_CC_CSS_NVM) {
- ns->iocs = nvme_cse_iocs_nvm;
- }
- break;
+ cc = ldl_le_p(&n->bar.cc);
+
+ return NVME_CC_CSS(cc) == NVME_CC_CSS_ALL;
}
+
+ g_assert_not_reached();
+}
+
+static void nvme_detach_ns(NvmeCtrl *n, NvmeNamespace *ns)
+{
+ assert(ns->attached > 0);
+
+ n->namespaces[ns->params.nsid] = NULL;
+ ns->attached--;
}
static uint16_t nvme_ns_attachment(NvmeCtrl *n, NvmeRequest *req)
@@ -6430,7 +6816,7 @@ static uint16_t nvme_ns_attachment(NvmeCtrl *n, NvmeRequest *req)
switch (sel) {
case NVME_NS_ATTACHMENT_ATTACH:
- if (nvme_ns(ctrl, nsid)) {
+ if (nvme_ns(n, nsid)) {
return NVME_NS_ALREADY_ATTACHED | NVME_DNR;
}
@@ -6438,20 +6824,18 @@ static uint16_t nvme_ns_attachment(NvmeCtrl *n, NvmeRequest *req)
return NVME_NS_PRIVATE | NVME_DNR;
}
+ if (!nvme_csi_supported(n, ns->csi)) {
+ return NVME_IOCS_NOT_SUPPORTED | NVME_DNR;
+ }
+
nvme_attach_ns(ctrl, ns);
- nvme_select_iocs_ns(ctrl, ns);
+ nvme_update_dsm_limits(ctrl, ns);
break;
case NVME_NS_ATTACHMENT_DETACH:
- if (!nvme_ns(ctrl, nsid)) {
- return NVME_NS_NOT_ATTACHED | NVME_DNR;
- }
-
- ctrl->namespaces[nsid] = NULL;
- ns->attached--;
-
- nvme_update_dmrsl(ctrl);
+ nvme_detach_ns(ctrl, ns);
+ nvme_update_dsm_limits(ctrl, NULL);
break;
@@ -6954,7 +7338,7 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req)
trace_pci_nvme_admin_cmd(nvme_cid(req), nvme_sqid(req), req->cmd.opcode,
nvme_adm_opc_str(req->cmd.opcode));
- if (!(nvme_cse_acs[req->cmd.opcode] & NVME_CMD_EFF_CSUPP)) {
+ if (!(n->cse.acs[req->cmd.opcode] & NVME_CMD_EFF_CSUPP)) {
trace_pci_nvme_err_invalid_admin_opc(req->cmd.opcode);
return NVME_INVALID_OPCODE | NVME_DNR;
}
@@ -7002,7 +7386,7 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req)
case NVME_ADM_CMD_DIRECTIVE_RECV:
return nvme_directive_receive(n, req);
default:
- assert(false);
+ g_assert_not_reached();
}
return NVME_INVALID_OPCODE | NVME_DNR;
@@ -7024,6 +7408,81 @@ static void nvme_update_sq_tail(NvmeSQueue *sq)
trace_pci_nvme_update_sq_tail(sq->sqid, sq->tail);
}
+#define NVME_ATOMIC_NO_START 0
+#define NVME_ATOMIC_START_ATOMIC 1
+#define NVME_ATOMIC_START_NONATOMIC 2
+
+static int nvme_atomic_write_check(NvmeCtrl *n, NvmeCmd *cmd,
+ NvmeAtomic *atomic)
+{
+ NvmeRwCmd *rw = (NvmeRwCmd *)cmd;
+ uint64_t slba = le64_to_cpu(rw->slba);
+ uint32_t nlb = (uint32_t)le16_to_cpu(rw->nlb);
+ uint64_t elba = slba + nlb;
+ bool cmd_atomic_wr = true;
+ int i;
+
+ if ((cmd->opcode == NVME_CMD_READ) || ((cmd->opcode == NVME_CMD_WRITE) &&
+ ((rw->nlb + 1) > atomic->atomic_max_write_size))) {
+ cmd_atomic_wr = false;
+ }
+
+ /*
+ * Walk the queues to see if there are any atomic conflicts.
+ */
+ for (i = 1; i < n->params.max_ioqpairs + 1; i++) {
+ NvmeSQueue *sq;
+ NvmeRequest *req;
+ NvmeRwCmd *req_rw;
+ uint64_t req_slba;
+ uint32_t req_nlb;
+ uint64_t req_elba;
+
+ sq = n->sq[i];
+ if (!sq) {
+ continue;
+ }
+
+ /*
+ * Walk all the requests on a given queue.
+ */
+ QTAILQ_FOREACH(req, &sq->out_req_list, entry) {
+ req_rw = (NvmeRwCmd *)&req->cmd;
+
+ if (((req_rw->opcode == NVME_CMD_WRITE) ||
+ (req_rw->opcode == NVME_CMD_READ)) &&
+ (cmd->nsid == req->ns->params.nsid)) {
+ req_slba = le64_to_cpu(req_rw->slba);
+ req_nlb = (uint32_t)le16_to_cpu(req_rw->nlb);
+ req_elba = req_slba + req_nlb;
+
+ if (cmd_atomic_wr) {
+ if ((elba >= req_slba) && (slba <= req_elba)) {
+ return NVME_ATOMIC_NO_START;
+ }
+ } else {
+ if (req->atomic_write && ((elba >= req_slba) &&
+ (slba <= req_elba))) {
+ return NVME_ATOMIC_NO_START;
+ }
+ }
+ }
+ }
+ }
+ if (cmd_atomic_wr) {
+ return NVME_ATOMIC_START_ATOMIC;
+ }
+ return NVME_ATOMIC_START_NONATOMIC;
+}
+
+static NvmeAtomic *nvme_get_atomic(NvmeCtrl *n, NvmeCmd *cmd)
+{
+ if (n->atomic.atomic_writes) {
+ return &n->atomic;
+ }
+ return NULL;
+}
+
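A minimal sketch of the interval test nvme_atomic_write_check() applies (helper name is illustrative, not part of the patch): with nlb being 0's based, a command covers the closed LBA range [slba, slba + nlb], and two commands conflict exactly when their ranges intersect:

    static bool nvme_lba_ranges_overlap(uint64_t slba, uint64_t elba,
                                        uint64_t req_slba, uint64_t req_elba)
    {
        /* closed intervals intersect iff neither ends before the other starts */
        return elba >= req_slba && slba <= req_elba;
    }

For example, slba=8 with nlb=7 covers LBAs 8..15 and conflicts with an outstanding write starting at LBA 15, so nvme_process_sq() reschedules the submission queue bottom half instead of starting the command.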
static void nvme_process_sq(void *opaque)
{
NvmeSQueue *sq = opaque;
@@ -7040,6 +7499,9 @@ static void nvme_process_sq(void *opaque)
}
while (!(nvme_sq_empty(sq) || QTAILQ_EMPTY(&sq->req_list))) {
+ NvmeAtomic *atomic;
+ bool cmd_is_atomic;
+
addr = sq->dma_addr + (sq->head << NVME_SQES);
if (nvme_addr_read(n, addr, (void *)&cmd, sizeof(cmd))) {
trace_pci_nvme_err_addr_read(addr);
@@ -7047,6 +7509,26 @@ static void nvme_process_sq(void *opaque)
stl_le_p(&n->bar.csts, NVME_CSTS_FAILED);
break;
}
+
+ atomic = nvme_get_atomic(n, &cmd);
+
+ cmd_is_atomic = false;
+ if (sq->sqid && atomic) {
+ int ret;
+
+ ret = nvme_atomic_write_check(n, &cmd, atomic);
+ switch (ret) {
+ case NVME_ATOMIC_NO_START:
+ qemu_bh_schedule(sq->bh);
+ return;
+ case NVME_ATOMIC_START_ATOMIC:
+ cmd_is_atomic = true;
+ break;
+ case NVME_ATOMIC_START_NONATOMIC:
+ default:
+ break;
+ }
+ }
nvme_inc_sq_head(sq);
req = QTAILQ_FIRST(&sq->req_list);
@@ -7056,6 +7538,10 @@ static void nvme_process_sq(void *opaque)
req->cqe.cid = cmd.cid;
memcpy(&req->cmd, &cmd, sizeof(NvmeCmd));
+ if (sq->sqid && atomic) {
+ req->atomic_write = cmd_is_atomic;
+ }
+
status = sq->sqid ? nvme_io_cmd(n, req) :
nvme_admin_cmd(n, req);
if (status != NVME_NO_COMPLETE) {
@@ -7159,6 +7645,8 @@ static void nvme_ctrl_reset(NvmeCtrl *n, NvmeResetType rst)
n->outstanding_aers = 0;
n->qs_created = false;
+ n->dn = n->params.atomic_dn; /* Set Disable Normal */
+
nvme_update_msixcap_ts(pci_dev, n->conf_msix_qsize);
if (pci_is_vf(pci_dev)) {
@@ -7197,21 +7685,6 @@ static void nvme_ctrl_shutdown(NvmeCtrl *n)
}
}
-static void nvme_select_iocs(NvmeCtrl *n)
-{
- NvmeNamespace *ns;
- int i;
-
- for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
- ns = nvme_ns(n, i);
- if (!ns) {
- continue;
- }
-
- nvme_select_iocs_ns(n, ns);
- }
-}
-
static int nvme_start_ctrl(NvmeCtrl *n)
{
uint64_t cap = ldq_le_p(&n->bar.cap);
@@ -7278,7 +7751,22 @@ static int nvme_start_ctrl(NvmeCtrl *n)
nvme_set_timestamp(n, 0ULL);
- nvme_select_iocs(n);
+ /* verify that the command sets of attached namespaces are supported */
+ for (int i = 1; i <= NVME_MAX_NAMESPACES; i++) {
+ NvmeNamespace *ns = nvme_subsys_ns(n->subsys, i);
+
+ if (!ns || (!ns->params.shared && ns->ctrl != n)) {
+ continue;
+ }
+
+ if (nvme_csi_supported(n, ns->csi) && !ns->params.detached) {
+ if (!ns->attached || ns->params.shared) {
+ nvme_attach_ns(n, ns);
+ }
+ }
+ }
+
+ nvme_update_dsm_limits(n, NULL);
return 0;
}
@@ -7603,7 +8091,6 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
/* Completion queue doorbell write */
uint16_t new_head = val & 0xffff;
- int start_sqs;
NvmeCQueue *cq;
qid = (addr - (0x1000 + (1 << 2))) >> 3;
@@ -7654,18 +8141,15 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
trace_pci_nvme_mmio_doorbell_cq(cq->cqid, new_head);
- start_sqs = nvme_cq_full(cq) ? 1 : 0;
+ /* schedule deferred CQE posting if the queue was previously full */
+ if (nvme_cq_full(cq)) {
+ qemu_bh_schedule(cq->bh);
+ }
+
cq->head = new_head;
if (!qid && n->dbbuf_enabled) {
stl_le_pci_dma(pci, cq->db_addr, cq->head, MEMTXATTRS_UNSPECIFIED);
}
- if (start_sqs) {
- NvmeSQueue *sq;
- QTAILQ_FOREACH(sq, &cq->sq_list, entry) {
- qemu_bh_schedule(sq->bh);
- }
- qemu_bh_schedule(cq->bh);
- }
if (cq->tail == cq->head) {
if (cq->irq_enabled) {
@@ -7935,6 +8419,8 @@ static void nvme_init_state(NvmeCtrl *n)
NvmeSecCtrlEntry *list = n->sec_ctrl_list;
NvmeSecCtrlEntry *sctrl;
PCIDevice *pci = PCI_DEVICE(n);
+ NvmeAtomic *atomic = &n->atomic;
+ NvmeIdCtrl *id = &n->id_ctrl;
uint8_t max_vfs;
int i;
@@ -7992,6 +8478,29 @@ static void nvme_init_state(NvmeCtrl *n)
cpu_to_le16(n->params.sriov_max_vi_per_vf) :
cap->vifrt / MAX(max_vfs, 1);
}
+
+ /* Atomic Write */
+ id->awun = cpu_to_le16(n->params.atomic_awun);
+ id->awupf = cpu_to_le16(n->params.atomic_awupf);
+ n->dn = n->params.atomic_dn;
+
+ if (id->awun || id->awupf) {
+ if (id->awupf > id->awun) {
+ id->awupf = 0;
+ }
+
+ if (n->dn) {
+ atomic->atomic_max_write_size = id->awupf + 1;
+ } else {
+ atomic->atomic_max_write_size = id->awun + 1;
+ }
+
+ if (atomic->atomic_max_write_size == 1) {
+ atomic->atomic_writes = 0;
+ } else {
+ atomic->atomic_writes = 1;
+ }
+ }
}
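A worked example of the selection above, using hypothetical values: AWUN and AWUPF are 0's based, so the +1 converts them to LBA counts, and Disable Normal picks the power-fail limit instead of the normal one:

    uint16_t awun = 255, awupf = 63;              /* hypothetical 0's-based values */
    bool dn = false;                              /* Write Atomicity, Disable Normal */
    uint32_t max_lbas = (dn ? awupf : awun) + 1;  /* 256 here; 64 if dn were set */

Since an awupf larger than awun is zeroed above, the power-fail guarantee never exceeds the normal-operation one.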
static void nvme_init_cmb(NvmeCtrl *n, PCIDevice *pci_dev)
@@ -8080,8 +8589,7 @@ static bool nvme_init_sriov(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t offset,
if (!pcie_sriov_pf_init(pci_dev, offset, "nvme", vf_dev_id,
n->params.sriov_max_vfs, n->params.sriov_max_vfs,
- NVME_VF_OFFSET, NVME_VF_STRIDE,
- errp)) {
+ NVME_VF_OFFSET, NVME_VF_STRIDE, errp)) {
return false;
}
@@ -8096,8 +8604,7 @@ static int nvme_add_pm_capability(PCIDevice *pci_dev, uint8_t offset)
Error *err = NULL;
int ret;
- ret = pci_add_capability(pci_dev, PCI_CAP_ID_PM, offset,
- PCI_PM_SIZEOF, &err);
+ ret = pci_pm_init(pci_dev, offset, &err);
if (err) {
error_report_err(err);
return ret;
@@ -8113,6 +8620,27 @@ static int nvme_add_pm_capability(PCIDevice *pci_dev, uint8_t offset)
return 0;
}
+static bool pcie_doe_spdm_rsp(DOECap *doe_cap)
+{
+ void *req = pcie_doe_get_write_mbox_ptr(doe_cap);
+ uint32_t req_len = pcie_doe_get_obj_len(req) * 4;
+ void *rsp = doe_cap->read_mbox;
+ uint32_t rsp_len = SPDM_SOCKET_MAX_MESSAGE_BUFFER_SIZE;
+
+ uint32_t recvd = spdm_socket_rsp(doe_cap->spdm_socket,
+ SPDM_SOCKET_TRANSPORT_TYPE_PCI_DOE,
+ req, req_len, rsp, rsp_len);
+ doe_cap->read_mbox_len += DIV_ROUND_UP(recvd, 4);
+
+ return recvd != 0;
+}
+
+static DOEProtocol doe_spdm_prot[] = {
+ { PCI_VENDOR_ID_PCI_SIG, PCI_SIG_DOE_CMA, pcie_doe_spdm_rsp },
+ { PCI_VENDOR_ID_PCI_SIG, PCI_SIG_DOE_SECURED_CMA, pcie_doe_spdm_rsp },
+ { }
+};
+
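The responder lives in a separate process: nvme_init_pci() below calls spdm_socket_connect() on the port given by the new spdm_port property, and every DOE mailbox exchange is relayed through spdm_socket_rsp(). A hypothetical invocation pairing the device with a responder listening on port 2323:

    -device nvme,serial=deadbeef,drive=nvm,spdm_port=2323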
static bool nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp)
{
ERRP_GUARD();
@@ -8122,7 +8650,7 @@ static bool nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp)
unsigned nr_vectors;
int ret;
- pci_conf[PCI_INTERRUPT_PIN] = 1;
+ pci_conf[PCI_INTERRUPT_PIN] = pci_is_vf(pci_dev) ? 0 : 1;
pci_config_set_prog_interface(pci_conf, 0x2);
if (n->params.use_intel_id) {
@@ -8194,12 +8722,30 @@ static bool nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp)
if (!pci_is_vf(pci_dev) && n->params.sriov_max_vfs &&
!nvme_init_sriov(n, pci_dev, 0x120, errp)) {
- msix_uninit(pci_dev, &n->bar0, &n->bar0);
return false;
}
nvme_update_msixcap_ts(pci_dev, n->conf_msix_qsize);
+ pcie_cap_deverr_init(pci_dev);
+
+ /* DOE Initialisation */
+ if (pci_dev->spdm_port) {
+ uint16_t doe_offset = n->params.sriov_max_vfs ?
+ PCI_CONFIG_SPACE_SIZE + PCI_ARI_SIZEOF
+ : PCI_CONFIG_SPACE_SIZE;
+
+ pcie_doe_init(pci_dev, &pci_dev->doe_spdm, doe_offset,
+ doe_spdm_prot, true, 0);
+
+ pci_dev->doe_spdm.spdm_socket = spdm_socket_connect(pci_dev->spdm_port,
+ errp);
+
+ if (pci_dev->doe_spdm.spdm_socket < 0) {
+ return false;
+ }
+ }
+
if (n->params.cmb_size_mb) {
nvme_init_cmb(n, pci_dev);
}
@@ -8231,6 +8777,12 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
uint64_t cap = ldq_le_p(&n->bar.cap);
NvmeSecCtrlEntry *sctrl = nvme_sctrl(n);
uint32_t ctratt;
+ uint16_t oacs;
+
+ memcpy(n->cse.acs, nvme_cse_acs_default, sizeof(n->cse.acs));
+ memcpy(n->cse.iocs.nvm, nvme_cse_iocs_nvm_default, sizeof(n->cse.iocs.nvm));
+ memcpy(n->cse.iocs.zoned, nvme_cse_iocs_zoned_default,
+ sizeof(n->cse.iocs.zoned));
id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID));
id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID));
@@ -8241,7 +8793,11 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
id->cntlid = cpu_to_le16(n->cntlid);
id->oaes = cpu_to_le32(NVME_OAES_NS_ATTR);
+
ctratt = NVME_CTRATT_ELBAS;
+ if (n->params.ctratt.mem) {
+ ctratt |= NVME_CTRATT_MEM;
+ }
id->rab = 6;
@@ -8257,9 +8813,23 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
id->mdts = n->params.mdts;
id->ver = cpu_to_le32(NVME_SPEC_VER);
- id->oacs =
- cpu_to_le16(NVME_OACS_NS_MGMT | NVME_OACS_FORMAT | NVME_OACS_DBBUF |
- NVME_OACS_DIRECTIVES);
+
+ oacs = NVME_OACS_NMS | NVME_OACS_FORMAT | NVME_OACS_DIRECTIVES;
+
+ if (n->params.dbcs) {
+ oacs |= NVME_OACS_DBCS;
+
+ n->cse.acs[NVME_ADM_CMD_DBBUF_CONFIG] = NVME_CMD_EFF_CSUPP;
+ }
+
+ if (n->params.sriov_max_vfs) {
+ oacs |= NVME_OACS_VMS;
+
+ n->cse.acs[NVME_ADM_CMD_VIRT_MNGMT] = NVME_CMD_EFF_CSUPP;
+ }
+
+ id->oacs = cpu_to_le16(oacs);
+
id->cntrltype = 0x1;
/*
@@ -8287,7 +8857,8 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
id->nn = cpu_to_le32(NVME_MAX_NAMESPACES);
id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROES | NVME_ONCS_TIMESTAMP |
NVME_ONCS_FEATURES | NVME_ONCS_DSM |
- NVME_ONCS_COMPARE | NVME_ONCS_COPY);
+ NVME_ONCS_COMPARE | NVME_ONCS_COPY |
+ NVME_ONCS_NVMCSA | NVME_ONCS_NVMAFC);
/*
* NOTE: If this device ever supports a command set that does NOT use 0x0
@@ -8298,8 +8869,10 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
*/
id->vwc = NVME_VWC_NSID_BROADCAST_SUPPORT | NVME_VWC_PRESENT;
- id->ocfs = cpu_to_le16(NVME_OCFS_COPY_FORMAT_0 | NVME_OCFS_COPY_FORMAT_1);
- id->sgls = cpu_to_le32(NVME_CTRL_SGLS_SUPPORT_NO_ALIGN);
+ id->ocfs = cpu_to_le16(NVME_OCFS_COPY_FORMAT_0 | NVME_OCFS_COPY_FORMAT_1 |
+ NVME_OCFS_COPY_FORMAT_2 | NVME_OCFS_COPY_FORMAT_3);
+ id->sgls = cpu_to_le32(NVME_CTRL_SGLS_SUPPORT_NO_ALIGN |
+ NVME_CTRL_SGLS_MPTR_SGL);
nvme_init_subnqn(n);
@@ -8307,15 +8880,13 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
id->psd[0].enlat = cpu_to_le32(0x10);
id->psd[0].exlat = cpu_to_le32(0x4);
- if (n->subsys) {
- id->cmic |= NVME_CMIC_MULTI_CTRL;
- ctratt |= NVME_CTRATT_ENDGRPS;
+ id->cmic |= NVME_CMIC_MULTI_CTRL;
+ ctratt |= NVME_CTRATT_ENDGRPS;
- id->endgidmax = cpu_to_le16(0x1);
+ id->endgidmax = cpu_to_le16(0x1);
- if (n->subsys->endgrp.fdp.enabled) {
- ctratt |= NVME_CTRATT_FDPS;
- }
+ if (n->subsys->endgrp.fdp.enabled) {
+ ctratt |= NVME_CTRATT_FDPS;
}
id->ctratt = cpu_to_le32(ctratt);
@@ -8323,9 +8894,8 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
NVME_CAP_SET_MQES(cap, n->params.mqes);
NVME_CAP_SET_CQR(cap, 1);
NVME_CAP_SET_TO(cap, 0xf);
- NVME_CAP_SET_CSS(cap, NVME_CAP_CSS_NVM);
- NVME_CAP_SET_CSS(cap, NVME_CAP_CSS_CSI_SUPP);
- NVME_CAP_SET_CSS(cap, NVME_CAP_CSS_ADMIN_ONLY);
+ NVME_CAP_SET_CSS(cap, NVME_CAP_CSS_NCSS);
+ NVME_CAP_SET_CSS(cap, NVME_CAP_CSS_IOCSS);
NVME_CAP_SET_MPSMAX(cap, 4);
NVME_CAP_SET_CMBS(cap, n->params.cmb_size_mb ? 1 : 0);
NVME_CAP_SET_PMRS(cap, n->pmr.dev ? 1 : 0);
@@ -8344,7 +8914,15 @@ static int nvme_init_subsys(NvmeCtrl *n, Error **errp)
int cntlid;
if (!n->subsys) {
- return 0;
+ DeviceState *dev = qdev_new(TYPE_NVME_SUBSYS);
+
+ qdev_prop_set_string(dev, "nqn", n->params.serial);
+
+ if (!qdev_realize(dev, NULL, errp)) {
+ return -1;
+ }
+
+ n->subsys = NVME_SUBSYS(dev);
}
cntlid = nvme_subsys_register_ctrl(n, errp);
@@ -8364,9 +8942,6 @@ void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns)
n->namespaces[nsid] = ns;
ns->attached++;
-
- n->dmrsl = MIN_NON_ZERO(n->dmrsl,
- BDRV_REQUEST_MAX_BYTES / nvme_l2b(ns, 1));
}
static void nvme_realize(PCIDevice *pci_dev, Error **errp)
@@ -8389,6 +8964,13 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
*/
n->params.serial = g_strdup(pn->params.serial);
n->subsys = pn->subsys;
+
+ /*
+ * Assigning this link (strong link) causes an `object_unref` later in
+ * `object_release_link_property`. Increment the refcount to balance
+ * this out.
+ */
+ object_ref(OBJECT(pn->subsys));
}
if (!nvme_check_params(n, errp)) {
@@ -8410,12 +8992,13 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
if (n->namespace.blkconf.blk) {
ns = &n->namespace;
ns->params.nsid = 1;
+ ns->ctrl = n;
if (nvme_ns_setup(ns, errp)) {
return;
}
- nvme_attach_ns(n, ns);
+ n->subsys->namespaces[ns->params.nsid] = ns;
}
}
@@ -8427,17 +9010,15 @@ static void nvme_exit(PCIDevice *pci_dev)
nvme_ctrl_reset(n, NVME_RESET_FUNCTION);
- if (n->subsys) {
- for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
- ns = nvme_ns(n, i);
- if (ns) {
- ns->attached--;
- }
+ for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
+ ns = nvme_ns(n, i);
+ if (ns) {
+ ns->attached--;
}
-
- nvme_subsys_unregister_ctrl(n->subsys, n);
}
+ nvme_subsys_unregister_ctrl(n->subsys, n);
+
g_free(n->cq);
g_free(n->sq);
g_free(n->aer_reqs);
@@ -8446,6 +9027,11 @@ static void nvme_exit(PCIDevice *pci_dev)
g_free(n->cmb.buf);
}
+ if (pci_dev->doe_spdm.spdm_socket > 0) {
+ spdm_socket_close(pci_dev->doe_spdm.spdm_socket,
+ SPDM_SOCKET_TRANSPORT_TYPE_PCI_DOE);
+ }
+
if (n->pmr.dev) {
host_memory_backend_set_mapped(n->pmr.dev, false);
}
@@ -8454,11 +9040,16 @@ static void nvme_exit(PCIDevice *pci_dev)
pcie_sriov_pf_exit(pci_dev);
}
- msix_uninit(pci_dev, &n->bar0, &n->bar0);
+ if (n->params.msix_exclusive_bar && !pci_is_vf(pci_dev)) {
+ msix_uninit_exclusive_bar(pci_dev);
+ } else {
+ msix_uninit(pci_dev, &n->bar0, &n->bar0);
+ }
+
memory_region_del_subregion(&n->bar0, &n->iomem);
}
-static Property nvme_props[] = {
+static const Property nvme_props[] = {
DEFINE_BLOCK_PROPERTIES(NvmeCtrl, namespace.blkconf),
DEFINE_PROP_LINK("pmrdev", NvmeCtrl, pmr.dev, TYPE_MEMORY_BACKEND,
HostMemoryBackend *),
@@ -8476,6 +9067,7 @@ static Property nvme_props[] = {
DEFINE_PROP_BOOL("use-intel-id", NvmeCtrl, params.use_intel_id, false),
DEFINE_PROP_BOOL("legacy-cmb", NvmeCtrl, params.legacy_cmb, false),
DEFINE_PROP_BOOL("ioeventfd", NvmeCtrl, params.ioeventfd, false),
+ DEFINE_PROP_BOOL("dbcs", NvmeCtrl, params.dbcs, true),
DEFINE_PROP_UINT8("zoned.zasl", NvmeCtrl, params.zasl, 0),
DEFINE_PROP_BOOL("zoned.auto_transition", NvmeCtrl,
params.auto_transition_zones, true),
@@ -8491,7 +9083,12 @@ static Property nvme_props[] = {
DEFINE_PROP_BOOL("msix-exclusive-bar", NvmeCtrl, params.msix_exclusive_bar,
false),
DEFINE_PROP_UINT16("mqes", NvmeCtrl, params.mqes, 0x7ff),
- DEFINE_PROP_END_OF_LIST(),
+ DEFINE_PROP_UINT16("spdm_port", PCIDevice, spdm_port, 0),
+ DEFINE_PROP_BOOL("ctratt.mem", NvmeCtrl, params.ctratt.mem, false),
+ DEFINE_PROP_BOOL("atomic.dn", NvmeCtrl, params.atomic_dn, 0),
+ DEFINE_PROP_UINT16("atomic.awun", NvmeCtrl, params.atomic_awun, 0),
+ DEFINE_PROP_UINT16("atomic.awupf", NvmeCtrl, params.atomic_awupf, 0),
+ DEFINE_PROP_BOOL("ocp", NvmeCtrl, params.ocp, false),
};
static void nvme_get_smart_warning(Object *obj, Visitor *v, const char *name,
@@ -8562,23 +9159,38 @@ static void nvme_pci_write_config(PCIDevice *dev, uint32_t address,
{
uint16_t old_num_vfs = pcie_sriov_num_vfs(dev);
+ if (pcie_find_capability(dev, PCI_EXT_CAP_ID_DOE)) {
+ pcie_doe_write_config(&dev->doe_spdm, address, val, len);
+ }
pci_default_write_config(dev, address, val, len);
pcie_cap_flr_write_config(dev, address, val, len);
nvme_sriov_post_write_config(dev, old_num_vfs);
}
+static uint32_t nvme_pci_read_config(PCIDevice *dev, uint32_t address, int len)
+{
+ uint32_t val;
+ if (dev->spdm_port && pcie_find_capability(dev, PCI_EXT_CAP_ID_DOE)) {
+ if (pcie_doe_read_config(&dev->doe_spdm, address, len, &val)) {
+ return val;
+ }
+ }
+ return pci_default_read_config(dev, address, len);
+}
+
static const VMStateDescription nvme_vmstate = {
.name = "nvme",
.unmigratable = 1,
};
-static void nvme_class_init(ObjectClass *oc, void *data)
+static void nvme_class_init(ObjectClass *oc, const void *data)
{
DeviceClass *dc = DEVICE_CLASS(oc);
PCIDeviceClass *pc = PCI_DEVICE_CLASS(oc);
pc->realize = nvme_realize;
pc->config_write = nvme_pci_write_config;
+ pc->config_read = nvme_pci_read_config;
pc->exit = nvme_exit;
pc->class_id = PCI_CLASS_STORAGE_EXPRESS;
pc->revision = 2;
@@ -8587,7 +9199,7 @@ static void nvme_class_init(ObjectClass *oc, void *data)
dc->desc = "Non-Volatile Memory Express";
device_class_set_props(dc, nvme_props);
dc->vmsd = &nvme_vmstate;
- dc->reset = nvme_pci_reset;
+ device_class_set_legacy_reset(dc, nvme_pci_reset);
}
static void nvme_instance_init(Object *obj)
@@ -8609,7 +9221,7 @@ static const TypeInfo nvme_info = {
.instance_size = sizeof(NvmeCtrl),
.instance_init = nvme_instance_init,
.class_init = nvme_class_init,
- .interfaces = (InterfaceInfo[]) {
+ .interfaces = (const InterfaceInfo[]) {
{ INTERFACE_PCIE_DEVICE },
{ }
},