aboutsummaryrefslogtreecommitdiff
path: root/hw
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2023-03-07 09:58:25 +0000
committerPeter Maydell <peter.maydell@linaro.org>2023-03-07 09:58:25 +0000
commitc1feaf76833f5b29f887fd64371512731cbf7086 (patch)
treecc534ce3570cf5f4b0212177354fc221fc9f2e57 /hw
parent67128074c9bd39f607548b27d2d51c3f0ca9d35e (diff)
parent73064edfb864743cde2c08f319609344af02aeb3 (diff)
downloadqemu-c1feaf76833f5b29f887fd64371512731cbf7086.zip
qemu-c1feaf76833f5b29f887fd64371512731cbf7086.tar.gz
qemu-c1feaf76833f5b29f887fd64371512731cbf7086.tar.bz2
Merge tag 'nvme-next-pull-request' of https://gitlab.com/birkelund/qemu into staging
hw/nvme updates * basic support for directives * simple support for endurance groups * emulation of flexible data placement (tp4146) # -----BEGIN PGP SIGNATURE----- # # iQEzBAABCgAdFiEEUigzqnXi3OaiR2bATeGvMW1PDekFAmQF+doACgkQTeGvMW1P # DenIEgf+MLsRQ3kKUmsgVNnPuR69M0COfyaz0AnfX6YEIL9ukFJQPsmASfPmHof5 # tCYIFyKEpZt/givmzSI1jdpm0uX2MRwLGLYRdNhEPVjo+TfGda15x7DgpBEduqjq # mChUS2wrmgP9TZne+kTAU28pUpU7hcfrt1RkDOO86W8oJmpBeIyGe6vikVhQppKW # fAIKvhNfN3p5Kxq1fhE6I5YzKd2vvKtBvPpZp2uFe6LHXEcVV/FPcTx3Ph+um/o6 # ScmmxowT4Wqk4EgXh1ohephlxB89aWgwLNHLHcfte6UCU9x4eSmTC2T3pf7piBaE # pGLpzPoYk6BAurwrMuxCxYgStl6SzQ== # =CNSk # -----END PGP SIGNATURE----- # gpg: Signature made Mon 06 Mar 2023 14:34:02 GMT # gpg: using RSA key 522833AA75E2DCE6A24766C04DE1AF316D4F0DE9 # gpg: Good signature from "Klaus Jensen <its@irrelevant.dk>" [full] # gpg: aka "Klaus Jensen <k.jensen@samsung.com>" [full] # Primary key fingerprint: DDCA 4D9C 9EF9 31CC 3468 4272 63D5 6FC5 E55D A838 # Subkey fingerprint: 5228 33AA 75E2 DCE6 A247 66C0 4DE1 AF31 6D4F 0DE9 * tag 'nvme-next-pull-request' of https://gitlab.com/birkelund/qemu: hw/nvme: flexible data placement emulation hw/nvme: basic directives support hw/nvme: add basic endurance group support hw/nvme: store a pointer to the NvmeSubsystem in the NvmeNamespace hw/nvme: move adjustment of data_units{read,written} Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'hw')
-rw-r--r--hw/nvme/ctrl.c802
-rw-r--r--hw/nvme/ns.c147
-rw-r--r--hw/nvme/nvme.h92
-rw-r--r--hw/nvme/subsys.c94
-rw-r--r--hw/nvme/trace-events1
5 files changed, 1122 insertions, 14 deletions
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index f25cc2c..49c1210 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -238,6 +238,8 @@ static const bool nvme_feature_support[NVME_FID_MAX] = {
[NVME_TIMESTAMP] = true,
[NVME_HOST_BEHAVIOR_SUPPORT] = true,
[NVME_COMMAND_SET_PROFILE] = true,
+ [NVME_FDP_MODE] = true,
+ [NVME_FDP_EVENTS] = true,
};
static const uint32_t nvme_feature_cap[NVME_FID_MAX] = {
@@ -249,6 +251,8 @@ static const uint32_t nvme_feature_cap[NVME_FID_MAX] = {
[NVME_TIMESTAMP] = NVME_FEAT_CAP_CHANGE,
[NVME_HOST_BEHAVIOR_SUPPORT] = NVME_FEAT_CAP_CHANGE,
[NVME_COMMAND_SET_PROFILE] = NVME_FEAT_CAP_CHANGE,
+ [NVME_FDP_MODE] = NVME_FEAT_CAP_CHANGE,
+ [NVME_FDP_EVENTS] = NVME_FEAT_CAP_CHANGE | NVME_FEAT_CAP_NS,
};
static const uint32_t nvme_cse_acs[256] = {
@@ -266,6 +270,8 @@ static const uint32_t nvme_cse_acs[256] = {
[NVME_ADM_CMD_VIRT_MNGMT] = NVME_CMD_EFF_CSUPP,
[NVME_ADM_CMD_DBBUF_CONFIG] = NVME_CMD_EFF_CSUPP,
[NVME_ADM_CMD_FORMAT_NVM] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
+ [NVME_ADM_CMD_DIRECTIVE_RECV] = NVME_CMD_EFF_CSUPP,
+ [NVME_ADM_CMD_DIRECTIVE_SEND] = NVME_CMD_EFF_CSUPP,
};
static const uint32_t nvme_cse_iocs_none[256];
@@ -279,6 +285,8 @@ static const uint32_t nvme_cse_iocs_nvm[256] = {
[NVME_CMD_VERIFY] = NVME_CMD_EFF_CSUPP,
[NVME_CMD_COPY] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
[NVME_CMD_COMPARE] = NVME_CMD_EFF_CSUPP,
+ [NVME_CMD_IO_MGMT_RECV] = NVME_CMD_EFF_CSUPP,
+ [NVME_CMD_IO_MGMT_SEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
};
static const uint32_t nvme_cse_iocs_zoned[256] = {
@@ -297,12 +305,66 @@ static const uint32_t nvme_cse_iocs_zoned[256] = {
static void nvme_process_sq(void *opaque);
static void nvme_ctrl_reset(NvmeCtrl *n, NvmeResetType rst);
+static inline uint64_t nvme_get_timestamp(const NvmeCtrl *n);
static uint16_t nvme_sqid(NvmeRequest *req)
{
return le16_to_cpu(req->sq->sqid);
}
+static inline uint16_t nvme_make_pid(NvmeNamespace *ns, uint16_t rg,
+ uint16_t ph)
+{
+ uint16_t rgif = ns->endgrp->fdp.rgif;
+
+ if (!rgif) {
+ return ph;
+ }
+
+ return (rg << (16 - rgif)) | ph;
+}
+
+static inline bool nvme_ph_valid(NvmeNamespace *ns, uint16_t ph)
+{
+ return ph < ns->fdp.nphs;
+}
+
+static inline bool nvme_rg_valid(NvmeEnduranceGroup *endgrp, uint16_t rg)
+{
+ return rg < endgrp->fdp.nrg;
+}
+
+static inline uint16_t nvme_pid2ph(NvmeNamespace *ns, uint16_t pid)
+{
+ uint16_t rgif = ns->endgrp->fdp.rgif;
+
+ if (!rgif) {
+ return pid;
+ }
+
+ return pid & ((1 << (15 - rgif)) - 1);
+}
+
+static inline uint16_t nvme_pid2rg(NvmeNamespace *ns, uint16_t pid)
+{
+ uint16_t rgif = ns->endgrp->fdp.rgif;
+
+ if (!rgif) {
+ return 0;
+ }
+
+ return pid >> (16 - rgif);
+}
+
+static inline bool nvme_parse_pid(NvmeNamespace *ns, uint16_t pid,
+ uint16_t *ph, uint16_t *rg)
+{
+ *rg = nvme_pid2rg(ns, pid);
+ *ph = nvme_pid2ph(ns, pid);
+
+ return nvme_ph_valid(ns, *ph) && nvme_rg_valid(ns->endgrp, *rg);
+}
+
static void nvme_assign_zone_state(NvmeNamespace *ns, NvmeZone *zone,
NvmeZoneState state)
{
@@ -376,6 +438,69 @@ static uint16_t nvme_aor_check(NvmeNamespace *ns, uint32_t act, uint32_t opn)
return nvme_zns_check_resources(ns, act, opn, 0);
}
+static NvmeFdpEvent *nvme_fdp_alloc_event(NvmeCtrl *n, NvmeFdpEventBuffer *ebuf)
+{
+ NvmeFdpEvent *ret = NULL;
+ bool is_full = ebuf->next == ebuf->start && ebuf->nelems;
+
+ ret = &ebuf->events[ebuf->next++];
+ if (unlikely(ebuf->next == NVME_FDP_MAX_EVENTS)) {
+ ebuf->next = 0;
+ }
+ if (is_full) {
+ ebuf->start = ebuf->next;
+ } else {
+ ebuf->nelems++;
+ }
+
+ memset(ret, 0, sizeof(NvmeFdpEvent));
+ ret->timestamp = nvme_get_timestamp(n);
+
+ return ret;
+}
+
+static inline int log_event(NvmeRuHandle *ruh, uint8_t event_type)
+{
+ return (ruh->event_filter >> nvme_fdp_evf_shifts[event_type]) & 0x1;
+}
+
+static bool nvme_update_ruh(NvmeCtrl *n, NvmeNamespace *ns, uint16_t pid)
+{
+ NvmeEnduranceGroup *endgrp = ns->endgrp;
+ NvmeRuHandle *ruh;
+ NvmeReclaimUnit *ru;
+ NvmeFdpEvent *e = NULL;
+ uint16_t ph, rg, ruhid;
+
+ if (!nvme_parse_pid(ns, pid, &ph, &rg)) {
+ return false;
+ }
+
+ ruhid = ns->fdp.phs[ph];
+
+ ruh = &endgrp->fdp.ruhs[ruhid];
+ ru = &ruh->rus[rg];
+
+ if (ru->ruamw) {
+ if (log_event(ruh, FDP_EVT_RU_NOT_FULLY_WRITTEN)) {
+ e = nvme_fdp_alloc_event(n, &endgrp->fdp.host_events);
+ e->type = FDP_EVT_RU_NOT_FULLY_WRITTEN;
+ e->flags = FDPEF_PIV | FDPEF_NSIDV | FDPEF_LV;
+ e->pid = cpu_to_le16(pid);
+ e->nsid = cpu_to_le32(ns->params.nsid);
+ e->rgid = cpu_to_le16(rg);
+ e->ruhid = cpu_to_le16(ruhid);
+ }
+
+ /* log (eventual) GC overhead of prematurely swapping the RU */
+ nvme_fdp_stat_inc(&endgrp->fdp.mbmw, nvme_l2b(ns, ru->ruamw));
+ }
+
+ ru->ruamw = ruh->ruamw;
+
+ return true;
+}
+
static bool nvme_addr_is_cmb(NvmeCtrl *n, hwaddr addr)
{
hwaddr hi, lo;
@@ -3320,6 +3445,41 @@ invalid:
return status | NVME_DNR;
}
+static void nvme_do_write_fdp(NvmeCtrl *n, NvmeRequest *req, uint64_t slba,
+ uint32_t nlb)
+{
+ NvmeNamespace *ns = req->ns;
+ NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
+ uint64_t data_size = nvme_l2b(ns, nlb);
+ uint32_t dw12 = le32_to_cpu(req->cmd.cdw12);
+ uint8_t dtype = (dw12 >> 20) & 0xf;
+ uint16_t pid = le16_to_cpu(rw->dspec);
+ uint16_t ph, rg, ruhid;
+ NvmeReclaimUnit *ru;
+
+ if (dtype != NVME_DIRECTIVE_DATA_PLACEMENT ||
+ !nvme_parse_pid(ns, pid, &ph, &rg)) {
+ ph = 0;
+ rg = 0;
+ }
+
+ ruhid = ns->fdp.phs[ph];
+ ru = &ns->endgrp->fdp.ruhs[ruhid].rus[rg];
+
+ nvme_fdp_stat_inc(&ns->endgrp->fdp.hbmw, data_size);
+ nvme_fdp_stat_inc(&ns->endgrp->fdp.mbmw, data_size);
+
+ while (nlb) {
+ if (nlb < ru->ruamw) {
+ ru->ruamw -= nlb;
+ break;
+ }
+
+ nlb -= ru->ruamw;
+ nvme_update_ruh(n, ns, pid);
+ }
+}
+
static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append,
bool wrz)
{
@@ -3429,6 +3589,8 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append,
if (!(zone->d.za & NVME_ZA_ZRWA_VALID)) {
zone->w_ptr += nlb;
}
+ } else if (ns->endgrp && ns->endgrp->fdp.enabled) {
+ nvme_do_write_fdp(n, req, slba, nlb);
}
data_offset = nvme_l2b(ns, slba);
@@ -4086,6 +4248,126 @@ static uint16_t nvme_zone_mgmt_recv(NvmeCtrl *n, NvmeRequest *req)
return status;
}
+static uint16_t nvme_io_mgmt_recv_ruhs(NvmeCtrl *n, NvmeRequest *req,
+ size_t len)
+{
+ NvmeNamespace *ns = req->ns;
+ NvmeEnduranceGroup *endgrp;
+ NvmeRuhStatus *hdr;
+ NvmeRuhStatusDescr *ruhsd;
+ unsigned int nruhsd;
+ uint16_t rg, ph, *ruhid;
+ size_t trans_len;
+ g_autofree uint8_t *buf = NULL;
+
+ if (!n->subsys) {
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+
+ if (ns->params.nsid == 0 || ns->params.nsid == 0xffffffff) {
+ return NVME_INVALID_NSID | NVME_DNR;
+ }
+
+ if (!n->subsys->endgrp.fdp.enabled) {
+ return NVME_FDP_DISABLED | NVME_DNR;
+ }
+
+ endgrp = ns->endgrp;
+
+ nruhsd = ns->fdp.nphs * endgrp->fdp.nrg;
+ trans_len = sizeof(NvmeRuhStatus) + nruhsd * sizeof(NvmeRuhStatusDescr);
+ buf = g_malloc(trans_len);
+
+ trans_len = MIN(trans_len, len);
+
+ hdr = (NvmeRuhStatus *)buf;
+ ruhsd = (NvmeRuhStatusDescr *)(buf + sizeof(NvmeRuhStatus));
+
+ hdr->nruhsd = cpu_to_le16(nruhsd);
+
+ ruhid = ns->fdp.phs;
+
+ for (ph = 0; ph < ns->fdp.nphs; ph++, ruhid++) {
+ NvmeRuHandle *ruh = &endgrp->fdp.ruhs[*ruhid];
+
+ for (rg = 0; rg < endgrp->fdp.nrg; rg++, ruhsd++) {
+ uint16_t pid = nvme_make_pid(ns, rg, ph);
+
+ ruhsd->pid = cpu_to_le16(pid);
+ ruhsd->ruhid = *ruhid;
+ ruhsd->earutr = 0;
+ ruhsd->ruamw = cpu_to_le64(ruh->rus[rg].ruamw);
+ }
+ }
+
+ return nvme_c2h(n, buf, trans_len, req);
+}
+
+static uint16_t nvme_io_mgmt_recv(NvmeCtrl *n, NvmeRequest *req)
+{
+ NvmeCmd *cmd = &req->cmd;
+ uint32_t cdw10 = le32_to_cpu(cmd->cdw10);
+ uint32_t numd = le32_to_cpu(cmd->cdw11);
+ uint8_t mo = (cdw10 & 0xff);
+ size_t len = (numd + 1) << 2;
+
+ switch (mo) {
+ case NVME_IOMR_MO_NOP:
+ return 0;
+ case NVME_IOMR_MO_RUH_STATUS:
+ return nvme_io_mgmt_recv_ruhs(n, req, len);
+ default:
+ return NVME_INVALID_FIELD | NVME_DNR;
+ };
+}
+
+static uint16_t nvme_io_mgmt_send_ruh_update(NvmeCtrl *n, NvmeRequest *req)
+{
+ NvmeCmd *cmd = &req->cmd;
+ NvmeNamespace *ns = req->ns;
+ uint32_t cdw10 = le32_to_cpu(cmd->cdw10);
+ uint16_t ret = NVME_SUCCESS;
+ uint32_t npid = (cdw10 >> 1) + 1;
+ unsigned int i = 0;
+ g_autofree uint16_t *pids = NULL;
+ uint32_t maxnpid = n->subsys->endgrp.fdp.nrg * n->subsys->endgrp.fdp.nruh;
+
+ if (unlikely(npid >= MIN(NVME_FDP_MAXPIDS, maxnpid))) {
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+
+ pids = g_new(uint16_t, npid);
+
+ ret = nvme_h2c(n, pids, npid * sizeof(uint16_t), req);
+ if (ret) {
+ return ret;
+ }
+
+ for (; i < npid; i++) {
+ if (!nvme_update_ruh(n, ns, pids[i])) {
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+ }
+
+ return ret;
+}
+
+static uint16_t nvme_io_mgmt_send(NvmeCtrl *n, NvmeRequest *req)
+{
+ NvmeCmd *cmd = &req->cmd;
+ uint32_t cdw10 = le32_to_cpu(cmd->cdw10);
+ uint8_t mo = (cdw10 & 0xff);
+
+ switch (mo) {
+ case NVME_IOMS_MO_NOP:
+ return 0;
+ case NVME_IOMS_MO_RUH_UPDATE:
+ return nvme_io_mgmt_send_ruh_update(n, req);
+ default:
+ return NVME_INVALID_FIELD | NVME_DNR;
+ };
+}
+
static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req)
{
NvmeNamespace *ns;
@@ -4162,6 +4444,10 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req)
return nvme_zone_mgmt_send(n, req);
case NVME_CMD_ZONE_MGMT_RECV:
return nvme_zone_mgmt_recv(n, req);
+ case NVME_CMD_IO_MGMT_RECV:
+ return nvme_io_mgmt_recv(n, req);
+ case NVME_CMD_IO_MGMT_SEND:
+ return nvme_io_mgmt_send(n, req);
default:
assert(false);
}
@@ -4386,8 +4672,8 @@ static void nvme_set_blk_stats(NvmeNamespace *ns, struct nvme_stats *stats)
{
BlockAcctStats *s = blk_get_stats(ns->blkconf.blk);
- stats->units_read += s->nr_bytes[BLOCK_ACCT_READ] >> BDRV_SECTOR_BITS;
- stats->units_written += s->nr_bytes[BLOCK_ACCT_WRITE] >> BDRV_SECTOR_BITS;
+ stats->units_read += s->nr_bytes[BLOCK_ACCT_READ];
+ stats->units_written += s->nr_bytes[BLOCK_ACCT_WRITE];
stats->read_commands += s->nr_ops[BLOCK_ACCT_READ];
stats->write_commands += s->nr_ops[BLOCK_ACCT_WRITE];
}
@@ -4401,6 +4687,7 @@ static uint16_t nvme_smart_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len,
uint32_t trans_len;
NvmeNamespace *ns;
time_t current_ms;
+ uint64_t u_read, u_written;
if (off >= sizeof(smart)) {
return NVME_INVALID_FIELD | NVME_DNR;
@@ -4427,10 +4714,11 @@ static uint16_t nvme_smart_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len,
trans_len = MIN(sizeof(smart) - off, buf_len);
smart.critical_warning = n->smart_critical_warning;
- smart.data_units_read[0] = cpu_to_le64(DIV_ROUND_UP(stats.units_read,
- 1000));
- smart.data_units_written[0] = cpu_to_le64(DIV_ROUND_UP(stats.units_written,
- 1000));
+ u_read = DIV_ROUND_UP(stats.units_read >> BDRV_SECTOR_BITS, 1000);
+ u_written = DIV_ROUND_UP(stats.units_written >> BDRV_SECTOR_BITS, 1000);
+
+ smart.data_units_read[0] = cpu_to_le64(u_read);
+ smart.data_units_written[0] = cpu_to_le64(u_written);
smart.host_read_commands[0] = cpu_to_le64(stats.read_commands);
smart.host_write_commands[0] = cpu_to_le64(stats.write_commands);
@@ -4452,6 +4740,48 @@ static uint16_t nvme_smart_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len,
return nvme_c2h(n, (uint8_t *) &smart + off, trans_len, req);
}
+static uint16_t nvme_endgrp_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len,
+ uint64_t off, NvmeRequest *req)
+{
+ uint32_t dw11 = le32_to_cpu(req->cmd.cdw11);
+ uint16_t endgrpid = (dw11 >> 16) & 0xffff;
+ struct nvme_stats stats = {};
+ NvmeEndGrpLog info = {};
+ int i;
+
+ if (!n->subsys || endgrpid != 0x1) {
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+
+ if (off >= sizeof(info)) {
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+
+ for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
+ NvmeNamespace *ns = nvme_subsys_ns(n->subsys, i);
+ if (!ns) {
+ continue;
+ }
+
+ nvme_set_blk_stats(ns, &stats);
+ }
+
+ info.data_units_read[0] =
+ cpu_to_le64(DIV_ROUND_UP(stats.units_read / 1000000000, 1000000000));
+ info.data_units_written[0] =
+ cpu_to_le64(DIV_ROUND_UP(stats.units_written / 1000000000, 1000000000));
+ info.media_units_written[0] =
+ cpu_to_le64(DIV_ROUND_UP(stats.units_written / 1000000000, 1000000000));
+
+ info.host_read_commands[0] = cpu_to_le64(stats.read_commands);
+ info.host_write_commands[0] = cpu_to_le64(stats.write_commands);
+
+ buf_len = MIN(sizeof(info) - off, buf_len);
+
+ return nvme_c2h(n, (uint8_t *)&info + off, buf_len, req);
+}
+
+
static uint16_t nvme_fw_log_info(NvmeCtrl *n, uint32_t buf_len, uint64_t off,
NvmeRequest *req)
{
@@ -4577,6 +4907,207 @@ static uint16_t nvme_cmd_effects(NvmeCtrl *n, uint8_t csi, uint32_t buf_len,
return nvme_c2h(n, ((uint8_t *)&log) + off, trans_len, req);
}
+static size_t sizeof_fdp_conf_descr(size_t nruh, size_t vss)
+{
+ size_t entry_siz = sizeof(NvmeFdpDescrHdr) + nruh * sizeof(NvmeRuhDescr)
+ + vss;
+ return ROUND_UP(entry_siz, 8);
+}
+
+static uint16_t nvme_fdp_confs(NvmeCtrl *n, uint32_t endgrpid, uint32_t buf_len,
+ uint64_t off, NvmeRequest *req)
+{
+ uint32_t log_size, trans_len;
+ g_autofree uint8_t *buf = NULL;
+ NvmeFdpDescrHdr *hdr;
+ NvmeRuhDescr *ruhd;
+ NvmeEnduranceGroup *endgrp;
+ NvmeFdpConfsHdr *log;
+ size_t nruh, fdp_descr_size;
+ int i;
+
+ if (endgrpid != 1 || !n->subsys) {
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+
+ endgrp = &n->subsys->endgrp;
+
+ if (endgrp->fdp.enabled) {
+ nruh = endgrp->fdp.nruh;
+ } else {
+ nruh = 1;
+ }
+
+ fdp_descr_size = sizeof_fdp_conf_descr(nruh, FDPVSS);
+ log_size = sizeof(NvmeFdpConfsHdr) + fdp_descr_size;
+
+ if (off >= log_size) {
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+
+ trans_len = MIN(log_size - off, buf_len);
+
+ buf = g_malloc0(log_size);
+ log = (NvmeFdpConfsHdr *)buf;
+ hdr = (NvmeFdpDescrHdr *)(log + 1);
+ ruhd = (NvmeRuhDescr *)(buf + sizeof(*log) + sizeof(*hdr));
+
+ log->num_confs = cpu_to_le16(0);
+ log->size = cpu_to_le32(log_size);
+
+ hdr->descr_size = cpu_to_le16(fdp_descr_size);
+ if (endgrp->fdp.enabled) {
+ hdr->fdpa = FIELD_DP8(hdr->fdpa, FDPA, VALID, 1);
+ hdr->fdpa = FIELD_DP8(hdr->fdpa, FDPA, RGIF, endgrp->fdp.rgif);
+ hdr->nrg = cpu_to_le16(endgrp->fdp.nrg);
+ hdr->nruh = cpu_to_le16(endgrp->fdp.nruh);
+ hdr->maxpids = cpu_to_le16(NVME_FDP_MAXPIDS - 1);
+ hdr->nnss = cpu_to_le32(NVME_MAX_NAMESPACES);
+ hdr->runs = cpu_to_le64(endgrp->fdp.runs);
+
+ for (i = 0; i < nruh; i++) {
+ ruhd->ruht = NVME_RUHT_INITIALLY_ISOLATED;
+ ruhd++;
+ }
+ } else {
+ /* 1 bit for RUH in PIF -> 2 RUHs max. */
+ hdr->nrg = cpu_to_le16(1);
+ hdr->nruh = cpu_to_le16(1);
+ hdr->maxpids = cpu_to_le16(NVME_FDP_MAXPIDS - 1);
+ hdr->nnss = cpu_to_le32(1);
+ hdr->runs = cpu_to_le64(96 * MiB);
+
+ ruhd->ruht = NVME_RUHT_INITIALLY_ISOLATED;
+ }
+
+ return nvme_c2h(n, (uint8_t *)buf + off, trans_len, req);
+}
+
+static uint16_t nvme_fdp_ruh_usage(NvmeCtrl *n, uint32_t endgrpid,
+ uint32_t dw10, uint32_t dw12,
+ uint32_t buf_len, uint64_t off,
+ NvmeRequest *req)
+{
+ NvmeRuHandle *ruh;
+ NvmeRuhuLog *hdr;
+ NvmeRuhuDescr *ruhud;
+ NvmeEnduranceGroup *endgrp;
+ g_autofree uint8_t *buf = NULL;
+ uint32_t log_size, trans_len;
+ uint16_t i;
+
+ if (endgrpid != 1 || !n->subsys) {
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+
+ endgrp = &n->subsys->endgrp;
+
+ if (!endgrp->fdp.enabled) {
+ return NVME_FDP_DISABLED | NVME_DNR;
+ }
+
+ log_size = sizeof(NvmeRuhuLog) + endgrp->fdp.nruh * sizeof(NvmeRuhuDescr);
+
+ if (off >= log_size) {
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+
+ trans_len = MIN(log_size - off, buf_len);
+
+ buf = g_malloc0(log_size);
+ hdr = (NvmeRuhuLog *)buf;
+ ruhud = (NvmeRuhuDescr *)(hdr + 1);
+
+ ruh = endgrp->fdp.ruhs;
+ hdr->nruh = cpu_to_le16(endgrp->fdp.nruh);
+
+ for (i = 0; i < endgrp->fdp.nruh; i++, ruhud++, ruh++) {
+ ruhud->ruha = ruh->ruha;
+ }
+
+ return nvme_c2h(n, (uint8_t *)buf + off, trans_len, req);
+}
+
+static uint16_t nvme_fdp_stats(NvmeCtrl *n, uint32_t endgrpid, uint32_t buf_len,
+ uint64_t off, NvmeRequest *req)
+{
+ NvmeEnduranceGroup *endgrp;
+ NvmeFdpStatsLog log = {};
+ uint32_t trans_len;
+
+ if (off >= sizeof(NvmeFdpStatsLog)) {
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+
+ if (endgrpid != 1 || !n->subsys) {
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+
+ if (!n->subsys->endgrp.fdp.enabled) {
+ return NVME_FDP_DISABLED | NVME_DNR;
+ }
+
+ endgrp = &n->subsys->endgrp;
+
+ trans_len = MIN(sizeof(log) - off, buf_len);
+
+ /* spec value is 128 bit, we only use 64 bit */
+ log.hbmw[0] = cpu_to_le64(endgrp->fdp.hbmw);
+ log.mbmw[0] = cpu_to_le64(endgrp->fdp.mbmw);
+ log.mbe[0] = cpu_to_le64(endgrp->fdp.mbe);
+
+ return nvme_c2h(n, (uint8_t *)&log + off, trans_len, req);
+}
+
+static uint16_t nvme_fdp_events(NvmeCtrl *n, uint32_t endgrpid,
+ uint32_t buf_len, uint64_t off,
+ NvmeRequest *req)
+{
+ NvmeEnduranceGroup *endgrp;
+ NvmeCmd *cmd = &req->cmd;
+ bool host_events = (cmd->cdw10 >> 8) & 0x1;
+ uint32_t log_size, trans_len;
+ NvmeFdpEventBuffer *ebuf;
+ g_autofree NvmeFdpEventsLog *elog = NULL;
+ NvmeFdpEvent *event;
+
+ if (endgrpid != 1 || !n->subsys) {
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+
+ endgrp = &n->subsys->endgrp;
+
+ if (!endgrp->fdp.enabled) {
+ return NVME_FDP_DISABLED | NVME_DNR;
+ }
+
+ if (host_events) {
+ ebuf = &endgrp->fdp.host_events;
+ } else {
+ ebuf = &endgrp->fdp.ctrl_events;
+ }
+
+ log_size = sizeof(NvmeFdpEventsLog) + ebuf->nelems * sizeof(NvmeFdpEvent);
+ trans_len = MIN(log_size - off, buf_len);
+ elog = g_malloc0(log_size);
+ elog->num_events = cpu_to_le32(ebuf->nelems);
+ event = (NvmeFdpEvent *)(elog + 1);
+
+ if (ebuf->nelems && ebuf->start == ebuf->next) {
+ unsigned int nelems = (NVME_FDP_MAX_EVENTS - ebuf->start);
+ /* wrap over, copy [start;NVME_FDP_MAX_EVENTS[ and [0; next[ */
+ memcpy(event, &ebuf->events[ebuf->start],
+ sizeof(NvmeFdpEvent) * nelems);
+ memcpy(event + nelems, ebuf->events,
+ sizeof(NvmeFdpEvent) * ebuf->next);
+ } else if (ebuf->start < ebuf->next) {
+ memcpy(event, &ebuf->events[ebuf->start],
+ sizeof(NvmeFdpEvent) * (ebuf->next - ebuf->start));
+ }
+
+ return nvme_c2h(n, (uint8_t *)elog + off, trans_len, req);
+}
+
static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req)
{
NvmeCmd *cmd = &req->cmd;
@@ -4589,13 +5120,14 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req)
uint8_t lsp = (dw10 >> 8) & 0xf;
uint8_t rae = (dw10 >> 15) & 0x1;
uint8_t csi = le32_to_cpu(cmd->cdw14) >> 24;
- uint32_t numdl, numdu;
+ uint32_t numdl, numdu, lspi;
uint64_t off, lpol, lpou;
size_t len;
uint16_t status;
numdl = (dw10 >> 16);
numdu = (dw11 & 0xffff);
+ lspi = (dw11 >> 16);
lpol = dw12;
lpou = dw13;
@@ -4624,6 +5156,16 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req)
return nvme_changed_nslist(n, rae, len, off, req);
case NVME_LOG_CMD_EFFECTS:
return nvme_cmd_effects(n, csi, len, off, req);
+ case NVME_LOG_ENDGRP:
+ return nvme_endgrp_info(n, rae, len, off, req);
+ case NVME_LOG_FDP_CONFS:
+ return nvme_fdp_confs(n, lspi, len, off, req);
+ case NVME_LOG_FDP_RUH_USAGE:
+ return nvme_fdp_ruh_usage(n, lspi, dw10, dw12, len, off, req);
+ case NVME_LOG_FDP_STATS:
+ return nvme_fdp_stats(n, lspi, len, off, req);
+ case NVME_LOG_FDP_EVENTS:
+ return nvme_fdp_events(n, lspi, len, off, req);
default:
trace_pci_nvme_err_invalid_log_page(nvme_cid(req), lid);
return NVME_INVALID_FIELD | NVME_DNR;
@@ -5210,6 +5752,84 @@ static uint16_t nvme_get_feature_timestamp(NvmeCtrl *n, NvmeRequest *req)
return nvme_c2h(n, (uint8_t *)&timestamp, sizeof(timestamp), req);
}
+static int nvme_get_feature_fdp(NvmeCtrl *n, uint32_t endgrpid,
+ uint32_t *result)
+{
+ *result = 0;
+
+ if (!n->subsys || !n->subsys->endgrp.fdp.enabled) {
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+
+ *result = FIELD_DP16(0, FEAT_FDP, FDPE, 1);
+ *result = FIELD_DP16(*result, FEAT_FDP, CONF_NDX, 0);
+
+ return NVME_SUCCESS;
+}
+
+static uint16_t nvme_get_feature_fdp_events(NvmeCtrl *n, NvmeNamespace *ns,
+ NvmeRequest *req, uint32_t *result)
+{
+ NvmeCmd *cmd = &req->cmd;
+ uint32_t cdw11 = le32_to_cpu(cmd->cdw11);
+ uint16_t ph = cdw11 & 0xffff;
+ uint8_t noet = (cdw11 >> 16) & 0xff;
+ uint16_t ruhid, ret;
+ uint32_t nentries = 0;
+ uint8_t s_events_ndx = 0;
+ size_t s_events_siz = sizeof(NvmeFdpEventDescr) * noet;
+ g_autofree NvmeFdpEventDescr *s_events = g_malloc0(s_events_siz);
+ NvmeRuHandle *ruh;
+ NvmeFdpEventDescr *s_event;
+
+ if (!n->subsys || !n->subsys->endgrp.fdp.enabled) {
+ return NVME_FDP_DISABLED | NVME_DNR;
+ }
+
+ if (!nvme_ph_valid(ns, ph)) {
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+
+ ruhid = ns->fdp.phs[ph];
+ ruh = &n->subsys->endgrp.fdp.ruhs[ruhid];
+
+ assert(ruh);
+
+ if (unlikely(noet == 0)) {
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+
+ for (uint8_t event_type = 0; event_type < FDP_EVT_MAX; event_type++) {
+ uint8_t shift = nvme_fdp_evf_shifts[event_type];
+ if (!shift && event_type) {
+ /*
+ * only first entry (event_type == 0) has a shift value of 0
+ * other entries are simply unpopulated.
+ */
+ continue;
+ }
+
+ nentries++;
+
+ s_event = &s_events[s_events_ndx];
+ s_event->evt = event_type;
+ s_event->evta = (ruh->event_filter >> shift) & 0x1;
+
+ /* break if all `noet` entries are filled */
+ if ((++s_events_ndx) == noet) {
+ break;
+ }
+ }
+
+ ret = nvme_c2h(n, s_events, s_events_siz, req);
+ if (ret) {
+ return ret;
+ }
+
+ *result = nentries;
+ return NVME_SUCCESS;
+}
+
static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeRequest *req)
{
NvmeCmd *cmd = &req->cmd;
@@ -5222,6 +5842,7 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeRequest *req)
uint16_t iv;
NvmeNamespace *ns;
int i;
+ uint16_t endgrpid = 0, ret = NVME_SUCCESS;
static const uint32_t nvme_feature_default[NVME_FID_MAX] = {
[NVME_ARBITRATION] = NVME_ARB_AB_NOLIMIT,
@@ -5319,6 +5940,33 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeRequest *req)
case NVME_HOST_BEHAVIOR_SUPPORT:
return nvme_c2h(n, (uint8_t *)&n->features.hbs,
sizeof(n->features.hbs), req);
+ case NVME_FDP_MODE:
+ endgrpid = dw11 & 0xff;
+
+ if (endgrpid != 0x1) {
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+
+ ret = nvme_get_feature_fdp(n, endgrpid, &result);
+ if (ret) {
+ return ret;
+ }
+ goto out;
+ case NVME_FDP_EVENTS:
+ if (!nvme_nsid_valid(n, nsid)) {
+ return NVME_INVALID_NSID | NVME_DNR;
+ }
+
+ ns = nvme_ns(n, nsid);
+ if (unlikely(!ns)) {
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+
+ ret = nvme_get_feature_fdp_events(n, ns, req, &result);
+ if (ret) {
+ return ret;
+ }
+ goto out;
default:
break;
}
@@ -5352,6 +6000,20 @@ defaults:
result |= NVME_INTVC_NOCOALESCING;
}
break;
+ case NVME_FDP_MODE:
+ endgrpid = dw11 & 0xff;
+
+ if (endgrpid != 0x1) {
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+
+ ret = nvme_get_feature_fdp(n, endgrpid, &result);
+ if (ret) {
+ return ret;
+ }
+ goto out;
+
+ break;
default:
result = nvme_feature_default[fid];
break;
@@ -5359,7 +6021,7 @@ defaults:
out:
req->cqe.result = cpu_to_le32(result);
- return NVME_SUCCESS;
+ return ret;
}
static uint16_t nvme_set_feature_timestamp(NvmeCtrl *n, NvmeRequest *req)
@@ -5377,6 +6039,51 @@ static uint16_t nvme_set_feature_timestamp(NvmeCtrl *n, NvmeRequest *req)
return NVME_SUCCESS;
}
+static uint16_t nvme_set_feature_fdp_events(NvmeCtrl *n, NvmeNamespace *ns,
+ NvmeRequest *req)
+{
+ NvmeCmd *cmd = &req->cmd;
+ uint32_t cdw11 = le32_to_cpu(cmd->cdw11);
+ uint16_t ph = cdw11 & 0xffff;
+ uint8_t noet = (cdw11 >> 16) & 0xff;
+ uint16_t ret, ruhid;
+ uint8_t enable = le32_to_cpu(cmd->cdw12) & 0x1;
+ uint8_t event_mask = 0;
+ unsigned int i;
+ g_autofree uint8_t *events = g_malloc0(noet);
+ NvmeRuHandle *ruh = NULL;
+
+ assert(ns);
+
+ if (!n->subsys || !n->subsys->endgrp.fdp.enabled) {
+ return NVME_FDP_DISABLED | NVME_DNR;
+ }
+
+ if (!nvme_ph_valid(ns, ph)) {
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+
+ ruhid = ns->fdp.phs[ph];
+ ruh = &n->subsys->endgrp.fdp.ruhs[ruhid];
+
+ ret = nvme_h2c(n, events, noet, req);
+ if (ret) {
+ return ret;
+ }
+
+ for (i = 0; i < noet; i++) {
+ event_mask |= (1 << nvme_fdp_evf_shifts[events[i]]);
+ }
+
+ if (enable) {
+ ruh->event_filter |= event_mask;
+ } else {
+ ruh->event_filter = ruh->event_filter & ~event_mask;
+ }
+
+ return NVME_SUCCESS;
+}
+
static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest *req)
{
NvmeNamespace *ns = NULL;
@@ -5536,6 +6243,11 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest *req)
return NVME_CMD_SET_CMB_REJECTED | NVME_DNR;
}
break;
+ case NVME_FDP_MODE:
+ /* spec: abort with cmd seq err if there's one or more NS' in endgrp */
+ return NVME_CMD_SEQ_ERROR | NVME_DNR;
+ case NVME_FDP_EVENTS:
+ return nvme_set_feature_fdp_events(n, ns, req);
default:
return NVME_FEAT_NOT_CHANGEABLE | NVME_DNR;
}
@@ -6104,6 +6816,61 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const NvmeRequest *req)
return NVME_SUCCESS;
}
+static uint16_t nvme_directive_send(NvmeCtrl *n, NvmeRequest *req)
+{
+ return NVME_INVALID_FIELD | NVME_DNR;
+}
+
+static uint16_t nvme_directive_receive(NvmeCtrl *n, NvmeRequest *req)
+{
+ NvmeNamespace *ns;
+ uint32_t dw10 = le32_to_cpu(req->cmd.cdw10);
+ uint32_t dw11 = le32_to_cpu(req->cmd.cdw11);
+ uint32_t nsid = le32_to_cpu(req->cmd.nsid);
+ uint8_t doper, dtype;
+ uint32_t numd, trans_len;
+ NvmeDirectiveIdentify id = {
+ .supported = 1 << NVME_DIRECTIVE_IDENTIFY,
+ .enabled = 1 << NVME_DIRECTIVE_IDENTIFY,
+ };
+
+ numd = dw10 + 1;
+ doper = dw11 & 0xff;
+ dtype = (dw11 >> 8) & 0xff;
+
+ trans_len = MIN(sizeof(NvmeDirectiveIdentify), numd << 2);
+
+ if (nsid == NVME_NSID_BROADCAST || dtype != NVME_DIRECTIVE_IDENTIFY ||
+ doper != NVME_DIRECTIVE_RETURN_PARAMS) {
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+
+ ns = nvme_ns(n, nsid);
+ if (!ns) {
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+
+ switch (dtype) {
+ case NVME_DIRECTIVE_IDENTIFY:
+ switch (doper) {
+ case NVME_DIRECTIVE_RETURN_PARAMS:
+ if (ns->endgrp->fdp.enabled) {
+ id.supported |= 1 << NVME_DIRECTIVE_DATA_PLACEMENT;
+ id.enabled |= 1 << NVME_DIRECTIVE_DATA_PLACEMENT;
+ id.persistent |= 1 << NVME_DIRECTIVE_DATA_PLACEMENT;
+ }
+
+ return nvme_c2h(n, (uint8_t *)&id, trans_len, req);
+
+ default:
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+
+ default:
+ return NVME_INVALID_FIELD;
+ }
+}
+
static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req)
{
trace_pci_nvme_admin_cmd(nvme_cid(req), nvme_sqid(req), req->cmd.opcode,
@@ -6152,6 +6919,10 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req)
return nvme_dbbuf_config(n, req);
case NVME_ADM_CMD_FORMAT_NVM:
return nvme_format(n, req);
+ case NVME_ADM_CMD_DIRECTIVE_SEND:
+ return nvme_directive_send(n, req);
+ case NVME_ADM_CMD_DIRECTIVE_RECV:
+ return nvme_directive_receive(n, req);
default:
assert(false);
}
@@ -7380,6 +8151,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
uint8_t *pci_conf = pci_dev->config;
uint64_t cap = ldq_le_p(&n->bar.cap);
NvmeSecCtrlEntry *sctrl = nvme_sctrl(n);
+ uint32_t ctratt;
id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID));
id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID));
@@ -7390,7 +8162,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
id->cntlid = cpu_to_le16(n->cntlid);
id->oaes = cpu_to_le32(NVME_OAES_NS_ATTR);
- id->ctratt |= cpu_to_le32(NVME_CTRATT_ELBAS);
+ ctratt = NVME_CTRATT_ELBAS;
id->rab = 6;
@@ -7407,7 +8179,8 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
id->mdts = n->params.mdts;
id->ver = cpu_to_le32(NVME_SPEC_VER);
id->oacs =
- cpu_to_le16(NVME_OACS_NS_MGMT | NVME_OACS_FORMAT | NVME_OACS_DBBUF);
+ cpu_to_le16(NVME_OACS_NS_MGMT | NVME_OACS_FORMAT | NVME_OACS_DBBUF |
+ NVME_OACS_DIRECTIVES);
id->cntrltype = 0x1;
/*
@@ -7457,8 +8230,17 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
if (n->subsys) {
id->cmic |= NVME_CMIC_MULTI_CTRL;
+ ctratt |= NVME_CTRATT_ENDGRPS;
+
+ id->endgidmax = cpu_to_le16(0x1);
+
+ if (n->subsys->endgrp.fdp.enabled) {
+ ctratt |= NVME_CTRATT_FDPS;
+ }
}
+ id->ctratt = cpu_to_le32(ctratt);
+
NVME_CAP_SET_MQES(cap, 0x7ff);
NVME_CAP_SET_CQR(cap, 1);
NVME_CAP_SET_TO(cap, 0xf);
diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c
index 62a1f97..cfac960 100644
--- a/hw/nvme/ns.c
+++ b/hw/nvme/ns.c
@@ -14,8 +14,10 @@
#include "qemu/osdep.h"
#include "qemu/units.h"
+#include "qemu/cutils.h"
#include "qemu/error-report.h"
#include "qapi/error.h"
+#include "qemu/bitops.h"
#include "sysemu/sysemu.h"
#include "sysemu/block-backend.h"
@@ -377,6 +379,130 @@ static void nvme_zoned_ns_shutdown(NvmeNamespace *ns)
assert(ns->nr_open_zones == 0);
}
+static NvmeRuHandle *nvme_find_ruh_by_attr(NvmeEnduranceGroup *endgrp,
+ uint8_t ruha, uint16_t *ruhid)
+{
+ for (uint16_t i = 0; i < endgrp->fdp.nruh; i++) {
+ NvmeRuHandle *ruh = &endgrp->fdp.ruhs[i];
+
+ if (ruh->ruha == ruha) {
+ *ruhid = i;
+ return ruh;
+ }
+ }
+
+ return NULL;
+}
+
+static bool nvme_ns_init_fdp(NvmeNamespace *ns, Error **errp)
+{
+ NvmeEnduranceGroup *endgrp = ns->endgrp;
+ NvmeRuHandle *ruh;
+ uint8_t lbafi = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas);
+ unsigned int *ruhid, *ruhids;
+ char *r, *p, *token;
+ uint16_t *ph;
+
+ if (!ns->params.fdp.ruhs) {
+ ns->fdp.nphs = 1;
+ ph = ns->fdp.phs = g_new(uint16_t, 1);
+
+ ruh = nvme_find_ruh_by_attr(endgrp, NVME_RUHA_CTRL, ph);
+ if (!ruh) {
+ ruh = nvme_find_ruh_by_attr(endgrp, NVME_RUHA_UNUSED, ph);
+ if (!ruh) {
+ error_setg(errp, "no unused reclaim unit handles left");
+ return false;
+ }
+
+ ruh->ruha = NVME_RUHA_CTRL;
+ ruh->lbafi = lbafi;
+ ruh->ruamw = endgrp->fdp.runs >> ns->lbaf.ds;
+
+ for (uint16_t rg = 0; rg < endgrp->fdp.nrg; rg++) {
+ ruh->rus[rg].ruamw = ruh->ruamw;
+ }
+ } else if (ruh->lbafi != lbafi) {
+ error_setg(errp, "lba format index of controller assigned "
+ "reclaim unit handle does not match namespace lba "
+ "format index");
+ return false;
+ }
+
+ return true;
+ }
+
+ ruhid = ruhids = g_new0(unsigned int, endgrp->fdp.nruh);
+ r = p = strdup(ns->params.fdp.ruhs);
+
+ /* parse the placement handle identifiers */
+ while ((token = qemu_strsep(&p, ";")) != NULL) {
+ ns->fdp.nphs += 1;
+ if (ns->fdp.nphs > NVME_FDP_MAXPIDS ||
+ ns->fdp.nphs == endgrp->fdp.nruh) {
+ error_setg(errp, "too many placement handles");
+ free(r);
+ return false;
+ }
+
+ if (qemu_strtoui(token, NULL, 0, ruhid++) < 0) {
+ error_setg(errp, "cannot parse reclaim unit handle identifier");
+ free(r);
+ return false;
+ }
+ }
+
+ free(r);
+
+ ph = ns->fdp.phs = g_new(uint16_t, ns->fdp.nphs);
+
+ ruhid = ruhids;
+
+ /* verify the identifiers */
+ for (unsigned int i = 0; i < ns->fdp.nphs; i++, ruhid++, ph++) {
+ if (*ruhid >= endgrp->fdp.nruh) {
+ error_setg(errp, "invalid reclaim unit handle identifier");
+ return false;
+ }
+
+ ruh = &endgrp->fdp.ruhs[*ruhid];
+
+ switch (ruh->ruha) {
+ case NVME_RUHA_UNUSED:
+ ruh->ruha = NVME_RUHA_HOST;
+ ruh->lbafi = lbafi;
+ ruh->ruamw = endgrp->fdp.runs >> ns->lbaf.ds;
+
+ for (uint16_t rg = 0; rg < endgrp->fdp.nrg; rg++) {
+ ruh->rus[rg].ruamw = ruh->ruamw;
+ }
+
+ break;
+
+ case NVME_RUHA_HOST:
+ if (ruh->lbafi != lbafi) {
+ error_setg(errp, "lba format index of host assigned"
+ "reclaim unit handle does not match namespace "
+ "lba format index");
+ return false;
+ }
+
+ break;
+
+ case NVME_RUHA_CTRL:
+ error_setg(errp, "reclaim unit handle is controller assigned");
+ return false;
+
+ default:
+ abort();
+ }
+
+ *ph = *ruhid;
+ }
+
+ return true;
+}
+
static int nvme_ns_check_constraints(NvmeNamespace *ns, Error **errp)
{
unsigned int pi_size;
@@ -417,6 +543,11 @@ static int nvme_ns_check_constraints(NvmeNamespace *ns, Error **errp)
return -1;
}
+ if (ns->params.zoned && ns->endgrp && ns->endgrp->fdp.enabled) {
+ error_setg(errp, "cannot be a zoned- in an FDP configuration");
+ return -1;
+ }
+
if (ns->params.zoned) {
if (ns->params.max_active_zones) {
if (ns->params.max_open_zones > ns->params.max_active_zones) {
@@ -502,6 +633,12 @@ int nvme_ns_setup(NvmeNamespace *ns, Error **errp)
nvme_ns_init_zoned(ns);
}
+ if (ns->endgrp && ns->endgrp->fdp.enabled) {
+ if (!nvme_ns_init_fdp(ns, errp)) {
+ return -1;
+ }
+ }
+
return 0;
}
@@ -525,6 +662,10 @@ void nvme_ns_cleanup(NvmeNamespace *ns)
g_free(ns->zone_array);
g_free(ns->zd_extensions);
}
+
+ if (ns->endgrp && ns->endgrp->fdp.enabled) {
+ g_free(ns->fdp.phs);
+ }
}
static void nvme_ns_unrealize(DeviceState *dev)
@@ -561,6 +702,8 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp)
if (!qdev_set_parent_bus(dev, &subsys->bus.parent_bus, errp)) {
return;
}
+ ns->subsys = subsys;
+ ns->endgrp = &subsys->endgrp;
}
if (nvme_ns_setup(ns, errp)) {
@@ -591,6 +734,8 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp)
if (subsys) {
subsys->namespaces[nsid] = ns;
+ ns->id_ns.endgid = cpu_to_le16(0x1);
+
if (ns->params.detached) {
return;
}
@@ -606,6 +751,7 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp)
return;
}
+
}
nvme_attach_ns(n, ns);
@@ -644,6 +790,7 @@ static Property nvme_ns_props[] = {
DEFINE_PROP_SIZE("zoned.zrwafg", NvmeNamespace, params.zrwafg, -1),
DEFINE_PROP_BOOL("eui64-default", NvmeNamespace, params.eui64_default,
false),
+ DEFINE_PROP_STRING("fdp.ruhs", NvmeNamespace, params.fdp.ruhs),
DEFINE_PROP_END_OF_LIST(),
};
diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
index 16da27a..209e8f5 100644
--- a/hw/nvme/nvme.h
+++ b/hw/nvme/nvme.h
@@ -27,6 +27,8 @@
#define NVME_MAX_CONTROLLERS 256
#define NVME_MAX_NAMESPACES 256
#define NVME_EUI64_DEFAULT ((uint64_t)0x5254000000000000)
+#define NVME_FDP_MAX_EVENTS 63
+#define NVME_FDP_MAXPIDS 128
QEMU_BUILD_BUG_ON(NVME_MAX_NAMESPACES > NVME_NSID_BROADCAST - 1);
@@ -45,17 +47,68 @@ typedef struct NvmeBus {
OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS)
#define SUBSYS_SLOT_RSVD (void *)0xFFFF
+typedef struct NvmeReclaimUnit {
+ uint64_t ruamw;
+} NvmeReclaimUnit;
+
+typedef struct NvmeRuHandle {
+ uint8_t ruht;
+ uint8_t ruha;
+ uint64_t event_filter;
+ uint8_t lbafi;
+ uint64_t ruamw;
+
+ /* reclaim units indexed by reclaim group */
+ NvmeReclaimUnit *rus;
+} NvmeRuHandle;
+
+typedef struct NvmeFdpEventBuffer {
+ NvmeFdpEvent events[NVME_FDP_MAX_EVENTS];
+ unsigned int nelems;
+ unsigned int start;
+ unsigned int next;
+} NvmeFdpEventBuffer;
+
+typedef struct NvmeEnduranceGroup {
+ uint8_t event_conf;
+
+ struct {
+ NvmeFdpEventBuffer host_events, ctrl_events;
+
+ uint16_t nruh;
+ uint16_t nrg;
+ uint8_t rgif;
+ uint64_t runs;
+
+ uint64_t hbmw;
+ uint64_t mbmw;
+ uint64_t mbe;
+
+ bool enabled;
+
+ NvmeRuHandle *ruhs;
+ } fdp;
+} NvmeEnduranceGroup;
+
typedef struct NvmeSubsystem {
DeviceState parent_obj;
NvmeBus bus;
uint8_t subnqn[256];
char *serial;
- NvmeCtrl *ctrls[NVME_MAX_CONTROLLERS];
- NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1];
+ NvmeCtrl *ctrls[NVME_MAX_CONTROLLERS];
+ NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1];
+ NvmeEnduranceGroup endgrp;
struct {
char *nqn;
+
+ struct {
+ bool enabled;
+ uint64_t runs;
+ uint16_t nruh;
+ uint32_t nrg;
+ } fdp;
} params;
} NvmeSubsystem;
@@ -96,6 +149,21 @@ typedef struct NvmeZone {
QTAILQ_ENTRY(NvmeZone) entry;
} NvmeZone;
+#define FDP_EVT_MAX 0xff
+#define NVME_FDP_MAX_NS_RUHS 32u
+#define FDPVSS 0
+
+static const uint8_t nvme_fdp_evf_shifts[FDP_EVT_MAX] = {
+ /* Host events */
+ [FDP_EVT_RU_NOT_FULLY_WRITTEN] = 0,
+ [FDP_EVT_RU_ATL_EXCEEDED] = 1,
+ [FDP_EVT_CTRL_RESET_RUH] = 2,
+ [FDP_EVT_INVALID_PID] = 3,
+ /* CTRL events */
+ [FDP_EVT_MEDIA_REALLOC] = 32,
+ [FDP_EVT_RUH_IMPLICIT_RU_CHANGE] = 33,
+};
+
typedef struct NvmeNamespaceParams {
bool detached;
bool shared;
@@ -125,6 +193,10 @@ typedef struct NvmeNamespaceParams {
uint32_t numzrwa;
uint64_t zrwas;
uint64_t zrwafg;
+
+ struct {
+ char *ruhs;
+ } fdp;
} NvmeNamespaceParams;
typedef struct NvmeNamespace {
@@ -167,10 +239,18 @@ typedef struct NvmeNamespace {
int32_t nr_active_zones;
NvmeNamespaceParams params;
+ NvmeSubsystem *subsys;
+ NvmeEnduranceGroup *endgrp;
struct {
uint32_t err_rec;
} features;
+
+ struct {
+ uint16_t nphs;
+ /* reclaim unit handle identifiers indexed by placement handle */
+ uint16_t *phs;
+ } fdp;
} NvmeNamespace;
static inline uint32_t nvme_nsid(NvmeNamespace *ns)
@@ -274,6 +354,12 @@ static inline void nvme_aor_dec_active(NvmeNamespace *ns)
assert(ns->nr_active_zones >= 0);
}
+static inline void nvme_fdp_stat_inc(uint64_t *a, uint64_t b)
+{
+ uint64_t ret = *a + b;
+ *a = ret < *a ? UINT64_MAX : ret;
+}
+
void nvme_ns_init_format(NvmeNamespace *ns);
int nvme_ns_setup(NvmeNamespace *ns, Error **errp);
void nvme_ns_drain(NvmeNamespace *ns);
@@ -340,7 +426,9 @@ static inline const char *nvme_adm_opc_str(uint8_t opc)
case NVME_ADM_CMD_GET_FEATURES: return "NVME_ADM_CMD_GET_FEATURES";
case NVME_ADM_CMD_ASYNC_EV_REQ: return "NVME_ADM_CMD_ASYNC_EV_REQ";
case NVME_ADM_CMD_NS_ATTACHMENT: return "NVME_ADM_CMD_NS_ATTACHMENT";
+ case NVME_ADM_CMD_DIRECTIVE_SEND: return "NVME_ADM_CMD_DIRECTIVE_SEND";
case NVME_ADM_CMD_VIRT_MNGMT: return "NVME_ADM_CMD_VIRT_MNGMT";
+ case NVME_ADM_CMD_DIRECTIVE_RECV: return "NVME_ADM_CMD_DIRECTIVE_RECV";
case NVME_ADM_CMD_DBBUF_CONFIG: return "NVME_ADM_CMD_DBBUF_CONFIG";
case NVME_ADM_CMD_FORMAT_NVM: return "NVME_ADM_CMD_FORMAT_NVM";
default: return "NVME_ADM_CMD_UNKNOWN";
diff --git a/hw/nvme/subsys.c b/hw/nvme/subsys.c
index 9d26436..24ddec8 100644
--- a/hw/nvme/subsys.c
+++ b/hw/nvme/subsys.c
@@ -7,10 +7,13 @@
*/
#include "qemu/osdep.h"
+#include "qemu/units.h"
#include "qapi/error.h"
#include "nvme.h"
+#define NVME_DEFAULT_RU_SIZE (96 * MiB)
+
static int nvme_subsys_reserve_cntlids(NvmeCtrl *n, int start, int num)
{
NvmeSubsystem *subsys = n->subsys;
@@ -109,13 +112,95 @@ void nvme_subsys_unregister_ctrl(NvmeSubsystem *subsys, NvmeCtrl *n)
n->cntlid = -1;
}
-static void nvme_subsys_setup(NvmeSubsystem *subsys)
+static bool nvme_calc_rgif(uint16_t nruh, uint16_t nrg, uint8_t *rgif)
+{
+ uint16_t val;
+ unsigned int i;
+
+ if (unlikely(nrg == 1)) {
+ /* PIDRG_NORGI scenario, all of pid is used for PHID */
+ *rgif = 0;
+ return true;
+ }
+
+ val = nrg;
+ i = 0;
+ while (val) {
+ val >>= 1;
+ i++;
+ }
+ *rgif = i;
+
+ /* ensure remaining bits suffice to represent number of phids in a RG */
+ if (unlikely((UINT16_MAX >> i) < nruh)) {
+ *rgif = 0;
+ return false;
+ }
+
+ return true;
+}
+
+static bool nvme_subsys_setup_fdp(NvmeSubsystem *subsys, Error **errp)
+{
+ NvmeEnduranceGroup *endgrp = &subsys->endgrp;
+
+ if (!subsys->params.fdp.runs) {
+ error_setg(errp, "fdp.runs must be non-zero");
+ return false;
+ }
+
+ endgrp->fdp.runs = subsys->params.fdp.runs;
+
+ if (!subsys->params.fdp.nrg) {
+ error_setg(errp, "fdp.nrg must be non-zero");
+ return false;
+ }
+
+ endgrp->fdp.nrg = subsys->params.fdp.nrg;
+
+ if (!subsys->params.fdp.nruh) {
+ error_setg(errp, "fdp.nruh must be non-zero");
+ return false;
+ }
+
+ endgrp->fdp.nruh = subsys->params.fdp.nruh;
+
+ if (!nvme_calc_rgif(endgrp->fdp.nruh, endgrp->fdp.nrg, &endgrp->fdp.rgif)) {
+ error_setg(errp,
+ "cannot derive a valid rgif (nruh %"PRIu16" nrg %"PRIu32")",
+ endgrp->fdp.nruh, endgrp->fdp.nrg);
+ return false;
+ }
+
+ endgrp->fdp.ruhs = g_new(NvmeRuHandle, endgrp->fdp.nruh);
+
+ for (uint16_t ruhid = 0; ruhid < endgrp->fdp.nruh; ruhid++) {
+ endgrp->fdp.ruhs[ruhid] = (NvmeRuHandle) {
+ .ruht = NVME_RUHT_INITIALLY_ISOLATED,
+ .ruha = NVME_RUHA_UNUSED,
+ };
+
+ endgrp->fdp.ruhs[ruhid].rus = g_new(NvmeReclaimUnit, endgrp->fdp.nrg);
+ }
+
+ endgrp->fdp.enabled = true;
+
+ return true;
+}
+
+static bool nvme_subsys_setup(NvmeSubsystem *subsys, Error **errp)
{
const char *nqn = subsys->params.nqn ?
subsys->params.nqn : subsys->parent_obj.id;
snprintf((char *)subsys->subnqn, sizeof(subsys->subnqn),
"nqn.2019-08.org.qemu:%s", nqn);
+
+ if (subsys->params.fdp.enabled && !nvme_subsys_setup_fdp(subsys, errp)) {
+ return false;
+ }
+
+ return true;
}
static void nvme_subsys_realize(DeviceState *dev, Error **errp)
@@ -124,11 +209,16 @@ static void nvme_subsys_realize(DeviceState *dev, Error **errp)
qbus_init(&subsys->bus, sizeof(NvmeBus), TYPE_NVME_BUS, dev, dev->id);
- nvme_subsys_setup(subsys);
+ nvme_subsys_setup(subsys, errp);
}
static Property nvme_subsystem_props[] = {
DEFINE_PROP_STRING("nqn", NvmeSubsystem, params.nqn),
+ DEFINE_PROP_BOOL("fdp", NvmeSubsystem, params.fdp.enabled, false),
+ DEFINE_PROP_SIZE("fdp.runs", NvmeSubsystem, params.fdp.runs,
+ NVME_DEFAULT_RU_SIZE),
+ DEFINE_PROP_UINT32("fdp.nrg", NvmeSubsystem, params.fdp.nrg, 1),
+ DEFINE_PROP_UINT16("fdp.nruh", NvmeSubsystem, params.fdp.nruh, 0),
DEFINE_PROP_END_OF_LIST(),
};
diff --git a/hw/nvme/trace-events b/hw/nvme/trace-events
index b16f226..7f7837e 100644
--- a/hw/nvme/trace-events
+++ b/hw/nvme/trace-events
@@ -117,6 +117,7 @@ pci_nvme_clear_ns_reset(uint32_t state, uint64_t slba) "zone state=%"PRIu32", sl
pci_nvme_zoned_zrwa_implicit_flush(uint64_t zslba, uint32_t nlb) "zslba 0x%"PRIx64" nlb %"PRIu32""
pci_nvme_pci_reset(void) "PCI Function Level Reset"
pci_nvme_virt_mngmt(uint16_t cid, uint16_t act, uint16_t cntlid, const char* rt, uint16_t nr) "cid %"PRIu16", act=0x%"PRIx16", ctrlid=%"PRIu16" %s nr=%"PRIu16""
+pci_nvme_fdp_ruh_change(uint16_t rgid, uint16_t ruhid) "change RU on RUH rgid=%"PRIu16", ruhid=%"PRIu16""
# error conditions
pci_nvme_err_mdts(size_t len) "len %zu"