aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- MAINTAINERS 1
-rw-r--r-- docs/system/devices/nvme.rst 7
-rw-r--r-- hw/core/machine.c 1
-rw-r--r-- hw/nvme/ctrl.c 97
-rw-r--r-- hw/nvme/meson.build 2
-rw-r--r-- hw/nvme/nguid.c 187
-rw-r--r-- hw/nvme/ns.c 2
-rw-r--r-- hw/nvme/nvme.h 27
8 files changed, 290 insertions, 34 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 0087e4f..42e5914 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2407,6 +2407,7 @@ F: docs/system/devices/virtio-snd.rst
nvme
M: Keith Busch <kbusch@kernel.org>
M: Klaus Jensen <its@irrelevant.dk>
+R: Jesper Devantier <foss@defmacro.it>
L: qemu-block@nongnu.org
S: Supported
F: hw/nvme/*
diff --git a/docs/system/devices/nvme.rst b/docs/system/devices/nvme.rst
index 4ea957b..d2b1ca9 100644
--- a/docs/system/devices/nvme.rst
+++ b/docs/system/devices/nvme.rst
@@ -81,6 +81,13 @@ There are a number of parameters available:
Set the UUID of the namespace. This will be reported as a "Namespace UUID"
descriptor in the Namespace Identification Descriptor List.
+``nguid``
+ Set the NGUID of the namespace. This will be reported as a "Namespace Globally
+ Unique Identifier" descriptor in the Namespace Identification Descriptor List.
+ It is specified as a string of hexadecimal digits encoding exactly 16 bytes,
+ or "auto" for a random value. An optional '-' separator can be used to group
+ bytes. If not specified, the NGUID will remain all zeros.
+
``eui64``
Set the EUI-64 of the namespace. This will be reported as a "IEEE Extended
Unique Identifier" descriptor in the Namespace Identification Descriptor List.
diff --git a/hw/core/machine.c b/hw/core/machine.c
index f64dc5c..e483b34 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -102,6 +102,7 @@ GlobalProperty hw_compat_5_2[] = {
{ "PIIX4_PM", "smm-compat", "on"},
{ "virtio-blk-device", "report-discard-granularity", "off" },
{ "virtio-net-pci-base", "vectors", "3"},
+ { "nvme", "msix-exclusive-bar", "on"},
};
const size_t hw_compat_5_2_len = G_N_ELEMENTS(hw_compat_5_2);
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 76fe039..036b154 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -2855,7 +2855,7 @@ static inline uint16_t nvme_check_copy_mcl(NvmeNamespace *ns,
uint32_t nlb;
nvme_copy_source_range_parse(iocb->ranges, idx, iocb->format, NULL,
&nlb, NULL, NULL, NULL);
- copy_len += nlb + 1;
+ copy_len += nlb;
}
if (copy_len > ns->id_ns.mcl) {
@@ -5642,6 +5642,10 @@ static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeRequest *req)
} QEMU_PACKED uuid = {};
struct {
NvmeIdNsDescr hdr;
+ uint8_t v[NVME_NIDL_NGUID];
+ } QEMU_PACKED nguid = {};
+ struct {
+ NvmeIdNsDescr hdr;
uint64_t v;
} QEMU_PACKED eui64 = {};
struct {
@@ -5668,6 +5672,14 @@ static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeRequest *req)
pos += sizeof(uuid);
}
+ if (!nvme_nguid_is_null(&ns->params.nguid)) {
+ nguid.hdr.nidt = NVME_NIDT_NGUID;
+ nguid.hdr.nidl = NVME_NIDL_NGUID;
+ memcpy(nguid.v, ns->params.nguid.data, NVME_NIDL_NGUID);
+ memcpy(pos, &nguid, sizeof(nguid));
+ pos += sizeof(nguid);
+ }
+
if (ns->params.eui64) {
eui64.hdr.nidt = NVME_NIDT_EUI64;
eui64.hdr.nidl = NVME_NIDL_EUI64;
@@ -7798,6 +7810,11 @@ static bool nvme_check_params(NvmeCtrl *n, Error **errp)
}
if (n->pmr.dev) {
+ if (params->msix_exclusive_bar) {
+ error_setg(errp, "not enough BARs available to enable PMR");
+ return false;
+ }
+
if (host_memory_backend_is_mapped(n->pmr.dev)) {
error_setg(errp, "can't use already busy memdev: %s",
object_get_canonical_path_component(OBJECT(n->pmr.dev)));
@@ -8003,13 +8020,18 @@ static void nvme_init_pmr(NvmeCtrl *n, PCIDevice *pci_dev)
memory_region_set_enabled(&n->pmr.dev->mr, false);
}
-static uint64_t nvme_bar_size(unsigned total_queues, unsigned total_irqs,
- unsigned *msix_table_offset,
- unsigned *msix_pba_offset)
+static uint64_t nvme_mbar_size(unsigned total_queues, unsigned total_irqs,
+ unsigned *msix_table_offset,
+ unsigned *msix_pba_offset)
{
- uint64_t bar_size, msix_table_size, msix_pba_size;
+ uint64_t bar_size, msix_table_size;
bar_size = sizeof(NvmeBar) + 2 * total_queues * NVME_DB_SIZE;
+
+ if (total_irqs == 0) {
+ goto out;
+ }
+
bar_size = QEMU_ALIGN_UP(bar_size, 4 * KiB);
if (msix_table_offset) {
@@ -8024,11 +8046,10 @@ static uint64_t nvme_bar_size(unsigned total_queues, unsigned total_irqs,
*msix_pba_offset = bar_size;
}
- msix_pba_size = QEMU_ALIGN_UP(total_irqs, 64) / 8;
- bar_size += msix_pba_size;
+ bar_size += QEMU_ALIGN_UP(total_irqs, 64) / 8;
- bar_size = pow2ceil(bar_size);
- return bar_size;
+out:
+ return pow2ceil(bar_size);
}
static void nvme_init_sriov(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t offset)
@@ -8036,7 +8057,7 @@ static void nvme_init_sriov(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t offset)
uint16_t vf_dev_id = n->params.use_intel_id ?
PCI_DEVICE_ID_INTEL_NVME : PCI_DEVICE_ID_REDHAT_NVME;
NvmePriCtrlCap *cap = &n->pri_ctrl_cap;
- uint64_t bar_size = nvme_bar_size(le16_to_cpu(cap->vqfrsm),
+ uint64_t bar_size = nvme_mbar_size(le16_to_cpu(cap->vqfrsm),
le16_to_cpu(cap->vifrsm),
NULL, NULL);
@@ -8075,7 +8096,7 @@ static bool nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp)
ERRP_GUARD();
uint8_t *pci_conf = pci_dev->config;
uint64_t bar_size;
- unsigned msix_table_offset, msix_pba_offset;
+ unsigned msix_table_offset = 0, msix_pba_offset = 0;
int ret;
pci_conf[PCI_INTERRUPT_PIN] = 1;
@@ -8097,24 +8118,38 @@ static bool nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp)
pcie_ari_init(pci_dev, 0x100);
}
- /* add one to max_ioqpairs to account for the admin queue pair */
- bar_size = nvme_bar_size(n->params.max_ioqpairs + 1, n->params.msix_qsize,
- &msix_table_offset, &msix_pba_offset);
+ if (n->params.msix_exclusive_bar && !pci_is_vf(pci_dev)) {
+ bar_size = nvme_mbar_size(n->params.max_ioqpairs + 1, 0, NULL, NULL);
+ memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n, "nvme",
+ bar_size);
+ pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY |
+ PCI_BASE_ADDRESS_MEM_TYPE_64, &n->iomem);
+ ret = msix_init_exclusive_bar(pci_dev, n->params.msix_qsize, 4, errp);
+ } else {
+ assert(n->params.msix_qsize >= 1);
- memory_region_init(&n->bar0, OBJECT(n), "nvme-bar0", bar_size);
- memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n, "nvme",
- msix_table_offset);
- memory_region_add_subregion(&n->bar0, 0, &n->iomem);
+ /* add one to max_ioqpairs to account for the admin queue pair */
+ bar_size = nvme_mbar_size(n->params.max_ioqpairs + 1,
+ n->params.msix_qsize, &msix_table_offset,
+ &msix_pba_offset);
- if (pci_is_vf(pci_dev)) {
- pcie_sriov_vf_register_bar(pci_dev, 0, &n->bar0);
- } else {
- pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY |
- PCI_BASE_ADDRESS_MEM_TYPE_64, &n->bar0);
+ memory_region_init(&n->bar0, OBJECT(n), "nvme-bar0", bar_size);
+ memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n, "nvme",
+ msix_table_offset);
+ memory_region_add_subregion(&n->bar0, 0, &n->iomem);
+
+ if (pci_is_vf(pci_dev)) {
+ pcie_sriov_vf_register_bar(pci_dev, 0, &n->bar0);
+ } else {
+ pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY |
+ PCI_BASE_ADDRESS_MEM_TYPE_64, &n->bar0);
+ }
+
+ ret = msix_init(pci_dev, n->params.msix_qsize,
+ &n->bar0, 0, msix_table_offset,
+ &n->bar0, 0, msix_pba_offset, 0, errp);
}
- ret = msix_init(pci_dev, n->params.msix_qsize,
- &n->bar0, 0, msix_table_offset,
- &n->bar0, 0, msix_pba_offset, 0, errp);
+
if (ret == -ENOTSUP) {
/* report that msix is not supported, but do not error out */
warn_report_err(*errp);
@@ -8309,9 +8344,15 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
if (pci_is_vf(pci_dev)) {
/*
* VFs derive settings from the parent. PF's lifespan exceeds
- * that of VF's, so it's safe to share params.serial.
+ * that of VF's.
*/
memcpy(&n->params, &pn->params, sizeof(NvmeParams));
+
+ /*
+ * Give the VF its own copy of the serial string, so that releasing the
+ * VF's 'serial' property on removal does not free the PF's string.
+ */
+ n->params.serial = g_strdup(pn->params.serial);
n->subsys = pn->subsys;
}
@@ -8412,6 +8453,8 @@ static Property nvme_props[] = {
params.sriov_max_vi_per_vf, 0),
DEFINE_PROP_UINT8("sriov_max_vq_per_vf", NvmeCtrl,
params.sriov_max_vq_per_vf, 0),
+ DEFINE_PROP_BOOL("msix-exclusive-bar", NvmeCtrl, params.msix_exclusive_bar,
+ false),
DEFINE_PROP_END_OF_LIST(),
};
diff --git a/hw/nvme/meson.build b/hw/nvme/meson.build
index 1a6a2ca..7d5caa5 100644
--- a/hw/nvme/meson.build
+++ b/hw/nvme/meson.build
@@ -1 +1 @@
-system_ss.add(when: 'CONFIG_NVME_PCI', if_true: files('ctrl.c', 'dif.c', 'ns.c', 'subsys.c'))
+system_ss.add(when: 'CONFIG_NVME_PCI', if_true: files('ctrl.c', 'dif.c', 'ns.c', 'subsys.c', 'nguid.c'))
\ No newline at end of file
diff --git a/hw/nvme/nguid.c b/hw/nvme/nguid.c
new file mode 100644
index 0000000..829832b
--- /dev/null
+++ b/hw/nvme/nguid.c
@@ -0,0 +1,187 @@
+/*
+ * QEMU NVMe NGUID functions
+ *
+ * Copyright 2024 Google LLC
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/visitor.h"
+#include "qemu/ctype.h"
+#include "nvme.h"
+
+#define NGUID_SEPARATOR '-' /* optional separator between groups of bytes */
+
+#define NGUID_VALUE_AUTO "auto" /* property value asking for a random NGUID */
+
+#define NGUID_FMT \
+ "%02hhx%02hhx%02hhx%02hhx" \
+ "%02hhx%02hhx%02hhx%02hhx" \
+ "%02hhx%02hhx%02hhx%02hhx" \
+ "%02hhx%02hhx%02hhx%02hhx" /* 16 bytes, two hex digits each */
+
+#define NGUID_STR_LEN (2 * NGUID_LEN + 1) /* hex digits plus trailing NUL */
+
+/* True when @nguid is all zeros, i.e. no NGUID has been configured. */
+bool nvme_nguid_is_null(const NvmeNGUID *nguid)
+{
+    return !memcmp(nguid, &(NvmeNGUID){ { 0 } }, sizeof(*nguid));
+}
+
+/* Fill @out with random bytes, drawn one 32-bit word at a time. */
+static void nvme_nguid_generate(NvmeNGUID *out)
+{
+    uint8_t *dst = out->data;
+    uint32_t word;
+
+    QEMU_BUILD_BUG_ON((NGUID_LEN % sizeof(word)) != 0);
+    for (; dst < out->data + NGUID_LEN; dst += sizeof(word)) {
+        word = g_random_int();
+        memcpy(dst, &word, sizeof(word));
+    }
+}
+
+/*
+ * Check that @str is a well-formed NGUID string: exactly 16 bytes written as
+ * hexadecimal digits, optionally grouped with the '-' separator. Returns true
+ * if the string is acceptable and false otherwise.
+ *
+ * The Linux Kernel typically prints the NGUID of an NVMe namespace using the
+ * same format as the UUID. For instance:
+ *
+ * $ cat /sys/class/block/nvme0n1/nguid
+ * e9accd3b-8390-4e13-167c-f0593437f57d
+ *
+ * When there is no UUID but there is an NGUID, the Kernel prints the NGUID as
+ * wwid and it won't use the UUID format:
+ *
+ * $ cat /sys/class/block/nvme0n1/wwid
+ * eui.e9accd3b83904e13167cf0593437f57d
+ *
+ * The NGUID has different fields compared to the UUID, so the grouping used in
+ * the UUID format has no relation with the 3 fields of the NGUID.
+ *
+ * This implementation does not expect a strict format like the UUID one;
+ * instead it admits any string of hexadecimal digits. Byte groups can be
+ * created using the '-' separator. The number of bytes needs to be exactly 16
+ * and a separator must sit exactly on a byte boundary, never at the start or
+ * end of the string and never next to another separator. The following are
+ * examples of accepted formats for the NGUID string:
+ *
+ * nguid="e9accd3b-8390-4e13-167c-f0593437f57d"
+ * nguid="e9accd3b83904e13167cf0593437f57d"
+ * nguid="FEDCBA9876543210-ABCDEF-0123456789"
+ */
+static bool nvme_nguid_is_valid(const char *str)
+{
+    size_t digit_count = 0;
+    char prev = NGUID_SEPARATOR; /* makes a leading separator invalid */
+
+    for (; *str != '\0'; str++) {
+        const char c = *str;
+
+        if (qemu_isxdigit(c)) {
+            digit_count++;
+        } else if (c != NGUID_SEPARATOR || prev == NGUID_SEPARATOR ||
+                   (digit_count % 2) != 0) {
+            /*
+             * Anything but a hex digit or a separator is invalid, and so is a
+             * separator that leads the string, follows another separator or
+             * splits a byte.
+             */
+            return false;
+        }
+        prev = c;
+    }
+
+    /* Exactly 16 bytes, and no trailing separator (or empty string). */
+    return digit_count == 2 * NGUID_LEN && prev != NGUID_SEPARATOR;
+}
+
+/*
+ * Parse @str into the raw bytes of @nguid. Returns 0 on success and -1 when
+ * @str is not a valid NGUID string.
+ */
+static int nvme_nguid_parse(const char *str, NvmeNGUID *nguid)
+{
+    const char *cursor = str;
+    int byte;
+
+    if (!nvme_nguid_is_valid(str)) {
+        return -1;
+    }
+
+    for (byte = 0; byte < NGUID_LEN; byte++) {
+        if (sscanf(cursor, "%02hhx", &nguid->data[byte]) != 1) {
+            return -1;
+        }
+        cursor += 2;
+        if (*cursor == NGUID_SEPARATOR) {
+            cursor++;
+        }
+    }
+    return 0;
+}
+
+/*
+ * Write @nguid into @out as a raw hex number with no separators, for instance:
+ *
+ * "e9accd3b83904e13167cf0593437f57d"
+ *
+ * At most NGUID_STR_LEN bytes are written to @out, including the NUL.
+ */
+static void nvme_nguid_stringify(const NvmeNGUID *nguid, char *out)
+{
+    const uint8_t *b = nguid->data;
+    snprintf(out, NGUID_STR_LEN, NGUID_FMT, b[0], b[1], b[2], b[3], b[4], b[5],
+             b[6], b[7], b[8], b[9], b[10], b[11], b[12], b[13], b[14], b[15]);
+}
+
+/* Property getter: report the NGUID to the visitor as a bare hex string. */
+static void get_nguid(Object *obj, Visitor *v, const char *name, void *opaque,
+                      Error **errp)
+{
+    const NvmeNGUID *value = object_field_prop_ptr(obj, opaque);
+    char text[NGUID_STR_LEN];
+    char *text_ptr = text; /* visit_type_str() is handed a char ** */
+
+    nvme_nguid_stringify(value, text);
+
+    visit_type_str(v, name, &text_ptr, errp);
+}
+
+/* Property setter: parse the NGUID string, or generate one for "auto". */
+static void set_nguid(Object *obj, Visitor *v, const char *name, void *opaque,
+                      Error **errp)
+{
+    NvmeNGUID *nguid = object_field_prop_ptr(obj, opaque);
+    char *str;
+
+    if (!visit_type_str(v, name, &str, errp)) {
+        return;
+    }
+
+    if (strcmp(str, NGUID_VALUE_AUTO) == 0) {
+        nvme_nguid_generate(nguid);
+    } else if (nvme_nguid_parse(str, nguid) < 0) {
+        error_set_from_qdev_prop_error(errp, EINVAL, obj, name, str);
+    }
+    g_free(str);
+}
+
+/* qdev property type backing DEFINE_PROP_NGUID_NODEFAULT(). */
+const PropertyInfo qdev_prop_nguid = {
+    .name = "str",
+    .description = "NGUID or \"" NGUID_VALUE_AUTO "\" for random value",
+    .get = get_nguid,
+    .set = set_nguid,
+};
diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c
index 0eabcf5..ea8db17 100644
--- a/hw/nvme/ns.c
+++ b/hw/nvme/ns.c
@@ -89,6 +89,7 @@ static int nvme_ns_init(NvmeNamespace *ns, Error **errp)
id_ns->mcl = cpu_to_le32(ns->params.mcl);
id_ns->msrc = ns->params.msrc;
id_ns->eui64 = cpu_to_be64(ns->params.eui64);
+ memcpy(&id_ns->nguid, &ns->params.nguid.data, sizeof(id_ns->nguid));
ds = 31 - clz32(ns->blkconf.logical_block_size);
ms = ns->params.ms;
@@ -797,6 +798,7 @@ static Property nvme_ns_props[] = {
DEFINE_PROP_BOOL("shared", NvmeNamespace, params.shared, true),
DEFINE_PROP_UINT32("nsid", NvmeNamespace, params.nsid, 0),
DEFINE_PROP_UUID_NODEFAULT("uuid", NvmeNamespace, params.uuid),
+ DEFINE_PROP_NGUID_NODEFAULT("nguid", NvmeNamespace, params.nguid),
DEFINE_PROP_UINT64("eui64", NvmeNamespace, params.eui64, 0),
DEFINE_PROP_UINT16("ms", NvmeNamespace, params.ms, 0),
DEFINE_PROP_UINT8("mset", NvmeNamespace, params.mset, 0),
diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
index 5f2ae7b..bed8191 100644
--- a/hw/nvme/nvme.h
+++ b/hw/nvme/nvme.h
@@ -171,13 +171,27 @@ static const uint8_t nvme_fdp_evf_shifts[FDP_EVT_MAX] = {
[FDP_EVT_RUH_IMPLICIT_RU_CHANGE] = 33,
};
+#define NGUID_LEN 16 /* size of an NGUID in bytes */
+
+typedef struct {
+ uint8_t data[NGUID_LEN]; /* raw NGUID bytes; all zeros means "not set" */
+} NvmeNGUID;
+
+bool nvme_nguid_is_null(const NvmeNGUID *nguid);
+
+extern const PropertyInfo qdev_prop_nguid;
+
+#define DEFINE_PROP_NGUID_NODEFAULT(_name, _state, _field) \
+ DEFINE_PROP(_name, _state, _field, qdev_prop_nguid, NvmeNGUID)
+
typedef struct NvmeNamespaceParams {
- bool detached;
- bool shared;
- uint32_t nsid;
- QemuUUID uuid;
- uint64_t eui64;
- bool eui64_default;
+ bool detached;
+ bool shared;
+ uint32_t nsid;
+ QemuUUID uuid;
+ NvmeNGUID nguid;
+ uint64_t eui64;
+ bool eui64_default;
uint16_t ms;
uint8_t mset;
@@ -522,6 +536,7 @@ typedef struct NvmeParams {
uint16_t sriov_vi_flexible;
uint8_t sriov_max_vq_per_vf;
uint8_t sriov_max_vi_per_vf;
+ bool msix_exclusive_bar;
} NvmeParams;
typedef struct NvmeCtrl {