aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Peter Maydell <peter.maydell@linaro.org> 2020-01-07 16:25:00 +0000
committer Peter Maydell <peter.maydell@linaro.org> 2020-01-07 16:25:00 +0000
commit 973d306dd64bba7fc13f7e8b12ef43c089243d31 (patch)
tree 088a5983b6f2c0305111eaff06ea5731594308e7
parent 0cb04061728a5e6cf579c43b8c3695fb70d07697 (diff)
parent 56fc1e6ac6bde95bc0369d358587f2234d4dddad (diff)
download qemu-973d306dd64bba7fc13f7e8b12ef43c089243d31.zip
qemu-973d306dd64bba7fc13f7e8b12ef43c089243d31.tar.gz
qemu-973d306dd64bba7fc13f7e8b12ef43c089243d31.tar.bz2
Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging
virtio, pci, pc: fixes, features

Bugfixes all over the place.
HMAT support.
New flags for vhost-user-blk utility.
Auto-tuning of seg max for virtio storage.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Mon 06 Jan 2020 17:05:05 GMT
# gpg: using RSA key 5D09FD0871C8F85B94CA8A0D281F0DB8D28D5469
# gpg: issuer "mst@redhat.com"
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>" [full]
# gpg: aka "Michael S. Tsirkin <mst@redhat.com>" [full]
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17 0970 C350 3912 AFBE 8E67
# Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA 8A0D 281F 0DB8 D28D 5469

* remotes/mst/tags/for_upstream: (32 commits)
  intel_iommu: add present bit check for pasid table entries
  intel_iommu: a fix to vtd_find_as_from_bus_num()
  virtio-net: delete also control queue when TX/RX deleted
  virtio: reset region cache when on queue deletion
  virtio-mmio: update queue size on guest write
  tests: add virtio-scsi and virtio-blk seg_max_adjust test
  virtio: make seg_max virtqueue size dependent
  hw: fix using 4.2 compat in 5.0 machine types for i440fx/q35
  vhost-user-scsi: reset the device if supported
  vhost-user: add VHOST_USER_RESET_DEVICE to reset devices
  hw/pci/pci_host: Let pci_data_[read/write] use unsigned 'size' argument
  hw/pci/pci_host: Remove redundant PCI_DPRINTF()
  virtio-mmio: Clear v2 transport state on soft reset
  ACPI: add expected files for HMAT tests (acpihmat)
  tests/bios-tables-test: add test cases for ACPI HMAT
  tests/numa: Add case for QMP build HMAT
  hmat acpi: Build Memory Side Cache Information Structure(s)
  hmat acpi: Build System Locality Latency and Bandwidth Information Structure(s)
  hmat acpi: Build Memory Proximity Domain Attributes Structure(s)
  numa: Extend CLI to provide memory side cache information
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r-- contrib/vhost-user-blk/vhost-user-blk.c 110
-rw-r--r-- docs/interop/vhost-user.json 31
-rw-r--r-- docs/interop/vhost-user.rst 32
-rw-r--r-- hw/acpi/Kconfig 7
-rw-r--r-- hw/acpi/Makefile.objs 1
-rw-r--r-- hw/acpi/hmat.c 268
-rw-r--r-- hw/acpi/hmat.h 42
-rw-r--r-- hw/block/virtio-blk.c 18
-rw-r--r-- hw/char/virtio-serial-bus.c 8
-rw-r--r-- hw/core/machine.c 68
-rw-r--r-- hw/core/numa.c 297
-rw-r--r-- hw/i386/acpi-build.c 5
-rw-r--r-- hw/i386/intel_iommu.c 100
-rw-r--r-- hw/i386/intel_iommu_internal.h 1
-rw-r--r-- hw/i386/pc_piix.c 1
-rw-r--r-- hw/i386/pc_q35.c 1
-rw-r--r-- hw/input/virtio-input.c 5
-rw-r--r-- hw/net/virtio-net.c 3
-rw-r--r-- hw/pci/pci_host.c 25
-rw-r--r-- hw/scsi/vhost-scsi.c 2
-rw-r--r-- hw/scsi/vhost-user-scsi.c 24
-rw-r--r-- hw/scsi/virtio-scsi.c 19
-rw-r--r-- hw/virtio/vhost-user.c 8
-rw-r--r-- hw/virtio/virtio-balloon.c 7
-rw-r--r-- hw/virtio/virtio-mmio.c 17
-rw-r--r-- hw/virtio/virtio-pci.c 14
-rw-r--r-- hw/virtio/virtio.c 64
-rw-r--r-- include/hw/pci/pci_host.h 4
-rw-r--r-- include/hw/virtio/virtio-blk.h 1
-rw-r--r-- include/hw/virtio/virtio-scsi.h 1
-rw-r--r-- include/hw/virtio/virtio.h 18
-rw-r--r-- include/sysemu/numa.h 63
-rw-r--r-- qapi/machine.json 180
-rw-r--r-- qemu-options.hx 95
-rwxr-xr-x tests/acceptance/virtio_seg_max_adjust.py 134
-rw-r--r-- tests/bios-tables-test.c 44
-rw-r--r-- tests/data/acpi/pc/APIC.acpihmat bin 0 -> 128 bytes
-rw-r--r-- tests/data/acpi/pc/DSDT.acpihmat bin 0 -> 6455 bytes
-rw-r--r-- tests/data/acpi/pc/HMAT.acpihmat bin 0 -> 280 bytes
-rw-r--r-- tests/data/acpi/pc/SRAT.acpihmat bin 0 -> 280 bytes
-rw-r--r-- tests/data/acpi/q35/APIC.acpihmat bin 0 -> 128 bytes
-rw-r--r-- tests/data/acpi/q35/DSDT.acpihmat bin 0 -> 9203 bytes
-rw-r--r-- tests/data/acpi/q35/HMAT.acpihmat bin 0 -> 280 bytes
-rw-r--r-- tests/data/acpi/q35/SRAT.acpihmat bin 0 -> 280 bytes
-rw-r--r-- tests/numa-test.c 213
45 files changed, 1797 insertions, 134 deletions
diff --git a/contrib/vhost-user-blk/vhost-user-blk.c b/contrib/vhost-user-blk/vhost-user-blk.c
index ae61034..6fd91c7 100644
--- a/contrib/vhost-user-blk/vhost-user-blk.c
+++ b/contrib/vhost-user-blk/vhost-user-blk.c
@@ -576,70 +576,90 @@ vub_new(char *blk_file)
return vdev_blk;
}
+static int opt_fdnum = -1;
+static char *opt_socket_path;
+static char *opt_blk_file;
+static gboolean opt_print_caps;
+static gboolean opt_read_only;
+
+static GOptionEntry entries[] = {
+ { "print-capabilities", 'c', 0, G_OPTION_ARG_NONE, &opt_print_caps,
+ "Print capabilities", NULL },
+ { "fd", 'f', 0, G_OPTION_ARG_INT, &opt_fdnum,
+ "Use inherited fd socket", "FDNUM" },
+ { "socket-path", 's', 0, G_OPTION_ARG_FILENAME, &opt_socket_path,
+ "Use UNIX socket path", "PATH" },
+ {"blk-file", 'b', 0, G_OPTION_ARG_FILENAME, &opt_blk_file,
+ "block device or file path", "PATH"},
+ { "read-only", 'r', 0, G_OPTION_ARG_NONE, &opt_read_only,
+ "Enable read-only", NULL }
+};
+
int main(int argc, char **argv)
{
- int opt;
- char *unix_socket = NULL;
- char *blk_file = NULL;
- bool enable_ro = false;
int lsock = -1, csock = -1;
VubDev *vdev_blk = NULL;
-
- while ((opt = getopt(argc, argv, "b:rs:h")) != -1) {
- switch (opt) {
- case 'b':
- blk_file = g_strdup(optarg);
- break;
- case 's':
- unix_socket = g_strdup(optarg);
- break;
- case 'r':
- enable_ro = true;
- break;
- case 'h':
- default:
- printf("Usage: %s [ -b block device or file, -s UNIX domain socket"
- " | -r Enable read-only ] | [ -h ]\n", argv[0]);
- return 0;
+ GError *error = NULL;
+ GOptionContext *context;
+
+ context = g_option_context_new(NULL);
+ g_option_context_add_main_entries(context, entries, NULL);
+ if (!g_option_context_parse(context, &argc, &argv, &error)) {
+ g_printerr("Option parsing failed: %s\n", error->message);
+ exit(EXIT_FAILURE);
+ }
+ if (opt_print_caps) {
+ g_print("{\n");
+ g_print(" \"type\": \"block\",\n");
+ g_print(" \"features\": [\n");
+ g_print(" \"read-only\",\n");
+ g_print(" \"blk-file\"\n");
+ g_print(" ]\n");
+ g_print("}\n");
+ exit(EXIT_SUCCESS);
+ }
+
+ if (!opt_blk_file) {
+ g_print("%s\n", g_option_context_get_help(context, true, NULL));
+ exit(EXIT_FAILURE);
+ }
+
+ if (opt_socket_path) {
+ lsock = unix_sock_new(opt_socket_path);
+ if (lsock < 0) {
+ exit(EXIT_FAILURE);
}
+ } else if (opt_fdnum < 0) {
+ g_print("%s\n", g_option_context_get_help(context, true, NULL));
+ exit(EXIT_FAILURE);
+ } else {
+ lsock = opt_fdnum;
}
- if (!unix_socket || !blk_file) {
- printf("Usage: %s [ -b block device or file, -s UNIX domain socket"
- " | -r Enable read-only ] | [ -h ]\n", argv[0]);
- return -1;
- }
-
- lsock = unix_sock_new(unix_socket);
- if (lsock < 0) {
- goto err;
- }
-
- csock = accept(lsock, (void *)0, (void *)0);
+ csock = accept(lsock, NULL, NULL);
if (csock < 0) {
- fprintf(stderr, "Accept error %s\n", strerror(errno));
- goto err;
+ g_printerr("Accept error %s\n", strerror(errno));
+ exit(EXIT_FAILURE);
}
- vdev_blk = vub_new(blk_file);
+ vdev_blk = vub_new(opt_blk_file);
if (!vdev_blk) {
- goto err;
+ exit(EXIT_FAILURE);
}
- if (enable_ro) {
+ if (opt_read_only) {
vdev_blk->enable_ro = true;
}
if (!vug_init(&vdev_blk->parent, VHOST_USER_BLK_MAX_QUEUES, csock,
vub_panic_cb, &vub_iface)) {
- fprintf(stderr, "Failed to initialized libvhost-user-glib\n");
- goto err;
+ g_printerr("Failed to initialize libvhost-user-glib\n");
+ exit(EXIT_FAILURE);
}
g_main_loop_run(vdev_blk->loop);
-
+ g_main_loop_unref(vdev_blk->loop);
+ g_option_context_free(context);
vug_deinit(&vdev_blk->parent);
-
-err:
vub_free(vdev_blk);
if (csock >= 0) {
close(csock);
@@ -647,8 +667,8 @@ err:
if (lsock >= 0) {
close(lsock);
}
- g_free(unix_socket);
- g_free(blk_file);
+ g_free(opt_socket_path);
+ g_free(opt_blk_file);
return 0;
}
diff --git a/docs/interop/vhost-user.json b/docs/interop/vhost-user.json
index da6aaf5..ce0ef74 100644
--- a/docs/interop/vhost-user.json
+++ b/docs/interop/vhost-user.json
@@ -55,6 +55,37 @@
}
##
+# @VHostUserBackendBlockFeature:
+#
+# List of vhost user "block" features.
+#
+# @read-only: The --read-only command line option is supported.
+# @blk-file: The --blk-file command line option is supported.
+#
+# Since: 5.0
+##
+{
+ 'enum': 'VHostUserBackendBlockFeature',
+ 'data': [ 'read-only', 'blk-file' ]
+}
+
+##
+# @VHostUserBackendCapabilitiesBlock:
+#
+# Capabilities reported by vhost user "block" backends
+#
+# @features: list of supported features.
+#
+# Since: 5.0
+##
+{
+ 'struct': 'VHostUserBackendCapabilitiesBlock',
+ 'data': {
+ 'features': [ 'VHostUserBackendBlockFeature' ]
+ }
+}
+
+##
# @VHostUserBackendInputFeature:
#
# List of vhost user "input" features.
diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst
index 7827b71..5f8b3a4 100644
--- a/docs/interop/vhost-user.rst
+++ b/docs/interop/vhost-user.rst
@@ -785,6 +785,7 @@ Protocol features
#define VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD 10
#define VHOST_USER_PROTOCOL_F_HOST_NOTIFIER 11
#define VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD 12
+ #define VHOST_USER_PROTOCOL_F_RESET_DEVICE 13
Master message types
--------------------
@@ -1190,6 +1191,20 @@ Master message types
ancillary data. The GPU protocol is used to inform the master of
rendering state and updates. See vhost-user-gpu.rst for details.
+``VHOST_USER_RESET_DEVICE``
+ :id: 34
+ :equivalent ioctl: N/A
+ :master payload: N/A
+ :slave payload: N/A
+
+ Ask the vhost user backend to disable all rings and reset all
+ internal device state to the initial state, ready to be
+ reinitialized. The backend retains ownership of the device
+ throughout the reset operation.
+
+ Only valid if the ``VHOST_USER_PROTOCOL_F_RESET_DEVICE`` protocol
+ feature is set by the backend.
+
Slave message types
-------------------
@@ -1376,3 +1391,20 @@ Command line options:
Enable virgl rendering support.
(optional)
+
+vhost-user-blk
+--------------
+
+Command line options:
+
+--blk-file=PATH
+
+ Specify block device or file path.
+
+ (required, except with --print-capabilities)
+
+--read-only
+
+ Enable read-only.
+
+ (optional)
diff --git a/hw/acpi/Kconfig b/hw/acpi/Kconfig
index 12e3f1e..54209c6 100644
--- a/hw/acpi/Kconfig
+++ b/hw/acpi/Kconfig
@@ -7,6 +7,7 @@ config ACPI_X86
select ACPI_NVDIMM
select ACPI_CPU_HOTPLUG
select ACPI_MEMORY_HOTPLUG
+ select ACPI_HMAT
config ACPI_X86_ICH
bool
@@ -23,6 +24,10 @@ config ACPI_NVDIMM
bool
depends on ACPI
+config ACPI_HMAT
+ bool
+ depends on ACPI
+
config ACPI_PCI
bool
depends on ACPI && PCI
@@ -33,5 +38,3 @@ config ACPI_VMGENID
depends on PC
config ACPI_HW_REDUCED
- bool
- depends on ACPI
diff --git a/hw/acpi/Makefile.objs b/hw/acpi/Makefile.objs
index 9925305..777da07 100644
--- a/hw/acpi/Makefile.objs
+++ b/hw/acpi/Makefile.objs
@@ -7,6 +7,7 @@ common-obj-$(CONFIG_ACPI_CPU_HOTPLUG) += cpu.o
common-obj-$(CONFIG_ACPI_NVDIMM) += nvdimm.o
common-obj-$(CONFIG_ACPI_VMGENID) += vmgenid.o
common-obj-$(CONFIG_ACPI_HW_REDUCED) += generic_event_device.o
+common-obj-$(CONFIG_ACPI_HMAT) += hmat.o
common-obj-$(call lnot,$(CONFIG_ACPI_X86)) += acpi-stub.o
common-obj-$(call lnot,$(CONFIG_PC)) += acpi-x86-stub.o
diff --git a/hw/acpi/hmat.c b/hw/acpi/hmat.c
new file mode 100644
index 0000000..7c24bb5
--- /dev/null
+++ b/hw/acpi/hmat.c
@@ -0,0 +1,268 @@
+/*
+ * HMAT ACPI Implementation
+ *
+ * Copyright(C) 2019 Intel Corporation.
+ *
+ * Author:
+ * Liu jingqi <jingqi.liu@linux.intel.com>
+ * Tao Xu <tao3.xu@intel.com>
+ *
+ * HMAT is defined in ACPI 6.3: 5.2.27 Heterogeneous Memory Attribute Table
+ * (HMAT)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/units.h"
+#include "sysemu/numa.h"
+#include "hw/acpi/hmat.h"
+
+/*
+ * ACPI 6.3:
+ * 5.2.27.3 Memory Proximity Domain Attributes Structure: Table 5-145
+ */
+static void build_hmat_mpda(GArray *table_data, uint16_t flags,
+ uint32_t initiator, uint32_t mem_node)
+{
+
+ /* Memory Proximity Domain Attributes Structure */
+ /* Type */
+ build_append_int_noprefix(table_data, 0, 2);
+ /* Reserved */
+ build_append_int_noprefix(table_data, 0, 2);
+ /* Length */
+ build_append_int_noprefix(table_data, 40, 4);
+ /* Flags */
+ build_append_int_noprefix(table_data, flags, 2);
+ /* Reserved */
+ build_append_int_noprefix(table_data, 0, 2);
+ /* Proximity Domain for the Attached Initiator */
+ build_append_int_noprefix(table_data, initiator, 4);
+ /* Proximity Domain for the Memory */
+ build_append_int_noprefix(table_data, mem_node, 4);
+ /* Reserved */
+ build_append_int_noprefix(table_data, 0, 4);
+ /*
+ * Reserved:
+ * Previously defined as the Start Address of the System Physical
+ * Address Range. Deprecated since ACPI Spec 6.3.
+ */
+ build_append_int_noprefix(table_data, 0, 8);
+ /*
+ * Reserved:
+ * Previously defined as the Range Length of the region in bytes.
+ * Deprecated since ACPI Spec 6.3.
+ */
+ build_append_int_noprefix(table_data, 0, 8);
+}
+
+/*
+ * ACPI 6.3: 5.2.27.4 System Locality Latency and Bandwidth Information
+ * Structure: Table 5-146
+ */
+static void build_hmat_lb(GArray *table_data, HMAT_LB_Info *hmat_lb,
+ uint32_t num_initiator, uint32_t num_target,
+ uint32_t *initiator_list)
+{
+ int i, index;
+ HMAT_LB_Data *lb_data;
+ uint16_t *entry_list;
+ uint32_t base;
+ /* Length in bytes for entire structure */
+ uint32_t lb_length
+ = 32 /* Table length upto and including Entry Base Unit */
+ + 4 * num_initiator /* Initiator Proximity Domain List */
+ + 4 * num_target /* Target Proximity Domain List */
+ + 2 * num_initiator * num_target; /* Latency or Bandwidth Entries */
+
+ /* Type */
+ build_append_int_noprefix(table_data, 1, 2);
+ /* Reserved */
+ build_append_int_noprefix(table_data, 0, 2);
+ /* Length */
+ build_append_int_noprefix(table_data, lb_length, 4);
+ /* Flags: Bits [3:0] Memory Hierarchy, Bits[7:4] Reserved */
+ assert(!(hmat_lb->hierarchy >> 4));
+ build_append_int_noprefix(table_data, hmat_lb->hierarchy, 1);
+ /* Data Type */
+ build_append_int_noprefix(table_data, hmat_lb->data_type, 1);
+ /* Reserved */
+ build_append_int_noprefix(table_data, 0, 2);
+ /* Number of Initiator Proximity Domains (s) */
+ build_append_int_noprefix(table_data, num_initiator, 4);
+ /* Number of Target Proximity Domains (t) */
+ build_append_int_noprefix(table_data, num_target, 4);
+ /* Reserved */
+ build_append_int_noprefix(table_data, 0, 4);
+
+ /* Entry Base Unit */
+ if (hmat_lb->data_type <= HMAT_LB_DATA_WRITE_LATENCY) {
+ /* Convert latency base from nanoseconds to picosecond */
+ base = hmat_lb->base * 1000;
+ } else {
+ /* Convert bandwidth base from Byte to Megabyte */
+ base = hmat_lb->base / MiB;
+ }
+ build_append_int_noprefix(table_data, base, 8);
+
+ /* Initiator Proximity Domain List */
+ for (i = 0; i < num_initiator; i++) {
+ build_append_int_noprefix(table_data, initiator_list[i], 4);
+ }
+
+ /* Target Proximity Domain List */
+ for (i = 0; i < num_target; i++) {
+ build_append_int_noprefix(table_data, i, 4);
+ }
+
+ /* Latency or Bandwidth Entries */
+ entry_list = g_malloc0(num_initiator * num_target * sizeof(uint16_t));
+ for (i = 0; i < hmat_lb->list->len; i++) {
+ lb_data = &g_array_index(hmat_lb->list, HMAT_LB_Data, i);
+ index = lb_data->initiator * num_target + lb_data->target;
+
+ entry_list[index] = (uint16_t)(lb_data->data / hmat_lb->base);
+ }
+
+ for (i = 0; i < num_initiator * num_target; i++) {
+ build_append_int_noprefix(table_data, entry_list[i], 2);
+ }
+
+ g_free(entry_list);
+}
+
+/* ACPI 6.3: 5.2.27.5 Memory Side Cache Information Structure: Table 5-147 */
+static void build_hmat_cache(GArray *table_data, uint8_t total_levels,
+ NumaHmatCacheOptions *hmat_cache)
+{
+ /*
+ * Cache Attributes: Bits [3:0] – Total Cache Levels
+ * for this Memory Proximity Domain
+ */
+ uint32_t cache_attr = total_levels;
+
+ /* Bits [7:4] : Cache Level described in this structure */
+ cache_attr |= (uint32_t) hmat_cache->level << 4;
+
+ /* Bits [11:8] - Cache Associativity */
+ cache_attr |= (uint32_t) hmat_cache->associativity << 8;
+
+ /* Bits [15:12] - Write Policy */
+ cache_attr |= (uint32_t) hmat_cache->policy << 12;
+
+ /* Bits [31:16] - Cache Line size in bytes */
+ cache_attr |= (uint32_t) hmat_cache->line << 16;
+
+ /* Type */
+ build_append_int_noprefix(table_data, 2, 2);
+ /* Reserved */
+ build_append_int_noprefix(table_data, 0, 2);
+ /* Length */
+ build_append_int_noprefix(table_data, 32, 4);
+ /* Proximity Domain for the Memory */
+ build_append_int_noprefix(table_data, hmat_cache->node_id, 4);
+ /* Reserved */
+ build_append_int_noprefix(table_data, 0, 4);
+ /* Memory Side Cache Size */
+ build_append_int_noprefix(table_data, hmat_cache->size, 8);
+ /* Cache Attributes */
+ build_append_int_noprefix(table_data, cache_attr, 4);
+ /* Reserved */
+ build_append_int_noprefix(table_data, 0, 2);
+ /*
+ * Number of SMBIOS handles (n)
+ * Linux kernel uses Memory Side Cache Information Structure
+ * without SMBIOS entries for now, so set Number of SMBIOS handles
+ * as 0.
+ */
+ build_append_int_noprefix(table_data, 0, 2);
+}
+
+/* Build HMAT sub table structures */
+static void hmat_build_table_structs(GArray *table_data, NumaState *numa_state)
+{
+ uint16_t flags;
+ uint32_t num_initiator = 0;
+ uint32_t initiator_list[MAX_NODES];
+ int i, hierarchy, type, cache_level, total_levels;
+ HMAT_LB_Info *hmat_lb;
+ NumaHmatCacheOptions *hmat_cache;
+
+ for (i = 0; i < numa_state->num_nodes; i++) {
+ flags = 0;
+
+ if (numa_state->nodes[i].initiator < MAX_NODES) {
+ flags |= HMAT_PROXIMITY_INITIATOR_VALID;
+ }
+
+ build_hmat_mpda(table_data, flags, numa_state->nodes[i].initiator, i);
+ }
+
+ for (i = 0; i < numa_state->num_nodes; i++) {
+ if (numa_state->nodes[i].has_cpu) {
+ initiator_list[num_initiator++] = i;
+ }
+ }
+
+ /*
+ * ACPI 6.3: 5.2.27.4 System Locality Latency and Bandwidth Information
+ * Structure: Table 5-146
+ */
+ for (hierarchy = HMAT_LB_MEM_MEMORY;
+ hierarchy <= HMAT_LB_MEM_CACHE_3RD_LEVEL; hierarchy++) {
+ for (type = HMAT_LB_DATA_ACCESS_LATENCY;
+ type <= HMAT_LB_DATA_WRITE_BANDWIDTH; type++) {
+ hmat_lb = numa_state->hmat_lb[hierarchy][type];
+
+ if (hmat_lb && hmat_lb->list->len) {
+ build_hmat_lb(table_data, hmat_lb, num_initiator,
+ numa_state->num_nodes, initiator_list);
+ }
+ }
+ }
+
+ /*
+ * ACPI 6.3: 5.2.27.5 Memory Side Cache Information Structure:
+ * Table 5-147
+ */
+ for (i = 0; i < numa_state->num_nodes; i++) {
+ total_levels = 0;
+ for (cache_level = 1; cache_level < HMAT_LB_LEVELS; cache_level++) {
+ if (numa_state->hmat_cache[i][cache_level]) {
+ total_levels++;
+ }
+ }
+ for (cache_level = 0; cache_level <= total_levels; cache_level++) {
+ hmat_cache = numa_state->hmat_cache[i][cache_level];
+ if (hmat_cache) {
+ build_hmat_cache(table_data, total_levels, hmat_cache);
+ }
+ }
+ }
+}
+
+void build_hmat(GArray *table_data, BIOSLinker *linker, NumaState *numa_state)
+{
+ int hmat_start = table_data->len;
+
+ /* reserve space for HMAT header */
+ acpi_data_push(table_data, 40);
+
+ hmat_build_table_structs(table_data, numa_state);
+
+ build_header(linker, table_data,
+ (void *)(table_data->data + hmat_start),
+ "HMAT", table_data->len - hmat_start, 2, NULL, NULL);
+}
diff --git a/hw/acpi/hmat.h b/hw/acpi/hmat.h
new file mode 100644
index 0000000..437dbc6
--- /dev/null
+++ b/hw/acpi/hmat.h
@@ -0,0 +1,42 @@
+/*
+ * HMAT ACPI Implementation Header
+ *
+ * Copyright(C) 2019 Intel Corporation.
+ *
+ * Author:
+ * Liu jingqi <jingqi.liu@linux.intel.com>
+ * Tao Xu <tao3.xu@intel.com>
+ *
+ * HMAT is defined in ACPI 6.3: 5.2.27 Heterogeneous Memory Attribute Table
+ * (HMAT)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>
+ */
+
+#ifndef HMAT_H
+#define HMAT_H
+
+#include "hw/acpi/aml-build.h"
+
+/*
+ * ACPI 6.3: 5.2.27.3 Memory Proximity Domain Attributes Structure,
+ * Table 5-145, Field "flag", Bit [0]: set to 1 to indicate that data in
+ * the Proximity Domain for the Attached Initiator field is valid.
+ * Other bits reserved.
+ */
+#define HMAT_PROXIMITY_INITIATOR_VALID 0x1
+
+void build_hmat(GArray *table_data, BIOSLinker *linker, NumaState *numa_state);
+
+#endif
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index d62e637..9bee514 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -764,13 +764,16 @@ bool virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq)
{
VirtIOBlockReq *req;
MultiReqBuffer mrb = {};
+ bool suppress_notifications = virtio_queue_get_notification(vq);
bool progress = false;
aio_context_acquire(blk_get_aio_context(s->blk));
blk_io_plug(s->blk);
do {
- virtio_queue_set_notification(vq, 0);
+ if (suppress_notifications) {
+ virtio_queue_set_notification(vq, 0);
+ }
while ((req = virtio_blk_get_request(s, vq))) {
progress = true;
@@ -781,7 +784,9 @@ bool virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq)
}
}
- virtio_queue_set_notification(vq, 1);
+ if (suppress_notifications) {
+ virtio_queue_set_notification(vq, 1);
+ }
} while (!virtio_queue_empty(vq));
if (mrb.num_reqs) {
@@ -908,7 +913,8 @@ static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config)
blk_get_geometry(s->blk, &capacity);
memset(&blkcfg, 0, sizeof(blkcfg));
virtio_stq_p(vdev, &blkcfg.capacity, capacity);
- virtio_stl_p(vdev, &blkcfg.seg_max, 128 - 2);
+ virtio_stl_p(vdev, &blkcfg.seg_max,
+ s->conf.seg_max_adjust ? s->conf.queue_size - 2 : 128 - 2);
virtio_stw_p(vdev, &blkcfg.geometry.cylinders, conf->cyls);
virtio_stl_p(vdev, &blkcfg.blk_size, blk_size);
virtio_stw_p(vdev, &blkcfg.min_io_size, conf->min_io_size / blk_size);
@@ -1133,6 +1139,11 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
error_setg(errp, "num-queues property must be larger than 0");
return;
}
+ if (conf->queue_size <= 2) {
+ error_setg(errp, "invalid queue-size property (%" PRIu16 "), "
+ "must be > 2", conf->queue_size);
+ return;
+ }
if (!is_power_of_2(conf->queue_size) ||
conf->queue_size > VIRTQUEUE_MAX_SIZE) {
error_setg(errp, "invalid queue-size property (%" PRIu16 "), "
@@ -1262,6 +1273,7 @@ static Property virtio_blk_properties[] = {
true),
DEFINE_PROP_UINT16("num-queues", VirtIOBlock, conf.num_queues, 1),
DEFINE_PROP_UINT16("queue-size", VirtIOBlock, conf.queue_size, 128),
+ DEFINE_PROP_BOOL("seg-max-adjust", VirtIOBlock, conf.seg_max_adjust, true),
DEFINE_PROP_LINK("iothread", VirtIOBlock, conf.iothread, TYPE_IOTHREAD,
IOThread *),
DEFINE_PROP_BIT64("discard", VirtIOBlock, host_features,
diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c
index 3325904..e1cbce3 100644
--- a/hw/char/virtio-serial-bus.c
+++ b/hw/char/virtio-serial-bus.c
@@ -1126,9 +1126,17 @@ static void virtio_serial_device_unrealize(DeviceState *dev, Error **errp)
{
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
VirtIOSerial *vser = VIRTIO_SERIAL(dev);
+ int i;
QLIST_REMOVE(vser, next);
+ virtio_delete_queue(vser->c_ivq);
+ virtio_delete_queue(vser->c_ovq);
+ for (i = 0; i < vser->bus.max_nr_ports; i++) {
+ virtio_delete_queue(vser->ivqs[i]);
+ virtio_delete_queue(vser->ovqs[i]);
+ }
+
g_free(vser->ivqs);
g_free(vser->ovqs);
g_free(vser->ports_map);
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 73bf1f8..4f30fb5 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -29,11 +29,15 @@
GlobalProperty hw_compat_4_2[] = {
{ "virtio-blk-device", "x-enable-wce-if-config-wce", "off" },
+ { "virtio-blk-device", "seg-max-adjust", "off"},
+ { "virtio-scsi-device", "seg_max_adjust", "off"},
+ { "vhost-blk-device", "seg_max_adjust", "off"},
};
const size_t hw_compat_4_2_len = G_N_ELEMENTS(hw_compat_4_2);
GlobalProperty hw_compat_4_1[] = {
{ "virtio-pci", "x-pcie-flr-init", "off" },
+ { "virtio-device", "use-disabled-flag", "false" },
};
const size_t hw_compat_4_1_len = G_N_ELEMENTS(hw_compat_4_1);
@@ -429,6 +433,20 @@ static void machine_set_nvdimm(Object *obj, bool value, Error **errp)
ms->nvdimms_state->is_enabled = value;
}
+static bool machine_get_hmat(Object *obj, Error **errp)
+{
+ MachineState *ms = MACHINE(obj);
+
+ return ms->numa_state->hmat_enabled;
+}
+
+static void machine_set_hmat(Object *obj, bool value, Error **errp)
+{
+ MachineState *ms = MACHINE(obj);
+
+ ms->numa_state->hmat_enabled = value;
+}
+
static char *machine_get_nvdimm_persistence(Object *obj, Error **errp)
{
MachineState *ms = MACHINE(obj);
@@ -556,6 +574,7 @@ void machine_set_cpu_numa_node(MachineState *machine,
const CpuInstanceProperties *props, Error **errp)
{
MachineClass *mc = MACHINE_GET_CLASS(machine);
+ NodeInfo *numa_info = machine->numa_state->nodes;
bool match = false;
int i;
@@ -625,6 +644,17 @@ void machine_set_cpu_numa_node(MachineState *machine,
match = true;
slot->props.node_id = props->node_id;
slot->props.has_node_id = props->has_node_id;
+
+ if (machine->numa_state->hmat_enabled) {
+ if ((numa_info[props->node_id].initiator < MAX_NODES) &&
+ (props->node_id != numa_info[props->node_id].initiator)) {
+ error_setg(errp, "The initiator of CPU NUMA node %" PRId64
+ " should be itself", props->node_id);
+ return;
+ }
+ numa_info[props->node_id].has_cpu = true;
+ numa_info[props->node_id].initiator = props->node_id;
+ }
}
if (!match) {
@@ -845,6 +875,13 @@ static void machine_initfn(Object *obj)
if (mc->cpu_index_to_instance_props && mc->get_default_cpu_node_id) {
ms->numa_state = g_new0(NumaState, 1);
+ object_property_add_bool(obj, "hmat",
+ machine_get_hmat, machine_set_hmat,
+ &error_abort);
+ object_property_set_description(obj, "hmat",
+ "Set on/off to enable/disable "
+ "ACPI Heterogeneous Memory Attribute "
+ "Table (HMAT)", NULL);
}
/* Register notifier when init is done for sysbus sanity checks */
@@ -912,6 +949,32 @@ static char *cpu_slot_to_string(const CPUArchId *cpu)
return g_string_free(s, false);
}
+static void numa_validate_initiator(NumaState *numa_state)
+{
+ int i;
+ NodeInfo *numa_info = numa_state->nodes;
+
+ for (i = 0; i < numa_state->num_nodes; i++) {
+ if (numa_info[i].initiator == MAX_NODES) {
+ error_report("The initiator of NUMA node %d is missing, use "
+ "'-numa node,initiator' option to declare it", i);
+ exit(1);
+ }
+
+ if (!numa_info[numa_info[i].initiator].present) {
+ error_report("NUMA node %" PRIu16 " is missing, use "
+ "'-numa node' option to declare it first",
+ numa_info[i].initiator);
+ exit(1);
+ }
+
+ if (!numa_info[numa_info[i].initiator].has_cpu) {
+ error_report("The initiator of NUMA node %d is invalid", i);
+ exit(1);
+ }
+ }
+}
+
static void machine_numa_finish_cpu_init(MachineState *machine)
{
int i;
@@ -952,6 +1015,11 @@ static void machine_numa_finish_cpu_init(MachineState *machine)
machine_set_cpu_numa_node(machine, &props, &error_fatal);
}
}
+
+ if (machine->numa_state->hmat_enabled) {
+ numa_validate_initiator(machine->numa_state);
+ }
+
if (s->len && !qtest_enabled()) {
warn_report("CPU(s) not present in any NUMA nodes: %s",
s->str);
diff --git a/hw/core/numa.c b/hw/core/numa.c
index 19f082d..0d1b4be 100644
--- a/hw/core/numa.c
+++ b/hw/core/numa.c
@@ -23,6 +23,7 @@
*/
#include "qemu/osdep.h"
+#include "qemu/units.h"
#include "sysemu/hostmem.h"
#include "sysemu/numa.h"
#include "sysemu/sysemu.h"
@@ -129,6 +130,29 @@ static void parse_numa_node(MachineState *ms, NumaNodeOptions *node,
numa_info[nodenr].node_mem = object_property_get_uint(o, "size", NULL);
numa_info[nodenr].node_memdev = MEMORY_BACKEND(o);
}
+
+ /*
+ * If not set the initiator, set it to MAX_NODES. And if
+ * HMAT is enabled and this node has no cpus, QEMU will raise error.
+ */
+ numa_info[nodenr].initiator = MAX_NODES;
+ if (node->has_initiator) {
+ if (!ms->numa_state->hmat_enabled) {
+ error_setg(errp, "ACPI Heterogeneous Memory Attribute Table "
+ "(HMAT) is disabled, enable it with -machine hmat=on "
+ "before using any of hmat specific options");
+ return;
+ }
+
+ if (node->initiator >= MAX_NODES) {
+ error_report("The initiator id %" PRIu16 " expects an integer "
+ "between 0 and %d", node->initiator,
+ MAX_NODES - 1);
+ return;
+ }
+
+ numa_info[nodenr].initiator = node->initiator;
+ }
numa_info[nodenr].present = true;
max_numa_nodeid = MAX(max_numa_nodeid, nodenr + 1);
ms->numa_state->num_nodes++;
@@ -171,6 +195,253 @@ void parse_numa_distance(MachineState *ms, NumaDistOptions *dist, Error **errp)
ms->numa_state->have_numa_distance = true;
}
+void parse_numa_hmat_lb(NumaState *numa_state, NumaHmatLBOptions *node,
+                        Error **errp)
+{
+    int i, first_bit, last_bit;
+    uint64_t max_entry, temp_base, bitmap_copy;
+    NodeInfo *numa_info = numa_state->nodes;
+    HMAT_LB_Info *hmat_lb =
+        numa_state->hmat_lb[node->hierarchy][node->data_type];
+    HMAT_LB_Data lb_data = {};
+    HMAT_LB_Data *lb_temp;
+
+    /* Node ids are zero-based: an id equal to num_nodes is out of range */
+    if (node->initiator >= numa_state->num_nodes) {
+        error_setg(errp, "Invalid initiator=%d, it should be less than %d",
+                   node->initiator, numa_state->num_nodes);
+        return;
+    }
+    if (node->target >= numa_state->num_nodes) {
+        error_setg(errp, "Invalid target=%d, it should be less than %d",
+                   node->target, numa_state->num_nodes);
+        return;
+    }
+    if (!numa_info[node->initiator].has_cpu) {
+        error_setg(errp, "Invalid initiator=%d, it isn't an "
+                   "initiator proximity domain", node->initiator);
+        return;
+    }
+    if (!numa_info[node->target].present) {
+        error_setg(errp, "The target=%d should point to an existing node",
+                   node->target);
+        return;
+    }
+
+    if (!hmat_lb) {
+        hmat_lb = g_malloc0(sizeof(*hmat_lb));
+        numa_state->hmat_lb[node->hierarchy][node->data_type] = hmat_lb;
+        hmat_lb->list = g_array_new(false, true, sizeof(HMAT_LB_Data));
+    }
+    hmat_lb->hierarchy = node->hierarchy;
+    hmat_lb->data_type = node->data_type;
+    lb_data.initiator = node->initiator;
+    lb_data.target = node->target;
+
+    if (node->data_type <= HMATLB_DATA_TYPE_WRITE_LATENCY) {
+        /* Input latency data */
+
+        if (!node->has_latency) {
+            error_setg(errp, "Missing 'latency' option");
+            return;
+        }
+        if (node->has_bandwidth) {
+            error_setg(errp, "Invalid option 'bandwidth' since "
+                       "the data type is latency");
+            return;
+        }
+
+        /* Detect duplicate configuration */
+        for (i = 0; i < hmat_lb->list->len; i++) {
+            lb_temp = &g_array_index(hmat_lb->list, HMAT_LB_Data, i);
+
+            if (node->initiator == lb_temp->initiator &&
+                node->target == lb_temp->target) {
+                error_setg(errp, "Duplicate configuration of the latency for "
+                           "initiator=%d and target=%d", node->initiator,
+                           node->target);
+                return;
+            }
+        }
+
+        hmat_lb->base = hmat_lb->base ? hmat_lb->base : UINT64_MAX;
+
+        if (node->latency) {
+            /* Largest power of 10 dividing this latency is its own base */
+            max_entry = node->latency;
+            temp_base = 1;
+            while (QEMU_IS_ALIGNED(max_entry, 10)) {
+                max_entry /= 10;
+                temp_base *= 10;
+            }
+
+            /* Compress against the new base, rescaling the previous max */
+            temp_base = MIN(hmat_lb->base, temp_base);
+            max_entry = hmat_lb->range_bitmap * (hmat_lb->base / temp_base);
+            max_entry = MAX(max_entry, node->latency / temp_base);
+
+            /*
+             * For latency hmat_lb->range_bitmap records the max compressed
+             * latency which should be less than 0xFFFF (UINT16_MAX)
+             */
+            if (max_entry >= UINT16_MAX) {
+                error_setg(errp, "Latency %" PRIu64 " between initiator=%d and "
+                           "target=%d should not differ from previously entered "
+                           "min or max values on more than %d", node->latency,
+                           node->initiator, node->target, UINT16_MAX - 1);
+                return;
+            } else {
+                hmat_lb->base = temp_base;
+                hmat_lb->range_bitmap = max_entry;
+            }
+
+            /*
+             * Set lb_info_provided bit 0 as 1,
+             * latency information is provided
+             */
+            numa_info[node->target].lb_info_provided |= BIT(0);
+        }
+        lb_data.data = node->latency;
+    } else if (node->data_type >= HMATLB_DATA_TYPE_ACCESS_BANDWIDTH) {
+        /* Input bandwidth data */
+        if (!node->has_bandwidth) {
+            error_setg(errp, "Missing 'bandwidth' option");
+            return;
+        }
+        if (node->has_latency) {
+            error_setg(errp, "Invalid option 'latency' since "
+                       "the data type is bandwidth");
+            return;
+        }
+        if (!QEMU_IS_ALIGNED(node->bandwidth, MiB)) {
+            error_setg(errp, "Bandwidth %" PRIu64 " between initiator=%d and "
+                       "target=%d should be 1MB aligned", node->bandwidth,
+                       node->initiator, node->target);
+            return;
+        }
+
+        /* Detect duplicate configuration */
+        for (i = 0; i < hmat_lb->list->len; i++) {
+            lb_temp = &g_array_index(hmat_lb->list, HMAT_LB_Data, i);
+
+            if (node->initiator == lb_temp->initiator &&
+                node->target == lb_temp->target) {
+                error_setg(errp, "Duplicate configuration of the bandwidth for "
+                           "initiator=%d and target=%d", node->initiator,
+                           node->target);
+                return;
+            }
+        }
+
+        hmat_lb->base = hmat_lb->base ? hmat_lb->base : 1;
+
+        if (node->bandwidth) {
+            /* Work on a copy so the bitmap is unchanged when out of range */
+            bitmap_copy = hmat_lb->range_bitmap;
+            bitmap_copy |= node->bandwidth;
+            first_bit = ctz64(bitmap_copy);
+            temp_base = UINT64_C(1) << first_bit;
+            max_entry = node->bandwidth / temp_base;
+            last_bit = 64 - clz64(bitmap_copy);
+
+            /*
+             * For bandwidth, first_bit records the base unit of bandwidth
+             * bits, last_bit records the last bit of the max bandwidth. The
+             * max compressed bandwidth should be less than 0xFFFF (UINT16_MAX)
+             */
+            if ((last_bit - first_bit) > UINT16_BITS ||
+                max_entry >= UINT16_MAX) {
+                error_setg(errp, "Bandwidth %" PRIu64 " between initiator=%d "
+                           "and target=%d should not differ from previously "
+                           "entered values on more than %d", node->bandwidth,
+                           node->initiator, node->target, UINT16_MAX - 1);
+                return;
+            } else {
+                hmat_lb->base = temp_base;
+                hmat_lb->range_bitmap = bitmap_copy;
+            }
+
+            /*
+             * Set lb_info_provided bit 1 as 1,
+             * bandwidth information is provided
+             */
+            numa_info[node->target].lb_info_provided |= BIT(1);
+        }
+        lb_data.data = node->bandwidth;
+    } else {
+        assert(0);
+    }
+
+    g_array_append_val(hmat_lb->list, lb_data);
+}
+
+void parse_numa_hmat_cache(MachineState *ms, NumaHmatCacheOptions *node,
+ Error **errp)
+{
+ int nb_numa_nodes = ms->numa_state->num_nodes;
+ NodeInfo *numa_info = ms->numa_state->nodes;
+ NumaHmatCacheOptions *hmat_cache = NULL;
+
+ if (node->node_id >= nb_numa_nodes) {
+ error_setg(errp, "Invalid node-id=%" PRIu32 ", it should be less "
+ "than %d", node->node_id, nb_numa_nodes);
+ return;
+ }
+
+ if (numa_info[node->node_id].lb_info_provided != (BIT(0) | BIT(1))) {
+ error_setg(errp, "The latency and bandwidth information of "
+ "node-id=%" PRIu32 " should be provided before memory side "
+ "cache attributes", node->node_id);
+ return;
+ }
+
+ if (node->level < 1 || node->level >= HMAT_LB_LEVELS) {
+ error_setg(errp, "Invalid level=%" PRIu8 ", it should be larger than 0 "
+ "and less than or equal to %d", node->level,
+ HMAT_LB_LEVELS - 1);
+ return;
+ }
+
+ assert(node->associativity < HMAT_CACHE_ASSOCIATIVITY__MAX);
+ assert(node->policy < HMAT_CACHE_WRITE_POLICY__MAX);
+ if (ms->numa_state->hmat_cache[node->node_id][node->level]) {
+ error_setg(errp, "Duplicate configuration of the side cache for "
+ "node-id=%" PRIu32 " and level=%" PRIu8,
+ node->node_id, node->level);
+ return;
+ }
+
+ if ((node->level > 1) &&
+ ms->numa_state->hmat_cache[node->node_id][node->level - 1] &&
+ (node->size >=
+ ms->numa_state->hmat_cache[node->node_id][node->level - 1]->size)) {
+ error_setg(errp, "Invalid size=%" PRIu64 ", the size of level=%" PRIu8
+ " should be less than the size(%" PRIu64 ") of "
+ "level=%u", node->size, node->level,
+ ms->numa_state->hmat_cache[node->node_id]
+ [node->level - 1]->size,
+ node->level - 1);
+ return;
+ }
+
+ if ((node->level < HMAT_LB_LEVELS - 1) &&
+ ms->numa_state->hmat_cache[node->node_id][node->level + 1] &&
+ (node->size <=
+ ms->numa_state->hmat_cache[node->node_id][node->level + 1]->size)) {
+ error_setg(errp, "Invalid size=%" PRIu64 ", the size of level=%" PRIu8
+ " should be larger than the size(%" PRIu64 ") of "
+ "level=%u", node->size, node->level,
+ ms->numa_state->hmat_cache[node->node_id]
+ [node->level + 1]->size,
+ node->level + 1);
+ return;
+ }
+
+ hmat_cache = g_malloc0(sizeof(*hmat_cache));
+ memcpy(hmat_cache, node, sizeof(*hmat_cache));
+ ms->numa_state->hmat_cache[node->node_id][node->level] = hmat_cache;
+}
+
void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp)
{
Error *err = NULL;
@@ -208,6 +479,32 @@ void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp)
machine_set_cpu_numa_node(ms, qapi_NumaCpuOptions_base(&object->u.cpu),
&err);
break;
+ case NUMA_OPTIONS_TYPE_HMAT_LB:
+ if (!ms->numa_state->hmat_enabled) {
+ error_setg(errp, "ACPI Heterogeneous Memory Attribute Table "
+ "(HMAT) is disabled, enable it with -machine hmat=on "
+ "before using any of hmat specific options");
+ return;
+ }
+
+ parse_numa_hmat_lb(ms->numa_state, &object->u.hmat_lb, &err);
+ if (err) {
+ goto end;
+ }
+ break;
+ case NUMA_OPTIONS_TYPE_HMAT_CACHE:
+ if (!ms->numa_state->hmat_enabled) {
+ error_setg(errp, "ACPI Heterogeneous Memory Attribute Table "
+ "(HMAT) is disabled, enable it with -machine hmat=on "
+ "before using any of hmat specific options");
+ return;
+ }
+
+ parse_numa_hmat_cache(ms, &object->u.hmat_cache, &err);
+ if (err) {
+ goto end;
+ }
+ break;
default:
abort();
}
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 7b8da62..e25df83 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -68,6 +68,7 @@
#include "hw/i386/intel_iommu.h"
#include "hw/acpi/ipmi.h"
+#include "hw/acpi/hmat.h"
/* These are used to size the ACPI tables for -M pc-i440fx-1.7 and
* -M pc-i440fx-2.0. Even if the actual amount of AML generated grows
@@ -2835,6 +2836,10 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine)
acpi_add_table(table_offsets, tables_blob);
build_slit(tables_blob, tables->linker, machine);
}
+ if (machine->numa_state->hmat_enabled) {
+ acpi_add_table(table_offsets, tables_blob);
+ build_hmat(tables_blob, tables->linker, machine->numa_state);
+ }
}
if (acpi_get_mcfg(&mcfg)) {
acpi_add_table(table_offsets, tables_blob);
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 43c94b9..a523ef0 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -686,9 +686,18 @@ static inline bool vtd_pe_type_check(X86IOMMUState *x86_iommu,
return true;
}
-static int vtd_get_pasid_dire(dma_addr_t pasid_dir_base,
- uint32_t pasid,
- VTDPASIDDirEntry *pdire)
+static inline bool vtd_pdire_present(VTDPASIDDirEntry *pdire)
+{
+ return pdire->val & 1;
+}
+
+/**
+ * Callers of this function must check the present bit before using
+ * the pdir entry for anything other than the fpd bit check.
+ */
+static int vtd_get_pdire_from_pdir_table(dma_addr_t pasid_dir_base,
+ uint32_t pasid,
+ VTDPASIDDirEntry *pdire)
{
uint32_t index;
dma_addr_t addr, entry_size;
@@ -703,18 +712,22 @@ static int vtd_get_pasid_dire(dma_addr_t pasid_dir_base,
return 0;
}
-static int vtd_get_pasid_entry(IntelIOMMUState *s,
- uint32_t pasid,
- VTDPASIDDirEntry *pdire,
- VTDPASIDEntry *pe)
+static inline bool vtd_pe_present(VTDPASIDEntry *pe)
+{
+ return pe->val[0] & VTD_PASID_ENTRY_P;
+}
+
+static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
+ uint32_t pasid,
+ dma_addr_t addr,
+ VTDPASIDEntry *pe)
{
uint32_t index;
- dma_addr_t addr, entry_size;
+ dma_addr_t entry_size;
X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
index = VTD_PASID_TABLE_INDEX(pasid);
entry_size = VTD_PASID_ENTRY_SIZE;
- addr = pdire->val & VTD_PASID_TABLE_BASE_ADDR_MASK;
addr = addr + index * entry_size;
if (dma_memory_read(&address_space_memory, addr, pe, entry_size)) {
return -VTD_FR_PASID_TABLE_INV;
@@ -732,25 +745,54 @@ static int vtd_get_pasid_entry(IntelIOMMUState *s,
return 0;
}
-static int vtd_get_pasid_entry_from_pasid(IntelIOMMUState *s,
- dma_addr_t pasid_dir_base,
- uint32_t pasid,
- VTDPASIDEntry *pe)
+/**
+ * Callers of this function must check the present bit before using
+ * the pasid entry for anything other than the fpd bit check.
+ */
+static int vtd_get_pe_from_pdire(IntelIOMMUState *s,
+ uint32_t pasid,
+ VTDPASIDDirEntry *pdire,
+ VTDPASIDEntry *pe)
+{
+ dma_addr_t addr = pdire->val & VTD_PASID_TABLE_BASE_ADDR_MASK;
+
+ return vtd_get_pe_in_pasid_leaf_table(s, pasid, addr, pe);
+}
+
+/**
+ * This function gets a pasid entry from a specified pasid
+ * table (includes dir and leaf table) with a specified pasid.
+ * The present bit is checked at both levels so that only a
+ * present pasid entry is returned to the caller.
+ */
+static int vtd_get_pe_from_pasid_table(IntelIOMMUState *s,
+ dma_addr_t pasid_dir_base,
+ uint32_t pasid,
+ VTDPASIDEntry *pe)
{
int ret;
VTDPASIDDirEntry pdire;
- ret = vtd_get_pasid_dire(pasid_dir_base, pasid, &pdire);
+ ret = vtd_get_pdire_from_pdir_table(pasid_dir_base,
+ pasid, &pdire);
if (ret) {
return ret;
}
- ret = vtd_get_pasid_entry(s, pasid, &pdire, pe);
+ if (!vtd_pdire_present(&pdire)) {
+ return -VTD_FR_PASID_TABLE_INV;
+ }
+
+ ret = vtd_get_pe_from_pdire(s, pasid, &pdire, pe);
if (ret) {
return ret;
}
- return ret;
+ if (!vtd_pe_present(pe)) {
+ return -VTD_FR_PASID_TABLE_INV;
+ }
+
+ return 0;
}
static int vtd_ce_get_rid2pasid_entry(IntelIOMMUState *s,
@@ -763,7 +805,7 @@ static int vtd_ce_get_rid2pasid_entry(IntelIOMMUState *s,
pasid = VTD_CE_GET_RID2PASID(ce);
pasid_dir_base = VTD_CE_GET_PASID_DIR_TABLE(ce);
- ret = vtd_get_pasid_entry_from_pasid(s, pasid_dir_base, pasid, pe);
+ ret = vtd_get_pe_from_pasid_table(s, pasid_dir_base, pasid, pe);
return ret;
}
@@ -781,7 +823,11 @@ static int vtd_ce_get_pasid_fpd(IntelIOMMUState *s,
pasid = VTD_CE_GET_RID2PASID(ce);
pasid_dir_base = VTD_CE_GET_PASID_DIR_TABLE(ce);
- ret = vtd_get_pasid_dire(pasid_dir_base, pasid, &pdire);
+ /*
+ * No present bit check since fpd is meaningful even
+ * if the present bit is clear.
+ */
+ ret = vtd_get_pdire_from_pdir_table(pasid_dir_base, pasid, &pdire);
if (ret) {
return ret;
}
@@ -791,7 +837,15 @@ static int vtd_ce_get_pasid_fpd(IntelIOMMUState *s,
return 0;
}
- ret = vtd_get_pasid_entry(s, pasid, &pdire, &pe);
+ if (!vtd_pdire_present(&pdire)) {
+ return -VTD_FR_PASID_TABLE_INV;
+ }
+
+ /*
+ * No present bit check since fpd is meaningful even
+ * if the present bit is clear.
+ */
+ ret = vtd_get_pe_from_pdire(s, pasid, &pdire, &pe);
if (ret) {
return ret;
}
@@ -948,6 +1002,7 @@ static VTDBus *vtd_find_as_from_bus_num(IntelIOMMUState *s, uint8_t bus_num)
return vtd_bus;
}
}
+ vtd_bus = NULL;
}
return vtd_bus;
}
@@ -2610,16 +2665,15 @@ static uint64_t vtd_mem_read(void *opaque, hwaddr addr, unsigned size)
switch (addr) {
/* Root Table Address Register, 64-bit */
case DMAR_RTADDR_REG:
+ val = vtd_get_quad_raw(s, DMAR_RTADDR_REG);
if (size == 4) {
- val = s->root & ((1ULL << 32) - 1);
- } else {
- val = s->root;
+ val = val & ((1ULL << 32) - 1);
}
break;
case DMAR_RTADDR_REG_HI:
assert(size == 4);
- val = s->root >> 32;
+ val = vtd_get_quad_raw(s, DMAR_RTADDR_REG) >> 32;
break;
/* Invalidation Queue Address Register, 64-bit */
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index edcf9fc..862033e 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -479,6 +479,7 @@ typedef struct VTDRootEntry VTDRootEntry;
#define VTD_PASID_ENTRY_FPD (1ULL << 1) /* Fault Processing Disable */
/* PASID Granular Translation Type Mask */
+#define VTD_PASID_ENTRY_P 1ULL
#define VTD_SM_PASID_ENTRY_PGTT (7ULL << 6)
#define VTD_SM_PASID_ENTRY_FLT (1ULL << 6)
#define VTD_SM_PASID_ENTRY_SLT (2ULL << 6)
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 721c7aa..fa12203 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -425,7 +425,6 @@ static void pc_i440fx_5_0_machine_options(MachineClass *m)
m->alias = "pc";
m->is_default = 1;
pcmc->default_cpu_version = 1;
- compat_props_add(m->compat_props, hw_compat_4_2, hw_compat_4_2_len);
}
DEFINE_I440FX_MACHINE(v5_0, "pc-i440fx-5.0", NULL,
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 52f4573..84cf925 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -354,7 +354,6 @@ static void pc_q35_5_0_machine_options(MachineClass *m)
pc_q35_machine_options(m);
m->alias = "q35";
pcmc->default_cpu_version = 1;
- compat_props_add(m->compat_props, hw_compat_4_2, hw_compat_4_2_len);
}
DEFINE_Q35_MACHINE(v5_0, "pc-q35-5.0", NULL,
diff --git a/hw/input/virtio-input.c b/hw/input/virtio-input.c
index ec54e46..9c013af 100644
--- a/hw/input/virtio-input.c
+++ b/hw/input/virtio-input.c
@@ -280,6 +280,7 @@ static void virtio_input_device_unrealize(DeviceState *dev, Error **errp)
{
VirtIOInputClass *vic = VIRTIO_INPUT_GET_CLASS(dev);
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+ VirtIOInput *vinput = VIRTIO_INPUT(dev);
Error *local_err = NULL;
if (vic->unrealize) {
@@ -289,8 +290,8 @@ static void virtio_input_device_unrealize(DeviceState *dev, Error **errp)
return;
}
}
- virtio_del_queue(vdev, 0);
- virtio_del_queue(vdev, 1);
+ virtio_delete_queue(vinput->evt);
+ virtio_delete_queue(vinput->sts);
virtio_cleanup(vdev);
}
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 777d62d..d7d3ad6 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -3102,7 +3102,8 @@ static void virtio_net_device_unrealize(DeviceState *dev, Error **errp)
for (i = 0; i < max_queues; i++) {
virtio_net_del_queue(n, i);
}
-
+ /* delete also control vq */
+ virtio_del_queue(vdev, max_queues * 2);
qemu_announce_timer_del(&n->announce_timer, false);
g_free(n->vqs);
qemu_del_nic(n->nic);
diff --git a/hw/pci/pci_host.c b/hw/pci/pci_host.c
index c5f9244..ce7bcdb 100644
--- a/hw/pci/pci_host.c
+++ b/hw/pci/pci_host.c
@@ -106,7 +106,7 @@ uint32_t pci_host_config_read_common(PCIDevice *pci_dev, uint32_t addr,
return ret;
}
-void pci_data_write(PCIBus *s, uint32_t addr, uint32_t val, int len)
+void pci_data_write(PCIBus *s, uint32_t addr, uint32_t val, unsigned len)
{
PCIDevice *pci_dev = pci_dev_find_by_addr(s, addr);
uint32_t config_addr = addr & (PCI_CONFIG_SPACE_SIZE - 1);
@@ -115,28 +115,21 @@ void pci_data_write(PCIBus *s, uint32_t addr, uint32_t val, int len)
return;
}
- PCI_DPRINTF("%s: %s: addr=%02" PRIx32 " val=%08" PRIx32 " len=%d\n",
- __func__, pci_dev->name, config_addr, val, len);
pci_host_config_write_common(pci_dev, config_addr, PCI_CONFIG_SPACE_SIZE,
val, len);
}
-uint32_t pci_data_read(PCIBus *s, uint32_t addr, int len)
+uint32_t pci_data_read(PCIBus *s, uint32_t addr, unsigned len)
{
PCIDevice *pci_dev = pci_dev_find_by_addr(s, addr);
uint32_t config_addr = addr & (PCI_CONFIG_SPACE_SIZE - 1);
- uint32_t val;
if (!pci_dev) {
return ~0x0;
}
- val = pci_host_config_read_common(pci_dev, config_addr,
- PCI_CONFIG_SPACE_SIZE, len);
- PCI_DPRINTF("%s: %s: addr=%02"PRIx32" val=%08"PRIx32" len=%d\n",
- __func__, pci_dev->name, config_addr, val, len);
-
- return val;
+ return pci_host_config_read_common(pci_dev, config_addr,
+ PCI_CONFIG_SPACE_SIZE, len);
}
static void pci_host_config_write(void *opaque, hwaddr addr,
@@ -167,8 +160,7 @@ static void pci_host_data_write(void *opaque, hwaddr addr,
uint64_t val, unsigned len)
{
PCIHostState *s = opaque;
- PCI_DPRINTF("write addr " TARGET_FMT_plx " len %d val %x\n",
- addr, len, (unsigned)val);
+
if (s->config_reg & (1u << 31))
pci_data_write(s->bus, s->config_reg | (addr & 3), val, len);
}
@@ -177,14 +169,11 @@ static uint64_t pci_host_data_read(void *opaque,
hwaddr addr, unsigned len)
{
PCIHostState *s = opaque;
- uint32_t val;
+
if (!(s->config_reg & (1U << 31))) {
return 0xffffffff;
}
- val = pci_data_read(s->bus, s->config_reg | (addr & 3), len);
- PCI_DPRINTF("read addr " TARGET_FMT_plx " len %d val %x\n",
- addr, len, val);
- return val;
+ return pci_data_read(s->bus, s->config_reg | (addr & 3), len);
}
const MemoryRegionOps pci_host_conf_le_ops = {
diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c
index c693fc7..26f710d 100644
--- a/hw/scsi/vhost-scsi.c
+++ b/hw/scsi/vhost-scsi.c
@@ -275,6 +275,8 @@ static Property vhost_scsi_properties[] = {
DEFINE_PROP_UINT32("num_queues", VirtIOSCSICommon, conf.num_queues, 1),
DEFINE_PROP_UINT32("virtqueue_size", VirtIOSCSICommon, conf.virtqueue_size,
128),
+ DEFINE_PROP_BOOL("seg_max_adjust", VirtIOSCSICommon, conf.seg_max_adjust,
+ true),
DEFINE_PROP_UINT32("max_sectors", VirtIOSCSICommon, conf.max_sectors,
0xFFFF),
DEFINE_PROP_UINT32("cmd_per_lun", VirtIOSCSICommon, conf.cmd_per_lun, 128),
diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c
index 6a6c15d..23f972d 100644
--- a/hw/scsi/vhost-user-scsi.c
+++ b/hw/scsi/vhost-user-scsi.c
@@ -39,6 +39,10 @@ static const int user_feature_bits[] = {
VHOST_INVALID_FEATURE_BIT
};
+enum VhostUserProtocolFeature {
+ VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13,
+};
+
static void vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status)
{
VHostUserSCSI *s = (VHostUserSCSI *)vdev;
@@ -62,6 +66,25 @@ static void vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status)
}
}
+static void vhost_user_scsi_reset(VirtIODevice *vdev)
+{
+ VHostSCSICommon *vsc = VHOST_SCSI_COMMON(vdev);
+ struct vhost_dev *dev = &vsc->dev;
+
+ /*
+ * Historically, reset was not implemented so only reset devices
+ * that are expecting it.
+ */
+ if (!virtio_has_feature(dev->protocol_features,
+ VHOST_USER_PROTOCOL_F_RESET_DEVICE)) {
+ return;
+ }
+
+ if (dev->vhost_ops->vhost_reset_device) {
+ dev->vhost_ops->vhost_reset_device(dev);
+ }
+}
+
static void vhost_dummy_handle_output(VirtIODevice *vdev, VirtQueue *vq)
{
}
@@ -182,6 +205,7 @@ static void vhost_user_scsi_class_init(ObjectClass *klass, void *data)
vdc->get_features = vhost_scsi_common_get_features;
vdc->set_config = vhost_scsi_common_set_config;
vdc->set_status = vhost_user_scsi_set_status;
+ vdc->reset = vhost_user_scsi_reset;
fwc->get_dev_path = vhost_scsi_common_get_fw_dev_path;
}
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
index e8b2b64..4bc73a3 100644
--- a/hw/scsi/virtio-scsi.c
+++ b/hw/scsi/virtio-scsi.c
@@ -597,12 +597,15 @@ bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq)
{
VirtIOSCSIReq *req, *next;
int ret = 0;
+ bool suppress_notifications = virtio_queue_get_notification(vq);
bool progress = false;
QTAILQ_HEAD(, VirtIOSCSIReq) reqs = QTAILQ_HEAD_INITIALIZER(reqs);
do {
- virtio_queue_set_notification(vq, 0);
+ if (suppress_notifications) {
+ virtio_queue_set_notification(vq, 0);
+ }
while ((req = virtio_scsi_pop_req(s, vq))) {
progress = true;
@@ -622,7 +625,9 @@ bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq)
}
}
- virtio_queue_set_notification(vq, 1);
+ if (suppress_notifications) {
+ virtio_queue_set_notification(vq, 1);
+ }
} while (ret != -EINVAL && !virtio_queue_empty(vq));
QTAILQ_FOREACH_SAFE(req, &reqs, next, next) {
@@ -654,7 +659,8 @@ static void virtio_scsi_get_config(VirtIODevice *vdev,
VirtIOSCSICommon *s = VIRTIO_SCSI_COMMON(vdev);
virtio_stl_p(vdev, &scsiconf->num_queues, s->conf.num_queues);
- virtio_stl_p(vdev, &scsiconf->seg_max, 128 - 2);
+ virtio_stl_p(vdev, &scsiconf->seg_max,
+ s->conf.seg_max_adjust ? s->conf.virtqueue_size - 2 : 128 - 2);
virtio_stl_p(vdev, &scsiconf->max_sectors, s->conf.max_sectors);
virtio_stl_p(vdev, &scsiconf->cmd_per_lun, s->conf.cmd_per_lun);
virtio_stl_p(vdev, &scsiconf->event_info_size, sizeof(VirtIOSCSIEvent));
@@ -893,6 +899,11 @@ void virtio_scsi_common_realize(DeviceState *dev,
virtio_cleanup(vdev);
return;
}
+ if (s->conf.virtqueue_size <= 2) {
+ error_setg(errp, "invalid virtqueue_size property (= %" PRIu32 "), "
+ "must be > 2", s->conf.virtqueue_size);
+ return;
+ }
s->cmd_vqs = g_new0(VirtQueue *, s->conf.num_queues);
s->sense_size = VIRTIO_SCSI_SENSE_DEFAULT_SIZE;
s->cdb_size = VIRTIO_SCSI_CDB_DEFAULT_SIZE;
@@ -949,6 +960,8 @@ static Property virtio_scsi_properties[] = {
DEFINE_PROP_UINT32("num_queues", VirtIOSCSI, parent_obj.conf.num_queues, 1),
DEFINE_PROP_UINT32("virtqueue_size", VirtIOSCSI,
parent_obj.conf.virtqueue_size, 128),
+ DEFINE_PROP_BOOL("seg_max_adjust", VirtIOSCSI,
+ parent_obj.conf.seg_max_adjust, true),
DEFINE_PROP_UINT32("max_sectors", VirtIOSCSI, parent_obj.conf.max_sectors,
0xFFFF),
DEFINE_PROP_UINT32("cmd_per_lun", VirtIOSCSI, parent_obj.conf.cmd_per_lun,
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index 02a9b25..d27a10f 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -58,6 +58,7 @@ enum VhostUserProtocolFeature {
VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10,
VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,
VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12,
+ VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13,
VHOST_USER_PROTOCOL_F_MAX
};
@@ -98,6 +99,7 @@ typedef enum VhostUserRequest {
VHOST_USER_GET_INFLIGHT_FD = 31,
VHOST_USER_SET_INFLIGHT_FD = 32,
VHOST_USER_GPU_SET_SOCKET = 33,
+ VHOST_USER_RESET_DEVICE = 34,
VHOST_USER_MAX
} VhostUserRequest;
@@ -890,10 +892,14 @@ static int vhost_user_set_owner(struct vhost_dev *dev)
static int vhost_user_reset_device(struct vhost_dev *dev)
{
VhostUserMsg msg = {
- .hdr.request = VHOST_USER_RESET_OWNER,
.hdr.flags = VHOST_USER_VERSION,
};
+ msg.hdr.request = virtio_has_feature(dev->protocol_features,
+ VHOST_USER_PROTOCOL_F_RESET_DEVICE)
+ ? VHOST_USER_RESET_DEVICE
+ : VHOST_USER_RESET_OWNER;
+
if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
return -1;
}
diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
index 40b04f5..57f3b9f 100644
--- a/hw/virtio/virtio-balloon.c
+++ b/hw/virtio/virtio-balloon.c
@@ -831,6 +831,13 @@ static void virtio_balloon_device_unrealize(DeviceState *dev, Error **errp)
}
balloon_stats_destroy_timer(s);
qemu_remove_balloon_handler(s);
+
+ virtio_delete_queue(s->ivq);
+ virtio_delete_queue(s->dvq);
+ virtio_delete_queue(s->svq);
+ if (s->free_page_vq) {
+ virtio_delete_queue(s->free_page_vq);
+ }
virtio_cleanup(vdev);
}
diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c
index 94d934c..872f2cd 100644
--- a/hw/virtio/virtio-mmio.c
+++ b/hw/virtio/virtio-mmio.c
@@ -65,6 +65,19 @@ static void virtio_mmio_stop_ioeventfd(VirtIOMMIOProxy *proxy)
virtio_bus_stop_ioeventfd(&proxy->bus);
}
+static void virtio_mmio_soft_reset(VirtIOMMIOProxy *proxy)
+{
+ int i;
+
+ if (proxy->legacy) {
+ return;
+ }
+
+ for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
+ proxy->vqs[i].enabled = 0;
+ }
+}
+
static uint64_t virtio_mmio_read(void *opaque, hwaddr offset, unsigned size)
{
VirtIOMMIOProxy *proxy = (VirtIOMMIOProxy *)opaque;
@@ -295,8 +308,9 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value,
break;
case VIRTIO_MMIO_QUEUE_NUM:
trace_virtio_mmio_queue_write(value, VIRTQUEUE_MAX_SIZE);
+ virtio_queue_set_num(vdev, vdev->queue_sel, value);
+
if (proxy->legacy) {
- virtio_queue_set_num(vdev, vdev->queue_sel, value);
virtio_queue_update_rings(vdev, vdev->queue_sel);
} else {
proxy->vqs[vdev->queue_sel].num = value;
@@ -378,6 +392,7 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value,
if (vdev->status == 0) {
virtio_reset(vdev);
+ virtio_mmio_soft_reset(proxy);
}
break;
case VIRTIO_MMIO_QUEUE_DESC_LOW:
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index c6b47a9..f723b9f 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -608,10 +608,14 @@ static void virtio_write_config(PCIDevice *pci_dev, uint32_t address,
pcie_cap_flr_write_config(pci_dev, address, val, len);
}
- if (range_covers_byte(address, len, PCI_COMMAND) &&
- !(pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
- virtio_pci_stop_ioeventfd(proxy);
- virtio_set_status(vdev, vdev->status & ~VIRTIO_CONFIG_S_DRIVER_OK);
+ if (range_covers_byte(address, len, PCI_COMMAND)) {
+ if (!(pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
+ virtio_set_disabled(vdev, true);
+ virtio_pci_stop_ioeventfd(proxy);
+ virtio_set_status(vdev, vdev->status & ~VIRTIO_CONFIG_S_DRIVER_OK);
+ } else {
+ virtio_set_disabled(vdev, false);
+ }
}
if (proxy->config_cap &&
@@ -1256,6 +1260,8 @@ static void virtio_pci_common_write(void *opaque, hwaddr addr,
break;
case VIRTIO_PCI_COMMON_Q_SIZE:
proxy->vqs[vdev->queue_sel].num = val;
+ virtio_queue_set_num(vdev, vdev->queue_sel,
+ proxy->vqs[vdev->queue_sel].num);
break;
case VIRTIO_PCI_COMMON_Q_MSIX:
msix_vector_unuse(&proxy->pci_dev,
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 04716b5..7b861e0 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -432,6 +432,11 @@ static void virtio_queue_packed_set_notification(VirtQueue *vq, int enable)
}
}
+bool virtio_queue_get_notification(VirtQueue *vq)
+{
+ return vq->notification;
+}
+
void virtio_queue_set_notification(VirtQueue *vq, int enable)
{
vq->notification = enable;
@@ -546,7 +551,7 @@ static inline bool is_desc_avail(uint16_t flags, bool wrap_counter)
* Called within rcu_read_lock(). */
static int virtio_queue_empty_rcu(VirtQueue *vq)
{
- if (unlikely(vq->vdev->broken)) {
+ if (virtio_device_disabled(vq->vdev)) {
return 1;
}
@@ -565,7 +570,7 @@ static int virtio_queue_split_empty(VirtQueue *vq)
{
bool empty;
- if (unlikely(vq->vdev->broken)) {
+ if (virtio_device_disabled(vq->vdev)) {
return 1;
}
@@ -783,7 +788,7 @@ void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
virtqueue_unmap_sg(vq, elem, len);
- if (unlikely(vq->vdev->broken)) {
+ if (virtio_device_disabled(vq->vdev)) {
return;
}
@@ -839,7 +844,7 @@ static void virtqueue_packed_flush(VirtQueue *vq, unsigned int count)
void virtqueue_flush(VirtQueue *vq, unsigned int count)
{
- if (unlikely(vq->vdev->broken)) {
+ if (virtio_device_disabled(vq->vdev)) {
vq->inuse -= count;
return;
}
@@ -1602,7 +1607,7 @@ err_undo_map:
void *virtqueue_pop(VirtQueue *vq, size_t sz)
{
- if (unlikely(vq->vdev->broken)) {
+ if (virtio_device_disabled(vq->vdev)) {
return NULL;
}
@@ -1698,7 +1703,7 @@ unsigned int virtqueue_drop_all(VirtQueue *vq)
{
struct VirtIODevice *vdev = vq->vdev;
- if (unlikely(vdev->broken)) {
+ if (virtio_device_disabled(vq->vdev)) {
return 0;
}
@@ -1816,7 +1821,7 @@ static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
- if (unlikely(vdev->broken)) {
+ if (virtio_device_disabled(vdev)) {
return;
}
@@ -1920,6 +1925,7 @@ void virtio_reset(void *opaque)
vdev->guest_features = 0;
vdev->queue_sel = 0;
vdev->status = 0;
+ vdev->disabled = false;
atomic_set(&vdev->isr, 0);
vdev->config_vector = VIRTIO_NO_VECTOR;
virtio_notify_vector(vdev, vdev->config_vector);
@@ -2330,17 +2336,24 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
return &vdev->vq[i];
}
+void virtio_delete_queue(VirtQueue *vq)
+{
+ vq->vring.num = 0;
+ vq->vring.num_default = 0;
+ vq->handle_output = NULL;
+ vq->handle_aio_output = NULL;
+ g_free(vq->used_elems);
+ vq->used_elems = NULL;
+ virtio_virtqueue_reset_region_cache(vq);
+}
+
void virtio_del_queue(VirtIODevice *vdev, int n)
{
if (n < 0 || n >= VIRTIO_QUEUE_MAX) {
abort();
}
- vdev->vq[n].vring.num = 0;
- vdev->vq[n].vring.num_default = 0;
- vdev->vq[n].handle_output = NULL;
- vdev->vq[n].handle_aio_output = NULL;
- g_free(vdev->vq[n].used_elems);
+ virtio_delete_queue(&vdev->vq[n]);
}
static void virtio_set_isr(VirtIODevice *vdev, int value)
@@ -2553,6 +2566,13 @@ static bool virtio_started_needed(void *opaque)
return vdev->started;
}
+static bool virtio_disabled_needed(void *opaque)
+{
+ VirtIODevice *vdev = opaque;
+
+ return vdev->disabled;
+}
+
static const VMStateDescription vmstate_virtqueue = {
.name = "virtqueue_state",
.version_id = 1,
@@ -2718,6 +2738,17 @@ static const VMStateDescription vmstate_virtio_started = {
}
};
+static const VMStateDescription vmstate_virtio_disabled = {
+ .name = "virtio/disabled",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = &virtio_disabled_needed,
+ .fields = (VMStateField[]) {
+ VMSTATE_BOOL(disabled, VirtIODevice),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
static const VMStateDescription vmstate_virtio = {
.name = "virtio",
.version_id = 1,
@@ -2735,6 +2766,7 @@ static const VMStateDescription vmstate_virtio = {
&vmstate_virtio_extra_state,
&vmstate_virtio_started,
&vmstate_virtio_packed_virtqueues,
+ &vmstate_virtio_disabled,
NULL
}
};
@@ -3384,17 +3416,12 @@ static bool virtio_queue_host_notifier_aio_poll(void *opaque)
{
EventNotifier *n = opaque;
VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
- bool progress;
if (!vq->vring.desc || virtio_queue_empty(vq)) {
return false;
}
- progress = virtio_queue_notify_aio_vq(vq);
-
- /* In case the handler function re-enabled notifications */
- virtio_queue_set_notification(vq, 0);
- return progress;
+ return virtio_queue_notify_aio_vq(vq);
}
static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n)
@@ -3569,6 +3596,7 @@ static void virtio_device_instance_finalize(Object *obj)
static Property virtio_properties[] = {
DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features),
DEFINE_PROP_BOOL("use-started", VirtIODevice, use_started, true),
+ DEFINE_PROP_BOOL("use-disabled-flag", VirtIODevice, use_disabled_flag, true),
DEFINE_PROP_END_OF_LIST(),
};
diff --git a/include/hw/pci/pci_host.h b/include/hw/pci/pci_host.h
index ba31595..9ce088b 100644
--- a/include/hw/pci/pci_host.h
+++ b/include/hw/pci/pci_host.h
@@ -62,8 +62,8 @@ void pci_host_config_write_common(PCIDevice *pci_dev, uint32_t addr,
uint32_t pci_host_config_read_common(PCIDevice *pci_dev, uint32_t addr,
uint32_t limit, uint32_t len);
-void pci_data_write(PCIBus *s, uint32_t addr, uint32_t val, int len);
-uint32_t pci_data_read(PCIBus *s, uint32_t addr, int len);
+void pci_data_write(PCIBus *s, uint32_t addr, uint32_t val, unsigned len);
+uint32_t pci_data_read(PCIBus *s, uint32_t addr, unsigned len);
extern const MemoryRegionOps pci_host_conf_le_ops;
extern const MemoryRegionOps pci_host_conf_be_ops;
diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
index 9c19f5b..1e62f86 100644
--- a/include/hw/virtio/virtio-blk.h
+++ b/include/hw/virtio/virtio-blk.h
@@ -38,6 +38,7 @@ struct VirtIOBlkConf
uint32_t request_merging;
uint16_t num_queues;
uint16_t queue_size;
+ bool seg_max_adjust;
uint32_t max_discard_sectors;
uint32_t max_write_zeroes_sectors;
bool x_enable_wce_if_config_wce;
diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h
index 122f7c4..24e7689 100644
--- a/include/hw/virtio/virtio-scsi.h
+++ b/include/hw/virtio/virtio-scsi.h
@@ -48,6 +48,7 @@ typedef struct virtio_scsi_config VirtIOSCSIConfig;
struct VirtIOSCSIConf {
uint32_t num_queues;
uint32_t virtqueue_size;
+ bool seg_max_adjust;
uint32_t max_sectors;
uint32_t cmd_per_lun;
#ifdef CONFIG_VHOST_SCSI
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index c32a815..b69d517 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -100,6 +100,8 @@ struct VirtIODevice
uint16_t device_id;
bool vm_running;
bool broken; /* device in invalid state, needs reset */
+ bool use_disabled_flag; /* allow use of 'disable' flag when needed */
+ bool disabled; /* device in temporarily disabled state */
bool use_started;
bool started;
bool start_on_kick; /* when virtio 1.0 feature has not been negotiated */
@@ -183,6 +185,8 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
void virtio_del_queue(VirtIODevice *vdev, int n);
+void virtio_delete_queue(VirtQueue *vq);
+
void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
unsigned int len);
void virtqueue_flush(VirtQueue *vq, unsigned int count);
@@ -224,6 +228,7 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id);
void virtio_notify_config(VirtIODevice *vdev);
+bool virtio_queue_get_notification(VirtQueue *vq);
void virtio_queue_set_notification(VirtQueue *vq, int enable);
int virtio_queue_ready(VirtQueue *vq);
@@ -378,4 +383,17 @@ static inline void virtio_set_started(VirtIODevice *vdev, bool started)
vdev->started = started;
}
}
+/* Set vdev->disabled to @disable; does nothing unless use_disabled_flag is set. */
+static inline void virtio_set_disabled(VirtIODevice *vdev, bool disable)
+{
+ if (vdev->use_disabled_flag) {
+ vdev->disabled = disable;
+ }
+}
+/* Return true if the device is disabled or broken; the result is wrapped in unlikely(). */
+static inline bool virtio_device_disabled(VirtIODevice *vdev)
+{
+ return unlikely(vdev->disabled || vdev->broken);
+}
+
#endif
diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h
index ae9c41d..ba693cc 100644
--- a/include/sysemu/numa.h
+++ b/include/sysemu/numa.h
@@ -14,10 +14,35 @@ struct CPUArchId;
#define NUMA_DISTANCE_MAX 254
#define NUMA_DISTANCE_UNREACHABLE 255
+/* Memory hierarchy values (the value of AcpiHmatLBInfo flags); HMAT_LB_LEVELS sizes NumaState.hmat_lb[] and hmat_cache[][] */
+enum {
+ HMAT_LB_MEM_MEMORY = 0,
+ HMAT_LB_MEM_CACHE_1ST_LEVEL = 1,
+ HMAT_LB_MEM_CACHE_2ND_LEVEL = 2,
+ HMAT_LB_MEM_CACHE_3RD_LEVEL = 3,
+ HMAT_LB_LEVELS /* number of entries above; must be the last entry */
+};
+
+/* Data type values (the value of AcpiHmatLBInfo data type); HMAT_LB_TYPES sizes the second dimension of NumaState.hmat_lb */
+enum {
+ HMAT_LB_DATA_ACCESS_LATENCY = 0,
+ HMAT_LB_DATA_READ_LATENCY = 1,
+ HMAT_LB_DATA_WRITE_LATENCY = 2,
+ HMAT_LB_DATA_ACCESS_BANDWIDTH = 3,
+ HMAT_LB_DATA_READ_BANDWIDTH = 4,
+ HMAT_LB_DATA_WRITE_BANDWIDTH = 5,
+ HMAT_LB_TYPES /* number of entries above; must be the last entry */
+};
+
+#define UINT16_BITS 16
+
struct NodeInfo {
uint64_t node_mem;
struct HostMemoryBackend *node_memdev;
bool present;
+ bool has_cpu;
+ uint8_t lb_info_provided;
+ uint16_t initiator;
uint8_t distance[MAX_NODES];
};
@@ -26,6 +51,31 @@ struct NumaNodeMem {
uint64_t node_plugged_mem;
};
+struct HMAT_LB_Data { /* presumably one element of HMAT_LB_Info.list -- TODO confirm */
+ uint8_t initiator; /* presumably the initiator NUMA node id -- TODO confirm */
+ uint8_t target; /* presumably the target NUMA node id -- TODO confirm */
+ uint64_t data; /* the stored value; its unit is not visible here */
+};
+typedef struct HMAT_LB_Data HMAT_LB_Data;
+
+struct HMAT_LB_Info {
+ /* Memory hierarchy: memory itself or a given level of memory side cache. */
+ uint8_t hierarchy;
+
+ /* Type of the data held: access/read/write latency or bandwidth. */
+ uint8_t data_type;
+
+ /* The range bitmap of bandwidth, used for calculating the common base */
+ uint64_t range_bitmap;
+
+ /* The common base unit for latencies or bandwidths */
+ uint64_t base;
+
+ /* GArray storing the latencies or bandwidths (element type not visible here) */
+ GArray *list;
+};
+typedef struct HMAT_LB_Info HMAT_LB_Info;
+
struct NumaState {
/* Number of NUMA nodes */
int num_nodes;
@@ -33,13 +83,26 @@ struct NumaState {
/* Allow setting NUMA distance for different NUMA nodes */
bool have_numa_distance;
+ /* Detect if HMAT support is enabled. */
+ bool hmat_enabled;
+
/* NUMA nodes information */
NodeInfo nodes[MAX_NODES];
+
+ /* NUMA nodes HMAT Locality Latency and Bandwidth Information */
+ HMAT_LB_Info *hmat_lb[HMAT_LB_LEVELS][HMAT_LB_TYPES];
+
+ /* Memory Side Cache Information Structure */
+ NumaHmatCacheOptions *hmat_cache[MAX_NODES][HMAT_LB_LEVELS];
};
typedef struct NumaState NumaState;
void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp);
void parse_numa_opts(MachineState *ms);
+void parse_numa_hmat_lb(NumaState *numa_state, NumaHmatLBOptions *node,
+ Error **errp);
+void parse_numa_hmat_cache(MachineState *ms, NumaHmatCacheOptions *node,
+ Error **errp);
void numa_complete_configuration(MachineState *ms);
void query_numa_node_mem(NumaNodeMem node_mem[], MachineState *ms);
extern QemuOptsList qemu_numa_opts;
diff --git a/qapi/machine.json b/qapi/machine.json
index ca26779..b3d30bc 100644
--- a/qapi/machine.json
+++ b/qapi/machine.json
@@ -426,10 +426,14 @@
#
# @cpu: property based CPU(s) to node mapping (Since: 2.10)
#
+# @hmat-lb: memory latency and bandwidth information (Since: 5.0)
+#
+# @hmat-cache: memory side cache information (Since: 5.0)
+#
# Since: 2.1
##
{ 'enum': 'NumaOptionsType',
- 'data': [ 'node', 'dist', 'cpu' ] }
+ 'data': [ 'node', 'dist', 'cpu', 'hmat-lb', 'hmat-cache' ] }
##
# @NumaOptions:
@@ -444,7 +448,9 @@
'data': {
'node': 'NumaNodeOptions',
'dist': 'NumaDistOptions',
- 'cpu': 'NumaCpuOptions' }}
+ 'cpu': 'NumaCpuOptions',
+ 'hmat-lb': 'NumaHmatLBOptions',
+ 'hmat-cache': 'NumaHmatCacheOptions' }}
##
# @NumaNodeOptions:
@@ -463,6 +469,13 @@
# @memdev: memory backend object. If specified for one node,
# it must be specified for all nodes.
#
+# @initiator: defined in ACPI 6.3 Chapter 5.2.27.3 Table 5-145,
+# points to the nodeid which has the memory controller
+# responsible for this NUMA node. This field provides
+# additional information as to the initiator node that
+# is closest (as in directly attached) to this node, and
+# therefore has the best performance (since 5.0)
+#
# Since: 2.1
##
{ 'struct': 'NumaNodeOptions',
@@ -470,7 +483,8 @@
'*nodeid': 'uint16',
'*cpus': ['uint16'],
'*mem': 'size',
- '*memdev': 'str' }}
+ '*memdev': 'str',
+ '*initiator': 'uint16' }}
##
# @NumaDistOptions:
@@ -550,6 +564,166 @@
'data' : {} }
##
+# @HmatLBMemoryHierarchy:
+#
+# The memory hierarchy in the System Locality Latency and Bandwidth
+# Information Structure of HMAT (Heterogeneous Memory Attribute Table)
+#
+# For more information about @HmatLBMemoryHierarchy, see chapter
+# 5.2.27.4: Table 5-146: Field "Flags" of ACPI 6.3 spec.
+#
+# @memory: the structure represents the memory performance
+#
+# @first-level: first level of memory side cache
+#
+# @second-level: second level of memory side cache
+#
+# @third-level: third level of memory side cache
+#
+# Since: 5.0
+##
+{ 'enum': 'HmatLBMemoryHierarchy',
+ 'data': [ 'memory', 'first-level', 'second-level', 'third-level' ] }
+
+##
+# @HmatLBDataType:
+#
+# Data type in the System Locality Latency and Bandwidth
+# Information Structure of HMAT (Heterogeneous Memory Attribute Table)
+#
+# For more information about @HmatLBDataType, see chapter
+# 5.2.27.4: Table 5-146: Field "Data Type" of ACPI 6.3 spec.
+#
+# @access-latency: access latency (nanoseconds)
+#
+# @read-latency: read latency (nanoseconds)
+#
+# @write-latency: write latency (nanoseconds)
+#
+# @access-bandwidth: access bandwidth (Bytes per second)
+#
+# @read-bandwidth: read bandwidth (Bytes per second)
+#
+# @write-bandwidth: write bandwidth (Bytes per second)
+#
+# Since: 5.0
+##
+{ 'enum': 'HmatLBDataType',
+ 'data': [ 'access-latency', 'read-latency', 'write-latency',
+ 'access-bandwidth', 'read-bandwidth', 'write-bandwidth' ] }
+
+##
+# @NumaHmatLBOptions:
+#
+# Set the system locality latency and bandwidth information
+# between Initiator and Target proximity Domains.
+#
+# For more information about @NumaHmatLBOptions, see chapter
+# 5.2.27.4: Table 5-146 of ACPI 6.3 spec.
+#
+# @initiator: the Initiator Proximity Domain.
+#
+# @target: the Target Proximity Domain.
+#
+# @hierarchy: the Memory Hierarchy. Indicates the performance
+# of memory or side cache.
+#
+# @data-type: presents the type of data, access/read/write
+# latency or bandwidth.
+#
+# @latency: the value of latency from @initiator to @target
+# proximity domain, the latency unit is "ns(nanosecond)".
+#
+# @bandwidth: the value of bandwidth between @initiator and @target
+# proximity domain, the bandwidth unit is
+# "Bytes per second".
+#
+# Since: 5.0
+##
+{ 'struct': 'NumaHmatLBOptions',
+ 'data': {
+ 'initiator': 'uint16',
+ 'target': 'uint16',
+ 'hierarchy': 'HmatLBMemoryHierarchy',
+ 'data-type': 'HmatLBDataType',
+ '*latency': 'uint64',
+ '*bandwidth': 'size' }}
+
+##
+# @HmatCacheAssociativity:
+#
+# Cache associativity in the Memory Side Cache Information Structure
+# of HMAT
+#
+# For more information of @HmatCacheAssociativity, see chapter
+# 5.2.27.5: Table 5-147 of ACPI 6.3 spec.
+#
+# @none: None (no memory side cache in this proximity domain,
+# or cache associativity unknown)
+#
+# @direct: Direct Mapped
+#
+# @complex: Complex Cache Indexing (implementation specific)
+#
+# Since: 5.0
+##
+{ 'enum': 'HmatCacheAssociativity',
+ 'data': [ 'none', 'direct', 'complex' ] }
+
+##
+# @HmatCacheWritePolicy:
+#
+# Cache write policy in the Memory Side Cache Information Structure
+# of HMAT
+#
+# For more information of @HmatCacheWritePolicy, see chapter
+# 5.2.27.5: Table 5-147: Field "Cache Attributes" of ACPI 6.3 spec.
+#
+# @none: None (no memory side cache in this proximity domain,
+# or cache write policy unknown)
+#
+# @write-back: Write Back (WB)
+#
+# @write-through: Write Through (WT)
+#
+# Since: 5.0
+##
+{ 'enum': 'HmatCacheWritePolicy',
+ 'data': [ 'none', 'write-back', 'write-through' ] }
+
+##
+# @NumaHmatCacheOptions:
+#
+# Set the memory side cache information for a given memory domain.
+#
+# For more information of @NumaHmatCacheOptions, see chapter
+# 5.2.27.5: Table 5-147: Field "Cache Attributes" of ACPI 6.3 spec.
+#
+# @node-id: the memory proximity domain to which the memory belongs.
+#
+# @size: the size of memory side cache in bytes.
+#
+# @level: the cache level described in this structure.
+#
+# @associativity: the cache associativity,
+# none/direct-mapped/complex(complex cache indexing).
+#
+# @policy: the write policy, none/write-back/write-through.
+#
+# @line: the cache Line size in bytes.
+#
+# Since: 5.0
+##
+{ 'struct': 'NumaHmatCacheOptions',
+ 'data': {
+ 'node-id': 'uint32',
+ 'size': 'size',
+ 'level': 'uint8',
+ 'associativity': 'HmatCacheAssociativity',
+ 'policy': 'HmatCacheWritePolicy',
+ 'line': 'uint16' }}
+
+##
# @HostMemPolicy:
#
# Host memory policy types
diff --git a/qemu-options.hx b/qemu-options.hx
index e9d6231..d4b73ef 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -40,7 +40,8 @@ DEF("machine", HAS_ARG, QEMU_OPTION_machine, \
" suppress-vmdesc=on|off disables self-describing migration (default=off)\n"
" nvdimm=on|off controls NVDIMM support (default=off)\n"
" enforce-config-section=on|off enforce configuration section migration (default=off)\n"
- " memory-encryption=@var{} memory encryption object to use (default=none)\n",
+ " memory-encryption=@var{} memory encryption object to use (default=none)\n"
+ " hmat=on|off controls ACPI HMAT support (default=off)\n",
QEMU_ARCH_ALL)
STEXI
@item -machine [type=]@var{name}[,prop=@var{value}[,...]]
@@ -94,6 +95,9 @@ NOTE: this parameter is deprecated. Please use @option{-global}
@option{migration.send-configuration}=@var{on|off} instead.
@item memory-encryption=@var{}
Memory encryption object to use. The default is none.
+@item hmat=on|off
+Enables or disables ACPI Heterogeneous Memory Attribute Table (HMAT) support.
+The default is off.
@end table
ETEXI
@@ -168,19 +172,24 @@ If any on the three values is given, the total number of CPUs @var{n} can be omi
ETEXI
DEF("numa", HAS_ARG, QEMU_OPTION_numa,
- "-numa node[,mem=size][,cpus=firstcpu[-lastcpu]][,nodeid=node]\n"
- "-numa node[,memdev=id][,cpus=firstcpu[-lastcpu]][,nodeid=node]\n"
+ "-numa node[,mem=size][,cpus=firstcpu[-lastcpu]][,nodeid=node][,initiator=node]\n"
+ "-numa node[,memdev=id][,cpus=firstcpu[-lastcpu]][,nodeid=node][,initiator=node]\n"
"-numa dist,src=source,dst=destination,val=distance\n"
- "-numa cpu,node-id=node[,socket-id=x][,core-id=y][,thread-id=z]\n",
+ "-numa cpu,node-id=node[,socket-id=x][,core-id=y][,thread-id=z]\n"
+ "-numa hmat-lb,initiator=node,target=node,hierarchy=memory|first-level|second-level|third-level,data-type=access-latency|read-latency|write-latency[,latency=lat][,bandwidth=bw]\n"
+ "-numa hmat-cache,node-id=node,size=size,level=level[,associativity=none|direct|complex][,policy=none|write-back|write-through][,line=size]\n",
QEMU_ARCH_ALL)
STEXI
-@item -numa node[,mem=@var{size}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}]
-@itemx -numa node[,memdev=@var{id}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}]
+@item -numa node[,mem=@var{size}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}][,initiator=@var{initiator}]
+@itemx -numa node[,memdev=@var{id}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}][,initiator=@var{initiator}]
@itemx -numa dist,src=@var{source},dst=@var{destination},val=@var{distance}
@itemx -numa cpu,node-id=@var{node}[,socket-id=@var{x}][,core-id=@var{y}][,thread-id=@var{z}]
+@itemx -numa hmat-lb,initiator=@var{node},target=@var{node},hierarchy=@var{hierarchy},data-type=@var{type}[,latency=@var{lat}][,bandwidth=@var{bw}]
+@itemx -numa hmat-cache,node-id=@var{node},size=@var{size},level=@var{level}[,associativity=@var{str}][,policy=@var{str}][,line=@var{size}]
@findex -numa
Define a NUMA node and assign RAM and VCPUs to it.
Set the NUMA distance from a source node to a destination node.
+Set the ACPI Heterogeneous Memory Attributes for the given nodes.
Legacy VCPU assignment uses @samp{cpus} option where
@var{firstcpu} and @var{lastcpu} are CPU indexes. Each
@@ -222,6 +231,27 @@ split equally between them.
@samp{mem} and @samp{memdev} are mutually exclusive. Furthermore,
if one node uses @samp{memdev}, all of them have to use it.
+@samp{initiator} is an additional option that points to an @var{initiator}
+NUMA node that has best performance (the lowest latency or largest bandwidth)
+to this NUMA @var{node}. Note that this option can be set only when
+the machine property 'hmat' is set to 'on'.
+
+The following example creates a machine with 2 NUMA nodes: node 0 has CPUs,
+node 1 has only memory, and its initiator is node 0. Note that because
+node 0 has CPUs, by default the initiator of node 0 is itself and must be
+itself.
+@example
+-machine hmat=on \
+-m 2G,slots=2,maxmem=4G \
+-object memory-backend-ram,size=1G,id=m0 \
+-object memory-backend-ram,size=1G,id=m1 \
+-numa node,nodeid=0,memdev=m0 \
+-numa node,nodeid=1,memdev=m1,initiator=0 \
+-smp 2,sockets=2,maxcpus=2 \
+-numa cpu,node-id=0,socket-id=0 \
+-numa cpu,node-id=0,socket-id=1
+@end example
+
@var{source} and @var{destination} are NUMA node IDs.
@var{distance} is the NUMA distance from @var{source} to @var{destination}.
The distance from a node to itself is always 10. If any pair of nodes is
@@ -238,6 +268,59 @@ specified resources, it just assigns existing resources to NUMA
nodes. This means that one still has to use the @option{-m},
@option{-smp} options to allocate RAM and VCPUs respectively.
+Use @samp{hmat-lb} to set System Locality Latency and Bandwidth Information
+between initiator and target NUMA nodes in ACPI Heterogeneous Memory Attribute Table (HMAT).
+Initiator NUMA node can create memory requests, usually it has one or more processors.
+Target NUMA node contains addressable memory.
+
+In @samp{hmat-lb} option, @var{node} are NUMA node IDs. @var{hierarchy} is the memory
+hierarchy of the target NUMA node: if @var{hierarchy} is 'memory', the structure
+represents the memory performance; if @var{hierarchy} is 'first-level|second-level|third-level',
+this structure represents aggregated performance of memory side caches for each domain.
+@var{type} of 'data-type' is type of data represented by this structure instance:
+if 'hierarchy' is 'memory', 'data-type' is 'access|read|write' latency or 'access|read|write'
+bandwidth of the target memory; if 'hierarchy' is 'first-level|second-level|third-level',
+'data-type' is 'access|read|write' hit latency or 'access|read|write' hit bandwidth of the
+target memory side cache.
+
+@var{lat} is the latency value in nanoseconds. @var{bw} is the bandwidth value;
+it is written as NUM[M|G|T], meaning that the bandwidth is
+NUM bytes per second (or MB/s, GB/s or TB/s depending on the suffix used).
+Note that a latency or bandwidth value of 0 means that the corresponding latency or
+bandwidth information is not provided.
+
+In @samp{hmat-cache} option, @var{node-id} is the ID of the NUMA node to which the memory belongs.
+@var{size} is the size of memory side cache in bytes. @var{level} is the cache
+level described in this structure, note that the cache level 0 should not be used
+with @samp{hmat-cache} option. @var{associativity} is the cache associativity,
+the possible value is 'none/direct(direct-mapped)/complex(complex cache indexing)'.
+@var{policy} is the write policy. @var{line} is the cache Line size in bytes.
+
+For example, the following options describe 2 NUMA nodes. Node 0 has 2 CPUs and
+RAM, node 1 has only RAM. The processors in node 0 access memory in node
+0 with an access-latency of 5 nanoseconds and an access-bandwidth of 200 MB/s;
+the processors in NUMA node 0 access memory in NUMA node 1 with an access-latency of 10
+nanoseconds and an access-bandwidth of 100 MB/s.
+As for memory side cache information, NUMA nodes 0 and 1 both have one level of memory
+cache, size is 10KB, policy is write-back, the cache Line size is 8 bytes:
+@example
+-machine hmat=on \
+-m 2G \
+-object memory-backend-ram,size=1G,id=m0 \
+-object memory-backend-ram,size=1G,id=m1 \
+-smp 2 \
+-numa node,nodeid=0,memdev=m0 \
+-numa node,nodeid=1,memdev=m1,initiator=0 \
+-numa cpu,node-id=0,socket-id=0 \
+-numa cpu,node-id=0,socket-id=1 \
+-numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-latency,latency=5 \
+-numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-bandwidth,bandwidth=200M \
+-numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-latency,latency=10 \
+-numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-bandwidth,bandwidth=100M \
+-numa hmat-cache,node-id=0,size=10K,level=1,associativity=direct,policy=write-back,line=8 \
+-numa hmat-cache,node-id=1,size=10K,level=1,associativity=direct,policy=write-back,line=8
+@end example
+
ETEXI
DEF("add-fd", HAS_ARG, QEMU_OPTION_add_fd,
diff --git a/tests/acceptance/virtio_seg_max_adjust.py b/tests/acceptance/virtio_seg_max_adjust.py
new file mode 100755
index 0000000..5458573
--- /dev/null
+++ b/tests/acceptance/virtio_seg_max_adjust.py
@@ -0,0 +1,134 @@
+#!/usr/bin/env python
+#
+# Test virtio-scsi and virtio-blk queue settings for all machine types
+#
+# Copyright (c) 2019 Virtuozzo International GmbH
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+import sys
+import os
+import re
+
+sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'python'))
+from qemu.machine import QEMUMachine
+from avocado_qemu import Test
+
+# per-device-type mapping from the generic property key to the name shown by 'info qtree'
+VIRTIO_SCSI_PROPS = {'seg_max_adjust': 'seg_max_adjust'}
+VIRTIO_BLK_PROPS = {'seg_max_adjust': 'seg-max-adjust'}
+
+DEV_TYPES = {'virtio-scsi-pci': VIRTIO_SCSI_PROPS,
+ 'virtio-blk-pci': VIRTIO_BLK_PROPS}
+
+VM_DEV_PARAMS = {'virtio-scsi-pci': ['-device', 'virtio-scsi-pci,id=scsi0'],
+ 'virtio-blk-pci': ['-device',
+ 'virtio-blk-pci,id=scsi0,drive=drive0',
+ '-drive',
+ 'driver=null-co,id=drive0,if=none']}
+
+
+class VirtioMaxSegSettingsCheck(Test):
+ @staticmethod
+ def make_pattern(props):
+ pattern_items = ['{0} = \w+'.format(prop) for prop in props]
+ return '|'.join(pattern_items)
+
+ def query_virtqueue(self, vm, dev_type_name):
+ query_ok = False
+ error = None
+ props = None
+
+ output = vm.command('human-monitor-command',
+ command_line = 'info qtree')
+ props_list = DEV_TYPES[dev_type_name].values();
+ pattern = self.make_pattern(props_list)
+ res = re.findall(pattern, output)
+
+ if len(res) != len(props_list):
+ props_list = set(props_list)
+ res = set(res)
+ not_found = props_list.difference(res)
+ not_found = ', '.join(not_found)
+ error = '({0}): The following properties not found: {1}'\
+ .format(dev_type_name, not_found)
+ else:
+ query_ok = True
+ props = dict()
+ for prop in res:
+ p = prop.split(' = ')
+ props[p[0]] = p[1]
+ return query_ok, props, error
+
+ def check_mt(self, mt, dev_type_name):
+ with QEMUMachine(self.qemu_bin) as vm:
+ vm.set_machine(mt["name"])
+ for s in VM_DEV_PARAMS[dev_type_name]:
+ vm.add_args(s)
+ vm.launch()
+ query_ok, props, error = self.query_virtqueue(vm, dev_type_name)
+
+ if not query_ok:
+ self.fail('machine type {0}: {1}'.format(mt['name'], error))
+
+ for prop_name, prop_val in props.items():
+ expected_val = mt[prop_name]
+ self.assertEqual(expected_val, prop_val)
+
+ @staticmethod
+ def seg_max_adjust_enabled(mt):
+ # machine types >= 5.0 should have seg_max_adjust = true
+ # others seg_max_adjust = false
+ mt = mt.split("-")
+
+ # names with at most one dash (a single-word name, or a name like pc-x.x) count as disabled
+ if len(mt) <= 2:
+ return False
+
+ # machine types like pc-<chip_name>-x.x[.x]: the version is the third dash-separated field
+ ver = mt[2]
+ ver = ver.split(".");
+ # NOTE(review): int() below raises ValueError if the part before the first '.' is not an integer
+ # versions >= 5.0 go with seg_max_adjust enabled
+ major = int(ver[0])
+
+ if major >= 5:
+ return True
+ return False
+
+ def test_machine_types(self):
+ # collect all machine types except 'none', 'isapc', 'microvm'
+ with QEMUMachine(self.qemu_bin) as vm:
+ vm.launch()
+ machines = [m['name'] for m in vm.command('query-machines')]
+ vm.shutdown()
+ machines.remove('none')
+ machines.remove('isapc')
+ machines.remove('microvm')
+
+ for dev_type in DEV_TYPES:
+ # create the list of machine types and their parameters.
+ mtypes = list()
+ for m in machines:
+ if self.seg_max_adjust_enabled(m):
+ enabled = 'true'
+ else:
+ enabled = 'false'
+ mtypes.append({'name': m,
+ DEV_TYPES[dev_type]['seg_max_adjust']: enabled})
+
+ # test each machine type for a device type
+ for mt in mtypes:
+ self.check_mt(mt, dev_type)
diff --git a/tests/bios-tables-test.c b/tests/bios-tables-test.c
index bc0ad59..f1ac2d7 100644
--- a/tests/bios-tables-test.c
+++ b/tests/bios-tables-test.c
@@ -947,6 +947,48 @@ static void test_acpi_virt_tcg_numamem(void)
}
+static void test_acpi_tcg_acpi_hmat(const char *machine)
+{
+ test_data data;
+ /* Zero the descriptor, then select @machine and the ".acpihmat" variant before running. */
+ memset(&data, 0, sizeof(data));
+ data.machine = machine;
+ data.variant = ".acpihmat";
+ test_acpi_one(" -machine hmat=on"
+ " -smp 2,sockets=2"
+ " -m 128M,slots=2,maxmem=1G"
+ " -object memory-backend-ram,size=64M,id=m0"
+ " -object memory-backend-ram,size=64M,id=m1"
+ " -numa node,nodeid=0,memdev=m0"
+ " -numa node,nodeid=1,memdev=m1,initiator=0"
+ " -numa cpu,node-id=0,socket-id=0"
+ " -numa cpu,node-id=0,socket-id=1"
+ " -numa hmat-lb,initiator=0,target=0,hierarchy=memory,"
+ "data-type=access-latency,latency=1"
+ " -numa hmat-lb,initiator=0,target=0,hierarchy=memory,"
+ "data-type=access-bandwidth,bandwidth=65534M"
+ " -numa hmat-lb,initiator=0,target=1,hierarchy=memory,"
+ "data-type=access-latency,latency=65534"
+ " -numa hmat-lb,initiator=0,target=1,hierarchy=memory,"
+ "data-type=access-bandwidth,bandwidth=32767M"
+ " -numa hmat-cache,node-id=0,size=10K,level=1,"
+ "associativity=direct,policy=write-back,line=8"
+ " -numa hmat-cache,node-id=1,size=10K,level=1,"
+ "associativity=direct,policy=write-back,line=8",
+ &data);
+ free_test_data(&data);
+}
+/* Run the HMAT ACPI table test with MACHINE_Q35. */
+static void test_acpi_q35_tcg_acpi_hmat(void)
+{
+ test_acpi_tcg_acpi_hmat(MACHINE_Q35);
+}
+/* Run the HMAT ACPI table test with MACHINE_PC (registered as "acpi/piix4/acpihmat"). */
+static void test_acpi_piix4_tcg_acpi_hmat(void)
+{
+ test_acpi_tcg_acpi_hmat(MACHINE_PC);
+}
+
static void test_acpi_virt_tcg(void)
{
test_data data = {
@@ -991,6 +1033,8 @@ int main(int argc, char *argv[])
qtest_add_func("acpi/q35/numamem", test_acpi_q35_tcg_numamem);
qtest_add_func("acpi/piix4/dimmpxm", test_acpi_piix4_tcg_dimm_pxm);
qtest_add_func("acpi/q35/dimmpxm", test_acpi_q35_tcg_dimm_pxm);
+ qtest_add_func("acpi/piix4/acpihmat", test_acpi_piix4_tcg_acpi_hmat);
+ qtest_add_func("acpi/q35/acpihmat", test_acpi_q35_tcg_acpi_hmat);
} else if (strcmp(arch, "aarch64") == 0) {
qtest_add_func("acpi/virt", test_acpi_virt_tcg);
qtest_add_func("acpi/virt/numamem", test_acpi_virt_tcg_numamem);
diff --git a/tests/data/acpi/pc/APIC.acpihmat b/tests/data/acpi/pc/APIC.acpihmat
new file mode 100644
index 0000000..a21f164
--- /dev/null
+++ b/tests/data/acpi/pc/APIC.acpihmat
Binary files differ
diff --git a/tests/data/acpi/pc/DSDT.acpihmat b/tests/data/acpi/pc/DSDT.acpihmat
new file mode 100644
index 0000000..ad890e0
--- /dev/null
+++ b/tests/data/acpi/pc/DSDT.acpihmat
Binary files differ
diff --git a/tests/data/acpi/pc/HMAT.acpihmat b/tests/data/acpi/pc/HMAT.acpihmat
new file mode 100644
index 0000000..c00f7ba
--- /dev/null
+++ b/tests/data/acpi/pc/HMAT.acpihmat
Binary files differ
diff --git a/tests/data/acpi/pc/SRAT.acpihmat b/tests/data/acpi/pc/SRAT.acpihmat
new file mode 100644
index 0000000..1dcae90
--- /dev/null
+++ b/tests/data/acpi/pc/SRAT.acpihmat
Binary files differ
diff --git a/tests/data/acpi/q35/APIC.acpihmat b/tests/data/acpi/q35/APIC.acpihmat
new file mode 100644
index 0000000..a21f164
--- /dev/null
+++ b/tests/data/acpi/q35/APIC.acpihmat
Binary files differ
diff --git a/tests/data/acpi/q35/DSDT.acpihmat b/tests/data/acpi/q35/DSDT.acpihmat
new file mode 100644
index 0000000..30e3717
--- /dev/null
+++ b/tests/data/acpi/q35/DSDT.acpihmat
Binary files differ
diff --git a/tests/data/acpi/q35/HMAT.acpihmat b/tests/data/acpi/q35/HMAT.acpihmat
new file mode 100644
index 0000000..c00f7ba
--- /dev/null
+++ b/tests/data/acpi/q35/HMAT.acpihmat
Binary files differ
diff --git a/tests/data/acpi/q35/SRAT.acpihmat b/tests/data/acpi/q35/SRAT.acpihmat
new file mode 100644
index 0000000..1dcae90
--- /dev/null
+++ b/tests/data/acpi/q35/SRAT.acpihmat
Binary files differ
diff --git a/tests/numa-test.c b/tests/numa-test.c
index 8de8581..17dd807 100644
--- a/tests/numa-test.c
+++ b/tests/numa-test.c
@@ -327,6 +327,216 @@ static void pc_dynamic_cpu_cfg(const void *data)
qtest_quit(qs);
}
+static void pc_hmat_build_cfg(const void *data)
+{
+ QTestState *qs = qtest_initf("%s -nodefaults --preconfig -machine hmat=on "
+ "-smp 2,sockets=2 "
+ "-m 128M,slots=2,maxmem=1G "
+ "-object memory-backend-ram,size=64M,id=m0 "
+ "-object memory-backend-ram,size=64M,id=m1 "
+ "-numa node,nodeid=0,memdev=m0 "
+ "-numa node,nodeid=1,memdev=m1,initiator=0 "
+ "-numa cpu,node-id=0,socket-id=0 "
+ "-numa cpu,node-id=0,socket-id=1",
+ data ? (char *)data : "");
+
+ /*
+ * HMAT build test: QEMU is started in preconfig mode with hmat=on, two
+ * NUMA nodes backed by m0/m1, and both CPU sockets assigned to node 0.
+ * 'data', when non-NULL, is prepended to the command line as extra
+ * arguments. Each set-numa-node request below is checked with
+ * qmp_rsp_is_err(): g_assert_true() marks a request that must be
+ * rejected, g_assert_false() one that must be accepted. The requests
+ * are order-dependent (the duplicate checks rely on the preceding
+ * accepted request).
+ *
+ * Fail: Initiator should be less than the number of nodes
+ */
+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'hmat-lb', 'initiator': 2, 'target': 0,"
+ " 'hierarchy': \"memory\", 'data-type': \"access-latency\" } }")));
+
+ /* Fail: Target should be less than the number of nodes */
+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 2,"
+ " 'hierarchy': \"memory\", 'data-type': \"access-latency\" } }")));
+
+ /* Fail: Initiator should contain cpu (node 1 has no CPU assigned) */
+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'hmat-lb', 'initiator': 1, 'target': 0,"
+ " 'hierarchy': \"memory\", 'data-type': \"access-latency\" } }")));
+
+ /* Fail: Data-type mismatch (bandwidth given for a latency data-type) */
+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0,"
+ " 'hierarchy': \"memory\", 'data-type': \"write-latency\","
+ " 'bandwidth': 524288000 } }")));
+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0,"
+ " 'hierarchy': \"memory\", 'data-type': \"read-bandwidth\","
+ " 'latency': 5 } }"))); /* Fail: latency given for a bandwidth data-type */
+
+ /* Fail: Bandwidth should be 1MB (1048576) aligned */
+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0,"
+ " 'hierarchy': \"memory\", 'data-type': \"access-bandwidth\","
+ " 'bandwidth': 1048575 } }")));
+
+ /*
+ * Configuring HMAT bandwidth and latency details: valid requests for
+ * targets 0 and 1, with one duplicate request that must be rejected.
+ */
+ g_assert_false(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0,"
+ " 'hierarchy': \"memory\", 'data-type': \"access-latency\","
+ " 'latency': 1 } }"))); /* 1 ns */
+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0,"
+ " 'hierarchy': \"memory\", 'data-type': \"access-latency\","
+ " 'latency': 5 } }"))); /* Fail: Duplicate configuration */
+ g_assert_false(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0,"
+ " 'hierarchy': \"memory\", 'data-type': \"access-bandwidth\","
+ " 'bandwidth': 68717379584 } }"))); /* 65534 MB/s */
+ g_assert_false(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 1,"
+ " 'hierarchy': \"memory\", 'data-type': \"access-latency\","
+ " 'latency': 65534 } }"))); /* 65534 ns */
+ g_assert_false(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 1,"
+ " 'hierarchy': \"memory\", 'data-type': \"access-bandwidth\","
+ " 'bandwidth': 34358689792 } }"))); /* 32767 MB/s */
+
+ /* Fail: node_id should be less than the number of nodes */
+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'hmat-cache', 'node-id': 2, 'size': 10240,"
+ " 'level': 1, 'associativity': \"direct\", 'policy': \"write-back\","
+ " 'line': 8 } }")));
+
+ /* Fail: level should be less than HMAT_LB_LEVELS (4) */
+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'hmat-cache', 'node-id': 0, 'size': 10240,"
+ " 'level': 4, 'associativity': \"direct\", 'policy': \"write-back\","
+ " 'line': 8 } }")));
+
+ /* Fail: associativity option should be 'none', if level is 0 */
+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'hmat-cache', 'node-id': 0, 'size': 10240,"
+ " 'level': 0, 'associativity': \"direct\", 'policy': \"none\","
+ " 'line': 0 } }")));
+ /* Fail: policy option should be 'none', if level is 0 */
+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'hmat-cache', 'node-id': 0, 'size': 10240,"
+ " 'level': 0, 'associativity': \"none\", 'policy': \"write-back\","
+ " 'line': 0 } }")));
+ /* Fail: line option should be 0, if level is 0 */
+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'hmat-cache', 'node-id': 0, 'size': 10240,"
+ " 'level': 0, 'associativity': \"none\", 'policy': \"none\","
+ " 'line': 8 } }")));
+
+ /*
+ * Configuring HMAT memory side cache attributes: a valid level 1 cache
+ * for node 0 is accepted first; the requests that follow for node 0
+ * are all expected to be rejected.
+ */
+ g_assert_false(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'hmat-cache', 'node-id': 0, 'size': 10240,"
+ " 'level': 1, 'associativity': \"direct\", 'policy': \"write-back\","
+ " 'line': 8 } }")));
+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'hmat-cache', 'node-id': 0, 'size': 10240,"
+ " 'level': 1, 'associativity': \"direct\", 'policy': \"write-back\","
+ " 'line': 8 } }"))); /* Fail: Duplicate configuration */
+ /* Fail: The level 2 cache size should be smaller than the level 1 size */
+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'hmat-cache', 'node-id': 0, 'size': 10240,"
+ " 'level': 2, 'associativity': \"direct\", 'policy': \"write-back\","
+ " 'line': 8 } }")));
+ /* Fail: The level 0 cache size should be larger than the level 1 size */
+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'hmat-cache', 'node-id': 0, 'size': 10240,"
+ " 'level': 0, 'associativity': \"direct\", 'policy': \"write-back\","
+ " 'line': 8 } }")));
+ g_assert_false(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'hmat-cache', 'node-id': 1, 'size': 10240,"
+ " 'level': 1, 'associativity': \"direct\", 'policy': \"write-back\","
+ " 'line': 8 } }"))); /* the same level 1 cache is accepted for node 1 */
+
+ /* Leave preconfig mode so machine initialization can complete and run */
+ g_assert_false(qmp_rsp_is_err(qtest_qmp(qs,
+ "{ 'execute': 'x-exit-preconfig' }")));
+ qtest_qmp_eventwait(qs, "RESUME"); /* block until the RESUME event arrives */
+
+ qtest_quit(qs);
+}
+
+static void pc_hmat_off_cfg(const void *data)
+{
+ QTestState *qs = qtest_initf("%s -nodefaults --preconfig "
+ "-smp 2,sockets=2 "
+ "-m 128M,slots=2,maxmem=1G "
+ "-object memory-backend-ram,size=64M,id=m0 "
+ "-object memory-backend-ram,size=64M,id=m1 "
+ "-numa node,nodeid=0,memdev=m0",
+ data ? (char *)data : "");
+
+ /*
+ * HMAT-off test: QEMU is started in preconfig mode WITHOUT
+ * "-machine hmat=on" and with only NUMA node 0 defined on the command
+ * line. 'data', when non-NULL, is prepended to the command line as
+ * extra arguments. g_assert_true() marks a request that must be
+ * rejected, g_assert_false() one that must be accepted.
+ *
+ * Fail: Enable HMAT with -machine hmat=on
+ * before using any of hmat specific options
+ *
+ * Below, adding node 1 with 'initiator' is rejected, adding the same
+ * node without 'initiator' is accepted, and both the hmat-lb and the
+ * hmat-cache requests are rejected.
+ */
+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'node', 'nodeid': 1, 'memdev': \"m1\","
+ " 'initiator': 0 } }")));
+ g_assert_false(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'node', 'nodeid': 1, 'memdev': \"m1\" } }")));
+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0,"
+ " 'hierarchy': \"memory\", 'data-type': \"access-latency\","
+ " 'latency': 1 } }")));
+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'hmat-cache', 'node-id': 0, 'size': 10240,"
+ " 'level': 1, 'associativity': \"direct\", 'policy': \"write-back\","
+ " 'line': 8 } }")));
+
+ /* Leave preconfig mode so machine initialization can complete and run */
+ g_assert_false(qmp_rsp_is_err(qtest_qmp(qs,
+ "{ 'execute': 'x-exit-preconfig' }")));
+ qtest_qmp_eventwait(qs, "RESUME"); /* block until the RESUME event arrives */
+
+ qtest_quit(qs);
+}
+
+static void pc_hmat_erange_cfg(const void *data)
+{
+ QTestState *qs = qtest_initf("%s -nodefaults --preconfig -machine hmat=on "
+ "-smp 2,sockets=2 "
+ "-m 128M,slots=2,maxmem=1G "
+ "-object memory-backend-ram,size=64M,id=m0 "
+ "-object memory-backend-ram,size=64M,id=m1 "
+ "-numa node,nodeid=0,memdev=m0 "
+ "-numa node,nodeid=1,memdev=m1,initiator=0 "
+ "-numa cpu,node-id=0,socket-id=0 "
+ "-numa cpu,node-id=0,socket-id=1",
+ data ? (char *)data : "");
+
+ /*
+ * HMAT out-of-range test: QEMU is started in preconfig mode with
+ * hmat=on, two NUMA nodes and both CPU sockets assigned to node 0.
+ * 'data', when non-NULL, is prepended to the command line as extra
+ * arguments. g_assert_true() marks a request that must be rejected,
+ * g_assert_false() one that must be accepted.
+ *
+ * Can't store the compressed latency: after 1 ns is accepted for
+ * target 0, the 65535 ns request for target 1 must be rejected.
+ */
+ g_assert_false(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0,"
+ " 'hierarchy': \"memory\", 'data-type': \"access-latency\","
+ " 'latency': 1 } }"))); /* 1 ns */
+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 1,"
+ " 'hierarchy': \"memory\", 'data-type': \"access-latency\","
+ " 'latency': 65535 } }"))); /* 65535 ns */
+
+ /* Test the 0 input (bandwidth not provided); the request is accepted */
+ g_assert_false(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0,"
+ " 'hierarchy': \"memory\", 'data-type': \"access-bandwidth\","
+ " 'bandwidth': 0 } }"))); /* 0 MB/s */
+ /* Fail: bandwidth should be provided before memory side cache attributes */
+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'hmat-cache', 'node-id': 0, 'size': 10240,"
+ " 'level': 1, 'associativity': \"direct\", 'policy': \"write-back\","
+ " 'line': 8 } }")));
+
+ /* Fail: Can't store the compressed bandwidth */
+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node',"
+ " 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 1,"
+ " 'hierarchy': \"memory\", 'data-type': \"access-bandwidth\","
+ " 'bandwidth': 68718428160 } }"))); /* 65535 MB/s */
+
+ /* Leave preconfig mode so machine initialization can complete and run */
+ g_assert_false(qmp_rsp_is_err(qtest_qmp(qs,
+ "{ 'execute': 'x-exit-preconfig' }")));
+ qtest_qmp_eventwait(qs, "RESUME"); /* block until the RESUME event arrives */
+
+ qtest_quit(qs);
+}
+
int main(int argc, char **argv)
{
const char *args = NULL;
@@ -346,6 +556,9 @@ int main(int argc, char **argv)
if (!strcmp(arch, "i386") || !strcmp(arch, "x86_64")) {
qtest_add_data_func("/numa/pc/cpu/explicit", args, pc_numa_cpu);
qtest_add_data_func("/numa/pc/dynamic/cpu", args, pc_dynamic_cpu_cfg);
+ qtest_add_data_func("/numa/pc/hmat/build", args, pc_hmat_build_cfg);
+ qtest_add_data_func("/numa/pc/hmat/off", args, pc_hmat_off_cfg);
+ qtest_add_data_func("/numa/pc/hmat/erange", args, pc_hmat_erange_cfg);
}
if (!strcmp(arch, "ppc64")) {