aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2018-01-19 16:35:25 +0000
committerPeter Maydell <peter.maydell@linaro.org>2018-01-19 16:35:25 +0000
commitb384cd95eb9c6f73ad84ed1bb0717a26e29cc78f (patch)
treebbd0e7a39efd262c9196edea6d75a88d47557d8a
parent3e5bdc6573edf0585e4085e6a4e349b135abf3b4 (diff)
parentd6b6abc51dda79a97f2c7bd6652c1940c068f1ec (diff)
downloadqemu-b384cd95eb9c6f73ad84ed1bb0717a26e29cc78f.zip
qemu-b384cd95eb9c6f73ad84ed1bb0717a26e29cc78f.tar.gz
qemu-b384cd95eb9c6f73ad84ed1bb0717a26e29cc78f.tar.bz2
Merge remote-tracking branch 'remotes/ehabkost/tags/machine-next-pull-request' into staging
machine queue, 2018-01-19 # gpg: Signature made Fri 19 Jan 2018 16:30:19 GMT # gpg: using RSA key 0x2807936F984DC5A6 # gpg: Good signature from "Eduardo Habkost <ehabkost@redhat.com>" # Primary key fingerprint: 5A32 2FD5 ABC4 D3DB ACCF D1AA 2807 936F 984D C5A6 * remotes/ehabkost/tags/machine-next-pull-request: fw_cfg: fix memory corruption when all fw_cfg slots are used possible_cpus: add CPUArchId::type field nvdimm: add 'unarmed' option nvdimm: add a macro for property "label-size" hostmem-file: add "align" option scripts: Remove fixed entries from the device-crash-test qdev: Check for the availability of a hotplug controller before adding a device qdev_monitor: Simplify error handling in qdev_device_add() q35: Allow only supported dynamic sysbus devices xen: Add only xen-sysdev to dynamic sysbus device list spapr: Allow only supported dynamic sysbus devices ppc: e500: Allow only supported dynamic sysbus devices hw/arm/virt: Allow only supported dynamic sysbus devices machine: Replace has_dynamic_sysbus with list of allowed devices numa: fix missing '-numa cpu' in '-help' output qemu-options: document memory-backend-ram qemu-options: document missing memory-backend-file options memfd: remove needless include memfd: split qemu_memfd_alloc() Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r--backends/hostmem-file.c41
-rw-r--r--docs/nvdimm.txt31
-rw-r--r--exec.c8
-rw-r--r--hw/acpi/nvdimm.c7
-rw-r--r--hw/arm/virt.c8
-rw-r--r--hw/core/machine.c55
-rw-r--r--hw/core/qdev.c28
-rw-r--r--hw/i386/pc.c4
-rw-r--r--hw/i386/pc_q35.c5
-rw-r--r--hw/mem/nvdimm.c28
-rw-r--r--hw/nvram/fw_cfg.c6
-rw-r--r--hw/ppc/e500plat.c4
-rw-r--r--hw/ppc/spapr.c15
-rw-r--r--hw/s390x/s390-virtio-ccw.c1
-rw-r--r--hw/xen/xen_backend.c2
-rw-r--r--include/exec/memory.h3
-rw-r--r--include/hw/boards.h7
-rw-r--r--include/hw/mem/nvdimm.h12
-rw-r--r--include/hw/qdev-core.h1
-rw-r--r--include/qemu/memfd.h1
-rw-r--r--memory.c2
-rw-r--r--numa.c2
-rw-r--r--qdev-monitor.c21
-rw-r--r--qemu-options.hx66
-rwxr-xr-xscripts/device-crash-test8
-rw-r--r--util/memfd.c63
-rw-r--r--vl.c3
27 files changed, 332 insertions, 100 deletions
diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c
index e44c319..e319ec1 100644
--- a/backends/hostmem-file.c
+++ b/backends/hostmem-file.c
@@ -34,6 +34,7 @@ struct HostMemoryBackendFile {
bool share;
bool discard_data;
char *mem_path;
+ uint64_t align;
};
static void
@@ -58,7 +59,7 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
path = object_get_canonical_path(OBJECT(backend));
memory_region_init_ram_from_file(&backend->mr, OBJECT(backend),
path,
- backend->size, fb->share,
+ backend->size, fb->align, fb->share,
fb->mem_path, errp);
g_free(path);
}
@@ -115,6 +116,40 @@ static void file_memory_backend_set_discard_data(Object *o, bool value,
MEMORY_BACKEND_FILE(o)->discard_data = value;
}
+static void file_memory_backend_get_align(Object *o, Visitor *v,
+ const char *name, void *opaque,
+ Error **errp)
+{
+ HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o);
+ uint64_t val = fb->align;
+
+ visit_type_size(v, name, &val, errp);
+}
+
+static void file_memory_backend_set_align(Object *o, Visitor *v,
+ const char *name, void *opaque,
+ Error **errp)
+{
+ HostMemoryBackend *backend = MEMORY_BACKEND(o);
+ HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o);
+ Error *local_err = NULL;
+ uint64_t val;
+
+ if (host_memory_backend_mr_inited(backend)) {
+ error_setg(&local_err, "cannot change property value");
+ goto out;
+ }
+
+ visit_type_size(v, name, &val, &local_err);
+ if (local_err) {
+ goto out;
+ }
+ fb->align = val;
+
+ out:
+ error_propagate(errp, local_err);
+}
+
static void file_backend_unparent(Object *obj)
{
HostMemoryBackend *backend = MEMORY_BACKEND(obj);
@@ -145,6 +180,10 @@ file_backend_class_init(ObjectClass *oc, void *data)
object_class_property_add_str(oc, "mem-path",
get_mem_path, set_mem_path,
&error_abort);
+ object_class_property_add(oc, "align", "int",
+ file_memory_backend_get_align,
+ file_memory_backend_set_align,
+ NULL, NULL, &error_abort);
}
static void file_backend_instance_finalize(Object *o)
diff --git a/docs/nvdimm.txt b/docs/nvdimm.txt
index 2d9f8c0..e903d8b 100644
--- a/docs/nvdimm.txt
+++ b/docs/nvdimm.txt
@@ -122,3 +122,34 @@ Note:
M >= size of RAM devices +
size of statically plugged vNVDIMM devices +
size of hotplugged vNVDIMM devices
+
+Alignment
+---------
+
+QEMU uses mmap(2) to maps vNVDIMM backends and aligns the mapping
+address to the page size (getpagesize(2)) by default. However, some
+types of backends may require an alignment different than the page
+size. In that case, QEMU v2.12.0 and later provide 'align' option to
+memory-backend-file to allow users to specify the proper alignment.
+
+For example, device dax require the 2 MB alignment, so we can use
+following QEMU command line options to use it (/dev/dax0.0) as the
+backend of vNVDIMM:
+
+ -object memory-backend-file,id=mem1,share=on,mem-path=/dev/dax0.0,size=4G,align=2M
+ -device nvdimm,id=nvdimm1,memdev=mem1
+
+Guest Data Persistence
+----------------------
+
+Though QEMU supports multiple types of vNVDIMM backends on Linux,
+currently the only one that can guarantee the guest write persistence
+is the device DAX on the real NVDIMM device (e.g., /dev/dax0.0), to
+which all guest access do not involve any host-side kernel cache.
+
+When using other types of backends, it's suggested to set 'unarmed'
+option of '-device nvdimm' to 'on', which sets the unarmed flag of the
+guest NVDIMM region mapping structure. This unarmed flag indicates
+guest software that this vNVDIMM device contains a region that cannot
+accept persistent writes. In result, for example, the guest Linux
+NVDIMM driver, marks such vNVDIMM device as read-only.
diff --git a/exec.c b/exec.c
index d28fc0c..629a508 100644
--- a/exec.c
+++ b/exec.c
@@ -1612,7 +1612,13 @@ static void *file_ram_alloc(RAMBlock *block,
void *area;
block->page_size = qemu_fd_getpagesize(fd);
- block->mr->align = block->page_size;
+ if (block->mr->align % block->page_size) {
+ error_setg(errp, "alignment 0x%" PRIx64
+ " must be multiples of page size 0x%zx",
+ block->mr->align, block->page_size);
+ return NULL;
+ }
+ block->mr->align = MAX(block->page_size, block->mr->align);
#if defined(__s390x__)
if (kvm_enabled()) {
block->mr->align = MAX(block->mr->align, QEMU_VMALLOC_ALIGN);
diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c
index 6ceea19..59d6e42 100644
--- a/hw/acpi/nvdimm.c
+++ b/hw/acpi/nvdimm.c
@@ -138,6 +138,8 @@ struct NvdimmNfitMemDev {
} QEMU_PACKED;
typedef struct NvdimmNfitMemDev NvdimmNfitMemDev;
+#define ACPI_NFIT_MEM_NOT_ARMED (1 << 3)
+
/*
* NVDIMM Control Region Structure
*
@@ -284,6 +286,7 @@ static void
nvdimm_build_structure_memdev(GArray *structures, DeviceState *dev)
{
NvdimmNfitMemDev *nfit_memdev;
+ NVDIMMDevice *nvdimm = NVDIMM(OBJECT(dev));
uint64_t size = object_property_get_uint(OBJECT(dev), PC_DIMM_SIZE_PROP,
NULL);
int slot = object_property_get_int(OBJECT(dev), PC_DIMM_SLOT_PROP,
@@ -312,6 +315,10 @@ nvdimm_build_structure_memdev(GArray *structures, DeviceState *dev)
/* Only one interleave for PMEM. */
nfit_memdev->interleave_ways = cpu_to_le16(1);
+
+ if (nvdimm->unarmed) {
+ nfit_memdev->flags |= cpu_to_le16(ACPI_NFIT_MEM_NOT_ARMED);
+ }
}
/*
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 543f9bd..a4537af 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -34,6 +34,8 @@
#include "hw/arm/arm.h"
#include "hw/arm/primecell.h"
#include "hw/arm/virt.h"
+#include "hw/vfio/vfio-calxeda-xgmac.h"
+#include "hw/vfio/vfio-amd-xgbe.h"
#include "hw/devices.h"
#include "net/net.h"
#include "sysemu/block-backend.h"
@@ -1357,7 +1359,7 @@ static void machvirt_init(MachineState *machine)
break;
}
- cpuobj = object_new(machine->cpu_type);
+ cpuobj = object_new(possible_cpus->cpus[n].type);
object_property_set_int(cpuobj, possible_cpus->cpus[n].arch_id,
"mp-affinity", NULL);
@@ -1573,6 +1575,7 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms)
sizeof(CPUArchId) * max_cpus);
ms->possible_cpus->len = max_cpus;
for (n = 0; n < ms->possible_cpus->len; n++) {
+ ms->possible_cpus->cpus[n].type = ms->cpu_type;
ms->possible_cpus->cpus[n].arch_id =
virt_cpu_mp_affinity(vms, n);
ms->possible_cpus->cpus[n].props.has_thread_id = true;
@@ -1591,7 +1594,8 @@ static void virt_machine_class_init(ObjectClass *oc, void *data)
* configuration of the particular instance.
*/
mc->max_cpus = 255;
- mc->has_dynamic_sysbus = true;
+ machine_class_allow_dynamic_sysbus_dev(mc, TYPE_VFIO_CALXEDA_XGMAC);
+ machine_class_allow_dynamic_sysbus_dev(mc, TYPE_VFIO_AMD_XGBE);
mc->block_default_type = IF_VIRTIO;
mc->no_cdrom = 1;
mc->pci_allow_0_address = true;
diff --git a/hw/core/machine.c b/hw/core/machine.c
index c857f3f..cdc1163 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -334,46 +334,61 @@ static bool machine_get_enforce_config_section(Object *obj, Error **errp)
return ms->enforce_config_section;
}
-static void error_on_sysbus_device(SysBusDevice *sbdev, void *opaque)
+void machine_class_allow_dynamic_sysbus_dev(MachineClass *mc, const char *type)
{
- error_report("Option '-device %s' cannot be handled by this machine",
- object_class_get_name(object_get_class(OBJECT(sbdev))));
- exit(1);
+ strList *item = g_new0(strList, 1);
+
+ item->value = g_strdup(type);
+ item->next = mc->allowed_dynamic_sysbus_devices;
+ mc->allowed_dynamic_sysbus_devices = item;
}
-static void machine_init_notify(Notifier *notifier, void *data)
+static void validate_sysbus_device(SysBusDevice *sbdev, void *opaque)
{
- Object *machine = qdev_get_machine();
- ObjectClass *oc = object_get_class(machine);
- MachineClass *mc = MACHINE_CLASS(oc);
+ MachineState *machine = opaque;
+ MachineClass *mc = MACHINE_GET_CLASS(machine);
+ bool allowed = false;
+ strList *wl;
- if (mc->has_dynamic_sysbus) {
- /* Our machine can handle dynamic sysbus devices, we're all good */
- return;
+ for (wl = mc->allowed_dynamic_sysbus_devices;
+ !allowed && wl;
+ wl = wl->next) {
+ allowed |= !!object_dynamic_cast(OBJECT(sbdev), wl->value);
+ }
+
+ if (!allowed) {
+ error_report("Option '-device %s' cannot be handled by this machine",
+ object_class_get_name(object_get_class(OBJECT(sbdev))));
+ exit(1);
}
+}
+
+static void machine_init_notify(Notifier *notifier, void *data)
+{
+ MachineState *machine = MACHINE(qdev_get_machine());
/*
- * Loop through all dynamically created devices and check whether there
- * are sysbus devices among them. If there are, error out.
+ * Loop through all dynamically created sysbus devices and check if they are
+ * all allowed. If a device is not allowed, error out.
*/
- foreach_dynamic_sysbus_device(error_on_sysbus_device, NULL);
+ foreach_dynamic_sysbus_device(validate_sysbus_device, machine);
}
HotpluggableCPUList *machine_query_hotpluggable_cpus(MachineState *machine)
{
int i;
- Object *cpu;
HotpluggableCPUList *head = NULL;
- const char *cpu_type;
+ MachineClass *mc = MACHINE_GET_CLASS(machine);
+
+ /* force board to initialize possible_cpus if it hasn't been done yet */
+ mc->possible_cpu_arch_ids(machine);
- cpu = machine->possible_cpus->cpus[0].cpu;
- assert(cpu); /* Boot cpu is always present */
- cpu_type = object_get_typename(cpu);
for (i = 0; i < machine->possible_cpus->len; i++) {
+ Object *cpu;
HotpluggableCPUList *list_item = g_new0(typeof(*list_item), 1);
HotpluggableCPU *cpu_item = g_new0(typeof(*cpu_item), 1);
- cpu_item->type = g_strdup(cpu_type);
+ cpu_item->type = g_strdup(machine->possible_cpus->cpus[i].type);
cpu_item->vcpus_count = machine->possible_cpus->cpus[i].vcpus_count;
cpu_item->props = g_memdup(&machine->possible_cpus->cpus[i].props,
sizeof(*cpu_item->props));
diff --git a/hw/core/qdev.c b/hw/core/qdev.c
index 1111295..f739753 100644
--- a/hw/core/qdev.c
+++ b/hw/core/qdev.c
@@ -253,19 +253,31 @@ void qdev_set_legacy_instance_id(DeviceState *dev, int alias_id,
dev->alias_required_for_version = required_for_version;
}
+HotplugHandler *qdev_get_machine_hotplug_handler(DeviceState *dev)
+{
+ MachineState *machine;
+ MachineClass *mc;
+ Object *m_obj = qdev_get_machine();
+
+ if (object_dynamic_cast(m_obj, TYPE_MACHINE)) {
+ machine = MACHINE(m_obj);
+ mc = MACHINE_GET_CLASS(machine);
+ if (mc->get_hotplug_handler) {
+ return mc->get_hotplug_handler(machine, dev);
+ }
+ }
+
+ return NULL;
+}
+
HotplugHandler *qdev_get_hotplug_handler(DeviceState *dev)
{
- HotplugHandler *hotplug_ctrl = NULL;
+ HotplugHandler *hotplug_ctrl;
if (dev->parent_bus && dev->parent_bus->hotplug_handler) {
hotplug_ctrl = dev->parent_bus->hotplug_handler;
- } else if (object_dynamic_cast(qdev_get_machine(), TYPE_MACHINE)) {
- MachineState *machine = MACHINE(qdev_get_machine());
- MachineClass *mc = MACHINE_GET_CLASS(machine);
-
- if (mc->get_hotplug_handler) {
- hotplug_ctrl = mc->get_hotplug_handler(machine, dev);
- }
+ } else {
+ hotplug_ctrl = qdev_get_machine_hotplug_handler(dev);
}
return hotplug_ctrl;
}
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 55686bf..ccc50ba 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1148,7 +1148,8 @@ void pc_cpus_init(PCMachineState *pcms)
pcms->apic_id_limit = x86_cpu_apic_id_from_index(max_cpus - 1) + 1;
possible_cpus = mc->possible_cpu_arch_ids(ms);
for (i = 0; i < smp_cpus; i++) {
- pc_new_cpu(ms->cpu_type, possible_cpus->cpus[i].arch_id, &error_fatal);
+ pc_new_cpu(possible_cpus->cpus[i].type, possible_cpus->cpus[i].arch_id,
+ &error_fatal);
}
}
@@ -2307,6 +2308,7 @@ static const CPUArchIdList *pc_possible_cpu_arch_ids(MachineState *ms)
for (i = 0; i < ms->possible_cpus->len; i++) {
X86CPUTopoInfo topo;
+ ms->possible_cpus->cpus[i].type = ms->cpu_type;
ms->possible_cpus->cpus[i].vcpus_count = 1;
ms->possible_cpus->cpus[i].arch_id = x86_cpu_apic_id_from_index(i);
x86_topo_ids_from_apicid(ms->possible_cpus->cpus[i].arch_id,
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 5c6c608..ed3a0b8 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -42,6 +42,8 @@
#include "exec/address-spaces.h"
#include "hw/i386/pc.h"
#include "hw/i386/ich9.h"
+#include "hw/i386/amd_iommu.h"
+#include "hw/i386/intel_iommu.h"
#include "hw/smbios/smbios.h"
#include "hw/ide/pci.h"
#include "hw/ide/ahci.h"
@@ -299,7 +301,8 @@ static void pc_q35_machine_options(MachineClass *m)
m->default_machine_opts = "firmware=bios-256k.bin";
m->default_display = "std";
m->no_floppy = 1;
- m->has_dynamic_sysbus = true;
+ machine_class_allow_dynamic_sysbus_dev(m, TYPE_AMD_IOMMU_DEVICE);
+ machine_class_allow_dynamic_sysbus_dev(m, TYPE_INTEL_IOMMU_DEVICE);
m->max_cpus = 288;
}
diff --git a/hw/mem/nvdimm.c b/hw/mem/nvdimm.c
index 952fce5..61e677f 100644
--- a/hw/mem/nvdimm.c
+++ b/hw/mem/nvdimm.c
@@ -25,6 +25,7 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qapi/visitor.h"
+#include "qapi-visit.h"
#include "hw/mem/nvdimm.h"
static void nvdimm_get_label_size(Object *obj, Visitor *v, const char *name,
@@ -64,11 +65,36 @@ out:
error_propagate(errp, local_err);
}
+static bool nvdimm_get_unarmed(Object *obj, Error **errp)
+{
+ NVDIMMDevice *nvdimm = NVDIMM(obj);
+
+ return nvdimm->unarmed;
+}
+
+static void nvdimm_set_unarmed(Object *obj, bool value, Error **errp)
+{
+ NVDIMMDevice *nvdimm = NVDIMM(obj);
+ Error *local_err = NULL;
+
+ if (memory_region_size(&nvdimm->nvdimm_mr)) {
+ error_setg(&local_err, "cannot change property value");
+ goto out;
+ }
+
+ nvdimm->unarmed = value;
+
+ out:
+ error_propagate(errp, local_err);
+}
+
static void nvdimm_init(Object *obj)
{
- object_property_add(obj, "label-size", "int",
+ object_property_add(obj, NVDIMM_LABLE_SIZE_PROP, "int",
nvdimm_get_label_size, nvdimm_set_label_size, NULL,
NULL, NULL);
+ object_property_add_bool(obj, NVDIMM_UNARMED_PROP,
+ nvdimm_get_unarmed, nvdimm_set_unarmed, NULL);
}
static MemoryRegion *nvdimm_get_memory_region(PCDIMMDevice *dimm, Error **errp)
diff --git a/hw/nvram/fw_cfg.c b/hw/nvram/fw_cfg.c
index 753ac0e..4313484 100644
--- a/hw/nvram/fw_cfg.c
+++ b/hw/nvram/fw_cfg.c
@@ -784,7 +784,7 @@ void fw_cfg_add_file_callback(FWCfgState *s, const char *filename,
* index and "i - 1" is the one being copied from, thus the
* unusual start and end in the for statement.
*/
- for (i = count + 1; i > index; i--) {
+ for (i = count; i > index; i--) {
s->files->f[i] = s->files->f[i - 1];
s->files->f[i].select = cpu_to_be16(FW_CFG_FILE_FIRST + i);
s->entries[0][FW_CFG_FILE_FIRST + i] =
@@ -833,7 +833,6 @@ void *fw_cfg_modify_file(FWCfgState *s, const char *filename,
assert(s->files);
index = be32_to_cpu(s->files->count);
- assert(index < fw_cfg_file_slots(s));
for (i = 0; i < index; i++) {
if (strcmp(filename, s->files->f[i].name) == 0) {
@@ -843,6 +842,9 @@ void *fw_cfg_modify_file(FWCfgState *s, const char *filename,
return ptr;
}
}
+
+ assert(index < fw_cfg_file_slots(s));
+
/* add new one */
fw_cfg_add_file_callback(s, filename, NULL, NULL, NULL, data, len, true);
return NULL;
diff --git a/hw/ppc/e500plat.c b/hw/ppc/e500plat.c
index e59e80f..81d03e1 100644
--- a/hw/ppc/e500plat.c
+++ b/hw/ppc/e500plat.c
@@ -12,9 +12,11 @@
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "e500.h"
+#include "hw/net/fsl_etsec/etsec.h"
#include "hw/boards.h"
#include "sysemu/device_tree.h"
#include "sysemu/kvm.h"
+#include "hw/sysbus.h"
#include "hw/pci/pci.h"
#include "hw/ppc/openpic.h"
#include "kvm_ppc.h"
@@ -63,7 +65,7 @@ static void e500plat_machine_init(MachineClass *mc)
mc->desc = "generic paravirt e500 platform";
mc->init = e500plat_init;
mc->max_cpus = 32;
- mc->has_dynamic_sysbus = true;
+ machine_class_allow_dynamic_sysbus_dev(mc, TYPE_ETSEC_COMMON);
mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("e500v2_v30");
}
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 499ab64..a781dd2 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2226,11 +2226,6 @@ static void spapr_init_cpus(sPAPRMachineState *spapr)
int boot_cores_nr = smp_cpus / smp_threads;
int i;
- if (!type) {
- error_report("Unable to find sPAPR CPU Core definition");
- exit(1);
- }
-
possible_cpus = mc->possible_cpu_arch_ids(machine);
if (mc->has_hotpluggable_cpus) {
if (smp_cpus % smp_threads) {
@@ -3545,6 +3540,7 @@ static int64_t spapr_get_default_cpu_node_id(const MachineState *ms, int idx)
static const CPUArchIdList *spapr_possible_cpu_arch_ids(MachineState *machine)
{
int i;
+ const char *core_type;
int spapr_max_cores = max_cpus / smp_threads;
MachineClass *mc = MACHINE_GET_CLASS(machine);
@@ -3556,12 +3552,19 @@ static const CPUArchIdList *spapr_possible_cpu_arch_ids(MachineState *machine)
return machine->possible_cpus;
}
+ core_type = spapr_get_cpu_core_type(machine->cpu_type);
+ if (!core_type) {
+ error_report("Unable to find sPAPR CPU Core definition");
+ exit(1);
+ }
+
machine->possible_cpus = g_malloc0(sizeof(CPUArchIdList) +
sizeof(CPUArchId) * spapr_max_cores);
machine->possible_cpus->len = spapr_max_cores;
for (i = 0; i < machine->possible_cpus->len; i++) {
int core_id = i * smp_threads;
+ machine->possible_cpus->cpus[i].type = core_type;
machine->possible_cpus->cpus[i].vcpus_count = smp_threads;
machine->possible_cpus->cpus[i].arch_id = core_id;
machine->possible_cpus->cpus[i].props.has_core_id = true;
@@ -3843,7 +3846,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
mc->default_boot_order = "";
mc->default_ram_size = 512 * M_BYTE;
mc->kvm_type = spapr_kvm_type;
- mc->has_dynamic_sysbus = true;
+ machine_class_allow_dynamic_sysbus_dev(mc, TYPE_SPAPR_PCI_HOST_BRIDGE);
mc->pci_allow_0_address = true;
mc->get_hotplug_handler = spapr_get_hotplug_handler;
hc->pre_plug = spapr_machine_device_pre_plug;
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 35df7e1..3807dcb 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -414,6 +414,7 @@ static const CPUArchIdList *s390_possible_cpu_arch_ids(MachineState *ms)
sizeof(CPUArchId) * max_cpus);
ms->possible_cpus->len = max_cpus;
for (i = 0; i < ms->possible_cpus->len; i++) {
+ ms->possible_cpus->cpus[i].type = ms->cpu_type;
ms->possible_cpus->cpus[i].vcpus_count = 1;
ms->possible_cpus->cpus[i].arch_id = i;
ms->possible_cpus->cpus[i].props.has_core_id = true;
diff --git a/hw/xen/xen_backend.c b/hw/xen/xen_backend.c
index 0f849a2..7445b50 100644
--- a/hw/xen/xen_backend.c
+++ b/hw/xen/xen_backend.c
@@ -564,7 +564,7 @@ static void xen_set_dynamic_sysbus(void)
ObjectClass *oc = object_get_class(machine);
MachineClass *mc = MACHINE_CLASS(oc);
- mc->has_dynamic_sysbus = true;
+ machine_class_allow_dynamic_sysbus_dev(mc, TYPE_XENSYSDEV);
}
int xen_be_register(const char *type, struct XenDevOps *ops)
diff --git a/include/exec/memory.h b/include/exec/memory.h
index a4cabdf..07c5d6d 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -465,6 +465,8 @@ void memory_region_init_resizeable_ram(MemoryRegion *mr,
* @name: Region name, becomes part of RAMBlock name used in migration stream
* must be unique within any device
* @size: size of the region.
+ * @align: alignment of the region base address; if 0, the default alignment
+ * (getpagesize()) will be used.
* @share: %true if memory must be mmaped with the MAP_SHARED flag
* @path: the path in which to allocate the RAM.
* @errp: pointer to Error*, to store an error if it happens.
@@ -476,6 +478,7 @@ void memory_region_init_ram_from_file(MemoryRegion *mr,
struct Object *owner,
const char *name,
uint64_t size,
+ uint64_t align,
bool share,
const char *path,
Error **errp);
diff --git a/include/hw/boards.h b/include/hw/boards.h
index 156b16f..efb0a9e 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -76,10 +76,14 @@ void machine_set_cpu_numa_node(MachineState *machine,
const CpuInstanceProperties *props,
Error **errp);
+void machine_class_allow_dynamic_sysbus_dev(MachineClass *mc, const char *type);
+
+
/**
* CPUArchId:
* @arch_id - architecture-dependent CPU ID of present or possible CPU
* @cpu - pointer to corresponding CPU object if it's present on NULL otherwise
+ * @type - QOM class name of possible @cpu object
* @props - CPU object properties, initialized by board
* #vcpus_count - number of threads provided by @cpu object
*/
@@ -88,6 +92,7 @@ typedef struct {
int64_t vcpus_count;
CpuInstanceProperties props;
Object *cpu;
+ const char *type;
} CPUArchId;
/**
@@ -179,7 +184,6 @@ struct MachineClass {
no_floppy:1,
no_cdrom:1,
no_sdcard:1,
- has_dynamic_sysbus:1,
pci_allow_0_address:1,
legacy_fw_cfg_order:1;
int is_default;
@@ -197,6 +201,7 @@ struct MachineClass {
bool ignore_memory_transaction_failures;
int numa_mem_align_shift;
const char **valid_cpu_types;
+ strList *allowed_dynamic_sysbus_devices;
bool auto_enable_numa_with_memhp;
void (*numa_auto_assign_ram)(MachineClass *mc, NodeInfo *nodes,
int nb_nodes, ram_addr_t size);
diff --git a/include/hw/mem/nvdimm.h b/include/hw/mem/nvdimm.h
index 03e1ff9..7fd87c4 100644
--- a/include/hw/mem/nvdimm.h
+++ b/include/hw/mem/nvdimm.h
@@ -47,6 +47,10 @@
#define NVDIMM_CLASS(oc) OBJECT_CLASS_CHECK(NVDIMMClass, (oc), TYPE_NVDIMM)
#define NVDIMM_GET_CLASS(obj) OBJECT_GET_CLASS(NVDIMMClass, (obj), \
TYPE_NVDIMM)
+
+#define NVDIMM_LABLE_SIZE_PROP "label-size"
+#define NVDIMM_UNARMED_PROP "unarmed"
+
struct NVDIMMDevice {
/* private */
PCDIMMDevice parent_obj;
@@ -71,6 +75,14 @@ struct NVDIMMDevice {
* guest via ACPI NFIT and _FIT method if NVDIMM hotplug is supported.
*/
MemoryRegion nvdimm_mr;
+
+ /*
+ * The 'on' value results in the unarmed flag set in ACPI NFIT,
+ * which can be used to notify guest implicitly that the host
+ * backend (e.g., files on HDD, /dev/pmemX, etc.) cannot guarantee
+ * the guest write persistence.
+ */
+ bool unarmed;
};
typedef struct NVDIMMDevice NVDIMMDevice;
diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
index 0a71bf8..51473ee 100644
--- a/include/hw/qdev-core.h
+++ b/include/hw/qdev-core.h
@@ -286,6 +286,7 @@ DeviceState *qdev_try_create(BusState *bus, const char *name);
void qdev_init_nofail(DeviceState *dev);
void qdev_set_legacy_instance_id(DeviceState *dev, int alias_id,
int required_for_version);
+HotplugHandler *qdev_get_machine_hotplug_handler(DeviceState *dev);
HotplugHandler *qdev_get_hotplug_handler(DeviceState *dev);
void qdev_unplug(DeviceState *dev, Error **errp);
void qdev_simple_device_unplug_cb(HotplugHandler *hotplug_dev,
diff --git a/include/qemu/memfd.h b/include/qemu/memfd.h
index 745a8c5..41c24d8 100644
--- a/include/qemu/memfd.h
+++ b/include/qemu/memfd.h
@@ -16,6 +16,7 @@
#define F_SEAL_WRITE 0x0008 /* prevent writes */
#endif
+int qemu_memfd_create(const char *name, size_t size, unsigned int seals);
void *qemu_memfd_alloc(const char *name, size_t size, unsigned int seals,
int *fd);
void qemu_memfd_free(void *ptr, size_t size, int fd);
diff --git a/memory.c b/memory.c
index 4b41fb8..449a142 100644
--- a/memory.c
+++ b/memory.c
@@ -1570,6 +1570,7 @@ void memory_region_init_ram_from_file(MemoryRegion *mr,
struct Object *owner,
const char *name,
uint64_t size,
+ uint64_t align,
bool share,
const char *path,
Error **errp)
@@ -1578,6 +1579,7 @@ void memory_region_init_ram_from_file(MemoryRegion *mr,
mr->ram = true;
mr->terminates = true;
mr->destructor = memory_region_destructor_ram;
+ mr->align = align;
mr->ram_block = qemu_ram_alloc_from_file(size, mr, share, path, errp);
mr->dirty_log_mask = tcg_enabled() ? (1 << DIRTY_MEMORY_CODE) : 0;
}
diff --git a/numa.c b/numa.c
index 7b9c33a..83675a0 100644
--- a/numa.c
+++ b/numa.c
@@ -456,7 +456,7 @@ static void allocate_system_memory_nonnuma(MemoryRegion *mr, Object *owner,
if (mem_path) {
#ifdef __linux__
Error *err = NULL;
- memory_region_init_ram_from_file(mr, owner, name, ram_size, false,
+ memory_region_init_ram_from_file(mr, owner, name, ram_size, 0, false,
mem_path, &err);
if (err) {
error_report_err(err);
diff --git a/qdev-monitor.c b/qdev-monitor.c
index b4abb4b..c436616 100644
--- a/qdev-monitor.c
+++ b/qdev-monitor.c
@@ -613,28 +613,33 @@ DeviceState *qdev_device_add(QemuOpts *opts, Error **errp)
if (bus) {
qdev_set_parent_bus(dev, bus);
+ } else if (qdev_hotplug && !qdev_get_machine_hotplug_handler(dev)) {
+ /* No bus, no machine hotplug handler --> device is not hotpluggable */
+ error_setg(&err, "Device '%s' can not be hotplugged on this machine",
+ driver);
+ goto err_del_dev;
}
qdev_set_id(dev, qemu_opts_id(opts));
/* set properties */
if (qemu_opt_foreach(opts, set_property, dev, &err)) {
- error_propagate(errp, err);
- object_unparent(OBJECT(dev));
- object_unref(OBJECT(dev));
- return NULL;
+ goto err_del_dev;
}
dev->opts = opts;
object_property_set_bool(OBJECT(dev), true, "realized", &err);
if (err != NULL) {
- error_propagate(errp, err);
dev->opts = NULL;
- object_unparent(OBJECT(dev));
- object_unref(OBJECT(dev));
- return NULL;
+ goto err_del_dev;
}
return dev;
+
+err_del_dev:
+ error_propagate(errp, err);
+ object_unparent(OBJECT(dev));
+ object_unref(OBJECT(dev));
+ return NULL;
}
diff --git a/qemu-options.hx b/qemu-options.hx
index 678181c..5ff741a 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -169,7 +169,9 @@ ETEXI
DEF("numa", HAS_ARG, QEMU_OPTION_numa,
"-numa node[,mem=size][,cpus=firstcpu[-lastcpu]][,nodeid=node]\n"
"-numa node[,memdev=id][,cpus=firstcpu[-lastcpu]][,nodeid=node]\n"
- "-numa dist,src=source,dst=destination,val=distance\n", QEMU_ARCH_ALL)
+ "-numa dist,src=source,dst=destination,val=distance\n"
+ "-numa cpu,node-id=node[,socket-id=x][,core-id=y][,thread-id=z]\n",
+ QEMU_ARCH_ALL)
STEXI
@item -numa node[,mem=@var{size}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}]
@itemx -numa node[,memdev=@var{id}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}]
@@ -3972,18 +3974,24 @@ property must be set. These objects are placed in the
@table @option
-@item -object memory-backend-file,id=@var{id},size=@var{size},mem-path=@var{dir},share=@var{on|off},discard-data=@var{on|off}
+@item -object memory-backend-file,id=@var{id},size=@var{size},mem-path=@var{dir},share=@var{on|off},discard-data=@var{on|off},merge=@var{on|off},dump=@var{on|off},prealloc=@var{on|off},host-nodes=@var{host-nodes},policy=@var{default|preferred|bind|interleave},align=@var{align}
Creates a memory file backend object, which can be used to back
-the guest RAM with huge pages. The @option{id} parameter is a
-unique ID that will be used to reference this memory region
-when configuring the @option{-numa} argument. The @option{size}
-option provides the size of the memory region, and accepts
-common suffixes, eg @option{500M}. The @option{mem-path} provides
-the path to either a shared memory or huge page filesystem mount.
+the guest RAM with huge pages.
+
+The @option{id} parameter is a unique ID that will be used to reference this
+memory region when configuring the @option{-numa} argument.
+
+The @option{size} option provides the size of the memory region, and accepts
+common suffixes, eg @option{500M}.
+
+The @option{mem-path} provides the path to either a shared memory or huge page
+filesystem mount.
+
The @option{share} boolean option determines whether the memory
region is marked as private to QEMU, or shared. The latter allows
a co-operating external process to access the QEMU memory region.
+
Setting the @option{discard-data} boolean option to @var{on}
indicates that file contents can be destroyed when QEMU exits,
to avoid unnecessarily flushing data to the backing file. Note
@@ -3991,6 +3999,48 @@ that @option{discard-data} is only an optimization, and QEMU
might not discard file contents if it aborts unexpectedly or is
terminated using SIGKILL.
+The @option{merge} boolean option enables memory merge, also known as
+MADV_MERGEABLE, so that Kernel Samepage Merging will consider the pages for
+memory deduplication.
+
+Setting the @option{dump} boolean option to @var{off} excludes the memory from
+core dumps. This feature is also known as MADV_DONTDUMP.
+
+The @option{prealloc} boolean option enables memory preallocation.
+
+The @option{host-nodes} option binds the memory range to a list of NUMA host
+nodes.
+
+The @option{policy} option sets the NUMA policy to one of the following values:
+
+@table @option
+@item @var{default}
+default host policy
+
+@item @var{preferred}
+prefer the given host node list for allocation
+
+@item @var{bind}
+restrict memory allocation to the given host node list
+
+@item @var{interleave}
+interleave memory allocations across the given host node list
+@end table
+
+The @option{align} option specifies the base address alignment when
+QEMU mmap(2) @option{mem-path}, and accepts common suffixes, eg
+@option{2M}. Some backend store specified by @option{mem-path}
+requires an alignment different than the default one used by QEMU, eg
+the device DAX /dev/dax0.0 requires 2M alignment rather than 4K. In
+such cases, users can specify the required alignment via this option.
+
+@item -object memory-backend-ram,id=@var{id},merge=@var{on|off},dump=@var{on|off},prealloc=@var{on|off},size=@var{size},host-nodes=@var{host-nodes},policy=@var{default|preferred|bind|interleave}
+
+Creates a memory backend object, which can be used to back the guest RAM.
+Memory backend objects offer more control than the @option{-m} option that is
+traditionally used to define guest RAM. Please refer to
+@option{memory-backend-file} for a description of the options.
+
@item -object rng-random,id=@var{id},filename=@var{/dev/random}
Creates a random number generator backend which obtains entropy from
diff --git a/scripts/device-crash-test b/scripts/device-crash-test
index 827d8ec..7417177 100755
--- a/scripts/device-crash-test
+++ b/scripts/device-crash-test
@@ -207,11 +207,9 @@ ERROR_WHITELIST = [
# Known crashes will generate error messages, but won't be fatal.
# Those entries must be removed once we fix the crashes.
{'exitcode':-6, 'log':r"Device 'serial0' is in use", 'loglevel':logging.ERROR},
- {'exitcode':-6, 'log':r"spapr_rtas_register: Assertion .*rtas_table\[token\]\.name.* failed", 'loglevel':logging.ERROR},
{'exitcode':-6, 'log':r"qemu_net_client_setup: Assertion `!peer->peer' failed", 'loglevel':logging.ERROR},
{'exitcode':-6, 'log':r'RAMBlock "[\w.-]+" already registered', 'loglevel':logging.ERROR},
{'exitcode':-6, 'log':r"find_ram_offset: Assertion `size != 0' failed.", 'loglevel':logging.ERROR},
- {'exitcode':-6, 'log':r"puv3_load_kernel: Assertion `kernel_filename != NULL' failed", 'loglevel':logging.ERROR},
{'exitcode':-6, 'log':r"add_cpreg_to_hashtable: code should not be reached", 'loglevel':logging.ERROR},
{'exitcode':-6, 'log':r"qemu_alloc_display: Assertion `surface->image != NULL' failed", 'loglevel':logging.ERROR},
{'exitcode':-6, 'log':r"Unexpected error in error_set_from_qdev_prop_error", 'loglevel':logging.ERROR},
@@ -219,16 +217,10 @@ ERROR_WHITELIST = [
{'exitcode':-6, 'log':r"Object .* is not an instance of type generic-pc-machine", 'loglevel':logging.ERROR},
{'exitcode':-6, 'log':r"Object .* is not an instance of type e500-ccsr", 'loglevel':logging.ERROR},
{'exitcode':-6, 'log':r"vmstate_register_with_alias_id: Assertion `!se->compat \|\| se->instance_id == 0' failed", 'loglevel':logging.ERROR},
- {'exitcode':-11, 'device':'stm32f205-soc', 'loglevel':logging.ERROR, 'expected':True},
- {'exitcode':-11, 'device':'xlnx,zynqmp', 'loglevel':logging.ERROR, 'expected':True},
- {'exitcode':-11, 'device':'mips-cps', 'loglevel':logging.ERROR, 'expected':True},
{'exitcode':-11, 'device':'gus', 'loglevel':logging.ERROR, 'expected':True},
- {'exitcode':-11, 'device':'a9mpcore_priv', 'loglevel':logging.ERROR, 'expected':True},
- {'exitcode':-11, 'device':'a15mpcore_priv', 'loglevel':logging.ERROR, 'expected':True},
{'exitcode':-11, 'device':'isa-serial', 'loglevel':logging.ERROR, 'expected':True},
{'exitcode':-11, 'device':'sb16', 'loglevel':logging.ERROR, 'expected':True},
{'exitcode':-11, 'device':'cs4231a', 'loglevel':logging.ERROR, 'expected':True},
- {'exitcode':-11, 'device':'arm-gicv3', 'loglevel':logging.ERROR, 'expected':True},
{'exitcode':-11, 'machine':'isapc', 'device':'.*-iommu', 'loglevel':logging.ERROR, 'expected':True},
# everything else (including SIGABRT and SIGSEGV) will be a fatal error:
diff --git a/util/memfd.c b/util/memfd.c
index 412e94a..dce61f9 100644
--- a/util/memfd.c
+++ b/util/memfd.c
@@ -27,8 +27,6 @@
#include "qemu/osdep.h"
-#include <glib/gprintf.h>
-
#include "qemu/memfd.h"
#if defined CONFIG_LINUX && !defined CONFIG_MEMFD
@@ -53,6 +51,38 @@ static int memfd_create(const char *name, unsigned int flags)
#define MFD_ALLOW_SEALING 0x0002U
#endif
+int qemu_memfd_create(const char *name, size_t size, unsigned int seals)
+{
+ int mfd = -1;
+
+#ifdef CONFIG_LINUX
+ unsigned int flags = MFD_CLOEXEC;
+
+ if (seals) {
+ flags |= MFD_ALLOW_SEALING;
+ }
+
+ mfd = memfd_create(name, flags);
+ if (mfd < 0) {
+ return -1;
+ }
+
+ if (ftruncate(mfd, size) == -1) {
+ perror("ftruncate");
+ close(mfd);
+ return -1;
+ }
+
+ if (seals && fcntl(mfd, F_ADD_SEALS, seals) == -1) {
+ perror("fcntl");
+ close(mfd);
+ return -1;
+ }
+#endif
+
+ return mfd;
+}
+
/*
* This is a best-effort helper for shared memory allocation, with
* optional sealing. The helper will do his best to allocate using
@@ -63,35 +93,14 @@ void *qemu_memfd_alloc(const char *name, size_t size, unsigned int seals,
int *fd)
{
void *ptr;
- int mfd = -1;
-
- *fd = -1;
-
-#ifdef CONFIG_LINUX
- if (seals) {
- mfd = memfd_create(name, MFD_ALLOW_SEALING | MFD_CLOEXEC);
- }
+ int mfd = qemu_memfd_create(name, size, seals);
+ /* some systems have memfd without sealing */
if (mfd == -1) {
- /* some systems have memfd without sealing */
- mfd = memfd_create(name, MFD_CLOEXEC);
- seals = 0;
+ mfd = qemu_memfd_create(name, size, 0);
}
-#endif
-
- if (mfd != -1) {
- if (ftruncate(mfd, size) == -1) {
- perror("ftruncate");
- close(mfd);
- return NULL;
- }
- if (seals && fcntl(mfd, F_ADD_SEALS, seals) == -1) {
- perror("fcntl");
- close(mfd);
- return NULL;
- }
- } else {
+ if (mfd == -1) {
const char *tmpdir = g_get_tmp_dir();
gchar *fname;
diff --git a/vl.c b/vl.c
index 2586f25..e725ecb 100644
--- a/vl.c
+++ b/vl.c
@@ -4611,8 +4611,6 @@ int main(int argc, char **argv, char **envp)
current_machine->boot_order = boot_order;
current_machine->cpu_model = cpu_model;
- parse_numa_opts(current_machine);
-
/* parse features once if machine provides default cpu_type */
if (machine_class->default_cpu_type) {
current_machine->cpu_type = machine_class->default_cpu_type;
@@ -4621,6 +4619,7 @@ int main(int argc, char **argv, char **envp)
cpu_parse_cpu_model(machine_class->default_cpu_type, cpu_model);
}
}
+ parse_numa_opts(current_machine);
machine_run_board_init(current_machine);