aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--hw/i386/acpi-build.c2
-rw-r--r--hw/i386/pc.c84
-rw-r--r--hw/mem/pc-dimm.c84
-rw-r--r--include/hw/i386/pc.h7
-rw-r--r--include/hw/mem/pc-dimm.h15
-rw-r--r--include/sysemu/numa.h11
-rw-r--r--numa.c94
7 files changed, 219 insertions, 78 deletions
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 00818b9..aed811a 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -1509,7 +1509,7 @@ build_srat(GArray *table_data, GArray *linker, PcGuestInfo *guest_info)
*/
if (hotplugabble_address_space_size) {
numamem = acpi_data_push(table_data, sizeof *numamem);
- acpi_build_srat_memory(numamem, pcms->hotplug_memory_base,
+ acpi_build_srat_memory(numamem, pcms->hotplug_memory.base,
hotplugabble_address_space_size, 0,
MEM_AFFINITY_HOTPLUGGABLE |
MEM_AFFINITY_ENABLED);
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 7072930..a66416d 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -64,7 +64,6 @@
#include "hw/pci/pci_host.h"
#include "acpi-build.h"
#include "hw/mem/pc-dimm.h"
-#include "trace.h"
#include "qapi/visitor.h"
#include "qapi-visit.h"
@@ -1297,7 +1296,7 @@ FWCfgState *pc_memory_init(MachineState *machine,
exit(EXIT_FAILURE);
}
- pcms->hotplug_memory_base =
+ pcms->hotplug_memory.base =
ROUND_UP(0x100000000ULL + above_4g_mem_size, 1ULL << 30);
if (pcms->enforce_aligned_dimm) {
@@ -1305,17 +1304,17 @@ FWCfgState *pc_memory_init(MachineState *machine,
hotplug_mem_size += (1ULL << 30) * machine->ram_slots;
}
- if ((pcms->hotplug_memory_base + hotplug_mem_size) <
+ if ((pcms->hotplug_memory.base + hotplug_mem_size) <
hotplug_mem_size) {
error_report("unsupported amount of maximum memory: " RAM_ADDR_FMT,
machine->maxram_size);
exit(EXIT_FAILURE);
}
- memory_region_init(&pcms->hotplug_memory, OBJECT(pcms),
+ memory_region_init(&pcms->hotplug_memory.mr, OBJECT(pcms),
"hotplug-memory", hotplug_mem_size);
- memory_region_add_subregion(system_memory, pcms->hotplug_memory_base,
- &pcms->hotplug_memory);
+ memory_region_add_subregion(system_memory, pcms->hotplug_memory.base,
+ &pcms->hotplug_memory.mr);
}
/* Initialize PC system firmware */
@@ -1333,9 +1332,9 @@ FWCfgState *pc_memory_init(MachineState *machine,
fw_cfg = bochs_bios_init();
rom_set_fw(fw_cfg);
- if (guest_info->has_reserved_memory && pcms->hotplug_memory_base) {
+ if (guest_info->has_reserved_memory && pcms->hotplug_memory.base) {
uint64_t *val = g_malloc(sizeof(*val));
- *val = cpu_to_le64(ROUND_UP(pcms->hotplug_memory_base, 0x1ULL << 30));
+ *val = cpu_to_le64(ROUND_UP(pcms->hotplug_memory.base, 0x1ULL << 30));
fw_cfg_add_file(fw_cfg, "etc/reserved-memory-end", val, sizeof(*val));
}
@@ -1554,88 +1553,31 @@ void ioapic_init_gsi(GSIState *gsi_state, const char *parent_name)
static void pc_dimm_plug(HotplugHandler *hotplug_dev,
DeviceState *dev, Error **errp)
{
- int slot;
HotplugHandlerClass *hhc;
Error *local_err = NULL;
PCMachineState *pcms = PC_MACHINE(hotplug_dev);
- MachineState *machine = MACHINE(hotplug_dev);
PCDIMMDevice *dimm = PC_DIMM(dev);
PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
MemoryRegion *mr = ddc->get_memory_region(dimm);
- uint64_t existing_dimms_capacity = 0;
uint64_t align = TARGET_PAGE_SIZE;
- uint64_t addr;
-
- addr = object_property_get_int(OBJECT(dimm), PC_DIMM_ADDR_PROP, &local_err);
- if (local_err) {
- goto out;
- }
if (memory_region_get_alignment(mr) && pcms->enforce_aligned_dimm) {
align = memory_region_get_alignment(mr);
}
- addr = pc_dimm_get_free_addr(pcms->hotplug_memory_base,
- memory_region_size(&pcms->hotplug_memory),
- !addr ? NULL : &addr, align,
- memory_region_size(mr), &local_err);
- if (local_err) {
- goto out;
- }
-
- existing_dimms_capacity = pc_existing_dimms_capacity(&local_err);
- if (local_err) {
- goto out;
- }
-
- if (existing_dimms_capacity + memory_region_size(mr) >
- machine->maxram_size - machine->ram_size) {
- error_setg(&local_err, "not enough space, currently 0x%" PRIx64
- " in use of total hot pluggable 0x" RAM_ADDR_FMT,
- existing_dimms_capacity,
- machine->maxram_size - machine->ram_size);
- goto out;
- }
-
- object_property_set_int(OBJECT(dev), addr, PC_DIMM_ADDR_PROP, &local_err);
- if (local_err) {
- goto out;
- }
- trace_mhp_pc_dimm_assigned_address(addr);
-
- slot = object_property_get_int(OBJECT(dev), PC_DIMM_SLOT_PROP, &local_err);
- if (local_err) {
- goto out;
- }
-
- slot = pc_dimm_get_free_slot(slot == PC_DIMM_UNASSIGNED_SLOT ? NULL : &slot,
- machine->ram_slots, &local_err);
- if (local_err) {
- goto out;
- }
- object_property_set_int(OBJECT(dev), slot, PC_DIMM_SLOT_PROP, &local_err);
- if (local_err) {
- goto out;
- }
- trace_mhp_pc_dimm_assigned_slot(slot);
-
if (!pcms->acpi_dev) {
error_setg(&local_err,
"memory hotplug is not enabled: missing acpi device");
goto out;
}
- if (kvm_enabled() && !kvm_has_free_slot(machine)) {
- error_setg(&local_err, "hypervisor has no free memory slots left");
+ pc_dimm_memory_plug(dev, &pcms->hotplug_memory, mr, align, &local_err);
+ if (local_err) {
goto out;
}
- memory_region_add_subregion(&pcms->hotplug_memory,
- addr - pcms->hotplug_memory_base, mr);
- vmstate_register_ram(mr, dev);
-
hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev);
- hhc->plug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err);
+ hhc->plug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &error_abort);
out:
error_propagate(errp, local_err);
}
@@ -1677,9 +1619,7 @@ static void pc_dimm_unplug(HotplugHandler *hotplug_dev,
goto out;
}
- memory_region_del_subregion(&pcms->hotplug_memory, mr);
- vmstate_unregister_ram(mr, dev);
-
+ pc_dimm_memory_unplug(dev, &pcms->hotplug_memory, mr);
object_unparent(OBJECT(dev));
out:
@@ -1766,7 +1706,7 @@ pc_machine_get_hotplug_memory_region_size(Object *obj, Visitor *v, void *opaque,
const char *name, Error **errp)
{
PCMachineState *pcms = PC_MACHINE(obj);
- int64_t value = memory_region_size(&pcms->hotplug_memory);
+ int64_t value = memory_region_size(&pcms->hotplug_memory.mr);
visit_type_int(v, &value, name, errp);
}
diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c
index e70633d..bb04862 100644
--- a/hw/mem/pc-dimm.c
+++ b/hw/mem/pc-dimm.c
@@ -23,12 +23,96 @@
#include "qapi/visitor.h"
#include "qemu/range.h"
#include "sysemu/numa.h"
+#include "sysemu/kvm.h"
+#include "trace.h"
typedef struct pc_dimms_capacity {
uint64_t size;
Error **errp;
} pc_dimms_capacity;
+void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms,
+ MemoryRegion *mr, uint64_t align, Error **errp)
+{
+ int slot;
+ MachineState *machine = MACHINE(qdev_get_machine());
+ PCDIMMDevice *dimm = PC_DIMM(dev);
+ Error *local_err = NULL;
+ uint64_t existing_dimms_capacity = 0;
+ uint64_t addr;
+
+ addr = object_property_get_int(OBJECT(dimm), PC_DIMM_ADDR_PROP, &local_err);
+ if (local_err) {
+ goto out;
+ }
+
+ addr = pc_dimm_get_free_addr(hpms->base,
+ memory_region_size(&hpms->mr),
+ !addr ? NULL : &addr, align,
+ memory_region_size(mr), &local_err);
+ if (local_err) {
+ goto out;
+ }
+
+ existing_dimms_capacity = pc_existing_dimms_capacity(&local_err);
+ if (local_err) {
+ goto out;
+ }
+
+ if (existing_dimms_capacity + memory_region_size(mr) >
+ machine->maxram_size - machine->ram_size) {
+ error_setg(&local_err, "not enough space, currently 0x%" PRIx64
+ " in use of total hot pluggable 0x" RAM_ADDR_FMT,
+ existing_dimms_capacity,
+ machine->maxram_size - machine->ram_size);
+ goto out;
+ }
+
+ object_property_set_int(OBJECT(dev), addr, PC_DIMM_ADDR_PROP, &local_err);
+ if (local_err) {
+ goto out;
+ }
+ trace_mhp_pc_dimm_assigned_address(addr);
+
+ slot = object_property_get_int(OBJECT(dev), PC_DIMM_SLOT_PROP, &local_err);
+ if (local_err) {
+ goto out;
+ }
+
+ slot = pc_dimm_get_free_slot(slot == PC_DIMM_UNASSIGNED_SLOT ? NULL : &slot,
+ machine->ram_slots, &local_err);
+ if (local_err) {
+ goto out;
+ }
+ object_property_set_int(OBJECT(dev), slot, PC_DIMM_SLOT_PROP, &local_err);
+ if (local_err) {
+ goto out;
+ }
+ trace_mhp_pc_dimm_assigned_slot(slot);
+
+ if (kvm_enabled() && !kvm_has_free_slot(machine)) {
+ error_setg(&local_err, "hypervisor has no free memory slots left");
+ goto out;
+ }
+
+ memory_region_add_subregion(&hpms->mr, addr - hpms->base, mr);
+ vmstate_register_ram(mr, dev);
+ numa_set_mem_node_id(addr, memory_region_size(mr), dimm->node);
+
+out:
+ error_propagate(errp, local_err);
+}
+
+void pc_dimm_memory_unplug(DeviceState *dev, MemoryHotplugState *hpms,
+ MemoryRegion *mr)
+{
+ PCDIMMDevice *dimm = PC_DIMM(dev);
+
+ numa_unset_mem_node_id(dimm->addr, memory_region_size(mr), dimm->node);
+ memory_region_del_subregion(&hpms->mr, mr);
+ vmstate_unregister_ram(mr, dev);
+}
+
static int pc_existing_dimms_capacity_internal(Object *obj, void *opaque)
{
pc_dimms_capacity *cap = opaque;
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 86c5651..328c8f7 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -15,14 +15,12 @@
#include "hw/pci/pci.h"
#include "hw/boards.h"
#include "hw/compat.h"
+#include "hw/mem/pc-dimm.h"
#define HPET_INTCAP "hpet-intcap"
/**
* PCMachineState:
- * @hotplug_memory_base: address in guest RAM address space where hotplug memory
- * address space begins.
- * @hotplug_memory: hotplug memory addess space container
* @acpi_dev: link to ACPI PM device that performs ACPI hotplug handling
* @enforce_aligned_dimm: check that DIMM's address/size is aligned by
* backend's alignment value if provided
@@ -32,8 +30,7 @@ struct PCMachineState {
MachineState parent_obj;
/* <public> */
- ram_addr_t hotplug_memory_base;
- MemoryRegion hotplug_memory;
+ MemoryHotplugState hotplug_memory;
HotplugHandler *acpi_dev;
ISADevice *rtc;
diff --git a/include/hw/mem/pc-dimm.h b/include/hw/mem/pc-dimm.h
index f7b80b4..d83bf30 100644
--- a/include/hw/mem/pc-dimm.h
+++ b/include/hw/mem/pc-dimm.h
@@ -70,6 +70,17 @@ typedef struct PCDIMMDeviceClass {
MemoryRegion *(*get_memory_region)(PCDIMMDevice *dimm);
} PCDIMMDeviceClass;
+/**
+ * MemoryHotplugState:
+ * @base: address in guest RAM address space where hotplug memory
+ * address space begins.
+ * @mr: hotplug memory address space container
+ */
+typedef struct MemoryHotplugState {
+ ram_addr_t base;
+ MemoryRegion mr;
+} MemoryHotplugState;
+
uint64_t pc_dimm_get_free_addr(uint64_t address_space_start,
uint64_t address_space_size,
uint64_t *hint, uint64_t align, uint64_t size,
@@ -79,4 +90,8 @@ int pc_dimm_get_free_slot(const int *hint, int max_slots, Error **errp);
int qmp_pc_dimm_device_list(Object *obj, void *opaque);
uint64_t pc_existing_dimms_capacity(Error **errp);
+void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms,
+ MemoryRegion *mr, uint64_t align, Error **errp);
+void pc_dimm_memory_unplug(DeviceState *dev, MemoryHotplugState *hpms,
+ MemoryRegion *mr);
#endif
diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h
index 6523b4d..a6392bc 100644
--- a/include/sysemu/numa.h
+++ b/include/sysemu/numa.h
@@ -10,16 +10,27 @@
extern int nb_numa_nodes; /* Number of NUMA nodes */
+struct numa_addr_range {
+ ram_addr_t mem_start;
+ ram_addr_t mem_end;
+ QLIST_ENTRY(numa_addr_range) entry;
+};
+
typedef struct node_info {
uint64_t node_mem;
DECLARE_BITMAP(node_cpu, MAX_CPUMASK_BITS);
struct HostMemoryBackend *node_memdev;
bool present;
+ QLIST_HEAD(, numa_addr_range) addr; /* List to store address ranges */
} NodeInfo;
+
extern NodeInfo numa_info[MAX_NODES];
void parse_numa_opts(MachineClass *mc);
void numa_post_machine_init(void);
void query_numa_node_mem(uint64_t node_mem[]);
extern QemuOptsList qemu_numa_opts;
+void numa_set_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node);
+void numa_unset_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node);
+uint32_t numa_get_node(ram_addr_t addr, Error **errp);
#endif
diff --git a/numa.c b/numa.c
index 91fc6c1..3c80059 100644
--- a/numa.c
+++ b/numa.c
@@ -52,6 +52,92 @@ static int max_numa_nodeid; /* Highest specified NUMA node ID, plus one.
int nb_numa_nodes;
NodeInfo numa_info[MAX_NODES];
+void numa_set_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node)
+{
+ struct numa_addr_range *range = g_malloc0(sizeof(*range));
+
+ /*
+ * Memory-less nodes can come here with 0 size in which case,
+ * there is nothing to do.
+ */
+ if (!size) {
+ return;
+ }
+
+ range->mem_start = addr;
+ range->mem_end = addr + size - 1;
+ QLIST_INSERT_HEAD(&numa_info[node].addr, range, entry);
+}
+
+void numa_unset_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node)
+{
+ struct numa_addr_range *range, *next;
+
+ QLIST_FOREACH_SAFE(range, &numa_info[node].addr, entry, next) {
+ if (addr == range->mem_start && (addr + size - 1) == range->mem_end) {
+ QLIST_REMOVE(range, entry);
+ g_free(range);
+ return;
+ }
+ }
+}
+
+static void numa_set_mem_ranges(void)
+{
+ int i;
+ ram_addr_t mem_start = 0;
+
+ /*
+ * Deduce start address of each node and use it to store
+ * the address range info in numa_info address range list
+ */
+ for (i = 0; i < nb_numa_nodes; i++) {
+ numa_set_mem_node_id(mem_start, numa_info[i].node_mem, i);
+ mem_start += numa_info[i].node_mem;
+ }
+}
+
+/*
+ * Check if @addr falls under NUMA @node.
+ */
+static bool numa_addr_belongs_to_node(ram_addr_t addr, uint32_t node)
+{
+ struct numa_addr_range *range;
+
+ QLIST_FOREACH(range, &numa_info[node].addr, entry) {
+ if (addr >= range->mem_start && addr <= range->mem_end) {
+ return true;
+ }
+ }
+ return false;
+}
+
+/*
+ * Given an address, return the index of the NUMA node to which the
+ * address belongs to.
+ */
+uint32_t numa_get_node(ram_addr_t addr, Error **errp)
+{
+ uint32_t i;
+
+ /* For non NUMA configurations, check if the addr falls under node 0 */
+ if (!nb_numa_nodes) {
+ if (numa_addr_belongs_to_node(addr, 0)) {
+ return 0;
+ }
+ }
+
+ for (i = 0; i < nb_numa_nodes; i++) {
+ if (numa_addr_belongs_to_node(addr, i)) {
+ return i;
+ }
+ }
+
+ error_setg(errp, "Address 0x" RAM_ADDR_FMT " doesn't belong to any "
+ "NUMA node", addr);
+ return -1;
+}
+
static void numa_node_parse(NumaNodeOptions *node, QemuOpts *opts, Error **errp)
{
uint16_t nodenr;
@@ -274,6 +360,12 @@ void parse_numa_opts(MachineClass *mc)
}
for (i = 0; i < nb_numa_nodes; i++) {
+ QLIST_INIT(&numa_info[i].addr);
+ }
+
+ numa_set_mem_ranges();
+
+ for (i = 0; i < nb_numa_nodes; i++) {
if (!bitmap_empty(numa_info[i].node_cpu, MAX_CPUMASK_BITS)) {
break;
}
@@ -297,6 +389,8 @@ void parse_numa_opts(MachineClass *mc)
}
validate_numa_cpus();
+ } else {
+ numa_set_mem_node_id(0, ram_size, 0);
}
}