diff options
-rw-r--r-- | hw/i386/acpi-build.c | 2 | ||||
-rw-r--r-- | hw/i386/pc.c | 84 | ||||
-rw-r--r-- | hw/mem/pc-dimm.c | 84 | ||||
-rw-r--r-- | include/hw/i386/pc.h | 7 | ||||
-rw-r--r-- | include/hw/mem/pc-dimm.h | 15 | ||||
-rw-r--r-- | include/sysemu/numa.h | 11 | ||||
-rw-r--r-- | numa.c | 94 |
7 files changed, 219 insertions, 78 deletions
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 00818b9..aed811a 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -1509,7 +1509,7 @@ build_srat(GArray *table_data, GArray *linker, PcGuestInfo *guest_info) */ if (hotplugabble_address_space_size) { numamem = acpi_data_push(table_data, sizeof *numamem); - acpi_build_srat_memory(numamem, pcms->hotplug_memory_base, + acpi_build_srat_memory(numamem, pcms->hotplug_memory.base, hotplugabble_address_space_size, 0, MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED); diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 7072930..a66416d 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -64,7 +64,6 @@ #include "hw/pci/pci_host.h" #include "acpi-build.h" #include "hw/mem/pc-dimm.h" -#include "trace.h" #include "qapi/visitor.h" #include "qapi-visit.h" @@ -1297,7 +1296,7 @@ FWCfgState *pc_memory_init(MachineState *machine, exit(EXIT_FAILURE); } - pcms->hotplug_memory_base = + pcms->hotplug_memory.base = ROUND_UP(0x100000000ULL + above_4g_mem_size, 1ULL << 30); if (pcms->enforce_aligned_dimm) { @@ -1305,17 +1304,17 @@ FWCfgState *pc_memory_init(MachineState *machine, hotplug_mem_size += (1ULL << 30) * machine->ram_slots; } - if ((pcms->hotplug_memory_base + hotplug_mem_size) < + if ((pcms->hotplug_memory.base + hotplug_mem_size) < hotplug_mem_size) { error_report("unsupported amount of maximum memory: " RAM_ADDR_FMT, machine->maxram_size); exit(EXIT_FAILURE); } - memory_region_init(&pcms->hotplug_memory, OBJECT(pcms), + memory_region_init(&pcms->hotplug_memory.mr, OBJECT(pcms), "hotplug-memory", hotplug_mem_size); - memory_region_add_subregion(system_memory, pcms->hotplug_memory_base, - &pcms->hotplug_memory); + memory_region_add_subregion(system_memory, pcms->hotplug_memory.base, + &pcms->hotplug_memory.mr); } /* Initialize PC system firmware */ @@ -1333,9 +1332,9 @@ FWCfgState *pc_memory_init(MachineState *machine, fw_cfg = bochs_bios_init(); rom_set_fw(fw_cfg); - if (guest_info->has_reserved_memory && pcms->hotplug_memory_base) { + if (guest_info->has_reserved_memory && pcms->hotplug_memory.base) { uint64_t *val = g_malloc(sizeof(*val)); - *val = cpu_to_le64(ROUND_UP(pcms->hotplug_memory_base, 0x1ULL << 30)); + *val = cpu_to_le64(ROUND_UP(pcms->hotplug_memory.base, 0x1ULL << 30)); fw_cfg_add_file(fw_cfg, "etc/reserved-memory-end", val, sizeof(*val)); } @@ -1554,88 +1553,31 @@ void ioapic_init_gsi(GSIState *gsi_state, const char *parent_name) static void pc_dimm_plug(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { - int slot; HotplugHandlerClass *hhc; Error *local_err = NULL; PCMachineState *pcms = PC_MACHINE(hotplug_dev); - MachineState *machine = MACHINE(hotplug_dev); PCDIMMDevice *dimm = PC_DIMM(dev); PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); MemoryRegion *mr = ddc->get_memory_region(dimm); - uint64_t existing_dimms_capacity = 0; uint64_t align = TARGET_PAGE_SIZE; - uint64_t addr; - - addr = object_property_get_int(OBJECT(dimm), PC_DIMM_ADDR_PROP, &local_err); - if (local_err) { - goto out; - } if (memory_region_get_alignment(mr) && pcms->enforce_aligned_dimm) { align = memory_region_get_alignment(mr); } - addr = pc_dimm_get_free_addr(pcms->hotplug_memory_base, - memory_region_size(&pcms->hotplug_memory), - !addr ? NULL : &addr, align, - memory_region_size(mr), &local_err); - if (local_err) { - goto out; - } - - existing_dimms_capacity = pc_existing_dimms_capacity(&local_err); - if (local_err) { - goto out; - } - - if (existing_dimms_capacity + memory_region_size(mr) > - machine->maxram_size - machine->ram_size) { - error_setg(&local_err, "not enough space, currently 0x%" PRIx64 - " in use of total hot pluggable 0x" RAM_ADDR_FMT, - existing_dimms_capacity, - machine->maxram_size - machine->ram_size); - goto out; - } - - object_property_set_int(OBJECT(dev), addr, PC_DIMM_ADDR_PROP, &local_err); - if (local_err) { - goto out; - } - trace_mhp_pc_dimm_assigned_address(addr); - - slot = object_property_get_int(OBJECT(dev), PC_DIMM_SLOT_PROP, &local_err); - if (local_err) { - goto out; - } - - slot = pc_dimm_get_free_slot(slot == PC_DIMM_UNASSIGNED_SLOT ? NULL : &slot, - machine->ram_slots, &local_err); - if (local_err) { - goto out; - } - object_property_set_int(OBJECT(dev), slot, PC_DIMM_SLOT_PROP, &local_err); - if (local_err) { - goto out; - } - trace_mhp_pc_dimm_assigned_slot(slot); - if (!pcms->acpi_dev) { error_setg(&local_err, "memory hotplug is not enabled: missing acpi device"); goto out; } - if (kvm_enabled() && !kvm_has_free_slot(machine)) { - error_setg(&local_err, "hypervisor has no free memory slots left"); + pc_dimm_memory_plug(dev, &pcms->hotplug_memory, mr, align, &local_err); + if (local_err) { goto out; } - memory_region_add_subregion(&pcms->hotplug_memory, - addr - pcms->hotplug_memory_base, mr); - vmstate_register_ram(mr, dev); - hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev); - hhc->plug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err); + hhc->plug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &error_abort); out: error_propagate(errp, local_err); } @@ -1677,9 +1619,7 @@ static void pc_dimm_unplug(HotplugHandler *hotplug_dev, goto out; } - memory_region_del_subregion(&pcms->hotplug_memory, mr); - vmstate_unregister_ram(mr, dev); - + pc_dimm_memory_unplug(dev, &pcms->hotplug_memory, mr); object_unparent(OBJECT(dev)); out: @@ -1766,7 +1706,7 @@ pc_machine_get_hotplug_memory_region_size(Object *obj, Visitor *v, void *opaque, const char *name, Error **errp) { PCMachineState *pcms = PC_MACHINE(obj); - int64_t value = memory_region_size(&pcms->hotplug_memory); + int64_t value = memory_region_size(&pcms->hotplug_memory.mr); visit_type_int(v, &value, name, errp); } diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c index e70633d..bb04862 100644 --- a/hw/mem/pc-dimm.c +++ b/hw/mem/pc-dimm.c @@ -23,12 +23,96 @@ #include "qapi/visitor.h" #include "qemu/range.h" #include "sysemu/numa.h" +#include "sysemu/kvm.h" +#include "trace.h" typedef struct pc_dimms_capacity { uint64_t size; Error **errp; } pc_dimms_capacity; +void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms, + MemoryRegion *mr, uint64_t align, Error **errp) +{ + int slot; + MachineState *machine = MACHINE(qdev_get_machine()); + PCDIMMDevice *dimm = PC_DIMM(dev); + Error *local_err = NULL; + uint64_t existing_dimms_capacity = 0; + uint64_t addr; + + addr = object_property_get_int(OBJECT(dimm), PC_DIMM_ADDR_PROP, &local_err); + if (local_err) { + goto out; + } + + addr = pc_dimm_get_free_addr(hpms->base, + memory_region_size(&hpms->mr), + !addr ? NULL : &addr, align, + memory_region_size(mr), &local_err); + if (local_err) { + goto out; + } + + existing_dimms_capacity = pc_existing_dimms_capacity(&local_err); + if (local_err) { + goto out; + } + + if (existing_dimms_capacity + memory_region_size(mr) > + machine->maxram_size - machine->ram_size) { + error_setg(&local_err, "not enough space, currently 0x%" PRIx64 + " in use of total hot pluggable 0x" RAM_ADDR_FMT, + existing_dimms_capacity, + machine->maxram_size - machine->ram_size); + goto out; + } + + object_property_set_int(OBJECT(dev), addr, PC_DIMM_ADDR_PROP, &local_err); + if (local_err) { + goto out; + } + trace_mhp_pc_dimm_assigned_address(addr); + + slot = object_property_get_int(OBJECT(dev), PC_DIMM_SLOT_PROP, &local_err); + if (local_err) { + goto out; + } + + slot = pc_dimm_get_free_slot(slot == PC_DIMM_UNASSIGNED_SLOT ? NULL : &slot, + machine->ram_slots, &local_err); + if (local_err) { + goto out; + } + object_property_set_int(OBJECT(dev), slot, PC_DIMM_SLOT_PROP, &local_err); + if (local_err) { + goto out; + } + trace_mhp_pc_dimm_assigned_slot(slot); + + if (kvm_enabled() && !kvm_has_free_slot(machine)) { + error_setg(&local_err, "hypervisor has no free memory slots left"); + goto out; + } + + memory_region_add_subregion(&hpms->mr, addr - hpms->base, mr); + vmstate_register_ram(mr, dev); + numa_set_mem_node_id(addr, memory_region_size(mr), dimm->node); + +out: + error_propagate(errp, local_err); +} + +void pc_dimm_memory_unplug(DeviceState *dev, MemoryHotplugState *hpms, + MemoryRegion *mr) +{ + PCDIMMDevice *dimm = PC_DIMM(dev); + + numa_unset_mem_node_id(dimm->addr, memory_region_size(mr), dimm->node); + memory_region_del_subregion(&hpms->mr, mr); + vmstate_unregister_ram(mr, dev); +} + static int pc_existing_dimms_capacity_internal(Object *obj, void *opaque) { pc_dimms_capacity *cap = opaque; diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 86c5651..328c8f7 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -15,14 +15,12 @@ #include "hw/pci/pci.h" #include "hw/boards.h" #include "hw/compat.h" +#include "hw/mem/pc-dimm.h" #define HPET_INTCAP "hpet-intcap" /** * PCMachineState: - * @hotplug_memory_base: address in guest RAM address space where hotplug memory - * address space begins. - * @hotplug_memory: hotplug memory addess space container * @acpi_dev: link to ACPI PM device that performs ACPI hotplug handling * @enforce_aligned_dimm: check that DIMM's address/size is aligned by * backend's alignment value if provided @@ -32,8 +30,7 @@ struct PCMachineState { MachineState parent_obj; /* <public> */ - ram_addr_t hotplug_memory_base; - MemoryRegion hotplug_memory; + MemoryHotplugState hotplug_memory; HotplugHandler *acpi_dev; ISADevice *rtc; diff --git a/include/hw/mem/pc-dimm.h b/include/hw/mem/pc-dimm.h index f7b80b4..d83bf30 100644 --- a/include/hw/mem/pc-dimm.h +++ b/include/hw/mem/pc-dimm.h @@ -70,6 +70,17 @@ typedef struct PCDIMMDeviceClass { MemoryRegion *(*get_memory_region)(PCDIMMDevice *dimm); } PCDIMMDeviceClass; +/** + * MemoryHotplugState: + * @base: address in guest RAM address space where hotplug memory + * address space begins. + * @mr: hotplug memory address space container + */ +typedef struct MemoryHotplugState { + ram_addr_t base; + MemoryRegion mr; +} MemoryHotplugState; + uint64_t pc_dimm_get_free_addr(uint64_t address_space_start, uint64_t address_space_size, uint64_t *hint, uint64_t align, uint64_t size, @@ -79,4 +90,8 @@ int pc_dimm_get_free_slot(const int *hint, int max_slots, Error **errp); int qmp_pc_dimm_device_list(Object *obj, void *opaque); uint64_t pc_existing_dimms_capacity(Error **errp); +void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms, + MemoryRegion *mr, uint64_t align, Error **errp); +void pc_dimm_memory_unplug(DeviceState *dev, MemoryHotplugState *hpms, + MemoryRegion *mr); #endif diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h index 6523b4d..a6392bc 100644 --- a/include/sysemu/numa.h +++ b/include/sysemu/numa.h @@ -10,16 +10,27 @@ extern int nb_numa_nodes; /* Number of NUMA nodes */ +struct numa_addr_range { + ram_addr_t mem_start; + ram_addr_t mem_end; + QLIST_ENTRY(numa_addr_range) entry; +}; + typedef struct node_info { uint64_t node_mem; DECLARE_BITMAP(node_cpu, MAX_CPUMASK_BITS); struct HostMemoryBackend *node_memdev; bool present; + QLIST_HEAD(, numa_addr_range) addr; /* List to store address ranges */ } NodeInfo; + extern NodeInfo numa_info[MAX_NODES]; void parse_numa_opts(MachineClass *mc); void numa_post_machine_init(void); void query_numa_node_mem(uint64_t node_mem[]); extern QemuOptsList qemu_numa_opts; +void numa_set_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node); +void numa_unset_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node); +uint32_t numa_get_node(ram_addr_t addr, Error **errp); #endif @@ -52,6 +52,92 @@ static int max_numa_nodeid; /* Highest specified NUMA node ID, plus one. int nb_numa_nodes; NodeInfo numa_info[MAX_NODES]; +void numa_set_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node) +{ + struct numa_addr_range *range = g_malloc0(sizeof(*range)); + + /* + * Memory-less nodes can come here with 0 size in which case, + * there is nothing to do. + */ + if (!size) { + return; + } + + range->mem_start = addr; + range->mem_end = addr + size - 1; + QLIST_INSERT_HEAD(&numa_info[node].addr, range, entry); +} + +void numa_unset_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node) +{ + struct numa_addr_range *range, *next; + + QLIST_FOREACH_SAFE(range, &numa_info[node].addr, entry, next) { + if (addr == range->mem_start && (addr + size - 1) == range->mem_end) { + QLIST_REMOVE(range, entry); + g_free(range); + return; + } + } +} + +static void numa_set_mem_ranges(void) +{ + int i; + ram_addr_t mem_start = 0; + + /* + * Deduce start address of each node and use it to store + * the address range info in numa_info address range list + */ + for (i = 0; i < nb_numa_nodes; i++) { + numa_set_mem_node_id(mem_start, numa_info[i].node_mem, i); + mem_start += numa_info[i].node_mem; + } +} + +/* + * Check if @addr falls under NUMA @node. + */ +static bool numa_addr_belongs_to_node(ram_addr_t addr, uint32_t node) +{ + struct numa_addr_range *range; + + QLIST_FOREACH(range, &numa_info[node].addr, entry) { + if (addr >= range->mem_start && addr <= range->mem_end) { + return true; + } + } + return false; +} + +/* + * Given an address, return the index of the NUMA node to which the + * address belongs to. + */ +uint32_t numa_get_node(ram_addr_t addr, Error **errp) +{ + uint32_t i; + + /* For non NUMA configurations, check if the addr falls under node 0 */ + if (!nb_numa_nodes) { + if (numa_addr_belongs_to_node(addr, 0)) { + return 0; + } + } + + for (i = 0; i < nb_numa_nodes; i++) { + if (numa_addr_belongs_to_node(addr, i)) { + return i; + } + } + + error_setg(errp, "Address 0x" RAM_ADDR_FMT " doesn't belong to any " + "NUMA node", addr); + return -1; +} + static void numa_node_parse(NumaNodeOptions *node, QemuOpts *opts, Error **errp) { uint16_t nodenr; @@ -274,6 +360,12 @@ void parse_numa_opts(MachineClass *mc) } for (i = 0; i < nb_numa_nodes; i++) { + QLIST_INIT(&numa_info[i].addr); + } + + numa_set_mem_ranges(); + + for (i = 0; i < nb_numa_nodes; i++) { if (!bitmap_empty(numa_info[i].node_cpu, MAX_CPUMASK_BITS)) { break; } @@ -297,6 +389,8 @@ void parse_numa_opts(MachineClass *mc) } validate_numa_cpus(); + } else { + numa_set_mem_node_id(0, ram_size, 0); } } |