aboutsummaryrefslogtreecommitdiff
path: root/hw
diff options
context:
space:
mode:
authorReza Arbab <arbab@linux.ibm.com>2020-07-16 17:56:55 -0500
committerDavid Gibson <david@gibson.dropbear.id.au>2020-07-20 09:21:39 +1000
commita6030d7e0b35a23c82e4a765b53dc3847bcdb4d1 (patch)
tree0476fd6630d17bd727a20779aeae771348bd7011 /hw
parenta4beb5f5d4672400a76cc0551c4f0878e42d921c (diff)
downloadqemu-a6030d7e0b35a23c82e4a765b53dc3847bcdb4d1.zip
qemu-a6030d7e0b35a23c82e4a765b53dc3847bcdb4d1.tar.gz
qemu-a6030d7e0b35a23c82e4a765b53dc3847bcdb4d1.tar.bz2
spapr: Add a new level of NUMA for GPUs
NUMA nodes corresponding to GPU memory currently have the same affinity/distance as normal memory nodes. Add a third NUMA associativity reference point enabling us to give GPU nodes more distance. This is guest visible information, which shouldn't change under a running guest across migration between different qemu versions, so make the change effective only in new (pseries > 5.0) machine types. Before, `numactl -H` output in a guest with 4 GPUs (nodes 2-5): node distances: node 0 1 2 3 4 5 0: 10 40 40 40 40 40 1: 40 10 40 40 40 40 2: 40 40 10 40 40 40 3: 40 40 40 10 40 40 4: 40 40 40 40 10 40 5: 40 40 40 40 40 10 After: node distances: node 0 1 2 3 4 5 0: 10 40 80 80 80 80 1: 40 10 80 80 80 80 2: 80 80 10 80 80 80 3: 80 80 80 10 80 80 4: 80 80 80 80 10 80 5: 80 80 80 80 80 10 These are the same distances as on the host, mirroring the change made to host firmware in skiboot commit f845a648b8cb ("numa/associativity: Add a new level of NUMA for GPU's"). Signed-off-by: Reza Arbab <arbab@linux.ibm.com> Message-Id: <20200716225655.24289-1-arbab@linux.ibm.com> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Diffstat (limited to 'hw')
-rw-r--r--hw/ppc/spapr.c21
-rw-r--r--hw/ppc/spapr_pci.c2
-rw-r--r--hw/ppc/spapr_pci_nvlink2.c13
3 files changed, 31 insertions, 5 deletions
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 299908c..0ae293e 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -890,10 +890,16 @@ static int spapr_dt_rng(void *fdt)
static void spapr_dt_rtas(SpaprMachineState *spapr, void *fdt)
{
MachineState *ms = MACHINE(spapr);
+ SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(ms);
int rtas;
GString *hypertas = g_string_sized_new(256);
GString *qemu_hypertas = g_string_sized_new(256);
- uint32_t refpoints[] = { cpu_to_be32(0x4), cpu_to_be32(0x4) };
+ uint32_t refpoints[] = {
+ cpu_to_be32(0x4),
+ cpu_to_be32(0x4),
+ cpu_to_be32(0x2),
+ };
+ uint32_t nr_refpoints = ARRAY_SIZE(refpoints);
uint64_t max_device_addr = MACHINE(spapr)->device_memory->base +
memory_region_size(&MACHINE(spapr)->device_memory->mr);
uint32_t lrdr_capacity[] = {
@@ -945,8 +951,12 @@ static void spapr_dt_rtas(SpaprMachineState *spapr, void *fdt)
qemu_hypertas->str, qemu_hypertas->len));
g_string_free(qemu_hypertas, TRUE);
+ if (smc->pre_5_1_assoc_refpoints) {
+ nr_refpoints = 2;
+ }
+
_FDT(fdt_setprop(fdt, rtas, "ibm,associativity-reference-points",
- refpoints, sizeof(refpoints)));
+ refpoints, nr_refpoints * sizeof(refpoints[0])));
_FDT(fdt_setprop(fdt, rtas, "ibm,max-associativity-domains",
maxdomains, sizeof(maxdomains)));
@@ -4584,9 +4594,16 @@ DEFINE_SPAPR_MACHINE(5_1, "5.1", true);
*/
static void spapr_machine_5_0_class_options(MachineClass *mc)
{
+ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
+ static GlobalProperty compat[] = {
+ { TYPE_SPAPR_PCI_HOST_BRIDGE, "pre-5.1-associativity", "on" },
+ };
+
spapr_machine_5_1_class_options(mc);
compat_props_add(mc->compat_props, hw_compat_5_0, hw_compat_5_0_len);
+ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
mc->numa_mem_supported = true;
+ smc->pre_5_1_assoc_refpoints = true;
}
DEFINE_SPAPR_MACHINE(5_0, "5.0", false);
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 2168121..363cdb3 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -2089,6 +2089,8 @@ static Property spapr_phb_properties[] = {
pcie_ecs, true),
DEFINE_PROP_UINT64("gpa", SpaprPhbState, nv2_gpa_win_addr, 0),
DEFINE_PROP_UINT64("atsd", SpaprPhbState, nv2_atsd_win_addr, 0),
+ DEFINE_PROP_BOOL("pre-5.1-associativity", SpaprPhbState,
+ pre_5_1_assoc, false),
DEFINE_PROP_END_OF_LIST(),
};
diff --git a/hw/ppc/spapr_pci_nvlink2.c b/hw/ppc/spapr_pci_nvlink2.c
index dd8cd6d..76ae77e 100644
--- a/hw/ppc/spapr_pci_nvlink2.c
+++ b/hw/ppc/spapr_pci_nvlink2.c
@@ -362,9 +362,9 @@ void spapr_phb_nvgpu_ram_populate_dt(SpaprPhbState *sphb, void *fdt)
&error_abort);
uint32_t associativity[] = {
cpu_to_be32(0x4),
- SPAPR_GPU_NUMA_ID,
- SPAPR_GPU_NUMA_ID,
- SPAPR_GPU_NUMA_ID,
+ cpu_to_be32(nvslot->numa_id),
+ cpu_to_be32(nvslot->numa_id),
+ cpu_to_be32(nvslot->numa_id),
cpu_to_be32(nvslot->numa_id)
};
uint64_t size = object_property_get_uint(nv_mrobj, "size", NULL);
@@ -375,6 +375,13 @@ void spapr_phb_nvgpu_ram_populate_dt(SpaprPhbState *sphb, void *fdt)
_FDT(off);
_FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
_FDT((fdt_setprop(fdt, off, "reg", mem_reg, sizeof(mem_reg))));
+
+ if (sphb->pre_5_1_assoc) {
+ associativity[1] = SPAPR_GPU_NUMA_ID;
+ associativity[2] = SPAPR_GPU_NUMA_ID;
+ associativity[3] = SPAPR_GPU_NUMA_ID;
+ }
+
_FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity,
sizeof(associativity))));