From 021e878f2efaa4fdcb5e11abfa224f5ba813f809 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Sat, 22 Aug 2020 10:39:20 +0200 Subject: ppc/pnv: Fix TypeInfo of PnvLpcController abstract class MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It was missing the instance_size field. Cc: Eduardo Habkost Signed-off-by: Cédric Le Goater Message-Id: <20200822083920.2668930-1-clg@kaod.org> Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: David Gibson --- hw/ppc/pnv_lpc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'hw/ppc') diff --git a/hw/ppc/pnv_lpc.c b/hw/ppc/pnv_lpc.c index b5ffa48..23f1e09 100644 --- a/hw/ppc/pnv_lpc.c +++ b/hw/ppc/pnv_lpc.c @@ -646,7 +646,6 @@ static void pnv_lpc_power8_class_init(ObjectClass *klass, void *data) static const TypeInfo pnv_lpc_power8_info = { .name = TYPE_PNV8_LPC, .parent = TYPE_PNV_LPC, - .instance_size = sizeof(PnvLpcController), .class_init = pnv_lpc_power8_class_init, .interfaces = (InterfaceInfo[]) { { TYPE_PNV_XSCOM_INTERFACE }, @@ -687,7 +686,6 @@ static void pnv_lpc_power9_class_init(ObjectClass *klass, void *data) static const TypeInfo pnv_lpc_power9_info = { .name = TYPE_PNV9_LPC, .parent = TYPE_PNV_LPC, - .instance_size = sizeof(PnvLpcController), .class_init = pnv_lpc_power9_class_init, }; @@ -768,6 +766,7 @@ static void pnv_lpc_class_init(ObjectClass *klass, void *data) static const TypeInfo pnv_lpc_info = { .name = TYPE_PNV_LPC, .parent = TYPE_DEVICE, + .instance_size = sizeof(PnvLpcController), .class_init = pnv_lpc_class_init, .class_size = sizeof(PnvLpcClass), .abstract = true, -- cgit v1.1 From c02f9892af1f166634e1b4fd722044151acb5e88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Thu, 20 Aug 2020 18:46:38 +0200 Subject: ppc/pnv: Add a HIOMAP erase command MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The OPAL test suite runs a read-erase-write test on the PNOR : https://github.com/open-power/op-test/blob/master/testcases/OpTestPNOR.py which revealed that the IPMI HIOMAP handlers didn't support HIOMAP_C_ERASE. Implement the sector erase command by writing 0xFF in the PNOR memory region. Cc: Corey Minyard Reported-by: Klaus Heinrich Kiwi Signed-off-by: Cédric Le Goater Message-Id: <20200820164638.2515681-1-clg@kaod.org> Acked-by: Corey Minyard Signed-off-by: David Gibson --- hw/ppc/pnv_bmc.c | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) (limited to 'hw/ppc') diff --git a/hw/ppc/pnv_bmc.c b/hw/ppc/pnv_bmc.c index 2e1a03d..67ebb16 100644 --- a/hw/ppc/pnv_bmc.c +++ b/hw/ppc/pnv_bmc.c @@ -140,6 +140,27 @@ static uint16_t bytes_to_blocks(uint32_t bytes) return bytes >> BLOCK_SHIFT; } +static uint32_t blocks_to_bytes(uint16_t blocks) +{ + return blocks << BLOCK_SHIFT; +} + +static int hiomap_erase(PnvPnor *pnor, uint32_t offset, uint32_t size) +{ + MemTxResult result; + int i; + + for (i = 0; i < size / 4; i++) { + result = memory_region_dispatch_write(&pnor->mmio, offset + i * 4, + 0xFFFFFFFF, MO_32, + MEMTXATTRS_UNSPECIFIED); + if (result != MEMTX_OK) { + return -1; + } + } + return 0; +} + static void hiomap_cmd(IPMIBmcSim *ibs, uint8_t *cmd, unsigned int cmd_len, RspBuffer *rsp) { @@ -155,10 +176,16 @@ static void hiomap_cmd(IPMIBmcSim *ibs, uint8_t *cmd, unsigned int cmd_len, switch (cmd[2]) { case HIOMAP_C_MARK_DIRTY: case HIOMAP_C_FLUSH: - case HIOMAP_C_ERASE: case HIOMAP_C_ACK: break; + case HIOMAP_C_ERASE: + if (hiomap_erase(pnor, blocks_to_bytes(cmd[5] << 8 | cmd[4]), + blocks_to_bytes(cmd[7] << 8 | cmd[6]))) { + rsp_buffer_set_error(rsp, IPMI_CC_UNSPECIFIED); + } + break; + case HIOMAP_C_GET_INFO: rsp_buffer_push(rsp, 2); /* Version 2 */ rsp_buffer_push(rsp, BLOCK_SHIFT); /* block size */ -- cgit v1.1 From b31911c6167432edc7dc5dd37e8c01f426576224 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Thu, 20 Aug 2020 16:01:06 +0200 Subject: spapr/xive: Use the xics flag to check for XIVE-only IRQ backends MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sPAPR machine has four different IRQ backends, each implementing the XICS or XIVE interrupt mode or both in the case of the 'dual' backend. If a machine is started in P8 compat mode, QEMU should necessarily support the XICS interrupt mode and in that case, the XIVE-only IRQ backend is invalid. Currently, spapr_irq_check() tests the pointer value to the IRQ backend to check for this condition, instead use the 'xics' flag. It's equivalent and it will ease the introduction of new XIVE-only IRQ backends if needed. Signed-off-by: Cédric Le Goater Message-Id: <20200820140106.2357228-1-clg@kaod.org> Signed-off-by: David Gibson --- hw/ppc/spapr_irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'hw/ppc') diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c index 72bb938..f599603 100644 --- a/hw/ppc/spapr_irq.c +++ b/hw/ppc/spapr_irq.c @@ -172,7 +172,7 @@ static int spapr_irq_check(SpaprMachineState *spapr, Error **errp) * To cover both and not confuse the OS, add an early failure in * QEMU. */ - if (spapr->irq == &spapr_irq_xive) { + if (!spapr->irq->xics) { error_setg(errp, "XIVE-only machines require a POWER9 CPU"); return -1; } -- cgit v1.1 From 90d282d0858cf5a38f3e8a7e201aeab2a0ccbe88 Mon Sep 17 00:00:00 2001 From: Daniel Henrique Barboza Date: Tue, 25 Aug 2020 18:57:47 -0300 Subject: ppc/spapr_nvdimm: use g_autofree in spapr_nvdimm_validate_opts() Since we're using the string just once, just use g_autofree and avoid leaking it without calling g_free(). Signed-off-by: Daniel Henrique Barboza Message-Id: <20200825215749.213536-2-danielhb413@gmail.com> Signed-off-by: David Gibson --- hw/ppc/spapr_nvdimm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'hw/ppc') diff --git a/hw/ppc/spapr_nvdimm.c b/hw/ppc/spapr_nvdimm.c index 81410aa..9a20a65 100644 --- a/hw/ppc/spapr_nvdimm.c +++ b/hw/ppc/spapr_nvdimm.c @@ -33,7 +33,7 @@ void spapr_nvdimm_validate_opts(NVDIMMDevice *nvdimm, uint64_t size, Error **errp) { - char *uuidstr = NULL; + g_autofree char *uuidstr = NULL; QemuUUID uuid; int ret; @@ -54,7 +54,6 @@ void spapr_nvdimm_validate_opts(NVDIMMDevice *nvdimm, uint64_t size, &error_abort); ret = qemu_uuid_parse(uuidstr, &uuid); g_assert(!ret); - g_free(uuidstr); if (qemu_uuid_is_null(&uuid)) { error_setg(errp, "NVDIMM device requires the uuid to be set"); -- cgit v1.1 From beb6073fe7f53526bc49f9cb1a3a830c2cdaf3e7 Mon Sep 17 00:00:00 2001 From: Daniel Henrique Barboza Date: Tue, 25 Aug 2020 18:57:48 -0300 Subject: spapr, spapr_nvdimm: fold NVDIMM validation in the same place NVDIMM has different contraints and conditions than the regular DIMM and we'll need to add at least one more. Instead of relying on 'if (nvdimm)' conditionals in the body of spapr_memory_pre_plug(), use the existing spapr_nvdimm_validate_opts() and put all NVDIMM handling code there. Rename it to spapr_nvdimm_validate() to reflect that the function is now checking more than the nvdimm device options. This makes spapr_memory_pre_plug() a bit easier to follow, and we can tune in NVDIMM parameters and validation in the same place. Signed-off-by: Daniel Henrique Barboza Message-Id: <20200825215749.213536-3-danielhb413@gmail.com> Signed-off-by: David Gibson --- hw/ppc/spapr.c | 18 ++++++------------ hw/ppc/spapr_nvdimm.c | 10 ++++++++-- 2 files changed, 14 insertions(+), 14 deletions(-) (limited to 'hw/ppc') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index dd2fa48..b0a0444 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -3520,7 +3520,6 @@ static void spapr_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, { const SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(hotplug_dev); SpaprMachineState *spapr = SPAPR_MACHINE(hotplug_dev); - const MachineClass *mc = MACHINE_CLASS(smc); bool is_nvdimm = object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM); PCDIMMDevice *dimm = PC_DIMM(dev); Error *local_err = NULL; @@ -3533,27 +3532,22 @@ static void spapr_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, return; } - if (is_nvdimm && !mc->nvdimm_supported) { - error_setg(errp, "NVDIMM hotplug not supported for this machine"); - return; - } - size = memory_device_get_region_size(MEMORY_DEVICE(dimm), &local_err); if (local_err) { error_propagate(errp, local_err); return; } - if (!is_nvdimm && size % SPAPR_MEMORY_BLOCK_SIZE) { - error_setg(errp, "Hotplugged memory size must be a multiple of " - "%" PRIu64 " MB", SPAPR_MEMORY_BLOCK_SIZE / MiB); - return; - } else if (is_nvdimm) { - spapr_nvdimm_validate_opts(NVDIMM(dev), size, &local_err); + if (is_nvdimm) { + spapr_nvdimm_validate(hotplug_dev, NVDIMM(dev), size, &local_err); if (local_err) { error_propagate(errp, local_err); return; } + } else if (size % SPAPR_MEMORY_BLOCK_SIZE) { + error_setg(errp, "Hotplugged memory size must be a multiple of " + "%" PRIu64 " MB", SPAPR_MEMORY_BLOCK_SIZE / MiB); + return; } memdev = object_property_get_link(OBJECT(dimm), PC_DIMM_MEMDEV_PROP, diff --git a/hw/ppc/spapr_nvdimm.c b/hw/ppc/spapr_nvdimm.c index 9a20a65..bc2b654 100644 --- a/hw/ppc/spapr_nvdimm.c +++ b/hw/ppc/spapr_nvdimm.c @@ -30,13 +30,19 @@ #include "hw/ppc/fdt.h" #include "qemu/range.h" -void spapr_nvdimm_validate_opts(NVDIMMDevice *nvdimm, uint64_t size, - Error **errp) +void spapr_nvdimm_validate(HotplugHandler *hotplug_dev, NVDIMMDevice *nvdimm, + uint64_t size, Error **errp) { + const MachineClass *mc = MACHINE_GET_CLASS(hotplug_dev); g_autofree char *uuidstr = NULL; QemuUUID uuid; int ret; + if (!mc->nvdimm_supported) { + error_setg(errp, "NVDIMM hotplug not supported for this machine"); + return; + } + if (object_property_get_int(OBJECT(nvdimm), NVDIMM_LABEL_SIZE_PROP, &error_abort) == 0) { error_setg(errp, "PAPR requires NVDIMM devices to have label-size set"); -- cgit v1.1 From 28f5a716212b891eef3eadc2c5053c0e0c512e1d Mon Sep 17 00:00:00 2001 From: Daniel Henrique Barboza Date: Tue, 25 Aug 2020 18:57:49 -0300 Subject: ppc/spapr_nvdimm: do not enable support with 'nvdimm=off' The NVDIMM support for pSeries was introduced in 5.1, but it didn't contemplate the 'nvdimm' machine option that other archs uses. For every other arch, if no '-machine nvdimm(=on)' is present, it is assumed that the NVDIMM support is disabled. The user must explictly inform that the machine supports NVDIMM. For pseries-5.1 the 'nvdimm' option is completely ignored, and support is always assumed to exist. This leads to situations where the user is able to set 'nvdimm=off' but the guest boots up with the NVDIMMs anyway. Fixing this now, after 5.1 launch, can put the overall NVDIMM support for pseries in a strange place regarding this 'nvdimm' machine option. If we force everything to be like other archs, existing pseries-5.1 guests that didn't use 'nvdimm' to use NVDIMM devices will break. If we attempt to make the newer pseries machines (5.2+) behave like everyone else, but keep pseries-5.1 untouched, we'll have consistency problems on machine upgrade (5.1 will have different default values for NVDIMM support than 5.2). The common ground here is, if the user sets 'nvdimm=off', we must comply regardless of being 5.1 or 5.2+. This patch changes spapr_nvdimm_validate() to verify if the user set NVDIMM support off in the machine options and, in that case, error out if we have a NVDIMM device. The default value for 5.2+ pseries machines will still be 'nvdimm=on' when there is no 'nvdimm' option declared, just like it is today with pseries-5.1. In the end we'll have different default semantics from everyone else in the absence of the 'nvdimm' machine option, but this boat has sailed. Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1848887 Signed-off-by: Daniel Henrique Barboza Message-Id: <20200825215749.213536-4-danielhb413@gmail.com> Signed-off-by: David Gibson --- hw/ppc/spapr_nvdimm.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'hw/ppc') diff --git a/hw/ppc/spapr_nvdimm.c b/hw/ppc/spapr_nvdimm.c index bc2b654..95cbc30 100644 --- a/hw/ppc/spapr_nvdimm.c +++ b/hw/ppc/spapr_nvdimm.c @@ -27,13 +27,17 @@ #include "hw/ppc/spapr_nvdimm.h" #include "hw/mem/nvdimm.h" #include "qemu/nvdimm-utils.h" +#include "qemu/option.h" #include "hw/ppc/fdt.h" #include "qemu/range.h" +#include "sysemu/sysemu.h" void spapr_nvdimm_validate(HotplugHandler *hotplug_dev, NVDIMMDevice *nvdimm, uint64_t size, Error **errp) { const MachineClass *mc = MACHINE_GET_CLASS(hotplug_dev); + const MachineState *ms = MACHINE(hotplug_dev); + const char *nvdimm_opt = qemu_opt_get(qemu_get_machine_opts(), "nvdimm"); g_autofree char *uuidstr = NULL; QemuUUID uuid; int ret; @@ -43,6 +47,20 @@ void spapr_nvdimm_validate(HotplugHandler *hotplug_dev, NVDIMMDevice *nvdimm, return; } + /* + * NVDIMM support went live in 5.1 without considering that, in + * other archs, the user needs to enable NVDIMM support with the + * 'nvdimm' machine option and the default behavior is NVDIMM + * support disabled. It is too late to roll back to the standard + * behavior without breaking 5.1 guests. What we can do is to + * ensure that, if the user sets nvdimm=off, we error out + * regardless of being 5.1 or newer. + */ + if (!ms->nvdimms_state->is_enabled && nvdimm_opt) { + error_setg(errp, "nvdimm device found but 'nvdimm=off' was set"); + return; + } + if (object_property_get_int(OBJECT(nvdimm), NVDIMM_LABEL_SIZE_PROP, &error_abort) == 0) { error_setg(errp, "PAPR requires NVDIMM devices to have label-size set"); -- cgit v1.1 From 554c2169e9251ca2829ab968bd9ba5641a5abe1d Mon Sep 17 00:00:00 2001 From: Thiago Jung Bauermann Date: Wed, 26 Aug 2020 02:55:30 -0300 Subject: ppc/spapr: Use start-powered-off CPUState property PowerPC sPAPR CPUs start in the halted state, and spapr_reset_vcpu() attempts to implement this by setting CPUState::halted to 1. But that's too late for the case of hotplugged CPUs in a machine configure with 2 or more threads per core. By then, other parts of QEMU have already caused the vCPU to run in an unitialized state a couple of times. For example, ppc_cpu_reset() calls ppc_tlb_invalidate_all(), which ends up calling async_run_on_cpu(). This kicks the new vCPU while it has CPUState::halted = 0, causing QEMU to issue a KVM_RUN ioctl on the new vCPU before the guest is able to make the start-cpu RTAS call to initialize its register state. This problem doesn't seem to cause visible issues for regular guests, but on a secure guest running under the Ultravisor it does. The Ultravisor relies on being able to snoop on the start-cpu RTAS call to map vCPUs to guests, and this issue causes it to see a stray vCPU that doesn't belong to any guest. Fix by setting the start-powered-off CPUState property in spapr_create_vcpu(), which makes cpu_common_reset() initialize CPUState::halted to 1 at an earlier moment. Suggested-by: Eduardo Habkost Acked-by: David Gibson Reviewed-by: Greg Kurz Signed-off-by: Thiago Jung Bauermann Message-Id: <20200826055535.951207-4-bauerman@linux.ibm.com> Signed-off-by: David Gibson --- hw/ppc/spapr_cpu_core.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'hw/ppc') diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c index c4f47dc..2125fda 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -36,11 +36,6 @@ static void spapr_reset_vcpu(PowerPCCPU *cpu) cpu_reset(cs); - /* All CPUs start halted. CPU0 is unhalted from the machine level - * reset code and the rest are explicitly started up by the guest - * using an RTAS call */ - cs->halted = 1; - env->spr[SPR_HIOR] = 0; lpcr = env->spr[SPR_LPCR]; @@ -274,6 +269,11 @@ static PowerPCCPU *spapr_create_vcpu(SpaprCpuCore *sc, int i, Error **errp) cs = CPU(obj); cpu = POWERPC_CPU(obj); + /* + * All CPUs start halted. CPU0 is unhalted from the machine level reset code + * and the rest are explicitly started up by the guest using an RTAS call. + */ + cs->start_powered_off = true; cs->cpu_index = cc->core_id + i; spapr_set_vcpu_id(cpu, cs->cpu_index, &local_err); if (local_err) { -- cgit v1.1 From a2c93f06cf63c29448f60285b7e103bc07cf014b Mon Sep 17 00:00:00 2001 From: Thiago Jung Bauermann Date: Wed, 26 Aug 2020 02:55:31 -0300 Subject: ppc/e500: Use start-powered-off CPUState property MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of setting CPUState::halted to 1 in ppce500_cpu_reset_sec(), use the start-powered-off property which makes cpu_common_reset() initialize it to 1 in common code. Also change creation of CPU object from cpu_create() to object_new() and qdev_realize_and_unref() because cpu_create() realizes the CPU and it's not possible to set a property after the object is realized. Signed-off-by: Thiago Jung Bauermann Message-Id: <20200826055535.951207-5-bauerman@linux.ibm.com> Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: David Gibson --- hw/ppc/e500.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'hw/ppc') diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c index ab9884e..ae39b93 100644 --- a/hw/ppc/e500.c +++ b/hw/ppc/e500.c @@ -704,9 +704,6 @@ static void ppce500_cpu_reset_sec(void *opaque) cpu_reset(cs); - /* Secondary CPU starts in halted state for now. Needs to change when - implementing non-kernel boot. */ - cs->halted = 1; cs->exception_index = EXCP_HLT; } @@ -865,7 +862,7 @@ void ppce500_init(MachineState *machine) CPUState *cs; qemu_irq *input; - cpu = POWERPC_CPU(cpu_create(machine->cpu_type)); + cpu = POWERPC_CPU(object_new(machine->cpu_type)); env = &cpu->env; cs = CPU(cpu); @@ -875,6 +872,14 @@ void ppce500_init(MachineState *machine) exit(1); } + /* + * Secondary CPU starts in halted state for now. Needs to change + * when implementing non-kernel boot. + */ + object_property_set_bool(OBJECT(cs), "start-powered-off", i != 0, + &error_fatal); + qdev_realize_and_unref(DEVICE(cs), NULL, &error_fatal); + if (!firstenv) { firstenv = env; } -- cgit v1.1 From 4192920cbc962770d54cd932748f913a7b1fe3c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Tue, 1 Sep 2020 12:40:41 +0200 Subject: hw/ppc/ppc4xx_pci: Use ARRAY_SIZE() instead of magic value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the magic '4' by ARRAY_SIZE(s->irq) which is more explicit. Signed-off-by: Philippe Mathieu-Daudé Message-Id: <20200901104043.91383-4-f4bug@amsat.org> Reviewed-by: Richard Henderson Reviewed-by: Cédric Le Goater Signed-off-by: David Gibson --- hw/ppc/ppc4xx_pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'hw/ppc') diff --git a/hw/ppc/ppc4xx_pci.c b/hw/ppc/ppc4xx_pci.c index 3ea47df..cd3f192 100644 --- a/hw/ppc/ppc4xx_pci.c +++ b/hw/ppc/ppc4xx_pci.c @@ -320,7 +320,8 @@ static void ppc4xx_pcihost_realize(DeviceState *dev, Error **errp) b = pci_register_root_bus(dev, NULL, ppc4xx_pci_set_irq, ppc4xx_pci_map_irq, s->irq, get_system_memory(), - get_system_io(), 0, 4, TYPE_PCI_BUS); + get_system_io(), 0, ARRAY_SIZE(s->irq), + TYPE_PCI_BUS); h->bus = b; pci_create_simple(b, 0, "ppc4xx-host-bridge"); -- cgit v1.1 From f5f239315cbf047aef51022ecb083fc7662c660a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Tue, 1 Sep 2020 12:40:42 +0200 Subject: hw/ppc/ppc4xx_pci: Replace pointless warning by assert() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We call pci_register_root_bus() to register 4 IRQs with the ppc4xx_pci_set_irq() handler. As it can only be called with values in the [0-4[ range, replace the pointless warning by an assert(). Signed-off-by: Philippe Mathieu-Daudé Message-Id: <20200901104043.91383-5-f4bug@amsat.org> Reviewed-by: Richard Henderson Signed-off-by: David Gibson --- hw/ppc/ppc4xx_pci.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'hw/ppc') diff --git a/hw/ppc/ppc4xx_pci.c b/hw/ppc/ppc4xx_pci.c index cd3f192..503ef46 100644 --- a/hw/ppc/ppc4xx_pci.c +++ b/hw/ppc/ppc4xx_pci.c @@ -256,10 +256,7 @@ static void ppc4xx_pci_set_irq(void *opaque, int irq_num, int level) qemu_irq *pci_irqs = opaque; trace_ppc4xx_pci_set_irq(irq_num); - if (irq_num < 0) { - fprintf(stderr, "%s: PCI irq %d\n", __func__, irq_num); - return; - } + assert(irq_num >= 0); qemu_set_irq(pci_irqs[irq_num], level); } -- cgit v1.1 From 1eee9950264867232a8771e60f3955b84acf538f Mon Sep 17 00:00:00 2001 From: Daniel Henrique Barboza Date: Tue, 1 Sep 2020 09:56:39 -0300 Subject: ppc: introducing spapr_numa.c NUMA code helper We're going to make changes in how spapr handles all ibm,associativity* related properties to enhance our current NUMA support. At this moment we have associativity code scattered all around spapr_* files, with hardcoded values and array sizes. This makes it harder to change any NUMA specific parameters in the future. Having everything in the same place allows not only for easier tuning, but also easier understanding since all NUMA related code is on the same file. This patch introduces a new file to gather all NUMA/associativity handling code in spapr, spapr_numa.c. To get things started, let's remove associativity-reference-points and max-associativity-domains code from spapr_dt_rtas() to a new helper called spapr_numa_write_rtas_dt(). This will decouple spapr_dt_rtas() from the NUMA changes that are going to happen in those two properties. Signed-off-by: Daniel Henrique Barboza Message-Id: <20200901125645.118026-2-danielhb413@gmail.com> Signed-off-by: David Gibson --- hw/ppc/meson.build | 3 ++- hw/ppc/spapr.c | 26 ++------------------------ hw/ppc/spapr_numa.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 54 insertions(+), 25 deletions(-) create mode 100644 hw/ppc/spapr_numa.c (limited to 'hw/ppc') diff --git a/hw/ppc/meson.build b/hw/ppc/meson.build index 918969b..ffa2ec3 100644 --- a/hw/ppc/meson.build +++ b/hw/ppc/meson.build @@ -25,7 +25,8 @@ ppc_ss.add(when: 'CONFIG_PSERIES', if_true: files( 'spapr_irq.c', 'spapr_tpm_proxy.c', 'spapr_nvdimm.c', - 'spapr_rtas_ddw.c' + 'spapr_rtas_ddw.c', + 'spapr_numa.c', )) ppc_ss.add(when: 'CONFIG_SPAPR_RNG', if_true: files('spapr_rng.c')) ppc_ss.add(when: ['CONFIG_PSERIES', 'CONFIG_LINUX'], if_true: files( diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index b0a0444..a45912a 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -81,6 +81,7 @@ #include "hw/mem/memory-device.h" #include "hw/ppc/spapr_tpm_proxy.h" #include "hw/ppc/spapr_nvdimm.h" +#include "hw/ppc/spapr_numa.h" #include "monitor/monitor.h" @@ -891,16 +892,9 @@ static int spapr_dt_rng(void *fdt) static void spapr_dt_rtas(SpaprMachineState *spapr, void *fdt) { MachineState *ms = MACHINE(spapr); - SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(ms); int rtas; GString *hypertas = g_string_sized_new(256); GString *qemu_hypertas = g_string_sized_new(256); - uint32_t refpoints[] = { - cpu_to_be32(0x4), - cpu_to_be32(0x4), - cpu_to_be32(0x2), - }; - uint32_t nr_refpoints = ARRAY_SIZE(refpoints); uint64_t max_device_addr = MACHINE(spapr)->device_memory->base + memory_region_size(&MACHINE(spapr)->device_memory->mr); uint32_t lrdr_capacity[] = { @@ -910,14 +904,6 @@ static void spapr_dt_rtas(SpaprMachineState *spapr, void *fdt) cpu_to_be32(SPAPR_MEMORY_BLOCK_SIZE & 0xffffffff), cpu_to_be32(ms->smp.max_cpus / ms->smp.threads), }; - uint32_t maxdomain = cpu_to_be32(spapr->gpu_numa_id > 1 ? 1 : 0); - uint32_t maxdomains[] = { - cpu_to_be32(4), - maxdomain, - maxdomain, - maxdomain, - cpu_to_be32(spapr->gpu_numa_id), - }; _FDT(rtas = fdt_add_subnode(fdt, 0, "rtas")); @@ -953,15 +939,7 @@ static void spapr_dt_rtas(SpaprMachineState *spapr, void *fdt) qemu_hypertas->str, qemu_hypertas->len)); g_string_free(qemu_hypertas, TRUE); - if (smc->pre_5_1_assoc_refpoints) { - nr_refpoints = 2; - } - - _FDT(fdt_setprop(fdt, rtas, "ibm,associativity-reference-points", - refpoints, nr_refpoints * sizeof(refpoints[0]))); - - _FDT(fdt_setprop(fdt, rtas, "ibm,max-associativity-domains", - maxdomains, sizeof(maxdomains))); + spapr_numa_write_rtas_dt(spapr, fdt, rtas); /* * FWNMI reserves RTAS_ERROR_LOG_MAX for the machine check error log, diff --git a/hw/ppc/spapr_numa.c b/hw/ppc/spapr_numa.c new file mode 100644 index 0000000..cdf3288 --- /dev/null +++ b/hw/ppc/spapr_numa.c @@ -0,0 +1,50 @@ +/* + * QEMU PowerPC pSeries Logical Partition NUMA associativity handling + * + * Copyright IBM Corp. 2020 + * + * Authors: + * Daniel Henrique Barboza + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "hw/ppc/spapr_numa.h" +#include "hw/ppc/fdt.h" + +/* + * Helper that writes ibm,associativity-reference-points and + * max-associativity-domains in the RTAS pointed by @rtas + * in the DT @fdt. + */ +void spapr_numa_write_rtas_dt(SpaprMachineState *spapr, void *fdt, int rtas) +{ + SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); + uint32_t refpoints[] = { + cpu_to_be32(0x4), + cpu_to_be32(0x4), + cpu_to_be32(0x2), + }; + uint32_t nr_refpoints = ARRAY_SIZE(refpoints); + uint32_t maxdomain = cpu_to_be32(spapr->gpu_numa_id > 1 ? 1 : 0); + uint32_t maxdomains[] = { + cpu_to_be32(4), + maxdomain, + maxdomain, + maxdomain, + cpu_to_be32(spapr->gpu_numa_id), + }; + + if (smc->pre_5_1_assoc_refpoints) { + nr_refpoints = 2; + } + + _FDT(fdt_setprop(fdt, rtas, "ibm,associativity-reference-points", + refpoints, nr_refpoints * sizeof(refpoints[0]))); + + _FDT(fdt_setprop(fdt, rtas, "ibm,max-associativity-domains", + maxdomains, sizeof(maxdomains))); +} -- cgit v1.1 From 6ee1d62e6af84c5ccda06763f2e58fb3f41ba106 Mon Sep 17 00:00:00 2001 From: Daniel Henrique Barboza Date: Tue, 1 Sep 2020 09:56:40 -0300 Subject: ppc/spapr_nvdimm: turn spapr_dt_nvdimm() static This function is only used inside spapr_nvdimm.c. Signed-off-by: Daniel Henrique Barboza Message-Id: <20200901125645.118026-3-danielhb413@gmail.com> Signed-off-by: David Gibson --- hw/ppc/spapr_nvdimm.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'hw/ppc') diff --git a/hw/ppc/spapr_nvdimm.c b/hw/ppc/spapr_nvdimm.c index 95cbc30..5188e2f 100644 --- a/hw/ppc/spapr_nvdimm.c +++ b/hw/ppc/spapr_nvdimm.c @@ -106,16 +106,6 @@ void spapr_add_nvdimm(DeviceState *dev, uint64_t slot, Error **errp) } } -int spapr_pmem_dt_populate(SpaprDrc *drc, SpaprMachineState *spapr, - void *fdt, int *fdt_start_offset, Error **errp) -{ - NVDIMMDevice *nvdimm = NVDIMM(drc->dev); - - *fdt_start_offset = spapr_dt_nvdimm(fdt, 0, nvdimm); - - return 0; -} - void spapr_create_nvdimm_dr_connectors(SpaprMachineState *spapr) { MachineState *machine = MACHINE(spapr); @@ -127,7 +117,7 @@ void spapr_create_nvdimm_dr_connectors(SpaprMachineState *spapr) } -int spapr_dt_nvdimm(void *fdt, int parent_offset, +static int spapr_dt_nvdimm(void *fdt, int parent_offset, NVDIMMDevice *nvdimm) { int child_offset; @@ -184,6 +174,16 @@ int spapr_dt_nvdimm(void *fdt, int parent_offset, return child_offset; } +int spapr_pmem_dt_populate(SpaprDrc *drc, SpaprMachineState *spapr, + void *fdt, int *fdt_start_offset, Error **errp) +{ + NVDIMMDevice *nvdimm = NVDIMM(drc->dev); + + *fdt_start_offset = spapr_dt_nvdimm(fdt, 0, nvdimm); + + return 0; +} + void spapr_dt_persistent_memory(void *fdt) { int offset = fdt_subnode_offset(fdt, 0, "persistent-memory"); -- cgit v1.1 From f1aa45fffeeb084a9ad8bd08e83c5ec6af223884 Mon Sep 17 00:00:00 2001 From: Daniel Henrique Barboza Date: Thu, 3 Sep 2020 19:06:33 -0300 Subject: spapr: introduce SpaprMachineState::numa_assoc_array The next step to centralize all NUMA/associativity handling in the spapr machine is to create a 'one stop place' for all things ibm,associativity. This patch introduces numa_assoc_array, a 2 dimensional array that will store all ibm,associativity arrays of all NUMA nodes. This array is initialized in a new spapr_numa_associativity_init() function, called in spapr_machine_init(). It is being initialized with the same values used in other ibm,associativity properties around spapr files (i.e. all zeros, last value is node_id). The idea is to remove all hardcoded definitions and FDT writes of ibm,associativity arrays, doing instead a call to the new helper spapr_numa_write_associativity_dt() helper, that will be able to write the DT with the correct values. We'll start small, handling the trivial cases first. The remaining instances of ibm,associativity will be handled next. Signed-off-by: Daniel Henrique Barboza Message-Id: <20200903220639.563090-2-danielhb413@gmail.com> Signed-off-by: David Gibson --- hw/ppc/spapr.c | 23 ++++++++++------------- hw/ppc/spapr_numa.c | 30 ++++++++++++++++++++++++++++++ hw/ppc/spapr_nvdimm.c | 19 +++++++------------ hw/ppc/spapr_pci.c | 9 ++------- 4 files changed, 49 insertions(+), 32 deletions(-) (limited to 'hw/ppc') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index a45912a..1ad6f59 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -314,14 +314,9 @@ static void add_str(GString *s, const gchar *s1) g_string_append_len(s, s1, strlen(s1) + 1); } -static int spapr_dt_memory_node(void *fdt, int nodeid, hwaddr start, - hwaddr size) +static int spapr_dt_memory_node(SpaprMachineState *spapr, void *fdt, int nodeid, + hwaddr start, hwaddr size) { - uint32_t associativity[] = { - cpu_to_be32(0x4), /* length */ - cpu_to_be32(0x0), cpu_to_be32(0x0), - cpu_to_be32(0x0), cpu_to_be32(nodeid) - }; char mem_name[32]; uint64_t mem_reg_property[2]; int off; @@ -335,8 +330,7 @@ static int spapr_dt_memory_node(void *fdt, int nodeid, hwaddr start, _FDT((fdt_setprop_string(fdt, off, "device_type", "memory"))); _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property, sizeof(mem_reg_property)))); - _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity, - sizeof(associativity)))); + spapr_numa_write_associativity_dt(spapr, fdt, off, nodeid); return off; } @@ -649,7 +643,7 @@ static int spapr_dt_memory(SpaprMachineState *spapr, void *fdt) if (!mem_start) { /* spapr_machine_init() checks for rma_size <= node0_size * already */ - spapr_dt_memory_node(fdt, i, 0, spapr->rma_size); + spapr_dt_memory_node(spapr, fdt, i, 0, spapr->rma_size); mem_start += spapr->rma_size; node_size -= spapr->rma_size; } @@ -661,7 +655,7 @@ static int spapr_dt_memory(SpaprMachineState *spapr, void *fdt) sizetmp = 1ULL << ctzl(mem_start); } - spapr_dt_memory_node(fdt, i, mem_start, sizetmp); + spapr_dt_memory_node(spapr, fdt, i, mem_start, sizetmp); node_size -= sizetmp; mem_start += sizetmp; } @@ -1275,7 +1269,7 @@ void *spapr_build_fdt(SpaprMachineState *spapr, bool reset, size_t space) /* NVDIMM devices */ if (mc->nvdimm_supported) { - spapr_dt_persistent_memory(fdt); + spapr_dt_persistent_memory(spapr, fdt); } return fdt; @@ -2810,6 +2804,9 @@ static void spapr_machine_init(MachineState *machine) */ spapr->gpu_numa_id = MAX(1, machine->numa_state->num_nodes); + /* Init numa_assoc_array */ + spapr_numa_associativity_init(spapr, machine); + if ((!kvm_enabled() || kvmppc_has_cap_mmu_radix()) && ppc_type_check_compat(machine->cpu_type, CPU_POWERPC_LOGICAL_3_00, 0, spapr->max_compat_pvr)) { @@ -3394,7 +3391,7 @@ int spapr_lmb_dt_populate(SpaprDrc *drc, SpaprMachineState *spapr, addr = spapr_drc_index(drc) * SPAPR_MEMORY_BLOCK_SIZE; node = object_property_get_uint(OBJECT(drc->dev), PC_DIMM_NODE_PROP, &error_abort); - *fdt_start_offset = spapr_dt_memory_node(fdt, node, addr, + *fdt_start_offset = spapr_dt_memory_node(spapr, fdt, node, addr, SPAPR_MEMORY_BLOCK_SIZE); return 0; } diff --git a/hw/ppc/spapr_numa.c b/hw/ppc/spapr_numa.c index cdf3288..f6b6fe6 100644 --- a/hw/ppc/spapr_numa.c +++ b/hw/ppc/spapr_numa.c @@ -15,6 +15,36 @@ #include "hw/ppc/spapr_numa.h" #include "hw/ppc/fdt.h" + +void spapr_numa_associativity_init(SpaprMachineState *spapr, + MachineState *machine) +{ + int nb_numa_nodes = machine->numa_state->num_nodes; + int i; + + /* + * For all associativity arrays: first position is the size, + * position MAX_DISTANCE_REF_POINTS is always the numa_id, + * represented by the index 'i'. + * + * This will break on sparse NUMA setups, when/if QEMU starts + * to support it, because there will be no more guarantee that + * 'i' will be a valid node_id set by the user. + */ + for (i = 0; i < nb_numa_nodes; i++) { + spapr->numa_assoc_array[i][0] = cpu_to_be32(MAX_DISTANCE_REF_POINTS); + spapr->numa_assoc_array[i][MAX_DISTANCE_REF_POINTS] = cpu_to_be32(i); + } +} + +void spapr_numa_write_associativity_dt(SpaprMachineState *spapr, void *fdt, + int offset, int nodeid) +{ + _FDT((fdt_setprop(fdt, offset, "ibm,associativity", + spapr->numa_assoc_array[nodeid], + sizeof(spapr->numa_assoc_array[nodeid])))); +} + /* * Helper that writes ibm,associativity-reference-points and * max-associativity-domains in the RTAS pointed by @rtas diff --git a/hw/ppc/spapr_nvdimm.c b/hw/ppc/spapr_nvdimm.c index 5188e2f..6387205 100644 --- a/hw/ppc/spapr_nvdimm.c +++ b/hw/ppc/spapr_nvdimm.c @@ -31,6 +31,7 @@ #include "hw/ppc/fdt.h" #include "qemu/range.h" #include "sysemu/sysemu.h" +#include "hw/ppc/spapr_numa.h" void spapr_nvdimm_validate(HotplugHandler *hotplug_dev, NVDIMMDevice *nvdimm, uint64_t size, Error **errp) @@ -117,8 +118,8 @@ void spapr_create_nvdimm_dr_connectors(SpaprMachineState *spapr) } -static int spapr_dt_nvdimm(void *fdt, int parent_offset, - NVDIMMDevice *nvdimm) +static int spapr_dt_nvdimm(SpaprMachineState *spapr, void *fdt, + int parent_offset, NVDIMMDevice *nvdimm) { int child_offset; char *buf; @@ -128,11 +129,6 @@ static int spapr_dt_nvdimm(void *fdt, int parent_offset, &error_abort); uint64_t slot = object_property_get_uint(OBJECT(nvdimm), PC_DIMM_SLOT_PROP, &error_abort); - uint32_t associativity[] = { - cpu_to_be32(0x4), /* length */ - cpu_to_be32(0x0), cpu_to_be32(0x0), - cpu_to_be32(0x0), cpu_to_be32(node) - }; uint64_t lsize = nvdimm->label_size; uint64_t size = object_property_get_int(OBJECT(nvdimm), PC_DIMM_SIZE_PROP, NULL); @@ -152,8 +148,7 @@ static int spapr_dt_nvdimm(void *fdt, int parent_offset, _FDT((fdt_setprop_string(fdt, child_offset, "compatible", "ibm,pmemory"))); _FDT((fdt_setprop_string(fdt, child_offset, "device_type", "ibm,pmemory"))); - _FDT((fdt_setprop(fdt, child_offset, "ibm,associativity", associativity, - sizeof(associativity)))); + spapr_numa_write_associativity_dt(spapr, fdt, child_offset, node); buf = qemu_uuid_unparse_strdup(&nvdimm->uuid); _FDT((fdt_setprop_string(fdt, child_offset, "ibm,unit-guid", buf))); @@ -179,12 +174,12 @@ int spapr_pmem_dt_populate(SpaprDrc *drc, SpaprMachineState *spapr, { NVDIMMDevice *nvdimm = NVDIMM(drc->dev); - *fdt_start_offset = spapr_dt_nvdimm(fdt, 0, nvdimm); + *fdt_start_offset = spapr_dt_nvdimm(spapr, fdt, 0, nvdimm); return 0; } -void spapr_dt_persistent_memory(void *fdt) +void spapr_dt_persistent_memory(SpaprMachineState *spapr, void *fdt) { int offset = fdt_subnode_offset(fdt, 0, "persistent-memory"); GSList *iter, *nvdimms = nvdimm_get_device_list(); @@ -202,7 +197,7 @@ void spapr_dt_persistent_memory(void *fdt) for (iter = nvdimms; iter; iter = iter->next) { NVDIMMDevice *nvdimm = iter->data; - spapr_dt_nvdimm(fdt, offset, nvdimm); + spapr_dt_nvdimm(spapr, fdt, offset, nvdimm); } g_slist_free(nvdimms); diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index 0a418f1..4d97ff6 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -52,6 +52,7 @@ #include "sysemu/kvm.h" #include "sysemu/hostmem.h" #include "sysemu/numa.h" +#include "hw/ppc/spapr_numa.h" /* Copied from the kernel arch/powerpc/platforms/pseries/msi.c */ #define RTAS_QUERY_FN 0 @@ -2321,11 +2322,6 @@ int spapr_dt_phb(SpaprMachineState *spapr, SpaprPhbState *phb, cpu_to_be32(1), cpu_to_be32(RTAS_IBM_RESET_PE_DMA_WINDOW) }; - uint32_t associativity[] = {cpu_to_be32(0x4), - cpu_to_be32(0x0), - cpu_to_be32(0x0), - cpu_to_be32(0x0), - cpu_to_be32(phb->numa_node)}; SpaprTceTable *tcet; SpaprDrc *drc; Error *err = NULL; @@ -2358,8 +2354,7 @@ int spapr_dt_phb(SpaprMachineState *spapr, SpaprPhbState *phb, /* Advertise NUMA via ibm,associativity */ if (phb->numa_node != -1) { - _FDT(fdt_setprop(fdt, bus_off, "ibm,associativity", associativity, - sizeof(associativity))); + spapr_numa_write_associativity_dt(spapr, fdt, bus_off, phb->numa_node); } /* Build the interrupt-map, this must matches what is done -- cgit v1.1 From 8f86a408241264db605da9a1215a409475a60bd0 Mon Sep 17 00:00:00 2001 From: Daniel Henrique Barboza Date: Thu, 3 Sep 2020 19:06:34 -0300 Subject: spapr, spapr_numa: handle vcpu ibm,associativity Vcpus have an additional paramenter to be appended, vcpu_id. This also changes the size of the of property itself, which is being represented in index 0 of numa_assoc_array[cpu->node_id], and defaults to MAX_DISTANCE_REF_POINTS for all cases but vcpus. All this logic makes more sense in spapr_numa.c, where we handle everything NUMA and associativity. A new helper spapr_numa_fixup_cpu_dt() was added, and spapr.c uses it the same way as it was using the former spapr_fixup_cpu_numa_dt(). Signed-off-by: Daniel Henrique Barboza Message-Id: <20200903220639.563090-3-danielhb413@gmail.com> [dwg: Correct uint to int type, which can break windows builds] Signed-off-by: David Gibson --- hw/ppc/spapr.c | 17 +---------------- hw/ppc/spapr_numa.c | 27 +++++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 16 deletions(-) (limited to 'hw/ppc') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 1ad6f59..badfa86 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -202,21 +202,6 @@ static int spapr_fixup_cpu_smt_dt(void *fdt, int offset, PowerPCCPU *cpu, return ret; } -static int spapr_fixup_cpu_numa_dt(void *fdt, int offset, PowerPCCPU *cpu) -{ - int index = spapr_get_vcpu_id(cpu); - uint32_t associativity[] = {cpu_to_be32(0x5), - cpu_to_be32(0x0), - cpu_to_be32(0x0), - cpu_to_be32(0x0), - cpu_to_be32(cpu->node_id), - cpu_to_be32(index)}; - - /* Advertise NUMA via ibm,associativity */ - return fdt_setprop(fdt, offset, "ibm,associativity", associativity, - sizeof(associativity)); -} - static void spapr_dt_pa_features(SpaprMachineState *spapr, PowerPCCPU *cpu, void *fdt, int offset) @@ -785,7 +770,7 @@ static void spapr_dt_cpu(CPUState *cs, void *fdt, int offset, pft_size_prop, sizeof(pft_size_prop)))); if (ms->numa_state->num_nodes > 1) { - _FDT(spapr_fixup_cpu_numa_dt(fdt, offset, cpu)); + _FDT(spapr_numa_fixup_cpu_dt(spapr, fdt, offset, cpu)); } _FDT(spapr_fixup_cpu_smt_dt(fdt, offset, cpu, compat_smt)); diff --git a/hw/ppc/spapr_numa.c b/hw/ppc/spapr_numa.c index f6b6fe6..523801f 100644 --- a/hw/ppc/spapr_numa.c +++ b/hw/ppc/spapr_numa.c @@ -45,6 +45,33 @@ void spapr_numa_write_associativity_dt(SpaprMachineState *spapr, void *fdt, sizeof(spapr->numa_assoc_array[nodeid])))); } +int spapr_numa_fixup_cpu_dt(SpaprMachineState *spapr, void *fdt, + int offset, PowerPCCPU *cpu) +{ + int vcpu_assoc_size = NUMA_ASSOC_SIZE + 1; + uint32_t vcpu_assoc[vcpu_assoc_size]; + int index = spapr_get_vcpu_id(cpu); + int i; + + /* + * VCPUs have an extra 'cpu_id' value in ibm,associativity + * compared to other resources. Increment the size at index + * 0, copy all associativity domains already set, then put + * cpu_id last. + */ + vcpu_assoc[0] = cpu_to_be32(MAX_DISTANCE_REF_POINTS + 1); + + for (i = 1; i <= MAX_DISTANCE_REF_POINTS; i++) { + vcpu_assoc[i] = spapr->numa_assoc_array[cpu->node_id][i]; + } + + vcpu_assoc[vcpu_assoc_size - 1] = cpu_to_be32(index); + + /* Advertise NUMA via ibm,associativity */ + return fdt_setprop(fdt, offset, "ibm,associativity", + vcpu_assoc, sizeof(vcpu_assoc)); +} + /* * Helper that writes ibm,associativity-reference-points and * max-associativity-domains in the RTAS pointed by @rtas -- cgit v1.1 From 0ee520126a2a1a2539e54b6d0879380cb0bf4ea4 Mon Sep 17 00:00:00 2001 From: Daniel Henrique Barboza Date: Thu, 3 Sep 2020 19:06:35 -0300 Subject: spapr, spapr_numa: move lookup-arrays handling to spapr_numa.c In a similar fashion as the previous patch, let's move the handling of ibm,associativity-lookup-arrays from spapr.c to spapr_numa.c. A spapr_numa_write_assoc_lookup_arrays() helper was created, and spapr_dt_dynamic_reconfiguration_memory() can now use it to advertise the lookup-arrays. Signed-off-by: Daniel Henrique Barboza Message-Id: <20200903220639.563090-4-danielhb413@gmail.com> Signed-off-by: David Gibson --- hw/ppc/spapr.c | 25 ++----------------------- hw/ppc/spapr_numa.c | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 23 deletions(-) (limited to 'hw/ppc') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index badfa86..9bce189 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -535,13 +535,10 @@ static int spapr_dt_dynamic_reconfiguration_memory(SpaprMachineState *spapr, void *fdt) { MachineState *machine = MACHINE(spapr); - int nb_numa_nodes = machine->numa_state->num_nodes; - int ret, i, offset; + int ret, offset; uint64_t lmb_size = SPAPR_MEMORY_BLOCK_SIZE; uint32_t prop_lmb_size[] = {cpu_to_be32(lmb_size >> 32), cpu_to_be32(lmb_size & 0xffffffff)}; - uint32_t *int_buf, *cur_index, buf_len; - int nr_nodes = nb_numa_nodes ? nb_numa_nodes : 1; MemoryDeviceInfoList *dimms = NULL; /* @@ -582,25 +579,7 @@ static int spapr_dt_dynamic_reconfiguration_memory(SpaprMachineState *spapr, return ret; } - /* ibm,associativity-lookup-arrays */ - buf_len = (nr_nodes * 4 + 2) * sizeof(uint32_t); - cur_index = int_buf = g_malloc0(buf_len); - int_buf[0] = cpu_to_be32(nr_nodes); - int_buf[1] = cpu_to_be32(4); /* Number of entries per associativity list */ - cur_index += 2; - for (i = 0; i < nr_nodes; i++) { - uint32_t associativity[] = { - cpu_to_be32(0x0), - cpu_to_be32(0x0), - cpu_to_be32(0x0), - cpu_to_be32(i) - }; - memcpy(cur_index, associativity, sizeof(associativity)); - cur_index += 4; - } - ret = fdt_setprop(fdt, offset, "ibm,associativity-lookup-arrays", int_buf, - (cur_index - int_buf) * sizeof(uint32_t)); - g_free(int_buf); + ret = spapr_numa_write_assoc_lookup_arrays(spapr, fdt, offset); return ret; } diff --git a/hw/ppc/spapr_numa.c b/hw/ppc/spapr_numa.c index 523801f..42394f3 100644 --- a/hw/ppc/spapr_numa.c +++ b/hw/ppc/spapr_numa.c @@ -72,6 +72,40 @@ int spapr_numa_fixup_cpu_dt(SpaprMachineState *spapr, void *fdt, vcpu_assoc, sizeof(vcpu_assoc)); } + +int spapr_numa_write_assoc_lookup_arrays(SpaprMachineState *spapr, void *fdt, + int offset) +{ + MachineState *machine = MACHINE(spapr); + int nb_numa_nodes = machine->numa_state->num_nodes; + int nr_nodes = nb_numa_nodes ? nb_numa_nodes : 1; + uint32_t *int_buf, *cur_index, buf_len; + int ret, i; + + /* ibm,associativity-lookup-arrays */ + buf_len = (nr_nodes * MAX_DISTANCE_REF_POINTS + 2) * sizeof(uint32_t); + cur_index = int_buf = g_malloc0(buf_len); + int_buf[0] = cpu_to_be32(nr_nodes); + /* Number of entries per associativity list */ + int_buf[1] = cpu_to_be32(MAX_DISTANCE_REF_POINTS); + cur_index += 2; + for (i = 0; i < nr_nodes; i++) { + /* + * For the lookup-array we use the ibm,associativity array, + * from numa_assoc_array. without the first element (size). + */ + uint32_t *associativity = spapr->numa_assoc_array[i]; + memcpy(cur_index, ++associativity, + sizeof(uint32_t) * MAX_DISTANCE_REF_POINTS); + cur_index += MAX_DISTANCE_REF_POINTS; + } + ret = fdt_setprop(fdt, offset, "ibm,associativity-lookup-arrays", int_buf, + (cur_index - int_buf) * sizeof(uint32_t)); + g_free(int_buf); + + return ret; +} + /* * Helper that writes ibm,associativity-reference-points and * max-associativity-domains in the RTAS pointed by @rtas -- cgit v1.1 From dd7e1d7ae431ee61b0e59bf505fc39dc17bba24b Mon Sep 17 00:00:00 2001 From: Daniel Henrique Barboza Date: Thu, 3 Sep 2020 19:06:36 -0300 Subject: spapr_numa: move NVLink2 associativity handling to spapr_numa.c The NVLink2 GPUs works like a regular NUMA node with its own associativity values, regardless of user input. This can be handled inside spapr_numa_associativity_init(), initializing NVGPU_MAX_NUM associativity arrays that can be used by the GPUs. Signed-off-by: Daniel Henrique Barboza Message-Id: <20200903220639.563090-5-danielhb413@gmail.com> Signed-off-by: David Gibson --- hw/ppc/spapr_numa.c | 28 +++++++++++++++++++++++++++- hw/ppc/spapr_pci_nvlink2.c | 20 +++----------------- 2 files changed, 30 insertions(+), 18 deletions(-) (limited to 'hw/ppc') diff --git a/hw/ppc/spapr_numa.c b/hw/ppc/spapr_numa.c index 42394f3..9073dbb 100644 --- a/hw/ppc/spapr_numa.c +++ b/hw/ppc/spapr_numa.c @@ -13,14 +13,18 @@ #include "qemu/osdep.h" #include "qemu-common.h" #include "hw/ppc/spapr_numa.h" +#include "hw/pci-host/spapr.h" #include "hw/ppc/fdt.h" +/* Moved from hw/ppc/spapr_pci_nvlink2.c */ +#define SPAPR_GPU_NUMA_ID (cpu_to_be32(1)) void spapr_numa_associativity_init(SpaprMachineState *spapr, MachineState *machine) { + SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); int nb_numa_nodes = machine->numa_state->num_nodes; - int i; + int i, j, max_nodes_with_gpus; /* * For all associativity arrays: first position is the size, @@ -35,6 +39,28 @@ void spapr_numa_associativity_init(SpaprMachineState *spapr, spapr->numa_assoc_array[i][0] = cpu_to_be32(MAX_DISTANCE_REF_POINTS); spapr->numa_assoc_array[i][MAX_DISTANCE_REF_POINTS] = cpu_to_be32(i); } + + /* + * Initialize NVLink GPU associativity arrays. We know that + * the first GPU will take the first available NUMA id, and + * we'll have a maximum of NVGPU_MAX_NUM GPUs in the machine. + * At this point we're not sure if there are GPUs or not, but + * let's initialize the associativity arrays and allow NVLink + * GPUs to be handled like regular NUMA nodes later on. + */ + max_nodes_with_gpus = nb_numa_nodes + NVGPU_MAX_NUM; + + for (i = nb_numa_nodes; i < max_nodes_with_gpus; i++) { + spapr->numa_assoc_array[i][0] = cpu_to_be32(MAX_DISTANCE_REF_POINTS); + + for (j = 1; j < MAX_DISTANCE_REF_POINTS; j++) { + uint32_t gpu_assoc = smc->pre_5_1_assoc_refpoints ? + SPAPR_GPU_NUMA_ID : cpu_to_be32(i); + spapr->numa_assoc_array[i][j] = gpu_assoc; + } + + spapr->numa_assoc_array[i][MAX_DISTANCE_REF_POINTS] = cpu_to_be32(i); + } } void spapr_numa_write_associativity_dt(SpaprMachineState *spapr, void *fdt, diff --git a/hw/ppc/spapr_pci_nvlink2.c b/hw/ppc/spapr_pci_nvlink2.c index 76ae77e..8ef9b40 100644 --- a/hw/ppc/spapr_pci_nvlink2.c +++ b/hw/ppc/spapr_pci_nvlink2.c @@ -26,6 +26,7 @@ #include "qemu-common.h" #include "hw/pci/pci.h" #include "hw/pci-host/spapr.h" +#include "hw/ppc/spapr_numa.h" #include "qemu/error-report.h" #include "hw/ppc/fdt.h" #include "hw/pci/pci_bridge.h" @@ -37,8 +38,6 @@ #define PHANDLE_NVLINK(phb, gn, nn) (0x00130000 | (((phb)->index) << 8) | \ ((gn) << 4) | (nn)) -#define SPAPR_GPU_NUMA_ID (cpu_to_be32(1)) - typedef struct SpaprPhbPciNvGpuSlot { uint64_t tgt; uint64_t gpa; @@ -360,13 +359,6 @@ void spapr_phb_nvgpu_ram_populate_dt(SpaprPhbState *sphb, void *fdt) Object *nv_mrobj = object_property_get_link(OBJECT(nvslot->gpdev), "nvlink2-mr[0]", &error_abort); - uint32_t associativity[] = { - cpu_to_be32(0x4), - cpu_to_be32(nvslot->numa_id), - cpu_to_be32(nvslot->numa_id), - cpu_to_be32(nvslot->numa_id), - cpu_to_be32(nvslot->numa_id) - }; uint64_t size = object_property_get_uint(nv_mrobj, "size", NULL); uint64_t mem_reg[2] = { cpu_to_be64(nvslot->gpa), cpu_to_be64(size) }; char *mem_name = g_strdup_printf("memory@%"PRIx64, nvslot->gpa); @@ -376,14 +368,8 @@ void spapr_phb_nvgpu_ram_populate_dt(SpaprPhbState *sphb, void *fdt) _FDT((fdt_setprop_string(fdt, off, "device_type", "memory"))); _FDT((fdt_setprop(fdt, off, "reg", mem_reg, sizeof(mem_reg)))); - if (sphb->pre_5_1_assoc) { - associativity[1] = SPAPR_GPU_NUMA_ID; - associativity[2] = SPAPR_GPU_NUMA_ID; - associativity[3] = SPAPR_GPU_NUMA_ID; - } - - _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity, - sizeof(associativity)))); + spapr_numa_write_associativity_dt(SPAPR_MACHINE(qdev_get_machine()), + fdt, off, nvslot->numa_id); _FDT((fdt_setprop_string(fdt, off, "compatible", "ibm,coherent-device-memory"))); -- cgit v1.1 From f8a13fc3812562ad46db1d37d9a176613731698f Mon Sep 17 00:00:00 2001 From: Daniel Henrique Barboza Date: Fri, 4 Sep 2020 14:24:20 -0300 Subject: spapr: move h_home_node_associativity to spapr_numa.c The implementation of this hypercall will be modified to use spapr->numa_assoc_arrays input. Moving it to spapr_numa.c makes make more sense. Reviewed-by: Greg Kurz Signed-off-by: Daniel Henrique Barboza Message-Id: <20200904172422.617460-2-danielhb413@gmail.com> Signed-off-by: David Gibson --- hw/ppc/spapr_hcall.c | 40 ---------------------------------------- hw/ppc/spapr_numa.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 40 deletions(-) (limited to 'hw/ppc') diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index c1d0122..c2776b6 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -1873,42 +1873,6 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, return ret; } -static target_ulong h_home_node_associativity(PowerPCCPU *cpu, - SpaprMachineState *spapr, - target_ulong opcode, - target_ulong *args) -{ - target_ulong flags = args[0]; - target_ulong procno = args[1]; - PowerPCCPU *tcpu; - int idx; - - /* only support procno from H_REGISTER_VPA */ - if (flags != 0x1) { - return H_FUNCTION; - } - - tcpu = spapr_find_cpu(procno); - if (tcpu == NULL) { - return H_P2; - } - - /* sequence is the same as in the "ibm,associativity" property */ - - idx = 0; -#define ASSOCIATIVITY(a, b) (((uint64_t)(a) << 32) | \ - ((uint64_t)(b) & 0xffffffff)) - args[idx++] = ASSOCIATIVITY(0, 0); - args[idx++] = ASSOCIATIVITY(0, tcpu->node_id); - args[idx++] = ASSOCIATIVITY(procno, -1); - for ( ; idx < 6; idx++) { - args[idx] = -1; - } -#undef ASSOCIATIVITY - - return H_SUCCESS; -} - static target_ulong h_get_cpu_characteristics(PowerPCCPU *cpu, SpaprMachineState *spapr, target_ulong opcode, @@ -2139,10 +2103,6 @@ static void hypercall_register_types(void) spapr_register_hypercall(KVMPPC_H_CAS, h_client_architecture_support); spapr_register_hypercall(KVMPPC_H_UPDATE_DT, h_update_dt); - - /* Virtual Processor Home Node */ - spapr_register_hypercall(H_HOME_NODE_ASSOCIATIVITY, - h_home_node_associativity); } type_init(hypercall_register_types) diff --git a/hw/ppc/spapr_numa.c b/hw/ppc/spapr_numa.c index 9073dbb..e60587c 100644 --- a/hw/ppc/spapr_numa.c +++ b/hw/ppc/spapr_numa.c @@ -165,3 +165,48 @@ void spapr_numa_write_rtas_dt(SpaprMachineState *spapr, void *fdt, int rtas) _FDT(fdt_setprop(fdt, rtas, "ibm,max-associativity-domains", maxdomains, sizeof(maxdomains))); } + +static target_ulong h_home_node_associativity(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + target_ulong flags = args[0]; + target_ulong procno = args[1]; + PowerPCCPU *tcpu; + int idx; + + /* only support procno from H_REGISTER_VPA */ + if (flags != 0x1) { + return H_FUNCTION; + } + + tcpu = spapr_find_cpu(procno); + if (tcpu == NULL) { + return H_P2; + } + + /* sequence is the same as in the "ibm,associativity" property */ + + idx = 0; +#define ASSOCIATIVITY(a, b) (((uint64_t)(a) << 32) | \ + ((uint64_t)(b) & 0xffffffff)) + args[idx++] = ASSOCIATIVITY(0, 0); + args[idx++] = ASSOCIATIVITY(0, tcpu->node_id); + args[idx++] = ASSOCIATIVITY(procno, -1); + for ( ; idx < 6; idx++) { + args[idx] = -1; + } +#undef ASSOCIATIVITY + + return H_SUCCESS; +} + +static void spapr_numa_register_types(void) +{ + /* Virtual Processor Home Node */ + spapr_register_hypercall(H_HOME_NODE_ASSOCIATIVITY, + h_home_node_associativity); +} + +type_init(spapr_numa_register_types) -- cgit v1.1 From d370f9cf0aac54b091047392427d3d5ec93182c3 Mon Sep 17 00:00:00 2001 From: Daniel Henrique Barboza Date: Fri, 4 Sep 2020 14:24:21 -0300 Subject: spapr_numa: create a vcpu associativity helper The work to be done in h_home_node_associativity() intersects with what is already done in spapr_numa_fixup_cpu_dt(). This patch creates a new helper, spapr_numa_get_vcpu_assoc(), to be used for both spapr_numa_fixup_cpu_dt() and h_home_node_associativity(). While we're at it, use memcpy() instead of loop assignment to created the returned array. Reviewed-by: Greg Kurz Signed-off-by: Daniel Henrique Barboza Message-Id: <20200904172422.617460-3-danielhb413@gmail.com> Signed-off-by: David Gibson --- hw/ppc/spapr_numa.c | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) (limited to 'hw/ppc') diff --git a/hw/ppc/spapr_numa.c b/hw/ppc/spapr_numa.c index e60587c..066ffc2 100644 --- a/hw/ppc/spapr_numa.c +++ b/hw/ppc/spapr_numa.c @@ -71,31 +71,36 @@ void spapr_numa_write_associativity_dt(SpaprMachineState *spapr, void *fdt, sizeof(spapr->numa_assoc_array[nodeid])))); } -int spapr_numa_fixup_cpu_dt(SpaprMachineState *spapr, void *fdt, - int offset, PowerPCCPU *cpu) +static uint32_t *spapr_numa_get_vcpu_assoc(SpaprMachineState *spapr, + PowerPCCPU *cpu) { - int vcpu_assoc_size = NUMA_ASSOC_SIZE + 1; - uint32_t vcpu_assoc[vcpu_assoc_size]; + uint32_t *vcpu_assoc = g_new(uint32_t, VCPU_ASSOC_SIZE); int index = spapr_get_vcpu_id(cpu); - int i; /* * VCPUs have an extra 'cpu_id' value in ibm,associativity * compared to other resources. Increment the size at index - * 0, copy all associativity domains already set, then put - * cpu_id last. + * 0, put cpu_id last, then copy the remaining associativity + * domains. */ vcpu_assoc[0] = cpu_to_be32(MAX_DISTANCE_REF_POINTS + 1); + vcpu_assoc[VCPU_ASSOC_SIZE - 1] = cpu_to_be32(index); + memcpy(vcpu_assoc + 1, spapr->numa_assoc_array[cpu->node_id] + 1, + (VCPU_ASSOC_SIZE - 2) * sizeof(uint32_t)); - for (i = 1; i <= MAX_DISTANCE_REF_POINTS; i++) { - vcpu_assoc[i] = spapr->numa_assoc_array[cpu->node_id][i]; - } + return vcpu_assoc; +} + +int spapr_numa_fixup_cpu_dt(SpaprMachineState *spapr, void *fdt, + int offset, PowerPCCPU *cpu) +{ + g_autofree uint32_t *vcpu_assoc = NULL; - vcpu_assoc[vcpu_assoc_size - 1] = cpu_to_be32(index); + vcpu_assoc = spapr_numa_get_vcpu_assoc(spapr, cpu); /* Advertise NUMA via ibm,associativity */ - return fdt_setprop(fdt, offset, "ibm,associativity", - vcpu_assoc, sizeof(vcpu_assoc)); + return fdt_setprop(fdt, offset, "ibm,associativity", vcpu_assoc, + VCPU_ASSOC_SIZE * sizeof(uint32_t)); } -- cgit v1.1 From 876ab8d89d0d288945334c8caa908b07ef847de2 Mon Sep 17 00:00:00 2001 From: Daniel Henrique Barboza Date: Fri, 4 Sep 2020 14:24:22 -0300 Subject: spapr_numa: use spapr_numa_get_vcpu_assoc() in home_node hcall The current implementation of h_home_node_associativity hard codes the values of associativity domains of the vcpus. Let's make it consider the values already initialized in spapr->numa_assoc_array, via the spapr_numa_get_vcpu_assoc() helper. We want to set it and forget it, and for that we also need to assert that we don't overflow the registers of the hypercall. >From R4 to R9 we can squeeze in 12 associativity domains for vcpus, so let's assert that VCPU_ASSOC_SIZE -1 isn't greater than that. Reviewed-by: Greg Kurz Signed-off-by: Daniel Henrique Barboza Message-Id: <20200904172422.617460-4-danielhb413@gmail.com> Signed-off-by: David Gibson --- hw/ppc/spapr_numa.c | 41 +++++++++++++++++++++++++++++++++-------- 1 file changed, 33 insertions(+), 8 deletions(-) (limited to 'hw/ppc') diff --git a/hw/ppc/spapr_numa.c b/hw/ppc/spapr_numa.c index 066ffc2..64fe567 100644 --- a/hw/ppc/spapr_numa.c +++ b/hw/ppc/spapr_numa.c @@ -176,10 +176,11 @@ static target_ulong h_home_node_associativity(PowerPCCPU *cpu, target_ulong opcode, target_ulong *args) { + g_autofree uint32_t *vcpu_assoc = NULL; target_ulong flags = args[0]; target_ulong procno = args[1]; PowerPCCPU *tcpu; - int idx; + int idx, assoc_idx; /* only support procno from H_REGISTER_VPA */ if (flags != 0x1) { @@ -191,16 +192,40 @@ static target_ulong h_home_node_associativity(PowerPCCPU *cpu, return H_P2; } - /* sequence is the same as in the "ibm,associativity" property */ + /* + * Given that we want to be flexible with the sizes and indexes, + * we must consider that there is a hard limit of how many + * associativities domain we can fit in R4 up to R9, which would be + * 12 associativity domains for vcpus. Assert and bail if that's + * not the case. + */ + G_STATIC_ASSERT((VCPU_ASSOC_SIZE - 1) <= 12); + + vcpu_assoc = spapr_numa_get_vcpu_assoc(spapr, tcpu); + /* assoc_idx starts at 1 to skip associativity size */ + assoc_idx = 1; - idx = 0; #define ASSOCIATIVITY(a, b) (((uint64_t)(a) << 32) | \ ((uint64_t)(b) & 0xffffffff)) - args[idx++] = ASSOCIATIVITY(0, 0); - args[idx++] = ASSOCIATIVITY(0, tcpu->node_id); - args[idx++] = ASSOCIATIVITY(procno, -1); - for ( ; idx < 6; idx++) { - args[idx] = -1; + + for (idx = 0; idx < 6; idx++) { + int32_t a, b; + + /* + * vcpu_assoc[] will contain the associativity domains for tcpu, + * including tcpu->node_id and procno, meaning that we don't + * need to use these variables here. + * + * We'll read 2 values at a time to fill up the ASSOCIATIVITY() + * macro. The ternary will fill the remaining registers with -1 + * after we went through vcpu_assoc[]. + */ + a = assoc_idx < VCPU_ASSOC_SIZE ? + be32_to_cpu(vcpu_assoc[assoc_idx++]) : -1; + b = assoc_idx < VCPU_ASSOC_SIZE ? + be32_to_cpu(vcpu_assoc[assoc_idx++]) : -1; + + args[idx] = ASSOCIATIVITY(a, b); } #undef ASSOCIATIVITY -- cgit v1.1