aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Gautham R. Shenoy <ego@linux.vnet.ibm.com> 2020-04-27 20:26:40 +0530
committer Vasant Hegde <hegdevasant@linux.vnet.ibm.com> 2020-06-06 10:17:40 +0530
commit 42e2125a1d576207e51947d7a26e9d91eda5e2c8 (patch)
tree c23f0f02091d6e76ed49b9a7e8c874d4351b87c3
parent 0f1937ef40fca0c3212a9dff1010b832a24fb063 (diff)
download skiboot-42e2125a1d576207e51947d7a26e9d91eda5e2c8.zip
skiboot-42e2125a1d576207e51947d7a26e9d91eda5e2c8.tar.gz
skiboot-42e2125a1d576207e51947d7a26e9d91eda5e2c8.tar.bz2
sensors: occ: Fix the GPU detection code
[ Upstream commit f3ac046b386fea80286c72c3217acb407230a8c6 ]

commit bebe096ee242 ("sensors: occ: Skip GPU sensors for non-gpu systems") assumes that presence of "ibm,power9-npu" compatible node indicates the presence of GPUs. However this is incorrect, as even OpenCAPI is supported via NPU. Thus ZZ systems, which have OpenCAPI connectors but not GPUs, will have "ibm,power9-npu" compatible nodes. This results in OPAL creating device-tree entries for the GPU sensors on ZZ systems which don't even have GPUs.

This patch fixes the GPU detection code in occ-sensors, by first checking for "ibm,ioda2-npu2-phb" compatible node which indicates the presence of nvlink. Only if such a node exists, do we check with the OCC for presence of GPUs on systems to confirm the presence of the GPU. Otherwise, we cut the GPU sensors.

Thanks to Frederic Barrat <fbarrat@linux.ibm.com> for suggesting "ibm,ioda2-npu2-phb" for detecting the presence of nvlink GPUs.

cc: skiboot-stable@lists.ozlabs.org
Fixes: commit bebe096ee242 ("sensors: occ: Skip GPU sensors for non-gpu systems")
Reported-by: Pavaman Subramaniyam <pavsubra@in.ibm.com>
Tested-by: Pavaman Subramaniyam <pavsubra@in.ibm.com>
Reviewed-by: Vaidyanathan Srinivasan <svaidy@linux.ibm.com>
Reviewed-by: Frederic Barrat <fbarrat@linux.ibm.com>
Signed-off-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
Signed-off-by: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>
-rw-r--r-- hw/occ-sensor.c 22
1 files changed, 20 insertions, 2 deletions
diff --git a/hw/occ-sensor.c b/hw/occ-sensor.c
index 524d00f..d97cc33 100644
--- a/hw/occ-sensor.c
+++ b/hw/occ-sensor.c
@@ -521,8 +521,26 @@ bool occ_sensors_init(void)
dt_add_property_cells(sg, "#address-cells", 1);
dt_add_property_cells(sg, "#size-cells", 0);
- if (dt_find_compatible_node(dt_root, NULL, "ibm,power9-npu"))
- has_gpu = true;
+ /*
+ * On POWER9, ibm,ioda2-npu2-phb indicates the presence of a
+ * GPU NVlink.
+ */
+ if (dt_find_compatible_node(dt_root, NULL, "ibm,ioda2-npu2-phb")) {
+
+ for_each_chip(chip) {
+ int max_gpus_per_chip = 3, i;
+
+ for(i = 0; i < max_gpus_per_chip; i++) {
+ has_gpu = occ_get_gpu_presence(chip, i);
+
+ if (has_gpu)
+ break;
+ }
+
+ if (has_gpu)
+ break;
+ }
+ }
for_each_chip(chip) {
struct occ_sensor_data_header *hb;