aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Reza Arbab <arbab@linux.ibm.com> 2018-04-08 15:19:39 -0500
committer: Stewart Smith <stewart@linux.ibm.com> 2018-04-10 15:38:33 +1000
commit: 4724d2c07fa63b2b95f0f42fb13e07856251e48a (patch)
tree: b79a554c20557fb8e759d93c8c3036574c044098
parent: e0cffe9554a527fb496eda4b561af623afdf01c4 (diff)
downloadskiboot-4724d2c07fa63b2b95f0f42fb13e07856251e48a.zip
skiboot-4724d2c07fa63b2b95f0f42fb13e07856251e48a.tar.gz
skiboot-4724d2c07fa63b2b95f0f42fb13e07856251e48a.tar.bz2
npu2: Move NPU2_XTS_BDF_MAP_VALID assignment to context init
A bad GPU or other condition may leave us with a subset of links that
never get initialized. If an ATSD is sent to one of those bricks, it
will never complete, leaving us waiting forever for a response:

  watchdog: BUG: soft lockup - CPU#23 stuck for 23s! [acos:2050]
  ...
  Modules linked in: nvidia_uvm(O) nvidia(O)
  CPU: 23 PID: 2050 Comm: acos Tainted: G W O 4.14.0 #2
  task: c0000000285cfc00 task.stack: c000001fea860000
  NIP: c0000000000abdf0 LR: c0000000000acc48 CTR: c0000000000ace60
  REGS: c000001fea863550 TRAP: 0901 Tainted: G W O (4.14.0)
  MSR: 9000000000009033 <SF,HV,EE,ME,IR,DR,RI,LE> CR: 28004484 XER: 20040000
  CFAR: c0000000000abdf4 SOFTE: 1
  GPR00: c0000000000acc48 c000001fea8637d0 c0000000011f7c00 c000001fea863820
  GPR04: 0000000002000000 0004100026000000 c0000000012778c8 c00000000127a560
  GPR08: 0000000000000001 0000000000000080 c000201cc7cb7750 ffffffffffffffff
  GPR12: 0000000000008000 c000000003167e80
  NIP [c0000000000abdf0] mmio_invalidate_wait+0x90/0xc0
  LR [c0000000000acc48] mmio_invalidate.isra.11+0x158/0x370

ATSDs are only sent to bricks which have a valid entry in the XTS_BDF
table. So to prevent the hang, don't set NPU2_XTS_BDF_MAP_VALID unless
we make it all the way to creating a context for the BDF.

Signed-off-by: Reza Arbab <arbab@linux.ibm.com>
Signed-off-by: Stewart Smith <stewart@linux.ibm.com>
-rw-r--r-- hw/npu2.c 15
1 files changed, 8 insertions, 7 deletions
diff --git a/hw/npu2.c b/hw/npu2.c
index 06e06d4..b243e8e 100644
--- a/hw/npu2.c
+++ b/hw/npu2.c
@@ -2118,6 +2118,11 @@ static int64_t opal_npu_init_context(uint64_t phb_id, int pasid __unused,
NPU2DBG(p, "XTS_PID_MAP[%03d] = 0x%08llx\n", id, xts_bdf_pid);
npu2_write(p, NPU2_XTS_PID_MAP + id*0x20, xts_bdf_pid);
+ if (!GETFIELD(NPU2_XTS_BDF_MAP_VALID, xts_bdf)) {
+ xts_bdf = SETFIELD(NPU2_XTS_BDF_MAP_VALID, xts_bdf, 1);
+ npu2_write(p, NPU2_XTS_BDF_MAP + id*8, xts_bdf);
+ }
+
out:
unlock(&p->lock);
return id;
@@ -2182,12 +2187,9 @@ static int opal_npu_map_lpar(uint64_t phb_id, uint64_t bdf, uint64_t lparid,
lock(&p->lock);
/* Find any existing entries and update them */
- xts_bdf_lpar = SETFIELD(NPU2_XTS_BDF_MAP_VALID, 0UL, 1);
- xts_bdf_lpar = SETFIELD(NPU2_XTS_BDF_MAP_BDF, xts_bdf_lpar, bdf);
+ xts_bdf_lpar = SETFIELD(NPU2_XTS_BDF_MAP_BDF, 0L, bdf);
id = npu_table_search(p, NPU2_XTS_BDF_MAP, 8, NPU2_XTS_BDF_MAP_SIZE,
- &xts_bdf_lpar,
- NPU2_XTS_BDF_MAP_VALID |
- NPU2_XTS_BDF_MAP_BDF);
+ &xts_bdf_lpar, NPU2_XTS_BDF_MAP_BDF);
if (id < 0) {
/* No existing mapping found, find space for a new one */
xts_bdf_lpar = 0;
@@ -2202,8 +2204,7 @@ static int opal_npu_map_lpar(uint64_t phb_id, uint64_t bdf, uint64_t lparid,
goto out;
}
- xts_bdf_lpar = SETFIELD(NPU2_XTS_BDF_MAP_VALID, 0UL, 1);
- xts_bdf_lpar = SETFIELD(NPU2_XTS_BDF_MAP_UNFILT, xts_bdf_lpar, 1);
+ xts_bdf_lpar = SETFIELD(NPU2_XTS_BDF_MAP_UNFILT, 0UL, 1);
xts_bdf_lpar = SETFIELD(NPU2_XTS_BDF_MAP_BDF, xts_bdf_lpar, bdf);
/* We only support radix for the moment */