aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Alexey Kardashevskiy <aik@ozlabs.ru> 2019-11-22 11:04:22 +1100
committer: Vasant Hegde <hegdevasant@linux.vnet.ibm.com> 2020-03-27 16:35:49 +0530
commit: 085331e324bf0b57f0d2a0c077ef612beaca6d4b (patch)
tree: e46c87c2ff6556510b907fa9727088b35602d304
parent: 5d0f9c6ace746dc4f006e090665bb536c74022cb (diff)
download: skiboot-085331e324bf0b57f0d2a0c077ef612beaca6d4b.zip
skiboot-085331e324bf0b57f0d2a0c077ef612beaca6d4b.tar.gz
skiboot-085331e324bf0b57f0d2a0c077ef612beaca6d4b.tar.bz2
npu2: Clear fence on all bricks
[ Upstream commit 9be9a77a8352aee0bb74ac0d79f55e1238f76285 ]

A bug in the NVIDIA driver can cause a UR HMI which fences bricks (links). At the moment we clear fence status only for the bricks of a specific device; however, this does not appear to be enough and we need to clear fences for all bricks. This is ok as we do not allow using GPUs individually anyway.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Acked-by: Reza Arbab <arbab@linux.ibm.com>
Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
Signed-off-by: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>
-rw-r--r-- hw/npu2-hw-procedures.c 17
1 files changed, 12 insertions, 5 deletions
diff --git a/hw/npu2-hw-procedures.c b/hw/npu2-hw-procedures.c
index 6c8dee9..ba1f915 100644
--- a/hw/npu2-hw-procedures.c
+++ b/hw/npu2-hw-procedures.c
@@ -239,8 +239,8 @@ static bool poll_fence_status(struct npu2_dev *ndev, uint64_t val)
/* Procedure 1.2.1 - Reset NPU/NDL */
uint32_t reset_ntl(struct npu2_dev *ndev)
{
- uint64_t val;
- int lane;
+ uint64_t val, check;
+ int lane, i;
set_iovalid(ndev, true);
@@ -258,10 +258,17 @@ uint32_t reset_ntl(struct npu2_dev *ndev)
/* Clear fence state for the brick */
val = npu2_read(ndev->npu, NPU2_MISC_FENCE_STATE);
- if (val & PPC_BIT(ndev->brick_index)) {
- NPU2DEVINF(ndev, "Clearing brick fence\n");
- val = PPC_BIT(ndev->brick_index);
+ if (val) {
+ NPU2DEVINF(ndev, "Clearing all bricks fence\n");
npu2_write(ndev->npu, NPU2_MISC_FENCE_STATE, val);
+ for (i = 0, check = 0; i < 4096; i++) {
+ check = npu2_read(ndev->npu, NPU2_NTL_CQ_FENCE_STATUS(ndev));
+ if (!check)
+ break;
+ }
+ if (check)
+ NPU2DEVERR(ndev, "Clearing NPU2_MISC_FENCE_STATE=0x%llx timeout, current=0x%llx\n",
+ val, check);
}
/* Write PRI */