diff options
author | Frederic Barrat <fbarrat@linux.ibm.com> | 2019-10-09 21:38:04 +0200 |
---|---|---|
committer | Vasant Hegde <hegdevasant@linux.vnet.ibm.com> | 2020-03-11 10:58:03 +0530 |
commit | 552e8c7524a8abc6f2c4e73d4585ee4a080599ef (patch) | |
tree | 504e1b9cc00356c454e43f11c1a1676835623d1e | |
parent | 890f3efd0da3a2098dcbb3c23ef46c683933eba7 (diff) | |
download | skiboot-552e8c7524a8abc6f2c4e73d4585ee4a080599ef.zip skiboot-552e8c7524a8abc6f2c4e73d4585ee4a080599ef.tar.gz skiboot-552e8c7524a8abc6f2c4e73d4585ee4a080599ef.tar.bz2 |
npu2-opencapi: Rework link training timeout
[ Upstream commit 2600cfac4db106b219deee042f53e8d9c54d857d ]
OpenCAPI link state should be polled for up to 3 seconds. The current code
assumes a tight retry loop during fundamental reset at boot, which is
not going to be true on link retraining. So update the timeout
detection code to use the timebase instead of a simple retry count, which
could make the effective timeout far too long.
Reviewed-by: Christophe Lombard <clombard@linux.vnet.ibm.com>
Reviewed-by: Andrew Donnellan <ajd@linux.ibm.com>
Signed-off-by: Frederic Barrat <fbarrat@linux.ibm.com>
Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
Signed-off-by: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>
-rw-r--r-- | hw/npu2-opencapi.c | 9 | ||||
-rw-r--r-- | include/npu2.h | 2 |
2 files changed, 7 insertions, 4 deletions
diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c index 1b65220..671698a 100644 --- a/hw/npu2-opencapi.c +++ b/hw/npu2-opencapi.c @@ -1161,13 +1161,13 @@ static int64_t npu2_opencapi_poll_link(struct pci_slot *slot) reg = get_odl_status(chip_id, dev->brick_index); if (GETFIELD(OB_ODL_STATUS_TRAINING_STATE_MACHINE, reg) == OCAPI_LINK_STATE_TRAINED) { - OCAPIINF(dev, "link trained in %lld ms\n", - OCAPI_LINK_TRAINING_TIMEOUT - slot->retries); + OCAPIINF(dev, "link trained in %ld ms\n", + tb_to_msecs(mftb() - dev->train_start)); check_trained_link(dev, reg); pci_slot_set_state(slot, OCAPI_SLOT_LINK_TRAINED); return pci_slot_set_sm_timeout(slot, msecs_to_tb(1)); } - if (slot->retries-- == 0) + if (tb_compare(mftb(), dev->train_timeout) == TB_AAFTERB) return npu2_opencapi_retry_state(slot, reg); return pci_slot_set_sm_timeout(slot, msecs_to_tb(1)); @@ -1273,7 +1273,8 @@ static int64_t npu2_opencapi_freset(struct pci_slot *slot) /* Bump lanes - this improves training reliability */ npu2_opencapi_bump_ui_lane(dev); start_training(chip_id, dev); - slot->retries = OCAPI_LINK_TRAINING_TIMEOUT; + dev->train_start = mftb(); + dev->train_timeout = dev->train_start + msecs_to_tb(OCAPI_LINK_TRAINING_TIMEOUT); pci_slot_set_state(slot, OCAPI_SLOT_LINK_START); return slot->ops.poll_link(slot); diff --git a/include/npu2.h b/include/npu2.h index aac7e7a..d2316dc 100644 --- a/include/npu2.h +++ b/include/npu2.h @@ -147,6 +147,8 @@ struct npu2_dev { uint64_t linux_pe; bool train_need_fence; bool train_fenced; + unsigned long train_start; + unsigned long train_timeout; }; struct npu2 { |