aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--hw/chiptod.c147
1 files changed, 133 insertions, 14 deletions
diff --git a/hw/chiptod.c b/hw/chiptod.c
index 24ab9aa..bb51a6d 100644
--- a/hw/chiptod.c
+++ b/hw/chiptod.c
@@ -76,6 +76,8 @@
#define TOD_ERR_OSC0_PARITY PPC_BIT(1)
#define TOD_ERR_OSC1_PARITY PPC_BIT(2)
#define TOD_ERR_CRITC_PARITY PPC_BIT(13)
+#define TOD_ERR_MP0_STEP_CHECK PPC_BIT(14)
+#define TOD_ERR_MP1_STEP_CHECK PPC_BIT(15)
#define TOD_ERR_PSS_HAMMING_DISTANCE PPC_BIT(18)
#define TOD_ERR_DELAY_COMPL_PARITY PPC_BIT(22)
/* CNTR errors */
@@ -92,6 +94,9 @@
#define TOD_ERR_TTYPE4_RECVD PPC_BIT(42)
#define TOD_ERR_TTYPE5_RECVD PPC_BIT(43)
+/* -- TOD Error interrupt register -- */
+#define TOD_ERROR_INJECT 0x00040031
+
/* Magic TB value. One step cycle ahead of sync */
#define INIT_TB 0x000000000001ff0
@@ -941,33 +946,147 @@ static bool chiptod_set_ttype4_mode(struct proc_chip *chip, bool enable)
return true;
}
+/*
+ * Stop TODs on slave chips in backup topology.
+ *
+ * The backup topology is taken to be whichever of primary/secondary is
+ * not current_topology. Every chip is visited except the chip this CPU
+ * runs on and the chip whose role in the backup topology is MDMT; on each
+ * remaining chip a TOD sync check error is written to TOD_ERROR_INJECT.
+ *
+ * Nothing is returned: an XSCOM write failure, or a chip for which
+ * chiptod_running_check() still returns non-zero after the write, is
+ * only logged.
+ */
+static void chiptod_stop_slave_tods(void)
+{
+ struct proc_chip *chip = NULL;
+ enum chiptod_topology backup_topo;
+ uint64_t terr = 0;
+
+ /* Inject TOD sync check error on slave TODs to stop them. */
+ terr |= TOD_ERR_TOD_SYNC_CHECK;
+
+ if (current_topology == chiptod_topo_primary)
+ backup_topo = chiptod_topo_secondary;
+ else
+ backup_topo = chiptod_topo_primary;
+
+ for_each_chip(chip) {
+ enum chiptod_chip_role role;
+
+ /* Current chip TOD is already in stopped state. */
+ if (chip->id == this_cpu()->chip_id)
+ continue;
+
+ role = chiptod_get_chip_role(backup_topo, chip->id);
+
+ /*
+ * Skip backup master chip TOD: only the slave TODs of the
+ * backup topology get the error injected.
+ */
+ if (role == chiptod_chip_role_MDMT)
+ continue;
+
+ if (xscom_write(chip->id, TOD_ERROR_INJECT, terr) != 0)
+ prerror("CHIPTOD: XSCOM error writing TOD_ERROR_INJ\n");
+
+ if (chiptod_running_check(chip->id)) {
+ prlog(PR_DEBUG,
+ "CHIPTOD: Failed to stop TOD on slave CHIP [%d]\n",
+ chip->id);
+ }
+ }
+}
+
+static bool is_topology_switch_required(void)
+{
+ int32_t active_master_chip;
+ uint64_t tod_error;
+
+ active_master_chip = chiptod_get_active_master();
+
+ /*
+ * Check if TOD is running on Active master. If it is, no topology
+ * switch is required.
+ */
+ if (chiptod_master_running())
+ return false;
+
+ /*
+ * Check if sync/step network is running.
+ *
+ * If sync/step network is not running on current active topology
+ * then we need to switch topology to recover from TOD error.
+ */
+ if (!chiptod_sync_step_check_running(current_topology)) {
+ prlog(PR_DEBUG, "CHIPTOD: Sync/Step network not running\n");
+ return true;
+ }
+
+ /*
+ * Check if there is a step check error reported on
+ * Active master. The TOD_ERROR register is read from the chip
+ * returned by chiptod_get_active_master() above.
+ *
+ * NOTE(review): only TOD_ERR_MP0_STEP_CHECK is tested below;
+ * confirm that an MP1 step check error need not trigger a switch.
+ */
+ if (xscom_read(active_master_chip, TOD_ERROR, &tod_error) != 0) {
+ prerror("CHIPTOD: XSCOM error reading TOD_ERROR reg\n");
+ /*
+ * Can't do anything here. But we already found that
+ * sync/step network is running. Hence return false.
+ */
+ return false;
+ }
+
+ if (tod_error & TOD_ERR_MP0_STEP_CHECK) {
+ prlog(PR_DEBUG, "CHIPTOD: TOD step check error\n");
+ return true;
+ }
+
+ return false;
+}
+
/*
* Sync up TOD with other chips and get TOD in running state.
- * For non-master, we request TOD value from another chip.
- * For master chip, Switch the topology to recover.
+ * Check if current topology is active and running. If not, then
+ * trigger a topology switch.
*/
static int chiptod_start_tod(void)
{
struct proc_chip *chip = NULL;
int rc = 1;
- /* Handle TOD recovery on master chip. */
- if (this_cpu()->chip_id == chiptod_primary) {
+ /* Do a topology switch if required. */
+ if (is_topology_switch_required()) {
+ int32_t mchip = chiptod_get_active_master();
+
+ prlog(PR_DEBUG, "CHIPTOD: Need topology switch to recover\n");
/*
- * TOD is not running on master chip. We need to sync with
- * secondary chip TOD. But before we do that we need to
- * switch topology to make backup master as the new
- * active master. Once we switch the topology we can
- * then request TOD value from new master chip TOD.
- * But make sure we move local chiptod to Not Set before
- * request TOD value.
+ * There is a failure in StepSync network in current
+ * active topology. TOD is not running on active master chip.
+ * We need to sync with backup master chip TOD.
+ * But before we do that we need to switch topology to make
+ * backup master as the new active master. Once we switch the
+ * topology we can then request TOD value from new active
+ * master. But make sure we move local chiptod to Not Set
+ * before requesting TOD value.
+ *
+ * Before triggering a topology switch stop all slave TODs
+ * in backup topology.
*/
- if (xscom_writeme(TOD_TTYPE_1, (1UL << 63)) != 0) {
+ chiptod_stop_slave_tods();
+
+ if (xscom_write(mchip, TOD_TTYPE_1, (1UL << 63)) != 0) {
prerror("CHIPTOD: XSCOM error switching primary/secondary\n");
return 0;
}
- chiptod_primary = chiptod_secondary;
- chiptod_secondary = this_cpu()->chip_id;
+
+ /* Update topology info. */
+ current_topology = query_current_topology();
+ chiptod_update_topology(chiptod_topo_primary);
+ chiptod_update_topology(chiptod_topo_secondary);
+
+ /*
+ * We just switched topologies to recover.
+ * Check if new master TOD is running.
+ */
+ if (!chiptod_master_running()) {
+ prerror("CHIPTOD: TOD is not running on new master.\n");
+ return 0;
+ }
+
+ /*
+ * Enable step checkers on all Chip TODs
+ *
+ * During topology switch, step checkers are disabled
+ * on all Chip TODs by default. Enable them.
+ */
+ if (xscom_writeme(TOD_TTYPE_2, (1UL << 63)) != 0) {
+ prerror("CHIPTOD: XSCOM error enabling steppers\n");
+ return 0;
+ }
}
if (!chiptod_master_running()) {