aboutsummaryrefslogtreecommitdiff
path: root/hw/chiptod.c
diff options
context:
space:
mode:
authorMahesh Salgaonkar <mahesh@linux.vnet.ibm.com>2015-06-05 23:38:39 +0530
committerStewart Smith <stewart@linux.vnet.ibm.com>2015-07-09 16:18:38 +1000
commitdf9197e3c165b9a906785fdf38476bda7278a831 (patch)
tree9ea7c8ecf55321379e57e229d76649ee5c714fa7 /hw/chiptod.c
parent451bd968b8225c82d9cb5aff8ac2c7b24113440c (diff)
downloadskiboot-df9197e3c165b9a906785fdf38476bda7278a831.zip
skiboot-df9197e3c165b9a906785fdf38476bda7278a831.tar.gz
skiboot-df9197e3c165b9a906785fdf38476bda7278a831.tar.bz2
opal: Refactor TOD topology failover recovery.
The current code does not correctly identify the need for a topology switch and forces the TOD topology switch even when it is not required to do so. This patch introduces a check to find out if the sync/step network is running and there is no step check error reported on the active master. If this check fails, then we need to trigger a topology switch to recover from the TOD error. But before triggering the topology switch, make sure that all slave chip TODs are stopped except the backup master chip. During the topology switch, step checkers are disabled and stay disabled even after the switch. This causes future step check errors to go undetected. Hence, make sure that step checkers are enabled on all TODs after the topology switch. Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> Signed-off-by: Stewart Smith <stewart@linux.vnet.ibm.com>
Diffstat (limited to 'hw/chiptod.c')
-rw-r--r--hw/chiptod.c147
1 files changed, 133 insertions, 14 deletions
diff --git a/hw/chiptod.c b/hw/chiptod.c
index 24ab9aa..bb51a6d 100644
--- a/hw/chiptod.c
+++ b/hw/chiptod.c
@@ -76,6 +76,8 @@
#define TOD_ERR_OSC0_PARITY PPC_BIT(1)
#define TOD_ERR_OSC1_PARITY PPC_BIT(2)
#define TOD_ERR_CRITC_PARITY PPC_BIT(13)
+#define TOD_ERR_MP0_STEP_CHECK PPC_BIT(14)
+#define TOD_ERR_MP1_STEP_CHECK PPC_BIT(15)
#define TOD_ERR_PSS_HAMMING_DISTANCE PPC_BIT(18)
#define TOD_ERR_DELAY_COMPL_PARITY PPC_BIT(22)
/* CNTR errors */
@@ -92,6 +94,9 @@
#define TOD_ERR_TTYPE4_RECVD PPC_BIT(42)
#define TOD_ERR_TTYPE5_RECVD PPC_BIT(43)
+/* -- TOD Error interrupt register -- */
+#define TOD_ERROR_INJECT 0x00040031
+
/* Magic TB value. One step cycle ahead of sync */
#define INIT_TB 0x000000000001ff0
@@ -941,33 +946,147 @@ static bool chiptod_set_ttype4_mode(struct proc_chip *chip, bool enable)
return true;
}
+/*
+ * Stop TODs on slave chips in backup topology.
+ *
+ * Walks every chip and writes a TOD sync check error into that chip's
+ * TOD_ERROR_INJECT register. Two chips are skipped: the chip the calling
+ * CPU runs on, and the chip whose role in the backup topology is MDMT.
+ *
+ * Returns nothing. An XSCOM write failure, or chiptod_running_check()
+ * still returning true for a chip after the inject, is only logged.
+ */
+static void chiptod_stop_slave_tods(void)
+{
+ struct proc_chip *chip = NULL;
+ enum chiptod_topology backup_topo;
+ uint64_t terr = 0;
+
+ /* Inject TOD sync check error on slave TODs to stop them. */
+ terr |= TOD_ERR_TOD_SYNC_CHECK;
+ /* The backup topology is whichever one is not currently active. */
+ if (current_topology == chiptod_topo_primary)
+ backup_topo = chiptod_topo_secondary;
+ else
+ backup_topo = chiptod_topo_primary;
+
+ for_each_chip(chip) {
+ enum chiptod_chip_role role;
+
+ /* Current chip TOD is already in stopped state */
+ if (chip->id == this_cpu()->chip_id)
+ continue;
+
+ role = chiptod_get_chip_role(backup_topo, chip->id);
+
+ /* Skip backup master chip TOD. */
+ if (role == chiptod_chip_role_MDMT)
+ continue;
+ /* A failed inject write is logged; the running check below is still made. */
+ if (xscom_write(chip->id, TOD_ERROR_INJECT, terr) != 0)
+ prerror("CHIPTOD: XSCOM error writing TOD_ERROR_INJ\n");
+
+ if (chiptod_running_check(chip->id)) {
+ prlog(PR_DEBUG,
+ "CHIPTOD: Failed to stop TOD on slave CHIP [%d]\n",
+ chip->id);
+ }
+ }
+}
+/*
+ * Decide whether a topology switch is needed to recover the TOD.
+ *
+ * Returns:
+ *   false - chiptod_master_running() reports the TOD as running;
+ *   true  - the sync/step check is not running on current_topology;
+ *   false - TOD_ERROR could not be read from the active master chip
+ *           (the XSCOM failure is logged);
+ *   true  - TOD_ERROR has TOD_ERR_MP0_STEP_CHECK set;
+ *   false - otherwise.
+ *
+ * NOTE(review): only the MP0 step check bit is tested here, while
+ * TOD_ERR_MP1_STEP_CHECK is also defined in this file - confirm that
+ * ignoring MP1 is intended.
+ */
+static bool is_topology_switch_required(void)
+{
+ int32_t active_master_chip;
+ uint64_t tod_error;
+
+ active_master_chip = chiptod_get_active_master();
+
+ /* Check if TOD is running on Active master. No switch needed if so. */
+ if (chiptod_master_running())
+ return false;
+
+ /*
+ * Check if sync/step network is running.
+ *
+ * If sync/step network is not running on current active topology
+ * then we need to switch topology to recover from TOD error.
+ */
+ if (!chiptod_sync_step_check_running(current_topology)) {
+ prlog(PR_DEBUG, "CHIPTOD: Sync/Step network not running\n");
+ return true;
+ }
+
+ /*
+ * Check if there is a step check error reported on
+ * Active master.
+ */
+ if (xscom_read(active_master_chip, TOD_ERROR, &tod_error) != 0) {
+ prerror("CHIPTOD: XSCOM error reading TOD_ERROR reg\n");
+ /*
+ * Can't do anything here. But we already found that
+ * sync/step network is running. Hence return false.
+ */
+ return false;
+ }
+
+ if (tod_error & TOD_ERR_MP0_STEP_CHECK) {
+ prlog(PR_DEBUG, "CHIPTOD: TOD step check error\n");
+ return true;
+ }
+
+ return false;
+}
+
/*
* Sync up TOD with other chips and get TOD in running state.
- * For non-master, we request TOD value from another chip.
- * For master chip, Switch the topology to recover.
+ * Check if current topology is active and running. If not, then
+ * trigger a topology switch.
*/
static int chiptod_start_tod(void)
{
struct proc_chip *chip = NULL;
int rc = 1;
- /* Handle TOD recovery on master chip. */
- if (this_cpu()->chip_id == chiptod_primary) {
+ /* Do a topology switch if required. */
+ if (is_topology_switch_required()) {
+ int32_t mchip = chiptod_get_active_master();
+
+ prlog(PR_DEBUG, "CHIPTOD: Need topology switch to recover\n");
/*
- * TOD is not running on master chip. We need to sync with
- * secondary chip TOD. But before we do that we need to
- * switch topology to make backup master as the new
- * active master. Once we switch the topology we can
- * then request TOD value from new master chip TOD.
- * But make sure we move local chiptod to Not Set before
- * request TOD value.
+ * There is a failure in StepSync network in current
+ * active topology. TOD is not running on active master chip.
+ * We need to sync with backup master chip TOD.
+ * But before we do that we need to switch topology to make
+ * backup master as the new active master. Once we switch the
+ * topology we can then request TOD value from new active
+ * master. But make sure we move local chiptod to Not Set
+ * before requesting TOD value.
+ *
+ * Before triggering a topology switch stop all slave TODs
+ * in backup topology.
*/
- if (xscom_writeme(TOD_TTYPE_1, (1UL << 63)) != 0) {
+ chiptod_stop_slave_tods();
+
+ if (xscom_write(mchip, TOD_TTYPE_1, (1UL << 63)) != 0) {
prerror("CHIPTOD: XSCOM error switching primary/secondary\n");
return 0;
}
- chiptod_primary = chiptod_secondary;
- chiptod_secondary = this_cpu()->chip_id;
+
+ /* Update topology info. */
+ current_topology = query_current_topology();
+ chiptod_update_topology(chiptod_topo_primary);
+ chiptod_update_topology(chiptod_topo_secondary);
+
+ /*
+ * We just switched topologies to recover.
+ * Check if new master TOD is running.
+ */
+ if (!chiptod_master_running()) {
+ prerror("CHIPTOD: TOD is not running on new master.\n");
+ return 0;
+ }
+
+ /*
+ * Enable step checkers on all Chip TODs
+ *
+ * During topology switch, step checkers are disabled
+ * on all Chip TODs by default. Enable them.
+ */
+ if (xscom_writeme(TOD_TTYPE_2, (1UL << 63)) != 0) {
+ prerror("CHIPTOD: XSCOM error enabling steppers\n");
+ return 0;
+ }
}
if (!chiptod_master_running()) {