opal/hmi: Do not send HMI event if no errors are found.

For TOD errors, all the cores in the chip get HMIs. Any one thread from any core can fix the issue, after which TFMR will have its error conditions cleared. The rest of the threads need not take any action if the TOD errors are already cleared. Hence thread 0 of every core should get a fresh copy of TFMR before going ahead with the recovery path. Initialize recover = -1, so that if no errors are found, that thread need not send an HMI event to Linux. This helps stop flooding the host with HMI events from every thread even when no errors are found. Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> Signed-off-by: Stewart Smith <stewart@linux.ibm.com>
author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> 2018-04-16 23:03:49 +0530
committer: Stewart Smith <stewart@linux.ibm.com> 2018-04-17 03:52:10 -0500
commit: 67d738807da0bbd6fe73e30d25753b6de1299df8 (patch)
tree: ffa42db384950ac15ec042abe070452363839d7d /core/hmi.c
parent: 5e20a789d021dd14bb30439cd9b3beb3dfeed9b7 (diff)
download: skiboot-67d738807da0bbd6fe73e30d25753b6de1299df8.zip
skiboot-67d738807da0bbd6fe73e30d25753b6de1299df8.tar.gz
skiboot-67d738807da0bbd6fe73e30d25753b6de1299df8.tar.bz2
1 files changed, 13 insertions, 8 deletions
diff --git a/core/hmi.c b/core/hmi.c
index 95ab96c..eadb75b 100644
--- a/core/hmi.c
+++ b/core/hmi.c
@@ -955,7 +955,7 @@ static int handle_thread_tfac_error(uint64_t tfmr, uint64_t *out_flags)
 static int handle_all_core_tfac_error(uint64_t tfmr, uint64_t *out_flags)
 {
 	struct cpu_thread *t, *t0;
-	int recover = 1;
+	int recover = -1;
 
 	t = this_cpu();
 	t0 = find_cpu_by_pir(cpu_get_thread0(t));
@@ -975,11 +975,15 @@ static int handle_all_core_tfac_error(uint64_t tfmr, uint64_t *out_flags)
 	if (tfmr & SPR_TFMR_TFMR_CORRUPT) {
 		/* Check if it's still in error state */
 		if (mfspr(SPR_TFMR) & SPR_TFMR_TFMR_CORRUPT)
-			if (!recover_corrupt_tfmr())
+			if (!recover_corrupt_tfmr()) {
+				unlock(&hmi_lock);
 				recover = 0;
+			}
 
-		if (!recover)
+		if (!recover) {
+			unlock(&hmi_lock);
 			goto error_out;
+		}
 
 		tfmr = mfspr(SPR_TFMR);
 
@@ -988,8 +992,10 @@ static int handle_all_core_tfac_error(uint64_t tfmr, uint64_t *out_flags)
 			recover = handle_thread_tfac_error(tfmr, out_flags);
 			tfmr &= ~SPR_TFMR_THREAD_ERRORS;
 		}
-		if (!recover)
+		if (!recover) {
+			unlock(&hmi_lock);
 			goto error_out;
+		}
 	}
 
 	/* Tell the OS ... */
@@ -1023,8 +1029,7 @@ static int handle_all_core_tfac_error(uint64_t tfmr, uint64_t *out_flags)
 
 	/* Now perform the actual TB recovery on thread 0 */
 	if (t == t0)
-		recover = chiptod_recover_tb_errors(tfmr,
-						&this_cpu()->tb_resynced);
+		recover = chiptod_recover_tb_errors(&this_cpu()->tb_resynced);
 
 error_out:
 	/* Last rendez-vous */
@@ -1043,7 +1048,7 @@ error_out:
 static int handle_tfac_errors(uint64_t hmer, struct OpalHMIEvent *hmi_evt,
 			      uint64_t *out_flags)
 {
-	int recover = 1;
+	int recover = -1;
 	uint64_t tfmr = mfspr(SPR_TFMR);
 
 	/* A TFMR parity error makes us ignore all the local stuff */
@@ -1106,7 +1111,7 @@ static int handle_tfac_errors(uint64_t hmer, struct OpalHMIEvent *hmi_evt,
 						mfspr(SPR_TFMR));
 	}
 
-	if (hmi_evt) {
+	if (recover != -1 && hmi_evt) {
 		hmi_evt->severity = OpalHMI_SEV_ERROR_SYNC;
 		hmi_evt->type = OpalHMI_ERROR_TFAC;
 		hmi_evt->tfmr = tfmr;
author	Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>	2018-04-16 23:03:49 +0530
committer	Stewart Smith <stewart@linux.ibm.com>	2018-04-17 03:52:10 -0500
commit	67d738807da0bbd6fe73e30d25753b6de1299df8 (patch)
tree	ffa42db384950ac15ec042abe070452363839d7d /core/hmi.c
parent	5e20a789d021dd14bb30439cd9b3beb3dfeed9b7 (diff)
download	skiboot-67d738807da0bbd6fe73e30d25753b6de1299df8.zip skiboot-67d738807da0bbd6fe73e30d25753b6de1299df8.tar.gz skiboot-67d738807da0bbd6fe73e30d25753b6de1299df8.tar.bz2