author | Benjamin Herrenschmidt <benh@kernel.crashing.org> | 2018-08-15 15:10:38 +1000
---|---|---
committer | Stewart Smith <stewart@linux.ibm.com> | 2018-08-16 18:41:17 +1000
commit | 2f2e0ee95a3da96e669b40dcb41ac14177dcac8d (patch) |
tree | ce6b551bfda50081d30d8f261d6684508376080e /core |
parent | 2925dd08c5e39e5c82286dc65a15fce6623694b2 (diff) |
lock: Fix interactions between lock dependency checker and stack checker
The lock dependency checker does a few nasty things that can cause
re-entrancy deadlocks in conjunction with the stack checker, or
indeed with other debug tests.
A lot of it revolves around taking a new lock (dl_lock) as part
of the locking process.
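To see why this is hazardous, here is a minimal sketch (illustrative stand-ins, not skiboot code): every lock acquisition funnels through add_lock_request(), which takes a second lock, so anything that fires inside that window and itself needs a lock re-enters the locking code.

```c
#include <stdatomic.h>
#include <stdbool.h>

/* Hypothetical stand-ins for illustration only. */
struct lock { atomic_flag val; };

static struct lock dl_lock = { ATOMIC_FLAG_INIT };

static bool try_lock(struct lock *l)
{
	return !atomic_flag_test_and_set(&l->val);
}

static void unlock(struct lock *l)
{
	atomic_flag_clear(&l->val);
}

/*
 * Runs on every lock() call.  While dl_lock is held, an mcount hook
 * (the stack checker) or a console print that needs the UART lock
 * would re-enter the locking code and deadlock against dl_lock.
 */
static void add_lock_request(struct lock *l)
{
	(void)l;
	while (!try_lock(&dl_lock))
		; /* spin: a nested lock inside the lock slow path */
	/* ... record the requested lock, walk the wait-for graph ... */
	unlock(&dl_lock);
}
```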
This tries to fix it by making sure we do not hit the stack
checker while holding dl_lock.
We achieve that in part by using the low-level __try_lock directly
and unlocking dl_lock by hand, and by marking some functions
"nomcount".
In addition, we mark the dl_lock as being in the console path to
avoid deadlocks with the UART driver.
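Concretely, dl_lock loses its LOCK_UNLOCKED initializer and is declared console-path up front (taken verbatim from the diff below); as I understand skiboot's lock code, in_con_path tells it not to drive console output while such a lock is held, which is what breaks the cycle with the UART lock:

```c
static struct lock dl_lock = {
	.lock_val = 0,
	.in_con_path = true,	/* never print to console while held */
	.owner = LOCK_CALLER
};
```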
We also move the deadlock checker behind its own config option,
separate from DEBUG_LOCKS, in case we choose to disable it by
default later on.
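A sketch of how the two switches compose after the split (a hypothetical config fragment; the guard structure itself is taken from the diff below):

```c
/* Hypothetical build configuration with both knobs enabled. */
#define DEBUG_LOCKS      1
#define DEADLOCK_CHECKER 1

#ifdef DEBUG_LOCKS
/* lock_check(), unlock_check(), lock_timeout(), ... */
#endif /* DEBUG_LOCKS */

/* The deadlock checker is its own option now, but only takes
 * effect when basic lock debugging is also enabled: */
#if defined(DEADLOCK_CHECKER) && defined(DEBUG_LOCKS)
/* dl_lock, check_deadlock(), add_lock_request(), ... */
#endif
```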
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Stewart Smith <stewart@linux.ibm.com>
Diffstat (limited to 'core')
-rw-r--r-- | core/cpu.c  |  7
-rw-r--r-- | core/lock.c | 45
2 files changed, 37 insertions, 15 deletions
```diff
diff --git a/core/cpu.c b/core/cpu.c
--- a/core/cpu.c
+++ b/core/cpu.c
@@ -714,6 +714,13 @@ struct cpu_thread *find_cpu_by_pir(u32 pir)
 	return &cpu_stacks[pir].cpu;
 }
 
+struct __nomcount cpu_thread *find_cpu_by_pir_nomcount(u32 pir)
+{
+	if (pir > cpu_max_pir)
+		return NULL;
+	return &cpu_stacks[pir].cpu;
+}
+
 struct cpu_thread *find_cpu_by_server(u32 server_no)
 {
 	struct cpu_thread *t;
diff --git a/core/lock.c b/core/lock.c
index 1fc71a9..fca8f46 100644
--- a/core/lock.c
+++ b/core/lock.c
@@ -29,9 +29,8 @@
 
 bool bust_locks = true;
 
 #ifdef DEBUG_LOCKS
 
-static struct lock dl_lock = LOCK_UNLOCKED;
-static void lock_error(struct lock *l, const char *reason, uint16_t err)
+static void __nomcount lock_error(struct lock *l, const char *reason, uint16_t err)
 {
 	bust_locks = true;
 
@@ -42,13 +41,13 @@ static void lock_error(struct lock *l, const char *reason, uint16_t err)
 	abort();
 }
 
-static void lock_check(struct lock *l)
+static inline void __nomcount lock_check(struct lock *l)
 {
 	if ((l->lock_val & 1) && (l->lock_val >> 32) == this_cpu()->pir)
 		lock_error(l, "Invalid recursive lock", 0);
 }
 
-static void unlock_check(struct lock *l)
+static inline void __nomcount unlock_check(struct lock *l)
 {
 	if (!(l->lock_val & 1))
 		lock_error(l, "Unlocking unlocked lock", 1);
@@ -102,8 +101,21 @@ static inline bool lock_timeout(unsigned long start)
 	return false;
 }
 #else
+static inline void lock_check(struct lock *l) { };
+static inline void unlock_check(struct lock *l) { };
+static inline bool lock_timeout(unsigned long s) { return false; }
+#endif /* DEBUG_LOCKS */
+
+#if defined(DEADLOCK_CHECKER) && defined(DEBUG_LOCKS)
+
+static struct lock dl_lock = {
+	.lock_val = 0,
+	.in_con_path = true,
+	.owner = LOCK_CALLER
+};
+
 /* Find circular dependencies in the lock requests. */
-static bool check_deadlock(void)
+static __nomcount inline bool check_deadlock(void)
 {
 	uint32_t lock_owner, start, i;
 	struct cpu_thread *next_cpu;
@@ -126,7 +138,7 @@ static bool check_deadlock(void)
 		if (lock_owner == start)
 			return true;
 
-		next_cpu = find_cpu_by_pir(lock_owner);
+		next_cpu = find_cpu_by_pir_nomcount(lock_owner);
 
 		if (!next_cpu)
 			return false;
@@ -141,6 +153,7 @@ static bool check_deadlock(void)
 static void add_lock_request(struct lock *l)
 {
 	struct cpu_thread *curr = this_cpu();
+	bool dead;
 
 	if (curr->state != cpu_state_active &&
 	    curr->state != cpu_state_os)
@@ -148,10 +161,12 @@ static void add_lock_request(struct lock *l)
 
 	/*
 	 * For deadlock detection we must keep the lock states constant
-	 * while doing the deadlock check.
+	 * while doing the deadlock check. However we need to avoid
+	 * clashing with the stack checker, so no mcount and use an
+	 * inline implementation of the lock for the dl_lock
 	 */
 	for (;;) {
-		if (try_lock(&dl_lock))
+		if (__try_lock(curr, &dl_lock))
 			break;
 		smt_lowest();
 		while (dl_lock.lock_val)
@@ -161,10 +176,13 @@ static void add_lock_request(struct lock *l)
 
 	curr->requested_lock = l;
 
-	if (check_deadlock())
-		lock_error(l, "Deadlock detected", 0);
+	dead = check_deadlock();
 
-	unlock(&dl_lock);
+	lwsync();
+	dl_lock.lock_val = 0;
+
+	if (dead)
+		lock_error(l, "Deadlock detected", 0);
 }
 
 static void remove_lock_request(void)
@@ -172,12 +190,9 @@ static void remove_lock_request(void)
 	this_cpu()->requested_lock = NULL;
 }
 #else
-static inline void lock_check(struct lock *l) { };
-static inline void unlock_check(struct lock *l) { };
 static inline void add_lock_request(struct lock *l) { };
 static inline void remove_lock_request(void) { };
-static inline bool lock_timeout(unsigned long s) { return false; }
-#endif /* DEBUG_LOCKS */
+#endif /* #if defined(DEADLOCK_CHECKER) && defined(DEBUG_LOCKS) */
 
 bool lock_held_by_me(struct lock *l)
 {
```