author     Benjamin Herrenschmidt <benh@kernel.crashing.org>  2017-12-20 13:16:23 +1100
committer  Stewart Smith <stewart@linux.vnet.ibm.com>         2017-12-20 22:15:36 -0600
commit     76d9bcdca58936d761458f8f05960239c4dd8dec
tree       c8377b11be33e62d312810645b8044d3a6f427aa
parent     ca612b802adac0c72cd0f10c51a51275e5914101
lock: Add additional lock auditing code
Keep track of the lock owner's name, and replace the lock_depth counter
with a per-cpu list of the locks held by that CPU.

This allows us to print the actual locks held when we hit the (in)famous
message about opal_run_pollers() being called with a lock held.

It also allows us to warn about (and drop) any locks still held when
returning to the OS or when completing a scheduled job.
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
[stewart: fix unit tests]
Signed-off-by: Stewart Smith <stewart@linux.vnet.ibm.com>
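
The heart of the change is the LOCK_CALLER macro added to include/lock.h: wrapping lock() and friends in macros lets the preprocessor bake the file and line of each call site into a string constant at compile time, so owner tracking costs nothing at runtime. The following standalone sketch illustrates the stringification idiom; it is illustrative only (skiboot's real stringify() comes from ccan/str/str.h, and its lock_caller() performs an actual atomic acquisition):

    #include <stdio.h>

    /* Two-level expansion so __LINE__ is expanded before # stringifies it. */
    #define stringify_1(x)  #x
    #define stringify(x)    stringify_1(x)

    /* Adjacent string literals concatenate: "demo.c" ":" "27" -> "demo.c:27". */
    #define LOCK_CALLER     __FILE__ ":" stringify(__LINE__)

    struct lock { const char *owner; };

    static void lock_caller(struct lock *l, const char *owner)
    {
        l->owner = owner;   /* real atomic acquisition elided */
        printf("lock taken at %s\n", l->owner);
    }

    /* Call sites keep writing lock(&l); the macro tags them transparently. */
    #define lock(l) lock_caller(l, LOCK_CALLER)

    int main(void)
    {
        struct lock l = { 0 };

        lock(&l);   /* prints something like "demo.c:27" */
        return 0;
    }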
 core/cpu.c                                         |  6
 core/lock.c                                        | 41
 core/opal.c                                        |  8
 core/stack.c                                       |  1
 core/test/run-malloc-speed.c                       |  3
 core/test/run-malloc.c                             |  3
 core/test/run-mem_range_is_reserved.c              |  3
 core/test/run-mem_region.c                         |  3
 core/test/run-mem_region_init.c                    |  3
 core/test/run-mem_region_next.c                    |  3
 core/test/run-mem_region_release_unused.c          |  3
 core/test/run-mem_region_release_unused_noalloc.c  |  3
 core/test/run-mem_region_reservations.c            |  3
 core/test/run-msg.c                                |  3
 core/test/run-timer.c                              |  6
 core/test/run-trace.c                              |  3
 core/timebase.c                                    |  2
 coverity-model.c                                   |  3
 hdata/test/stubs.c                                 |  2
 include/cpu.h                                      |  2
 include/lock.h                                     | 33

 21 files changed, 106 insertions(+), 31 deletions(-)
diff --git a/core/cpu.c b/core/cpu.c
--- a/core/cpu.c
+++ b/core/cpu.c
@@ -285,6 +285,11 @@ void cpu_process_jobs(void)
 		if (no_return)
 			free(job);
 		func(data);
+		if (!list_empty(&cpu->locks_held)) {
+			prlog(PR_ERR, "OPAL job %s returning with locks held\n",
+			      job->name);
+			drop_my_locks(true);
+		}
 		lock(&cpu->job_lock);
 		if (!no_return) {
 			cpu->job_count--;
@@ -822,6 +827,7 @@ static void init_cpu_thread(struct cpu_thread *t,
 	init_lock(&t->dctl_lock);
 	init_lock(&t->job_lock);
 	list_head_init(&t->job_queue);
+	list_head_init(&t->locks_held);
 	t->stack_guard = STACK_CHECK_GUARD_BASE ^ pir;
 	t->state = state;
 	t->pir = pir;
diff --git a/core/lock.c b/core/lock.c
index b5e3323..edfe1c7 100644
--- a/core/lock.c
+++ b/core/lock.c
@@ -57,8 +57,8 @@ static void unlock_check(struct lock *l)
 	if (l->in_con_path && this_cpu()->con_suspend == 0)
 		lock_error(l, "Unlock con lock with console not suspended", 3);
 
-	if (this_cpu()->lock_depth == 0)
-		lock_error(l, "Releasing lock with 0 depth", 4);
+	if (list_empty(&this_cpu()->locks_held))
+		lock_error(l, "Releasing lock we don't hold depth", 4);
 }
 
 #else
@@ -89,7 +89,7 @@ static inline bool __try_lock(struct cpu_thread *cpu, struct lock *l)
 	return false;
 }
 
-bool try_lock(struct lock *l)
+bool try_lock_caller(struct lock *l, const char *owner)
 {
 	struct cpu_thread *cpu = this_cpu();
 
@@ -97,22 +97,23 @@ bool try_lock(struct lock *l)
 		return true;
 
 	if (__try_lock(cpu, l)) {
+		l->owner = owner;
 		if (l->in_con_path)
 			cpu->con_suspend++;
-		cpu->lock_depth++;
+		list_add(&cpu->locks_held, &l->list);
 		return true;
 	}
 
 	return false;
 }
 
-void lock(struct lock *l)
+void lock_caller(struct lock *l, const char *owner)
 {
 	if (bust_locks)
 		return;
 
 	lock_check(l);
 	for (;;) {
-		if (try_lock(l))
+		if (try_lock_caller(l, owner))
 			break;
 		smt_lowest();
 		while (l->lock_val)
@@ -130,8 +131,9 @@ void unlock(struct lock *l)
 
 	unlock_check(l);
 
+	l->owner = NULL;
+	list_del(&l->list);
 	lwsync();
-	this_cpu()->lock_depth--;
 	l->lock_val = 0;
 
 	/* WARNING: On fast reboot, we can be reset right at that
@@ -144,7 +146,7 @@
 	}
 }
 
-bool lock_recursive(struct lock *l)
+bool lock_recursive_caller(struct lock *l, const char *caller)
 {
 	if (bust_locks)
 		return false;
@@ -152,7 +154,7 @@
 	if (lock_held_by_me(l))
 		return false;
 
-	lock(l);
+	lock_caller(l, caller);
 	return true;
 }
 
@@ -160,3 +162,24 @@ void init_locks(void)
 {
 	bust_locks = false;
 }
+
+void dump_locks_list(void)
+{
+	struct lock *l;
+
+	prlog(PR_ERR, "Locks held:\n");
+	list_for_each(&this_cpu()->locks_held, l, list)
+		prlog(PR_ERR, "  %s\n", l->owner);
+}
+
+void drop_my_locks(bool warn)
+{
+	struct lock *l;
+
+	while((l = list_pop(&this_cpu()->locks_held, struct lock, list)) != NULL) {
+		if (warn)
+			prlog(PR_ERR, "  %s\n", l->owner);
+		unlock(l);
+	}
+}
+
diff --git a/core/opal.c b/core/opal.c
index d33d527..1bca774 100644
--- a/core/opal.c
+++ b/core/opal.c
@@ -182,6 +182,11 @@ int64_t opal_exit_check(int64_t retval, struct stack_frame *eframe)
 		printf("CPU UN-ACCOUNTED FIRMWARE ENTRY! PIR=%04lx cpu @%p -> pir=%04x token=%llu retval=%lld\n",
 		       mfspr(SPR_PIR), cpu, cpu->pir, token, retval);
 	} else {
+		if (!list_empty(&cpu->locks_held)) {
+			prlog(PR_ERR, "OPAL exiting with locks held, token=%llu retval=%lld\n",
+			      token, retval);
+			drop_my_locks(true);
+		}
 		sync(); /* release barrier vs quiescing */
 		cpu->in_opal_call--;
 	}
@@ -557,7 +562,7 @@ void opal_run_pollers(void)
 	}
 	this_cpu()->in_poller = true;
 
-	if (this_cpu()->lock_depth && pollers_with_lock_warnings < 64) {
+	if (!list_empty(&this_cpu()->locks_held) && pollers_with_lock_warnings < 64) {
 		/**
 		 * @fwts-label OPALPollerWithLock
 		 * @fwts-advice opal_run_pollers() was called with a lock
 		 * lucky/careful.
 		 */
 		prlog(PR_ERR, "Running pollers with lock held !\n");
+		dump_locks_list();
 		backtrace();
 		pollers_with_lock_warnings++;
 		if (pollers_with_lock_warnings == 64) {
diff --git a/core/stack.c b/core/stack.c
index bb7fa62..af4d37d 100644
--- a/core/stack.c
+++ b/core/stack.c
@@ -21,6 +21,7 @@
 #include <stack.h>
 #include <mem_region.h>
 #include <unistd.h>
+#include <lock.h>
 
 #define STACK_BUF_ENTRIES 60
 static struct bt_entry bt_buf[STACK_BUF_ENTRIES];
diff --git a/core/test/run-malloc-speed.c b/core/test/run-malloc-speed.c
index 279216e..d842bd6 100644
--- a/core/test/run-malloc-speed.c
+++ b/core/test/run-malloc-speed.c
@@ -55,8 +55,9 @@ static inline void real_free(void *p)
 char __rodata_start[1], __rodata_end[1];
 struct dt_node *dt_root;
 
-void lock(struct lock *l)
+void lock_caller(struct lock *l, const char *caller)
 {
+	(void)caller;
 	assert(!l->lock_val);
 	l->lock_val = 1;
 }
diff --git a/core/test/run-malloc.c b/core/test/run-malloc.c
index d79e6f9..2feaacb 100644
--- a/core/test/run-malloc.c
+++ b/core/test/run-malloc.c
@@ -57,8 +57,9 @@ static inline void real_free(void *p)
 
 struct dt_node *dt_root;
 
-void lock(struct lock *l)
+void lock_caller(struct lock *l, const char *caller)
 {
+	(void)caller;
 	assert(!l->lock_val);
 	l->lock_val = 1;
 }
diff --git a/core/test/run-mem_range_is_reserved.c b/core/test/run-mem_range_is_reserved.c
index 95c790d..37f7db3 100644
--- a/core/test/run-mem_range_is_reserved.c
+++ b/core/test/run-mem_range_is_reserved.c
@@ -57,8 +57,9 @@ static void real_free(void *p)
 #include <assert.h>
 #include <stdio.h>
 
-void lock(struct lock *l)
+void lock_caller(struct lock *l, const char *caller)
 {
+	(void)caller;
 	assert(!l->lock_val);
 	l->lock_val++;
 }
diff --git a/core/test/run-mem_region.c b/core/test/run-mem_region.c
index 6b7f6fb..f2506d6 100644
--- a/core/test/run-mem_region.c
+++ b/core/test/run-mem_region.c
@@ -55,8 +55,9 @@ static inline void real_free(void *p)
 
 struct dt_node *dt_root;
 
-void lock(struct lock *l)
+void lock_caller(struct lock *l, const char *caller)
 {
+	(void)caller;
 	assert(!l->lock_val);
 	l->lock_val++;
 }
diff --git a/core/test/run-mem_region_init.c b/core/test/run-mem_region_init.c
index ee7e189..d4265af 100644
--- a/core/test/run-mem_region_init.c
+++ b/core/test/run-mem_region_init.c
@@ -63,8 +63,9 @@ static inline char *skiboot_strdup(const char *str)
 #include <assert.h>
 #include <stdio.h>
 
-void lock(struct lock *l)
+void lock_caller(struct lock *l, const char *caller)
 {
+	(void)caller;
 	assert(!l->lock_val);
 	l->lock_val = 1;
 }
diff --git a/core/test/run-mem_region_next.c b/core/test/run-mem_region_next.c
index 7daa269..72d02a9 100644
--- a/core/test/run-mem_region_next.c
+++ b/core/test/run-mem_region_next.c
@@ -52,8 +52,9 @@ static void real_free(void *p)
 #include <assert.h>
 #include <stdio.h>
 
-void lock(struct lock *l)
+void lock_caller(struct lock *l, const char *caller)
 {
+	(void)caller;
 	assert(!l->lock_val);
 	l->lock_val++;
 }
diff --git a/core/test/run-mem_region_release_unused.c b/core/test/run-mem_region_release_unused.c
index 712f98a..4941453 100644
--- a/core/test/run-mem_region_release_unused.c
+++ b/core/test/run-mem_region_release_unused.c
@@ -60,8 +60,9 @@ static inline void __free(void *p, const char *location __attribute__((unused)))
 #include <assert.h>
 #include <stdio.h>
 
-void lock(struct lock *l)
+void lock_caller(struct lock *l, const char *caller)
 {
+	(void)caller;
 	l->lock_val++;
 }
diff --git a/core/test/run-mem_region_release_unused_noalloc.c b/core/test/run-mem_region_release_unused_noalloc.c
index a79485b..1ad4abb 100644
--- a/core/test/run-mem_region_release_unused_noalloc.c
+++ b/core/test/run-mem_region_release_unused_noalloc.c
@@ -60,8 +60,9 @@ static inline void __free(void *p, const char *location __attribute__((unused)))
 #include <assert.h>
 #include <stdio.h>
 
-void lock(struct lock *l)
+void lock_caller(struct lock *l, const char *caller)
 {
+	(void)caller;
 	l->lock_val++;
 }
diff --git a/core/test/run-mem_region_reservations.c b/core/test/run-mem_region_reservations.c
index 584b7c3..f593d9a 100644
--- a/core/test/run-mem_region_reservations.c
+++ b/core/test/run-mem_region_reservations.c
@@ -57,8 +57,9 @@ static void real_free(void *p)
 #include <assert.h>
 #include <stdio.h>
 
-void lock(struct lock *l)
+void lock_caller(struct lock *l, const char *caller)
 {
+	(void)caller;
 	assert(!l->lock_val);
 	l->lock_val++;
 }
diff --git a/core/test/run-msg.c b/core/test/run-msg.c
index 2cee515..67418a9 100644
--- a/core/test/run-msg.c
+++ b/core/test/run-msg.c
@@ -41,8 +41,9 @@ static void *zalloc(size_t size)
 #include "../opal-msg.c"
 #include <skiboot.h>
 
-void lock(struct lock *l)
+void lock_caller(struct lock *l, const char *caller)
 {
+	(void)caller;
 	assert(!l->lock_val);
 	l->lock_val = 1;
 }
diff --git a/core/test/run-timer.c b/core/test/run-timer.c
index 0270cfe..e45cf63 100644
--- a/core/test/run-timer.c
+++ b/core/test/run-timer.c
@@ -12,7 +12,11 @@
 static uint64_t stamp, last;
 struct lock;
-static inline void lock(struct lock *l) { (void)l; }
+static inline void lock_caller(struct lock *l, const char *caller)
+{
+	(void)caller;
+	(void)l;
+}
 static inline void unlock(struct lock *l) { (void)l; }
 
 unsigned long tb_hz = 512000000;
diff --git a/core/test/run-trace.c b/core/test/run-trace.c
index c319c05..dd4cd45 100644
--- a/core/test/run-trace.c
+++ b/core/test/run-trace.c
@@ -113,8 +113,9 @@ struct debug_descriptor debug_descriptor = {
 	.trace_mask = -1
 };
 
-void lock(struct lock *l)
+void lock_caller(struct lock *l, const char *caller)
 {
+	(void)caller;
 	assert(!l->lock_val);
 	l->lock_val = 1;
 }
diff --git a/core/timebase.c b/core/timebase.c
index f2dff44..777e4ba 100644
--- a/core/timebase.c
+++ b/core/timebase.c
@@ -53,7 +53,7 @@ void time_wait(unsigned long duration)
 {
 	struct cpu_thread *c = this_cpu();
 
-	if (this_cpu()->lock_depth) {
+	if (!list_empty(&this_cpu()->locks_held)) {
 		time_wait_nopoll(duration);
 		return;
 	}
diff --git a/coverity-model.c b/coverity-model.c
index b11f3bf..ca23751 100644
--- a/coverity-model.c
+++ b/coverity-model.c
@@ -10,7 +10,8 @@ void mem_free(struct mem_region *region, void *mem, const char *location)
 {
 	__coverity_free__(mem);
 }
 
-void lock(struct lock *l) {
+void lock_caller(struct lock *l, const char *caller)
+{
 	__coverity_exclusive_lock_acquire__(l);
 }
diff --git a/hdata/test/stubs.c b/hdata/test/stubs.c
index abeb832..92098bf 100644
--- a/hdata/test/stubs.c
+++ b/hdata/test/stubs.c
@@ -109,7 +109,7 @@ static bool false_stub(void) { return false; }
 #define NOOP_STUB FALSE_STUB
 
 TRUE_STUB(lock_held_by_me);
-NOOP_STUB(lock);
+NOOP_STUB(lock_caller);
 NOOP_STUB(unlock);
 NOOP_STUB(early_uart_init);
 NOOP_STUB(mem_reserve_fw);
diff --git a/include/cpu.h b/include/cpu.h
index e3bc75f..bec4b03 100644
--- a/include/cpu.h
+++ b/include/cpu.h
@@ -61,8 +61,8 @@ struct cpu_thread {
 	uint64_t		save_r1;
 	void			*icp_regs;
 	uint32_t		in_opal_call;
-	uint32_t		lock_depth;
 	uint32_t		con_suspend;
+	struct list_head	locks_held;
 	bool			con_need_flush;
 	bool			quiesce_opal_call;
 	bool			in_mcount;
diff --git a/include/lock.h b/include/lock.h
index 7bdfca2..b187573 100644
--- a/include/lock.h
+++ b/include/lock.h
@@ -20,6 +20,8 @@
 #include <stdbool.h>
 #include <processor.h>
 #include <cmpxchg.h>
+#include <ccan/list/list.h>
+#include <ccan/str/str.h>
 
 struct lock {
 	/* Lock value has bit 63 as lock bit and the PIR of the owner
@@ -32,10 +34,19 @@ struct lock {
 	 * in which case taking it will suspend console flushing
 	 */
 	bool in_con_path;
+
+	/* file/line of lock owner */
+	const char *owner;
+
+	/* linkage in per-cpu list of owned locks */
+	struct list_node list;
 };
 
-/* Initializer */
-#define LOCK_UNLOCKED { .lock_val = 0, .in_con_path = 0 }
+/* Initializer... not ideal but works for now. If we need different
+ * values for the fields and/or start getting warnings we'll have to
+ * play macro tricks
+ */
+#define LOCK_UNLOCKED { 0 }
 
 /* Note vs. libc and locking:
@@ -65,8 +76,14 @@ static inline void init_lock(struct lock *l)
 	*l = (struct lock)LOCK_UNLOCKED;
 }
 
-extern bool try_lock(struct lock *l);
-extern void lock(struct lock *l);
+#define LOCK_CALLER	__FILE__ ":" stringify(__LINE__)
+
+#define try_lock(l)		try_lock_caller(l, LOCK_CALLER)
+#define lock(l)			lock_caller(l, LOCK_CALLER)
+#define lock_recursive(l)	lock_recursive_caller(l, LOCK_CALLER)
+
+extern bool try_lock_caller(struct lock *l, const char *caller);
+extern void lock_caller(struct lock *l, const char *caller);
 extern void unlock(struct lock *l);
 
 extern bool lock_held_by_me(struct lock *l);
@@ -77,9 +94,15 @@ extern bool lock_held_by_me(struct lock *l)
 * returns false if the lock was already held by this cpu. If it returns
 * true, then the caller shall release it when done.
 */
-extern bool lock_recursive(struct lock *l);
+extern bool lock_recursive_caller(struct lock *l, const char *caller);
 
 /* Called after per-cpu data structures are available */
 extern void init_locks(void);
 
+/* Dump the list of locks held by this CPU */
+extern void dump_locks_list(void);
+
+/* Clean all locks held by CPU (and warn if any) */
+extern void drop_my_locks(bool warn);
+
 #endif /* __LOCK_H */
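
To see the bookkeeping pattern outside of skiboot, here is a hedged, self-contained sketch of the same idea: push a lock onto a per-thread held-list on acquisition, unlink it on release, and walk or drain the list for diagnostics. The names mirror the patch (owner, locks_held, dump_locks_list, drop_my_locks), but a simple intrusive singly-linked list stands in for ccan's list_node, and the actual lock acquisition/release is stubbed out:

    #include <stdbool.h>
    #include <stdio.h>

    struct lock {
        const char *owner;  /* call site that took the lock, e.g. "foo.c:42" */
        struct lock *next;  /* linkage in the per-thread held-locks list */
    };

    /* Stand-in for the locks_held list skiboot keeps in struct cpu_thread. */
    static struct lock *locks_held;

    static void lock_caller(struct lock *l, const char *owner)
    {
        /* real acquisition (atomic cmpxchg on lock_val) elided */
        l->owner = owner;
        l->next = locks_held;
        locks_held = l;
    }

    static void unlock(struct lock *l)
    {
        struct lock **p;

        /* unlink from the held list; real release elided */
        for (p = &locks_held; *p; p = &(*p)->next) {
            if (*p == l) {
                *p = l->next;
                break;
            }
        }
        l->owner = NULL;
    }

    static void dump_locks_list(void)
    {
        struct lock *l;

        printf("Locks held:\n");
        for (l = locks_held; l; l = l->next)
            printf("  %s\n", l->owner);
    }

    static void drop_my_locks(bool warn)
    {
        /* pop every lock still held, optionally reporting each owner */
        while (locks_held) {
            if (warn)
                printf("  %s\n", locks_held->owner);
            unlock(locks_held);
        }
    }

    int main(void)
    {
        struct lock a = { 0 }, b = { 0 };

        lock_caller(&a, "demo.c:10");
        lock_caller(&b, "demo.c:11");
        dump_locks_list();  /* prints demo.c:11, then demo.c:10 */
        drop_my_locks(true);
        return 0;
    }

Note one design difference: this sketch searches the list on release, which is O(n) in the number of held locks, whereas the patch embeds a doubly-linked ccan list_node in struct lock so that list_del() is O(1). Either way the list only ever holds the handful of locks a single CPU has nested, so the overhead is negligible next to the debugging value.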