diff options
author | Dmitry Vyukov <dvyukov@google.com> | 2021-07-30 08:35:11 +0200 |
---|---|---|
committer | Dmitry Vyukov <dvyukov@google.com> | 2021-07-30 11:39:38 +0200 |
commit | 97795be22f634667ce7a022398c59ccc9f7440eb (patch) | |
tree | ed4186259576a6c928bfd3738f46c5bd9f2ba4e6 | |
parent | dbe36e4073e708816cbeb693ea52832f54f52f2a (diff) | |
download | llvm-97795be22f634667ce7a022398c59ccc9f7440eb.zip llvm-97795be22f634667ce7a022398c59ccc9f7440eb.tar.gz llvm-97795be22f634667ce7a022398c59ccc9f7440eb.tar.bz2 |
tsan: optimize test-only barrier
The updated lots_of_threads.c test with 300 threads
started running for too long on machines with low
hardware parallelism (e.g. taskset -c 0-1).
On machines with many CPUs it finishes in ~2 secs. But with
taskset -c 0-1 it runs for hundreds of seconds,
effectively spinning in the barrier's sleep loop.
We now have the handy futex API in sanitizer_common.
Use it instead of the passive spin loop.
This makes the test run faster with taskset -c 0-1:
it now runs for ~1.5 secs, while with full parallelism
it still runs for ~2 secs (but consumes less CPU time).
Depends on D107131.
Reviewed By: vitalybuka
Differential Revision: https://reviews.llvm.org/D107132
-rw-r--r-- | compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp | 40 | ||||
-rw-r--r-- | compiler-rt/test/tsan/test.h | 2 |
2 files changed, 24 insertions, 18 deletions
diff --git a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp index 2efc75b..dfceee0 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp @@ -2916,30 +2916,36 @@ void InitializeInterceptors() { // Note that no_sanitize_thread attribute does not turn off atomic interception // so attaching it to the function defined in user code does not help. // That's why we now have what we have. -constexpr uptr kBarrierThreadBits = 10; -constexpr uptr kBarrierThreads = 1 << kBarrierThreadBits; +constexpr u32 kBarrierThreadBits = 10; +constexpr u32 kBarrierThreads = 1 << kBarrierThreadBits; -extern "C" SANITIZER_INTERFACE_ATTRIBUTE -void __tsan_testonly_barrier_init(u64 *barrier, u32 count) { - if (count >= kBarrierThreads) { - Printf("barrier_init: count is too large (%d)\n", count); +extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __tsan_testonly_barrier_init( + atomic_uint32_t *barrier, u32 num_threads) { + if (num_threads >= kBarrierThreads) { + Printf("barrier_init: count is too large (%d)\n", num_threads); Die(); } // kBarrierThreadBits lsb is thread count, // the remaining are count of entered threads. 
- *barrier = count; + atomic_store(barrier, num_threads, memory_order_relaxed); } -extern "C" SANITIZER_INTERFACE_ATTRIBUTE -void __tsan_testonly_barrier_wait(u64 *barrier) { - constexpr uptr kThreadMask = kBarrierThreads - 1; - unsigned old = __atomic_fetch_add(barrier, kBarrierThreads, __ATOMIC_RELAXED); - unsigned old_epoch = (old >> kBarrierThreadBits) / (old & kThreadMask); +static u32 barrier_epoch(u32 value) { + return (value >> kBarrierThreadBits) / (value & (kBarrierThreads - 1)); +} + +extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __tsan_testonly_barrier_wait( + atomic_uint32_t *barrier) { + u32 old = atomic_fetch_add(barrier, kBarrierThreads, memory_order_relaxed); + u32 old_epoch = barrier_epoch(old); + if (barrier_epoch(old + kBarrierThreads) != old_epoch) { + FutexWake(barrier, (1 << 30)); + return; + } for (;;) { - unsigned cur = __atomic_load_n(barrier, __ATOMIC_RELAXED); - unsigned cur_epoch = (cur >> kBarrierThreadBits) / (cur & kThreadMask); - if (cur_epoch != old_epoch) - break; - internal_usleep(100); + u32 cur = atomic_load(barrier, memory_order_relaxed); + if (barrier_epoch(cur) != old_epoch) + return; + FutexWait(barrier, cur); } } diff --git a/compiler-rt/test/tsan/test.h b/compiler-rt/test/tsan/test.h index 16d7a12..cbc9380 100644 --- a/compiler-rt/test/tsan/test.h +++ b/compiler-rt/test/tsan/test.h @@ -17,7 +17,7 @@ // TSan-invisible barrier. // Tests use it to establish necessary execution order in a way that does not // interfere with tsan (does not establish synchronization between threads). -typedef unsigned long long invisible_barrier_t; +typedef unsigned invisible_barrier_t; #ifdef __cplusplus extern "C" { |