aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Dmitry Vyukov <dvyukov@google.com> 2021-07-30 08:35:11 +0200
committer Dmitry Vyukov <dvyukov@google.com> 2021-07-30 11:39:38 +0200
commit 97795be22f634667ce7a022398c59ccc9f7440eb (patch)
tree ed4186259576a6c928bfd3738f46c5bd9f2ba4e6
parent dbe36e4073e708816cbeb693ea52832f54f52f2a (diff)
download llvm-97795be22f634667ce7a022398c59ccc9f7440eb.zip
llvm-97795be22f634667ce7a022398c59ccc9f7440eb.tar.gz
llvm-97795be22f634667ce7a022398c59ccc9f7440eb.tar.bz2
tsan: optimize test-only barrier
The updated lots_of_threads.c test with 300 threads started running for too long on machines with low hardware parallelism (e.g. under taskset -c 0-1). With lots of CPUs it finishes in ~2 secs, but with taskset -c 0-1 it runs for hundreds of seconds, effectively spinning in the barrier's sleep loop. We now have the handy futex API in sanitizer_common, so use it instead of the passive spin loop. This only makes the test faster: with taskset -c 0-1 it now runs for ~1.5 secs, while with full parallelism it still runs for ~2 secs (but consumes less CPU time). Depends on D107131. Reviewed By: vitalybuka. Differential Revision: https://reviews.llvm.org/D107132
-rw-r--r-- compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp 40
-rw-r--r-- compiler-rt/test/tsan/test.h 2
2 files changed, 24 insertions, 18 deletions
diff --git a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp
index 2efc75b..dfceee0 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp
@@ -2916,30 +2916,36 @@ void InitializeInterceptors() {
// Note that no_sanitize_thread attribute does not turn off atomic interception
// so attaching it to the function defined in user code does not help.
// That's why we now have what we have.
-constexpr uptr kBarrierThreadBits = 10;
-constexpr uptr kBarrierThreads = 1 << kBarrierThreadBits;
+constexpr u32 kBarrierThreadBits = 10;
+constexpr u32 kBarrierThreads = 1 << kBarrierThreadBits;
-extern "C" SANITIZER_INTERFACE_ATTRIBUTE
-void __tsan_testonly_barrier_init(u64 *barrier, u32 count) {
- if (count >= kBarrierThreads) {
- Printf("barrier_init: count is too large (%d)\n", count);
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __tsan_testonly_barrier_init(
+ atomic_uint32_t *barrier, u32 num_threads) {
+ if (num_threads >= kBarrierThreads) {
+ Printf("barrier_init: count is too large (%d)\n", num_threads);
Die();
}
// kBarrierThreadBits lsb is thread count,
// the remaining are count of entered threads.
- *barrier = count;
+ atomic_store(barrier, num_threads, memory_order_relaxed);
}
-extern "C" SANITIZER_INTERFACE_ATTRIBUTE
-void __tsan_testonly_barrier_wait(u64 *barrier) {
- constexpr uptr kThreadMask = kBarrierThreads - 1;
- unsigned old = __atomic_fetch_add(barrier, kBarrierThreads, __ATOMIC_RELAXED);
- unsigned old_epoch = (old >> kBarrierThreadBits) / (old & kThreadMask);
+static u32 barrier_epoch(u32 value) {
+ return (value >> kBarrierThreadBits) / (value & (kBarrierThreads - 1));
+}
+
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __tsan_testonly_barrier_wait(
+ atomic_uint32_t *barrier) {
+ u32 old = atomic_fetch_add(barrier, kBarrierThreads, memory_order_relaxed);
+ u32 old_epoch = barrier_epoch(old);
+ if (barrier_epoch(old + kBarrierThreads) != old_epoch) {
+ FutexWake(barrier, (1 << 30));
+ return;
+ }
for (;;) {
- unsigned cur = __atomic_load_n(barrier, __ATOMIC_RELAXED);
- unsigned cur_epoch = (cur >> kBarrierThreadBits) / (cur & kThreadMask);
- if (cur_epoch != old_epoch)
- break;
- internal_usleep(100);
+ u32 cur = atomic_load(barrier, memory_order_relaxed);
+ if (barrier_epoch(cur) != old_epoch)
+ return;
+ FutexWait(barrier, cur);
}
}
diff --git a/compiler-rt/test/tsan/test.h b/compiler-rt/test/tsan/test.h
index 16d7a12..cbc9380 100644
--- a/compiler-rt/test/tsan/test.h
+++ b/compiler-rt/test/tsan/test.h
@@ -17,7 +17,7 @@
// TSan-invisible barrier.
// Tests use it to establish necessary execution order in a way that does not
// interfere with tsan (does not establish synchronization between threads).
-typedef unsigned long long invisible_barrier_t;
+typedef unsigned invisible_barrier_t;
#ifdef __cplusplus
extern "C" {